path: root/arch/ia64
author Linus Torvalds <torvalds@ppc970.osdl.org> 2005-04-16 18:20:36 -0400
committer Linus Torvalds <torvalds@ppc970.osdl.org> 2005-04-16 18:20:36 -0400
commit 1da177e4c3f41524e886b7f1b8a0c1fc7321cac2 (patch)
tree 0bba044c4ce775e45a88a51686b5d9f90697ea9d /arch/ia64
Linux-2.6.12-rc2 (tag: v2.6.12-rc2)
Initial git repository build. I'm not bothering with the full history, even though we have it. We can create a separate "historical" git archive of that later if we want to, and in the meantime it's about 3.2GB when imported into git - space that would just make the early git days unnecessarily complicated, when we don't have a lot of good infrastructure for it. Let it rip!
Diffstat (limited to 'arch/ia64')
-rw-r--r-- arch/ia64/Kconfig | 420
-rw-r--r-- arch/ia64/Kconfig.debug | 64
-rw-r--r-- arch/ia64/Makefile | 115
-rw-r--r-- arch/ia64/configs/bigsur_defconfig | 1172
-rw-r--r-- arch/ia64/configs/sim_defconfig | 534
-rw-r--r-- arch/ia64/configs/sn2_defconfig | 1038
-rw-r--r-- arch/ia64/configs/tiger_defconfig | 1098
-rw-r--r-- arch/ia64/configs/zx1_defconfig | 1273
-rw-r--r-- arch/ia64/defconfig | 1199
-rw-r--r-- arch/ia64/dig/Makefile | 9
-rw-r--r-- arch/ia64/dig/machvec.c | 3
-rw-r--r-- arch/ia64/dig/setup.c | 86
-rw-r--r-- arch/ia64/hp/common/Makefile | 10
-rw-r--r-- arch/ia64/hp/common/hwsw_iommu.c | 185
-rw-r--r-- arch/ia64/hp/common/sba_iommu.c | 2121
-rw-r--r-- arch/ia64/hp/sim/Kconfig | 20
-rw-r--r-- arch/ia64/hp/sim/Makefile | 16
-rw-r--r-- arch/ia64/hp/sim/boot/Makefile | 37
-rw-r--r-- arch/ia64/hp/sim/boot/boot_head.S | 144
-rw-r--r-- arch/ia64/hp/sim/boot/bootloader.c | 176
-rw-r--r-- arch/ia64/hp/sim/boot/bootloader.lds | 65
-rw-r--r-- arch/ia64/hp/sim/boot/fw-emu.c | 398
-rw-r--r-- arch/ia64/hp/sim/boot/ssc.h | 35
-rw-r--r-- arch/ia64/hp/sim/hpsim.S | 10
-rw-r--r-- arch/ia64/hp/sim/hpsim_console.c | 65
-rw-r--r-- arch/ia64/hp/sim/hpsim_irq.c | 51
-rw-r--r-- arch/ia64/hp/sim/hpsim_machvec.c | 3
-rw-r--r-- arch/ia64/hp/sim/hpsim_setup.c | 52
-rw-r--r-- arch/ia64/hp/sim/hpsim_ssc.h | 36
-rw-r--r-- arch/ia64/hp/sim/simeth.c | 530
-rw-r--r-- arch/ia64/hp/sim/simscsi.c | 404
-rw-r--r-- arch/ia64/hp/sim/simserial.c | 1032
-rw-r--r-- arch/ia64/hp/zx1/Makefile | 8
-rw-r--r-- arch/ia64/hp/zx1/hpzx1_machvec.c | 3
-rw-r--r-- arch/ia64/hp/zx1/hpzx1_swiotlb_machvec.c | 3
-rw-r--r-- arch/ia64/ia32/Makefile | 12
-rw-r--r-- arch/ia64/ia32/binfmt_elf32.c | 294
-rw-r--r-- arch/ia64/ia32/elfcore32.h | 138
-rw-r--r-- arch/ia64/ia32/ia32_entry.S | 500
-rw-r--r-- arch/ia64/ia32/ia32_ioctl.c | 48
-rw-r--r-- arch/ia64/ia32/ia32_ldt.c | 147
-rw-r--r-- arch/ia64/ia32/ia32_signal.c | 1036
-rw-r--r-- arch/ia64/ia32/ia32_support.c | 264
-rw-r--r-- arch/ia64/ia32/ia32_traps.c | 156
-rw-r--r-- arch/ia64/ia32/ia32priv.h | 544
-rw-r--r-- arch/ia64/ia32/sys_ia32.c | 2747
-rw-r--r-- arch/ia64/install.sh | 40
-rw-r--r-- arch/ia64/kernel/Makefile | 52
-rw-r--r-- arch/ia64/kernel/acpi-ext.c | 100
-rw-r--r-- arch/ia64/kernel/acpi.c | 841
-rw-r--r-- arch/ia64/kernel/asm-offsets.c | 239
-rw-r--r-- arch/ia64/kernel/brl_emu.c | 234
-rw-r--r-- arch/ia64/kernel/cyclone.c | 109
-rw-r--r-- arch/ia64/kernel/domain.c | 382
-rw-r--r-- arch/ia64/kernel/efi.c | 832
-rw-r--r-- arch/ia64/kernel/efi_stub.S | 86
-rw-r--r-- arch/ia64/kernel/entry.S | 1587
-rw-r--r-- arch/ia64/kernel/entry.h | 82
-rw-r--r-- arch/ia64/kernel/fsys.S | 884
-rw-r--r-- arch/ia64/kernel/gate-data.S | 3
-rw-r--r-- arch/ia64/kernel/gate.S | 372
-rw-r--r-- arch/ia64/kernel/gate.lds.S | 95
-rw-r--r-- arch/ia64/kernel/head.S | 996
-rw-r--r-- arch/ia64/kernel/ia64_ksyms.c | 127
-rw-r--r-- arch/ia64/kernel/init_task.c | 46
-rw-r--r-- arch/ia64/kernel/iosapic.c | 827
-rw-r--r-- arch/ia64/kernel/irq.c | 238
-rw-r--r-- arch/ia64/kernel/irq_ia64.c | 278
-rw-r--r-- arch/ia64/kernel/irq_lsapic.c | 37
-rw-r--r-- arch/ia64/kernel/ivt.S | 1619
-rw-r--r-- arch/ia64/kernel/machvec.c | 70
-rw-r--r-- arch/ia64/kernel/mca.c | 1470
-rw-r--r-- arch/ia64/kernel/mca_asm.S | 928
-rw-r--r-- arch/ia64/kernel/mca_drv.c | 639
-rw-r--r-- arch/ia64/kernel/mca_drv.h | 113
-rw-r--r-- arch/ia64/kernel/mca_drv_asm.S | 45
-rw-r--r-- arch/ia64/kernel/minstate.h | 251
-rw-r--r-- arch/ia64/kernel/module.c | 952
-rw-r--r-- arch/ia64/kernel/pal.S | 302
-rw-r--r-- arch/ia64/kernel/palinfo.c | 1023
-rw-r--r-- arch/ia64/kernel/patch.c | 189
-rw-r--r-- arch/ia64/kernel/perfmon.c | 6676
-rw-r--r-- arch/ia64/kernel/perfmon_default_smpl.c | 306
-rw-r--r-- arch/ia64/kernel/perfmon_generic.h | 45
-rw-r--r-- arch/ia64/kernel/perfmon_itanium.h | 115
-rw-r--r-- arch/ia64/kernel/perfmon_mckinley.h | 187
-rw-r--r-- arch/ia64/kernel/process.c | 800
-rw-r--r-- arch/ia64/kernel/ptrace.c | 1627
-rw-r--r-- arch/ia64/kernel/sal.c | 302
-rw-r--r-- arch/ia64/kernel/salinfo.c | 629
-rw-r--r-- arch/ia64/kernel/semaphore.c | 165
-rw-r--r-- arch/ia64/kernel/setup.c | 723
-rw-r--r-- arch/ia64/kernel/sigframe.h | 25
-rw-r--r-- arch/ia64/kernel/signal.c | 691
-rw-r--r-- arch/ia64/kernel/smp.c | 376
-rw-r--r-- arch/ia64/kernel/smpboot.c | 692
-rw-r--r-- arch/ia64/kernel/sys_ia64.c | 298
-rw-r--r-- arch/ia64/kernel/time.c | 255
-rw-r--r-- arch/ia64/kernel/topology.c | 92
-rw-r--r-- arch/ia64/kernel/traps.c | 609
-rw-r--r-- arch/ia64/kernel/unaligned.c | 1521
-rw-r--r-- arch/ia64/kernel/unwind.c | 2306
-rw-r--r-- arch/ia64/kernel/unwind_decoder.c | 459
-rw-r--r-- arch/ia64/kernel/unwind_i.h | 164
-rw-r--r-- arch/ia64/kernel/vmlinux.lds.S | 251
-rw-r--r-- arch/ia64/lib/Makefile | 52
-rw-r--r-- arch/ia64/lib/bitop.c | 88
-rw-r--r-- arch/ia64/lib/carta_random.S | 54
-rw-r--r-- arch/ia64/lib/checksum.c | 102
-rw-r--r-- arch/ia64/lib/clear_page.S | 77
-rw-r--r-- arch/ia64/lib/clear_user.S | 209
-rw-r--r-- arch/ia64/lib/copy_page.S | 98
-rw-r--r-- arch/ia64/lib/copy_page_mck.S | 185
-rw-r--r-- arch/ia64/lib/copy_user.S | 610
-rw-r--r-- arch/ia64/lib/csum_partial_copy.c | 151
-rw-r--r-- arch/ia64/lib/dec_and_lock.c | 42
-rw-r--r-- arch/ia64/lib/do_csum.S | 323
-rw-r--r-- arch/ia64/lib/flush.S | 39
-rw-r--r-- arch/ia64/lib/idiv32.S | 83
-rw-r--r-- arch/ia64/lib/idiv64.S | 80
-rw-r--r-- arch/ia64/lib/io.c | 165
-rw-r--r-- arch/ia64/lib/ip_fast_csum.S | 90
-rw-r--r-- arch/ia64/lib/memcpy.S | 301
-rw-r--r-- arch/ia64/lib/memcpy_mck.S | 661
-rw-r--r-- arch/ia64/lib/memset.S | 362
-rw-r--r-- arch/ia64/lib/strlen.S | 192
-rw-r--r-- arch/ia64/lib/strlen_user.S | 198
-rw-r--r-- arch/ia64/lib/strncpy_from_user.S | 44
-rw-r--r-- arch/ia64/lib/strnlen_user.S | 45
-rw-r--r-- arch/ia64/lib/swiotlb.c | 658
-rw-r--r-- arch/ia64/lib/xor.S | 184
-rw-r--r-- arch/ia64/mm/Makefile | 12
-rw-r--r-- arch/ia64/mm/contig.c | 299
-rw-r--r-- arch/ia64/mm/discontig.c | 737
-rw-r--r-- arch/ia64/mm/extable.c | 90
-rw-r--r-- arch/ia64/mm/fault.c | 261
-rw-r--r-- arch/ia64/mm/hugetlbpage.c | 357
-rw-r--r-- arch/ia64/mm/init.c | 597
-rw-r--r-- arch/ia64/mm/numa.c | 49
-rw-r--r-- arch/ia64/mm/tlb.c | 190
-rw-r--r-- arch/ia64/module.lds | 13
-rw-r--r-- arch/ia64/oprofile/Kconfig | 26
-rw-r--r-- arch/ia64/oprofile/Makefile | 10
-rw-r--r-- arch/ia64/oprofile/backtrace.c | 150
-rw-r--r-- arch/ia64/oprofile/init.c | 38
-rw-r--r-- arch/ia64/oprofile/perfmon.c | 100
-rw-r--r-- arch/ia64/pci/Makefile | 4
-rw-r--r-- arch/ia64/pci/pci.c | 735
-rwxr-xr-x arch/ia64/scripts/check-gas | 15
-rw-r--r-- arch/ia64/scripts/check-gas-asm.S | 2
-rw-r--r-- arch/ia64/scripts/check-model.c | 1
-rw-r--r-- arch/ia64/scripts/check-segrel.S | 4
-rw-r--r-- arch/ia64/scripts/check-segrel.lds | 11
-rw-r--r-- arch/ia64/scripts/check-serialize.S | 2
-rw-r--r-- arch/ia64/scripts/check-text-align.S | 6
-rwxr-xr-x arch/ia64/scripts/toolchain-flags | 53
-rwxr-xr-x arch/ia64/scripts/unwcheck.py | 64
-rw-r--r-- arch/ia64/sn/Makefile | 14
-rw-r--r-- arch/ia64/sn/include/ioerror.h | 81
-rw-r--r-- arch/ia64/sn/include/pci/pcibr_provider.h | 149
-rw-r--r-- arch/ia64/sn/include/pci/pcibus_provider_defs.h | 43
-rw-r--r-- arch/ia64/sn/include/pci/pcidev.h | 54
-rw-r--r-- arch/ia64/sn/include/pci/pic.h | 261
-rw-r--r-- arch/ia64/sn/include/pci/tiocp.h | 256
-rw-r--r-- arch/ia64/sn/include/tio.h | 37
-rw-r--r-- arch/ia64/sn/include/xtalk/hubdev.h | 67
-rw-r--r-- arch/ia64/sn/include/xtalk/xbow.h | 291
-rw-r--r-- arch/ia64/sn/include/xtalk/xwidgetdev.h | 70
-rw-r--r-- arch/ia64/sn/kernel/Makefile | 12
-rw-r--r-- arch/ia64/sn/kernel/bte.c | 453
-rw-r--r-- arch/ia64/sn/kernel/bte_error.c | 198
-rw-r--r-- arch/ia64/sn/kernel/huberror.c | 201
-rw-r--r-- arch/ia64/sn/kernel/idle.c | 30
-rw-r--r-- arch/ia64/sn/kernel/io_init.c | 411
-rw-r--r-- arch/ia64/sn/kernel/iomv.c | 70
-rw-r--r-- arch/ia64/sn/kernel/irq.c | 431
-rw-r--r-- arch/ia64/sn/kernel/klconflib.c | 108
-rw-r--r-- arch/ia64/sn/kernel/machvec.c | 11
-rw-r--r-- arch/ia64/sn/kernel/mca.c | 135
-rw-r--r-- arch/ia64/sn/kernel/setup.c | 621
-rw-r--r-- arch/ia64/sn/kernel/sn2/Makefile | 13
-rw-r--r-- arch/ia64/sn/kernel/sn2/cache.c | 34
-rw-r--r-- arch/ia64/sn/kernel/sn2/io.c | 101
-rw-r--r-- arch/ia64/sn/kernel/sn2/prominfo_proc.c | 279
-rw-r--r-- arch/ia64/sn/kernel/sn2/ptc_deadlock.S | 82
-rw-r--r-- arch/ia64/sn/kernel/sn2/sn2_smp.c | 295
-rw-r--r-- arch/ia64/sn/kernel/sn2/sn_hwperf.c | 690
-rw-r--r-- arch/ia64/sn/kernel/sn2/sn_proc_fs.c | 149
-rw-r--r-- arch/ia64/sn/kernel/sn2/timer.c | 36
-rw-r--r-- arch/ia64/sn/kernel/sn2/timer_interrupt.c | 63
-rw-r--r-- arch/ia64/sn/pci/Makefile | 10
-rw-r--r-- arch/ia64/sn/pci/pci_dma.c | 363
-rw-r--r-- arch/ia64/sn/pci/pcibr/Makefile | 11
-rw-r--r-- arch/ia64/sn/pci/pcibr/pcibr_ate.c | 188
-rw-r--r-- arch/ia64/sn/pci/pcibr/pcibr_dma.c | 379
-rw-r--r-- arch/ia64/sn/pci/pcibr/pcibr_provider.c | 170
-rw-r--r-- arch/ia64/sn/pci/pcibr/pcibr_reg.c | 282
197 files changed, 69771 insertions, 0 deletions
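
The listing above is simply this commit's diffstat restricted to arch/ia64. From any clone of the mainline history the same output can be regenerated; a minimal sketch using the full commit id from the header above (since this is the root commit, git diffs it against the empty tree, which is why every entry is a new file):

  # per-file stat for the ia64 part of the initial import
  git show --stat 1da177e4c3f41524e886b7f1b8a0c1fc7321cac2 -- arch/ia64
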
diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig
new file mode 100644
index 000000000000..33fcb205fcb7
--- /dev/null
+++ b/arch/ia64/Kconfig
@@ -0,0 +1,420 @@
1#
2# For a description of the syntax of this configuration file,
3# see Documentation/kbuild/kconfig-language.txt.
4#
5
6mainmenu "IA-64 Linux Kernel Configuration"
7
8source "init/Kconfig"
9
10menu "Processor type and features"
11
12config IA64
13 bool
14 default y
15 help
16 The Itanium Processor Family is Intel's 64-bit successor to
17 the 32-bit X86 line. The IA-64 Linux project has a home
18 page at <http://www.linuxia64.org/> and a mailing list at
19 <linux-ia64@vger.kernel.org>.
20
21config 64BIT
22 bool
23 default y
24
25config MMU
26 bool
27 default y
28
29config RWSEM_XCHGADD_ALGORITHM
30 bool
31 default y
32
33config GENERIC_CALIBRATE_DELAY
34 bool
35 default y
36
37config TIME_INTERPOLATION
38 bool
39 default y
40
41config EFI
42 bool
43 default y
44
45config GENERIC_IOMAP
46 bool
47 default y
48
49choice
50 prompt "System type"
51 default IA64_GENERIC
52
53config IA64_GENERIC
54 bool "generic"
55 select NUMA
56 select ACPI_NUMA
57 select VIRTUAL_MEM_MAP
58 select DISCONTIGMEM
59 help
60 This selects the system type of your hardware. A "generic" kernel
61 will run on any supported IA-64 system. However, if you configure
62 a kernel for your specific system, it will be faster and smaller.
63
64 generic For any supported IA-64 system
65 DIG-compliant For DIG ("Developer's Interface Guide") compliant systems
66 HP-zx1/sx1000 For HP systems
67 HP-zx1/sx1000+swiotlb For HP systems with (broken) DMA-constrained devices.
68 SGI-SN2 For SGI Altix systems
69 Ski-simulator For the HP simulator <http://www.hpl.hp.com/research/linux/ski/>
70
71 If you don't know what to do, choose "generic".
72
73config IA64_DIG
74 bool "DIG-compliant"
75
76config IA64_HP_ZX1
77 bool "HP-zx1/sx1000"
78 help
79 Build a kernel that runs on HP zx1 and sx1000 systems. This adds
80 support for the HP I/O MMU.
81
82config IA64_HP_ZX1_SWIOTLB
83 bool "HP-zx1/sx1000 with software I/O TLB"
84 help
85 Build a kernel that runs on HP zx1 and sx1000 systems even when they
86 have broken PCI devices which cannot DMA to full 32 bits. Apart
87 from support for the HP I/O MMU, this includes support for the software
88 I/O TLB, which allows supporting the broken devices at the expense of
89 wasting some kernel memory (about 2MB by default).
90
91config IA64_SGI_SN2
92 bool "SGI-SN2"
93 help
94 Selecting this option will optimize the kernel for use on sn2 based
95 systems, but the resulting kernel binary will not run on other
96 types of ia64 systems. If you have an SGI Altix system, it's safe
97 to select this option. If in doubt, select ia64 generic support
98 instead.
99
100config IA64_HP_SIM
101 bool "Ski-simulator"
102
103endchoice
104
105choice
106 prompt "Processor type"
107 default ITANIUM
108
109config ITANIUM
110 bool "Itanium"
111 help
112 Select your IA-64 processor type. The default is Itanium.
113 This choice is safe for all IA-64 systems, but may not perform
114 optimally on systems with, say, Itanium 2 or newer processors.
115
116config MCKINLEY
117 bool "Itanium 2"
118 help
119 Select this to configure for an Itanium 2 (McKinley) processor.
120
121endchoice
122
123choice
124 prompt "Kernel page size"
125 default IA64_PAGE_SIZE_16KB
126
127config IA64_PAGE_SIZE_4KB
128 bool "4KB"
129 help
130 This lets you select the page size of the kernel. For best IA-64
131 performance, a page size of 8KB or 16KB is recommended. For best
132 IA-32 compatibility, a page size of 4KB should be selected (the vast
133 majority of IA-32 binaries work perfectly fine with a larger page
134 size). For Itanium 2 or newer systems, a page size of 64KB can also
135 be selected.
136
137 4KB For best IA-32 compatibility
138 8KB For best IA-64 performance
139 16KB For best IA-64 performance
140 64KB Requires Itanium 2 or newer processor.
141
142 If you don't know what to do, choose 16KB.
143
144config IA64_PAGE_SIZE_8KB
145 bool "8KB"
146
147config IA64_PAGE_SIZE_16KB
148 bool "16KB"
149
150config IA64_PAGE_SIZE_64KB
151 depends on !ITANIUM
152 bool "64KB"
153
154endchoice
155
156config IA64_BRL_EMU
157 bool
158 depends on ITANIUM
159 default y
160
161# align cache-sensitive data to 128 bytes
162config IA64_L1_CACHE_SHIFT
163 int
164 default "7" if MCKINLEY
165 default "6" if ITANIUM
166
167# align cache-sensitive data to 64 bytes
168config NUMA
169 bool "NUMA support"
170 depends on !IA64_HP_SIM
171 default y if IA64_SGI_SN2
172 select ACPI_NUMA
173 help
174 Say Y to compile the kernel to support NUMA (Non-Uniform Memory
175 Access). This option is for configuring high-end multiprocessor
176 server systems. If in doubt, say N.
177
178config VIRTUAL_MEM_MAP
179 bool "Virtual mem map"
180 default y if !IA64_HP_SIM
181 help
182 Say Y to compile the kernel with support for a virtual mem map.
183 This code also only takes effect if a memory hole of greater than
184 1 Gb is found during boot. You must turn this option on if you
185 require the DISCONTIGMEM option for your machine. If you are
186 unsure, say Y.
187
188config HOLES_IN_ZONE
189 bool
190 default y if VIRTUAL_MEM_MAP
191
192config DISCONTIGMEM
193 bool "Discontiguous memory support"
194 depends on (IA64_DIG || IA64_SGI_SN2 || IA64_GENERIC || IA64_HP_ZX1 || IA64_HP_ZX1_SWIOTLB) && NUMA && VIRTUAL_MEM_MAP
195 default y if (IA64_SGI_SN2 || IA64_GENERIC) && NUMA
196 help
197 Say Y to support efficient handling of discontiguous physical memory,
198 for architectures which are either NUMA (Non-Uniform Memory Access)
199 or have huge holes in the physical address space for other reasons.
200 See <file:Documentation/vm/numa> for more.
201
202config IA64_CYCLONE
203 bool "Cyclone (EXA) Time Source support"
204 help
205 Say Y here to enable support for IBM EXA Cyclone time source.
206 If you're unsure, answer N.
207
208config IOSAPIC
209 bool
210 depends on !IA64_HP_SIM
211 default y
212
213config IA64_SGI_SN_SIM
214 bool "SGI Medusa Simulator Support"
215 depends on IA64_SGI_SN2
216 help
217 If you are compiling a kernel that will run under SGI's IA-64
218 simulator (Medusa) then say Y, otherwise say N.
219
220config FORCE_MAX_ZONEORDER
221 int
222 default "18"
223
224config SMP
225 bool "Symmetric multi-processing support"
226 help
227 This enables support for systems with more than one CPU. If you have
228 a system with only one CPU, say N. If you have a system with more
229 than one CPU, say Y.
230
231 If you say N here, the kernel will run on single and multiprocessor
232 systems, but will use only one CPU of a multiprocessor system. If
233 you say Y here, the kernel will run on many, but not all,
234 single processor systems. On a single processor system, the kernel
235 will run faster if you say N here.
236
237 See also the <file:Documentation/smp.txt> and the SMP-HOWTO
238 available at <http://www.tldp.org/docs.html#howto>.
239
240 If you don't know what to do here, say N.
241
242config NR_CPUS
243 int "Maximum number of CPUs (2-512)"
244 range 2 512
245 depends on SMP
246 default "64"
247 help
248 You should set this to the number of CPUs in your system, but
249 keep in mind that a kernel compiled for, e.g., 2 CPUs will boot but
250 only use 2 CPUs on a >2 CPU system. Setting this to a value larger
251 than 64 will cause the use of a CPU mask array, causing a small
252 performance hit.
253
254config HOTPLUG_CPU
255 bool "Support for hot-pluggable CPUs (EXPERIMENTAL)"
256 depends on SMP && EXPERIMENTAL
257 select HOTPLUG
258 default n
259 ---help---
260 Say Y here to experiment with turning CPUs off and on. CPUs
261 can be controlled through /sys/devices/system/cpu/cpu#.
262 Say N if you want to disable CPU hotplug.
263
264config PREEMPT
265 bool "Preemptible Kernel"
266 help
267 This option reduces the latency of the kernel when reacting to
268 real-time or interactive events by allowing a low priority process to
269 be preempted even if it is in kernel mode executing a system call.
270 This allows applications to run more reliably even when the system is
271 under load.
272
273 Say Y here if you are building a kernel for a desktop, embedded
274 or real-time system. Say N if you are unsure.
275
276config HAVE_DEC_LOCK
277 bool
278 depends on (SMP || PREEMPT)
279 default y
280
281config IA32_SUPPORT
282 bool "Support for Linux/x86 binaries"
283 help
284 IA-64 processors can execute IA-32 (X86) instructions. By
285 saying Y here, the kernel will include IA-32 system call
286 emulation support which makes it possible to transparently
287 run IA-32 Linux binaries on an IA-64 Linux system.
288 If in doubt, say Y.
289
290config COMPAT
291 bool
292 depends on IA32_SUPPORT
293 default y
294
295config IA64_MCA_RECOVERY
296 tristate "MCA recovery from errors other than TLB."
297
298config PERFMON
299 bool "Performance monitor support"
300 help
301 Selects whether support for the IA-64 performance monitor hardware
302 is included in the kernel. This makes some kernel data-structures a
303 little bigger and slows down execution a bit, but it is generally
304 a good idea to turn this on. If you're unsure, say Y.
305
306config IA64_PALINFO
307 tristate "/proc/pal support"
308 help
309 If you say Y here, you are able to get PAL (Processor Abstraction
310 Layer) information in /proc/pal. This contains useful information
311 about the processors in your systems, such as cache and TLB sizes
312 and the PAL firmware version in use.
313
314 To use this option, you have to ensure that the "/proc file system
315 support" (CONFIG_PROC_FS) is enabled, too.
316
317config ACPI_DEALLOCATE_IRQ
318 bool
319 depends on IOSAPIC && EXPERIMENTAL
320 default y
321
322source "drivers/firmware/Kconfig"
323
324source "fs/Kconfig.binfmt"
325
326endmenu
327
328menu "Power management and ACPI"
329
330config PM
331 bool "Power Management support"
332 depends on IA64_GENERIC || IA64_DIG || IA64_HP_ZX1 || IA64_HP_ZX1_SWIOTLB
333 default y
334 help
335 "Power Management" means that parts of your computer are shut
336 off or put into a power conserving "sleep" mode if they are not
337 being used. There are two competing standards for doing this: APM
338 and ACPI. If you want to use either one, say Y here and then also
339 to the requisite support below.
340
341 Power Management is most important for battery powered laptop
342 computers; if you have a laptop, check out the Linux Laptop home
343 page on the WWW at <http://www.linux-on-laptops.com/> and the
344 Battery Powered Linux mini-HOWTO, available from
345 <http://www.tldp.org/docs.html#howto>.
346
347 Note that, even if you say N here, Linux on the x86 architecture
348 will issue the hlt instruction if nothing is to be done, thereby
349 sending the processor to sleep and saving power.
350
351config ACPI
352 bool
353 depends on !IA64_HP_SIM
354 default y
355
356if !IA64_HP_SIM
357
358source "drivers/acpi/Kconfig"
359
360endif
361
362endmenu
363
364if !IA64_HP_SIM
365
366menu "Bus options (PCI, PCMCIA)"
367
368config PCI
369 bool "PCI support"
370 help
371 Find out whether you have a PCI motherboard. PCI is the name of a
372 bus system, i.e. the way the CPU talks to the other stuff inside
373 your box. Other bus systems are ISA, EISA, MicroChannel (MCA) or
374 VESA. If you have PCI, say Y, otherwise N.
375
376 The PCI-HOWTO, available from
377 <http://www.tldp.org/docs.html#howto>, contains valuable
378 information about which PCI hardware does work under Linux and which
379 doesn't.
380
381config PCI_DOMAINS
382 bool
383 default PCI
384
385source "drivers/pci/Kconfig"
386
387source "drivers/pci/hotplug/Kconfig"
388
389source "drivers/pcmcia/Kconfig"
390
391endmenu
392
393endif
394
395source "drivers/Kconfig"
396
397source "fs/Kconfig"
398
399source "lib/Kconfig"
400
401#
402# Use the generic interrupt handling code in kernel/irq/:
403#
404config GENERIC_HARDIRQS
405 bool
406 default y
407
408config GENERIC_IRQ_PROBE
409 bool
410 default y
411
412source "arch/ia64/hp/sim/Kconfig"
413
414source "arch/ia64/oprofile/Kconfig"
415
416source "arch/ia64/Kconfig.debug"
417
418source "security/Kconfig"
419
420source "crypto/Kconfig"
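
One detail worth noting in the "System type" choice above: picking "generic" does not just set IA64_GENERIC; its select lines also force NUMA, ACPI_NUMA, VIRTUAL_MEM_MAP and DISCONTIGMEM on. A quick way to see that from a checked-out tree is to generate a configuration and grep the result. This is only a sketch; it assumes an ia64-linux- cross prefix and that arch/ia64/defconfig (added later in this commit) selects the generic system type:

  make ARCH=ia64 CROSS_COMPILE=ia64-linux- defconfig
  grep -E '^CONFIG_(IA64_GENERIC|NUMA|ACPI_NUMA|VIRTUAL_MEM_MAP|DISCONTIGMEM)=' .config
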
diff --git a/arch/ia64/Kconfig.debug b/arch/ia64/Kconfig.debug
new file mode 100644
index 000000000000..de9d507ba0fd
--- /dev/null
+++ b/arch/ia64/Kconfig.debug
@@ -0,0 +1,64 @@
1menu "Kernel hacking"
2
3source "lib/Kconfig.debug"
4
5choice
6 prompt "Physical memory granularity"
7 default IA64_GRANULE_64MB
8
9config IA64_GRANULE_16MB
10 bool "16MB"
11 help
12 IA-64 identity-mapped regions use a large page size called "granules".
13
14 Select "16MB" for a small granule size.
15 Select "64MB" for a large granule size. This is the current default.
16
17config IA64_GRANULE_64MB
18 bool "64MB"
19 depends on !(IA64_GENERIC || IA64_HP_ZX1 || IA64_HP_ZX1_SWIOTLB || IA64_SGI_SN2)
20
21endchoice
22
23config IA64_PRINT_HAZARDS
24 bool "Print possible IA-64 dependency violations to console"
25 depends on DEBUG_KERNEL
26 help
27 Selecting this option prints more information for Illegal Dependency
28 Faults, that is, for Read-after-Write (RAW), Write-after-Write (WAW),
29 or Write-after-Read (WAR) violations. This option is ignored if you
30 are compiling for an Itanium A step processor
31 (CONFIG_ITANIUM_ASTEP_SPECIFIC). If you're unsure, select Y.
32
33config DISABLE_VHPT
34 bool "Disable VHPT"
35 depends on DEBUG_KERNEL
36 help
37 The Virtual Hash Page Table (VHPT) enhances virtual address
38 translation performance. Normally you want the VHPT active but you
39 can select this option to disable the VHPT for debugging. If you're
40 unsure, answer N.
41
42config IA64_DEBUG_CMPXCHG
43 bool "Turn on compare-and-exchange bug checking (slow!)"
44 depends on DEBUG_KERNEL
45 help
46 Selecting this option turns on bug checking for the IA-64
47 compare-and-exchange instructions. This is slow! Itaniums
48 from step B3 or later don't have this problem. If you're unsure,
49 select N.
50
51config IA64_DEBUG_IRQ
52 bool "Turn on irq debug checks (slow!)"
53 depends on DEBUG_KERNEL
54 help
55 Selecting this option turns on bug checking for the IA-64 irq_save
56 and restore instructions. It's useful for tracking down spinlock
57 problems, but slow! If you're unsure, select N.
58
59config SYSVIPC_COMPAT
60 bool
61 depends on COMPAT && SYSVIPC
62 default y
63
64endmenu
diff --git a/arch/ia64/Makefile b/arch/ia64/Makefile
new file mode 100644
index 000000000000..f9bd88ada708
--- /dev/null
+++ b/arch/ia64/Makefile
@@ -0,0 +1,115 @@
1#
2# ia64/Makefile
3#
4# This file is subject to the terms and conditions of the GNU General Public
5# License. See the file "COPYING" in the main directory of this archive
6# for more details.
7#
8# Copyright (C) 1998-2004 by David Mosberger-Tang <davidm@hpl.hp.com>
9#
10
11NM := $(CROSS_COMPILE)nm -B
12READELF := $(CROSS_COMPILE)readelf
13
14export AWK
15
16CHECKFLAGS += -m64 -D__ia64=1 -D__ia64__=1 -D_LP64 -D__LP64__
17
18OBJCOPYFLAGS := --strip-all
19LDFLAGS_vmlinux := -static
20LDFLAGS_MODULE += -T $(srctree)/arch/ia64/module.lds
21AFLAGS_KERNEL := -mconstant-gp
22EXTRA :=
23
24cflags-y := -pipe $(EXTRA) -ffixed-r13 -mfixed-range=f12-f15,f32-f127 \
25 -falign-functions=32 -frename-registers -fno-optimize-sibling-calls
26CFLAGS_KERNEL := -mconstant-gp
27
28GCC_VERSION := $(call cc-version)
29GAS_STATUS = $(shell $(srctree)/arch/ia64/scripts/check-gas "$(CC)" "$(OBJDUMP)")
30CPPFLAGS += $(shell $(srctree)/arch/ia64/scripts/toolchain-flags "$(CC)" "$(OBJDUMP)" "$(READELF)")
31
32ifeq ($(GAS_STATUS),buggy)
33$(error Sorry, you need a newer version of the assembler, one that is built from \
34 a source-tree that post-dates 18-Dec-2002. You can find a pre-compiled \
35 static binary of such an assembler at: \
36 \
37 ftp://ftp.hpl.hp.com/pub/linux-ia64/gas-030124.tar.gz)
38endif
39
40ifneq ($(shell if [ $(GCC_VERSION) -lt 0300 ] ; then echo "bad"; fi ;),)
41$(error Sorry, your compiler is too old. GCC v2.96 is known to generate bad code.)
42endif
43
44ifeq ($(GCC_VERSION),0304)
45 cflags-$(CONFIG_ITANIUM) += -mtune=merced
46 cflags-$(CONFIG_MCKINLEY) += -mtune=mckinley
47endif
48
49CFLAGS += $(cflags-y)
50head-y := arch/ia64/kernel/head.o arch/ia64/kernel/init_task.o
51
52libs-y += arch/ia64/lib/
53core-y += arch/ia64/kernel/ arch/ia64/mm/
54core-$(CONFIG_IA32_SUPPORT) += arch/ia64/ia32/
55core-$(CONFIG_IA64_DIG) += arch/ia64/dig/
56core-$(CONFIG_IA64_GENERIC) += arch/ia64/dig/
57core-$(CONFIG_IA64_HP_ZX1) += arch/ia64/dig/
58core-$(CONFIG_IA64_HP_ZX1_SWIOTLB) += arch/ia64/dig/
59core-$(CONFIG_IA64_SGI_SN2) += arch/ia64/sn/
60
61drivers-$(CONFIG_PCI) += arch/ia64/pci/
62drivers-$(CONFIG_IA64_HP_SIM) += arch/ia64/hp/sim/
63drivers-$(CONFIG_IA64_HP_ZX1) += arch/ia64/hp/common/ arch/ia64/hp/zx1/
64drivers-$(CONFIG_IA64_HP_ZX1_SWIOTLB) += arch/ia64/hp/common/ arch/ia64/hp/zx1/
65drivers-$(CONFIG_IA64_GENERIC) += arch/ia64/hp/common/ arch/ia64/hp/zx1/ arch/ia64/hp/sim/ arch/ia64/sn/
66drivers-$(CONFIG_OPROFILE) += arch/ia64/oprofile/
67
68boot := arch/ia64/hp/sim/boot
69
70.PHONY: boot compressed check
71
72all: compressed unwcheck
73
74compressed: vmlinux.gz
75
76vmlinux.gz: vmlinux
77 $(Q)$(MAKE) $(build)=$(boot) $@
78
79unwcheck: vmlinux
80 -$(Q)READELF=$(READELF) $(srctree)/arch/ia64/scripts/unwcheck.py $<
81
82archclean:
83 $(Q)$(MAKE) $(clean)=$(boot)
84
85CLEAN_FILES += include/asm-ia64/.offsets.h.stamp vmlinux.gz bootloader
86
87MRPROPER_FILES += include/asm-ia64/offsets.h
88
89prepare: include/asm-ia64/offsets.h
90
91arch/ia64/kernel/asm-offsets.s: include/asm include/linux/version.h include/config/MARKER
92
93include/asm-ia64/offsets.h: arch/ia64/kernel/asm-offsets.s
94 $(call filechk,gen-asm-offsets)
95
96arch/ia64/kernel/asm-offsets.s: include/asm-ia64/.offsets.h.stamp
97
98include/asm-ia64/.offsets.h.stamp:
99 mkdir -p include/asm-ia64
100 [ -s include/asm-ia64/offsets.h ] \
101 || echo "#define IA64_TASK_SIZE 0" > include/asm-ia64/offsets.h
102 touch $@
103
104boot: lib/lib.a vmlinux
105 $(Q)$(MAKE) $(build)=$(boot) $@
106
107install: vmlinux.gz
108 sh $(srctree)/arch/ia64/install.sh $(KERNELRELEASE) $< System.map "$(INSTALL_PATH)"
109
110define archhelp
111 echo '* compressed - Build compressed kernel image'
112 echo ' install - Install compressed kernel image'
113 echo ' boot - Build vmlinux and bootloader for Ski simulator'
114 echo '* unwcheck - Check vmlinux for invalid unwind info'
115endef
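
The toolchain gates near the top of this Makefile lean on cc-version, which in kbuild reduces the compiler version to a zero-padded major/minor number, so the -lt 0300 test rejects anything older than gcc 3.0 and the 0304 case adds the -mtune flags only for gcc 3.4. A rough stand-alone equivalent of that check, outside kbuild and with the cross-compiler name taken as an assumption:

  CC=ia64-linux-gcc
  # mimic cc-version: "0296" for gcc 2.96, "0304" for gcc 3.4.x
  GCC_VERSION=$($CC -dumpversion | awk -F. '{ printf "%02d%02d", $1, $2 }')
  if [ "$GCC_VERSION" -lt 0300 ]; then
          echo "gcc $($CC -dumpversion) is too old for arch/ia64" >&2
  fi

With a working toolchain, make ARCH=ia64 CROSS_COMPILE=ia64-linux- compressed then builds the vmlinux.gz image that the install target ships, matching the archhelp text above.
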
diff --git a/arch/ia64/configs/bigsur_defconfig b/arch/ia64/configs/bigsur_defconfig
new file mode 100644
index 000000000000..b95fcf86ea00
--- /dev/null
+++ b/arch/ia64/configs/bigsur_defconfig
@@ -0,0 +1,1172 @@
1#
2# Automatically generated make config: don't edit
3# Linux kernel version: 2.6.10-rc2
4# Mon Nov 29 13:27:48 2004
5#
6
7#
8# Code maturity level options
9#
10CONFIG_EXPERIMENTAL=y
11CONFIG_CLEAN_COMPILE=y
12CONFIG_LOCK_KERNEL=y
13
14#
15# General setup
16#
17CONFIG_LOCALVERSION=""
18CONFIG_SWAP=y
19CONFIG_SYSVIPC=y
20CONFIG_POSIX_MQUEUE=y
21# CONFIG_BSD_PROCESS_ACCT is not set
22CONFIG_SYSCTL=y
23# CONFIG_AUDIT is not set
24CONFIG_LOG_BUF_SHIFT=16
25CONFIG_HOTPLUG=y
26CONFIG_KOBJECT_UEVENT=y
27# CONFIG_IKCONFIG is not set
28# CONFIG_EMBEDDED is not set
29CONFIG_KALLSYMS=y
30# CONFIG_KALLSYMS_ALL is not set
31# CONFIG_KALLSYMS_EXTRA_PASS is not set
32CONFIG_FUTEX=y
33CONFIG_EPOLL=y
34# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
35CONFIG_SHMEM=y
36CONFIG_CC_ALIGN_FUNCTIONS=0
37CONFIG_CC_ALIGN_LABELS=0
38CONFIG_CC_ALIGN_LOOPS=0
39CONFIG_CC_ALIGN_JUMPS=0
40# CONFIG_TINY_SHMEM is not set
41
42#
43# Loadable module support
44#
45CONFIG_MODULES=y
46CONFIG_MODULE_UNLOAD=y
47# CONFIG_MODULE_FORCE_UNLOAD is not set
48CONFIG_OBSOLETE_MODPARM=y
49# CONFIG_MODVERSIONS is not set
50# CONFIG_MODULE_SRCVERSION_ALL is not set
51CONFIG_KMOD=y
52CONFIG_STOP_MACHINE=y
53
54#
55# Processor type and features
56#
57CONFIG_IA64=y
58CONFIG_64BIT=y
59CONFIG_MMU=y
60CONFIG_RWSEM_XCHGADD_ALGORITHM=y
61CONFIG_TIME_INTERPOLATION=y
62CONFIG_EFI=y
63CONFIG_GENERIC_IOMAP=y
64# CONFIG_IA64_GENERIC is not set
65CONFIG_IA64_DIG=y
66# CONFIG_IA64_HP_ZX1 is not set
67# CONFIG_IA64_SGI_SN2 is not set
68# CONFIG_IA64_HP_SIM is not set
69CONFIG_ITANIUM=y
70# CONFIG_MCKINLEY is not set
71# CONFIG_IA64_PAGE_SIZE_4KB is not set
72# CONFIG_IA64_PAGE_SIZE_8KB is not set
73CONFIG_IA64_PAGE_SIZE_16KB=y
74# CONFIG_IA64_PAGE_SIZE_64KB is not set
75CONFIG_IA64_BRL_EMU=y
76CONFIG_IA64_L1_CACHE_SHIFT=6
77# CONFIG_NUMA is not set
78# CONFIG_VIRTUAL_MEM_MAP is not set
79# CONFIG_IA64_CYCLONE is not set
80CONFIG_IOSAPIC=y
81CONFIG_FORCE_MAX_ZONEORDER=18
82CONFIG_SMP=y
83CONFIG_NR_CPUS=2
84# CONFIG_HOTPLUG_CPU is not set
85CONFIG_PREEMPT=y
86CONFIG_HAVE_DEC_LOCK=y
87CONFIG_IA32_SUPPORT=y
88CONFIG_COMPAT=y
89# CONFIG_IA64_MCA_RECOVERY is not set
90CONFIG_PERFMON=y
91CONFIG_IA64_PALINFO=y
92
93#
94# Firmware Drivers
95#
96CONFIG_EFI_VARS=y
97CONFIG_EFI_PCDP=y
98CONFIG_BINFMT_ELF=y
99CONFIG_BINFMT_MISC=m
100
101#
102# Power management and ACPI
103#
104CONFIG_PM=y
105CONFIG_ACPI=y
106
107#
108# ACPI (Advanced Configuration and Power Interface) Support
109#
110CONFIG_ACPI_BOOT=y
111CONFIG_ACPI_INTERPRETER=y
112CONFIG_ACPI_BUTTON=m
113CONFIG_ACPI_VIDEO=m
114CONFIG_ACPI_FAN=m
115CONFIG_ACPI_PROCESSOR=m
116CONFIG_ACPI_THERMAL=m
117CONFIG_ACPI_BLACKLIST_YEAR=0
118# CONFIG_ACPI_DEBUG is not set
119CONFIG_ACPI_BUS=y
120CONFIG_ACPI_POWER=y
121CONFIG_ACPI_PCI=y
122CONFIG_ACPI_SYSTEM=y
123
124#
125# Bus options (PCI, PCMCIA)
126#
127CONFIG_PCI=y
128CONFIG_PCI_DOMAINS=y
129# CONFIG_PCI_MSI is not set
130CONFIG_PCI_LEGACY_PROC=y
131CONFIG_PCI_NAMES=y
132
133#
134# PCI Hotplug Support
135#
136# CONFIG_HOTPLUG_PCI is not set
137
138#
139# PCCARD (PCMCIA/CardBus) support
140#
141# CONFIG_PCCARD is not set
142
143#
144# PC-card bridges
145#
146
147#
148# Device Drivers
149#
150
151#
152# Generic Driver Options
153#
154CONFIG_STANDALONE=y
155CONFIG_PREVENT_FIRMWARE_BUILD=y
156# CONFIG_FW_LOADER is not set
157# CONFIG_DEBUG_DRIVER is not set
158
159#
160# Memory Technology Devices (MTD)
161#
162# CONFIG_MTD is not set
163
164#
165# Parallel port support
166#
167# CONFIG_PARPORT is not set
168
169#
170# Plug and Play support
171#
172# CONFIG_PNP is not set
173
174#
175# Block devices
176#
177# CONFIG_BLK_CPQ_DA is not set
178# CONFIG_BLK_CPQ_CISS_DA is not set
179# CONFIG_BLK_DEV_DAC960 is not set
180# CONFIG_BLK_DEV_UMEM is not set
181CONFIG_BLK_DEV_LOOP=m
182CONFIG_BLK_DEV_CRYPTOLOOP=m
183CONFIG_BLK_DEV_NBD=m
184# CONFIG_BLK_DEV_SX8 is not set
185# CONFIG_BLK_DEV_UB is not set
186CONFIG_BLK_DEV_RAM=m
187CONFIG_BLK_DEV_RAM_SIZE=4096
188CONFIG_INITRAMFS_SOURCE=""
189# CONFIG_CDROM_PKTCDVD is not set
190
191#
192# IO Schedulers
193#
194CONFIG_IOSCHED_NOOP=y
195CONFIG_IOSCHED_AS=y
196CONFIG_IOSCHED_DEADLINE=y
197CONFIG_IOSCHED_CFQ=y
198
199#
200# ATA/ATAPI/MFM/RLL support
201#
202CONFIG_IDE=m
203CONFIG_BLK_DEV_IDE=m
204
205#
206# Please see Documentation/ide.txt for help/info on IDE drives
207#
208# CONFIG_BLK_DEV_IDE_SATA is not set
209CONFIG_BLK_DEV_IDEDISK=m
210# CONFIG_IDEDISK_MULTI_MODE is not set
211CONFIG_BLK_DEV_IDECD=m
212# CONFIG_BLK_DEV_IDETAPE is not set
213CONFIG_BLK_DEV_IDEFLOPPY=m
214# CONFIG_BLK_DEV_IDESCSI is not set
215# CONFIG_IDE_TASK_IOCTL is not set
216
217#
218# IDE chipset support/bugfixes
219#
220CONFIG_IDE_GENERIC=m
221CONFIG_BLK_DEV_IDEPCI=y
222CONFIG_IDEPCI_SHARE_IRQ=y
223# CONFIG_BLK_DEV_OFFBOARD is not set
224CONFIG_BLK_DEV_GENERIC=m
225# CONFIG_BLK_DEV_OPTI621 is not set
226CONFIG_BLK_DEV_IDEDMA_PCI=y
227# CONFIG_BLK_DEV_IDEDMA_FORCED is not set
228CONFIG_IDEDMA_PCI_AUTO=y
229# CONFIG_IDEDMA_ONLYDISK is not set
230# CONFIG_BLK_DEV_AEC62XX is not set
231# CONFIG_BLK_DEV_ALI15X3 is not set
232# CONFIG_BLK_DEV_AMD74XX is not set
233# CONFIG_BLK_DEV_CMD64X is not set
234# CONFIG_BLK_DEV_TRIFLEX is not set
235# CONFIG_BLK_DEV_CY82C693 is not set
236# CONFIG_BLK_DEV_CS5520 is not set
237# CONFIG_BLK_DEV_CS5530 is not set
238# CONFIG_BLK_DEV_HPT34X is not set
239# CONFIG_BLK_DEV_HPT366 is not set
240# CONFIG_BLK_DEV_SC1200 is not set
241CONFIG_BLK_DEV_PIIX=m
242# CONFIG_BLK_DEV_NS87415 is not set
243# CONFIG_BLK_DEV_PDC202XX_OLD is not set
244# CONFIG_BLK_DEV_PDC202XX_NEW is not set
245# CONFIG_BLK_DEV_SVWKS is not set
246# CONFIG_BLK_DEV_SIIMAGE is not set
247# CONFIG_BLK_DEV_SLC90E66 is not set
248# CONFIG_BLK_DEV_TRM290 is not set
249# CONFIG_BLK_DEV_VIA82CXXX is not set
250# CONFIG_IDE_ARM is not set
251CONFIG_BLK_DEV_IDEDMA=y
252# CONFIG_IDEDMA_IVB is not set
253CONFIG_IDEDMA_AUTO=y
254# CONFIG_BLK_DEV_HD is not set
255
256#
257# SCSI device support
258#
259CONFIG_SCSI=y
260CONFIG_SCSI_PROC_FS=y
261
262#
263# SCSI support type (disk, tape, CD-ROM)
264#
265CONFIG_BLK_DEV_SD=y
266# CONFIG_CHR_DEV_ST is not set
267# CONFIG_CHR_DEV_OSST is not set
268# CONFIG_BLK_DEV_SR is not set
269# CONFIG_CHR_DEV_SG is not set
270
271#
272# Some SCSI devices (e.g. CD jukebox) support multiple LUNs
273#
274# CONFIG_SCSI_MULTI_LUN is not set
275CONFIG_SCSI_CONSTANTS=y
276CONFIG_SCSI_LOGGING=y
277
278#
279# SCSI Transport Attributes
280#
281CONFIG_SCSI_SPI_ATTRS=m
282# CONFIG_SCSI_FC_ATTRS is not set
283
284#
285# SCSI low-level drivers
286#
287# CONFIG_BLK_DEV_3W_XXXX_RAID is not set
288# CONFIG_SCSI_3W_9XXX is not set
289# CONFIG_SCSI_ACARD is not set
290# CONFIG_SCSI_AACRAID is not set
291# CONFIG_SCSI_AIC7XXX is not set
292# CONFIG_SCSI_AIC7XXX_OLD is not set
293# CONFIG_SCSI_AIC79XX is not set
294# CONFIG_MEGARAID_NEWGEN is not set
295# CONFIG_MEGARAID_LEGACY is not set
296# CONFIG_SCSI_SATA is not set
297# CONFIG_SCSI_BUSLOGIC is not set
298# CONFIG_SCSI_DMX3191D is not set
299# CONFIG_SCSI_EATA is not set
300# CONFIG_SCSI_EATA_PIO is not set
301# CONFIG_SCSI_FUTURE_DOMAIN is not set
302# CONFIG_SCSI_GDTH is not set
303# CONFIG_SCSI_IPS is not set
304# CONFIG_SCSI_INITIO is not set
305# CONFIG_SCSI_INIA100 is not set
306# CONFIG_SCSI_SYM53C8XX_2 is not set
307# CONFIG_SCSI_IPR is not set
308# CONFIG_SCSI_QLOGIC_ISP is not set
309# CONFIG_SCSI_QLOGIC_FC is not set
310CONFIG_SCSI_QLOGIC_1280=y
311# CONFIG_SCSI_QLOGIC_1280_1040 is not set
312CONFIG_SCSI_QLA2XXX=y
313# CONFIG_SCSI_QLA21XX is not set
314# CONFIG_SCSI_QLA22XX is not set
315# CONFIG_SCSI_QLA2300 is not set
316# CONFIG_SCSI_QLA2322 is not set
317# CONFIG_SCSI_QLA6312 is not set
318# CONFIG_SCSI_QLA6322 is not set
319# CONFIG_SCSI_DC395x is not set
320# CONFIG_SCSI_DC390T is not set
321# CONFIG_SCSI_DEBUG is not set
322
323#
324# Multi-device support (RAID and LVM)
325#
326CONFIG_MD=y
327CONFIG_BLK_DEV_MD=m
328CONFIG_MD_LINEAR=m
329CONFIG_MD_RAID0=m
330CONFIG_MD_RAID1=m
331CONFIG_MD_RAID10=m
332CONFIG_MD_RAID5=m
333CONFIG_MD_RAID6=m
334CONFIG_MD_MULTIPATH=m
335# CONFIG_MD_FAULTY is not set
336CONFIG_BLK_DEV_DM=m
337CONFIG_DM_CRYPT=m
338CONFIG_DM_SNAPSHOT=m
339CONFIG_DM_MIRROR=m
340CONFIG_DM_ZERO=m
341
342#
343# Fusion MPT device support
344#
345# CONFIG_FUSION is not set
346
347#
348# IEEE 1394 (FireWire) support
349#
350# CONFIG_IEEE1394 is not set
351
352#
353# I2O device support
354#
355# CONFIG_I2O is not set
356
357#
358# Networking support
359#
360CONFIG_NET=y
361
362#
363# Networking options
364#
365CONFIG_PACKET=y
366CONFIG_PACKET_MMAP=y
367# CONFIG_NETLINK_DEV is not set
368CONFIG_UNIX=y
369# CONFIG_NET_KEY is not set
370CONFIG_INET=y
371# CONFIG_IP_MULTICAST is not set
372# CONFIG_IP_ADVANCED_ROUTER is not set
373# CONFIG_IP_PNP is not set
374# CONFIG_NET_IPIP is not set
375# CONFIG_NET_IPGRE is not set
376# CONFIG_ARPD is not set
377# CONFIG_SYN_COOKIES is not set
378# CONFIG_INET_AH is not set
379# CONFIG_INET_ESP is not set
380# CONFIG_INET_IPCOMP is not set
381# CONFIG_INET_TUNNEL is not set
382CONFIG_IP_TCPDIAG=y
383# CONFIG_IP_TCPDIAG_IPV6 is not set
384# CONFIG_IPV6 is not set
385# CONFIG_NETFILTER is not set
386
387#
388# SCTP Configuration (EXPERIMENTAL)
389#
390# CONFIG_IP_SCTP is not set
391# CONFIG_ATM is not set
392# CONFIG_BRIDGE is not set
393# CONFIG_VLAN_8021Q is not set
394# CONFIG_DECNET is not set
395# CONFIG_LLC2 is not set
396# CONFIG_IPX is not set
397# CONFIG_ATALK is not set
398# CONFIG_X25 is not set
399# CONFIG_LAPB is not set
400# CONFIG_NET_DIVERT is not set
401# CONFIG_ECONET is not set
402# CONFIG_WAN_ROUTER is not set
403
404#
405# QoS and/or fair queueing
406#
407# CONFIG_NET_SCHED is not set
408# CONFIG_NET_CLS_ROUTE is not set
409
410#
411# Network testing
412#
413# CONFIG_NET_PKTGEN is not set
414# CONFIG_NETPOLL is not set
415# CONFIG_NET_POLL_CONTROLLER is not set
416# CONFIG_HAMRADIO is not set
417# CONFIG_IRDA is not set
418# CONFIG_BT is not set
419CONFIG_NETDEVICES=y
420CONFIG_DUMMY=y
421# CONFIG_BONDING is not set
422# CONFIG_EQUALIZER is not set
423# CONFIG_TUN is not set
424
425#
426# ARCnet devices
427#
428# CONFIG_ARCNET is not set
429
430#
431# Ethernet (10 or 100Mbit)
432#
433CONFIG_NET_ETHERNET=y
434CONFIG_MII=y
435# CONFIG_HAPPYMEAL is not set
436# CONFIG_SUNGEM is not set
437# CONFIG_NET_VENDOR_3COM is not set
438
439#
440# Tulip family network device support
441#
442# CONFIG_NET_TULIP is not set
443# CONFIG_HP100 is not set
444CONFIG_NET_PCI=y
445# CONFIG_PCNET32 is not set
446# CONFIG_AMD8111_ETH is not set
447# CONFIG_ADAPTEC_STARFIRE is not set
448# CONFIG_B44 is not set
449# CONFIG_FORCEDETH is not set
450# CONFIG_DGRS is not set
451CONFIG_EEPRO100=y
452# CONFIG_EEPRO100_PIO is not set
453# CONFIG_E100 is not set
454# CONFIG_FEALNX is not set
455# CONFIG_NATSEMI is not set
456# CONFIG_NE2K_PCI is not set
457# CONFIG_8139CP is not set
458# CONFIG_8139TOO is not set
459# CONFIG_SIS900 is not set
460# CONFIG_EPIC100 is not set
461# CONFIG_SUNDANCE is not set
462# CONFIG_VIA_RHINE is not set
463
464#
465# Ethernet (1000 Mbit)
466#
467# CONFIG_ACENIC is not set
468# CONFIG_DL2K is not set
469# CONFIG_E1000 is not set
470# CONFIG_NS83820 is not set
471# CONFIG_HAMACHI is not set
472# CONFIG_YELLOWFIN is not set
473# CONFIG_R8169 is not set
474# CONFIG_SK98LIN is not set
475# CONFIG_VIA_VELOCITY is not set
476# CONFIG_TIGON3 is not set
477
478#
479# Ethernet (10000 Mbit)
480#
481# CONFIG_IXGB is not set
482# CONFIG_S2IO is not set
483
484#
485# Token Ring devices
486#
487# CONFIG_TR is not set
488
489#
490# Wireless LAN (non-hamradio)
491#
492# CONFIG_NET_RADIO is not set
493
494#
495# Wan interfaces
496#
497# CONFIG_WAN is not set
498# CONFIG_FDDI is not set
499# CONFIG_HIPPI is not set
500# CONFIG_PPP is not set
501# CONFIG_SLIP is not set
502# CONFIG_NET_FC is not set
503# CONFIG_SHAPER is not set
504# CONFIG_NETCONSOLE is not set
505
506#
507# ISDN subsystem
508#
509# CONFIG_ISDN is not set
510
511#
512# Telephony Support
513#
514# CONFIG_PHONE is not set
515
516#
517# Input device support
518#
519CONFIG_INPUT=y
520
521#
522# Userland interfaces
523#
524CONFIG_INPUT_MOUSEDEV=y
525CONFIG_INPUT_MOUSEDEV_PSAUX=y
526CONFIG_INPUT_MOUSEDEV_SCREEN_X=1024
527CONFIG_INPUT_MOUSEDEV_SCREEN_Y=768
528# CONFIG_INPUT_JOYDEV is not set
529# CONFIG_INPUT_TSDEV is not set
530CONFIG_INPUT_EVDEV=y
531# CONFIG_INPUT_EVBUG is not set
532
533#
534# Input I/O drivers
535#
536# CONFIG_GAMEPORT is not set
537CONFIG_SOUND_GAMEPORT=y
538CONFIG_SERIO=y
539CONFIG_SERIO_I8042=y
540CONFIG_SERIO_SERPORT=y
541# CONFIG_SERIO_CT82C710 is not set
542# CONFIG_SERIO_PCIPS2 is not set
543# CONFIG_SERIO_RAW is not set
544
545#
546# Input Device Drivers
547#
548CONFIG_INPUT_KEYBOARD=y
549CONFIG_KEYBOARD_ATKBD=y
550# CONFIG_KEYBOARD_SUNKBD is not set
551# CONFIG_KEYBOARD_LKKBD is not set
552# CONFIG_KEYBOARD_XTKBD is not set
553# CONFIG_KEYBOARD_NEWTON is not set
554CONFIG_INPUT_MOUSE=y
555CONFIG_MOUSE_PS2=y
556# CONFIG_MOUSE_SERIAL is not set
557# CONFIG_MOUSE_VSXXXAA is not set
558# CONFIG_INPUT_JOYSTICK is not set
559# CONFIG_INPUT_TOUCHSCREEN is not set
560# CONFIG_INPUT_MISC is not set
561
562#
563# Character devices
564#
565CONFIG_VT=y
566CONFIG_VT_CONSOLE=y
567CONFIG_HW_CONSOLE=y
568# CONFIG_SERIAL_NONSTANDARD is not set
569
570#
571# Serial drivers
572#
573CONFIG_SERIAL_8250=y
574CONFIG_SERIAL_8250_CONSOLE=y
575CONFIG_SERIAL_8250_ACPI=y
576CONFIG_SERIAL_8250_NR_UARTS=4
577CONFIG_SERIAL_8250_EXTENDED=y
578CONFIG_SERIAL_8250_SHARE_IRQ=y
579# CONFIG_SERIAL_8250_DETECT_IRQ is not set
580# CONFIG_SERIAL_8250_MULTIPORT is not set
581# CONFIG_SERIAL_8250_RSA is not set
582
583#
584# Non-8250 serial port support
585#
586CONFIG_SERIAL_CORE=y
587CONFIG_SERIAL_CORE_CONSOLE=y
588CONFIG_UNIX98_PTYS=y
589CONFIG_LEGACY_PTYS=y
590CONFIG_LEGACY_PTY_COUNT=256
591
592#
593# IPMI
594#
595# CONFIG_IPMI_HANDLER is not set
596
597#
598# Watchdog Cards
599#
600# CONFIG_WATCHDOG is not set
601# CONFIG_HW_RANDOM is not set
602CONFIG_EFI_RTC=y
603# CONFIG_DTLK is not set
604# CONFIG_R3964 is not set
605# CONFIG_APPLICOM is not set
606
607#
608# Ftape, the floppy tape device driver
609#
610CONFIG_AGP=m
611CONFIG_AGP_I460=m
612CONFIG_DRM=y
613# CONFIG_DRM_TDFX is not set
614CONFIG_DRM_R128=m
615# CONFIG_DRM_RADEON is not set
616# CONFIG_DRM_MGA is not set
617# CONFIG_DRM_SIS is not set
618# CONFIG_RAW_DRIVER is not set
619# CONFIG_HPET is not set
620
621#
622# I2C support
623#
624CONFIG_I2C=y
625CONFIG_I2C_CHARDEV=y
626
627#
628# I2C Algorithms
629#
630CONFIG_I2C_ALGOBIT=y
631# CONFIG_I2C_ALGOPCF is not set
632# CONFIG_I2C_ALGOPCA is not set
633
634#
635# I2C Hardware Bus support
636#
637# CONFIG_I2C_ALI1535 is not set
638# CONFIG_I2C_ALI1563 is not set
639# CONFIG_I2C_ALI15X3 is not set
640# CONFIG_I2C_AMD756 is not set
641# CONFIG_I2C_AMD8111 is not set
642# CONFIG_I2C_I801 is not set
643# CONFIG_I2C_I810 is not set
644# CONFIG_I2C_ISA is not set
645# CONFIG_I2C_NFORCE2 is not set
646# CONFIG_I2C_PARPORT_LIGHT is not set
647# CONFIG_I2C_PROSAVAGE is not set
648# CONFIG_I2C_SAVAGE4 is not set
649# CONFIG_SCx200_ACB is not set
650# CONFIG_I2C_SIS5595 is not set
651# CONFIG_I2C_SIS630 is not set
652# CONFIG_I2C_SIS96X is not set
653# CONFIG_I2C_STUB is not set
654# CONFIG_I2C_VIA is not set
655# CONFIG_I2C_VIAPRO is not set
656# CONFIG_I2C_VOODOO3 is not set
657# CONFIG_I2C_PCA_ISA is not set
658
659#
660# Hardware Sensors Chip support
661#
662# CONFIG_I2C_SENSOR is not set
663# CONFIG_SENSORS_ADM1021 is not set
664# CONFIG_SENSORS_ADM1025 is not set
665# CONFIG_SENSORS_ADM1031 is not set
666# CONFIG_SENSORS_ASB100 is not set
667# CONFIG_SENSORS_DS1621 is not set
668# CONFIG_SENSORS_FSCHER is not set
669# CONFIG_SENSORS_GL518SM is not set
670# CONFIG_SENSORS_IT87 is not set
671# CONFIG_SENSORS_LM63 is not set
672# CONFIG_SENSORS_LM75 is not set
673# CONFIG_SENSORS_LM77 is not set
674# CONFIG_SENSORS_LM78 is not set
675# CONFIG_SENSORS_LM80 is not set
676# CONFIG_SENSORS_LM83 is not set
677# CONFIG_SENSORS_LM85 is not set
678# CONFIG_SENSORS_LM87 is not set
679# CONFIG_SENSORS_LM90 is not set
680# CONFIG_SENSORS_MAX1619 is not set
681# CONFIG_SENSORS_PC87360 is not set
682# CONFIG_SENSORS_SMSC47M1 is not set
683# CONFIG_SENSORS_VIA686A is not set
684# CONFIG_SENSORS_W83781D is not set
685# CONFIG_SENSORS_W83L785TS is not set
686# CONFIG_SENSORS_W83627HF is not set
687
688#
689# Other I2C Chip support
690#
691# CONFIG_SENSORS_EEPROM is not set
692# CONFIG_SENSORS_PCF8574 is not set
693# CONFIG_SENSORS_PCF8591 is not set
694# CONFIG_SENSORS_RTC8564 is not set
695# CONFIG_I2C_DEBUG_CORE is not set
696# CONFIG_I2C_DEBUG_ALGO is not set
697# CONFIG_I2C_DEBUG_BUS is not set
698# CONFIG_I2C_DEBUG_CHIP is not set
699
700#
701# Dallas's 1-wire bus
702#
703# CONFIG_W1 is not set
704
705#
706# Misc devices
707#
708
709#
710# Multimedia devices
711#
712# CONFIG_VIDEO_DEV is not set
713
714#
715# Digital Video Broadcasting Devices
716#
717# CONFIG_DVB is not set
718
719#
720# Graphics support
721#
722# CONFIG_FB is not set
723
724#
725# Console display driver support
726#
727CONFIG_VGA_CONSOLE=y
728CONFIG_DUMMY_CONSOLE=y
729
730#
731# Sound
732#
733CONFIG_SOUND=m
734
735#
736# Advanced Linux Sound Architecture
737#
738CONFIG_SND=m
739CONFIG_SND_TIMER=m
740CONFIG_SND_PCM=m
741CONFIG_SND_HWDEP=m
742CONFIG_SND_RAWMIDI=m
743CONFIG_SND_SEQUENCER=m
744# CONFIG_SND_SEQ_DUMMY is not set
745CONFIG_SND_OSSEMUL=y
746CONFIG_SND_MIXER_OSS=m
747CONFIG_SND_PCM_OSS=m
748# CONFIG_SND_SEQUENCER_OSS is not set
749# CONFIG_SND_VERBOSE_PRINTK is not set
750# CONFIG_SND_DEBUG is not set
751
752#
753# Generic devices
754#
755CONFIG_SND_OPL3_LIB=m
756# CONFIG_SND_DUMMY is not set
757# CONFIG_SND_VIRMIDI is not set
758# CONFIG_SND_MTPAV is not set
759# CONFIG_SND_SERIAL_U16550 is not set
760# CONFIG_SND_MPU401 is not set
761
762#
763# PCI devices
764#
765CONFIG_SND_AC97_CODEC=m
766# CONFIG_SND_ALI5451 is not set
767# CONFIG_SND_ATIIXP is not set
768# CONFIG_SND_ATIIXP_MODEM is not set
769# CONFIG_SND_AU8810 is not set
770# CONFIG_SND_AU8820 is not set
771# CONFIG_SND_AU8830 is not set
772# CONFIG_SND_AZT3328 is not set
773# CONFIG_SND_BT87X is not set
774# CONFIG_SND_CS46XX is not set
775CONFIG_SND_CS4281=m
776# CONFIG_SND_EMU10K1 is not set
777# CONFIG_SND_KORG1212 is not set
778# CONFIG_SND_MIXART is not set
779# CONFIG_SND_NM256 is not set
780# CONFIG_SND_RME32 is not set
781# CONFIG_SND_RME96 is not set
782# CONFIG_SND_RME9652 is not set
783# CONFIG_SND_HDSP is not set
784# CONFIG_SND_TRIDENT is not set
785# CONFIG_SND_YMFPCI is not set
786# CONFIG_SND_ALS4000 is not set
787# CONFIG_SND_CMIPCI is not set
788# CONFIG_SND_ENS1370 is not set
789# CONFIG_SND_ENS1371 is not set
790# CONFIG_SND_ES1938 is not set
791# CONFIG_SND_ES1968 is not set
792# CONFIG_SND_MAESTRO3 is not set
793# CONFIG_SND_FM801 is not set
794# CONFIG_SND_ICE1712 is not set
795# CONFIG_SND_ICE1724 is not set
796# CONFIG_SND_INTEL8X0 is not set
797# CONFIG_SND_INTEL8X0M is not set
798# CONFIG_SND_SONICVIBES is not set
799# CONFIG_SND_VIA82XX is not set
800# CONFIG_SND_VX222 is not set
801
802#
803# USB devices
804#
805# CONFIG_SND_USB_AUDIO is not set
806# CONFIG_SND_USB_USX2Y is not set
807
808#
809# Open Sound System
810#
811# CONFIG_SOUND_PRIME is not set
812
813#
814# USB support
815#
816CONFIG_USB=m
817# CONFIG_USB_DEBUG is not set
818
819#
820# Miscellaneous USB options
821#
822CONFIG_USB_DEVICEFS=y
823# CONFIG_USB_BANDWIDTH is not set
824# CONFIG_USB_DYNAMIC_MINORS is not set
825# CONFIG_USB_SUSPEND is not set
826# CONFIG_USB_OTG is not set
827CONFIG_USB_ARCH_HAS_HCD=y
828CONFIG_USB_ARCH_HAS_OHCI=y
829
830#
831# USB Host Controller Drivers
832#
833# CONFIG_USB_EHCI_HCD is not set
834# CONFIG_USB_OHCI_HCD is not set
835CONFIG_USB_UHCI_HCD=m
836
837#
838# USB Device Class drivers
839#
840CONFIG_USB_AUDIO=m
841CONFIG_USB_BLUETOOTH_TTY=m
842CONFIG_USB_MIDI=m
843CONFIG_USB_ACM=m
844CONFIG_USB_PRINTER=m
845CONFIG_USB_STORAGE=m
846# CONFIG_USB_STORAGE_DEBUG is not set
847# CONFIG_USB_STORAGE_RW_DETECT is not set
848# CONFIG_USB_STORAGE_DATAFAB is not set
849# CONFIG_USB_STORAGE_FREECOM is not set
850# CONFIG_USB_STORAGE_ISD200 is not set
851# CONFIG_USB_STORAGE_DPCM is not set
852# CONFIG_USB_STORAGE_HP8200e is not set
853# CONFIG_USB_STORAGE_SDDR09 is not set
854# CONFIG_USB_STORAGE_SDDR55 is not set
855# CONFIG_USB_STORAGE_JUMPSHOT is not set
856
857#
858# USB Input Devices
859#
860CONFIG_USB_HID=m
861CONFIG_USB_HIDINPUT=y
862# CONFIG_HID_FF is not set
863CONFIG_USB_HIDDEV=y
864
865#
866# USB HID Boot Protocol drivers
867#
868# CONFIG_USB_KBD is not set
869# CONFIG_USB_MOUSE is not set
870# CONFIG_USB_AIPTEK is not set
871# CONFIG_USB_WACOM is not set
872# CONFIG_USB_KBTAB is not set
873# CONFIG_USB_POWERMATE is not set
874# CONFIG_USB_MTOUCH is not set
875# CONFIG_USB_EGALAX is not set
876# CONFIG_USB_XPAD is not set
877# CONFIG_USB_ATI_REMOTE is not set
878
879#
880# USB Imaging devices
881#
882# CONFIG_USB_MDC800 is not set
883# CONFIG_USB_MICROTEK is not set
884# CONFIG_USB_HPUSBSCSI is not set
885
886#
887# USB Multimedia devices
888#
889# CONFIG_USB_DABUSB is not set
890
891#
892# Video4Linux support is needed for USB Multimedia device support
893#
894
895#
896# USB Network Adapters
897#
898# CONFIG_USB_CATC is not set
899# CONFIG_USB_KAWETH is not set
900# CONFIG_USB_PEGASUS is not set
901# CONFIG_USB_RTL8150 is not set
902# CONFIG_USB_USBNET is not set
903
904#
905# USB port drivers
906#
907
908#
909# USB Serial Converter support
910#
911# CONFIG_USB_SERIAL is not set
912
913#
914# USB Miscellaneous drivers
915#
916# CONFIG_USB_EMI62 is not set
917# CONFIG_USB_EMI26 is not set
918# CONFIG_USB_TIGL is not set
919# CONFIG_USB_AUERSWALD is not set
920# CONFIG_USB_RIO500 is not set
921# CONFIG_USB_LEGOTOWER is not set
922# CONFIG_USB_LCD is not set
923# CONFIG_USB_LED is not set
924# CONFIG_USB_CYTHERM is not set
925# CONFIG_USB_PHIDGETKIT is not set
926# CONFIG_USB_PHIDGETSERVO is not set
927# CONFIG_USB_TEST is not set
928
929#
930# USB ATM/DSL drivers
931#
932
933#
934# USB Gadget Support
935#
936# CONFIG_USB_GADGET is not set
937
938#
939# File systems
940#
941CONFIG_EXT2_FS=y
942# CONFIG_EXT2_FS_XATTR is not set
943CONFIG_EXT3_FS=y
944CONFIG_EXT3_FS_XATTR=y
945# CONFIG_EXT3_FS_POSIX_ACL is not set
946# CONFIG_EXT3_FS_SECURITY is not set
947CONFIG_JBD=y
948# CONFIG_JBD_DEBUG is not set
949CONFIG_FS_MBCACHE=y
950# CONFIG_REISERFS_FS is not set
951# CONFIG_JFS_FS is not set
952CONFIG_FS_POSIX_ACL=y
953CONFIG_XFS_FS=y
954# CONFIG_XFS_RT is not set
955CONFIG_XFS_QUOTA=y
956CONFIG_XFS_SECURITY=y
957CONFIG_XFS_POSIX_ACL=y
958# CONFIG_MINIX_FS is not set
959# CONFIG_ROMFS_FS is not set
960# CONFIG_QUOTA is not set
961CONFIG_QUOTACTL=y
962CONFIG_DNOTIFY=y
963CONFIG_AUTOFS_FS=m
964CONFIG_AUTOFS4_FS=m
965
966#
967# CD-ROM/DVD Filesystems
968#
969CONFIG_ISO9660_FS=m
970CONFIG_JOLIET=y
971# CONFIG_ZISOFS is not set
972CONFIG_UDF_FS=m
973CONFIG_UDF_NLS=y
974
975#
976# DOS/FAT/NT Filesystems
977#
978CONFIG_FAT_FS=y
979# CONFIG_MSDOS_FS is not set
980CONFIG_VFAT_FS=y
981CONFIG_FAT_DEFAULT_CODEPAGE=437
982CONFIG_FAT_DEFAULT_IOCHARSET="iso8859-1"
983# CONFIG_NTFS_FS is not set
984
985#
986# Pseudo filesystems
987#
988CONFIG_PROC_FS=y
989CONFIG_PROC_KCORE=y
990CONFIG_SYSFS=y
991# CONFIG_DEVFS_FS is not set
992CONFIG_DEVPTS_FS_XATTR=y
993CONFIG_DEVPTS_FS_SECURITY=y
994CONFIG_TMPFS=y
995# CONFIG_TMPFS_XATTR is not set
996CONFIG_HUGETLBFS=y
997CONFIG_HUGETLB_PAGE=y
998CONFIG_RAMFS=y
999
1000#
1001# Miscellaneous filesystems
1002#
1003# CONFIG_ADFS_FS is not set
1004# CONFIG_AFFS_FS is not set
1005# CONFIG_HFS_FS is not set
1006# CONFIG_HFSPLUS_FS is not set
1007# CONFIG_BEFS_FS is not set
1008# CONFIG_BFS_FS is not set
1009# CONFIG_EFS_FS is not set
1010# CONFIG_CRAMFS is not set
1011# CONFIG_VXFS_FS is not set
1012# CONFIG_HPFS_FS is not set
1013# CONFIG_QNX4FS_FS is not set
1014# CONFIG_SYSV_FS is not set
1015# CONFIG_UFS_FS is not set
1016
1017#
1018# Network File Systems
1019#
1020CONFIG_NFS_FS=m
1021CONFIG_NFS_V3=y
1022CONFIG_NFS_V4=y
1023# CONFIG_NFS_DIRECTIO is not set
1024CONFIG_NFSD=m
1025CONFIG_NFSD_V3=y
1026CONFIG_NFSD_V4=y
1027CONFIG_NFSD_TCP=y
1028CONFIG_LOCKD=m
1029CONFIG_LOCKD_V4=y
1030CONFIG_EXPORTFS=m
1031CONFIG_SUNRPC=m
1032CONFIG_SUNRPC_GSS=m
1033CONFIG_RPCSEC_GSS_KRB5=m
1034# CONFIG_RPCSEC_GSS_SPKM3 is not set
1035# CONFIG_SMB_FS is not set
1036CONFIG_CIFS=m
1037CONFIG_CIFS_STATS=y
1038CONFIG_CIFS_XATTR=y
1039CONFIG_CIFS_POSIX=y
1040# CONFIG_NCP_FS is not set
1041# CONFIG_CODA_FS is not set
1042# CONFIG_AFS_FS is not set
1043
1044#
1045# Partition Types
1046#
1047CONFIG_PARTITION_ADVANCED=y
1048# CONFIG_ACORN_PARTITION is not set
1049# CONFIG_OSF_PARTITION is not set
1050# CONFIG_AMIGA_PARTITION is not set
1051# CONFIG_ATARI_PARTITION is not set
1052# CONFIG_MAC_PARTITION is not set
1053CONFIG_MSDOS_PARTITION=y
1054# CONFIG_BSD_DISKLABEL is not set
1055# CONFIG_MINIX_SUBPARTITION is not set
1056# CONFIG_SOLARIS_X86_PARTITION is not set
1057# CONFIG_UNIXWARE_DISKLABEL is not set
1058# CONFIG_LDM_PARTITION is not set
1059CONFIG_SGI_PARTITION=y
1060# CONFIG_ULTRIX_PARTITION is not set
1061# CONFIG_SUN_PARTITION is not set
1062CONFIG_EFI_PARTITION=y
1063
1064#
1065# Native Language Support
1066#
1067CONFIG_NLS=y
1068CONFIG_NLS_DEFAULT="iso8859-1"
1069CONFIG_NLS_CODEPAGE_437=y
1070# CONFIG_NLS_CODEPAGE_737 is not set
1071# CONFIG_NLS_CODEPAGE_775 is not set
1072# CONFIG_NLS_CODEPAGE_850 is not set
1073# CONFIG_NLS_CODEPAGE_852 is not set
1074# CONFIG_NLS_CODEPAGE_855 is not set
1075# CONFIG_NLS_CODEPAGE_857 is not set
1076# CONFIG_NLS_CODEPAGE_860 is not set
1077# CONFIG_NLS_CODEPAGE_861 is not set
1078# CONFIG_NLS_CODEPAGE_862 is not set
1079# CONFIG_NLS_CODEPAGE_863 is not set
1080# CONFIG_NLS_CODEPAGE_864 is not set
1081# CONFIG_NLS_CODEPAGE_865 is not set
1082# CONFIG_NLS_CODEPAGE_866 is not set
1083# CONFIG_NLS_CODEPAGE_869 is not set
1084# CONFIG_NLS_CODEPAGE_936 is not set
1085# CONFIG_NLS_CODEPAGE_950 is not set
1086# CONFIG_NLS_CODEPAGE_932 is not set
1087# CONFIG_NLS_CODEPAGE_949 is not set
1088# CONFIG_NLS_CODEPAGE_874 is not set
1089# CONFIG_NLS_ISO8859_8 is not set
1090# CONFIG_NLS_CODEPAGE_1250 is not set
1091# CONFIG_NLS_CODEPAGE_1251 is not set
1092# CONFIG_NLS_ASCII is not set
1093CONFIG_NLS_ISO8859_1=y
1094# CONFIG_NLS_ISO8859_2 is not set
1095# CONFIG_NLS_ISO8859_3 is not set
1096# CONFIG_NLS_ISO8859_4 is not set
1097# CONFIG_NLS_ISO8859_5 is not set
1098# CONFIG_NLS_ISO8859_6 is not set
1099# CONFIG_NLS_ISO8859_7 is not set
1100# CONFIG_NLS_ISO8859_9 is not set
1101# CONFIG_NLS_ISO8859_13 is not set
1102# CONFIG_NLS_ISO8859_14 is not set
1103# CONFIG_NLS_ISO8859_15 is not set
1104# CONFIG_NLS_KOI8_R is not set
1105# CONFIG_NLS_KOI8_U is not set
1106CONFIG_NLS_UTF8=m
1107
1108#
1109# Library routines
1110#
1111# CONFIG_CRC_CCITT is not set
1112CONFIG_CRC32=y
1113# CONFIG_LIBCRC32C is not set
1114
1115#
1116# Profiling support
1117#
1118CONFIG_PROFILING=y
1119CONFIG_OPROFILE=y
1120
1121#
1122# Kernel hacking
1123#
1124CONFIG_DEBUG_KERNEL=y
1125CONFIG_MAGIC_SYSRQ=y
1126# CONFIG_SCHEDSTATS is not set
1127# CONFIG_DEBUG_SLAB is not set
1128# CONFIG_DEBUG_SPINLOCK is not set
1129# CONFIG_DEBUG_SPINLOCK_SLEEP is not set
1130# CONFIG_DEBUG_KOBJECT is not set
1131# CONFIG_DEBUG_INFO is not set
1132# CONFIG_IA64_GRANULE_16MB is not set
1133CONFIG_IA64_GRANULE_64MB=y
1134# CONFIG_IA64_PRINT_HAZARDS is not set
1135# CONFIG_DISABLE_VHPT is not set
1136# CONFIG_IA64_DEBUG_CMPXCHG is not set
1137# CONFIG_IA64_DEBUG_IRQ is not set
1138CONFIG_SYSVIPC_COMPAT=y
1139
1140#
1141# Security options
1142#
1143# CONFIG_KEYS is not set
1144# CONFIG_SECURITY is not set
1145
1146#
1147# Cryptographic options
1148#
1149CONFIG_CRYPTO=y
1150# CONFIG_CRYPTO_HMAC is not set
1151# CONFIG_CRYPTO_NULL is not set
1152# CONFIG_CRYPTO_MD4 is not set
1153CONFIG_CRYPTO_MD5=y
1154# CONFIG_CRYPTO_SHA1 is not set
1155# CONFIG_CRYPTO_SHA256 is not set
1156# CONFIG_CRYPTO_SHA512 is not set
1157# CONFIG_CRYPTO_WP512 is not set
1158CONFIG_CRYPTO_DES=y
1159# CONFIG_CRYPTO_BLOWFISH is not set
1160# CONFIG_CRYPTO_TWOFISH is not set
1161# CONFIG_CRYPTO_SERPENT is not set
1162# CONFIG_CRYPTO_AES is not set
1163# CONFIG_CRYPTO_CAST5 is not set
1164# CONFIG_CRYPTO_CAST6 is not set
1165# CONFIG_CRYPTO_TEA is not set
1166# CONFIG_CRYPTO_ARC4 is not set
1167# CONFIG_CRYPTO_KHAZAD is not set
1168# CONFIG_CRYPTO_ANUBIS is not set
1169# CONFIG_CRYPTO_DEFLATE is not set
1170# CONFIG_CRYPTO_MICHAEL_MIC is not set
1171# CONFIG_CRYPTO_CRC32C is not set
1172# CONFIG_CRYPTO_TEST is not set
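
A defconfig such as the one above is not built in place; it seeds .config, which kbuild then completes. A minimal sketch of the usual flow (the cross prefix is an assumption, and the %_defconfig shortcut has been part of 2.6 kbuild, so either form should work here):

  # let kbuild copy the file for you ...
  make ARCH=ia64 CROSS_COMPILE=ia64-linux- bigsur_defconfig
  # ... or copy it by hand and answer any newly introduced symbols
  cp arch/ia64/configs/bigsur_defconfig .config
  make ARCH=ia64 CROSS_COMPILE=ia64-linux- oldconfig
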
diff --git a/arch/ia64/configs/sim_defconfig b/arch/ia64/configs/sim_defconfig
new file mode 100644
index 000000000000..a26781cfe8bf
--- /dev/null
+++ b/arch/ia64/configs/sim_defconfig
@@ -0,0 +1,534 @@
1#
2# Automatically generated make config: don't edit
3#
4
5#
6# Code maturity level options
7#
8CONFIG_EXPERIMENTAL=y
9# CONFIG_CLEAN_COMPILE is not set
10# CONFIG_STANDALONE is not set
11CONFIG_BROKEN=y
12CONFIG_BROKEN_ON_SMP=y
13
14#
15# General setup
16#
17CONFIG_SWAP=y
18CONFIG_SYSVIPC=y
19# CONFIG_POSIX_MQUEUE is not set
20# CONFIG_BSD_PROCESS_ACCT is not set
21CONFIG_SYSCTL=y
22# CONFIG_AUDIT is not set
23CONFIG_LOG_BUF_SHIFT=16
24# CONFIG_HOTPLUG is not set
25CONFIG_IKCONFIG=y
26CONFIG_IKCONFIG_PROC=y
27# CONFIG_EMBEDDED is not set
28CONFIG_KALLSYMS=y
29# CONFIG_KALLSYMS_ALL is not set
30CONFIG_FUTEX=y
31CONFIG_EPOLL=y
32CONFIG_IOSCHED_NOOP=y
33CONFIG_IOSCHED_AS=y
34CONFIG_IOSCHED_DEADLINE=y
35CONFIG_IOSCHED_CFQ=y
36# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
37
38#
39# Loadable module support
40#
41CONFIG_MODULES=y
42CONFIG_MODULE_UNLOAD=y
43CONFIG_MODULE_FORCE_UNLOAD=y
44CONFIG_OBSOLETE_MODPARM=y
45CONFIG_MODVERSIONS=y
46CONFIG_KMOD=y
47CONFIG_STOP_MACHINE=y
48
49#
50# Processor type and features
51#
52CONFIG_IA64=y
53CONFIG_64BIT=y
54CONFIG_MMU=y
55CONFIG_RWSEM_XCHGADD_ALGORITHM=y
56CONFIG_TIME_INTERPOLATION=y
57CONFIG_EFI=y
58# CONFIG_IA64_GENERIC is not set
59# CONFIG_IA64_DIG is not set
60# CONFIG_IA64_HP_ZX1 is not set
61# CONFIG_IA64_SGI_SN2 is not set
62CONFIG_IA64_HP_SIM=y
63# CONFIG_ITANIUM is not set
64CONFIG_MCKINLEY=y
65# CONFIG_IA64_PAGE_SIZE_4KB is not set
66# CONFIG_IA64_PAGE_SIZE_8KB is not set
67# CONFIG_IA64_PAGE_SIZE_16KB is not set
68CONFIG_IA64_PAGE_SIZE_64KB=y
69CONFIG_IA64_L1_CACHE_SHIFT=7
70# CONFIG_MCKINLEY_ASTEP_SPECIFIC is not set
71# CONFIG_VIRTUAL_MEM_MAP is not set
72# CONFIG_IA64_CYCLONE is not set
73CONFIG_FORCE_MAX_ZONEORDER=18
74CONFIG_SMP=y
75CONFIG_NR_CPUS=64
76CONFIG_PREEMPT=y
77CONFIG_HAVE_DEC_LOCK=y
78CONFIG_IA32_SUPPORT=y
79CONFIG_COMPAT=y
80# CONFIG_PERFMON is not set
81CONFIG_IA64_PALINFO=m
82
83#
84# Firmware Drivers
85#
86CONFIG_EFI_VARS=y
87# CONFIG_SMBIOS is not set
88CONFIG_BINFMT_ELF=y
89CONFIG_BINFMT_MISC=y
90
91#
92# Power management and ACPI
93#
94
95#
96# Device Drivers
97#
98
99#
100# Generic Driver Options
101#
102# CONFIG_DEBUG_DRIVER is not set
103
104#
105# Memory Technology Devices (MTD)
106#
107# CONFIG_MTD is not set
108
109#
110# Parallel port support
111#
112# CONFIG_PARPORT is not set
113
114#
115# Plug and Play support
116#
117
118#
119# Block devices
120#
121CONFIG_BLK_DEV_LOOP=y
122# CONFIG_BLK_DEV_CRYPTOLOOP is not set
123# CONFIG_BLK_DEV_NBD is not set
124CONFIG_BLK_DEV_RAM=y
125CONFIG_BLK_DEV_RAM_SIZE=4096
126# CONFIG_BLK_DEV_INITRD is not set
127
128#
129# ATA/ATAPI/MFM/RLL support
130#
131# CONFIG_IDE is not set
132
133#
134# SCSI device support
135#
136CONFIG_SCSI=y
137CONFIG_SCSI_PROC_FS=y
138
139#
140# SCSI support type (disk, tape, CD-ROM)
141#
142CONFIG_BLK_DEV_SD=y
143# CONFIG_CHR_DEV_ST is not set
144# CONFIG_CHR_DEV_OSST is not set
145# CONFIG_BLK_DEV_SR is not set
146# CONFIG_CHR_DEV_SG is not set
147
148#
149# Some SCSI devices (e.g. CD jukebox) support multiple LUNs
150#
151CONFIG_SCSI_MULTI_LUN=y
152CONFIG_SCSI_CONSTANTS=y
153CONFIG_SCSI_LOGGING=y
154
155#
156# SCSI Transport Attributes
157#
158CONFIG_SCSI_SPI_ATTRS=y
159# CONFIG_SCSI_FC_ATTRS is not set
160
161#
162# SCSI low-level drivers
163#
164# CONFIG_SCSI_AIC7XXX_OLD is not set
165# CONFIG_SCSI_SATA is not set
166# CONFIG_SCSI_EATA_PIO is not set
167# CONFIG_SCSI_DEBUG is not set
168
169#
170# Multi-device support (RAID and LVM)
171#
172# CONFIG_MD is not set
173
174#
175# Fusion MPT device support
176#
177
178#
179# IEEE 1394 (FireWire) support
180#
181# CONFIG_IEEE1394 is not set
182
183#
184# I2O device support
185#
186
187#
188# Networking support
189#
190CONFIG_NET=y
191
192#
193# Networking options
194#
195CONFIG_PACKET=y
196# CONFIG_PACKET_MMAP is not set
197# CONFIG_NETLINK_DEV is not set
198# CONFIG_UNIX is not set
199# CONFIG_NET_KEY is not set
200CONFIG_INET=y
201CONFIG_IP_MULTICAST=y
202# CONFIG_IP_ADVANCED_ROUTER is not set
203# CONFIG_IP_PNP is not set
204# CONFIG_NET_IPIP is not set
205# CONFIG_NET_IPGRE is not set
206# CONFIG_IP_MROUTE is not set
207# CONFIG_ARPD is not set
208# CONFIG_SYN_COOKIES is not set
209# CONFIG_INET_AH is not set
210# CONFIG_INET_ESP is not set
211# CONFIG_INET_IPCOMP is not set
212# CONFIG_IPV6 is not set
213# CONFIG_NETFILTER is not set
214
215#
216# SCTP Configuration (EXPERIMENTAL)
217#
218# CONFIG_IP_SCTP is not set
219# CONFIG_ATM is not set
220# CONFIG_BRIDGE is not set
221# CONFIG_VLAN_8021Q is not set
222# CONFIG_DECNET is not set
223# CONFIG_LLC2 is not set
224# CONFIG_IPX is not set
225# CONFIG_ATALK is not set
226# CONFIG_X25 is not set
227# CONFIG_LAPB is not set
228# CONFIG_NET_DIVERT is not set
229# CONFIG_ECONET is not set
230# CONFIG_WAN_ROUTER is not set
231# CONFIG_NET_HW_FLOWCONTROL is not set
232
233#
234# QoS and/or fair queueing
235#
236# CONFIG_NET_SCHED is not set
237
238#
239# Network testing
240#
241# CONFIG_NET_PKTGEN is not set
242# CONFIG_NETPOLL is not set
243# CONFIG_NET_POLL_CONTROLLER is not set
244# CONFIG_HAMRADIO is not set
245# CONFIG_IRDA is not set
246# CONFIG_BT is not set
247# CONFIG_NETDEVICES is not set
248
249#
250# ISDN subsystem
251#
252# CONFIG_ISDN is not set
253
254#
255# Telephony Support
256#
257# CONFIG_PHONE is not set
258
259#
260# Input device support
261#
262CONFIG_INPUT=y
263
264#
265# Userland interfaces
266#
267CONFIG_INPUT_MOUSEDEV=y
268CONFIG_INPUT_MOUSEDEV_PSAUX=y
269CONFIG_INPUT_MOUSEDEV_SCREEN_X=1024
270CONFIG_INPUT_MOUSEDEV_SCREEN_Y=768
271# CONFIG_INPUT_JOYDEV is not set
272# CONFIG_INPUT_TSDEV is not set
273# CONFIG_INPUT_EVDEV is not set
274# CONFIG_INPUT_EVBUG is not set
275
276#
277# Input I/O drivers
278#
279# CONFIG_GAMEPORT is not set
280CONFIG_SOUND_GAMEPORT=y
281CONFIG_SERIO=y
282# CONFIG_SERIO_I8042 is not set
283CONFIG_SERIO_SERPORT=y
284# CONFIG_SERIO_CT82C710 is not set
285
286#
287# Input Device Drivers
288#
289# CONFIG_INPUT_KEYBOARD is not set
290# CONFIG_INPUT_MOUSE is not set
291# CONFIG_INPUT_JOYSTICK is not set
292# CONFIG_INPUT_TOUCHSCREEN is not set
293# CONFIG_INPUT_MISC is not set
294
295#
296# Character devices
297#
298CONFIG_VT=y
299CONFIG_VT_CONSOLE=y
300CONFIG_HW_CONSOLE=y
301# CONFIG_SERIAL_NONSTANDARD is not set
302
303#
304# Serial drivers
305#
306# CONFIG_SERIAL_8250 is not set
307
308#
309# Non-8250 serial port support
310#
311CONFIG_UNIX98_PTYS=y
312# CONFIG_LEGACY_PTYS is not set
313# CONFIG_QIC02_TAPE is not set
314
315#
316# IPMI
317#
318# CONFIG_IPMI_HANDLER is not set
319
320#
321# Watchdog Cards
322#
323# CONFIG_WATCHDOG is not set
324CONFIG_EFI_RTC=y
325# CONFIG_DTLK is not set
326# CONFIG_R3964 is not set
327# CONFIG_APPLICOM is not set
328
329#
330# Ftape, the floppy tape device driver
331#
332# CONFIG_FTAPE is not set
333# CONFIG_AGP is not set
334# CONFIG_DRM is not set
335# CONFIG_RAW_DRIVER is not set
336
337#
338# I2C support
339#
340# CONFIG_I2C is not set
341
342#
343# Misc devices
344#
345
346#
347# Multimedia devices
348#
349# CONFIG_VIDEO_DEV is not set
350
351#
352# Digital Video Broadcasting Devices
353#
354# CONFIG_DVB is not set
355
356#
357# Graphics support
358#
359# CONFIG_FB is not set
360
361#
362# Console display driver support
363#
364# CONFIG_VGA_CONSOLE is not set
365# CONFIG_MDA_CONSOLE is not set
366CONFIG_DUMMY_CONSOLE=y
367
368#
369# Sound
370#
371# CONFIG_SOUND is not set
372
373#
374# USB support
375#
376
377#
378# USB Gadget Support
379#
380# CONFIG_USB_GADGET is not set
381
382#
383# File systems
384#
385CONFIG_EXT2_FS=y
386# CONFIG_EXT2_FS_XATTR is not set
387CONFIG_EXT3_FS=y
388# CONFIG_EXT3_FS_XATTR is not set
389CONFIG_JBD=y
390# CONFIG_JBD_DEBUG is not set
391# CONFIG_REISERFS_FS is not set
392# CONFIG_JFS_FS is not set
393# CONFIG_XFS_FS is not set
394# CONFIG_MINIX_FS is not set
395# CONFIG_ROMFS_FS is not set
396# CONFIG_QUOTA is not set
397# CONFIG_AUTOFS_FS is not set
398# CONFIG_AUTOFS4_FS is not set
399
400#
401# CD-ROM/DVD Filesystems
402#
403# CONFIG_ISO9660_FS is not set
404# CONFIG_UDF_FS is not set
405
406#
407# DOS/FAT/NT Filesystems
408#
409# CONFIG_FAT_FS is not set
410# CONFIG_NTFS_FS is not set
411
412#
413# Pseudo filesystems
414#
415CONFIG_PROC_FS=y
416CONFIG_PROC_KCORE=y
417CONFIG_SYSFS=y
418# CONFIG_DEVFS_FS is not set
419# CONFIG_DEVPTS_FS_XATTR is not set
420# CONFIG_TMPFS is not set
421CONFIG_HUGETLBFS=y
422CONFIG_HUGETLB_PAGE=y
423CONFIG_RAMFS=y
424
425#
426# Miscellaneous filesystems
427#
428# CONFIG_ADFS_FS is not set
429# CONFIG_AFFS_FS is not set
430# CONFIG_HFS_FS is not set
431# CONFIG_HFSPLUS_FS is not set
432# CONFIG_BEFS_FS is not set
433# CONFIG_BFS_FS is not set
434# CONFIG_EFS_FS is not set
435# CONFIG_CRAMFS is not set
436# CONFIG_VXFS_FS is not set
437# CONFIG_HPFS_FS is not set
438# CONFIG_QNX4FS_FS is not set
439# CONFIG_SYSV_FS is not set
440# CONFIG_UFS_FS is not set
441
442#
443# Network File Systems
444#
445CONFIG_NFS_FS=y
446# CONFIG_NFS_V3 is not set
447# CONFIG_NFS_V4 is not set
448CONFIG_NFS_DIRECTIO=y
449CONFIG_NFSD=y
450CONFIG_NFSD_V3=y
451# CONFIG_NFSD_V4 is not set
452# CONFIG_NFSD_TCP is not set
453CONFIG_LOCKD=y
454CONFIG_LOCKD_V4=y
455CONFIG_EXPORTFS=y
456CONFIG_SUNRPC=y
457# CONFIG_RPCSEC_GSS_KRB5 is not set
458# CONFIG_SMB_FS is not set
459# CONFIG_CIFS is not set
460# CONFIG_NCP_FS is not set
461# CONFIG_CODA_FS is not set
462# CONFIG_AFS_FS is not set
463
464#
465# Partition Types
466#
467CONFIG_PARTITION_ADVANCED=y
468# CONFIG_ACORN_PARTITION is not set
469# CONFIG_OSF_PARTITION is not set
470# CONFIG_AMIGA_PARTITION is not set
471# CONFIG_ATARI_PARTITION is not set
472# CONFIG_MAC_PARTITION is not set
473CONFIG_MSDOS_PARTITION=y
474# CONFIG_BSD_DISKLABEL is not set
475# CONFIG_MINIX_SUBPARTITION is not set
476# CONFIG_SOLARIS_X86_PARTITION is not set
477# CONFIG_UNIXWARE_DISKLABEL is not set
478# CONFIG_LDM_PARTITION is not set
479# CONFIG_NEC98_PARTITION is not set
480# CONFIG_SGI_PARTITION is not set
481# CONFIG_ULTRIX_PARTITION is not set
482# CONFIG_SUN_PARTITION is not set
483CONFIG_EFI_PARTITION=y
484
485#
486# Native Language Support
487#
488# CONFIG_NLS is not set
489
490#
491# Library routines
492#
493CONFIG_CRC32=y
494# CONFIG_LIBCRC32C is not set
495
496#
497# HP Simulator drivers
498#
499CONFIG_HP_SIMETH=y
500CONFIG_HP_SIMSERIAL=y
501CONFIG_HP_SIMSERIAL_CONSOLE=y
502CONFIG_HP_SIMSCSI=y
503
504#
505# Profiling support
506#
507# CONFIG_PROFILING is not set
508
509#
510# Kernel hacking
511#
512# CONFIG_IA64_GRANULE_16MB is not set
513CONFIG_IA64_GRANULE_64MB=y
514CONFIG_DEBUG_KERNEL=y
515# CONFIG_IA64_PRINT_HAZARDS is not set
516# CONFIG_DISABLE_VHPT is not set
517# CONFIG_MAGIC_SYSRQ is not set
518# CONFIG_DEBUG_SLAB is not set
519# CONFIG_DEBUG_SPINLOCK is not set
520# CONFIG_DEBUG_SPINLOCK_SLEEP is not set
521# CONFIG_IA64_DEBUG_CMPXCHG is not set
522# CONFIG_IA64_DEBUG_IRQ is not set
523CONFIG_DEBUG_INFO=y
524CONFIG_SYSVIPC_COMPAT=y
525
526#
527# Security options
528#
529# CONFIG_SECURITY is not set
530
531#
532# Cryptographic options
533#
534# CONFIG_CRYPTO is not set
diff --git a/arch/ia64/configs/sn2_defconfig b/arch/ia64/configs/sn2_defconfig
new file mode 100644
index 000000000000..bfeb952fe8e2
--- /dev/null
+++ b/arch/ia64/configs/sn2_defconfig
@@ -0,0 +1,1038 @@
1#
2# Automatically generated make config: don't edit
3# Linux kernel version: 2.6.10
4# Mon Jan 10 13:57:35 2005
5#
6
7#
8# Code maturity level options
9#
10CONFIG_EXPERIMENTAL=y
11CONFIG_CLEAN_COMPILE=y
12CONFIG_LOCK_KERNEL=y
13
14#
15# General setup
16#
17CONFIG_LOCALVERSION=""
18CONFIG_SWAP=y
19CONFIG_SYSVIPC=y
20CONFIG_POSIX_MQUEUE=y
21# CONFIG_BSD_PROCESS_ACCT is not set
22CONFIG_SYSCTL=y
23# CONFIG_AUDIT is not set
24CONFIG_LOG_BUF_SHIFT=20
25CONFIG_HOTPLUG=y
26CONFIG_KOBJECT_UEVENT=y
27# CONFIG_IKCONFIG is not set
28# CONFIG_EMBEDDED is not set
29CONFIG_KALLSYMS=y
30CONFIG_KALLSYMS_ALL=y
31# CONFIG_KALLSYMS_EXTRA_PASS is not set
32CONFIG_FUTEX=y
33CONFIG_EPOLL=y
34CONFIG_CPUSETS=y
35# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
36CONFIG_SHMEM=y
37CONFIG_CC_ALIGN_FUNCTIONS=0
38CONFIG_CC_ALIGN_LABELS=0
39CONFIG_CC_ALIGN_LOOPS=0
40CONFIG_CC_ALIGN_JUMPS=0
41# CONFIG_TINY_SHMEM is not set
42
43#
44# Loadable module support
45#
46CONFIG_MODULES=y
47CONFIG_MODULE_UNLOAD=y
48# CONFIG_MODULE_FORCE_UNLOAD is not set
49CONFIG_OBSOLETE_MODPARM=y
50# CONFIG_MODVERSIONS is not set
51# CONFIG_MODULE_SRCVERSION_ALL is not set
52CONFIG_KMOD=y
53CONFIG_STOP_MACHINE=y
54
55#
56# Processor type and features
57#
58CONFIG_IA64=y
59CONFIG_64BIT=y
60CONFIG_MMU=y
61CONFIG_RWSEM_XCHGADD_ALGORITHM=y
62CONFIG_GENERIC_CALIBRATE_DELAY=y
63CONFIG_TIME_INTERPOLATION=y
64CONFIG_EFI=y
65CONFIG_GENERIC_IOMAP=y
66# CONFIG_IA64_GENERIC is not set
67# CONFIG_IA64_DIG is not set
68# CONFIG_IA64_HP_ZX1 is not set
69CONFIG_IA64_SGI_SN2=y
70# CONFIG_IA64_HP_SIM is not set
71# CONFIG_ITANIUM is not set
72CONFIG_MCKINLEY=y
73# CONFIG_IA64_PAGE_SIZE_4KB is not set
74# CONFIG_IA64_PAGE_SIZE_8KB is not set
75CONFIG_IA64_PAGE_SIZE_16KB=y
76# CONFIG_IA64_PAGE_SIZE_64KB is not set
77CONFIG_IA64_L1_CACHE_SHIFT=7
78CONFIG_NUMA=y
79CONFIG_VIRTUAL_MEM_MAP=y
80CONFIG_HOLES_IN_ZONE=y
81CONFIG_DISCONTIGMEM=y
82# CONFIG_IA64_CYCLONE is not set
83CONFIG_IOSAPIC=y
84CONFIG_IA64_SGI_SN_SIM=y
85CONFIG_FORCE_MAX_ZONEORDER=18
86CONFIG_SMP=y
87CONFIG_NR_CPUS=512
88# CONFIG_HOTPLUG_CPU is not set
89CONFIG_PREEMPT=y
90CONFIG_HAVE_DEC_LOCK=y
91CONFIG_IA32_SUPPORT=y
92CONFIG_COMPAT=y
93CONFIG_IA64_MCA_RECOVERY=y
94CONFIG_PERFMON=y
95CONFIG_IA64_PALINFO=y
96CONFIG_ACPI_DEALLOCATE_IRQ=y
97
98#
99# Firmware Drivers
100#
101CONFIG_EFI_VARS=y
102# CONFIG_EFI_PCDP is not set
103CONFIG_BINFMT_ELF=y
104# CONFIG_BINFMT_MISC is not set
105
106#
107# Power management and ACPI
108#
109CONFIG_ACPI=y
110
111#
112# ACPI (Advanced Configuration and Power Interface) Support
113#
114CONFIG_ACPI_BOOT=y
115CONFIG_ACPI_INTERPRETER=y
116# CONFIG_ACPI_BUTTON is not set
117CONFIG_ACPI_VIDEO=m
118# CONFIG_ACPI_FAN is not set
119# CONFIG_ACPI_PROCESSOR is not set
120CONFIG_ACPI_NUMA=y
121CONFIG_ACPI_BLACKLIST_YEAR=0
122# CONFIG_ACPI_DEBUG is not set
123CONFIG_ACPI_BUS=y
124CONFIG_ACPI_POWER=y
125CONFIG_ACPI_PCI=y
126CONFIG_ACPI_SYSTEM=y
127# CONFIG_ACPI_CONTAINER is not set
128
129#
130# Bus options (PCI, PCMCIA)
131#
132CONFIG_PCI=y
133CONFIG_PCI_DOMAINS=y
134# CONFIG_PCI_MSI is not set
135CONFIG_PCI_LEGACY_PROC=y
136CONFIG_PCI_NAMES=y
137
138#
139# PCI Hotplug Support
140#
141CONFIG_HOTPLUG_PCI=y
142# CONFIG_HOTPLUG_PCI_FAKE is not set
143# CONFIG_HOTPLUG_PCI_ACPI is not set
144# CONFIG_HOTPLUG_PCI_CPCI is not set
145# CONFIG_HOTPLUG_PCI_PCIE is not set
146# CONFIG_HOTPLUG_PCI_SHPC is not set
147CONFIG_HOTPLUG_PCI_SGI=y
148
149#
150# PCCARD (PCMCIA/CardBus) support
151#
152# CONFIG_PCCARD is not set
153
154#
155# PC-card bridges
156#
157
158#
159# Device Drivers
160#
161
162#
163# Generic Driver Options
164#
165CONFIG_STANDALONE=y
166CONFIG_PREVENT_FIRMWARE_BUILD=y
167CONFIG_FW_LOADER=m
168# CONFIG_DEBUG_DRIVER is not set
169
170#
171# Memory Technology Devices (MTD)
172#
173# CONFIG_MTD is not set
174
175#
176# Parallel port support
177#
178# CONFIG_PARPORT is not set
179
180#
181# Plug and Play support
182#
183# CONFIG_PNP is not set
184
185#
186# Block devices
187#
188# CONFIG_BLK_CPQ_DA is not set
189# CONFIG_BLK_CPQ_CISS_DA is not set
190# CONFIG_BLK_DEV_DAC960 is not set
191# CONFIG_BLK_DEV_UMEM is not set
192CONFIG_BLK_DEV_LOOP=y
193CONFIG_BLK_DEV_CRYPTOLOOP=m
194CONFIG_BLK_DEV_NBD=m
195# CONFIG_BLK_DEV_SX8 is not set
196# CONFIG_BLK_DEV_UB is not set
197CONFIG_BLK_DEV_RAM=y
198CONFIG_BLK_DEV_RAM_COUNT=16
199CONFIG_BLK_DEV_RAM_SIZE=4096
200CONFIG_BLK_DEV_INITRD=y
201CONFIG_INITRAMFS_SOURCE=""
202# CONFIG_CDROM_PKTCDVD is not set
203
204#
205# IO Schedulers
206#
207CONFIG_IOSCHED_NOOP=y
208CONFIG_IOSCHED_AS=y
209CONFIG_IOSCHED_DEADLINE=y
210CONFIG_IOSCHED_CFQ=y
211CONFIG_ATA_OVER_ETH=m
212
213#
214# ATA/ATAPI/MFM/RLL support
215#
216CONFIG_IDE=y
217CONFIG_BLK_DEV_IDE=y
218
219#
220# Please see Documentation/ide.txt for help/info on IDE drives
221#
222# CONFIG_BLK_DEV_IDE_SATA is not set
223CONFIG_BLK_DEV_IDEDISK=y
224# CONFIG_IDEDISK_MULTI_MODE is not set
225CONFIG_BLK_DEV_IDECD=y
226# CONFIG_BLK_DEV_IDETAPE is not set
227# CONFIG_BLK_DEV_IDEFLOPPY is not set
228# CONFIG_BLK_DEV_IDESCSI is not set
229# CONFIG_IDE_TASK_IOCTL is not set
230
231#
232# IDE chipset support/bugfixes
233#
234CONFIG_IDE_GENERIC=y
235CONFIG_BLK_DEV_IDEPCI=y
236# CONFIG_IDEPCI_SHARE_IRQ is not set
237# CONFIG_BLK_DEV_OFFBOARD is not set
238# CONFIG_BLK_DEV_GENERIC is not set
239# CONFIG_BLK_DEV_OPTI621 is not set
240CONFIG_BLK_DEV_IDEDMA_PCI=y
241# CONFIG_BLK_DEV_IDEDMA_FORCED is not set
242CONFIG_IDEDMA_PCI_AUTO=y
243# CONFIG_IDEDMA_ONLYDISK is not set
244# CONFIG_BLK_DEV_AEC62XX is not set
245# CONFIG_BLK_DEV_ALI15X3 is not set
246# CONFIG_BLK_DEV_AMD74XX is not set
247# CONFIG_BLK_DEV_CMD64X is not set
248# CONFIG_BLK_DEV_TRIFLEX is not set
249# CONFIG_BLK_DEV_CY82C693 is not set
250# CONFIG_BLK_DEV_CS5520 is not set
251# CONFIG_BLK_DEV_CS5530 is not set
252# CONFIG_BLK_DEV_HPT34X is not set
253# CONFIG_BLK_DEV_HPT366 is not set
254# CONFIG_BLK_DEV_SC1200 is not set
255# CONFIG_BLK_DEV_PIIX is not set
256# CONFIG_BLK_DEV_NS87415 is not set
257# CONFIG_BLK_DEV_PDC202XX_OLD is not set
258# CONFIG_BLK_DEV_PDC202XX_NEW is not set
259# CONFIG_BLK_DEV_SVWKS is not set
260CONFIG_BLK_DEV_SGIIOC4=y
261# CONFIG_BLK_DEV_SIIMAGE is not set
262# CONFIG_BLK_DEV_SLC90E66 is not set
263# CONFIG_BLK_DEV_TRM290 is not set
264# CONFIG_BLK_DEV_VIA82CXXX is not set
265# CONFIG_IDE_ARM is not set
266CONFIG_BLK_DEV_IDEDMA=y
267# CONFIG_IDEDMA_IVB is not set
268CONFIG_IDEDMA_AUTO=y
269# CONFIG_BLK_DEV_HD is not set
270
271#
272# SCSI device support
273#
274CONFIG_SCSI=y
275CONFIG_SCSI_PROC_FS=y
276
277#
278# SCSI support type (disk, tape, CD-ROM)
279#
280CONFIG_BLK_DEV_SD=y
281CONFIG_CHR_DEV_ST=m
282# CONFIG_CHR_DEV_OSST is not set
283CONFIG_BLK_DEV_SR=m
284# CONFIG_BLK_DEV_SR_VENDOR is not set
285CONFIG_CHR_DEV_SG=m
286
287#
288# Some SCSI devices (e.g. CD jukebox) support multiple LUNs
289#
290# CONFIG_SCSI_MULTI_LUN is not set
291CONFIG_SCSI_CONSTANTS=y
292# CONFIG_SCSI_LOGGING is not set
293
294#
295# SCSI Transport Attributes
296#
297CONFIG_SCSI_SPI_ATTRS=y
298CONFIG_SCSI_FC_ATTRS=y
299# CONFIG_SCSI_ISCSI_ATTRS is not set
300
301#
302# SCSI low-level drivers
303#
304# CONFIG_BLK_DEV_3W_XXXX_RAID is not set
305# CONFIG_SCSI_3W_9XXX is not set
306# CONFIG_SCSI_ACARD is not set
307# CONFIG_SCSI_AACRAID is not set
308# CONFIG_SCSI_AIC7XXX is not set
309# CONFIG_SCSI_AIC7XXX_OLD is not set
310# CONFIG_SCSI_AIC79XX is not set
311# CONFIG_MEGARAID_NEWGEN is not set
312# CONFIG_MEGARAID_LEGACY is not set
313CONFIG_SCSI_SATA=y
314# CONFIG_SCSI_SATA_AHCI is not set
315# CONFIG_SCSI_SATA_SVW is not set
316# CONFIG_SCSI_ATA_PIIX is not set
317# CONFIG_SCSI_SATA_NV is not set
318# CONFIG_SCSI_SATA_PROMISE is not set
319# CONFIG_SCSI_SATA_SX4 is not set
320# CONFIG_SCSI_SATA_SIL is not set
321# CONFIG_SCSI_SATA_SIS is not set
322# CONFIG_SCSI_SATA_ULI is not set
323# CONFIG_SCSI_SATA_VIA is not set
324CONFIG_SCSI_SATA_VITESSE=y
325# CONFIG_SCSI_BUSLOGIC is not set
326# CONFIG_SCSI_DMX3191D is not set
327# CONFIG_SCSI_EATA is not set
328# CONFIG_SCSI_EATA_PIO is not set
329# CONFIG_SCSI_FUTURE_DOMAIN is not set
330# CONFIG_SCSI_GDTH is not set
331# CONFIG_SCSI_IPS is not set
332# CONFIG_SCSI_INITIO is not set
333# CONFIG_SCSI_INIA100 is not set
334# CONFIG_SCSI_SYM53C8XX_2 is not set
335# CONFIG_SCSI_IPR is not set
336# CONFIG_SCSI_QLOGIC_ISP is not set
337# CONFIG_SCSI_QLOGIC_FC is not set
338CONFIG_SCSI_QLOGIC_1280=y
339# CONFIG_SCSI_QLOGIC_1280_1040 is not set
340CONFIG_SCSI_QLA2XXX=y
341# CONFIG_SCSI_QLA21XX is not set
342CONFIG_SCSI_QLA22XX=y
343CONFIG_SCSI_QLA2300=y
344CONFIG_SCSI_QLA2322=y
345# CONFIG_SCSI_QLA6312 is not set
346# CONFIG_SCSI_DC395x is not set
347# CONFIG_SCSI_DC390T is not set
348# CONFIG_SCSI_DEBUG is not set
349
350#
351# Multi-device support (RAID and LVM)
352#
353CONFIG_MD=y
354CONFIG_BLK_DEV_MD=y
355CONFIG_MD_LINEAR=y
356CONFIG_MD_RAID0=y
357CONFIG_MD_RAID1=y
358# CONFIG_MD_RAID10 is not set
359CONFIG_MD_RAID5=y
360# CONFIG_MD_RAID6 is not set
361CONFIG_MD_MULTIPATH=y
362# CONFIG_MD_FAULTY is not set
363CONFIG_BLK_DEV_DM=y
364CONFIG_DM_CRYPT=m
365CONFIG_DM_SNAPSHOT=m
366CONFIG_DM_MIRROR=m
367CONFIG_DM_ZERO=m
368
369#
370# Fusion MPT device support
371#
372CONFIG_FUSION=y
373CONFIG_FUSION_MAX_SGE=128
374CONFIG_FUSION_CTL=m
375
376#
377# IEEE 1394 (FireWire) support
378#
379# CONFIG_IEEE1394 is not set
380
381#
382# I2O device support
383#
384# CONFIG_I2O is not set
385
386#
387# Networking support
388#
389CONFIG_NET=y
390
391#
392# Networking options
393#
394CONFIG_PACKET=y
395CONFIG_PACKET_MMAP=y
396CONFIG_NETLINK_DEV=y
397CONFIG_UNIX=y
398# CONFIG_NET_KEY is not set
399CONFIG_INET=y
400CONFIG_IP_MULTICAST=y
401# CONFIG_IP_ADVANCED_ROUTER is not set
402# CONFIG_IP_PNP is not set
403# CONFIG_NET_IPIP is not set
404# CONFIG_NET_IPGRE is not set
405# CONFIG_IP_MROUTE is not set
406# CONFIG_ARPD is not set
407CONFIG_SYN_COOKIES=y
408# CONFIG_INET_AH is not set
409# CONFIG_INET_ESP is not set
410# CONFIG_INET_IPCOMP is not set
411# CONFIG_INET_TUNNEL is not set
412CONFIG_IP_TCPDIAG=y
413# CONFIG_IP_TCPDIAG_IPV6 is not set
414CONFIG_IPV6=m
415# CONFIG_IPV6_PRIVACY is not set
416# CONFIG_INET6_AH is not set
417# CONFIG_INET6_ESP is not set
418# CONFIG_INET6_IPCOMP is not set
419# CONFIG_INET6_TUNNEL is not set
420# CONFIG_IPV6_TUNNEL is not set
421# CONFIG_NETFILTER is not set
422
423#
424# SCTP Configuration (EXPERIMENTAL)
425#
426# CONFIG_IP_SCTP is not set
427# CONFIG_ATM is not set
428# CONFIG_BRIDGE is not set
429# CONFIG_VLAN_8021Q is not set
430# CONFIG_DECNET is not set
431# CONFIG_LLC2 is not set
432# CONFIG_IPX is not set
433# CONFIG_ATALK is not set
434# CONFIG_X25 is not set
435# CONFIG_LAPB is not set
436# CONFIG_NET_DIVERT is not set
437# CONFIG_ECONET is not set
438# CONFIG_WAN_ROUTER is not set
439
440#
441# QoS and/or fair queueing
442#
443# CONFIG_NET_SCHED is not set
444# CONFIG_NET_CLS_ROUTE is not set
445
446#
447# Network testing
448#
449# CONFIG_NET_PKTGEN is not set
450CONFIG_NETPOLL=y
451# CONFIG_NETPOLL_RX is not set
452# CONFIG_NETPOLL_TRAP is not set
453CONFIG_NET_POLL_CONTROLLER=y
454# CONFIG_HAMRADIO is not set
455# CONFIG_IRDA is not set
456# CONFIG_BT is not set
457CONFIG_NETDEVICES=y
458# CONFIG_DUMMY is not set
459# CONFIG_BONDING is not set
460# CONFIG_EQUALIZER is not set
461# CONFIG_TUN is not set
462# CONFIG_ETHERTAP is not set
463
464#
465# ARCnet devices
466#
467# CONFIG_ARCNET is not set
468
469#
470# Ethernet (10 or 100Mbit)
471#
472# CONFIG_NET_ETHERNET is not set
473
474#
475# Ethernet (1000 Mbit)
476#
477# CONFIG_ACENIC is not set
478# CONFIG_DL2K is not set
479# CONFIG_E1000 is not set
480# CONFIG_NS83820 is not set
481# CONFIG_HAMACHI is not set
482# CONFIG_YELLOWFIN is not set
483# CONFIG_R8169 is not set
484# CONFIG_SK98LIN is not set
485CONFIG_TIGON3=y
486
487#
488# Ethernet (10000 Mbit)
489#
490# CONFIG_IXGB is not set
491CONFIG_S2IO=m
492# CONFIG_S2IO_NAPI is not set
493# CONFIG_2BUFF_MODE is not set
494
495#
496# Token Ring devices
497#
498# CONFIG_TR is not set
499
500#
501# Wireless LAN (non-hamradio)
502#
503# CONFIG_NET_RADIO is not set
504
505#
506# Wan interfaces
507#
508# CONFIG_WAN is not set
509# CONFIG_FDDI is not set
510# CONFIG_HIPPI is not set
511# CONFIG_PPP is not set
512# CONFIG_SLIP is not set
513# CONFIG_NET_FC is not set
514# CONFIG_SHAPER is not set
515CONFIG_NETCONSOLE=y
516
517#
518# ISDN subsystem
519#
520# CONFIG_ISDN is not set
521
522#
523# Telephony Support
524#
525# CONFIG_PHONE is not set
526
527#
528# Input device support
529#
530CONFIG_INPUT=y
531
532#
533# Userland interfaces
534#
535CONFIG_INPUT_MOUSEDEV=y
536# CONFIG_INPUT_MOUSEDEV_PSAUX is not set
537CONFIG_INPUT_MOUSEDEV_SCREEN_X=1024
538CONFIG_INPUT_MOUSEDEV_SCREEN_Y=768
539# CONFIG_INPUT_JOYDEV is not set
540# CONFIG_INPUT_TSDEV is not set
541# CONFIG_INPUT_EVDEV is not set
542# CONFIG_INPUT_EVBUG is not set
543
544#
545# Input I/O drivers
546#
547# CONFIG_GAMEPORT is not set
548CONFIG_SOUND_GAMEPORT=y
549# CONFIG_SERIO is not set
550# CONFIG_SERIO_I8042 is not set
551
552#
553# Input Device Drivers
554#
555# CONFIG_INPUT_KEYBOARD is not set
556# CONFIG_INPUT_MOUSE is not set
557# CONFIG_INPUT_JOYSTICK is not set
558# CONFIG_INPUT_TOUCHSCREEN is not set
559# CONFIG_INPUT_MISC is not set
560
561#
562# Character devices
563#
564CONFIG_VT=y
565CONFIG_VT_CONSOLE=y
566CONFIG_HW_CONSOLE=y
567CONFIG_SERIAL_NONSTANDARD=y
568# CONFIG_ROCKETPORT is not set
569# CONFIG_CYCLADES is not set
570# CONFIG_MOXA_SMARTIO is not set
571# CONFIG_ISI is not set
572# CONFIG_SYNCLINK is not set
573# CONFIG_SYNCLINKMP is not set
574# CONFIG_N_HDLC is not set
575# CONFIG_STALDRV is not set
576CONFIG_SGI_SNSC=y
577
578#
579# Serial drivers
580#
581# CONFIG_SERIAL_8250 is not set
582
583#
584# Non-8250 serial port support
585#
586CONFIG_SERIAL_CORE=y
587CONFIG_SERIAL_CORE_CONSOLE=y
588CONFIG_SERIAL_SGI_L1_CONSOLE=y
589CONFIG_UNIX98_PTYS=y
590CONFIG_LEGACY_PTYS=y
591CONFIG_LEGACY_PTY_COUNT=256
592
593#
594# IPMI
595#
596# CONFIG_IPMI_HANDLER is not set
597
598#
599# Watchdog Cards
600#
601# CONFIG_WATCHDOG is not set
602# CONFIG_HW_RANDOM is not set
603CONFIG_EFI_RTC=y
604# CONFIG_DTLK is not set
605# CONFIG_R3964 is not set
606# CONFIG_APPLICOM is not set
607
608#
609# Ftape, the floppy tape device driver
610#
611# CONFIG_AGP is not set
612# CONFIG_DRM is not set
613CONFIG_RAW_DRIVER=m
614# CONFIG_HPET is not set
615CONFIG_MAX_RAW_DEVS=256
616CONFIG_MMTIMER=y
617
618#
619# I2C support
620#
621# CONFIG_I2C is not set
622
623#
624# Dallas's 1-wire bus
625#
626# CONFIG_W1 is not set
627
628#
629# Misc devices
630#
631
632#
633# Multimedia devices
634#
635# CONFIG_VIDEO_DEV is not set
636
637#
638# Digital Video Broadcasting Devices
639#
640# CONFIG_DVB is not set
641
642#
643# Graphics support
644#
645# CONFIG_FB is not set
646
647#
648# Console display driver support
649#
650# CONFIG_VGA_CONSOLE is not set
651CONFIG_DUMMY_CONSOLE=y
652
653#
654# Sound
655#
656# CONFIG_SOUND is not set
657
658#
659# USB support
660#
661CONFIG_USB=m
662# CONFIG_USB_DEBUG is not set
663
664#
665# Miscellaneous USB options
666#
667# CONFIG_USB_DEVICEFS is not set
668# CONFIG_USB_BANDWIDTH is not set
669# CONFIG_USB_DYNAMIC_MINORS is not set
670# CONFIG_USB_OTG is not set
671CONFIG_USB_ARCH_HAS_HCD=y
672CONFIG_USB_ARCH_HAS_OHCI=y
673
674#
675# USB Host Controller Drivers
676#
677CONFIG_USB_EHCI_HCD=m
678# CONFIG_USB_EHCI_SPLIT_ISO is not set
679# CONFIG_USB_EHCI_ROOT_HUB_TT is not set
680CONFIG_USB_OHCI_HCD=m
681CONFIG_USB_UHCI_HCD=m
682# CONFIG_USB_SL811_HCD is not set
683
684#
685# USB Device Class drivers
686#
687# CONFIG_USB_BLUETOOTH_TTY is not set
688# CONFIG_USB_ACM is not set
689# CONFIG_USB_PRINTER is not set
690
691#
692# NOTE: USB_STORAGE enables SCSI, and 'SCSI disk support' may also be needed; see USB_STORAGE Help for more information
693#
694# CONFIG_USB_STORAGE is not set
695
696#
697# USB Input Devices
698#
699CONFIG_USB_HID=m
700CONFIG_USB_HIDINPUT=y
701# CONFIG_HID_FF is not set
702# CONFIG_USB_HIDDEV is not set
703
704#
705# USB HID Boot Protocol drivers
706#
707# CONFIG_USB_KBD is not set
708# CONFIG_USB_MOUSE is not set
709# CONFIG_USB_AIPTEK is not set
710# CONFIG_USB_WACOM is not set
711# CONFIG_USB_KBTAB is not set
712# CONFIG_USB_POWERMATE is not set
713# CONFIG_USB_MTOUCH is not set
714# CONFIG_USB_EGALAX is not set
715# CONFIG_USB_XPAD is not set
716# CONFIG_USB_ATI_REMOTE is not set
717
718#
719# USB Imaging devices
720#
721# CONFIG_USB_MDC800 is not set
722# CONFIG_USB_MICROTEK is not set
723
724#
725# USB Multimedia devices
726#
727# CONFIG_USB_DABUSB is not set
728
729#
730# Video4Linux support is needed for USB Multimedia device support
731#
732
733#
734# USB Network Adapters
735#
736# CONFIG_USB_CATC is not set
737# CONFIG_USB_KAWETH is not set
738# CONFIG_USB_PEGASUS is not set
739# CONFIG_USB_RTL8150 is not set
740# CONFIG_USB_USBNET is not set
741
742#
743# USB port drivers
744#
745
746#
747# USB Serial Converter support
748#
749# CONFIG_USB_SERIAL is not set
750
751#
752# USB Miscellaneous drivers
753#
754# CONFIG_USB_EMI62 is not set
755# CONFIG_USB_EMI26 is not set
756# CONFIG_USB_AUERSWALD is not set
757# CONFIG_USB_RIO500 is not set
758# CONFIG_USB_LEGOTOWER is not set
759# CONFIG_USB_LCD is not set
760# CONFIG_USB_LED is not set
761# CONFIG_USB_CYTHERM is not set
762# CONFIG_USB_PHIDGETKIT is not set
763# CONFIG_USB_PHIDGETSERVO is not set
764
765#
766# USB ATM/DSL drivers
767#
768
769#
770# USB Gadget Support
771#
772# CONFIG_USB_GADGET is not set
773
774#
775# MMC/SD Card support
776#
777# CONFIG_MMC is not set
778
779#
780# InfiniBand support
781#
782CONFIG_INFINIBAND=m
783CONFIG_INFINIBAND_MTHCA=m
784# CONFIG_INFINIBAND_MTHCA_DEBUG is not set
785CONFIG_INFINIBAND_IPOIB=m
786# CONFIG_INFINIBAND_IPOIB_DEBUG is not set
787
788#
789# File systems
790#
791CONFIG_EXT2_FS=y
792CONFIG_EXT2_FS_XATTR=y
793CONFIG_EXT2_FS_POSIX_ACL=y
794CONFIG_EXT2_FS_SECURITY=y
795CONFIG_EXT3_FS=y
796CONFIG_EXT3_FS_XATTR=y
797CONFIG_EXT3_FS_POSIX_ACL=y
798CONFIG_EXT3_FS_SECURITY=y
799CONFIG_JBD=y
800# CONFIG_JBD_DEBUG is not set
801CONFIG_FS_MBCACHE=y
802CONFIG_REISERFS_FS=y
803# CONFIG_REISERFS_CHECK is not set
804# CONFIG_REISERFS_PROC_INFO is not set
805CONFIG_REISERFS_FS_XATTR=y
806CONFIG_REISERFS_FS_POSIX_ACL=y
807CONFIG_REISERFS_FS_SECURITY=y
808# CONFIG_JFS_FS is not set
809CONFIG_FS_POSIX_ACL=y
810CONFIG_XFS_FS=y
811CONFIG_XFS_RT=y
812CONFIG_XFS_QUOTA=y
813# CONFIG_XFS_SECURITY is not set
814CONFIG_XFS_POSIX_ACL=y
815# CONFIG_MINIX_FS is not set
816# CONFIG_ROMFS_FS is not set
817CONFIG_QUOTA=y
818# CONFIG_QFMT_V1 is not set
819# CONFIG_QFMT_V2 is not set
820CONFIG_QUOTACTL=y
821CONFIG_DNOTIFY=y
822CONFIG_AUTOFS_FS=m
823CONFIG_AUTOFS4_FS=m
824
825#
826# CD-ROM/DVD Filesystems
827#
828CONFIG_ISO9660_FS=y
829CONFIG_JOLIET=y
830# CONFIG_ZISOFS is not set
831CONFIG_UDF_FS=m
832CONFIG_UDF_NLS=y
833
834#
835# DOS/FAT/NT Filesystems
836#
837CONFIG_FAT_FS=y
838# CONFIG_MSDOS_FS is not set
839CONFIG_VFAT_FS=y
840CONFIG_FAT_DEFAULT_CODEPAGE=437
841CONFIG_FAT_DEFAULT_IOCHARSET="iso8859-1"
842# CONFIG_NTFS_FS is not set
843
844#
845# Pseudo filesystems
846#
847CONFIG_PROC_FS=y
848CONFIG_PROC_KCORE=y
849CONFIG_SYSFS=y
850# CONFIG_DEVFS_FS is not set
851# CONFIG_DEVPTS_FS_XATTR is not set
852CONFIG_TMPFS=y
853CONFIG_TMPFS_XATTR=y
854CONFIG_TMPFS_SECURITY=y
855CONFIG_HUGETLBFS=y
856CONFIG_HUGETLB_PAGE=y
857CONFIG_RAMFS=y
858
859#
860# Miscellaneous filesystems
861#
862# CONFIG_ADFS_FS is not set
863# CONFIG_AFFS_FS is not set
864# CONFIG_HFS_FS is not set
865# CONFIG_HFSPLUS_FS is not set
866# CONFIG_BEFS_FS is not set
867# CONFIG_BFS_FS is not set
868# CONFIG_EFS_FS is not set
869# CONFIG_CRAMFS is not set
870# CONFIG_VXFS_FS is not set
871# CONFIG_HPFS_FS is not set
872# CONFIG_QNX4FS_FS is not set
873# CONFIG_SYSV_FS is not set
874# CONFIG_UFS_FS is not set
875
876#
877# Network File Systems
878#
879CONFIG_NFS_FS=m
880CONFIG_NFS_V3=y
881CONFIG_NFS_V4=y
882CONFIG_NFS_DIRECTIO=y
883CONFIG_NFSD=m
884CONFIG_NFSD_V3=y
885CONFIG_NFSD_V4=y
886CONFIG_NFSD_TCP=y
887CONFIG_LOCKD=m
888CONFIG_LOCKD_V4=y
889CONFIG_EXPORTFS=m
890CONFIG_SUNRPC=m
891CONFIG_SUNRPC_GSS=m
892CONFIG_RPCSEC_GSS_KRB5=m
893# CONFIG_RPCSEC_GSS_SPKM3 is not set
894CONFIG_SMB_FS=m
895# CONFIG_SMB_NLS_DEFAULT is not set
896CONFIG_CIFS=m
897# CONFIG_CIFS_STATS is not set
898# CONFIG_CIFS_XATTR is not set
899# CONFIG_CIFS_EXPERIMENTAL is not set
900# CONFIG_NCP_FS is not set
901# CONFIG_CODA_FS is not set
902# CONFIG_AFS_FS is not set
903
904#
905# Partition Types
906#
907CONFIG_PARTITION_ADVANCED=y
908# CONFIG_ACORN_PARTITION is not set
909# CONFIG_OSF_PARTITION is not set
910# CONFIG_AMIGA_PARTITION is not set
911# CONFIG_ATARI_PARTITION is not set
912# CONFIG_MAC_PARTITION is not set
913CONFIG_MSDOS_PARTITION=y
914# CONFIG_BSD_DISKLABEL is not set
915# CONFIG_MINIX_SUBPARTITION is not set
916# CONFIG_SOLARIS_X86_PARTITION is not set
917# CONFIG_UNIXWARE_DISKLABEL is not set
918# CONFIG_LDM_PARTITION is not set
919CONFIG_SGI_PARTITION=y
920# CONFIG_ULTRIX_PARTITION is not set
921# CONFIG_SUN_PARTITION is not set
922CONFIG_EFI_PARTITION=y
923
924#
925# Native Language Support
926#
927CONFIG_NLS=y
928CONFIG_NLS_DEFAULT="iso8859-1"
929CONFIG_NLS_CODEPAGE_437=y
930# CONFIG_NLS_CODEPAGE_737 is not set
931# CONFIG_NLS_CODEPAGE_775 is not set
932# CONFIG_NLS_CODEPAGE_850 is not set
933# CONFIG_NLS_CODEPAGE_852 is not set
934# CONFIG_NLS_CODEPAGE_855 is not set
935# CONFIG_NLS_CODEPAGE_857 is not set
936# CONFIG_NLS_CODEPAGE_860 is not set
937# CONFIG_NLS_CODEPAGE_861 is not set
938# CONFIG_NLS_CODEPAGE_862 is not set
939# CONFIG_NLS_CODEPAGE_863 is not set
940# CONFIG_NLS_CODEPAGE_864 is not set
941# CONFIG_NLS_CODEPAGE_865 is not set
942# CONFIG_NLS_CODEPAGE_866 is not set
943# CONFIG_NLS_CODEPAGE_869 is not set
944# CONFIG_NLS_CODEPAGE_936 is not set
945# CONFIG_NLS_CODEPAGE_950 is not set
946# CONFIG_NLS_CODEPAGE_932 is not set
947# CONFIG_NLS_CODEPAGE_949 is not set
948# CONFIG_NLS_CODEPAGE_874 is not set
949# CONFIG_NLS_ISO8859_8 is not set
950# CONFIG_NLS_CODEPAGE_1250 is not set
951# CONFIG_NLS_CODEPAGE_1251 is not set
952# CONFIG_NLS_ASCII is not set
953CONFIG_NLS_ISO8859_1=y
954# CONFIG_NLS_ISO8859_2 is not set
955# CONFIG_NLS_ISO8859_3 is not set
956# CONFIG_NLS_ISO8859_4 is not set
957# CONFIG_NLS_ISO8859_5 is not set
958# CONFIG_NLS_ISO8859_6 is not set
959# CONFIG_NLS_ISO8859_7 is not set
960# CONFIG_NLS_ISO8859_9 is not set
961# CONFIG_NLS_ISO8859_13 is not set
962# CONFIG_NLS_ISO8859_14 is not set
963# CONFIG_NLS_ISO8859_15 is not set
964# CONFIG_NLS_KOI8_R is not set
965# CONFIG_NLS_KOI8_U is not set
966CONFIG_NLS_UTF8=y
967
968#
969# Library routines
970#
971# CONFIG_CRC_CCITT is not set
972CONFIG_CRC32=y
973# CONFIG_LIBCRC32C is not set
974CONFIG_ZLIB_INFLATE=m
975CONFIG_ZLIB_DEFLATE=m
976
977#
978# Profiling support
979#
980# CONFIG_PROFILING is not set
981
982#
983# Kernel hacking
984#
985CONFIG_DEBUG_KERNEL=y
986CONFIG_MAGIC_SYSRQ=y
987# CONFIG_SCHEDSTATS is not set
988# CONFIG_DEBUG_SLAB is not set
989# CONFIG_DEBUG_SPINLOCK is not set
990# CONFIG_DEBUG_SPINLOCK_SLEEP is not set
991# CONFIG_DEBUG_KOBJECT is not set
992CONFIG_DEBUG_INFO=y
993# CONFIG_DEBUG_FS is not set
994CONFIG_IA64_GRANULE_16MB=y
995# CONFIG_IA64_GRANULE_64MB is not set
996# CONFIG_IA64_PRINT_HAZARDS is not set
997# CONFIG_DISABLE_VHPT is not set
998# CONFIG_IA64_DEBUG_CMPXCHG is not set
999# CONFIG_IA64_DEBUG_IRQ is not set
1000CONFIG_SYSVIPC_COMPAT=y
1001
1002#
1003# Security options
1004#
1005# CONFIG_KEYS is not set
1006# CONFIG_SECURITY is not set
1007
1008#
1009# Cryptographic options
1010#
1011CONFIG_CRYPTO=y
1012CONFIG_CRYPTO_HMAC=y
1013# CONFIG_CRYPTO_NULL is not set
1014# CONFIG_CRYPTO_MD4 is not set
1015CONFIG_CRYPTO_MD5=m
1016CONFIG_CRYPTO_SHA1=m
1017# CONFIG_CRYPTO_SHA256 is not set
1018# CONFIG_CRYPTO_SHA512 is not set
1019# CONFIG_CRYPTO_WP512 is not set
1020CONFIG_CRYPTO_DES=m
1021# CONFIG_CRYPTO_BLOWFISH is not set
1022# CONFIG_CRYPTO_TWOFISH is not set
1023# CONFIG_CRYPTO_SERPENT is not set
1024# CONFIG_CRYPTO_AES is not set
1025# CONFIG_CRYPTO_CAST5 is not set
1026# CONFIG_CRYPTO_CAST6 is not set
1027# CONFIG_CRYPTO_TEA is not set
1028# CONFIG_CRYPTO_ARC4 is not set
1029# CONFIG_CRYPTO_KHAZAD is not set
1030# CONFIG_CRYPTO_ANUBIS is not set
1031CONFIG_CRYPTO_DEFLATE=m
1032# CONFIG_CRYPTO_MICHAEL_MIC is not set
1033# CONFIG_CRYPTO_CRC32C is not set
1034# CONFIG_CRYPTO_TEST is not set
1035
1036#
1037# Hardware crypto devices
1038#
diff --git a/arch/ia64/configs/tiger_defconfig b/arch/ia64/configs/tiger_defconfig
new file mode 100644
index 000000000000..99830e8fc9ba
--- /dev/null
+++ b/arch/ia64/configs/tiger_defconfig
@@ -0,0 +1,1098 @@
1#
2# Automatically generated make config: don't edit
3# Linux kernel version: 2.6.11-rc2
4# Sat Jan 22 11:17:02 2005
5#
6
7#
8# Code maturity level options
9#
10CONFIG_EXPERIMENTAL=y
11CONFIG_CLEAN_COMPILE=y
12CONFIG_LOCK_KERNEL=y
13
14#
15# General setup
16#
17CONFIG_LOCALVERSION=""
18CONFIG_SWAP=y
19CONFIG_SYSVIPC=y
20CONFIG_POSIX_MQUEUE=y
21# CONFIG_BSD_PROCESS_ACCT is not set
22CONFIG_SYSCTL=y
23# CONFIG_AUDIT is not set
24CONFIG_LOG_BUF_SHIFT=20
25CONFIG_HOTPLUG=y
26CONFIG_KOBJECT_UEVENT=y
27CONFIG_IKCONFIG=y
28CONFIG_IKCONFIG_PROC=y
29# CONFIG_EMBEDDED is not set
30CONFIG_KALLSYMS=y
31CONFIG_KALLSYMS_ALL=y
32# CONFIG_KALLSYMS_EXTRA_PASS is not set
33CONFIG_FUTEX=y
34CONFIG_EPOLL=y
35# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
36CONFIG_SHMEM=y
37CONFIG_CC_ALIGN_FUNCTIONS=0
38CONFIG_CC_ALIGN_LABELS=0
39CONFIG_CC_ALIGN_LOOPS=0
40CONFIG_CC_ALIGN_JUMPS=0
41# CONFIG_TINY_SHMEM is not set
42
43#
44# Loadable module support
45#
46CONFIG_MODULES=y
47CONFIG_MODULE_UNLOAD=y
48# CONFIG_MODULE_FORCE_UNLOAD is not set
49CONFIG_OBSOLETE_MODPARM=y
50CONFIG_MODVERSIONS=y
51CONFIG_MODULE_SRCVERSION_ALL=y
52CONFIG_KMOD=y
53CONFIG_STOP_MACHINE=y
54
55#
56# Processor type and features
57#
58CONFIG_IA64=y
59CONFIG_64BIT=y
60CONFIG_MMU=y
61CONFIG_RWSEM_XCHGADD_ALGORITHM=y
62CONFIG_GENERIC_CALIBRATE_DELAY=y
63CONFIG_TIME_INTERPOLATION=y
64CONFIG_EFI=y
65CONFIG_GENERIC_IOMAP=y
66# CONFIG_IA64_GENERIC is not set
67CONFIG_IA64_DIG=y
68# CONFIG_IA64_HP_ZX1 is not set
69# CONFIG_IA64_HP_ZX1_SWIOTLB is not set
70# CONFIG_IA64_SGI_SN2 is not set
71# CONFIG_IA64_HP_SIM is not set
72# CONFIG_ITANIUM is not set
73CONFIG_MCKINLEY=y
74# CONFIG_IA64_PAGE_SIZE_4KB is not set
75# CONFIG_IA64_PAGE_SIZE_8KB is not set
76CONFIG_IA64_PAGE_SIZE_16KB=y
77# CONFIG_IA64_PAGE_SIZE_64KB is not set
78CONFIG_IA64_L1_CACHE_SHIFT=7
79# CONFIG_NUMA is not set
80CONFIG_VIRTUAL_MEM_MAP=y
81CONFIG_HOLES_IN_ZONE=y
82CONFIG_IA64_CYCLONE=y
83CONFIG_IOSAPIC=y
84CONFIG_FORCE_MAX_ZONEORDER=18
85CONFIG_SMP=y
86CONFIG_NR_CPUS=4
87CONFIG_HOTPLUG_CPU=y
88# CONFIG_PREEMPT is not set
89CONFIG_HAVE_DEC_LOCK=y
90CONFIG_IA32_SUPPORT=y
91CONFIG_COMPAT=y
92CONFIG_IA64_MCA_RECOVERY=y
93CONFIG_PERFMON=y
94CONFIG_IA64_PALINFO=y
95CONFIG_ACPI_DEALLOCATE_IRQ=y
96
97#
98# Firmware Drivers
99#
100CONFIG_EFI_VARS=y
101CONFIG_EFI_PCDP=y
102CONFIG_BINFMT_ELF=y
103CONFIG_BINFMT_MISC=m
104
105#
106# Power management and ACPI
107#
108CONFIG_PM=y
109CONFIG_ACPI=y
110
111#
112# ACPI (Advanced Configuration and Power Interface) Support
113#
114CONFIG_ACPI_BOOT=y
115CONFIG_ACPI_INTERPRETER=y
116CONFIG_ACPI_BUTTON=m
117# CONFIG_ACPI_VIDEO is not set
118CONFIG_ACPI_FAN=m
119CONFIG_ACPI_PROCESSOR=m
120# CONFIG_ACPI_HOTPLUG_CPU is not set
121CONFIG_ACPI_THERMAL=m
122CONFIG_ACPI_BLACKLIST_YEAR=0
123# CONFIG_ACPI_DEBUG is not set
124CONFIG_ACPI_BUS=y
125CONFIG_ACPI_POWER=y
126CONFIG_ACPI_PCI=y
127CONFIG_ACPI_SYSTEM=y
128# CONFIG_ACPI_CONTAINER is not set
129
130#
131# Bus options (PCI, PCMCIA)
132#
133CONFIG_PCI=y
134CONFIG_PCI_DOMAINS=y
135# CONFIG_PCI_MSI is not set
136CONFIG_PCI_LEGACY_PROC=y
137CONFIG_PCI_NAMES=y
138
139#
140# PCI Hotplug Support
141#
142CONFIG_HOTPLUG_PCI=m
143# CONFIG_HOTPLUG_PCI_FAKE is not set
144CONFIG_HOTPLUG_PCI_ACPI=m
145# CONFIG_HOTPLUG_PCI_ACPI_IBM is not set
146# CONFIG_HOTPLUG_PCI_CPCI is not set
147# CONFIG_HOTPLUG_PCI_SHPC is not set
148
149#
150# PCCARD (PCMCIA/CardBus) support
151#
152# CONFIG_PCCARD is not set
153
154#
155# PC-card bridges
156#
157
158#
159# Device Drivers
160#
161
162#
163# Generic Driver Options
164#
165CONFIG_STANDALONE=y
166CONFIG_PREVENT_FIRMWARE_BUILD=y
167# CONFIG_FW_LOADER is not set
168# CONFIG_DEBUG_DRIVER is not set
169
170#
171# Memory Technology Devices (MTD)
172#
173# CONFIG_MTD is not set
174
175#
176# Parallel port support
177#
178# CONFIG_PARPORT is not set
179
180#
181# Plug and Play support
182#
183# CONFIG_PNP is not set
184
185#
186# Block devices
187#
188# CONFIG_BLK_CPQ_DA is not set
189# CONFIG_BLK_CPQ_CISS_DA is not set
190# CONFIG_BLK_DEV_DAC960 is not set
191# CONFIG_BLK_DEV_UMEM is not set
192# CONFIG_BLK_DEV_COW_COMMON is not set
193CONFIG_BLK_DEV_LOOP=m
194CONFIG_BLK_DEV_CRYPTOLOOP=m
195CONFIG_BLK_DEV_NBD=m
196# CONFIG_BLK_DEV_SX8 is not set
197# CONFIG_BLK_DEV_UB is not set
198CONFIG_BLK_DEV_RAM=m
199CONFIG_BLK_DEV_RAM_COUNT=16
200CONFIG_BLK_DEV_RAM_SIZE=4096
201CONFIG_INITRAMFS_SOURCE=""
202# CONFIG_CDROM_PKTCDVD is not set
203
204#
205# IO Schedulers
206#
207CONFIG_IOSCHED_NOOP=y
208CONFIG_IOSCHED_AS=y
209CONFIG_IOSCHED_DEADLINE=y
210CONFIG_IOSCHED_CFQ=y
211# CONFIG_ATA_OVER_ETH is not set
212
213#
214# ATA/ATAPI/MFM/RLL support
215#
216CONFIG_IDE=y
217CONFIG_BLK_DEV_IDE=y
218
219#
220# Please see Documentation/ide.txt for help/info on IDE drives
221#
222# CONFIG_BLK_DEV_IDE_SATA is not set
223CONFIG_BLK_DEV_IDEDISK=y
224# CONFIG_IDEDISK_MULTI_MODE is not set
225CONFIG_BLK_DEV_IDECD=y
226# CONFIG_BLK_DEV_IDETAPE is not set
227CONFIG_BLK_DEV_IDEFLOPPY=y
228CONFIG_BLK_DEV_IDESCSI=m
229# CONFIG_IDE_TASK_IOCTL is not set
230
231#
232# IDE chipset support/bugfixes
233#
234CONFIG_IDE_GENERIC=y
235CONFIG_BLK_DEV_IDEPCI=y
236# CONFIG_IDEPCI_SHARE_IRQ is not set
237# CONFIG_BLK_DEV_OFFBOARD is not set
238CONFIG_BLK_DEV_GENERIC=y
239# CONFIG_BLK_DEV_OPTI621 is not set
240CONFIG_BLK_DEV_IDEDMA_PCI=y
241# CONFIG_BLK_DEV_IDEDMA_FORCED is not set
242CONFIG_IDEDMA_PCI_AUTO=y
243# CONFIG_IDEDMA_ONLYDISK is not set
244# CONFIG_BLK_DEV_AEC62XX is not set
245# CONFIG_BLK_DEV_ALI15X3 is not set
246# CONFIG_BLK_DEV_AMD74XX is not set
247CONFIG_BLK_DEV_CMD64X=y
248# CONFIG_BLK_DEV_TRIFLEX is not set
249# CONFIG_BLK_DEV_CY82C693 is not set
250# CONFIG_BLK_DEV_CS5520 is not set
251# CONFIG_BLK_DEV_CS5530 is not set
252# CONFIG_BLK_DEV_HPT34X is not set
253# CONFIG_BLK_DEV_HPT366 is not set
254# CONFIG_BLK_DEV_SC1200 is not set
255CONFIG_BLK_DEV_PIIX=y
256# CONFIG_BLK_DEV_NS87415 is not set
257# CONFIG_BLK_DEV_PDC202XX_OLD is not set
258# CONFIG_BLK_DEV_PDC202XX_NEW is not set
259# CONFIG_BLK_DEV_SVWKS is not set
260# CONFIG_BLK_DEV_SIIMAGE is not set
261# CONFIG_BLK_DEV_SLC90E66 is not set
262# CONFIG_BLK_DEV_TRM290 is not set
263# CONFIG_BLK_DEV_VIA82CXXX is not set
264# CONFIG_IDE_ARM is not set
265CONFIG_BLK_DEV_IDEDMA=y
266# CONFIG_IDEDMA_IVB is not set
267CONFIG_IDEDMA_AUTO=y
268# CONFIG_BLK_DEV_HD is not set
269
270#
271# SCSI device support
272#
273CONFIG_SCSI=y
274CONFIG_SCSI_PROC_FS=y
275
276#
277# SCSI support type (disk, tape, CD-ROM)
278#
279CONFIG_BLK_DEV_SD=y
280CONFIG_CHR_DEV_ST=m
281# CONFIG_CHR_DEV_OSST is not set
282CONFIG_BLK_DEV_SR=m
283# CONFIG_BLK_DEV_SR_VENDOR is not set
284CONFIG_CHR_DEV_SG=m
285
286#
287# Some SCSI devices (e.g. CD jukebox) support multiple LUNs
288#
289# CONFIG_SCSI_MULTI_LUN is not set
290# CONFIG_SCSI_CONSTANTS is not set
291# CONFIG_SCSI_LOGGING is not set
292
293#
294# SCSI Transport Attributes
295#
296CONFIG_SCSI_SPI_ATTRS=y
297CONFIG_SCSI_FC_ATTRS=y
298# CONFIG_SCSI_ISCSI_ATTRS is not set
299
300#
301# SCSI low-level drivers
302#
303# CONFIG_BLK_DEV_3W_XXXX_RAID is not set
304# CONFIG_SCSI_3W_9XXX is not set
305# CONFIG_SCSI_ACARD is not set
306# CONFIG_SCSI_AACRAID is not set
307# CONFIG_SCSI_AIC7XXX is not set
308# CONFIG_SCSI_AIC7XXX_OLD is not set
309# CONFIG_SCSI_AIC79XX is not set
310# CONFIG_MEGARAID_NEWGEN is not set
311# CONFIG_MEGARAID_LEGACY is not set
312# CONFIG_SCSI_SATA is not set
313# CONFIG_SCSI_BUSLOGIC is not set
314# CONFIG_SCSI_DMX3191D is not set
315# CONFIG_SCSI_EATA is not set
316# CONFIG_SCSI_EATA_PIO is not set
317# CONFIG_SCSI_FUTURE_DOMAIN is not set
318# CONFIG_SCSI_GDTH is not set
319# CONFIG_SCSI_IPS is not set
320# CONFIG_SCSI_INITIO is not set
321# CONFIG_SCSI_INIA100 is not set
322CONFIG_SCSI_SYM53C8XX_2=y
323CONFIG_SCSI_SYM53C8XX_DMA_ADDRESSING_MODE=1
324CONFIG_SCSI_SYM53C8XX_DEFAULT_TAGS=16
325CONFIG_SCSI_SYM53C8XX_MAX_TAGS=64
326# CONFIG_SCSI_SYM53C8XX_IOMAPPED is not set
327# CONFIG_SCSI_IPR is not set
328# CONFIG_SCSI_QLOGIC_ISP is not set
329CONFIG_SCSI_QLOGIC_FC=y
330# CONFIG_SCSI_QLOGIC_FC_FIRMWARE is not set
331CONFIG_SCSI_QLOGIC_1280=y
332# CONFIG_SCSI_QLOGIC_1280_1040 is not set
333CONFIG_SCSI_QLA2XXX=y
334CONFIG_SCSI_QLA21XX=m
335CONFIG_SCSI_QLA22XX=m
336CONFIG_SCSI_QLA2300=m
337CONFIG_SCSI_QLA2322=m
338# CONFIG_SCSI_QLA6312 is not set
339# CONFIG_SCSI_DC395x is not set
340# CONFIG_SCSI_DC390T is not set
341# CONFIG_SCSI_DEBUG is not set
342
343#
344# Multi-device support (RAID and LVM)
345#
346CONFIG_MD=y
347CONFIG_BLK_DEV_MD=m
348CONFIG_MD_LINEAR=m
349CONFIG_MD_RAID0=m
350CONFIG_MD_RAID1=m
351# CONFIG_MD_RAID10 is not set
352CONFIG_MD_RAID5=m
353CONFIG_MD_RAID6=m
354CONFIG_MD_MULTIPATH=m
355# CONFIG_MD_FAULTY is not set
356CONFIG_BLK_DEV_DM=m
357CONFIG_DM_CRYPT=m
358CONFIG_DM_SNAPSHOT=m
359CONFIG_DM_MIRROR=m
360CONFIG_DM_ZERO=m
361
362#
363# Fusion MPT device support
364#
365CONFIG_FUSION=y
366CONFIG_FUSION_MAX_SGE=40
367# CONFIG_FUSION_CTL is not set
368
369#
370# IEEE 1394 (FireWire) support
371#
372# CONFIG_IEEE1394 is not set
373
374#
375# I2O device support
376#
377# CONFIG_I2O is not set
378
379#
380# Networking support
381#
382CONFIG_NET=y
383
384#
385# Networking options
386#
387CONFIG_PACKET=y
388# CONFIG_PACKET_MMAP is not set
389CONFIG_NETLINK_DEV=y
390CONFIG_UNIX=y
391# CONFIG_NET_KEY is not set
392CONFIG_INET=y
393CONFIG_IP_MULTICAST=y
394# CONFIG_IP_ADVANCED_ROUTER is not set
395# CONFIG_IP_PNP is not set
396# CONFIG_NET_IPIP is not set
397# CONFIG_NET_IPGRE is not set
398# CONFIG_IP_MROUTE is not set
399CONFIG_ARPD=y
400CONFIG_SYN_COOKIES=y
401# CONFIG_INET_AH is not set
402# CONFIG_INET_ESP is not set
403# CONFIG_INET_IPCOMP is not set
404# CONFIG_INET_TUNNEL is not set
405CONFIG_IP_TCPDIAG=y
406# CONFIG_IP_TCPDIAG_IPV6 is not set
407# CONFIG_IPV6 is not set
408# CONFIG_NETFILTER is not set
409
410#
411# SCTP Configuration (EXPERIMENTAL)
412#
413# CONFIG_IP_SCTP is not set
414# CONFIG_ATM is not set
415# CONFIG_BRIDGE is not set
416# CONFIG_VLAN_8021Q is not set
417# CONFIG_DECNET is not set
418# CONFIG_LLC2 is not set
419# CONFIG_IPX is not set
420# CONFIG_ATALK is not set
421# CONFIG_X25 is not set
422# CONFIG_LAPB is not set
423# CONFIG_NET_DIVERT is not set
424# CONFIG_ECONET is not set
425# CONFIG_WAN_ROUTER is not set
426
427#
428# QoS and/or fair queueing
429#
430# CONFIG_NET_SCHED is not set
431# CONFIG_NET_CLS_ROUTE is not set
432
433#
434# Network testing
435#
436# CONFIG_NET_PKTGEN is not set
437CONFIG_NETPOLL=y
438# CONFIG_NETPOLL_RX is not set
439# CONFIG_NETPOLL_TRAP is not set
440CONFIG_NET_POLL_CONTROLLER=y
441# CONFIG_HAMRADIO is not set
442# CONFIG_IRDA is not set
443# CONFIG_BT is not set
444CONFIG_NETDEVICES=y
445CONFIG_DUMMY=m
446# CONFIG_BONDING is not set
447# CONFIG_EQUALIZER is not set
448# CONFIG_TUN is not set
449# CONFIG_ETHERTAP is not set
450
451#
452# ARCnet devices
453#
454# CONFIG_ARCNET is not set
455
456#
457# Ethernet (10 or 100Mbit)
458#
459CONFIG_NET_ETHERNET=y
460CONFIG_MII=m
461# CONFIG_HAPPYMEAL is not set
462# CONFIG_SUNGEM is not set
463# CONFIG_NET_VENDOR_3COM is not set
464
465#
466# Tulip family network device support
467#
468CONFIG_NET_TULIP=y
469# CONFIG_DE2104X is not set
470CONFIG_TULIP=m
471# CONFIG_TULIP_MWI is not set
472# CONFIG_TULIP_MMIO is not set
473# CONFIG_TULIP_NAPI is not set
474# CONFIG_DE4X5 is not set
475# CONFIG_WINBOND_840 is not set
476# CONFIG_DM9102 is not set
477# CONFIG_HP100 is not set
478CONFIG_NET_PCI=y
479# CONFIG_PCNET32 is not set
480# CONFIG_AMD8111_ETH is not set
481# CONFIG_ADAPTEC_STARFIRE is not set
482# CONFIG_B44 is not set
483# CONFIG_FORCEDETH is not set
484# CONFIG_DGRS is not set
485CONFIG_EEPRO100=m
486CONFIG_E100=m
487# CONFIG_E100_NAPI is not set
488# CONFIG_FEALNX is not set
489# CONFIG_NATSEMI is not set
490# CONFIG_NE2K_PCI is not set
491# CONFIG_8139CP is not set
492# CONFIG_8139TOO is not set
493# CONFIG_SIS900 is not set
494# CONFIG_EPIC100 is not set
495# CONFIG_SUNDANCE is not set
496# CONFIG_VIA_RHINE is not set
497
498#
499# Ethernet (1000 Mbit)
500#
501# CONFIG_ACENIC is not set
502# CONFIG_DL2K is not set
503CONFIG_E1000=y
504# CONFIG_E1000_NAPI is not set
505# CONFIG_NS83820 is not set
506# CONFIG_HAMACHI is not set
507# CONFIG_YELLOWFIN is not set
508# CONFIG_R8169 is not set
509# CONFIG_SK98LIN is not set
510# CONFIG_VIA_VELOCITY is not set
511CONFIG_TIGON3=y
512
513#
514# Ethernet (10000 Mbit)
515#
516# CONFIG_IXGB is not set
517# CONFIG_S2IO is not set
518
519#
520# Token Ring devices
521#
522# CONFIG_TR is not set
523
524#
525# Wireless LAN (non-hamradio)
526#
527# CONFIG_NET_RADIO is not set
528
529#
530# Wan interfaces
531#
532# CONFIG_WAN is not set
533# CONFIG_FDDI is not set
534# CONFIG_HIPPI is not set
535# CONFIG_PPP is not set
536# CONFIG_SLIP is not set
537# CONFIG_NET_FC is not set
538# CONFIG_SHAPER is not set
539CONFIG_NETCONSOLE=y
540
541#
542# ISDN subsystem
543#
544# CONFIG_ISDN is not set
545
546#
547# Telephony Support
548#
549# CONFIG_PHONE is not set
550
551#
552# Input device support
553#
554CONFIG_INPUT=y
555
556#
557# Userland interfaces
558#
559CONFIG_INPUT_MOUSEDEV=y
560CONFIG_INPUT_MOUSEDEV_PSAUX=y
561CONFIG_INPUT_MOUSEDEV_SCREEN_X=1024
562CONFIG_INPUT_MOUSEDEV_SCREEN_Y=768
563# CONFIG_INPUT_JOYDEV is not set
564# CONFIG_INPUT_TSDEV is not set
565# CONFIG_INPUT_EVDEV is not set
566# CONFIG_INPUT_EVBUG is not set
567
568#
569# Input I/O drivers
570#
571CONFIG_GAMEPORT=m
572CONFIG_SOUND_GAMEPORT=m
573# CONFIG_GAMEPORT_NS558 is not set
574# CONFIG_GAMEPORT_L4 is not set
575# CONFIG_GAMEPORT_EMU10K1 is not set
576# CONFIG_GAMEPORT_VORTEX is not set
577# CONFIG_GAMEPORT_FM801 is not set
578# CONFIG_GAMEPORT_CS461X is not set
579CONFIG_SERIO=y
580CONFIG_SERIO_I8042=y
581# CONFIG_SERIO_SERPORT is not set
582# CONFIG_SERIO_CT82C710 is not set
583# CONFIG_SERIO_PCIPS2 is not set
584CONFIG_SERIO_LIBPS2=y
585# CONFIG_SERIO_RAW is not set
586
587#
588# Input Device Drivers
589#
590CONFIG_INPUT_KEYBOARD=y
591CONFIG_KEYBOARD_ATKBD=y
592# CONFIG_KEYBOARD_SUNKBD is not set
593# CONFIG_KEYBOARD_LKKBD is not set
594# CONFIG_KEYBOARD_XTKBD is not set
595# CONFIG_KEYBOARD_NEWTON is not set
596CONFIG_INPUT_MOUSE=y
597CONFIG_MOUSE_PS2=y
598# CONFIG_MOUSE_SERIAL is not set
599# CONFIG_MOUSE_VSXXXAA is not set
600# CONFIG_INPUT_JOYSTICK is not set
601# CONFIG_INPUT_TOUCHSCREEN is not set
602# CONFIG_INPUT_MISC is not set
603
604#
605# Character devices
606#
607CONFIG_VT=y
608CONFIG_VT_CONSOLE=y
609CONFIG_HW_CONSOLE=y
610CONFIG_SERIAL_NONSTANDARD=y
611# CONFIG_ROCKETPORT is not set
612# CONFIG_CYCLADES is not set
613# CONFIG_MOXA_SMARTIO is not set
614# CONFIG_ISI is not set
615# CONFIG_SYNCLINK is not set
616# CONFIG_SYNCLINKMP is not set
617# CONFIG_N_HDLC is not set
618# CONFIG_STALDRV is not set
619
620#
621# Serial drivers
622#
623CONFIG_SERIAL_8250=y
624CONFIG_SERIAL_8250_CONSOLE=y
625CONFIG_SERIAL_8250_ACPI=y
626CONFIG_SERIAL_8250_NR_UARTS=6
627CONFIG_SERIAL_8250_EXTENDED=y
628CONFIG_SERIAL_8250_SHARE_IRQ=y
629# CONFIG_SERIAL_8250_DETECT_IRQ is not set
630# CONFIG_SERIAL_8250_MULTIPORT is not set
631# CONFIG_SERIAL_8250_RSA is not set
632
633#
634# Non-8250 serial port support
635#
636CONFIG_SERIAL_CORE=y
637CONFIG_SERIAL_CORE_CONSOLE=y
638CONFIG_UNIX98_PTYS=y
639CONFIG_LEGACY_PTYS=y
640CONFIG_LEGACY_PTY_COUNT=256
641
642#
643# IPMI
644#
645# CONFIG_IPMI_HANDLER is not set
646
647#
648# Watchdog Cards
649#
650# CONFIG_WATCHDOG is not set
651# CONFIG_HW_RANDOM is not set
652CONFIG_EFI_RTC=y
653# CONFIG_DTLK is not set
654# CONFIG_R3964 is not set
655# CONFIG_APPLICOM is not set
656
657#
658# Ftape, the floppy tape device driver
659#
660CONFIG_AGP=m
661CONFIG_AGP_I460=m
662CONFIG_DRM=m
663CONFIG_DRM_TDFX=m
664CONFIG_DRM_R128=m
665CONFIG_DRM_RADEON=m
666CONFIG_DRM_MGA=m
667CONFIG_DRM_SIS=m
668CONFIG_RAW_DRIVER=m
669CONFIG_HPET=y
670# CONFIG_HPET_RTC_IRQ is not set
671CONFIG_HPET_MMAP=y
672CONFIG_MAX_RAW_DEVS=256
673
674#
675# I2C support
676#
677# CONFIG_I2C is not set
678
679#
680# Dallas's 1-wire bus
681#
682# CONFIG_W1 is not set
683
684#
685# Misc devices
686#
687
688#
689# Multimedia devices
690#
691# CONFIG_VIDEO_DEV is not set
692
693#
694# Digital Video Broadcasting Devices
695#
696# CONFIG_DVB is not set
697
698#
699# Graphics support
700#
701# CONFIG_FB is not set
702
703#
704# Console display driver support
705#
706CONFIG_VGA_CONSOLE=y
707CONFIG_DUMMY_CONSOLE=y
708# CONFIG_BACKLIGHT_LCD_SUPPORT is not set
709
710#
711# Sound
712#
713# CONFIG_SOUND is not set
714
715#
716# USB support
717#
718CONFIG_USB=y
719# CONFIG_USB_DEBUG is not set
720
721#
722# Miscellaneous USB options
723#
724CONFIG_USB_DEVICEFS=y
725# CONFIG_USB_BANDWIDTH is not set
726# CONFIG_USB_DYNAMIC_MINORS is not set
727# CONFIG_USB_SUSPEND is not set
728# CONFIG_USB_OTG is not set
729CONFIG_USB_ARCH_HAS_HCD=y
730CONFIG_USB_ARCH_HAS_OHCI=y
731
732#
733# USB Host Controller Drivers
734#
735CONFIG_USB_EHCI_HCD=m
736# CONFIG_USB_EHCI_SPLIT_ISO is not set
737# CONFIG_USB_EHCI_ROOT_HUB_TT is not set
738CONFIG_USB_OHCI_HCD=m
739CONFIG_USB_UHCI_HCD=y
740# CONFIG_USB_SL811_HCD is not set
741
742#
743# USB Device Class drivers
744#
745# CONFIG_USB_BLUETOOTH_TTY is not set
746# CONFIG_USB_ACM is not set
747# CONFIG_USB_PRINTER is not set
748
749#
750# NOTE: USB_STORAGE enables SCSI, and 'SCSI disk support' may also be needed; see USB_STORAGE Help for more information
751#
752CONFIG_USB_STORAGE=m
753# CONFIG_USB_STORAGE_DEBUG is not set
754# CONFIG_USB_STORAGE_RW_DETECT is not set
755# CONFIG_USB_STORAGE_DATAFAB is not set
756# CONFIG_USB_STORAGE_FREECOM is not set
757# CONFIG_USB_STORAGE_ISD200 is not set
758# CONFIG_USB_STORAGE_DPCM is not set
759# CONFIG_USB_STORAGE_HP8200e is not set
760# CONFIG_USB_STORAGE_SDDR09 is not set
761# CONFIG_USB_STORAGE_SDDR55 is not set
762# CONFIG_USB_STORAGE_JUMPSHOT is not set
763
764#
765# USB Input Devices
766#
767CONFIG_USB_HID=y
768CONFIG_USB_HIDINPUT=y
769# CONFIG_HID_FF is not set
770# CONFIG_USB_HIDDEV is not set
771# CONFIG_USB_AIPTEK is not set
772# CONFIG_USB_WACOM is not set
773# CONFIG_USB_KBTAB is not set
774# CONFIG_USB_POWERMATE is not set
775# CONFIG_USB_MTOUCH is not set
776# CONFIG_USB_EGALAX is not set
777# CONFIG_USB_XPAD is not set
778# CONFIG_USB_ATI_REMOTE is not set
779
780#
781# USB Imaging devices
782#
783# CONFIG_USB_MDC800 is not set
784# CONFIG_USB_MICROTEK is not set
785
786#
787# USB Multimedia devices
788#
789# CONFIG_USB_DABUSB is not set
790
791#
792# Video4Linux support is needed for USB Multimedia device support
793#
794
795#
796# USB Network Adapters
797#
798# CONFIG_USB_CATC is not set
799# CONFIG_USB_KAWETH is not set
800# CONFIG_USB_PEGASUS is not set
801# CONFIG_USB_RTL8150 is not set
802# CONFIG_USB_USBNET is not set
803
804#
805# USB port drivers
806#
807
808#
809# USB Serial Converter support
810#
811# CONFIG_USB_SERIAL is not set
812
813#
814# USB Miscellaneous drivers
815#
816# CONFIG_USB_EMI62 is not set
817# CONFIG_USB_EMI26 is not set
818# CONFIG_USB_AUERSWALD is not set
819# CONFIG_USB_RIO500 is not set
820# CONFIG_USB_LEGOTOWER is not set
821# CONFIG_USB_LCD is not set
822# CONFIG_USB_LED is not set
823# CONFIG_USB_CYTHERM is not set
824# CONFIG_USB_PHIDGETKIT is not set
825# CONFIG_USB_PHIDGETSERVO is not set
826# CONFIG_USB_IDMOUSE is not set
827# CONFIG_USB_TEST is not set
828
829#
830# USB ATM/DSL drivers
831#
832
833#
834# USB Gadget Support
835#
836# CONFIG_USB_GADGET is not set
837
838#
839# MMC/SD Card support
840#
841# CONFIG_MMC is not set
842
843#
844# InfiniBand support
845#
846# CONFIG_INFINIBAND is not set
847
848#
849# File systems
850#
851CONFIG_EXT2_FS=y
852CONFIG_EXT2_FS_XATTR=y
853CONFIG_EXT2_FS_POSIX_ACL=y
854CONFIG_EXT2_FS_SECURITY=y
855CONFIG_EXT3_FS=y
856CONFIG_EXT3_FS_XATTR=y
857CONFIG_EXT3_FS_POSIX_ACL=y
858CONFIG_EXT3_FS_SECURITY=y
859CONFIG_JBD=y
860# CONFIG_JBD_DEBUG is not set
861CONFIG_FS_MBCACHE=y
862CONFIG_REISERFS_FS=y
863# CONFIG_REISERFS_CHECK is not set
864# CONFIG_REISERFS_PROC_INFO is not set
865CONFIG_REISERFS_FS_XATTR=y
866CONFIG_REISERFS_FS_POSIX_ACL=y
867CONFIG_REISERFS_FS_SECURITY=y
868# CONFIG_JFS_FS is not set
869CONFIG_FS_POSIX_ACL=y
870CONFIG_XFS_FS=y
871# CONFIG_XFS_RT is not set
872# CONFIG_XFS_QUOTA is not set
873# CONFIG_XFS_SECURITY is not set
874# CONFIG_XFS_POSIX_ACL is not set
875# CONFIG_MINIX_FS is not set
876# CONFIG_ROMFS_FS is not set
877# CONFIG_QUOTA is not set
878CONFIG_DNOTIFY=y
879CONFIG_AUTOFS_FS=y
880CONFIG_AUTOFS4_FS=y
881
882#
883# CD-ROM/DVD Filesystems
884#
885CONFIG_ISO9660_FS=m
886CONFIG_JOLIET=y
887# CONFIG_ZISOFS is not set
888CONFIG_UDF_FS=m
889CONFIG_UDF_NLS=y
890
891#
892# DOS/FAT/NT Filesystems
893#
894CONFIG_FAT_FS=y
895# CONFIG_MSDOS_FS is not set
896CONFIG_VFAT_FS=y
897CONFIG_FAT_DEFAULT_CODEPAGE=437
898CONFIG_FAT_DEFAULT_IOCHARSET="iso8859-1"
899CONFIG_NTFS_FS=m
900# CONFIG_NTFS_DEBUG is not set
901# CONFIG_NTFS_RW is not set
902
903#
904# Pseudo filesystems
905#
906CONFIG_PROC_FS=y
907CONFIG_PROC_KCORE=y
908CONFIG_SYSFS=y
909# CONFIG_DEVFS_FS is not set
910# CONFIG_DEVPTS_FS_XATTR is not set
911CONFIG_TMPFS=y
912CONFIG_TMPFS_XATTR=y
913CONFIG_TMPFS_SECURITY=y
914CONFIG_HUGETLBFS=y
915CONFIG_HUGETLB_PAGE=y
916CONFIG_RAMFS=y
917
918#
919# Miscellaneous filesystems
920#
921# CONFIG_ADFS_FS is not set
922# CONFIG_AFFS_FS is not set
923# CONFIG_HFS_FS is not set
924# CONFIG_HFSPLUS_FS is not set
925# CONFIG_BEFS_FS is not set
926# CONFIG_BFS_FS is not set
927# CONFIG_EFS_FS is not set
928# CONFIG_CRAMFS is not set
929# CONFIG_VXFS_FS is not set
930# CONFIG_HPFS_FS is not set
931# CONFIG_QNX4FS_FS is not set
932# CONFIG_SYSV_FS is not set
933# CONFIG_UFS_FS is not set
934
935#
936# Network File Systems
937#
938CONFIG_NFS_FS=m
939CONFIG_NFS_V3=y
940CONFIG_NFS_V4=y
941CONFIG_NFS_DIRECTIO=y
942CONFIG_NFSD=m
943CONFIG_NFSD_V3=y
944CONFIG_NFSD_V4=y
945CONFIG_NFSD_TCP=y
946CONFIG_LOCKD=m
947CONFIG_LOCKD_V4=y
948CONFIG_EXPORTFS=m
949CONFIG_SUNRPC=m
950CONFIG_SUNRPC_GSS=m
951CONFIG_RPCSEC_GSS_KRB5=m
952# CONFIG_RPCSEC_GSS_SPKM3 is not set
953CONFIG_SMB_FS=m
954CONFIG_SMB_NLS_DEFAULT=y
955CONFIG_SMB_NLS_REMOTE="cp437"
956CONFIG_CIFS=m
957# CONFIG_CIFS_STATS is not set
958# CONFIG_CIFS_XATTR is not set
959# CONFIG_CIFS_EXPERIMENTAL is not set
960# CONFIG_NCP_FS is not set
961# CONFIG_CODA_FS is not set
962# CONFIG_AFS_FS is not set
963
964#
965# Partition Types
966#
967CONFIG_PARTITION_ADVANCED=y
968# CONFIG_ACORN_PARTITION is not set
969# CONFIG_OSF_PARTITION is not set
970# CONFIG_AMIGA_PARTITION is not set
971# CONFIG_ATARI_PARTITION is not set
972# CONFIG_MAC_PARTITION is not set
973CONFIG_MSDOS_PARTITION=y
974# CONFIG_BSD_DISKLABEL is not set
975# CONFIG_MINIX_SUBPARTITION is not set
976# CONFIG_SOLARIS_X86_PARTITION is not set
977# CONFIG_UNIXWARE_DISKLABEL is not set
978# CONFIG_LDM_PARTITION is not set
979CONFIG_SGI_PARTITION=y
980# CONFIG_ULTRIX_PARTITION is not set
981# CONFIG_SUN_PARTITION is not set
982CONFIG_EFI_PARTITION=y
983
984#
985# Native Language Support
986#
987CONFIG_NLS=y
988CONFIG_NLS_DEFAULT="iso8859-1"
989CONFIG_NLS_CODEPAGE_437=y
990CONFIG_NLS_CODEPAGE_737=m
991CONFIG_NLS_CODEPAGE_775=m
992CONFIG_NLS_CODEPAGE_850=m
993CONFIG_NLS_CODEPAGE_852=m
994CONFIG_NLS_CODEPAGE_855=m
995CONFIG_NLS_CODEPAGE_857=m
996CONFIG_NLS_CODEPAGE_860=m
997CONFIG_NLS_CODEPAGE_861=m
998CONFIG_NLS_CODEPAGE_862=m
999CONFIG_NLS_CODEPAGE_863=m
1000CONFIG_NLS_CODEPAGE_864=m
1001CONFIG_NLS_CODEPAGE_865=m
1002CONFIG_NLS_CODEPAGE_866=m
1003CONFIG_NLS_CODEPAGE_869=m
1004CONFIG_NLS_CODEPAGE_936=m
1005CONFIG_NLS_CODEPAGE_950=m
1006CONFIG_NLS_CODEPAGE_932=m
1007CONFIG_NLS_CODEPAGE_949=m
1008CONFIG_NLS_CODEPAGE_874=m
1009CONFIG_NLS_ISO8859_8=m
1010CONFIG_NLS_CODEPAGE_1250=m
1011CONFIG_NLS_CODEPAGE_1251=m
1012# CONFIG_NLS_ASCII is not set
1013CONFIG_NLS_ISO8859_1=y
1014CONFIG_NLS_ISO8859_2=m
1015CONFIG_NLS_ISO8859_3=m
1016CONFIG_NLS_ISO8859_4=m
1017CONFIG_NLS_ISO8859_5=m
1018CONFIG_NLS_ISO8859_6=m
1019CONFIG_NLS_ISO8859_7=m
1020CONFIG_NLS_ISO8859_9=m
1021CONFIG_NLS_ISO8859_13=m
1022CONFIG_NLS_ISO8859_14=m
1023CONFIG_NLS_ISO8859_15=m
1024CONFIG_NLS_KOI8_R=m
1025CONFIG_NLS_KOI8_U=m
1026CONFIG_NLS_UTF8=m
1027
1028#
1029# Library routines
1030#
1031# CONFIG_CRC_CCITT is not set
1032CONFIG_CRC32=y
1033# CONFIG_LIBCRC32C is not set
1034CONFIG_GENERIC_HARDIRQS=y
1035CONFIG_GENERIC_IRQ_PROBE=y
1036
1037#
1038# Profiling support
1039#
1040# CONFIG_PROFILING is not set
1041
1042#
1043# Kernel hacking
1044#
1045CONFIG_DEBUG_KERNEL=y
1046CONFIG_MAGIC_SYSRQ=y
1047# CONFIG_SCHEDSTATS is not set
1048# CONFIG_DEBUG_SLAB is not set
1049# CONFIG_DEBUG_SPINLOCK is not set
1050# CONFIG_DEBUG_SPINLOCK_SLEEP is not set
1051# CONFIG_DEBUG_KOBJECT is not set
1052# CONFIG_DEBUG_INFO is not set
1053# CONFIG_DEBUG_FS is not set
1054CONFIG_IA64_GRANULE_16MB=y
1055# CONFIG_IA64_GRANULE_64MB is not set
1056# CONFIG_IA64_PRINT_HAZARDS is not set
1057# CONFIG_DISABLE_VHPT is not set
1058# CONFIG_IA64_DEBUG_CMPXCHG is not set
1059# CONFIG_IA64_DEBUG_IRQ is not set
1060CONFIG_SYSVIPC_COMPAT=y
1061
1062#
1063# Security options
1064#
1065# CONFIG_KEYS is not set
1066# CONFIG_SECURITY is not set
1067
1068#
1069# Cryptographic options
1070#
1071CONFIG_CRYPTO=y
1072# CONFIG_CRYPTO_HMAC is not set
1073# CONFIG_CRYPTO_NULL is not set
1074# CONFIG_CRYPTO_MD4 is not set
1075CONFIG_CRYPTO_MD5=m
1076# CONFIG_CRYPTO_SHA1 is not set
1077# CONFIG_CRYPTO_SHA256 is not set
1078# CONFIG_CRYPTO_SHA512 is not set
1079# CONFIG_CRYPTO_WP512 is not set
1080CONFIG_CRYPTO_DES=m
1081# CONFIG_CRYPTO_BLOWFISH is not set
1082# CONFIG_CRYPTO_TWOFISH is not set
1083# CONFIG_CRYPTO_SERPENT is not set
1084# CONFIG_CRYPTO_AES is not set
1085# CONFIG_CRYPTO_CAST5 is not set
1086# CONFIG_CRYPTO_CAST6 is not set
1087# CONFIG_CRYPTO_TEA is not set
1088# CONFIG_CRYPTO_ARC4 is not set
1089# CONFIG_CRYPTO_KHAZAD is not set
1090# CONFIG_CRYPTO_ANUBIS is not set
1091# CONFIG_CRYPTO_DEFLATE is not set
1092# CONFIG_CRYPTO_MICHAEL_MIC is not set
1093# CONFIG_CRYPTO_CRC32C is not set
1094# CONFIG_CRYPTO_TEST is not set
1095
1096#
1097# Hardware crypto devices
1098#
diff --git a/arch/ia64/configs/zx1_defconfig b/arch/ia64/configs/zx1_defconfig
new file mode 100644
index 000000000000..21d6f9bab5e9
--- /dev/null
+++ b/arch/ia64/configs/zx1_defconfig
@@ -0,0 +1,1273 @@
1#
2# Automatically generated make config: don't edit
3# Linux kernel version: 2.6.10
4# Wed Dec 29 09:05:48 2004
5#
6
7#
8# Code maturity level options
9#
10CONFIG_EXPERIMENTAL=y
11# CONFIG_CLEAN_COMPILE is not set
12CONFIG_BROKEN=y
13CONFIG_BROKEN_ON_SMP=y
14CONFIG_LOCK_KERNEL=y
15
16#
17# General setup
18#
19CONFIG_LOCALVERSION=""
20CONFIG_SWAP=y
21CONFIG_SYSVIPC=y
22# CONFIG_POSIX_MQUEUE is not set
23CONFIG_BSD_PROCESS_ACCT=y
24# CONFIG_BSD_PROCESS_ACCT_V3 is not set
25CONFIG_SYSCTL=y
26# CONFIG_AUDIT is not set
27CONFIG_LOG_BUF_SHIFT=17
28CONFIG_HOTPLUG=y
29CONFIG_KOBJECT_UEVENT=y
30# CONFIG_IKCONFIG is not set
31# CONFIG_EMBEDDED is not set
32CONFIG_KALLSYMS=y
33# CONFIG_KALLSYMS_ALL is not set
34# CONFIG_KALLSYMS_EXTRA_PASS is not set
35CONFIG_FUTEX=y
36CONFIG_EPOLL=y
37# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
38CONFIG_SHMEM=y
39CONFIG_CC_ALIGN_FUNCTIONS=0
40CONFIG_CC_ALIGN_LABELS=0
41CONFIG_CC_ALIGN_LOOPS=0
42CONFIG_CC_ALIGN_JUMPS=0
43# CONFIG_TINY_SHMEM is not set
44
45#
46# Loadable module support
47#
48CONFIG_MODULES=y
49# CONFIG_MODULE_UNLOAD is not set
50CONFIG_OBSOLETE_MODPARM=y
51# CONFIG_MODVERSIONS is not set
52# CONFIG_MODULE_SRCVERSION_ALL is not set
53# CONFIG_KMOD is not set
54
55#
56# Processor type and features
57#
58CONFIG_IA64=y
59CONFIG_64BIT=y
60CONFIG_MMU=y
61CONFIG_RWSEM_XCHGADD_ALGORITHM=y
62CONFIG_TIME_INTERPOLATION=y
63CONFIG_EFI=y
64CONFIG_GENERIC_IOMAP=y
65# CONFIG_IA64_GENERIC is not set
66# CONFIG_IA64_DIG is not set
67CONFIG_IA64_HP_ZX1=y
68# CONFIG_IA64_SGI_SN2 is not set
69# CONFIG_IA64_HP_SIM is not set
70# CONFIG_ITANIUM is not set
71CONFIG_MCKINLEY=y
72# CONFIG_IA64_PAGE_SIZE_4KB is not set
73# CONFIG_IA64_PAGE_SIZE_8KB is not set
74CONFIG_IA64_PAGE_SIZE_16KB=y
75# CONFIG_IA64_PAGE_SIZE_64KB is not set
76CONFIG_IA64_L1_CACHE_SHIFT=7
77# CONFIG_NUMA is not set
78CONFIG_VIRTUAL_MEM_MAP=y
79# CONFIG_IA64_CYCLONE is not set
80CONFIG_IOSAPIC=y
81CONFIG_FORCE_MAX_ZONEORDER=18
82CONFIG_SMP=y
83CONFIG_NR_CPUS=16
84# CONFIG_HOTPLUG_CPU is not set
85# CONFIG_PREEMPT is not set
86CONFIG_HAVE_DEC_LOCK=y
87CONFIG_IA32_SUPPORT=y
88CONFIG_COMPAT=y
89CONFIG_IA64_MCA_RECOVERY=y
90CONFIG_PERFMON=y
91CONFIG_IA64_PALINFO=y
92
93#
94# Firmware Drivers
95#
96CONFIG_EFI_VARS=y
97CONFIG_EFI_PCDP=y
98CONFIG_BINFMT_ELF=y
99CONFIG_BINFMT_MISC=y
100
101#
102# Power management and ACPI
103#
104CONFIG_PM=y
105CONFIG_ACPI=y
106
107#
108# ACPI (Advanced Configuration and Power Interface) Support
109#
110CONFIG_ACPI_BOOT=y
111CONFIG_ACPI_INTERPRETER=y
112CONFIG_ACPI_BUTTON=y
113CONFIG_ACPI_VIDEO=m
114CONFIG_ACPI_FAN=y
115CONFIG_ACPI_PROCESSOR=y
116CONFIG_ACPI_THERMAL=y
117CONFIG_ACPI_BLACKLIST_YEAR=0
118# CONFIG_ACPI_DEBUG is not set
119CONFIG_ACPI_BUS=y
120CONFIG_ACPI_POWER=y
121CONFIG_ACPI_PCI=y
122CONFIG_ACPI_SYSTEM=y
123
124#
125# Bus options (PCI, PCMCIA)
126#
127CONFIG_PCI=y
128CONFIG_PCI_DOMAINS=y
129# CONFIG_PCI_MSI is not set
130CONFIG_PCI_LEGACY_PROC=y
131CONFIG_PCI_NAMES=y
132
133#
134# PCI Hotplug Support
135#
136CONFIG_HOTPLUG_PCI=y
137# CONFIG_HOTPLUG_PCI_FAKE is not set
138CONFIG_HOTPLUG_PCI_ACPI=y
139# CONFIG_HOTPLUG_PCI_ACPI_IBM is not set
140# CONFIG_HOTPLUG_PCI_CPCI is not set
141# CONFIG_HOTPLUG_PCI_PCIE is not set
142# CONFIG_HOTPLUG_PCI_SHPC is not set
143
144#
145# PCCARD (PCMCIA/CardBus) support
146#
147# CONFIG_PCCARD is not set
148
149#
150# PC-card bridges
151#
152
153#
154# Device Drivers
155#
156
157#
158# Generic Driver Options
159#
160CONFIG_STANDALONE=y
161CONFIG_PREVENT_FIRMWARE_BUILD=y
162# CONFIG_FW_LOADER is not set
163# CONFIG_DEBUG_DRIVER is not set
164
165#
166# Memory Technology Devices (MTD)
167#
168# CONFIG_MTD is not set
169
170#
171# Parallel port support
172#
173# CONFIG_PARPORT is not set
174
175#
176# Plug and Play support
177#
178# CONFIG_PNP is not set
179
180#
181# Block devices
182#
183# CONFIG_BLK_CPQ_DA is not set
184# CONFIG_BLK_CPQ_CISS_DA is not set
185# CONFIG_BLK_DEV_DAC960 is not set
186# CONFIG_BLK_DEV_UMEM is not set
187CONFIG_BLK_DEV_LOOP=y
188# CONFIG_BLK_DEV_CRYPTOLOOP is not set
189# CONFIG_BLK_DEV_NBD is not set
190# CONFIG_BLK_DEV_SX8 is not set
191# CONFIG_BLK_DEV_UB is not set
192CONFIG_BLK_DEV_RAM=y
193CONFIG_BLK_DEV_RAM_COUNT=16
194CONFIG_BLK_DEV_RAM_SIZE=4096
195CONFIG_BLK_DEV_INITRD=y
196CONFIG_INITRAMFS_SOURCE=""
197# CONFIG_CDROM_PKTCDVD is not set
198
199#
200# IO Schedulers
201#
202CONFIG_IOSCHED_NOOP=y
203CONFIG_IOSCHED_AS=y
204CONFIG_IOSCHED_DEADLINE=y
205CONFIG_IOSCHED_CFQ=y
206
207#
208# ATA/ATAPI/MFM/RLL support
209#
210CONFIG_IDE=y
211CONFIG_BLK_DEV_IDE=y
212
213#
214# Please see Documentation/ide.txt for help/info on IDE drives
215#
216# CONFIG_BLK_DEV_IDE_SATA is not set
217CONFIG_BLK_DEV_IDEDISK=y
218# CONFIG_IDEDISK_MULTI_MODE is not set
219CONFIG_BLK_DEV_IDECD=y
220# CONFIG_BLK_DEV_IDETAPE is not set
221# CONFIG_BLK_DEV_IDEFLOPPY is not set
222# CONFIG_BLK_DEV_IDESCSI is not set
223# CONFIG_IDE_TASK_IOCTL is not set
224
225#
226# IDE chipset support/bugfixes
227#
228CONFIG_IDE_GENERIC=y
229CONFIG_BLK_DEV_IDEPCI=y
230CONFIG_IDEPCI_SHARE_IRQ=y
231# CONFIG_BLK_DEV_OFFBOARD is not set
232CONFIG_BLK_DEV_GENERIC=y
233# CONFIG_BLK_DEV_OPTI621 is not set
234CONFIG_BLK_DEV_IDEDMA_PCI=y
235# CONFIG_BLK_DEV_IDEDMA_FORCED is not set
236# CONFIG_IDEDMA_PCI_AUTO is not set
237# CONFIG_BLK_DEV_AEC62XX is not set
238# CONFIG_BLK_DEV_ALI15X3 is not set
239# CONFIG_BLK_DEV_AMD74XX is not set
240CONFIG_BLK_DEV_CMD64X=y
241# CONFIG_BLK_DEV_TRIFLEX is not set
242# CONFIG_BLK_DEV_CY82C693 is not set
243# CONFIG_BLK_DEV_CS5520 is not set
244# CONFIG_BLK_DEV_CS5530 is not set
245# CONFIG_BLK_DEV_HPT34X is not set
246# CONFIG_BLK_DEV_HPT366 is not set
247# CONFIG_BLK_DEV_SC1200 is not set
248# CONFIG_BLK_DEV_PIIX is not set
249# CONFIG_BLK_DEV_NS87415 is not set
250# CONFIG_BLK_DEV_PDC202XX_OLD is not set
251# CONFIG_BLK_DEV_PDC202XX_NEW is not set
252# CONFIG_BLK_DEV_SVWKS is not set
253# CONFIG_BLK_DEV_SIIMAGE is not set
254# CONFIG_BLK_DEV_SLC90E66 is not set
255# CONFIG_BLK_DEV_TRM290 is not set
256# CONFIG_BLK_DEV_VIA82CXXX is not set
257# CONFIG_IDE_ARM is not set
258CONFIG_BLK_DEV_IDEDMA=y
259# CONFIG_IDEDMA_IVB is not set
260# CONFIG_IDEDMA_AUTO is not set
261# CONFIG_BLK_DEV_HD is not set
262
263#
264# SCSI device support
265#
266CONFIG_SCSI=y
267CONFIG_SCSI_PROC_FS=y
268
269#
270# SCSI support type (disk, tape, CD-ROM)
271#
272CONFIG_BLK_DEV_SD=y
273CONFIG_CHR_DEV_ST=y
274CONFIG_CHR_DEV_OSST=y
275CONFIG_BLK_DEV_SR=y
276CONFIG_BLK_DEV_SR_VENDOR=y
277CONFIG_CHR_DEV_SG=y
278
279#
280# Some SCSI devices (e.g. CD jukebox) support multiple LUNs
281#
282CONFIG_SCSI_MULTI_LUN=y
283CONFIG_SCSI_CONSTANTS=y
284CONFIG_SCSI_LOGGING=y
285
286#
287# SCSI Transport Attributes
288#
289CONFIG_SCSI_SPI_ATTRS=y
290# CONFIG_SCSI_FC_ATTRS is not set
291
292#
293# SCSI low-level drivers
294#
295# CONFIG_BLK_DEV_3W_XXXX_RAID is not set
296# CONFIG_SCSI_3W_9XXX is not set
297# CONFIG_SCSI_ACARD is not set
298# CONFIG_SCSI_AACRAID is not set
299# CONFIG_SCSI_AIC7XXX is not set
300# CONFIG_SCSI_AIC7XXX_OLD is not set
301# CONFIG_SCSI_AIC79XX is not set
302# CONFIG_SCSI_ADVANSYS is not set
303# CONFIG_MEGARAID_NEWGEN is not set
304# CONFIG_MEGARAID_LEGACY is not set
305# CONFIG_SCSI_SATA is not set
306# CONFIG_SCSI_BUSLOGIC is not set
307# CONFIG_SCSI_CPQFCTS is not set
308# CONFIG_SCSI_DMX3191D is not set
309# CONFIG_SCSI_EATA is not set
310# CONFIG_SCSI_EATA_PIO is not set
311# CONFIG_SCSI_FUTURE_DOMAIN is not set
312# CONFIG_SCSI_GDTH is not set
313# CONFIG_SCSI_IPS is not set
314# CONFIG_SCSI_INITIO is not set
315# CONFIG_SCSI_INIA100 is not set
316CONFIG_SCSI_SYM53C8XX_2=y
317CONFIG_SCSI_SYM53C8XX_DMA_ADDRESSING_MODE=1
318CONFIG_SCSI_SYM53C8XX_DEFAULT_TAGS=16
319CONFIG_SCSI_SYM53C8XX_MAX_TAGS=64
320# CONFIG_SCSI_SYM53C8XX_IOMAPPED is not set
321# CONFIG_SCSI_IPR is not set
322# CONFIG_SCSI_PCI2000 is not set
323# CONFIG_SCSI_PCI2220I is not set
324# CONFIG_SCSI_QLOGIC_ISP is not set
325# CONFIG_SCSI_QLOGIC_FC is not set
326CONFIG_SCSI_QLOGIC_1280=y
327# CONFIG_SCSI_QLOGIC_1280_1040 is not set
328CONFIG_SCSI_QLA2XXX=y
329# CONFIG_SCSI_QLA21XX is not set
330# CONFIG_SCSI_QLA22XX is not set
331# CONFIG_SCSI_QLA2300 is not set
332# CONFIG_SCSI_QLA2322 is not set
333# CONFIG_SCSI_QLA6312 is not set
334# CONFIG_SCSI_QLA6322 is not set
335# CONFIG_SCSI_DC395x is not set
336# CONFIG_SCSI_DC390T is not set
337# CONFIG_SCSI_DEBUG is not set
338
339#
340# Multi-device support (RAID and LVM)
341#
342# CONFIG_MD is not set
343
344#
345# Fusion MPT device support
346#
347CONFIG_FUSION=y
348CONFIG_FUSION_MAX_SGE=40
349# CONFIG_FUSION_CTL is not set
350
351#
352# IEEE 1394 (FireWire) support
353#
354# CONFIG_IEEE1394 is not set
355
356#
357# I2O device support
358#
359# CONFIG_I2O is not set
360
361#
362# Networking support
363#
364CONFIG_NET=y
365
366#
367# Networking options
368#
369CONFIG_PACKET=y
370# CONFIG_PACKET_MMAP is not set
371# CONFIG_NETLINK_DEV is not set
372CONFIG_UNIX=y
373# CONFIG_NET_KEY is not set
374CONFIG_INET=y
375CONFIG_IP_MULTICAST=y
376# CONFIG_IP_ADVANCED_ROUTER is not set
377# CONFIG_IP_PNP is not set
378# CONFIG_NET_IPIP is not set
379# CONFIG_NET_IPGRE is not set
380# CONFIG_IP_MROUTE is not set
381# CONFIG_ARPD is not set
382# CONFIG_SYN_COOKIES is not set
383# CONFIG_INET_AH is not set
384# CONFIG_INET_ESP is not set
385# CONFIG_INET_IPCOMP is not set
386# CONFIG_INET_TUNNEL is not set
387# CONFIG_IP_TCPDIAG is not set
388# CONFIG_IP_TCPDIAG_IPV6 is not set
389
390#
391# IP: Virtual Server Configuration
392#
393# CONFIG_IP_VS is not set
394# CONFIG_IPV6 is not set
395CONFIG_NETFILTER=y
396# CONFIG_NETFILTER_DEBUG is not set
397
398#
399# IP: Netfilter Configuration
400#
401# CONFIG_IP_NF_CONNTRACK is not set
402# CONFIG_IP_NF_CONNTRACK_MARK is not set
403# CONFIG_IP_NF_QUEUE is not set
404# CONFIG_IP_NF_IPTABLES is not set
405CONFIG_IP_NF_ARPTABLES=y
406# CONFIG_IP_NF_ARPFILTER is not set
407# CONFIG_IP_NF_ARP_MANGLE is not set
408# CONFIG_IP_NF_COMPAT_IPCHAINS is not set
409# CONFIG_IP_NF_COMPAT_IPFWADM is not set
410
411#
412# SCTP Configuration (EXPERIMENTAL)
413#
414# CONFIG_IP_SCTP is not set
415# CONFIG_ATM is not set
416# CONFIG_BRIDGE is not set
417# CONFIG_VLAN_8021Q is not set
418# CONFIG_DECNET is not set
419# CONFIG_LLC2 is not set
420# CONFIG_IPX is not set
421# CONFIG_ATALK is not set
422# CONFIG_X25 is not set
423# CONFIG_LAPB is not set
424# CONFIG_NET_DIVERT is not set
425# CONFIG_ECONET is not set
426# CONFIG_WAN_ROUTER is not set
427
428#
429# QoS and/or fair queueing
430#
431# CONFIG_NET_SCHED is not set
432# CONFIG_NET_CLS_ROUTE is not set
433
434#
435# Network testing
436#
437# CONFIG_NET_PKTGEN is not set
438# CONFIG_NETPOLL is not set
439# CONFIG_NET_POLL_CONTROLLER is not set
440# CONFIG_HAMRADIO is not set
441# CONFIG_IRDA is not set
442# CONFIG_BT is not set
443CONFIG_NETDEVICES=y
444CONFIG_DUMMY=y
445# CONFIG_BONDING is not set
446# CONFIG_EQUALIZER is not set
447# CONFIG_TUN is not set
448
449#
450# ARCnet devices
451#
452# CONFIG_ARCNET is not set
453
454#
455# Ethernet (10 or 100Mbit)
456#
457CONFIG_NET_ETHERNET=y
458CONFIG_MII=y
459# CONFIG_HAPPYMEAL is not set
460# CONFIG_SUNGEM is not set
461# CONFIG_NET_VENDOR_3COM is not set
462
463#
464# Tulip family network device support
465#
466CONFIG_NET_TULIP=y
467# CONFIG_DE2104X is not set
468CONFIG_TULIP=y
469CONFIG_TULIP_MWI=y
470CONFIG_TULIP_MMIO=y
471CONFIG_TULIP_NAPI=y
472CONFIG_TULIP_NAPI_HW_MITIGATION=y
473# CONFIG_DE4X5 is not set
474# CONFIG_WINBOND_840 is not set
475# CONFIG_DM9102 is not set
476# CONFIG_HP100 is not set
477CONFIG_NET_PCI=y
478# CONFIG_PCNET32 is not set
479# CONFIG_AMD8111_ETH is not set
480# CONFIG_ADAPTEC_STARFIRE is not set
481# CONFIG_B44 is not set
482# CONFIG_FORCEDETH is not set
483# CONFIG_DGRS is not set
484# CONFIG_EEPRO100 is not set
485CONFIG_E100=y
486# CONFIG_E100_NAPI is not set
487# CONFIG_FEALNX is not set
488# CONFIG_NATSEMI is not set
489# CONFIG_NE2K_PCI is not set
490# CONFIG_8139CP is not set
491# CONFIG_8139TOO is not set
492# CONFIG_SIS900 is not set
493# CONFIG_EPIC100 is not set
494# CONFIG_SUNDANCE is not set
495# CONFIG_VIA_RHINE is not set
496
497#
498# Ethernet (1000 Mbit)
499#
500# CONFIG_ACENIC is not set
501# CONFIG_DL2K is not set
502CONFIG_E1000=y
503# CONFIG_E1000_NAPI is not set
504# CONFIG_NS83820 is not set
505# CONFIG_HAMACHI is not set
506# CONFIG_YELLOWFIN is not set
507# CONFIG_R8169 is not set
508# CONFIG_SK98LIN is not set
509# CONFIG_VIA_VELOCITY is not set
510CONFIG_TIGON3=y
511
512#
513# Ethernet (10000 Mbit)
514#
515# CONFIG_IXGB is not set
516# CONFIG_S2IO is not set
517
518#
519# Token Ring devices
520#
521# CONFIG_TR is not set
522
523#
524# Wireless LAN (non-hamradio)
525#
526# CONFIG_NET_RADIO is not set
527
528#
529# Wan interfaces
530#
531# CONFIG_WAN is not set
532# CONFIG_FDDI is not set
533# CONFIG_HIPPI is not set
534# CONFIG_PPP is not set
535# CONFIG_SLIP is not set
536# CONFIG_NET_FC is not set
537# CONFIG_SHAPER is not set
538# CONFIG_NETCONSOLE is not set
539
540#
541# ISDN subsystem
542#
543# CONFIG_ISDN is not set
544
545#
546# Telephony Support
547#
548# CONFIG_PHONE is not set
549
550#
551# Input device support
552#
553CONFIG_INPUT=y
554
555#
556# Userland interfaces
557#
558CONFIG_INPUT_MOUSEDEV=y
559CONFIG_INPUT_MOUSEDEV_PSAUX=y
560CONFIG_INPUT_MOUSEDEV_SCREEN_X=1024
561CONFIG_INPUT_MOUSEDEV_SCREEN_Y=768
562CONFIG_INPUT_JOYDEV=y
563# CONFIG_INPUT_TSDEV is not set
564CONFIG_INPUT_EVDEV=y
565# CONFIG_INPUT_EVBUG is not set
566
567#
568# Input I/O drivers
569#
570# CONFIG_GAMEPORT is not set
571CONFIG_SOUND_GAMEPORT=y
572CONFIG_SERIO=y
573# CONFIG_SERIO_I8042 is not set
574# CONFIG_SERIO_SERPORT is not set
575# CONFIG_SERIO_CT82C710 is not set
576# CONFIG_SERIO_PCIPS2 is not set
577# CONFIG_SERIO_RAW is not set
578
579#
580# Input Device Drivers
581#
582# CONFIG_INPUT_KEYBOARD is not set
583# CONFIG_INPUT_MOUSE is not set
584# CONFIG_INPUT_JOYSTICK is not set
585# CONFIG_INPUT_TOUCHSCREEN is not set
586# CONFIG_INPUT_MISC is not set
587
588#
589# Character devices
590#
591CONFIG_VT=y
592CONFIG_VT_CONSOLE=y
593CONFIG_HW_CONSOLE=y
594# CONFIG_SERIAL_NONSTANDARD is not set
595
596#
597# Serial drivers
598#
599CONFIG_SERIAL_8250=y
600CONFIG_SERIAL_8250_CONSOLE=y
601CONFIG_SERIAL_8250_ACPI=y
602CONFIG_SERIAL_8250_NR_UARTS=8
603CONFIG_SERIAL_8250_EXTENDED=y
604CONFIG_SERIAL_8250_SHARE_IRQ=y
605# CONFIG_SERIAL_8250_DETECT_IRQ is not set
606# CONFIG_SERIAL_8250_MULTIPORT is not set
607# CONFIG_SERIAL_8250_RSA is not set
608
609#
610# Non-8250 serial port support
611#
612CONFIG_SERIAL_CORE=y
613CONFIG_SERIAL_CORE_CONSOLE=y
614CONFIG_UNIX98_PTYS=y
615CONFIG_LEGACY_PTYS=y
616CONFIG_LEGACY_PTY_COUNT=256
617
618#
619# IPMI
620#
621# CONFIG_IPMI_HANDLER is not set
622
623#
624# Watchdog Cards
625#
626# CONFIG_WATCHDOG is not set
627# CONFIG_HW_RANDOM is not set
628CONFIG_EFI_RTC=y
629# CONFIG_DTLK is not set
630# CONFIG_R3964 is not set
631# CONFIG_APPLICOM is not set
632
633#
634# Ftape, the floppy tape device driver
635#
636CONFIG_AGP=y
637CONFIG_AGP_HP_ZX1=y
638CONFIG_DRM=y
639# CONFIG_DRM_TDFX is not set
640# CONFIG_DRM_GAMMA is not set
641# CONFIG_DRM_R128 is not set
642CONFIG_DRM_RADEON=y
643# CONFIG_DRM_MGA is not set
644# CONFIG_DRM_SIS is not set
645# CONFIG_RAW_DRIVER is not set
646# CONFIG_HPET is not set
647
648#
649# I2C support
650#
651CONFIG_I2C=y
652CONFIG_I2C_CHARDEV=y
653
654#
655# I2C Algorithms
656#
657CONFIG_I2C_ALGOBIT=y
658CONFIG_I2C_ALGOPCF=y
659# CONFIG_I2C_ALGOPCA is not set
660
661#
662# I2C Hardware Bus support
663#
664# CONFIG_I2C_ALI1535 is not set
665# CONFIG_I2C_ALI1563 is not set
666# CONFIG_I2C_ALI15X3 is not set
667# CONFIG_I2C_AMD756 is not set
668# CONFIG_I2C_AMD8111 is not set
669# CONFIG_I2C_I801 is not set
670# CONFIG_I2C_I810 is not set
671# CONFIG_I2C_ISA is not set
672# CONFIG_I2C_NFORCE2 is not set
673# CONFIG_I2C_PARPORT_LIGHT is not set
674# CONFIG_I2C_PROSAVAGE is not set
675# CONFIG_I2C_SAVAGE4 is not set
676# CONFIG_SCx200_ACB is not set
677# CONFIG_I2C_SIS5595 is not set
678# CONFIG_I2C_SIS630 is not set
679# CONFIG_I2C_SIS96X is not set
680# CONFIG_I2C_STUB is not set
681# CONFIG_I2C_VIA is not set
682# CONFIG_I2C_VIAPRO is not set
683# CONFIG_I2C_VOODOO3 is not set
684# CONFIG_I2C_PCA_ISA is not set
685
686#
687# Hardware Sensors Chip support
688#
689# CONFIG_I2C_SENSOR is not set
690# CONFIG_SENSORS_ADM1021 is not set
691# CONFIG_SENSORS_ADM1025 is not set
692# CONFIG_SENSORS_ADM1026 is not set
693# CONFIG_SENSORS_ADM1031 is not set
694# CONFIG_SENSORS_ASB100 is not set
695# CONFIG_SENSORS_DS1621 is not set
696# CONFIG_SENSORS_FSCHER is not set
697# CONFIG_SENSORS_GL518SM is not set
698# CONFIG_SENSORS_IT87 is not set
699# CONFIG_SENSORS_LM63 is not set
700# CONFIG_SENSORS_LM75 is not set
701# CONFIG_SENSORS_LM77 is not set
702# CONFIG_SENSORS_LM78 is not set
703# CONFIG_SENSORS_LM80 is not set
704# CONFIG_SENSORS_LM83 is not set
705# CONFIG_SENSORS_LM85 is not set
706# CONFIG_SENSORS_LM87 is not set
707# CONFIG_SENSORS_LM90 is not set
708# CONFIG_SENSORS_MAX1619 is not set
709# CONFIG_SENSORS_PC87360 is not set
710# CONFIG_SENSORS_SMSC47M1 is not set
711# CONFIG_SENSORS_VIA686A is not set
712# CONFIG_SENSORS_W83781D is not set
713# CONFIG_SENSORS_W83L785TS is not set
714# CONFIG_SENSORS_W83627HF is not set
715
716#
717# Other I2C Chip support
718#
719# CONFIG_SENSORS_EEPROM is not set
720# CONFIG_SENSORS_PCF8574 is not set
721# CONFIG_SENSORS_PCF8591 is not set
722# CONFIG_SENSORS_RTC8564 is not set
723# CONFIG_I2C_DEBUG_CORE is not set
724# CONFIG_I2C_DEBUG_ALGO is not set
725# CONFIG_I2C_DEBUG_BUS is not set
726# CONFIG_I2C_DEBUG_CHIP is not set
727
728#
729# Dallas's 1-wire bus
730#
731# CONFIG_W1 is not set
732
733#
734# Misc devices
735#
736
737#
738# Multimedia devices
739#
740CONFIG_VIDEO_DEV=y
741
742#
743# Video For Linux
744#
745
746#
747# Video Adapters
748#
749# CONFIG_VIDEO_BT848 is not set
750# CONFIG_VIDEO_CPIA is not set
751# CONFIG_VIDEO_SAA5246A is not set
752# CONFIG_VIDEO_SAA5249 is not set
753# CONFIG_TUNER_3036 is not set
754# CONFIG_VIDEO_STRADIS is not set
755# CONFIG_VIDEO_ZORAN is not set
756# CONFIG_VIDEO_ZR36120 is not set
757# CONFIG_VIDEO_SAA7134 is not set
758# CONFIG_VIDEO_MXB is not set
759# CONFIG_VIDEO_DPC is not set
760# CONFIG_VIDEO_HEXIUM_ORION is not set
761# CONFIG_VIDEO_HEXIUM_GEMINI is not set
762# CONFIG_VIDEO_CX88 is not set
763# CONFIG_VIDEO_OVCAMCHIP is not set
764
765#
766# Radio Adapters
767#
768# CONFIG_RADIO_GEMTEK_PCI is not set
769# CONFIG_RADIO_MAXIRADIO is not set
770# CONFIG_RADIO_MAESTRO is not set
771
772#
773# Digital Video Broadcasting Devices
774#
775# CONFIG_DVB is not set
776
777#
778# Graphics support
779#
780CONFIG_FB=y
781CONFIG_FB_MODE_HELPERS=y
782# CONFIG_FB_TILEBLITTING is not set
783# CONFIG_FB_CIRRUS is not set
784# CONFIG_FB_PM2 is not set
785# CONFIG_FB_CYBER2000 is not set
786# CONFIG_FB_ASILIANT is not set
787# CONFIG_FB_IMSTT is not set
788# CONFIG_FB_RIVA is not set
789# CONFIG_FB_MATROX is not set
790# CONFIG_FB_RADEON_OLD is not set
791CONFIG_FB_RADEON=y
792CONFIG_FB_RADEON_I2C=y
793CONFIG_FB_RADEON_DEBUG=y
794# CONFIG_FB_ATY128 is not set
795# CONFIG_FB_ATY is not set
796# CONFIG_FB_SAVAGE is not set
797# CONFIG_FB_SIS is not set
798# CONFIG_FB_NEOMAGIC is not set
799# CONFIG_FB_KYRO is not set
800# CONFIG_FB_3DFX is not set
801# CONFIG_FB_VOODOO1 is not set
802# CONFIG_FB_TRIDENT is not set
803# CONFIG_FB_PM3 is not set
804# CONFIG_FB_VIRTUAL is not set
805
806#
807# Console display driver support
808#
809CONFIG_VGA_CONSOLE=y
810CONFIG_DUMMY_CONSOLE=y
811CONFIG_FRAMEBUFFER_CONSOLE=y
812# CONFIG_FONTS is not set
813CONFIG_FONT_8x8=y
814CONFIG_FONT_8x16=y
815
816#
817# Logo configuration
818#
819CONFIG_LOGO=y
820# CONFIG_LOGO_LINUX_MONO is not set
821# CONFIG_LOGO_LINUX_VGA16 is not set
822CONFIG_LOGO_LINUX_CLUT224=y
823
824#
825# Sound
826#
827CONFIG_SOUND=y
828
829#
830# Advanced Linux Sound Architecture
831#
832CONFIG_SND=y
833CONFIG_SND_TIMER=y
834CONFIG_SND_PCM=y
835CONFIG_SND_HWDEP=y
836CONFIG_SND_RAWMIDI=y
837CONFIG_SND_SEQUENCER=y
838# CONFIG_SND_SEQ_DUMMY is not set
839CONFIG_SND_OSSEMUL=y
840CONFIG_SND_MIXER_OSS=y
841CONFIG_SND_PCM_OSS=y
842CONFIG_SND_SEQUENCER_OSS=y
843# CONFIG_SND_VERBOSE_PRINTK is not set
844# CONFIG_SND_DEBUG is not set
845
846#
847# Generic devices
848#
849CONFIG_SND_MPU401_UART=y
850CONFIG_SND_OPL3_LIB=y
851# CONFIG_SND_DUMMY is not set
852# CONFIG_SND_VIRMIDI is not set
853# CONFIG_SND_MTPAV is not set
854# CONFIG_SND_SERIAL_U16550 is not set
855# CONFIG_SND_MPU401 is not set
856
857#
858# PCI devices
859#
860CONFIG_SND_AC97_CODEC=y
861# CONFIG_SND_ALI5451 is not set
862# CONFIG_SND_ATIIXP is not set
863# CONFIG_SND_ATIIXP_MODEM is not set
864# CONFIG_SND_AU8810 is not set
865# CONFIG_SND_AU8820 is not set
866# CONFIG_SND_AU8830 is not set
867# CONFIG_SND_AZT3328 is not set
868# CONFIG_SND_BT87X is not set
869# CONFIG_SND_CS46XX is not set
870# CONFIG_SND_CS4281 is not set
871# CONFIG_SND_EMU10K1 is not set
872# CONFIG_SND_KORG1212 is not set
873# CONFIG_SND_MIXART is not set
874# CONFIG_SND_NM256 is not set
875# CONFIG_SND_RME32 is not set
876# CONFIG_SND_RME96 is not set
877# CONFIG_SND_RME9652 is not set
878# CONFIG_SND_HDSP is not set
879# CONFIG_SND_TRIDENT is not set
880# CONFIG_SND_YMFPCI is not set
881# CONFIG_SND_ALS4000 is not set
882# CONFIG_SND_CMIPCI is not set
883# CONFIG_SND_ENS1370 is not set
884# CONFIG_SND_ENS1371 is not set
885# CONFIG_SND_ES1938 is not set
886# CONFIG_SND_ES1968 is not set
887# CONFIG_SND_MAESTRO3 is not set
888CONFIG_SND_FM801=y
889CONFIG_SND_FM801_TEA575X=y
890# CONFIG_SND_ICE1712 is not set
891# CONFIG_SND_ICE1724 is not set
892# CONFIG_SND_INTEL8X0 is not set
893# CONFIG_SND_INTEL8X0M is not set
894# CONFIG_SND_SONICVIBES is not set
895# CONFIG_SND_VIA82XX is not set
896# CONFIG_SND_VX222 is not set
897
898#
899# USB devices
900#
901# CONFIG_SND_USB_AUDIO is not set
902# CONFIG_SND_USB_USX2Y is not set
903
904#
905# Open Sound System
906#
907# CONFIG_SOUND_PRIME is not set
908
909#
910# USB support
911#
912CONFIG_USB=y
913# CONFIG_USB_DEBUG is not set
914
915#
916# Miscellaneous USB options
917#
918# CONFIG_USB_DEVICEFS is not set
919CONFIG_USB_BANDWIDTH=y
920# CONFIG_USB_DYNAMIC_MINORS is not set
921# CONFIG_USB_SUSPEND is not set
922# CONFIG_USB_OTG is not set
923CONFIG_USB_ARCH_HAS_HCD=y
924CONFIG_USB_ARCH_HAS_OHCI=y
925
926#
927# USB Host Controller Drivers
928#
929CONFIG_USB_EHCI_HCD=y
930# CONFIG_USB_EHCI_SPLIT_ISO is not set
931# CONFIG_USB_EHCI_ROOT_HUB_TT is not set
932CONFIG_USB_OHCI_HCD=y
933CONFIG_USB_UHCI_HCD=y
934# CONFIG_USB_SL811_HCD is not set
935
936#
937# USB Device Class drivers
938#
939# CONFIG_USB_AUDIO is not set
940# CONFIG_USB_BLUETOOTH_TTY is not set
941# CONFIG_USB_MIDI is not set
942# CONFIG_USB_ACM is not set
943# CONFIG_USB_PRINTER is not set
944
945#
946# NOTE: USB_STORAGE enables SCSI, and 'SCSI disk support' may also be needed; see USB_STORAGE Help for more information
947#
948CONFIG_USB_STORAGE=y
949# CONFIG_USB_STORAGE_DEBUG is not set
950# CONFIG_USB_STORAGE_RW_DETECT is not set
951# CONFIG_USB_STORAGE_DATAFAB is not set
952# CONFIG_USB_STORAGE_FREECOM is not set
953# CONFIG_USB_STORAGE_ISD200 is not set
954# CONFIG_USB_STORAGE_DPCM is not set
955# CONFIG_USB_STORAGE_HP8200e is not set
956# CONFIG_USB_STORAGE_SDDR09 is not set
957# CONFIG_USB_STORAGE_SDDR55 is not set
958# CONFIG_USB_STORAGE_JUMPSHOT is not set
959
960#
961# USB Input Devices
962#
963CONFIG_USB_HID=y
964CONFIG_USB_HIDINPUT=y
965# CONFIG_HID_FF is not set
966CONFIG_USB_HIDDEV=y
967# CONFIG_USB_AIPTEK is not set
968# CONFIG_USB_WACOM is not set
969# CONFIG_USB_KBTAB is not set
970# CONFIG_USB_POWERMATE is not set
971# CONFIG_USB_MTOUCH is not set
972# CONFIG_USB_EGALAX is not set
973# CONFIG_USB_XPAD is not set
974# CONFIG_USB_ATI_REMOTE is not set
975
976#
977# USB Imaging devices
978#
979# CONFIG_USB_MDC800 is not set
980# CONFIG_USB_MICROTEK is not set
981# CONFIG_USB_HPUSBSCSI is not set
982
983#
984# USB Multimedia devices
985#
986# CONFIG_USB_DABUSB is not set
987# CONFIG_USB_VICAM is not set
988# CONFIG_USB_DSBR is not set
989# CONFIG_USB_IBMCAM is not set
990# CONFIG_USB_KONICAWC is not set
991# CONFIG_USB_OV511 is not set
992# CONFIG_USB_SE401 is not set
993# CONFIG_USB_SN9C102 is not set
994# CONFIG_USB_STV680 is not set
995
996#
997# USB Network Adapters
998#
999# CONFIG_USB_CATC is not set
1000# CONFIG_USB_KAWETH is not set
1001# CONFIG_USB_PEGASUS is not set
1002# CONFIG_USB_RTL8150 is not set
1003# CONFIG_USB_USBNET is not set
1004
1005#
1006# USB port drivers
1007#
1008
1009#
1010# USB Serial Converter support
1011#
1012# CONFIG_USB_SERIAL is not set
1013
1014#
1015# USB Miscellaneous drivers
1016#
1017# CONFIG_USB_EMI62 is not set
1018# CONFIG_USB_EMI26 is not set
1019# CONFIG_USB_TIGL is not set
1020# CONFIG_USB_AUERSWALD is not set
1021# CONFIG_USB_RIO500 is not set
1022# CONFIG_USB_LEGOTOWER is not set
1023# CONFIG_USB_LCD is not set
1024# CONFIG_USB_LED is not set
1025# CONFIG_USB_CYTHERM is not set
1026# CONFIG_USB_PHIDGETKIT is not set
1027# CONFIG_USB_PHIDGETSERVO is not set
1028
1029#
1030# USB ATM/DSL drivers
1031#
1032
1033#
1034# USB Gadget Support
1035#
1036# CONFIG_USB_GADGET is not set
1037
1038#
1039# MMC/SD Card support
1040#
1041# CONFIG_MMC is not set
1042
1043#
1044# File systems
1045#
1046CONFIG_EXT2_FS=y
1047CONFIG_EXT2_FS_XATTR=y
1048# CONFIG_EXT2_FS_POSIX_ACL is not set
1049# CONFIG_EXT2_FS_SECURITY is not set
1050CONFIG_EXT3_FS=y
1051CONFIG_EXT3_FS_XATTR=y
1052# CONFIG_EXT3_FS_POSIX_ACL is not set
1053# CONFIG_EXT3_FS_SECURITY is not set
1054CONFIG_JBD=y
1055# CONFIG_JBD_DEBUG is not set
1056CONFIG_FS_MBCACHE=y
1057# CONFIG_REISERFS_FS is not set
1058# CONFIG_JFS_FS is not set
1059# CONFIG_XFS_FS is not set
1060# CONFIG_MINIX_FS is not set
1061# CONFIG_ROMFS_FS is not set
1062# CONFIG_QUOTA is not set
1063CONFIG_DNOTIFY=y
1064CONFIG_AUTOFS_FS=y
1065# CONFIG_AUTOFS4_FS is not set
1066
1067#
1068# CD-ROM/DVD Filesystems
1069#
1070CONFIG_ISO9660_FS=y
1071CONFIG_JOLIET=y
1072# CONFIG_ZISOFS is not set
1073CONFIG_UDF_FS=y
1074CONFIG_UDF_NLS=y
1075
1076#
1077# DOS/FAT/NT Filesystems
1078#
1079CONFIG_FAT_FS=y
1080CONFIG_MSDOS_FS=y
1081CONFIG_VFAT_FS=y
1082CONFIG_FAT_DEFAULT_CODEPAGE=437
1083CONFIG_FAT_DEFAULT_IOCHARSET="iso8859-1"
1084# CONFIG_NTFS_FS is not set
1085
1086#
1087# Pseudo filesystems
1088#
1089CONFIG_PROC_FS=y
1090CONFIG_PROC_KCORE=y
1091CONFIG_SYSFS=y
1092# CONFIG_DEVFS_FS is not set
1093# CONFIG_DEVPTS_FS_XATTR is not set
1094CONFIG_TMPFS=y
1095CONFIG_TMPFS_XATTR=y
1096CONFIG_TMPFS_SECURITY=y
1097CONFIG_HUGETLBFS=y
1098CONFIG_HUGETLB_PAGE=y
1099CONFIG_RAMFS=y
1100
1101#
1102# Miscellaneous filesystems
1103#
1104# CONFIG_ADFS_FS is not set
1105# CONFIG_AFFS_FS is not set
1106# CONFIG_HFS_FS is not set
1107# CONFIG_HFSPLUS_FS is not set
1108# CONFIG_BEFS_FS is not set
1109# CONFIG_BFS_FS is not set
1110# CONFIG_EFS_FS is not set
1111# CONFIG_CRAMFS is not set
1112# CONFIG_VXFS_FS is not set
1113# CONFIG_HPFS_FS is not set
1114# CONFIG_QNX4FS_FS is not set
1115# CONFIG_SYSV_FS is not set
1116# CONFIG_UFS_FS is not set
1117
1118#
1119# Network File Systems
1120#
1121CONFIG_NFS_FS=y
1122CONFIG_NFS_V3=y
1123CONFIG_NFS_V4=y
1124# CONFIG_NFS_DIRECTIO is not set
1125CONFIG_NFSD=y
1126CONFIG_NFSD_V3=y
1127# CONFIG_NFSD_V4 is not set
1128# CONFIG_NFSD_TCP is not set
1129CONFIG_LOCKD=y
1130CONFIG_LOCKD_V4=y
1131CONFIG_EXPORTFS=y
1132CONFIG_SUNRPC=y
1133CONFIG_SUNRPC_GSS=y
1134CONFIG_RPCSEC_GSS_KRB5=y
1135# CONFIG_RPCSEC_GSS_SPKM3 is not set
1136# CONFIG_SMB_FS is not set
1137# CONFIG_CIFS is not set
1138# CONFIG_NCP_FS is not set
1139# CONFIG_CODA_FS is not set
1140# CONFIG_AFS_FS is not set
1141
1142#
1143# Partition Types
1144#
1145CONFIG_PARTITION_ADVANCED=y
1146# CONFIG_ACORN_PARTITION is not set
1147# CONFIG_OSF_PARTITION is not set
1148# CONFIG_AMIGA_PARTITION is not set
1149# CONFIG_ATARI_PARTITION is not set
1150# CONFIG_MAC_PARTITION is not set
1151CONFIG_MSDOS_PARTITION=y
1152# CONFIG_BSD_DISKLABEL is not set
1153# CONFIG_MINIX_SUBPARTITION is not set
1154# CONFIG_SOLARIS_X86_PARTITION is not set
1155# CONFIG_UNIXWARE_DISKLABEL is not set
1156# CONFIG_LDM_PARTITION is not set
1157# CONFIG_SGI_PARTITION is not set
1158# CONFIG_ULTRIX_PARTITION is not set
1159# CONFIG_SUN_PARTITION is not set
1160CONFIG_EFI_PARTITION=y
1161
1162#
1163# Native Language Support
1164#
1165CONFIG_NLS=y
1166CONFIG_NLS_DEFAULT="iso8859-1"
1167CONFIG_NLS_CODEPAGE_437=y
1168CONFIG_NLS_CODEPAGE_737=y
1169CONFIG_NLS_CODEPAGE_775=y
1170CONFIG_NLS_CODEPAGE_850=y
1171CONFIG_NLS_CODEPAGE_852=y
1172CONFIG_NLS_CODEPAGE_855=y
1173CONFIG_NLS_CODEPAGE_857=y
1174CONFIG_NLS_CODEPAGE_860=y
1175CONFIG_NLS_CODEPAGE_861=y
1176CONFIG_NLS_CODEPAGE_862=y
1177CONFIG_NLS_CODEPAGE_863=y
1178CONFIG_NLS_CODEPAGE_864=y
1179CONFIG_NLS_CODEPAGE_865=y
1180CONFIG_NLS_CODEPAGE_866=y
1181CONFIG_NLS_CODEPAGE_869=y
1182CONFIG_NLS_CODEPAGE_936=y
1183CONFIG_NLS_CODEPAGE_950=y
1184CONFIG_NLS_CODEPAGE_932=y
1185CONFIG_NLS_CODEPAGE_949=y
1186CONFIG_NLS_CODEPAGE_874=y
1187CONFIG_NLS_ISO8859_8=y
1188# CONFIG_NLS_CODEPAGE_1250 is not set
1189CONFIG_NLS_CODEPAGE_1251=y
1190# CONFIG_NLS_ASCII is not set
1191CONFIG_NLS_ISO8859_1=y
1192CONFIG_NLS_ISO8859_2=y
1193CONFIG_NLS_ISO8859_3=y
1194CONFIG_NLS_ISO8859_4=y
1195CONFIG_NLS_ISO8859_5=y
1196CONFIG_NLS_ISO8859_6=y
1197CONFIG_NLS_ISO8859_7=y
1198CONFIG_NLS_ISO8859_9=y
1199CONFIG_NLS_ISO8859_13=y
1200CONFIG_NLS_ISO8859_14=y
1201CONFIG_NLS_ISO8859_15=y
1202CONFIG_NLS_KOI8_R=y
1203CONFIG_NLS_KOI8_U=y
1204CONFIG_NLS_UTF8=y
1205
1206#
1207# Library routines
1208#
1209# CONFIG_CRC_CCITT is not set
1210CONFIG_CRC32=y
1211# CONFIG_LIBCRC32C is not set
1212
1213#
1214# Profiling support
1215#
1216# CONFIG_PROFILING is not set
1217
1218#
1219# Kernel hacking
1220#
1221CONFIG_DEBUG_KERNEL=y
1222CONFIG_MAGIC_SYSRQ=y
1223# CONFIG_SCHEDSTATS is not set
1224# CONFIG_DEBUG_SLAB is not set
1225# CONFIG_DEBUG_SPINLOCK is not set
1226# CONFIG_DEBUG_SPINLOCK_SLEEP is not set
1227# CONFIG_DEBUG_KOBJECT is not set
1228# CONFIG_DEBUG_INFO is not set
1229CONFIG_IA64_GRANULE_16MB=y
1230# CONFIG_IA64_GRANULE_64MB is not set
1231CONFIG_IA64_PRINT_HAZARDS=y
1232# CONFIG_DISABLE_VHPT is not set
1233# CONFIG_IA64_DEBUG_CMPXCHG is not set
1234# CONFIG_IA64_DEBUG_IRQ is not set
1235CONFIG_SYSVIPC_COMPAT=y
1236
1237#
1238# Security options
1239#
1240# CONFIG_KEYS is not set
1241# CONFIG_SECURITY is not set
1242
1243#
1244# Cryptographic options
1245#
1246CONFIG_CRYPTO=y
1247# CONFIG_CRYPTO_HMAC is not set
1248# CONFIG_CRYPTO_NULL is not set
1249# CONFIG_CRYPTO_MD4 is not set
1250CONFIG_CRYPTO_MD5=y
1251# CONFIG_CRYPTO_SHA1 is not set
1252# CONFIG_CRYPTO_SHA256 is not set
1253# CONFIG_CRYPTO_SHA512 is not set
1254# CONFIG_CRYPTO_WP512 is not set
1255CONFIG_CRYPTO_DES=y
1256# CONFIG_CRYPTO_BLOWFISH is not set
1257# CONFIG_CRYPTO_TWOFISH is not set
1258# CONFIG_CRYPTO_SERPENT is not set
1259# CONFIG_CRYPTO_AES is not set
1260# CONFIG_CRYPTO_CAST5 is not set
1261# CONFIG_CRYPTO_CAST6 is not set
1262# CONFIG_CRYPTO_TEA is not set
1263# CONFIG_CRYPTO_ARC4 is not set
1264# CONFIG_CRYPTO_KHAZAD is not set
1265# CONFIG_CRYPTO_ANUBIS is not set
1266# CONFIG_CRYPTO_DEFLATE is not set
1267# CONFIG_CRYPTO_MICHAEL_MIC is not set
1268# CONFIG_CRYPTO_CRC32C is not set
1269# CONFIG_CRYPTO_TEST is not set
1270
1271#
1272# Hardware crypto devices
1273#
diff --git a/arch/ia64/defconfig b/arch/ia64/defconfig
new file mode 100644
index 000000000000..7539e83bf054
--- /dev/null
+++ b/arch/ia64/defconfig
@@ -0,0 +1,1199 @@
1#
2# Automatically generated make config: don't edit
3# Linux kernel version: 2.6.10
4# Thu Jan 6 11:13:13 2005
5#
6
7#
8# Code maturity level options
9#
10CONFIG_EXPERIMENTAL=y
11CONFIG_CLEAN_COMPILE=y
12CONFIG_LOCK_KERNEL=y
13
14#
15# General setup
16#
17CONFIG_LOCALVERSION=""
18CONFIG_SWAP=y
19CONFIG_SYSVIPC=y
20CONFIG_POSIX_MQUEUE=y
21# CONFIG_BSD_PROCESS_ACCT is not set
22CONFIG_SYSCTL=y
23# CONFIG_AUDIT is not set
24CONFIG_LOG_BUF_SHIFT=20
25CONFIG_HOTPLUG=y
26CONFIG_KOBJECT_UEVENT=y
27CONFIG_IKCONFIG=y
28CONFIG_IKCONFIG_PROC=y
29# CONFIG_EMBEDDED is not set
30CONFIG_KALLSYMS=y
31CONFIG_KALLSYMS_ALL=y
32# CONFIG_KALLSYMS_EXTRA_PASS is not set
33CONFIG_FUTEX=y
34CONFIG_EPOLL=y
35# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
36CONFIG_SHMEM=y
37CONFIG_CC_ALIGN_FUNCTIONS=0
38CONFIG_CC_ALIGN_LABELS=0
39CONFIG_CC_ALIGN_LOOPS=0
40CONFIG_CC_ALIGN_JUMPS=0
41# CONFIG_TINY_SHMEM is not set
42
43#
44# Loadable module support
45#
46CONFIG_MODULES=y
47CONFIG_MODULE_UNLOAD=y
48# CONFIG_MODULE_FORCE_UNLOAD is not set
49CONFIG_OBSOLETE_MODPARM=y
50CONFIG_MODVERSIONS=y
51# CONFIG_MODULE_SRCVERSION_ALL is not set
52CONFIG_KMOD=y
53CONFIG_STOP_MACHINE=y
54
55#
56# Processor type and features
57#
58CONFIG_IA64=y
59CONFIG_64BIT=y
60CONFIG_MMU=y
61CONFIG_RWSEM_XCHGADD_ALGORITHM=y
62CONFIG_GENERIC_CALIBRATE_DELAY=y
63CONFIG_TIME_INTERPOLATION=y
64CONFIG_EFI=y
65CONFIG_GENERIC_IOMAP=y
66CONFIG_IA64_GENERIC=y
67# CONFIG_IA64_DIG is not set
68# CONFIG_IA64_HP_ZX1 is not set
69# CONFIG_IA64_SGI_SN2 is not set
70# CONFIG_IA64_HP_SIM is not set
71# CONFIG_ITANIUM is not set
72CONFIG_MCKINLEY=y
73# CONFIG_IA64_PAGE_SIZE_4KB is not set
74# CONFIG_IA64_PAGE_SIZE_8KB is not set
75CONFIG_IA64_PAGE_SIZE_16KB=y
76# CONFIG_IA64_PAGE_SIZE_64KB is not set
77CONFIG_IA64_L1_CACHE_SHIFT=7
78CONFIG_NUMA=y
79CONFIG_VIRTUAL_MEM_MAP=y
80CONFIG_DISCONTIGMEM=y
81CONFIG_IA64_CYCLONE=y
82CONFIG_IOSAPIC=y
83CONFIG_FORCE_MAX_ZONEORDER=18
84CONFIG_SMP=y
85CONFIG_NR_CPUS=512
86CONFIG_HOTPLUG_CPU=y
87# CONFIG_PREEMPT is not set
88CONFIG_HAVE_DEC_LOCK=y
89CONFIG_IA32_SUPPORT=y
90CONFIG_COMPAT=y
91CONFIG_IA64_MCA_RECOVERY=y
92CONFIG_PERFMON=y
93CONFIG_IA64_PALINFO=y
94CONFIG_ACPI_DEALLOCATE_IRQ=y
95
96#
97# Firmware Drivers
98#
99CONFIG_EFI_VARS=y
100CONFIG_EFI_PCDP=y
101CONFIG_BINFMT_ELF=y
102CONFIG_BINFMT_MISC=m
103
104#
105# Power management and ACPI
106#
107CONFIG_PM=y
108CONFIG_ACPI=y
109
110#
111# ACPI (Advanced Configuration and Power Interface) Support
112#
113CONFIG_ACPI_BOOT=y
114CONFIG_ACPI_INTERPRETER=y
115CONFIG_ACPI_BUTTON=m
116CONFIG_ACPI_VIDEO=m
117CONFIG_ACPI_FAN=m
118CONFIG_ACPI_PROCESSOR=m
119CONFIG_ACPI_HOTPLUG_CPU=y
120CONFIG_ACPI_THERMAL=m
121CONFIG_ACPI_NUMA=y
122CONFIG_ACPI_BLACKLIST_YEAR=0
123# CONFIG_ACPI_DEBUG is not set
124CONFIG_ACPI_BUS=y
125CONFIG_ACPI_POWER=y
126CONFIG_ACPI_PCI=y
127CONFIG_ACPI_SYSTEM=y
128CONFIG_ACPI_CONTAINER=m
129
130#
131# Bus options (PCI, PCMCIA)
132#
133CONFIG_PCI=y
134CONFIG_PCI_DOMAINS=y
135# CONFIG_PCI_MSI is not set
136CONFIG_PCI_LEGACY_PROC=y
137CONFIG_PCI_NAMES=y
138
139#
140# PCI Hotplug Support
141#
142CONFIG_HOTPLUG_PCI=m
143# CONFIG_HOTPLUG_PCI_FAKE is not set
144CONFIG_HOTPLUG_PCI_ACPI=m
145# CONFIG_HOTPLUG_PCI_ACPI_IBM is not set
146# CONFIG_HOTPLUG_PCI_CPCI is not set
147# CONFIG_HOTPLUG_PCI_PCIE is not set
148# CONFIG_HOTPLUG_PCI_SHPC is not set
149
150#
151# PCCARD (PCMCIA/CardBus) support
152#
153# CONFIG_PCCARD is not set
154
155#
156# PC-card bridges
157#
158
159#
160# Device Drivers
161#
162
163#
164# Generic Driver Options
165#
166CONFIG_STANDALONE=y
167CONFIG_PREVENT_FIRMWARE_BUILD=y
168# CONFIG_FW_LOADER is not set
169# CONFIG_DEBUG_DRIVER is not set
170
171#
172# Memory Technology Devices (MTD)
173#
174# CONFIG_MTD is not set
175
176#
177# Parallel port support
178#
179# CONFIG_PARPORT is not set
180
181#
182# Plug and Play support
183#
184# CONFIG_PNP is not set
185
186#
187# Block devices
188#
189# CONFIG_BLK_CPQ_DA is not set
190# CONFIG_BLK_CPQ_CISS_DA is not set
191# CONFIG_BLK_DEV_DAC960 is not set
192# CONFIG_BLK_DEV_UMEM is not set
193CONFIG_BLK_DEV_LOOP=m
194CONFIG_BLK_DEV_CRYPTOLOOP=m
195CONFIG_BLK_DEV_NBD=m
196# CONFIG_BLK_DEV_SX8 is not set
197# CONFIG_BLK_DEV_UB is not set
198CONFIG_BLK_DEV_RAM=y
199CONFIG_BLK_DEV_RAM_COUNT=16
200CONFIG_BLK_DEV_RAM_SIZE=4096
201CONFIG_BLK_DEV_INITRD=y
202CONFIG_INITRAMFS_SOURCE=""
203# CONFIG_CDROM_PKTCDVD is not set
204
205#
206# IO Schedulers
207#
208CONFIG_IOSCHED_NOOP=y
209CONFIG_IOSCHED_AS=y
210CONFIG_IOSCHED_DEADLINE=y
211CONFIG_IOSCHED_CFQ=y
212
213#
214# ATA/ATAPI/MFM/RLL support
215#
216CONFIG_IDE=y
217CONFIG_BLK_DEV_IDE=y
218
219#
220# Please see Documentation/ide.txt for help/info on IDE drives
221#
222# CONFIG_BLK_DEV_IDE_SATA is not set
223CONFIG_BLK_DEV_IDEDISK=y
224# CONFIG_IDEDISK_MULTI_MODE is not set
225CONFIG_BLK_DEV_IDECD=y
226# CONFIG_BLK_DEV_IDETAPE is not set
227CONFIG_BLK_DEV_IDEFLOPPY=y
228CONFIG_BLK_DEV_IDESCSI=m
229# CONFIG_IDE_TASK_IOCTL is not set
230
231#
232# IDE chipset support/bugfixes
233#
234CONFIG_IDE_GENERIC=y
235CONFIG_BLK_DEV_IDEPCI=y
236# CONFIG_IDEPCI_SHARE_IRQ is not set
237# CONFIG_BLK_DEV_OFFBOARD is not set
238CONFIG_BLK_DEV_GENERIC=y
239# CONFIG_BLK_DEV_OPTI621 is not set
240CONFIG_BLK_DEV_IDEDMA_PCI=y
241# CONFIG_BLK_DEV_IDEDMA_FORCED is not set
242CONFIG_IDEDMA_PCI_AUTO=y
243# CONFIG_IDEDMA_ONLYDISK is not set
244# CONFIG_BLK_DEV_AEC62XX is not set
245# CONFIG_BLK_DEV_ALI15X3 is not set
246# CONFIG_BLK_DEV_AMD74XX is not set
247CONFIG_BLK_DEV_CMD64X=y
248# CONFIG_BLK_DEV_TRIFLEX is not set
249# CONFIG_BLK_DEV_CY82C693 is not set
250# CONFIG_BLK_DEV_CS5520 is not set
251# CONFIG_BLK_DEV_CS5530 is not set
252# CONFIG_BLK_DEV_HPT34X is not set
253# CONFIG_BLK_DEV_HPT366 is not set
254# CONFIG_BLK_DEV_SC1200 is not set
255CONFIG_BLK_DEV_PIIX=y
256# CONFIG_BLK_DEV_NS87415 is not set
257# CONFIG_BLK_DEV_PDC202XX_OLD is not set
258# CONFIG_BLK_DEV_PDC202XX_NEW is not set
259# CONFIG_BLK_DEV_SVWKS is not set
260CONFIG_BLK_DEV_SGIIOC4=y
261# CONFIG_BLK_DEV_SIIMAGE is not set
262# CONFIG_BLK_DEV_SLC90E66 is not set
263# CONFIG_BLK_DEV_TRM290 is not set
264# CONFIG_BLK_DEV_VIA82CXXX is not set
265# CONFIG_IDE_ARM is not set
266CONFIG_BLK_DEV_IDEDMA=y
267# CONFIG_IDEDMA_IVB is not set
268CONFIG_IDEDMA_AUTO=y
269# CONFIG_BLK_DEV_HD is not set
270
271#
272# SCSI device support
273#
274CONFIG_SCSI=y
275CONFIG_SCSI_PROC_FS=y
276
277#
278# SCSI support type (disk, tape, CD-ROM)
279#
280CONFIG_BLK_DEV_SD=y
281CONFIG_CHR_DEV_ST=m
282# CONFIG_CHR_DEV_OSST is not set
283CONFIG_BLK_DEV_SR=m
284# CONFIG_BLK_DEV_SR_VENDOR is not set
285CONFIG_CHR_DEV_SG=m
286
287#
288# Some SCSI devices (e.g. CD jukebox) support multiple LUNs
289#
290# CONFIG_SCSI_MULTI_LUN is not set
291# CONFIG_SCSI_CONSTANTS is not set
292# CONFIG_SCSI_LOGGING is not set
293
294#
295# SCSI Transport Attributes
296#
297CONFIG_SCSI_SPI_ATTRS=y
298CONFIG_SCSI_FC_ATTRS=y
299# CONFIG_SCSI_ISCSI_ATTRS is not set
300
301#
302# SCSI low-level drivers
303#
304# CONFIG_BLK_DEV_3W_XXXX_RAID is not set
305# CONFIG_SCSI_3W_9XXX is not set
306# CONFIG_SCSI_ACARD is not set
307# CONFIG_SCSI_AACRAID is not set
308# CONFIG_SCSI_AIC7XXX is not set
309# CONFIG_SCSI_AIC7XXX_OLD is not set
310# CONFIG_SCSI_AIC79XX is not set
311# CONFIG_MEGARAID_NEWGEN is not set
312# CONFIG_MEGARAID_LEGACY is not set
313# CONFIG_SCSI_SATA is not set
314# CONFIG_SCSI_BUSLOGIC is not set
315# CONFIG_SCSI_DMX3191D is not set
316# CONFIG_SCSI_EATA is not set
317# CONFIG_SCSI_EATA_PIO is not set
318# CONFIG_SCSI_FUTURE_DOMAIN is not set
319# CONFIG_SCSI_GDTH is not set
320# CONFIG_SCSI_IPS is not set
321# CONFIG_SCSI_INITIO is not set
322# CONFIG_SCSI_INIA100 is not set
323CONFIG_SCSI_SYM53C8XX_2=y
324CONFIG_SCSI_SYM53C8XX_DMA_ADDRESSING_MODE=1
325CONFIG_SCSI_SYM53C8XX_DEFAULT_TAGS=16
326CONFIG_SCSI_SYM53C8XX_MAX_TAGS=64
327# CONFIG_SCSI_SYM53C8XX_IOMAPPED is not set
328# CONFIG_SCSI_IPR is not set
329# CONFIG_SCSI_QLOGIC_ISP is not set
330CONFIG_SCSI_QLOGIC_FC=y
331# CONFIG_SCSI_QLOGIC_FC_FIRMWARE is not set
332CONFIG_SCSI_QLOGIC_1280=y
333# CONFIG_SCSI_QLOGIC_1280_1040 is not set
334CONFIG_SCSI_QLA2XXX=y
335CONFIG_SCSI_QLA21XX=m
336CONFIG_SCSI_QLA22XX=m
337CONFIG_SCSI_QLA2300=m
338CONFIG_SCSI_QLA2322=m
339# CONFIG_SCSI_QLA6312 is not set
340# CONFIG_SCSI_DC395x is not set
341# CONFIG_SCSI_DC390T is not set
342# CONFIG_SCSI_DEBUG is not set
343
344#
345# Multi-device support (RAID and LVM)
346#
347CONFIG_MD=y
348CONFIG_BLK_DEV_MD=m
349CONFIG_MD_LINEAR=m
350CONFIG_MD_RAID0=m
351CONFIG_MD_RAID1=m
352# CONFIG_MD_RAID10 is not set
353CONFIG_MD_RAID5=m
354CONFIG_MD_RAID6=m
355CONFIG_MD_MULTIPATH=m
356# CONFIG_MD_FAULTY is not set
357CONFIG_BLK_DEV_DM=m
358CONFIG_DM_CRYPT=m
359CONFIG_DM_SNAPSHOT=m
360CONFIG_DM_MIRROR=m
361CONFIG_DM_ZERO=m
362
363#
364# Fusion MPT device support
365#
366CONFIG_FUSION=y
367CONFIG_FUSION_MAX_SGE=40
368# CONFIG_FUSION_CTL is not set
369
370#
371# IEEE 1394 (FireWire) support
372#
373# CONFIG_IEEE1394 is not set
374
375#
376# I2O device support
377#
378# CONFIG_I2O is not set
379
380#
381# Networking support
382#
383CONFIG_NET=y
384
385#
386# Networking options
387#
388CONFIG_PACKET=y
389# CONFIG_PACKET_MMAP is not set
390CONFIG_NETLINK_DEV=y
391CONFIG_UNIX=y
392# CONFIG_NET_KEY is not set
393CONFIG_INET=y
394CONFIG_IP_MULTICAST=y
395# CONFIG_IP_ADVANCED_ROUTER is not set
396# CONFIG_IP_PNP is not set
397# CONFIG_NET_IPIP is not set
398# CONFIG_NET_IPGRE is not set
399# CONFIG_IP_MROUTE is not set
400CONFIG_ARPD=y
401CONFIG_SYN_COOKIES=y
402# CONFIG_INET_AH is not set
403# CONFIG_INET_ESP is not set
404# CONFIG_INET_IPCOMP is not set
405# CONFIG_INET_TUNNEL is not set
406CONFIG_IP_TCPDIAG=y
407# CONFIG_IP_TCPDIAG_IPV6 is not set
408# CONFIG_IPV6 is not set
409# CONFIG_NETFILTER is not set
410
411#
412# SCTP Configuration (EXPERIMENTAL)
413#
414# CONFIG_IP_SCTP is not set
415# CONFIG_ATM is not set
416# CONFIG_BRIDGE is not set
417# CONFIG_VLAN_8021Q is not set
418# CONFIG_DECNET is not set
419# CONFIG_LLC2 is not set
420# CONFIG_IPX is not set
421# CONFIG_ATALK is not set
422# CONFIG_X25 is not set
423# CONFIG_LAPB is not set
424# CONFIG_NET_DIVERT is not set
425# CONFIG_ECONET is not set
426# CONFIG_WAN_ROUTER is not set
427
428#
429# QoS and/or fair queueing
430#
431# CONFIG_NET_SCHED is not set
432# CONFIG_NET_CLS_ROUTE is not set
433
434#
435# Network testing
436#
437# CONFIG_NET_PKTGEN is not set
438CONFIG_NETPOLL=y
439# CONFIG_NETPOLL_RX is not set
440# CONFIG_NETPOLL_TRAP is not set
441CONFIG_NET_POLL_CONTROLLER=y
442# CONFIG_HAMRADIO is not set
443# CONFIG_IRDA is not set
444# CONFIG_BT is not set
445CONFIG_NETDEVICES=y
446CONFIG_DUMMY=m
447# CONFIG_BONDING is not set
448# CONFIG_EQUALIZER is not set
449# CONFIG_TUN is not set
450# CONFIG_ETHERTAP is not set
451
452#
453# ARCnet devices
454#
455# CONFIG_ARCNET is not set
456
457#
458# Ethernet (10 or 100Mbit)
459#
460CONFIG_NET_ETHERNET=y
461CONFIG_MII=m
462# CONFIG_HAPPYMEAL is not set
463# CONFIG_SUNGEM is not set
464# CONFIG_NET_VENDOR_3COM is not set
465
466#
467# Tulip family network device support
468#
469CONFIG_NET_TULIP=y
470# CONFIG_DE2104X is not set
471CONFIG_TULIP=m
472# CONFIG_TULIP_MWI is not set
473# CONFIG_TULIP_MMIO is not set
474# CONFIG_TULIP_NAPI is not set
475# CONFIG_DE4X5 is not set
476# CONFIG_WINBOND_840 is not set
477# CONFIG_DM9102 is not set
478# CONFIG_HP100 is not set
479CONFIG_NET_PCI=y
480# CONFIG_PCNET32 is not set
481# CONFIG_AMD8111_ETH is not set
482# CONFIG_ADAPTEC_STARFIRE is not set
483# CONFIG_B44 is not set
484# CONFIG_FORCEDETH is not set
485# CONFIG_DGRS is not set
486CONFIG_EEPRO100=m
487# CONFIG_EEPRO100_PIO is not set
488CONFIG_E100=m
489# CONFIG_E100_NAPI is not set
490# CONFIG_FEALNX is not set
491# CONFIG_NATSEMI is not set
492# CONFIG_NE2K_PCI is not set
493# CONFIG_8139CP is not set
494# CONFIG_8139TOO is not set
495# CONFIG_SIS900 is not set
496# CONFIG_EPIC100 is not set
497# CONFIG_SUNDANCE is not set
498# CONFIG_VIA_RHINE is not set
499
500#
501# Ethernet (1000 Mbit)
502#
503# CONFIG_ACENIC is not set
504# CONFIG_DL2K is not set
505CONFIG_E1000=y
506# CONFIG_E1000_NAPI is not set
507# CONFIG_NS83820 is not set
508# CONFIG_HAMACHI is not set
509# CONFIG_YELLOWFIN is not set
510# CONFIG_R8169 is not set
511# CONFIG_SK98LIN is not set
512# CONFIG_VIA_VELOCITY is not set
513CONFIG_TIGON3=y
514
515#
516# Ethernet (10000 Mbit)
517#
518# CONFIG_IXGB is not set
519# CONFIG_S2IO is not set
520
521#
522# Token Ring devices
523#
524# CONFIG_TR is not set
525
526#
527# Wireless LAN (non-hamradio)
528#
529# CONFIG_NET_RADIO is not set
530
531#
532# Wan interfaces
533#
534# CONFIG_WAN is not set
535# CONFIG_FDDI is not set
536# CONFIG_HIPPI is not set
537# CONFIG_PPP is not set
538# CONFIG_SLIP is not set
539# CONFIG_NET_FC is not set
540# CONFIG_SHAPER is not set
541CONFIG_NETCONSOLE=y
542
543#
544# ISDN subsystem
545#
546# CONFIG_ISDN is not set
547
548#
549# Telephony Support
550#
551# CONFIG_PHONE is not set
552
553#
554# Input device support
555#
556CONFIG_INPUT=y
557
558#
559# Userland interfaces
560#
561CONFIG_INPUT_MOUSEDEV=y
562CONFIG_INPUT_MOUSEDEV_PSAUX=y
563CONFIG_INPUT_MOUSEDEV_SCREEN_X=1024
564CONFIG_INPUT_MOUSEDEV_SCREEN_Y=768
565# CONFIG_INPUT_JOYDEV is not set
566# CONFIG_INPUT_TSDEV is not set
567# CONFIG_INPUT_EVDEV is not set
568# CONFIG_INPUT_EVBUG is not set
569
570#
571# Input I/O drivers
572#
573CONFIG_GAMEPORT=m
574CONFIG_SOUND_GAMEPORT=m
575# CONFIG_GAMEPORT_NS558 is not set
576# CONFIG_GAMEPORT_L4 is not set
577# CONFIG_GAMEPORT_EMU10K1 is not set
578# CONFIG_GAMEPORT_VORTEX is not set
579# CONFIG_GAMEPORT_FM801 is not set
580# CONFIG_GAMEPORT_CS461x is not set
581CONFIG_SERIO=y
582CONFIG_SERIO_I8042=y
583# CONFIG_SERIO_SERPORT is not set
584# CONFIG_SERIO_CT82C710 is not set
585# CONFIG_SERIO_PCIPS2 is not set
586CONFIG_SERIO_LIBPS2=y
587# CONFIG_SERIO_RAW is not set
588
589#
590# Input Device Drivers
591#
592CONFIG_INPUT_KEYBOARD=y
593CONFIG_KEYBOARD_ATKBD=y
594# CONFIG_KEYBOARD_SUNKBD is not set
595# CONFIG_KEYBOARD_LKKBD is not set
596# CONFIG_KEYBOARD_XTKBD is not set
597# CONFIG_KEYBOARD_NEWTON is not set
598CONFIG_INPUT_MOUSE=y
599CONFIG_MOUSE_PS2=y
600# CONFIG_MOUSE_SERIAL is not set
601# CONFIG_MOUSE_VSXXXAA is not set
602# CONFIG_INPUT_JOYSTICK is not set
603# CONFIG_INPUT_TOUCHSCREEN is not set
604# CONFIG_INPUT_MISC is not set
605
606#
607# Character devices
608#
609CONFIG_VT=y
610CONFIG_VT_CONSOLE=y
611CONFIG_HW_CONSOLE=y
612CONFIG_SERIAL_NONSTANDARD=y
613# CONFIG_ROCKETPORT is not set
614# CONFIG_CYCLADES is not set
615# CONFIG_MOXA_SMARTIO is not set
616# CONFIG_SYNCLINK is not set
617# CONFIG_SYNCLINKMP is not set
618# CONFIG_N_HDLC is not set
619# CONFIG_STALDRV is not set
620CONFIG_SGI_SNSC=y
621
622#
623# Serial drivers
624#
625CONFIG_SERIAL_8250=y
626CONFIG_SERIAL_8250_CONSOLE=y
627CONFIG_SERIAL_8250_ACPI=y
628CONFIG_SERIAL_8250_NR_UARTS=6
629CONFIG_SERIAL_8250_EXTENDED=y
630CONFIG_SERIAL_8250_SHARE_IRQ=y
631# CONFIG_SERIAL_8250_DETECT_IRQ is not set
632# CONFIG_SERIAL_8250_MULTIPORT is not set
633# CONFIG_SERIAL_8250_RSA is not set
634
635#
636# Non-8250 serial port support
637#
638CONFIG_SERIAL_CORE=y
639CONFIG_SERIAL_CORE_CONSOLE=y
640CONFIG_SERIAL_SGI_L1_CONSOLE=y
641CONFIG_UNIX98_PTYS=y
642CONFIG_LEGACY_PTYS=y
643CONFIG_LEGACY_PTY_COUNT=256
644
645#
646# IPMI
647#
648# CONFIG_IPMI_HANDLER is not set
649
650#
651# Watchdog Cards
652#
653# CONFIG_WATCHDOG is not set
654# CONFIG_HW_RANDOM is not set
655CONFIG_EFI_RTC=y
656# CONFIG_DTLK is not set
657# CONFIG_R3964 is not set
658# CONFIG_APPLICOM is not set
659
660#
661# Ftape, the floppy tape device driver
662#
663CONFIG_AGP=m
664CONFIG_AGP_I460=m
665CONFIG_AGP_HP_ZX1=m
666CONFIG_DRM=m
667CONFIG_DRM_TDFX=m
668CONFIG_DRM_R128=m
669CONFIG_DRM_RADEON=m
670CONFIG_DRM_MGA=m
671CONFIG_DRM_SIS=m
672CONFIG_RAW_DRIVER=m
673CONFIG_HPET=y
674# CONFIG_HPET_RTC_IRQ is not set
675CONFIG_HPET_MMAP=y
676CONFIG_MAX_RAW_DEVS=256
677CONFIG_MMTIMER=y
678
679#
680# I2C support
681#
682# CONFIG_I2C is not set
683
684#
685# Dallas's 1-wire bus
686#
687# CONFIG_W1 is not set
688
689#
690# Misc devices
691#
692
693#
694# Multimedia devices
695#
696# CONFIG_VIDEO_DEV is not set
697
698#
699# Digital Video Broadcasting Devices
700#
701# CONFIG_DVB is not set
702
703#
704# Graphics support
705#
706# CONFIG_FB is not set
707
708#
709# Console display driver support
710#
711CONFIG_VGA_CONSOLE=y
712CONFIG_DUMMY_CONSOLE=y
713
714#
715# Sound
716#
717CONFIG_SOUND=m
718
719#
720# Advanced Linux Sound Architecture
721#
722CONFIG_SND=m
723CONFIG_SND_TIMER=m
724CONFIG_SND_PCM=m
725CONFIG_SND_HWDEP=m
726CONFIG_SND_RAWMIDI=m
727CONFIG_SND_SEQUENCER=m
728CONFIG_SND_SEQ_DUMMY=m
729CONFIG_SND_OSSEMUL=y
730CONFIG_SND_MIXER_OSS=m
731CONFIG_SND_PCM_OSS=m
732CONFIG_SND_SEQUENCER_OSS=y
733CONFIG_SND_VERBOSE_PRINTK=y
734# CONFIG_SND_DEBUG is not set
735
736#
737# Generic devices
738#
739CONFIG_SND_MPU401_UART=m
740CONFIG_SND_OPL3_LIB=m
741CONFIG_SND_DUMMY=m
742CONFIG_SND_VIRMIDI=m
743CONFIG_SND_MTPAV=m
744CONFIG_SND_SERIAL_U16550=m
745CONFIG_SND_MPU401=m
746
747#
748# PCI devices
749#
750CONFIG_SND_AC97_CODEC=m
751# CONFIG_SND_ALI5451 is not set
752# CONFIG_SND_ATIIXP is not set
753# CONFIG_SND_ATIIXP_MODEM is not set
754# CONFIG_SND_AU8810 is not set
755# CONFIG_SND_AU8820 is not set
756# CONFIG_SND_AU8830 is not set
757# CONFIG_SND_AZT3328 is not set
758# CONFIG_SND_BT87X is not set
759CONFIG_SND_CS46XX=m
760CONFIG_SND_CS46XX_NEW_DSP=y
761CONFIG_SND_CS4281=m
762CONFIG_SND_EMU10K1=m
763# CONFIG_SND_KORG1212 is not set
764# CONFIG_SND_MIXART is not set
765# CONFIG_SND_NM256 is not set
766# CONFIG_SND_RME32 is not set
767# CONFIG_SND_RME96 is not set
768# CONFIG_SND_RME9652 is not set
769# CONFIG_SND_HDSP is not set
770# CONFIG_SND_TRIDENT is not set
771# CONFIG_SND_YMFPCI is not set
772# CONFIG_SND_ALS4000 is not set
773# CONFIG_SND_CMIPCI is not set
774# CONFIG_SND_ENS1370 is not set
775# CONFIG_SND_ENS1371 is not set
776# CONFIG_SND_ES1938 is not set
777# CONFIG_SND_ES1968 is not set
778# CONFIG_SND_MAESTRO3 is not set
779CONFIG_SND_FM801=m
780# CONFIG_SND_FM801_TEA575X is not set
781# CONFIG_SND_ICE1712 is not set
782# CONFIG_SND_ICE1724 is not set
783# CONFIG_SND_INTEL8X0 is not set
784# CONFIG_SND_INTEL8X0M is not set
785# CONFIG_SND_SONICVIBES is not set
786# CONFIG_SND_VIA82XX is not set
787# CONFIG_SND_VX222 is not set
788
789#
790# USB devices
791#
792# CONFIG_SND_USB_AUDIO is not set
793# CONFIG_SND_USB_USX2Y is not set
794
795#
796# Open Sound System
797#
798# CONFIG_SOUND_PRIME is not set
799
800#
801# USB support
802#
803CONFIG_USB=m
804# CONFIG_USB_DEBUG is not set
805
806#
807# Miscellaneous USB options
808#
809CONFIG_USB_DEVICEFS=y
810# CONFIG_USB_BANDWIDTH is not set
811# CONFIG_USB_DYNAMIC_MINORS is not set
812# CONFIG_USB_SUSPEND is not set
813# CONFIG_USB_OTG is not set
814CONFIG_USB_ARCH_HAS_HCD=y
815CONFIG_USB_ARCH_HAS_OHCI=y
816
817#
818# USB Host Controller Drivers
819#
820CONFIG_USB_EHCI_HCD=m
821# CONFIG_USB_EHCI_SPLIT_ISO is not set
822# CONFIG_USB_EHCI_ROOT_HUB_TT is not set
823CONFIG_USB_OHCI_HCD=m
824CONFIG_USB_UHCI_HCD=m
825# CONFIG_USB_SL811_HCD is not set
826
827#
828# USB Device Class drivers
829#
830# CONFIG_USB_AUDIO is not set
831# CONFIG_USB_BLUETOOTH_TTY is not set
832# CONFIG_USB_MIDI is not set
833# CONFIG_USB_ACM is not set
834# CONFIG_USB_PRINTER is not set
835
836#
837# NOTE: USB_STORAGE enables SCSI, and 'SCSI disk support' may also be needed; see USB_STORAGE Help for more information
838#
839CONFIG_USB_STORAGE=m
840# CONFIG_USB_STORAGE_DEBUG is not set
841# CONFIG_USB_STORAGE_RW_DETECT is not set
842# CONFIG_USB_STORAGE_DATAFAB is not set
843# CONFIG_USB_STORAGE_FREECOM is not set
844# CONFIG_USB_STORAGE_ISD200 is not set
845# CONFIG_USB_STORAGE_DPCM is not set
846# CONFIG_USB_STORAGE_HP8200e is not set
847# CONFIG_USB_STORAGE_SDDR09 is not set
848# CONFIG_USB_STORAGE_SDDR55 is not set
849# CONFIG_USB_STORAGE_JUMPSHOT is not set
850
851#
852# USB Input Devices
853#
854CONFIG_USB_HID=m
855CONFIG_USB_HIDINPUT=y
856# CONFIG_HID_FF is not set
857# CONFIG_USB_HIDDEV is not set
858
859#
860# USB HID Boot Protocol drivers
861#
862# CONFIG_USB_KBD is not set
863# CONFIG_USB_MOUSE is not set
864# CONFIG_USB_AIPTEK is not set
865# CONFIG_USB_WACOM is not set
866# CONFIG_USB_KBTAB is not set
867# CONFIG_USB_POWERMATE is not set
868# CONFIG_USB_MTOUCH is not set
869# CONFIG_USB_EGALAX is not set
870# CONFIG_USB_XPAD is not set
871# CONFIG_USB_ATI_REMOTE is not set
872
873#
874# USB Imaging devices
875#
876# CONFIG_USB_MDC800 is not set
877# CONFIG_USB_MICROTEK is not set
878
879#
880# USB Multimedia devices
881#
882# CONFIG_USB_DABUSB is not set
883
884#
885# Video4Linux support is needed for USB Multimedia device support
886#
887
888#
889# USB Network Adapters
890#
891# CONFIG_USB_CATC is not set
892# CONFIG_USB_KAWETH is not set
893# CONFIG_USB_PEGASUS is not set
894# CONFIG_USB_RTL8150 is not set
895# CONFIG_USB_USBNET is not set
896
897#
898# USB port drivers
899#
900
901#
902# USB Serial Converter support
903#
904# CONFIG_USB_SERIAL is not set
905
906#
907# USB Miscellaneous drivers
908#
909# CONFIG_USB_EMI62 is not set
910# CONFIG_USB_EMI26 is not set
911# CONFIG_USB_TIGL is not set
912# CONFIG_USB_AUERSWALD is not set
913# CONFIG_USB_RIO500 is not set
914# CONFIG_USB_LEGOTOWER is not set
915# CONFIG_USB_LCD is not set
916# CONFIG_USB_LED is not set
917# CONFIG_USB_CYTHERM is not set
918# CONFIG_USB_PHIDGETKIT is not set
919# CONFIG_USB_PHIDGETSERVO is not set
920# CONFIG_USB_TEST is not set
921
922#
923# USB ATM/DSL drivers
924#
925
926#
927# USB Gadget Support
928#
929# CONFIG_USB_GADGET is not set
930
931#
932# MMC/SD Card support
933#
934# CONFIG_MMC is not set
935
936#
937# InfiniBand support
938#
939CONFIG_INFINIBAND=m
940CONFIG_INFINIBAND_MTHCA=m
941# CONFIG_INFINIBAND_MTHCA_DEBUG is not set
942CONFIG_INFINIBAND_IPOIB=m
943# CONFIG_INFINIBAND_IPOIB_DEBUG is not set
944
945#
946# File systems
947#
948CONFIG_EXT2_FS=y
949CONFIG_EXT2_FS_XATTR=y
950CONFIG_EXT2_FS_POSIX_ACL=y
951CONFIG_EXT2_FS_SECURITY=y
952CONFIG_EXT3_FS=y
953CONFIG_EXT3_FS_XATTR=y
954CONFIG_EXT3_FS_POSIX_ACL=y
955CONFIG_EXT3_FS_SECURITY=y
956CONFIG_JBD=y
957# CONFIG_JBD_DEBUG is not set
958CONFIG_FS_MBCACHE=y
959CONFIG_REISERFS_FS=y
960# CONFIG_REISERFS_CHECK is not set
961# CONFIG_REISERFS_PROC_INFO is not set
962CONFIG_REISERFS_FS_XATTR=y
963CONFIG_REISERFS_FS_POSIX_ACL=y
964CONFIG_REISERFS_FS_SECURITY=y
965# CONFIG_JFS_FS is not set
966CONFIG_FS_POSIX_ACL=y
967CONFIG_XFS_FS=y
968# CONFIG_XFS_RT is not set
969# CONFIG_XFS_QUOTA is not set
970# CONFIG_XFS_SECURITY is not set
971# CONFIG_XFS_POSIX_ACL is not set
972# CONFIG_MINIX_FS is not set
973# CONFIG_ROMFS_FS is not set
974# CONFIG_QUOTA is not set
975CONFIG_DNOTIFY=y
976CONFIG_AUTOFS_FS=y
977CONFIG_AUTOFS4_FS=y
978
979#
980# CD-ROM/DVD Filesystems
981#
982CONFIG_ISO9660_FS=m
983CONFIG_JOLIET=y
984# CONFIG_ZISOFS is not set
985CONFIG_UDF_FS=m
986CONFIG_UDF_NLS=y
987
988#
989# DOS/FAT/NT Filesystems
990#
991CONFIG_FAT_FS=y
992# CONFIG_MSDOS_FS is not set
993CONFIG_VFAT_FS=y
994CONFIG_FAT_DEFAULT_CODEPAGE=437
995CONFIG_FAT_DEFAULT_IOCHARSET="iso8859-1"
996CONFIG_NTFS_FS=m
997# CONFIG_NTFS_DEBUG is not set
998# CONFIG_NTFS_RW is not set
999
1000#
1001# Pseudo filesystems
1002#
1003CONFIG_PROC_FS=y
1004CONFIG_PROC_KCORE=y
1005CONFIG_SYSFS=y
1006# CONFIG_DEVFS_FS is not set
1007# CONFIG_DEVPTS_FS_XATTR is not set
1008CONFIG_TMPFS=y
1009CONFIG_TMPFS_XATTR=y
1010CONFIG_TMPFS_SECURITY=y
1011CONFIG_HUGETLBFS=y
1012CONFIG_HUGETLB_PAGE=y
1013CONFIG_RAMFS=y
1014
1015#
1016# Miscellaneous filesystems
1017#
1018# CONFIG_ADFS_FS is not set
1019# CONFIG_AFFS_FS is not set
1020# CONFIG_HFS_FS is not set
1021# CONFIG_HFSPLUS_FS is not set
1022# CONFIG_BEFS_FS is not set
1023# CONFIG_BFS_FS is not set
1024# CONFIG_EFS_FS is not set
1025# CONFIG_CRAMFS is not set
1026# CONFIG_VXFS_FS is not set
1027# CONFIG_HPFS_FS is not set
1028# CONFIG_QNX4FS_FS is not set
1029# CONFIG_SYSV_FS is not set
1030# CONFIG_UFS_FS is not set
1031
1032#
1033# Network File Systems
1034#
1035CONFIG_NFS_FS=m
1036CONFIG_NFS_V3=y
1037CONFIG_NFS_V4=y
1038CONFIG_NFS_DIRECTIO=y
1039CONFIG_NFSD=m
1040CONFIG_NFSD_V3=y
1041CONFIG_NFSD_V4=y
1042CONFIG_NFSD_TCP=y
1043CONFIG_LOCKD=m
1044CONFIG_LOCKD_V4=y
1045CONFIG_EXPORTFS=m
1046CONFIG_SUNRPC=m
1047CONFIG_SUNRPC_GSS=m
1048CONFIG_RPCSEC_GSS_KRB5=m
1049# CONFIG_RPCSEC_GSS_SPKM3 is not set
1050CONFIG_SMB_FS=m
1051CONFIG_SMB_NLS_DEFAULT=y
1052CONFIG_SMB_NLS_REMOTE="cp437"
1053CONFIG_CIFS=m
1054# CONFIG_CIFS_STATS is not set
1055# CONFIG_CIFS_XATTR is not set
1056# CONFIG_CIFS_EXPERIMENTAL is not set
1057# CONFIG_NCP_FS is not set
1058# CONFIG_CODA_FS is not set
1059# CONFIG_AFS_FS is not set
1060
1061#
1062# Partition Types
1063#
1064CONFIG_PARTITION_ADVANCED=y
1065# CONFIG_ACORN_PARTITION is not set
1066# CONFIG_OSF_PARTITION is not set
1067# CONFIG_AMIGA_PARTITION is not set
1068# CONFIG_ATARI_PARTITION is not set
1069# CONFIG_MAC_PARTITION is not set
1070CONFIG_MSDOS_PARTITION=y
1071# CONFIG_BSD_DISKLABEL is not set
1072# CONFIG_MINIX_SUBPARTITION is not set
1073# CONFIG_SOLARIS_X86_PARTITION is not set
1074# CONFIG_UNIXWARE_DISKLABEL is not set
1075# CONFIG_LDM_PARTITION is not set
1076CONFIG_SGI_PARTITION=y
1077# CONFIG_ULTRIX_PARTITION is not set
1078# CONFIG_SUN_PARTITION is not set
1079CONFIG_EFI_PARTITION=y
1080
1081#
1082# Native Language Support
1083#
1084CONFIG_NLS=y
1085CONFIG_NLS_DEFAULT="iso8859-1"
1086CONFIG_NLS_CODEPAGE_437=y
1087CONFIG_NLS_CODEPAGE_737=m
1088CONFIG_NLS_CODEPAGE_775=m
1089CONFIG_NLS_CODEPAGE_850=m
1090CONFIG_NLS_CODEPAGE_852=m
1091CONFIG_NLS_CODEPAGE_855=m
1092CONFIG_NLS_CODEPAGE_857=m
1093CONFIG_NLS_CODEPAGE_860=m
1094CONFIG_NLS_CODEPAGE_861=m
1095CONFIG_NLS_CODEPAGE_862=m
1096CONFIG_NLS_CODEPAGE_863=m
1097CONFIG_NLS_CODEPAGE_864=m
1098CONFIG_NLS_CODEPAGE_865=m
1099CONFIG_NLS_CODEPAGE_866=m
1100CONFIG_NLS_CODEPAGE_869=m
1101CONFIG_NLS_CODEPAGE_936=m
1102CONFIG_NLS_CODEPAGE_950=m
1103CONFIG_NLS_CODEPAGE_932=m
1104CONFIG_NLS_CODEPAGE_949=m
1105CONFIG_NLS_CODEPAGE_874=m
1106CONFIG_NLS_ISO8859_8=m
1107CONFIG_NLS_CODEPAGE_1250=m
1108CONFIG_NLS_CODEPAGE_1251=m
1109# CONFIG_NLS_ASCII is not set
1110CONFIG_NLS_ISO8859_1=y
1111CONFIG_NLS_ISO8859_2=m
1112CONFIG_NLS_ISO8859_3=m
1113CONFIG_NLS_ISO8859_4=m
1114CONFIG_NLS_ISO8859_5=m
1115CONFIG_NLS_ISO8859_6=m
1116CONFIG_NLS_ISO8859_7=m
1117CONFIG_NLS_ISO8859_9=m
1118CONFIG_NLS_ISO8859_13=m
1119CONFIG_NLS_ISO8859_14=m
1120CONFIG_NLS_ISO8859_15=m
1121CONFIG_NLS_KOI8_R=m
1122CONFIG_NLS_KOI8_U=m
1123CONFIG_NLS_UTF8=m
1124
1125#
1126# Library routines
1127#
1128# CONFIG_CRC_CCITT is not set
1129CONFIG_CRC32=y
1130# CONFIG_LIBCRC32C is not set
1131
1132#
1133# HP Simulator drivers
1134#
1135# CONFIG_HP_SIMETH is not set
1136# CONFIG_HP_SIMSERIAL is not set
1137# CONFIG_HP_SIMSCSI is not set
1138
1139#
1140# Profiling support
1141#
1142# CONFIG_PROFILING is not set
1143
1144#
1145# Kernel hacking
1146#
1147CONFIG_DEBUG_KERNEL=y
1148CONFIG_MAGIC_SYSRQ=y
1149# CONFIG_SCHEDSTATS is not set
1150# CONFIG_DEBUG_SLAB is not set
1151# CONFIG_DEBUG_SPINLOCK is not set
1152# CONFIG_DEBUG_SPINLOCK_SLEEP is not set
1153# CONFIG_DEBUG_KOBJECT is not set
1154# CONFIG_DEBUG_INFO is not set
1155CONFIG_IA64_GRANULE_16MB=y
1156# CONFIG_IA64_GRANULE_64MB is not set
1157# CONFIG_IA64_PRINT_HAZARDS is not set
1158# CONFIG_DISABLE_VHPT is not set
1159# CONFIG_IA64_DEBUG_CMPXCHG is not set
1160# CONFIG_IA64_DEBUG_IRQ is not set
1161CONFIG_SYSVIPC_COMPAT=y
1162
1163#
1164# Security options
1165#
1166# CONFIG_KEYS is not set
1167# CONFIG_SECURITY is not set
1168
1169#
1170# Cryptographic options
1171#
1172CONFIG_CRYPTO=y
1173# CONFIG_CRYPTO_HMAC is not set
1174# CONFIG_CRYPTO_NULL is not set
1175# CONFIG_CRYPTO_MD4 is not set
1176CONFIG_CRYPTO_MD5=m
1177# CONFIG_CRYPTO_SHA1 is not set
1178# CONFIG_CRYPTO_SHA256 is not set
1179# CONFIG_CRYPTO_SHA512 is not set
1180# CONFIG_CRYPTO_WP512 is not set
1181CONFIG_CRYPTO_DES=m
1182# CONFIG_CRYPTO_BLOWFISH is not set
1183# CONFIG_CRYPTO_TWOFISH is not set
1184# CONFIG_CRYPTO_SERPENT is not set
1185# CONFIG_CRYPTO_AES is not set
1186# CONFIG_CRYPTO_CAST5 is not set
1187# CONFIG_CRYPTO_CAST6 is not set
1188# CONFIG_CRYPTO_TEA is not set
1189# CONFIG_CRYPTO_ARC4 is not set
1190# CONFIG_CRYPTO_KHAZAD is not set
1191# CONFIG_CRYPTO_ANUBIS is not set
1192# CONFIG_CRYPTO_DEFLATE is not set
1193# CONFIG_CRYPTO_MICHAEL_MIC is not set
1194# CONFIG_CRYPTO_CRC32C is not set
1195# CONFIG_CRYPTO_TEST is not set
1196
1197#
1198# Hardware crypto devices
1199#
diff --git a/arch/ia64/dig/Makefile b/arch/ia64/dig/Makefile
new file mode 100644
index 000000000000..971cd7870dd4
--- /dev/null
+++ b/arch/ia64/dig/Makefile
@@ -0,0 +1,9 @@
1#
2# ia64/platform/dig/Makefile
3#
4# Copyright (C) 1999 Silicon Graphics, Inc.
5# Copyright (C) Srinivasa Thirumalachar (sprasad@engr.sgi.com)
6#
7
8obj-y := setup.o
9obj-$(CONFIG_IA64_GENERIC) += machvec.o
diff --git a/arch/ia64/dig/machvec.c b/arch/ia64/dig/machvec.c
new file mode 100644
index 000000000000..0c55bdafb473
--- /dev/null
+++ b/arch/ia64/dig/machvec.c
@@ -0,0 +1,3 @@
1#define MACHVEC_PLATFORM_NAME dig
2#define MACHVEC_PLATFORM_HEADER <asm/machvec_dig.h>
3#include <asm/machvec_init.h>
diff --git a/arch/ia64/dig/setup.c b/arch/ia64/dig/setup.c
new file mode 100644
index 000000000000..d58003f1ad02
--- /dev/null
+++ b/arch/ia64/dig/setup.c
@@ -0,0 +1,86 @@
1/*
2 * Platform dependent support for DIG64 platforms.
3 *
4 * Copyright (C) 1999 Intel Corp.
5 * Copyright (C) 1999, 2001 Hewlett-Packard Co
6 * Copyright (C) 1999, 2001, 2003 David Mosberger-Tang <davidm@hpl.hp.com>
7 * Copyright (C) 1999 VA Linux Systems
8 * Copyright (C) 1999 Walt Drummond <drummond@valinux.com>
9 * Copyright (C) 1999 Vijay Chander <vijay@engr.sgi.com>
10 */
11#include <linux/config.h>
12
13#include <linux/init.h>
14#include <linux/delay.h>
15#include <linux/kernel.h>
16#include <linux/kdev_t.h>
17#include <linux/string.h>
18#include <linux/tty.h>
19#include <linux/console.h>
20#include <linux/timex.h>
21#include <linux/sched.h>
22#include <linux/root_dev.h>
23
24#include <asm/io.h>
25#include <asm/machvec.h>
26#include <asm/system.h>
27
28/*
29 * This is here so we can use the CMOS detection in ide-probe.c to
30 * determine what drives are present. In theory, we don't need this
31 * as the auto-detection could be done via ide-probe.c:do_probe() but
32 * in practice that would be much slower, which is painful when
33 * running in the simulator. Note that passing zeroes in DRIVE_INFO
34 * is sufficient (the IDE driver will autodetect the drive geometry).
35 */
36char drive_info[4*16];
37
38void __init
39dig_setup (char **cmdline_p)
40{
41 unsigned int orig_x, orig_y, num_cols, num_rows, font_height;
42
43 /*
44 * Default to /dev/sda2. This assumes that the EFI partition
45 * is physical disk 1 partition 1 and the Linux root disk is
46 * physical disk 1 partition 2.
47 */
48 ROOT_DEV = Root_SDA2; /* default to second partition on first drive */
49
50#ifdef CONFIG_SMP
51 init_smp_config();
52#endif
53
54 memset(&screen_info, 0, sizeof(screen_info));
55
56 if (!ia64_boot_param->console_info.num_rows
57 || !ia64_boot_param->console_info.num_cols)
58 {
59 printk(KERN_WARNING "dig_setup: warning: invalid screen-info, guessing 80x25\n");
60 orig_x = 0;
61 orig_y = 0;
62 num_cols = 80;
63 num_rows = 25;
64 font_height = 16;
65 } else {
66 orig_x = ia64_boot_param->console_info.orig_x;
67 orig_y = ia64_boot_param->console_info.orig_y;
68 num_cols = ia64_boot_param->console_info.num_cols;
69 num_rows = ia64_boot_param->console_info.num_rows;
70 font_height = 400 / num_rows;
71 }
72
73 screen_info.orig_x = orig_x;
74 screen_info.orig_y = orig_y;
75 screen_info.orig_video_cols = num_cols;
76 screen_info.orig_video_lines = num_rows;
77 screen_info.orig_video_points = font_height;
78 screen_info.orig_video_mode = 3; /* XXX fake */
79 screen_info.orig_video_isVGA = 1; /* XXX fake */
80 screen_info.orig_video_ega_bx = 3; /* XXX fake */
81}
82
83void __init
84dig_irq_init (void)
85{
86}
diff --git a/arch/ia64/hp/common/Makefile b/arch/ia64/hp/common/Makefile
new file mode 100644
index 000000000000..f61a60057ff7
--- /dev/null
+++ b/arch/ia64/hp/common/Makefile
@@ -0,0 +1,10 @@
1#
2# ia64/platform/hp/common/Makefile
3#
4# Copyright (C) 2002 Hewlett Packard
5# Copyright (C) Alex Williamson (alex_williamson@hp.com)
6#
7
8obj-y := sba_iommu.o
9obj-$(CONFIG_IA64_HP_ZX1_SWIOTLB) += hwsw_iommu.o
10obj-$(CONFIG_IA64_GENERIC) += hwsw_iommu.o
diff --git a/arch/ia64/hp/common/hwsw_iommu.c b/arch/ia64/hp/common/hwsw_iommu.c
new file mode 100644
index 000000000000..80f8ef013939
--- /dev/null
+++ b/arch/ia64/hp/common/hwsw_iommu.c
@@ -0,0 +1,185 @@
1/*
2 * Copyright (c) 2004 Hewlett-Packard Development Company, L.P.
3 * Contributed by David Mosberger-Tang <davidm@hpl.hp.com>
4 *
5 * This is a pseudo I/O MMU which dispatches to the hardware I/O MMU
6 * whenever possible. We assume that the hardware I/O MMU requires
7 * full 32-bit addressability, as is the case, e.g., for HP zx1-based
8 * systems (there, the I/O MMU window is mapped at 3-4GB). If a
9 * device doesn't provide full 32-bit addressability, we fall back on
10 * the sw I/O TLB. This is good enough to let us support broken
11 * hardware such as soundcards which have a DMA engine that can
12 * address only 28 bits.
13 */
14
15#include <linux/device.h>
16
17#include <asm/machvec.h>
18
19/* swiotlb declarations & definitions: */
20extern void swiotlb_init_with_default_size (size_t size);
21extern ia64_mv_dma_alloc_coherent swiotlb_alloc_coherent;
22extern ia64_mv_dma_free_coherent swiotlb_free_coherent;
23extern ia64_mv_dma_map_single swiotlb_map_single;
24extern ia64_mv_dma_unmap_single swiotlb_unmap_single;
25extern ia64_mv_dma_map_sg swiotlb_map_sg;
26extern ia64_mv_dma_unmap_sg swiotlb_unmap_sg;
27extern ia64_mv_dma_supported swiotlb_dma_supported;
28extern ia64_mv_dma_mapping_error swiotlb_dma_mapping_error;
29
30/* hwiommu declarations & definitions: */
31
32extern ia64_mv_dma_alloc_coherent sba_alloc_coherent;
33extern ia64_mv_dma_free_coherent sba_free_coherent;
34extern ia64_mv_dma_map_single sba_map_single;
35extern ia64_mv_dma_unmap_single sba_unmap_single;
36extern ia64_mv_dma_map_sg sba_map_sg;
37extern ia64_mv_dma_unmap_sg sba_unmap_sg;
38extern ia64_mv_dma_supported sba_dma_supported;
39extern ia64_mv_dma_mapping_error sba_dma_mapping_error;
40
41#define hwiommu_alloc_coherent sba_alloc_coherent
42#define hwiommu_free_coherent sba_free_coherent
43#define hwiommu_map_single sba_map_single
44#define hwiommu_unmap_single sba_unmap_single
45#define hwiommu_map_sg sba_map_sg
46#define hwiommu_unmap_sg sba_unmap_sg
47#define hwiommu_dma_supported sba_dma_supported
48#define hwiommu_dma_mapping_error sba_dma_mapping_error
49#define hwiommu_sync_single_for_cpu machvec_dma_sync_single
50#define hwiommu_sync_sg_for_cpu machvec_dma_sync_sg
51#define hwiommu_sync_single_for_device machvec_dma_sync_single
52#define hwiommu_sync_sg_for_device machvec_dma_sync_sg
53
54
55/*
56 * Note: we need to make the determination of whether or not to use
57 * the sw I/O TLB based purely on the device structure. Anything else
58 * would be unreliable or would be too intrusive.
59 */
60static inline int
61use_swiotlb (struct device *dev)
62{
63 return dev && dev->dma_mask && !hwiommu_dma_supported(dev, *dev->dma_mask);
64}
65
66void
67hwsw_init (void)
68{
69 /* default to a smallish 2MB sw I/O TLB */
70 swiotlb_init_with_default_size (2 * (1<<20));
71}
72
73void *
74hwsw_alloc_coherent (struct device *dev, size_t size, dma_addr_t *dma_handle, int flags)
75{
76 if (use_swiotlb(dev))
77 return swiotlb_alloc_coherent(dev, size, dma_handle, flags);
78 else
79 return hwiommu_alloc_coherent(dev, size, dma_handle, flags);
80}
81
82void
83hwsw_free_coherent (struct device *dev, size_t size, void *vaddr, dma_addr_t dma_handle)
84{
85 if (use_swiotlb(dev))
86 swiotlb_free_coherent(dev, size, vaddr, dma_handle);
87 else
88 hwiommu_free_coherent(dev, size, vaddr, dma_handle);
89}
90
91dma_addr_t
92hwsw_map_single (struct device *dev, void *addr, size_t size, int dir)
93{
94 if (use_swiotlb(dev))
95 return swiotlb_map_single(dev, addr, size, dir);
96 else
97 return hwiommu_map_single(dev, addr, size, dir);
98}
99
100void
101hwsw_unmap_single (struct device *dev, dma_addr_t iova, size_t size, int dir)
102{
103 if (use_swiotlb(dev))
104 return swiotlb_unmap_single(dev, iova, size, dir);
105 else
106 return hwiommu_unmap_single(dev, iova, size, dir);
107}
108
109
110int
111hwsw_map_sg (struct device *dev, struct scatterlist *sglist, int nents, int dir)
112{
113 if (use_swiotlb(dev))
114 return swiotlb_map_sg(dev, sglist, nents, dir);
115 else
116 return hwiommu_map_sg(dev, sglist, nents, dir);
117}
118
119void
120hwsw_unmap_sg (struct device *dev, struct scatterlist *sglist, int nents, int dir)
121{
122 if (use_swiotlb(dev))
123 return swiotlb_unmap_sg(dev, sglist, nents, dir);
124 else
125 return hwiommu_unmap_sg(dev, sglist, nents, dir);
126}
127
128void
129hwsw_sync_single_for_cpu (struct device *dev, dma_addr_t addr, size_t size, int dir)
130{
131 if (use_swiotlb(dev))
132 swiotlb_sync_single_for_cpu(dev, addr, size, dir);
133 else
134 hwiommu_sync_single_for_cpu(dev, addr, size, dir);
135}
136
137void
138hwsw_sync_sg_for_cpu (struct device *dev, struct scatterlist *sg, int nelems, int dir)
139{
140 if (use_swiotlb(dev))
141 swiotlb_sync_sg_for_cpu(dev, sg, nelems, dir);
142 else
143 hwiommu_sync_sg_for_cpu(dev, sg, nelems, dir);
144}
145
146void
147hwsw_sync_single_for_device (struct device *dev, dma_addr_t addr, size_t size, int dir)
148{
149 if (use_swiotlb(dev))
150 swiotlb_sync_single_for_device(dev, addr, size, dir);
151 else
152 hwiommu_sync_single_for_device(dev, addr, size, dir);
153}
154
155void
156hwsw_sync_sg_for_device (struct device *dev, struct scatterlist *sg, int nelems, int dir)
157{
158 if (use_swiotlb(dev))
159 swiotlb_sync_sg_for_device(dev, sg, nelems, dir);
160 else
161 hwiommu_sync_sg_for_device(dev, sg, nelems, dir);
162}
163
164int
165hwsw_dma_supported (struct device *dev, u64 mask)
166{
167 if (hwiommu_dma_supported(dev, mask))
168 return 1;
169 return swiotlb_dma_supported(dev, mask);
170}
171
172int
173hwsw_dma_mapping_error (dma_addr_t dma_addr)
174{
175 return hwiommu_dma_mapping_error (dma_addr) || swiotlb_dma_mapping_error(dma_addr);
176}
177
178EXPORT_SYMBOL(hwsw_dma_mapping_error);
179EXPORT_SYMBOL(hwsw_map_single);
180EXPORT_SYMBOL(hwsw_unmap_single);
181EXPORT_SYMBOL(hwsw_map_sg);
182EXPORT_SYMBOL(hwsw_unmap_sg);
183EXPORT_SYMBOL(hwsw_dma_supported);
184EXPORT_SYMBOL(hwsw_alloc_coherent);
185EXPORT_SYMBOL(hwsw_free_coherent);
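The dispatch rule described at the top of hwsw_iommu.c (hardware I/O MMU for devices that can address a full 32 bits, software I/O TLB for everything else) can be illustrated with a minimal user-space sketch. The hwiommu_supported() stub below is a hypothetical stand-in for sba_dma_supported(); the 28-bit mask models the broken sound-card DMA engine mentioned in the header comment.

#include <stdint.h>
#include <stdio.h>

/* Hypothetical stand-in for sba_dma_supported(): the hw I/O MMU window
 * sits at 3-4GB, so it only helps devices with full 32-bit addressing. */
static int hwiommu_supported(uint64_t dma_mask)
{
	return (dma_mask & 0xffffffffULL) == 0xffffffffULL;
}

/* Same shape as use_swiotlb(): fall back to the sw I/O TLB whenever the
 * hardware I/O MMU cannot serve this device's mask. */
static int use_swiotlb_for(uint64_t dma_mask)
{
	return !hwiommu_supported(dma_mask);
}

int main(void)
{
	uint64_t soundcard_mask = (1ULL << 28) - 1;	/* 28-bit DMA engine */
	uint64_t pci32_mask = 0xffffffffULL;		/* ordinary 32-bit device */

	printf("28-bit device -> %s\n", use_swiotlb_for(soundcard_mask) ? "swiotlb" : "hw iommu");
	printf("32-bit device -> %s\n", use_swiotlb_for(pci32_mask) ? "swiotlb" : "hw iommu");
	return 0;
}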
diff --git a/arch/ia64/hp/common/sba_iommu.c b/arch/ia64/hp/common/sba_iommu.c
new file mode 100644
index 000000000000..017c9ab5fc1b
--- /dev/null
+++ b/arch/ia64/hp/common/sba_iommu.c
@@ -0,0 +1,2121 @@
1/*
2** IA64 System Bus Adapter (SBA) I/O MMU manager
3**
4** (c) Copyright 2002-2004 Alex Williamson
5** (c) Copyright 2002-2003 Grant Grundler
6** (c) Copyright 2002-2004 Hewlett-Packard Company
7**
8** Portions (c) 2000 Grant Grundler (from parisc I/O MMU code)
9** Portions (c) 1999 Dave S. Miller (from sparc64 I/O MMU code)
10**
11** This program is free software; you can redistribute it and/or modify
12** it under the terms of the GNU General Public License as published by
13** the Free Software Foundation; either version 2 of the License, or
14** (at your option) any later version.
15**
16**
17** This module initializes the IOC (I/O Controller) found on HP
18** McKinley machines and their successors.
19**
20*/
21
22#include <linux/config.h>
23#include <linux/types.h>
24#include <linux/kernel.h>
25#include <linux/module.h>
26#include <linux/spinlock.h>
27#include <linux/slab.h>
28#include <linux/init.h>
29#include <linux/mm.h>
30#include <linux/string.h>
31#include <linux/pci.h>
32#include <linux/proc_fs.h>
33#include <linux/seq_file.h>
34#include <linux/acpi.h>
35#include <linux/efi.h>
36#include <linux/nodemask.h>
37#include <linux/bitops.h> /* hweight64() */
38
39#include <asm/delay.h> /* ia64_get_itc() */
40#include <asm/io.h>
41#include <asm/page.h> /* PAGE_OFFSET */
42#include <asm/dma.h>
43#include <asm/system.h> /* wmb() */
44
45#include <asm/acpi-ext.h>
46
47#define PFX "IOC: "
48
49/*
50** Enabling timing search of the pdir resource map. Output in /proc.
51** Disabled by default to optimize performance.
52*/
53#undef PDIR_SEARCH_TIMING
54
55/*
56** This option allows cards capable of 64bit DMA to bypass the IOMMU. If
57** not defined, all DMA will be 32bit and go through the TLB.
58** There's potentially a conflict in the bio merge code with us
59** advertising an iommu, but then bypassing it. Since I/O MMU bypassing
60** appears to give more performance than bio-level virtual merging, we'll
61** do the former for now. NOTE: BYPASS_SG also needs to be undef'd to
62** completely restrict DMA to the IOMMU.
63*/
64#define ALLOW_IOV_BYPASS
65
66/*
67** This option specifically allows/disallows bypassing scatterlists with
68** multiple entries. Coalescing these entries can allow better DMA streaming
69** and in some cases shows better performance than entirely bypassing the
70** IOMMU. Performance increase on the order of 1-2% sequential output/input
71** using bonnie++ on a RAID0 MD device (sym2 & mpt).
72*/
73#undef ALLOW_IOV_BYPASS_SG
74
75/*
76** If a device prefetches beyond the end of a valid pdir entry, it will cause
77** a hard failure, ie. MCA. Version 3.0 and later of the zx1 LBA should
78** disconnect on 4k boundaries and prevent such issues. If the device is
79** particularly aggressive, this option will keep the entire pdir valid such
80** that prefetching will hit a valid address. This could severely impact
81** error containment, and is therefore off by default. The page that is
82** used for spill-over is poisoned, so that should help debugging somewhat.
83*/
84#undef FULL_VALID_PDIR
85
86#define ENABLE_MARK_CLEAN
87
88/*
89** The number of debug flags is a clue - this code is fragile. NOTE: since
90** tightening the use of res_lock the resource bitmap and actual pdir are no
91** longer guaranteed to stay in sync. The sanity checking code isn't going to
92** like that.
93*/
94#undef DEBUG_SBA_INIT
95#undef DEBUG_SBA_RUN
96#undef DEBUG_SBA_RUN_SG
97#undef DEBUG_SBA_RESOURCE
98#undef ASSERT_PDIR_SANITY
99#undef DEBUG_LARGE_SG_ENTRIES
100#undef DEBUG_BYPASS
101
102#if defined(FULL_VALID_PDIR) && defined(ASSERT_PDIR_SANITY)
103#error FULL_VALID_PDIR and ASSERT_PDIR_SANITY are mutually exclusive
104#endif
105
106#define SBA_INLINE __inline__
107/* #define SBA_INLINE */
108
109#ifdef DEBUG_SBA_INIT
110#define DBG_INIT(x...) printk(x)
111#else
112#define DBG_INIT(x...)
113#endif
114
115#ifdef DEBUG_SBA_RUN
116#define DBG_RUN(x...) printk(x)
117#else
118#define DBG_RUN(x...)
119#endif
120
121#ifdef DEBUG_SBA_RUN_SG
122#define DBG_RUN_SG(x...) printk(x)
123#else
124#define DBG_RUN_SG(x...)
125#endif
126
127
128#ifdef DEBUG_SBA_RESOURCE
129#define DBG_RES(x...) printk(x)
130#else
131#define DBG_RES(x...)
132#endif
133
134#ifdef DEBUG_BYPASS
135#define DBG_BYPASS(x...) printk(x)
136#else
137#define DBG_BYPASS(x...)
138#endif
139
140#ifdef ASSERT_PDIR_SANITY
141#define ASSERT(expr) \
142 if(!(expr)) { \
143 printk( "\n" __FILE__ ":%d: Assertion " #expr " failed!\n",__LINE__); \
144 panic(#expr); \
145 }
146#else
147#define ASSERT(expr)
148#endif
149
150/*
151** The number of pdir entries to "free" before issuing
152** a read to PCOM register to flush out PCOM writes.
153** Interacts with allocation granularity (ie 4 or 8 entries
154** allocated and free'd/purged at a time might make this
155** less interesting).
156*/
157#define DELAYED_RESOURCE_CNT 64
158
159#define ZX1_IOC_ID ((PCI_DEVICE_ID_HP_ZX1_IOC << 16) | PCI_VENDOR_ID_HP)
160#define ZX2_IOC_ID ((PCI_DEVICE_ID_HP_ZX2_IOC << 16) | PCI_VENDOR_ID_HP)
161#define REO_IOC_ID ((PCI_DEVICE_ID_HP_REO_IOC << 16) | PCI_VENDOR_ID_HP)
162#define SX1000_IOC_ID ((PCI_DEVICE_ID_HP_SX1000_IOC << 16) | PCI_VENDOR_ID_HP)
163
164#define ZX1_IOC_OFFSET 0x1000 /* ACPI reports SBA, we want IOC */
165
166#define IOC_FUNC_ID 0x000
167#define IOC_FCLASS 0x008 /* function class, bist, header, rev... */
168#define IOC_IBASE 0x300 /* IO TLB */
169#define IOC_IMASK 0x308
170#define IOC_PCOM 0x310
171#define IOC_TCNFG 0x318
172#define IOC_PDIR_BASE 0x320
173
174#define IOC_ROPE0_CFG 0x500
175#define IOC_ROPE_AO 0x10 /* Allow "Relaxed Ordering" */
176
177
178/* AGP GART driver looks for this */
179#define ZX1_SBA_IOMMU_COOKIE 0x0000badbadc0ffeeUL
180
181/*
182** The zx1 IOC supports 4/8/16/64KB page sizes (see TCNFG register)
183**
184** Some IOCs (sx1000) can run at the above page sizes, but are
185** really only supported using the IOC at a 4k page size.
186**
187** iovp_size could only be greater than PAGE_SIZE if we are
188** confident the drivers really only touch the next physical
189** page iff that driver instance owns it.
190*/
191static unsigned long iovp_size;
192static unsigned long iovp_shift;
193static unsigned long iovp_mask;
194
195struct ioc {
196 void __iomem *ioc_hpa; /* I/O MMU base address */
197 char *res_map; /* resource map, bit == pdir entry */
198 u64 *pdir_base; /* physical base address */
199 unsigned long ibase; /* pdir IOV Space base */
200 unsigned long imask; /* pdir IOV Space mask */
201
202 unsigned long *res_hint; /* next avail IOVP - circular search */
203 unsigned long dma_mask;
204 spinlock_t res_lock; /* protects the resource bitmap, but must be held when */
205 /* clearing pdir to prevent races with allocations. */
206 unsigned int res_bitshift; /* from the RIGHT! */
207 unsigned int res_size; /* size of resource map in bytes */
208#ifdef CONFIG_NUMA
209 unsigned int node; /* node where this IOC lives */
210#endif
211#if DELAYED_RESOURCE_CNT > 0
212 spinlock_t saved_lock; /* may want to try to get this on a separate cacheline */
213 /* than res_lock for bigger systems. */
214 int saved_cnt;
215 struct sba_dma_pair {
216 dma_addr_t iova;
217 size_t size;
218 } saved[DELAYED_RESOURCE_CNT];
219#endif
220
221#ifdef PDIR_SEARCH_TIMING
222#define SBA_SEARCH_SAMPLE 0x100
223 unsigned long avg_search[SBA_SEARCH_SAMPLE];
224 unsigned long avg_idx; /* current index into avg_search */
225#endif
226
227 /* Stuff we don't need in performance path */
228 struct ioc *next; /* list of IOC's in system */
229 acpi_handle handle; /* for multiple IOC's */
230 const char *name;
231 unsigned int func_id;
232 unsigned int rev; /* HW revision of chip */
233 u32 iov_size;
234 unsigned int pdir_size; /* in bytes, determined by IOV Space size */
235 struct pci_dev *sac_only_dev;
236};
237
238static struct ioc *ioc_list;
239static int reserve_sba_gart = 1;
240
241static SBA_INLINE void sba_mark_invalid(struct ioc *, dma_addr_t, size_t);
242static SBA_INLINE void sba_free_range(struct ioc *, dma_addr_t, size_t);
243
244#define sba_sg_address(sg) (page_address((sg)->page) + (sg)->offset)
245
246#ifdef FULL_VALID_PDIR
247static u64 prefetch_spill_page;
248#endif
249
250#ifdef CONFIG_PCI
251# define GET_IOC(dev) (((dev)->bus == &pci_bus_type) \
252 ? ((struct ioc *) PCI_CONTROLLER(to_pci_dev(dev))->iommu) : NULL)
253#else
254# define GET_IOC(dev) NULL
255#endif
256
257/*
258** DMA_CHUNK_SIZE is used by the SCSI mid-layer to break up
259** (or rather not merge) DMAs into manageable chunks.
260** On parisc, this is more of a software/tuning constraint
261** than a HW one. I/O MMU allocation algorithms can be
262** faster with smaller sizes (to some degree).
263*/
264#define DMA_CHUNK_SIZE (BITS_PER_LONG*iovp_size)
265
266#define ROUNDUP(x,y) ((x + ((y)-1)) & ~((y)-1))
267
268/************************************
269** SBA register read and write support
270**
271** BE WARNED: register writes are posted.
272** (ie follow writes which must reach HW with a read)
273**
274*/
275#define READ_REG(addr) __raw_readq(addr)
276#define WRITE_REG(val, addr) __raw_writeq(val, addr)
277
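/*
 * A minimal illustration of the warning above, using only what this
 * file already does: a register write that must reach the hardware
 * before execution continues is followed by a read from the same
 * register block, exactly as the unmap path flushes purges:
 *
 *	WRITE_REG(iovp | ioc->ibase, ioc->ioc_hpa + IOC_PCOM);
 *	READ_REG(ioc->ioc_hpa + IOC_PCOM);	<- read forces the posted write out
 */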
278#ifdef DEBUG_SBA_INIT
279
280/**
281 * sba_dump_tlb - debugging only - print IOMMU operating parameters
282 * @hpa: base address of the IOMMU
283 *
284 * Print the size/location of the IO MMU PDIR.
285 */
286static void
287sba_dump_tlb(char *hpa)
288{
289 DBG_INIT("IO TLB at 0x%p\n", (void *)hpa);
290 DBG_INIT("IOC_IBASE : %016lx\n", READ_REG(hpa+IOC_IBASE));
291 DBG_INIT("IOC_IMASK : %016lx\n", READ_REG(hpa+IOC_IMASK));
292 DBG_INIT("IOC_TCNFG : %016lx\n", READ_REG(hpa+IOC_TCNFG));
293 DBG_INIT("IOC_PDIR_BASE: %016lx\n", READ_REG(hpa+IOC_PDIR_BASE));
294 DBG_INIT("\n");
295}
296#endif
297
298
299#ifdef ASSERT_PDIR_SANITY
300
301/**
302 * sba_dump_pdir_entry - debugging only - print one IOMMU PDIR entry
303 * @ioc: IO MMU structure which owns the pdir we are interested in.
304 * @msg: text to print on the output line.
305 * @pide: pdir index.
306 *
307 * Print one entry of the IO MMU PDIR in human readable form.
308 */
309static void
310sba_dump_pdir_entry(struct ioc *ioc, char *msg, uint pide)
311{
312 /* start printing from lowest pde in rval */
313 u64 *ptr = &ioc->pdir_base[pide & ~(BITS_PER_LONG - 1)];
314 unsigned long *rptr = (unsigned long *) &ioc->res_map[(pide >>3) & -sizeof(unsigned long)];
315 uint rcnt;
316
317 printk(KERN_DEBUG "SBA: %s rp %p bit %d rval 0x%lx\n",
318 msg, rptr, pide & (BITS_PER_LONG - 1), *rptr);
319
320 rcnt = 0;
321 while (rcnt < BITS_PER_LONG) {
322 printk(KERN_DEBUG "%s %2d %p %016Lx\n",
323 (rcnt == (pide & (BITS_PER_LONG - 1)))
324 ? " -->" : " ",
325 rcnt, ptr, (unsigned long long) *ptr );
326 rcnt++;
327 ptr++;
328 }
329 printk(KERN_DEBUG "%s", msg);
330}
331
332
333/**
334 * sba_check_pdir - debugging only - consistency checker
335 * @ioc: IO MMU structure which owns the pdir we are interested in.
337 * @msg: text to print on the output line.
337 *
338 * Verify the resource map and pdir state is consistent
339 */
340static int
341sba_check_pdir(struct ioc *ioc, char *msg)
342{
343 u64 *rptr_end = (u64 *) &(ioc->res_map[ioc->res_size]);
344 u64 *rptr = (u64 *) ioc->res_map; /* resource map ptr */
345 u64 *pptr = ioc->pdir_base; /* pdir ptr */
346 uint pide = 0;
347
348 while (rptr < rptr_end) {
349 u64 rval;
350 int rcnt; /* number of bits we might check */
351
352 rval = *rptr;
353 rcnt = 64;
354
355 while (rcnt) {
356 /* Get last byte and highest bit from that */
357 u32 pde = ((u32)((*pptr >> (63)) & 0x1));
358 if ((rval & 0x1) ^ pde)
359 {
360 /*
361 ** BUMMER! -- res_map != pdir --
362 ** Dump rval and matching pdir entries
363 */
364 sba_dump_pdir_entry(ioc, msg, pide);
365 return(1);
366 }
367 rcnt--;
368 rval >>= 1; /* try the next bit */
369 pptr++;
370 pide++;
371 }
372 rptr++; /* look at next word of res_map */
373 }
374 /* It'd be nice if we always got here :^) */
375 return 0;
376}
377
378
379/**
380 * sba_dump_sg - debugging only - print Scatter-Gather list
381 * @ioc: IO MMU structure which owns the pdir we are interested in.
382 * @startsg: head of the SG list
383 * @nents: number of entries in SG list
384 *
385 * print the SG list so we can verify it's correct by hand.
386 */
387static void
388sba_dump_sg( struct ioc *ioc, struct scatterlist *startsg, int nents)
389{
390 while (nents-- > 0) {
391 printk(KERN_DEBUG " %d : DMA %08lx/%05x CPU %p\n", nents,
392 startsg->dma_address, startsg->dma_length,
393 sba_sg_address(startsg));
394 startsg++;
395 }
396}
397
398static void
399sba_check_sg( struct ioc *ioc, struct scatterlist *startsg, int nents)
400{
401 struct scatterlist *the_sg = startsg;
402 int the_nents = nents;
403
404 while (the_nents-- > 0) {
405 if (sba_sg_address(the_sg) == 0x0UL)
406 sba_dump_sg(NULL, startsg, nents);
407 the_sg++;
408 }
409}
410
411#endif /* ASSERT_PDIR_SANITY */
412
413
414
415
416/**************************************************************
417*
418* I/O Pdir Resource Management
419*
420* Bits set in the resource map are in use.
421* Each bit can represent a number of pages.
422* LSbs represent lower addresses (IOVA's).
423*
424***************************************************************/
425#define PAGES_PER_RANGE 1 /* could increase this to 4 or 8 if needed */
426
427/* Convert from IOVP to IOVA and vice versa. */
428#define SBA_IOVA(ioc,iovp,offset) ((ioc->ibase) | (iovp) | (offset))
429#define SBA_IOVP(ioc,iova) ((iova) & ~(ioc->ibase))
430
431#define PDIR_ENTRY_SIZE sizeof(u64)
432
433#define PDIR_INDEX(iovp) ((iovp)>>iovp_shift)
434
435#define RESMAP_MASK(n) ~(~0UL << (n))
436#define RESMAP_IDX_MASK (sizeof(unsigned long) - 1)
437
438
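/*
 * Bookkeeping example for the resource map (one bit per pdir entry,
 * purely illustrative values): pdir index 100 lives at res_map byte
 * 100 >> 3 == 12, bit 100 & 7 == 4; the search path below rebuilds the
 * index the other way round as (byte offset << 3) + bit position.
 */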
439/**
440 * For most cases the normal get_order is sufficient; however, it limits us
441 * to PAGE_SIZE being the minimum mapping alignment and TC flush granularity.
442 * It only incurs about 1 clock cycle to use this one with the static variable
443 * and makes the code more intuitive.
444 */
445static SBA_INLINE int
446get_iovp_order (unsigned long size)
447{
448 long double d = size - 1;
449 long order;
450
451 order = ia64_getf_exp(d);
452 order = order - iovp_shift - 0xffff + 1;
453 if (order < 0)
454 order = 0;
455 return order;
456}
457
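/*
 * For reference only (not used by the driver): the same result written
 * with plain integer arithmetic, i.e. the number of iovp_size pages
 * rounded up to a power of two.
 *
 *	static int get_iovp_order_ref(unsigned long size)
 *	{
 *		unsigned long pages = (size + iovp_size - 1) >> iovp_shift;
 *		int order = 0;
 *
 *		while ((1UL << order) < pages)
 *			order++;
 *		return order;
 *	}
 */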
458/**
459 * sba_search_bitmap - find free space in IO PDIR resource bitmap
460 * @ioc: IO MMU structure which owns the pdir we are interested in.
461 * @bits_wanted: number of entries we need.
462 *
463 * Find consecutive free bits in resource bitmap.
464 * Each bit represents one entry in the IO Pdir.
465 * Cool perf optimization: search for log2(size) bits at a time.
466 */
467static SBA_INLINE unsigned long
468sba_search_bitmap(struct ioc *ioc, unsigned long bits_wanted)
469{
470 unsigned long *res_ptr = ioc->res_hint;
471 unsigned long *res_end = (unsigned long *) &(ioc->res_map[ioc->res_size]);
472 unsigned long pide = ~0UL;
473
474 ASSERT(((unsigned long) ioc->res_hint & (sizeof(unsigned long) - 1UL)) == 0);
475 ASSERT(res_ptr < res_end);
476
477 /*
478 * N.B. REO/Grande defect AR2305 can cause TLB fetch timeouts
479 * if a TLB entry is purged while in use. sba_mark_invalid()
480 * purges IOTLB entries in power-of-two sizes, so we also
481 * allocate IOVA space in power-of-two sizes.
482 */
483 bits_wanted = 1UL << get_iovp_order(bits_wanted << iovp_shift);
484
485 if (likely(bits_wanted == 1)) {
486 unsigned int bitshiftcnt;
487 for(; res_ptr < res_end ; res_ptr++) {
488 if (likely(*res_ptr != ~0UL)) {
489 bitshiftcnt = ffz(*res_ptr);
490 *res_ptr |= (1UL << bitshiftcnt);
491 pide = ((unsigned long)res_ptr - (unsigned long)ioc->res_map);
492 pide <<= 3; /* convert to bit address */
493 pide += bitshiftcnt;
494 ioc->res_bitshift = bitshiftcnt + bits_wanted;
495 goto found_it;
496 }
497 }
498 goto not_found;
499
500 }
501
502 if (likely(bits_wanted <= BITS_PER_LONG/2)) {
503 /*
504 ** Search the resource bit map on well-aligned values.
505 ** "o" is the alignment.
506 ** We need the alignment to invalidate I/O TLB using
507 ** SBA HW features in the unmap path.
508 */
509 unsigned long o = 1 << get_iovp_order(bits_wanted << iovp_shift);
510 uint bitshiftcnt = ROUNDUP(ioc->res_bitshift, o);
511 unsigned long mask, base_mask;
512
513 base_mask = RESMAP_MASK(bits_wanted);
514 mask = base_mask << bitshiftcnt;
515
516 DBG_RES("%s() o %ld %p", __FUNCTION__, o, res_ptr);
517 for(; res_ptr < res_end ; res_ptr++)
518 {
519 DBG_RES(" %p %lx %lx\n", res_ptr, mask, *res_ptr);
520 ASSERT(0 != mask);
521 for (; mask ; mask <<= o, bitshiftcnt += o) {
522 if(0 == ((*res_ptr) & mask)) {
523 *res_ptr |= mask; /* mark resources busy! */
524 pide = ((unsigned long)res_ptr - (unsigned long)ioc->res_map);
525 pide <<= 3; /* convert to bit address */
526 pide += bitshiftcnt;
527 ioc->res_bitshift = bitshiftcnt + bits_wanted;
528 goto found_it;
529 }
530 }
531
532 bitshiftcnt = 0;
533 mask = base_mask;
534
535 }
536
537 } else {
538 int qwords, bits, i;
539 unsigned long *end;
540
541 qwords = bits_wanted >> 6; /* /64 */
542 bits = bits_wanted - (qwords * BITS_PER_LONG);
543
544 end = res_end - qwords;
545
546 for (; res_ptr < end; res_ptr++) {
547 for (i = 0 ; i < qwords ; i++) {
548 if (res_ptr[i] != 0)
549 goto next_ptr;
550 }
551 if (bits && res_ptr[i] && (__ffs(res_ptr[i]) < bits))
552 continue;
553
554 /* Found it, mark it */
555 for (i = 0 ; i < qwords ; i++)
556 res_ptr[i] = ~0UL;
557 res_ptr[i] |= RESMAP_MASK(bits);
558
559 pide = ((unsigned long)res_ptr - (unsigned long)ioc->res_map);
560 pide <<= 3; /* convert to bit address */
561 res_ptr += qwords;
562 ioc->res_bitshift = bits;
563 goto found_it;
564next_ptr:
565 ;
566 }
567 }
568
569not_found:
570 prefetch(ioc->res_map);
571 ioc->res_hint = (unsigned long *) ioc->res_map;
572 ioc->res_bitshift = 0;
573 return (pide);
574
575found_it:
576 ioc->res_hint = res_ptr;
577 return (pide);
578}
579
580
581/**
582 * sba_alloc_range - find free bits and mark them in IO PDIR resource bitmap
583 * @ioc: IO MMU structure which owns the pdir we are interested in.
584 * @size: number of bytes to create a mapping for
585 *
586 * Given a size, find consecutive unmarked and then mark those bits in the
587 * resource bit map.
588 */
589static int
590sba_alloc_range(struct ioc *ioc, size_t size)
591{
592 unsigned int pages_needed = size >> iovp_shift;
593#ifdef PDIR_SEARCH_TIMING
594 unsigned long itc_start;
595#endif
596 unsigned long pide;
597 unsigned long flags;
598
599 ASSERT(pages_needed);
600 ASSERT(0 == (size & ~iovp_mask));
601
602 spin_lock_irqsave(&ioc->res_lock, flags);
603
604#ifdef PDIR_SEARCH_TIMING
605 itc_start = ia64_get_itc();
606#endif
607 /*
608 ** "seek and ye shall find"...praying never hurts either...
609 */
610 pide = sba_search_bitmap(ioc, pages_needed);
611 if (unlikely(pide >= (ioc->res_size << 3))) {
612 pide = sba_search_bitmap(ioc, pages_needed);
613 if (unlikely(pide >= (ioc->res_size << 3))) {
614#if DELAYED_RESOURCE_CNT > 0
615 /*
616 ** With delayed resource freeing, we can give this one more shot. We're
617 ** getting close to being in trouble here, so do what we can to make this
618 ** one count.
619 */
620 spin_lock(&ioc->saved_lock);
621 if (ioc->saved_cnt > 0) {
622 struct sba_dma_pair *d;
623 int cnt = ioc->saved_cnt;
624
625 d = &(ioc->saved[ioc->saved_cnt]);
626
627 while (cnt--) {
628 sba_mark_invalid(ioc, d->iova, d->size);
629 sba_free_range(ioc, d->iova, d->size);
630 d--;
631 }
632 ioc->saved_cnt = 0;
633 READ_REG(ioc->ioc_hpa+IOC_PCOM); /* flush purges */
634 }
635 spin_unlock(&ioc->saved_lock);
636
637 pide = sba_search_bitmap(ioc, pages_needed);
638 if (unlikely(pide >= (ioc->res_size << 3)))
639 panic(__FILE__ ": I/O MMU @ %p is out of mapping resources\n",
640 ioc->ioc_hpa);
641#else
642 panic(__FILE__ ": I/O MMU @ %p is out of mapping resources\n",
643 ioc->ioc_hpa);
644#endif
645 }
646 }
647
648#ifdef PDIR_SEARCH_TIMING
649 ioc->avg_search[ioc->avg_idx++] = (ia64_get_itc() - itc_start) / pages_needed;
650 ioc->avg_idx &= SBA_SEARCH_SAMPLE - 1;
651#endif
652
653 prefetchw(&(ioc->pdir_base[pide]));
654
655#ifdef ASSERT_PDIR_SANITY
656 /* verify the first enable bit is clear */
657 if(0x00 != ((u8 *) ioc->pdir_base)[pide*PDIR_ENTRY_SIZE + 7]) {
658 sba_dump_pdir_entry(ioc, "sba_search_bitmap() botched it?", pide);
659 }
660#endif
661
662 DBG_RES("%s(%x) %d -> %lx hint %x/%x\n",
663 __FUNCTION__, size, pages_needed, pide,
664 (uint) ((unsigned long) ioc->res_hint - (unsigned long) ioc->res_map),
665 ioc->res_bitshift );
666
667 spin_unlock_irqrestore(&ioc->res_lock, flags);
668
669 return (pide);
670}
671
672
673/**
674 * sba_free_range - unmark bits in IO PDIR resource bitmap
675 * @ioc: IO MMU structure which owns the pdir we are interested in.
676 * @iova: IO virtual address which was previously allocated.
677 * @size: number of bytes to create a mapping for
678 *
679 * clear bits in the ioc's resource map
680 */
681static SBA_INLINE void
682sba_free_range(struct ioc *ioc, dma_addr_t iova, size_t size)
683{
684 unsigned long iovp = SBA_IOVP(ioc, iova);
685 unsigned int pide = PDIR_INDEX(iovp);
686 unsigned int ridx = pide >> 3; /* convert bit to byte address */
687 unsigned long *res_ptr = (unsigned long *) &((ioc)->res_map[ridx & ~RESMAP_IDX_MASK]);
688 int bits_not_wanted = size >> iovp_shift;
689 unsigned long m;
690
691 /* Round up to power-of-two size: see AR2305 note above */
692 bits_not_wanted = 1UL << get_iovp_order(bits_not_wanted << iovp_shift);
693 for (; bits_not_wanted > 0 ; res_ptr++) {
694
695 if (unlikely(bits_not_wanted > BITS_PER_LONG)) {
696
697 /* these mappings start 64bit aligned */
698 *res_ptr = 0UL;
699 bits_not_wanted -= BITS_PER_LONG;
700 pide += BITS_PER_LONG;
701
702 } else {
703
704 /* 3-bits "bit" address plus 2 (or 3) bits for "byte" == bit in word */
705 m = RESMAP_MASK(bits_not_wanted) << (pide & (BITS_PER_LONG - 1));
706 bits_not_wanted = 0;
707
708 DBG_RES("%s( ,%x,%x) %x/%lx %x %p %lx\n", __FUNCTION__, (uint) iova, size,
709 bits_not_wanted, m, pide, res_ptr, *res_ptr);
710
711 ASSERT(m != 0);
712 ASSERT(bits_not_wanted);
713 ASSERT((*res_ptr & m) == m); /* verify same bits are set */
714 *res_ptr &= ~m;
715 }
716 }
717}
718
719
720/**************************************************************
721*
722* "Dynamic DMA Mapping" support (aka "Coherent I/O")
723*
724***************************************************************/
725
726/**
727 * sba_io_pdir_entry - fill in one IO PDIR entry
728 * @pdir_ptr: pointer to IO PDIR entry
729 * @vba: Virtual CPU address of buffer to map
730 *
731 * SBA Mapping Routine
732 *
733 * Given a virtual address (vba, arg1) sba_io_pdir_entry()
734 * loads the I/O PDIR entry pointed to by pdir_ptr (arg0).
735 * Each IO Pdir entry consists of 8 bytes as shown below
736 * (LSB == bit 0):
737 *
738 * 63 40 11 7 0
739 * +-+---------------------+----------------------------------+----+--------+
740 * |V| U | PPN[39:12] | U | FF |
741 * +-+---------------------+----------------------------------+----+--------+
742 *
743 * V == Valid Bit
744 * U == Unused
745 * PPN == Physical Page Number
746 *
747 * The physical address fields are filled with the results of virt_to_phys()
748 * on the vba.
749 */
750
751#if 1
752#define sba_io_pdir_entry(pdir_ptr, vba) *pdir_ptr = ((vba & ~0xE000000000000FFFULL) \
753 | 0x8000000000000000ULL)
754#else
755void SBA_INLINE
756sba_io_pdir_entry(u64 *pdir_ptr, unsigned long vba)
757{
758 *pdir_ptr = ((vba & ~0xE000000000000FFFULL) | 0x80000000000000FFULL);
759}
760#endif
761
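/*
 * Worked example of the pdir entry layout documented above, assuming
 * iovp_shift == 12 and a region-7 kernel address vba == 0xe000000012345678:
 *
 *	vba & ~0xE000000000000FFFULL	== 0x0000000012345000
 *	setting bit 63 (V)		== 0x8000000012345000
 *
 * The low 12 offset bits and the identity-mapped region bits are
 * stripped, leaving the page frame in PPN[39:12] with V set.
 */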
762#ifdef ENABLE_MARK_CLEAN
763/**
764 * Since DMA is i-cache coherent, any (complete) pages that were written via
765 * DMA can be marked as "clean" so that lazy_mmu_prot_update() doesn't have to
766 * flush them when they get mapped into an executable vm-area.
767 */
768static void
769mark_clean (void *addr, size_t size)
770{
771 unsigned long pg_addr, end;
772
773 pg_addr = PAGE_ALIGN((unsigned long) addr);
774 end = (unsigned long) addr + size;
775 while (pg_addr + PAGE_SIZE <= end) {
776 struct page *page = virt_to_page((void *)pg_addr);
777 set_bit(PG_arch_1, &page->flags);
778 pg_addr += PAGE_SIZE;
779 }
780}
781#endif
782
783/**
784 * sba_mark_invalid - invalidate one or more IO PDIR entries
785 * @ioc: IO MMU structure which owns the pdir we are interested in.
786 * @iova: IO Virtual Address mapped earlier
787 * @byte_cnt: number of bytes this mapping covers.
788 *
789 * Mark the IO PDIR entry(ies) as Invalid and invalidate the
790 * corresponding IO TLB entry. The PCOM (Purge Command Register)
791 * is used to purge stale entries in the IO TLB when unmapping entries.
792 *
793 * The PCOM register supports purging of multiple pages, with a minimum
794 * of 1 page and a maximum of 2GB. Hardware requires the address be
795 * aligned to the size of the range being purged. The size of the range
796 * must be a power of 2. The "Cool perf optimization" in the
797 * allocation routine helps keep that true.
798 */
799static SBA_INLINE void
800sba_mark_invalid(struct ioc *ioc, dma_addr_t iova, size_t byte_cnt)
801{
802 u32 iovp = (u32) SBA_IOVP(ioc,iova);
803
804 int off = PDIR_INDEX(iovp);
805
806 /* Must be non-zero and rounded up */
807 ASSERT(byte_cnt > 0);
808 ASSERT(0 == (byte_cnt & ~iovp_mask));
809
810#ifdef ASSERT_PDIR_SANITY
811 /* Assert first pdir entry is set */
812 if (!(ioc->pdir_base[off] >> 60)) {
813 sba_dump_pdir_entry(ioc,"sba_mark_invalid()", PDIR_INDEX(iovp));
814 }
815#endif
816
817 if (byte_cnt <= iovp_size)
818 {
819 ASSERT(off < ioc->pdir_size);
820
821 iovp |= iovp_shift; /* set "size" field for PCOM */
822
823#ifndef FULL_VALID_PDIR
824 /*
825 ** clear I/O PDIR entry "valid" bit
826 ** Do NOT clear the rest - save it for debugging.
827 ** We should only clear bits that have previously
828 ** been enabled.
829 */
830 ioc->pdir_base[off] &= ~(0x80000000000000FFULL);
831#else
832 /*
833 ** If we want to maintain the PDIR as valid, put in
834 ** the spill page so devices prefetching won't
835 ** cause a hard fail.
836 */
837 ioc->pdir_base[off] = (0x80000000000000FFULL | prefetch_spill_page);
838#endif
839 } else {
840 u32 t = get_iovp_order(byte_cnt) + iovp_shift;
841
842 iovp |= t;
843 ASSERT(t <= 31); /* 2GB! Max value of "size" field */
844
845 do {
846 /* verify this pdir entry is enabled */
847 ASSERT(ioc->pdir_base[off] >> 63);
848#ifndef FULL_VALID_PDIR
849 /* clear I/O Pdir entry "valid" bit first */
850 ioc->pdir_base[off] &= ~(0x80000000000000FFULL);
851#else
852 ioc->pdir_base[off] = (0x80000000000000FFULL | prefetch_spill_page);
853#endif
854 off++;
855 byte_cnt -= iovp_size;
856 } while (byte_cnt > 0);
857 }
858
859 WRITE_REG(iovp | ioc->ibase, ioc->ioc_hpa+IOC_PCOM);
860}
861
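/*
 * PCOM encoding example based on the code above, assuming iovp_shift == 12:
 * the low bits of the value written to IOC_PCOM carry log2 of the purge
 * size in bytes, so
 *
 *	a single 4KB page:	iovp | 12	(purge 2^12 bytes)
 *	a 32KB mapping:		iovp | (3 + 12)	(purge 2^15 bytes)
 *
 * which is also why the purged range must be a naturally aligned power
 * of two.
 */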
862/**
863 * sba_map_single - map one buffer and return IOVA for DMA
864 * @dev: instance of PCI owned by the driver that's asking.
865 * @addr: driver buffer to map.
866 * @size: number of bytes to map in driver buffer.
867 * @dir: R/W or both.
868 *
869 * See Documentation/DMA-mapping.txt
870 */
871dma_addr_t
872sba_map_single(struct device *dev, void *addr, size_t size, int dir)
873{
874 struct ioc *ioc;
875 dma_addr_t iovp;
876 dma_addr_t offset;
877 u64 *pdir_start;
878 int pide;
879#ifdef ASSERT_PDIR_SANITY
880 unsigned long flags;
881#endif
882#ifdef ALLOW_IOV_BYPASS
883 unsigned long pci_addr = virt_to_phys(addr);
884#endif
885
886#ifdef ALLOW_IOV_BYPASS
887 ASSERT(to_pci_dev(dev)->dma_mask);
888 /*
889 ** Check if the PCI device can DMA to ptr... if so, just return ptr
890 */
891 if (likely((pci_addr & ~to_pci_dev(dev)->dma_mask) == 0)) {
892 /*
893 ** Device is bit capable of DMA'ing to the buffer...
894 ** just return the PCI address of ptr
895 */
896 DBG_BYPASS("sba_map_single() bypass mask/addr: 0x%lx/0x%lx\n",
897 to_pci_dev(dev)->dma_mask, pci_addr);
898 return pci_addr;
899 }
900#endif
901 ioc = GET_IOC(dev);
902 ASSERT(ioc);
903
904 prefetch(ioc->res_hint);
905
906 ASSERT(size > 0);
907 ASSERT(size <= DMA_CHUNK_SIZE);
908
909 /* save offset bits */
910 offset = ((dma_addr_t) (long) addr) & ~iovp_mask;
911
912 /* round up to nearest iovp_size */
913 size = (size + offset + ~iovp_mask) & iovp_mask;
914
915#ifdef ASSERT_PDIR_SANITY
916 spin_lock_irqsave(&ioc->res_lock, flags);
917 if (sba_check_pdir(ioc,"Check before sba_map_single()"))
918 panic("Sanity check failed");
919 spin_unlock_irqrestore(&ioc->res_lock, flags);
920#endif
921
922 pide = sba_alloc_range(ioc, size);
923
924 iovp = (dma_addr_t) pide << iovp_shift;
925
926 DBG_RUN("%s() 0x%p -> 0x%lx\n",
927 __FUNCTION__, addr, (long) iovp | offset);
928
929 pdir_start = &(ioc->pdir_base[pide]);
930
931 while (size > 0) {
932 ASSERT(((u8 *)pdir_start)[7] == 0); /* verify availability */
933 sba_io_pdir_entry(pdir_start, (unsigned long) addr);
934
935 DBG_RUN(" pdir 0x%p %lx\n", pdir_start, *pdir_start);
936
937 addr += iovp_size;
938 size -= iovp_size;
939 pdir_start++;
940 }
941 /* force pdir update */
942 wmb();
943
944 /* form complete address */
945#ifdef ASSERT_PDIR_SANITY
946 spin_lock_irqsave(&ioc->res_lock, flags);
947 sba_check_pdir(ioc,"Check after sba_map_single()");
948 spin_unlock_irqrestore(&ioc->res_lock, flags);
949#endif
950 return SBA_IOVA(ioc, iovp, offset);
951}
952
953/**
954 * sba_unmap_single - unmap one IOVA and free resources
955 * @dev: instance of PCI owned by the driver that's asking.
956 * @iova: IOVA of driver buffer previously mapped.
957 * @size: number of bytes mapped in driver buffer.
958 * @dir: R/W or both.
959 *
960 * See Documentation/DMA-mapping.txt
961 */
962void sba_unmap_single(struct device *dev, dma_addr_t iova, size_t size, int dir)
963{
964 struct ioc *ioc;
965#if DELAYED_RESOURCE_CNT > 0
966 struct sba_dma_pair *d;
967#endif
968 unsigned long flags;
969 dma_addr_t offset;
970
971 ioc = GET_IOC(dev);
972 ASSERT(ioc);
973
974#ifdef ALLOW_IOV_BYPASS
975 if (likely((iova & ioc->imask) != ioc->ibase)) {
976 /*
977 ** Address does not fall w/in IOVA, must be bypassing
978 */
979 DBG_BYPASS("sba_unmap_single() bypass addr: 0x%lx\n", iova);
980
981#ifdef ENABLE_MARK_CLEAN
982 if (dir == DMA_FROM_DEVICE) {
983 mark_clean(phys_to_virt(iova), size);
984 }
985#endif
986 return;
987 }
988#endif
989 offset = iova & ~iovp_mask;
990
991 DBG_RUN("%s() iovp 0x%lx/%x\n",
992 __FUNCTION__, (long) iova, size);
993
994 iova ^= offset; /* clear offset bits */
995 size += offset;
996 size = ROUNDUP(size, iovp_size);
997
998
999#if DELAYED_RESOURCE_CNT > 0
1000 spin_lock_irqsave(&ioc->saved_lock, flags);
1001 d = &(ioc->saved[ioc->saved_cnt]);
1002 d->iova = iova;
1003 d->size = size;
1004 if (unlikely(++(ioc->saved_cnt) >= DELAYED_RESOURCE_CNT)) {
1005 int cnt = ioc->saved_cnt;
1006 spin_lock(&ioc->res_lock);
1007 while (cnt--) {
1008 sba_mark_invalid(ioc, d->iova, d->size);
1009 sba_free_range(ioc, d->iova, d->size);
1010 d--;
1011 }
1012 ioc->saved_cnt = 0;
1013 READ_REG(ioc->ioc_hpa+IOC_PCOM); /* flush purges */
1014 spin_unlock(&ioc->res_lock);
1015 }
1016 spin_unlock_irqrestore(&ioc->saved_lock, flags);
1017#else /* DELAYED_RESOURCE_CNT == 0 */
1018 spin_lock_irqsave(&ioc->res_lock, flags);
1019 sba_mark_invalid(ioc, iova, size);
1020 sba_free_range(ioc, iova, size);
1021 READ_REG(ioc->ioc_hpa+IOC_PCOM); /* flush purges */
1022 spin_unlock_irqrestore(&ioc->res_lock, flags);
1023#endif /* DELAYED_RESOURCE_CNT == 0 */
1024#ifdef ENABLE_MARK_CLEAN
1025 if (dir == DMA_FROM_DEVICE) {
1026 u32 iovp = (u32) SBA_IOVP(ioc,iova);
1027 int off = PDIR_INDEX(iovp);
1028 void *addr;
1029
1030 if (size <= iovp_size) {
1031 addr = phys_to_virt(ioc->pdir_base[off] &
1032 ~0xE000000000000FFFULL);
1033 mark_clean(addr, size);
1034 } else {
1035 size_t byte_cnt = size;
1036
1037 do {
1038 addr = phys_to_virt(ioc->pdir_base[off] &
1039 ~0xE000000000000FFFULL);
1040 mark_clean(addr, min(byte_cnt, iovp_size));
1041 off++;
1042 byte_cnt -= iovp_size;
1043
1044 } while (byte_cnt > 0);
1045 }
1046 }
1047#endif
1048}
1049
1050
1051/**
1052 * sba_alloc_coherent - allocate/map shared mem for DMA
1053 * @dev: instance of PCI owned by the driver that's asking.
1054 * @size: number of bytes mapped in driver buffer.
1055 * @dma_handle: IOVA of new buffer.
1056 *
1057 * See Documentation/DMA-mapping.txt
1058 */
1059void *
1060sba_alloc_coherent (struct device *dev, size_t size, dma_addr_t *dma_handle, int flags)
1061{
1062 struct ioc *ioc;
1063 void *addr;
1064
1065 ioc = GET_IOC(dev);
1066 ASSERT(ioc);
1067
1068#ifdef CONFIG_NUMA
1069 {
1070 struct page *page;
1071 page = alloc_pages_node(ioc->node == MAX_NUMNODES ?
1072 numa_node_id() : ioc->node, flags,
1073 get_order(size));
1074
1075 if (unlikely(!page))
1076 return NULL;
1077
1078 addr = page_address(page);
1079 }
1080#else
1081 addr = (void *) __get_free_pages(flags, get_order(size));
1082#endif
1083 if (unlikely(!addr))
1084 return NULL;
1085
1086 memset(addr, 0, size);
1087 *dma_handle = virt_to_phys(addr);
1088
1089#ifdef ALLOW_IOV_BYPASS
1090 ASSERT(dev->coherent_dma_mask);
1091 /*
1092 ** Check if the PCI device can DMA to ptr... if so, just return ptr
1093 */
1094 if (likely((*dma_handle & ~dev->coherent_dma_mask) == 0)) {
1095 DBG_BYPASS("sba_alloc_coherent() bypass mask/addr: 0x%lx/0x%lx\n",
1096 dev->coherent_dma_mask, *dma_handle);
1097
1098 return addr;
1099 }
1100#endif
1101
1102 /*
1103 * If device can't bypass or bypass is disabled, pass the 32bit fake
1104 * device to map single to get an iova mapping.
1105 */
1106 *dma_handle = sba_map_single(&ioc->sac_only_dev->dev, addr, size, 0);
1107
1108 return addr;
1109}
1110
1111
1112/**
1113 * sba_free_coherent - free/unmap shared mem for DMA
1114 * @dev: instance of PCI owned by the driver that's asking.
1115 * @size: number of bytes mapped in driver buffer.
1116 * @vaddr: virtual address IOVA of "consistent" buffer.
1117 * @dma_handle: IO virtual address of "consistent" buffer.
1118 *
1119 * See Documentation/DMA-mapping.txt
1120 */
1121void sba_free_coherent (struct device *dev, size_t size, void *vaddr, dma_addr_t dma_handle)
1122{
1123 sba_unmap_single(dev, dma_handle, size, 0);
1124 free_pages((unsigned long) vaddr, get_order(size));
1125}
1126
1127
1128/*
1129** Since 0 is a valid pdir_base index value, can't use that
1130** to determine if a value is valid or not. Use a flag to indicate
1131** the SG list entry contains a valid pdir index.
1132*/
1133#define PIDE_FLAG 0x1UL
1134
1135#ifdef DEBUG_LARGE_SG_ENTRIES
1136int dump_run_sg = 0;
1137#endif
1138
1139
1140/**
1141 * sba_fill_pdir - write allocated SG entries into IO PDIR
1142 * @ioc: IO MMU structure which owns the pdir we are interested in.
1143 * @startsg: list of IOVA/size pairs
1144 * @nents: number of entries in startsg list
1145 *
1146 * Take preprocessed SG list and write corresponding entries
1147 * in the IO PDIR.
1148 */
1149
1150static SBA_INLINE int
1151sba_fill_pdir(
1152 struct ioc *ioc,
1153 struct scatterlist *startsg,
1154 int nents)
1155{
1156 struct scatterlist *dma_sg = startsg; /* pointer to current DMA */
1157 int n_mappings = 0;
1158 u64 *pdirp = NULL;
1159 unsigned long dma_offset = 0;
1160
1161 dma_sg--;
1162 while (nents-- > 0) {
1163 int cnt = startsg->dma_length;
1164 startsg->dma_length = 0;
1165
1166#ifdef DEBUG_LARGE_SG_ENTRIES
1167 if (dump_run_sg)
1168 printk(" %2d : %08lx/%05x %p\n",
1169 nents, startsg->dma_address, cnt,
1170 sba_sg_address(startsg));
1171#else
1172 DBG_RUN_SG(" %d : %08lx/%05x %p\n",
1173 nents, startsg->dma_address, cnt,
1174 sba_sg_address(startsg));
1175#endif
1176 /*
1177 ** Look for the start of a new DMA stream
1178 */
1179 if (startsg->dma_address & PIDE_FLAG) {
1180 u32 pide = startsg->dma_address & ~PIDE_FLAG;
1181 dma_offset = (unsigned long) pide & ~iovp_mask;
1182 startsg->dma_address = 0;
1183 dma_sg++;
1184 dma_sg->dma_address = pide | ioc->ibase;
1185 pdirp = &(ioc->pdir_base[pide >> iovp_shift]);
1186 n_mappings++;
1187 }
1188
1189 /*
1190 ** Look for a VCONTIG chunk
1191 */
1192 if (cnt) {
1193 unsigned long vaddr = (unsigned long) sba_sg_address(startsg);
1194 ASSERT(pdirp);
1195
1196 /* Since multiple Vcontig blocks could make up
1197 ** one DMA stream, *add* cnt to dma_len.
1198 */
1199 dma_sg->dma_length += cnt;
1200 cnt += dma_offset;
1201 dma_offset=0; /* only want offset on first chunk */
1202 cnt = ROUNDUP(cnt, iovp_size);
1203 do {
1204 sba_io_pdir_entry(pdirp, vaddr);
1205 vaddr += iovp_size;
1206 cnt -= iovp_size;
1207 pdirp++;
1208 } while (cnt > 0);
1209 }
1210 startsg++;
1211 }
1212 /* force pdir update */
1213 wmb();
1214
1215#ifdef DEBUG_LARGE_SG_ENTRIES
1216 dump_run_sg = 0;
1217#endif
1218 return(n_mappings);
1219}
1220
1221
1222/*
1223** Two address ranges are DMA contiguous *iff* "end of prev" and
1224** "start of next" are both on an IOV page boundary.
1225**
1226** (shift left is a quick trick to mask off upper bits)
1227*/
1228#define DMA_CONTIG(__X, __Y) \
1229 (((((unsigned long) __X) | ((unsigned long) __Y)) << (BITS_PER_LONG - iovp_shift)) == 0UL)
1230
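/*
 * Worked example, assuming BITS_PER_LONG == 64 and iovp_shift == 12:
 * shifting the OR of the two addresses left by 52 discards everything
 * but their page-offset bits, so the macro is true only when both
 * addresses fall on an IOV page boundary.
 *
 *	DMA_CONTIG(0x5000, 0x6000): (0x5000 | 0x6000) << 52 == 0	-> contiguous
 *	DMA_CONTIG(0x5800, 0x6000): (0x5800 | 0x6000) << 52 != 0	-> not contiguous
 */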
1231
1232/**
1233 * sba_coalesce_chunks - preprocess the SG list
1234 * @ioc: IO MMU structure which owns the pdir we are interested in.
1235 * @startsg: list of IOVA/size pairs
1236 * @nents: number of entries in startsg list
1237 *
1238 * First pass is to walk the SG list and determine where the breaks are
1239 * in the DMA stream. Allocates PDIR entries but does not fill them.
1240 * Returns the number of DMA chunks.
1241 *
1242 * Doing the fill separate from the coalescing/allocation keeps the
1243 * code simpler. Future enhancement could make one pass through
1244 * the sglist do both.
1245 */
1246static SBA_INLINE int
1247sba_coalesce_chunks( struct ioc *ioc,
1248 struct scatterlist *startsg,
1249 int nents)
1250{
1251 struct scatterlist *vcontig_sg; /* VCONTIG chunk head */
1252 unsigned long vcontig_len; /* len of VCONTIG chunk */
1253 unsigned long vcontig_end;
1254 struct scatterlist *dma_sg; /* next DMA stream head */
1255 unsigned long dma_offset, dma_len; /* start/len of DMA stream */
1256 int n_mappings = 0;
1257
1258 while (nents > 0) {
1259 unsigned long vaddr = (unsigned long) sba_sg_address(startsg);
1260
1261 /*
1262 ** Prepare for first/next DMA stream
1263 */
1264 dma_sg = vcontig_sg = startsg;
1265 dma_len = vcontig_len = vcontig_end = startsg->length;
1266 vcontig_end += vaddr;
1267 dma_offset = vaddr & ~iovp_mask;
1268
1269 /* PARANOID: clear entries */
1270 startsg->dma_address = startsg->dma_length = 0;
1271
1272 /*
1273 ** This loop terminates one iteration "early" since
1274 ** it's always looking one "ahead".
1275 */
1276 while (--nents > 0) {
1277 unsigned long vaddr; /* tmp */
1278
1279 startsg++;
1280
1281 /* PARANOID */
1282 startsg->dma_address = startsg->dma_length = 0;
1283
1284 /* catch brokenness in SCSI layer */
1285 ASSERT(startsg->length <= DMA_CHUNK_SIZE);
1286
1287 /*
1288 ** First make sure current dma stream won't
1289 ** exceed DMA_CHUNK_SIZE if we coalesce the
1290 ** next entry.
1291 */
1292 if (((dma_len + dma_offset + startsg->length + ~iovp_mask) & iovp_mask)
1293 > DMA_CHUNK_SIZE)
1294 break;
1295
1296 /*
1297 ** Then look for virtually contiguous blocks.
1298 **
1299 ** append the next transaction?
1300 */
1301 vaddr = (unsigned long) sba_sg_address(startsg);
1302 if (vcontig_end == vaddr)
1303 {
1304 vcontig_len += startsg->length;
1305 vcontig_end += startsg->length;
1306 dma_len += startsg->length;
1307 continue;
1308 }
1309
1310#ifdef DEBUG_LARGE_SG_ENTRIES
1311 dump_run_sg = (vcontig_len > iovp_size);
1312#endif
1313
1314 /*
1315 ** Not virtually contiguous.
1316 ** Terminate prev chunk.
1317 ** Start a new chunk.
1318 **
1319 ** Once we start a new VCONTIG chunk, dma_offset
1320 ** can't change. And we need the offset from the first
 1321			** chunk - not the last one. Ergo, successive chunks
 1322			** must start on page boundaries and dovetail
 1323			** with their predecessor.
1324 */
1325 vcontig_sg->dma_length = vcontig_len;
1326
1327 vcontig_sg = startsg;
1328 vcontig_len = startsg->length;
1329
1330 /*
1331 ** 3) do the entries end/start on page boundaries?
1332 ** Don't update vcontig_end until we've checked.
1333 */
1334 if (DMA_CONTIG(vcontig_end, vaddr))
1335 {
1336 vcontig_end = vcontig_len + vaddr;
1337 dma_len += vcontig_len;
1338 continue;
1339 } else {
1340 break;
1341 }
1342 }
1343
1344 /*
1345 ** End of DMA Stream
1346 ** Terminate last VCONTIG block.
1347 ** Allocate space for DMA stream.
1348 */
1349 vcontig_sg->dma_length = vcontig_len;
1350 dma_len = (dma_len + dma_offset + ~iovp_mask) & iovp_mask;
1351 ASSERT(dma_len <= DMA_CHUNK_SIZE);
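		/*
		** (The encoding below packs the allocated pdir index, shifted into
		** IOVA page position, together with the byte offset of the first
		** chunk; PIDE_FLAG presumably tags this entry as the head of a DMA
		** stream for the later fill pass in sba_fill_pdir to pick up.)
		*/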
1352 dma_sg->dma_address = (dma_addr_t) (PIDE_FLAG
1353 | (sba_alloc_range(ioc, dma_len) << iovp_shift)
1354 | dma_offset);
1355 n_mappings++;
1356 }
1357
1358 return n_mappings;
1359}
1360
1361
1362/**
1363 * sba_map_sg - map Scatter/Gather list
1364 * @dev: instance of PCI owned by the driver that's asking.
1365 * @sglist: array of buffer/length pairs
1366 * @nents: number of entries in list
1367 * @dir: R/W or both.
1368 *
1369 * See Documentation/DMA-mapping.txt
1370 */
1371int sba_map_sg(struct device *dev, struct scatterlist *sglist, int nents, int dir)
1372{
1373 struct ioc *ioc;
1374 int coalesced, filled = 0;
1375#ifdef ASSERT_PDIR_SANITY
1376 unsigned long flags;
1377#endif
1378#ifdef ALLOW_IOV_BYPASS_SG
1379 struct scatterlist *sg;
1380#endif
1381
1382 DBG_RUN_SG("%s() START %d entries\n", __FUNCTION__, nents);
1383 ioc = GET_IOC(dev);
1384 ASSERT(ioc);
1385
1386#ifdef ALLOW_IOV_BYPASS_SG
1387 ASSERT(to_pci_dev(dev)->dma_mask);
1388 if (likely((ioc->dma_mask & ~to_pci_dev(dev)->dma_mask) == 0)) {
1389 for (sg = sglist ; filled < nents ; filled++, sg++){
1390 sg->dma_length = sg->length;
1391 sg->dma_address = virt_to_phys(sba_sg_address(sg));
1392 }
1393 return filled;
1394 }
1395#endif
1396 /* Fast path single entry scatterlists. */
1397 if (nents == 1) {
1398 sglist->dma_length = sglist->length;
1399 sglist->dma_address = sba_map_single(dev, sba_sg_address(sglist), sglist->length, dir);
1400 return 1;
1401 }
1402
1403#ifdef ASSERT_PDIR_SANITY
1404 spin_lock_irqsave(&ioc->res_lock, flags);
1405 if (sba_check_pdir(ioc,"Check before sba_map_sg()"))
1406 {
1407 sba_dump_sg(ioc, sglist, nents);
1408 panic("Check before sba_map_sg()");
1409 }
1410 spin_unlock_irqrestore(&ioc->res_lock, flags);
1411#endif
1412
1413 prefetch(ioc->res_hint);
1414
1415 /*
1416 ** First coalesce the chunks and allocate I/O pdir space
1417 **
1418 ** If this is one DMA stream, we can properly map using the
1419 ** correct virtual address associated with each DMA page.
 1420	** Without this association, we wouldn't have coherent DMA!
 1421	** Access to the virtual address is what forces a two-pass algorithm.
1422 */
1423 coalesced = sba_coalesce_chunks(ioc, sglist, nents);
1424
1425 /*
1426 ** Program the I/O Pdir
1427 **
1428 ** map the virtual addresses to the I/O Pdir
1429 ** o dma_address will contain the pdir index
1430 ** o dma_len will contain the number of bytes to map
1431 ** o address contains the virtual address.
1432 */
1433 filled = sba_fill_pdir(ioc, sglist, nents);
1434
1435#ifdef ASSERT_PDIR_SANITY
1436 spin_lock_irqsave(&ioc->res_lock, flags);
1437 if (sba_check_pdir(ioc,"Check after sba_map_sg()"))
1438 {
1439 sba_dump_sg(ioc, sglist, nents);
1440 panic("Check after sba_map_sg()\n");
1441 }
1442 spin_unlock_irqrestore(&ioc->res_lock, flags);
1443#endif
1444
1445 ASSERT(coalesced == filled);
1446 DBG_RUN_SG("%s() DONE %d mappings\n", __FUNCTION__, filled);
1447
1448 return filled;
1449}
1450
1451
1452/**
1453 * sba_unmap_sg - unmap Scatter/Gather list
1454 * @dev: instance of PCI owned by the driver that's asking.
1455 * @sglist: array of buffer/length pairs
1456 * @nents: number of entries in list
1457 * @dir: R/W or both.
1458 *
1459 * See Documentation/DMA-mapping.txt
1460 */
1461void sba_unmap_sg (struct device *dev, struct scatterlist *sglist, int nents, int dir)
1462{
1463#ifdef ASSERT_PDIR_SANITY
1464 struct ioc *ioc;
1465 unsigned long flags;
1466#endif
1467
1468 DBG_RUN_SG("%s() START %d entries, %p,%x\n",
1469 __FUNCTION__, nents, sba_sg_address(sglist), sglist->length);
1470
1471#ifdef ASSERT_PDIR_SANITY
1472 ioc = GET_IOC(dev);
1473 ASSERT(ioc);
1474
1475 spin_lock_irqsave(&ioc->res_lock, flags);
1476 sba_check_pdir(ioc,"Check before sba_unmap_sg()");
1477 spin_unlock_irqrestore(&ioc->res_lock, flags);
1478#endif
1479
1480 while (nents && sglist->dma_length) {
1481
1482 sba_unmap_single(dev, sglist->dma_address, sglist->dma_length, dir);
1483 sglist++;
1484 nents--;
1485 }
1486
1487 DBG_RUN_SG("%s() DONE (nents %d)\n", __FUNCTION__, nents);
1488
1489#ifdef ASSERT_PDIR_SANITY
1490 spin_lock_irqsave(&ioc->res_lock, flags);
1491 sba_check_pdir(ioc,"Check after sba_unmap_sg()");
1492 spin_unlock_irqrestore(&ioc->res_lock, flags);
1493#endif
1494
1495}
1496
1497/**************************************************************
1498*
1499* Initialization and claim
1500*
1501***************************************************************/
1502
1503static void __init
1504ioc_iova_init(struct ioc *ioc)
1505{
1506 int tcnfg;
1507 int agp_found = 0;
1508 struct pci_dev *device = NULL;
1509#ifdef FULL_VALID_PDIR
1510 unsigned long index;
1511#endif
1512
1513 /*
1514 ** Firmware programs the base and size of a "safe IOVA space"
1515 ** (one that doesn't overlap memory or LMMIO space) in the
1516 ** IBASE and IMASK registers.
1517 */
1518 ioc->ibase = READ_REG(ioc->ioc_hpa + IOC_IBASE) & ~0x1UL;
1519 ioc->imask = READ_REG(ioc->ioc_hpa + IOC_IMASK) | 0xFFFFFFFF00000000UL;
1520
1521 ioc->iov_size = ~ioc->imask + 1;
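	/*
	** (For instance, an IMASK of 0xFFFFFFFFF0000000 gives ~imask + 1 ==
	** 0x10000000, i.e. a 256MB IOVA window.)
	*/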
1522
1523 DBG_INIT("%s() hpa %p IOV base 0x%lx mask 0x%lx (%dMB)\n",
1524 __FUNCTION__, ioc->ioc_hpa, ioc->ibase, ioc->imask,
1525 ioc->iov_size >> 20);
1526
1527 switch (iovp_size) {
1528 case 4*1024: tcnfg = 0; break;
1529 case 8*1024: tcnfg = 1; break;
1530 case 16*1024: tcnfg = 2; break;
1531 case 64*1024: tcnfg = 3; break;
1532 default:
1533 panic(PFX "Unsupported IOTLB page size %ldK",
1534 iovp_size >> 10);
1535 break;
1536 }
1537 WRITE_REG(tcnfg, ioc->ioc_hpa + IOC_TCNFG);
1538
1539 ioc->pdir_size = (ioc->iov_size / iovp_size) * PDIR_ENTRY_SIZE;
1540 ioc->pdir_base = (void *) __get_free_pages(GFP_KERNEL,
1541 get_order(ioc->pdir_size));
1542 if (!ioc->pdir_base)
1543 panic(PFX "Couldn't allocate I/O Page Table\n");
1544
1545 memset(ioc->pdir_base, 0, ioc->pdir_size);
1546
1547 DBG_INIT("%s() IOV page size %ldK pdir %p size %x\n", __FUNCTION__,
1548 iovp_size >> 10, ioc->pdir_base, ioc->pdir_size);
1549
1550 ASSERT(ALIGN((unsigned long) ioc->pdir_base, 4*1024) == (unsigned long) ioc->pdir_base);
1551 WRITE_REG(virt_to_phys(ioc->pdir_base), ioc->ioc_hpa + IOC_PDIR_BASE);
1552
1553 /*
1554 ** If an AGP device is present, only use half of the IOV space
1555 ** for PCI DMA. Unfortunately we can't know ahead of time
 1556	** whether GART support will actually be used; for now we
1557 ** can just key on an AGP device found in the system.
1558 ** We program the next pdir index after we stop w/ a key for
1559 ** the GART code to handshake on.
1560 */
1561 for_each_pci_dev(device)
1562 agp_found |= pci_find_capability(device, PCI_CAP_ID_AGP);
1563
1564 if (agp_found && reserve_sba_gart) {
1565 printk(KERN_INFO PFX "reserving %dMb of IOVA space at 0x%lx for agpgart\n",
1566 ioc->iov_size/2 >> 20, ioc->ibase + ioc->iov_size/2);
1567 ioc->pdir_size /= 2;
1568 ((u64 *)ioc->pdir_base)[PDIR_INDEX(ioc->iov_size/2)] = ZX1_SBA_IOMMU_COOKIE;
1569 }
1570#ifdef FULL_VALID_PDIR
1571 /*
 1572	** Check to see if the spill page has been allocated; we don't need more than
1573 ** one across multiple SBAs.
1574 */
1575 if (!prefetch_spill_page) {
1576 char *spill_poison = "SBAIOMMU POISON";
1577 int poison_size = 16;
1578 void *poison_addr, *addr;
1579
1580 addr = (void *)__get_free_pages(GFP_KERNEL, get_order(iovp_size));
1581 if (!addr)
1582 panic(PFX "Couldn't allocate PDIR spill page\n");
1583
1584 poison_addr = addr;
1585 for ( ; (u64) poison_addr < addr + iovp_size; poison_addr += poison_size)
1586 memcpy(poison_addr, spill_poison, poison_size);
1587
1588 prefetch_spill_page = virt_to_phys(addr);
1589
1590 DBG_INIT("%s() prefetch spill addr: 0x%lx\n", __FUNCTION__, prefetch_spill_page);
1591 }
1592 /*
1593 ** Set all the PDIR entries valid w/ the spill page as the target
1594 */
1595 for (index = 0 ; index < (ioc->pdir_size / PDIR_ENTRY_SIZE) ; index++)
1596 ((u64 *)ioc->pdir_base)[index] = (0x80000000000000FF | prefetch_spill_page);
1597#endif
1598
1599 /* Clear I/O TLB of any possible entries */
1600 WRITE_REG(ioc->ibase | (get_iovp_order(ioc->iov_size) + iovp_shift), ioc->ioc_hpa + IOC_PCOM);
1601 READ_REG(ioc->ioc_hpa + IOC_PCOM);
1602
1603 /* Enable IOVA translation */
1604 WRITE_REG(ioc->ibase | 1, ioc->ioc_hpa + IOC_IBASE);
1605 READ_REG(ioc->ioc_hpa + IOC_IBASE);
1606}
1607
1608static void __init
1609ioc_resource_init(struct ioc *ioc)
1610{
1611 spin_lock_init(&ioc->res_lock);
1612#if DELAYED_RESOURCE_CNT > 0
1613 spin_lock_init(&ioc->saved_lock);
1614#endif
1615
1616 /* resource map size dictated by pdir_size */
1617 ioc->res_size = ioc->pdir_size / PDIR_ENTRY_SIZE; /* entries */
1618 ioc->res_size >>= 3; /* convert bit count to byte count */
1619 DBG_INIT("%s() res_size 0x%x\n", __FUNCTION__, ioc->res_size);
1620
1621 ioc->res_map = (char *) __get_free_pages(GFP_KERNEL,
1622 get_order(ioc->res_size));
1623 if (!ioc->res_map)
1624 panic(PFX "Couldn't allocate resource map\n");
1625
1626 memset(ioc->res_map, 0, ioc->res_size);
1627 /* next available IOVP - circular search */
1628 ioc->res_hint = (unsigned long *) ioc->res_map;
1629
1630#ifdef ASSERT_PDIR_SANITY
1631 /* Mark first bit busy - ie no IOVA 0 */
1632 ioc->res_map[0] = 0x1;
1633 ioc->pdir_base[0] = 0x8000000000000000ULL | ZX1_SBA_IOMMU_COOKIE;
1634#endif
1635#ifdef FULL_VALID_PDIR
1636 /* Mark the last resource used so we don't prefetch beyond IOVA space */
1637 ioc->res_map[ioc->res_size - 1] |= 0x80UL; /* res_map is chars */
1638 ioc->pdir_base[(ioc->pdir_size / PDIR_ENTRY_SIZE) - 1] = (0x80000000000000FF
1639 | prefetch_spill_page);
1640#endif
1641
1642 DBG_INIT("%s() res_map %x %p\n", __FUNCTION__,
1643 ioc->res_size, (void *) ioc->res_map);
1644}
1645
1646static void __init
1647ioc_sac_init(struct ioc *ioc)
1648{
1649 struct pci_dev *sac = NULL;
1650 struct pci_controller *controller = NULL;
1651
1652 /*
1653 * pci_alloc_coherent() must return a DMA address which is
1654 * SAC (single address cycle) addressable, so allocate a
1655 * pseudo-device to enforce that.
1656 */
1657 sac = kmalloc(sizeof(*sac), GFP_KERNEL);
1658 if (!sac)
1659 panic(PFX "Couldn't allocate struct pci_dev");
1660 memset(sac, 0, sizeof(*sac));
1661
1662 controller = kmalloc(sizeof(*controller), GFP_KERNEL);
1663 if (!controller)
1664 panic(PFX "Couldn't allocate struct pci_controller");
1665 memset(controller, 0, sizeof(*controller));
1666
1667 controller->iommu = ioc;
1668 sac->sysdata = controller;
1669 sac->dma_mask = 0xFFFFFFFFUL;
1670#ifdef CONFIG_PCI
1671 sac->dev.bus = &pci_bus_type;
1672#endif
1673 ioc->sac_only_dev = sac;
1674}
1675
1676static void __init
1677ioc_zx1_init(struct ioc *ioc)
1678{
1679 unsigned long rope_config;
1680 unsigned int i;
1681
1682 if (ioc->rev < 0x20)
1683 panic(PFX "IOC 2.0 or later required for IOMMU support\n");
1684
1685 /* 38 bit memory controller + extra bit for range displaced by MMIO */
1686 ioc->dma_mask = (0x1UL << 39) - 1;
1687
1688 /*
1689 ** Clear ROPE(N)_CONFIG AO bit.
1690 ** Disables "NT Ordering" (~= !"Relaxed Ordering")
1691 ** Overrides bit 1 in DMA Hint Sets.
1692 ** Improves netperf UDP_STREAM by ~10% for tg3 on bcm5701.
1693 */
1694 for (i=0; i<(8*8); i+=8) {
1695 rope_config = READ_REG(ioc->ioc_hpa + IOC_ROPE0_CFG + i);
1696 rope_config &= ~IOC_ROPE_AO;
1697 WRITE_REG(rope_config, ioc->ioc_hpa + IOC_ROPE0_CFG + i);
1698 }
1699}
1700
1701typedef void (initfunc)(struct ioc *);
1702
1703struct ioc_iommu {
1704 u32 func_id;
1705 char *name;
1706 initfunc *init;
1707};
1708
1709static struct ioc_iommu ioc_iommu_info[] __initdata = {
1710 { ZX1_IOC_ID, "zx1", ioc_zx1_init },
1711 { ZX2_IOC_ID, "zx2", NULL },
1712 { SX1000_IOC_ID, "sx1000", NULL },
1713};
1714
1715static struct ioc * __init
1716ioc_init(u64 hpa, void *handle)
1717{
1718 struct ioc *ioc;
1719 struct ioc_iommu *info;
1720
1721 ioc = kmalloc(sizeof(*ioc), GFP_KERNEL);
1722 if (!ioc)
1723 return NULL;
1724
1725 memset(ioc, 0, sizeof(*ioc));
1726
1727 ioc->next = ioc_list;
1728 ioc_list = ioc;
1729
1730 ioc->handle = handle;
1731 ioc->ioc_hpa = ioremap(hpa, 0x1000);
1732
1733 ioc->func_id = READ_REG(ioc->ioc_hpa + IOC_FUNC_ID);
1734 ioc->rev = READ_REG(ioc->ioc_hpa + IOC_FCLASS) & 0xFFUL;
1735 ioc->dma_mask = 0xFFFFFFFFFFFFFFFFUL; /* conservative */
1736
1737 for (info = ioc_iommu_info; info < ioc_iommu_info + ARRAY_SIZE(ioc_iommu_info); info++) {
1738 if (ioc->func_id == info->func_id) {
1739 ioc->name = info->name;
1740 if (info->init)
1741 (info->init)(ioc);
1742 }
1743 }
1744
1745 iovp_size = (1 << iovp_shift);
1746 iovp_mask = ~(iovp_size - 1);
1747
1748 DBG_INIT("%s: PAGE_SIZE %ldK, iovp_size %ldK\n", __FUNCTION__,
1749 PAGE_SIZE >> 10, iovp_size >> 10);
1750
1751 if (!ioc->name) {
1752 ioc->name = kmalloc(24, GFP_KERNEL);
1753 if (ioc->name)
1754 sprintf((char *) ioc->name, "Unknown (%04x:%04x)",
1755 ioc->func_id & 0xFFFF, (ioc->func_id >> 16) & 0xFFFF);
1756 else
1757 ioc->name = "Unknown";
1758 }
1759
1760 ioc_iova_init(ioc);
1761 ioc_resource_init(ioc);
1762 ioc_sac_init(ioc);
1763
1764 if ((long) ~iovp_mask > (long) ia64_max_iommu_merge_mask)
1765 ia64_max_iommu_merge_mask = ~iovp_mask;
1766
1767 printk(KERN_INFO PFX
1768 "%s %d.%d HPA 0x%lx IOVA space %dMb at 0x%lx\n",
1769 ioc->name, (ioc->rev >> 4) & 0xF, ioc->rev & 0xF,
1770 hpa, ioc->iov_size >> 20, ioc->ibase);
1771
1772 return ioc;
1773}
1774
1775
1776
1777/**************************************************************************
1778**
1779** SBA initialization code (HW and SW)
1780**
1781** o identify SBA chip itself
1782** o FIXME: initialize DMA hints for reasonable defaults
1783**
1784**************************************************************************/
1785
1786#ifdef CONFIG_PROC_FS
1787static void *
1788ioc_start(struct seq_file *s, loff_t *pos)
1789{
1790 struct ioc *ioc;
1791 loff_t n = *pos;
1792
1793 for (ioc = ioc_list; ioc; ioc = ioc->next)
1794 if (!n--)
1795 return ioc;
1796
1797 return NULL;
1798}
1799
1800static void *
1801ioc_next(struct seq_file *s, void *v, loff_t *pos)
1802{
1803 struct ioc *ioc = v;
1804
1805 ++*pos;
1806 return ioc->next;
1807}
1808
1809static void
1810ioc_stop(struct seq_file *s, void *v)
1811{
1812}
1813
1814static int
1815ioc_show(struct seq_file *s, void *v)
1816{
1817 struct ioc *ioc = v;
1818 unsigned long *res_ptr = (unsigned long *)ioc->res_map;
1819 int i, used = 0;
1820
1821 seq_printf(s, "Hewlett Packard %s IOC rev %d.%d\n",
1822 ioc->name, ((ioc->rev >> 4) & 0xF), (ioc->rev & 0xF));
1823#ifdef CONFIG_NUMA
1824 if (ioc->node != MAX_NUMNODES)
1825 seq_printf(s, "NUMA node : %d\n", ioc->node);
1826#endif
1827 seq_printf(s, "IOVA size : %ld MB\n", ((ioc->pdir_size >> 3) * iovp_size)/(1024*1024));
1828 seq_printf(s, "IOVA page size : %ld kb\n", iovp_size/1024);
1829
1830 for (i = 0; i < (ioc->res_size / sizeof(unsigned long)); ++i, ++res_ptr)
1831 used += hweight64(*res_ptr);
1832
1833 seq_printf(s, "PDIR size : %d entries\n", ioc->pdir_size >> 3);
1834 seq_printf(s, "PDIR used : %d entries\n", used);
1835
1836#ifdef PDIR_SEARCH_TIMING
1837 {
1838 unsigned long i = 0, avg = 0, min, max;
1839 min = max = ioc->avg_search[0];
1840 for (i = 0; i < SBA_SEARCH_SAMPLE; i++) {
1841 avg += ioc->avg_search[i];
1842 if (ioc->avg_search[i] > max) max = ioc->avg_search[i];
1843 if (ioc->avg_search[i] < min) min = ioc->avg_search[i];
1844 }
1845 avg /= SBA_SEARCH_SAMPLE;
1846 seq_printf(s, "Bitmap search : %ld/%ld/%ld (min/avg/max CPU Cycles/IOVA page)\n",
1847 min, avg, max);
1848 }
1849#endif
1850#ifndef ALLOW_IOV_BYPASS
1851 seq_printf(s, "IOVA bypass disabled\n");
1852#endif
1853 return 0;
1854}
1855
1856static struct seq_operations ioc_seq_ops = {
1857 .start = ioc_start,
1858 .next = ioc_next,
1859 .stop = ioc_stop,
1860 .show = ioc_show
1861};
1862
1863static int
1864ioc_open(struct inode *inode, struct file *file)
1865{
1866 return seq_open(file, &ioc_seq_ops);
1867}
1868
1869static struct file_operations ioc_fops = {
1870 .open = ioc_open,
1871 .read = seq_read,
1872 .llseek = seq_lseek,
1873 .release = seq_release
1874};
1875
1876static void __init
1877ioc_proc_init(void)
1878{
1879 struct proc_dir_entry *dir, *entry;
1880
1881 dir = proc_mkdir("bus/mckinley", NULL);
1882 if (!dir)
1883 return;
1884
1885 entry = create_proc_entry(ioc_list->name, 0, dir);
1886 if (entry)
1887 entry->proc_fops = &ioc_fops;
1888}
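/*
** The statistics emitted by ioc_show() above should then be readable from
** /proc/bus/mckinley/<ioc name>, e.g. something like /proc/bus/mckinley/zx1
** on a zx1 box (illustrative path; the entry name comes from ioc_list->name,
** so only the first IOC on the list gets an entry here).
*/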
1889#endif
1890
1891static void
1892sba_connect_bus(struct pci_bus *bus)
1893{
1894 acpi_handle handle, parent;
1895 acpi_status status;
1896 struct ioc *ioc;
1897
1898 if (!PCI_CONTROLLER(bus))
1899 panic(PFX "no sysdata on bus %d!\n", bus->number);
1900
1901 if (PCI_CONTROLLER(bus)->iommu)
1902 return;
1903
1904 handle = PCI_CONTROLLER(bus)->acpi_handle;
1905 if (!handle)
1906 return;
1907
1908 /*
1909 * The IOC scope encloses PCI root bridges in the ACPI
1910 * namespace, so work our way out until we find an IOC we
1911 * claimed previously.
1912 */
1913 do {
1914 for (ioc = ioc_list; ioc; ioc = ioc->next)
1915 if (ioc->handle == handle) {
1916 PCI_CONTROLLER(bus)->iommu = ioc;
1917 return;
1918 }
1919
1920 status = acpi_get_parent(handle, &parent);
1921 handle = parent;
1922 } while (ACPI_SUCCESS(status));
1923
1924 printk(KERN_WARNING "No IOC for PCI Bus %04x:%02x in ACPI\n", pci_domain_nr(bus), bus->number);
1925}
1926
1927#ifdef CONFIG_NUMA
1928static void __init
1929sba_map_ioc_to_node(struct ioc *ioc, acpi_handle handle)
1930{
1931 struct acpi_buffer buffer = {ACPI_ALLOCATE_BUFFER, NULL};
1932 union acpi_object *obj;
1933 acpi_handle phandle;
1934 unsigned int node;
1935
1936 ioc->node = MAX_NUMNODES;
1937
1938 /*
1939 * Check for a _PXM on this node first. We don't typically see
1940 * one here, so we'll end up getting it from the parent.
1941 */
1942 if (ACPI_FAILURE(acpi_evaluate_object(handle, "_PXM", NULL, &buffer))) {
1943 if (ACPI_FAILURE(acpi_get_parent(handle, &phandle)))
1944 return;
1945
1946 /* Reset the acpi buffer */
1947 buffer.length = ACPI_ALLOCATE_BUFFER;
1948 buffer.pointer = NULL;
1949
1950 if (ACPI_FAILURE(acpi_evaluate_object(phandle, "_PXM", NULL,
1951 &buffer)))
1952 return;
1953 }
1954
1955 if (!buffer.length || !buffer.pointer)
1956 return;
1957
1958 obj = buffer.pointer;
1959
1960 if (obj->type != ACPI_TYPE_INTEGER ||
1961 obj->integer.value >= MAX_PXM_DOMAINS) {
1962 acpi_os_free(buffer.pointer);
1963 return;
1964 }
1965
1966 node = pxm_to_nid_map[obj->integer.value];
1967 acpi_os_free(buffer.pointer);
1968
1969 if (node >= MAX_NUMNODES || !node_online(node))
1970 return;
1971
1972 ioc->node = node;
1973 return;
1974}
1975#else
1976#define sba_map_ioc_to_node(ioc, handle)
1977#endif
1978
1979static int __init
1980acpi_sba_ioc_add(struct acpi_device *device)
1981{
1982 struct ioc *ioc;
1983 acpi_status status;
1984 u64 hpa, length;
1985 struct acpi_buffer buffer;
1986 struct acpi_device_info *dev_info;
1987
1988 status = hp_acpi_csr_space(device->handle, &hpa, &length);
1989 if (ACPI_FAILURE(status))
1990 return 1;
1991
1992 buffer.length = ACPI_ALLOCATE_LOCAL_BUFFER;
1993 status = acpi_get_object_info(device->handle, &buffer);
1994 if (ACPI_FAILURE(status))
1995 return 1;
1996 dev_info = buffer.pointer;
1997
1998 /*
1999 * For HWP0001, only SBA appears in ACPI namespace. It encloses the PCI
2000 * root bridges, and its CSR space includes the IOC function.
2001 */
2002 if (strncmp("HWP0001", dev_info->hardware_id.value, 7) == 0) {
2003 hpa += ZX1_IOC_OFFSET;
2004 /* zx1 based systems default to kernel page size iommu pages */
2005 if (!iovp_shift)
2006 iovp_shift = min(PAGE_SHIFT, 16);
2007 }
2008 ACPI_MEM_FREE(dev_info);
2009
2010 /*
2011 * default anything not caught above or specified on cmdline to 4k
2012 * iommu page size
2013 */
2014 if (!iovp_shift)
2015 iovp_shift = 12;
2016
2017 ioc = ioc_init(hpa, device->handle);
2018 if (!ioc)
2019 return 1;
2020
2021 /* setup NUMA node association */
2022 sba_map_ioc_to_node(ioc, device->handle);
2023 return 0;
2024}
2025
2026static struct acpi_driver acpi_sba_ioc_driver = {
2027 .name = "IOC IOMMU Driver",
2028 .ids = "HWP0001,HWP0004",
2029 .ops = {
2030 .add = acpi_sba_ioc_add,
2031 },
2032};
2033
2034static int __init
2035sba_init(void)
2036{
2037 acpi_bus_register_driver(&acpi_sba_ioc_driver);
2038 if (!ioc_list)
2039 return 0;
2040
2041#ifdef CONFIG_PCI
2042 {
2043 struct pci_bus *b = NULL;
2044 while ((b = pci_find_next_bus(b)) != NULL)
2045 sba_connect_bus(b);
2046 }
2047#endif
2048
2049#ifdef CONFIG_PROC_FS
2050 ioc_proc_init();
2051#endif
2052 return 0;
2053}
2054
2055subsys_initcall(sba_init); /* must be initialized after ACPI etc., but before any drivers... */
2056
2057extern void dig_setup(char**);
2058/*
2059 * MAX_DMA_ADDRESS needs to be setup prior to paging_init to do any good,
2060 * so we use the platform_setup hook to fix it up.
2061 */
2062void __init
2063sba_setup(char **cmdline_p)
2064{
2065 MAX_DMA_ADDRESS = ~0UL;
2066 dig_setup(cmdline_p);
2067}
2068
2069static int __init
2070nosbagart(char *str)
2071{
2072 reserve_sba_gart = 0;
2073 return 1;
2074}
2075
2076int
2077sba_dma_supported (struct device *dev, u64 mask)
2078{
2079 /* make sure it's at least 32bit capable */
2080 return ((mask & 0xFFFFFFFFUL) == 0xFFFFFFFFUL);
2081}
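/*
** (e.g. a 32-bit mask of 0xFFFFFFFF is accepted, while a 24-bit ISA-style
** mask of 0x00FFFFFF is rejected, since its low 32 bits are not all ones)
*/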
2082
2083int
2084sba_dma_mapping_error (dma_addr_t dma_addr)
2085{
2086 return 0;
2087}
2088
2089__setup("nosbagart", nosbagart);
2090
2091static int __init
2092sba_page_override(char *str)
2093{
2094 unsigned long page_size;
2095
2096 page_size = memparse(str, &str);
2097 switch (page_size) {
2098 case 4096:
2099 case 8192:
2100 case 16384:
2101 case 65536:
2102 iovp_shift = ffs(page_size) - 1;
2103 break;
2104 default:
2105 printk("%s: unknown/unsupported iommu page size %ld\n",
2106 __FUNCTION__, page_size);
2107 }
2108
2109 return 1;
2110}
2111
2112__setup("sbapagesize=",sba_page_override);
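/*
** Example (illustrative): booting with "sbapagesize=64k" feeds 65536 to
** memparse(), so iovp_shift becomes ffs(65536) - 1 == 16, i.e. 64KB IOMMU
** pages; unrecognized sizes fall through to the printk above and leave
** iovp_shift untouched.
*/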
2113
2114EXPORT_SYMBOL(sba_dma_mapping_error);
2115EXPORT_SYMBOL(sba_map_single);
2116EXPORT_SYMBOL(sba_unmap_single);
2117EXPORT_SYMBOL(sba_map_sg);
2118EXPORT_SYMBOL(sba_unmap_sg);
2119EXPORT_SYMBOL(sba_dma_supported);
2120EXPORT_SYMBOL(sba_alloc_coherent);
2121EXPORT_SYMBOL(sba_free_coherent);
diff --git a/arch/ia64/hp/sim/Kconfig b/arch/ia64/hp/sim/Kconfig
new file mode 100644
index 000000000000..18ccb1266e18
--- /dev/null
+++ b/arch/ia64/hp/sim/Kconfig
@@ -0,0 +1,20 @@
1
2menu "HP Simulator drivers"
3 depends on IA64_HP_SIM || IA64_GENERIC
4
5config HP_SIMETH
6 bool "Simulated Ethernet "
7
8config HP_SIMSERIAL
9 bool "Simulated serial driver support"
10
11config HP_SIMSERIAL_CONSOLE
12 bool "Console for HP simulator"
13 depends on HP_SIMSERIAL
14
15config HP_SIMSCSI
16 tristate "Simulated SCSI disk"
17 depends on SCSI
18
19endmenu
20
diff --git a/arch/ia64/hp/sim/Makefile b/arch/ia64/hp/sim/Makefile
new file mode 100644
index 000000000000..d10da47931d7
--- /dev/null
+++ b/arch/ia64/hp/sim/Makefile
@@ -0,0 +1,16 @@
1#
2# ia64/platform/hp/sim/Makefile
3#
4# Copyright (C) 2002 Hewlett-Packard Co.
5# David Mosberger-Tang <davidm@hpl.hp.com>
6# Copyright (C) 1999 Silicon Graphics, Inc.
7# Copyright (C) Srinivasa Thirumalachar (sprasad@engr.sgi.com)
8#
9
10obj-y := hpsim_irq.o hpsim_setup.o hpsim.o
11obj-$(CONFIG_IA64_GENERIC) += hpsim_machvec.o
12
13obj-$(CONFIG_HP_SIMETH) += simeth.o
14obj-$(CONFIG_HP_SIMSERIAL) += simserial.o
15obj-$(CONFIG_HP_SIMSERIAL_CONSOLE) += hpsim_console.o
16obj-$(CONFIG_HP_SIMSCSI) += simscsi.o
diff --git a/arch/ia64/hp/sim/boot/Makefile b/arch/ia64/hp/sim/boot/Makefile
new file mode 100644
index 000000000000..df6e9968c845
--- /dev/null
+++ b/arch/ia64/hp/sim/boot/Makefile
@@ -0,0 +1,37 @@
1#
2# ia64/boot/Makefile
3#
4# This file is subject to the terms and conditions of the GNU General Public
5# License. See the file "COPYING" in the main directory of this archive
6# for more details.
7#
8# Copyright (C) 1998, 2003 by David Mosberger-Tang <davidm@hpl.hp.com>
9#
10
11targets-$(CONFIG_IA64_HP_SIM) += bootloader
12targets := vmlinux.bin vmlinux.gz $(targets-y)
13
14quiet_cmd_cptotop = LN $@
15 cmd_cptotop = ln -f $< $@
16
17vmlinux.gz: $(obj)/vmlinux.gz $(addprefix $(obj)/,$(targets-y))
18 $(call cmd,cptotop)
19 @echo ' Kernel: $@ is ready'
20
21boot: bootloader
22
23bootloader: $(obj)/bootloader
24 $(call cmd,cptotop)
25
26$(obj)/vmlinux.gz: $(obj)/vmlinux.bin FORCE
27 $(call if_changed,gzip)
28
29$(obj)/vmlinux.bin: vmlinux FORCE
30 $(call if_changed,objcopy)
31
32
33LDFLAGS_bootloader = -static -T
34
35$(obj)/bootloader: $(src)/bootloader.lds $(obj)/bootloader.o $(obj)/boot_head.o $(obj)/fw-emu.o \
36 lib/lib.a arch/ia64/lib/lib.a FORCE
37 $(call if_changed,ld)
diff --git a/arch/ia64/hp/sim/boot/boot_head.S b/arch/ia64/hp/sim/boot/boot_head.S
new file mode 100644
index 000000000000..9364199e5632
--- /dev/null
+++ b/arch/ia64/hp/sim/boot/boot_head.S
@@ -0,0 +1,144 @@
1/*
2 * Copyright (C) 1998-2003 Hewlett-Packard Co
3 * David Mosberger-Tang <davidm@hpl.hp.com>
4 */
5
6#include <asm/asmmacro.h>
7
8 .bss
9 .align 16
10stack_mem:
11 .skip 16834
12
13 .text
14
15/* This needs to be defined because lib/string.c:strlcat() calls it in case of error... */
16GLOBAL_ENTRY(printk)
17 break 0
18END(printk)
19
20GLOBAL_ENTRY(_start)
21 .prologue
22 .save rp, r0
23 .body
24 movl gp = __gp
25 movl sp = stack_mem
26 bsw.1
27 br.call.sptk.many rp=start_bootloader
28END(_start)
29
30/*
31 * Set a break point on this function so that symbols are available to set breakpoints in
32 * the kernel being debugged.
33 */
34GLOBAL_ENTRY(debug_break)
35 br.ret.sptk.many b0
36END(debug_break)
37
38GLOBAL_ENTRY(ssc)
39 .regstk 5,0,0,0
40 mov r15=in4
41 break 0x80001
42 br.ret.sptk.many b0
43END(ssc)
44
45GLOBAL_ENTRY(jmp_to_kernel)
46 .regstk 2,0,0,0
47 mov r28=in0
48 mov b7=in1
49 br.sptk.few b7
50END(jmp_to_kernel)
51
52
53GLOBAL_ENTRY(pal_emulator_static)
54 mov r8=-1
55 mov r9=256
56 ;;
57 cmp.gtu p6,p7=r9,r28 /* r28 <= 255? */
58(p6) br.cond.sptk.few static
59 ;;
60 mov r9=512
61 ;;
62 cmp.gtu p6,p7=r9,r28
63(p6) br.cond.sptk.few stacked
64 ;;
65static: cmp.eq p6,p7=6,r28 /* PAL_PTCE_INFO */
66(p7) br.cond.sptk.few 1f
67 ;;
68 mov r8=0 /* status = 0 */
69 movl r9=0x100000000 /* tc.base */
70 movl r10=0x0000000200000003 /* count[0], count[1] */
71 movl r11=0x1000000000002000 /* stride[0], stride[1] */
72 br.cond.sptk.few rp
731: cmp.eq p6,p7=14,r28 /* PAL_FREQ_RATIOS */
74(p7) br.cond.sptk.few 1f
75 mov r8=0 /* status = 0 */
76 movl r9 =0x100000064 /* proc_ratio (1/100) */
77 movl r10=0x100000100 /* bus_ratio<<32 (1/256) */
78 movl r11=0x100000064 /* itc_ratio<<32 (1/100) */
79 ;;
801: cmp.eq p6,p7=19,r28 /* PAL_RSE_INFO */
81(p7) br.cond.sptk.few 1f
82 mov r8=0 /* status = 0 */
83 mov r9=96 /* num phys stacked */
84 mov r10=0 /* hints */
85 mov r11=0
86 br.cond.sptk.few rp
871: cmp.eq p6,p7=1,r28 /* PAL_CACHE_FLUSH */
88(p7) br.cond.sptk.few 1f
89 mov r9=ar.lc
 90	movl r8=524288		/* flush 512k cache lines (16MB) */
91 ;;
92 mov ar.lc=r8
93 movl r8=0xe000000000000000
94 ;;
95.loop: fc r8
96 add r8=32,r8
97 br.cloop.sptk.few .loop
98 sync.i
99 ;;
100 srlz.i
101 ;;
102 mov ar.lc=r9
103 mov r8=r0
104 ;;
1051: cmp.eq p6,p7=15,r28 /* PAL_PERF_MON_INFO */
106(p7) br.cond.sptk.few 1f
107 mov r8=0 /* status = 0 */
108 movl r9 =0x08122f04 /* generic=4 width=47 retired=8 cycles=18 */
109 mov r10=0 /* reserved */
110 mov r11=0 /* reserved */
111 mov r16=0xffff /* implemented PMC */
112 mov r17=0x3ffff /* implemented PMD */
113 add r18=8,r29 /* second index */
114 ;;
115 st8 [r29]=r16,16 /* store implemented PMC */
116 st8 [r18]=r0,16 /* clear remaining bits */
117 ;;
118 st8 [r29]=r0,16 /* clear remaining bits */
119 st8 [r18]=r0,16 /* clear remaining bits */
120 ;;
121 st8 [r29]=r17,16 /* store implemented PMD */
122 st8 [r18]=r0,16 /* clear remaining bits */
123 mov r16=0xf0 /* cycles count capable PMC */
124 ;;
125 st8 [r29]=r0,16 /* clear remaining bits */
126 st8 [r18]=r0,16 /* clear remaining bits */
127 mov r17=0xf0 /* retired bundles capable PMC */
128 ;;
129 st8 [r29]=r16,16 /* store cycles capable */
130 st8 [r18]=r0,16 /* clear remaining bits */
131 ;;
132 st8 [r29]=r0,16 /* clear remaining bits */
133 st8 [r18]=r0,16 /* clear remaining bits */
134 ;;
135 st8 [r29]=r17,16 /* store retired bundle capable */
136 st8 [r18]=r0,16 /* clear remaining bits */
137 ;;
138 st8 [r29]=r0,16 /* clear remaining bits */
139 st8 [r18]=r0,16 /* clear remaining bits */
140 ;;
1411: br.cond.sptk.few rp
142stacked:
143 br.ret.sptk.few rp
144END(pal_emulator_static)
diff --git a/arch/ia64/hp/sim/boot/bootloader.c b/arch/ia64/hp/sim/boot/bootloader.c
new file mode 100644
index 000000000000..51a7b7b4dd0e
--- /dev/null
+++ b/arch/ia64/hp/sim/boot/bootloader.c
@@ -0,0 +1,176 @@
1/*
2 * arch/ia64/hp/sim/boot/bootloader.c
3 *
4 * Loads an ELF kernel.
5 *
6 * Copyright (C) 1998-2003 Hewlett-Packard Co
7 * David Mosberger-Tang <davidm@hpl.hp.com>
8 * Stephane Eranian <eranian@hpl.hp.com>
9 *
10 * 01/07/99 S.Eranian modified to pass command line arguments to kernel
11 */
12struct task_struct; /* forward declaration for elf.h */
13
14#include <linux/config.h>
15#include <linux/elf.h>
16#include <linux/init.h>
17#include <linux/kernel.h>
18
19#include <asm/elf.h>
20#include <asm/intrinsics.h>
21#include <asm/pal.h>
22#include <asm/pgtable.h>
23#include <asm/sal.h>
24#include <asm/system.h>
25
26#include "ssc.h"
27
28struct disk_req {
29 unsigned long addr;
30 unsigned len;
31};
32
33struct disk_stat {
34 int fd;
35 unsigned count;
36};
37
38extern void jmp_to_kernel (unsigned long bp, unsigned long e_entry);
39extern struct ia64_boot_param *sys_fw_init (const char *args, int arglen);
40extern void debug_break (void);
41
42static void
43cons_write (const char *buf)
44{
45 unsigned long ch;
46
47 while ((ch = *buf++) != '\0') {
48 ssc(ch, 0, 0, 0, SSC_PUTCHAR);
49 if (ch == '\n')
50 ssc('\r', 0, 0, 0, SSC_PUTCHAR);
51 }
52}
53
54#define MAX_ARGS 32
55
56void
57start_bootloader (void)
58{
59 static char mem[4096];
60 static char buffer[1024];
61 unsigned long off;
62 int fd, i;
63 struct disk_req req;
64 struct disk_stat stat;
65 struct elfhdr *elf;
66 struct elf_phdr *elf_phdr; /* program header */
67 unsigned long e_entry, e_phoff, e_phnum;
68 register struct ia64_boot_param *bp;
69 char *kpath, *args;
70 long arglen = 0;
71
72 ssc(0, 0, 0, 0, SSC_CONSOLE_INIT);
73
74 /*
75 * S.Eranian: extract the commandline argument from the simulator
76 *
77 * The expected format is as follows:
78 *
79 * kernelname args...
80 *
81 * Both are optional but you can't have the second one without the first.
82 */
83 arglen = ssc((long) buffer, 0, 0, 0, SSC_GET_ARGS);
84
85 kpath = "vmlinux";
86 args = buffer;
87 if (arglen > 0) {
88 kpath = buffer;
89 while (*args != ' ' && *args != '\0')
90 ++args, --arglen;
91 if (*args == ' ')
92 *args++ = '\0', --arglen;
93 }
94
95 if (arglen <= 0) {
96 args = "";
97 arglen = 1;
98 }
99
100 fd = ssc((long) kpath, 1, 0, 0, SSC_OPEN);
101
102 if (fd < 0) {
103 cons_write(kpath);
104 cons_write(": file not found, reboot now\n");
105 for(;;);
106 }
107 stat.fd = fd;
108 off = 0;
109
110 req.len = sizeof(mem);
111 req.addr = (long) mem;
112 ssc(fd, 1, (long) &req, off, SSC_READ);
113 ssc((long) &stat, 0, 0, 0, SSC_WAIT_COMPLETION);
114
115 elf = (struct elfhdr *) mem;
 116	if (elf->e_ident[0] != 0x7f || strncmp(elf->e_ident + 1, "ELF", 3) != 0) {
117 cons_write("not an ELF file\n");
118 return;
119 }
120 if (elf->e_type != ET_EXEC) {
121 cons_write("not an ELF executable\n");
122 return;
123 }
124 if (!elf_check_arch(elf)) {
125 cons_write("kernel not for this processor\n");
126 return;
127 }
128
129 e_entry = elf->e_entry;
130 e_phnum = elf->e_phnum;
131 e_phoff = elf->e_phoff;
132
133 cons_write("loading ");
134 cons_write(kpath);
135 cons_write("...\n");
136
137 for (i = 0; i < e_phnum; ++i) {
138 req.len = sizeof(*elf_phdr);
139 req.addr = (long) mem;
140 ssc(fd, 1, (long) &req, e_phoff, SSC_READ);
141 ssc((long) &stat, 0, 0, 0, SSC_WAIT_COMPLETION);
142 if (stat.count != sizeof(*elf_phdr)) {
143 cons_write("failed to read phdr\n");
144 return;
145 }
146 e_phoff += sizeof(*elf_phdr);
147
148 elf_phdr = (struct elf_phdr *) mem;
149
150 if (elf_phdr->p_type != PT_LOAD)
151 continue;
152
153 req.len = elf_phdr->p_filesz;
154 req.addr = __pa(elf_phdr->p_paddr);
155 ssc(fd, 1, (long) &req, elf_phdr->p_offset, SSC_READ);
156 ssc((long) &stat, 0, 0, 0, SSC_WAIT_COMPLETION);
157 memset((char *)__pa(elf_phdr->p_paddr) + elf_phdr->p_filesz, 0,
158 elf_phdr->p_memsz - elf_phdr->p_filesz);
159 }
160 ssc(fd, 0, 0, 0, SSC_CLOSE);
161
162 cons_write("starting kernel...\n");
163
164 /* fake an I/O base address: */
165 ia64_setreg(_IA64_REG_AR_KR0, 0xffffc000000UL);
166
167 bp = sys_fw_init(args, arglen);
168
169 ssc(0, (long) kpath, 0, 0, SSC_LOAD_SYMBOLS);
170
171 debug_break();
172 jmp_to_kernel((unsigned long) bp, e_entry);
173
174 cons_write("kernel returned!\n");
175 ssc(-1, 0, 0, 0, SSC_EXIT);
176}
diff --git a/arch/ia64/hp/sim/boot/bootloader.lds b/arch/ia64/hp/sim/boot/bootloader.lds
new file mode 100644
index 000000000000..69ae58531033
--- /dev/null
+++ b/arch/ia64/hp/sim/boot/bootloader.lds
@@ -0,0 +1,65 @@
1OUTPUT_FORMAT("elf64-ia64-little")
2OUTPUT_ARCH(ia64)
3ENTRY(_start)
4SECTIONS
5{
6 /* Read-only sections, merged into text segment: */
7 . = 0x100000;
8
9 _text = .;
10 .text : { *(__ivt_section) *(.text) }
11 _etext = .;
12
13 /* Global data */
14 _data = .;
15 .rodata : { *(.rodata) *(.rodata.*) }
16 .data : { *(.data) *(.gnu.linkonce.d*) CONSTRUCTORS }
17 __gp = ALIGN (8) + 0x200000;
18 .got : { *(.got.plt) *(.got) }
19 /* We want the small data sections together, so single-instruction offsets
20 can access them all, and initialized data all before uninitialized, so
21 we can shorten the on-disk segment size. */
22 .sdata : { *(.sdata) }
23 _edata = .;
24
25 _bss = .;
26 .sbss : { *(.sbss) *(.scommon) }
27 .bss : { *(.bss) *(COMMON) }
28 . = ALIGN(64 / 8);
29 _end = . ;
30
31 /* Stabs debugging sections. */
32 .stab 0 : { *(.stab) }
33 .stabstr 0 : { *(.stabstr) }
34 .stab.excl 0 : { *(.stab.excl) }
35 .stab.exclstr 0 : { *(.stab.exclstr) }
36 .stab.index 0 : { *(.stab.index) }
37 .stab.indexstr 0 : { *(.stab.indexstr) }
38 .comment 0 : { *(.comment) }
39 /* DWARF debug sections.
40 Symbols in the DWARF debugging sections are relative to the beginning
41 of the section so we begin them at 0. */
42 /* DWARF 1 */
43 .debug 0 : { *(.debug) }
44 .line 0 : { *(.line) }
45 /* GNU DWARF 1 extensions */
46 .debug_srcinfo 0 : { *(.debug_srcinfo) }
47 .debug_sfnames 0 : { *(.debug_sfnames) }
48 /* DWARF 1.1 and DWARF 2 */
49 .debug_aranges 0 : { *(.debug_aranges) }
50 .debug_pubnames 0 : { *(.debug_pubnames) }
51 /* DWARF 2 */
52 .debug_info 0 : { *(.debug_info) }
53 .debug_abbrev 0 : { *(.debug_abbrev) }
54 .debug_line 0 : { *(.debug_line) }
55 .debug_frame 0 : { *(.debug_frame) }
56 .debug_str 0 : { *(.debug_str) }
57 .debug_loc 0 : { *(.debug_loc) }
58 .debug_macinfo 0 : { *(.debug_macinfo) }
59 /* SGI/MIPS DWARF 2 extensions */
60 .debug_weaknames 0 : { *(.debug_weaknames) }
61 .debug_funcnames 0 : { *(.debug_funcnames) }
62 .debug_typenames 0 : { *(.debug_typenames) }
63 .debug_varnames 0 : { *(.debug_varnames) }
64 /* These must appear regardless of . */
65}
diff --git a/arch/ia64/hp/sim/boot/fw-emu.c b/arch/ia64/hp/sim/boot/fw-emu.c
new file mode 100644
index 000000000000..5c46928e3dc6
--- /dev/null
+++ b/arch/ia64/hp/sim/boot/fw-emu.c
@@ -0,0 +1,398 @@
1/*
2 * PAL & SAL emulation.
3 *
4 * Copyright (C) 1998-2001 Hewlett-Packard Co
5 * David Mosberger-Tang <davidm@hpl.hp.com>
6 */
7#include <linux/config.h>
8
9#ifdef CONFIG_PCI
10# include <linux/pci.h>
11#endif
12
13#include <linux/efi.h>
14#include <asm/io.h>
15#include <asm/pal.h>
16#include <asm/sal.h>
17
18#include "ssc.h"
19
20#define MB (1024*1024UL)
21
22#define SIMPLE_MEMMAP 1
23
24#if SIMPLE_MEMMAP
25# define NUM_MEM_DESCS 4
26#else
27# define NUM_MEM_DESCS 16
28#endif
29
30static char fw_mem[( sizeof(struct ia64_boot_param)
31 + sizeof(efi_system_table_t)
32 + sizeof(efi_runtime_services_t)
33 + 1*sizeof(efi_config_table_t)
34 + sizeof(struct ia64_sal_systab)
35 + sizeof(struct ia64_sal_desc_entry_point)
36 + NUM_MEM_DESCS*(sizeof(efi_memory_desc_t))
37 + 1024)] __attribute__ ((aligned (8)));
38
39#define SECS_PER_HOUR (60 * 60)
40#define SECS_PER_DAY (SECS_PER_HOUR * 24)
41
42/* Compute the `struct tm' representation of *T,
43 offset OFFSET seconds east of UTC,
44 and store year, yday, mon, mday, wday, hour, min, sec into *TP.
45 Return nonzero if successful. */
46int
47offtime (unsigned long t, efi_time_t *tp)
48{
49 const unsigned short int __mon_yday[2][13] =
50 {
51 /* Normal years. */
52 { 0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334, 365 },
53 /* Leap years. */
54 { 0, 31, 60, 91, 121, 152, 182, 213, 244, 274, 305, 335, 366 }
55 };
56 long int days, rem, y;
57 const unsigned short int *ip;
58
59 days = t / SECS_PER_DAY;
60 rem = t % SECS_PER_DAY;
61 while (rem < 0) {
62 rem += SECS_PER_DAY;
63 --days;
64 }
65 while (rem >= SECS_PER_DAY) {
66 rem -= SECS_PER_DAY;
67 ++days;
68 }
69 tp->hour = rem / SECS_PER_HOUR;
70 rem %= SECS_PER_HOUR;
71 tp->minute = rem / 60;
72 tp->second = rem % 60;
73 /* January 1, 1970 was a Thursday. */
74 y = 1970;
75
76# define DIV(a, b) ((a) / (b) - ((a) % (b) < 0))
77# define LEAPS_THRU_END_OF(y) (DIV (y, 4) - DIV (y, 100) + DIV (y, 400))
78# define __isleap(year) \
79 ((year) % 4 == 0 && ((year) % 100 != 0 || (year) % 400 == 0))
80
81 while (days < 0 || days >= (__isleap (y) ? 366 : 365)) {
82 /* Guess a corrected year, assuming 365 days per year. */
83 long int yg = y + days / 365 - (days % 365 < 0);
84
85 /* Adjust DAYS and Y to match the guessed year. */
86 days -= ((yg - y) * 365 + LEAPS_THRU_END_OF (yg - 1)
87 - LEAPS_THRU_END_OF (y - 1));
88 y = yg;
89 }
90 tp->year = y;
91 ip = __mon_yday[__isleap(y)];
92 for (y = 11; days < (long int) ip[y]; --y)
93 continue;
94 days -= ip[y];
95 tp->month = y + 1;
96 tp->day = days + 1;
97 return 1;
98}
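/* (For example, offtime(0, &tm) should yield 1970-01-01 00:00:00, the Unix
   epoch, and offtime(86400, &tm) the start of 1970-01-02.) */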
99
100extern void pal_emulator_static (void);
101
102/* Macro to emulate SAL call using legacy IN and OUT calls to CF8, CFC etc.. */
103
104#define BUILD_CMD(addr) ((0x80000000 | (addr)) & ~3)
105
106#define REG_OFFSET(addr) (0x00000000000000FF & (addr))
107#define DEVICE_FUNCTION(addr) (0x000000000000FF00 & (addr))
108#define BUS_NUMBER(addr) (0x0000000000FF0000 & (addr))
109
110static efi_status_t
111fw_efi_get_time (efi_time_t *tm, efi_time_cap_t *tc)
112{
113#if defined(CONFIG_IA64_HP_SIM) || defined(CONFIG_IA64_GENERIC)
114 struct {
115 int tv_sec; /* must be 32bits to work */
116 int tv_usec;
117 } tv32bits;
118
119 ssc((unsigned long) &tv32bits, 0, 0, 0, SSC_GET_TOD);
120
121 memset(tm, 0, sizeof(*tm));
122 offtime(tv32bits.tv_sec, tm);
123
124 if (tc)
125 memset(tc, 0, sizeof(*tc));
126#else
127# error Not implemented yet...
128#endif
129 return EFI_SUCCESS;
130}
131
132static void
133efi_reset_system (int reset_type, efi_status_t status, unsigned long data_size, efi_char16_t *data)
134{
135#if defined(CONFIG_IA64_HP_SIM) || defined(CONFIG_IA64_GENERIC)
136 ssc(status, 0, 0, 0, SSC_EXIT);
137#else
138# error Not implemented yet...
139#endif
140}
141
142static efi_status_t
143efi_unimplemented (void)
144{
145 return EFI_UNSUPPORTED;
146}
147
148static struct sal_ret_values
149sal_emulator (long index, unsigned long in1, unsigned long in2,
150 unsigned long in3, unsigned long in4, unsigned long in5,
151 unsigned long in6, unsigned long in7)
152{
153 long r9 = 0;
154 long r10 = 0;
155 long r11 = 0;
156 long status;
157
158 /*
159 * Don't do a "switch" here since that gives us code that
160 * isn't self-relocatable.
161 */
162 status = 0;
163 if (index == SAL_FREQ_BASE) {
164 switch (in1) {
165 case SAL_FREQ_BASE_PLATFORM:
166 r9 = 200000000;
167 break;
168
169 case SAL_FREQ_BASE_INTERVAL_TIMER:
170 /*
171 * Is this supposed to be the cr.itc frequency
172 * or something platform specific? The SAL
173 * doc ain't exactly clear on this...
174 */
175 r9 = 700000000;
176 break;
177
178 case SAL_FREQ_BASE_REALTIME_CLOCK:
179 r9 = 1;
180 break;
181
182 default:
183 status = -1;
184 break;
185 }
186 } else if (index == SAL_SET_VECTORS) {
187 ;
188 } else if (index == SAL_GET_STATE_INFO) {
189 ;
190 } else if (index == SAL_GET_STATE_INFO_SIZE) {
191 ;
192 } else if (index == SAL_CLEAR_STATE_INFO) {
193 ;
194 } else if (index == SAL_MC_RENDEZ) {
195 ;
196 } else if (index == SAL_MC_SET_PARAMS) {
197 ;
198 } else if (index == SAL_CACHE_FLUSH) {
199 ;
200 } else if (index == SAL_CACHE_INIT) {
201 ;
202#ifdef CONFIG_PCI
203 } else if (index == SAL_PCI_CONFIG_READ) {
204 /*
205 * in1 contains the PCI configuration address and in2
206 * the size of the read. The value that is read is
207 * returned via the general register r9.
208 */
209 outl(BUILD_CMD(in1), 0xCF8);
210 if (in2 == 1) /* Reading byte */
211 r9 = inb(0xCFC + ((REG_OFFSET(in1) & 3)));
212 else if (in2 == 2) /* Reading word */
213 r9 = inw(0xCFC + ((REG_OFFSET(in1) & 2)));
214 else /* Reading dword */
215 r9 = inl(0xCFC);
216 status = PCIBIOS_SUCCESSFUL;
217 } else if (index == SAL_PCI_CONFIG_WRITE) {
218 /*
219 * in1 contains the PCI configuration address, in2 the
220 * size of the write, and in3 the actual value to be
221 * written out.
222 */
223 outl(BUILD_CMD(in1), 0xCF8);
224 if (in2 == 1) /* Writing byte */
225 outb(in3, 0xCFC + ((REG_OFFSET(in1) & 3)));
226 else if (in2 == 2) /* Writing word */
227 outw(in3, 0xCFC + ((REG_OFFSET(in1) & 2)));
228 else /* Writing dword */
229 outl(in3, 0xCFC);
230 status = PCIBIOS_SUCCESSFUL;
231#endif /* CONFIG_PCI */
232 } else if (index == SAL_UPDATE_PAL) {
233 ;
234 } else {
235 status = -1;
236 }
237 return ((struct sal_ret_values) {status, r9, r10, r11});
238}
239
240
241/*
242 * This is here to work around a bug in egcs-1.1.1b that causes the
 243 * compiler to crash (seems like a bug in the new alias analysis code).
244 */
245void *
246id (long addr)
247{
248 return (void *) addr;
249}
250
251struct ia64_boot_param *
252sys_fw_init (const char *args, int arglen)
253{
254 efi_system_table_t *efi_systab;
255 efi_runtime_services_t *efi_runtime;
256 efi_config_table_t *efi_tables;
257 struct ia64_sal_systab *sal_systab;
258 efi_memory_desc_t *efi_memmap, *md;
259 unsigned long *pal_desc, *sal_desc;
260 struct ia64_sal_desc_entry_point *sal_ed;
261 struct ia64_boot_param *bp;
262 unsigned char checksum = 0;
263 char *cp, *cmd_line;
264 int i = 0;
265# define MAKE_MD(typ, attr, start, end) \
266 do { \
267 md = efi_memmap + i++; \
268 md->type = typ; \
269 md->pad = 0; \
270 md->phys_addr = start; \
271 md->virt_addr = 0; \
272 md->num_pages = (end - start) >> 12; \
273 md->attribute = attr; \
274 } while (0)
275
276 memset(fw_mem, 0, sizeof(fw_mem));
277
278 pal_desc = (unsigned long *) &pal_emulator_static;
279 sal_desc = (unsigned long *) &sal_emulator;
280
281 cp = fw_mem;
282 efi_systab = (void *) cp; cp += sizeof(*efi_systab);
283 efi_runtime = (void *) cp; cp += sizeof(*efi_runtime);
284 efi_tables = (void *) cp; cp += sizeof(*efi_tables);
285 sal_systab = (void *) cp; cp += sizeof(*sal_systab);
286 sal_ed = (void *) cp; cp += sizeof(*sal_ed);
287 efi_memmap = (void *) cp; cp += NUM_MEM_DESCS*sizeof(*efi_memmap);
288 bp = (void *) cp; cp += sizeof(*bp);
289 cmd_line = (void *) cp;
290
291 if (args) {
292 if (arglen >= 1024)
293 arglen = 1023;
294 memcpy(cmd_line, args, arglen);
295 } else {
296 arglen = 0;
297 }
298 cmd_line[arglen] = '\0';
299
300 memset(efi_systab, 0, sizeof(efi_systab));
301 efi_systab->hdr.signature = EFI_SYSTEM_TABLE_SIGNATURE;
302 efi_systab->hdr.revision = EFI_SYSTEM_TABLE_REVISION;
303 efi_systab->hdr.headersize = sizeof(efi_systab->hdr);
304 efi_systab->fw_vendor = __pa("H\0e\0w\0l\0e\0t\0t\0-\0P\0a\0c\0k\0a\0r\0d\0\0");
305 efi_systab->fw_revision = 1;
306 efi_systab->runtime = (void *) __pa(efi_runtime);
307 efi_systab->nr_tables = 1;
308 efi_systab->tables = __pa(efi_tables);
309
310 efi_runtime->hdr.signature = EFI_RUNTIME_SERVICES_SIGNATURE;
311 efi_runtime->hdr.revision = EFI_RUNTIME_SERVICES_REVISION;
312 efi_runtime->hdr.headersize = sizeof(efi_runtime->hdr);
313 efi_runtime->get_time = __pa(&fw_efi_get_time);
314 efi_runtime->set_time = __pa(&efi_unimplemented);
315 efi_runtime->get_wakeup_time = __pa(&efi_unimplemented);
316 efi_runtime->set_wakeup_time = __pa(&efi_unimplemented);
317 efi_runtime->set_virtual_address_map = __pa(&efi_unimplemented);
318 efi_runtime->get_variable = __pa(&efi_unimplemented);
319 efi_runtime->get_next_variable = __pa(&efi_unimplemented);
320 efi_runtime->set_variable = __pa(&efi_unimplemented);
321 efi_runtime->get_next_high_mono_count = __pa(&efi_unimplemented);
322 efi_runtime->reset_system = __pa(&efi_reset_system);
323
324 efi_tables->guid = SAL_SYSTEM_TABLE_GUID;
325 efi_tables->table = __pa(sal_systab);
326
327 /* fill in the SAL system table: */
328 memcpy(sal_systab->signature, "SST_", 4);
329 sal_systab->size = sizeof(*sal_systab);
330 sal_systab->sal_rev_minor = 1;
331 sal_systab->sal_rev_major = 0;
332 sal_systab->entry_count = 1;
333
334#ifdef CONFIG_IA64_GENERIC
335 strcpy(sal_systab->oem_id, "Generic");
336 strcpy(sal_systab->product_id, "IA-64 system");
337#endif
338
339#ifdef CONFIG_IA64_HP_SIM
340 strcpy(sal_systab->oem_id, "Hewlett-Packard");
341 strcpy(sal_systab->product_id, "HP-simulator");
342#endif
343
344#ifdef CONFIG_IA64_SDV
345 strcpy(sal_systab->oem_id, "Intel");
346 strcpy(sal_systab->product_id, "SDV");
347#endif
348
349 /* fill in an entry point: */
350 sal_ed->type = SAL_DESC_ENTRY_POINT;
351 sal_ed->pal_proc = __pa(pal_desc[0]);
352 sal_ed->sal_proc = __pa(sal_desc[0]);
353 sal_ed->gp = __pa(sal_desc[1]);
354
355 for (cp = (char *) sal_systab; cp < (char *) efi_memmap; ++cp)
356 checksum += *cp;
357
358 sal_systab->checksum = -checksum;
359
360#if SIMPLE_MEMMAP
361 /* simulate free memory at physical address zero */
362 MAKE_MD(EFI_BOOT_SERVICES_DATA, EFI_MEMORY_WB, 0*MB, 1*MB);
363 MAKE_MD(EFI_PAL_CODE, EFI_MEMORY_WB, 1*MB, 2*MB);
364 MAKE_MD(EFI_CONVENTIONAL_MEMORY, EFI_MEMORY_WB, 2*MB, 130*MB);
365 MAKE_MD(EFI_CONVENTIONAL_MEMORY, EFI_MEMORY_WB, 4096*MB, 4128*MB);
366#else
367 MAKE_MD( 4, 0x9, 0x0000000000000000, 0x0000000000001000);
368 MAKE_MD( 7, 0x9, 0x0000000000001000, 0x000000000008a000);
369 MAKE_MD( 4, 0x9, 0x000000000008a000, 0x00000000000a0000);
370 MAKE_MD( 5, 0x8000000000000009, 0x00000000000c0000, 0x0000000000100000);
371 MAKE_MD( 7, 0x9, 0x0000000000100000, 0x0000000004400000);
372 MAKE_MD( 2, 0x9, 0x0000000004400000, 0x0000000004be5000);
373 MAKE_MD( 7, 0x9, 0x0000000004be5000, 0x000000007f77e000);
374 MAKE_MD( 6, 0x8000000000000009, 0x000000007f77e000, 0x000000007fb94000);
375 MAKE_MD( 6, 0x8000000000000009, 0x000000007fb94000, 0x000000007fb95000);
376 MAKE_MD( 6, 0x8000000000000009, 0x000000007fb95000, 0x000000007fc00000);
377 MAKE_MD(13, 0x8000000000000009, 0x000000007fc00000, 0x000000007fc3a000);
378 MAKE_MD( 7, 0x9, 0x000000007fc3a000, 0x000000007fea0000);
379 MAKE_MD( 5, 0x8000000000000009, 0x000000007fea0000, 0x000000007fea8000);
380 MAKE_MD( 7, 0x9, 0x000000007fea8000, 0x000000007feab000);
381 MAKE_MD( 5, 0x8000000000000009, 0x000000007feab000, 0x000000007ffff000);
382 MAKE_MD( 7, 0x9, 0x00000000ff400000, 0x0000000104000000);
383#endif
384
385 bp->efi_systab = __pa(&fw_mem);
386 bp->efi_memmap = __pa(efi_memmap);
387 bp->efi_memmap_size = NUM_MEM_DESCS*sizeof(efi_memory_desc_t);
388 bp->efi_memdesc_size = sizeof(efi_memory_desc_t);
389 bp->efi_memdesc_version = 1;
390 bp->command_line = __pa(cmd_line);
391 bp->console_info.num_cols = 80;
392 bp->console_info.num_rows = 25;
393 bp->console_info.orig_x = 0;
394 bp->console_info.orig_y = 24;
395 bp->fpswa = 0;
396
397 return bp;
398}
diff --git a/arch/ia64/hp/sim/boot/ssc.h b/arch/ia64/hp/sim/boot/ssc.h
new file mode 100644
index 000000000000..3b94c03e43a9
--- /dev/null
+++ b/arch/ia64/hp/sim/boot/ssc.h
@@ -0,0 +1,35 @@
1/*
2 * Copyright (C) 1998-2003 Hewlett-Packard Co
3 * David Mosberger-Tang <davidm@hpl.hp.com>
4 * Stephane Eranian <eranian@hpl.hp.com>
5 */
6#ifndef ssc_h
7#define ssc_h
8
9/* Simulator system calls: */
10
11#define SSC_CONSOLE_INIT 20
12#define SSC_GETCHAR 21
13#define SSC_PUTCHAR 31
14#define SSC_OPEN 50
15#define SSC_CLOSE 51
16#define SSC_READ 52
17#define SSC_WRITE 53
18#define SSC_GET_COMPLETION 54
19#define SSC_WAIT_COMPLETION 55
20#define SSC_CONNECT_INTERRUPT 58
21#define SSC_GENERATE_INTERRUPT 59
22#define SSC_SET_PERIODIC_INTERRUPT 60
23#define SSC_GET_RTC 65
24#define SSC_EXIT 66
25#define SSC_LOAD_SYMBOLS 69
26#define SSC_GET_TOD 74
27
28#define SSC_GET_ARGS 75
29
30/*
31 * Simulator system call.
32 */
33extern long ssc (long arg0, long arg1, long arg2, long arg3, int nr);
34
35#endif /* ssc_h */
diff --git a/arch/ia64/hp/sim/hpsim.S b/arch/ia64/hp/sim/hpsim.S
new file mode 100644
index 000000000000..ff16e8a857d1
--- /dev/null
+++ b/arch/ia64/hp/sim/hpsim.S
@@ -0,0 +1,10 @@
1#include <asm/asmmacro.h>
2
3/*
4 * Simulator system call.
5 */
6GLOBAL_ENTRY(ia64_ssc)
7 mov r15=r36
8 break 0x80001
9 br.ret.sptk.many rp
10END(ia64_ssc)
diff --git a/arch/ia64/hp/sim/hpsim_console.c b/arch/ia64/hp/sim/hpsim_console.c
new file mode 100644
index 000000000000..5deff21e5877
--- /dev/null
+++ b/arch/ia64/hp/sim/hpsim_console.c
@@ -0,0 +1,65 @@
1/*
2 * Platform dependent support for HP simulator.
3 *
4 * Copyright (C) 1998, 1999, 2002 Hewlett-Packard Co
5 * David Mosberger-Tang <davidm@hpl.hp.com>
6 * Copyright (C) 1999 Vijay Chander <vijay@engr.sgi.com>
7 */
8#include <linux/config.h>
9
10#include <linux/init.h>
11#include <linux/kernel.h>
12#include <linux/param.h>
13#include <linux/string.h>
14#include <linux/types.h>
15#include <linux/tty.h>
16#include <linux/kdev_t.h>
17#include <linux/console.h>
18
19#include <asm/delay.h>
20#include <asm/irq.h>
21#include <asm/pal.h>
22#include <asm/machvec.h>
23#include <asm/pgtable.h>
24#include <asm/sal.h>
25
26#include "hpsim_ssc.h"
27
28static int simcons_init (struct console *, char *);
29static void simcons_write (struct console *, const char *, unsigned);
30static struct tty_driver *simcons_console_device (struct console *, int *);
31
32struct console hpsim_cons = {
33 .name = "simcons",
34 .write = simcons_write,
35 .device = simcons_console_device,
36 .setup = simcons_init,
37 .flags = CON_PRINTBUFFER,
38 .index = -1,
39};
40
41static int
42simcons_init (struct console *cons, char *options)
43{
44 return 0;
45}
46
47static void
48simcons_write (struct console *cons, const char *buf, unsigned count)
49{
50 unsigned long ch;
51
52 while (count-- > 0) {
53 ch = *buf++;
54 ia64_ssc(ch, 0, 0, 0, SSC_PUTCHAR);
55 if (ch == '\n')
56 ia64_ssc('\r', 0, 0, 0, SSC_PUTCHAR);
57 }
58}
59
60static struct tty_driver *simcons_console_device (struct console *c, int *index)
61{
62 extern struct tty_driver *hp_simserial_driver;
63 *index = c->index;
64 return hp_simserial_driver;
65}
diff --git a/arch/ia64/hp/sim/hpsim_irq.c b/arch/ia64/hp/sim/hpsim_irq.c
new file mode 100644
index 000000000000..c0d25a2a3e9c
--- /dev/null
+++ b/arch/ia64/hp/sim/hpsim_irq.c
@@ -0,0 +1,51 @@
1/*
2 * Platform dependent support for HP simulator.
3 *
4 * Copyright (C) 1998-2001 Hewlett-Packard Co
5 * Copyright (C) 1998-2001 David Mosberger-Tang <davidm@hpl.hp.com>
6 */
7
8#include <linux/init.h>
9#include <linux/kernel.h>
10#include <linux/sched.h>
11#include <linux/irq.h>
12
13static unsigned int
14hpsim_irq_startup (unsigned int irq)
15{
16 return 0;
17}
18
19static void
20hpsim_irq_noop (unsigned int irq)
21{
22}
23
24static void
25hpsim_set_affinity_noop (unsigned int a, cpumask_t b)
26{
27}
28
29static struct hw_interrupt_type irq_type_hp_sim = {
30 .typename = "hpsim",
31 .startup = hpsim_irq_startup,
32 .shutdown = hpsim_irq_noop,
33 .enable = hpsim_irq_noop,
34 .disable = hpsim_irq_noop,
35 .ack = hpsim_irq_noop,
36 .end = hpsim_irq_noop,
37 .set_affinity = hpsim_set_affinity_noop,
38};
39
40void __init
41hpsim_irq_init (void)
42{
43 irq_desc_t *idesc;
44 int i;
45
46 for (i = 0; i < NR_IRQS; ++i) {
47 idesc = irq_descp(i);
48 if (idesc->handler == &no_irq_type)
49 idesc->handler = &irq_type_hp_sim;
50 }
51}
diff --git a/arch/ia64/hp/sim/hpsim_machvec.c b/arch/ia64/hp/sim/hpsim_machvec.c
new file mode 100644
index 000000000000..c21419359185
--- /dev/null
+++ b/arch/ia64/hp/sim/hpsim_machvec.c
@@ -0,0 +1,3 @@
1#define MACHVEC_PLATFORM_NAME hpsim
2#define MACHVEC_PLATFORM_HEADER <asm/machvec_hpsim.h>
3#include <asm/machvec_init.h>
diff --git a/arch/ia64/hp/sim/hpsim_setup.c b/arch/ia64/hp/sim/hpsim_setup.c
new file mode 100644
index 000000000000..694fc86bfbd5
--- /dev/null
+++ b/arch/ia64/hp/sim/hpsim_setup.c
@@ -0,0 +1,52 @@
1/*
2 * Platform dependent support for HP simulator.
3 *
4 * Copyright (C) 1998, 1999, 2002 Hewlett-Packard Co
5 * David Mosberger-Tang <davidm@hpl.hp.com>
6 * Copyright (C) 1999 Vijay Chander <vijay@engr.sgi.com>
7 */
8#include <linux/config.h>
9#include <linux/console.h>
10#include <linux/init.h>
11#include <linux/kdev_t.h>
12#include <linux/kernel.h>
13#include <linux/major.h>
14#include <linux/param.h>
15#include <linux/root_dev.h>
16#include <linux/string.h>
17#include <linux/types.h>
18
19#include <asm/delay.h>
20#include <asm/irq.h>
21#include <asm/pal.h>
22#include <asm/machvec.h>
23#include <asm/pgtable.h>
24#include <asm/sal.h>
25
26#include "hpsim_ssc.h"
27
28void
29ia64_ssc_connect_irq (long intr, long irq)
30{
31 ia64_ssc(intr, irq, 0, 0, SSC_CONNECT_INTERRUPT);
32}
33
34void
35ia64_ctl_trace (long on)
36{
37 ia64_ssc(on, 0, 0, 0, SSC_CTL_TRACE);
38}
39
40void __init
41hpsim_setup (char **cmdline_p)
42{
43 ROOT_DEV = Root_SDA1; /* default to first SCSI drive */
44
45#ifdef CONFIG_HP_SIMSERIAL_CONSOLE
46 {
47 extern struct console hpsim_cons;
48 if (ia64_platform_is("hpsim"))
49 register_console(&hpsim_cons);
50 }
51#endif
52}
diff --git a/arch/ia64/hp/sim/hpsim_ssc.h b/arch/ia64/hp/sim/hpsim_ssc.h
new file mode 100644
index 000000000000..bfa3906274b3
--- /dev/null
+++ b/arch/ia64/hp/sim/hpsim_ssc.h
@@ -0,0 +1,36 @@
1/*
2 * Platform dependent support for HP simulator.
3 *
4 * Copyright (C) 1998, 1999 Hewlett-Packard Co
5 * Copyright (C) 1998, 1999 David Mosberger-Tang <davidm@hpl.hp.com>
6 * Copyright (C) 1999 Vijay Chander <vijay@engr.sgi.com>
7 */
8#ifndef _IA64_PLATFORM_HPSIM_SSC_H
9#define _IA64_PLATFORM_HPSIM_SSC_H
10
11/* Simulator system calls: */
12
13#define SSC_CONSOLE_INIT 20
14#define SSC_GETCHAR 21
15#define SSC_PUTCHAR 31
16#define SSC_CONNECT_INTERRUPT 58
17#define SSC_GENERATE_INTERRUPT 59
18#define SSC_SET_PERIODIC_INTERRUPT 60
19#define SSC_GET_RTC 65
20#define SSC_EXIT 66
21#define SSC_LOAD_SYMBOLS 69
22#define SSC_GET_TOD 74
23#define SSC_CTL_TRACE 76
24
25#define SSC_NETDEV_PROBE 100
26#define SSC_NETDEV_SEND 101
27#define SSC_NETDEV_RECV 102
28#define SSC_NETDEV_ATTACH 103
29#define SSC_NETDEV_DETACH 104
30
31/*
32 * Simulator system call.
33 */
34extern long ia64_ssc (long arg0, long arg1, long arg2, long arg3, int nr);
35
36#endif /* _IA64_PLATFORM_HPSIM_SSC_H */
diff --git a/arch/ia64/hp/sim/simeth.c b/arch/ia64/hp/sim/simeth.c
new file mode 100644
index 000000000000..ae84a1018a89
--- /dev/null
+++ b/arch/ia64/hp/sim/simeth.c
@@ -0,0 +1,530 @@
1/*
2 * Simulated Ethernet Driver
3 *
4 * Copyright (C) 1999-2001, 2003 Hewlett-Packard Co
5 * Stephane Eranian <eranian@hpl.hp.com>
6 */
7#include <linux/config.h>
8#include <linux/kernel.h>
9#include <linux/sched.h>
10#include <linux/types.h>
11#include <linux/in.h>
12#include <linux/string.h>
13#include <linux/init.h>
14#include <linux/errno.h>
15#include <linux/interrupt.h>
16#include <linux/netdevice.h>
17#include <linux/etherdevice.h>
18#include <linux/inetdevice.h>
19#include <linux/if_ether.h>
20#include <linux/if_arp.h>
21#include <linux/skbuff.h>
22#include <linux/notifier.h>
23#include <linux/bitops.h>
24#include <asm/system.h>
25#include <asm/irq.h>
26
27#define SIMETH_RECV_MAX 10
28
29/*
30 * Maximum possible received frame for Ethernet.
 31 * We preallocate an sk_buff of that size to avoid a costly
 32 * memcpy from a temporary buffer into the sk_buff. We do basically
33 * what's done in other drivers, like eepro with a ring.
34 * The difference is, of course, that we don't have real DMA !!!
35 */
36#define SIMETH_FRAME_SIZE ETH_FRAME_LEN
37
38
39#define SSC_NETDEV_PROBE 100
40#define SSC_NETDEV_SEND 101
41#define SSC_NETDEV_RECV 102
42#define SSC_NETDEV_ATTACH 103
43#define SSC_NETDEV_DETACH 104
44
45#define NETWORK_INTR 8
46
47struct simeth_local {
48 struct net_device_stats stats;
49 int simfd; /* descriptor in the simulator */
50};
51
52static int simeth_probe1(void);
53static int simeth_open(struct net_device *dev);
54static int simeth_close(struct net_device *dev);
55static int simeth_tx(struct sk_buff *skb, struct net_device *dev);
56static int simeth_rx(struct net_device *dev);
57static struct net_device_stats *simeth_get_stats(struct net_device *dev);
58static irqreturn_t simeth_interrupt(int irq, void *dev_id, struct pt_regs * regs);
59static void set_multicast_list(struct net_device *dev);
60static int simeth_device_event(struct notifier_block *this,unsigned long event, void *ptr);
61
62static char *simeth_version="0.3";
63
64/*
65 * This variable is used to establish a mapping between the Linux/ia64 kernel
66 * and the host linux kernel.
67 *
68 * As of today, we support only one card, even though most of the code
69 * is ready for many more. The mapping is then:
70 * linux/ia64 -> linux/x86
71 * eth0 -> eth1
72 *
73 * In the future, with some string operations, we could easily support up
74 * to 10 cards (0-9).
75 *
76 * The default mapping can be changed on the kernel command line by
77 * specifying simeth=ethX (or whatever string you want).
78 */
79static char *simeth_device="eth0"; /* default host interface to use */
80
81
82
83static volatile unsigned int card_count; /* how many cards "found" so far */
84static int simeth_debug; /* set to 1 to get debug information */
85
86/*
87 * Used to catch IFF_UP & IFF_DOWN events
88 */
89static struct notifier_block simeth_dev_notifier = {
90 simeth_device_event,
91 0
92};
93
94
95/*
96 * Function used when using a kernel command line option.
97 *
98 * Format: simeth=interface_name (like eth0)
99 */
100static int __init
101simeth_setup(char *str)
102{
103 simeth_device = str;
104 return 1;
105}
106
107__setup("simeth=", simeth_setup);
108
109/*
110 * Function used to probe for simeth devices when not installed
111 * as a loadable module
112 */
113
114int __init
115simeth_probe (void)
116{
117 int r;
118
119 printk(KERN_INFO "simeth: v%s\n", simeth_version);
120
121 r = simeth_probe1();
122
123 if (r == 0) register_netdevice_notifier(&simeth_dev_notifier);
124
125 return r;
126}
127
128extern long ia64_ssc (long, long, long, long, int);
129extern void ia64_ssc_connect_irq (long intr, long irq);
130
131static inline int
132netdev_probe(char *name, unsigned char *ether)
133{
134 return ia64_ssc(__pa(name), __pa(ether), 0,0, SSC_NETDEV_PROBE);
135}
136
137
138static inline int
139netdev_connect(int irq)
140{
141 /* XXX Fix me
142 * this does not support multiple cards
143 * also no return value
144 */
145 ia64_ssc_connect_irq(NETWORK_INTR, irq);
146 return 0;
147}
148
149static inline int
150netdev_attach(int fd, int irq, unsigned int ipaddr)
151{
152 /* this puts the host interface in the right mode (start interrupting) */
153 return ia64_ssc(fd, ipaddr, 0,0, SSC_NETDEV_ATTACH);
154}
155
156
157static inline int
158netdev_detach(int fd)
159{
160 /*
161 * inactivate the host interface (don't interrupt anymore) */
162 return ia64_ssc(fd, 0,0,0, SSC_NETDEV_DETACH);
163}
164
165static inline int
166netdev_send(int fd, unsigned char *buf, unsigned int len)
167{
168 return ia64_ssc(fd, __pa(buf), len, 0, SSC_NETDEV_SEND);
169}
170
171static inline int
172netdev_read(int fd, unsigned char *buf, unsigned int len)
173{
174 return ia64_ssc(fd, __pa(buf), len, 0, SSC_NETDEV_RECV);
175}
176
177/*
178 * Function shared with module code, so cannot be in init section
179 *
180 * So far this function "detects" only one card (test_&_set) but could
181 * be extended easily.
182 *
183 * Return:
184 * - -ENODEV if no device found
185 * - -ENOMEM if out of memory
186 * - 0 otherwise
187 */
188static int
189simeth_probe1(void)
190{
191 unsigned char mac_addr[ETH_ALEN];
192 struct simeth_local *local;
193 struct net_device *dev;
194 int fd, i, err;
195
196 /*
197 * XXX Fix me
198 * let's support just one card for now
199 */
200 if (test_and_set_bit(0, &card_count))
201 return -ENODEV;
202
203 /*
204 * check with the simulator for the device
205 */
206 fd = netdev_probe(simeth_device, mac_addr);
207 if (fd == -1)
208 return -ENODEV;
209
210 dev = alloc_etherdev(sizeof(struct simeth_local));
211 if (!dev)
212 return -ENOMEM;
213
214 memcpy(dev->dev_addr, mac_addr, sizeof(mac_addr));
215
216 local = dev->priv;
217 local->simfd = fd; /* keep track of underlying file descriptor */
218
219 dev->open = simeth_open;
220 dev->stop = simeth_close;
221 dev->hard_start_xmit = simeth_tx;
222 dev->get_stats = simeth_get_stats;
223	dev->set_multicast_list = set_multicast_list; /* not yet used */
224
225 err = register_netdev(dev);
226 if (err) {
227 free_netdev(dev);
228 return err;
229 }
230
231 dev->irq = assign_irq_vector(AUTO_ASSIGN);
232
233 /*
234	 * attach the interrupt in the simulator; this does not enable interrupts
235	 * until a netdev_attach() is called
236 */
237 netdev_connect(dev->irq);
238
239 printk(KERN_INFO "%s: hosteth=%s simfd=%d, HwAddr",
240 dev->name, simeth_device, local->simfd);
241 for(i = 0; i < ETH_ALEN; i++) {
242 printk(" %2.2x", dev->dev_addr[i]);
243 }
244 printk(", IRQ %d\n", dev->irq);
245
246 return 0;
247}
248
249/*
250 * actually binds the device to an interrupt vector
251 */
252static int
253simeth_open(struct net_device *dev)
254{
255 if (request_irq(dev->irq, simeth_interrupt, 0, "simeth", dev)) {
256 printk(KERN_WARNING "simeth: unable to get IRQ %d.\n", dev->irq);
257 return -EAGAIN;
258 }
259
260 netif_start_queue(dev);
261
262 return 0;
263}
264
265/* copied from lapbether.c */
266static __inline__ int dev_is_ethdev(struct net_device *dev)
267{
268 return ( dev->type == ARPHRD_ETHER && strncmp(dev->name, "dummy", 5));
269}
270
271
272/*
273 * Handler for IFF_UP or IFF_DOWN
274 *
275 * The reason for this is that we don't want to be interrupted when the
276 * interface is down. There is no way to disconnect in the simulator. Instead
277 * we use this function to shut down packet processing in the frame filter
278 * in the simulator. Thus no interrupts are generated.
279 *
280 *
281 * This is also the place where we pass the IP address of this device to the
282 * simulator so that we can start filtering packets for it.
283 *
284 * There may be a better way of doing this, but I don't know which yet.
285 */
286static int
287simeth_device_event(struct notifier_block *this,unsigned long event, void *ptr)
288{
289 struct net_device *dev = ptr;
290 struct simeth_local *local;
291 struct in_device *in_dev;
292 struct in_ifaddr **ifap = NULL;
293 struct in_ifaddr *ifa = NULL;
294 int r;
295
296
297 if ( ! dev ) {
298 printk(KERN_WARNING "simeth_device_event dev=0\n");
299 return NOTIFY_DONE;
300 }
301
302 if ( event != NETDEV_UP && event != NETDEV_DOWN ) return NOTIFY_DONE;
303
304 /*
305 * Check whether or not it's for an ethernet device
306 *
307 * XXX Fixme: This works only as long as we support one
308 * type of ethernet device.
309 */
310 if ( !dev_is_ethdev(dev) ) return NOTIFY_DONE;
311
312 if ((in_dev=dev->ip_ptr) != NULL) {
313 for (ifap=&in_dev->ifa_list; (ifa=*ifap) != NULL; ifap=&ifa->ifa_next)
314 if (strcmp(dev->name, ifa->ifa_label) == 0) break;
315 }
316 if ( ifa == NULL ) {
317		printk(KERN_ERR "simeth_device_event: can't find device %s's ifa\n", dev->name);
318 return NOTIFY_DONE;
319 }
320
321 printk(KERN_INFO "simeth_device_event: %s ipaddr=0x%x\n",
322 dev->name, htonl(ifa->ifa_local));
323
324 /*
325 * XXX Fix me
326 * if the device was up, and we're simply reconfiguring it, not sure
327 * we get DOWN then UP.
328 */
329
330 local = dev->priv;
331 /* now do it for real */
332 r = event == NETDEV_UP ?
333 netdev_attach(local->simfd, dev->irq, htonl(ifa->ifa_local)):
334 netdev_detach(local->simfd);
335
336 printk(KERN_INFO "simeth: netdev_attach/detach: event=%s ->%d\n",
337 event == NETDEV_UP ? "attach":"detach", r);
338
339 return NOTIFY_DONE;
340}
341
342static int
343simeth_close(struct net_device *dev)
344{
345 netif_stop_queue(dev);
346
347 free_irq(dev->irq, dev);
348
349 return 0;
350}
351
352/*
353 * Only used for debug
354 */
355static void
356frame_print(unsigned char *from, unsigned char *frame, int len)
357{
358 int i;
359
360 printk("%s: (%d) %02x", from, len, frame[0] & 0xff);
361 for(i=1; i < 6; i++ ) {
362 printk(":%02x", frame[i] &0xff);
363 }
364 printk(" %2x", frame[6] &0xff);
365 for(i=7; i < 12; i++ ) {
366 printk(":%02x", frame[i] &0xff);
367 }
368 printk(" [%02x%02x]\n", frame[12], frame[13]);
369
370 for(i=14; i < len; i++ ) {
371 printk("%02x ", frame[i] &0xff);
372 if ( (i%10)==0) printk("\n");
373 }
374 printk("\n");
375}
376
377
378/*
379 * Function used to transmit a frame; the very last step on the path before
380 * going to the simulator.
381 */
382static int
383simeth_tx(struct sk_buff *skb, struct net_device *dev)
384{
385 struct simeth_local *local = dev->priv;
386
387#if 0
388 /* ensure we have at least ETH_ZLEN bytes (min frame size) */
389 unsigned int length = ETH_ZLEN < skb->len ? skb->len : ETH_ZLEN;
390	/* Where do the extra padding bytes come from in the skbuff? */
391#else
392 /* the real driver in the host system is going to take care of that
393 * or maybe it's the NIC itself.
394 */
395 unsigned int length = skb->len;
396#endif
397
398 local->stats.tx_bytes += skb->len;
399 local->stats.tx_packets++;
400
401
402 if (simeth_debug > 5) frame_print("simeth_tx", skb->data, length);
403
404 netdev_send(local->simfd, skb->data, length);
405
406 /*
407 * we are synchronous on write, so we don't simulate a
408	 * transmit complete interrupt, thus we don't need to arm a tx
409 */
410
411 dev_kfree_skb(skb);
412 return 0;
413}
414
415static inline struct sk_buff *
416make_new_skb(struct net_device *dev)
417{
418 struct sk_buff *nskb;
419
420 /*
421 * The +2 is used to make sure that the IP header is nicely
422	 * aligned (on a 4-byte boundary: 14+2=16)
423 */
424 nskb = dev_alloc_skb(SIMETH_FRAME_SIZE + 2);
425 if ( nskb == NULL ) {
426 printk(KERN_NOTICE "%s: memory squeeze. dropping packet.\n", dev->name);
427 return NULL;
428 }
429 nskb->dev = dev;
430
431 skb_reserve(nskb, 2); /* Align IP on 16 byte boundaries */
432
433 skb_put(nskb,SIMETH_FRAME_SIZE);
434
435 return nskb;
436}
437
438/*
439 * called from interrupt handler to process a received frame
440 */
441static int
442simeth_rx(struct net_device *dev)
443{
444 struct simeth_local *local;
445 struct sk_buff *skb;
446 int len;
447 int rcv_count = SIMETH_RECV_MAX;
448
449 local = dev->priv;
450 /*
451 * the loop concept has been borrowed from other drivers
452	 * looks to me like it's a throttling thing to avoid pushing too many
453 * packets at one time into the stack. Making sure we can process them
454 * upstream and make forward progress overall
455 */
456 do {
457 if ( (skb=make_new_skb(dev)) == NULL ) {
458 printk(KERN_NOTICE "%s: memory squeeze. dropping packet.\n", dev->name);
459 local->stats.rx_dropped++;
460 return 0;
461 }
462 /*
463 * Read only one frame at a time
464 */
465 len = netdev_read(local->simfd, skb->data, SIMETH_FRAME_SIZE);
466 if ( len == 0 ) {
467 if ( simeth_debug > 0 ) printk(KERN_WARNING "%s: count=%d netdev_read=0\n",
468 dev->name, SIMETH_RECV_MAX-rcv_count);
469 break;
470 }
471#if 0
472 /*
473 * XXX Fix me
474 * Should really do a csum+copy here
475 */
476 memcpy(skb->data, frame, len);
477#endif
478 skb->protocol = eth_type_trans(skb, dev);
479
480 if ( simeth_debug > 6 ) frame_print("simeth_rx", skb->data, len);
481
482 /*
483 * push the packet up & trigger software interrupt
484 */
485 netif_rx(skb);
486
487 local->stats.rx_packets++;
488 local->stats.rx_bytes += len;
489
490 } while ( --rcv_count );
491
492 return len; /* 0 = nothing left to read, otherwise, we can try again */
493}
494
495/*
496 * Interrupt handler (Yes, we can do it too !!!)
497 */
498static irqreturn_t
499simeth_interrupt(int irq, void *dev_id, struct pt_regs * regs)
500{
501 struct net_device *dev = dev_id;
502
503 if ( dev == NULL ) {
504 printk(KERN_WARNING "simeth: irq %d for unknown device\n", irq);
505 return IRQ_NONE;
506 }
507
508 /*
509 * very simple loop because we get interrupts only when receiving
510 */
511 while (simeth_rx(dev));
512 return IRQ_HANDLED;
513}
514
515static struct net_device_stats *
516simeth_get_stats(struct net_device *dev)
517{
518 struct simeth_local *local = dev->priv;
519
520 return &local->stats;
521}
522
523/* fake multicast ability */
524static void
525set_multicast_list(struct net_device *dev)
526{
527 printk(KERN_WARNING "%s: set_multicast_list called\n", dev->name);
528}
529
530__initcall(simeth_probe);
diff --git a/arch/ia64/hp/sim/simscsi.c b/arch/ia64/hp/sim/simscsi.c
new file mode 100644
index 000000000000..56405dbfd739
--- /dev/null
+++ b/arch/ia64/hp/sim/simscsi.c
@@ -0,0 +1,404 @@
1/*
2 * Simulated SCSI driver.
3 *
4 * Copyright (C) 1999, 2001-2003 Hewlett-Packard Co
5 * David Mosberger-Tang <davidm@hpl.hp.com>
6 * Stephane Eranian <eranian@hpl.hp.com>
7 *
8 * 02/01/15 David Mosberger Updated for v2.5.1
9 * 99/12/18 David Mosberger Added support for READ10/WRITE10 needed by linux v2.3.33
10 */
11#include <linux/blkdev.h>
12#include <linux/init.h>
13#include <linux/interrupt.h>
14#include <linux/kernel.h>
15#include <linux/timer.h>
16#include <asm/irq.h>
17
18#include <scsi/scsi.h>
19#include <scsi/scsi_cmnd.h>
20#include <scsi/scsi_device.h>
21#include <scsi/scsi_host.h>
22
23#define DEBUG_SIMSCSI 0
24
25#define SIMSCSI_REQ_QUEUE_LEN 64
26#define DEFAULT_SIMSCSI_ROOT "/var/ski-disks/sd"
27
28/* Simulator system calls: */
29
30#define SSC_OPEN 50
31#define SSC_CLOSE 51
32#define SSC_READ 52
33#define SSC_WRITE 53
34#define SSC_GET_COMPLETION 54
35#define SSC_WAIT_COMPLETION 55
36
37#define SSC_WRITE_ACCESS 2
38#define SSC_READ_ACCESS 1
39
40#if DEBUG_SIMSCSI
41 int simscsi_debug;
42# define DBG simscsi_debug
43#else
44# define DBG 0
45#endif
46
47static struct Scsi_Host *host;
48
49static void simscsi_interrupt (unsigned long val);
50static DECLARE_TASKLET(simscsi_tasklet, simscsi_interrupt, 0);
51
52struct disk_req {
53 unsigned long addr;
54 unsigned len;
55};
56
57struct disk_stat {
58 int fd;
59 unsigned count;
60};
61
62extern long ia64_ssc (long arg0, long arg1, long arg2, long arg3, int nr);
63
64static int desc[16] = {
65 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1
66};
67
68static struct queue_entry {
69 struct scsi_cmnd *sc;
70} queue[SIMSCSI_REQ_QUEUE_LEN];
71
72static int rd, wr;
73static atomic_t num_reqs = ATOMIC_INIT(0);
74
75/* base name for default disks */
76static char *simscsi_root = DEFAULT_SIMSCSI_ROOT;
77
78#define MAX_ROOT_LEN 128
79
80/*
81 * used to setup a new base for disk images
82 * to use /foo/bar/disk[a-z] as disk images
83 * you have to specify simscsi=/foo/bar/disk on the command line
84 */
85static int __init
86simscsi_setup (char *s)
87{
88 /* XXX Fix me we may need to strcpy() ? */
89 if (strlen(s) > MAX_ROOT_LEN) {
90 printk(KERN_ERR "simscsi_setup: prefix too long---using default %s\n",
91 simscsi_root);
92	} else
93		simscsi_root = s;
94 return 1;
95}
96
97__setup("simscsi=", simscsi_setup);
98
99static void
100simscsi_interrupt (unsigned long val)
101{
102 struct scsi_cmnd *sc;
103
104 while ((sc = queue[rd].sc) != 0) {
105 atomic_dec(&num_reqs);
106 queue[rd].sc = 0;
107 if (DBG)
108 printk("simscsi_interrupt: done with %ld\n", sc->serial_number);
109 (*sc->scsi_done)(sc);
110 rd = (rd + 1) % SIMSCSI_REQ_QUEUE_LEN;
111 }
112}
113
114static int
115simscsi_biosparam (struct scsi_device *sdev, struct block_device *n,
116 sector_t capacity, int ip[])
117{
118 ip[0] = 64; /* heads */
119 ip[1] = 32; /* sectors */
120 ip[2] = capacity >> 11; /* cylinders */
121 return 0;
122}
123
124static void
125simscsi_readwrite (struct scsi_cmnd *sc, int mode, unsigned long offset, unsigned long len)
126{
127 struct disk_stat stat;
128 struct disk_req req;
129
130 req.addr = __pa(sc->request_buffer);
131 req.len = len; /* # of bytes to transfer */
132
133 if (sc->request_bufflen < req.len)
134 return;
135
136 stat.fd = desc[sc->device->id];
137 if (DBG)
138 printk("simscsi_%s @ %lx (off %lx)\n",
139 mode == SSC_READ ? "read":"write", req.addr, offset);
140 ia64_ssc(stat.fd, 1, __pa(&req), offset, mode);
141 ia64_ssc(__pa(&stat), 0, 0, 0, SSC_WAIT_COMPLETION);
142
143 if (stat.count == req.len) {
144 sc->result = GOOD;
145 } else {
146 sc->result = DID_ERROR << 16;
147 }
148}
149
150static void
151simscsi_sg_readwrite (struct scsi_cmnd *sc, int mode, unsigned long offset)
152{
153 int list_len = sc->use_sg;
154 struct scatterlist *sl = (struct scatterlist *)sc->buffer;
155 struct disk_stat stat;
156 struct disk_req req;
157
158 stat.fd = desc[sc->device->id];
159
160 while (list_len) {
161 req.addr = __pa(page_address(sl->page) + sl->offset);
162 req.len = sl->length;
163 if (DBG)
164 printk("simscsi_sg_%s @ %lx (off %lx) use_sg=%d len=%d\n",
165 mode == SSC_READ ? "read":"write", req.addr, offset,
166 list_len, sl->length);
167 ia64_ssc(stat.fd, 1, __pa(&req), offset, mode);
168 ia64_ssc(__pa(&stat), 0, 0, 0, SSC_WAIT_COMPLETION);
169
170 /* should not happen in our case */
171 if (stat.count != req.len) {
172 sc->result = DID_ERROR << 16;
173 return;
174 }
175 offset += sl->length;
176 sl++;
177 list_len--;
178 }
179 sc->result = GOOD;
180}
181
182/*
183 * function handling both READ_6/WRITE_6 (non-scatter/gather mode)
184 * commands.
185 * Added 02/26/99 S.Eranian
186 */
187static void
188simscsi_readwrite6 (struct scsi_cmnd *sc, int mode)
189{
190 unsigned long offset;
191
192 offset = (((sc->cmnd[1] & 0x1f) << 16) | (sc->cmnd[2] << 8) | sc->cmnd[3])*512;
193 if (sc->use_sg > 0)
194 simscsi_sg_readwrite(sc, mode, offset);
195 else
196 simscsi_readwrite(sc, mode, offset, sc->cmnd[4]*512);
197}
198
199static size_t
200simscsi_get_disk_size (int fd)
201{
202 struct disk_stat stat;
203 size_t bit, sectors = 0;
204 struct disk_req req;
205 char buf[512];
206
207 /*
208 * This is a bit kludgey: the simulator doesn't provide a direct way of determining
209 * the disk size, so we do a binary search, assuming a maximum disk size of 4GB.
210 */
211 for (bit = (4UL << 30)/512; bit != 0; bit >>= 1) {
212 req.addr = __pa(&buf);
213 req.len = sizeof(buf);
214 ia64_ssc(fd, 1, __pa(&req), ((sectors | bit) - 1)*512, SSC_READ);
215 stat.fd = fd;
216 ia64_ssc(__pa(&stat), 0, 0, 0, SSC_WAIT_COMPLETION);
217 if (stat.count == sizeof(buf))
218 sectors |= bit;
219 }
220 return sectors - 1; /* return last valid sector number */
221}
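The probe above walks the size bits from high to low: a bit is kept only if the read at the corresponding sector succeeds, so the loop converges on the highest readable sector without knowing the disk size up front. A standalone userspace analogue of the same idea, using plain POSIX pread() on an image file and assuming the same 4GB upper bound, might look like this (illustrative sketch, not part of the driver):

/* find_last_sector: binary-search the last readable 512-byte sector. */
#include <fcntl.h>
#include <stdio.h>
#include <sys/types.h>
#include <unistd.h>

static off_t find_last_sector(int fd)
{
	char buf[512];
	off_t sectors = 0, bit;

	for (bit = (4LL << 30) / 512; bit != 0; bit >>= 1) {
		off_t cand = (sectors | bit) - 1;	/* candidate last sector */
		if (pread(fd, buf, sizeof(buf), cand * 512) == (ssize_t) sizeof(buf))
			sectors |= bit;			/* that sector exists; keep the bit */
	}
	return sectors - 1;				/* last valid sector number */
}

int main(int argc, char **argv)
{
	int fd;

	if (argc < 2 || (fd = open(argv[1], O_RDONLY)) < 0)
		return 1;
	printf("last sector: %lld\n", (long long) find_last_sector(fd));
	close(fd);
	return 0;
}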
222
223static void
224simscsi_readwrite10 (struct scsi_cmnd *sc, int mode)
225{
226 unsigned long offset;
227
228 offset = ( (sc->cmnd[2] << 24) | (sc->cmnd[3] << 16)
229 | (sc->cmnd[4] << 8) | (sc->cmnd[5] << 0))*512;
230 if (sc->use_sg > 0)
231 simscsi_sg_readwrite(sc, mode, offset);
232 else
233 simscsi_readwrite(sc, mode, offset, ((sc->cmnd[7] << 8) | sc->cmnd[8])*512);
234}
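For both the 6- and 10-byte variants, the decode above is plain byte arithmetic: CDB bytes are shifted together into a logical block address, and block counts become byte lengths by multiplying by the 512-byte sector size. A worked example with an illustrative READ_10 CDB (sketch only; the values are chosen arbitrarily):

#include <stdio.h>

int main(void)
{
	/* READ_10: opcode 0x28, LBA 0x00001000, transfer length 8 blocks. */
	unsigned char cmnd[10] = { 0x28, 0, 0x00, 0x00, 0x10, 0x00, 0, 0x00, 0x08, 0 };

	unsigned long offset = (((unsigned long) cmnd[2] << 24) | (cmnd[3] << 16)
				| (cmnd[4] << 8) | cmnd[5]) * 512;
	unsigned long len    = ((cmnd[7] << 8) | cmnd[8]) * 512;

	/* LBA 4096 -> byte offset 2097152 (2 MiB); 8 blocks -> 4096 bytes. */
	printf("offset=%lu len=%lu\n", offset, len);
	return 0;
}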
235
236static int
237simscsi_queuecommand (struct scsi_cmnd *sc, void (*done)(struct scsi_cmnd *))
238{
239 unsigned int target_id = sc->device->id;
240 char fname[MAX_ROOT_LEN+16];
241 size_t disk_size;
242 char *buf;
243#if DEBUG_SIMSCSI
244 register long sp asm ("sp");
245
246 if (DBG)
247 printk("simscsi_queuecommand: target=%d,cmnd=%u,sc=%lu,sp=%lx,done=%p\n",
248 target_id, sc->cmnd[0], sc->serial_number, sp, done);
249#endif
250
251 sc->result = DID_BAD_TARGET << 16;
252 sc->scsi_done = done;
253 if (target_id <= 15 && sc->device->lun == 0) {
254 switch (sc->cmnd[0]) {
255 case INQUIRY:
256 if (sc->request_bufflen < 35) {
257 break;
258 }
259 sprintf (fname, "%s%c", simscsi_root, 'a' + target_id);
260 desc[target_id] = ia64_ssc(__pa(fname), SSC_READ_ACCESS|SSC_WRITE_ACCESS,
261 0, 0, SSC_OPEN);
262 if (desc[target_id] < 0) {
263 /* disk doesn't exist... */
264 break;
265 }
266 buf = sc->request_buffer;
267 buf[0] = 0; /* magnetic disk */
268 buf[1] = 0; /* not a removable medium */
269 buf[2] = 2; /* SCSI-2 compliant device */
270 buf[3] = 2; /* SCSI-2 response data format */
271 buf[4] = 31; /* additional length (bytes) */
272 buf[5] = 0; /* reserved */
273 buf[6] = 0; /* reserved */
274 buf[7] = 0; /* various flags */
275 memcpy(buf + 8, "HP SIMULATED DISK 0.00", 28);
276 sc->result = GOOD;
277 break;
278
279 case TEST_UNIT_READY:
280 sc->result = GOOD;
281 break;
282
283 case READ_6:
284 if (desc[target_id] < 0 )
285 break;
286 simscsi_readwrite6(sc, SSC_READ);
287 break;
288
289 case READ_10:
290 if (desc[target_id] < 0 )
291 break;
292 simscsi_readwrite10(sc, SSC_READ);
293 break;
294
295 case WRITE_6:
296 if (desc[target_id] < 0)
297 break;
298 simscsi_readwrite6(sc, SSC_WRITE);
299 break;
300
301 case WRITE_10:
302 if (desc[target_id] < 0)
303 break;
304 simscsi_readwrite10(sc, SSC_WRITE);
305 break;
306
307
308 case READ_CAPACITY:
309 if (desc[target_id] < 0 || sc->request_bufflen < 8) {
310 break;
311 }
312 buf = sc->request_buffer;
313
314 disk_size = simscsi_get_disk_size(desc[target_id]);
315
316			/* return the probed capacity: the last valid LBA, big-endian: */
317 buf[0] = (disk_size >> 24) & 0xff;
318 buf[1] = (disk_size >> 16) & 0xff;
319 buf[2] = (disk_size >> 8) & 0xff;
320 buf[3] = (disk_size >> 0) & 0xff;
321 /* set block size of 512 bytes: */
322 buf[4] = 0;
323 buf[5] = 0;
324 buf[6] = 2;
325 buf[7] = 0;
326 sc->result = GOOD;
327 break;
328
329 case MODE_SENSE:
330 case MODE_SENSE_10:
331 /* sd.c uses this to determine whether disk does write-caching. */
332 memset(sc->request_buffer, 0, 128);
333 sc->result = GOOD;
334 break;
335
336 case START_STOP:
337 printk(KERN_ERR "START_STOP\n");
338 break;
339
340 default:
341 panic("simscsi: unknown SCSI command %u\n", sc->cmnd[0]);
342 }
343 }
344 if (sc->result == DID_BAD_TARGET) {
345 sc->result |= DRIVER_SENSE << 24;
346 sc->sense_buffer[0] = 0x70;
347 sc->sense_buffer[2] = 0x00;
348 }
349 if (atomic_read(&num_reqs) >= SIMSCSI_REQ_QUEUE_LEN) {
350 panic("Attempt to queue command while command is pending!!");
351 }
352 atomic_inc(&num_reqs);
353 queue[wr].sc = sc;
354 wr = (wr + 1) % SIMSCSI_REQ_QUEUE_LEN;
355
356 tasklet_schedule(&simscsi_tasklet);
357 return 0;
358}
359
360static int
361simscsi_host_reset (struct scsi_cmnd *sc)
362{
363 printk(KERN_ERR "simscsi_host_reset: not implemented\n");
364 return 0;
365}
366
367static struct scsi_host_template driver_template = {
368 .name = "simulated SCSI host adapter",
369 .proc_name = "simscsi",
370 .queuecommand = simscsi_queuecommand,
371 .eh_host_reset_handler = simscsi_host_reset,
372 .bios_param = simscsi_biosparam,
373 .can_queue = SIMSCSI_REQ_QUEUE_LEN,
374 .this_id = -1,
375 .sg_tablesize = SG_ALL,
376 .max_sectors = 1024,
377 .cmd_per_lun = SIMSCSI_REQ_QUEUE_LEN,
378 .use_clustering = DISABLE_CLUSTERING,
379};
380
381static int __init
382simscsi_init(void)
383{
384 int error;
385
386 host = scsi_host_alloc(&driver_template, 0);
387 if (!host)
388 return -ENOMEM;
389
390 error = scsi_add_host(host, NULL);
391 if (!error)
392 scsi_scan_host(host);
393 return error;
394}
395
396static void __exit
397simscsi_exit(void)
398{
399 scsi_remove_host(host);
400 scsi_host_put(host);
401}
402
403module_init(simscsi_init);
404module_exit(simscsi_exit);
diff --git a/arch/ia64/hp/sim/simserial.c b/arch/ia64/hp/sim/simserial.c
new file mode 100644
index 000000000000..786e70718ce4
--- /dev/null
+++ b/arch/ia64/hp/sim/simserial.c
@@ -0,0 +1,1032 @@
1/*
2 * Simulated Serial Driver (fake serial)
3 *
4 * This driver is mostly used for bringup purposes and will go away.
5 * It has a strong dependency on the system console. All outputs
6 * are rerouted to the same facility as the one used by printk, which in our
7 * case means the sys_sim.c console (it goes via the simulator). The code hereafter
8 * is completely leveraged from the serial.c driver.
9 *
10 * Copyright (C) 1999-2000, 2002-2003 Hewlett-Packard Co
11 * Stephane Eranian <eranian@hpl.hp.com>
12 * David Mosberger-Tang <davidm@hpl.hp.com>
13 *
14 * 02/04/00 D. Mosberger Merged in serial.c bug fixes in rs_close().
15 * 02/25/00 D. Mosberger Synced up with 2.3.99pre-5 version of serial.c.
16 * 07/30/02 D. Mosberger Replace sti()/cli() with explicit spinlocks & local irq masking
17 */
18
19#include <linux/config.h>
20#include <linux/init.h>
21#include <linux/errno.h>
22#include <linux/sched.h>
23#include <linux/tty.h>
24#include <linux/tty_flip.h>
25#include <linux/major.h>
26#include <linux/fcntl.h>
27#include <linux/mm.h>
28#include <linux/slab.h>
29#include <linux/console.h>
30#include <linux/module.h>
31#include <linux/serial.h>
32#include <linux/serialP.h>
33
34#include <asm/irq.h>
35#include <asm/hw_irq.h>
36#include <asm/uaccess.h>
37
38#ifdef CONFIG_KDB
39# include <linux/kdb.h>
40#endif
41
42#undef SIMSERIAL_DEBUG /* define this to get some debug information */
43
44#define KEYBOARD_INTR 3 /* must match with simulator! */
45
46#define NR_PORTS 1 /* only one port for now */
47#define SERIAL_INLINE 1
48
49#ifdef SERIAL_INLINE
50#define _INLINE_ inline
51#endif
52
53#define IRQ_T(info) ((info->flags & ASYNC_SHARE_IRQ) ? SA_SHIRQ : SA_INTERRUPT)
54
55#define SSC_GETCHAR 21
56
57extern long ia64_ssc (long, long, long, long, int);
58extern void ia64_ssc_connect_irq (long intr, long irq);
59
60static char *serial_name = "SimSerial driver";
61static char *serial_version = "0.6";
62
63/*
64 * This has been extracted from asm/serial.h. We need one eventually but
65 * I don't know exactly what we're going to put in it so just fake one
66 * for now.
67 */
68#define BASE_BAUD ( 1843200 / 16 )
69
70#define STD_COM_FLAGS (ASYNC_BOOT_AUTOCONF | ASYNC_SKIP_TEST)
71
72/*
73 * Most of the values here are meaningless to this particular driver.
74 * However, some values must be preserved for the code (leveraged from serial.c)
75 * to work correctly:
76 * port must not be 0
77 * type must not be UNKNOWN
78 * So I picked arbitrary (guess from where?) values instead
79 */
80static struct serial_state rs_table[NR_PORTS]={
81 /* UART CLK PORT IRQ FLAGS */
82 { 0, BASE_BAUD, 0x3F8, 0, STD_COM_FLAGS,0,PORT_16550 } /* ttyS0 */
83};
84
85/*
86 * Just for the fun of it !
87 */
88static struct serial_uart_config uart_config[] = {
89 { "unknown", 1, 0 },
90 { "8250", 1, 0 },
91 { "16450", 1, 0 },
92 { "16550", 1, 0 },
93 { "16550A", 16, UART_CLEAR_FIFO | UART_USE_FIFO },
94 { "cirrus", 1, 0 },
95 { "ST16650", 1, UART_CLEAR_FIFO | UART_STARTECH },
96 { "ST16650V2", 32, UART_CLEAR_FIFO | UART_USE_FIFO |
97 UART_STARTECH },
98 { "TI16750", 64, UART_CLEAR_FIFO | UART_USE_FIFO},
99 { 0, 0}
100};
101
102struct tty_driver *hp_simserial_driver;
103
104static struct async_struct *IRQ_ports[NR_IRQS];
105
106static struct console *console;
107
108static unsigned char *tmp_buf;
109static DECLARE_MUTEX(tmp_buf_sem);
110
111extern struct console *console_drivers; /* from kernel/printk.c */
112
113/*
114 * ------------------------------------------------------------
115 * rs_stop() and rs_start()
116 *
117 * These routines are called before setting or resetting tty->stopped.
118 * They enable or disable transmitter interrupts, as necessary.
119 * ------------------------------------------------------------
120 */
121static void rs_stop(struct tty_struct *tty)
122{
123#ifdef SIMSERIAL_DEBUG
124 printk("rs_stop: tty->stopped=%d tty->hw_stopped=%d tty->flow_stopped=%d\n",
125 tty->stopped, tty->hw_stopped, tty->flow_stopped);
126#endif
127
128}
129
130static void rs_start(struct tty_struct *tty)
131{
132#ifdef SIMSERIAL_DEBUG
133 printk("rs_start: tty->stopped=%d tty->hw_stopped=%d tty->flow_stopped=%d\n",
134 tty->stopped, tty->hw_stopped, tty->flow_stopped);
135#endif
136}
137
138static void receive_chars(struct tty_struct *tty, struct pt_regs *regs)
139{
140 unsigned char ch;
141 static unsigned char seen_esc = 0;
142
143 while ( (ch = ia64_ssc(0, 0, 0, 0, SSC_GETCHAR)) ) {
144 if ( ch == 27 && seen_esc == 0 ) {
145 seen_esc = 1;
146 continue;
147 } else {
148 if ( seen_esc==1 && ch == 'O' ) {
149 seen_esc = 2;
150 continue;
151 } else if ( seen_esc == 2 ) {
152 if ( ch == 'P' ) show_state(); /* F1 key */
153#ifdef CONFIG_KDB
154 if ( ch == 'S' )
155 kdb(KDB_REASON_KEYBOARD, 0, (kdb_eframe_t) regs);
156#endif
157
158 seen_esc = 0;
159 continue;
160 }
161 }
162 seen_esc = 0;
163 if (tty->flip.count >= TTY_FLIPBUF_SIZE) break;
164
165 *tty->flip.char_buf_ptr = ch;
166
167 *tty->flip.flag_buf_ptr = 0;
168
169 tty->flip.flag_buf_ptr++;
170 tty->flip.char_buf_ptr++;
171 tty->flip.count++;
172 }
173 tty_flip_buffer_push(tty);
174}
175
176/*
177 * This is the serial driver's interrupt routine for a single port
178 */
179static irqreturn_t rs_interrupt_single(int irq, void *dev_id, struct pt_regs * regs)
180{
181 struct async_struct * info;
182
183 /*
184 * I don't know exactly why they don't use the dev_id opaque data
185 * pointer instead of this extra lookup table
186 */
187 info = IRQ_ports[irq];
188 if (!info || !info->tty) {
189 printk(KERN_INFO "simrs_interrupt_single: info|tty=0 info=%p problem\n", info);
190 return IRQ_NONE;
191 }
192 /*
193 * pretty simple in our case, because we only get interrupts
194 * on inbound traffic
195 */
196 receive_chars(info->tty, regs);
197 return IRQ_HANDLED;
198}
199
200/*
201 * -------------------------------------------------------------------
202 * Here ends the serial interrupt routines.
203 * -------------------------------------------------------------------
204 */
205
206#if 0
207/*
208 * not really used in our situation so keep them commented out for now
209 */
210static DECLARE_TASK_QUEUE(tq_serial); /* used to be at the top of the file */
211static void do_serial_bh(void)
212{
213 run_task_queue(&tq_serial);
214 printk(KERN_ERR "do_serial_bh: called\n");
215}
216#endif
217
218static void do_softint(void *private_)
219{
220 printk(KERN_ERR "simserial: do_softint called\n");
221}
222
223static void rs_put_char(struct tty_struct *tty, unsigned char ch)
224{
225 struct async_struct *info = (struct async_struct *)tty->driver_data;
226 unsigned long flags;
227
228 if (!tty || !info->xmit.buf) return;
229
230 local_irq_save(flags);
231 if (CIRC_SPACE(info->xmit.head, info->xmit.tail, SERIAL_XMIT_SIZE) == 0) {
232 local_irq_restore(flags);
233 return;
234 }
235 info->xmit.buf[info->xmit.head] = ch;
236 info->xmit.head = (info->xmit.head + 1) & (SERIAL_XMIT_SIZE-1);
237 local_irq_restore(flags);
238}
239
240static _INLINE_ void transmit_chars(struct async_struct *info, int *intr_done)
241{
242 int count;
243 unsigned long flags;
244
245
246 local_irq_save(flags);
247
248 if (info->x_char) {
249 char c = info->x_char;
250
251 console->write(console, &c, 1);
252
253 info->state->icount.tx++;
254 info->x_char = 0;
255
256 goto out;
257 }
258
259 if (info->xmit.head == info->xmit.tail || info->tty->stopped || info->tty->hw_stopped) {
260#ifdef SIMSERIAL_DEBUG
261 printk("transmit_chars: head=%d, tail=%d, stopped=%d\n",
262 info->xmit.head, info->xmit.tail, info->tty->stopped);
263#endif
264 goto out;
265 }
266 /*
267	 * We removed the loop and try to do it in two chunks. We need
268	 * 2 operations maximum because it's a ring buffer.
269	 *
270	 * First from the tail to the end of the buffer if possible,
271	 * then from the beginning of the buffer as necessary (see the sketch after this function).
272 */
273
274 count = min(CIRC_CNT(info->xmit.head, info->xmit.tail, SERIAL_XMIT_SIZE),
275 SERIAL_XMIT_SIZE - info->xmit.tail);
276 console->write(console, info->xmit.buf+info->xmit.tail, count);
277
278 info->xmit.tail = (info->xmit.tail+count) & (SERIAL_XMIT_SIZE-1);
279
280 /*
281 * We have more at the beginning of the buffer
282 */
283 count = CIRC_CNT(info->xmit.head, info->xmit.tail, SERIAL_XMIT_SIZE);
284 if (count) {
285 console->write(console, info->xmit.buf, count);
286 info->xmit.tail += count;
287 }
288out:
289 local_irq_restore(flags);
290}
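As the comment in transmit_chars() notes, a power-of-two ring buffer can always be drained in at most two contiguous copies: one from the tail to the end of the array, and one from the start of the array up to the head. A standalone sketch of that pattern (illustrative only; consume() stands in for console->write(), and the CIRC_* helpers from <linux/circ_buf.h> are replaced by open-coded masking):

#include <stdio.h>

#define RING_SIZE 32				/* must be a power of two */

struct ring {
	char buf[RING_SIZE];
	unsigned int head, tail;		/* both kept in [0, RING_SIZE) */
};

static void consume(const char *p, unsigned int n)
{
	fwrite(p, 1, n, stdout);		/* stand-in for console->write() */
}

static void drain(struct ring *r)
{
	/* chunk 1: from the tail up to the head or the end of the array */
	unsigned int used  = (r->head - r->tail) & (RING_SIZE - 1);
	unsigned int first = used < RING_SIZE - r->tail ? used : RING_SIZE - r->tail;

	consume(&r->buf[r->tail], first);
	r->tail = (r->tail + first) & (RING_SIZE - 1);

	/* chunk 2: whatever wrapped around to the beginning of the array */
	used = (r->head - r->tail) & (RING_SIZE - 1);
	if (used)
		consume(&r->buf[r->tail], used);
	r->tail = r->head;
}

int main(void)
{
	struct ring r = { .head = 28, .tail = 28 };	/* start near the end to force a wrap */
	const char *msg = "two-chunk ring drain\n";
	unsigned int i;

	for (i = 0; msg[i]; i++) {			/* producer side, as rs_put_char() does */
		r.buf[r.head] = msg[i];
		r.head = (r.head + 1) & (RING_SIZE - 1);
	}
	drain(&r);
	return 0;
}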
291
292static void rs_flush_chars(struct tty_struct *tty)
293{
294 struct async_struct *info = (struct async_struct *)tty->driver_data;
295
296 if (info->xmit.head == info->xmit.tail || tty->stopped || tty->hw_stopped ||
297 !info->xmit.buf)
298 return;
299
300 transmit_chars(info, NULL);
301}
302
303
304static int rs_write(struct tty_struct * tty,
305 const unsigned char *buf, int count)
306{
307 int c, ret = 0;
308 struct async_struct *info = (struct async_struct *)tty->driver_data;
309 unsigned long flags;
310
311 if (!tty || !info->xmit.buf || !tmp_buf) return 0;
312
313 local_irq_save(flags);
314 while (1) {
315 c = CIRC_SPACE_TO_END(info->xmit.head, info->xmit.tail, SERIAL_XMIT_SIZE);
316 if (count < c)
317 c = count;
318 if (c <= 0) {
319 break;
320 }
321 memcpy(info->xmit.buf + info->xmit.head, buf, c);
322 info->xmit.head = ((info->xmit.head + c) &
323 (SERIAL_XMIT_SIZE-1));
324 buf += c;
325 count -= c;
326 ret += c;
327 }
328 local_irq_restore(flags);
329 /*
330 * Hey, we transmit directly from here in our case
331 */
332 if (CIRC_CNT(info->xmit.head, info->xmit.tail, SERIAL_XMIT_SIZE)
333 && !tty->stopped && !tty->hw_stopped) {
334 transmit_chars(info, NULL);
335 }
336 return ret;
337}
338
339static int rs_write_room(struct tty_struct *tty)
340{
341 struct async_struct *info = (struct async_struct *)tty->driver_data;
342
343 return CIRC_SPACE(info->xmit.head, info->xmit.tail, SERIAL_XMIT_SIZE);
344}
345
346static int rs_chars_in_buffer(struct tty_struct *tty)
347{
348 struct async_struct *info = (struct async_struct *)tty->driver_data;
349
350 return CIRC_CNT(info->xmit.head, info->xmit.tail, SERIAL_XMIT_SIZE);
351}
352
353static void rs_flush_buffer(struct tty_struct *tty)
354{
355 struct async_struct *info = (struct async_struct *)tty->driver_data;
356 unsigned long flags;
357
358 local_irq_save(flags);
359 info->xmit.head = info->xmit.tail = 0;
360 local_irq_restore(flags);
361
362 wake_up_interruptible(&tty->write_wait);
363
364 if ((tty->flags & (1 << TTY_DO_WRITE_WAKEUP)) &&
365 tty->ldisc.write_wakeup)
366 (tty->ldisc.write_wakeup)(tty);
367}
368
369/*
370 * This function is used to send a high-priority XON/XOFF character to
371 * the device
372 */
373static void rs_send_xchar(struct tty_struct *tty, char ch)
374{
375 struct async_struct *info = (struct async_struct *)tty->driver_data;
376
377 info->x_char = ch;
378 if (ch) {
379 /*
380 * I guess we could call console->write() directly but
381 * let's do that for now.
382 */
383 transmit_chars(info, NULL);
384 }
385}
386
387/*
388 * ------------------------------------------------------------
389 * rs_throttle()
390 *
391 * This routine is called by the upper-layer tty layer to signal that
392 * incoming characters should be throttled.
393 * ------------------------------------------------------------
394 */
395static void rs_throttle(struct tty_struct * tty)
396{
397 if (I_IXOFF(tty)) rs_send_xchar(tty, STOP_CHAR(tty));
398
399 printk(KERN_INFO "simrs_throttle called\n");
400}
401
402static void rs_unthrottle(struct tty_struct * tty)
403{
404 struct async_struct *info = (struct async_struct *)tty->driver_data;
405
406 if (I_IXOFF(tty)) {
407 if (info->x_char)
408 info->x_char = 0;
409 else
410 rs_send_xchar(tty, START_CHAR(tty));
411 }
412 printk(KERN_INFO "simrs_unthrottle called\n");
413}
414
415/*
416 * rs_break() --- routine which turns the break handling on or off
417 */
418static void rs_break(struct tty_struct *tty, int break_state)
419{
420}
421
422static int rs_ioctl(struct tty_struct *tty, struct file * file,
423 unsigned int cmd, unsigned long arg)
424{
425 if ((cmd != TIOCGSERIAL) && (cmd != TIOCSSERIAL) &&
426 (cmd != TIOCSERCONFIG) && (cmd != TIOCSERGSTRUCT) &&
427 (cmd != TIOCMIWAIT) && (cmd != TIOCGICOUNT)) {
428 if (tty->flags & (1 << TTY_IO_ERROR))
429 return -EIO;
430 }
431
432 switch (cmd) {
433 case TIOCMGET:
434 printk(KERN_INFO "rs_ioctl: TIOCMGET called\n");
435 return -EINVAL;
436 case TIOCMBIS:
437 case TIOCMBIC:
438 case TIOCMSET:
439 printk(KERN_INFO "rs_ioctl: TIOCMBIS/BIC/SET called\n");
440 return -EINVAL;
441 case TIOCGSERIAL:
442 printk(KERN_INFO "simrs_ioctl TIOCGSERIAL called\n");
443 return 0;
444 case TIOCSSERIAL:
445 printk(KERN_INFO "simrs_ioctl TIOCSSERIAL called\n");
446 return 0;
447 case TIOCSERCONFIG:
448 printk(KERN_INFO "rs_ioctl: TIOCSERCONFIG called\n");
449 return -EINVAL;
450
451 case TIOCSERGETLSR: /* Get line status register */
452 printk(KERN_INFO "rs_ioctl: TIOCSERGETLSR called\n");
453 return -EINVAL;
454
455 case TIOCSERGSTRUCT:
456 printk(KERN_INFO "rs_ioctl: TIOCSERGSTRUCT called\n");
457#if 0
458 if (copy_to_user((struct async_struct *) arg,
459 info, sizeof(struct async_struct)))
460 return -EFAULT;
461#endif
462 return 0;
463
464 /*
465 * Wait for any of the 4 modem inputs (DCD,RI,DSR,CTS) to change
466 * - mask passed in arg for lines of interest
467 * (use |'ed TIOCM_RNG/DSR/CD/CTS for masking)
468 * Caller should use TIOCGICOUNT to see which one it was
469 */
470 case TIOCMIWAIT:
471 printk(KERN_INFO "rs_ioctl: TIOCMIWAIT: called\n");
472 return 0;
473 /*
474 * Get counter of input serial line interrupts (DCD,RI,DSR,CTS)
475 * Return: write counters to the user passed counter struct
476 * NB: both 1->0 and 0->1 transitions are counted except for
477 * RI where only 0->1 is counted.
478 */
479 case TIOCGICOUNT:
480 printk(KERN_INFO "rs_ioctl: TIOCGICOUNT called\n");
481 return 0;
482
483 case TIOCSERGWILD:
484 case TIOCSERSWILD:
485 /* "setserial -W" is called in Debian boot */
486 printk (KERN_INFO "TIOCSER?WILD ioctl obsolete, ignored.\n");
487 return 0;
488
489 default:
490 return -ENOIOCTLCMD;
491 }
492 return 0;
493}
494
495#define RELEVANT_IFLAG(iflag) (iflag & (IGNBRK|BRKINT|IGNPAR|PARMRK|INPCK))
496
497static void rs_set_termios(struct tty_struct *tty, struct termios *old_termios)
498{
499 unsigned int cflag = tty->termios->c_cflag;
500
501 if ( (cflag == old_termios->c_cflag)
502 && ( RELEVANT_IFLAG(tty->termios->c_iflag)
503 == RELEVANT_IFLAG(old_termios->c_iflag)))
504 return;
505
506
507 /* Handle turning off CRTSCTS */
508 if ((old_termios->c_cflag & CRTSCTS) &&
509 !(tty->termios->c_cflag & CRTSCTS)) {
510 tty->hw_stopped = 0;
511 rs_start(tty);
512 }
513}
514/*
515 * This routine will shutdown a serial port; interrupts are disabled, and
516 * DTR is dropped if the hangup on close termio flag is on.
517 */
518static void shutdown(struct async_struct * info)
519{
520 unsigned long flags;
521 struct serial_state *state;
522 int retval;
523
524 if (!(info->flags & ASYNC_INITIALIZED)) return;
525
526 state = info->state;
527
528#ifdef SIMSERIAL_DEBUG
529 printk("Shutting down serial port %d (irq %d)....", info->line,
530 state->irq);
531#endif
532
533 local_irq_save(flags);
534 {
535 /*
536 * First unlink the serial port from the IRQ chain...
537 */
538 if (info->next_port)
539 info->next_port->prev_port = info->prev_port;
540 if (info->prev_port)
541 info->prev_port->next_port = info->next_port;
542 else
543 IRQ_ports[state->irq] = info->next_port;
544
545 /*
546 * Free the IRQ, if necessary
547 */
548 if (state->irq && (!IRQ_ports[state->irq] ||
549 !IRQ_ports[state->irq]->next_port)) {
550 if (IRQ_ports[state->irq]) {
551 free_irq(state->irq, NULL);
552 retval = request_irq(state->irq, rs_interrupt_single,
553 IRQ_T(info), "serial", NULL);
554
555 if (retval)
556 printk(KERN_ERR "serial shutdown: request_irq: error %d"
557 " Couldn't reacquire IRQ.\n", retval);
558 } else
559 free_irq(state->irq, NULL);
560 }
561
562 if (info->xmit.buf) {
563 free_page((unsigned long) info->xmit.buf);
564 info->xmit.buf = 0;
565 }
566
567 if (info->tty) set_bit(TTY_IO_ERROR, &info->tty->flags);
568
569 info->flags &= ~ASYNC_INITIALIZED;
570 }
571 local_irq_restore(flags);
572}
573
574/*
575 * ------------------------------------------------------------
576 * rs_close()
577 *
578 * This routine is called when the serial port gets closed. First, we
579 * wait for the last remaining data to be sent. Then, we unlink its
580 * async structure from the interrupt chain if necessary, and we free
581 * that IRQ if nothing is left in the chain.
582 * ------------------------------------------------------------
583 */
584static void rs_close(struct tty_struct *tty, struct file * filp)
585{
586 struct async_struct * info = (struct async_struct *)tty->driver_data;
587 struct serial_state *state;
588 unsigned long flags;
589
590 if (!info ) return;
591
592 state = info->state;
593
594 local_irq_save(flags);
595 if (tty_hung_up_p(filp)) {
596#ifdef SIMSERIAL_DEBUG
597 printk("rs_close: hung_up\n");
598#endif
599 local_irq_restore(flags);
600 return;
601 }
602#ifdef SIMSERIAL_DEBUG
603 printk("rs_close ttys%d, count = %d\n", info->line, state->count);
604#endif
605 if ((tty->count == 1) && (state->count != 1)) {
606 /*
607 * Uh, oh. tty->count is 1, which means that the tty
608 * structure will be freed. state->count should always
609 * be one in these conditions. If it's greater than
610 * one, we've got real problems, since it means the
611	 * serial port won't be shut down.
612 */
613 printk(KERN_ERR "rs_close: bad serial port count; tty->count is 1, "
614 "state->count is %d\n", state->count);
615 state->count = 1;
616 }
617 if (--state->count < 0) {
618 printk(KERN_ERR "rs_close: bad serial port count for ttys%d: %d\n",
619 info->line, state->count);
620 state->count = 0;
621 }
622 if (state->count) {
623 local_irq_restore(flags);
624 return;
625 }
626 info->flags |= ASYNC_CLOSING;
627 local_irq_restore(flags);
628
629 /*
630 * Now we wait for the transmit buffer to clear; and we notify
631 * the line discipline to only process XON/XOFF characters.
632 */
633 shutdown(info);
634 if (tty->driver->flush_buffer) tty->driver->flush_buffer(tty);
635 if (tty->ldisc.flush_buffer) tty->ldisc.flush_buffer(tty);
636 info->event = 0;
637 info->tty = 0;
638 if (info->blocked_open) {
639 if (info->close_delay) {
640 current->state = TASK_INTERRUPTIBLE;
641 schedule_timeout(info->close_delay);
642 }
643 wake_up_interruptible(&info->open_wait);
644 }
645 info->flags &= ~(ASYNC_NORMAL_ACTIVE|ASYNC_CLOSING);
646 wake_up_interruptible(&info->close_wait);
647}
648
649/*
650 * rs_wait_until_sent() --- wait until the transmitter is empty
651 */
652static void rs_wait_until_sent(struct tty_struct *tty, int timeout)
653{
654}
655
656
657/*
658 * rs_hangup() --- called by tty_hangup() when a hangup is signaled.
659 */
660static void rs_hangup(struct tty_struct *tty)
661{
662 struct async_struct * info = (struct async_struct *)tty->driver_data;
663 struct serial_state *state = info->state;
664
665#ifdef SIMSERIAL_DEBUG
666 printk("rs_hangup: called\n");
667#endif
668
669 state = info->state;
670
671 rs_flush_buffer(tty);
672 if (info->flags & ASYNC_CLOSING)
673 return;
674 shutdown(info);
675
676 info->event = 0;
677 state->count = 0;
678 info->flags &= ~ASYNC_NORMAL_ACTIVE;
679 info->tty = 0;
680 wake_up_interruptible(&info->open_wait);
681}
682
683
684static int get_async_struct(int line, struct async_struct **ret_info)
685{
686 struct async_struct *info;
687 struct serial_state *sstate;
688
689 sstate = rs_table + line;
690 sstate->count++;
691 if (sstate->info) {
692 *ret_info = sstate->info;
693 return 0;
694 }
695 info = kmalloc(sizeof(struct async_struct), GFP_KERNEL);
696 if (!info) {
697 sstate->count--;
698 return -ENOMEM;
699 }
700 memset(info, 0, sizeof(struct async_struct));
701 init_waitqueue_head(&info->open_wait);
702 init_waitqueue_head(&info->close_wait);
703 init_waitqueue_head(&info->delta_msr_wait);
704 info->magic = SERIAL_MAGIC;
705 info->port = sstate->port;
706 info->flags = sstate->flags;
707 info->xmit_fifo_size = sstate->xmit_fifo_size;
708 info->line = line;
709 INIT_WORK(&info->work, do_softint, info);
710 info->state = sstate;
711 if (sstate->info) {
712 kfree(info);
713 *ret_info = sstate->info;
714 return 0;
715 }
716 *ret_info = sstate->info = info;
717 return 0;
718}
719
720static int
721startup(struct async_struct *info)
722{
723 unsigned long flags;
724 int retval=0;
725 irqreturn_t (*handler)(int, void *, struct pt_regs *);
726 struct serial_state *state= info->state;
727 unsigned long page;
728
729 page = get_zeroed_page(GFP_KERNEL);
730 if (!page)
731 return -ENOMEM;
732
733 local_irq_save(flags);
734
735 if (info->flags & ASYNC_INITIALIZED) {
736 free_page(page);
737 goto errout;
738 }
739
740 if (!state->port || !state->type) {
741 if (info->tty) set_bit(TTY_IO_ERROR, &info->tty->flags);
742 free_page(page);
743 goto errout;
744 }
745 if (info->xmit.buf)
746 free_page(page);
747 else
748 info->xmit.buf = (unsigned char *) page;
749
750#ifdef SIMSERIAL_DEBUG
751 printk("startup: ttys%d (irq %d)...", info->line, state->irq);
752#endif
753
754 /*
755 * Allocate the IRQ if necessary
756 */
757 if (state->irq && (!IRQ_ports[state->irq] ||
758 !IRQ_ports[state->irq]->next_port)) {
759 if (IRQ_ports[state->irq]) {
760 retval = -EBUSY;
761 goto errout;
762 } else
763 handler = rs_interrupt_single;
764
765 retval = request_irq(state->irq, handler, IRQ_T(info), "simserial", NULL);
766 if (retval) {
767 if (capable(CAP_SYS_ADMIN)) {
768 if (info->tty)
769 set_bit(TTY_IO_ERROR,
770 &info->tty->flags);
771 retval = 0;
772 }
773 goto errout;
774 }
775 }
776
777 /*
778 * Insert serial port into IRQ chain.
779 */
780 info->prev_port = 0;
781 info->next_port = IRQ_ports[state->irq];
782 if (info->next_port)
783 info->next_port->prev_port = info;
784 IRQ_ports[state->irq] = info;
785
786 if (info->tty) clear_bit(TTY_IO_ERROR, &info->tty->flags);
787
788 info->xmit.head = info->xmit.tail = 0;
789
790#if 0
791 /*
792 * Set up serial timers...
793 */
794 timer_table[RS_TIMER].expires = jiffies + 2*HZ/100;
795 timer_active |= 1 << RS_TIMER;
796#endif
797
798 /*
799 * Set up the tty->alt_speed kludge
800 */
801 if (info->tty) {
802 if ((info->flags & ASYNC_SPD_MASK) == ASYNC_SPD_HI)
803 info->tty->alt_speed = 57600;
804 if ((info->flags & ASYNC_SPD_MASK) == ASYNC_SPD_VHI)
805 info->tty->alt_speed = 115200;
806 if ((info->flags & ASYNC_SPD_MASK) == ASYNC_SPD_SHI)
807 info->tty->alt_speed = 230400;
808 if ((info->flags & ASYNC_SPD_MASK) == ASYNC_SPD_WARP)
809 info->tty->alt_speed = 460800;
810 }
811
812 info->flags |= ASYNC_INITIALIZED;
813 local_irq_restore(flags);
814 return 0;
815
816errout:
817 local_irq_restore(flags);
818 return retval;
819}
820
821
822/*
823 * This routine is called whenever a serial port is opened. It
824 * enables interrupts for a serial port, linking its async structure into
825 * the IRQ chain. It also performs the serial-specific
826 * initialization for the tty structure.
827 */
828static int rs_open(struct tty_struct *tty, struct file * filp)
829{
830 struct async_struct *info;
831 int retval, line;
832 unsigned long page;
833
834 line = tty->index;
835 if ((line < 0) || (line >= NR_PORTS))
836 return -ENODEV;
837 retval = get_async_struct(line, &info);
838 if (retval)
839 return retval;
840 tty->driver_data = info;
841 info->tty = tty;
842
843#ifdef SIMSERIAL_DEBUG
844 printk("rs_open %s, count = %d\n", tty->name, info->state->count);
845#endif
846 info->tty->low_latency = (info->flags & ASYNC_LOW_LATENCY) ? 1 : 0;
847
848 if (!tmp_buf) {
849 page = get_zeroed_page(GFP_KERNEL);
850 if (!page)
851 return -ENOMEM;
852 if (tmp_buf)
853 free_page(page);
854 else
855 tmp_buf = (unsigned char *) page;
856 }
857
858 /*
859	 * If the port is in the middle of closing, bail out now
860 */
861 if (tty_hung_up_p(filp) ||
862 (info->flags & ASYNC_CLOSING)) {
863 if (info->flags & ASYNC_CLOSING)
864 interruptible_sleep_on(&info->close_wait);
865#ifdef SERIAL_DO_RESTART
866 return ((info->flags & ASYNC_HUP_NOTIFY) ?
867 -EAGAIN : -ERESTARTSYS);
868#else
869 return -EAGAIN;
870#endif
871 }
872
873 /*
874 * Start up serial port
875 */
876 retval = startup(info);
877 if (retval) {
878 return retval;
879 }
880
881 /*
882 * figure out which console to use (should be one already)
883 */
884 console = console_drivers;
885 while (console) {
886 if ((console->flags & CON_ENABLED) && console->write) break;
887 console = console->next;
888 }
889
890#ifdef SIMSERIAL_DEBUG
891 printk("rs_open ttys%d successful\n", info->line);
892#endif
893 return 0;
894}
895
896/*
897 * /proc fs routines....
898 */
899
900static inline int line_info(char *buf, struct serial_state *state)
901{
902 return sprintf(buf, "%d: uart:%s port:%lX irq:%d\n",
903 state->line, uart_config[state->type].name,
904 state->port, state->irq);
905}
906
907static int rs_read_proc(char *page, char **start, off_t off, int count,
908 int *eof, void *data)
909{
910 int i, len = 0, l;
911 off_t begin = 0;
912
913 len += sprintf(page, "simserinfo:1.0 driver:%s\n", serial_version);
914 for (i = 0; i < NR_PORTS && len < 4000; i++) {
915 l = line_info(page + len, &rs_table[i]);
916 len += l;
917 if (len+begin > off+count)
918 goto done;
919 if (len+begin < off) {
920 begin += len;
921 len = 0;
922 }
923 }
924 *eof = 1;
925done:
926 if (off >= len+begin)
927 return 0;
928 *start = page + (begin-off);
929 return ((count < begin+len-off) ? count : begin+len-off);
930}
931
932/*
933 * ---------------------------------------------------------------------
934 * rs_init() and friends
935 *
936 * rs_init() is called at boot-time to initialize the serial driver.
937 * ---------------------------------------------------------------------
938 */
939
940/*
941 * This routine prints out the appropriate serial driver version
942 * number, and identifies which options were configured into this
943 * driver.
944 */
945static inline void show_serial_version(void)
946{
947 printk(KERN_INFO "%s version %s with", serial_name, serial_version);
948	printk(" no serial options enabled\n");
949}
950
951static struct tty_operations hp_ops = {
952 .open = rs_open,
953 .close = rs_close,
954 .write = rs_write,
955 .put_char = rs_put_char,
956 .flush_chars = rs_flush_chars,
957 .write_room = rs_write_room,
958 .chars_in_buffer = rs_chars_in_buffer,
959 .flush_buffer = rs_flush_buffer,
960 .ioctl = rs_ioctl,
961 .throttle = rs_throttle,
962 .unthrottle = rs_unthrottle,
963 .send_xchar = rs_send_xchar,
964 .set_termios = rs_set_termios,
965 .stop = rs_stop,
966 .start = rs_start,
967 .hangup = rs_hangup,
968 .break_ctl = rs_break,
969 .wait_until_sent = rs_wait_until_sent,
970 .read_proc = rs_read_proc,
971};
972
973/*
974 * The serial driver boot-time initialization code!
975 */
976static int __init
977simrs_init (void)
978{
979 int i;
980 struct serial_state *state;
981
982 if (!ia64_platform_is("hpsim"))
983 return -ENODEV;
984
985 hp_simserial_driver = alloc_tty_driver(1);
986 if (!hp_simserial_driver)
987 return -ENOMEM;
988
989 show_serial_version();
990
991 /* Initialize the tty_driver structure */
992
993 hp_simserial_driver->owner = THIS_MODULE;
994 hp_simserial_driver->driver_name = "simserial";
995 hp_simserial_driver->name = "ttyS";
996 hp_simserial_driver->major = TTY_MAJOR;
997 hp_simserial_driver->minor_start = 64;
998 hp_simserial_driver->type = TTY_DRIVER_TYPE_SERIAL;
999 hp_simserial_driver->subtype = SERIAL_TYPE_NORMAL;
1000 hp_simserial_driver->init_termios = tty_std_termios;
1001 hp_simserial_driver->init_termios.c_cflag =
1002 B9600 | CS8 | CREAD | HUPCL | CLOCAL;
1003 hp_simserial_driver->flags = TTY_DRIVER_REAL_RAW;
1004 tty_set_operations(hp_simserial_driver, &hp_ops);
1005
1006 /*
1007 * Let's have a little bit of fun !
1008 */
1009 for (i = 0, state = rs_table; i < NR_PORTS; i++,state++) {
1010
1011 if (state->type == PORT_UNKNOWN) continue;
1012
1013 if (!state->irq) {
1014 state->irq = assign_irq_vector(AUTO_ASSIGN);
1015 ia64_ssc_connect_irq(KEYBOARD_INTR, state->irq);
1016 }
1017
1018 printk(KERN_INFO "ttyS%d at 0x%04lx (irq = %d) is a %s\n",
1019 state->line,
1020 state->port, state->irq,
1021 uart_config[state->type].name);
1022 }
1023
1024 if (tty_register_driver(hp_simserial_driver))
1025 panic("Couldn't register simserial driver\n");
1026
1027 return 0;
1028}
1029
1030#ifndef MODULE
1031__initcall(simrs_init);
1032#endif
diff --git a/arch/ia64/hp/zx1/Makefile b/arch/ia64/hp/zx1/Makefile
new file mode 100644
index 000000000000..61e878729d1e
--- /dev/null
+++ b/arch/ia64/hp/zx1/Makefile
@@ -0,0 +1,8 @@
1#
2# ia64/hp/zx1/Makefile
3#
4# Copyright (C) 2002 Hewlett Packard
5# Copyright (C) Alex Williamson (alex_williamson@hp.com)
6#
7
8obj-$(CONFIG_IA64_GENERIC) += hpzx1_machvec.o hpzx1_swiotlb_machvec.o
diff --git a/arch/ia64/hp/zx1/hpzx1_machvec.c b/arch/ia64/hp/zx1/hpzx1_machvec.c
new file mode 100644
index 000000000000..32518b0f923e
--- /dev/null
+++ b/arch/ia64/hp/zx1/hpzx1_machvec.c
@@ -0,0 +1,3 @@
1#define MACHVEC_PLATFORM_NAME hpzx1
2#define MACHVEC_PLATFORM_HEADER <asm/machvec_hpzx1.h>
3#include <asm/machvec_init.h>
diff --git a/arch/ia64/hp/zx1/hpzx1_swiotlb_machvec.c b/arch/ia64/hp/zx1/hpzx1_swiotlb_machvec.c
new file mode 100644
index 000000000000..4392a96b3c58
--- /dev/null
+++ b/arch/ia64/hp/zx1/hpzx1_swiotlb_machvec.c
@@ -0,0 +1,3 @@
1#define MACHVEC_PLATFORM_NAME hpzx1_swiotlb
2#define MACHVEC_PLATFORM_HEADER <asm/machvec_hpzx1_swiotlb.h>
3#include <asm/machvec_init.h>
diff --git a/arch/ia64/ia32/Makefile b/arch/ia64/ia32/Makefile
new file mode 100644
index 000000000000..2ed90da81166
--- /dev/null
+++ b/arch/ia64/ia32/Makefile
@@ -0,0 +1,12 @@
1#
2# Makefile for the ia32 kernel emulation subsystem.
3#
4
5obj-y := ia32_entry.o sys_ia32.o ia32_ioctl.o ia32_signal.o \
6 ia32_support.o ia32_traps.o binfmt_elf32.o ia32_ldt.o
7
8CFLAGS_ia32_ioctl.o += -Ifs/
9
10# Don't let GCC use f16-f31 so that save_ia32_fpstate_live() and
11# restore_ia32_fpstate_live() can be sure the live registers contain user-level state.
12CFLAGS_ia32_signal.o += -mfixed-range=f16-f31
diff --git a/arch/ia64/ia32/binfmt_elf32.c b/arch/ia64/ia32/binfmt_elf32.c
new file mode 100644
index 000000000000..31de70b7c67f
--- /dev/null
+++ b/arch/ia64/ia32/binfmt_elf32.c
@@ -0,0 +1,294 @@
1/*
2 * IA-32 ELF support.
3 *
4 * Copyright (C) 1999 Arun Sharma <arun.sharma@intel.com>
5 * Copyright (C) 2001 Hewlett-Packard Co
6 * David Mosberger-Tang <davidm@hpl.hp.com>
7 *
8 * 06/16/00 A. Mallick initialize csd/ssd/tssd/cflg for ia32_load_state
9 * 04/13/01 D. Mosberger dropped saving tssd in ar.k1---it's not needed
10 * 09/14/01 D. Mosberger fixed memory management for gdt/tss page
11 */
12#include <linux/config.h>
13
14#include <linux/types.h>
15#include <linux/mm.h>
16#include <linux/security.h>
17
18#include <asm/param.h>
19#include <asm/signal.h>
20
21#include "ia32priv.h"
22#include "elfcore32.h"
23
24/* Override some function names */
25#undef start_thread
26#define start_thread ia32_start_thread
27#define elf_format elf32_format
28#define init_elf_binfmt init_elf32_binfmt
29#define exit_elf_binfmt exit_elf32_binfmt
30
31#undef CLOCKS_PER_SEC
32#define CLOCKS_PER_SEC IA32_CLOCKS_PER_SEC
33
34extern void ia64_elf32_init (struct pt_regs *regs);
35
36static void elf32_set_personality (void);
37
38#define setup_arg_pages(bprm,tos,exec) ia32_setup_arg_pages(bprm,exec)
39#define elf_map elf32_map
40
41#undef SET_PERSONALITY
42#define SET_PERSONALITY(ex, ibcs2) elf32_set_personality()
43
44#define elf_read_implies_exec(ex, have_pt_gnu_stack) (!(have_pt_gnu_stack))
45
46/* Ugly but avoids duplication */
47#include "../../../fs/binfmt_elf.c"
48
49extern struct page *ia32_shared_page[];
50extern unsigned long *ia32_gdt;
51extern struct page *ia32_gate_page;
52
53struct page *
54ia32_install_shared_page (struct vm_area_struct *vma, unsigned long address, int *type)
55{
56 struct page *pg = ia32_shared_page[smp_processor_id()];
57 get_page(pg);
58 if (type)
59 *type = VM_FAULT_MINOR;
60 return pg;
61}
62
63struct page *
64ia32_install_gate_page (struct vm_area_struct *vma, unsigned long address, int *type)
65{
66 struct page *pg = ia32_gate_page;
67 get_page(pg);
68 if (type)
69 *type = VM_FAULT_MINOR;
70 return pg;
71}
72
73
74static struct vm_operations_struct ia32_shared_page_vm_ops = {
75 .nopage = ia32_install_shared_page
76};
77
78static struct vm_operations_struct ia32_gate_page_vm_ops = {
79 .nopage = ia32_install_gate_page
80};
81
82void
83ia64_elf32_init (struct pt_regs *regs)
84{
85 struct vm_area_struct *vma;
86
87 /*
88 * Map GDT below 4GB, where the processor can find it. We need to map
89 * it with privilege level 3 because the IVE uses non-privileged accesses to these
90 * tables. IA-32 segmentation is used to protect against IA-32 accesses to them.
91 */
92 vma = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
93 if (vma) {
94 memset(vma, 0, sizeof(*vma));
95 vma->vm_mm = current->mm;
96 vma->vm_start = IA32_GDT_OFFSET;
97 vma->vm_end = vma->vm_start + PAGE_SIZE;
98 vma->vm_page_prot = PAGE_SHARED;
99 vma->vm_flags = VM_READ|VM_MAYREAD|VM_RESERVED;
100 vma->vm_ops = &ia32_shared_page_vm_ops;
101 down_write(&current->mm->mmap_sem);
102 {
103 if (insert_vm_struct(current->mm, vma)) {
104 kmem_cache_free(vm_area_cachep, vma);
105 up_write(&current->mm->mmap_sem);
106 BUG();
107 }
108 }
109 up_write(&current->mm->mmap_sem);
110 }
111
112 /*
113	 * When the user stack is not executable, pushing sigreturn code onto the
114	 * stack causes a segmentation fault when returning to the kernel. So the
115	 * sigreturn code is now kept in a dedicated gate page, which pretcode
116	 * points to when setup_frame_ia32() sets up the signal frame.
117 */
118 vma = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
119 if (vma) {
120 memset(vma, 0, sizeof(*vma));
121 vma->vm_mm = current->mm;
122 vma->vm_start = IA32_GATE_OFFSET;
123 vma->vm_end = vma->vm_start + PAGE_SIZE;
124 vma->vm_page_prot = PAGE_COPY_EXEC;
125 vma->vm_flags = VM_READ | VM_MAYREAD | VM_EXEC
126 | VM_MAYEXEC | VM_RESERVED;
127 vma->vm_ops = &ia32_gate_page_vm_ops;
128 down_write(&current->mm->mmap_sem);
129 {
130 if (insert_vm_struct(current->mm, vma)) {
131 kmem_cache_free(vm_area_cachep, vma);
132 up_write(&current->mm->mmap_sem);
133 BUG();
134 }
135 }
136 up_write(&current->mm->mmap_sem);
137 }
138
139 /*
140 * Install LDT as anonymous memory. This gives us all-zero segment descriptors
141 * until a task modifies them via modify_ldt().
142 */
143 vma = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
144 if (vma) {
145 memset(vma, 0, sizeof(*vma));
146 vma->vm_mm = current->mm;
147 vma->vm_start = IA32_LDT_OFFSET;
148 vma->vm_end = vma->vm_start + PAGE_ALIGN(IA32_LDT_ENTRIES*IA32_LDT_ENTRY_SIZE);
149 vma->vm_page_prot = PAGE_SHARED;
150 vma->vm_flags = VM_READ|VM_WRITE|VM_MAYREAD|VM_MAYWRITE;
151 down_write(&current->mm->mmap_sem);
152 {
153 if (insert_vm_struct(current->mm, vma)) {
154 kmem_cache_free(vm_area_cachep, vma);
155 up_write(&current->mm->mmap_sem);
156 BUG();
157 }
158 }
159 up_write(&current->mm->mmap_sem);
160 }
161
162 ia64_psr(regs)->ac = 0; /* turn off alignment checking */
163 regs->loadrs = 0;
164 /*
165 * According to the ABI %edx points to an `atexit' handler. Since we don't have
166 * one we'll set it to 0 and initialize all the other registers just to make
167 * things more deterministic, ala the i386 implementation.
168 */
169 regs->r8 = 0; /* %eax */
170 regs->r11 = 0; /* %ebx */
171 regs->r9 = 0; /* %ecx */
172 regs->r10 = 0; /* %edx */
173 regs->r13 = 0; /* %ebp */
174 regs->r14 = 0; /* %esi */
175 regs->r15 = 0; /* %edi */
176
177 current->thread.eflag = IA32_EFLAG;
178 current->thread.fsr = IA32_FSR_DEFAULT;
179 current->thread.fcr = IA32_FCR_DEFAULT;
180 current->thread.fir = 0;
181 current->thread.fdr = 0;
182
183 /*
184 * Setup GDTD. Note: GDTD is the descrambled version of the pseudo-descriptor
185 * format defined by Figure 3-11 "Pseudo-Descriptor Format" in the IA-32
186 * architecture manual. Also note that the only fields that are not ignored are
187 * `base', `limit', 'G', `P' (must be 1) and `S' (must be 0).
188 */
189 regs->r31 = IA32_SEG_UNSCRAMBLE(IA32_SEG_DESCRIPTOR(IA32_GDT_OFFSET, IA32_PAGE_SIZE - 1,
190 0, 0, 0, 1, 0, 0, 0));
191 /* Setup the segment selectors */
192 regs->r16 = (__USER_DS << 16) | __USER_DS; /* ES == DS, GS, FS are zero */
193 regs->r17 = (__USER_DS << 16) | __USER_CS; /* SS, CS; ia32_load_state() sets TSS and LDT */
194
195 ia32_load_segment_descriptors(current);
196 ia32_load_state(current);
197}
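The GDTD value placed in r31 above is built with IA32_SEG_DESCRIPTOR and then unscrambled into the pseudo-descriptor layout. As a rough illustration of how a standard IA-32 segment descriptor scatters base and limit across non-contiguous bit fields, here is a minimal user-space sketch; pack_descriptor() is a hypothetical helper that follows the layout in the IA-32 architecture manual, not the kernel's IA32_SEG_DESCRIPTOR macro, and the argument order differs.

#include <stdint.h>
#include <stdio.h>

/* limit 0:15 -> bits 0-15, base 0:23 -> bits 16-39, type/S/DPL/P -> bits 40-47,
 * limit 16:19 -> bits 48-51, AVL/L/D/G -> bits 52-55, base 24:31 -> bits 56-63. */
static uint64_t pack_descriptor(uint32_t base, uint32_t limit, unsigned type,
                                unsigned s, unsigned dpl, unsigned p,
                                unsigned avl, unsigned l, unsigned db, unsigned g)
{
	uint64_t d = 0;
	d |= (uint64_t)(limit & 0xffff);                /* limit bits 0..15          */
	d |= (uint64_t)(base  & 0xffffff) << 16;        /* base bits 0..23           */
	d |= (uint64_t)(type & 0xf) << 40;              /* segment type              */
	d |= (uint64_t)(s    & 0x1) << 44;              /* S: code/data vs. system   */
	d |= (uint64_t)(dpl  & 0x3) << 45;              /* descriptor privilege level*/
	d |= (uint64_t)(p    & 0x1) << 47;              /* present                   */
	d |= (uint64_t)((limit >> 16) & 0xf) << 48;     /* limit bits 16..19         */
	d |= (uint64_t)(avl & 0x1) << 52;               /* available for OS use      */
	d |= (uint64_t)(l   & 0x1) << 53;               /* long mode (unused here)   */
	d |= (uint64_t)(db  & 0x1) << 54;               /* default operand size      */
	d |= (uint64_t)(g   & 0x1) << 55;               /* granularity               */
	d |= (uint64_t)(base >> 24) << 56;              /* base bits 24..31          */
	return d;
}

int main(void)
{
	/* e.g. a flat 4GB read/write data segment at DPL 3 */
	printf("%#llx\n", (unsigned long long)
	       pack_descriptor(0x0, 0xfffff, 0x2, 1, 3, 1, 0, 0, 1, 1));
	return 0;
}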
198
199int
200ia32_setup_arg_pages (struct linux_binprm *bprm, int executable_stack)
201{
202 unsigned long stack_base;
203 struct vm_area_struct *mpnt;
204 struct mm_struct *mm = current->mm;
205 int i, ret;
206
207 stack_base = IA32_STACK_TOP - MAX_ARG_PAGES*PAGE_SIZE;
208 mm->arg_start = bprm->p + stack_base;
209
210 bprm->p += stack_base;
211 if (bprm->loader)
212 bprm->loader += stack_base;
213 bprm->exec += stack_base;
214
215 mpnt = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
216 if (!mpnt)
217 return -ENOMEM;
218
219 if (security_vm_enough_memory((IA32_STACK_TOP - (PAGE_MASK & (unsigned long) bprm->p))
220 >> PAGE_SHIFT)) {
221 kmem_cache_free(vm_area_cachep, mpnt);
222 return -ENOMEM;
223 }
224
225 memset(mpnt, 0, sizeof(*mpnt));
226
227 down_write(&current->mm->mmap_sem);
228 {
229 mpnt->vm_mm = current->mm;
230 mpnt->vm_start = PAGE_MASK & (unsigned long) bprm->p;
231 mpnt->vm_end = IA32_STACK_TOP;
232 if (executable_stack == EXSTACK_ENABLE_X)
233 mpnt->vm_flags = VM_STACK_FLAGS | VM_EXEC;
234 else if (executable_stack == EXSTACK_DISABLE_X)
235 mpnt->vm_flags = VM_STACK_FLAGS & ~VM_EXEC;
236 else
237 mpnt->vm_flags = VM_STACK_FLAGS;
238 mpnt->vm_page_prot = (mpnt->vm_flags & VM_EXEC)?
239 PAGE_COPY_EXEC: PAGE_COPY;
240 if ((ret = insert_vm_struct(current->mm, mpnt))) {
241 up_write(&current->mm->mmap_sem);
242 kmem_cache_free(vm_area_cachep, mpnt);
243 return ret;
244 }
245 current->mm->stack_vm = current->mm->total_vm = vma_pages(mpnt);
246 }
247
248 for (i = 0 ; i < MAX_ARG_PAGES ; i++) {
249 struct page *page = bprm->page[i];
250 if (page) {
251 bprm->page[i] = NULL;
252 install_arg_page(mpnt, page, stack_base);
253 }
254 stack_base += PAGE_SIZE;
255 }
256 up_write(&current->mm->mmap_sem);
257
258 /* Can't do it in ia64_elf32_init(). Needs to be done before calls to
259 elf32_map() */
260 current->thread.ppl = ia32_init_pp_list();
261
262 return 0;
263}
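ia32_setup_arg_pages() relocates bprm->p, which exec code keeps as an offset that counts down from MAX_ARG_PAGES*PAGE_SIZE, to an absolute IA-32 stack address by adding stack_base. A tiny worked example of that arithmetic follows; the numeric constants are invented for illustration, the real IA32_STACK_TOP, MAX_ARG_PAGES and PAGE_SIZE come from the kernel headers.

#include <stdio.h>

int main(void)
{
	unsigned long ia32_stack_top = 0xc0000000UL;         /* assumed stack top       */
	unsigned long page_size = 4096, max_arg_pages = 32;
	unsigned long p = max_arg_pages * page_size - 256;   /* offset left by exec code */

	unsigned long stack_base = ia32_stack_top - max_arg_pages * page_size;
	unsigned long user_sp = p + stack_base;              /* bprm->p after relocation */

	printf("stack_base=%#lx  relocated bprm->p=%#lx\n", stack_base, user_sp);
	return 0;
}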
264
265static void
266elf32_set_personality (void)
267{
268 set_personality(PER_LINUX32);
269 current->thread.map_base = IA32_PAGE_OFFSET/3;
270 current->thread.task_size = IA32_PAGE_OFFSET; /* use what Linux/x86 uses... */
271 set_fs(USER_DS); /* set addr limit for new TASK_SIZE */
272}
273
274static unsigned long
275elf32_map (struct file *filep, unsigned long addr, struct elf_phdr *eppnt, int prot, int type)
276{
277 unsigned long pgoff = (eppnt->p_vaddr) & ~IA32_PAGE_MASK;
278
279 return ia32_do_mmap(filep, (addr & IA32_PAGE_MASK), eppnt->p_filesz + pgoff, prot, type,
280 eppnt->p_offset - pgoff);
281}
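elf32_map() has to cope with PT_LOAD segments whose p_vaddr is not page aligned: it rounds the mapping address down to a page boundary and backs the file offset up by the same amount, growing the length to compensate. A small worked example of that arithmetic, assuming 4KB IA-32 pages and invented segment values:

#include <stdio.h>

int main(void)
{
	unsigned long page_mask = ~0xfffUL;     /* assumes 4KB pages               */
	unsigned long p_vaddr  = 0x08048074;    /* example, not page aligned       */
	unsigned long p_offset = 0x00000074;
	unsigned long p_filesz = 0x1000;

	unsigned long pgoff = p_vaddr & ~page_mask;   /* 0x74: offset into the page */
	unsigned long addr  = p_vaddr & page_mask;    /* 0x08048000: mmap address   */
	unsigned long len   = p_filesz + pgoff;       /* grow length to cover vaddr */
	unsigned long off   = p_offset - pgoff;       /* back the file offset up    */

	printf("map %#lx bytes at %#lx from file offset %#lx\n", len, addr, off);
	return 0;
}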
282
283#define cpu_uses_ia32el() (local_cpu_data->family > 0x1f)
284
285static int __init check_elf32_binfmt(void)
286{
287 if (cpu_uses_ia32el()) {
288 printk("Please use IA-32 EL for executing IA-32 binaries\n");
289 return unregister_binfmt(&elf_format);
290 }
291 return 0;
292}
293
294module_init(check_elf32_binfmt)
diff --git a/arch/ia64/ia32/elfcore32.h b/arch/ia64/ia32/elfcore32.h
new file mode 100644
index 000000000000..b73b8b6b10c1
--- /dev/null
+++ b/arch/ia64/ia32/elfcore32.h
@@ -0,0 +1,138 @@
1/*
2 * IA-32 ELF core dump support.
3 *
4 * Copyright (C) 2003 Arun Sharma <arun.sharma@intel.com>
5 *
6 * Derived from the x86_64 version
7 */
8#ifndef _ELFCORE32_H_
9#define _ELFCORE32_H_
10
11#include <asm/intrinsics.h>
12#include <asm/uaccess.h>
13
14#define USE_ELF_CORE_DUMP 1
15
16/* Override elfcore.h */
17#define _LINUX_ELFCORE_H 1
18typedef unsigned int elf_greg_t;
19
20#define ELF_NGREG (sizeof (struct user_regs_struct32) / sizeof(elf_greg_t))
21typedef elf_greg_t elf_gregset_t[ELF_NGREG];
22
23typedef struct ia32_user_i387_struct elf_fpregset_t;
24typedef struct ia32_user_fxsr_struct elf_fpxregset_t;
25
26struct elf_siginfo
27{
28 int si_signo; /* signal number */
29 int si_code; /* extra code */
30 int si_errno; /* errno */
31};
32
33#define jiffies_to_timeval(a,b) do { (b)->tv_usec = 0; (b)->tv_sec = (a)/HZ; }while(0)
34
35struct elf_prstatus
36{
37 struct elf_siginfo pr_info; /* Info associated with signal */
38 short pr_cursig; /* Current signal */
39 unsigned int pr_sigpend; /* Set of pending signals */
40 unsigned int pr_sighold; /* Set of held signals */
41 pid_t pr_pid;
42 pid_t pr_ppid;
43 pid_t pr_pgrp;
44 pid_t pr_sid;
45 struct compat_timeval pr_utime; /* User time */
46 struct compat_timeval pr_stime; /* System time */
47 struct compat_timeval pr_cutime; /* Cumulative user time */
48 struct compat_timeval pr_cstime; /* Cumulative system time */
49 elf_gregset_t pr_reg; /* GP registers */
50 int pr_fpvalid; /* True if math co-processor being used. */
51};
52
53#define ELF_PRARGSZ (80) /* Number of chars for args */
54
55struct elf_prpsinfo
56{
57 char pr_state; /* numeric process state */
58 char pr_sname; /* char for pr_state */
59 char pr_zomb; /* zombie */
60 char pr_nice; /* nice val */
61 unsigned int pr_flag; /* flags */
62 __u16 pr_uid;
63 __u16 pr_gid;
64 pid_t pr_pid, pr_ppid, pr_pgrp, pr_sid;
65 /* Lots missing */
66 char pr_fname[16]; /* filename of executable */
67 char pr_psargs[ELF_PRARGSZ]; /* initial part of arg list */
68};
69
70#define ELF_CORE_COPY_REGS(pr_reg, regs) \
71 pr_reg[0] = regs->r11; \
72 pr_reg[1] = regs->r9; \
73 pr_reg[2] = regs->r10; \
74 pr_reg[3] = regs->r14; \
75 pr_reg[4] = regs->r15; \
76 pr_reg[5] = regs->r13; \
77 pr_reg[6] = regs->r8; \
78 pr_reg[7] = regs->r16 & 0xffff; \
79 pr_reg[8] = (regs->r16 >> 16) & 0xffff; \
80 pr_reg[9] = (regs->r16 >> 32) & 0xffff; \
81 pr_reg[10] = (regs->r16 >> 48) & 0xffff; \
82 pr_reg[11] = regs->r1; \
83 pr_reg[12] = regs->cr_iip; \
84 pr_reg[13] = regs->r17 & 0xffff; \
85 pr_reg[14] = ia64_getreg(_IA64_REG_AR_EFLAG); \
86 pr_reg[15] = regs->r12; \
87 pr_reg[16] = (regs->r17 >> 16) & 0xffff;
88
89static inline void elf_core_copy_regs(elf_gregset_t *elfregs,
90 struct pt_regs *regs)
91{
92 ELF_CORE_COPY_REGS((*elfregs), regs)
93}
94
95static inline int elf_core_copy_task_regs(struct task_struct *t,
96 elf_gregset_t* elfregs)
97{
98 struct pt_regs *pp = ia64_task_regs(t);
99 ELF_CORE_COPY_REGS((*elfregs), pp);
100 return 1;
101}
102
103static inline int
104elf_core_copy_task_fpregs(struct task_struct *tsk, struct pt_regs *regs, elf_fpregset_t *fpu)
105{
106 struct ia32_user_i387_struct *fpstate = (void*)fpu;
107 mm_segment_t old_fs;
108
109 if (!tsk_used_math(tsk))
110 return 0;
111
112 old_fs = get_fs();
113 set_fs(KERNEL_DS);
114 save_ia32_fpstate(tsk, (struct ia32_user_i387_struct __user *) fpstate);
115 set_fs(old_fs);
116
117 return 1;
118}
119
120#define ELF_CORE_COPY_XFPREGS 1
121static inline int
122elf_core_copy_task_xfpregs(struct task_struct *tsk, elf_fpxregset_t *xfpu)
123{
124 struct ia32_user_fxsr_struct *fpxstate = (void*) xfpu;
125 mm_segment_t old_fs;
126
127 if (!tsk_used_math(tsk))
128 return 0;
129
130 old_fs = get_fs();
131 set_fs(KERNEL_DS);
132 save_ia32_fpxstate(tsk, (struct ia32_user_fxsr_struct __user *) fpxstate);
133 set_fs(old_fs);
134
135 return 1;
136}
137
138#endif /* _ELFCORE32_H_ */
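ELF_CORE_COPY_REGS above recovers the four 16-bit data segment selectors from the single IA-64 scratch register r16, where the ia32 support code keeps ds in bits 0-15, es in 16-31, fs in 32-47 and gs in 48-63. A standalone sketch of that unpacking, with an invented selector value chosen to match the "ES == DS, GS, FS are zero" convention used at process start:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint64_t r16 = 0x002b002bULL;            /* example: ds=es=0x2b, fs=gs=0 */

	uint16_t ds = r16 & 0xffff;
	uint16_t es = (r16 >> 16) & 0xffff;
	uint16_t fs = (r16 >> 32) & 0xffff;
	uint16_t gs = (r16 >> 48) & 0xffff;

	printf("ds=%#x es=%#x fs=%#x gs=%#x\n", ds, es, fs, gs);
	return 0;
}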
diff --git a/arch/ia64/ia32/ia32_entry.S b/arch/ia64/ia32/ia32_entry.S
new file mode 100644
index 000000000000..829a6d80711c
--- /dev/null
+++ b/arch/ia64/ia32/ia32_entry.S
@@ -0,0 +1,500 @@
1#include <asm/asmmacro.h>
2#include <asm/ia32.h>
3#include <asm/offsets.h>
4#include <asm/signal.h>
5#include <asm/thread_info.h>
6
7#include "../kernel/minstate.h"
8
9 /*
10 * execve() is special because in case of success, we need to
11 * setup a null register window frame (in case an IA-32 process
12 * is exec'ing an IA-64 program).
13 */
14ENTRY(ia32_execve)
15 .prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(3)
16 alloc loc1=ar.pfs,3,2,4,0
17 mov loc0=rp
18 .body
19 zxt4 out0=in0 // filename
20 ;; // stop bit between alloc and call
21 zxt4 out1=in1 // argv
22 zxt4 out2=in2 // envp
23 add out3=16,sp // regs
24 br.call.sptk.few rp=sys32_execve
251: cmp.ge p6,p0=r8,r0
26 mov ar.pfs=loc1 // restore ar.pfs
27 ;;
28(p6) mov ar.pfs=r0 // clear ar.pfs in case of success
29 sxt4 r8=r8 // return 64-bit result
30 mov rp=loc0
31 br.ret.sptk.few rp
32END(ia32_execve)
33
34ENTRY(ia32_clone)
35 .prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(5)
36 alloc r16=ar.pfs,5,2,6,0
37 DO_SAVE_SWITCH_STACK
38 mov loc0=rp
39 mov loc1=r16 // save ar.pfs across do_fork
40 .body
41 zxt4 out1=in1 // newsp
42 mov out3=16 // stacksize (compensates for 16-byte scratch area)
43 adds out2=IA64_SWITCH_STACK_SIZE+16,sp // out2 = &regs
44 mov out0=in0 // out0 = clone_flags
45 zxt4 out4=in2 // out4 = parent_tidptr
46 zxt4 out5=in4 // out5 = child_tidptr
47 br.call.sptk.many rp=do_fork
48.ret0: .restore sp
49 adds sp=IA64_SWITCH_STACK_SIZE,sp // pop the switch stack
50 mov ar.pfs=loc1
51 mov rp=loc0
52 br.ret.sptk.many rp
53END(ia32_clone)
54
55ENTRY(sys32_rt_sigsuspend)
56 .prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(8)
57 alloc loc1=ar.pfs,8,2,3,0 // preserve all eight input regs
58 mov loc0=rp
59 mov out0=in0 // mask
60 mov out1=in1 // sigsetsize
61 mov out2=sp // out2 = &sigscratch
62 .fframe 16
63 adds sp=-16,sp // allocate dummy "sigscratch"
64 ;;
65 .body
66 br.call.sptk.many rp=ia32_rt_sigsuspend
671: .restore sp
68 adds sp=16,sp
69 mov rp=loc0
70 mov ar.pfs=loc1
71 br.ret.sptk.many rp
72END(sys32_rt_sigsuspend)
73
74ENTRY(sys32_sigsuspend)
75 .prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(8)
76 alloc loc1=ar.pfs,8,2,3,0 // preserve all eight input regs
77 mov loc0=rp
78 mov out0=in2 // mask (first two args are ignored)
79 ;;
80 mov out1=sp // out1 = &sigscratch
81 .fframe 16
82 adds sp=-16,sp // allocate dummy "sigscratch"
83 .body
84 br.call.sptk.many rp=ia32_sigsuspend
851: .restore sp
86 adds sp=16,sp
87 mov rp=loc0
88 mov ar.pfs=loc1
89 br.ret.sptk.many rp
90END(sys32_sigsuspend)
91
92GLOBAL_ENTRY(ia32_ret_from_clone)
93 PT_REGS_UNWIND_INFO(0)
94{ /*
95 * Some versions of gas generate bad unwind info if the first instruction of a
96 * procedure doesn't go into the first slot of a bundle. This is a workaround.
97 */
98 nop.m 0
99 nop.i 0
100 /*
101 * We need to call schedule_tail() to complete the scheduling process.
102 * Called by ia64_switch_to after do_fork()->copy_thread(). r8 contains the
103 * address of the previously executing task.
104 */
105 br.call.sptk.many rp=ia64_invoke_schedule_tail
106}
107.ret1:
108 adds r2=TI_FLAGS+IA64_TASK_SIZE,r13
109 ;;
110 ld4 r2=[r2]
111 ;;
112 mov r8=0
113 and r2=_TIF_SYSCALL_TRACEAUDIT,r2
114 ;;
115 cmp.ne p6,p0=r2,r0
116(p6) br.cond.spnt .ia32_strace_check_retval
117 ;; // prevent RAW on r8
118END(ia32_ret_from_clone)
 119	 // fall through
120GLOBAL_ENTRY(ia32_ret_from_syscall)
121 PT_REGS_UNWIND_INFO(0)
122
123 cmp.ge p6,p7=r8,r0 // syscall executed successfully?
124 adds r2=IA64_PT_REGS_R8_OFFSET+16,sp // r2 = &pt_regs.r8
125 ;;
126 alloc r3=ar.pfs,0,0,0,0 // drop the syscall argument frame
127 st8 [r2]=r8 // store return value in slot for r8
128 br.cond.sptk.many ia64_leave_kernel
129END(ia32_ret_from_syscall)
130
131 //
132 // Invoke a system call, but do some tracing before and after the call.
133 // We MUST preserve the current register frame throughout this routine
134 // because some system calls (such as ia64_execve) directly
135 // manipulate ar.pfs.
136 //
137 // Input:
138 // r8 = syscall number
139 // b6 = syscall entry point
140 //
141GLOBAL_ENTRY(ia32_trace_syscall)
142 PT_REGS_UNWIND_INFO(0)
143 mov r3=-38
144 adds r2=IA64_PT_REGS_R8_OFFSET+16,sp
145 ;;
146 st8 [r2]=r3 // initialize return code to -ENOSYS
147 br.call.sptk.few rp=syscall_trace_enter // give parent a chance to catch syscall args
148.ret2: // Need to reload arguments (they may be changed by the tracing process)
149 adds r2=IA64_PT_REGS_R1_OFFSET+16,sp // r2 = &pt_regs.r1
150 adds r3=IA64_PT_REGS_R13_OFFSET+16,sp // r3 = &pt_regs.r13
151 mov r15=IA32_NR_syscalls
152 ;;
153 ld4 r8=[r2],IA64_PT_REGS_R9_OFFSET-IA64_PT_REGS_R1_OFFSET
154 movl r16=ia32_syscall_table
155 ;;
156 ld4 r33=[r2],8 // r9 == ecx
157 ld4 r37=[r3],16 // r13 == ebp
158 cmp.ltu.unc p6,p7=r8,r15
159 ;;
160 ld4 r34=[r2],8 // r10 == edx
161 ld4 r36=[r3],8 // r15 == edi
162(p6) shladd r16=r8,3,r16 // force ni_syscall if not valid syscall number
163 ;;
164 ld8 r16=[r16]
165 ;;
166 ld4 r32=[r2],8 // r11 == ebx
167 mov b6=r16
168 ld4 r35=[r3],8 // r14 == esi
169 br.call.sptk.few rp=b6 // do the syscall
170.ia32_strace_check_retval:
171 cmp.lt p6,p0=r8,r0 // syscall failed?
172 adds r2=IA64_PT_REGS_R8_OFFSET+16,sp // r2 = &pt_regs.r8
173 ;;
174 st8.spill [r2]=r8 // store return value in slot for r8
175 br.call.sptk.few rp=syscall_trace_leave // give parent a chance to catch return value
176.ret4: alloc r2=ar.pfs,0,0,0,0 // drop the syscall argument frame
177 br.cond.sptk.many ia64_leave_kernel
178END(ia32_trace_syscall)
179
180GLOBAL_ENTRY(sys32_vfork)
181 alloc r16=ar.pfs,2,2,4,0;;
182 mov out0=IA64_CLONE_VFORK|IA64_CLONE_VM|SIGCHLD // out0 = clone_flags
183 br.cond.sptk.few .fork1 // do the work
184END(sys32_vfork)
185
186GLOBAL_ENTRY(sys32_fork)
187 .prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(2)
188 alloc r16=ar.pfs,2,2,4,0
189 mov out0=SIGCHLD // out0 = clone_flags
190 ;;
191.fork1:
192 mov loc0=rp
193 mov loc1=r16 // save ar.pfs across do_fork
194 DO_SAVE_SWITCH_STACK
195
196 .body
197
198 mov out1=0
199 mov out3=0
200 adds out2=IA64_SWITCH_STACK_SIZE+16,sp // out2 = &regs
201 br.call.sptk.few rp=do_fork
202.ret5: .restore sp
203 adds sp=IA64_SWITCH_STACK_SIZE,sp // pop the switch stack
204 mov ar.pfs=loc1
205 mov rp=loc0
206 br.ret.sptk.many rp
207END(sys32_fork)
208
209 .rodata
210 .align 8
211 .globl ia32_syscall_table
212ia32_syscall_table:
 213	 data8 sys_ni_syscall /* 0 - old "setup()" system call */
214 data8 sys_exit
215 data8 sys32_fork
216 data8 sys_read
217 data8 sys_write
218 data8 sys32_open /* 5 */
219 data8 sys_close
220 data8 sys32_waitpid
221 data8 sys_creat
222 data8 sys_link
223 data8 sys_unlink /* 10 */
224 data8 ia32_execve
225 data8 sys_chdir
226 data8 compat_sys_time
227 data8 sys_mknod
228 data8 sys_chmod /* 15 */
229 data8 sys_lchown /* 16-bit version */
230 data8 sys_ni_syscall /* old break syscall holder */
231 data8 sys_ni_syscall
232 data8 sys32_lseek
233 data8 sys_getpid /* 20 */
234 data8 compat_sys_mount
235 data8 sys_oldumount
236 data8 sys_setuid /* 16-bit version */
237 data8 sys_getuid /* 16-bit version */
238 data8 compat_sys_stime /* 25 */
239 data8 sys32_ptrace
240 data8 sys32_alarm
241 data8 sys_ni_syscall
242 data8 sys32_pause
243 data8 compat_sys_utime /* 30 */
244 data8 sys_ni_syscall /* old stty syscall holder */
245 data8 sys_ni_syscall /* old gtty syscall holder */
246 data8 sys_access
247 data8 sys_nice
248 data8 sys_ni_syscall /* 35 */ /* old ftime syscall holder */
249 data8 sys_sync
250 data8 sys_kill
251 data8 sys_rename
252 data8 sys_mkdir
253 data8 sys_rmdir /* 40 */
254 data8 sys_dup
255 data8 sys32_pipe
256 data8 compat_sys_times
257 data8 sys_ni_syscall /* old prof syscall holder */
258 data8 sys32_brk /* 45 */
259 data8 sys_setgid /* 16-bit version */
260 data8 sys_getgid /* 16-bit version */
261 data8 sys32_signal
262 data8 sys_geteuid /* 16-bit version */
263 data8 sys_getegid /* 16-bit version */ /* 50 */
264 data8 sys_acct
 265	 data8 sys_umount /* recycled never used phys() */
266 data8 sys_ni_syscall /* old lock syscall holder */
267 data8 compat_sys_ioctl
268 data8 compat_sys_fcntl /* 55 */
269 data8 sys_ni_syscall /* old mpx syscall holder */
270 data8 sys_setpgid
271 data8 sys_ni_syscall /* old ulimit syscall holder */
272 data8 sys_ni_syscall
273 data8 sys_umask /* 60 */
274 data8 sys_chroot
275 data8 sys_ustat
276 data8 sys_dup2
277 data8 sys_getppid
278 data8 sys_getpgrp /* 65 */
279 data8 sys_setsid
280 data8 sys32_sigaction
281 data8 sys_ni_syscall
282 data8 sys_ni_syscall
283 data8 sys_setreuid /* 16-bit version */ /* 70 */
284 data8 sys_setregid /* 16-bit version */
285 data8 sys32_sigsuspend
286 data8 compat_sys_sigpending
287 data8 sys_sethostname
288 data8 compat_sys_setrlimit /* 75 */
289 data8 compat_sys_old_getrlimit
290 data8 compat_sys_getrusage
291 data8 sys32_gettimeofday
292 data8 sys32_settimeofday
293 data8 sys32_getgroups16 /* 80 */
294 data8 sys32_setgroups16
295 data8 sys32_old_select
296 data8 sys_symlink
297 data8 sys_ni_syscall
298 data8 sys_readlink /* 85 */
299 data8 sys_uselib
300 data8 sys_swapon
301 data8 sys_reboot
302 data8 sys32_readdir
303 data8 sys32_mmap /* 90 */
304 data8 sys32_munmap
305 data8 sys_truncate
306 data8 sys_ftruncate
307 data8 sys_fchmod
308 data8 sys_fchown /* 16-bit version */ /* 95 */
309 data8 sys_getpriority
310 data8 sys_setpriority
311 data8 sys_ni_syscall /* old profil syscall holder */
312 data8 compat_sys_statfs
313 data8 compat_sys_fstatfs /* 100 */
314 data8 sys_ni_syscall /* ioperm */
315 data8 compat_sys_socketcall
316 data8 sys_syslog
317 data8 compat_sys_setitimer
318 data8 compat_sys_getitimer /* 105 */
319 data8 compat_sys_newstat
320 data8 compat_sys_newlstat
321 data8 compat_sys_newfstat
322 data8 sys_ni_syscall
323 data8 sys_ni_syscall /* iopl */ /* 110 */
324 data8 sys_vhangup
325 data8 sys_ni_syscall /* used to be sys_idle */
326 data8 sys_ni_syscall
327 data8 compat_sys_wait4
328 data8 sys_swapoff /* 115 */
329 data8 sys32_sysinfo
330 data8 sys32_ipc
331 data8 sys_fsync
332 data8 sys32_sigreturn
333 data8 ia32_clone /* 120 */
334 data8 sys_setdomainname
335 data8 sys32_newuname
336 data8 sys32_modify_ldt
337 data8 sys_ni_syscall /* adjtimex */
338 data8 sys32_mprotect /* 125 */
339 data8 compat_sys_sigprocmask
340 data8 sys_ni_syscall /* create_module */
341 data8 sys_ni_syscall /* init_module */
342 data8 sys_ni_syscall /* delete_module */
343 data8 sys_ni_syscall /* get_kernel_syms */ /* 130 */
344 data8 sys_quotactl
345 data8 sys_getpgid
346 data8 sys_fchdir
347 data8 sys_ni_syscall /* sys_bdflush */
348 data8 sys_sysfs /* 135 */
349 data8 sys32_personality
350 data8 sys_ni_syscall /* for afs_syscall */
351 data8 sys_setfsuid /* 16-bit version */
352 data8 sys_setfsgid /* 16-bit version */
353 data8 sys_llseek /* 140 */
354 data8 compat_sys_getdents
355 data8 compat_sys_select
356 data8 sys_flock
357 data8 sys32_msync
358 data8 compat_sys_readv /* 145 */
359 data8 compat_sys_writev
360 data8 sys_getsid
361 data8 sys_fdatasync
362 data8 sys32_sysctl
363 data8 sys_mlock /* 150 */
364 data8 sys_munlock
365 data8 sys_mlockall
366 data8 sys_munlockall
367 data8 sys_sched_setparam
368 data8 sys_sched_getparam /* 155 */
369 data8 sys_sched_setscheduler
370 data8 sys_sched_getscheduler
371 data8 sys_sched_yield
372 data8 sys_sched_get_priority_max
373 data8 sys_sched_get_priority_min /* 160 */
374 data8 sys32_sched_rr_get_interval
375 data8 compat_sys_nanosleep
376 data8 sys32_mremap
377 data8 sys_setresuid /* 16-bit version */
378 data8 sys32_getresuid16 /* 16-bit version */ /* 165 */
379 data8 sys_ni_syscall /* vm86 */
380 data8 sys_ni_syscall /* sys_query_module */
381 data8 sys_poll
382 data8 sys_ni_syscall /* nfsservctl */
383 data8 sys_setresgid /* 170 */
384 data8 sys32_getresgid16
385 data8 sys_prctl
386 data8 sys32_rt_sigreturn
387 data8 sys32_rt_sigaction
388 data8 sys32_rt_sigprocmask /* 175 */
389 data8 sys_rt_sigpending
390 data8 compat_sys_rt_sigtimedwait
391 data8 sys32_rt_sigqueueinfo
392 data8 sys32_rt_sigsuspend
393 data8 sys32_pread /* 180 */
394 data8 sys32_pwrite
395 data8 sys_chown /* 16-bit version */
396 data8 sys_getcwd
397 data8 sys_capget
398 data8 sys_capset /* 185 */
399 data8 sys32_sigaltstack
400 data8 sys32_sendfile
401 data8 sys_ni_syscall /* streams1 */
402 data8 sys_ni_syscall /* streams2 */
403 data8 sys32_vfork /* 190 */
404 data8 compat_sys_getrlimit
405 data8 sys32_mmap2
406 data8 sys32_truncate64
407 data8 sys32_ftruncate64
408 data8 sys32_stat64 /* 195 */
409 data8 sys32_lstat64
410 data8 sys32_fstat64
411 data8 sys_lchown
412 data8 sys_getuid
413 data8 sys_getgid /* 200 */
414 data8 sys_geteuid
415 data8 sys_getegid
416 data8 sys_setreuid
417 data8 sys_setregid
418 data8 sys_getgroups /* 205 */
419 data8 sys_setgroups
420 data8 sys_fchown
421 data8 sys_setresuid
422 data8 sys_getresuid
423 data8 sys_setresgid /* 210 */
424 data8 sys_getresgid
425 data8 sys_chown
426 data8 sys_setuid
427 data8 sys_setgid
428 data8 sys_setfsuid /* 215 */
429 data8 sys_setfsgid
430 data8 sys_pivot_root
431 data8 sys_mincore
432 data8 sys_madvise
433 data8 compat_sys_getdents64 /* 220 */
434 data8 compat_sys_fcntl64
435 data8 sys_ni_syscall /* reserved for TUX */
436 data8 sys_ni_syscall /* reserved for Security */
437 data8 sys_gettid
438 data8 sys_readahead /* 225 */
439 data8 sys_setxattr
440 data8 sys_lsetxattr
441 data8 sys_fsetxattr
442 data8 sys_getxattr
443 data8 sys_lgetxattr /* 230 */
444 data8 sys_fgetxattr
445 data8 sys_listxattr
446 data8 sys_llistxattr
447 data8 sys_flistxattr
448 data8 sys_removexattr /* 235 */
449 data8 sys_lremovexattr
450 data8 sys_fremovexattr
451 data8 sys_tkill
452 data8 sys_sendfile64
453 data8 compat_sys_futex /* 240 */
454 data8 compat_sys_sched_setaffinity
455 data8 compat_sys_sched_getaffinity
456 data8 sys32_set_thread_area
457 data8 sys32_get_thread_area
458 data8 compat_sys_io_setup /* 245 */
459 data8 sys_io_destroy
460 data8 compat_sys_io_getevents
461 data8 compat_sys_io_submit
462 data8 sys_io_cancel
463 data8 sys_fadvise64 /* 250 */
464 data8 sys_ni_syscall
465 data8 sys_exit_group
466 data8 sys_lookup_dcookie
467 data8 sys_epoll_create
468 data8 sys32_epoll_ctl /* 255 */
469 data8 sys32_epoll_wait
470 data8 sys_remap_file_pages
471 data8 sys_set_tid_address
472 data8 sys32_timer_create
473 data8 compat_sys_timer_settime /* 260 */
474 data8 compat_sys_timer_gettime
475 data8 sys_timer_getoverrun
476 data8 sys_timer_delete
477 data8 compat_sys_clock_settime
478 data8 compat_sys_clock_gettime /* 265 */
479 data8 compat_sys_clock_getres
480 data8 compat_sys_clock_nanosleep
481 data8 compat_sys_statfs64
482 data8 compat_sys_fstatfs64
483 data8 sys_tgkill /* 270 */
484 data8 compat_sys_utimes
485 data8 sys32_fadvise64_64
486 data8 sys_ni_syscall
487 data8 sys_ni_syscall
488 data8 sys_ni_syscall /* 275 */
489 data8 sys_ni_syscall
490 data8 compat_sys_mq_open
491 data8 sys_mq_unlink
492 data8 compat_sys_mq_timedsend
493 data8 compat_sys_mq_timedreceive /* 280 */
494 data8 compat_sys_mq_notify
495 data8 compat_sys_mq_getsetattr
496 data8 sys_ni_syscall /* reserved for kexec */
497 data8 compat_sys_waitid
498
499 // guard against failures to increase IA32_NR_syscalls
500 .org ia32_syscall_table + 8*IA32_NR_syscalls
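In the traced path above, ia32_trace_syscall only scales the table pointer (shladd) when the syscall number is below IA32_NR_syscalls; otherwise r16 keeps pointing at entry 0 of ia32_syscall_table, which is sys_ni_syscall, and the return value was already preloaded with -ENOSYS. The same guard expressed as a plain C sketch, with hypothetical handler names standing in for the real table entries:

#include <stdio.h>

typedef long (*syscall_fn)(long, long, long, long, long, long);

static long sys_ni_syscall(long a, long b, long c, long d, long e, long f)
{
	(void)a; (void)b; (void)c; (void)d; (void)e; (void)f;
	return -38;   /* -ENOSYS, the value preloaded into pt_regs.r8 above */
}

static long sys_getpid_demo(long a, long b, long c, long d, long e, long f)
{
	(void)a; (void)b; (void)c; (void)d; (void)e; (void)f;
	return 1234;  /* stand-in for a real handler */
}

/* Entry 0 doubles as the catch-all, exactly like ia32_syscall_table. */
static syscall_fn table[] = { sys_ni_syscall, sys_getpid_demo };
#define NR_DEMO_SYSCALLS (sizeof(table) / sizeof(table[0]))

static long dispatch(unsigned long nr, long args[6])
{
	syscall_fn fn = table[0];               /* default: ni_syscall            */
	if (nr < NR_DEMO_SYSCALLS)              /* the cmp.ltu in the asm above   */
		fn = table[nr];                 /* shladd: table + 8*nr           */
	return fn(args[0], args[1], args[2], args[3], args[4], args[5]);
}

int main(void)
{
	long args[6] = { 0 };
	printf("%ld %ld\n", dispatch(1, args), dispatch(999, args));
	return 0;
}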
diff --git a/arch/ia64/ia32/ia32_ioctl.c b/arch/ia64/ia32/ia32_ioctl.c
new file mode 100644
index 000000000000..9845dabe2613
--- /dev/null
+++ b/arch/ia64/ia32/ia32_ioctl.c
@@ -0,0 +1,48 @@
1/*
2 * IA32 Architecture-specific ioctl shim code
3 *
4 * Copyright (C) 2000 VA Linux Co
5 * Copyright (C) 2000 Don Dugger <n0ano@valinux.com>
6 * Copyright (C) 2001-2003 Hewlett-Packard Co
7 * David Mosberger-Tang <davidm@hpl.hp.com>
8 */
9
10#include <linux/signal.h> /* argh, msdos_fs.h isn't self-contained... */
11#include <linux/syscalls.h>
12#include "ia32priv.h"
13
14#define INCLUDES
15#include "compat_ioctl.c"
16#include <asm/ioctl32.h>
17
18#define IOCTL_NR(a) ((a) & ~(_IOC_SIZEMASK << _IOC_SIZESHIFT))
19
20#define DO_IOCTL(fd, cmd, arg) ({ \
21 int _ret; \
22 mm_segment_t _old_fs = get_fs(); \
23 \
24 set_fs(KERNEL_DS); \
25 _ret = sys_ioctl(fd, cmd, (unsigned long)arg); \
26 set_fs(_old_fs); \
27 _ret; \
28})
29
30#define CODE
31#include "compat_ioctl.c"
32
33typedef int (* ioctl32_handler_t)(unsigned int, unsigned int, unsigned long, struct file *);
34
35#define COMPATIBLE_IOCTL(cmd) HANDLE_IOCTL((cmd),sys_ioctl)
36#define HANDLE_IOCTL(cmd,handler) { (cmd), (ioctl32_handler_t)(handler), NULL },
37#define IOCTL_TABLE_START \
38 struct ioctl_trans ioctl_start[] = {
39#define IOCTL_TABLE_END \
40 };
41
42IOCTL_TABLE_START
43#define DECLARES
44#include "compat_ioctl.c"
45#include <linux/compat_ioctl.h>
46IOCTL_TABLE_END
47
48int ioctl_table_size = ARRAY_SIZE(ioctl_start);
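DO_IOCTL above temporarily widens the address limit with set_fs(KERNEL_DS) so that sys_ioctl() will accept a kernel-space argument, then restores the previous limit. The following is only a user-space model of that save/override/restore discipline; addr_limit and fake_sys_ioctl() are invented stand-ins for get_fs()/set_fs() and the real syscall, not kernel APIs.

#include <stdio.h>

enum { USER_DS = 0, KERNEL_DS = 1 };
static int addr_limit = USER_DS;            /* models the per-task address limit */

static int fake_sys_ioctl(int fd, unsigned int cmd, unsigned long arg)
{
	/* A real syscall would reject kernel pointers unless KERNEL_DS is set. */
	if (addr_limit != KERNEL_DS)
		return -14;   /* -EFAULT */
	printf("ioctl(fd=%d, cmd=%#x, arg=%#lx) with a kernel buffer\n", fd, cmd, arg);
	return 0;
}

static int do_ioctl(int fd, unsigned int cmd, unsigned long kernel_arg)
{
	int old_fs = addr_limit;                /* mm_segment_t old_fs = get_fs(); */
	int ret;

	addr_limit = KERNEL_DS;                 /* set_fs(KERNEL_DS);              */
	ret = fake_sys_ioctl(fd, cmd, kernel_arg);
	addr_limit = old_fs;                    /* set_fs(old_fs);                 */
	return ret;
}

int main(void)
{
	unsigned long buf = 0;
	return do_ioctl(3, 0x1234, (unsigned long)&buf) ? 1 : 0;
}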
diff --git a/arch/ia64/ia32/ia32_ldt.c b/arch/ia64/ia32/ia32_ldt.c
new file mode 100644
index 000000000000..a152738c7d0d
--- /dev/null
+++ b/arch/ia64/ia32/ia32_ldt.c
@@ -0,0 +1,147 @@
1/*
2 * Copyright (C) 2001, 2004 Hewlett-Packard Co
3 * David Mosberger-Tang <davidm@hpl.hp.com>
4 *
5 * Adapted from arch/i386/kernel/ldt.c
6 */
7
8#include <linux/errno.h>
9#include <linux/sched.h>
10#include <linux/string.h>
11#include <linux/mm.h>
12#include <linux/smp.h>
13#include <linux/smp_lock.h>
14#include <linux/vmalloc.h>
15
16#include <asm/uaccess.h>
17
18#include "ia32priv.h"
19
20/*
21 * read_ldt() is not really atomic - this is not a problem since synchronization of reads
22 * and writes done to the LDT has to be assured by user-space anyway. Writes are atomic,
23 * to protect the security checks done on new descriptors.
24 */
25static int
26read_ldt (void __user *ptr, unsigned long bytecount)
27{
28 unsigned long bytes_left, n;
29 char __user *src, *dst;
30 char buf[256]; /* temporary buffer (don't overflow kernel stack!) */
31
32 if (bytecount > IA32_LDT_ENTRIES*IA32_LDT_ENTRY_SIZE)
33 bytecount = IA32_LDT_ENTRIES*IA32_LDT_ENTRY_SIZE;
34
35 bytes_left = bytecount;
36
37 src = (void __user *) IA32_LDT_OFFSET;
38 dst = ptr;
39
40 while (bytes_left) {
41 n = sizeof(buf);
42 if (n > bytes_left)
43 n = bytes_left;
44
45 /*
46 * We know we're reading valid memory, but we still must guard against
47 * running out of memory.
48 */
49 if (__copy_from_user(buf, src, n))
50 return -EFAULT;
51
52 if (copy_to_user(dst, buf, n))
53 return -EFAULT;
54
55 src += n;
56 dst += n;
57 bytes_left -= n;
58 }
59 return bytecount;
60}
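read_ldt() streams the LDT through a small on-stack bounce buffer so it never needs a large kernel-stack allocation. The same chunking loop in a self-contained form, with plain memcpy() standing in for the __copy_from_user()/copy_to_user() pair:

#include <stdio.h>
#include <string.h>

/* Copy `count` bytes from src to dst through a fixed 256-byte bounce buffer,
 * mirroring the loop in read_ldt() above. */
static size_t bounce_copy(char *dst, const char *src, size_t count)
{
	char buf[256];
	size_t left = count;

	while (left) {
		size_t n = sizeof(buf);
		if (n > left)
			n = left;
		memcpy(buf, src, n);   /* __copy_from_user() in the kernel */
		memcpy(dst, buf, n);   /* copy_to_user() in the kernel     */
		src += n;
		dst += n;
		left -= n;
	}
	return count;
}

int main(void)
{
	char src[1000], dst[1000];
	memset(src, 0xab, sizeof(src));
	bounce_copy(dst, src, sizeof(src));
	printf("%d\n", memcmp(src, dst, sizeof(src)) == 0);
	return 0;
}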
61
62static int
63read_default_ldt (void __user * ptr, unsigned long bytecount)
64{
65 unsigned long size;
66 int err;
67
68 /* XXX fix me: should return equivalent of default_ldt[0] */
69 err = 0;
70 size = 8;
71 if (size > bytecount)
72 size = bytecount;
73
74 err = size;
75 if (clear_user(ptr, size))
76 err = -EFAULT;
77
78 return err;
79}
80
81static int
82write_ldt (void __user * ptr, unsigned long bytecount, int oldmode)
83{
84 struct ia32_user_desc ldt_info;
85 __u64 entry;
86 int ret;
87
88 if (bytecount != sizeof(ldt_info))
89 return -EINVAL;
90 if (copy_from_user(&ldt_info, ptr, sizeof(ldt_info)))
91 return -EFAULT;
92
93 if (ldt_info.entry_number >= IA32_LDT_ENTRIES)
94 return -EINVAL;
95 if (ldt_info.contents == 3) {
96 if (oldmode)
97 return -EINVAL;
98 if (ldt_info.seg_not_present == 0)
99 return -EINVAL;
100 }
101
102 if (ldt_info.base_addr == 0 && ldt_info.limit == 0
103 && (oldmode || (ldt_info.contents == 0 && ldt_info.read_exec_only == 1
104 && ldt_info.seg_32bit == 0 && ldt_info.limit_in_pages == 0
105 && ldt_info.seg_not_present == 1 && ldt_info.useable == 0)))
106 /* allow LDTs to be cleared by the user */
107 entry = 0;
108 else
109 /* we must set the "Accessed" bit as IVE doesn't emulate it */
110 entry = IA32_SEG_DESCRIPTOR(ldt_info.base_addr, ldt_info.limit,
111 (((ldt_info.read_exec_only ^ 1) << 1)
112 | (ldt_info.contents << 2)) | 1,
113 1, 3, ldt_info.seg_not_present ^ 1,
114 (oldmode ? 0 : ldt_info.useable),
115 ldt_info.seg_32bit,
116 ldt_info.limit_in_pages);
117 /*
118 * Install the new entry. We know we're accessing valid (mapped) user-level
119 * memory, but we still need to guard against out-of-memory, hence we must use
120 * put_user().
121 */
122 ret = __put_user(entry, (__u64 __user *) IA32_LDT_OFFSET + ldt_info.entry_number);
123 ia32_load_segment_descriptors(current);
124 return ret;
125}
126
127asmlinkage int
128sys32_modify_ldt (int func, unsigned int ptr, unsigned int bytecount)
129{
130 int ret = -ENOSYS;
131
132 switch (func) {
133 case 0:
134 ret = read_ldt(compat_ptr(ptr), bytecount);
135 break;
136 case 1:
137 ret = write_ldt(compat_ptr(ptr), bytecount, 1);
138 break;
139 case 2:
140 ret = read_default_ldt(compat_ptr(ptr), bytecount);
141 break;
142 case 0x11:
143 ret = write_ldt(compat_ptr(ptr), bytecount, 0);
144 break;
145 }
146 return ret;
147}
diff --git a/arch/ia64/ia32/ia32_signal.c b/arch/ia64/ia32/ia32_signal.c
new file mode 100644
index 000000000000..19b02adce68c
--- /dev/null
+++ b/arch/ia64/ia32/ia32_signal.c
@@ -0,0 +1,1036 @@
1/*
2 * IA32 Architecture-specific signal handling support.
3 *
4 * Copyright (C) 1999, 2001-2002, 2005 Hewlett-Packard Co
5 * David Mosberger-Tang <davidm@hpl.hp.com>
6 * Copyright (C) 1999 Arun Sharma <arun.sharma@intel.com>
7 * Copyright (C) 2000 VA Linux Co
8 * Copyright (C) 2000 Don Dugger <n0ano@valinux.com>
9 *
10 * Derived from i386 and Alpha versions.
11 */
12
13#include <linux/errno.h>
14#include <linux/kernel.h>
15#include <linux/mm.h>
16#include <linux/personality.h>
17#include <linux/ptrace.h>
18#include <linux/sched.h>
19#include <linux/signal.h>
20#include <linux/smp.h>
21#include <linux/smp_lock.h>
22#include <linux/stddef.h>
23#include <linux/syscalls.h>
24#include <linux/unistd.h>
25#include <linux/wait.h>
26#include <linux/compat.h>
27
28#include <asm/intrinsics.h>
29#include <asm/uaccess.h>
30#include <asm/rse.h>
31#include <asm/sigcontext.h>
32#include <asm/segment.h>
33
34#include "ia32priv.h"
35
36#include "../kernel/sigframe.h"
37
38#define A(__x) ((unsigned long)(__x))
39
40#define DEBUG_SIG 0
41#define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP)))
42
43#define __IA32_NR_sigreturn 119
44#define __IA32_NR_rt_sigreturn 173
45
46struct sigframe_ia32
47{
48 int pretcode;
49 int sig;
50 struct sigcontext_ia32 sc;
51 struct _fpstate_ia32 fpstate;
52 unsigned int extramask[_COMPAT_NSIG_WORDS-1];
53 char retcode[8];
54};
55
56struct rt_sigframe_ia32
57{
58 int pretcode;
59 int sig;
60 int pinfo;
61 int puc;
62 compat_siginfo_t info;
63 struct ucontext_ia32 uc;
64 struct _fpstate_ia32 fpstate;
65 char retcode[8];
66};
67
68int
69copy_siginfo_from_user32 (siginfo_t *to, compat_siginfo_t __user *from)
70{
71 unsigned long tmp;
72 int err;
73
74 if (!access_ok(VERIFY_READ, from, sizeof(compat_siginfo_t)))
75 return -EFAULT;
76
77 err = __get_user(to->si_signo, &from->si_signo);
78 err |= __get_user(to->si_errno, &from->si_errno);
79 err |= __get_user(to->si_code, &from->si_code);
80
81 if (to->si_code < 0)
82 err |= __copy_from_user(&to->_sifields._pad, &from->_sifields._pad, SI_PAD_SIZE);
83 else {
84 switch (to->si_code >> 16) {
85 case __SI_CHLD >> 16:
86 err |= __get_user(to->si_utime, &from->si_utime);
87 err |= __get_user(to->si_stime, &from->si_stime);
88 err |= __get_user(to->si_status, &from->si_status);
89 default:
90 err |= __get_user(to->si_pid, &from->si_pid);
91 err |= __get_user(to->si_uid, &from->si_uid);
92 break;
93 case __SI_FAULT >> 16:
94 err |= __get_user(tmp, &from->si_addr);
95 to->si_addr = (void __user *) tmp;
96 break;
97 case __SI_POLL >> 16:
98 err |= __get_user(to->si_band, &from->si_band);
99 err |= __get_user(to->si_fd, &from->si_fd);
100 break;
101 case __SI_RT >> 16: /* This is not generated by the kernel as of now. */
102 case __SI_MESGQ >> 16:
103 err |= __get_user(to->si_pid, &from->si_pid);
104 err |= __get_user(to->si_uid, &from->si_uid);
105 err |= __get_user(to->si_int, &from->si_int);
106 break;
107 }
108 }
109 return err;
110}
111
112int
113copy_siginfo_to_user32 (compat_siginfo_t __user *to, siginfo_t *from)
114{
115 unsigned int addr;
116 int err;
117
118 if (!access_ok(VERIFY_WRITE, to, sizeof(compat_siginfo_t)))
119 return -EFAULT;
120
 121	 /* If you change the siginfo_t structure, please be sure
 122	 this code is fixed accordingly.
 123	 It should never copy any padding contained in the structure
 124	 (to avoid security leaks), but must copy the generic
 125	 3 ints plus the relevant union member.
 126	 This routine must also convert siginfo from 64-bit to 32-bit
 127	 at the same time. */
128 err = __put_user(from->si_signo, &to->si_signo);
129 err |= __put_user(from->si_errno, &to->si_errno);
130 err |= __put_user((short)from->si_code, &to->si_code);
131 if (from->si_code < 0)
132 err |= __copy_to_user(&to->_sifields._pad, &from->_sifields._pad, SI_PAD_SIZE);
133 else {
134 switch (from->si_code >> 16) {
135 case __SI_CHLD >> 16:
136 err |= __put_user(from->si_utime, &to->si_utime);
137 err |= __put_user(from->si_stime, &to->si_stime);
138 err |= __put_user(from->si_status, &to->si_status);
139 default:
140 err |= __put_user(from->si_pid, &to->si_pid);
141 err |= __put_user(from->si_uid, &to->si_uid);
142 break;
143 case __SI_FAULT >> 16:
144 /* avoid type-checking warnings by copying _pad[0] in lieu of si_addr... */
145 err |= __put_user(from->_sifields._pad[0], &to->si_addr);
146 break;
147 case __SI_POLL >> 16:
148 err |= __put_user(from->si_band, &to->si_band);
149 err |= __put_user(from->si_fd, &to->si_fd);
150 break;
151 case __SI_TIMER >> 16:
152 err |= __put_user(from->si_tid, &to->si_tid);
153 err |= __put_user(from->si_overrun, &to->si_overrun);
154 addr = (unsigned long) from->si_ptr;
155 err |= __put_user(addr, &to->si_ptr);
156 break;
157 case __SI_RT >> 16: /* Not generated by the kernel as of now. */
158 case __SI_MESGQ >> 16:
159 err |= __put_user(from->si_uid, &to->si_uid);
160 err |= __put_user(from->si_pid, &to->si_pid);
161 addr = (unsigned long) from->si_ptr;
162 err |= __put_user(addr, &to->si_ptr);
163 break;
164 }
165 }
166 return err;
167}
168
169
170/*
171 * SAVE and RESTORE of ia32 fpstate info, from ia64 current state
172 * Used in exception handler to pass the fpstate to the user, and restore
173 * the fpstate while returning from the exception handler.
174 *
175 * fpstate info and their mapping to IA64 regs:
176 * fpstate REG(BITS) Attribute Comments
177 * cw ar.fcr(0:12) with bits 7 and 6 not used
178 * sw ar.fsr(0:15)
179 * tag ar.fsr(16:31) with odd numbered bits not used
180 * (read returns 0, writes ignored)
181 * ipoff ar.fir(0:31)
182 * cssel ar.fir(32:47)
183 * dataoff ar.fdr(0:31)
184 * datasel ar.fdr(32:47)
185 *
186 * _st[(0+TOS)%8] f8
187 * _st[(1+TOS)%8] f9
188 * _st[(2+TOS)%8] f10
189 * _st[(3+TOS)%8] f11 (f8..f11 from ptregs)
190 * : : : (f12..f15 from live reg)
191 * : : :
192 * _st[(7+TOS)%8] f15 TOS=sw.top(bits11:13)
193 *
194 * status Same as sw RO
195 * magic 0 as X86_FXSR_MAGIC in ia32
196 * mxcsr Bits(7:15)=ar.fcr(39:47)
197 * Bits(0:5) =ar.fsr(32:37) with bit 6 reserved
198 * _xmm[0..7] f16..f31 (live registers)
199 * with _xmm[0]
200 * Bit(64:127)=f17(0:63)
201 * Bit(0:63)=f16(0:63)
202 * All other fields unused...
203 */
204
205static int
206save_ia32_fpstate_live (struct _fpstate_ia32 __user *save)
207{
208 struct task_struct *tsk = current;
209 struct pt_regs *ptp;
210 struct _fpreg_ia32 *fpregp;
211 char buf[32];
212 unsigned long fsr, fcr, fir, fdr;
213 unsigned long new_fsr;
214 unsigned long num128[2];
215 unsigned long mxcsr=0;
216 int fp_tos, fr8_st_map;
217
218 if (!access_ok(VERIFY_WRITE, save, sizeof(*save)))
219 return -EFAULT;
220
221 /* Read in fsr, fcr, fir, fdr and copy onto fpstate */
222 fsr = ia64_getreg(_IA64_REG_AR_FSR);
223 fcr = ia64_getreg(_IA64_REG_AR_FCR);
224 fir = ia64_getreg(_IA64_REG_AR_FIR);
225 fdr = ia64_getreg(_IA64_REG_AR_FDR);
226
227 /*
228 * We need to clear the exception state before calling the signal handler. Clear
 229	 * bit 15 and bits 0-7 in the fp status word, similar to what the fnclex
 230	 * instruction does.
231 */
232 new_fsr = fsr & ~0x80ff;
233 ia64_setreg(_IA64_REG_AR_FSR, new_fsr);
234
235 __put_user(fcr & 0xffff, &save->cw);
236 __put_user(fsr & 0xffff, &save->sw);
237 __put_user((fsr>>16) & 0xffff, &save->tag);
238 __put_user(fir, &save->ipoff);
239 __put_user((fir>>32) & 0xffff, &save->cssel);
240 __put_user(fdr, &save->dataoff);
241 __put_user((fdr>>32) & 0xffff, &save->datasel);
242 __put_user(fsr & 0xffff, &save->status);
243
244 mxcsr = ((fcr>>32) & 0xff80) | ((fsr>>32) & 0x3f);
245 __put_user(mxcsr & 0xffff, &save->mxcsr);
246 __put_user( 0, &save->magic); //#define X86_FXSR_MAGIC 0x0000
247
248 /*
249 * save f8..f11 from pt_regs
250 * save f12..f15 from live register set
251 */
252 /*
253 * Find the location where f8 has to go in fp reg stack. This depends on
254 * TOP(11:13) field of sw. Other f reg continue sequentially from where f8 maps
255 * to.
256 */
257 fp_tos = (fsr>>11)&0x7;
258 fr8_st_map = (8-fp_tos)&0x7;
259 ptp = ia64_task_regs(tsk);
260 fpregp = (struct _fpreg_ia32 *)(((unsigned long)buf + 15) & ~15);
261 ia64f2ia32f(fpregp, &ptp->f8);
262 copy_to_user(&save->_st[(0+fr8_st_map)&0x7], fpregp, sizeof(struct _fpreg_ia32));
263 ia64f2ia32f(fpregp, &ptp->f9);
264 copy_to_user(&save->_st[(1+fr8_st_map)&0x7], fpregp, sizeof(struct _fpreg_ia32));
265 ia64f2ia32f(fpregp, &ptp->f10);
266 copy_to_user(&save->_st[(2+fr8_st_map)&0x7], fpregp, sizeof(struct _fpreg_ia32));
267 ia64f2ia32f(fpregp, &ptp->f11);
268 copy_to_user(&save->_st[(3+fr8_st_map)&0x7], fpregp, sizeof(struct _fpreg_ia32));
269
270 ia64_stfe(fpregp, 12);
271 copy_to_user(&save->_st[(4+fr8_st_map)&0x7], fpregp, sizeof(struct _fpreg_ia32));
272 ia64_stfe(fpregp, 13);
273 copy_to_user(&save->_st[(5+fr8_st_map)&0x7], fpregp, sizeof(struct _fpreg_ia32));
274 ia64_stfe(fpregp, 14);
275 copy_to_user(&save->_st[(6+fr8_st_map)&0x7], fpregp, sizeof(struct _fpreg_ia32));
276 ia64_stfe(fpregp, 15);
277 copy_to_user(&save->_st[(7+fr8_st_map)&0x7], fpregp, sizeof(struct _fpreg_ia32));
278
279 ia64_stf8(&num128[0], 16);
280 ia64_stf8(&num128[1], 17);
281 copy_to_user(&save->_xmm[0], num128, sizeof(struct _xmmreg_ia32));
282
283 ia64_stf8(&num128[0], 18);
284 ia64_stf8(&num128[1], 19);
285 copy_to_user(&save->_xmm[1], num128, sizeof(struct _xmmreg_ia32));
286
287 ia64_stf8(&num128[0], 20);
288 ia64_stf8(&num128[1], 21);
289 copy_to_user(&save->_xmm[2], num128, sizeof(struct _xmmreg_ia32));
290
291 ia64_stf8(&num128[0], 22);
292 ia64_stf8(&num128[1], 23);
293 copy_to_user(&save->_xmm[3], num128, sizeof(struct _xmmreg_ia32));
294
295 ia64_stf8(&num128[0], 24);
296 ia64_stf8(&num128[1], 25);
297 copy_to_user(&save->_xmm[4], num128, sizeof(struct _xmmreg_ia32));
298
299 ia64_stf8(&num128[0], 26);
300 ia64_stf8(&num128[1], 27);
301 copy_to_user(&save->_xmm[5], num128, sizeof(struct _xmmreg_ia32));
302
303 ia64_stf8(&num128[0], 28);
304 ia64_stf8(&num128[1], 29);
305 copy_to_user(&save->_xmm[6], num128, sizeof(struct _xmmreg_ia32));
306
307 ia64_stf8(&num128[0], 30);
308 ia64_stf8(&num128[1], 31);
309 copy_to_user(&save->_xmm[7], num128, sizeof(struct _xmmreg_ia32));
310 return 0;
311}
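save_ia32_fpstate_live() has to scatter f8..f15 into the right _st[] slots depending on the x87 top-of-stack field in ar.fsr, as described in the big mapping comment above. A worked example of just that index arithmetic, with an invented fsr value:

#include <stdio.h>

int main(void)
{
	unsigned long fsr = 0x3800;             /* invented: TOS field (bits 11-13) = 7 */
	int fp_tos = (fsr >> 11) & 0x7;         /* x87 top-of-stack                     */
	int fr8_st_map = (8 - fp_tos) & 0x7;    /* where f8 goes in the _st[] array     */
	int i;

	for (i = 0; i < 8; i++)
		printf("f%-2d -> _st[%d]\n", 8 + i, (i + fr8_st_map) & 0x7);
	return 0;
}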
312
313static int
314restore_ia32_fpstate_live (struct _fpstate_ia32 __user *save)
315{
316 struct task_struct *tsk = current;
317 struct pt_regs *ptp;
318 unsigned int lo, hi;
319 unsigned long num128[2];
320 unsigned long num64, mxcsr;
321 struct _fpreg_ia32 *fpregp;
322 char buf[32];
323 unsigned long fsr, fcr, fir, fdr;
324 int fp_tos, fr8_st_map;
325
326 if (!access_ok(VERIFY_READ, save, sizeof(*save)))
327 return(-EFAULT);
328
329 /*
330 * Updating fsr, fcr, fir, fdr.
331 * Just a bit more complicated than save.
332 * - Need to make sure that we don't write any value other than the
333 * specific fpstate info
 334	 * - Need to make sure that the untouched parts of fsr, fdr, fir and fcr
 335	 * remain the same while writing.
336 * So, we do a read, change specific fields and write.
337 */
338 fsr = ia64_getreg(_IA64_REG_AR_FSR);
339 fcr = ia64_getreg(_IA64_REG_AR_FCR);
340 fir = ia64_getreg(_IA64_REG_AR_FIR);
341 fdr = ia64_getreg(_IA64_REG_AR_FDR);
342
343 __get_user(mxcsr, (unsigned int __user *)&save->mxcsr);
344 /* setting bits 0..5 8..12 with cw and 39..47 from mxcsr */
345 __get_user(lo, (unsigned int __user *)&save->cw);
346 num64 = mxcsr & 0xff10;
347 num64 = (num64 << 32) | (lo & 0x1f3f);
348 fcr = (fcr & (~0xff1000001f3fUL)) | num64;
349
350 /* setting bits 0..31 with sw and tag and 32..37 from mxcsr */
351 __get_user(lo, (unsigned int __user *)&save->sw);
352 /* set bits 15,7 (fsw.b, fsw.es) to reflect the current error status */
353 if ( !(lo & 0x7f) )
354 lo &= (~0x8080);
355 __get_user(hi, (unsigned int __user *)&save->tag);
356 num64 = mxcsr & 0x3f;
357 num64 = (num64 << 16) | (hi & 0xffff);
358 num64 = (num64 << 16) | (lo & 0xffff);
359 fsr = (fsr & (~0x3fffffffffUL)) | num64;
360
361 /* setting bits 0..47 with cssel and ipoff */
362 __get_user(lo, (unsigned int __user *)&save->ipoff);
363 __get_user(hi, (unsigned int __user *)&save->cssel);
364 num64 = hi & 0xffff;
365 num64 = (num64 << 32) | lo;
366 fir = (fir & (~0xffffffffffffUL)) | num64;
367
368 /* setting bits 0..47 with datasel and dataoff */
369 __get_user(lo, (unsigned int __user *)&save->dataoff);
370 __get_user(hi, (unsigned int __user *)&save->datasel);
371 num64 = hi & 0xffff;
372 num64 = (num64 << 32) | lo;
373 fdr = (fdr & (~0xffffffffffffUL)) | num64;
374
375 ia64_setreg(_IA64_REG_AR_FSR, fsr);
376 ia64_setreg(_IA64_REG_AR_FCR, fcr);
377 ia64_setreg(_IA64_REG_AR_FIR, fir);
378 ia64_setreg(_IA64_REG_AR_FDR, fdr);
379
380 /*
381 * restore f8..f11 onto pt_regs
382 * restore f12..f15 onto live registers
383 */
384 /*
385 * Find the location where f8 has to go in fp reg stack. This depends on
386 * TOP(11:13) field of sw. Other f reg continue sequentially from where f8 maps
387 * to.
388 */
389 fp_tos = (fsr>>11)&0x7;
390 fr8_st_map = (8-fp_tos)&0x7;
391 fpregp = (struct _fpreg_ia32 *)(((unsigned long)buf + 15) & ~15);
392
393 ptp = ia64_task_regs(tsk);
394 copy_from_user(fpregp, &save->_st[(0+fr8_st_map)&0x7], sizeof(struct _fpreg_ia32));
395 ia32f2ia64f(&ptp->f8, fpregp);
396 copy_from_user(fpregp, &save->_st[(1+fr8_st_map)&0x7], sizeof(struct _fpreg_ia32));
397 ia32f2ia64f(&ptp->f9, fpregp);
398 copy_from_user(fpregp, &save->_st[(2+fr8_st_map)&0x7], sizeof(struct _fpreg_ia32));
399 ia32f2ia64f(&ptp->f10, fpregp);
400 copy_from_user(fpregp, &save->_st[(3+fr8_st_map)&0x7], sizeof(struct _fpreg_ia32));
401 ia32f2ia64f(&ptp->f11, fpregp);
402
403 copy_from_user(fpregp, &save->_st[(4+fr8_st_map)&0x7], sizeof(struct _fpreg_ia32));
404 ia64_ldfe(12, fpregp);
405 copy_from_user(fpregp, &save->_st[(5+fr8_st_map)&0x7], sizeof(struct _fpreg_ia32));
406 ia64_ldfe(13, fpregp);
407 copy_from_user(fpregp, &save->_st[(6+fr8_st_map)&0x7], sizeof(struct _fpreg_ia32));
408 ia64_ldfe(14, fpregp);
409 copy_from_user(fpregp, &save->_st[(7+fr8_st_map)&0x7], sizeof(struct _fpreg_ia32));
410 ia64_ldfe(15, fpregp);
411
412 copy_from_user(num128, &save->_xmm[0], sizeof(struct _xmmreg_ia32));
413 ia64_ldf8(16, &num128[0]);
414 ia64_ldf8(17, &num128[1]);
415
416 copy_from_user(num128, &save->_xmm[1], sizeof(struct _xmmreg_ia32));
417 ia64_ldf8(18, &num128[0]);
418 ia64_ldf8(19, &num128[1]);
419
420 copy_from_user(num128, &save->_xmm[2], sizeof(struct _xmmreg_ia32));
421 ia64_ldf8(20, &num128[0]);
422 ia64_ldf8(21, &num128[1]);
423
424 copy_from_user(num128, &save->_xmm[3], sizeof(struct _xmmreg_ia32));
425 ia64_ldf8(22, &num128[0]);
426 ia64_ldf8(23, &num128[1]);
427
428 copy_from_user(num128, &save->_xmm[4], sizeof(struct _xmmreg_ia32));
429 ia64_ldf8(24, &num128[0]);
430 ia64_ldf8(25, &num128[1]);
431
432 copy_from_user(num128, &save->_xmm[5], sizeof(struct _xmmreg_ia32));
433 ia64_ldf8(26, &num128[0]);
434 ia64_ldf8(27, &num128[1]);
435
436 copy_from_user(num128, &save->_xmm[6], sizeof(struct _xmmreg_ia32));
437 ia64_ldf8(28, &num128[0]);
438 ia64_ldf8(29, &num128[1]);
439
440 copy_from_user(num128, &save->_xmm[7], sizeof(struct _xmmreg_ia32));
441 ia64_ldf8(30, &num128[0]);
442 ia64_ldf8(31, &num128[1]);
443 return 0;
444}
445
446static inline void
447sigact_set_handler (struct k_sigaction *sa, unsigned int handler, unsigned int restorer)
448{
449 if (handler + 1 <= 2)
450 /* SIG_DFL, SIG_IGN, or SIG_ERR: must sign-extend to 64-bits */
451 sa->sa.sa_handler = (__sighandler_t) A((int) handler);
452 else
453 sa->sa.sa_handler = (__sighandler_t) (((unsigned long) restorer << 32) | handler);
454}
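sigact_set_handler() keeps two 32-bit user addresses in the single 64-bit sa_handler slot: the handler in the low word and the restorer in the high word. The IA32_SA_HANDLER()/IA32_SA_RESTORER() macros used elsewhere in this file presumably pull them back apart. A minimal sketch of that packing and unpacking, assuming exactly that low/high split and using invented addresses:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint32_t handler  = 0x080484d0;   /* invented 32-bit handler address  */
	uint32_t restorer = 0xb7fff414;   /* invented 32-bit restorer address */

	/* Pack the way sigact_set_handler() does for "real" handlers. */
	uint64_t sa_handler = ((uint64_t)restorer << 32) | handler;

	/* Unpack the way IA32_SA_HANDLER()/IA32_SA_RESTORER() are assumed to. */
	printf("handler=%#x restorer=%#x\n",
	       (uint32_t)(sa_handler & 0xffffffff),
	       (uint32_t)(sa_handler >> 32));
	return 0;
}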
455
456long
457__ia32_rt_sigsuspend (compat_sigset_t *sset, unsigned int sigsetsize, struct sigscratch *scr)
458{
459 extern long ia64_do_signal (sigset_t *oldset, struct sigscratch *scr, long in_syscall);
460 sigset_t oldset, set;
461
462 scr->scratch_unat = 0; /* avoid leaking kernel bits to user level */
 463	 memset(&set, 0, sizeof(set));
464
 465	 /* sset is already a kernel-space copy made by the caller, so memcpy cannot fail */
 466	 memcpy(&set.sig, &sset->sig, sigsetsize);
467
468 sigdelsetmask(&set, ~_BLOCKABLE);
469
470 spin_lock_irq(&current->sighand->siglock);
471 {
472 oldset = current->blocked;
473 current->blocked = set;
474 recalc_sigpending();
475 }
476 spin_unlock_irq(&current->sighand->siglock);
477
478 /*
479 * The return below usually returns to the signal handler. We need to pre-set the
480 * correct error code here to ensure that the right values get saved in sigcontext
481 * by ia64_do_signal.
482 */
483 scr->pt.r8 = -EINTR;
484 while (1) {
485 current->state = TASK_INTERRUPTIBLE;
486 schedule();
487 if (ia64_do_signal(&oldset, scr, 1))
488 return -EINTR;
489 }
490}
491
492asmlinkage long
493ia32_rt_sigsuspend (compat_sigset_t __user *uset, unsigned int sigsetsize, struct sigscratch *scr)
494{
495 compat_sigset_t set;
496
497 if (sigsetsize > sizeof(compat_sigset_t))
498 return -EINVAL;
499
500 if (copy_from_user(&set.sig, &uset->sig, sigsetsize))
501 return -EFAULT;
502
503 return __ia32_rt_sigsuspend(&set, sigsetsize, scr);
504}
505
506asmlinkage long
507ia32_sigsuspend (unsigned int mask, struct sigscratch *scr)
508{
509 return __ia32_rt_sigsuspend((compat_sigset_t *) &mask, sizeof(mask), scr);
510}
511
512asmlinkage long
513sys32_signal (int sig, unsigned int handler)
514{
515 struct k_sigaction new_sa, old_sa;
516 int ret;
517
518 sigact_set_handler(&new_sa, handler, 0);
519 new_sa.sa.sa_flags = SA_ONESHOT | SA_NOMASK;
520
521 ret = do_sigaction(sig, &new_sa, &old_sa);
522
523 return ret ? ret : IA32_SA_HANDLER(&old_sa);
524}
525
526asmlinkage long
527sys32_rt_sigaction (int sig, struct sigaction32 __user *act,
528 struct sigaction32 __user *oact, unsigned int sigsetsize)
529{
530 struct k_sigaction new_ka, old_ka;
531 unsigned int handler, restorer;
532 int ret;
533
534 /* XXX: Don't preclude handling different sized sigset_t's. */
535 if (sigsetsize != sizeof(compat_sigset_t))
536 return -EINVAL;
537
538 if (act) {
539 ret = get_user(handler, &act->sa_handler);
540 ret |= get_user(new_ka.sa.sa_flags, &act->sa_flags);
541 ret |= get_user(restorer, &act->sa_restorer);
542 ret |= copy_from_user(&new_ka.sa.sa_mask, &act->sa_mask, sizeof(compat_sigset_t));
543 if (ret)
544 return -EFAULT;
545
546 sigact_set_handler(&new_ka, handler, restorer);
547 }
548
549 ret = do_sigaction(sig, act ? &new_ka : NULL, oact ? &old_ka : NULL);
550
551 if (!ret && oact) {
552 ret = put_user(IA32_SA_HANDLER(&old_ka), &oact->sa_handler);
553 ret |= put_user(old_ka.sa.sa_flags, &oact->sa_flags);
554 ret |= put_user(IA32_SA_RESTORER(&old_ka), &oact->sa_restorer);
555 ret |= copy_to_user(&oact->sa_mask, &old_ka.sa.sa_mask, sizeof(compat_sigset_t));
556 }
557 return ret;
558}
559
560
561asmlinkage long
562sys32_rt_sigprocmask (int how, compat_sigset_t __user *set, compat_sigset_t __user *oset,
563 unsigned int sigsetsize)
564{
565 mm_segment_t old_fs = get_fs();
566 sigset_t s;
567 long ret;
568
569 if (sigsetsize > sizeof(s))
570 return -EINVAL;
571
572 if (set) {
573 memset(&s, 0, sizeof(s));
574 if (copy_from_user(&s.sig, set, sigsetsize))
575 return -EFAULT;
576 }
577 set_fs(KERNEL_DS);
578 ret = sys_rt_sigprocmask(how,
579 set ? (sigset_t __user *) &s : NULL,
580 oset ? (sigset_t __user *) &s : NULL, sizeof(s));
581 set_fs(old_fs);
582 if (ret)
583 return ret;
584 if (oset) {
585 if (copy_to_user(oset, &s.sig, sigsetsize))
586 return -EFAULT;
587 }
588 return 0;
589}
590
591asmlinkage long
592sys32_rt_sigqueueinfo (int pid, int sig, compat_siginfo_t __user *uinfo)
593{
594 mm_segment_t old_fs = get_fs();
595 siginfo_t info;
596 int ret;
597
598 if (copy_siginfo_from_user32(&info, uinfo))
599 return -EFAULT;
600 set_fs(KERNEL_DS);
601 ret = sys_rt_sigqueueinfo(pid, sig, (siginfo_t __user *) &info);
602 set_fs(old_fs);
603 return ret;
604}
605
606asmlinkage long
607sys32_sigaction (int sig, struct old_sigaction32 __user *act, struct old_sigaction32 __user *oact)
608{
609 struct k_sigaction new_ka, old_ka;
610 unsigned int handler, restorer;
611 int ret;
612
613 if (act) {
614 compat_old_sigset_t mask;
615
616 ret = get_user(handler, &act->sa_handler);
617 ret |= get_user(new_ka.sa.sa_flags, &act->sa_flags);
618 ret |= get_user(restorer, &act->sa_restorer);
619 ret |= get_user(mask, &act->sa_mask);
620 if (ret)
621 return ret;
622
623 sigact_set_handler(&new_ka, handler, restorer);
624 siginitset(&new_ka.sa.sa_mask, mask);
625 }
626
627 ret = do_sigaction(sig, act ? &new_ka : NULL, oact ? &old_ka : NULL);
628
629 if (!ret && oact) {
630 ret = put_user(IA32_SA_HANDLER(&old_ka), &oact->sa_handler);
631 ret |= put_user(old_ka.sa.sa_flags, &oact->sa_flags);
632 ret |= put_user(IA32_SA_RESTORER(&old_ka), &oact->sa_restorer);
633 ret |= put_user(old_ka.sa.sa_mask.sig[0], &oact->sa_mask);
634 }
635
636 return ret;
637}
638
639static int
640setup_sigcontext_ia32 (struct sigcontext_ia32 __user *sc, struct _fpstate_ia32 __user *fpstate,
641 struct pt_regs *regs, unsigned long mask)
642{
643 int err = 0;
644 unsigned long flag;
645
646 if (!access_ok(VERIFY_WRITE, sc, sizeof(*sc)))
647 return -EFAULT;
648
649 err |= __put_user((regs->r16 >> 32) & 0xffff, (unsigned int __user *)&sc->fs);
650 err |= __put_user((regs->r16 >> 48) & 0xffff, (unsigned int __user *)&sc->gs);
651 err |= __put_user((regs->r16 >> 16) & 0xffff, (unsigned int __user *)&sc->es);
652 err |= __put_user(regs->r16 & 0xffff, (unsigned int __user *)&sc->ds);
653 err |= __put_user(regs->r15, &sc->edi);
654 err |= __put_user(regs->r14, &sc->esi);
655 err |= __put_user(regs->r13, &sc->ebp);
656 err |= __put_user(regs->r12, &sc->esp);
657 err |= __put_user(regs->r11, &sc->ebx);
658 err |= __put_user(regs->r10, &sc->edx);
659 err |= __put_user(regs->r9, &sc->ecx);
660 err |= __put_user(regs->r8, &sc->eax);
661#if 0
662 err |= __put_user(current->tss.trap_no, &sc->trapno);
663 err |= __put_user(current->tss.error_code, &sc->err);
664#endif
665 err |= __put_user(regs->cr_iip, &sc->eip);
666 err |= __put_user(regs->r17 & 0xffff, (unsigned int __user *)&sc->cs);
667 /*
668 * `eflags' is in an ar register for this context
669 */
670 flag = ia64_getreg(_IA64_REG_AR_EFLAG);
671 err |= __put_user((unsigned int)flag, &sc->eflags);
672 err |= __put_user(regs->r12, &sc->esp_at_signal);
673 err |= __put_user((regs->r17 >> 16) & 0xffff, (unsigned int __user *)&sc->ss);
674
675 if ( save_ia32_fpstate_live(fpstate) < 0 )
676 err = -EFAULT;
677 else
678 err |= __put_user((u32)(u64)fpstate, &sc->fpstate);
679
680#if 0
681 tmp = save_i387(fpstate);
682 if (tmp < 0)
683 err = 1;
684 else
685 err |= __put_user(tmp ? fpstate : NULL, &sc->fpstate);
686
687 /* non-iBCS2 extensions.. */
688#endif
689 err |= __put_user(mask, &sc->oldmask);
690#if 0
691 err |= __put_user(current->tss.cr2, &sc->cr2);
692#endif
693 return err;
694}
695
696static int
697restore_sigcontext_ia32 (struct pt_regs *regs, struct sigcontext_ia32 __user *sc, int *peax)
698{
699 unsigned int err = 0;
700
701 /* Always make any pending restarted system calls return -EINTR */
702 current_thread_info()->restart_block.fn = do_no_restart_syscall;
703
704 if (!access_ok(VERIFY_READ, sc, sizeof(*sc)))
705 return(-EFAULT);
706
707#define COPY(ia64x, ia32x) err |= __get_user(regs->ia64x, &sc->ia32x)
708
709#define copyseg_gs(tmp) (regs->r16 |= (unsigned long) (tmp) << 48)
710#define copyseg_fs(tmp) (regs->r16 |= (unsigned long) (tmp) << 32)
711#define copyseg_cs(tmp) (regs->r17 |= tmp)
712#define copyseg_ss(tmp) (regs->r17 |= (unsigned long) (tmp) << 16)
713#define copyseg_es(tmp) (regs->r16 |= (unsigned long) (tmp) << 16)
714#define copyseg_ds(tmp) (regs->r16 |= tmp)
715
716#define COPY_SEG(seg) \
717 { \
718 unsigned short tmp; \
719 err |= __get_user(tmp, &sc->seg); \
720 copyseg_##seg(tmp); \
721 }
722#define COPY_SEG_STRICT(seg) \
723 { \
724 unsigned short tmp; \
725 err |= __get_user(tmp, &sc->seg); \
726 copyseg_##seg(tmp|3); \
727 }
728
729 /* To make COPY_SEGs easier, we zero r16, r17 */
730 regs->r16 = 0;
731 regs->r17 = 0;
732
733 COPY_SEG(gs);
734 COPY_SEG(fs);
735 COPY_SEG(es);
736 COPY_SEG(ds);
737 COPY(r15, edi);
738 COPY(r14, esi);
739 COPY(r13, ebp);
740 COPY(r12, esp);
741 COPY(r11, ebx);
742 COPY(r10, edx);
743 COPY(r9, ecx);
744 COPY(cr_iip, eip);
745 COPY_SEG_STRICT(cs);
746 COPY_SEG_STRICT(ss);
747 ia32_load_segment_descriptors(current);
748 {
749 unsigned int tmpflags;
750 unsigned long flag;
751
752 /*
753 * IA32 `eflags' is not part of `pt_regs', it's in an ar register which
754 * is part of the thread context. Fortunately, we are executing in the
755 * IA32 process's context.
756 */
757 err |= __get_user(tmpflags, &sc->eflags);
758 flag = ia64_getreg(_IA64_REG_AR_EFLAG);
759 flag &= ~0x40DD5;
760 flag |= (tmpflags & 0x40DD5);
761 ia64_setreg(_IA64_REG_AR_EFLAG, flag);
762
763 regs->r1 = -1; /* disable syscall checks, r1 is orig_eax */
764 }
765
766 {
767 struct _fpstate_ia32 __user *buf = NULL;
768 u32 fpstate_ptr;
769 err |= get_user(fpstate_ptr, &(sc->fpstate));
770 buf = compat_ptr(fpstate_ptr);
771 if (buf) {
772 err |= restore_ia32_fpstate_live(buf);
773 }
774 }
775
776#if 0
777 {
778 struct _fpstate * buf;
779 err |= __get_user(buf, &sc->fpstate);
780 if (buf) {
781 if (!access_ok(VERIFY_READ, buf, sizeof(*buf)))
782 goto badframe;
783 err |= restore_i387(buf);
784 }
785 }
786#endif
787
788 err |= __get_user(*peax, &sc->eax);
789 return err;
790
791#if 0
792 badframe:
793 return 1;
794#endif
795}
796
797/*
798 * Determine which stack to use..
799 */
800static inline void __user *
801get_sigframe (struct k_sigaction *ka, struct pt_regs * regs, size_t frame_size)
802{
803 unsigned long esp;
804
 805	 /* Default to using the normal stack (truncate off the sign-extension of bit 31): */
806 esp = (unsigned int) regs->r12;
807
808 /* This is the X/Open sanctioned signal stack switching. */
809 if (ka->sa.sa_flags & SA_ONSTACK) {
810 if (!on_sig_stack(esp))
811 esp = current->sas_ss_sp + current->sas_ss_size;
812 }
813 /* Legacy stack switching not supported */
814
815 return (void __user *)((esp - frame_size) & -8ul);
816}
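get_sigframe() truncates r12 to 32 bits, optionally switches to the alternate signal stack, then rounds the frame address down to an 8-byte boundary with "& -8ul". A worked example of that final alignment step, with invented stack pointer and frame size:

#include <stdio.h>

int main(void)
{
	unsigned long esp = 0xbffff123UL;      /* invented user stack pointer      */
	unsigned long frame_size = 0x2e0;      /* invented sizeof(struct sigframe) */

	/* Move down past the frame, then clear the low three bits so the frame
	 * starts on an 8-byte boundary. */
	unsigned long frame = (esp - frame_size) & -8UL;

	printf("esp=%#lx frame=%#lx (aligned: %d)\n", esp, frame, (int)((frame & 7) == 0));
	return 0;
}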
817
818static int
819setup_frame_ia32 (int sig, struct k_sigaction *ka, sigset_t *set, struct pt_regs * regs)
820{
821 struct exec_domain *ed = current_thread_info()->exec_domain;
822 struct sigframe_ia32 __user *frame;
823 int err = 0;
824
825 frame = get_sigframe(ka, regs, sizeof(*frame));
826
827 if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame)))
828 goto give_sigsegv;
829
830 err |= __put_user((ed && ed->signal_invmap && sig < 32
831 ? (int)(ed->signal_invmap[sig]) : sig), &frame->sig);
832
833 err |= setup_sigcontext_ia32(&frame->sc, &frame->fpstate, regs, set->sig[0]);
834
835 if (_COMPAT_NSIG_WORDS > 1)
836 err |= __copy_to_user(frame->extramask, (char *) &set->sig + 4,
837 sizeof(frame->extramask));
838
839 /* Set up to return from userspace. If provided, use a stub
840 already in userspace. */
841 if (ka->sa.sa_flags & SA_RESTORER) {
842 unsigned int restorer = IA32_SA_RESTORER(ka);
843 err |= __put_user(restorer, &frame->pretcode);
844 } else {
845 /* Pointing to restorer in ia32 gate page */
846 err |= __put_user(IA32_GATE_OFFSET, &frame->pretcode);
847 }
848
849 /* This is popl %eax ; movl $,%eax ; int $0x80
850	 * and is there for historical reasons only.
851 * See arch/i386/kernel/signal.c
852 */
853
854 err |= __put_user(0xb858, (short __user *)(frame->retcode+0));
855 err |= __put_user(__IA32_NR_sigreturn, (int __user *)(frame->retcode+2));
856 err |= __put_user(0x80cd, (short __user *)(frame->retcode+6));
857
858 if (err)
859 goto give_sigsegv;
860
861 /* Set up registers for signal handler */
862 regs->r12 = (unsigned long) frame;
863 regs->cr_iip = IA32_SA_HANDLER(ka);
864
865 set_fs(USER_DS);
866
867#if 0
868 regs->eflags &= ~TF_MASK;
869#endif
870
871#if 0
872 printk("SIG deliver (%s:%d): sig=%d sp=%p pc=%lx ra=%x\n",
873 current->comm, current->pid, sig, (void *) frame, regs->cr_iip, frame->pretcode);
874#endif
875
876 return 1;
877
878 give_sigsegv:
879 force_sigsegv(sig, current);
880 return 0;
881}
882
883static int
884setup_rt_frame_ia32 (int sig, struct k_sigaction *ka, siginfo_t *info,
885 sigset_t *set, struct pt_regs * regs)
886{
887 struct exec_domain *ed = current_thread_info()->exec_domain;
888 compat_uptr_t pinfo, puc;
889 struct rt_sigframe_ia32 __user *frame;
890 int err = 0;
891
892 frame = get_sigframe(ka, regs, sizeof(*frame));
893
894 if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame)))
895 goto give_sigsegv;
896
897 err |= __put_user((ed && ed->signal_invmap
898 && sig < 32 ? ed->signal_invmap[sig] : sig), &frame->sig);
899
900 pinfo = (long __user) &frame->info;
901 puc = (long __user) &frame->uc;
902 err |= __put_user(pinfo, &frame->pinfo);
903 err |= __put_user(puc, &frame->puc);
904 err |= copy_siginfo_to_user32(&frame->info, info);
905
906 /* Create the ucontext. */
907 err |= __put_user(0, &frame->uc.uc_flags);
908 err |= __put_user(0, &frame->uc.uc_link);
909 err |= __put_user(current->sas_ss_sp, &frame->uc.uc_stack.ss_sp);
910 err |= __put_user(sas_ss_flags(regs->r12), &frame->uc.uc_stack.ss_flags);
911 err |= __put_user(current->sas_ss_size, &frame->uc.uc_stack.ss_size);
912 err |= setup_sigcontext_ia32(&frame->uc.uc_mcontext, &frame->fpstate, regs, set->sig[0]);
913 err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set));
914 if (err)
915 goto give_sigsegv;
916
917 /* Set up to return from userspace. If provided, use a stub
918 already in userspace. */
919 if (ka->sa.sa_flags & SA_RESTORER) {
920 unsigned int restorer = IA32_SA_RESTORER(ka);
921 err |= __put_user(restorer, &frame->pretcode);
922 } else {
923 /* Pointing to rt_restorer in ia32 gate page */
924 err |= __put_user(IA32_GATE_OFFSET + 8, &frame->pretcode);
925 }
926
927 /* This is movl $,%eax ; int $0x80
928	 * and is there for historical reasons only.
929 * See arch/i386/kernel/signal.c
930 */
931
932 err |= __put_user(0xb8, (char __user *)(frame->retcode+0));
933 err |= __put_user(__IA32_NR_rt_sigreturn, (int __user *)(frame->retcode+1));
934 err |= __put_user(0x80cd, (short __user *)(frame->retcode+5));
935
936 if (err)
937 goto give_sigsegv;
938
939 /* Set up registers for signal handler */
940 regs->r12 = (unsigned long) frame;
941 regs->cr_iip = IA32_SA_HANDLER(ka);
942
943 set_fs(USER_DS);
944
945#if 0
946 regs->eflags &= ~TF_MASK;
947#endif
948
949#if 0
950 printk("SIG deliver (%s:%d): sp=%p pc=%lx ra=%x\n",
951 current->comm, current->pid, (void *) frame, regs->cr_iip, frame->pretcode);
952#endif
953
954 return 1;
955
956give_sigsegv:
957 force_sigsegv(sig, current);
958 return 0;
959}
960
961int
962ia32_setup_frame1 (int sig, struct k_sigaction *ka, siginfo_t *info,
963 sigset_t *set, struct pt_regs *regs)
964{
965 /* Set up the stack frame */
966 if (ka->sa.sa_flags & SA_SIGINFO)
967 return setup_rt_frame_ia32(sig, ka, info, set, regs);
968 else
969 return setup_frame_ia32(sig, ka, set, regs);
970}
971
972asmlinkage long
973sys32_sigreturn (int arg0, int arg1, int arg2, int arg3, int arg4, int arg5,
974 int arg6, int arg7, struct pt_regs regs)
975{
976 unsigned long esp = (unsigned int) regs.r12;
977 struct sigframe_ia32 __user *frame = (struct sigframe_ia32 __user *)(esp - 8);
978 sigset_t set;
979 int eax;
980
981 if (!access_ok(VERIFY_READ, frame, sizeof(*frame)))
982 goto badframe;
983
984 if (__get_user(set.sig[0], &frame->sc.oldmask)
985 || (_COMPAT_NSIG_WORDS > 1 && __copy_from_user((char *) &set.sig + 4, &frame->extramask,
986 sizeof(frame->extramask))))
987 goto badframe;
988
989 sigdelsetmask(&set, ~_BLOCKABLE);
990 spin_lock_irq(&current->sighand->siglock);
991 current->blocked = set;
992 recalc_sigpending();
993 spin_unlock_irq(&current->sighand->siglock);
994
995 if (restore_sigcontext_ia32(&regs, &frame->sc, &eax))
996 goto badframe;
997 return eax;
998
999 badframe:
1000 force_sig(SIGSEGV, current);
1001 return 0;
1002}
1003
1004asmlinkage long
1005sys32_rt_sigreturn (int arg0, int arg1, int arg2, int arg3, int arg4,
1006 int arg5, int arg6, int arg7, struct pt_regs regs)
1007{
1008 unsigned long esp = (unsigned int) regs.r12;
1009 struct rt_sigframe_ia32 __user *frame = (struct rt_sigframe_ia32 __user *)(esp - 4);
1010 sigset_t set;
1011 int eax;
1012
1013 if (!access_ok(VERIFY_READ, frame, sizeof(*frame)))
1014 goto badframe;
1015 if (__copy_from_user(&set, &frame->uc.uc_sigmask, sizeof(set)))
1016 goto badframe;
1017
1018 sigdelsetmask(&set, ~_BLOCKABLE);
1019 spin_lock_irq(&current->sighand->siglock);
1020 current->blocked = set;
1021 recalc_sigpending();
1022 spin_unlock_irq(&current->sighand->siglock);
1023
1024 if (restore_sigcontext_ia32(&regs, &frame->uc.uc_mcontext, &eax))
1025 goto badframe;
1026
1027 /* It is more difficult to avoid calling this function than to
1028 call it and ignore errors. */
1029 do_sigaltstack((stack_t __user *) &frame->uc.uc_stack, NULL, esp);
1030
1031 return eax;
1032
1033 badframe:
1034 force_sig(SIGSEGV, current);
1035 return 0;
1036}
diff --git a/arch/ia64/ia32/ia32_support.c b/arch/ia64/ia32/ia32_support.c
new file mode 100644
index 000000000000..4f630043b3ae
--- /dev/null
+++ b/arch/ia64/ia32/ia32_support.c
@@ -0,0 +1,264 @@
1/*
2 * IA32 helper functions
3 *
4 * Copyright (C) 1999 Arun Sharma <arun.sharma@intel.com>
5 * Copyright (C) 2000 Asit K. Mallick <asit.k.mallick@intel.com>
6 * Copyright (C) 2001-2002 Hewlett-Packard Co
7 * David Mosberger-Tang <davidm@hpl.hp.com>
8 *
9 * 06/16/00 A. Mallick added csd/ssd/tssd for ia32 thread context
10 * 02/19/01 D. Mosberger dropped tssd; it's not needed
11 * 09/14/01 D. Mosberger fixed memory management for gdt/tss page
12 * 09/29/01 D. Mosberger added ia32_load_segment_descriptors()
13 */
14
15#include <linux/kernel.h>
16#include <linux/init.h>
17#include <linux/mm.h>
18#include <linux/personality.h>
19#include <linux/sched.h>
20
21#include <asm/intrinsics.h>
22#include <asm/page.h>
23#include <asm/pgtable.h>
24#include <asm/system.h>
25#include <asm/processor.h>
26#include <asm/uaccess.h>
27
28#include "ia32priv.h"
29
30extern void die_if_kernel (char *str, struct pt_regs *regs, long err);
31
32struct exec_domain ia32_exec_domain;
33struct page *ia32_shared_page[NR_CPUS];
34unsigned long *ia32_boot_gdt;
35unsigned long *cpu_gdt_table[NR_CPUS];
36struct page *ia32_gate_page;
37
38static unsigned long
39load_desc (u16 selector)
40{
41 unsigned long *table, limit, index;
42
43 if (!selector)
44 return 0;
45 if (selector & IA32_SEGSEL_TI) {
46 table = (unsigned long *) IA32_LDT_OFFSET;
47 limit = IA32_LDT_ENTRIES;
48 } else {
49 table = cpu_gdt_table[smp_processor_id()];
50 limit = IA32_PAGE_SIZE / sizeof(ia32_boot_gdt[0]);
51 }
52 index = selector >> IA32_SEGSEL_INDEX_SHIFT;
53 if (index >= limit)
54 return 0;
55 return IA32_SEG_UNSCRAMBLE(table[index]);
56}
57
58void
59ia32_load_segment_descriptors (struct task_struct *task)
60{
61 struct pt_regs *regs = ia64_task_regs(task);
62
63 /* Setup the segment descriptors */
64 regs->r24 = load_desc(regs->r16 >> 16); /* ESD */
65 regs->r27 = load_desc(regs->r16 >> 0); /* DSD */
66 regs->r28 = load_desc(regs->r16 >> 32); /* FSD */
67 regs->r29 = load_desc(regs->r16 >> 48); /* GSD */
68 regs->ar_csd = load_desc(regs->r17 >> 0); /* CSD */
69 regs->ar_ssd = load_desc(regs->r17 >> 16); /* SSD */
70}
71
72int
73ia32_clone_tls (struct task_struct *child, struct pt_regs *childregs)
74{
75 struct desc_struct *desc;
76 struct ia32_user_desc info;
77 int idx;
78
79 if (copy_from_user(&info, (void __user *)(childregs->r14 & 0xffffffff), sizeof(info)))
80 return -EFAULT;
81 if (LDT_empty(&info))
82 return -EINVAL;
83
84 idx = info.entry_number;
85 if (idx < GDT_ENTRY_TLS_MIN || idx > GDT_ENTRY_TLS_MAX)
86 return -EINVAL;
87
88 desc = child->thread.tls_array + idx - GDT_ENTRY_TLS_MIN;
89 desc->a = LDT_entry_a(&info);
90 desc->b = LDT_entry_b(&info);
91
92 /* XXX: can this be done in a cleaner way ? */
93 load_TLS(&child->thread, smp_processor_id());
94 ia32_load_segment_descriptors(child);
95 load_TLS(&current->thread, smp_processor_id());
96
97 return 0;
98}
99
100void
101ia32_save_state (struct task_struct *t)
102{
103 t->thread.eflag = ia64_getreg(_IA64_REG_AR_EFLAG);
104 t->thread.fsr = ia64_getreg(_IA64_REG_AR_FSR);
105 t->thread.fcr = ia64_getreg(_IA64_REG_AR_FCR);
106 t->thread.fir = ia64_getreg(_IA64_REG_AR_FIR);
107 t->thread.fdr = ia64_getreg(_IA64_REG_AR_FDR);
108 ia64_set_kr(IA64_KR_IO_BASE, t->thread.old_iob);
109 ia64_set_kr(IA64_KR_TSSD, t->thread.old_k1);
110}
111
112void
113ia32_load_state (struct task_struct *t)
114{
115 unsigned long eflag, fsr, fcr, fir, fdr, tssd;
116 struct pt_regs *regs = ia64_task_regs(t);
117
118 eflag = t->thread.eflag;
119 fsr = t->thread.fsr;
120 fcr = t->thread.fcr;
121 fir = t->thread.fir;
122 fdr = t->thread.fdr;
123 tssd = load_desc(_TSS); /* TSSD */
124
125 ia64_setreg(_IA64_REG_AR_EFLAG, eflag);
126 ia64_setreg(_IA64_REG_AR_FSR, fsr);
127 ia64_setreg(_IA64_REG_AR_FCR, fcr);
128 ia64_setreg(_IA64_REG_AR_FIR, fir);
129 ia64_setreg(_IA64_REG_AR_FDR, fdr);
130 current->thread.old_iob = ia64_get_kr(IA64_KR_IO_BASE);
131 current->thread.old_k1 = ia64_get_kr(IA64_KR_TSSD);
132 ia64_set_kr(IA64_KR_IO_BASE, IA32_IOBASE);
133 ia64_set_kr(IA64_KR_TSSD, tssd);
134
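	/* The upper halves of r17 carry the LDT (bits 32-47) and TSS
	   (bits 48-63) selectors; the low 32 bits (cs/ss) are preserved. */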
135 regs->r17 = (_TSS << 48) | (_LDT << 32) | (__u32) regs->r17;
136 regs->r30 = load_desc(_LDT); /* LDTD */
137 load_TLS(&t->thread, smp_processor_id());
138}
139
140/*
141 * Set up the per-CPU IA32 GDT (a copy of the boot CPU's GDT)
142 */
143void
144ia32_gdt_init (void)
145{
146 int cpu = smp_processor_id();
147
148 ia32_shared_page[cpu] = alloc_page(GFP_KERNEL);
149 if (!ia32_shared_page[cpu])
150 panic("failed to allocate ia32_shared_page[%d]\n", cpu);
151
152 cpu_gdt_table[cpu] = page_address(ia32_shared_page[cpu]);
153
154 /* Copy from the boot cpu's GDT */
155 memcpy(cpu_gdt_table[cpu], ia32_boot_gdt, PAGE_SIZE);
156}
157
158
159/*
160 * Setup IA32 GDT and TSS
161 */
162static void
163ia32_boot_gdt_init (void)
164{
165 unsigned long ldt_size;
166
167 ia32_shared_page[0] = alloc_page(GFP_KERNEL);
168 if (!ia32_shared_page[0])
169 panic("failed to allocate ia32_shared_page[0]\n");
170
171 ia32_boot_gdt = page_address(ia32_shared_page[0]);
172 cpu_gdt_table[0] = ia32_boot_gdt;
173
174 /* CS descriptor in IA-32 (scrambled) format */
175 ia32_boot_gdt[__USER_CS >> 3]
176 = IA32_SEG_DESCRIPTOR(0, (IA32_GATE_END-1) >> IA32_PAGE_SHIFT,
177 0xb, 1, 3, 1, 1, 1, 1);
178
179 /* DS descriptor in IA-32 (scrambled) format */
180 ia32_boot_gdt[__USER_DS >> 3]
181 = IA32_SEG_DESCRIPTOR(0, (IA32_GATE_END-1) >> IA32_PAGE_SHIFT,
182 0x3, 1, 3, 1, 1, 1, 1);
183
184 ldt_size = PAGE_ALIGN(IA32_LDT_ENTRIES*IA32_LDT_ENTRY_SIZE);
185 ia32_boot_gdt[TSS_ENTRY] = IA32_SEG_DESCRIPTOR(IA32_TSS_OFFSET, 235,
186 0xb, 0, 3, 1, 1, 1, 0);
187 ia32_boot_gdt[LDT_ENTRY] = IA32_SEG_DESCRIPTOR(IA32_LDT_OFFSET, ldt_size - 1,
188 0x2, 0, 3, 1, 1, 1, 0);
189}
190
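/*
 * Populate the ia32 gate page with the two sigreturn stubs.  The 64-bit
 * constants below are little-endian x86 machine code: 0x58 is popl %eax,
 * 0xb8 imm32 is movl $imm,%eax and 0xcd 0x80 is int $0x80.
 */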
191static void
192ia32_gate_page_init(void)
193{
194 unsigned long *sr;
195
196 ia32_gate_page = alloc_page(GFP_KERNEL);
197 sr = page_address(ia32_gate_page);
198 /* This is popl %eax ; movl $,%eax ; int $0x80 */
199 *sr++ = 0xb858 | (__IA32_NR_sigreturn << 16) | (0x80cdUL << 48);
200
201 /* This is movl $,%eax ; int $0x80 */
202 *sr = 0xb8 | (__IA32_NR_rt_sigreturn << 8) | (0x80cdUL << 40);
203}
204
205void
206ia32_mem_init(void)
207{
208 ia32_boot_gdt_init();
209 ia32_gate_page_init();
210}
211
212/*
213 * Handle bad IA32 interrupt via syscall
214 */
215void
216ia32_bad_interrupt (unsigned long int_num, struct pt_regs *regs)
217{
218 siginfo_t siginfo;
219
220 die_if_kernel("Bad IA-32 interrupt", regs, int_num);
221
222 siginfo.si_signo = SIGTRAP;
223 siginfo.si_errno = int_num; /* XXX is it OK to abuse si_errno like this? */
224 siginfo.si_flags = 0;
225 siginfo.si_isr = 0;
226 siginfo.si_addr = NULL;
227 siginfo.si_imm = 0;
228 siginfo.si_code = TRAP_BRKPT;
229 force_sig_info(SIGTRAP, &siginfo, current);
230}
231
232void
233ia32_cpu_init (void)
234{
235 /* initialize global ia32 state - CR0 and CR4 */
236 ia64_setreg(_IA64_REG_AR_CFLAG, (((ulong) IA32_CR4 << 32) | IA32_CR0));
237}
238
239static int __init
240ia32_init (void)
241{
242 ia32_exec_domain.name = "Linux/x86";
243 ia32_exec_domain.handler = NULL;
244 ia32_exec_domain.pers_low = PER_LINUX32;
245 ia32_exec_domain.pers_high = PER_LINUX32;
246 ia32_exec_domain.signal_map = default_exec_domain.signal_map;
247 ia32_exec_domain.signal_invmap = default_exec_domain.signal_invmap;
248 register_exec_domain(&ia32_exec_domain);
249
250#if PAGE_SHIFT > IA32_PAGE_SHIFT
251 {
252 extern kmem_cache_t *partial_page_cachep;
253
254 partial_page_cachep = kmem_cache_create("partial_page_cache",
255 sizeof(struct partial_page), 0, 0,
256 NULL, NULL);
257 if (!partial_page_cachep)
258 panic("Cannot create partial page SLAB cache");
259 }
260#endif
261 return 0;
262}
263
264__initcall(ia32_init);
diff --git a/arch/ia64/ia32/ia32_traps.c b/arch/ia64/ia32/ia32_traps.c
new file mode 100644
index 000000000000..e486042672f1
--- /dev/null
+++ b/arch/ia64/ia32/ia32_traps.c
@@ -0,0 +1,156 @@
1/*
2 * IA-32 exception handlers
3 *
4 * Copyright (C) 2000 Asit K. Mallick <asit.k.mallick@intel.com>
5 * Copyright (C) 2001-2002 Hewlett-Packard Co
6 * David Mosberger-Tang <davidm@hpl.hp.com>
7 *
8 * 06/16/00 A. Mallick added siginfo for most cases (close to IA32)
9 * 09/29/00 D. Mosberger added ia32_intercept()
10 */
11
12#include <linux/kernel.h>
13#include <linux/sched.h>
14
15#include "ia32priv.h"
16
17#include <asm/intrinsics.h>
18#include <asm/ptrace.h>
19
20int
21ia32_intercept (struct pt_regs *regs, unsigned long isr)
22{
23 switch ((isr >> 16) & 0xff) {
24 case 0: /* Instruction intercept fault */
25 case 4: /* Locked Data reference fault */
26 case 1: /* Gate intercept trap */
27 return -1;
28
29 case 2: /* System flag trap */
30 if (((isr >> 14) & 0x3) >= 2) {
31 /* MOV SS, POP SS instructions */
32 ia64_psr(regs)->id = 1;
33 return 0;
34 } else
35 return -1;
36 }
37 return -1;
38}
39
40int
41ia32_exception (struct pt_regs *regs, unsigned long isr)
42{
43 struct siginfo siginfo;
44
45 /* initialize these fields to avoid leaking kernel bits to user space: */
46 siginfo.si_errno = 0;
47 siginfo.si_flags = 0;
48 siginfo.si_isr = 0;
49 siginfo.si_imm = 0;
50 switch ((isr >> 16) & 0xff) {
51 case 1:
52 case 2:
53 siginfo.si_signo = SIGTRAP;
54 if (isr == 0)
55 siginfo.si_code = TRAP_TRACE;
56 else if (isr & 0x4)
57 siginfo.si_code = TRAP_BRANCH;
58 else
59 siginfo.si_code = TRAP_BRKPT;
60 break;
61
62 case 3:
63 siginfo.si_signo = SIGTRAP;
64 siginfo.si_code = TRAP_BRKPT;
65 break;
66
67 case 0: /* Divide fault */
68 siginfo.si_signo = SIGFPE;
69 siginfo.si_code = FPE_INTDIV;
70 break;
71
72 case 4: /* Overflow */
73 case 5: /* Bounds fault */
74 siginfo.si_signo = SIGFPE;
75 siginfo.si_code = 0;
76 break;
77
78 case 6: /* Invalid Op-code */
79 siginfo.si_signo = SIGILL;
80 siginfo.si_code = ILL_ILLOPN;
81 break;
82
83 case 7: /* FP DNA */
84 case 8: /* Double Fault */
85 case 9: /* Invalid TSS */
86 case 11: /* Segment not present */
87 case 12: /* Stack fault */
88 case 13: /* General Protection Fault */
89 siginfo.si_signo = SIGSEGV;
90 siginfo.si_code = 0;
91 break;
92
93 case 16: /* Pending FP error */
94 {
95 unsigned long fsr, fcr;
96
97 fsr = ia64_getreg(_IA64_REG_AR_FSR);
98 fcr = ia64_getreg(_IA64_REG_AR_FCR);
99
100 siginfo.si_signo = SIGFPE;
101 /*
102			 * (~fcr & fsr) filters out exceptions that are masked in the control
103			 * register, leaving only unmasked ones. 0x3f covers the exception bits
104			 * in these registers, 0x200 is the C1 flag needed in case of a stack
105			 * fault and 0x040 is the stack-fault bit itself. We should only be
106			 * taking one exception at a time, so if this combination doesn't
107			 * produce any single exception, then we have a bad program that isn't
108			 * synchronizing its FPU usage and it will suffer the consequences,
109			 * since we won't be able to fully reproduce the context of the exception.
110 */
111 siginfo.si_isr = isr;
112 siginfo.si_flags = __ISR_VALID;
113 switch(((~fcr) & (fsr & 0x3f)) | (fsr & 0x240)) {
114 case 0x000:
115 default:
116 siginfo.si_code = 0;
117 break;
118 case 0x001: /* Invalid Op */
119 case 0x040: /* Stack Fault */
120 case 0x240: /* Stack Fault | Direction */
121 siginfo.si_code = FPE_FLTINV;
122 break;
123 case 0x002: /* Denormalize */
124 case 0x010: /* Underflow */
125 siginfo.si_code = FPE_FLTUND;
126 break;
127 case 0x004: /* Zero Divide */
128 siginfo.si_code = FPE_FLTDIV;
129 break;
130 case 0x008: /* Overflow */
131 siginfo.si_code = FPE_FLTOVF;
132 break;
133 case 0x020: /* Precision */
134 siginfo.si_code = FPE_FLTRES;
135 break;
136 }
137
138 break;
139 }
140
141 case 17: /* Alignment check */
142 siginfo.si_signo = SIGSEGV;
143 siginfo.si_code = BUS_ADRALN;
144 break;
145
146 case 19: /* SSE Numeric error */
147 siginfo.si_signo = SIGFPE;
148 siginfo.si_code = 0;
149 break;
150
151 default:
152 return -1;
153 }
154 force_sig_info(siginfo.si_signo, &siginfo, current);
155 return 0;
156}
diff --git a/arch/ia64/ia32/ia32priv.h b/arch/ia64/ia32/ia32priv.h
new file mode 100644
index 000000000000..b2de948bdaea
--- /dev/null
+++ b/arch/ia64/ia32/ia32priv.h
@@ -0,0 +1,544 @@
1#ifndef _ASM_IA64_IA32_PRIV_H
2#define _ASM_IA64_IA32_PRIV_H
3
4#include <linux/config.h>
5
6#include <asm/ia32.h>
7
8#ifdef CONFIG_IA32_SUPPORT
9
10#include <linux/binfmts.h>
11#include <linux/compat.h>
12#include <linux/rbtree.h>
13
14#include <asm/processor.h>
15
16/*
17 * 32 bit structures for IA32 support.
18 */
19
20#define IA32_PAGE_SIZE (1UL << IA32_PAGE_SHIFT)
21#define IA32_PAGE_MASK (~(IA32_PAGE_SIZE - 1))
22#define IA32_PAGE_ALIGN(addr) (((addr) + IA32_PAGE_SIZE - 1) & IA32_PAGE_MASK)
23#define IA32_CLOCKS_PER_SEC 100 /* Cast in stone for IA32 Linux */
24
25/*
26 * partially mapped pages provide precise accounting of which 4k sub pages
27 * are mapped and which ones are not, thereby improving IA-32 compatibility.
28 */
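/*
 * For example, with a 16KB IA-64 PAGE_SIZE, bits 0..3 of the bitmap track
 * the four 4KB IA-32 sub-pages of the IA-64 page starting at @base; a
 * bitmap of 0xf means the whole IA-64 page is covered, so the entry can be
 * dropped.
 */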
29struct partial_page {
30 struct partial_page *next; /* linked list, sorted by address */
31 struct rb_node pp_rb;
32	/* 64K is the largest "normal" page supported by the ia64 ABI, so 4K*32
33	 * bits should suffice. */
34 unsigned int bitmap;
35 unsigned int base;
36};
37
38struct partial_page_list {
39 struct partial_page *pp_head; /* list head, points to the lowest
40 * addressed partial page */
41 struct rb_root ppl_rb;
42 struct partial_page *pp_hint; /* pp_hint->next is the last
43 * accessed partial page */
44 atomic_t pp_count; /* reference count */
45};
46
47#if PAGE_SHIFT > IA32_PAGE_SHIFT
48struct partial_page_list* ia32_init_pp_list (void);
49#else
50# define ia32_init_pp_list() 0
51#endif
52
53/* sigcontext.h */
54/*
55 * As documented in the iBCS2 standard..
56 *
57 * The first part of "struct _fpstate" is just the
58 * normal i387 hardware setup, the extra "status"
59 * word is used to save the coprocessor status word
60 * before entering the handler.
61 */
62struct _fpreg_ia32 {
63 unsigned short significand[4];
64 unsigned short exponent;
65};
66
67struct _fpxreg_ia32 {
68 unsigned short significand[4];
69 unsigned short exponent;
70 unsigned short padding[3];
71};
72
73struct _xmmreg_ia32 {
74 unsigned int element[4];
75};
76
77
78struct _fpstate_ia32 {
79 unsigned int cw,
80 sw,
81 tag,
82 ipoff,
83 cssel,
84 dataoff,
85 datasel;
86 struct _fpreg_ia32 _st[8];
87 unsigned short status;
88 unsigned short magic; /* 0xffff = regular FPU data only */
89
90 /* FXSR FPU environment */
91 unsigned int _fxsr_env[6]; /* FXSR FPU env is ignored */
92 unsigned int mxcsr;
93 unsigned int reserved;
94 struct _fpxreg_ia32 _fxsr_st[8]; /* FXSR FPU reg data is ignored */
95 struct _xmmreg_ia32 _xmm[8];
96 unsigned int padding[56];
97};
98
99struct sigcontext_ia32 {
100 unsigned short gs, __gsh;
101 unsigned short fs, __fsh;
102 unsigned short es, __esh;
103 unsigned short ds, __dsh;
104 unsigned int edi;
105 unsigned int esi;
106 unsigned int ebp;
107 unsigned int esp;
108 unsigned int ebx;
109 unsigned int edx;
110 unsigned int ecx;
111 unsigned int eax;
112 unsigned int trapno;
113 unsigned int err;
114 unsigned int eip;
115 unsigned short cs, __csh;
116 unsigned int eflags;
117 unsigned int esp_at_signal;
118 unsigned short ss, __ssh;
119 unsigned int fpstate; /* really (struct _fpstate_ia32 *) */
120 unsigned int oldmask;
121 unsigned int cr2;
122};
123
124/* user.h */
125/*
126 * IA32 (Pentium III/4) FXSR, SSE support
127 *
128 * Provide support for the GDB 5.0+ PTRACE_{GET|SET}FPXREGS requests for
129 * interacting with the FXSR-format floating point environment. Floating
130 * point data can be accessed in the regular format in the usual manner,
131 * and both the standard and SIMD floating point data can be accessed via
132 * the new ptrace requests. In either case, changes to the FPU environment
133 * will be reflected in the task's state as expected.
134 */
135struct ia32_user_i387_struct {
136 int cwd;
137 int swd;
138 int twd;
139 int fip;
140 int fcs;
141 int foo;
142 int fos;
143 /* 8*10 bytes for each FP-reg = 80 bytes */
144 struct _fpreg_ia32 st_space[8];
145};
146
147struct ia32_user_fxsr_struct {
148 unsigned short cwd;
149 unsigned short swd;
150 unsigned short twd;
151 unsigned short fop;
152 int fip;
153 int fcs;
154 int foo;
155 int fos;
156 int mxcsr;
157 int reserved;
158 int st_space[32]; /* 8*16 bytes for each FP-reg = 128 bytes */
159 int xmm_space[32]; /* 8*16 bytes for each XMM-reg = 128 bytes */
160 int padding[56];
161};
162
163/* signal.h */
164#define IA32_SET_SA_HANDLER(ka,handler,restorer) \
165 ((ka)->sa.sa_handler = (__sighandler_t) \
166 (((unsigned long)(restorer) << 32) \
167 | ((handler) & 0xffffffff)))
168#define IA32_SA_HANDLER(ka) ((unsigned long) (ka)->sa.sa_handler & 0xffffffff)
169#define IA32_SA_RESTORER(ka) ((unsigned long) (ka)->sa.sa_handler >> 32)
170
171#define __IA32_NR_sigreturn 119
172#define __IA32_NR_rt_sigreturn 173
173
174struct sigaction32 {
175 unsigned int sa_handler; /* Really a pointer, but need to deal with 32 bits */
176 unsigned int sa_flags;
177 unsigned int sa_restorer; /* Another 32 bit pointer */
178 compat_sigset_t sa_mask; /* A 32 bit mask */
179};
180
181struct old_sigaction32 {
182 unsigned int sa_handler; /* Really a pointer, but need to deal
183 with 32 bits */
184 compat_old_sigset_t sa_mask; /* A 32 bit mask */
185 unsigned int sa_flags;
186 unsigned int sa_restorer; /* Another 32 bit pointer */
187};
188
189typedef struct sigaltstack_ia32 {
190 unsigned int ss_sp;
191 int ss_flags;
192 unsigned int ss_size;
193} stack_ia32_t;
194
195struct ucontext_ia32 {
196 unsigned int uc_flags;
197 unsigned int uc_link;
198 stack_ia32_t uc_stack;
199 struct sigcontext_ia32 uc_mcontext;
200 sigset_t uc_sigmask; /* mask last for extensibility */
201};
202
203struct stat64 {
204 unsigned long long st_dev;
205 unsigned char __pad0[4];
206 unsigned int __st_ino;
207 unsigned int st_mode;
208 unsigned int st_nlink;
209 unsigned int st_uid;
210 unsigned int st_gid;
211 unsigned long long st_rdev;
212 unsigned char __pad3[4];
213 unsigned int st_size_lo;
214 unsigned int st_size_hi;
215 unsigned int st_blksize;
216 unsigned int st_blocks; /* Number 512-byte blocks allocated. */
217 unsigned int __pad4; /* future possible st_blocks high bits */
218 unsigned int st_atime;
219 unsigned int st_atime_nsec;
220 unsigned int st_mtime;
221 unsigned int st_mtime_nsec;
222 unsigned int st_ctime;
223 unsigned int st_ctime_nsec;
224 unsigned int st_ino_lo;
225 unsigned int st_ino_hi;
226};
227
228typedef struct compat_siginfo {
229 int si_signo;
230 int si_errno;
231 int si_code;
232
233 union {
234 int _pad[((128/sizeof(int)) - 3)];
235
236 /* kill() */
237 struct {
238 unsigned int _pid; /* sender's pid */
239 unsigned int _uid; /* sender's uid */
240 } _kill;
241
242 /* POSIX.1b timers */
243 struct {
244 timer_t _tid; /* timer id */
245 int _overrun; /* overrun count */
246 char _pad[sizeof(unsigned int) - sizeof(int)];
247 compat_sigval_t _sigval; /* same as below */
248 int _sys_private; /* not to be passed to user */
249 } _timer;
250
251 /* POSIX.1b signals */
252 struct {
253 unsigned int _pid; /* sender's pid */
254 unsigned int _uid; /* sender's uid */
255 compat_sigval_t _sigval;
256 } _rt;
257
258 /* SIGCHLD */
259 struct {
260 unsigned int _pid; /* which child */
261 unsigned int _uid; /* sender's uid */
262 int _status; /* exit code */
263 compat_clock_t _utime;
264 compat_clock_t _stime;
265 } _sigchld;
266
267 /* SIGILL, SIGFPE, SIGSEGV, SIGBUS */
268 struct {
269 unsigned int _addr; /* faulting insn/memory ref. */
270 } _sigfault;
271
272 /* SIGPOLL */
273 struct {
274 int _band; /* POLL_IN, POLL_OUT, POLL_MSG */
275 int _fd;
276 } _sigpoll;
277 } _sifields;
278} compat_siginfo_t;
279
280struct old_linux32_dirent {
281 u32 d_ino;
282 u32 d_offset;
283 u16 d_namlen;
284 char d_name[1];
285};
286
287/*
288 * IA-32 ELF specific definitions for IA-64.
289 */
290
291#define _ASM_IA64_ELF_H /* Don't include elf.h */
292
293#include <linux/sched.h>
294#include <asm/processor.h>
295
296/*
297 * This is used to ensure we don't load something for the wrong architecture.
298 */
299#define elf_check_arch(x) ((x)->e_machine == EM_386)
300
301/*
302 * These are used to set parameters in the core dumps.
303 */
304#define ELF_CLASS ELFCLASS32
305#define ELF_DATA ELFDATA2LSB
306#define ELF_ARCH EM_386
307
308#define IA32_PAGE_OFFSET 0xc0000000
309#define IA32_STACK_TOP IA32_PAGE_OFFSET
310#define IA32_GATE_OFFSET IA32_PAGE_OFFSET
311#define IA32_GATE_END IA32_PAGE_OFFSET + PAGE_SIZE
312
313/*
314 * The system segments (GDT, TSS, LDT) have to be mapped below 4GB so the IA-32 engine can
315 * access them.
316 */
317#define IA32_GDT_OFFSET (IA32_PAGE_OFFSET + PAGE_SIZE)
318#define IA32_TSS_OFFSET (IA32_PAGE_OFFSET + 2*PAGE_SIZE)
319#define IA32_LDT_OFFSET (IA32_PAGE_OFFSET + 3*PAGE_SIZE)
320
321#define ELF_EXEC_PAGESIZE IA32_PAGE_SIZE
322
323/*
324 * This is the location that an ET_DYN program is loaded if exec'ed.
325 * Typical use of this is to invoke "./ld.so someprog" to test out a
326 * new version of the loader. We need to make sure that it is out of
327 * the way of the program that it will "exec", and that there is
328 * sufficient room for the brk.
329 */
330#define ELF_ET_DYN_BASE (IA32_PAGE_OFFSET/3 + 0x1000000)
331
332void ia64_elf32_init(struct pt_regs *regs);
333#define ELF_PLAT_INIT(_r, load_addr) ia64_elf32_init(_r)
334
335#define elf_addr_t u32
336
337/* This macro yields a bitmask that programs can use to figure out
338 what instruction set this CPU supports. */
339#define ELF_HWCAP 0
340
341/* This macro yields a string that ld.so will use to load
342 implementation specific libraries for optimization. Not terribly
343 relevant until we have real hardware to play with... */
344#define ELF_PLATFORM NULL
345
346#ifdef __KERNEL__
347# define SET_PERSONALITY(EX,IBCS2) \
348 (current->personality = (IBCS2) ? PER_SVR4 : PER_LINUX)
349#endif
350
351#define IA32_EFLAG 0x200
352
353/*
354 * IA-32 segmentation definitions for IA-64.
355 */
356
357#define __USER_CS 0x23
358#define __USER_DS 0x2B
359
360/*
361 * The per-cpu GDT has 32 entries: see <asm-i386/segment.h>
362 */
363#define GDT_ENTRIES 32
364
365#define GDT_SIZE (GDT_ENTRIES * 8)
366
367#define TSS_ENTRY 14
368#define LDT_ENTRY (TSS_ENTRY + 1)
369
370#define IA32_SEGSEL_RPL (0x3 << 0)
371#define IA32_SEGSEL_TI (0x1 << 2)
372#define IA32_SEGSEL_INDEX_SHIFT 3
373
374#define _TSS ((unsigned long) TSS_ENTRY << IA32_SEGSEL_INDEX_SHIFT)
375#define _LDT ((unsigned long) LDT_ENTRY << IA32_SEGSEL_INDEX_SHIFT)
376
377#define IA32_SEG_BASE 16
378#define IA32_SEG_TYPE 40
379#define IA32_SEG_SYS 44
380#define IA32_SEG_DPL 45
381#define IA32_SEG_P 47
382#define IA32_SEG_HIGH_LIMIT 48
383#define IA32_SEG_AVL 52
384#define IA32_SEG_DB 54
385#define IA32_SEG_G 55
386#define IA32_SEG_HIGH_BASE 56
387
388#define IA32_SEG_DESCRIPTOR(base, limit, segtype, nonsysseg, dpl, segpresent, avl, segdb, gran) \
389 (((limit) & 0xffff) \
390 | (((unsigned long) (base) & 0xffffff) << IA32_SEG_BASE) \
391 | ((unsigned long) (segtype) << IA32_SEG_TYPE) \
392 | ((unsigned long) (nonsysseg) << IA32_SEG_SYS) \
393 | ((unsigned long) (dpl) << IA32_SEG_DPL) \
394 | ((unsigned long) (segpresent) << IA32_SEG_P) \
395 | ((((unsigned long) (limit) >> 16) & 0xf) << IA32_SEG_HIGH_LIMIT) \
396 | ((unsigned long) (avl) << IA32_SEG_AVL) \
397 | ((unsigned long) (segdb) << IA32_SEG_DB) \
398 | ((unsigned long) (gran) << IA32_SEG_G) \
399 | ((((unsigned long) (base) >> 24) & 0xff) << IA32_SEG_HIGH_BASE))
400
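/*
 * Bit positions of the corresponding fields in the unscrambled (IA-64)
 * descriptor format produced by IA32_SEG_UNSCRAMBLE() below.
 */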
401#define SEG_LIM 32
402#define SEG_TYPE 52
403#define SEG_SYS 56
404#define SEG_DPL 57
405#define SEG_P 59
406#define SEG_AVL 60
407#define SEG_DB 62
408#define SEG_G 63
409
410/* Unscramble an IA-32 segment descriptor into the IA-64 format. */
411#define IA32_SEG_UNSCRAMBLE(sd) \
412 ( (((sd) >> IA32_SEG_BASE) & 0xffffff) | ((((sd) >> IA32_SEG_HIGH_BASE) & 0xff) << 24) \
413 | ((((sd) & 0xffff) | ((((sd) >> IA32_SEG_HIGH_LIMIT) & 0xf) << 16)) << SEG_LIM) \
414 | ((((sd) >> IA32_SEG_TYPE) & 0xf) << SEG_TYPE) \
415 | ((((sd) >> IA32_SEG_SYS) & 0x1) << SEG_SYS) \
416 | ((((sd) >> IA32_SEG_DPL) & 0x3) << SEG_DPL) \
417 | ((((sd) >> IA32_SEG_P) & 0x1) << SEG_P) \
418 | ((((sd) >> IA32_SEG_AVL) & 0x1) << SEG_AVL) \
419 | ((((sd) >> IA32_SEG_DB) & 0x1) << SEG_DB) \
420 | ((((sd) >> IA32_SEG_G) & 0x1) << SEG_G))
421
422#define IA32_IOBASE 0x2000000000000000UL /* Virtual address for I/O space */
423
424#define IA32_CR0 0x80000001 /* Enable PG and PE bits */
425#define IA32_CR4 0x600 /* MMXEX and FXSR on */
426
427/*
428 * IA32 floating point control registers starting values
429 */
430
431#define IA32_FSR_DEFAULT 0x55550000 /* set all tag bits */
432#define IA32_FCR_DEFAULT 0x17800000037fUL /* extended precision, all masks */
433
434#define IA32_PTRACE_GETREGS 12
435#define IA32_PTRACE_SETREGS 13
436#define IA32_PTRACE_GETFPREGS 14
437#define IA32_PTRACE_SETFPREGS 15
438#define IA32_PTRACE_GETFPXREGS 18
439#define IA32_PTRACE_SETFPXREGS 19
440
441#define ia32_start_thread(regs,new_ip,new_sp) do { \
442 set_fs(USER_DS); \
443 ia64_psr(regs)->cpl = 3; /* set user mode */ \
444 ia64_psr(regs)->ri = 0; /* clear return slot number */ \
445 ia64_psr(regs)->is = 1; /* IA-32 instruction set */ \
446 regs->cr_iip = new_ip; \
447 regs->ar_rsc = 0xc; /* enforced lazy mode, priv. level 3 */ \
448 regs->ar_rnat = 0; \
449 regs->loadrs = 0; \
450 regs->r12 = new_sp; \
451} while (0)
452
453/*
454 * Local Descriptor Table (LDT) related declarations.
455 */
456
457#define IA32_LDT_ENTRIES 8192 /* Maximum number of LDT entries supported. */
458#define IA32_LDT_ENTRY_SIZE 8 /* The size of each LDT entry. */
459
460#define LDT_entry_a(info) \
461 ((((info)->base_addr & 0x0000ffff) << 16) | ((info)->limit & 0x0ffff))
462
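/*
 * LDT_entry_b() builds the high 32 bits of the descriptor; the constant
 * 0x7100 supplies the accessed bit, S=1 (non-system segment) and DPL=3,
 * while the present bit is derived from seg_not_present.
 */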
463#define LDT_entry_b(info) \
464 (((info)->base_addr & 0xff000000) | \
465 (((info)->base_addr & 0x00ff0000) >> 16) | \
466 ((info)->limit & 0xf0000) | \
467 (((info)->read_exec_only ^ 1) << 9) | \
468 ((info)->contents << 10) | \
469 (((info)->seg_not_present ^ 1) << 15) | \
470 ((info)->seg_32bit << 22) | \
471 ((info)->limit_in_pages << 23) | \
472 ((info)->useable << 20) | \
473 0x7100)
474
475#define LDT_empty(info) ( \
476 (info)->base_addr == 0 && \
477 (info)->limit == 0 && \
478 (info)->contents == 0 && \
479 (info)->read_exec_only == 1 && \
480 (info)->seg_32bit == 0 && \
481 (info)->limit_in_pages == 0 && \
482 (info)->seg_not_present == 1 && \
483 (info)->useable == 0 )
484
485static inline void
486load_TLS (struct thread_struct *t, unsigned int cpu)
487{
488 extern unsigned long *cpu_gdt_table[NR_CPUS];
489
490 memcpy(cpu_gdt_table[cpu] + GDT_ENTRY_TLS_MIN + 0, &t->tls_array[0], sizeof(long));
491 memcpy(cpu_gdt_table[cpu] + GDT_ENTRY_TLS_MIN + 1, &t->tls_array[1], sizeof(long));
492 memcpy(cpu_gdt_table[cpu] + GDT_ENTRY_TLS_MIN + 2, &t->tls_array[2], sizeof(long));
493}
494
495struct ia32_user_desc {
496 unsigned int entry_number;
497 unsigned int base_addr;
498 unsigned int limit;
499 unsigned int seg_32bit:1;
500 unsigned int contents:2;
501 unsigned int read_exec_only:1;
502 unsigned int limit_in_pages:1;
503 unsigned int seg_not_present:1;
504 unsigned int useable:1;
505};
506
507struct linux_binprm;
508
509extern void ia32_init_addr_space (struct pt_regs *regs);
510extern int ia32_setup_arg_pages (struct linux_binprm *bprm, int exec_stack);
511extern unsigned long ia32_do_mmap (struct file *, unsigned long, unsigned long, int, int, loff_t);
512extern void ia32_load_segment_descriptors (struct task_struct *task);
513
514#define ia32f2ia64f(dst,src) \
515do { \
516 ia64_ldfe(6,src); \
517 ia64_stop(); \
518 ia64_stf_spill(dst, 6); \
519} while(0)
520
521#define ia64f2ia32f(dst,src) \
522do { \
523 ia64_ldf_fill(6, src); \
524 ia64_stop(); \
525 ia64_stfe(dst, 6); \
526} while(0)
527
528struct user_regs_struct32 {
529 __u32 ebx, ecx, edx, esi, edi, ebp, eax;
530 unsigned short ds, __ds, es, __es;
531 unsigned short fs, __fs, gs, __gs;
532 __u32 orig_eax, eip;
533 unsigned short cs, __cs;
534 __u32 eflags, esp;
535 unsigned short ss, __ss;
536};
537
538/* Prototypes for use in elfcore32.h */
539extern int save_ia32_fpstate (struct task_struct *, struct ia32_user_i387_struct __user *);
540extern int save_ia32_fpxstate (struct task_struct *, struct ia32_user_fxsr_struct __user *);
541
542#endif /* !CONFIG_IA32_SUPPORT */
543
544#endif /* _ASM_IA64_IA32_PRIV_H */
diff --git a/arch/ia64/ia32/sys_ia32.c b/arch/ia64/ia32/sys_ia32.c
new file mode 100644
index 000000000000..247a21c64aea
--- /dev/null
+++ b/arch/ia64/ia32/sys_ia32.c
@@ -0,0 +1,2747 @@
1/*
2 * sys_ia32.c: Conversion between 32bit and 64bit native syscalls. Derived from sys_sparc32.c.
3 *
4 * Copyright (C) 2000 VA Linux Co
5 * Copyright (C) 2000 Don Dugger <n0ano@valinux.com>
6 * Copyright (C) 1999 Arun Sharma <arun.sharma@intel.com>
7 * Copyright (C) 1997,1998 Jakub Jelinek (jj@sunsite.mff.cuni.cz)
8 * Copyright (C) 1997 David S. Miller (davem@caip.rutgers.edu)
9 * Copyright (C) 2000-2003, 2005 Hewlett-Packard Co
10 * David Mosberger-Tang <davidm@hpl.hp.com>
11 * Copyright (C) 2004 Gordon Jin <gordon.jin@intel.com>
12 *
13 * These routines maintain argument size conversion between 32bit and 64bit
14 * environment.
15 */
16
17#include <linux/config.h>
18#include <linux/kernel.h>
19#include <linux/syscalls.h>
20#include <linux/sysctl.h>
21#include <linux/sched.h>
22#include <linux/fs.h>
23#include <linux/file.h>
24#include <linux/signal.h>
25#include <linux/resource.h>
26#include <linux/times.h>
27#include <linux/utsname.h>
28#include <linux/timex.h>
29#include <linux/smp.h>
30#include <linux/smp_lock.h>
31#include <linux/sem.h>
32#include <linux/msg.h>
33#include <linux/mm.h>
34#include <linux/shm.h>
35#include <linux/slab.h>
36#include <linux/uio.h>
37#include <linux/nfs_fs.h>
38#include <linux/quota.h>
39#include <linux/sunrpc/svc.h>
40#include <linux/nfsd/nfsd.h>
41#include <linux/nfsd/cache.h>
42#include <linux/nfsd/xdr.h>
43#include <linux/nfsd/syscall.h>
44#include <linux/poll.h>
45#include <linux/eventpoll.h>
46#include <linux/personality.h>
47#include <linux/ptrace.h>
48#include <linux/stat.h>
49#include <linux/ipc.h>
50#include <linux/compat.h>
51#include <linux/vfs.h>
52#include <linux/mman.h>
53
54#include <asm/intrinsics.h>
55#include <asm/semaphore.h>
56#include <asm/types.h>
57#include <asm/uaccess.h>
58#include <asm/unistd.h>
59
60#include "ia32priv.h"
61
62#include <net/scm.h>
63#include <net/sock.h>
64
65#define DEBUG 0
66
67#if DEBUG
68# define DBG(fmt...) printk(KERN_DEBUG fmt)
69#else
70# define DBG(fmt...)
71#endif
72
73#define ROUND_UP(x,a) ((__typeof__(x))(((unsigned long)(x) + ((a) - 1)) & ~((a) - 1)))
74
75#define OFFSET4K(a) ((a) & 0xfff)
76#define PAGE_START(addr) ((addr) & PAGE_MASK)
77#define MINSIGSTKSZ_IA32 2048
78
79#define high2lowuid(uid) ((uid) > 65535 ? 65534 : (uid))
80#define high2lowgid(gid) ((gid) > 65535 ? 65534 : (gid))
81
82/*
83 * Anything that modifies or inspects ia32 user virtual memory must hold this semaphore
84 * while doing so.
85 */
86/* XXX make per-mm: */
87static DECLARE_MUTEX(ia32_mmap_sem);
88
89asmlinkage long
90sys32_execve (char __user *name, compat_uptr_t __user *argv, compat_uptr_t __user *envp,
91 struct pt_regs *regs)
92{
93 long error;
94 char *filename;
95 unsigned long old_map_base, old_task_size, tssd;
96
97 filename = getname(name);
98 error = PTR_ERR(filename);
99 if (IS_ERR(filename))
100 return error;
101
102 old_map_base = current->thread.map_base;
103 old_task_size = current->thread.task_size;
104 tssd = ia64_get_kr(IA64_KR_TSSD);
105
106 /* we may be exec'ing a 64-bit process: reset map base, task-size, and io-base: */
107 current->thread.map_base = DEFAULT_MAP_BASE;
108 current->thread.task_size = DEFAULT_TASK_SIZE;
109 ia64_set_kr(IA64_KR_IO_BASE, current->thread.old_iob);
110 ia64_set_kr(IA64_KR_TSSD, current->thread.old_k1);
111
112 error = compat_do_execve(filename, argv, envp, regs);
113 putname(filename);
114
115 if (error < 0) {
116 /* oops, execve failed, switch back to old values... */
117 ia64_set_kr(IA64_KR_IO_BASE, IA32_IOBASE);
118 ia64_set_kr(IA64_KR_TSSD, tssd);
119 current->thread.map_base = old_map_base;
120 current->thread.task_size = old_task_size;
121 }
122
123 return error;
124}
125
126int cp_compat_stat(struct kstat *stat, struct compat_stat __user *ubuf)
127{
128 int err;
129
130 if ((u64) stat->size > MAX_NON_LFS ||
131 !old_valid_dev(stat->dev) ||
132 !old_valid_dev(stat->rdev))
133 return -EOVERFLOW;
134
135 if (clear_user(ubuf, sizeof(*ubuf)))
136 return -EFAULT;
137
138 err = __put_user(old_encode_dev(stat->dev), &ubuf->st_dev);
139 err |= __put_user(stat->ino, &ubuf->st_ino);
140 err |= __put_user(stat->mode, &ubuf->st_mode);
141 err |= __put_user(stat->nlink, &ubuf->st_nlink);
142 err |= __put_user(high2lowuid(stat->uid), &ubuf->st_uid);
143 err |= __put_user(high2lowgid(stat->gid), &ubuf->st_gid);
144 err |= __put_user(old_encode_dev(stat->rdev), &ubuf->st_rdev);
145 err |= __put_user(stat->size, &ubuf->st_size);
146 err |= __put_user(stat->atime.tv_sec, &ubuf->st_atime);
147 err |= __put_user(stat->atime.tv_nsec, &ubuf->st_atime_nsec);
148 err |= __put_user(stat->mtime.tv_sec, &ubuf->st_mtime);
149 err |= __put_user(stat->mtime.tv_nsec, &ubuf->st_mtime_nsec);
150 err |= __put_user(stat->ctime.tv_sec, &ubuf->st_ctime);
151 err |= __put_user(stat->ctime.tv_nsec, &ubuf->st_ctime_nsec);
152 err |= __put_user(stat->blksize, &ubuf->st_blksize);
153 err |= __put_user(stat->blocks, &ubuf->st_blocks);
154 return err;
155}
156
157#if PAGE_SHIFT > IA32_PAGE_SHIFT
158
159
160static int
161get_page_prot (struct vm_area_struct *vma, unsigned long addr)
162{
163 int prot = 0;
164
165 if (!vma || vma->vm_start > addr)
166 return 0;
167
168 if (vma->vm_flags & VM_READ)
169 prot |= PROT_READ;
170 if (vma->vm_flags & VM_WRITE)
171 prot |= PROT_WRITE;
172 if (vma->vm_flags & VM_EXEC)
173 prot |= PROT_EXEC;
174 return prot;
175}
176
177/*
178 * Map a subpage by creating an anonymous page that contains the union of the old page and
179 * the subpage.
180 */
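/*
 * The existing contents of the surrounding IA-64 page are saved first; the
 * page is then remapped anonymously with MAP_FIXED, the saved head and tail
 * are copied back, and for file mappings the requested range is filled by
 * reading from the file.  The protection is tightened again at the end if
 * the new mapping is not writable.
 */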
181static unsigned long
182mmap_subpage (struct file *file, unsigned long start, unsigned long end, int prot, int flags,
183 loff_t off)
184{
185 void *page = NULL;
186 struct inode *inode;
187 unsigned long ret = 0;
188 struct vm_area_struct *vma = find_vma(current->mm, start);
189 int old_prot = get_page_prot(vma, start);
190
191 DBG("mmap_subpage(file=%p,start=0x%lx,end=0x%lx,prot=%x,flags=%x,off=0x%llx)\n",
192 file, start, end, prot, flags, off);
193
194
195 /* Optimize the case where the old mmap and the new mmap are both anonymous */
196 if ((old_prot & PROT_WRITE) && (flags & MAP_ANONYMOUS) && !vma->vm_file) {
197 if (clear_user((void __user *) start, end - start)) {
198 ret = -EFAULT;
199 goto out;
200 }
201 goto skip_mmap;
202 }
203
204 page = (void *) get_zeroed_page(GFP_KERNEL);
205 if (!page)
206 return -ENOMEM;
207
208 if (old_prot)
209 copy_from_user(page, (void __user *) PAGE_START(start), PAGE_SIZE);
210
211 down_write(&current->mm->mmap_sem);
212 {
213 ret = do_mmap(NULL, PAGE_START(start), PAGE_SIZE, prot | PROT_WRITE,
214 flags | MAP_FIXED | MAP_ANONYMOUS, 0);
215 }
216 up_write(&current->mm->mmap_sem);
217
218 if (IS_ERR((void *) ret))
219 goto out;
220
221 if (old_prot) {
222 /* copy back the old page contents. */
223 if (offset_in_page(start))
224 copy_to_user((void __user *) PAGE_START(start), page,
225 offset_in_page(start));
226 if (offset_in_page(end))
227 copy_to_user((void __user *) end, page + offset_in_page(end),
228 PAGE_SIZE - offset_in_page(end));
229 }
230
231 if (!(flags & MAP_ANONYMOUS)) {
232 /* read the file contents */
233 inode = file->f_dentry->d_inode;
234 if (!inode->i_fop || !file->f_op->read
235 || ((*file->f_op->read)(file, (char __user *) start, end - start, &off) < 0))
236 {
237 ret = -EINVAL;
238 goto out;
239 }
240 }
241
242 skip_mmap:
243 if (!(prot & PROT_WRITE))
244 ret = sys_mprotect(PAGE_START(start), PAGE_SIZE, prot | old_prot);
245 out:
246 if (page)
247 free_page((unsigned long) page);
248 return ret;
249}
250
251/* SLAB cache for partial_page structures */
252kmem_cache_t *partial_page_cachep;
253
254/*
255 * Initialize a partial_page_list.
256 * A NULL return value means the kmalloc failed.
257 */
258struct partial_page_list*
259ia32_init_pp_list(void)
260{
261 struct partial_page_list *p;
262
263 if ((p = kmalloc(sizeof(*p), GFP_KERNEL)) == NULL)
264 return p;
265 p->pp_head = NULL;
266 p->ppl_rb = RB_ROOT;
267 p->pp_hint = NULL;
268 atomic_set(&p->pp_count, 1);
269 return p;
270}
271
272/*
273 * Search for the partial page with @start in partial page list @ppl.
274 * If the partial page is found, return it.
275 * Else, return NULL and provide @pprev, @rb_link, @rb_parent to
276 * be used by a later __ia32_insert_pp().
277 */
278static struct partial_page *
279__ia32_find_pp(struct partial_page_list *ppl, unsigned int start,
280 struct partial_page **pprev, struct rb_node ***rb_link,
281 struct rb_node **rb_parent)
282{
283 struct partial_page *pp;
284 struct rb_node **__rb_link, *__rb_parent, *rb_prev;
285
286 pp = ppl->pp_hint;
287 if (pp && pp->base == start)
288 return pp;
289
290 __rb_link = &ppl->ppl_rb.rb_node;
291 rb_prev = __rb_parent = NULL;
292
293 while (*__rb_link) {
294 __rb_parent = *__rb_link;
295 pp = rb_entry(__rb_parent, struct partial_page, pp_rb);
296
297 if (pp->base == start) {
298 ppl->pp_hint = pp;
299 return pp;
300 } else if (pp->base < start) {
301 rb_prev = __rb_parent;
302 __rb_link = &__rb_parent->rb_right;
303 } else {
304 __rb_link = &__rb_parent->rb_left;
305 }
306 }
307
308 *rb_link = __rb_link;
309 *rb_parent = __rb_parent;
310 *pprev = NULL;
311 if (rb_prev)
312 *pprev = rb_entry(rb_prev, struct partial_page, pp_rb);
313 return NULL;
314}
315
316/*
317 * insert @pp into @ppl.
318 */
319static void
320__ia32_insert_pp(struct partial_page_list *ppl, struct partial_page *pp,
321 struct partial_page *prev, struct rb_node **rb_link,
322 struct rb_node *rb_parent)
323{
324 /* link list */
325 if (prev) {
326 pp->next = prev->next;
327 prev->next = pp;
328 } else {
329 ppl->pp_head = pp;
330 if (rb_parent)
331 pp->next = rb_entry(rb_parent,
332 struct partial_page, pp_rb);
333 else
334 pp->next = NULL;
335 }
336
337 /* link rb */
338 rb_link_node(&pp->pp_rb, rb_parent, rb_link);
339 rb_insert_color(&pp->pp_rb, &ppl->ppl_rb);
340
341 ppl->pp_hint = pp;
342}
343
344/*
345 * delete @pp from partial page list @ppl.
346 */
347static void
348__ia32_delete_pp(struct partial_page_list *ppl, struct partial_page *pp,
349 struct partial_page *prev)
350{
351 if (prev) {
352 prev->next = pp->next;
353 if (ppl->pp_hint == pp)
354 ppl->pp_hint = prev;
355 } else {
356 ppl->pp_head = pp->next;
357 if (ppl->pp_hint == pp)
358 ppl->pp_hint = pp->next;
359 }
360 rb_erase(&pp->pp_rb, &ppl->ppl_rb);
361 kmem_cache_free(partial_page_cachep, pp);
362}
363
364static struct partial_page *
365__pp_prev(struct partial_page *pp)
366{
367 struct rb_node *prev = rb_prev(&pp->pp_rb);
368 if (prev)
369 return rb_entry(prev, struct partial_page, pp_rb);
370 else
371 return NULL;
372}
373
374/*
375 * Delete partial pages with address between @start and @end.
376 * @start and @end are page aligned.
377 */
378static void
379__ia32_delete_pp_range(unsigned int start, unsigned int end)
380{
381 struct partial_page *pp, *prev;
382 struct rb_node **rb_link, *rb_parent;
383
384 if (start >= end)
385 return;
386
387 pp = __ia32_find_pp(current->thread.ppl, start, &prev,
388 &rb_link, &rb_parent);
389 if (pp)
390 prev = __pp_prev(pp);
391 else {
392 if (prev)
393 pp = prev->next;
394 else
395 pp = current->thread.ppl->pp_head;
396 }
397
398 while (pp && pp->base < end) {
399 struct partial_page *tmp = pp->next;
400 __ia32_delete_pp(current->thread.ppl, pp, prev);
401 pp = tmp;
402 }
403}
404
405/*
406 * Set the range between @start and @end in bitmap.
407 * @start and @end should be IA32 page aligned and in the same IA64 page.
408 */
409static int
410__ia32_set_pp(unsigned int start, unsigned int end, int flags)
411{
412 struct partial_page *pp, *prev;
413 struct rb_node ** rb_link, *rb_parent;
414 unsigned int pstart, start_bit, end_bit, i;
415
416 pstart = PAGE_START(start);
417 start_bit = (start % PAGE_SIZE) / IA32_PAGE_SIZE;
418 end_bit = (end % PAGE_SIZE) / IA32_PAGE_SIZE;
419 if (end_bit == 0)
420 end_bit = PAGE_SIZE / IA32_PAGE_SIZE;
421 pp = __ia32_find_pp(current->thread.ppl, pstart, &prev,
422 &rb_link, &rb_parent);
423 if (pp) {
424 for (i = start_bit; i < end_bit; i++)
425 set_bit(i, &pp->bitmap);
426 /*
427 * Check: if this partial page has been set to a full page,
428 * then delete it.
429 */
430 if (find_first_zero_bit(&pp->bitmap, sizeof(pp->bitmap)*8) >=
431 PAGE_SIZE/IA32_PAGE_SIZE) {
432 __ia32_delete_pp(current->thread.ppl, pp, __pp_prev(pp));
433 }
434 return 0;
435 }
436
437 /*
438 * MAP_FIXED may lead to overlapping mmap.
439	 * In this case, the requested mmap area may already be mmapped as a
440	 * full page, so check the vma before adding a new partial page.
441 */
442 if (flags & MAP_FIXED) {
443 struct vm_area_struct *vma = find_vma(current->mm, pstart);
444 if (vma && vma->vm_start <= pstart)
445 return 0;
446 }
447
448	/* allocate a new partial_page */
449 pp = kmem_cache_alloc(partial_page_cachep, GFP_KERNEL);
450 if (!pp)
451 return -ENOMEM;
452 pp->base = pstart;
453 pp->bitmap = 0;
454 for (i=start_bit; i<end_bit; i++)
455 set_bit(i, &(pp->bitmap));
456 pp->next = NULL;
457 __ia32_insert_pp(current->thread.ppl, pp, prev, rb_link, rb_parent);
458 return 0;
459}
460
461/*
462 * @start and @end should be IA32 page aligned, but don't need to be in the
463 * same IA64 page. Split @start and @end to make sure they're in the same IA64
464 * page, then call __ia32_set_pp().
465 */
466static void
467ia32_set_pp(unsigned int start, unsigned int end, int flags)
468{
469 down_write(&current->mm->mmap_sem);
470 if (flags & MAP_FIXED) {
471 /*
472		 * MAP_FIXED may lead to overlapping mmaps. When this happens,
473		 * any range of complete IA64 pages results in deletion of the
474		 * old partial pages in that range.
475 */
476 __ia32_delete_pp_range(PAGE_ALIGN(start), PAGE_START(end));
477 }
478
479 if (end < PAGE_ALIGN(start)) {
480 __ia32_set_pp(start, end, flags);
481 } else {
482 if (offset_in_page(start))
483 __ia32_set_pp(start, PAGE_ALIGN(start), flags);
484 if (offset_in_page(end))
485 __ia32_set_pp(PAGE_START(end), end, flags);
486 }
487 up_write(&current->mm->mmap_sem);
488}
489
490/*
491 * Unset the range between @start and @end in bitmap.
492 * @start and @end should be IA32 page aligned and in the same IA64 page.
493 * Afterwards, if the bitmap becomes 0, free the partial page and return 1,
494 * else return 0.
495 * If the partial page is not found in the list, then:
496 * if a vma covers the page, the full page is turned into a partial page;
497 * else return -ENOMEM.
498 */
499static int
500__ia32_unset_pp(unsigned int start, unsigned int end)
501{
502 struct partial_page *pp, *prev;
503 struct rb_node ** rb_link, *rb_parent;
504 unsigned int pstart, start_bit, end_bit, i;
505 struct vm_area_struct *vma;
506
507 pstart = PAGE_START(start);
508 start_bit = (start % PAGE_SIZE) / IA32_PAGE_SIZE;
509 end_bit = (end % PAGE_SIZE) / IA32_PAGE_SIZE;
510 if (end_bit == 0)
511 end_bit = PAGE_SIZE / IA32_PAGE_SIZE;
512
513 pp = __ia32_find_pp(current->thread.ppl, pstart, &prev,
514 &rb_link, &rb_parent);
515 if (pp) {
516 for (i = start_bit; i < end_bit; i++)
517 clear_bit(i, &pp->bitmap);
518 if (pp->bitmap == 0) {
519 __ia32_delete_pp(current->thread.ppl, pp, __pp_prev(pp));
520 return 1;
521 }
522 return 0;
523 }
524
525 vma = find_vma(current->mm, pstart);
526 if (!vma || vma->vm_start > pstart) {
527 return -ENOMEM;
528 }
529
530	/* allocate a new partial_page */
531 pp = kmem_cache_alloc(partial_page_cachep, GFP_KERNEL);
532 if (!pp)
533 return -ENOMEM;
534 pp->base = pstart;
535 pp->bitmap = 0;
536 for (i = 0; i < start_bit; i++)
537 set_bit(i, &(pp->bitmap));
538 for (i = end_bit; i < PAGE_SIZE / IA32_PAGE_SIZE; i++)
539 set_bit(i, &(pp->bitmap));
540 pp->next = NULL;
541 __ia32_insert_pp(current->thread.ppl, pp, prev, rb_link, rb_parent);
542 return 0;
543}
544
545/*
546 * Delete pp between PAGE_ALIGN(start) and PAGE_START(end) by calling
547 * __ia32_delete_pp_range(). Unset possible partial pages by calling
548 * __ia32_unset_pp().
549 * For the return value, see __ia32_unset_pp().
550 */
551static int
552ia32_unset_pp(unsigned int *startp, unsigned int *endp)
553{
554 unsigned int start = *startp, end = *endp;
555 int ret = 0;
556
557 down_write(&current->mm->mmap_sem);
558
559 __ia32_delete_pp_range(PAGE_ALIGN(start), PAGE_START(end));
560
561 if (end < PAGE_ALIGN(start)) {
562 ret = __ia32_unset_pp(start, end);
563 if (ret == 1) {
564 *startp = PAGE_START(start);
565 *endp = PAGE_ALIGN(end);
566 }
567 if (ret == 0) {
568 /* to shortcut sys_munmap() in sys32_munmap() */
569 *startp = PAGE_START(start);
570 *endp = PAGE_START(end);
571 }
572 } else {
573 if (offset_in_page(start)) {
574 ret = __ia32_unset_pp(start, PAGE_ALIGN(start));
575 if (ret == 1)
576 *startp = PAGE_START(start);
577 if (ret == 0)
578 *startp = PAGE_ALIGN(start);
579 if (ret < 0)
580 goto out;
581 }
582 if (offset_in_page(end)) {
583 ret = __ia32_unset_pp(PAGE_START(end), end);
584 if (ret == 1)
585 *endp = PAGE_ALIGN(end);
586 if (ret == 0)
587 *endp = PAGE_START(end);
588 }
589 }
590
591 out:
592 up_write(&current->mm->mmap_sem);
593 return ret;
594}
595
596/*
597 * Compare the range between @start and @end with bitmap in partial page.
598 * @start and @end should be IA32 page aligned and in the same IA64 page.
599 */
600static int
601__ia32_compare_pp(unsigned int start, unsigned int end)
602{
603 struct partial_page *pp, *prev;
604 struct rb_node ** rb_link, *rb_parent;
605 unsigned int pstart, start_bit, end_bit, size;
606 unsigned int first_bit, next_zero_bit; /* the first range in bitmap */
607
608 pstart = PAGE_START(start);
609
610 pp = __ia32_find_pp(current->thread.ppl, pstart, &prev,
611 &rb_link, &rb_parent);
612 if (!pp)
613 return 1;
614
615 start_bit = (start % PAGE_SIZE) / IA32_PAGE_SIZE;
616 end_bit = (end % PAGE_SIZE) / IA32_PAGE_SIZE;
617 size = sizeof(pp->bitmap) * 8;
618 first_bit = find_first_bit(&pp->bitmap, size);
619 next_zero_bit = find_next_zero_bit(&pp->bitmap, size, first_bit);
620 if ((start_bit < first_bit) || (end_bit > next_zero_bit)) {
621 /* exceeds the first range in bitmap */
622 return -ENOMEM;
623 } else if ((start_bit == first_bit) && (end_bit == next_zero_bit)) {
624 first_bit = find_next_bit(&pp->bitmap, size, next_zero_bit);
625 if ((next_zero_bit < first_bit) && (first_bit < size))
626 return 1; /* has next range */
627 else
628 return 0; /* no next range */
629 } else
630 return 1;
631}
632
633/*
634 * @start and @end should be IA32 page aligned, but don't need to be in the
635 * same IA64 page. Split @start and @end to make sure they're in the same IA64
636 * page, then call __ia32_compare_pp().
637 *
638 * Take this as an example: the range covers the 1st and 2nd 4K pages.
639 * Return 0 if they fit bitmap exactly, i.e. bitmap = 00000011;
640 * Return 1 if the range doesn't cover whole bitmap, e.g. bitmap = 00001111;
641 * Return -ENOMEM if the range exceeds the bitmap, e.g. bitmap = 00000001 or
642 * bitmap = 00000101.
643 */
644static int
645ia32_compare_pp(unsigned int *startp, unsigned int *endp)
646{
647 unsigned int start = *startp, end = *endp;
648 int retval = 0;
649
650 down_write(&current->mm->mmap_sem);
651
652 if (end < PAGE_ALIGN(start)) {
653 retval = __ia32_compare_pp(start, end);
654 if (retval == 0) {
655 *startp = PAGE_START(start);
656 *endp = PAGE_ALIGN(end);
657 }
658 } else {
659 if (offset_in_page(start)) {
660 retval = __ia32_compare_pp(start,
661 PAGE_ALIGN(start));
662 if (retval == 0)
663 *startp = PAGE_START(start);
664 if (retval < 0)
665 goto out;
666 }
667 if (offset_in_page(end)) {
668 retval = __ia32_compare_pp(PAGE_START(end), end);
669 if (retval == 0)
670 *endp = PAGE_ALIGN(end);
671 }
672 }
673
674 out:
675 up_write(&current->mm->mmap_sem);
676 return retval;
677}
678
679static void
680__ia32_drop_pp_list(struct partial_page_list *ppl)
681{
682 struct partial_page *pp = ppl->pp_head;
683
684 while (pp) {
685 struct partial_page *next = pp->next;
686 kmem_cache_free(partial_page_cachep, pp);
687 pp = next;
688 }
689
690 kfree(ppl);
691}
692
693void
694ia32_drop_partial_page_list(struct task_struct *task)
695{
696 struct partial_page_list* ppl = task->thread.ppl;
697
698 if (ppl && atomic_dec_and_test(&ppl->pp_count))
699 __ia32_drop_pp_list(ppl);
700}
701
702/*
703 * Copy current->thread.ppl to ppl (already initialized).
704 */
705static int
706__ia32_copy_pp_list(struct partial_page_list *ppl)
707{
708 struct partial_page *pp, *tmp, *prev;
709 struct rb_node **rb_link, *rb_parent;
710
711 ppl->pp_head = NULL;
712 ppl->pp_hint = NULL;
713 ppl->ppl_rb = RB_ROOT;
714 rb_link = &ppl->ppl_rb.rb_node;
715 rb_parent = NULL;
716 prev = NULL;
717
718 for (pp = current->thread.ppl->pp_head; pp; pp = pp->next) {
719 tmp = kmem_cache_alloc(partial_page_cachep, GFP_KERNEL);
720 if (!tmp)
721 return -ENOMEM;
722 *tmp = *pp;
723 __ia32_insert_pp(ppl, tmp, prev, rb_link, rb_parent);
724 prev = tmp;
725 rb_link = &tmp->pp_rb.rb_right;
726 rb_parent = &tmp->pp_rb;
727 }
728 return 0;
729}
730
731int
732ia32_copy_partial_page_list(struct task_struct *p, unsigned long clone_flags)
733{
734 int retval = 0;
735
736 if (clone_flags & CLONE_VM) {
737 atomic_inc(&current->thread.ppl->pp_count);
738 p->thread.ppl = current->thread.ppl;
739 } else {
740 p->thread.ppl = ia32_init_pp_list();
741 if (!p->thread.ppl)
742 return -ENOMEM;
743 down_write(&current->mm->mmap_sem);
744 {
745 retval = __ia32_copy_pp_list(p->thread.ppl);
746 }
747 up_write(&current->mm->mmap_sem);
748 }
749
750 return retval;
751}
752
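/*
 * Emulate an IA-32 mmap() when the kernel page size is bigger than 4KB.
 * Partial head and tail pages of a MAP_FIXED request go through
 * mmap_subpage(); if the file offset is not congruent with the IA-64 page
 * size, the range is mapped anonymously and filled by reading the file.
 */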
753static unsigned long
754emulate_mmap (struct file *file, unsigned long start, unsigned long len, int prot, int flags,
755 loff_t off)
756{
757 unsigned long tmp, end, pend, pstart, ret, is_congruent, fudge = 0;
758 struct inode *inode;
759 loff_t poff;
760
761 end = start + len;
762 pstart = PAGE_START(start);
763 pend = PAGE_ALIGN(end);
764
765 if (flags & MAP_FIXED) {
766 ia32_set_pp((unsigned int)start, (unsigned int)end, flags);
767 if (start > pstart) {
768 if (flags & MAP_SHARED)
769 printk(KERN_INFO
770 "%s(%d): emulate_mmap() can't share head (addr=0x%lx)\n",
771 current->comm, current->pid, start);
772 ret = mmap_subpage(file, start, min(PAGE_ALIGN(start), end), prot, flags,
773 off);
774 if (IS_ERR((void *) ret))
775 return ret;
776 pstart += PAGE_SIZE;
777 if (pstart >= pend)
778 goto out; /* done */
779 }
780 if (end < pend) {
781 if (flags & MAP_SHARED)
782 printk(KERN_INFO
783 "%s(%d): emulate_mmap() can't share tail (end=0x%lx)\n",
784 current->comm, current->pid, end);
785 ret = mmap_subpage(file, max(start, PAGE_START(end)), end, prot, flags,
786 (off + len) - offset_in_page(end));
787 if (IS_ERR((void *) ret))
788 return ret;
789 pend -= PAGE_SIZE;
790 if (pstart >= pend)
791 goto out; /* done */
792 }
793 } else {
794 /*
795 * If a start address was specified, use it if the entire rounded out area
796 * is available.
797 */
798 if (start && !pstart)
799 fudge = 1; /* handle case of mapping to range (0,PAGE_SIZE) */
800 tmp = arch_get_unmapped_area(file, pstart - fudge, pend - pstart, 0, flags);
801 if (tmp != pstart) {
802 pstart = tmp;
803 start = pstart + offset_in_page(off); /* make start congruent with off */
804 end = start + len;
805 pend = PAGE_ALIGN(end);
806 }
807 }
808
809 poff = off + (pstart - start); /* note: (pstart - start) may be negative */
810 is_congruent = (flags & MAP_ANONYMOUS) || (offset_in_page(poff) == 0);
811
812 if ((flags & MAP_SHARED) && !is_congruent)
813 printk(KERN_INFO "%s(%d): emulate_mmap() can't share contents of incongruent mmap "
814 "(addr=0x%lx,off=0x%llx)\n", current->comm, current->pid, start, off);
815
816 DBG("mmap_body: mapping [0x%lx-0x%lx) %s with poff 0x%llx\n", pstart, pend,
817 is_congruent ? "congruent" : "not congruent", poff);
818
819 down_write(&current->mm->mmap_sem);
820 {
821 if (!(flags & MAP_ANONYMOUS) && is_congruent)
822 ret = do_mmap(file, pstart, pend - pstart, prot, flags | MAP_FIXED, poff);
823 else
824 ret = do_mmap(NULL, pstart, pend - pstart,
825 prot | ((flags & MAP_ANONYMOUS) ? 0 : PROT_WRITE),
826 flags | MAP_FIXED | MAP_ANONYMOUS, 0);
827 }
828 up_write(&current->mm->mmap_sem);
829
830 if (IS_ERR((void *) ret))
831 return ret;
832
833 if (!is_congruent) {
834 /* read the file contents */
835 inode = file->f_dentry->d_inode;
836 if (!inode->i_fop || !file->f_op->read
837 || ((*file->f_op->read)(file, (char __user *) pstart, pend - pstart, &poff)
838 < 0))
839 {
840 sys_munmap(pstart, pend - pstart);
841 return -EINVAL;
842 }
843 if (!(prot & PROT_WRITE) && sys_mprotect(pstart, pend - pstart, prot) < 0)
844 return -EINVAL;
845 }
846
847 if (!(flags & MAP_FIXED))
848 ia32_set_pp((unsigned int)start, (unsigned int)end, flags);
849out:
850 return start;
851}
852
853#endif /* PAGE_SHIFT > IA32_PAGE_SHIFT */
854
855static inline unsigned int
856get_prot32 (unsigned int prot)
857{
858 if (prot & PROT_WRITE)
859 /* on x86, PROT_WRITE implies PROT_READ which implies PROT_EXEC */
860 prot |= PROT_READ | PROT_WRITE | PROT_EXEC;
861 else if (prot & (PROT_READ | PROT_EXEC))
862 /* on x86, there is no distinction between PROT_READ and PROT_EXEC */
863 prot |= (PROT_READ | PROT_EXEC);
864
865 return prot;
866}
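/*
 * Editorial sketch, not part of the original file: what the promotion in
 * get_prot32() above does for each ia32 request, using the PROT_* values
 * from <sys/mman.h> in a small userspace check.
 */
#include <stdio.h>
#include <sys/mman.h>

static unsigned int model_prot32(unsigned int prot)
{
	if (prot & PROT_WRITE)
		prot |= PROT_READ | PROT_WRITE | PROT_EXEC;
	else if (prot & (PROT_READ | PROT_EXEC))
		prot |= PROT_READ | PROT_EXEC;
	return prot;
}

int main(void)
{
	printf("%#x\n", model_prot32(PROT_NONE));	/* unchanged                 */
	printf("%#x\n", model_prot32(PROT_READ));	/* read  -> read|exec        */
	printf("%#x\n", model_prot32(PROT_EXEC));	/* exec  -> read|exec        */
	printf("%#x\n", model_prot32(PROT_WRITE));	/* write -> read|write|exec  */
	return 0;
}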
867
868unsigned long
869ia32_do_mmap (struct file *file, unsigned long addr, unsigned long len, int prot, int flags,
870 loff_t offset)
871{
872 DBG("ia32_do_mmap(file=%p,addr=0x%lx,len=0x%lx,prot=%x,flags=%x,offset=0x%llx)\n",
873 file, addr, len, prot, flags, offset);
874
875 if (file && (!file->f_op || !file->f_op->mmap))
876 return -ENODEV;
877
878 len = IA32_PAGE_ALIGN(len);
879 if (len == 0)
880 return addr;
881
882 if (len > IA32_PAGE_OFFSET || addr > IA32_PAGE_OFFSET - len)
883 {
884 if (flags & MAP_FIXED)
885 return -ENOMEM;
886 else
887 return -EINVAL;
888 }
889
890 if (OFFSET4K(offset))
891 return -EINVAL;
892
893 prot = get_prot32(prot);
894
895#if PAGE_SHIFT > IA32_PAGE_SHIFT
896 down(&ia32_mmap_sem);
897 {
898 addr = emulate_mmap(file, addr, len, prot, flags, offset);
899 }
900 up(&ia32_mmap_sem);
901#else
902 down_write(&current->mm->mmap_sem);
903 {
904 addr = do_mmap(file, addr, len, prot, flags, offset);
905 }
906 up_write(&current->mm->mmap_sem);
907#endif
908 DBG("ia32_do_mmap: returning 0x%lx\n", addr);
909 return addr;
910}
911
912/*
913 * Linux/i386 could not originally handle more than 4 system call parameters, so these
914 * system calls used a memory block for parameter passing.
915 */
916
917struct mmap_arg_struct {
918 unsigned int addr;
919 unsigned int len;
920 unsigned int prot;
921 unsigned int flags;
922 unsigned int fd;
923 unsigned int offset;
924};
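/*
 * Editorial sketch, not part of the original file: the old i386 mmap() call
 * passes a single pointer to a block with the layout of struct
 * mmap_arg_struct above, and sys32_mmap() copies it in with copy_from_user().
 * This userspace model just packs and unpacks the same block to show the
 * layout; the prot and flag values are ordinary <sys/mman.h> constants.
 */
#define _DEFAULT_SOURCE
#include <stdio.h>
#include <string.h>
#include <sys/mman.h>

struct mmap_arg_block {			/* mirrors struct mmap_arg_struct */
	unsigned int addr, len, prot, flags, fd, offset;
};

int main(void)
{
	unsigned char block[sizeof(struct mmap_arg_block)];
	struct mmap_arg_block a = {
		.addr = 0, .len = 8192,
		.prot = PROT_READ | PROT_WRITE,
		.flags = MAP_PRIVATE | MAP_ANONYMOUS,
		.fd = (unsigned int) -1, .offset = 0,
	}, k;

	memcpy(block, &a, sizeof(a));	/* the one pointer handed to the syscall    */
	memcpy(&k, block, sizeof(k));	/* what copy_from_user() recovers in-kernel */
	printf("len=%u prot=%#x flags=%#x\n", k.len, k.prot, k.flags);
	return 0;
}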
925
926asmlinkage long
927sys32_mmap (struct mmap_arg_struct __user *arg)
928{
929 struct mmap_arg_struct a;
930 struct file *file = NULL;
931 unsigned long addr;
932 int flags;
933
934 if (copy_from_user(&a, arg, sizeof(a)))
935 return -EFAULT;
936
937 if (OFFSET4K(a.offset))
938 return -EINVAL;
939
940 flags = a.flags;
941
942 flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE);
943 if (!(flags & MAP_ANONYMOUS)) {
944 file = fget(a.fd);
945 if (!file)
946 return -EBADF;
947 }
948
949 addr = ia32_do_mmap(file, a.addr, a.len, a.prot, flags, a.offset);
950
951 if (file)
952 fput(file);
953 return addr;
954}
955
956asmlinkage long
957sys32_mmap2 (unsigned int addr, unsigned int len, unsigned int prot, unsigned int flags,
958 unsigned int fd, unsigned int pgoff)
959{
960 struct file *file = NULL;
961 unsigned long retval;
962
963 flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE);
964 if (!(flags & MAP_ANONYMOUS)) {
965 file = fget(fd);
966 if (!file)
967 return -EBADF;
968 }
969
970 retval = ia32_do_mmap(file, addr, len, prot, flags,
971 (unsigned long) pgoff << IA32_PAGE_SHIFT);
972
973 if (file)
974 fput(file);
975 return retval;
976}
977
978asmlinkage long
979sys32_munmap (unsigned int start, unsigned int len)
980{
981 unsigned int end = start + len;
982 long ret;
983
984#if PAGE_SHIFT <= IA32_PAGE_SHIFT
985 ret = sys_munmap(start, end - start);
986#else
987 if (OFFSET4K(start))
988 return -EINVAL;
989
990 end = IA32_PAGE_ALIGN(end);
991 if (start >= end)
992 return -EINVAL;
993
994 ret = ia32_unset_pp(&start, &end);
995 if (ret < 0)
996 return ret;
997
998 if (start >= end)
999 return 0;
1000
1001 down(&ia32_mmap_sem);
1002 {
1003 ret = sys_munmap(start, end - start);
1004 }
1005 up(&ia32_mmap_sem);
1006#endif
1007 return ret;
1008}
1009
1010#if PAGE_SHIFT > IA32_PAGE_SHIFT
1011
1012/*
1013 * When mprotect()ing a partial page, we set the permission to the union of the old
1014 * settings and the new settings. In other words, it's only possible to make access to a
1015 * partial page less restrictive.
1016 */
1017static long
1018mprotect_subpage (unsigned long address, int new_prot)
1019{
1020 int old_prot;
1021 struct vm_area_struct *vma;
1022
1023 if (new_prot == PROT_NONE)
1024 return 0; /* optimize case where nothing changes... */
1025 vma = find_vma(current->mm, address);
1026 old_prot = get_page_prot(vma, address);
1027 return sys_mprotect(address, PAGE_SIZE, new_prot | old_prot);
1028}
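/*
 * Editorial sketch, not part of the original file: the effect of the union
 * rule described above mprotect_subpage().  A partial page that already
 * allows PROT_READ and is mprotect()ed with PROT_WRITE ends up readable and
 * writable; protections on the shared page can be widened, never narrowed.
 */
#include <stdio.h>
#include <sys/mman.h>

int main(void)
{
	int old_prot = PROT_READ;	/* what the page currently allows       */
	int new_prot = PROT_WRITE;	/* what the 32-bit caller is asking for */

	/* mprotect_subpage() applies new_prot | old_prot to the whole page */
	printf("effective prot = %#x\n", new_prot | old_prot);
	return 0;
}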
1029
1030#endif /* PAGE_SHIFT > IA32_PAGE_SHIFT */
1031
1032asmlinkage long
1033sys32_mprotect (unsigned int start, unsigned int len, int prot)
1034{
1035 unsigned int end = start + len;
1036#if PAGE_SHIFT > IA32_PAGE_SHIFT
1037 long retval = 0;
1038#endif
1039
1040 prot = get_prot32(prot);
1041
1042#if PAGE_SHIFT <= IA32_PAGE_SHIFT
1043 return sys_mprotect(start, end - start, prot);
1044#else
1045 if (OFFSET4K(start))
1046 return -EINVAL;
1047
1048 end = IA32_PAGE_ALIGN(end);
1049 if (end < start)
1050 return -EINVAL;
1051
1052 retval = ia32_compare_pp(&start, &end);
1053
1054 if (retval < 0)
1055 return retval;
1056
1057 down(&ia32_mmap_sem);
1058 {
1059 if (offset_in_page(start)) {
1060 /* start address is 4KB aligned but not page aligned. */
1061 retval = mprotect_subpage(PAGE_START(start), prot);
1062 if (retval < 0)
1063 goto out;
1064
1065 start = PAGE_ALIGN(start);
1066 if (start >= end)
1067 goto out; /* retval is already zero... */
1068 }
1069
1070 if (offset_in_page(end)) {
1071 /* end address is 4KB aligned but not page aligned. */
1072 retval = mprotect_subpage(PAGE_START(end), prot);
1073 if (retval < 0)
1074 goto out;
1075
1076 end = PAGE_START(end);
1077 }
1078 retval = sys_mprotect(start, end - start, prot);
1079 }
1080 out:
1081 up(&ia32_mmap_sem);
1082 return retval;
1083#endif
1084}
1085
1086asmlinkage long
1087sys32_mremap (unsigned int addr, unsigned int old_len, unsigned int new_len,
1088 unsigned int flags, unsigned int new_addr)
1089{
1090 long ret;
1091
1092#if PAGE_SHIFT <= IA32_PAGE_SHIFT
1093 ret = sys_mremap(addr, old_len, new_len, flags, new_addr);
1094#else
1095 unsigned int old_end, new_end;
1096
1097 if (OFFSET4K(addr))
1098 return -EINVAL;
1099
1100 old_len = IA32_PAGE_ALIGN(old_len);
1101 new_len = IA32_PAGE_ALIGN(new_len);
1102 old_end = addr + old_len;
1103 new_end = addr + new_len;
1104
1105 if (!new_len)
1106 return -EINVAL;
1107
1108 if ((flags & MREMAP_FIXED) && (OFFSET4K(new_addr)))
1109 return -EINVAL;
1110
1111 if (old_len >= new_len) {
1112 ret = sys32_munmap(addr + new_len, old_len - new_len);
1113 if (ret && old_len != new_len)
1114 return ret;
1115 ret = addr;
1116 if (!(flags & MREMAP_FIXED) || (new_addr == addr))
1117 return ret;
1118 old_len = new_len;
1119 }
1120
1121 addr = PAGE_START(addr);
1122 old_len = PAGE_ALIGN(old_end) - addr;
1123 new_len = PAGE_ALIGN(new_end) - addr;
1124
1125 down(&ia32_mmap_sem);
1126 {
1127 ret = sys_mremap(addr, old_len, new_len, flags, new_addr);
1128 }
1129 up(&ia32_mmap_sem);
1130
1131 if ((ret >= 0) && (old_len < new_len)) {
1132 /* mremap expanded successfully */
1133 ia32_set_pp(old_end, new_end, flags);
1134 }
1135#endif
1136 return ret;
1137}
1138
1139asmlinkage long
1140sys32_pipe (int __user *fd)
1141{
1142 int retval;
1143 int fds[2];
1144
1145 retval = do_pipe(fds);
1146 if (retval)
1147 goto out;
1148 if (copy_to_user(fd, fds, sizeof(fds)))
1149 retval = -EFAULT;
1150 out:
1151 return retval;
1152}
1153
1154static inline long
1155get_tv32 (struct timeval *o, struct compat_timeval __user *i)
1156{
1157 return (!access_ok(VERIFY_READ, i, sizeof(*i)) ||
1158 (__get_user(o->tv_sec, &i->tv_sec) | __get_user(o->tv_usec, &i->tv_usec)));
1159}
1160
1161static inline long
1162put_tv32 (struct compat_timeval __user *o, struct timeval *i)
1163{
1164 return (!access_ok(VERIFY_WRITE, o, sizeof(*o)) ||
1165 (__put_user(i->tv_sec, &o->tv_sec) | __put_user(i->tv_usec, &o->tv_usec)));
1166}
1167
1168asmlinkage unsigned long
1169sys32_alarm (unsigned int seconds)
1170{
1171 struct itimerval it_new, it_old;
1172 unsigned int oldalarm;
1173
1174 it_new.it_interval.tv_sec = it_new.it_interval.tv_usec = 0;
1175 it_new.it_value.tv_sec = seconds;
1176 it_new.it_value.tv_usec = 0;
1177 do_setitimer(ITIMER_REAL, &it_new, &it_old);
1178 oldalarm = it_old.it_value.tv_sec;
1179 /* ehhh.. We can't return 0 if we have an alarm pending.. */
1180 /* And it's better to return too much than too little anyway */
1181 if (it_old.it_value.tv_usec)
1182 oldalarm++;
1183 return oldalarm;
1184}
1185
1186/* Translations due to time_t size differences, which affect all
1187 sorts of things, like timeval and itimerval. */
1188
1189extern struct timezone sys_tz;
1190
1191asmlinkage long
1192sys32_gettimeofday (struct compat_timeval __user *tv, struct timezone __user *tz)
1193{
1194 if (tv) {
1195 struct timeval ktv;
1196 do_gettimeofday(&ktv);
1197 if (put_tv32(tv, &ktv))
1198 return -EFAULT;
1199 }
1200 if (tz) {
1201 if (copy_to_user(tz, &sys_tz, sizeof(sys_tz)))
1202 return -EFAULT;
1203 }
1204 return 0;
1205}
1206
1207asmlinkage long
1208sys32_settimeofday (struct compat_timeval __user *tv, struct timezone __user *tz)
1209{
1210 struct timeval ktv;
1211 struct timespec kts;
1212 struct timezone ktz;
1213
1214 if (tv) {
1215 if (get_tv32(&ktv, tv))
1216 return -EFAULT;
1217 kts.tv_sec = ktv.tv_sec;
1218 kts.tv_nsec = ktv.tv_usec * 1000;
1219 }
1220 if (tz) {
1221 if (copy_from_user(&ktz, tz, sizeof(ktz)))
1222 return -EFAULT;
1223 }
1224
1225 return do_sys_settimeofday(tv ? &kts : NULL, tz ? &ktz : NULL);
1226}
1227
1228struct getdents32_callback {
1229 struct compat_dirent __user *current_dir;
1230 struct compat_dirent __user *previous;
1231 int count;
1232 int error;
1233};
1234
1235struct readdir32_callback {
1236 struct old_linux32_dirent __user * dirent;
1237 int count;
1238};
1239
1240static int
1241filldir32 (void *__buf, const char *name, int namlen, loff_t offset, ino_t ino,
1242 unsigned int d_type)
1243{
1244 struct compat_dirent __user * dirent;
1245 struct getdents32_callback * buf = (struct getdents32_callback *) __buf;
1246 int reclen = ROUND_UP(offsetof(struct compat_dirent, d_name) + namlen + 1, 4);
1247
1248 buf->error = -EINVAL; /* only used if we fail.. */
1249 if (reclen > buf->count)
1250 return -EINVAL;
1251 buf->error = -EFAULT; /* only used if we fail.. */
1252 dirent = buf->previous;
1253 if (dirent)
1254 if (put_user(offset, &dirent->d_off))
1255 return -EFAULT;
1256 dirent = buf->current_dir;
1257 buf->previous = dirent;
1258 if (put_user(ino, &dirent->d_ino)
1259 || put_user(reclen, &dirent->d_reclen)
1260 || copy_to_user(dirent->d_name, name, namlen)
1261 || put_user(0, dirent->d_name + namlen))
1262 return -EFAULT;
1263 dirent = (struct compat_dirent __user *) ((char __user *) dirent + reclen);
1264 buf->current_dir = dirent;
1265 buf->count -= reclen;
1266 return 0;
1267}
1268
1269asmlinkage long
1270sys32_getdents (unsigned int fd, struct compat_dirent __user *dirent, unsigned int count)
1271{
1272 struct file * file;
1273 struct compat_dirent __user * lastdirent;
1274 struct getdents32_callback buf;
1275 int error;
1276
1277 error = -EBADF;
1278 file = fget(fd);
1279 if (!file)
1280 goto out;
1281
1282 buf.current_dir = dirent;
1283 buf.previous = NULL;
1284 buf.count = count;
1285 buf.error = 0;
1286
1287 error = vfs_readdir(file, filldir32, &buf);
1288 if (error < 0)
1289 goto out_putf;
1290 error = buf.error;
1291 lastdirent = buf.previous;
1292 if (lastdirent) {
1293 error = -EINVAL;
1294 if (put_user(file->f_pos, &lastdirent->d_off))
1295 goto out_putf;
1296 error = count - buf.count;
1297 }
1298
1299out_putf:
1300 fput(file);
1301out:
1302 return error;
1303}
1304
1305static int
1306fillonedir32 (void * __buf, const char * name, int namlen, loff_t offset, ino_t ino,
1307 unsigned int d_type)
1308{
1309 struct readdir32_callback * buf = (struct readdir32_callback *) __buf;
1310 struct old_linux32_dirent __user * dirent;
1311
1312 if (buf->count)
1313 return -EINVAL;
1314 buf->count++;
1315 dirent = buf->dirent;
1316 if (put_user(ino, &dirent->d_ino)
1317 || put_user(offset, &dirent->d_offset)
1318 || put_user(namlen, &dirent->d_namlen)
1319 || copy_to_user(dirent->d_name, name, namlen)
1320 || put_user(0, dirent->d_name + namlen))
1321 return -EFAULT;
1322 return 0;
1323}
1324
1325asmlinkage long
1326sys32_readdir (unsigned int fd, void __user *dirent, unsigned int count)
1327{
1328 int error;
1329 struct file * file;
1330 struct readdir32_callback buf;
1331
1332 error = -EBADF;
1333 file = fget(fd);
1334 if (!file)
1335 goto out;
1336
1337 buf.count = 0;
1338 buf.dirent = dirent;
1339
1340 error = vfs_readdir(file, fillonedir32, &buf);
1341 if (error >= 0)
1342 error = buf.count;
1343 fput(file);
1344out:
1345 return error;
1346}
1347
1348struct sel_arg_struct {
1349 unsigned int n;
1350 unsigned int inp;
1351 unsigned int outp;
1352 unsigned int exp;
1353 unsigned int tvp;
1354};
1355
1356asmlinkage long
1357sys32_old_select (struct sel_arg_struct __user *arg)
1358{
1359 struct sel_arg_struct a;
1360
1361 if (copy_from_user(&a, arg, sizeof(a)))
1362 return -EFAULT;
1363 return compat_sys_select(a.n, compat_ptr(a.inp), compat_ptr(a.outp),
1364 compat_ptr(a.exp), compat_ptr(a.tvp));
1365}
1366
1367#define SEMOP 1
1368#define SEMGET 2
1369#define SEMCTL 3
1370#define SEMTIMEDOP 4
1371#define MSGSND 11
1372#define MSGRCV 12
1373#define MSGGET 13
1374#define MSGCTL 14
1375#define SHMAT 21
1376#define SHMDT 22
1377#define SHMGET 23
1378#define SHMCTL 24
1379
1380asmlinkage long
1381sys32_ipc(u32 call, int first, int second, int third, u32 ptr, u32 fifth)
1382{
1383 int version;
1384
1385 version = call >> 16; /* hack for backward compatibility */
1386 call &= 0xffff;
1387
1388 switch (call) {
1389 case SEMTIMEDOP:
1390 if (fifth)
1391 return compat_sys_semtimedop(first, compat_ptr(ptr),
1392 second, compat_ptr(fifth));
1393 /* else fall through for normal semop() */
1394 case SEMOP:
1395 /* struct sembuf is the same on 32 and 64bit :)) */
1396 return sys_semtimedop(first, compat_ptr(ptr), second,
1397 NULL);
1398 case SEMGET:
1399 return sys_semget(first, second, third);
1400 case SEMCTL:
1401 return compat_sys_semctl(first, second, third, compat_ptr(ptr));
1402
1403 case MSGSND:
1404 return compat_sys_msgsnd(first, second, third, compat_ptr(ptr));
1405 case MSGRCV:
1406 return compat_sys_msgrcv(first, second, fifth, third, version, compat_ptr(ptr));
1407 case MSGGET:
1408 return sys_msgget((key_t) first, second);
1409 case MSGCTL:
1410 return compat_sys_msgctl(first, second, compat_ptr(ptr));
1411
1412 case SHMAT:
1413 return compat_sys_shmat(first, second, third, version, compat_ptr(ptr));
1414 break;
1415 case SHMDT:
1416 return sys_shmdt(compat_ptr(ptr));
1417 case SHMGET:
1418 return sys_shmget(first, (unsigned)second, third);
1419 case SHMCTL:
1420 return compat_sys_shmctl(first, second, compat_ptr(ptr));
1421
1422 default:
1423 return -ENOSYS;
1424 }
1425 return -EINVAL;
1426}
1427
1428asmlinkage long
1429compat_sys_wait4 (compat_pid_t pid, compat_uint_t * stat_addr, int options,
1430 struct compat_rusage *ru);
1431
1432asmlinkage long
1433sys32_waitpid (int pid, unsigned int *stat_addr, int options)
1434{
1435 return compat_sys_wait4(pid, stat_addr, options, NULL);
1436}
1437
1438static unsigned int
1439ia32_peek (struct task_struct *child, unsigned long addr, unsigned int *val)
1440{
1441 size_t copied;
1442 unsigned int ret;
1443
1444 copied = access_process_vm(child, addr, val, sizeof(*val), 0);
1445 return (copied != sizeof(ret)) ? -EIO : 0;
1446}
1447
1448static unsigned int
1449ia32_poke (struct task_struct *child, unsigned long addr, unsigned int val)
1450{
1451
1452 if (access_process_vm(child, addr, &val, sizeof(val), 1) != sizeof(val))
1453 return -EIO;
1454 return 0;
1455}
1456
1457/*
1458 * The order in which registers are stored in the ptrace regs structure
1459 */
1460#define PT_EBX 0
1461#define PT_ECX 1
1462#define PT_EDX 2
1463#define PT_ESI 3
1464#define PT_EDI 4
1465#define PT_EBP 5
1466#define PT_EAX 6
1467#define PT_DS 7
1468#define PT_ES 8
1469#define PT_FS 9
1470#define PT_GS 10
1471#define PT_ORIG_EAX 11
1472#define PT_EIP 12
1473#define PT_CS 13
1474#define PT_EFL 14
1475#define PT_UESP 15
1476#define PT_SS 16
1477
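/*
 * Editorial sketch, not part of the original file: how a 32-bit tracer's
 * PTRACE_PEEKUSR/POKEUSR offsets map onto the slot numbers above.
 * sys32_ptrace() below insists on a 4-byte aligned offset no larger than
 * 17*sizeof(int), and getreg()/putreg() divide it back down to a slot index.
 */
#include <stdio.h>

enum { SLOT_EBX = 0, SLOT_EAX = 6, SLOT_EIP = 12, SLOT_UESP = 15 };	/* as above */

int main(void)
{
	/* byte offsets a tracer would pass as 'addr' with PTRACE_PEEKUSR */
	printf("EBX at %d, EAX at %d, EIP at %d, ESP at %d\n",
	       SLOT_EBX * 4, SLOT_EAX * 4, SLOT_EIP * 4, SLOT_UESP * 4);
	return 0;
}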
1478static unsigned int
1479getreg (struct task_struct *child, int regno)
1480{
1481 struct pt_regs *child_regs;
1482
1483 child_regs = ia64_task_regs(child);
1484 switch (regno / sizeof(int)) {
1485 case PT_EBX: return child_regs->r11;
1486 case PT_ECX: return child_regs->r9;
1487 case PT_EDX: return child_regs->r10;
1488 case PT_ESI: return child_regs->r14;
1489 case PT_EDI: return child_regs->r15;
1490 case PT_EBP: return child_regs->r13;
1491 case PT_EAX: return child_regs->r8;
1492 case PT_ORIG_EAX: return child_regs->r1; /* see dispatch_to_ia32_handler() */
1493 case PT_EIP: return child_regs->cr_iip;
1494 case PT_UESP: return child_regs->r12;
1495 case PT_EFL: return child->thread.eflag;
1496 case PT_DS: case PT_ES: case PT_FS: case PT_GS: case PT_SS:
1497 return __USER_DS;
1498 case PT_CS: return __USER_CS;
1499 default:
1500 printk(KERN_ERR "ia32.getreg(): unknown register %d\n", regno);
1501 break;
1502 }
1503 return 0;
1504}
1505
1506static void
1507putreg (struct task_struct *child, int regno, unsigned int value)
1508{
1509 struct pt_regs *child_regs;
1510
1511 child_regs = ia64_task_regs(child);
1512 switch (regno / sizeof(int)) {
1513 case PT_EBX: child_regs->r11 = value; break;
1514 case PT_ECX: child_regs->r9 = value; break;
1515 case PT_EDX: child_regs->r10 = value; break;
1516 case PT_ESI: child_regs->r14 = value; break;
1517 case PT_EDI: child_regs->r15 = value; break;
1518 case PT_EBP: child_regs->r13 = value; break;
1519 case PT_EAX: child_regs->r8 = value; break;
1520 case PT_ORIG_EAX: child_regs->r1 = value; break;
1521 case PT_EIP: child_regs->cr_iip = value; break;
1522 case PT_UESP: child_regs->r12 = value; break;
1523 case PT_EFL: child->thread.eflag = value; break;
1524 case PT_DS: case PT_ES: case PT_FS: case PT_GS: case PT_SS:
1525 if (value != __USER_DS)
1526 printk(KERN_ERR
1527 "ia32.putreg: attempt to set invalid segment register %d = %x\n",
1528 regno, value);
1529 break;
1530 case PT_CS:
1531 if (value != __USER_CS)
1532 printk(KERN_ERR
1533 "ia32.putreg: attempt to to set invalid segment register %d = %x\n",
1534 regno, value);
1535 break;
1536 default:
1537 printk(KERN_ERR "ia32.putreg: unknown register %d\n", regno);
1538 break;
1539 }
1540}
1541
1542static void
1543put_fpreg (int regno, struct _fpreg_ia32 __user *reg, struct pt_regs *ptp,
1544 struct switch_stack *swp, int tos)
1545{
1546 struct _fpreg_ia32 *f;
1547 char buf[32];
1548
1549 f = (struct _fpreg_ia32 *)(((unsigned long)buf + 15) & ~15);
1550 if ((regno += tos) >= 8)
1551 regno -= 8;
1552 switch (regno) {
1553 case 0:
1554 ia64f2ia32f(f, &ptp->f8);
1555 break;
1556 case 1:
1557 ia64f2ia32f(f, &ptp->f9);
1558 break;
1559 case 2:
1560 ia64f2ia32f(f, &ptp->f10);
1561 break;
1562 case 3:
1563 ia64f2ia32f(f, &ptp->f11);
1564 break;
1565 case 4:
1566 case 5:
1567 case 6:
1568 case 7:
1569 ia64f2ia32f(f, &swp->f12 + (regno - 4));
1570 break;
1571 }
1572 copy_to_user(reg, f, sizeof(*reg));
1573}
1574
1575static void
1576get_fpreg (int regno, struct _fpreg_ia32 __user *reg, struct pt_regs *ptp,
1577 struct switch_stack *swp, int tos)
1578{
1579
1580 if ((regno += tos) >= 8)
1581 regno -= 8;
1582 switch (regno) {
1583 case 0:
1584 copy_from_user(&ptp->f8, reg, sizeof(*reg));
1585 break;
1586 case 1:
1587 copy_from_user(&ptp->f9, reg, sizeof(*reg));
1588 break;
1589 case 2:
1590 copy_from_user(&ptp->f10, reg, sizeof(*reg));
1591 break;
1592 case 3:
1593 copy_from_user(&ptp->f11, reg, sizeof(*reg));
1594 break;
1595 case 4:
1596 case 5:
1597 case 6:
1598 case 7:
1599 copy_from_user(&swp->f12 + (regno - 4), reg, sizeof(*reg));
1600 break;
1601 }
1602 return;
1603}
1604
1605int
1606save_ia32_fpstate (struct task_struct *tsk, struct ia32_user_i387_struct __user *save)
1607{
1608 struct switch_stack *swp;
1609 struct pt_regs *ptp;
1610 int i, tos;
1611
1612 if (!access_ok(VERIFY_WRITE, save, sizeof(*save)))
1613 return -EFAULT;
1614
1615 __put_user(tsk->thread.fcr & 0xffff, &save->cwd);
1616 __put_user(tsk->thread.fsr & 0xffff, &save->swd);
1617 __put_user((tsk->thread.fsr>>16) & 0xffff, &save->twd);
1618 __put_user(tsk->thread.fir, &save->fip);
1619 __put_user((tsk->thread.fir>>32) & 0xffff, &save->fcs);
1620 __put_user(tsk->thread.fdr, &save->foo);
1621 __put_user((tsk->thread.fdr>>32) & 0xffff, &save->fos);
1622
1623 /*
1624 * Stack frames start with 16 bytes of temp space
1625 */
1626 swp = (struct switch_stack *)(tsk->thread.ksp + 16);
1627 ptp = ia64_task_regs(tsk);
1628 tos = (tsk->thread.fsr >> 11) & 7;
1629 for (i = 0; i < 8; i++)
1630 put_fpreg(i, &save->st_space[i], ptp, swp, tos);
1631 return 0;
1632}
1633
1634static int
1635restore_ia32_fpstate (struct task_struct *tsk, struct ia32_user_i387_struct __user *save)
1636{
1637 struct switch_stack *swp;
1638 struct pt_regs *ptp;
1639 int i, tos;
1640 unsigned int fsrlo, fsrhi, num32;
1641
1642 if (!access_ok(VERIFY_READ, save, sizeof(*save)))
1643 return(-EFAULT);
1644
1645 __get_user(num32, (unsigned int __user *)&save->cwd);
1646 tsk->thread.fcr = (tsk->thread.fcr & (~0x1f3f)) | (num32 & 0x1f3f);
1647 __get_user(fsrlo, (unsigned int __user *)&save->swd);
1648 __get_user(fsrhi, (unsigned int __user *)&save->twd);
1649 num32 = (fsrhi << 16) | fsrlo;
1650 tsk->thread.fsr = (tsk->thread.fsr & (~0xffffffff)) | num32;
1651 __get_user(num32, (unsigned int __user *)&save->fip);
1652 tsk->thread.fir = (tsk->thread.fir & (~0xffffffff)) | num32;
1653 __get_user(num32, (unsigned int __user *)&save->foo);
1654 tsk->thread.fdr = (tsk->thread.fdr & (~0xffffffff)) | num32;
1655
1656 /*
1657 * Stack frames start with 16 bytes of temp space
1658 */
1659 swp = (struct switch_stack *)(tsk->thread.ksp + 16);
1660 ptp = ia64_task_regs(tsk);
1661 tos = (tsk->thread.fsr >> 11) & 7;
1662 for (i = 0; i < 8; i++)
1663 get_fpreg(i, &save->st_space[i], ptp, swp, tos);
1664 return 0;
1665}
1666
1667int
1668save_ia32_fpxstate (struct task_struct *tsk, struct ia32_user_fxsr_struct __user *save)
1669{
1670 struct switch_stack *swp;
1671 struct pt_regs *ptp;
1672 int i, tos;
1673 unsigned long mxcsr=0;
1674 unsigned long num128[2];
1675
1676 if (!access_ok(VERIFY_WRITE, save, sizeof(*save)))
1677 return -EFAULT;
1678
1679 __put_user(tsk->thread.fcr & 0xffff, &save->cwd);
1680 __put_user(tsk->thread.fsr & 0xffff, &save->swd);
1681 __put_user((tsk->thread.fsr>>16) & 0xffff, &save->twd);
1682 __put_user(tsk->thread.fir, &save->fip);
1683 __put_user((tsk->thread.fir>>32) & 0xffff, &save->fcs);
1684 __put_user(tsk->thread.fdr, &save->foo);
1685 __put_user((tsk->thread.fdr>>32) & 0xffff, &save->fos);
1686
1687 /*
1688 * Stack frames start with 16 bytes of temp space
1689 */
1690 swp = (struct switch_stack *)(tsk->thread.ksp + 16);
1691 ptp = ia64_task_regs(tsk);
1692 tos = (tsk->thread.fsr >> 11) & 7;
1693 for (i = 0; i < 8; i++)
1694 put_fpreg(i, (struct _fpreg_ia32 __user *)&save->st_space[4*i], ptp, swp, tos);
1695
1696 mxcsr = ((tsk->thread.fcr>>32) & 0xff80) | ((tsk->thread.fsr>>32) & 0x3f);
1697 __put_user(mxcsr & 0xffff, &save->mxcsr);
1698 for (i = 0; i < 8; i++) {
1699 memcpy(&(num128[0]), &(swp->f16) + i*2, sizeof(unsigned long));
1700 memcpy(&(num128[1]), &(swp->f17) + i*2, sizeof(unsigned long));
1701 copy_to_user(&save->xmm_space[0] + 4*i, num128, sizeof(struct _xmmreg_ia32));
1702 }
1703 return 0;
1704}
1705
1706static int
1707restore_ia32_fpxstate (struct task_struct *tsk, struct ia32_user_fxsr_struct __user *save)
1708{
1709 struct switch_stack *swp;
1710 struct pt_regs *ptp;
1711 int i, tos;
1712 unsigned int fsrlo, fsrhi, num32;
1713 int mxcsr;
1714 unsigned long num64;
1715 unsigned long num128[2];
1716
1717 if (!access_ok(VERIFY_READ, save, sizeof(*save)))
1718 return(-EFAULT);
1719
1720 __get_user(num32, (unsigned int __user *)&save->cwd);
1721 tsk->thread.fcr = (tsk->thread.fcr & (~0x1f3f)) | (num32 & 0x1f3f);
1722 __get_user(fsrlo, (unsigned int __user *)&save->swd);
1723 __get_user(fsrhi, (unsigned int __user *)&save->twd);
1724 num32 = (fsrhi << 16) | fsrlo;
1725 tsk->thread.fsr = (tsk->thread.fsr & (~0xffffffff)) | num32;
1726 __get_user(num32, (unsigned int __user *)&save->fip);
1727 tsk->thread.fir = (tsk->thread.fir & (~0xffffffff)) | num32;
1728 __get_user(num32, (unsigned int __user *)&save->foo);
1729 tsk->thread.fdr = (tsk->thread.fdr & (~0xffffffff)) | num32;
1730
1731 /*
1732 * Stack frames start with 16 bytes of temp space
1733 */
1734 swp = (struct switch_stack *)(tsk->thread.ksp + 16);
1735 ptp = ia64_task_regs(tsk);
1736 tos = (tsk->thread.fsr >> 11) & 7;
1737 for (i = 0; i < 8; i++)
1738 get_fpreg(i, (struct _fpreg_ia32 __user *)&save->st_space[4*i], ptp, swp, tos);
1739
1740 __get_user(mxcsr, (unsigned int __user *)&save->mxcsr);
1741 num64 = mxcsr & 0xff10;
1742 tsk->thread.fcr = (tsk->thread.fcr & (~0xff1000000000UL)) | (num64<<32);
1743 num64 = mxcsr & 0x3f;
1744 tsk->thread.fsr = (tsk->thread.fsr & (~0x3f00000000UL)) | (num64<<32);
1745
1746 for (i = 0; i < 8; i++) {
1747 copy_from_user(num128, &save->xmm_space[0] + 4*i, sizeof(struct _xmmreg_ia32));
1748 memcpy(&(swp->f16) + i*2, &(num128[0]), sizeof(unsigned long));
1749 memcpy(&(swp->f17) + i*2, &(num128[1]), sizeof(unsigned long));
1750 }
1751 return 0;
1752}
1753
1754asmlinkage long
1755sys32_ptrace (int request, pid_t pid, unsigned int addr, unsigned int data)
1756{
1757 struct task_struct *child;
1758 unsigned int value, tmp;
1759 long i, ret;
1760
1761 lock_kernel();
1762 if (request == PTRACE_TRACEME) {
1763 ret = sys_ptrace(request, pid, addr, data);
1764 goto out;
1765 }
1766
1767 ret = -ESRCH;
1768 read_lock(&tasklist_lock);
1769 child = find_task_by_pid(pid);
1770 if (child)
1771 get_task_struct(child);
1772 read_unlock(&tasklist_lock);
1773 if (!child)
1774 goto out;
1775 ret = -EPERM;
1776 if (pid == 1) /* no messing around with init! */
1777 goto out_tsk;
1778
1779 if (request == PTRACE_ATTACH) {
1780 ret = sys_ptrace(request, pid, addr, data);
1781 goto out_tsk;
1782 }
1783
1784 ret = ptrace_check_attach(child, request == PTRACE_KILL);
1785 if (ret < 0)
1786 goto out_tsk;
1787
1788 switch (request) {
1789 case PTRACE_PEEKTEXT:
1790 case PTRACE_PEEKDATA: /* read word at location addr */
1791 ret = ia32_peek(child, addr, &value);
1792 if (ret == 0)
1793 ret = put_user(value, (unsigned int __user *) compat_ptr(data));
1794 else
1795 ret = -EIO;
1796 goto out_tsk;
1797
1798 case PTRACE_POKETEXT:
1799 case PTRACE_POKEDATA: /* write the word at location addr */
1800 ret = ia32_poke(child, addr, data);
1801 goto out_tsk;
1802
1803 case PTRACE_PEEKUSR: /* read word at addr in USER area */
1804 ret = -EIO;
1805 if ((addr & 3) || addr > 17*sizeof(int))
1806 break;
1807
1808 tmp = getreg(child, addr);
1809 if (!put_user(tmp, (unsigned int __user *) compat_ptr(data)))
1810 ret = 0;
1811 break;
1812
1813 case PTRACE_POKEUSR: /* write word at addr in USER area */
1814 ret = -EIO;
1815 if ((addr & 3) || addr > 17*sizeof(int))
1816 break;
1817
1818 putreg(child, addr, data);
1819 ret = 0;
1820 break;
1821
1822 case IA32_PTRACE_GETREGS:
1823 if (!access_ok(VERIFY_WRITE, compat_ptr(data), 17*sizeof(int))) {
1824 ret = -EIO;
1825 break;
1826 }
1827 for (i = 0; i < (int) (17*sizeof(int)); i += sizeof(int) ) {
1828 put_user(getreg(child, i), (unsigned int __user *) compat_ptr(data));
1829 data += sizeof(int);
1830 }
1831 ret = 0;
1832 break;
1833
1834 case IA32_PTRACE_SETREGS:
1835 if (!access_ok(VERIFY_READ, compat_ptr(data), 17*sizeof(int))) {
1836 ret = -EIO;
1837 break;
1838 }
1839 for (i = 0; i < (int) (17*sizeof(int)); i += sizeof(int) ) {
1840 get_user(tmp, (unsigned int __user *) compat_ptr(data));
1841 putreg(child, i, tmp);
1842 data += sizeof(int);
1843 }
1844 ret = 0;
1845 break;
1846
1847 case IA32_PTRACE_GETFPREGS:
1848 ret = save_ia32_fpstate(child, (struct ia32_user_i387_struct __user *)
1849 compat_ptr(data));
1850 break;
1851
1852 case IA32_PTRACE_GETFPXREGS:
1853 ret = save_ia32_fpxstate(child, (struct ia32_user_fxsr_struct __user *)
1854 compat_ptr(data));
1855 break;
1856
1857 case IA32_PTRACE_SETFPREGS:
1858 ret = restore_ia32_fpstate(child, (struct ia32_user_i387_struct __user *)
1859 compat_ptr(data));
1860 break;
1861
1862 case IA32_PTRACE_SETFPXREGS:
1863 ret = restore_ia32_fpxstate(child, (struct ia32_user_fxsr_struct __user *)
1864 compat_ptr(data));
1865 break;
1866
1867 case PTRACE_GETEVENTMSG:
1868 ret = put_user(child->ptrace_message, (unsigned int __user *) compat_ptr(data));
1869 break;
1870
1871 case PTRACE_SYSCALL: /* continue, stop after next syscall */
1872 case PTRACE_CONT: /* restart after signal. */
1873 case PTRACE_KILL:
1874 case PTRACE_SINGLESTEP: /* execute child for one instruction */
1875 case PTRACE_DETACH: /* detach a process */
1876 ret = sys_ptrace(request, pid, addr, data);
1877 break;
1878
1879 default:
1880 ret = ptrace_request(child, request, addr, data);
1881 break;
1882
1883 }
1884 out_tsk:
1885 put_task_struct(child);
1886 out:
1887 unlock_kernel();
1888 return ret;
1889}
1890
1891typedef struct {
1892 unsigned int ss_sp;
1893 unsigned int ss_flags;
1894 unsigned int ss_size;
1895} ia32_stack_t;
1896
1897asmlinkage long
1898sys32_sigaltstack (ia32_stack_t __user *uss32, ia32_stack_t __user *uoss32,
1899 long arg2, long arg3, long arg4, long arg5, long arg6,
1900 long arg7, struct pt_regs pt)
1901{
1902 stack_t uss, uoss;
1903 ia32_stack_t buf32;
1904 int ret;
1905 mm_segment_t old_fs = get_fs();
1906
1907 if (uss32) {
1908 if (copy_from_user(&buf32, uss32, sizeof(ia32_stack_t)))
1909 return -EFAULT;
1910 uss.ss_sp = (void __user *) (long) buf32.ss_sp;
1911 uss.ss_flags = buf32.ss_flags;
1912 /* MINSIGSTKSZ is different for ia32 vs ia64. We lie here to pass the
1913 check and set it to the user-requested value later */
1914 if ((buf32.ss_flags != SS_DISABLE) && (buf32.ss_size < MINSIGSTKSZ_IA32)) {
1915 ret = -ENOMEM;
1916 goto out;
1917 }
1918 uss.ss_size = MINSIGSTKSZ;
1919 }
1920 set_fs(KERNEL_DS);
1921 ret = do_sigaltstack(uss32 ? (stack_t __user *) &uss : NULL,
1922 (stack_t __user *) &uoss, pt.r12);
1923 current->sas_ss_size = buf32.ss_size;
1924 set_fs(old_fs);
1925out:
1926 if (ret < 0)
1927 return(ret);
1928 if (uoss32) {
1929 buf32.ss_sp = (long __user) uoss.ss_sp;
1930 buf32.ss_flags = uoss.ss_flags;
1931 buf32.ss_size = uoss.ss_size;
1932 if (copy_to_user(uoss32, &buf32, sizeof(ia32_stack_t)))
1933 return -EFAULT;
1934 }
1935 return ret;
1936}
1937
1938asmlinkage int
1939sys32_pause (void)
1940{
1941 current->state = TASK_INTERRUPTIBLE;
1942 schedule();
1943 return -ERESTARTNOHAND;
1944}
1945
1946asmlinkage int
1947sys32_msync (unsigned int start, unsigned int len, int flags)
1948{
1949 unsigned int addr;
1950
1951 if (OFFSET4K(start))
1952 return -EINVAL;
1953 addr = PAGE_START(start);
1954 return sys_msync(addr, len + (start - addr), flags);
1955}
1956
1957struct sysctl32 {
1958 unsigned int name;
1959 int nlen;
1960 unsigned int oldval;
1961 unsigned int oldlenp;
1962 unsigned int newval;
1963 unsigned int newlen;
1964 unsigned int __unused[4];
1965};
1966
1967#ifdef CONFIG_SYSCTL
1968asmlinkage long
1969sys32_sysctl (struct sysctl32 __user *args)
1970{
1971 struct sysctl32 a32;
1972 mm_segment_t old_fs = get_fs ();
1973 void __user *oldvalp, *newvalp;
1974 size_t oldlen;
1975 int __user *namep;
1976 long ret;
1977
1978 if (copy_from_user(&a32, args, sizeof(a32)))
1979 return -EFAULT;
1980
1981 /*
1982 * We need to pre-validate these because we have to disable address checking
1983 * before calling do_sysctl() (because of OLDLEN), but we can't run the risk of
1984 * the user specifying bad addresses here. Well, since we're dealing with 32-bit
1985 * addresses, we KNOW that access_ok() will always succeed, so this is an
1986 * expensive NOP, but so what...
1987 */
1988 namep = (int __user *) compat_ptr(a32.name);
1989 oldvalp = compat_ptr(a32.oldval);
1990 newvalp = compat_ptr(a32.newval);
1991
1992 if ((oldvalp && get_user(oldlen, (int __user *) compat_ptr(a32.oldlenp)))
1993 || !access_ok(VERIFY_WRITE, namep, 0)
1994 || !access_ok(VERIFY_WRITE, oldvalp, 0)
1995 || !access_ok(VERIFY_WRITE, newvalp, 0))
1996 return -EFAULT;
1997
1998 set_fs(KERNEL_DS);
1999 lock_kernel();
2000 ret = do_sysctl(namep, a32.nlen, oldvalp, (size_t __user *) &oldlen,
2001 newvalp, (size_t) a32.newlen);
2002 unlock_kernel();
2003 set_fs(old_fs);
2004
2005 if (oldvalp && put_user (oldlen, (int __user *) compat_ptr(a32.oldlenp)))
2006 return -EFAULT;
2007
2008 return ret;
2009}
2010#endif
2011
2012asmlinkage long
2013sys32_newuname (struct new_utsname __user *name)
2014{
2015 int ret = sys_newuname(name);
2016
2017 if (!ret)
2018 if (copy_to_user(name->machine, "i686\0\0\0", 8))
2019 ret = -EFAULT;
2020 return ret;
2021}
2022
2023asmlinkage long
2024sys32_getresuid16 (u16 __user *ruid, u16 __user *euid, u16 __user *suid)
2025{
2026 uid_t a, b, c;
2027 int ret;
2028 mm_segment_t old_fs = get_fs();
2029
2030 set_fs(KERNEL_DS);
2031 ret = sys_getresuid((uid_t __user *) &a, (uid_t __user *) &b, (uid_t __user *) &c);
2032 set_fs(old_fs);
2033
2034 if (put_user(a, ruid) || put_user(b, euid) || put_user(c, suid))
2035 return -EFAULT;
2036 return ret;
2037}
2038
2039asmlinkage long
2040sys32_getresgid16 (u16 __user *rgid, u16 __user *egid, u16 __user *sgid)
2041{
2042 gid_t a, b, c;
2043 int ret;
2044 mm_segment_t old_fs = get_fs();
2045
2046 set_fs(KERNEL_DS);
2047 ret = sys_getresgid((gid_t __user *) &a, (gid_t __user *) &b, (gid_t __user *) &c);
2048 set_fs(old_fs);
2049
2050 if (ret)
2051 return ret;
2052
2053 return put_user(a, rgid) | put_user(b, egid) | put_user(c, sgid);
2054}
2055
2056asmlinkage long
2057sys32_lseek (unsigned int fd, int offset, unsigned int whence)
2058{
2059 /* Sign-extension of "offset" is important here... */
2060 return sys_lseek(fd, offset, whence);
2061}
2062
2063static int
2064groups16_to_user(short __user *grouplist, struct group_info *group_info)
2065{
2066 int i;
2067 short group;
2068
2069 for (i = 0; i < group_info->ngroups; i++) {
2070 group = (short)GROUP_AT(group_info, i);
2071 if (put_user(group, grouplist+i))
2072 return -EFAULT;
2073 }
2074
2075 return 0;
2076}
2077
2078static int
2079groups16_from_user(struct group_info *group_info, short __user *grouplist)
2080{
2081 int i;
2082 short group;
2083
2084 for (i = 0; i < group_info->ngroups; i++) {
2085 if (get_user(group, grouplist+i))
2086 return -EFAULT;
2087 GROUP_AT(group_info, i) = (gid_t)group;
2088 }
2089
2090 return 0;
2091}
2092
2093asmlinkage long
2094sys32_getgroups16 (int gidsetsize, short __user *grouplist)
2095{
2096 int i;
2097
2098 if (gidsetsize < 0)
2099 return -EINVAL;
2100
2101 get_group_info(current->group_info);
2102 i = current->group_info->ngroups;
2103 if (gidsetsize) {
2104 if (i > gidsetsize) {
2105 i = -EINVAL;
2106 goto out;
2107 }
2108 if (groups16_to_user(grouplist, current->group_info)) {
2109 i = -EFAULT;
2110 goto out;
2111 }
2112 }
2113out:
2114 put_group_info(current->group_info);
2115 return i;
2116}
2117
2118asmlinkage long
2119sys32_setgroups16 (int gidsetsize, short __user *grouplist)
2120{
2121 struct group_info *group_info;
2122 int retval;
2123
2124 if (!capable(CAP_SETGID))
2125 return -EPERM;
2126 if ((unsigned)gidsetsize > NGROUPS_MAX)
2127 return -EINVAL;
2128
2129 group_info = groups_alloc(gidsetsize);
2130 if (!group_info)
2131 return -ENOMEM;
2132 retval = groups16_from_user(group_info, grouplist);
2133 if (retval) {
2134 put_group_info(group_info);
2135 return retval;
2136 }
2137
2138 retval = set_current_groups(group_info);
2139 put_group_info(group_info);
2140
2141 return retval;
2142}
2143
2144asmlinkage long
2145sys32_truncate64 (unsigned int path, unsigned int len_lo, unsigned int len_hi)
2146{
2147 return sys_truncate(compat_ptr(path), ((unsigned long) len_hi << 32) | len_lo);
2148}
2149
2150asmlinkage long
2151sys32_ftruncate64 (int fd, unsigned int len_lo, unsigned int len_hi)
2152{
2153 return sys_ftruncate(fd, ((unsigned long) len_hi << 32) | len_lo);
2154}
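/*
 * Editorial sketch, not part of the original file: truncate64/ftruncate64
 * (and pread, pwrite and fadvise64_64 further down) receive a 64-bit value
 * as two 32-bit halves; the kernel reassembles it as ((u64) hi << 32) | lo.
 */
#include <stdio.h>

int main(void)
{
	unsigned int len_lo = 0x89abcdefu, len_hi = 0x01234567u;
	unsigned long long len = ((unsigned long long) len_hi << 32) | len_lo;

	printf("len = %#llx\n", len);	/* 0x123456789abcdef */
	return 0;
}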
2155
2156static int
2157putstat64 (struct stat64 __user *ubuf, struct kstat *kbuf)
2158{
2159 int err;
2160 u64 hdev;
2161
2162 if (clear_user(ubuf, sizeof(*ubuf)))
2163 return -EFAULT;
2164
2165 hdev = huge_encode_dev(kbuf->dev);
2166 err = __put_user(hdev, (u32 __user*)&ubuf->st_dev);
2167 err |= __put_user(hdev >> 32, ((u32 __user*)&ubuf->st_dev) + 1);
2168 err |= __put_user(kbuf->ino, &ubuf->__st_ino);
2169 err |= __put_user(kbuf->ino, &ubuf->st_ino_lo);
2170 err |= __put_user(kbuf->ino >> 32, &ubuf->st_ino_hi);
2171 err |= __put_user(kbuf->mode, &ubuf->st_mode);
2172 err |= __put_user(kbuf->nlink, &ubuf->st_nlink);
2173 err |= __put_user(kbuf->uid, &ubuf->st_uid);
2174 err |= __put_user(kbuf->gid, &ubuf->st_gid);
2175 hdev = huge_encode_dev(kbuf->rdev);
2176 err = __put_user(hdev, (u32 __user*)&ubuf->st_rdev);
2177 err |= __put_user(hdev >> 32, ((u32 __user*)&ubuf->st_rdev) + 1);
2178 err |= __put_user(kbuf->size, &ubuf->st_size_lo);
2179 err |= __put_user((kbuf->size >> 32), &ubuf->st_size_hi);
2180 err |= __put_user(kbuf->atime.tv_sec, &ubuf->st_atime);
2181 err |= __put_user(kbuf->atime.tv_nsec, &ubuf->st_atime_nsec);
2182 err |= __put_user(kbuf->mtime.tv_sec, &ubuf->st_mtime);
2183 err |= __put_user(kbuf->mtime.tv_nsec, &ubuf->st_mtime_nsec);
2184 err |= __put_user(kbuf->ctime.tv_sec, &ubuf->st_ctime);
2185 err |= __put_user(kbuf->ctime.tv_nsec, &ubuf->st_ctime_nsec);
2186 err |= __put_user(kbuf->blksize, &ubuf->st_blksize);
2187 err |= __put_user(kbuf->blocks, &ubuf->st_blocks);
2188 return err;
2189}
2190
2191asmlinkage long
2192sys32_stat64 (char __user *filename, struct stat64 __user *statbuf)
2193{
2194 struct kstat s;
2195 long ret = vfs_stat(filename, &s);
2196 if (!ret)
2197 ret = putstat64(statbuf, &s);
2198 return ret;
2199}
2200
2201asmlinkage long
2202sys32_lstat64 (char __user *filename, struct stat64 __user *statbuf)
2203{
2204 struct kstat s;
2205 long ret = vfs_lstat(filename, &s);
2206 if (!ret)
2207 ret = putstat64(statbuf, &s);
2208 return ret;
2209}
2210
2211asmlinkage long
2212sys32_fstat64 (unsigned int fd, struct stat64 __user *statbuf)
2213{
2214 struct kstat s;
2215 long ret = vfs_fstat(fd, &s);
2216 if (!ret)
2217 ret = putstat64(statbuf, &s);
2218 return ret;
2219}
2220
2221struct sysinfo32 {
2222 s32 uptime;
2223 u32 loads[3];
2224 u32 totalram;
2225 u32 freeram;
2226 u32 sharedram;
2227 u32 bufferram;
2228 u32 totalswap;
2229 u32 freeswap;
2230 u16 procs;
2231 u16 pad;
2232 u32 totalhigh;
2233 u32 freehigh;
2234 u32 mem_unit;
2235 char _f[8];
2236};
2237
2238asmlinkage long
2239sys32_sysinfo (struct sysinfo32 __user *info)
2240{
2241 struct sysinfo s;
2242 long ret, err;
2243 int bitcount = 0;
2244 mm_segment_t old_fs = get_fs();
2245
2246 set_fs(KERNEL_DS);
2247 ret = sys_sysinfo((struct sysinfo __user *) &s);
2248 set_fs(old_fs);
2249 /* Check to see if any memory value is too large for 32-bit and
2250 * scale down if needed.
2251 */
2252 if ((s.totalram >> 32) || (s.totalswap >> 32)) {
2253 while (s.mem_unit < PAGE_SIZE) {
2254 s.mem_unit <<= 1;
2255 bitcount++;
2256 }
2257 s.totalram >>= bitcount;
2258 s.freeram >>= bitcount;
2259 s.sharedram >>= bitcount;
2260 s.bufferram >>= bitcount;
2261 s.totalswap >>= bitcount;
2262 s.freeswap >>= bitcount;
2263 s.totalhigh >>= bitcount;
2264 s.freehigh >>= bitcount;
2265 }
2266
2267 if (!access_ok(VERIFY_WRITE, info, sizeof(*info)))
2268 return -EFAULT;
2269
2270 err = __put_user(s.uptime, &info->uptime);
2271 err |= __put_user(s.loads[0], &info->loads[0]);
2272 err |= __put_user(s.loads[1], &info->loads[1]);
2273 err |= __put_user(s.loads[2], &info->loads[2]);
2274 err |= __put_user(s.totalram, &info->totalram);
2275 err |= __put_user(s.freeram, &info->freeram);
2276 err |= __put_user(s.sharedram, &info->sharedram);
2277 err |= __put_user(s.bufferram, &info->bufferram);
2278 err |= __put_user(s.totalswap, &info->totalswap);
2279 err |= __put_user(s.freeswap, &info->freeswap);
2280 err |= __put_user(s.procs, &info->procs);
2281 err |= __put_user (s.totalhigh, &info->totalhigh);
2282 err |= __put_user (s.freehigh, &info->freehigh);
2283 err |= __put_user (s.mem_unit, &info->mem_unit);
2284 if (err)
2285 return -EFAULT;
2286 return ret;
2287}
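/*
 * Editorial sketch, not part of the original file: the mem_unit rescaling
 * performed above.  16 GB reported in 1-byte units overflows a u32, so the
 * counts are shifted down while mem_unit is shifted up until it reaches the
 * kernel page size (assumed 16K here, a common ia64 configuration); the
 * product in bytes is unchanged and the counts now fit the 32-bit fields.
 */
#include <stdio.h>

int main(void)
{
	unsigned long long totalram = 16ULL << 30;	/* 16 GB, mem_unit = 1 */
	unsigned int mem_unit = 1, bitcount = 0;
	const unsigned int page_size = 16384;

	if (totalram >> 32) {
		while (mem_unit < page_size) {
			mem_unit <<= 1;
			bitcount++;
		}
		totalram >>= bitcount;
	}
	printf("totalram=%llu units of %u bytes (%llu bytes)\n",
	       totalram, mem_unit, totalram * mem_unit);
	return 0;
}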
2288
2289asmlinkage long
2290sys32_sched_rr_get_interval (pid_t pid, struct compat_timespec __user *interval)
2291{
2292 mm_segment_t old_fs = get_fs();
2293 struct timespec t;
2294 long ret;
2295
2296 set_fs(KERNEL_DS);
2297 ret = sys_sched_rr_get_interval(pid, (struct timespec __user *) &t);
2298 set_fs(old_fs);
2299 if (put_compat_timespec(&t, interval))
2300 return -EFAULT;
2301 return ret;
2302}
2303
2304asmlinkage long
2305sys32_pread (unsigned int fd, void __user *buf, unsigned int count, u32 pos_lo, u32 pos_hi)
2306{
2307 return sys_pread64(fd, buf, count, ((unsigned long) pos_hi << 32) | pos_lo);
2308}
2309
2310asmlinkage long
2311sys32_pwrite (unsigned int fd, void __user *buf, unsigned int count, u32 pos_lo, u32 pos_hi)
2312{
2313 return sys_pwrite64(fd, buf, count, ((unsigned long) pos_hi << 32) | pos_lo);
2314}
2315
2316asmlinkage long
2317sys32_sendfile (int out_fd, int in_fd, int __user *offset, unsigned int count)
2318{
2319 mm_segment_t old_fs = get_fs();
2320 long ret;
2321 off_t of;
2322
2323 if (offset && get_user(of, offset))
2324 return -EFAULT;
2325
2326 set_fs(KERNEL_DS);
2327 ret = sys_sendfile(out_fd, in_fd, offset ? (off_t __user *) &of : NULL, count);
2328 set_fs(old_fs);
2329
2330 if (!ret && offset && put_user(of, offset))
2331 return -EFAULT;
2332
2333 return ret;
2334}
2335
2336asmlinkage long
2337sys32_personality (unsigned int personality)
2338{
2339 long ret;
2340
2341 if (current->personality == PER_LINUX32 && personality == PER_LINUX)
2342 personality = PER_LINUX32;
2343 ret = sys_personality(personality);
2344 if (ret == PER_LINUX32)
2345 ret = PER_LINUX;
2346 return ret;
2347}
2348
2349asmlinkage unsigned long
2350sys32_brk (unsigned int brk)
2351{
2352 unsigned long ret, obrk;
2353 struct mm_struct *mm = current->mm;
2354
2355 obrk = mm->brk;
2356 ret = sys_brk(brk);
2357 if (ret < obrk)
2358 clear_user(compat_ptr(ret), PAGE_ALIGN(ret) - ret);
2359 return ret;
2360}
2361
2362/*
2363 * Exactly like fs/open.c:sys_open(), except that it doesn't set the O_LARGEFILE flag.
2364 */
2365asmlinkage long
2366sys32_open (const char __user * filename, int flags, int mode)
2367{
2368 char * tmp;
2369 int fd, error;
2370
2371 tmp = getname(filename);
2372 fd = PTR_ERR(tmp);
2373 if (!IS_ERR(tmp)) {
2374 fd = get_unused_fd();
2375 if (fd >= 0) {
2376 struct file *f = filp_open(tmp, flags, mode);
2377 error = PTR_ERR(f);
2378 if (IS_ERR(f))
2379 goto out_error;
2380 fd_install(fd, f);
2381 }
2382out:
2383 putname(tmp);
2384 }
2385 return fd;
2386
2387out_error:
2388 put_unused_fd(fd);
2389 fd = error;
2390 goto out;
2391}
2392
2393/* Structure for ia32 emulation on ia64 */
2394struct epoll_event32
2395{
2396 u32 events;
2397 u32 data[2];
2398};
2399
2400asmlinkage long
2401sys32_epoll_ctl(int epfd, int op, int fd, struct epoll_event32 __user *event)
2402{
2403 mm_segment_t old_fs = get_fs();
2404 struct epoll_event event64;
2405 int error;
2406 u32 data_halfword;
2407
2408 if (!access_ok(VERIFY_READ, event, sizeof(struct epoll_event32)))
2409 return -EFAULT;
2410
2411 __get_user(event64.events, &event->events);
2412 __get_user(data_halfword, &event->data[0]);
2413 event64.data = data_halfword;
2414 __get_user(data_halfword, &event->data[1]);
2415 event64.data |= (u64)data_halfword << 32;
2416
2417 set_fs(KERNEL_DS);
2418 error = sys_epoll_ctl(epfd, op, fd, (struct epoll_event __user *) &event64);
2419 set_fs(old_fs);
2420
2421 return error;
2422}
2423
2424asmlinkage long
2425sys32_epoll_wait(int epfd, struct epoll_event32 __user * events, int maxevents,
2426 int timeout)
2427{
2428 struct epoll_event *events64 = NULL;
2429 mm_segment_t old_fs = get_fs();
2430 int error, numevents, size;
2431 int evt_idx;
2432 int do_free_pages = 0;
2433
2434 if (maxevents <= 0) {
2435 return -EINVAL;
2436 }
2437
2438 /* Verify that the area passed by the user is writeable */
2439 if (!access_ok(VERIFY_WRITE, events, maxevents * sizeof(struct epoll_event32)))
2440 return -EFAULT;
2441
2442 /*
2443 * Allocate space for the intermediate copy. If the space needed
2444 * is large enough to cause kmalloc to fail, then try again with
2445 * __get_free_pages.
2446 */
2447 size = maxevents * sizeof(struct epoll_event);
2448 events64 = kmalloc(size, GFP_KERNEL);
2449 if (events64 == NULL) {
2450 events64 = (struct epoll_event *)
2451 __get_free_pages(GFP_KERNEL, get_order(size));
2452 if (events64 == NULL)
2453 return -ENOMEM;
2454 do_free_pages = 1;
2455 }
2456
2457 /* Do the system call */
2458 set_fs(KERNEL_DS); /* copy_to/from_user should work on kernel mem */
2459 numevents = sys_epoll_wait(epfd, (struct epoll_event __user *) events64,
2460 maxevents, timeout);
2461 set_fs(old_fs);
2462
2463 /* Don't modify userspace memory if we're returning an error */
2464 if (numevents > 0) {
2465 /* Translate the 64-bit structures back into the 32-bit
2466 structures */
2467 for (evt_idx = 0; evt_idx < numevents; evt_idx++) {
2468 __put_user(events64[evt_idx].events,
2469 &events[evt_idx].events);
2470 __put_user((u32)events64[evt_idx].data,
2471 &events[evt_idx].data[0]);
2472 __put_user((u32)(events64[evt_idx].data >> 32),
2473 &events[evt_idx].data[1]);
2474 }
2475 }
2476
2477 if (do_free_pages)
2478 free_pages((unsigned long) events64, get_order(size));
2479 else
2480 kfree(events64);
2481 return numevents;
2482}
2483
2484/*
2485 * Get a yet unused TLS descriptor index.
2486 */
2487static int
2488get_free_idx (void)
2489{
2490 struct thread_struct *t = &current->thread;
2491 int idx;
2492
2493 for (idx = 0; idx < GDT_ENTRY_TLS_ENTRIES; idx++)
2494 if (desc_empty(t->tls_array + idx))
2495 return idx + GDT_ENTRY_TLS_MIN;
2496 return -ESRCH;
2497}
2498
2499/*
2500 * Set a given TLS descriptor:
2501 */
2502asmlinkage int
2503sys32_set_thread_area (struct ia32_user_desc __user *u_info)
2504{
2505 struct thread_struct *t = &current->thread;
2506 struct ia32_user_desc info;
2507 struct desc_struct *desc;
2508 int cpu, idx;
2509
2510 if (copy_from_user(&info, u_info, sizeof(info)))
2511 return -EFAULT;
2512 idx = info.entry_number;
2513
2514 /*
2515 * index -1 means the kernel should try to find and allocate an empty descriptor:
2516 */
2517 if (idx == -1) {
2518 idx = get_free_idx();
2519 if (idx < 0)
2520 return idx;
2521 if (put_user(idx, &u_info->entry_number))
2522 return -EFAULT;
2523 }
2524
2525 if (idx < GDT_ENTRY_TLS_MIN || idx > GDT_ENTRY_TLS_MAX)
2526 return -EINVAL;
2527
2528 desc = t->tls_array + idx - GDT_ENTRY_TLS_MIN;
2529
2530 cpu = smp_processor_id();
2531
2532 if (LDT_empty(&info)) {
2533 desc->a = 0;
2534 desc->b = 0;
2535 } else {
2536 desc->a = LDT_entry_a(&info);
2537 desc->b = LDT_entry_b(&info);
2538 }
2539 load_TLS(t, cpu);
2540 return 0;
2541}
2542
2543/*
2544 * Get the current Thread-Local Storage area:
2545 */
2546
2547#define GET_BASE(desc) ( \
2548 (((desc)->a >> 16) & 0x0000ffff) | \
2549 (((desc)->b << 16) & 0x00ff0000) | \
2550 ( (desc)->b & 0xff000000) )
2551
2552#define GET_LIMIT(desc) ( \
2553 ((desc)->a & 0x0ffff) | \
2554 ((desc)->b & 0xf0000) )
2555
2556#define GET_32BIT(desc) (((desc)->b >> 22) & 1)
2557#define GET_CONTENTS(desc) (((desc)->b >> 10) & 3)
2558#define GET_WRITABLE(desc) (((desc)->b >> 9) & 1)
2559#define GET_LIMIT_PAGES(desc) (((desc)->b >> 23) & 1)
2560#define GET_PRESENT(desc) (((desc)->b >> 15) & 1)
2561#define GET_USEABLE(desc) (((desc)->b >> 20) & 1)
2562
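/*
 * Editorial sketch, not part of the original file: a round-trip check of the
 * base/limit extraction above on a hand-built descriptor.  The base is split
 * across a[31:16], b[7:0] and b[31:24]; the 20-bit limit across a[15:0] and
 * b[19:16].  Type and flag bits are left zero since only base and limit are
 * decoded here.
 */
#include <stdio.h>

struct desc { unsigned int a, b; };

#define MODEL_GET_BASE(d)  ((((d)->a >> 16) & 0x0000ffff) | \
			    (((d)->b << 16) & 0x00ff0000) | \
			    ( (d)->b        & 0xff000000))
#define MODEL_GET_LIMIT(d) (((d)->a & 0x0ffff) | ((d)->b & 0xf0000))

int main(void)
{
	unsigned int base = 0x12345678, limit = 0xabcde;
	struct desc d = {
		.a = ((base & 0xffff) << 16) | (limit & 0xffff),
		.b = (base & 0xff000000) | (limit & 0xf0000) | ((base >> 16) & 0xff),
	};

	printf("base=%#x limit=%#x\n", MODEL_GET_BASE(&d), MODEL_GET_LIMIT(&d));
	return 0;
}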
2563asmlinkage int
2564sys32_get_thread_area (struct ia32_user_desc __user *u_info)
2565{
2566 struct ia32_user_desc info;
2567 struct desc_struct *desc;
2568 int idx;
2569
2570 if (get_user(idx, &u_info->entry_number))
2571 return -EFAULT;
2572 if (idx < GDT_ENTRY_TLS_MIN || idx > GDT_ENTRY_TLS_MAX)
2573 return -EINVAL;
2574
2575 desc = current->thread.tls_array + idx - GDT_ENTRY_TLS_MIN;
2576
2577 info.entry_number = idx;
2578 info.base_addr = GET_BASE(desc);
2579 info.limit = GET_LIMIT(desc);
2580 info.seg_32bit = GET_32BIT(desc);
2581 info.contents = GET_CONTENTS(desc);
2582 info.read_exec_only = !GET_WRITABLE(desc);
2583 info.limit_in_pages = GET_LIMIT_PAGES(desc);
2584 info.seg_not_present = !GET_PRESENT(desc);
2585 info.useable = GET_USEABLE(desc);
2586
2587 if (copy_to_user(u_info, &info, sizeof(info)))
2588 return -EFAULT;
2589 return 0;
2590}
2591
2592asmlinkage long
2593sys32_timer_create(u32 clock, struct compat_sigevent __user *se32, timer_t __user *timer_id)
2594{
2595 struct sigevent se;
2596 mm_segment_t oldfs;
2597 timer_t t;
2598 long err;
2599
2600 if (se32 == NULL)
2601 return sys_timer_create(clock, NULL, timer_id);
2602
2603 if (get_compat_sigevent(&se, se32))
2604 return -EFAULT;
2605
2606 if (!access_ok(VERIFY_WRITE,timer_id,sizeof(timer_t)))
2607 return -EFAULT;
2608
2609 oldfs = get_fs();
2610 set_fs(KERNEL_DS);
2611 err = sys_timer_create(clock, (struct sigevent __user *) &se, (timer_t __user *) &t);
2612 set_fs(oldfs);
2613
2614 if (!err)
2615 err = __put_user (t, timer_id);
2616
2617 return err;
2618}
2619
2620long sys32_fadvise64_64(int fd, __u32 offset_low, __u32 offset_high,
2621 __u32 len_low, __u32 len_high, int advice)
2622{
2623 return sys_fadvise64_64(fd,
2624 (((u64)offset_high)<<32) | offset_low,
2625 (((u64)len_high)<<32) | len_low,
2626 advice);
2627}
2628
2629#ifdef NOTYET /* UNTESTED FOR IA64 FROM HERE DOWN */
2630
2631asmlinkage long sys32_setreuid(compat_uid_t ruid, compat_uid_t euid)
2632{
2633 uid_t sruid, seuid;
2634
2635 sruid = (ruid == (compat_uid_t)-1) ? ((uid_t)-1) : ((uid_t)ruid);
2636 seuid = (euid == (compat_uid_t)-1) ? ((uid_t)-1) : ((uid_t)euid);
2637 return sys_setreuid(sruid, seuid);
2638}
2639
2640asmlinkage long
2641sys32_setresuid(compat_uid_t ruid, compat_uid_t euid,
2642 compat_uid_t suid)
2643{
2644 uid_t sruid, seuid, ssuid;
2645
2646 sruid = (ruid == (compat_uid_t)-1) ? ((uid_t)-1) : ((uid_t)ruid);
2647 seuid = (euid == (compat_uid_t)-1) ? ((uid_t)-1) : ((uid_t)euid);
2648 ssuid = (suid == (compat_uid_t)-1) ? ((uid_t)-1) : ((uid_t)suid);
2649 return sys_setresuid(sruid, seuid, ssuid);
2650}
2651
2652asmlinkage long
2653sys32_setregid(compat_gid_t rgid, compat_gid_t egid)
2654{
2655 gid_t srgid, segid;
2656
2657 srgid = (rgid == (compat_gid_t)-1) ? ((gid_t)-1) : ((gid_t)rgid);
2658 segid = (egid == (compat_gid_t)-1) ? ((gid_t)-1) : ((gid_t)egid);
2659 return sys_setregid(srgid, segid);
2660}
2661
2662asmlinkage long
2663sys32_setresgid(compat_gid_t rgid, compat_gid_t egid,
2664 compat_gid_t sgid)
2665{
2666 gid_t srgid, segid, ssgid;
2667
2668 srgid = (rgid == (compat_gid_t)-1) ? ((gid_t)-1) : ((gid_t)rgid);
2669 segid = (egid == (compat_gid_t)-1) ? ((gid_t)-1) : ((gid_t)egid);
2670 ssgid = (sgid == (compat_gid_t)-1) ? ((gid_t)-1) : ((gid_t)sgid);
2671 return sys_setresgid(srgid, segid, ssgid);
2672}
2673
2674/* Handle adjtimex compatibility. */
2675
2676struct timex32 {
2677 u32 modes;
2678 s32 offset, freq, maxerror, esterror;
2679 s32 status, constant, precision, tolerance;
2680 struct compat_timeval time;
2681 s32 tick;
2682 s32 ppsfreq, jitter, shift, stabil;
2683 s32 jitcnt, calcnt, errcnt, stbcnt;
2684 s32 :32; s32 :32; s32 :32; s32 :32;
2685 s32 :32; s32 :32; s32 :32; s32 :32;
2686 s32 :32; s32 :32; s32 :32; s32 :32;
2687};
2688
2689extern int do_adjtimex(struct timex *);
2690
2691asmlinkage long
2692sys32_adjtimex(struct timex32 *utp)
2693{
2694 struct timex txc;
2695 int ret;
2696
2697 memset(&txc, 0, sizeof(struct timex));
2698
2699 if(get_user(txc.modes, &utp->modes) ||
2700 __get_user(txc.offset, &utp->offset) ||
2701 __get_user(txc.freq, &utp->freq) ||
2702 __get_user(txc.maxerror, &utp->maxerror) ||
2703 __get_user(txc.esterror, &utp->esterror) ||
2704 __get_user(txc.status, &utp->status) ||
2705 __get_user(txc.constant, &utp->constant) ||
2706 __get_user(txc.precision, &utp->precision) ||
2707 __get_user(txc.tolerance, &utp->tolerance) ||
2708 __get_user(txc.time.tv_sec, &utp->time.tv_sec) ||
2709 __get_user(txc.time.tv_usec, &utp->time.tv_usec) ||
2710 __get_user(txc.tick, &utp->tick) ||
2711 __get_user(txc.ppsfreq, &utp->ppsfreq) ||
2712 __get_user(txc.jitter, &utp->jitter) ||
2713 __get_user(txc.shift, &utp->shift) ||
2714 __get_user(txc.stabil, &utp->stabil) ||
2715 __get_user(txc.jitcnt, &utp->jitcnt) ||
2716 __get_user(txc.calcnt, &utp->calcnt) ||
2717 __get_user(txc.errcnt, &utp->errcnt) ||
2718 __get_user(txc.stbcnt, &utp->stbcnt))
2719 return -EFAULT;
2720
2721 ret = do_adjtimex(&txc);
2722
2723 if(put_user(txc.modes, &utp->modes) ||
2724 __put_user(txc.offset, &utp->offset) ||
2725 __put_user(txc.freq, &utp->freq) ||
2726 __put_user(txc.maxerror, &utp->maxerror) ||
2727 __put_user(txc.esterror, &utp->esterror) ||
2728 __put_user(txc.status, &utp->status) ||
2729 __put_user(txc.constant, &utp->constant) ||
2730 __put_user(txc.precision, &utp->precision) ||
2731 __put_user(txc.tolerance, &utp->tolerance) ||
2732 __put_user(txc.time.tv_sec, &utp->time.tv_sec) ||
2733 __put_user(txc.time.tv_usec, &utp->time.tv_usec) ||
2734 __put_user(txc.tick, &utp->tick) ||
2735 __put_user(txc.ppsfreq, &utp->ppsfreq) ||
2736 __put_user(txc.jitter, &utp->jitter) ||
2737 __put_user(txc.shift, &utp->shift) ||
2738 __put_user(txc.stabil, &utp->stabil) ||
2739 __put_user(txc.jitcnt, &utp->jitcnt) ||
2740 __put_user(txc.calcnt, &utp->calcnt) ||
2741 __put_user(txc.errcnt, &utp->errcnt) ||
2742 __put_user(txc.stbcnt, &utp->stbcnt))
2743 ret = -EFAULT;
2744
2745 return ret;
2746}
2747#endif /* NOTYET */
diff --git a/arch/ia64/install.sh b/arch/ia64/install.sh
new file mode 100644
index 000000000000..929e780026d1
--- /dev/null
+++ b/arch/ia64/install.sh
@@ -0,0 +1,40 @@
1#!/bin/sh
2#
3# arch/ia64/install.sh
4#
5# This file is subject to the terms and conditions of the GNU General Public
6# License. See the file "COPYING" in the main directory of this archive
7# for more details.
8#
9# Copyright (C) 1995 by Linus Torvalds
10#
11# Adapted from code in arch/i386/boot/Makefile by H. Peter Anvin
12#
13# "make install" script for ia64 architecture
14#
15# Arguments:
16# $1 - kernel version
17# $2 - kernel image file
18# $3 - kernel map file
19# $4 - default install path (blank if root directory)
20#
21
22# User may have a custom install script
23
24if [ -x ~/bin/installkernel ]; then exec ~/bin/installkernel "$@"; fi
25if [ -x /sbin/installkernel ]; then exec /sbin/installkernel "$@"; fi
26
27# Default install - same as make zlilo
28
29if [ -f $4/vmlinuz ]; then
30 mv $4/vmlinuz $4/vmlinuz.old
31fi
32
33if [ -f $4/System.map ]; then
34 mv $4/System.map $4/System.old
35fi
36
37cat $2 > $4/vmlinuz
38cp $3 $4/System.map
39
40test -x /usr/sbin/elilo && /usr/sbin/elilo
diff --git a/arch/ia64/kernel/Makefile b/arch/ia64/kernel/Makefile
new file mode 100644
index 000000000000..c1a02bbc252c
--- /dev/null
+++ b/arch/ia64/kernel/Makefile
@@ -0,0 +1,52 @@
1#
2# Makefile for the linux kernel.
3#
4
5extra-y := head.o init_task.o vmlinux.lds
6
7obj-y := acpi.o entry.o efi.o efi_stub.o gate-data.o fsys.o ia64_ksyms.o irq.o irq_ia64.o \
8 irq_lsapic.o ivt.o machvec.o pal.o patch.o process.o perfmon.o ptrace.o sal.o \
9 salinfo.o semaphore.o setup.o signal.o sys_ia64.o time.o traps.o unaligned.o \
10 unwind.o mca.o mca_asm.o topology.o
11
12obj-$(CONFIG_IA64_BRL_EMU) += brl_emu.o
13obj-$(CONFIG_IA64_GENERIC) += acpi-ext.o
14obj-$(CONFIG_IA64_HP_ZX1) += acpi-ext.o
15obj-$(CONFIG_IA64_HP_ZX1_SWIOTLB) += acpi-ext.o
16obj-$(CONFIG_IA64_PALINFO) += palinfo.o
17obj-$(CONFIG_IOSAPIC) += iosapic.o
18obj-$(CONFIG_MODULES) += module.o
19obj-$(CONFIG_SMP) += smp.o smpboot.o domain.o
20obj-$(CONFIG_PERFMON) += perfmon_default_smpl.o
21obj-$(CONFIG_IA64_CYCLONE) += cyclone.o
22obj-$(CONFIG_IA64_MCA_RECOVERY) += mca_recovery.o
23mca_recovery-y += mca_drv.o mca_drv_asm.o
24
25# The gate DSO image is built using a special linker script.
26targets += gate.so gate-syms.o
27
28extra-y += gate.so gate-syms.o gate.lds gate.o
29
30# fp_emulate() expects f2-f5,f16-f31 to contain the user-level state.
31CFLAGS_traps.o += -mfixed-range=f2-f5,f16-f31
32
33CPPFLAGS_gate.lds := -P -C -U$(ARCH)
34
35quiet_cmd_gate = GATE $@
36 cmd_gate = $(CC) -nostdlib $(GATECFLAGS_$(@F)) -Wl,-T,$(filter-out FORCE,$^) -o $@
37
38GATECFLAGS_gate.so = -shared -s -Wl,-soname=linux-gate.so.1
39$(obj)/gate.so: $(obj)/gate.lds $(obj)/gate.o FORCE
40 $(call if_changed,gate)
41
42$(obj)/built-in.o: $(obj)/gate-syms.o
43$(obj)/built-in.o: ld_flags += -R $(obj)/gate-syms.o
44
45GATECFLAGS_gate-syms.o = -r
46$(obj)/gate-syms.o: $(obj)/gate.lds $(obj)/gate.o FORCE
47 $(call if_changed,gate)
48
49# gate-data.o contains the gate DSO image as data in section .data.gate.
50# We must build gate.so before we can assemble it.
51# Note: kbuild does not track this dependency due to usage of .incbin
52$(obj)/gate-data.o: $(obj)/gate.so
diff --git a/arch/ia64/kernel/acpi-ext.c b/arch/ia64/kernel/acpi-ext.c
new file mode 100644
index 000000000000..2623df5e2633
--- /dev/null
+++ b/arch/ia64/kernel/acpi-ext.c
@@ -0,0 +1,100 @@
1/*
2 * arch/ia64/kernel/acpi-ext.c
3 *
4 * Copyright (C) 2003 Hewlett-Packard
5 * Copyright (C) Alex Williamson
6 * Copyright (C) Bjorn Helgaas
7 *
8 * Vendor specific extensions to ACPI.
9 */
10
11#include <linux/config.h>
12#include <linux/module.h>
13#include <linux/types.h>
14#include <linux/acpi.h>
15#include <linux/efi.h>
16
17#include <asm/acpi-ext.h>
18
19struct acpi_vendor_descriptor {
20 u8 guid_id;
21 efi_guid_t guid;
22};
23
24struct acpi_vendor_info {
25 struct acpi_vendor_descriptor *descriptor;
26 u8 *data;
27 u32 length;
28};
29
30acpi_status
31acpi_vendor_resource_match(struct acpi_resource *resource, void *context)
32{
33 struct acpi_vendor_info *info = (struct acpi_vendor_info *) context;
34 struct acpi_resource_vendor *vendor;
35 struct acpi_vendor_descriptor *descriptor;
36 u32 length;
37
38 if (resource->id != ACPI_RSTYPE_VENDOR)
39 return AE_OK;
40
41 vendor = (struct acpi_resource_vendor *) &resource->data;
42 descriptor = (struct acpi_vendor_descriptor *) vendor->reserved;
43 if (vendor->length <= sizeof(*info->descriptor) ||
44 descriptor->guid_id != info->descriptor->guid_id ||
45 efi_guidcmp(descriptor->guid, info->descriptor->guid))
46 return AE_OK;
47
48 length = vendor->length - sizeof(struct acpi_vendor_descriptor);
49 info->data = acpi_os_allocate(length);
50 if (!info->data)
51 return AE_NO_MEMORY;
52
53 memcpy(info->data, vendor->reserved + sizeof(struct acpi_vendor_descriptor), length);
54 info->length = length;
55 return AE_CTRL_TERMINATE;
56}
57
58acpi_status
59acpi_find_vendor_resource(acpi_handle obj, struct acpi_vendor_descriptor *id,
60 u8 **data, u32 *length)
61{
62 struct acpi_vendor_info info;
63
64 info.descriptor = id;
65 info.data = NULL;
66
67 acpi_walk_resources(obj, METHOD_NAME__CRS, acpi_vendor_resource_match, &info);
68 if (!info.data)
69 return AE_NOT_FOUND;
70
71 *data = info.data;
72 *length = info.length;
73 return AE_OK;
74}
75
76struct acpi_vendor_descriptor hp_ccsr_descriptor = {
77 .guid_id = 2,
78 .guid = EFI_GUID(0x69e9adf9, 0x924f, 0xab5f, 0xf6, 0x4a, 0x24, 0xd2, 0x01, 0x37, 0x0e, 0xad)
79};
80
81acpi_status
82hp_acpi_csr_space(acpi_handle obj, u64 *csr_base, u64 *csr_length)
83{
84 acpi_status status;
85 u8 *data;
86 u32 length;
87
88 status = acpi_find_vendor_resource(obj, &hp_ccsr_descriptor, &data, &length);
89
90 if (ACPI_FAILURE(status) || length != 16)
91 return AE_NOT_FOUND;
92
93 memcpy(csr_base, data, sizeof(*csr_base));
94 memcpy(csr_length, data + 8, sizeof(*csr_length));
95 acpi_os_free(data);
96
97 return AE_OK;
98}
99
100EXPORT_SYMBOL(hp_acpi_csr_space);
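
The exported helper above gives drivers a small API for pulling HP's CSR window out of a device's _CRS vendor resource. A minimal caller sketch (not part of this file; the device handle and the error handling style are assumed for illustration):

	static int example_map_hp_csr(acpi_handle dev_handle)
	{
		u64 csr_base, csr_length;

		/* Look up the vendor-defined CSR descriptor for this device. */
		if (hp_acpi_csr_space(dev_handle, &csr_base, &csr_length) != AE_OK)
			return -ENODEV;

		/* csr_base/csr_length now bound the device's CSR MMIO window,
		 * e.g. suitable for ioremap() by the calling driver. */
		return 0;
	}
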
diff --git a/arch/ia64/kernel/acpi.c b/arch/ia64/kernel/acpi.c
new file mode 100644
index 000000000000..a8e99c56a768
--- /dev/null
+++ b/arch/ia64/kernel/acpi.c
@@ -0,0 +1,841 @@
1/*
2 * acpi.c - Architecture-Specific Low-Level ACPI Support
3 *
4 * Copyright (C) 1999 VA Linux Systems
5 * Copyright (C) 1999,2000 Walt Drummond <drummond@valinux.com>
6 * Copyright (C) 2000, 2002-2003 Hewlett-Packard Co.
7 * David Mosberger-Tang <davidm@hpl.hp.com>
8 * Copyright (C) 2000 Intel Corp.
9 * Copyright (C) 2000,2001 J.I. Lee <jung-ik.lee@intel.com>
10 * Copyright (C) 2001 Paul Diefenbaugh <paul.s.diefenbaugh@intel.com>
11 * Copyright (C) 2001 Jenna Hall <jenna.s.hall@intel.com>
12 * Copyright (C) 2001 Takayoshi Kochi <t-kochi@bq.jp.nec.com>
13 * Copyright (C) 2002 Erich Focht <efocht@ess.nec.de>
14 *
15 * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
16 *
17 * This program is free software; you can redistribute it and/or modify
18 * it under the terms of the GNU General Public License as published by
19 * the Free Software Foundation; either version 2 of the License, or
20 * (at your option) any later version.
21 *
22 * This program is distributed in the hope that it will be useful,
23 * but WITHOUT ANY WARRANTY; without even the implied warranty of
24 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
25 * GNU General Public License for more details.
26 *
27 * You should have received a copy of the GNU General Public License
28 * along with this program; if not, write to the Free Software
29 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
30 *
31 * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
32 */
33
34#include <linux/config.h>
35#include <linux/module.h>
36#include <linux/init.h>
37#include <linux/kernel.h>
38#include <linux/sched.h>
39#include <linux/smp.h>
40#include <linux/string.h>
41#include <linux/types.h>
42#include <linux/irq.h>
43#include <linux/acpi.h>
44#include <linux/efi.h>
45#include <linux/mmzone.h>
46#include <linux/nodemask.h>
47#include <asm/io.h>
48#include <asm/iosapic.h>
49#include <asm/machvec.h>
50#include <asm/page.h>
51#include <asm/system.h>
52#include <asm/numa.h>
53#include <asm/sal.h>
54#include <asm/cyclone.h>
55
56#define BAD_MADT_ENTRY(entry, end) ( \
57 (!entry) || (unsigned long)entry + sizeof(*entry) > end || \
58 ((acpi_table_entry_header *)entry)->length != sizeof(*entry))
59
60#define PREFIX "ACPI: "
61
62void (*pm_idle) (void);
63EXPORT_SYMBOL(pm_idle);
64void (*pm_power_off) (void);
65EXPORT_SYMBOL(pm_power_off);
66
67unsigned char acpi_kbd_controller_present = 1;
68unsigned char acpi_legacy_devices;
69
70#define MAX_SAPICS 256
71u16 ia64_acpiid_to_sapicid[MAX_SAPICS] =
72 { [0 ... MAX_SAPICS - 1] = -1 };
73EXPORT_SYMBOL(ia64_acpiid_to_sapicid);
74
75const char *
76acpi_get_sysname (void)
77{
78#ifdef CONFIG_IA64_GENERIC
79 unsigned long rsdp_phys;
80 struct acpi20_table_rsdp *rsdp;
81 struct acpi_table_xsdt *xsdt;
82 struct acpi_table_header *hdr;
83
84 rsdp_phys = acpi_find_rsdp();
85 if (!rsdp_phys) {
86 printk(KERN_ERR "ACPI 2.0 RSDP not found, default to \"dig\"\n");
87 return "dig";
88 }
89
90 rsdp = (struct acpi20_table_rsdp *) __va(rsdp_phys);
91 if (strncmp(rsdp->signature, RSDP_SIG, sizeof(RSDP_SIG) - 1)) {
92 printk(KERN_ERR "ACPI 2.0 RSDP signature incorrect, default to \"dig\"\n");
93 return "dig";
94 }
95
96 xsdt = (struct acpi_table_xsdt *) __va(rsdp->xsdt_address);
97 hdr = &xsdt->header;
98 if (strncmp(hdr->signature, XSDT_SIG, sizeof(XSDT_SIG) - 1)) {
99 printk(KERN_ERR "ACPI 2.0 XSDT signature incorrect, default to \"dig\"\n");
100 return "dig";
101 }
102
103 if (!strcmp(hdr->oem_id, "HP")) {
104 return "hpzx1";
105 }
106 else if (!strcmp(hdr->oem_id, "SGI")) {
107 return "sn2";
108 }
109
110 return "dig";
111#else
112# if defined (CONFIG_IA64_HP_SIM)
113 return "hpsim";
114# elif defined (CONFIG_IA64_HP_ZX1)
115 return "hpzx1";
116# elif defined (CONFIG_IA64_HP_ZX1_SWIOTLB)
117 return "hpzx1_swiotlb";
118# elif defined (CONFIG_IA64_SGI_SN2)
119 return "sn2";
120# elif defined (CONFIG_IA64_DIG)
121 return "dig";
122# else
123# error Unknown platform. Fix acpi.c.
124# endif
125#endif
126}
127
128#ifdef CONFIG_ACPI_BOOT
129
130#define ACPI_MAX_PLATFORM_INTERRUPTS 256
131
132/* Array to record platform interrupt vectors for generic interrupt routing. */
133int platform_intr_list[ACPI_MAX_PLATFORM_INTERRUPTS] = {
134 [0 ... ACPI_MAX_PLATFORM_INTERRUPTS - 1] = -1
135};
136
137enum acpi_irq_model_id acpi_irq_model = ACPI_IRQ_MODEL_IOSAPIC;
138
139/*
140 * Interrupt routing API for device drivers. Provides interrupt vector for
141 * a generic platform event. Currently only CPEI is implemented.
142 */
143int
144acpi_request_vector (u32 int_type)
145{
146 int vector = -1;
147
148 if (int_type < ACPI_MAX_PLATFORM_INTERRUPTS) {
149 /* corrected platform error interrupt */
150 vector = platform_intr_list[int_type];
151 } else
152 printk(KERN_ERR "acpi_request_vector(): invalid interrupt type\n");
153 return vector;
154}
155
156char *
157__acpi_map_table (unsigned long phys_addr, unsigned long size)
158{
159 return __va(phys_addr);
160}
161
162/* --------------------------------------------------------------------------
163 Boot-time Table Parsing
164 -------------------------------------------------------------------------- */
165
166static int total_cpus __initdata;
167static int available_cpus __initdata;
168struct acpi_table_madt * acpi_madt __initdata;
169static u8 has_8259;
170
171
172static int __init
173acpi_parse_lapic_addr_ovr (
174 acpi_table_entry_header *header, const unsigned long end)
175{
176 struct acpi_table_lapic_addr_ovr *lapic;
177
178 lapic = (struct acpi_table_lapic_addr_ovr *) header;
179
180 if (BAD_MADT_ENTRY(lapic, end))
181 return -EINVAL;
182
183 if (lapic->address) {
184 iounmap(ipi_base_addr);
185 ipi_base_addr = ioremap(lapic->address, 0);
186 }
187 return 0;
188}
189
190
191static int __init
192acpi_parse_lsapic (acpi_table_entry_header *header, const unsigned long end)
193{
194 struct acpi_table_lsapic *lsapic;
195
196 lsapic = (struct acpi_table_lsapic *) header;
197
198 if (BAD_MADT_ENTRY(lsapic, end))
199 return -EINVAL;
200
201 if (lsapic->flags.enabled) {
202#ifdef CONFIG_SMP
203 smp_boot_data.cpu_phys_id[available_cpus] = (lsapic->id << 8) | lsapic->eid;
204#endif
205 ia64_acpiid_to_sapicid[lsapic->acpi_id] = (lsapic->id << 8) | lsapic->eid;
206 ++available_cpus;
207 }
208
209 total_cpus++;
210 return 0;
211}
212
213
214static int __init
215acpi_parse_lapic_nmi (acpi_table_entry_header *header, const unsigned long end)
216{
217 struct acpi_table_lapic_nmi *lacpi_nmi;
218
219 lacpi_nmi = (struct acpi_table_lapic_nmi*) header;
220
221 if (BAD_MADT_ENTRY(lacpi_nmi, end))
222 return -EINVAL;
223
224 /* TBD: Support lapic_nmi entries */
225 return 0;
226}
227
228
229static int __init
230acpi_parse_iosapic (acpi_table_entry_header *header, const unsigned long end)
231{
232 struct acpi_table_iosapic *iosapic;
233
234 iosapic = (struct acpi_table_iosapic *) header;
235
236 if (BAD_MADT_ENTRY(iosapic, end))
237 return -EINVAL;
238
239 iosapic_init(iosapic->address, iosapic->global_irq_base);
240
241 return 0;
242}
243
244
245static int __init
246acpi_parse_plat_int_src (
247 acpi_table_entry_header *header, const unsigned long end)
248{
249 struct acpi_table_plat_int_src *plintsrc;
250 int vector;
251
252 plintsrc = (struct acpi_table_plat_int_src *) header;
253
254 if (BAD_MADT_ENTRY(plintsrc, end))
255 return -EINVAL;
256
257 /*
258 * Get vector assignment for this interrupt, set attributes,
259 * and program the IOSAPIC routing table.
260 */
261 vector = iosapic_register_platform_intr(plintsrc->type,
262 plintsrc->global_irq,
263 plintsrc->iosapic_vector,
264 plintsrc->eid,
265 plintsrc->id,
266 (plintsrc->flags.polarity == 1) ? IOSAPIC_POL_HIGH : IOSAPIC_POL_LOW,
267 (plintsrc->flags.trigger == 1) ? IOSAPIC_EDGE : IOSAPIC_LEVEL);
268
269 platform_intr_list[plintsrc->type] = vector;
270 return 0;
271}
272
273
274static int __init
275acpi_parse_int_src_ovr (
276 acpi_table_entry_header *header, const unsigned long end)
277{
278 struct acpi_table_int_src_ovr *p;
279
280 p = (struct acpi_table_int_src_ovr *) header;
281
282 if (BAD_MADT_ENTRY(p, end))
283 return -EINVAL;
284
285 iosapic_override_isa_irq(p->bus_irq, p->global_irq,
286 (p->flags.polarity == 1) ? IOSAPIC_POL_HIGH : IOSAPIC_POL_LOW,
287 (p->flags.trigger == 1) ? IOSAPIC_EDGE : IOSAPIC_LEVEL);
288 return 0;
289}
290
291
292static int __init
293acpi_parse_nmi_src (acpi_table_entry_header *header, const unsigned long end)
294{
295 struct acpi_table_nmi_src *nmi_src;
296
297 nmi_src = (struct acpi_table_nmi_src*) header;
298
299 if (BAD_MADT_ENTRY(nmi_src, end))
300 return -EINVAL;
301
302	/* TBD: Support nmi_src entries */
303 return 0;
304}
305
306static void __init
307acpi_madt_oem_check (char *oem_id, char *oem_table_id)
308{
309 if (!strncmp(oem_id, "IBM", 3) &&
310 (!strncmp(oem_table_id, "SERMOW", 6))) {
311
312 /*
313 * Unfortunately ITC_DRIFT is not yet part of the
314 * official SAL spec, so the ITC_DRIFT bit is not
315 * set by the BIOS on this hardware.
316 */
317 sal_platform_features |= IA64_SAL_PLATFORM_FEATURE_ITC_DRIFT;
318
319 cyclone_setup();
320 }
321}
322
323static int __init
324acpi_parse_madt (unsigned long phys_addr, unsigned long size)
325{
326 if (!phys_addr || !size)
327 return -EINVAL;
328
329 acpi_madt = (struct acpi_table_madt *) __va(phys_addr);
330
331 /* remember the value for reference after free_initmem() */
332#ifdef CONFIG_ITANIUM
333 has_8259 = 1; /* Firmware on old Itanium systems is broken */
334#else
335 has_8259 = acpi_madt->flags.pcat_compat;
336#endif
337 iosapic_system_init(has_8259);
338
339 /* Get base address of IPI Message Block */
340
341 if (acpi_madt->lapic_address)
342 ipi_base_addr = ioremap(acpi_madt->lapic_address, 0);
343
344 printk(KERN_INFO PREFIX "Local APIC address %p\n", ipi_base_addr);
345
346 acpi_madt_oem_check(acpi_madt->header.oem_id,
347 acpi_madt->header.oem_table_id);
348
349 return 0;
350}
351
352
353#ifdef CONFIG_ACPI_NUMA
354
355#undef SLIT_DEBUG
356
357#define PXM_FLAG_LEN ((MAX_PXM_DOMAINS + 1)/32)
358
359static int __initdata srat_num_cpus; /* number of cpus */
360static u32 __devinitdata pxm_flag[PXM_FLAG_LEN];
361#define pxm_bit_set(bit) (set_bit(bit,(void *)pxm_flag))
362#define pxm_bit_test(bit) (test_bit(bit,(void *)pxm_flag))
363/* maps to convert between proximity domain and logical node ID */
364int __devinitdata pxm_to_nid_map[MAX_PXM_DOMAINS];
365int __initdata nid_to_pxm_map[MAX_NUMNODES];
366static struct acpi_table_slit __initdata *slit_table;
367
368/*
369 * ACPI 2.0 SLIT (System Locality Information Table)
370 * http://devresource.hp.com/devresource/Docs/TechPapers/IA64/slit.pdf
371 */
372void __init
373acpi_numa_slit_init (struct acpi_table_slit *slit)
374{
375 u32 len;
376
377 len = sizeof(struct acpi_table_header) + 8
378 + slit->localities * slit->localities;
379 if (slit->header.length != len) {
380 printk(KERN_ERR "ACPI 2.0 SLIT: size mismatch: %d expected, %d actual\n",
381 len, slit->header.length);
382 memset(numa_slit, 10, sizeof(numa_slit));
383 return;
384 }
385 slit_table = slit;
386}
387
388void __init
389acpi_numa_processor_affinity_init (struct acpi_table_processor_affinity *pa)
390{
391 /* record this node in proximity bitmap */
392 pxm_bit_set(pa->proximity_domain);
393
394 node_cpuid[srat_num_cpus].phys_id = (pa->apic_id << 8) | (pa->lsapic_eid);
395 /* nid should be overridden as logical node id later */
396 node_cpuid[srat_num_cpus].nid = pa->proximity_domain;
397 srat_num_cpus++;
398}
399
400void __init
401acpi_numa_memory_affinity_init (struct acpi_table_memory_affinity *ma)
402{
403 unsigned long paddr, size;
404 u8 pxm;
405 struct node_memblk_s *p, *q, *pend;
406
407 pxm = ma->proximity_domain;
408
409 /* fill node memory chunk structure */
410 paddr = ma->base_addr_hi;
411 paddr = (paddr << 32) | ma->base_addr_lo;
412 size = ma->length_hi;
413 size = (size << 32) | ma->length_lo;
414
415 /* Ignore disabled entries */
416 if (!ma->flags.enabled)
417 return;
418
419 /* record this node in proximity bitmap */
420 pxm_bit_set(pxm);
421
422 /* Insertion sort based on base address */
423 pend = &node_memblk[num_node_memblks];
424 for (p = &node_memblk[0]; p < pend; p++) {
425 if (paddr < p->start_paddr)
426 break;
427 }
428 if (p < pend) {
429 for (q = pend - 1; q >= p; q--)
430 *(q + 1) = *q;
431 }
432 p->start_paddr = paddr;
433 p->size = size;
434 p->nid = pxm;
435 num_node_memblks++;
436}
437
438void __init
439acpi_numa_arch_fixup (void)
440{
441 int i, j, node_from, node_to;
442
443 /* If there's no SRAT, fix the phys_id and mark node 0 online */
444 if (srat_num_cpus == 0) {
445 node_set_online(0);
446 node_cpuid[0].phys_id = hard_smp_processor_id();
447 return;
448 }
449
450 /*
451 * MCD - This can probably be dropped now. No need for pxm ID to node ID
452 * mapping with sparse node numbering iff MAX_PXM_DOMAINS <= MAX_NUMNODES.
453 */
454 /* calculate total number of nodes in system from PXM bitmap */
455 memset(pxm_to_nid_map, -1, sizeof(pxm_to_nid_map));
456 memset(nid_to_pxm_map, -1, sizeof(nid_to_pxm_map));
457 nodes_clear(node_online_map);
458 for (i = 0; i < MAX_PXM_DOMAINS; i++) {
459 if (pxm_bit_test(i)) {
460 int nid = num_online_nodes();
461 pxm_to_nid_map[i] = nid;
462 nid_to_pxm_map[nid] = i;
463 node_set_online(nid);
464 }
465 }
466
467 /* set logical node id in memory chunk structure */
468 for (i = 0; i < num_node_memblks; i++)
469 node_memblk[i].nid = pxm_to_nid_map[node_memblk[i].nid];
470
471 /* assign memory bank numbers for each chunk on each node */
472 for_each_online_node(i) {
473 int bank;
474
475 bank = 0;
476 for (j = 0; j < num_node_memblks; j++)
477 if (node_memblk[j].nid == i)
478 node_memblk[j].bank = bank++;
479 }
480
481 /* set logical node id in cpu structure */
482 for (i = 0; i < srat_num_cpus; i++)
483 node_cpuid[i].nid = pxm_to_nid_map[node_cpuid[i].nid];
484
485 printk(KERN_INFO "Number of logical nodes in system = %d\n", num_online_nodes());
486 printk(KERN_INFO "Number of memory chunks in system = %d\n", num_node_memblks);
487
488 if (!slit_table) return;
489 memset(numa_slit, -1, sizeof(numa_slit));
490 for (i=0; i<slit_table->localities; i++) {
491 if (!pxm_bit_test(i))
492 continue;
493 node_from = pxm_to_nid_map[i];
494 for (j=0; j<slit_table->localities; j++) {
495 if (!pxm_bit_test(j))
496 continue;
497 node_to = pxm_to_nid_map[j];
498 node_distance(node_from, node_to) =
499 slit_table->entry[i*slit_table->localities + j];
500 }
501 }
502
503#ifdef SLIT_DEBUG
504 printk("ACPI 2.0 SLIT locality table:\n");
505 for_each_online_node(i) {
506 for_each_online_node(j)
507 printk("%03d ", node_distance(i,j));
508 printk("\n");
509 }
510#endif
511}
512#endif /* CONFIG_ACPI_NUMA */
513
514unsigned int
515acpi_register_gsi (u32 gsi, int edge_level, int active_high_low)
516{
517 if (has_8259 && gsi < 16)
518 return isa_irq_to_vector(gsi);
519
520 return iosapic_register_intr(gsi,
521 (active_high_low == ACPI_ACTIVE_HIGH) ? IOSAPIC_POL_HIGH : IOSAPIC_POL_LOW,
522 (edge_level == ACPI_EDGE_SENSITIVE) ? IOSAPIC_EDGE : IOSAPIC_LEVEL);
523}
524EXPORT_SYMBOL(acpi_register_gsi);
525
526#ifdef CONFIG_ACPI_DEALLOCATE_IRQ
527void
528acpi_unregister_gsi (u32 gsi)
529{
530 iosapic_unregister_intr(gsi);
531}
532EXPORT_SYMBOL(acpi_unregister_gsi);
533#endif /* CONFIG_ACPI_DEALLOCATE_IRQ */
534
535static int __init
536acpi_parse_fadt (unsigned long phys_addr, unsigned long size)
537{
538 struct acpi_table_header *fadt_header;
539 struct fadt_descriptor_rev2 *fadt;
540
541 if (!phys_addr || !size)
542 return -EINVAL;
543
544 fadt_header = (struct acpi_table_header *) __va(phys_addr);
545 if (fadt_header->revision != 3)
546 return -ENODEV; /* Only deal with ACPI 2.0 FADT */
547
548 fadt = (struct fadt_descriptor_rev2 *) fadt_header;
549
550 if (!(fadt->iapc_boot_arch & BAF_8042_KEYBOARD_CONTROLLER))
551 acpi_kbd_controller_present = 0;
552
553 if (fadt->iapc_boot_arch & BAF_LEGACY_DEVICES)
554 acpi_legacy_devices = 1;
555
556 acpi_register_gsi(fadt->sci_int, ACPI_LEVEL_SENSITIVE, ACPI_ACTIVE_LOW);
557 return 0;
558}
559
560
561unsigned long __init
562acpi_find_rsdp (void)
563{
564 unsigned long rsdp_phys = 0;
565
566 if (efi.acpi20)
567 rsdp_phys = __pa(efi.acpi20);
568 else if (efi.acpi)
569 printk(KERN_WARNING PREFIX "v1.0/r0.71 tables no longer supported\n");
570 return rsdp_phys;
571}
572
573
574int __init
575acpi_boot_init (void)
576{
577
578 /*
579 * MADT
580 * ----
581 * Parse the Multiple APIC Description Table (MADT), if exists.
582 * Note that this table provides platform SMP configuration
583 * information -- the successor to MPS tables.
584 */
585
586 if (acpi_table_parse(ACPI_APIC, acpi_parse_madt) < 1) {
587 printk(KERN_ERR PREFIX "Can't find MADT\n");
588 goto skip_madt;
589 }
590
591 /* Local APIC */
592
593 if (acpi_table_parse_madt(ACPI_MADT_LAPIC_ADDR_OVR, acpi_parse_lapic_addr_ovr, 0) < 0)
594 printk(KERN_ERR PREFIX "Error parsing LAPIC address override entry\n");
595
596 if (acpi_table_parse_madt(ACPI_MADT_LSAPIC, acpi_parse_lsapic, NR_CPUS) < 1)
597 printk(KERN_ERR PREFIX "Error parsing MADT - no LAPIC entries\n");
598
599 if (acpi_table_parse_madt(ACPI_MADT_LAPIC_NMI, acpi_parse_lapic_nmi, 0) < 0)
600 printk(KERN_ERR PREFIX "Error parsing LAPIC NMI entry\n");
601
602 /* I/O APIC */
603
604 if (acpi_table_parse_madt(ACPI_MADT_IOSAPIC, acpi_parse_iosapic, NR_IOSAPICS) < 1)
605 printk(KERN_ERR PREFIX "Error parsing MADT - no IOSAPIC entries\n");
606
607 /* System-Level Interrupt Routing */
608
609 if (acpi_table_parse_madt(ACPI_MADT_PLAT_INT_SRC, acpi_parse_plat_int_src, ACPI_MAX_PLATFORM_INTERRUPTS) < 0)
610 printk(KERN_ERR PREFIX "Error parsing platform interrupt source entry\n");
611
612 if (acpi_table_parse_madt(ACPI_MADT_INT_SRC_OVR, acpi_parse_int_src_ovr, 0) < 0)
613 printk(KERN_ERR PREFIX "Error parsing interrupt source overrides entry\n");
614
615 if (acpi_table_parse_madt(ACPI_MADT_NMI_SRC, acpi_parse_nmi_src, 0) < 0)
616 printk(KERN_ERR PREFIX "Error parsing NMI SRC entry\n");
617 skip_madt:
618
619 /*
620 * FADT says whether a legacy keyboard controller is present.
621 * The FADT also contains an SCI_INT line, by which the system
622 * gets interrupts such as power and sleep buttons. If it's not
623 * on a Legacy interrupt, it needs to be set up.
624 */
625 if (acpi_table_parse(ACPI_FADT, acpi_parse_fadt) < 1)
626 printk(KERN_ERR PREFIX "Can't find FADT\n");
627
628#ifdef CONFIG_SMP
629 if (available_cpus == 0) {
630 printk(KERN_INFO "ACPI: Found 0 CPUS; assuming 1\n");
631 printk(KERN_INFO "CPU 0 (0x%04x)", hard_smp_processor_id());
632 smp_boot_data.cpu_phys_id[available_cpus] = hard_smp_processor_id();
633 available_cpus = 1; /* We've got at least one of these, no? */
634 }
635 smp_boot_data.cpu_count = available_cpus;
636
637 smp_build_cpu_map();
638# ifdef CONFIG_ACPI_NUMA
639 if (srat_num_cpus == 0) {
640 int cpu, i = 1;
641 for (cpu = 0; cpu < smp_boot_data.cpu_count; cpu++)
642 if (smp_boot_data.cpu_phys_id[cpu] != hard_smp_processor_id())
643 node_cpuid[i++].phys_id = smp_boot_data.cpu_phys_id[cpu];
644 }
645 build_cpu_to_node_map();
646# endif
647#endif
648 /* Make boot-up look pretty */
649 printk(KERN_INFO "%d CPUs available, %d CPUs total\n", available_cpus, total_cpus);
650 return 0;
651}
652
653int
654acpi_gsi_to_irq (u32 gsi, unsigned int *irq)
655{
656 int vector;
657
658 if (has_8259 && gsi < 16)
659 *irq = isa_irq_to_vector(gsi);
660 else {
661 vector = gsi_to_vector(gsi);
662 if (vector == -1)
663 return -1;
664
665 *irq = vector;
666 }
667 return 0;
668}
669
670/*
671 * ACPI based hotplug CPU support
672 */
673#ifdef CONFIG_ACPI_HOTPLUG_CPU
674static
675int
676acpi_map_cpu2node(acpi_handle handle, int cpu, long physid)
677{
678#ifdef CONFIG_ACPI_NUMA
679 int pxm_id;
680
681 pxm_id = acpi_get_pxm(handle);
682
683 /*
684 * Assuming that the container driver would have set the proximity
685 * domain and would have initialized pxm_to_nid_map[pxm_id] && pxm_flag
686 */
687 node_cpuid[cpu].nid = (pxm_id < 0) ? 0:
688 pxm_to_nid_map[pxm_id];
689
690 node_cpuid[cpu].phys_id = physid;
691#endif
692 return(0);
693}
694
695
696int
697acpi_map_lsapic(acpi_handle handle, int *pcpu)
698{
699 struct acpi_buffer buffer = {ACPI_ALLOCATE_BUFFER, NULL};
700 union acpi_object *obj;
701 struct acpi_table_lsapic *lsapic;
702 cpumask_t tmp_map;
703 long physid;
704 int cpu;
705
706 if (ACPI_FAILURE(acpi_evaluate_object(handle, "_MAT", NULL, &buffer)))
707 return -EINVAL;
708
709 if (!buffer.length || !buffer.pointer)
710 return -EINVAL;
711
712 obj = buffer.pointer;
713 if (obj->type != ACPI_TYPE_BUFFER ||
714 obj->buffer.length < sizeof(*lsapic)) {
715 acpi_os_free(buffer.pointer);
716 return -EINVAL;
717 }
718
719 lsapic = (struct acpi_table_lsapic *)obj->buffer.pointer;
720
721 if ((lsapic->header.type != ACPI_MADT_LSAPIC) ||
722 (!lsapic->flags.enabled)) {
723 acpi_os_free(buffer.pointer);
724 return -EINVAL;
725 }
726
727 physid = ((lsapic->id <<8) | (lsapic->eid));
728
729 acpi_os_free(buffer.pointer);
730 buffer.length = ACPI_ALLOCATE_BUFFER;
731 buffer.pointer = NULL;
732
733 cpus_complement(tmp_map, cpu_present_map);
734 cpu = first_cpu(tmp_map);
735 if(cpu >= NR_CPUS)
736 return -EINVAL;
737
738 acpi_map_cpu2node(handle, cpu, physid);
739
740 cpu_set(cpu, cpu_present_map);
741 ia64_cpu_to_sapicid[cpu] = physid;
742 ia64_acpiid_to_sapicid[lsapic->acpi_id] = ia64_cpu_to_sapicid[cpu];
743
744 *pcpu = cpu;
745 return(0);
746}
747EXPORT_SYMBOL(acpi_map_lsapic);
748
749
750int
751acpi_unmap_lsapic(int cpu)
752{
753 int i;
754
755 for (i=0; i<MAX_SAPICS; i++) {
756 if (ia64_acpiid_to_sapicid[i] == ia64_cpu_to_sapicid[cpu]) {
757 ia64_acpiid_to_sapicid[i] = -1;
758 break;
759 }
760 }
761 ia64_cpu_to_sapicid[cpu] = -1;
762 cpu_clear(cpu,cpu_present_map);
763
764#ifdef CONFIG_ACPI_NUMA
765	/* NUMA-specific cleanups */
766#endif
767
768 return(0);
769}
770EXPORT_SYMBOL(acpi_unmap_lsapic);
771#endif /* CONFIG_ACPI_HOTPLUG_CPU */
772
773
774#ifdef CONFIG_ACPI_NUMA
775acpi_status __init
776acpi_map_iosapic (acpi_handle handle, u32 depth, void *context, void **ret)
777{
778 struct acpi_buffer buffer = {ACPI_ALLOCATE_BUFFER, NULL};
779 union acpi_object *obj;
780 struct acpi_table_iosapic *iosapic;
781 unsigned int gsi_base;
782 int node;
783
784 /* Only care about objects w/ a method that returns the MADT */
785 if (ACPI_FAILURE(acpi_evaluate_object(handle, "_MAT", NULL, &buffer)))
786 return AE_OK;
787
788 if (!buffer.length || !buffer.pointer)
789 return AE_OK;
790
791 obj = buffer.pointer;
792 if (obj->type != ACPI_TYPE_BUFFER ||
793 obj->buffer.length < sizeof(*iosapic)) {
794 acpi_os_free(buffer.pointer);
795 return AE_OK;
796 }
797
798 iosapic = (struct acpi_table_iosapic *)obj->buffer.pointer;
799
800 if (iosapic->header.type != ACPI_MADT_IOSAPIC) {
801 acpi_os_free(buffer.pointer);
802 return AE_OK;
803 }
804
805 gsi_base = iosapic->global_irq_base;
806
807 acpi_os_free(buffer.pointer);
808 buffer.length = ACPI_ALLOCATE_BUFFER;
809 buffer.pointer = NULL;
810
811 /*
812 * OK, it's an IOSAPIC MADT entry, look for a _PXM method to tell
813 * us which node to associate this with.
814 */
815 if (ACPI_FAILURE(acpi_evaluate_object(handle, "_PXM", NULL, &buffer)))
816 return AE_OK;
817
818 if (!buffer.length || !buffer.pointer)
819 return AE_OK;
820
821 obj = buffer.pointer;
822
823 if (obj->type != ACPI_TYPE_INTEGER ||
824 obj->integer.value >= MAX_PXM_DOMAINS) {
825 acpi_os_free(buffer.pointer);
826 return AE_OK;
827 }
828
829 node = pxm_to_nid_map[obj->integer.value];
830 acpi_os_free(buffer.pointer);
831
832 if (node >= MAX_NUMNODES || !node_online(node) ||
833 cpus_empty(node_to_cpumask(node)))
834 return AE_OK;
835
836 /* We know a gsi to node mapping! */
837 map_iosapic_to_node(gsi_base, node);
838 return AE_OK;
839}
840#endif /* CONFIG_ACPI_NUMA */
841#endif /* CONFIG_ACPI_BOOT */
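
acpi_register_gsi() and acpi_gsi_to_irq() above are the glue used to turn a Global System Interrupt into an IA-64 vector, falling back to the legacy 8259 mapping for GSIs below 16 when an 8259 is present. A hedged caller sketch (dev_gsi and the trigger/polarity values are assumptions chosen for illustration, not taken from this commit):

	static int example_claim_gsi(u32 dev_gsi)
	{
		unsigned int irq;

		/* Program the IOSAPIC RTE; level-triggered, active-low assumed. */
		acpi_register_gsi(dev_gsi, ACPI_LEVEL_SENSITIVE, ACPI_ACTIVE_LOW);

		/* Translate the GSI into the vector usable with request_irq(). */
		if (acpi_gsi_to_irq(dev_gsi, &irq))
			return -EINVAL;

		return irq;
	}
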
diff --git a/arch/ia64/kernel/asm-offsets.c b/arch/ia64/kernel/asm-offsets.c
new file mode 100644
index 000000000000..7d1ae2982c53
--- /dev/null
+++ b/arch/ia64/kernel/asm-offsets.c
@@ -0,0 +1,239 @@
1/*
2 * Generate definitions needed by assembly language modules.
3 * This code generates raw asm output which is post-processed
4 * to extract and format the required data.
5 */
6
7#include <linux/config.h>
8
9#include <linux/sched.h>
10
11#include <asm-ia64/processor.h>
12#include <asm-ia64/ptrace.h>
13#include <asm-ia64/siginfo.h>
14#include <asm-ia64/sigcontext.h>
15#include <asm-ia64/mca.h>
16
17#include "../kernel/sigframe.h"
18
19#define DEFINE(sym, val) \
20 asm volatile("\n->" #sym " %0 " #val : : "i" (val))
21
22#define BLANK() asm volatile("\n->" : : )
23
24void foo(void)
25{
26 DEFINE(IA64_TASK_SIZE, sizeof (struct task_struct));
27 DEFINE(IA64_THREAD_INFO_SIZE, sizeof (struct thread_info));
28 DEFINE(IA64_PT_REGS_SIZE, sizeof (struct pt_regs));
29 DEFINE(IA64_SWITCH_STACK_SIZE, sizeof (struct switch_stack));
30 DEFINE(IA64_SIGINFO_SIZE, sizeof (struct siginfo));
31 DEFINE(IA64_CPU_SIZE, sizeof (struct cpuinfo_ia64));
32 DEFINE(SIGFRAME_SIZE, sizeof (struct sigframe));
33 DEFINE(UNW_FRAME_INFO_SIZE, sizeof (struct unw_frame_info));
34
35 BLANK();
36
37 DEFINE(TI_FLAGS, offsetof(struct thread_info, flags));
38 DEFINE(TI_PRE_COUNT, offsetof(struct thread_info, preempt_count));
39
40 BLANK();
41
42 DEFINE(IA64_TASK_BLOCKED_OFFSET,offsetof (struct task_struct, blocked));
43 DEFINE(IA64_TASK_CLEAR_CHILD_TID_OFFSET,offsetof (struct task_struct, clear_child_tid));
44 DEFINE(IA64_TASK_GROUP_LEADER_OFFSET, offsetof (struct task_struct, group_leader));
45 DEFINE(IA64_TASK_PENDING_OFFSET,offsetof (struct task_struct, pending));
46 DEFINE(IA64_TASK_PID_OFFSET, offsetof (struct task_struct, pid));
47 DEFINE(IA64_TASK_REAL_PARENT_OFFSET, offsetof (struct task_struct, real_parent));
48 DEFINE(IA64_TASK_SIGHAND_OFFSET,offsetof (struct task_struct, sighand));
49 DEFINE(IA64_TASK_SIGNAL_OFFSET,offsetof (struct task_struct, signal));
50 DEFINE(IA64_TASK_TGID_OFFSET, offsetof (struct task_struct, tgid));
51 DEFINE(IA64_TASK_THREAD_KSP_OFFSET, offsetof (struct task_struct, thread.ksp));
52 DEFINE(IA64_TASK_THREAD_ON_USTACK_OFFSET, offsetof (struct task_struct, thread.on_ustack));
53
54 BLANK();
55
56 DEFINE(IA64_SIGHAND_SIGLOCK_OFFSET,offsetof (struct sighand_struct, siglock));
57
58 BLANK();
59
60 DEFINE(IA64_SIGNAL_GROUP_STOP_COUNT_OFFSET,offsetof (struct signal_struct,
61 group_stop_count));
62 DEFINE(IA64_SIGNAL_SHARED_PENDING_OFFSET,offsetof (struct signal_struct, shared_pending));
63
64 BLANK();
65
66 DEFINE(IA64_PT_REGS_B6_OFFSET, offsetof (struct pt_regs, b6));
67 DEFINE(IA64_PT_REGS_B7_OFFSET, offsetof (struct pt_regs, b7));
68 DEFINE(IA64_PT_REGS_AR_CSD_OFFSET, offsetof (struct pt_regs, ar_csd));
69 DEFINE(IA64_PT_REGS_AR_SSD_OFFSET, offsetof (struct pt_regs, ar_ssd));
70 DEFINE(IA64_PT_REGS_R8_OFFSET, offsetof (struct pt_regs, r8));
71 DEFINE(IA64_PT_REGS_R9_OFFSET, offsetof (struct pt_regs, r9));
72 DEFINE(IA64_PT_REGS_R10_OFFSET, offsetof (struct pt_regs, r10));
73 DEFINE(IA64_PT_REGS_R11_OFFSET, offsetof (struct pt_regs, r11));
74 DEFINE(IA64_PT_REGS_CR_IPSR_OFFSET, offsetof (struct pt_regs, cr_ipsr));
75 DEFINE(IA64_PT_REGS_CR_IIP_OFFSET, offsetof (struct pt_regs, cr_iip));
76 DEFINE(IA64_PT_REGS_CR_IFS_OFFSET, offsetof (struct pt_regs, cr_ifs));
77 DEFINE(IA64_PT_REGS_AR_UNAT_OFFSET, offsetof (struct pt_regs, ar_unat));
78 DEFINE(IA64_PT_REGS_AR_PFS_OFFSET, offsetof (struct pt_regs, ar_pfs));
79 DEFINE(IA64_PT_REGS_AR_RSC_OFFSET, offsetof (struct pt_regs, ar_rsc));
80 DEFINE(IA64_PT_REGS_AR_RNAT_OFFSET, offsetof (struct pt_regs, ar_rnat));
81
82 DEFINE(IA64_PT_REGS_AR_BSPSTORE_OFFSET, offsetof (struct pt_regs, ar_bspstore));
83 DEFINE(IA64_PT_REGS_PR_OFFSET, offsetof (struct pt_regs, pr));
84 DEFINE(IA64_PT_REGS_B0_OFFSET, offsetof (struct pt_regs, b0));
85 DEFINE(IA64_PT_REGS_LOADRS_OFFSET, offsetof (struct pt_regs, loadrs));
86 DEFINE(IA64_PT_REGS_R1_OFFSET, offsetof (struct pt_regs, r1));
87 DEFINE(IA64_PT_REGS_R12_OFFSET, offsetof (struct pt_regs, r12));
88 DEFINE(IA64_PT_REGS_R13_OFFSET, offsetof (struct pt_regs, r13));
89 DEFINE(IA64_PT_REGS_AR_FPSR_OFFSET, offsetof (struct pt_regs, ar_fpsr));
90 DEFINE(IA64_PT_REGS_R15_OFFSET, offsetof (struct pt_regs, r15));
91 DEFINE(IA64_PT_REGS_R14_OFFSET, offsetof (struct pt_regs, r14));
92 DEFINE(IA64_PT_REGS_R2_OFFSET, offsetof (struct pt_regs, r2));
93 DEFINE(IA64_PT_REGS_R3_OFFSET, offsetof (struct pt_regs, r3));
94 DEFINE(IA64_PT_REGS_R16_OFFSET, offsetof (struct pt_regs, r16));
95 DEFINE(IA64_PT_REGS_R17_OFFSET, offsetof (struct pt_regs, r17));
96 DEFINE(IA64_PT_REGS_R18_OFFSET, offsetof (struct pt_regs, r18));
97 DEFINE(IA64_PT_REGS_R19_OFFSET, offsetof (struct pt_regs, r19));
98 DEFINE(IA64_PT_REGS_R20_OFFSET, offsetof (struct pt_regs, r20));
99 DEFINE(IA64_PT_REGS_R21_OFFSET, offsetof (struct pt_regs, r21));
100 DEFINE(IA64_PT_REGS_R22_OFFSET, offsetof (struct pt_regs, r22));
101 DEFINE(IA64_PT_REGS_R23_OFFSET, offsetof (struct pt_regs, r23));
102 DEFINE(IA64_PT_REGS_R24_OFFSET, offsetof (struct pt_regs, r24));
103 DEFINE(IA64_PT_REGS_R25_OFFSET, offsetof (struct pt_regs, r25));
104 DEFINE(IA64_PT_REGS_R26_OFFSET, offsetof (struct pt_regs, r26));
105 DEFINE(IA64_PT_REGS_R27_OFFSET, offsetof (struct pt_regs, r27));
106 DEFINE(IA64_PT_REGS_R28_OFFSET, offsetof (struct pt_regs, r28));
107 DEFINE(IA64_PT_REGS_R29_OFFSET, offsetof (struct pt_regs, r29));
108 DEFINE(IA64_PT_REGS_R30_OFFSET, offsetof (struct pt_regs, r30));
109 DEFINE(IA64_PT_REGS_R31_OFFSET, offsetof (struct pt_regs, r31));
110 DEFINE(IA64_PT_REGS_AR_CCV_OFFSET, offsetof (struct pt_regs, ar_ccv));
111 DEFINE(IA64_PT_REGS_F6_OFFSET, offsetof (struct pt_regs, f6));
112 DEFINE(IA64_PT_REGS_F7_OFFSET, offsetof (struct pt_regs, f7));
113 DEFINE(IA64_PT_REGS_F8_OFFSET, offsetof (struct pt_regs, f8));
114 DEFINE(IA64_PT_REGS_F9_OFFSET, offsetof (struct pt_regs, f9));
115 DEFINE(IA64_PT_REGS_F10_OFFSET, offsetof (struct pt_regs, f10));
116 DEFINE(IA64_PT_REGS_F11_OFFSET, offsetof (struct pt_regs, f11));
117
118 BLANK();
119
120 DEFINE(IA64_SWITCH_STACK_CALLER_UNAT_OFFSET, offsetof (struct switch_stack, caller_unat));
121 DEFINE(IA64_SWITCH_STACK_AR_FPSR_OFFSET, offsetof (struct switch_stack, ar_fpsr));
122 DEFINE(IA64_SWITCH_STACK_F2_OFFSET, offsetof (struct switch_stack, f2));
123 DEFINE(IA64_SWITCH_STACK_F3_OFFSET, offsetof (struct switch_stack, f3));
124 DEFINE(IA64_SWITCH_STACK_F4_OFFSET, offsetof (struct switch_stack, f4));
125 DEFINE(IA64_SWITCH_STACK_F5_OFFSET, offsetof (struct switch_stack, f5));
126 DEFINE(IA64_SWITCH_STACK_F12_OFFSET, offsetof (struct switch_stack, f12));
127 DEFINE(IA64_SWITCH_STACK_F13_OFFSET, offsetof (struct switch_stack, f13));
128 DEFINE(IA64_SWITCH_STACK_F14_OFFSET, offsetof (struct switch_stack, f14));
129 DEFINE(IA64_SWITCH_STACK_F15_OFFSET, offsetof (struct switch_stack, f15));
130 DEFINE(IA64_SWITCH_STACK_F16_OFFSET, offsetof (struct switch_stack, f16));
131 DEFINE(IA64_SWITCH_STACK_F17_OFFSET, offsetof (struct switch_stack, f17));
132 DEFINE(IA64_SWITCH_STACK_F18_OFFSET, offsetof (struct switch_stack, f18));
133 DEFINE(IA64_SWITCH_STACK_F19_OFFSET, offsetof (struct switch_stack, f19));
134 DEFINE(IA64_SWITCH_STACK_F20_OFFSET, offsetof (struct switch_stack, f20));
135 DEFINE(IA64_SWITCH_STACK_F21_OFFSET, offsetof (struct switch_stack, f21));
136 DEFINE(IA64_SWITCH_STACK_F22_OFFSET, offsetof (struct switch_stack, f22));
137 DEFINE(IA64_SWITCH_STACK_F23_OFFSET, offsetof (struct switch_stack, f23));
138 DEFINE(IA64_SWITCH_STACK_F24_OFFSET, offsetof (struct switch_stack, f24));
139 DEFINE(IA64_SWITCH_STACK_F25_OFFSET, offsetof (struct switch_stack, f25));
140 DEFINE(IA64_SWITCH_STACK_F26_OFFSET, offsetof (struct switch_stack, f26));
141 DEFINE(IA64_SWITCH_STACK_F27_OFFSET, offsetof (struct switch_stack, f27));
142 DEFINE(IA64_SWITCH_STACK_F28_OFFSET, offsetof (struct switch_stack, f28));
143 DEFINE(IA64_SWITCH_STACK_F29_OFFSET, offsetof (struct switch_stack, f29));
144 DEFINE(IA64_SWITCH_STACK_F30_OFFSET, offsetof (struct switch_stack, f30));
145 DEFINE(IA64_SWITCH_STACK_F31_OFFSET, offsetof (struct switch_stack, f31));
146 DEFINE(IA64_SWITCH_STACK_R4_OFFSET, offsetof (struct switch_stack, r4));
147 DEFINE(IA64_SWITCH_STACK_R5_OFFSET, offsetof (struct switch_stack, r5));
148 DEFINE(IA64_SWITCH_STACK_R6_OFFSET, offsetof (struct switch_stack, r6));
149 DEFINE(IA64_SWITCH_STACK_R7_OFFSET, offsetof (struct switch_stack, r7));
150 DEFINE(IA64_SWITCH_STACK_B0_OFFSET, offsetof (struct switch_stack, b0));
151 DEFINE(IA64_SWITCH_STACK_B1_OFFSET, offsetof (struct switch_stack, b1));
152 DEFINE(IA64_SWITCH_STACK_B2_OFFSET, offsetof (struct switch_stack, b2));
153 DEFINE(IA64_SWITCH_STACK_B3_OFFSET, offsetof (struct switch_stack, b3));
154 DEFINE(IA64_SWITCH_STACK_B4_OFFSET, offsetof (struct switch_stack, b4));
155 DEFINE(IA64_SWITCH_STACK_B5_OFFSET, offsetof (struct switch_stack, b5));
156 DEFINE(IA64_SWITCH_STACK_AR_PFS_OFFSET, offsetof (struct switch_stack, ar_pfs));
157 DEFINE(IA64_SWITCH_STACK_AR_LC_OFFSET, offsetof (struct switch_stack, ar_lc));
158 DEFINE(IA64_SWITCH_STACK_AR_UNAT_OFFSET, offsetof (struct switch_stack, ar_unat));
159 DEFINE(IA64_SWITCH_STACK_AR_RNAT_OFFSET, offsetof (struct switch_stack, ar_rnat));
160 DEFINE(IA64_SWITCH_STACK_AR_BSPSTORE_OFFSET, offsetof (struct switch_stack, ar_bspstore));
161 DEFINE(IA64_SWITCH_STACK_PR_OFFSET, offsetof (struct switch_stack, pr));
162
163 BLANK();
164
165 DEFINE(IA64_SIGCONTEXT_IP_OFFSET, offsetof (struct sigcontext, sc_ip));
166 DEFINE(IA64_SIGCONTEXT_AR_BSP_OFFSET, offsetof (struct sigcontext, sc_ar_bsp));
167 DEFINE(IA64_SIGCONTEXT_AR_FPSR_OFFSET, offsetof (struct sigcontext, sc_ar_fpsr));
168 DEFINE(IA64_SIGCONTEXT_AR_RNAT_OFFSET, offsetof (struct sigcontext, sc_ar_rnat));
169 DEFINE(IA64_SIGCONTEXT_AR_UNAT_OFFSET, offsetof (struct sigcontext, sc_ar_unat));
170 DEFINE(IA64_SIGCONTEXT_B0_OFFSET, offsetof (struct sigcontext, sc_br[0]));
171 DEFINE(IA64_SIGCONTEXT_CFM_OFFSET, offsetof (struct sigcontext, sc_cfm));
172 DEFINE(IA64_SIGCONTEXT_FLAGS_OFFSET, offsetof (struct sigcontext, sc_flags));
173 DEFINE(IA64_SIGCONTEXT_FR6_OFFSET, offsetof (struct sigcontext, sc_fr[6]));
174 DEFINE(IA64_SIGCONTEXT_PR_OFFSET, offsetof (struct sigcontext, sc_pr));
175 DEFINE(IA64_SIGCONTEXT_R12_OFFSET, offsetof (struct sigcontext, sc_gr[12]));
176 DEFINE(IA64_SIGCONTEXT_RBS_BASE_OFFSET,offsetof (struct sigcontext, sc_rbs_base));
177 DEFINE(IA64_SIGCONTEXT_LOADRS_OFFSET, offsetof (struct sigcontext, sc_loadrs));
178
179 BLANK();
180
181 DEFINE(IA64_SIGPENDING_SIGNAL_OFFSET, offsetof (struct sigpending, signal));
182
183 BLANK();
184
185 DEFINE(IA64_SIGFRAME_ARG0_OFFSET, offsetof (struct sigframe, arg0));
186 DEFINE(IA64_SIGFRAME_ARG1_OFFSET, offsetof (struct sigframe, arg1));
187 DEFINE(IA64_SIGFRAME_ARG2_OFFSET, offsetof (struct sigframe, arg2));
188 DEFINE(IA64_SIGFRAME_HANDLER_OFFSET, offsetof (struct sigframe, handler));
189 DEFINE(IA64_SIGFRAME_SIGCONTEXT_OFFSET, offsetof (struct sigframe, sc));
190 BLANK();
191 /* for assembly files which can't include sched.h: */
192 DEFINE(IA64_CLONE_VFORK, CLONE_VFORK);
193 DEFINE(IA64_CLONE_VM, CLONE_VM);
194
195 BLANK();
196 DEFINE(IA64_CPUINFO_NSEC_PER_CYC_OFFSET,
197 offsetof (struct cpuinfo_ia64, nsec_per_cyc));
198 DEFINE(IA64_CPUINFO_PTCE_BASE_OFFSET,
199 offsetof (struct cpuinfo_ia64, ptce_base));
200 DEFINE(IA64_CPUINFO_PTCE_COUNT_OFFSET,
201 offsetof (struct cpuinfo_ia64, ptce_count));
202 DEFINE(IA64_CPUINFO_PTCE_STRIDE_OFFSET,
203 offsetof (struct cpuinfo_ia64, ptce_stride));
204 BLANK();
205 DEFINE(IA64_TIMESPEC_TV_NSEC_OFFSET,
206 offsetof (struct timespec, tv_nsec));
207
208 DEFINE(CLONE_SETTLS_BIT, 19);
209#if CLONE_SETTLS != (1<<19)
210# error "CLONE_SETTLS_BIT incorrect, please fix"
211#endif
212
213 BLANK();
214 DEFINE(IA64_MCA_CPU_PROC_STATE_DUMP_OFFSET,
215 offsetof (struct ia64_mca_cpu, proc_state_dump));
216 DEFINE(IA64_MCA_CPU_STACK_OFFSET,
217 offsetof (struct ia64_mca_cpu, stack));
218 DEFINE(IA64_MCA_CPU_STACKFRAME_OFFSET,
219 offsetof (struct ia64_mca_cpu, stackframe));
220 DEFINE(IA64_MCA_CPU_RBSTORE_OFFSET,
221 offsetof (struct ia64_mca_cpu, rbstore));
222 DEFINE(IA64_MCA_CPU_INIT_STACK_OFFSET,
223 offsetof (struct ia64_mca_cpu, init_stack));
224 BLANK();
225 /* used by fsys_gettimeofday in arch/ia64/kernel/fsys.S */
226 DEFINE(IA64_TIME_INTERPOLATOR_ADDRESS_OFFSET, offsetof (struct time_interpolator, addr));
227 DEFINE(IA64_TIME_INTERPOLATOR_SOURCE_OFFSET, offsetof (struct time_interpolator, source));
228 DEFINE(IA64_TIME_INTERPOLATOR_SHIFT_OFFSET, offsetof (struct time_interpolator, shift));
229 DEFINE(IA64_TIME_INTERPOLATOR_NSEC_OFFSET, offsetof (struct time_interpolator, nsec_per_cyc));
230 DEFINE(IA64_TIME_INTERPOLATOR_OFFSET_OFFSET, offsetof (struct time_interpolator, offset));
231 DEFINE(IA64_TIME_INTERPOLATOR_LAST_CYCLE_OFFSET, offsetof (struct time_interpolator, last_cycle));
232 DEFINE(IA64_TIME_INTERPOLATOR_LAST_COUNTER_OFFSET, offsetof (struct time_interpolator, last_counter));
233 DEFINE(IA64_TIME_INTERPOLATOR_JITTER_OFFSET, offsetof (struct time_interpolator, jitter));
234 DEFINE(IA64_TIME_INTERPOLATOR_MASK_OFFSET, offsetof (struct time_interpolator, mask));
235 DEFINE(IA64_TIME_SOURCE_CPU, TIME_SOURCE_CPU);
236 DEFINE(IA64_TIME_SOURCE_MMIO64, TIME_SOURCE_MMIO64);
237 DEFINE(IA64_TIME_SOURCE_MMIO32, TIME_SOURCE_MMIO32);
238 DEFINE(IA64_TIMESPEC_TV_NSEC_OFFSET, offsetof (struct timespec, tv_nsec));
239}
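
The DEFINE() macro above does not produce a linkable object; it plants marker lines of the form "->SYM value expr" in the compiler's assembly output, and kbuild's asm-offsets rule post-processes those markers into preprocessor constants that assembly sources can include. Roughly, the generated header ends up with entries of this shape (the numeric values below are illustrative placeholders, not the real offsets):

	/* values are illustrative placeholders, not the real offsets */
	#define IA64_TASK_SIZE			0x3000	/* sizeof (struct task_struct) */
	#define IA64_PT_REGS_R8_OFFSET		0x50	/* offsetof (struct pt_regs, r8) */
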
diff --git a/arch/ia64/kernel/brl_emu.c b/arch/ia64/kernel/brl_emu.c
new file mode 100644
index 000000000000..0b286ca164f9
--- /dev/null
+++ b/arch/ia64/kernel/brl_emu.c
@@ -0,0 +1,234 @@
1/*
2 * Emulation of the "brl" instruction for IA64 processors that
3 * don't support it in hardware.
4 * Author: Stephan Zeisset, Intel Corp. <Stephan.Zeisset@intel.com>
5 *
6 * 02/22/02 D. Mosberger Clear si_flgs, si_isr, and si_imm to avoid
7 * leaking kernel bits.
8 */
9
10#include <linux/kernel.h>
11#include <linux/sched.h>
12#include <asm/uaccess.h>
13#include <asm/processor.h>
14
15extern char ia64_set_b1, ia64_set_b2, ia64_set_b3, ia64_set_b4, ia64_set_b5;
16
17struct illegal_op_return {
18 unsigned long fkt, arg1, arg2, arg3;
19};
20
21/*
22 * The unimplemented bits of a virtual address must be set
23 * to the value of the most significant implemented bit.
24 * unimpl_va_mask includes all unimplemented bits and
25 * the most significant implemented bit, so the result
26 * of an and operation with the mask must be all 0's
27 * or all 1's for the address to be valid.
28 */
29#define unimplemented_virtual_address(va) ( \
30 ((va) & local_cpu_data->unimpl_va_mask) != 0 && \
31 ((va) & local_cpu_data->unimpl_va_mask) != local_cpu_data->unimpl_va_mask \
32)
33
34/*
35 * The unimplemented bits of a physical address must be 0.
36 * unimpl_pa_mask includes all unimplemented bits, so the result
37 * of an and operation with the mask must be all 0's for the
38 * address to be valid.
39 */
40#define unimplemented_physical_address(pa) ( \
41 ((pa) & local_cpu_data->unimpl_pa_mask) != 0 \
42)
43
44/*
45 * Handle an illegal operation fault that was caused by an
46 * unimplemented "brl" instruction.
47 * If we are not successful (e.g because the illegal operation
48 * wasn't caused by a "brl" after all), we return -1.
49 * If we are successful, we return either 0 or the address
50 * of a "fixup" function for manipulating preserved register
51 * state.
52 */
53
54struct illegal_op_return
55ia64_emulate_brl (struct pt_regs *regs, unsigned long ar_ec)
56{
57 unsigned long bundle[2];
58 unsigned long opcode, btype, qp, offset, cpl;
59 unsigned long next_ip;
60 struct siginfo siginfo;
61 struct illegal_op_return rv;
62 long tmp_taken, unimplemented_address;
63
64 rv.fkt = (unsigned long) -1;
65
66 /*
67 * Decode the instruction bundle.
68 */
69
70 if (copy_from_user(bundle, (void *) (regs->cr_iip), sizeof(bundle)))
71 return rv;
72
73 next_ip = (unsigned long) regs->cr_iip + 16;
74
75 /* "brl" must be in slot 2. */
76 if (ia64_psr(regs)->ri != 1) return rv;
77
78 /* Must be "mlx" template */
79 if ((bundle[0] & 0x1e) != 0x4) return rv;
80
81 opcode = (bundle[1] >> 60);
82 btype = ((bundle[1] >> 29) & 0x7);
83 qp = ((bundle[1] >> 23) & 0x3f);
84 offset = ((bundle[1] & 0x0800000000000000L) << 4)
85 | ((bundle[1] & 0x00fffff000000000L) >> 32)
86 | ((bundle[1] & 0x00000000007fffffL) << 40)
87 | ((bundle[0] & 0xffff000000000000L) >> 24);
88
89 tmp_taken = regs->pr & (1L << qp);
90
91 switch(opcode) {
92
93 case 0xC:
94 /*
95 * Long Branch.
96 */
97 if (btype != 0) return rv;
98 rv.fkt = 0;
99 if (!(tmp_taken)) {
100 /*
101 * Qualifying predicate is 0.
102 * Skip instruction.
103 */
104 regs->cr_iip = next_ip;
105 ia64_psr(regs)->ri = 0;
106 return rv;
107 }
108 break;
109
110 case 0xD:
111 /*
112 * Long Call.
113 */
114 rv.fkt = 0;
115 if (!(tmp_taken)) {
116 /*
117 * Qualifying predicate is 0.
118 * Skip instruction.
119 */
120 regs->cr_iip = next_ip;
121 ia64_psr(regs)->ri = 0;
122 return rv;
123 }
124
125 /*
126 * BR[btype] = IP+16
127 */
128 switch(btype) {
129 case 0:
130 regs->b0 = next_ip;
131 break;
132 case 1:
133 rv.fkt = (unsigned long) &ia64_set_b1;
134 break;
135 case 2:
136 rv.fkt = (unsigned long) &ia64_set_b2;
137 break;
138 case 3:
139 rv.fkt = (unsigned long) &ia64_set_b3;
140 break;
141 case 4:
142 rv.fkt = (unsigned long) &ia64_set_b4;
143 break;
144 case 5:
145 rv.fkt = (unsigned long) &ia64_set_b5;
146 break;
147 case 6:
148 regs->b6 = next_ip;
149 break;
150 case 7:
151 regs->b7 = next_ip;
152 break;
153 }
154 rv.arg1 = next_ip;
155
156 /*
157 * AR[PFS].pfm = CFM
158 * AR[PFS].pec = AR[EC]
159 * AR[PFS].ppl = PSR.cpl
160 */
161 cpl = ia64_psr(regs)->cpl;
162 regs->ar_pfs = ((regs->cr_ifs & 0x3fffffffff)
163 | (ar_ec << 52) | (cpl << 62));
164
165 /*
166 * CFM.sof -= CFM.sol
167 * CFM.sol = 0
168 * CFM.sor = 0
169 * CFM.rrb.gr = 0
170 * CFM.rrb.fr = 0
171 * CFM.rrb.pr = 0
172 */
173 regs->cr_ifs = ((regs->cr_ifs & 0xffffffc00000007f)
174 - ((regs->cr_ifs >> 7) & 0x7f));
175
176 break;
177
178 default:
179 /*
180 * Unknown opcode.
181 */
182 return rv;
183
184 }
185
186 regs->cr_iip += offset;
187 ia64_psr(regs)->ri = 0;
188
189 if (ia64_psr(regs)->it == 0)
190 unimplemented_address = unimplemented_physical_address(regs->cr_iip);
191 else
192 unimplemented_address = unimplemented_virtual_address(regs->cr_iip);
193
194 if (unimplemented_address) {
195 /*
196 * The target address contains unimplemented bits.
197 */
198 printk(KERN_DEBUG "Woah! Unimplemented Instruction Address Trap!\n");
199 siginfo.si_signo = SIGILL;
200 siginfo.si_errno = 0;
201 siginfo.si_flags = 0;
202 siginfo.si_isr = 0;
203 siginfo.si_imm = 0;
204 siginfo.si_code = ILL_BADIADDR;
205 force_sig_info(SIGILL, &siginfo, current);
206 } else if (ia64_psr(regs)->tb) {
207 /*
208 * Branch Tracing is enabled.
209 * Force a taken branch signal.
210 */
211 siginfo.si_signo = SIGTRAP;
212 siginfo.si_errno = 0;
213 siginfo.si_code = TRAP_BRANCH;
214 siginfo.si_flags = 0;
215 siginfo.si_isr = 0;
216 siginfo.si_addr = 0;
217 siginfo.si_imm = 0;
218 force_sig_info(SIGTRAP, &siginfo, current);
219 } else if (ia64_psr(regs)->ss) {
220 /*
221 * Single Step is enabled.
222 * Force a trace signal.
223 */
224 siginfo.si_signo = SIGTRAP;
225 siginfo.si_errno = 0;
226 siginfo.si_code = TRAP_TRACE;
227 siginfo.si_flags = 0;
228 siginfo.si_isr = 0;
229 siginfo.si_addr = 0;
230 siginfo.si_imm = 0;
231 force_sig_info(SIGTRAP, &siginfo, current);
232 }
233 return rv;
234}
diff --git a/arch/ia64/kernel/cyclone.c b/arch/ia64/kernel/cyclone.c
new file mode 100644
index 000000000000..768c7e46957c
--- /dev/null
+++ b/arch/ia64/kernel/cyclone.c
@@ -0,0 +1,109 @@
1#include <linux/module.h>
2#include <linux/smp.h>
3#include <linux/time.h>
4#include <linux/errno.h>
5#include <asm/io.h>
6
7/* IBM Summit (EXA) Cyclone counter code */
8#define CYCLONE_CBAR_ADDR 0xFEB00CD0
9#define CYCLONE_PMCC_OFFSET 0x51A0
10#define CYCLONE_MPMC_OFFSET 0x51D0
11#define CYCLONE_MPCS_OFFSET 0x51A8
12#define CYCLONE_TIMER_FREQ 100000000
13
14int use_cyclone;
15void __init cyclone_setup(void)
16{
17 use_cyclone = 1;
18}
19
20
21struct time_interpolator cyclone_interpolator = {
22 .source = TIME_SOURCE_MMIO64,
23 .shift = 16,
24 .frequency = CYCLONE_TIMER_FREQ,
25 .drift = -100,
26 .mask = (1LL << 40) - 1
27};
28
29int __init init_cyclone_clock(void)
30{
31 u64* reg;
32 u64 base; /* saved cyclone base address */
33 u64 offset; /* offset from pageaddr to cyclone_timer register */
34 int i;
35 u32* volatile cyclone_timer; /* Cyclone MPMC0 register */
36
37 if (!use_cyclone)
38 return -ENODEV;
39
40 printk(KERN_INFO "Summit chipset: Starting Cyclone Counter.\n");
41
42 /* find base address */
43 offset = (CYCLONE_CBAR_ADDR);
44 reg = (u64*)ioremap_nocache(offset, sizeof(u64));
45 if(!reg){
46 printk(KERN_ERR "Summit chipset: Could not find valid CBAR register.\n");
47 use_cyclone = 0;
48 return -ENODEV;
49 }
50 base = readq(reg);
51 if(!base){
52 printk(KERN_ERR "Summit chipset: Could not find valid CBAR value.\n");
53 use_cyclone = 0;
54 return -ENODEV;
55 }
56 iounmap(reg);
57
58 /* setup PMCC */
59 offset = (base + CYCLONE_PMCC_OFFSET);
60 reg = (u64*)ioremap_nocache(offset, sizeof(u64));
61 if(!reg){
62 printk(KERN_ERR "Summit chipset: Could not find valid PMCC register.\n");
63 use_cyclone = 0;
64 return -ENODEV;
65 }
66 writel(0x00000001,reg);
67 iounmap(reg);
68
69 /* setup MPCS */
70 offset = (base + CYCLONE_MPCS_OFFSET);
71 reg = (u64*)ioremap_nocache(offset, sizeof(u64));
72 if(!reg){
73 printk(KERN_ERR "Summit chipset: Could not find valid MPCS register.\n");
74 use_cyclone = 0;
75 return -ENODEV;
76 }
77 writel(0x00000001,reg);
78 iounmap(reg);
79
80 /* map in cyclone_timer */
81 offset = (base + CYCLONE_MPMC_OFFSET);
82 cyclone_timer = (u32*)ioremap_nocache(offset, sizeof(u32));
83 if(!cyclone_timer){
84 printk(KERN_ERR "Summit chipset: Could not find valid MPMC register.\n");
85 use_cyclone = 0;
86 return -ENODEV;
87 }
88
89	/* quick test to make sure it's ticking */
90 for(i=0; i<3; i++){
91 u32 old = readl(cyclone_timer);
92 int stall = 100;
93 while(stall--) barrier();
94 if(readl(cyclone_timer) == old){
95 printk(KERN_ERR "Summit chipset: Counter not counting! DISABLED\n");
96 iounmap(cyclone_timer);
97 cyclone_timer = 0;
98 use_cyclone = 0;
99 return -ENODEV;
100 }
101 }
102 /* initialize last tick */
103 cyclone_interpolator.addr = cyclone_timer;
104 register_time_interpolator(&cyclone_interpolator);
105
106 return 0;
107}
108
109__initcall(init_cyclone_clock);
diff --git a/arch/ia64/kernel/domain.c b/arch/ia64/kernel/domain.c
new file mode 100644
index 000000000000..fe532c970438
--- /dev/null
+++ b/arch/ia64/kernel/domain.c
@@ -0,0 +1,382 @@
1/*
2 * arch/ia64/kernel/domain.c
3 * Architecture specific sched-domains builder.
4 *
5 * Copyright (C) 2004 Jesse Barnes
6 * Copyright (C) 2004 Silicon Graphics, Inc.
7 */
8
9#include <linux/sched.h>
10#include <linux/percpu.h>
11#include <linux/slab.h>
12#include <linux/cpumask.h>
13#include <linux/init.h>
14#include <linux/topology.h>
15#include <linux/nodemask.h>
16
17#define SD_NODES_PER_DOMAIN 6
18
19#ifdef CONFIG_NUMA
20/**
21 * find_next_best_node - find the next node to include in a sched_domain
22 * @node: node whose sched_domain we're building
23 * @used_nodes: nodes already in the sched_domain
24 *
25 * Find the next node to include in a given scheduling domain. Simply
26 * finds the closest node not already in the @used_nodes map.
27 *
28 * Should use nodemask_t.
29 */
30static int __devinit find_next_best_node(int node, unsigned long *used_nodes)
31{
32 int i, n, val, min_val, best_node = 0;
33
34 min_val = INT_MAX;
35
36 for (i = 0; i < MAX_NUMNODES; i++) {
37 /* Start at @node */
38 n = (node + i) % MAX_NUMNODES;
39
40 if (!nr_cpus_node(n))
41 continue;
42
43 /* Skip already used nodes */
44 if (test_bit(n, used_nodes))
45 continue;
46
47 /* Simple min distance search */
48 val = node_distance(node, n);
49
50 if (val < min_val) {
51 min_val = val;
52 best_node = n;
53 }
54 }
55
56 set_bit(best_node, used_nodes);
57 return best_node;
58}
59
60/**
61 * sched_domain_node_span - get a cpumask for a node's sched_domain
62 * @node: node whose cpumask we're constructing
63 * @size: number of nodes to include in this span
64 *
65 * Given a node, construct a good cpumask for its sched_domain to span. It
66 * should be one that prevents unnecessary balancing, but also spreads tasks
67 * out optimally.
68 */
69static cpumask_t __devinit sched_domain_node_span(int node)
70{
71 int i;
72 cpumask_t span, nodemask;
73 DECLARE_BITMAP(used_nodes, MAX_NUMNODES);
74
75 cpus_clear(span);
76 bitmap_zero(used_nodes, MAX_NUMNODES);
77
78 nodemask = node_to_cpumask(node);
79 cpus_or(span, span, nodemask);
80 set_bit(node, used_nodes);
81
82 for (i = 1; i < SD_NODES_PER_DOMAIN; i++) {
83 int next_node = find_next_best_node(node, used_nodes);
84 nodemask = node_to_cpumask(next_node);
85 cpus_or(span, span, nodemask);
86 }
87
88 return span;
89}
90#endif
91
92/*
93 * At the moment, CONFIG_SCHED_SMT is never defined, but leave it in so we
94 * can switch it on easily if needed.
95 */
96#ifdef CONFIG_SCHED_SMT
97static DEFINE_PER_CPU(struct sched_domain, cpu_domains);
98static struct sched_group sched_group_cpus[NR_CPUS];
99static int __devinit cpu_to_cpu_group(int cpu)
100{
101 return cpu;
102}
103#endif
104
105static DEFINE_PER_CPU(struct sched_domain, phys_domains);
106static struct sched_group sched_group_phys[NR_CPUS];
107static int __devinit cpu_to_phys_group(int cpu)
108{
109#ifdef CONFIG_SCHED_SMT
110 return first_cpu(cpu_sibling_map[cpu]);
111#else
112 return cpu;
113#endif
114}
115
116#ifdef CONFIG_NUMA
117/*
118 * The init_sched_build_groups can't handle what we want to do with node
119 * groups, so roll our own. Now each node has its own list of groups which
120 * gets dynamically allocated.
121 */
122static DEFINE_PER_CPU(struct sched_domain, node_domains);
123static struct sched_group *sched_group_nodes[MAX_NUMNODES];
124
125static DEFINE_PER_CPU(struct sched_domain, allnodes_domains);
126static struct sched_group sched_group_allnodes[MAX_NUMNODES];
127
128static int __devinit cpu_to_allnodes_group(int cpu)
129{
130 return cpu_to_node(cpu);
131}
132#endif
133
134/*
135 * Set up scheduler domains and groups. Callers must hold the hotplug lock.
136 */
137void __devinit arch_init_sched_domains(void)
138{
139 int i;
140 cpumask_t cpu_default_map;
141
142 /*
143 * Setup mask for cpus without special case scheduling requirements.
144 * For now this just excludes isolated cpus, but could be used to
145 * exclude other special cases in the future.
146 */
147 cpus_complement(cpu_default_map, cpu_isolated_map);
148 cpus_and(cpu_default_map, cpu_default_map, cpu_online_map);
149
150 /*
151 * Set up domains. Isolated domains just stay on the dummy domain.
152 */
153 for_each_cpu_mask(i, cpu_default_map) {
154 int group;
155 struct sched_domain *sd = NULL, *p;
156 cpumask_t nodemask = node_to_cpumask(cpu_to_node(i));
157
158 cpus_and(nodemask, nodemask, cpu_default_map);
159
160#ifdef CONFIG_NUMA
161 if (num_online_cpus()
162 > SD_NODES_PER_DOMAIN*cpus_weight(nodemask)) {
163 sd = &per_cpu(allnodes_domains, i);
164 *sd = SD_ALLNODES_INIT;
165 sd->span = cpu_default_map;
166 group = cpu_to_allnodes_group(i);
167 sd->groups = &sched_group_allnodes[group];
168 p = sd;
169 } else
170 p = NULL;
171
172 sd = &per_cpu(node_domains, i);
173 *sd = SD_NODE_INIT;
174 sd->span = sched_domain_node_span(cpu_to_node(i));
175 sd->parent = p;
176 cpus_and(sd->span, sd->span, cpu_default_map);
177#endif
178
179 p = sd;
180 sd = &per_cpu(phys_domains, i);
181 group = cpu_to_phys_group(i);
182 *sd = SD_CPU_INIT;
183 sd->span = nodemask;
184 sd->parent = p;
185 sd->groups = &sched_group_phys[group];
186
187#ifdef CONFIG_SCHED_SMT
188 p = sd;
189 sd = &per_cpu(cpu_domains, i);
190 group = cpu_to_cpu_group(i);
191 *sd = SD_SIBLING_INIT;
192 sd->span = cpu_sibling_map[i];
193 cpus_and(sd->span, sd->span, cpu_default_map);
194 sd->parent = p;
195 sd->groups = &sched_group_cpus[group];
196#endif
197 }
198
199#ifdef CONFIG_SCHED_SMT
200 /* Set up CPU (sibling) groups */
201 for_each_cpu_mask(i, cpu_default_map) {
202 cpumask_t this_sibling_map = cpu_sibling_map[i];
203 cpus_and(this_sibling_map, this_sibling_map, cpu_default_map);
204 if (i != first_cpu(this_sibling_map))
205 continue;
206
207 init_sched_build_groups(sched_group_cpus, this_sibling_map,
208 &cpu_to_cpu_group);
209 }
210#endif
211
212 /* Set up physical groups */
213 for (i = 0; i < MAX_NUMNODES; i++) {
214 cpumask_t nodemask = node_to_cpumask(i);
215
216 cpus_and(nodemask, nodemask, cpu_default_map);
217 if (cpus_empty(nodemask))
218 continue;
219
220 init_sched_build_groups(sched_group_phys, nodemask,
221 &cpu_to_phys_group);
222 }
223
224#ifdef CONFIG_NUMA
225 init_sched_build_groups(sched_group_allnodes, cpu_default_map,
226 &cpu_to_allnodes_group);
227
228 for (i = 0; i < MAX_NUMNODES; i++) {
229 /* Set up node groups */
230 struct sched_group *sg, *prev;
231 cpumask_t nodemask = node_to_cpumask(i);
232 cpumask_t domainspan;
233 cpumask_t covered = CPU_MASK_NONE;
234 int j;
235
236 cpus_and(nodemask, nodemask, cpu_default_map);
237 if (cpus_empty(nodemask))
238 continue;
239
240 domainspan = sched_domain_node_span(i);
241 cpus_and(domainspan, domainspan, cpu_default_map);
242
243 sg = kmalloc(sizeof(struct sched_group), GFP_KERNEL);
244 sched_group_nodes[i] = sg;
245 for_each_cpu_mask(j, nodemask) {
246 struct sched_domain *sd;
247 sd = &per_cpu(node_domains, j);
248 sd->groups = sg;
249 if (sd->groups == NULL) {
250 /* Turn off balancing if we have no groups */
251 sd->flags = 0;
252 }
253 }
254 if (!sg) {
255 printk(KERN_WARNING
256 "Can not alloc domain group for node %d\n", i);
257 continue;
258 }
259 sg->cpu_power = 0;
260 sg->cpumask = nodemask;
261 cpus_or(covered, covered, nodemask);
262 prev = sg;
263
264 for (j = 0; j < MAX_NUMNODES; j++) {
265 cpumask_t tmp, notcovered;
266 int n = (i + j) % MAX_NUMNODES;
267
268 cpus_complement(notcovered, covered);
269 cpus_and(tmp, notcovered, cpu_default_map);
270 cpus_and(tmp, tmp, domainspan);
271 if (cpus_empty(tmp))
272 break;
273
274 nodemask = node_to_cpumask(n);
275 cpus_and(tmp, tmp, nodemask);
276 if (cpus_empty(tmp))
277 continue;
278
279 sg = kmalloc(sizeof(struct sched_group), GFP_KERNEL);
280 if (!sg) {
281 printk(KERN_WARNING
282 "Can not alloc domain group for node %d\n", j);
283 break;
284 }
285 sg->cpu_power = 0;
286 sg->cpumask = tmp;
287 cpus_or(covered, covered, tmp);
288 prev->next = sg;
289 prev = sg;
290 }
291 prev->next = sched_group_nodes[i];
292 }
293#endif
294
295 /* Calculate CPU power for physical packages and nodes */
296 for_each_cpu_mask(i, cpu_default_map) {
297 int power;
298 struct sched_domain *sd;
299#ifdef CONFIG_SCHED_SMT
300 sd = &per_cpu(cpu_domains, i);
301 power = SCHED_LOAD_SCALE;
302 sd->groups->cpu_power = power;
303#endif
304
305 sd = &per_cpu(phys_domains, i);
306 power = SCHED_LOAD_SCALE + SCHED_LOAD_SCALE *
307 (cpus_weight(sd->groups->cpumask)-1) / 10;
308 sd->groups->cpu_power = power;
309
310#ifdef CONFIG_NUMA
311 sd = &per_cpu(allnodes_domains, i);
312 if (sd->groups) {
313 power = SCHED_LOAD_SCALE + SCHED_LOAD_SCALE *
314 (cpus_weight(sd->groups->cpumask)-1) / 10;
315 sd->groups->cpu_power = power;
316 }
317#endif
318 }
319
320#ifdef CONFIG_NUMA
321 for (i = 0; i < MAX_NUMNODES; i++) {
322 struct sched_group *sg = sched_group_nodes[i];
323 int j;
324
325 if (sg == NULL)
326 continue;
327next_sg:
328 for_each_cpu_mask(j, sg->cpumask) {
329 struct sched_domain *sd;
330 int power;
331
332 sd = &per_cpu(phys_domains, j);
333 if (j != first_cpu(sd->groups->cpumask)) {
334 /*
335 * Only add "power" once for each
336 * physical package.
337 */
338 continue;
339 }
340 power = SCHED_LOAD_SCALE + SCHED_LOAD_SCALE *
341 (cpus_weight(sd->groups->cpumask)-1) / 10;
342
343 sg->cpu_power += power;
344 }
345 sg = sg->next;
346 if (sg != sched_group_nodes[i])
347 goto next_sg;
348 }
349#endif
350
351 /* Attach the domains */
352 for_each_online_cpu(i) {
353 struct sched_domain *sd;
354#ifdef CONFIG_SCHED_SMT
355 sd = &per_cpu(cpu_domains, i);
356#else
357 sd = &per_cpu(phys_domains, i);
358#endif
359 cpu_attach_domain(sd, i);
360 }
361}
362
363void __devinit arch_destroy_sched_domains(void)
364{
365#ifdef CONFIG_NUMA
366 int i;
367 for (i = 0; i < MAX_NUMNODES; i++) {
368 struct sched_group *oldsg, *sg = sched_group_nodes[i];
369 if (sg == NULL)
370 continue;
371 sg = sg->next;
372next_sg:
373 oldsg = sg;
374 sg = sg->next;
375 kfree(oldsg);
376 if (oldsg != sched_group_nodes[i])
377 goto next_sg;
378 sched_group_nodes[i] = NULL;
379 }
380#endif
381}
382
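The cpu_power values assigned in arch_init_sched_domains() above all follow one scaling rule: a group is credited one full SCHED_LOAD_SCALE for its first CPU plus 10% of SCHED_LOAD_SCALE for each additional CPU, and each per-node group then accumulates that amount once per physical package it spans. A minimal stand-alone sketch of the arithmetic (the value 128 is only an assumed stand-in for SCHED_LOAD_SCALE, used here for illustration):

	/* Illustrative sketch only -- mirrors the cpu_power formula used above. */
	#define EXAMPLE_LOAD_SCALE	128UL	/* assumed stand-in for SCHED_LOAD_SCALE */

	static unsigned long example_group_power(unsigned int cpus_in_group)
	{
		/* one full unit for the first CPU, plus 10% per additional CPU */
		return EXAMPLE_LOAD_SCALE +
		       EXAMPLE_LOAD_SCALE * (cpus_in_group - 1) / 10;
	}

Under this rule a two-CPU (e.g. SMT) package is rated only slightly above a single CPU rather than twice as high, which biases the balancer toward spreading tasks across packages before stacking them onto siblings.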
diff --git a/arch/ia64/kernel/efi.c b/arch/ia64/kernel/efi.c
new file mode 100644
index 000000000000..4a3b1aac43e7
--- /dev/null
+++ b/arch/ia64/kernel/efi.c
@@ -0,0 +1,832 @@
1/*
2 * Extensible Firmware Interface
3 *
4 * Based on Extensible Firmware Interface Specification version 0.9 April 30, 1999
5 *
6 * Copyright (C) 1999 VA Linux Systems
7 * Copyright (C) 1999 Walt Drummond <drummond@valinux.com>
8 * Copyright (C) 1999-2003 Hewlett-Packard Co.
9 * David Mosberger-Tang <davidm@hpl.hp.com>
10 * Stephane Eranian <eranian@hpl.hp.com>
11 *
 12 * Not all EFI Runtime Services are implemented yet, as EFI only
13 * supports physical mode addressing on SoftSDV. This is to be fixed
14 * in a future version. --drummond 1999-07-20
15 *
16 * Implemented EFI runtime services and virtual mode calls. --davidm
17 *
18 * Goutham Rao: <goutham.rao@intel.com>
19 * Skip non-WB memory and ignore empty memory ranges.
20 */
21#include <linux/config.h>
22#include <linux/module.h>
23#include <linux/kernel.h>
24#include <linux/init.h>
25#include <linux/types.h>
26#include <linux/time.h>
27#include <linux/efi.h>
28
29#include <asm/io.h>
30#include <asm/kregs.h>
31#include <asm/meminit.h>
32#include <asm/pgtable.h>
33#include <asm/processor.h>
34#include <asm/mca.h>
35
36#define EFI_DEBUG 0
37
38extern efi_status_t efi_call_phys (void *, ...);
39
40struct efi efi;
41EXPORT_SYMBOL(efi);
42static efi_runtime_services_t *runtime;
43static unsigned long mem_limit = ~0UL, max_addr = ~0UL;
44
45#define efi_call_virt(f, args...) (*(f))(args)
46
47#define STUB_GET_TIME(prefix, adjust_arg) \
48static efi_status_t \
49prefix##_get_time (efi_time_t *tm, efi_time_cap_t *tc) \
50{ \
51 struct ia64_fpreg fr[6]; \
52 efi_time_cap_t *atc = NULL; \
53 efi_status_t ret; \
54 \
55 if (tc) \
56 atc = adjust_arg(tc); \
57 ia64_save_scratch_fpregs(fr); \
58 ret = efi_call_##prefix((efi_get_time_t *) __va(runtime->get_time), adjust_arg(tm), atc); \
59 ia64_load_scratch_fpregs(fr); \
60 return ret; \
61}
62
63#define STUB_SET_TIME(prefix, adjust_arg) \
64static efi_status_t \
65prefix##_set_time (efi_time_t *tm) \
66{ \
67 struct ia64_fpreg fr[6]; \
68 efi_status_t ret; \
69 \
70 ia64_save_scratch_fpregs(fr); \
71 ret = efi_call_##prefix((efi_set_time_t *) __va(runtime->set_time), adjust_arg(tm)); \
72 ia64_load_scratch_fpregs(fr); \
73 return ret; \
74}
75
76#define STUB_GET_WAKEUP_TIME(prefix, adjust_arg) \
77static efi_status_t \
78prefix##_get_wakeup_time (efi_bool_t *enabled, efi_bool_t *pending, efi_time_t *tm) \
79{ \
80 struct ia64_fpreg fr[6]; \
81 efi_status_t ret; \
82 \
83 ia64_save_scratch_fpregs(fr); \
84 ret = efi_call_##prefix((efi_get_wakeup_time_t *) __va(runtime->get_wakeup_time), \
85 adjust_arg(enabled), adjust_arg(pending), adjust_arg(tm)); \
86 ia64_load_scratch_fpregs(fr); \
87 return ret; \
88}
89
90#define STUB_SET_WAKEUP_TIME(prefix, adjust_arg) \
91static efi_status_t \
92prefix##_set_wakeup_time (efi_bool_t enabled, efi_time_t *tm) \
93{ \
94 struct ia64_fpreg fr[6]; \
95 efi_time_t *atm = NULL; \
96 efi_status_t ret; \
97 \
98 if (tm) \
99 atm = adjust_arg(tm); \
100 ia64_save_scratch_fpregs(fr); \
101 ret = efi_call_##prefix((efi_set_wakeup_time_t *) __va(runtime->set_wakeup_time), \
102 enabled, atm); \
103 ia64_load_scratch_fpregs(fr); \
104 return ret; \
105}
106
107#define STUB_GET_VARIABLE(prefix, adjust_arg) \
108static efi_status_t \
109prefix##_get_variable (efi_char16_t *name, efi_guid_t *vendor, u32 *attr, \
110 unsigned long *data_size, void *data) \
111{ \
112 struct ia64_fpreg fr[6]; \
113 u32 *aattr = NULL; \
114 efi_status_t ret; \
115 \
116 if (attr) \
117 aattr = adjust_arg(attr); \
118 ia64_save_scratch_fpregs(fr); \
119 ret = efi_call_##prefix((efi_get_variable_t *) __va(runtime->get_variable), \
120 adjust_arg(name), adjust_arg(vendor), aattr, \
121 adjust_arg(data_size), adjust_arg(data)); \
122 ia64_load_scratch_fpregs(fr); \
123 return ret; \
124}
125
126#define STUB_GET_NEXT_VARIABLE(prefix, adjust_arg) \
127static efi_status_t \
128prefix##_get_next_variable (unsigned long *name_size, efi_char16_t *name, efi_guid_t *vendor) \
129{ \
130 struct ia64_fpreg fr[6]; \
131 efi_status_t ret; \
132 \
133 ia64_save_scratch_fpregs(fr); \
134 ret = efi_call_##prefix((efi_get_next_variable_t *) __va(runtime->get_next_variable), \
135 adjust_arg(name_size), adjust_arg(name), adjust_arg(vendor)); \
136 ia64_load_scratch_fpregs(fr); \
137 return ret; \
138}
139
140#define STUB_SET_VARIABLE(prefix, adjust_arg) \
141static efi_status_t \
142prefix##_set_variable (efi_char16_t *name, efi_guid_t *vendor, unsigned long attr, \
143 unsigned long data_size, void *data) \
144{ \
145 struct ia64_fpreg fr[6]; \
146 efi_status_t ret; \
147 \
148 ia64_save_scratch_fpregs(fr); \
149 ret = efi_call_##prefix((efi_set_variable_t *) __va(runtime->set_variable), \
150 adjust_arg(name), adjust_arg(vendor), attr, data_size, \
151 adjust_arg(data)); \
152 ia64_load_scratch_fpregs(fr); \
153 return ret; \
154}
155
156#define STUB_GET_NEXT_HIGH_MONO_COUNT(prefix, adjust_arg) \
157static efi_status_t \
158prefix##_get_next_high_mono_count (u32 *count) \
159{ \
160 struct ia64_fpreg fr[6]; \
161 efi_status_t ret; \
162 \
163 ia64_save_scratch_fpregs(fr); \
164 ret = efi_call_##prefix((efi_get_next_high_mono_count_t *) \
165 __va(runtime->get_next_high_mono_count), adjust_arg(count)); \
166 ia64_load_scratch_fpregs(fr); \
167 return ret; \
168}
169
170#define STUB_RESET_SYSTEM(prefix, adjust_arg) \
171static void \
172prefix##_reset_system (int reset_type, efi_status_t status, \
173 unsigned long data_size, efi_char16_t *data) \
174{ \
175 struct ia64_fpreg fr[6]; \
176 efi_char16_t *adata = NULL; \
177 \
178 if (data) \
179 adata = adjust_arg(data); \
180 \
181 ia64_save_scratch_fpregs(fr); \
182 efi_call_##prefix((efi_reset_system_t *) __va(runtime->reset_system), \
183 reset_type, status, data_size, adata); \
184 /* should not return, but just in case... */ \
185 ia64_load_scratch_fpregs(fr); \
186}
187
188#define phys_ptr(arg) ((__typeof__(arg)) ia64_tpa(arg))
189
190STUB_GET_TIME(phys, phys_ptr)
191STUB_SET_TIME(phys, phys_ptr)
192STUB_GET_WAKEUP_TIME(phys, phys_ptr)
193STUB_SET_WAKEUP_TIME(phys, phys_ptr)
194STUB_GET_VARIABLE(phys, phys_ptr)
195STUB_GET_NEXT_VARIABLE(phys, phys_ptr)
196STUB_SET_VARIABLE(phys, phys_ptr)
197STUB_GET_NEXT_HIGH_MONO_COUNT(phys, phys_ptr)
198STUB_RESET_SYSTEM(phys, phys_ptr)
199
200#define id(arg) arg
201
202STUB_GET_TIME(virt, id)
203STUB_SET_TIME(virt, id)
204STUB_GET_WAKEUP_TIME(virt, id)
205STUB_SET_WAKEUP_TIME(virt, id)
206STUB_GET_VARIABLE(virt, id)
207STUB_GET_NEXT_VARIABLE(virt, id)
208STUB_SET_VARIABLE(virt, id)
209STUB_GET_NEXT_HIGH_MONO_COUNT(virt, id)
210STUB_RESET_SYSTEM(virt, id)
211
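Each STUB_* line above expands to a small wrapper: the phys_* variants convert pointer arguments to physical addresses with ia64_tpa() and go through efi_call_phys(), the virt_* variants pass pointers through unchanged, and both save and restore the scratch FP registers around the firmware call. As a rough hand-expansion (not verbatim preprocessor output), STUB_SET_TIME(phys, phys_ptr) yields approximately:

	static efi_status_t
	phys_set_time (efi_time_t *tm)
	{
		struct ia64_fpreg fr[6];
		efi_status_t ret;

		ia64_save_scratch_fpregs(fr);	/* firmware may clobber scratch FP regs */
		ret = efi_call_phys((efi_set_time_t *) __va(runtime->set_time),
				    (efi_time_t *) ia64_tpa(tm));	/* phys_ptr(tm) */
		ia64_load_scratch_fpregs(fr);
		return ret;
	}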
212void
213efi_gettimeofday (struct timespec *ts)
214{
215 efi_time_t tm;
216
 217	memset(ts, 0, sizeof(*ts));
218 if ((*efi.get_time)(&tm, NULL) != EFI_SUCCESS)
219 return;
220
221 ts->tv_sec = mktime(tm.year, tm.month, tm.day, tm.hour, tm.minute, tm.second);
222 ts->tv_nsec = tm.nanosecond;
223}
224
225static int
226is_available_memory (efi_memory_desc_t *md)
227{
228 if (!(md->attribute & EFI_MEMORY_WB))
229 return 0;
230
231 switch (md->type) {
232 case EFI_LOADER_CODE:
233 case EFI_LOADER_DATA:
234 case EFI_BOOT_SERVICES_CODE:
235 case EFI_BOOT_SERVICES_DATA:
236 case EFI_CONVENTIONAL_MEMORY:
237 return 1;
238 }
239 return 0;
240}
241
242/*
 243 * Trim descriptor MD so it starts at address START_ADDR. If the descriptor covers
244 * memory that is normally available to the kernel, issue a warning that some memory
245 * is being ignored.
246 */
247static void
248trim_bottom (efi_memory_desc_t *md, u64 start_addr)
249{
250 u64 num_skipped_pages;
251
252 if (md->phys_addr >= start_addr || !md->num_pages)
253 return;
254
255 num_skipped_pages = (start_addr - md->phys_addr) >> EFI_PAGE_SHIFT;
256 if (num_skipped_pages > md->num_pages)
257 num_skipped_pages = md->num_pages;
258
259 if (is_available_memory(md))
260 printk(KERN_NOTICE "efi.%s: ignoring %luKB of memory at 0x%lx due to granule hole "
261 "at 0x%lx\n", __FUNCTION__,
262 (num_skipped_pages << EFI_PAGE_SHIFT) >> 10,
263 md->phys_addr, start_addr - IA64_GRANULE_SIZE);
264 /*
265 * NOTE: Don't set md->phys_addr to START_ADDR because that could cause the memory
266 * descriptor list to become unsorted. In such a case, md->num_pages will be
267 * zero, so the Right Thing will happen.
268 */
269 md->phys_addr += num_skipped_pages << EFI_PAGE_SHIFT;
270 md->num_pages -= num_skipped_pages;
271}
272
273static void
274trim_top (efi_memory_desc_t *md, u64 end_addr)
275{
276 u64 num_dropped_pages, md_end_addr;
277
278 md_end_addr = md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT);
279
280 if (md_end_addr <= end_addr || !md->num_pages)
281 return;
282
283 num_dropped_pages = (md_end_addr - end_addr) >> EFI_PAGE_SHIFT;
284 if (num_dropped_pages > md->num_pages)
285 num_dropped_pages = md->num_pages;
286
287 if (is_available_memory(md))
288 printk(KERN_NOTICE "efi.%s: ignoring %luKB of memory at 0x%lx due to granule hole "
289 "at 0x%lx\n", __FUNCTION__,
290 (num_dropped_pages << EFI_PAGE_SHIFT) >> 10,
291 md->phys_addr, end_addr);
292 md->num_pages -= num_dropped_pages;
293}
294
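trim_bottom() and trim_top() count in EFI pages, while the walker below reasons in IA-64 granules via GRANULEROUNDDOWN(). The rounding itself is plain power-of-two masking; a stand-alone sketch of that step (illustrative only, the real macro comes from the headers included above):

	/* Illustrative only: round an address down to the start of its granule. */
	static inline u64
	example_granule_round_down (u64 addr, u64 granule_size)
	{
		/* granule_size must be a power of two (IA64_GRANULE_SIZE is) */
		return addr & ~(granule_size - 1);
	}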
295/*
296 * Walks the EFI memory map and calls CALLBACK once for each EFI memory descriptor that
297 * has memory that is available for OS use.
298 */
299void
300efi_memmap_walk (efi_freemem_callback_t callback, void *arg)
301{
302 int prev_valid = 0;
303 struct range {
304 u64 start;
305 u64 end;
306 } prev, curr;
307 void *efi_map_start, *efi_map_end, *p, *q;
308 efi_memory_desc_t *md, *check_md;
309 u64 efi_desc_size, start, end, granule_addr, last_granule_addr, first_non_wb_addr = 0;
310 unsigned long total_mem = 0;
311
312 efi_map_start = __va(ia64_boot_param->efi_memmap);
313 efi_map_end = efi_map_start + ia64_boot_param->efi_memmap_size;
314 efi_desc_size = ia64_boot_param->efi_memdesc_size;
315
316 for (p = efi_map_start; p < efi_map_end; p += efi_desc_size) {
317 md = p;
318
319 /* skip over non-WB memory descriptors; that's all we're interested in... */
320 if (!(md->attribute & EFI_MEMORY_WB))
321 continue;
322
323 /*
324 * granule_addr is the base of md's first granule.
 325		 * [granule_addr, first_non_wb_addr) is guaranteed to
326 * be contiguous WB memory.
327 */
328 granule_addr = GRANULEROUNDDOWN(md->phys_addr);
329 first_non_wb_addr = max(first_non_wb_addr, granule_addr);
330
331 if (first_non_wb_addr < md->phys_addr) {
332 trim_bottom(md, granule_addr + IA64_GRANULE_SIZE);
333 granule_addr = GRANULEROUNDDOWN(md->phys_addr);
334 first_non_wb_addr = max(first_non_wb_addr, granule_addr);
335 }
336
337 for (q = p; q < efi_map_end; q += efi_desc_size) {
338 check_md = q;
339
340 if ((check_md->attribute & EFI_MEMORY_WB) &&
341 (check_md->phys_addr == first_non_wb_addr))
342 first_non_wb_addr += check_md->num_pages << EFI_PAGE_SHIFT;
343 else
344 break; /* non-WB or hole */
345 }
346
347 last_granule_addr = GRANULEROUNDDOWN(first_non_wb_addr);
348 if (last_granule_addr < md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT))
349 trim_top(md, last_granule_addr);
350
351 if (is_available_memory(md)) {
352 if (md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT) >= max_addr) {
353 if (md->phys_addr >= max_addr)
354 continue;
355 md->num_pages = (max_addr - md->phys_addr) >> EFI_PAGE_SHIFT;
356 first_non_wb_addr = max_addr;
357 }
358
359 if (total_mem >= mem_limit)
360 continue;
361
362 if (total_mem + (md->num_pages << EFI_PAGE_SHIFT) > mem_limit) {
363 unsigned long limit_addr = md->phys_addr;
364
365 limit_addr += mem_limit - total_mem;
366 limit_addr = GRANULEROUNDDOWN(limit_addr);
367
368 if (md->phys_addr > limit_addr)
369 continue;
370
371 md->num_pages = (limit_addr - md->phys_addr) >>
372 EFI_PAGE_SHIFT;
373 first_non_wb_addr = max_addr = md->phys_addr +
374 (md->num_pages << EFI_PAGE_SHIFT);
375 }
376 total_mem += (md->num_pages << EFI_PAGE_SHIFT);
377
378 if (md->num_pages == 0)
379 continue;
380
381 curr.start = PAGE_OFFSET + md->phys_addr;
382 curr.end = curr.start + (md->num_pages << EFI_PAGE_SHIFT);
383
384 if (!prev_valid) {
385 prev = curr;
386 prev_valid = 1;
387 } else {
388 if (curr.start < prev.start)
389 printk(KERN_ERR "Oops: EFI memory table not ordered!\n");
390
391 if (prev.end == curr.start) {
392 /* merge two consecutive memory ranges */
393 prev.end = curr.end;
394 } else {
395 start = PAGE_ALIGN(prev.start);
396 end = prev.end & PAGE_MASK;
397 if ((end > start) && (*callback)(start, end, arg) < 0)
398 return;
399 prev = curr;
400 }
401 }
402 }
403 }
404 if (prev_valid) {
405 start = PAGE_ALIGN(prev.start);
406 end = prev.end & PAGE_MASK;
407 if (end > start)
408 (*callback)(start, end, arg);
409 }
410}
411
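efi_memmap_walk() hands its callback one coalesced, page-aligned range of kernel-virtual addresses at a time and stops early as soon as the callback returns a negative value. A hypothetical caller could look like the sketch below; the callback name and surrounding usage are illustrative only, with just the (start, end, arg) convention and the early-stop rule taken from the walker above:

	/* Hypothetical callback: add up how much usable WB memory was reported. */
	static int
	count_free_bytes (u64 start, u64 end, void *arg)
	{
		unsigned long *total = arg;

		*total += end - start;	/* start/end bound one usable WB range */
		return 0;		/* non-negative: keep walking */
	}

	/* usage sketch:
	 *	unsigned long total = 0;
	 *	efi_memmap_walk(count_free_bytes, &total);
	 */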
412/*
 413 * Look for the PAL_CODE region reported by EFI and map it using an
414 * ITR to enable safe PAL calls in virtual mode. See IA-64 Processor
415 * Abstraction Layer chapter 11 in ADAG
416 */
417
418void *
419efi_get_pal_addr (void)
420{
421 void *efi_map_start, *efi_map_end, *p;
422 efi_memory_desc_t *md;
423 u64 efi_desc_size;
424 int pal_code_count = 0;
425 u64 vaddr, mask;
426
427 efi_map_start = __va(ia64_boot_param->efi_memmap);
428 efi_map_end = efi_map_start + ia64_boot_param->efi_memmap_size;
429 efi_desc_size = ia64_boot_param->efi_memdesc_size;
430
431 for (p = efi_map_start; p < efi_map_end; p += efi_desc_size) {
432 md = p;
433 if (md->type != EFI_PAL_CODE)
434 continue;
435
436 if (++pal_code_count > 1) {
437 printk(KERN_ERR "Too many EFI Pal Code memory ranges, dropped @ %lx\n",
438 md->phys_addr);
439 continue;
440 }
441 /*
442 * The only ITLB entry in region 7 that is used is the one installed by
443 * __start(). That entry covers a 64MB range.
444 */
445 mask = ~((1 << KERNEL_TR_PAGE_SHIFT) - 1);
446 vaddr = PAGE_OFFSET + md->phys_addr;
447
448 /*
449 * We must check that the PAL mapping won't overlap with the kernel
450 * mapping.
451 *
452 * PAL code is guaranteed to be aligned on a power of 2 between 4k and
453 * 256KB and that only one ITR is needed to map it. This implies that the
454 * PAL code is always aligned on its size, i.e., the closest matching page
455 * size supported by the TLB. Therefore PAL code is guaranteed never to
 456 * cross a 64MB boundary unless it is bigger than 64MB (very unlikely!). So for
457 * now the following test is enough to determine whether or not we need a
458 * dedicated ITR for the PAL code.
459 */
460 if ((vaddr & mask) == (KERNEL_START & mask)) {
461 printk(KERN_INFO "%s: no need to install ITR for PAL code\n",
462 __FUNCTION__);
463 continue;
464 }
465
466 if (md->num_pages << EFI_PAGE_SHIFT > IA64_GRANULE_SIZE)
467 panic("Woah! PAL code size bigger than a granule!");
468
469#if EFI_DEBUG
470 mask = ~((1 << IA64_GRANULE_SHIFT) - 1);
471
472 printk(KERN_INFO "CPU %d: mapping PAL code [0x%lx-0x%lx) into [0x%lx-0x%lx)\n",
473 smp_processor_id(), md->phys_addr,
474 md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT),
475 vaddr & mask, (vaddr & mask) + IA64_GRANULE_SIZE);
476#endif
477 return __va(md->phys_addr);
478 }
 479	printk(KERN_WARNING "%s: no PAL-code memory-descriptor found\n",
480 __FUNCTION__);
481 return NULL;
482}
483
484void
485efi_map_pal_code (void)
486{
487 void *pal_vaddr = efi_get_pal_addr ();
488 u64 psr;
489
490 if (!pal_vaddr)
491 return;
492
493 /*
494 * Cannot write to CRx with PSR.ic=1
495 */
496 psr = ia64_clear_ic();
497 ia64_itr(0x1, IA64_TR_PALCODE, GRANULEROUNDDOWN((unsigned long) pal_vaddr),
498 pte_val(pfn_pte(__pa(pal_vaddr) >> PAGE_SHIFT, PAGE_KERNEL)),
499 IA64_GRANULE_SHIFT);
500 ia64_set_psr(psr); /* restore psr */
501 ia64_srlz_i();
502}
503
504void __init
505efi_init (void)
506{
507 void *efi_map_start, *efi_map_end;
508 efi_config_table_t *config_tables;
509 efi_char16_t *c16;
510 u64 efi_desc_size;
511 char *cp, *end, vendor[100] = "unknown";
512 extern char saved_command_line[];
513 int i;
514
515 /* it's too early to be able to use the standard kernel command line support... */
516 for (cp = saved_command_line; *cp; ) {
517 if (memcmp(cp, "mem=", 4) == 0) {
518 cp += 4;
519 mem_limit = memparse(cp, &end);
520 if (end != cp)
521 break;
522 cp = end;
523 } else if (memcmp(cp, "max_addr=", 9) == 0) {
524 cp += 9;
525 max_addr = GRANULEROUNDDOWN(memparse(cp, &end));
526 if (end != cp)
527 break;
528 cp = end;
529 } else {
530 while (*cp != ' ' && *cp)
531 ++cp;
532 while (*cp == ' ')
533 ++cp;
534 }
535 }
536 if (max_addr != ~0UL)
537 printk(KERN_INFO "Ignoring memory above %luMB\n", max_addr >> 20);
538
539 efi.systab = __va(ia64_boot_param->efi_systab);
540
541 /*
542 * Verify the EFI Table
543 */
544 if (efi.systab == NULL)
545 panic("Woah! Can't find EFI system table.\n");
546 if (efi.systab->hdr.signature != EFI_SYSTEM_TABLE_SIGNATURE)
547 panic("Woah! EFI system table signature incorrect\n");
548 if ((efi.systab->hdr.revision ^ EFI_SYSTEM_TABLE_REVISION) >> 16 != 0)
549 printk(KERN_WARNING "Warning: EFI system table major version mismatch: "
550 "got %d.%02d, expected %d.%02d\n",
551 efi.systab->hdr.revision >> 16, efi.systab->hdr.revision & 0xffff,
552 EFI_SYSTEM_TABLE_REVISION >> 16, EFI_SYSTEM_TABLE_REVISION & 0xffff);
553
554 config_tables = __va(efi.systab->tables);
555
556 /* Show what we know for posterity */
557 c16 = __va(efi.systab->fw_vendor);
558 if (c16) {
 559		for (i = 0; i < (int) sizeof(vendor) - 1 && *c16; ++i)
560 vendor[i] = *c16++;
561 vendor[i] = '\0';
562 }
563
564 printk(KERN_INFO "EFI v%u.%.02u by %s:",
565 efi.systab->hdr.revision >> 16, efi.systab->hdr.revision & 0xffff, vendor);
566
567 for (i = 0; i < (int) efi.systab->nr_tables; i++) {
568 if (efi_guidcmp(config_tables[i].guid, MPS_TABLE_GUID) == 0) {
569 efi.mps = __va(config_tables[i].table);
570 printk(" MPS=0x%lx", config_tables[i].table);
571 } else if (efi_guidcmp(config_tables[i].guid, ACPI_20_TABLE_GUID) == 0) {
572 efi.acpi20 = __va(config_tables[i].table);
573 printk(" ACPI 2.0=0x%lx", config_tables[i].table);
574 } else if (efi_guidcmp(config_tables[i].guid, ACPI_TABLE_GUID) == 0) {
575 efi.acpi = __va(config_tables[i].table);
576 printk(" ACPI=0x%lx", config_tables[i].table);
577 } else if (efi_guidcmp(config_tables[i].guid, SMBIOS_TABLE_GUID) == 0) {
578 efi.smbios = __va(config_tables[i].table);
579 printk(" SMBIOS=0x%lx", config_tables[i].table);
580 } else if (efi_guidcmp(config_tables[i].guid, SAL_SYSTEM_TABLE_GUID) == 0) {
581 efi.sal_systab = __va(config_tables[i].table);
582 printk(" SALsystab=0x%lx", config_tables[i].table);
583 } else if (efi_guidcmp(config_tables[i].guid, HCDP_TABLE_GUID) == 0) {
584 efi.hcdp = __va(config_tables[i].table);
585 printk(" HCDP=0x%lx", config_tables[i].table);
586 }
587 }
588 printk("\n");
589
590 runtime = __va(efi.systab->runtime);
591 efi.get_time = phys_get_time;
592 efi.set_time = phys_set_time;
593 efi.get_wakeup_time = phys_get_wakeup_time;
594 efi.set_wakeup_time = phys_set_wakeup_time;
595 efi.get_variable = phys_get_variable;
596 efi.get_next_variable = phys_get_next_variable;
597 efi.set_variable = phys_set_variable;
598 efi.get_next_high_mono_count = phys_get_next_high_mono_count;
599 efi.reset_system = phys_reset_system;
600
601 efi_map_start = __va(ia64_boot_param->efi_memmap);
602 efi_map_end = efi_map_start + ia64_boot_param->efi_memmap_size;
603 efi_desc_size = ia64_boot_param->efi_memdesc_size;
604
605#if EFI_DEBUG
606 /* print EFI memory map: */
607 {
608 efi_memory_desc_t *md;
609 void *p;
610
611 for (i = 0, p = efi_map_start; p < efi_map_end; ++i, p += efi_desc_size) {
612 md = p;
613 printk("mem%02u: type=%u, attr=0x%lx, range=[0x%016lx-0x%016lx) (%luMB)\n",
614 i, md->type, md->attribute, md->phys_addr,
615 md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT),
616 md->num_pages >> (20 - EFI_PAGE_SHIFT));
617 }
618 }
619#endif
620
621 efi_map_pal_code();
622 efi_enter_virtual_mode();
623}
624
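efi_init() scans the raw command line by hand because it runs before the normal option parser is available: "mem=" caps how much memory is handed to the kernel, and "max_addr=" (rounded down to a granule boundary) caps the highest physical address used. Both values go through memparse(), so the usual K/M size suffixes apply; illustrative boot-line values only:

	mem=512M	limit usable memory to 512MB
	max_addr=4096M	ignore physical memory above 4GB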
625void
626efi_enter_virtual_mode (void)
627{
628 void *efi_map_start, *efi_map_end, *p;
629 efi_memory_desc_t *md;
630 efi_status_t status;
631 u64 efi_desc_size;
632
633 efi_map_start = __va(ia64_boot_param->efi_memmap);
634 efi_map_end = efi_map_start + ia64_boot_param->efi_memmap_size;
635 efi_desc_size = ia64_boot_param->efi_memdesc_size;
636
637 for (p = efi_map_start; p < efi_map_end; p += efi_desc_size) {
638 md = p;
639 if (md->attribute & EFI_MEMORY_RUNTIME) {
640 /*
641 * Some descriptors have multiple bits set, so the order of
642 * the tests is relevant.
643 */
644 if (md->attribute & EFI_MEMORY_WB) {
645 md->virt_addr = (u64) __va(md->phys_addr);
646 } else if (md->attribute & EFI_MEMORY_UC) {
647 md->virt_addr = (u64) ioremap(md->phys_addr, 0);
648 } else if (md->attribute & EFI_MEMORY_WC) {
649#if 0
650 md->virt_addr = ia64_remap(md->phys_addr, (_PAGE_A | _PAGE_P
651 | _PAGE_D
652 | _PAGE_MA_WC
653 | _PAGE_PL_0
654 | _PAGE_AR_RW));
655#else
656 printk(KERN_INFO "EFI_MEMORY_WC mapping\n");
657 md->virt_addr = (u64) ioremap(md->phys_addr, 0);
658#endif
659 } else if (md->attribute & EFI_MEMORY_WT) {
660#if 0
661 md->virt_addr = ia64_remap(md->phys_addr, (_PAGE_A | _PAGE_P
662 | _PAGE_D | _PAGE_MA_WT
663 | _PAGE_PL_0
664 | _PAGE_AR_RW));
665#else
666 printk(KERN_INFO "EFI_MEMORY_WT mapping\n");
667 md->virt_addr = (u64) ioremap(md->phys_addr, 0);
668#endif
669 }
670 }
671 }
672
673 status = efi_call_phys(__va(runtime->set_virtual_address_map),
674 ia64_boot_param->efi_memmap_size,
675 efi_desc_size, ia64_boot_param->efi_memdesc_version,
676 ia64_boot_param->efi_memmap);
677 if (status != EFI_SUCCESS) {
678 printk(KERN_WARNING "warning: unable to switch EFI into virtual mode "
679 "(status=%lu)\n", status);
680 return;
681 }
682
683 /*
684 * Now that EFI is in virtual mode, we call the EFI functions more efficiently:
685 */
686 efi.get_time = virt_get_time;
687 efi.set_time = virt_set_time;
688 efi.get_wakeup_time = virt_get_wakeup_time;
689 efi.set_wakeup_time = virt_set_wakeup_time;
690 efi.get_variable = virt_get_variable;
691 efi.get_next_variable = virt_get_next_variable;
692 efi.set_variable = virt_set_variable;
693 efi.get_next_high_mono_count = virt_get_next_high_mono_count;
694 efi.reset_system = virt_reset_system;
695}
696
697/*
698 * Walk the EFI memory map looking for the I/O port range. There can only be one entry of
 699 * this type; other I/O port ranges should be described via ACPI.
700 */
701u64
702efi_get_iobase (void)
703{
704 void *efi_map_start, *efi_map_end, *p;
705 efi_memory_desc_t *md;
706 u64 efi_desc_size;
707
708 efi_map_start = __va(ia64_boot_param->efi_memmap);
709 efi_map_end = efi_map_start + ia64_boot_param->efi_memmap_size;
710 efi_desc_size = ia64_boot_param->efi_memdesc_size;
711
712 for (p = efi_map_start; p < efi_map_end; p += efi_desc_size) {
713 md = p;
714 if (md->type == EFI_MEMORY_MAPPED_IO_PORT_SPACE) {
715 if (md->attribute & EFI_MEMORY_UC)
716 return md->phys_addr;
717 }
718 }
719 return 0;
720}
721
722u32
723efi_mem_type (unsigned long phys_addr)
724{
725 void *efi_map_start, *efi_map_end, *p;
726 efi_memory_desc_t *md;
727 u64 efi_desc_size;
728
729 efi_map_start = __va(ia64_boot_param->efi_memmap);
730 efi_map_end = efi_map_start + ia64_boot_param->efi_memmap_size;
731 efi_desc_size = ia64_boot_param->efi_memdesc_size;
732
733 for (p = efi_map_start; p < efi_map_end; p += efi_desc_size) {
734 md = p;
735
736 if (phys_addr - md->phys_addr < (md->num_pages << EFI_PAGE_SHIFT))
737 return md->type;
738 }
739 return 0;
740}
741
742u64
743efi_mem_attributes (unsigned long phys_addr)
744{
745 void *efi_map_start, *efi_map_end, *p;
746 efi_memory_desc_t *md;
747 u64 efi_desc_size;
748
749 efi_map_start = __va(ia64_boot_param->efi_memmap);
750 efi_map_end = efi_map_start + ia64_boot_param->efi_memmap_size;
751 efi_desc_size = ia64_boot_param->efi_memdesc_size;
752
753 for (p = efi_map_start; p < efi_map_end; p += efi_desc_size) {
754 md = p;
755
756 if (phys_addr - md->phys_addr < (md->num_pages << EFI_PAGE_SHIFT))
757 return md->attribute;
758 }
759 return 0;
760}
761EXPORT_SYMBOL(efi_mem_attributes);
762
763int
764valid_phys_addr_range (unsigned long phys_addr, unsigned long *size)
765{
766 void *efi_map_start, *efi_map_end, *p;
767 efi_memory_desc_t *md;
768 u64 efi_desc_size;
769
770 efi_map_start = __va(ia64_boot_param->efi_memmap);
771 efi_map_end = efi_map_start + ia64_boot_param->efi_memmap_size;
772 efi_desc_size = ia64_boot_param->efi_memdesc_size;
773
774 for (p = efi_map_start; p < efi_map_end; p += efi_desc_size) {
775 md = p;
776
777 if (phys_addr - md->phys_addr < (md->num_pages << EFI_PAGE_SHIFT)) {
778 if (!(md->attribute & EFI_MEMORY_WB))
779 return 0;
780
781 if (*size > md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT) - phys_addr)
782 *size = md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT) - phys_addr;
783 return 1;
784 }
785 }
786 return 0;
787}
788
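efi_mem_type(), efi_mem_attributes() and valid_phys_addr_range() above all test descriptor membership with a single compare, phys_addr - md->phys_addr < (md->num_pages << EFI_PAGE_SHIFT). The trick relies on unsigned wrap-around: an address below md->phys_addr underflows to a huge value and fails the comparison, so one test covers both bounds. A stand-alone sketch of the idiom:

	/* Illustrative only: one unsigned compare checks both range bounds. */
	static int
	addr_in_range (unsigned long addr, unsigned long base, unsigned long len)
	{
		/* equivalent to (addr >= base && addr - base < len) */
		return addr - base < len;
	}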
789int __init
790efi_uart_console_only(void)
791{
792 efi_status_t status;
793 char *s, name[] = "ConOut";
794 efi_guid_t guid = EFI_GLOBAL_VARIABLE_GUID;
795 efi_char16_t *utf16, name_utf16[32];
796 unsigned char data[1024];
797 unsigned long size = sizeof(data);
798 struct efi_generic_dev_path *hdr, *end_addr;
799 int uart = 0;
800
801 /* Convert to UTF-16 */
802 utf16 = name_utf16;
803 s = name;
804 while (*s)
805 *utf16++ = *s++ & 0x7f;
806 *utf16 = 0;
807
808 status = efi.get_variable(name_utf16, &guid, NULL, &size, data);
809 if (status != EFI_SUCCESS) {
810 printk(KERN_ERR "No EFI %s variable?\n", name);
811 return 0;
812 }
813
814 hdr = (struct efi_generic_dev_path *) data;
815 end_addr = (struct efi_generic_dev_path *) ((u8 *) data + size);
816 while (hdr < end_addr) {
817 if (hdr->type == EFI_DEV_MSG &&
818 hdr->sub_type == EFI_DEV_MSG_UART)
819 uart = 1;
820 else if (hdr->type == EFI_DEV_END_PATH ||
821 hdr->type == EFI_DEV_END_PATH2) {
822 if (!uart)
823 return 0;
824 if (hdr->sub_type == EFI_DEV_END_ENTIRE)
825 return 1;
826 uart = 0;
827 }
828 hdr = (struct efi_generic_dev_path *) ((u8 *) hdr + hdr->length);
829 }
830 printk(KERN_ERR "Malformed %s value\n", name);
831 return 0;
832}
diff --git a/arch/ia64/kernel/efi_stub.S b/arch/ia64/kernel/efi_stub.S
new file mode 100644
index 000000000000..5a7fe70212a9
--- /dev/null
+++ b/arch/ia64/kernel/efi_stub.S
@@ -0,0 +1,86 @@
1/*
2 * EFI call stub.
3 *
4 * Copyright (C) 1999-2001 Hewlett-Packard Co
5 * David Mosberger <davidm@hpl.hp.com>
6 *
7 * This stub allows us to make EFI calls in physical mode with interrupts
 8 * turned off. We need this because we can't call SetVirtualAddressMap() until
 9 * the kernel has booted far enough to allow allocation of struct vm_area_struct
10 * entries (which we would need to map stuff with memory attributes other
11 * than uncached or writeback...). Since the GetTime() service gets called
12 * earlier than that, we need to be able to make physical mode EFI calls from
13 * the kernel.
14 */
15
16/*
17 * PSR settings as per SAL spec (Chapter 8 in the "IA-64 System
18 * Abstraction Layer Specification", revision 2.6e). Note that
19 * psr.dfl and psr.dfh MUST be cleared, despite what this manual says.
20 * Otherwise, SAL dies whenever it's trying to do an IA-32 BIOS call
21 * (the br.ia instruction fails unless psr.dfl and psr.dfh are
22 * cleared). Fortunately, SAL promises not to touch the floating
23 * point regs, so at least we don't have to save f2-f127.
24 */
25#define PSR_BITS_TO_CLEAR \
26 (IA64_PSR_I | IA64_PSR_IT | IA64_PSR_DT | IA64_PSR_RT | \
27 IA64_PSR_DD | IA64_PSR_SS | IA64_PSR_RI | IA64_PSR_ED | \
28 IA64_PSR_DFL | IA64_PSR_DFH)
29
30#define PSR_BITS_TO_SET \
31 (IA64_PSR_BN)
32
33#include <asm/processor.h>
34#include <asm/asmmacro.h>
35
36/*
37 * Inputs:
38 * in0 = address of function descriptor of EFI routine to call
39 * in1..in7 = arguments to routine
40 *
41 * Outputs:
42 * r8 = EFI_STATUS returned by called function
43 */
44
45GLOBAL_ENTRY(efi_call_phys)
46 .prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(8)
47 alloc loc1=ar.pfs,8,7,7,0
48 ld8 r2=[in0],8 // load EFI function's entry point
49 mov loc0=rp
50 .body
51 ;;
52 mov loc2=gp // save global pointer
53 mov loc4=ar.rsc // save RSE configuration
54 mov ar.rsc=0 // put RSE in enforced lazy, LE mode
55 ;;
56 ld8 gp=[in0] // load EFI function's global pointer
57 movl r16=PSR_BITS_TO_CLEAR
58 mov loc3=psr // save processor status word
59 movl r17=PSR_BITS_TO_SET
60 ;;
61 or loc3=loc3,r17
62 mov b6=r2
63 ;;
64 andcm r16=loc3,r16 // get psr with IT, DT, and RT bits cleared
65 br.call.sptk.many rp=ia64_switch_mode_phys
66.ret0: mov out4=in5
67 mov out0=in1
68 mov out1=in2
69 mov out2=in3
70 mov out3=in4
71 mov out5=in6
72 mov out6=in7
73 mov loc5=r19
74 mov loc6=r20
75 br.call.sptk.many rp=b6 // call the EFI function
76.ret1: mov ar.rsc=0 // put RSE in enforced lazy, LE mode
77 mov r16=loc3
78 mov r19=loc5
79 mov r20=loc6
80 br.call.sptk.many rp=ia64_switch_mode_virt // return to virtual mode
81.ret2: mov ar.rsc=loc4 // restore RSE configuration
82 mov ar.pfs=loc1
83 mov rp=loc0
84 mov gp=loc2
85 br.ret.sptk.many rp
86END(efi_call_phys)
diff --git a/arch/ia64/kernel/entry.S b/arch/ia64/kernel/entry.S
new file mode 100644
index 000000000000..0272c010a3ba
--- /dev/null
+++ b/arch/ia64/kernel/entry.S
@@ -0,0 +1,1587 @@
1/*
2 * ia64/kernel/entry.S
3 *
4 * Kernel entry points.
5 *
6 * Copyright (C) 1998-2003, 2005 Hewlett-Packard Co
7 * David Mosberger-Tang <davidm@hpl.hp.com>
8 * Copyright (C) 1999, 2002-2003
9 * Asit Mallick <Asit.K.Mallick@intel.com>
10 * Don Dugger <Don.Dugger@intel.com>
11 * Suresh Siddha <suresh.b.siddha@intel.com>
12 * Fenghua Yu <fenghua.yu@intel.com>
13 * Copyright (C) 1999 VA Linux Systems
14 * Copyright (C) 1999 Walt Drummond <drummond@valinux.com>
15 */
16/*
 17 * ia64_switch_to now places correct virtual mapping in TR2 for
18 * kernel stack. This allows us to handle interrupts without changing
19 * to physical mode.
20 *
21 * Jonathan Nicklin <nicklin@missioncriticallinux.com>
22 * Patrick O'Rourke <orourke@missioncriticallinux.com>
23 * 11/07/2000
24 */
25/*
26 * Global (preserved) predicate usage on syscall entry/exit path:
27 *
28 * pKStk: See entry.h.
29 * pUStk: See entry.h.
30 * pSys: See entry.h.
31 * pNonSys: !pSys
32 */
33
34#include <linux/config.h>
35
36#include <asm/asmmacro.h>
37#include <asm/cache.h>
38#include <asm/errno.h>
39#include <asm/kregs.h>
40#include <asm/offsets.h>
41#include <asm/pgtable.h>
42#include <asm/percpu.h>
43#include <asm/processor.h>
44#include <asm/thread_info.h>
45#include <asm/unistd.h>
46
47#include "minstate.h"
48
49 /*
50 * execve() is special because in case of success, we need to
51 * setup a null register window frame.
52 */
53ENTRY(ia64_execve)
54 /*
55 * Allocate 8 input registers since ptrace() may clobber them
56 */
57 .prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(8)
58 alloc loc1=ar.pfs,8,2,4,0
59 mov loc0=rp
60 .body
61 mov out0=in0 // filename
62 ;; // stop bit between alloc and call
63 mov out1=in1 // argv
64 mov out2=in2 // envp
65 add out3=16,sp // regs
66 br.call.sptk.many rp=sys_execve
67.ret0:
68#ifdef CONFIG_IA32_SUPPORT
69 /*
70 * Check if we're returning to ia32 mode. If so, we need to restore ia32 registers
71 * from pt_regs.
72 */
73 adds r16=PT(CR_IPSR)+16,sp
74 ;;
75 ld8 r16=[r16]
76#endif
77 cmp4.ge p6,p7=r8,r0
78 mov ar.pfs=loc1 // restore ar.pfs
79 sxt4 r8=r8 // return 64-bit result
80 ;;
81 stf.spill [sp]=f0
82(p6) cmp.ne pKStk,pUStk=r0,r0 // a successful execve() lands us in user-mode...
83 mov rp=loc0
84(p6) mov ar.pfs=r0 // clear ar.pfs on success
85(p7) br.ret.sptk.many rp
86
87 /*
88 * In theory, we'd have to zap this state only to prevent leaking of
89 * security sensitive state (e.g., if current->mm->dumpable is zero). However,
90 * this executes in less than 20 cycles even on Itanium, so it's not worth
 91	 * optimizing for...
92 */
93 mov ar.unat=0; mov ar.lc=0
94 mov r4=0; mov f2=f0; mov b1=r0
95 mov r5=0; mov f3=f0; mov b2=r0
96 mov r6=0; mov f4=f0; mov b3=r0
97 mov r7=0; mov f5=f0; mov b4=r0
98 ldf.fill f12=[sp]; mov f13=f0; mov b5=r0
99 ldf.fill f14=[sp]; ldf.fill f15=[sp]; mov f16=f0
100 ldf.fill f17=[sp]; ldf.fill f18=[sp]; mov f19=f0
101 ldf.fill f20=[sp]; ldf.fill f21=[sp]; mov f22=f0
102 ldf.fill f23=[sp]; ldf.fill f24=[sp]; mov f25=f0
103 ldf.fill f26=[sp]; ldf.fill f27=[sp]; mov f28=f0
104 ldf.fill f29=[sp]; ldf.fill f30=[sp]; mov f31=f0
105#ifdef CONFIG_IA32_SUPPORT
106 tbit.nz p6,p0=r16, IA64_PSR_IS_BIT
107 movl loc0=ia64_ret_from_ia32_execve
108 ;;
109(p6) mov rp=loc0
110#endif
111 br.ret.sptk.many rp
112END(ia64_execve)
113
114/*
115 * sys_clone2(u64 flags, u64 ustack_base, u64 ustack_size, u64 parent_tidptr, u64 child_tidptr,
116 * u64 tls)
117 */
118GLOBAL_ENTRY(sys_clone2)
119 /*
120 * Allocate 8 input registers since ptrace() may clobber them
121 */
122 .prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(8)
123 alloc r16=ar.pfs,8,2,6,0
124 DO_SAVE_SWITCH_STACK
125 adds r2=PT(R16)+IA64_SWITCH_STACK_SIZE+16,sp
126 mov loc0=rp
127 mov loc1=r16 // save ar.pfs across do_fork
128 .body
129 mov out1=in1
130 mov out3=in2
131 tbit.nz p6,p0=in0,CLONE_SETTLS_BIT
132 mov out4=in3 // parent_tidptr: valid only w/CLONE_PARENT_SETTID
133 ;;
134(p6) st8 [r2]=in5 // store TLS in r16 for copy_thread()
135 mov out5=in4 // child_tidptr: valid only w/CLONE_CHILD_SETTID or CLONE_CHILD_CLEARTID
136 adds out2=IA64_SWITCH_STACK_SIZE+16,sp // out2 = &regs
137 mov out0=in0 // out0 = clone_flags
138 br.call.sptk.many rp=do_fork
139.ret1: .restore sp
140 adds sp=IA64_SWITCH_STACK_SIZE,sp // pop the switch stack
141 mov ar.pfs=loc1
142 mov rp=loc0
143 br.ret.sptk.many rp
144END(sys_clone2)
145
146/*
147 * sys_clone(u64 flags, u64 ustack_base, u64 parent_tidptr, u64 child_tidptr, u64 tls)
148 * Deprecated. Use sys_clone2() instead.
149 */
150GLOBAL_ENTRY(sys_clone)
151 /*
152 * Allocate 8 input registers since ptrace() may clobber them
153 */
154 .prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(8)
155 alloc r16=ar.pfs,8,2,6,0
156 DO_SAVE_SWITCH_STACK
157 adds r2=PT(R16)+IA64_SWITCH_STACK_SIZE+16,sp
158 mov loc0=rp
159 mov loc1=r16 // save ar.pfs across do_fork
160 .body
161 mov out1=in1
162 mov out3=16 // stacksize (compensates for 16-byte scratch area)
163 tbit.nz p6,p0=in0,CLONE_SETTLS_BIT
164 mov out4=in2 // parent_tidptr: valid only w/CLONE_PARENT_SETTID
165 ;;
166(p6) st8 [r2]=in4 // store TLS in r13 (tp)
167 mov out5=in3 // child_tidptr: valid only w/CLONE_CHILD_SETTID or CLONE_CHILD_CLEARTID
168 adds out2=IA64_SWITCH_STACK_SIZE+16,sp // out2 = &regs
169 mov out0=in0 // out0 = clone_flags
170 br.call.sptk.many rp=do_fork
171.ret2: .restore sp
172 adds sp=IA64_SWITCH_STACK_SIZE,sp // pop the switch stack
173 mov ar.pfs=loc1
174 mov rp=loc0
175 br.ret.sptk.many rp
176END(sys_clone)
177
178/*
179 * prev_task <- ia64_switch_to(struct task_struct *next)
180 * With Ingo's new scheduler, interrupts are disabled when this routine gets
181 * called. The code starting at .map relies on this. The rest of the code
182 * doesn't care about the interrupt masking status.
183 */
184GLOBAL_ENTRY(ia64_switch_to)
185 .prologue
186 alloc r16=ar.pfs,1,0,0,0
187 DO_SAVE_SWITCH_STACK
188 .body
189
190 adds r22=IA64_TASK_THREAD_KSP_OFFSET,r13
191 movl r25=init_task
192 mov r27=IA64_KR(CURRENT_STACK)
193 adds r21=IA64_TASK_THREAD_KSP_OFFSET,in0
194 dep r20=0,in0,61,3 // physical address of "next"
195 ;;
196 st8 [r22]=sp // save kernel stack pointer of old task
197 shr.u r26=r20,IA64_GRANULE_SHIFT
198 cmp.eq p7,p6=r25,in0
199 ;;
200 /*
201 * If we've already mapped this task's page, we can skip doing it again.
202 */
203(p6) cmp.eq p7,p6=r26,r27
204(p6) br.cond.dpnt .map
205 ;;
206.done:
207(p6) ssm psr.ic // if we had to map, reenable the psr.ic bit FIRST!!!
208 ;;
209(p6) srlz.d
210 ld8 sp=[r21] // load kernel stack pointer of new task
211 mov IA64_KR(CURRENT)=in0 // update "current" application register
212 mov r8=r13 // return pointer to previously running task
213 mov r13=in0 // set "current" pointer
214 ;;
215 DO_LOAD_SWITCH_STACK
216
217#ifdef CONFIG_SMP
218 sync.i // ensure "fc"s done by this CPU are visible on other CPUs
219#endif
220 br.ret.sptk.many rp // boogie on out in new context
221
222.map:
223 rsm psr.ic // interrupts (psr.i) are already disabled here
224 movl r25=PAGE_KERNEL
225 ;;
226 srlz.d
227 or r23=r25,r20 // construct PA | page properties
228 mov r25=IA64_GRANULE_SHIFT<<2
229 ;;
230 mov cr.itir=r25
231 mov cr.ifa=in0 // VA of next task...
232 ;;
233 mov r25=IA64_TR_CURRENT_STACK
234 mov IA64_KR(CURRENT_STACK)=r26 // remember last page we mapped...
235 ;;
236 itr.d dtr[r25]=r23 // wire in new mapping...
237 br.cond.sptk .done
238END(ia64_switch_to)
239
240/*
241 * Note that interrupts are enabled during save_switch_stack and load_switch_stack. This
242 * means that we may get an interrupt with "sp" pointing to the new kernel stack while
243 * ar.bspstore is still pointing to the old kernel backing store area. Since ar.rsc,
244 * ar.rnat, ar.bsp, and ar.bspstore are all preserved by interrupts, this is not a
245 * problem. Also, we don't need to specify unwind information for preserved registers
246 * that are not modified in save_switch_stack as the right unwind information is already
247 * specified at the call-site of save_switch_stack.
248 */
249
250/*
251 * save_switch_stack:
252 * - r16 holds ar.pfs
253 * - b7 holds address to return to
254 * - rp (b0) holds return address to save
255 */
256GLOBAL_ENTRY(save_switch_stack)
257 .prologue
258 .altrp b7
259 flushrs // flush dirty regs to backing store (must be first in insn group)
260 .save @priunat,r17
261 mov r17=ar.unat // preserve caller's
262 .body
263#ifdef CONFIG_ITANIUM
264 adds r2=16+128,sp
265 adds r3=16+64,sp
266 adds r14=SW(R4)+16,sp
267 ;;
268 st8.spill [r14]=r4,16 // spill r4
269 lfetch.fault.excl.nt1 [r3],128
270 ;;
271 lfetch.fault.excl.nt1 [r2],128
272 lfetch.fault.excl.nt1 [r3],128
273 ;;
274 lfetch.fault.excl [r2]
275 lfetch.fault.excl [r3]
276 adds r15=SW(R5)+16,sp
277#else
278 add r2=16+3*128,sp
279 add r3=16,sp
280 add r14=SW(R4)+16,sp
281 ;;
282 st8.spill [r14]=r4,SW(R6)-SW(R4) // spill r4 and prefetch offset 0x1c0
283 lfetch.fault.excl.nt1 [r3],128 // prefetch offset 0x010
284 ;;
285 lfetch.fault.excl.nt1 [r3],128 // prefetch offset 0x090
286 lfetch.fault.excl.nt1 [r2],128 // prefetch offset 0x190
287 ;;
288 lfetch.fault.excl.nt1 [r3] // prefetch offset 0x110
289 lfetch.fault.excl.nt1 [r2] // prefetch offset 0x210
290 adds r15=SW(R5)+16,sp
291#endif
292 ;;
293 st8.spill [r15]=r5,SW(R7)-SW(R5) // spill r5
294 mov.m ar.rsc=0 // put RSE in mode: enforced lazy, little endian, pl 0
295 add r2=SW(F2)+16,sp // r2 = &sw->f2
296 ;;
297 st8.spill [r14]=r6,SW(B0)-SW(R6) // spill r6
298 mov.m r18=ar.fpsr // preserve fpsr
299 add r3=SW(F3)+16,sp // r3 = &sw->f3
300 ;;
301 stf.spill [r2]=f2,32
302 mov.m r19=ar.rnat
303 mov r21=b0
304
305 stf.spill [r3]=f3,32
306 st8.spill [r15]=r7,SW(B2)-SW(R7) // spill r7
307 mov r22=b1
308 ;;
309 // since we're done with the spills, read and save ar.unat:
310 mov.m r29=ar.unat
311 mov.m r20=ar.bspstore
312 mov r23=b2
313 stf.spill [r2]=f4,32
314 stf.spill [r3]=f5,32
315 mov r24=b3
316 ;;
317 st8 [r14]=r21,SW(B1)-SW(B0) // save b0
318 st8 [r15]=r23,SW(B3)-SW(B2) // save b2
319 mov r25=b4
320 mov r26=b5
321 ;;
322 st8 [r14]=r22,SW(B4)-SW(B1) // save b1
323 st8 [r15]=r24,SW(AR_PFS)-SW(B3) // save b3
324 mov r21=ar.lc // I-unit
325 stf.spill [r2]=f12,32
326 stf.spill [r3]=f13,32
327 ;;
328 st8 [r14]=r25,SW(B5)-SW(B4) // save b4
329 st8 [r15]=r16,SW(AR_LC)-SW(AR_PFS) // save ar.pfs
330 stf.spill [r2]=f14,32
331 stf.spill [r3]=f15,32
332 ;;
333 st8 [r14]=r26 // save b5
334 st8 [r15]=r21 // save ar.lc
335 stf.spill [r2]=f16,32
336 stf.spill [r3]=f17,32
337 ;;
338 stf.spill [r2]=f18,32
339 stf.spill [r3]=f19,32
340 ;;
341 stf.spill [r2]=f20,32
342 stf.spill [r3]=f21,32
343 ;;
344 stf.spill [r2]=f22,32
345 stf.spill [r3]=f23,32
346 ;;
347 stf.spill [r2]=f24,32
348 stf.spill [r3]=f25,32
349 ;;
350 stf.spill [r2]=f26,32
351 stf.spill [r3]=f27,32
352 ;;
353 stf.spill [r2]=f28,32
354 stf.spill [r3]=f29,32
355 ;;
356 stf.spill [r2]=f30,SW(AR_UNAT)-SW(F30)
357 stf.spill [r3]=f31,SW(PR)-SW(F31)
358 add r14=SW(CALLER_UNAT)+16,sp
359 ;;
360 st8 [r2]=r29,SW(AR_RNAT)-SW(AR_UNAT) // save ar.unat
361 st8 [r14]=r17,SW(AR_FPSR)-SW(CALLER_UNAT) // save caller_unat
362 mov r21=pr
363 ;;
364 st8 [r2]=r19,SW(AR_BSPSTORE)-SW(AR_RNAT) // save ar.rnat
365 st8 [r3]=r21 // save predicate registers
366 ;;
367 st8 [r2]=r20 // save ar.bspstore
368 st8 [r14]=r18 // save fpsr
369 mov ar.rsc=3 // put RSE back into eager mode, pl 0
370 br.cond.sptk.many b7
371END(save_switch_stack)
372
373/*
374 * load_switch_stack:
375 * - "invala" MUST be done at call site (normally in DO_LOAD_SWITCH_STACK)
376 * - b7 holds address to return to
377 * - must not touch r8-r11
378 */
379ENTRY(load_switch_stack)
380 .prologue
381 .altrp b7
382
383 .body
384 lfetch.fault.nt1 [sp]
385 adds r2=SW(AR_BSPSTORE)+16,sp
386 adds r3=SW(AR_UNAT)+16,sp
387 mov ar.rsc=0 // put RSE into enforced lazy mode
388 adds r14=SW(CALLER_UNAT)+16,sp
389 adds r15=SW(AR_FPSR)+16,sp
390 ;;
391 ld8 r27=[r2],(SW(B0)-SW(AR_BSPSTORE)) // bspstore
392 ld8 r29=[r3],(SW(B1)-SW(AR_UNAT)) // unat
393 ;;
394 ld8 r21=[r2],16 // restore b0
395 ld8 r22=[r3],16 // restore b1
396 ;;
397 ld8 r23=[r2],16 // restore b2
398 ld8 r24=[r3],16 // restore b3
399 ;;
400 ld8 r25=[r2],16 // restore b4
401 ld8 r26=[r3],16 // restore b5
402 ;;
403 ld8 r16=[r2],(SW(PR)-SW(AR_PFS)) // ar.pfs
404 ld8 r17=[r3],(SW(AR_RNAT)-SW(AR_LC)) // ar.lc
405 ;;
406 ld8 r28=[r2] // restore pr
407 ld8 r30=[r3] // restore rnat
408 ;;
409 ld8 r18=[r14],16 // restore caller's unat
410 ld8 r19=[r15],24 // restore fpsr
411 ;;
412 ldf.fill f2=[r14],32
413 ldf.fill f3=[r15],32
414 ;;
415 ldf.fill f4=[r14],32
416 ldf.fill f5=[r15],32
417 ;;
418 ldf.fill f12=[r14],32
419 ldf.fill f13=[r15],32
420 ;;
421 ldf.fill f14=[r14],32
422 ldf.fill f15=[r15],32
423 ;;
424 ldf.fill f16=[r14],32
425 ldf.fill f17=[r15],32
426 ;;
427 ldf.fill f18=[r14],32
428 ldf.fill f19=[r15],32
429 mov b0=r21
430 ;;
431 ldf.fill f20=[r14],32
432 ldf.fill f21=[r15],32
433 mov b1=r22
434 ;;
435 ldf.fill f22=[r14],32
436 ldf.fill f23=[r15],32
437 mov b2=r23
438 ;;
439 mov ar.bspstore=r27
440 mov ar.unat=r29 // establish unat holding the NaT bits for r4-r7
441 mov b3=r24
442 ;;
443 ldf.fill f24=[r14],32
444 ldf.fill f25=[r15],32
445 mov b4=r25
446 ;;
447 ldf.fill f26=[r14],32
448 ldf.fill f27=[r15],32
449 mov b5=r26
450 ;;
451 ldf.fill f28=[r14],32
452 ldf.fill f29=[r15],32
453 mov ar.pfs=r16
454 ;;
455 ldf.fill f30=[r14],32
456 ldf.fill f31=[r15],24
457 mov ar.lc=r17
458 ;;
459 ld8.fill r4=[r14],16
460 ld8.fill r5=[r15],16
461 mov pr=r28,-1
462 ;;
463 ld8.fill r6=[r14],16
464 ld8.fill r7=[r15],16
465
466 mov ar.unat=r18 // restore caller's unat
467 mov ar.rnat=r30 // must restore after bspstore but before rsc!
468 mov ar.fpsr=r19 // restore fpsr
469 mov ar.rsc=3 // put RSE back into eager mode, pl 0
470 br.cond.sptk.many b7
471END(load_switch_stack)
472
473GLOBAL_ENTRY(__ia64_syscall)
474 .regstk 6,0,0,0
475 mov r15=in5 // put syscall number in place
476 break __BREAK_SYSCALL
477 movl r2=errno
478 cmp.eq p6,p7=-1,r10
479 ;;
480(p6) st4 [r2]=r8
481(p6) mov r8=-1
482 br.ret.sptk.many rp
483END(__ia64_syscall)
484
485GLOBAL_ENTRY(execve)
486 mov r15=__NR_execve // put syscall number in place
487 break __BREAK_SYSCALL
488 br.ret.sptk.many rp
489END(execve)
490
491GLOBAL_ENTRY(clone)
492 mov r15=__NR_clone // put syscall number in place
493 break __BREAK_SYSCALL
494 br.ret.sptk.many rp
495END(clone)
496
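The execve() and clone() wrappers above use the same user-side convention that __ia64_syscall() spells out: the syscall number goes in r15, "break __BREAK_SYSCALL" enters the kernel, and on return r10 == -1 marks failure with the positive error number in r8, which the stub then copies into errno and replaces with -1. In C, that return-value handling amounts to roughly the sketch below; the helper name and parameters are illustrative only:

	extern int errno;	/* same symbol the stub above stores into */

	/* Illustrative sketch of the r8/r10 error convention used by __ia64_syscall. */
	static long
	ia64_syscall_result (long r8, long r10)
	{
		if (r10 == -1) {
			errno = r8;	/* r8 holds the positive error number */
			return -1;
		}
		return r8;		/* success: r8 is the syscall's return value */
	}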
497 /*
498 * Invoke a system call, but do some tracing before and after the call.
499 * We MUST preserve the current register frame throughout this routine
500 * because some system calls (such as ia64_execve) directly
501 * manipulate ar.pfs.
502 */
503GLOBAL_ENTRY(ia64_trace_syscall)
504 PT_REGS_UNWIND_INFO(0)
505 /*
506 * We need to preserve the scratch registers f6-f11 in case the system
507 * call is sigreturn.
508 */
509 adds r16=PT(F6)+16,sp
510 adds r17=PT(F7)+16,sp
511 ;;
512 stf.spill [r16]=f6,32
513 stf.spill [r17]=f7,32
514 ;;
515 stf.spill [r16]=f8,32
516 stf.spill [r17]=f9,32
517 ;;
518 stf.spill [r16]=f10
519 stf.spill [r17]=f11
520 br.call.sptk.many rp=syscall_trace_enter // give parent a chance to catch syscall args
521 adds r16=PT(F6)+16,sp
522 adds r17=PT(F7)+16,sp
523 ;;
524 ldf.fill f6=[r16],32
525 ldf.fill f7=[r17],32
526 ;;
527 ldf.fill f8=[r16],32
528 ldf.fill f9=[r17],32
529 ;;
530 ldf.fill f10=[r16]
531 ldf.fill f11=[r17]
532 // the syscall number may have changed, so re-load it and re-calculate the
533 // syscall entry-point:
534 adds r15=PT(R15)+16,sp // r15 = &pt_regs.r15 (syscall #)
535 ;;
536 ld8 r15=[r15]
537 mov r3=NR_syscalls - 1
538 ;;
539 adds r15=-1024,r15
540 movl r16=sys_call_table
541 ;;
542 shladd r20=r15,3,r16 // r20 = sys_call_table + 8*(syscall-1024)
543 cmp.leu p6,p7=r15,r3
544 ;;
545(p6) ld8 r20=[r20] // load address of syscall entry point
546(p7) movl r20=sys_ni_syscall
547 ;;
548 mov b6=r20
549 br.call.sptk.many rp=b6 // do the syscall
550.strace_check_retval:
551 cmp.lt p6,p0=r8,r0 // syscall failed?
552 adds r2=PT(R8)+16,sp // r2 = &pt_regs.r8
553 adds r3=PT(R10)+16,sp // r3 = &pt_regs.r10
554 mov r10=0
555(p6) br.cond.sptk strace_error // syscall failed ->
556 ;; // avoid RAW on r10
557.strace_save_retval:
558.mem.offset 0,0; st8.spill [r2]=r8 // store return value in slot for r8
559.mem.offset 8,0; st8.spill [r3]=r10 // clear error indication in slot for r10
560 br.call.sptk.many rp=syscall_trace_leave // give parent a chance to catch return value
561.ret3: br.cond.sptk .work_pending_syscall_end
562
563strace_error:
564 ld8 r3=[r2] // load pt_regs.r8
565 sub r9=0,r8 // negate return value to get errno value
566 ;;
567 cmp.ne p6,p0=r3,r0 // is pt_regs.r8!=0?
568 adds r3=16,r2 // r3=&pt_regs.r10
569 ;;
570(p6) mov r10=-1
571(p6) mov r8=r9
572 br.cond.sptk .strace_save_retval
573END(ia64_trace_syscall)
574
575 /*
576 * When traced and returning from sigreturn, we invoke syscall_trace but then
577 * go straight to ia64_leave_kernel rather than ia64_leave_syscall.
578 */
579GLOBAL_ENTRY(ia64_strace_leave_kernel)
580 PT_REGS_UNWIND_INFO(0)
581{ /*
582 * Some versions of gas generate bad unwind info if the first instruction of a
583 * procedure doesn't go into the first slot of a bundle. This is a workaround.
584 */
585 nop.m 0
586 nop.i 0
587 br.call.sptk.many rp=syscall_trace_leave // give parent a chance to catch return value
588}
589.ret4: br.cond.sptk ia64_leave_kernel
590END(ia64_strace_leave_kernel)
591
592GLOBAL_ENTRY(ia64_ret_from_clone)
593 PT_REGS_UNWIND_INFO(0)
594{ /*
595 * Some versions of gas generate bad unwind info if the first instruction of a
596 * procedure doesn't go into the first slot of a bundle. This is a workaround.
597 */
598 nop.m 0
599 nop.i 0
600 /*
601 * We need to call schedule_tail() to complete the scheduling process.
602 * Called by ia64_switch_to() after do_fork()->copy_thread(). r8 contains the
603 * address of the previously executing task.
604 */
605 br.call.sptk.many rp=ia64_invoke_schedule_tail
606}
607.ret8:
608 adds r2=TI_FLAGS+IA64_TASK_SIZE,r13
609 ;;
610 ld4 r2=[r2]
611 ;;
612 mov r8=0
613 and r2=_TIF_SYSCALL_TRACEAUDIT,r2
614 ;;
615 cmp.ne p6,p0=r2,r0
616(p6) br.cond.spnt .strace_check_retval
617 ;; // added stop bits to prevent r8 dependency
618END(ia64_ret_from_clone)
619 // fall through
620GLOBAL_ENTRY(ia64_ret_from_syscall)
621 PT_REGS_UNWIND_INFO(0)
622 cmp.ge p6,p7=r8,r0 // syscall executed successfully?
623 adds r2=PT(R8)+16,sp // r2 = &pt_regs.r8
624 mov r10=r0 // clear error indication in r10
625(p7) br.cond.spnt handle_syscall_error // handle potential syscall failure
626END(ia64_ret_from_syscall)
627 // fall through
628/*
629 * ia64_leave_syscall(): Same as ia64_leave_kernel, except that it doesn't
630 * need to switch to bank 0 and doesn't restore the scratch registers.
631 * To avoid leaking kernel bits, the scratch registers are set to
632 * the following known-to-be-safe values:
633 *
634 * r1: restored (global pointer)
635 * r2: cleared
636 * r3: 1 (when returning to user-level)
637 * r8-r11: restored (syscall return value(s))
638 * r12: restored (user-level stack pointer)
639 * r13: restored (user-level thread pointer)
640 * r14: cleared
641 * r15: restored (syscall #)
642 * r16-r17: cleared
643 * r18: user-level b6
644 * r19: cleared
645 * r20: user-level ar.fpsr
646 * r21: user-level b0
647 * r22: cleared
648 * r23: user-level ar.bspstore
649 * r24: user-level ar.rnat
650 * r25: user-level ar.unat
651 * r26: user-level ar.pfs
652 * r27: user-level ar.rsc
653 * r28: user-level ip
654 * r29: user-level psr
655 * r30: user-level cfm
656 * r31: user-level pr
657 * f6-f11: cleared
658 * pr: restored (user-level pr)
659 * b0: restored (user-level rp)
660 * b6: restored
661 * b7: cleared
662 * ar.unat: restored (user-level ar.unat)
663 * ar.pfs: restored (user-level ar.pfs)
664 * ar.rsc: restored (user-level ar.rsc)
665 * ar.rnat: restored (user-level ar.rnat)
666 * ar.bspstore: restored (user-level ar.bspstore)
667 * ar.fpsr: restored (user-level ar.fpsr)
668 * ar.ccv: cleared
669 * ar.csd: cleared
670 * ar.ssd: cleared
671 */
672ENTRY(ia64_leave_syscall)
673 PT_REGS_UNWIND_INFO(0)
674 /*
675 * work.need_resched etc. mustn't get changed by this CPU before it returns to
676 * user- or fsys-mode, hence we disable interrupts early on.
677 *
 678 * p6 controls whether current_thread_info()->flags needs to be checked for
679 * extra work. We always check for extra work when returning to user-level.
680 * With CONFIG_PREEMPT, we also check for extra work when the preempt_count
681 * is 0. After extra work processing has been completed, execution
682 * resumes at .work_processed_syscall with p6 set to 1 if the extra-work-check
683 * needs to be redone.
684 */
685#ifdef CONFIG_PREEMPT
686 rsm psr.i // disable interrupts
687 cmp.eq pLvSys,p0=r0,r0 // pLvSys=1: leave from syscall
688(pKStk) adds r20=TI_PRE_COUNT+IA64_TASK_SIZE,r13
689 ;;
690 .pred.rel.mutex pUStk,pKStk
691(pKStk) ld4 r21=[r20] // r21 <- preempt_count
692(pUStk) mov r21=0 // r21 <- 0
693 ;;
694 cmp.eq p6,p0=r21,r0 // p6 <- pUStk || (preempt_count == 0)
695#else /* !CONFIG_PREEMPT */
696(pUStk) rsm psr.i
697 cmp.eq pLvSys,p0=r0,r0 // pLvSys=1: leave from syscall
698(pUStk) cmp.eq.unc p6,p0=r0,r0 // p6 <- pUStk
699#endif
700.work_processed_syscall:
701 adds r2=PT(LOADRS)+16,r12
702 adds r3=PT(AR_BSPSTORE)+16,r12
703 adds r18=TI_FLAGS+IA64_TASK_SIZE,r13
704 ;;
705(p6) ld4 r31=[r18] // load current_thread_info()->flags
706 ld8 r19=[r2],PT(B6)-PT(LOADRS) // load ar.rsc value for "loadrs"
707 mov b7=r0 // clear b7
708 ;;
709 ld8 r23=[r3],PT(R11)-PT(AR_BSPSTORE) // load ar.bspstore (may be garbage)
710 ld8 r18=[r2],PT(R9)-PT(B6) // load b6
711(p6) and r15=TIF_WORK_MASK,r31 // any work other than TIF_SYSCALL_TRACE?
712 ;;
713 mov r16=ar.bsp // M2 get existing backing store pointer
714(p6) cmp4.ne.unc p6,p0=r15, r0 // any special work pending?
715(p6) br.cond.spnt .work_pending_syscall
716 ;;
717 // start restoring the state saved on the kernel stack (struct pt_regs):
718 ld8 r9=[r2],PT(CR_IPSR)-PT(R9)
719 ld8 r11=[r3],PT(CR_IIP)-PT(R11)
720 mov f6=f0 // clear f6
721 ;;
722 invala // M0|1 invalidate ALAT
723 rsm psr.i | psr.ic // M2 initiate turning off of interrupt and interruption collection
724 mov f9=f0 // clear f9
725
726 ld8 r29=[r2],16 // load cr.ipsr
727 ld8 r28=[r3],16 // load cr.iip
728 mov f8=f0 // clear f8
729 ;;
730 ld8 r30=[r2],16 // M0|1 load cr.ifs
731 mov.m ar.ssd=r0 // M2 clear ar.ssd
732 cmp.eq p9,p0=r0,r0 // set p9 to indicate that we should restore cr.ifs
733 ;;
734 ld8 r25=[r3],16 // M0|1 load ar.unat
735 mov.m ar.csd=r0 // M2 clear ar.csd
736 mov r22=r0 // clear r22
737 ;;
738 ld8 r26=[r2],PT(B0)-PT(AR_PFS) // M0|1 load ar.pfs
739(pKStk) mov r22=psr // M2 read PSR now that interrupts are disabled
740 mov f10=f0 // clear f10
741 ;;
742 ld8 r21=[r2],PT(AR_RNAT)-PT(B0) // load b0
743 ld8 r27=[r3],PT(PR)-PT(AR_RSC) // load ar.rsc
744 mov f11=f0 // clear f11
745 ;;
746 ld8 r24=[r2],PT(AR_FPSR)-PT(AR_RNAT) // load ar.rnat (may be garbage)
747 ld8 r31=[r3],PT(R1)-PT(PR) // load predicates
748(pUStk) add r14=IA64_TASK_THREAD_ON_USTACK_OFFSET,r13
749 ;;
750 ld8 r20=[r2],PT(R12)-PT(AR_FPSR) // load ar.fpsr
751 ld8.fill r1=[r3],16 // load r1
752(pUStk) mov r17=1
753 ;;
754 srlz.d // M0 ensure interruption collection is off
755 ld8.fill r13=[r3],16
756 mov f7=f0 // clear f7
757 ;;
758 ld8.fill r12=[r2] // restore r12 (sp)
759 ld8.fill r15=[r3] // restore r15
760 addl r3=THIS_CPU(ia64_phys_stacked_size_p8),r0
761 ;;
762(pUStk) ld4 r3=[r3] // r3 = cpu_data->phys_stacked_size_p8
763(pUStk) st1 [r14]=r17
764 mov b6=r18 // I0 restore b6
765 ;;
766 mov r14=r0 // clear r14
767 shr.u r18=r19,16 // I0|1 get byte size of existing "dirty" partition
768(pKStk) br.cond.dpnt.many skip_rbs_switch
769
770 mov.m ar.ccv=r0 // clear ar.ccv
771(pNonSys) br.cond.dpnt.many dont_preserve_current_frame
772 br.cond.sptk.many rbs_switch
773END(ia64_leave_syscall)
774
775#ifdef CONFIG_IA32_SUPPORT
776GLOBAL_ENTRY(ia64_ret_from_ia32_execve)
777 PT_REGS_UNWIND_INFO(0)
778 adds r2=PT(R8)+16,sp // r2 = &pt_regs.r8
779 adds r3=PT(R10)+16,sp // r3 = &pt_regs.r10
780 ;;
781 .mem.offset 0,0
782 st8.spill [r2]=r8 // store return value in slot for r8 and set unat bit
783 .mem.offset 8,0
784 st8.spill [r3]=r0 // clear error indication in slot for r10 and set unat bit
785END(ia64_ret_from_ia32_execve)
786 // fall through
787#endif /* CONFIG_IA32_SUPPORT */
788GLOBAL_ENTRY(ia64_leave_kernel)
789 PT_REGS_UNWIND_INFO(0)
790 /*
791 * work.need_resched etc. mustn't get changed by this CPU before it returns to
792 * user- or fsys-mode, hence we disable interrupts early on.
793 *
794 * p6 controls whether current_thread_info()->flags needs to be checked for
795 * extra work. We always check for extra work when returning to user-level.
796 * With CONFIG_PREEMPT, we also check for extra work when the preempt_count
797 * is 0. After extra work processing has been completed, execution
798 * resumes at .work_processed_kernel with p6 set to 1 if the extra-work-check
799 * needs to be redone.
800 */
801#ifdef CONFIG_PREEMPT
802 rsm psr.i // disable interrupts
803 cmp.eq p0,pLvSys=r0,r0 // pLvSys=0: leave from kernel
804(pKStk) adds r20=TI_PRE_COUNT+IA64_TASK_SIZE,r13
805 ;;
806 .pred.rel.mutex pUStk,pKStk
807(pKStk) ld4 r21=[r20] // r21 <- preempt_count
808(pUStk) mov r21=0 // r21 <- 0
809 ;;
810 cmp.eq p6,p0=r21,r0 // p6 <- pUStk || (preempt_count == 0)
811#else
812(pUStk) rsm psr.i
813 cmp.eq p0,pLvSys=r0,r0 // pLvSys=0: leave from kernel
814(pUStk) cmp.eq.unc p6,p0=r0,r0 // p6 <- pUStk
815#endif
816.work_processed_kernel:
817 adds r17=TI_FLAGS+IA64_TASK_SIZE,r13
818 ;;
819(p6) ld4 r31=[r17] // load current_thread_info()->flags
820 adds r21=PT(PR)+16,r12
821 ;;
822
823 lfetch [r21],PT(CR_IPSR)-PT(PR)
824 adds r2=PT(B6)+16,r12
825 adds r3=PT(R16)+16,r12
826 ;;
827 lfetch [r21]
828 ld8 r28=[r2],8 // load b6
829 adds r29=PT(R24)+16,r12
830
831 ld8.fill r16=[r3],PT(AR_CSD)-PT(R16)
832 adds r30=PT(AR_CCV)+16,r12
833(p6) and r19=TIF_WORK_MASK,r31 // any work other than TIF_SYSCALL_TRACE?
834 ;;
835 ld8.fill r24=[r29]
836 ld8 r15=[r30] // load ar.ccv
837(p6) cmp4.ne.unc p6,p0=r19, r0 // any special work pending?
838 ;;
839 ld8 r29=[r2],16 // load b7
840 ld8 r30=[r3],16 // load ar.csd
841(p6) br.cond.spnt .work_pending
842 ;;
843 ld8 r31=[r2],16 // load ar.ssd
844 ld8.fill r8=[r3],16
845 ;;
846 ld8.fill r9=[r2],16
847 ld8.fill r10=[r3],PT(R17)-PT(R10)
848 ;;
849 ld8.fill r11=[r2],PT(R18)-PT(R11)
850 ld8.fill r17=[r3],16
851 ;;
852 ld8.fill r18=[r2],16
853 ld8.fill r19=[r3],16
854 ;;
855 ld8.fill r20=[r2],16
856 ld8.fill r21=[r3],16
857 mov ar.csd=r30
858 mov ar.ssd=r31
859 ;;
860 rsm psr.i | psr.ic // initiate turning off of interrupt and interruption collection
861 invala // invalidate ALAT
862 ;;
863 ld8.fill r22=[r2],24
864 ld8.fill r23=[r3],24
865 mov b6=r28
866 ;;
867 ld8.fill r25=[r2],16
868 ld8.fill r26=[r3],16
869 mov b7=r29
870 ;;
871 ld8.fill r27=[r2],16
872 ld8.fill r28=[r3],16
873 ;;
874 ld8.fill r29=[r2],16
875 ld8.fill r30=[r3],24
876 ;;
877 ld8.fill r31=[r2],PT(F9)-PT(R31)
878 adds r3=PT(F10)-PT(F6),r3
879 ;;
880 ldf.fill f9=[r2],PT(F6)-PT(F9)
881 ldf.fill f10=[r3],PT(F8)-PT(F10)
882 ;;
883 ldf.fill f6=[r2],PT(F7)-PT(F6)
884 ;;
885 ldf.fill f7=[r2],PT(F11)-PT(F7)
886 ldf.fill f8=[r3],32
887 ;;
888 srlz.i // ensure interruption collection is off
889 mov ar.ccv=r15
890 ;;
891 ldf.fill f11=[r2]
892 bsw.0 // switch back to bank 0 (no stop bit required beforehand...)
893 ;;
894(pUStk) mov r18=IA64_KR(CURRENT)// M2 (12 cycle read latency)
895 adds r16=PT(CR_IPSR)+16,r12
896 adds r17=PT(CR_IIP)+16,r12
897
898(pKStk) mov r22=psr // M2 read PSR now that interrupts are disabled
899 nop.i 0
900 nop.i 0
901 ;;
902 ld8 r29=[r16],16 // load cr.ipsr
903 ld8 r28=[r17],16 // load cr.iip
904 ;;
905 ld8 r30=[r16],16 // load cr.ifs
906 ld8 r25=[r17],16 // load ar.unat
907 ;;
908 ld8 r26=[r16],16 // load ar.pfs
909 ld8 r27=[r17],16 // load ar.rsc
910 cmp.eq p9,p0=r0,r0 // set p9 to indicate that we should restore cr.ifs
911 ;;
912 ld8 r24=[r16],16 // load ar.rnat (may be garbage)
913 ld8 r23=[r17],16 // load ar.bspstore (may be garbage)
914 ;;
915 ld8 r31=[r16],16 // load predicates
916 ld8 r21=[r17],16 // load b0
917 ;;
918 ld8 r19=[r16],16 // load ar.rsc value for "loadrs"
919 ld8.fill r1=[r17],16 // load r1
920 ;;
921 ld8.fill r12=[r16],16
922 ld8.fill r13=[r17],16
923(pUStk) adds r18=IA64_TASK_THREAD_ON_USTACK_OFFSET,r18
924 ;;
925 ld8 r20=[r16],16 // ar.fpsr
926 ld8.fill r15=[r17],16
927 ;;
928 ld8.fill r14=[r16],16
929 ld8.fill r2=[r17]
930(pUStk) mov r17=1
931 ;;
932 ld8.fill r3=[r16]
933(pUStk) st1 [r18]=r17 // restore current->thread.on_ustack
934 shr.u r18=r19,16 // get byte size of existing "dirty" partition
935 ;;
936 mov r16=ar.bsp // get existing backing store pointer
937 addl r17=THIS_CPU(ia64_phys_stacked_size_p8),r0
938 ;;
939 ld4 r17=[r17] // r17 = cpu_data->phys_stacked_size_p8
940(pKStk) br.cond.dpnt skip_rbs_switch
941
942 /*
943 * Restore user backing store.
944 *
945 * NOTE: alloc, loadrs, and cover can't be predicated.
946 */
947(pNonSys) br.cond.dpnt dont_preserve_current_frame
948
949rbs_switch:
950 cover // add current frame into dirty partition and set cr.ifs
951 ;;
952 mov r19=ar.bsp // get new backing store pointer
953 sub r16=r16,r18 // krbs = old bsp - size of dirty partition
954 cmp.ne p9,p0=r0,r0 // clear p9 to skip restore of cr.ifs
955 ;;
956 sub r19=r19,r16 // calculate total byte size of dirty partition
957 add r18=64,r18 // don't force in0-in7 into memory...
958 ;;
959 shl r19=r19,16 // shift size of dirty partition into loadrs position
960 ;;
961dont_preserve_current_frame:
962 /*
963 * To prevent leaking bits between the kernel and user-space,
964 * we must clear the stacked registers in the "invalid" partition here.
965 * Not pretty, but at least it's fast (3.34 registers/cycle on Itanium,
966 * 5 registers/cycle on McKinley).
967 */
968# define pRecurse p6
969# define pReturn p7
970#ifdef CONFIG_ITANIUM
971# define Nregs 10
972#else
973# define Nregs 14
974#endif
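	/*
	 * Illustrative sketch (not part of the original source; the names mirror
	 * the comments above): on entry to rse_clear_invalid,
	 *
	 *	r17  = (physStackedSize + 8) - dirtySize;  // bytes of "invalid" regs
	 *	loc1 = dirtySize / 512;                    // worst-case # of RNaT slots
	 *	in0  = loc1 * 8 + r17;                     // total bytes to clear
	 *
	 * and each "recursion" below zeroes Nregs stacked registers until fewer
	 * than Nregs*8 bytes remain.
	 */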
975 alloc loc0=ar.pfs,2,Nregs-2,2,0
976 shr.u loc1=r18,9 // RNaTslots <= floor(dirtySize / (64*8))
977 sub r17=r17,r18 // r17 = (physStackedSize + 8) - dirtySize
978 ;;
979 mov ar.rsc=r19 // load ar.rsc to be used for "loadrs"
980 shladd in0=loc1,3,r17
981 mov in1=0
982 ;;
983 TEXT_ALIGN(32)
984rse_clear_invalid:
985#ifdef CONFIG_ITANIUM
986 // cycle 0
987 { .mii
988 alloc loc0=ar.pfs,2,Nregs-2,2,0
989 cmp.lt pRecurse,p0=Nregs*8,in0 // if more than Nregs regs left to clear, (re)curse
990 add out0=-Nregs*8,in0
991}{ .mfb
992 add out1=1,in1 // increment recursion count
993 nop.f 0
994 nop.b 0 // can't do br.call here because of alloc (WAW on CFM)
995 ;;
996}{ .mfi // cycle 1
997 mov loc1=0
998 nop.f 0
999 mov loc2=0
1000}{ .mib
1001 mov loc3=0
1002 mov loc4=0
1003(pRecurse) br.call.sptk.many b0=rse_clear_invalid
1004
1005}{ .mfi // cycle 2
1006 mov loc5=0
1007 nop.f 0
1008 cmp.ne pReturn,p0=r0,in1 // if recursion count != 0, we need to do a br.ret
1009}{ .mib
1010 mov loc6=0
1011 mov loc7=0
1012(pReturn) br.ret.sptk.many b0
1013}
1014#else /* !CONFIG_ITANIUM */
1015 alloc loc0=ar.pfs,2,Nregs-2,2,0
1016 cmp.lt pRecurse,p0=Nregs*8,in0 // if more than Nregs regs left to clear, (re)curse
1017 add out0=-Nregs*8,in0
1018 add out1=1,in1 // increment recursion count
1019 mov loc1=0
1020 mov loc2=0
1021 ;;
1022 mov loc3=0
1023 mov loc4=0
1024 mov loc5=0
1025 mov loc6=0
1026 mov loc7=0
1027(pRecurse) br.call.sptk.few b0=rse_clear_invalid
1028 ;;
1029 mov loc8=0
1030 mov loc9=0
1031 cmp.ne pReturn,p0=r0,in1 // if recursion count != 0, we need to do a br.ret
1032 mov loc10=0
1033 mov loc11=0
1034(pReturn) br.ret.sptk.many b0
1035#endif /* !CONFIG_ITANIUM */
1036# undef pRecurse
1037# undef pReturn
1038 ;;
1039 alloc r17=ar.pfs,0,0,0,0 // drop current register frame
1040 ;;
1041 loadrs
1042 ;;
1043skip_rbs_switch:
1044 mov ar.unat=r25 // M2
1045(pKStk) extr.u r22=r22,21,1 // I0 extract current value of psr.pp from r22
1046(pLvSys)mov r19=r0 // A clear r19 for leave_syscall, no-op otherwise
1047 ;;
1048(pUStk) mov ar.bspstore=r23 // M2
1049(pKStk) dep r29=r22,r29,21,1 // I0 update ipsr.pp with psr.pp
1050(pLvSys)mov r16=r0 // A clear r16 for leave_syscall, no-op otherwise
1051 ;;
1052 mov cr.ipsr=r29 // M2
1053 mov ar.pfs=r26 // I0
1054(pLvSys)mov r17=r0 // A clear r17 for leave_syscall, no-op otherwise
1055
1056(p9) mov cr.ifs=r30 // M2
1057 mov b0=r21 // I0
1058(pLvSys)mov r18=r0 // A clear r18 for leave_syscall, no-op otherwise
1059
1060 mov ar.fpsr=r20 // M2
1061 mov cr.iip=r28 // M2
1062 nop 0
1063 ;;
1064(pUStk) mov ar.rnat=r24 // M2 must happen with RSE in lazy mode
1065 nop 0
1066(pLvSys)mov r2=r0
1067
1068 mov ar.rsc=r27 // M2
1069 mov pr=r31,-1 // I0
1070 rfi // B
1071
1072 /*
1073 * On entry:
1074 * r20 = &current->thread_info->preempt_count (if CONFIG_PREEMPT)
1075 * r31 = current->thread_info->flags
1076 * On exit:
1077 * p6 = TRUE if work-pending-check needs to be redone
1078 */
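	/*
	 * Illustrative sketch (not part of the original source): the work-pending
	 * path below behaves roughly like
	 *
	 *	if (flags & _TIF_SIGDELAYED)
	 *		do_sigdelayed();              // then redo the work check
	 *	else if (flags & _TIF_NEED_RESCHED)
	 *		schedule();                   // then redo the work check
	 *	else if (returning_to_user)
	 *		do_notify_resume_user(...);   // no re-check needed
	 *
	 * with the syscall variant additionally saving and restoring r8/r10
	 * (the syscall return values) around the calls.
	 */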
1079.work_pending_syscall:
1080 add r2=-8,r2
1081 add r3=-8,r3
1082 ;;
1083 st8 [r2]=r8
1084 st8 [r3]=r10
1085.work_pending:
1086 tbit.nz p6,p0=r31,TIF_SIGDELAYED // signal delayed from MCA/INIT/NMI/PMI context?
1087(p6) br.cond.sptk.few .sigdelayed
1088 ;;
1089 tbit.z p6,p0=r31,TIF_NEED_RESCHED // current_thread_info()->need_resched==0?
1090(p6) br.cond.sptk.few .notify
1091#ifdef CONFIG_PREEMPT
1092(pKStk) dep r21=-1,r0,PREEMPT_ACTIVE_BIT,1
1093 ;;
1094(pKStk) st4 [r20]=r21
1095 ssm psr.i // enable interrupts
1096#endif
1097 br.call.spnt.many rp=schedule
1098.ret9: cmp.eq p6,p0=r0,r0 // p6 <- 1
1099 rsm psr.i // disable interrupts
1100 ;;
1101#ifdef CONFIG_PREEMPT
1102(pKStk) adds r20=TI_PRE_COUNT+IA64_TASK_SIZE,r13
1103 ;;
1104(pKStk) st4 [r20]=r0 // preempt_count() <- 0
1105#endif
1106(pLvSys)br.cond.sptk.few .work_pending_syscall_end
1107 br.cond.sptk.many .work_processed_kernel // re-check
1108
1109.notify:
1110(pUStk) br.call.spnt.many rp=notify_resume_user
1111.ret10: cmp.ne p6,p0=r0,r0 // p6 <- 0
1112(pLvSys)br.cond.sptk.few .work_pending_syscall_end
1113 br.cond.sptk.many .work_processed_kernel // don't re-check
1114
1115// There is a delayed signal that was detected in MCA/INIT/NMI/PMI context where
1116// it could not be delivered. Deliver it now. The signal might be for us and
1117// may set TIF_SIGPENDING, so redrive ia64_leave_* after processing the delayed
1118// signal.
1119
1120.sigdelayed:
1121 br.call.sptk.many rp=do_sigdelayed
1122 cmp.eq p6,p0=r0,r0 // p6 <- 1, always re-check
1123(pLvSys)br.cond.sptk.few .work_pending_syscall_end
1124 br.cond.sptk.many .work_processed_kernel // re-check
1125
1126.work_pending_syscall_end:
1127 adds r2=PT(R8)+16,r12
1128 adds r3=PT(R10)+16,r12
1129 ;;
1130 ld8 r8=[r2]
1131 ld8 r10=[r3]
1132 br.cond.sptk.many .work_processed_syscall // re-check
1133
1134END(ia64_leave_kernel)
1135
1136ENTRY(handle_syscall_error)
1137 /*
1138 * Some system calls (e.g., ptrace, mmap) can return arbitrary values which could
1139 * lead us to mistake a negative return value for a failed syscall. Those syscalls
1140 * must deposit a non-zero value in pt_regs.r8 to indicate an error. If
1141 * pt_regs.r8 is zero, we assume that the call completed successfully.
1142 */
1143 PT_REGS_UNWIND_INFO(0)
1144 ld8 r3=[r2] // load pt_regs.r8
1145 ;;
1146 cmp.eq p6,p7=r3,r0 // is pt_regs.r8==0?
1147 ;;
1148(p7) mov r10=-1
1149(p7) sub r8=0,r8 // negate return value to get errno
1150 br.cond.sptk ia64_leave_syscall
1151END(handle_syscall_error)
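	/*
	 * Illustrative sketch (not part of the original source): in C terms, with
	 * r8/r10 denoting the live, user-visible return registers,
	 *
	 *	if (pt_regs->r8 != 0) {     // the syscall really flagged an error
	 *		r10 = -1;           // user-level error indication
	 *		r8  = -r8;          // turn the negative return into a positive errno
	 *	}
	 *	// either way, continue through ia64_leave_syscall
	 */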
1152
1153 /*
1154 * Invoke schedule_tail(task) while preserving in0-in7, which may be needed
1155 * in case a system call gets restarted.
1156 */
1157GLOBAL_ENTRY(ia64_invoke_schedule_tail)
1158 .prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(8)
1159 alloc loc1=ar.pfs,8,2,1,0
1160 mov loc0=rp
1161 mov out0=r8 // Address of previous task
1162 ;;
1163 br.call.sptk.many rp=schedule_tail
1164.ret11: mov ar.pfs=loc1
1165 mov rp=loc0
1166 br.ret.sptk.many rp
1167END(ia64_invoke_schedule_tail)
1168
1169 /*
1170 * Setup stack and call do_notify_resume_user(). Note that pSys and pNonSys need to
1171 * be set up by the caller. We declare 8 input registers so the system call
1172 * args get preserved, in case we need to restart a system call.
1173 */
1174ENTRY(notify_resume_user)
1175 .prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(8)
1176 alloc loc1=ar.pfs,8,2,3,0 // preserve all eight input regs in case of syscall restart!
1177 mov r9=ar.unat
1178 mov loc0=rp // save return address
1179 mov out0=0 // there is no "oldset"
1180 adds out1=8,sp // out1=&sigscratch->ar_pfs
1181(pSys) mov out2=1 // out2==1 => we're in a syscall
1182 ;;
1183(pNonSys) mov out2=0 // out2==0 => not a syscall
1184 .fframe 16
1185 .spillpsp ar.unat, 16 // (note that offset is relative to psp+0x10!)
1186 st8 [sp]=r9,-16 // allocate space for ar.unat and save it
1187 st8 [out1]=loc1,-8 // save ar.pfs, out1=&sigscratch
1188 .body
1189 br.call.sptk.many rp=do_notify_resume_user
1190.ret15: .restore sp
1191 adds sp=16,sp // pop scratch stack space
1192 ;;
1193 ld8 r9=[sp] // load new unat from sigscratch->scratch_unat
1194 mov rp=loc0
1195 ;;
1196 mov ar.unat=r9
1197 mov ar.pfs=loc1
1198 br.ret.sptk.many rp
1199END(notify_resume_user)
1200
1201GLOBAL_ENTRY(sys_rt_sigsuspend)
1202 .prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(8)
1203 alloc loc1=ar.pfs,8,2,3,0 // preserve all eight input regs in case of syscall restart!
1204 mov r9=ar.unat
1205 mov loc0=rp // save return address
1206 mov out0=in0 // mask
1207 mov out1=in1 // sigsetsize
1208 adds out2=8,sp // out2=&sigscratch->ar_pfs
1209 ;;
1210 .fframe 16
1211 .spillpsp ar.unat, 16 // (note that offset is relative to psp+0x10!)
1212 st8 [sp]=r9,-16 // allocate space for ar.unat and save it
1213 st8 [out2]=loc1,-8 // save ar.pfs, out2=&sigscratch
1214 .body
1215 br.call.sptk.many rp=ia64_rt_sigsuspend
1216.ret17: .restore sp
1217 adds sp=16,sp // pop scratch stack space
1218 ;;
1219 ld8 r9=[sp] // load new unat from sw->caller_unat
1220 mov rp=loc0
1221 ;;
1222 mov ar.unat=r9
1223 mov ar.pfs=loc1
1224 br.ret.sptk.many rp
1225END(sys_rt_sigsuspend)
1226
1227ENTRY(sys_rt_sigreturn)
1228 PT_REGS_UNWIND_INFO(0)
1229 /*
1230 * Allocate 8 input registers since ptrace() may clobber them
1231 */
1232 alloc r2=ar.pfs,8,0,1,0
1233 .prologue
1234 PT_REGS_SAVES(16)
1235 adds sp=-16,sp
1236 .body
1237 cmp.eq pNonSys,pSys=r0,r0 // sigreturn isn't a normal syscall...
1238 ;;
1239 /*
1240 * leave_kernel() restores f6-f11 from pt_regs, but since the streamlined
1241 * syscall-entry path does not save them we save them here instead. Note: we
1242 * don't need to save any other registers that are not saved by the stream-lined
1243 * syscall path, because restore_sigcontext() restores them.
1244 */
1245 adds r16=PT(F6)+32,sp
1246 adds r17=PT(F7)+32,sp
1247 ;;
1248 stf.spill [r16]=f6,32
1249 stf.spill [r17]=f7,32
1250 ;;
1251 stf.spill [r16]=f8,32
1252 stf.spill [r17]=f9,32
1253 ;;
1254 stf.spill [r16]=f10
1255 stf.spill [r17]=f11
1256 adds out0=16,sp // out0 = &sigscratch
1257 br.call.sptk.many rp=ia64_rt_sigreturn
1258.ret19: .restore sp 0
1259 adds sp=16,sp
1260 ;;
1261 ld8 r9=[sp] // load new ar.unat
1262 mov.sptk b7=r8,ia64_leave_kernel
1263 ;;
1264 mov ar.unat=r9
1265 br.many b7
1266END(sys_rt_sigreturn)
1267
1268GLOBAL_ENTRY(ia64_prepare_handle_unaligned)
1269 .prologue
1270 /*
1271 * r16 = fake ar.pfs, we simply need to make sure privilege is still 0
1272 */
1273 mov r16=r0
1274 DO_SAVE_SWITCH_STACK
1275 br.call.sptk.many rp=ia64_handle_unaligned // stack frame setup in ivt
1276.ret21: .body
1277 DO_LOAD_SWITCH_STACK
1278 br.cond.sptk.many rp // goes to ia64_leave_kernel
1279END(ia64_prepare_handle_unaligned)
1280
1281 //
1282 // unw_init_running(void (*callback)(info, arg), void *arg)
1283 //
1284# define EXTRA_FRAME_SIZE ((UNW_FRAME_INFO_SIZE+15)&~15)
1285
1286GLOBAL_ENTRY(unw_init_running)
1287 .prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(2)
1288 alloc loc1=ar.pfs,2,3,3,0
1289 ;;
1290 ld8 loc2=[in0],8
1291 mov loc0=rp
1292 mov r16=loc1
1293 DO_SAVE_SWITCH_STACK
1294 .body
1295
1296 .prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(2)
1297 .fframe IA64_SWITCH_STACK_SIZE+EXTRA_FRAME_SIZE
1298 SWITCH_STACK_SAVES(EXTRA_FRAME_SIZE)
1299 adds sp=-EXTRA_FRAME_SIZE,sp
1300 .body
1301 ;;
1302 adds out0=16,sp // &info
1303 mov out1=r13 // current
1304 adds out2=16+EXTRA_FRAME_SIZE,sp // &switch_stack
1305 br.call.sptk.many rp=unw_init_frame_info
13061: adds out0=16,sp // &info
1307 mov b6=loc2
1308 mov loc2=gp // save gp across indirect function call
1309 ;;
1310 ld8 gp=[in0]
1311 mov out1=in1 // arg
1312 br.call.sptk.many rp=b6 // invoke the callback function
13131: mov gp=loc2 // restore gp
1314
1315 // For now, we don't allow changing registers from within
1316 // unw_init_running; if we ever want to allow that, we'd
1317 // have to do a load_switch_stack here:
1318 .restore sp
1319 adds sp=IA64_SWITCH_STACK_SIZE+EXTRA_FRAME_SIZE,sp
1320
1321 mov ar.pfs=loc1
1322 mov rp=loc0
1323 br.ret.sptk.many rp
1324END(unw_init_running)
1325
1326 .rodata
1327 .align 8
1328 .globl sys_call_table
1329sys_call_table:
1330 data8 sys_ni_syscall // This must be sys_ni_syscall! See ivt.S.
1331 data8 sys_exit // 1025
1332 data8 sys_read
1333 data8 sys_write
1334 data8 sys_open
1335 data8 sys_close
1336 data8 sys_creat // 1030
1337 data8 sys_link
1338 data8 sys_unlink
1339 data8 ia64_execve
1340 data8 sys_chdir
1341 data8 sys_fchdir // 1035
1342 data8 sys_utimes
1343 data8 sys_mknod
1344 data8 sys_chmod
1345 data8 sys_chown
1346 data8 sys_lseek // 1040
1347 data8 sys_getpid
1348 data8 sys_getppid
1349 data8 sys_mount
1350 data8 sys_umount
1351 data8 sys_setuid // 1045
1352 data8 sys_getuid
1353 data8 sys_geteuid
1354 data8 sys_ptrace
1355 data8 sys_access
1356 data8 sys_sync // 1050
1357 data8 sys_fsync
1358 data8 sys_fdatasync
1359 data8 sys_kill
1360 data8 sys_rename
1361 data8 sys_mkdir // 1055
1362 data8 sys_rmdir
1363 data8 sys_dup
1364 data8 sys_pipe
1365 data8 sys_times
1366 data8 ia64_brk // 1060
1367 data8 sys_setgid
1368 data8 sys_getgid
1369 data8 sys_getegid
1370 data8 sys_acct
1371 data8 sys_ioctl // 1065
1372 data8 sys_fcntl
1373 data8 sys_umask
1374 data8 sys_chroot
1375 data8 sys_ustat
1376 data8 sys_dup2 // 1070
1377 data8 sys_setreuid
1378 data8 sys_setregid
1379 data8 sys_getresuid
1380 data8 sys_setresuid
1381 data8 sys_getresgid // 1075
1382 data8 sys_setresgid
1383 data8 sys_getgroups
1384 data8 sys_setgroups
1385 data8 sys_getpgid
1386 data8 sys_setpgid // 1080
1387 data8 sys_setsid
1388 data8 sys_getsid
1389 data8 sys_sethostname
1390 data8 sys_setrlimit
1391 data8 sys_getrlimit // 1085
1392 data8 sys_getrusage
1393 data8 sys_gettimeofday
1394 data8 sys_settimeofday
1395 data8 sys_select
1396 data8 sys_poll // 1090
1397 data8 sys_symlink
1398 data8 sys_readlink
1399 data8 sys_uselib
1400 data8 sys_swapon
1401 data8 sys_swapoff // 1095
1402 data8 sys_reboot
1403 data8 sys_truncate
1404 data8 sys_ftruncate
1405 data8 sys_fchmod
1406 data8 sys_fchown // 1100
1407 data8 ia64_getpriority
1408 data8 sys_setpriority
1409 data8 sys_statfs
1410 data8 sys_fstatfs
1411 data8 sys_gettid // 1105
1412 data8 sys_semget
1413 data8 sys_semop
1414 data8 sys_semctl
1415 data8 sys_msgget
1416 data8 sys_msgsnd // 1110
1417 data8 sys_msgrcv
1418 data8 sys_msgctl
1419 data8 sys_shmget
1420 data8 ia64_shmat
1421 data8 sys_shmdt // 1115
1422 data8 sys_shmctl
1423 data8 sys_syslog
1424 data8 sys_setitimer
1425 data8 sys_getitimer
1426 data8 sys_ni_syscall // 1120 /* was: ia64_oldstat */
1427 data8 sys_ni_syscall /* was: ia64_oldlstat */
1428 data8 sys_ni_syscall /* was: ia64_oldfstat */
1429 data8 sys_vhangup
1430 data8 sys_lchown
1431 data8 sys_remap_file_pages // 1125
1432 data8 sys_wait4
1433 data8 sys_sysinfo
1434 data8 sys_clone
1435 data8 sys_setdomainname
1436 data8 sys_newuname // 1130
1437 data8 sys_adjtimex
1438 data8 sys_ni_syscall /* was: ia64_create_module */
1439 data8 sys_init_module
1440 data8 sys_delete_module
1441 data8 sys_ni_syscall // 1135 /* was: sys_get_kernel_syms */
1442 data8 sys_ni_syscall /* was: sys_query_module */
1443 data8 sys_quotactl
1444 data8 sys_bdflush
1445 data8 sys_sysfs
1446 data8 sys_personality // 1140
1447 data8 sys_ni_syscall // sys_afs_syscall
1448 data8 sys_setfsuid
1449 data8 sys_setfsgid
1450 data8 sys_getdents
1451 data8 sys_flock // 1145
1452 data8 sys_readv
1453 data8 sys_writev
1454 data8 sys_pread64
1455 data8 sys_pwrite64
1456 data8 sys_sysctl // 1150
1457 data8 sys_mmap
1458 data8 sys_munmap
1459 data8 sys_mlock
1460 data8 sys_mlockall
1461 data8 sys_mprotect // 1155
1462 data8 ia64_mremap
1463 data8 sys_msync
1464 data8 sys_munlock
1465 data8 sys_munlockall
1466 data8 sys_sched_getparam // 1160
1467 data8 sys_sched_setparam
1468 data8 sys_sched_getscheduler
1469 data8 sys_sched_setscheduler
1470 data8 sys_sched_yield
1471 data8 sys_sched_get_priority_max // 1165
1472 data8 sys_sched_get_priority_min
1473 data8 sys_sched_rr_get_interval
1474 data8 sys_nanosleep
1475 data8 sys_nfsservctl
1476 data8 sys_prctl // 1170
1477 data8 sys_getpagesize
1478 data8 sys_mmap2
1479 data8 sys_pciconfig_read
1480 data8 sys_pciconfig_write
1481 data8 sys_perfmonctl // 1175
1482 data8 sys_sigaltstack
1483 data8 sys_rt_sigaction
1484 data8 sys_rt_sigpending
1485 data8 sys_rt_sigprocmask
1486 data8 sys_rt_sigqueueinfo // 1180
1487 data8 sys_rt_sigreturn
1488 data8 sys_rt_sigsuspend
1489 data8 sys_rt_sigtimedwait
1490 data8 sys_getcwd
1491 data8 sys_capget // 1185
1492 data8 sys_capset
1493 data8 sys_sendfile64
1494 data8 sys_ni_syscall // sys_getpmsg (STREAMS)
1495 data8 sys_ni_syscall // sys_putpmsg (STREAMS)
1496 data8 sys_socket // 1190
1497 data8 sys_bind
1498 data8 sys_connect
1499 data8 sys_listen
1500 data8 sys_accept
1501 data8 sys_getsockname // 1195
1502 data8 sys_getpeername
1503 data8 sys_socketpair
1504 data8 sys_send
1505 data8 sys_sendto
1506 data8 sys_recv // 1200
1507 data8 sys_recvfrom
1508 data8 sys_shutdown
1509 data8 sys_setsockopt
1510 data8 sys_getsockopt
1511 data8 sys_sendmsg // 1205
1512 data8 sys_recvmsg
1513 data8 sys_pivot_root
1514 data8 sys_mincore
1515 data8 sys_madvise
1516 data8 sys_newstat // 1210
1517 data8 sys_newlstat
1518 data8 sys_newfstat
1519 data8 sys_clone2
1520 data8 sys_getdents64
1521 data8 sys_getunwind // 1215
1522 data8 sys_readahead
1523 data8 sys_setxattr
1524 data8 sys_lsetxattr
1525 data8 sys_fsetxattr
1526 data8 sys_getxattr // 1220
1527 data8 sys_lgetxattr
1528 data8 sys_fgetxattr
1529 data8 sys_listxattr
1530 data8 sys_llistxattr
1531 data8 sys_flistxattr // 1225
1532 data8 sys_removexattr
1533 data8 sys_lremovexattr
1534 data8 sys_fremovexattr
1535 data8 sys_tkill
1536 data8 sys_futex // 1230
1537 data8 sys_sched_setaffinity
1538 data8 sys_sched_getaffinity
1539 data8 sys_set_tid_address
1540 data8 sys_fadvise64_64
1541 data8 sys_tgkill // 1235
1542 data8 sys_exit_group
1543 data8 sys_lookup_dcookie
1544 data8 sys_io_setup
1545 data8 sys_io_destroy
1546 data8 sys_io_getevents // 1240
1547 data8 sys_io_submit
1548 data8 sys_io_cancel
1549 data8 sys_epoll_create
1550 data8 sys_epoll_ctl
1551 data8 sys_epoll_wait // 1245
1552 data8 sys_restart_syscall
1553 data8 sys_semtimedop
1554 data8 sys_timer_create
1555 data8 sys_timer_settime
1556 data8 sys_timer_gettime // 1250
1557 data8 sys_timer_getoverrun
1558 data8 sys_timer_delete
1559 data8 sys_clock_settime
1560 data8 sys_clock_gettime
1561 data8 sys_clock_getres // 1255
1562 data8 sys_clock_nanosleep
1563 data8 sys_fstatfs64
1564 data8 sys_statfs64
1565 data8 sys_mbind
1566 data8 sys_get_mempolicy // 1260
1567 data8 sys_set_mempolicy
1568 data8 sys_mq_open
1569 data8 sys_mq_unlink
1570 data8 sys_mq_timedsend
1571 data8 sys_mq_timedreceive // 1265
1572 data8 sys_mq_notify
1573 data8 sys_mq_getsetattr
1574 data8 sys_ni_syscall // reserved for kexec_load
1575 data8 sys_ni_syscall // reserved for vserver
1576 data8 sys_waitid // 1270
1577 data8 sys_add_key
1578 data8 sys_request_key
1579 data8 sys_keyctl
1580 data8 sys_ni_syscall
1581 data8 sys_ni_syscall // 1275
1582 data8 sys_ni_syscall
1583 data8 sys_ni_syscall
1584 data8 sys_ni_syscall
1585 data8 sys_ni_syscall
1586
1587 .org sys_call_table + 8*NR_syscalls // guard against failures to increase NR_syscalls
diff --git a/arch/ia64/kernel/entry.h b/arch/ia64/kernel/entry.h
new file mode 100644
index 000000000000..6d4ecec989b5
--- /dev/null
+++ b/arch/ia64/kernel/entry.h
@@ -0,0 +1,82 @@
1#include <linux/config.h>
2
3/*
4 * Preserved registers that are shared between code in ivt.S and
5 * entry.S. Be careful not to step on these!
6 */
7#define PRED_LEAVE_SYSCALL 1 /* TRUE iff leave from syscall */
8#define PRED_KERNEL_STACK 2 /* returning to kernel-stacks? */
9#define PRED_USER_STACK 3 /* returning to user-stacks? */
10#define PRED_SYSCALL 4 /* inside a system call? */
11#define PRED_NON_SYSCALL 5 /* complement of PRED_SYSCALL */
12
13#ifdef __ASSEMBLY__
14# define PASTE2(x,y) x##y
15# define PASTE(x,y) PASTE2(x,y)
16
17# define pLvSys PASTE(p,PRED_LEAVE_SYSCALL)
18# define pKStk PASTE(p,PRED_KERNEL_STACK)
19# define pUStk PASTE(p,PRED_USER_STACK)
20# define pSys PASTE(p,PRED_SYSCALL)
21# define pNonSys PASTE(p,PRED_NON_SYSCALL)
22#endif
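/*
 * Illustrative note (not part of the original header): PASTE() just glues the
 * "p" prefix onto the predicate number, e.g.
 *
 *	pLvSys  -> PASTE(p, PRED_LEAVE_SYSCALL) -> PASTE2(p, 1) -> p1
 *	pUStk   -> p3
 *	pNonSys -> p5
 *
 * so the names above are readable aliases for the preserved predicate
 * registers p1-p5.
 */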
23
24#define PT(f) (IA64_PT_REGS_##f##_OFFSET)
25#define SW(f) (IA64_SWITCH_STACK_##f##_OFFSET)
26
27#define PT_REGS_SAVES(off) \
28 .unwabi 3, 'i'; \
29 .fframe IA64_PT_REGS_SIZE+16+(off); \
30 .spillsp rp, PT(CR_IIP)+16+(off); \
31 .spillsp ar.pfs, PT(CR_IFS)+16+(off); \
32 .spillsp ar.unat, PT(AR_UNAT)+16+(off); \
33 .spillsp ar.fpsr, PT(AR_FPSR)+16+(off); \
34 .spillsp pr, PT(PR)+16+(off);
35
36#define PT_REGS_UNWIND_INFO(off) \
37 .prologue; \
38 PT_REGS_SAVES(off); \
39 .body
40
41#define SWITCH_STACK_SAVES(off) \
42 .savesp ar.unat,SW(CALLER_UNAT)+16+(off); \
43 .savesp ar.fpsr,SW(AR_FPSR)+16+(off); \
44 .spillsp f2,SW(F2)+16+(off); .spillsp f3,SW(F3)+16+(off); \
45 .spillsp f4,SW(F4)+16+(off); .spillsp f5,SW(F5)+16+(off); \
46 .spillsp f16,SW(F16)+16+(off); .spillsp f17,SW(F17)+16+(off); \
47 .spillsp f18,SW(F18)+16+(off); .spillsp f19,SW(F19)+16+(off); \
48 .spillsp f20,SW(F20)+16+(off); .spillsp f21,SW(F21)+16+(off); \
49 .spillsp f22,SW(F22)+16+(off); .spillsp f23,SW(F23)+16+(off); \
50 .spillsp f24,SW(F24)+16+(off); .spillsp f25,SW(F25)+16+(off); \
51 .spillsp f26,SW(F26)+16+(off); .spillsp f27,SW(F27)+16+(off); \
52 .spillsp f28,SW(F28)+16+(off); .spillsp f29,SW(F29)+16+(off); \
53 .spillsp f30,SW(F30)+16+(off); .spillsp f31,SW(F31)+16+(off); \
54 .spillsp r4,SW(R4)+16+(off); .spillsp r5,SW(R5)+16+(off); \
55 .spillsp r6,SW(R6)+16+(off); .spillsp r7,SW(R7)+16+(off); \
56 .spillsp b0,SW(B0)+16+(off); .spillsp b1,SW(B1)+16+(off); \
57 .spillsp b2,SW(B2)+16+(off); .spillsp b3,SW(B3)+16+(off); \
58 .spillsp b4,SW(B4)+16+(off); .spillsp b5,SW(B5)+16+(off); \
59 .spillsp ar.pfs,SW(AR_PFS)+16+(off); .spillsp ar.lc,SW(AR_LC)+16+(off); \
60 .spillsp @priunat,SW(AR_UNAT)+16+(off); \
61 .spillsp ar.rnat,SW(AR_RNAT)+16+(off); \
62 .spillsp ar.bspstore,SW(AR_BSPSTORE)+16+(off); \
63 .spillsp pr,SW(PR)+16+(off))
64
65#define DO_SAVE_SWITCH_STACK \
66 movl r28=1f; \
67 ;; \
68 .fframe IA64_SWITCH_STACK_SIZE; \
69 adds sp=-IA64_SWITCH_STACK_SIZE,sp; \
70 mov.ret.sptk b7=r28,1f; \
71 SWITCH_STACK_SAVES(0); \
72 br.cond.sptk.many save_switch_stack; \
731:
74
75#define DO_LOAD_SWITCH_STACK \
76 movl r28=1f; \
77 ;; \
78 invala; \
79 mov.ret.sptk b7=r28,1f; \
80 br.cond.sptk.many load_switch_stack; \
811: .restore sp; \
82 adds sp=IA64_SWITCH_STACK_SIZE,sp
diff --git a/arch/ia64/kernel/fsys.S b/arch/ia64/kernel/fsys.S
new file mode 100644
index 000000000000..0d8650f7fce7
--- /dev/null
+++ b/arch/ia64/kernel/fsys.S
@@ -0,0 +1,884 @@
1/*
2 * This file contains the light-weight system call handlers (fsyscall-handlers).
3 *
4 * Copyright (C) 2003 Hewlett-Packard Co
5 * David Mosberger-Tang <davidm@hpl.hp.com>
6 *
7 * 25-Sep-03 davidm Implement fsys_rt_sigprocmask().
8 * 18-Feb-03 louisk Implement fsys_gettimeofday().
9 * 28-Feb-03 davidm Fixed several bugs in fsys_gettimeofday(). Tuned it some more,
10 * probably broke it along the way... ;-)
11 * 13-Jul-04 clameter Implement fsys_clock_gettime and revise fsys_gettimeofday to make
12 * it capable of using memory based clocks without falling back to C code.
13 */
14
15#include <asm/asmmacro.h>
16#include <asm/errno.h>
17#include <asm/offsets.h>
18#include <asm/percpu.h>
19#include <asm/thread_info.h>
20#include <asm/sal.h>
21#include <asm/signal.h>
22#include <asm/system.h>
23#include <asm/unistd.h>
24
25#include "entry.h"
26
27/*
28 * See Documentation/ia64/fsys.txt for details on fsyscalls.
29 *
30 * On entry to an fsyscall handler:
31 * r10 = 0 (i.e., defaults to "successful syscall return")
32 * r11 = saved ar.pfs (a user-level value)
33 * r15 = system call number
34 * r16 = "current" task pointer (in normal kernel-mode, this is in r13)
35 * r32-r39 = system call arguments
36 * b6 = return address (a user-level value)
37 * ar.pfs = previous frame-state (a user-level value)
38 * PSR.be = cleared to zero (i.e., little-endian byte order is in effect)
39 * all other registers may contain values passed in from user-mode
40 *
41 * On return from an fsyscall handler:
42 * r11 = saved ar.pfs (as passed into the fsyscall handler)
43 * r15 = system call number (as passed into the fsyscall handler)
44 * r32-r39 = system call arguments (as passed into the fsyscall handler)
45 * b6 = return address (as passed into the fsyscall handler)
46 * ar.pfs = previous frame-state (as passed into the fsyscall handler)
47 */
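/*
 * Illustrative sketch (not part of the original source): under the conventions
 * above, a handler reports its outcome roughly as
 *
 *	r8 = return_value; r10 = 0;      // success (r10 already defaults to 0)
 *	r8 = errno;        r10 = -1;     // failure
 *	branch to fsys_fallback_syscall  // give up, redo it as a normal,
 *	                                 // heavy-weight system call
 *
 * while leaving r11, r15, r32-r39, b6 and ar.pfs untouched for FSYS_RETURN.
 */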
48
49ENTRY(fsys_ni_syscall)
50 .prologue
51 .altrp b6
52 .body
53 mov r8=ENOSYS
54 mov r10=-1
55 FSYS_RETURN
56END(fsys_ni_syscall)
57
58ENTRY(fsys_getpid)
59 .prologue
60 .altrp b6
61 .body
62 add r9=TI_FLAGS+IA64_TASK_SIZE,r16
63 ;;
64 ld4 r9=[r9]
65 add r8=IA64_TASK_TGID_OFFSET,r16
66 ;;
67 and r9=TIF_ALLWORK_MASK,r9
68 ld4 r8=[r8] // r8 = current->tgid
69 ;;
70 cmp.ne p8,p0=0,r9
71(p8) br.spnt.many fsys_fallback_syscall
72 FSYS_RETURN
73END(fsys_getpid)
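/*
 * Illustrative sketch (not part of the original source): fsys_getpid above is
 * roughly the following C, executed without ever entering the kernel proper:
 *
 *	if (current_thread_info()->flags & TIF_ALLWORK_MASK)
 *		return fallback_to_heavy_syscall();   // descriptive name only
 *	return current->tgid;
 */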
74
75ENTRY(fsys_getppid)
76 .prologue
77 .altrp b6
78 .body
79 add r17=IA64_TASK_GROUP_LEADER_OFFSET,r16
80 ;;
81 ld8 r17=[r17] // r17 = current->group_leader
82 add r9=TI_FLAGS+IA64_TASK_SIZE,r16
83 ;;
84
85 ld4 r9=[r9]
86 add r17=IA64_TASK_REAL_PARENT_OFFSET,r17 // r17 = &current->group_leader->real_parent
87 ;;
88 and r9=TIF_ALLWORK_MASK,r9
89
901: ld8 r18=[r17] // r18 = current->group_leader->real_parent
91 ;;
92 cmp.ne p8,p0=0,r9
93 add r8=IA64_TASK_TGID_OFFSET,r18 // r8 = &current->group_leader->real_parent->tgid
94 ;;
95
96 /*
97 * The .acq is needed to ensure that the read of tgid has returned its data before
98 * we re-check "real_parent".
99 */
100 ld4.acq r8=[r8] // r8 = current->group_leader->real_parent->tgid
101#ifdef CONFIG_SMP
102 /*
103 * Re-read current->group_leader->real_parent.
104 */
105 ld8 r19=[r17] // r19 = current->group_leader->real_parent
106(p8) br.spnt.many fsys_fallback_syscall
107 ;;
108 cmp.ne p6,p0=r18,r19 // did real_parent change?
109 mov r19=0 // i must not leak kernel bits...
110(p6) br.cond.spnt.few 1b // yes -> redo the read of tgid and the check
111 ;;
112 mov r17=0 // i must not leak kernel bits...
113 mov r18=0 // i must not leak kernel bits...
114#else
115 mov r17=0 // i must not leak kernel bits...
116 mov r18=0 // i must not leak kernel bits...
117 mov r19=0 // i must not leak kernel bits...
118#endif
119 FSYS_RETURN
120END(fsys_getppid)
121
122ENTRY(fsys_set_tid_address)
123 .prologue
124 .altrp b6
125 .body
126 add r9=TI_FLAGS+IA64_TASK_SIZE,r16
127 ;;
128 ld4 r9=[r9]
129 tnat.z p6,p7=r32 // check argument register for being NaT
130 ;;
131 and r9=TIF_ALLWORK_MASK,r9
132 add r8=IA64_TASK_PID_OFFSET,r16
133 add r18=IA64_TASK_CLEAR_CHILD_TID_OFFSET,r16
134 ;;
135 ld4 r8=[r8]
136 cmp.ne p8,p0=0,r9
137 mov r17=-1
138 ;;
139(p6) st8 [r18]=r32
140(p7) st8 [r18]=r17
141(p8) br.spnt.many fsys_fallback_syscall
142 ;;
143 mov r17=0 // i must not leak kernel bits...
144 mov r18=0 // i must not leak kernel bits...
145 FSYS_RETURN
146END(fsys_set_tid_address)
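/*
 * Illustrative sketch (not part of the original source): fsys_set_tid_address
 * corresponds roughly to
 *
 *	current->clear_child_tid = arg_is_NaT(tidptr) ? (int *) -1 : tidptr;
 *	if (current_thread_info()->flags & TIF_ALLWORK_MASK)
 *		return fallback_to_heavy_syscall();   // descriptive name only
 *	return current->pid;
 */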
147
148/*
149 * Ensure that the time interpolator structure is compatible with the asm code
150 */
151#if IA64_TIME_INTERPOLATOR_SOURCE_OFFSET !=0 || IA64_TIME_INTERPOLATOR_SHIFT_OFFSET != 2 \
152 || IA64_TIME_INTERPOLATOR_JITTER_OFFSET != 3 || IA64_TIME_INTERPOLATOR_NSEC_OFFSET != 4
153#error fsys_gettimeofday incompatible with changes to struct time_interpolator
154#endif
155#define CLOCK_REALTIME 0
156#define CLOCK_MONOTONIC 1
157#define CLOCK_DIVIDE_BY_1000 0x4000
158#define CLOCK_ADD_MONOTONIC 0x8000
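/*
 * Illustrative note (not part of the original source): these two flag bits end
 * up in predicate registers via "mov pr = r30,0xc000" in .gettime, i.e. roughly
 *
 *	p14 = (r30 & CLOCK_DIVIDE_BY_1000) != 0;   // gettimeofday(): ns -> us
 *	p15 = (r30 & CLOCK_ADD_MONOTONIC)  != 0;   // add wall_to_monotonic
 *
 * fsys_gettimeofday sets r30 = CLOCK_DIVIDE_BY_1000, while fsys_clock_gettime
 * sets r30 = clock_id << 15, so CLOCK_MONOTONIC (1) maps to CLOCK_ADD_MONOTONIC.
 */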
159
160ENTRY(fsys_gettimeofday)
161 .prologue
162 .altrp b6
163 .body
164 mov r31 = r32
165 tnat.nz p6,p0 = r33 // guard against NaT argument
166(p6) br.cond.spnt.few .fail_einval
167 mov r30 = CLOCK_DIVIDE_BY_1000
168 ;;
169.gettime:
170 // Register map
171 // Incoming r31 = pointer to address where to place result
172 // r30 = flags determining how time is processed
173 // r2,r3 = temp r4-r7 preserved
174 // r8 = result nanoseconds
175 // r9 = result seconds
176 // r10 = temporary storage for clock difference
177 // r11 = preserved: saved ar.pfs
178 // r12 = preserved: memory stack
179 // r13 = preserved: thread pointer
180 // r14 = address of mask / mask
181 // r15 = preserved: system call number
182 // r16 = preserved: current task pointer
183 // r17 = wall to monotonic use
184 // r18 = time_interpolator->offset
185 // r19 = address of wall_to_monotonic
186 // r20 = pointer to struct time_interpolator / pointer to time_interpolator->address
187 // r21 = shift factor
188 // r22 = address of time interpolator->last_counter
189 // r23 = address of time_interpolator->last_cycle
190 // r24 = address of time_interpolator->offset
191 // r25 = last_cycle value
192 // r26 = last_counter value
193 // r27 = pointer to xtime
194 // r28 = sequence number at the beginning of critical section
195 // r29 = address of seqlock
196 // r30 = time processing flags / memory address
197 // r31 = pointer to result
198 // Predicates
199 // p6,p7 short term use
200 // p8 = timesource ar.itc
201 // p9 = timesource mmio64
202 // p10 = timesource mmio32
203 // p11 = timesource not to be handled by asm code
204 // p12 = memory time source ( = p9 | p10)
205 // p13 = do cmpxchg with time_interpolator_last_cycle
206 // p14 = Divide by 1000
207 // p15 = Add monotonic
208 //
209 // Note that instructions are optimized for McKinley. McKinley can process two
210 // bundles simultaneously and therefore we continuously try to feed the CPU
211 // two bundles and then a stop.
212 tnat.nz p6,p0 = r31 // branch deferred since it does not fit into bundle structure
213 mov pr = r30,0xc000 // Set predicates according to function
214 add r2 = TI_FLAGS+IA64_TASK_SIZE,r16
215 movl r20 = time_interpolator
216 ;;
217 ld8 r20 = [r20] // get pointer to time_interpolator structure
218 movl r29 = xtime_lock
219 ld4 r2 = [r2] // process work pending flags
220 movl r27 = xtime
221 ;; // only one bundle here
222 ld8 r21 = [r20] // first quad with control information
223 and r2 = TIF_ALLWORK_MASK,r2
224(p6) br.cond.spnt.few .fail_einval // deferred branch
225 ;;
226 add r10 = IA64_TIME_INTERPOLATOR_ADDRESS_OFFSET,r20
227 extr r3 = r21,32,32 // time_interpolator->nsec_per_cyc
228 extr r8 = r21,0,16 // time_interpolator->source
229 cmp.ne p6, p0 = 0, r2 // Fallback if work is scheduled
230(p6) br.cond.spnt.many fsys_fallback_syscall
231 ;;
232 cmp.eq p8,p12 = 0,r8 // Check for cpu timer
233 cmp.eq p9,p0 = 1,r8 // MMIO64 ?
234 extr r2 = r21,24,8 // time_interpolator->jitter
235 cmp.eq p10,p0 = 2,r8 // MMIO32 ?
236 cmp.ltu p11,p0 = 2,r8 // function or other clock
237(p11) br.cond.spnt.many fsys_fallback_syscall
238 ;;
239 setf.sig f7 = r3 // Setup for scaling of counter
240(p15) movl r19 = wall_to_monotonic
241(p12) ld8 r30 = [r10]
242 cmp.ne p13,p0 = r2,r0 // need jitter compensation?
243 extr r21 = r21,16,8 // shift factor
244 ;;
245.time_redo:
246 .pred.rel.mutex p8,p9,p10
247 ld4.acq r28 = [r29] // xtime_lock.sequence. Must come first for locking purposes
248(p8) mov r2 = ar.itc // CPU_TIMER. 36 clocks latency!!!
249 add r22 = IA64_TIME_INTERPOLATOR_LAST_COUNTER_OFFSET,r20
250(p9) ld8 r2 = [r30] // readq(ti->address). Could also have latency issues..
251(p10) ld4 r2 = [r30] // readw(ti->address)
252(p13) add r23 = IA64_TIME_INTERPOLATOR_LAST_CYCLE_OFFSET,r20
253 ;; // could be removed by moving the last add upward
254 ld8 r26 = [r22] // time_interpolator->last_counter
255(p13) ld8 r25 = [r23] // time interpolator->last_cycle
256 add r24 = IA64_TIME_INTERPOLATOR_OFFSET_OFFSET,r20
257(p15) ld8 r17 = [r19],IA64_TIMESPEC_TV_NSEC_OFFSET
258 ld8 r9 = [r27],IA64_TIMESPEC_TV_NSEC_OFFSET
259 add r14 = IA64_TIME_INTERPOLATOR_MASK_OFFSET, r20
260 ;;
261 ld8 r18 = [r24] // time_interpolator->offset
262 ld8 r8 = [r27],-IA64_TIMESPEC_TV_NSEC_OFFSET // xtime.tv_nsec
263(p13) sub r3 = r25,r2 // Diff needed before comparison (thanks davidm)
264 ;;
265 ld8 r14 = [r14] // time_interpolator->mask
266(p13) cmp.gt.unc p6,p7 = r3,r0 // check if it is less than last. p6,p7 cleared
267 sub r10 = r2,r26 // current_counter - last_counter
268 ;;
269(p6) sub r10 = r25,r26 // time we got was less than last_cycle
270(p7) mov ar.ccv = r25 // more than last_cycle. Prep for cmpxchg
271 ;;
272 and r10 = r10,r14 // Apply mask
273 ;;
274 setf.sig f8 = r10
275 nop.i 123
276 ;;
277(p7) cmpxchg8.rel r3 = [r23],r2,ar.ccv
278EX(.fail_efault, probe.w.fault r31, 3) // This takes 5 cycles and we have spare time
279 xmpy.l f8 = f8,f7 // nsec_per_cyc*(counter-last_counter)
280(p15) add r9 = r9,r17 // Add wall to monotonic.secs to result secs
281 ;;
282(p15) ld8 r17 = [r19],-IA64_TIMESPEC_TV_NSEC_OFFSET
283(p7) cmp.ne p7,p0 = r25,r3 // if cmpxchg not successful redo
284 // simulate tbit.nz.or p7,p0 = r28,0
285 and r28 = ~1,r28 // Make sequence even to force retry if odd
286 getf.sig r2 = f8
287 mf
288 add r8 = r8,r18 // Add time interpolator offset
289 ;;
290 ld4 r10 = [r29] // xtime_lock.sequence
291(p15) add r8 = r8, r17 // Add monotonic.nsecs to nsecs
292 shr.u r2 = r2,r21
293 ;; // overloaded 3 bundles!
294 // End critical section.
295 add r8 = r8,r2 // Add xtime.nsecs
296 cmp4.ne.or p7,p0 = r28,r10
297(p7) br.cond.dpnt.few .time_redo // sequence number changed ?
298 // Now r8=tv->tv_nsec and r9=tv->tv_sec
299 mov r10 = r0
300 movl r2 = 1000000000
301 add r23 = IA64_TIMESPEC_TV_NSEC_OFFSET, r31
302(p14) movl r3 = 2361183241434822607 // Prep for / 1000 hack
303 ;;
304.time_normalize:
305 mov r21 = r8
306 cmp.ge p6,p0 = r8,r2
307(p14) shr.u r20 = r8, 3 // We can repeat this if necessary just wasting some time
308 ;;
309(p14) setf.sig f8 = r20
310(p6) sub r8 = r8,r2
311(p6) add r9 = 1,r9 // two nops before the branch.
312(p14) setf.sig f7 = r3 // Chances for repeats are 1 in 10000 for gettod
313(p6) br.cond.dpnt.few .time_normalize
314 ;;
315 // Divided by 8 through shift. Now divide by 125
316 // The compiler was able to do that with a multiply
317 // and a shift and we do the same
318EX(.fail_efault, probe.w.fault r23, 3) // This also costs 5 cycles
319(p14) xmpy.hu f8 = f8, f7 // xmpy has 5 cycles latency so use it...
320 ;;
321 mov r8 = r0
322(p14) getf.sig r2 = f8
323 ;;
324(p14) shr.u r21 = r2, 4
325 ;;
326EX(.fail_efault, st8 [r31] = r9)
327EX(.fail_efault, st8 [r23] = r21)
328 FSYS_RETURN
329.fail_einval:
330 mov r8 = EINVAL
331 mov r10 = -1
332 FSYS_RETURN
333.fail_efault:
334 mov r8 = EFAULT
335 mov r10 = -1
336 FSYS_RETURN
337END(fsys_gettimeofday)
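/*
 * Illustrative sketch (not part of the original source), ignoring the jitter
 * compensation and the cmpxchg on last_cycle: with ti = time_interpolator,
 * the .gettime core above is roughly
 *
 *	do {
 *		seq   = xtime_lock.sequence;                 // acquire-read
 *		delta = (read_counter() - ti->last_counter) & ti->mask;
 *		sec   = xtime.tv_sec;
 *		nsec  = xtime.tv_nsec + ti->offset
 *			+ ((delta * ti->nsec_per_cyc) >> ti->shift);
 *		if (add_monotonic) {                         // p15
 *			sec  += wall_to_monotonic.tv_sec;
 *			nsec += wall_to_monotonic.tv_nsec;
 *		}
 *	} while (seq was odd || xtime_lock.sequence != seq);
 *
 *	while (nsec >= 1000000000) {
 *		nsec -= 1000000000;
 *		sec++;
 *	}
 *	// gettimeofday() (p14) additionally divides nsec by 1000 via the
 *	// multiply-high "/1000 hack" before storing the result
 */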
338
339ENTRY(fsys_clock_gettime)
340 .prologue
341 .altrp b6
342 .body
343 cmp4.ltu p6, p0 = CLOCK_MONOTONIC, r32
344 // Fallback if this is not CLOCK_REALTIME or CLOCK_MONOTONIC
345(p6) br.spnt.few fsys_fallback_syscall
346 mov r31 = r33
347 shl r30 = r32,15
348 br.many .gettime
349END(fsys_clock_gettime)
350
351/*
352 * long fsys_rt_sigprocmask (int how, sigset_t *set, sigset_t *oset, size_t sigsetsize).
353 */
354#if _NSIG_WORDS != 1
355# error Sorry, fsys_rt_sigprocmask() needs to be updated for _NSIG_WORDS != 1.
356#endif
357ENTRY(fsys_rt_sigprocmask)
358 .prologue
359 .altrp b6
360 .body
361
362 add r2=IA64_TASK_BLOCKED_OFFSET,r16
363 add r9=TI_FLAGS+IA64_TASK_SIZE,r16
364 cmp4.ltu p6,p0=SIG_SETMASK,r32
365
366 cmp.ne p15,p0=r0,r34 // oset != NULL?
367 tnat.nz p8,p0=r34
368 add r31=IA64_TASK_SIGHAND_OFFSET,r16
369 ;;
370 ld8 r3=[r2] // read/prefetch current->blocked
371 ld4 r9=[r9]
372 tnat.nz.or p6,p0=r35
373
374 cmp.ne.or p6,p0=_NSIG_WORDS*8,r35
375 tnat.nz.or p6,p0=r32
376(p6) br.spnt.few .fail_einval // fail with EINVAL
377 ;;
378#ifdef CONFIG_SMP
379 ld8 r31=[r31] // r31 <- current->sighand
380#endif
381 and r9=TIF_ALLWORK_MASK,r9
382 tnat.nz.or p8,p0=r33
383 ;;
384 cmp.ne p7,p0=0,r9
385 cmp.eq p6,p0=r0,r33 // set == NULL?
386 add r31=IA64_SIGHAND_SIGLOCK_OFFSET,r31 // r31 <- current->sighand->siglock
387(p8) br.spnt.few .fail_efault // fail with EFAULT
388(p7) br.spnt.many fsys_fallback_syscall // got pending kernel work...
389(p6) br.dpnt.many .store_mask // -> short-circuit to just reading the signal mask
390
391 /* Argh, we actually have to do some work and _update_ the signal mask: */
392
393EX(.fail_efault, probe.r.fault r33, 3) // verify user has read-access to *set
394EX(.fail_efault, ld8 r14=[r33]) // r14 <- *set
395 mov r17=(1 << (SIGKILL - 1)) | (1 << (SIGSTOP - 1))
396 ;;
397
398 rsm psr.i // mask interrupt delivery
399 mov ar.ccv=0
400 andcm r14=r14,r17 // filter out SIGKILL & SIGSTOP
401
402#ifdef CONFIG_SMP
403 mov r17=1
404 ;;
405 cmpxchg4.acq r18=[r31],r17,ar.ccv // try to acquire the lock
406 mov r8=EINVAL // default to EINVAL
407 ;;
408 ld8 r3=[r2] // re-read current->blocked now that we hold the lock
409 cmp4.ne p6,p0=r18,r0
410(p6) br.cond.spnt.many .lock_contention
411 ;;
412#else
413 ld8 r3=[r2] // re-read current->blocked now that we hold the lock
414 mov r8=EINVAL // default to EINVAL
415#endif
416 add r18=IA64_TASK_PENDING_OFFSET+IA64_SIGPENDING_SIGNAL_OFFSET,r16
417 add r19=IA64_TASK_SIGNAL_OFFSET,r16
418 cmp4.eq p6,p0=SIG_BLOCK,r32
419 ;;
420 ld8 r19=[r19] // r19 <- current->signal
421 cmp4.eq p7,p0=SIG_UNBLOCK,r32
422 cmp4.eq p8,p0=SIG_SETMASK,r32
423 ;;
424 ld8 r18=[r18] // r18 <- current->pending.signal
425 .pred.rel.mutex p6,p7,p8
426(p6) or r14=r3,r14 // SIG_BLOCK
427(p7) andcm r14=r3,r14 // SIG_UNBLOCK
428
429(p8) mov r14=r14 // SIG_SETMASK
430(p6) mov r8=0 // clear error code
431 // recalc_sigpending()
432 add r17=IA64_SIGNAL_GROUP_STOP_COUNT_OFFSET,r19
433
434 add r19=IA64_SIGNAL_SHARED_PENDING_OFFSET+IA64_SIGPENDING_SIGNAL_OFFSET,r19
435 ;;
436 ld4 r17=[r17] // r17 <- current->signal->group_stop_count
437(p7) mov r8=0 // clear error code
438
439 ld8 r19=[r19] // r19 <- current->signal->shared_pending
440 ;;
441 cmp4.gt p6,p7=r17,r0 // p6/p7 <- (current->signal->group_stop_count > 0)?
442(p8) mov r8=0 // clear error code
443
444 or r18=r18,r19 // r18 <- current->pending | current->signal->shared_pending
445 ;;
446 // r18 <- (current->pending | current->signal->shared_pending) & ~current->blocked:
447 andcm r18=r18,r14
448 add r9=TI_FLAGS+IA64_TASK_SIZE,r16
449 ;;
450
451(p7) cmp.ne.or.andcm p6,p7=r18,r0 // p6/p7 <- signal pending
452 mov r19=0 // i must not leak kernel bits...
453(p6) br.cond.dpnt.many .sig_pending
454 ;;
455
4561: ld4 r17=[r9] // r17 <- current->thread_info->flags
457 ;;
458 mov ar.ccv=r17
459 and r18=~_TIF_SIGPENDING,r17 // r18 <- r17 & ~(1 << TIF_SIGPENDING)
460 ;;
461
462 st8 [r2]=r14 // update current->blocked with new mask
463 cmpxchg4.acq r14=[r9],r18,ar.ccv // current->thread_info->flags <- r18
464 ;;
465 cmp.ne p6,p0=r17,r14 // update failed?
466(p6) br.cond.spnt.few 1b // yes -> retry
467
468#ifdef CONFIG_SMP
469 st4.rel [r31]=r0 // release the lock
470#endif
471 ssm psr.i
472 ;;
473
474 srlz.d // ensure psr.i is set again
475 mov r18=0 // i must not leak kernel bits...
476
477.store_mask:
478EX(.fail_efault, (p15) probe.w.fault r34, 3) // verify user has write-access to *oset
479EX(.fail_efault, (p15) st8 [r34]=r3)
480 mov r2=0 // i must not leak kernel bits...
481 mov r3=0 // i must not leak kernel bits...
482 mov r8=0 // return 0
483 mov r9=0 // i must not leak kernel bits...
484 mov r14=0 // i must not leak kernel bits...
485 mov r17=0 // i must not leak kernel bits...
486 mov r31=0 // i must not leak kernel bits...
487 FSYS_RETURN
488
489.sig_pending:
490#ifdef CONFIG_SMP
491 st4.rel [r31]=r0 // release the lock
492#endif
493 ssm psr.i
494 ;;
495 srlz.d
496 br.sptk.many fsys_fallback_syscall // with signal pending, do the heavy-weight syscall
497
498#ifdef CONFIG_SMP
499.lock_contention:
500 /* Rather than spinning here, fall back on doing a heavy-weight syscall. */
501 ssm psr.i
502 ;;
503 srlz.d
504 br.sptk.many fsys_fallback_syscall
505#endif
506END(fsys_rt_sigprocmask)
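/*
 * Illustrative sketch (not part of the original source): the "update the mask"
 * path of fsys_rt_sigprocmask corresponds roughly to the following, where
 * old_blocked is the value of current->blocked read before the update:
 *
 *	new = *set & ~(sigmask(SIGKILL) | sigmask(SIGSTOP));
 *	spin_lock_irq(&current->sighand->siglock);   // rsm psr.i + cmpxchg on SMP
 *	switch (how) {
 *	case SIG_BLOCK:   new |= current->blocked;         break;
 *	case SIG_UNBLOCK: new  = current->blocked & ~new;  break;
 *	case SIG_SETMASK:                                  break;
 *	}
 *	if (current->signal->group_stop_count ||
 *	    ((current->pending.signal | current->signal->shared_pending.signal) & ~new))
 *		fall back to the heavy-weight syscall;   // recalc_sigpending() needed
 *	current->blocked = new;
 *	clear TIF_SIGPENDING;                        // cmpxchg loop on ->flags
 *	spin_unlock_irq(&current->sighand->siglock);
 *	if (oset)
 *		*oset = old_blocked;
 *	return 0;
 */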
507
508ENTRY(fsys_fallback_syscall)
509 .prologue
510 .altrp b6
511 .body
512 /*
513 * We only get here from light-weight syscall handlers. Thus, we already
514 * know that r15 contains a valid syscall number. No need to re-check.
515 */
516 adds r17=-1024,r15
517 movl r14=sys_call_table
518 ;;
519 rsm psr.i
520 shladd r18=r17,3,r14
521 ;;
522 ld8 r18=[r18] // load normal (heavy-weight) syscall entry-point
523 mov r29=psr // read psr (12 cyc load latency)
524 mov r27=ar.rsc
525 mov r21=ar.fpsr
526 mov r26=ar.pfs
527END(fsys_fallback_syscall)
528 /* FALL THROUGH */
529GLOBAL_ENTRY(fsys_bubble_down)
530 .prologue
531 .altrp b6
532 .body
533 /*
534 * We get here for syscalls that don't have a lightweight handler. For those, we
535 * need to bubble down into the kernel and that requires setting up a minimal
536 * pt_regs structure, and initializing the CPU state more or less as if an
537 * interruption had occurred. To make syscall-restarts work, we setup pt_regs
538 * such that cr_iip points to the second instruction in syscall_via_break.
539 * Hence, decrementing the IP restarts the syscall via break, while leaving the
540 * IP unchanged returns us to the caller, as usual. Note that we preserve
541 * the value of psr.pp rather than initializing it from dcr.pp. This makes it
542 * possible to distinguish fsyscall execution from other privileged execution.
543 *
544 * On entry:
545 * - normal fsyscall handler register usage, except that we also have:
546 * - r18: address of syscall entry point
547 * - r21: ar.fpsr
548 * - r26: ar.pfs
549 * - r27: ar.rsc
550 * - r29: psr
551 */
552# define PSR_PRESERVED_BITS (IA64_PSR_UP | IA64_PSR_MFL | IA64_PSR_MFH | IA64_PSR_PK \
553 | IA64_PSR_DT | IA64_PSR_PP | IA64_PSR_SP | IA64_PSR_RT \
554 | IA64_PSR_IC)
555 /*
556 * Reading psr.l gives us only bits 0-31, psr.it, and psr.mc. The rest we have
557 * to synthesize.
558 */
559# define PSR_ONE_BITS ((3 << IA64_PSR_CPL0_BIT) | (0x1 << IA64_PSR_RI_BIT) \
560 | IA64_PSR_BN | IA64_PSR_I)
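	/*
	 * Illustrative note (not part of the original source): the two masks are
	 * combined below roughly as
	 *
	 *	psr.l   = psr & PSR_PRESERVED_BITS;   // new kernel-side PSR ("slam the door")
	 *	cr.ipsr = psr | PSR_ONE_BITS;         // what rfi will restore for user level
	 *
	 * so the user's up/mfl/mfh/pk/dt/pp/sp/rt/ic bits survive, while the saved
	 * ipsr gets cpl=3, ri=1 (second slot of syscall_via_break), bn=1 and i=1
	 * forced on.
	 */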
561
562 invala
563 movl r8=PSR_ONE_BITS
564
565 mov r25=ar.unat // save ar.unat (5 cyc)
566 movl r9=PSR_PRESERVED_BITS
567
568 mov ar.rsc=0 // set enforced lazy mode, pl 0, little-endian, loadrs=0
569 movl r28=__kernel_syscall_via_break
570 ;;
571 mov r23=ar.bspstore // save ar.bspstore (12 cyc)
572 mov r31=pr // save pr (2 cyc)
573 mov r20=r1 // save caller's gp in r20
574 ;;
575 mov r2=r16 // copy current task addr to addl-addressable register
576 and r9=r9,r29
577 mov r19=b6 // save b6 (2 cyc)
578 ;;
579 mov psr.l=r9 // slam the door (17 cyc to srlz.i)
580 or r29=r8,r29 // construct cr.ipsr value to save
581 addl r22=IA64_RBS_OFFSET,r2 // compute base of RBS
582 ;;
583 // GAS reports a spurious RAW hazard on the read of ar.rnat because it thinks
584 // we may be reading ar.itc after writing to psr.l. Avoid that message with
585 // this directive:
586 dv_serialize_data
587 mov.m r24=ar.rnat // read ar.rnat (5 cyc lat)
588 lfetch.fault.excl.nt1 [r22]
589 adds r16=IA64_TASK_THREAD_ON_USTACK_OFFSET,r2
590
591 // ensure previous insn group is issued before we stall for srlz.i:
592 ;;
593 srlz.i // ensure new psr.l has been established
594 /////////////////////////////////////////////////////////////////////////////
595 ////////// from this point on, execution is not interruptible anymore
596 /////////////////////////////////////////////////////////////////////////////
597 addl r1=IA64_STK_OFFSET-IA64_PT_REGS_SIZE,r2 // compute base of memory stack
598 cmp.ne pKStk,pUStk=r0,r0 // set pKStk <- 0, pUStk <- 1
599 ;;
600 st1 [r16]=r0 // clear current->thread.on_ustack flag
601 mov ar.bspstore=r22 // switch to kernel RBS
602 mov b6=r18 // copy syscall entry-point to b6 (7 cyc)
603 add r3=TI_FLAGS+IA64_TASK_SIZE,r2
604 ;;
605 ld4 r3=[r3] // r3 = current_thread_info()->flags
606 mov r18=ar.bsp // save (kernel) ar.bsp (12 cyc)
607 mov ar.rsc=0x3 // set eager mode, pl 0, little-endian, loadrs=0
608 br.call.sptk.many b7=ia64_syscall_setup
609 ;;
610 ssm psr.i
611 movl r2=ia64_ret_from_syscall
612 ;;
613 mov rp=r2 // set the real return addr
614 tbit.z p8,p0=r3,TIF_SYSCALL_TRACE
615 ;;
616(p10) br.cond.spnt.many ia64_ret_from_syscall // p10==true means out registers are more than 8
617(p8) br.call.sptk.many b6=b6 // ignore this return addr
618 br.cond.sptk ia64_trace_syscall
619END(fsys_bubble_down)
620
621 .rodata
622 .align 8
623 .globl fsyscall_table
624
625 data8 fsys_bubble_down
626fsyscall_table:
627 data8 fsys_ni_syscall
628 data8 0 // exit // 1025
629 data8 0 // read
630 data8 0 // write
631 data8 0 // open
632 data8 0 // close
633 data8 0 // creat // 1030
634 data8 0 // link
635 data8 0 // unlink
636 data8 0 // execve
637 data8 0 // chdir
638 data8 0 // fchdir // 1035
639 data8 0 // utimes
640 data8 0 // mknod
641 data8 0 // chmod
642 data8 0 // chown
643 data8 0 // lseek // 1040
644 data8 fsys_getpid // getpid
645 data8 fsys_getppid // getppid
646 data8 0 // mount
647 data8 0 // umount
648 data8 0 // setuid // 1045
649 data8 0 // getuid
650 data8 0 // geteuid
651 data8 0 // ptrace
652 data8 0 // access
653 data8 0 // sync // 1050
654 data8 0 // fsync
655 data8 0 // fdatasync
656 data8 0 // kill
657 data8 0 // rename
658 data8 0 // mkdir // 1055
659 data8 0 // rmdir
660 data8 0 // dup
661 data8 0 // pipe
662 data8 0 // times
663 data8 0 // brk // 1060
664 data8 0 // setgid
665 data8 0 // getgid
666 data8 0 // getegid
667 data8 0 // acct
668 data8 0 // ioctl // 1065
669 data8 0 // fcntl
670 data8 0 // umask
671 data8 0 // chroot
672 data8 0 // ustat
673 data8 0 // dup2 // 1070
674 data8 0 // setreuid
675 data8 0 // setregid
676 data8 0 // getresuid
677 data8 0 // setresuid
678 data8 0 // getresgid // 1075
679 data8 0 // setresgid
680 data8 0 // getgroups
681 data8 0 // setgroups
682 data8 0 // getpgid
683 data8 0 // setpgid // 1080
684 data8 0 // setsid
685 data8 0 // getsid
686 data8 0 // sethostname
687 data8 0 // setrlimit
688 data8 0 // getrlimit // 1085
689 data8 0 // getrusage
690 data8 fsys_gettimeofday // gettimeofday
691 data8 0 // settimeofday
692 data8 0 // select
693 data8 0 // poll // 1090
694 data8 0 // symlink
695 data8 0 // readlink
696 data8 0 // uselib
697 data8 0 // swapon
698 data8 0 // swapoff // 1095
699 data8 0 // reboot
700 data8 0 // truncate
701 data8 0 // ftruncate
702 data8 0 // fchmod
703 data8 0 // fchown // 1100
704 data8 0 // getpriority
705 data8 0 // setpriority
706 data8 0 // statfs
707 data8 0 // fstatfs
708 data8 0 // gettid // 1105
709 data8 0 // semget
710 data8 0 // semop
711 data8 0 // semctl
712 data8 0 // msgget
713 data8 0 // msgsnd // 1110
714 data8 0 // msgrcv
715 data8 0 // msgctl
716 data8 0 // shmget
717 data8 0 // shmat
718 data8 0 // shmdt // 1115
719 data8 0 // shmctl
720 data8 0 // syslog
721 data8 0 // setitimer
722 data8 0 // getitimer
723 data8 0 // 1120
724 data8 0
725 data8 0
726 data8 0 // vhangup
727 data8 0 // lchown
728 data8 0 // remap_file_pages // 1125
729 data8 0 // wait4
730 data8 0 // sysinfo
731 data8 0 // clone
732 data8 0 // setdomainname
733 data8 0 // newuname // 1130
734 data8 0 // adjtimex
735 data8 0
736 data8 0 // init_module
737 data8 0 // delete_module
738 data8 0 // 1135
739 data8 0
740 data8 0 // quotactl
741 data8 0 // bdflush
742 data8 0 // sysfs
743 data8 0 // personality // 1140
744 data8 0 // afs_syscall
745 data8 0 // setfsuid
746 data8 0 // setfsgid
747 data8 0 // getdents
748 data8 0 // flock // 1145
749 data8 0 // readv
750 data8 0 // writev
751 data8 0 // pread64
752 data8 0 // pwrite64
753 data8 0 // sysctl // 1150
754 data8 0 // mmap
755 data8 0 // munmap
756 data8 0 // mlock
757 data8 0 // mlockall
758 data8 0 // mprotect // 1155
759 data8 0 // mremap
760 data8 0 // msync
761 data8 0 // munlock
762 data8 0 // munlockall
763 data8 0 // sched_getparam // 1160
764 data8 0 // sched_setparam
765 data8 0 // sched_getscheduler
766 data8 0 // sched_setscheduler
767 data8 0 // sched_yield
768 data8 0 // sched_get_priority_max // 1165
769 data8 0 // sched_get_priority_min
770 data8 0 // sched_rr_get_interval
771 data8 0 // nanosleep
772 data8 0 // nfsservctl
773 data8 0 // prctl // 1170
774 data8 0 // getpagesize
775 data8 0 // mmap2
776 data8 0 // pciconfig_read
777 data8 0 // pciconfig_write
778 data8 0 // perfmonctl // 1175
779 data8 0 // sigaltstack
780 data8 0 // rt_sigaction
781 data8 0 // rt_sigpending
782 data8 fsys_rt_sigprocmask // rt_sigprocmask
783 data8 0 // rt_sigqueueinfo // 1180
784 data8 0 // rt_sigreturn
785 data8 0 // rt_sigsuspend
786 data8 0 // rt_sigtimedwait
787 data8 0 // getcwd
788 data8 0 // capget // 1185
789 data8 0 // capset
790 data8 0 // sendfile
791 data8 0
792 data8 0
793 data8 0 // socket // 1190
794 data8 0 // bind
795 data8 0 // connect
796 data8 0 // listen
797 data8 0 // accept
798 data8 0 // getsockname // 1195
799 data8 0 // getpeername
800 data8 0 // socketpair
801 data8 0 // send
802 data8 0 // sendto
803 data8 0 // recv // 1200
804 data8 0 // recvfrom
805 data8 0 // shutdown
806 data8 0 // setsockopt
807 data8 0 // getsockopt
808 data8 0 // sendmsg // 1205
809 data8 0 // recvmsg
810 data8 0 // pivot_root
811 data8 0 // mincore
812 data8 0 // madvise
813 data8 0 // newstat // 1210
814 data8 0 // newlstat
815 data8 0 // newfstat
816 data8 0 // clone2
817 data8 0 // getdents64
818 data8 0 // getunwind // 1215
819 data8 0 // readahead
820 data8 0 // setxattr
821 data8 0 // lsetxattr
822 data8 0 // fsetxattr
823 data8 0 // getxattr // 1220
824 data8 0 // lgetxattr
825 data8 0 // fgetxattr
826 data8 0 // listxattr
827 data8 0 // llistxattr
828 data8 0 // flistxattr // 1225
829 data8 0 // removexattr
830 data8 0 // lremovexattr
831 data8 0 // fremovexattr
832 data8 0 // tkill
833 data8 0 // futex // 1230
834 data8 0 // sched_setaffinity
835 data8 0 // sched_getaffinity
836 data8 fsys_set_tid_address // set_tid_address
837 data8 0 // fadvise64_64
838 data8 0 // tgkill // 1235
839 data8 0 // exit_group
840 data8 0 // lookup_dcookie
841 data8 0 // io_setup
842 data8 0 // io_destroy
843 data8 0 // io_getevents // 1240
844 data8 0 // io_submit
845 data8 0 // io_cancel
846 data8 0 // epoll_create
847 data8 0 // epoll_ctl
848 data8 0 // epoll_wait // 1245
849 data8 0 // restart_syscall
850 data8 0 // semtimedop
851 data8 0 // timer_create
852 data8 0 // timer_settime
853 data8 0 // timer_gettime // 1250
854 data8 0 // timer_getoverrun
855 data8 0 // timer_delete
856 data8 0 // clock_settime
857 data8 fsys_clock_gettime // clock_gettime
858 data8 0 // clock_getres // 1255
859 data8 0 // clock_nanosleep
860 data8 0 // fstatfs64
861 data8 0 // statfs64
862 data8 0
863 data8 0 // 1260
864 data8 0
865 data8 0 // mq_open
866 data8 0 // mq_unlink
867 data8 0 // mq_timedsend
868 data8 0 // mq_timedreceive // 1265
869 data8 0 // mq_notify
870 data8 0 // mq_getsetattr
871 data8 0 // kexec_load
872 data8 0
873 data8 0 // 1270
874 data8 0
875 data8 0
876 data8 0
877 data8 0
878 data8 0 // 1275
879 data8 0
880 data8 0
881 data8 0
882 data8 0
883
884 .org fsyscall_table + 8*NR_syscalls // guard against failures to increase NR_syscalls
diff --git a/arch/ia64/kernel/gate-data.S b/arch/ia64/kernel/gate-data.S
new file mode 100644
index 000000000000..258c0a3238fb
--- /dev/null
+++ b/arch/ia64/kernel/gate-data.S
@@ -0,0 +1,3 @@
1 .section .data.gate, "aw"
2
3 .incbin "arch/ia64/kernel/gate.so"
diff --git a/arch/ia64/kernel/gate.S b/arch/ia64/kernel/gate.S
new file mode 100644
index 000000000000..facf75acdc85
--- /dev/null
+++ b/arch/ia64/kernel/gate.S
@@ -0,0 +1,372 @@
1/*
2 * This file contains the code that gets mapped at the upper end of each task's text
3 * region. For now, it contains the signal trampoline code only.
4 *
5 * Copyright (C) 1999-2003 Hewlett-Packard Co
6 * David Mosberger-Tang <davidm@hpl.hp.com>
7 */
8
9#include <linux/config.h>
10
11#include <asm/asmmacro.h>
12#include <asm/errno.h>
13#include <asm/offsets.h>
14#include <asm/sigcontext.h>
15#include <asm/system.h>
16#include <asm/unistd.h>
17
18/*
19 * We can't easily refer to symbols inside the kernel. To avoid full runtime relocation,
20 * complications with the linker (which likes to create PLT stubs for branches
21 * to targets outside the shared object) and to avoid multi-phase kernel builds, we
22 * simply create minimalistic "patch lists" in special ELF sections.
23 */
24 .section ".data.patch.fsyscall_table", "a"
25 .previous
26#define LOAD_FSYSCALL_TABLE(reg) \
27[1:] movl reg=0; \
28 .xdata4 ".data.patch.fsyscall_table", 1b-.
29
30 .section ".data.patch.brl_fsys_bubble_down", "a"
31 .previous
32#define BRL_COND_FSYS_BUBBLE_DOWN(pr) \
33[1:](pr)brl.cond.sptk 0; \
34 .xdata4 ".data.patch.brl_fsys_bubble_down", 1b-.
35
36GLOBAL_ENTRY(__kernel_syscall_via_break)
37 .prologue
38 .altrp b6
39 .body
40 /*
41 * Note: for (fast) syscall restart to work, the break instruction must be
42 * the first one in the bundle addressed by syscall_via_break.
43 */
44{ .mib
45 break 0x100000
46 nop.i 0
47 br.ret.sptk.many b6
48}
49END(__kernel_syscall_via_break)
50
51/*
52 * On entry:
53 * r11 = saved ar.pfs
54 * r15 = system call #
55 * b0 = saved return address
56 * b6 = return address
57 * On exit:
58 * r11 = saved ar.pfs
59 * r15 = system call #
60 * b0 = saved return address
61 * all other "scratch" registers: undefined
62 * all "preserved" registers: same as on entry
63 */
64
65GLOBAL_ENTRY(__kernel_syscall_via_epc)
66 .prologue
67 .altrp b6
68 .body
69{
70 /*
71 * Note: the kernel cannot assume that the first two instructions in this
72 * bundle get executed. The remaining code must be safe even if
73 * they do not get executed.
74 */
75 adds r17=-1024,r15
76 mov r10=0 // default to successful syscall execution
77 epc
78}
79 ;;
80 rsm psr.be // note: on McKinley "rsm psr.be/srlz.d" is slightly faster than "rum psr.be"
81 LOAD_FSYSCALL_TABLE(r14)
82
83 mov r16=IA64_KR(CURRENT) // 12 cycle read latency
84 tnat.nz p10,p9=r15
85 mov r19=NR_syscalls-1
86 ;;
87 shladd r18=r17,3,r14
88
89 srlz.d
90 cmp.ne p8,p0=r0,r0 // p8 <- FALSE
91 /* Note: if r17 is a NaT, p6 will be set to zero. */
92 cmp.geu p6,p7=r19,r17 // (syscall > 0 && syscall < 1024+NR_syscalls)?
93 ;;
94(p6) ld8 r18=[r18]
95 mov r21=ar.fpsr
96 add r14=-8,r14 // r14 <- addr of fsys_bubble_down entry
97 ;;
98(p6) mov b7=r18
99(p6) tbit.z p8,p0=r18,0
100(p8) br.dptk.many b7
101
102(p6) rsm psr.i
103 mov r27=ar.rsc
104 mov r26=ar.pfs
105 ;;
106 mov r29=psr // read psr (12 cyc load latency)
107/*
108 * brl.cond doesn't work as intended because the linker would convert this branch
109 * into a branch to a PLT. Perhaps there will be a way to avoid this with some
110 * future version of the linker. In the meantime, we just use an indirect branch
111 * instead.
112 */
113#ifdef CONFIG_ITANIUM
114(p6) ld8 r14=[r14] // r14 <- fsys_bubble_down
115 ;;
116(p6) mov b7=r14
117(p6) br.sptk.many b7
118#else
119 BRL_COND_FSYS_BUBBLE_DOWN(p6)
120#endif
121
122 mov r10=-1
123(p10) mov r8=EINVAL
124(p9) mov r8=ENOSYS
125 FSYS_RETURN
126END(__kernel_syscall_via_epc)
127
128# define ARG0_OFF (16 + IA64_SIGFRAME_ARG0_OFFSET)
129# define ARG1_OFF (16 + IA64_SIGFRAME_ARG1_OFFSET)
130# define ARG2_OFF (16 + IA64_SIGFRAME_ARG2_OFFSET)
131# define SIGHANDLER_OFF (16 + IA64_SIGFRAME_HANDLER_OFFSET)
132# define SIGCONTEXT_OFF (16 + IA64_SIGFRAME_SIGCONTEXT_OFFSET)
133
134# define FLAGS_OFF IA64_SIGCONTEXT_FLAGS_OFFSET
135# define CFM_OFF IA64_SIGCONTEXT_CFM_OFFSET
136# define FR6_OFF IA64_SIGCONTEXT_FR6_OFFSET
137# define BSP_OFF IA64_SIGCONTEXT_AR_BSP_OFFSET
138# define RNAT_OFF IA64_SIGCONTEXT_AR_RNAT_OFFSET
139# define UNAT_OFF IA64_SIGCONTEXT_AR_UNAT_OFFSET
140# define FPSR_OFF IA64_SIGCONTEXT_AR_FPSR_OFFSET
141# define PR_OFF IA64_SIGCONTEXT_PR_OFFSET
142# define RP_OFF IA64_SIGCONTEXT_IP_OFFSET
143# define SP_OFF IA64_SIGCONTEXT_R12_OFFSET
144# define RBS_BASE_OFF IA64_SIGCONTEXT_RBS_BASE_OFFSET
145# define LOADRS_OFF IA64_SIGCONTEXT_LOADRS_OFFSET
146# define base0 r2
147# define base1 r3
148 /*
149 * When we get here, the memory stack looks like this:
150 *
151 * +===============================+
152 * | |
153 * // struct sigframe //
154 * | |
155 * +-------------------------------+ <-- sp+16
156 * | 16 byte of scratch |
157 * | space |
158 * +-------------------------------+ <-- sp
159 *
160 * The register stack looks _exactly_ the way it looked at the time the signal
161 * occurred. In other words, we're treading on a potential mine-field: each
162 * incoming general register may be a NaT value (including sp, in which case the
163 * process ends up dying with a SIGSEGV).
164 *
165 * The first thing we need to do is a cover to get the registers onto the backing
166 * store. Once that is done, we invoke the signal handler which may modify some
167 * of the machine state. After returning from the signal handler, we return
168 * control to the previous context by executing a sigreturn system call. A signal
169 * handler may call the rt_sigreturn() function to directly return to a given
170 * sigcontext. However, the user-level sigreturn() needs to do much more than
171 * calling the rt_sigreturn() system call as it needs to unwind the stack to
172 * restore preserved registers that may have been saved on the signal handler's
173 * call stack.
174 */
175
176#define SIGTRAMP_SAVES \
177 .unwabi 3, 's'; /* mark this as a sigtramp handler (saves scratch regs) */ \
178 .unwabi @svr4, 's'; /* backwards compatibility with old unwinders (remove in v2.7) */ \
179 .savesp ar.unat, UNAT_OFF+SIGCONTEXT_OFF; \
180 .savesp ar.fpsr, FPSR_OFF+SIGCONTEXT_OFF; \
181 .savesp pr, PR_OFF+SIGCONTEXT_OFF; \
182 .savesp rp, RP_OFF+SIGCONTEXT_OFF; \
183 .savesp ar.pfs, CFM_OFF+SIGCONTEXT_OFF; \
184 .vframesp SP_OFF+SIGCONTEXT_OFF
185
186GLOBAL_ENTRY(__kernel_sigtramp)
187 // describe the state that is active when we get here:
188 .prologue
189 SIGTRAMP_SAVES
190 .body
191
192 .label_state 1
193
194 adds base0=SIGHANDLER_OFF,sp
195 adds base1=RBS_BASE_OFF+SIGCONTEXT_OFF,sp
196 br.call.sptk.many rp=1f
1971:
198 ld8 r17=[base0],(ARG0_OFF-SIGHANDLER_OFF) // get pointer to signal handler's plabel
199 ld8 r15=[base1] // get address of new RBS base (or NULL)
200 cover // push args in interrupted frame onto backing store
201 ;;
202 cmp.ne p1,p0=r15,r0 // do we need to switch rbs? (note: pr is saved by kernel)
203 mov.m r9=ar.bsp // fetch ar.bsp
204 .spillsp.p p1, ar.rnat, RNAT_OFF+SIGCONTEXT_OFF
205(p1) br.cond.spnt setup_rbs // yup -> (clobbers p8, r14-r16, and r18-r20)
206back_from_setup_rbs:
207 alloc r8=ar.pfs,0,0,3,0
208 ld8 out0=[base0],16 // load arg0 (signum)
209 adds base1=(ARG1_OFF-(RBS_BASE_OFF+SIGCONTEXT_OFF)),base1
210 ;;
211 ld8 out1=[base1] // load arg1 (siginfop)
212 ld8 r10=[r17],8 // get signal handler entry point
213 ;;
214 ld8 out2=[base0] // load arg2 (sigcontextp)
215 ld8 gp=[r17] // get signal handler's global pointer
216 adds base0=(BSP_OFF+SIGCONTEXT_OFF),sp
217 ;;
218 .spillsp ar.bsp, BSP_OFF+SIGCONTEXT_OFF
219 st8 [base0]=r9 // save sc_ar_bsp
220 adds base0=(FR6_OFF+SIGCONTEXT_OFF),sp
221 adds base1=(FR6_OFF+16+SIGCONTEXT_OFF),sp
222 ;;
223 stf.spill [base0]=f6,32
224 stf.spill [base1]=f7,32
225 ;;
226 stf.spill [base0]=f8,32
227 stf.spill [base1]=f9,32
228 mov b6=r10
229 ;;
230 stf.spill [base0]=f10,32
231 stf.spill [base1]=f11,32
232 ;;
233 stf.spill [base0]=f12,32
234 stf.spill [base1]=f13,32
235 ;;
236 stf.spill [base0]=f14,32
237 stf.spill [base1]=f15,32
238 br.call.sptk.many rp=b6 // call the signal handler
239.ret0: adds base0=(BSP_OFF+SIGCONTEXT_OFF),sp
240 ;;
241 ld8 r15=[base0] // fetch sc_ar_bsp
242 mov r14=ar.bsp
243 ;;
244 cmp.ne p1,p0=r14,r15 // do we need to restore the rbs?
245(p1) br.cond.spnt restore_rbs // yup -> (clobbers r14-r18, f6 & f7)
246 ;;
247back_from_restore_rbs:
248 adds base0=(FR6_OFF+SIGCONTEXT_OFF),sp
249 adds base1=(FR6_OFF+16+SIGCONTEXT_OFF),sp
250 ;;
251 ldf.fill f6=[base0],32
252 ldf.fill f7=[base1],32
253 ;;
254 ldf.fill f8=[base0],32
255 ldf.fill f9=[base1],32
256 ;;
257 ldf.fill f10=[base0],32
258 ldf.fill f11=[base1],32
259 ;;
260 ldf.fill f12=[base0],32
261 ldf.fill f13=[base1],32
262 ;;
263 ldf.fill f14=[base0],32
264 ldf.fill f15=[base1],32
265 mov r15=__NR_rt_sigreturn
266 .restore sp // pop .prologue
267 break __BREAK_SYSCALL
268
269 .prologue
270 SIGTRAMP_SAVES
271setup_rbs:
272 mov ar.rsc=0 // put RSE into enforced lazy mode
273 ;;
274 .save ar.rnat, r19
275 mov r19=ar.rnat // save RNaT before switching backing store area
276 adds r14=(RNAT_OFF+SIGCONTEXT_OFF),sp
277
278 mov r18=ar.bspstore
279 mov ar.bspstore=r15 // switch over to new register backing store area
280 ;;
281
282 .spillsp ar.rnat, RNAT_OFF+SIGCONTEXT_OFF
283 st8 [r14]=r19 // save sc_ar_rnat
284 .body
285 mov.m r16=ar.bsp // sc_loadrs <- (new bsp - new bspstore) << 16
286 adds r14=(LOADRS_OFF+SIGCONTEXT_OFF),sp
287 ;;
288 invala
289 sub r15=r16,r15
290 extr.u r20=r18,3,6
291 ;;
292 mov ar.rsc=0xf // set RSE into eager mode, pl 3
293 cmp.eq p8,p0=63,r20
294 shl r15=r15,16
295 ;;
296 st8 [r14]=r15 // save sc_loadrs
297(p8) st8 [r18]=r19 // if bspstore points at RNaT slot, store RNaT there now
298 .restore sp // pop .prologue
299 br.cond.sptk back_from_setup_rbs
300
301 .prologue
302 SIGTRAMP_SAVES
303 .spillsp ar.rnat, RNAT_OFF+SIGCONTEXT_OFF
304 .body
305restore_rbs:
306 // On input:
307 // r14 = bsp1 (bsp at the time of return from signal handler)
308 // r15 = bsp0 (bsp at the time the signal occurred)
309 //
310 // Here, we need to calculate bspstore0, the value that ar.bspstore needs
311 // to be set to, based on bsp0 and the size of the dirty partition on
312 // the alternate stack (sc_loadrs >> 16). This can be done with the
313 // following algorithm:
314 //
315 // bspstore0 = rse_skip_regs(bsp0, -rse_num_regs(bsp1 - (loadrs >> 19), bsp1));
316 //
317 // This is what the code below does.
318 //
319 alloc r2=ar.pfs,0,0,0,0 // alloc null frame
320 adds r16=(LOADRS_OFF+SIGCONTEXT_OFF),sp
321 adds r18=(RNAT_OFF+SIGCONTEXT_OFF),sp
322 ;;
323 ld8 r17=[r16]
324 ld8 r16=[r18] // get new rnat
325 extr.u r18=r15,3,6 // r18 <- rse_slot_num(bsp0)
326 ;;
327 mov ar.rsc=r17 // put RSE into enforced lazy mode
328 shr.u r17=r17,16
329 ;;
330 sub r14=r14,r17 // r14 (bspstore1) <- bsp1 - (sc_loadrs >> 16)
331 shr.u r17=r17,3 // r17 <- (sc_loadrs >> 19)
332 ;;
333 loadrs // restore dirty partition
334 extr.u r14=r14,3,6 // r14 <- rse_slot_num(bspstore1)
335 ;;
336 add r14=r14,r17 // r14 <- rse_slot_num(bspstore1) + (sc_loadrs >> 19)
337 ;;
338 shr.u r14=r14,6 // r14 <- (rse_slot_num(bspstore1) + (sc_loadrs >> 19))/0x40
339 ;;
340 sub r14=r14,r17 // r14 <- -rse_num_regs(bspstore1, bsp1)
341 movl r17=0x8208208208208209
342 ;;
343 add r18=r18,r14 // r18 (delta) <- rse_slot_num(bsp0) - rse_num_regs(bspstore1,bsp1)
344 setf.sig f7=r17
345 cmp.lt p7,p0=r14,r0 // p7 <- (r14 < 0)?
346 ;;
347(p7) adds r18=-62,r18 // delta -= 62
348 ;;
349 setf.sig f6=r18
350 ;;
351 xmpy.h f6=f6,f7
352 ;;
353 getf.sig r17=f6
354 ;;
355 add r17=r17,r18
356 shr r18=r18,63
357 ;;
358 shr r17=r17,5
359 ;;
360 sub r17=r17,r18 // r17 = delta/63
361 ;;
362 add r17=r14,r17 // r17 <- delta/63 - rse_num_regs(bspstore1, bsp1)
363 ;;
364 shladd r15=r17,3,r15 // r15 <- bsp0 + 8*(delta/63 - rse_num_regs(bspstore1, bsp1))
365 ;;
366 mov ar.bspstore=r15 // switch back to old register backing store area
367 ;;
368 mov ar.rnat=r16 // restore RNaT
369 mov ar.rsc=0xf // (will be restored later on from sc_ar_rsc)
370 // invala not necessary as that will happen when returning to user-mode
371 br.cond.sptk back_from_restore_rbs
372END(__kernel_sigtramp)
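The restore_rbs arithmetic above is dense in assembly. Below is a minimal C sketch of the same calculation, not part of the patch itself: the helper bodies are an assumption, written only to match the semantics the comment describes (every 64th backing-store slot holds an RNaT collection word), and the names rse_slot_num/rse_num_regs/rse_skip_regs are taken from the comment's pseudo-formula.

#include <stdint.h>

/* Slot index (0..63) of a backing-store address; slot 63 of each 64-slot
 * group holds an RNaT collection word rather than a stacked register. */
static inline uint64_t rse_slot_num(uint64_t addr)
{
	return (addr >> 3) & 0x3f;
}

/* Number of stacked registers between bspstore and bsp, not counting the
 * RNaT collection slots that fall in between. */
static inline uint64_t rse_num_regs(uint64_t bspstore, uint64_t bsp)
{
	uint64_t slots = (bsp - bspstore) >> 3;

	return slots - (rse_slot_num(bspstore) + slots) / 0x40;
}

/* Address reached by skipping num_regs stacked registers from addr,
 * stepping over any RNaT collection slots along the way. */
static inline uint64_t rse_skip_regs(uint64_t addr, int64_t num_regs)
{
	int64_t delta = (int64_t) rse_slot_num(addr) + num_regs;

	if (num_regs < 0)
		delta -= 0x3e;
	return addr + 8 * (num_regs + delta / 0x3f);
}

/* bspstore0 = rse_skip_regs(bsp0, -rse_num_regs(bsp1 - (loadrs >> 19), bsp1));
 * the comment's ">> 19" is in 8-byte slots, i.e. loadrs >> 16 bytes. */
static uint64_t restore_rbs_bspstore0(uint64_t bsp0, uint64_t bsp1, uint64_t loadrs)
{
	uint64_t bspstore1 = bsp1 - (loadrs >> 16);	/* size of dirty partition in bytes */

	return rse_skip_regs(bsp0, -(int64_t) rse_num_regs(bspstore1, bsp1));
}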
diff --git a/arch/ia64/kernel/gate.lds.S b/arch/ia64/kernel/gate.lds.S
new file mode 100644
index 000000000000..e1e4aba9ecd0
--- /dev/null
+++ b/arch/ia64/kernel/gate.lds.S
@@ -0,0 +1,95 @@
1/*
2 * Linker script for gate DSO. The gate pages are an ELF shared object prelinked to its
3 * virtual address, with only one read-only segment and one execute-only segment (both fit
4 * in one page). This script controls its layout.
5 */
6
7#include <linux/config.h>
8
9#include <asm/system.h>
10
11SECTIONS
12{
13 . = GATE_ADDR + SIZEOF_HEADERS;
14
15 .hash : { *(.hash) } :readable
16 .dynsym : { *(.dynsym) }
17 .dynstr : { *(.dynstr) }
18 .gnu.version : { *(.gnu.version) }
19 .gnu.version_d : { *(.gnu.version_d) }
20 .gnu.version_r : { *(.gnu.version_r) }
21 .dynamic : { *(.dynamic) } :readable :dynamic
22
23 /*
24 * This linker script is used both with -r and with -shared. For the layouts to match,
25 * we need to skip more than enough space for the dynamic symbol table et al. If this
26 * amount is insufficient, ld -shared will barf. Just increase it here.
27 */
28 . = GATE_ADDR + 0x500;
29
30 .data.patch : {
31 __start_gate_mckinley_e9_patchlist = .;
32 *(.data.patch.mckinley_e9)
33 __end_gate_mckinley_e9_patchlist = .;
34
35 __start_gate_vtop_patchlist = .;
36 *(.data.patch.vtop)
37 __end_gate_vtop_patchlist = .;
38
39 __start_gate_fsyscall_patchlist = .;
40 *(.data.patch.fsyscall_table)
41 __end_gate_fsyscall_patchlist = .;
42
43 __start_gate_brl_fsys_bubble_down_patchlist = .;
44 *(.data.patch.brl_fsys_bubble_down)
45 __end_gate_brl_fsys_bubble_down_patchlist = .;
46 } :readable
47 .IA_64.unwind_info : { *(.IA_64.unwind_info*) }
48 .IA_64.unwind : { *(.IA_64.unwind*) } :readable :unwind
49#ifdef HAVE_BUGGY_SEGREL
50 .text (GATE_ADDR + PAGE_SIZE) : { *(.text) *(.text.*) } :readable
51#else
52 . = ALIGN (PERCPU_PAGE_SIZE) + (. & (PERCPU_PAGE_SIZE - 1));
53 .text : { *(.text) *(.text.*) } :epc
54#endif
55
56 /DISCARD/ : {
57 *(.got.plt) *(.got)
58 *(.data .data.* .gnu.linkonce.d.*)
59 *(.dynbss)
60 *(.bss .bss.* .gnu.linkonce.b.*)
61 *(__ex_table)
62 }
63}
64
65/*
66 * We must supply the ELF program headers explicitly to get just one
67 * PT_LOAD segment, and set the flags explicitly to make segments read-only.
68 */
69PHDRS
70{
71 readable PT_LOAD FILEHDR PHDRS FLAGS(4); /* PF_R */
72#ifndef HAVE_BUGGY_SEGREL
73 epc PT_LOAD FILEHDR PHDRS FLAGS(1); /* PF_X */
74#endif
75 dynamic PT_DYNAMIC FLAGS(4); /* PF_R */
76 unwind 0x70000001; /* PT_IA_64_UNWIND, but ld doesn't match the name */
77}
78
79/*
80 * This controls what symbols we export from the DSO.
81 */
82VERSION
83{
84 LINUX_2.5 {
85 global:
86 __kernel_syscall_via_break;
87 __kernel_syscall_via_epc;
88 __kernel_sigtramp;
89
90 local: *;
91 };
92}
93
94/* The ELF entry point can be used to set the AT_SYSINFO value. */
95ENTRY(__kernel_syscall_via_epc)
diff --git a/arch/ia64/kernel/head.S b/arch/ia64/kernel/head.S
new file mode 100644
index 000000000000..105c7fec8c6d
--- /dev/null
+++ b/arch/ia64/kernel/head.S
@@ -0,0 +1,996 @@
1/*
2 * Here is where the ball gets rolling as far as the kernel is concerned.
3 * When control is transferred to _start, the bootloader has already
4 * loaded us to the correct address. All that's left to do here is
5 * to set up the kernel's global pointer and jump to the kernel
6 * entry point.
7 *
8 * Copyright (C) 1998-2001, 2003, 2005 Hewlett-Packard Co
9 * David Mosberger-Tang <davidm@hpl.hp.com>
10 * Stephane Eranian <eranian@hpl.hp.com>
11 * Copyright (C) 1999 VA Linux Systems
12 * Copyright (C) 1999 Walt Drummond <drummond@valinux.com>
13 * Copyright (C) 1999 Intel Corp.
14 * Copyright (C) 1999 Asit Mallick <Asit.K.Mallick@intel.com>
15 * Copyright (C) 1999 Don Dugger <Don.Dugger@intel.com>
16 * Copyright (C) 2002 Fenghua Yu <fenghua.yu@intel.com>
17 * -Optimize __ia64_save_fpu() and __ia64_load_fpu() for Itanium 2.
18 */
19
20#include <linux/config.h>
21
22#include <asm/asmmacro.h>
23#include <asm/fpu.h>
24#include <asm/kregs.h>
25#include <asm/mmu_context.h>
26#include <asm/offsets.h>
27#include <asm/pal.h>
28#include <asm/pgtable.h>
29#include <asm/processor.h>
30#include <asm/ptrace.h>
31#include <asm/system.h>
32
33 .section __special_page_section,"ax"
34
35 .global empty_zero_page
36empty_zero_page:
37 .skip PAGE_SIZE
38
39 .global swapper_pg_dir
40swapper_pg_dir:
41 .skip PAGE_SIZE
42
43 .rodata
44halt_msg:
45 stringz "Halting kernel\n"
46
47 .text
48
49 .global start_ap
50
51 /*
52 * Start the kernel. When the bootloader passes control to _start(), r28
53 * points to the address of the boot parameter area. Execution reaches
54 * here in physical mode.
55 */
56GLOBAL_ENTRY(_start)
57start_ap:
58 .prologue
59 .save rp, r0 // terminate unwind chain with a NULL rp
60 .body
61
62 rsm psr.i | psr.ic
63 ;;
64 srlz.i
65 ;;
66 /*
67 * Initialize kernel region registers:
68 * rr[0]: VHPT enabled, page size = PAGE_SHIFT
69 * rr[1]: VHPT enabled, page size = PAGE_SHIFT
70 * rr[2]: VHPT enabled, page size = PAGE_SHIFT
71 * rr[3]: VHPT enabled, page size = PAGE_SHIFT
72 * rr[4]: VHPT enabled, page size = PAGE_SHIFT
73 * rr[5]: VHPT enabled, page size = PAGE_SHIFT
74 * rr[6]: VHPT disabled, page size = IA64_GRANULE_SHIFT
75 * rr[7]: VHPT disabled, page size = IA64_GRANULE_SHIFT
76 * We initialize all of them to prevent inadvertently assuming
77 * something about the state of address translation early in boot.
78 */
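	// Each rr value built below encodes: bit 0 = VHPT walker enable,
	// bits 2..7 = preferred page size (PAGE_SHIFT or IA64_GRANULE_SHIFT),
	// bits 8..31 = region id from ia64_rid() -- hence the "<< 8", "<< 2" and "| 1".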
79 mov r6=((ia64_rid(IA64_REGION_ID_KERNEL, (0<<61)) << 8) | (PAGE_SHIFT << 2) | 1)
80 movl r7=(0<<61)
81 mov r8=((ia64_rid(IA64_REGION_ID_KERNEL, (1<<61)) << 8) | (PAGE_SHIFT << 2) | 1)
82 movl r9=(1<<61)
83 mov r10=((ia64_rid(IA64_REGION_ID_KERNEL, (2<<61)) << 8) | (PAGE_SHIFT << 2) | 1)
84 movl r11=(2<<61)
85 mov r12=((ia64_rid(IA64_REGION_ID_KERNEL, (3<<61)) << 8) | (PAGE_SHIFT << 2) | 1)
86 movl r13=(3<<61)
87 mov r14=((ia64_rid(IA64_REGION_ID_KERNEL, (4<<61)) << 8) | (PAGE_SHIFT << 2) | 1)
88 movl r15=(4<<61)
89 mov r16=((ia64_rid(IA64_REGION_ID_KERNEL, (5<<61)) << 8) | (PAGE_SHIFT << 2) | 1)
90 movl r17=(5<<61)
91 mov r18=((ia64_rid(IA64_REGION_ID_KERNEL, (6<<61)) << 8) | (IA64_GRANULE_SHIFT << 2))
92 movl r19=(6<<61)
93 mov r20=((ia64_rid(IA64_REGION_ID_KERNEL, (7<<61)) << 8) | (IA64_GRANULE_SHIFT << 2))
94 movl r21=(7<<61)
95 ;;
96 mov rr[r7]=r6
97 mov rr[r9]=r8
98 mov rr[r11]=r10
99 mov rr[r13]=r12
100 mov rr[r15]=r14
101 mov rr[r17]=r16
102 mov rr[r19]=r18
103 mov rr[r21]=r20
104 ;;
105 /*
106 * Now pin mappings into the TLB for kernel text and data
107 */
108 mov r18=KERNEL_TR_PAGE_SHIFT<<2
109 movl r17=KERNEL_START
110 ;;
111 mov cr.itir=r18
112 mov cr.ifa=r17
113 mov r16=IA64_TR_KERNEL
114 mov r3=ip
115 movl r18=PAGE_KERNEL
116 ;;
117 dep r2=0,r3,0,KERNEL_TR_PAGE_SHIFT
118 ;;
119 or r18=r2,r18
120 ;;
121 srlz.i
122 ;;
123 itr.i itr[r16]=r18
124 ;;
125 itr.d dtr[r16]=r18
126 ;;
127 srlz.i
128
129 /*
130 * Switch into virtual mode:
131 */
132 movl r16=(IA64_PSR_IT|IA64_PSR_IC|IA64_PSR_DT|IA64_PSR_RT|IA64_PSR_DFH|IA64_PSR_BN \
133 |IA64_PSR_DI)
134 ;;
135 mov cr.ipsr=r16
136 movl r17=1f
137 ;;
138 mov cr.iip=r17
139 mov cr.ifs=r0
140 ;;
141 rfi
142 ;;
1431: // now we are in virtual mode
144
145 // set IVT entry point---can't access I/O ports without it
146 movl r3=ia64_ivt
147 ;;
148 mov cr.iva=r3
149 movl r2=FPSR_DEFAULT
150 ;;
151 srlz.i
152 movl gp=__gp
153
154 mov ar.fpsr=r2
155 ;;
156
157#define isAP p2 // are we an Application Processor?
158#define isBP p3 // are we the Bootstrap Processor?
159
160#ifdef CONFIG_SMP
161 /*
162 * Find the init_task for the currently booting CPU. At poweron, and in
163 * UP mode, task_for_booting_cpu is NULL.
164 */
165 movl r3=task_for_booting_cpu
166 ;;
167 ld8 r3=[r3]
168 movl r2=init_task
169 ;;
170 cmp.eq isBP,isAP=r3,r0
171 ;;
172(isAP) mov r2=r3
173#else
174 movl r2=init_task
175 cmp.eq isBP,isAP=r0,r0
176#endif
177 ;;
178 tpa r3=r2 // r3 == phys addr of task struct
179 mov r16=-1
180(isBP) br.cond.dpnt .load_current // BP stack is on region 5 --- no need to map it
181
182 // load mapping for stack (virtaddr in r2, physaddr in r3)
183 rsm psr.ic
184 movl r17=PAGE_KERNEL
185 ;;
186 srlz.d
187 dep r18=0,r3,0,12
188 ;;
189 or r18=r17,r18
190 dep r2=-1,r3,61,3 // IMVA of task
191 ;;
192 mov r17=rr[r2]
193 shr.u r16=r3,IA64_GRANULE_SHIFT
194 ;;
195 dep r17=0,r17,8,24
196 ;;
197 mov cr.itir=r17
198 mov cr.ifa=r2
199
200 mov r19=IA64_TR_CURRENT_STACK
201 ;;
202 itr.d dtr[r19]=r18
203 ;;
204 ssm psr.ic
205 srlz.d
206 ;;
207
208.load_current:
209 // load the "current" pointer (r13) and ar.k6 with the current task
210 mov IA64_KR(CURRENT)=r2 // virtual address
211 mov IA64_KR(CURRENT_STACK)=r16
212 mov r13=r2
213 /*
214 * Reserve space at the top of the stack for "struct pt_regs". Kernel threads
215 * don't store interesting values in that structure, but the space still needs
216 * to be there because time-critical stuff such as context switching can
217 * be implemented more efficiently (for example, __switch_to()
218 * always sets the psr.dfh bit of the task it is switching to).
219 */
220 addl r12=IA64_STK_OFFSET-IA64_PT_REGS_SIZE-16,r2
221 addl r2=IA64_RBS_OFFSET,r2 // initialize the RSE
222 mov ar.rsc=0 // place RSE in enforced lazy mode
223 ;;
224 loadrs // clear the dirty partition
225 ;;
226 mov ar.bspstore=r2 // establish the new RSE stack
227 ;;
228 mov ar.rsc=0x3 // place RSE in eager mode
229
230(isBP) dep r28=-1,r28,61,3 // make address virtual
231(isBP) movl r2=ia64_boot_param
232 ;;
233(isBP) st8 [r2]=r28 // save the address of the boot param area passed by the bootloader
234
235#ifdef CONFIG_SMP
236(isAP) br.call.sptk.many rp=start_secondary
237.ret0:
238(isAP) br.cond.sptk self
239#endif
240
241 // This is executed by the bootstrap processor (bsp) only:
242
243#ifdef CONFIG_IA64_FW_EMU
244 // initialize PAL & SAL emulator:
245 br.call.sptk.many rp=sys_fw_init
246.ret1:
247#endif
248 br.call.sptk.many rp=start_kernel
249.ret2: addl r3=@ltoff(halt_msg),gp
250 ;;
251 alloc r2=ar.pfs,8,0,2,0
252 ;;
253 ld8 out0=[r3]
254 br.call.sptk.many b0=console_print
255
256self: hint @pause
257 br.sptk.many self // endless loop
258END(_start)
259
260GLOBAL_ENTRY(ia64_save_debug_regs)
261 alloc r16=ar.pfs,1,0,0,0
262 mov r20=ar.lc // preserve ar.lc
263 mov ar.lc=IA64_NUM_DBG_REGS-1
264 mov r18=0
265 add r19=IA64_NUM_DBG_REGS*8,in0
266 ;;
2671: mov r16=dbr[r18]
268#ifdef CONFIG_ITANIUM
269 ;;
270 srlz.d
271#endif
272 mov r17=ibr[r18]
273 add r18=1,r18
274 ;;
275 st8.nta [in0]=r16,8
276 st8.nta [r19]=r17,8
277 br.cloop.sptk.many 1b
278 ;;
279 mov ar.lc=r20 // restore ar.lc
280 br.ret.sptk.many rp
281END(ia64_save_debug_regs)
282
283GLOBAL_ENTRY(ia64_load_debug_regs)
284 alloc r16=ar.pfs,1,0,0,0
285 lfetch.nta [in0]
286 mov r20=ar.lc // preserve ar.lc
287 add r19=IA64_NUM_DBG_REGS*8,in0
288 mov ar.lc=IA64_NUM_DBG_REGS-1
289 mov r18=-1
290 ;;
2911: ld8.nta r16=[in0],8
292 ld8.nta r17=[r19],8
293 add r18=1,r18
294 ;;
295 mov dbr[r18]=r16
296#ifdef CONFIG_ITANIUM
297 ;;
298 srlz.d // Errata 132 (NoFix status)
299#endif
300 mov ibr[r18]=r17
301 br.cloop.sptk.many 1b
302 ;;
303 mov ar.lc=r20 // restore ar.lc
304 br.ret.sptk.many rp
305END(ia64_load_debug_regs)
306
307GLOBAL_ENTRY(__ia64_save_fpu)
308 alloc r2=ar.pfs,1,4,0,0
309 adds loc0=96*16-16,in0
310 adds loc1=96*16-16-128,in0
311 ;;
312 stf.spill.nta [loc0]=f127,-256
313 stf.spill.nta [loc1]=f119,-256
314 ;;
315 stf.spill.nta [loc0]=f111,-256
316 stf.spill.nta [loc1]=f103,-256
317 ;;
318 stf.spill.nta [loc0]=f95,-256
319 stf.spill.nta [loc1]=f87,-256
320 ;;
321 stf.spill.nta [loc0]=f79,-256
322 stf.spill.nta [loc1]=f71,-256
323 ;;
324 stf.spill.nta [loc0]=f63,-256
325 stf.spill.nta [loc1]=f55,-256
326 adds loc2=96*16-32,in0
327 ;;
328 stf.spill.nta [loc0]=f47,-256
329 stf.spill.nta [loc1]=f39,-256
330 adds loc3=96*16-32-128,in0
331 ;;
332 stf.spill.nta [loc2]=f126,-256
333 stf.spill.nta [loc3]=f118,-256
334 ;;
335 stf.spill.nta [loc2]=f110,-256
336 stf.spill.nta [loc3]=f102,-256
337 ;;
338 stf.spill.nta [loc2]=f94,-256
339 stf.spill.nta [loc3]=f86,-256
340 ;;
341 stf.spill.nta [loc2]=f78,-256
342 stf.spill.nta [loc3]=f70,-256
343 ;;
344 stf.spill.nta [loc2]=f62,-256
345 stf.spill.nta [loc3]=f54,-256
346 adds loc0=96*16-48,in0
347 ;;
348 stf.spill.nta [loc2]=f46,-256
349 stf.spill.nta [loc3]=f38,-256
350 adds loc1=96*16-48-128,in0
351 ;;
352 stf.spill.nta [loc0]=f125,-256
353 stf.spill.nta [loc1]=f117,-256
354 ;;
355 stf.spill.nta [loc0]=f109,-256
356 stf.spill.nta [loc1]=f101,-256
357 ;;
358 stf.spill.nta [loc0]=f93,-256
359 stf.spill.nta [loc1]=f85,-256
360 ;;
361 stf.spill.nta [loc0]=f77,-256
362 stf.spill.nta [loc1]=f69,-256
363 ;;
364 stf.spill.nta [loc0]=f61,-256
365 stf.spill.nta [loc1]=f53,-256
366 adds loc2=96*16-64,in0
367 ;;
368 stf.spill.nta [loc0]=f45,-256
369 stf.spill.nta [loc1]=f37,-256
370 adds loc3=96*16-64-128,in0
371 ;;
372 stf.spill.nta [loc2]=f124,-256
373 stf.spill.nta [loc3]=f116,-256
374 ;;
375 stf.spill.nta [loc2]=f108,-256
376 stf.spill.nta [loc3]=f100,-256
377 ;;
378 stf.spill.nta [loc2]=f92,-256
379 stf.spill.nta [loc3]=f84,-256
380 ;;
381 stf.spill.nta [loc2]=f76,-256
382 stf.spill.nta [loc3]=f68,-256
383 ;;
384 stf.spill.nta [loc2]=f60,-256
385 stf.spill.nta [loc3]=f52,-256
386 adds loc0=96*16-80,in0
387 ;;
388 stf.spill.nta [loc2]=f44,-256
389 stf.spill.nta [loc3]=f36,-256
390 adds loc1=96*16-80-128,in0
391 ;;
392 stf.spill.nta [loc0]=f123,-256
393 stf.spill.nta [loc1]=f115,-256
394 ;;
395 stf.spill.nta [loc0]=f107,-256
396 stf.spill.nta [loc1]=f99,-256
397 ;;
398 stf.spill.nta [loc0]=f91,-256
399 stf.spill.nta [loc1]=f83,-256
400 ;;
401 stf.spill.nta [loc0]=f75,-256
402 stf.spill.nta [loc1]=f67,-256
403 ;;
404 stf.spill.nta [loc0]=f59,-256
405 stf.spill.nta [loc1]=f51,-256
406 adds loc2=96*16-96,in0
407 ;;
408 stf.spill.nta [loc0]=f43,-256
409 stf.spill.nta [loc1]=f35,-256
410 adds loc3=96*16-96-128,in0
411 ;;
412 stf.spill.nta [loc2]=f122,-256
413 stf.spill.nta [loc3]=f114,-256
414 ;;
415 stf.spill.nta [loc2]=f106,-256
416 stf.spill.nta [loc3]=f98,-256
417 ;;
418 stf.spill.nta [loc2]=f90,-256
419 stf.spill.nta [loc3]=f82,-256
420 ;;
421 stf.spill.nta [loc2]=f74,-256
422 stf.spill.nta [loc3]=f66,-256
423 ;;
424 stf.spill.nta [loc2]=f58,-256
425 stf.spill.nta [loc3]=f50,-256
426 adds loc0=96*16-112,in0
427 ;;
428 stf.spill.nta [loc2]=f42,-256
429 stf.spill.nta [loc3]=f34,-256
430 adds loc1=96*16-112-128,in0
431 ;;
432 stf.spill.nta [loc0]=f121,-256
433 stf.spill.nta [loc1]=f113,-256
434 ;;
435 stf.spill.nta [loc0]=f105,-256
436 stf.spill.nta [loc1]=f97,-256
437 ;;
438 stf.spill.nta [loc0]=f89,-256
439 stf.spill.nta [loc1]=f81,-256
440 ;;
441 stf.spill.nta [loc0]=f73,-256
442 stf.spill.nta [loc1]=f65,-256
443 ;;
444 stf.spill.nta [loc0]=f57,-256
445 stf.spill.nta [loc1]=f49,-256
446 adds loc2=96*16-128,in0
447 ;;
448 stf.spill.nta [loc0]=f41,-256
449 stf.spill.nta [loc1]=f33,-256
450 adds loc3=96*16-128-128,in0
451 ;;
452 stf.spill.nta [loc2]=f120,-256
453 stf.spill.nta [loc3]=f112,-256
454 ;;
455 stf.spill.nta [loc2]=f104,-256
456 stf.spill.nta [loc3]=f96,-256
457 ;;
458 stf.spill.nta [loc2]=f88,-256
459 stf.spill.nta [loc3]=f80,-256
460 ;;
461 stf.spill.nta [loc2]=f72,-256
462 stf.spill.nta [loc3]=f64,-256
463 ;;
464 stf.spill.nta [loc2]=f56,-256
465 stf.spill.nta [loc3]=f48,-256
466 ;;
467 stf.spill.nta [loc2]=f40
468 stf.spill.nta [loc3]=f32
469 br.ret.sptk.many rp
470END(__ia64_save_fpu)
471
472GLOBAL_ENTRY(__ia64_load_fpu)
473 alloc r2=ar.pfs,1,2,0,0
474 adds r3=128,in0
475 adds r14=256,in0
476 adds r15=384,in0
477 mov loc0=512
478 mov loc1=-1024+16
479 ;;
480 ldf.fill.nta f32=[in0],loc0
481 ldf.fill.nta f40=[ r3],loc0
482 ldf.fill.nta f48=[r14],loc0
483 ldf.fill.nta f56=[r15],loc0
484 ;;
485 ldf.fill.nta f64=[in0],loc0
486 ldf.fill.nta f72=[ r3],loc0
487 ldf.fill.nta f80=[r14],loc0
488 ldf.fill.nta f88=[r15],loc0
489 ;;
490 ldf.fill.nta f96=[in0],loc1
491 ldf.fill.nta f104=[ r3],loc1
492 ldf.fill.nta f112=[r14],loc1
493 ldf.fill.nta f120=[r15],loc1
494 ;;
495 ldf.fill.nta f33=[in0],loc0
496 ldf.fill.nta f41=[ r3],loc0
497 ldf.fill.nta f49=[r14],loc0
498 ldf.fill.nta f57=[r15],loc0
499 ;;
500 ldf.fill.nta f65=[in0],loc0
501 ldf.fill.nta f73=[ r3],loc0
502 ldf.fill.nta f81=[r14],loc0
503 ldf.fill.nta f89=[r15],loc0
504 ;;
505 ldf.fill.nta f97=[in0],loc1
506 ldf.fill.nta f105=[ r3],loc1
507 ldf.fill.nta f113=[r14],loc1
508 ldf.fill.nta f121=[r15],loc1
509 ;;
510 ldf.fill.nta f34=[in0],loc0
511 ldf.fill.nta f42=[ r3],loc0
512 ldf.fill.nta f50=[r14],loc0
513 ldf.fill.nta f58=[r15],loc0
514 ;;
515 ldf.fill.nta f66=[in0],loc0
516 ldf.fill.nta f74=[ r3],loc0
517 ldf.fill.nta f82=[r14],loc0
518 ldf.fill.nta f90=[r15],loc0
519 ;;
520 ldf.fill.nta f98=[in0],loc1
521 ldf.fill.nta f106=[ r3],loc1
522 ldf.fill.nta f114=[r14],loc1
523 ldf.fill.nta f122=[r15],loc1
524 ;;
525 ldf.fill.nta f35=[in0],loc0
526 ldf.fill.nta f43=[ r3],loc0
527 ldf.fill.nta f51=[r14],loc0
528 ldf.fill.nta f59=[r15],loc0
529 ;;
530 ldf.fill.nta f67=[in0],loc0
531 ldf.fill.nta f75=[ r3],loc0
532 ldf.fill.nta f83=[r14],loc0
533 ldf.fill.nta f91=[r15],loc0
534 ;;
535 ldf.fill.nta f99=[in0],loc1
536 ldf.fill.nta f107=[ r3],loc1
537 ldf.fill.nta f115=[r14],loc1
538 ldf.fill.nta f123=[r15],loc1
539 ;;
540 ldf.fill.nta f36=[in0],loc0
541 ldf.fill.nta f44=[ r3],loc0
542 ldf.fill.nta f52=[r14],loc0
543 ldf.fill.nta f60=[r15],loc0
544 ;;
545 ldf.fill.nta f68=[in0],loc0
546 ldf.fill.nta f76=[ r3],loc0
547 ldf.fill.nta f84=[r14],loc0
548 ldf.fill.nta f92=[r15],loc0
549 ;;
550 ldf.fill.nta f100=[in0],loc1
551 ldf.fill.nta f108=[ r3],loc1
552 ldf.fill.nta f116=[r14],loc1
553 ldf.fill.nta f124=[r15],loc1
554 ;;
555 ldf.fill.nta f37=[in0],loc0
556 ldf.fill.nta f45=[ r3],loc0
557 ldf.fill.nta f53=[r14],loc0
558 ldf.fill.nta f61=[r15],loc0
559 ;;
560 ldf.fill.nta f69=[in0],loc0
561 ldf.fill.nta f77=[ r3],loc0
562 ldf.fill.nta f85=[r14],loc0
563 ldf.fill.nta f93=[r15],loc0
564 ;;
565 ldf.fill.nta f101=[in0],loc1
566 ldf.fill.nta f109=[ r3],loc1
567 ldf.fill.nta f117=[r14],loc1
568 ldf.fill.nta f125=[r15],loc1
569 ;;
570 ldf.fill.nta f38 =[in0],loc0
571 ldf.fill.nta f46 =[ r3],loc0
572 ldf.fill.nta f54 =[r14],loc0
573 ldf.fill.nta f62 =[r15],loc0
574 ;;
575 ldf.fill.nta f70 =[in0],loc0
576 ldf.fill.nta f78 =[ r3],loc0
577 ldf.fill.nta f86 =[r14],loc0
578 ldf.fill.nta f94 =[r15],loc0
579 ;;
580 ldf.fill.nta f102=[in0],loc1
581 ldf.fill.nta f110=[ r3],loc1
582 ldf.fill.nta f118=[r14],loc1
583 ldf.fill.nta f126=[r15],loc1
584 ;;
585 ldf.fill.nta f39 =[in0],loc0
586 ldf.fill.nta f47 =[ r3],loc0
587 ldf.fill.nta f55 =[r14],loc0
588 ldf.fill.nta f63 =[r15],loc0
589 ;;
590 ldf.fill.nta f71 =[in0],loc0
591 ldf.fill.nta f79 =[ r3],loc0
592 ldf.fill.nta f87 =[r14],loc0
593 ldf.fill.nta f95 =[r15],loc0
594 ;;
595 ldf.fill.nta f103=[in0]
596 ldf.fill.nta f111=[ r3]
597 ldf.fill.nta f119=[r14]
598 ldf.fill.nta f127=[r15]
599 br.ret.sptk.many rp
600END(__ia64_load_fpu)
601
602GLOBAL_ENTRY(__ia64_init_fpu)
603 stf.spill [sp]=f0 // M3
604 mov f32=f0 // F
605 nop.b 0
606
607 ldfps f33,f34=[sp] // M0
608 ldfps f35,f36=[sp] // M1
609 mov f37=f0 // F
610 ;;
611
612 setf.s f38=r0 // M2
613 setf.s f39=r0 // M3
614 mov f40=f0 // F
615
616 ldfps f41,f42=[sp] // M0
617 ldfps f43,f44=[sp] // M1
618 mov f45=f0 // F
619
620 setf.s f46=r0 // M2
621 setf.s f47=r0 // M3
622 mov f48=f0 // F
623
624 ldfps f49,f50=[sp] // M0
625 ldfps f51,f52=[sp] // M1
626 mov f53=f0 // F
627
628 setf.s f54=r0 // M2
629 setf.s f55=r0 // M3
630 mov f56=f0 // F
631
632 ldfps f57,f58=[sp] // M0
633 ldfps f59,f60=[sp] // M1
634 mov f61=f0 // F
635
636 setf.s f62=r0 // M2
637 setf.s f63=r0 // M3
638 mov f64=f0 // F
639
640 ldfps f65,f66=[sp] // M0
641 ldfps f67,f68=[sp] // M1
642 mov f69=f0 // F
643
644 setf.s f70=r0 // M2
645 setf.s f71=r0 // M3
646 mov f72=f0 // F
647
648 ldfps f73,f74=[sp] // M0
649 ldfps f75,f76=[sp] // M1
650 mov f77=f0 // F
651
652 setf.s f78=r0 // M2
653 setf.s f79=r0 // M3
654 mov f80=f0 // F
655
656 ldfps f81,f82=[sp] // M0
657 ldfps f83,f84=[sp] // M1
658 mov f85=f0 // F
659
660 setf.s f86=r0 // M2
661 setf.s f87=r0 // M3
662 mov f88=f0 // F
663
664 /*
665 * When the instructions are cached, it would be faster to initialize
666 * the remaining registers simply with mov instructions (F-unit).
667 * This gets the time down to ~29 cycles. However, this would use up
668 * 33 bundles, whereas continuing with the above pattern yields
669 * 10 bundles and ~30 cycles.
670 */
671
672 ldfps f89,f90=[sp] // M0
673 ldfps f91,f92=[sp] // M1
674 mov f93=f0 // F
675
676 setf.s f94=r0 // M2
677 setf.s f95=r0 // M3
678 mov f96=f0 // F
679
680 ldfps f97,f98=[sp] // M0
681 ldfps f99,f100=[sp] // M1
682 mov f101=f0 // F
683
684 setf.s f102=r0 // M2
685 setf.s f103=r0 // M3
686 mov f104=f0 // F
687
688 ldfps f105,f106=[sp] // M0
689 ldfps f107,f108=[sp] // M1
690 mov f109=f0 // F
691
692 setf.s f110=r0 // M2
693 setf.s f111=r0 // M3
694 mov f112=f0 // F
695
696 ldfps f113,f114=[sp] // M0
697 ldfps f115,f116=[sp] // M1
698 mov f117=f0 // F
699
700 setf.s f118=r0 // M2
701 setf.s f119=r0 // M3
702 mov f120=f0 // F
703
704 ldfps f121,f122=[sp] // M0
705 ldfps f123,f124=[sp] // M1
706 mov f125=f0 // F
707
708 setf.s f126=r0 // M2
709 setf.s f127=r0 // M3
710 br.ret.sptk.many rp // F
711END(__ia64_init_fpu)
712
713/*
714 * Switch execution mode from virtual to physical
715 *
716 * Inputs:
717 * r16 = new psr to establish
718 * Output:
719 * r19 = old virtual address of ar.bsp
720 * r20 = old virtual address of sp
721 *
722 * Note: RSE must already be in enforced lazy mode
723 */
724GLOBAL_ENTRY(ia64_switch_mode_phys)
725 {
726 alloc r2=ar.pfs,0,0,0,0
727 rsm psr.i | psr.ic // disable interrupts and interrupt collection
728 mov r15=ip
729 }
730 ;;
731 {
732 flushrs // must be first insn in group
733 srlz.i
734 }
735 ;;
736 mov cr.ipsr=r16 // set new PSR
737 add r3=1f-ia64_switch_mode_phys,r15
738
739 mov r19=ar.bsp
740 mov r20=sp
741 mov r14=rp // get return address into a general register
742 ;;
743
744 // going to physical mode, use tpa to translate virt->phys
745 tpa r17=r19
746 tpa r3=r3
747 tpa sp=sp
748 tpa r14=r14
749 ;;
750
751 mov r18=ar.rnat // save ar.rnat
752 mov ar.bspstore=r17 // this steps on ar.rnat
753 mov cr.iip=r3
754 mov cr.ifs=r0
755 ;;
756 mov ar.rnat=r18 // restore ar.rnat
757 rfi // must be last insn in group
758 ;;
7591: mov rp=r14
760 br.ret.sptk.many rp
761END(ia64_switch_mode_phys)
762
763/*
764 * Switch execution mode from physical to virtual
765 *
766 * Inputs:
767 * r16 = new psr to establish
768 * r19 = new bspstore to establish
769 * r20 = new sp to establish
770 *
771 * Note: RSE must already be in enforced lazy mode
772 */
773GLOBAL_ENTRY(ia64_switch_mode_virt)
774 {
775 alloc r2=ar.pfs,0,0,0,0
776 rsm psr.i | psr.ic // disable interrupts and interrupt collection
777 mov r15=ip
778 }
779 ;;
780 {
781 flushrs // must be first insn in group
782 srlz.i
783 }
784 ;;
785 mov cr.ipsr=r16 // set new PSR
786 add r3=1f-ia64_switch_mode_virt,r15
787
788 mov r14=rp // get return address into a general register
789 ;;
790
791 // going to virtual
792 // - for code addresses, set upper bits of addr to KERNEL_START
793 // - for stack addresses, copy from input argument
794 movl r18=KERNEL_START
795 dep r3=0,r3,KERNEL_TR_PAGE_SHIFT,64-KERNEL_TR_PAGE_SHIFT
796 dep r14=0,r14,KERNEL_TR_PAGE_SHIFT,64-KERNEL_TR_PAGE_SHIFT
797 mov sp=r20
798 ;;
799 or r3=r3,r18
800 or r14=r14,r18
801 ;;
802
803 mov r18=ar.rnat // save ar.rnat
804 mov ar.bspstore=r19 // this steps on ar.rnat
805 mov cr.iip=r3
806 mov cr.ifs=r0
807 ;;
808 mov ar.rnat=r18 // restore ar.rnat
809 rfi // must be last insn in group
810 ;;
8111: mov rp=r14
812 br.ret.sptk.many rp
813END(ia64_switch_mode_virt)
814
815GLOBAL_ENTRY(ia64_delay_loop)
816 .prologue
817{ nop 0 // work around GAS unwind info generation bug...
818 .save ar.lc,r2
819 mov r2=ar.lc
820 .body
821 ;;
822 mov ar.lc=r32
823}
824 ;;
825 // force loop to be 32-byte aligned (GAS bug means we cannot use .align
826 // inside function body without corrupting unwind info).
827{ nop 0 }
8281: br.cloop.sptk.few 1b
829 ;;
830 mov ar.lc=r2
831 br.ret.sptk.many rp
832END(ia64_delay_loop)
833
834/*
835 * Return a CPU-local timestamp in nano-seconds. This timestamp is
836 * NOT synchronized across CPUs, so its return value must never be
837 * compared against the values returned on another CPU. The usage in
838 * kernel/sched.c ensures that.
839 *
840 * The return-value of sched_clock() is NOT supposed to wrap-around.
841 * If it did, it would cause some scheduling hiccups (at the worst).
842 * Fortunately, with a 64-bit cycle-counter ticking at 100GHz, even
843 * that would happen only once every 5+ years.
844 *
845 * The code below basically calculates:
846 *
847 * (ia64_get_itc() * local_cpu_data->nsec_per_cyc) >> IA64_NSEC_PER_CYC_SHIFT
848 *
849 * except that the multiplication and the shift are done with 128-bit
850 * intermediate precision so that we can produce a full 64-bit result.
851 */
852GLOBAL_ENTRY(sched_clock)
853 addl r8=THIS_CPU(cpu_info) + IA64_CPUINFO_NSEC_PER_CYC_OFFSET,r0
854 mov.m r9=ar.itc // fetch cycle-counter (35 cyc)
855 ;;
856 ldf8 f8=[r8]
857 ;;
858 setf.sig f9=r9 // certain to stall, so issue it _after_ ldf8...
859 ;;
860 xmpy.lu f10=f9,f8 // calculate low 64 bits of 128-bit product (4 cyc)
861 xmpy.hu f11=f9,f8 // calculate high 64 bits of 128-bit product
862 ;;
863 getf.sig r8=f10 // (5 cyc)
864 getf.sig r9=f11
865 ;;
866 shrp r8=r9,r8,IA64_NSEC_PER_CYC_SHIFT
867 br.ret.sptk.many rp
868END(sched_clock)
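	// For reference, the same computation in C (assuming a compiler with
	// unsigned __int128) would be:
	//	(uint64_t)(((unsigned __int128) itc * nsec_per_cyc) >> IA64_NSEC_PER_CYC_SHIFT)
	// i.e. a 64x64->128-bit multiply (the xmpy.lu/xmpy.hu pair) followed by a
	// 128-bit funnel shift (shrp) that keeps the interesting 64 bits.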
869
870GLOBAL_ENTRY(start_kernel_thread)
871 .prologue
872 .save rp, r0 // this is the end of the call-chain
873 .body
874 alloc r2 = ar.pfs, 0, 0, 2, 0
875 mov out0 = r9
876 mov out1 = r11;;
877 br.call.sptk.many rp = kernel_thread_helper;;
878 mov out0 = r8
879 br.call.sptk.many rp = sys_exit;;
8801: br.sptk.few 1b // not reached
881END(start_kernel_thread)
882
883#ifdef CONFIG_IA64_BRL_EMU
884
885/*
886 * Assembly routines used by brl_emu.c to set preserved register state.
887 */
888
889#define SET_REG(reg) \
890 GLOBAL_ENTRY(ia64_set_##reg); \
891 alloc r16=ar.pfs,1,0,0,0; \
892 mov reg=r32; \
893 ;; \
894 br.ret.sptk.many rp; \
895 END(ia64_set_##reg)
896
897SET_REG(b1);
898SET_REG(b2);
899SET_REG(b3);
900SET_REG(b4);
901SET_REG(b5);
902
903#endif /* CONFIG_IA64_BRL_EMU */
904
905#ifdef CONFIG_SMP
906 /*
907 * This routine handles spinlock contention. It uses a non-standard calling
908 * convention to avoid converting leaf routines into interior routines. Because
909 * of this special convention, there are several restrictions:
910 *
911 * - do not use gp relative variables, this code is called from the kernel
912 * and from modules, r1 is undefined.
913 * - do not use stacked registers, the caller owns them.
914 * - do not use the scratch stack space, the caller owns it.
915 * - do not use any registers other than the ones listed below
916 *
917 * Inputs:
918 * ar.pfs - saved CFM of caller
919 * ar.ccv - 0 (and available for use)
920 * r27 - flags from spin_lock_irqsave or 0. Must be preserved.
921 * r28 - available for use.
922 * r29 - available for use.
923 * r30 - available for use.
924 * r31 - address of lock, available for use.
925 * b6 - return address
926 * p14 - available for use.
927 * p15 - used to track flag status.
928 *
929 * If you patch this code to use more registers, do not forget to update
930 * the clobber lists for spin_lock() in include/asm-ia64/spinlock.h.
931 */
932
933#if __GNUC__ < 3 || (__GNUC__ == 3 && __GNUC_MINOR__ < 3)
934
935GLOBAL_ENTRY(ia64_spinlock_contention_pre3_4)
936 .prologue
937 .save ar.pfs, r0 // this code effectively has a zero frame size
938 .save rp, r28
939 .body
940 nop 0
941 tbit.nz p15,p0=r27,IA64_PSR_I_BIT
942 .restore sp // pop existing prologue after next insn
943 mov b6 = r28
944 .prologue
945 .save ar.pfs, r0
946 .altrp b6
947 .body
948 ;;
949(p15) ssm psr.i // reenable interrupts if they were on
950 // DavidM says that srlz.d is slow and is not required in this case
951.wait:
952 // exponential backoff, kdb, lockmeter etc. go in here
953 hint @pause
954 ld4 r30=[r31] // don't use ld4.bias; if it's contended, we won't write the word
955 nop 0
956 ;;
957 cmp4.ne p14,p0=r30,r0
958(p14) br.cond.sptk.few .wait
959(p15) rsm psr.i // disable interrupts if we reenabled them
960 br.cond.sptk.few b6 // lock is now free, try to acquire
961 .global ia64_spinlock_contention_pre3_4_end // for kernprof
962ia64_spinlock_contention_pre3_4_end:
963END(ia64_spinlock_contention_pre3_4)
964
965#else
966
967GLOBAL_ENTRY(ia64_spinlock_contention)
968 .prologue
969 .altrp b6
970 .body
971 tbit.nz p15,p0=r27,IA64_PSR_I_BIT
972 ;;
973.wait:
974(p15) ssm psr.i // reenable interrupts if they were on
975 // DavidM says that srlz.d is slow and is not required in this case
976.wait2:
977 // exponential backoff, kdb, lockmeter etc. go in here
978 hint @pause
979 ld4 r30=[r31] // don't use ld4.bias; if it's contended, we won't write the word
980 ;;
981 cmp4.ne p14,p0=r30,r0
982 mov r30 = 1
983(p14) br.cond.sptk.few .wait2
984(p15) rsm psr.i // disable interrupts if we reenabled them
985 ;;
986 cmpxchg4.acq r30=[r31], r30, ar.ccv
987 ;;
988 cmp4.ne p14,p0=r0,r30
989(p14) br.cond.sptk.few .wait
990
991 br.ret.sptk.many b6 // lock is now taken
992END(ia64_spinlock_contention)
993
994#endif
995
996#endif /* CONFIG_SMP */
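The contention path above is, at heart, a test-and-test-and-set loop: spin with ordinary loads until the lock word reads zero, then try to grab it with cmpxchg4.acq, and go back to spinning if that races. A rough C sketch follows, not part of the patch; the GCC __sync builtin stands in for cmpxchg4.acq, and the psr.i enable/disable dance around the wait loop is left out.

#include <stdint.h>

/* Sketch of the contention loop: read-only spin, then compare-and-swap. */
static void spinlock_contention_sketch(volatile uint32_t *lock)
{
	for (;;) {
		while (*lock != 0)
			;			/* .wait loop: plain loads, no write traffic */
		if (__sync_val_compare_and_swap((uint32_t *) lock, 0, 1) == 0)
			return;			/* cmpxchg took the lock */
		/* lost the race: someone else grabbed it first, spin again */
	}
}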
diff --git a/arch/ia64/kernel/ia64_ksyms.c b/arch/ia64/kernel/ia64_ksyms.c
new file mode 100644
index 000000000000..7bbf019c9867
--- /dev/null
+++ b/arch/ia64/kernel/ia64_ksyms.c
@@ -0,0 +1,127 @@
1/*
2 * Architecture-specific kernel symbols
3 *
4 * Don't put any exports here unless the symbol is defined in an assembler file.
5 * All other exports should be put directly after the definition.
6 */
7
8#include <linux/config.h>
9#include <linux/module.h>
10
11#include <linux/string.h>
12EXPORT_SYMBOL(memset);
13EXPORT_SYMBOL(memchr);
14EXPORT_SYMBOL(memcmp);
15EXPORT_SYMBOL(memcpy);
16EXPORT_SYMBOL(memmove);
17EXPORT_SYMBOL(memscan);
18EXPORT_SYMBOL(strcat);
19EXPORT_SYMBOL(strchr);
20EXPORT_SYMBOL(strcmp);
21EXPORT_SYMBOL(strcpy);
22EXPORT_SYMBOL(strlen);
23EXPORT_SYMBOL(strncat);
24EXPORT_SYMBOL(strncmp);
25EXPORT_SYMBOL(strncpy);
26EXPORT_SYMBOL(strnlen);
27EXPORT_SYMBOL(strrchr);
28EXPORT_SYMBOL(strstr);
29EXPORT_SYMBOL(strpbrk);
30
31#include <asm/checksum.h>
32EXPORT_SYMBOL(ip_fast_csum); /* hand-coded assembly */
33
34#include <asm/semaphore.h>
35EXPORT_SYMBOL(__down);
36EXPORT_SYMBOL(__down_interruptible);
37EXPORT_SYMBOL(__down_trylock);
38EXPORT_SYMBOL(__up);
39
40#include <asm/page.h>
41EXPORT_SYMBOL(clear_page);
42
43#ifdef CONFIG_VIRTUAL_MEM_MAP
44#include <linux/bootmem.h>
45EXPORT_SYMBOL(max_low_pfn); /* defined by bootmem.c, but not exported by generic code */
46#endif
47
48#include <asm/processor.h>
49EXPORT_SYMBOL(per_cpu__cpu_info);
50#ifdef CONFIG_SMP
51EXPORT_SYMBOL(per_cpu__local_per_cpu_offset);
52#endif
53
54#include <asm/uaccess.h>
55EXPORT_SYMBOL(__copy_user);
56EXPORT_SYMBOL(__do_clear_user);
57EXPORT_SYMBOL(__strlen_user);
58EXPORT_SYMBOL(__strncpy_from_user);
59EXPORT_SYMBOL(__strnlen_user);
60
61#include <asm/unistd.h>
62EXPORT_SYMBOL(__ia64_syscall);
63
64/* from arch/ia64/lib */
65extern void __divsi3(void);
66extern void __udivsi3(void);
67extern void __modsi3(void);
68extern void __umodsi3(void);
69extern void __divdi3(void);
70extern void __udivdi3(void);
71extern void __moddi3(void);
72extern void __umoddi3(void);
73
74EXPORT_SYMBOL(__divsi3);
75EXPORT_SYMBOL(__udivsi3);
76EXPORT_SYMBOL(__modsi3);
77EXPORT_SYMBOL(__umodsi3);
78EXPORT_SYMBOL(__divdi3);
79EXPORT_SYMBOL(__udivdi3);
80EXPORT_SYMBOL(__moddi3);
81EXPORT_SYMBOL(__umoddi3);
82
83#if defined(CONFIG_MD_RAID5) || defined(CONFIG_MD_RAID5_MODULE)
84extern void xor_ia64_2(void);
85extern void xor_ia64_3(void);
86extern void xor_ia64_4(void);
87extern void xor_ia64_5(void);
88
89EXPORT_SYMBOL(xor_ia64_2);
90EXPORT_SYMBOL(xor_ia64_3);
91EXPORT_SYMBOL(xor_ia64_4);
92EXPORT_SYMBOL(xor_ia64_5);
93#endif
94
95#include <asm/pal.h>
96EXPORT_SYMBOL(ia64_pal_call_phys_stacked);
97EXPORT_SYMBOL(ia64_pal_call_phys_static);
98EXPORT_SYMBOL(ia64_pal_call_stacked);
99EXPORT_SYMBOL(ia64_pal_call_static);
100EXPORT_SYMBOL(ia64_load_scratch_fpregs);
101EXPORT_SYMBOL(ia64_save_scratch_fpregs);
102
103#include <asm/unwind.h>
104EXPORT_SYMBOL(unw_init_running);
105
106#ifdef ASM_SUPPORTED
107# ifdef CONFIG_SMP
108# if __GNUC__ < 3 || (__GNUC__ == 3 && __GNUC_MINOR__ < 3)
109/*
110 * This is not a normal routine and we don't want a function descriptor for it, so we use
111 * a fake declaration here.
112 */
113extern char ia64_spinlock_contention_pre3_4;
114EXPORT_SYMBOL(ia64_spinlock_contention_pre3_4);
115# else
116/*
117 * This is not a normal routine and we don't want a function descriptor for it, so we use
118 * a fake declaration here.
119 */
120extern char ia64_spinlock_contention;
121EXPORT_SYMBOL(ia64_spinlock_contention);
122# endif
123# endif
124#endif
125
126extern char ia64_ivt[];
127EXPORT_SYMBOL(ia64_ivt);
diff --git a/arch/ia64/kernel/init_task.c b/arch/ia64/kernel/init_task.c
new file mode 100644
index 000000000000..b69c397ed1bf
--- /dev/null
+++ b/arch/ia64/kernel/init_task.c
@@ -0,0 +1,46 @@
1/*
2 * This is where we statically allocate and initialize the initial
3 * task.
4 *
5 * Copyright (C) 1999, 2002-2003 Hewlett-Packard Co
6 * David Mosberger-Tang <davidm@hpl.hp.com>
7 */
8
9#include <linux/init.h>
10#include <linux/mm.h>
11#include <linux/module.h>
12#include <linux/sched.h>
13#include <linux/init_task.h>
14#include <linux/mqueue.h>
15
16#include <asm/uaccess.h>
17#include <asm/pgtable.h>
18
19static struct fs_struct init_fs = INIT_FS;
20static struct files_struct init_files = INIT_FILES;
21static struct signal_struct init_signals = INIT_SIGNALS(init_signals);
22static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
23struct mm_struct init_mm = INIT_MM(init_mm);
24
25EXPORT_SYMBOL(init_mm);
26
27/*
28 * Initial task structure.
29 *
30 * We need to make sure that this is properly aligned due to the way process stacks are
31 * handled. This is done by having a special ".data.init_task" section...
32 */
33#define init_thread_info init_task_mem.s.thread_info
34
35union {
36 struct {
37 struct task_struct task;
38 struct thread_info thread_info;
39 } s;
40 unsigned long stack[KERNEL_STACK_SIZE/sizeof (unsigned long)];
41} init_task_mem asm ("init_task") __attribute__((section(".data.init_task"))) = {{
42 .task = INIT_TASK(init_task_mem.s.task),
43 .thread_info = INIT_THREAD_INFO(init_task_mem.s.task)
44}};
45
46EXPORT_SYMBOL(init_task);
diff --git a/arch/ia64/kernel/iosapic.c b/arch/ia64/kernel/iosapic.c
new file mode 100644
index 000000000000..c15be5c38f56
--- /dev/null
+++ b/arch/ia64/kernel/iosapic.c
@@ -0,0 +1,827 @@
1/*
2 * I/O SAPIC support.
3 *
4 * Copyright (C) 1999 Intel Corp.
5 * Copyright (C) 1999 Asit Mallick <asit.k.mallick@intel.com>
6 * Copyright (C) 2000-2002 J.I. Lee <jung-ik.lee@intel.com>
7 * Copyright (C) 1999-2000, 2002-2003 Hewlett-Packard Co.
8 * David Mosberger-Tang <davidm@hpl.hp.com>
9 * Copyright (C) 1999 VA Linux Systems
10 * Copyright (C) 1999,2000 Walt Drummond <drummond@valinux.com>
11 *
12 * 00/04/19 D. Mosberger Rewritten to mirror more closely the x86 I/O APIC code.
13 * In particular, we now have separate handlers for edge
14 * and level triggered interrupts.
15 * 00/10/27 Asit Mallick, Goutham Rao <goutham.rao@intel.com> IRQ vector allocation
16 * PCI to vector mapping, shared PCI interrupts.
17 * 00/10/27 D. Mosberger Document things a bit more to make them more understandable.
18 * Clean up much of the old IOSAPIC cruft.
19 * 01/07/27 J.I. Lee PCI irq routing, Platform/Legacy interrupts and fixes for
20 * ACPI S5(SoftOff) support.
21 * 02/01/23 J.I. Lee iosapic pgm fixes for PCI irq routing from _PRT
22 * 02/01/07 E. Focht <efocht@ess.nec.de> Redirectable interrupt vectors in
23 * iosapic_set_affinity(), initializations for
24 * /proc/irq/#/smp_affinity
25 * 02/04/02 P. Diefenbaugh Cleaned up ACPI PCI IRQ routing.
26 * 02/04/18 J.I. Lee bug fix in iosapic_init_pci_irq
27 * 02/04/30 J.I. Lee bug fix in find_iosapic to fix ACPI PCI IRQ to IOSAPIC mapping
28 * error
29 * 02/07/29 T. Kochi Allocate interrupt vectors dynamically
30 * 02/08/04 T. Kochi Cleaned up terminology (irq, global system interrupt, vector, etc.)
31 * 02/09/20 D. Mosberger Simplified by taking advantage of ACPI's pci_irq code.
32 * 03/02/19 B. Helgaas Make pcat_compat system-wide, not per-IOSAPIC.
33 * Remove iosapic_address & gsi_base from external interfaces.
34 * Rationalize __init/__devinit attributes.
35 * 04/12/04 Ashok Raj <ashok.raj@intel.com> Intel Corporation 2004
36 * Updated to work with irq migration necessary for CPU Hotplug
37 */
38/*
39 * Here is what the interrupt logic between a PCI device and the kernel looks like:
40 *
41 * (1) A PCI device raises one of the four interrupt pins (INTA, INTB, INTC, INTD). The
42 * device is uniquely identified by its bus- and slot-number (the function
43 * number does not matter here because all functions share the same interrupt
44 * lines).
45 *
46 * (2) The motherboard routes the interrupt line to a pin on an IOSAPIC controller.
47 * Multiple interrupt lines may have to share the same IOSAPIC pin (if they're level
48 * triggered and use the same polarity). Each interrupt line has a unique Global
49 * System Interrupt (GSI) number which can be calculated as the sum of the controller's
50 * base GSI number and the IOSAPIC pin number to which the line connects.
51 *
52 * (3) The IOSAPIC uses internal routing table entries (RTEs) to map the IOSAPIC pin
53 * into the IA-64 interrupt vector. This interrupt vector is then sent to the CPU.
54 *
55 * (4) The kernel recognizes an interrupt as an IRQ. The IRQ interface is used as
56 * the architecture-independent interrupt handling mechanism in Linux. As an
57 * IRQ is a number, we have to have an IA-64 interrupt vector number <-> IRQ number
58 * mapping. On smaller systems, we use a one-to-one mapping between IA-64 vector and
59 * IRQ. A platform can implement platform_irq_to_vector(irq) and
60 * platform_local_vector_to_irq(vector) APIs to differentiate the mapping.
61 * Please see also include/asm-ia64/hw_irq.h for those APIs.
62 *
63 * To sum up, there are three levels of mappings involved:
64 *
65 * PCI pin -> global system interrupt (GSI) -> IA-64 vector <-> IRQ
66 *
67 * Note: The term "IRQ" is loosely used everywhere in the Linux kernel to describe interrupts.
68 * Now we use "IRQ" only for Linux IRQs. ISA IRQ (isa_irq) is the only exception in this
69 * source code.
70 */
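/*
 * A worked example with made-up numbers: a device interrupt wired to pin 7 of
 * an IOSAPIC whose gsi_base is 16 gets GSI 16 + 7 = 23.  The registration code
 * then assigns (or reuses) an IA-64 vector for GSI 23 and programs that vector
 * into the pin's RTE; with the one-to-one mapping used on smaller systems, the
 * Linux IRQ number is simply that vector.
 */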
71#include <linux/config.h>
72
73#include <linux/acpi.h>
74#include <linux/init.h>
75#include <linux/irq.h>
76#include <linux/kernel.h>
77#include <linux/list.h>
78#include <linux/pci.h>
79#include <linux/smp.h>
80#include <linux/smp_lock.h>
81#include <linux/string.h>
82
83#include <asm/delay.h>
84#include <asm/hw_irq.h>
85#include <asm/io.h>
86#include <asm/iosapic.h>
87#include <asm/machvec.h>
88#include <asm/processor.h>
89#include <asm/ptrace.h>
90#include <asm/system.h>
91
92
93#undef DEBUG_INTERRUPT_ROUTING
94
95#ifdef DEBUG_INTERRUPT_ROUTING
96#define DBG(fmt...) printk(fmt)
97#else
98#define DBG(fmt...)
99#endif
100
101static DEFINE_SPINLOCK(iosapic_lock);
102
103/* These tables map IA-64 vectors to the IOSAPIC pin that generates this vector. */
104
105static struct iosapic_intr_info {
106 char __iomem *addr; /* base address of IOSAPIC */
107 u32 low32; /* current value of low word of Redirection table entry */
108 unsigned int gsi_base; /* first GSI assigned to this IOSAPIC */
109 char rte_index; /* IOSAPIC RTE index (-1 => not an IOSAPIC interrupt) */
110 unsigned char dmode : 3; /* delivery mode (see iosapic.h) */
111 unsigned char polarity: 1; /* interrupt polarity (see iosapic.h) */
112 unsigned char trigger : 1; /* trigger mode (see iosapic.h) */
113 int refcnt; /* reference counter */
114} iosapic_intr_info[IA64_NUM_VECTORS];
115
116static struct iosapic {
117 char __iomem *addr; /* base address of IOSAPIC */
118 unsigned int gsi_base; /* first GSI assigned to this IOSAPIC */
119 unsigned short num_rte; /* number of RTE in this IOSAPIC */
120#ifdef CONFIG_NUMA
121 unsigned short node; /* numa node association via pxm */
122#endif
123} iosapic_lists[NR_IOSAPICS];
124
125static int num_iosapic;
126
127static unsigned char pcat_compat __initdata; /* 8259 compatibility flag */
128
129
130/*
131 * Find an IOSAPIC associated with a GSI
132 */
133static inline int
134find_iosapic (unsigned int gsi)
135{
136 int i;
137
138 for (i = 0; i < num_iosapic; i++) {
139 if ((unsigned) (gsi - iosapic_lists[i].gsi_base) < iosapic_lists[i].num_rte)
140 return i;
141 }
142
143 return -1;
144}
145
146static inline int
147_gsi_to_vector (unsigned int gsi)
148{
149 struct iosapic_intr_info *info;
150
151 for (info = iosapic_intr_info; info < iosapic_intr_info + IA64_NUM_VECTORS; ++info)
152 if (info->gsi_base + info->rte_index == gsi)
153 return info - iosapic_intr_info;
154 return -1;
155}
156
157/*
158 * Translate GSI number to the corresponding IA-64 interrupt vector. If no
159 * entry exists, return -1.
160 */
161inline int
162gsi_to_vector (unsigned int gsi)
163{
164 return _gsi_to_vector(gsi);
165}
166
167int
168gsi_to_irq (unsigned int gsi)
169{
170 /*
171 * XXX fix me: this assumes an identity mapping between IA-64 vector and Linux irq
172 * numbers...
173 */
174 return _gsi_to_vector(gsi);
175}
176
177static void
178set_rte (unsigned int vector, unsigned int dest, int mask)
179{
180 unsigned long pol, trigger, dmode;
181 u32 low32, high32;
182 char __iomem *addr;
183 int rte_index;
184 char redir;
185
186 DBG(KERN_DEBUG"IOSAPIC: routing vector %d to 0x%x\n", vector, dest);
187
188 rte_index = iosapic_intr_info[vector].rte_index;
189 if (rte_index < 0)
190 return; /* not an IOSAPIC interrupt */
191
192 addr = iosapic_intr_info[vector].addr;
193 pol = iosapic_intr_info[vector].polarity;
194 trigger = iosapic_intr_info[vector].trigger;
195 dmode = iosapic_intr_info[vector].dmode;
196 vector &= (~IA64_IRQ_REDIRECTED);
197
198 redir = (dmode == IOSAPIC_LOWEST_PRIORITY) ? 1 : 0;
199
200#ifdef CONFIG_SMP
201 {
202 unsigned int irq;
203
204 for (irq = 0; irq < NR_IRQS; ++irq)
205 if (irq_to_vector(irq) == vector) {
206 set_irq_affinity_info(irq, (int)(dest & 0xffff), redir);
207 break;
208 }
209 }
210#endif
211
212 low32 = ((pol << IOSAPIC_POLARITY_SHIFT) |
213 (trigger << IOSAPIC_TRIGGER_SHIFT) |
214 (dmode << IOSAPIC_DELIVERY_SHIFT) |
215 ((mask ? 1 : 0) << IOSAPIC_MASK_SHIFT) |
216 vector);
217
218 /* dest contains both id and eid */
219 high32 = (dest << IOSAPIC_DEST_SHIFT);
220
221 iosapic_write(addr, IOSAPIC_RTE_HIGH(rte_index), high32);
222 iosapic_write(addr, IOSAPIC_RTE_LOW(rte_index), low32);
223 iosapic_intr_info[vector].low32 = low32;
224}
225
226static void
227nop (unsigned int vector)
228{
229 /* do nothing... */
230}
231
232static void
233mask_irq (unsigned int irq)
234{
235 unsigned long flags;
236 char __iomem *addr;
237 u32 low32;
238 int rte_index;
239 ia64_vector vec = irq_to_vector(irq);
240
241 addr = iosapic_intr_info[vec].addr;
242 rte_index = iosapic_intr_info[vec].rte_index;
243
244 if (rte_index < 0)
245 return; /* not an IOSAPIC interrupt! */
246
247 spin_lock_irqsave(&iosapic_lock, flags);
248 {
249 /* set only the mask bit */
250 low32 = iosapic_intr_info[vec].low32 |= IOSAPIC_MASK;
251 iosapic_write(addr, IOSAPIC_RTE_LOW(rte_index), low32);
252 }
253 spin_unlock_irqrestore(&iosapic_lock, flags);
254}
255
256static void
257unmask_irq (unsigned int irq)
258{
259 unsigned long flags;
260 char __iomem *addr;
261 u32 low32;
262 int rte_index;
263 ia64_vector vec = irq_to_vector(irq);
264
265 addr = iosapic_intr_info[vec].addr;
266 rte_index = iosapic_intr_info[vec].rte_index;
267 if (rte_index < 0)
268 return; /* not an IOSAPIC interrupt! */
269
270 spin_lock_irqsave(&iosapic_lock, flags);
271 {
272 low32 = iosapic_intr_info[vec].low32 &= ~IOSAPIC_MASK;
273 iosapic_write(addr, IOSAPIC_RTE_LOW(rte_index), low32);
274 }
275 spin_unlock_irqrestore(&iosapic_lock, flags);
276}
277
278
279static void
280iosapic_set_affinity (unsigned int irq, cpumask_t mask)
281{
282#ifdef CONFIG_SMP
283 unsigned long flags;
284 u32 high32, low32;
285 int dest, rte_index;
286 char __iomem *addr;
287 int redir = (irq & IA64_IRQ_REDIRECTED) ? 1 : 0;
288 ia64_vector vec;
289
290 irq &= (~IA64_IRQ_REDIRECTED);
291 vec = irq_to_vector(irq);
292
293 if (cpus_empty(mask))
294 return;
295
296 dest = cpu_physical_id(first_cpu(mask));
297
298 rte_index = iosapic_intr_info[vec].rte_index;
299 addr = iosapic_intr_info[vec].addr;
300
301 if (rte_index < 0)
302 return; /* not an IOSAPIC interrupt */
303
304 set_irq_affinity_info(irq, dest, redir);
305
306 /* dest contains both id and eid */
307 high32 = dest << IOSAPIC_DEST_SHIFT;
308
309 spin_lock_irqsave(&iosapic_lock, flags);
310 {
311 low32 = iosapic_intr_info[vec].low32 & ~(7 << IOSAPIC_DELIVERY_SHIFT);
312
313 if (redir)
314 /* change delivery mode to lowest priority */
315 low32 |= (IOSAPIC_LOWEST_PRIORITY << IOSAPIC_DELIVERY_SHIFT);
316 else
317 /* change delivery mode to fixed */
318 low32 |= (IOSAPIC_FIXED << IOSAPIC_DELIVERY_SHIFT);
319
320 iosapic_intr_info[vec].low32 = low32;
321 iosapic_write(addr, IOSAPIC_RTE_HIGH(rte_index), high32);
322 iosapic_write(addr, IOSAPIC_RTE_LOW(rte_index), low32);
323 }
324 spin_unlock_irqrestore(&iosapic_lock, flags);
325#endif
326}
327
328/*
329 * Handlers for level-triggered interrupts.
330 */
331
332static unsigned int
333iosapic_startup_level_irq (unsigned int irq)
334{
335 unmask_irq(irq);
336 return 0;
337}
338
339static void
340iosapic_end_level_irq (unsigned int irq)
341{
342 ia64_vector vec = irq_to_vector(irq);
343
344 move_irq(irq);
345 iosapic_eoi(iosapic_intr_info[vec].addr, vec);
346}
347
348#define iosapic_shutdown_level_irq mask_irq
349#define iosapic_enable_level_irq unmask_irq
350#define iosapic_disable_level_irq mask_irq
351#define iosapic_ack_level_irq nop
352
353struct hw_interrupt_type irq_type_iosapic_level = {
354 .typename = "IO-SAPIC-level",
355 .startup = iosapic_startup_level_irq,
356 .shutdown = iosapic_shutdown_level_irq,
357 .enable = iosapic_enable_level_irq,
358 .disable = iosapic_disable_level_irq,
359 .ack = iosapic_ack_level_irq,
360 .end = iosapic_end_level_irq,
361 .set_affinity = iosapic_set_affinity
362};
363
364/*
365 * Handlers for edge-triggered interrupts.
366 */
367
368static unsigned int
369iosapic_startup_edge_irq (unsigned int irq)
370{
371 unmask_irq(irq);
372 /*
373 * IOSAPIC simply drops interrupts pended while the
374 * corresponding pin was masked, so we can't know if an
375 * interrupt is pending already. Let's hope not...
376 */
377 return 0;
378}
379
380static void
381iosapic_ack_edge_irq (unsigned int irq)
382{
383 irq_desc_t *idesc = irq_descp(irq);
384
385 move_irq(irq);
386 /*
387	 * Once IRQ_PENDING has been recorded, we can mask the
388 * interrupt for real. This prevents IRQ storms from unhandled
389 * devices.
390 */
391 if ((idesc->status & (IRQ_PENDING|IRQ_DISABLED)) == (IRQ_PENDING|IRQ_DISABLED))
392 mask_irq(irq);
393}
394
395#define iosapic_enable_edge_irq unmask_irq
396#define iosapic_disable_edge_irq nop
397#define iosapic_end_edge_irq nop
398
399struct hw_interrupt_type irq_type_iosapic_edge = {
400 .typename = "IO-SAPIC-edge",
401 .startup = iosapic_startup_edge_irq,
402 .shutdown = iosapic_disable_edge_irq,
403 .enable = iosapic_enable_edge_irq,
404 .disable = iosapic_disable_edge_irq,
405 .ack = iosapic_ack_edge_irq,
406 .end = iosapic_end_edge_irq,
407 .set_affinity = iosapic_set_affinity
408};
409
410unsigned int
411iosapic_version (char __iomem *addr)
412{
413 /*
414	 * The IOSAPIC Version Register returns a 32-bit structure like:
415 * {
416 * unsigned int version : 8;
417 * unsigned int reserved1 : 8;
418 * unsigned int max_redir : 8;
419 * unsigned int reserved2 : 8;
420 * }
421 */
422 return iosapic_read(addr, IOSAPIC_VERSION);
423}
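/*
 * Illustrative sketch only (not part of the original file): given the layout
 * described above, a caller can pull the fields apart like this.  The
 * max_redir field is what iosapic_init() below turns into the RTE count.
 *
 *	unsigned int ver       = iosapic_version(addr);
 *	unsigned int version   = ver & 0xff;		// bits 0..7
 *	unsigned int max_redir = (ver >> 16) & 0xff;	// bits 16..23
 *	unsigned int num_rte   = max_redir + 1;		// pins == RTEs
 */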
424
425/*
426 * If the given vector is already owned by another interrupt,
427 * assign a new vector for that interrupt and make the original vector available.
428 */
429static void __init
430iosapic_reassign_vector (int vector)
431{
432 int new_vector;
433
434 if (iosapic_intr_info[vector].rte_index >= 0 || iosapic_intr_info[vector].addr
435 || iosapic_intr_info[vector].gsi_base || iosapic_intr_info[vector].dmode
436 || iosapic_intr_info[vector].polarity || iosapic_intr_info[vector].trigger)
437 {
438 new_vector = assign_irq_vector(AUTO_ASSIGN);
439 printk(KERN_INFO "Reassigning vector %d to %d\n", vector, new_vector);
440 memcpy(&iosapic_intr_info[new_vector], &iosapic_intr_info[vector],
441 sizeof(struct iosapic_intr_info));
442 memset(&iosapic_intr_info[vector], 0, sizeof(struct iosapic_intr_info));
443 iosapic_intr_info[vector].rte_index = -1;
444 }
445}
446
447static void
448register_intr (unsigned int gsi, int vector, unsigned char delivery,
449 unsigned long polarity, unsigned long trigger)
450{
451 irq_desc_t *idesc;
452 struct hw_interrupt_type *irq_type;
453 int rte_index;
454 int index;
455 unsigned long gsi_base;
456 void __iomem *iosapic_address;
457
458 index = find_iosapic(gsi);
459 if (index < 0) {
460 printk(KERN_WARNING "%s: No IOSAPIC for GSI %u\n", __FUNCTION__, gsi);
461 return;
462 }
463
464 iosapic_address = iosapic_lists[index].addr;
465 gsi_base = iosapic_lists[index].gsi_base;
466
467 rte_index = gsi - gsi_base;
468 iosapic_intr_info[vector].rte_index = rte_index;
469 iosapic_intr_info[vector].polarity = polarity;
470 iosapic_intr_info[vector].dmode = delivery;
471 iosapic_intr_info[vector].addr = iosapic_address;
472 iosapic_intr_info[vector].gsi_base = gsi_base;
473 iosapic_intr_info[vector].trigger = trigger;
474 iosapic_intr_info[vector].refcnt++;
475
476 if (trigger == IOSAPIC_EDGE)
477 irq_type = &irq_type_iosapic_edge;
478 else
479 irq_type = &irq_type_iosapic_level;
480
481 idesc = irq_descp(vector);
482 if (idesc->handler != irq_type) {
483 if (idesc->handler != &no_irq_type)
484 printk(KERN_WARNING "%s: changing vector %d from %s to %s\n",
485 __FUNCTION__, vector, idesc->handler->typename, irq_type->typename);
486 idesc->handler = irq_type;
487 }
488}
489
490static unsigned int
491get_target_cpu (unsigned int gsi, int vector)
492{
493#ifdef CONFIG_SMP
494 static int cpu = -1;
495
496 /*
497 * If the platform supports redirection via XTP, let it
498 * distribute interrupts.
499 */
500 if (smp_int_redirect & SMP_IRQ_REDIRECTION)
501 return cpu_physical_id(smp_processor_id());
502
503 /*
504 * Some interrupts (ACPI SCI, for instance) are registered
505 * before the BSP is marked as online.
506 */
507 if (!cpu_online(smp_processor_id()))
508 return cpu_physical_id(smp_processor_id());
509
510#ifdef CONFIG_NUMA
511 {
512 int num_cpus, cpu_index, iosapic_index, numa_cpu, i = 0;
513 cpumask_t cpu_mask;
514
515 iosapic_index = find_iosapic(gsi);
516 if (iosapic_index < 0 ||
517 iosapic_lists[iosapic_index].node == MAX_NUMNODES)
518 goto skip_numa_setup;
519
520 cpu_mask = node_to_cpumask(iosapic_lists[iosapic_index].node);
521
522 for_each_cpu_mask(numa_cpu, cpu_mask) {
523 if (!cpu_online(numa_cpu))
524 cpu_clear(numa_cpu, cpu_mask);
525 }
526
527 num_cpus = cpus_weight(cpu_mask);
528
529 if (!num_cpus)
530 goto skip_numa_setup;
531
532		/* Use vector assignment to distribute across cpus in node */
533 cpu_index = vector % num_cpus;
534
535 for (numa_cpu = first_cpu(cpu_mask) ; i < cpu_index ; i++)
536 numa_cpu = next_cpu(numa_cpu, cpu_mask);
537
538 if (numa_cpu != NR_CPUS)
539 return cpu_physical_id(numa_cpu);
540 }
541skip_numa_setup:
542#endif
543 /*
544 * Otherwise, round-robin interrupt vectors across all the
545 * processors. (It'd be nice if we could be smarter in the
546 * case of NUMA.)
547 */
548 do {
549 if (++cpu >= NR_CPUS)
550 cpu = 0;
551 } while (!cpu_online(cpu));
552
553 return cpu_physical_id(cpu);
554#else
555 return cpu_physical_id(smp_processor_id());
556#endif
557}
558
559/*
560 * ACPI can describe IOSAPIC interrupts via static tables and namespace
561 * methods. This provides an interface to register those interrupts and
562 * program the IOSAPIC RTE.
563 */
564int
565iosapic_register_intr (unsigned int gsi,
566 unsigned long polarity, unsigned long trigger)
567{
568 int vector;
569 unsigned int dest;
570 unsigned long flags;
571
572 /*
573 * If this GSI has already been registered (i.e., it's a
574 * shared interrupt, or we lost a race to register it),
575 * don't touch the RTE.
576 */
577 spin_lock_irqsave(&iosapic_lock, flags);
578 {
579 vector = gsi_to_vector(gsi);
580 if (vector > 0) {
581 iosapic_intr_info[vector].refcnt++;
582 spin_unlock_irqrestore(&iosapic_lock, flags);
583 return vector;
584 }
585
586 vector = assign_irq_vector(AUTO_ASSIGN);
587 dest = get_target_cpu(gsi, vector);
588 register_intr(gsi, vector, IOSAPIC_LOWEST_PRIORITY,
589 polarity, trigger);
590
591 set_rte(vector, dest, 1);
592 }
593 spin_unlock_irqrestore(&iosapic_lock, flags);
594
595 printk(KERN_INFO "GSI %u (%s, %s) -> CPU %d (0x%04x) vector %d\n",
596 gsi, (trigger == IOSAPIC_EDGE ? "edge" : "level"),
597 (polarity == IOSAPIC_POL_HIGH ? "high" : "low"),
598 cpu_logical_id(dest), dest, vector);
599
600 return vector;
601}
602
603#ifdef CONFIG_ACPI_DEALLOCATE_IRQ
604void
605iosapic_unregister_intr (unsigned int gsi)
606{
607 unsigned long flags;
608 int irq, vector;
609 irq_desc_t *idesc;
610 int rte_index;
611 unsigned long trigger, polarity;
612
613 /*
614 * If the irq associated with the gsi is not found,
615 * iosapic_unregister_intr() is unbalanced. We need to check
616 * this again after getting locks.
617 */
618 irq = gsi_to_irq(gsi);
619 if (irq < 0) {
620 printk(KERN_ERR "iosapic_unregister_intr(%u) unbalanced\n", gsi);
621 WARN_ON(1);
622 return;
623 }
624 vector = irq_to_vector(irq);
625
626 idesc = irq_descp(irq);
627 spin_lock_irqsave(&idesc->lock, flags);
628 spin_lock(&iosapic_lock);
629 {
630 rte_index = iosapic_intr_info[vector].rte_index;
631 if (rte_index < 0) {
632 spin_unlock(&iosapic_lock);
633 spin_unlock_irqrestore(&idesc->lock, flags);
634 printk(KERN_ERR "iosapic_unregister_intr(%u) unbalanced\n", gsi);
635 WARN_ON(1);
636 return;
637 }
638
639 if (--iosapic_intr_info[vector].refcnt > 0) {
640 spin_unlock(&iosapic_lock);
641 spin_unlock_irqrestore(&idesc->lock, flags);
642 return;
643 }
644
645 /*
646 * If interrupt handlers still exist on the irq
647 * associated with the gsi, don't unregister the
648 * interrupt.
649 */
650 if (idesc->action) {
651 iosapic_intr_info[vector].refcnt++;
652 spin_unlock(&iosapic_lock);
653 spin_unlock_irqrestore(&idesc->lock, flags);
654 printk(KERN_WARNING "Cannot unregister GSI. IRQ %u is still in use.\n", irq);
655 return;
656 }
657
658 /* Clear the interrupt controller descriptor. */
659 idesc->handler = &no_irq_type;
660
661 trigger = iosapic_intr_info[vector].trigger;
662 polarity = iosapic_intr_info[vector].polarity;
663
664 /* Clear the interrupt information. */
665 memset(&iosapic_intr_info[vector], 0, sizeof(struct iosapic_intr_info));
666 iosapic_intr_info[vector].rte_index = -1; /* mark as unused */
667 }
668 spin_unlock(&iosapic_lock);
669 spin_unlock_irqrestore(&idesc->lock, flags);
670
671 /* Free the interrupt vector */
672 free_irq_vector(vector);
673
674	printk(KERN_INFO "GSI %u (%s, %s) -> vector %d unregistered.\n",
675 gsi, (trigger == IOSAPIC_EDGE ? "edge" : "level"),
676 (polarity == IOSAPIC_POL_HIGH ? "high" : "low"),
677 vector);
678}
679#endif /* CONFIG_ACPI_DEALLOCATE_IRQ */
680
681/*
682 * ACPI calls this when it finds an entry for a platform interrupt.
683 * Note that the gsi_base and IOSAPIC address must be set in iosapic_init().
684 */
685int __init
686iosapic_register_platform_intr (u32 int_type, unsigned int gsi,
687 int iosapic_vector, u16 eid, u16 id,
688 unsigned long polarity, unsigned long trigger)
689{
690 static const char * const name[] = {"unknown", "PMI", "INIT", "CPEI"};
691 unsigned char delivery;
692 int vector, mask = 0;
693 unsigned int dest = ((id << 8) | eid) & 0xffff;
694
695 switch (int_type) {
696 case ACPI_INTERRUPT_PMI:
697 vector = iosapic_vector;
698 /*
699		 * Since the PMI vector is allocated by firmware (ACPI), not by the
700		 * kernel, we need to make sure the vector is available.
701 */
702 iosapic_reassign_vector(vector);
703 delivery = IOSAPIC_PMI;
704 break;
705 case ACPI_INTERRUPT_INIT:
706 vector = assign_irq_vector(AUTO_ASSIGN);
707 delivery = IOSAPIC_INIT;
708 break;
709 case ACPI_INTERRUPT_CPEI:
710 vector = IA64_CPE_VECTOR;
711 delivery = IOSAPIC_LOWEST_PRIORITY;
712 mask = 1;
713 break;
714 default:
715		printk(KERN_ERR "iosapic_register_platform_intr(): invalid int type 0x%x\n", int_type);
716 return -1;
717 }
718
719 register_intr(gsi, vector, delivery, polarity, trigger);
720
721 printk(KERN_INFO "PLATFORM int %s (0x%x): GSI %u (%s, %s) -> CPU %d (0x%04x) vector %d\n",
722 int_type < ARRAY_SIZE(name) ? name[int_type] : "unknown",
723 int_type, gsi, (trigger == IOSAPIC_EDGE ? "edge" : "level"),
724 (polarity == IOSAPIC_POL_HIGH ? "high" : "low"),
725 cpu_logical_id(dest), dest, vector);
726
727 set_rte(vector, dest, mask);
728 return vector;
729}
730
731
732/*
733 * ACPI calls this when it finds an entry for a legacy ISA IRQ override.
734 * Note that the gsi_base and IOSAPIC address must be set in iosapic_init().
735 */
736void __init
737iosapic_override_isa_irq (unsigned int isa_irq, unsigned int gsi,
738 unsigned long polarity,
739 unsigned long trigger)
740{
741 int vector;
742 unsigned int dest = cpu_physical_id(smp_processor_id());
743
744 vector = isa_irq_to_vector(isa_irq);
745
746 register_intr(gsi, vector, IOSAPIC_LOWEST_PRIORITY, polarity, trigger);
747
748 DBG("ISA: IRQ %u -> GSI %u (%s,%s) -> CPU %d (0x%04x) vector %d\n",
749 isa_irq, gsi, trigger == IOSAPIC_EDGE ? "edge" : "level",
750 polarity == IOSAPIC_POL_HIGH ? "high" : "low",
751 cpu_logical_id(dest), dest, vector);
752
753 set_rte(vector, dest, 1);
754}
755
756void __init
757iosapic_system_init (int system_pcat_compat)
758{
759 int vector;
760
761 for (vector = 0; vector < IA64_NUM_VECTORS; ++vector)
762 iosapic_intr_info[vector].rte_index = -1; /* mark as unused */
763
764 pcat_compat = system_pcat_compat;
765 if (pcat_compat) {
766 /*
767 * Disable the compatibility mode interrupts (8259 style), needs IN/OUT support
768 * enabled.
769 */
770 printk(KERN_INFO "%s: Disabling PC-AT compatible 8259 interrupts\n", __FUNCTION__);
771 outb(0xff, 0xA1);
772 outb(0xff, 0x21);
773 }
774}
775
776void __init
777iosapic_init (unsigned long phys_addr, unsigned int gsi_base)
778{
779 int num_rte;
780 unsigned int isa_irq, ver;
781 char __iomem *addr;
782
783 addr = ioremap(phys_addr, 0);
784 ver = iosapic_version(addr);
785
786 /*
787 * The MAX_REDIR register holds the highest input pin
788 * number (starting from 0).
789	 * We add 1 so that we can use it as the number of pins (= RTEs).
790 */
791 num_rte = ((ver >> 16) & 0xff) + 1;
792
793 iosapic_lists[num_iosapic].addr = addr;
794 iosapic_lists[num_iosapic].gsi_base = gsi_base;
795 iosapic_lists[num_iosapic].num_rte = num_rte;
796#ifdef CONFIG_NUMA
797 iosapic_lists[num_iosapic].node = MAX_NUMNODES;
798#endif
799 num_iosapic++;
800
801 if ((gsi_base == 0) && pcat_compat) {
802 /*
803 * Map the legacy ISA devices into the IOSAPIC data. Some of these may
804 * get reprogrammed later on with data from the ACPI Interrupt Source
805 * Override table.
806 */
807 for (isa_irq = 0; isa_irq < 16; ++isa_irq)
808 iosapic_override_isa_irq(isa_irq, isa_irq, IOSAPIC_POL_HIGH, IOSAPIC_EDGE);
809 }
810}
811
812#ifdef CONFIG_NUMA
813void __init
814map_iosapic_to_node(unsigned int gsi_base, int node)
815{
816 int index;
817
818 index = find_iosapic(gsi_base);
819 if (index < 0) {
820 printk(KERN_WARNING "%s: No IOSAPIC for GSI %u\n",
821 __FUNCTION__, gsi_base);
822 return;
823 }
824 iosapic_lists[index].node = node;
825 return;
826}
827#endif
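Taken together, the routines above implement the GSI-to-vector plumbing for this
platform: iosapic_init() registers each IOSAPIC, iosapic_register_intr() maps a
GSI onto an IA-64 vector and programs the RTE, and the edge/level
hw_interrupt_type structures handle mask/unmask/EOI at runtime. A minimal usage
sketch follows; it assumes the IOSAPIC_POL_LOW/IOSAPIC_LEVEL constants come
from the same IOSAPIC header used above, and the handler, device and probe
names are made up purely for illustration:

	/* Hypothetical driver fragment (names are illustrative only). */
	static irqreturn_t my_handler(int irq, void *dev_id, struct pt_regs *regs)
	{
		/* acknowledge the device here */
		return IRQ_HANDLED;
	}

	static int my_probe(unsigned int gsi, void *my_dev)
	{
		/* Route a level-triggered, active-low GSI onto a vector. */
		int vector = iosapic_register_intr(gsi, IOSAPIC_POL_LOW, IOSAPIC_LEVEL);

		/* On the generic build the vector doubles as the IRQ number
		 * (cf. __ia64_local_vector_to_irq() in irq.c below). */
		return request_irq(vector, my_handler, SA_SHIRQ, "mydev", my_dev);
	}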
diff --git a/arch/ia64/kernel/irq.c b/arch/ia64/kernel/irq.c
new file mode 100644
index 000000000000..28f2aadc38d0
--- /dev/null
+++ b/arch/ia64/kernel/irq.c
@@ -0,0 +1,238 @@
1/*
2 * linux/arch/ia64/kernel/irq.c
3 *
4 * Copyright (C) 1992, 1998 Linus Torvalds, Ingo Molnar
5 *
6 * This file contains the code used by various IRQ handling routines:
7 * asking for different IRQ's should be done through these routines
8 * instead of just grabbing them. Thus setups with different IRQ numbers
9 * shouldn't result in any weird surprises, and installing new handlers
10 * should be easier.
11 *
12 * Copyright (C) Ashok Raj<ashok.raj@intel.com>, Intel Corporation 2004
13 *
14 * 4/14/2004: Added code to handle cpu migration and do safe irq
15 *			migration without losing interrupts for iosapic
16 * architecture.
17 */
18
19#include <asm/delay.h>
20#include <asm/uaccess.h>
21#include <linux/module.h>
22#include <linux/seq_file.h>
23#include <linux/interrupt.h>
24#include <linux/kernel_stat.h>
25
26/*
27 * What should we do if we get a hw irq event on an illegal vector?
28 * Each architecture has to answer this itself.
29 */
30void ack_bad_irq(unsigned int irq)
31{
32 printk(KERN_ERR "Unexpected irq vector 0x%x on CPU %u!\n", irq, smp_processor_id());
33}
34
35#ifdef CONFIG_IA64_GENERIC
36unsigned int __ia64_local_vector_to_irq (ia64_vector vec)
37{
38 return (unsigned int) vec;
39}
40#endif
41
42/*
43 * Interrupt statistics:
44 */
45
46atomic_t irq_err_count;
47
48/*
49 * /proc/interrupts printing:
50 */
51
52int show_interrupts(struct seq_file *p, void *v)
53{
54 int i = *(loff_t *) v, j;
55 struct irqaction * action;
56 unsigned long flags;
57
58 if (i == 0) {
59 seq_printf(p, " ");
60 for (j=0; j<NR_CPUS; j++)
61 if (cpu_online(j))
62 seq_printf(p, "CPU%d ",j);
63 seq_putc(p, '\n');
64 }
65
66 if (i < NR_IRQS) {
67 spin_lock_irqsave(&irq_desc[i].lock, flags);
68 action = irq_desc[i].action;
69 if (!action)
70 goto skip;
71 seq_printf(p, "%3d: ",i);
72#ifndef CONFIG_SMP
73 seq_printf(p, "%10u ", kstat_irqs(i));
74#else
75 for (j = 0; j < NR_CPUS; j++)
76 if (cpu_online(j))
77 seq_printf(p, "%10u ", kstat_cpu(j).irqs[i]);
78#endif
79 seq_printf(p, " %14s", irq_desc[i].handler->typename);
80 seq_printf(p, " %s", action->name);
81
82 for (action=action->next; action; action = action->next)
83 seq_printf(p, ", %s", action->name);
84
85 seq_putc(p, '\n');
86skip:
87 spin_unlock_irqrestore(&irq_desc[i].lock, flags);
88 } else if (i == NR_IRQS)
89 seq_printf(p, "ERR: %10u\n", atomic_read(&irq_err_count));
90 return 0;
91}
92
93#ifdef CONFIG_SMP
94/*
95 * This is updated when the user sets irq affinity via /proc
96 */
97static cpumask_t __cacheline_aligned pending_irq_cpumask[NR_IRQS];
98static unsigned long pending_irq_redir[BITS_TO_LONGS(NR_IRQS)];
99
100static char irq_redir [NR_IRQS]; // = { [0 ... NR_IRQS-1] = 1 };
101
102/*
103 * Arch specific routine for deferred write to iosapic rte to reprogram
104 * intr destination.
105 */
106void proc_set_irq_affinity(unsigned int irq, cpumask_t mask_val)
107{
108 pending_irq_cpumask[irq] = mask_val;
109}
110
111void set_irq_affinity_info (unsigned int irq, int hwid, int redir)
112{
113 cpumask_t mask = CPU_MASK_NONE;
114
115 cpu_set(cpu_logical_id(hwid), mask);
116
117 if (irq < NR_IRQS) {
118 irq_affinity[irq] = mask;
119 irq_redir[irq] = (char) (redir & 0xff);
120 }
121}
122
123
124void move_irq(int irq)
125{
126 /* note - we hold desc->lock */
127 cpumask_t tmp;
128 irq_desc_t *desc = irq_descp(irq);
129 int redir = test_bit(irq, pending_irq_redir);
130
131 if (unlikely(!desc->handler->set_affinity))
132 return;
133
134 if (!cpus_empty(pending_irq_cpumask[irq])) {
135 cpus_and(tmp, pending_irq_cpumask[irq], cpu_online_map);
136 if (unlikely(!cpus_empty(tmp))) {
137 desc->handler->set_affinity(irq | (redir ? IA64_IRQ_REDIRECTED : 0),
138 pending_irq_cpumask[irq]);
139 }
140 cpus_clear(pending_irq_cpumask[irq]);
141 }
142}
143
144
145#endif /* CONFIG_SMP */
146
147#ifdef CONFIG_HOTPLUG_CPU
148unsigned int vectors_in_migration[NR_IRQS];
149
150/*
151 * Since cpu_online_map is already updated, we just need to find IRQs
152 * whose affinity no longer contains any online CPU.
153 */
154static void migrate_irqs(void)
155{
156 cpumask_t mask;
157 irq_desc_t *desc;
158 int irq, new_cpu;
159
160 for (irq=0; irq < NR_IRQS; irq++) {
161 desc = irq_descp(irq);
162
163 /*
164 * No handling for now.
165		 * TBD: implement a disable function so we can tell the CPU
166		 * not to respond to these local interrupt sources,
167		 * such as ITV, CPEI, MCA etc.
168 */
169 if (desc->status == IRQ_PER_CPU)
170 continue;
171
172 cpus_and(mask, irq_affinity[irq], cpu_online_map);
173 if (any_online_cpu(mask) == NR_CPUS) {
174 /*
175 * Save it for phase 2 processing
176 */
177 vectors_in_migration[irq] = irq;
178
179 new_cpu = any_online_cpu(cpu_online_map);
180 mask = cpumask_of_cpu(new_cpu);
181
182 /*
183			 * All three are essential; currently WARN_ON, but maybe this should panic?
184 */
185 if (desc->handler && desc->handler->disable &&
186 desc->handler->enable && desc->handler->set_affinity) {
187 desc->handler->disable(irq);
188 desc->handler->set_affinity(irq, mask);
189 desc->handler->enable(irq);
190 } else {
191 WARN_ON((!(desc->handler) || !(desc->handler->disable) ||
192 !(desc->handler->enable) ||
193 !(desc->handler->set_affinity)));
194 }
195 }
196 }
197}
198
199void fixup_irqs(void)
200{
201 unsigned int irq;
202 extern void ia64_process_pending_intr(void);
203
204 ia64_set_itv(1<<16);
205 /*
206	 * Phase 1: Locate IRQs bound to this CPU and
207	 * relocate them for CPU removal.
208 */
209 migrate_irqs();
210
211 /*
212 * Phase 2: Perform interrupt processing for all entries reported in
213 * local APIC.
214 */
215 ia64_process_pending_intr();
216
217 /*
218 * Phase 3: Now handle any interrupts not captured in local APIC.
219	 * This accounts for cases where a device interrupted while the
220	 * RTE was being disabled and re-programmed.
221 */
222 for (irq=0; irq < NR_IRQS; irq++) {
223 if (vectors_in_migration[irq]) {
224 vectors_in_migration[irq]=0;
225 __do_IRQ(irq, NULL);
226 }
227 }
228
229 /*
230	 * Now let the processor die. We disable interrupts and use max_xtp() to
231	 * ensure that no more interrupts are routed to this processor.
232	 * The local timer interrupt can still have one pending, which we
233	 * take care of in timer_interrupt().
234 */
235 max_xtp();
236 local_irq_disable();
237}
238#endif
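The deferred-affinity machinery above works in two halves: a /proc write merely
records the requested mask, and the actual RTE reprogramming happens later from
move_irq(), which the IOSAPIC ack/end handlers call with desc->lock held. A
condensed sketch of that sequence (the /proc write path that ends up calling
proc_set_irq_affinity() lives in generic code and is only assumed here):

	/*
	 * 1. echo 2 > /proc/irq/N/smp_affinity
	 *	-> proc_set_irq_affinity(irq, new_mask);   records the request
	 *
	 * 2. next interrupt on that line:
	 *	iosapic_ack_edge_irq() / iosapic_end_level_irq()
	 *	-> move_irq(irq);	applies the pending mask through
	 *				desc->handler->set_affinity() and
	 *				clears pending_irq_cpumask[irq]
	 */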
diff --git a/arch/ia64/kernel/irq_ia64.c b/arch/ia64/kernel/irq_ia64.c
new file mode 100644
index 000000000000..5ba06ebe355b
--- /dev/null
+++ b/arch/ia64/kernel/irq_ia64.c
@@ -0,0 +1,278 @@
1/*
2 * linux/arch/ia64/kernel/irq.c
3 *
4 * Copyright (C) 1998-2001 Hewlett-Packard Co
5 * Stephane Eranian <eranian@hpl.hp.com>
6 * David Mosberger-Tang <davidm@hpl.hp.com>
7 *
8 * 6/10/99: Updated to bring in sync with x86 version to facilitate
9 * support for SMP and different interrupt controllers.
10 *
11 * 09/15/00 Goutham Rao <goutham.rao@intel.com> Implemented pci_irq_to_vector
12 * PCI to vector allocation routine.
13 * 04/14/2004 Ashok Raj <ashok.raj@intel.com>
14 * Added CPU Hotplug handling for IPF.
15 */
16
17#include <linux/config.h>
18#include <linux/module.h>
19
20#include <linux/jiffies.h>
21#include <linux/errno.h>
22#include <linux/init.h>
23#include <linux/interrupt.h>
24#include <linux/ioport.h>
25#include <linux/kernel_stat.h>
26#include <linux/slab.h>
27#include <linux/ptrace.h>
28#include <linux/random.h> /* for rand_initialize_irq() */
29#include <linux/signal.h>
30#include <linux/smp.h>
31#include <linux/smp_lock.h>
32#include <linux/threads.h>
33#include <linux/bitops.h>
34
35#include <asm/delay.h>
36#include <asm/intrinsics.h>
37#include <asm/io.h>
38#include <asm/hw_irq.h>
39#include <asm/machvec.h>
40#include <asm/pgtable.h>
41#include <asm/system.h>
42
43#ifdef CONFIG_PERFMON
44# include <asm/perfmon.h>
45#endif
46
47#define IRQ_DEBUG 0
48
49/* default base addr of IPI table */
50void __iomem *ipi_base_addr = ((void __iomem *)
51 (__IA64_UNCACHED_OFFSET | IA64_IPI_DEFAULT_BASE_ADDR));
52
53/*
54 * Legacy IRQ to IA-64 vector translation table.
55 */
56__u8 isa_irq_to_vector_map[16] = {
57 /* 8259 IRQ translation, first 16 entries */
58 0x2f, 0x20, 0x2e, 0x2d, 0x2c, 0x2b, 0x2a, 0x29,
59 0x28, 0x27, 0x26, 0x25, 0x24, 0x23, 0x22, 0x21
60};
61EXPORT_SYMBOL(isa_irq_to_vector_map);
62
63static unsigned long ia64_vector_mask[BITS_TO_LONGS(IA64_NUM_DEVICE_VECTORS)];
64
65int
66assign_irq_vector (int irq)
67{
68 int pos, vector;
69 again:
70 pos = find_first_zero_bit(ia64_vector_mask, IA64_NUM_DEVICE_VECTORS);
71 vector = IA64_FIRST_DEVICE_VECTOR + pos;
72 if (vector > IA64_LAST_DEVICE_VECTOR)
73 /* XXX could look for sharable vectors instead of panic'ing... */
74 panic("assign_irq_vector: out of interrupt vectors!");
75 if (test_and_set_bit(pos, ia64_vector_mask))
76 goto again;
77 return vector;
78}
79
80void
81free_irq_vector (int vector)
82{
83 int pos;
84
85 if (vector < IA64_FIRST_DEVICE_VECTOR || vector > IA64_LAST_DEVICE_VECTOR)
86 return;
87
88 pos = vector - IA64_FIRST_DEVICE_VECTOR;
89 if (!test_and_clear_bit(pos, ia64_vector_mask))
90 printk(KERN_WARNING "%s: double free!\n", __FUNCTION__);
91}
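	/*
	 * Sketch of the allocation scheme above (not in the original file):
	 * device vectors are handed out from a bitmap covering
	 * IA64_FIRST_DEVICE_VECTOR..IA64_LAST_DEVICE_VECTOR, so a typical
	 * pairing, as used by iosapic.c, looks like:
	 *
	 *	int vec = assign_irq_vector(AUTO_ASSIGN);	// grab a free vector
	 *	...
	 *	free_irq_vector(vec);				// return it on teardown
	 */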
92
93#ifdef CONFIG_SMP
94# define IS_RESCHEDULE(vec) (vec == IA64_IPI_RESCHEDULE)
95#else
96# define IS_RESCHEDULE(vec) (0)
97#endif
98/*
99 * This is where the IVT branches to when we get an external
100 * interrupt. From here we branch to the correct hardware IRQ handler
101 * via a function pointer.
102 */
103void
104ia64_handle_irq (ia64_vector vector, struct pt_regs *regs)
105{
106 unsigned long saved_tpr;
107
108#if IRQ_DEBUG
109 {
110 unsigned long bsp, sp;
111
112 /*
113 * Note: if the interrupt happened while executing in
114 * the context switch routine (ia64_switch_to), we may
115 * get a spurious stack overflow here. This is
116 * because the register and the memory stack are not
117 * switched atomically.
118 */
119 bsp = ia64_getreg(_IA64_REG_AR_BSP);
120 sp = ia64_getreg(_IA64_REG_SP);
121
122 if ((sp - bsp) < 1024) {
123 static unsigned char count;
124 static long last_time;
125
126 if (jiffies - last_time > 5*HZ)
127 count = 0;
128 if (++count < 5) {
129 last_time = jiffies;
130 printk("ia64_handle_irq: DANGER: less than "
131 "1KB of free stack space!!\n"
132 "(bsp=0x%lx, sp=%lx)\n", bsp, sp);
133 }
134 }
135 }
136#endif /* IRQ_DEBUG */
137
138 /*
139 * Always set TPR to limit maximum interrupt nesting depth to
140 * 16 (without this, it would be ~240, which could easily lead
141 * to kernel stack overflows).
142 */
143 irq_enter();
144 saved_tpr = ia64_getreg(_IA64_REG_CR_TPR);
145 ia64_srlz_d();
146 while (vector != IA64_SPURIOUS_INT_VECTOR) {
147 if (!IS_RESCHEDULE(vector)) {
148 ia64_setreg(_IA64_REG_CR_TPR, vector);
149 ia64_srlz_d();
150
151 __do_IRQ(local_vector_to_irq(vector), regs);
152
153 /*
154 * Disable interrupts and send EOI:
155 */
156 local_irq_disable();
157 ia64_setreg(_IA64_REG_CR_TPR, saved_tpr);
158 }
159 ia64_eoi();
160 vector = ia64_get_ivr();
161 }
162 /*
163 * This must be done *after* the ia64_eoi(). For example, the keyboard softirq
164 * handler needs to be able to wait for further keyboard interrupts, which can't
165 * come through until ia64_eoi() has been done.
166 */
167 irq_exit();
168}
169
170#ifdef CONFIG_HOTPLUG_CPU
171/*
172 * This function emulates interrupt processing when a CPU is about to be
173 * brought down.
174 */
175void ia64_process_pending_intr(void)
176{
177 ia64_vector vector;
178 unsigned long saved_tpr;
179 extern unsigned int vectors_in_migration[NR_IRQS];
180
181 vector = ia64_get_ivr();
182
183 irq_enter();
184 saved_tpr = ia64_getreg(_IA64_REG_CR_TPR);
185 ia64_srlz_d();
186
187 /*
188 * Perform normal interrupt style processing
189 */
190 while (vector != IA64_SPURIOUS_INT_VECTOR) {
191 if (!IS_RESCHEDULE(vector)) {
192 ia64_setreg(_IA64_REG_CR_TPR, vector);
193 ia64_srlz_d();
194
195 /*
196			 * Now call the normal IRQ handling path as it would have been
197			 * called from a real interrupt handler, passing NULL for pt_regs
198			 * since none are available here.
199			 * This could probably share code with ia64_handle_irq().
200 */
201 vectors_in_migration[local_vector_to_irq(vector)]=0;
202 __do_IRQ(local_vector_to_irq(vector), NULL);
203
204 /*
205 * Disable interrupts and send EOI
206 */
207 local_irq_disable();
208 ia64_setreg(_IA64_REG_CR_TPR, saved_tpr);
209 }
210 ia64_eoi();
211 vector = ia64_get_ivr();
212 }
213 irq_exit();
214}
215#endif
216
217
218#ifdef CONFIG_SMP
219extern irqreturn_t handle_IPI (int irq, void *dev_id, struct pt_regs *regs);
220
221static struct irqaction ipi_irqaction = {
222 .handler = handle_IPI,
223 .flags = SA_INTERRUPT,
224 .name = "IPI"
225};
226#endif
227
228void
229register_percpu_irq (ia64_vector vec, struct irqaction *action)
230{
231 irq_desc_t *desc;
232 unsigned int irq;
233
234 for (irq = 0; irq < NR_IRQS; ++irq)
235 if (irq_to_vector(irq) == vec) {
236 desc = irq_descp(irq);
237 desc->status |= IRQ_PER_CPU;
238 desc->handler = &irq_type_ia64_lsapic;
239 if (action)
240 setup_irq(irq, action);
241 }
242}
243
244void __init
245init_IRQ (void)
246{
247 register_percpu_irq(IA64_SPURIOUS_INT_VECTOR, NULL);
248#ifdef CONFIG_SMP
249 register_percpu_irq(IA64_IPI_VECTOR, &ipi_irqaction);
250#endif
251#ifdef CONFIG_PERFMON
252 pfm_init_percpu();
253#endif
254 platform_irq_init();
255}
256
257void
258ia64_send_ipi (int cpu, int vector, int delivery_mode, int redirect)
259{
260 void __iomem *ipi_addr;
261 unsigned long ipi_data;
262 unsigned long phys_cpu_id;
263
264#ifdef CONFIG_SMP
265 phys_cpu_id = cpu_physical_id(cpu);
266#else
267 phys_cpu_id = (ia64_getreg(_IA64_REG_CR_LID) >> 16) & 0xffff;
268#endif
269
270 /*
271	 * The CPU number is encoded as an 8-bit ID and an 8-bit EID.
272 */
273
274 ipi_data = (delivery_mode << 8) | (vector & 0xff);
275 ipi_addr = ipi_base_addr + ((phys_cpu_id << 4) | ((redirect & 1) << 3));
276
277 writeq(ipi_data, ipi_addr);
278}
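ia64_send_ipi() encodes the processor interrupt block layout fairly directly:
the destination's 16-bit physical id (8-bit ID, 8-bit EID) selects a 16-byte
slot in the IPI block, bit 3 selects redirection, and the 64-bit store carries
the delivery mode and vector. A worked instance with made-up numbers:

	/* phys_cpu_id = 0x0102 (ID 0x01, EID 0x02), vector = 0xd0,
	 * delivery_mode = 0, redirect = 0:
	 */
	ipi_data = (0 << 8) | (0xd0 & 0xff);			/* = 0x00d0       */
	ipi_addr = ipi_base_addr + ((0x0102 << 4) | (0 << 3));	/* base + 0x1020  */
	writeq(ipi_data, ipi_addr);				/* fire the IPI   */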
diff --git a/arch/ia64/kernel/irq_lsapic.c b/arch/ia64/kernel/irq_lsapic.c
new file mode 100644
index 000000000000..ea14e6a04409
--- /dev/null
+++ b/arch/ia64/kernel/irq_lsapic.c
@@ -0,0 +1,37 @@
1/*
2 * LSAPIC Interrupt Controller
3 *
4 * This takes care of interrupts that are generated by the CPU's
5 * internal Streamlined Advanced Programmable Interrupt Controller
6 * (LSAPIC), such as the ITC and IPI interrupts.
7 *
8 * Copyright (C) 1999 VA Linux Systems
9 * Copyright (C) 1999 Walt Drummond <drummond@valinux.com>
10 * Copyright (C) 2000 Hewlett-Packard Co
11 * Copyright (C) 2000 David Mosberger-Tang <davidm@hpl.hp.com>
12 */
13
14#include <linux/sched.h>
15#include <linux/irq.h>
16
17static unsigned int
18lsapic_noop_startup (unsigned int irq)
19{
20 return 0;
21}
22
23static void
24lsapic_noop (unsigned int irq)
25{
26	/* nothing to do... */
27}
28
29struct hw_interrupt_type irq_type_ia64_lsapic = {
30 .typename = "LSAPIC",
31 .startup = lsapic_noop_startup,
32 .shutdown = lsapic_noop,
33 .enable = lsapic_noop,
34 .disable = lsapic_noop,
35 .ack = lsapic_noop,
36 .end = lsapic_noop
37};
diff --git a/arch/ia64/kernel/ivt.S b/arch/ia64/kernel/ivt.S
new file mode 100644
index 000000000000..d9c05d53435b
--- /dev/null
+++ b/arch/ia64/kernel/ivt.S
@@ -0,0 +1,1619 @@
1/*
2 * arch/ia64/kernel/ivt.S
3 *
4 * Copyright (C) 1998-2001, 2003 Hewlett-Packard Co
5 * Stephane Eranian <eranian@hpl.hp.com>
6 * David Mosberger <davidm@hpl.hp.com>
7 * Copyright (C) 2000, 2002-2003 Intel Co
8 * Asit Mallick <asit.k.mallick@intel.com>
9 * Suresh Siddha <suresh.b.siddha@intel.com>
10 * Kenneth Chen <kenneth.w.chen@intel.com>
11 * Fenghua Yu <fenghua.yu@intel.com>
12 *
13 * 00/08/23 Asit Mallick <asit.k.mallick@intel.com> TLB handling for SMP
14 * 00/12/20 David Mosberger-Tang <davidm@hpl.hp.com> DTLB/ITLB handler now uses virtual PT.
15 */
16/*
17 * This file defines the interruption vector table used by the CPU.
18 * It does not include one entry per possible cause of interruption.
19 *
20 * The first 20 entries of the table contain 64 bundles each while the
21 * remaining 48 entries contain only 16 bundles each.
22 *
23 * The 64 bundles are used to allow inlining the whole handler for critical
24 * interruptions like TLB misses.
25 *
26 * For each entry, the comment is as follows:
27 *
28 * // 0x1c00 Entry 7 (size 64 bundles) Data Key Miss (12,51)
29 * entry offset ----/ / / / /
30 * entry number ---------/ / / /
31 * size of the entry -------------/ / /
32 * vector name -------------------------------------/ /
33 * interruptions triggering this vector ----------------------/
34 *
35 * The table is 32KB in size and must be aligned on 32KB boundary.
36 * (The CPU ignores the 15 lower bits of the address)
37 *
38 * Table is based upon EAS2.6 (Oct 1999)
39 */
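/*
 * (Sketch, derived from the layout above: with 16 bytes per IA-64 bundle,
 *  20 entries * 64 bundles * 16 bytes = 20480 bytes, plus
 *  48 entries * 16 bundles * 16 bytes = 12288 bytes, for 32768 bytes = 32KB,
 *  which is why the table must be aligned on a 32KB boundary.)
 */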
40
41#include <linux/config.h>
42
43#include <asm/asmmacro.h>
44#include <asm/break.h>
45#include <asm/ia32.h>
46#include <asm/kregs.h>
47#include <asm/offsets.h>
48#include <asm/pgtable.h>
49#include <asm/processor.h>
50#include <asm/ptrace.h>
51#include <asm/system.h>
52#include <asm/thread_info.h>
53#include <asm/unistd.h>
54#include <asm/errno.h>
55
56#if 1
57# define PSR_DEFAULT_BITS psr.ac
58#else
59# define PSR_DEFAULT_BITS 0
60#endif
61
62#if 0
63 /*
64 * This lets you track the last eight faults that occurred on the CPU. Make sure ar.k2 isn't
65 * needed for something else before enabling this...
66 */
67# define DBG_FAULT(i) mov r16=ar.k2;; shl r16=r16,8;; add r16=(i),r16;;mov ar.k2=r16
68#else
69# define DBG_FAULT(i)
70#endif
71
72#define MINSTATE_VIRT /* needed by minstate.h */
73#include "minstate.h"
74
75#define FAULT(n) \
76 mov r31=pr; \
77 mov r19=n;; /* prepare to save predicates */ \
78 br.sptk.many dispatch_to_fault_handler
79
80 .section .text.ivt,"ax"
81
82 .align 32768 // align on 32KB boundary
83 .global ia64_ivt
84ia64_ivt:
85/////////////////////////////////////////////////////////////////////////////////////////
86// 0x0000 Entry 0 (size 64 bundles) VHPT Translation (8,20,47)
87ENTRY(vhpt_miss)
88 DBG_FAULT(0)
89 /*
90 * The VHPT vector is invoked when the TLB entry for the virtual page table
91 * is missing. This happens only as a result of a previous
92 * (the "original") TLB miss, which may either be caused by an instruction
93 * fetch or a data access (or non-access).
94 *
95	 * What we do here is normal TLB miss handling for the _original_ miss, followed
96 * by inserting the TLB entry for the virtual page table page that the VHPT
97 * walker was attempting to access. The latter gets inserted as long
98 * as both L1 and L2 have valid mappings for the faulting address.
99 * The TLB entry for the original miss gets inserted only if
100 * the L3 entry indicates that the page is present.
101 *
102 * do_page_fault gets invoked in the following cases:
103 * - the faulting virtual address uses unimplemented address bits
104 * - the faulting virtual address has no L1, L2, or L3 mapping
105 */
106 mov r16=cr.ifa // get address that caused the TLB miss
107#ifdef CONFIG_HUGETLB_PAGE
108 movl r18=PAGE_SHIFT
109 mov r25=cr.itir
110#endif
111 ;;
112 rsm psr.dt // use physical addressing for data
113 mov r31=pr // save the predicate registers
114 mov r19=IA64_KR(PT_BASE) // get page table base address
115 shl r21=r16,3 // shift bit 60 into sign bit
116 shr.u r17=r16,61 // get the region number into r17
117 ;;
118 shr r22=r21,3
119#ifdef CONFIG_HUGETLB_PAGE
120 extr.u r26=r25,2,6
121 ;;
122 cmp.ne p8,p0=r18,r26
123 sub r27=r26,r18
124 ;;
125(p8) dep r25=r18,r25,2,6
126(p8) shr r22=r22,r27
127#endif
128 ;;
129	cmp.eq p6,p7=5,r17			// is IFA pointing into region 5?
130 shr.u r18=r22,PGDIR_SHIFT // get bits 33-63 of the faulting address
131 ;;
132(p7) dep r17=r17,r19,(PAGE_SHIFT-3),3 // put region number bits in place
133
134 srlz.d
135 LOAD_PHYSICAL(p6, r19, swapper_pg_dir) // region 5 is rooted at swapper_pg_dir
136
137 .pred.rel "mutex", p6, p7
138(p6) shr.u r21=r21,PGDIR_SHIFT+PAGE_SHIFT
139(p7) shr.u r21=r21,PGDIR_SHIFT+PAGE_SHIFT-3
140 ;;
141(p6) dep r17=r18,r19,3,(PAGE_SHIFT-3) // r17=PTA + IFA(33,42)*8
142(p7) dep r17=r18,r17,3,(PAGE_SHIFT-6) // r17=PTA + (((IFA(61,63) << 7) | IFA(33,39))*8)
143 cmp.eq p7,p6=0,r21 // unused address bits all zeroes?
144 shr.u r18=r22,PMD_SHIFT // shift L2 index into position
145 ;;
146 ld8 r17=[r17] // fetch the L1 entry (may be 0)
147 ;;
148(p7) cmp.eq p6,p7=r17,r0 // was L1 entry NULL?
149 dep r17=r18,r17,3,(PAGE_SHIFT-3) // compute address of L2 page table entry
150 ;;
151(p7) ld8 r20=[r17] // fetch the L2 entry (may be 0)
152 shr.u r19=r22,PAGE_SHIFT // shift L3 index into position
153 ;;
154(p7) cmp.eq.or.andcm p6,p7=r20,r0 // was L2 entry NULL?
155 dep r21=r19,r20,3,(PAGE_SHIFT-3) // compute address of L3 page table entry
156 ;;
157(p7) ld8 r18=[r21] // read the L3 PTE
158 mov r19=cr.isr // cr.isr bit 0 tells us if this is an insn miss
159 ;;
160(p7) tbit.z p6,p7=r18,_PAGE_P_BIT // page present bit cleared?
161 mov r22=cr.iha // get the VHPT address that caused the TLB miss
162 ;; // avoid RAW on p7
163(p7) tbit.nz.unc p10,p11=r19,32 // is it an instruction TLB miss?
164 dep r23=0,r20,0,PAGE_SHIFT // clear low bits to get page address
165 ;;
166(p10) itc.i r18 // insert the instruction TLB entry
167(p11) itc.d r18 // insert the data TLB entry
168(p6) br.cond.spnt.many page_fault // handle bad address/page not present (page fault)
169 mov cr.ifa=r22
170
171#ifdef CONFIG_HUGETLB_PAGE
172(p8) mov cr.itir=r25 // change to default page-size for VHPT
173#endif
174
175 /*
176 * Now compute and insert the TLB entry for the virtual page table. We never
177 * execute in a page table page so there is no need to set the exception deferral
178 * bit.
179 */
180 adds r24=__DIRTY_BITS_NO_ED|_PAGE_PL_0|_PAGE_AR_RW,r23
181 ;;
182(p7) itc.d r24
183 ;;
184#ifdef CONFIG_SMP
185 /*
186	 * Tell the assembler's dependency-violation checker that the above "itc" instructions
187 * cannot possibly affect the following loads:
188 */
189 dv_serialize_data
190
191 /*
192 * Re-check L2 and L3 pagetable. If they changed, we may have received a ptc.g
193 * between reading the pagetable and the "itc". If so, flush the entry we
194 * inserted and retry.
195 */
196 ld8 r25=[r21] // read L3 PTE again
197 ld8 r26=[r17] // read L2 entry again
198 ;;
199 cmp.ne p6,p7=r26,r20 // did L2 entry change
200 mov r27=PAGE_SHIFT<<2
201 ;;
202(p6) ptc.l r22,r27 // purge PTE page translation
203(p7) cmp.ne.or.andcm p6,p7=r25,r18 // did L3 PTE change
204 ;;
205(p6) ptc.l r16,r27 // purge translation
206#endif
207
208 mov pr=r31,-1 // restore predicate registers
209 rfi
210END(vhpt_miss)
211
212 .org ia64_ivt+0x400
213/////////////////////////////////////////////////////////////////////////////////////////
214// 0x0400 Entry 1 (size 64 bundles) ITLB (21)
215ENTRY(itlb_miss)
216 DBG_FAULT(1)
217 /*
218 * The ITLB handler accesses the L3 PTE via the virtually mapped linear
219 * page table. If a nested TLB miss occurs, we switch into physical
220 * mode, walk the page table, and then re-execute the L3 PTE read
221 * and go on normally after that.
222 */
223 mov r16=cr.ifa // get virtual address
224 mov r29=b0 // save b0
225 mov r31=pr // save predicates
226.itlb_fault:
227 mov r17=cr.iha // get virtual address of L3 PTE
228 movl r30=1f // load nested fault continuation point
229 ;;
2301: ld8 r18=[r17] // read L3 PTE
231 ;;
232 mov b0=r29
233 tbit.z p6,p0=r18,_PAGE_P_BIT // page present bit cleared?
234(p6) br.cond.spnt page_fault
235 ;;
236 itc.i r18
237 ;;
238#ifdef CONFIG_SMP
239 /*
240	 * Tell the assembler's dependency-violation checker that the above "itc" instructions
241 * cannot possibly affect the following loads:
242 */
243 dv_serialize_data
244
245 ld8 r19=[r17] // read L3 PTE again and see if same
246 mov r20=PAGE_SHIFT<<2 // setup page size for purge
247 ;;
248 cmp.ne p7,p0=r18,r19
249 ;;
250(p7) ptc.l r16,r20
251#endif
252 mov pr=r31,-1
253 rfi
254END(itlb_miss)
255
256 .org ia64_ivt+0x0800
257/////////////////////////////////////////////////////////////////////////////////////////
258// 0x0800 Entry 2 (size 64 bundles) DTLB (9,48)
259ENTRY(dtlb_miss)
260 DBG_FAULT(2)
261 /*
262 * The DTLB handler accesses the L3 PTE via the virtually mapped linear
263 * page table. If a nested TLB miss occurs, we switch into physical
264 * mode, walk the page table, and then re-execute the L3 PTE read
265 * and go on normally after that.
266 */
267 mov r16=cr.ifa // get virtual address
268 mov r29=b0 // save b0
269 mov r31=pr // save predicates
270dtlb_fault:
271 mov r17=cr.iha // get virtual address of L3 PTE
272 movl r30=1f // load nested fault continuation point
273 ;;
2741: ld8 r18=[r17] // read L3 PTE
275 ;;
276 mov b0=r29
277 tbit.z p6,p0=r18,_PAGE_P_BIT // page present bit cleared?
278(p6) br.cond.spnt page_fault
279 ;;
280 itc.d r18
281 ;;
282#ifdef CONFIG_SMP
283 /*
284	 * Tell the assembler's dependency-violation checker that the above "itc" instructions
285 * cannot possibly affect the following loads:
286 */
287 dv_serialize_data
288
289 ld8 r19=[r17] // read L3 PTE again and see if same
290 mov r20=PAGE_SHIFT<<2 // setup page size for purge
291 ;;
292 cmp.ne p7,p0=r18,r19
293 ;;
294(p7) ptc.l r16,r20
295#endif
296 mov pr=r31,-1
297 rfi
298END(dtlb_miss)
299
300 .org ia64_ivt+0x0c00
301/////////////////////////////////////////////////////////////////////////////////////////
302// 0x0c00 Entry 3 (size 64 bundles) Alt ITLB (19)
303ENTRY(alt_itlb_miss)
304 DBG_FAULT(3)
305 mov r16=cr.ifa // get address that caused the TLB miss
306 movl r17=PAGE_KERNEL
307 mov r21=cr.ipsr
308 movl r19=(((1 << IA64_MAX_PHYS_BITS) - 1) & ~0xfff)
309 mov r31=pr
310 ;;
311#ifdef CONFIG_DISABLE_VHPT
312	shr.u r22=r16,61			// get the region number into r22
313 ;;
314 cmp.gt p8,p0=6,r22 // user mode
315 ;;
316(p8) thash r17=r16
317 ;;
318(p8) mov cr.iha=r17
319(p8) mov r29=b0 // save b0
320(p8) br.cond.dptk .itlb_fault
321#endif
322 extr.u r23=r21,IA64_PSR_CPL0_BIT,2 // extract psr.cpl
323 and r19=r19,r16 // clear ed, reserved bits, and PTE control bits
324 shr.u r18=r16,57 // move address bit 61 to bit 4
325 ;;
326 andcm r18=0x10,r18 // bit 4=~address-bit(61)
327 cmp.ne p8,p0=r0,r23 // psr.cpl != 0?
328 or r19=r17,r19 // insert PTE control bits into r19
329 ;;
330 or r19=r19,r18 // set bit 4 (uncached) if the access was to region 6
331(p8) br.cond.spnt page_fault
332 ;;
333 itc.i r19 // insert the TLB entry
334 mov pr=r31,-1
335 rfi
336END(alt_itlb_miss)
337
338 .org ia64_ivt+0x1000
339/////////////////////////////////////////////////////////////////////////////////////////
340// 0x1000 Entry 4 (size 64 bundles) Alt DTLB (7,46)
341ENTRY(alt_dtlb_miss)
342 DBG_FAULT(4)
343 mov r16=cr.ifa // get address that caused the TLB miss
344 movl r17=PAGE_KERNEL
345 mov r20=cr.isr
346 movl r19=(((1 << IA64_MAX_PHYS_BITS) - 1) & ~0xfff)
347 mov r21=cr.ipsr
348 mov r31=pr
349 ;;
350#ifdef CONFIG_DISABLE_VHPT
351	shr.u r22=r16,61			// get the region number into r22
352 ;;
353 cmp.gt p8,p0=6,r22 // access to region 0-5
354 ;;
355(p8) thash r17=r16
356 ;;
357(p8) mov cr.iha=r17
358(p8) mov r29=b0 // save b0
359(p8) br.cond.dptk dtlb_fault
360#endif
361 extr.u r23=r21,IA64_PSR_CPL0_BIT,2 // extract psr.cpl
362 and r22=IA64_ISR_CODE_MASK,r20 // get the isr.code field
363 tbit.nz p6,p7=r20,IA64_ISR_SP_BIT // is speculation bit on?
364 shr.u r18=r16,57 // move address bit 61 to bit 4
365 and r19=r19,r16 // clear ed, reserved bits, and PTE control bits
366 tbit.nz p9,p0=r20,IA64_ISR_NA_BIT // is non-access bit on?
367 ;;
368 andcm r18=0x10,r18 // bit 4=~address-bit(61)
369 cmp.ne p8,p0=r0,r23
370(p9) cmp.eq.or.andcm p6,p7=IA64_ISR_CODE_LFETCH,r22 // check isr.code field
371(p8) br.cond.spnt page_fault
372
373 dep r21=-1,r21,IA64_PSR_ED_BIT,1
374 or r19=r19,r17 // insert PTE control bits into r19
375 ;;
376 or r19=r19,r18 // set bit 4 (uncached) if the access was to region 6
377(p6) mov cr.ipsr=r21
378 ;;
379(p7) itc.d r19 // insert the TLB entry
380 mov pr=r31,-1
381 rfi
382END(alt_dtlb_miss)
383
384 .org ia64_ivt+0x1400
385/////////////////////////////////////////////////////////////////////////////////////////
386// 0x1400 Entry 5 (size 64 bundles) Data nested TLB (6,45)
387ENTRY(nested_dtlb_miss)
388 /*
389 * In the absence of kernel bugs, we get here when the virtually mapped linear
390 * page table is accessed non-speculatively (e.g., in the Dirty-bit, Instruction
391 * Access-bit, or Data Access-bit faults). If the DTLB entry for the virtual page
392 * table is missing, a nested TLB miss fault is triggered and control is
393 * transferred to this point. When this happens, we lookup the pte for the
394 * faulting address by walking the page table in physical mode and return to the
395 * continuation point passed in register r30 (or call page_fault if the address is
396 * not mapped).
397 *
398 * Input: r16: faulting address
399 * r29: saved b0
400 * r30: continuation address
401 * r31: saved pr
402 *
403 * Output: r17: physical address of L3 PTE of faulting address
404 * r29: saved b0
405 * r30: continuation address
406 * r31: saved pr
407 *
408 * Clobbered: b0, r18, r19, r21, psr.dt (cleared)
409 */
410 rsm psr.dt // switch to using physical data addressing
411 mov r19=IA64_KR(PT_BASE) // get the page table base address
412 shl r21=r16,3 // shift bit 60 into sign bit
413 ;;
414 shr.u r17=r16,61 // get the region number into r17
415 ;;
416 cmp.eq p6,p7=5,r17 // is faulting address in region 5?
417 shr.u r18=r16,PGDIR_SHIFT // get bits 33-63 of faulting address
418 ;;
419(p7) dep r17=r17,r19,(PAGE_SHIFT-3),3 // put region number bits in place
420
421 srlz.d
422 LOAD_PHYSICAL(p6, r19, swapper_pg_dir) // region 5 is rooted at swapper_pg_dir
423
424 .pred.rel "mutex", p6, p7
425(p6) shr.u r21=r21,PGDIR_SHIFT+PAGE_SHIFT
426(p7) shr.u r21=r21,PGDIR_SHIFT+PAGE_SHIFT-3
427 ;;
428(p6) dep r17=r18,r19,3,(PAGE_SHIFT-3) // r17=PTA + IFA(33,42)*8
429(p7) dep r17=r18,r17,3,(PAGE_SHIFT-6) // r17=PTA + (((IFA(61,63) << 7) | IFA(33,39))*8)
430 cmp.eq p7,p6=0,r21 // unused address bits all zeroes?
431 shr.u r18=r16,PMD_SHIFT // shift L2 index into position
432 ;;
433 ld8 r17=[r17] // fetch the L1 entry (may be 0)
434 ;;
435(p7) cmp.eq p6,p7=r17,r0 // was L1 entry NULL?
436 dep r17=r18,r17,3,(PAGE_SHIFT-3) // compute address of L2 page table entry
437 ;;
438(p7) ld8 r17=[r17] // fetch the L2 entry (may be 0)
439 shr.u r19=r16,PAGE_SHIFT // shift L3 index into position
440 ;;
441(p7) cmp.eq.or.andcm p6,p7=r17,r0 // was L2 entry NULL?
442 dep r17=r19,r17,3,(PAGE_SHIFT-3) // compute address of L3 page table entry
443(p6) br.cond.spnt page_fault
444 mov b0=r30
445 br.sptk.many b0 // return to continuation point
446END(nested_dtlb_miss)
447
448 .org ia64_ivt+0x1800
449/////////////////////////////////////////////////////////////////////////////////////////
450// 0x1800 Entry 6 (size 64 bundles) Instruction Key Miss (24)
451ENTRY(ikey_miss)
452 DBG_FAULT(6)
453 FAULT(6)
454END(ikey_miss)
455
456 //-----------------------------------------------------------------------------------
457 // call do_page_fault (predicates are in r31, psr.dt may be off, r16 is faulting address)
458ENTRY(page_fault)
459 ssm psr.dt
460 ;;
461 srlz.i
462 ;;
463 SAVE_MIN_WITH_COVER
464 alloc r15=ar.pfs,0,0,3,0
465 mov out0=cr.ifa
466 mov out1=cr.isr
467 adds r3=8,r2 // set up second base pointer
468 ;;
469 ssm psr.ic | PSR_DEFAULT_BITS
470 ;;
471	srlz.i					// guarantee that interruption collection is on
472 ;;
473(p15) ssm psr.i // restore psr.i
474 movl r14=ia64_leave_kernel
475 ;;
476 SAVE_REST
477 mov rp=r14
478 ;;
479 adds out2=16,r12 // out2 = pointer to pt_regs
480 br.call.sptk.many b6=ia64_do_page_fault // ignore return address
481END(page_fault)
482
483 .org ia64_ivt+0x1c00
484/////////////////////////////////////////////////////////////////////////////////////////
485// 0x1c00 Entry 7 (size 64 bundles) Data Key Miss (12,51)
486ENTRY(dkey_miss)
487 DBG_FAULT(7)
488 FAULT(7)
489END(dkey_miss)
490
491 .org ia64_ivt+0x2000
492/////////////////////////////////////////////////////////////////////////////////////////
493// 0x2000 Entry 8 (size 64 bundles) Dirty-bit (54)
494ENTRY(dirty_bit)
495 DBG_FAULT(8)
496 /*
497 * What we do here is to simply turn on the dirty bit in the PTE. We need to
498 * update both the page-table and the TLB entry. To efficiently access the PTE,
499 * we address it through the virtual page table. Most likely, the TLB entry for
500 * the relevant virtual page table page is still present in the TLB so we can
501 * normally do this without additional TLB misses. In case the necessary virtual
502 * page table TLB entry isn't present, we take a nested TLB miss hit where we look
503 * up the physical address of the L3 PTE and then continue at label 1 below.
504 */
505 mov r16=cr.ifa // get the address that caused the fault
506 movl r30=1f // load continuation point in case of nested fault
507 ;;
508 thash r17=r16 // compute virtual address of L3 PTE
509 mov r29=b0 // save b0 in case of nested fault
510 mov r31=pr // save pr
511#ifdef CONFIG_SMP
512 mov r28=ar.ccv // save ar.ccv
513 ;;
5141: ld8 r18=[r17]
515 ;; // avoid RAW on r18
516 mov ar.ccv=r18 // set compare value for cmpxchg
517 or r25=_PAGE_D|_PAGE_A,r18 // set the dirty and accessed bits
518 ;;
519 cmpxchg8.acq r26=[r17],r25,ar.ccv
520 mov r24=PAGE_SHIFT<<2
521 ;;
522 cmp.eq p6,p7=r26,r18
523 ;;
524(p6) itc.d r25 // install updated PTE
525 ;;
526 /*
527	 * Tell the assembler's dependency-violation checker that the above "itc" instructions
528 * cannot possibly affect the following loads:
529 */
530 dv_serialize_data
531
532 ld8 r18=[r17] // read PTE again
533 ;;
534 cmp.eq p6,p7=r18,r25 // is it same as the newly installed
535 ;;
536(p7) ptc.l r16,r24
537 mov b0=r29 // restore b0
538 mov ar.ccv=r28
539#else
540 ;;
5411: ld8 r18=[r17]
542 ;; // avoid RAW on r18
543 or r18=_PAGE_D|_PAGE_A,r18 // set the dirty and accessed bits
544 mov b0=r29 // restore b0
545 ;;
546 st8 [r17]=r18 // store back updated PTE
547 itc.d r18 // install updated PTE
548#endif
549 mov pr=r31,-1 // restore pr
550 rfi
551END(dirty_bit)
552
553 .org ia64_ivt+0x2400
554/////////////////////////////////////////////////////////////////////////////////////////
555// 0x2400 Entry 9 (size 64 bundles) Instruction Access-bit (27)
556ENTRY(iaccess_bit)
557 DBG_FAULT(9)
558 // Like Entry 8, except for instruction access
559 mov r16=cr.ifa // get the address that caused the fault
560 movl r30=1f // load continuation point in case of nested fault
561 mov r31=pr // save predicates
562#ifdef CONFIG_ITANIUM
563 /*
564 * Erratum 10 (IFA may contain incorrect address) has "NoFix" status.
565 */
566 mov r17=cr.ipsr
567 ;;
568 mov r18=cr.iip
569 tbit.z p6,p0=r17,IA64_PSR_IS_BIT // IA64 instruction set?
570 ;;
571(p6) mov r16=r18 // if so, use cr.iip instead of cr.ifa
572#endif /* CONFIG_ITANIUM */
573 ;;
574 thash r17=r16 // compute virtual address of L3 PTE
575	mov r29=b0				// save b0 in case of nested fault
576#ifdef CONFIG_SMP
577 mov r28=ar.ccv // save ar.ccv
578 ;;
5791: ld8 r18=[r17]
580 ;;
581 mov ar.ccv=r18 // set compare value for cmpxchg
582 or r25=_PAGE_A,r18 // set the accessed bit
583 ;;
584 cmpxchg8.acq r26=[r17],r25,ar.ccv
585 mov r24=PAGE_SHIFT<<2
586 ;;
587 cmp.eq p6,p7=r26,r18
588 ;;
589(p6) itc.i r25 // install updated PTE
590 ;;
591 /*
592	 * Tell the assembler's dependency-violation checker that the above "itc" instructions
593 * cannot possibly affect the following loads:
594 */
595 dv_serialize_data
596
597 ld8 r18=[r17] // read PTE again
598 ;;
599 cmp.eq p6,p7=r18,r25 // is it same as the newly installed
600 ;;
601(p7) ptc.l r16,r24
602 mov b0=r29 // restore b0
603 mov ar.ccv=r28
604#else /* !CONFIG_SMP */
605 ;;
6061: ld8 r18=[r17]
607 ;;
608 or r18=_PAGE_A,r18 // set the accessed bit
609 mov b0=r29 // restore b0
610 ;;
611 st8 [r17]=r18 // store back updated PTE
612 itc.i r18 // install updated PTE
613#endif /* !CONFIG_SMP */
614 mov pr=r31,-1
615 rfi
616END(iaccess_bit)
617
618 .org ia64_ivt+0x2800
619/////////////////////////////////////////////////////////////////////////////////////////
620// 0x2800 Entry 10 (size 64 bundles) Data Access-bit (15,55)
621ENTRY(daccess_bit)
622 DBG_FAULT(10)
623 // Like Entry 8, except for data access
624 mov r16=cr.ifa // get the address that caused the fault
625 movl r30=1f // load continuation point in case of nested fault
626 ;;
627 thash r17=r16 // compute virtual address of L3 PTE
628 mov r31=pr
629	mov r29=b0				// save b0 in case of nested fault
630#ifdef CONFIG_SMP
631 mov r28=ar.ccv // save ar.ccv
632 ;;
6331: ld8 r18=[r17]
634 ;; // avoid RAW on r18
635 mov ar.ccv=r18 // set compare value for cmpxchg
636	or r25=_PAGE_A,r18			// set the accessed bit
637 ;;
638 cmpxchg8.acq r26=[r17],r25,ar.ccv
639 mov r24=PAGE_SHIFT<<2
640 ;;
641 cmp.eq p6,p7=r26,r18
642 ;;
643(p6) itc.d r25 // install updated PTE
644 /*
645	 * Tell the assembler's dependency-violation checker that the above "itc" instructions
646 * cannot possibly affect the following loads:
647 */
648 dv_serialize_data
649 ;;
650 ld8 r18=[r17] // read PTE again
651 ;;
652 cmp.eq p6,p7=r18,r25 // is it same as the newly installed
653 ;;
654(p7) ptc.l r16,r24
655 mov ar.ccv=r28
656#else
657 ;;
6581: ld8 r18=[r17]
659 ;; // avoid RAW on r18
660 or r18=_PAGE_A,r18 // set the accessed bit
661 ;;
662 st8 [r17]=r18 // store back updated PTE
663 itc.d r18 // install updated PTE
664#endif
665 mov b0=r29 // restore b0
666 mov pr=r31,-1
667 rfi
668END(daccess_bit)
669
670 .org ia64_ivt+0x2c00
671/////////////////////////////////////////////////////////////////////////////////////////
672// 0x2c00 Entry 11 (size 64 bundles) Break instruction (33)
673ENTRY(break_fault)
674 /*
675 * The streamlined system call entry/exit paths only save/restore the initial part
676 * of pt_regs. This implies that the callers of system-calls must adhere to the
677 * normal procedure calling conventions.
678 *
679 * Registers to be saved & restored:
680 * CR registers: cr.ipsr, cr.iip, cr.ifs
681 * AR registers: ar.unat, ar.pfs, ar.rsc, ar.rnat, ar.bspstore, ar.fpsr
682 * others: pr, b0, b6, loadrs, r1, r11, r12, r13, r15
683 * Registers to be restored only:
684 * r8-r11: output value from the system call.
685 *
686 * During system call exit, scratch registers (including r15) are modified/cleared
687 * to prevent leaking bits from kernel to user level.
688 */
689 DBG_FAULT(11)
690 mov r16=IA64_KR(CURRENT) // r16 = current task; 12 cycle read lat.
691 mov r17=cr.iim
692 mov r18=__IA64_BREAK_SYSCALL
693 mov r21=ar.fpsr
694 mov r29=cr.ipsr
695 mov r19=b6
696 mov r25=ar.unat
697 mov r27=ar.rsc
698 mov r26=ar.pfs
699 mov r28=cr.iip
700 mov r31=pr // prepare to save predicates
701 mov r20=r1
702 ;;
703 adds r16=IA64_TASK_THREAD_ON_USTACK_OFFSET,r16
704 cmp.eq p0,p7=r18,r17 // is this a system call? (p7 <- false, if so)
705(p7) br.cond.spnt non_syscall
706 ;;
707 ld1 r17=[r16] // load current->thread.on_ustack flag
708 st1 [r16]=r0 // clear current->thread.on_ustack flag
709 add r1=-IA64_TASK_THREAD_ON_USTACK_OFFSET,r16 // set r1 for MINSTATE_START_SAVE_MIN_VIRT
710 ;;
711 invala
712
713 /* adjust return address so we skip over the break instruction: */
714
715 extr.u r8=r29,41,2 // extract ei field from cr.ipsr
716 ;;
717	cmp.eq p6,p7=2,r8			// ipsr.ei==2?
718 mov r2=r1 // setup r2 for ia64_syscall_setup
719 ;;
720(p6) mov r8=0 // clear ei to 0
721(p6)	adds r28=16,r28				// ipsr.ei wrapped, so advance cr.iip to the next bundle
722(p7) adds r8=1,r8 // increment ei to next slot
723 ;;
724 cmp.eq pKStk,pUStk=r0,r17 // are we in kernel mode already?
725 dep r29=r8,r29,41,2 // insert new ei into cr.ipsr
726 ;;
727
728 // switch from user to kernel RBS:
729 MINSTATE_START_SAVE_MIN_VIRT
730 br.call.sptk.many b7=ia64_syscall_setup
731 ;;
732 MINSTATE_END_SAVE_MIN_VIRT // switch to bank 1
733 ssm psr.ic | PSR_DEFAULT_BITS
734 ;;
735 srlz.i // guarantee that interruption collection is on
736 mov r3=NR_syscalls - 1
737 ;;
738(p15) ssm psr.i // restore psr.i
739 // p10==true means out registers are more than 8 or r15's Nat is true
740(p10) br.cond.spnt.many ia64_ret_from_syscall
741 ;;
742 movl r16=sys_call_table
743
744 adds r15=-1024,r15 // r15 contains the syscall number---subtract 1024
745 movl r2=ia64_ret_from_syscall
746 ;;
747 shladd r20=r15,3,r16 // r20 = sys_call_table + 8*(syscall-1024)
748 cmp.leu p6,p7=r15,r3 // (syscall > 0 && syscall < 1024 + NR_syscalls) ?
749 mov rp=r2 // set the real return addr
750 ;;
751(p6) ld8 r20=[r20] // load address of syscall entry point
752(p7) movl r20=sys_ni_syscall
753
754 add r2=TI_FLAGS+IA64_TASK_SIZE,r13
755 ;;
756 ld4 r2=[r2] // r2 = current_thread_info()->flags
757 ;;
758 and r2=_TIF_SYSCALL_TRACEAUDIT,r2 // mask trace or audit
759 ;;
760 cmp.eq p8,p0=r2,r0
761 mov b6=r20
762 ;;
763(p8) br.call.sptk.many b6=b6 // ignore this return addr
764 br.cond.sptk ia64_trace_syscall
765 // NOT REACHED
766END(break_fault)
767
768 .org ia64_ivt+0x3000
769/////////////////////////////////////////////////////////////////////////////////////////
770// 0x3000 Entry 12 (size 64 bundles) External Interrupt (4)
771ENTRY(interrupt)
772 DBG_FAULT(12)
773 mov r31=pr // prepare to save predicates
774 ;;
775 SAVE_MIN_WITH_COVER // uses r31; defines r2 and r3
776 ssm psr.ic | PSR_DEFAULT_BITS
777 ;;
778 adds r3=8,r2 // set up second base pointer for SAVE_REST
779 srlz.i // ensure everybody knows psr.ic is back on
780 ;;
781 SAVE_REST
782 ;;
783 alloc r14=ar.pfs,0,0,2,0 // must be first in an insn group
784 mov out0=cr.ivr // pass cr.ivr as first arg
785 add out1=16,sp // pass pointer to pt_regs as second arg
786 ;;
787 srlz.d // make sure we see the effect of cr.ivr
788 movl r14=ia64_leave_kernel
789 ;;
790 mov rp=r14
791 br.call.sptk.many b6=ia64_handle_irq
792END(interrupt)
793
794 .org ia64_ivt+0x3400
795/////////////////////////////////////////////////////////////////////////////////////////
796// 0x3400 Entry 13 (size 64 bundles) Reserved
797 DBG_FAULT(13)
798 FAULT(13)
799
800 .org ia64_ivt+0x3800
801/////////////////////////////////////////////////////////////////////////////////////////
802// 0x3800 Entry 14 (size 64 bundles) Reserved
803 DBG_FAULT(14)
804 FAULT(14)
805
806 /*
807 * There is no particular reason for this code to be here, other than that
808 * there happens to be space here that would go unused otherwise. If this
809	 * fault ever gets "unreserved", simply move the following code to a more
810 * suitable spot...
811 *
812 * ia64_syscall_setup() is a separate subroutine so that it can
813 * allocate stacked registers so it can safely demine any
814 * potential NaT values from the input registers.
815 *
816 * On entry:
817 * - executing on bank 0 or bank 1 register set (doesn't matter)
818 * - r1: stack pointer
819 * - r2: current task pointer
820 * - r3: preserved
821 * - r11: original contents (saved ar.pfs to be saved)
822 * - r12: original contents (sp to be saved)
823 * - r13: original contents (tp to be saved)
824 * - r15: original contents (syscall # to be saved)
825 * - r18: saved bsp (after switching to kernel stack)
826 * - r19: saved b6
827 * - r20: saved r1 (gp)
828 * - r21: saved ar.fpsr
829 * - r22: kernel's register backing store base (krbs_base)
830 * - r23: saved ar.bspstore
831 * - r24: saved ar.rnat
832 * - r25: saved ar.unat
833 * - r26: saved ar.pfs
834 * - r27: saved ar.rsc
835 * - r28: saved cr.iip
836 * - r29: saved cr.ipsr
837 * - r31: saved pr
838 * - b0: original contents (to be saved)
839 * On exit:
840 * - executing on bank 1 registers
841 * - psr.ic enabled, interrupts restored
842 * - p10: TRUE if syscall is invoked with more than 8 out
843 * registers or r15's Nat is true
844 * - r1: kernel's gp
845 * - r3: preserved (same as on entry)
846 * - r8: -EINVAL if p10 is true
847 * - r12: points to kernel stack
848 * - r13: points to current task
849 * - p15: TRUE if interrupts need to be re-enabled
850 * - ar.fpsr: set to kernel settings
851 */
852GLOBAL_ENTRY(ia64_syscall_setup)
853#if PT(B6) != 0
854# error This code assumes that b6 is the first field in pt_regs.
855#endif
856 st8 [r1]=r19 // save b6
857 add r16=PT(CR_IPSR),r1 // initialize first base pointer
858 add r17=PT(R11),r1 // initialize second base pointer
859 ;;
860 alloc r19=ar.pfs,8,0,0,0 // ensure in0-in7 are writable
861 st8 [r16]=r29,PT(AR_PFS)-PT(CR_IPSR) // save cr.ipsr
862 tnat.nz p8,p0=in0
863
864 st8.spill [r17]=r11,PT(CR_IIP)-PT(R11) // save r11
865 tnat.nz p9,p0=in1
866(pKStk) mov r18=r0 // make sure r18 isn't NaT
867 ;;
868
869 st8 [r16]=r26,PT(CR_IFS)-PT(AR_PFS) // save ar.pfs
870 st8 [r17]=r28,PT(AR_UNAT)-PT(CR_IIP) // save cr.iip
871 mov r28=b0 // save b0 (2 cyc)
872 ;;
873
874 st8 [r17]=r25,PT(AR_RSC)-PT(AR_UNAT) // save ar.unat
875 dep r19=0,r19,38,26 // clear all bits but 0..37 [I0]
876(p8) mov in0=-1
877 ;;
878
879 st8 [r16]=r19,PT(AR_RNAT)-PT(CR_IFS) // store ar.pfs.pfm in cr.ifs
880 extr.u r11=r19,7,7 // I0 // get sol of ar.pfs
881 and r8=0x7f,r19 // A // get sof of ar.pfs
882
883 st8 [r17]=r27,PT(AR_BSPSTORE)-PT(AR_RSC)// save ar.rsc
884 tbit.nz p15,p0=r29,IA64_PSR_I_BIT // I0
885(p9) mov in1=-1
886 ;;
887
888(pUStk) sub r18=r18,r22 // r18=RSE.ndirty*8
889 tnat.nz p10,p0=in2
890 add r11=8,r11
891 ;;
892(pKStk) adds r16=PT(PR)-PT(AR_RNAT),r16 // skip over ar_rnat field
893(pKStk) adds r17=PT(B0)-PT(AR_BSPSTORE),r17 // skip over ar_bspstore field
894 tnat.nz p11,p0=in3
895 ;;
896(p10) mov in2=-1
897 tnat.nz p12,p0=in4 // [I0]
898(p11) mov in3=-1
899 ;;
900(pUStk) st8 [r16]=r24,PT(PR)-PT(AR_RNAT) // save ar.rnat
901(pUStk) st8 [r17]=r23,PT(B0)-PT(AR_BSPSTORE) // save ar.bspstore
902 shl r18=r18,16 // compute ar.rsc to be used for "loadrs"
903 ;;
904 st8 [r16]=r31,PT(LOADRS)-PT(PR) // save predicates
905 st8 [r17]=r28,PT(R1)-PT(B0) // save b0
906 tnat.nz p13,p0=in5 // [I0]
907 ;;
908 st8 [r16]=r18,PT(R12)-PT(LOADRS) // save ar.rsc value for "loadrs"
909 st8.spill [r17]=r20,PT(R13)-PT(R1) // save original r1
910(p12) mov in4=-1
911 ;;
912
913.mem.offset 0,0; st8.spill [r16]=r12,PT(AR_FPSR)-PT(R12) // save r12
914.mem.offset 8,0; st8.spill [r17]=r13,PT(R15)-PT(R13) // save r13
915(p13) mov in5=-1
916 ;;
917 st8 [r16]=r21,PT(R8)-PT(AR_FPSR) // save ar.fpsr
918 tnat.nz p14,p0=in6
919 cmp.lt p10,p9=r11,r8 // frame size can't be more than local+8
920 ;;
921 stf8 [r16]=f1 // ensure pt_regs.r8 != 0 (see handle_syscall_error)
922(p9) tnat.nz p10,p0=r15
923 adds r12=-16,r1 // switch to kernel memory stack (with 16 bytes of scratch)
924
925 st8.spill [r17]=r15 // save r15
926 tnat.nz p8,p0=in7
927 nop.i 0
928
929 mov r13=r2 // establish `current'
930 movl r1=__gp // establish kernel global pointer
931 ;;
932(p14) mov in6=-1
933(p8) mov in7=-1
934 nop.i 0
935
936 cmp.eq pSys,pNonSys=r0,r0 // set pSys=1, pNonSys=0
937 movl r17=FPSR_DEFAULT
938 ;;
939 mov.m ar.fpsr=r17 // set ar.fpsr to kernel default value
940(p10) mov r8=-EINVAL
941 br.ret.sptk.many b7
942END(ia64_syscall_setup)
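
	// A rough C rendering of the p10 check above (informal sketch):
	//
	//	sol = (ar_pfs >> 7) & 0x7f;		// size of locals
	//	sof =  ar_pfs       & 0x7f;		// size of frame
	//	if (sof > sol + 8 || r15_is_nat)	// >8 out regs, or NaT syscall number
	//		r8 = -EINVAL;			// and p10 sends us to ia64_ret_from_syscall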
943
944 .org ia64_ivt+0x3c00
945/////////////////////////////////////////////////////////////////////////////////////////
946// 0x3c00 Entry 15 (size 64 bundles) Reserved
947 DBG_FAULT(15)
948 FAULT(15)
949
950 /*
951 * Squatting in this space ...
952 *
953 * This special case dispatcher for illegal operation faults allows preserved
954 * registers to be modified through a callback function (asm only) that is handed
955 * back from the fault handler in r8. Up to three arguments can be passed to the
956 * callback function by returning an aggregate with the callback as its first
957 * element, followed by the arguments.
958 */
959ENTRY(dispatch_illegal_op_fault)
960 .prologue
961 .body
962 SAVE_MIN_WITH_COVER
963 ssm psr.ic | PSR_DEFAULT_BITS
964 ;;
965 srlz.i // guarantee that interruption collection is on
966 ;;
967(p15) ssm psr.i // restore psr.i
968 adds r3=8,r2 // set up second base pointer for SAVE_REST
969 ;;
970 alloc r14=ar.pfs,0,0,1,0 // must be first in insn group
971 mov out0=ar.ec
972 ;;
973 SAVE_REST
974 PT_REGS_UNWIND_INFO(0)
975 ;;
976 br.call.sptk.many rp=ia64_illegal_op_fault
977.ret0: ;;
978 alloc r14=ar.pfs,0,0,3,0 // must be first in insn group
979 mov out0=r9
980 mov out1=r10
981 mov out2=r11
982 movl r15=ia64_leave_kernel
983 ;;
984 mov rp=r15
985 mov b6=r8
986 ;;
987 cmp.ne p6,p0=0,r8
988(p6) br.call.dpnt.many b6=b6 // call returns to ia64_leave_kernel
989 br.sptk.many ia64_leave_kernel
990END(dispatch_illegal_op_fault)
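
	// In C terms (informal sketch), ia64_illegal_op_fault() effectively returns
	//	struct { unsigned long fn, arg1, arg2, arg3; }
	// which the ia64 ABI hands back in r8-r11.  If fn (r8) is non-zero it is
	// called with arg1-arg3 as out0-out2 and returns into ia64_leave_kernel;
	// otherwise we branch to ia64_leave_kernel directly.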
991
992 .org ia64_ivt+0x4000
993/////////////////////////////////////////////////////////////////////////////////////////
994// 0x4000 Entry 16 (size 64 bundles) Reserved
995 DBG_FAULT(16)
996 FAULT(16)
997
998 .org ia64_ivt+0x4400
999/////////////////////////////////////////////////////////////////////////////////////////
1000// 0x4400 Entry 17 (size 64 bundles) Reserved
1001 DBG_FAULT(17)
1002 FAULT(17)
1003
1004ENTRY(non_syscall)
1005 SAVE_MIN_WITH_COVER
1006
1007 // There is no particular reason for this code to be here, other than that
1008 // there happens to be space here that would go unused otherwise. If this
1009	// fault ever gets "unreserved", simply move the following code to a more
1010 // suitable spot...
1011
1012 alloc r14=ar.pfs,0,0,2,0
1013 mov out0=cr.iim
1014 add out1=16,sp
1015 adds r3=8,r2 // set up second base pointer for SAVE_REST
1016
1017 ssm psr.ic | PSR_DEFAULT_BITS
1018 ;;
1019 srlz.i // guarantee that interruption collection is on
1020 ;;
1021(p15) ssm psr.i // restore psr.i
1022 movl r15=ia64_leave_kernel
1023 ;;
1024 SAVE_REST
1025 mov rp=r15
1026 ;;
1027 br.call.sptk.many b6=ia64_bad_break // avoid WAW on CFM and ignore return addr
1028END(non_syscall)
1029
1030 .org ia64_ivt+0x4800
1031/////////////////////////////////////////////////////////////////////////////////////////
1032// 0x4800 Entry 18 (size 64 bundles) Reserved
1033 DBG_FAULT(18)
1034 FAULT(18)
1035
1036 /*
1037 * There is no particular reason for this code to be here, other than that
1038 * there happens to be space here that would go unused otherwise. If this
1039	 * fault ever gets "unreserved", simply move the following code to a more
1040 * suitable spot...
1041 */
1042
1043ENTRY(dispatch_unaligned_handler)
1044 SAVE_MIN_WITH_COVER
1045 ;;
1046 alloc r14=ar.pfs,0,0,2,0 // now it's safe (must be first in insn group!)
1047 mov out0=cr.ifa
1048 adds out1=16,sp
1049
1050 ssm psr.ic | PSR_DEFAULT_BITS
1051 ;;
1052 srlz.i // guarantee that interruption collection is on
1053 ;;
1054(p15) ssm psr.i // restore psr.i
1055 adds r3=8,r2 // set up second base pointer
1056 ;;
1057 SAVE_REST
1058 movl r14=ia64_leave_kernel
1059 ;;
1060 mov rp=r14
1061 br.sptk.many ia64_prepare_handle_unaligned
1062END(dispatch_unaligned_handler)
1063
1064 .org ia64_ivt+0x4c00
1065/////////////////////////////////////////////////////////////////////////////////////////
1066// 0x4c00 Entry 19 (size 64 bundles) Reserved
1067 DBG_FAULT(19)
1068 FAULT(19)
1069
1070 /*
1071 * There is no particular reason for this code to be here, other than that
1072 * there happens to be space here that would go unused otherwise. If this
1073	 * fault ever gets "unreserved", simply move the following code to a more
1074 * suitable spot...
1075 */
1076
1077ENTRY(dispatch_to_fault_handler)
1078 /*
1079 * Input:
1080 * psr.ic: off
1081 * r19: fault vector number (e.g., 24 for General Exception)
1082 * r31: contains saved predicates (pr)
1083 */
1084 SAVE_MIN_WITH_COVER_R19
1085 alloc r14=ar.pfs,0,0,5,0
1086 mov out0=r15
1087 mov out1=cr.isr
1088 mov out2=cr.ifa
1089 mov out3=cr.iim
1090 mov out4=cr.itir
1091 ;;
1092 ssm psr.ic | PSR_DEFAULT_BITS
1093 ;;
1094 srlz.i // guarantee that interruption collection is on
1095 ;;
1096(p15) ssm psr.i // restore psr.i
1097 adds r3=8,r2 // set up second base pointer for SAVE_REST
1098 ;;
1099 SAVE_REST
1100 movl r14=ia64_leave_kernel
1101 ;;
1102 mov rp=r14
1103 br.call.sptk.many b6=ia64_fault
1104END(dispatch_to_fault_handler)
1105
1106//
1107// --- End of long entries, Beginning of short entries
1108//
1109
1110 .org ia64_ivt+0x5000
1111/////////////////////////////////////////////////////////////////////////////////////////
1112// 0x5000 Entry 20 (size 16 bundles) Page Not Present (10,22,49)
1113ENTRY(page_not_present)
1114 DBG_FAULT(20)
1115 mov r16=cr.ifa
1116 rsm psr.dt
1117 /*
1118 * The Linux page fault handler doesn't expect non-present pages to be in
1119 * the TLB. Flush the existing entry now, so we meet that expectation.
1120 */
1121 mov r17=PAGE_SHIFT<<2
1122 ;;
1123 ptc.l r16,r17
1124 ;;
1125 mov r31=pr
1126 srlz.d
1127 br.sptk.many page_fault
1128END(page_not_present)
1129
1130 .org ia64_ivt+0x5100
1131/////////////////////////////////////////////////////////////////////////////////////////
1132// 0x5100 Entry 21 (size 16 bundles) Key Permission (13,25,52)
1133ENTRY(key_permission)
1134 DBG_FAULT(21)
1135 mov r16=cr.ifa
1136 rsm psr.dt
1137 mov r31=pr
1138 ;;
1139 srlz.d
1140 br.sptk.many page_fault
1141END(key_permission)
1142
1143 .org ia64_ivt+0x5200
1144/////////////////////////////////////////////////////////////////////////////////////////
1145// 0x5200 Entry 22 (size 16 bundles) Instruction Access Rights (26)
1146ENTRY(iaccess_rights)
1147 DBG_FAULT(22)
1148 mov r16=cr.ifa
1149 rsm psr.dt
1150 mov r31=pr
1151 ;;
1152 srlz.d
1153 br.sptk.many page_fault
1154END(iaccess_rights)
1155
1156 .org ia64_ivt+0x5300
1157/////////////////////////////////////////////////////////////////////////////////////////
1158// 0x5300 Entry 23 (size 16 bundles) Data Access Rights (14,53)
1159ENTRY(daccess_rights)
1160 DBG_FAULT(23)
1161 mov r16=cr.ifa
1162 rsm psr.dt
1163 mov r31=pr
1164 ;;
1165 srlz.d
1166 br.sptk.many page_fault
1167END(daccess_rights)
1168
1169 .org ia64_ivt+0x5400
1170/////////////////////////////////////////////////////////////////////////////////////////
1171// 0x5400 Entry 24 (size 16 bundles) General Exception (5,32,34,36,38,39)
1172ENTRY(general_exception)
1173 DBG_FAULT(24)
1174 mov r16=cr.isr
1175 mov r31=pr
1176 ;;
1177 cmp4.eq p6,p0=0,r16
1178(p6) br.sptk.many dispatch_illegal_op_fault
1179 ;;
1180 mov r19=24 // fault number
1181 br.sptk.many dispatch_to_fault_handler
1182END(general_exception)
1183
1184 .org ia64_ivt+0x5500
1185/////////////////////////////////////////////////////////////////////////////////////////
1186// 0x5500 Entry 25 (size 16 bundles) Disabled FP-Register (35)
1187ENTRY(disabled_fp_reg)
1188 DBG_FAULT(25)
1189 rsm psr.dfh // ensure we can access fph
1190 ;;
1191 srlz.d
1192 mov r31=pr
1193 mov r19=25
1194 br.sptk.many dispatch_to_fault_handler
1195END(disabled_fp_reg)
1196
1197 .org ia64_ivt+0x5600
1198/////////////////////////////////////////////////////////////////////////////////////////
1199// 0x5600 Entry 26 (size 16 bundles) Nat Consumption (11,23,37,50)
1200ENTRY(nat_consumption)
1201 DBG_FAULT(26)
1202 FAULT(26)
1203END(nat_consumption)
1204
1205 .org ia64_ivt+0x5700
1206/////////////////////////////////////////////////////////////////////////////////////////
1207// 0x5700 Entry 27 (size 16 bundles) Speculation (40)
1208ENTRY(speculation_vector)
1209 DBG_FAULT(27)
1210 /*
1211 * A [f]chk.[as] instruction needs to take the branch to the recovery code but
1212 * this part of the architecture is not implemented in hardware on some CPUs, such
1213 * as Itanium. Thus, in general we need to emulate the behavior. IIM contains
1214 * the relative target (not yet sign extended). So after sign extending it we
1215 * simply add it to IIP. We also need to reset the EI field of the IPSR to zero,
1216 * i.e., the slot to restart into.
1217 *
1218	 * cr.iim contains zero_ext(imm21)
1219 */
1220 mov r18=cr.iim
1221 ;;
1222 mov r17=cr.iip
1223 shl r18=r18,43 // put sign bit in position (43=64-21)
1224 ;;
1225
1226 mov r16=cr.ipsr
1227 shr r18=r18,39 // sign extend (39=43-4)
1228 ;;
1229
1230 add r17=r17,r18 // now add the offset
1231 ;;
1232 mov cr.iip=r17
1233 dep r16=0,r16,41,2 // clear EI
1234 ;;
1235
1236 mov cr.ipsr=r16
1237 ;;
1238
1239 rfi // and go back
1240END(speculation_vector)
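
	// Worked example of the arithmetic above: "shl 43" moves bit 20 of the
	// zero-extended imm21 (its sign bit) up to bit 63, and the arithmetic
	// "shr 39" brings the value back down while leaving it multiplied by 16
	// (one bundle).  E.g. imm21 == 0x1fffff (-1 bundle) yields
	// 0xfffffffffffffff0, so cr.iip moves back by 16 bytes.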
1241
1242 .org ia64_ivt+0x5800
1243/////////////////////////////////////////////////////////////////////////////////////////
1244// 0x5800 Entry 28 (size 16 bundles) Reserved
1245 DBG_FAULT(28)
1246 FAULT(28)
1247
1248 .org ia64_ivt+0x5900
1249/////////////////////////////////////////////////////////////////////////////////////////
1250// 0x5900 Entry 29 (size 16 bundles) Debug (16,28,56)
1251ENTRY(debug_vector)
1252 DBG_FAULT(29)
1253 FAULT(29)
1254END(debug_vector)
1255
1256 .org ia64_ivt+0x5a00
1257/////////////////////////////////////////////////////////////////////////////////////////
1258// 0x5a00 Entry 30 (size 16 bundles) Unaligned Reference (57)
1259ENTRY(unaligned_access)
1260 DBG_FAULT(30)
1261 mov r16=cr.ipsr
1262 mov r31=pr // prepare to save predicates
1263 ;;
1264 br.sptk.many dispatch_unaligned_handler
1265END(unaligned_access)
1266
1267 .org ia64_ivt+0x5b00
1268/////////////////////////////////////////////////////////////////////////////////////////
1269// 0x5b00 Entry 31 (size 16 bundles) Unsupported Data Reference (57)
1270ENTRY(unsupported_data_reference)
1271 DBG_FAULT(31)
1272 FAULT(31)
1273END(unsupported_data_reference)
1274
1275 .org ia64_ivt+0x5c00
1276/////////////////////////////////////////////////////////////////////////////////////////
1277// 0x5c00 Entry 32 (size 16 bundles) Floating-Point Fault (64)
1278ENTRY(floating_point_fault)
1279 DBG_FAULT(32)
1280 FAULT(32)
1281END(floating_point_fault)
1282
1283 .org ia64_ivt+0x5d00
1284/////////////////////////////////////////////////////////////////////////////////////////
1285// 0x5d00 Entry 33 (size 16 bundles) Floating Point Trap (66)
1286ENTRY(floating_point_trap)
1287 DBG_FAULT(33)
1288 FAULT(33)
1289END(floating_point_trap)
1290
1291 .org ia64_ivt+0x5e00
1292/////////////////////////////////////////////////////////////////////////////////////////
1293// 0x5e00 Entry 34 (size 16 bundles) Lower Privilege Transfer Trap (66)
1294ENTRY(lower_privilege_trap)
1295 DBG_FAULT(34)
1296 FAULT(34)
1297END(lower_privilege_trap)
1298
1299 .org ia64_ivt+0x5f00
1300/////////////////////////////////////////////////////////////////////////////////////////
1301// 0x5f00 Entry 35 (size 16 bundles) Taken Branch Trap (68)
1302ENTRY(taken_branch_trap)
1303 DBG_FAULT(35)
1304 FAULT(35)
1305END(taken_branch_trap)
1306
1307 .org ia64_ivt+0x6000
1308/////////////////////////////////////////////////////////////////////////////////////////
1309// 0x6000 Entry 36 (size 16 bundles) Single Step Trap (69)
1310ENTRY(single_step_trap)
1311 DBG_FAULT(36)
1312 FAULT(36)
1313END(single_step_trap)
1314
1315 .org ia64_ivt+0x6100
1316/////////////////////////////////////////////////////////////////////////////////////////
1317// 0x6100 Entry 37 (size 16 bundles) Reserved
1318 DBG_FAULT(37)
1319 FAULT(37)
1320
1321 .org ia64_ivt+0x6200
1322/////////////////////////////////////////////////////////////////////////////////////////
1323// 0x6200 Entry 38 (size 16 bundles) Reserved
1324 DBG_FAULT(38)
1325 FAULT(38)
1326
1327 .org ia64_ivt+0x6300
1328/////////////////////////////////////////////////////////////////////////////////////////
1329// 0x6300 Entry 39 (size 16 bundles) Reserved
1330 DBG_FAULT(39)
1331 FAULT(39)
1332
1333 .org ia64_ivt+0x6400
1334/////////////////////////////////////////////////////////////////////////////////////////
1335// 0x6400 Entry 40 (size 16 bundles) Reserved
1336 DBG_FAULT(40)
1337 FAULT(40)
1338
1339 .org ia64_ivt+0x6500
1340/////////////////////////////////////////////////////////////////////////////////////////
1341// 0x6500 Entry 41 (size 16 bundles) Reserved
1342 DBG_FAULT(41)
1343 FAULT(41)
1344
1345 .org ia64_ivt+0x6600
1346/////////////////////////////////////////////////////////////////////////////////////////
1347// 0x6600 Entry 42 (size 16 bundles) Reserved
1348 DBG_FAULT(42)
1349 FAULT(42)
1350
1351 .org ia64_ivt+0x6700
1352/////////////////////////////////////////////////////////////////////////////////////////
1353// 0x6700 Entry 43 (size 16 bundles) Reserved
1354 DBG_FAULT(43)
1355 FAULT(43)
1356
1357 .org ia64_ivt+0x6800
1358/////////////////////////////////////////////////////////////////////////////////////////
1359// 0x6800 Entry 44 (size 16 bundles) Reserved
1360 DBG_FAULT(44)
1361 FAULT(44)
1362
1363 .org ia64_ivt+0x6900
1364/////////////////////////////////////////////////////////////////////////////////////////
1365// 0x6900 Entry 45 (size 16 bundles) IA-32 Exception (17,18,29,41,42,43,44,58,60,61,62,72,73,75,76,77)
1366ENTRY(ia32_exception)
1367 DBG_FAULT(45)
1368 FAULT(45)
1369END(ia32_exception)
1370
1371 .org ia64_ivt+0x6a00
1372/////////////////////////////////////////////////////////////////////////////////////////
1373// 0x6a00 Entry 46 (size 16 bundles) IA-32 Intercept (30,31,59,70,71)
1374ENTRY(ia32_intercept)
1375 DBG_FAULT(46)
1376#ifdef CONFIG_IA32_SUPPORT
1377 mov r31=pr
1378 mov r16=cr.isr
1379 ;;
1380 extr.u r17=r16,16,8 // get ISR.code
1381 mov r18=ar.eflag
1382 mov r19=cr.iim // old eflag value
1383 ;;
1384 cmp.ne p6,p0=2,r17
1385(p6) br.cond.spnt 1f // not a system flag fault
1386 xor r16=r18,r19
1387 ;;
1388 extr.u r17=r16,18,1 // get the eflags.ac bit
1389 ;;
1390 cmp.eq p6,p0=0,r17
1391(p6) br.cond.spnt 1f // eflags.ac bit didn't change
1392 ;;
1393 mov pr=r31,-1 // restore predicate registers
1394 rfi
1395
13961:
1397#endif // CONFIG_IA32_SUPPORT
1398 FAULT(46)
1399END(ia32_intercept)
1400
1401 .org ia64_ivt+0x6b00
1402/////////////////////////////////////////////////////////////////////////////////////////
1403// 0x6b00 Entry 47 (size 16 bundles) IA-32 Interrupt (74)
1404ENTRY(ia32_interrupt)
1405 DBG_FAULT(47)
1406#ifdef CONFIG_IA32_SUPPORT
1407 mov r31=pr
1408 br.sptk.many dispatch_to_ia32_handler
1409#else
1410 FAULT(47)
1411#endif
1412END(ia32_interrupt)
1413
1414 .org ia64_ivt+0x6c00
1415/////////////////////////////////////////////////////////////////////////////////////////
1416// 0x6c00 Entry 48 (size 16 bundles) Reserved
1417 DBG_FAULT(48)
1418 FAULT(48)
1419
1420 .org ia64_ivt+0x6d00
1421/////////////////////////////////////////////////////////////////////////////////////////
1422// 0x6d00 Entry 49 (size 16 bundles) Reserved
1423 DBG_FAULT(49)
1424 FAULT(49)
1425
1426 .org ia64_ivt+0x6e00
1427/////////////////////////////////////////////////////////////////////////////////////////
1428// 0x6e00 Entry 50 (size 16 bundles) Reserved
1429 DBG_FAULT(50)
1430 FAULT(50)
1431
1432 .org ia64_ivt+0x6f00
1433/////////////////////////////////////////////////////////////////////////////////////////
1434// 0x6f00 Entry 51 (size 16 bundles) Reserved
1435 DBG_FAULT(51)
1436 FAULT(51)
1437
1438 .org ia64_ivt+0x7000
1439/////////////////////////////////////////////////////////////////////////////////////////
1440// 0x7000 Entry 52 (size 16 bundles) Reserved
1441 DBG_FAULT(52)
1442 FAULT(52)
1443
1444 .org ia64_ivt+0x7100
1445/////////////////////////////////////////////////////////////////////////////////////////
1446// 0x7100 Entry 53 (size 16 bundles) Reserved
1447 DBG_FAULT(53)
1448 FAULT(53)
1449
1450 .org ia64_ivt+0x7200
1451/////////////////////////////////////////////////////////////////////////////////////////
1452// 0x7200 Entry 54 (size 16 bundles) Reserved
1453 DBG_FAULT(54)
1454 FAULT(54)
1455
1456 .org ia64_ivt+0x7300
1457/////////////////////////////////////////////////////////////////////////////////////////
1458// 0x7300 Entry 55 (size 16 bundles) Reserved
1459 DBG_FAULT(55)
1460 FAULT(55)
1461
1462 .org ia64_ivt+0x7400
1463/////////////////////////////////////////////////////////////////////////////////////////
1464// 0x7400 Entry 56 (size 16 bundles) Reserved
1465 DBG_FAULT(56)
1466 FAULT(56)
1467
1468 .org ia64_ivt+0x7500
1469/////////////////////////////////////////////////////////////////////////////////////////
1470// 0x7500 Entry 57 (size 16 bundles) Reserved
1471 DBG_FAULT(57)
1472 FAULT(57)
1473
1474 .org ia64_ivt+0x7600
1475/////////////////////////////////////////////////////////////////////////////////////////
1476// 0x7600 Entry 58 (size 16 bundles) Reserved
1477 DBG_FAULT(58)
1478 FAULT(58)
1479
1480 .org ia64_ivt+0x7700
1481/////////////////////////////////////////////////////////////////////////////////////////
1482// 0x7700 Entry 59 (size 16 bundles) Reserved
1483 DBG_FAULT(59)
1484 FAULT(59)
1485
1486 .org ia64_ivt+0x7800
1487/////////////////////////////////////////////////////////////////////////////////////////
1488// 0x7800 Entry 60 (size 16 bundles) Reserved
1489 DBG_FAULT(60)
1490 FAULT(60)
1491
1492 .org ia64_ivt+0x7900
1493/////////////////////////////////////////////////////////////////////////////////////////
1494// 0x7900 Entry 61 (size 16 bundles) Reserved
1495 DBG_FAULT(61)
1496 FAULT(61)
1497
1498 .org ia64_ivt+0x7a00
1499/////////////////////////////////////////////////////////////////////////////////////////
1500// 0x7a00 Entry 62 (size 16 bundles) Reserved
1501 DBG_FAULT(62)
1502 FAULT(62)
1503
1504 .org ia64_ivt+0x7b00
1505/////////////////////////////////////////////////////////////////////////////////////////
1506// 0x7b00 Entry 63 (size 16 bundles) Reserved
1507 DBG_FAULT(63)
1508 FAULT(63)
1509
1510 .org ia64_ivt+0x7c00
1511/////////////////////////////////////////////////////////////////////////////////////////
1512// 0x7c00 Entry 64 (size 16 bundles) Reserved
1513 DBG_FAULT(64)
1514 FAULT(64)
1515
1516 .org ia64_ivt+0x7d00
1517/////////////////////////////////////////////////////////////////////////////////////////
1518// 0x7d00 Entry 65 (size 16 bundles) Reserved
1519 DBG_FAULT(65)
1520 FAULT(65)
1521
1522 .org ia64_ivt+0x7e00
1523/////////////////////////////////////////////////////////////////////////////////////////
1524// 0x7e00 Entry 66 (size 16 bundles) Reserved
1525 DBG_FAULT(66)
1526 FAULT(66)
1527
1528 .org ia64_ivt+0x7f00
1529/////////////////////////////////////////////////////////////////////////////////////////
1530// 0x7f00 Entry 67 (size 16 bundles) Reserved
1531 DBG_FAULT(67)
1532 FAULT(67)
1533
1534#ifdef CONFIG_IA32_SUPPORT
1535
1536 /*
1537 * There is no particular reason for this code to be here, other than that
1538 * there happens to be space here that would go unused otherwise. If this
1539	 * fault ever gets "unreserved", simply move the following code to a more
1540 * suitable spot...
1541 */
1542
1543 // IA32 interrupt entry point
1544
1545ENTRY(dispatch_to_ia32_handler)
1546 SAVE_MIN
1547 ;;
1548 mov r14=cr.isr
1549 ssm psr.ic | PSR_DEFAULT_BITS
1550 ;;
1551 srlz.i // guarantee that interruption collection is on
1552 ;;
1553(p15) ssm psr.i
1554 adds r3=8,r2 // Base pointer for SAVE_REST
1555 ;;
1556 SAVE_REST
1557 ;;
1558 mov r15=0x80
1559 shr r14=r14,16 // Get interrupt number
1560 ;;
1561 cmp.ne p6,p0=r14,r15
1562(p6) br.call.dpnt.many b6=non_ia32_syscall
1563
1564 adds r14=IA64_PT_REGS_R8_OFFSET + 16,sp // 16 byte hole per SW conventions
1565 adds r15=IA64_PT_REGS_R1_OFFSET + 16,sp
1566 ;;
1567 cmp.eq pSys,pNonSys=r0,r0 // set pSys=1, pNonSys=0
1568 ld8 r8=[r14] // get r8
1569 ;;
1570 st8 [r15]=r8 // save original EAX in r1 (IA32 procs don't use the GP)
1571 ;;
1572	alloc r15=ar.pfs,0,0,6,0	// must be first in an insn group
1573 ;;
1574 ld4 r8=[r14],8 // r8 == eax (syscall number)
1575 mov r15=IA32_NR_syscalls
1576 ;;
1577 cmp.ltu.unc p6,p7=r8,r15
1578 ld4 out1=[r14],8 // r9 == ecx
1579 ;;
1580 ld4 out2=[r14],8 // r10 == edx
1581 ;;
1582 ld4 out0=[r14] // r11 == ebx
1583 adds r14=(IA64_PT_REGS_R13_OFFSET) + 16,sp
1584 ;;
1585 ld4 out5=[r14],PT(R14)-PT(R13) // r13 == ebp
1586 ;;
1587 ld4 out3=[r14],PT(R15)-PT(R14) // r14 == esi
1588 adds r2=TI_FLAGS+IA64_TASK_SIZE,r13
1589 ;;
1590 ld4 out4=[r14] // r15 == edi
1591 movl r16=ia32_syscall_table
1592 ;;
1593(p6) shladd r16=r8,3,r16 // force ni_syscall if not valid syscall number
1594 ld4 r2=[r2] // r2 = current_thread_info()->flags
1595 ;;
1596 ld8 r16=[r16]
1597 and r2=_TIF_SYSCALL_TRACEAUDIT,r2 // mask trace or audit
1598 ;;
1599 mov b6=r16
1600 movl r15=ia32_ret_from_syscall
1601 cmp.eq p8,p0=r2,r0
1602 ;;
1603 mov rp=r15
1604(p8) br.call.sptk.many b6=b6
1605 br.cond.sptk ia32_trace_syscall
1606
1607non_ia32_syscall:
1608 alloc r15=ar.pfs,0,0,2,0
1609 mov out0=r14 // interrupt #
1610 add out1=16,sp // pointer to pt_regs
1611 ;; // avoid WAW on CFM
1612 br.call.sptk.many rp=ia32_bad_interrupt
1613.ret1: movl r15=ia64_leave_kernel
1614 ;;
1615 mov rp=r15
1616 br.ret.sptk.many rp
1617END(dispatch_to_ia32_handler)
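
	// Register convention assumed by the loads above (i386 "int 0x80" syscall
	// arguments mapped onto ia64 out registers; summary only):
	//	eax (pt_regs->r8)  = syscall number
	//	ebx (r11) -> out0,  ecx (r9)  -> out1,  edx (r10) -> out2,
	//	esi (r14) -> out3,  edi (r15) -> out4,  ebp (r13) -> out5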
1618
1619#endif /* CONFIG_IA32_SUPPORT */
diff --git a/arch/ia64/kernel/machvec.c b/arch/ia64/kernel/machvec.c
new file mode 100644
index 000000000000..c3a04ee7f4f6
--- /dev/null
+++ b/arch/ia64/kernel/machvec.c
@@ -0,0 +1,70 @@
1#include <linux/config.h>
2#include <linux/module.h>
3
4#include <asm/machvec.h>
5#include <asm/system.h>
6
7#ifdef CONFIG_IA64_GENERIC
8
9#include <linux/kernel.h>
10#include <linux/string.h>
11
12#include <asm/page.h>
13
14struct ia64_machine_vector ia64_mv;
15EXPORT_SYMBOL(ia64_mv);
16
17static struct ia64_machine_vector *
18lookup_machvec (const char *name)
19{
20 extern struct ia64_machine_vector machvec_start[];
21 extern struct ia64_machine_vector machvec_end[];
22 struct ia64_machine_vector *mv;
23
24 for (mv = machvec_start; mv < machvec_end; ++mv)
25 if (strcmp (mv->name, name) == 0)
26 return mv;
27
28 return 0;
29}
30
31void
32machvec_init (const char *name)
33{
34 struct ia64_machine_vector *mv;
35
36 mv = lookup_machvec(name);
37 if (!mv) {
38 panic("generic kernel failed to find machine vector for platform %s!", name);
39 }
40 ia64_mv = *mv;
41 printk(KERN_INFO "booting generic kernel on platform %s\n", name);
42}
43
44#endif /* CONFIG_IA64_GENERIC */
45
46void
47machvec_setup (char **arg)
48{
49}
50EXPORT_SYMBOL(machvec_setup);
51
52void
53machvec_timer_interrupt (int irq, void *dev_id, struct pt_regs *regs)
54{
55}
56EXPORT_SYMBOL(machvec_timer_interrupt);
57
58void
59machvec_dma_sync_single (struct device *hwdev, dma_addr_t dma_handle, size_t size, int dir)
60{
61 mb();
62}
63EXPORT_SYMBOL(machvec_dma_sync_single);
64
65void
66machvec_dma_sync_sg (struct device *hwdev, struct scatterlist *sg, int n, int dir)
67{
68 mb();
69}
70EXPORT_SYMBOL(machvec_dma_sync_sg);
diff --git a/arch/ia64/kernel/mca.c b/arch/ia64/kernel/mca.c
new file mode 100644
index 000000000000..4d6c7b8f667b
--- /dev/null
+++ b/arch/ia64/kernel/mca.c
@@ -0,0 +1,1470 @@
1/*
2 * File: mca.c
3 * Purpose: Generic MCA handling layer
4 *
5 * Updated for latest kernel
6 * Copyright (C) 2003 Hewlett-Packard Co
7 * David Mosberger-Tang <davidm@hpl.hp.com>
8 *
9 * Copyright (C) 2002 Dell Inc.
10 * Copyright (C) Matt Domsch (Matt_Domsch@dell.com)
11 *
12 * Copyright (C) 2002 Intel
13 * Copyright (C) Jenna Hall (jenna.s.hall@intel.com)
14 *
15 * Copyright (C) 2001 Intel
16 * Copyright (C) Fred Lewis (frederick.v.lewis@intel.com)
17 *
18 * Copyright (C) 2000 Intel
19 * Copyright (C) Chuck Fleckenstein (cfleck@co.intel.com)
20 *
21 * Copyright (C) 1999, 2004 Silicon Graphics, Inc.
22 * Copyright (C) Vijay Chander(vijay@engr.sgi.com)
23 *
24 * 03/04/15 D. Mosberger Added INIT backtrace support.
25 * 02/03/25 M. Domsch GUID cleanups
26 *
27 * 02/01/04 J. Hall Aligned MCA stack to 16 bytes, added platform vs. CPU
28 * error flag, set SAL default return values, changed
29 * error record structure to linked list, added init call
30 * to sal_get_state_info_size().
31 *
32 * 01/01/03 F. Lewis Added setup of CMCI and CPEI IRQs, logging of corrected
33 * platform errors, completed code for logging of
34 * corrected & uncorrected machine check errors, and
35 * updated for conformance with Nov. 2000 revision of the
36 * SAL 3.0 spec.
37 * 00/03/29 C. Fleckenstein Fixed PAL/SAL update issues, began MCA bug fixes, logging issues,
38 * added min save state dump, added INIT handler.
39 *
40 * 2003-12-08 Keith Owens <kaos@sgi.com>
41 * smp_call_function() must not be called from interrupt context (can
42 * deadlock on tasklist_lock). Use keventd to call smp_call_function().
43 *
44 * 2004-02-01 Keith Owens <kaos@sgi.com>
45 * Avoid deadlock when using printk() for MCA and INIT records.
46 * Delete all record printing code, moved to salinfo_decode in user space.
47 * Mark variables and functions static where possible.
48 * Delete dead variables and functions.
49 * Reorder to remove the need for forward declarations and to consolidate
50 * related code.
51 */
52#include <linux/config.h>
53#include <linux/types.h>
54#include <linux/init.h>
55#include <linux/sched.h>
56#include <linux/interrupt.h>
57#include <linux/irq.h>
58#include <linux/kallsyms.h>
59#include <linux/smp_lock.h>
60#include <linux/bootmem.h>
61#include <linux/acpi.h>
62#include <linux/timer.h>
63#include <linux/module.h>
64#include <linux/kernel.h>
65#include <linux/smp.h>
66#include <linux/workqueue.h>
67
68#include <asm/delay.h>
69#include <asm/machvec.h>
70#include <asm/meminit.h>
71#include <asm/page.h>
72#include <asm/ptrace.h>
73#include <asm/system.h>
74#include <asm/sal.h>
75#include <asm/mca.h>
76
77#include <asm/irq.h>
78#include <asm/hw_irq.h>
79
80#if defined(IA64_MCA_DEBUG_INFO)
81# define IA64_MCA_DEBUG(fmt...) printk(fmt)
82#else
83# define IA64_MCA_DEBUG(fmt...)
84#endif
85
86/* Used by mca_asm.S */
87ia64_mca_sal_to_os_state_t ia64_sal_to_os_handoff_state;
88ia64_mca_os_to_sal_state_t ia64_os_to_sal_handoff_state;
89u64 ia64_mca_serialize;
90DEFINE_PER_CPU(u64, ia64_mca_data); /* == __per_cpu_mca[smp_processor_id()] */
91DEFINE_PER_CPU(u64, ia64_mca_per_cpu_pte); /* PTE to map per-CPU area */
92DEFINE_PER_CPU(u64, ia64_mca_pal_pte); /* PTE to map PAL code */
93DEFINE_PER_CPU(u64, ia64_mca_pal_base); /* vaddr PAL code granule */
94
95unsigned long __per_cpu_mca[NR_CPUS];
96
97/* In mca_asm.S */
98extern void ia64_monarch_init_handler (void);
99extern void ia64_slave_init_handler (void);
100
101static ia64_mc_info_t ia64_mc_info;
102
103#define MAX_CPE_POLL_INTERVAL (15*60*HZ) /* 15 minutes */
104#define MIN_CPE_POLL_INTERVAL (2*60*HZ) /* 2 minutes */
105#define CMC_POLL_INTERVAL (1*60*HZ) /* 1 minute */
106#define CPE_HISTORY_LENGTH 5
107#define CMC_HISTORY_LENGTH 5
108
109static struct timer_list cpe_poll_timer;
110static struct timer_list cmc_poll_timer;
111/*
112 * This variable tells whether we are currently in polling mode.
113 * Start with this in the wrong state so we won't play w/ timers
114 * before the system is ready.
115 */
116static int cmc_polling_enabled = 1;
117
118/*
119 * Clearing this variable prevents CPE polling from getting activated
120 * in mca_late_init. Use it if your system doesn't provide a CPEI,
121 * but encounters problems retrieving CPE logs. This should only be
122 * necessary for debugging.
123 */
124static int cpe_poll_enabled = 1;
125
126extern void salinfo_log_wakeup(int type, u8 *buffer, u64 size, int irqsafe);
127
128static int mca_init;
129
130/*
131 * IA64_MCA log support
132 */
133#define IA64_MAX_LOGS 2 /* Double-buffering for nested MCAs */
134#define IA64_MAX_LOG_TYPES 4 /* MCA, INIT, CMC, CPE */
135
136typedef struct ia64_state_log_s
137{
138 spinlock_t isl_lock;
139 int isl_index;
140 unsigned long isl_count;
141 ia64_err_rec_t *isl_log[IA64_MAX_LOGS]; /* need space to store header + error log */
142} ia64_state_log_t;
143
144static ia64_state_log_t ia64_state_log[IA64_MAX_LOG_TYPES];
145
146#define IA64_LOG_ALLOCATE(it, size) \
147 {ia64_state_log[it].isl_log[IA64_LOG_CURR_INDEX(it)] = \
148 (ia64_err_rec_t *)alloc_bootmem(size); \
149 ia64_state_log[it].isl_log[IA64_LOG_NEXT_INDEX(it)] = \
150 (ia64_err_rec_t *)alloc_bootmem(size);}
151#define IA64_LOG_LOCK_INIT(it) spin_lock_init(&ia64_state_log[it].isl_lock)
152#define IA64_LOG_LOCK(it) spin_lock_irqsave(&ia64_state_log[it].isl_lock, s)
153#define IA64_LOG_UNLOCK(it) spin_unlock_irqrestore(&ia64_state_log[it].isl_lock,s)
154#define IA64_LOG_NEXT_INDEX(it) ia64_state_log[it].isl_index
155#define IA64_LOG_CURR_INDEX(it)    (1 - ia64_state_log[it].isl_index)
156#define IA64_LOG_INDEX_INC(it) \
157 {ia64_state_log[it].isl_index = 1 - ia64_state_log[it].isl_index; \
158 ia64_state_log[it].isl_count++;}
159#define IA64_LOG_INDEX_DEC(it) \
160 ia64_state_log[it].isl_index = 1 - ia64_state_log[it].isl_index
161#define IA64_LOG_NEXT_BUFFER(it) (void *)((ia64_state_log[it].isl_log[IA64_LOG_NEXT_INDEX(it)]))
162#define IA64_LOG_CURR_BUFFER(it) (void *)((ia64_state_log[it].isl_log[IA64_LOG_CURR_INDEX(it)]))
163#define IA64_LOG_COUNT(it) ia64_state_log[it].isl_count
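
/*
 * Example of the double-buffer rotation (descriptive only): with isl_index == 0,
 * IA64_LOG_NEXT_BUFFER() is isl_log[0] and IA64_LOG_CURR_BUFFER() is isl_log[1].
 * Once a record has been read into the NEXT buffer, IA64_LOG_INDEX_INC() flips
 * isl_index to 1, so that record becomes the CURR buffer and the other buffer
 * is free for the next (possibly nested) record.
 */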
164
165/*
166 * ia64_log_init
167 * Reset the OS ia64 log buffer
168 * Inputs : info_type (SAL_INFO_TYPE_{MCA,INIT,CMC,CPE})
169 * Outputs : None
170 */
171static void
172ia64_log_init(int sal_info_type)
173{
174 u64 max_size = 0;
175
176 IA64_LOG_NEXT_INDEX(sal_info_type) = 0;
177 IA64_LOG_LOCK_INIT(sal_info_type);
178
179 // SAL will tell us the maximum size of any error record of this type
180 max_size = ia64_sal_get_state_info_size(sal_info_type);
181 if (!max_size)
182 /* alloc_bootmem() doesn't like zero-sized allocations! */
183 return;
184
185 // set up OS data structures to hold error info
186 IA64_LOG_ALLOCATE(sal_info_type, max_size);
187 memset(IA64_LOG_CURR_BUFFER(sal_info_type), 0, max_size);
188 memset(IA64_LOG_NEXT_BUFFER(sal_info_type), 0, max_size);
189}
190
191/*
192 * ia64_log_get
193 *
194 * Get the current MCA log from SAL and copy it into the OS log buffer.
195 *
196 * Inputs : info_type (SAL_INFO_TYPE_{MCA,INIT,CMC,CPE})
197 * irq_safe whether you can use printk at this point
198 * Outputs : size (total record length)
199 * *buffer (ptr to error record)
200 *
201 */
202static u64
203ia64_log_get(int sal_info_type, u8 **buffer, int irq_safe)
204{
205 sal_log_record_header_t *log_buffer;
206 u64 total_len = 0;
207 int s;
208
209 IA64_LOG_LOCK(sal_info_type);
210
211 /* Get the process state information */
212 log_buffer = IA64_LOG_NEXT_BUFFER(sal_info_type);
213
214 total_len = ia64_sal_get_state_info(sal_info_type, (u64 *)log_buffer);
215
216 if (total_len) {
217 IA64_LOG_INDEX_INC(sal_info_type);
218 IA64_LOG_UNLOCK(sal_info_type);
219 if (irq_safe) {
220 IA64_MCA_DEBUG("%s: SAL error record type %d retrieved. "
221 "Record length = %ld\n", __FUNCTION__, sal_info_type, total_len);
222 }
223 *buffer = (u8 *) log_buffer;
224 return total_len;
225 } else {
226 IA64_LOG_UNLOCK(sal_info_type);
227 return 0;
228 }
229}
230
231/*
232 * ia64_mca_log_sal_error_record
233 *
234 * This function retrieves a specified error record type from SAL
235 * and wakes up any processes waiting for error records.
236 *
237 * Inputs : sal_info_type (Type of error record MCA/CMC/CPE/INIT)
238 */
239static void
240ia64_mca_log_sal_error_record(int sal_info_type)
241{
242 u8 *buffer;
243 sal_log_record_header_t *rh;
244 u64 size;
245 int irq_safe = sal_info_type != SAL_INFO_TYPE_MCA && sal_info_type != SAL_INFO_TYPE_INIT;
246#ifdef IA64_MCA_DEBUG_INFO
247 static const char * const rec_name[] = { "MCA", "INIT", "CMC", "CPE" };
248#endif
249
250 size = ia64_log_get(sal_info_type, &buffer, irq_safe);
251 if (!size)
252 return;
253
254 salinfo_log_wakeup(sal_info_type, buffer, size, irq_safe);
255
256 if (irq_safe)
257 IA64_MCA_DEBUG("CPU %d: SAL log contains %s error record\n",
258 smp_processor_id(),
259 sal_info_type < ARRAY_SIZE(rec_name) ? rec_name[sal_info_type] : "UNKNOWN");
260
261 /* Clear logs from corrected errors in case there's no user-level logger */
262 rh = (sal_log_record_header_t *)buffer;
263 if (rh->severity == sal_log_severity_corrected)
264 ia64_sal_clear_state_info(sal_info_type);
265}
266
267/*
268 * platform dependent error handling
269 */
270#ifndef PLATFORM_MCA_HANDLERS
271
272#ifdef CONFIG_ACPI
273
274static int cpe_vector = -1;
275
276static irqreturn_t
277ia64_mca_cpe_int_handler (int cpe_irq, void *arg, struct pt_regs *ptregs)
278{
279 static unsigned long cpe_history[CPE_HISTORY_LENGTH];
280 static int index;
281 static DEFINE_SPINLOCK(cpe_history_lock);
282
283 IA64_MCA_DEBUG("%s: received interrupt vector = %#x on CPU %d\n",
284 __FUNCTION__, cpe_irq, smp_processor_id());
285
286 /* SAL spec states this should run w/ interrupts enabled */
287 local_irq_enable();
288
289 /* Get the CPE error record and log it */
290 ia64_mca_log_sal_error_record(SAL_INFO_TYPE_CPE);
291
292 spin_lock(&cpe_history_lock);
293 if (!cpe_poll_enabled && cpe_vector >= 0) {
294
295 int i, count = 1; /* we know 1 happened now */
296 unsigned long now = jiffies;
297
298 for (i = 0; i < CPE_HISTORY_LENGTH; i++) {
299 if (now - cpe_history[i] <= HZ)
300 count++;
301 }
302
303 IA64_MCA_DEBUG(KERN_INFO "CPE threshold %d/%d\n", count, CPE_HISTORY_LENGTH);
304 if (count >= CPE_HISTORY_LENGTH) {
305
306 cpe_poll_enabled = 1;
307 spin_unlock(&cpe_history_lock);
308 disable_irq_nosync(local_vector_to_irq(IA64_CPE_VECTOR));
309
310 /*
311 * Corrected errors will still be corrected, but
312 * make sure there's a log somewhere that indicates
313 * something is generating more than we can handle.
314 */
315 printk(KERN_WARNING "WARNING: Switching to polling CPE handler; error records may be lost\n");
316
317 mod_timer(&cpe_poll_timer, jiffies + MIN_CPE_POLL_INTERVAL);
318
319 /* lock already released, get out now */
320 return IRQ_HANDLED;
321 } else {
322 cpe_history[index++] = now;
323 if (index == CPE_HISTORY_LENGTH)
324 index = 0;
325 }
326 }
327 spin_unlock(&cpe_history_lock);
328 return IRQ_HANDLED;
329}
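
/*
 * Summary of the logic above: cpe_history[] remembers the jiffies timestamps of
 * the last CPE_HISTORY_LENGTH interrupts.  If the current interrupt plus the
 * remembered ones amount to CPE_HISTORY_LENGTH CPEs within one second (HZ
 * jiffies), the CPE interrupt is disabled and cpe_poll_timer takes over, so a
 * flood of corrected platform errors cannot keep the CPU stuck in this handler.
 */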
330
331#endif /* CONFIG_ACPI */
332
333static void
334show_min_state (pal_min_state_area_t *minstate)
335{
336 u64 iip = minstate->pmsa_iip + ((struct ia64_psr *)(&minstate->pmsa_ipsr))->ri;
337 u64 xip = minstate->pmsa_xip + ((struct ia64_psr *)(&minstate->pmsa_xpsr))->ri;
338
339 printk("NaT bits\t%016lx\n", minstate->pmsa_nat_bits);
340 printk("pr\t\t%016lx\n", minstate->pmsa_pr);
341 printk("b0\t\t%016lx ", minstate->pmsa_br0); print_symbol("%s\n", minstate->pmsa_br0);
342 printk("ar.rsc\t\t%016lx\n", minstate->pmsa_rsc);
343 printk("cr.iip\t\t%016lx ", iip); print_symbol("%s\n", iip);
344 printk("cr.ipsr\t\t%016lx\n", minstate->pmsa_ipsr);
345 printk("cr.ifs\t\t%016lx\n", minstate->pmsa_ifs);
346 printk("xip\t\t%016lx ", xip); print_symbol("%s\n", xip);
347 printk("xpsr\t\t%016lx\n", minstate->pmsa_xpsr);
348 printk("xfs\t\t%016lx\n", minstate->pmsa_xfs);
349 printk("b1\t\t%016lx ", minstate->pmsa_br1);
350 print_symbol("%s\n", minstate->pmsa_br1);
351
352 printk("\nstatic registers r0-r15:\n");
353 printk(" r0- 3 %016lx %016lx %016lx %016lx\n",
354 0UL, minstate->pmsa_gr[0], minstate->pmsa_gr[1], minstate->pmsa_gr[2]);
355 printk(" r4- 7 %016lx %016lx %016lx %016lx\n",
356 minstate->pmsa_gr[3], minstate->pmsa_gr[4],
357 minstate->pmsa_gr[5], minstate->pmsa_gr[6]);
358 printk(" r8-11 %016lx %016lx %016lx %016lx\n",
359 minstate->pmsa_gr[7], minstate->pmsa_gr[8],
360 minstate->pmsa_gr[9], minstate->pmsa_gr[10]);
361 printk("r12-15 %016lx %016lx %016lx %016lx\n",
362 minstate->pmsa_gr[11], minstate->pmsa_gr[12],
363 minstate->pmsa_gr[13], minstate->pmsa_gr[14]);
364
365 printk("\nbank 0:\n");
366 printk("r16-19 %016lx %016lx %016lx %016lx\n",
367 minstate->pmsa_bank0_gr[0], minstate->pmsa_bank0_gr[1],
368 minstate->pmsa_bank0_gr[2], minstate->pmsa_bank0_gr[3]);
369 printk("r20-23 %016lx %016lx %016lx %016lx\n",
370 minstate->pmsa_bank0_gr[4], minstate->pmsa_bank0_gr[5],
371 minstate->pmsa_bank0_gr[6], minstate->pmsa_bank0_gr[7]);
372 printk("r24-27 %016lx %016lx %016lx %016lx\n",
373 minstate->pmsa_bank0_gr[8], minstate->pmsa_bank0_gr[9],
374 minstate->pmsa_bank0_gr[10], minstate->pmsa_bank0_gr[11]);
375 printk("r28-31 %016lx %016lx %016lx %016lx\n",
376 minstate->pmsa_bank0_gr[12], minstate->pmsa_bank0_gr[13],
377 minstate->pmsa_bank0_gr[14], minstate->pmsa_bank0_gr[15]);
378
379 printk("\nbank 1:\n");
380 printk("r16-19 %016lx %016lx %016lx %016lx\n",
381 minstate->pmsa_bank1_gr[0], minstate->pmsa_bank1_gr[1],
382 minstate->pmsa_bank1_gr[2], minstate->pmsa_bank1_gr[3]);
383 printk("r20-23 %016lx %016lx %016lx %016lx\n",
384 minstate->pmsa_bank1_gr[4], minstate->pmsa_bank1_gr[5],
385 minstate->pmsa_bank1_gr[6], minstate->pmsa_bank1_gr[7]);
386 printk("r24-27 %016lx %016lx %016lx %016lx\n",
387 minstate->pmsa_bank1_gr[8], minstate->pmsa_bank1_gr[9],
388 minstate->pmsa_bank1_gr[10], minstate->pmsa_bank1_gr[11]);
389 printk("r28-31 %016lx %016lx %016lx %016lx\n",
390 minstate->pmsa_bank1_gr[12], minstate->pmsa_bank1_gr[13],
391 minstate->pmsa_bank1_gr[14], minstate->pmsa_bank1_gr[15]);
392}
393
394static void
395fetch_min_state (pal_min_state_area_t *ms, struct pt_regs *pt, struct switch_stack *sw)
396{
397 u64 *dst_banked, *src_banked, bit, shift, nat_bits;
398 int i;
399
400 /*
401 * First, update the pt-regs and switch-stack structures with the contents stored
402 * in the min-state area:
403 */
404 if (((struct ia64_psr *) &ms->pmsa_ipsr)->ic == 0) {
405 pt->cr_ipsr = ms->pmsa_xpsr;
406 pt->cr_iip = ms->pmsa_xip;
407 pt->cr_ifs = ms->pmsa_xfs;
408 } else {
409 pt->cr_ipsr = ms->pmsa_ipsr;
410 pt->cr_iip = ms->pmsa_iip;
411 pt->cr_ifs = ms->pmsa_ifs;
412 }
413 pt->ar_rsc = ms->pmsa_rsc;
414 pt->pr = ms->pmsa_pr;
415 pt->r1 = ms->pmsa_gr[0];
416 pt->r2 = ms->pmsa_gr[1];
417 pt->r3 = ms->pmsa_gr[2];
418 sw->r4 = ms->pmsa_gr[3];
419 sw->r5 = ms->pmsa_gr[4];
420 sw->r6 = ms->pmsa_gr[5];
421 sw->r7 = ms->pmsa_gr[6];
422 pt->r8 = ms->pmsa_gr[7];
423 pt->r9 = ms->pmsa_gr[8];
424 pt->r10 = ms->pmsa_gr[9];
425 pt->r11 = ms->pmsa_gr[10];
426 pt->r12 = ms->pmsa_gr[11];
427 pt->r13 = ms->pmsa_gr[12];
428 pt->r14 = ms->pmsa_gr[13];
429 pt->r15 = ms->pmsa_gr[14];
430 dst_banked = &pt->r16; /* r16-r31 are contiguous in struct pt_regs */
431 src_banked = ms->pmsa_bank1_gr;
432 for (i = 0; i < 16; ++i)
433 dst_banked[i] = src_banked[i];
434 pt->b0 = ms->pmsa_br0;
435 sw->b1 = ms->pmsa_br1;
436
437 /* construct the NaT bits for the pt-regs structure: */
438# define PUT_NAT_BIT(dst, addr) \
439 do { \
440 bit = nat_bits & 1; nat_bits >>= 1; \
441 shift = ((unsigned long) addr >> 3) & 0x3f; \
442 dst = ((dst) & ~(1UL << shift)) | (bit << shift); \
443 } while (0)
444
445 /* Rotate the saved NaT bits such that bit 0 corresponds to pmsa_gr[0]: */
446 shift = ((unsigned long) &ms->pmsa_gr[0] >> 3) & 0x3f;
447 nat_bits = (ms->pmsa_nat_bits >> shift) | (ms->pmsa_nat_bits << (64 - shift));
448
449 PUT_NAT_BIT(sw->caller_unat, &pt->r1);
450 PUT_NAT_BIT(sw->caller_unat, &pt->r2);
451 PUT_NAT_BIT(sw->caller_unat, &pt->r3);
452 PUT_NAT_BIT(sw->ar_unat, &sw->r4);
453 PUT_NAT_BIT(sw->ar_unat, &sw->r5);
454 PUT_NAT_BIT(sw->ar_unat, &sw->r6);
455 PUT_NAT_BIT(sw->ar_unat, &sw->r7);
456 PUT_NAT_BIT(sw->caller_unat, &pt->r8); PUT_NAT_BIT(sw->caller_unat, &pt->r9);
457 PUT_NAT_BIT(sw->caller_unat, &pt->r10); PUT_NAT_BIT(sw->caller_unat, &pt->r11);
458 PUT_NAT_BIT(sw->caller_unat, &pt->r12); PUT_NAT_BIT(sw->caller_unat, &pt->r13);
459 PUT_NAT_BIT(sw->caller_unat, &pt->r14); PUT_NAT_BIT(sw->caller_unat, &pt->r15);
460 nat_bits >>= 16; /* skip over bank0 NaT bits */
461 PUT_NAT_BIT(sw->caller_unat, &pt->r16); PUT_NAT_BIT(sw->caller_unat, &pt->r17);
462 PUT_NAT_BIT(sw->caller_unat, &pt->r18); PUT_NAT_BIT(sw->caller_unat, &pt->r19);
463 PUT_NAT_BIT(sw->caller_unat, &pt->r20); PUT_NAT_BIT(sw->caller_unat, &pt->r21);
464 PUT_NAT_BIT(sw->caller_unat, &pt->r22); PUT_NAT_BIT(sw->caller_unat, &pt->r23);
465 PUT_NAT_BIT(sw->caller_unat, &pt->r24); PUT_NAT_BIT(sw->caller_unat, &pt->r25);
466 PUT_NAT_BIT(sw->caller_unat, &pt->r26); PUT_NAT_BIT(sw->caller_unat, &pt->r27);
467 PUT_NAT_BIT(sw->caller_unat, &pt->r28); PUT_NAT_BIT(sw->caller_unat, &pt->r29);
468 PUT_NAT_BIT(sw->caller_unat, &pt->r30); PUT_NAT_BIT(sw->caller_unat, &pt->r31);
469}
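
/*
 * Example: PUT_NAT_BIT(sw->ar_unat, &sw->r4) pops the low bit of nat_bits and
 * deposits it at bit ((unsigned long) &sw->r4 >> 3) & 0x3f of sw->ar_unat,
 * which is the UNAT position ld8.fill will consult when r4 is eventually
 * restored from the switch_stack.
 */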
470
471static void
472init_handler_platform (pal_min_state_area_t *ms,
473 struct pt_regs *pt, struct switch_stack *sw)
474{
475 struct unw_frame_info info;
476
477	/* If a kernel debugger is available, call it here; else just dump the registers. */
478
479 /*
480	 * Wait for a bit.  On some machines (e.g., HP's zx2000 and zx6000), INIT can be
481 * generated via the BMC's command-line interface, but since the console is on the
482 * same serial line, the user will need some time to switch out of the BMC before
483 * the dump begins.
484 */
485 printk("Delaying for 5 seconds...\n");
486 udelay(5*1000000);
487 show_min_state(ms);
488
489 printk("Backtrace of current task (pid %d, %s)\n", current->pid, current->comm);
490 fetch_min_state(ms, pt, sw);
491 unw_init_from_interruption(&info, current, pt, sw);
492 ia64_do_show_stack(&info, NULL);
493
494#ifdef CONFIG_SMP
495 /* read_trylock() would be handy... */
496 if (!tasklist_lock.write_lock)
497 read_lock(&tasklist_lock);
498#endif
499 {
500 struct task_struct *g, *t;
501 do_each_thread (g, t) {
502 if (t == current)
503 continue;
504
505 printk("\nBacktrace of pid %d (%s)\n", t->pid, t->comm);
506 show_stack(t, NULL);
507 } while_each_thread (g, t);
508 }
509#ifdef CONFIG_SMP
510 if (!tasklist_lock.write_lock)
511 read_unlock(&tasklist_lock);
512#endif
513
514 printk("\nINIT dump complete. Please reboot now.\n");
515 while (1); /* hang city if no debugger */
516}
517
518#ifdef CONFIG_ACPI
519/*
520 * ia64_mca_register_cpev
521 *
522 * Register the corrected platform error vector with SAL.
523 *
524 * Inputs
525 * cpev Corrected Platform Error Vector number
526 *
527 * Outputs
528 * None
529 */
530static void
531ia64_mca_register_cpev (int cpev)
532{
533 /* Register the CPE interrupt vector with SAL */
534 struct ia64_sal_retval isrv;
535
536 isrv = ia64_sal_mc_set_params(SAL_MC_PARAM_CPE_INT, SAL_MC_PARAM_MECHANISM_INT, cpev, 0, 0);
537 if (isrv.status) {
538 printk(KERN_ERR "Failed to register Corrected Platform "
539 "Error interrupt vector with SAL (status %ld)\n", isrv.status);
540 return;
541 }
542
543 IA64_MCA_DEBUG("%s: corrected platform error "
544 "vector %#x registered\n", __FUNCTION__, cpev);
545}
546#endif /* CONFIG_ACPI */
547
548#endif /* PLATFORM_MCA_HANDLERS */
549
550/*
551 * ia64_mca_cmc_vector_setup
552 *
553 * Setup the corrected machine check vector register in the processor.
554	 * (The interrupt is masked on boot. ia64_mca_late_init unmasks it.)
555 * This function is invoked on a per-processor basis.
556 *
557 * Inputs
558 * None
559 *
560 * Outputs
561 * None
562 */
563void
564ia64_mca_cmc_vector_setup (void)
565{
566 cmcv_reg_t cmcv;
567
568 cmcv.cmcv_regval = 0;
569 cmcv.cmcv_mask = 1; /* Mask/disable interrupt at first */
570 cmcv.cmcv_vector = IA64_CMC_VECTOR;
571 ia64_setreg(_IA64_REG_CR_CMCV, cmcv.cmcv_regval);
572
573 IA64_MCA_DEBUG("%s: CPU %d corrected "
574 "machine check vector %#x registered.\n",
575 __FUNCTION__, smp_processor_id(), IA64_CMC_VECTOR);
576
577 IA64_MCA_DEBUG("%s: CPU %d CMCV = %#016lx\n",
578 __FUNCTION__, smp_processor_id(), ia64_getreg(_IA64_REG_CR_CMCV));
579}
580
581/*
582 * ia64_mca_cmc_vector_disable
583 *
584 * Mask the corrected machine check vector register in the processor.
585 * This function is invoked on a per-processor basis.
586 *
587 * Inputs
588 * dummy(unused)
589 *
590 * Outputs
591 * None
592 */
593static void
594ia64_mca_cmc_vector_disable (void *dummy)
595{
596 cmcv_reg_t cmcv;
597
598 cmcv.cmcv_regval = ia64_getreg(_IA64_REG_CR_CMCV);
599
600 cmcv.cmcv_mask = 1; /* Mask/disable interrupt */
601 ia64_setreg(_IA64_REG_CR_CMCV, cmcv.cmcv_regval);
602
603 IA64_MCA_DEBUG("%s: CPU %d corrected "
604 "machine check vector %#x disabled.\n",
605 __FUNCTION__, smp_processor_id(), cmcv.cmcv_vector);
606}
607
608/*
609 * ia64_mca_cmc_vector_enable
610 *
611 * Unmask the corrected machine check vector register in the processor.
612 * This function is invoked on a per-processor basis.
613 *
614 * Inputs
615 * dummy(unused)
616 *
617 * Outputs
618 * None
619 */
620static void
621ia64_mca_cmc_vector_enable (void *dummy)
622{
623 cmcv_reg_t cmcv;
624
625 cmcv.cmcv_regval = ia64_getreg(_IA64_REG_CR_CMCV);
626
627 cmcv.cmcv_mask = 0; /* Unmask/enable interrupt */
628 ia64_setreg(_IA64_REG_CR_CMCV, cmcv.cmcv_regval);
629
630 IA64_MCA_DEBUG("%s: CPU %d corrected "
631 "machine check vector %#x enabled.\n",
632 __FUNCTION__, smp_processor_id(), cmcv.cmcv_vector);
633}
634
635/*
636 * ia64_mca_cmc_vector_disable_keventd
637 *
638 * Called via keventd (smp_call_function() is not safe in interrupt context) to
639 * disable the cmc interrupt vector.
640 */
641static void
642ia64_mca_cmc_vector_disable_keventd(void *unused)
643{
644 on_each_cpu(ia64_mca_cmc_vector_disable, NULL, 1, 0);
645}
646
647/*
648 * ia64_mca_cmc_vector_enable_keventd
649 *
650 * Called via keventd (smp_call_function() is not safe in interrupt context) to
651 * enable the cmc interrupt vector.
652 */
653static void
654ia64_mca_cmc_vector_enable_keventd(void *unused)
655{
656 on_each_cpu(ia64_mca_cmc_vector_enable, NULL, 1, 0);
657}
658
659/*
660 * ia64_mca_wakeup_ipi_wait
661 *
662 * Wait for the inter-cpu interrupt to be sent by the
663 * monarch processor once it is done with handling the
664 * MCA.
665 *
666 * Inputs : None
667 * Outputs : None
668 */
669static void
670ia64_mca_wakeup_ipi_wait(void)
671{
672 int irr_num = (IA64_MCA_WAKEUP_VECTOR >> 6);
673 int irr_bit = (IA64_MCA_WAKEUP_VECTOR & 0x3f);
674 u64 irr = 0;
675
676 do {
677 switch(irr_num) {
678 case 0:
679 irr = ia64_getreg(_IA64_REG_CR_IRR0);
680 break;
681 case 1:
682 irr = ia64_getreg(_IA64_REG_CR_IRR1);
683 break;
684 case 2:
685 irr = ia64_getreg(_IA64_REG_CR_IRR2);
686 break;
687 case 3:
688 irr = ia64_getreg(_IA64_REG_CR_IRR3);
689 break;
690 }
691 cpu_relax();
692 } while (!(irr & (1UL << irr_bit))) ;
693}
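
/*
 * Vector-to-IRR mapping used above: each of cr.irr0-cr.irr3 covers 64 interrupt
 * vectors, so vector v is found in IRR register (v >> 6) at bit (v & 0x3f).
 * E.g. a (hypothetical) wakeup vector of 0xf3 would be polled as bit 0x33 of
 * cr.irr3.
 */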
694
695/*
696 * ia64_mca_wakeup
697 *
698	 * Send an inter-cpu interrupt to wake up a particular cpu
699	 * and mark that cpu as out of rendezvous.
700 *
701 * Inputs : cpuid
702 * Outputs : None
703 */
704static void
705ia64_mca_wakeup(int cpu)
706{
707 platform_send_ipi(cpu, IA64_MCA_WAKEUP_VECTOR, IA64_IPI_DM_INT, 0);
708 ia64_mc_info.imi_rendez_checkin[cpu] = IA64_MCA_RENDEZ_CHECKIN_NOTDONE;
709
710}
711
712/*
713 * ia64_mca_wakeup_all
714 *
715	 * Wake up all the cpus which have previously rendezvoused.
716 *
717 * Inputs : None
718 * Outputs : None
719 */
720static void
721ia64_mca_wakeup_all(void)
722{
723 int cpu;
724
725 /* Clear the Rendez checkin flag for all cpus */
726 for(cpu = 0; cpu < NR_CPUS; cpu++) {
727 if (!cpu_online(cpu))
728 continue;
729 if (ia64_mc_info.imi_rendez_checkin[cpu] == IA64_MCA_RENDEZ_CHECKIN_DONE)
730 ia64_mca_wakeup(cpu);
731 }
732
733}
734
735/*
736 * ia64_mca_rendez_interrupt_handler
737 *
738	 * This is the handler used to put slave processors into a spin loop
739	 * while the monarch processor does the MCA handling; the monarch later
740	 * wakes each slave up once it is done.
741 *
742 * Inputs : None
743 * Outputs : None
744 */
745static irqreturn_t
746ia64_mca_rendez_int_handler(int rendez_irq, void *arg, struct pt_regs *ptregs)
747{
748 unsigned long flags;
749 int cpu = smp_processor_id();
750
751 /* Mask all interrupts */
752 local_irq_save(flags);
753
754 ia64_mc_info.imi_rendez_checkin[cpu] = IA64_MCA_RENDEZ_CHECKIN_DONE;
755 /* Register with the SAL monarch that the slave has
756 * reached SAL
757 */
758 ia64_sal_mc_rendez();
759
760 /* Wait for the wakeup IPI from the monarch
761 * This waiting is done by polling on the wakeup-interrupt
762 * vector bit in the processor's IRRs
763 */
764 ia64_mca_wakeup_ipi_wait();
765
766 /* Enable all interrupts */
767 local_irq_restore(flags);
768 return IRQ_HANDLED;
769}
770
771/*
772 * ia64_mca_wakeup_int_handler
773 *
774 * The interrupt handler for processing the inter-cpu interrupt to the
775 * slave cpu which was spinning in the rendez loop.
776 * Since this spinning is done by turning off the interrupts and
777 * polling on the wakeup-interrupt bit in the IRR, there is
778 * nothing useful to be done in the handler.
779 *
780 * Inputs : wakeup_irq (Wakeup-interrupt bit)
781 * arg (Interrupt handler specific argument)
782 * ptregs (Exception frame at the time of the interrupt)
783 * Outputs : None
784 *
785 */
786static irqreturn_t
787ia64_mca_wakeup_int_handler(int wakeup_irq, void *arg, struct pt_regs *ptregs)
788{
789 return IRQ_HANDLED;
790}
791
792/*
793 * ia64_return_to_sal_check
794 *
795	 * This is the function called before going back from the OS_MCA handler
796	 * to the OS_MCA dispatch code, which finally returns control
797	 * back to SAL.
798	 * The main purpose of this routine is to set up the OS_MCA-to-SAL
799	 * return state, which the OS_MCA dispatch code uses
800	 * just before going back to SAL.
801 *
802 * Inputs : None
803 * Outputs : None
804 */
805
806static void
807ia64_return_to_sal_check(int recover)
808{
809
810 /* Copy over some relevant stuff from the sal_to_os_mca_handoff
811 * so that it can be used at the time of os_mca_to_sal_handoff
812 */
813 ia64_os_to_sal_handoff_state.imots_sal_gp =
814 ia64_sal_to_os_handoff_state.imsto_sal_gp;
815
816 ia64_os_to_sal_handoff_state.imots_sal_check_ra =
817 ia64_sal_to_os_handoff_state.imsto_sal_check_ra;
818
819 if (recover)
820 ia64_os_to_sal_handoff_state.imots_os_status = IA64_MCA_CORRECTED;
821 else
822 ia64_os_to_sal_handoff_state.imots_os_status = IA64_MCA_COLD_BOOT;
823
824 /* Default = tell SAL to return to same context */
825 ia64_os_to_sal_handoff_state.imots_context = IA64_MCA_SAME_CONTEXT;
826
827 ia64_os_to_sal_handoff_state.imots_new_min_state =
828 (u64 *)ia64_sal_to_os_handoff_state.pal_min_state;
829
830}
831
832/* Function pointer for extra MCA recovery */
833int (*ia64_mca_ucmc_extension)
834 (void*,ia64_mca_sal_to_os_state_t*,ia64_mca_os_to_sal_state_t*)
835 = NULL;
836
837int
838ia64_reg_MCA_extension(void *fn)
839{
840 if (ia64_mca_ucmc_extension)
841 return 1;
842
843 ia64_mca_ucmc_extension = fn;
844 return 0;
845}
846
847void
848ia64_unreg_MCA_extension(void)
849{
850 if (ia64_mca_ucmc_extension)
851 ia64_mca_ucmc_extension = NULL;
852}
853
854EXPORT_SYMBOL(ia64_reg_MCA_extension);
855EXPORT_SYMBOL(ia64_unreg_MCA_extension);
856
857/*
858 * ia64_mca_ucmc_handler
859 *
860	 * This is the uncorrectable machine check handler, called from the OS_MCA
861	 * dispatch code, which is in turn called from SAL_CHECK().
862 * This is the place where the core of OS MCA handling is done.
863 * Right now the logs are extracted and displayed in a well-defined
864 * format. This handler code is supposed to be run only on the
865	 * monarch processor.  Once the monarch is done with MCA handling,
866	 * further MCA logging is enabled by clearing the logs.
867 * Monarch also has the duty of sending wakeup-IPIs to pull the
868 * slave processors out of rendezvous spinloop.
869 *
870 * Inputs : None
871 * Outputs : None
872 */
873void
874ia64_mca_ucmc_handler(void)
875{
876 pal_processor_state_info_t *psp = (pal_processor_state_info_t *)
877 &ia64_sal_to_os_handoff_state.proc_state_param;
878 int recover;
879
880 /* Get the MCA error record and log it */
881 ia64_mca_log_sal_error_record(SAL_INFO_TYPE_MCA);
882
883	/* A TLB error is the only error present in this SAL error record */
884 recover = (psp->tc && !(psp->cc || psp->bc || psp->rc || psp->uc))
885 /* other error recovery */
886 || (ia64_mca_ucmc_extension
887 && ia64_mca_ucmc_extension(
888 IA64_LOG_CURR_BUFFER(SAL_INFO_TYPE_MCA),
889 &ia64_sal_to_os_handoff_state,
890 &ia64_os_to_sal_handoff_state));
891
892 if (recover) {
893 sal_log_record_header_t *rh = IA64_LOG_CURR_BUFFER(SAL_INFO_TYPE_MCA);
894 rh->severity = sal_log_severity_corrected;
895 ia64_sal_clear_state_info(SAL_INFO_TYPE_MCA);
896 }
897 /*
898 * Wakeup all the processors which are spinning in the rendezvous
899 * loop.
900 */
901 ia64_mca_wakeup_all();
902
903 /* Return to SAL */
904 ia64_return_to_sal_check(recover);
905}
906
907static DECLARE_WORK(cmc_disable_work, ia64_mca_cmc_vector_disable_keventd, NULL);
908static DECLARE_WORK(cmc_enable_work, ia64_mca_cmc_vector_enable_keventd, NULL);
909
910/*
911 * ia64_mca_cmc_int_handler
912 *
913 * This is corrected machine check interrupt handler.
914 * Right now the logs are extracted and displayed in a well-defined
915 * format.
916 *
917 * Inputs
918 * interrupt number
919 * client data arg ptr
920 * saved registers ptr
921 *
922 * Outputs
923 * None
924 */
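/*
 * Storm-detection sketch: cmc_history[] keeps the jiffies timestamps of the
 * most recent CMC interrupts.  If CMC_HISTORY_LENGTH of them land within any
 * one-second (HZ) window, the handler sets cmc_polling_enabled, queues
 * cmc_disable_work to mask the per-CPU CMC vector, and arms cmc_poll_timer
 * so corrected errors are harvested by polling until the storm subsides.
 */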
925static irqreturn_t
926ia64_mca_cmc_int_handler(int cmc_irq, void *arg, struct pt_regs *ptregs)
927{
928 static unsigned long cmc_history[CMC_HISTORY_LENGTH];
929 static int index;
930 static DEFINE_SPINLOCK(cmc_history_lock);
931
932 IA64_MCA_DEBUG("%s: received interrupt vector = %#x on CPU %d\n",
933 __FUNCTION__, cmc_irq, smp_processor_id());
934
935 /* SAL spec states this should run w/ interrupts enabled */
936 local_irq_enable();
937
938 /* Get the CMC error record and log it */
939 ia64_mca_log_sal_error_record(SAL_INFO_TYPE_CMC);
940
941 spin_lock(&cmc_history_lock);
942 if (!cmc_polling_enabled) {
943 int i, count = 1; /* we know 1 happened now */
944 unsigned long now = jiffies;
945
946 for (i = 0; i < CMC_HISTORY_LENGTH; i++) {
947 if (now - cmc_history[i] <= HZ)
948 count++;
949 }
950
951 IA64_MCA_DEBUG(KERN_INFO "CMC threshold %d/%d\n", count, CMC_HISTORY_LENGTH);
952 if (count >= CMC_HISTORY_LENGTH) {
953
954 cmc_polling_enabled = 1;
955 spin_unlock(&cmc_history_lock);
956 schedule_work(&cmc_disable_work);
957
958 /*
959 * Corrected errors will still be corrected, but
960 * make sure there's a log somewhere that indicates
961 * something is generating more than we can handle.
962 */
963 printk(KERN_WARNING "WARNING: Switching to polling CMC handler; error records may be lost\n");
964
965 mod_timer(&cmc_poll_timer, jiffies + CMC_POLL_INTERVAL);
966
967 /* lock already released, get out now */
968 return IRQ_HANDLED;
969 } else {
970 cmc_history[index++] = now;
971 if (index == CMC_HISTORY_LENGTH)
972 index = 0;
973 }
974 }
975 spin_unlock(&cmc_history_lock);
976 return IRQ_HANDLED;
977}
978
979/*
980 * ia64_mca_cmc_int_caller
981 *
982 * Triggered by sw interrupt from CMC polling routine. Calls
983 * real interrupt handler and either triggers a sw interrupt
984 * on the next cpu or does cleanup at the end.
985 *
986 * Inputs
987 * interrupt number
988 * client data arg ptr
989 * saved registers ptr
990 * Outputs
991 * handled
992 */
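/*
 * start_count snapshots the number of logged CMC records when the polling
 * cascade starts on the first online CPU.  If the last CPU in the cascade
 * finds that count unchanged, no new errors arrived during the sweep and we
 * switch back to interrupt-driven mode; otherwise the poll timer is re-armed.
 */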
993static irqreturn_t
994ia64_mca_cmc_int_caller(int cmc_irq, void *arg, struct pt_regs *ptregs)
995{
996 static int start_count = -1;
997 unsigned int cpuid;
998
999 cpuid = smp_processor_id();
1000
1001 /* If first cpu, update count */
1002 if (start_count == -1)
1003 start_count = IA64_LOG_COUNT(SAL_INFO_TYPE_CMC);
1004
1005 ia64_mca_cmc_int_handler(cmc_irq, arg, ptregs);
1006
1007 for (++cpuid ; cpuid < NR_CPUS && !cpu_online(cpuid) ; cpuid++);
1008
1009 if (cpuid < NR_CPUS) {
1010 platform_send_ipi(cpuid, IA64_CMCP_VECTOR, IA64_IPI_DM_INT, 0);
1011 } else {
1012 /* If no log record, switch out of polling mode */
1013 if (start_count == IA64_LOG_COUNT(SAL_INFO_TYPE_CMC)) {
1014
1015 printk(KERN_WARNING "Returning to interrupt driven CMC handler\n");
1016 schedule_work(&cmc_enable_work);
1017 cmc_polling_enabled = 0;
1018
1019 } else {
1020
1021 mod_timer(&cmc_poll_timer, jiffies + CMC_POLL_INTERVAL);
1022 }
1023
1024 start_count = -1;
1025 }
1026
1027 return IRQ_HANDLED;
1028}
1029
1030/*
1031 * ia64_mca_cmc_poll
1032 *
1033 * Poll for Corrected Machine Checks (CMCs)
1034 *
1035 * Inputs : dummy(unused)
1036 * Outputs : None
1037 *
1038 */
1039static void
1040ia64_mca_cmc_poll (unsigned long dummy)
1041{
1042 /* Trigger a CMC interrupt cascade */
1043 platform_send_ipi(first_cpu(cpu_online_map), IA64_CMCP_VECTOR, IA64_IPI_DM_INT, 0);
1044}
1045
1046/*
1047 * ia64_mca_cpe_int_caller
1048 *
1049 * Triggered by sw interrupt from CPE polling routine. Calls
1050 * real interrupt handler and either triggers a sw interrupt
1051 * on the next cpu or does cleanup at the end.
1052 *
1053 * Inputs
1054 * interrupt number
1055 * client data arg ptr
1056 * saved registers ptr
1057 * Outputs
1058 * handled
1059 */
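/*
 * Unlike the CMC caller above, the CPE poll interval adapts: it is halved
 * (down to MIN_CPE_POLL_INTERVAL) whenever new records show up, doubled
 * (up to MAX_CPE_POLL_INTERVAL) while the platform lacks a CPEI vector and
 * nothing is being logged, and polling stops altogether once a CPEI vector
 * is available and the error burst is over.
 */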
1060#ifdef CONFIG_ACPI
1061
1062static irqreturn_t
1063ia64_mca_cpe_int_caller(int cpe_irq, void *arg, struct pt_regs *ptregs)
1064{
1065 static int start_count = -1;
1066 static int poll_time = MIN_CPE_POLL_INTERVAL;
1067 unsigned int cpuid;
1068
1069 cpuid = smp_processor_id();
1070
1071 /* If first cpu, update count */
1072 if (start_count == -1)
1073 start_count = IA64_LOG_COUNT(SAL_INFO_TYPE_CPE);
1074
1075 ia64_mca_cpe_int_handler(cpe_irq, arg, ptregs);
1076
1077 for (++cpuid ; cpuid < NR_CPUS && !cpu_online(cpuid) ; cpuid++);
1078
1079 if (cpuid < NR_CPUS) {
1080 platform_send_ipi(cpuid, IA64_CPEP_VECTOR, IA64_IPI_DM_INT, 0);
1081 } else {
1082 /*
1083 * If a log was recorded, increase our polling frequency,
 1084	 * otherwise, back off or return to interrupt mode.
1085 */
1086 if (start_count != IA64_LOG_COUNT(SAL_INFO_TYPE_CPE)) {
1087 poll_time = max(MIN_CPE_POLL_INTERVAL, poll_time / 2);
1088 } else if (cpe_vector < 0) {
1089 poll_time = min(MAX_CPE_POLL_INTERVAL, poll_time * 2);
1090 } else {
1091 poll_time = MIN_CPE_POLL_INTERVAL;
1092
1093 printk(KERN_WARNING "Returning to interrupt driven CPE handler\n");
1094 enable_irq(local_vector_to_irq(IA64_CPE_VECTOR));
1095 cpe_poll_enabled = 0;
1096 }
1097
1098 if (cpe_poll_enabled)
1099 mod_timer(&cpe_poll_timer, jiffies + poll_time);
1100 start_count = -1;
1101 }
1102
1103 return IRQ_HANDLED;
1104}
1105
1106#endif /* CONFIG_ACPI */
1107
1108/*
1109 * ia64_mca_cpe_poll
1110 *
1111 * Poll for Corrected Platform Errors (CPEs), trigger interrupt
1112 * on first cpu, from there it will trickle through all the cpus.
1113 *
1114 * Inputs : dummy(unused)
1115 * Outputs : None
1116 *
1117 */
1118static void
1119ia64_mca_cpe_poll (unsigned long dummy)
1120{
1121 /* Trigger a CPE interrupt cascade */
1122 platform_send_ipi(first_cpu(cpu_online_map), IA64_CPEP_VECTOR, IA64_IPI_DM_INT, 0);
1123}
1124
1125/*
1126 * C portion of the OS INIT handler
1127 *
1128 * Called from ia64_monarch_init_handler
1129 *
1130 * Inputs: pointer to pt_regs where processor info was saved.
1131 *
1132 * Returns:
1133 * 0 if SAL must warm boot the System
1134 * 1 if SAL must return to interrupted context using PAL_MC_RESUME
1135 *
1136 */
1137void
1138ia64_init_handler (struct pt_regs *pt, struct switch_stack *sw)
1139{
1140 pal_min_state_area_t *ms;
1141
1142 oops_in_progress = 1; /* avoid deadlock in printk, but it makes recovery dodgy */
1143 console_loglevel = 15; /* make sure printks make it to console */
1144
1145 printk(KERN_INFO "Entered OS INIT handler. PSP=%lx\n",
1146 ia64_sal_to_os_handoff_state.proc_state_param);
1147
1148 /*
1149 * Address of minstate area provided by PAL is physical,
1150 * uncacheable (bit 63 set). Convert to Linux virtual
1151 * address in region 6.
1152 */
1153 ms = (pal_min_state_area_t *)(ia64_sal_to_os_handoff_state.pal_min_state | (6ul<<61));
1154
1155 init_handler_platform(ms, pt, sw); /* call platform specific routines */
1156}
1157
1158static int __init
1159ia64_mca_disable_cpe_polling(char *str)
1160{
1161 cpe_poll_enabled = 0;
1162 return 1;
1163}
1164
1165__setup("disable_cpe_poll", ia64_mca_disable_cpe_polling);
1166
1167static struct irqaction cmci_irqaction = {
1168 .handler = ia64_mca_cmc_int_handler,
1169 .flags = SA_INTERRUPT,
1170 .name = "cmc_hndlr"
1171};
1172
1173static struct irqaction cmcp_irqaction = {
1174 .handler = ia64_mca_cmc_int_caller,
1175 .flags = SA_INTERRUPT,
1176 .name = "cmc_poll"
1177};
1178
1179static struct irqaction mca_rdzv_irqaction = {
1180 .handler = ia64_mca_rendez_int_handler,
1181 .flags = SA_INTERRUPT,
1182 .name = "mca_rdzv"
1183};
1184
1185static struct irqaction mca_wkup_irqaction = {
1186 .handler = ia64_mca_wakeup_int_handler,
1187 .flags = SA_INTERRUPT,
1188 .name = "mca_wkup"
1189};
1190
1191#ifdef CONFIG_ACPI
1192static struct irqaction mca_cpe_irqaction = {
1193 .handler = ia64_mca_cpe_int_handler,
1194 .flags = SA_INTERRUPT,
1195 .name = "cpe_hndlr"
1196};
1197
1198static struct irqaction mca_cpep_irqaction = {
1199 .handler = ia64_mca_cpe_int_caller,
1200 .flags = SA_INTERRUPT,
1201 .name = "cpe_poll"
1202};
1203#endif /* CONFIG_ACPI */
1204
1205/* Do per-CPU MCA-related initialization. */
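/*
 * The boot CPU carves a single bootmem block into NR_CPUS save areas and
 * records each area's physical address in __per_cpu_mca[]; every CPU (boot
 * and secondary alike) then stashes its own physical address plus the PTEs
 * needed to remap the per-CPU page and the PAL code while the MCA handler
 * runs in physical mode (see the TLB reload sequence in mca_asm.S below).
 */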
1206
1207void __devinit
1208ia64_mca_cpu_init(void *cpu_data)
1209{
1210 void *pal_vaddr;
1211
1212 if (smp_processor_id() == 0) {
1213 void *mca_data;
1214 int cpu;
1215
1216 mca_data = alloc_bootmem(sizeof(struct ia64_mca_cpu)
1217 * NR_CPUS);
1218 for (cpu = 0; cpu < NR_CPUS; cpu++) {
1219 __per_cpu_mca[cpu] = __pa(mca_data);
1220 mca_data += sizeof(struct ia64_mca_cpu);
1221 }
1222 }
1223
1224 /*
1225 * The MCA info structure was allocated earlier and its
1226 * physical address saved in __per_cpu_mca[cpu]. Copy that
 1227	 * address to ia64_mca_data so we can access it as a per-CPU
1228 * variable.
1229 */
1230 __get_cpu_var(ia64_mca_data) = __per_cpu_mca[smp_processor_id()];
1231
1232 /*
1233 * Stash away a copy of the PTE needed to map the per-CPU page.
1234 * We may need it during MCA recovery.
1235 */
1236 __get_cpu_var(ia64_mca_per_cpu_pte) =
1237 pte_val(mk_pte_phys(__pa(cpu_data), PAGE_KERNEL));
1238
1239 /*
1240 * Also, stash away a copy of the PAL address and the PTE
1241 * needed to map it.
1242 */
1243 pal_vaddr = efi_get_pal_addr();
1244 if (!pal_vaddr)
1245 return;
1246 __get_cpu_var(ia64_mca_pal_base) =
1247 GRANULEROUNDDOWN((unsigned long) pal_vaddr);
1248 __get_cpu_var(ia64_mca_pal_pte) = pte_val(mk_pte_phys(__pa(pal_vaddr),
1249 PAGE_KERNEL));
1250}
1251
1252/*
1253 * ia64_mca_init
1254 *
1255 * Do all the system level mca specific initialization.
1256 *
1257 * 1. Register spinloop and wakeup request interrupt vectors
1258 *
1259 * 2. Register OS_MCA handler entry point
1260 *
1261 * 3. Register OS_INIT handler entry point
1262 *
1263 * 4. Initialize MCA/CMC/INIT related log buffers maintained by the OS.
1264 *
1265 * Note that this initialization is done very early before some kernel
1266 * services are available.
1267 *
1268 * Inputs : None
1269 *
1270 * Outputs : None
1271 */
1272void __init
1273ia64_mca_init(void)
1274{
1275 ia64_fptr_t *mon_init_ptr = (ia64_fptr_t *)ia64_monarch_init_handler;
1276 ia64_fptr_t *slave_init_ptr = (ia64_fptr_t *)ia64_slave_init_handler;
1277 ia64_fptr_t *mca_hldlr_ptr = (ia64_fptr_t *)ia64_os_mca_dispatch;
1278 int i;
1279 s64 rc;
1280 struct ia64_sal_retval isrv;
1281 u64 timeout = IA64_MCA_RENDEZ_TIMEOUT; /* platform specific */
1282
1283 IA64_MCA_DEBUG("%s: begin\n", __FUNCTION__);
1284
1285 /* Clear the Rendez checkin flag for all cpus */
1286 for(i = 0 ; i < NR_CPUS; i++)
1287 ia64_mc_info.imi_rendez_checkin[i] = IA64_MCA_RENDEZ_CHECKIN_NOTDONE;
1288
1289 /*
1290 * Register the rendezvous spinloop and wakeup mechanism with SAL
1291 */
1292
1293 /* Register the rendezvous interrupt vector with SAL */
1294 while (1) {
1295 isrv = ia64_sal_mc_set_params(SAL_MC_PARAM_RENDEZ_INT,
1296 SAL_MC_PARAM_MECHANISM_INT,
1297 IA64_MCA_RENDEZ_VECTOR,
1298 timeout,
1299 SAL_MC_PARAM_RZ_ALWAYS);
1300 rc = isrv.status;
1301 if (rc == 0)
1302 break;
1303 if (rc == -2) {
1304 printk(KERN_INFO "Increasing MCA rendezvous timeout from "
1305 "%ld to %ld milliseconds\n", timeout, isrv.v0);
1306 timeout = isrv.v0;
1307 continue;
1308 }
1309 printk(KERN_ERR "Failed to register rendezvous interrupt "
1310 "with SAL (status %ld)\n", rc);
1311 return;
1312 }
1313
1314 /* Register the wakeup interrupt vector with SAL */
1315 isrv = ia64_sal_mc_set_params(SAL_MC_PARAM_RENDEZ_WAKEUP,
1316 SAL_MC_PARAM_MECHANISM_INT,
1317 IA64_MCA_WAKEUP_VECTOR,
1318 0, 0);
1319 rc = isrv.status;
1320 if (rc) {
1321 printk(KERN_ERR "Failed to register wakeup interrupt with SAL "
1322 "(status %ld)\n", rc);
1323 return;
1324 }
1325
1326 IA64_MCA_DEBUG("%s: registered MCA rendezvous spinloop and wakeup mech.\n", __FUNCTION__);
1327
1328 ia64_mc_info.imi_mca_handler = ia64_tpa(mca_hldlr_ptr->fp);
1329 /*
1330 * XXX - disable SAL checksum by setting size to 0; should be
1331 * ia64_tpa(ia64_os_mca_dispatch_end) - ia64_tpa(ia64_os_mca_dispatch);
1332 */
1333 ia64_mc_info.imi_mca_handler_size = 0;
1334
1335 /* Register the os mca handler with SAL */
1336 if ((rc = ia64_sal_set_vectors(SAL_VECTOR_OS_MCA,
1337 ia64_mc_info.imi_mca_handler,
1338 ia64_tpa(mca_hldlr_ptr->gp),
1339 ia64_mc_info.imi_mca_handler_size,
1340 0, 0, 0)))
1341 {
1342 printk(KERN_ERR "Failed to register OS MCA handler with SAL "
1343 "(status %ld)\n", rc);
1344 return;
1345 }
1346
1347 IA64_MCA_DEBUG("%s: registered OS MCA handler with SAL at 0x%lx, gp = 0x%lx\n", __FUNCTION__,
1348 ia64_mc_info.imi_mca_handler, ia64_tpa(mca_hldlr_ptr->gp));
1349
1350 /*
1351 * XXX - disable SAL checksum by setting size to 0, should be
1352 * size of the actual init handler in mca_asm.S.
1353 */
1354 ia64_mc_info.imi_monarch_init_handler = ia64_tpa(mon_init_ptr->fp);
1355 ia64_mc_info.imi_monarch_init_handler_size = 0;
1356 ia64_mc_info.imi_slave_init_handler = ia64_tpa(slave_init_ptr->fp);
1357 ia64_mc_info.imi_slave_init_handler_size = 0;
1358
1359 IA64_MCA_DEBUG("%s: OS INIT handler at %lx\n", __FUNCTION__,
1360 ia64_mc_info.imi_monarch_init_handler);
1361
1362 /* Register the os init handler with SAL */
1363 if ((rc = ia64_sal_set_vectors(SAL_VECTOR_OS_INIT,
1364 ia64_mc_info.imi_monarch_init_handler,
1365 ia64_tpa(ia64_getreg(_IA64_REG_GP)),
1366 ia64_mc_info.imi_monarch_init_handler_size,
1367 ia64_mc_info.imi_slave_init_handler,
1368 ia64_tpa(ia64_getreg(_IA64_REG_GP)),
1369 ia64_mc_info.imi_slave_init_handler_size)))
1370 {
1371 printk(KERN_ERR "Failed to register m/s INIT handlers with SAL "
1372 "(status %ld)\n", rc);
1373 return;
1374 }
1375
1376 IA64_MCA_DEBUG("%s: registered OS INIT handler with SAL\n", __FUNCTION__);
1377
1378 /*
1379 * Configure the CMCI/P vector and handler. Interrupts for CMC are
1380 * per-processor, so AP CMC interrupts are setup in smp_callin() (smpboot.c).
1381 */
1382 register_percpu_irq(IA64_CMC_VECTOR, &cmci_irqaction);
1383 register_percpu_irq(IA64_CMCP_VECTOR, &cmcp_irqaction);
1384 ia64_mca_cmc_vector_setup(); /* Setup vector on BSP */
1385
1386 /* Setup the MCA rendezvous interrupt vector */
1387 register_percpu_irq(IA64_MCA_RENDEZ_VECTOR, &mca_rdzv_irqaction);
1388
1389 /* Setup the MCA wakeup interrupt vector */
1390 register_percpu_irq(IA64_MCA_WAKEUP_VECTOR, &mca_wkup_irqaction);
1391
1392#ifdef CONFIG_ACPI
1393 /* Setup the CPEI/P vector and handler */
1394 cpe_vector = acpi_request_vector(ACPI_INTERRUPT_CPEI);
1395 register_percpu_irq(IA64_CPEP_VECTOR, &mca_cpep_irqaction);
1396#endif
1397
1398 /* Initialize the areas set aside by the OS to buffer the
1399 * platform/processor error states for MCA/INIT/CMC
1400 * handling.
1401 */
1402 ia64_log_init(SAL_INFO_TYPE_MCA);
1403 ia64_log_init(SAL_INFO_TYPE_INIT);
1404 ia64_log_init(SAL_INFO_TYPE_CMC);
1405 ia64_log_init(SAL_INFO_TYPE_CPE);
1406
1407 mca_init = 1;
1408 printk(KERN_INFO "MCA related initialization done\n");
1409}
1410
1411/*
1412 * ia64_mca_late_init
1413 *
1414 * Opportunity to setup things that require initialization later
1415 * than ia64_mca_init. Setup a timer to poll for CPEs if the
1416 * platform doesn't support an interrupt driven mechanism.
1417 *
1418 * Inputs : None
1419 * Outputs : Status
1420 */
1421static int __init
1422ia64_mca_late_init(void)
1423{
1424 if (!mca_init)
1425 return 0;
1426
1427 /* Setup the CMCI/P vector and handler */
1428 init_timer(&cmc_poll_timer);
1429 cmc_poll_timer.function = ia64_mca_cmc_poll;
1430
1431 /* Unmask/enable the vector */
1432 cmc_polling_enabled = 0;
1433 schedule_work(&cmc_enable_work);
1434
1435 IA64_MCA_DEBUG("%s: CMCI/P setup and enabled.\n", __FUNCTION__);
1436
1437#ifdef CONFIG_ACPI
1438 /* Setup the CPEI/P vector and handler */
1439 init_timer(&cpe_poll_timer);
1440 cpe_poll_timer.function = ia64_mca_cpe_poll;
1441
1442 {
1443 irq_desc_t *desc;
1444 unsigned int irq;
1445
1446 if (cpe_vector >= 0) {
1447 /* If platform supports CPEI, enable the irq. */
1448 cpe_poll_enabled = 0;
1449 for (irq = 0; irq < NR_IRQS; ++irq)
1450 if (irq_to_vector(irq) == cpe_vector) {
1451 desc = irq_descp(irq);
1452 desc->status |= IRQ_PER_CPU;
1453 setup_irq(irq, &mca_cpe_irqaction);
1454 }
1455 ia64_mca_register_cpev(cpe_vector);
1456 IA64_MCA_DEBUG("%s: CPEI/P setup and enabled.\n", __FUNCTION__);
1457 } else {
1458 /* If platform doesn't support CPEI, get the timer going. */
1459 if (cpe_poll_enabled) {
1460 ia64_mca_cpe_poll(0UL);
1461 IA64_MCA_DEBUG("%s: CPEP setup and enabled.\n", __FUNCTION__);
1462 }
1463 }
1464 }
1465#endif
1466
1467 return 0;
1468}
1469
1470device_initcall(ia64_mca_late_init);
diff --git a/arch/ia64/kernel/mca_asm.S b/arch/ia64/kernel/mca_asm.S
new file mode 100644
index 000000000000..cf3f8014f9ad
--- /dev/null
+++ b/arch/ia64/kernel/mca_asm.S
@@ -0,0 +1,928 @@
1//
2// assembly portion of the IA64 MCA handling
3//
4// Mods by cfleck to integrate into kernel build
5// 00/03/15 davidm Added various stop bits to get a clean compile
6//
7// 00/03/29 cfleck Added code to save INIT handoff state in pt_regs format, switch to temp
8// kstack, switch modes, jump to C INIT handler
9//
10// 02/01/04 J.Hall <jenna.s.hall@intel.com>
11// Before entering virtual mode code:
12// 1. Check for TLB CPU error
13// 2. Restore current thread pointer to kr6
14// 3. Move stack ptr 16 bytes to conform to C calling convention
15//
16// 04/11/12 Russ Anderson <rja@sgi.com>
17// Added per cpu MCA/INIT stack save areas.
18//
19#include <linux/config.h>
20#include <linux/threads.h>
21
22#include <asm/asmmacro.h>
23#include <asm/pgtable.h>
24#include <asm/processor.h>
25#include <asm/mca_asm.h>
26#include <asm/mca.h>
27
28/*
29 * When we get a machine check, the kernel stack pointer is no longer
30 * valid, so we need to set a new stack pointer.
31 */
32#define MINSTATE_PHYS /* Make sure stack access is physical for MINSTATE */
33
34/*
35 * Needed for return context to SAL
36 */
37#define IA64_MCA_SAME_CONTEXT 0
38#define IA64_MCA_COLD_BOOT -2
39
40#include "minstate.h"
41
42/*
43 * SAL_TO_OS_MCA_HANDOFF_STATE (SAL 3.0 spec)
44 * 1. GR1 = OS GP
45 * 2. GR8 = PAL_PROC physical address
46 * 3. GR9 = SAL_PROC physical address
47 * 4. GR10 = SAL GP (physical)
48 * 5. GR11 = Rendez state
49 * 6. GR12 = Return address to location within SAL_CHECK
50 */
51#define SAL_TO_OS_MCA_HANDOFF_STATE_SAVE(_tmp) \
52 LOAD_PHYSICAL(p0, _tmp, ia64_sal_to_os_handoff_state);; \
53 st8 [_tmp]=r1,0x08;; \
54 st8 [_tmp]=r8,0x08;; \
55 st8 [_tmp]=r9,0x08;; \
56 st8 [_tmp]=r10,0x08;; \
57 st8 [_tmp]=r11,0x08;; \
58 st8 [_tmp]=r12,0x08;; \
59 st8 [_tmp]=r17,0x08;; \
60 st8 [_tmp]=r18,0x08
61
62/*
63 * OS_MCA_TO_SAL_HANDOFF_STATE (SAL 3.0 spec)
64 * (p6) is executed if we never entered virtual mode (TLB error)
65 * (p7) is executed if we entered virtual mode as expected (normal case)
66 * 1. GR8 = OS_MCA return status
67 * 2. GR9 = SAL GP (physical)
68 * 3. GR10 = 0/1 returning same/new context
69 * 4. GR22 = New min state save area pointer
70 * returns ptr to SAL rtn save loc in _tmp
71 */
72#define OS_MCA_TO_SAL_HANDOFF_STATE_RESTORE(_tmp) \
73 movl _tmp=ia64_os_to_sal_handoff_state;; \
74 DATA_VA_TO_PA(_tmp);; \
75 ld8 r8=[_tmp],0x08;; \
76 ld8 r9=[_tmp],0x08;; \
77 ld8 r10=[_tmp],0x08;; \
78 ld8 r22=[_tmp],0x08;;
79 // now _tmp is pointing to SAL rtn save location
80
81/*
82 * COLD_BOOT_HANDOFF_STATE() sets ia64_mca_os_to_sal_state
83 * imots_os_status=IA64_MCA_COLD_BOOT
84 * imots_sal_gp=SAL GP
85 * imots_context=IA64_MCA_SAME_CONTEXT
86 * imots_new_min_state=Min state save area pointer
87 * imots_sal_check_ra=Return address to location within SAL_CHECK
88 *
89 */
90#define COLD_BOOT_HANDOFF_STATE(sal_to_os_handoff,os_to_sal_handoff,tmp)\
91 movl tmp=IA64_MCA_COLD_BOOT; \
92 movl sal_to_os_handoff=__pa(ia64_sal_to_os_handoff_state); \
93 movl os_to_sal_handoff=__pa(ia64_os_to_sal_handoff_state);; \
94 st8 [os_to_sal_handoff]=tmp,8;; \
95 ld8 tmp=[sal_to_os_handoff],48;; \
96 st8 [os_to_sal_handoff]=tmp,8;; \
97 movl tmp=IA64_MCA_SAME_CONTEXT;; \
98 st8 [os_to_sal_handoff]=tmp,8;; \
99 ld8 tmp=[sal_to_os_handoff],-8;; \
100 st8 [os_to_sal_handoff]=tmp,8;; \
101 ld8 tmp=[sal_to_os_handoff];; \
102 st8 [os_to_sal_handoff]=tmp;;
103
104#define GET_IA64_MCA_DATA(reg) \
105 GET_THIS_PADDR(reg, ia64_mca_data) \
106 ;; \
107 ld8 reg=[reg]
108
109 .global ia64_os_mca_dispatch
110 .global ia64_os_mca_dispatch_end
111 .global ia64_sal_to_os_handoff_state
112 .global ia64_os_to_sal_handoff_state
113
114 .text
115 .align 16
116
117ia64_os_mca_dispatch:
118
119 // Serialize all MCA processing
120 mov r3=1;;
121 LOAD_PHYSICAL(p0,r2,ia64_mca_serialize);;
122ia64_os_mca_spin:
123 xchg8 r4=[r2],r3;;
124 cmp.ne p6,p0=r4,r0
125(p6) br ia64_os_mca_spin
126
127 // Save the SAL to OS MCA handoff state as defined
128 // by SAL SPEC 3.0
129 // NOTE : The order in which the state gets saved
130 // is dependent on the way the C-structure
131 // for ia64_mca_sal_to_os_state_t has been
132 // defined in include/asm/mca.h
133 SAL_TO_OS_MCA_HANDOFF_STATE_SAVE(r2)
134 ;;
135
136 // LOG PROCESSOR STATE INFO FROM HERE ON..
137begin_os_mca_dump:
138 br ia64_os_mca_proc_state_dump;;
139
140ia64_os_mca_done_dump:
141
142 LOAD_PHYSICAL(p0,r16,ia64_sal_to_os_handoff_state+56)
143 ;;
144 ld8 r18=[r16] // Get processor state parameter on existing PALE_CHECK.
145 ;;
146 tbit.nz p6,p7=r18,60
147(p7) br.spnt done_tlb_purge_and_reload
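	// Per the 02/01/04 changelog entry at the top of this file, this tests
	// the processor-state-parameter bit that flags a TLB CPU error; when it
	// is clear the TLB can still be trusted and the purge/reload sequence
	// below is skipped.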
148
149 // The following code purges TC and TR entries. Then reload all TC entries.
150 // Purge percpu data TC entries.
151begin_tlb_purge_and_reload:
152
153#define O(member) IA64_CPUINFO_##member##_OFFSET
154
155 GET_THIS_PADDR(r2, cpu_info) // load phys addr of cpu_info into r2
156 ;;
157 addl r17=O(PTCE_STRIDE),r2
158 addl r2=O(PTCE_BASE),r2
159 ;;
160 ld8 r18=[r2],(O(PTCE_COUNT)-O(PTCE_BASE));; // r18=ptce_base
161 ld4 r19=[r2],4 // r19=ptce_count[0]
162 ld4 r21=[r17],4 // r21=ptce_stride[0]
163 ;;
164 ld4 r20=[r2] // r20=ptce_count[1]
165 ld4 r22=[r17] // r22=ptce_stride[1]
166 mov r24=0
167 ;;
168 adds r20=-1,r20
169 ;;
170#undef O
171
1722:
173 cmp.ltu p6,p7=r24,r19
174(p7) br.cond.dpnt.few 4f
175 mov ar.lc=r20
1763:
177 ptc.e r18
178 ;;
179 add r18=r22,r18
180 br.cloop.sptk.few 3b
181 ;;
182 add r18=r21,r18
183 add r24=1,r24
184 ;;
185 br.sptk.few 2b
1864:
187 srlz.i // srlz.i implies srlz.d
188 ;;
189
190 // Now purge addresses formerly mapped by TR registers
191 // 1. Purge ITR&DTR for kernel.
192 movl r16=KERNEL_START
193 mov r18=KERNEL_TR_PAGE_SHIFT<<2
194 ;;
195 ptr.i r16, r18
196 ptr.d r16, r18
197 ;;
198 srlz.i
199 ;;
200 srlz.d
201 ;;
202 // 2. Purge DTR for PERCPU data.
203 movl r16=PERCPU_ADDR
204 mov r18=PERCPU_PAGE_SHIFT<<2
205 ;;
206 ptr.d r16,r18
207 ;;
208 srlz.d
209 ;;
210 // 3. Purge ITR for PAL code.
211 GET_THIS_PADDR(r2, ia64_mca_pal_base)
212 ;;
213 ld8 r16=[r2]
214 mov r18=IA64_GRANULE_SHIFT<<2
215 ;;
216 ptr.i r16,r18
217 ;;
218 srlz.i
219 ;;
220 // 4. Purge DTR for stack.
221 mov r16=IA64_KR(CURRENT_STACK)
222 ;;
223 shl r16=r16,IA64_GRANULE_SHIFT
224 movl r19=PAGE_OFFSET
225 ;;
226 add r16=r19,r16
227 mov r18=IA64_GRANULE_SHIFT<<2
228 ;;
229 ptr.d r16,r18
230 ;;
231 srlz.i
232 ;;
233 // Finally reload the TR registers.
234 // 1. Reload DTR/ITR registers for kernel.
235 mov r18=KERNEL_TR_PAGE_SHIFT<<2
236 movl r17=KERNEL_START
237 ;;
238 mov cr.itir=r18
239 mov cr.ifa=r17
240 mov r16=IA64_TR_KERNEL
241 mov r19=ip
242 movl r18=PAGE_KERNEL
243 ;;
244 dep r17=0,r19,0, KERNEL_TR_PAGE_SHIFT
245 ;;
246 or r18=r17,r18
247 ;;
248 itr.i itr[r16]=r18
249 ;;
250 itr.d dtr[r16]=r18
251 ;;
252 srlz.i
253 srlz.d
254 ;;
255 // 2. Reload DTR register for PERCPU data.
256 GET_THIS_PADDR(r2, ia64_mca_per_cpu_pte)
257 ;;
258 movl r16=PERCPU_ADDR // vaddr
259 movl r18=PERCPU_PAGE_SHIFT<<2
260 ;;
261 mov cr.itir=r18
262 mov cr.ifa=r16
263 ;;
264 ld8 r18=[r2] // load per-CPU PTE
265 mov r16=IA64_TR_PERCPU_DATA;
266 ;;
267 itr.d dtr[r16]=r18
268 ;;
269 srlz.d
270 ;;
271 // 3. Reload ITR for PAL code.
272 GET_THIS_PADDR(r2, ia64_mca_pal_pte)
273 ;;
274 ld8 r18=[r2] // load PAL PTE
275 ;;
276 GET_THIS_PADDR(r2, ia64_mca_pal_base)
277 ;;
278 ld8 r16=[r2] // load PAL vaddr
279 mov r19=IA64_GRANULE_SHIFT<<2
280 ;;
281 mov cr.itir=r19
282 mov cr.ifa=r16
283 mov r20=IA64_TR_PALCODE
284 ;;
285 itr.i itr[r20]=r18
286 ;;
287 srlz.i
288 ;;
289 // 4. Reload DTR for stack.
290 mov r16=IA64_KR(CURRENT_STACK)
291 ;;
292 shl r16=r16,IA64_GRANULE_SHIFT
293 movl r19=PAGE_OFFSET
294 ;;
295 add r18=r19,r16
296 movl r20=PAGE_KERNEL
297 ;;
298 add r16=r20,r16
299 mov r19=IA64_GRANULE_SHIFT<<2
300 ;;
301 mov cr.itir=r19
302 mov cr.ifa=r18
303 mov r20=IA64_TR_CURRENT_STACK
304 ;;
305 itr.d dtr[r20]=r16
306 ;;
307 srlz.d
308 ;;
309 br.sptk.many done_tlb_purge_and_reload
310err:
311 COLD_BOOT_HANDOFF_STATE(r20,r21,r22)
312 br.sptk.many ia64_os_mca_done_restore
313
314done_tlb_purge_and_reload:
315
316 // Setup new stack frame for OS_MCA handling
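	// (The stack, RBS and proc-state-dump areas used below live in the
	// per-CPU struct ia64_mca_cpu located via GET_IA64_MCA_DATA; they were
	// set aside by ia64_mca_cpu_init() in mca.c.)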
317 GET_IA64_MCA_DATA(r2)
318 ;;
319 add r3 = IA64_MCA_CPU_STACKFRAME_OFFSET, r2
320 add r2 = IA64_MCA_CPU_RBSTORE_OFFSET, r2
321 ;;
322 rse_switch_context(r6,r3,r2);; // RSC management in this new context
323
324 GET_IA64_MCA_DATA(r2)
325 ;;
326 add r2 = IA64_MCA_CPU_STACK_OFFSET+IA64_MCA_STACK_SIZE-16, r2
327 ;;
328 mov r12=r2 // establish new stack-pointer
329
330 // Enter virtual mode from physical mode
331 VIRTUAL_MODE_ENTER(r2, r3, ia64_os_mca_virtual_begin, r4)
332ia64_os_mca_virtual_begin:
333
334 // Call virtual mode handler
335 movl r2=ia64_mca_ucmc_handler;;
336 mov b6=r2;;
337 br.call.sptk.many b0=b6;;
338.ret0:
339 // Revert back to physical mode before going back to SAL
340 PHYSICAL_MODE_ENTER(r2, r3, ia64_os_mca_virtual_end, r4)
341ia64_os_mca_virtual_end:
342
343 // restore the original stack frame here
344 GET_IA64_MCA_DATA(r2)
345 ;;
346 add r2 = IA64_MCA_CPU_STACKFRAME_OFFSET, r2
347 ;;
348 movl r4=IA64_PSR_MC
349 ;;
350 rse_return_context(r4,r3,r2) // switch from interrupt context for RSE
351
352 // let us restore all the registers from our PSI structure
353 mov r8=gp
354 ;;
355begin_os_mca_restore:
356 br ia64_os_mca_proc_state_restore;;
357
358ia64_os_mca_done_restore:
359 OS_MCA_TO_SAL_HANDOFF_STATE_RESTORE(r2);;
360 // branch back to SALE_CHECK
361 ld8 r3=[r2];;
362 mov b0=r3;; // SAL_CHECK return address
363
364 // release lock
365 movl r3=ia64_mca_serialize;;
366 DATA_VA_TO_PA(r3);;
367 st8.rel [r3]=r0
368
369 br b0
370 ;;
371ia64_os_mca_dispatch_end:
372//EndMain//////////////////////////////////////////////////////////////////////
373
374
375//++
376// Name:
377// ia64_os_mca_proc_state_dump()
378//
379// Stub Description:
380//
381// This stub dumps the processor state during MCHK to a data area
382//
383//--
384
385ia64_os_mca_proc_state_dump:
386// Save bank 1 GRs 16-31 which will be used by c-language code when we switch
387// to virtual addressing mode.
388 GET_IA64_MCA_DATA(r2)
389 ;;
390 add r2 = IA64_MCA_CPU_PROC_STATE_DUMP_OFFSET, r2
391 ;;
392// save ar.NaT
393 mov r5=ar.unat // ar.unat
394
395// save banked GRs 16-31 along with NaT bits
396 bsw.1;;
397 st8.spill [r2]=r16,8;;
398 st8.spill [r2]=r17,8;;
399 st8.spill [r2]=r18,8;;
400 st8.spill [r2]=r19,8;;
401 st8.spill [r2]=r20,8;;
402 st8.spill [r2]=r21,8;;
403 st8.spill [r2]=r22,8;;
404 st8.spill [r2]=r23,8;;
405 st8.spill [r2]=r24,8;;
406 st8.spill [r2]=r25,8;;
407 st8.spill [r2]=r26,8;;
408 st8.spill [r2]=r27,8;;
409 st8.spill [r2]=r28,8;;
410 st8.spill [r2]=r29,8;;
411 st8.spill [r2]=r30,8;;
412 st8.spill [r2]=r31,8;;
413
414 mov r4=ar.unat;;
415 st8 [r2]=r4,8 // save User NaT bits for r16-r31
416 mov ar.unat=r5 // restore original unat
417 bsw.0;;
418
419//save BRs
420 add r4=8,r2 // duplicate r2 in r4
 421	add r6=2*8,r2 // duplicate r2 in r6
422
423 mov r3=b0
424 mov r5=b1
425 mov r7=b2;;
426 st8 [r2]=r3,3*8
427 st8 [r4]=r5,3*8
428 st8 [r6]=r7,3*8;;
429
430 mov r3=b3
431 mov r5=b4
432 mov r7=b5;;
433 st8 [r2]=r3,3*8
434 st8 [r4]=r5,3*8
435 st8 [r6]=r7,3*8;;
436
437 mov r3=b6
438 mov r5=b7;;
439 st8 [r2]=r3,2*8
440 st8 [r4]=r5,2*8;;
441
442cSaveCRs:
443// save CRs
444 add r4=8,r2 // duplicate r2 in r4
 445	add r6=2*8,r2 // duplicate r2 in r6
446
447 mov r3=cr.dcr
448 mov r5=cr.itm
449 mov r7=cr.iva;;
450
451 st8 [r2]=r3,8*8
452 st8 [r4]=r5,3*8
 453	st8 [r6]=r7,3*8;; // 48 byte increments
454
455 mov r3=cr.pta;;
 456	st8 [r2]=r3,8*8;; // 64 byte increments
457
 458// if PSR.ic=1, reading interruption registers causes an illegal operation fault
459 mov r3=psr;;
460 tbit.nz.unc p6,p0=r3,PSR_IC;; // PSI Valid Log bit pos. test
461(p6) st8 [r2]=r0,9*8+160 // increment by 232 byte inc.
462begin_skip_intr_regs:
463(p6) br SkipIntrRegs;;
464
465 add r4=8,r2 // duplicate r2 in r4
466 add r6=2*8,r2 // duplicate r2 in r6
467
468 mov r3=cr.ipsr
469 mov r5=cr.isr
470 mov r7=r0;;
471 st8 [r2]=r3,3*8
472 st8 [r4]=r5,3*8
473 st8 [r6]=r7,3*8;;
474
475 mov r3=cr.iip
476 mov r5=cr.ifa
477 mov r7=cr.itir;;
478 st8 [r2]=r3,3*8
479 st8 [r4]=r5,3*8
480 st8 [r6]=r7,3*8;;
481
482 mov r3=cr.iipa
483 mov r5=cr.ifs
484 mov r7=cr.iim;;
485 st8 [r2]=r3,3*8
486 st8 [r4]=r5,3*8
487 st8 [r6]=r7,3*8;;
488
489 mov r3=cr25;; // cr.iha
 490	st8 [r2]=r3,160;; // 160 byte increment
491
492SkipIntrRegs:
 493	st8 [r2]=r0,152;; // another 152 byte increment.
494
495 add r4=8,r2 // duplicate r2 in r4
496 add r6=2*8,r2 // duplicate r2 in r6
497
498 mov r3=cr.lid
499// mov r5=cr.ivr // cr.ivr, don't read it
500 mov r7=cr.tpr;;
501 st8 [r2]=r3,3*8
502 st8 [r4]=r5,3*8
503 st8 [r6]=r7,3*8;;
504
505 mov r3=r0 // cr.eoi => cr67
506 mov r5=r0 // cr.irr0 => cr68
507 mov r7=r0;; // cr.irr1 => cr69
508 st8 [r2]=r3,3*8
509 st8 [r4]=r5,3*8
510 st8 [r6]=r7,3*8;;
511
512 mov r3=r0 // cr.irr2 => cr70
513 mov r5=r0 // cr.irr3 => cr71
514 mov r7=cr.itv;;
515 st8 [r2]=r3,3*8
516 st8 [r4]=r5,3*8
517 st8 [r6]=r7,3*8;;
518
519 mov r3=cr.pmv
520 mov r5=cr.cmcv;;
521 st8 [r2]=r3,7*8
522 st8 [r4]=r5,7*8;;
523
524 mov r3=r0 // cr.lrr0 => cr80
525 mov r5=r0;; // cr.lrr1 => cr81
526 st8 [r2]=r3,23*8
527 st8 [r4]=r5,23*8;;
528
529 adds r2=25*8,r2;;
530
531cSaveARs:
532// save ARs
533 add r4=8,r2 // duplicate r2 in r4
534 add r6=2*8,r2 // duplicate r2 in r6
535
536 mov r3=ar.k0
537 mov r5=ar.k1
538 mov r7=ar.k2;;
539 st8 [r2]=r3,3*8
540 st8 [r4]=r5,3*8
541 st8 [r6]=r7,3*8;;
542
543 mov r3=ar.k3
544 mov r5=ar.k4
545 mov r7=ar.k5;;
546 st8 [r2]=r3,3*8
547 st8 [r4]=r5,3*8
548 st8 [r6]=r7,3*8;;
549
550 mov r3=ar.k6
551 mov r5=ar.k7
552 mov r7=r0;; // ar.kr8
553 st8 [r2]=r3,10*8
554 st8 [r4]=r5,10*8
 555	st8 [r6]=r7,10*8;; // increment by 72 bytes
556
557 mov r3=ar.rsc
558 mov ar.rsc=r0 // put RSE in enforced lazy mode
559 mov r5=ar.bsp
560 ;;
561 mov r7=ar.bspstore;;
562 st8 [r2]=r3,3*8
563 st8 [r4]=r5,3*8
564 st8 [r6]=r7,3*8;;
565
566 mov r3=ar.rnat;;
567 st8 [r2]=r3,8*13 // increment by 13x8 bytes
568
569 mov r3=ar.ccv;;
570 st8 [r2]=r3,8*4
571
572 mov r3=ar.unat;;
573 st8 [r2]=r3,8*4
574
575 mov r3=ar.fpsr;;
576 st8 [r2]=r3,8*4
577
578 mov r3=ar.itc;;
579 st8 [r2]=r3,160 // 160
580
581 mov r3=ar.pfs;;
582 st8 [r2]=r3,8
583
584 mov r3=ar.lc;;
585 st8 [r2]=r3,8
586
587 mov r3=ar.ec;;
588 st8 [r2]=r3
589 add r2=8*62,r2 //padding
590
591// save RRs
592 mov ar.lc=0x08-1
593 movl r4=0x00;;
594
595cStRR:
596 dep.z r5=r4,61,3;;
597 mov r3=rr[r5];;
598 st8 [r2]=r3,8
599 add r4=1,r4
600 br.cloop.sptk.few cStRR
601 ;;
602end_os_mca_dump:
603 br ia64_os_mca_done_dump;;
604
605//EndStub//////////////////////////////////////////////////////////////////////
606
607
608//++
609// Name:
610// ia64_os_mca_proc_state_restore()
611//
612// Stub Description:
613//
614// This is a stub to restore the saved processor state during MCHK
615//
616//--
617
618ia64_os_mca_proc_state_restore:
619
620// Restore bank1 GR16-31
621 GET_IA64_MCA_DATA(r2)
622 ;;
623 add r2 = IA64_MCA_CPU_PROC_STATE_DUMP_OFFSET, r2
624
625restore_GRs: // restore bank-1 GRs 16-31
626 bsw.1;;
627 add r3=16*8,r2;; // to get to NaT of GR 16-31
628 ld8 r3=[r3];;
629 mov ar.unat=r3;; // first restore NaT
630
631 ld8.fill r16=[r2],8;;
632 ld8.fill r17=[r2],8;;
633 ld8.fill r18=[r2],8;;
634 ld8.fill r19=[r2],8;;
635 ld8.fill r20=[r2],8;;
636 ld8.fill r21=[r2],8;;
637 ld8.fill r22=[r2],8;;
638 ld8.fill r23=[r2],8;;
639 ld8.fill r24=[r2],8;;
640 ld8.fill r25=[r2],8;;
641 ld8.fill r26=[r2],8;;
642 ld8.fill r27=[r2],8;;
643 ld8.fill r28=[r2],8;;
644 ld8.fill r29=[r2],8;;
645 ld8.fill r30=[r2],8;;
646 ld8.fill r31=[r2],8;;
647
648 ld8 r3=[r2],8;; // increment to skip NaT
649 bsw.0;;
650
651restore_BRs:
652 add r4=8,r2 // duplicate r2 in r4
 653	add r6=2*8,r2;; // duplicate r2 in r6
654
655 ld8 r3=[r2],3*8
656 ld8 r5=[r4],3*8
657 ld8 r7=[r6],3*8;;
658 mov b0=r3
659 mov b1=r5
660 mov b2=r7;;
661
662 ld8 r3=[r2],3*8
663 ld8 r5=[r4],3*8
664 ld8 r7=[r6],3*8;;
665 mov b3=r3
666 mov b4=r5
667 mov b5=r7;;
668
669 ld8 r3=[r2],2*8
670 ld8 r5=[r4],2*8;;
671 mov b6=r3
672 mov b7=r5;;
673
674restore_CRs:
675 add r4=8,r2 // duplicate r2 in r4
 676	add r6=2*8,r2;; // duplicate r2 in r6
677
678 ld8 r3=[r2],8*8
679 ld8 r5=[r4],3*8
680 ld8 r7=[r6],3*8;; // 48 byte increments
681 mov cr.dcr=r3
682 mov cr.itm=r5
683 mov cr.iva=r7;;
684
685 ld8 r3=[r2],8*8;; // 64 byte increments
686// mov cr.pta=r3
687
688
 689// if PSR.ic=1, writing the interruption registers causes an illegal operation fault
690 mov r3=psr;;
691 tbit.nz.unc p6,p0=r3,PSR_IC;; // PSI Valid Log bit pos. test
692(p6) st8 [r2]=r0,9*8+160 // increment by 232 byte inc.
693
694begin_rskip_intr_regs:
695(p6) br rSkipIntrRegs;;
696
697 add r4=8,r2 // duplicate r2 in r4
 698	add r6=2*8,r2;; // duplicate r2 in r6
699
700 ld8 r3=[r2],3*8
701 ld8 r5=[r4],3*8
702 ld8 r7=[r6],3*8;;
703 mov cr.ipsr=r3
704// mov cr.isr=r5 // cr.isr is read only
705
706 ld8 r3=[r2],3*8
707 ld8 r5=[r4],3*8
708 ld8 r7=[r6],3*8;;
709 mov cr.iip=r3
710 mov cr.ifa=r5
711 mov cr.itir=r7;;
712
713 ld8 r3=[r2],3*8
714 ld8 r5=[r4],3*8
715 ld8 r7=[r6],3*8;;
716 mov cr.iipa=r3
717 mov cr.ifs=r5
718 mov cr.iim=r7
719
720 ld8 r3=[r2],160;; // 160 byte increment
721 mov cr.iha=r3
722
723rSkipIntrRegs:
724 ld8 r3=[r2],152;; // another 152 byte inc.
725
726 add r4=8,r2 // duplicate r2 in r4
727 add r6=2*8,r2;; // duplicate r2 in r6
728
729 ld8 r3=[r2],8*3
730 ld8 r5=[r4],8*3
731 ld8 r7=[r6],8*3;;
732 mov cr.lid=r3
733// mov cr.ivr=r5 // cr.ivr is read only
734 mov cr.tpr=r7;;
735
736 ld8 r3=[r2],8*3
737 ld8 r5=[r4],8*3
738 ld8 r7=[r6],8*3;;
739// mov cr.eoi=r3
740// mov cr.irr0=r5 // cr.irr0 is read only
741// mov cr.irr1=r7;; // cr.irr1 is read only
742
743 ld8 r3=[r2],8*3
744 ld8 r5=[r4],8*3
745 ld8 r7=[r6],8*3;;
746// mov cr.irr2=r3 // cr.irr2 is read only
747// mov cr.irr3=r5 // cr.irr3 is read only
748 mov cr.itv=r7;;
749
750 ld8 r3=[r2],8*7
751 ld8 r5=[r4],8*7;;
752 mov cr.pmv=r3
753 mov cr.cmcv=r5;;
754
755 ld8 r3=[r2],8*23
756 ld8 r5=[r4],8*23;;
757 adds r2=8*23,r2
758 adds r4=8*23,r4;;
759// mov cr.lrr0=r3
760// mov cr.lrr1=r5
761
762 adds r2=8*2,r2;;
763
764restore_ARs:
765 add r4=8,r2 // duplicate r2 in r4
 766	add r6=2*8,r2;; // duplicate r2 in r6
767
768 ld8 r3=[r2],3*8
769 ld8 r5=[r4],3*8
770 ld8 r7=[r6],3*8;;
771 mov ar.k0=r3
772 mov ar.k1=r5
773 mov ar.k2=r7;;
774
775 ld8 r3=[r2],3*8
776 ld8 r5=[r4],3*8
777 ld8 r7=[r6],3*8;;
778 mov ar.k3=r3
779 mov ar.k4=r5
780 mov ar.k5=r7;;
781
782 ld8 r3=[r2],10*8
783 ld8 r5=[r4],10*8
784 ld8 r7=[r6],10*8;;
785 mov ar.k6=r3
786 mov ar.k7=r5
787 ;;
788
789 ld8 r3=[r2],3*8
790 ld8 r5=[r4],3*8
791 ld8 r7=[r6],3*8;;
792// mov ar.rsc=r3
793// mov ar.bsp=r5 // ar.bsp is read only
794 mov ar.rsc=r0 // make sure that RSE is in enforced lazy mode
795 ;;
796 mov ar.bspstore=r7;;
797
798 ld8 r9=[r2],8*13;;
799 mov ar.rnat=r9
800
801 mov ar.rsc=r3
802 ld8 r3=[r2],8*4;;
803 mov ar.ccv=r3
804
805 ld8 r3=[r2],8*4;;
806 mov ar.unat=r3
807
808 ld8 r3=[r2],8*4;;
809 mov ar.fpsr=r3
810
811 ld8 r3=[r2],160;; // 160
812// mov ar.itc=r3
813
814 ld8 r3=[r2],8;;
815 mov ar.pfs=r3
816
817 ld8 r3=[r2],8;;
818 mov ar.lc=r3
819
820 ld8 r3=[r2];;
821 mov ar.ec=r3
822 add r2=8*62,r2;; // padding
823
824restore_RRs:
825 mov r5=ar.lc
826 mov ar.lc=0x08-1
827 movl r4=0x00;;
828cStRRr:
829 dep.z r7=r4,61,3
830 ld8 r3=[r2],8;;
 831	mov rr[r7]=r3 // what are its access privileges?
832 add r4=1,r4
833 br.cloop.sptk.few cStRRr
834 ;;
835 mov ar.lc=r5
836 ;;
837end_os_mca_restore:
838 br ia64_os_mca_done_restore;;
839
840//EndStub//////////////////////////////////////////////////////////////////////
841
842
 843// The issue here is that we need to save state information so that
 844// it can be used by the kernel debugger and the show_regs routines.
 845// To do this, our best bet is to save the current state (plus the
 846// state information obtained from the MIN_STATE_AREA) in pt_regs
 847// format. That way we can pass it on in a usable format.
848//
849
850//
851// SAL to OS entry point for INIT on the monarch processor
852// This has been defined for registration purposes with SAL
853// as a part of ia64_mca_init.
854//
855// When we get here, the following registers have been
856// set by the SAL for our use
857//
858// 1. GR1 = OS INIT GP
859// 2. GR8 = PAL_PROC physical address
860// 3. GR9 = SAL_PROC physical address
861// 4. GR10 = SAL GP (physical)
862// 5. GR11 = Init Reason
863// 0 = Received INIT for event other than crash dump switch
864// 1 = Received wakeup at the end of an OS_MCA corrected machine check
 865//		2 = Received INIT due to CrashDump switch assertion
866//
867// 6. GR12 = Return address to location within SAL_INIT procedure
868
869
870GLOBAL_ENTRY(ia64_monarch_init_handler)
871 .prologue
872 // stash the information the SAL passed to os
873 SAL_TO_OS_MCA_HANDOFF_STATE_SAVE(r2)
874 ;;
875 SAVE_MIN_WITH_COVER
876 ;;
877 mov r8=cr.ifa
878 mov r9=cr.isr
879 adds r3=8,r2 // set up second base pointer
880 ;;
881 SAVE_REST
882
883// ok, enough should be saved at this point to be dangerous, and supply
884// information for a dump
885// We need to switch to Virtual mode before hitting the C functions.
886
887 movl r2=IA64_PSR_IT|IA64_PSR_IC|IA64_PSR_DT|IA64_PSR_RT|IA64_PSR_DFH|IA64_PSR_BN
888 mov r3=psr // get the current psr, minimum enabled at this point
889 ;;
890 or r2=r2,r3
891 ;;
892 movl r3=IVirtual_Switch
893 ;;
894 mov cr.iip=r3 // short return to set the appropriate bits
895 mov cr.ipsr=r2 // need to do an rfi to set appropriate bits
896 ;;
897 rfi
898 ;;
899IVirtual_Switch:
900 //
901 // We should now be running virtual
902 //
903 // Let's call the C handler to get the rest of the state info
904 //
905 alloc r14=ar.pfs,0,0,2,0 // now it's safe (must be first in insn group!)
906 ;;
907 adds out0=16,sp // out0 = pointer to pt_regs
908 ;;
909 DO_SAVE_SWITCH_STACK
910 .body
 911	adds out1=16,sp // out1 = pointer to switch_stack
912
913 br.call.sptk.many rp=ia64_init_handler
914.ret1:
915
916return_from_init:
917 br.sptk return_from_init
918END(ia64_monarch_init_handler)
919
920//
921// SAL to OS entry point for INIT on the slave processor
922// This has been defined for registration purposes with SAL
923// as a part of ia64_mca_init.
924//
925
926GLOBAL_ENTRY(ia64_slave_init_handler)
9271: br.sptk 1b
928END(ia64_slave_init_handler)
diff --git a/arch/ia64/kernel/mca_drv.c b/arch/ia64/kernel/mca_drv.c
new file mode 100644
index 000000000000..ab478172c349
--- /dev/null
+++ b/arch/ia64/kernel/mca_drv.c
@@ -0,0 +1,639 @@
1/*
2 * File: mca_drv.c
3 * Purpose: Generic MCA handling layer
4 *
5 * Copyright (C) 2004 FUJITSU LIMITED
6 * Copyright (C) Hidetoshi Seto (seto.hidetoshi@jp.fujitsu.com)
7 */
8#include <linux/config.h>
9#include <linux/types.h>
10#include <linux/init.h>
11#include <linux/sched.h>
12#include <linux/interrupt.h>
13#include <linux/irq.h>
14#include <linux/kallsyms.h>
15#include <linux/smp_lock.h>
16#include <linux/bootmem.h>
17#include <linux/acpi.h>
18#include <linux/timer.h>
19#include <linux/module.h>
20#include <linux/kernel.h>
21#include <linux/smp.h>
22#include <linux/workqueue.h>
23#include <linux/mm.h>
24
25#include <asm/delay.h>
26#include <asm/machvec.h>
27#include <asm/page.h>
28#include <asm/ptrace.h>
29#include <asm/system.h>
30#include <asm/sal.h>
31#include <asm/mca.h>
32
33#include <asm/irq.h>
34#include <asm/hw_irq.h>
35
36#include "mca_drv.h"
37
38/* max size of SAL error record (default) */
39static int sal_rec_max = 10000;
40
41/* from mca.c */
42static ia64_mca_sal_to_os_state_t *sal_to_os_handoff_state;
43static ia64_mca_os_to_sal_state_t *os_to_sal_handoff_state;
44
45/* from mca_drv_asm.S */
46extern void *mca_handler_bhhook(void);
47
48static DEFINE_SPINLOCK(mca_bh_lock);
49
50typedef enum {
51 MCA_IS_LOCAL = 0,
52 MCA_IS_GLOBAL = 1
53} mca_type_t;
54
55#define MAX_PAGE_ISOLATE 1024
56
57static struct page *page_isolate[MAX_PAGE_ISOLATE];
58static int num_page_isolate = 0;
59
60typedef enum {
61 ISOLATE_NG = 0,
62 ISOLATE_OK = 1
63} isolate_status_t;
64
65/*
66 * This pool keeps pointers to the section part of SAL error record
67 */
68static struct {
69 slidx_list_t *buffer; /* section pointer list pool */
70 int cur_idx; /* Current index of section pointer list pool */
71 int max_idx; /* Maximum index of section pointer list pool */
72} slidx_pool;
73
74/**
75 * mca_page_isolate - isolate a poisoned page in order not to use it later
76 * @paddr: poisoned memory location
77 *
78 * Return value:
79 * ISOLATE_OK / ISOLATE_NG
80 */
81
82static isolate_status_t
83mca_page_isolate(unsigned long paddr)
84{
85 int i;
86 struct page *p;
87
88 /* whether physical address is valid or not */
89 if ( !ia64_phys_addr_valid(paddr) )
90 return ISOLATE_NG;
91
92 /* convert physical address to physical page number */
93 p = pfn_to_page(paddr>>PAGE_SHIFT);
94
 95	/* check whether the page has already been registered */
96 for( i = 0; i < num_page_isolate; i++ )
97 if( page_isolate[i] == p )
98 return ISOLATE_OK; /* already listed */
99
100 /* limitation check */
101 if( num_page_isolate == MAX_PAGE_ISOLATE )
102 return ISOLATE_NG;
103
 104	/* reject pages with the 'Slab' or 'Reserved' attribute */
105 if( PageSlab(p) || PageReserved(p) )
106 return ISOLATE_NG;
107
108 /* add attribute 'Reserved' and register the page */
109 SetPageReserved(p);
110 page_isolate[num_page_isolate++] = p;
111
112 return ISOLATE_OK;
113}
114
115/**
 116 * mca_handler_bh - Kill the process that hit a memory read error
117 * @paddr: poisoned address received from MCA Handler
118 */
119
120void
121mca_handler_bh(unsigned long paddr)
122{
123 printk(KERN_DEBUG "OS_MCA: process [pid: %d](%s) encounters MCA.\n",
124 current->pid, current->comm);
125
126 spin_lock(&mca_bh_lock);
127 if (mca_page_isolate(paddr) == ISOLATE_OK) {
128 printk(KERN_DEBUG "Page isolation: ( %lx ) success.\n", paddr);
129 } else {
130 printk(KERN_DEBUG "Page isolation: ( %lx ) failure.\n", paddr);
131 }
132 spin_unlock(&mca_bh_lock);
133
 134	/* The current process is about to kill itself */
135 force_sig(SIGKILL, current);
136 schedule();
137}
138
139/**
140 * mca_make_peidx - Make index of processor error section
141 * @slpi: pointer to record of processor error section
142 * @peidx: pointer to index of processor error section
143 */
144
145static void
146mca_make_peidx(sal_log_processor_info_t *slpi, peidx_table_t *peidx)
147{
148 /*
149 * calculate the start address of
150 * "struct cpuid_info" and "sal_processor_static_info_t".
151 */
152 u64 total_check_num = slpi->valid.num_cache_check
153 + slpi->valid.num_tlb_check
154 + slpi->valid.num_bus_check
155 + slpi->valid.num_reg_file_check
156 + slpi->valid.num_ms_check;
157 u64 head_size = sizeof(sal_log_mod_error_info_t) * total_check_num
158 + sizeof(sal_log_processor_info_t);
159 u64 mid_size = slpi->valid.cpuid_info * sizeof(struct sal_cpuid_info);
160
161 peidx_head(peidx) = slpi;
162 peidx_mid(peidx) = (struct sal_cpuid_info *)
163 (slpi->valid.cpuid_info ? ((char*)slpi + head_size) : NULL);
164 peidx_bottom(peidx) = (sal_processor_static_info_t *)
165 (slpi->valid.psi_static_struct ?
166 ((char*)slpi + head_size + mid_size) : NULL);
167}
168
169/**
170 * mca_make_slidx - Make index of SAL error record
171 * @buffer: pointer to SAL error record
172 * @slidx: pointer to index of SAL error record
173 *
174 * Return value:
175 * 1 if record has platform error / 0 if not
176 */
177#define LOG_INDEX_ADD_SECT_PTR(sect, ptr) \
178 { slidx_list_t *hl = &slidx_pool.buffer[slidx_pool.cur_idx]; \
179 hl->hdr = ptr; \
180 list_add(&hl->list, &(sect)); \
181 slidx_pool.cur_idx = (slidx_pool.cur_idx + 1)%slidx_pool.max_idx; }
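/*
 * The pool index simply wraps modulo max_idx.  This relies on MCA handling
 * being serialized (see the ia64_mca_serialize lock in mca_asm.S) and on the
 * pool being sized for two full records in init_record_index_pools(), so the
 * wrap-around should not clobber nodes of the record currently being indexed.
 */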
182
183static int
184mca_make_slidx(void *buffer, slidx_table_t *slidx)
185{
186 int platform_err = 0;
187 int record_len = ((sal_log_record_header_t*)buffer)->len;
188 u32 ercd_pos;
189 int sects;
190 sal_log_section_hdr_t *sp;
191
192 /*
 193	 * Initialize the index referring to the current record
194 */
195 INIT_LIST_HEAD(&(slidx->proc_err));
196 INIT_LIST_HEAD(&(slidx->mem_dev_err));
197 INIT_LIST_HEAD(&(slidx->sel_dev_err));
198 INIT_LIST_HEAD(&(slidx->pci_bus_err));
199 INIT_LIST_HEAD(&(slidx->smbios_dev_err));
200 INIT_LIST_HEAD(&(slidx->pci_comp_err));
201 INIT_LIST_HEAD(&(slidx->plat_specific_err));
202 INIT_LIST_HEAD(&(slidx->host_ctlr_err));
203 INIT_LIST_HEAD(&(slidx->plat_bus_err));
204 INIT_LIST_HEAD(&(slidx->unsupported));
205
206 /*
207 * Extract a Record Header
208 */
209 slidx->header = buffer;
210
211 /*
 212	 * Extract each section record
213 * (arranged from "int ia64_log_platform_info_print()")
214 */
215 for (ercd_pos = sizeof(sal_log_record_header_t), sects = 0;
216 ercd_pos < record_len; ercd_pos += sp->len, sects++) {
217 sp = (sal_log_section_hdr_t *)((char*)buffer + ercd_pos);
218 if (!efi_guidcmp(sp->guid, SAL_PROC_DEV_ERR_SECT_GUID)) {
219 LOG_INDEX_ADD_SECT_PTR(slidx->proc_err, sp);
220 } else if (!efi_guidcmp(sp->guid, SAL_PLAT_MEM_DEV_ERR_SECT_GUID)) {
221 platform_err = 1;
222 LOG_INDEX_ADD_SECT_PTR(slidx->mem_dev_err, sp);
223 } else if (!efi_guidcmp(sp->guid, SAL_PLAT_SEL_DEV_ERR_SECT_GUID)) {
224 platform_err = 1;
225 LOG_INDEX_ADD_SECT_PTR(slidx->sel_dev_err, sp);
226 } else if (!efi_guidcmp(sp->guid, SAL_PLAT_PCI_BUS_ERR_SECT_GUID)) {
227 platform_err = 1;
228 LOG_INDEX_ADD_SECT_PTR(slidx->pci_bus_err, sp);
229 } else if (!efi_guidcmp(sp->guid, SAL_PLAT_SMBIOS_DEV_ERR_SECT_GUID)) {
230 platform_err = 1;
231 LOG_INDEX_ADD_SECT_PTR(slidx->smbios_dev_err, sp);
232 } else if (!efi_guidcmp(sp->guid, SAL_PLAT_PCI_COMP_ERR_SECT_GUID)) {
233 platform_err = 1;
234 LOG_INDEX_ADD_SECT_PTR(slidx->pci_comp_err, sp);
235 } else if (!efi_guidcmp(sp->guid, SAL_PLAT_SPECIFIC_ERR_SECT_GUID)) {
236 platform_err = 1;
237 LOG_INDEX_ADD_SECT_PTR(slidx->plat_specific_err, sp);
238 } else if (!efi_guidcmp(sp->guid, SAL_PLAT_HOST_CTLR_ERR_SECT_GUID)) {
239 platform_err = 1;
240 LOG_INDEX_ADD_SECT_PTR(slidx->host_ctlr_err, sp);
241 } else if (!efi_guidcmp(sp->guid, SAL_PLAT_BUS_ERR_SECT_GUID)) {
242 platform_err = 1;
243 LOG_INDEX_ADD_SECT_PTR(slidx->plat_bus_err, sp);
244 } else {
245 LOG_INDEX_ADD_SECT_PTR(slidx->unsupported, sp);
246 }
247 }
248 slidx->n_sections = sects;
249
250 return platform_err;
251}
252
253/**
254 * init_record_index_pools - Initialize pool of lists for SAL record index
255 *
256 * Return value:
257 * 0 on Success / -ENOMEM on Failure
258 */
259static int
260init_record_index_pools(void)
261{
262 int i;
263 int rec_max_size; /* Maximum size of SAL error records */
264 int sect_min_size; /* Minimum size of SAL error sections */
265 /* minimum size table of each section */
266 static int sal_log_sect_min_sizes[] = {
267 sizeof(sal_log_processor_info_t) + sizeof(sal_processor_static_info_t),
268 sizeof(sal_log_mem_dev_err_info_t),
269 sizeof(sal_log_sel_dev_err_info_t),
270 sizeof(sal_log_pci_bus_err_info_t),
271 sizeof(sal_log_smbios_dev_err_info_t),
272 sizeof(sal_log_pci_comp_err_info_t),
273 sizeof(sal_log_plat_specific_err_info_t),
274 sizeof(sal_log_host_ctlr_err_info_t),
275 sizeof(sal_log_plat_bus_err_info_t),
276 };
277
278 /*
 279	 * The MCA handler cannot allocate new memory on the fly,
 280	 * so we preallocate enough memory to handle a SAL record.
 281	 *
 282	 * Initialize the slidx_pool working set:
 283	 * 1. Pick up the maximum size of SAL error records
 284	 * 2. Pick up the minimum size of SAL error sections
 285	 * 3. Allocate a pool large enough for 2 SAL records
 286	 *    (from 1 and 2 we can bound the maximum number of sections in a record.)
287 */
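	/*
	 * Worked example with hypothetical numbers: if sal_rec_max is the
	 * default 10000 bytes and the smallest section were 32 bytes, then
	 * max_idx = 10000/32 * 2 + 1 = 625 preallocated slidx_list_t nodes.
	 */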
288
289 /* - 1 - */
290 rec_max_size = sal_rec_max;
291
292 /* - 2 - */
293 sect_min_size = sal_log_sect_min_sizes[0];
 294	for (i = 1; i < sizeof(sal_log_sect_min_sizes)/sizeof(sal_log_sect_min_sizes[0]); i++)
295 if (sect_min_size > sal_log_sect_min_sizes[i])
296 sect_min_size = sal_log_sect_min_sizes[i];
297
298 /* - 3 - */
299 slidx_pool.max_idx = (rec_max_size/sect_min_size) * 2 + 1;
300 slidx_pool.buffer = (slidx_list_t *) kmalloc(slidx_pool.max_idx * sizeof(slidx_list_t), GFP_KERNEL);
301
302 return slidx_pool.buffer ? 0 : -ENOMEM;
303}
304
305
306/*****************************************************************************
307 * Recovery functions *
308 *****************************************************************************/
309
310/**
311 * is_mca_global - Check whether this MCA is global or not
312 * @peidx: pointer of index of processor error section
313 * @pbci: pointer to pal_bus_check_info_t
314 *
315 * Return value:
316 * MCA_IS_LOCAL / MCA_IS_GLOBAL
317 */
318
319static mca_type_t
320is_mca_global(peidx_table_t *peidx, pal_bus_check_info_t *pbci)
321{
322 pal_processor_state_info_t *psp = (pal_processor_state_info_t*)peidx_psp(peidx);
323
324 /*
 325	 * PAL can request a rendezvous if the MCA has global scope.
 326	 * If the "rz_always" flag is set, SAL requests an MCA rendezvous
 327	 * even for a local MCA.
 328	 * Therefore the MCA is local when no rendezvous has been requested.
 329	 * If the rendezvous failed, the system must be brought down.
330 */
331 switch (sal_to_os_handoff_state->imsto_rendez_state) {
332 case -1: /* SAL rendezvous unsuccessful */
333 return MCA_IS_GLOBAL;
334 case 0: /* SAL rendezvous not required */
335 return MCA_IS_LOCAL;
336 case 1: /* SAL rendezvous successful int */
337 case 2: /* SAL rendezvous successful int with init */
338 default:
339 break;
340 }
341
342 /*
 343	 * If one or more Cache/TLB/Reg_File/Uarch checks are present,
 344	 * this is a local MCA (i.e. a processor-internal error).
345 */
346 if (psp->tc || psp->cc || psp->rc || psp->uc)
347 return MCA_IS_LOCAL;
348
349 /*
350 * Bus_Check structure with Bus_Check.ib (internal bus error) flag set
351 * would be a global MCA. (e.g. a system bus address parity error)
352 */
353 if (!pbci || pbci->ib)
354 return MCA_IS_GLOBAL;
355
356 /*
357 * Bus_Check structure with Bus_Check.eb (external bus error) flag set
358 * could be either a local MCA or a global MCA.
359 *
360 * Referring Bus_Check.bsi:
361 * 0: Unknown/unclassified
362 * 1: BERR#
363 * 2: BINIT#
364 * 3: Hard Fail
365 * (FIXME: Are these SGI specific or generic bsi values?)
366 */
367 if (pbci->eb)
368 switch (pbci->bsi) {
369 case 0:
370 /* e.g. a load from poisoned memory */
371 return MCA_IS_LOCAL;
372 case 1:
373 case 2:
374 case 3:
375 return MCA_IS_GLOBAL;
376 }
377
378 return MCA_IS_GLOBAL;
379}
380
381/**
 382 * recover_from_read_error - Try to recover from errors whose type is a read.
383 * @slidx: pointer of index of SAL error record
384 * @peidx: pointer of index of processor error section
385 * @pbci: pointer of pal_bus_check_info
386 *
387 * Return value:
388 * 1 on Success / 0 on Failure
389 */
390
391static int
392recover_from_read_error(slidx_table_t *slidx, peidx_table_t *peidx, pal_bus_check_info_t *pbci)
393{
394 sal_log_mod_error_info_t *smei;
395 pal_min_state_area_t *pmsa;
396 struct ia64_psr *psr1, *psr2;
397 ia64_fptr_t *mca_hdlr_bh = (ia64_fptr_t*)mca_handler_bhhook;
398
399 /* Is target address valid? */
400 if (!pbci->tv)
401 return 0;
402
403 /*
404 * cpu read or memory-mapped io read
405 *
406 * offending process affected process OS MCA do
407 * kernel mode kernel mode down system
408 * kernel mode user mode kill the process
409 * user mode kernel mode down system (*)
410 * user mode user mode kill the process
411 *
 412	 * (*) You could terminate the offending user-mode process
 413	 * if (pbci->pv && pbci->pl != 0) *and* if you are sure
 414	 * the process does not hold any kernel locks.
415 */
416
417 psr1 =(struct ia64_psr *)&(peidx_minstate_area(peidx)->pmsa_ipsr);
418
419 /*
420 * Check the privilege level of interrupted context.
421 * If it is user-mode, then terminate affected process.
422 */
423 if (psr1->cpl != 0) {
424 smei = peidx_bus_check(peidx, 0);
425 if (smei->valid.target_identifier) {
426 /*
427 * setup for resume to bottom half of MCA,
428 * "mca_handler_bhhook"
429 */
430 pmsa = (pal_min_state_area_t *)(sal_to_os_handoff_state->pal_min_state | (6ul<<61));
431 /* pass to bhhook as 1st argument (gr8) */
432 pmsa->pmsa_gr[8-1] = smei->target_identifier;
433 /* set interrupted return address (but no use) */
434 pmsa->pmsa_br0 = pmsa->pmsa_iip;
435 /* change resume address to bottom half */
436 pmsa->pmsa_iip = mca_hdlr_bh->fp;
437 pmsa->pmsa_gr[1-1] = mca_hdlr_bh->gp;
438 /* set cpl with kernel mode */
439 psr2 = (struct ia64_psr *)&pmsa->pmsa_ipsr;
440 psr2->cpl = 0;
441 psr2->ri = 0;
442
443 return 1;
444 }
445
446 }
447
448 return 0;
449}
450
451/**
452 * recover_from_platform_error - Recover from platform error.
453 * @slidx: pointer of index of SAL error record
454 * @peidx: pointer of index of processor error section
455 * @pbci: pointer of pal_bus_check_info
456 *
457 * Return value:
458 * 1 on Success / 0 on Failure
459 */
460
461static int
462recover_from_platform_error(slidx_table_t *slidx, peidx_table_t *peidx, pal_bus_check_info_t *pbci)
463{
464 int status = 0;
465 pal_processor_state_info_t *psp = (pal_processor_state_info_t*)peidx_psp(peidx);
466
467 if (psp->bc && pbci->eb && pbci->bsi == 0) {
468 switch(pbci->type) {
469 case 1: /* partial read */
470 case 3: /* full line(cpu) read */
471 case 9: /* I/O space read */
472 status = recover_from_read_error(slidx, peidx, pbci);
473 break;
474 case 0: /* unknown */
475 case 2: /* partial write */
476 case 4: /* full line write */
477 case 5: /* implicit or explicit write-back operation */
478 case 6: /* snoop probe */
479 case 7: /* incoming or outgoing ptc.g */
480 case 8: /* write coalescing transactions */
481 case 10: /* I/O space write */
482 case 11: /* inter-processor interrupt message(IPI) */
483 case 12: /* interrupt acknowledge or external task priority cycle */
484 default:
485 break;
486 }
487 }
488
489 return status;
490}
491
492/**
493 * recover_from_processor_error
494 * @platform: whether there are some platform error section or not
495 * @slidx: pointer of index of SAL error record
496 * @peidx: pointer of index of processor error section
497 * @pbci: pointer of pal_bus_check_info
498 *
499 * Return value:
500 * 1 on Success / 0 on Failure
501 */
502/*
 503 * Below, we try to recover only when all of the following conditions are satisfied:
 504 * 1. Only one processor error section exists.
 505 * 2. A BUS_CHECK exists and no other checks do (except TLB_CHECK).
 506 * 3. There is exactly one BUS_CHECK_INFO entry.
 507 * 4. The "external bus error" flag is set and the other bus-check flags are not.
508 */
509
510static int
511recover_from_processor_error(int platform, slidx_table_t *slidx, peidx_table_t *peidx, pal_bus_check_info_t *pbci)
512{
513 pal_processor_state_info_t *psp = (pal_processor_state_info_t*)peidx_psp(peidx);
514
515 /*
516 * We cannot recover errors with other than bus_check.
517 */
518 if (psp->cc || psp->rc || psp->uc)
519 return 0;
520
521 /*
 522	 * If there is no bus error, the record is odd, but there is nothing to recover from.
523 */
524 if (psp->bc == 0 || pbci == NULL)
525 return 1;
526
527 /*
 528	 * Sorry, we cannot handle more than one bus check.
529 */
530 if (peidx_bus_check_num(peidx) > 1)
531 return 0;
532 /*
533 * Well, here is only one bus error.
534 */
535 if (pbci->ib || pbci->cc)
536 return 0;
537 if (pbci->eb && pbci->bsi > 0)
538 return 0;
539 if (psp->ci == 0)
540 return 0;
541
542 /*
 543	 * This is a local MCA and is estimated to be a recoverable external
 544	 * bus error (e.g. a load from poisoned memory), which implies that
 545	 * there are some platform errors.
546 */
547 if (platform)
548 return recover_from_platform_error(slidx, peidx, pbci);
549 /*
 550	 * We cannot recover from a malformed SAL error record.
551 */
552 return 0;
553}
554
555/**
556 * mca_try_to_recover - Try to recover from MCA
557 * @rec: pointer to a SAL error record
558 *
559 * Return value:
560 * 1 on Success / 0 on Failure
561 */
562
563static int
564mca_try_to_recover(void *rec,
565 ia64_mca_sal_to_os_state_t *sal_to_os_state,
566 ia64_mca_os_to_sal_state_t *os_to_sal_state)
567{
568 int platform_err;
569 int n_proc_err;
570 slidx_table_t slidx;
571 peidx_table_t peidx;
572 pal_bus_check_info_t pbci;
573
574 /* handoff state from/to mca.c */
575 sal_to_os_handoff_state = sal_to_os_state;
576 os_to_sal_handoff_state = os_to_sal_state;
577
578 /* Make index of SAL error record */
579 platform_err = mca_make_slidx(rec, &slidx);
580
581 /* Count processor error sections */
582 n_proc_err = slidx_count(&slidx, proc_err);
583
 584	/* Currently, the OS can recover only when there is exactly one processor error section */
585 if (n_proc_err > 1)
586 return 0;
587 else if (n_proc_err == 0) {
588 /* Weird SAL record ... We need not recover */
589
590 return 1;
591 }
592
593 /* Make index of processor error section */
594 mca_make_peidx((sal_log_processor_info_t*)slidx_first_entry(&slidx.proc_err)->hdr, &peidx);
595
596 /* Extract Processor BUS_CHECK[0] */
597 *((u64*)&pbci) = peidx_check_info(&peidx, bus_check, 0);
598
599 /* Check whether MCA is global or not */
600 if (is_mca_global(&peidx, &pbci))
601 return 0;
602
603 /* Try to recover a processor error */
604 return recover_from_processor_error(platform_err, &slidx, &peidx, &pbci);
605}
606
607/*
608 * =============================================================================
609 */
610
611int __init mca_external_handler_init(void)
612{
613 if (init_record_index_pools())
614 return -ENOMEM;
615
616 /* register external mca handlers */
617 if (ia64_reg_MCA_extension(mca_try_to_recover)){
618 printk(KERN_ERR "ia64_reg_MCA_extension failed.\n");
619 kfree(slidx_pool.buffer);
620 return -EFAULT;
621 }
622 return 0;
623}
624
625void __exit mca_external_handler_exit(void)
626{
627 /* unregister external mca handlers */
628 ia64_unreg_MCA_extension();
629 kfree(slidx_pool.buffer);
630}
631
632module_init(mca_external_handler_init);
633module_exit(mca_external_handler_exit);
634
635module_param(sal_rec_max, int, 0644);
636MODULE_PARM_DESC(sal_rec_max, "Max size of SAL error record");
637
638MODULE_DESCRIPTION("ia64 platform dependent mca handler driver");
639MODULE_LICENSE("GPL");
diff --git a/arch/ia64/kernel/mca_drv.h b/arch/ia64/kernel/mca_drv.h
new file mode 100644
index 000000000000..0227b761f2c4
--- /dev/null
+++ b/arch/ia64/kernel/mca_drv.h
@@ -0,0 +1,113 @@
1/*
2 * File: mca_drv.h
3 * Purpose: Define helpers for Generic MCA handling
4 *
5 * Copyright (C) 2004 FUJITSU LIMITED
6 * Copyright (C) Hidetoshi Seto (seto.hidetoshi@jp.fujitsu.com)
7 */
8/*
9 * Processor error section:
10 *
11 * +-sal_log_processor_info_t *info-------------+
12 * | sal_log_section_hdr_t header; |
13 * | ... |
14 * | sal_log_mod_error_info_t info[0]; |
15 * +-+----------------+-------------------------+
16 * | CACHE_CHECK | ^ num_cache_check v
17 * +----------------+
18 * | TLB_CHECK | ^ num_tlb_check v
19 * +----------------+
20 * | BUS_CHECK | ^ num_bus_check v
21 * +----------------+
22 * | REG_FILE_CHECK | ^ num_reg_file_check v
23 * +----------------+
24 * | MS_CHECK | ^ num_ms_check v
25 * +-struct cpuid_info *id----------------------+
26 * | regs[5]; |
27 * | reserved; |
28 * +-sal_processor_static_info_t *regs----------+
29 * | valid; |
30 * | ... |
31 * | fr[128]; |
32 * +--------------------------------------------+
33 */
34
35/* peidx: index of processor error section */
36typedef struct peidx_table {
37 sal_log_processor_info_t *info;
38 struct sal_cpuid_info *id;
39 sal_processor_static_info_t *regs;
40} peidx_table_t;
41
42#define peidx_head(p) (((p)->info))
43#define peidx_mid(p) (((p)->id))
44#define peidx_bottom(p) (((p)->regs))
45
46#define peidx_psp(p) (&(peidx_head(p)->proc_state_parameter))
47#define peidx_field_valid(p) (&(peidx_head(p)->valid))
48#define peidx_minstate_area(p) (&(peidx_bottom(p)->min_state_area))
49
50#define peidx_cache_check_num(p) (peidx_head(p)->valid.num_cache_check)
51#define peidx_tlb_check_num(p) (peidx_head(p)->valid.num_tlb_check)
52#define peidx_bus_check_num(p) (peidx_head(p)->valid.num_bus_check)
53#define peidx_reg_file_check_num(p) (peidx_head(p)->valid.num_reg_file_check)
54#define peidx_ms_check_num(p) (peidx_head(p)->valid.num_ms_check)
55
56#define peidx_cache_check_idx(p, n) (n)
57#define peidx_tlb_check_idx(p, n) (peidx_cache_check_idx(p, peidx_cache_check_num(p)) + n)
58#define peidx_bus_check_idx(p, n) (peidx_tlb_check_idx(p, peidx_tlb_check_num(p)) + n)
59#define peidx_reg_file_check_idx(p, n) (peidx_bus_check_idx(p, peidx_bus_check_num(p)) + n)
60#define peidx_ms_check_idx(p, n) (peidx_reg_file_check_idx(p, peidx_reg_file_check_num(p)) + n)
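/*
 * Illustrative example: the check-info entries are laid out back to back in
 * info[], so with num_cache_check == 2 and num_tlb_check == 1,
 * peidx_bus_check_idx(p, 0) evaluates to 3, i.e. BUS_CHECK[0] is the fourth
 * entry of info[].
 */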
61
62#define peidx_mod_error_info(p, name, n) \
63({ int __idx = peidx_##name##_idx(p, n); \
64 sal_log_mod_error_info_t *__ret = NULL; \
65 if (peidx_##name##_num(p) > n) /*BUG*/ \
66 __ret = &(peidx_head(p)->info[__idx]); \
67 __ret; })
68
69#define peidx_cache_check(p, n) peidx_mod_error_info(p, cache_check, n)
70#define peidx_tlb_check(p, n) peidx_mod_error_info(p, tlb_check, n)
71#define peidx_bus_check(p, n) peidx_mod_error_info(p, bus_check, n)
72#define peidx_reg_file_check(p, n) peidx_mod_error_info(p, reg_file_check, n)
73#define peidx_ms_check(p, n) peidx_mod_error_info(p, ms_check, n)
74
75#define peidx_check_info(proc, name, n) \
76({ \
77 sal_log_mod_error_info_t *__info = peidx_mod_error_info(proc, name, n);\
78 u64 __temp = __info && __info->valid.check_info \
79 ? __info->check_info : 0; \
80 __temp; })
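/*
 * Illustrative usage (as in mca_drv.c above): extract BUS_CHECK[0] of the
 * processor error section into a pal_bus_check_info_t:
 *
 *	pal_bus_check_info_t pbci;
 *	*((u64 *)&pbci) = peidx_check_info(&peidx, bus_check, 0);
 */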
81
82/* slidx: index of SAL log error record */
83
84typedef struct slidx_list {
85 struct list_head list;
86 sal_log_section_hdr_t *hdr;
87} slidx_list_t;
88
89typedef struct slidx_table {
90 sal_log_record_header_t *header;
91 int n_sections; /* # of section headers */
92 struct list_head proc_err;
93 struct list_head mem_dev_err;
94 struct list_head sel_dev_err;
95 struct list_head pci_bus_err;
96 struct list_head smbios_dev_err;
97 struct list_head pci_comp_err;
98 struct list_head plat_specific_err;
99 struct list_head host_ctlr_err;
100 struct list_head plat_bus_err;
101 struct list_head unsupported; /* list of unsupported sections */
102} slidx_table_t;
103
104#define slidx_foreach_entry(pos, head) \
105 list_for_each_entry(pos, head, list)
106#define slidx_first_entry(head) \
107 (((head)->next != (head)) ? list_entry((head)->next, typeof(slidx_list_t), list) : NULL)
108#define slidx_count(slidx, sec) \
109({ int __count = 0; \
110 slidx_list_t *__pos; \
111 slidx_foreach_entry(__pos, &((slidx)->sec)) { __count++; }\
112 __count; })
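/*
 * Illustrative usage (as in mca_drv.c above): count the processor error
 * sections of a record and pick up the first one:
 *
 *	int n_proc_err = slidx_count(&slidx, proc_err);
 *	sal_log_section_hdr_t *hdr = slidx_first_entry(&slidx.proc_err)->hdr;
 */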
113
diff --git a/arch/ia64/kernel/mca_drv_asm.S b/arch/ia64/kernel/mca_drv_asm.S
new file mode 100644
index 000000000000..bcfa05acc561
--- /dev/null
+++ b/arch/ia64/kernel/mca_drv_asm.S
@@ -0,0 +1,45 @@
1/*
2 * File: mca_drv_asm.S
3 * Purpose: Assembly portion of Generic MCA handling
4 *
5 * Copyright (C) 2004 FUJITSU LIMITED
6 * Copyright (C) Hidetoshi Seto (seto.hidetoshi@jp.fujitsu.com)
7 */
8#include <linux/config.h>
9#include <linux/threads.h>
10
11#include <asm/asmmacro.h>
12#include <asm/processor.h>
13
14GLOBAL_ENTRY(mca_handler_bhhook)
15 invala // clear RSE ?
16 ;; //
17 cover //
18 ;; //
19 clrrrb //
20 ;;
21 alloc r16=ar.pfs,0,2,1,0 // make a new frame
22 ;;
23 mov r13=IA64_KR(CURRENT) // current task pointer
24 ;;
25 adds r12=IA64_TASK_THREAD_KSP_OFFSET,r13
26 ;;
27 ld8 r12=[r12] // stack pointer
28 ;;
29 mov loc0=r16
30 movl loc1=mca_handler_bh // recovery C function
31 ;;
32 mov out0=r8 // poisoned address
33 mov b6=loc1
34 ;;
35 mov loc1=rp
36 ;;
37 br.call.sptk.many rp=b6 // does not return ...
38 ;;
39 mov ar.pfs=loc0
40 mov rp=loc1
41 ;;
42 mov r8=r0
43 br.ret.sptk.many rp
44 ;;
45END(mca_handler_bhhook)
diff --git a/arch/ia64/kernel/minstate.h b/arch/ia64/kernel/minstate.h
new file mode 100644
index 000000000000..1dbc7b2497c9
--- /dev/null
+++ b/arch/ia64/kernel/minstate.h
@@ -0,0 +1,251 @@
1#include <linux/config.h>
2
3#include <asm/cache.h>
4
5#include "entry.h"
6
7/*
8 * For ivt.s we want to access the stack virtually so we don't have to disable translation
9 * on interrupts.
10 *
11 * On entry:
12 * r1: pointer to current task (ar.k6)
13 */
14#define MINSTATE_START_SAVE_MIN_VIRT \
15(pUStk) mov ar.rsc=0; /* set enforced lazy mode, pl 0, little-endian, loadrs=0 */ \
16 ;; \
17(pUStk) mov.m r24=ar.rnat; \
18(pUStk) addl r22=IA64_RBS_OFFSET,r1; /* compute base of RBS */ \
19(pKStk) mov r1=sp; /* get sp */ \
20 ;; \
21(pUStk) lfetch.fault.excl.nt1 [r22]; \
22(pUStk) addl r1=IA64_STK_OFFSET-IA64_PT_REGS_SIZE,r1; /* compute base of memory stack */ \
23(pUStk) mov r23=ar.bspstore; /* save ar.bspstore */ \
24 ;; \
25(pUStk) mov ar.bspstore=r22; /* switch to kernel RBS */ \
26(pKStk) addl r1=-IA64_PT_REGS_SIZE,r1; /* if in kernel mode, use sp (r12) */ \
27 ;; \
28(pUStk) mov r18=ar.bsp; \
29(pUStk) mov ar.rsc=0x3; /* set eager mode, pl 0, little-endian, loadrs=0 */
30
31#define MINSTATE_END_SAVE_MIN_VIRT \
32 bsw.1; /* switch back to bank 1 (must be last in insn group) */ \
33 ;;
34
35/*
36 * For mca_asm.S we want to access the stack physically since the state is saved before we
37 * go virtual and don't want to destroy the iip or ipsr.
38 */
39#define MINSTATE_START_SAVE_MIN_PHYS \
40(pKStk) mov r3=IA64_KR(PER_CPU_DATA);; \
41(pKStk) addl r3=THIS_CPU(ia64_mca_data),r3;; \
42(pKStk) ld8 r3 = [r3];; \
43(pKStk) addl r3=IA64_MCA_CPU_INIT_STACK_OFFSET,r3;; \
44(pKStk) addl sp=IA64_STK_OFFSET-IA64_PT_REGS_SIZE,r3; \
45(pUStk) mov ar.rsc=0; /* set enforced lazy mode, pl 0, little-endian, loadrs=0 */ \
46(pUStk) addl r22=IA64_RBS_OFFSET,r1; /* compute base of register backing store */ \
47 ;; \
48(pUStk) mov r24=ar.rnat; \
49(pUStk) addl r1=IA64_STK_OFFSET-IA64_PT_REGS_SIZE,r1; /* compute base of memory stack */ \
50(pUStk) mov r23=ar.bspstore; /* save ar.bspstore */ \
51(pUStk) dep r22=-1,r22,61,3; /* compute kernel virtual addr of RBS */ \
52 ;; \
53(pKStk) addl r1=-IA64_PT_REGS_SIZE,r1; /* if in kernel mode, use sp (r12) */ \
54(pUStk) mov ar.bspstore=r22; /* switch to kernel RBS */ \
55 ;; \
56(pUStk) mov r18=ar.bsp; \
57(pUStk) mov ar.rsc=0x3; /* set eager mode, pl 0, little-endian, loadrs=0 */ \
58
59#define MINSTATE_END_SAVE_MIN_PHYS \
60 dep r12=-1,r12,61,3; /* make sp a kernel virtual address */ \
61 ;;
62
63#ifdef MINSTATE_VIRT
64# define MINSTATE_GET_CURRENT(reg) mov reg=IA64_KR(CURRENT)
65# define MINSTATE_START_SAVE_MIN MINSTATE_START_SAVE_MIN_VIRT
66# define MINSTATE_END_SAVE_MIN MINSTATE_END_SAVE_MIN_VIRT
67#endif
68
69#ifdef MINSTATE_PHYS
70# define MINSTATE_GET_CURRENT(reg) mov reg=IA64_KR(CURRENT);; tpa reg=reg
71# define MINSTATE_START_SAVE_MIN MINSTATE_START_SAVE_MIN_PHYS
72# define MINSTATE_END_SAVE_MIN MINSTATE_END_SAVE_MIN_PHYS
73#endif
74
75/*
76 * DO_SAVE_MIN switches to the kernel stacks (if necessary) and saves
77 * the minimum state necessary that allows us to turn psr.ic back
78 * on.
79 *
80 * Assumed state upon entry:
81 * psr.ic: off
82 * r31: contains saved predicates (pr)
83 *
84 * Upon exit, the state is as follows:
85 * psr.ic: off
86 * r2 = points to &pt_regs.r16
87 * r8 = contents of ar.ccv
88 * r9 = contents of ar.csd
89 * r10 = contents of ar.ssd
90 * r11 = FPSR_DEFAULT
91 * r12 = kernel sp (kernel virtual address)
92 * r13 = points to current task_struct (kernel virtual address)
93 * p15 = TRUE if psr.i is set in cr.ipsr
94 * predicate registers (other than p2, p3, and p15), b6, r3, r14, r15:
95 * preserved
96 *
97 * Note that psr.ic is NOT turned on by this macro. This is so that
98 * we can pass interruption state as arguments to a handler.
99 */
100#define DO_SAVE_MIN(COVER,SAVE_IFS,EXTRA) \
101 MINSTATE_GET_CURRENT(r16); /* M (or M;;I) */ \
102 mov r27=ar.rsc; /* M */ \
103 mov r20=r1; /* A */ \
104 mov r25=ar.unat; /* M */ \
105 mov r29=cr.ipsr; /* M */ \
106 mov r26=ar.pfs; /* I */ \
107 mov r28=cr.iip; /* M */ \
108 mov r21=ar.fpsr; /* M */ \
109 COVER; /* B;; (or nothing) */ \
110 ;; \
111 adds r16=IA64_TASK_THREAD_ON_USTACK_OFFSET,r16; \
112 ;; \
113 ld1 r17=[r16]; /* load current->thread.on_ustack flag */ \
114 st1 [r16]=r0; /* clear current->thread.on_ustack flag */ \
115 adds r1=-IA64_TASK_THREAD_ON_USTACK_OFFSET,r16 \
116 /* switch from user to kernel RBS: */ \
117 ;; \
118 invala; /* M */ \
119 SAVE_IFS; \
120 cmp.eq pKStk,pUStk=r0,r17; /* are we in kernel mode already? */ \
121 ;; \
122 MINSTATE_START_SAVE_MIN \
123 adds r17=2*L1_CACHE_BYTES,r1; /* really: biggest cache-line size */ \
124 adds r16=PT(CR_IPSR),r1; \
125 ;; \
126 lfetch.fault.excl.nt1 [r17],L1_CACHE_BYTES; \
127 st8 [r16]=r29; /* save cr.ipsr */ \
128 ;; \
129 lfetch.fault.excl.nt1 [r17]; \
130 tbit.nz p15,p0=r29,IA64_PSR_I_BIT; \
131 mov r29=b0 \
132 ;; \
133 adds r16=PT(R8),r1; /* initialize first base pointer */ \
134 adds r17=PT(R9),r1; /* initialize second base pointer */ \
135(pKStk) mov r18=r0; /* make sure r18 isn't NaT */ \
136 ;; \
137.mem.offset 0,0; st8.spill [r16]=r8,16; \
138.mem.offset 8,0; st8.spill [r17]=r9,16; \
139 ;; \
140.mem.offset 0,0; st8.spill [r16]=r10,24; \
141.mem.offset 8,0; st8.spill [r17]=r11,24; \
142 ;; \
143 st8 [r16]=r28,16; /* save cr.iip */ \
144 st8 [r17]=r30,16; /* save cr.ifs */ \
145(pUStk) sub r18=r18,r22; /* r18=RSE.ndirty*8 */ \
146 mov r8=ar.ccv; \
147 mov r9=ar.csd; \
148 mov r10=ar.ssd; \
149 movl r11=FPSR_DEFAULT; /* L-unit */ \
150 ;; \
151 st8 [r16]=r25,16; /* save ar.unat */ \
152 st8 [r17]=r26,16; /* save ar.pfs */ \
153 shl r18=r18,16; /* compute ar.rsc to be used for "loadrs" */ \
154 ;; \
155 st8 [r16]=r27,16; /* save ar.rsc */ \
156(pUStk) st8 [r17]=r24,16; /* save ar.rnat */ \
157(pKStk) adds r17=16,r17; /* skip over ar_rnat field */ \
158 ;; /* avoid RAW on r16 & r17 */ \
159(pUStk) st8 [r16]=r23,16; /* save ar.bspstore */ \
160 st8 [r17]=r31,16; /* save predicates */ \
161(pKStk) adds r16=16,r16; /* skip over ar_bspstore field */ \
162 ;; \
163 st8 [r16]=r29,16; /* save b0 */ \
164 st8 [r17]=r18,16; /* save ar.rsc value for "loadrs" */ \
165 cmp.eq pNonSys,pSys=r0,r0 /* initialize pSys=0, pNonSys=1 */ \
166 ;; \
167.mem.offset 0,0; st8.spill [r16]=r20,16; /* save original r1 */ \
168.mem.offset 8,0; st8.spill [r17]=r12,16; \
169 adds r12=-16,r1; /* switch to kernel memory stack (with 16 bytes of scratch) */ \
170 ;; \
171.mem.offset 0,0; st8.spill [r16]=r13,16; \
172.mem.offset 8,0; st8.spill [r17]=r21,16; /* save ar.fpsr */ \
173 mov r13=IA64_KR(CURRENT); /* establish `current' */ \
174 ;; \
175.mem.offset 0,0; st8.spill [r16]=r15,16; \
176.mem.offset 8,0; st8.spill [r17]=r14,16; \
177 ;; \
178.mem.offset 0,0; st8.spill [r16]=r2,16; \
179.mem.offset 8,0; st8.spill [r17]=r3,16; \
180 adds r2=IA64_PT_REGS_R16_OFFSET,r1; \
181 ;; \
182 EXTRA; \
183 movl r1=__gp; /* establish kernel global pointer */ \
184 ;; \
185 MINSTATE_END_SAVE_MIN
186
187/*
188 * SAVE_REST saves the remainder of pt_regs (with psr.ic on).
189 *
190 * Assumed state upon entry:
191 * psr.ic: on
192 * r2: points to &pt_regs.r16
193 * r3: points to &pt_regs.r17
194 * r8: contents of ar.ccv
195 * r9: contents of ar.csd
196 * r10: contents of ar.ssd
197 * r11: FPSR_DEFAULT
198 *
199 * Registers r14 and r15 are guaranteed not to be touched by SAVE_REST.
200 */
201#define SAVE_REST \
202.mem.offset 0,0; st8.spill [r2]=r16,16; \
203.mem.offset 8,0; st8.spill [r3]=r17,16; \
204 ;; \
205.mem.offset 0,0; st8.spill [r2]=r18,16; \
206.mem.offset 8,0; st8.spill [r3]=r19,16; \
207 ;; \
208.mem.offset 0,0; st8.spill [r2]=r20,16; \
209.mem.offset 8,0; st8.spill [r3]=r21,16; \
210 mov r18=b6; \
211 ;; \
212.mem.offset 0,0; st8.spill [r2]=r22,16; \
213.mem.offset 8,0; st8.spill [r3]=r23,16; \
214 mov r19=b7; \
215 ;; \
216.mem.offset 0,0; st8.spill [r2]=r24,16; \
217.mem.offset 8,0; st8.spill [r3]=r25,16; \
218 ;; \
219.mem.offset 0,0; st8.spill [r2]=r26,16; \
220.mem.offset 8,0; st8.spill [r3]=r27,16; \
221 ;; \
222.mem.offset 0,0; st8.spill [r2]=r28,16; \
223.mem.offset 8,0; st8.spill [r3]=r29,16; \
224 ;; \
225.mem.offset 0,0; st8.spill [r2]=r30,16; \
226.mem.offset 8,0; st8.spill [r3]=r31,32; \
227 ;; \
228 mov ar.fpsr=r11; /* M-unit */ \
229 st8 [r2]=r8,8; /* ar.ccv */ \
230 adds r24=PT(B6)-PT(F7),r3; \
231 ;; \
232 stf.spill [r2]=f6,32; \
233 stf.spill [r3]=f7,32; \
234 ;; \
235 stf.spill [r2]=f8,32; \
236 stf.spill [r3]=f9,32; \
237 ;; \
238 stf.spill [r2]=f10; \
239 stf.spill [r3]=f11; \
240 adds r25=PT(B7)-PT(F11),r3; \
241 ;; \
242 st8 [r24]=r18,16; /* b6 */ \
243 st8 [r25]=r19,16; /* b7 */ \
244 ;; \
245 st8 [r24]=r9; /* ar.csd */ \
246 st8 [r25]=r10; /* ar.ssd */ \
247 ;;
248
249#define SAVE_MIN_WITH_COVER DO_SAVE_MIN(cover, mov r30=cr.ifs,)
250#define SAVE_MIN_WITH_COVER_R19 DO_SAVE_MIN(cover, mov r30=cr.ifs, mov r15=r19)
251#define SAVE_MIN DO_SAVE_MIN( , mov r30=r0, )
diff --git a/arch/ia64/kernel/module.c b/arch/ia64/kernel/module.c
new file mode 100644
index 000000000000..febc091c2f02
--- /dev/null
+++ b/arch/ia64/kernel/module.c
@@ -0,0 +1,952 @@
1/*
2 * IA-64-specific support for kernel module loader.
3 *
4 * Copyright (C) 2003 Hewlett-Packard Co
5 * David Mosberger-Tang <davidm@hpl.hp.com>
6 *
7 * Loosely based on patch by Rusty Russell.
8 */
9
10/* relocs tested so far:
11
12 DIR64LSB
13 FPTR64LSB
14 GPREL22
15 LDXMOV
16 LDXMOV
17 LTOFF22
18 LTOFF22X
19 LTOFF22X
20 LTOFF_FPTR22
21 PCREL21B (for br.call only; br.cond is not supported out of modules!)
22 PCREL60B (for brl.cond only; brl.call is not supported for modules!)
23 PCREL64LSB
24 SECREL32LSB
25 SEGREL64LSB
26 */
27
28#include <linux/config.h>
29
30#include <linux/kernel.h>
31#include <linux/sched.h>
32#include <linux/elf.h>
33#include <linux/moduleloader.h>
34#include <linux/string.h>
35#include <linux/vmalloc.h>
36
37#include <asm/patch.h>
38#include <asm/unaligned.h>
39
40#define ARCH_MODULE_DEBUG 0
41
42#if ARCH_MODULE_DEBUG
43# define DEBUGP printk
44# define inline
45#else
46# define DEBUGP(fmt , a...)
47#endif
48
49#ifdef CONFIG_ITANIUM
50# define USE_BRL 0
51#else
52# define USE_BRL 1
53#endif
54
55#define MAX_LTOFF ((uint64_t) (1 << 22)) /* max. allowable linkage-table offset */
56
57/* Define some relocation helper macros/types: */
58
59#define FORMAT_SHIFT 0
60#define FORMAT_BITS 3
61#define FORMAT_MASK ((1 << FORMAT_BITS) - 1)
62#define VALUE_SHIFT 3
63#define VALUE_BITS 5
64#define VALUE_MASK ((1 << VALUE_BITS) - 1)
65
66enum reloc_target_format {
67 /* direct encoded formats: */
68 RF_NONE = 0,
69 RF_INSN14 = 1,
70 RF_INSN22 = 2,
71 RF_INSN64 = 3,
72 RF_32MSB = 4,
73 RF_32LSB = 5,
74 RF_64MSB = 6,
75 RF_64LSB = 7,
76
77 /* formats that cannot be directly decoded: */
78 RF_INSN60,
79 RF_INSN21B, /* imm21 form 1 */
80 RF_INSN21M, /* imm21 form 2 */
81 RF_INSN21F /* imm21 form 3 */
82};
83
84enum reloc_value_formula {
85 RV_DIRECT = 4, /* S + A */
86 RV_GPREL = 5, /* @gprel(S + A) */
87 RV_LTREL = 6, /* @ltoff(S + A) */
88 RV_PLTREL = 7, /* @pltoff(S + A) */
89 RV_FPTR = 8, /* @fptr(S + A) */
90 RV_PCREL = 9, /* S + A - P */
91 RV_LTREL_FPTR = 10, /* @ltoff(@fptr(S + A)) */
92 RV_SEGREL = 11, /* @segrel(S + A) */
93 RV_SECREL = 12, /* @secrel(S + A) */
94 RV_BDREL = 13, /* BD + A */
95 RV_LTV = 14, /* S + A (like RV_DIRECT, except frozen at static link-time) */
96 RV_PCREL2 = 15, /* S + A - P */
97 RV_SPECIAL = 16, /* various (see below) */
98 RV_RSVD17 = 17,
99 RV_TPREL = 18, /* @tprel(S + A) */
100 RV_LTREL_TPREL = 19, /* @ltoff(@tprel(S + A)) */
101 RV_DTPMOD = 20, /* @dtpmod(S + A) */
102 RV_LTREL_DTPMOD = 21, /* @ltoff(@dtpmod(S + A)) */
103 RV_DTPREL = 22, /* @dtprel(S + A) */
104 RV_LTREL_DTPREL = 23, /* @ltoff(@dtprel(S + A)) */
105 RV_RSVD24 = 24,
106 RV_RSVD25 = 25,
107 RV_RSVD26 = 26,
108 RV_RSVD27 = 27
109 /* 28-31 reserved for implementation-specific purposes. */
110};
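/*
 * Illustrative decoding (assuming the standard psABI value
 * R_IA64_GPREL22 == 0x2a):
 *	format  = (0x2a >> FORMAT_SHIFT) & FORMAT_MASK = 2  (RF_INSN22)
 *	formula = (0x2a >> VALUE_SHIFT)  & VALUE_MASK  = 5  (RV_GPREL)
 * which is how do_reloc() below splits each relocation type.
 */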
111
112#define N(reloc) [R_IA64_##reloc] = #reloc
113
114static const char *reloc_name[256] = {
115 N(NONE), N(IMM14), N(IMM22), N(IMM64),
116 N(DIR32MSB), N(DIR32LSB), N(DIR64MSB), N(DIR64LSB),
117 N(GPREL22), N(GPREL64I), N(GPREL32MSB), N(GPREL32LSB),
118 N(GPREL64MSB), N(GPREL64LSB), N(LTOFF22), N(LTOFF64I),
119 N(PLTOFF22), N(PLTOFF64I), N(PLTOFF64MSB), N(PLTOFF64LSB),
120 N(FPTR64I), N(FPTR32MSB), N(FPTR32LSB), N(FPTR64MSB),
121 N(FPTR64LSB), N(PCREL60B), N(PCREL21B), N(PCREL21M),
122 N(PCREL21F), N(PCREL32MSB), N(PCREL32LSB), N(PCREL64MSB),
123 N(PCREL64LSB), N(LTOFF_FPTR22), N(LTOFF_FPTR64I), N(LTOFF_FPTR32MSB),
124 N(LTOFF_FPTR32LSB), N(LTOFF_FPTR64MSB), N(LTOFF_FPTR64LSB), N(SEGREL32MSB),
125 N(SEGREL32LSB), N(SEGREL64MSB), N(SEGREL64LSB), N(SECREL32MSB),
126 N(SECREL32LSB), N(SECREL64MSB), N(SECREL64LSB), N(REL32MSB),
127 N(REL32LSB), N(REL64MSB), N(REL64LSB), N(LTV32MSB),
128 N(LTV32LSB), N(LTV64MSB), N(LTV64LSB), N(PCREL21BI),
129 N(PCREL22), N(PCREL64I), N(IPLTMSB), N(IPLTLSB),
130 N(COPY), N(LTOFF22X), N(LDXMOV), N(TPREL14),
131 N(TPREL22), N(TPREL64I), N(TPREL64MSB), N(TPREL64LSB),
132 N(LTOFF_TPREL22), N(DTPMOD64MSB), N(DTPMOD64LSB), N(LTOFF_DTPMOD22),
133 N(DTPREL14), N(DTPREL22), N(DTPREL64I), N(DTPREL32MSB),
134 N(DTPREL32LSB), N(DTPREL64MSB), N(DTPREL64LSB), N(LTOFF_DTPREL22)
135};
136
137#undef N
138
139struct got_entry {
140 uint64_t val;
141};
142
143struct fdesc {
144 uint64_t ip;
145 uint64_t gp;
146};
147
148/* Opaque struct for insns, to protect against derefs. */
149struct insn;
150
151static inline uint64_t
152bundle (const struct insn *insn)
153{
154 return (uint64_t) insn & ~0xfUL;
155}
156
157static inline int
158slot (const struct insn *insn)
159{
160 return (uint64_t) insn & 0x3;
161}
162
163static int
164apply_imm64 (struct module *mod, struct insn *insn, uint64_t val)
165{
166 if (slot(insn) != 2) {
167 printk(KERN_ERR "%s: invalid slot number %d for IMM64\n",
168 mod->name, slot(insn));
169 return 0;
170 }
171 ia64_patch_imm64((u64) insn, val);
172 return 1;
173}
174
175static int
176apply_imm60 (struct module *mod, struct insn *insn, uint64_t val)
177{
178 if (slot(insn) != 2) {
179 printk(KERN_ERR "%s: invalid slot number %d for IMM60\n",
180 mod->name, slot(insn));
181 return 0;
182 }
183 if (val + ((uint64_t) 1 << 59) >= (1UL << 60)) {
184 printk(KERN_ERR "%s: value %ld out of IMM60 range\n", mod->name, (int64_t) val);
185 return 0;
186 }
187 ia64_patch_imm60((u64) insn, val);
188 return 1;
189}
190
191static int
192apply_imm22 (struct module *mod, struct insn *insn, uint64_t val)
193{
194 if (val + (1 << 21) >= (1 << 22)) {
195 printk(KERN_ERR "%s: value %li out of IMM22 range\n", mod->name, (int64_t)val);
196 return 0;
197 }
198 ia64_patch((u64) insn, 0x01fffcfe000UL, ( ((val & 0x200000UL) << 15) /* bit 21 -> 36 */
199 | ((val & 0x1f0000UL) << 6) /* bit 16 -> 22 */
200 | ((val & 0x00ff80UL) << 20) /* bit 7 -> 27 */
201 | ((val & 0x00007fUL) << 13) /* bit 0 -> 13 */));
202 return 1;
203}
204
205static int
206apply_imm21b (struct module *mod, struct insn *insn, uint64_t val)
207{
208 if (val + (1 << 20) >= (1 << 21)) {
209 printk(KERN_ERR "%s: value %li out of IMM21b range\n", mod->name, (int64_t)val);
210 return 0;
211 }
212 ia64_patch((u64) insn, 0x11ffffe000UL, ( ((val & 0x100000UL) << 16) /* bit 20 -> 36 */
213 | ((val & 0x0fffffUL) << 13) /* bit 0 -> 13 */));
214 return 1;
215}
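/*
 * Note on the range checks in apply_imm60/imm22/imm21b above: an expression
 * like "val + (1 << 21) >= (1 << 22)" rejects any value outside the signed
 * 22-bit range [-2^21, 2^21); the other widths use the same unsigned
 * wrap-around idiom.
 */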
216
217#if USE_BRL
218
219struct plt_entry {
220 /* Three instruction bundles in PLT. */
221 unsigned char bundle[2][16];
222};
223
224static const struct plt_entry ia64_plt_template = {
225 {
226 {
227 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, /* [MLX] nop.m 0 */
228 0x00, 0x00, 0x00, 0x00, 0x00, 0x20, /* movl gp=TARGET_GP */
229 0x00, 0x00, 0x00, 0x60
230 },
231 {
232 0x05, 0x00, 0x00, 0x00, 0x01, 0x00, /* [MLX] nop.m 0 */
233 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* brl.many TARGET_IP */
234 0x08, 0x00, 0x00, 0xc0
235 }
236 }
237};
238
239static int
240patch_plt (struct module *mod, struct plt_entry *plt, long target_ip, unsigned long target_gp)
241{
242 if (apply_imm64(mod, (struct insn *) (plt->bundle[0] + 2), target_gp)
243 && apply_imm60(mod, (struct insn *) (plt->bundle[1] + 2),
244 (target_ip - (int64_t) plt->bundle[1]) / 16))
245 return 1;
246 return 0;
247}
248
249unsigned long
250plt_target (struct plt_entry *plt)
251{
252 uint64_t b0, b1, *b = (uint64_t *) plt->bundle[1];
253 long off;
254
255 b0 = b[0]; b1 = b[1];
256 off = ( ((b1 & 0x00fffff000000000UL) >> 36) /* imm20b -> bit 0 */
257 | ((b0 >> 48) << 20) | ((b1 & 0x7fffffUL) << 36) /* imm39 -> bit 20 */
258 | ((b1 & 0x0800000000000000UL) << 0)); /* i -> bit 59 */
259 return (long) plt->bundle[1] + 16*off;
260}
261
262#else /* !USE_BRL */
263
264struct plt_entry {
265 /* Three instruction bundles in PLT. */
266 unsigned char bundle[3][16];
267};
268
269static const struct plt_entry ia64_plt_template = {
270 {
271 {
272 0x05, 0x00, 0x00, 0x00, 0x01, 0x00, /* [MLX] nop.m 0 */
273 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* movl r16=TARGET_IP */
274 0x02, 0x00, 0x00, 0x60
275 },
276 {
277 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, /* [MLX] nop.m 0 */
278 0x00, 0x00, 0x00, 0x00, 0x00, 0x20, /* movl gp=TARGET_GP */
279 0x00, 0x00, 0x00, 0x60
280 },
281 {
282 0x11, 0x00, 0x00, 0x00, 0x01, 0x00, /* [MIB] nop.m 0 */
283 0x60, 0x80, 0x04, 0x80, 0x03, 0x00, /* mov b6=r16 */
284 0x60, 0x00, 0x80, 0x00 /* br.few b6 */
285 }
286 }
287};
288
289static int
290patch_plt (struct module *mod, struct plt_entry *plt, long target_ip, unsigned long target_gp)
291{
292 if (apply_imm64(mod, (struct insn *) (plt->bundle[0] + 2), target_ip)
293 && apply_imm64(mod, (struct insn *) (plt->bundle[1] + 2), target_gp))
294 return 1;
295 return 0;
296}
297
298unsigned long
299plt_target (struct plt_entry *plt)
300{
301 uint64_t b0, b1, *b = (uint64_t *) plt->bundle[0];
302
303 b0 = b[0]; b1 = b[1];
304 return ( ((b1 & 0x000007f000000000) >> 36) /* imm7b -> bit 0 */
305 | ((b1 & 0x07fc000000000000) >> 43) /* imm9d -> bit 7 */
306 | ((b1 & 0x0003e00000000000) >> 29) /* imm5c -> bit 16 */
307 | ((b1 & 0x0000100000000000) >> 23) /* ic -> bit 21 */
308 | ((b0 >> 46) << 22) | ((b1 & 0x7fffff) << 40) /* imm41 -> bit 22 */
309 | ((b1 & 0x0800000000000000) << 4)); /* i -> bit 63 */
310}
311
312#endif /* !USE_BRL */
313
314void *
315module_alloc (unsigned long size)
316{
317 if (!size)
318 return NULL;
319 return vmalloc(size);
320}
321
322void
323module_free (struct module *mod, void *module_region)
324{
325 if (mod->arch.init_unw_table && module_region == mod->module_init) {
326 unw_remove_unwind_table(mod->arch.init_unw_table);
327 mod->arch.init_unw_table = NULL;
328 }
329 vfree(module_region);
330}
331
332/* Have we already seen one of these relocations? */
333/* FIXME: we could look in other sections, too --RR */
334static int
335duplicate_reloc (const Elf64_Rela *rela, unsigned int num)
336{
337 unsigned int i;
338
339 for (i = 0; i < num; i++) {
340 if (rela[i].r_info == rela[num].r_info && rela[i].r_addend == rela[num].r_addend)
341 return 1;
342 }
343 return 0;
344}
345
346/* Count how many GOT entries we may need */
347static unsigned int
348count_gots (const Elf64_Rela *rela, unsigned int num)
349{
350 unsigned int i, ret = 0;
351
352 /* Sure, this is order(n^2), but it's usually short, and not
353 time critical */
354 for (i = 0; i < num; i++) {
355 switch (ELF64_R_TYPE(rela[i].r_info)) {
356 case R_IA64_LTOFF22:
357 case R_IA64_LTOFF22X:
358 case R_IA64_LTOFF64I:
359 case R_IA64_LTOFF_FPTR22:
360 case R_IA64_LTOFF_FPTR64I:
361 case R_IA64_LTOFF_FPTR32MSB:
362 case R_IA64_LTOFF_FPTR32LSB:
363 case R_IA64_LTOFF_FPTR64MSB:
364 case R_IA64_LTOFF_FPTR64LSB:
365 if (!duplicate_reloc(rela, i))
366 ret++;
367 break;
368 }
369 }
370 return ret;
371}
372
373/* Count how many PLT entries we may need */
374static unsigned int
375count_plts (const Elf64_Rela *rela, unsigned int num)
376{
377 unsigned int i, ret = 0;
378
379 /* Sure, this is order(n^2), but it's usually short, and not
380 time critical */
381 for (i = 0; i < num; i++) {
382 switch (ELF64_R_TYPE(rela[i].r_info)) {
383 case R_IA64_PCREL21B:
384 case R_IA64_PLTOFF22:
385 case R_IA64_PLTOFF64I:
386 case R_IA64_PLTOFF64MSB:
387 case R_IA64_PLTOFF64LSB:
388 case R_IA64_IPLTMSB:
389 case R_IA64_IPLTLSB:
390 if (!duplicate_reloc(rela, i))
391 ret++;
392 break;
393 }
394 }
395 return ret;
396}
397
398/* We need to create a function descriptor for any internal function
399 which is referenced. */
400static unsigned int
401count_fdescs (const Elf64_Rela *rela, unsigned int num)
402{
403 unsigned int i, ret = 0;
404
405 /* Sure, this is order(n^2), but it's usually short, and not time critical. */
406 for (i = 0; i < num; i++) {
407 switch (ELF64_R_TYPE(rela[i].r_info)) {
408 case R_IA64_FPTR64I:
409 case R_IA64_FPTR32LSB:
410 case R_IA64_FPTR32MSB:
411 case R_IA64_FPTR64LSB:
412 case R_IA64_FPTR64MSB:
413 case R_IA64_LTOFF_FPTR22:
414 case R_IA64_LTOFF_FPTR32LSB:
415 case R_IA64_LTOFF_FPTR32MSB:
416 case R_IA64_LTOFF_FPTR64I:
417 case R_IA64_LTOFF_FPTR64LSB:
418 case R_IA64_LTOFF_FPTR64MSB:
419 case R_IA64_IPLTMSB:
420 case R_IA64_IPLTLSB:
421 /*
422 * Jumps to static functions sometimes go straight to their
423 * offset. Of course, that may not be possible if the jump is
424 * from init -> core or vice versa, so we need to generate an
425 * FDESC (and PLT etc) for that.
426 */
427 case R_IA64_PCREL21B:
428 if (!duplicate_reloc(rela, i))
429 ret++;
430 break;
431 }
432 }
433 return ret;
434}
435
436int
437module_frob_arch_sections (Elf_Ehdr *ehdr, Elf_Shdr *sechdrs, char *secstrings,
438 struct module *mod)
439{
440 unsigned long core_plts = 0, init_plts = 0, gots = 0, fdescs = 0;
441 Elf64_Shdr *s, *sechdrs_end = sechdrs + ehdr->e_shnum;
442
443 /*
444 * To store the PLTs and function-descriptors, we expand the .text section for
445 * core module-code and the .init.text section for initialization code.
446 */
447 for (s = sechdrs; s < sechdrs_end; ++s)
448 if (strcmp(".core.plt", secstrings + s->sh_name) == 0)
449 mod->arch.core_plt = s;
450 else if (strcmp(".init.plt", secstrings + s->sh_name) == 0)
451 mod->arch.init_plt = s;
452 else if (strcmp(".got", secstrings + s->sh_name) == 0)
453 mod->arch.got = s;
454 else if (strcmp(".opd", secstrings + s->sh_name) == 0)
455 mod->arch.opd = s;
456 else if (strcmp(".IA_64.unwind", secstrings + s->sh_name) == 0)
457 mod->arch.unwind = s;
458
459 if (!mod->arch.core_plt || !mod->arch.init_plt || !mod->arch.got || !mod->arch.opd) {
460 printk(KERN_ERR "%s: sections missing\n", mod->name);
461 return -ENOEXEC;
462 }
463
464 /* GOT and PLTs can occur in any relocated section... */
465 for (s = sechdrs + 1; s < sechdrs_end; ++s) {
466 const Elf64_Rela *rels = (void *)ehdr + s->sh_offset;
467 unsigned long numrels = s->sh_size/sizeof(Elf64_Rela);
468
469 if (s->sh_type != SHT_RELA)
470 continue;
471
472 gots += count_gots(rels, numrels);
473 fdescs += count_fdescs(rels, numrels);
474 if (strstr(secstrings + s->sh_name, ".init"))
475 init_plts += count_plts(rels, numrels);
476 else
477 core_plts += count_plts(rels, numrels);
478 }
479
480 mod->arch.core_plt->sh_type = SHT_NOBITS;
481 mod->arch.core_plt->sh_flags = SHF_EXECINSTR | SHF_ALLOC;
482 mod->arch.core_plt->sh_addralign = 16;
483 mod->arch.core_plt->sh_size = core_plts * sizeof(struct plt_entry);
484 mod->arch.init_plt->sh_type = SHT_NOBITS;
485 mod->arch.init_plt->sh_flags = SHF_EXECINSTR | SHF_ALLOC;
486 mod->arch.init_plt->sh_addralign = 16;
487 mod->arch.init_plt->sh_size = init_plts * sizeof(struct plt_entry);
488 mod->arch.got->sh_type = SHT_NOBITS;
489 mod->arch.got->sh_flags = ARCH_SHF_SMALL | SHF_ALLOC;
490 mod->arch.got->sh_addralign = 8;
491 mod->arch.got->sh_size = gots * sizeof(struct got_entry);
492 mod->arch.opd->sh_type = SHT_NOBITS;
493 mod->arch.opd->sh_flags = SHF_ALLOC;
494 mod->arch.opd->sh_addralign = 8;
495 mod->arch.opd->sh_size = fdescs * sizeof(struct fdesc);
496 DEBUGP("%s: core.plt=%lx, init.plt=%lx, got=%lx, fdesc=%lx\n",
497 __FUNCTION__, mod->arch.core_plt->sh_size, mod->arch.init_plt->sh_size,
498 mod->arch.got->sh_size, mod->arch.opd->sh_size);
499 return 0;
500}
501
502static inline int
503in_init (const struct module *mod, uint64_t addr)
504{
505 return addr - (uint64_t) mod->module_init < mod->init_size;
506}
507
508static inline int
509in_core (const struct module *mod, uint64_t addr)
510{
511 return addr - (uint64_t) mod->module_core < mod->core_size;
512}
513
514static inline int
515is_internal (const struct module *mod, uint64_t value)
516{
517 return in_init(mod, value) || in_core(mod, value);
518}
519
520/*
521 * Get gp-relative offset for the linkage-table entry of VALUE.
522 */
523static uint64_t
524get_ltoff (struct module *mod, uint64_t value, int *okp)
525{
526 struct got_entry *got, *e;
527
528 if (!*okp)
529 return 0;
530
531 got = (void *) mod->arch.got->sh_addr;
532 for (e = got; e < got + mod->arch.next_got_entry; ++e)
533 if (e->val == value)
534 goto found;
535
536 /* Not enough GOT entries? */
537 if (e >= (struct got_entry *) (mod->arch.got->sh_addr + mod->arch.got->sh_size))
538 BUG();
539
540 e->val = value;
541 ++mod->arch.next_got_entry;
542 found:
543 return (uint64_t) e - mod->arch.gp;
544}
545
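/*
 * True if VALUE is reachable from the module's gp with a 22-bit gp-relative
 * offset, i.e. VALUE lies within +/- MAX_LTOFF/2 (2MB) of gp; the single
 * unsigned compare below covers both directions.
 */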
546static inline int
547gp_addressable (struct module *mod, uint64_t value)
548{
549 return value - mod->arch.gp + MAX_LTOFF/2 < MAX_LTOFF;
550}
551
552/* Get PC-relative PLT entry for this value. Returns 0 on failure. */
553static uint64_t
554get_plt (struct module *mod, const struct insn *insn, uint64_t value, int *okp)
555{
556 struct plt_entry *plt, *plt_end;
557 uint64_t target_ip, target_gp;
558
559 if (!*okp)
560 return 0;
561
562 if (in_init(mod, (uint64_t) insn)) {
563 plt = (void *) mod->arch.init_plt->sh_addr;
564 plt_end = (void *) plt + mod->arch.init_plt->sh_size;
565 } else {
566 plt = (void *) mod->arch.core_plt->sh_addr;
567 plt_end = (void *) plt + mod->arch.core_plt->sh_size;
568 }
569
570 /* "value" is a pointer to a function-descriptor; fetch the target ip/gp from it: */
571 target_ip = ((uint64_t *) value)[0];
572 target_gp = ((uint64_t *) value)[1];
573
574 /* Look for existing PLT entry. */
575 while (plt->bundle[0][0]) {
576 if (plt_target(plt) == target_ip)
577 goto found;
578 if (++plt >= plt_end)
579 BUG();
580 }
581 *plt = ia64_plt_template;
582 if (!patch_plt(mod, plt, target_ip, target_gp)) {
583 *okp = 0;
584 return 0;
585 }
586#if ARCH_MODULE_DEBUG
587 if (plt_target(plt) != target_ip) {
588 printk("%s: mistargeted PLT: wanted %lx, got %lx\n",
589 __FUNCTION__, target_ip, plt_target(plt));
590 *okp = 0;
591 return 0;
592 }
593#endif
594 found:
595 return (uint64_t) plt;
596}
597
598/* Get function descriptor for VALUE. */
599static uint64_t
600get_fdesc (struct module *mod, uint64_t value, int *okp)
601{
602 struct fdesc *fdesc = (void *) mod->arch.opd->sh_addr;
603
604 if (!*okp)
605 return 0;
606
607 if (!value) {
608 printk(KERN_ERR "%s: fdesc for zero requested!\n", mod->name);
609 return 0;
610 }
611
612 if (!is_internal(mod, value))
613 /*
614 * If it's not a module-local entry-point, "value" already points to a
615 * function-descriptor.
616 */
617 return value;
618
619 /* Look for existing function descriptor. */
620 while (fdesc->ip) {
621 if (fdesc->ip == value)
622 return (uint64_t)fdesc;
623 if ((uint64_t) ++fdesc >= mod->arch.opd->sh_addr + mod->arch.opd->sh_size)
624 BUG();
625 }
626
627 /* Create new one */
628 fdesc->ip = value;
629 fdesc->gp = mod->arch.gp;
630 return (uint64_t) fdesc;
631}
632
633static inline int
634do_reloc (struct module *mod, uint8_t r_type, Elf64_Sym *sym, uint64_t addend,
635 Elf64_Shdr *sec, void *location)
636{
637 enum reloc_target_format format = (r_type >> FORMAT_SHIFT) & FORMAT_MASK;
638 enum reloc_value_formula formula = (r_type >> VALUE_SHIFT) & VALUE_MASK;
639 uint64_t val;
640 int ok = 1;
641
642 val = sym->st_value + addend;
643
644 switch (formula) {
645 case RV_SEGREL: /* segment base is arbitrarily chosen to be 0 for kernel modules */
646 case RV_DIRECT:
647 break;
648
649 case RV_GPREL: val -= mod->arch.gp; break;
650 case RV_LTREL: val = get_ltoff(mod, val, &ok); break;
651 case RV_PLTREL: val = get_plt(mod, location, val, &ok); break;
652 case RV_FPTR: val = get_fdesc(mod, val, &ok); break;
653 case RV_SECREL: val -= sec->sh_addr; break;
654 case RV_LTREL_FPTR: val = get_ltoff(mod, get_fdesc(mod, val, &ok), &ok); break;
655
656 case RV_PCREL:
657 switch (r_type) {
658 case R_IA64_PCREL21B:
659 if ((in_init(mod, val) && in_core(mod, (uint64_t)location)) ||
660 (in_core(mod, val) && in_init(mod, (uint64_t)location))) {
661 /*
662 * The init section may have been allocated far away from core;
663 * if the branch won't reach, then allocate a PLT for it.
664 */
665 uint64_t delta = ((int64_t)val - (int64_t)location) / 16;
666 if (delta + (1 << 20) >= (1 << 21)) {
667 val = get_fdesc(mod, val, &ok);
668 val = get_plt(mod, location, val, &ok);
669 }
670 } else if (!is_internal(mod, val))
671 val = get_plt(mod, location, val, &ok);
672 /* FALL THROUGH */
673 default:
674 val -= bundle(location);
675 break;
676
677 case R_IA64_PCREL32MSB:
678 case R_IA64_PCREL32LSB:
679 case R_IA64_PCREL64MSB:
680 case R_IA64_PCREL64LSB:
681 val -= (uint64_t) location;
682 break;
683
684 }
685 switch (r_type) {
686 case R_IA64_PCREL60B: format = RF_INSN60; break;
687 case R_IA64_PCREL21B: format = RF_INSN21B; break;
688 case R_IA64_PCREL21M: format = RF_INSN21M; break;
689 case R_IA64_PCREL21F: format = RF_INSN21F; break;
690 default: break;
691 }
692 break;
693
694 case RV_BDREL:
695 val -= (uint64_t) (in_init(mod, val) ? mod->module_init : mod->module_core);
696 break;
697
698 case RV_LTV:
699 /* can link-time value relocs happen here? */
700 BUG();
701 break;
702
703 case RV_PCREL2:
704 if (r_type == R_IA64_PCREL21BI) {
705 if (!is_internal(mod, val)) {
706 printk(KERN_ERR "%s: %s reloc against non-local symbol (%lx)\n",
707 __FUNCTION__, reloc_name[r_type], val);
708 return -ENOEXEC;
709 }
710 format = RF_INSN21B;
711 }
712 val -= bundle(location);
713 break;
714
715 case RV_SPECIAL:
716 switch (r_type) {
717 case R_IA64_IPLTMSB:
718 case R_IA64_IPLTLSB:
719 val = get_fdesc(mod, get_plt(mod, location, val, &ok), &ok);
720 format = RF_64LSB;
721 if (r_type == R_IA64_IPLTMSB)
722 format = RF_64MSB;
723 break;
724
725 case R_IA64_SUB:
726 val = addend - sym->st_value;
727 format = RF_INSN64;
728 break;
729
730 case R_IA64_LTOFF22X:
731 if (gp_addressable(mod, val))
732 val -= mod->arch.gp;
733 else
734 val = get_ltoff(mod, val, &ok);
735 format = RF_INSN22;
736 break;
737
738 case R_IA64_LDXMOV:
739 if (gp_addressable(mod, val)) {
740 /* turn "ld8" into "mov": */
741 DEBUGP("%s: patching ld8 at %p to mov\n", __FUNCTION__, location);
742 ia64_patch((u64) location, 0x1fff80fe000UL, 0x10000000000UL);
743 }
744 return 0;
745
746 default:
747 if (reloc_name[r_type])
748 printk(KERN_ERR "%s: special reloc %s not supported\n",
749 mod->name, reloc_name[r_type]);
750 else
751 printk(KERN_ERR "%s: unknown special reloc %x\n",
752 mod->name, r_type);
753 return -ENOEXEC;
754 }
755 break;
756
757 case RV_TPREL:
758 case RV_LTREL_TPREL:
759 case RV_DTPMOD:
760 case RV_LTREL_DTPMOD:
761 case RV_DTPREL:
762 case RV_LTREL_DTPREL:
763 printk(KERN_ERR "%s: %s reloc not supported\n",
764 mod->name, reloc_name[r_type] ? reloc_name[r_type] : "?");
765 return -ENOEXEC;
766
767 default:
768 printk(KERN_ERR "%s: unknown reloc %x\n", mod->name, r_type);
769 return -ENOEXEC;
770 }
771
772 if (!ok)
773 return -ENOEXEC;
774
775 DEBUGP("%s: [%p]<-%016lx = %s(%lx)\n", __FUNCTION__, location, val,
776 reloc_name[r_type] ? reloc_name[r_type] : "?", sym->st_value + addend);
777
778 switch (format) {
779 case RF_INSN21B: ok = apply_imm21b(mod, location, (int64_t) val / 16); break;
780 case RF_INSN22: ok = apply_imm22(mod, location, val); break;
781 case RF_INSN64: ok = apply_imm64(mod, location, val); break;
782 case RF_INSN60: ok = apply_imm60(mod, location, (int64_t) val / 16); break;
783 case RF_32LSB: put_unaligned(val, (uint32_t *) location); break;
784 case RF_64LSB: put_unaligned(val, (uint64_t *) location); break;
785 case RF_32MSB: /* ia64 Linux is little-endian... */
786 case RF_64MSB: /* ia64 Linux is little-endian... */
787 case RF_INSN14: /* must be within-module, i.e., resolved by "ld -r" */
788 case RF_INSN21M: /* must be within-module, i.e., resolved by "ld -r" */
789 case RF_INSN21F: /* must be within-module, i.e., resolved by "ld -r" */
790 printk(KERN_ERR "%s: format %u needed by %s reloc is not supported\n",
791 mod->name, format, reloc_name[r_type] ? reloc_name[r_type] : "?");
792 return -ENOEXEC;
793
794 default:
795 printk(KERN_ERR "%s: relocation %s resulted in unknown format %u\n",
796 mod->name, reloc_name[r_type] ? reloc_name[r_type] : "?", format);
797 return -ENOEXEC;
798 }
799 return ok ? 0 : -ENOEXEC;
800}
801
802int
803apply_relocate_add (Elf64_Shdr *sechdrs, const char *strtab, unsigned int symindex,
804 unsigned int relsec, struct module *mod)
805{
806 unsigned int i, n = sechdrs[relsec].sh_size / sizeof(Elf64_Rela);
807 Elf64_Rela *rela = (void *) sechdrs[relsec].sh_addr;
808 Elf64_Shdr *target_sec;
809 int ret;
810
811 DEBUGP("%s: applying section %u (%u relocs) to %u\n", __FUNCTION__,
812 relsec, n, sechdrs[relsec].sh_info);
813
814 target_sec = sechdrs + sechdrs[relsec].sh_info;
815
816 if (target_sec->sh_entsize == ~0UL)
817 /*
818 * If target section wasn't allocated, we don't need to relocate it.
819 * Happens, e.g., for debug sections.
820 */
821 return 0;
822
823 if (!mod->arch.gp) {
824 /*
825 * XXX Should have an arch-hook for running this after final section
826 * addresses have been selected...
827 */
828 /* See if gp can cover the entire core module: */
829 uint64_t gp = (uint64_t) mod->module_core + MAX_LTOFF / 2;
830 if (mod->core_size >= MAX_LTOFF)
831 /*
832 * This takes advantage of the fact that ARCH_SHF_SMALL sections get allocated
833 * at the end of the module.
834 */
835 gp = (uint64_t) mod->module_core + mod->core_size - MAX_LTOFF / 2;
836 mod->arch.gp = gp;
837 DEBUGP("%s: placing gp at 0x%lx\n", __FUNCTION__, gp);
838 }
839
840 for (i = 0; i < n; i++) {
841 ret = do_reloc(mod, ELF64_R_TYPE(rela[i].r_info),
842 ((Elf64_Sym *) sechdrs[symindex].sh_addr
843 + ELF64_R_SYM(rela[i].r_info)),
844 rela[i].r_addend, target_sec,
845 (void *) target_sec->sh_addr + rela[i].r_offset);
846 if (ret < 0)
847 return ret;
848 }
849 return 0;
850}
851
852int
853apply_relocate (Elf64_Shdr *sechdrs, const char *strtab, unsigned int symindex,
854 unsigned int relsec, struct module *mod)
855{
856 printk(KERN_ERR "module %s: REL relocs in section %u unsupported\n", mod->name, relsec);
857 return -ENOEXEC;
858}
859
860/*
861 * Modules contain a single unwind table which covers both the core and the init text
862 * sections but since the two are not contiguous, we need to split this table up such that
863 * we can register (and unregister) each "segment" separately. Fortunately, this sounds
864 * more complicated than it really is.
865 */
866static void
867register_unwind_table (struct module *mod)
868{
869 struct unw_table_entry *start = (void *) mod->arch.unwind->sh_addr;
870 struct unw_table_entry *end = start + mod->arch.unwind->sh_size / sizeof (*start);
871 struct unw_table_entry tmp, *e1, *e2, *core, *init;
872 unsigned long num_init = 0, num_core = 0;
873
874 /* First, count how many init and core unwind-table entries there are. */
875 for (e1 = start; e1 < end; ++e1)
876 if (in_init(mod, e1->start_offset))
877 ++num_init;
878 else
879 ++num_core;
880 /*
881 * Second, sort the table such that all unwind-table entries for the init and core
882 * text sections are nicely separated. We do this with a stupid bubble sort
883 * (unwind tables don't get ridiculously huge).
884 */
885 for (e1 = start; e1 < end; ++e1) {
886 for (e2 = e1 + 1; e2 < end; ++e2) {
887 if (e2->start_offset < e1->start_offset) {
888 tmp = *e1;
889 *e1 = *e2;
890 *e2 = tmp;
891 }
892 }
893 }
894 /*
895 * Third, locate the init and core segments in the unwind table:
896 */
897 if (in_init(mod, start->start_offset)) {
898 init = start;
899 core = start + num_init;
900 } else {
901 core = start;
902 init = start + num_core;
903 }
904
905 DEBUGP("%s: name=%s, gp=%lx, num_init=%lu, num_core=%lu\n", __FUNCTION__,
906 mod->name, mod->arch.gp, num_init, num_core);
907
908 /*
909 * Fourth, register both tables (if not empty).
910 */
911 if (num_core > 0) {
912 mod->arch.core_unw_table = unw_add_unwind_table(mod->name, 0, mod->arch.gp,
913 core, core + num_core);
914 DEBUGP("%s: core: handle=%p [%p-%p)\n", __FUNCTION__,
915 mod->arch.core_unw_table, core, core + num_core);
916 }
917 if (num_init > 0) {
918 mod->arch.init_unw_table = unw_add_unwind_table(mod->name, 0, mod->arch.gp,
919 init, init + num_init);
920 DEBUGP("%s: init: handle=%p [%p-%p)\n", __FUNCTION__,
921 mod->arch.init_unw_table, init, init + num_init);
922 }
923}
924
925int
926module_finalize (const Elf_Ehdr *hdr, const Elf_Shdr *sechdrs, struct module *mod)
927{
928 DEBUGP("%s: init: entry=%p\n", __FUNCTION__, mod->init);
929 if (mod->arch.unwind)
930 register_unwind_table(mod);
931 return 0;
932}
933
934void
935module_arch_cleanup (struct module *mod)
936{
937 if (mod->arch.init_unw_table)
938 unw_remove_unwind_table(mod->arch.init_unw_table);
939 if (mod->arch.core_unw_table)
940 unw_remove_unwind_table(mod->arch.core_unw_table);
941}
942
943#ifdef CONFIG_SMP
944void
945percpu_modcopy (void *pcpudst, const void *src, unsigned long size)
946{
947 unsigned int i;
948 for (i = 0; i < NR_CPUS; i++)
949 if (cpu_possible(i))
950 memcpy(pcpudst + __per_cpu_offset[i], src, size);
951}
952#endif /* CONFIG_SMP */
diff --git a/arch/ia64/kernel/pal.S b/arch/ia64/kernel/pal.S
new file mode 100644
index 000000000000..5018c7f2e7a8
--- /dev/null
+++ b/arch/ia64/kernel/pal.S
@@ -0,0 +1,302 @@
1/*
2 * PAL Firmware support
3 * IA-64 Processor Programmers Reference Vol 2
4 *
5 * Copyright (C) 1999 Don Dugger <don.dugger@intel.com>
6 * Copyright (C) 1999 Walt Drummond <drummond@valinux.com>
7 * Copyright (C) 1999-2001, 2003 Hewlett-Packard Co
8 * David Mosberger <davidm@hpl.hp.com>
9 * Stephane Eranian <eranian@hpl.hp.com>
10 *
11 * 05/22/2000 eranian Added support for stacked register calls
12 * 05/24/2000 eranian Added support for physical mode static calls
13 */
14
15#include <asm/asmmacro.h>
16#include <asm/processor.h>
17
18 .data
19pal_entry_point:
20 data8 ia64_pal_default_handler
21 .text
22
23/*
24 * Set the PAL entry point address. This could be written in C code, but we do it here
25 * to keep it all in one module (besides, it's so trivial that it's
26 * not a big deal).
27 *
28 * in0 Address of the PAL entry point (text address, NOT a function descriptor).
29 */
30GLOBAL_ENTRY(ia64_pal_handler_init)
31 alloc r3=ar.pfs,1,0,0,0
32 movl r2=pal_entry_point
33 ;;
34 st8 [r2]=in0
35 br.ret.sptk.many rp
36END(ia64_pal_handler_init)
37
38/*
39 * Default PAL call handler. This needs to be coded in assembly because it uses
40 * the static calling convention, i.e., the RSE may not be used and calls are
41 * done via "br.cond" (not "br.call").
42 */
43GLOBAL_ENTRY(ia64_pal_default_handler)
44 mov r8=-1
45 br.cond.sptk.many rp
46END(ia64_pal_default_handler)
47
48/*
49 * Make a PAL call using the static calling convention.
50 *
51 * in0 Index of PAL service
52 * in1 - in3 Remaining PAL arguments
53 * in4 1 ==> clear psr.ic, 0 ==> don't clear psr.ic
54 *
55 */
56GLOBAL_ENTRY(ia64_pal_call_static)
57 .prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(5)
58 alloc loc1 = ar.pfs,5,5,0,0
59 movl loc2 = pal_entry_point
601: {
61 mov r28 = in0
62 mov r29 = in1
63 mov r8 = ip
64 }
65 ;;
66 ld8 loc2 = [loc2] // loc2 <- entry point
67 tbit.nz p6,p7 = in4, 0
68 adds r8 = 1f-1b,r8
69 mov loc4=ar.rsc // save RSE configuration
70 ;;
71 mov ar.rsc=0 // put RSE in enforced lazy, LE mode
72 mov loc3 = psr
73 mov loc0 = rp
74 .body
75 mov r30 = in2
76
77(p6) rsm psr.i | psr.ic
78 mov r31 = in3
79 mov b7 = loc2
80
81(p7) rsm psr.i
82 ;;
83(p6) srlz.i
84 mov rp = r8
85 br.cond.sptk.many b7
861: mov psr.l = loc3
87 mov ar.rsc = loc4 // restore RSE configuration
88 mov ar.pfs = loc1
89 mov rp = loc0
90 ;;
91 srlz.d // serialize restoration of psr.l
92 br.ret.sptk.many b0
93END(ia64_pal_call_static)
94
95/*
96 * Make a PAL call using the stacked registers calling convention.
97 *
98 * Inputs:
99 * in0 Index of PAL service
100 * in1 - in3 Remaining PAL arguments
101 */
102GLOBAL_ENTRY(ia64_pal_call_stacked)
103 .prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(4)
104 alloc loc1 = ar.pfs,4,4,4,0
105 movl loc2 = pal_entry_point
106
107 mov r28 = in0 // Index MUST be copied to r28
108 mov out0 = in0 // AND in0 of PAL function
109 mov loc0 = rp
110 .body
111 ;;
112 ld8 loc2 = [loc2] // loc2 <- entry point
113 mov out1 = in1
114 mov out2 = in2
115 mov out3 = in3
116 mov loc3 = psr
117 ;;
118 rsm psr.i
119 mov b7 = loc2
120 ;;
121 br.call.sptk.many rp=b7 // now make the call
122.ret0: mov psr.l = loc3
123 mov ar.pfs = loc1
124 mov rp = loc0
125 ;;
126 srlz.d // serialize restoration of psr.l
127 br.ret.sptk.many b0
128END(ia64_pal_call_stacked)
129
130/*
131 * Make a physical mode PAL call using the static registers calling convention.
132 *
133 * Inputs:
134 * in0 Index of PAL service
135 * in1 - in3 Remaining PAL arguments
136 *
137 * PSR_LP, PSR_TB, PSR_ID, PSR_DA are never set by the kernel.
138 * So we don't need to clear them.
139 */
140#define PAL_PSR_BITS_TO_CLEAR \
141 (IA64_PSR_I | IA64_PSR_IT | IA64_PSR_DT | IA64_PSR_DB | IA64_PSR_RT | \
142 IA64_PSR_DD | IA64_PSR_SS | IA64_PSR_RI | IA64_PSR_ED | \
143 IA64_PSR_DFL | IA64_PSR_DFH)
144
145#define PAL_PSR_BITS_TO_SET \
146 (IA64_PSR_BN)
147
148
149GLOBAL_ENTRY(ia64_pal_call_phys_static)
150 .prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(4)
151 alloc loc1 = ar.pfs,4,7,0,0
152 movl loc2 = pal_entry_point
1531: {
154 mov r28 = in0 // copy procedure index
155 mov r8 = ip // save ip to compute branch
156 mov loc0 = rp // save rp
157 }
158 .body
159 ;;
160 ld8 loc2 = [loc2] // loc2 <- entry point
161 mov r29 = in1 // first argument
162 mov r30 = in2 // copy arg2
163 mov r31 = in3 // copy arg3
164 ;;
165 mov loc3 = psr // save psr
166 adds r8 = 1f-1b,r8 // calculate return address for call
167 ;;
168 mov loc4=ar.rsc // save RSE configuration
169 dep.z loc2=loc2,0,61 // convert pal entry point to physical
170 tpa r8=r8 // convert rp to physical
171 ;;
172 mov b7 = loc2 // install target to branch reg
173 mov ar.rsc=0 // put RSE in enforced lazy, LE mode
174 movl r16=PAL_PSR_BITS_TO_CLEAR
175 movl r17=PAL_PSR_BITS_TO_SET
176 ;;
177 or loc3=loc3,r17 // add in psr the bits to set
178 ;;
179 andcm r16=loc3,r16 // removes bits to clear from psr
180 br.call.sptk.many rp=ia64_switch_mode_phys
181.ret1: mov rp = r8 // install return address (physical)
182 mov loc5 = r19
183 mov loc6 = r20
184 br.cond.sptk.many b7
1851:
186 mov ar.rsc=0 // put RSE in enforced lazy, LE mode
187 mov r16=loc3 // r16= original psr
188 mov r19=loc5
189 mov r20=loc6
190 br.call.sptk.many rp=ia64_switch_mode_virt // return to virtual mode
191.ret2:
192 mov psr.l = loc3 // restore init PSR
193
194 mov ar.pfs = loc1
195 mov rp = loc0
196 ;;
197 mov ar.rsc=loc4 // restore RSE configuration
198 srlz.d // serialize restoration of psr.l
199 br.ret.sptk.many b0
200END(ia64_pal_call_phys_static)
201
202/*
203 * Make a PAL call using the stacked registers in physical mode.
204 *
205 * Inputs:
206 * in0 Index of PAL service
207 * in1 - in3 Remaining PAL arguments
208 */
209GLOBAL_ENTRY(ia64_pal_call_phys_stacked)
210 .prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(5)
211 alloc loc1 = ar.pfs,5,7,4,0
212 movl loc2 = pal_entry_point
2131: {
214 mov r28 = in0 // copy procedure index
215 mov loc0 = rp // save rp
216 }
217 .body
218 ;;
219 ld8 loc2 = [loc2] // loc2 <- entry point
220 mov out0 = in0 // first argument
221 mov out1 = in1 // copy arg2
222 mov out2 = in2 // copy arg3
223 mov out3 = in3 // copy arg4
224 ;;
225 mov loc3 = psr // save psr
226 ;;
227 mov loc4=ar.rsc // save RSE configuration
228 dep.z loc2=loc2,0,61 // convert pal entry point to physical
229 ;;
230 mov ar.rsc=0 // put RSE in enforced lazy, LE mode
231 movl r16=PAL_PSR_BITS_TO_CLEAR
232 movl r17=PAL_PSR_BITS_TO_SET
233 ;;
234 or loc3=loc3,r17 // add in psr the bits to set
235 mov b7 = loc2 // install target to branch reg
236 ;;
237 andcm r16=loc3,r16 // removes bits to clear from psr
238 br.call.sptk.many rp=ia64_switch_mode_phys
239.ret6:
240 mov loc5 = r19
241 mov loc6 = r20
242 br.call.sptk.many rp=b7 // now make the call
243.ret7:
244 mov ar.rsc=0 // put RSE in enforced lazy, LE mode
245 mov r16=loc3 // r16= original psr
246 mov r19=loc5
247 mov r20=loc6
248 br.call.sptk.many rp=ia64_switch_mode_virt // return to virtual mode
249
250.ret8: mov psr.l = loc3 // restore init PSR
251 mov ar.pfs = loc1
252 mov rp = loc0
253 ;;
254 mov ar.rsc=loc4 // restore RSE configuration
255 srlz.d // serialize restoration of psr.l
256 br.ret.sptk.many b0
257END(ia64_pal_call_phys_stacked)
258
259/*
260 * Save scratch fp scratch regs which aren't saved in pt_regs already (fp10-fp15).
261 *
262 * NOTE: We need to do this since firmware (SAL and PAL) may use any of the scratch
263 * regs fp-low partition.
264 *
265 * Inputs:
266 * in0 Address of stack storage for fp regs
267 */
268GLOBAL_ENTRY(ia64_save_scratch_fpregs)
269 alloc r3=ar.pfs,1,0,0,0
270 add r2=16,in0
271 ;;
272 stf.spill [in0] = f10,32
273 stf.spill [r2] = f11,32
274 ;;
275 stf.spill [in0] = f12,32
276 stf.spill [r2] = f13,32
277 ;;
278 stf.spill [in0] = f14,32
279 stf.spill [r2] = f15,32
280 br.ret.sptk.many rp
281END(ia64_save_scratch_fpregs)
282
283/*
284 * Load scratch fp scratch regs (fp10-fp15)
285 *
286 * Inputs:
287 * in0 Address of stack storage for fp regs
288 */
289GLOBAL_ENTRY(ia64_load_scratch_fpregs)
290 alloc r3=ar.pfs,1,0,0,0
291 add r2=16,in0
292 ;;
293 ldf.fill f10 = [in0],32
294 ldf.fill f11 = [r2],32
295 ;;
296 ldf.fill f12 = [in0],32
297 ldf.fill f13 = [r2],32
298 ;;
299 ldf.fill f14 = [in0],32
300 ldf.fill f15 = [r2],32
301 br.ret.sptk.many rp
302END(ia64_load_scratch_fpregs)
diff --git a/arch/ia64/kernel/palinfo.c b/arch/ia64/kernel/palinfo.c
new file mode 100644
index 000000000000..25e7c8344564
--- /dev/null
+++ b/arch/ia64/kernel/palinfo.c
@@ -0,0 +1,1023 @@
1/*
2 * palinfo.c
3 *
4 * Prints processor specific information reported by PAL.
5 * This code is based on specification of PAL as of the
6 * Intel IA-64 Architecture Software Developer's Manual v1.0.
7 *
8 *
9 * Copyright (C) 2000-2001, 2003 Hewlett-Packard Co
10 * Stephane Eranian <eranian@hpl.hp.com>
11 * Copyright (C) 2004 Intel Corporation
12 * Ashok Raj <ashok.raj@intel.com>
13 *
14 * 05/26/2000 S.Eranian initial release
15 * 08/21/2000 S.Eranian updated to July 2000 PAL specs
16 * 02/05/2001 S.Eranian fixed module support
17 * 10/23/2001 S.Eranian updated pal_perf_mon_info bug fixes
18 * 03/24/2004 Ashok Raj updated to work with CPU Hotplug
19 */
20#include <linux/config.h>
21#include <linux/types.h>
22#include <linux/errno.h>
23#include <linux/init.h>
24#include <linux/proc_fs.h>
25#include <linux/mm.h>
26#include <linux/module.h>
27#include <linux/efi.h>
28#include <linux/notifier.h>
29#include <linux/cpu.h>
30#include <linux/cpumask.h>
31
32#include <asm/pal.h>
33#include <asm/sal.h>
34#include <asm/page.h>
35#include <asm/processor.h>
36#include <linux/smp.h>
37
38MODULE_AUTHOR("Stephane Eranian <eranian@hpl.hp.com>");
39MODULE_DESCRIPTION("/proc interface to IA-64 PAL");
40MODULE_LICENSE("GPL");
41
42#define PALINFO_VERSION "0.5"
43
44typedef int (*palinfo_func_t)(char*);
45
46typedef struct {
47 const char *name; /* name of the proc entry */
48 palinfo_func_t proc_read; /* function to call for reading */
49 struct proc_dir_entry *entry; /* registered entry (removal) */
50} palinfo_entry_t;
51
52
53/*
54 * A bunch of string arrays for pretty printing
55 */
56
57static char *cache_types[] = {
58 "", /* not used */
59 "Instruction",
60 "Data",
61 "Data/Instruction" /* unified */
62};
63
64static const char *cache_mattrib[]={
65 "WriteThrough",
66 "WriteBack",
67 "", /* reserved */
68 "" /* reserved */
69};
70
71static const char *cache_st_hints[]={
72 "Temporal, level 1",
73 "Reserved",
74 "Reserved",
75 "Non-temporal, all levels",
76 "Reserved",
77 "Reserved",
78 "Reserved",
79 "Reserved"
80};
81
82static const char *cache_ld_hints[]={
83 "Temporal, level 1",
84 "Non-temporal, level 1",
85 "Reserved",
86 "Non-temporal, all levels",
87 "Reserved",
88 "Reserved",
89 "Reserved",
90 "Reserved"
91};
92
93static const char *rse_hints[]={
94 "enforced lazy",
95 "eager stores",
96 "eager loads",
97 "eager loads and stores"
98};
99
100#define RSE_HINTS_COUNT ARRAY_SIZE(rse_hints)
101
102static const char *mem_attrib[]={
103 "WB", /* 000 */
104 "SW", /* 001 */
105 "010", /* 010 */
106 "011", /* 011 */
107 "UC", /* 100 */
108 "UCE", /* 101 */
109 "WC", /* 110 */
110 "NaTPage" /* 111 */
111};
112
113/*
114 * Takes a 64-bit vector and produces a string such that
115 * if bit n is set then 2^n is printed in clear text, adjusted
116 * to the appropriate unit (K, M, G, T).
117 *
118 * Input:
119 * - a pointer to a buffer to hold the string
120 * - a 64-bit vector
121 * Output:
122 * - a pointer to the end of the buffer
123 *
124 */
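/*
 * Example (illustrative): a vector with only bits 12 and 20 set yields
 * "4K 1M " since 2^12 = 4K and 2^20 = 1M.
 */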
125static char *
126bitvector_process(char *p, u64 vector)
127{
128 int i,j;
129 const char *units[]={ "", "K", "M", "G", "T" };
130
131 for (i=0, j=0; i < 64; i++ , j=i/10) {
132 if (vector & 0x1) {
133 p += sprintf(p, "%d%s ", 1 << (i-j*10), units[j]);
134 }
135 vector >>= 1;
136 }
137 return p;
138}
139
140/*
141 * Takes a 64-bit vector and produces a string such that
142 * if bit n is set then register n is present. The function
143 * takes into account consecutive registers and prints out ranges.
144 *
145 * Input:
146 * - a pointer to a buffer to hold the string
147 * - a 64-bit vector
148 * Output:
149 * - a pointer to the end of the buffer
150 *
151 */
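/*
 * Example (illustrative): a mask in which only bits 2-10 are set produces
 * "2-10 ".
 */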
152static char *
153bitregister_process(char *p, u64 *reg_info, int max)
154{
155 int i, begin, skip = 0;
156 u64 value = reg_info[0];
157
158 value >>= i = begin = ffs(value) - 1;
159
160 for(; i < max; i++ ) {
161
162 if (i != 0 && (i%64) == 0) value = *++reg_info;
163
164 if ((value & 0x1) == 0 && skip == 0) {
165 if (begin <= i - 2)
166 p += sprintf(p, "%d-%d ", begin, i-1);
167 else
168 p += sprintf(p, "%d ", i-1);
169 skip = 1;
170 begin = -1;
171 } else if ((value & 0x1) && skip == 1) {
172 skip = 0;
173 begin = i;
174 }
175 value >>=1;
176 }
177 if (begin > -1) {
178 if (begin < 127)
179 p += sprintf(p, "%d-127", begin);
180 else
181 p += sprintf(p, "127");
182 }
183
184 return p;
185}
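/*
 * A small worked example of the range printing above (illustrative only):
 *
 *	u64 regs[2] = { 0x10000003fUL, 0 };	// bits 0-5 and bit 32 set
 *	p = bitregister_process(p, regs, 64);
 *	// prints "0-5 32 "
 *
 * Consecutive implemented registers are collapsed into a "begin-end" range,
 * while an isolated register is printed on its own.
 */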
186
187static int
188power_info(char *page)
189{
190 s64 status;
191 char *p = page;
192 u64 halt_info_buffer[8];
193 pal_power_mgmt_info_u_t *halt_info =(pal_power_mgmt_info_u_t *)halt_info_buffer;
194 int i;
195
196 status = ia64_pal_halt_info(halt_info);
197 if (status != 0) return 0;
198
199 for (i=0; i < 8 ; i++ ) {
200 if (halt_info[i].pal_power_mgmt_info_s.im == 1) {
201 p += sprintf(p, "Power level %d:\n"
202 "\tentry_latency : %d cycles\n"
203 "\texit_latency : %d cycles\n"
204 "\tpower consumption : %d mW\n"
205 "\tCache+TLB coherency : %s\n", i,
206 halt_info[i].pal_power_mgmt_info_s.entry_latency,
207 halt_info[i].pal_power_mgmt_info_s.exit_latency,
208 halt_info[i].pal_power_mgmt_info_s.power_consumption,
209 halt_info[i].pal_power_mgmt_info_s.co ? "Yes" : "No");
210 } else {
211 p += sprintf(p,"Power level %d: not implemented\n",i);
212 }
213 }
214 return p - page;
215}
216
217static int
218cache_info(char *page)
219{
220 char *p = page;
221 u64 i, levels, unique_caches;
222 pal_cache_config_info_t cci;
223 int j, k;
224 s64 status;
225
226 if ((status = ia64_pal_cache_summary(&levels, &unique_caches)) != 0) {
227 printk(KERN_ERR "ia64_pal_cache_summary=%ld\n", status);
228 return 0;
229 }
230
231 p += sprintf(p, "Cache levels : %ld\nUnique caches : %ld\n\n", levels, unique_caches);
232
233 for (i=0; i < levels; i++) {
234
235 for (j=2; j >0 ; j--) {
236
237 /* even without unification, some levels may not be present */
238 if ((status=ia64_pal_cache_config_info(i,j, &cci)) != 0) {
239 continue;
240 }
241 p += sprintf(p,
242 "%s Cache level %lu:\n"
243 "\tSize : %lu bytes\n"
244 "\tAttributes : ",
245 cache_types[j+cci.pcci_unified], i+1,
246 cci.pcci_cache_size);
247
248 if (cci.pcci_unified) p += sprintf(p, "Unified ");
249
250 p += sprintf(p, "%s\n", cache_mattrib[cci.pcci_cache_attr]);
251
252 p += sprintf(p,
253 "\tAssociativity : %d\n"
254 "\tLine size : %d bytes\n"
255 "\tStride : %d bytes\n",
256 cci.pcci_assoc, 1<<cci.pcci_line_size, 1<<cci.pcci_stride);
257 if (j == 1)
258 p += sprintf(p, "\tStore latency : N/A\n");
259 else
260 p += sprintf(p, "\tStore latency : %d cycle(s)\n",
261 cci.pcci_st_latency);
262
263 p += sprintf(p,
264 "\tLoad latency : %d cycle(s)\n"
265 "\tStore hints : ", cci.pcci_ld_latency);
266
267 for(k=0; k < 8; k++ ) {
268 if ( cci.pcci_st_hints & 0x1)
269 p += sprintf(p, "[%s]", cache_st_hints[k]);
270 cci.pcci_st_hints >>=1;
271 }
272 p += sprintf(p, "\n\tLoad hints : ");
273
274 for(k=0; k < 8; k++ ) {
275 if (cci.pcci_ld_hints & 0x1)
276 p += sprintf(p, "[%s]", cache_ld_hints[k]);
277 cci.pcci_ld_hints >>=1;
278 }
279 p += sprintf(p,
280 "\n\tAlias boundary : %d byte(s)\n"
281 "\tTag LSB : %d\n"
282 "\tTag MSB : %d\n",
283 1<<cci.pcci_alias_boundary, cci.pcci_tag_lsb,
284 cci.pcci_tag_msb);
285
286 /* when unified, data(j=2) is enough */
287 if (cci.pcci_unified) break;
288 }
289 }
290 return p - page;
291}
292
293
294static int
295vm_info(char *page)
296{
297 char *p = page;
298 u64 tr_pages =0, vw_pages=0, tc_pages;
299 u64 attrib;
300 pal_vm_info_1_u_t vm_info_1;
301 pal_vm_info_2_u_t vm_info_2;
302 pal_tc_info_u_t tc_info;
303 ia64_ptce_info_t ptce;
304 const char *sep;
305 int i, j;
306 s64 status;
307
308 if ((status = ia64_pal_vm_summary(&vm_info_1, &vm_info_2)) !=0) {
309 printk(KERN_ERR "ia64_pal_vm_summary=%ld\n", status);
310 return 0;
311 }
312
313
314 p += sprintf(p,
315 "Physical Address Space : %d bits\n"
316 "Virtual Address Space : %d bits\n"
317 "Protection Key Registers(PKR) : %d\n"
318 "Implemented bits in PKR.key : %d\n"
319 "Hash Tag ID : 0x%x\n"
320 "Size of RR.rid : %d\n",
321 vm_info_1.pal_vm_info_1_s.phys_add_size,
322 vm_info_2.pal_vm_info_2_s.impl_va_msb+1, vm_info_1.pal_vm_info_1_s.max_pkr+1,
323 vm_info_1.pal_vm_info_1_s.key_size, vm_info_1.pal_vm_info_1_s.hash_tag_id,
324 vm_info_2.pal_vm_info_2_s.rid_size);
325
326 if (ia64_pal_mem_attrib(&attrib) != 0)
327 return 0;
328
329 p += sprintf(p, "Supported memory attributes : ");
330 sep = "";
331 for (i = 0; i < 8; i++) {
332 if (attrib & (1 << i)) {
333 p += sprintf(p, "%s%s", sep, mem_attrib[i]);
334 sep = ", ";
335 }
336 }
337 p += sprintf(p, "\n");
338
339 if ((status = ia64_pal_vm_page_size(&tr_pages, &vw_pages)) !=0) {
340 printk(KERN_ERR "ia64_pal_vm_page_size=%ld\n", status);
341 return 0;
342 }
343
344 p += sprintf(p,
345 "\nTLB walker : %simplemented\n"
346 "Number of DTR : %d\n"
347 "Number of ITR : %d\n"
348 "TLB insertable page sizes : ",
349 vm_info_1.pal_vm_info_1_s.vw ? "" : "not ",
350 vm_info_1.pal_vm_info_1_s.max_dtr_entry+1,
351 vm_info_1.pal_vm_info_1_s.max_itr_entry+1);
352
353
354 p = bitvector_process(p, tr_pages);
355
356 p += sprintf(p, "\nTLB purgeable page sizes : ");
357
358 p = bitvector_process(p, vw_pages);
359
360 if ((status=ia64_get_ptce(&ptce)) != 0) {
361 printk(KERN_ERR "ia64_get_ptce=%ld\n", status);
362 return 0;
363 }
364
365 p += sprintf(p,
366 "\nPurge base address : 0x%016lx\n"
367 "Purge outer loop count : %d\n"
368 "Purge inner loop count : %d\n"
369 "Purge outer loop stride : %d\n"
370 "Purge inner loop stride : %d\n",
371 ptce.base, ptce.count[0], ptce.count[1], ptce.stride[0], ptce.stride[1]);
372
373 p += sprintf(p,
374 "TC Levels : %d\n"
375 "Unique TC(s) : %d\n",
376 vm_info_1.pal_vm_info_1_s.num_tc_levels,
377 vm_info_1.pal_vm_info_1_s.max_unique_tcs);
378
379 for(i=0; i < vm_info_1.pal_vm_info_1_s.num_tc_levels; i++) {
380 for (j=2; j>0 ; j--) {
381 tc_pages = 0; /* just in case */
382
383
384 /* even without unification, some levels may not be present */
385 if ((status=ia64_pal_vm_info(i,j, &tc_info, &tc_pages)) != 0) {
386 continue;
387 }
388
389 p += sprintf(p,
390 "\n%s Translation Cache Level %d:\n"
391 "\tHash sets : %d\n"
392 "\tAssociativity : %d\n"
393 "\tNumber of entries : %d\n"
394 "\tFlags : ",
395 cache_types[j+tc_info.tc_unified], i+1, tc_info.tc_num_sets,
396 tc_info.tc_associativity, tc_info.tc_num_entries);
397
398 if (tc_info.tc_pf) p += sprintf(p, "PreferredPageSizeOptimized ");
399 if (tc_info.tc_unified) p += sprintf(p, "Unified ");
400 if (tc_info.tc_reduce_tr) p += sprintf(p, "TCReduction");
401
402 p += sprintf(p, "\n\tSupported page sizes: ");
403
404 p = bitvector_process(p, tc_pages);
405
406 /* when unified, data (j=2) is enough */
407 if (tc_info.tc_unified) break;
408 }
409 }
410 p += sprintf(p, "\n");
411
412 return p - page;
413}
414
415
416static int
417register_info(char *page)
418{
419 char *p = page;
420 u64 reg_info[2];
421 u64 info;
422 u64 phys_stacked;
423 pal_hints_u_t hints;
424 u64 iregs, dregs;
425 char *info_type[]={
426 "Implemented AR(s)",
427 "AR(s) with read side-effects",
428 "Implemented CR(s)",
429 "CR(s) with read side-effects",
430 };
431
432 for(info=0; info < 4; info++) {
433
434 if (ia64_pal_register_info(info, &reg_info[0], &reg_info[1]) != 0) return 0;
435
436 p += sprintf(p, "%-32s : ", info_type[info]);
437
438 p = bitregister_process(p, reg_info, 128);
439
440 p += sprintf(p, "\n");
441 }
442
443 if (ia64_pal_rse_info(&phys_stacked, &hints) != 0) return 0;
444
445 p += sprintf(p,
446 "RSE stacked physical registers : %ld\n"
447 "RSE load/store hints : %ld (%s)\n",
448 phys_stacked, hints.ph_data,
449 hints.ph_data < RSE_HINTS_COUNT ? rse_hints[hints.ph_data]: "(??)");
450
451 if (ia64_pal_debug_info(&iregs, &dregs))
452 return 0;
453
454 p += sprintf(p,
455 "Instruction debug register pairs : %ld\n"
456 "Data debug register pairs : %ld\n", iregs, dregs);
457
458 return p - page;
459}
460
461static const char *proc_features[]={
462 NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,
463 NULL,NULL,NULL,NULL,NULL,NULL,NULL, NULL,NULL,
464 NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,
465 NULL,NULL,NULL,NULL,NULL, NULL,NULL,NULL,NULL,
466 NULL,NULL,NULL,NULL,NULL,
467 "XIP,XPSR,XFS implemented",
468 "XR1-XR3 implemented",
469 "Disable dynamic predicate prediction",
470 "Disable processor physical number",
471 "Disable dynamic data cache prefetch",
472 "Disable dynamic inst cache prefetch",
473 "Disable dynamic branch prediction",
474 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
475 "Disable BINIT on processor time-out",
476 "Disable dynamic power management (DPM)",
477 "Disable coherency",
478 "Disable cache",
479 "Enable CMCI promotion",
480 "Enable MCA to BINIT promotion",
481 "Enable MCA promotion",
482 "Enable BERR promotion"
483};
484
485
486static int
487processor_info(char *page)
488{
489 char *p = page;
490 const char **v = proc_features;
491 u64 avail=1, status=1, control=1;
492 int i;
493 s64 ret;
494
495 if ((ret=ia64_pal_proc_get_features(&avail, &status, &control)) != 0) return 0;
496
497 for(i=0; i < 64; i++, v++,avail >>=1, status >>=1, control >>=1) {
498 if ( ! *v ) continue;
499 p += sprintf(p, "%-40s : %s%s %s\n", *v,
500 avail & 0x1 ? "" : "NotImpl",
501 avail & 0x1 ? (status & 0x1 ? "On" : "Off"): "",
502 avail & 0x1 ? (control & 0x1 ? "Ctrl" : "NoCtrl"): "");
503 }
504 return p - page;
505}
506
507static const char *bus_features[]={
508 NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,
509 NULL,NULL,NULL,NULL,NULL,NULL,NULL, NULL,NULL,
510 NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,
511 NULL,NULL,
512 "Request Bus Parking",
513 "Bus Lock Mask",
514 "Enable Half Transfer",
515 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
516 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
517 NULL, NULL, NULL, NULL,
518 "Enable Cache Line Repl. Shared",
519 "Enable Cache Line Repl. Exclusive",
520 "Disable Transaction Queuing",
521 "Disable Response Error Checking",
522 "Disable Bus Error Checking",
523 "Disable Bus Requester Internal Error Signalling",
524 "Disable Bus Requester Error Signalling",
525 "Disable Bus Initialization Event Checking",
526 "Disable Bus Initialization Event Signalling",
527 "Disable Bus Address Error Checking",
528 "Disable Bus Address Error Signalling",
529 "Disable Bus Data Error Checking"
530};
531
532
533static int
534bus_info(char *page)
535{
536 char *p = page;
537 const char **v = bus_features;
538 pal_bus_features_u_t av, st, ct;
539 u64 avail, status, control;
540 int i;
541 s64 ret;
542
543 if ((ret=ia64_pal_bus_get_features(&av, &st, &ct)) != 0) return 0;
544
545 avail = av.pal_bus_features_val;
546 status = st.pal_bus_features_val;
547 control = ct.pal_bus_features_val;
548
549 for(i=0; i < 64; i++, v++, avail >>=1, status >>=1, control >>=1) {
550 if ( ! *v ) continue;
551 p += sprintf(p, "%-48s : %s%s %s\n", *v,
552 avail & 0x1 ? "" : "NotImpl",
553 avail & 0x1 ? (status & 0x1 ? "On" : "Off"): "",
554 avail & 0x1 ? (control & 0x1 ? "Ctrl" : "NoCtrl"): "");
555 }
556 return p - page;
557}
558
559static int
560version_info(char *page)
561{
562 pal_version_u_t min_ver, cur_ver;
563 char *p = page;
564
565 /* The PAL_VERSION call is advertised as being able to support
566 * both physical and virtual mode calls. This seems to be a documentation
567 * bug rather than a firmware bug. In fact, it only supports physical mode.
568 * So the code now reflects this fact and pal_version() has been updated
569 * accordingly.
570 */
571 if (ia64_pal_version(&min_ver, &cur_ver) != 0) return 0;
572
573 p += sprintf(p,
574 "PAL_vendor : 0x%02x (min=0x%02x)\n"
575 "PAL_A : %x.%x.%x (min=%x.%x.%x)\n"
576 "PAL_B : %x.%x.%x (min=%x.%x.%x)\n",
577 cur_ver.pal_version_s.pv_pal_vendor, min_ver.pal_version_s.pv_pal_vendor,
578
579 cur_ver.pal_version_s.pv_pal_a_model>>4,
580 cur_ver.pal_version_s.pv_pal_a_model&0xf, cur_ver.pal_version_s.pv_pal_a_rev,
581 min_ver.pal_version_s.pv_pal_a_model>>4,
582 min_ver.pal_version_s.pv_pal_a_model&0xf, min_ver.pal_version_s.pv_pal_a_rev,
583
584 cur_ver.pal_version_s.pv_pal_b_model>>4,
585 cur_ver.pal_version_s.pv_pal_b_model&0xf, cur_ver.pal_version_s.pv_pal_b_rev,
586 min_ver.pal_version_s.pv_pal_b_model>>4,
587 min_ver.pal_version_s.pv_pal_b_model&0xf, min_ver.pal_version_s.pv_pal_b_rev);
588 return p - page;
589}
590
591static int
592perfmon_info(char *page)
593{
594 char *p = page;
595 u64 pm_buffer[16];
596 pal_perf_mon_info_u_t pm_info;
597
598 if (ia64_pal_perf_mon_info(pm_buffer, &pm_info) != 0) return 0;
599
600 p += sprintf(p,
601 "PMC/PMD pairs : %d\n"
602 "Counter width : %d bits\n"
603 "Cycle event number : %d\n"
604 "Retired event number : %d\n"
605 "Implemented PMC : ",
606 pm_info.pal_perf_mon_info_s.generic, pm_info.pal_perf_mon_info_s.width,
607 pm_info.pal_perf_mon_info_s.cycles, pm_info.pal_perf_mon_info_s.retired);
608
609 p = bitregister_process(p, pm_buffer, 256);
610 p += sprintf(p, "\nImplemented PMD : ");
611 p = bitregister_process(p, pm_buffer+4, 256);
612 p += sprintf(p, "\nCycles count capable : ");
613 p = bitregister_process(p, pm_buffer+8, 256);
614 p += sprintf(p, "\nRetired bundles count capable : ");
615
616#ifdef CONFIG_ITANIUM
617 /*
618 * PAL_PERF_MON_INFO reports that only PMC4 can be used to count CPU_CYCLES
619 * which is wrong; both PMC4 and PMD5 support it.
620 */
621 if (pm_buffer[12] == 0x10) pm_buffer[12]=0x30;
622#endif
623
624 p = bitregister_process(p, pm_buffer+12, 256);
625
626 p += sprintf(p, "\n");
627
628 return p - page;
629}
630
631static int
632frequency_info(char *page)
633{
634 char *p = page;
635 struct pal_freq_ratio proc, itc, bus;
636 u64 base;
637
638 if (ia64_pal_freq_base(&base) == -1)
639 p += sprintf(p, "Output clock : not implemented\n");
640 else
641 p += sprintf(p, "Output clock : %ld ticks/s\n", base);
642
643 if (ia64_pal_freq_ratios(&proc, &bus, &itc) != 0) return 0;
644
645 p += sprintf(p,
646 "Processor/Clock ratio : %ld/%ld\n"
647 "Bus/Clock ratio : %ld/%ld\n"
648 "ITC/Clock ratio : %ld/%ld\n",
649 proc.num, proc.den, bus.num, bus.den, itc.num, itc.den);
650
651 return p - page;
652}
653
654static int
655tr_info(char *page)
656{
657 char *p = page;
658 s64 status;
659 pal_tr_valid_u_t tr_valid;
660 u64 tr_buffer[4];
661 pal_vm_info_1_u_t vm_info_1;
662 pal_vm_info_2_u_t vm_info_2;
663 u64 i, j;
664 u64 max[3], pgm;
665 struct ifa_reg {
666 u64 valid:1;
667 u64 ig:11;
668 u64 vpn:52;
669 } *ifa_reg;
670 struct itir_reg {
671 u64 rv1:2;
672 u64 ps:6;
673 u64 key:24;
674 u64 rv2:32;
675 } *itir_reg;
676 struct gr_reg {
677 u64 p:1;
678 u64 rv1:1;
679 u64 ma:3;
680 u64 a:1;
681 u64 d:1;
682 u64 pl:2;
683 u64 ar:3;
684 u64 ppn:38;
685 u64 rv2:2;
686 u64 ed:1;
687 u64 ig:11;
688 } *gr_reg;
689 struct rid_reg {
690 u64 ig1:1;
691 u64 rv1:1;
692 u64 ig2:6;
693 u64 rid:24;
694 u64 rv2:32;
695 } *rid_reg;
696
697 if ((status = ia64_pal_vm_summary(&vm_info_1, &vm_info_2)) !=0) {
698 printk(KERN_ERR "ia64_pal_vm_summary=%ld\n", status);
699 return 0;
700 }
701 max[0] = vm_info_1.pal_vm_info_1_s.max_itr_entry+1;
702 max[1] = vm_info_1.pal_vm_info_1_s.max_dtr_entry+1;
703
704 for (i=0; i < 2; i++ ) {
705 for (j=0; j < max[i]; j++) {
706
707 status = ia64_pal_tr_read(j, i, tr_buffer, &tr_valid);
708 if (status != 0) {
709 printk(KERN_ERR "palinfo: pal call failed on tr[%lu:%lu]=%ld\n",
710 i, j, status);
711 continue;
712 }
713
714 ifa_reg = (struct ifa_reg *)&tr_buffer[2];
715
716 if (ifa_reg->valid == 0) continue;
717
718 gr_reg = (struct gr_reg *)tr_buffer;
719 itir_reg = (struct itir_reg *)&tr_buffer[1];
720 rid_reg = (struct rid_reg *)&tr_buffer[3];
721
722 pgm = -1 << (itir_reg->ps - 12);
723 p += sprintf(p,
724 "%cTR%lu: av=%d pv=%d dv=%d mv=%d\n"
725 "\tppn : 0x%lx\n"
726 "\tvpn : 0x%lx\n"
727 "\tps : ",
728 "ID"[i], j,
729 tr_valid.pal_tr_valid_s.access_rights_valid,
730 tr_valid.pal_tr_valid_s.priv_level_valid,
731 tr_valid.pal_tr_valid_s.dirty_bit_valid,
732 tr_valid.pal_tr_valid_s.mem_attr_valid,
733 (gr_reg->ppn & pgm)<< 12, (ifa_reg->vpn & pgm)<< 12);
734
735 p = bitvector_process(p, 1<< itir_reg->ps);
736
737 p += sprintf(p,
738 "\n\tpl : %d\n"
739 "\tar : %d\n"
740 "\trid : %x\n"
741 "\tp : %d\n"
742 "\tma : %d\n"
743 "\td : %d\n",
744 gr_reg->pl, gr_reg->ar, rid_reg->rid, gr_reg->p, gr_reg->ma,
745 gr_reg->d);
746 }
747 }
748 return p - page;
749}
750
751
752
753/*
754 * List {name,function} pairs for every entry in /proc/palinfo/cpu*
755 */
756static palinfo_entry_t palinfo_entries[]={
757 { "version_info", version_info, },
758 { "vm_info", vm_info, },
759 { "cache_info", cache_info, },
760 { "power_info", power_info, },
761 { "register_info", register_info, },
762 { "processor_info", processor_info, },
763 { "perfmon_info", perfmon_info, },
764 { "frequency_info", frequency_info, },
765 { "bus_info", bus_info },
766 { "tr_info", tr_info, }
767};
768
769#define NR_PALINFO_ENTRIES (int) ARRAY_SIZE(palinfo_entries)
770
771/*
772 * this array is used to keep track of the proc entries we create. This is
773 * required in module mode, when we need to remove all entries. The procfs code
774 * does not do recursive deletion
775 *
776 * Notes:
777 * - +1 accounts for the cpuN directory entry in /proc/pal
778 */
779#define NR_PALINFO_PROC_ENTRIES (NR_CPUS*(NR_PALINFO_ENTRIES+1))
780
781static struct proc_dir_entry *palinfo_proc_entries[NR_PALINFO_PROC_ENTRIES];
782static struct proc_dir_entry *palinfo_dir;
783
784/*
785 * This data structure is used to pass which cpu/function is being requested.
786 * It must fit in a 64bit quantity to be passed to the proc callback routine
787 *
788 * In SMP mode, when we get a request for another CPU, we must call that
789 * other CPU using IPI and wait for the result before returning.
790 */
791typedef union {
792 u64 value;
793 struct {
794 unsigned req_cpu: 32; /* for which CPU this info is */
795 unsigned func_id: 32; /* which function is requested */
796 } pal_func_cpu;
797} pal_func_cpu_u_t;
798
799#define req_cpu pal_func_cpu.req_cpu
800#define func_id pal_func_cpu.func_id
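/*
 * A worked example of the encoding (illustrative; assumes GCC's little-endian
 * bit-field layout on ia64, where the first field occupies the low 32 bits):
 * with req_cpu == 2 and func_id == 3, f.value is 0x0000000300000002. That value
 * is stored as the proc entry's private data and later cast back to recover
 * both fields in palinfo_read_entry().
 */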
801
802#ifdef CONFIG_SMP
803
804/*
805 * used to hold information about final function to call
806 */
807typedef struct {
808 palinfo_func_t func; /* pointer to function to call */
809 char *page; /* buffer to store results */
810 int ret; /* return value from call */
811} palinfo_smp_data_t;
812
813
814/*
815 * this function makes the actual final call and is called
816 * from the SMP code, i.e., this is the palinfo callback routine
817 */
818static void
819palinfo_smp_call(void *info)
820{
821 palinfo_smp_data_t *data = (palinfo_smp_data_t *)info;
822 if (data == NULL) {
823 /* cannot store a return value into a NULL descriptor */
824 printk(KERN_ERR "palinfo: data pointer is NULL\n");
825 return; /* no output */
826 }
827 /* make the actual call */
828 data->ret = (*data->func)(data->page);
829}
830
831/*
832 * function called to trigger the IPI when we need to access a remote CPU
833 * Return:
834 * 0 : error or nothing to output
835 * otherwise how many bytes in the "page" buffer were written
836 */
837static
838int palinfo_handle_smp(pal_func_cpu_u_t *f, char *page)
839{
840 palinfo_smp_data_t ptr;
841 int ret;
842
843 ptr.func = palinfo_entries[f->func_id].proc_read;
844 ptr.page = page;
845 ptr.ret = 0; /* just in case */
846
847
848 /* will send IPI to other CPU and wait for completion of remote call */
849 if ((ret=smp_call_function_single(f->req_cpu, palinfo_smp_call, &ptr, 0, 1))) {
850 printk(KERN_ERR "palinfo: remote CPU call from %d to %d on function %d: "
851 "error %d\n", smp_processor_id(), f->req_cpu, f->func_id, ret);
852 return 0;
853 }
854 return ptr.ret;
855}
856#else /* ! CONFIG_SMP */
857static
858int palinfo_handle_smp(pal_func_cpu_u_t *f, char *page)
859{
860 printk(KERN_ERR "palinfo: should not be called on a non-SMP kernel\n");
861 return 0;
862}
863#endif /* CONFIG_SMP */
864
865/*
866 * Entry point routine: all calls go through this function
867 */
868static int
869palinfo_read_entry(char *page, char **start, off_t off, int count, int *eof, void *data)
870{
871 int len=0;
872 pal_func_cpu_u_t *f = (pal_func_cpu_u_t *)&data;
873
874 /*
875 * in SMP mode, we may need to call another CPU to get correct
876 * information. PAL, by definition, is processor specific
877 */
878 if (f->req_cpu == get_cpu())
879 len = (*palinfo_entries[f->func_id].proc_read)(page);
880 else
881 len = palinfo_handle_smp(f, page);
882
883 put_cpu();
884
885 if (len <= off+count) *eof = 1;
886
887 *start = page + off;
888 len -= off;
889
890 if (len>count) len = count;
891 if (len<0) len = 0;
892
893 return len;
894}
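/*
 * Concrete usage path (illustrative): reading /proc/pal/cpu1/cache_info lands
 * here with "data" carrying req_cpu == 1 and func_id == the index of
 * "cache_info" in palinfo_entries[]. If the reader is not running on CPU 1,
 * the request is forwarded to that CPU via an IPI by palinfo_handle_smp().
 */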
895
896static void
897create_palinfo_proc_entries(unsigned int cpu)
898{
899# define CPUSTR "cpu%d"
900
901 pal_func_cpu_u_t f;
902 struct proc_dir_entry **pdir;
903 struct proc_dir_entry *cpu_dir;
904 int j;
905 char cpustr[sizeof(CPUSTR)];
906
907
908 /*
909 * we keep track of created entries in a depth-first order for
910 * cleanup purposes. Each entry is stored into palinfo_proc_entries
911 */
912 sprintf(cpustr,CPUSTR, cpu);
913
914 cpu_dir = proc_mkdir(cpustr, palinfo_dir);
915
916 f.req_cpu = cpu;
917
918 /*
919 * Compute the location to store the per-cpu entries.
920 * We don't store the top level /proc/pal entry in this list, but
921 * remove it last, after removing all cpu entries.
922 */
923 pdir = &palinfo_proc_entries[cpu*(NR_PALINFO_ENTRIES+1)];
924 *pdir++ = cpu_dir;
925 for (j=0; j < NR_PALINFO_ENTRIES; j++) {
926 f.func_id = j;
927 *pdir = create_proc_read_entry(
928 palinfo_entries[j].name, 0, cpu_dir,
929 palinfo_read_entry, (void *)f.value);
930 if (*pdir)
931 (*pdir)->owner = THIS_MODULE;
932 pdir++;
933 }
934}
935
936static void
937remove_palinfo_proc_entries(unsigned int hcpu)
938{
939 int j;
940 struct proc_dir_entry *cpu_dir, **pdir;
941
942 pdir = &palinfo_proc_entries[hcpu*(NR_PALINFO_ENTRIES+1)];
943 cpu_dir = *pdir;
944 *pdir++=NULL;
945 for (j=0; j < (NR_PALINFO_ENTRIES); j++) {
946 if ((*pdir)) {
947 remove_proc_entry ((*pdir)->name, cpu_dir);
948 *pdir ++= NULL;
949 }
950 }
951
952 if (cpu_dir) {
953 remove_proc_entry(cpu_dir->name, palinfo_dir);
954 }
955}
956
957static int __devinit palinfo_cpu_callback(struct notifier_block *nfb,
958 unsigned long action,
959 void *hcpu)
960{
961 unsigned int hotcpu = (unsigned long)hcpu;
962
963 switch (action) {
964 case CPU_ONLINE:
965 create_palinfo_proc_entries(hotcpu);
966 break;
967#ifdef CONFIG_HOTPLUG_CPU
968 case CPU_DEAD:
969 remove_palinfo_proc_entries(hotcpu);
970 break;
971#endif
972 }
973 return NOTIFY_OK;
974}
975
976static struct notifier_block palinfo_cpu_notifier =
977{
978 .notifier_call = palinfo_cpu_callback,
979 .priority = 0,
980};
981
982static int __init
983palinfo_init(void)
984{
985 int i = 0;
986
987 printk(KERN_INFO "PAL Information Facility v%s\n", PALINFO_VERSION);
988 palinfo_dir = proc_mkdir("pal", NULL);
989
990 /* Create palinfo dirs in /proc for all online cpus */
991 for_each_online_cpu(i) {
992 create_palinfo_proc_entries(i);
993 }
994
995 /* Register for future delivery via notify registration */
996 register_cpu_notifier(&palinfo_cpu_notifier);
997
998 return 0;
999}
1000
1001static void __exit
1002palinfo_exit(void)
1003{
1004 int i = 0;
1005
1006 /* remove all nodes: depth first pass. Could optimize this */
1007 for_each_online_cpu(i) {
1008 remove_palinfo_proc_entries(i);
1009 }
1010
1011 /*
1012 * Remove the top level entry finally
1013 */
1014 remove_proc_entry(palinfo_dir->name, NULL);
1015
1016 /*
1017 * Unregister from cpu notifier callbacks
1018 */
1019 unregister_cpu_notifier(&palinfo_cpu_notifier);
1020}
1021
1022module_init(palinfo_init);
1023module_exit(palinfo_exit);
diff --git a/arch/ia64/kernel/patch.c b/arch/ia64/kernel/patch.c
new file mode 100644
index 000000000000..367804a605fa
--- /dev/null
+++ b/arch/ia64/kernel/patch.c
@@ -0,0 +1,189 @@
1/*
2 * Instruction-patching support.
3 *
4 * Copyright (C) 2003 Hewlett-Packard Co
5 * David Mosberger-Tang <davidm@hpl.hp.com>
6 */
7#include <linux/init.h>
8#include <linux/string.h>
9
10#include <asm/patch.h>
11#include <asm/processor.h>
12#include <asm/sections.h>
13#include <asm/system.h>
14#include <asm/unistd.h>
15
16/*
17 * This was adapted from code written by Tony Luck:
18 *
19 * The 64-bit value in a "movl reg=value" is scattered between the two words of the bundle
20 * like this:
21 *
22 * 6 6 5 4 3 2 1
23 * 3210987654321098765432109876543210987654321098765432109876543210
24 * ABBBBBBBBBBBBBBBBBBBBBBBCCCCCCCCCCCCCCCCCCDEEEEEFFFFFFFFFGGGGGGG
25 *
26 * CCCCCCCCCCCCCCCCCCxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
27 * xxxxAFFFFFFFFFEEEEEDxGGGGGGGxxxxxxxxxxxxxBBBBBBBBBBBBBBBBBBBBBBB
28 */
29static u64
30get_imm64 (u64 insn_addr)
31{
32 u64 *p = (u64 *) (insn_addr & -16); /* mask out slot number */
33
34 return ( (p[1] & 0x0800000000000000UL) << 4) | /*A*/
35 ((p[1] & 0x00000000007fffffUL) << 40) | /*B*/
36 ((p[0] & 0xffffc00000000000UL) >> 24) | /*C*/
37 ((p[1] & 0x0000100000000000UL) >> 23) | /*D*/
38 ((p[1] & 0x0003e00000000000UL) >> 29) | /*E*/
39 ((p[1] & 0x07fc000000000000UL) >> 43) | /*F*/
40 ((p[1] & 0x000007f000000000UL) >> 36); /*G*/
41}
42
43/* Patch the instruction with "val" in the bit positions where "mask" is 1. */
44void
45ia64_patch (u64 insn_addr, u64 mask, u64 val)
46{
47 u64 m0, m1, v0, v1, b0, b1, *b = (u64 *) (insn_addr & -16);
48# define insn_mask ((1UL << 41) - 1)
49 unsigned long shift;
50
51 b0 = b[0]; b1 = b[1];
52 shift = 5 + 41 * (insn_addr % 16); /* 5 bits of template, then 3 x 41-bit instructions */
53 if (shift >= 64) {
54 m1 = mask << (shift - 64);
55 v1 = val << (shift - 64);
56 } else {
57 m0 = mask << shift; m1 = mask >> (64 - shift);
58 v0 = val << shift; v1 = val >> (64 - shift);
59 b[0] = (b0 & ~m0) | (v0 & m0);
60 }
61 b[1] = (b1 & ~m1) | (v1 & m1);
62}
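/*
 * Illustrative numbers for the shift computation above: the low bits of
 * insn_addr encode the slot number (0, 1 or 2), so insn_addr % 16 selects the
 * 41-bit instruction within the 128-bit bundle:
 *
 *	slot 0: shift = 5 + 41*0 =  5	-> instruction lies entirely in b[0]
 *	slot 1: shift = 5 + 41*1 = 46	-> straddles b[0] and b[1]
 *	slot 2: shift = 5 + 41*2 = 87	-> lies entirely in b[1] (shift >= 64)
 */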
63
64void
65ia64_patch_imm64 (u64 insn_addr, u64 val)
66{
67 ia64_patch(insn_addr,
68 0x01fffefe000UL, ( ((val & 0x8000000000000000UL) >> 27) /* bit 63 -> 36 */
69 | ((val & 0x0000000000200000UL) << 0) /* bit 21 -> 21 */
70 | ((val & 0x00000000001f0000UL) << 6) /* bit 16 -> 22 */
71 | ((val & 0x000000000000ff80UL) << 20) /* bit 7 -> 27 */
72 | ((val & 0x000000000000007fUL) << 13) /* bit 0 -> 13 */));
73 ia64_patch(insn_addr - 1, 0x1ffffffffffUL, val >> 22);
74}
75
76void
77ia64_patch_imm60 (u64 insn_addr, u64 val)
78{
79 ia64_patch(insn_addr,
80 0x011ffffe000UL, ( ((val & 0x0800000000000000UL) >> 23) /* bit 59 -> 36 */
81 | ((val & 0x00000000000fffffUL) << 13) /* bit 0 -> 13 */));
82 ia64_patch(insn_addr - 1, 0x1fffffffffcUL, val >> 18);
83}
84
85/*
86 * We sometimes need to load the physical address of a kernel
87 * object. Often we can convert the virtual address to physical
88 * at execution time, but sometimes (either for performance reasons
89 * or during error recovery) we cannot do this. Patch the marked
90 * bundles to load the physical address.
91 */
92void __init
93ia64_patch_vtop (unsigned long start, unsigned long end)
94{
95 s32 *offp = (s32 *) start;
96 u64 ip;
97
98 while (offp < (s32 *) end) {
99 ip = (u64) offp + *offp;
100
101 /* replace virtual address with corresponding physical address: */
102 ia64_patch_imm64(ip, ia64_tpa(get_imm64(ip)));
103 ia64_fc((void *) ip);
104 ++offp;
105 }
106 ia64_sync_i();
107 ia64_srlz_i();
108}
109
110void
111ia64_patch_mckinley_e9 (unsigned long start, unsigned long end)
112{
113 static int first_time = 1;
114 int need_workaround;
115 s32 *offp = (s32 *) start;
116 u64 *wp;
117
118 need_workaround = (local_cpu_data->family == 0x1f && local_cpu_data->model == 0);
119
120 if (first_time) {
121 first_time = 0;
122 if (need_workaround)
123 printk(KERN_INFO "Leaving McKinley Errata 9 workaround enabled\n");
124 else
125 printk(KERN_INFO "McKinley Errata 9 workaround not needed; "
126 "disabling it\n");
127 }
128 if (need_workaround)
129 return;
130
131 while (offp < (s32 *) end) {
132 wp = (u64 *) ia64_imva((char *) offp + *offp);
133 wp[0] = 0x0000000100000000UL; /* nop.m 0; nop.i 0; nop.i 0 */
134 wp[1] = 0x0004000000000200UL;
135 wp[2] = 0x0000000100000011UL; /* nop.m 0; nop.i 0; br.ret.sptk.many b6 */
136 wp[3] = 0x0084006880000200UL;
137 ia64_fc(wp); ia64_fc(wp + 2);
138 ++offp;
139 }
140 ia64_sync_i();
141 ia64_srlz_i();
142}
143
144static void
145patch_fsyscall_table (unsigned long start, unsigned long end)
146{
147 extern unsigned long fsyscall_table[NR_syscalls];
148 s32 *offp = (s32 *) start;
149 u64 ip;
150
151 while (offp < (s32 *) end) {
152 ip = (u64) ia64_imva((char *) offp + *offp);
153 ia64_patch_imm64(ip, (u64) fsyscall_table);
154 ia64_fc((void *) ip);
155 ++offp;
156 }
157 ia64_sync_i();
158 ia64_srlz_i();
159}
160
161static void
162patch_brl_fsys_bubble_down (unsigned long start, unsigned long end)
163{
164 extern char fsys_bubble_down[];
165 s32 *offp = (s32 *) start;
166 u64 ip;
167
168 while (offp < (s32 *) end) {
169 ip = (u64) offp + *offp;
170 ia64_patch_imm60((u64) ia64_imva((void *) ip),
171 (u64) (fsys_bubble_down - (ip & -16)) / 16);
172 ia64_fc((void *) ip);
173 ++offp;
174 }
175 ia64_sync_i();
176 ia64_srlz_i();
177}
178
179void
180ia64_patch_gate (void)
181{
182# define START(name) ((unsigned long) __start_gate_##name##_patchlist)
183# define END(name) ((unsigned long)__end_gate_##name##_patchlist)
184
185 patch_fsyscall_table(START(fsyscall), END(fsyscall));
186 patch_brl_fsys_bubble_down(START(brl_fsys_bubble_down), END(brl_fsys_bubble_down));
187 ia64_patch_vtop(START(vtop), END(vtop));
188 ia64_patch_mckinley_e9(START(mckinley_e9), END(mckinley_e9));
189}
diff --git a/arch/ia64/kernel/perfmon.c b/arch/ia64/kernel/perfmon.c
new file mode 100644
index 000000000000..71147be3279c
--- /dev/null
+++ b/arch/ia64/kernel/perfmon.c
@@ -0,0 +1,6676 @@
1/*
2 * This file implements the perfmon-2 subsystem which is used
3 * to program the IA-64 Performance Monitoring Unit (PMU).
4 *
5 * The initial version of perfmon.c was written by
6 * Ganesh Venkitachalam, IBM Corp.
7 *
8 * Then it was modified for perfmon-1.x by Stephane Eranian and
9 * David Mosberger, Hewlett Packard Co.
10 *
11 * Version Perfmon-2.x is a rewrite of perfmon-1.x
12 * by Stephane Eranian, Hewlett Packard Co.
13 *
14 * Copyright (C) 1999-2003, 2005 Hewlett Packard Co
15 * Stephane Eranian <eranian@hpl.hp.com>
16 * David Mosberger-Tang <davidm@hpl.hp.com>
17 *
18 * More information about perfmon available at:
19 * http://www.hpl.hp.com/research/linux/perfmon
20 */
21
22#include <linux/config.h>
23#include <linux/module.h>
24#include <linux/kernel.h>
25#include <linux/sched.h>
26#include <linux/interrupt.h>
27#include <linux/smp_lock.h>
28#include <linux/proc_fs.h>
29#include <linux/seq_file.h>
30#include <linux/init.h>
31#include <linux/vmalloc.h>
32#include <linux/mm.h>
33#include <linux/sysctl.h>
34#include <linux/list.h>
35#include <linux/file.h>
36#include <linux/poll.h>
37#include <linux/vfs.h>
38#include <linux/pagemap.h>
39#include <linux/mount.h>
40#include <linux/version.h>
41#include <linux/bitops.h>
42
43#include <asm/errno.h>
44#include <asm/intrinsics.h>
45#include <asm/page.h>
46#include <asm/perfmon.h>
47#include <asm/processor.h>
48#include <asm/signal.h>
49#include <asm/system.h>
50#include <asm/uaccess.h>
51#include <asm/delay.h>
52
53#ifdef CONFIG_PERFMON
54/*
55 * perfmon context state
56 */
57#define PFM_CTX_UNLOADED 1 /* context is not loaded onto any task */
58#define PFM_CTX_LOADED 2 /* context is loaded onto a task */
59#define PFM_CTX_MASKED 3 /* context is loaded but monitoring is masked due to overflow */
60#define PFM_CTX_ZOMBIE 4 /* owner of the context is closing it */
61
62#define PFM_INVALID_ACTIVATION (~0UL)
63
64/*
65 * depth of message queue
66 */
67#define PFM_MAX_MSGS 32
68#define PFM_CTXQ_EMPTY(g) ((g)->ctx_msgq_head == (g)->ctx_msgq_tail)
69
70/*
71 * type of a PMU register (bitmask).
72 * bitmask structure:
73 * bit0 : register implemented
74 * bit1 : end marker
75 * bit2-3 : reserved
76 * bit4 : pmc has pmc.pm
77 * bit5 : pmc controls a counter (has pmc.oi), pmd is used as counter
78 * bit6-7 : register type
79 * bit8-31: reserved
80 */
81#define PFM_REG_NOTIMPL 0x0 /* not implemented at all */
82#define PFM_REG_IMPL 0x1 /* register implemented */
83#define PFM_REG_END 0x2 /* end marker */
84#define PFM_REG_MONITOR (0x1<<4|PFM_REG_IMPL) /* a PMC with a pmc.pm field only */
85#define PFM_REG_COUNTING (0x2<<4|PFM_REG_MONITOR) /* a monitor + pmc.oi+ PMD used as a counter */
86#define PFM_REG_CONTROL (0x4<<4|PFM_REG_IMPL) /* PMU control register */
87#define PFM_REG_CONFIG (0x8<<4|PFM_REG_IMPL) /* configuration register */
88#define PFM_REG_BUFFER (0xc<<4|PFM_REG_IMPL) /* PMD used as buffer */
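/*
 * For illustration, the resulting numeric values of the composite types above:
 *	PFM_REG_MONITOR  = (0x1<<4)|0x1  = 0x11  (implemented + pmc.pm)
 *	PFM_REG_COUNTING = (0x2<<4)|0x11 = 0x31  (monitor whose PMD counts)
 *	PFM_REG_CONTROL  = (0x4<<4)|0x1  = 0x41  (implemented control register)
 */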
89
90#define PMC_IS_LAST(i) (pmu_conf->pmc_desc[i].type & PFM_REG_END)
91#define PMD_IS_LAST(i) (pmu_conf->pmd_desc[i].type & PFM_REG_END)
92
93#define PMC_OVFL_NOTIFY(ctx, i) ((ctx)->ctx_pmds[i].flags & PFM_REGFL_OVFL_NOTIFY)
94
95/* i assumed unsigned */
96#define PMC_IS_IMPL(i) (i< PMU_MAX_PMCS && (pmu_conf->pmc_desc[i].type & PFM_REG_IMPL))
97#define PMD_IS_IMPL(i) (i< PMU_MAX_PMDS && (pmu_conf->pmd_desc[i].type & PFM_REG_IMPL))
98
99/* XXX: these assume that register i is implemented */
100#define PMD_IS_COUNTING(i) ((pmu_conf->pmd_desc[i].type & PFM_REG_COUNTING) == PFM_REG_COUNTING)
101#define PMC_IS_COUNTING(i) ((pmu_conf->pmc_desc[i].type & PFM_REG_COUNTING) == PFM_REG_COUNTING)
102#define PMC_IS_MONITOR(i) ((pmu_conf->pmc_desc[i].type & PFM_REG_MONITOR) == PFM_REG_MONITOR)
103#define PMC_IS_CONTROL(i) ((pmu_conf->pmc_desc[i].type & PFM_REG_CONTROL) == PFM_REG_CONTROL)
104
105#define PMC_DFL_VAL(i) pmu_conf->pmc_desc[i].default_value
106#define PMC_RSVD_MASK(i) pmu_conf->pmc_desc[i].reserved_mask
107#define PMD_PMD_DEP(i) pmu_conf->pmd_desc[i].dep_pmd[0]
108#define PMC_PMD_DEP(i) pmu_conf->pmc_desc[i].dep_pmd[0]
109
110#define PFM_NUM_IBRS IA64_NUM_DBG_REGS
111#define PFM_NUM_DBRS IA64_NUM_DBG_REGS
112
113#define CTX_OVFL_NOBLOCK(c) ((c)->ctx_fl_block == 0)
114#define CTX_HAS_SMPL(c) ((c)->ctx_fl_is_sampling)
115#define PFM_CTX_TASK(h) (h)->ctx_task
116
117#define PMU_PMC_OI 5 /* position of pmc.oi bit */
118
119/* XXX: does not support more than 64 PMDs */
120#define CTX_USED_PMD(ctx, mask) (ctx)->ctx_used_pmds[0] |= (mask)
121#define CTX_IS_USED_PMD(ctx, c) (((ctx)->ctx_used_pmds[0] & (1UL << (c))) != 0UL)
122
123#define CTX_USED_MONITOR(ctx, mask) (ctx)->ctx_used_monitors[0] |= (mask)
124
125#define CTX_USED_IBR(ctx,n) (ctx)->ctx_used_ibrs[(n)>>6] |= 1UL<< ((n) % 64)
126#define CTX_USED_DBR(ctx,n) (ctx)->ctx_used_dbrs[(n)>>6] |= 1UL<< ((n) % 64)
127#define CTX_USES_DBREGS(ctx) (((pfm_context_t *)(ctx))->ctx_fl_using_dbreg==1)
128#define PFM_CODE_RR 0 /* requesting code range restriction */
129#define PFM_DATA_RR 1 /* requesting data range restriction */
130
131#define PFM_CPUINFO_CLEAR(v) pfm_get_cpu_var(pfm_syst_info) &= ~(v)
132#define PFM_CPUINFO_SET(v) pfm_get_cpu_var(pfm_syst_info) |= (v)
133#define PFM_CPUINFO_GET() pfm_get_cpu_var(pfm_syst_info)
134
135#define RDEP(x) (1UL<<(x))
136
137/*
138 * context protection macros
139 * in SMP:
140 * - we need to protect against CPU concurrency (spin_lock)
141 * - we need to protect against PMU overflow interrupts (local_irq_disable)
142 * in UP:
143 * - we need to protect against PMU overflow interrupts (local_irq_disable)
144 *
145 * spin_lock_irqsave()/spin_unlock_irqrestore():
146 * in SMP: local_irq_disable + spin_lock
147 * in UP : local_irq_disable
148 *
149 * spin_lock()/spin_unlock():
150 * in UP : removed automatically
151 * in SMP: protect against context accesses from other CPU. interrupts
152 * are not masked. This is useful for the PMU interrupt handler
153 * because we know we will not get PMU concurrency in that code.
154 */
155#define PROTECT_CTX(c, f) \
156 do { \
157 DPRINT(("spinlock_irq_save ctx %p by [%d]\n", c, current->pid)); \
158 spin_lock_irqsave(&(c)->ctx_lock, f); \
159 DPRINT(("spinlocked ctx %p by [%d]\n", c, current->pid)); \
160 } while(0)
161
162#define UNPROTECT_CTX(c, f) \
163 do { \
164 DPRINT(("spinlock_irq_restore ctx %p by [%d]\n", c, current->pid)); \
165 spin_unlock_irqrestore(&(c)->ctx_lock, f); \
166 } while(0)
167
168#define PROTECT_CTX_NOPRINT(c, f) \
169 do { \
170 spin_lock_irqsave(&(c)->ctx_lock, f); \
171 } while(0)
172
173
174#define UNPROTECT_CTX_NOPRINT(c, f) \
175 do { \
176 spin_unlock_irqrestore(&(c)->ctx_lock, f); \
177 } while(0)
178
179
180#define PROTECT_CTX_NOIRQ(c) \
181 do { \
182 spin_lock(&(c)->ctx_lock); \
183 } while(0)
184
185#define UNPROTECT_CTX_NOIRQ(c) \
186 do { \
187 spin_unlock(&(c)->ctx_lock); \
188 } while(0)
189
190
191#ifdef CONFIG_SMP
192
193#define GET_ACTIVATION() pfm_get_cpu_var(pmu_activation_number)
194#define INC_ACTIVATION() pfm_get_cpu_var(pmu_activation_number)++
195#define SET_ACTIVATION(c) (c)->ctx_last_activation = GET_ACTIVATION()
196
197#else /* !CONFIG_SMP */
198#define SET_ACTIVATION(t) do {} while(0)
199#define GET_ACTIVATION(t) do {} while(0)
200#define INC_ACTIVATION(t) do {} while(0)
201#endif /* CONFIG_SMP */
202
203#define SET_PMU_OWNER(t, c) do { pfm_get_cpu_var(pmu_owner) = (t); pfm_get_cpu_var(pmu_ctx) = (c); } while(0)
204#define GET_PMU_OWNER() pfm_get_cpu_var(pmu_owner)
205#define GET_PMU_CTX() pfm_get_cpu_var(pmu_ctx)
206
207#define LOCK_PFS(g) spin_lock_irqsave(&pfm_sessions.pfs_lock, g)
208#define UNLOCK_PFS(g) spin_unlock_irqrestore(&pfm_sessions.pfs_lock, g)
209
210#define PFM_REG_RETFLAG_SET(flags, val) do { flags &= ~PFM_REG_RETFL_MASK; flags |= (val); } while(0)
211
212/*
213 * cmp0 must be the value of pmc0
214 */
215#define PMC0_HAS_OVFL(cmp0) (cmp0 & ~0x1UL)
216
217#define PFMFS_MAGIC 0xa0b4d889
218
219/*
220 * debugging
221 */
222#define PFM_DEBUGGING 1
223#ifdef PFM_DEBUGGING
224#define DPRINT(a) \
225 do { \
226 if (unlikely(pfm_sysctl.debug >0)) { printk("%s.%d: CPU%d [%d] ", __FUNCTION__, __LINE__, smp_processor_id(), current->pid); printk a; } \
227 } while (0)
228
229#define DPRINT_ovfl(a) \
230 do { \
231 if (unlikely(pfm_sysctl.debug > 0 && pfm_sysctl.debug_ovfl >0)) { printk("%s.%d: CPU%d [%d] ", __FUNCTION__, __LINE__, smp_processor_id(), current->pid); printk a; } \
232 } while (0)
233#endif
234
235/*
236 * 64-bit software counter structure
237 *
238 * the next_reset_type is applied to the next call to pfm_reset_regs()
239 */
240typedef struct {
241 unsigned long val; /* virtual 64bit counter value */
242 unsigned long lval; /* last reset value */
243 unsigned long long_reset; /* reset value on sampling overflow */
244 unsigned long short_reset; /* reset value on overflow */
245 unsigned long reset_pmds[4]; /* which other pmds to reset when this counter overflows */
246 unsigned long smpl_pmds[4]; /* which pmds are accessed when this counter overflows */
247 unsigned long seed; /* seed for random-number generator */
248 unsigned long mask; /* mask for random-number generator */
249 unsigned int flags; /* notify/do not notify */
250 unsigned long eventid; /* overflow event identifier */
251} pfm_counter_t;
252
253/*
254 * context flags
255 */
256typedef struct {
257 unsigned int block:1; /* when 1, task will block on user notifications */
258 unsigned int system:1; /* do system wide monitoring */
259 unsigned int using_dbreg:1; /* using range restrictions (debug registers) */
260 unsigned int is_sampling:1; /* true if using a custom format */
261 unsigned int excl_idle:1; /* exclude idle task in system wide session */
262 unsigned int going_zombie:1; /* context is zombie (MASKED+blocking) */
263 unsigned int trap_reason:2; /* reason for going into pfm_handle_work() */
264 unsigned int no_msg:1; /* no message sent on overflow */
265 unsigned int can_restart:1; /* allowed to issue a PFM_RESTART */
266 unsigned int reserved:22;
267} pfm_context_flags_t;
268
269#define PFM_TRAP_REASON_NONE 0x0 /* default value */
270#define PFM_TRAP_REASON_BLOCK 0x1 /* we need to block on overflow */
271#define PFM_TRAP_REASON_RESET 0x2 /* we need to reset PMDs */
272
273
274/*
275 * perfmon context: encapsulates all the state of a monitoring session
276 */
277
278typedef struct pfm_context {
279 spinlock_t ctx_lock; /* context protection */
280
281 pfm_context_flags_t ctx_flags; /* bitmask of flags (block reason incl.) */
282 unsigned int ctx_state; /* state: active/inactive (no bitfield) */
283
284 struct task_struct *ctx_task; /* task to which context is attached */
285
286 unsigned long ctx_ovfl_regs[4]; /* which registers overflowed (notification) */
287
288 struct semaphore ctx_restart_sem; /* use for blocking notification mode */
289
290 unsigned long ctx_used_pmds[4]; /* bitmask of PMD used */
291 unsigned long ctx_all_pmds[4]; /* bitmask of all accessible PMDs */
292 unsigned long ctx_reload_pmds[4]; /* bitmask of force reload PMD on ctxsw in */
293
294 unsigned long ctx_all_pmcs[4]; /* bitmask of all accessible PMCs */
295 unsigned long ctx_reload_pmcs[4]; /* bitmask of force reload PMC on ctxsw in */
296 unsigned long ctx_used_monitors[4]; /* bitmask of monitor PMC being used */
297
298 unsigned long ctx_pmcs[IA64_NUM_PMC_REGS]; /* saved copies of PMC values */
299
300 unsigned int ctx_used_ibrs[1]; /* bitmask of used IBR (speedup ctxsw in) */
301 unsigned int ctx_used_dbrs[1]; /* bitmask of used DBR (speedup ctxsw in) */
302 unsigned long ctx_dbrs[IA64_NUM_DBG_REGS]; /* DBR values (cache) when not loaded */
303 unsigned long ctx_ibrs[IA64_NUM_DBG_REGS]; /* IBR values (cache) when not loaded */
304
305 pfm_counter_t ctx_pmds[IA64_NUM_PMD_REGS]; /* software state for PMDS */
306
307 u64 ctx_saved_psr_up; /* only contains psr.up value */
308
309 unsigned long ctx_last_activation; /* context last activation number for last_cpu */
310 unsigned int ctx_last_cpu; /* CPU id of current or last CPU used (SMP only) */
311 unsigned int ctx_cpu; /* cpu to which perfmon is applied (system wide) */
312
313 int ctx_fd; /* file descriptor used by this context */
314 pfm_ovfl_arg_t ctx_ovfl_arg; /* argument to custom buffer format handler */
315
316 pfm_buffer_fmt_t *ctx_buf_fmt; /* buffer format callbacks */
317 void *ctx_smpl_hdr; /* points to sampling buffer header kernel vaddr */
318 unsigned long ctx_smpl_size; /* size of sampling buffer */
319 void *ctx_smpl_vaddr; /* user level virtual address of smpl buffer */
320
321 wait_queue_head_t ctx_msgq_wait;
322 pfm_msg_t ctx_msgq[PFM_MAX_MSGS];
323 int ctx_msgq_head;
324 int ctx_msgq_tail;
325 struct fasync_struct *ctx_async_queue;
326
327 wait_queue_head_t ctx_zombieq; /* termination cleanup wait queue */
328} pfm_context_t;
329
330/*
331 * magic number used to verify that structure is really
332 * a perfmon context
333 */
334#define PFM_IS_FILE(f) ((f)->f_op == &pfm_file_ops)
335
336#define PFM_GET_CTX(t) ((pfm_context_t *)(t)->thread.pfm_context)
337
338#ifdef CONFIG_SMP
339#define SET_LAST_CPU(ctx, v) (ctx)->ctx_last_cpu = (v)
340#define GET_LAST_CPU(ctx) (ctx)->ctx_last_cpu
341#else
342#define SET_LAST_CPU(ctx, v) do {} while(0)
343#define GET_LAST_CPU(ctx) do {} while(0)
344#endif
345
346
347#define ctx_fl_block ctx_flags.block
348#define ctx_fl_system ctx_flags.system
349#define ctx_fl_using_dbreg ctx_flags.using_dbreg
350#define ctx_fl_is_sampling ctx_flags.is_sampling
351#define ctx_fl_excl_idle ctx_flags.excl_idle
352#define ctx_fl_going_zombie ctx_flags.going_zombie
353#define ctx_fl_trap_reason ctx_flags.trap_reason
354#define ctx_fl_no_msg ctx_flags.no_msg
355#define ctx_fl_can_restart ctx_flags.can_restart
356
357#define PFM_SET_WORK_PENDING(t, v) do { (t)->thread.pfm_needs_checking = v; } while(0);
358#define PFM_GET_WORK_PENDING(t) (t)->thread.pfm_needs_checking
359
360/*
361 * global information about all sessions
362 * mostly used to synchronize between system wide and per-process
363 */
364typedef struct {
365 spinlock_t pfs_lock; /* lock the structure */
366
367 unsigned int pfs_task_sessions; /* number of per task sessions */
368 unsigned int pfs_sys_sessions; /* number of per system wide sessions */
369 unsigned int pfs_sys_use_dbregs; /* incremented when a system wide session uses debug regs */
370 unsigned int pfs_ptrace_use_dbregs; /* incremented when a process uses debug regs */
371 struct task_struct *pfs_sys_session[NR_CPUS]; /* point to task owning a system-wide session */
372} pfm_session_t;
373
374/*
375 * information about a PMC or PMD.
376 * dep_pmd[]: a bitmask of dependent PMD registers
377 * dep_pmc[]: a bitmask of dependent PMC registers
378 */
379typedef int (*pfm_reg_check_t)(struct task_struct *task, pfm_context_t *ctx, unsigned int cnum, unsigned long *val, struct pt_regs *regs);
380typedef struct {
381 unsigned int type;
382 int pm_pos;
383 unsigned long default_value; /* power-on default value */
384 unsigned long reserved_mask; /* bitmask of reserved bits */
385 pfm_reg_check_t read_check;
386 pfm_reg_check_t write_check;
387 unsigned long dep_pmd[4];
388 unsigned long dep_pmc[4];
389} pfm_reg_desc_t;
390
391/* assume cnum is a valid monitor */
392#define PMC_PM(cnum, val) (((val) >> (pmu_conf->pmc_desc[cnum].pm_pos)) & 0x1)
393
394/*
395 * This structure is initialized at boot time and contains
396 * a description of the PMU main characteristics.
397 *
398 * If the probe function is defined, detection is based
399 * on its return value:
400 * - 0 means recognized PMU
401 * - anything else means not supported
402 * When the probe function is not defined, then the pmu_family field
403 * is used and it must match the host CPU family such that:
404 * - cpu->family & config->pmu_family != 0
405 */
406typedef struct {
407 unsigned long ovfl_val; /* overflow value for counters */
408
409 pfm_reg_desc_t *pmc_desc; /* detailed PMC register dependencies descriptions */
410 pfm_reg_desc_t *pmd_desc; /* detailed PMD register dependencies descriptions */
411
412 unsigned int num_pmcs; /* number of PMCS: computed at init time */
413 unsigned int num_pmds; /* number of PMDS: computed at init time */
414 unsigned long impl_pmcs[4]; /* bitmask of implemented PMCS */
415 unsigned long impl_pmds[4]; /* bitmask of implemented PMDS */
416
417 char *pmu_name; /* PMU family name */
418 unsigned int pmu_family; /* cpuid family pattern used to identify pmu */
419 unsigned int flags; /* pmu specific flags */
420 unsigned int num_ibrs; /* number of IBRS: computed at init time */
421 unsigned int num_dbrs; /* number of DBRS: computed at init time */
422 unsigned int num_counters; /* PMC/PMD counting pairs : computed at init time */
423 int (*probe)(void); /* customized probe routine */
424 unsigned int use_rr_dbregs:1; /* set if debug registers used for range restriction */
425} pmu_config_t;
426/*
427 * PMU specific flags
428 */
429#define PFM_PMU_IRQ_RESEND 1 /* PMU needs explicit IRQ resend */
430
431/*
432 * debug register related type definitions
433 */
434typedef struct {
435 unsigned long ibr_mask:56;
436 unsigned long ibr_plm:4;
437 unsigned long ibr_ig:3;
438 unsigned long ibr_x:1;
439} ibr_mask_reg_t;
440
441typedef struct {
442 unsigned long dbr_mask:56;
443 unsigned long dbr_plm:4;
444 unsigned long dbr_ig:2;
445 unsigned long dbr_w:1;
446 unsigned long dbr_r:1;
447} dbr_mask_reg_t;
448
449typedef union {
450 unsigned long val;
451 ibr_mask_reg_t ibr;
452 dbr_mask_reg_t dbr;
453} dbreg_t;
454
455
456/*
457 * perfmon command descriptions
458 */
459typedef struct {
460 int (*cmd_func)(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs);
461 char *cmd_name;
462 int cmd_flags;
463 unsigned int cmd_narg;
464 size_t cmd_argsize;
465 int (*cmd_getsize)(void *arg, size_t *sz);
466} pfm_cmd_desc_t;
467
468#define PFM_CMD_FD 0x01 /* command requires a file descriptor */
469#define PFM_CMD_ARG_READ 0x02 /* command must read argument(s) */
470#define PFM_CMD_ARG_RW 0x04 /* command must read/write argument(s) */
471#define PFM_CMD_STOP 0x08 /* command does not work on zombie context */
472
473
474#define PFM_CMD_NAME(cmd) pfm_cmd_tab[(cmd)].cmd_name
475#define PFM_CMD_READ_ARG(cmd) (pfm_cmd_tab[(cmd)].cmd_flags & PFM_CMD_ARG_READ)
476#define PFM_CMD_RW_ARG(cmd) (pfm_cmd_tab[(cmd)].cmd_flags & PFM_CMD_ARG_RW)
477#define PFM_CMD_USE_FD(cmd) (pfm_cmd_tab[(cmd)].cmd_flags & PFM_CMD_FD)
478#define PFM_CMD_STOPPED(cmd) (pfm_cmd_tab[(cmd)].cmd_flags & PFM_CMD_STOP)
479
480#define PFM_CMD_ARG_MANY -1 /* cannot be zero */
481
482typedef struct {
483 int debug; /* turn on/off debugging via syslog */
484 int debug_ovfl; /* turn on/off debug printk in overflow handler */
485 int fastctxsw; /* turn on/off fast (insecure) ctxsw */
486 int expert_mode; /* turn on/off value checking */
487 int debug_pfm_read;
488} pfm_sysctl_t;
489
490typedef struct {
491 unsigned long pfm_spurious_ovfl_intr_count; /* keep track of spurious ovfl interrupts */
492 unsigned long pfm_replay_ovfl_intr_count; /* keep track of replayed ovfl interrupts */
493 unsigned long pfm_ovfl_intr_count; /* keep track of ovfl interrupts */
494 unsigned long pfm_ovfl_intr_cycles; /* cycles spent processing ovfl interrupts */
495 unsigned long pfm_ovfl_intr_cycles_min; /* min cycles spent processing ovfl interrupts */
496 unsigned long pfm_ovfl_intr_cycles_max; /* max cycles spent processing ovfl interrupts */
497 unsigned long pfm_smpl_handler_calls;
498 unsigned long pfm_smpl_handler_cycles;
499 char pad[SMP_CACHE_BYTES] ____cacheline_aligned;
500} pfm_stats_t;
501
502/*
503 * perfmon internal variables
504 */
505static pfm_stats_t pfm_stats[NR_CPUS];
506static pfm_session_t pfm_sessions; /* global sessions information */
507
508static struct proc_dir_entry *perfmon_dir;
509static pfm_uuid_t pfm_null_uuid = {0,};
510
511static spinlock_t pfm_buffer_fmt_lock;
512static LIST_HEAD(pfm_buffer_fmt_list);
513
514static pmu_config_t *pmu_conf;
515
516/* sysctl() controls */
517static pfm_sysctl_t pfm_sysctl;
518int pfm_debug_var;
519
520static ctl_table pfm_ctl_table[]={
521 {1, "debug", &pfm_sysctl.debug, sizeof(int), 0666, NULL, &proc_dointvec, NULL,},
522 {2, "debug_ovfl", &pfm_sysctl.debug_ovfl, sizeof(int), 0666, NULL, &proc_dointvec, NULL,},
523 {3, "fastctxsw", &pfm_sysctl.fastctxsw, sizeof(int), 0600, NULL, &proc_dointvec, NULL,},
524 {4, "expert_mode", &pfm_sysctl.expert_mode, sizeof(int), 0600, NULL, &proc_dointvec, NULL,},
525 { 0, },
526};
527static ctl_table pfm_sysctl_dir[] = {
528 {1, "perfmon", NULL, 0, 0755, pfm_ctl_table, },
529 {0,},
530};
531static ctl_table pfm_sysctl_root[] = {
532 {1, "kernel", NULL, 0, 0755, pfm_sysctl_dir, },
533 {0,},
534};
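/*
 * Once the root table above is registered, the three tables chain together so
 * the knobs appear as /proc/sys/kernel/perfmon/{debug,debug_ovfl,fastctxsw,
 * expert_mode} (illustrative note on the resulting sysctl paths).
 */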
535static struct ctl_table_header *pfm_sysctl_header;
536
537static int pfm_context_unload(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs);
538static int pfm_flush(struct file *filp);
539
540#define pfm_get_cpu_var(v) __ia64_per_cpu_var(v)
541#define pfm_get_cpu_data(a,b) per_cpu(a, b)
542
543static inline void
544pfm_put_task(struct task_struct *task)
545{
546 if (task != current) put_task_struct(task);
547}
548
549static inline void
550pfm_set_task_notify(struct task_struct *task)
551{
552 struct thread_info *info;
553
554 info = (struct thread_info *) ((char *) task + IA64_TASK_SIZE);
555 set_bit(TIF_NOTIFY_RESUME, &info->flags);
556}
557
558static inline void
559pfm_clear_task_notify(void)
560{
561 clear_thread_flag(TIF_NOTIFY_RESUME);
562}
563
564static inline void
565pfm_reserve_page(unsigned long a)
566{
567 SetPageReserved(vmalloc_to_page((void *)a));
568}
569static inline void
570pfm_unreserve_page(unsigned long a)
571{
572 ClearPageReserved(vmalloc_to_page((void*)a));
573}
574
575static inline unsigned long
576pfm_protect_ctx_ctxsw(pfm_context_t *x)
577{
578 spin_lock(&(x)->ctx_lock);
579 return 0UL;
580}
581
582static inline unsigned long
583pfm_unprotect_ctx_ctxsw(pfm_context_t *x, unsigned long f)
584{
585 spin_unlock(&(x)->ctx_lock);
586}
587
588static inline unsigned int
589pfm_do_munmap(struct mm_struct *mm, unsigned long addr, size_t len, int acct)
590{
591 return do_munmap(mm, addr, len);
592}
593
594static inline unsigned long
595pfm_get_unmapped_area(struct file *file, unsigned long addr, unsigned long len, unsigned long pgoff, unsigned long flags, unsigned long exec)
596{
597 return get_unmapped_area(file, addr, len, pgoff, flags);
598}
599
600
601static struct super_block *
602pfmfs_get_sb(struct file_system_type *fs_type, int flags, const char *dev_name, void *data)
603{
604 return get_sb_pseudo(fs_type, "pfm:", NULL, PFMFS_MAGIC);
605}
606
607static struct file_system_type pfm_fs_type = {
608 .name = "pfmfs",
609 .get_sb = pfmfs_get_sb,
610 .kill_sb = kill_anon_super,
611};
612
613DEFINE_PER_CPU(unsigned long, pfm_syst_info);
614DEFINE_PER_CPU(struct task_struct *, pmu_owner);
615DEFINE_PER_CPU(pfm_context_t *, pmu_ctx);
616DEFINE_PER_CPU(unsigned long, pmu_activation_number);
617
618
619/* forward declaration */
620static struct file_operations pfm_file_ops;
621
622/*
623 * forward declarations
624 */
625#ifndef CONFIG_SMP
626static void pfm_lazy_save_regs (struct task_struct *ta);
627#endif
628
629void dump_pmu_state(const char *);
630static int pfm_write_ibr_dbr(int mode, pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs);
631
632#include "perfmon_itanium.h"
633#include "perfmon_mckinley.h"
634#include "perfmon_generic.h"
635
636static pmu_config_t *pmu_confs[]={
637 &pmu_conf_mck,
638 &pmu_conf_ita,
639 &pmu_conf_gen, /* must be last */
640 NULL
641};
642
643
644static int pfm_end_notify_user(pfm_context_t *ctx);
645
646static inline void
647pfm_clear_psr_pp(void)
648{
649 ia64_rsm(IA64_PSR_PP);
650 ia64_srlz_i();
651}
652
653static inline void
654pfm_set_psr_pp(void)
655{
656 ia64_ssm(IA64_PSR_PP);
657 ia64_srlz_i();
658}
659
660static inline void
661pfm_clear_psr_up(void)
662{
663 ia64_rsm(IA64_PSR_UP);
664 ia64_srlz_i();
665}
666
667static inline void
668pfm_set_psr_up(void)
669{
670 ia64_ssm(IA64_PSR_UP);
671 ia64_srlz_i();
672}
673
674static inline unsigned long
675pfm_get_psr(void)
676{
677 unsigned long tmp;
678 tmp = ia64_getreg(_IA64_REG_PSR);
679 ia64_srlz_i();
680 return tmp;
681}
682
683static inline void
684pfm_set_psr_l(unsigned long val)
685{
686 ia64_setreg(_IA64_REG_PSR_L, val);
687 ia64_srlz_i();
688}
689
690static inline void
691pfm_freeze_pmu(void)
692{
693 ia64_set_pmc(0,1UL);
694 ia64_srlz_d();
695}
696
697static inline void
698pfm_unfreeze_pmu(void)
699{
700 ia64_set_pmc(0,0UL);
701 ia64_srlz_d();
702}
703
704static inline void
705pfm_restore_ibrs(unsigned long *ibrs, unsigned int nibrs)
706{
707 int i;
708
709 for (i=0; i < nibrs; i++) {
710 ia64_set_ibr(i, ibrs[i]);
711 ia64_dv_serialize_instruction();
712 }
713 ia64_srlz_i();
714}
715
716static inline void
717pfm_restore_dbrs(unsigned long *dbrs, unsigned int ndbrs)
718{
719 int i;
720
721 for (i=0; i < ndbrs; i++) {
722 ia64_set_dbr(i, dbrs[i]);
723 ia64_dv_serialize_data();
724 }
725 ia64_srlz_d();
726}
727
728/*
729 * PMD[i] must be a counter. no check is made
730 */
731static inline unsigned long
732pfm_read_soft_counter(pfm_context_t *ctx, int i)
733{
734 return ctx->ctx_pmds[i].val + (ia64_get_pmd(i) & pmu_conf->ovfl_val);
735}
736
737/*
738 * PMD[i] must be a counter. no check is made
739 */
740static inline void
741pfm_write_soft_counter(pfm_context_t *ctx, int i, unsigned long val)
742{
743 unsigned long ovfl_val = pmu_conf->ovfl_val;
744
745 ctx->ctx_pmds[i].val = val & ~ovfl_val;
746 /*
747 * writing to the unimplemented part is ignored, so we do not need to
748 * mask off the top part
749 */
750 ia64_set_pmd(i, val & ovfl_val);
751}
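/*
 * A small worked example of the soft/hard split above (illustrative only,
 * assuming for the sake of the numbers that ovfl_val == (1UL << 47) - 1):
 *
 *	pfm_write_soft_counter(ctx, i, 0x0000800000000123UL);
 *	// ctx->ctx_pmds[i].val = 0x0000800000000000  (bits above the hw width)
 *	// hardware PMD[i]      = 0x0000000000000123  (low, hw-implemented bits)
 *
 * pfm_read_soft_counter() then adds the two halves back together, so the full
 * 64-bit value is recovered as long as the hardware counter has not wrapped.
 */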
752
753static pfm_msg_t *
754pfm_get_new_msg(pfm_context_t *ctx)
755{
756 int idx, next;
757
758 next = (ctx->ctx_msgq_tail+1) % PFM_MAX_MSGS;
759
760 DPRINT(("ctx_fd=%p head=%d tail=%d\n", ctx, ctx->ctx_msgq_head, ctx->ctx_msgq_tail));
761 if (next == ctx->ctx_msgq_head) return NULL;
762
763 idx = ctx->ctx_msgq_tail;
764 ctx->ctx_msgq_tail = next;
765
766 DPRINT(("ctx=%p head=%d tail=%d msg=%d\n", ctx, ctx->ctx_msgq_head, ctx->ctx_msgq_tail, idx));
767
768 return ctx->ctx_msgq+idx;
769}
770
771static pfm_msg_t *
772pfm_get_next_msg(pfm_context_t *ctx)
773{
774 pfm_msg_t *msg;
775
776 DPRINT(("ctx=%p head=%d tail=%d\n", ctx, ctx->ctx_msgq_head, ctx->ctx_msgq_tail));
777
778 if (PFM_CTXQ_EMPTY(ctx)) return NULL;
779
780 /*
781 * get oldest message
782 */
783 msg = ctx->ctx_msgq+ctx->ctx_msgq_head;
784
785 /*
786 * and move forward
787 */
788 ctx->ctx_msgq_head = (ctx->ctx_msgq_head+1) % PFM_MAX_MSGS;
789
790 DPRINT(("ctx=%p head=%d tail=%d type=%d\n", ctx, ctx->ctx_msgq_head, ctx->ctx_msgq_tail, msg->pfm_gen_msg.msg_type));
791
792 return msg;
793}
794
795static void
796pfm_reset_msgq(pfm_context_t *ctx)
797{
798 ctx->ctx_msgq_head = ctx->ctx_msgq_tail = 0;
799 DPRINT(("ctx=%p msgq reset\n", ctx));
800}
801
802static void *
803pfm_rvmalloc(unsigned long size)
804{
805 void *mem;
806 unsigned long addr;
807
808 size = PAGE_ALIGN(size);
809 mem = vmalloc(size);
810 if (mem) {
811 //printk("perfmon: CPU%d pfm_rvmalloc(%ld)=%p\n", smp_processor_id(), size, mem);
812 memset(mem, 0, size);
813 addr = (unsigned long)mem;
814 while (size > 0) {
815 pfm_reserve_page(addr);
816 addr+=PAGE_SIZE;
817 size-=PAGE_SIZE;
818 }
819 }
820 return mem;
821}
822
823static void
824pfm_rvfree(void *mem, unsigned long size)
825{
826 unsigned long addr;
827
828 if (mem) {
829 DPRINT(("freeing physical buffer @%p size=%lu\n", mem, size));
830 addr = (unsigned long) mem;
831 while ((long) size > 0) {
832 pfm_unreserve_page(addr);
833 addr+=PAGE_SIZE;
834 size-=PAGE_SIZE;
835 }
836 vfree(mem);
837 }
838 return;
839}
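/*
 * Note on the pfm_rvmalloc()/pfm_rvfree() pair above: the sampling buffer is
 * vmalloc'ed, zeroed, and every page is marked reserved (pfm_reserve_page)
 * so that it can later be remapped read-only into user space by
 * pfm_remap_buffer() and is left alone by page reclaim; pfm_rvfree() clears
 * the reservation on each page before vfree().
 */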
840
841static pfm_context_t *
842pfm_context_alloc(void)
843{
844 pfm_context_t *ctx;
845
846 /*
847 * allocate context descriptor
848 * must be able to free with interrupts disabled
849 */
850 ctx = kmalloc(sizeof(pfm_context_t), GFP_KERNEL);
851 if (ctx) {
852 memset(ctx, 0, sizeof(pfm_context_t));
853 DPRINT(("alloc ctx @%p\n", ctx));
854 }
855 return ctx;
856}
857
858static void
859pfm_context_free(pfm_context_t *ctx)
860{
861 if (ctx) {
862 DPRINT(("free ctx @%p\n", ctx));
863 kfree(ctx);
864 }
865}
866
867static void
868pfm_mask_monitoring(struct task_struct *task)
869{
870 pfm_context_t *ctx = PFM_GET_CTX(task);
871 struct thread_struct *th = &task->thread;
872 unsigned long mask, val, ovfl_mask;
873 int i;
874
875 DPRINT_ovfl(("masking monitoring for [%d]\n", task->pid));
876
877 ovfl_mask = pmu_conf->ovfl_val;
878 /*
879 * monitoring can only be masked as a result of a valid
880 * counter overflow. In UP, it means that the PMU still
881 * has an owner. Note that the owner can be different
882 * from the current task. However the PMU state belongs
883 * to the owner.
884 * In SMP, a valid overflow only happens when task is
885 * current. Therefore if we come here, we know that
886 * the PMU state belongs to the current task, therefore
887 * we can access the live registers.
888 *
889 * So in both cases, the live register contains the owner's
890 * state. We can ONLY touch the PMU registers and NOT the PSR.
891 *
892 * As a consequence of this call, the thread->pmds[] array
893 * contains stale information which must be ignored
894 * when context is reloaded AND monitoring is active (see
895 * pfm_restart).
896 */
897 mask = ctx->ctx_used_pmds[0];
898 for (i = 0; mask; i++, mask>>=1) {
899 /* skip non used pmds */
900 if ((mask & 0x1) == 0) continue;
901 val = ia64_get_pmd(i);
902
903 if (PMD_IS_COUNTING(i)) {
904 /*
905 * we rebuild the full 64 bit value of the counter
906 */
907 ctx->ctx_pmds[i].val += (val & ovfl_mask);
908 } else {
909 ctx->ctx_pmds[i].val = val;
910 }
911 DPRINT_ovfl(("pmd[%d]=0x%lx hw_pmd=0x%lx\n",
912 i,
913 ctx->ctx_pmds[i].val,
914 val & ovfl_mask));
915 }
916 /*
917 * mask monitoring by setting the privilege level to 0
918 * we cannot use psr.pp/psr.up for this, it is controlled by
919 * the user
920 *
921 * if task is current, modify actual registers, otherwise modify
922 * thread save state, i.e., what will be restored in pfm_load_regs()
923 */
924 mask = ctx->ctx_used_monitors[0] >> PMU_FIRST_COUNTER;
925 for(i= PMU_FIRST_COUNTER; mask; i++, mask>>=1) {
926 if ((mask & 0x1) == 0UL) continue;
927 ia64_set_pmc(i, th->pmcs[i] & ~0xfUL);
928 th->pmcs[i] &= ~0xfUL;
929 DPRINT_ovfl(("pmc[%d]=0x%lx\n", i, th->pmcs[i]));
930 }
931 /*
932 * make all of this visible
933 */
934 ia64_srlz_d();
935}
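/*
 * Illustrative example of the masking trick used above: the low nibble of a
 * monitor PMC is the plm (privilege level mask) field, so with a
 * hypothetical value whose plm is 0x9 (count at privilege levels 0 and 3):
 *
 *   th->pmcs[i] = 0x....9   ->  ia64_set_pmc(i, 0x....0)
 *
 * i.e. plm is forced to 0 and the counter stops counting at every privilege
 * level, while psr.up/psr.pp stay untouched and under user control.
 */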
936
937/*
938 * must always be done with task == current
939 *
940 * context must be in MASKED state when calling
941 */
942static void
943pfm_restore_monitoring(struct task_struct *task)
944{
945 pfm_context_t *ctx = PFM_GET_CTX(task);
946 struct thread_struct *th = &task->thread;
947 unsigned long mask, ovfl_mask;
948 unsigned long psr, val;
949 int i, is_system;
950
951 is_system = ctx->ctx_fl_system;
952 ovfl_mask = pmu_conf->ovfl_val;
953
954 if (task != current) {
955 printk(KERN_ERR "perfmon.%d: invalid task[%d] current[%d]\n", __LINE__, task->pid, current->pid);
956 return;
957 }
958 if (ctx->ctx_state != PFM_CTX_MASKED) {
959 printk(KERN_ERR "perfmon.%d: task[%d] current[%d] invalid state=%d\n", __LINE__,
960 task->pid, current->pid, ctx->ctx_state);
961 return;
962 }
963 psr = pfm_get_psr();
964 /*
965 * monitoring is masked via the PMCs.
966 * As we restore their values, we do not want each counter to
967 * restart right away. We stop monitoring using the PSR,
968 * restore the PMC (and PMD) and then re-establish the psr
969 * as it was. Note that there can be no pending overflow at
970 * this point, because monitoring was MASKED.
971 *
972 * system-wide sessions are pinned and self-monitoring
973 */
974 if (is_system && (PFM_CPUINFO_GET() & PFM_CPUINFO_DCR_PP)) {
975 /* disable dcr pp */
976 ia64_setreg(_IA64_REG_CR_DCR, ia64_getreg(_IA64_REG_CR_DCR) & ~IA64_DCR_PP);
977 pfm_clear_psr_pp();
978 } else {
979 pfm_clear_psr_up();
980 }
981 /*
982 * first, we restore the PMD
983 */
984 mask = ctx->ctx_used_pmds[0];
985 for (i = 0; mask; i++, mask>>=1) {
986 /* skip non used pmds */
987 if ((mask & 0x1) == 0) continue;
988
989 if (PMD_IS_COUNTING(i)) {
990 /*
991 * we split the 64bit value according to
992 * counter width
993 */
994 val = ctx->ctx_pmds[i].val & ovfl_mask;
995 ctx->ctx_pmds[i].val &= ~ovfl_mask;
996 } else {
997 val = ctx->ctx_pmds[i].val;
998 }
999 ia64_set_pmd(i, val);
1000
1001 DPRINT(("pmd[%d]=0x%lx hw_pmd=0x%lx\n",
1002 i,
1003 ctx->ctx_pmds[i].val,
1004 val));
1005 }
1006 /*
1007 * restore the PMCs
1008 */
1009 mask = ctx->ctx_used_monitors[0] >> PMU_FIRST_COUNTER;
1010 for(i= PMU_FIRST_COUNTER; mask; i++, mask>>=1) {
1011 if ((mask & 0x1) == 0UL) continue;
1012 th->pmcs[i] = ctx->ctx_pmcs[i];
1013 ia64_set_pmc(i, th->pmcs[i]);
1014 DPRINT(("[%d] pmc[%d]=0x%lx\n", task->pid, i, th->pmcs[i]));
1015 }
1016 ia64_srlz_d();
1017
1018 /*
1019 * must restore DBR/IBR because could be modified while masked
1020 * XXX: need to optimize
1021 */
1022 if (ctx->ctx_fl_using_dbreg) {
1023 pfm_restore_ibrs(ctx->ctx_ibrs, pmu_conf->num_ibrs);
1024 pfm_restore_dbrs(ctx->ctx_dbrs, pmu_conf->num_dbrs);
1025 }
1026
1027 /*
1028 * now restore PSR
1029 */
1030 if (is_system && (PFM_CPUINFO_GET() & PFM_CPUINFO_DCR_PP)) {
1031 /* enable dcr pp */
1032 ia64_setreg(_IA64_REG_CR_DCR, ia64_getreg(_IA64_REG_CR_DCR) | IA64_DCR_PP);
1033 ia64_srlz_i();
1034 }
1035 pfm_set_psr_l(psr);
1036}
1037
1038static inline void
1039pfm_save_pmds(unsigned long *pmds, unsigned long mask)
1040{
1041 int i;
1042
1043 ia64_srlz_d();
1044
1045 for (i=0; mask; i++, mask>>=1) {
1046 if (mask & 0x1) pmds[i] = ia64_get_pmd(i);
1047 }
1048}
1049
1050/*
1051 * reload from thread state (used for ctxsw only)
1052 */
1053static inline void
1054pfm_restore_pmds(unsigned long *pmds, unsigned long mask)
1055{
1056 int i;
1057 unsigned long val, ovfl_val = pmu_conf->ovfl_val;
1058
1059 for (i=0; mask; i++, mask>>=1) {
1060 if ((mask & 0x1) == 0) continue;
1061 val = PMD_IS_COUNTING(i) ? pmds[i] & ovfl_val : pmds[i];
1062 ia64_set_pmd(i, val);
1063 }
1064 ia64_srlz_d();
1065}
1066
1067/*
1068 * propagate PMD from context to thread-state
1069 */
1070static inline void
1071pfm_copy_pmds(struct task_struct *task, pfm_context_t *ctx)
1072{
1073 struct thread_struct *thread = &task->thread;
1074 unsigned long ovfl_val = pmu_conf->ovfl_val;
1075 unsigned long mask = ctx->ctx_all_pmds[0];
1076 unsigned long val;
1077 int i;
1078
1079 DPRINT(("mask=0x%lx\n", mask));
1080
1081 for (i=0; mask; i++, mask>>=1) {
1082
1083 val = ctx->ctx_pmds[i].val;
1084
1085 /*
1086 * We break up the 64 bit value into 2 pieces
1087 * the lower bits go to the machine state in the
1088 * thread (will be reloaded on ctxsw in).
1089 * The upper part stays in the soft-counter.
1090 */
1091 if (PMD_IS_COUNTING(i)) {
1092 ctx->ctx_pmds[i].val = val & ~ovfl_val;
1093 val &= ovfl_val;
1094 }
1095 thread->pmds[i] = val;
1096
1097 DPRINT(("pmd[%d]=0x%lx soft_val=0x%lx\n",
1098 i,
1099 thread->pmds[i],
1100 ctx->ctx_pmds[i].val));
1101 }
1102}
1103
1104/*
1105 * propagate PMC from context to thread-state
1106 */
1107static inline void
1108pfm_copy_pmcs(struct task_struct *task, pfm_context_t *ctx)
1109{
1110 struct thread_struct *thread = &task->thread;
1111 unsigned long mask = ctx->ctx_all_pmcs[0];
1112 int i;
1113
1114 DPRINT(("mask=0x%lx\n", mask));
1115
1116 for (i=0; mask; i++, mask>>=1) {
1117 /* masking 0 with ovfl_val yields 0 */
1118 thread->pmcs[i] = ctx->ctx_pmcs[i];
1119 DPRINT(("pmc[%d]=0x%lx\n", i, thread->pmcs[i]));
1120 }
1121}
1122
1123
1124
1125static inline void
1126pfm_restore_pmcs(unsigned long *pmcs, unsigned long mask)
1127{
1128 int i;
1129
1130 for (i=0; mask; i++, mask>>=1) {
1131 if ((mask & 0x1) == 0) continue;
1132 ia64_set_pmc(i, pmcs[i]);
1133 }
1134 ia64_srlz_d();
1135}
1136
1137static inline int
1138pfm_uuid_cmp(pfm_uuid_t a, pfm_uuid_t b)
1139{
1140 return memcmp(a, b, sizeof(pfm_uuid_t));
1141}
1142
1143static inline int
1144pfm_buf_fmt_exit(pfm_buffer_fmt_t *fmt, struct task_struct *task, void *buf, struct pt_regs *regs)
1145{
1146 int ret = 0;
1147 if (fmt->fmt_exit) ret = (*fmt->fmt_exit)(task, buf, regs);
1148 return ret;
1149}
1150
1151static inline int
1152pfm_buf_fmt_getsize(pfm_buffer_fmt_t *fmt, struct task_struct *task, unsigned int flags, int cpu, void *arg, unsigned long *size)
1153{
1154 int ret = 0;
1155 if (fmt->fmt_getsize) ret = (*fmt->fmt_getsize)(task, flags, cpu, arg, size);
1156 return ret;
1157}
1158
1159
1160static inline int
1161pfm_buf_fmt_validate(pfm_buffer_fmt_t *fmt, struct task_struct *task, unsigned int flags,
1162 int cpu, void *arg)
1163{
1164 int ret = 0;
1165 if (fmt->fmt_validate) ret = (*fmt->fmt_validate)(task, flags, cpu, arg);
1166 return ret;
1167}
1168
1169static inline int
1170pfm_buf_fmt_init(pfm_buffer_fmt_t *fmt, struct task_struct *task, void *buf, unsigned int flags,
1171 int cpu, void *arg)
1172{
1173 int ret = 0;
1174 if (fmt->fmt_init) ret = (*fmt->fmt_init)(task, buf, flags, cpu, arg);
1175 return ret;
1176}
1177
1178static inline int
1179pfm_buf_fmt_restart(pfm_buffer_fmt_t *fmt, struct task_struct *task, pfm_ovfl_ctrl_t *ctrl, void *buf, struct pt_regs *regs)
1180{
1181 int ret = 0;
1182 if (fmt->fmt_restart) ret = (*fmt->fmt_restart)(task, ctrl, buf, regs);
1183 return ret;
1184}
1185
1186static inline int
1187pfm_buf_fmt_restart_active(pfm_buffer_fmt_t *fmt, struct task_struct *task, pfm_ovfl_ctrl_t *ctrl, void *buf, struct pt_regs *regs)
1188{
1189 int ret = 0;
1190 if (fmt->fmt_restart_active) ret = (*fmt->fmt_restart_active)(task, ctrl, buf, regs);
1191 return ret;
1192}
1193
1194static pfm_buffer_fmt_t *
1195__pfm_find_buffer_fmt(pfm_uuid_t uuid)
1196{
1197 struct list_head * pos;
1198 pfm_buffer_fmt_t * entry;
1199
1200 list_for_each(pos, &pfm_buffer_fmt_list) {
1201 entry = list_entry(pos, pfm_buffer_fmt_t, fmt_list);
1202 if (pfm_uuid_cmp(uuid, entry->fmt_uuid) == 0)
1203 return entry;
1204 }
1205 return NULL;
1206}
1207
1208/*
1209 * find a buffer format based on its uuid
1210 */
1211static pfm_buffer_fmt_t *
1212pfm_find_buffer_fmt(pfm_uuid_t uuid)
1213{
1214 pfm_buffer_fmt_t * fmt;
1215 spin_lock(&pfm_buffer_fmt_lock);
1216 fmt = __pfm_find_buffer_fmt(uuid);
1217 spin_unlock(&pfm_buffer_fmt_lock);
1218 return fmt;
1219}
1220
1221int
1222pfm_register_buffer_fmt(pfm_buffer_fmt_t *fmt)
1223{
1224 int ret = 0;
1225
1226 /* some sanity checks */
1227 if (fmt == NULL || fmt->fmt_name == NULL) return -EINVAL;
1228
1229 /* we need at least a handler */
1230 if (fmt->fmt_handler == NULL) return -EINVAL;
1231
1232 /*
1233 * XXX: need to check validity of fmt_arg_size
1234 */
1235
1236 spin_lock(&pfm_buffer_fmt_lock);
1237
1238 if (__pfm_find_buffer_fmt(fmt->fmt_uuid)) {
1239 printk(KERN_ERR "perfmon: duplicate sampling format: %s\n", fmt->fmt_name);
1240 ret = -EBUSY;
1241 goto out;
1242 }
1243 list_add(&fmt->fmt_list, &pfm_buffer_fmt_list);
1244 printk(KERN_INFO "perfmon: added sampling format %s\n", fmt->fmt_name);
1245
1246out:
1247 spin_unlock(&pfm_buffer_fmt_lock);
1248 return ret;
1249}
1250EXPORT_SYMBOL(pfm_register_buffer_fmt);
1251
1252int
1253pfm_unregister_buffer_fmt(pfm_uuid_t uuid)
1254{
1255 pfm_buffer_fmt_t *fmt;
1256 int ret = 0;
1257
1258 spin_lock(&pfm_buffer_fmt_lock);
1259
1260 fmt = __pfm_find_buffer_fmt(uuid);
1261 if (!fmt) {
1262 printk(KERN_ERR "perfmon: cannot unregister format, not found\n");
1263 ret = -EINVAL;
1264 goto out;
1265 }
1266 list_del_init(&fmt->fmt_list);
1267 printk(KERN_INFO "perfmon: removed sampling format: %s\n", fmt->fmt_name);
1268
1269out:
1270 spin_unlock(&pfm_buffer_fmt_lock);
1271 return ret;
1272
1273}
1274EXPORT_SYMBOL(pfm_unregister_buffer_fmt);
1275
1276static int
1277pfm_reserve_session(struct task_struct *task, int is_syswide, unsigned int cpu)
1278{
1279 unsigned long flags;
1280 /*
1281 * validity checks on cpu_mask have been done upstream
1282 */
1283 LOCK_PFS(flags);
1284
1285 DPRINT(("in sys_sessions=%u task_sessions=%u dbregs=%u syswide=%d cpu=%u\n",
1286 pfm_sessions.pfs_sys_sessions,
1287 pfm_sessions.pfs_task_sessions,
1288 pfm_sessions.pfs_sys_use_dbregs,
1289 is_syswide,
1290 cpu));
1291
1292 if (is_syswide) {
1293 /*
1294 * cannot mix system wide and per-task sessions
1295 */
1296 if (pfm_sessions.pfs_task_sessions > 0UL) {
1297 DPRINT(("system wide not possible, %u conflicting task_sessions\n",
1298 pfm_sessions.pfs_task_sessions));
1299 goto abort;
1300 }
1301
1302 if (pfm_sessions.pfs_sys_session[cpu]) goto error_conflict;
1303
1304 DPRINT(("reserving system wide session on CPU%u currently on CPU%u\n", cpu, smp_processor_id()));
1305
1306 pfm_sessions.pfs_sys_session[cpu] = task;
1307
1308 pfm_sessions.pfs_sys_sessions++ ;
1309
1310 } else {
1311 if (pfm_sessions.pfs_sys_sessions) goto abort;
1312 pfm_sessions.pfs_task_sessions++;
1313 }
1314
1315 DPRINT(("out sys_sessions=%u task_sessions=%u dbregs=%u syswide=%d cpu=%u\n",
1316 pfm_sessions.pfs_sys_sessions,
1317 pfm_sessions.pfs_task_sessions,
1318 pfm_sessions.pfs_sys_use_dbregs,
1319 is_syswide,
1320 cpu));
1321
1322 UNLOCK_PFS(flags);
1323
1324 return 0;
1325
1326error_conflict:
1327 DPRINT(("system wide not possible, conflicting session [%d] on CPU%d\n",
1328 pfm_sessions.pfs_sys_session[cpu]->pid,
1329 smp_processor_id()));
1330abort:
1331 UNLOCK_PFS(flags);
1332
1333 return -EBUSY;
1334
1335}
1336
1337static int
1338pfm_unreserve_session(pfm_context_t *ctx, int is_syswide, unsigned int cpu)
1339{
1340 unsigned long flags;
1341 /*
1342 * validity checks on cpu_mask have been done upstream
1343 */
1344 LOCK_PFS(flags);
1345
1346 DPRINT(("in sys_sessions=%u task_sessions=%u dbregs=%u syswide=%d cpu=%u\n",
1347 pfm_sessions.pfs_sys_sessions,
1348 pfm_sessions.pfs_task_sessions,
1349 pfm_sessions.pfs_sys_use_dbregs,
1350 is_syswide,
1351 cpu));
1352
1353
1354 if (is_syswide) {
1355 pfm_sessions.pfs_sys_session[cpu] = NULL;
1356 /*
1357 * would not work with perfmon and more than one bit set in cpu_mask
1358 */
1359 if (ctx && ctx->ctx_fl_using_dbreg) {
1360 if (pfm_sessions.pfs_sys_use_dbregs == 0) {
1361 printk(KERN_ERR "perfmon: invalid release for ctx %p sys_use_dbregs=0\n", ctx);
1362 } else {
1363 pfm_sessions.pfs_sys_use_dbregs--;
1364 }
1365 }
1366 pfm_sessions.pfs_sys_sessions--;
1367 } else {
1368 pfm_sessions.pfs_task_sessions--;
1369 }
1370 DPRINT(("out sys_sessions=%u task_sessions=%u dbregs=%u syswide=%d cpu=%u\n",
1371 pfm_sessions.pfs_sys_sessions,
1372 pfm_sessions.pfs_task_sessions,
1373 pfm_sessions.pfs_sys_use_dbregs,
1374 is_syswide,
1375 cpu));
1376
1377 UNLOCK_PFS(flags);
1378
1379 return 0;
1380}
1381
1382/*
1383 * removes virtual mapping of the sampling buffer.
1384 * IMPORTANT: cannot be called with interrupts disabled, e.g. inside
1385 * a PROTECT_CTX() section.
1386 */
1387static int
1388pfm_remove_smpl_mapping(struct task_struct *task, void *vaddr, unsigned long size)
1389{
1390 int r;
1391
1392 /* sanity checks */
1393 if (task->mm == NULL || size == 0UL || vaddr == NULL) {
1394 printk(KERN_ERR "perfmon: pfm_remove_smpl_mapping [%d] invalid context mm=%p\n", task->pid, task->mm);
1395 return -EINVAL;
1396 }
1397
1398 DPRINT(("smpl_vaddr=%p size=%lu\n", vaddr, size));
1399
1400 /*
1401 * does the actual unmapping
1402 */
1403 down_write(&task->mm->mmap_sem);
1404
1405 DPRINT(("down_write done smpl_vaddr=%p size=%lu\n", vaddr, size));
1406
1407 r = pfm_do_munmap(task->mm, (unsigned long)vaddr, size, 0);
1408
1409 up_write(&task->mm->mmap_sem);
1410 if (r !=0) {
1411 printk(KERN_ERR "perfmon: [%d] unable to unmap sampling buffer @%p size=%lu\n", task->pid, vaddr, size);
1412 }
1413
1414 DPRINT(("do_unmap(%p, %lu)=%d\n", vaddr, size, r));
1415
1416 return 0;
1417}
1418
1419/*
1420 * free actual physical storage used by sampling buffer
1421 */
1422#if 0
1423static int
1424pfm_free_smpl_buffer(pfm_context_t *ctx)
1425{
1426 pfm_buffer_fmt_t *fmt;
1427
1428 if (ctx->ctx_smpl_hdr == NULL) goto invalid_free;
1429
1430 /*
1431 * we won't use the buffer format anymore
1432 */
1433 fmt = ctx->ctx_buf_fmt;
1434
1435 DPRINT(("sampling buffer @%p size %lu vaddr=%p\n",
1436 ctx->ctx_smpl_hdr,
1437 ctx->ctx_smpl_size,
1438 ctx->ctx_smpl_vaddr));
1439
1440 pfm_buf_fmt_exit(fmt, current, NULL, NULL);
1441
1442 /*
1443 * free the buffer
1444 */
1445 pfm_rvfree(ctx->ctx_smpl_hdr, ctx->ctx_smpl_size);
1446
1447 ctx->ctx_smpl_hdr = NULL;
1448 ctx->ctx_smpl_size = 0UL;
1449
1450 return 0;
1451
1452invalid_free:
1453 printk(KERN_ERR "perfmon: pfm_free_smpl_buffer [%d] no buffer\n", current->pid);
1454 return -EINVAL;
1455}
1456#endif
1457
1458static inline void
1459pfm_exit_smpl_buffer(pfm_buffer_fmt_t *fmt)
1460{
1461 if (fmt == NULL) return;
1462
1463 pfm_buf_fmt_exit(fmt, current, NULL, NULL);
1464
1465}
1466
1467/*
1468 * pfmfs should _never_ be mounted by userland - too much of a security hassle,
1469 * no real gain from having the whole whorehouse mounted. So we don't need
1470 * any operations on the root directory. However, we need a non-trivial
1471 * d_name - pfm: will go nicely and kill the special-casing in procfs.
1472 */
1473static struct vfsmount *pfmfs_mnt;
1474
1475static int __init
1476init_pfm_fs(void)
1477{
1478 int err = register_filesystem(&pfm_fs_type);
1479 if (!err) {
1480 pfmfs_mnt = kern_mount(&pfm_fs_type);
1481 err = PTR_ERR(pfmfs_mnt);
1482 if (IS_ERR(pfmfs_mnt))
1483 unregister_filesystem(&pfm_fs_type);
1484 else
1485 err = 0;
1486 }
1487 return err;
1488}
1489
1490static void __exit
1491exit_pfm_fs(void)
1492{
1493 unregister_filesystem(&pfm_fs_type);
1494 mntput(pfmfs_mnt);
1495}
1496
1497static ssize_t
1498pfm_read(struct file *filp, char __user *buf, size_t size, loff_t *ppos)
1499{
1500 pfm_context_t *ctx;
1501 pfm_msg_t *msg;
1502 ssize_t ret;
1503 unsigned long flags;
1504 DECLARE_WAITQUEUE(wait, current);
1505 if (PFM_IS_FILE(filp) == 0) {
1506 printk(KERN_ERR "perfmon: pfm_read: bad magic [%d]\n", current->pid);
1507 return -EINVAL;
1508 }
1509
1510 ctx = (pfm_context_t *)filp->private_data;
1511 if (ctx == NULL) {
1512 printk(KERN_ERR "perfmon: pfm_read: NULL ctx [%d]\n", current->pid);
1513 return -EINVAL;
1514 }
1515
1516 /*
1517 * check even when there is no message
1518 */
1519 if (size < sizeof(pfm_msg_t)) {
1520 DPRINT(("message is too small ctx=%p (>=%ld)\n", ctx, sizeof(pfm_msg_t)));
1521 return -EINVAL;
1522 }
1523
1524 PROTECT_CTX(ctx, flags);
1525
1526 /*
1527 * put ourselves on the wait queue
1528 */
1529 add_wait_queue(&ctx->ctx_msgq_wait, &wait);
1530
1531
1532 for(;;) {
1533 /*
1534 * check wait queue
1535 */
1536
1537 set_current_state(TASK_INTERRUPTIBLE);
1538
1539 DPRINT(("head=%d tail=%d\n", ctx->ctx_msgq_head, ctx->ctx_msgq_tail));
1540
1541 ret = 0;
1542 if(PFM_CTXQ_EMPTY(ctx) == 0) break;
1543
1544 UNPROTECT_CTX(ctx, flags);
1545
1546 /*
1547 * check non-blocking read
1548 */
1549 ret = -EAGAIN;
1550 if(filp->f_flags & O_NONBLOCK) break;
1551
1552 /*
1553 * check pending signals
1554 */
1555 if(signal_pending(current)) {
1556 ret = -EINTR;
1557 break;
1558 }
1559 /*
1560 * no message, so wait
1561 */
1562 schedule();
1563
1564 PROTECT_CTX(ctx, flags);
1565 }
1566 DPRINT(("[%d] back to running ret=%ld\n", current->pid, ret));
1567 set_current_state(TASK_RUNNING);
1568 remove_wait_queue(&ctx->ctx_msgq_wait, &wait);
1569
1570 if (ret < 0) goto abort;
1571
1572 ret = -EINVAL;
1573 msg = pfm_get_next_msg(ctx);
1574 if (msg == NULL) {
1575 printk(KERN_ERR "perfmon: pfm_read no msg for ctx=%p [%d]\n", ctx, current->pid);
1576 goto abort_locked;
1577 }
1578
1579 DPRINT(("[%d] fd=%d type=%d\n", current->pid, msg->pfm_gen_msg.msg_ctx_fd, msg->pfm_gen_msg.msg_type));
1580
1581 ret = -EFAULT;
1582 if(copy_to_user(buf, msg, sizeof(pfm_msg_t)) == 0) ret = sizeof(pfm_msg_t);
1583
1584abort_locked:
1585 UNPROTECT_CTX(ctx, flags);
1586abort:
1587 return ret;
1588}
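/*
 * Shape of the blocking loop above (illustrative restatement of the code,
 * nothing new is added by this note):
 *
 *   add_wait_queue(&ctx->ctx_msgq_wait, &wait);
 *   for (;;) {
 *           set_current_state(TASK_INTERRUPTIBLE);
 *           if (queue not empty)  break;       still holding the ctx lock
 *           UNPROTECT_CTX();
 *           if (O_NONBLOCK)       break;       ret = -EAGAIN, lock dropped
 *           if (signal_pending()) break;       ret = -EINTR,  lock dropped
 *           schedule();
 *           PROTECT_CTX();
 *   }
 *   set_current_state(TASK_RUNNING);
 *   remove_wait_queue(&ctx->ctx_msgq_wait, &wait);
 *
 * Calling set_current_state() before testing the condition closes the race
 * with the wake_up() done by the overflow handler: a wakeup arriving between
 * the test and schedule() simply puts the task back to TASK_RUNNING, so
 * schedule() returns immediately instead of sleeping forever.
 */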
1589
1590static ssize_t
1591pfm_write(struct file *file, const char __user *ubuf,
1592 size_t size, loff_t *ppos)
1593{
1594 DPRINT(("pfm_write called\n"));
1595 return -EINVAL;
1596}
1597
1598static unsigned int
1599pfm_poll(struct file *filp, poll_table * wait)
1600{
1601 pfm_context_t *ctx;
1602 unsigned long flags;
1603 unsigned int mask = 0;
1604
1605 if (PFM_IS_FILE(filp) == 0) {
1606 printk(KERN_ERR "perfmon: pfm_poll: bad magic [%d]\n", current->pid);
1607 return 0;
1608 }
1609
1610 ctx = (pfm_context_t *)filp->private_data;
1611 if (ctx == NULL) {
1612 printk(KERN_ERR "perfmon: pfm_poll: NULL ctx [%d]\n", current->pid);
1613 return 0;
1614 }
1615
1616
1617 DPRINT(("pfm_poll ctx_fd=%d before poll_wait\n", ctx->ctx_fd));
1618
1619 poll_wait(filp, &ctx->ctx_msgq_wait, wait);
1620
1621 PROTECT_CTX(ctx, flags);
1622
1623 if (PFM_CTXQ_EMPTY(ctx) == 0)
1624 mask = POLLIN | POLLRDNORM;
1625
1626 UNPROTECT_CTX(ctx, flags);
1627
1628 DPRINT(("pfm_poll ctx_fd=%d mask=0x%x\n", ctx->ctx_fd, mask));
1629
1630 return mask;
1631}
1632
1633static int
1634pfm_ioctl(struct inode *inode, struct file *file, unsigned int cmd, unsigned long arg)
1635{
1636 DPRINT(("pfm_ioctl called\n"));
1637 return -EINVAL;
1638}
1639
1640/*
1641 * interrupt cannot be masked when coming here
1642 */
1643static inline int
1644pfm_do_fasync(int fd, struct file *filp, pfm_context_t *ctx, int on)
1645{
1646 int ret;
1647
1648 ret = fasync_helper (fd, filp, on, &ctx->ctx_async_queue);
1649
1650 DPRINT(("pfm_fasync called by [%d] on ctx_fd=%d on=%d async_queue=%p ret=%d\n",
1651 current->pid,
1652 fd,
1653 on,
1654 ctx->ctx_async_queue, ret));
1655
1656 return ret;
1657}
1658
1659static int
1660pfm_fasync(int fd, struct file *filp, int on)
1661{
1662 pfm_context_t *ctx;
1663 int ret;
1664
1665 if (PFM_IS_FILE(filp) == 0) {
1666 printk(KERN_ERR "perfmon: pfm_fasync bad magic [%d]\n", current->pid);
1667 return -EBADF;
1668 }
1669
1670 ctx = (pfm_context_t *)filp->private_data;
1671 if (ctx == NULL) {
1672 printk(KERN_ERR "perfmon: pfm_fasync NULL ctx [%d]\n", current->pid);
1673 return -EBADF;
1674 }
1675 /*
1676 * we cannot mask interrupts during this call because it may
1677 * go to sleep if memory is not readily available.
1678 *
1679 * We are protected from the context disappearing by the get_fd()/put_fd()
1680 * done in caller. Serialization of this function is ensured by caller.
1681 */
1682 ret = pfm_do_fasync(fd, filp, ctx, on);
1683
1684
1685 DPRINT(("pfm_fasync called on ctx_fd=%d on=%d async_queue=%p ret=%d\n",
1686 fd,
1687 on,
1688 ctx->ctx_async_queue, ret));
1689
1690 return ret;
1691}
1692
1693#ifdef CONFIG_SMP
1694/*
1695 * this function is exclusively called from pfm_close().
1696 * The context is not protected at that time, nor are interrupts
1697 * on the remote CPU. That's necessary to avoid deadlocks.
1698 */
1699static void
1700pfm_syswide_force_stop(void *info)
1701{
1702 pfm_context_t *ctx = (pfm_context_t *)info;
1703 struct pt_regs *regs = ia64_task_regs(current);
1704 struct task_struct *owner;
1705 unsigned long flags;
1706 int ret;
1707
1708 if (ctx->ctx_cpu != smp_processor_id()) {
1709 printk(KERN_ERR "perfmon: pfm_syswide_force_stop for CPU%d but on CPU%d\n",
1710 ctx->ctx_cpu,
1711 smp_processor_id());
1712 return;
1713 }
1714 owner = GET_PMU_OWNER();
1715 if (owner != ctx->ctx_task) {
1716 printk(KERN_ERR "perfmon: pfm_syswide_force_stop CPU%d unexpected owner [%d] instead of [%d]\n",
1717 smp_processor_id(),
1718 owner->pid, ctx->ctx_task->pid);
1719 return;
1720 }
1721 if (GET_PMU_CTX() != ctx) {
1722 printk(KERN_ERR "perfmon: pfm_syswide_force_stop CPU%d unexpected ctx %p instead of %p\n",
1723 smp_processor_id(),
1724 GET_PMU_CTX(), ctx);
1725 return;
1726 }
1727
1728 DPRINT(("on CPU%d forcing system wide stop for [%d]\n", smp_processor_id(), ctx->ctx_task->pid));
1729 /*
1730 * the context is already protected in pfm_close(), we simply
1731 * need to mask interrupts to avoid a PMU interrupt race on
1732 * this CPU
1733 */
1734 local_irq_save(flags);
1735
1736 ret = pfm_context_unload(ctx, NULL, 0, regs);
1737 if (ret) {
1738 DPRINT(("context_unload returned %d\n", ret));
1739 }
1740
1741 /*
1742 * unmask interrupts, PMU interrupts are now spurious here
1743 */
1744 local_irq_restore(flags);
1745}
1746
1747static void
1748pfm_syswide_cleanup_other_cpu(pfm_context_t *ctx)
1749{
1750 int ret;
1751
1752 DPRINT(("calling CPU%d for cleanup\n", ctx->ctx_cpu));
1753 ret = smp_call_function_single(ctx->ctx_cpu, pfm_syswide_force_stop, ctx, 0, 1);
1754 DPRINT(("called CPU%d for cleanup ret=%d\n", ctx->ctx_cpu, ret));
1755}
1756#endif /* CONFIG_SMP */
1757
1758/*
1759 * called for each close(). Partially free resources.
1760 * When caller is self-monitoring, the context is unloaded.
1761 */
1762static int
1763pfm_flush(struct file *filp)
1764{
1765 pfm_context_t *ctx;
1766 struct task_struct *task;
1767 struct pt_regs *regs;
1768 unsigned long flags;
1769 unsigned long smpl_buf_size = 0UL;
1770 void *smpl_buf_vaddr = NULL;
1771 int state, is_system;
1772
1773 if (PFM_IS_FILE(filp) == 0) {
1774 DPRINT(("bad magic\n"));
1775 return -EBADF;
1776 }
1777
1778 ctx = (pfm_context_t *)filp->private_data;
1779 if (ctx == NULL) {
1780 printk(KERN_ERR "perfmon: pfm_flush: NULL ctx [%d]\n", current->pid);
1781 return -EBADF;
1782 }
1783
1784 /*
1785 * remove our file from the async queue, if we use this mode.
1786 * This can be done without the context being protected. We come
1787 * here when the context has become unreachable by other tasks.
1788 *
1789 * We may still have active monitoring at this point and we may
1790 * end up in pfm_overflow_handler(). However, fasync_helper()
1791 * operates with interrupts disabled and it cleans up the
1792 * queue. If the PMU handler is called prior to entering
1793 * fasync_helper() then it will send a signal. If it is
1794 * invoked after, it will find an empty queue and no
1795 * signal will be sent. In both case, we are safe
1796 */
1797 if (filp->f_flags & FASYNC) {
1798 DPRINT(("cleaning up async_queue=%p\n", ctx->ctx_async_queue));
1799 pfm_do_fasync (-1, filp, ctx, 0);
1800 }
1801
1802 PROTECT_CTX(ctx, flags);
1803
1804 state = ctx->ctx_state;
1805 is_system = ctx->ctx_fl_system;
1806
1807 task = PFM_CTX_TASK(ctx);
1808 regs = ia64_task_regs(task);
1809
1810 DPRINT(("ctx_state=%d is_current=%d\n",
1811 state,
1812 task == current ? 1 : 0));
1813
1814 /*
1815 * if state == UNLOADED, then task is NULL
1816 */
1817
1818 /*
1819 * we must stop and unload because we are losing access to the context.
1820 */
1821 if (task == current) {
1822#ifdef CONFIG_SMP
1823 /*
1824 * the task IS the owner but it migrated to another CPU: that's bad
1825 * but we must handle this cleanly. Unfortunately, the kernel does
1826 * not provide a mechanism to block migration (while the context is loaded).
1827 *
1828 * We need to release the resource on the ORIGINAL cpu.
1829 */
1830 if (is_system && ctx->ctx_cpu != smp_processor_id()) {
1831
1832 DPRINT(("should be running on CPU%d\n", ctx->ctx_cpu));
1833 /*
1834 * keep context protected but unmask interrupt for IPI
1835 */
1836 local_irq_restore(flags);
1837
1838 pfm_syswide_cleanup_other_cpu(ctx);
1839
1840 /*
1841 * restore interrupt masking
1842 */
1843 local_irq_save(flags);
1844
1845 /*
1846 * context is unloaded at this point
1847 */
1848 } else
1849#endif /* CONFIG_SMP */
1850 {
1851
1852 DPRINT(("forcing unload\n"));
1853 /*
1854 * stop and unload, returning with state UNLOADED
1855 * and session unreserved.
1856 */
1857 pfm_context_unload(ctx, NULL, 0, regs);
1858
1859 DPRINT(("ctx_state=%d\n", ctx->ctx_state));
1860 }
1861 }
1862
1863 /*
1864 * remove virtual mapping, if any, for the calling task.
1865 * cannot reset ctx field until last user is calling close().
1866 *
1867 * ctx_smpl_vaddr must never be cleared because it is needed
1868 * by every task with access to the context
1869 *
1870 * When called from do_exit(), the mm context is gone already, therefore
1871 * mm is NULL, i.e., the VMA is already gone and we do not have to
1872 * do anything here
1873 */
1874 if (ctx->ctx_smpl_vaddr && current->mm) {
1875 smpl_buf_vaddr = ctx->ctx_smpl_vaddr;
1876 smpl_buf_size = ctx->ctx_smpl_size;
1877 }
1878
1879 UNPROTECT_CTX(ctx, flags);
1880
1881 /*
1882 * if there was a mapping, then we systematically remove it
1883 * at this point. Cannot be done inside critical section
1884 * because some VM function reenables interrupts.
1885 *
1886 */
1887 if (smpl_buf_vaddr) pfm_remove_smpl_mapping(current, smpl_buf_vaddr, smpl_buf_size);
1888
1889 return 0;
1890}
1891/*
1892 * called either on explicit close() or from exit_files().
1893 * Only the LAST user of the file gets to this point, i.e., it is
1894 * called only ONCE.
1895 *
1896 * IMPORTANT: we get called ONLY when the refcnt on the file gets to zero
1897 * (fput()),i.e, last task to access the file. Nobody else can access the
1898 * file at this point.
1899 *
1900 * When called from exit_files(), the VMA has been freed because exit_mm()
1901 * is executed before exit_files().
1902 *
1903 * When called from exit_files(), the current task is not yet ZOMBIE but we
1904 * flush the PMU state to the context.
1905 */
1906static int
1907pfm_close(struct inode *inode, struct file *filp)
1908{
1909 pfm_context_t *ctx;
1910 struct task_struct *task;
1911 struct pt_regs *regs;
1912 DECLARE_WAITQUEUE(wait, current);
1913 unsigned long flags;
1914 unsigned long smpl_buf_size = 0UL;
1915 void *smpl_buf_addr = NULL;
1916 int free_possible = 1;
1917 int state, is_system;
1918
1919 DPRINT(("pfm_close called private=%p\n", filp->private_data));
1920
1921 if (PFM_IS_FILE(filp) == 0) {
1922 DPRINT(("bad magic\n"));
1923 return -EBADF;
1924 }
1925
1926 ctx = (pfm_context_t *)filp->private_data;
1927 if (ctx == NULL) {
1928 printk(KERN_ERR "perfmon: pfm_close: NULL ctx [%d]\n", current->pid);
1929 return -EBADF;
1930 }
1931
1932 PROTECT_CTX(ctx, flags);
1933
1934 state = ctx->ctx_state;
1935 is_system = ctx->ctx_fl_system;
1936
1937 task = PFM_CTX_TASK(ctx);
1938 regs = ia64_task_regs(task);
1939
1940 DPRINT(("ctx_state=%d is_current=%d\n",
1941 state,
1942 task == current ? 1 : 0));
1943
1944 /*
1945 * if task == current, then pfm_flush() unloaded the context
1946 */
1947 if (state == PFM_CTX_UNLOADED) goto doit;
1948
1949 /*
1950 * context is loaded/masked and task != current, we need to
1951 * either force an unload or go zombie
1952 */
1953
1954 /*
1955 * The task is currently blocked or will block after an overflow.
1956 * We must force it to wake up to get out of the
1957 * MASKED state and transition to the unloaded state by itself.
1958 *
1959 * This situation is only possible for per-task mode
1960 */
1961 if (state == PFM_CTX_MASKED && CTX_OVFL_NOBLOCK(ctx) == 0) {
1962
1963 /*
1964 * set a "partial" zombie state to be checked
1965 * upon return from down() in pfm_handle_work().
1966 *
1967 * We cannot use the ZOMBIE state, because it is checked
1968 * by pfm_load_regs() which is called upon wakeup from down().
1969 * In such case, it would free the context and then we would
1970 * return to pfm_handle_work() which would access the
1971 * stale context. Instead, we set a flag invisible to pfm_load_regs()
1972 * but visible to pfm_handle_work().
1973 *
1974 * For some window of time, we have a zombie context with
1975 * ctx_state = MASKED and not ZOMBIE
1976 */
1977 ctx->ctx_fl_going_zombie = 1;
1978
1979 /*
1980 * force task to wake up from MASKED state
1981 */
1982 up(&ctx->ctx_restart_sem);
1983
1984 DPRINT(("waking up ctx_state=%d\n", state));
1985
1986 /*
1987 * put ourself to sleep waiting for the other
1988 * task to report completion
1989 *
1990 * the context is protected by mutex, therefore there
1991 * is no risk of being notified of completion before
1992 * actually being on the waitq.
1993 */
1994 set_current_state(TASK_INTERRUPTIBLE);
1995 add_wait_queue(&ctx->ctx_zombieq, &wait);
1996
1997 UNPROTECT_CTX(ctx, flags);
1998
1999 /*
2000 * XXX: check for signals :
2001 * - ok for explicit close
2002 * - not ok when coming from exit_files()
2003 */
2004 schedule();
2005
2006
2007 PROTECT_CTX(ctx, flags);
2008
2009
2010 remove_wait_queue(&ctx->ctx_zombieq, &wait);
2011 set_current_state(TASK_RUNNING);
2012
2013 /*
2014 * context is unloaded at this point
2015 */
2016 DPRINT(("after zombie wakeup ctx_state=%d for\n", state));
2017 }
2018 else if (task != current) {
2019#ifdef CONFIG_SMP
2020 /*
2021 * switch context to zombie state
2022 */
2023 ctx->ctx_state = PFM_CTX_ZOMBIE;
2024
2025 DPRINT(("zombie ctx for [%d]\n", task->pid));
2026 /*
2027 * cannot free the context on the spot. deferred until
2028 * the task notices the ZOMBIE state
2029 */
2030 free_possible = 0;
2031#else
2032 pfm_context_unload(ctx, NULL, 0, regs);
2033#endif
2034 }
2035
2036doit:
2037 /* reload state, may have changed during opening of critical section */
2038 state = ctx->ctx_state;
2039
2040 /*
2041 * the context is still attached to a task (possibly current)
2042 * we cannot destroy it right now
2043 */
2044
2045 /*
2046 * we must free the sampling buffer right here because
2047 * we cannot rely on it being cleaned up later by the
2048 * monitored task. It is not possible to free vmalloc'ed
2049 * memory in pfm_load_regs(). Instead, we remove the buffer
2050 * now. Should there be a subsequent PMU overflow originally
2051 * meant for sampling, it will be treated as spurious
2052 * and that's fine because the monitoring tool is gone anyway.
2053 */
2054 if (ctx->ctx_smpl_hdr) {
2055 smpl_buf_addr = ctx->ctx_smpl_hdr;
2056 smpl_buf_size = ctx->ctx_smpl_size;
2057 /* no more sampling */
2058 ctx->ctx_smpl_hdr = NULL;
2059 ctx->ctx_fl_is_sampling = 0;
2060 }
2061
2062 DPRINT(("ctx_state=%d free_possible=%d addr=%p size=%lu\n",
2063 state,
2064 free_possible,
2065 smpl_buf_addr,
2066 smpl_buf_size));
2067
2068 if (smpl_buf_addr) pfm_exit_smpl_buffer(ctx->ctx_buf_fmt);
2069
2070 /*
2071 * UNLOADED means that the session has already been unreserved.
2072 */
2073 if (state == PFM_CTX_ZOMBIE) {
2074 pfm_unreserve_session(ctx, ctx->ctx_fl_system , ctx->ctx_cpu);
2075 }
2076
2077 /*
2078 * disconnect file descriptor from context must be done
2079 * before we unlock.
2080 */
2081 filp->private_data = NULL;
2082
2083 /*
2084 * if we free on the spot, the context is now completely unreachable
2085 * from the caller's side. The monitored task side is also cut, so we
2086 * can free it safely.
2087 *
2088 * If we have a deferred free, only the caller side is disconnected.
2089 */
2090 UNPROTECT_CTX(ctx, flags);
2091
2092 /*
2093 * All memory free operations (especially for vmalloc'ed memory)
2094 * MUST be done with interrupts ENABLED.
2095 */
2096 if (smpl_buf_addr) pfm_rvfree(smpl_buf_addr, smpl_buf_size);
2097
2098 /*
2099 * return the memory used by the context
2100 */
2101 if (free_possible) pfm_context_free(ctx);
2102
2103 return 0;
2104}
2105
2106static int
2107pfm_no_open(struct inode *irrelevant, struct file *dontcare)
2108{
2109 DPRINT(("pfm_no_open called\n"));
2110 return -ENXIO;
2111}
2112
2113
2114
2115static struct file_operations pfm_file_ops = {
2116 .llseek = no_llseek,
2117 .read = pfm_read,
2118 .write = pfm_write,
2119 .poll = pfm_poll,
2120 .ioctl = pfm_ioctl,
2121 .open = pfm_no_open, /* special open code to disallow open via /proc */
2122 .fasync = pfm_fasync,
2123 .release = pfm_close,
2124 .flush = pfm_flush
2125};
2126
2127static int
2128pfmfs_delete_dentry(struct dentry *dentry)
2129{
2130 return 1;
2131}
2132
2133static struct dentry_operations pfmfs_dentry_operations = {
2134 .d_delete = pfmfs_delete_dentry,
2135};
2136
2137
2138static int
2139pfm_alloc_fd(struct file **cfile)
2140{
2141 int fd, ret = 0;
2142 struct file *file = NULL;
2143 struct inode * inode;
2144 char name[32];
2145 struct qstr this;
2146
2147 fd = get_unused_fd();
2148 if (fd < 0) return -ENFILE;
2149
2150 ret = -ENFILE;
2151
2152 file = get_empty_filp();
2153 if (!file) goto out;
2154
2155 /*
2156 * allocate a new inode
2157 */
2158 inode = new_inode(pfmfs_mnt->mnt_sb);
2159 if (!inode) goto out;
2160
2161 DPRINT(("new inode ino=%ld @%p\n", inode->i_ino, inode));
2162
2163 inode->i_mode = S_IFCHR|S_IRUGO;
2164 inode->i_uid = current->fsuid;
2165 inode->i_gid = current->fsgid;
2166
2167 sprintf(name, "[%lu]", inode->i_ino);
2168 this.name = name;
2169 this.len = strlen(name);
2170 this.hash = inode->i_ino;
2171
2172 ret = -ENOMEM;
2173
2174 /*
2175 * allocate a new dcache entry
2176 */
2177 file->f_dentry = d_alloc(pfmfs_mnt->mnt_sb->s_root, &this);
2178 if (!file->f_dentry) goto out;
2179
2180 file->f_dentry->d_op = &pfmfs_dentry_operations;
2181
2182 d_add(file->f_dentry, inode);
2183 file->f_vfsmnt = mntget(pfmfs_mnt);
2184 file->f_mapping = inode->i_mapping;
2185
2186 file->f_op = &pfm_file_ops;
2187 file->f_mode = FMODE_READ;
2188 file->f_flags = O_RDONLY;
2189 file->f_pos = 0;
2190
2191 /*
2192 * may have to delay until context is attached?
2193 */
2194 fd_install(fd, file);
2195
2196 /*
2197 * the file structure we will use
2198 */
2199 *cfile = file;
2200
2201 return fd;
2202out:
2203 if (file) put_filp(file);
2204 put_unused_fd(fd);
2205 return ret;
2206}
2207
2208static void
2209pfm_free_fd(int fd, struct file *file)
2210{
2211 struct files_struct *files = current->files;
2212
2213 /*
2214 * there is no fd_uninstall(), so we do it here
2215 */
2216 spin_lock(&files->file_lock);
2217 files->fd[fd] = NULL;
2218 spin_unlock(&files->file_lock);
2219
2220 if (file) put_filp(file);
2221 put_unused_fd(fd);
2222}
2223
2224static int
2225pfm_remap_buffer(struct vm_area_struct *vma, unsigned long buf, unsigned long addr, unsigned long size)
2226{
2227 DPRINT(("CPU%d buf=0x%lx addr=0x%lx size=%ld\n", smp_processor_id(), buf, addr, size));
2228
2229 while (size > 0) {
2230 unsigned long pfn = ia64_tpa(buf) >> PAGE_SHIFT;
2231
2232
2233 if (remap_pfn_range(vma, addr, pfn, PAGE_SIZE, PAGE_READONLY))
2234 return -ENOMEM;
2235
2236 addr += PAGE_SIZE;
2237 buf += PAGE_SIZE;
2238 size -= PAGE_SIZE;
2239 }
2240 return 0;
2241}
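/*
 * Note on the page-by-page loop above: the sampling buffer comes from
 * pfm_rvmalloc(), i.e. vmalloc() space, so consecutive virtual pages are
 * generally NOT physically contiguous. ia64_tpa() is therefore applied to
 * each page individually to obtain its physical frame before
 * remap_pfn_range() maps that single PAGE_SIZE chunk read-only into the
 * user VMA.
 */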
2242
2243/*
2244 * allocate a sampling buffer and remap it into the user address space of the task
2245 */
2246static int
2247pfm_smpl_buffer_alloc(struct task_struct *task, pfm_context_t *ctx, unsigned long rsize, void **user_vaddr)
2248{
2249 struct mm_struct *mm = task->mm;
2250 struct vm_area_struct *vma = NULL;
2251 unsigned long size;
2252 void *smpl_buf;
2253
2254
2255 /*
2256 * the fixed header + requested size, aligned to a page boundary
2257 */
2258 size = PAGE_ALIGN(rsize);
2259
2260 DPRINT(("sampling buffer rsize=%lu size=%lu bytes\n", rsize, size));
2261
2262 /*
2263 * check requested size to avoid Denial-of-service attacks
2264 * XXX: may have to refine this test
2265 * Check against address space limit.
2266 *
2267 * if ((mm->total_vm << PAGE_SHIFT) + len> task->rlim[RLIMIT_AS].rlim_cur)
2268 * return -ENOMEM;
2269 */
2270 if (size > task->signal->rlim[RLIMIT_MEMLOCK].rlim_cur)
2271 return -ENOMEM;
2272
2273 /*
2274 * We do the easy to undo allocations first.
2275 *
2276 * pfm_rvmalloc(), clears the buffer, so there is no leak
2277 */
2278 smpl_buf = pfm_rvmalloc(size);
2279 if (smpl_buf == NULL) {
2280 DPRINT(("Can't allocate sampling buffer\n"));
2281 return -ENOMEM;
2282 }
2283
2284 DPRINT(("smpl_buf @%p\n", smpl_buf));
2285
2286 /* allocate vma */
2287 vma = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
2288 if (!vma) {
2289 DPRINT(("Cannot allocate vma\n"));
2290 goto error_kmem;
2291 }
2292 memset(vma, 0, sizeof(*vma));
2293
2294 /*
2295 * partially initialize the vma for the sampling buffer
2296 */
2297 vma->vm_mm = mm;
2298 vma->vm_flags = VM_READ| VM_MAYREAD |VM_RESERVED;
2299 vma->vm_page_prot = PAGE_READONLY; /* XXX may need to change */
2300
2301 /*
2302 * Now we have everything we need and we can initialize
2303 * and connect all the data structures
2304 */
2305
2306 ctx->ctx_smpl_hdr = smpl_buf;
2307 ctx->ctx_smpl_size = size; /* aligned size */
2308
2309 /*
2310 * Let's do the difficult operations next.
2311 *
2312 * now we atomically find some area in the address space and
2313 * remap the buffer in it.
2314 */
2315 down_write(&task->mm->mmap_sem);
2316
2317 /* find some free area in address space, must have mmap sem held */
2318 vma->vm_start = pfm_get_unmapped_area(NULL, 0, size, 0, MAP_PRIVATE|MAP_ANONYMOUS, 0);
2319 if (vma->vm_start == 0UL) {
2320 DPRINT(("Cannot find unmapped area for size %ld\n", size));
2321 up_write(&task->mm->mmap_sem);
2322 goto error;
2323 }
2324 vma->vm_end = vma->vm_start + size;
2325 vma->vm_pgoff = vma->vm_start >> PAGE_SHIFT;
2326
2327 DPRINT(("aligned size=%ld, hdr=%p mapped @0x%lx\n", size, ctx->ctx_smpl_hdr, vma->vm_start));
2328
2329 /* can only be applied to current task, need to have the mm semaphore held when called */
2330 if (pfm_remap_buffer(vma, (unsigned long)smpl_buf, vma->vm_start, size)) {
2331 DPRINT(("Can't remap buffer\n"));
2332 up_write(&task->mm->mmap_sem);
2333 goto error;
2334 }
2335
2336 /*
2337 * now insert the vma in the vm list for the process, must be
2338 * done with mmap lock held
2339 */
2340 insert_vm_struct(mm, vma);
2341
2342 mm->total_vm += size >> PAGE_SHIFT;
2343 vm_stat_account(vma);
2344 up_write(&task->mm->mmap_sem);
2345
2346 /*
2347 * keep track of user level virtual address
2348 */
2349 ctx->ctx_smpl_vaddr = (void *)vma->vm_start;
2350 *(unsigned long *)user_vaddr = vma->vm_start;
2351
2352 return 0;
2353
2354error:
2355 kmem_cache_free(vm_area_cachep, vma);
2356error_kmem:
2357 pfm_rvfree(smpl_buf, size);
2358
2359 return -ENOMEM;
2360}
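/*
 * Order of operations above, for reference: reserve and clear the kernel
 * buffer (pfm_rvmalloc), build a read-only VM_RESERVED vma, then under
 * mmap_sem find a free user range (pfm_get_unmapped_area), remap the buffer
 * page by page and insert the vma. On failure everything is unwound in
 * reverse (vma slab object, then the reserved buffer), so no user mapping
 * is left behind.
 */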
2361
2362/*
2363 * XXX: do something better here
2364 */
2365static int
2366pfm_bad_permissions(struct task_struct *task)
2367{
2368 /* inspired by ptrace_attach() */
2369 DPRINT(("cur: uid=%d gid=%d task: euid=%d suid=%d uid=%d egid=%d sgid=%d\n",
2370 current->uid,
2371 current->gid,
2372 task->euid,
2373 task->suid,
2374 task->uid,
2375 task->egid,
2376 task->sgid));
2377
2378 return ((current->uid != task->euid)
2379 || (current->uid != task->suid)
2380 || (current->uid != task->uid)
2381 || (current->gid != task->egid)
2382 || (current->gid != task->sgid)
2383 || (current->gid != task->gid)) && !capable(CAP_SYS_PTRACE);
2384}
2385
2386static int
2387pfarg_is_sane(struct task_struct *task, pfarg_context_t *pfx)
2388{
2389 int ctx_flags;
2390
2391 /* valid signal */
2392
2393 ctx_flags = pfx->ctx_flags;
2394
2395 if (ctx_flags & PFM_FL_SYSTEM_WIDE) {
2396
2397 /*
2398 * cannot block in this mode
2399 */
2400 if (ctx_flags & PFM_FL_NOTIFY_BLOCK) {
2401 DPRINT(("cannot use blocking mode when in system wide monitoring\n"));
2402 return -EINVAL;
2403 }
2404 } else {
2405 }
2406 /* probably more to add here */
2407
2408 return 0;
2409}
2410
2411static int
2412pfm_setup_buffer_fmt(struct task_struct *task, pfm_context_t *ctx, unsigned int ctx_flags,
2413 unsigned int cpu, pfarg_context_t *arg)
2414{
2415 pfm_buffer_fmt_t *fmt = NULL;
2416 unsigned long size = 0UL;
2417 void *uaddr = NULL;
2418 void *fmt_arg = NULL;
2419 int ret = 0;
2420#define PFM_CTXARG_BUF_ARG(a) (pfm_buffer_fmt_t *)(a+1)
2421
2422 /* invoke and lock buffer format, if found */
2423 fmt = pfm_find_buffer_fmt(arg->ctx_smpl_buf_id);
2424 if (fmt == NULL) {
2425 DPRINT(("[%d] cannot find buffer format\n", task->pid));
2426 return -EINVAL;
2427 }
2428
2429 /*
2430 * buffer argument MUST be contiguous to pfarg_context_t
2431 */
2432 if (fmt->fmt_arg_size) fmt_arg = PFM_CTXARG_BUF_ARG(arg);
2433
2434 ret = pfm_buf_fmt_validate(fmt, task, ctx_flags, cpu, fmt_arg);
2435
2436 DPRINT(("[%d] after validate(0x%x,%d,%p)=%d\n", task->pid, ctx_flags, cpu, fmt_arg, ret));
2437
2438 if (ret) goto error;
2439
2440 /* link buffer format and context */
2441 ctx->ctx_buf_fmt = fmt;
2442
2443 /*
2444 * check if buffer format wants to use perfmon buffer allocation/mapping service
2445 */
2446 ret = pfm_buf_fmt_getsize(fmt, task, ctx_flags, cpu, fmt_arg, &size);
2447 if (ret) goto error;
2448
2449 if (size) {
2450 /*
2451 * buffer is always remapped into the caller's address space
2452 */
2453 ret = pfm_smpl_buffer_alloc(current, ctx, size, &uaddr);
2454 if (ret) goto error;
2455
2456 /* keep track of user address of buffer */
2457 arg->ctx_smpl_vaddr = uaddr;
2458 }
2459 ret = pfm_buf_fmt_init(fmt, task, ctx->ctx_smpl_hdr, ctx_flags, cpu, fmt_arg);
2460
2461error:
2462 return ret;
2463}
2464
2465static void
2466pfm_reset_pmu_state(pfm_context_t *ctx)
2467{
2468 int i;
2469
2470 /*
2471 * install reset values for PMC.
2472 */
2473 for (i=1; PMC_IS_LAST(i) == 0; i++) {
2474 if (PMC_IS_IMPL(i) == 0) continue;
2475 ctx->ctx_pmcs[i] = PMC_DFL_VAL(i);
2476 DPRINT(("pmc[%d]=0x%lx\n", i, ctx->ctx_pmcs[i]));
2477 }
2478 /*
2479 * PMD registers are set to 0UL when the context is cleared with memset()
2480 */
2481
2482 /*
2483 * On context switch restore, we must restore ALL PMCs and ALL PMDs even
2484 * when they are not actively used by the task. In UP, the incoming process
2485 * may otherwise pick up left over PMC, PMD state from the previous process.
2486 * As opposed to PMD, stale PMC can cause harm to the incoming
2487 * process because they may change what is being measured.
2488 * Therefore, we must systematically reinstall the entire
2489 * PMC state. In SMP, the same thing is possible on the
2490 * same CPU but also between 2 CPUs.
2491 *
2492 * The problem with PMD is information leaking especially
2493 * to user level when psr.sp=0
2494 *
2495 * There is unfortunately no easy way to avoid this problem
2496 * on either UP or SMP. This definitively slows down the
2497 * pfm_load_regs() function.
2498 */
2499
2500 /*
2501 * bitmask of all PMCs accessible to this context
2502 *
2503 * PMC0 is treated differently.
2504 */
2505 ctx->ctx_all_pmcs[0] = pmu_conf->impl_pmcs[0] & ~0x1;
2506
2507 /*
2508 * bitmask of all PMDs that are accessible to this context
2509 */
2510 ctx->ctx_all_pmds[0] = pmu_conf->impl_pmds[0];
2511
2512 DPRINT(("<%d> all_pmcs=0x%lx all_pmds=0x%lx\n", ctx->ctx_fd, ctx->ctx_all_pmcs[0],ctx->ctx_all_pmds[0]));
2513
2514 /*
2515 * useful in case of re-enable after disable
2516 */
2517 ctx->ctx_used_ibrs[0] = 0UL;
2518 ctx->ctx_used_dbrs[0] = 0UL;
2519}
2520
2521static int
2522pfm_ctx_getsize(void *arg, size_t *sz)
2523{
2524 pfarg_context_t *req = (pfarg_context_t *)arg;
2525 pfm_buffer_fmt_t *fmt;
2526
2527 *sz = 0;
2528
2529 if (!pfm_uuid_cmp(req->ctx_smpl_buf_id, pfm_null_uuid)) return 0;
2530
2531 fmt = pfm_find_buffer_fmt(req->ctx_smpl_buf_id);
2532 if (fmt == NULL) {
2533 DPRINT(("cannot find buffer format\n"));
2534 return -EINVAL;
2535 }
2536 /* get just enough to copy in user parameters */
2537 *sz = fmt->fmt_arg_size;
2538 DPRINT(("arg_size=%lu\n", *sz));
2539
2540 return 0;
2541}
2542
2543
2544
2545/*
2546 * cannot attach if :
2547 * - kernel task
2548 * - task not owned by caller
2549 * - task incompatible with context mode
2550 */
2551static int
2552pfm_task_incompatible(pfm_context_t *ctx, struct task_struct *task)
2553{
2554 /*
2555 * no kernel task or task not owned by caller
2556 */
2557 if (task->mm == NULL) {
2558 DPRINT(("task [%d] has no memory context (kernel thread)\n", task->pid));
2559 return -EPERM;
2560 }
2561 if (pfm_bad_permissions(task)) {
2562 DPRINT(("no permission to attach to [%d]\n", task->pid));
2563 return -EPERM;
2564 }
2565 /*
2566 * cannot block in self-monitoring mode
2567 */
2568 if (CTX_OVFL_NOBLOCK(ctx) == 0 && task == current) {
2569 DPRINT(("cannot load a blocking context on self for [%d]\n", task->pid));
2570 return -EINVAL;
2571 }
2572
2573 if (task->exit_state == EXIT_ZOMBIE) {
2574 DPRINT(("cannot attach to zombie task [%d]\n", task->pid));
2575 return -EBUSY;
2576 }
2577
2578 /*
2579 * always ok for self
2580 */
2581 if (task == current) return 0;
2582
2583 if ((task->state != TASK_STOPPED) && (task->state != TASK_TRACED)) {
2584 DPRINT(("cannot attach to non-stopped task [%d] state=%ld\n", task->pid, task->state));
2585 return -EBUSY;
2586 }
2587 /*
2588 * make sure the task is off any CPU
2589 */
2590 wait_task_inactive(task);
2591
2592 /* more to come... */
2593
2594 return 0;
2595}
2596
2597static int
2598pfm_get_task(pfm_context_t *ctx, pid_t pid, struct task_struct **task)
2599{
2600 struct task_struct *p = current;
2601 int ret;
2602
2603 /* XXX: need to add more checks here */
2604 if (pid < 2) return -EPERM;
2605
2606 if (pid != current->pid) {
2607
2608 read_lock(&tasklist_lock);
2609
2610 p = find_task_by_pid(pid);
2611
2612 /* make sure task cannot go away while we operate on it */
2613 if (p) get_task_struct(p);
2614
2615 read_unlock(&tasklist_lock);
2616
2617 if (p == NULL) return -ESRCH;
2618 }
2619
2620 ret = pfm_task_incompatible(ctx, p);
2621 if (ret == 0) {
2622 *task = p;
2623 } else if (p != current) {
2624 pfm_put_task(p);
2625 }
2626 return ret;
2627}
2628
2629
2630
2631static int
2632pfm_context_create(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs)
2633{
2634 pfarg_context_t *req = (pfarg_context_t *)arg;
2635 struct file *filp;
2636 int ctx_flags;
2637 int ret;
2638
2639 /* let's check the arguments first */
2640 ret = pfarg_is_sane(current, req);
2641 if (ret < 0) return ret;
2642
2643 ctx_flags = req->ctx_flags;
2644
2645 ret = -ENOMEM;
2646
2647 ctx = pfm_context_alloc();
2648 if (!ctx) goto error;
2649
2650 ret = pfm_alloc_fd(&filp);
2651 if (ret < 0) goto error_file;
2652
2653 req->ctx_fd = ctx->ctx_fd = ret;
2654
2655 /*
2656 * attach context to file
2657 */
2658 filp->private_data = ctx;
2659
2660 /*
2661 * does the user want to sample?
2662 */
2663 if (pfm_uuid_cmp(req->ctx_smpl_buf_id, pfm_null_uuid)) {
2664 ret = pfm_setup_buffer_fmt(current, ctx, ctx_flags, 0, req);
2665 if (ret) goto buffer_error;
2666 }
2667
2668 /*
2669 * init context protection lock
2670 */
2671 spin_lock_init(&ctx->ctx_lock);
2672
2673 /*
2674 * context is unloaded
2675 */
2676 ctx->ctx_state = PFM_CTX_UNLOADED;
2677
2678 /*
2679 * initialization of context's flags
2680 */
2681 ctx->ctx_fl_block = (ctx_flags & PFM_FL_NOTIFY_BLOCK) ? 1 : 0;
2682 ctx->ctx_fl_system = (ctx_flags & PFM_FL_SYSTEM_WIDE) ? 1: 0;
2683 ctx->ctx_fl_is_sampling = ctx->ctx_buf_fmt ? 1 : 0; /* assume record() is defined */
2684 ctx->ctx_fl_no_msg = (ctx_flags & PFM_FL_OVFL_NO_MSG) ? 1: 0;
2685 /*
2686 * will move to set properties
2687 * ctx->ctx_fl_excl_idle = (ctx_flags & PFM_FL_EXCL_IDLE) ? 1: 0;
2688 */
2689
2690 /*
2691 * init restart semaphore to locked
2692 */
2693 sema_init(&ctx->ctx_restart_sem, 0);
2694
2695 /*
2696 * activation is used in SMP only
2697 */
2698 ctx->ctx_last_activation = PFM_INVALID_ACTIVATION;
2699 SET_LAST_CPU(ctx, -1);
2700
2701 /*
2702 * initialize notification message queue
2703 */
2704 ctx->ctx_msgq_head = ctx->ctx_msgq_tail = 0;
2705 init_waitqueue_head(&ctx->ctx_msgq_wait);
2706 init_waitqueue_head(&ctx->ctx_zombieq);
2707
2708 DPRINT(("ctx=%p flags=0x%x system=%d notify_block=%d excl_idle=%d no_msg=%d ctx_fd=%d \n",
2709 ctx,
2710 ctx_flags,
2711 ctx->ctx_fl_system,
2712 ctx->ctx_fl_block,
2713 ctx->ctx_fl_excl_idle,
2714 ctx->ctx_fl_no_msg,
2715 ctx->ctx_fd));
2716
2717 /*
2718 * initialize soft PMU state
2719 */
2720 pfm_reset_pmu_state(ctx);
2721
2722 return 0;
2723
2724buffer_error:
2725 pfm_free_fd(ctx->ctx_fd, filp);
2726
2727 if (ctx->ctx_buf_fmt) {
2728 pfm_buf_fmt_exit(ctx->ctx_buf_fmt, current, NULL, regs);
2729 }
2730error_file:
2731 pfm_context_free(ctx);
2732
2733error:
2734 return ret;
2735}
2736
2737static inline unsigned long
2738pfm_new_counter_value (pfm_counter_t *reg, int is_long_reset)
2739{
2740 unsigned long val = is_long_reset ? reg->long_reset : reg->short_reset;
2741 unsigned long new_seed, old_seed = reg->seed, mask = reg->mask;
2742 extern unsigned long carta_random32 (unsigned long seed);
2743
2744 if (reg->flags & PFM_REGFL_RANDOM) {
2745 new_seed = carta_random32(old_seed);
2746 val -= (old_seed & mask); /* counter values are negative numbers! */
2747 if ((mask >> 32) != 0)
2748 /* construct a full 64-bit random value: */
2749 new_seed |= carta_random32(old_seed >> 32) << 32;
2750 reg->seed = new_seed;
2751 }
2752 reg->lval = val;
2753 return val;
2754}
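/*
 * Worked example for the randomization above (hypothetical numbers):
 * counters count up and overflow when they wrap, so reset values are
 * "negative", e.g. long_reset = -100000 to get one overflow roughly every
 * 100000 events. With PFM_REGFL_RANDOM and mask = 0xff, a pseudo-random
 * value in [0..255] derived from the seed is subtracted from the reset
 * value on each reset, so the sampling period varies between 100000 and
 * 100255 events and avoids lock-step sampling artifacts.
 */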
2755
2756static void
2757pfm_reset_regs_masked(pfm_context_t *ctx, unsigned long *ovfl_regs, int is_long_reset)
2758{
2759 unsigned long mask = ovfl_regs[0];
2760 unsigned long reset_others = 0UL;
2761 unsigned long val;
2762 int i;
2763
2764 /*
2765 * now restore reset value on sampling overflowed counters
2766 */
2767 mask >>= PMU_FIRST_COUNTER;
2768 for(i = PMU_FIRST_COUNTER; mask; i++, mask >>= 1) {
2769
2770 if ((mask & 0x1UL) == 0UL) continue;
2771
2772 ctx->ctx_pmds[i].val = val = pfm_new_counter_value(ctx->ctx_pmds+ i, is_long_reset);
2773 reset_others |= ctx->ctx_pmds[i].reset_pmds[0];
2774
2775 DPRINT_ovfl((" %s reset ctx_pmds[%d]=%lx\n", is_long_reset ? "long" : "short", i, val));
2776 }
2777
2778 /*
2779 * Now take care of resetting the other registers
2780 */
2781 for(i = 0; reset_others; i++, reset_others >>= 1) {
2782
2783 if ((reset_others & 0x1) == 0) continue;
2784
2785 ctx->ctx_pmds[i].val = val = pfm_new_counter_value(ctx->ctx_pmds + i, is_long_reset);
2786
2787 DPRINT_ovfl(("%s reset_others pmd[%d]=%lx\n",
2788 is_long_reset ? "long" : "short", i, val));
2789 }
2790}
2791
2792static void
2793pfm_reset_regs(pfm_context_t *ctx, unsigned long *ovfl_regs, int is_long_reset)
2794{
2795 unsigned long mask = ovfl_regs[0];
2796 unsigned long reset_others = 0UL;
2797 unsigned long val;
2798 int i;
2799
2800 DPRINT_ovfl(("ovfl_regs=0x%lx is_long_reset=%d\n", ovfl_regs[0], is_long_reset));
2801
2802 if (ctx->ctx_state == PFM_CTX_MASKED) {
2803 pfm_reset_regs_masked(ctx, ovfl_regs, is_long_reset);
2804 return;
2805 }
2806
2807 /*
2808 * now restore reset value on sampling overflowed counters
2809 */
2810 mask >>= PMU_FIRST_COUNTER;
2811 for(i = PMU_FIRST_COUNTER; mask; i++, mask >>= 1) {
2812
2813 if ((mask & 0x1UL) == 0UL) continue;
2814
2815 val = pfm_new_counter_value(ctx->ctx_pmds+ i, is_long_reset);
2816 reset_others |= ctx->ctx_pmds[i].reset_pmds[0];
2817
2818 DPRINT_ovfl((" %s reset ctx_pmds[%d]=%lx\n", is_long_reset ? "long" : "short", i, val));
2819
2820 pfm_write_soft_counter(ctx, i, val);
2821 }
2822
2823 /*
2824 * Now take care of resetting the other registers
2825 */
2826 for(i = 0; reset_others; i++, reset_others >>= 1) {
2827
2828 if ((reset_others & 0x1) == 0) continue;
2829
2830 val = pfm_new_counter_value(ctx->ctx_pmds + i, is_long_reset);
2831
2832 if (PMD_IS_COUNTING(i)) {
2833 pfm_write_soft_counter(ctx, i, val);
2834 } else {
2835 ia64_set_pmd(i, val);
2836 }
2837 DPRINT_ovfl(("%s reset_others pmd[%d]=%lx\n",
2838 is_long_reset ? "long" : "short", i, val));
2839 }
2840 ia64_srlz_d();
2841}
2842
2843static int
2844pfm_write_pmcs(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs)
2845{
2846 struct thread_struct *thread = NULL;
2847 struct task_struct *task;
2848 pfarg_reg_t *req = (pfarg_reg_t *)arg;
2849 unsigned long value, pmc_pm;
2850 unsigned long smpl_pmds, reset_pmds, impl_pmds;
2851 unsigned int cnum, reg_flags, flags, pmc_type;
2852 int i, can_access_pmu = 0, is_loaded, is_system, expert_mode;
2853 int is_monitor, is_counting, state;
2854 int ret = -EINVAL;
2855 pfm_reg_check_t wr_func;
2856#define PFM_CHECK_PMC_PM(x, y, z) ((x)->ctx_fl_system ^ PMC_PM(y, z))
2857
2858 state = ctx->ctx_state;
2859 is_loaded = state == PFM_CTX_LOADED ? 1 : 0;
2860 is_system = ctx->ctx_fl_system;
2861 task = ctx->ctx_task;
2862 impl_pmds = pmu_conf->impl_pmds[0];
2863
2864 if (state == PFM_CTX_ZOMBIE) return -EINVAL;
2865
2866 if (is_loaded) {
2867 thread = &task->thread;
2868 /*
2869 * In system wide and when the context is loaded, access can only happen
2870 * when the caller is running on the CPU being monitored by the session.
2871 * It does not have to be the owner (ctx_task) of the context per se.
2872 */
2873 if (is_system && ctx->ctx_cpu != smp_processor_id()) {
2874 DPRINT(("should be running on CPU%d\n", ctx->ctx_cpu));
2875 return -EBUSY;
2876 }
2877 can_access_pmu = GET_PMU_OWNER() == task || is_system ? 1 : 0;
2878 }
2879 expert_mode = pfm_sysctl.expert_mode;
2880
2881 for (i = 0; i < count; i++, req++) {
2882
2883 cnum = req->reg_num;
2884 reg_flags = req->reg_flags;
2885 value = req->reg_value;
2886 smpl_pmds = req->reg_smpl_pmds[0];
2887 reset_pmds = req->reg_reset_pmds[0];
2888 flags = 0;
2889
2890
2891 if (cnum >= PMU_MAX_PMCS) {
2892 DPRINT(("pmc%u is invalid\n", cnum));
2893 goto error;
2894 }
2895
2896 pmc_type = pmu_conf->pmc_desc[cnum].type;
2897 pmc_pm = (value >> pmu_conf->pmc_desc[cnum].pm_pos) & 0x1;
2898 is_counting = (pmc_type & PFM_REG_COUNTING) == PFM_REG_COUNTING ? 1 : 0;
2899 is_monitor = (pmc_type & PFM_REG_MONITOR) == PFM_REG_MONITOR ? 1 : 0;
2900
2901 /*
2902 * we reject all non implemented PMC as well
2903 * as attempts to modify PMC[0-3] which are used
2904 * as status registers by the PMU
2905 */
2906 if ((pmc_type & PFM_REG_IMPL) == 0 || (pmc_type & PFM_REG_CONTROL) == PFM_REG_CONTROL) {
2907 DPRINT(("pmc%u is unimplemented or no-access pmc_type=%x\n", cnum, pmc_type));
2908 goto error;
2909 }
2910 wr_func = pmu_conf->pmc_desc[cnum].write_check;
2911 /*
2912		 * If the PMC is a monitor and the value is not the default, the pm bit must match the session type:
2913 * - system-wide session: PMCx.pm=1 (privileged monitor)
2914 * - per-task : PMCx.pm=0 (user monitor)
2915 */
2916 if (is_monitor && value != PMC_DFL_VAL(cnum) && is_system ^ pmc_pm) {
2917 DPRINT(("pmc%u pmc_pm=%lu is_system=%d\n",
2918 cnum,
2919 pmc_pm,
2920 is_system));
2921 goto error;
2922 }
2923
2924 if (is_counting) {
2925 /*
2926 * enforce generation of overflow interrupt. Necessary on all
2927 * CPUs.
2928 */
2929 value |= 1 << PMU_PMC_OI;
2930
2931 if (reg_flags & PFM_REGFL_OVFL_NOTIFY) {
2932 flags |= PFM_REGFL_OVFL_NOTIFY;
2933 }
2934
2935 if (reg_flags & PFM_REGFL_RANDOM) flags |= PFM_REGFL_RANDOM;
2936
2937 /* verify validity of smpl_pmds */
2938 if ((smpl_pmds & impl_pmds) != smpl_pmds) {
2939 DPRINT(("invalid smpl_pmds 0x%lx for pmc%u\n", smpl_pmds, cnum));
2940 goto error;
2941 }
2942
2943 /* verify validity of reset_pmds */
2944 if ((reset_pmds & impl_pmds) != reset_pmds) {
2945 DPRINT(("invalid reset_pmds 0x%lx for pmc%u\n", reset_pmds, cnum));
2946 goto error;
2947 }
2948 } else {
2949 if (reg_flags & (PFM_REGFL_OVFL_NOTIFY|PFM_REGFL_RANDOM)) {
2950 DPRINT(("cannot set ovfl_notify or random on pmc%u\n", cnum));
2951 goto error;
2952 }
2953			/* eventid is ignored on non-counting monitors */
2954 }
2955
2956 /*
2957 * execute write checker, if any
2958 */
2959 if (likely(expert_mode == 0 && wr_func)) {
2960 ret = (*wr_func)(task, ctx, cnum, &value, regs);
2961 if (ret) goto error;
2962 ret = -EINVAL;
2963 }
2964
2965 /*
2966 * no error on this register
2967 */
2968 PFM_REG_RETFLAG_SET(req->reg_flags, 0);
2969
2970 /*
2971 * Now we commit the changes to the software state
2972 */
2973
2974 /*
2975 * update overflow information
2976 */
2977 if (is_counting) {
2978 /*
2979 * full flag update each time a register is programmed
2980 */
2981 ctx->ctx_pmds[cnum].flags = flags;
2982
2983 ctx->ctx_pmds[cnum].reset_pmds[0] = reset_pmds;
2984 ctx->ctx_pmds[cnum].smpl_pmds[0] = smpl_pmds;
2985 ctx->ctx_pmds[cnum].eventid = req->reg_smpl_eventid;
2986
2987 /*
2988 * Mark all PMDS to be accessed as used.
2989 *
2990 * We do not keep track of PMC because we have to
2991 * systematically restore ALL of them.
2992 *
2993 * We do not update the used_monitors mask, because
2994			 * if we have not programmed them, then they will be in
2995			 * a quiescent state, therefore we will not need to
2996			 * mask/restore them when the context is MASKED.
2997 */
2998 CTX_USED_PMD(ctx, reset_pmds);
2999 CTX_USED_PMD(ctx, smpl_pmds);
3000 /*
3001 * make sure we do not try to reset on
3002 * restart because we have established new values
3003 */
3004			if (state == PFM_CTX_MASKED) ctx->ctx_ovfl_regs[0] &= ~(1UL << cnum);
3005 }
3006 /*
3007 * Needed in case the user does not initialize the equivalent
3008 * PMD. Clearing is done indirectly via pfm_reset_pmu_state() so there is no
3009 * possible leak here.
3010 */
3011 CTX_USED_PMD(ctx, pmu_conf->pmc_desc[cnum].dep_pmd[0]);
3012
3013 /*
3014 * keep track of the monitor PMC that we are using.
3015 * we save the value of the pmc in ctx_pmcs[] and if
3016 * the monitoring is not stopped for the context we also
3017 * place it in the saved state area so that it will be
3018 * picked up later by the context switch code.
3019 *
3020 * The value in ctx_pmcs[] can only be changed in pfm_write_pmcs().
3021 *
3022 * The value in thread->pmcs[] may be modified on overflow, i.e., when
3023 * monitoring needs to be stopped.
3024 */
3025 if (is_monitor) CTX_USED_MONITOR(ctx, 1UL << cnum);
3026
3027 /*
3028 * update context state
3029 */
3030 ctx->ctx_pmcs[cnum] = value;
3031
3032 if (is_loaded) {
3033 /*
3034 * write thread state
3035 */
3036 if (is_system == 0) thread->pmcs[cnum] = value;
3037
3038 /*
3039 * write hardware register if we can
3040 */
3041 if (can_access_pmu) {
3042 ia64_set_pmc(cnum, value);
3043 }
3044#ifdef CONFIG_SMP
3045 else {
3046 /*
3047 * per-task SMP only here
3048 *
3049 * we are guaranteed that the task is not running on the other CPU,
3050			 * we indicate that this PMC will need to be reloaded if the task
3051 * is rescheduled on the CPU it ran last on.
3052 */
3053 ctx->ctx_reload_pmcs[0] |= 1UL << cnum;
3054 }
3055#endif
3056 }
3057
3058 DPRINT(("pmc[%u]=0x%lx ld=%d apmu=%d flags=0x%x all_pmcs=0x%lx used_pmds=0x%lx eventid=%ld smpl_pmds=0x%lx reset_pmds=0x%lx reloads_pmcs=0x%lx used_monitors=0x%lx ovfl_regs=0x%lx\n",
3059 cnum,
3060 value,
3061 is_loaded,
3062 can_access_pmu,
3063 flags,
3064 ctx->ctx_all_pmcs[0],
3065 ctx->ctx_used_pmds[0],
3066 ctx->ctx_pmds[cnum].eventid,
3067 smpl_pmds,
3068 reset_pmds,
3069 ctx->ctx_reload_pmcs[0],
3070 ctx->ctx_used_monitors[0],
3071 ctx->ctx_ovfl_regs[0]));
3072 }
3073
3074 /*
3075 * make sure the changes are visible
3076 */
3077 if (can_access_pmu) ia64_srlz_d();
3078
3079 return 0;
3080error:
3081 PFM_REG_RETFLAG_SET(req->reg_flags, PFM_REG_RETFL_EINVAL);
3082 return ret;
3083}
3084
3085static int
3086pfm_write_pmds(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs)
3087{
3088 struct thread_struct *thread = NULL;
3089 struct task_struct *task;
3090 pfarg_reg_t *req = (pfarg_reg_t *)arg;
3091 unsigned long value, hw_value, ovfl_mask;
3092 unsigned int cnum;
3093 int i, can_access_pmu = 0, state;
3094 int is_counting, is_loaded, is_system, expert_mode;
3095 int ret = -EINVAL;
3096 pfm_reg_check_t wr_func;
3097
3098
3099 state = ctx->ctx_state;
3100 is_loaded = state == PFM_CTX_LOADED ? 1 : 0;
3101 is_system = ctx->ctx_fl_system;
3102 ovfl_mask = pmu_conf->ovfl_val;
3103 task = ctx->ctx_task;
3104
3105 if (unlikely(state == PFM_CTX_ZOMBIE)) return -EINVAL;
3106
3107 /*
3108 * on both UP and SMP, we can only write to the PMC when the task is
3109 * the owner of the local PMU.
3110 */
3111 if (likely(is_loaded)) {
3112 thread = &task->thread;
3113 /*
3114 * In system wide and when the context is loaded, access can only happen
3115 * when the caller is running on the CPU being monitored by the session.
3116 * It does not have to be the owner (ctx_task) of the context per se.
3117 */
3118 if (unlikely(is_system && ctx->ctx_cpu != smp_processor_id())) {
3119 DPRINT(("should be running on CPU%d\n", ctx->ctx_cpu));
3120 return -EBUSY;
3121 }
3122 can_access_pmu = GET_PMU_OWNER() == task || is_system ? 1 : 0;
3123 }
3124 expert_mode = pfm_sysctl.expert_mode;
3125
3126 for (i = 0; i < count; i++, req++) {
3127
3128 cnum = req->reg_num;
3129 value = req->reg_value;
3130
3131 if (!PMD_IS_IMPL(cnum)) {
3132 DPRINT(("pmd[%u] is unimplemented or invalid\n", cnum));
3133 goto abort_mission;
3134 }
3135 is_counting = PMD_IS_COUNTING(cnum);
3136 wr_func = pmu_conf->pmd_desc[cnum].write_check;
3137
3138 /*
3139 * execute write checker, if any
3140 */
3141 if (unlikely(expert_mode == 0 && wr_func)) {
3142 unsigned long v = value;
3143
3144 ret = (*wr_func)(task, ctx, cnum, &v, regs);
3145 if (ret) goto abort_mission;
3146
3147 value = v;
3148 ret = -EINVAL;
3149 }
3150
3151 /*
3152 * no error on this register
3153 */
3154 PFM_REG_RETFLAG_SET(req->reg_flags, 0);
3155
3156 /*
3157 * now commit changes to software state
3158 */
3159 hw_value = value;
3160
3161 /*
3162 * update virtualized (64bits) counter
3163 */
3164 if (is_counting) {
3165 /*
3166 * write context state
3167 */
3168 ctx->ctx_pmds[cnum].lval = value;
3169
3170 /*
3171			 * when the context is loaded we use the split value
3172 */
3173 if (is_loaded) {
3174 hw_value = value & ovfl_mask;
3175 value = value & ~ovfl_mask;
3176 }
3177 }
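		/*
		 * Note: the split above is what gives perfmon its 64-bit virtual
		 * counters: the hardware PMD only receives the low, implemented
		 * bits (hw_value), while the upper bits accumulate in the software
		 * copy ctx_pmds[cnum].val and are added back in pfm_read_pmds().
		 */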
3178 /*
3179 * update reset values (not just for counters)
3180 */
3181 ctx->ctx_pmds[cnum].long_reset = req->reg_long_reset;
3182 ctx->ctx_pmds[cnum].short_reset = req->reg_short_reset;
3183
3184 /*
3185 * update randomization parameters (not just for counters)
3186 */
3187 ctx->ctx_pmds[cnum].seed = req->reg_random_seed;
3188 ctx->ctx_pmds[cnum].mask = req->reg_random_mask;
3189
3190 /*
3191 * update context value
3192 */
3193 ctx->ctx_pmds[cnum].val = value;
3194
3195 /*
3196 * Keep track of what we use
3197 *
3198 * We do not keep track of PMC because we have to
3199 * systematically restore ALL of them.
3200 */
3201 CTX_USED_PMD(ctx, PMD_PMD_DEP(cnum));
3202
3203 /*
3204 * mark this PMD register used as well
3205 */
3206 CTX_USED_PMD(ctx, RDEP(cnum));
3207
3208 /*
3209 * make sure we do not try to reset on
3210 * restart because we have established new values
3211 */
3212 if (is_counting && state == PFM_CTX_MASKED) {
3213			ctx->ctx_ovfl_regs[0] &= ~(1UL << cnum);
3214 }
3215
3216 if (is_loaded) {
3217 /*
3218 * write thread state
3219 */
3220 if (is_system == 0) thread->pmds[cnum] = hw_value;
3221
3222 /*
3223 * write hardware register if we can
3224 */
3225 if (can_access_pmu) {
3226 ia64_set_pmd(cnum, hw_value);
3227 } else {
3228#ifdef CONFIG_SMP
3229 /*
3230 * we are guaranteed that the task is not running on the other CPU,
3231 * we indicate that this PMD will need to be reloaded if the task
3232 * is rescheduled on the CPU it ran last on.
3233 */
3234 ctx->ctx_reload_pmds[0] |= 1UL << cnum;
3235#endif
3236 }
3237 }
3238
3239 DPRINT(("pmd[%u]=0x%lx ld=%d apmu=%d, hw_value=0x%lx ctx_pmd=0x%lx short_reset=0x%lx "
3240 "long_reset=0x%lx notify=%c seed=0x%lx mask=0x%lx used_pmds=0x%lx reset_pmds=0x%lx reload_pmds=0x%lx all_pmds=0x%lx ovfl_regs=0x%lx\n",
3241 cnum,
3242 value,
3243 is_loaded,
3244 can_access_pmu,
3245 hw_value,
3246 ctx->ctx_pmds[cnum].val,
3247 ctx->ctx_pmds[cnum].short_reset,
3248 ctx->ctx_pmds[cnum].long_reset,
3249 PMC_OVFL_NOTIFY(ctx, cnum) ? 'Y':'N',
3250 ctx->ctx_pmds[cnum].seed,
3251 ctx->ctx_pmds[cnum].mask,
3252 ctx->ctx_used_pmds[0],
3253 ctx->ctx_pmds[cnum].reset_pmds[0],
3254 ctx->ctx_reload_pmds[0],
3255 ctx->ctx_all_pmds[0],
3256 ctx->ctx_ovfl_regs[0]));
3257 }
3258
3259 /*
3260 * make changes visible
3261 */
3262 if (can_access_pmu) ia64_srlz_d();
3263
3264 return 0;
3265
3266abort_mission:
3267 /*
3268 * for now, we have only one possibility for error
3269 */
3270 PFM_REG_RETFLAG_SET(req->reg_flags, PFM_REG_RETFL_EINVAL);
3271 return ret;
3272}
3273
3274/*
3275 * By way of PROTECT_CTX(), interrupts are masked while we are in this function.
3276 * Therefore we know we do not have to worry about the PMU overflow interrupt. If an
3277 * interrupt is delivered during the call, it will be kept pending until we leave, making
3278 * it appear as if it had been generated at the UNPROTECT_CTX(). At least we are
3279 * guaranteed to return consistent data to the user; it may simply be old. It is not
3280 * trivial to handle the overflow while inside the call because we may end up in
3281 * some module sampling buffer code, causing deadlocks.
3282 */
3283static int
3284pfm_read_pmds(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs)
3285{
3286 struct thread_struct *thread = NULL;
3287 struct task_struct *task;
3288 unsigned long val = 0UL, lval, ovfl_mask, sval;
3289 pfarg_reg_t *req = (pfarg_reg_t *)arg;
3290 unsigned int cnum, reg_flags = 0;
3291 int i, can_access_pmu = 0, state;
3292 int is_loaded, is_system, is_counting, expert_mode;
3293 int ret = -EINVAL;
3294 pfm_reg_check_t rd_func;
3295
3296 /*
3297 * access is possible when loaded only for
3298 * self-monitoring tasks or in UP mode
3299 */
3300
3301 state = ctx->ctx_state;
3302 is_loaded = state == PFM_CTX_LOADED ? 1 : 0;
3303 is_system = ctx->ctx_fl_system;
3304 ovfl_mask = pmu_conf->ovfl_val;
3305 task = ctx->ctx_task;
3306
3307 if (state == PFM_CTX_ZOMBIE) return -EINVAL;
3308
3309 if (likely(is_loaded)) {
3310 thread = &task->thread;
3311 /*
3312 * In system wide and when the context is loaded, access can only happen
3313 * when the caller is running on the CPU being monitored by the session.
3314 * It does not have to be the owner (ctx_task) of the context per se.
3315 */
3316 if (unlikely(is_system && ctx->ctx_cpu != smp_processor_id())) {
3317 DPRINT(("should be running on CPU%d\n", ctx->ctx_cpu));
3318 return -EBUSY;
3319 }
3320 /*
3321		 * this can be true even when not self-monitoring, but only in UP mode
3322 */
3323 can_access_pmu = GET_PMU_OWNER() == task || is_system ? 1 : 0;
3324
3325 if (can_access_pmu) ia64_srlz_d();
3326 }
3327 expert_mode = pfm_sysctl.expert_mode;
3328
3329 DPRINT(("ld=%d apmu=%d ctx_state=%d\n",
3330 is_loaded,
3331 can_access_pmu,
3332 state));
3333
3334 /*
3335 * on both UP and SMP, we can only read the PMD from the hardware register when
3336 * the task is the owner of the local PMU.
3337 */
3338
3339 for (i = 0; i < count; i++, req++) {
3340
3341 cnum = req->reg_num;
3342 reg_flags = req->reg_flags;
3343
3344 if (unlikely(!PMD_IS_IMPL(cnum))) goto error;
3345 /*
3346		 * we can only read the registers that we use. That includes
3347		 * the ones we explicitly initialize AND the ones we want included
3348		 * in the sampling buffer (smpl_regs).
3349		 *
3350		 * Having this restriction allows optimizations in the ctxsw routine
3351		 * without compromising security (no leaks)
3352 */
3353 if (unlikely(!CTX_IS_USED_PMD(ctx, cnum))) goto error;
3354
3355 sval = ctx->ctx_pmds[cnum].val;
3356 lval = ctx->ctx_pmds[cnum].lval;
3357 is_counting = PMD_IS_COUNTING(cnum);
3358
3359 /*
3360 * If the task is not the current one, then we check if the
3361 * PMU state is still in the local live register due to lazy ctxsw.
3362 * If true, then we read directly from the registers.
3363 */
3364		if (can_access_pmu) {
3365 val = ia64_get_pmd(cnum);
3366 } else {
3367 /*
3368 * context has been saved
3369 * if context is zombie, then task does not exist anymore.
3370 * In this case, we use the full value saved in the context (pfm_flush_regs()).
3371 */
3372 val = is_loaded ? thread->pmds[cnum] : 0UL;
3373 }
3374 rd_func = pmu_conf->pmd_desc[cnum].read_check;
3375
3376 if (is_counting) {
3377 /*
3378 * XXX: need to check for overflow when loaded
3379 */
3380 val &= ovfl_mask;
3381 val += sval;
3382 }
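		/*
		 * at this point val holds the full 64-bit virtual counter:
		 * the low bits read from the hardware (or saved thread state),
		 * masked by ovfl_val, plus the software-maintained upper bits
		 * accumulated in sval.
		 */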
3383
3384 /*
3385 * execute read checker, if any
3386 */
3387 if (unlikely(expert_mode == 0 && rd_func)) {
3388 unsigned long v = val;
3389 ret = (*rd_func)(ctx->ctx_task, ctx, cnum, &v, regs);
3390 if (ret) goto error;
3391 val = v;
3392 ret = -EINVAL;
3393 }
3394
3395 PFM_REG_RETFLAG_SET(reg_flags, 0);
3396
3397 DPRINT(("pmd[%u]=0x%lx\n", cnum, val));
3398
3399 /*
3400 * update register return value, abort all if problem during copy.
3401 * we only modify the reg_flags field. no check mode is fine because
3402 * access has been verified upfront in sys_perfmonctl().
3403 */
3404 req->reg_value = val;
3405 req->reg_flags = reg_flags;
3406 req->reg_last_reset_val = lval;
3407 }
3408
3409 return 0;
3410
3411error:
3412 PFM_REG_RETFLAG_SET(req->reg_flags, PFM_REG_RETFL_EINVAL);
3413 return ret;
3414}
3415
3416int
3417pfm_mod_write_pmcs(struct task_struct *task, void *req, unsigned int nreq, struct pt_regs *regs)
3418{
3419 pfm_context_t *ctx;
3420
3421 if (req == NULL) return -EINVAL;
3422
3423 ctx = GET_PMU_CTX();
3424
3425 if (ctx == NULL) return -EINVAL;
3426
3427 /*
3428 * for now limit to current task, which is enough when calling
3429 * from overflow handler
3430 */
3431 if (task != current && ctx->ctx_fl_system == 0) return -EBUSY;
3432
3433 return pfm_write_pmcs(ctx, req, nreq, regs);
3434}
3435EXPORT_SYMBOL(pfm_mod_write_pmcs);
3436
3437int
3438pfm_mod_read_pmds(struct task_struct *task, void *req, unsigned int nreq, struct pt_regs *regs)
3439{
3440 pfm_context_t *ctx;
3441
3442 if (req == NULL) return -EINVAL;
3443
3444 ctx = GET_PMU_CTX();
3445
3446 if (ctx == NULL) return -EINVAL;
3447
3448 /*
3449 * for now limit to current task, which is enough when calling
3450 * from overflow handler
3451 */
3452 if (task != current && ctx->ctx_fl_system == 0) return -EBUSY;
3453
3454 return pfm_read_pmds(ctx, req, nreq, regs);
3455}
3456EXPORT_SYMBOL(pfm_mod_read_pmds);
3457
3458/*
3459 * Only call this function when a process is trying to
3460 * write the debug registers (reading is always allowed)
3461 */
3462int
3463pfm_use_debug_registers(struct task_struct *task)
3464{
3465 pfm_context_t *ctx = task->thread.pfm_context;
3466 unsigned long flags;
3467 int ret = 0;
3468
3469 if (pmu_conf->use_rr_dbregs == 0) return 0;
3470
3471 DPRINT(("called for [%d]\n", task->pid));
3472
3473 /*
3474 * do it only once
3475 */
3476 if (task->thread.flags & IA64_THREAD_DBG_VALID) return 0;
3477
3478 /*
3479 * Even on SMP, we do not need to use an atomic here because
3480 * the only way in is via ptrace() and this is possible only when the
3481 * process is stopped. Even in the case where the ctxsw out is not totally
3482 * completed by the time we come here, there is no way the 'stopped' process
3483 * could be in the middle of fiddling with the pfm_write_ibr_dbr() routine.
3484 * So this is always safe.
3485 */
3486 if (ctx && ctx->ctx_fl_using_dbreg == 1) return -1;
3487
3488 LOCK_PFS(flags);
3489
3490 /*
3491 * We cannot allow setting breakpoints when system wide monitoring
3492 * sessions are using the debug registers.
3493 */
3494	if (pfm_sessions.pfs_sys_use_dbregs > 0)
3495 ret = -1;
3496 else
3497 pfm_sessions.pfs_ptrace_use_dbregs++;
3498
3499 DPRINT(("ptrace_use_dbregs=%u sys_use_dbregs=%u by [%d] ret = %d\n",
3500 pfm_sessions.pfs_ptrace_use_dbregs,
3501 pfm_sessions.pfs_sys_use_dbregs,
3502 task->pid, ret));
3503
3504 UNLOCK_PFS(flags);
3505
3506 return ret;
3507}
3508
3509/*
3510 * This function is called for every task that exits with the
3511 * IA64_THREAD_DBG_VALID flag set. This indicates a task which was
3512 * able to use the debug registers for debugging purposes via
3513 * ptrace(). Therefore we know it was not using them for
3514 * performance monitoring, so we only decrement the number
3515 * of "ptraced" debug register users to keep the count up to date.
3516 */
3517int
3518pfm_release_debug_registers(struct task_struct *task)
3519{
3520 unsigned long flags;
3521 int ret;
3522
3523 if (pmu_conf->use_rr_dbregs == 0) return 0;
3524
3525 LOCK_PFS(flags);
3526 if (pfm_sessions.pfs_ptrace_use_dbregs == 0) {
3527 printk(KERN_ERR "perfmon: invalid release for [%d] ptrace_use_dbregs=0\n", task->pid);
3528 ret = -1;
3529 } else {
3530 pfm_sessions.pfs_ptrace_use_dbregs--;
3531 ret = 0;
3532 }
3533 UNLOCK_PFS(flags);
3534
3535 return ret;
3536}
3537
3538static int
3539pfm_restart(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs)
3540{
3541 struct task_struct *task;
3542 pfm_buffer_fmt_t *fmt;
3543 pfm_ovfl_ctrl_t rst_ctrl;
3544 int state, is_system;
3545 int ret = 0;
3546
3547 state = ctx->ctx_state;
3548 fmt = ctx->ctx_buf_fmt;
3549 is_system = ctx->ctx_fl_system;
3550 task = PFM_CTX_TASK(ctx);
3551
3552 switch(state) {
3553 case PFM_CTX_MASKED:
3554 break;
3555 case PFM_CTX_LOADED:
3556 if (CTX_HAS_SMPL(ctx) && fmt->fmt_restart_active) break;
3557 /* fall through */
3558 case PFM_CTX_UNLOADED:
3559 case PFM_CTX_ZOMBIE:
3560 DPRINT(("invalid state=%d\n", state));
3561 return -EBUSY;
3562 default:
3563 DPRINT(("state=%d, cannot operate (no active_restart handler)\n", state));
3564 return -EINVAL;
3565 }
3566
3567 /*
3568 * In system wide and when the context is loaded, access can only happen
3569 * when the caller is running on the CPU being monitored by the session.
3570 * It does not have to be the owner (ctx_task) of the context per se.
3571 */
3572 if (is_system && ctx->ctx_cpu != smp_processor_id()) {
3573 DPRINT(("should be running on CPU%d\n", ctx->ctx_cpu));
3574 return -EBUSY;
3575 }
3576
3577 /* sanity check */
3578 if (unlikely(task == NULL)) {
3579 printk(KERN_ERR "perfmon: [%d] pfm_restart no task\n", current->pid);
3580 return -EINVAL;
3581 }
3582
3583 if (task == current || is_system) {
3584
3585 fmt = ctx->ctx_buf_fmt;
3586
3587 DPRINT(("restarting self %d ovfl=0x%lx\n",
3588 task->pid,
3589 ctx->ctx_ovfl_regs[0]));
3590
3591 if (CTX_HAS_SMPL(ctx)) {
3592
3593 prefetch(ctx->ctx_smpl_hdr);
3594
3595 rst_ctrl.bits.mask_monitoring = 0;
3596 rst_ctrl.bits.reset_ovfl_pmds = 0;
3597
3598 if (state == PFM_CTX_LOADED)
3599 ret = pfm_buf_fmt_restart_active(fmt, task, &rst_ctrl, ctx->ctx_smpl_hdr, regs);
3600 else
3601 ret = pfm_buf_fmt_restart(fmt, task, &rst_ctrl, ctx->ctx_smpl_hdr, regs);
3602 } else {
3603 rst_ctrl.bits.mask_monitoring = 0;
3604 rst_ctrl.bits.reset_ovfl_pmds = 1;
3605 }
3606
3607 if (ret == 0) {
3608 if (rst_ctrl.bits.reset_ovfl_pmds)
3609 pfm_reset_regs(ctx, ctx->ctx_ovfl_regs, PFM_PMD_LONG_RESET);
3610
3611 if (rst_ctrl.bits.mask_monitoring == 0) {
3612 DPRINT(("resuming monitoring for [%d]\n", task->pid));
3613
3614 if (state == PFM_CTX_MASKED) pfm_restore_monitoring(task);
3615 } else {
3616 DPRINT(("keeping monitoring stopped for [%d]\n", task->pid));
3617
3618 // cannot use pfm_stop_monitoring(task, regs);
3619 }
3620 }
3621 /*
3622 * clear overflowed PMD mask to remove any stale information
3623 */
3624 ctx->ctx_ovfl_regs[0] = 0UL;
3625
3626 /*
3627 * back to LOADED state
3628 */
3629 ctx->ctx_state = PFM_CTX_LOADED;
3630
3631 /*
3632 * XXX: not really useful for self monitoring
3633 */
3634 ctx->ctx_fl_can_restart = 0;
3635
3636 return 0;
3637 }
3638
3639 /*
3640 * restart another task
3641 */
3642
3643 /*
3644 * When PFM_CTX_MASKED, we cannot issue a restart before the previous
3645 * one is seen by the task.
3646 */
3647 if (state == PFM_CTX_MASKED) {
3648 if (ctx->ctx_fl_can_restart == 0) return -EINVAL;
3649 /*
3650 * will prevent subsequent restart before this one is
3651 * seen by other task
3652 */
3653 ctx->ctx_fl_can_restart = 0;
3654 }
3655
3656 /*
3657	 * if blocking, then post the semaphore if PFM_CTX_MASKED, i.e.
3658 * the task is blocked or on its way to block. That's the normal
3659 * restart path. If the monitoring is not masked, then the task
3660 * can be actively monitoring and we cannot directly intervene.
3661 * Therefore we use the trap mechanism to catch the task and
3662 * force it to reset the buffer/reset PMDs.
3663 *
3664 * if non-blocking, then we ensure that the task will go into
3665 * pfm_handle_work() before returning to user mode.
3666 *
3667	 * We cannot explicitly reset another task; it MUST always
3668 * be done by the task itself. This works for system wide because
3669 * the tool that is controlling the session is logically doing
3670 * "self-monitoring".
3671 */
3672 if (CTX_OVFL_NOBLOCK(ctx) == 0 && state == PFM_CTX_MASKED) {
3673 DPRINT(("unblocking [%d] \n", task->pid));
3674 up(&ctx->ctx_restart_sem);
3675 } else {
3676 DPRINT(("[%d] armed exit trap\n", task->pid));
3677
3678 ctx->ctx_fl_trap_reason = PFM_TRAP_REASON_RESET;
3679
3680 PFM_SET_WORK_PENDING(task, 1);
3681
3682 pfm_set_task_notify(task);
3683
3684 /*
3685 * XXX: send reschedule if task runs on another CPU
3686 */
3687 }
3688 return 0;
3689}
3690
3691static int
3692pfm_debug(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs)
3693{
3694 unsigned int m = *(unsigned int *)arg;
3695
3696 pfm_sysctl.debug = m == 0 ? 0 : 1;
3697
3698 pfm_debug_var = pfm_sysctl.debug;
3699
3700 printk(KERN_INFO "perfmon debugging %s (timing reset)\n", pfm_sysctl.debug ? "on" : "off");
3701
3702 if (m == 0) {
3703 memset(pfm_stats, 0, sizeof(pfm_stats));
3704 for(m=0; m < NR_CPUS; m++) pfm_stats[m].pfm_ovfl_intr_cycles_min = ~0UL;
3705 }
3706 return 0;
3707}
3708
3709/*
3710 * arg can be NULL and count can be zero for this function
3711 */
3712static int
3713pfm_write_ibr_dbr(int mode, pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs)
3714{
3715 struct thread_struct *thread = NULL;
3716 struct task_struct *task;
3717 pfarg_dbreg_t *req = (pfarg_dbreg_t *)arg;
3718 unsigned long flags;
3719 dbreg_t dbreg;
3720 unsigned int rnum;
3721 int first_time;
3722 int ret = 0, state;
3723 int i, can_access_pmu = 0;
3724 int is_system, is_loaded;
3725
3726 if (pmu_conf->use_rr_dbregs == 0) return -EINVAL;
3727
3728 state = ctx->ctx_state;
3729 is_loaded = state == PFM_CTX_LOADED ? 1 : 0;
3730 is_system = ctx->ctx_fl_system;
3731 task = ctx->ctx_task;
3732
3733 if (state == PFM_CTX_ZOMBIE) return -EINVAL;
3734
3735 /*
3736 * on both UP and SMP, we can only write to the PMC when the task is
3737 * the owner of the local PMU.
3738 */
3739 if (is_loaded) {
3740 thread = &task->thread;
3741 /*
3742 * In system wide and when the context is loaded, access can only happen
3743 * when the caller is running on the CPU being monitored by the session.
3744 * It does not have to be the owner (ctx_task) of the context per se.
3745 */
3746 if (unlikely(is_system && ctx->ctx_cpu != smp_processor_id())) {
3747 DPRINT(("should be running on CPU%d\n", ctx->ctx_cpu));
3748 return -EBUSY;
3749 }
3750 can_access_pmu = GET_PMU_OWNER() == task || is_system ? 1 : 0;
3751 }
3752
3753 /*
3754 * we do not need to check for ipsr.db because we do clear ibr.x, dbr.r, and dbr.w
3755 * ensuring that no real breakpoint can be installed via this call.
3756 *
3757 * IMPORTANT: regs can be NULL in this function
3758 */
3759
3760 first_time = ctx->ctx_fl_using_dbreg == 0;
3761
3762 /*
3763 * don't bother if we are loaded and task is being debugged
3764 */
3765 if (is_loaded && (thread->flags & IA64_THREAD_DBG_VALID) != 0) {
3766 DPRINT(("debug registers already in use for [%d]\n", task->pid));
3767 return -EBUSY;
3768 }
3769
3770 /*
3771 * check for debug registers in system wide mode
3772 *
3773	 * Even though a check is done in pfm_context_load(),
3774 * we must repeat it here, in case the registers are
3775 * written after the context is loaded
3776 */
3777 if (is_loaded) {
3778 LOCK_PFS(flags);
3779
3780 if (first_time && is_system) {
3781 if (pfm_sessions.pfs_ptrace_use_dbregs)
3782 ret = -EBUSY;
3783 else
3784 pfm_sessions.pfs_sys_use_dbregs++;
3785 }
3786 UNLOCK_PFS(flags);
3787 }
3788
3789 if (ret != 0) return ret;
3790
3791 /*
3792 * mark ourself as user of the debug registers for
3793 * perfmon purposes.
3794 */
3795 ctx->ctx_fl_using_dbreg = 1;
3796
3797 /*
3798 * clear hardware registers to make sure we don't
3799 * pick up stale state.
3800 *
3801 * for a system wide session, we do not use
3802 * thread.dbr, thread.ibr because this process
3803 * never leaves the current CPU and the state
3804 * is shared by all processes running on it
3805 */
3806 if (first_time && can_access_pmu) {
3807 DPRINT(("[%d] clearing ibrs, dbrs\n", task->pid));
3808 for (i=0; i < pmu_conf->num_ibrs; i++) {
3809 ia64_set_ibr(i, 0UL);
3810 ia64_dv_serialize_instruction();
3811 }
3812 ia64_srlz_i();
3813 for (i=0; i < pmu_conf->num_dbrs; i++) {
3814 ia64_set_dbr(i, 0UL);
3815 ia64_dv_serialize_data();
3816 }
3817 ia64_srlz_d();
3818 }
3819
3820 /*
3821 * Now install the values into the registers
3822 */
3823 for (i = 0; i < count; i++, req++) {
3824
3825 rnum = req->dbreg_num;
3826 dbreg.val = req->dbreg_value;
3827
3828 ret = -EINVAL;
3829
3830 if ((mode == PFM_CODE_RR && rnum >= PFM_NUM_IBRS) || ((mode == PFM_DATA_RR) && rnum >= PFM_NUM_DBRS)) {
3831 DPRINT(("invalid register %u val=0x%lx mode=%d i=%d count=%d\n",
3832 rnum, dbreg.val, mode, i, count));
3833
3834 goto abort_mission;
3835 }
3836
3837 /*
3838 * make sure we do not install enabled breakpoint
3839 */
3840 if (rnum & 0x1) {
3841 if (mode == PFM_CODE_RR)
3842 dbreg.ibr.ibr_x = 0;
3843 else
3844 dbreg.dbr.dbr_r = dbreg.dbr.dbr_w = 0;
3845 }
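		/*
		 * the odd register of each ibr/dbr pair holds the control
		 * bits (ibr.x, dbr.r/dbr.w), so clearing them above is what
		 * guarantees the breakpoint can never actually fire.
		 */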
3846
3847 PFM_REG_RETFLAG_SET(req->dbreg_flags, 0);
3848
3849 /*
3850 * Debug registers, just like PMC, can only be modified
3851 * by a kernel call. Moreover, perfmon() access to those
3852 * registers are centralized in this routine. The hardware
3853 * does not modify the value of these registers, therefore,
3854 * if we save them as they are written, we can avoid having
3855 * to save them on context switch out. This is made possible
3856 * by the fact that when perfmon uses debug registers, ptrace()
3857 * won't be able to modify them concurrently.
3858 */
3859 if (mode == PFM_CODE_RR) {
3860 CTX_USED_IBR(ctx, rnum);
3861
3862 if (can_access_pmu) {
3863 ia64_set_ibr(rnum, dbreg.val);
3864 ia64_dv_serialize_instruction();
3865 }
3866
3867 ctx->ctx_ibrs[rnum] = dbreg.val;
3868
3869 DPRINT(("write ibr%u=0x%lx used_ibrs=0x%x ld=%d apmu=%d\n",
3870 rnum, dbreg.val, ctx->ctx_used_ibrs[0], is_loaded, can_access_pmu));
3871 } else {
3872 CTX_USED_DBR(ctx, rnum);
3873
3874 if (can_access_pmu) {
3875 ia64_set_dbr(rnum, dbreg.val);
3876 ia64_dv_serialize_data();
3877 }
3878 ctx->ctx_dbrs[rnum] = dbreg.val;
3879
3880 DPRINT(("write dbr%u=0x%lx used_dbrs=0x%x ld=%d apmu=%d\n",
3881 rnum, dbreg.val, ctx->ctx_used_dbrs[0], is_loaded, can_access_pmu));
3882 }
3883 }
3884
3885 return 0;
3886
3887abort_mission:
3888 /*
3889 * in case it was our first attempt, we undo the global modifications
3890 */
3891 if (first_time) {
3892 LOCK_PFS(flags);
3893 if (ctx->ctx_fl_system) {
3894 pfm_sessions.pfs_sys_use_dbregs--;
3895 }
3896 UNLOCK_PFS(flags);
3897 ctx->ctx_fl_using_dbreg = 0;
3898 }
3899 /*
3900 * install error return flag
3901 */
3902 PFM_REG_RETFLAG_SET(req->dbreg_flags, PFM_REG_RETFL_EINVAL);
3903
3904 return ret;
3905}
3906
3907static int
3908pfm_write_ibrs(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs)
3909{
3910 return pfm_write_ibr_dbr(PFM_CODE_RR, ctx, arg, count, regs);
3911}
3912
3913static int
3914pfm_write_dbrs(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs)
3915{
3916 return pfm_write_ibr_dbr(PFM_DATA_RR, ctx, arg, count, regs);
3917}
3918
3919int
3920pfm_mod_write_ibrs(struct task_struct *task, void *req, unsigned int nreq, struct pt_regs *regs)
3921{
3922 pfm_context_t *ctx;
3923
3924 if (req == NULL) return -EINVAL;
3925
3926 ctx = GET_PMU_CTX();
3927
3928 if (ctx == NULL) return -EINVAL;
3929
3930 /*
3931 * for now limit to current task, which is enough when calling
3932 * from overflow handler
3933 */
3934 if (task != current && ctx->ctx_fl_system == 0) return -EBUSY;
3935
3936 return pfm_write_ibrs(ctx, req, nreq, regs);
3937}
3938EXPORT_SYMBOL(pfm_mod_write_ibrs);
3939
3940int
3941pfm_mod_write_dbrs(struct task_struct *task, void *req, unsigned int nreq, struct pt_regs *regs)
3942{
3943 pfm_context_t *ctx;
3944
3945 if (req == NULL) return -EINVAL;
3946
3947 ctx = GET_PMU_CTX();
3948
3949 if (ctx == NULL) return -EINVAL;
3950
3951 /*
3952 * for now limit to current task, which is enough when calling
3953 * from overflow handler
3954 */
3955 if (task != current && ctx->ctx_fl_system == 0) return -EBUSY;
3956
3957 return pfm_write_dbrs(ctx, req, nreq, regs);
3958}
3959EXPORT_SYMBOL(pfm_mod_write_dbrs);
3960
3961
3962static int
3963pfm_get_features(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs)
3964{
3965 pfarg_features_t *req = (pfarg_features_t *)arg;
3966
3967 req->ft_version = PFM_VERSION;
3968 return 0;
3969}
3970
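/*
 * Stop monitoring for an attached context without unloading it: for a
 * system-wide session this clears dcr.pp/psr.pp (privileged monitors),
 * for a per-task session it clears psr.up (user monitors). Monitoring
 * can be resumed later with pfm_start().
 */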
3971static int
3972pfm_stop(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs)
3973{
3974 struct pt_regs *tregs;
3975 struct task_struct *task = PFM_CTX_TASK(ctx);
3976 int state, is_system;
3977
3978 state = ctx->ctx_state;
3979 is_system = ctx->ctx_fl_system;
3980
3981 /*
3982 * context must be attached to issue the stop command (includes LOADED,MASKED,ZOMBIE)
3983 */
3984 if (state == PFM_CTX_UNLOADED) return -EINVAL;
3985
3986 /*
3987 * In system wide and when the context is loaded, access can only happen
3988 * when the caller is running on the CPU being monitored by the session.
3989 * It does not have to be the owner (ctx_task) of the context per se.
3990 */
3991 if (is_system && ctx->ctx_cpu != smp_processor_id()) {
3992 DPRINT(("should be running on CPU%d\n", ctx->ctx_cpu));
3993 return -EBUSY;
3994 }
3995 DPRINT(("task [%d] ctx_state=%d is_system=%d\n",
3996 PFM_CTX_TASK(ctx)->pid,
3997 state,
3998 is_system));
3999 /*
4000 * in system mode, we need to update the PMU directly
4001 * and the user level state of the caller, which may not
4002 * necessarily be the creator of the context.
4003 */
4004 if (is_system) {
4005 /*
4006 * Update local PMU first
4007 *
4008 * disable dcr pp
4009 */
4010 ia64_setreg(_IA64_REG_CR_DCR, ia64_getreg(_IA64_REG_CR_DCR) & ~IA64_DCR_PP);
4011 ia64_srlz_i();
4012
4013 /*
4014 * update local cpuinfo
4015 */
4016 PFM_CPUINFO_CLEAR(PFM_CPUINFO_DCR_PP);
4017
4018 /*
4019 * stop monitoring, does srlz.i
4020 */
4021 pfm_clear_psr_pp();
4022
4023 /*
4024 * stop monitoring in the caller
4025 */
4026 ia64_psr(regs)->pp = 0;
4027
4028 return 0;
4029 }
4030 /*
4031 * per-task mode
4032 */
4033
4034 if (task == current) {
4035 /* stop monitoring at kernel level */
4036 pfm_clear_psr_up();
4037
4038 /*
4039 * stop monitoring at the user level
4040 */
4041 ia64_psr(regs)->up = 0;
4042 } else {
4043 tregs = ia64_task_regs(task);
4044
4045 /*
4046 * stop monitoring at the user level
4047 */
4048 ia64_psr(tregs)->up = 0;
4049
4050 /*
4051 * monitoring disabled in kernel at next reschedule
4052 */
4053 ctx->ctx_saved_psr_up = 0;
4054 DPRINT(("task=[%d]\n", task->pid));
4055 }
4056 return 0;
4057}
4058
4059
4060static int
4061pfm_start(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs)
4062{
4063 struct pt_regs *tregs;
4064 int state, is_system;
4065
4066 state = ctx->ctx_state;
4067 is_system = ctx->ctx_fl_system;
4068
4069 if (state != PFM_CTX_LOADED) return -EINVAL;
4070
4071 /*
4072 * In system wide and when the context is loaded, access can only happen
4073 * when the caller is running on the CPU being monitored by the session.
4074 * It does not have to be the owner (ctx_task) of the context per se.
4075 */
4076 if (is_system && ctx->ctx_cpu != smp_processor_id()) {
4077 DPRINT(("should be running on CPU%d\n", ctx->ctx_cpu));
4078 return -EBUSY;
4079 }
4080
4081 /*
4082 * in system mode, we need to update the PMU directly
4083 * and the user level state of the caller, which may not
4084 * necessarily be the creator of the context.
4085 */
4086 if (is_system) {
4087
4088 /*
4089 * set user level psr.pp for the caller
4090 */
4091 ia64_psr(regs)->pp = 1;
4092
4093 /*
4094 * now update the local PMU and cpuinfo
4095 */
4096 PFM_CPUINFO_SET(PFM_CPUINFO_DCR_PP);
4097
4098 /*
4099 * start monitoring at kernel level
4100 */
4101 pfm_set_psr_pp();
4102
4103 /* enable dcr pp */
4104 ia64_setreg(_IA64_REG_CR_DCR, ia64_getreg(_IA64_REG_CR_DCR) | IA64_DCR_PP);
4105 ia64_srlz_i();
4106
4107 return 0;
4108 }
4109
4110 /*
4111 * per-process mode
4112 */
4113
4114 if (ctx->ctx_task == current) {
4115
4116 /* start monitoring at kernel level */
4117 pfm_set_psr_up();
4118
4119 /*
4120 * activate monitoring at user level
4121 */
4122 ia64_psr(regs)->up = 1;
4123
4124 } else {
4125 tregs = ia64_task_regs(ctx->ctx_task);
4126
4127 /*
4128 * start monitoring at the kernel level the next
4129 * time the task is scheduled
4130 */
4131 ctx->ctx_saved_psr_up = IA64_PSR_UP;
4132
4133 /*
4134 * activate monitoring at user level
4135 */
4136 ia64_psr(tregs)->up = 1;
4137 }
4138 return 0;
4139}
4140
4141static int
4142pfm_get_pmc_reset(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs)
4143{
4144 pfarg_reg_t *req = (pfarg_reg_t *)arg;
4145 unsigned int cnum;
4146 int i;
4147 int ret = -EINVAL;
4148
4149 for (i = 0; i < count; i++, req++) {
4150
4151 cnum = req->reg_num;
4152
4153 if (!PMC_IS_IMPL(cnum)) goto abort_mission;
4154
4155 req->reg_value = PMC_DFL_VAL(cnum);
4156
4157 PFM_REG_RETFLAG_SET(req->reg_flags, 0);
4158
4159 DPRINT(("pmc_reset_val pmc[%u]=0x%lx\n", cnum, req->reg_value));
4160 }
4161 return 0;
4162
4163abort_mission:
4164 PFM_REG_RETFLAG_SET(req->reg_flags, PFM_REG_RETFL_EINVAL);
4165 return ret;
4166}
4167
4168static int
4169pfm_check_task_exist(pfm_context_t *ctx)
4170{
4171 struct task_struct *g, *t;
4172 int ret = -ESRCH;
4173
4174 read_lock(&tasklist_lock);
4175
4176 do_each_thread (g, t) {
4177 if (t->thread.pfm_context == ctx) {
4178 ret = 0;
4179 break;
4180 }
4181 } while_each_thread (g, t);
4182
4183 read_unlock(&tasklist_lock);
4184
4185 DPRINT(("pfm_check_task_exist: ret=%d ctx=%p\n", ret, ctx));
4186
4187 return ret;
4188}
4189
4190static int
4191pfm_context_load(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs)
4192{
4193 struct task_struct *task;
4194 struct thread_struct *thread;
4195 struct pfm_context_t *old;
4196 unsigned long flags;
4197#ifndef CONFIG_SMP
4198 struct task_struct *owner_task = NULL;
4199#endif
4200 pfarg_load_t *req = (pfarg_load_t *)arg;
4201 unsigned long *pmcs_source, *pmds_source;
4202 int the_cpu;
4203 int ret = 0;
4204 int state, is_system, set_dbregs = 0;
4205
4206 state = ctx->ctx_state;
4207 is_system = ctx->ctx_fl_system;
4208 /*
4209	 * can only load from the unloaded state
4210 */
4211 if (state != PFM_CTX_UNLOADED) {
4212 DPRINT(("cannot load to [%d], invalid ctx_state=%d\n",
4213 req->load_pid,
4214 ctx->ctx_state));
4215 return -EINVAL;
4216 }
4217
4218 DPRINT(("load_pid [%d] using_dbreg=%d\n", req->load_pid, ctx->ctx_fl_using_dbreg));
4219
4220 if (CTX_OVFL_NOBLOCK(ctx) == 0 && req->load_pid == current->pid) {
4221 DPRINT(("cannot use blocking mode on self\n"));
4222 return -EINVAL;
4223 }
4224
4225 ret = pfm_get_task(ctx, req->load_pid, &task);
4226 if (ret) {
4227 DPRINT(("load_pid [%d] get_task=%d\n", req->load_pid, ret));
4228 return ret;
4229 }
4230
4231 ret = -EINVAL;
4232
4233 /*
4234 * system wide is self monitoring only
4235 */
4236 if (is_system && task != current) {
4237 DPRINT(("system wide is self monitoring only load_pid=%d\n",
4238 req->load_pid));
4239 goto error;
4240 }
4241
4242 thread = &task->thread;
4243
4244 ret = 0;
4245 /*
4246 * cannot load a context which is using range restrictions,
4247 * into a task that is being debugged.
4248 */
4249 if (ctx->ctx_fl_using_dbreg) {
4250 if (thread->flags & IA64_THREAD_DBG_VALID) {
4251 ret = -EBUSY;
4252 DPRINT(("load_pid [%d] task is debugged, cannot load range restrictions\n", req->load_pid));
4253 goto error;
4254 }
4255 LOCK_PFS(flags);
4256
4257 if (is_system) {
4258 if (pfm_sessions.pfs_ptrace_use_dbregs) {
4259 DPRINT(("cannot load [%d] dbregs in use\n", task->pid));
4260 ret = -EBUSY;
4261 } else {
4262 pfm_sessions.pfs_sys_use_dbregs++;
4263 DPRINT(("load [%d] increased sys_use_dbreg=%u\n", task->pid, pfm_sessions.pfs_sys_use_dbregs));
4264 set_dbregs = 1;
4265 }
4266 }
4267
4268 UNLOCK_PFS(flags);
4269
4270 if (ret) goto error;
4271 }
4272
4273 /*
4274 * SMP system-wide monitoring implies self-monitoring.
4275 *
4276 * The programming model expects the task to
4277 * be pinned on a CPU throughout the session.
4278 * Here we take note of the current CPU at the
4279 * time the context is loaded. No call from
4280 * another CPU will be allowed.
4281 *
4282	 * The pinning via sched_setaffinity()
4283 * must be done by the calling task prior
4284 * to this call.
4285 *
4286 * systemwide: keep track of CPU this session is supposed to run on
4287 */
4288 the_cpu = ctx->ctx_cpu = smp_processor_id();
4289
4290 ret = -EBUSY;
4291 /*
4292 * now reserve the session
4293 */
4294 ret = pfm_reserve_session(current, is_system, the_cpu);
4295 if (ret) goto error;
4296
4297 /*
4298 * task is necessarily stopped at this point.
4299 *
4300 * If the previous context was zombie, then it got removed in
4301 * pfm_save_regs(). Therefore we should not see it here.
4302 * If we see a context, then this is an active context
4303 *
4304 * XXX: needs to be atomic
4305 */
4306 DPRINT(("before cmpxchg() old_ctx=%p new_ctx=%p\n",
4307 thread->pfm_context, ctx));
4308
4309 old = ia64_cmpxchg(acq, &thread->pfm_context, NULL, ctx, sizeof(pfm_context_t *));
4310 if (old != NULL) {
4311 DPRINT(("load_pid [%d] already has a context\n", req->load_pid));
4312 goto error_unres;
4313 }
4314
4315 pfm_reset_msgq(ctx);
4316
4317 ctx->ctx_state = PFM_CTX_LOADED;
4318
4319 /*
4320 * link context to task
4321 */
4322 ctx->ctx_task = task;
4323
4324 if (is_system) {
4325 /*
4326 * we load as stopped
4327 */
4328 PFM_CPUINFO_SET(PFM_CPUINFO_SYST_WIDE);
4329 PFM_CPUINFO_CLEAR(PFM_CPUINFO_DCR_PP);
4330
4331 if (ctx->ctx_fl_excl_idle) PFM_CPUINFO_SET(PFM_CPUINFO_EXCL_IDLE);
4332 } else {
4333 thread->flags |= IA64_THREAD_PM_VALID;
4334 }
4335
4336 /*
4337 * propagate into thread-state
4338 */
4339 pfm_copy_pmds(task, ctx);
4340 pfm_copy_pmcs(task, ctx);
4341
4342 pmcs_source = thread->pmcs;
4343 pmds_source = thread->pmds;
4344
4345 /*
4346 * always the case for system-wide
4347 */
4348 if (task == current) {
4349
4350 if (is_system == 0) {
4351
4352 /* allow user level control */
4353 ia64_psr(regs)->sp = 0;
4354 DPRINT(("clearing psr.sp for [%d]\n", task->pid));
4355
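			/*
			 * record the CPU and bump the activation number so the
			 * lazy context-switch code can later tell whether the
			 * live PMU registers still belong to this context.
			 */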
4356 SET_LAST_CPU(ctx, smp_processor_id());
4357 INC_ACTIVATION();
4358 SET_ACTIVATION(ctx);
4359#ifndef CONFIG_SMP
4360 /*
4361 * push the other task out, if any
4362 */
4363 owner_task = GET_PMU_OWNER();
4364 if (owner_task) pfm_lazy_save_regs(owner_task);
4365#endif
4366 }
4367 /*
4368 * load all PMD from ctx to PMU (as opposed to thread state)
4369 * restore all PMC from ctx to PMU
4370 */
4371 pfm_restore_pmds(pmds_source, ctx->ctx_all_pmds[0]);
4372 pfm_restore_pmcs(pmcs_source, ctx->ctx_all_pmcs[0]);
4373
4374 ctx->ctx_reload_pmcs[0] = 0UL;
4375 ctx->ctx_reload_pmds[0] = 0UL;
4376
4377 /*
4378 * guaranteed safe by earlier check against DBG_VALID
4379 */
4380 if (ctx->ctx_fl_using_dbreg) {
4381 pfm_restore_ibrs(ctx->ctx_ibrs, pmu_conf->num_ibrs);
4382 pfm_restore_dbrs(ctx->ctx_dbrs, pmu_conf->num_dbrs);
4383 }
4384 /*
4385 * set new ownership
4386 */
4387 SET_PMU_OWNER(task, ctx);
4388
4389 DPRINT(("context loaded on PMU for [%d]\n", task->pid));
4390 } else {
4391 /*
4392 * when not current, task MUST be stopped, so this is safe
4393 */
4394 regs = ia64_task_regs(task);
4395
4396 /* force a full reload */
4397 ctx->ctx_last_activation = PFM_INVALID_ACTIVATION;
4398 SET_LAST_CPU(ctx, -1);
4399
4400 /* initial saved psr (stopped) */
4401 ctx->ctx_saved_psr_up = 0UL;
4402 ia64_psr(regs)->up = ia64_psr(regs)->pp = 0;
4403 }
4404
4405 ret = 0;
4406
4407error_unres:
4408 if (ret) pfm_unreserve_session(ctx, ctx->ctx_fl_system, the_cpu);
4409error:
4410 /*
4411 * we must undo the dbregs setting (for system-wide)
4412 */
4413 if (ret && set_dbregs) {
4414 LOCK_PFS(flags);
4415 pfm_sessions.pfs_sys_use_dbregs--;
4416 UNLOCK_PFS(flags);
4417 }
4418 /*
4419 * release task, there is now a link with the context
4420 */
4421 if (is_system == 0 && task != current) {
4422 pfm_put_task(task);
4423
4424 if (ret == 0) {
4425 ret = pfm_check_task_exist(ctx);
4426 if (ret) {
4427 ctx->ctx_state = PFM_CTX_UNLOADED;
4428 ctx->ctx_task = NULL;
4429 }
4430 }
4431 }
4432 return ret;
4433}
4434
4435/*
4436 * in this function, we do not need to increase the use count
4437 * for the task via get_task_struct(), because we hold the
4438 * context lock. If the task were to disappear while having
4439 * a context attached, it would go through pfm_exit_thread()
4440 * which also grabs the context lock and would therefore be blocked
4441 * until we are here.
4442 */
4443static void pfm_flush_pmds(struct task_struct *, pfm_context_t *ctx);
4444
4445static int
4446pfm_context_unload(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs)
4447{
4448 struct task_struct *task = PFM_CTX_TASK(ctx);
4449 struct pt_regs *tregs;
4450 int prev_state, is_system;
4451 int ret;
4452
4453 DPRINT(("ctx_state=%d task [%d]\n", ctx->ctx_state, task ? task->pid : -1));
4454
4455 prev_state = ctx->ctx_state;
4456 is_system = ctx->ctx_fl_system;
4457
4458 /*
4459 * unload only when necessary
4460 */
4461 if (prev_state == PFM_CTX_UNLOADED) {
4462 DPRINT(("ctx_state=%d, nothing to do\n", prev_state));
4463 return 0;
4464 }
4465
4466 /*
4467 * clear psr and dcr bits
4468 */
4469 ret = pfm_stop(ctx, NULL, 0, regs);
4470 if (ret) return ret;
4471
4472 ctx->ctx_state = PFM_CTX_UNLOADED;
4473
4474 /*
4475 * in system mode, we need to update the PMU directly
4476 * and the user level state of the caller, which may not
4477 * necessarily be the creator of the context.
4478 */
4479 if (is_system) {
4480
4481 /*
4482 * Update cpuinfo
4483 *
4484 * local PMU is taken care of in pfm_stop()
4485 */
4486 PFM_CPUINFO_CLEAR(PFM_CPUINFO_SYST_WIDE);
4487 PFM_CPUINFO_CLEAR(PFM_CPUINFO_EXCL_IDLE);
4488
4489 /*
4490 * save PMDs in context
4491 * release ownership
4492 */
4493 pfm_flush_pmds(current, ctx);
4494
4495 /*
4496 * at this point we are done with the PMU
4497 * so we can unreserve the resource.
4498 */
4499 if (prev_state != PFM_CTX_ZOMBIE)
4500			pfm_unreserve_session(ctx, 1, ctx->ctx_cpu);
4501
4502 /*
4503 * disconnect context from task
4504 */
4505 task->thread.pfm_context = NULL;
4506 /*
4507 * disconnect task from context
4508 */
4509 ctx->ctx_task = NULL;
4510
4511 /*
4512 * There is nothing more to cleanup here.
4513 */
4514 return 0;
4515 }
4516
4517 /*
4518 * per-task mode
4519 */
4520 tregs = task == current ? regs : ia64_task_regs(task);
4521
4522 if (task == current) {
4523 /*
4524 * cancel user level control
4525 */
4526 ia64_psr(regs)->sp = 1;
4527
4528 DPRINT(("setting psr.sp for [%d]\n", task->pid));
4529 }
4530 /*
4531 * save PMDs to context
4532 * release ownership
4533 */
4534 pfm_flush_pmds(task, ctx);
4535
4536 /*
4537 * at this point we are done with the PMU
4538 * so we can unreserve the resource.
4539 *
4540 * when state was ZOMBIE, we have already unreserved.
4541 */
4542 if (prev_state != PFM_CTX_ZOMBIE)
4543		pfm_unreserve_session(ctx, 0, ctx->ctx_cpu);
4544
4545 /*
4546 * reset activation counter and psr
4547 */
4548 ctx->ctx_last_activation = PFM_INVALID_ACTIVATION;
4549 SET_LAST_CPU(ctx, -1);
4550
4551 /*
4552 * PMU state will not be restored
4553 */
4554 task->thread.flags &= ~IA64_THREAD_PM_VALID;
4555
4556 /*
4557 * break links between context and task
4558 */
4559 task->thread.pfm_context = NULL;
4560 ctx->ctx_task = NULL;
4561
4562 PFM_SET_WORK_PENDING(task, 0);
4563
4564 ctx->ctx_fl_trap_reason = PFM_TRAP_REASON_NONE;
4565 ctx->ctx_fl_can_restart = 0;
4566 ctx->ctx_fl_going_zombie = 0;
4567
4568 DPRINT(("disconnected [%d] from context\n", task->pid));
4569
4570 return 0;
4571}
4572
4573
4574/*
4575 * called only from exit_thread(): task == current
4576 * we come here only if current has a context attached (loaded or masked)
4577 */
4578void
4579pfm_exit_thread(struct task_struct *task)
4580{
4581 pfm_context_t *ctx;
4582 unsigned long flags;
4583 struct pt_regs *regs = ia64_task_regs(task);
4584 int ret, state;
4585 int free_ok = 0;
4586
4587 ctx = PFM_GET_CTX(task);
4588
4589 PROTECT_CTX(ctx, flags);
4590
4591 DPRINT(("state=%d task [%d]\n", ctx->ctx_state, task->pid));
4592
4593 state = ctx->ctx_state;
4594 switch(state) {
4595 case PFM_CTX_UNLOADED:
4596 /*
4597		 * only comes to this function if pfm_context is not NULL, i.e., cannot
4598		 * be in the unloaded state
4599 */
4600 printk(KERN_ERR "perfmon: pfm_exit_thread [%d] ctx unloaded\n", task->pid);
4601 break;
4602 case PFM_CTX_LOADED:
4603 case PFM_CTX_MASKED:
4604 ret = pfm_context_unload(ctx, NULL, 0, regs);
4605 if (ret) {
4606 printk(KERN_ERR "perfmon: pfm_exit_thread [%d] state=%d unload failed %d\n", task->pid, state, ret);
4607 }
4608 DPRINT(("ctx unloaded for current state was %d\n", state));
4609
4610 pfm_end_notify_user(ctx);
4611 break;
4612 case PFM_CTX_ZOMBIE:
4613 ret = pfm_context_unload(ctx, NULL, 0, regs);
4614 if (ret) {
4615 printk(KERN_ERR "perfmon: pfm_exit_thread [%d] state=%d unload failed %d\n", task->pid, state, ret);
4616 }
4617 free_ok = 1;
4618 break;
4619 default:
4620 printk(KERN_ERR "perfmon: pfm_exit_thread [%d] unexpected state=%d\n", task->pid, state);
4621 break;
4622 }
4623 UNPROTECT_CTX(ctx, flags);
4624
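	/*
	 * sanity checks: by now monitoring must be fully stopped, i.e.,
	 * psr.up/psr.pp are clear and the local PMU has no owner.
	 */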
4625 { u64 psr = pfm_get_psr();
4626 BUG_ON(psr & (IA64_PSR_UP|IA64_PSR_PP));
4627 BUG_ON(GET_PMU_OWNER());
4628 BUG_ON(ia64_psr(regs)->up);
4629 BUG_ON(ia64_psr(regs)->pp);
4630 }
4631
4632 /*
4633 * All memory free operations (especially for vmalloc'ed memory)
4634 * MUST be done with interrupts ENABLED.
4635 */
4636 if (free_ok) pfm_context_free(ctx);
4637}
4638
4639/*
4640 * functions MUST be listed in increasing order of their index (see perfmon.h)
4641 */
4642#define PFM_CMD(name, flags, arg_count, arg_type, getsz) { name, #name, flags, arg_count, sizeof(arg_type), getsz }
4643#define PFM_CMD_S(name, flags) { name, #name, flags, 0, 0, NULL }
4644#define PFM_CMD_PCLRWS (PFM_CMD_FD|PFM_CMD_ARG_RW|PFM_CMD_STOP)
4645#define PFM_CMD_PCLRW (PFM_CMD_FD|PFM_CMD_ARG_RW)
4646#define PFM_CMD_NONE { NULL, "no-cmd", 0, 0, 0, NULL}
4647
4648static pfm_cmd_desc_t pfm_cmd_tab[]={
4649/* 0 */PFM_CMD_NONE,
4650/* 1 */PFM_CMD(pfm_write_pmcs, PFM_CMD_PCLRWS, PFM_CMD_ARG_MANY, pfarg_reg_t, NULL),
4651/* 2 */PFM_CMD(pfm_write_pmds, PFM_CMD_PCLRWS, PFM_CMD_ARG_MANY, pfarg_reg_t, NULL),
4652/* 3 */PFM_CMD(pfm_read_pmds, PFM_CMD_PCLRWS, PFM_CMD_ARG_MANY, pfarg_reg_t, NULL),
4653/* 4 */PFM_CMD_S(pfm_stop, PFM_CMD_PCLRWS),
4654/* 5 */PFM_CMD_S(pfm_start, PFM_CMD_PCLRWS),
4655/* 6 */PFM_CMD_NONE,
4656/* 7 */PFM_CMD_NONE,
4657/* 8 */PFM_CMD(pfm_context_create, PFM_CMD_ARG_RW, 1, pfarg_context_t, pfm_ctx_getsize),
4658/* 9 */PFM_CMD_NONE,
4659/* 10 */PFM_CMD_S(pfm_restart, PFM_CMD_PCLRW),
4660/* 11 */PFM_CMD_NONE,
4661/* 12 */PFM_CMD(pfm_get_features, PFM_CMD_ARG_RW, 1, pfarg_features_t, NULL),
4662/* 13 */PFM_CMD(pfm_debug, 0, 1, unsigned int, NULL),
4663/* 14 */PFM_CMD_NONE,
4664/* 15 */PFM_CMD(pfm_get_pmc_reset, PFM_CMD_ARG_RW, PFM_CMD_ARG_MANY, pfarg_reg_t, NULL),
4665/* 16 */PFM_CMD(pfm_context_load, PFM_CMD_PCLRWS, 1, pfarg_load_t, NULL),
4666/* 17 */PFM_CMD_S(pfm_context_unload, PFM_CMD_PCLRWS),
4667/* 18 */PFM_CMD_NONE,
4668/* 19 */PFM_CMD_NONE,
4669/* 20 */PFM_CMD_NONE,
4670/* 21 */PFM_CMD_NONE,
4671/* 22 */PFM_CMD_NONE,
4672/* 23 */PFM_CMD_NONE,
4673/* 24 */PFM_CMD_NONE,
4674/* 25 */PFM_CMD_NONE,
4675/* 26 */PFM_CMD_NONE,
4676/* 27 */PFM_CMD_NONE,
4677/* 28 */PFM_CMD_NONE,
4678/* 29 */PFM_CMD_NONE,
4679/* 30 */PFM_CMD_NONE,
4680/* 31 */PFM_CMD_NONE,
4681/* 32 */PFM_CMD(pfm_write_ibrs, PFM_CMD_PCLRWS, PFM_CMD_ARG_MANY, pfarg_dbreg_t, NULL),
4682/* 33 */PFM_CMD(pfm_write_dbrs, PFM_CMD_PCLRWS, PFM_CMD_ARG_MANY, pfarg_dbreg_t, NULL)
4683};
4684#define PFM_CMD_COUNT (sizeof(pfm_cmd_tab)/sizeof(pfm_cmd_desc_t))
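/*
 * Illustrative sketch only (not part of the kernel sources): from user level
 * the commands above are reached through perfmonctl(). A minimal per-task
 * self-monitoring session might look roughly like:
 *
 *	perfmonctl(0, PFM_CREATE_CONTEXT, &ctx_arg, 1);	// fd returned in ctx_arg.ctx_fd
 *	perfmonctl(fd, PFM_WRITE_PMCS, pmcs, npmcs);
 *	perfmonctl(fd, PFM_WRITE_PMDS, pmds, npmds);
 *	perfmonctl(fd, PFM_LOAD_CONTEXT, &load_arg, 1);	// load_arg.load_pid = getpid()
 *	perfmonctl(fd, PFM_START, NULL, 0);
 *	... workload ...
 *	perfmonctl(fd, PFM_STOP, NULL, 0);
 *	perfmonctl(fd, PFM_READ_PMDS, pmds, npmds);
 *	perfmonctl(fd, PFM_UNLOAD_CONTEXT, NULL, 0);
 *
 * The argument structures (pfarg_context_t, pfarg_reg_t, pfarg_load_t) are
 * defined in asm/perfmon.h; the sequence is only meant to show how the
 * command indices in pfm_cmd_tab[] map onto a monitoring session.
 */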
4685
4686static int
4687pfm_check_task_state(pfm_context_t *ctx, int cmd, unsigned long flags)
4688{
4689 struct task_struct *task;
4690 int state, old_state;
4691
4692recheck:
4693 state = ctx->ctx_state;
4694 task = ctx->ctx_task;
4695
4696 if (task == NULL) {
4697 DPRINT(("context %d no task, state=%d\n", ctx->ctx_fd, state));
4698 return 0;
4699 }
4700
4701 DPRINT(("context %d state=%d [%d] task_state=%ld must_stop=%d\n",
4702 ctx->ctx_fd,
4703 state,
4704 task->pid,
4705 task->state, PFM_CMD_STOPPED(cmd)));
4706
4707 /*
4708 * self-monitoring always ok.
4709 *
4710 * for system-wide the caller can either be the creator of the
4711	 * context (the one to which the context is attached) OR
4712 * a task running on the same CPU as the session.
4713 */
4714 if (task == current || ctx->ctx_fl_system) return 0;
4715
4716 /*
4717 * if context is UNLOADED we are safe to go
4718 */
4719 if (state == PFM_CTX_UNLOADED) return 0;
4720
4721 /*
4722 * no command can operate on a zombie context
4723 */
4724 if (state == PFM_CTX_ZOMBIE) {
4725 DPRINT(("cmd %d state zombie cannot operate on context\n", cmd));
4726 return -EINVAL;
4727 }
4728
4729 /*
4730 * context is LOADED or MASKED. Some commands may need to have
4731 * the task stopped.
4732 *
4733 * We could lift this restriction for UP but it would mean that
4734 * the user has no guarantee the task would not run between
4735 * two successive calls to perfmonctl(). That's probably OK.
4736 * If this user wants to ensure the task does not run, then
4737 * the task must be stopped.
4738 */
4739 if (PFM_CMD_STOPPED(cmd)) {
4740 if ((task->state != TASK_STOPPED) && (task->state != TASK_TRACED)) {
4741 DPRINT(("[%d] task not in stopped state\n", task->pid));
4742 return -EBUSY;
4743 }
4744 /*
4745 * task is now stopped, wait for ctxsw out
4746 *
4747 * This is an interesting point in the code.
4748 * We need to unprotect the context because
4749		 * the pfm_save_regs() routine needs to grab
4750		 * the same lock. There is a danger in doing
4751 * this because it leaves a window open for
4752 * another task to get access to the context
4753 * and possibly change its state. The one thing
4754 * that is not possible is for the context to disappear
4755 * because we are protected by the VFS layer, i.e.,
4756 * get_fd()/put_fd().
4757 */
4758 old_state = state;
4759
4760 UNPROTECT_CTX(ctx, flags);
4761
4762 wait_task_inactive(task);
4763
4764 PROTECT_CTX(ctx, flags);
4765
4766 /*
4767 * we must recheck to verify if state has changed
4768 */
4769 if (ctx->ctx_state != old_state) {
4770 DPRINT(("old_state=%d new_state=%d\n", old_state, ctx->ctx_state));
4771 goto recheck;
4772 }
4773 }
4774 return 0;
4775}
4776
4777/*
4778 * system-call entry point (must return long)
4779 */
4780asmlinkage long
4781sys_perfmonctl (int fd, int cmd, void __user *arg, int count)
4782{
4783 struct file *file = NULL;
4784 pfm_context_t *ctx = NULL;
4785 unsigned long flags = 0UL;
4786 void *args_k = NULL;
4787 long ret; /* will expand int return types */
4788 size_t base_sz, sz, xtra_sz = 0;
4789 int narg, completed_args = 0, call_made = 0, cmd_flags;
4790 int (*func)(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs);
4791 int (*getsize)(void *arg, size_t *sz);
4792#define PFM_MAX_ARGSIZE 4096
4793
4794 /*
4795 * reject any call if perfmon was disabled at initialization
4796 */
4797 if (unlikely(pmu_conf == NULL)) return -ENOSYS;
4798
4799 if (unlikely(cmd < 0 || cmd >= PFM_CMD_COUNT)) {
4800 DPRINT(("invalid cmd=%d\n", cmd));
4801 return -EINVAL;
4802 }
4803
4804 func = pfm_cmd_tab[cmd].cmd_func;
4805 narg = pfm_cmd_tab[cmd].cmd_narg;
4806 base_sz = pfm_cmd_tab[cmd].cmd_argsize;
4807 getsize = pfm_cmd_tab[cmd].cmd_getsize;
4808 cmd_flags = pfm_cmd_tab[cmd].cmd_flags;
4809
4810 if (unlikely(func == NULL)) {
4811 DPRINT(("invalid cmd=%d\n", cmd));
4812 return -EINVAL;
4813 }
4814
4815 DPRINT(("cmd=%s idx=%d narg=0x%x argsz=%lu count=%d\n",
4816 PFM_CMD_NAME(cmd),
4817 cmd,
4818 narg,
4819 base_sz,
4820 count));
4821
4822 /*
4823 * check if number of arguments matches what the command expects
4824 */
4825 if (unlikely((narg == PFM_CMD_ARG_MANY && count <= 0) || (narg > 0 && narg != count)))
4826 return -EINVAL;
4827
4828restart_args:
4829 sz = xtra_sz + base_sz*count;
4830 /*
4831 * limit abuse to min page size
4832 */
4833 if (unlikely(sz > PFM_MAX_ARGSIZE)) {
4834 printk(KERN_ERR "perfmon: [%d] argument too big %lu\n", current->pid, sz);
4835 return -E2BIG;
4836 }
4837
4838 /*
4839 * allocate default-sized argument buffer
4840 */
4841 if (likely(count && args_k == NULL)) {
4842 args_k = kmalloc(PFM_MAX_ARGSIZE, GFP_KERNEL);
4843 if (args_k == NULL) return -ENOMEM;
4844 }
4845
4846 ret = -EFAULT;
4847
4848 /*
4849 * copy arguments
4850 *
4851 * assume sz = 0 for command without parameters
4852 */
4853 if (sz && copy_from_user(args_k, arg, sz)) {
4854 DPRINT(("cannot copy_from_user %lu bytes @%p\n", sz, arg));
4855 goto error_args;
4856 }
4857
4858 /*
4859 * check if command supports extra parameters
4860 */
4861 if (completed_args == 0 && getsize) {
4862 /*
4863 * get extra parameters size (based on main argument)
4864 */
4865 ret = (*getsize)(args_k, &xtra_sz);
4866 if (ret) goto error_args;
4867
4868 completed_args = 1;
4869
4870 DPRINT(("restart_args sz=%lu xtra_sz=%lu\n", sz, xtra_sz));
4871
4872 /* retry if necessary */
4873 if (likely(xtra_sz)) goto restart_args;
4874 }
4875
4876 if (unlikely((cmd_flags & PFM_CMD_FD) == 0)) goto skip_fd;
4877
4878 ret = -EBADF;
4879
4880 file = fget(fd);
4881 if (unlikely(file == NULL)) {
4882 DPRINT(("invalid fd %d\n", fd));
4883 goto error_args;
4884 }
4885 if (unlikely(PFM_IS_FILE(file) == 0)) {
4886 DPRINT(("fd %d not related to perfmon\n", fd));
4887 goto error_args;
4888 }
4889
4890 ctx = (pfm_context_t *)file->private_data;
4891 if (unlikely(ctx == NULL)) {
4892 DPRINT(("no context for fd %d\n", fd));
4893 goto error_args;
4894 }
4895 prefetch(&ctx->ctx_state);
4896
4897 PROTECT_CTX(ctx, flags);
4898
4899 /*
4900 * check task is stopped
4901 */
4902 ret = pfm_check_task_state(ctx, cmd, flags);
4903 if (unlikely(ret)) goto abort_locked;
4904
4905skip_fd:
4906 ret = (*func)(ctx, args_k, count, ia64_task_regs(current));
4907
4908 call_made = 1;
4909
4910abort_locked:
4911 if (likely(ctx)) {
4912 DPRINT(("context unlocked\n"));
4913 UNPROTECT_CTX(ctx, flags);
4914 fput(file);
4915 }
4916
4917 /* copy argument back to user, if needed */
4918 if (call_made && PFM_CMD_RW_ARG(cmd) && copy_to_user(arg, args_k, base_sz*count)) ret = -EFAULT;
4919
4920error_args:
4921 if (args_k) kfree(args_k);
4922
4923 DPRINT(("cmd=%s ret=%ld\n", PFM_CMD_NAME(cmd), ret));
4924
4925 return ret;
4926}
4927
4928static void
4929pfm_resume_after_ovfl(pfm_context_t *ctx, unsigned long ovfl_regs, struct pt_regs *regs)
4930{
4931 pfm_buffer_fmt_t *fmt = ctx->ctx_buf_fmt;
4932 pfm_ovfl_ctrl_t rst_ctrl;
4933 int state;
4934 int ret = 0;
4935
4936 state = ctx->ctx_state;
4937 /*
4938 * Unlock sampling buffer and reset index atomically
4939 * XXX: not really needed when blocking
4940 */
4941 if (CTX_HAS_SMPL(ctx)) {
4942
4943 rst_ctrl.bits.mask_monitoring = 0;
4944 rst_ctrl.bits.reset_ovfl_pmds = 0;
4945
4946 if (state == PFM_CTX_LOADED)
4947 ret = pfm_buf_fmt_restart_active(fmt, current, &rst_ctrl, ctx->ctx_smpl_hdr, regs);
4948 else
4949 ret = pfm_buf_fmt_restart(fmt, current, &rst_ctrl, ctx->ctx_smpl_hdr, regs);
4950 } else {
4951 rst_ctrl.bits.mask_monitoring = 0;
4952 rst_ctrl.bits.reset_ovfl_pmds = 1;
4953 }
4954
4955 if (ret == 0) {
4956 if (rst_ctrl.bits.reset_ovfl_pmds) {
4957 pfm_reset_regs(ctx, &ovfl_regs, PFM_PMD_LONG_RESET);
4958 }
4959 if (rst_ctrl.bits.mask_monitoring == 0) {
4960 DPRINT(("resuming monitoring\n"));
4961 if (ctx->ctx_state == PFM_CTX_MASKED) pfm_restore_monitoring(current);
4962 } else {
4963 DPRINT(("stopping monitoring\n"));
4964 //pfm_stop_monitoring(current, regs);
4965 }
4966 ctx->ctx_state = PFM_CTX_LOADED;
4967 }
4968}
4969
4970/*
4971 * context MUST BE LOCKED when calling
4972 * can only be called for current
4973 */
4974static void
4975pfm_context_force_terminate(pfm_context_t *ctx, struct pt_regs *regs)
4976{
4977 int ret;
4978
4979 DPRINT(("entering for [%d]\n", current->pid));
4980
4981 ret = pfm_context_unload(ctx, NULL, 0, regs);
4982 if (ret) {
4983		printk(KERN_ERR "pfm_context_force_terminate: [%d] unload failed with %d\n", current->pid, ret);
4984 }
4985
4986 /*
4987 * and wakeup controlling task, indicating we are now disconnected
4988 */
4989 wake_up_interruptible(&ctx->ctx_zombieq);
4990
4991 /*
4992 * given that context is still locked, the controlling
4993 * task will only get access when we return from
4994 * pfm_handle_work().
4995 */
4996}
4997
4998static int pfm_ovfl_notify_user(pfm_context_t *ctx, unsigned long ovfl_pmds);
4999
5000void
5001pfm_handle_work(void)
5002{
5003 pfm_context_t *ctx;
5004 struct pt_regs *regs;
5005 unsigned long flags;
5006 unsigned long ovfl_regs;
5007 unsigned int reason;
5008 int ret;
5009
5010 ctx = PFM_GET_CTX(current);
5011 if (ctx == NULL) {
5012 printk(KERN_ERR "perfmon: [%d] has no PFM context\n", current->pid);
5013 return;
5014 }
5015
5016 PROTECT_CTX(ctx, flags);
5017
5018 PFM_SET_WORK_PENDING(current, 0);
5019
5020 pfm_clear_task_notify();
5021
5022 regs = ia64_task_regs(current);
5023
5024 /*
5025 * extract reason for being here and clear
5026 */
5027 reason = ctx->ctx_fl_trap_reason;
5028 ctx->ctx_fl_trap_reason = PFM_TRAP_REASON_NONE;
5029 ovfl_regs = ctx->ctx_ovfl_regs[0];
5030
5031 DPRINT(("reason=%d state=%d\n", reason, ctx->ctx_state));
5032
5033 /*
5034 * must be done before we check for simple-reset mode
5035 */
5036 if (ctx->ctx_fl_going_zombie || ctx->ctx_state == PFM_CTX_ZOMBIE) goto do_zombie;
5037
5038
5039 //if (CTX_OVFL_NOBLOCK(ctx)) goto skip_blocking;
5040 if (reason == PFM_TRAP_REASON_RESET) goto skip_blocking;
5041
5042 UNPROTECT_CTX(ctx, flags);
5043
5044 /*
5045 * pfm_handle_work() is currently called with interrupts disabled.
5046 * The down_interruptible call may sleep, therefore we
5047 * must re-enable interrupts to avoid deadlocks. It is
5048 * safe to do so because this function is called ONLY
5049 * when returning to user level (PUStk=1), in which case
5050 * there is no risk of kernel stack overflow due to deep
5051 * interrupt nesting.
5052 */
5053 BUG_ON(flags & IA64_PSR_I);
5054 local_irq_enable();
5055
5056 DPRINT(("before block sleeping\n"));
5057
5058 /*
5059 * may go through without blocking on SMP systems
5060 * if restart has been received already by the time we call down()
5061 */
5062 ret = down_interruptible(&ctx->ctx_restart_sem);
5063
5064 DPRINT(("after block sleeping ret=%d\n", ret));
5065
5066 /*
5067 * disable interrupts to restore state we had upon entering
5068 * this function
5069 */
5070 local_irq_disable();
5071
5072 PROTECT_CTX(ctx, flags);
5073
5074 /*
5075 * we need to read the ovfl_regs only after wake-up
5076	 * because pfm_write_pmds() may have run in between
5077	 * and changed the PMD values, in which case ovfl_regs
5078	 * has been reset to reflect these new PMD values.
5079 */
5080 ovfl_regs = ctx->ctx_ovfl_regs[0];
5081
5082 if (ctx->ctx_fl_going_zombie) {
5083do_zombie:
5084 DPRINT(("context is zombie, bailing out\n"));
5085 pfm_context_force_terminate(ctx, regs);
5086 goto nothing_to_do;
5087 }
5088 /*
5089 * in case of interruption of down() we don't restart anything
5090 */
5091 if (ret < 0) goto nothing_to_do;
5092
5093skip_blocking:
5094 pfm_resume_after_ovfl(ctx, ovfl_regs, regs);
5095 ctx->ctx_ovfl_regs[0] = 0UL;
5096
5097nothing_to_do:
5098
5099 UNPROTECT_CTX(ctx, flags);
5100}
5101
5102static int
5103pfm_notify_user(pfm_context_t *ctx, pfm_msg_t *msg)
5104{
5105 if (ctx->ctx_state == PFM_CTX_ZOMBIE) {
5106 DPRINT(("ignoring overflow notification, owner is zombie\n"));
5107 return 0;
5108 }
5109
5110 DPRINT(("waking up somebody\n"));
5111
5112 if (msg) wake_up_interruptible(&ctx->ctx_msgq_wait);
5113
5114 /*
5115 * safe, we are not in intr handler, nor in ctxsw when
5116 * we come here
5117 */
5118 kill_fasync (&ctx->ctx_async_queue, SIGIO, POLL_IN);
5119
5120 return 0;
5121}
5122
5123static int
5124pfm_ovfl_notify_user(pfm_context_t *ctx, unsigned long ovfl_pmds)
5125{
5126 pfm_msg_t *msg = NULL;
5127
5128 if (ctx->ctx_fl_no_msg == 0) {
5129 msg = pfm_get_new_msg(ctx);
5130 if (msg == NULL) {
5131 printk(KERN_ERR "perfmon: pfm_ovfl_notify_user no more notification msgs\n");
5132 return -1;
5133 }
5134
5135 msg->pfm_ovfl_msg.msg_type = PFM_MSG_OVFL;
5136 msg->pfm_ovfl_msg.msg_ctx_fd = ctx->ctx_fd;
5137 msg->pfm_ovfl_msg.msg_active_set = 0;
5138 msg->pfm_ovfl_msg.msg_ovfl_pmds[0] = ovfl_pmds;
5139 msg->pfm_ovfl_msg.msg_ovfl_pmds[1] = 0UL;
5140 msg->pfm_ovfl_msg.msg_ovfl_pmds[2] = 0UL;
5141 msg->pfm_ovfl_msg.msg_ovfl_pmds[3] = 0UL;
5142 msg->pfm_ovfl_msg.msg_tstamp = 0UL;
5143 }
5144
5145 DPRINT(("ovfl msg: msg=%p no_msg=%d fd=%d ovfl_pmds=0x%lx\n",
5146 msg,
5147 ctx->ctx_fl_no_msg,
5148 ctx->ctx_fd,
5149 ovfl_pmds));
5150
5151 return pfm_notify_user(ctx, msg);
5152}
5153
5154static int
5155pfm_end_notify_user(pfm_context_t *ctx)
5156{
5157 pfm_msg_t *msg;
5158
5159 msg = pfm_get_new_msg(ctx);
5160 if (msg == NULL) {
5161 printk(KERN_ERR "perfmon: pfm_end_notify_user no more notification msgs\n");
5162 return -1;
5163 }
5164 /* no leak */
5165 memset(msg, 0, sizeof(*msg));
5166
5167 msg->pfm_end_msg.msg_type = PFM_MSG_END;
5168 msg->pfm_end_msg.msg_ctx_fd = ctx->ctx_fd;
5169 msg->pfm_ovfl_msg.msg_tstamp = 0UL;
5170
5171 DPRINT(("end msg: msg=%p no_msg=%d ctx_fd=%d\n",
5172 msg,
5173 ctx->ctx_fl_no_msg,
5174 ctx->ctx_fd));
5175
5176 return pfm_notify_user(ctx, msg);
5177}
5178
5179/*
5180 * main overflow processing routine.
5181 * it can be called from the interrupt path or explicitly during the context switch code
5182 */
5183static void
5184pfm_overflow_handler(struct task_struct *task, pfm_context_t *ctx, u64 pmc0, struct pt_regs *regs)
5185{
5186 pfm_ovfl_arg_t *ovfl_arg;
5187 unsigned long mask;
5188 unsigned long old_val, ovfl_val, new_val;
5189 unsigned long ovfl_notify = 0UL, ovfl_pmds = 0UL, smpl_pmds = 0UL, reset_pmds;
5190 unsigned long tstamp;
5191 pfm_ovfl_ctrl_t ovfl_ctrl;
5192 unsigned int i, has_smpl;
5193 int must_notify = 0;
5194
5195 if (unlikely(ctx->ctx_state == PFM_CTX_ZOMBIE)) goto stop_monitoring;
5196
5197 /*
5198 * sanity test. Should never happen
5199 */
5200 if (unlikely((pmc0 & 0x1) == 0)) goto sanity_check;
5201
5202 tstamp = ia64_get_itc();
5203 mask = pmc0 >> PMU_FIRST_COUNTER;
5204 ovfl_val = pmu_conf->ovfl_val;
5205 has_smpl = CTX_HAS_SMPL(ctx);
5206
5207 DPRINT_ovfl(("pmc0=0x%lx pid=%d iip=0x%lx, %s "
5208 "used_pmds=0x%lx\n",
5209 pmc0,
5210 task ? task->pid: -1,
5211 (regs ? regs->cr_iip : 0),
5212 CTX_OVFL_NOBLOCK(ctx) ? "nonblocking" : "blocking",
5213 ctx->ctx_used_pmds[0]));
5214
5215
5216 /*
5217 * first we update the virtual counters
5218 * assume there was a prior ia64_srlz_d() issued
5219 */
5220 for (i = PMU_FIRST_COUNTER; mask ; i++, mask >>= 1) {
5221
5222 /* skip pmd which did not overflow */
5223 if ((mask & 0x1) == 0) continue;
5224
5225 /*
5226 * Note that the pmd is not necessarily 0 at this point as qualified events
5227 * may have happened before the PMU was frozen. The residual count is not
5228 * taken into consideration here but will be with any read of the pmd via
5229 * pfm_read_pmds().
5230 */
5231 old_val = new_val = ctx->ctx_pmds[i].val;
5232 new_val += 1 + ovfl_val;
5233 ctx->ctx_pmds[i].val = new_val;
5234
5235 /*
5236 * check for overflow condition
5237 */
5238 if (likely(old_val > new_val)) {
5239 ovfl_pmds |= 1UL << i;
5240 if (PMC_OVFL_NOTIFY(ctx, i)) ovfl_notify |= 1UL << i;
5241 }
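		/*
		 * Worked example (assuming a generic PMU with ovfl_val = (1UL<<32)-1,
		 * i.e. 32-bit hardware counters): each hardware overflow adds
		 * 1 + ovfl_val = 2^32 to the 64-bit software value kept in
		 * ctx_pmds[i].val, so old_val > new_val can only become true when
		 * the full 64-bit virtual counter wraps, which is exactly what
		 * ovfl_pmds records.
		 */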
5242
5243 DPRINT_ovfl(("ctx_pmd[%d].val=0x%lx old_val=0x%lx pmd=0x%lx ovfl_pmds=0x%lx ovfl_notify=0x%lx\n",
5244 i,
5245 new_val,
5246 old_val,
5247 ia64_get_pmd(i) & ovfl_val,
5248 ovfl_pmds,
5249 ovfl_notify));
5250 }
5251
5252 /*
5253 * there was no 64-bit overflow, nothing else to do
5254 */
5255 if (ovfl_pmds == 0UL) return;
5256
5257 /*
5258 * reset all control bits
5259 */
5260 ovfl_ctrl.val = 0;
5261 reset_pmds = 0UL;
5262
5263 /*
5264 * if a sampling format module exists, then we "cache" the overflow by
5265 * calling the module's handler() routine.
5266 */
5267 if (has_smpl) {
5268 unsigned long start_cycles, end_cycles;
5269 unsigned long pmd_mask;
5270 int j, k, ret = 0;
5271 int this_cpu = smp_processor_id();
5272
5273 pmd_mask = ovfl_pmds >> PMU_FIRST_COUNTER;
5274 ovfl_arg = &ctx->ctx_ovfl_arg;
5275
5276 prefetch(ctx->ctx_smpl_hdr);
5277
5278 for(i=PMU_FIRST_COUNTER; pmd_mask && ret == 0; i++, pmd_mask >>=1) {
5279
5280 mask = 1UL << i;
5281
5282 if ((pmd_mask & 0x1) == 0) continue;
5283
5284 ovfl_arg->ovfl_pmd = (unsigned char )i;
5285 ovfl_arg->ovfl_notify = ovfl_notify & mask ? 1 : 0;
5286 ovfl_arg->active_set = 0;
5287 ovfl_arg->ovfl_ctrl.val = 0; /* module must fill in all fields */
5288 ovfl_arg->smpl_pmds[0] = smpl_pmds = ctx->ctx_pmds[i].smpl_pmds[0];
5289
5290 ovfl_arg->pmd_value = ctx->ctx_pmds[i].val;
5291 ovfl_arg->pmd_last_reset = ctx->ctx_pmds[i].lval;
5292 ovfl_arg->pmd_eventid = ctx->ctx_pmds[i].eventid;
5293
5294 /*
5295 * copy values of pmds of interest. Sampling format may copy them
5296 * into sampling buffer.
5297 */
5298 if (smpl_pmds) {
5299 for(j=0, k=0; smpl_pmds; j++, smpl_pmds >>=1) {
5300 if ((smpl_pmds & 0x1) == 0) continue;
5301 ovfl_arg->smpl_pmds_values[k++] = PMD_IS_COUNTING(j) ? pfm_read_soft_counter(ctx, j) : ia64_get_pmd(j);
5302 DPRINT_ovfl(("smpl_pmd[%d]=pmd%u=0x%lx\n", k-1, j, ovfl_arg->smpl_pmds_values[k-1]));
5303 }
5304 }
5305
5306 pfm_stats[this_cpu].pfm_smpl_handler_calls++;
5307
5308 start_cycles = ia64_get_itc();
5309
5310 /*
5311 * call custom buffer format record (handler) routine
5312 */
5313 ret = (*ctx->ctx_buf_fmt->fmt_handler)(task, ctx->ctx_smpl_hdr, ovfl_arg, regs, tstamp);
5314
5315 end_cycles = ia64_get_itc();
5316
5317 /*
5318 * For those controls, we take the union because they have
5319 * an all or nothing behavior.
5320 */
5321 ovfl_ctrl.bits.notify_user |= ovfl_arg->ovfl_ctrl.bits.notify_user;
5322 ovfl_ctrl.bits.block_task |= ovfl_arg->ovfl_ctrl.bits.block_task;
5323 ovfl_ctrl.bits.mask_monitoring |= ovfl_arg->ovfl_ctrl.bits.mask_monitoring;
5324 /*
5325 * build the bitmask of pmds to reset now
5326 */
5327 if (ovfl_arg->ovfl_ctrl.bits.reset_ovfl_pmds) reset_pmds |= mask;
5328
5329 pfm_stats[this_cpu].pfm_smpl_handler_cycles += end_cycles - start_cycles;
5330 }
5331 /*
5332 * when the module cannot handle the rest of the overflows, we abort right here
5333 */
5334 if (ret && pmd_mask) {
5335 DPRINT(("handler aborts leftover ovfl_pmds=0x%lx\n",
5336 pmd_mask<<PMU_FIRST_COUNTER));
5337 }
5338 /*
5339 * remove the pmds we reset now from the set of pmds to reset in pfm_restart()
5340 */
5341 ovfl_pmds &= ~reset_pmds;
5342 } else {
5343 /*
5344 * when no sampling module is used, then the default
5345 * is to notify on overflow if requested by user
5346 */
5347 ovfl_ctrl.bits.notify_user = ovfl_notify ? 1 : 0;
5348 ovfl_ctrl.bits.block_task = ovfl_notify ? 1 : 0;
5349 ovfl_ctrl.bits.mask_monitoring = ovfl_notify ? 1 : 0; /* XXX: change for saturation */
5350 ovfl_ctrl.bits.reset_ovfl_pmds = ovfl_notify ? 0 : 1;
5351 /*
5352 * if needed, we reset all overflowed pmds
5353 */
5354 if (ovfl_notify == 0) reset_pmds = ovfl_pmds;
5355 }
5356
5357 DPRINT_ovfl(("ovfl_pmds=0x%lx reset_pmds=0x%lx\n", ovfl_pmds, reset_pmds));
5358
5359 /*
5360 * reset the requested PMD registers using the short reset values
5361 */
5362 if (reset_pmds) {
5363 unsigned long bm = reset_pmds;
5364 pfm_reset_regs(ctx, &bm, PFM_PMD_SHORT_RESET);
5365 }
5366
5367 if (ovfl_notify && ovfl_ctrl.bits.notify_user) {
5368 /*
5369 * keep track of what to reset when unblocking
5370 */
5371 ctx->ctx_ovfl_regs[0] = ovfl_pmds;
5372
5373 /*
5374 * check for blocking context
5375 */
5376 if (CTX_OVFL_NOBLOCK(ctx) == 0 && ovfl_ctrl.bits.block_task) {
5377
5378 ctx->ctx_fl_trap_reason = PFM_TRAP_REASON_BLOCK;
5379
5380 /*
5381 * set the perfmon specific checking pending work for the task
5382 */
5383 PFM_SET_WORK_PENDING(task, 1);
5384
5385 /*
5386 * when coming from ctxsw, current still points to the
5387 * previous task, therefore we must work with task and not current.
5388 */
5389 pfm_set_task_notify(task);
5390 }
5391 /*
5392		 * defer until the state is changed (shortens the spin window). The context is locked
5393		 * anyway, so the signal receiver would just spin for nothing.
5394 */
5395 must_notify = 1;
5396 }
5397
5398 DPRINT_ovfl(("owner [%d] pending=%ld reason=%u ovfl_pmds=0x%lx ovfl_notify=0x%lx masked=%d\n",
5399 GET_PMU_OWNER() ? GET_PMU_OWNER()->pid : -1,
5400 PFM_GET_WORK_PENDING(task),
5401 ctx->ctx_fl_trap_reason,
5402 ovfl_pmds,
5403 ovfl_notify,
5404 ovfl_ctrl.bits.mask_monitoring ? 1 : 0));
5405 /*
5406 * in case monitoring must be stopped, we toggle the psr bits
5407 */
5408 if (ovfl_ctrl.bits.mask_monitoring) {
5409 pfm_mask_monitoring(task);
5410 ctx->ctx_state = PFM_CTX_MASKED;
5411 ctx->ctx_fl_can_restart = 1;
5412 }
5413
5414 /*
5415 * send notification now
5416 */
5417 if (must_notify) pfm_ovfl_notify_user(ctx, ovfl_notify);
5418
5419 return;
5420
5421sanity_check:
5422 printk(KERN_ERR "perfmon: CPU%d overflow handler [%d] pmc0=0x%lx\n",
5423 smp_processor_id(),
5424 task ? task->pid : -1,
5425 pmc0);
5426 return;
5427
5428stop_monitoring:
5429 /*
5430 * in SMP, zombie context is never restored but reclaimed in pfm_load_regs().
5431 * Moreover, zombies are also reclaimed in pfm_save_regs(). Therefore we can
5432 * come here as zombie only if the task is the current task. In which case, we
5433 * can access the PMU hardware directly.
5434 *
5435 * Note that zombies do have PM_VALID set. So here we do the minimal.
5436 *
5437 * In case the context was zombified it could not be reclaimed at the time
5438 * the monitoring program exited. At this point, the PMU reservation has been
5439	 * returned and the sampling buffer has been freed. We must convert this call
5440 * into a spurious interrupt. However, we must also avoid infinite overflows
5441 * by stopping monitoring for this task. We can only come here for a per-task
5442 * context. All we need to do is to stop monitoring using the psr bits which
5443	 * are always task private. By re-enabling secure monitoring, we ensure that
5444 * the monitored task will not be able to re-activate monitoring.
5445 * The task will eventually be context switched out, at which point the context
5446 * will be reclaimed (that includes releasing ownership of the PMU).
5447 *
5448	 * So there might be a window of time where the number of per-task sessions is zero
5449	 * yet one PMU might have an owner and get at most one overflow interrupt for a zombie
5450	 * context. This is safe because if a per-task session comes in, it will push this one
5451	 * out and, by virtue of pfm_save_regs(), this one will disappear. If a system-wide
5452	 * session is forced on that CPU, given that we use task pinning, pfm_save_regs() will
5453 * also push our zombie context out.
5454 *
5455 * Overall pretty hairy stuff....
5456 */
5457 DPRINT(("ctx is zombie for [%d], converted to spurious\n", task ? task->pid: -1));
5458 pfm_clear_psr_up();
5459 ia64_psr(regs)->up = 0;
5460 ia64_psr(regs)->sp = 1;
5461 return;
5462}
5463
5464static int
5465pfm_do_interrupt_handler(int irq, void *arg, struct pt_regs *regs)
5466{
5467 struct task_struct *task;
5468 pfm_context_t *ctx;
5469 unsigned long flags;
5470 u64 pmc0;
5471 int this_cpu = smp_processor_id();
5472 int retval = 0;
5473
5474 pfm_stats[this_cpu].pfm_ovfl_intr_count++;
5475
5476 /*
5477 * srlz.d done before arriving here
5478 */
5479 pmc0 = ia64_get_pmc(0);
5480
5481 task = GET_PMU_OWNER();
5482 ctx = GET_PMU_CTX();
5483
5484 /*
5485 * if we have some pending bits set
5486 * assumes : if any PMC0.bit[63-1] is set, then PMC0.fr = 1
5487 */
5488 if (PMC0_HAS_OVFL(pmc0) && task) {
5489 /*
5490 * we assume that pmc0.fr is always set here
5491 */
5492
5493 /* sanity check */
5494 if (!ctx) goto report_spurious1;
5495
5496 if (ctx->ctx_fl_system == 0 && (task->thread.flags & IA64_THREAD_PM_VALID) == 0)
5497 goto report_spurious2;
5498
5499 PROTECT_CTX_NOPRINT(ctx, flags);
5500
5501 pfm_overflow_handler(task, ctx, pmc0, regs);
5502
5503 UNPROTECT_CTX_NOPRINT(ctx, flags);
5504
5505 } else {
5506 pfm_stats[this_cpu].pfm_spurious_ovfl_intr_count++;
5507 retval = -1;
5508 }
5509 /*
5510 * keep it unfrozen at all times
5511 */
5512 pfm_unfreeze_pmu();
5513
5514 return retval;
5515
5516report_spurious1:
5517 printk(KERN_INFO "perfmon: spurious overflow interrupt on CPU%d: process %d has no PFM context\n",
5518 this_cpu, task->pid);
5519 pfm_unfreeze_pmu();
5520 return -1;
5521report_spurious2:
5522 printk(KERN_INFO "perfmon: spurious overflow interrupt on CPU%d: process %d, invalid flag\n",
5523 this_cpu,
5524 task->pid);
5525 pfm_unfreeze_pmu();
5526 return -1;
5527}
5528
5529static irqreturn_t
5530pfm_interrupt_handler(int irq, void *arg, struct pt_regs *regs)
5531{
5532 unsigned long start_cycles, total_cycles;
5533 unsigned long min, max;
5534 int this_cpu;
5535 int ret;
5536
5537 this_cpu = get_cpu();
5538 min = pfm_stats[this_cpu].pfm_ovfl_intr_cycles_min;
5539 max = pfm_stats[this_cpu].pfm_ovfl_intr_cycles_max;
5540
5541 start_cycles = ia64_get_itc();
5542
5543 ret = pfm_do_interrupt_handler(irq, arg, regs);
5544
5545 total_cycles = ia64_get_itc();
5546
5547 /*
5548 * don't measure spurious interrupts
5549 */
5550 if (likely(ret == 0)) {
5551 total_cycles -= start_cycles;
5552
5553 if (total_cycles < min) pfm_stats[this_cpu].pfm_ovfl_intr_cycles_min = total_cycles;
5554 if (total_cycles > max) pfm_stats[this_cpu].pfm_ovfl_intr_cycles_max = total_cycles;
5555
5556 pfm_stats[this_cpu].pfm_ovfl_intr_cycles += total_cycles;
5557 }
5558 put_cpu_no_resched();
5559 return IRQ_HANDLED;
5560}
5561
5562/*
5563 * /proc/perfmon interface, for debug only
5564 */
5565
5566#define PFM_PROC_SHOW_HEADER ((void *)NR_CPUS+1)
5567
5568static void *
5569pfm_proc_start(struct seq_file *m, loff_t *pos)
5570{
5571 if (*pos == 0) {
5572 return PFM_PROC_SHOW_HEADER;
5573 }
5574
5575 while (*pos <= NR_CPUS) {
5576 if (cpu_online(*pos - 1)) {
5577 return (void *)*pos;
5578 }
5579 ++*pos;
5580 }
5581 return NULL;
5582}
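/*
 * Note on the iterator above: position 0 maps to the header pseudo-entry
 * (PFM_PROC_SHOW_HEADER) and positions 1..NR_CPUS map to CPU (pos - 1),
 * skipping CPUs that are offline; pfm_proc_show() undoes the +1 encoding.
 */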
5583
5584static void *
5585pfm_proc_next(struct seq_file *m, void *v, loff_t *pos)
5586{
5587 ++*pos;
5588 return pfm_proc_start(m, pos);
5589}
5590
5591static void
5592pfm_proc_stop(struct seq_file *m, void *v)
5593{
5594}
5595
5596static void
5597pfm_proc_show_header(struct seq_file *m)
5598{
5599 struct list_head * pos;
5600 pfm_buffer_fmt_t * entry;
5601 unsigned long flags;
5602
5603 seq_printf(m,
5604 "perfmon version : %u.%u\n"
5605 "model : %s\n"
5606 "fastctxsw : %s\n"
5607 "expert mode : %s\n"
5608 "ovfl_mask : 0x%lx\n"
5609 "PMU flags : 0x%x\n",
5610 PFM_VERSION_MAJ, PFM_VERSION_MIN,
5611 pmu_conf->pmu_name,
5612 pfm_sysctl.fastctxsw > 0 ? "Yes": "No",
5613 pfm_sysctl.expert_mode > 0 ? "Yes": "No",
5614 pmu_conf->ovfl_val,
5615 pmu_conf->flags);
5616
5617 LOCK_PFS(flags);
5618
5619 seq_printf(m,
5620 "proc_sessions : %u\n"
5621 "sys_sessions : %u\n"
5622 "sys_use_dbregs : %u\n"
5623 "ptrace_use_dbregs : %u\n",
5624 pfm_sessions.pfs_task_sessions,
5625 pfm_sessions.pfs_sys_sessions,
5626 pfm_sessions.pfs_sys_use_dbregs,
5627 pfm_sessions.pfs_ptrace_use_dbregs);
5628
5629 UNLOCK_PFS(flags);
5630
5631 spin_lock(&pfm_buffer_fmt_lock);
5632
5633 list_for_each(pos, &pfm_buffer_fmt_list) {
5634 entry = list_entry(pos, pfm_buffer_fmt_t, fmt_list);
5635 seq_printf(m, "format : %02x-%02x-%02x-%02x-%02x-%02x-%02x-%02x-%02x-%02x-%02x-%02x-%02x-%02x-%02x-%02x %s\n",
5636 entry->fmt_uuid[0],
5637 entry->fmt_uuid[1],
5638 entry->fmt_uuid[2],
5639 entry->fmt_uuid[3],
5640 entry->fmt_uuid[4],
5641 entry->fmt_uuid[5],
5642 entry->fmt_uuid[6],
5643 entry->fmt_uuid[7],
5644 entry->fmt_uuid[8],
5645 entry->fmt_uuid[9],
5646 entry->fmt_uuid[10],
5647 entry->fmt_uuid[11],
5648 entry->fmt_uuid[12],
5649 entry->fmt_uuid[13],
5650 entry->fmt_uuid[14],
5651 entry->fmt_uuid[15],
5652 entry->fmt_name);
5653 }
5654 spin_unlock(&pfm_buffer_fmt_lock);
5655
5656}
5657
5658static int
5659pfm_proc_show(struct seq_file *m, void *v)
5660{
5661 unsigned long psr;
5662 unsigned int i;
5663 int cpu;
5664
5665 if (v == PFM_PROC_SHOW_HEADER) {
5666 pfm_proc_show_header(m);
5667 return 0;
5668 }
5669
5670 /* show info for CPU (v - 1) */
5671
5672 cpu = (long)v - 1;
5673 seq_printf(m,
5674 "CPU%-2d overflow intrs : %lu\n"
5675 "CPU%-2d overflow cycles : %lu\n"
5676 "CPU%-2d overflow min : %lu\n"
5677 "CPU%-2d overflow max : %lu\n"
5678 "CPU%-2d smpl handler calls : %lu\n"
5679 "CPU%-2d smpl handler cycles : %lu\n"
5680 "CPU%-2d spurious intrs : %lu\n"
5681 "CPU%-2d replay intrs : %lu\n"
5682 "CPU%-2d syst_wide : %d\n"
5683 "CPU%-2d dcr_pp : %d\n"
5684 "CPU%-2d exclude idle : %d\n"
5685 "CPU%-2d owner : %d\n"
5686 "CPU%-2d context : %p\n"
5687 "CPU%-2d activations : %lu\n",
5688 cpu, pfm_stats[cpu].pfm_ovfl_intr_count,
5689 cpu, pfm_stats[cpu].pfm_ovfl_intr_cycles,
5690 cpu, pfm_stats[cpu].pfm_ovfl_intr_cycles_min,
5691 cpu, pfm_stats[cpu].pfm_ovfl_intr_cycles_max,
5692 cpu, pfm_stats[cpu].pfm_smpl_handler_calls,
5693 cpu, pfm_stats[cpu].pfm_smpl_handler_cycles,
5694 cpu, pfm_stats[cpu].pfm_spurious_ovfl_intr_count,
5695 cpu, pfm_stats[cpu].pfm_replay_ovfl_intr_count,
5696 cpu, pfm_get_cpu_data(pfm_syst_info, cpu) & PFM_CPUINFO_SYST_WIDE ? 1 : 0,
5697 cpu, pfm_get_cpu_data(pfm_syst_info, cpu) & PFM_CPUINFO_DCR_PP ? 1 : 0,
5698 cpu, pfm_get_cpu_data(pfm_syst_info, cpu) & PFM_CPUINFO_EXCL_IDLE ? 1 : 0,
5699 cpu, pfm_get_cpu_data(pmu_owner, cpu) ? pfm_get_cpu_data(pmu_owner, cpu)->pid: -1,
5700 cpu, pfm_get_cpu_data(pmu_ctx, cpu),
5701 cpu, pfm_get_cpu_data(pmu_activation_number, cpu));
5702
5703 if (num_online_cpus() == 1 && pfm_sysctl.debug > 0) {
5704
5705 psr = pfm_get_psr();
5706
5707 ia64_srlz_d();
5708
5709 seq_printf(m,
5710 "CPU%-2d psr : 0x%lx\n"
5711 "CPU%-2d pmc0 : 0x%lx\n",
5712 cpu, psr,
5713 cpu, ia64_get_pmc(0));
5714
5715 for (i=0; PMC_IS_LAST(i) == 0; i++) {
5716 if (PMC_IS_COUNTING(i) == 0) continue;
5717 seq_printf(m,
5718 "CPU%-2d pmc%u : 0x%lx\n"
5719 "CPU%-2d pmd%u : 0x%lx\n",
5720 cpu, i, ia64_get_pmc(i),
5721 cpu, i, ia64_get_pmd(i));
5722 }
5723 }
5724 return 0;
5725}
5726
5727struct seq_operations pfm_seq_ops = {
5728 .start = pfm_proc_start,
5729 .next = pfm_proc_next,
5730 .stop = pfm_proc_stop,
5731 .show = pfm_proc_show
5732};
5733
5734static int
5735pfm_proc_open(struct inode *inode, struct file *file)
5736{
5737 return seq_open(file, &pfm_seq_ops);
5738}
5739
5740
5741/*
5742 * we come here as soon as local_cpu_data->pfm_syst_wide is set. this happens
5743 * during pfm_enable() hence before pfm_start(). We cannot assume monitoring
5744 * is active or inactive based on mode. We must rely on the value in
5745 * local_cpu_data->pfm_syst_info
5746 */
5747void
5748pfm_syst_wide_update_task(struct task_struct *task, unsigned long info, int is_ctxswin)
5749{
5750 struct pt_regs *regs;
5751 unsigned long dcr;
5752 unsigned long dcr_pp;
5753
5754 dcr_pp = info & PFM_CPUINFO_DCR_PP ? 1 : 0;
5755
5756 /*
5757 * pid 0 is guaranteed to be the idle task. There is one such task with pid 0
5758 * on every CPU, so we can rely on the pid to identify the idle task.
5759 */
5760 if ((info & PFM_CPUINFO_EXCL_IDLE) == 0 || task->pid) {
5761 regs = ia64_task_regs(task);
5762 ia64_psr(regs)->pp = is_ctxswin ? dcr_pp : 0;
5763 return;
5764 }
5765 /*
5766 * if monitoring has started
5767 */
5768 if (dcr_pp) {
5769 dcr = ia64_getreg(_IA64_REG_CR_DCR);
5770 /*
5771 * context switching in?
5772 */
5773 if (is_ctxswin) {
5774 /* mask monitoring for the idle task */
5775 ia64_setreg(_IA64_REG_CR_DCR, dcr & ~IA64_DCR_PP);
5776 pfm_clear_psr_pp();
5777 ia64_srlz_i();
5778 return;
5779 }
5780 /*
5781 * context switching out
5782 * restore monitoring for next task
5783 *
5784		 * Due to inlining, this odd if-then-else construction generates
5785 * better code.
5786 */
5787 ia64_setreg(_IA64_REG_CR_DCR, dcr |IA64_DCR_PP);
5788 pfm_set_psr_pp();
5789 ia64_srlz_i();
5790 }
5791}
5792
5793#ifdef CONFIG_SMP
5794
5795static void
5796pfm_force_cleanup(pfm_context_t *ctx, struct pt_regs *regs)
5797{
5798 struct task_struct *task = ctx->ctx_task;
5799
5800 ia64_psr(regs)->up = 0;
5801 ia64_psr(regs)->sp = 1;
5802
5803 if (GET_PMU_OWNER() == task) {
5804 DPRINT(("cleared ownership for [%d]\n", ctx->ctx_task->pid));
5805 SET_PMU_OWNER(NULL, NULL);
5806 }
5807
5808 /*
5809 * disconnect the task from the context and vice-versa
5810 */
5811 PFM_SET_WORK_PENDING(task, 0);
5812
5813 task->thread.pfm_context = NULL;
5814 task->thread.flags &= ~IA64_THREAD_PM_VALID;
5815
5816 DPRINT(("force cleanup for [%d]\n", task->pid));
5817}
5818
5819
5820/*
5821 * in 2.6, interrupts are masked when we come here and the runqueue lock is held
5822 */
5823void
5824pfm_save_regs(struct task_struct *task)
5825{
5826 pfm_context_t *ctx;
5827 struct thread_struct *t;
5828 unsigned long flags;
5829 u64 psr;
5830
5831
5832 ctx = PFM_GET_CTX(task);
5833 if (ctx == NULL) return;
5834 t = &task->thread;
5835
5836 /*
5837 * we always come here with interrupts ALREADY disabled by
5838 * the scheduler. So we simply need to protect against concurrent
5839 * access, not CPU concurrency.
5840 */
5841 flags = pfm_protect_ctx_ctxsw(ctx);
5842
5843 if (ctx->ctx_state == PFM_CTX_ZOMBIE) {
5844 struct pt_regs *regs = ia64_task_regs(task);
5845
5846 pfm_clear_psr_up();
5847
5848 pfm_force_cleanup(ctx, regs);
5849
5850 BUG_ON(ctx->ctx_smpl_hdr);
5851
5852 pfm_unprotect_ctx_ctxsw(ctx, flags);
5853
5854 pfm_context_free(ctx);
5855 return;
5856 }
5857
5858 /*
5859 * save current PSR: needed because we modify it
5860 */
5861 ia64_srlz_d();
5862 psr = pfm_get_psr();
5863
5864 BUG_ON(psr & (IA64_PSR_I));
5865
5866 /*
5867 * stop monitoring:
5868 * This is the last instruction which may generate an overflow
5869 *
5870	 * We do not need to set psr.sp because it is irrelevant in the kernel.
5871 * It will be restored from ipsr when going back to user level
5872 */
5873 pfm_clear_psr_up();
5874
5875 /*
5876 * keep a copy of psr.up (for reload)
5877 */
5878 ctx->ctx_saved_psr_up = psr & IA64_PSR_UP;
5879
5880 /*
5881 * release ownership of this PMU.
5882 * PM interrupts are masked, so nothing
5883 * can happen.
5884 */
5885 SET_PMU_OWNER(NULL, NULL);
5886
5887 /*
5888	 * we systematically save the PMDs as we have no
5889	 * guarantee we will be scheduled on that same
5890 * CPU again.
5891 */
5892 pfm_save_pmds(t->pmds, ctx->ctx_used_pmds[0]);
5893
5894 /*
5895 * save pmc0 ia64_srlz_d() done in pfm_save_pmds()
5896 * we will need it on the restore path to check
5897 * for pending overflow.
5898 */
5899 t->pmcs[0] = ia64_get_pmc(0);
5900
5901 /*
5902 * unfreeze PMU if had pending overflows
5903 */
5904 if (t->pmcs[0] & ~0x1UL) pfm_unfreeze_pmu();
5905
5906 /*
5907 * finally, allow context access.
5908 * interrupts will still be masked after this call.
5909 */
5910 pfm_unprotect_ctx_ctxsw(ctx, flags);
5911}
5912
5913#else /* !CONFIG_SMP */
5914void
5915pfm_save_regs(struct task_struct *task)
5916{
5917 pfm_context_t *ctx;
5918 u64 psr;
5919
5920 ctx = PFM_GET_CTX(task);
5921 if (ctx == NULL) return;
5922
5923 /*
5924 * save current PSR: needed because we modify it
5925 */
5926 psr = pfm_get_psr();
5927
5928 BUG_ON(psr & (IA64_PSR_I));
5929
5930 /*
5931 * stop monitoring:
5932 * This is the last instruction which may generate an overflow
5933 *
5934	 * We do not need to set psr.sp because it is irrelevant in the kernel.
5935 * It will be restored from ipsr when going back to user level
5936 */
5937 pfm_clear_psr_up();
5938
5939 /*
5940 * keep a copy of psr.up (for reload)
5941 */
5942 ctx->ctx_saved_psr_up = psr & IA64_PSR_UP;
5943}
5944
5945static void
5946pfm_lazy_save_regs (struct task_struct *task)
5947{
5948 pfm_context_t *ctx;
5949 struct thread_struct *t;
5950 unsigned long flags;
5951
5952 { u64 psr = pfm_get_psr();
5953 BUG_ON(psr & IA64_PSR_UP);
5954 }
5955
5956 ctx = PFM_GET_CTX(task);
5957 t = &task->thread;
5958
5959 /*
5960 * we need to mask PMU overflow here to
5961 * make sure that we maintain pmc0 until
5962 * we save it. overflow interrupts are
5963 * treated as spurious if there is no
5964 * owner.
5965 *
5966 * XXX: I don't think this is necessary
5967 */
5968 PROTECT_CTX(ctx,flags);
5969
5970 /*
5971 * release ownership of this PMU.
5972 * must be done before we save the registers.
5973 *
5974 * after this call any PMU interrupt is treated
5975 * as spurious.
5976 */
5977 SET_PMU_OWNER(NULL, NULL);
5978
5979 /*
5980 * save all the pmds we use
5981 */
5982 pfm_save_pmds(t->pmds, ctx->ctx_used_pmds[0]);
5983
5984 /*
5985 * save pmc0 ia64_srlz_d() done in pfm_save_pmds()
5986	 * it is needed to check for pending overflow
5987 * on the restore path
5988 */
5989 t->pmcs[0] = ia64_get_pmc(0);
5990
5991 /*
5992 * unfreeze PMU if had pending overflows
5993 */
5994 if (t->pmcs[0] & ~0x1UL) pfm_unfreeze_pmu();
5995
5996 /*
5997	 * now we can unmask PMU interrupts; they will
5998 * be treated as purely spurious and we will not
5999 * lose any information
6000 */
6001 UNPROTECT_CTX(ctx,flags);
6002}
6003#endif /* CONFIG_SMP */
6004
6005#ifdef CONFIG_SMP
6006/*
6007 * in 2.6, interrupts are masked when we come here and the runqueue lock is held
6008 */
6009void
6010pfm_load_regs (struct task_struct *task)
6011{
6012 pfm_context_t *ctx;
6013 struct thread_struct *t;
6014 unsigned long pmc_mask = 0UL, pmd_mask = 0UL;
6015 unsigned long flags;
6016 u64 psr, psr_up;
6017 int need_irq_resend;
6018
6019 ctx = PFM_GET_CTX(task);
6020 if (unlikely(ctx == NULL)) return;
6021
6022 BUG_ON(GET_PMU_OWNER());
6023
6024 t = &task->thread;
6025 /*
6026 * possible on unload
6027 */
6028 if (unlikely((t->flags & IA64_THREAD_PM_VALID) == 0)) return;
6029
6030 /*
6031 * we always come here with interrupts ALREADY disabled by
6032 * the scheduler. So we simply need to protect against concurrent
6033 * access, not CPU concurrency.
6034 */
6035 flags = pfm_protect_ctx_ctxsw(ctx);
6036 psr = pfm_get_psr();
6037
6038 need_irq_resend = pmu_conf->flags & PFM_PMU_IRQ_RESEND;
6039
6040 BUG_ON(psr & (IA64_PSR_UP|IA64_PSR_PP));
6041 BUG_ON(psr & IA64_PSR_I);
6042
6043 if (unlikely(ctx->ctx_state == PFM_CTX_ZOMBIE)) {
6044 struct pt_regs *regs = ia64_task_regs(task);
6045
6046 BUG_ON(ctx->ctx_smpl_hdr);
6047
6048 pfm_force_cleanup(ctx, regs);
6049
6050 pfm_unprotect_ctx_ctxsw(ctx, flags);
6051
6052 /*
6053 * this one (kmalloc'ed) is fine with interrupts disabled
6054 */
6055 pfm_context_free(ctx);
6056
6057 return;
6058 }
6059
6060 /*
6061 * we restore ALL the debug registers to avoid picking up
6062 * stale state.
6063 */
6064 if (ctx->ctx_fl_using_dbreg) {
6065 pfm_restore_ibrs(ctx->ctx_ibrs, pmu_conf->num_ibrs);
6066 pfm_restore_dbrs(ctx->ctx_dbrs, pmu_conf->num_dbrs);
6067 }
6068 /*
6069 * retrieve saved psr.up
6070 */
6071 psr_up = ctx->ctx_saved_psr_up;
6072
6073 /*
6074 * if we were the last user of the PMU on that CPU,
6075 * then nothing to do except restore psr
6076 */
6077 if (GET_LAST_CPU(ctx) == smp_processor_id() && ctx->ctx_last_activation == GET_ACTIVATION()) {
6078
6079 /*
6080 * retrieve partial reload masks (due to user modifications)
6081 */
6082 pmc_mask = ctx->ctx_reload_pmcs[0];
6083 pmd_mask = ctx->ctx_reload_pmds[0];
6084
6085 } else {
6086 /*
6087 * To avoid leaking information to the user level when psr.sp=0,
6088 * we must reload ALL implemented pmds (even the ones we don't use).
6089 * In the kernel we only allow PFM_READ_PMDS on registers which
6090 * we initialized or requested (sampling) so there is no risk there.
6091 */
6092 pmd_mask = pfm_sysctl.fastctxsw ? ctx->ctx_used_pmds[0] : ctx->ctx_all_pmds[0];
6093
6094 /*
6095 * ALL accessible PMCs are systematically reloaded, unused registers
6096 * get their default (from pfm_reset_pmu_state()) values to avoid picking
6097 * up stale configuration.
6098 *
6099 * PMC0 is never in the mask. It is always restored separately.
6100 */
6101 pmc_mask = ctx->ctx_all_pmcs[0];
6102 }
6103 /*
6104 * when context is MASKED, we will restore PMC with plm=0
6105 * and PMD with stale information, but that's ok, nothing
6106 * will be captured.
6107 *
6108 * XXX: optimize here
6109 */
6110 if (pmd_mask) pfm_restore_pmds(t->pmds, pmd_mask);
6111 if (pmc_mask) pfm_restore_pmcs(t->pmcs, pmc_mask);
6112
6113 /*
6114 * check for pending overflow at the time the state
6115 * was saved.
6116 */
6117 if (unlikely(PMC0_HAS_OVFL(t->pmcs[0]))) {
6118 /*
6119 * reload pmc0 with the overflow information
6120 * On McKinley PMU, this will trigger a PMU interrupt
6121 */
6122 ia64_set_pmc(0, t->pmcs[0]);
6123 ia64_srlz_d();
6124 t->pmcs[0] = 0UL;
6125
6126 /*
6127 * will replay the PMU interrupt
6128 */
6129 if (need_irq_resend) hw_resend_irq(NULL, IA64_PERFMON_VECTOR);
6130
6131 pfm_stats[smp_processor_id()].pfm_replay_ovfl_intr_count++;
6132 }
6133
6134 /*
6135 * we just did a reload, so we reset the partial reload fields
6136 */
6137 ctx->ctx_reload_pmcs[0] = 0UL;
6138 ctx->ctx_reload_pmds[0] = 0UL;
6139
6140 SET_LAST_CPU(ctx, smp_processor_id());
6141
6142 /*
6143	 * bump the activation number for this PMU
6144 */
6145 INC_ACTIVATION();
6146 /*
6147 * record current activation for this context
6148 */
6149 SET_ACTIVATION(ctx);
6150
6151 /*
6152 * establish new ownership.
6153 */
6154 SET_PMU_OWNER(task, ctx);
6155
6156 /*
6157 * restore the psr.up bit. measurement
6158 * is active again.
6159 * no PMU interrupt can happen at this point
6160 * because we still have interrupts disabled.
6161 */
6162 if (likely(psr_up)) pfm_set_psr_up();
6163
6164 /*
6165 * allow concurrent access to context
6166 */
6167 pfm_unprotect_ctx_ctxsw(ctx, flags);
6168}
6169#else /* !CONFIG_SMP */
6170/*
6171 * reload PMU state for UP kernels
6172 * in 2.5 we come here with interrupts disabled
6173 */
6174void
6175pfm_load_regs (struct task_struct *task)
6176{
6177 struct thread_struct *t;
6178 pfm_context_t *ctx;
6179 struct task_struct *owner;
6180 unsigned long pmd_mask, pmc_mask;
6181 u64 psr, psr_up;
6182 int need_irq_resend;
6183
6184 owner = GET_PMU_OWNER();
6185 ctx = PFM_GET_CTX(task);
6186 t = &task->thread;
6187 psr = pfm_get_psr();
6188
6189 BUG_ON(psr & (IA64_PSR_UP|IA64_PSR_PP));
6190 BUG_ON(psr & IA64_PSR_I);
6191
6192 /*
6193 * we restore ALL the debug registers to avoid picking up
6194 * stale state.
6195 *
6196 * This must be done even when the task is still the owner
6197 * as the registers may have been modified via ptrace()
6198 * (not perfmon) by the previous task.
6199 */
6200 if (ctx->ctx_fl_using_dbreg) {
6201 pfm_restore_ibrs(ctx->ctx_ibrs, pmu_conf->num_ibrs);
6202 pfm_restore_dbrs(ctx->ctx_dbrs, pmu_conf->num_dbrs);
6203 }
6204
6205 /*
6206	 * retrieve saved psr.up
6207 */
6208 psr_up = ctx->ctx_saved_psr_up;
6209 need_irq_resend = pmu_conf->flags & PFM_PMU_IRQ_RESEND;
6210
6211 /*
6212 * short path, our state is still there, just
6213 * need to restore psr and we go
6214 *
6215 * we do not touch either PMC nor PMD. the psr is not touched
6216 * by the overflow_handler. So we are safe w.r.t. to interrupt
6217 * concurrency even without interrupt masking.
6218 */
6219 if (likely(owner == task)) {
6220 if (likely(psr_up)) pfm_set_psr_up();
6221 return;
6222 }
6223
6224 /*
6225 * someone else is still using the PMU, first push it out and
6226 * then we'll be able to install our stuff !
6227 *
6228 * Upon return, there will be no owner for the current PMU
6229 */
6230 if (owner) pfm_lazy_save_regs(owner);
6231
6232 /*
6233 * To avoid leaking information to the user level when psr.sp=0,
6234 * we must reload ALL implemented pmds (even the ones we don't use).
6235 * In the kernel we only allow PFM_READ_PMDS on registers which
6236 * we initialized or requested (sampling) so there is no risk there.
6237 */
6238 pmd_mask = pfm_sysctl.fastctxsw ? ctx->ctx_used_pmds[0] : ctx->ctx_all_pmds[0];
6239
6240 /*
6241 * ALL accessible PMCs are systematically reloaded, unused registers
6242 * get their default (from pfm_reset_pmu_state()) values to avoid picking
6243 * up stale configuration.
6244 *
6245 * PMC0 is never in the mask. It is always restored separately
6246 */
6247 pmc_mask = ctx->ctx_all_pmcs[0];
6248
6249 pfm_restore_pmds(t->pmds, pmd_mask);
6250 pfm_restore_pmcs(t->pmcs, pmc_mask);
6251
6252 /*
6253 * check for pending overflow at the time the state
6254 * was saved.
6255 */
6256 if (unlikely(PMC0_HAS_OVFL(t->pmcs[0]))) {
6257 /*
6258 * reload pmc0 with the overflow information
6259 * On McKinley PMU, this will trigger a PMU interrupt
6260 */
6261 ia64_set_pmc(0, t->pmcs[0]);
6262 ia64_srlz_d();
6263
6264 t->pmcs[0] = 0UL;
6265
6266 /*
6267 * will replay the PMU interrupt
6268 */
6269 if (need_irq_resend) hw_resend_irq(NULL, IA64_PERFMON_VECTOR);
6270
6271 pfm_stats[smp_processor_id()].pfm_replay_ovfl_intr_count++;
6272 }
6273
6274 /*
6275 * establish new ownership.
6276 */
6277 SET_PMU_OWNER(task, ctx);
6278
6279 /*
6280 * restore the psr.up bit. measurement
6281 * is active again.
6282 * no PMU interrupt can happen at this point
6283 * because we still have interrupts disabled.
6284 */
6285 if (likely(psr_up)) pfm_set_psr_up();
6286}
6287#endif /* CONFIG_SMP */
6288
6289/*
6290 * this function assumes monitoring is stopped
6291 */
6292static void
6293pfm_flush_pmds(struct task_struct *task, pfm_context_t *ctx)
6294{
6295 u64 pmc0;
6296 unsigned long mask2, val, pmd_val, ovfl_val;
6297 int i, can_access_pmu = 0;
6298 int is_self;
6299
6300 /*
6301 * is the caller the task being monitored (or which initiated the
6302 * session for system wide measurements)
6303 */
6304 is_self = ctx->ctx_task == task ? 1 : 0;
6305
6306 /*
6307	 * can access PMU if task is the owner of the PMU state on the current CPU
6308 * or if we are running on the CPU bound to the context in system-wide mode
6309 * (that is not necessarily the task the context is attached to in this mode).
6310 * In system-wide we always have can_access_pmu true because a task running on an
6311 * invalid processor is flagged earlier in the call stack (see pfm_stop).
6312 */
6313 can_access_pmu = (GET_PMU_OWNER() == task) || (ctx->ctx_fl_system && ctx->ctx_cpu == smp_processor_id());
6314 if (can_access_pmu) {
6315 /*
6316 * Mark the PMU as not owned
6317 * This will cause the interrupt handler to do nothing in case an overflow
6318 * interrupt was in-flight
6319 * This also guarantees that pmc0 will contain the final state
6320 * It virtually gives us full control on overflow processing from that point
6321 * on.
6322 */
6323 SET_PMU_OWNER(NULL, NULL);
6324 DPRINT(("releasing ownership\n"));
6325
6326 /*
6327 * read current overflow status:
6328 *
6329 * we are guaranteed to read the final stable state
6330 */
6331 ia64_srlz_d();
6332 pmc0 = ia64_get_pmc(0); /* slow */
6333
6334 /*
6335 * reset freeze bit, overflow status information destroyed
6336 */
6337 pfm_unfreeze_pmu();
6338 } else {
6339 pmc0 = task->thread.pmcs[0];
6340 /*
6341 * clear whatever overflow status bits there were
6342 */
6343 task->thread.pmcs[0] = 0;
6344 }
6345 ovfl_val = pmu_conf->ovfl_val;
6346 /*
6347 * we save all the used pmds
6348 * we take care of overflows for counting PMDs
6349 *
6350 * XXX: sampling situation is not taken into account here
6351 */
6352 mask2 = ctx->ctx_used_pmds[0];
6353
6354 DPRINT(("is_self=%d ovfl_val=0x%lx mask2=0x%lx\n", is_self, ovfl_val, mask2));
6355
6356 for (i = 0; mask2; i++, mask2>>=1) {
6357
6358		/* skip unused pmds */
6359 if ((mask2 & 0x1) == 0) continue;
6360
6361 /*
6362 * can access PMU always true in system wide mode
6363 */
6364 val = pmd_val = can_access_pmu ? ia64_get_pmd(i) : task->thread.pmds[i];
6365
6366 if (PMD_IS_COUNTING(i)) {
6367 DPRINT(("[%d] pmd[%d] ctx_pmd=0x%lx hw_pmd=0x%lx\n",
6368 task->pid,
6369 i,
6370 ctx->ctx_pmds[i].val,
6371 val & ovfl_val));
6372
6373 /*
6374 * we rebuild the full 64 bit value of the counter
6375 */
6376 val = ctx->ctx_pmds[i].val + (val & ovfl_val);
6377
6378 /*
6379 * now everything is in ctx_pmds[] and we need
6380 * to clear the saved context from save_regs() such that
6381 * pfm_read_pmds() gets the correct value
6382 */
6383 pmd_val = 0UL;
6384
6385 /*
6386 * take care of overflow inline
6387 */
6388 if (pmc0 & (1UL << i)) {
6389 val += 1 + ovfl_val;
6390 DPRINT(("[%d] pmd[%d] overflowed\n", task->pid, i));
6391 }
6392 }
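		/*
		 * Worked example (32-bit counters, ovfl_val = 0xffffffff): if the
		 * software part ctx_pmds[i].val is 0x300000000 and the hardware pmd
		 * reads 0x123, the rebuilt value is 0x300000123; if pmc0 also shows
		 * a pending overflow for this pmd, one extra modulus (0x100000000)
		 * is added, giving 0x400000123.
		 */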
6393
6394 DPRINT(("[%d] ctx_pmd[%d]=0x%lx pmd_val=0x%lx\n", task->pid, i, val, pmd_val));
6395
6396 if (is_self) task->thread.pmds[i] = pmd_val;
6397
6398 ctx->ctx_pmds[i].val = val;
6399 }
6400}
6401
6402static struct irqaction perfmon_irqaction = {
6403 .handler = pfm_interrupt_handler,
6404 .flags = SA_INTERRUPT,
6405 .name = "perfmon"
6406};
6407
6408/*
6409 * perfmon initialization routine, called from the initcall() table
6410 */
6411static int init_pfm_fs(void);
6412
6413static int __init
6414pfm_probe_pmu(void)
6415{
6416 pmu_config_t **p;
6417 int family;
6418
6419 family = local_cpu_data->family;
6420 p = pmu_confs;
6421
6422 while(*p) {
6423 if ((*p)->probe) {
6424 if ((*p)->probe() == 0) goto found;
6425 } else if ((*p)->pmu_family == family || (*p)->pmu_family == 0xff) {
6426 goto found;
6427 }
6428 p++;
6429 }
6430 return -1;
6431found:
6432 pmu_conf = *p;
6433 return 0;
6434}
6435
6436static struct file_operations pfm_proc_fops = {
6437 .open = pfm_proc_open,
6438 .read = seq_read,
6439 .llseek = seq_lseek,
6440 .release = seq_release,
6441};
6442
6443int __init
6444pfm_init(void)
6445{
6446 unsigned int n, n_counters, i;
6447
6448 printk("perfmon: version %u.%u IRQ %u\n",
6449 PFM_VERSION_MAJ,
6450 PFM_VERSION_MIN,
6451 IA64_PERFMON_VECTOR);
6452
6453 if (pfm_probe_pmu()) {
6454 printk(KERN_INFO "perfmon: disabled, there is no support for processor family %d\n",
6455 local_cpu_data->family);
6456 return -ENODEV;
6457 }
6458
6459 /*
6460 * compute the number of implemented PMD/PMC from the
6461 * description tables
6462 */
6463 n = 0;
6464 for (i=0; PMC_IS_LAST(i) == 0; i++) {
6465 if (PMC_IS_IMPL(i) == 0) continue;
6466 pmu_conf->impl_pmcs[i>>6] |= 1UL << (i&63);
6467 n++;
6468 }
6469 pmu_conf->num_pmcs = n;
6470
6471 n = 0; n_counters = 0;
6472 for (i=0; PMD_IS_LAST(i) == 0; i++) {
6473 if (PMD_IS_IMPL(i) == 0) continue;
6474 pmu_conf->impl_pmds[i>>6] |= 1UL << (i&63);
6475 n++;
6476 if (PMD_IS_COUNTING(i)) n_counters++;
6477 }
6478 pmu_conf->num_pmds = n;
6479 pmu_conf->num_counters = n_counters;
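	/*
	 * The impl_pmcs/impl_pmds bitmaps built above are arrays of 64-bit
	 * words: i>>6 selects the word and i&63 the bit within it, so for
	 * example register index 70 lands in word 1, bit 6.
	 */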
6480
6481 /*
6482 * sanity checks on the number of debug registers
6483 */
6484 if (pmu_conf->use_rr_dbregs) {
6485 if (pmu_conf->num_ibrs > IA64_NUM_DBG_REGS) {
6486 printk(KERN_INFO "perfmon: unsupported number of code debug registers (%u)\n", pmu_conf->num_ibrs);
6487 pmu_conf = NULL;
6488 return -1;
6489 }
6490 if (pmu_conf->num_dbrs > IA64_NUM_DBG_REGS) {
6491			printk(KERN_INFO "perfmon: unsupported number of data debug registers (%u)\n", pmu_conf->num_dbrs);
6492 pmu_conf = NULL;
6493 return -1;
6494 }
6495 }
6496
6497 printk("perfmon: %s PMU detected, %u PMCs, %u PMDs, %u counters (%lu bits)\n",
6498 pmu_conf->pmu_name,
6499 pmu_conf->num_pmcs,
6500 pmu_conf->num_pmds,
6501 pmu_conf->num_counters,
6502 ffz(pmu_conf->ovfl_val));
6503
6504 /* sanity check */
6505 if (pmu_conf->num_pmds >= IA64_NUM_PMD_REGS || pmu_conf->num_pmcs >= IA64_NUM_PMC_REGS) {
6506 printk(KERN_ERR "perfmon: not enough pmc/pmd, perfmon disabled\n");
6507 pmu_conf = NULL;
6508 return -1;
6509 }
6510
6511 /*
6512 * create /proc/perfmon (mostly for debugging purposes)
6513 */
6514 perfmon_dir = create_proc_entry("perfmon", S_IRUGO, NULL);
6515 if (perfmon_dir == NULL) {
6516 printk(KERN_ERR "perfmon: cannot create /proc entry, perfmon disabled\n");
6517 pmu_conf = NULL;
6518 return -1;
6519 }
6520 /*
6521 * install customized file operations for /proc/perfmon entry
6522 */
6523 perfmon_dir->proc_fops = &pfm_proc_fops;
6524
6525 /*
6526 * create /proc/sys/kernel/perfmon (for debugging purposes)
6527 */
6528 pfm_sysctl_header = register_sysctl_table(pfm_sysctl_root, 0);
6529
6530 /*
6531 * initialize all our spinlocks
6532 */
6533 spin_lock_init(&pfm_sessions.pfs_lock);
6534 spin_lock_init(&pfm_buffer_fmt_lock);
6535
6536 init_pfm_fs();
6537
6538 for(i=0; i < NR_CPUS; i++) pfm_stats[i].pfm_ovfl_intr_cycles_min = ~0UL;
6539
6540 return 0;
6541}
6542
6543__initcall(pfm_init);
6544
6545/*
6546 * this function is called before pfm_init()
6547 */
6548void
6549pfm_init_percpu (void)
6550{
6551 /*
6552 * make sure no measurement is active
6553 * (may inherit programmed PMCs from EFI).
6554 */
6555 pfm_clear_psr_pp();
6556 pfm_clear_psr_up();
6557
6558 /*
6559 * we run with the PMU not frozen at all times
6560 */
6561 pfm_unfreeze_pmu();
6562
6563 if (smp_processor_id() == 0)
6564 register_percpu_irq(IA64_PERFMON_VECTOR, &perfmon_irqaction);
6565
6566 ia64_setreg(_IA64_REG_CR_PMV, IA64_PERFMON_VECTOR);
6567 ia64_srlz_d();
6568}
6569
6570/*
6571 * used for debug purposes only
6572 */
6573void
6574dump_pmu_state(const char *from)
6575{
6576 struct task_struct *task;
6577 struct thread_struct *t;
6578 struct pt_regs *regs;
6579 pfm_context_t *ctx;
6580 unsigned long psr, dcr, info, flags;
6581 int i, this_cpu;
6582
6583 local_irq_save(flags);
6584
6585 this_cpu = smp_processor_id();
6586 regs = ia64_task_regs(current);
6587 info = PFM_CPUINFO_GET();
6588 dcr = ia64_getreg(_IA64_REG_CR_DCR);
6589
6590 if (info == 0 && ia64_psr(regs)->pp == 0 && (dcr & IA64_DCR_PP) == 0) {
6591 local_irq_restore(flags);
6592 return;
6593 }
6594
6595 printk("CPU%d from %s() current [%d] iip=0x%lx %s\n",
6596 this_cpu,
6597 from,
6598 current->pid,
6599 regs->cr_iip,
6600 current->comm);
6601
6602 task = GET_PMU_OWNER();
6603 ctx = GET_PMU_CTX();
6604
6605 printk("->CPU%d owner [%d] ctx=%p\n", this_cpu, task ? task->pid : -1, ctx);
6606
6607 psr = pfm_get_psr();
6608
6609 printk("->CPU%d pmc0=0x%lx psr.pp=%d psr.up=%d dcr.pp=%d syst_info=0x%lx user_psr.up=%d user_psr.pp=%d\n",
6610 this_cpu,
6611 ia64_get_pmc(0),
6612 psr & IA64_PSR_PP ? 1 : 0,
6613 psr & IA64_PSR_UP ? 1 : 0,
6614 dcr & IA64_DCR_PP ? 1 : 0,
6615 info,
6616 ia64_psr(regs)->up,
6617 ia64_psr(regs)->pp);
6618
6619 ia64_psr(regs)->up = 0;
6620 ia64_psr(regs)->pp = 0;
6621
6622 t = &current->thread;
6623
6624 for (i=1; PMC_IS_LAST(i) == 0; i++) {
6625 if (PMC_IS_IMPL(i) == 0) continue;
6626 printk("->CPU%d pmc[%d]=0x%lx thread_pmc[%d]=0x%lx\n", this_cpu, i, ia64_get_pmc(i), i, t->pmcs[i]);
6627 }
6628
6629 for (i=1; PMD_IS_LAST(i) == 0; i++) {
6630 if (PMD_IS_IMPL(i) == 0) continue;
6631 printk("->CPU%d pmd[%d]=0x%lx thread_pmd[%d]=0x%lx\n", this_cpu, i, ia64_get_pmd(i), i, t->pmds[i]);
6632 }
6633
6634 if (ctx) {
6635 printk("->CPU%d ctx_state=%d vaddr=%p addr=%p fd=%d ctx_task=[%d] saved_psr_up=0x%lx\n",
6636 this_cpu,
6637 ctx->ctx_state,
6638 ctx->ctx_smpl_vaddr,
6639 ctx->ctx_smpl_hdr,
6640 ctx->ctx_msgq_head,
6641 ctx->ctx_msgq_tail,
6642 ctx->ctx_saved_psr_up);
6643 }
6644 local_irq_restore(flags);
6645}
6646
6647/*
6648 * called from process.c:copy_thread(). task is new child.
6649 */
6650void
6651pfm_inherit(struct task_struct *task, struct pt_regs *regs)
6652{
6653 struct thread_struct *thread;
6654
6655 DPRINT(("perfmon: pfm_inherit clearing state for [%d]\n", task->pid));
6656
6657 thread = &task->thread;
6658
6659 /*
6660 * cut links inherited from parent (current)
6661 */
6662 thread->pfm_context = NULL;
6663
6664 PFM_SET_WORK_PENDING(task, 0);
6665
6666 /*
6667 * the psr bits are already set properly in copy_threads()
6668 */
6669}
6670#else /* !CONFIG_PERFMON */
6671asmlinkage long
6672sys_perfmonctl (int fd, int cmd, void *arg, int count)
6673{
6674 return -ENOSYS;
6675}
6676#endif /* CONFIG_PERFMON */
diff --git a/arch/ia64/kernel/perfmon_default_smpl.c b/arch/ia64/kernel/perfmon_default_smpl.c
new file mode 100644
index 000000000000..965d29004555
--- /dev/null
+++ b/arch/ia64/kernel/perfmon_default_smpl.c
@@ -0,0 +1,306 @@
1/*
2 * Copyright (C) 2002-2003 Hewlett-Packard Co
3 * Stephane Eranian <eranian@hpl.hp.com>
4 *
5 * This file implements the default sampling buffer format
6 * for the Linux/ia64 perfmon-2 subsystem.
7 */
8#include <linux/kernel.h>
9#include <linux/types.h>
10#include <linux/module.h>
11#include <linux/config.h>
12#include <linux/init.h>
13#include <asm/delay.h>
14#include <linux/smp.h>
15
16#include <asm/perfmon.h>
17#include <asm/perfmon_default_smpl.h>
18
19MODULE_AUTHOR("Stephane Eranian <eranian@hpl.hp.com>");
20MODULE_DESCRIPTION("perfmon default sampling format");
21MODULE_LICENSE("GPL");
22
23MODULE_PARM(debug, "i");
24MODULE_PARM_DESC(debug, "debug");
25
26MODULE_PARM(debug_ovfl, "i");
27MODULE_PARM_DESC(debug_ovfl, "debug ovfl");
28
29
30#define DEFAULT_DEBUG 1
31
32#ifdef DEFAULT_DEBUG
33#define DPRINT(a) \
34 do { \
35 if (unlikely(debug >0)) { printk("%s.%d: CPU%d ", __FUNCTION__, __LINE__, smp_processor_id()); printk a; } \
36 } while (0)
37
38#define DPRINT_ovfl(a) \
39 do { \
40 if (unlikely(debug_ovfl >0)) { printk("%s.%d: CPU%d ", __FUNCTION__, __LINE__, smp_processor_id()); printk a; } \
41 } while (0)
42
43#else
44#define DPRINT(a)
45#define DPRINT_ovfl(a)
46#endif
47
48static int debug, debug_ovfl;
49
50static int
51default_validate(struct task_struct *task, unsigned int flags, int cpu, void *data)
52{
53 pfm_default_smpl_arg_t *arg = (pfm_default_smpl_arg_t*)data;
54 int ret = 0;
55
56 if (data == NULL) {
57 DPRINT(("[%d] no argument passed\n", task->pid));
58 return -EINVAL;
59 }
60
61 DPRINT(("[%d] validate flags=0x%x CPU%d\n", task->pid, flags, cpu));
62
63 /*
64 * must hold at least the buffer header + one minimally sized entry
65 */
66 if (arg->buf_size < PFM_DEFAULT_SMPL_MIN_BUF_SIZE) return -EINVAL;
67
68 DPRINT(("buf_size=%lu\n", arg->buf_size));
69
70 return ret;
71}
72
73static int
74default_get_size(struct task_struct *task, unsigned int flags, int cpu, void *data, unsigned long *size)
75{
76 pfm_default_smpl_arg_t *arg = (pfm_default_smpl_arg_t *)data;
77
78 /*
79 * size has been validated in default_validate
80 */
81 *size = arg->buf_size;
82
83 return 0;
84}
85
86static int
87default_init(struct task_struct *task, void *buf, unsigned int flags, int cpu, void *data)
88{
89 pfm_default_smpl_hdr_t *hdr;
90 pfm_default_smpl_arg_t *arg = (pfm_default_smpl_arg_t *)data;
91
92 hdr = (pfm_default_smpl_hdr_t *)buf;
93
94 hdr->hdr_version = PFM_DEFAULT_SMPL_VERSION;
95 hdr->hdr_buf_size = arg->buf_size;
96 hdr->hdr_cur_offs = sizeof(*hdr);
97 hdr->hdr_overflows = 0UL;
98 hdr->hdr_count = 0UL;
99
100 DPRINT(("[%d] buffer=%p buf_size=%lu hdr_size=%lu hdr_version=%u cur_offs=%lu\n",
101 task->pid,
102 buf,
103 hdr->hdr_buf_size,
104 sizeof(*hdr),
105 hdr->hdr_version,
106 hdr->hdr_cur_offs));
107
108 return 0;
109}
110
111static int
112default_handler(struct task_struct *task, void *buf, pfm_ovfl_arg_t *arg, struct pt_regs *regs, unsigned long stamp)
113{
114 pfm_default_smpl_hdr_t *hdr;
115 pfm_default_smpl_entry_t *ent;
116 void *cur, *last;
117 unsigned long *e, entry_size;
118 unsigned int npmds, i;
119 unsigned char ovfl_pmd;
120 unsigned char ovfl_notify;
121
122 if (unlikely(buf == NULL || arg == NULL|| regs == NULL || task == NULL)) {
123 DPRINT(("[%d] invalid arguments buf=%p arg=%p\n", task->pid, buf, arg));
124 return -EINVAL;
125 }
126
127 hdr = (pfm_default_smpl_hdr_t *)buf;
128 cur = buf+hdr->hdr_cur_offs;
129 last = buf+hdr->hdr_buf_size;
130 ovfl_pmd = arg->ovfl_pmd;
131 ovfl_notify = arg->ovfl_notify;
132
133 /*
134 * precheck for sanity
135 */
136 if ((last - cur) < PFM_DEFAULT_MAX_ENTRY_SIZE) goto full;
137
138 npmds = hweight64(arg->smpl_pmds[0]);
139
140 ent = (pfm_default_smpl_entry_t *)cur;
141
142 prefetch(arg->smpl_pmds_values);
143
144 entry_size = sizeof(*ent) + (npmds << 3);
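	/*
	 * Example: smpl_pmds[0] = 0x70 selects pmds 4-6, so npmds = 3 and the
	 * entry occupies sizeof(*ent) + 24 bytes (one 64-bit value per pmd).
	 */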
145
146 /* position for first pmd */
147 e = (unsigned long *)(ent+1);
148
149 hdr->hdr_count++;
150
151 DPRINT_ovfl(("[%d] count=%lu cur=%p last=%p free_bytes=%lu ovfl_pmd=%d ovfl_notify=%d npmds=%u\n",
152 task->pid,
153 hdr->hdr_count,
154 cur, last,
155 last-cur,
156 ovfl_pmd,
157 ovfl_notify, npmds));
158
159 /*
160 * current = task running at the time of the overflow.
161 *
162 * per-task mode:
163	 * - this is usually the task being monitored.
164 * Under certain conditions, it might be a different task
165 *
166 * system-wide:
167 * - this is not necessarily the task controlling the session
168 */
169 ent->pid = current->pid;
170 ent->ovfl_pmd = ovfl_pmd;
171 ent->last_reset_val = arg->pmd_last_reset; //pmd[0].reg_last_reset_val;
172
173 /*
174 * where did the fault happen (includes slot number)
175 */
176 ent->ip = regs->cr_iip | ((regs->cr_ipsr >> 41) & 0x3);
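	/*
	 * cr_ipsr bits 41-42 hold psr.ri, the slot (0-2) of the faulting
	 * instruction within its 16-byte bundle; since cr_iip is bundle
	 * aligned, the slot can simply be OR-ed into the low bits of ip.
	 */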
177
178 ent->tstamp = stamp;
179 ent->cpu = smp_processor_id();
180 ent->set = arg->active_set;
181 ent->tgid = current->tgid;
182
183 /*
184 * selectively store PMDs in increasing index number
185 */
186 if (npmds) {
187 unsigned long *val = arg->smpl_pmds_values;
188 for(i=0; i < npmds; i++) {
189 *e++ = *val++;
190 }
191 }
192
193 /*
194 * update position for next entry
195 */
196 hdr->hdr_cur_offs += entry_size;
197 cur += entry_size;
198
199 /*
200 * post check to avoid losing the last sample
201 */
202 if ((last - cur) < PFM_DEFAULT_MAX_ENTRY_SIZE) goto full;
203
204 /*
205 * keep same ovfl_pmds, ovfl_notify
206 */
207 arg->ovfl_ctrl.bits.notify_user = 0;
208 arg->ovfl_ctrl.bits.block_task = 0;
209 arg->ovfl_ctrl.bits.mask_monitoring = 0;
210 arg->ovfl_ctrl.bits.reset_ovfl_pmds = 1; /* reset before returning from interrupt handler */
211
212 return 0;
213full:
214 DPRINT_ovfl(("sampling buffer full free=%lu, count=%lu, ovfl_notify=%d\n", last-cur, hdr->hdr_count, ovfl_notify));
215
216 /*
217	 * increment the number of buffer overflows.
218	 * important to detect duplicate sets of samples.
219 */
220 hdr->hdr_overflows++;
221
222 /*
223 * if no notification requested, then we saturate the buffer
224 */
225 if (ovfl_notify == 0) {
226 arg->ovfl_ctrl.bits.notify_user = 0;
227 arg->ovfl_ctrl.bits.block_task = 0;
228 arg->ovfl_ctrl.bits.mask_monitoring = 1;
229 arg->ovfl_ctrl.bits.reset_ovfl_pmds = 0;
230 } else {
231 arg->ovfl_ctrl.bits.notify_user = 1;
232 arg->ovfl_ctrl.bits.block_task = 1; /* ignored for non-blocking context */
233 arg->ovfl_ctrl.bits.mask_monitoring = 1;
234 arg->ovfl_ctrl.bits.reset_ovfl_pmds = 0; /* no reset now */
235 }
236 return -1; /* we are full, sorry */
237}
238
239static int
240default_restart(struct task_struct *task, pfm_ovfl_ctrl_t *ctrl, void *buf, struct pt_regs *regs)
241{
242 pfm_default_smpl_hdr_t *hdr;
243
244 hdr = (pfm_default_smpl_hdr_t *)buf;
245
246 hdr->hdr_count = 0UL;
247 hdr->hdr_cur_offs = sizeof(*hdr);
248
249 ctrl->bits.mask_monitoring = 0;
250 ctrl->bits.reset_ovfl_pmds = 1; /* uses long-reset values */
251
252 return 0;
253}
254
255static int
256default_exit(struct task_struct *task, void *buf, struct pt_regs *regs)
257{
258 DPRINT(("[%d] exit(%p)\n", task->pid, buf));
259 return 0;
260}
261
262static pfm_buffer_fmt_t default_fmt={
263 .fmt_name = "default_format",
264 .fmt_uuid = PFM_DEFAULT_SMPL_UUID,
265 .fmt_arg_size = sizeof(pfm_default_smpl_arg_t),
266 .fmt_validate = default_validate,
267 .fmt_getsize = default_get_size,
268 .fmt_init = default_init,
269 .fmt_handler = default_handler,
270 .fmt_restart = default_restart,
271 .fmt_restart_active = default_restart,
272 .fmt_exit = default_exit,
273};
274
275static int __init
276pfm_default_smpl_init_module(void)
277{
278 int ret;
279
280 ret = pfm_register_buffer_fmt(&default_fmt);
281 if (ret == 0) {
282 printk("perfmon_default_smpl: %s v%u.%u registered\n",
283 default_fmt.fmt_name,
284 PFM_DEFAULT_SMPL_VERSION_MAJ,
285 PFM_DEFAULT_SMPL_VERSION_MIN);
286 } else {
287 printk("perfmon_default_smpl: %s cannot register ret=%d\n",
288 default_fmt.fmt_name,
289 ret);
290 }
291
292 return ret;
293}
294
295static void __exit
296pfm_default_smpl_cleanup_module(void)
297{
298 int ret;
299 ret = pfm_unregister_buffer_fmt(default_fmt.fmt_uuid);
300
301 printk("perfmon_default_smpl: unregister %s=%d\n", default_fmt.fmt_name, ret);
302}
303
304module_init(pfm_default_smpl_init_module);
305module_exit(pfm_default_smpl_cleanup_module);
306
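The overflow handler above appends one fixed-size entry per PMU overflow and only declares the buffer full after it has advanced the cursor, so the sample that triggered the check is never dropped. Below is a minimal user-space sketch of that cursor/post-check pattern; the buffer size, entry size and names are illustrative and not taken from the perfmon ABI.

#include <stdio.h>
#include <string.h>

#define BUF_SIZE   4096   /* illustrative buffer size */
#define ENTRY_SIZE 64     /* illustrative fixed entry size */

static unsigned char buf[BUF_SIZE];
static size_t cur_offs = 0;          /* plays the role of hdr_cur_offs */
static unsigned long count = 0;      /* plays the role of hdr_count   */

/* Append one entry; return 0 on success, -1 once the buffer saturates. */
static int append_sample(const void *sample)
{
	memcpy(buf + cur_offs, sample, ENTRY_SIZE);
	count++;

	/* update position for the next entry */
	cur_offs += ENTRY_SIZE;

	/* post check: if the next entry would not fit, report "full" now,
	 * so the entry we just wrote is still kept */
	if (BUF_SIZE - cur_offs < ENTRY_SIZE)
		return -1;
	return 0;
}

int main(void)
{
	unsigned char sample[ENTRY_SIZE] = { 0 };

	while (append_sample(sample) == 0)
		;
	printf("stored %lu entries before saturating\n", count);
	return 0;
}
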
diff --git a/arch/ia64/kernel/perfmon_generic.h b/arch/ia64/kernel/perfmon_generic.h
new file mode 100644
index 000000000000..67489478041e
--- /dev/null
+++ b/arch/ia64/kernel/perfmon_generic.h
@@ -0,0 +1,45 @@
1/*
2 * This file contains the generic PMU register description tables
3 * and pmc checker used by perfmon.c.
4 *
5 * Copyright (C) 2002-2003 Hewlett Packard Co
6 * Stephane Eranian <eranian@hpl.hp.com>
7 */
8
9static pfm_reg_desc_t pfm_gen_pmc_desc[PMU_MAX_PMCS]={
10/* pmc0 */ { PFM_REG_CONTROL , 0, 0x1UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
11/* pmc1 */ { PFM_REG_CONTROL , 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
12/* pmc2 */ { PFM_REG_CONTROL , 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
13/* pmc3 */ { PFM_REG_CONTROL , 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
14/* pmc4 */ { PFM_REG_COUNTING, 0, 0x0UL, -1UL, NULL, NULL, {RDEP(4),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
15/* pmc5 */ { PFM_REG_COUNTING, 0, 0x0UL, -1UL, NULL, NULL, {RDEP(5),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
16/* pmc6 */ { PFM_REG_COUNTING, 0, 0x0UL, -1UL, NULL, NULL, {RDEP(6),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
17/* pmc7 */ { PFM_REG_COUNTING, 0, 0x0UL, -1UL, NULL, NULL, {RDEP(7),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
18 { PFM_REG_END , 0, 0x0UL, -1UL, NULL, NULL, {0,}, {0,}}, /* end marker */
19};
20
21static pfm_reg_desc_t pfm_gen_pmd_desc[PMU_MAX_PMDS]={
22/* pmd0 */ { PFM_REG_NOTIMPL , 0, 0x0UL, -1UL, NULL, NULL, {0,}, {0,}},
23/* pmd1 */ { PFM_REG_NOTIMPL , 0, 0x0UL, -1UL, NULL, NULL, {0,}, {0,}},
24/* pmd2 */ { PFM_REG_NOTIMPL , 0, 0x0UL, -1UL, NULL, NULL, {0,}, {0,}},
25/* pmd3 */ { PFM_REG_NOTIMPL , 0, 0x0UL, -1UL, NULL, NULL, {0,}, {0,}},
26/* pmd4 */ { PFM_REG_COUNTING, 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {RDEP(4),0UL, 0UL, 0UL}},
27/* pmd5 */ { PFM_REG_COUNTING, 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {RDEP(5),0UL, 0UL, 0UL}},
28/* pmd6 */ { PFM_REG_COUNTING, 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {RDEP(6),0UL, 0UL, 0UL}},
29/* pmd7 */ { PFM_REG_COUNTING, 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {RDEP(7),0UL, 0UL, 0UL}},
30 { PFM_REG_END , 0, 0x0UL, -1UL, NULL, NULL, {0,}, {0,}}, /* end marker */
31};
32
33/*
34 * impl_pmcs, impl_pmds are computed at runtime to minimize errors!
35 */
36static pmu_config_t pmu_conf_gen={
37 .pmu_name = "Generic",
38 .pmu_family = 0xff, /* any */
39 .ovfl_val = (1UL << 32) - 1,
40 .num_ibrs = 0, /* does not use */
41 .num_dbrs = 0, /* does not use */
42 .pmd_desc = pfm_gen_pmd_desc,
43 .pmc_desc = pfm_gen_pmc_desc
44};
45
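Each row of the descriptor tables above carries two dependency bitmasks built with RDEP(); judging from how the values are OR-combined, RDEP(n) presumably expands to a single-bit mask such as (1UL << n). The sketch below shows how such a mask can be walked to list the PMDs a given PMC controls; the macro expansion and helper are assumptions made for illustration, not the perfmon implementation.

#include <stdio.h>

/* assumed expansion of RDEP(); the real definition lives with perfmon.c */
#define RDEP(x) (1UL << (x))

/* Print the PMD indices encoded in a dependency mask. */
static void print_deps(const char *name, unsigned long mask)
{
	printf("%s depends on:", name);
	for (int i = 0; i < 64; i++)
		if (mask & (1UL << i))
			printf(" pmd%d", i);
	printf("\n");
}

int main(void)
{
	/* pmc4 -> pmd4, as in the generic table above */
	print_deps("pmc4", RDEP(4));
	/* pmc12 on Itanium covers the branch trace buffer pmd8-pmd16 */
	print_deps("pmc12", RDEP(8) | RDEP(9) | RDEP(10) | RDEP(11) |
			    RDEP(12) | RDEP(13) | RDEP(14) | RDEP(15) | RDEP(16));
	return 0;
}
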
diff --git a/arch/ia64/kernel/perfmon_itanium.h b/arch/ia64/kernel/perfmon_itanium.h
new file mode 100644
index 000000000000..d1d508a0fbd3
--- /dev/null
+++ b/arch/ia64/kernel/perfmon_itanium.h
@@ -0,0 +1,115 @@
1/*
2 * This file contains the Itanium PMU register description tables
3 * and pmc checker used by perfmon.c.
4 *
5 * Copyright (C) 2002-2003 Hewlett Packard Co
6 * Stephane Eranian <eranian@hpl.hp.com>
7 */
8static int pfm_ita_pmc_check(struct task_struct *task, pfm_context_t *ctx, unsigned int cnum, unsigned long *val, struct pt_regs *regs);
9
10static pfm_reg_desc_t pfm_ita_pmc_desc[PMU_MAX_PMCS]={
11/* pmc0 */ { PFM_REG_CONTROL , 0, 0x1UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
12/* pmc1 */ { PFM_REG_CONTROL , 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
13/* pmc2 */ { PFM_REG_CONTROL , 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
14/* pmc3 */ { PFM_REG_CONTROL , 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
15/* pmc4 */ { PFM_REG_COUNTING, 6, 0x0UL, -1UL, NULL, NULL, {RDEP(4),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
16/* pmc5 */ { PFM_REG_COUNTING, 6, 0x0UL, -1UL, NULL, NULL, {RDEP(5),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
17/* pmc6 */ { PFM_REG_COUNTING, 6, 0x0UL, -1UL, NULL, NULL, {RDEP(6),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
18/* pmc7 */ { PFM_REG_COUNTING, 6, 0x0UL, -1UL, NULL, NULL, {RDEP(7),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
19/* pmc8 */ { PFM_REG_CONFIG , 0, 0xf00000003ffffff8UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
20/* pmc9 */ { PFM_REG_CONFIG , 0, 0xf00000003ffffff8UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
21/* pmc10 */ { PFM_REG_MONITOR , 6, 0x0UL, -1UL, NULL, NULL, {RDEP(0)|RDEP(1),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
22/* pmc11 */ { PFM_REG_MONITOR , 6, 0x0000000010000000UL, -1UL, NULL, pfm_ita_pmc_check, {RDEP(2)|RDEP(3)|RDEP(17),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
23/* pmc12 */ { PFM_REG_MONITOR , 6, 0x0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(9)|RDEP(10)|RDEP(11)|RDEP(12)|RDEP(13)|RDEP(14)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
24/* pmc13 */ { PFM_REG_CONFIG , 0, 0x0003ffff00000001UL, -1UL, NULL, pfm_ita_pmc_check, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
25 { PFM_REG_END , 0, 0x0UL, -1UL, NULL, NULL, {0,}, {0,}}, /* end marker */
26};
27
28static pfm_reg_desc_t pfm_ita_pmd_desc[PMU_MAX_PMDS]={
29/* pmd0 */ { PFM_REG_BUFFER , 0, 0UL, -1UL, NULL, NULL, {RDEP(1),0UL, 0UL, 0UL}, {RDEP(10),0UL, 0UL, 0UL}},
30/* pmd1 */ { PFM_REG_BUFFER , 0, 0UL, -1UL, NULL, NULL, {RDEP(0),0UL, 0UL, 0UL}, {RDEP(10),0UL, 0UL, 0UL}},
31/* pmd2 */ { PFM_REG_BUFFER , 0, 0UL, -1UL, NULL, NULL, {RDEP(3)|RDEP(17),0UL, 0UL, 0UL}, {RDEP(11),0UL, 0UL, 0UL}},
32/* pmd3 */ { PFM_REG_BUFFER , 0, 0UL, -1UL, NULL, NULL, {RDEP(2)|RDEP(17),0UL, 0UL, 0UL}, {RDEP(11),0UL, 0UL, 0UL}},
33/* pmd4 */ { PFM_REG_COUNTING, 0, 0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {RDEP(4),0UL, 0UL, 0UL}},
34/* pmd5 */ { PFM_REG_COUNTING, 0, 0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {RDEP(5),0UL, 0UL, 0UL}},
35/* pmd6 */ { PFM_REG_COUNTING, 0, 0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {RDEP(6),0UL, 0UL, 0UL}},
36/* pmd7 */ { PFM_REG_COUNTING, 0, 0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {RDEP(7),0UL, 0UL, 0UL}},
37/* pmd8 */ { PFM_REG_BUFFER , 0, 0UL, -1UL, NULL, NULL, {RDEP(9)|RDEP(10)|RDEP(11)|RDEP(12)|RDEP(13)|RDEP(14)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}},
38/* pmd9 */ { PFM_REG_BUFFER , 0, 0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(10)|RDEP(11)|RDEP(12)|RDEP(13)|RDEP(14)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}},
39/* pmd10 */ { PFM_REG_BUFFER , 0, 0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(9)|RDEP(11)|RDEP(12)|RDEP(13)|RDEP(14)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}},
40/* pmd11 */ { PFM_REG_BUFFER , 0, 0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(9)|RDEP(10)|RDEP(12)|RDEP(13)|RDEP(14)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}},
41/* pmd12 */ { PFM_REG_BUFFER , 0, 0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(9)|RDEP(10)|RDEP(11)|RDEP(13)|RDEP(14)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}},
42/* pmd13 */ { PFM_REG_BUFFER , 0, 0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(9)|RDEP(10)|RDEP(11)|RDEP(12)|RDEP(14)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}},
43/* pmd14 */ { PFM_REG_BUFFER , 0, 0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(9)|RDEP(10)|RDEP(11)|RDEP(12)|RDEP(13)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}},
44/* pmd15 */ { PFM_REG_BUFFER , 0, 0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(9)|RDEP(10)|RDEP(11)|RDEP(12)|RDEP(13)|RDEP(14)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}},
45/* pmd16 */ { PFM_REG_BUFFER , 0, 0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(9)|RDEP(10)|RDEP(11)|RDEP(12)|RDEP(13)|RDEP(14)|RDEP(15),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}},
46/* pmd17 */ { PFM_REG_BUFFER , 0, 0UL, -1UL, NULL, NULL, {RDEP(2)|RDEP(3),0UL, 0UL, 0UL}, {RDEP(11),0UL, 0UL, 0UL}},
47 { PFM_REG_END , 0, 0UL, -1UL, NULL, NULL, {0,}, {0,}}, /* end marker */
48};
49
50static int
51pfm_ita_pmc_check(struct task_struct *task, pfm_context_t *ctx, unsigned int cnum, unsigned long *val, struct pt_regs *regs)
52{
53 int ret;
54 int is_loaded;
55
56	/* sanity check */
57 if (ctx == NULL) return -EINVAL;
58
59 is_loaded = ctx->ctx_state == PFM_CTX_LOADED || ctx->ctx_state == PFM_CTX_MASKED;
60
61 /*
62	 * we must clear the (instruction) debug registers if the pmc13.ta bit is cleared
63	 * and they have not yet been written (fl_using_dbreg==0), to avoid picking up stale information.
64 */
65 if (cnum == 13 && is_loaded && ((*val & 0x1) == 0UL) && ctx->ctx_fl_using_dbreg == 0) {
66
67 DPRINT(("pmc[%d]=0x%lx has active pmc13.ta cleared, clearing ibr\n", cnum, *val));
68
69 /* don't mix debug with perfmon */
70 if (task && (task->thread.flags & IA64_THREAD_DBG_VALID) != 0) return -EINVAL;
71
72 /*
73 * a count of 0 will mark the debug registers as in use and also
74 * ensure that they are properly cleared.
75 */
76 ret = pfm_write_ibr_dbr(1, ctx, NULL, 0, regs);
77 if (ret) return ret;
78 }
79
80 /*
81	 * we must clear the (data) debug registers if the pmc11.pt bit is cleared
82	 * and they have not yet been written (fl_using_dbreg==0), to avoid picking up stale information.
83 */
84 if (cnum == 11 && is_loaded && ((*val >> 28)& 0x1) == 0 && ctx->ctx_fl_using_dbreg == 0) {
85
86 DPRINT(("pmc[%d]=0x%lx has active pmc11.pt cleared, clearing dbr\n", cnum, *val));
87
88 /* don't mix debug with perfmon */
89 if (task && (task->thread.flags & IA64_THREAD_DBG_VALID) != 0) return -EINVAL;
90
91 /*
92 * a count of 0 will mark the debug registers as in use and also
93 * ensure that they are properly cleared.
94 */
95 ret = pfm_write_ibr_dbr(0, ctx, NULL, 0, regs);
96 if (ret) return ret;
97 }
98 return 0;
99}
100
101/*
102 * impl_pmcs, impl_pmds are computed at runtime to minimize errors!
103 */
104static pmu_config_t pmu_conf_ita={
105 .pmu_name = "Itanium",
106 .pmu_family = 0x7,
107 .ovfl_val = (1UL << 32) - 1,
108 .pmd_desc = pfm_ita_pmd_desc,
109 .pmc_desc = pfm_ita_pmc_desc,
110 .num_ibrs = 8,
111 .num_dbrs = 8,
112	.use_rr_dbregs = 1, /* debug registers are used for range restrictions */
113};
114
115
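pfm_ita_pmc_check above keys off two single configuration bits: pmc13.ta in bit 0 and pmc11.pt in bit 28. A tiny standalone sketch of those predicates follows; the helper names are made up for illustration only.

#include <stdio.h>

/* pmc13.ta lives in bit 0: cleared means the ibrs will be used */
static int pmc13_ta_cleared(unsigned long pmc13)
{
	return (pmc13 & 0x1UL) == 0;
}

/* pmc11.pt lives in bit 28: cleared means the dbrs will be used */
static int pmc11_pt_cleared(unsigned long pmc11)
{
	return ((pmc11 >> 28) & 0x1UL) == 0;
}

int main(void)
{
	/* 0x0000000010000000UL is the pmc11 default in the table above:
	 * bit 28 set, so the data debug registers are not in use */
	printf("pmc11 default: pt cleared = %d\n",
	       pmc11_pt_cleared(0x0000000010000000UL));
	printf("pmc13 = 0x0:   ta cleared = %d\n", pmc13_ta_cleared(0x0UL));
	return 0;
}
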
diff --git a/arch/ia64/kernel/perfmon_mckinley.h b/arch/ia64/kernel/perfmon_mckinley.h
new file mode 100644
index 000000000000..9becccda2897
--- /dev/null
+++ b/arch/ia64/kernel/perfmon_mckinley.h
@@ -0,0 +1,187 @@
1/*
2 * This file contains the McKinley PMU register description tables
3 * and pmc checker used by perfmon.c.
4 *
5 * Copyright (C) 2002-2003 Hewlett Packard Co
6 * Stephane Eranian <eranian@hpl.hp.com>
7 */
8static int pfm_mck_pmc_check(struct task_struct *task, pfm_context_t *ctx, unsigned int cnum, unsigned long *val, struct pt_regs *regs);
9
10static pfm_reg_desc_t pfm_mck_pmc_desc[PMU_MAX_PMCS]={
11/* pmc0 */ { PFM_REG_CONTROL , 0, 0x1UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
12/* pmc1 */ { PFM_REG_CONTROL , 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
13/* pmc2 */ { PFM_REG_CONTROL , 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
14/* pmc3 */ { PFM_REG_CONTROL , 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
15/* pmc4 */ { PFM_REG_COUNTING, 6, 0x0000000000800000UL, 0xfffff7fUL, NULL, pfm_mck_pmc_check, {RDEP(4),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
16/* pmc5 */ { PFM_REG_COUNTING, 6, 0x0UL, 0xfffff7fUL, NULL, pfm_mck_pmc_check, {RDEP(5),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
17/* pmc6 */ { PFM_REG_COUNTING, 6, 0x0UL, 0xfffff7fUL, NULL, pfm_mck_pmc_check, {RDEP(6),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
18/* pmc7 */ { PFM_REG_COUNTING, 6, 0x0UL, 0xfffff7fUL, NULL, pfm_mck_pmc_check, {RDEP(7),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
19/* pmc8 */ { PFM_REG_CONFIG , 0, 0xffffffff3fffffffUL, 0xffffffff3ffffffbUL, NULL, pfm_mck_pmc_check, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
20/* pmc9 */ { PFM_REG_CONFIG , 0, 0xffffffff3ffffffcUL, 0xffffffff3ffffffbUL, NULL, pfm_mck_pmc_check, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
21/* pmc10 */ { PFM_REG_MONITOR , 4, 0x0UL, 0xffffUL, NULL, pfm_mck_pmc_check, {RDEP(0)|RDEP(1),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
22/* pmc11 */ { PFM_REG_MONITOR , 6, 0x0UL, 0x30f01cf, NULL, pfm_mck_pmc_check, {RDEP(2)|RDEP(3)|RDEP(17),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
23/* pmc12 */ { PFM_REG_MONITOR , 6, 0x0UL, 0xffffUL, NULL, pfm_mck_pmc_check, {RDEP(8)|RDEP(9)|RDEP(10)|RDEP(11)|RDEP(12)|RDEP(13)|RDEP(14)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
24/* pmc13 */ { PFM_REG_CONFIG , 0, 0x00002078fefefefeUL, 0x1e00018181818UL, NULL, pfm_mck_pmc_check, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
25/* pmc14 */ { PFM_REG_CONFIG , 0, 0x0db60db60db60db6UL, 0x2492UL, NULL, pfm_mck_pmc_check, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
26/* pmc15 */ { PFM_REG_CONFIG , 0, 0x00000000fffffff0UL, 0xfUL, NULL, pfm_mck_pmc_check, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}},
27 { PFM_REG_END , 0, 0x0UL, -1UL, NULL, NULL, {0,}, {0,}}, /* end marker */
28};
29
30static pfm_reg_desc_t pfm_mck_pmd_desc[PMU_MAX_PMDS]={
31/* pmd0 */ { PFM_REG_BUFFER , 0, 0x0UL, -1UL, NULL, NULL, {RDEP(1),0UL, 0UL, 0UL}, {RDEP(10),0UL, 0UL, 0UL}},
32/* pmd1 */ { PFM_REG_BUFFER , 0, 0x0UL, -1UL, NULL, NULL, {RDEP(0),0UL, 0UL, 0UL}, {RDEP(10),0UL, 0UL, 0UL}},
33/* pmd2 */ { PFM_REG_BUFFER , 0, 0x0UL, -1UL, NULL, NULL, {RDEP(3)|RDEP(17),0UL, 0UL, 0UL}, {RDEP(11),0UL, 0UL, 0UL}},
34/* pmd3 */ { PFM_REG_BUFFER , 0, 0x0UL, -1UL, NULL, NULL, {RDEP(2)|RDEP(17),0UL, 0UL, 0UL}, {RDEP(11),0UL, 0UL, 0UL}},
35/* pmd4 */ { PFM_REG_COUNTING, 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {RDEP(4),0UL, 0UL, 0UL}},
36/* pmd5 */ { PFM_REG_COUNTING, 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {RDEP(5),0UL, 0UL, 0UL}},
37/* pmd6 */ { PFM_REG_COUNTING, 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {RDEP(6),0UL, 0UL, 0UL}},
38/* pmd7 */ { PFM_REG_COUNTING, 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {RDEP(7),0UL, 0UL, 0UL}},
39/* pmd8 */ { PFM_REG_BUFFER , 0, 0x0UL, -1UL, NULL, NULL, {RDEP(9)|RDEP(10)|RDEP(11)|RDEP(12)|RDEP(13)|RDEP(14)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}},
40/* pmd9 */ { PFM_REG_BUFFER , 0, 0x0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(10)|RDEP(11)|RDEP(12)|RDEP(13)|RDEP(14)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}},
41/* pmd10 */ { PFM_REG_BUFFER , 0, 0x0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(9)|RDEP(11)|RDEP(12)|RDEP(13)|RDEP(14)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}},
42/* pmd11 */ { PFM_REG_BUFFER , 0, 0x0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(9)|RDEP(10)|RDEP(12)|RDEP(13)|RDEP(14)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}},
43/* pmd12 */ { PFM_REG_BUFFER , 0, 0x0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(9)|RDEP(10)|RDEP(11)|RDEP(13)|RDEP(14)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}},
44/* pmd13 */ { PFM_REG_BUFFER , 0, 0x0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(9)|RDEP(10)|RDEP(11)|RDEP(12)|RDEP(14)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}},
45/* pmd14 */ { PFM_REG_BUFFER , 0, 0x0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(9)|RDEP(10)|RDEP(11)|RDEP(12)|RDEP(13)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}},
46/* pmd15 */ { PFM_REG_BUFFER , 0, 0x0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(9)|RDEP(10)|RDEP(11)|RDEP(12)|RDEP(13)|RDEP(14)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}},
47/* pmd16 */ { PFM_REG_BUFFER , 0, 0x0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(9)|RDEP(10)|RDEP(11)|RDEP(12)|RDEP(13)|RDEP(14)|RDEP(15),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}},
48/* pmd17 */ { PFM_REG_BUFFER , 0, 0x0UL, -1UL, NULL, NULL, {RDEP(2)|RDEP(3),0UL, 0UL, 0UL}, {RDEP(11),0UL, 0UL, 0UL}},
49 { PFM_REG_END , 0, 0x0UL, -1UL, NULL, NULL, {0,}, {0,}}, /* end marker */
50};
51
52/*
53 * PMC reserved fields must have their power-up values preserved
54 */
55static int
56pfm_mck_reserved(unsigned int cnum, unsigned long *val, struct pt_regs *regs)
57{
58 unsigned long tmp1, tmp2, ival = *val;
59
60 /* remove reserved areas from user value */
61 tmp1 = ival & PMC_RSVD_MASK(cnum);
62
63 /* get reserved fields values */
64 tmp2 = PMC_DFL_VAL(cnum) & ~PMC_RSVD_MASK(cnum);
65
66 *val = tmp1 | tmp2;
67
68 DPRINT(("pmc[%d]=0x%lx, mask=0x%lx, reset=0x%lx, val=0x%lx\n",
69 cnum, ival, PMC_RSVD_MASK(cnum), PMC_DFL_VAL(cnum), *val));
70 return 0;
71}
72
73/*
74 * task can be NULL if the context is unloaded
75 */
76static int
77pfm_mck_pmc_check(struct task_struct *task, pfm_context_t *ctx, unsigned int cnum, unsigned long *val, struct pt_regs *regs)
78{
79 int ret = 0, check_case1 = 0;
80 unsigned long val8 = 0, val14 = 0, val13 = 0;
81 int is_loaded;
82
83 /* first preserve the reserved fields */
84 pfm_mck_reserved(cnum, val, regs);
85
86	/* sanity check */
87 if (ctx == NULL) return -EINVAL;
88
89 is_loaded = ctx->ctx_state == PFM_CTX_LOADED || ctx->ctx_state == PFM_CTX_MASKED;
90
91 /*
92	 * we must clear the debug registers if pmc13 has a value which enables
93	 * memory pipeline event constraints. In this case we need to clear the
94	 * debug registers if they have not yet been accessed. This is required
95	 * to avoid picking up stale state.
96	 * PMC13 is "active" if:
97	 * one of the pmc13.cfg_dbrpXX fields is different from 0x3
98	 * AND
99	 * the corresponding pmc13.ena_dbrpXX bit is set.
100 */
101 DPRINT(("cnum=%u val=0x%lx, using_dbreg=%d loaded=%d\n", cnum, *val, ctx->ctx_fl_using_dbreg, is_loaded));
102
103 if (cnum == 13 && is_loaded
104 && (*val & 0x1e00000000000UL) && (*val & 0x18181818UL) != 0x18181818UL && ctx->ctx_fl_using_dbreg == 0) {
105
106 DPRINT(("pmc[%d]=0x%lx has active pmc13 settings, clearing dbr\n", cnum, *val));
107
108 /* don't mix debug with perfmon */
109 if (task && (task->thread.flags & IA64_THREAD_DBG_VALID) != 0) return -EINVAL;
110
111 /*
112 * a count of 0 will mark the debug registers as in use and also
113 * ensure that they are properly cleared.
114 */
115 ret = pfm_write_ibr_dbr(PFM_DATA_RR, ctx, NULL, 0, regs);
116 if (ret) return ret;
117 }
118 /*
119	 * we must clear the (instruction) debug registers if any pmc14.ibrpX bit is enabled
120	 * and they have not yet been written (fl_using_dbreg==0), to avoid picking up stale information.
121 */
122 if (cnum == 14 && is_loaded && ((*val & 0x2222UL) != 0x2222UL) && ctx->ctx_fl_using_dbreg == 0) {
123
124 DPRINT(("pmc[%d]=0x%lx has active pmc14 settings, clearing ibr\n", cnum, *val));
125
126 /* don't mix debug with perfmon */
127 if (task && (task->thread.flags & IA64_THREAD_DBG_VALID) != 0) return -EINVAL;
128
129 /*
130 * a count of 0 will mark the debug registers as in use and also
131 * ensure that they are properly cleared.
132 */
133 ret = pfm_write_ibr_dbr(PFM_CODE_RR, ctx, NULL, 0, regs);
134 if (ret) return ret;
135
136 }
137
138 switch(cnum) {
139 case 4: *val |= 1UL << 23; /* force power enable bit */
140 break;
141 case 8: val8 = *val;
142 val13 = ctx->ctx_pmcs[13];
143 val14 = ctx->ctx_pmcs[14];
144 check_case1 = 1;
145 break;
146 case 13: val8 = ctx->ctx_pmcs[8];
147 val13 = *val;
148 val14 = ctx->ctx_pmcs[14];
149 check_case1 = 1;
150 break;
151 case 14: val8 = ctx->ctx_pmcs[8];
152 val13 = ctx->ctx_pmcs[13];
153 val14 = *val;
154 check_case1 = 1;
155 break;
156 }
157	/* check for an illegal configuration which can produce inconsistencies in tagging
158	 * i-side events in the L1D and L2 caches
159 */
160 if (check_case1) {
161 ret = ((val13 >> 45) & 0xf) == 0
162 && ((val8 & 0x1) == 0)
163 && ((((val14>>1) & 0x3) == 0x2 || ((val14>>1) & 0x3) == 0x0)
164 ||(((val14>>4) & 0x3) == 0x2 || ((val14>>4) & 0x3) == 0x0));
165
166 if (ret) DPRINT((KERN_DEBUG "perfmon: failure check_case1\n"));
167 }
168
169 return ret ? -EINVAL : 0;
170}
171
172/*
173 * impl_pmcs, impl_pmds are computed at runtime to minimize errors!
174 */
175static pmu_config_t pmu_conf_mck={
176 .pmu_name = "Itanium 2",
177 .pmu_family = 0x1f,
178 .flags = PFM_PMU_IRQ_RESEND,
179 .ovfl_val = (1UL << 47) - 1,
180 .pmd_desc = pfm_mck_pmd_desc,
181 .pmc_desc = pfm_mck_pmc_desc,
182 .num_ibrs = 8,
183 .num_dbrs = 8,
184	.use_rr_dbregs = 1 /* debug registers are used for range restrictions */
185};
186
187
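pfm_mck_reserved above keeps the reserved bit fields of a PMC at their power-up values by masking the user value against the writable bits and filling the rest from the default. The following standalone sketch shows that merge with made-up mask and default values; it is an illustration of the masking technique, not the real PMC layout.

#include <stdio.h>

/*
 * Keep only the writable bits of the user value and take every other
 * bit from the power-up default, the same merge pfm_mck_reserved does
 * with PMC_RSVD_MASK()/PMC_DFL_VAL().
 */
static unsigned long merge_reserved(unsigned long user_val,
				    unsigned long writable_mask,
				    unsigned long default_val)
{
	return (user_val & writable_mask) | (default_val & ~writable_mask);
}

int main(void)
{
	/* illustrative values, not real PMC masks */
	unsigned long writable = 0x00000000ffffffffUL; /* low 32 bits writable */
	unsigned long dfl      = 0xabcd000000000000UL; /* power-up value       */
	unsigned long user     = 0xffffffffffffffffUL; /* user writes all ones */

	/* prints 0xabcd0000ffffffff: the reserved high bits are preserved */
	printf("result = 0x%016lx\n", merge_reserved(user, writable, dfl));
	return 0;
}
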
diff --git a/arch/ia64/kernel/process.c b/arch/ia64/kernel/process.c
new file mode 100644
index 000000000000..91293388dd29
--- /dev/null
+++ b/arch/ia64/kernel/process.c
@@ -0,0 +1,800 @@
1/*
2 * Architecture-specific setup.
3 *
4 * Copyright (C) 1998-2003 Hewlett-Packard Co
5 * David Mosberger-Tang <davidm@hpl.hp.com>
6 */
7#define __KERNEL_SYSCALLS__ /* see <asm/unistd.h> */
8#include <linux/config.h>
9
10#include <linux/cpu.h>
11#include <linux/pm.h>
12#include <linux/elf.h>
13#include <linux/errno.h>
14#include <linux/kallsyms.h>
15#include <linux/kernel.h>
16#include <linux/mm.h>
17#include <linux/module.h>
18#include <linux/notifier.h>
19#include <linux/personality.h>
20#include <linux/sched.h>
21#include <linux/slab.h>
22#include <linux/smp_lock.h>
23#include <linux/stddef.h>
24#include <linux/thread_info.h>
25#include <linux/unistd.h>
26#include <linux/efi.h>
27#include <linux/interrupt.h>
28#include <linux/delay.h>
29
30#include <asm/cpu.h>
31#include <asm/delay.h>
32#include <asm/elf.h>
33#include <asm/ia32.h>
34#include <asm/irq.h>
35#include <asm/pgalloc.h>
36#include <asm/processor.h>
37#include <asm/sal.h>
38#include <asm/tlbflush.h>
39#include <asm/uaccess.h>
40#include <asm/unwind.h>
41#include <asm/user.h>
42
43#include "entry.h"
44
45#ifdef CONFIG_PERFMON
46# include <asm/perfmon.h>
47#endif
48
49#include "sigframe.h"
50
51void (*ia64_mark_idle)(int);
52static cpumask_t cpu_idle_map;
53
54unsigned long boot_option_idle_override = 0;
55EXPORT_SYMBOL(boot_option_idle_override);
56
57void
58ia64_do_show_stack (struct unw_frame_info *info, void *arg)
59{
60 unsigned long ip, sp, bsp;
61 char buf[128]; /* don't make it so big that it overflows the stack! */
62
63 printk("\nCall Trace:\n");
64 do {
65 unw_get_ip(info, &ip);
66 if (ip == 0)
67 break;
68
69 unw_get_sp(info, &sp);
70 unw_get_bsp(info, &bsp);
71 snprintf(buf, sizeof(buf),
72 " [<%016lx>] %%s\n"
73 " sp=%016lx bsp=%016lx\n",
74 ip, sp, bsp);
75 print_symbol(buf, ip);
76 } while (unw_unwind(info) >= 0);
77}
78
79void
80show_stack (struct task_struct *task, unsigned long *sp)
81{
82 if (!task)
83 unw_init_running(ia64_do_show_stack, NULL);
84 else {
85 struct unw_frame_info info;
86
87 unw_init_from_blocked_task(&info, task);
88 ia64_do_show_stack(&info, NULL);
89 }
90}
91
92void
93dump_stack (void)
94{
95 show_stack(NULL, NULL);
96}
97
98EXPORT_SYMBOL(dump_stack);
99
100void
101show_regs (struct pt_regs *regs)
102{
103 unsigned long ip = regs->cr_iip + ia64_psr(regs)->ri;
104
105 print_modules();
106 printk("\nPid: %d, CPU %d, comm: %20s\n", current->pid, smp_processor_id(), current->comm);
107 printk("psr : %016lx ifs : %016lx ip : [<%016lx>] %s\n",
108 regs->cr_ipsr, regs->cr_ifs, ip, print_tainted());
109 print_symbol("ip is at %s\n", ip);
110 printk("unat: %016lx pfs : %016lx rsc : %016lx\n",
111 regs->ar_unat, regs->ar_pfs, regs->ar_rsc);
112 printk("rnat: %016lx bsps: %016lx pr : %016lx\n",
113 regs->ar_rnat, regs->ar_bspstore, regs->pr);
114 printk("ldrs: %016lx ccv : %016lx fpsr: %016lx\n",
115 regs->loadrs, regs->ar_ccv, regs->ar_fpsr);
116 printk("csd : %016lx ssd : %016lx\n", regs->ar_csd, regs->ar_ssd);
117 printk("b0 : %016lx b6 : %016lx b7 : %016lx\n", regs->b0, regs->b6, regs->b7);
118 printk("f6 : %05lx%016lx f7 : %05lx%016lx\n",
119 regs->f6.u.bits[1], regs->f6.u.bits[0],
120 regs->f7.u.bits[1], regs->f7.u.bits[0]);
121 printk("f8 : %05lx%016lx f9 : %05lx%016lx\n",
122 regs->f8.u.bits[1], regs->f8.u.bits[0],
123 regs->f9.u.bits[1], regs->f9.u.bits[0]);
124 printk("f10 : %05lx%016lx f11 : %05lx%016lx\n",
125 regs->f10.u.bits[1], regs->f10.u.bits[0],
126 regs->f11.u.bits[1], regs->f11.u.bits[0]);
127
128 printk("r1 : %016lx r2 : %016lx r3 : %016lx\n", regs->r1, regs->r2, regs->r3);
129 printk("r8 : %016lx r9 : %016lx r10 : %016lx\n", regs->r8, regs->r9, regs->r10);
130 printk("r11 : %016lx r12 : %016lx r13 : %016lx\n", regs->r11, regs->r12, regs->r13);
131 printk("r14 : %016lx r15 : %016lx r16 : %016lx\n", regs->r14, regs->r15, regs->r16);
132 printk("r17 : %016lx r18 : %016lx r19 : %016lx\n", regs->r17, regs->r18, regs->r19);
133 printk("r20 : %016lx r21 : %016lx r22 : %016lx\n", regs->r20, regs->r21, regs->r22);
134 printk("r23 : %016lx r24 : %016lx r25 : %016lx\n", regs->r23, regs->r24, regs->r25);
135 printk("r26 : %016lx r27 : %016lx r28 : %016lx\n", regs->r26, regs->r27, regs->r28);
136 printk("r29 : %016lx r30 : %016lx r31 : %016lx\n", regs->r29, regs->r30, regs->r31);
137
138 if (user_mode(regs)) {
139 /* print the stacked registers */
140 unsigned long val, *bsp, ndirty;
141 int i, sof, is_nat = 0;
142
143 sof = regs->cr_ifs & 0x7f; /* size of frame */
144 ndirty = (regs->loadrs >> 19);
145 bsp = ia64_rse_skip_regs((unsigned long *) regs->ar_bspstore, ndirty);
146 for (i = 0; i < sof; ++i) {
147 get_user(val, (unsigned long __user *) ia64_rse_skip_regs(bsp, i));
148 printk("r%-3u:%c%016lx%s", 32 + i, is_nat ? '*' : ' ', val,
149 ((i == sof - 1) || (i % 3) == 2) ? "\n" : " ");
150 }
151 } else
152 show_stack(NULL, NULL);
153}
154
155void
156do_notify_resume_user (sigset_t *oldset, struct sigscratch *scr, long in_syscall)
157{
158 if (fsys_mode(current, &scr->pt)) {
159 /* defer signal-handling etc. until we return to privilege-level 0. */
160 if (!ia64_psr(&scr->pt)->lp)
161 ia64_psr(&scr->pt)->lp = 1;
162 return;
163 }
164
165#ifdef CONFIG_PERFMON
166 if (current->thread.pfm_needs_checking)
167 pfm_handle_work();
168#endif
169
170 /* deal with pending signal delivery */
171 if (test_thread_flag(TIF_SIGPENDING))
172 ia64_do_signal(oldset, scr, in_syscall);
173}
174
175static int pal_halt = 1;
176static int __init nohalt_setup(char * str)
177{
178 pal_halt = 0;
179 return 1;
180}
181__setup("nohalt", nohalt_setup);
182
183/*
184 * We use this if we don't have any better idle routine.
185 */
186void
187default_idle (void)
188{
189 unsigned long pmu_active = ia64_getreg(_IA64_REG_PSR) & (IA64_PSR_PP | IA64_PSR_UP);
190
191 while (!need_resched())
192 if (pal_halt && !pmu_active)
193 safe_halt();
194 else
195 cpu_relax();
196}
197
198#ifdef CONFIG_HOTPLUG_CPU
199/* We don't actually take the CPU down, just spin without interrupts. */
200static inline void play_dead(void)
201{
202 extern void ia64_cpu_local_tick (void);
203 /* Ack it */
204 __get_cpu_var(cpu_state) = CPU_DEAD;
205
206 /* We shouldn't have to disable interrupts while dead, but
207 * some interrupts just don't seem to go away, and this makes
208 * it "work" for testing purposes. */
209 max_xtp();
210 local_irq_disable();
211 /* Death loop */
212 while (__get_cpu_var(cpu_state) != CPU_UP_PREPARE)
213 cpu_relax();
214
215 /*
216 * Enable timer interrupts from now on
217	 * Not required if we put the processor in SAL_BOOT_RENDEZ mode.
218 */
219 local_flush_tlb_all();
220 cpu_set(smp_processor_id(), cpu_online_map);
221 wmb();
222 ia64_cpu_local_tick ();
223 local_irq_enable();
224}
225#else
226static inline void play_dead(void)
227{
228 BUG();
229}
230#endif /* CONFIG_HOTPLUG_CPU */
231
232
233void cpu_idle_wait(void)
234{
235 int cpu;
236 cpumask_t map;
237
238 for_each_online_cpu(cpu)
239 cpu_set(cpu, cpu_idle_map);
240
241 wmb();
242 do {
243 ssleep(1);
244 cpus_and(map, cpu_idle_map, cpu_online_map);
245 } while (!cpus_empty(map));
246}
247EXPORT_SYMBOL_GPL(cpu_idle_wait);
248
249void __attribute__((noreturn))
250cpu_idle (void)
251{
252 void (*mark_idle)(int) = ia64_mark_idle;
253 int cpu = smp_processor_id();
254
255 /* endless idle loop with no priority at all */
256 while (1) {
257#ifdef CONFIG_SMP
258 if (!need_resched())
259 min_xtp();
260#endif
261 while (!need_resched()) {
262 void (*idle)(void);
263
264 if (mark_idle)
265 (*mark_idle)(1);
266
267 if (cpu_isset(cpu, cpu_idle_map))
268 cpu_clear(cpu, cpu_idle_map);
269 rmb();
270 idle = pm_idle;
271 if (!idle)
272 idle = default_idle;
273 (*idle)();
274 }
275
276 if (mark_idle)
277 (*mark_idle)(0);
278
279#ifdef CONFIG_SMP
280 normal_xtp();
281#endif
282 schedule();
283 check_pgt_cache();
284 if (cpu_is_offline(smp_processor_id()))
285 play_dead();
286 }
287}
288
289void
290ia64_save_extra (struct task_struct *task)
291{
292#ifdef CONFIG_PERFMON
293 unsigned long info;
294#endif
295
296 if ((task->thread.flags & IA64_THREAD_DBG_VALID) != 0)
297 ia64_save_debug_regs(&task->thread.dbr[0]);
298
299#ifdef CONFIG_PERFMON
300 if ((task->thread.flags & IA64_THREAD_PM_VALID) != 0)
301 pfm_save_regs(task);
302
303 info = __get_cpu_var(pfm_syst_info);
304 if (info & PFM_CPUINFO_SYST_WIDE)
305 pfm_syst_wide_update_task(task, info, 0);
306#endif
307
308#ifdef CONFIG_IA32_SUPPORT
309 if (IS_IA32_PROCESS(ia64_task_regs(task)))
310 ia32_save_state(task);
311#endif
312}
313
314void
315ia64_load_extra (struct task_struct *task)
316{
317#ifdef CONFIG_PERFMON
318 unsigned long info;
319#endif
320
321 if ((task->thread.flags & IA64_THREAD_DBG_VALID) != 0)
322 ia64_load_debug_regs(&task->thread.dbr[0]);
323
324#ifdef CONFIG_PERFMON
325 if ((task->thread.flags & IA64_THREAD_PM_VALID) != 0)
326 pfm_load_regs(task);
327
328 info = __get_cpu_var(pfm_syst_info);
329 if (info & PFM_CPUINFO_SYST_WIDE)
330 pfm_syst_wide_update_task(task, info, 1);
331#endif
332
333#ifdef CONFIG_IA32_SUPPORT
334 if (IS_IA32_PROCESS(ia64_task_regs(task)))
335 ia32_load_state(task);
336#endif
337}
338
339/*
340 * Copy the state of an ia-64 thread.
341 *
342 * We get here through the following call chain:
343 *
344 * from user-level: from kernel:
345 *
346 * <clone syscall> <some kernel call frames>
347 * sys_clone :
348 * do_fork do_fork
349 * copy_thread copy_thread
350 *
351 * This means that the stack layout is as follows:
352 *
353 * +---------------------+ (highest addr)
354 * | struct pt_regs |
355 * +---------------------+
356 * | struct switch_stack |
357 * +---------------------+
358 * | |
359 * | memory stack |
360 * | | <-- sp (lowest addr)
361 * +---------------------+
362 *
363 * Observe that we copy the unat values that are in pt_regs and switch_stack. Spilling an
364 * integer to address X causes bit N in ar.unat to be set to the NaT bit of the register,
365 * with N=(X & 0x1ff)/8. Thus, copying the unat value preserves the NaT bits ONLY if the
366 * pt_regs structure in the parent is congruent to that of the child, modulo 512. Since
367 * the stack is page aligned and the page size is at least 4KB, this is always the case,
368 * so there is nothing to worry about.
369 */
370int
371copy_thread (int nr, unsigned long clone_flags,
372 unsigned long user_stack_base, unsigned long user_stack_size,
373 struct task_struct *p, struct pt_regs *regs)
374{
375 extern char ia64_ret_from_clone, ia32_ret_from_clone;
376 struct switch_stack *child_stack, *stack;
377 unsigned long rbs, child_rbs, rbs_size;
378 struct pt_regs *child_ptregs;
379 int retval = 0;
380
381#ifdef CONFIG_SMP
382 /*
383 * For SMP idle threads, fork_by_hand() calls do_fork with
384 * NULL regs.
385 */
386 if (!regs)
387 return 0;
388#endif
389
390 stack = ((struct switch_stack *) regs) - 1;
391
392 child_ptregs = (struct pt_regs *) ((unsigned long) p + IA64_STK_OFFSET) - 1;
393 child_stack = (struct switch_stack *) child_ptregs - 1;
394
395 /* copy parent's switch_stack & pt_regs to child: */
396 memcpy(child_stack, stack, sizeof(*child_ptregs) + sizeof(*child_stack));
397
398 rbs = (unsigned long) current + IA64_RBS_OFFSET;
399 child_rbs = (unsigned long) p + IA64_RBS_OFFSET;
400 rbs_size = stack->ar_bspstore - rbs;
401
402 /* copy the parent's register backing store to the child: */
403 memcpy((void *) child_rbs, (void *) rbs, rbs_size);
404
405 if (likely(user_mode(child_ptregs))) {
406 if ((clone_flags & CLONE_SETTLS) && !IS_IA32_PROCESS(regs))
407 child_ptregs->r13 = regs->r16; /* see sys_clone2() in entry.S */
408 if (user_stack_base) {
409 child_ptregs->r12 = user_stack_base + user_stack_size - 16;
410 child_ptregs->ar_bspstore = user_stack_base;
411 child_ptregs->ar_rnat = 0;
412 child_ptregs->loadrs = 0;
413 }
414 } else {
415 /*
416 * Note: we simply preserve the relative position of
417 * the stack pointer here. There is no need to
418 * allocate a scratch area here, since that will have
419 * been taken care of by the caller of sys_clone()
420 * already.
421 */
422 child_ptregs->r12 = (unsigned long) child_ptregs - 16; /* kernel sp */
423 child_ptregs->r13 = (unsigned long) p; /* set `current' pointer */
424 }
425 child_stack->ar_bspstore = child_rbs + rbs_size;
426 if (IS_IA32_PROCESS(regs))
427 child_stack->b0 = (unsigned long) &ia32_ret_from_clone;
428 else
429 child_stack->b0 = (unsigned long) &ia64_ret_from_clone;
430
431 /* copy parts of thread_struct: */
432 p->thread.ksp = (unsigned long) child_stack - 16;
433
434 /* stop some PSR bits from being inherited.
435 * the psr.up/psr.pp bits must be cleared on fork but inherited on execve()
436 * therefore we must specify them explicitly here and not include them in
437 * IA64_PSR_BITS_TO_CLEAR.
438 */
439 child_ptregs->cr_ipsr = ((child_ptregs->cr_ipsr | IA64_PSR_BITS_TO_SET)
440 & ~(IA64_PSR_BITS_TO_CLEAR | IA64_PSR_PP | IA64_PSR_UP));
441
442 /*
443 * NOTE: The calling convention considers all floating point
444 * registers in the high partition (fph) to be scratch. Since
445 * the only way to get to this point is through a system call,
446 * we know that the values in fph are all dead. Hence, there
447 * is no need to inherit the fph state from the parent to the
448 * child and all we have to do is to make sure that
449 * IA64_THREAD_FPH_VALID is cleared in the child.
450 *
451 * XXX We could push this optimization a bit further by
452 * clearing IA64_THREAD_FPH_VALID on ANY system call.
453 * However, it's not clear this is worth doing. Also, it
454 * would be a slight deviation from the normal Linux system
455 * call behavior where scratch registers are preserved across
456 * system calls (unless used by the system call itself).
457 */
458# define THREAD_FLAGS_TO_CLEAR (IA64_THREAD_FPH_VALID | IA64_THREAD_DBG_VALID \
459 | IA64_THREAD_PM_VALID)
460# define THREAD_FLAGS_TO_SET 0
461 p->thread.flags = ((current->thread.flags & ~THREAD_FLAGS_TO_CLEAR)
462 | THREAD_FLAGS_TO_SET);
463 ia64_drop_fpu(p); /* don't pick up stale state from a CPU's fph */
464#ifdef CONFIG_IA32_SUPPORT
465 /*
466 * If we're cloning an IA32 task then save the IA32 extra
467 * state from the current task to the new task
468 */
469 if (IS_IA32_PROCESS(ia64_task_regs(current))) {
470 ia32_save_state(p);
471 if (clone_flags & CLONE_SETTLS)
472 retval = ia32_clone_tls(p, child_ptregs);
473
474 /* Copy partially mapped page list */
475 if (!retval)
476 retval = ia32_copy_partial_page_list(p, clone_flags);
477 }
478#endif
479
480#ifdef CONFIG_PERFMON
481 if (current->thread.pfm_context)
482 pfm_inherit(p, child_ptregs);
483#endif
484 return retval;
485}
486
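The block comment before copy_thread explains why copying ar.unat verbatim is safe: the unat bit used for a spill to address X is (X & 0x1ff)/8, so two pt_regs areas that are congruent modulo 512 use identical bit positions. The sketch below simply evaluates that formula; the addresses are arbitrary examples.

#include <stdio.h>

/* Bit of ar.unat that records the NaT bit for a spill to address x. */
static unsigned int unat_bit(unsigned long x)
{
	return (unsigned int)((x & 0x1ff) / 8);
}

int main(void)
{
	unsigned long parent_slot = 0x1000e808UL;           /* arbitrary spill address */
	unsigned long child_slot  = parent_slot + 5 * 512;  /* congruent mod 512 */

	/* Same offset within a 512-byte window => same unat bit, so the
	 * parent's unat word is directly valid for the child's pt_regs. */
	printf("parent bit = %u, child bit = %u\n",
	       unat_bit(parent_slot), unat_bit(child_slot));
	return 0;
}
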
487static void
488do_copy_task_regs (struct task_struct *task, struct unw_frame_info *info, void *arg)
489{
490 unsigned long mask, sp, nat_bits = 0, ip, ar_rnat, urbs_end, cfm;
491 elf_greg_t *dst = arg;
492 struct pt_regs *pt;
493 char nat;
494 int i;
495
496 memset(dst, 0, sizeof(elf_gregset_t)); /* don't leak any kernel bits to user-level */
497
498 if (unw_unwind_to_user(info) < 0)
499 return;
500
501 unw_get_sp(info, &sp);
502 pt = (struct pt_regs *) (sp + 16);
503
504 urbs_end = ia64_get_user_rbs_end(task, pt, &cfm);
505
506 if (ia64_sync_user_rbs(task, info->sw, pt->ar_bspstore, urbs_end) < 0)
507 return;
508
509 ia64_peek(task, info->sw, urbs_end, (long) ia64_rse_rnat_addr((long *) urbs_end),
510 &ar_rnat);
511
512 /*
513 * coredump format:
514 * r0-r31
515 * NaT bits (for r0-r31; bit N == 1 iff rN is a NaT)
516 * predicate registers (p0-p63)
517 * b0-b7
518 * ip cfm user-mask
519 * ar.rsc ar.bsp ar.bspstore ar.rnat
520 * ar.ccv ar.unat ar.fpsr ar.pfs ar.lc ar.ec
521 */
522
523 /* r0 is zero */
524 for (i = 1, mask = (1UL << i); i < 32; ++i) {
525 unw_get_gr(info, i, &dst[i], &nat);
526 if (nat)
527 nat_bits |= mask;
528 mask <<= 1;
529 }
530 dst[32] = nat_bits;
531 unw_get_pr(info, &dst[33]);
532
533 for (i = 0; i < 8; ++i)
534 unw_get_br(info, i, &dst[34 + i]);
535
536 unw_get_rp(info, &ip);
537 dst[42] = ip + ia64_psr(pt)->ri;
538 dst[43] = cfm;
539 dst[44] = pt->cr_ipsr & IA64_PSR_UM;
540
541 unw_get_ar(info, UNW_AR_RSC, &dst[45]);
542 /*
543 * For bsp and bspstore, unw_get_ar() would return the kernel
544 * addresses, but we need the user-level addresses instead:
545 */
546 dst[46] = urbs_end; /* note: by convention PT_AR_BSP points to the end of the urbs! */
547 dst[47] = pt->ar_bspstore;
548 dst[48] = ar_rnat;
549 unw_get_ar(info, UNW_AR_CCV, &dst[49]);
550 unw_get_ar(info, UNW_AR_UNAT, &dst[50]);
551 unw_get_ar(info, UNW_AR_FPSR, &dst[51]);
552 dst[52] = pt->ar_pfs; /* UNW_AR_PFS is == to pt->cr_ifs for interrupt frames */
553 unw_get_ar(info, UNW_AR_LC, &dst[53]);
554 unw_get_ar(info, UNW_AR_EC, &dst[54]);
555 unw_get_ar(info, UNW_AR_CSD, &dst[55]);
556 unw_get_ar(info, UNW_AR_SSD, &dst[56]);
557}
558
559void
560do_dump_task_fpu (struct task_struct *task, struct unw_frame_info *info, void *arg)
561{
562 elf_fpreg_t *dst = arg;
563 int i;
564
565 memset(dst, 0, sizeof(elf_fpregset_t)); /* don't leak any "random" bits */
566
567 if (unw_unwind_to_user(info) < 0)
568 return;
569
570 /* f0 is 0.0, f1 is 1.0 */
571
572 for (i = 2; i < 32; ++i)
573 unw_get_fr(info, i, dst + i);
574
575 ia64_flush_fph(task);
576 if ((task->thread.flags & IA64_THREAD_FPH_VALID) != 0)
577 memcpy(dst + 32, task->thread.fph, 96*16);
578}
579
580void
581do_copy_regs (struct unw_frame_info *info, void *arg)
582{
583 do_copy_task_regs(current, info, arg);
584}
585
586void
587do_dump_fpu (struct unw_frame_info *info, void *arg)
588{
589 do_dump_task_fpu(current, info, arg);
590}
591
592int
593dump_task_regs(struct task_struct *task, elf_gregset_t *regs)
594{
595 struct unw_frame_info tcore_info;
596
597 if (current == task) {
598 unw_init_running(do_copy_regs, regs);
599 } else {
600 memset(&tcore_info, 0, sizeof(tcore_info));
601 unw_init_from_blocked_task(&tcore_info, task);
602 do_copy_task_regs(task, &tcore_info, regs);
603 }
604 return 1;
605}
606
607void
608ia64_elf_core_copy_regs (struct pt_regs *pt, elf_gregset_t dst)
609{
610 unw_init_running(do_copy_regs, dst);
611}
612
613int
614dump_task_fpu (struct task_struct *task, elf_fpregset_t *dst)
615{
616 struct unw_frame_info tcore_info;
617
618 if (current == task) {
619 unw_init_running(do_dump_fpu, dst);
620 } else {
621 memset(&tcore_info, 0, sizeof(tcore_info));
622 unw_init_from_blocked_task(&tcore_info, task);
623 do_dump_task_fpu(task, &tcore_info, dst);
624 }
625 return 1;
626}
627
628int
629dump_fpu (struct pt_regs *pt, elf_fpregset_t dst)
630{
631 unw_init_running(do_dump_fpu, dst);
632 return 1; /* f0-f31 are always valid so we always return 1 */
633}
634
635long
636sys_execve (char __user *filename, char __user * __user *argv, char __user * __user *envp,
637 struct pt_regs *regs)
638{
639 char *fname;
640 int error;
641
642 fname = getname(filename);
643 error = PTR_ERR(fname);
644 if (IS_ERR(fname))
645 goto out;
646 error = do_execve(fname, argv, envp, regs);
647 putname(fname);
648out:
649 return error;
650}
651
652pid_t
653kernel_thread (int (*fn)(void *), void *arg, unsigned long flags)
654{
655 extern void start_kernel_thread (void);
656 unsigned long *helper_fptr = (unsigned long *) &start_kernel_thread;
657 struct {
658 struct switch_stack sw;
659 struct pt_regs pt;
660 } regs;
661
662 memset(&regs, 0, sizeof(regs));
663 regs.pt.cr_iip = helper_fptr[0]; /* set entry point (IP) */
664 regs.pt.r1 = helper_fptr[1]; /* set GP */
665 regs.pt.r9 = (unsigned long) fn; /* 1st argument */
666 regs.pt.r11 = (unsigned long) arg; /* 2nd argument */
667 /* Preserve PSR bits, except for bits 32-34 and 37-45, which we can't read. */
668 regs.pt.cr_ipsr = ia64_getreg(_IA64_REG_PSR) | IA64_PSR_BN;
669 regs.pt.cr_ifs = 1UL << 63; /* mark as valid, empty frame */
670 regs.sw.ar_fpsr = regs.pt.ar_fpsr = ia64_getreg(_IA64_REG_AR_FPSR);
671 regs.sw.ar_bspstore = (unsigned long) current + IA64_RBS_OFFSET;
672 regs.sw.pr = (1 << PRED_KERNEL_STACK);
673 return do_fork(flags | CLONE_VM | CLONE_UNTRACED, 0, &regs.pt, 0, NULL, NULL);
674}
675EXPORT_SYMBOL(kernel_thread);
676
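kernel_thread above relies on the IA-64 ABI convention that a function pointer refers to a two-word descriptor: the entry address followed by the global pointer (gp), which is why helper_fptr[0] becomes cr_iip and helper_fptr[1] becomes r1. A conceptual sketch of reading such a descriptor follows; the struct and values are illustrative only and this layout applies only to ABIs that use function descriptors.

#include <stdio.h>

/* Conceptual layout of an IA-64 function descriptor. */
struct fdesc {
	unsigned long ip;  /* entry point, goes into cr_iip above */
	unsigned long gp;  /* global pointer, goes into r1 above  */
};

int main(void)
{
	/* illustrative descriptor; on IA-64 this is what
	 * (unsigned long *) &start_kernel_thread points at */
	struct fdesc desc = { .ip = 0xa000000100001234UL,
			      .gp = 0xa000000100200000UL };
	unsigned long *fptr = (unsigned long *) &desc;

	printf("entry IP = 0x%016lx, gp = 0x%016lx\n", fptr[0], fptr[1]);
	return 0;
}
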
677/* This gets called from kernel_thread() via ia64_invoke_thread_helper(). */
678int
679kernel_thread_helper (int (*fn)(void *), void *arg)
680{
681#ifdef CONFIG_IA32_SUPPORT
682 if (IS_IA32_PROCESS(ia64_task_regs(current))) {
683 /* A kernel thread is always a 64-bit process. */
684 current->thread.map_base = DEFAULT_MAP_BASE;
685 current->thread.task_size = DEFAULT_TASK_SIZE;
686 ia64_set_kr(IA64_KR_IO_BASE, current->thread.old_iob);
687 ia64_set_kr(IA64_KR_TSSD, current->thread.old_k1);
688 }
689#endif
690 return (*fn)(arg);
691}
692
693/*
694 * Flush thread state. This is called when a thread does an execve().
695 */
696void
697flush_thread (void)
698{
699 /* drop floating-point and debug-register state if it exists: */
700 current->thread.flags &= ~(IA64_THREAD_FPH_VALID | IA64_THREAD_DBG_VALID);
701 ia64_drop_fpu(current);
702 if (IS_IA32_PROCESS(ia64_task_regs(current)))
703 ia32_drop_partial_page_list(current);
704}
705
706/*
707 * Clean up state associated with current thread. This is called when
708 * the thread calls exit().
709 */
710void
711exit_thread (void)
712{
713 ia64_drop_fpu(current);
714#ifdef CONFIG_PERFMON
715 /* if needed, stop monitoring and flush state to perfmon context */
716 if (current->thread.pfm_context)
717 pfm_exit_thread(current);
718
719 /* free debug register resources */
720 if (current->thread.flags & IA64_THREAD_DBG_VALID)
721 pfm_release_debug_registers(current);
722#endif
723 if (IS_IA32_PROCESS(ia64_task_regs(current)))
724 ia32_drop_partial_page_list(current);
725}
726
727unsigned long
728get_wchan (struct task_struct *p)
729{
730 struct unw_frame_info info;
731 unsigned long ip;
732 int count = 0;
733
734 /*
735 * Note: p may not be a blocked task (it could be current or
736	 * another process running on some other CPU). Rather than
737 * trying to determine if p is really blocked, we just assume
738 * it's blocked and rely on the unwind routines to fail
739 * gracefully if the process wasn't really blocked after all.
740 * --davidm 99/12/15
741 */
742 unw_init_from_blocked_task(&info, p);
743 do {
744 if (unw_unwind(&info) < 0)
745 return 0;
746 unw_get_ip(&info, &ip);
747 if (!in_sched_functions(ip))
748 return ip;
749 } while (count++ < 16);
750 return 0;
751}
752
753void
754cpu_halt (void)
755{
756 pal_power_mgmt_info_u_t power_info[8];
757 unsigned long min_power;
758 int i, min_power_state;
759
760 if (ia64_pal_halt_info(power_info) != 0)
761 return;
762
763 min_power_state = 0;
764 min_power = power_info[0].pal_power_mgmt_info_s.power_consumption;
765 for (i = 1; i < 8; ++i)
766 if (power_info[i].pal_power_mgmt_info_s.im
767 && power_info[i].pal_power_mgmt_info_s.power_consumption < min_power) {
768 min_power = power_info[i].pal_power_mgmt_info_s.power_consumption;
769 min_power_state = i;
770 }
771
772 while (1)
773 ia64_pal_halt(min_power_state);
774}
775
776void
777machine_restart (char *restart_cmd)
778{
779 (*efi.reset_system)(EFI_RESET_WARM, 0, 0, NULL);
780}
781
782EXPORT_SYMBOL(machine_restart);
783
784void
785machine_halt (void)
786{
787 cpu_halt();
788}
789
790EXPORT_SYMBOL(machine_halt);
791
792void
793machine_power_off (void)
794{
795 if (pm_power_off)
796 pm_power_off();
797 machine_halt();
798}
799
800EXPORT_SYMBOL(machine_power_off);
diff --git a/arch/ia64/kernel/ptrace.c b/arch/ia64/kernel/ptrace.c
new file mode 100644
index 000000000000..55789fcd7210
--- /dev/null
+++ b/arch/ia64/kernel/ptrace.c
@@ -0,0 +1,1627 @@
1/*
2 * Kernel support for the ptrace() and syscall tracing interfaces.
3 *
4 * Copyright (C) 1999-2005 Hewlett-Packard Co
5 * David Mosberger-Tang <davidm@hpl.hp.com>
6 *
7 * Derived from the x86 and Alpha versions.
8 */
9#include <linux/config.h>
10#include <linux/kernel.h>
11#include <linux/sched.h>
12#include <linux/slab.h>
13#include <linux/mm.h>
14#include <linux/errno.h>
15#include <linux/ptrace.h>
16#include <linux/smp_lock.h>
17#include <linux/user.h>
18#include <linux/security.h>
19#include <linux/audit.h>
20
21#include <asm/pgtable.h>
22#include <asm/processor.h>
23#include <asm/ptrace_offsets.h>
24#include <asm/rse.h>
25#include <asm/system.h>
26#include <asm/uaccess.h>
27#include <asm/unwind.h>
28#ifdef CONFIG_PERFMON
29#include <asm/perfmon.h>
30#endif
31
32#include "entry.h"
33
34/*
35 * Bits in the PSR that we allow ptrace() to change:
36 * be, up, ac, mfl, mfh (the user mask; five bits total)
37 * db (debug breakpoint fault; one bit)
38 * id (instruction debug fault disable; one bit)
39 * dd (data debug fault disable; one bit)
40 * ri (restart instruction; two bits)
41 * is (instruction set; one bit)
42 */
43#define IPSR_MASK (IA64_PSR_UM | IA64_PSR_DB | IA64_PSR_IS \
44 | IA64_PSR_ID | IA64_PSR_DD | IA64_PSR_RI)
45
46#define MASK(nbits) ((1UL << (nbits)) - 1) /* mask with NBITS bits set */
47#define PFM_MASK MASK(38)
48
49#define PTRACE_DEBUG 0
50
51#if PTRACE_DEBUG
52# define dprintk(format...) printk(format)
53# define inline
54#else
55# define dprintk(format...)
56#endif
57
58/* Return TRUE if PT was created due to kernel-entry via a system-call. */
59
60static inline int
61in_syscall (struct pt_regs *pt)
62{
63 return (long) pt->cr_ifs >= 0;
64}
65
66/*
67 * Collect the NaT bits for r1-r31 from scratch_unat and return a NaT
68 * bitset where bit i is set iff the NaT bit of register i is set.
69 */
70unsigned long
71ia64_get_scratch_nat_bits (struct pt_regs *pt, unsigned long scratch_unat)
72{
73# define GET_BITS(first, last, unat) \
74 ({ \
75 unsigned long bit = ia64_unat_pos(&pt->r##first); \
76 unsigned long nbits = (last - first + 1); \
77 unsigned long mask = MASK(nbits) << first; \
78 unsigned long dist; \
79 if (bit < first) \
80 dist = 64 + bit - first; \
81 else \
82 dist = bit - first; \
83 ia64_rotr(unat, dist) & mask; \
84 })
85 unsigned long val;
86
87 /*
88 * Registers that are stored consecutively in struct pt_regs
89 * can be handled in parallel. If the register order in
90	 * struct pt_regs changes, this code MUST be updated.
91 */
92 val = GET_BITS( 1, 1, scratch_unat);
93 val |= GET_BITS( 2, 3, scratch_unat);
94 val |= GET_BITS(12, 13, scratch_unat);
95 val |= GET_BITS(14, 14, scratch_unat);
96 val |= GET_BITS(15, 15, scratch_unat);
97 val |= GET_BITS( 8, 11, scratch_unat);
98 val |= GET_BITS(16, 31, scratch_unat);
99 return val;
100
101# undef GET_BITS
102}
103
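GET_BITS in ia64_get_scratch_nat_bits above works by rotating the scratch unat right so that the bit recorded at the spill slot of r<first> lands at bit position <first>, then masking off the run of (last - first + 1) registers. Below is a standalone sketch of that rotate-and-mask step; ia64_rotr is modelled with a plain C rotate and the spill-bit position is a made-up input rather than the real ia64_unat_pos() result.

#include <stdio.h>

#define MASK(nbits) ((1UL << (nbits)) - 1)

/* Portable stand-in for ia64_rotr(): rotate right by 0..63 bits. */
static unsigned long rotr64(unsigned long x, unsigned int n)
{
	return n ? (x >> n) | (x << (64 - n)) : x;
}

/*
 * Extract the NaT bits of registers first..last from a scratch unat word,
 * given the unat bit position at which r<first> was spilled.
 */
static unsigned long get_bits(unsigned int first, unsigned int last,
			      unsigned int spill_bit, unsigned long unat)
{
	unsigned int nbits = last - first + 1;
	unsigned long mask = MASK(nbits) << first;
	unsigned int dist = (spill_bit < first) ? 64 + spill_bit - first
						: spill_bit - first;
	return rotr64(unat, dist) & mask;
}

int main(void)
{
	/* Pretend r16..r31 were spilled starting at unat bit 40 and that
	 * r17's slot holds a NaT (bit 41 set in the scratch unat). */
	unsigned long scratch_unat = 1UL << 41;
	unsigned long nat = get_bits(16, 31, 40, scratch_unat);

	printf("nat bits = 0x%lx (bit 17 set: %d)\n", nat, !!(nat & (1UL << 17)));
	return 0;
}
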
104/*
105 * Set the NaT bits for the scratch registers according to NAT and
106 * return the resulting unat (assuming the scratch registers are
107 * stored in PT).
108 */
109unsigned long
110ia64_put_scratch_nat_bits (struct pt_regs *pt, unsigned long nat)
111{
112# define PUT_BITS(first, last, nat) \
113 ({ \
114 unsigned long bit = ia64_unat_pos(&pt->r##first); \
115 unsigned long nbits = (last - first + 1); \
116 unsigned long mask = MASK(nbits) << first; \
117 long dist; \
118 if (bit < first) \
119 dist = 64 + bit - first; \
120 else \
121 dist = bit - first; \
122 ia64_rotl(nat & mask, dist); \
123 })
124 unsigned long scratch_unat;
125
126 /*
127 * Registers that are stored consecutively in struct pt_regs
128 * can be handled in parallel. If the register order in
129	 * struct pt_regs changes, this code MUST be updated.
130 */
131 scratch_unat = PUT_BITS( 1, 1, nat);
132 scratch_unat |= PUT_BITS( 2, 3, nat);
133 scratch_unat |= PUT_BITS(12, 13, nat);
134 scratch_unat |= PUT_BITS(14, 14, nat);
135 scratch_unat |= PUT_BITS(15, 15, nat);
136 scratch_unat |= PUT_BITS( 8, 11, nat);
137 scratch_unat |= PUT_BITS(16, 31, nat);
138
139 return scratch_unat;
140
141# undef PUT_BITS
142}
143
144#define IA64_MLX_TEMPLATE 0x2
145#define IA64_MOVL_OPCODE 6
146
147void
148ia64_increment_ip (struct pt_regs *regs)
149{
150 unsigned long w0, ri = ia64_psr(regs)->ri + 1;
151
152 if (ri > 2) {
153 ri = 0;
154 regs->cr_iip += 16;
155 } else if (ri == 2) {
156 get_user(w0, (char __user *) regs->cr_iip + 0);
157 if (((w0 >> 1) & 0xf) == IA64_MLX_TEMPLATE) {
158 /*
159 * rfi'ing to slot 2 of an MLX bundle causes
160 * an illegal operation fault. We don't want
161 * that to happen...
162 */
163 ri = 0;
164 regs->cr_iip += 16;
165 }
166 }
167 ia64_psr(regs)->ri = ri;
168}
169
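Both the sampling handler earlier (ent->ip = cr_iip | psr.ri) and ia64_increment_ip above treat an instruction address as a 16-byte bundle address plus a 2-bit slot number, with slot 2 of an MLX bundle being skipped. The sketch below shows that encoding and the slot wrap-around; MLX detection is reduced to a flag since decoding the template would need the actual bundle bytes.

#include <stdio.h>

/* Pack a bundle address and slot number the way the sampling handler does. */
static unsigned long pack_ip(unsigned long bundle, unsigned int slot)
{
	return bundle | (slot & 0x3);
}

/* Advance to the next slot; past slot 2 we move to the next 16-byte bundle.
 * is_mlx stands in for "template is MLX", where slot 2 must be skipped. */
static void next_ip(unsigned long *iip, unsigned int *ri, int is_mlx)
{
	unsigned int slot = *ri + 1;

	if (slot > 2 || (slot == 2 && is_mlx)) {
		slot = 0;
		*iip += 16;
	}
	*ri = slot;
}

int main(void)
{
	unsigned long iip = 0x4000000000001230UL;  /* arbitrary bundle address */
	unsigned int ri = 1;

	printf("sample ip = 0x%016lx\n", pack_ip(iip, ri));
	next_ip(&iip, &ri, /*is_mlx=*/1);
	printf("after step: iip = 0x%016lx, ri = %u\n", iip, ri);
	return 0;
}
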
170void
171ia64_decrement_ip (struct pt_regs *regs)
172{
173 unsigned long w0, ri = ia64_psr(regs)->ri - 1;
174
175 if (ia64_psr(regs)->ri == 0) {
176 regs->cr_iip -= 16;
177 ri = 2;
178 get_user(w0, (char __user *) regs->cr_iip + 0);
179 if (((w0 >> 1) & 0xf) == IA64_MLX_TEMPLATE) {
180 /*
181 * rfi'ing to slot 2 of an MLX bundle causes
182 * an illegal operation fault. We don't want
183 * that to happen...
184 */
185 ri = 1;
186 }
187 }
188 ia64_psr(regs)->ri = ri;
189}
190
191/*
192 * This routine is used to read the rnat bits that are stored on the
193 * kernel backing store. Since, in general, the alignment of the user
194 * and kernel backing stores differs, this is not completely trivial. In
195 * essence, we need to construct the user RNAT based on up to two
196 * kernel RNAT values and/or the RNAT value saved in the child's
197 * pt_regs.
198 *
199 * user rbs
200 *
201 * +--------+ <-- lowest address
202 * | slot62 |
203 * +--------+
204 * | rnat | 0x....1f8
205 * +--------+
206 * | slot00 | \
207 * +--------+ |
208 * | slot01 | > child_regs->ar_rnat
209 * +--------+ |
210 * | slot02 | / kernel rbs
211 * +--------+ +--------+
212 * <- child_regs->ar_bspstore | slot61 | <-- krbs
213 * +- - - - + +--------+
214 * | slot62 |
215 * +- - - - + +--------+
216 * | rnat |
217 * +- - - - + +--------+
218 * vrnat | slot00 |
219 * +- - - - + +--------+
220 * = =
221 * +--------+
222 * | slot00 | \
223 * +--------+ |
224 * | slot01 | > child_stack->ar_rnat
225 * +--------+ |
226 * | slot02 | /
227 * +--------+
228 * <--- child_stack->ar_bspstore
229 *
230 * The way to think of this code is as follows: bit 0 in the user rnat
231 * corresponds to some bit N (0 <= N <= 62) in one of the kernel rnat
232 * values. The kernel rnat value holding this bit is stored in
233 * variable rnat0. rnat1 is loaded with the kernel rnat value that
234 * forms the upper bits of the user rnat value.
235 *
236 * Boundary cases:
237 *
238 * o when reading the rnat "below" the first rnat slot on the kernel
239 * backing store, rnat0/rnat1 are set to 0 and the low order bits are
240 * merged in from pt->ar_rnat.
241 *
242 * o when reading the rnat "above" the last rnat slot on the kernel
243 * backing store, rnat0/rnat1 get their value from sw->ar_rnat.
244 */
245static unsigned long
246get_rnat (struct task_struct *task, struct switch_stack *sw,
247 unsigned long *krbs, unsigned long *urnat_addr,
248 unsigned long *urbs_end)
249{
250 unsigned long rnat0 = 0, rnat1 = 0, urnat = 0, *slot0_kaddr;
251 unsigned long umask = 0, mask, m;
252 unsigned long *kbsp, *ubspstore, *rnat0_kaddr, *rnat1_kaddr, shift;
253 long num_regs, nbits;
254 struct pt_regs *pt;
255
256 pt = ia64_task_regs(task);
257 kbsp = (unsigned long *) sw->ar_bspstore;
258 ubspstore = (unsigned long *) pt->ar_bspstore;
259
260 if (urbs_end < urnat_addr)
261 nbits = ia64_rse_num_regs(urnat_addr - 63, urbs_end);
262 else
263 nbits = 63;
264 mask = MASK(nbits);
265 /*
266 * First, figure out which bit number slot 0 in user-land maps
267 * to in the kernel rnat. Do this by figuring out how many
268 * register slots we're beyond the user's backingstore and
269 * then computing the equivalent address in kernel space.
270 */
271 num_regs = ia64_rse_num_regs(ubspstore, urnat_addr + 1);
272 slot0_kaddr = ia64_rse_skip_regs(krbs, num_regs);
273 shift = ia64_rse_slot_num(slot0_kaddr);
274 rnat1_kaddr = ia64_rse_rnat_addr(slot0_kaddr);
275 rnat0_kaddr = rnat1_kaddr - 64;
276
277 if (ubspstore + 63 > urnat_addr) {
278 /* some bits need to be merged in from pt->ar_rnat */
279 umask = MASK(ia64_rse_slot_num(ubspstore)) & mask;
280 urnat = (pt->ar_rnat & umask);
281 mask &= ~umask;
282 if (!mask)
283 return urnat;
284 }
285
286 m = mask << shift;
287 if (rnat0_kaddr >= kbsp)
288 rnat0 = sw->ar_rnat;
289 else if (rnat0_kaddr > krbs)
290 rnat0 = *rnat0_kaddr;
291 urnat |= (rnat0 & m) >> shift;
292
293 m = mask >> (63 - shift);
294 if (rnat1_kaddr >= kbsp)
295 rnat1 = sw->ar_rnat;
296 else if (rnat1_kaddr > krbs)
297 rnat1 = *rnat1_kaddr;
298 urnat |= (rnat1 & m) << (63 - shift);
299 return urnat;
300}
301
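get_rnat above has to stitch the user RNaT word together because the RSE places an RNaT collection word in every 64th backing-store slot, i.e. whenever bits 3-8 of the slot address are all ones (the 0x....1f8 address in the diagram). The sketch below shows the address arithmetic this implies; the helpers mirror what ia64_rse_slot_num()/ia64_rse_is_rnat_slot()/ia64_rse_rnat_addr() are expected to compute, but are written here only for illustration.

#include <stdio.h>

/* Slot number (0..63) of a backing-store address within its RNaT group. */
static unsigned long rse_slot_num(unsigned long addr)
{
	return (addr >> 3) & 0x3f;
}

/* An address holds an RNaT collection word when it is slot 63 of a group. */
static int rse_is_rnat_slot(unsigned long addr)
{
	return rse_slot_num(addr) == 0x3f;
}

/* Address of the RNaT collection word covering a given slot. */
static unsigned long rse_rnat_addr(unsigned long addr)
{
	return addr | (0x3fUL << 3);
}

int main(void)
{
	unsigned long slot = 0x600000000000f010UL;  /* arbitrary example */

	printf("slot %lu, rnat word at 0x%016lx, is rnat slot: %d\n",
	       rse_slot_num(slot), rse_rnat_addr(slot), rse_is_rnat_slot(slot));
	return 0;
}
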
302/*
303 * The reverse of get_rnat.
304 */
305static void
306put_rnat (struct task_struct *task, struct switch_stack *sw,
307 unsigned long *krbs, unsigned long *urnat_addr, unsigned long urnat,
308 unsigned long *urbs_end)
309{
310 unsigned long rnat0 = 0, rnat1 = 0, *slot0_kaddr, umask = 0, mask, m;
311 unsigned long *kbsp, *ubspstore, *rnat0_kaddr, *rnat1_kaddr, shift;
312 long num_regs, nbits;
313 struct pt_regs *pt;
314 unsigned long cfm, *urbs_kargs;
315
316 pt = ia64_task_regs(task);
317 kbsp = (unsigned long *) sw->ar_bspstore;
318 ubspstore = (unsigned long *) pt->ar_bspstore;
319
320 urbs_kargs = urbs_end;
321 if (in_syscall(pt)) {
322 /*
323 * If entered via syscall, don't allow user to set rnat bits
324 * for syscall args.
325 */
326 cfm = pt->cr_ifs;
327 urbs_kargs = ia64_rse_skip_regs(urbs_end, -(cfm & 0x7f));
328 }
329
330 if (urbs_kargs >= urnat_addr)
331 nbits = 63;
332 else {
333 if ((urnat_addr - 63) >= urbs_kargs)
334 return;
335 nbits = ia64_rse_num_regs(urnat_addr - 63, urbs_kargs);
336 }
337 mask = MASK(nbits);
338
339 /*
340 * First, figure out which bit number slot 0 in user-land maps
341 * to in the kernel rnat. Do this by figuring out how many
342 * register slots we're beyond the user's backingstore and
343 * then computing the equivalent address in kernel space.
344 */
345 num_regs = ia64_rse_num_regs(ubspstore, urnat_addr + 1);
346 slot0_kaddr = ia64_rse_skip_regs(krbs, num_regs);
347 shift = ia64_rse_slot_num(slot0_kaddr);
348 rnat1_kaddr = ia64_rse_rnat_addr(slot0_kaddr);
349 rnat0_kaddr = rnat1_kaddr - 64;
350
351 if (ubspstore + 63 > urnat_addr) {
352		/* some bits need to be placed in pt->ar_rnat: */
353 umask = MASK(ia64_rse_slot_num(ubspstore)) & mask;
354 pt->ar_rnat = (pt->ar_rnat & ~umask) | (urnat & umask);
355 mask &= ~umask;
356 if (!mask)
357 return;
358 }
359 /*
360 * Note: Section 11.1 of the EAS guarantees that bit 63 of an
361	 * rnat slot is ignored, so we don't have to clear it here.
362 */
363 rnat0 = (urnat << shift);
364 m = mask << shift;
365 if (rnat0_kaddr >= kbsp)
366 sw->ar_rnat = (sw->ar_rnat & ~m) | (rnat0 & m);
367 else if (rnat0_kaddr > krbs)
368 *rnat0_kaddr = ((*rnat0_kaddr & ~m) | (rnat0 & m));
369
370 rnat1 = (urnat >> (63 - shift));
371 m = mask >> (63 - shift);
372 if (rnat1_kaddr >= kbsp)
373 sw->ar_rnat = (sw->ar_rnat & ~m) | (rnat1 & m);
374 else if (rnat1_kaddr > krbs)
375 *rnat1_kaddr = ((*rnat1_kaddr & ~m) | (rnat1 & m));
376}
377
378static inline int
379on_kernel_rbs (unsigned long addr, unsigned long bspstore,
380 unsigned long urbs_end)
381{
382 unsigned long *rnat_addr = ia64_rse_rnat_addr((unsigned long *)
383 urbs_end);
384 return (addr >= bspstore && addr <= (unsigned long) rnat_addr);
385}
386
387/*
388 * Read a word from the user-level backing store of task CHILD. ADDR
389 * is the user-level address to read the word from, VAL a pointer to
390 * the return value, and USER_BSP gives the end of the user-level
391 * backing store (i.e., it's the address that would be in ar.bsp after
392 * the user executed a "cover" instruction).
393 *
394 * This routine takes care of accessing the kernel register backing
395 * store for those registers that got spilled there. It also takes
396 * care of calculating the appropriate RNaT collection words.
397 */
398long
399ia64_peek (struct task_struct *child, struct switch_stack *child_stack,
400 unsigned long user_rbs_end, unsigned long addr, long *val)
401{
402 unsigned long *bspstore, *krbs, regnum, *laddr, *urbs_end, *rnat_addr;
403 struct pt_regs *child_regs;
404 size_t copied;
405 long ret;
406
407 urbs_end = (long *) user_rbs_end;
408 laddr = (unsigned long *) addr;
409 child_regs = ia64_task_regs(child);
410 bspstore = (unsigned long *) child_regs->ar_bspstore;
411 krbs = (unsigned long *) child + IA64_RBS_OFFSET/8;
412 if (on_kernel_rbs(addr, (unsigned long) bspstore,
413 (unsigned long) urbs_end))
414 {
415 /*
416 * Attempt to read the RBS in an area that's actually
417 * on the kernel RBS => read the corresponding bits in
418 * the kernel RBS.
419 */
420 rnat_addr = ia64_rse_rnat_addr(laddr);
421 ret = get_rnat(child, child_stack, krbs, rnat_addr, urbs_end);
422
423 if (laddr == rnat_addr) {
424 /* return NaT collection word itself */
425 *val = ret;
426 return 0;
427 }
428
429 if (((1UL << ia64_rse_slot_num(laddr)) & ret) != 0) {
430 /*
431 * It is implementation dependent whether the
432 * data portion of a NaT value gets saved on a
433 * st8.spill or RSE spill (e.g., see EAS 2.6,
434 * 4.4.4.6 Register Spill and Fill). To get
435 * consistent behavior across all possible
436 * IA-64 implementations, we return zero in
437 * this case.
438 */
439 *val = 0;
440 return 0;
441 }
442
443 if (laddr < urbs_end) {
444 /*
445 * The desired word is on the kernel RBS and
446 * is not a NaT.
447 */
448 regnum = ia64_rse_num_regs(bspstore, laddr);
449 *val = *ia64_rse_skip_regs(krbs, regnum);
450 return 0;
451 }
452 }
453 copied = access_process_vm(child, addr, &ret, sizeof(ret), 0);
454 if (copied != sizeof(ret))
455 return -EIO;
456 *val = ret;
457 return 0;
458}
459
460long
461ia64_poke (struct task_struct *child, struct switch_stack *child_stack,
462 unsigned long user_rbs_end, unsigned long addr, long val)
463{
464 unsigned long *bspstore, *krbs, regnum, *laddr;
465 unsigned long *urbs_end = (long *) user_rbs_end;
466 struct pt_regs *child_regs;
467
468 laddr = (unsigned long *) addr;
469 child_regs = ia64_task_regs(child);
470 bspstore = (unsigned long *) child_regs->ar_bspstore;
471 krbs = (unsigned long *) child + IA64_RBS_OFFSET/8;
472 if (on_kernel_rbs(addr, (unsigned long) bspstore,
473 (unsigned long) urbs_end))
474 {
475 /*
476 * Attempt to write the RBS in an area that's actually
477 * on the kernel RBS => write the corresponding bits
478 * in the kernel RBS.
479 */
480 if (ia64_rse_is_rnat_slot(laddr))
481 put_rnat(child, child_stack, krbs, laddr, val,
482 urbs_end);
483 else {
484 if (laddr < urbs_end) {
485 regnum = ia64_rse_num_regs(bspstore, laddr);
486 *ia64_rse_skip_regs(krbs, regnum) = val;
487 }
488 }
489 } else if (access_process_vm(child, addr, &val, sizeof(val), 1)
490 != sizeof(val))
491 return -EIO;
492 return 0;
493}
494
495/*
496 * Calculate the address of the end of the user-level register backing
497 * store. This is the address that would have been stored in ar.bsp
498 * if the user had executed a "cover" instruction right before
499 * entering the kernel. If CFMP is not NULL, it is used to return the
500 * "current frame mask" that was active at the time the kernel was
501 * entered.
502 */
503unsigned long
504ia64_get_user_rbs_end (struct task_struct *child, struct pt_regs *pt,
505 unsigned long *cfmp)
506{
507 unsigned long *krbs, *bspstore, cfm = pt->cr_ifs;
508 long ndirty;
509
510 krbs = (unsigned long *) child + IA64_RBS_OFFSET/8;
511 bspstore = (unsigned long *) pt->ar_bspstore;
512 ndirty = ia64_rse_num_regs(krbs, krbs + (pt->loadrs >> 19));
513
514 if (in_syscall(pt))
515 ndirty += (cfm & 0x7f);
516 else
517 cfm &= ~(1UL << 63); /* clear valid bit */
518
519 if (cfmp)
520 *cfmp = cfm;
521 return (unsigned long) ia64_rse_skip_regs(bspstore, ndirty);
522}
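/*
 * Illustrative sketch (not part of the original file): the ia64_rse_*()
 * helpers used above encode the RSE backing-store layout, in which every
 * 64 eight-byte slots consist of 63 register slots followed by one RNaT
 * collection slot (slot number 0x3f).  A minimal forward-only version of
 * the "skip N registers" arithmetic, assuming that layout, might look
 * like this hypothetical helper:
 */
static inline unsigned long *
rbs_skip_regs_sketch (unsigned long *addr, unsigned long num_regs)
{
	/* slot number of ADDR within its 64-slot block: */
	unsigned long slot = ((unsigned long) addr >> 3) & 0x3f;

	/* advance NUM_REGS register slots plus one RNaT slot per 63 registers crossed: */
	return addr + num_regs + (slot + num_regs) / 63;
}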
523
524/*
525 * Synchronize (i.e., write) the RSE backing store living in kernel
526 * space to the VM of the CHILD task. SW is a pointer to the child's
527 * switch_stack structure. USER_RBS_START and USER_RBS_END delimit
528 * the portion of the user-level backing store that is to be written
529 * out.
530 */
531long
532ia64_sync_user_rbs (struct task_struct *child, struct switch_stack *sw,
533 unsigned long user_rbs_start, unsigned long user_rbs_end)
534{
535 unsigned long addr, val;
536 long ret;
537
538 /* now copy word for word from kernel rbs to user rbs: */
539 for (addr = user_rbs_start; addr < user_rbs_end; addr += 8) {
540 ret = ia64_peek(child, sw, user_rbs_end, addr, &val);
541 if (ret < 0)
542 return ret;
543 if (access_process_vm(child, addr, &val, sizeof(val), 1)
544 != sizeof(val))
545 return -EIO;
546 }
547 return 0;
548}
549
550static inline int
551thread_matches (struct task_struct *thread, unsigned long addr)
552{
553 unsigned long thread_rbs_end;
554 struct pt_regs *thread_regs;
555
556 if (ptrace_check_attach(thread, 0) < 0)
557 /*
558 * If the thread is not in an attachable state, we'll
559 * ignore it. The net effect is that if ADDR happens
560 * to overlap with the portion of the thread's
561 * register backing store that is currently residing
562 * on the thread's kernel stack, then ptrace() may end
563 * up accessing a stale value. But if the thread
564 * isn't stopped, that's a problem anyhow, so we're
565 * doing as well as we can...
566 */
567 return 0;
568
569 thread_regs = ia64_task_regs(thread);
570 thread_rbs_end = ia64_get_user_rbs_end(thread, thread_regs, NULL);
571 if (!on_kernel_rbs(addr, thread_regs->ar_bspstore, thread_rbs_end))
572 return 0;
573
574 return 1; /* looks like we've got a winner */
575}
576
577/*
578 * GDB apparently wants to be able to read the register-backing store
579 * of any thread when attached to a given process. If we are peeking
580 * or poking an address that happens to reside in the kernel-backing
581 * store of another thread, we need to attach to that thread, because
582 * otherwise we end up accessing stale data.
583 *
584 * task_list_lock must be read-locked before calling this routine!
585 */
586static struct task_struct *
587find_thread_for_addr (struct task_struct *child, unsigned long addr)
588{
589 struct task_struct *g, *p;
590 struct mm_struct *mm;
591 int mm_users;
592
593 if (!(mm = get_task_mm(child)))
594 return child;
595
596 /* -1 because of our get_task_mm(): */
597 mm_users = atomic_read(&mm->mm_users) - 1;
598 if (mm_users <= 1)
599 goto out; /* not multi-threaded */
600
601 /*
602 * First, traverse the child's thread-list. Good for scalability with
603 * NPTL-threads.
604 */
605 p = child;
606 do {
607 if (thread_matches(p, addr)) {
608 child = p;
609 goto out;
610 }
611 if (mm_users-- <= 1)
612 goto out;
613 } while ((p = next_thread(p)) != child);
614
615 do_each_thread(g, p) {
616		if (p->mm != mm)
617 continue;
618
619 if (thread_matches(p, addr)) {
620 child = p;
621 goto out;
622 }
623 } while_each_thread(g, p);
624 out:
625 mmput(mm);
626 return child;
627}
628
629/*
630 * Write f32-f127 back to task->thread.fph if it has been modified.
631 */
632inline void
633ia64_flush_fph (struct task_struct *task)
634{
635 struct ia64_psr *psr = ia64_psr(ia64_task_regs(task));
636
637 if (ia64_is_local_fpu_owner(task) && psr->mfh) {
638 psr->mfh = 0;
639 task->thread.flags |= IA64_THREAD_FPH_VALID;
640 ia64_save_fpu(&task->thread.fph[0]);
641 }
642}
643
644/*
645 * Sync the fph state of the task so that it can be manipulated
646 * through thread.fph. If necessary, f32-f127 are written back to
647 * thread.fph or, if the fph state hasn't been used before, thread.fph
648 * is cleared to zeroes. Also, access to f32-f127 is disabled to
649 * ensure that the task picks up the state from thread.fph when it
650 * executes again.
651 */
652void
653ia64_sync_fph (struct task_struct *task)
654{
655 struct ia64_psr *psr = ia64_psr(ia64_task_regs(task));
656
657 ia64_flush_fph(task);
658 if (!(task->thread.flags & IA64_THREAD_FPH_VALID)) {
659 task->thread.flags |= IA64_THREAD_FPH_VALID;
660 memset(&task->thread.fph, 0, sizeof(task->thread.fph));
661 }
662 ia64_drop_fpu(task);
663 psr->dfh = 1;
664}
665
666static int
667access_fr (struct unw_frame_info *info, int regnum, int hi,
668 unsigned long *data, int write_access)
669{
670 struct ia64_fpreg fpval;
671 int ret;
672
673 ret = unw_get_fr(info, regnum, &fpval);
674 if (ret < 0)
675 return ret;
676
677 if (write_access) {
678 fpval.u.bits[hi] = *data;
679 ret = unw_set_fr(info, regnum, fpval);
680 } else
681 *data = fpval.u.bits[hi];
682 return ret;
683}
684
685/*
686 * Change the machine-state of CHILD such that it will return via the normal
687 * kernel exit-path, rather than the syscall-exit path.
688 */
689static void
690convert_to_non_syscall (struct task_struct *child, struct pt_regs *pt,
691 unsigned long cfm)
692{
693 struct unw_frame_info info, prev_info;
694 unsigned long ip, pr;
695
696 unw_init_from_blocked_task(&info, child);
697 while (1) {
698 prev_info = info;
699 if (unw_unwind(&info) < 0)
700 return;
701 if (unw_get_rp(&info, &ip) < 0)
702 return;
703 if (ip < FIXADDR_USER_END)
704 break;
705 }
706
707 unw_get_pr(&prev_info, &pr);
708 pr &= ~(1UL << PRED_SYSCALL);
709 pr |= (1UL << PRED_NON_SYSCALL);
710 unw_set_pr(&prev_info, pr);
711
712 pt->cr_ifs = (1UL << 63) | cfm;
713}
714
715static int
716access_nat_bits (struct task_struct *child, struct pt_regs *pt,
717 struct unw_frame_info *info,
718 unsigned long *data, int write_access)
719{
720 unsigned long regnum, nat_bits, scratch_unat, dummy = 0;
721 char nat = 0;
722
723 if (write_access) {
724 nat_bits = *data;
725 scratch_unat = ia64_put_scratch_nat_bits(pt, nat_bits);
726 if (unw_set_ar(info, UNW_AR_UNAT, scratch_unat) < 0) {
727 dprintk("ptrace: failed to set ar.unat\n");
728 return -1;
729 }
730 for (regnum = 4; regnum <= 7; ++regnum) {
731 unw_get_gr(info, regnum, &dummy, &nat);
732 unw_set_gr(info, regnum, dummy,
733 (nat_bits >> regnum) & 1);
734 }
735 } else {
736 if (unw_get_ar(info, UNW_AR_UNAT, &scratch_unat) < 0) {
737 dprintk("ptrace: failed to read ar.unat\n");
738 return -1;
739 }
740 nat_bits = ia64_get_scratch_nat_bits(pt, scratch_unat);
741 for (regnum = 4; regnum <= 7; ++regnum) {
742 unw_get_gr(info, regnum, &dummy, &nat);
743 nat_bits |= (nat != 0) << regnum;
744 }
745 *data = nat_bits;
746 }
747 return 0;
748}
749
750static int
751access_uarea (struct task_struct *child, unsigned long addr,
752 unsigned long *data, int write_access)
753{
754 unsigned long *ptr, regnum, urbs_end, rnat_addr, cfm;
755 struct switch_stack *sw;
756 struct pt_regs *pt;
757# define pt_reg_addr(pt, reg) ((void *) \
758 ((unsigned long) (pt) \
759 + offsetof(struct pt_regs, reg)))
760
761
762 pt = ia64_task_regs(child);
763 sw = (struct switch_stack *) (child->thread.ksp + 16);
764
765 if ((addr & 0x7) != 0) {
766 dprintk("ptrace: unaligned register address 0x%lx\n", addr);
767 return -1;
768 }
769
770 if (addr < PT_F127 + 16) {
771 /* accessing fph */
772 if (write_access)
773 ia64_sync_fph(child);
774 else
775 ia64_flush_fph(child);
776 ptr = (unsigned long *)
777 ((unsigned long) &child->thread.fph + addr);
778 } else if ((addr >= PT_F10) && (addr < PT_F11 + 16)) {
779 /* scratch registers untouched by kernel (saved in pt_regs) */
780 ptr = pt_reg_addr(pt, f10) + (addr - PT_F10);
781 } else if (addr >= PT_F12 && addr < PT_F15 + 16) {
782 /*
783 * Scratch registers untouched by kernel (saved in
784 * switch_stack).
785 */
786 ptr = (unsigned long *) ((long) sw
787 + (addr - PT_NAT_BITS - 32));
788 } else if (addr < PT_AR_LC + 8) {
789 /* preserved state: */
790 struct unw_frame_info info;
791 char nat = 0;
792 int ret;
793
794 unw_init_from_blocked_task(&info, child);
795 if (unw_unwind_to_user(&info) < 0)
796 return -1;
797
798 switch (addr) {
799 case PT_NAT_BITS:
800 return access_nat_bits(child, pt, &info,
801 data, write_access);
802
803 case PT_R4: case PT_R5: case PT_R6: case PT_R7:
804 if (write_access) {
805 /* read NaT bit first: */
806 unsigned long dummy;
807
808 ret = unw_get_gr(&info, (addr - PT_R4)/8 + 4,
809 &dummy, &nat);
810 if (ret < 0)
811 return ret;
812 }
813 return unw_access_gr(&info, (addr - PT_R4)/8 + 4, data,
814 &nat, write_access);
815
816 case PT_B1: case PT_B2: case PT_B3:
817 case PT_B4: case PT_B5:
818 return unw_access_br(&info, (addr - PT_B1)/8 + 1, data,
819 write_access);
820
821 case PT_AR_EC:
822 return unw_access_ar(&info, UNW_AR_EC, data,
823 write_access);
824
825 case PT_AR_LC:
826 return unw_access_ar(&info, UNW_AR_LC, data,
827 write_access);
828
829 default:
830 if (addr >= PT_F2 && addr < PT_F5 + 16)
831 return access_fr(&info, (addr - PT_F2)/16 + 2,
832 (addr & 8) != 0, data,
833 write_access);
834 else if (addr >= PT_F16 && addr < PT_F31 + 16)
835 return access_fr(&info,
836 (addr - PT_F16)/16 + 16,
837 (addr & 8) != 0,
838 data, write_access);
839 else {
840 dprintk("ptrace: rejecting access to register "
841 "address 0x%lx\n", addr);
842 return -1;
843 }
844 }
845 } else if (addr < PT_F9+16) {
846 /* scratch state */
847 switch (addr) {
848 case PT_AR_BSP:
849 /*
850 * By convention, we use PT_AR_BSP to refer to
851 * the end of the user-level backing store.
852 * Use ia64_rse_skip_regs(PT_AR_BSP, -CFM.sof)
853 * to get the real value of ar.bsp at the time
854 * the kernel was entered.
855 *
856 * Furthermore, when changing the contents of
857 * PT_AR_BSP (or PT_CFM) we MUST copy any
858			 * user-level stacked registers that are
859 * stored on the kernel stack back to
860 * user-space because otherwise, we might end
861 * up clobbering kernel stacked registers.
862 * Also, if this happens while the task is
863			 * blocked in a system call, we convert the
864 * state such that the non-system-call exit
865 * path is used. This ensures that the proper
866 * state will be picked up when resuming
867 * execution. However, it *also* means that
868 * once we write PT_AR_BSP/PT_CFM, it won't be
869 * possible to modify the syscall arguments of
870 * the pending system call any longer. This
871 * shouldn't be an issue because modifying
872 * PT_AR_BSP/PT_CFM generally implies that
873 * we're either abandoning the pending system
874			 * call or that we defer its re-execution
875 * (e.g., due to GDB doing an inferior
876 * function call).
877 */
878 urbs_end = ia64_get_user_rbs_end(child, pt, &cfm);
879 if (write_access) {
880 if (*data != urbs_end) {
881 if (ia64_sync_user_rbs(child, sw,
882 pt->ar_bspstore,
883 urbs_end) < 0)
884 return -1;
885 if (in_syscall(pt))
886 convert_to_non_syscall(child,
887 pt,
888 cfm);
889 /*
890 * Simulate user-level write
891 * of ar.bsp:
892 */
893 pt->loadrs = 0;
894 pt->ar_bspstore = *data;
895 }
896 } else
897 *data = urbs_end;
898 return 0;
899
900 case PT_CFM:
901 urbs_end = ia64_get_user_rbs_end(child, pt, &cfm);
902 if (write_access) {
903 if (((cfm ^ *data) & PFM_MASK) != 0) {
904 if (ia64_sync_user_rbs(child, sw,
905 pt->ar_bspstore,
906 urbs_end) < 0)
907 return -1;
908 if (in_syscall(pt))
909 convert_to_non_syscall(child,
910 pt,
911 cfm);
912 pt->cr_ifs = ((pt->cr_ifs & ~PFM_MASK)
913 | (*data & PFM_MASK));
914 }
915 } else
916 *data = cfm;
917 return 0;
918
919 case PT_CR_IPSR:
920 if (write_access)
921 pt->cr_ipsr = ((*data & IPSR_MASK)
922 | (pt->cr_ipsr & ~IPSR_MASK));
923 else
924 *data = (pt->cr_ipsr & IPSR_MASK);
925 return 0;
926
927 case PT_AR_RNAT:
928 urbs_end = ia64_get_user_rbs_end(child, pt, NULL);
929 rnat_addr = (long) ia64_rse_rnat_addr((long *)
930 urbs_end);
931 if (write_access)
932 return ia64_poke(child, sw, urbs_end,
933 rnat_addr, *data);
934 else
935 return ia64_peek(child, sw, urbs_end,
936 rnat_addr, data);
937
938 case PT_R1:
939 ptr = pt_reg_addr(pt, r1);
940 break;
941 case PT_R2: case PT_R3:
942 ptr = pt_reg_addr(pt, r2) + (addr - PT_R2);
943 break;
944 case PT_R8: case PT_R9: case PT_R10: case PT_R11:
945 ptr = pt_reg_addr(pt, r8) + (addr - PT_R8);
946 break;
947 case PT_R12: case PT_R13:
948 ptr = pt_reg_addr(pt, r12) + (addr - PT_R12);
949 break;
950 case PT_R14:
951 ptr = pt_reg_addr(pt, r14);
952 break;
953 case PT_R15:
954 ptr = pt_reg_addr(pt, r15);
955 break;
956 case PT_R16: case PT_R17: case PT_R18: case PT_R19:
957 case PT_R20: case PT_R21: case PT_R22: case PT_R23:
958 case PT_R24: case PT_R25: case PT_R26: case PT_R27:
959 case PT_R28: case PT_R29: case PT_R30: case PT_R31:
960 ptr = pt_reg_addr(pt, r16) + (addr - PT_R16);
961 break;
962 case PT_B0:
963 ptr = pt_reg_addr(pt, b0);
964 break;
965 case PT_B6:
966 ptr = pt_reg_addr(pt, b6);
967 break;
968 case PT_B7:
969 ptr = pt_reg_addr(pt, b7);
970 break;
971 case PT_F6: case PT_F6+8: case PT_F7: case PT_F7+8:
972 case PT_F8: case PT_F8+8: case PT_F9: case PT_F9+8:
973 ptr = pt_reg_addr(pt, f6) + (addr - PT_F6);
974 break;
975 case PT_AR_BSPSTORE:
976 ptr = pt_reg_addr(pt, ar_bspstore);
977 break;
978 case PT_AR_RSC:
979 ptr = pt_reg_addr(pt, ar_rsc);
980 break;
981 case PT_AR_UNAT:
982 ptr = pt_reg_addr(pt, ar_unat);
983 break;
984 case PT_AR_PFS:
985 ptr = pt_reg_addr(pt, ar_pfs);
986 break;
987 case PT_AR_CCV:
988 ptr = pt_reg_addr(pt, ar_ccv);
989 break;
990 case PT_AR_FPSR:
991 ptr = pt_reg_addr(pt, ar_fpsr);
992 break;
993 case PT_CR_IIP:
994 ptr = pt_reg_addr(pt, cr_iip);
995 break;
996 case PT_PR:
997 ptr = pt_reg_addr(pt, pr);
998 break;
999 /* scratch register */
1000
1001 default:
1002 /* disallow accessing anything else... */
1003 dprintk("ptrace: rejecting access to register "
1004 "address 0x%lx\n", addr);
1005 return -1;
1006 }
1007 } else if (addr <= PT_AR_SSD) {
1008 ptr = pt_reg_addr(pt, ar_csd) + (addr - PT_AR_CSD);
1009 } else {
1010 /* access debug registers */
1011
1012 if (addr >= PT_IBR) {
1013 regnum = (addr - PT_IBR) >> 3;
1014 ptr = &child->thread.ibr[0];
1015 } else {
1016 regnum = (addr - PT_DBR) >> 3;
1017 ptr = &child->thread.dbr[0];
1018 }
1019
1020 if (regnum >= 8) {
1021 dprintk("ptrace: rejecting access to register "
1022 "address 0x%lx\n", addr);
1023 return -1;
1024 }
1025#ifdef CONFIG_PERFMON
1026 /*
1027 * Check if debug registers are used by perfmon. This
1028 * test must be done once we know that we can do the
1029 * operation, i.e. the arguments are all valid, but
1030 * before we start modifying the state.
1031 *
1032 * Perfmon needs to keep a count of how many processes
1033 * are trying to modify the debug registers for system
1034 * wide monitoring sessions.
1035 *
1036 * We also include read access here, because they may
1037 * cause the PMU-installed debug register state
1038 * (dbr[], ibr[]) to be reset. The two arrays are also
1039 * used by perfmon, but we do not use
1040 * IA64_THREAD_DBG_VALID. The registers are restored
1041 * by the PMU context switch code.
1042 */
1043 if (pfm_use_debug_registers(child)) return -1;
1044#endif
1045
1046 if (!(child->thread.flags & IA64_THREAD_DBG_VALID)) {
1047 child->thread.flags |= IA64_THREAD_DBG_VALID;
1048 memset(child->thread.dbr, 0,
1049 sizeof(child->thread.dbr));
1050 memset(child->thread.ibr, 0,
1051 sizeof(child->thread.ibr));
1052 }
1053
1054 ptr += regnum;
1055
1056 if ((regnum & 1) && write_access) {
1057 /* don't let the user set kernel-level breakpoints: */
1058 *ptr = *data & ~(7UL << 56);
1059 return 0;
1060 }
1061 }
1062 if (write_access)
1063 *ptr = *data;
1064 else
1065 *data = *ptr;
1066 return 0;
1067}
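/*
 * Illustrative sketch (hypothetical tracer-side code, not part of this
 * file): using the PT_AR_BSP convention documented above from user space.
 * PT_AR_BSP reports the *end* of the user backing store; stepping back
 * CFM.sof registers (skipping RNaT collection slots, as
 * ia64_rse_skip_regs() does) recovers ar.bsp as it was at kernel entry.
 * PT_AR_BSP/PT_CFM are assumed to come from <asm/ptrace_offsets.h>.
 */
#if 0	/* user-space example, for illustration only */
#include <sys/types.h>
#include <sys/ptrace.h>
#include <asm/ptrace_offsets.h>

static unsigned long
bsp_at_kernel_entry (pid_t pid)
{
	unsigned long bsp_end, cfm, sof, slot;

	bsp_end = ptrace(PTRACE_PEEKUSER, pid, PT_AR_BSP, 0);
	cfm     = ptrace(PTRACE_PEEKUSER, pid, PT_CFM, 0);
	sof     = cfm & 0x7f;			/* size of the current frame */
	slot    = (bsp_end >> 3) & 0x3f;	/* slot number of the end address */

	/* step back "sof" registers, skipping one RNaT slot per 63 registers: */
	return bsp_end - 8 * (sof + (sof + 62 - slot) / 63);
}
#endif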
1068
1069static long
1070ptrace_getregs (struct task_struct *child, struct pt_all_user_regs __user *ppr)
1071{
1072 unsigned long psr, ec, lc, rnat, bsp, cfm, nat_bits, val;
1073 struct unw_frame_info info;
1074 struct ia64_fpreg fpval;
1075 struct switch_stack *sw;
1076 struct pt_regs *pt;
1077 long ret, retval = 0;
1078 char nat = 0;
1079 int i;
1080
1081 if (!access_ok(VERIFY_WRITE, ppr, sizeof(struct pt_all_user_regs)))
1082 return -EIO;
1083
1084 pt = ia64_task_regs(child);
1085 sw = (struct switch_stack *) (child->thread.ksp + 16);
1086 unw_init_from_blocked_task(&info, child);
1087 if (unw_unwind_to_user(&info) < 0) {
1088 return -EIO;
1089 }
1090
1091 if (((unsigned long) ppr & 0x7) != 0) {
1092		dprintk("ptrace: unaligned register address %p\n", ppr);
1093 return -EIO;
1094 }
1095
1096 if (access_uarea(child, PT_CR_IPSR, &psr, 0) < 0
1097 || access_uarea(child, PT_AR_EC, &ec, 0) < 0
1098 || access_uarea(child, PT_AR_LC, &lc, 0) < 0
1099 || access_uarea(child, PT_AR_RNAT, &rnat, 0) < 0
1100 || access_uarea(child, PT_AR_BSP, &bsp, 0) < 0
1101 || access_uarea(child, PT_CFM, &cfm, 0)
1102 || access_uarea(child, PT_NAT_BITS, &nat_bits, 0))
1103 return -EIO;
1104
1105 /* control regs */
1106
1107 retval |= __put_user(pt->cr_iip, &ppr->cr_iip);
1108 retval |= __put_user(psr, &ppr->cr_ipsr);
1109
1110 /* app regs */
1111
1112 retval |= __put_user(pt->ar_pfs, &ppr->ar[PT_AUR_PFS]);
1113 retval |= __put_user(pt->ar_rsc, &ppr->ar[PT_AUR_RSC]);
1114 retval |= __put_user(pt->ar_bspstore, &ppr->ar[PT_AUR_BSPSTORE]);
1115 retval |= __put_user(pt->ar_unat, &ppr->ar[PT_AUR_UNAT]);
1116 retval |= __put_user(pt->ar_ccv, &ppr->ar[PT_AUR_CCV]);
1117 retval |= __put_user(pt->ar_fpsr, &ppr->ar[PT_AUR_FPSR]);
1118
1119 retval |= __put_user(ec, &ppr->ar[PT_AUR_EC]);
1120 retval |= __put_user(lc, &ppr->ar[PT_AUR_LC]);
1121 retval |= __put_user(rnat, &ppr->ar[PT_AUR_RNAT]);
1122 retval |= __put_user(bsp, &ppr->ar[PT_AUR_BSP]);
1123 retval |= __put_user(cfm, &ppr->cfm);
1124
1125 /* gr1-gr3 */
1126
1127 retval |= __copy_to_user(&ppr->gr[1], &pt->r1, sizeof(long));
1128 retval |= __copy_to_user(&ppr->gr[2], &pt->r2, sizeof(long) *2);
1129
1130 /* gr4-gr7 */
1131
1132 for (i = 4; i < 8; i++) {
1133 if (unw_access_gr(&info, i, &val, &nat, 0) < 0)
1134 return -EIO;
1135 retval |= __put_user(val, &ppr->gr[i]);
1136 }
1137
1138 /* gr8-gr11 */
1139
1140 retval |= __copy_to_user(&ppr->gr[8], &pt->r8, sizeof(long) * 4);
1141
1142 /* gr12-gr15 */
1143
1144 retval |= __copy_to_user(&ppr->gr[12], &pt->r12, sizeof(long) * 2);
1145 retval |= __copy_to_user(&ppr->gr[14], &pt->r14, sizeof(long));
1146 retval |= __copy_to_user(&ppr->gr[15], &pt->r15, sizeof(long));
1147
1148 /* gr16-gr31 */
1149
1150 retval |= __copy_to_user(&ppr->gr[16], &pt->r16, sizeof(long) * 16);
1151
1152 /* b0 */
1153
1154 retval |= __put_user(pt->b0, &ppr->br[0]);
1155
1156 /* b1-b5 */
1157
1158 for (i = 1; i < 6; i++) {
1159 if (unw_access_br(&info, i, &val, 0) < 0)
1160 return -EIO;
1161		retval |= __put_user(val, &ppr->br[i]);
1162 }
1163
1164 /* b6-b7 */
1165
1166 retval |= __put_user(pt->b6, &ppr->br[6]);
1167 retval |= __put_user(pt->b7, &ppr->br[7]);
1168
1169 /* fr2-fr5 */
1170
1171 for (i = 2; i < 6; i++) {
1172 if (unw_get_fr(&info, i, &fpval) < 0)
1173 return -EIO;
1174 retval |= __copy_to_user(&ppr->fr[i], &fpval, sizeof (fpval));
1175 }
1176
1177 /* fr6-fr11 */
1178
1179 retval |= __copy_to_user(&ppr->fr[6], &pt->f6,
1180 sizeof(struct ia64_fpreg) * 6);
1181
1182 /* fp scratch regs(12-15) */
1183
1184 retval |= __copy_to_user(&ppr->fr[12], &sw->f12,
1185 sizeof(struct ia64_fpreg) * 4);
1186
1187 /* fr16-fr31 */
1188
1189 for (i = 16; i < 32; i++) {
1190 if (unw_get_fr(&info, i, &fpval) < 0)
1191 return -EIO;
1192 retval |= __copy_to_user(&ppr->fr[i], &fpval, sizeof (fpval));
1193 }
1194
1195 /* fph */
1196
1197 ia64_flush_fph(child);
1198 retval |= __copy_to_user(&ppr->fr[32], &child->thread.fph,
1199 sizeof(ppr->fr[32]) * 96);
1200
1201 /* preds */
1202
1203 retval |= __put_user(pt->pr, &ppr->pr);
1204
1205 /* nat bits */
1206
1207 retval |= __put_user(nat_bits, &ppr->nat);
1208
1209 ret = retval ? -EIO : 0;
1210 return ret;
1211}
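/*
 * Illustrative sketch (hypothetical tracer-side code, not part of this
 * file): fetching the whole register set in one call.  As implemented
 * above, PTRACE_GETREGS ignores ADDR and takes a pointer to a
 * struct pt_all_user_regs in the DATA argument; the request number and
 * structure definition are assumed to come from <asm/ptrace.h>.
 */
#if 0	/* user-space example, for illustration only */
#include <stdio.h>
#include <sys/types.h>
#include <sys/ptrace.h>
#include <asm/ptrace.h>

static int
dump_child_regs (pid_t pid)
{
	struct pt_all_user_regs regs;

	if (ptrace(PTRACE_GETREGS, pid, 0, &regs) < 0)
		return -1;
	printf("iip=0x%lx bsp(end)=0x%lx pr=0x%lx\n",
	       regs.cr_iip, regs.ar[PT_AUR_BSP], regs.pr);
	return 0;
}
#endif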
1212
1213static long
1214ptrace_setregs (struct task_struct *child, struct pt_all_user_regs __user *ppr)
1215{
1216 unsigned long psr, ec, lc, rnat, bsp, cfm, nat_bits, val = 0;
1217 struct unw_frame_info info;
1218 struct switch_stack *sw;
1219 struct ia64_fpreg fpval;
1220 struct pt_regs *pt;
1221 long ret, retval = 0;
1222 int i;
1223
1224 memset(&fpval, 0, sizeof(fpval));
1225
1226 if (!access_ok(VERIFY_READ, ppr, sizeof(struct pt_all_user_regs)))
1227 return -EIO;
1228
1229 pt = ia64_task_regs(child);
1230 sw = (struct switch_stack *) (child->thread.ksp + 16);
1231 unw_init_from_blocked_task(&info, child);
1232 if (unw_unwind_to_user(&info) < 0) {
1233 return -EIO;
1234 }
1235
1236 if (((unsigned long) ppr & 0x7) != 0) {
1237		dprintk("ptrace: unaligned register address %p\n", ppr);
1238 return -EIO;
1239 }
1240
1241 /* control regs */
1242
1243 retval |= __get_user(pt->cr_iip, &ppr->cr_iip);
1244 retval |= __get_user(psr, &ppr->cr_ipsr);
1245
1246 /* app regs */
1247
1248 retval |= __get_user(pt->ar_pfs, &ppr->ar[PT_AUR_PFS]);
1249 retval |= __get_user(pt->ar_rsc, &ppr->ar[PT_AUR_RSC]);
1250 retval |= __get_user(pt->ar_bspstore, &ppr->ar[PT_AUR_BSPSTORE]);
1251 retval |= __get_user(pt->ar_unat, &ppr->ar[PT_AUR_UNAT]);
1252 retval |= __get_user(pt->ar_ccv, &ppr->ar[PT_AUR_CCV]);
1253 retval |= __get_user(pt->ar_fpsr, &ppr->ar[PT_AUR_FPSR]);
1254
1255 retval |= __get_user(ec, &ppr->ar[PT_AUR_EC]);
1256 retval |= __get_user(lc, &ppr->ar[PT_AUR_LC]);
1257 retval |= __get_user(rnat, &ppr->ar[PT_AUR_RNAT]);
1258 retval |= __get_user(bsp, &ppr->ar[PT_AUR_BSP]);
1259 retval |= __get_user(cfm, &ppr->cfm);
1260
1261 /* gr1-gr3 */
1262
1263 retval |= __copy_from_user(&pt->r1, &ppr->gr[1], sizeof(long));
1264 retval |= __copy_from_user(&pt->r2, &ppr->gr[2], sizeof(long) * 2);
1265
1266 /* gr4-gr7 */
1267
1268 for (i = 4; i < 8; i++) {
1269 retval |= __get_user(val, &ppr->gr[i]);
1270 /* NaT bit will be set via PT_NAT_BITS: */
1271 if (unw_set_gr(&info, i, val, 0) < 0)
1272 return -EIO;
1273 }
1274
1275 /* gr8-gr11 */
1276
1277 retval |= __copy_from_user(&pt->r8, &ppr->gr[8], sizeof(long) * 4);
1278
1279 /* gr12-gr15 */
1280
1281 retval |= __copy_from_user(&pt->r12, &ppr->gr[12], sizeof(long) * 2);
1282 retval |= __copy_from_user(&pt->r14, &ppr->gr[14], sizeof(long));
1283 retval |= __copy_from_user(&pt->r15, &ppr->gr[15], sizeof(long));
1284
1285 /* gr16-gr31 */
1286
1287 retval |= __copy_from_user(&pt->r16, &ppr->gr[16], sizeof(long) * 16);
1288
1289 /* b0 */
1290
1291 retval |= __get_user(pt->b0, &ppr->br[0]);
1292
1293 /* b1-b5 */
1294
1295 for (i = 1; i < 6; i++) {
1296 retval |= __get_user(val, &ppr->br[i]);
1297 unw_set_br(&info, i, val);
1298 }
1299
1300 /* b6-b7 */
1301
1302 retval |= __get_user(pt->b6, &ppr->br[6]);
1303 retval |= __get_user(pt->b7, &ppr->br[7]);
1304
1305 /* fr2-fr5 */
1306
1307 for (i = 2; i < 6; i++) {
1308 retval |= __copy_from_user(&fpval, &ppr->fr[i], sizeof(fpval));
1309 if (unw_set_fr(&info, i, fpval) < 0)
1310 return -EIO;
1311 }
1312
1313 /* fr6-fr11 */
1314
1315 retval |= __copy_from_user(&pt->f6, &ppr->fr[6],
1316 sizeof(ppr->fr[6]) * 6);
1317
1318 /* fp scratch regs(12-15) */
1319
1320 retval |= __copy_from_user(&sw->f12, &ppr->fr[12],
1321 sizeof(ppr->fr[12]) * 4);
1322
1323 /* fr16-fr31 */
1324
1325 for (i = 16; i < 32; i++) {
1326 retval |= __copy_from_user(&fpval, &ppr->fr[i],
1327 sizeof(fpval));
1328 if (unw_set_fr(&info, i, fpval) < 0)
1329 return -EIO;
1330 }
1331
1332 /* fph */
1333
1334 ia64_sync_fph(child);
1335 retval |= __copy_from_user(&child->thread.fph, &ppr->fr[32],
1336 sizeof(ppr->fr[32]) * 96);
1337
1338 /* preds */
1339
1340 retval |= __get_user(pt->pr, &ppr->pr);
1341
1342 /* nat bits */
1343
1344 retval |= __get_user(nat_bits, &ppr->nat);
1345
1346 retval |= access_uarea(child, PT_CR_IPSR, &psr, 1);
1347 retval |= access_uarea(child, PT_AR_EC, &ec, 1);
1348 retval |= access_uarea(child, PT_AR_LC, &lc, 1);
1349 retval |= access_uarea(child, PT_AR_RNAT, &rnat, 1);
1350 retval |= access_uarea(child, PT_AR_BSP, &bsp, 1);
1351 retval |= access_uarea(child, PT_CFM, &cfm, 1);
1352 retval |= access_uarea(child, PT_NAT_BITS, &nat_bits, 1);
1353
1354 ret = retval ? -EIO : 0;
1355 return ret;
1356}
1357
1358/*
1359 * Called by kernel/ptrace.c when detaching..
1360 *
1361 * Make sure the single step bit is not set.
1362 */
1363void
1364ptrace_disable (struct task_struct *child)
1365{
1366 struct ia64_psr *child_psr = ia64_psr(ia64_task_regs(child));
1367
1368 /* make sure the single step/taken-branch trap bits are not set: */
1369 child_psr->ss = 0;
1370 child_psr->tb = 0;
1371}
1372
1373asmlinkage long
1374sys_ptrace (long request, pid_t pid, unsigned long addr, unsigned long data)
1375{
1376 struct pt_regs *pt;
1377 unsigned long urbs_end, peek_or_poke;
1378 struct task_struct *child;
1379 struct switch_stack *sw;
1380 long ret;
1381
1382 lock_kernel();
1383 ret = -EPERM;
1384 if (request == PTRACE_TRACEME) {
1385 /* are we already being traced? */
1386 if (current->ptrace & PT_PTRACED)
1387 goto out;
1388 ret = security_ptrace(current->parent, current);
1389 if (ret)
1390 goto out;
1391 current->ptrace |= PT_PTRACED;
1392 ret = 0;
1393 goto out;
1394 }
1395
1396 peek_or_poke = (request == PTRACE_PEEKTEXT
1397 || request == PTRACE_PEEKDATA
1398 || request == PTRACE_POKETEXT
1399 || request == PTRACE_POKEDATA);
1400 ret = -ESRCH;
1401 read_lock(&tasklist_lock);
1402 {
1403 child = find_task_by_pid(pid);
1404 if (child) {
1405 if (peek_or_poke)
1406 child = find_thread_for_addr(child, addr);
1407 get_task_struct(child);
1408 }
1409 }
1410 read_unlock(&tasklist_lock);
1411 if (!child)
1412 goto out;
1413 ret = -EPERM;
1414 if (pid == 1) /* no messing around with init! */
1415 goto out_tsk;
1416
1417 if (request == PTRACE_ATTACH) {
1418 ret = ptrace_attach(child);
1419 goto out_tsk;
1420 }
1421
1422 ret = ptrace_check_attach(child, request == PTRACE_KILL);
1423 if (ret < 0)
1424 goto out_tsk;
1425
1426 pt = ia64_task_regs(child);
1427 sw = (struct switch_stack *) (child->thread.ksp + 16);
1428
1429 switch (request) {
1430 case PTRACE_PEEKTEXT:
1431 case PTRACE_PEEKDATA:
1432 /* read word at location addr */
1433 urbs_end = ia64_get_user_rbs_end(child, pt, NULL);
1434 ret = ia64_peek(child, sw, urbs_end, addr, &data);
1435 if (ret == 0) {
1436 ret = data;
1437 /* ensure "ret" is not mistaken as an error code: */
1438 force_successful_syscall_return();
1439 }
1440 goto out_tsk;
1441
1442 case PTRACE_POKETEXT:
1443 case PTRACE_POKEDATA:
1444 /* write the word at location addr */
1445 urbs_end = ia64_get_user_rbs_end(child, pt, NULL);
1446 ret = ia64_poke(child, sw, urbs_end, addr, data);
1447 goto out_tsk;
1448
1449 case PTRACE_PEEKUSR:
1450 /* read the word at addr in the USER area */
1451 if (access_uarea(child, addr, &data, 0) < 0) {
1452 ret = -EIO;
1453 goto out_tsk;
1454 }
1455 ret = data;
1456 /* ensure "ret" is not mistaken as an error code */
1457 force_successful_syscall_return();
1458 goto out_tsk;
1459
1460 case PTRACE_POKEUSR:
1461 /* write the word at addr in the USER area */
1462 if (access_uarea(child, addr, &data, 1) < 0) {
1463 ret = -EIO;
1464 goto out_tsk;
1465 }
1466 ret = 0;
1467 goto out_tsk;
1468
1469 case PTRACE_OLD_GETSIGINFO:
1470 /* for backwards-compatibility */
1471 ret = ptrace_request(child, PTRACE_GETSIGINFO, addr, data);
1472 goto out_tsk;
1473
1474 case PTRACE_OLD_SETSIGINFO:
1475 /* for backwards-compatibility */
1476 ret = ptrace_request(child, PTRACE_SETSIGINFO, addr, data);
1477 goto out_tsk;
1478
1479 case PTRACE_SYSCALL:
1480 /* continue and stop at next (return from) syscall */
1481 case PTRACE_CONT:
1482 /* restart after signal. */
1483 ret = -EIO;
1484 if (data > _NSIG)
1485 goto out_tsk;
1486 if (request == PTRACE_SYSCALL)
1487 set_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
1488 else
1489 clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
1490 child->exit_code = data;
1491
1492 /*
1493 * Make sure the single step/taken-branch trap bits
1494 * are not set:
1495 */
1496 ia64_psr(pt)->ss = 0;
1497 ia64_psr(pt)->tb = 0;
1498
1499 wake_up_process(child);
1500 ret = 0;
1501 goto out_tsk;
1502
1503 case PTRACE_KILL:
1504 /*
1505 * Make the child exit. Best I can do is send it a
1506 * sigkill. Perhaps it should be put in the status
1507 * that it wants to exit.
1508 */
1509 if (child->exit_state == EXIT_ZOMBIE)
1510 /* already dead */
1511 goto out_tsk;
1512 child->exit_code = SIGKILL;
1513
1514 ptrace_disable(child);
1515 wake_up_process(child);
1516 ret = 0;
1517 goto out_tsk;
1518
1519 case PTRACE_SINGLESTEP:
1520 /* let child execute for one instruction */
1521 case PTRACE_SINGLEBLOCK:
1522 ret = -EIO;
1523 if (data > _NSIG)
1524 goto out_tsk;
1525
1526 clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
1527 if (request == PTRACE_SINGLESTEP) {
1528 ia64_psr(pt)->ss = 1;
1529 } else {
1530 ia64_psr(pt)->tb = 1;
1531 }
1532 child->exit_code = data;
1533
1534 /* give it a chance to run. */
1535 wake_up_process(child);
1536 ret = 0;
1537 goto out_tsk;
1538
1539 case PTRACE_DETACH:
1540 /* detach a process that was attached. */
1541 ret = ptrace_detach(child, data);
1542 goto out_tsk;
1543
1544 case PTRACE_GETREGS:
1545 ret = ptrace_getregs(child,
1546 (struct pt_all_user_regs __user *) data);
1547 goto out_tsk;
1548
1549 case PTRACE_SETREGS:
1550 ret = ptrace_setregs(child,
1551 (struct pt_all_user_regs __user *) data);
1552 goto out_tsk;
1553
1554 default:
1555 ret = ptrace_request(child, request, addr, data);
1556 goto out_tsk;
1557 }
1558 out_tsk:
1559 put_task_struct(child);
1560 out:
1561 unlock_kernel();
1562 return ret;
1563}
1564
1565
1566void
1567syscall_trace (void)
1568{
1569 if (!test_thread_flag(TIF_SYSCALL_TRACE))
1570 return;
1571 if (!(current->ptrace & PT_PTRACED))
1572 return;
1573 /*
1574 * The 0x80 provides a way for the tracing parent to
1575 * distinguish between a syscall stop and SIGTRAP delivery.
1576 */
1577 ptrace_notify(SIGTRAP
1578 | ((current->ptrace & PT_TRACESYSGOOD) ? 0x80 : 0));
1579
1580 /*
1581 * This isn't the same as continuing with a signal, but it
1582 * will do for normal use. strace only continues with a
1583 * signal if the stopping signal is not SIGTRAP. -brl
1584 */
1585 if (current->exit_code) {
1586 send_sig(current->exit_code, current, 1);
1587 current->exit_code = 0;
1588 }
1589}
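/*
 * Illustrative sketch (hypothetical tracer-side code, not part of this
 * file): when the tracer has set PT_TRACESYSGOOD (PTRACE_SETOPTIONS with
 * PTRACE_O_TRACESYSGOOD), the 0x80 bit added above lets it tell a syscall
 * stop apart from an ordinary SIGTRAP:
 */
#if 0	/* user-space example, for illustration only */
#include <signal.h>
#include <sys/wait.h>

static int
is_syscall_stop (int status)
{
	return WIFSTOPPED(status) && WSTOPSIG(status) == (SIGTRAP | 0x80);
}
#endif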
1590
1591/* "asmlinkage" so the input arguments are preserved... */
1592
1593asmlinkage void
1594syscall_trace_enter (long arg0, long arg1, long arg2, long arg3,
1595 long arg4, long arg5, long arg6, long arg7,
1596 struct pt_regs regs)
1597{
1598 long syscall;
1599
1600 if (unlikely(current->audit_context)) {
1601 if (IS_IA32_PROCESS(&regs))
1602 syscall = regs.r1;
1603 else
1604 syscall = regs.r15;
1605
1606 audit_syscall_entry(current, syscall, arg0, arg1, arg2, arg3);
1607 }
1608
1609 if (test_thread_flag(TIF_SYSCALL_TRACE)
1610 && (current->ptrace & PT_PTRACED))
1611 syscall_trace();
1612}
1613
1614/* "asmlinkage" so the input arguments are preserved... */
1615
1616asmlinkage void
1617syscall_trace_leave (long arg0, long arg1, long arg2, long arg3,
1618 long arg4, long arg5, long arg6, long arg7,
1619 struct pt_regs regs)
1620{
1621 if (unlikely(current->audit_context))
1622 audit_syscall_exit(current, regs.r8);
1623
1624 if (test_thread_flag(TIF_SYSCALL_TRACE)
1625 && (current->ptrace & PT_PTRACED))
1626 syscall_trace();
1627}
diff --git a/arch/ia64/kernel/sal.c b/arch/ia64/kernel/sal.c
new file mode 100644
index 000000000000..acc0f132f86c
--- /dev/null
+++ b/arch/ia64/kernel/sal.c
@@ -0,0 +1,302 @@
1/*
2 * System Abstraction Layer (SAL) interface routines.
3 *
4 * Copyright (C) 1998, 1999, 2001, 2003 Hewlett-Packard Co
5 * David Mosberger-Tang <davidm@hpl.hp.com>
6 * Copyright (C) 1999 VA Linux Systems
7 * Copyright (C) 1999 Walt Drummond <drummond@valinux.com>
8 */
9#include <linux/config.h>
10
11#include <linux/kernel.h>
12#include <linux/init.h>
13#include <linux/module.h>
14#include <linux/spinlock.h>
15#include <linux/string.h>
16
17#include <asm/page.h>
18#include <asm/sal.h>
19#include <asm/pal.h>
20
21 __cacheline_aligned DEFINE_SPINLOCK(sal_lock);
22unsigned long sal_platform_features;
23
24unsigned short sal_revision;
25unsigned short sal_version;
26
27#define SAL_MAJOR(x) ((x) >> 8)
28#define SAL_MINOR(x) ((x) & 0xff)
29
30static struct {
31 void *addr; /* function entry point */
32 void *gpval; /* gp value to use */
33} pdesc;
34
35static long
36default_handler (void)
37{
38 return -1;
39}
40
41ia64_sal_handler ia64_sal = (ia64_sal_handler) default_handler;
42ia64_sal_desc_ptc_t *ia64_ptc_domain_info;
43
44const char *
45ia64_sal_strerror (long status)
46{
47 const char *str;
48 switch (status) {
49 case 0: str = "Call completed without error"; break;
50 case 1: str = "Effect a warm boot of the system to complete "
51 "the update"; break;
52 case -1: str = "Not implemented"; break;
53 case -2: str = "Invalid argument"; break;
54 case -3: str = "Call completed with error"; break;
55 case -4: str = "Virtual address not registered"; break;
56 case -5: str = "No information available"; break;
57 case -6: str = "Insufficient space to add the entry"; break;
58 case -7: str = "Invalid entry_addr value"; break;
59 case -8: str = "Invalid interrupt vector"; break;
60 case -9: str = "Requested memory not available"; break;
61 case -10: str = "Unable to write to the NVM device"; break;
62 case -11: str = "Invalid partition type specified"; break;
63 case -12: str = "Invalid NVM_Object id specified"; break;
64 case -13: str = "NVM_Object already has the maximum number "
65 "of partitions"; break;
66 case -14: str = "Insufficient space in partition for the "
67 "requested write sub-function"; break;
68 case -15: str = "Insufficient data buffer space for the "
69 "requested read record sub-function"; break;
70 case -16: str = "Scratch buffer required for the write/delete "
71 "sub-function"; break;
72 case -17: str = "Insufficient space in the NVM_Object for the "
73 "requested create sub-function"; break;
74 case -18: str = "Invalid value specified in the partition_rec "
75 "argument"; break;
76 case -19: str = "Record oriented I/O not supported for this "
77 "partition"; break;
78 case -20: str = "Bad format of record to be written or "
79 "required keyword variable not "
80 "specified"; break;
81 default: str = "Unknown SAL status code"; break;
82 }
83 return str;
84}
85
86void __init
87ia64_sal_handler_init (void *entry_point, void *gpval)
88{
89 /* fill in the SAL procedure descriptor and point ia64_sal to it: */
90 pdesc.addr = entry_point;
91 pdesc.gpval = gpval;
92 ia64_sal = (ia64_sal_handler) &pdesc;
93}
94
95static void __init
96check_versions (struct ia64_sal_systab *systab)
97{
98 sal_revision = (systab->sal_rev_major << 8) | systab->sal_rev_minor;
99 sal_version = (systab->sal_b_rev_major << 8) | systab->sal_b_rev_minor;
100
101 /* Check for broken firmware */
102 if ((sal_revision == SAL_VERSION_CODE(49, 29))
103 && (sal_version == SAL_VERSION_CODE(49, 29)))
104 {
105 /*
106		 * Old firmware for zx2000 prototypes has this weird version number,
107 * reset it to something sane.
108 */
109 sal_revision = SAL_VERSION_CODE(2, 8);
110 sal_version = SAL_VERSION_CODE(0, 0);
111 }
112}
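/*
 * Illustrative note (not part of the original file): sal_revision and
 * sal_version hold BCD-coded major/minor pairs, which is why they are
 * printed with "%x" in ia64_sal_init() below.  For example, a system
 * table reporting major 0x03, minor 0x11 gives sal_revision == 0x0311:
 *
 *	SAL_MAJOR(0x0311) == 0x03	-> printed as "3"
 *	SAL_MINOR(0x0311) == 0x11	-> printed as "11"
 *
 * so the boot banner reads "SAL 3.11: ...".
 */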
113
114static void __init
115sal_desc_entry_point (void *p)
116{
117 struct ia64_sal_desc_entry_point *ep = p;
118 ia64_pal_handler_init(__va(ep->pal_proc));
119 ia64_sal_handler_init(__va(ep->sal_proc), __va(ep->gp));
120}
121
122#ifdef CONFIG_SMP
123static void __init
124set_smp_redirect (int flag)
125{
126#ifndef CONFIG_HOTPLUG_CPU
127 if (no_int_routing)
128 smp_int_redirect &= ~flag;
129 else
130 smp_int_redirect |= flag;
131#else
132 /*
133	 * For CPU hotplug we don't want to do any chipset-supported
134	 * interrupt redirection. The reason is that this would require
135	 * stopping all interrupts and hard-binding the irq to a cpu.
136	 * Later, when the interrupt fires, we would need to set the redir
137	 * hint again in the vector. This is cumbersome for something that
138	 * the user-mode irq balancer will solve anyway.
139 */
140 no_int_routing=1;
141 smp_int_redirect &= ~flag;
142#endif
143}
144#else
145#define set_smp_redirect(flag) do { } while (0)
146#endif
147
148static void __init
149sal_desc_platform_feature (void *p)
150{
151 struct ia64_sal_desc_platform_feature *pf = p;
152 sal_platform_features = pf->feature_mask;
153
154 printk(KERN_INFO "SAL Platform features:");
155 if (!sal_platform_features) {
156 printk(" None\n");
157 return;
158 }
159
160 if (sal_platform_features & IA64_SAL_PLATFORM_FEATURE_BUS_LOCK)
161 printk(" BusLock");
162 if (sal_platform_features & IA64_SAL_PLATFORM_FEATURE_IRQ_REDIR_HINT) {
163 printk(" IRQ_Redirection");
164 set_smp_redirect(SMP_IRQ_REDIRECTION);
165 }
166 if (sal_platform_features & IA64_SAL_PLATFORM_FEATURE_IPI_REDIR_HINT) {
167 printk(" IPI_Redirection");
168 set_smp_redirect(SMP_IPI_REDIRECTION);
169 }
170 if (sal_platform_features & IA64_SAL_PLATFORM_FEATURE_ITC_DRIFT)
171 printk(" ITC_Drift");
172 printk("\n");
173}
174
175#ifdef CONFIG_SMP
176static void __init
177sal_desc_ap_wakeup (void *p)
178{
179 struct ia64_sal_desc_ap_wakeup *ap = p;
180
181 switch (ap->mechanism) {
182 case IA64_SAL_AP_EXTERNAL_INT:
183 ap_wakeup_vector = ap->vector;
184 printk(KERN_INFO "SAL: AP wakeup using external interrupt "
185 "vector 0x%lx\n", ap_wakeup_vector);
186 break;
187 default:
188 printk(KERN_ERR "SAL: AP wakeup mechanism unsupported!\n");
189 break;
190 }
191}
192
193static void __init
194chk_nointroute_opt(void)
195{
196 char *cp;
197 extern char saved_command_line[];
198
199 for (cp = saved_command_line; *cp; ) {
200 if (memcmp(cp, "nointroute", 10) == 0) {
201 no_int_routing = 1;
202 printk ("no_int_routing on\n");
203 break;
204 } else {
205 while (*cp != ' ' && *cp)
206 ++cp;
207 while (*cp == ' ')
208 ++cp;
209 }
210 }
211}
212
213#else
214static void __init sal_desc_ap_wakeup(void *p) { }
215#endif
216
217void __init
218ia64_sal_init (struct ia64_sal_systab *systab)
219{
220 char *p;
221 int i;
222
223 if (!systab) {
224 printk(KERN_WARNING "Hmm, no SAL System Table.\n");
225 return;
226 }
227
228 if (strncmp(systab->signature, "SST_", 4) != 0)
229 printk(KERN_ERR "bad signature in system table!");
230
231 check_versions(systab);
232#ifdef CONFIG_SMP
233 chk_nointroute_opt();
234#endif
235
236 /* revisions are coded in BCD, so %x does the job for us */
237 printk(KERN_INFO "SAL %x.%x: %.32s %.32s%sversion %x.%x\n",
238 SAL_MAJOR(sal_revision), SAL_MINOR(sal_revision),
239 systab->oem_id, systab->product_id,
240 systab->product_id[0] ? " " : "",
241 SAL_MAJOR(sal_version), SAL_MINOR(sal_version));
242
243 p = (char *) (systab + 1);
244 for (i = 0; i < systab->entry_count; i++) {
245 /*
246		 * The first byte of each entry contains its type
247		 * descriptor.
248 */
249 switch (*p) {
250 case SAL_DESC_ENTRY_POINT:
251 sal_desc_entry_point(p);
252 break;
253 case SAL_DESC_PLATFORM_FEATURE:
254 sal_desc_platform_feature(p);
255 break;
256 case SAL_DESC_PTC:
257 ia64_ptc_domain_info = (ia64_sal_desc_ptc_t *)p;
258 break;
259 case SAL_DESC_AP_WAKEUP:
260 sal_desc_ap_wakeup(p);
261 break;
262 }
263 p += SAL_DESC_SIZE(*p);
264 }
265}
266
267int
268ia64_sal_oemcall(struct ia64_sal_retval *isrvp, u64 oemfunc, u64 arg1,
269 u64 arg2, u64 arg3, u64 arg4, u64 arg5, u64 arg6, u64 arg7)
270{
271 if (oemfunc < IA64_SAL_OEMFUNC_MIN || oemfunc > IA64_SAL_OEMFUNC_MAX)
272 return -1;
273 SAL_CALL(*isrvp, oemfunc, arg1, arg2, arg3, arg4, arg5, arg6, arg7);
274 return 0;
275}
276EXPORT_SYMBOL(ia64_sal_oemcall);
277
278int
279ia64_sal_oemcall_nolock(struct ia64_sal_retval *isrvp, u64 oemfunc, u64 arg1,
280 u64 arg2, u64 arg3, u64 arg4, u64 arg5, u64 arg6,
281 u64 arg7)
282{
283 if (oemfunc < IA64_SAL_OEMFUNC_MIN || oemfunc > IA64_SAL_OEMFUNC_MAX)
284 return -1;
285 SAL_CALL_NOLOCK(*isrvp, oemfunc, arg1, arg2, arg3, arg4, arg5, arg6,
286 arg7);
287 return 0;
288}
289EXPORT_SYMBOL(ia64_sal_oemcall_nolock);
290
291int
292ia64_sal_oemcall_reentrant(struct ia64_sal_retval *isrvp, u64 oemfunc,
293 u64 arg1, u64 arg2, u64 arg3, u64 arg4, u64 arg5,
294 u64 arg6, u64 arg7)
295{
296 if (oemfunc < IA64_SAL_OEMFUNC_MIN || oemfunc > IA64_SAL_OEMFUNC_MAX)
297 return -1;
298 SAL_CALL_REENTRANT(*isrvp, oemfunc, arg1, arg2, arg3, arg4, arg5, arg6,
299 arg7);
300 return 0;
301}
302EXPORT_SYMBOL(ia64_sal_oemcall_reentrant);
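/*
 * Illustrative sketch (not part of the original file): a platform driver
 * would call an OEM SAL function through one of the wrappers above,
 * checking both the wrapper's return value (range validation of the
 * function number) and the SAL status word.  The function number below
 * is made up for illustration and the .status field of
 * struct ia64_sal_retval is assumed to hold the SAL status.
 */
#if 0	/* example caller, for illustration only */
static int
example_oem_query (void)
{
	struct ia64_sal_retval isrv;

	/* hypothetical OEM function number; must lie within
	   [IA64_SAL_OEMFUNC_MIN, IA64_SAL_OEMFUNC_MAX] to pass the check above */
	if (ia64_sal_oemcall(&isrv, 0x02000042, 0, 0, 0, 0, 0, 0, 0) < 0)
		return -EINVAL;
	if (isrv.status < 0)
		return -EIO;		/* SAL reported an error */
	return 0;
}
#endif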
diff --git a/arch/ia64/kernel/salinfo.c b/arch/ia64/kernel/salinfo.c
new file mode 100644
index 000000000000..d227fabecd02
--- /dev/null
+++ b/arch/ia64/kernel/salinfo.c
@@ -0,0 +1,629 @@
1/*
2 * salinfo.c
3 *
4 * Creates entries in /proc/sal for various system features.
5 *
6 * Copyright (c) 2003 Silicon Graphics, Inc. All rights reserved.
7 * Copyright (c) 2003 Hewlett-Packard Co
8 * Bjorn Helgaas <bjorn.helgaas@hp.com>
9 *
10 * 10/30/2001 jbarnes@sgi.com copied much of Stephane's palinfo
11 * code to create this file
12 * Oct 23 2003 kaos@sgi.com
13 * Replace IPI with set_cpus_allowed() to read a record from the required cpu.
14 * Redesign salinfo log processing to separate interrupt and user space
15 * contexts.
16 * Cache the record across multi-block reads from user space.
17 * Support > 64 cpus.
18 * Delete module_exit and MOD_INC/DEC_COUNT, salinfo cannot be a module.
19 *
20 * Jan 28 2004 kaos@sgi.com
21 * Periodically check for outstanding MCA or INIT records.
22 *
23 * Dec 5 2004 kaos@sgi.com
24 * Standardize which records are cleared automatically.
25 */
26
27#include <linux/types.h>
28#include <linux/proc_fs.h>
29#include <linux/module.h>
30#include <linux/smp.h>
31#include <linux/smp_lock.h>
32#include <linux/timer.h>
33#include <linux/vmalloc.h>
34
35#include <asm/semaphore.h>
36#include <asm/sal.h>
37#include <asm/uaccess.h>
38
39MODULE_AUTHOR("Jesse Barnes <jbarnes@sgi.com>");
40MODULE_DESCRIPTION("/proc interface to IA-64 SAL features");
41MODULE_LICENSE("GPL");
42
43static int salinfo_read(char *page, char **start, off_t off, int count, int *eof, void *data);
44
45typedef struct {
46 const char *name; /* name of the proc entry */
47 unsigned long feature; /* feature bit */
48 struct proc_dir_entry *entry; /* registered entry (removal) */
49} salinfo_entry_t;
50
51/*
52 * List {name,feature} pairs for every entry in /proc/sal/<feature>
53 * that this module exports
54 */
55static salinfo_entry_t salinfo_entries[]={
56 { "bus_lock", IA64_SAL_PLATFORM_FEATURE_BUS_LOCK, },
57 { "irq_redirection", IA64_SAL_PLATFORM_FEATURE_IRQ_REDIR_HINT, },
58 { "ipi_redirection", IA64_SAL_PLATFORM_FEATURE_IPI_REDIR_HINT, },
59 { "itc_drift", IA64_SAL_PLATFORM_FEATURE_ITC_DRIFT, },
60};
61
62#define NR_SALINFO_ENTRIES ARRAY_SIZE(salinfo_entries)
63
64static char *salinfo_log_name[] = {
65 "mca",
66 "init",
67 "cmc",
68 "cpe",
69};
70
71static struct proc_dir_entry *salinfo_proc_entries[
72 ARRAY_SIZE(salinfo_entries) + /* /proc/sal/bus_lock */
73 ARRAY_SIZE(salinfo_log_name) + /* /proc/sal/{mca,...} */
74 (2 * ARRAY_SIZE(salinfo_log_name)) + /* /proc/sal/mca/{event,data} */
75 1]; /* /proc/sal */
76
77/* Some records we get ourselves, some are accessed as saved data in buffers
78 * that are owned by mca.c.
79 */
80struct salinfo_data_saved {
81 u8* buffer;
82 u64 size;
83 u64 id;
84 int cpu;
85};
86
87/* State transitions. Actions are :-
88 * Write "read <cpunum>" to the data file.
89 * Write "clear <cpunum>" to the data file.
90 * Write "oemdata <cpunum> <offset>" to the data file.
91 * Read from the data file.
92 * Close the data file.
93 *
94 * Start state is NO_DATA.
95 *
96 * NO_DATA
97 * write "read <cpunum>" -> NO_DATA or LOG_RECORD.
98 * write "clear <cpunum>" -> NO_DATA or LOG_RECORD.
99 *	write "oemdata <cpunum> <offset>" -> return -EINVAL.
100 * read data -> return EOF.
101 * close -> unchanged. Free record areas.
102 *
103 * LOG_RECORD
104 * write "read <cpunum>" -> NO_DATA or LOG_RECORD.
105 * write "clear <cpunum>" -> NO_DATA or LOG_RECORD.
106 *	write "oemdata <cpunum> <offset>" -> format the oem data, goto OEMDATA.
107 * read data -> return the INIT/MCA/CMC/CPE record.
108 * close -> unchanged. Keep record areas.
109 *
110 * OEMDATA
111 * write "read <cpunum>" -> NO_DATA or LOG_RECORD.
112 * write "clear <cpunum>" -> NO_DATA or LOG_RECORD.
113 *	write "oemdata <cpunum> <offset>" -> format the oem data, goto OEMDATA.
114 * read data -> return the formatted oemdata.
115 * close -> unchanged. Keep record areas.
116 *
117 * Closing the data file does not change the state. This allows shell scripts
118 * to manipulate salinfo data, each shell redirection opens the file, does one
119 * action then closes it again. The record areas are only freed at close when
120 * the state is NO_DATA.
121 */
122enum salinfo_state {
123 STATE_NO_DATA,
124 STATE_LOG_RECORD,
125 STATE_OEMDATA,
126};
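/*
 * Illustrative sketch (hypothetical user-space code, not part of this
 * file): the state machine above is driven through /proc/sal/<type>/event
 * and /proc/sal/<type>/data.  A minimal consumer reads a "read <cpunum>"
 * command from the event file, writes it to the data file, reads back the
 * record, then writes "clear <cpunum>" so the next record can surface.
 * The record buffer size below is an arbitrary upper bound.
 */
#if 0	/* user-space example, for illustration only */
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

static void
consume_one_mca_record (void)
{
	char cmd[32], record[65536];
	int event_fd = open("/proc/sal/mca/event", O_RDONLY);
	int data_fd  = open("/proc/sal/mca/data", O_RDWR);
	int cpu = 0;
	ssize_t n;

	n = read(event_fd, cmd, sizeof(cmd) - 1);	/* blocks; e.g. "read 3\n" */
	cmd[n > 0 ? n : 0] = '\0';
	sscanf(cmd, "read %d", &cpu);

	write(data_fd, cmd, strlen(cmd));		/* -> STATE_LOG_RECORD (or NO_DATA) */
	read(data_fd, record, sizeof(record));		/* raw SAL record, if any */

	snprintf(cmd, sizeof(cmd), "clear %d\n", cpu);
	write(data_fd, cmd, strlen(cmd));		/* release the record */

	close(event_fd);
	close(data_fd);
}
#endif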
127
128struct salinfo_data {
129 volatile cpumask_t cpu_event; /* which cpus have outstanding events */
130 struct semaphore sem; /* count of cpus with outstanding events (bits set in cpu_event) */
131 u8 *log_buffer;
132 u64 log_size;
133 u8 *oemdata; /* decoded oem data */
134 u64 oemdata_size;
135 int open; /* single-open to prevent races */
136 u8 type;
137 u8 saved_num; /* using a saved record? */
138 enum salinfo_state state :8; /* processing state */
139 u8 padding;
140 int cpu_check; /* next CPU to check */
141 struct salinfo_data_saved data_saved[5];/* save last 5 records from mca.c, must be < 255 */
142};
143
144static struct salinfo_data salinfo_data[ARRAY_SIZE(salinfo_log_name)];
145
146static spinlock_t data_lock, data_saved_lock;
147
148/** salinfo_platform_oemdata - optional callback to decode oemdata from an error
149 * record.
150 * @sect_header: pointer to the start of the section to decode.
151 * @oemdata: returns vmalloc area containing the decoded output.
152 * @oemdata_size: returns length of decoded output (strlen).
153 *
154 * Description: If user space asks for oem data to be decoded by the kernel
155 * and/or prom and the platform has set salinfo_platform_oemdata to the address
156 * of a platform specific routine then call that routine. salinfo_platform_oemdata
157 * vmalloc's and formats its output area, returning the address of the text
158 * and its strlen. Returns 0 for success, -ve for error. The callback is
159 * invoked on the cpu that generated the error record.
160 */
161int (*salinfo_platform_oemdata)(const u8 *sect_header, u8 **oemdata, u64 *oemdata_size);
162
163struct salinfo_platform_oemdata_parms {
164 const u8 *efi_guid;
165 u8 **oemdata;
166 u64 *oemdata_size;
167 int ret;
168};
169
170static void
171salinfo_platform_oemdata_cpu(void *context)
172{
173 struct salinfo_platform_oemdata_parms *parms = context;
174 parms->ret = salinfo_platform_oemdata(parms->efi_guid, parms->oemdata, parms->oemdata_size);
175}
176
177static void
178shift1_data_saved (struct salinfo_data *data, int shift)
179{
180 memcpy(data->data_saved+shift, data->data_saved+shift+1,
181 (ARRAY_SIZE(data->data_saved) - (shift+1)) * sizeof(data->data_saved[0]));
182 memset(data->data_saved + ARRAY_SIZE(data->data_saved) - 1, 0,
183 sizeof(data->data_saved[0]));
184}
185
186/* This routine is invoked in interrupt context. Note: mca.c enables
187 * interrupts before calling this code for CMC/CPE. MCA and INIT events are
188 * not irq safe, do not call any routines that use spinlocks, they may deadlock.
189 * MCA and INIT records are recorded, a timer event will look for any
190 * outstanding events and wake up the user space code.
191 *
192 * The buffer passed from mca.c points to the output from ia64_log_get. This is
193 * a persistent buffer but its contents can change between the interrupt and
194 * when user space processes the record. Save the record id to identify
195 * changes.
196 */
197void
198salinfo_log_wakeup(int type, u8 *buffer, u64 size, int irqsafe)
199{
200 struct salinfo_data *data = salinfo_data + type;
201 struct salinfo_data_saved *data_saved;
202 unsigned long flags = 0;
203 int i;
204 int saved_size = ARRAY_SIZE(data->data_saved);
205
206 BUG_ON(type >= ARRAY_SIZE(salinfo_log_name));
207
208 if (irqsafe)
209 spin_lock_irqsave(&data_saved_lock, flags);
210 for (i = 0, data_saved = data->data_saved; i < saved_size; ++i, ++data_saved) {
211 if (!data_saved->buffer)
212 break;
213 }
214 if (i == saved_size) {
215 if (!data->saved_num) {
216 shift1_data_saved(data, 0);
217 data_saved = data->data_saved + saved_size - 1;
218 } else
219 data_saved = NULL;
220 }
221 if (data_saved) {
222 data_saved->cpu = smp_processor_id();
223 data_saved->id = ((sal_log_record_header_t *)buffer)->id;
224 data_saved->size = size;
225 data_saved->buffer = buffer;
226 }
227 if (irqsafe)
228 spin_unlock_irqrestore(&data_saved_lock, flags);
229
230 if (!test_and_set_bit(smp_processor_id(), &data->cpu_event)) {
231 if (irqsafe)
232 up(&data->sem);
233 }
234}
235
236/* Check for outstanding MCA/INIT records every minute (arbitrary) */
237#define SALINFO_TIMER_DELAY (60*HZ)
238static struct timer_list salinfo_timer;
239
240static void
241salinfo_timeout_check(struct salinfo_data *data)
242{
243 int i;
244 if (!data->open)
245 return;
246 for (i = 0; i < NR_CPUS; ++i) {
247 if (test_bit(i, &data->cpu_event)) {
248 /* double up() is not a problem, user space will see no
249 * records for the additional "events".
250 */
251 up(&data->sem);
252 }
253 }
254}
255
256static void
257salinfo_timeout (unsigned long arg)
258{
259 salinfo_timeout_check(salinfo_data + SAL_INFO_TYPE_MCA);
260 salinfo_timeout_check(salinfo_data + SAL_INFO_TYPE_INIT);
261 salinfo_timer.expires = jiffies + SALINFO_TIMER_DELAY;
262 add_timer(&salinfo_timer);
263}
264
265static int
266salinfo_event_open(struct inode *inode, struct file *file)
267{
268 if (!capable(CAP_SYS_ADMIN))
269 return -EPERM;
270 return 0;
271}
272
273static ssize_t
274salinfo_event_read(struct file *file, char __user *buffer, size_t count, loff_t *ppos)
275{
276 struct inode *inode = file->f_dentry->d_inode;
277 struct proc_dir_entry *entry = PDE(inode);
278 struct salinfo_data *data = entry->data;
279 char cmd[32];
280 size_t size;
281 int i, n, cpu = -1;
282
283retry:
284 if (down_trylock(&data->sem)) {
285 if (file->f_flags & O_NONBLOCK)
286 return -EAGAIN;
287 if (down_interruptible(&data->sem))
288 return -ERESTARTSYS;
289 }
290
291 n = data->cpu_check;
292 for (i = 0; i < NR_CPUS; i++) {
293 if (test_bit(n, &data->cpu_event)) {
294 cpu = n;
295 break;
296 }
297 if (++n == NR_CPUS)
298 n = 0;
299 }
300
301 if (cpu == -1)
302 goto retry;
303
304 /* events are sticky until the user says "clear" */
305 up(&data->sem);
306
307 /* for next read, start checking at next CPU */
308 data->cpu_check = cpu;
309 if (++data->cpu_check == NR_CPUS)
310 data->cpu_check = 0;
311
312 snprintf(cmd, sizeof(cmd), "read %d\n", cpu);
313
314 size = strlen(cmd);
315 if (size > count)
316 size = count;
317 if (copy_to_user(buffer, cmd, size))
318 return -EFAULT;
319
320 return size;
321}
322
323static struct file_operations salinfo_event_fops = {
324 .open = salinfo_event_open,
325 .read = salinfo_event_read,
326};
327
328static int
329salinfo_log_open(struct inode *inode, struct file *file)
330{
331 struct proc_dir_entry *entry = PDE(inode);
332 struct salinfo_data *data = entry->data;
333
334 if (!capable(CAP_SYS_ADMIN))
335 return -EPERM;
336
337 spin_lock(&data_lock);
338 if (data->open) {
339 spin_unlock(&data_lock);
340 return -EBUSY;
341 }
342 data->open = 1;
343 spin_unlock(&data_lock);
344
345 if (data->state == STATE_NO_DATA &&
346 !(data->log_buffer = vmalloc(ia64_sal_get_state_info_size(data->type)))) {
347 data->open = 0;
348 return -ENOMEM;
349 }
350
351 return 0;
352}
353
354static int
355salinfo_log_release(struct inode *inode, struct file *file)
356{
357 struct proc_dir_entry *entry = PDE(inode);
358 struct salinfo_data *data = entry->data;
359
360 if (data->state == STATE_NO_DATA) {
361 vfree(data->log_buffer);
362 vfree(data->oemdata);
363 data->log_buffer = NULL;
364 data->oemdata = NULL;
365 }
366 spin_lock(&data_lock);
367 data->open = 0;
368 spin_unlock(&data_lock);
369 return 0;
370}
371
372static void
373call_on_cpu(int cpu, void (*fn)(void *), void *arg)
374{
375 cpumask_t save_cpus_allowed, new_cpus_allowed;
376 memcpy(&save_cpus_allowed, &current->cpus_allowed, sizeof(save_cpus_allowed));
377 memset(&new_cpus_allowed, 0, sizeof(new_cpus_allowed));
378 set_bit(cpu, &new_cpus_allowed);
379 set_cpus_allowed(current, new_cpus_allowed);
380 (*fn)(arg);
381 set_cpus_allowed(current, save_cpus_allowed);
382}
383
384static void
385salinfo_log_read_cpu(void *context)
386{
387 struct salinfo_data *data = context;
388 sal_log_record_header_t *rh;
389 data->log_size = ia64_sal_get_state_info(data->type, (u64 *) data->log_buffer);
390 rh = (sal_log_record_header_t *)(data->log_buffer);
391 /* Clear corrected errors as they are read from SAL */
392 if (rh->severity == sal_log_severity_corrected)
393 ia64_sal_clear_state_info(data->type);
394}
395
396static void
397salinfo_log_new_read(int cpu, struct salinfo_data *data)
398{
399 struct salinfo_data_saved *data_saved;
400 unsigned long flags;
401 int i;
402 int saved_size = ARRAY_SIZE(data->data_saved);
403
404 data->saved_num = 0;
405 spin_lock_irqsave(&data_saved_lock, flags);
406retry:
407 for (i = 0, data_saved = data->data_saved; i < saved_size; ++i, ++data_saved) {
408 if (data_saved->buffer && data_saved->cpu == cpu) {
409 sal_log_record_header_t *rh = (sal_log_record_header_t *)(data_saved->buffer);
410 data->log_size = data_saved->size;
411 memcpy(data->log_buffer, rh, data->log_size);
412 barrier(); /* id check must not be moved */
413 if (rh->id == data_saved->id) {
414 data->saved_num = i+1;
415 break;
416 }
417 /* saved record changed by mca.c since interrupt, discard it */
418 shift1_data_saved(data, i);
419 goto retry;
420 }
421 }
422 spin_unlock_irqrestore(&data_saved_lock, flags);
423
424 if (!data->saved_num)
425 call_on_cpu(cpu, salinfo_log_read_cpu, data);
426 if (!data->log_size) {
427 data->state = STATE_NO_DATA;
428 clear_bit(cpu, &data->cpu_event);
429 } else {
430 data->state = STATE_LOG_RECORD;
431 }
432}
433
434static ssize_t
435salinfo_log_read(struct file *file, char __user *buffer, size_t count, loff_t *ppos)
436{
437 struct inode *inode = file->f_dentry->d_inode;
438 struct proc_dir_entry *entry = PDE(inode);
439 struct salinfo_data *data = entry->data;
440 u8 *buf;
441 u64 bufsize;
442
443 if (data->state == STATE_LOG_RECORD) {
444 buf = data->log_buffer;
445 bufsize = data->log_size;
446 } else if (data->state == STATE_OEMDATA) {
447 buf = data->oemdata;
448 bufsize = data->oemdata_size;
449 } else {
450 buf = NULL;
451 bufsize = 0;
452 }
453 return simple_read_from_buffer(buffer, count, ppos, buf, bufsize);
454}
455
456static void
457salinfo_log_clear_cpu(void *context)
458{
459 struct salinfo_data *data = context;
460 ia64_sal_clear_state_info(data->type);
461}
462
463static int
464salinfo_log_clear(struct salinfo_data *data, int cpu)
465{
466 sal_log_record_header_t *rh;
467 data->state = STATE_NO_DATA;
468 if (!test_bit(cpu, &data->cpu_event))
469 return 0;
470 down(&data->sem);
471 clear_bit(cpu, &data->cpu_event);
472 if (data->saved_num) {
473 unsigned long flags;
474 spin_lock_irqsave(&data_saved_lock, flags);
475		shift1_data_saved(data, data->saved_num - 1);
476 data->saved_num = 0;
477 spin_unlock_irqrestore(&data_saved_lock, flags);
478 }
479 rh = (sal_log_record_header_t *)(data->log_buffer);
480 /* Corrected errors have already been cleared from SAL */
481 if (rh->severity != sal_log_severity_corrected)
482 call_on_cpu(cpu, salinfo_log_clear_cpu, data);
483 /* clearing a record may make a new record visible */
484 salinfo_log_new_read(cpu, data);
485 if (data->state == STATE_LOG_RECORD &&
486 !test_and_set_bit(cpu, &data->cpu_event))
487 up(&data->sem);
488 return 0;
489}
490
491static ssize_t
492salinfo_log_write(struct file *file, const char __user *buffer, size_t count, loff_t *ppos)
493{
494 struct inode *inode = file->f_dentry->d_inode;
495 struct proc_dir_entry *entry = PDE(inode);
496 struct salinfo_data *data = entry->data;
497 char cmd[32];
498 size_t size;
499 u32 offset;
500 int cpu;
501
502 size = sizeof(cmd);
503 if (count < size)
504 size = count;
505 if (copy_from_user(cmd, buffer, size))
506 return -EFAULT;
507
508 if (sscanf(cmd, "read %d", &cpu) == 1) {
509 salinfo_log_new_read(cpu, data);
510 } else if (sscanf(cmd, "clear %d", &cpu) == 1) {
511 int ret;
512 if ((ret = salinfo_log_clear(data, cpu)))
513 count = ret;
514 } else if (sscanf(cmd, "oemdata %d %d", &cpu, &offset) == 2) {
515 if (data->state != STATE_LOG_RECORD && data->state != STATE_OEMDATA)
516 return -EINVAL;
517 if (offset > data->log_size - sizeof(efi_guid_t))
518 return -EINVAL;
519 data->state = STATE_OEMDATA;
520 if (salinfo_platform_oemdata) {
521 struct salinfo_platform_oemdata_parms parms = {
522 .efi_guid = data->log_buffer + offset,
523 .oemdata = &data->oemdata,
524 .oemdata_size = &data->oemdata_size
525 };
526 call_on_cpu(cpu, salinfo_platform_oemdata_cpu, &parms);
527 if (parms.ret)
528 count = parms.ret;
529 } else
530 data->oemdata_size = 0;
531 } else
532 return -EINVAL;
533
534 return count;
535}
536
537static struct file_operations salinfo_data_fops = {
538 .open = salinfo_log_open,
539 .release = salinfo_log_release,
540 .read = salinfo_log_read,
541 .write = salinfo_log_write,
542};
543
544static int __init
545salinfo_init(void)
546{
547 struct proc_dir_entry *salinfo_dir; /* /proc/sal dir entry */
548 struct proc_dir_entry **sdir = salinfo_proc_entries; /* keeps track of every entry */
549 struct proc_dir_entry *dir, *entry;
550 struct salinfo_data *data;
551 int i, j, online;
552
553 salinfo_dir = proc_mkdir("sal", NULL);
554 if (!salinfo_dir)
555 return 0;
556
557 for (i=0; i < NR_SALINFO_ENTRIES; i++) {
558 /* pass the feature bit in question as misc data */
559 *sdir++ = create_proc_read_entry (salinfo_entries[i].name, 0, salinfo_dir,
560 salinfo_read, (void *)salinfo_entries[i].feature);
561 }
562
563 for (i = 0; i < ARRAY_SIZE(salinfo_log_name); i++) {
564 data = salinfo_data + i;
565 data->type = i;
566 sema_init(&data->sem, 0);
567 dir = proc_mkdir(salinfo_log_name[i], salinfo_dir);
568 if (!dir)
569 continue;
570
571 entry = create_proc_entry("event", S_IRUSR, dir);
572 if (!entry)
573 continue;
574 entry->data = data;
575 entry->proc_fops = &salinfo_event_fops;
576 *sdir++ = entry;
577
578 entry = create_proc_entry("data", S_IRUSR | S_IWUSR, dir);
579 if (!entry)
580 continue;
581 entry->data = data;
582 entry->proc_fops = &salinfo_data_fops;
583 *sdir++ = entry;
584
585 /* we missed any events before now */
586 online = 0;
587 for (j = 0; j < NR_CPUS; j++)
588 if (cpu_online(j)) {
589 set_bit(j, &data->cpu_event);
590 ++online;
591 }
592 sema_init(&data->sem, online);
593
594 *sdir++ = dir;
595 }
596
597 *sdir++ = salinfo_dir;
598
599 init_timer(&salinfo_timer);
600 salinfo_timer.expires = jiffies + SALINFO_TIMER_DELAY;
601 salinfo_timer.function = &salinfo_timeout;
602 add_timer(&salinfo_timer);
603
604 return 0;
605}
606
607/*
608 * 'data' contains an integer that corresponds to the feature we're
609 * testing
610 */
611static int
612salinfo_read(char *page, char **start, off_t off, int count, int *eof, void *data)
613{
614 int len = 0;
615
616 len = sprintf(page, (sal_platform_features & (unsigned long)data) ? "1\n" : "0\n");
617
618 if (len <= off+count) *eof = 1;
619
620 *start = page + off;
621 len -= off;
622
623 if (len>count) len = count;
624 if (len<0) len = 0;
625
626 return len;
627}
628
629module_init(salinfo_init);
diff --git a/arch/ia64/kernel/semaphore.c b/arch/ia64/kernel/semaphore.c
new file mode 100644
index 000000000000..2724ef3fbae2
--- /dev/null
+++ b/arch/ia64/kernel/semaphore.c
@@ -0,0 +1,165 @@
1/*
2 * IA-64 semaphore implementation (derived from x86 version).
3 *
4 * Copyright (C) 1999-2000, 2002 Hewlett-Packard Co
5 * David Mosberger-Tang <davidm@hpl.hp.com>
6 */
7
8/*
9 * Semaphores are implemented using a two-way counter: The "count"
10 * variable is decremented for each process that tries to acquire the
11 * semaphore, while the "sleepers" variable is a count of such
12 * acquires.
13 *
14 * Notably, the inline "up()" and "down()" functions can efficiently
15 * test if they need to do any extra work (up needs to do something
16 * only if count was negative before the increment operation).
17 *
18 * "sleeping" and the contention routine ordering is protected
19 * by the spinlock in the semaphore's waitqueue head.
20 *
21 * Note that these functions are only called when there is contention
22 * on the lock, and as such all this is the "non-critical" part of the
23 * whole semaphore business. The critical part is the inline stuff in
24 * <asm/semaphore.h> where we want to avoid any extra jumps and calls.
25 */
26#include <linux/sched.h>
27#include <linux/init.h>
28
29#include <asm/errno.h>
30#include <asm/semaphore.h>
31
32/*
33 * Logic:
34 * - Only on a boundary condition do we need to care. When we go
35 * from a negative count to a non-negative, we wake people up.
36 * - When we go from a non-negative count to a negative one, we must
37 * (a) synchronize with the "sleepers" count and (b) make sure
38 * that we're on the wakeup list before we synchronize so that
39 * we cannot lose wakeup events.
40 */
41
42void
43__up (struct semaphore *sem)
44{
45 wake_up(&sem->wait);
46}
47
48void __sched __down (struct semaphore *sem)
49{
50 struct task_struct *tsk = current;
51 DECLARE_WAITQUEUE(wait, tsk);
52 unsigned long flags;
53
54 tsk->state = TASK_UNINTERRUPTIBLE;
55 spin_lock_irqsave(&sem->wait.lock, flags);
56 add_wait_queue_exclusive_locked(&sem->wait, &wait);
57
58 sem->sleepers++;
59 for (;;) {
60 int sleepers = sem->sleepers;
61
62 /*
63 * Add "everybody else" into it. They aren't
64 * playing, because we own the spinlock in
65 * the wait_queue_head.
66 */
67 if (!atomic_add_negative(sleepers - 1, &sem->count)) {
68 sem->sleepers = 0;
69 break;
70 }
71 sem->sleepers = 1; /* us - see -1 above */
72 spin_unlock_irqrestore(&sem->wait.lock, flags);
73
74 schedule();
75
76 spin_lock_irqsave(&sem->wait.lock, flags);
77 tsk->state = TASK_UNINTERRUPTIBLE;
78 }
79 remove_wait_queue_locked(&sem->wait, &wait);
80 wake_up_locked(&sem->wait);
81 spin_unlock_irqrestore(&sem->wait.lock, flags);
82 tsk->state = TASK_RUNNING;
83}
84
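As a concrete reading of the count/sleepers bookkeeping above, here is a purely arithmetic user-space trace (a sketch with plain ints standing in for the atomic operations) of a mutex, initial count 1, being contended by a second task:

#include <stdio.h>

int main(void)
{
	int count = 1, sleepers = 0;

	count--;			/* task A: down(), 1 -> 0, uncontended      */
	count--;			/* task B: down(), 0 -> -1, enters __down() */
	sleepers++;			/* B registers itself as a sleeper          */
	count += sleepers - 1;		/* add "everybody else": -1 + 0 = -1        */
	sleepers = 1;			/* still negative, so B goes to sleep       */
	printf("B asleep: count=%d sleepers=%d\n", count, sleepers);

	count++;			/* task A: up(), -1 -> 0, wakes B           */
	count += sleepers - 1;		/* B retries: 0 + 0 = 0, non-negative       */
	sleepers = 0;			/* B now owns the semaphore                 */
	printf("B owns:   count=%d sleepers=%d\n", count, sleepers);
	return 0;
}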
85int __sched __down_interruptible (struct semaphore * sem)
86{
87 int retval = 0;
88 struct task_struct *tsk = current;
89 DECLARE_WAITQUEUE(wait, tsk);
90 unsigned long flags;
91
92 tsk->state = TASK_INTERRUPTIBLE;
93 spin_lock_irqsave(&sem->wait.lock, flags);
94 add_wait_queue_exclusive_locked(&sem->wait, &wait);
95
96	sem->sleepers++;
97 for (;;) {
98 int sleepers = sem->sleepers;
99
100 /*
101 * With signals pending, this turns into
102 * the trylock failure case - we won't be
103		 * sleeping, and we can't get the lock as
104 * it has contention. Just correct the count
105 * and exit.
106 */
107 if (signal_pending(current)) {
108 retval = -EINTR;
109 sem->sleepers = 0;
110 atomic_add(sleepers, &sem->count);
111 break;
112 }
113
114 /*
115 * Add "everybody else" into it. They aren't
116 * playing, because we own the spinlock in
117 * wait_queue_head. The "-1" is because we're
118 * still hoping to get the semaphore.
119 */
120 if (!atomic_add_negative(sleepers - 1, &sem->count)) {
121 sem->sleepers = 0;
122 break;
123 }
124 sem->sleepers = 1; /* us - see -1 above */
125 spin_unlock_irqrestore(&sem->wait.lock, flags);
126
127 schedule();
128
129 spin_lock_irqsave(&sem->wait.lock, flags);
130 tsk->state = TASK_INTERRUPTIBLE;
131 }
132 remove_wait_queue_locked(&sem->wait, &wait);
133 wake_up_locked(&sem->wait);
134 spin_unlock_irqrestore(&sem->wait.lock, flags);
135
136 tsk->state = TASK_RUNNING;
137 return retval;
138}
139
140/*
141 * Trylock failed - make sure we correct for having decremented the
142 * count.
143 */
144int
145__down_trylock (struct semaphore *sem)
146{
147 unsigned long flags;
148 int sleepers;
149
150 spin_lock_irqsave(&sem->wait.lock, flags);
151 sleepers = sem->sleepers + 1;
152 sem->sleepers = 0;
153
154 /*
155 * Add "everybody else" and us into it. They aren't
156 * playing, because we own the spinlock in the
157 * wait_queue_head.
158 */
159 if (!atomic_add_negative(sleepers, &sem->count)) {
160 wake_up_locked(&sem->wait);
161 }
162
163 spin_unlock_irqrestore(&sem->wait.lock, flags);
164 return 1;
165}
diff --git a/arch/ia64/kernel/setup.c b/arch/ia64/kernel/setup.c
new file mode 100644
index 000000000000..f05650c801d2
--- /dev/null
+++ b/arch/ia64/kernel/setup.c
@@ -0,0 +1,723 @@
1/*
2 * Architecture-specific setup.
3 *
4 * Copyright (C) 1998-2001, 2003-2004 Hewlett-Packard Co
5 * David Mosberger-Tang <davidm@hpl.hp.com>
6 * Stephane Eranian <eranian@hpl.hp.com>
7 * Copyright (C) 2000, Rohit Seth <rohit.seth@intel.com>
8 * Copyright (C) 1999 VA Linux Systems
9 * Copyright (C) 1999 Walt Drummond <drummond@valinux.com>
10 *
11 * 11/12/01 D.Mosberger Convert get_cpuinfo() to seq_file based show_cpuinfo().
12 * 04/04/00 D.Mosberger renamed cpu_initialized to cpu_online_map
13 * 03/31/00 R.Seth cpu_initialized and current->processor fixes
14 * 02/04/00 D.Mosberger some more get_cpuinfo fixes...
15 * 02/01/00 R.Seth fixed get_cpuinfo for SMP
16 * 01/07/99 S.Eranian added the support for command line argument
17 * 06/24/99 W.Drummond added boot_cpu_data.
18 */
19#include <linux/config.h>
20#include <linux/module.h>
21#include <linux/init.h>
22
23#include <linux/acpi.h>
24#include <linux/bootmem.h>
25#include <linux/console.h>
26#include <linux/delay.h>
27#include <linux/kernel.h>
28#include <linux/reboot.h>
29#include <linux/sched.h>
30#include <linux/seq_file.h>
31#include <linux/string.h>
32#include <linux/threads.h>
33#include <linux/tty.h>
34#include <linux/serial.h>
35#include <linux/serial_core.h>
36#include <linux/efi.h>
37#include <linux/initrd.h>
38
39#include <asm/ia32.h>
40#include <asm/machvec.h>
41#include <asm/mca.h>
42#include <asm/meminit.h>
43#include <asm/page.h>
44#include <asm/patch.h>
45#include <asm/pgtable.h>
46#include <asm/processor.h>
47#include <asm/sal.h>
48#include <asm/sections.h>
49#include <asm/serial.h>
50#include <asm/setup.h>
51#include <asm/smp.h>
52#include <asm/system.h>
53#include <asm/unistd.h>
54
55#if defined(CONFIG_SMP) && (IA64_CPU_SIZE > PAGE_SIZE)
56# error "struct cpuinfo_ia64 too big!"
57#endif
58
59#ifdef CONFIG_SMP
60unsigned long __per_cpu_offset[NR_CPUS];
61EXPORT_SYMBOL(__per_cpu_offset);
62#endif
63
64DEFINE_PER_CPU(struct cpuinfo_ia64, cpu_info);
65DEFINE_PER_CPU(unsigned long, local_per_cpu_offset);
66DEFINE_PER_CPU(unsigned long, ia64_phys_stacked_size_p8);
67unsigned long ia64_cycles_per_usec;
68struct ia64_boot_param *ia64_boot_param;
69struct screen_info screen_info;
70
71unsigned long ia64_max_cacheline_size;
72unsigned long ia64_iobase; /* virtual address for I/O accesses */
73EXPORT_SYMBOL(ia64_iobase);
74struct io_space io_space[MAX_IO_SPACES];
75EXPORT_SYMBOL(io_space);
76unsigned int num_io_spaces;
77
78/*
79 * The merge_mask variable needs to be set to (max(iommu_page_size(iommu)) - 1). This
80 * mask specifies a mask of address bits that must be 0 in order for two buffers to be
81 * mergeable by the I/O MMU (i.e., the end address of the first buffer and the start
82 * address of the second buffer must be aligned to (merge_mask+1) in order to be
83 * mergeable). By default, we assume there is no I/O MMU which can merge physically
85 * discontiguous buffers, so we set the merge_mask to ~0UL, which corresponds to an iommu
85 * page-size of 2^64.
86 */
87unsigned long ia64_max_iommu_merge_mask = ~0UL;
88EXPORT_SYMBOL(ia64_max_iommu_merge_mask);
89
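Reading the comment above concretely: two buffers can be merged only when the end of the first and the start of the second are both aligned to merge_mask + 1. A small user-space sketch with a hypothetical 4 KiB IOMMU page size and made-up addresses:

#include <stdio.h>

int main(void)
{
	unsigned long merge_mask = 4096 - 1;		/* hypothetical IOMMU page size */
	unsigned long end_of_first = 0x10000;		/* end of the first buffer      */
	unsigned long start_of_second = 0x10000;	/* start of the second buffer   */

	int mergeable = !(end_of_first & merge_mask) &&
			!(start_of_second & merge_mask);
	printf("mergeable: %d\n", mergeable);		/* prints 1 */
	return 0;
}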
90/*
91 * We use a special marker for the end of memory and it uses the extra (+1) slot
92 */
93struct rsvd_region rsvd_region[IA64_MAX_RSVD_REGIONS + 1];
94int num_rsvd_regions;
95
96
97/*
98 * Filter incoming memory segments based on the primitive map created from the boot
99 * parameters. Segments contained in the map are removed from the memory ranges. A
100 * caller-specified function is called with the memory ranges that remain after filtering.
101 * This routine does not assume the incoming segments are sorted.
102 */
103int
104filter_rsvd_memory (unsigned long start, unsigned long end, void *arg)
105{
106 unsigned long range_start, range_end, prev_start;
107 void (*func)(unsigned long, unsigned long, int);
108 int i;
109
110#if IGNORE_PFN0
111 if (start == PAGE_OFFSET) {
112 printk(KERN_WARNING "warning: skipping physical page 0\n");
113 start += PAGE_SIZE;
114 if (start >= end) return 0;
115 }
116#endif
117 /*
118	 * lowest possible address (walker uses virtual)
119 */
120 prev_start = PAGE_OFFSET;
121 func = arg;
122
123 for (i = 0; i < num_rsvd_regions; ++i) {
124 range_start = max(start, prev_start);
125 range_end = min(end, rsvd_region[i].start);
126
127 if (range_start < range_end)
128 call_pernode_memory(__pa(range_start), range_end - range_start, func);
129
130 /* nothing more available in this segment */
131 if (range_end == end) return 0;
132
133 prev_start = rsvd_region[i].end;
134 }
135 /* end of memory marker allows full processing inside loop body */
136 return 0;
137}
138
139static void
140sort_regions (struct rsvd_region *rsvd_region, int max)
141{
142 int j;
143
144 /* simple bubble sorting */
145 while (max--) {
146 for (j = 0; j < max; ++j) {
147 if (rsvd_region[j].start > rsvd_region[j+1].start) {
148 struct rsvd_region tmp;
149 tmp = rsvd_region[j];
150 rsvd_region[j] = rsvd_region[j + 1];
151 rsvd_region[j + 1] = tmp;
152 }
153 }
154 }
155}
156
157/**
158 * reserve_memory - setup reserved memory areas
159 *
160 * Setup the reserved memory areas set aside for the boot parameters,
161 * initrd, etc. There are currently %IA64_MAX_RSVD_REGIONS defined,
162 * see include/asm-ia64/meminit.h if you need to define more.
163 */
164void
165reserve_memory (void)
166{
167 int n = 0;
168
169 /*
170 * none of the entries in this table overlap
171 */
172 rsvd_region[n].start = (unsigned long) ia64_boot_param;
173 rsvd_region[n].end = rsvd_region[n].start + sizeof(*ia64_boot_param);
174 n++;
175
176 rsvd_region[n].start = (unsigned long) __va(ia64_boot_param->efi_memmap);
177 rsvd_region[n].end = rsvd_region[n].start + ia64_boot_param->efi_memmap_size;
178 n++;
179
180 rsvd_region[n].start = (unsigned long) __va(ia64_boot_param->command_line);
181 rsvd_region[n].end = (rsvd_region[n].start
182 + strlen(__va(ia64_boot_param->command_line)) + 1);
183 n++;
184
185 rsvd_region[n].start = (unsigned long) ia64_imva((void *)KERNEL_START);
186 rsvd_region[n].end = (unsigned long) ia64_imva(_end);
187 n++;
188
189#ifdef CONFIG_BLK_DEV_INITRD
190 if (ia64_boot_param->initrd_start) {
191 rsvd_region[n].start = (unsigned long)__va(ia64_boot_param->initrd_start);
192 rsvd_region[n].end = rsvd_region[n].start + ia64_boot_param->initrd_size;
193 n++;
194 }
195#endif
196
197 /* end of memory marker */
198 rsvd_region[n].start = ~0UL;
199 rsvd_region[n].end = ~0UL;
200 n++;
201
202 num_rsvd_regions = n;
203
204 sort_regions(rsvd_region, num_rsvd_regions);
205}
206
207/**
208 * find_initrd - get initrd parameters from the boot parameter structure
209 *
210 * Grab the initrd start and end from the boot parameter struct given us by
211 * the boot loader.
212 */
213void
214find_initrd (void)
215{
216#ifdef CONFIG_BLK_DEV_INITRD
217 if (ia64_boot_param->initrd_start) {
218 initrd_start = (unsigned long)__va(ia64_boot_param->initrd_start);
219 initrd_end = initrd_start+ia64_boot_param->initrd_size;
220
221 printk(KERN_INFO "Initial ramdisk at: 0x%lx (%lu bytes)\n",
222 initrd_start, ia64_boot_param->initrd_size);
223 }
224#endif
225}
226
227static void __init
228io_port_init (void)
229{
230 extern unsigned long ia64_iobase;
231 unsigned long phys_iobase;
232
233 /*
234 * Set `iobase' to the appropriate address in region 6 (uncached access range).
235 *
236 * The EFI memory map is the "preferred" location to get the I/O port space base,
237	 * rather than relying on AR.KR0. This should become clearer in future SAL
238 * specs. We'll fall back to getting it out of AR.KR0 if no appropriate entry is
239 * found in the memory map.
240 */
241 phys_iobase = efi_get_iobase();
242 if (phys_iobase)
243 /* set AR.KR0 since this is all we use it for anyway */
244 ia64_set_kr(IA64_KR_IO_BASE, phys_iobase);
245 else {
246 phys_iobase = ia64_get_kr(IA64_KR_IO_BASE);
247 printk(KERN_INFO "No I/O port range found in EFI memory map, falling back "
248 "to AR.KR0\n");
249 printk(KERN_INFO "I/O port base = 0x%lx\n", phys_iobase);
250 }
251 ia64_iobase = (unsigned long) ioremap(phys_iobase, 0);
252
253 /* setup legacy IO port space */
254 io_space[0].mmio_base = ia64_iobase;
255 io_space[0].sparse = 1;
256 num_io_spaces = 1;
257}
258
259/**
260 * early_console_setup - setup debugging console
261 *
262 * Consoles started here require little enough setup that we can start using
263 * them very early in the boot process, either right after the machine
264 * vector initialization, or even before if the drivers can detect their hw.
265 *
266 * Returns non-zero if a console couldn't be setup.
267 */
268static inline int __init
269early_console_setup (char *cmdline)
270{
271#ifdef CONFIG_SERIAL_SGI_L1_CONSOLE
272 {
273 extern int sn_serial_console_early_setup(void);
274 if (!sn_serial_console_early_setup())
275 return 0;
276 }
277#endif
278#ifdef CONFIG_EFI_PCDP
279 if (!efi_setup_pcdp_console(cmdline))
280 return 0;
281#endif
282#ifdef CONFIG_SERIAL_8250_CONSOLE
283 if (!early_serial_console_init(cmdline))
284 return 0;
285#endif
286
287 return -1;
288}
289
290static inline void
291mark_bsp_online (void)
292{
293#ifdef CONFIG_SMP
294 /* If we register an early console, allow CPU 0 to printk */
295 cpu_set(smp_processor_id(), cpu_online_map);
296#endif
297}
298
299void __init
300setup_arch (char **cmdline_p)
301{
302 unw_init();
303
304 ia64_patch_vtop((u64) __start___vtop_patchlist, (u64) __end___vtop_patchlist);
305
306 *cmdline_p = __va(ia64_boot_param->command_line);
307 strlcpy(saved_command_line, *cmdline_p, COMMAND_LINE_SIZE);
308
309 efi_init();
310 io_port_init();
311
312#ifdef CONFIG_IA64_GENERIC
313 {
314 const char *mvec_name = strstr (*cmdline_p, "machvec=");
315 char str[64];
316
317 if (mvec_name) {
318 const char *end;
319 size_t len;
320
321 mvec_name += 8;
322 end = strchr (mvec_name, ' ');
323 if (end)
324 len = end - mvec_name;
325 else
326 len = strlen (mvec_name);
327 len = min(len, sizeof (str) - 1);
328 strncpy (str, mvec_name, len);
329 str[len] = '\0';
330 mvec_name = str;
331 } else
332 mvec_name = acpi_get_sysname();
333 machvec_init(mvec_name);
334 }
335#endif
336
337 if (early_console_setup(*cmdline_p) == 0)
338 mark_bsp_online();
339
340#ifdef CONFIG_ACPI_BOOT
341 /* Initialize the ACPI boot-time table parser */
342 acpi_table_init();
343# ifdef CONFIG_ACPI_NUMA
344 acpi_numa_init();
345# endif
346#else
347# ifdef CONFIG_SMP
348 smp_build_cpu_map(); /* happens, e.g., with the Ski simulator */
349# endif
350#endif /* CONFIG_ACPI_BOOT */
351
352 find_memory();
353
354 /* process SAL system table: */
355 ia64_sal_init(efi.sal_systab);
356
357#ifdef CONFIG_SMP
358 cpu_physical_id(0) = hard_smp_processor_id();
359#endif
360
361 cpu_init(); /* initialize the bootstrap CPU */
362
363#ifdef CONFIG_ACPI_BOOT
364 acpi_boot_init();
365#endif
366
367#ifdef CONFIG_VT
368 if (!conswitchp) {
369# if defined(CONFIG_DUMMY_CONSOLE)
370 conswitchp = &dummy_con;
371# endif
372# if defined(CONFIG_VGA_CONSOLE)
373 /*
374 * Non-legacy systems may route legacy VGA MMIO range to system
375 * memory. vga_con probes the MMIO hole, so memory looks like
376 * a VGA device to it. The EFI memory map can tell us if it's
377 * memory so we can avoid this problem.
378 */
379 if (efi_mem_type(0xA0000) != EFI_CONVENTIONAL_MEMORY)
380 conswitchp = &vga_con;
381# endif
382 }
383#endif
384
385 /* enable IA-64 Machine Check Abort Handling unless disabled */
386 if (!strstr(saved_command_line, "nomca"))
387 ia64_mca_init();
388
389 platform_setup(cmdline_p);
390 paging_init();
391}
392
393/*
394 * Display cpu info for all cpu's.
395 */
396static int
397show_cpuinfo (struct seq_file *m, void *v)
398{
399#ifdef CONFIG_SMP
400# define lpj c->loops_per_jiffy
401# define cpunum c->cpu
402#else
403# define lpj loops_per_jiffy
404# define cpunum 0
405#endif
406 static struct {
407 unsigned long mask;
408 const char *feature_name;
409 } feature_bits[] = {
410 { 1UL << 0, "branchlong" },
411 { 1UL << 1, "spontaneous deferral"},
412 { 1UL << 2, "16-byte atomic ops" }
413 };
414 char family[32], features[128], *cp, sep;
415 struct cpuinfo_ia64 *c = v;
416 unsigned long mask;
417 int i;
418
419 mask = c->features;
420
421 switch (c->family) {
422 case 0x07: memcpy(family, "Itanium", 8); break;
423 case 0x1f: memcpy(family, "Itanium 2", 10); break;
424 default: sprintf(family, "%u", c->family); break;
425 }
426
427 /* build the feature string: */
428 memcpy(features, " standard", 10);
429 cp = features;
430 sep = 0;
431 for (i = 0; i < (int) ARRAY_SIZE(feature_bits); ++i) {
432 if (mask & feature_bits[i].mask) {
433 if (sep)
434 *cp++ = sep;
435 sep = ',';
436 *cp++ = ' ';
437 strcpy(cp, feature_bits[i].feature_name);
438 cp += strlen(feature_bits[i].feature_name);
439 mask &= ~feature_bits[i].mask;
440 }
441 }
442 if (mask) {
443 /* print unknown features as a hex value: */
444 if (sep)
445 *cp++ = sep;
446 sprintf(cp, " 0x%lx", mask);
447 }
448
449 seq_printf(m,
450 "processor : %d\n"
451 "vendor : %s\n"
452 "arch : IA-64\n"
453 "family : %s\n"
454 "model : %u\n"
455 "revision : %u\n"
456 "archrev : %u\n"
457 "features :%s\n" /* don't change this---it _is_ right! */
458 "cpu number : %lu\n"
459 "cpu regs : %u\n"
460 "cpu MHz : %lu.%06lu\n"
461 "itc MHz : %lu.%06lu\n"
462 "BogoMIPS : %lu.%02lu\n\n",
463 cpunum, c->vendor, family, c->model, c->revision, c->archrev,
464 features, c->ppn, c->number,
465 c->proc_freq / 1000000, c->proc_freq % 1000000,
466 c->itc_freq / 1000000, c->itc_freq % 1000000,
467 lpj*HZ/500000, (lpj*HZ/5000) % 100);
468 return 0;
469}
470
471static void *
472c_start (struct seq_file *m, loff_t *pos)
473{
474#ifdef CONFIG_SMP
475 while (*pos < NR_CPUS && !cpu_isset(*pos, cpu_online_map))
476 ++*pos;
477#endif
478 return *pos < NR_CPUS ? cpu_data(*pos) : NULL;
479}
480
481static void *
482c_next (struct seq_file *m, void *v, loff_t *pos)
483{
484 ++*pos;
485 return c_start(m, pos);
486}
487
488static void
489c_stop (struct seq_file *m, void *v)
490{
491}
492
493struct seq_operations cpuinfo_op = {
494 .start = c_start,
495 .next = c_next,
496 .stop = c_stop,
497 .show = show_cpuinfo
498};
499
500void
501identify_cpu (struct cpuinfo_ia64 *c)
502{
503 union {
504 unsigned long bits[5];
505 struct {
506 /* id 0 & 1: */
507 char vendor[16];
508
509 /* id 2 */
510 u64 ppn; /* processor serial number */
511
512 /* id 3: */
513 unsigned number : 8;
514 unsigned revision : 8;
515 unsigned model : 8;
516 unsigned family : 8;
517 unsigned archrev : 8;
518 unsigned reserved : 24;
519
520 /* id 4: */
521 u64 features;
522 } field;
523 } cpuid;
524 pal_vm_info_1_u_t vm1;
525 pal_vm_info_2_u_t vm2;
526 pal_status_t status;
527 unsigned long impl_va_msb = 50, phys_addr_size = 44; /* Itanium defaults */
528 int i;
529
530 for (i = 0; i < 5; ++i)
531 cpuid.bits[i] = ia64_get_cpuid(i);
532
533 memcpy(c->vendor, cpuid.field.vendor, 16);
534#ifdef CONFIG_SMP
535 c->cpu = smp_processor_id();
536#endif
537 c->ppn = cpuid.field.ppn;
538 c->number = cpuid.field.number;
539 c->revision = cpuid.field.revision;
540 c->model = cpuid.field.model;
541 c->family = cpuid.field.family;
542 c->archrev = cpuid.field.archrev;
543 c->features = cpuid.field.features;
544
545 status = ia64_pal_vm_summary(&vm1, &vm2);
546 if (status == PAL_STATUS_SUCCESS) {
547 impl_va_msb = vm2.pal_vm_info_2_s.impl_va_msb;
548 phys_addr_size = vm1.pal_vm_info_1_s.phys_add_size;
549 }
550 c->unimpl_va_mask = ~((7L<<61) | ((1L << (impl_va_msb + 1)) - 1));
551 c->unimpl_pa_mask = ~((1L<<63) | ((1L << phys_addr_size) - 1));
552}
553
554void
555setup_per_cpu_areas (void)
556{
557 /* start_kernel() requires this... */
558}
559
560static void
561get_max_cacheline_size (void)
562{
563 unsigned long line_size, max = 1;
564 u64 l, levels, unique_caches;
565 pal_cache_config_info_t cci;
566 s64 status;
567
568 status = ia64_pal_cache_summary(&levels, &unique_caches);
569 if (status != 0) {
570 printk(KERN_ERR "%s: ia64_pal_cache_summary() failed (status=%ld)\n",
571 __FUNCTION__, status);
572 max = SMP_CACHE_BYTES;
573 goto out;
574 }
575
576 for (l = 0; l < levels; ++l) {
577 status = ia64_pal_cache_config_info(l, /* cache_type (data_or_unified)= */ 2,
578 &cci);
579 if (status != 0) {
580 printk(KERN_ERR
581 "%s: ia64_pal_cache_config_info(l=%lu) failed (status=%ld)\n",
582 __FUNCTION__, l, status);
583 max = SMP_CACHE_BYTES;
584 }
585 line_size = 1 << cci.pcci_line_size;
586 if (line_size > max)
587 max = line_size;
588 }
589 out:
590 if (max > ia64_max_cacheline_size)
591 ia64_max_cacheline_size = max;
592}
593
594/*
595 * cpu_init() initializes state that is per-CPU. This function acts
596 * as a 'CPU state barrier', nothing should get across.
597 */
598void
599cpu_init (void)
600{
601 extern void __devinit ia64_mmu_init (void *);
602 unsigned long num_phys_stacked;
603 pal_vm_info_2_u_t vmi;
604 unsigned int max_ctx;
605 struct cpuinfo_ia64 *cpu_info;
606 void *cpu_data;
607
608 cpu_data = per_cpu_init();
609
610 /*
611 * We set ar.k3 so that assembly code in MCA handler can compute
612 * physical addresses of per cpu variables with a simple:
613 * phys = ar.k3 + &per_cpu_var
614 */
615 ia64_set_kr(IA64_KR_PER_CPU_DATA,
616 ia64_tpa(cpu_data) - (long) __per_cpu_start);
617
618 get_max_cacheline_size();
619
620 /*
621 * We can't pass "local_cpu_data" to identify_cpu() because we haven't called
622 * ia64_mmu_init() yet. And we can't call ia64_mmu_init() first because it
623 * depends on the data returned by identify_cpu(). We break the dependency by
624 * accessing cpu_data() through the canonical per-CPU address.
625 */
626 cpu_info = cpu_data + ((char *) &__ia64_per_cpu_var(cpu_info) - __per_cpu_start);
627 identify_cpu(cpu_info);
628
629#ifdef CONFIG_MCKINLEY
630 {
631# define FEATURE_SET 16
632 struct ia64_pal_retval iprv;
633
634 if (cpu_info->family == 0x1f) {
635 PAL_CALL_PHYS(iprv, PAL_PROC_GET_FEATURES, 0, FEATURE_SET, 0);
636 if ((iprv.status == 0) && (iprv.v0 & 0x80) && (iprv.v2 & 0x80))
637 PAL_CALL_PHYS(iprv, PAL_PROC_SET_FEATURES,
638 (iprv.v1 | 0x80), FEATURE_SET, 0);
639 }
640 }
641#endif
642
643 /* Clear the stack memory reserved for pt_regs: */
644 memset(ia64_task_regs(current), 0, sizeof(struct pt_regs));
645
646 ia64_set_kr(IA64_KR_FPU_OWNER, 0);
647
648 /*
649 * Initialize the page-table base register to a global
650	 * directory with all zeroes. This ensures that we can handle
651	 * TLB-misses to user address-space even before we create the
652 * first user address-space. This may happen, e.g., due to
653 * aggressive use of lfetch.fault.
654 */
655 ia64_set_kr(IA64_KR_PT_BASE, __pa(ia64_imva(empty_zero_page)));
656
657 /*
658 * Initialize default control register to defer all speculative faults. The
659 * kernel MUST NOT depend on a particular setting of these bits (in other words,
660 * the kernel must have recovery code for all speculative accesses). Turn on
661 * dcr.lc as per recommendation by the architecture team. Most IA-32 apps
662 * shouldn't be affected by this (moral: keep your ia32 locks aligned and you'll
663 * be fine).
664 */
665 ia64_setreg(_IA64_REG_CR_DCR, ( IA64_DCR_DP | IA64_DCR_DK | IA64_DCR_DX | IA64_DCR_DR
666 | IA64_DCR_DA | IA64_DCR_DD | IA64_DCR_LC));
667 atomic_inc(&init_mm.mm_count);
668 current->active_mm = &init_mm;
669 if (current->mm)
670 BUG();
671
672 ia64_mmu_init(ia64_imva(cpu_data));
673 ia64_mca_cpu_init(ia64_imva(cpu_data));
674
675#ifdef CONFIG_IA32_SUPPORT
676 ia32_cpu_init();
677#endif
678
679	/* Clear ITC to eliminate sched_clock() overflows in human time. */
680 ia64_set_itc(0);
681
682 /* disable all local interrupt sources: */
683 ia64_set_itv(1 << 16);
684 ia64_set_lrr0(1 << 16);
685 ia64_set_lrr1(1 << 16);
686 ia64_setreg(_IA64_REG_CR_PMV, 1 << 16);
687 ia64_setreg(_IA64_REG_CR_CMCV, 1 << 16);
688
689 /* clear TPR & XTP to enable all interrupt classes: */
690 ia64_setreg(_IA64_REG_CR_TPR, 0);
691#ifdef CONFIG_SMP
692 normal_xtp();
693#endif
694
695 /* set ia64_ctx.max_rid to the maximum RID that is supported by all CPUs: */
696 if (ia64_pal_vm_summary(NULL, &vmi) == 0)
697 max_ctx = (1U << (vmi.pal_vm_info_2_s.rid_size - 3)) - 1;
698 else {
699 printk(KERN_WARNING "cpu_init: PAL VM summary failed, assuming 18 RID bits\n");
700 max_ctx = (1U << 15) - 1; /* use architected minimum */
701 }
702 while (max_ctx < ia64_ctx.max_ctx) {
703 unsigned int old = ia64_ctx.max_ctx;
704 if (cmpxchg(&ia64_ctx.max_ctx, old, max_ctx) == old)
705 break;
706 }
707
708 if (ia64_pal_rse_info(&num_phys_stacked, NULL) != 0) {
709 printk(KERN_WARNING "cpu_init: PAL RSE info failed; assuming 96 physical "
710 "stacked regs\n");
711 num_phys_stacked = 96;
712 }
713 /* size of physical stacked register partition plus 8 bytes: */
714 __get_cpu_var(ia64_phys_stacked_size_p8) = num_phys_stacked*8 + 8;
715 platform_cpu_init();
716}
717
718void
719check_bugs (void)
720{
721 ia64_patch_mckinley_e9((unsigned long) __start___mckinley_e9_bundles,
722 (unsigned long) __end___mckinley_e9_bundles);
723}
diff --git a/arch/ia64/kernel/sigframe.h b/arch/ia64/kernel/sigframe.h
new file mode 100644
index 000000000000..37b986cb86e0
--- /dev/null
+++ b/arch/ia64/kernel/sigframe.h
@@ -0,0 +1,25 @@
1struct sigscratch {
2 unsigned long scratch_unat; /* ar.unat for the general registers saved in pt */
3 unsigned long ar_pfs; /* for syscalls, the user-level function-state */
4 struct pt_regs pt;
5};
6
7struct sigframe {
8 /*
9 * Place signal handler args where user-level unwinder can find them easily.
10 * DO NOT MOVE THESE. They are part of the IA-64 Linux ABI and there is
11 * user-level code that depends on their presence!
12 */
13 unsigned long arg0; /* signum */
14 unsigned long arg1; /* siginfo pointer */
15 unsigned long arg2; /* sigcontext pointer */
16 /*
17 * End of architected state.
18 */
19
20 void __user *handler; /* pointer to the plabel of the signal handler */
21 struct siginfo info;
22 struct sigcontext sc;
23};
24
25extern long ia64_do_signal (sigset_t *, struct sigscratch *, long);
diff --git a/arch/ia64/kernel/signal.c b/arch/ia64/kernel/signal.c
new file mode 100644
index 000000000000..6891d86937d9
--- /dev/null
+++ b/arch/ia64/kernel/signal.c
@@ -0,0 +1,691 @@
1/*
2 * Architecture-specific signal handling support.
3 *
4 * Copyright (C) 1999-2004 Hewlett-Packard Co
5 * David Mosberger-Tang <davidm@hpl.hp.com>
6 *
7 * Derived from i386 and Alpha versions.
8 */
9
10#include <linux/config.h>
11#include <linux/errno.h>
12#include <linux/kernel.h>
13#include <linux/mm.h>
14#include <linux/ptrace.h>
15#include <linux/sched.h>
16#include <linux/signal.h>
17#include <linux/smp.h>
18#include <linux/smp_lock.h>
19#include <linux/stddef.h>
20#include <linux/tty.h>
21#include <linux/binfmts.h>
22#include <linux/unistd.h>
23#include <linux/wait.h>
24
25#include <asm/ia32.h>
26#include <asm/intrinsics.h>
27#include <asm/uaccess.h>
28#include <asm/rse.h>
29#include <asm/sigcontext.h>
30
31#include "sigframe.h"
32
33#define DEBUG_SIG 0
34#define STACK_ALIGN 16 /* minimal alignment for stack pointer */
35#define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP)))
36
37#if _NSIG_WORDS > 1
38# define PUT_SIGSET(k,u) __copy_to_user((u)->sig, (k)->sig, sizeof(sigset_t))
39# define GET_SIGSET(k,u) __copy_from_user((k)->sig, (u)->sig, sizeof(sigset_t))
40#else
41# define PUT_SIGSET(k,u) __put_user((k)->sig[0], &(u)->sig[0])
42# define GET_SIGSET(k,u) __get_user((k)->sig[0], &(u)->sig[0])
43#endif
44
45long
46ia64_rt_sigsuspend (sigset_t __user *uset, size_t sigsetsize, struct sigscratch *scr)
47{
48 sigset_t oldset, set;
49
50 /* XXX: Don't preclude handling different sized sigset_t's. */
51 if (sigsetsize != sizeof(sigset_t))
52 return -EINVAL;
53
54 if (!access_ok(VERIFY_READ, uset, sigsetsize))
55 return -EFAULT;
56
57 if (GET_SIGSET(&set, uset))
58 return -EFAULT;
59
60 sigdelsetmask(&set, ~_BLOCKABLE);
61
62 spin_lock_irq(&current->sighand->siglock);
63 {
64 oldset = current->blocked;
65 current->blocked = set;
66 recalc_sigpending();
67 }
68 spin_unlock_irq(&current->sighand->siglock);
69
70 /*
71 * The return below usually returns to the signal handler. We need to
72 * pre-set the correct error code here to ensure that the right values
73 * get saved in sigcontext by ia64_do_signal.
74 */
75 scr->pt.r8 = EINTR;
76 scr->pt.r10 = -1;
77
78 while (1) {
79 current->state = TASK_INTERRUPTIBLE;
80 schedule();
81 if (ia64_do_signal(&oldset, scr, 1))
82 return -EINTR;
83 }
84}
85
86asmlinkage long
87sys_sigaltstack (const stack_t __user *uss, stack_t __user *uoss, long arg2,
88 long arg3, long arg4, long arg5, long arg6, long arg7,
89 struct pt_regs regs)
90{
91 return do_sigaltstack(uss, uoss, regs.r12);
92}
93
94static long
95restore_sigcontext (struct sigcontext __user *sc, struct sigscratch *scr)
96{
97 unsigned long ip, flags, nat, um, cfm;
98 long err;
99
100 /* Always make any pending restarted system calls return -EINTR */
101 current_thread_info()->restart_block.fn = do_no_restart_syscall;
102
103	/* restore scratch state that always gets updated during signal delivery: */
104 err = __get_user(flags, &sc->sc_flags);
105 err |= __get_user(nat, &sc->sc_nat);
106 err |= __get_user(ip, &sc->sc_ip); /* instruction pointer */
107 err |= __get_user(cfm, &sc->sc_cfm);
108 err |= __get_user(um, &sc->sc_um); /* user mask */
109 err |= __get_user(scr->pt.ar_rsc, &sc->sc_ar_rsc);
110 err |= __get_user(scr->pt.ar_unat, &sc->sc_ar_unat);
111 err |= __get_user(scr->pt.ar_fpsr, &sc->sc_ar_fpsr);
112 err |= __get_user(scr->pt.ar_pfs, &sc->sc_ar_pfs);
113 err |= __get_user(scr->pt.pr, &sc->sc_pr); /* predicates */
114 err |= __get_user(scr->pt.b0, &sc->sc_br[0]); /* b0 (rp) */
115 err |= __get_user(scr->pt.b6, &sc->sc_br[6]); /* b6 */
116 err |= __copy_from_user(&scr->pt.r1, &sc->sc_gr[1], 8); /* r1 */
117 err |= __copy_from_user(&scr->pt.r8, &sc->sc_gr[8], 4*8); /* r8-r11 */
118 err |= __copy_from_user(&scr->pt.r12, &sc->sc_gr[12], 2*8); /* r12-r13 */
119 err |= __copy_from_user(&scr->pt.r15, &sc->sc_gr[15], 8); /* r15 */
120
121 scr->pt.cr_ifs = cfm | (1UL << 63);
122
123 /* establish new instruction pointer: */
124 scr->pt.cr_iip = ip & ~0x3UL;
125 ia64_psr(&scr->pt)->ri = ip & 0x3;
126 scr->pt.cr_ipsr = (scr->pt.cr_ipsr & ~IA64_PSR_UM) | (um & IA64_PSR_UM);
127
128 scr->scratch_unat = ia64_put_scratch_nat_bits(&scr->pt, nat);
129
130 if (!(flags & IA64_SC_FLAG_IN_SYSCALL)) {
131 /* Restore most scratch-state only when not in syscall. */
132 err |= __get_user(scr->pt.ar_ccv, &sc->sc_ar_ccv); /* ar.ccv */
133 err |= __get_user(scr->pt.b7, &sc->sc_br[7]); /* b7 */
134 err |= __get_user(scr->pt.r14, &sc->sc_gr[14]); /* r14 */
135 err |= __copy_from_user(&scr->pt.ar_csd, &sc->sc_ar25, 2*8); /* ar.csd & ar.ssd */
136 err |= __copy_from_user(&scr->pt.r2, &sc->sc_gr[2], 2*8); /* r2-r3 */
137 err |= __copy_from_user(&scr->pt.r16, &sc->sc_gr[16], 16*8); /* r16-r31 */
138 }
139
140 if ((flags & IA64_SC_FLAG_FPH_VALID) != 0) {
141 struct ia64_psr *psr = ia64_psr(&scr->pt);
142
143 __copy_from_user(current->thread.fph, &sc->sc_fr[32], 96*16);
144 psr->mfh = 0; /* drop signal handler's fph contents... */
145 if (psr->dfh)
146 ia64_drop_fpu(current);
147 else {
148 /* We already own the local fph, otherwise psr->dfh wouldn't be 0. */
149 __ia64_load_fpu(current->thread.fph);
150 ia64_set_local_fpu_owner(current);
151 }
152 }
153 return err;
154}
155
156int
157copy_siginfo_to_user (siginfo_t __user *to, siginfo_t *from)
158{
159 if (!access_ok(VERIFY_WRITE, to, sizeof(siginfo_t)))
160 return -EFAULT;
161 if (from->si_code < 0) {
162 if (__copy_to_user(to, from, sizeof(siginfo_t)))
163 return -EFAULT;
164 return 0;
165 } else {
166 int err;
167
168 /*
169 * If you change siginfo_t structure, please be sure this code is fixed
170 * accordingly. It should never copy any pad contained in the structure
171 * to avoid security leaks, but must copy the generic 3 ints plus the
172 * relevant union member.
173 */
174 err = __put_user(from->si_signo, &to->si_signo);
175 err |= __put_user(from->si_errno, &to->si_errno);
176 err |= __put_user((short)from->si_code, &to->si_code);
177 switch (from->si_code >> 16) {
178 case __SI_FAULT >> 16:
179 err |= __put_user(from->si_flags, &to->si_flags);
180 err |= __put_user(from->si_isr, &to->si_isr);
181 case __SI_POLL >> 16:
182 err |= __put_user(from->si_addr, &to->si_addr);
183 err |= __put_user(from->si_imm, &to->si_imm);
184 break;
185 case __SI_TIMER >> 16:
186 err |= __put_user(from->si_tid, &to->si_tid);
187 err |= __put_user(from->si_overrun, &to->si_overrun);
188 err |= __put_user(from->si_ptr, &to->si_ptr);
189 break;
190 case __SI_RT >> 16: /* Not generated by the kernel as of now. */
191 case __SI_MESGQ >> 16:
192 err |= __put_user(from->si_uid, &to->si_uid);
193 err |= __put_user(from->si_pid, &to->si_pid);
194 err |= __put_user(from->si_ptr, &to->si_ptr);
195 break;
196 case __SI_CHLD >> 16:
197 err |= __put_user(from->si_utime, &to->si_utime);
198 err |= __put_user(from->si_stime, &to->si_stime);
199 err |= __put_user(from->si_status, &to->si_status);
200 default:
201 err |= __put_user(from->si_uid, &to->si_uid);
202 err |= __put_user(from->si_pid, &to->si_pid);
203 break;
204 }
205 return err;
206 }
207}
208
209long
210ia64_rt_sigreturn (struct sigscratch *scr)
211{
212 extern char ia64_strace_leave_kernel, ia64_leave_kernel;
213 struct sigcontext __user *sc;
214 struct siginfo si;
215 sigset_t set;
216 long retval;
217
218 sc = &((struct sigframe __user *) (scr->pt.r12 + 16))->sc;
219
220 /*
221 * When we return to the previously executing context, r8 and r10 have already
222	 * been set up the way we want them. Indeed, if the signal wasn't delivered while
223 * in a system call, we must not touch r8 or r10 as otherwise user-level state
224 * could be corrupted.
225 */
226 retval = (long) &ia64_leave_kernel;
227 if (test_thread_flag(TIF_SYSCALL_TRACE))
228 /*
229 * strace expects to be notified after sigreturn returns even though the
230 * context to which we return may not be in the middle of a syscall.
231 * Thus, the return-value that strace displays for sigreturn is
232 * meaningless.
233 */
234 retval = (long) &ia64_strace_leave_kernel;
235
236 if (!access_ok(VERIFY_READ, sc, sizeof(*sc)))
237 goto give_sigsegv;
238
239 if (GET_SIGSET(&set, &sc->sc_mask))
240 goto give_sigsegv;
241
242 sigdelsetmask(&set, ~_BLOCKABLE);
243
244 spin_lock_irq(&current->sighand->siglock);
245 {
246 current->blocked = set;
247 recalc_sigpending();
248 }
249 spin_unlock_irq(&current->sighand->siglock);
250
251 if (restore_sigcontext(sc, scr))
252 goto give_sigsegv;
253
254#if DEBUG_SIG
255 printk("SIG return (%s:%d): sp=%lx ip=%lx\n",
256 current->comm, current->pid, scr->pt.r12, scr->pt.cr_iip);
257#endif
258 /*
259 * It is more difficult to avoid calling this function than to
260 * call it and ignore errors.
261 */
262 do_sigaltstack(&sc->sc_stack, NULL, scr->pt.r12);
263 return retval;
264
265 give_sigsegv:
266 si.si_signo = SIGSEGV;
267 si.si_errno = 0;
268 si.si_code = SI_KERNEL;
269 si.si_pid = current->pid;
270 si.si_uid = current->uid;
271 si.si_addr = sc;
272 force_sig_info(SIGSEGV, &si, current);
273 return retval;
274}
275
276/*
277 * This does just the minimum required setup of sigcontext.
278 * Specifically, it only installs data that is either not knowable at
279 * the user-level or that gets modified before execution in the
280 * trampoline starts. Everything else is done at the user-level.
281 */
282static long
283setup_sigcontext (struct sigcontext __user *sc, sigset_t *mask, struct sigscratch *scr)
284{
285 unsigned long flags = 0, ifs, cfm, nat;
286 long err;
287
288 ifs = scr->pt.cr_ifs;
289
290 if (on_sig_stack((unsigned long) sc))
291 flags |= IA64_SC_FLAG_ONSTACK;
292 if ((ifs & (1UL << 63)) == 0)
293 /* if cr_ifs doesn't have the valid bit set, we got here through a syscall */
294 flags |= IA64_SC_FLAG_IN_SYSCALL;
295 cfm = ifs & ((1UL << 38) - 1);
296 ia64_flush_fph(current);
297 if ((current->thread.flags & IA64_THREAD_FPH_VALID)) {
298 flags |= IA64_SC_FLAG_FPH_VALID;
299 __copy_to_user(&sc->sc_fr[32], current->thread.fph, 96*16);
300 }
301
302 nat = ia64_get_scratch_nat_bits(&scr->pt, scr->scratch_unat);
303
304 err = __put_user(flags, &sc->sc_flags);
305 err |= __put_user(nat, &sc->sc_nat);
306 err |= PUT_SIGSET(mask, &sc->sc_mask);
307 err |= __put_user(cfm, &sc->sc_cfm);
308 err |= __put_user(scr->pt.cr_ipsr & IA64_PSR_UM, &sc->sc_um);
309 err |= __put_user(scr->pt.ar_rsc, &sc->sc_ar_rsc);
310 err |= __put_user(scr->pt.ar_unat, &sc->sc_ar_unat); /* ar.unat */
311 err |= __put_user(scr->pt.ar_fpsr, &sc->sc_ar_fpsr); /* ar.fpsr */
312 err |= __put_user(scr->pt.ar_pfs, &sc->sc_ar_pfs);
313 err |= __put_user(scr->pt.pr, &sc->sc_pr); /* predicates */
314 err |= __put_user(scr->pt.b0, &sc->sc_br[0]); /* b0 (rp) */
315 err |= __put_user(scr->pt.b6, &sc->sc_br[6]); /* b6 */
316 err |= __copy_to_user(&sc->sc_gr[1], &scr->pt.r1, 8); /* r1 */
317 err |= __copy_to_user(&sc->sc_gr[8], &scr->pt.r8, 4*8); /* r8-r11 */
318 err |= __copy_to_user(&sc->sc_gr[12], &scr->pt.r12, 2*8); /* r12-r13 */
319 err |= __copy_to_user(&sc->sc_gr[15], &scr->pt.r15, 8); /* r15 */
320 err |= __put_user(scr->pt.cr_iip + ia64_psr(&scr->pt)->ri, &sc->sc_ip);
321
322 if (flags & IA64_SC_FLAG_IN_SYSCALL) {
323 /* Clear scratch registers if the signal interrupted a system call. */
324 err |= __put_user(0, &sc->sc_ar_ccv); /* ar.ccv */
325 err |= __put_user(0, &sc->sc_br[7]); /* b7 */
326 err |= __put_user(0, &sc->sc_gr[14]); /* r14 */
327 err |= __clear_user(&sc->sc_ar25, 2*8); /* ar.csd & ar.ssd */
328 err |= __clear_user(&sc->sc_gr[2], 2*8); /* r2-r3 */
329 err |= __clear_user(&sc->sc_gr[16], 16*8); /* r16-r31 */
330 } else {
331 /* Copy scratch regs to sigcontext if the signal didn't interrupt a syscall. */
332 err |= __put_user(scr->pt.ar_ccv, &sc->sc_ar_ccv); /* ar.ccv */
333 err |= __put_user(scr->pt.b7, &sc->sc_br[7]); /* b7 */
334 err |= __put_user(scr->pt.r14, &sc->sc_gr[14]); /* r14 */
335 err |= __copy_to_user(&sc->sc_ar25, &scr->pt.ar_csd, 2*8); /* ar.csd & ar.ssd */
336 err |= __copy_to_user(&sc->sc_gr[2], &scr->pt.r2, 2*8); /* r2-r3 */
337 err |= __copy_to_user(&sc->sc_gr[16], &scr->pt.r16, 16*8); /* r16-r31 */
338 }
339 return err;
340}
341
342/*
343 * Check whether the register-backing store is already on the signal stack.
344 */
345static inline int
346rbs_on_sig_stack (unsigned long bsp)
347{
348 return (bsp - current->sas_ss_sp < current->sas_ss_size);
349}
350
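rbs_on_sig_stack() below uses the standard unsigned-subtraction idiom: one compare covers both bounds, because an address below the base wraps around to a huge unsigned value. A user-space demonstration with made-up numbers (sketch only):

#include <stdio.h>

static int in_range(unsigned long addr, unsigned long base, unsigned long size)
{
	return addr - base < size;	/* same as: base <= addr && addr < base + size */
}

int main(void)
{
	printf("%d %d %d\n",
	       in_range(0x1000, 0x1000, 0x100),		/* 1: exactly at the base */
	       in_range(0x10ff, 0x1000, 0x100),		/* 1: last byte in range  */
	       in_range(0x0fff, 0x1000, 0x100));	/* 0: below base, wraps   */
	return 0;
}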
351static long
352force_sigsegv_info (int sig, void __user *addr)
353{
354 unsigned long flags;
355 struct siginfo si;
356
357 if (sig == SIGSEGV) {
358 /*
359 * Acquiring siglock around the sa_handler-update is almost
360 * certainly overkill, but this isn't a
361 * performance-critical path and I'd rather play it safe
362 * here than having to debug a nasty race if and when
363 * something changes in kernel/signal.c that would make it
364 * no longer safe to modify sa_handler without holding the
365 * lock.
366 */
367 spin_lock_irqsave(&current->sighand->siglock, flags);
368 current->sighand->action[sig - 1].sa.sa_handler = SIG_DFL;
369 spin_unlock_irqrestore(&current->sighand->siglock, flags);
370 }
371 si.si_signo = SIGSEGV;
372 si.si_errno = 0;
373 si.si_code = SI_KERNEL;
374 si.si_pid = current->pid;
375 si.si_uid = current->uid;
376 si.si_addr = addr;
377 force_sig_info(SIGSEGV, &si, current);
378 return 0;
379}
380
381static long
382setup_frame (int sig, struct k_sigaction *ka, siginfo_t *info, sigset_t *set,
383 struct sigscratch *scr)
384{
385 extern char __kernel_sigtramp[];
386 unsigned long tramp_addr, new_rbs = 0;
387 struct sigframe __user *frame;
388 long err;
389
390 frame = (void __user *) scr->pt.r12;
391 tramp_addr = (unsigned long) __kernel_sigtramp;
392 if ((ka->sa.sa_flags & SA_ONSTACK) && sas_ss_flags((unsigned long) frame) == 0) {
393 frame = (void __user *) ((current->sas_ss_sp + current->sas_ss_size)
394 & ~(STACK_ALIGN - 1));
395 /*
396 * We need to check for the register stack being on the signal stack
397 * separately, because it's switched separately (memory stack is switched
398 * in the kernel, register stack is switched in the signal trampoline).
399 */
400 if (!rbs_on_sig_stack(scr->pt.ar_bspstore))
401 new_rbs = (current->sas_ss_sp + sizeof(long) - 1) & ~(sizeof(long) - 1);
402 }
403 frame = (void __user *) frame - ((sizeof(*frame) + STACK_ALIGN - 1) & ~(STACK_ALIGN - 1));
404
405 if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame)))
406 return force_sigsegv_info(sig, frame);
407
408 err = __put_user(sig, &frame->arg0);
409 err |= __put_user(&frame->info, &frame->arg1);
410 err |= __put_user(&frame->sc, &frame->arg2);
411 err |= __put_user(new_rbs, &frame->sc.sc_rbs_base);
412 err |= __put_user(0, &frame->sc.sc_loadrs); /* initialize to zero */
413 err |= __put_user(ka->sa.sa_handler, &frame->handler);
414
415 err |= copy_siginfo_to_user(&frame->info, info);
416
417 err |= __put_user(current->sas_ss_sp, &frame->sc.sc_stack.ss_sp);
418 err |= __put_user(current->sas_ss_size, &frame->sc.sc_stack.ss_size);
419 err |= __put_user(sas_ss_flags(scr->pt.r12), &frame->sc.sc_stack.ss_flags);
420 err |= setup_sigcontext(&frame->sc, set, scr);
421
422 if (unlikely(err))
423 return force_sigsegv_info(sig, frame);
424
425 scr->pt.r12 = (unsigned long) frame - 16; /* new stack pointer */
426 scr->pt.ar_fpsr = FPSR_DEFAULT; /* reset fpsr for signal handler */
427 scr->pt.cr_iip = tramp_addr;
428 ia64_psr(&scr->pt)->ri = 0; /* start executing in first slot */
429 ia64_psr(&scr->pt)->be = 0; /* force little-endian byte-order */
430 /*
431 * Force the interruption function mask to zero. This has no effect when a
432	 * system-call got interrupted by a signal (since, in that case, scr->pt.cr_ifs is
433 * ignored), but it has the desirable effect of making it possible to deliver a
434 * signal with an incomplete register frame (which happens when a mandatory RSE
435	 * load faults). Furthermore, it has no negative effect on getting the user's
436 * dirty partition preserved, because that's governed by scr->pt.loadrs.
437 */
438 scr->pt.cr_ifs = (1UL << 63);
439
440 /*
441 * Note: this affects only the NaT bits of the scratch regs (the ones saved in
442 * pt_regs), which is exactly what we want.
443 */
444	scr->scratch_unat = 0; /* ensure the NaT bit of r12 is clear */
445
446#if DEBUG_SIG
447 printk("SIG deliver (%s:%d): sig=%d sp=%lx ip=%lx handler=%p\n",
448 current->comm, current->pid, sig, scr->pt.r12, frame->sc.sc_ip, frame->handler);
449#endif
450 return 1;
451}
452
453static long
454handle_signal (unsigned long sig, struct k_sigaction *ka, siginfo_t *info, sigset_t *oldset,
455 struct sigscratch *scr)
456{
457 if (IS_IA32_PROCESS(&scr->pt)) {
458 /* send signal to IA-32 process */
459 if (!ia32_setup_frame1(sig, ka, info, oldset, &scr->pt))
460 return 0;
461 } else
462 /* send signal to IA-64 process */
463 if (!setup_frame(sig, ka, info, oldset, scr))
464 return 0;
465
466 if (!(ka->sa.sa_flags & SA_NODEFER)) {
467 spin_lock_irq(&current->sighand->siglock);
468 {
469 sigorsets(&current->blocked, &current->blocked, &ka->sa.sa_mask);
470 sigaddset(&current->blocked, sig);
471 recalc_sigpending();
472 }
473 spin_unlock_irq(&current->sighand->siglock);
474 }
475 return 1;
476}
477
478/*
479 * Note that `init' is a special process: it doesn't get signals it doesn't want to
480 * handle. Thus you cannot kill init with SIGKILL, not even by mistake.
481 */
482long
483ia64_do_signal (sigset_t *oldset, struct sigscratch *scr, long in_syscall)
484{
485 struct k_sigaction ka;
486 siginfo_t info;
487 long restart = in_syscall;
488 long errno = scr->pt.r8;
489# define ERR_CODE(c) (IS_IA32_PROCESS(&scr->pt) ? -(c) : (c))
490
491 /*
492 * In the ia64_leave_kernel code path, we want the common case to go fast, which
493 * is why we may in certain cases get here from kernel mode. Just return without
494 * doing anything if so.
495 */
496 if (!user_mode(&scr->pt))
497 return 0;
498
499 if (!oldset)
500 oldset = &current->blocked;
501
502 /*
503 * This only loops in the rare cases of handle_signal() failing, in which case we
504 * need to push through a forced SIGSEGV.
505 */
506 while (1) {
507 int signr = get_signal_to_deliver(&info, &ka, &scr->pt, NULL);
508
509 /*
510 * get_signal_to_deliver() may have run a debugger (via notify_parent())
511 * and the debugger may have modified the state (e.g., to arrange for an
512 * inferior call), thus it's important to check for restarting _after_
513 * get_signal_to_deliver().
514 */
515 if (IS_IA32_PROCESS(&scr->pt)) {
516 if (in_syscall) {
517 if (errno >= 0)
518 restart = 0;
519 else
520 errno = -errno;
521 }
522 } else if ((long) scr->pt.r10 != -1)
523 /*
524		 * A system call has to be restarted only if one of the error codes
525 * ERESTARTNOHAND, ERESTARTSYS, or ERESTARTNOINTR is returned. If r10
526 * isn't -1 then r8 doesn't hold an error code and we don't need to
527 * restart the syscall, so we can clear the "restart" flag here.
528 */
529 restart = 0;
530
531 if (signr <= 0)
532 break;
533
534 if (unlikely(restart)) {
535 switch (errno) {
536 case ERESTART_RESTARTBLOCK:
537 case ERESTARTNOHAND:
538 scr->pt.r8 = ERR_CODE(EINTR);
539 /* note: scr->pt.r10 is already -1 */
540 break;
541
542 case ERESTARTSYS:
543 if ((ka.sa.sa_flags & SA_RESTART) == 0) {
544 scr->pt.r8 = ERR_CODE(EINTR);
545 /* note: scr->pt.r10 is already -1 */
546 break;
547 }
548 case ERESTARTNOINTR:
549 if (IS_IA32_PROCESS(&scr->pt)) {
550 scr->pt.r8 = scr->pt.r1;
551 scr->pt.cr_iip -= 2;
552 } else
553 ia64_decrement_ip(&scr->pt);
554 restart = 0; /* don't restart twice if handle_signal() fails... */
555 }
556 }
557
558 /*
559 * Whee! Actually deliver the signal. If the delivery failed, we need to
560 * continue to iterate in this loop so we can deliver the SIGSEGV...
561 */
562 if (handle_signal(signr, &ka, &info, oldset, scr))
563 return 1;
564 }
565
566 /* Did we come from a system call? */
567 if (restart) {
568 /* Restart the system call - no handlers present */
569 if (errno == ERESTARTNOHAND || errno == ERESTARTSYS || errno == ERESTARTNOINTR
570 || errno == ERESTART_RESTARTBLOCK)
571 {
572 if (IS_IA32_PROCESS(&scr->pt)) {
573 scr->pt.r8 = scr->pt.r1;
574 scr->pt.cr_iip -= 2;
575 if (errno == ERESTART_RESTARTBLOCK)
576 scr->pt.r8 = 0; /* x86 version of __NR_restart_syscall */
577 } else {
578 /*
579 * Note: the syscall number is in r15 which is saved in
580 * pt_regs so all we need to do here is adjust ip so that
581 * the "break" instruction gets re-executed.
582 */
583 ia64_decrement_ip(&scr->pt);
584 if (errno == ERESTART_RESTARTBLOCK)
585 scr->pt.r15 = __NR_restart_syscall;
586 }
587 }
588 }
589 return 0;
590}
591
592/* Set a delayed signal that was detected in MCA/INIT/NMI/PMI context where it
593 * could not be delivered. It is important that the target process is not
594 * allowed to do any more work in user space. Possible cases for the target
595 * process:
596 *
597 * - It is sleeping and will wake up soon. Store the data in the current task,
598 * the signal will be sent when the current task returns from the next
599 * interrupt.
600 *
601 * - It is running in user context. Store the data in the current task, the
602 * signal will be sent when the current task returns from the next interrupt.
603 *
604 * - It is running in kernel context on this or another cpu and will return to
605 * user context. Store the data in the target task, the signal will be sent
606 * to itself when the target task returns to user space.
607 *
608 * - It is running in kernel context on this cpu and will sleep before
609 * returning to user context. Because this is also the current task, the
610 * signal will not get delivered and the task could sleep indefinitely.
611 * Store the data in the idle task for this cpu, the signal will be sent
612 * after the idle task processes its next interrupt.
613 *
614 * To cover all cases, store the data in the target task, the current task and
615 * the idle task on this cpu. Whatever happens, the signal will be delivered
616 * to the target task before it can do any useful user space work. Multiple
617 * deliveries have no unwanted side effects.
618 *
619 * Note: This code is executed in MCA/INIT/NMI/PMI context, with interrupts
620 * disabled. It must not take any locks nor use kernel structures or services
621 * that require locks.
622 */
623
624/* To ensure that we get the right pid, check its start time. To avoid extra
625 * include files in thread_info.h, convert the task start_time to unsigned long,
626 * giving us a cycle time of > 580 years.
627 */
628static inline unsigned long
629start_time_ul(const struct task_struct *t)
630{
631 return t->start_time.tv_sec * NSEC_PER_SEC + t->start_time.tv_nsec;
632}
633
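A quick check of the "> 580 years" figure in the comment above: 2^64 nanoseconds expressed in Julian years (illustrative arithmetic only):

#include <stdio.h>

int main(void)
{
	double years = 18446744073709551616.0 /		/* 2^64 nanoseconds     */
		       (365.25 * 24 * 3600 * 1e9);	/* nanoseconds per year */
	printf("wraps after about %.1f years\n", years);	/* roughly 584.5 */
	return 0;
}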
634void
635set_sigdelayed(pid_t pid, int signo, int code, void __user *addr)
636{
637 struct task_struct *t;
638 unsigned long start_time = 0;
639 int i;
640
641 for (i = 1; i <= 3; ++i) {
642 switch (i) {
643 case 1:
644 t = find_task_by_pid(pid);
645 if (t)
646 start_time = start_time_ul(t);
647 break;
648 case 2:
649 t = current;
650 break;
651 default:
652 t = idle_task(smp_processor_id());
653 break;
654 }
655
656 if (!t)
657 return;
658 t->thread_info->sigdelayed.signo = signo;
659 t->thread_info->sigdelayed.code = code;
660 t->thread_info->sigdelayed.addr = addr;
661 t->thread_info->sigdelayed.start_time = start_time;
662 t->thread_info->sigdelayed.pid = pid;
663 wmb();
664 set_tsk_thread_flag(t, TIF_SIGDELAYED);
665 }
666}
667
668/* Called from entry.S when it detects TIF_SIGDELAYED, a delayed signal that
669 * was detected in MCA/INIT/NMI/PMI context where it could not be delivered.
670 */
671
672void
673do_sigdelayed(void)
674{
675 struct siginfo siginfo;
676 pid_t pid;
677 struct task_struct *t;
678
679 clear_thread_flag(TIF_SIGDELAYED);
680 memset(&siginfo, 0, sizeof(siginfo));
681 siginfo.si_signo = current_thread_info()->sigdelayed.signo;
682 siginfo.si_code = current_thread_info()->sigdelayed.code;
683 siginfo.si_addr = current_thread_info()->sigdelayed.addr;
684 pid = current_thread_info()->sigdelayed.pid;
685 t = find_task_by_pid(pid);
686 if (!t)
687 return;
688 if (current_thread_info()->sigdelayed.start_time != start_time_ul(t))
689 return;
690 force_sig_info(siginfo.si_signo, &siginfo, t);
691}
diff --git a/arch/ia64/kernel/smp.c b/arch/ia64/kernel/smp.c
new file mode 100644
index 000000000000..953095e2ce15
--- /dev/null
+++ b/arch/ia64/kernel/smp.c
@@ -0,0 +1,376 @@
1/*
2 * SMP Support
3 *
4 * Copyright (C) 1999 Walt Drummond <drummond@valinux.com>
5 * Copyright (C) 1999, 2001, 2003 David Mosberger-Tang <davidm@hpl.hp.com>
6 *
7 * Lots of stuff stolen from arch/alpha/kernel/smp.c
8 *
9 * 01/05/16 Rohit Seth <rohit.seth@intel.com> IA64-SMP functions. Reorganized
10 * the existing code (on the lines of x86 port).
11 * 00/09/11 David Mosberger <davidm@hpl.hp.com> Do loops_per_jiffy
12 * calibration on each CPU.
13 * 00/08/23 Asit Mallick <asit.k.mallick@intel.com> fixed logical processor id
14 * 00/03/31 Rohit Seth <rohit.seth@intel.com> Fixes for Bootstrap Processor
15 * & cpu_online_map now gets done here (instead of setup.c)
16 * 99/10/05 davidm Update to bring it in sync with new command-line processing
17 * scheme.
18 * 10/13/00 Goutham Rao <goutham.rao@intel.com> Updated smp_call_function and
19 * smp_call_function_single to resend IPI on timeouts
20 */
21#include <linux/module.h>
22#include <linux/kernel.h>
23#include <linux/sched.h>
24#include <linux/init.h>
25#include <linux/interrupt.h>
26#include <linux/smp.h>
27#include <linux/kernel_stat.h>
28#include <linux/mm.h>
29#include <linux/cache.h>
30#include <linux/delay.h>
31#include <linux/efi.h>
32#include <linux/bitops.h>
33
34#include <asm/atomic.h>
35#include <asm/current.h>
36#include <asm/delay.h>
37#include <asm/machvec.h>
38#include <asm/io.h>
39#include <asm/irq.h>
40#include <asm/page.h>
41#include <asm/pgalloc.h>
42#include <asm/pgtable.h>
43#include <asm/processor.h>
44#include <asm/ptrace.h>
45#include <asm/sal.h>
46#include <asm/system.h>
47#include <asm/tlbflush.h>
48#include <asm/unistd.h>
49#include <asm/mca.h>
50
51/*
52 * Structure and data for smp_call_function(). This is designed to minimise static memory
53 * requirements. It also looks cleaner.
54 */
55static __cacheline_aligned DEFINE_SPINLOCK(call_lock);
56
57struct call_data_struct {
58 void (*func) (void *info);
59 void *info;
60 long wait;
61 atomic_t started;
62 atomic_t finished;
63};
64
65static volatile struct call_data_struct *call_data;
66
67#define IPI_CALL_FUNC 0
68#define IPI_CPU_STOP 1
69
70/* This needs to be cacheline aligned because it is written to by *other* CPUs. */
71static DEFINE_PER_CPU(u64, ipi_operation) ____cacheline_aligned;
72
73extern void cpu_halt (void);
74
75void
76lock_ipi_calllock(void)
77{
78 spin_lock_irq(&call_lock);
79}
80
81void
82unlock_ipi_calllock(void)
83{
84 spin_unlock_irq(&call_lock);
85}
86
87static void
88stop_this_cpu (void)
89{
90 /*
91 * Remove this CPU:
92 */
93 cpu_clear(smp_processor_id(), cpu_online_map);
94 max_xtp();
95 local_irq_disable();
96 cpu_halt();
97}
98
99void
100cpu_die(void)
101{
102 max_xtp();
103 local_irq_disable();
104 cpu_halt();
105 /* Should never be here */
106 BUG();
107 for (;;);
108}
109
110irqreturn_t
111handle_IPI (int irq, void *dev_id, struct pt_regs *regs)
112{
113 int this_cpu = get_cpu();
114 unsigned long *pending_ipis = &__ia64_per_cpu_var(ipi_operation);
115 unsigned long ops;
116
117 mb(); /* Order interrupt and bit testing. */
118 while ((ops = xchg(pending_ipis, 0)) != 0) {
119 mb(); /* Order bit clearing and data access. */
120 do {
121 unsigned long which;
122
123 which = ffz(~ops);
124 ops &= ~(1 << which);
125
126 switch (which) {
127 case IPI_CALL_FUNC:
128 {
129 struct call_data_struct *data;
130 void (*func)(void *info);
131 void *info;
132 int wait;
133
134 /* release the 'pointer lock' */
135 data = (struct call_data_struct *) call_data;
136 func = data->func;
137 info = data->info;
138 wait = data->wait;
139
140 mb();
141 atomic_inc(&data->started);
142 /*
143 * At this point the structure may be gone unless
144 * wait is true.
145 */
146 (*func)(info);
147
148 /* Notify the sending CPU that the task is done. */
149 mb();
150 if (wait)
151 atomic_inc(&data->finished);
152 }
153 break;
154
155 case IPI_CPU_STOP:
156 stop_this_cpu();
157 break;
158
159 default:
160 printk(KERN_CRIT "Unknown IPI on CPU %d: %lu\n", this_cpu, which);
161 break;
162 }
163 } while (ops);
164 mb(); /* Order data access and bit testing. */
165 }
166 put_cpu();
167 return IRQ_HANDLED;
168}
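
The loop above drains the per-CPU ipi_operation word: xchg() atomically claims every pending bit at once, and ffz(~ops) peels off the lowest set bit until none remain. Below is a minimal user-space sketch of the same take-and-dispatch pattern, using C11 atomics and __builtin_ctzl as stand-ins for the kernel's xchg() and ffz(~ops); the pending word and handler names are invented for the illustration.

#include <stdio.h>
#include <stdatomic.h>

static _Atomic unsigned long pending;           /* stand-in for ipi_operation */

static void handle_op(unsigned long which)
{
        printf("handling op %lu\n", which);
}

static void drain_pending(void)
{
        unsigned long ops;

        /* Atomically claim all pending bits, as xchg(pending_ipis, 0) does. */
        while ((ops = atomic_exchange(&pending, 0)) != 0) {
                do {
                        /* Lowest set bit, equivalent to ffz(~ops). */
                        unsigned long which = __builtin_ctzl(ops);

                        ops &= ~(1UL << which);
                        handle_op(which);
                } while (ops);
        }
}

int main(void)
{
        atomic_fetch_or(&pending, (1UL << 0) | (1UL << 1));
        drain_pending();
        return 0;
}
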
169
170/*
171 * Called with preemption disabled.
172 */
173static inline void
174send_IPI_single (int dest_cpu, int op)
175{
176 set_bit(op, &per_cpu(ipi_operation, dest_cpu));
177 platform_send_ipi(dest_cpu, IA64_IPI_VECTOR, IA64_IPI_DM_INT, 0);
178}
179
180/*
181 * Called with preemption disabled.
182 */
183static inline void
184send_IPI_allbutself (int op)
185{
186 unsigned int i;
187
188 for (i = 0; i < NR_CPUS; i++) {
189 if (cpu_online(i) && i != smp_processor_id())
190 send_IPI_single(i, op);
191 }
192}
193
194/*
195 * Called with preemption disabled.
196 */
197static inline void
198send_IPI_all (int op)
199{
200 int i;
201
202 for (i = 0; i < NR_CPUS; i++)
203 if (cpu_online(i))
204 send_IPI_single(i, op);
205}
206
207/*
208 * Called with preemption disabled.
209 */
210static inline void
211send_IPI_self (int op)
212{
213 send_IPI_single(smp_processor_id(), op);
214}
215
216/*
217 * Called with preemption disabled.
218 */
219void
220smp_send_reschedule (int cpu)
221{
222 platform_send_ipi(cpu, IA64_IPI_RESCHEDULE, IA64_IPI_DM_INT, 0);
223}
224
225void
226smp_flush_tlb_all (void)
227{
228 on_each_cpu((void (*)(void *))local_flush_tlb_all, NULL, 1, 1);
229}
230
231void
232smp_flush_tlb_mm (struct mm_struct *mm)
233{
234 /* this happens for the common case of a single-threaded fork(): */
235 if (likely(mm == current->active_mm && atomic_read(&mm->mm_users) == 1))
236 {
237 local_finish_flush_tlb_mm(mm);
238 return;
239 }
240
241 /*
242 * We could optimize this further by using mm->cpu_vm_mask to track which CPUs
243 * have been running in the address space. It's not clear that this is worth the
244 * trouble though: to avoid races, we have to raise the IPI on the target CPU
245 * anyhow, and once a CPU is interrupted, the cost of local_flush_tlb_all() is
246 * rather trivial.
247 */
248 on_each_cpu((void (*)(void *))local_finish_flush_tlb_mm, mm, 1, 1);
249}
250
251/*
252 * Run a function on another CPU
253 * <func> The function to run. This must be fast and non-blocking.
254 * <info> An arbitrary pointer to pass to the function.
255 * <nonatomic> Currently unused.
256 * <wait> If true, wait until function has completed on other CPUs.
257 * [RETURNS] 0 on success, else a negative status code.
258 *
259 * Does not return until the remote CPU is nearly ready to execute <func>
260 * or has already executed it.
261 */
262
263int
264smp_call_function_single (int cpuid, void (*func) (void *info), void *info, int nonatomic,
265 int wait)
266{
267 struct call_data_struct data;
268 int cpus = 1;
269 int me = get_cpu(); /* prevent preemption and reschedule on another processor */
270
271 if (cpuid == me) {
272 printk("%s: trying to call self\n", __FUNCTION__);
273 put_cpu();
274 return -EBUSY;
275 }
276
277 data.func = func;
278 data.info = info;
279 atomic_set(&data.started, 0);
280 data.wait = wait;
281 if (wait)
282 atomic_set(&data.finished, 0);
283
284 spin_lock_bh(&call_lock);
285
286 call_data = &data;
287 mb(); /* ensure store to call_data precedes setting of IPI_CALL_FUNC */
288 send_IPI_single(cpuid, IPI_CALL_FUNC);
289
290 /* Wait for response */
291 while (atomic_read(&data.started) != cpus)
292 cpu_relax();
293
294 if (wait)
295 while (atomic_read(&data.finished) != cpus)
296 cpu_relax();
297 call_data = NULL;
298
299 spin_unlock_bh(&call_lock);
300 put_cpu();
301 return 0;
302}
303EXPORT_SYMBOL(smp_call_function_single);
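
A hypothetical caller of the interface above, sketched in kernel style purely as an illustration (the target CPU number and the callback are assumptions, not taken from this patch): the callback must be fast and non-blocking, and wait=1 makes the call return only after the remote CPU has finished running it.

/* Illustrative sketch only: sample the ITC on CPU 2 and wait for the result. */
static void read_remote_itc(void *info)
{
        unsigned long *val = info;

        *val = ia64_get_itc();          /* fast, no sleeping, no locks */
}

static unsigned long sample_cpu2_itc(void)
{
        unsigned long val = 0;

        /* nonatomic=0 is unused; wait=1 blocks until the callback has run. */
        if (smp_call_function_single(2, read_remote_itc, &val, 0, 1) < 0)
                return 0;

        return val;
}
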
304
305/*
306 * this function sends a 'generic call function' IPI to all other CPUs
307 * in the system.
308 */
309
310/*
311 * [SUMMARY] Run a function on all other CPUs.
312 * <func> The function to run. This must be fast and non-blocking.
313 * <info> An arbitrary pointer to pass to the function.
314 * <nonatomic> currently unused.
315 * <wait> If true, wait (atomically) until function has completed on other CPUs.
316 * [RETURNS] 0 on success, else a negative status code.
317 *
318 * Does not return until remote CPUs are nearly ready to execute <func> or are or have
319 * executed.
320 *
321 * You must not call this function with disabled interrupts or from a
322 * hardware interrupt handler or from a bottom half handler.
323 */
324int
325smp_call_function (void (*func) (void *info), void *info, int nonatomic, int wait)
326{
327 struct call_data_struct data;
328 int cpus = num_online_cpus()-1;
329
330 if (!cpus)
331 return 0;
332
333 /* Can deadlock when called with interrupts disabled */
334 WARN_ON(irqs_disabled());
335
336 data.func = func;
337 data.info = info;
338 atomic_set(&data.started, 0);
339 data.wait = wait;
340 if (wait)
341 atomic_set(&data.finished, 0);
342
343 spin_lock(&call_lock);
344
345 call_data = &data;
346 mb(); /* ensure store to call_data precedes setting of IPI_CALL_FUNC */
347 send_IPI_allbutself(IPI_CALL_FUNC);
348
349 /* Wait for response */
350 while (atomic_read(&data.started) != cpus)
351 cpu_relax();
352
353 if (wait)
354 while (atomic_read(&data.finished) != cpus)
355 cpu_relax();
356 call_data = NULL;
357
358 spin_unlock(&call_lock);
359 return 0;
360}
361EXPORT_SYMBOL(smp_call_function);
362
363/*
364 * this function calls the 'stop' function on all other CPUs in the system.
365 */
366void
367smp_send_stop (void)
368{
369 send_IPI_allbutself(IPI_CPU_STOP);
370}
371
372int __init
373setup_profiling_timer (unsigned int multiplier)
374{
375 return -EINVAL;
376}
diff --git a/arch/ia64/kernel/smpboot.c b/arch/ia64/kernel/smpboot.c
new file mode 100644
index 000000000000..5318f0cbfc26
--- /dev/null
+++ b/arch/ia64/kernel/smpboot.c
@@ -0,0 +1,692 @@
1/*
2 * SMP boot-related support
3 *
4 * Copyright (C) 1998-2003 Hewlett-Packard Co
5 * David Mosberger-Tang <davidm@hpl.hp.com>
6 *
7 * 01/05/16 Rohit Seth <rohit.seth@intel.com> Moved SMP booting functions from smp.c to here.
8 * 01/04/27 David Mosberger <davidm@hpl.hp.com> Added ITC synching code.
9 * 02/07/31 David Mosberger <davidm@hpl.hp.com> Switch over to hotplug-CPU boot-sequence.
10 * smp_boot_cpus()/smp_commence() is replaced by
11 * smp_prepare_cpus()/__cpu_up()/smp_cpus_done().
12 */
13#include <linux/config.h>
14
15#include <linux/module.h>
16#include <linux/acpi.h>
17#include <linux/bootmem.h>
18#include <linux/cpu.h>
19#include <linux/delay.h>
20#include <linux/init.h>
21#include <linux/interrupt.h>
22#include <linux/irq.h>
23#include <linux/kernel.h>
24#include <linux/kernel_stat.h>
25#include <linux/mm.h>
26#include <linux/notifier.h>
27#include <linux/smp.h>
28#include <linux/smp_lock.h>
29#include <linux/spinlock.h>
30#include <linux/efi.h>
31#include <linux/percpu.h>
32#include <linux/bitops.h>
33
34#include <asm/atomic.h>
35#include <asm/cache.h>
36#include <asm/current.h>
37#include <asm/delay.h>
38#include <asm/ia32.h>
39#include <asm/io.h>
40#include <asm/irq.h>
41#include <asm/machvec.h>
42#include <asm/mca.h>
43#include <asm/page.h>
44#include <asm/pgalloc.h>
45#include <asm/pgtable.h>
46#include <asm/processor.h>
47#include <asm/ptrace.h>
48#include <asm/sal.h>
49#include <asm/system.h>
50#include <asm/tlbflush.h>
51#include <asm/unistd.h>
52
53#define SMP_DEBUG 0
54
55#if SMP_DEBUG
56#define Dprintk(x...) printk(x)
57#else
58#define Dprintk(x...)
59#endif
60
61
62/*
63 * ITC synchronization related stuff:
64 */
65#define MASTER 0
66#define SLAVE (SMP_CACHE_BYTES/8)
67
68#define NUM_ROUNDS 64 /* magic value */
69#define NUM_ITERS 5 /* likewise */
70
71static DEFINE_SPINLOCK(itc_sync_lock);
72static volatile unsigned long go[SLAVE + 1];
73
74#define DEBUG_ITC_SYNC 0
75
76extern void __devinit calibrate_delay (void);
77extern void start_ap (void);
78extern unsigned long ia64_iobase;
79
80task_t *task_for_booting_cpu;
81
82/*
83 * State for each CPU
84 */
85DEFINE_PER_CPU(int, cpu_state);
86
87/* Bitmasks of currently online, and possible CPUs */
88cpumask_t cpu_online_map;
89EXPORT_SYMBOL(cpu_online_map);
90cpumask_t cpu_possible_map;
91EXPORT_SYMBOL(cpu_possible_map);
92
93/* which logical CPU number maps to which CPU (physical APIC ID) */
94volatile int ia64_cpu_to_sapicid[NR_CPUS];
95EXPORT_SYMBOL(ia64_cpu_to_sapicid);
96
97static volatile cpumask_t cpu_callin_map;
98
99struct smp_boot_data smp_boot_data __initdata;
100
101unsigned long ap_wakeup_vector = -1; /* External Int use to wakeup APs */
102
103char __initdata no_int_routing;
104
105unsigned char smp_int_redirect; /* are INT and IPI redirectable by the chipset? */
106
107static int __init
108nointroute (char *str)
109{
110 no_int_routing = 1;
111 printk ("no_int_routing on\n");
112 return 1;
113}
114
115__setup("nointroute", nointroute);
116
117void
118sync_master (void *arg)
119{
120 unsigned long flags, i;
121
122 go[MASTER] = 0;
123
124 local_irq_save(flags);
125 {
126 for (i = 0; i < NUM_ROUNDS*NUM_ITERS; ++i) {
127 while (!go[MASTER]);
128 go[MASTER] = 0;
129 go[SLAVE] = ia64_get_itc();
130 }
131 }
132 local_irq_restore(flags);
133}
134
135/*
136 * Return the number of cycles by which our itc differs from the itc on the master
137 * (time-keeper) CPU. A positive number indicates our itc is ahead of the master,
138 * negative that it is behind.
139 */
140static inline long
141get_delta (long *rt, long *master)
142{
143 unsigned long best_t0 = 0, best_t1 = ~0UL, best_tm = 0;
144 unsigned long tcenter, t0, t1, tm;
145 long i;
146
147 for (i = 0; i < NUM_ITERS; ++i) {
148 t0 = ia64_get_itc();
149 go[MASTER] = 1;
150 while (!(tm = go[SLAVE]));
151 go[SLAVE] = 0;
152 t1 = ia64_get_itc();
153
154 if (t1 - t0 < best_t1 - best_t0)
155 best_t0 = t0, best_t1 = t1, best_tm = tm;
156 }
157
158 *rt = best_t1 - best_t0;
159 *master = best_tm - best_t0;
160
161 /* average best_t0 and best_t1 without overflow: */
162 tcenter = (best_t0/2 + best_t1/2);
163 if (best_t0 % 2 + best_t1 % 2 == 2)
164 ++tcenter;
165 return tcenter - best_tm;
166}
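
The tcenter computation above averages best_t0 and best_t1 without risking overflow: each value is halved before adding, and one is added back when both halves dropped a remainder (both operands odd). A small self-contained check of that trick, with values chosen only for the example:

#include <assert.h>
#include <stdio.h>

/* Overflow-safe midpoint, mirroring the tcenter computation in get_delta(). */
static unsigned long midpoint(unsigned long a, unsigned long b)
{
        unsigned long mid = a / 2 + b / 2;

        if (a % 2 + b % 2 == 2)         /* both divisions dropped 0.5 */
                ++mid;
        return mid;
}

int main(void)
{
        assert(midpoint(6, 10) == 8);
        assert(midpoint(7, 9) == 8);                    /* both odd */
        assert(midpoint(~0UL, ~0UL - 2) == ~0UL - 1);   /* naive (a+b)/2 would overflow */
        printf("midpoint checks passed\n");
        return 0;
}
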
167
168/*
169 * Synchronize ar.itc of the current (slave) CPU with the ar.itc of the MASTER CPU
170 * (normally the time-keeper CPU). We use a closed loop to eliminate the possibility of
171 * unaccounted-for errors (such as getting a machine check in the middle of a calibration
172 * step). The basic idea is for the slave to ask the master what itc value it has and to
173 * read its own itc before and after the master responds. Each iteration gives us three
174 * timestamps:
175 *
176 * slave master
177 *
178 * t0 ---\
179 * ---\
180 * --->
181 * tm
182 * /---
183 * /---
184 * t1 <---
185 *
186 *
187 * The goal is to adjust the slave's ar.itc such that tm falls exactly half-way between t0
188 * and t1. If we achieve this, the clocks are synchronized provided the interconnect
189 * between the slave and the master is symmetric. Even if the interconnect were
190 * asymmetric, we would still know that the synchronization error is smaller than the
191 * roundtrip latency (t1 - t0).
192 *
193 * When the interconnect is quiet and symmetric, this lets us synchronize the itc to
194 * within one or two cycles. However, we can only *guarantee* that the synchronization is
195 * accurate to within a round-trip time, which is typically in the range of several
196 * hundred cycles (e.g., ~500 cycles). In practice, this means that the itc's are usually
197 * almost perfectly synchronized, but we shouldn't assume that the accuracy is much better
198 * than half a microsecond or so.
199 */
200void
201ia64_sync_itc (unsigned int master)
202{
203 long i, delta, adj, adjust_latency = 0, done = 0;
204 unsigned long flags, rt, master_time_stamp, bound;
205#if DEBUG_ITC_SYNC
206 struct {
207 long rt; /* roundtrip time */
208 long master; /* master's timestamp */
209 long diff; /* difference between midpoint and master's timestamp */
210 long lat; /* estimate of itc adjustment latency */
211 } t[NUM_ROUNDS];
212#endif
213
214 /*
215 * Make sure local timer ticks are disabled while we sync. If
216 * they were enabled, we'd have to worry about nasty issues
217 * like setting the ITC ahead of (or a long time before) the
218 * next scheduled tick.
219 */
220 BUG_ON((ia64_get_itv() & (1 << 16)) == 0);
221
222 go[MASTER] = 1;
223
224 if (smp_call_function_single(master, sync_master, NULL, 1, 0) < 0) {
225 printk(KERN_ERR "sync_itc: failed to get attention of CPU %u!\n", master);
226 return;
227 }
228
229 while (go[MASTER]); /* wait for master to be ready */
230
231 spin_lock_irqsave(&itc_sync_lock, flags);
232 {
233 for (i = 0; i < NUM_ROUNDS; ++i) {
234 delta = get_delta(&rt, &master_time_stamp);
235 if (delta == 0) {
236 done = 1; /* let's lock on to this... */
237 bound = rt;
238 }
239
240 if (!done) {
241 if (i > 0) {
242 adjust_latency += -delta;
243 adj = -delta + adjust_latency/4;
244 } else
245 adj = -delta;
246
247 ia64_set_itc(ia64_get_itc() + adj);
248 }
249#if DEBUG_ITC_SYNC
250 t[i].rt = rt;
251 t[i].master = master_time_stamp;
252 t[i].diff = delta;
253 t[i].lat = adjust_latency/4;
254#endif
255 }
256 }
257 spin_unlock_irqrestore(&itc_sync_lock, flags);
258
259#if DEBUG_ITC_SYNC
260 for (i = 0; i < NUM_ROUNDS; ++i)
261 printk("rt=%5ld master=%5ld diff=%5ld adjlat=%5ld\n",
262 t[i].rt, t[i].master, t[i].diff, t[i].lat);
263#endif
264
265 printk(KERN_INFO "CPU %d: synchronized ITC with CPU %u (last diff %ld cycles, "
266 "maxerr %lu cycles)\n", smp_processor_id(), master, delta, rt);
267}
268
269/*
270 * Ideally sets up per-cpu profiling hooks. Doesn't do much now...
271 */
272static inline void __devinit
273smp_setup_percpu_timer (void)
274{
275}
276
277static void __devinit
278smp_callin (void)
279{
280 int cpuid, phys_id;
281 extern void ia64_init_itm(void);
282
283#ifdef CONFIG_PERFMON
284 extern void pfm_init_percpu(void);
285#endif
286
287 cpuid = smp_processor_id();
288 phys_id = hard_smp_processor_id();
289
290 if (cpu_online(cpuid)) {
291 printk(KERN_ERR "huh, phys CPU#0x%x, CPU#0x%x already present??\n",
292 phys_id, cpuid);
293 BUG();
294 }
295
296 lock_ipi_calllock();
297 cpu_set(cpuid, cpu_online_map);
298 unlock_ipi_calllock();
299
300 smp_setup_percpu_timer();
301
302 ia64_mca_cmc_vector_setup(); /* Setup vector on AP */
303
304#ifdef CONFIG_PERFMON
305 pfm_init_percpu();
306#endif
307
308 local_irq_enable();
309
310 if (!(sal_platform_features & IA64_SAL_PLATFORM_FEATURE_ITC_DRIFT)) {
311 /*
312 * Synchronize the ITC with the BP. Need to do this after irqs are
313 * enabled because ia64_sync_itc() calls smp_call_function_single(), which
314 * calls spin_unlock_bh(), which calls local_bh_enable(), which bugs out
315 * if irqs are not enabled...
316 */
317 Dprintk("Going to syncup ITC with BP.\n");
318 ia64_sync_itc(0);
319 }
320
321 /*
322 * Get our bogomips.
323 */
324 ia64_init_itm();
325 calibrate_delay();
326 local_cpu_data->loops_per_jiffy = loops_per_jiffy;
327
328#ifdef CONFIG_IA32_SUPPORT
329 ia32_gdt_init();
330#endif
331
332 /*
333 * Allow the master to continue.
334 */
335 cpu_set(cpuid, cpu_callin_map);
336 Dprintk("Stack on CPU %d at about %p\n",cpuid, &cpuid);
337}
338
339
340/*
341 * Activate a secondary processor. head.S calls this.
342 */
343int __devinit
344start_secondary (void *unused)
345{
346 /* Early console may use I/O ports */
347 ia64_set_kr(IA64_KR_IO_BASE, __pa(ia64_iobase));
348
349 Dprintk("start_secondary: starting CPU 0x%x\n", hard_smp_processor_id());
350 efi_map_pal_code();
351 cpu_init();
352 smp_callin();
353
354 cpu_idle();
355 return 0;
356}
357
358struct pt_regs * __devinit idle_regs(struct pt_regs *regs)
359{
360 return NULL;
361}
362
363struct create_idle {
364 struct task_struct *idle;
365 struct completion done;
366 int cpu;
367};
368
369void
370do_fork_idle(void *_c_idle)
371{
372 struct create_idle *c_idle = _c_idle;
373
374 c_idle->idle = fork_idle(c_idle->cpu);
375 complete(&c_idle->done);
376}
377
378static int __devinit
379do_boot_cpu (int sapicid, int cpu)
380{
381 int timeout;
382 struct create_idle c_idle = {
383 .cpu = cpu,
384 .done = COMPLETION_INITIALIZER(c_idle.done),
385 };
386 DECLARE_WORK(work, do_fork_idle, &c_idle);
387 /*
388 * We can't use kernel_thread() since we must avoid rescheduling the child.
389 */
390 if (!keventd_up() || current_is_keventd())
391 work.func(work.data);
392 else {
393 schedule_work(&work);
394 wait_for_completion(&c_idle.done);
395 }
396
397 if (IS_ERR(c_idle.idle))
398 panic("failed fork for CPU %d", cpu);
399 task_for_booting_cpu = c_idle.idle;
400
401 Dprintk("Sending wakeup vector %lu to AP 0x%x/0x%x.\n", ap_wakeup_vector, cpu, sapicid);
402
403 platform_send_ipi(cpu, ap_wakeup_vector, IA64_IPI_DM_INT, 0);
404
405 /*
406 * Wait 10s total for the AP to start
407 */
408 Dprintk("Waiting on callin_map ...");
409 for (timeout = 0; timeout < 100000; timeout++) {
410 if (cpu_isset(cpu, cpu_callin_map))
411 break; /* It has booted */
412 udelay(100);
413 }
414 Dprintk("\n");
415
416 if (!cpu_isset(cpu, cpu_callin_map)) {
417 printk(KERN_ERR "Processor 0x%x/0x%x is stuck.\n", cpu, sapicid);
418 ia64_cpu_to_sapicid[cpu] = -1;
419 cpu_clear(cpu, cpu_online_map); /* was set in smp_callin() */
420 return -EINVAL;
421 }
422 return 0;
423}
424
425static int __init
426decay (char *str)
427{
428 int ticks;
429 get_option (&str, &ticks);
430 return 1;
431}
432
433__setup("decay=", decay);
434
435/*
436 * Initialize the logical CPU number to SAPICID mapping
437 */
438void __init
439smp_build_cpu_map (void)
440{
441 int sapicid, cpu, i;
442 int boot_cpu_id = hard_smp_processor_id();
443
444 for (cpu = 0; cpu < NR_CPUS; cpu++) {
445 ia64_cpu_to_sapicid[cpu] = -1;
446#ifdef CONFIG_HOTPLUG_CPU
447 cpu_set(cpu, cpu_possible_map);
448#endif
449 }
450
451 ia64_cpu_to_sapicid[0] = boot_cpu_id;
452 cpus_clear(cpu_present_map);
453 cpu_set(0, cpu_present_map);
454 cpu_set(0, cpu_possible_map);
455 for (cpu = 1, i = 0; i < smp_boot_data.cpu_count; i++) {
456 sapicid = smp_boot_data.cpu_phys_id[i];
457 if (sapicid == boot_cpu_id)
458 continue;
459 cpu_set(cpu, cpu_present_map);
460 cpu_set(cpu, cpu_possible_map);
461 ia64_cpu_to_sapicid[cpu] = sapicid;
462 cpu++;
463 }
464}
465
466#ifdef CONFIG_NUMA
467
468/* on which node is each logical CPU (one cacheline even for 64 CPUs) */
469u8 cpu_to_node_map[NR_CPUS] __cacheline_aligned;
470EXPORT_SYMBOL(cpu_to_node_map);
471/* which logical CPUs are on which nodes */
472cpumask_t node_to_cpu_mask[MAX_NUMNODES] __cacheline_aligned;
473
474/*
475 * Build cpu to node mapping and initialize the per node cpu masks.
476 */
477void __init
478build_cpu_to_node_map (void)
479{
480 int cpu, i, node;
481
482 for(node=0; node<MAX_NUMNODES; node++)
483 cpus_clear(node_to_cpu_mask[node]);
484 for(cpu = 0; cpu < NR_CPUS; ++cpu) {
485 /*
486 * All Itanium NUMA platforms I know use ACPI, so maybe we
487 * can drop this ifdef completely. [EF]
488 */
489#ifdef CONFIG_ACPI_NUMA
490 node = -1;
491 for (i = 0; i < NR_CPUS; ++i)
492 if (cpu_physical_id(cpu) == node_cpuid[i].phys_id) {
493 node = node_cpuid[i].nid;
494 break;
495 }
496#else
497# error Fixme: Dunno how to build CPU-to-node map.
498#endif
499 cpu_to_node_map[cpu] = (node >= 0) ? node : 0;
500 if (node >= 0)
501 cpu_set(cpu, node_to_cpu_mask[node]);
502 }
503}
504
505#endif /* CONFIG_NUMA */
506
507/*
508 * Cycle through the APs sending Wakeup IPIs to boot each.
509 */
510void __init
511smp_prepare_cpus (unsigned int max_cpus)
512{
513 int boot_cpu_id = hard_smp_processor_id();
514
515 /*
516 * Initialize the per-CPU profiling counter/multiplier
517 */
518
519 smp_setup_percpu_timer();
520
521 /*
522 * We have the boot CPU online for sure.
523 */
524 cpu_set(0, cpu_online_map);
525 cpu_set(0, cpu_callin_map);
526
527 local_cpu_data->loops_per_jiffy = loops_per_jiffy;
528 ia64_cpu_to_sapicid[0] = boot_cpu_id;
529
530 printk(KERN_INFO "Boot processor id 0x%x/0x%x\n", 0, boot_cpu_id);
531
532 current_thread_info()->cpu = 0;
533
534 /*
535 * If SMP should be disabled, then really disable it!
536 */
537 if (!max_cpus) {
538 printk(KERN_INFO "SMP mode deactivated.\n");
539 cpus_clear(cpu_online_map);
540 cpus_clear(cpu_present_map);
541 cpus_clear(cpu_possible_map);
542 cpu_set(0, cpu_online_map);
543 cpu_set(0, cpu_present_map);
544 cpu_set(0, cpu_possible_map);
545 return;
546 }
547}
548
549void __devinit smp_prepare_boot_cpu(void)
550{
551 cpu_set(smp_processor_id(), cpu_online_map);
552 cpu_set(smp_processor_id(), cpu_callin_map);
553}
554
555#ifdef CONFIG_HOTPLUG_CPU
556extern void fixup_irqs(void);
557/* must be called with cpucontrol mutex held */
558static int __devinit cpu_enable(unsigned int cpu)
559{
560 per_cpu(cpu_state,cpu) = CPU_UP_PREPARE;
561 wmb();
562
563 while (!cpu_online(cpu))
564 cpu_relax();
565 return 0;
566}
567
568int __cpu_disable(void)
569{
570 int cpu = smp_processor_id();
571
572 /*
573 * don't permit offlining the boot processor for now
574 */
575 if (cpu == 0)
576 return -EBUSY;
577
578 fixup_irqs();
579 local_flush_tlb_all();
580 printk ("Disabled cpu %u\n", smp_processor_id());
581 return 0;
582}
583
584void __cpu_die(unsigned int cpu)
585{
586 unsigned int i;
587
588 for (i = 0; i < 100; i++) {
589 /* They ack this in play_dead by setting CPU_DEAD */
590 if (per_cpu(cpu_state, cpu) == CPU_DEAD)
591 {
592 /*
593 * TBD: Enable this when physical removal
594 * is supported, or when the processor is put in
595 * SAL_BOOT_RENDEZ mode
596 * cpu_clear(cpu, cpu_callin_map);
597 */
598 return;
599 }
600 msleep(100);
601 }
602 printk(KERN_ERR "CPU %u didn't die...\n", cpu);
603}
604#else /* !CONFIG_HOTPLUG_CPU */
605static int __devinit cpu_enable(unsigned int cpu)
606{
607 return 0;
608}
609
610int __cpu_disable(void)
611{
612 return -ENOSYS;
613}
614
615void __cpu_die(unsigned int cpu)
616{
617 /* We said "no" in __cpu_disable */
618 BUG();
619}
620#endif /* CONFIG_HOTPLUG_CPU */
621
622void
623smp_cpus_done (unsigned int dummy)
624{
625 int cpu;
626 unsigned long bogosum = 0;
627
628 /*
629 * Allow the user to impress friends.
630 */
631
632 for (cpu = 0; cpu < NR_CPUS; cpu++)
633 if (cpu_online(cpu))
634 bogosum += cpu_data(cpu)->loops_per_jiffy;
635
636 printk(KERN_INFO "Total of %d processors activated (%lu.%02lu BogoMIPS).\n",
637 (int)num_online_cpus(), bogosum/(500000/HZ), (bogosum/(5000/HZ))%100);
638}
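
The printk above turns the summed loops_per_jiffy values into a BogoMIPS figure: bogosum/(500000/HZ) is the integer part of bogosum*HZ/500000, and (bogosum/(5000/HZ))%100 supplies two decimal places. A standalone sketch of the same arithmetic, where HZ and the per-CPU value are assumed for the example:

#include <stdio.h>

int main(void)
{
        unsigned long hz = 1024;                        /* assumed HZ */
        unsigned long loops_per_jiffy = 1200000;        /* hypothetical per-CPU value */
        unsigned long bogosum = 4 * loops_per_jiffy;    /* four online CPUs */

        /* Same formula as the smp_cpus_done() message. */
        printf("Total of 4 processors activated (%lu.%02lu BogoMIPS).\n",
               bogosum / (500000 / hz), (bogosum / (5000 / hz)) % 100);
        return 0;
}
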
639
640int __devinit
641__cpu_up (unsigned int cpu)
642{
643 int ret;
644 int sapicid;
645
646 sapicid = ia64_cpu_to_sapicid[cpu];
647 if (sapicid == -1)
648 return -EINVAL;
649
650 /*
651 * Already booted: just enable it and get out of the idle loop
652 */
653 if (cpu_isset(cpu, cpu_callin_map))
654 {
655 cpu_enable(cpu);
656 local_irq_enable();
657 while (!cpu_isset(cpu, cpu_online_map))
658 mb();
659 return 0;
660 }
661 /* Processor goes to start_secondary(), sets online flag */
662 ret = do_boot_cpu(sapicid, cpu);
663 if (ret < 0)
664 return ret;
665
666 return 0;
667}
668
669/*
670 * Assume that CPUs have been discovered by some platform-dependent interface. For
671 * SoftSDV/Lion, that would be ACPI.
672 *
673 * Setup of the IPI irq handler is done in irq.c:init_IRQ_SMP().
674 */
675void __init
676init_smp_config(void)
677{
678 struct fptr {
679 unsigned long fp;
680 unsigned long gp;
681 } *ap_startup;
682 long sal_ret;
683
684 /* Tell SAL where to drop the AP's. */
685 ap_startup = (struct fptr *) start_ap;
686 sal_ret = ia64_sal_set_vectors(SAL_VECTOR_OS_BOOT_RENDEZ,
687 ia64_tpa(ap_startup->fp), ia64_tpa(ap_startup->gp), 0, 0, 0, 0);
688 if (sal_ret < 0)
689 printk(KERN_ERR "SMP: Can't set SAL AP Boot Rendezvous: %s\n",
690 ia64_sal_strerror(sal_ret));
691}
692
diff --git a/arch/ia64/kernel/sys_ia64.c b/arch/ia64/kernel/sys_ia64.c
new file mode 100644
index 000000000000..3ac216e1c8bb
--- /dev/null
+++ b/arch/ia64/kernel/sys_ia64.c
@@ -0,0 +1,298 @@
1/*
2 * This file contains various system calls that have different calling
3 * conventions on different platforms.
4 *
5 * Copyright (C) 1999-2000, 2002-2003, 2005 Hewlett-Packard Co
6 * David Mosberger-Tang <davidm@hpl.hp.com>
7 */
8#include <linux/config.h>
9#include <linux/errno.h>
10#include <linux/fs.h>
11#include <linux/mm.h>
12#include <linux/mman.h>
13#include <linux/sched.h>
14#include <linux/shm.h>
15#include <linux/file.h> /* doh, must come after sched.h... */
16#include <linux/smp.h>
17#include <linux/smp_lock.h>
18#include <linux/syscalls.h>
19#include <linux/highuid.h>
20#include <linux/hugetlb.h>
21
22#include <asm/shmparam.h>
23#include <asm/uaccess.h>
24
25unsigned long
26arch_get_unmapped_area (struct file *filp, unsigned long addr, unsigned long len,
27 unsigned long pgoff, unsigned long flags)
28{
29 long map_shared = (flags & MAP_SHARED);
30 unsigned long start_addr, align_mask = PAGE_SIZE - 1;
31 struct mm_struct *mm = current->mm;
32 struct vm_area_struct *vma;
33
34 if (len > RGN_MAP_LIMIT)
35 return -ENOMEM;
36
37#ifdef CONFIG_HUGETLB_PAGE
38 if (REGION_NUMBER(addr) == REGION_HPAGE)
39 addr = 0;
40#endif
41 if (!addr)
42 addr = mm->free_area_cache;
43
44 if (map_shared && (TASK_SIZE > 0xfffffffful))
45 /*
46 * For 64-bit tasks, align shared segments to 1MB to avoid potential
47 * performance penalty due to virtual aliasing (see ASDM). For 32-bit
48 * tasks, we prefer to avoid exhausting the address space too quickly by
49 * limiting alignment to a single page.
50 */
51 align_mask = SHMLBA - 1;
52
53 full_search:
54 start_addr = addr = (addr + align_mask) & ~align_mask;
55
56 for (vma = find_vma(mm, addr); ; vma = vma->vm_next) {
57 /* At this point: (!vma || addr < vma->vm_end). */
58 if (TASK_SIZE - len < addr || RGN_MAP_LIMIT - len < REGION_OFFSET(addr)) {
59 if (start_addr != TASK_UNMAPPED_BASE) {
60 /* Start a new search --- just in case we missed some holes. */
61 addr = TASK_UNMAPPED_BASE;
62 goto full_search;
63 }
64 return -ENOMEM;
65 }
66 if (!vma || addr + len <= vma->vm_start) {
67 /* Remember the address where we stopped this search: */
68 mm->free_area_cache = addr + len;
69 return addr;
70 }
71 addr = (vma->vm_end + align_mask) & ~align_mask;
72 }
73}
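
The search above repeatedly rounds candidate addresses up to an alignment boundary with (addr + align_mask) & ~align_mask, where align_mask is PAGE_SIZE - 1 by default and SHMLBA - 1 (1 MB style alignment) for shared mappings of 64-bit tasks. A small sketch of that rounding, with placeholder mask values chosen only for illustration:

#include <stdio.h>

/* Round addr up to the next multiple of (mask + 1), as the vma search does. */
static unsigned long align_up(unsigned long addr, unsigned long mask)
{
        return (addr + mask) & ~mask;
}

int main(void)
{
        unsigned long page_mask = 0x3fffUL;     /* 16 KB page, one possible config */
        unsigned long shm_mask  = 0xfffffUL;    /* 1 MB SHMLBA-style alignment */

        printf("%#lx -> %#lx (page aligned)\n", 0x12345UL, align_up(0x12345UL, page_mask));
        printf("%#lx -> %#lx (1 MB aligned)\n", 0x12345UL, align_up(0x12345UL, shm_mask));
        return 0;
}
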
74
75asmlinkage long
76ia64_getpriority (int which, int who)
77{
78 long prio;
79
80 prio = sys_getpriority(which, who);
81 if (prio >= 0) {
82 force_successful_syscall_return();
83 prio = 20 - prio;
84 }
85 return prio;
86}
87
88/* XXX obsolete, but leave it here until the old libc is gone... */
89asmlinkage unsigned long
90sys_getpagesize (void)
91{
92 return PAGE_SIZE;
93}
94
95asmlinkage unsigned long
96ia64_shmat (int shmid, void __user *shmaddr, int shmflg)
97{
98 unsigned long raddr;
99 int retval;
100
101 retval = do_shmat(shmid, shmaddr, shmflg, &raddr);
102 if (retval < 0)
103 return retval;
104
105 force_successful_syscall_return();
106 return raddr;
107}
108
109asmlinkage unsigned long
110ia64_brk (unsigned long brk)
111{
112 unsigned long rlim, retval, newbrk, oldbrk;
113 struct mm_struct *mm = current->mm;
114
115 /*
116 * Most of this replicates the code in sys_brk() except for an additional safety
117 * check and the clearing of r8. However, we can't call sys_brk() because we need
118 * to acquire the mmap_sem before we can do the test...
119 */
120 down_write(&mm->mmap_sem);
121
122 if (brk < mm->end_code)
123 goto out;
124 newbrk = PAGE_ALIGN(brk);
125 oldbrk = PAGE_ALIGN(mm->brk);
126 if (oldbrk == newbrk)
127 goto set_brk;
128
129 /* Always allow shrinking brk. */
130 if (brk <= mm->brk) {
131 if (!do_munmap(mm, newbrk, oldbrk-newbrk))
132 goto set_brk;
133 goto out;
134 }
135
136 /* Check against unimplemented/unmapped addresses: */
137 if ((newbrk - oldbrk) > RGN_MAP_LIMIT || REGION_OFFSET(newbrk) > RGN_MAP_LIMIT)
138 goto out;
139
140 /* Check against rlimit.. */
141 rlim = current->signal->rlim[RLIMIT_DATA].rlim_cur;
142 if (rlim < RLIM_INFINITY && brk - mm->start_data > rlim)
143 goto out;
144
145 /* Check against existing mmap mappings. */
146 if (find_vma_intersection(mm, oldbrk, newbrk+PAGE_SIZE))
147 goto out;
148
149 /* Ok, looks good - let it rip. */
150 if (do_brk(oldbrk, newbrk-oldbrk) != oldbrk)
151 goto out;
152set_brk:
153 mm->brk = brk;
154out:
155 retval = mm->brk;
156 up_write(&mm->mmap_sem);
157 force_successful_syscall_return();
158 return retval;
159}
160
161/*
162 * On IA-64, we return the two file descriptors in ret0 and ret1 (r8
163 * and r9) as this is faster than doing a copy_to_user().
164 */
165asmlinkage long
166sys_pipe (void)
167{
168 struct pt_regs *regs = ia64_task_regs(current);
169 int fd[2];
170 int retval;
171
172 retval = do_pipe(fd);
173 if (retval)
174 goto out;
175 retval = fd[0];
176 regs->r9 = fd[1];
177 out:
178 return retval;
179}
180
181static inline unsigned long
182do_mmap2 (unsigned long addr, unsigned long len, int prot, int flags, int fd, unsigned long pgoff)
183{
184 unsigned long roff;
185 struct file *file = NULL;
186
187 flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE);
188 if (!(flags & MAP_ANONYMOUS)) {
189 file = fget(fd);
190 if (!file)
191 return -EBADF;
192
193 if (!file->f_op || !file->f_op->mmap) {
194 addr = -ENODEV;
195 goto out;
196 }
197 }
198
199 /*
200 * A zero mmap always succeeds in Linux, independent of whether or not the
201 * remaining arguments are valid.
202 */
203 if (len == 0)
204 goto out;
205
206 /* Careful about overflows.. */
207 len = PAGE_ALIGN(len);
208 if (!len || len > TASK_SIZE) {
209 addr = -EINVAL;
210 goto out;
211 }
212
213 /*
214 * Don't permit mappings into unmapped space, the virtual page table of a region,
215 * or across a region boundary. Note: RGN_MAP_LIMIT is equal to 2^n-PAGE_SIZE
216 * (for some integer n <= 61) and len > 0.
217 */
218 roff = REGION_OFFSET(addr);
219 if ((len > RGN_MAP_LIMIT) || (roff > (RGN_MAP_LIMIT - len))) {
220 addr = -EINVAL;
221 goto out;
222 }
223
224 down_write(&current->mm->mmap_sem);
225 addr = do_mmap_pgoff(file, addr, len, prot, flags, pgoff);
226 up_write(&current->mm->mmap_sem);
227
228out: if (file)
229 fput(file);
230 return addr;
231}
232
233/*
234 * mmap2() is like mmap() except that the offset is expressed in units
235 * of PAGE_SIZE (instead of bytes). This allows mmap2() to map (pieces
236 * of) files that are larger than the address space of the CPU.
237 */
238asmlinkage unsigned long
239sys_mmap2 (unsigned long addr, unsigned long len, int prot, int flags, int fd, long pgoff)
240{
241 addr = do_mmap2(addr, len, prot, flags, fd, pgoff);
242 if (!IS_ERR((void *) addr))
243 force_successful_syscall_return();
244 return addr;
245}
246
247asmlinkage unsigned long
248sys_mmap (unsigned long addr, unsigned long len, int prot, int flags, int fd, long off)
249{
250 if (offset_in_page(off) != 0)
251 return -EINVAL;
252
253 addr = do_mmap2(addr, len, prot, flags, fd, off >> PAGE_SHIFT);
254 if (!IS_ERR((void *) addr))
255 force_successful_syscall_return();
256 return addr;
257}
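
sys_mmap() and sys_mmap2() above differ only in the offset unit: mmap2 takes an offset already expressed in pages, while mmap takes a byte offset that must be page aligned and is converted with off >> PAGE_SHIFT. A tiny sketch of that check and conversion; the page size used here is an assumption for the example:

#include <stdio.h>

#define EX_PAGE_SHIFT   14                      /* 16 KB pages, assumed */
#define EX_PAGE_SIZE    (1UL << EX_PAGE_SHIFT)

int main(void)
{
        unsigned long off = 5 * EX_PAGE_SIZE;   /* a page-aligned byte offset */

        if (off & (EX_PAGE_SIZE - 1)) {         /* like offset_in_page(off) != 0 */
                printf("unaligned offset: sys_mmap() would return -EINVAL\n");
                return 1;
        }
        printf("byte offset %#lx -> pgoff %lu\n", off, off >> EX_PAGE_SHIFT);
        return 0;
}
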
258
259asmlinkage unsigned long
260ia64_mremap (unsigned long addr, unsigned long old_len, unsigned long new_len, unsigned long flags,
261 unsigned long new_addr)
262{
263 extern unsigned long do_mremap (unsigned long addr,
264 unsigned long old_len,
265 unsigned long new_len,
266 unsigned long flags,
267 unsigned long new_addr);
268
269 down_write(&current->mm->mmap_sem);
270 {
271 addr = do_mremap(addr, old_len, new_len, flags, new_addr);
272 }
273 up_write(&current->mm->mmap_sem);
274
275 if (IS_ERR((void *) addr))
276 return addr;
277
278 force_successful_syscall_return();
279 return addr;
280}
281
282#ifndef CONFIG_PCI
283
284asmlinkage long
285sys_pciconfig_read (unsigned long bus, unsigned long dfn, unsigned long off, unsigned long len,
286 void *buf)
287{
288 return -ENOSYS;
289}
290
291asmlinkage long
292sys_pciconfig_write (unsigned long bus, unsigned long dfn, unsigned long off, unsigned long len,
293 void *buf)
294{
295 return -ENOSYS;
296}
297
298#endif /* CONFIG_PCI */
diff --git a/arch/ia64/kernel/time.c b/arch/ia64/kernel/time.c
new file mode 100644
index 000000000000..8b8a5a45b621
--- /dev/null
+++ b/arch/ia64/kernel/time.c
@@ -0,0 +1,255 @@
1/*
2 * linux/arch/ia64/kernel/time.c
3 *
4 * Copyright (C) 1998-2003 Hewlett-Packard Co
5 * Stephane Eranian <eranian@hpl.hp.com>
6 * David Mosberger <davidm@hpl.hp.com>
7 * Copyright (C) 1999 Don Dugger <don.dugger@intel.com>
8 * Copyright (C) 1999-2000 VA Linux Systems
9 * Copyright (C) 1999-2000 Walt Drummond <drummond@valinux.com>
10 */
11#include <linux/config.h>
12
13#include <linux/cpu.h>
14#include <linux/init.h>
15#include <linux/kernel.h>
16#include <linux/module.h>
17#include <linux/profile.h>
18#include <linux/sched.h>
19#include <linux/time.h>
20#include <linux/interrupt.h>
21#include <linux/efi.h>
22#include <linux/profile.h>
23#include <linux/timex.h>
24
25#include <asm/machvec.h>
26#include <asm/delay.h>
27#include <asm/hw_irq.h>
28#include <asm/ptrace.h>
29#include <asm/sal.h>
30#include <asm/sections.h>
31#include <asm/system.h>
32
33extern unsigned long wall_jiffies;
34
35u64 jiffies_64 __cacheline_aligned_in_smp = INITIAL_JIFFIES;
36
37EXPORT_SYMBOL(jiffies_64);
38
39#define TIME_KEEPER_ID 0 /* smp_processor_id() of time-keeper */
40
41#ifdef CONFIG_IA64_DEBUG_IRQ
42
43unsigned long last_cli_ip;
44EXPORT_SYMBOL(last_cli_ip);
45
46#endif
47
48static struct time_interpolator itc_interpolator = {
49 .shift = 16,
50 .mask = 0xffffffffffffffffLL,
51 .source = TIME_SOURCE_CPU
52};
53
54static irqreturn_t
55timer_interrupt (int irq, void *dev_id, struct pt_regs *regs)
56{
57 unsigned long new_itm;
58
59 if (unlikely(cpu_is_offline(smp_processor_id()))) {
60 return IRQ_HANDLED;
61 }
62
63 platform_timer_interrupt(irq, dev_id, regs);
64
65 new_itm = local_cpu_data->itm_next;
66
67 if (!time_after(ia64_get_itc(), new_itm))
68 printk(KERN_ERR "Oops: timer tick before it's due (itc=%lx,itm=%lx)\n",
69 ia64_get_itc(), new_itm);
70
71 profile_tick(CPU_PROFILING, regs);
72
73 while (1) {
74 update_process_times(user_mode(regs));
75
76 new_itm += local_cpu_data->itm_delta;
77
78 if (smp_processor_id() == TIME_KEEPER_ID) {
79 /*
80 * Here we are in the timer irq handler. We have irqs locally
81 * disabled, but we don't know if the timer_bh is running on
82 * another CPU. We need to avoid an SMP race by acquiring the
83 * xtime_lock.
84 */
85 write_seqlock(&xtime_lock);
86 do_timer(regs);
87 local_cpu_data->itm_next = new_itm;
88 write_sequnlock(&xtime_lock);
89 } else
90 local_cpu_data->itm_next = new_itm;
91
92 if (time_after(new_itm, ia64_get_itc()))
93 break;
94 }
95
96 do {
97 /*
98 * If we're too close to the next clock tick for
99 * comfort, we increase the safety margin by
100 * intentionally dropping the next tick(s). We do NOT
101 * update itm.next because that would force us to call
102 * do_timer() which in turn would let our clock run
103 * too fast (with the potentially devastating effect
104 * of losing the monotonicity of time).
105 */
106 while (!time_after(new_itm, ia64_get_itc() + local_cpu_data->itm_delta/2))
107 new_itm += local_cpu_data->itm_delta;
108 ia64_set_itm(new_itm);
109 /* double check, in case we got hit by a (slow) PMI: */
110 } while (time_after_eq(ia64_get_itc(), new_itm));
111 return IRQ_HANDLED;
112}
113
114/*
115 * Encapsulate access to the itm structure for SMP.
116 */
117void
118ia64_cpu_local_tick (void)
119{
120 int cpu = smp_processor_id();
121 unsigned long shift = 0, delta;
122
123 /* arrange for the cycle counter to generate a timer interrupt: */
124 ia64_set_itv(IA64_TIMER_VECTOR);
125
126 delta = local_cpu_data->itm_delta;
127 /*
128 * Stagger the timer tick for each CPU so they don't occur all at (almost) the
129 * same time:
130 */
131 if (cpu) {
132 unsigned long hi = 1UL << ia64_fls(cpu);
133 shift = (2*(cpu - hi) + 1) * delta/hi/2;
134 }
135 local_cpu_data->itm_next = ia64_get_itc() + delta + shift;
136 ia64_set_itm(local_cpu_data->itm_next);
137}
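
The stagger computation above spreads the first timer interrupt of each CPU across one tick period: hi is the largest power of two not exceeding cpu, and shift = (2*(cpu - hi) + 1) * delta/hi/2 places each CPU at an odd multiple of delta/(2*hi). A standalone sketch that prints the offsets for a few CPUs; the delta value is assumed purely for illustration:

#include <stdio.h>

/* Largest power of two <= cpu, i.e. 1UL << ia64_fls(cpu) in the kernel code. */
static unsigned long top_bit(unsigned long cpu)
{
        unsigned long hi = 1;

        while (hi * 2 <= cpu)
                hi *= 2;
        return hi;
}

int main(void)
{
        unsigned long delta = 1000000;  /* hypothetical ITC cycles per tick */
        unsigned long cpu;

        for (cpu = 1; cpu < 8; cpu++) {
                unsigned long hi = top_bit(cpu);
                unsigned long shift = (2 * (cpu - hi) + 1) * delta / hi / 2;

                printf("cpu %lu: hi=%lu shift=%lu\n", cpu, hi, shift);
        }
        return 0;
}
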
138
139static int nojitter;
140
141static int __init nojitter_setup(char *str)
142{
143 nojitter = 1;
144 printk("Jitter checking for ITC timers disabled\n");
145 return 1;
146}
147
148__setup("nojitter", nojitter_setup);
149
150
151void __devinit
152ia64_init_itm (void)
153{
154 unsigned long platform_base_freq, itc_freq;
155 struct pal_freq_ratio itc_ratio, proc_ratio;
156 long status, platform_base_drift, itc_drift;
157
158 /*
159 * According to SAL v2.6, we need to use a SAL call to determine the platform base
160 * frequency and then a PAL call to determine the frequency ratio between the ITC
161 * and the base frequency.
162 */
163 status = ia64_sal_freq_base(SAL_FREQ_BASE_PLATFORM,
164 &platform_base_freq, &platform_base_drift);
165 if (status != 0) {
166 printk(KERN_ERR "SAL_FREQ_BASE_PLATFORM failed: %s\n", ia64_sal_strerror(status));
167 } else {
168 status = ia64_pal_freq_ratios(&proc_ratio, NULL, &itc_ratio);
169 if (status != 0)
170 printk(KERN_ERR "PAL_FREQ_RATIOS failed with status=%ld\n", status);
171 }
172 if (status != 0) {
173 /* invent "random" values */
174 printk(KERN_ERR
175 "SAL/PAL failed to obtain frequency info---inventing reasonable values\n");
176 platform_base_freq = 100000000;
177 platform_base_drift = -1; /* no drift info */
178 itc_ratio.num = 3;
179 itc_ratio.den = 1;
180 }
181 if (platform_base_freq < 40000000) {
182 printk(KERN_ERR "Platform base frequency %lu bogus---resetting to 75MHz!\n",
183 platform_base_freq);
184 platform_base_freq = 75000000;
185 platform_base_drift = -1;
186 }
187 if (!proc_ratio.den)
188 proc_ratio.den = 1; /* avoid division by zero */
189 if (!itc_ratio.den)
190 itc_ratio.den = 1; /* avoid division by zero */
191
192 itc_freq = (platform_base_freq*itc_ratio.num)/itc_ratio.den;
193
194 local_cpu_data->itm_delta = (itc_freq + HZ/2) / HZ;
195 printk(KERN_DEBUG "CPU %d: base freq=%lu.%03luMHz, ITC ratio=%lu/%lu, "
196 "ITC freq=%lu.%03luMHz", smp_processor_id(),
197 platform_base_freq / 1000000, (platform_base_freq / 1000) % 1000,
198 itc_ratio.num, itc_ratio.den, itc_freq / 1000000, (itc_freq / 1000) % 1000);
199
200 if (platform_base_drift != -1) {
201 itc_drift = platform_base_drift*itc_ratio.num/itc_ratio.den;
202 printk("+/-%ldppm\n", itc_drift);
203 } else {
204 itc_drift = -1;
205 printk("\n");
206 }
207
208 local_cpu_data->proc_freq = (platform_base_freq*proc_ratio.num)/proc_ratio.den;
209 local_cpu_data->itc_freq = itc_freq;
210 local_cpu_data->cyc_per_usec = (itc_freq + USEC_PER_SEC/2) / USEC_PER_SEC;
211 local_cpu_data->nsec_per_cyc = ((NSEC_PER_SEC<<IA64_NSEC_PER_CYC_SHIFT)
212 + itc_freq/2)/itc_freq;
213
214 if (!(sal_platform_features & IA64_SAL_PLATFORM_FEATURE_ITC_DRIFT)) {
215 itc_interpolator.frequency = local_cpu_data->itc_freq;
216 itc_interpolator.drift = itc_drift;
217#ifdef CONFIG_SMP
218 /* On IA64 in an SMP configuration ITCs are never accurately synchronized.
219 * Jitter compensation requires a cmpxchg which may limit
220 * the scalability of the syscalls for retrieving time.
221 * The ITC synchronization is usually successful to within a few
222 * ITC ticks but this is not a sure thing. If you need to improve
223 * timer performance in SMP situations then boot the kernel with the
224 * "nojitter" option. However, doing so may result in time fluctuating (maybe
225 * even going backward) if the ITC offsets between the individual CPUs
226 * are too large.
227 */
228 if (!nojitter) itc_interpolator.jitter = 1;
229#endif
230 register_time_interpolator(&itc_interpolator);
231 }
232
233 /* Setup the CPU local timer tick */
234 ia64_cpu_local_tick();
235}
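
Numerically, ia64_init_itm() derives everything from the base frequency and the ITC ratio: itc_freq = base*num/den, itm_delta = (itc_freq + HZ/2)/HZ (cycles per tick, rounded to nearest), and nsec_per_cyc is a fixed-point reciprocal scaled by IA64_NSEC_PER_CYC_SHIFT. A worked example using the fallback values the code invents when SAL/PAL report nothing; HZ and the shift width are assumptions here:

#include <stdio.h>

int main(void)
{
        unsigned long base_freq = 100000000;    /* 100 MHz fallback platform base */
        unsigned long num = 3, den = 1;         /* invented ITC ratio from the fallback path */
        unsigned long hz = 1024;                /* assumed HZ */
        unsigned int  shift = 30;               /* stand-in for IA64_NSEC_PER_CYC_SHIFT */

        unsigned long itc_freq = base_freq * num / den;
        unsigned long itm_delta = (itc_freq + hz / 2) / hz;
        unsigned long long nsec_per_cyc =
                ((1000000000ULL << shift) + itc_freq / 2) / itc_freq;

        printf("itc_freq     = %lu Hz\n", itc_freq);            /* 300 MHz */
        printf("itm_delta    = %lu cycles per tick\n", itm_delta);
        printf("nsec_per_cyc = %llu (fixed point, >> %u)\n", nsec_per_cyc, shift);
        return 0;
}
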
236
237static struct irqaction timer_irqaction = {
238 .handler = timer_interrupt,
239 .flags = SA_INTERRUPT,
240 .name = "timer"
241};
242
243void __init
244time_init (void)
245{
246 register_percpu_irq(IA64_TIMER_VECTOR, &timer_irqaction);
247 efi_gettimeofday(&xtime);
248 ia64_init_itm();
249
250 /*
251 * Initialize wall_to_monotonic such that adding it to xtime will yield zero, the
252 * tv_nsec field must be normalized (i.e., 0 <= nsec < NSEC_PER_SEC).
253 */
254 set_normalized_timespec(&wall_to_monotonic, -xtime.tv_sec, -xtime.tv_nsec);
255}
diff --git a/arch/ia64/kernel/topology.c b/arch/ia64/kernel/topology.c
new file mode 100644
index 000000000000..f1aafd4c05f9
--- /dev/null
+++ b/arch/ia64/kernel/topology.c
@@ -0,0 +1,92 @@
1/*
2 * This file is subject to the terms and conditions of the GNU General Public
3 * License. See the file "COPYING" in the main directory of this archive
4 * for more details.
5 *
6 * This file contains NUMA specific variables and functions which can
7 * be split away from DISCONTIGMEM and are used on NUMA machines with
8 * contiguous memory.
9 * 2002/08/07 Erich Focht <efocht@ess.nec.de>
10 * Populate cpu entries in sysfs for non-numa systems as well
11 * Intel Corporation - Ashok Raj
12 */
13
14#include <linux/config.h>
15#include <linux/cpu.h>
16#include <linux/kernel.h>
17#include <linux/mm.h>
18#include <linux/node.h>
19#include <linux/init.h>
20#include <linux/bootmem.h>
21#include <linux/nodemask.h>
22#include <asm/mmzone.h>
23#include <asm/numa.h>
24#include <asm/cpu.h>
25
26#ifdef CONFIG_NUMA
27static struct node *sysfs_nodes;
28#endif
29static struct ia64_cpu *sysfs_cpus;
30
31int arch_register_cpu(int num)
32{
33 struct node *parent = NULL;
34
35#ifdef CONFIG_NUMA
36 parent = &sysfs_nodes[cpu_to_node(num)];
37#endif /* CONFIG_NUMA */
38
39 return register_cpu(&sysfs_cpus[num].cpu, num, parent);
40}
41
42#ifdef CONFIG_HOTPLUG_CPU
43
44void arch_unregister_cpu(int num)
45{
46 struct node *parent = NULL;
47
48#ifdef CONFIG_NUMA
49 int node = cpu_to_node(num);
50 parent = &sysfs_nodes[node];
51#endif /* CONFIG_NUMA */
52
53 return unregister_cpu(&sysfs_cpus[num].cpu, parent);
54}
55EXPORT_SYMBOL(arch_register_cpu);
56EXPORT_SYMBOL(arch_unregister_cpu);
57#endif /*CONFIG_HOTPLUG_CPU*/
58
59
60static int __init topology_init(void)
61{
62 int i, err = 0;
63
64#ifdef CONFIG_NUMA
65 sysfs_nodes = kmalloc(sizeof(struct node) * MAX_NUMNODES, GFP_KERNEL);
66 if (!sysfs_nodes) {
67 err = -ENOMEM;
68 goto out;
69 }
70 memset(sysfs_nodes, 0, sizeof(struct node) * MAX_NUMNODES);
71
72 /* MCD - Do we want to register all ONLINE nodes, or all POSSIBLE nodes? */
73 for_each_online_node(i)
74 if ((err = register_node(&sysfs_nodes[i], i, 0)))
75 goto out;
76#endif
77
78 sysfs_cpus = kmalloc(sizeof(struct ia64_cpu) * NR_CPUS, GFP_KERNEL);
79 if (!sysfs_cpus) {
80 err = -ENOMEM;
81 goto out;
82 }
83 memset(sysfs_cpus, 0, sizeof(struct ia64_cpu) * NR_CPUS);
84
85 for_each_present_cpu(i)
86 if((err = arch_register_cpu(i)))
87 goto out;
88out:
89 return err;
90}
91
92__initcall(topology_init);
diff --git a/arch/ia64/kernel/traps.c b/arch/ia64/kernel/traps.c
new file mode 100644
index 000000000000..e82ad78081b3
--- /dev/null
+++ b/arch/ia64/kernel/traps.c
@@ -0,0 +1,609 @@
1/*
2 * Architecture-specific trap handling.
3 *
4 * Copyright (C) 1998-2003 Hewlett-Packard Co
5 * David Mosberger-Tang <davidm@hpl.hp.com>
6 *
7 * 05/12/00 grao <goutham.rao@intel.com> : added isr in siginfo for SIGFPE
8 */
9
10#include <linux/config.h>
11#include <linux/kernel.h>
12#include <linux/init.h>
13#include <linux/sched.h>
14#include <linux/tty.h>
15#include <linux/vt_kern.h> /* For unblank_screen() */
16#include <linux/module.h> /* for EXPORT_SYMBOL */
17#include <linux/hardirq.h>
18
19#include <asm/fpswa.h>
20#include <asm/ia32.h>
21#include <asm/intrinsics.h>
22#include <asm/processor.h>
23#include <asm/uaccess.h>
24
25extern spinlock_t timerlist_lock;
26
27fpswa_interface_t *fpswa_interface;
28EXPORT_SYMBOL(fpswa_interface);
29
30void __init
31trap_init (void)
32{
33 if (ia64_boot_param->fpswa)
34 /* FPSWA fixup: make the interface pointer a kernel virtual address: */
35 fpswa_interface = __va(ia64_boot_param->fpswa);
36}
37
38/*
39 * Unlock any spinlocks which will prevent us from getting the message out (timerlist_lock
40 * is acquired through the console unblank code)
41 */
42void
43bust_spinlocks (int yes)
44{
45 int loglevel_save = console_loglevel;
46
47 if (yes) {
48 oops_in_progress = 1;
49 return;
50 }
51
52#ifdef CONFIG_VT
53 unblank_screen();
54#endif
55 oops_in_progress = 0;
56 /*
57 * OK, the message is on the console. Now we call printk() without
58 * oops_in_progress set so that printk will give klogd a poke. Hold onto
59 * your hats...
60 */
61 console_loglevel = 15; /* NMI oopser may have shut the console up */
62 printk(" ");
63 console_loglevel = loglevel_save;
64}
65
66void
67die (const char *str, struct pt_regs *regs, long err)
68{
69 static struct {
70 spinlock_t lock;
71 u32 lock_owner;
72 int lock_owner_depth;
73 } die = {
74 .lock = SPIN_LOCK_UNLOCKED,
75 .lock_owner = -1,
76 .lock_owner_depth = 0
77 };
78 static int die_counter;
79
80 if (die.lock_owner != smp_processor_id()) {
81 console_verbose();
82 spin_lock_irq(&die.lock);
83 die.lock_owner = smp_processor_id();
84 die.lock_owner_depth = 0;
85 bust_spinlocks(1);
86 }
87
88 if (++die.lock_owner_depth < 3) {
89 printk("%s[%d]: %s %ld [%d]\n",
90 current->comm, current->pid, str, err, ++die_counter);
91 show_regs(regs);
92 } else
93 printk(KERN_ERR "Recursive die() failure, output suppressed\n");
94
95 bust_spinlocks(0);
96 die.lock_owner = -1;
97 spin_unlock_irq(&die.lock);
98 do_exit(SIGSEGV);
99}
100
101void
102die_if_kernel (char *str, struct pt_regs *regs, long err)
103{
104 if (!user_mode(regs))
105 die(str, regs, err);
106}
107
108void
109ia64_bad_break (unsigned long break_num, struct pt_regs *regs)
110{
111 siginfo_t siginfo;
112 int sig, code;
113
114 /* SIGILL, SIGFPE, SIGSEGV, and SIGBUS want these fields initialized: */
115 siginfo.si_addr = (void __user *) (regs->cr_iip + ia64_psr(regs)->ri);
116 siginfo.si_imm = break_num;
117 siginfo.si_flags = 0; /* clear __ISR_VALID */
118 siginfo.si_isr = 0;
119
120 switch (break_num) {
121 case 0: /* unknown error (used by GCC for __builtin_abort()) */
122 die_if_kernel("bugcheck!", regs, break_num);
123 sig = SIGILL; code = ILL_ILLOPC;
124 break;
125
126 case 1: /* integer divide by zero */
127 sig = SIGFPE; code = FPE_INTDIV;
128 break;
129
130 case 2: /* integer overflow */
131 sig = SIGFPE; code = FPE_INTOVF;
132 break;
133
134 case 3: /* range check/bounds check */
135 sig = SIGFPE; code = FPE_FLTSUB;
136 break;
137
138 case 4: /* null pointer dereference */
139 sig = SIGSEGV; code = SEGV_MAPERR;
140 break;
141
142 case 5: /* misaligned data */
143 sig = SIGSEGV; code = BUS_ADRALN;
144 break;
145
146 case 6: /* decimal overflow */
147 sig = SIGFPE; code = __FPE_DECOVF;
148 break;
149
150 case 7: /* decimal divide by zero */
151 sig = SIGFPE; code = __FPE_DECDIV;
152 break;
153
154 case 8: /* packed decimal error */
155 sig = SIGFPE; code = __FPE_DECERR;
156 break;
157
158 case 9: /* invalid ASCII digit */
159 sig = SIGFPE; code = __FPE_INVASC;
160 break;
161
162 case 10: /* invalid decimal digit */
163 sig = SIGFPE; code = __FPE_INVDEC;
164 break;
165
166 case 11: /* paragraph stack overflow */
167 sig = SIGSEGV; code = __SEGV_PSTKOVF;
168 break;
169
170 case 0x3f000 ... 0x3ffff: /* bundle-update in progress */
171 sig = SIGILL; code = __ILL_BNDMOD;
172 break;
173
174 default:
175 if (break_num < 0x40000 || break_num > 0x100000)
176 die_if_kernel("Bad break", regs, break_num);
177
178 if (break_num < 0x80000) {
179 sig = SIGILL; code = __ILL_BREAK;
180 } else {
181 sig = SIGTRAP; code = TRAP_BRKPT;
182 }
183 }
184 siginfo.si_signo = sig;
185 siginfo.si_errno = 0;
186 siginfo.si_code = code;
187 force_sig_info(sig, &siginfo, current);
188}
189
190/*
191 * disabled_fph_fault() is called when a user-level process attempts to access f32..f127
192 * and it doesn't own the fp-high register partition. When this happens, we save the
193 * current fph partition in the task_struct of the fpu-owner (if necessary) and then load
194 * the fp-high partition of the current task (if necessary). Note that the kernel has
195 * access to fph by the time we get here, as the IVT's "Disabled FP-Register" handler takes
196 * care of clearing psr.dfh.
197 */
198static inline void
199disabled_fph_fault (struct pt_regs *regs)
200{
201 struct ia64_psr *psr = ia64_psr(regs);
202
203 /* first, grant user-level access to fph partition: */
204 psr->dfh = 0;
205#ifndef CONFIG_SMP
206 {
207 struct task_struct *fpu_owner
208 = (struct task_struct *)ia64_get_kr(IA64_KR_FPU_OWNER);
209
210 if (ia64_is_local_fpu_owner(current))
211 return;
212
213 if (fpu_owner)
214 ia64_flush_fph(fpu_owner);
215 }
216#endif /* !CONFIG_SMP */
217 ia64_set_local_fpu_owner(current);
218 if ((current->thread.flags & IA64_THREAD_FPH_VALID) != 0) {
219 __ia64_load_fpu(current->thread.fph);
220 psr->mfh = 0;
221 } else {
222 __ia64_init_fpu();
223 /*
224 * Set mfh because the state in thread.fph does not match the state in
225 * the fph partition.
226 */
227 psr->mfh = 1;
228 }
229}
230
231static inline int
232fp_emulate (int fp_fault, void *bundle, long *ipsr, long *fpsr, long *isr, long *pr, long *ifs,
233 struct pt_regs *regs)
234{
235 fp_state_t fp_state;
236 fpswa_ret_t ret;
237
238 if (!fpswa_interface)
239 return -1;
240
241 memset(&fp_state, 0, sizeof(fp_state_t));
242
243 /*
244 * compute fp_state. only FP registers f6 - f11 are used by the
245 * kernel, so set those bits in the mask and set the low volatile
246 * pointer to point to these registers.
247 */
248 fp_state.bitmask_low64 = 0xfc0; /* bit6..bit11 */
249
250 fp_state.fp_state_low_volatile = (fp_state_low_volatile_t *) &regs->f6;
251 /*
252 * unsigned long (*EFI_FPSWA) (
253 * unsigned long trap_type,
254 * void *Bundle,
255 * unsigned long *pipsr,
256 * unsigned long *pfsr,
257 * unsigned long *pisr,
258 * unsigned long *ppreds,
259 * unsigned long *pifs,
260 * void *fp_state);
261 */
262 ret = (*fpswa_interface->fpswa)((unsigned long) fp_fault, bundle,
263 (unsigned long *) ipsr, (unsigned long *) fpsr,
264 (unsigned long *) isr, (unsigned long *) pr,
265 (unsigned long *) ifs, &fp_state);
266
267 return ret.status;
268}
269
270/*
271 * Handle floating-point assist faults and traps.
272 */
273static int
274handle_fpu_swa (int fp_fault, struct pt_regs *regs, unsigned long isr)
275{
276 long exception, bundle[2];
277 unsigned long fault_ip;
278 struct siginfo siginfo;
279 static int fpu_swa_count = 0;
280 static unsigned long last_time;
281
282 fault_ip = regs->cr_iip;
283 if (!fp_fault && (ia64_psr(regs)->ri == 0))
284 fault_ip -= 16;
285 if (copy_from_user(bundle, (void __user *) fault_ip, sizeof(bundle)))
286 return -1;
287
288 if (jiffies - last_time > 5*HZ)
289 fpu_swa_count = 0;
290 if ((fpu_swa_count < 4) && !(current->thread.flags & IA64_THREAD_FPEMU_NOPRINT)) {
291 last_time = jiffies;
292 ++fpu_swa_count;
293 printk(KERN_WARNING
294 "%s(%d): floating-point assist fault at ip %016lx, isr %016lx\n",
295 current->comm, current->pid, regs->cr_iip + ia64_psr(regs)->ri, isr);
296 }
297
298 exception = fp_emulate(fp_fault, bundle, &regs->cr_ipsr, &regs->ar_fpsr, &isr, &regs->pr,
299 &regs->cr_ifs, regs);
300 if (fp_fault) {
301 if (exception == 0) {
302 /* emulation was successful */
303 ia64_increment_ip(regs);
304 } else if (exception == -1) {
305 printk(KERN_ERR "handle_fpu_swa: fp_emulate() returned -1\n");
306 return -1;
307 } else {
308 /* is next instruction a trap? */
309 if (exception & 2) {
310 ia64_increment_ip(regs);
311 }
312 siginfo.si_signo = SIGFPE;
313 siginfo.si_errno = 0;
314 siginfo.si_code = __SI_FAULT; /* default code */
315 siginfo.si_addr = (void __user *) (regs->cr_iip + ia64_psr(regs)->ri);
316 if (isr & 0x11) {
317 siginfo.si_code = FPE_FLTINV;
318 } else if (isr & 0x22) {
319 /* denormal operand gets the same si_code as underflow
320 * see arch/i386/kernel/traps.c:math_error() */
321 siginfo.si_code = FPE_FLTUND;
322 } else if (isr & 0x44) {
323 siginfo.si_code = FPE_FLTDIV;
324 }
325 siginfo.si_isr = isr;
326 siginfo.si_flags = __ISR_VALID;
327 siginfo.si_imm = 0;
328 force_sig_info(SIGFPE, &siginfo, current);
329 }
330 } else {
331 if (exception == -1) {
332 printk(KERN_ERR "handle_fpu_swa: fp_emulate() returned -1\n");
333 return -1;
334 } else if (exception != 0) {
335 /* raise exception */
336 siginfo.si_signo = SIGFPE;
337 siginfo.si_errno = 0;
338 siginfo.si_code = __SI_FAULT; /* default code */
339 siginfo.si_addr = (void __user *) (regs->cr_iip + ia64_psr(regs)->ri);
340 if (isr & 0x880) {
341 siginfo.si_code = FPE_FLTOVF;
342 } else if (isr & 0x1100) {
343 siginfo.si_code = FPE_FLTUND;
344 } else if (isr & 0x2200) {
345 siginfo.si_code = FPE_FLTRES;
346 }
347 siginfo.si_isr = isr;
348 siginfo.si_flags = __ISR_VALID;
349 siginfo.si_imm = 0;
350 force_sig_info(SIGFPE, &siginfo, current);
351 }
352 }
353 return 0;
354}
355
356struct illegal_op_return {
357 unsigned long fkt, arg1, arg2, arg3;
358};
359
360struct illegal_op_return
361ia64_illegal_op_fault (unsigned long ec, long arg1, long arg2, long arg3,
362 long arg4, long arg5, long arg6, long arg7,
363 struct pt_regs regs)
364{
365 struct illegal_op_return rv;
366 struct siginfo si;
367 char buf[128];
368
369#ifdef CONFIG_IA64_BRL_EMU
370 {
371 extern struct illegal_op_return ia64_emulate_brl (struct pt_regs *, unsigned long);
372
373 rv = ia64_emulate_brl(&regs, ec);
374 if (rv.fkt != (unsigned long) -1)
375 return rv;
376 }
377#endif
378
379 sprintf(buf, "IA-64 Illegal operation fault");
380 die_if_kernel(buf, &regs, 0);
381
382 memset(&si, 0, sizeof(si));
383 si.si_signo = SIGILL;
384 si.si_code = ILL_ILLOPC;
385 si.si_addr = (void __user *) (regs.cr_iip + ia64_psr(&regs)->ri);
386 force_sig_info(SIGILL, &si, current);
387 rv.fkt = 0;
388 return rv;
389}
390
391void
392ia64_fault (unsigned long vector, unsigned long isr, unsigned long ifa,
393 unsigned long iim, unsigned long itir, long arg5, long arg6,
394 long arg7, struct pt_regs regs)
395{
396 unsigned long code, error = isr, iip;
397 struct siginfo siginfo;
398 char buf[128];
399 int result, sig;
400 static const char *reason[] = {
401 "IA-64 Illegal Operation fault",
402 "IA-64 Privileged Operation fault",
403 "IA-64 Privileged Register fault",
404 "IA-64 Reserved Register/Field fault",
405 "Disabled Instruction Set Transition fault",
406 "Unknown fault 5", "Unknown fault 6", "Unknown fault 7", "Illegal Hazard fault",
407 "Unknown fault 9", "Unknown fault 10", "Unknown fault 11", "Unknown fault 12",
408 "Unknown fault 13", "Unknown fault 14", "Unknown fault 15"
409 };
410
411 if ((isr & IA64_ISR_NA) && ((isr & IA64_ISR_CODE_MASK) == IA64_ISR_CODE_LFETCH)) {
412 /*
413 * This fault was due to lfetch.fault, set "ed" bit in the psr to cancel
414 * the lfetch.
415 */
416 ia64_psr(&regs)->ed = 1;
417 return;
418 }
419
420 iip = regs.cr_iip + ia64_psr(&regs)->ri;
421
422 switch (vector) {
423 case 24: /* General Exception */
424 code = (isr >> 4) & 0xf;
425 sprintf(buf, "General Exception: %s%s", reason[code],
426 (code == 3) ? ((isr & (1UL << 37))
427 ? " (RSE access)" : " (data access)") : "");
428 if (code == 8) {
429# ifdef CONFIG_IA64_PRINT_HAZARDS
430 printk("%s[%d]: possible hazard @ ip=%016lx (pr = %016lx)\n",
431 current->comm, current->pid,
432 regs.cr_iip + ia64_psr(&regs)->ri, regs.pr);
433# endif
434 return;
435 }
436 break;
437
438 case 25: /* Disabled FP-Register */
439 if (isr & 2) {
440 disabled_fph_fault(&regs);
441 return;
442 }
443 sprintf(buf, "Disabled FPL fault---not supposed to happen!");
444 break;
445
446 case 26: /* NaT Consumption */
447 if (user_mode(&regs)) {
448 void __user *addr;
449
450 if (((isr >> 4) & 0xf) == 2) {
451 /* NaT page consumption */
452 sig = SIGSEGV;
453 code = SEGV_ACCERR;
454 addr = (void __user *) ifa;
455 } else {
456 /* register NaT consumption */
457 sig = SIGILL;
458 code = ILL_ILLOPN;
459 addr = (void __user *) (regs.cr_iip
460 + ia64_psr(&regs)->ri);
461 }
462 siginfo.si_signo = sig;
463 siginfo.si_code = code;
464 siginfo.si_errno = 0;
465 siginfo.si_addr = addr;
466 siginfo.si_imm = vector;
467 siginfo.si_flags = __ISR_VALID;
468 siginfo.si_isr = isr;
469 force_sig_info(sig, &siginfo, current);
470 return;
471 } else if (ia64_done_with_exception(&regs))
472 return;
473 sprintf(buf, "NaT consumption");
474 break;
475
476 case 31: /* Unsupported Data Reference */
477 if (user_mode(&regs)) {
478 siginfo.si_signo = SIGILL;
479 siginfo.si_code = ILL_ILLOPN;
480 siginfo.si_errno = 0;
481 siginfo.si_addr = (void __user *) iip;
482 siginfo.si_imm = vector;
483 siginfo.si_flags = __ISR_VALID;
484 siginfo.si_isr = isr;
485 force_sig_info(SIGILL, &siginfo, current);
486 return;
487 }
488 sprintf(buf, "Unsupported data reference");
489 break;
490
491 case 29: /* Debug */
492 case 35: /* Taken Branch Trap */
493 case 36: /* Single Step Trap */
494 if (fsys_mode(current, &regs)) {
495 extern char __kernel_syscall_via_break[];
496 /*
497 * Got a trap in fsys-mode: Taken Branch Trap and Single Step trap
498 * need special handling; Debug trap is not supposed to happen.
499 */
500 if (unlikely(vector == 29)) {
501 die("Got debug trap in fsys-mode---not supposed to happen!",
502 &regs, 0);
503 return;
504 }
505 /* re-do the system call via break 0x100000: */
506 regs.cr_iip = (unsigned long) __kernel_syscall_via_break;
507 ia64_psr(&regs)->ri = 0;
508 ia64_psr(&regs)->cpl = 3;
509 return;
510 }
511 switch (vector) {
512 case 29:
513 siginfo.si_code = TRAP_HWBKPT;
514#ifdef CONFIG_ITANIUM
515 /*
516 * Erratum 10 (IFA may contain incorrect address) now has
517 * "NoFix" status. There are no plans for fixing this.
518 */
519 if (ia64_psr(&regs)->is == 0)
520 ifa = regs.cr_iip;
521#endif
522 break;
523 case 35: siginfo.si_code = TRAP_BRANCH; ifa = 0; break;
524 case 36: siginfo.si_code = TRAP_TRACE; ifa = 0; break;
525 }
526 siginfo.si_signo = SIGTRAP;
527 siginfo.si_errno = 0;
528 siginfo.si_addr = (void __user *) ifa;
529 siginfo.si_imm = 0;
530 siginfo.si_flags = __ISR_VALID;
531 siginfo.si_isr = isr;
532 force_sig_info(SIGTRAP, &siginfo, current);
533 return;
534
535 case 32: /* fp fault */
536 case 33: /* fp trap */
537 result = handle_fpu_swa((vector == 32) ? 1 : 0, &regs, isr);
538 if ((result < 0) || (current->thread.flags & IA64_THREAD_FPEMU_SIGFPE)) {
539 siginfo.si_signo = SIGFPE;
540 siginfo.si_errno = 0;
541 siginfo.si_code = FPE_FLTINV;
542 siginfo.si_addr = (void __user *) iip;
543 siginfo.si_flags = __ISR_VALID;
544 siginfo.si_isr = isr;
545 siginfo.si_imm = 0;
546 force_sig_info(SIGFPE, &siginfo, current);
547 }
548 return;
549
550 case 34:
551 if (isr & 0x2) {
552 /* Lower-Privilege Transfer Trap */
553 /*
554 * Just clear PSR.lp and then return immediately: all the
 555 * interesting work (e.g., signal delivery) is done in the kernel
 556 * exit path.
557 */
558 ia64_psr(&regs)->lp = 0;
559 return;
560 } else {
561 /* Unimplemented Instr. Address Trap */
562 if (user_mode(&regs)) {
563 siginfo.si_signo = SIGILL;
564 siginfo.si_code = ILL_BADIADDR;
565 siginfo.si_errno = 0;
566 siginfo.si_flags = 0;
567 siginfo.si_isr = 0;
568 siginfo.si_imm = 0;
569 siginfo.si_addr = (void __user *) iip;
570 force_sig_info(SIGILL, &siginfo, current);
571 return;
572 }
573 sprintf(buf, "Unimplemented Instruction Address fault");
574 }
575 break;
576
577 case 45:
578#ifdef CONFIG_IA32_SUPPORT
579 if (ia32_exception(&regs, isr) == 0)
580 return;
581#endif
582 printk(KERN_ERR "Unexpected IA-32 exception (Trap 45)\n");
583 printk(KERN_ERR " iip - 0x%lx, ifa - 0x%lx, isr - 0x%lx\n",
584 iip, ifa, isr);
585 force_sig(SIGSEGV, current);
586 break;
587
588 case 46:
589#ifdef CONFIG_IA32_SUPPORT
590 if (ia32_intercept(&regs, isr) == 0)
591 return;
592#endif
593 printk(KERN_ERR "Unexpected IA-32 intercept trap (Trap 46)\n");
594 printk(KERN_ERR " iip - 0x%lx, ifa - 0x%lx, isr - 0x%lx, iim - 0x%lx\n",
595 iip, ifa, isr, iim);
596 force_sig(SIGSEGV, current);
597 return;
598
599 case 47:
600 sprintf(buf, "IA-32 Interruption Fault (int 0x%lx)", isr >> 16);
601 break;
602
603 default:
604 sprintf(buf, "Fault %lu", vector);
605 break;
606 }
607 die_if_kernel(buf, &regs, error);
608 force_sig(SIGILL, current);
609}
diff --git a/arch/ia64/kernel/unaligned.c b/arch/ia64/kernel/unaligned.c
new file mode 100644
index 000000000000..43b45b65ee5a
--- /dev/null
+++ b/arch/ia64/kernel/unaligned.c
@@ -0,0 +1,1521 @@
1/*
2 * Architecture-specific unaligned trap handling.
3 *
4 * Copyright (C) 1999-2002, 2004 Hewlett-Packard Co
5 * Stephane Eranian <eranian@hpl.hp.com>
6 * David Mosberger-Tang <davidm@hpl.hp.com>
7 *
8 * 2002/12/09 Fix rotating register handling (off-by-1 error, missing fr-rotation). Fix
9 * get_rse_reg() to not leak kernel bits to user-level (reading an out-of-frame
10 * stacked register returns an undefined value; it does NOT trigger a
11 * "rsvd register fault").
12 * 2001/10/11 Fix unaligned access to rotating registers in s/w pipelined loops.
13 * 2001/08/13 Correct size of extended floats (float_fsz) from 16 to 10 bytes.
 14 * 2001/01/17 Add support for emulation of unaligned kernel accesses.
15 */
16#include <linux/kernel.h>
17#include <linux/sched.h>
18#include <linux/smp_lock.h>
19#include <linux/tty.h>
20
21#include <asm/intrinsics.h>
22#include <asm/processor.h>
23#include <asm/rse.h>
24#include <asm/uaccess.h>
25#include <asm/unaligned.h>
26
27extern void die_if_kernel(char *str, struct pt_regs *regs, long err) __attribute__ ((noreturn));
28
29#undef DEBUG_UNALIGNED_TRAP
30
31#ifdef DEBUG_UNALIGNED_TRAP
32# define DPRINT(a...) do { printk("%s %u: ", __FUNCTION__, __LINE__); printk (a); } while (0)
33# define DDUMP(str,vp,len) dump(str, vp, len)
34
35static void
36dump (const char *str, void *vp, size_t len)
37{
38 unsigned char *cp = vp;
39 int i;
40
41 printk("%s", str);
42 for (i = 0; i < len; ++i)
43 printk (" %02x", *cp++);
44 printk("\n");
45}
46#else
47# define DPRINT(a...)
48# define DDUMP(str,vp,len)
49#endif
50
51#define IA64_FIRST_STACKED_GR 32
52#define IA64_FIRST_ROTATING_FR 32
53#define SIGN_EXT9 0xffffffffffffff00ul
54
55/*
56 * For M-unit:
57 *
58 * opcode | m | x6 |
59 * --------|------|---------|
60 * [40-37] | [36] | [35:30] |
61 * --------|------|---------|
62 * 4 | 1 | 6 | = 11 bits
63 * --------------------------
64 * However bits [31:30] are not directly useful to distinguish between
65 * load/store so we can use [35:32] instead, which gives the following
66 * mask ([40:32]) using 9 bits. The 'e' comes from the fact that we defer
67 * checking the m-bit until later in the load/store emulation.
68 */
69#define IA64_OPCODE_MASK 0x1ef
70#define IA64_OPCODE_SHIFT 32
71
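 /*
  * Worked example (informative): for "ld8 r1=[r3]" the major opcode
  * (bits [40:37]) is 4 and bits [35:32] are 0, so
  *
  *	opcode = (slot >> IA64_OPCODE_SHIFT) & IA64_OPCODE_MASK = 0x080 = LD_OP
  *
  * Bits [31:30] (x6_sz) encode the access size, here 3, giving a length of
  * 1 << 3 = 8 bytes in the emulation code below. The cleared bit in 0x1ef
  * is the m bit ([36]), which is checked separately.
  */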
72/*
73 * Table C-28 Integer Load/Store
74 *
75 * We ignore [35:32]= 0x6, 0x7, 0xE, 0xF
76 *
77 * ld8.fill, st8.fill MUST be aligned because the RNATs are based on
 78 * the address (bits [8:3]), so we must fail.
79 */
80#define LD_OP 0x080
81#define LDS_OP 0x081
82#define LDA_OP 0x082
83#define LDSA_OP 0x083
84#define LDBIAS_OP 0x084
85#define LDACQ_OP 0x085
86/* 0x086, 0x087 are not relevant */
87#define LDCCLR_OP 0x088
88#define LDCNC_OP 0x089
89#define LDCCLRACQ_OP 0x08a
90#define ST_OP 0x08c
91#define STREL_OP 0x08d
92/* 0x08e,0x8f are not relevant */
93
94/*
95 * Table C-29 Integer Load +Reg
96 *
97 * we use the ld->m (bit [36:36]) field to determine whether or not we have
98 * a load/store of this form.
99 */
100
101/*
102 * Table C-30 Integer Load/Store +Imm
103 *
104 * We ignore [35:32]= 0x6, 0x7, 0xE, 0xF
105 *
 106 * ld8.fill, st8.fill must be aligned because the NaT bits are based on
107 * the address, so we must fail and the program must be fixed.
108 */
109#define LD_IMM_OP 0x0a0
110#define LDS_IMM_OP 0x0a1
111#define LDA_IMM_OP 0x0a2
112#define LDSA_IMM_OP 0x0a3
113#define LDBIAS_IMM_OP 0x0a4
114#define LDACQ_IMM_OP 0x0a5
115/* 0x0a6, 0xa7 are not relevant */
116#define LDCCLR_IMM_OP 0x0a8
117#define LDCNC_IMM_OP 0x0a9
118#define LDCCLRACQ_IMM_OP 0x0aa
119#define ST_IMM_OP 0x0ac
120#define STREL_IMM_OP 0x0ad
121/* 0x0ae,0xaf are not relevant */
122
123/*
124 * Table C-32 Floating-point Load/Store
125 */
126#define LDF_OP 0x0c0
127#define LDFS_OP 0x0c1
128#define LDFA_OP 0x0c2
129#define LDFSA_OP 0x0c3
130/* 0x0c6 is irrelevant */
131#define LDFCCLR_OP 0x0c8
132#define LDFCNC_OP 0x0c9
133/* 0x0cb is irrelevant */
134#define STF_OP 0x0cc
135
136/*
137 * Table C-33 Floating-point Load +Reg
138 *
139 * we use the ld->m (bit [36:36]) field to determine whether or not we have
140 * a load/store of this form.
141 */
142
143/*
144 * Table C-34 Floating-point Load/Store +Imm
145 */
146#define LDF_IMM_OP 0x0e0
147#define LDFS_IMM_OP 0x0e1
148#define LDFA_IMM_OP 0x0e2
149#define LDFSA_IMM_OP 0x0e3
150/* 0x0e6 is irrelevant */
151#define LDFCCLR_IMM_OP 0x0e8
152#define LDFCNC_IMM_OP 0x0e9
153#define STF_IMM_OP 0x0ec
154
155typedef struct {
156 unsigned long qp:6; /* [0:5] */
157 unsigned long r1:7; /* [6:12] */
158 unsigned long imm:7; /* [13:19] */
159 unsigned long r3:7; /* [20:26] */
160 unsigned long x:1; /* [27:27] */
161 unsigned long hint:2; /* [28:29] */
162 unsigned long x6_sz:2; /* [30:31] */
163 unsigned long x6_op:4; /* [32:35], x6 = x6_sz|x6_op */
164 unsigned long m:1; /* [36:36] */
165 unsigned long op:4; /* [37:40] */
166 unsigned long pad:23; /* [41:63] */
167} load_store_t;
168
169
170typedef enum {
171 UPD_IMMEDIATE, /* ldXZ r1=[r3],imm(9) */
172 UPD_REG /* ldXZ r1=[r3],r2 */
173} update_t;
174
175/*
176 * We use tables to keep track of the offsets of registers in the saved state.
177 * This way we save having big switch/case statements.
178 *
179 * We use bit 0 to indicate switch_stack or pt_regs.
180 * The offset is simply shifted by 1 bit.
181 * A 2-byte value should be enough to hold any kind of offset
182 *
183 * In case the calling convention changes (and thus pt_regs/switch_stack)
184 * simply use RSW instead of RPT or vice-versa.
185 */
186
187#define RPO(x) ((size_t) &((struct pt_regs *)0)->x)
188#define RSO(x) ((size_t) &((struct switch_stack *)0)->x)
189
190#define RPT(x) (RPO(x) << 1)
191#define RSW(x) (1| RSO(x)<<1)
192
193#define GR_OFFS(x) (gr_info[x]>>1)
194#define GR_IN_SW(x) (gr_info[x] & 0x1)
195
196#define FR_OFFS(x) (fr_info[x]>>1)
197#define FR_IN_SW(x) (fr_info[x] & 0x1)
198
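/*
 * Informative example of the encoding above: an entry such as RPT(r8)
 * expands to (offsetof(struct pt_regs, r8) << 1), so GR_IN_SW(8) == 0
 * (the register lives in pt_regs) and GR_OFFS(8) is the byte offset of r8
 * within pt_regs. RSW(r4) additionally sets bit 0, so GR_IN_SW(4) == 1 and
 * GR_OFFS(4) is the offset of r4 within switch_stack.
 */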
199static u16 gr_info[32]={
200 0, /* r0 is read-only : WE SHOULD NEVER GET THIS */
201
202 RPT(r1), RPT(r2), RPT(r3),
203
204 RSW(r4), RSW(r5), RSW(r6), RSW(r7),
205
206 RPT(r8), RPT(r9), RPT(r10), RPT(r11),
207 RPT(r12), RPT(r13), RPT(r14), RPT(r15),
208
209 RPT(r16), RPT(r17), RPT(r18), RPT(r19),
210 RPT(r20), RPT(r21), RPT(r22), RPT(r23),
211 RPT(r24), RPT(r25), RPT(r26), RPT(r27),
212 RPT(r28), RPT(r29), RPT(r30), RPT(r31)
213};
214
215static u16 fr_info[32]={
216 0, /* constant : WE SHOULD NEVER GET THIS */
217 0, /* constant : WE SHOULD NEVER GET THIS */
218
219 RSW(f2), RSW(f3), RSW(f4), RSW(f5),
220
221 RPT(f6), RPT(f7), RPT(f8), RPT(f9),
222 RPT(f10), RPT(f11),
223
224 RSW(f12), RSW(f13), RSW(f14),
225 RSW(f15), RSW(f16), RSW(f17), RSW(f18), RSW(f19),
226 RSW(f20), RSW(f21), RSW(f22), RSW(f23), RSW(f24),
227 RSW(f25), RSW(f26), RSW(f27), RSW(f28), RSW(f29),
228 RSW(f30), RSW(f31)
229};
230
231/* Invalidate ALAT entry for integer register REGNO. */
232static void
233invala_gr (int regno)
234{
235# define F(reg) case reg: ia64_invala_gr(reg); break
236
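	/*
	 * Informative note: the big switch below is presumably needed because
	 * ia64_invala_gr() takes the register number as a compile-time
	 * constant (an inline-asm immediate); the switch turns the run-time
	 * regno into a constant at each case. The same reasoning applies to
	 * invala_fr() further down.
	 */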
237 switch (regno) {
238 F( 0); F( 1); F( 2); F( 3); F( 4); F( 5); F( 6); F( 7);
239 F( 8); F( 9); F( 10); F( 11); F( 12); F( 13); F( 14); F( 15);
240 F( 16); F( 17); F( 18); F( 19); F( 20); F( 21); F( 22); F( 23);
241 F( 24); F( 25); F( 26); F( 27); F( 28); F( 29); F( 30); F( 31);
242 F( 32); F( 33); F( 34); F( 35); F( 36); F( 37); F( 38); F( 39);
243 F( 40); F( 41); F( 42); F( 43); F( 44); F( 45); F( 46); F( 47);
244 F( 48); F( 49); F( 50); F( 51); F( 52); F( 53); F( 54); F( 55);
245 F( 56); F( 57); F( 58); F( 59); F( 60); F( 61); F( 62); F( 63);
246 F( 64); F( 65); F( 66); F( 67); F( 68); F( 69); F( 70); F( 71);
247 F( 72); F( 73); F( 74); F( 75); F( 76); F( 77); F( 78); F( 79);
248 F( 80); F( 81); F( 82); F( 83); F( 84); F( 85); F( 86); F( 87);
249 F( 88); F( 89); F( 90); F( 91); F( 92); F( 93); F( 94); F( 95);
250 F( 96); F( 97); F( 98); F( 99); F(100); F(101); F(102); F(103);
251 F(104); F(105); F(106); F(107); F(108); F(109); F(110); F(111);
252 F(112); F(113); F(114); F(115); F(116); F(117); F(118); F(119);
253 F(120); F(121); F(122); F(123); F(124); F(125); F(126); F(127);
254 }
255# undef F
256}
257
258/* Invalidate ALAT entry for floating-point register REGNO. */
259static void
260invala_fr (int regno)
261{
262# define F(reg) case reg: ia64_invala_fr(reg); break
263
264 switch (regno) {
265 F( 0); F( 1); F( 2); F( 3); F( 4); F( 5); F( 6); F( 7);
266 F( 8); F( 9); F( 10); F( 11); F( 12); F( 13); F( 14); F( 15);
267 F( 16); F( 17); F( 18); F( 19); F( 20); F( 21); F( 22); F( 23);
268 F( 24); F( 25); F( 26); F( 27); F( 28); F( 29); F( 30); F( 31);
269 F( 32); F( 33); F( 34); F( 35); F( 36); F( 37); F( 38); F( 39);
270 F( 40); F( 41); F( 42); F( 43); F( 44); F( 45); F( 46); F( 47);
271 F( 48); F( 49); F( 50); F( 51); F( 52); F( 53); F( 54); F( 55);
272 F( 56); F( 57); F( 58); F( 59); F( 60); F( 61); F( 62); F( 63);
273 F( 64); F( 65); F( 66); F( 67); F( 68); F( 69); F( 70); F( 71);
274 F( 72); F( 73); F( 74); F( 75); F( 76); F( 77); F( 78); F( 79);
275 F( 80); F( 81); F( 82); F( 83); F( 84); F( 85); F( 86); F( 87);
276 F( 88); F( 89); F( 90); F( 91); F( 92); F( 93); F( 94); F( 95);
277 F( 96); F( 97); F( 98); F( 99); F(100); F(101); F(102); F(103);
278 F(104); F(105); F(106); F(107); F(108); F(109); F(110); F(111);
279 F(112); F(113); F(114); F(115); F(116); F(117); F(118); F(119);
280 F(120); F(121); F(122); F(123); F(124); F(125); F(126); F(127);
281 }
282# undef F
283}
284
285static inline unsigned long
286rotate_reg (unsigned long sor, unsigned long rrb, unsigned long reg)
287{
288 reg += rrb;
289 if (reg >= sor)
290 reg -= sor;
291 return reg;
292}
293
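/*
 * Worked example: with a rotating region of sor = 16 registers and a
 * rotation base of rrb = 3, logical index 14 maps to rotate_reg(16, 3, 14):
 * 14 + 3 = 17, which wraps past 16 to physical index 1.
 */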
294static void
295set_rse_reg (struct pt_regs *regs, unsigned long r1, unsigned long val, int nat)
296{
297 struct switch_stack *sw = (struct switch_stack *) regs - 1;
298 unsigned long *bsp, *bspstore, *addr, *rnat_addr, *ubs_end;
299 unsigned long *kbs = (void *) current + IA64_RBS_OFFSET;
300 unsigned long rnats, nat_mask;
301 unsigned long on_kbs;
302 long sof = (regs->cr_ifs) & 0x7f;
303 long sor = 8 * ((regs->cr_ifs >> 14) & 0xf);
304 long rrb_gr = (regs->cr_ifs >> 18) & 0x7f;
305 long ridx = r1 - 32;
306
307 if (ridx >= sof) {
308 /* this should never happen, as the "rsvd register fault" has higher priority */
309 DPRINT("ignoring write to r%lu; only %lu registers are allocated!\n", r1, sof);
310 return;
311 }
312
313 if (ridx < sor)
314 ridx = rotate_reg(sor, rrb_gr, ridx);
315
316 DPRINT("r%lu, sw.bspstore=%lx pt.bspstore=%lx sof=%ld sol=%ld ridx=%ld\n",
317 r1, sw->ar_bspstore, regs->ar_bspstore, sof, (regs->cr_ifs >> 7) & 0x7f, ridx);
318
319 on_kbs = ia64_rse_num_regs(kbs, (unsigned long *) sw->ar_bspstore);
320 addr = ia64_rse_skip_regs((unsigned long *) sw->ar_bspstore, -sof + ridx);
321 if (addr >= kbs) {
322 /* the register is on the kernel backing store: easy... */
323 rnat_addr = ia64_rse_rnat_addr(addr);
324 if ((unsigned long) rnat_addr >= sw->ar_bspstore)
325 rnat_addr = &sw->ar_rnat;
326 nat_mask = 1UL << ia64_rse_slot_num(addr);
327
328 *addr = val;
329 if (nat)
330 *rnat_addr |= nat_mask;
331 else
332 *rnat_addr &= ~nat_mask;
333 return;
334 }
335
336 if (!user_stack(current, regs)) {
337 DPRINT("ignoring kernel write to r%lu; register isn't on the kernel RBS!", r1);
338 return;
339 }
340
341 bspstore = (unsigned long *)regs->ar_bspstore;
342 ubs_end = ia64_rse_skip_regs(bspstore, on_kbs);
343 bsp = ia64_rse_skip_regs(ubs_end, -sof);
344 addr = ia64_rse_skip_regs(bsp, ridx);
345
346 DPRINT("ubs_end=%p bsp=%p addr=%p\n", (void *) ubs_end, (void *) bsp, (void *) addr);
347
348 ia64_poke(current, sw, (unsigned long) ubs_end, (unsigned long) addr, val);
349
350 rnat_addr = ia64_rse_rnat_addr(addr);
351
352 ia64_peek(current, sw, (unsigned long) ubs_end, (unsigned long) rnat_addr, &rnats);
353 DPRINT("rnat @%p = 0x%lx nat=%d old nat=%ld\n",
354 (void *) rnat_addr, rnats, nat, (rnats >> ia64_rse_slot_num(addr)) & 1);
355
356 nat_mask = 1UL << ia64_rse_slot_num(addr);
357 if (nat)
358 rnats |= nat_mask;
359 else
360 rnats &= ~nat_mask;
361 ia64_poke(current, sw, (unsigned long) ubs_end, (unsigned long) rnat_addr, rnats);
362
363 DPRINT("rnat changed to @%p = 0x%lx\n", (void *) rnat_addr, rnats);
364}
365
366
367static void
368get_rse_reg (struct pt_regs *regs, unsigned long r1, unsigned long *val, int *nat)
369{
370 struct switch_stack *sw = (struct switch_stack *) regs - 1;
371 unsigned long *bsp, *addr, *rnat_addr, *ubs_end, *bspstore;
372 unsigned long *kbs = (void *) current + IA64_RBS_OFFSET;
373 unsigned long rnats, nat_mask;
374 unsigned long on_kbs;
375 long sof = (regs->cr_ifs) & 0x7f;
376 long sor = 8 * ((regs->cr_ifs >> 14) & 0xf);
377 long rrb_gr = (regs->cr_ifs >> 18) & 0x7f;
378 long ridx = r1 - 32;
379
380 if (ridx >= sof) {
381 /* read of out-of-frame register returns an undefined value; 0 in our case. */
382 DPRINT("ignoring read from r%lu; only %lu registers are allocated!\n", r1, sof);
383 goto fail;
384 }
385
386 if (ridx < sor)
387 ridx = rotate_reg(sor, rrb_gr, ridx);
388
389 DPRINT("r%lu, sw.bspstore=%lx pt.bspstore=%lx sof=%ld sol=%ld ridx=%ld\n",
390 r1, sw->ar_bspstore, regs->ar_bspstore, sof, (regs->cr_ifs >> 7) & 0x7f, ridx);
391
392 on_kbs = ia64_rse_num_regs(kbs, (unsigned long *) sw->ar_bspstore);
393 addr = ia64_rse_skip_regs((unsigned long *) sw->ar_bspstore, -sof + ridx);
394 if (addr >= kbs) {
395 /* the register is on the kernel backing store: easy... */
396 *val = *addr;
397 if (nat) {
398 rnat_addr = ia64_rse_rnat_addr(addr);
399 if ((unsigned long) rnat_addr >= sw->ar_bspstore)
400 rnat_addr = &sw->ar_rnat;
401 nat_mask = 1UL << ia64_rse_slot_num(addr);
402 *nat = (*rnat_addr & nat_mask) != 0;
403 }
404 return;
405 }
406
407 if (!user_stack(current, regs)) {
408 DPRINT("ignoring kernel read of r%lu; register isn't on the RBS!", r1);
409 goto fail;
410 }
411
412 bspstore = (unsigned long *)regs->ar_bspstore;
413 ubs_end = ia64_rse_skip_regs(bspstore, on_kbs);
414 bsp = ia64_rse_skip_regs(ubs_end, -sof);
415 addr = ia64_rse_skip_regs(bsp, ridx);
416
417 DPRINT("ubs_end=%p bsp=%p addr=%p\n", (void *) ubs_end, (void *) bsp, (void *) addr);
418
419 ia64_peek(current, sw, (unsigned long) ubs_end, (unsigned long) addr, val);
420
421 if (nat) {
422 rnat_addr = ia64_rse_rnat_addr(addr);
423 nat_mask = 1UL << ia64_rse_slot_num(addr);
424
425 DPRINT("rnat @%p = 0x%lx\n", (void *) rnat_addr, rnats);
426
427 ia64_peek(current, sw, (unsigned long) ubs_end, (unsigned long) rnat_addr, &rnats);
428 *nat = (rnats & nat_mask) != 0;
429 }
430 return;
431
432 fail:
433 *val = 0;
434 if (nat)
435 *nat = 0;
436 return;
437}
438
439
440static void
441setreg (unsigned long regnum, unsigned long val, int nat, struct pt_regs *regs)
442{
443 struct switch_stack *sw = (struct switch_stack *) regs - 1;
444 unsigned long addr;
445 unsigned long bitmask;
446 unsigned long *unat;
447
448 /*
449 * First takes care of stacked registers
450 */
451 if (regnum >= IA64_FIRST_STACKED_GR) {
452 set_rse_reg(regs, regnum, val, nat);
453 return;
454 }
455
456 /*
457 * Using r0 as a target raises a General Exception fault which has higher priority
458 * than the Unaligned Reference fault.
459 */
460
461 /*
462 * Now look at registers in [0-31] range and init correct UNAT
463 */
464 if (GR_IN_SW(regnum)) {
465 addr = (unsigned long)sw;
466 unat = &sw->ar_unat;
467 } else {
468 addr = (unsigned long)regs;
469 unat = &sw->caller_unat;
470 }
471 DPRINT("tmp_base=%lx switch_stack=%s offset=%d\n",
472 addr, unat==&sw->ar_unat ? "yes":"no", GR_OFFS(regnum));
473 /*
474 * add offset from base of struct
475 * and do it !
476 */
477 addr += GR_OFFS(regnum);
478
479 *(unsigned long *)addr = val;
480
481 /*
482 * We need to clear the corresponding UNAT bit to fully emulate the load
 483 * UNAT bit_pos = GR[r3]{8:3} from EAS-2.4
484 */
485 bitmask = 1UL << (addr >> 3 & 0x3f);
486 DPRINT("*0x%lx=0x%lx NaT=%d prev_unat @%p=%lx\n", addr, val, nat, (void *) unat, *unat);
487 if (nat) {
488 *unat |= bitmask;
489 } else {
490 *unat &= ~bitmask;
491 }
492 DPRINT("*0x%lx=0x%lx NaT=%d new unat: %p=%lx\n", addr, val, nat, (void *) unat,*unat);
493}
494
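/*
 * Informative note on the UNAT update above: the bit position is taken from
 * bits {8:3} of the save address. For example, a register slot whose address
 * ends in 0x48 uses UNAT bit (0x48 >> 3) & 0x3f == 9.
 */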
495/*
496 * Return the (rotated) index for floating point register REGNUM (REGNUM must be in the
 497 * range from 32-127); the result is in the range from 0-95.
498 */
499static inline unsigned long
500fph_index (struct pt_regs *regs, long regnum)
501{
502 unsigned long rrb_fr = (regs->cr_ifs >> 25) & 0x7f;
503 return rotate_reg(96, rrb_fr, (regnum - IA64_FIRST_ROTATING_FR));
504}
505
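/*
 * Worked example: with rrb.fr == 2, an access to f34 (regnum 34) yields
 * fph_index == rotate_reg(96, 2, 34 - 32) == 4, i.e. the value is kept in
 * current->thread.fph[4].
 */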
506static void
507setfpreg (unsigned long regnum, struct ia64_fpreg *fpval, struct pt_regs *regs)
508{
509 struct switch_stack *sw = (struct switch_stack *)regs - 1;
510 unsigned long addr;
511
512 /*
513 * From EAS-2.5: FPDisableFault has higher priority than Unaligned
514 * Fault. Thus, when we get here, we know the partition is enabled.
515 * To update f32-f127, there are three choices:
516 *
517 * (1) save f32-f127 to thread.fph and update the values there
518 * (2) use a gigantic switch statement to directly access the registers
519 * (3) generate code on the fly to update the desired register
520 *
521 * For now, we are using approach (1).
522 */
523 if (regnum >= IA64_FIRST_ROTATING_FR) {
524 ia64_sync_fph(current);
525 current->thread.fph[fph_index(regs, regnum)] = *fpval;
526 } else {
527 /*
528 * pt_regs or switch_stack ?
529 */
530 if (FR_IN_SW(regnum)) {
531 addr = (unsigned long)sw;
532 } else {
533 addr = (unsigned long)regs;
534 }
535
536 DPRINT("tmp_base=%lx offset=%d\n", addr, FR_OFFS(regnum));
537
538 addr += FR_OFFS(regnum);
539 *(struct ia64_fpreg *)addr = *fpval;
540
541 /*
542 * mark the low partition as being used now
543 *
544 * It is highly unlikely that this bit is not already set, but
545 * let's do it for safety.
546 */
547 regs->cr_ipsr |= IA64_PSR_MFL;
548 }
549}
550
551/*
552 * Those 2 inline functions generate the spilled versions of the constant floating point
553 * registers which can be used with stfX
554 */
555static inline void
556float_spill_f0 (struct ia64_fpreg *final)
557{
558 ia64_stf_spill(final, 0);
559}
560
561static inline void
562float_spill_f1 (struct ia64_fpreg *final)
563{
564 ia64_stf_spill(final, 1);
565}
566
567static void
568getfpreg (unsigned long regnum, struct ia64_fpreg *fpval, struct pt_regs *regs)
569{
570 struct switch_stack *sw = (struct switch_stack *) regs - 1;
571 unsigned long addr;
572
573 /*
574 * From EAS-2.5: FPDisableFault has higher priority than
575 * Unaligned Fault. Thus, when we get here, we know the partition is
576 * enabled.
577 *
578 * When regnum > 31, the register is still live and we need to force a save
579 * to current->thread.fph to get access to it. See discussion in setfpreg()
580 * for reasons and other ways of doing this.
581 */
582 if (regnum >= IA64_FIRST_ROTATING_FR) {
583 ia64_flush_fph(current);
584 *fpval = current->thread.fph[fph_index(regs, regnum)];
585 } else {
586 /*
 587 * f0 = 0.0, f1 = 1.0. Those registers are constant and are thus
 588 * not saved; we must generate their spilled form on the fly
589 */
590 switch(regnum) {
591 case 0:
592 float_spill_f0(fpval);
593 break;
594 case 1:
595 float_spill_f1(fpval);
596 break;
597 default:
598 /*
599 * pt_regs or switch_stack ?
600 */
601 addr = FR_IN_SW(regnum) ? (unsigned long)sw
602 : (unsigned long)regs;
603
604 DPRINT("is_sw=%d tmp_base=%lx offset=0x%x\n",
605 FR_IN_SW(regnum), addr, FR_OFFS(regnum));
606
607 addr += FR_OFFS(regnum);
608 *fpval = *(struct ia64_fpreg *)addr;
609 }
610 }
611}
612
613
614static void
615getreg (unsigned long regnum, unsigned long *val, int *nat, struct pt_regs *regs)
616{
617 struct switch_stack *sw = (struct switch_stack *) regs - 1;
618 unsigned long addr, *unat;
619
620 if (regnum >= IA64_FIRST_STACKED_GR) {
621 get_rse_reg(regs, regnum, val, nat);
622 return;
623 }
624
625 /*
626 * take care of r0 (read-only always evaluate to 0)
627 */
628 if (regnum == 0) {
629 *val = 0;
630 if (nat)
631 *nat = 0;
632 return;
633 }
634
635 /*
636 * Now look at registers in [0-31] range and init correct UNAT
637 */
638 if (GR_IN_SW(regnum)) {
639 addr = (unsigned long)sw;
640 unat = &sw->ar_unat;
641 } else {
642 addr = (unsigned long)regs;
643 unat = &sw->caller_unat;
644 }
645
646 DPRINT("addr_base=%lx offset=0x%x\n", addr, GR_OFFS(regnum));
647
648 addr += GR_OFFS(regnum);
649
650 *val = *(unsigned long *)addr;
651
652 /*
653 * do it only when requested
654 */
655 if (nat)
656 *nat = (*unat >> (addr >> 3 & 0x3f)) & 0x1UL;
657}
658
659static void
660emulate_load_updates (update_t type, load_store_t ld, struct pt_regs *regs, unsigned long ifa)
661{
662 /*
663 * IMPORTANT:
664 * Given the way we handle unaligned speculative loads, we should
665 * not get to this point in the code but we keep this sanity check,
666 * just in case.
667 */
668 if (ld.x6_op == 1 || ld.x6_op == 3) {
669 printk(KERN_ERR "%s: register update on speculative load, error\n", __FUNCTION__);
670 die_if_kernel("unaligned reference on speculative load with register update\n",
671 regs, 30);
672 }
673
674
675 /*
676 * at this point, we know that the base register to update is valid i.e.,
677 * it's not r0
678 */
679 if (type == UPD_IMMEDIATE) {
680 unsigned long imm;
681
682 /*
683 * Load +Imm: ldXZ r1=[r3],imm(9)
684 *
685 *
686 * form imm9: [13:19] contain the first 7 bits
687 */
688 imm = ld.x << 7 | ld.imm;
689
690 /*
691 * sign extend (1+8bits) if m set
692 */
693 if (ld.m) imm |= SIGN_EXT9;
694
695 /*
696 * ifa == r3 and we know that the NaT bit on r3 was clear so
697 * we can directly use ifa.
698 */
699 ifa += imm;
700
701 setreg(ld.r3, ifa, 0, regs);
702
703 DPRINT("ld.x=%d ld.m=%d imm=%ld r3=0x%lx\n", ld.x, ld.m, imm, ifa);
704
705 } else if (ld.m) {
706 unsigned long r2;
707 int nat_r2;
708
709 /*
710 * Load +Reg Opcode: ldXZ r1=[r3],r2
711 *
712 * Note: that we update r3 even in the case of ldfX.a
713 * (where the load does not happen)
714 *
715 * The way the load algorithm works, we know that r3 does not
716 * have its NaT bit set (would have gotten NaT consumption
717 * before getting the unaligned fault). So we can use ifa
718 * which equals r3 at this point.
719 *
720 * IMPORTANT:
721 * The above statement holds ONLY because we know that we
722 * never reach this code when trying to do a ldX.s.
723 * If we ever make it to here on an ldfX.s then
724 */
725 getreg(ld.imm, &r2, &nat_r2, regs);
726
727 ifa += r2;
728
729 /*
730 * propagate Nat r2 -> r3
731 */
732 setreg(ld.r3, ifa, nat_r2, regs);
733
734 DPRINT("imm=%d r2=%ld r3=0x%lx nat_r2=%d\n",ld.imm, r2, ifa, nat_r2);
735 }
736}
737
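/*
 * Informative example of the imm9 reconstruction performed above: the 9-bit
 * immediate is {sign (ld.m), ld.x, ld.imm}. With ld.m = 1, ld.x = 0 and
 * ld.imm = 0x7f, the code computes imm = 0x7f and then ORs in SIGN_EXT9,
 * giving 0xffffffffffffff7f, i.e. -129 as a signed 64-bit value, which is
 * then added to the base address in ifa.
 */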
738
739static int
740emulate_load_int (unsigned long ifa, load_store_t ld, struct pt_regs *regs)
741{
742 unsigned int len = 1 << ld.x6_sz;
743 unsigned long val = 0;
744
745 /*
746 * r0, as target, doesn't need to be checked because Illegal Instruction
747 * faults have higher priority than unaligned faults.
748 *
749 * r0 cannot be found as the base as it would never generate an
750 * unaligned reference.
751 */
752
753 /*
754 * ldX.a we will emulate load and also invalidate the ALAT entry.
755 * See comment below for explanation on how we handle ldX.a
756 */
757
758 if (len != 2 && len != 4 && len != 8) {
759 DPRINT("unknown size: x6=%d\n", ld.x6_sz);
760 return -1;
761 }
762 /* this assumes little-endian byte-order: */
763 if (copy_from_user(&val, (void __user *) ifa, len))
764 return -1;
765 setreg(ld.r1, val, 0, regs);
766
767 /*
768 * check for updates on any kind of loads
769 */
770 if (ld.op == 0x5 || ld.m)
771 emulate_load_updates(ld.op == 0x5 ? UPD_IMMEDIATE: UPD_REG, ld, regs, ifa);
772
773 /*
774 * handling of various loads (based on EAS2.4):
775 *
776 * ldX.acq (ordered load):
777 * - acquire semantics would have been used, so force fence instead.
778 *
779 * ldX.c.clr (check load and clear):
780 * - if we get to this handler, it's because the entry was not in the ALAT.
781 * Therefore the operation reverts to a normal load
782 *
783 * ldX.c.nc (check load no clear):
784 * - same as previous one
785 *
786 * ldX.c.clr.acq (ordered check load and clear):
787 * - same as above for c.clr part. The load needs to have acquire semantics. So
788 * we use the fence semantics which is stronger and thus ensures correctness.
789 *
790 * ldX.a (advanced load):
791 * - suppose ldX.a r1=[r3]. If we get to the unaligned trap it's because the
792 * address doesn't match requested size alignment. This means that we would
793 * possibly need more than one load to get the result.
794 *
795 * The load part can be handled just like a normal load, however the difficult
796 * part is to get the right thing into the ALAT. The critical piece of information
797 * in the base address of the load & size. To do that, a ld.a must be executed,
798 * clearly any address can be pushed into the table by using ld1.a r1=[r3]. Now
799 * if we use the same target register, we will be okay for the check.a instruction.
800 * If we look at the store, basically a stX [r3]=r1 checks the ALAT for any entry
 801 * which would overlap within [r3,r3+X] (the size of the load was stored in the
802 * ALAT). If such an entry is found the entry is invalidated. But this is not good
803 * enough, take the following example:
804 * r3=3
805 * ld4.a r1=[r3]
806 *
807 * Could be emulated by doing:
808 * ld1.a r1=[r3],1
809 * store to temporary;
810 * ld1.a r1=[r3],1
811 * store & shift to temporary;
812 * ld1.a r1=[r3],1
813 * store & shift to temporary;
814 * ld1.a r1=[r3]
815 * store & shift to temporary;
816 * r1=temporary
817 *
 818 * So in this case, you would get the right value in r1 but the wrong info in
819 * the ALAT. Notice that you could do it in reverse to finish with address 3
820 * but you would still get the size wrong. To get the size right, one needs to
 821 * execute exactly the same kind of load. You could do it from an aligned
822 * temporary location, but you would get the address wrong.
823 *
824 * So no matter what, it is not possible to emulate an advanced load
 825 * correctly. But is that really critical?
826 *
827 * We will always convert ld.a into a normal load with ALAT invalidated. This
 828 * will enable the compiler to do optimizations where certain code paths after ld.a
 829 * are not required to have ld.c/chk.a, e.g., code paths with no intervening stores.
830 *
831 * If there is a store after the advanced load, one must either do a ld.c.* or
832 * chk.a.* to reuse the value stored in the ALAT. Both can "fail" (meaning no
833 * entry found in ALAT), and that's perfectly ok because:
834 *
835 * - ld.c.*, if the entry is not present a normal load is executed
836 * - chk.a.*, if the entry is not present, execution jumps to recovery code
837 *
838 * In either case, the load can be potentially retried in another form.
839 *
840 * ALAT must be invalidated for the register (so that chk.a or ld.c don't pick
841 * up a stale entry later). The register base update MUST also be performed.
842 */
843
844 /*
845 * when the load has the .acq completer then
846 * use ordering fence.
847 */
848 if (ld.x6_op == 0x5 || ld.x6_op == 0xa)
849 mb();
850
851 /*
852 * invalidate ALAT entry in case of advanced load
853 */
854 if (ld.x6_op == 0x2)
855 invala_gr(ld.r1);
856
857 return 0;
858}
859
860static int
861emulate_store_int (unsigned long ifa, load_store_t ld, struct pt_regs *regs)
862{
863 unsigned long r2;
864 unsigned int len = 1 << ld.x6_sz;
865
866 /*
867 * if we get to this handler, Nat bits on both r3 and r2 have already
868 * been checked. so we don't need to do it
869 *
870 * extract the value to be stored
871 */
872 getreg(ld.imm, &r2, NULL, regs);
873
874 /*
875 * we rely on the macros in unaligned.h for now i.e.,
876 * we let the compiler figure out how to read memory gracefully.
877 *
 878 * We need this switch/case because of the way the inline function
879 * works. The code is optimized by the compiler and looks like
880 * a single switch/case.
881 */
882 DPRINT("st%d [%lx]=%lx\n", len, ifa, r2);
883
884 if (len != 2 && len != 4 && len != 8) {
885 DPRINT("unknown size: x6=%d\n", ld.x6_sz);
886 return -1;
887 }
888
889 /* this assumes little-endian byte-order: */
890 if (copy_to_user((void __user *) ifa, &r2, len))
891 return -1;
892
893 /*
894 * stX [r3]=r2,imm(9)
895 *
896 * NOTE:
897 * ld.r3 can never be r0, because r0 would not generate an
898 * unaligned access.
899 */
900 if (ld.op == 0x5) {
901 unsigned long imm;
902
903 /*
904 * form imm9: [12:6] contain first 7bits
905 */
906 imm = ld.x << 7 | ld.r1;
907 /*
908 * sign extend (8bits) if m set
909 */
910 if (ld.m) imm |= SIGN_EXT9;
911 /*
912 * ifa == r3 (NaT is necessarily cleared)
913 */
914 ifa += imm;
915
916 DPRINT("imm=%lx r3=%lx\n", imm, ifa);
917
918 setreg(ld.r3, ifa, 0, regs);
919 }
920 /*
921 * we don't have alat_invalidate_multiple() so we need
922 * to do the complete flush :-<<
923 */
924 ia64_invala();
925
926 /*
927 * stX.rel: use fence instead of release
928 */
929 if (ld.x6_op == 0xd)
930 mb();
931
932 return 0;
933}
934
935/*
936 * floating point operations sizes in bytes
937 */
938static const unsigned char float_fsz[4]={
939 10, /* extended precision (e) */
940 8, /* integer (8) */
941 4, /* single precision (s) */
942 8 /* double precision (d) */
943};
944
945static inline void
946mem2float_extended (struct ia64_fpreg *init, struct ia64_fpreg *final)
947{
948 ia64_ldfe(6, init);
949 ia64_stop();
950 ia64_stf_spill(final, 6);
951}
952
953static inline void
954mem2float_integer (struct ia64_fpreg *init, struct ia64_fpreg *final)
955{
956 ia64_ldf8(6, init);
957 ia64_stop();
958 ia64_stf_spill(final, 6);
959}
960
961static inline void
962mem2float_single (struct ia64_fpreg *init, struct ia64_fpreg *final)
963{
964 ia64_ldfs(6, init);
965 ia64_stop();
966 ia64_stf_spill(final, 6);
967}
968
969static inline void
970mem2float_double (struct ia64_fpreg *init, struct ia64_fpreg *final)
971{
972 ia64_ldfd(6, init);
973 ia64_stop();
974 ia64_stf_spill(final, 6);
975}
976
977static inline void
978float2mem_extended (struct ia64_fpreg *init, struct ia64_fpreg *final)
979{
980 ia64_ldf_fill(6, init);
981 ia64_stop();
982 ia64_stfe(final, 6);
983}
984
985static inline void
986float2mem_integer (struct ia64_fpreg *init, struct ia64_fpreg *final)
987{
988 ia64_ldf_fill(6, init);
989 ia64_stop();
990 ia64_stf8(final, 6);
991}
992
993static inline void
994float2mem_single (struct ia64_fpreg *init, struct ia64_fpreg *final)
995{
996 ia64_ldf_fill(6, init);
997 ia64_stop();
998 ia64_stfs(final, 6);
999}
1000
1001static inline void
1002float2mem_double (struct ia64_fpreg *init, struct ia64_fpreg *final)
1003{
1004 ia64_ldf_fill(6, init);
1005 ia64_stop();
1006 ia64_stfd(final, 6);
1007}
1008
1009static int
1010emulate_load_floatpair (unsigned long ifa, load_store_t ld, struct pt_regs *regs)
1011{
1012 struct ia64_fpreg fpr_init[2];
1013 struct ia64_fpreg fpr_final[2];
1014 unsigned long len = float_fsz[ld.x6_sz];
1015
1016 /*
1017 * fr0 & fr1 don't need to be checked because Illegal Instruction faults have
1018 * higher priority than unaligned faults.
1019 *
1020 * r0 cannot be found as the base as it would never generate an unaligned
1021 * reference.
1022 */
1023
1024 /*
1025 * make sure we get clean buffers
1026 */
1027 memset(&fpr_init, 0, sizeof(fpr_init));
1028 memset(&fpr_final, 0, sizeof(fpr_final));
1029
1030 /*
1031 * ldfpX.a: we don't try to emulate anything but we must
1032 * invalidate the ALAT entry and execute updates, if any.
1033 */
1034 if (ld.x6_op != 0x2) {
1035 /*
1036 * This assumes little-endian byte-order. Note that there is no "ldfpe"
1037 * instruction:
1038 */
1039 if (copy_from_user(&fpr_init[0], (void __user *) ifa, len)
1040 || copy_from_user(&fpr_init[1], (void __user *) (ifa + len), len))
1041 return -1;
1042
1043 DPRINT("ld.r1=%d ld.imm=%d x6_sz=%d\n", ld.r1, ld.imm, ld.x6_sz);
 1044 DDUMP("fpr_init =", &fpr_init, 2*len);
1045 /*
1046 * XXX fixme
1047 * Could optimize inlines by using ldfpX & 2 spills
1048 */
1049 switch( ld.x6_sz ) {
1050 case 0:
1051 mem2float_extended(&fpr_init[0], &fpr_final[0]);
1052 mem2float_extended(&fpr_init[1], &fpr_final[1]);
1053 break;
1054 case 1:
1055 mem2float_integer(&fpr_init[0], &fpr_final[0]);
1056 mem2float_integer(&fpr_init[1], &fpr_final[1]);
1057 break;
1058 case 2:
1059 mem2float_single(&fpr_init[0], &fpr_final[0]);
1060 mem2float_single(&fpr_init[1], &fpr_final[1]);
1061 break;
1062 case 3:
1063 mem2float_double(&fpr_init[0], &fpr_final[0]);
1064 mem2float_double(&fpr_init[1], &fpr_final[1]);
1065 break;
1066 }
1067 DDUMP("fpr_final =", &fpr_final, 2*len);
1068 /*
1069 * XXX fixme
1070 *
1071 * A possible optimization would be to drop fpr_final and directly
1072 * use the storage from the saved context i.e., the actual final
1073 * destination (pt_regs, switch_stack or thread structure).
1074 */
1075 setfpreg(ld.r1, &fpr_final[0], regs);
1076 setfpreg(ld.imm, &fpr_final[1], regs);
1077 }
1078
1079 /*
1080 * Check for updates: only immediate updates are available for this
1081 * instruction.
1082 */
1083 if (ld.m) {
1084 /*
1085 * the immediate is implicit given the ldsz of the operation:
1086 * single: 8 (2x4) and for all others it's 16 (2x8)
1087 */
1088 ifa += len<<1;
1089
1090 /*
1091 * IMPORTANT:
1092 * the fact that we force the NaT of r3 to zero is ONLY valid
1093 * as long as we don't come here with a ldfpX.s.
1094 * For this reason we keep this sanity check
1095 */
1096 if (ld.x6_op == 1 || ld.x6_op == 3)
1097 printk(KERN_ERR "%s: register update on speculative load pair, error\n",
1098 __FUNCTION__);
1099
1100 setreg(ld.r3, ifa, 0, regs);
1101 }
1102
1103 /*
1104 * Invalidate ALAT entries, if any, for both registers.
1105 */
1106 if (ld.x6_op == 0x2) {
1107 invala_fr(ld.r1);
1108 invala_fr(ld.imm);
1109 }
1110 return 0;
1111}
1112
1113
1114static int
1115emulate_load_float (unsigned long ifa, load_store_t ld, struct pt_regs *regs)
1116{
1117 struct ia64_fpreg fpr_init;
1118 struct ia64_fpreg fpr_final;
1119 unsigned long len = float_fsz[ld.x6_sz];
1120
1121 /*
1122 * fr0 & fr1 don't need to be checked because Illegal Instruction
1123 * faults have higher priority than unaligned faults.
1124 *
1125 * r0 cannot be found as the base as it would never generate an
1126 * unaligned reference.
1127 */
1128
1129 /*
1130 * make sure we get clean buffers
1131 */
1132 memset(&fpr_init,0, sizeof(fpr_init));
1133 memset(&fpr_final,0, sizeof(fpr_final));
1134
1135 /*
1136 * ldfX.a we don't try to emulate anything but we must
1137 * invalidate the ALAT entry.
1138 * See comments in ldX for descriptions on how the various loads are handled.
1139 */
1140 if (ld.x6_op != 0x2) {
1141 if (copy_from_user(&fpr_init, (void __user *) ifa, len))
1142 return -1;
1143
1144 DPRINT("ld.r1=%d x6_sz=%d\n", ld.r1, ld.x6_sz);
1145 DDUMP("fpr_init =", &fpr_init, len);
1146 /*
1147 * we only do something for x6_op={0,8,9}
1148 */
1149 switch( ld.x6_sz ) {
1150 case 0:
1151 mem2float_extended(&fpr_init, &fpr_final);
1152 break;
1153 case 1:
1154 mem2float_integer(&fpr_init, &fpr_final);
1155 break;
1156 case 2:
1157 mem2float_single(&fpr_init, &fpr_final);
1158 break;
1159 case 3:
1160 mem2float_double(&fpr_init, &fpr_final);
1161 break;
1162 }
1163 DDUMP("fpr_final =", &fpr_final, len);
1164 /*
1165 * XXX fixme
1166 *
1167 * A possible optimization would be to drop fpr_final and directly
1168 * use the storage from the saved context i.e., the actual final
1169 * destination (pt_regs, switch_stack or thread structure).
1170 */
1171 setfpreg(ld.r1, &fpr_final, regs);
1172 }
1173
1174 /*
1175 * check for updates on any loads
1176 */
1177 if (ld.op == 0x7 || ld.m)
1178 emulate_load_updates(ld.op == 0x7 ? UPD_IMMEDIATE: UPD_REG, ld, regs, ifa);
1179
1180 /*
1181 * invalidate ALAT entry in case of advanced floating point loads
1182 */
1183 if (ld.x6_op == 0x2)
1184 invala_fr(ld.r1);
1185
1186 return 0;
1187}
1188
1189
1190static int
1191emulate_store_float (unsigned long ifa, load_store_t ld, struct pt_regs *regs)
1192{
1193 struct ia64_fpreg fpr_init;
1194 struct ia64_fpreg fpr_final;
1195 unsigned long len = float_fsz[ld.x6_sz];
1196
1197 /*
1198 * make sure we get clean buffers
1199 */
1200 memset(&fpr_init,0, sizeof(fpr_init));
1201 memset(&fpr_final,0, sizeof(fpr_final));
1202
1203 /*
1204 * if we get to this handler, Nat bits on both r3 and r2 have already
1205 * been checked. so we don't need to do it
1206 *
1207 * extract the value to be stored
1208 */
1209 getfpreg(ld.imm, &fpr_init, regs);
1210 /*
1211 * during this step, we extract the spilled registers from the saved
1212 * context i.e., we refill. Then we store (no spill) to temporary
1213 * aligned location
1214 */
1215 switch( ld.x6_sz ) {
1216 case 0:
1217 float2mem_extended(&fpr_init, &fpr_final);
1218 break;
1219 case 1:
1220 float2mem_integer(&fpr_init, &fpr_final);
1221 break;
1222 case 2:
1223 float2mem_single(&fpr_init, &fpr_final);
1224 break;
1225 case 3:
1226 float2mem_double(&fpr_init, &fpr_final);
1227 break;
1228 }
1229 DPRINT("ld.r1=%d x6_sz=%d\n", ld.r1, ld.x6_sz);
1230 DDUMP("fpr_init =", &fpr_init, len);
1231 DDUMP("fpr_final =", &fpr_final, len);
1232
1233 if (copy_to_user((void __user *) ifa, &fpr_final, len))
1234 return -1;
1235
1236 /*
1237 * stfX [r3]=r2,imm(9)
1238 *
1239 * NOTE:
1240 * ld.r3 can never be r0, because r0 would not generate an
1241 * unaligned access.
1242 */
1243 if (ld.op == 0x7) {
1244 unsigned long imm;
1245
1246 /*
1247 * form imm9: [12:6] contain first 7bits
1248 */
1249 imm = ld.x << 7 | ld.r1;
1250 /*
1251 * sign extend (8bits) if m set
1252 */
1253 if (ld.m)
1254 imm |= SIGN_EXT9;
1255 /*
1256 * ifa == r3 (NaT is necessarily cleared)
1257 */
1258 ifa += imm;
1259
1260 DPRINT("imm=%lx r3=%lx\n", imm, ifa);
1261
1262 setreg(ld.r3, ifa, 0, regs);
1263 }
1264 /*
1265 * we don't have alat_invalidate_multiple() so we need
1266 * to do the complete flush :-<<
1267 */
1268 ia64_invala();
1269
1270 return 0;
1271}
1272
1273/*
1274 * Make sure we log the unaligned access, so that user/sysadmin can notice it and
1275 * eventually fix the program. However, we don't want to do that for every access so we
1276 * pace it with jiffies. This isn't really MP-safe, but it doesn't really have to be
1277 * either...
1278 */
1279static int
1280within_logging_rate_limit (void)
1281{
1282 static unsigned long count, last_time;
1283
1284 if (jiffies - last_time > 5*HZ)
1285 count = 0;
1286 if (++count < 5) {
1287 last_time = jiffies;
1288 return 1;
1289 }
1290 return 0;
1291
1292}
1293
1294void
1295ia64_handle_unaligned (unsigned long ifa, struct pt_regs *regs)
1296{
1297 struct ia64_psr *ipsr = ia64_psr(regs);
1298 mm_segment_t old_fs = get_fs();
1299 unsigned long bundle[2];
1300 unsigned long opcode;
1301 struct siginfo si;
1302 const struct exception_table_entry *eh = NULL;
1303 union {
1304 unsigned long l;
1305 load_store_t insn;
1306 } u;
1307 int ret = -1;
1308
1309 if (ia64_psr(regs)->be) {
1310 /* we don't support big-endian accesses */
1311 die_if_kernel("big-endian unaligned accesses are not supported", regs, 0);
1312 goto force_sigbus;
1313 }
1314
1315 /*
1316 * Treat kernel accesses for which there is an exception handler entry the same as
1317 * user-level unaligned accesses. Otherwise, a clever program could trick this
 1318 * handler into reading arbitrary kernel addresses...
1319 */
1320 if (!user_mode(regs))
1321 eh = search_exception_tables(regs->cr_iip + ia64_psr(regs)->ri);
1322 if (user_mode(regs) || eh) {
1323 if ((current->thread.flags & IA64_THREAD_UAC_SIGBUS) != 0)
1324 goto force_sigbus;
1325
1326 if (!(current->thread.flags & IA64_THREAD_UAC_NOPRINT)
1327 && within_logging_rate_limit())
1328 {
1329 char buf[200]; /* comm[] is at most 16 bytes... */
1330 size_t len;
1331
1332 len = sprintf(buf, "%s(%d): unaligned access to 0x%016lx, "
1333 "ip=0x%016lx\n\r", current->comm, current->pid,
1334 ifa, regs->cr_iip + ipsr->ri);
1335 /*
1336 * Don't call tty_write_message() if we're in the kernel; we might
1337 * be holding locks...
1338 */
1339 if (user_mode(regs))
1340 tty_write_message(current->signal->tty, buf);
1341 buf[len-1] = '\0'; /* drop '\r' */
1342 printk(KERN_WARNING "%s", buf); /* watch for command names containing %s */
1343 }
1344 } else {
1345 if (within_logging_rate_limit())
1346 printk(KERN_WARNING "kernel unaligned access to 0x%016lx, ip=0x%016lx\n",
1347 ifa, regs->cr_iip + ipsr->ri);
1348 set_fs(KERNEL_DS);
1349 }
1350
1351 DPRINT("iip=%lx ifa=%lx isr=%lx (ei=%d, sp=%d)\n",
1352 regs->cr_iip, ifa, regs->cr_ipsr, ipsr->ri, ipsr->it);
1353
1354 if (__copy_from_user(bundle, (void __user *) regs->cr_iip, 16))
1355 goto failure;
1356
1357 /*
1358 * extract the instruction from the bundle given the slot number
1359 */
1360 switch (ipsr->ri) {
1361 case 0: u.l = (bundle[0] >> 5); break;
1362 case 1: u.l = (bundle[0] >> 46) | (bundle[1] << 18); break;
1363 case 2: u.l = (bundle[1] >> 23); break;
1364 }
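	/*
	 * Informative note on the extraction above: a bundle is 128 bits, a
	 * 5-bit template followed by three 41-bit slots. Slot 0 is bits
	 * [45:5] of bundle[0]; slot 1 spans the two words (bits [63:46] of
	 * bundle[0] plus bits [22:0] of bundle[1], hence the >>46 | <<18);
	 * slot 2 is bits [63:23] of bundle[1]. Only the low 41 bits of u.l
	 * are meaningful; load_store_t ignores the rest via its pad field.
	 */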
1365 opcode = (u.l >> IA64_OPCODE_SHIFT) & IA64_OPCODE_MASK;
1366
1367 DPRINT("opcode=%lx ld.qp=%d ld.r1=%d ld.imm=%d ld.r3=%d ld.x=%d ld.hint=%d "
1368 "ld.x6=0x%x ld.m=%d ld.op=%d\n", opcode, u.insn.qp, u.insn.r1, u.insn.imm,
1369 u.insn.r3, u.insn.x, u.insn.hint, u.insn.x6_sz, u.insn.m, u.insn.op);
1370
1371 /*
1372 * IMPORTANT:
1373 * Notice that the switch statement DOES not cover all possible instructions
 1374 * that DO generate unaligned references. This is done on purpose because for some
1375 * instructions it DOES NOT make sense to try and emulate the access. Sometimes it
 1376 * is WRONG to try and emulate. Here is a list of instructions we don't emulate, i.e.,
1377 * the program will get a signal and die:
1378 *
1379 * load/store:
1380 * - ldX.spill
1381 * - stX.spill
1382 * Reason: RNATs are based on addresses
1383 * - ld16
1384 * - st16
1385 * Reason: ld16 and st16 are supposed to occur in a single
1386 * memory op
1387 *
1388 * synchronization:
1389 * - cmpxchg
1390 * - fetchadd
1391 * - xchg
1392 * Reason: ATOMIC operations cannot be emulated properly using multiple
1393 * instructions.
1394 *
1395 * speculative loads:
1396 * - ldX.sZ
 1397 * Reason: side effects; code must be ready to deal with failure, so it is simpler
 1398 * to let the load fail.
1399 * ---------------------------------------------------------------------------------
1400 * XXX fixme
1401 *
1402 * I would like to get rid of this switch case and do something
1403 * more elegant.
1404 */
1405 switch (opcode) {
1406 case LDS_OP:
1407 case LDSA_OP:
1408 if (u.insn.x)
1409 /* oops, really a semaphore op (cmpxchg, etc) */
1410 goto failure;
1411 /* no break */
1412 case LDS_IMM_OP:
1413 case LDSA_IMM_OP:
1414 case LDFS_OP:
1415 case LDFSA_OP:
1416 case LDFS_IMM_OP:
1417 /*
1418 * The instruction will be retried with deferred exceptions turned on, and
 1419 * we should get the NaT bit installed
1420 *
1421 * IMPORTANT: When PSR_ED is set, the register & immediate update forms
1422 * are actually executed even though the operation failed. So we don't
1423 * need to take care of this.
1424 */
1425 DPRINT("forcing PSR_ED\n");
1426 regs->cr_ipsr |= IA64_PSR_ED;
1427 goto done;
1428
1429 case LD_OP:
1430 case LDA_OP:
1431 case LDBIAS_OP:
1432 case LDACQ_OP:
1433 case LDCCLR_OP:
1434 case LDCNC_OP:
1435 case LDCCLRACQ_OP:
1436 if (u.insn.x)
1437 /* oops, really a semaphore op (cmpxchg, etc) */
1438 goto failure;
1439 /* no break */
1440 case LD_IMM_OP:
1441 case LDA_IMM_OP:
1442 case LDBIAS_IMM_OP:
1443 case LDACQ_IMM_OP:
1444 case LDCCLR_IMM_OP:
1445 case LDCNC_IMM_OP:
1446 case LDCCLRACQ_IMM_OP:
1447 ret = emulate_load_int(ifa, u.insn, regs);
1448 break;
1449
1450 case ST_OP:
1451 case STREL_OP:
1452 if (u.insn.x)
1453 /* oops, really a semaphore op (cmpxchg, etc) */
1454 goto failure;
1455 /* no break */
1456 case ST_IMM_OP:
1457 case STREL_IMM_OP:
1458 ret = emulate_store_int(ifa, u.insn, regs);
1459 break;
1460
1461 case LDF_OP:
1462 case LDFA_OP:
1463 case LDFCCLR_OP:
1464 case LDFCNC_OP:
1465 case LDF_IMM_OP:
1466 case LDFA_IMM_OP:
1467 case LDFCCLR_IMM_OP:
1468 case LDFCNC_IMM_OP:
1469 if (u.insn.x)
1470 ret = emulate_load_floatpair(ifa, u.insn, regs);
1471 else
1472 ret = emulate_load_float(ifa, u.insn, regs);
1473 break;
1474
1475 case STF_OP:
1476 case STF_IMM_OP:
1477 ret = emulate_store_float(ifa, u.insn, regs);
1478 break;
1479
1480 default:
1481 goto failure;
1482 }
1483 DPRINT("ret=%d\n", ret);
1484 if (ret)
1485 goto failure;
1486
1487 if (ipsr->ri == 2)
1488 /*
1489 * given today's architecture this case is not likely to happen because a
1490 * memory access instruction (M) can never be in the last slot of a
1491 * bundle. But let's keep it for now.
1492 */
1493 regs->cr_iip += 16;
1494 ipsr->ri = (ipsr->ri + 1) & 0x3;
1495
1496 DPRINT("ipsr->ri=%d iip=%lx\n", ipsr->ri, regs->cr_iip);
1497 done:
1498 set_fs(old_fs); /* restore original address limit */
1499 return;
1500
1501 failure:
1502 /* something went wrong... */
1503 if (!user_mode(regs)) {
1504 if (eh) {
1505 ia64_handle_exception(regs, eh);
1506 goto done;
1507 }
1508 die_if_kernel("error during unaligned kernel access\n", regs, ret);
1509 /* NOT_REACHED */
1510 }
1511 force_sigbus:
1512 si.si_signo = SIGBUS;
1513 si.si_errno = 0;
1514 si.si_code = BUS_ADRALN;
1515 si.si_addr = (void __user *) ifa;
1516 si.si_flags = 0;
1517 si.si_isr = 0;
1518 si.si_imm = 0;
1519 force_sig_info(SIGBUS, &si, current);
1520 goto done;
1521}
diff --git a/arch/ia64/kernel/unwind.c b/arch/ia64/kernel/unwind.c
new file mode 100644
index 000000000000..d494ff647cac
--- /dev/null
+++ b/arch/ia64/kernel/unwind.c
@@ -0,0 +1,2306 @@
1/*
2 * Copyright (C) 1999-2004 Hewlett-Packard Co
3 * David Mosberger-Tang <davidm@hpl.hp.com>
4 * Copyright (C) 2003 Fenghua Yu <fenghua.yu@intel.com>
 5 * - Change pt_regs_off() to make it less dependent on the pt_regs structure.
6 */
7/*
8 * This file implements call frame unwind support for the Linux
9 * kernel. Parsing and processing the unwind information is
10 * time-consuming, so this implementation translates the unwind
11 * descriptors into unwind scripts. These scripts are very simple
12 * (basically a sequence of assignments) and efficient to execute.
13 * They are cached for later re-use. Each script is specific for a
14 * given instruction pointer address and the set of predicate values
15 * that the script depends on (most unwind descriptors are
16 * unconditional and scripts often do not depend on predicates at
17 * all). This code is based on the unwind conventions described in
18 * the "IA-64 Software Conventions and Runtime Architecture" manual.
19 *
20 * SMP conventions:
21 * o updates to the global unwind data (in structure "unw") are serialized
22 * by the unw.lock spinlock
23 * o each unwind script has its own read-write lock; a thread must acquire
24 * a read lock before executing a script and must acquire a write lock
25 * before modifying a script
26 * o if both the unw.lock spinlock and a script's read-write lock must be
27 * acquired, then the read-write lock must be acquired first.
28 */
29#include <linux/module.h>
30#include <linux/bootmem.h>
31#include <linux/elf.h>
32#include <linux/kernel.h>
33#include <linux/sched.h>
34#include <linux/slab.h>
35
36#include <asm/unwind.h>
37
38#include <asm/delay.h>
39#include <asm/page.h>
40#include <asm/ptrace.h>
41#include <asm/ptrace_offsets.h>
42#include <asm/rse.h>
43#include <asm/sections.h>
44#include <asm/system.h>
45#include <asm/uaccess.h>
46
47#include "entry.h"
48#include "unwind_i.h"
49
50#define UNW_LOG_CACHE_SIZE 7 /* each unw_script is ~256 bytes in size */
51#define UNW_CACHE_SIZE (1 << UNW_LOG_CACHE_SIZE)
52
53#define UNW_LOG_HASH_SIZE (UNW_LOG_CACHE_SIZE + 1)
54#define UNW_HASH_SIZE (1 << UNW_LOG_HASH_SIZE)
55
 56#define UNW_STATS 0 /* WARNING: this disables interrupts for long time-spans!! */
57
58#ifdef UNW_DEBUG
59 static unsigned int unw_debug_level = UNW_DEBUG;
60# define UNW_DEBUG_ON(n) unw_debug_level >= n
 61 /* Do not include a printk level; not all debug lines end in a newline */
62# define UNW_DPRINT(n, ...) if (UNW_DEBUG_ON(n)) printk(__VA_ARGS__)
63# define inline
64#else /* !UNW_DEBUG */
65# define UNW_DEBUG_ON(n) 0
66# define UNW_DPRINT(n, ...)
67#endif /* UNW_DEBUG */
68
69#if UNW_STATS
70# define STAT(x...) x
71#else
72# define STAT(x...)
73#endif
74
75#define alloc_reg_state() kmalloc(sizeof(struct unw_reg_state), GFP_ATOMIC)
76#define free_reg_state(usr) kfree(usr)
77#define alloc_labeled_state() kmalloc(sizeof(struct unw_labeled_state), GFP_ATOMIC)
78#define free_labeled_state(usr) kfree(usr)
79
80typedef unsigned long unw_word;
81typedef unsigned char unw_hash_index_t;
82
83static struct {
84 spinlock_t lock; /* spinlock for unwind data */
85
86 /* list of unwind tables (one per load-module) */
87 struct unw_table *tables;
88
89 unsigned long r0; /* constant 0 for r0 */
90
91 /* table of registers that prologues can save (and order in which they're saved): */
92 const unsigned char save_order[8];
93
94 /* maps a preserved register index (preg_index) to corresponding switch_stack offset: */
95 unsigned short sw_off[sizeof(struct unw_frame_info) / 8];
96
 97 unsigned short lru_head; /* index of least-recently used script */
98 unsigned short lru_tail; /* index of most-recently used script */
99
100 /* index into unw_frame_info for preserved register i */
101 unsigned short preg_index[UNW_NUM_REGS];
102
103 short pt_regs_offsets[32];
104
105 /* unwind table for the kernel: */
106 struct unw_table kernel_table;
107
108 /* unwind table describing the gate page (kernel code that is mapped into user space): */
109 size_t gate_table_size;
110 unsigned long *gate_table;
111
112 /* hash table that maps instruction pointer to script index: */
113 unsigned short hash[UNW_HASH_SIZE];
114
115 /* script cache: */
116 struct unw_script cache[UNW_CACHE_SIZE];
117
118# ifdef UNW_DEBUG
119 const char *preg_name[UNW_NUM_REGS];
120# endif
121# if UNW_STATS
122 struct {
123 struct {
124 int lookups;
125 int hinted_hits;
126 int normal_hits;
127 int collision_chain_traversals;
128 } cache;
129 struct {
130 unsigned long build_time;
131 unsigned long run_time;
132 unsigned long parse_time;
133 int builds;
134 int news;
135 int collisions;
136 int runs;
137 } script;
138 struct {
139 unsigned long init_time;
140 unsigned long unwind_time;
141 int inits;
142 int unwinds;
143 } api;
144 } stat;
145# endif
146} unw = {
147 .tables = &unw.kernel_table,
148 .lock = SPIN_LOCK_UNLOCKED,
149 .save_order = {
150 UNW_REG_RP, UNW_REG_PFS, UNW_REG_PSP, UNW_REG_PR,
151 UNW_REG_UNAT, UNW_REG_LC, UNW_REG_FPSR, UNW_REG_PRI_UNAT_GR
152 },
153 .preg_index = {
154 offsetof(struct unw_frame_info, pri_unat_loc)/8, /* PRI_UNAT_GR */
155 offsetof(struct unw_frame_info, pri_unat_loc)/8, /* PRI_UNAT_MEM */
156 offsetof(struct unw_frame_info, bsp_loc)/8,
157 offsetof(struct unw_frame_info, bspstore_loc)/8,
158 offsetof(struct unw_frame_info, pfs_loc)/8,
159 offsetof(struct unw_frame_info, rnat_loc)/8,
160 offsetof(struct unw_frame_info, psp)/8,
161 offsetof(struct unw_frame_info, rp_loc)/8,
162 offsetof(struct unw_frame_info, r4)/8,
163 offsetof(struct unw_frame_info, r5)/8,
164 offsetof(struct unw_frame_info, r6)/8,
165 offsetof(struct unw_frame_info, r7)/8,
166 offsetof(struct unw_frame_info, unat_loc)/8,
167 offsetof(struct unw_frame_info, pr_loc)/8,
168 offsetof(struct unw_frame_info, lc_loc)/8,
169 offsetof(struct unw_frame_info, fpsr_loc)/8,
170 offsetof(struct unw_frame_info, b1_loc)/8,
171 offsetof(struct unw_frame_info, b2_loc)/8,
172 offsetof(struct unw_frame_info, b3_loc)/8,
173 offsetof(struct unw_frame_info, b4_loc)/8,
174 offsetof(struct unw_frame_info, b5_loc)/8,
175 offsetof(struct unw_frame_info, f2_loc)/8,
176 offsetof(struct unw_frame_info, f3_loc)/8,
177 offsetof(struct unw_frame_info, f4_loc)/8,
178 offsetof(struct unw_frame_info, f5_loc)/8,
179 offsetof(struct unw_frame_info, fr_loc[16 - 16])/8,
180 offsetof(struct unw_frame_info, fr_loc[17 - 16])/8,
181 offsetof(struct unw_frame_info, fr_loc[18 - 16])/8,
182 offsetof(struct unw_frame_info, fr_loc[19 - 16])/8,
183 offsetof(struct unw_frame_info, fr_loc[20 - 16])/8,
184 offsetof(struct unw_frame_info, fr_loc[21 - 16])/8,
185 offsetof(struct unw_frame_info, fr_loc[22 - 16])/8,
186 offsetof(struct unw_frame_info, fr_loc[23 - 16])/8,
187 offsetof(struct unw_frame_info, fr_loc[24 - 16])/8,
188 offsetof(struct unw_frame_info, fr_loc[25 - 16])/8,
189 offsetof(struct unw_frame_info, fr_loc[26 - 16])/8,
190 offsetof(struct unw_frame_info, fr_loc[27 - 16])/8,
191 offsetof(struct unw_frame_info, fr_loc[28 - 16])/8,
192 offsetof(struct unw_frame_info, fr_loc[29 - 16])/8,
193 offsetof(struct unw_frame_info, fr_loc[30 - 16])/8,
194 offsetof(struct unw_frame_info, fr_loc[31 - 16])/8,
195 },
196 .pt_regs_offsets = {
197 [0] = -1,
198 offsetof(struct pt_regs, r1),
199 offsetof(struct pt_regs, r2),
200 offsetof(struct pt_regs, r3),
201 [4] = -1, [5] = -1, [6] = -1, [7] = -1,
202 offsetof(struct pt_regs, r8),
203 offsetof(struct pt_regs, r9),
204 offsetof(struct pt_regs, r10),
205 offsetof(struct pt_regs, r11),
206 offsetof(struct pt_regs, r12),
207 offsetof(struct pt_regs, r13),
208 offsetof(struct pt_regs, r14),
209 offsetof(struct pt_regs, r15),
210 offsetof(struct pt_regs, r16),
211 offsetof(struct pt_regs, r17),
212 offsetof(struct pt_regs, r18),
213 offsetof(struct pt_regs, r19),
214 offsetof(struct pt_regs, r20),
215 offsetof(struct pt_regs, r21),
216 offsetof(struct pt_regs, r22),
217 offsetof(struct pt_regs, r23),
218 offsetof(struct pt_regs, r24),
219 offsetof(struct pt_regs, r25),
220 offsetof(struct pt_regs, r26),
221 offsetof(struct pt_regs, r27),
222 offsetof(struct pt_regs, r28),
223 offsetof(struct pt_regs, r29),
224 offsetof(struct pt_regs, r30),
225 offsetof(struct pt_regs, r31),
226 },
227 .hash = { [0 ... UNW_HASH_SIZE - 1] = -1 },
228#ifdef UNW_DEBUG
229 .preg_name = {
230 "pri_unat_gr", "pri_unat_mem", "bsp", "bspstore", "ar.pfs", "ar.rnat", "psp", "rp",
231 "r4", "r5", "r6", "r7",
232 "ar.unat", "pr", "ar.lc", "ar.fpsr",
233 "b1", "b2", "b3", "b4", "b5",
234 "f2", "f3", "f4", "f5",
235 "f16", "f17", "f18", "f19", "f20", "f21", "f22", "f23",
236 "f24", "f25", "f26", "f27", "f28", "f29", "f30", "f31"
237 }
238#endif
239};
240
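/*
 * read_only() returns true iff ADDR points at unw.r0, the word that stands in
 * for the always-zero register r0 (see UNW_INSN_MOVE_CONST in run_script()).
 * The register accessors below silently drop writes to such a location.
 */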
241static inline int
242read_only (void *addr)
243{
244 return (unsigned long) ((char *) addr - (char *) &unw.r0) < sizeof(unw.r0);
245}
246
247/*
248 * Returns offset of rREG in struct pt_regs.
249 */
250static inline unsigned long
251pt_regs_off (unsigned long reg)
252{
253 short off = -1;
254
255 if (reg < ARRAY_SIZE(unw.pt_regs_offsets))
256 off = unw.pt_regs_offsets[reg];
257
258 if (off < 0) {
259 UNW_DPRINT(0, "unwind.%s: bad scratch reg r%lu\n", __FUNCTION__, reg);
260 off = 0;
261 }
262 return (unsigned long) off;
263}
264
265static inline struct pt_regs *
266get_scratch_regs (struct unw_frame_info *info)
267{
268 if (!info->pt) {
269 /* This should not happen with valid unwind info. */
270 UNW_DPRINT(0, "unwind.%s: bad unwind info: resetting info->pt\n", __FUNCTION__);
271 if (info->flags & UNW_FLAG_INTERRUPT_FRAME)
272 info->pt = (unsigned long) ((struct pt_regs *) info->psp - 1);
273 else
274 info->pt = info->sp - 16;
275 }
276 UNW_DPRINT(3, "unwind.%s: sp 0x%lx pt 0x%lx\n", __FUNCTION__, info->sp, info->pt);
277 return (struct pt_regs *) info->pt;
278}
279
280/* Unwind accessors. */
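/*
 * All accessors follow the same convention: INFO describes the frame being
 * inspected, WRITE selects the direction of the transfer, and the return
 * value is 0 on success or -1 if the requested register does not exist (or
 * cannot be located) in that frame.  General registers additionally carry a
 * NaT bit via the *nat argument.  When the unwind info recorded no save
 * location, the accessors fall back to the switch_stack (preserved
 * registers) or pt_regs (scratch registers).
 *
 * A minimal read-only example (a sketch; "info" is assumed to have been set
 * up by one of the unw_init_* routines below):
 *
 *	unsigned long pr;
 *
 *	if (unw_access_pr(info, &pr, 0) == 0)
 *		printk("predicates: 0x%lx\n", pr);
 */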
281
282int
283unw_access_gr (struct unw_frame_info *info, int regnum, unsigned long *val, char *nat, int write)
284{
285 unsigned long *addr, *nat_addr, nat_mask = 0, dummy_nat;
286 struct unw_ireg *ireg;
287 struct pt_regs *pt;
288
289 if ((unsigned) regnum - 1 >= 127) {
290 if (regnum == 0 && !write) {
291 *val = 0; /* read r0 always returns 0 */
292 *nat = 0;
293 return 0;
294 }
295 UNW_DPRINT(0, "unwind.%s: trying to access non-existent r%u\n",
296 __FUNCTION__, regnum);
297 return -1;
298 }
299
300 if (regnum < 32) {
301 if (regnum >= 4 && regnum <= 7) {
302 /* access a preserved register */
303 ireg = &info->r4 + (regnum - 4);
304 addr = ireg->loc;
305 if (addr) {
306 nat_addr = addr + ireg->nat.off;
307 switch (ireg->nat.type) {
308 case UNW_NAT_VAL:
309 /* simulate getf.sig/setf.sig */
310 if (write) {
311 if (*nat) {
312 /* write NaTVal and be done with it */
313 addr[0] = 0;
314 addr[1] = 0x1fffe;
315 return 0;
316 }
317 addr[1] = 0x1003e;
318 } else {
319					if (addr[0] == 0 && addr[1] == 0x1fffe) {	/* NaTVal (see write path above) */
320 /* return NaT and be done with it */
321 *val = 0;
322 *nat = 1;
323 return 0;
324 }
325 }
326 /* fall through */
327 case UNW_NAT_NONE:
328 dummy_nat = 0;
329 nat_addr = &dummy_nat;
330 break;
331
332 case UNW_NAT_MEMSTK:
333 nat_mask = (1UL << ((long) addr & 0x1f8)/8);
334 break;
335
336 case UNW_NAT_REGSTK:
337 nat_addr = ia64_rse_rnat_addr(addr);
338 if ((unsigned long) addr < info->regstk.limit
339 || (unsigned long) addr >= info->regstk.top)
340 {
341 UNW_DPRINT(0, "unwind.%s: %p outside of regstk "
342 "[0x%lx-0x%lx)\n",
343 __FUNCTION__, (void *) addr,
344 info->regstk.limit,
345 info->regstk.top);
346 return -1;
347 }
348 if ((unsigned long) nat_addr >= info->regstk.top)
349 nat_addr = &info->sw->ar_rnat;
350 nat_mask = (1UL << ia64_rse_slot_num(addr));
351 break;
352 }
353 } else {
354 addr = &info->sw->r4 + (regnum - 4);
355 nat_addr = &info->sw->ar_unat;
356 nat_mask = (1UL << ((long) addr & 0x1f8)/8);
357 }
358 } else {
359 /* access a scratch register */
360 pt = get_scratch_regs(info);
361 addr = (unsigned long *) ((unsigned long)pt + pt_regs_off(regnum));
362 if (info->pri_unat_loc)
363 nat_addr = info->pri_unat_loc;
364 else
365 nat_addr = &info->sw->ar_unat;
366 nat_mask = (1UL << ((long) addr & 0x1f8)/8);
367 }
368 } else {
369 /* access a stacked register */
370 addr = ia64_rse_skip_regs((unsigned long *) info->bsp, regnum - 32);
371 nat_addr = ia64_rse_rnat_addr(addr);
372 if ((unsigned long) addr < info->regstk.limit
373 || (unsigned long) addr >= info->regstk.top)
374 {
375 UNW_DPRINT(0, "unwind.%s: ignoring attempt to access register outside "
376 "of rbs\n", __FUNCTION__);
377 return -1;
378 }
379 if ((unsigned long) nat_addr >= info->regstk.top)
380 nat_addr = &info->sw->ar_rnat;
381 nat_mask = (1UL << ia64_rse_slot_num(addr));
382 }
383
384 if (write) {
385 if (read_only(addr)) {
386 UNW_DPRINT(0, "unwind.%s: ignoring attempt to write read-only location\n",
387 __FUNCTION__);
388 } else {
389 *addr = *val;
390 if (*nat)
391 *nat_addr |= nat_mask;
392 else
393 *nat_addr &= ~nat_mask;
394 }
395 } else {
396 if ((*nat_addr & nat_mask) == 0) {
397 *val = *addr;
398 *nat = 0;
399 } else {
400 *val = 0; /* if register is a NaT, *addr may contain kernel data! */
401 *nat = 1;
402 }
403 }
404 return 0;
405}
406EXPORT_SYMBOL(unw_access_gr);
407
408int
409unw_access_br (struct unw_frame_info *info, int regnum, unsigned long *val, int write)
410{
411 unsigned long *addr;
412 struct pt_regs *pt;
413
414 switch (regnum) {
415 /* scratch: */
416 case 0: pt = get_scratch_regs(info); addr = &pt->b0; break;
417 case 6: pt = get_scratch_regs(info); addr = &pt->b6; break;
418 case 7: pt = get_scratch_regs(info); addr = &pt->b7; break;
419
420 /* preserved: */
421 case 1: case 2: case 3: case 4: case 5:
422 addr = *(&info->b1_loc + (regnum - 1));
423 if (!addr)
424 addr = &info->sw->b1 + (regnum - 1);
425 break;
426
427 default:
428 UNW_DPRINT(0, "unwind.%s: trying to access non-existent b%u\n",
429 __FUNCTION__, regnum);
430 return -1;
431 }
432 if (write)
433 if (read_only(addr)) {
434 UNW_DPRINT(0, "unwind.%s: ignoring attempt to write read-only location\n",
435 __FUNCTION__);
436 } else
437 *addr = *val;
438 else
439 *val = *addr;
440 return 0;
441}
442EXPORT_SYMBOL(unw_access_br);
443
444int
445unw_access_fr (struct unw_frame_info *info, int regnum, struct ia64_fpreg *val, int write)
446{
447 struct ia64_fpreg *addr = NULL;
448 struct pt_regs *pt;
449
450 if ((unsigned) (regnum - 2) >= 126) {
451 UNW_DPRINT(0, "unwind.%s: trying to access non-existent f%u\n",
452 __FUNCTION__, regnum);
453 return -1;
454 }
455
456 if (regnum <= 5) {
457 addr = *(&info->f2_loc + (regnum - 2));
458 if (!addr)
459 addr = &info->sw->f2 + (regnum - 2);
460 } else if (regnum <= 15) {
461 if (regnum <= 11) {
462 pt = get_scratch_regs(info);
463 addr = &pt->f6 + (regnum - 6);
464 }
465 else
466 addr = &info->sw->f12 + (regnum - 12);
467 } else if (regnum <= 31) {
468 addr = info->fr_loc[regnum - 16];
469 if (!addr)
470 addr = &info->sw->f16 + (regnum - 16);
471 } else {
472 struct task_struct *t = info->task;
473
474 if (write)
475 ia64_sync_fph(t);
476 else
477 ia64_flush_fph(t);
478 addr = t->thread.fph + (regnum - 32);
479 }
480
481 if (write)
482 if (read_only(addr)) {
483 UNW_DPRINT(0, "unwind.%s: ignoring attempt to write read-only location\n",
484 __FUNCTION__);
485 } else
486 *addr = *val;
487 else
488 *val = *addr;
489 return 0;
490}
491EXPORT_SYMBOL(unw_access_fr);
492
493int
494unw_access_ar (struct unw_frame_info *info, int regnum, unsigned long *val, int write)
495{
496 unsigned long *addr;
497 struct pt_regs *pt;
498
499 switch (regnum) {
500 case UNW_AR_BSP:
501 addr = info->bsp_loc;
502 if (!addr)
503 addr = &info->sw->ar_bspstore;
504 break;
505
506 case UNW_AR_BSPSTORE:
507 addr = info->bspstore_loc;
508 if (!addr)
509 addr = &info->sw->ar_bspstore;
510 break;
511
512 case UNW_AR_PFS:
513 addr = info->pfs_loc;
514 if (!addr)
515 addr = &info->sw->ar_pfs;
516 break;
517
518 case UNW_AR_RNAT:
519 addr = info->rnat_loc;
520 if (!addr)
521 addr = &info->sw->ar_rnat;
522 break;
523
524 case UNW_AR_UNAT:
525 addr = info->unat_loc;
526 if (!addr)
527 addr = &info->sw->ar_unat;
528 break;
529
530 case UNW_AR_LC:
531 addr = info->lc_loc;
532 if (!addr)
533 addr = &info->sw->ar_lc;
534 break;
535
536 case UNW_AR_EC:
537 if (!info->cfm_loc)
538 return -1;
539 if (write)
540 *info->cfm_loc =
541 (*info->cfm_loc & ~(0x3fUL << 52)) | ((*val & 0x3f) << 52);
542 else
543 *val = (*info->cfm_loc >> 52) & 0x3f;
544 return 0;
545
546 case UNW_AR_FPSR:
547 addr = info->fpsr_loc;
548 if (!addr)
549 addr = &info->sw->ar_fpsr;
550 break;
551
552 case UNW_AR_RSC:
553 pt = get_scratch_regs(info);
554 addr = &pt->ar_rsc;
555 break;
556
557 case UNW_AR_CCV:
558 pt = get_scratch_regs(info);
559 addr = &pt->ar_ccv;
560 break;
561
562 case UNW_AR_CSD:
563 pt = get_scratch_regs(info);
564 addr = &pt->ar_csd;
565 break;
566
567 case UNW_AR_SSD:
568 pt = get_scratch_regs(info);
569 addr = &pt->ar_ssd;
570 break;
571
572 default:
573 UNW_DPRINT(0, "unwind.%s: trying to access non-existent ar%u\n",
574 __FUNCTION__, regnum);
575 return -1;
576 }
577
578 if (write) {
579 if (read_only(addr)) {
580 UNW_DPRINT(0, "unwind.%s: ignoring attempt to write read-only location\n",
581 __FUNCTION__);
582 } else
583 *addr = *val;
584 } else
585 *val = *addr;
586 return 0;
587}
588EXPORT_SYMBOL(unw_access_ar);
589
590int
591unw_access_pr (struct unw_frame_info *info, unsigned long *val, int write)
592{
593 unsigned long *addr;
594
595 addr = info->pr_loc;
596 if (!addr)
597 addr = &info->sw->pr;
598
599 if (write) {
600 if (read_only(addr)) {
601 UNW_DPRINT(0, "unwind.%s: ignoring attempt to write read-only location\n",
602 __FUNCTION__);
603 } else
604 *addr = *val;
605 } else
606 *val = *addr;
607 return 0;
608}
609EXPORT_SYMBOL(unw_access_pr);
610
611
612/* Routines to manipulate the state stack. */
613
614static inline void
615push (struct unw_state_record *sr)
616{
617 struct unw_reg_state *rs;
618
619 rs = alloc_reg_state();
620 if (!rs) {
621 printk(KERN_ERR "unwind: cannot stack reg state!\n");
622 return;
623 }
624 memcpy(rs, &sr->curr, sizeof(*rs));
625 sr->curr.next = rs;
626}
627
628static void
629pop (struct unw_state_record *sr)
630{
631 struct unw_reg_state *rs = sr->curr.next;
632
633 if (!rs) {
634 printk(KERN_ERR "unwind: stack underflow!\n");
635 return;
636 }
637 memcpy(&sr->curr, rs, sizeof(*rs));
638 free_reg_state(rs);
639}
640
641/* Make a copy of the state stack. Non-recursive to avoid stack overflows. */
642static struct unw_reg_state *
643dup_state_stack (struct unw_reg_state *rs)
644{
645 struct unw_reg_state *copy, *prev = NULL, *first = NULL;
646
647 while (rs) {
648 copy = alloc_reg_state();
649 if (!copy) {
650 printk(KERN_ERR "unwind.dup_state_stack: out of memory\n");
651 return NULL;
652 }
653 memcpy(copy, rs, sizeof(*copy));
654 if (first)
655 prev->next = copy;
656 else
657 first = copy;
658 rs = rs->next;
659 prev = copy;
660 }
661 return first;
662}
663
664/* Free all stacked register states (but not RS itself). */
665static void
666free_state_stack (struct unw_reg_state *rs)
667{
668 struct unw_reg_state *p, *next;
669
670 for (p = rs->next; p != NULL; p = next) {
671 next = p->next;
672 free_reg_state(p);
673 }
674 rs->next = NULL;
675}
676
677/* Unwind decoder routines */
678
679static enum unw_register_index __attribute_const__
680decode_abreg (unsigned char abreg, int memory)
681{
682 switch (abreg) {
683 case 0x04 ... 0x07: return UNW_REG_R4 + (abreg - 0x04);
684 case 0x22 ... 0x25: return UNW_REG_F2 + (abreg - 0x22);
685 case 0x30 ... 0x3f: return UNW_REG_F16 + (abreg - 0x30);
686 case 0x41 ... 0x45: return UNW_REG_B1 + (abreg - 0x41);
687 case 0x60: return UNW_REG_PR;
688 case 0x61: return UNW_REG_PSP;
689 case 0x62: return memory ? UNW_REG_PRI_UNAT_MEM : UNW_REG_PRI_UNAT_GR;
690 case 0x63: return UNW_REG_RP;
691 case 0x64: return UNW_REG_BSP;
692 case 0x65: return UNW_REG_BSPSTORE;
693 case 0x66: return UNW_REG_RNAT;
694 case 0x67: return UNW_REG_UNAT;
695 case 0x68: return UNW_REG_FPSR;
696 case 0x69: return UNW_REG_PFS;
697 case 0x6a: return UNW_REG_LC;
698 default:
699 break;
700 }
701 UNW_DPRINT(0, "unwind.%s: bad abreg=0x%x\n", __FUNCTION__, abreg);
702 return UNW_REG_LC;
703}
704
705static void
706set_reg (struct unw_reg_info *reg, enum unw_where where, int when, unsigned long val)
707{
708 reg->val = val;
709 reg->where = where;
710 if (reg->when == UNW_WHEN_NEVER)
711 reg->when = when;
712}
713
714static void
715alloc_spill_area (unsigned long *offp, unsigned long regsize,
716 struct unw_reg_info *lo, struct unw_reg_info *hi)
717{
718 struct unw_reg_info *reg;
719
720 for (reg = hi; reg >= lo; --reg) {
721 if (reg->where == UNW_WHERE_SPILL_HOME) {
722 reg->where = UNW_WHERE_PSPREL;
723 *offp -= regsize;
724 reg->val = *offp;
725 }
726 }
727}
728
729static inline void
730spill_next_when (struct unw_reg_info **regp, struct unw_reg_info *lim, unw_word t)
731{
732 struct unw_reg_info *reg;
733
734 for (reg = *regp; reg <= lim; ++reg) {
735 if (reg->where == UNW_WHERE_SPILL_HOME) {
736 reg->when = t;
737 *regp = reg + 1;
738 return;
739 }
740 }
741 UNW_DPRINT(0, "unwind.%s: excess spill!\n", __FUNCTION__);
742}
743
744static inline void
745finish_prologue (struct unw_state_record *sr)
746{
747 struct unw_reg_info *reg;
748 unsigned long off;
749 int i;
750
751 /*
752 * First, resolve implicit register save locations (see Section "11.4.2.3 Rules
753 * for Using Unwind Descriptors", rule 3):
754 */
755 for (i = 0; i < (int) ARRAY_SIZE(unw.save_order); ++i) {
756 reg = sr->curr.reg + unw.save_order[i];
757 if (reg->where == UNW_WHERE_GR_SAVE) {
758 reg->where = UNW_WHERE_GR;
759 reg->val = sr->gr_save_loc++;
760 }
761 }
762
763 /*
764 * Next, compute when the fp, general, and branch registers get
765 * saved. This must come before alloc_spill_area() because
766 * we need to know which registers are spilled to their home
767 * locations.
768 */
769 if (sr->imask) {
770 unsigned char kind, mask = 0, *cp = sr->imask;
771 int t;
772 static const unsigned char limit[3] = {
773 UNW_REG_F31, UNW_REG_R7, UNW_REG_B5
774 };
775 struct unw_reg_info *(regs[3]);
776
777 regs[0] = sr->curr.reg + UNW_REG_F2;
778 regs[1] = sr->curr.reg + UNW_REG_R4;
779 regs[2] = sr->curr.reg + UNW_REG_B1;
780
781 for (t = 0; t < sr->region_len; ++t) {
782 if ((t & 3) == 0)
783 mask = *cp++;
784 kind = (mask >> 2*(3-(t & 3))) & 3;
785 if (kind > 0)
786 spill_next_when(&regs[kind - 1], sr->curr.reg + limit[kind - 1],
787 sr->region_start + t);
788 }
789 }
790 /*
791 * Next, lay out the memory stack spill area:
792 */
793 if (sr->any_spills) {
794 off = sr->spill_offset;
795 alloc_spill_area(&off, 16, sr->curr.reg + UNW_REG_F2, sr->curr.reg + UNW_REG_F31);
796 alloc_spill_area(&off, 8, sr->curr.reg + UNW_REG_B1, sr->curr.reg + UNW_REG_B5);
797 alloc_spill_area(&off, 8, sr->curr.reg + UNW_REG_R4, sr->curr.reg + UNW_REG_R7);
798 }
799}
800
801/*
802 * Region header descriptors.
803 */
804
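/*
 * desc_prologue() starts a new unwind region.  Regions follow each other in
 * "time", counted in instruction slots: the previous region covered
 * region_start..region_start+region_len-1, so the new one begins right after
 * it.  For a prologue region, MASK says which of rp, ar.pfs, psp and pr were
 * saved to consecutive general registers starting at GRSAVE (in the order
 * given by unw.save_order[] above).
 */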
805static void
806desc_prologue (int body, unw_word rlen, unsigned char mask, unsigned char grsave,
807 struct unw_state_record *sr)
808{
809 int i, region_start;
810
811 if (!(sr->in_body || sr->first_region))
812 finish_prologue(sr);
813 sr->first_region = 0;
814
815 /* check if we're done: */
816 if (sr->when_target < sr->region_start + sr->region_len) {
817 sr->done = 1;
818 return;
819 }
820
821 region_start = sr->region_start + sr->region_len;
822
823 for (i = 0; i < sr->epilogue_count; ++i)
824 pop(sr);
825 sr->epilogue_count = 0;
826 sr->epilogue_start = UNW_WHEN_NEVER;
827
828 sr->region_start = region_start;
829 sr->region_len = rlen;
830 sr->in_body = body;
831
832 if (!body) {
833 push(sr);
834
835 for (i = 0; i < 4; ++i) {
836 if (mask & 0x8)
837 set_reg(sr->curr.reg + unw.save_order[i], UNW_WHERE_GR,
838 sr->region_start + sr->region_len - 1, grsave++);
839 mask <<= 1;
840 }
841 sr->gr_save_loc = grsave;
842 sr->any_spills = 0;
843 sr->imask = NULL;
844 sr->spill_offset = 0x10; /* default to psp+16 */
845 }
846}
847
848/*
849 * Prologue descriptors.
850 */
851
852static inline void
853desc_abi (unsigned char abi, unsigned char context, struct unw_state_record *sr)
854{
855 if (abi == 3 && context == 'i') {
856 sr->flags |= UNW_FLAG_INTERRUPT_FRAME;
857 UNW_DPRINT(3, "unwind.%s: interrupt frame\n", __FUNCTION__);
858 }
859 else
860		UNW_DPRINT(0, "unwind.%s: ignoring unwabi(abi=0x%x,context=0x%x)\n",
861 __FUNCTION__, abi, context);
862}
863
864static inline void
865desc_br_gr (unsigned char brmask, unsigned char gr, struct unw_state_record *sr)
866{
867 int i;
868
869 for (i = 0; i < 5; ++i) {
870 if (brmask & 1)
871 set_reg(sr->curr.reg + UNW_REG_B1 + i, UNW_WHERE_GR,
872 sr->region_start + sr->region_len - 1, gr++);
873 brmask >>= 1;
874 }
875}
876
877static inline void
878desc_br_mem (unsigned char brmask, struct unw_state_record *sr)
879{
880 int i;
881
882 for (i = 0; i < 5; ++i) {
883 if (brmask & 1) {
884 set_reg(sr->curr.reg + UNW_REG_B1 + i, UNW_WHERE_SPILL_HOME,
885 sr->region_start + sr->region_len - 1, 0);
886 sr->any_spills = 1;
887 }
888 brmask >>= 1;
889 }
890}
891
892static inline void
893desc_frgr_mem (unsigned char grmask, unw_word frmask, struct unw_state_record *sr)
894{
895 int i;
896
897 for (i = 0; i < 4; ++i) {
898 if ((grmask & 1) != 0) {
899 set_reg(sr->curr.reg + UNW_REG_R4 + i, UNW_WHERE_SPILL_HOME,
900 sr->region_start + sr->region_len - 1, 0);
901 sr->any_spills = 1;
902 }
903 grmask >>= 1;
904 }
905 for (i = 0; i < 20; ++i) {
906 if ((frmask & 1) != 0) {
907			int base = (i < 4) ? UNW_REG_F2 : UNW_REG_F16 - 4;	/* bits 0-3: f2-f5, bits 4-19: f16-f31 */
908 set_reg(sr->curr.reg + base + i, UNW_WHERE_SPILL_HOME,
909 sr->region_start + sr->region_len - 1, 0);
910 sr->any_spills = 1;
911 }
912 frmask >>= 1;
913 }
914}
915
916static inline void
917desc_fr_mem (unsigned char frmask, struct unw_state_record *sr)
918{
919 int i;
920
921 for (i = 0; i < 4; ++i) {
922 if ((frmask & 1) != 0) {
923 set_reg(sr->curr.reg + UNW_REG_F2 + i, UNW_WHERE_SPILL_HOME,
924 sr->region_start + sr->region_len - 1, 0);
925 sr->any_spills = 1;
926 }
927 frmask >>= 1;
928 }
929}
930
931static inline void
932desc_gr_gr (unsigned char grmask, unsigned char gr, struct unw_state_record *sr)
933{
934 int i;
935
936 for (i = 0; i < 4; ++i) {
937 if ((grmask & 1) != 0)
938 set_reg(sr->curr.reg + UNW_REG_R4 + i, UNW_WHERE_GR,
939 sr->region_start + sr->region_len - 1, gr++);
940 grmask >>= 1;
941 }
942}
943
944static inline void
945desc_gr_mem (unsigned char grmask, struct unw_state_record *sr)
946{
947 int i;
948
949 for (i = 0; i < 4; ++i) {
950 if ((grmask & 1) != 0) {
951 set_reg(sr->curr.reg + UNW_REG_R4 + i, UNW_WHERE_SPILL_HOME,
952 sr->region_start + sr->region_len - 1, 0);
953 sr->any_spills = 1;
954 }
955 grmask >>= 1;
956 }
957}
958
959static inline void
960desc_mem_stack_f (unw_word t, unw_word size, struct unw_state_record *sr)
961{
962 set_reg(sr->curr.reg + UNW_REG_PSP, UNW_WHERE_NONE,
963 sr->region_start + min_t(int, t, sr->region_len - 1), 16*size);
964}
965
966static inline void
967desc_mem_stack_v (unw_word t, struct unw_state_record *sr)
968{
969 sr->curr.reg[UNW_REG_PSP].when = sr->region_start + min_t(int, t, sr->region_len - 1);
970}
971
972static inline void
973desc_reg_gr (unsigned char reg, unsigned char dst, struct unw_state_record *sr)
974{
975 set_reg(sr->curr.reg + reg, UNW_WHERE_GR, sr->region_start + sr->region_len - 1, dst);
976}
977
978static inline void
979desc_reg_psprel (unsigned char reg, unw_word pspoff, struct unw_state_record *sr)
980{
981 set_reg(sr->curr.reg + reg, UNW_WHERE_PSPREL, sr->region_start + sr->region_len - 1,
982 0x10 - 4*pspoff);
983}
984
985static inline void
986desc_reg_sprel (unsigned char reg, unw_word spoff, struct unw_state_record *sr)
987{
988 set_reg(sr->curr.reg + reg, UNW_WHERE_SPREL, sr->region_start + sr->region_len - 1,
989 4*spoff);
990}
991
992static inline void
993desc_rp_br (unsigned char dst, struct unw_state_record *sr)
994{
995 sr->return_link_reg = dst;
996}
997
998static inline void
999desc_reg_when (unsigned char regnum, unw_word t, struct unw_state_record *sr)
1000{
1001 struct unw_reg_info *reg = sr->curr.reg + regnum;
1002
1003 if (reg->where == UNW_WHERE_NONE)
1004 reg->where = UNW_WHERE_GR_SAVE;
1005 reg->when = sr->region_start + min_t(int, t, sr->region_len - 1);
1006}
1007
1008static inline void
1009desc_spill_base (unw_word pspoff, struct unw_state_record *sr)
1010{
1011 sr->spill_offset = 0x10 - 4*pspoff;
1012}
1013
1014static inline unsigned char *
1015desc_spill_mask (unsigned char *imaskp, struct unw_state_record *sr)
1016{
1017 sr->imask = imaskp;
1018 return imaskp + (2*sr->region_len + 7)/8;
1019}
1020
1021/*
1022 * Body descriptors.
1023 */
1024static inline void
1025desc_epilogue (unw_word t, unw_word ecount, struct unw_state_record *sr)
1026{
1027 sr->epilogue_start = sr->region_start + sr->region_len - 1 - t;
1028 sr->epilogue_count = ecount + 1;
1029}
1030
1031static inline void
1032desc_copy_state (unw_word label, struct unw_state_record *sr)
1033{
1034 struct unw_labeled_state *ls;
1035
1036 for (ls = sr->labeled_states; ls; ls = ls->next) {
1037 if (ls->label == label) {
1038 free_state_stack(&sr->curr);
1039 memcpy(&sr->curr, &ls->saved_state, sizeof(sr->curr));
1040 sr->curr.next = dup_state_stack(ls->saved_state.next);
1041 return;
1042 }
1043 }
1044 printk(KERN_ERR "unwind: failed to find state labeled 0x%lx\n", label);
1045}
1046
1047static inline void
1048desc_label_state (unw_word label, struct unw_state_record *sr)
1049{
1050 struct unw_labeled_state *ls;
1051
1052 ls = alloc_labeled_state();
1053 if (!ls) {
1054 printk(KERN_ERR "unwind.desc_label_state(): out of memory\n");
1055 return;
1056 }
1057 ls->label = label;
1058 memcpy(&ls->saved_state, &sr->curr, sizeof(ls->saved_state));
1059 ls->saved_state.next = dup_state_stack(sr->curr.next);
1060
1061 /* insert into list of labeled states: */
1062 ls->next = sr->labeled_states;
1063 sr->labeled_states = ls;
1064}
1065
1066/*
1067 * General descriptors.
1068 */
1069
1070static inline int
1071desc_is_active (unsigned char qp, unw_word t, struct unw_state_record *sr)
1072{
1073 if (sr->when_target <= sr->region_start + min_t(int, t, sr->region_len - 1))
1074 return 0;
1075 if (qp > 0) {
1076 if ((sr->pr_val & (1UL << qp)) == 0)
1077 return 0;
1078 sr->pr_mask |= (1UL << qp);
1079 }
1080 return 1;
1081}
1082
1083static inline void
1084desc_restore_p (unsigned char qp, unw_word t, unsigned char abreg, struct unw_state_record *sr)
1085{
1086 struct unw_reg_info *r;
1087
1088 if (!desc_is_active(qp, t, sr))
1089 return;
1090
1091 r = sr->curr.reg + decode_abreg(abreg, 0);
1092 r->where = UNW_WHERE_NONE;
1093 r->when = UNW_WHEN_NEVER;
1094 r->val = 0;
1095}
1096
1097static inline void
1098desc_spill_reg_p (unsigned char qp, unw_word t, unsigned char abreg, unsigned char x,
1099 unsigned char ytreg, struct unw_state_record *sr)
1100{
1101 enum unw_where where = UNW_WHERE_GR;
1102 struct unw_reg_info *r;
1103
1104 if (!desc_is_active(qp, t, sr))
1105 return;
1106
1107 if (x)
1108 where = UNW_WHERE_BR;
1109 else if (ytreg & 0x80)
1110 where = UNW_WHERE_FR;
1111
1112 r = sr->curr.reg + decode_abreg(abreg, 0);
1113 r->where = where;
1114 r->when = sr->region_start + min_t(int, t, sr->region_len - 1);
1115 r->val = (ytreg & 0x7f);
1116}
1117
1118static inline void
1119desc_spill_psprel_p (unsigned char qp, unw_word t, unsigned char abreg, unw_word pspoff,
1120 struct unw_state_record *sr)
1121{
1122 struct unw_reg_info *r;
1123
1124 if (!desc_is_active(qp, t, sr))
1125 return;
1126
1127 r = sr->curr.reg + decode_abreg(abreg, 1);
1128 r->where = UNW_WHERE_PSPREL;
1129 r->when = sr->region_start + min_t(int, t, sr->region_len - 1);
1130 r->val = 0x10 - 4*pspoff;
1131}
1132
1133static inline void
1134desc_spill_sprel_p (unsigned char qp, unw_word t, unsigned char abreg, unw_word spoff,
1135 struct unw_state_record *sr)
1136{
1137 struct unw_reg_info *r;
1138
1139 if (!desc_is_active(qp, t, sr))
1140 return;
1141
1142 r = sr->curr.reg + decode_abreg(abreg, 1);
1143 r->where = UNW_WHERE_SPREL;
1144 r->when = sr->region_start + min_t(int, t, sr->region_len - 1);
1145 r->val = 4*spoff;
1146}
1147
1148#define UNW_DEC_BAD_CODE(code) printk(KERN_ERR "unwind: unknown code 0x%02x\n", \
1149 code);
1150
1151/*
1152 * region headers:
1153 */
1154#define UNW_DEC_PROLOGUE_GR(fmt,r,m,gr,arg) desc_prologue(0,r,m,gr,arg)
1155#define UNW_DEC_PROLOGUE(fmt,b,r,arg) desc_prologue(b,r,0,32,arg)
1156/*
1157 * prologue descriptors:
1158 */
1159#define UNW_DEC_ABI(fmt,a,c,arg) desc_abi(a,c,arg)
1160#define UNW_DEC_BR_GR(fmt,b,g,arg) desc_br_gr(b,g,arg)
1161#define UNW_DEC_BR_MEM(fmt,b,arg) desc_br_mem(b,arg)
1162#define UNW_DEC_FRGR_MEM(fmt,g,f,arg) desc_frgr_mem(g,f,arg)
1163#define UNW_DEC_FR_MEM(fmt,f,arg) desc_fr_mem(f,arg)
1164#define UNW_DEC_GR_GR(fmt,m,g,arg) desc_gr_gr(m,g,arg)
1165#define UNW_DEC_GR_MEM(fmt,m,arg) desc_gr_mem(m,arg)
1166#define UNW_DEC_MEM_STACK_F(fmt,t,s,arg) desc_mem_stack_f(t,s,arg)
1167#define UNW_DEC_MEM_STACK_V(fmt,t,arg) desc_mem_stack_v(t,arg)
1168#define UNW_DEC_REG_GR(fmt,r,d,arg) desc_reg_gr(r,d,arg)
1169#define UNW_DEC_REG_PSPREL(fmt,r,o,arg) desc_reg_psprel(r,o,arg)
1170#define UNW_DEC_REG_SPREL(fmt,r,o,arg) desc_reg_sprel(r,o,arg)
1171#define UNW_DEC_REG_WHEN(fmt,r,t,arg) desc_reg_when(r,t,arg)
1172#define UNW_DEC_PRIUNAT_WHEN_GR(fmt,t,arg) desc_reg_when(UNW_REG_PRI_UNAT_GR,t,arg)
1173#define UNW_DEC_PRIUNAT_WHEN_MEM(fmt,t,arg) desc_reg_when(UNW_REG_PRI_UNAT_MEM,t,arg)
1174#define UNW_DEC_PRIUNAT_GR(fmt,r,arg) desc_reg_gr(UNW_REG_PRI_UNAT_GR,r,arg)
1175#define UNW_DEC_PRIUNAT_PSPREL(fmt,o,arg) desc_reg_psprel(UNW_REG_PRI_UNAT_MEM,o,arg)
1176#define UNW_DEC_PRIUNAT_SPREL(fmt,o,arg) desc_reg_sprel(UNW_REG_PRI_UNAT_MEM,o,arg)
1177#define UNW_DEC_RP_BR(fmt,d,arg) desc_rp_br(d,arg)
1178#define UNW_DEC_SPILL_BASE(fmt,o,arg) desc_spill_base(o,arg)
1179#define UNW_DEC_SPILL_MASK(fmt,m,arg) (m = desc_spill_mask(m,arg))
1180/*
1181 * body descriptors:
1182 */
1183#define UNW_DEC_EPILOGUE(fmt,t,c,arg) desc_epilogue(t,c,arg)
1184#define UNW_DEC_COPY_STATE(fmt,l,arg) desc_copy_state(l,arg)
1185#define UNW_DEC_LABEL_STATE(fmt,l,arg) desc_label_state(l,arg)
1186/*
1187 * general unwind descriptors:
1188 */
1189#define UNW_DEC_SPILL_REG_P(f,p,t,a,x,y,arg) desc_spill_reg_p(p,t,a,x,y,arg)
1190#define UNW_DEC_SPILL_REG(f,t,a,x,y,arg) desc_spill_reg_p(0,t,a,x,y,arg)
1191#define UNW_DEC_SPILL_PSPREL_P(f,p,t,a,o,arg) desc_spill_psprel_p(p,t,a,o,arg)
1192#define UNW_DEC_SPILL_PSPREL(f,t,a,o,arg) desc_spill_psprel_p(0,t,a,o,arg)
1193#define UNW_DEC_SPILL_SPREL_P(f,p,t,a,o,arg) desc_spill_sprel_p(p,t,a,o,arg)
1194#define UNW_DEC_SPILL_SPREL(f,t,a,o,arg) desc_spill_sprel_p(0,t,a,o,arg)
1195#define UNW_DEC_RESTORE_P(f,p,t,a,arg) desc_restore_p(p,t,a,arg)
1196#define UNW_DEC_RESTORE(f,t,a,arg) desc_restore_p(0,t,a,arg)
1197
1198#include "unwind_decoder.c"
1199
1200
1201/* Unwind scripts. */
1202
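/*
 * Compiled scripts live in a small cache: unw.hash[] maps hash(ip) to an
 * index into unw.cache[], collisions are chained through script->coll_chain,
 * and unw.lru_head/unw.lru_tail order the entries for reuse by script_new().
 *
 * hash() is a Fibonacci hash of the bundle address: the low 4 bits of IP only
 * encode the slot number within the 16-byte bundle and are shifted out, the
 * result is multiplied by a constant derived from the golden ratio, and the
 * top UNW_LOG_HASH_SIZE bits of the product select the hash bucket.
 */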
1203static inline unw_hash_index_t
1204hash (unsigned long ip)
1205{
1206#	define hashmagic	0x9e3779b97f4a7c16UL	/* based on ((sqrt(5)-1)/2)*2^64 */
1207
1208 return (ip >> 4)*hashmagic >> (64 - UNW_LOG_HASH_SIZE);
1209#undef hashmagic
1210}
1211
1212static inline long
1213cache_match (struct unw_script *script, unsigned long ip, unsigned long pr)
1214{
1215 read_lock(&script->lock);
1216 if (ip == script->ip && ((pr ^ script->pr_val) & script->pr_mask) == 0)
1217 /* keep the read lock... */
1218 return 1;
1219 read_unlock(&script->lock);
1220 return 0;
1221}
1222
1223static inline struct unw_script *
1224script_lookup (struct unw_frame_info *info)
1225{
1226 struct unw_script *script = unw.cache + info->hint;
1227 unsigned short index;
1228 unsigned long ip, pr;
1229
1230 if (UNW_DEBUG_ON(0))
1231 return NULL; /* Always regenerate scripts in debug mode */
1232
1233 STAT(++unw.stat.cache.lookups);
1234
1235 ip = info->ip;
1236 pr = info->pr;
1237
1238 if (cache_match(script, ip, pr)) {
1239 STAT(++unw.stat.cache.hinted_hits);
1240 return script;
1241 }
1242
1243 index = unw.hash[hash(ip)];
1244 if (index >= UNW_CACHE_SIZE)
1245 return NULL;
1246
1247 script = unw.cache + index;
1248 while (1) {
1249 if (cache_match(script, ip, pr)) {
1250 /* update hint; no locking required as single-word writes are atomic */
1251 STAT(++unw.stat.cache.normal_hits);
1252 unw.cache[info->prev_script].hint = script - unw.cache;
1253 return script;
1254 }
1255 if (script->coll_chain >= UNW_HASH_SIZE)
1256 return NULL;
1257 script = unw.cache + script->coll_chain;
1258 STAT(++unw.stat.cache.collision_chain_traversals);
1259 }
1260}
1261
1262/*
1263 * On returning, a write lock for the SCRIPT is still being held.
1264 */
1265static inline struct unw_script *
1266script_new (unsigned long ip)
1267{
1268 struct unw_script *script, *prev, *tmp;
1269 unw_hash_index_t index;
1270 unsigned short head;
1271
1272 STAT(++unw.stat.script.news);
1273
1274 /*
1275 * Can't (easily) use cmpxchg() here because of ABA problem
1276 * that is intrinsic in cmpxchg()...
1277 */
1278 head = unw.lru_head;
1279 script = unw.cache + head;
1280 unw.lru_head = script->lru_chain;
1281
1282 /*
1283 * We'd deadlock here if we interrupted a thread that is holding a read lock on
1284 * script->lock. Thus, if the write_trylock() fails, we simply bail out. The
1285 * alternative would be to disable interrupts whenever we hold a read-lock, but
1286 * that seems silly.
1287 */
1288 if (!write_trylock(&script->lock))
1289 return NULL;
1290
1291 /* re-insert script at the tail of the LRU chain: */
1292 unw.cache[unw.lru_tail].lru_chain = head;
1293 unw.lru_tail = head;
1294
1295 /* remove the old script from the hash table (if it's there): */
1296 if (script->ip) {
1297 index = hash(script->ip);
1298 tmp = unw.cache + unw.hash[index];
1299 prev = NULL;
1300 while (1) {
1301 if (tmp == script) {
1302 if (prev)
1303 prev->coll_chain = tmp->coll_chain;
1304 else
1305 unw.hash[index] = tmp->coll_chain;
1306 break;
1307 } else
1308 prev = tmp;
1309 if (tmp->coll_chain >= UNW_CACHE_SIZE)
1310 /* old script wasn't in the hash-table */
1311 break;
1312 tmp = unw.cache + tmp->coll_chain;
1313 }
1314 }
1315
1316 /* enter new script in the hash table */
1317 index = hash(ip);
1318 script->coll_chain = unw.hash[index];
1319 unw.hash[index] = script - unw.cache;
1320
1321 script->ip = ip; /* set new IP while we're holding the locks */
1322
1323 STAT(if (script->coll_chain < UNW_CACHE_SIZE) ++unw.stat.script.collisions);
1324
1325 script->flags = 0;
1326 script->hint = 0;
1327 script->count = 0;
1328 return script;
1329}
1330
1331static void
1332script_finalize (struct unw_script *script, struct unw_state_record *sr)
1333{
1334 script->pr_mask = sr->pr_mask;
1335 script->pr_val = sr->pr_val;
1336 /*
1337 * We could down-grade our write-lock on script->lock here but
1338 * the rwlock API doesn't offer atomic lock downgrading, so
1339 * we'll just keep the write-lock and release it later when
1340 * we're done using the script.
1341 */
1342}
1343
1344static inline void
1345script_emit (struct unw_script *script, struct unw_insn insn)
1346{
1347 if (script->count >= UNW_MAX_SCRIPT_LEN) {
1348 UNW_DPRINT(0, "unwind.%s: script exceeds maximum size of %u instructions!\n",
1349 __FUNCTION__, UNW_MAX_SCRIPT_LEN);
1350 return;
1351 }
1352 script->insn[script->count++] = insn;
1353}
1354
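/*
 * Script instructions view struct unw_frame_info as a flat array of 8-byte
 * slots (see run_script()).  insn.dst is the slot index of the save-location
 * pointer for the register being compiled (taken from unw.preg_index[]); for
 * r4-r7, the slot at dst+1 additionally receives the NaT-tracking word (the
 * NaT type and, for memory-stack NaT bits, an offset) that unw_access_gr()
 * consults later.
 */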
1355static inline void
1356emit_nat_info (struct unw_state_record *sr, int i, struct unw_script *script)
1357{
1358 struct unw_reg_info *r = sr->curr.reg + i;
1359 enum unw_insn_opcode opc;
1360 struct unw_insn insn;
1361 unsigned long val = 0;
1362
1363 switch (r->where) {
1364 case UNW_WHERE_GR:
1365 if (r->val >= 32) {
1366 /* register got spilled to a stacked register */
1367 opc = UNW_INSN_SETNAT_TYPE;
1368 val = UNW_NAT_REGSTK;
1369 } else
1370 /* register got spilled to a scratch register */
1371 opc = UNW_INSN_SETNAT_MEMSTK;
1372 break;
1373
1374 case UNW_WHERE_FR:
1375 opc = UNW_INSN_SETNAT_TYPE;
1376 val = UNW_NAT_VAL;
1377 break;
1378
1379 case UNW_WHERE_BR:
1380 opc = UNW_INSN_SETNAT_TYPE;
1381 val = UNW_NAT_NONE;
1382 break;
1383
1384 case UNW_WHERE_PSPREL:
1385 case UNW_WHERE_SPREL:
1386 opc = UNW_INSN_SETNAT_MEMSTK;
1387 break;
1388
1389 default:
1390 UNW_DPRINT(0, "unwind.%s: don't know how to emit nat info for where = %u\n",
1391 __FUNCTION__, r->where);
1392 return;
1393 }
1394 insn.opc = opc;
1395 insn.dst = unw.preg_index[i];
1396 insn.val = val;
1397 script_emit(script, insn);
1398}
1399
1400static void
1401compile_reg (struct unw_state_record *sr, int i, struct unw_script *script)
1402{
1403 struct unw_reg_info *r = sr->curr.reg + i;
1404 enum unw_insn_opcode opc;
1405 unsigned long val, rval;
1406 struct unw_insn insn;
1407 long need_nat_info;
1408
1409 if (r->where == UNW_WHERE_NONE || r->when >= sr->when_target)
1410 return;
1411
1412 opc = UNW_INSN_MOVE;
1413 val = rval = r->val;
1414 need_nat_info = (i >= UNW_REG_R4 && i <= UNW_REG_R7);
1415
1416 switch (r->where) {
1417 case UNW_WHERE_GR:
1418 if (rval >= 32) {
1419 opc = UNW_INSN_MOVE_STACKED;
1420 val = rval - 32;
1421 } else if (rval >= 4 && rval <= 7) {
1422 if (need_nat_info) {
1423 opc = UNW_INSN_MOVE2;
1424 need_nat_info = 0;
1425 }
1426 val = unw.preg_index[UNW_REG_R4 + (rval - 4)];
1427 } else if (rval == 0) {
1428 opc = UNW_INSN_MOVE_CONST;
1429 val = 0;
1430 } else {
1431 /* register got spilled to a scratch register */
1432 opc = UNW_INSN_MOVE_SCRATCH;
1433 val = pt_regs_off(rval);
1434 }
1435 break;
1436
1437 case UNW_WHERE_FR:
1438 if (rval <= 5)
1439 val = unw.preg_index[UNW_REG_F2 + (rval - 2)];
1440 else if (rval >= 16 && rval <= 31)
1441 val = unw.preg_index[UNW_REG_F16 + (rval - 16)];
1442 else {
1443 opc = UNW_INSN_MOVE_SCRATCH;
1444 if (rval <= 11)
1445 val = offsetof(struct pt_regs, f6) + 16*(rval - 6);
1446 else
1447 UNW_DPRINT(0, "unwind.%s: kernel may not touch f%lu\n",
1448 __FUNCTION__, rval);
1449 }
1450 break;
1451
1452 case UNW_WHERE_BR:
1453 if (rval >= 1 && rval <= 5)
1454 val = unw.preg_index[UNW_REG_B1 + (rval - 1)];
1455 else {
1456 opc = UNW_INSN_MOVE_SCRATCH;
1457 if (rval == 0)
1458 val = offsetof(struct pt_regs, b0);
1459 else if (rval == 6)
1460 val = offsetof(struct pt_regs, b6);
1461 else
1462 val = offsetof(struct pt_regs, b7);
1463 }
1464 break;
1465
1466 case UNW_WHERE_SPREL:
1467 opc = UNW_INSN_ADD_SP;
1468 break;
1469
1470 case UNW_WHERE_PSPREL:
1471 opc = UNW_INSN_ADD_PSP;
1472 break;
1473
1474 default:
1475		UNW_DPRINT(0, "unwind.%s: register %u has unexpected `where' value of %u\n",
1476 __FUNCTION__, i, r->where);
1477 break;
1478 }
1479 insn.opc = opc;
1480 insn.dst = unw.preg_index[i];
1481 insn.val = val;
1482 script_emit(script, insn);
1483 if (need_nat_info)
1484 emit_nat_info(sr, i, script);
1485
1486 if (i == UNW_REG_PSP) {
1487 /*
1488 * info->psp must contain the _value_ of the previous
1489		 * sp, not its save location.  We get this by
1490 * dereferencing the value we just stored in
1491 * info->psp:
1492 */
1493 insn.opc = UNW_INSN_LOAD;
1494 insn.dst = insn.val = unw.preg_index[UNW_REG_PSP];
1495 script_emit(script, insn);
1496 }
1497}
1498
1499static inline const struct unw_table_entry *
1500lookup (struct unw_table *table, unsigned long rel_ip)
1501{
1502 const struct unw_table_entry *e = NULL;
1503 unsigned long lo, hi, mid;
1504
1505 /* do a binary search for right entry: */
1506 for (lo = 0, hi = table->length; lo < hi; ) {
1507 mid = (lo + hi) / 2;
1508 e = &table->array[mid];
1509 if (rel_ip < e->start_offset)
1510 hi = mid;
1511 else if (rel_ip >= e->end_offset)
1512 lo = mid + 1;
1513 else
1514 break;
1515 }
1516 if (rel_ip < e->start_offset || rel_ip >= e->end_offset)
1517 return NULL;
1518 return e;
1519}
1520
1521/*
1522 * Build an unwind script that unwinds from state OLD_STATE to the
1523 * entrypoint of the function that called OLD_STATE.
1524 */
1525static inline struct unw_script *
1526build_script (struct unw_frame_info *info)
1527{
1528 const struct unw_table_entry *e = NULL;
1529 struct unw_script *script = NULL;
1530 struct unw_labeled_state *ls, *next;
1531 unsigned long ip = info->ip;
1532 struct unw_state_record sr;
1533 struct unw_table *table;
1534 struct unw_reg_info *r;
1535 struct unw_insn insn;
1536 u8 *dp, *desc_end;
1537 u64 hdr;
1538 int i;
1539 STAT(unsigned long start, parse_start;)
1540
1541 STAT(++unw.stat.script.builds; start = ia64_get_itc());
1542
1543 /* build state record */
1544 memset(&sr, 0, sizeof(sr));
1545 for (r = sr.curr.reg; r < sr.curr.reg + UNW_NUM_REGS; ++r)
1546 r->when = UNW_WHEN_NEVER;
1547 sr.pr_val = info->pr;
1548
1549 UNW_DPRINT(3, "unwind.%s: ip 0x%lx\n", __FUNCTION__, ip);
1550 script = script_new(ip);
1551 if (!script) {
1552 UNW_DPRINT(0, "unwind.%s: failed to create unwind script\n", __FUNCTION__);
1553 STAT(unw.stat.script.build_time += ia64_get_itc() - start);
1554 return NULL;
1555 }
1556 unw.cache[info->prev_script].hint = script - unw.cache;
1557
1558	/* search the kernel's and the modules' unwind tables for IP: */
1559
1560 STAT(parse_start = ia64_get_itc());
1561
1562 for (table = unw.tables; table; table = table->next) {
1563 if (ip >= table->start && ip < table->end) {
1564 e = lookup(table, ip - table->segment_base);
1565 break;
1566 }
1567 }
1568 if (!e) {
1569 /* no info, return default unwinder (leaf proc, no mem stack, no saved regs) */
1570 UNW_DPRINT(1, "unwind.%s: no unwind info for ip=0x%lx (prev ip=0x%lx)\n",
1571 __FUNCTION__, ip, unw.cache[info->prev_script].ip);
1572 sr.curr.reg[UNW_REG_RP].where = UNW_WHERE_BR;
1573 sr.curr.reg[UNW_REG_RP].when = -1;
1574 sr.curr.reg[UNW_REG_RP].val = 0;
1575 compile_reg(&sr, UNW_REG_RP, script);
1576 script_finalize(script, &sr);
1577 STAT(unw.stat.script.parse_time += ia64_get_itc() - parse_start);
1578 STAT(unw.stat.script.build_time += ia64_get_itc() - start);
1579 return script;
1580 }
1581
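	/*
	 * Convert IP into the unwind "time" of the target instruction: time is
	 * counted in instruction slots, three per 16-byte bundle, and the slot
	 * number is carried in the low 4 bits of IP.  E.g., an IP three bundles
	 * (48 bytes) past the start of the function with slot number 1 yields
	 * when_target = 3*3 + 1 = 10.
	 */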
1582 sr.when_target = (3*((ip & ~0xfUL) - (table->segment_base + e->start_offset))/16
1583 + (ip & 0xfUL));
1584 hdr = *(u64 *) (table->segment_base + e->info_offset);
1585 dp = (u8 *) (table->segment_base + e->info_offset + 8);
1586 desc_end = dp + 8*UNW_LENGTH(hdr);
1587
1588 while (!sr.done && dp < desc_end)
1589 dp = unw_decode(dp, sr.in_body, &sr);
1590
1591 if (sr.when_target > sr.epilogue_start) {
1592 /*
1593 * sp has been restored and all values on the memory stack below
1594 * psp also have been restored.
1595 */
1596 sr.curr.reg[UNW_REG_PSP].val = 0;
1597 sr.curr.reg[UNW_REG_PSP].where = UNW_WHERE_NONE;
1598 sr.curr.reg[UNW_REG_PSP].when = UNW_WHEN_NEVER;
1599 for (r = sr.curr.reg; r < sr.curr.reg + UNW_NUM_REGS; ++r)
1600 if ((r->where == UNW_WHERE_PSPREL && r->val <= 0x10)
1601 || r->where == UNW_WHERE_SPREL)
1602 {
1603 r->val = 0;
1604 r->where = UNW_WHERE_NONE;
1605 r->when = UNW_WHEN_NEVER;
1606 }
1607 }
1608
1609 script->flags = sr.flags;
1610
1611 /*
1612	 * If RP didn't get saved, generate entry for the return link
1613 * register.
1614 */
1615 if (sr.curr.reg[UNW_REG_RP].when >= sr.when_target) {
1616 sr.curr.reg[UNW_REG_RP].where = UNW_WHERE_BR;
1617 sr.curr.reg[UNW_REG_RP].when = -1;
1618 sr.curr.reg[UNW_REG_RP].val = sr.return_link_reg;
1619 UNW_DPRINT(1, "unwind.%s: using default for rp at ip=0x%lx where=%d val=0x%lx\n",
1620 __FUNCTION__, ip, sr.curr.reg[UNW_REG_RP].where,
1621 sr.curr.reg[UNW_REG_RP].val);
1622 }
1623
1624#ifdef UNW_DEBUG
1625 UNW_DPRINT(1, "unwind.%s: state record for func 0x%lx, t=%u:\n",
1626 __FUNCTION__, table->segment_base + e->start_offset, sr.when_target);
1627 for (r = sr.curr.reg; r < sr.curr.reg + UNW_NUM_REGS; ++r) {
1628 if (r->where != UNW_WHERE_NONE || r->when != UNW_WHEN_NEVER) {
1629 UNW_DPRINT(1, " %s <- ", unw.preg_name[r - sr.curr.reg]);
1630 switch (r->where) {
1631 case UNW_WHERE_GR: UNW_DPRINT(1, "r%lu", r->val); break;
1632 case UNW_WHERE_FR: UNW_DPRINT(1, "f%lu", r->val); break;
1633 case UNW_WHERE_BR: UNW_DPRINT(1, "b%lu", r->val); break;
1634 case UNW_WHERE_SPREL: UNW_DPRINT(1, "[sp+0x%lx]", r->val); break;
1635 case UNW_WHERE_PSPREL: UNW_DPRINT(1, "[psp+0x%lx]", r->val); break;
1636 case UNW_WHERE_NONE:
1637 UNW_DPRINT(1, "%s+0x%lx", unw.preg_name[r - sr.curr.reg], r->val);
1638 break;
1639
1640 default:
1641 UNW_DPRINT(1, "BADWHERE(%d)", r->where);
1642 break;
1643 }
1644 UNW_DPRINT(1, "\t\t%d\n", r->when);
1645 }
1646 }
1647#endif
1648
1649 STAT(unw.stat.script.parse_time += ia64_get_itc() - parse_start);
1650
1651 /* translate state record into unwinder instructions: */
1652
1653 /*
1654 * First, set psp if we're dealing with a fixed-size frame;
1655 * subsequent instructions may depend on this value.
1656 */
1657 if (sr.when_target > sr.curr.reg[UNW_REG_PSP].when
1658 && (sr.curr.reg[UNW_REG_PSP].where == UNW_WHERE_NONE)
1659 && sr.curr.reg[UNW_REG_PSP].val != 0) {
1660 /* new psp is sp plus frame size */
1661 insn.opc = UNW_INSN_ADD;
1662 insn.dst = offsetof(struct unw_frame_info, psp)/8;
1663 insn.val = sr.curr.reg[UNW_REG_PSP].val; /* frame size */
1664 script_emit(script, insn);
1665 }
1666
1667 /* determine where the primary UNaT is: */
1668 if (sr.when_target < sr.curr.reg[UNW_REG_PRI_UNAT_GR].when)
1669 i = UNW_REG_PRI_UNAT_MEM;
1670 else if (sr.when_target < sr.curr.reg[UNW_REG_PRI_UNAT_MEM].when)
1671 i = UNW_REG_PRI_UNAT_GR;
1672 else if (sr.curr.reg[UNW_REG_PRI_UNAT_MEM].when > sr.curr.reg[UNW_REG_PRI_UNAT_GR].when)
1673 i = UNW_REG_PRI_UNAT_MEM;
1674 else
1675 i = UNW_REG_PRI_UNAT_GR;
1676
1677 compile_reg(&sr, i, script);
1678
1679 for (i = UNW_REG_BSP; i < UNW_NUM_REGS; ++i)
1680 compile_reg(&sr, i, script);
1681
1682 /* free labeled register states & stack: */
1683
1684 STAT(parse_start = ia64_get_itc());
1685 for (ls = sr.labeled_states; ls; ls = next) {
1686 next = ls->next;
1687 free_state_stack(&ls->saved_state);
1688 free_labeled_state(ls);
1689 }
1690 free_state_stack(&sr.curr);
1691 STAT(unw.stat.script.parse_time += ia64_get_itc() - parse_start);
1692
1693 script_finalize(script, &sr);
1694 STAT(unw.stat.script.build_time += ia64_get_itc() - start);
1695 return script;
1696}
1697
1698/*
1699 * Apply the unwinding actions in SCRIPT and update STATE to
1700 * reflect the machine state that existed upon entry to the function
1701 * that STATE currently describes.
1702 */
1703static inline void
1704run_script (struct unw_script *script, struct unw_frame_info *state)
1705{
1706 struct unw_insn *ip, *limit, next_insn;
1707 unsigned long opc, dst, val, off;
1708 unsigned long *s = (unsigned long *) state;
1709 STAT(unsigned long start;)
1710
1711 STAT(++unw.stat.script.runs; start = ia64_get_itc());
1712 state->flags = script->flags;
1713 ip = script->insn;
1714 limit = script->insn + script->count;
1715 next_insn = *ip;
1716
1717 while (ip++ < limit) {
1718 opc = next_insn.opc;
1719 dst = next_insn.dst;
1720 val = next_insn.val;
1721 next_insn = *ip;
1722
1723 redo:
1724 switch (opc) {
1725 case UNW_INSN_ADD:
1726 s[dst] += val;
1727 break;
1728
1729 case UNW_INSN_MOVE2:
1730 if (!s[val])
1731 goto lazy_init;
1732 s[dst+1] = s[val+1];
1733 s[dst] = s[val];
1734 break;
1735
1736 case UNW_INSN_MOVE:
1737 if (!s[val])
1738 goto lazy_init;
1739 s[dst] = s[val];
1740 break;
1741
1742 case UNW_INSN_MOVE_SCRATCH:
1743 if (state->pt) {
1744 s[dst] = (unsigned long) get_scratch_regs(state) + val;
1745 } else {
1746 s[dst] = 0;
1747 UNW_DPRINT(0, "unwind.%s: no state->pt, dst=%ld, val=%ld\n",
1748 __FUNCTION__, dst, val);
1749 }
1750 break;
1751
1752 case UNW_INSN_MOVE_CONST:
1753 if (val == 0)
1754 s[dst] = (unsigned long) &unw.r0;
1755 else {
1756 s[dst] = 0;
1757 UNW_DPRINT(0, "unwind.%s: UNW_INSN_MOVE_CONST bad val=%ld\n",
1758 __FUNCTION__, val);
1759 }
1760 break;
1761
1762
1763 case UNW_INSN_MOVE_STACKED:
1764 s[dst] = (unsigned long) ia64_rse_skip_regs((unsigned long *)state->bsp,
1765 val);
1766 break;
1767
1768 case UNW_INSN_ADD_PSP:
1769 s[dst] = state->psp + val;
1770 break;
1771
1772 case UNW_INSN_ADD_SP:
1773 s[dst] = state->sp + val;
1774 break;
1775
1776 case UNW_INSN_SETNAT_MEMSTK:
1777 if (!state->pri_unat_loc)
1778 state->pri_unat_loc = &state->sw->ar_unat;
1779 /* register off. is a multiple of 8, so the least 3 bits (type) are 0 */
1780 s[dst+1] = ((unsigned long) state->pri_unat_loc - s[dst]) | UNW_NAT_MEMSTK;
1781 break;
1782
1783 case UNW_INSN_SETNAT_TYPE:
1784 s[dst+1] = val;
1785 break;
1786
1787 case UNW_INSN_LOAD:
1788#ifdef UNW_DEBUG
1789 if ((s[val] & (local_cpu_data->unimpl_va_mask | 0x7)) != 0
1790 || s[val] < TASK_SIZE)
1791 {
1792 UNW_DPRINT(0, "unwind.%s: rejecting bad psp=0x%lx\n",
1793 __FUNCTION__, s[val]);
1794 break;
1795 }
1796#endif
1797 s[dst] = *(unsigned long *) s[val];
1798 break;
1799 }
1800 }
1801 STAT(unw.stat.script.run_time += ia64_get_itc() - start);
1802 return;
1803
1804 lazy_init:
1805 off = unw.sw_off[val];
1806 s[val] = (unsigned long) state->sw + off;
1807 if (off >= offsetof(struct switch_stack, r4) && off <= offsetof(struct switch_stack, r7))
1808 /*
1809 * We're initializing a general register: init NaT info, too. Note that
1810 * the offset is a multiple of 8 which gives us the 3 bits needed for
1811 * the type field.
1812 */
1813 s[val+1] = (offsetof(struct switch_stack, ar_unat) - off) | UNW_NAT_MEMSTK;
1814 goto redo;
1815}
1816
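/*
 * Look up (or build) the unwind script for INFO->ip and run it to fill in
 * the save locations.  Locking: a cache hit in script_lookup() returns with
 * the script's read lock held; on a miss we take unw.lock and build_script()
 * returns with the new script's write lock held.  Whichever lock we hold is
 * dropped again once run_script() is done.
 */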
1817static int
1818find_save_locs (struct unw_frame_info *info)
1819{
1820 int have_write_lock = 0;
1821 struct unw_script *scr;
1822 unsigned long flags = 0;
1823
1824 if ((info->ip & (local_cpu_data->unimpl_va_mask | 0xf)) || info->ip < TASK_SIZE) {
1825 /* don't let obviously bad addresses pollute the cache */
1826 /* FIXME: should really be level 0 but it occurs too often. KAO */
1827 UNW_DPRINT(1, "unwind.%s: rejecting bad ip=0x%lx\n", __FUNCTION__, info->ip);
1828 info->rp_loc = NULL;
1829 return -1;
1830 }
1831
1832 scr = script_lookup(info);
1833 if (!scr) {
1834 spin_lock_irqsave(&unw.lock, flags);
1835 scr = build_script(info);
1836 if (!scr) {
1837 spin_unlock_irqrestore(&unw.lock, flags);
1838 UNW_DPRINT(0,
1839 "unwind.%s: failed to locate/build unwind script for ip %lx\n",
1840 __FUNCTION__, info->ip);
1841 return -1;
1842 }
1843 have_write_lock = 1;
1844 }
1845 info->hint = scr->hint;
1846 info->prev_script = scr - unw.cache;
1847
1848 run_script(scr, info);
1849
1850 if (have_write_lock) {
1851 write_unlock(&scr->lock);
1852 spin_unlock_irqrestore(&unw.lock, flags);
1853 } else
1854 read_unlock(&scr->lock);
1855 return 0;
1856}
1857
1858int
1859unw_unwind (struct unw_frame_info *info)
1860{
1861 unsigned long prev_ip, prev_sp, prev_bsp;
1862 unsigned long ip, pr, num_regs;
1863 STAT(unsigned long start, flags;)
1864 int retval;
1865
1866 STAT(local_irq_save(flags); ++unw.stat.api.unwinds; start = ia64_get_itc());
1867
1868 prev_ip = info->ip;
1869 prev_sp = info->sp;
1870 prev_bsp = info->bsp;
1871
1872 /* restore the ip */
1873 if (!info->rp_loc) {
1874 /* FIXME: should really be level 0 but it occurs too often. KAO */
1875 UNW_DPRINT(1, "unwind.%s: failed to locate return link (ip=0x%lx)!\n",
1876 __FUNCTION__, info->ip);
1877 STAT(unw.stat.api.unwind_time += ia64_get_itc() - start; local_irq_restore(flags));
1878 return -1;
1879 }
1880 ip = info->ip = *info->rp_loc;
1881 if (ip < GATE_ADDR) {
1882 UNW_DPRINT(2, "unwind.%s: reached user-space (ip=0x%lx)\n", __FUNCTION__, ip);
1883 STAT(unw.stat.api.unwind_time += ia64_get_itc() - start; local_irq_restore(flags));
1884 return -1;
1885 }
1886
1887 /* restore the cfm: */
1888 if (!info->pfs_loc) {
1889 UNW_DPRINT(0, "unwind.%s: failed to locate ar.pfs!\n", __FUNCTION__);
1890 STAT(unw.stat.api.unwind_time += ia64_get_itc() - start; local_irq_restore(flags));
1891 return -1;
1892 }
1893 info->cfm_loc = info->pfs_loc;
1894
1895 /* restore the bsp: */
1896 pr = info->pr;
1897 num_regs = 0;
1898 if ((info->flags & UNW_FLAG_INTERRUPT_FRAME)) {
1899 info->pt = info->sp + 16;
1900 if ((pr & (1UL << PRED_NON_SYSCALL)) != 0)
1901 num_regs = *info->cfm_loc & 0x7f; /* size of frame */
1902 info->pfs_loc =
1903 (unsigned long *) (info->pt + offsetof(struct pt_regs, ar_pfs));
1904 UNW_DPRINT(3, "unwind.%s: interrupt_frame pt 0x%lx\n", __FUNCTION__, info->pt);
1905 } else
1906 num_regs = (*info->cfm_loc >> 7) & 0x7f; /* size of locals */
1907 info->bsp = (unsigned long) ia64_rse_skip_regs((unsigned long *) info->bsp, -num_regs);
1908 if (info->bsp < info->regstk.limit || info->bsp > info->regstk.top) {
1909 UNW_DPRINT(0, "unwind.%s: bsp (0x%lx) out of range [0x%lx-0x%lx]\n",
1910 __FUNCTION__, info->bsp, info->regstk.limit, info->regstk.top);
1911 STAT(unw.stat.api.unwind_time += ia64_get_itc() - start; local_irq_restore(flags));
1912 return -1;
1913 }
1914
1915 /* restore the sp: */
1916 info->sp = info->psp;
1917 if (info->sp < info->memstk.top || info->sp > info->memstk.limit) {
1918 UNW_DPRINT(0, "unwind.%s: sp (0x%lx) out of range [0x%lx-0x%lx]\n",
1919 __FUNCTION__, info->sp, info->memstk.top, info->memstk.limit);
1920 STAT(unw.stat.api.unwind_time += ia64_get_itc() - start; local_irq_restore(flags));
1921 return -1;
1922 }
1923
1924 if (info->ip == prev_ip && info->sp == prev_sp && info->bsp == prev_bsp) {
1925 UNW_DPRINT(0, "unwind.%s: ip, sp, bsp unchanged; stopping here (ip=0x%lx)\n",
1926 __FUNCTION__, ip);
1927 STAT(unw.stat.api.unwind_time += ia64_get_itc() - start; local_irq_restore(flags));
1928 return -1;
1929 }
1930
1931 /* as we unwind, the saved ar.unat becomes the primary unat: */
1932 info->pri_unat_loc = info->unat_loc;
1933
1934 /* finally, restore the predicates: */
1935 unw_get_pr(info, &info->pr);
1936
1937 retval = find_save_locs(info);
1938 STAT(unw.stat.api.unwind_time += ia64_get_itc() - start; local_irq_restore(flags));
1939 return retval;
1940}
1941EXPORT_SYMBOL(unw_unwind);
1942
1943int
1944unw_unwind_to_user (struct unw_frame_info *info)
1945{
1946 unsigned long ip, sp;
1947
1948 while (unw_unwind(info) >= 0) {
1949 if (unw_get_rp(info, &ip) < 0) {
1950 unw_get_ip(info, &ip);
1951 UNW_DPRINT(0, "unwind.%s: failed to read return pointer (ip=0x%lx)\n",
1952 __FUNCTION__, ip);
1953 return -1;
1954 }
1955 unw_get_sp(info, &sp);
1956 if (sp >= (unsigned long)info->task + IA64_STK_OFFSET)
1957 break;
1958 if (ip < FIXADDR_USER_END)
1959 return 0;
1960 }
1961 unw_get_ip(info, &ip);
1962 UNW_DPRINT(0, "unwind.%s: failed to unwind to user-level (ip=0x%lx)\n", __FUNCTION__, ip);
1963 return -1;
1964}
1965EXPORT_SYMBOL(unw_unwind_to_user);
1966
1967static void
1968init_frame_info (struct unw_frame_info *info, struct task_struct *t,
1969 struct switch_stack *sw, unsigned long stktop)
1970{
1971 unsigned long rbslimit, rbstop, stklimit;
1972 STAT(unsigned long start, flags;)
1973
1974 STAT(local_irq_save(flags); ++unw.stat.api.inits; start = ia64_get_itc());
1975
1976 /*
1977 * Subtle stuff here: we _could_ unwind through the switch_stack frame but we
1978 * don't want to do that because it would be slow as each preserved register would
1979 * have to be processed. Instead, what we do here is zero out the frame info and
1980 * start the unwind process at the function that created the switch_stack frame.
1981 * When a preserved value in switch_stack needs to be accessed, run_script() will
1982 * initialize the appropriate pointer on demand.
1983 */
1984 memset(info, 0, sizeof(*info));
1985
1986 rbslimit = (unsigned long) t + IA64_RBS_OFFSET;
1987 rbstop = sw->ar_bspstore;
1988 if (rbstop - (unsigned long) t >= IA64_STK_OFFSET)
1989 rbstop = rbslimit;
1990
1991 stklimit = (unsigned long) t + IA64_STK_OFFSET;
1992 if (stktop <= rbstop)
1993 stktop = rbstop;
1994
1995 info->regstk.limit = rbslimit;
1996 info->regstk.top = rbstop;
1997 info->memstk.limit = stklimit;
1998 info->memstk.top = stktop;
1999 info->task = t;
2000 info->sw = sw;
2001 info->sp = info->psp = stktop;
2002 info->pr = sw->pr;
2003 UNW_DPRINT(3, "unwind.%s:\n"
2004 " task 0x%lx\n"
2005 " rbs = [0x%lx-0x%lx)\n"
2006 " stk = [0x%lx-0x%lx)\n"
2007 " pr 0x%lx\n"
2008 " sw 0x%lx\n"
2009 " sp 0x%lx\n",
2010 __FUNCTION__, (unsigned long) t, rbslimit, rbstop, stktop, stklimit,
2011 info->pr, (unsigned long) info->sw, info->sp);
2012 STAT(unw.stat.api.init_time += ia64_get_itc() - start; local_irq_restore(flags));
2013}
2014
2015void
2016unw_init_from_interruption (struct unw_frame_info *info, struct task_struct *t,
2017 struct pt_regs *pt, struct switch_stack *sw)
2018{
2019 unsigned long sof;
2020
2021 init_frame_info(info, t, sw, pt->r12);
2022 info->cfm_loc = &pt->cr_ifs;
2023 info->unat_loc = &pt->ar_unat;
2024 info->pfs_loc = &pt->ar_pfs;
2025 sof = *info->cfm_loc & 0x7f;
2026 info->bsp = (unsigned long) ia64_rse_skip_regs((unsigned long *) info->regstk.top, -sof);
2027 info->ip = pt->cr_iip + ia64_psr(pt)->ri;
2028 info->pt = (unsigned long) pt;
2029 UNW_DPRINT(3, "unwind.%s:\n"
2030 " bsp 0x%lx\n"
2031 " sof 0x%lx\n"
2032 " ip 0x%lx\n",
2033 __FUNCTION__, info->bsp, sof, info->ip);
2034 find_save_locs(info);
2035}
2036
2037void
2038unw_init_frame_info (struct unw_frame_info *info, struct task_struct *t, struct switch_stack *sw)
2039{
2040 unsigned long sol;
2041
2042 init_frame_info(info, t, sw, (unsigned long) (sw + 1) - 16);
2043 info->cfm_loc = &sw->ar_pfs;
2044 sol = (*info->cfm_loc >> 7) & 0x7f;
2045 info->bsp = (unsigned long) ia64_rse_skip_regs((unsigned long *) info->regstk.top, -sol);
2046 info->ip = sw->b0;
2047 UNW_DPRINT(3, "unwind.%s:\n"
2048 " bsp 0x%lx\n"
2049 " sol 0x%lx\n"
2050 " ip 0x%lx\n",
2051 __FUNCTION__, info->bsp, sol, info->ip);
2052 find_save_locs(info);
2053}
2054
2055EXPORT_SYMBOL(unw_init_frame_info);
2056
2057void
2058unw_init_from_blocked_task (struct unw_frame_info *info, struct task_struct *t)
2059{
2060 struct switch_stack *sw = (struct switch_stack *) (t->thread.ksp + 16);
2061
2062 UNW_DPRINT(1, "unwind.%s\n", __FUNCTION__);
2063 unw_init_frame_info(info, t, sw);
2064}
2065EXPORT_SYMBOL(unw_init_from_blocked_task);
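/*
 * A minimal backtrace loop over this API (a sketch of how callers such as
 * the kernel's stack dumper use it; the local variables are illustrative
 * only):
 *
 *	struct unw_frame_info info;
 *	unsigned long ip;
 *
 *	unw_init_from_blocked_task(&info, task);
 *	do {
 *		unw_get_ip(&info, &ip);
 *		if (ip == 0)
 *			break;
 *		printk(" [<%016lx>]\n", ip);
 *	} while (unw_unwind(&info) >= 0);
 */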
2066
2067static void
2068init_unwind_table (struct unw_table *table, const char *name, unsigned long segment_base,
2069 unsigned long gp, const void *table_start, const void *table_end)
2070{
2071 const struct unw_table_entry *start = table_start, *end = table_end;
2072
2073 table->name = name;
2074 table->segment_base = segment_base;
2075 table->gp = gp;
2076 table->start = segment_base + start[0].start_offset;
2077 table->end = segment_base + end[-1].end_offset;
2078 table->array = start;
2079 table->length = end - start;
2080}
2081
2082void *
2083unw_add_unwind_table (const char *name, unsigned long segment_base, unsigned long gp,
2084 const void *table_start, const void *table_end)
2085{
2086 const struct unw_table_entry *start = table_start, *end = table_end;
2087 struct unw_table *table;
2088 unsigned long flags;
2089
2090 if (end - start <= 0) {
2091 UNW_DPRINT(0, "unwind.%s: ignoring attempt to insert empty unwind table\n",
2092 __FUNCTION__);
2093 return NULL;
2094 }
2095
2096 table = kmalloc(sizeof(*table), GFP_USER);
2097 if (!table)
2098 return NULL;
2099
2100 init_unwind_table(table, name, segment_base, gp, table_start, table_end);
2101
2102 spin_lock_irqsave(&unw.lock, flags);
2103 {
2104 /* keep kernel unwind table at the front (it's searched most commonly): */
2105 table->next = unw.tables->next;
2106 unw.tables->next = table;
2107 }
2108 spin_unlock_irqrestore(&unw.lock, flags);
2109
2110 return table;
2111}
2112
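/*
 * The handle returned by unw_add_unwind_table() is the struct unw_table
 * itself; callers (e.g. the module loader) are expected to keep it and pass
 * it back here once the corresponding code range goes away.
 */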
2113void
2114unw_remove_unwind_table (void *handle)
2115{
2116 struct unw_table *table, *prev;
2117 struct unw_script *tmp;
2118 unsigned long flags;
2119 long index;
2120
2121 if (!handle) {
2122 UNW_DPRINT(0, "unwind.%s: ignoring attempt to remove non-existent unwind table\n",
2123 __FUNCTION__);
2124 return;
2125 }
2126
2127 table = handle;
2128 if (table == &unw.kernel_table) {
2129 UNW_DPRINT(0, "unwind.%s: sorry, freeing the kernel's unwind table is a "
2130 "no-can-do!\n", __FUNCTION__);
2131 return;
2132 }
2133
2134 spin_lock_irqsave(&unw.lock, flags);
2135 {
2136 /* first, delete the table: */
2137
2138 for (prev = (struct unw_table *) &unw.tables; prev; prev = prev->next)
2139 if (prev->next == table)
2140 break;
2141 if (!prev) {
2142 UNW_DPRINT(0, "unwind.%s: failed to find unwind table %p\n",
2143 __FUNCTION__, (void *) table);
2144 spin_unlock_irqrestore(&unw.lock, flags);
2145 return;
2146 }
2147 prev->next = table->next;
2148 }
2149 spin_unlock_irqrestore(&unw.lock, flags);
2150
2151 /* next, remove hash table entries for this table */
2152
2153	for (index = 0; index < UNW_HASH_SIZE; ++index) {
2154 tmp = unw.cache + unw.hash[index];
2155 if (unw.hash[index] >= UNW_CACHE_SIZE
2156 || tmp->ip < table->start || tmp->ip >= table->end)
2157 continue;
2158
2159 write_lock(&tmp->lock);
2160 {
2161 if (tmp->ip >= table->start && tmp->ip < table->end) {
2162 unw.hash[index] = tmp->coll_chain;
2163 tmp->ip = 0;
2164 }
2165 }
2166 write_unlock(&tmp->lock);
2167 }
2168
2169 kfree(table);
2170}
2171
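A short usage sketch of the add/remove pair above, with placeholder names (a real caller, such as the module loader, would take segment_base, gp and the table bounds from the load module's .IA_64.unwind section):

	/* hypothetical caller -- names are illustrative */
	static void *my_unw_handle;

	static int my_register_unwind(const char *name, unsigned long segment_base,
				      unsigned long gp, const void *start, const void *end)
	{
		my_unw_handle = unw_add_unwind_table(name, segment_base, gp, start, end);
		/* NULL covers both an empty table and an allocation failure */
		return my_unw_handle ? 0 : -ENOMEM;
	}

	static void my_unregister_unwind(void)
	{
		if (my_unw_handle)
			unw_remove_unwind_table(my_unw_handle);	/* on unload */
	}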
2172static int __init
2173create_gate_table (void)
2174{
2175 const struct unw_table_entry *entry, *start, *end;
2176 unsigned long *lp, segbase = GATE_ADDR;
2177 size_t info_size, size;
2178 char *info;
2179 Elf64_Phdr *punw = NULL, *phdr = (Elf64_Phdr *) (GATE_ADDR + GATE_EHDR->e_phoff);
2180 int i;
2181
2182 for (i = 0; i < GATE_EHDR->e_phnum; ++i, ++phdr)
2183 if (phdr->p_type == PT_IA_64_UNWIND) {
2184 punw = phdr;
2185 break;
2186 }
2187
2188 if (!punw) {
2189 printk("%s: failed to find gate DSO's unwind table!\n", __FUNCTION__);
2190 return 0;
2191 }
2192
2193 start = (const struct unw_table_entry *) punw->p_vaddr;
2194 end = (struct unw_table_entry *) ((char *) start + punw->p_memsz);
2195 size = 0;
2196
2197 unw_add_unwind_table("linux-gate.so", segbase, 0, start, end);
2198
2199 for (entry = start; entry < end; ++entry)
2200 size += 3*8 + 8 + 8*UNW_LENGTH(*(u64 *) (segbase + entry->info_offset));
2201 size += 8; /* reserve space for "end of table" marker */
2202
2203 unw.gate_table = kmalloc(size, GFP_KERNEL);
2204 if (!unw.gate_table) {
2205 unw.gate_table_size = 0;
2206 printk(KERN_ERR "%s: unable to create unwind data for gate page!\n", __FUNCTION__);
2207 return 0;
2208 }
2209 unw.gate_table_size = size;
2210
2211 lp = unw.gate_table;
2212 info = (char *) unw.gate_table + size;
2213
2214 for (entry = start; entry < end; ++entry, lp += 3) {
2215 info_size = 8 + 8*UNW_LENGTH(*(u64 *) (segbase + entry->info_offset));
2216 info -= info_size;
2217 memcpy(info, (char *) segbase + entry->info_offset, info_size);
2218
2219 lp[0] = segbase + entry->start_offset; /* start */
2220 lp[1] = segbase + entry->end_offset; /* end */
2221 lp[2] = info - (char *) unw.gate_table; /* info */
2222 }
2223 *lp = 0; /* end-of-table marker */
2224 return 0;
2225}
2226
2227__initcall(create_gate_table);
2228
2229void __init
2230unw_init (void)
2231{
2232 extern char __gp[];
2233 extern void unw_hash_index_t_is_too_narrow (void);
2234 long i, off;
2235
2236 if (8*sizeof(unw_hash_index_t) < UNW_LOG_HASH_SIZE)
2237 unw_hash_index_t_is_too_narrow();
2238
2239 unw.sw_off[unw.preg_index[UNW_REG_PRI_UNAT_GR]] = SW(AR_UNAT);
2240 unw.sw_off[unw.preg_index[UNW_REG_BSPSTORE]] = SW(AR_BSPSTORE);
2241	unw.sw_off[unw.preg_index[UNW_REG_PFS]] = SW(AR_PFS);
2242 unw.sw_off[unw.preg_index[UNW_REG_RP]] = SW(B0);
2243 unw.sw_off[unw.preg_index[UNW_REG_UNAT]] = SW(AR_UNAT);
2244 unw.sw_off[unw.preg_index[UNW_REG_PR]] = SW(PR);
2245 unw.sw_off[unw.preg_index[UNW_REG_LC]] = SW(AR_LC);
2246 unw.sw_off[unw.preg_index[UNW_REG_FPSR]] = SW(AR_FPSR);
2247 for (i = UNW_REG_R4, off = SW(R4); i <= UNW_REG_R7; ++i, off += 8)
2248 unw.sw_off[unw.preg_index[i]] = off;
2249 for (i = UNW_REG_B1, off = SW(B1); i <= UNW_REG_B5; ++i, off += 8)
2250 unw.sw_off[unw.preg_index[i]] = off;
2251 for (i = UNW_REG_F2, off = SW(F2); i <= UNW_REG_F5; ++i, off += 16)
2252 unw.sw_off[unw.preg_index[i]] = off;
2253 for (i = UNW_REG_F16, off = SW(F16); i <= UNW_REG_F31; ++i, off += 16)
2254 unw.sw_off[unw.preg_index[i]] = off;
2255
2256 for (i = 0; i < UNW_CACHE_SIZE; ++i) {
2257 if (i > 0)
2258 unw.cache[i].lru_chain = (i - 1);
2259 unw.cache[i].coll_chain = -1;
2260 rwlock_init(&unw.cache[i].lock);
2261 }
2262 unw.lru_head = UNW_CACHE_SIZE - 1;
2263 unw.lru_tail = 0;
2264
2265 init_unwind_table(&unw.kernel_table, "kernel", KERNEL_START, (unsigned long) __gp,
2266 __start_unwind, __end_unwind);
2267}
2268
2269/*
2270 * DEPRECATED DEPRECATED DEPRECATED DEPRECATED DEPRECATED DEPRECATED DEPRECATED
2271 *
2272 * This system call has been deprecated. The new and improved way to get
2273 * at the kernel's unwind info is via the gate DSO. The address of the
2274 * ELF header for this DSO is passed to user-level via AT_SYSINFO_EHDR.
2275 *
2276 * DEPRECATED DEPRECATED DEPRECATED DEPRECATED DEPRECATED DEPRECATED DEPRECATED
2277 *
2278 * This system call copies the unwind data into the buffer pointed to by BUF and returns
2279 * the size of the unwind data. If BUF_SIZE is smaller than the size of the unwind data
2280 * or if BUF is NULL, nothing is copied, but the system call still returns the size of the
2281 * unwind data.
2282 *
2283 * The first portion of the unwind data contains an unwind table and the rest contains the
2284 * associated unwind info (in no particular order). The unwind table consists of a table
2285 * of entries of the form:
2286 *
2287 * u64 start; (64-bit address of start of function)
2288 *	u64 end;	(64-bit address of end of function)
2289 * u64 info; (BUF-relative offset to unwind info)
2290 *
2291 * The end of the unwind table is indicated by an entry with a START address of zero.
2292 *
2293 * Please see the IA-64 Software Conventions and Runtime Architecture manual for details
2294 * on the format of the unwind info.
2295 *
2296 * ERRORS
2297 * EFAULT BUF points outside your accessible address space.
2298 */
2299asmlinkage long
2300sys_getunwind (void __user *buf, size_t buf_size)
2301{
2302 if (buf && buf_size >= unw.gate_table_size)
2303 if (copy_to_user(buf, unw.gate_table, unw.gate_table_size) != 0)
2304 return -EFAULT;
2305 return unw.gate_table_size;
2306}
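A hypothetical user-level consumer of the (deprecated) interface documented above; it assumes __NR_getunwind is exposed by the libc headers and omits all error handling. The walk stops at the entry whose START word is zero, and the third word of each entry is an offset back into the buffer:

	#include <stdio.h>
	#include <stdlib.h>
	#include <unistd.h>
	#include <sys/syscall.h>

	int main(void)
	{
		long size = syscall(__NR_getunwind, NULL, 0);	/* query size only */
		unsigned long *buf = malloc(size);
		unsigned long *lp;

		syscall(__NR_getunwind, buf, size);
		for (lp = buf; lp[0] != 0; lp += 3)		/* start == 0 terminates */
			printf("func [0x%lx-0x%lx), unwind info at buf+0x%lx\n",
			       lp[0], lp[1], lp[2]);
		free(buf);
		return 0;
	}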
diff --git a/arch/ia64/kernel/unwind_decoder.c b/arch/ia64/kernel/unwind_decoder.c
new file mode 100644
index 000000000000..50ac2d82f9bf
--- /dev/null
+++ b/arch/ia64/kernel/unwind_decoder.c
@@ -0,0 +1,459 @@
1/*
2 * Copyright (C) 2000 Hewlett-Packard Co
3 * Copyright (C) 2000 David Mosberger-Tang <davidm@hpl.hp.com>
4 *
5 * Generic IA-64 unwind info decoder.
6 *
7 * This file is used both by the Linux kernel and objdump. Please keep
8 * the two copies of this file in sync.
9 *
10 * You need to customize the decoder by defining the following
11 * macros/constants before including this file:
12 *
13 * Types:
14 * unw_word Unsigned integer type with at least 64 bits
15 *
16 * Register names:
17 * UNW_REG_BSP
18 * UNW_REG_BSPSTORE
19 * UNW_REG_FPSR
20 * UNW_REG_LC
21 * UNW_REG_PFS
22 * UNW_REG_PR
23 * UNW_REG_RNAT
24 * UNW_REG_PSP
25 * UNW_REG_RP
26 * UNW_REG_UNAT
27 *
28 * Decoder action macros:
29 * UNW_DEC_BAD_CODE(code)
30 * UNW_DEC_ABI(fmt,abi,context,arg)
31 * UNW_DEC_BR_GR(fmt,brmask,gr,arg)
32 * UNW_DEC_BR_MEM(fmt,brmask,arg)
33 * UNW_DEC_COPY_STATE(fmt,label,arg)
34 * UNW_DEC_EPILOGUE(fmt,t,ecount,arg)
35 * UNW_DEC_FRGR_MEM(fmt,grmask,frmask,arg)
36 * UNW_DEC_FR_MEM(fmt,frmask,arg)
37 * UNW_DEC_GR_GR(fmt,grmask,gr,arg)
38 * UNW_DEC_GR_MEM(fmt,grmask,arg)
39 * UNW_DEC_LABEL_STATE(fmt,label,arg)
40 * UNW_DEC_MEM_STACK_F(fmt,t,size,arg)
41 * UNW_DEC_MEM_STACK_V(fmt,t,arg)
42 * UNW_DEC_PRIUNAT_GR(fmt,r,arg)
43 * UNW_DEC_PRIUNAT_WHEN_GR(fmt,t,arg)
44 * UNW_DEC_PRIUNAT_WHEN_MEM(fmt,t,arg)
45 * UNW_DEC_PRIUNAT_WHEN_PSPREL(fmt,pspoff,arg)
46 * UNW_DEC_PRIUNAT_WHEN_SPREL(fmt,spoff,arg)
47 * UNW_DEC_PROLOGUE(fmt,body,rlen,arg)
48 * UNW_DEC_PROLOGUE_GR(fmt,rlen,mask,grsave,arg)
49 * UNW_DEC_REG_PSPREL(fmt,reg,pspoff,arg)
50 * UNW_DEC_REG_REG(fmt,src,dst,arg)
51 * UNW_DEC_REG_SPREL(fmt,reg,spoff,arg)
52 * UNW_DEC_REG_WHEN(fmt,reg,t,arg)
53 * UNW_DEC_RESTORE(fmt,t,abreg,arg)
54 * UNW_DEC_RESTORE_P(fmt,qp,t,abreg,arg)
55 * UNW_DEC_SPILL_BASE(fmt,pspoff,arg)
56 * UNW_DEC_SPILL_MASK(fmt,imaskp,arg)
57 * UNW_DEC_SPILL_PSPREL(fmt,t,abreg,pspoff,arg)
58 * UNW_DEC_SPILL_PSPREL_P(fmt,qp,t,abreg,pspoff,arg)
59 * UNW_DEC_SPILL_REG(fmt,t,abreg,x,ytreg,arg)
60 * UNW_DEC_SPILL_REG_P(fmt,qp,t,abreg,x,ytreg,arg)
61 * UNW_DEC_SPILL_SPREL(fmt,t,abreg,spoff,arg)
62 * UNW_DEC_SPILL_SPREL_P(fmt,qp,t,abreg,pspoff,arg)
63 */
64
65static unw_word
66unw_decode_uleb128 (unsigned char **dpp)
67{
68 unsigned shift = 0;
69 unw_word byte, result = 0;
70 unsigned char *bp = *dpp;
71
72 while (1)
73 {
74 byte = *bp++;
75 result |= (byte & 0x7f) << shift;
76 if ((byte & 0x80) == 0)
77 break;
78 shift += 7;
79 }
80 *dpp = bp;
81 return result;
82}
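For reference, the ULEB128 format decoded above stores 7 payload bits per byte, least-significant group first, with the top bit of each byte flagging that more bytes follow. A small standalone sketch (names are illustrative, not part of the kernel), using the classic DWARF example 624485 = 0xE5 0x8E 0x26:

	#include <assert.h>

	static unsigned long toy_uleb128_decode(const unsigned char *p)
	{
		unsigned long result = 0;
		unsigned shift = 0;
		unsigned char byte;

		do {
			byte = *p++;
			result |= (unsigned long)(byte & 0x7f) << shift;
			shift += 7;
		} while (byte & 0x80);		/* high bit set: more bytes follow */
		return result;
	}

	int main(void)
	{
		static const unsigned char enc[] = { 0xE5, 0x8E, 0x26 };
		assert(toy_uleb128_decode(enc) == 624485);
		return 0;
	}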
83
84static unsigned char *
85unw_decode_x1 (unsigned char *dp, unsigned char code, void *arg)
86{
87 unsigned char byte1, abreg;
88 unw_word t, off;
89
90 byte1 = *dp++;
91 t = unw_decode_uleb128 (&dp);
92 off = unw_decode_uleb128 (&dp);
93 abreg = (byte1 & 0x7f);
94 if (byte1 & 0x80)
95 UNW_DEC_SPILL_SPREL(X1, t, abreg, off, arg);
96 else
97 UNW_DEC_SPILL_PSPREL(X1, t, abreg, off, arg);
98 return dp;
99}
100
101static unsigned char *
102unw_decode_x2 (unsigned char *dp, unsigned char code, void *arg)
103{
104 unsigned char byte1, byte2, abreg, x, ytreg;
105 unw_word t;
106
107 byte1 = *dp++; byte2 = *dp++;
108 t = unw_decode_uleb128 (&dp);
109 abreg = (byte1 & 0x7f);
110 ytreg = byte2;
111 x = (byte1 >> 7) & 1;
112 if ((byte1 & 0x80) == 0 && ytreg == 0)
113 UNW_DEC_RESTORE(X2, t, abreg, arg);
114 else
115 UNW_DEC_SPILL_REG(X2, t, abreg, x, ytreg, arg);
116 return dp;
117}
118
119static unsigned char *
120unw_decode_x3 (unsigned char *dp, unsigned char code, void *arg)
121{
122 unsigned char byte1, byte2, abreg, qp;
123 unw_word t, off;
124
125 byte1 = *dp++; byte2 = *dp++;
126 t = unw_decode_uleb128 (&dp);
127 off = unw_decode_uleb128 (&dp);
128
129 qp = (byte1 & 0x3f);
130 abreg = (byte2 & 0x7f);
131
132 if (byte1 & 0x80)
133 UNW_DEC_SPILL_SPREL_P(X3, qp, t, abreg, off, arg);
134 else
135 UNW_DEC_SPILL_PSPREL_P(X3, qp, t, abreg, off, arg);
136 return dp;
137}
138
139static unsigned char *
140unw_decode_x4 (unsigned char *dp, unsigned char code, void *arg)
141{
142 unsigned char byte1, byte2, byte3, qp, abreg, x, ytreg;
143 unw_word t;
144
145 byte1 = *dp++; byte2 = *dp++; byte3 = *dp++;
146 t = unw_decode_uleb128 (&dp);
147
148 qp = (byte1 & 0x3f);
149 abreg = (byte2 & 0x7f);
150 x = (byte2 >> 7) & 1;
151 ytreg = byte3;
152
153 if ((byte2 & 0x80) == 0 && byte3 == 0)
154 UNW_DEC_RESTORE_P(X4, qp, t, abreg, arg);
155 else
156 UNW_DEC_SPILL_REG_P(X4, qp, t, abreg, x, ytreg, arg);
157 return dp;
158}
159
160static unsigned char *
161unw_decode_r1 (unsigned char *dp, unsigned char code, void *arg)
162{
163 int body = (code & 0x20) != 0;
164 unw_word rlen;
165
166 rlen = (code & 0x1f);
167 UNW_DEC_PROLOGUE(R1, body, rlen, arg);
168 return dp;
169}
170
171static unsigned char *
172unw_decode_r2 (unsigned char *dp, unsigned char code, void *arg)
173{
174 unsigned char byte1, mask, grsave;
175 unw_word rlen;
176
177 byte1 = *dp++;
178
179 mask = ((code & 0x7) << 1) | ((byte1 >> 7) & 1);
180 grsave = (byte1 & 0x7f);
181 rlen = unw_decode_uleb128 (&dp);
182 UNW_DEC_PROLOGUE_GR(R2, rlen, mask, grsave, arg);
183 return dp;
184}
185
186static unsigned char *
187unw_decode_r3 (unsigned char *dp, unsigned char code, void *arg)
188{
189 unw_word rlen;
190
191 rlen = unw_decode_uleb128 (&dp);
192 UNW_DEC_PROLOGUE(R3, ((code & 0x3) == 1), rlen, arg);
193 return dp;
194}
195
196static unsigned char *
197unw_decode_p1 (unsigned char *dp, unsigned char code, void *arg)
198{
199 unsigned char brmask = (code & 0x1f);
200
201 UNW_DEC_BR_MEM(P1, brmask, arg);
202 return dp;
203}
204
205static unsigned char *
206unw_decode_p2_p5 (unsigned char *dp, unsigned char code, void *arg)
207{
208 if ((code & 0x10) == 0)
209 {
210 unsigned char byte1 = *dp++;
211
212 UNW_DEC_BR_GR(P2, ((code & 0xf) << 1) | ((byte1 >> 7) & 1),
213 (byte1 & 0x7f), arg);
214 }
215 else if ((code & 0x08) == 0)
216 {
217 unsigned char byte1 = *dp++, r, dst;
218
219 r = ((code & 0x7) << 1) | ((byte1 >> 7) & 1);
220 dst = (byte1 & 0x7f);
221 switch (r)
222 {
223 case 0: UNW_DEC_REG_GR(P3, UNW_REG_PSP, dst, arg); break;
224 case 1: UNW_DEC_REG_GR(P3, UNW_REG_RP, dst, arg); break;
225 case 2: UNW_DEC_REG_GR(P3, UNW_REG_PFS, dst, arg); break;
226 case 3: UNW_DEC_REG_GR(P3, UNW_REG_PR, dst, arg); break;
227 case 4: UNW_DEC_REG_GR(P3, UNW_REG_UNAT, dst, arg); break;
228 case 5: UNW_DEC_REG_GR(P3, UNW_REG_LC, dst, arg); break;
229 case 6: UNW_DEC_RP_BR(P3, dst, arg); break;
230 case 7: UNW_DEC_REG_GR(P3, UNW_REG_RNAT, dst, arg); break;
231 case 8: UNW_DEC_REG_GR(P3, UNW_REG_BSP, dst, arg); break;
232 case 9: UNW_DEC_REG_GR(P3, UNW_REG_BSPSTORE, dst, arg); break;
233 case 10: UNW_DEC_REG_GR(P3, UNW_REG_FPSR, dst, arg); break;
234 case 11: UNW_DEC_PRIUNAT_GR(P3, dst, arg); break;
235 default: UNW_DEC_BAD_CODE(r); break;
236 }
237 }
238 else if ((code & 0x7) == 0)
239 UNW_DEC_SPILL_MASK(P4, dp, arg);
240 else if ((code & 0x7) == 1)
241 {
242 unw_word grmask, frmask, byte1, byte2, byte3;
243
244 byte1 = *dp++; byte2 = *dp++; byte3 = *dp++;
245 grmask = ((byte1 >> 4) & 0xf);
246 frmask = ((byte1 & 0xf) << 16) | (byte2 << 8) | byte3;
247 UNW_DEC_FRGR_MEM(P5, grmask, frmask, arg);
248 }
249 else
250 UNW_DEC_BAD_CODE(code);
251 return dp;
252}
253
254static unsigned char *
255unw_decode_p6 (unsigned char *dp, unsigned char code, void *arg)
256{
257 int gregs = (code & 0x10) != 0;
258 unsigned char mask = (code & 0x0f);
259
260 if (gregs)
261 UNW_DEC_GR_MEM(P6, mask, arg);
262 else
263 UNW_DEC_FR_MEM(P6, mask, arg);
264 return dp;
265}
266
267static unsigned char *
268unw_decode_p7_p10 (unsigned char *dp, unsigned char code, void *arg)
269{
270 unsigned char r, byte1, byte2;
271 unw_word t, size;
272
273 if ((code & 0x10) == 0)
274 {
275 r = (code & 0xf);
276 t = unw_decode_uleb128 (&dp);
277 switch (r)
278 {
279 case 0:
280 size = unw_decode_uleb128 (&dp);
281 UNW_DEC_MEM_STACK_F(P7, t, size, arg);
282 break;
283
284 case 1: UNW_DEC_MEM_STACK_V(P7, t, arg); break;
285 case 2: UNW_DEC_SPILL_BASE(P7, t, arg); break;
286 case 3: UNW_DEC_REG_SPREL(P7, UNW_REG_PSP, t, arg); break;
287 case 4: UNW_DEC_REG_WHEN(P7, UNW_REG_RP, t, arg); break;
288 case 5: UNW_DEC_REG_PSPREL(P7, UNW_REG_RP, t, arg); break;
289 case 6: UNW_DEC_REG_WHEN(P7, UNW_REG_PFS, t, arg); break;
290 case 7: UNW_DEC_REG_PSPREL(P7, UNW_REG_PFS, t, arg); break;
291 case 8: UNW_DEC_REG_WHEN(P7, UNW_REG_PR, t, arg); break;
292 case 9: UNW_DEC_REG_PSPREL(P7, UNW_REG_PR, t, arg); break;
293 case 10: UNW_DEC_REG_WHEN(P7, UNW_REG_LC, t, arg); break;
294 case 11: UNW_DEC_REG_PSPREL(P7, UNW_REG_LC, t, arg); break;
295 case 12: UNW_DEC_REG_WHEN(P7, UNW_REG_UNAT, t, arg); break;
296 case 13: UNW_DEC_REG_PSPREL(P7, UNW_REG_UNAT, t, arg); break;
297 case 14: UNW_DEC_REG_WHEN(P7, UNW_REG_FPSR, t, arg); break;
298 case 15: UNW_DEC_REG_PSPREL(P7, UNW_REG_FPSR, t, arg); break;
299 default: UNW_DEC_BAD_CODE(r); break;
300 }
301 }
302 else
303 {
304 switch (code & 0xf)
305 {
306 case 0x0: /* p8 */
307 {
308 r = *dp++;
309 t = unw_decode_uleb128 (&dp);
310 switch (r)
311 {
312 case 1: UNW_DEC_REG_SPREL(P8, UNW_REG_RP, t, arg); break;
313 case 2: UNW_DEC_REG_SPREL(P8, UNW_REG_PFS, t, arg); break;
314 case 3: UNW_DEC_REG_SPREL(P8, UNW_REG_PR, t, arg); break;
315 case 4: UNW_DEC_REG_SPREL(P8, UNW_REG_LC, t, arg); break;
316 case 5: UNW_DEC_REG_SPREL(P8, UNW_REG_UNAT, t, arg); break;
317 case 6: UNW_DEC_REG_SPREL(P8, UNW_REG_FPSR, t, arg); break;
318 case 7: UNW_DEC_REG_WHEN(P8, UNW_REG_BSP, t, arg); break;
319 case 8: UNW_DEC_REG_PSPREL(P8, UNW_REG_BSP, t, arg); break;
320 case 9: UNW_DEC_REG_SPREL(P8, UNW_REG_BSP, t, arg); break;
321 case 10: UNW_DEC_REG_WHEN(P8, UNW_REG_BSPSTORE, t, arg); break;
322 case 11: UNW_DEC_REG_PSPREL(P8, UNW_REG_BSPSTORE, t, arg); break;
323 case 12: UNW_DEC_REG_SPREL(P8, UNW_REG_BSPSTORE, t, arg); break;
324 case 13: UNW_DEC_REG_WHEN(P8, UNW_REG_RNAT, t, arg); break;
325 case 14: UNW_DEC_REG_PSPREL(P8, UNW_REG_RNAT, t, arg); break;
326 case 15: UNW_DEC_REG_SPREL(P8, UNW_REG_RNAT, t, arg); break;
327 case 16: UNW_DEC_PRIUNAT_WHEN_GR(P8, t, arg); break;
328 case 17: UNW_DEC_PRIUNAT_PSPREL(P8, t, arg); break;
329 case 18: UNW_DEC_PRIUNAT_SPREL(P8, t, arg); break;
330 case 19: UNW_DEC_PRIUNAT_WHEN_MEM(P8, t, arg); break;
331 default: UNW_DEC_BAD_CODE(r); break;
332 }
333 }
334 break;
335
336 case 0x1:
337 byte1 = *dp++; byte2 = *dp++;
338 UNW_DEC_GR_GR(P9, (byte1 & 0xf), (byte2 & 0x7f), arg);
339 break;
340
341 case 0xf: /* p10 */
342 byte1 = *dp++; byte2 = *dp++;
343 UNW_DEC_ABI(P10, byte1, byte2, arg);
344 break;
345
346 case 0x9:
347 return unw_decode_x1 (dp, code, arg);
348
349 case 0xa:
350 return unw_decode_x2 (dp, code, arg);
351
352 case 0xb:
353 return unw_decode_x3 (dp, code, arg);
354
355 case 0xc:
356 return unw_decode_x4 (dp, code, arg);
357
358 default:
359 UNW_DEC_BAD_CODE(code);
360 break;
361 }
362 }
363 return dp;
364}
365
366static unsigned char *
367unw_decode_b1 (unsigned char *dp, unsigned char code, void *arg)
368{
369 unw_word label = (code & 0x1f);
370
371 if ((code & 0x20) != 0)
372 UNW_DEC_COPY_STATE(B1, label, arg);
373 else
374 UNW_DEC_LABEL_STATE(B1, label, arg);
375 return dp;
376}
377
378static unsigned char *
379unw_decode_b2 (unsigned char *dp, unsigned char code, void *arg)
380{
381 unw_word t;
382
383 t = unw_decode_uleb128 (&dp);
384 UNW_DEC_EPILOGUE(B2, t, (code & 0x1f), arg);
385 return dp;
386}
387
388static unsigned char *
389unw_decode_b3_x4 (unsigned char *dp, unsigned char code, void *arg)
390{
391 unw_word t, ecount, label;
392
393 if ((code & 0x10) == 0)
394 {
395 t = unw_decode_uleb128 (&dp);
396 ecount = unw_decode_uleb128 (&dp);
397 UNW_DEC_EPILOGUE(B3, t, ecount, arg);
398 }
399 else if ((code & 0x07) == 0)
400 {
401 label = unw_decode_uleb128 (&dp);
402 if ((code & 0x08) != 0)
403 UNW_DEC_COPY_STATE(B4, label, arg);
404 else
405 UNW_DEC_LABEL_STATE(B4, label, arg);
406 }
407 else
408 switch (code & 0x7)
409 {
410 case 1: return unw_decode_x1 (dp, code, arg);
411 case 2: return unw_decode_x2 (dp, code, arg);
412 case 3: return unw_decode_x3 (dp, code, arg);
413 case 4: return unw_decode_x4 (dp, code, arg);
414 default: UNW_DEC_BAD_CODE(code); break;
415 }
416 return dp;
417}
418
419typedef unsigned char *(*unw_decoder) (unsigned char *, unsigned char, void *);
420
421static unw_decoder unw_decode_table[2][8] =
422{
423 /* prologue table: */
424 {
425 unw_decode_r1, /* 0 */
426 unw_decode_r1,
427 unw_decode_r2,
428 unw_decode_r3,
429 unw_decode_p1, /* 4 */
430 unw_decode_p2_p5,
431 unw_decode_p6,
432 unw_decode_p7_p10
433 },
434 {
435 unw_decode_r1, /* 0 */
436 unw_decode_r1,
437 unw_decode_r2,
438 unw_decode_r3,
439 unw_decode_b1, /* 4 */
440 unw_decode_b1,
441 unw_decode_b2,
442 unw_decode_b3_x4
443 }
444};
445
446/*
447 * Decode one descriptor and return address of next descriptor.
448 */
449static inline unsigned char *
450unw_decode (unsigned char *dp, int inside_body, void *arg)
451{
452 unw_decoder decoder;
453 unsigned char code;
454
455 code = *dp++;
456 decoder = unw_decode_table[inside_body][code >> 5];
457 dp = (*decoder) (dp, code, arg);
458 return dp;
459}
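Driving the decoder is a two-line loop. The sketch below assumes dp points at the first descriptor byte, desc_end just past the last one, and sr is a state record whose in_body and done fields are maintained by the UNW_DEC_* action macros; this mirrors how the region scanner in unwind.c consumes descriptors:

	/* illustrative driver loop */
	while (dp < desc_end && !sr->done)
		dp = unw_decode(dp, sr->in_body, sr);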
diff --git a/arch/ia64/kernel/unwind_i.h b/arch/ia64/kernel/unwind_i.h
new file mode 100644
index 000000000000..96693a6ae370
--- /dev/null
+++ b/arch/ia64/kernel/unwind_i.h
@@ -0,0 +1,164 @@
1/*
2 * Copyright (C) 2000, 2002-2003 Hewlett-Packard Co
3 * David Mosberger-Tang <davidm@hpl.hp.com>
4 *
5 * Kernel unwind support.
6 */
7
8#define UNW_VER(x) ((x) >> 48)
9#define UNW_FLAG_MASK 0x0000ffff00000000
10#define UNW_FLAG_OSMASK 0x0000f00000000000
11#define UNW_FLAG_EHANDLER(x) ((x) & 0x0000000100000000L)
12#define UNW_FLAG_UHANDLER(x) ((x) & 0x0000000200000000L)
13#define UNW_LENGTH(x) ((x) & 0x00000000ffffffffL)
14
15enum unw_register_index {
16 /* primary unat: */
17 UNW_REG_PRI_UNAT_GR,
18 UNW_REG_PRI_UNAT_MEM,
19
20 /* register stack */
21 UNW_REG_BSP, /* register stack pointer */
22 UNW_REG_BSPSTORE,
23 UNW_REG_PFS, /* previous function state */
24 UNW_REG_RNAT,
25 /* memory stack */
26 UNW_REG_PSP, /* previous memory stack pointer */
27 /* return pointer: */
28 UNW_REG_RP,
29
30 /* preserved registers: */
31 UNW_REG_R4, UNW_REG_R5, UNW_REG_R6, UNW_REG_R7,
32 UNW_REG_UNAT, UNW_REG_PR, UNW_REG_LC, UNW_REG_FPSR,
33 UNW_REG_B1, UNW_REG_B2, UNW_REG_B3, UNW_REG_B4, UNW_REG_B5,
34 UNW_REG_F2, UNW_REG_F3, UNW_REG_F4, UNW_REG_F5,
35 UNW_REG_F16, UNW_REG_F17, UNW_REG_F18, UNW_REG_F19,
36 UNW_REG_F20, UNW_REG_F21, UNW_REG_F22, UNW_REG_F23,
37 UNW_REG_F24, UNW_REG_F25, UNW_REG_F26, UNW_REG_F27,
38 UNW_REG_F28, UNW_REG_F29, UNW_REG_F30, UNW_REG_F31,
39 UNW_NUM_REGS
40};
41
42struct unw_info_block {
43 u64 header;
44 u64 desc[0]; /* unwind descriptors */
45 /* personality routine and language-specific data follow behind descriptors */
46};
47
48struct unw_table {
49 struct unw_table *next; /* must be first member! */
50 const char *name;
51 unsigned long gp; /* global pointer for this load-module */
52 unsigned long segment_base; /* base for offsets in the unwind table entries */
53 unsigned long start;
54 unsigned long end;
55 const struct unw_table_entry *array;
56 unsigned long length;
57};
58
59enum unw_where {
60 UNW_WHERE_NONE, /* register isn't saved at all */
61 UNW_WHERE_GR, /* register is saved in a general register */
62 UNW_WHERE_FR, /* register is saved in a floating-point register */
63 UNW_WHERE_BR, /* register is saved in a branch register */
64 UNW_WHERE_SPREL, /* register is saved on memstack (sp-relative) */
65 UNW_WHERE_PSPREL, /* register is saved on memstack (psp-relative) */
66 /*
67 * At the end of each prologue these locations get resolved to
68 * UNW_WHERE_PSPREL and UNW_WHERE_GR, respectively:
69 */
70 UNW_WHERE_SPILL_HOME, /* register is saved in its spill home */
71 UNW_WHERE_GR_SAVE /* register is saved in next general register */
72};
73
74#define UNW_WHEN_NEVER 0x7fffffff
75
76struct unw_reg_info {
77 unsigned long val; /* save location: register number or offset */
78 enum unw_where where; /* where the register gets saved */
79 int when; /* when the register gets saved */
80};
81
82struct unw_reg_state {
83 struct unw_reg_state *next; /* next (outer) element on state stack */
84 struct unw_reg_info reg[UNW_NUM_REGS]; /* register save locations */
85};
86
87struct unw_labeled_state {
88 struct unw_labeled_state *next; /* next labeled state (or NULL) */
89 unsigned long label; /* label for this state */
90 struct unw_reg_state saved_state;
91};
92
93struct unw_state_record {
94 unsigned int first_region : 1; /* is this the first region? */
95 unsigned int done : 1; /* are we done scanning descriptors? */
96 unsigned int any_spills : 1; /* got any register spills? */
97 unsigned int in_body : 1; /* are we inside a body (as opposed to a prologue)? */
98 unsigned long flags; /* see UNW_FLAG_* in unwind.h */
99
100 u8 *imask; /* imask of spill_mask record or NULL */
101 unsigned long pr_val; /* predicate values */
102 unsigned long pr_mask; /* predicate mask */
103 long spill_offset; /* psp-relative offset for spill base */
104 int region_start;
105 int region_len;
106 int epilogue_start;
107 int epilogue_count;
108 int when_target;
109
110 u8 gr_save_loc; /* next general register to use for saving a register */
111 u8 return_link_reg; /* branch register in which the return link is passed */
112
113 struct unw_labeled_state *labeled_states; /* list of all labeled states */
114 struct unw_reg_state curr; /* current state */
115};
116
117enum unw_nat_type {
118 UNW_NAT_NONE, /* NaT not represented */
119 UNW_NAT_VAL, /* NaT represented by NaT value (fp reg) */
120 UNW_NAT_MEMSTK, /* NaT value is in unat word at offset OFF */
121 UNW_NAT_REGSTK /* NaT is in rnat */
122};
123
124enum unw_insn_opcode {
125 UNW_INSN_ADD, /* s[dst] += val */
126 UNW_INSN_ADD_PSP, /* s[dst] = (s.psp + val) */
127 UNW_INSN_ADD_SP, /* s[dst] = (s.sp + val) */
128 UNW_INSN_MOVE, /* s[dst] = s[val] */
129 UNW_INSN_MOVE2, /* s[dst] = s[val]; s[dst+1] = s[val+1] */
130 UNW_INSN_MOVE_STACKED, /* s[dst] = ia64_rse_skip(*s.bsp, val) */
131 UNW_INSN_SETNAT_MEMSTK, /* s[dst+1].nat.type = MEMSTK;
132 s[dst+1].nat.off = *s.pri_unat - s[dst] */
133 UNW_INSN_SETNAT_TYPE, /* s[dst+1].nat.type = val */
134 UNW_INSN_LOAD, /* s[dst] = *s[val] */
135 UNW_INSN_MOVE_SCRATCH, /* s[dst] = scratch reg "val" */
136 UNW_INSN_MOVE_CONST, /* s[dst] = constant reg "val" */
137};
138
139struct unw_insn {
140 unsigned int opc : 4;
141 unsigned int dst : 9;
142 signed int val : 19;
143};
144
145/*
146 * Preserved general static registers (r4-r7) give rise to two script
147 * instructions; everything else yields at most one instruction; at
148 * the end of the script, the psp gets popped, accounting for one more
149 * instruction.
150 */
151#define UNW_MAX_SCRIPT_LEN (UNW_NUM_REGS + 5)
152
153struct unw_script {
154 unsigned long ip; /* ip this script is for */
155 unsigned long pr_mask; /* mask of predicates script depends on */
156 unsigned long pr_val; /* predicate values this script is for */
157 rwlock_t lock;
158 unsigned int flags; /* see UNW_FLAG_* in unwind.h */
159 unsigned short lru_chain; /* used for least-recently-used chain */
160 unsigned short coll_chain; /* used for hash collisions */
161 unsigned short hint; /* hint for next script to try (or -1) */
162 unsigned short count; /* number of instructions in script */
163 struct unw_insn insn[UNW_MAX_SCRIPT_LEN];
164};
diff --git a/arch/ia64/kernel/vmlinux.lds.S b/arch/ia64/kernel/vmlinux.lds.S
new file mode 100644
index 000000000000..b9f0db4c1b04
--- /dev/null
+++ b/arch/ia64/kernel/vmlinux.lds.S
@@ -0,0 +1,251 @@
1#include <linux/config.h>
2
3#include <asm/cache.h>
4#include <asm/ptrace.h>
5#include <asm/system.h>
6#include <asm/pgtable.h>
7
8#define LOAD_OFFSET (KERNEL_START - KERNEL_TR_PAGE_SIZE)
9#include <asm-generic/vmlinux.lds.h>
10
11OUTPUT_FORMAT("elf64-ia64-little")
12OUTPUT_ARCH(ia64)
13ENTRY(phys_start)
14jiffies = jiffies_64;
15PHDRS {
16 code PT_LOAD;
17 percpu PT_LOAD;
18 data PT_LOAD;
19}
20SECTIONS
21{
22 /* Sections to be discarded */
23 /DISCARD/ : {
24 *(.exit.text)
25 *(.exit.data)
26 *(.exitcall.exit)
27 *(.IA_64.unwind.exit.text)
28 *(.IA_64.unwind_info.exit.text)
29 }
30
31 v = PAGE_OFFSET; /* this symbol is here to make debugging easier... */
32 phys_start = _start - LOAD_OFFSET;
33
34 code : { } :code
35 . = KERNEL_START;
36
37 _text = .;
38 _stext = .;
39
40 .text : AT(ADDR(.text) - LOAD_OFFSET)
41 {
42 *(.text.ivt)
43 *(.text)
44 SCHED_TEXT
45 LOCK_TEXT
46 *(.gnu.linkonce.t*)
47 }
48 .text2 : AT(ADDR(.text2) - LOAD_OFFSET)
49 { *(.text2) }
50#ifdef CONFIG_SMP
51 .text.lock : AT(ADDR(.text.lock) - LOAD_OFFSET)
52 { *(.text.lock) }
53#endif
54 _etext = .;
55
56 /* Read-only data */
57
58 /* Exception table */
59 . = ALIGN(16);
60 __ex_table : AT(ADDR(__ex_table) - LOAD_OFFSET)
61 {
62 __start___ex_table = .;
63 *(__ex_table)
64 __stop___ex_table = .;
65 }
66
67 .data.patch.vtop : AT(ADDR(.data.patch.vtop) - LOAD_OFFSET)
68 {
69 __start___vtop_patchlist = .;
70 *(.data.patch.vtop)
71 __end___vtop_patchlist = .;
72 }
73
74 .data.patch.mckinley_e9 : AT(ADDR(.data.patch.mckinley_e9) - LOAD_OFFSET)
75 {
76 __start___mckinley_e9_bundles = .;
77 *(.data.patch.mckinley_e9)
78 __end___mckinley_e9_bundles = .;
79 }
80
81 /* Global data */
82 _data = .;
83
84#if defined(CONFIG_IA64_GENERIC)
85 /* Machine Vector */
86 . = ALIGN(16);
87 .machvec : AT(ADDR(.machvec) - LOAD_OFFSET)
88 {
89 machvec_start = .;
90 *(.machvec)
91 machvec_end = .;
92 }
93#endif
94
95 /* Unwind info & table: */
96 . = ALIGN(8);
97 .IA_64.unwind_info : AT(ADDR(.IA_64.unwind_info) - LOAD_OFFSET)
98 { *(.IA_64.unwind_info*) }
99 .IA_64.unwind : AT(ADDR(.IA_64.unwind) - LOAD_OFFSET)
100 {
101 __start_unwind = .;
102 *(.IA_64.unwind*)
103 __end_unwind = .;
104 }
105
106 RODATA
107
108 .opd : AT(ADDR(.opd) - LOAD_OFFSET)
109 { *(.opd) }
110
111 /* Initialization code and data: */
112
113 . = ALIGN(PAGE_SIZE);
114 __init_begin = .;
115 .init.text : AT(ADDR(.init.text) - LOAD_OFFSET)
116 {
117 _sinittext = .;
118 *(.init.text)
119 _einittext = .;
120 }
121
122 .init.data : AT(ADDR(.init.data) - LOAD_OFFSET)
123 { *(.init.data) }
124
125 .init.ramfs : AT(ADDR(.init.ramfs) - LOAD_OFFSET)
126 {
127 __initramfs_start = .;
128 *(.init.ramfs)
129 __initramfs_end = .;
130 }
131
132 . = ALIGN(16);
133 .init.setup : AT(ADDR(.init.setup) - LOAD_OFFSET)
134 {
135 __setup_start = .;
136 *(.init.setup)
137 __setup_end = .;
138 }
139 .initcall.init : AT(ADDR(.initcall.init) - LOAD_OFFSET)
140 {
141 __initcall_start = .;
142 *(.initcall1.init)
143 *(.initcall2.init)
144 *(.initcall3.init)
145 *(.initcall4.init)
146 *(.initcall5.init)
147 *(.initcall6.init)
148 *(.initcall7.init)
149 __initcall_end = .;
150 }
151 __con_initcall_start = .;
152 .con_initcall.init : AT(ADDR(.con_initcall.init) - LOAD_OFFSET)
153 { *(.con_initcall.init) }
154 __con_initcall_end = .;
155 __security_initcall_start = .;
156 .security_initcall.init : AT(ADDR(.security_initcall.init) - LOAD_OFFSET)
157 { *(.security_initcall.init) }
158 __security_initcall_end = .;
159 . = ALIGN(PAGE_SIZE);
160 __init_end = .;
161
162 /* The initial task and kernel stack */
163 .data.init_task : AT(ADDR(.data.init_task) - LOAD_OFFSET)
164 { *(.data.init_task) }
165
166 .data.page_aligned : AT(ADDR(.data.page_aligned) - LOAD_OFFSET)
167 { *(__special_page_section)
168 __start_gate_section = .;
169 *(.data.gate)
170 __stop_gate_section = .;
171 }
172 . = ALIGN(PAGE_SIZE); /* make sure the gate page doesn't expose kernel data */
173
174 .data.cacheline_aligned : AT(ADDR(.data.cacheline_aligned) - LOAD_OFFSET)
175 { *(.data.cacheline_aligned) }
176
177 /* Per-cpu data: */
178 percpu : { } :percpu
179 . = ALIGN(PERCPU_PAGE_SIZE);
180 __phys_per_cpu_start = .;
181 .data.percpu PERCPU_ADDR : AT(__phys_per_cpu_start - LOAD_OFFSET)
182 {
183 __per_cpu_start = .;
184 *(.data.percpu)
185 __per_cpu_end = .;
186 }
187 . = __phys_per_cpu_start + PERCPU_PAGE_SIZE; /* ensure percpu data fits into percpu page size */
188
189 data : { } :data
190 .data : AT(ADDR(.data) - LOAD_OFFSET)
191 { *(.data) *(.data1) *(.gnu.linkonce.d*) CONSTRUCTORS }
192
193 . = ALIGN(16); /* gp must be 16-byte aligned for exc. table */
194 .got : AT(ADDR(.got) - LOAD_OFFSET)
195 { *(.got.plt) *(.got) }
196 __gp = ADDR(.got) + 0x200000;
197 /* We want the small data sections together, so single-instruction offsets
198 can access them all, and initialized data all before uninitialized, so
199 we can shorten the on-disk segment size. */
200 .sdata : AT(ADDR(.sdata) - LOAD_OFFSET)
201 { *(.sdata) *(.sdata1) *(.srdata) }
202 _edata = .;
203 _bss = .;
204 .sbss : AT(ADDR(.sbss) - LOAD_OFFSET)
205 { *(.sbss) *(.scommon) }
206 .bss : AT(ADDR(.bss) - LOAD_OFFSET)
207 { *(.bss) *(COMMON) }
208
209 _end = .;
210
211 code : { } :code
212 /* Stabs debugging sections. */
213 .stab 0 : { *(.stab) }
214 .stabstr 0 : { *(.stabstr) }
215 .stab.excl 0 : { *(.stab.excl) }
216 .stab.exclstr 0 : { *(.stab.exclstr) }
217 .stab.index 0 : { *(.stab.index) }
218 .stab.indexstr 0 : { *(.stab.indexstr) }
219 /* DWARF debug sections.
220 Symbols in the DWARF debugging sections are relative to the beginning
221 of the section so we begin them at 0. */
222 /* DWARF 1 */
223 .debug 0 : { *(.debug) }
224 .line 0 : { *(.line) }
225 /* GNU DWARF 1 extensions */
226 .debug_srcinfo 0 : { *(.debug_srcinfo) }
227 .debug_sfnames 0 : { *(.debug_sfnames) }
228 /* DWARF 1.1 and DWARF 2 */
229 .debug_aranges 0 : { *(.debug_aranges) }
230 .debug_pubnames 0 : { *(.debug_pubnames) }
231 /* DWARF 2 */
232 .debug_info 0 : { *(.debug_info) }
233 .debug_abbrev 0 : { *(.debug_abbrev) }
234 .debug_line 0 : { *(.debug_line) }
235 .debug_frame 0 : { *(.debug_frame) }
236 .debug_str 0 : { *(.debug_str) }
237 .debug_loc 0 : { *(.debug_loc) }
238 .debug_macinfo 0 : { *(.debug_macinfo) }
239 /* SGI/MIPS DWARF 2 extensions */
240 .debug_weaknames 0 : { *(.debug_weaknames) }
241 .debug_funcnames 0 : { *(.debug_funcnames) }
242 .debug_typenames 0 : { *(.debug_typenames) }
243 .debug_varnames 0 : { *(.debug_varnames) }
244 /* These must appear regardless of . */
245 /* Discard them for now since Intel SoftSDV cannot handle them.
246 .comment 0 : { *(.comment) }
247 .note 0 : { *(.note) }
248 */
249 /DISCARD/ : { *(.comment) }
250 /DISCARD/ : { *(.note) }
251}
diff --git a/arch/ia64/lib/Makefile b/arch/ia64/lib/Makefile
new file mode 100644
index 000000000000..1902c3c2ef92
--- /dev/null
+++ b/arch/ia64/lib/Makefile
@@ -0,0 +1,52 @@
1#
2# Makefile for ia64-specific library routines..
3#
4
5obj-y := io.o
6
7lib-y := __divsi3.o __udivsi3.o __modsi3.o __umodsi3.o \
8 __divdi3.o __udivdi3.o __moddi3.o __umoddi3.o \
9 bitop.o checksum.o clear_page.o csum_partial_copy.o copy_page.o \
10 clear_user.o strncpy_from_user.o strlen_user.o strnlen_user.o \
11 flush.o ip_fast_csum.o do_csum.o \
12 memset.o strlen.o swiotlb.o
13
14lib-$(CONFIG_ITANIUM) += copy_page.o copy_user.o memcpy.o
15lib-$(CONFIG_MCKINLEY) += copy_page_mck.o memcpy_mck.o
16lib-$(CONFIG_PERFMON) += carta_random.o
17lib-$(CONFIG_MD_RAID5) += xor.o
18lib-$(CONFIG_HAVE_DEC_LOCK) += dec_and_lock.o
19
20AFLAGS___divdi3.o =
21AFLAGS___udivdi3.o = -DUNSIGNED
22AFLAGS___moddi3.o = -DMODULO
23AFLAGS___umoddi3.o = -DUNSIGNED -DMODULO
24
25AFLAGS___divsi3.o =
26AFLAGS___udivsi3.o = -DUNSIGNED
27AFLAGS___modsi3.o = -DMODULO
28AFLAGS___umodsi3.o = -DUNSIGNED -DMODULO
29
30$(obj)/__divdi3.o: $(src)/idiv64.S FORCE
31 $(call if_changed_dep,as_o_S)
32
33$(obj)/__udivdi3.o: $(src)/idiv64.S FORCE
34 $(call if_changed_dep,as_o_S)
35
36$(obj)/__moddi3.o: $(src)/idiv64.S FORCE
37 $(call if_changed_dep,as_o_S)
38
39$(obj)/__umoddi3.o: $(src)/idiv64.S FORCE
40 $(call if_changed_dep,as_o_S)
41
42$(obj)/__divsi3.o: $(src)/idiv32.S FORCE
43 $(call if_changed_dep,as_o_S)
44
45$(obj)/__udivsi3.o: $(src)/idiv32.S FORCE
46 $(call if_changed_dep,as_o_S)
47
48$(obj)/__modsi3.o: $(src)/idiv32.S FORCE
49 $(call if_changed_dep,as_o_S)
50
51$(obj)/__umodsi3.o: $(src)/idiv32.S FORCE
52 $(call if_changed_dep,as_o_S)
diff --git a/arch/ia64/lib/bitop.c b/arch/ia64/lib/bitop.c
new file mode 100644
index 000000000000..82e299c8464e
--- /dev/null
+++ b/arch/ia64/lib/bitop.c
@@ -0,0 +1,88 @@
1#include <linux/compiler.h>
2#include <linux/types.h>
3#include <asm/intrinsics.h>
4#include <linux/module.h>
5#include <linux/bitops.h>
6
7/*
8 * Find next zero bit in a bitmap reasonably efficiently..
9 */
10
11int __find_next_zero_bit (const void *addr, unsigned long size, unsigned long offset)
12{
13 unsigned long *p = ((unsigned long *) addr) + (offset >> 6);
14 unsigned long result = offset & ~63UL;
15 unsigned long tmp;
16
17 if (offset >= size)
18 return size;
19 size -= result;
20 offset &= 63UL;
21 if (offset) {
22 tmp = *(p++);
23 tmp |= ~0UL >> (64-offset);
24 if (size < 64)
25 goto found_first;
26 if (~tmp)
27 goto found_middle;
28 size -= 64;
29 result += 64;
30 }
31 while (size & ~63UL) {
32 if (~(tmp = *(p++)))
33 goto found_middle;
34 result += 64;
35 size -= 64;
36 }
37 if (!size)
38 return result;
39 tmp = *p;
40found_first:
41 tmp |= ~0UL << size;
42 if (tmp == ~0UL) /* any bits zero? */
43 return result + size; /* nope */
44found_middle:
45 return result + ffz(tmp);
46}
47EXPORT_SYMBOL(__find_next_zero_bit);
48
49/*
50 * Find next bit in a bitmap reasonably efficiently..
51 */
52int __find_next_bit(const void *addr, unsigned long size, unsigned long offset)
53{
54 unsigned long *p = ((unsigned long *) addr) + (offset >> 6);
55 unsigned long result = offset & ~63UL;
56 unsigned long tmp;
57
58 if (offset >= size)
59 return size;
60 size -= result;
61 offset &= 63UL;
62 if (offset) {
63 tmp = *(p++);
64 tmp &= ~0UL << offset;
65 if (size < 64)
66 goto found_first;
67 if (tmp)
68 goto found_middle;
69 size -= 64;
70 result += 64;
71 }
72 while (size & ~63UL) {
73 if ((tmp = *(p++)))
74 goto found_middle;
75 result += 64;
76 size -= 64;
77 }
78 if (!size)
79 return result;
80 tmp = *p;
81 found_first:
82 tmp &= ~0UL >> (64-size);
83 if (tmp == 0UL) /* Are any bits set? */
84 return result + size; /* Nope. */
85 found_middle:
86 return result + __ffs(tmp);
87}
88EXPORT_SYMBOL(__find_next_bit);
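A small usage sketch; callers normally go through the find_next_bit()/find_next_zero_bit() wrappers rather than calling these helpers directly, and the bitmap values below are just an example:

	static void bitop_example(void)
	{
		unsigned long map[2] = { ~0UL, 0x0fUL };	/* bits 0-63 and 64-67 set */
		int first_zero, next_set;

		first_zero = __find_next_zero_bit(map, 128, 0);	/* first clear bit: 68 */
		next_set   = __find_next_bit(map, 128, 64);	/* first set bit >= 64: 64 */
		(void) first_zero;
		(void) next_set;
	}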
diff --git a/arch/ia64/lib/carta_random.S b/arch/ia64/lib/carta_random.S
new file mode 100644
index 000000000000..d0674c360364
--- /dev/null
+++ b/arch/ia64/lib/carta_random.S
@@ -0,0 +1,54 @@
1/*
2 * Fast, simple, yet decent quality random number generator based on
3 * a paper by David G. Carta ("Two Fast Implementations of the
4 * `Minimal Standard' Random Number Generator," Communications of the
5 * ACM, January, 1990).
6 *
7 * Copyright (C) 2002 Hewlett-Packard Co
8 * David Mosberger-Tang <davidm@hpl.hp.com>
9 */
10
11#include <asm/asmmacro.h>
12
13#define a r2
14#define m r3
15#define lo r8
16#define hi r9
17#define t0 r16
18#define t1 r17
19#define seed r32
20
21GLOBAL_ENTRY(carta_random32)
22 movl a = (16807 << 16) | 16807
23 ;;
24 pmpyshr2.u t0 = a, seed, 0
25 pmpyshr2.u t1 = a, seed, 16
26 ;;
27 unpack2.l t0 = t1, t0
28 dep m = -1, r0, 0, 31
29 ;;
30 zxt4 lo = t0
31 shr.u hi = t0, 32
32 ;;
33 dep t0 = 0, hi, 15, 49 // t0 = (hi & 0x7fff)
34 ;;
35 shl t0 = t0, 16 // t0 = (hi & 0x7fff) << 16
36 shr t1 = hi, 15 // t1 = (hi >> 15)
37 ;;
38 add lo = lo, t0
39 ;;
40 cmp.gtu p6, p0 = lo, m
41 ;;
42(p6) and lo = lo, m
43 ;;
44(p6) add lo = 1, lo
45 ;;
46 add lo = lo, t1
47 ;;
48 cmp.gtu p6, p0 = lo, m
49 ;;
50(p6) and lo = lo, m
51 ;;
52(p6) add lo = 1, lo
53 br.ret.sptk.many rp
54END(carta_random32)
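A reference sketch in C of the generator implemented above: the Park-Miller "minimal standard" recurrence x' = 16807*x mod (2^31 - 1), using Carta's split-and-add reduction so no division is needed. It is not claimed to be cycle- or bit-for-bit identical with the assembly, only to compute the same recurrence:

	#include <stdint.h>

	static uint32_t carta_random32_ref(uint32_t seed)
	{
		uint64_t p  = 16807ULL * seed;			/* at most 46 bits */
		uint32_t lo = (uint32_t)(p & 0x7fffffff);	/* low 31 bits */
		uint32_t hi = (uint32_t)(p >> 31);		/* remaining high bits */
		uint32_t x  = lo + hi;	/* == p mod (2^31-1), possibly + (2^31-1) */

		if (x >= 0x7fffffff)
			x -= 0x7fffffff;
		return x;
	}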
diff --git a/arch/ia64/lib/checksum.c b/arch/ia64/lib/checksum.c
new file mode 100644
index 000000000000..beb11721d9f5
--- /dev/null
+++ b/arch/ia64/lib/checksum.c
@@ -0,0 +1,102 @@
1/*
2 * Network checksum routines
3 *
4 * Copyright (C) 1999, 2003 Hewlett-Packard Co
5 * Stephane Eranian <eranian@hpl.hp.com>
6 *
7 * Most of the code coming from arch/alpha/lib/checksum.c
8 *
9 * This file contains network checksum routines that are better done
10 * in an architecture-specific manner due to speed..
11 */
12
13#include <linux/module.h>
14#include <linux/string.h>
15
16#include <asm/byteorder.h>
17
18static inline unsigned short
19from64to16 (unsigned long x)
20{
21 /* add up 32-bit words for 33 bits */
22 x = (x & 0xffffffff) + (x >> 32);
23 /* add up 16-bit and 17-bit words for 17+c bits */
24 x = (x & 0xffff) + (x >> 16);
25 /* add up 16-bit and 2-bit for 16+c bit */
26 x = (x & 0xffff) + (x >> 16);
27 /* add up carry.. */
28 x = (x & 0xffff) + (x >> 16);
29 return x;
30}
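A plain-C sketch of the same folding, handy for checking the arithmetic outside the kernel (the helper name is illustrative):

	#include <stdint.h>

	static uint16_t fold64(uint64_t x)
	{
		x = (x & 0xffffffffULL) + (x >> 32);	/* 64 -> at most 33 bits */
		x = (x & 0xffff) + (x >> 16);		/* -> 16 bits + carry */
		x = (x & 0xffff) + (x >> 16);
		x = (x & 0xffff) + (x >> 16);		/* absorb the final carry */
		return (uint16_t) x;
	}
	/* e.g. fold64(0x100000fffeULL) == 0xffff; complementing the folded sum,
	 * as csum_tcpudp_magic() below does, yields the final checksum. */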
31
32/*
33 * computes the checksum of the TCP/UDP pseudo-header
34 * returns a 16-bit checksum, already complemented.
35 */
36unsigned short int
37csum_tcpudp_magic (unsigned long saddr, unsigned long daddr, unsigned short len,
38 unsigned short proto, unsigned int sum)
39{
40 return ~from64to16(saddr + daddr + sum + ((unsigned long) ntohs(len) << 16) +
41 ((unsigned long) proto << 8));
42}
43
44EXPORT_SYMBOL(csum_tcpudp_magic);
45
46unsigned int
47csum_tcpudp_nofold (unsigned long saddr, unsigned long daddr, unsigned short len,
48 unsigned short proto, unsigned int sum)
49{
50 unsigned long result;
51
52 result = (saddr + daddr + sum +
53 ((unsigned long) ntohs(len) << 16) +
54 ((unsigned long) proto << 8));
55
56 /* Fold down to 32-bits so we don't lose in the typedef-less network stack. */
57 /* 64 to 33 */
58 result = (result & 0xffffffff) + (result >> 32);
59 /* 33 to 32 */
60 result = (result & 0xffffffff) + (result >> 32);
61 return result;
62}
63
64extern unsigned long do_csum (const unsigned char *, long);
65
66/*
67 * computes the checksum of a memory block at buff, length len,
68 * and adds in "sum" (32-bit)
69 *
70 * returns a 32-bit number suitable for feeding into itself
71 * or csum_tcpudp_magic
72 *
73 * this function must be called with even lengths, except
74 * for the last fragment, which may be odd
75 *
76 * it's best to have buff aligned on a 32-bit boundary
77 */
78unsigned int
79csum_partial (const unsigned char * buff, int len, unsigned int sum)
80{
81 unsigned long result = do_csum(buff, len);
82
83 /* add in old sum, and carry.. */
84 result += sum;
85 /* 32+c bits -> 32 bits */
86 result = (result & 0xffffffff) + (result >> 32);
87 return result;
88}
89
90EXPORT_SYMBOL(csum_partial);
91
92/*
93 * this routine is used for miscellaneous IP-like checksums, mainly
94 * in icmp.c
95 */
96unsigned short
97ip_compute_csum (unsigned char * buff, int len)
98{
99 return ~do_csum(buff,len);
100}
101
102EXPORT_SYMBOL(ip_compute_csum);
diff --git a/arch/ia64/lib/clear_page.S b/arch/ia64/lib/clear_page.S
new file mode 100644
index 000000000000..d4987061dda7
--- /dev/null
+++ b/arch/ia64/lib/clear_page.S
@@ -0,0 +1,77 @@
1/*
2 * Copyright (C) 1999-2002 Hewlett-Packard Co
3 * Stephane Eranian <eranian@hpl.hp.com>
4 * David Mosberger-Tang <davidm@hpl.hp.com>
5 * Copyright (C) 2002 Ken Chen <kenneth.w.chen@intel.com>
6 *
7 * 1/06/01 davidm Tuned for Itanium.
8 * 2/12/02 kchen Tuned for both Itanium and McKinley
9 * 3/08/02 davidm Some more tweaking
10 */
11#include <linux/config.h>
12
13#include <asm/asmmacro.h>
14#include <asm/page.h>
15
16#ifdef CONFIG_ITANIUM
17# define L3_LINE_SIZE 64 // Itanium L3 line size
18# define PREFETCH_LINES 9 // magic number
19#else
20# define L3_LINE_SIZE 128 // McKinley L3 line size
21# define PREFETCH_LINES 12 // magic number
22#endif
23
24#define saved_lc r2
25#define dst_fetch r3
26#define dst1 r8
27#define dst2 r9
28#define dst3 r10
29#define dst4 r11
30
31#define dst_last r31
32
33GLOBAL_ENTRY(clear_page)
34 .prologue
35 .regstk 1,0,0,0
36 mov r16 = PAGE_SIZE/L3_LINE_SIZE-1 // main loop count, -1=repeat/until
37 .save ar.lc, saved_lc
38 mov saved_lc = ar.lc
39
40 .body
41 mov ar.lc = (PREFETCH_LINES - 1)
42 mov dst_fetch = in0
43 adds dst1 = 16, in0
44 adds dst2 = 32, in0
45 ;;
46.fetch: stf.spill.nta [dst_fetch] = f0, L3_LINE_SIZE
47 adds dst3 = 48, in0 // executing this multiple times is harmless
48 br.cloop.sptk.few .fetch
49 ;;
50 addl dst_last = (PAGE_SIZE - PREFETCH_LINES*L3_LINE_SIZE), dst_fetch
51 mov ar.lc = r16 // one L3 line per iteration
52 adds dst4 = 64, in0
53 ;;
54#ifdef CONFIG_ITANIUM
55 // Optimized for Itanium
561: stf.spill.nta [dst1] = f0, 64
57 stf.spill.nta [dst2] = f0, 64
58 cmp.lt p8,p0=dst_fetch, dst_last
59 ;;
60#else
61 // Optimized for McKinley
621: stf.spill.nta [dst1] = f0, 64
63 stf.spill.nta [dst2] = f0, 64
64 stf.spill.nta [dst3] = f0, 64
65 stf.spill.nta [dst4] = f0, 128
66 cmp.lt p8,p0=dst_fetch, dst_last
67 ;;
68 stf.spill.nta [dst1] = f0, 64
69 stf.spill.nta [dst2] = f0, 64
70#endif
71 stf.spill.nta [dst3] = f0, 64
72(p8) stf.spill.nta [dst_fetch] = f0, L3_LINE_SIZE
73 br.cloop.sptk.few 1b
74 ;;
75 mov ar.lc = saved_lc // restore lc
76 br.ret.sptk.many rp
77END(clear_page)
diff --git a/arch/ia64/lib/clear_user.S b/arch/ia64/lib/clear_user.S
new file mode 100644
index 000000000000..eecd8577b209
--- /dev/null
+++ b/arch/ia64/lib/clear_user.S
@@ -0,0 +1,209 @@
1/*
2 * This routine clears to zero a linear memory buffer in user space.
3 *
4 * Inputs:
5 * in0: address of buffer
6 * in1: length of buffer in bytes
7 * Outputs:
8 * r8: number of bytes that didn't get cleared due to a fault
9 *
10 * Copyright (C) 1998, 1999, 2001 Hewlett-Packard Co
11 * Stephane Eranian <eranian@hpl.hp.com>
12 */
13
14#include <asm/asmmacro.h>
15
16//
17// arguments
18//
19#define buf r32
20#define len r33
21
22//
23// local registers
24//
25#define cnt r16
26#define buf2 r17
27#define saved_lc r18
28#define saved_pfs r19
29#define tmp r20
30#define len2 r21
31#define len3 r22
32
33//
34// Theory of operations:
 35// - we check whether or not the buffer is small, i.e., less than 17 bytes,
 36//   in which case we do the byte-by-byte loop.
 37//
 38// - Otherwise we go progressively from 1 byte store to 8byte store in
 39//   the head part, the body is a 16byte store loop and we finish with the
40// tail for the last 15 bytes.
41// The good point about this breakdown is that the long buffer handling
42// contains only 2 branches.
43//
44// The reason for not using shifting & masking for both the head and the
45// tail is to stay semantically correct. This routine is not supposed
46// to write bytes outside of the buffer. While most of the time this would
47// be ok, we can't tolerate a mistake. A classical example is the case
 48// of multithreaded code where the extra bytes touched are actually owned
 49// by another thread which runs concurrently with ours. Another, less likely,
50// example is with device drivers where reading an I/O mapped location may
51// have side effects (same thing for writing).
52//
53
54GLOBAL_ENTRY(__do_clear_user)
55 .prologue
56 .save ar.pfs, saved_pfs
57 alloc saved_pfs=ar.pfs,2,0,0,0
58 cmp.eq p6,p0=r0,len // check for zero length
59 .save ar.lc, saved_lc
60 mov saved_lc=ar.lc // preserve ar.lc (slow)
61 .body
62 ;; // avoid WAW on CFM
63 adds tmp=-1,len // br.ctop is repeat/until
64 mov ret0=len // return value is length at this point
65(p6) br.ret.spnt.many rp
66 ;;
67 cmp.lt p6,p0=16,len // if len > 16 then long memset
68 mov ar.lc=tmp // initialize lc for small count
69(p6) br.cond.dptk .long_do_clear
70 ;; // WAR on ar.lc
71 //
72 // worst case 16 iterations, avg 8 iterations
73 //
 74	// We could have played with the predicates to use the extra
 75	// M slot for 2 stores/iteration, but the cost of initializing
 76	// the various counters, compared to how long the loop is supposed
 77	// to last on average, does not make this solution viable.
78 //
791:
80 EX( .Lexit1, st1 [buf]=r0,1 )
81 adds len=-1,len // countdown length using len
82 br.cloop.dptk 1b
83 ;; // avoid RAW on ar.lc
84 //
85 // .Lexit4: comes from byte by byte loop
86 // len contains bytes left
87.Lexit1:
88 mov ret0=len // faster than using ar.lc
89 mov ar.lc=saved_lc
90 br.ret.sptk.many rp // end of short clear_user
91
92
93 //
94 // At this point we know we have more than 16 bytes to copy
95 // so we focus on alignment (no branches required)
96 //
97 // The use of len/len2 for countdown of the number of bytes left
98 // instead of ret0 is due to the fact that the exception code
99 // changes the values of r8.
100 //
101.long_do_clear:
102 tbit.nz p6,p0=buf,0 // odd alignment (for long_do_clear)
103 ;;
104 EX( .Lexit3, (p6) st1 [buf]=r0,1 ) // 1-byte aligned
105(p6) adds len=-1,len;; // sync because buf is modified
106 tbit.nz p6,p0=buf,1
107 ;;
108 EX( .Lexit3, (p6) st2 [buf]=r0,2 ) // 2-byte aligned
109(p6) adds len=-2,len;;
110 tbit.nz p6,p0=buf,2
111 ;;
112 EX( .Lexit3, (p6) st4 [buf]=r0,4 ) // 4-byte aligned
113(p6) adds len=-4,len;;
114 tbit.nz p6,p0=buf,3
115 ;;
116 EX( .Lexit3, (p6) st8 [buf]=r0,8 ) // 8-byte aligned
117(p6) adds len=-8,len;;
118 shr.u cnt=len,4 // number of 128-bit (2x64bit) words
119 ;;
120 cmp.eq p6,p0=r0,cnt
121 adds tmp=-1,cnt
122(p6) br.cond.dpnt .dotail // we have less than 16 bytes left
123 ;;
124 adds buf2=8,buf // setup second base pointer
125 mov ar.lc=tmp
126 ;;
127
128 //
129 // 16bytes/iteration core loop
130 //
131 // The second store can never generate a fault because
132 // we come into the loop only when we are 16-byte aligned.
133 // This means that if we cross a page then it will always be
134 // in the first store and never in the second.
135 //
136 //
137 // We need to keep track of the remaining length. A possible (optimistic)
 138	// way would be to use ar.lc and derive how many bytes were left by
 139	// doing: left = 16*ar.lc + 16. This would avoid the addition at
140 // every iteration.
141 // However we need to keep the synchronization point. A template
142 // M;;MB does not exist and thus we can keep the addition at no
143 // extra cycle cost (use a nop slot anyway). It also simplifies the
144 // (unlikely) error recovery code
145 //
146
1472: EX(.Lexit3, st8 [buf]=r0,16 )
148 ;; // needed to get len correct when error
149 st8 [buf2]=r0,16
150 adds len=-16,len
151 br.cloop.dptk 2b
152 ;;
153 mov ar.lc=saved_lc
154 //
155 // tail correction based on len only
156 //
157 // We alternate the use of len3,len2 to allow parallelism and correct
158 // error handling. We also reuse p6/p7 to return correct value.
159 // The addition of len2/len3 does not cost anything more compared to
160 // the regular memset as we had empty slots.
161 //
162.dotail:
163 mov len2=len // for parallelization of error handling
164 mov len3=len
165 tbit.nz p6,p0=len,3
166 ;;
167 EX( .Lexit2, (p6) st8 [buf]=r0,8 ) // at least 8 bytes
168(p6) adds len3=-8,len2
169 tbit.nz p7,p6=len,2
170 ;;
171 EX( .Lexit2, (p7) st4 [buf]=r0,4 ) // at least 4 bytes
172(p7) adds len2=-4,len3
173 tbit.nz p6,p7=len,1
174 ;;
175 EX( .Lexit2, (p6) st2 [buf]=r0,2 ) // at least 2 bytes
176(p6) adds len3=-2,len2
177 tbit.nz p7,p6=len,0
178 ;;
179 EX( .Lexit2, (p7) st1 [buf]=r0 ) // only 1 byte left
180 mov ret0=r0 // success
181 br.ret.sptk.many rp // end of most likely path
182
183 //
184 // Outlined error handling code
185 //
186
187 //
188 // .Lexit3: comes from core loop, need restore pr/lc
189 // len contains bytes left
190 //
191 //
192 // .Lexit2:
193 // if p6 -> coming from st8 or st2 : len2 contains what's left
194 // if p7 -> coming from st4 or st1 : len3 contains what's left
195 // We must restore lc/pr even though might not have been used.
196.Lexit2:
197 .pred.rel "mutex", p6, p7
198(p6) mov len=len2
199(p7) mov len=len3
200 ;;
201 //
202 // .Lexit4: comes from head, need not restore pr/lc
203 // len contains bytes left
204 //
205.Lexit3:
206 mov ret0=len
207 mov ar.lc=saved_lc
208 br.ret.sptk.many rp
209END(__do_clear_user)
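An illustrative plain-C rendering of the head/body/tail strategy described in the "Theory of operations" comment at the top of this file. It ignores faults and is not the kernel implementation; it only shows how the buffer is decomposed so that no byte outside it is ever written:

	#include <stddef.h>
	#include <stdint.h>
	#include <string.h>

	static void clear_head_body_tail(void *buf, size_t len)
	{
		uint8_t *p = buf;

		if (len < 17) {			/* small buffer: plain byte loop */
			while (len--)
				*p++ = 0;
			return;
		}
		/* head: 1-, 2-, 4- and 8-byte stores until p is 16-byte aligned */
		if ((uintptr_t) p & 1) { *p = 0;         p += 1; len -= 1; }
		if ((uintptr_t) p & 2) { memset(p, 0, 2); p += 2; len -= 2; }
		if ((uintptr_t) p & 4) { memset(p, 0, 4); p += 4; len -= 4; }
		if ((uintptr_t) p & 8) { memset(p, 0, 8); p += 8; len -= 8; }
		/* body: 16 bytes per iteration */
		while (len >= 16) { memset(p, 0, 16); p += 16; len -= 16; }
		/* tail: at most 15 bytes left */
		while (len--)
			*p++ = 0;
	}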
diff --git a/arch/ia64/lib/copy_page.S b/arch/ia64/lib/copy_page.S
new file mode 100644
index 000000000000..127d1d050d78
--- /dev/null
+++ b/arch/ia64/lib/copy_page.S
@@ -0,0 +1,98 @@
1/*
2 *
3 * Optimized version of the standard copy_page() function
4 *
5 * Inputs:
6 * in0: address of target page
7 * in1: address of source page
8 * Output:
9 * no return value
10 *
11 * Copyright (C) 1999, 2001 Hewlett-Packard Co
12 * Stephane Eranian <eranian@hpl.hp.com>
13 * David Mosberger <davidm@hpl.hp.com>
14 *
15 * 4/06/01 davidm Tuned to make it perform well both for cached and uncached copies.
16 */
17#include <asm/asmmacro.h>
18#include <asm/page.h>
19
20#define PIPE_DEPTH 3
21#define EPI p[PIPE_DEPTH-1]
22
23#define lcount r16
24#define saved_pr r17
25#define saved_lc r18
26#define saved_pfs r19
27#define src1 r20
28#define src2 r21
29#define tgt1 r22
30#define tgt2 r23
31#define srcf r24
32#define tgtf r25
33#define tgt_last r26
34
35#define Nrot ((8*PIPE_DEPTH+7)&~7)
36
37GLOBAL_ENTRY(copy_page)
38 .prologue
39 .save ar.pfs, saved_pfs
40 alloc saved_pfs=ar.pfs,3,Nrot-3,0,Nrot
41
42 .rotr t1[PIPE_DEPTH], t2[PIPE_DEPTH], t3[PIPE_DEPTH], t4[PIPE_DEPTH], \
43 t5[PIPE_DEPTH], t6[PIPE_DEPTH], t7[PIPE_DEPTH], t8[PIPE_DEPTH]
44 .rotp p[PIPE_DEPTH]
45
46 .save ar.lc, saved_lc
47 mov saved_lc=ar.lc
48 mov ar.ec=PIPE_DEPTH
49
50 mov lcount=PAGE_SIZE/64-1
51 .save pr, saved_pr
52 mov saved_pr=pr
53 mov pr.rot=1<<16
54
55 .body
56
57 mov src1=in1
58 adds src2=8,in1
59 mov tgt_last = PAGE_SIZE
60 ;;
61 adds tgt2=8,in0
62 add srcf=512,in1
63 mov ar.lc=lcount
64 mov tgt1=in0
65 add tgtf=512,in0
66 add tgt_last = tgt_last, in0
67 ;;
681:
69(p[0]) ld8 t1[0]=[src1],16
70(EPI) st8 [tgt1]=t1[PIPE_DEPTH-1],16
71(p[0]) ld8 t2[0]=[src2],16
72(EPI) st8 [tgt2]=t2[PIPE_DEPTH-1],16
73 cmp.ltu p6,p0 = tgtf, tgt_last
74 ;;
75(p[0]) ld8 t3[0]=[src1],16
76(EPI) st8 [tgt1]=t3[PIPE_DEPTH-1],16
77(p[0]) ld8 t4[0]=[src2],16
78(EPI) st8 [tgt2]=t4[PIPE_DEPTH-1],16
79 ;;
80(p[0]) ld8 t5[0]=[src1],16
81(EPI) st8 [tgt1]=t5[PIPE_DEPTH-1],16
82(p[0]) ld8 t6[0]=[src2],16
83(EPI) st8 [tgt2]=t6[PIPE_DEPTH-1],16
84 ;;
85(p[0]) ld8 t7[0]=[src1],16
86(EPI) st8 [tgt1]=t7[PIPE_DEPTH-1],16
87(p[0]) ld8 t8[0]=[src2],16
88(EPI) st8 [tgt2]=t8[PIPE_DEPTH-1],16
89
90(p6) lfetch [srcf], 64
91(p6) lfetch [tgtf], 64
92 br.ctop.sptk.few 1b
93 ;;
94 mov pr=saved_pr,0xffffffffffff0000 // restore predicates
95 mov ar.pfs=saved_pfs
96 mov ar.lc=saved_lc
97 br.ret.sptk.many rp
98END(copy_page)
diff --git a/arch/ia64/lib/copy_page_mck.S b/arch/ia64/lib/copy_page_mck.S
new file mode 100644
index 000000000000..3c45d60a81b4
--- /dev/null
+++ b/arch/ia64/lib/copy_page_mck.S
@@ -0,0 +1,185 @@
1/*
2 * McKinley-optimized version of copy_page().
3 *
4 * Copyright (C) 2002 Hewlett-Packard Co
5 * David Mosberger <davidm@hpl.hp.com>
6 *
7 * Inputs:
8 * in0: address of target page
9 * in1: address of source page
10 * Output:
11 * no return value
12 *
13 * General idea:
14 * - use regular loads and stores to prefetch data to avoid consuming M-slot just for
15 * lfetches => good for in-cache performance
16 * - avoid l2 bank-conflicts by not storing into the same 16-byte bank within a single
17 * cycle
18 *
19 * Principle of operation:
20 * First, note that L1 has a line-size of 64 bytes and L2 a line-size of 128 bytes.
21 * To avoid secondary misses in L2, we prefetch both source and destination with a line-size
22 * of 128 bytes. When both of these lines are in the L2 and the first half of the
23 * source line is in L1, we start copying the remaining words. The second half of the
24 * source line is prefetched in an earlier iteration, so that by the time we start
25 * accessing it, it's also present in the L1.
26 *
27 * We use a software-pipelined loop to control the overall operation. The pipeline
28 * has 2*PREFETCH_DIST+K stages. The first PREFETCH_DIST stages are used for prefetching
29 * source cache-lines. The second PREFETCH_DIST stages are used for prefetching destination
30 * cache-lines, the last K stages are used to copy the cache-line words not copied by
 31 * the prefetches. The four relevant points in the pipelined loop are called A, B, C, D:
32 * p[A] is TRUE if a source-line should be prefetched, p[B] is TRUE if a destination-line
33 * should be prefetched, p[C] is TRUE if the second half of an L2 line should be brought
34 * into L1D and p[D] is TRUE if a cacheline needs to be copied.
35 *
36 * This all sounds very complicated, but thanks to the modulo-scheduled loop support,
37 * the resulting code is very regular and quite easy to follow (once you get the idea).
38 *
39 * As a secondary optimization, the first 2*PREFETCH_DIST iterations are implemented
40 * as the separate .prefetch_loop. Logically, this loop performs exactly like the
41 * main-loop (.line_copy), but has all known-to-be-predicated-off instructions removed,
42 * so that each loop iteration is faster (again, good for cached case).
43 *
44 * When reading the code, it helps to keep the following picture in mind:
45 *
46 * word 0 word 1
47 * +------+------+---
48 * | v[x] | t1 | ^
49 * | t2 | t3 | |
50 * | t4 | t5 | |
51 * | t6 | t7 | | 128 bytes
52 * | n[y] | t9 | | (L2 cache line)
53 * | t10 | t11 | |
54 * | t12 | t13 | |
55 * | t14 | t15 | v
56 * +------+------+---
57 *
58 * Here, v[x] is copied by the (memory) prefetch. n[y] is loaded at p[C]
59 * to fetch the second-half of the L2 cache line into L1, and the tX words are copied in
60 * an order that avoids bank conflicts.
61 */
62#include <asm/asmmacro.h>
63#include <asm/page.h>
64
65#define PREFETCH_DIST 8 // McKinley sustains 16 outstanding L2 misses (8 ld, 8 st)
66
67#define src0 r2
68#define src1 r3
69#define dst0 r9
70#define dst1 r10
71#define src_pre_mem r11
72#define dst_pre_mem r14
73#define src_pre_l2 r15
74#define dst_pre_l2 r16
75#define t1 r17
76#define t2 r18
77#define t3 r19
78#define t4 r20
79#define t5 t1 // alias!
80#define t6 t2 // alias!
81#define t7 t3 // alias!
82#define t9 t5 // alias!
83#define t10 t4 // alias!
84#define t11 t7 // alias!
85#define t12 t6 // alias!
86#define t14 t10 // alias!
87#define t13 r21
88#define t15 r22
89
90#define saved_lc r23
91#define saved_pr r24
92
93#define A 0
94#define B (PREFETCH_DIST)
95#define C (B + PREFETCH_DIST)
96#define D (C + 3)
97#define N (D + 1)
98#define Nrot ((N + 7) & ~7)
99
100GLOBAL_ENTRY(copy_page)
101 .prologue
102 alloc r8 = ar.pfs, 2, Nrot-2, 0, Nrot
103
104 .rotr v[2*PREFETCH_DIST], n[D-C+1]
105 .rotp p[N]
106
107 .save ar.lc, saved_lc
108 mov saved_lc = ar.lc
109 .save pr, saved_pr
110 mov saved_pr = pr
111 .body
112
113 mov src_pre_mem = in1
114 mov pr.rot = 0x10000
115 mov ar.ec = 1 // special unrolled loop
116
117 mov dst_pre_mem = in0
118 mov ar.lc = 2*PREFETCH_DIST - 1
119
120 add src_pre_l2 = 8*8, in1
121 add dst_pre_l2 = 8*8, in0
122 add src0 = 8, in1 // first t1 src
123 add src1 = 3*8, in1 // first t3 src
124 add dst0 = 8, in0 // first t1 dst
125 add dst1 = 3*8, in0 // first t3 dst
126 mov t1 = (PAGE_SIZE/128) - (2*PREFETCH_DIST) - 1
127 nop.m 0
128 nop.i 0
129 ;;
130 // same as .line_copy loop, but with all predicated-off instructions removed:
131.prefetch_loop:
132(p[A]) ld8 v[A] = [src_pre_mem], 128 // M0
133(p[B]) st8 [dst_pre_mem] = v[B], 128 // M2
134 br.ctop.sptk .prefetch_loop
135 ;;
136 cmp.eq p16, p0 = r0, r0 // reset p16 to 1 (br.ctop cleared it to zero)
137 mov ar.lc = t1 // with 64KB pages, t1 is too big to fit in 8 bits!
138 mov ar.ec = N // # of stages in pipeline
139 ;;
140.line_copy:
141(p[D]) ld8 t2 = [src0], 3*8 // M0
142(p[D]) ld8 t4 = [src1], 3*8 // M1
143(p[B]) st8 [dst_pre_mem] = v[B], 128 // M2 prefetch dst from memory
144(p[D]) st8 [dst_pre_l2] = n[D-C], 128 // M3 prefetch dst from L2
145 ;;
146(p[A]) ld8 v[A] = [src_pre_mem], 128 // M0 prefetch src from memory
147(p[C]) ld8 n[0] = [src_pre_l2], 128 // M1 prefetch src from L2
148(p[D]) st8 [dst0] = t1, 8 // M2
149(p[D]) st8 [dst1] = t3, 8 // M3
150 ;;
151(p[D]) ld8 t5 = [src0], 8
152(p[D]) ld8 t7 = [src1], 3*8
153(p[D]) st8 [dst0] = t2, 3*8
154(p[D]) st8 [dst1] = t4, 3*8
155 ;;
156(p[D]) ld8 t6 = [src0], 3*8
157(p[D]) ld8 t10 = [src1], 8
158(p[D]) st8 [dst0] = t5, 8
159(p[D]) st8 [dst1] = t7, 3*8
160 ;;
161(p[D]) ld8 t9 = [src0], 3*8
162(p[D]) ld8 t11 = [src1], 3*8
163(p[D]) st8 [dst0] = t6, 3*8
164(p[D]) st8 [dst1] = t10, 8
165 ;;
166(p[D]) ld8 t12 = [src0], 8
167(p[D]) ld8 t14 = [src1], 8
168(p[D]) st8 [dst0] = t9, 3*8
169(p[D]) st8 [dst1] = t11, 3*8
170 ;;
171(p[D]) ld8 t13 = [src0], 4*8
172(p[D]) ld8 t15 = [src1], 4*8
173(p[D]) st8 [dst0] = t12, 8
174(p[D]) st8 [dst1] = t14, 8
175 ;;
176(p[D-1])ld8 t1 = [src0], 8
177(p[D-1])ld8 t3 = [src1], 8
178(p[D]) st8 [dst0] = t13, 4*8
179(p[D]) st8 [dst1] = t15, 4*8
180 br.ctop.sptk .line_copy
181 ;;
182 mov ar.lc = saved_lc
183 mov pr = saved_pr, -1
184 br.ret.sptk.many rp
185END(copy_page)
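
The pipelining scheme described in the header comment of this file can be summarized in C roughly as follows. This is only a conceptual sketch of the A/B/D stages (the C stage, which pulls the second half of each L2 line into L1D, is folded into the copy here); a 16KB page is assumed purely for the example, and __builtin_prefetch stands in for the ld8/st8-based prefetches used by the real code.

#include <string.h>

#define LINE          128                     /* L2 cache-line size assumed above */
#define PREFETCH_DIST 8
#define NLINES        (16384 / LINE)          /* assumes a 16KB page for the sketch */

static void copy_page_sketch(void *dst, const void *src)
{
	const char *s = src;
	char *d = dst;
	long i;

	/* NLINES + 2*PREFETCH_DIST iterations: prefetches run ahead of the copy */
	for (i = 0; i < NLINES + 2 * PREFETCH_DIST; i++) {
		if (i < NLINES)                                        /* stage A */
			__builtin_prefetch(s + i * LINE, 0);
		if (i >= PREFETCH_DIST && i < NLINES + PREFETCH_DIST)  /* stage B */
			__builtin_prefetch(d + (i - PREFETCH_DIST) * LINE, 1);
		if (i >= 2 * PREFETCH_DIST)                            /* stages C+D */
			memcpy(d + (i - 2 * PREFETCH_DIST) * LINE,
			       s + (i - 2 * PREFETCH_DIST) * LINE, LINE);
	}
}
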
diff --git a/arch/ia64/lib/copy_user.S b/arch/ia64/lib/copy_user.S
new file mode 100644
index 000000000000..c952bdc6a093
--- /dev/null
+++ b/arch/ia64/lib/copy_user.S
@@ -0,0 +1,610 @@
1/*
2 *
3 * Optimized version of the copy_user() routine.
4 * It is used to copy data across the kernel/user boundary.
5 *
6 * The source and destination are always on opposite sides of
7 * the boundary. When reading from user space we must catch
8 * faults on loads. When writing to user space we must catch
9 * errors on stores. Note that because of the nature of the copy
10 * we don't need to worry about overlapping regions.
11 *
12 *
13 * Inputs:
14 * in0 address of source buffer
15 * in1 address of destination buffer
16 * in2 number of bytes to copy
17 *
18 * Outputs:
19 * ret0 0 in case of success. The number of bytes NOT copied in
20 * case of error.
21 *
22 * Copyright (C) 2000-2001 Hewlett-Packard Co
23 * Stephane Eranian <eranian@hpl.hp.com>
24 *
25 * Fixme:
26 * - handle the case where we have more than 16 bytes and the alignments
27 * are different.
28 * - more benchmarking
29 * - fix extraneous stop bit introduced by the EX() macro.
30 */
31
32#include <asm/asmmacro.h>
33
34//
35// Tuneable parameters
36//
37#define COPY_BREAK 16 // we do byte copy below (must be >=16)
38#define PIPE_DEPTH 21 // pipe depth
39
40#define EPI p[PIPE_DEPTH-1]
41
42//
43// arguments
44//
45#define dst in0
46#define src in1
47#define len in2
48
49//
50// local registers
51//
52#define t1 r2 // rshift in bytes
53#define t2 r3 // lshift in bytes
54#define rshift r14 // right shift in bits
55#define lshift r15 // left shift in bits
56#define word1 r16
57#define word2 r17
58#define cnt r18
59#define len2 r19
60#define saved_lc r20
61#define saved_pr r21
62#define tmp r22
63#define val r23
64#define src1 r24
65#define dst1 r25
66#define src2 r26
67#define dst2 r27
68#define len1 r28
69#define enddst r29
70#define endsrc r30
71#define saved_pfs r31
72
73GLOBAL_ENTRY(__copy_user)
74 .prologue
75 .save ar.pfs, saved_pfs
76 alloc saved_pfs=ar.pfs,3,((2*PIPE_DEPTH+7)&~7),0,((2*PIPE_DEPTH+7)&~7)
77
78 .rotr val1[PIPE_DEPTH],val2[PIPE_DEPTH]
79 .rotp p[PIPE_DEPTH]
80
81 adds len2=-1,len // br.ctop is repeat/until
82 mov ret0=r0
83
84 ;; // RAW of cfm when len=0
85 cmp.eq p8,p0=r0,len // check for zero length
86 .save ar.lc, saved_lc
87 mov saved_lc=ar.lc // preserve ar.lc (slow)
88(p8) br.ret.spnt.many rp // empty memcpy()
89 ;;
90 add enddst=dst,len // first byte after end of destination
91 add endsrc=src,len // first byte after end of source
92 .save pr, saved_pr
93 mov saved_pr=pr // preserve predicates
94
95 .body
96
97 mov dst1=dst // copy because of rotation
98 mov ar.ec=PIPE_DEPTH
99 mov pr.rot=1<<16 // p16=true all others are false
100
101 mov src1=src // copy because of rotation
102 mov ar.lc=len2 // initialize lc for small count
103 cmp.lt p10,p7=COPY_BREAK,len // if len > COPY_BREAK then long copy
104
105 xor tmp=src,dst // same alignment test prepare
106(p10) br.cond.dptk .long_copy_user
107 ;; // RAW pr.rot/p16 ?
108 //
109 // Now we do the byte by byte loop with software pipeline
110 //
111 // p7 is necessarily false by now
1121:
113 EX(.failure_in_pipe1,(p16) ld1 val1[0]=[src1],1)
114 EX(.failure_out,(EPI) st1 [dst1]=val1[PIPE_DEPTH-1],1)
115 br.ctop.dptk.few 1b
116 ;;
117 mov ar.lc=saved_lc
118 mov pr=saved_pr,0xffffffffffff0000
119 mov ar.pfs=saved_pfs // restore ar.ec
120 br.ret.sptk.many rp // end of short memcpy
121
122 //
123 // Not 8-byte aligned
124 //
125.diff_align_copy_user:
126 // At this point we know we have more than 16 bytes to copy
127 // and also that src and dest do _not_ have the same alignment.
128 and src2=0x7,src1 // src offset
129 and dst2=0x7,dst1 // dst offset
130 ;;
131 // The basic idea is that we copy byte-by-byte at the head so
132 // that we can reach 8-byte alignment for both src1 and dst1.
133 // Then copy the body using software pipelined 8-byte copy,
134 // shifting the two back-to-back words right and left, then copy
135 // the tail by copying byte-by-byte.
136 //
137 // Fault handling. If the byte-by-byte at the head fails on the
138 // load, then restart and finish the pipeline by copying zeros
139 // to the dst1. Then copy zeros for the rest of dst1.
140 // If 8-byte software pipeline fails on the load, do the same as
141 // failure_in3 does. If the byte-by-byte at the tail fails, it is
142 // handled simply by failure_in_pipe1.
143 //
144 // The p14 case means the source has more bytes in its
145 // first word (by the shifted part), whereas in the p15 case we
146 // need to copy some bytes from the 2nd word of the source to fill
147 // the tail of the 1st word of the destination.
148 //
149
150 //
151 // Optimization. If dst1 is 8-byte aligned (quite common), we don't need
152 // to copy the head to dst1, to start 8-byte copy software pipeline.
153 // We know src1 is not 8-byte aligned in this case.
154 //
155 cmp.eq p14,p15=r0,dst2
156(p15) br.cond.spnt 1f
157 ;;
158 sub t1=8,src2
159 mov t2=src2
160 ;;
161 shl rshift=t2,3
162 sub len1=len,t1 // set len1
163 ;;
164 sub lshift=64,rshift
165 ;;
166 br.cond.spnt .word_copy_user
167 ;;
1681:
169 cmp.leu p14,p15=src2,dst2
170 sub t1=dst2,src2
171 ;;
172 .pred.rel "mutex", p14, p15
173(p14) sub word1=8,src2 // (8 - src offset)
174(p15) sub t1=r0,t1 // absolute value
175(p15) sub word1=8,dst2 // (8 - dst offset)
176 ;;
177 // For the case p14, we don't need to copy the shifted part to
178 // the 1st word of destination.
179 sub t2=8,t1
180(p14) sub word1=word1,t1
181 ;;
182 sub len1=len,word1 // resulting len
183(p15) shl rshift=t1,3 // in bits
184(p14) shl rshift=t2,3
185 ;;
186(p14) sub len1=len1,t1
187 adds cnt=-1,word1
188 ;;
189 sub lshift=64,rshift
190 mov ar.ec=PIPE_DEPTH
191 mov pr.rot=1<<16 // p16=true all others are false
192 mov ar.lc=cnt
193 ;;
1942:
195 EX(.failure_in_pipe2,(p16) ld1 val1[0]=[src1],1)
196 EX(.failure_out,(EPI) st1 [dst1]=val1[PIPE_DEPTH-1],1)
197 br.ctop.dptk.few 2b
198 ;;
199 clrrrb
200 ;;
201.word_copy_user:
202 cmp.gtu p9,p0=16,len1
203(p9) br.cond.spnt 4f // if (16 > len1) skip 8-byte copy
204 ;;
205 shr.u cnt=len1,3 // number of 64-bit words
206 ;;
207 adds cnt=-1,cnt
208 ;;
209 .pred.rel "mutex", p14, p15
210(p14) sub src1=src1,t2
211(p15) sub src1=src1,t1
212 //
213 // Now both src1 and dst1 point to an 8-byte aligned address. And
214 // we have more than 8 bytes to copy.
215 //
216 mov ar.lc=cnt
217 mov ar.ec=PIPE_DEPTH
218 mov pr.rot=1<<16 // p16=true all others are false
219 ;;
2203:
221 //
222 // The pipleline consists of 3 stages:
223 // 1 (p16): Load a word from src1
224 // 2 (EPI_1): Shift right pair, saving to tmp
225 // 3 (EPI): Store tmp to dst1
226 //
227 // To make it simple, use at least 2 (p16) loops to set up val1[n]
228 // because we need 2 back-to-back val1[] to get tmp.
229 // Note that this implies EPI_1 must be p18 or greater.
230 //
231
232#define EPI_1 p[PIPE_DEPTH-2]
233#define SWITCH(pred, shift) cmp.eq pred,p0=shift,rshift
234#define CASE(pred, shift) \
235 (pred) br.cond.spnt .copy_user_bit##shift
236#define BODY(rshift) \
237.copy_user_bit##rshift: \
2381: \
239 EX(.failure_out,(EPI) st8 [dst1]=tmp,8); \
240(EPI_1) shrp tmp=val1[PIPE_DEPTH-2],val1[PIPE_DEPTH-1],rshift; \
241 EX(3f,(p16) ld8 val1[1]=[src1],8); \
242(p16) mov val1[0]=r0; \
243 br.ctop.dptk 1b; \
244 ;; \
245 br.cond.sptk.many .diff_align_do_tail; \
2462: \
247(EPI) st8 [dst1]=tmp,8; \
248(EPI_1) shrp tmp=val1[PIPE_DEPTH-2],val1[PIPE_DEPTH-1],rshift; \
2493: \
250(p16) mov val1[1]=r0; \
251(p16) mov val1[0]=r0; \
252 br.ctop.dptk 2b; \
253 ;; \
254 br.cond.sptk.many .failure_in2
255
256 //
257 // Since the 'shrp' instruction requires the shift count to be a
258 // fixed (immediate) value, we need to provide 7 cases
259 // below.
260 //
261 SWITCH(p6, 8)
262 SWITCH(p7, 16)
263 SWITCH(p8, 24)
264 SWITCH(p9, 32)
265 SWITCH(p10, 40)
266 SWITCH(p11, 48)
267 SWITCH(p12, 56)
268 ;;
269 CASE(p6, 8)
270 CASE(p7, 16)
271 CASE(p8, 24)
272 CASE(p9, 32)
273 CASE(p10, 40)
274 CASE(p11, 48)
275 CASE(p12, 56)
276 ;;
277 BODY(8)
278 BODY(16)
279 BODY(24)
280 BODY(32)
281 BODY(40)
282 BODY(48)
283 BODY(56)
284 ;;
285.diff_align_do_tail:
286 .pred.rel "mutex", p14, p15
287(p14) sub src1=src1,t1
288(p14) adds dst1=-8,dst1
289(p15) sub dst1=dst1,t1
290 ;;
2914:
292 // Tail correction.
293 //
294 // The problem with this pipelined loop is that the last word is not
295 // loaded and thus part of the last word written is not correct.
296 // To fix that, we simply copy the tail byte by byte.
297
298 sub len1=endsrc,src1,1
299 clrrrb
300 ;;
301 mov ar.ec=PIPE_DEPTH
302 mov pr.rot=1<<16 // p16=true all others are false
303 mov ar.lc=len1
304 ;;
3055:
306 EX(.failure_in_pipe1,(p16) ld1 val1[0]=[src1],1)
307 EX(.failure_out,(EPI) st1 [dst1]=val1[PIPE_DEPTH-1],1)
308 br.ctop.dptk.few 5b
309 ;;
310 mov ar.lc=saved_lc
311 mov pr=saved_pr,0xffffffffffff0000
312 mov ar.pfs=saved_pfs
313 br.ret.sptk.many rp
314
315 //
316 // Beginning of long memcpy (i.e. > 16 bytes)
317 //
318.long_copy_user:
319 tbit.nz p6,p7=src1,0 // odd alignment
320 and tmp=7,tmp
321 ;;
322 cmp.eq p10,p8=r0,tmp
323 mov len1=len // copy because of rotation
324(p8) br.cond.dpnt .diff_align_copy_user
325 ;;
326 // At this point we know we have more than 16 bytes to copy
327 // and also that both src and dest have the same alignment
328 // which may not be the one we want. So for now we must move
329 // forward slowly until we reach 16-byte alignment: no need to
330 // worry about reaching the end of buffer.
331 //
332 EX(.failure_in1,(p6) ld1 val1[0]=[src1],1) // 1-byte aligned
333(p6) adds len1=-1,len1;;
334 tbit.nz p7,p0=src1,1
335 ;;
336 EX(.failure_in1,(p7) ld2 val1[1]=[src1],2) // 2-byte aligned
337(p7) adds len1=-2,len1;;
338 tbit.nz p8,p0=src1,2
339 ;;
340 //
341 // Stop bit not required after ld4 because if we fail on ld4
342 // we have never executed the ld1, therefore st1 is not executed.
343 //
344 EX(.failure_in1,(p8) ld4 val2[0]=[src1],4) // 4-byte aligned
345 ;;
346 EX(.failure_out,(p6) st1 [dst1]=val1[0],1)
347 tbit.nz p9,p0=src1,3
348 ;;
349 //
350 // Stop bit not required after ld8 because if we fail on ld8
351 // we have never executed the ld2, therefore st2 is not executed.
352 //
353 EX(.failure_in1,(p9) ld8 val2[1]=[src1],8) // 8-byte aligned
354 EX(.failure_out,(p7) st2 [dst1]=val1[1],2)
355(p8) adds len1=-4,len1
356 ;;
357 EX(.failure_out, (p8) st4 [dst1]=val2[0],4)
358(p9) adds len1=-8,len1;;
359 shr.u cnt=len1,4 // number of 128-bit (2x64bit) words
360 ;;
361 EX(.failure_out, (p9) st8 [dst1]=val2[1],8)
362 tbit.nz p6,p0=len1,3
363 cmp.eq p7,p0=r0,cnt
364 adds tmp=-1,cnt // br.ctop is repeat/until
365(p7) br.cond.dpnt .dotail // we have less than 16 bytes left
366 ;;
367 adds src2=8,src1
368 adds dst2=8,dst1
369 mov ar.lc=tmp
370 ;;
371 //
372 // 16bytes/iteration
373 //
3742:
375 EX(.failure_in3,(p16) ld8 val1[0]=[src1],16)
376(p16) ld8 val2[0]=[src2],16
377
378 EX(.failure_out, (EPI) st8 [dst1]=val1[PIPE_DEPTH-1],16)
379(EPI) st8 [dst2]=val2[PIPE_DEPTH-1],16
380 br.ctop.dptk 2b
381 ;; // RAW on src1 when fall through from loop
382 //
383 // Tail correction based on len only
384 //
385 // No matter where we come from (loop or test) the src1 pointer
386 // is 16 byte aligned AND we have less than 16 bytes to copy.
387 //
388.dotail:
389 EX(.failure_in1,(p6) ld8 val1[0]=[src1],8) // at least 8 bytes
390 tbit.nz p7,p0=len1,2
391 ;;
392 EX(.failure_in1,(p7) ld4 val1[1]=[src1],4) // at least 4 bytes
393 tbit.nz p8,p0=len1,1
394 ;;
395 EX(.failure_in1,(p8) ld2 val2[0]=[src1],2) // at least 2 bytes
396 tbit.nz p9,p0=len1,0
397 ;;
398 EX(.failure_out, (p6) st8 [dst1]=val1[0],8)
399 ;;
400 EX(.failure_in1,(p9) ld1 val2[1]=[src1]) // only 1 byte left
401 mov ar.lc=saved_lc
402 ;;
403 EX(.failure_out,(p7) st4 [dst1]=val1[1],4)
404 mov pr=saved_pr,0xffffffffffff0000
405 ;;
406 EX(.failure_out, (p8) st2 [dst1]=val2[0],2)
407 mov ar.pfs=saved_pfs
408 ;;
409 EX(.failure_out, (p9) st1 [dst1]=val2[1])
410 br.ret.sptk.many rp
411
412
413 //
414 // Here we handle the case where the byte by byte copy fails
415 // on the load.
416 // Several factors make the zeroing of the rest of the buffer kind of
417 // tricky:
418 // - the pipeline: loads/stores are not in sync (pipeline)
419 //
420 // In the same loop iteration, the dst1 pointer does not directly
421 // reflect where the faulty load was.
422 //
423 // - pipeline effect
424 // When you get a fault on load, you may have valid data from
425 // previous loads, still in transit, that are not yet stored. Such data must be
426 // stored normally before moving on to zeroing the rest.
427 //
428 // - single/multi dispersal independence.
429 //
430 // solution:
431 // - we don't disrupt the pipeline, i.e. data in transit in
432 // the software pipeline will eventually be moved to memory.
433 // We simply replace the load with a simple mov and keep the
434 // pipeline going. We can't really do this inline because
435 // p16 is always reset to 1 when lc > 0.
436 //
437.failure_in_pipe1:
438 sub ret0=endsrc,src1 // number of bytes to zero, i.e. not copied
4391:
440(p16) mov val1[0]=r0
441(EPI) st1 [dst1]=val1[PIPE_DEPTH-1],1
442 br.ctop.dptk 1b
443 ;;
444 mov pr=saved_pr,0xffffffffffff0000
445 mov ar.lc=saved_lc
446 mov ar.pfs=saved_pfs
447 br.ret.sptk.many rp
448
449 //
450 // This is the case where the byte by byte copy fails on the load
451 // when we copy the head. We need to finish the pipeline and copy
452 // zeros for the rest of the destination. Since this happens
453 // at the top we still need to fill the body and tail.
454.failure_in_pipe2:
455 sub ret0=endsrc,src1 // number of bytes to zero, i.e. not copied
4562:
457(p16) mov val1[0]=r0
458(EPI) st1 [dst1]=val1[PIPE_DEPTH-1],1
459 br.ctop.dptk 2b
460 ;;
461 sub len=enddst,dst1,1 // precompute len
462 br.cond.dptk.many .failure_in1bis
463 ;;
464
465 //
466 // Here we handle the head & tail part when we check for alignment.
467 // The following code handles only the load failures. The
468 // main difficulty comes from the fact that loads/stores are
469 // scheduled. So when you fail on a load, the stores corresponding
470 // to previous successful loads must be executed.
471 //
472 // However some simplifications are possible given the way
473 // things work.
474 //
475 // 1) HEAD
476 // Theory of operation:
477 //
478 // Page A | Page B
479 // ---------|-----
480 // 1|8 x
481 // 1 2|8 x
482 // 4|8 x
483 // 1 4|8 x
484 // 2 4|8 x
485 // 1 2 4|8 x
486 // |1
487 // |2 x
488 // |4 x
489 //
490 // page_size >= 4k (2^12). (x means 4, 2, 1)
491 // Here we suppose Page A exists and Page B does not.
492 //
493 // As we move towards eight byte alignment we may encounter faults.
494 // The numbers on each page show the size of the load (current alignment).
495 //
496 // Key point:
497 // - if you fail on 1, 2, 4 then you have never executed any smaller
498 // size loads, e.g. failing ld4 means no ld1 nor ld2 executed
499 // before.
500 //
501 // This allows us to simplify the cleanup code, because basically you
502 // only have to worry about "pending" stores in the case of a failing
503 // ld8(). Given the way the code is written today, this means only
504 // worry about st2, st4. There we can use the information encapsulated
505 // into the predicates.
506 //
507 // Other key point:
508 // - if you fail on the ld8 in the head, it means you went straight
509 // to it, i.e. 8-byte alignment within a nonexistent page.
510 // Again this comes from the fact that if you crossed just for the ld8 then
511 // you are 8-byte aligned but also 16-byte aligned, therefore you would
512 // either go for the 16byte copy loop OR the ld8 in the tail part.
513 // The combination ld1, ld2, ld4, ld8 where you fail on ld8 is impossible
514 // because it would mean you had 15bytes to copy in which case you
515 // would have defaulted to the byte by byte copy.
516 //
517 //
518 // 2) TAIL
519 // Here we know we have less than 16 bytes AND we are either 8 or 16 byte
520 // aligned.
521 //
522 // Key point:
523 // This means that we either:
524 // - are right on a page boundary
525 // OR
526 // - are at more than 16 bytes from a page boundary with
527 // at most 15 bytes to copy: no chance of crossing.
528 //
529 // This allows us to assume that if we fail on a load we haven't possibly
530 // executed any of the previous (tail) ones, so we don't need to do
531 // any stores. For instance, if we fail on ld2, this means we had
532 // 2 or 3 bytes left to copy and we did not execute the ld8 nor ld4.
533 //
534 // This means that we are in a situation similar to a fault in the
535 // head part. That's nice!
536 //
537.failure_in1:
538 sub ret0=endsrc,src1 // number of bytes to zero, i.e. not copied
539 sub len=endsrc,src1,1
540 //
541 // we know that ret0 can never be zero at this point
542 // because we failed while trying to do a load, i.e. there is still
543 // some work to do.
544 // The failure_in1bis and length problem is taken care of at the
545 // calling side.
546 //
547 ;;
548.failure_in1bis: // from (.failure_in3)
549 mov ar.lc=len // Continue with a stupid byte store.
550 ;;
5515:
552 st1 [dst1]=r0,1
553 br.cloop.dptk 5b
554 ;;
555 mov pr=saved_pr,0xffffffffffff0000
556 mov ar.lc=saved_lc
557 mov ar.pfs=saved_pfs
558 br.ret.sptk.many rp
559
560 //
561 // Here we simply restart the loop but instead
562 // of doing loads we fill the pipeline with zeroes
563 // We can't simply store r0 because we may have valid
564 // data in transit in the pipeline.
565 // ar.lc and ar.ec are setup correctly at this point
566 //
567 // we MUST use src1/endsrc here and not dst1/enddst because
568 // of the pipeline effect.
569 //
570.failure_in3:
571 sub ret0=endsrc,src1 // number of bytes to zero, i.e. not copied
572 ;;
5732:
574(p16) mov val1[0]=r0
575(p16) mov val2[0]=r0
576(EPI) st8 [dst1]=val1[PIPE_DEPTH-1],16
577(EPI) st8 [dst2]=val2[PIPE_DEPTH-1],16
578 br.ctop.dptk 2b
579 ;;
580 cmp.ne p6,p0=dst1,enddst // Do we need to finish the tail ?
581 sub len=enddst,dst1,1 // precompute len
582(p6) br.cond.dptk .failure_in1bis
583 ;;
584 mov pr=saved_pr,0xffffffffffff0000
585 mov ar.lc=saved_lc
586 mov ar.pfs=saved_pfs
587 br.ret.sptk.many rp
588
589.failure_in2:
590 sub ret0=endsrc,src1
591 cmp.ne p6,p0=dst1,enddst // Do we need to finish the tail ?
592 sub len=enddst,dst1,1 // precompute len
593(p6) br.cond.dptk .failure_in1bis
594 ;;
595 mov pr=saved_pr,0xffffffffffff0000
596 mov ar.lc=saved_lc
597 mov ar.pfs=saved_pfs
598 br.ret.sptk.many rp
599
600 //
601 // handling of failures on stores: that's the easy part
602 //
603.failure_out:
604 sub ret0=enddst,dst1
605 mov pr=saved_pr,0xffffffffffff0000
606 mov ar.lc=saved_lc
607
608 mov ar.pfs=saved_pfs
609 br.ret.sptk.many rp
610END(__copy_user)
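
Taken together, the failure paths above give __copy_user a simple externally visible contract for the read (user-to-kernel) direction: copy as much as possible, zero-fill the rest of the destination, and return the number of bytes that were not copied. A rough C model of that contract is sketched below; load_user_byte() is a hypothetical stand-in for a faulting user-space load, not a real kernel helper, and store faults (the .failure_out path) are not modelled.

#include <string.h>

/* hypothetical helper: returns 0 on success, non-zero if reading the
 * user address would fault (not an actual kernel interface) */
extern int load_user_byte(unsigned char *val, const void *uaddr);

unsigned long copy_user_model(void *dst, const void *src, unsigned long len)
{
	unsigned char *d = dst;
	const char *s = src;
	unsigned long done;
	unsigned char c;

	for (done = 0; done < len; done++) {
		if (load_user_byte(&c, s + done))
			break;			/* fault on a user-space load */
		d[done] = c;
	}
	/* the .failure_in* paths zero whatever part of the destination
	 * could not be filled from user space */
	memset(d + done, 0, len - done);
	return len - done;		/* 0 on success, bytes NOT copied on fault */
}
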
diff --git a/arch/ia64/lib/csum_partial_copy.c b/arch/ia64/lib/csum_partial_copy.c
new file mode 100644
index 000000000000..36866e8a5d2b
--- /dev/null
+++ b/arch/ia64/lib/csum_partial_copy.c
@@ -0,0 +1,151 @@
1/*
2 * Network Checksum & Copy routine
3 *
4 * Copyright (C) 1999, 2003-2004 Hewlett-Packard Co
5 * Stephane Eranian <eranian@hpl.hp.com>
6 *
7 * Most of the code has been imported from Linux/Alpha
8 */
9
10#include <linux/module.h>
11#include <linux/types.h>
12#include <linux/string.h>
13
14#include <asm/uaccess.h>
15
16/*
17 * XXX Fixme: those 2 inlines are meant for debugging and will go away
18 */
19static inline unsigned short
20from64to16(unsigned long x)
21{
22 /* add up 32-bit words for 33 bits */
23 x = (x & 0xffffffff) + (x >> 32);
24 /* add up 16-bit and 17-bit words for 17+c bits */
25 x = (x & 0xffff) + (x >> 16);
26 /* add up 16-bit and 2-bit for 16+c bit */
27 x = (x & 0xffff) + (x >> 16);
28 /* add up carry.. */
29 x = (x & 0xffff) + (x >> 16);
30 return x;
31}
32
33static inline
34unsigned long do_csum_c(const unsigned char * buff, int len, unsigned int psum)
35{
36 int odd, count;
37 unsigned long result = (unsigned long)psum;
38
39 if (len <= 0)
40 goto out;
41 odd = 1 & (unsigned long) buff;
42 if (odd) {
43 result = *buff << 8;
44 len--;
45 buff++;
46 }
47 count = len >> 1; /* nr of 16-bit words.. */
48 if (count) {
49 if (2 & (unsigned long) buff) {
50 result += *(unsigned short *) buff;
51 count--;
52 len -= 2;
53 buff += 2;
54 }
55 count >>= 1; /* nr of 32-bit words.. */
56 if (count) {
57 if (4 & (unsigned long) buff) {
58 result += *(unsigned int *) buff;
59 count--;
60 len -= 4;
61 buff += 4;
62 }
63 count >>= 1; /* nr of 64-bit words.. */
64 if (count) {
65 unsigned long carry = 0;
66 do {
67 unsigned long w = *(unsigned long *) buff;
68 count--;
69 buff += 8;
70 result += carry;
71 result += w;
72 carry = (w > result);
73 } while (count);
74 result += carry;
75 result = (result & 0xffffffff) + (result >> 32);
76 }
77 if (len & 4) {
78 result += *(unsigned int *) buff;
79 buff += 4;
80 }
81 }
82 if (len & 2) {
83 result += *(unsigned short *) buff;
84 buff += 2;
85 }
86 }
87 if (len & 1)
88 result += *buff;
89
90 result = from64to16(result);
91
92 if (odd)
93 result = ((result >> 8) & 0xff) | ((result & 0xff) << 8);
94
95out:
96 return result;
97}
98
99/*
100 * XXX Fixme
101 *
102 * This is very ugly but temporary. THIS NEEDS SERIOUS ENHANCEMENTS.
103 * But it's very tricky to get right even in C.
104 */
105extern unsigned long do_csum(const unsigned char *, long);
106
107static unsigned int
108do_csum_partial_copy_from_user (const unsigned char __user *src, unsigned char *dst,
109 int len, unsigned int psum, int *errp)
110{
111 unsigned long result;
112
113 /* XXX Fixme
114 * for now we separate the copy from checksum for obvious
115 * alignment difficulties. Look at the Alpha code and you'll be
116 * scared.
117 */
118
119 if (__copy_from_user(dst, src, len) != 0 && errp)
120 *errp = -EFAULT;
121
122 result = do_csum(dst, len);
123
124 /* add in old sum, and carry.. */
125 result += psum;
126 /* 32+c bits -> 32 bits */
127 result = (result & 0xffffffff) + (result >> 32);
128 return result;
129}
130
131unsigned int
132csum_partial_copy_from_user (const unsigned char __user *src, unsigned char *dst,
133 int len, unsigned int sum, int *errp)
134{
135 if (!access_ok(VERIFY_READ, src, len)) {
136 *errp = -EFAULT;
137 memset(dst, 0, len);
138 return sum;
139 }
140
141 return do_csum_partial_copy_from_user(src, dst, len, sum, errp);
142}
143
144unsigned int
145csum_partial_copy_nocheck(const unsigned char __user *src, unsigned char *dst,
146 int len, unsigned int sum)
147{
148 return do_csum_partial_copy_from_user(src, dst, len, sum, NULL);
149}
150
151EXPORT_SYMBOL(csum_partial_copy_nocheck);
diff --git a/arch/ia64/lib/dec_and_lock.c b/arch/ia64/lib/dec_and_lock.c
new file mode 100644
index 000000000000..c7ce92f968f1
--- /dev/null
+++ b/arch/ia64/lib/dec_and_lock.c
@@ -0,0 +1,42 @@
1/*
2 * Copyright (C) 2003 Jerome Marchand, Bull S.A.
3 * Cleaned up by David Mosberger-Tang <davidm@hpl.hp.com>
4 *
5 * This file is released under the GPLv2, or at your option any later version.
6 *
7 * ia64 version of "atomic_dec_and_lock()" using the atomic "cmpxchg" instruction. This
8 * code is an adaptation of the x86 version of "atomic_dec_and_lock()".
9 */
10
11#include <linux/compiler.h>
12#include <linux/module.h>
13#include <linux/spinlock.h>
14#include <asm/atomic.h>
15
16/*
17 * Decrement REFCOUNT and if the count reaches zero, acquire the spinlock. Both of these
18 * operations have to be done atomically, so that the count doesn't drop to zero without
19 * acquiring the spinlock first.
20 */
21int
22_atomic_dec_and_lock (atomic_t *refcount, spinlock_t *lock)
23{
24 int old, new;
25
26 do {
27 old = atomic_read(refcount);
28 new = old - 1;
29
30 if (unlikely (old == 1)) {
31 /* oops, we may be decrementing to zero, do it the slow way... */
32 spin_lock(lock);
33 if (atomic_dec_and_test(refcount))
34 return 1;
35 spin_unlock(lock);
36 return 0;
37 }
38 } while (cmpxchg(&refcount->counter, old, new) != old);
39 return 0;
40}
41
42EXPORT_SYMBOL(_atomic_dec_and_lock);
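
A typical use of this primitive (a sketch, not code from this tree) is dropping a reference where the final put must unlink the object under a lock before freeing it:

#include <linux/list.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <asm/atomic.h>

/* hypothetical object type used only for this example */
struct foo {
	atomic_t refcount;
	struct list_head node;
};

static void foo_put(struct foo *f, spinlock_t *list_lock)
{
	/* returns 1 with list_lock held only if the count dropped to zero */
	if (atomic_dec_and_lock(&f->refcount, list_lock)) {
		list_del(&f->node);
		spin_unlock(list_lock);
		kfree(f);
	}
	/* otherwise the count was merely decremented; nothing else to do */
}
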
diff --git a/arch/ia64/lib/do_csum.S b/arch/ia64/lib/do_csum.S
new file mode 100644
index 000000000000..6bec2fc9f5b2
--- /dev/null
+++ b/arch/ia64/lib/do_csum.S
@@ -0,0 +1,323 @@
1/*
2 *
3 * Optimized version of the standard do_csum() function
4 *
5 * Return: a 64bit quantity containing the 16bit Internet checksum
6 *
7 * Inputs:
8 * in0: address of buffer to checksum (char *)
9 * in1: length of the buffer (int)
10 *
11 * Copyright (C) 1999, 2001-2002 Hewlett-Packard Co
12 * Stephane Eranian <eranian@hpl.hp.com>
13 *
14 * 02/04/22 Ken Chen <kenneth.w.chen@intel.com>
15 * Data locality study on the checksum buffer.
16 * More optimization cleanup - remove excessive stop bits.
17 * 02/04/08 David Mosberger <davidm@hpl.hp.com>
18 * More cleanup and tuning.
19 * 01/04/18 Jun Nakajima <jun.nakajima@intel.com>
20 * Clean up and optimize the software pipeline, loading two
21 * back-to-back 8-byte words per loop. Clean up the initialization
22 * for the loop. Support the cases where load latency = 1 or 2.
23 * Set CONFIG_IA64_LOAD_LATENCY to 1 or 2 (default).
24 */
25
26#include <asm/asmmacro.h>
27
28//
29// Theory of operations:
30// The goal is to go as quickly as possible to the point where
31// we can checksum 16 bytes/loop. Before reaching that point we must
32// take care of incorrect alignment of first byte.
33//
34// The code hereafter also takes care of the "tail" part of the buffer
35// before entering the core loop, if any. The checksum is a sum so it
36// allows us to commute operations. So we do the "head" and "tail"
37// first to finish at full speed in the body. Once we get the head and
38// tail values, we feed them into the pipeline, very handy initialization.
39//
40// Of course we deal with the special case where the whole buffer fits
41// into one 8 byte word. In this case we have only one entry in the pipeline.
42//
43// We use a (LOAD_LATENCY+2)-stage pipeline in the loop to account for
44// possible load latency and also to accommodate for head and tail.
45//
46// The end of the function deals with folding the checksum from 64bits
47// down to 16bits taking care of the carry.
48//
49// This version avoids synchronization in the core loop by also using a
50// pipeline for the accumulation of the checksum in resultx[] (x=1,2).
51//
52// wordx[] (x=1,2)
53// |---|
54// | | 0 : new value loaded in pipeline
55// |---|
56// | | - : in transit data
57// |---|
58// | | LOAD_LATENCY : current value to add to checksum
59// |---|
60// | | LOAD_LATENCY+1 : previous value added to checksum
61// |---| (previous iteration)
62//
63// resultx[] (x=1,2)
64// |---|
65// | | 0 : initial value
66// |---|
67// | | LOAD_LATENCY-1 : new checksum
68// |---|
69// | | LOAD_LATENCY : previous value of checksum
70// |---|
71// | | LOAD_LATENCY+1 : final checksum when out of the loop
72// |---|
73//
74//
75// See RFC1071 "Computing the Internet Checksum" for various techniques for
76// calculating the Internet checksum.
77//
78// NOT YET DONE:
79// - Maybe another algorithm which would take care of the folding at the
80// end in a different manner
81// - Work with people more knowledgeable than me on the network stack
82// to figure out if we could not split the function depending on the
83// type of packet or alignment we get. Like the ip_fast_csum() routine
84// where we know we have at least 20bytes worth of data to checksum.
85// - Do a better job of handling small packets.
86// - Note on prefetching: it was found that under various loads, e.g. ftp read/write,
87// nfs read/write, the L1 cache hit rate is at 60% and L2 cache hit rate is at 99.8%
88// on the data that buffer points to (partly because the checksum is often preceded by
89// a copy_from_user()). This finding indicates that lfetch will not be beneficial since
90// the data is already in the cache.
91//
92
93#define saved_pfs r11
94#define hmask r16
95#define tmask r17
96#define first1 r18
97#define firstval r19
98#define firstoff r20
99#define last r21
100#define lastval r22
101#define lastoff r23
102#define saved_lc r24
103#define saved_pr r25
104#define tmp1 r26
105#define tmp2 r27
106#define tmp3 r28
107#define carry1 r29
108#define carry2 r30
109#define first2 r31
110
111#define buf in0
112#define len in1
113
114#define LOAD_LATENCY 2 // XXX fix me
115
116#if (LOAD_LATENCY != 1) && (LOAD_LATENCY != 2)
117# error "Only 1 or 2 is supported/tested for LOAD_LATENCY."
118#endif
119
120#define PIPE_DEPTH (LOAD_LATENCY+2)
121#define ELD p[LOAD_LATENCY] // end of load
122#define ELD_1 p[LOAD_LATENCY+1] // and next stage
123
124// unsigned long do_csum(unsigned char *buf,long len)
125
126GLOBAL_ENTRY(do_csum)
127 .prologue
128 .save ar.pfs, saved_pfs
129 alloc saved_pfs=ar.pfs,2,16,0,16
130 .rotr word1[4], word2[4],result1[LOAD_LATENCY+2],result2[LOAD_LATENCY+2]
131 .rotp p[PIPE_DEPTH], pC1[2], pC2[2]
132 mov ret0=r0 // in case we have zero length
133 cmp.lt p0,p6=r0,len // check for zero length or negative (32bit len)
134 ;;
135 add tmp1=buf,len // last byte's address
136 .save pr, saved_pr
137 mov saved_pr=pr // preserve predicates (rotation)
138(p6) br.ret.spnt.many rp // return if zero or negative length
139
140 mov hmask=-1 // initialize head mask
141 tbit.nz p15,p0=buf,0 // is buf an odd address?
142 and first1=-8,buf // 8-byte align down address of first1 element
143
144 and firstoff=7,buf // how many bytes off for first1 element
145 mov tmask=-1 // initialize tail mask
146
147 ;;
148 adds tmp2=-1,tmp1 // last-1
149 and lastoff=7,tmp1 // how many bytes off for last element
150 ;;
151 sub tmp1=8,lastoff // complement to lastoff
152 and last=-8,tmp2 // address of word containing last byte
153 ;;
154 sub tmp3=last,first1 // tmp3=distance from first1 to last
155 .save ar.lc, saved_lc
156 mov saved_lc=ar.lc // save lc
157 cmp.eq p8,p9=last,first1 // everything fits in one word ?
158
159 ld8 firstval=[first1],8 // load, ahead of time, "first1" word
160 and tmp1=7, tmp1 // make sure that if tmp1==8 -> tmp1=0
161 shl tmp2=firstoff,3 // number of bits
162 ;;
163(p9) ld8 lastval=[last] // load, ahead of time, "last" word, if needed
164 shl tmp1=tmp1,3 // number of bits
165(p9) adds tmp3=-8,tmp3 // effectively loaded
166 ;;
167(p8) mov lastval=r0 // we don't need lastval if first1==last
168 shl hmask=hmask,tmp2 // build head mask, mask off [0,firstoff[
169 shr.u tmask=tmask,tmp1 // build tail mask, mask off ]8,lastoff]
170 ;;
171 .body
172#define count tmp3
173
174(p8) and hmask=hmask,tmask // apply tail mask to head mask if 1 word only
175(p9) and word2[0]=lastval,tmask // mask last word as appropriate
176 shr.u count=count,3 // how many 8-byte?
177 ;;
178 // If count is odd, finish this 8-byte word so that we can
179 // load two back-to-back 8-byte words per loop thereafter.
180 and word1[0]=firstval,hmask // and mask it as appropriate
181 tbit.nz p10,p11=count,0 // if (count is odd)
182 ;;
183(p8) mov result1[0]=word1[0]
184(p9) add result1[0]=word1[0],word2[0]
185 ;;
186 cmp.ltu p6,p0=result1[0],word1[0] // check the carry
187 cmp.eq.or.andcm p8,p0=0,count // exit if zero 8-byte
188 ;;
189(p6) adds result1[0]=1,result1[0]
190(p8) br.cond.dptk .do_csum_exit // if (within an 8-byte word)
191(p11) br.cond.dptk .do_csum16 // if (count is even)
192
193 // Here count is odd.
194 ld8 word1[1]=[first1],8 // load an 8-byte word
195 cmp.eq p9,p10=1,count // if (count == 1)
196 adds count=-1,count // loaded an 8-byte word
197 ;;
198 add result1[0]=result1[0],word1[1]
199 ;;
200 cmp.ltu p6,p0=result1[0],word1[1]
201 ;;
202(p6) adds result1[0]=1,result1[0]
203(p9) br.cond.sptk .do_csum_exit // if (count == 1) exit
204 // Fall through to calculate the checksum, feeding result1[0] as
205 // the initial value in result1[0].
206 //
207 // Calculate the checksum loading two 8-byte words per loop.
208 //
209.do_csum16:
210 add first2=8,first1
211 shr.u count=count,1 // we do 16 bytes per loop
212 ;;
213 adds count=-1,count
214 mov carry1=r0
215 mov carry2=r0
216 brp.loop.imp 1f,2f
217 ;;
218 mov ar.ec=PIPE_DEPTH
219 mov ar.lc=count // set lc
220 mov pr.rot=1<<16
221 // result1[0] must be initialized in advance.
222 mov result2[0]=r0
223 ;;
224 .align 32
2251:
226(ELD_1) cmp.ltu pC1[0],p0=result1[LOAD_LATENCY],word1[LOAD_LATENCY+1]
227(pC1[1])adds carry1=1,carry1
228(ELD_1) cmp.ltu pC2[0],p0=result2[LOAD_LATENCY],word2[LOAD_LATENCY+1]
229(pC2[1])adds carry2=1,carry2
230(ELD) add result1[LOAD_LATENCY-1]=result1[LOAD_LATENCY],word1[LOAD_LATENCY]
231(ELD) add result2[LOAD_LATENCY-1]=result2[LOAD_LATENCY],word2[LOAD_LATENCY]
2322:
233(p[0]) ld8 word1[0]=[first1],16
234(p[0]) ld8 word2[0]=[first2],16
235 br.ctop.sptk 1b
236 ;;
237 // Since len is a 32-bit value, carry cannot be larger than a 64-bit value.
238(pC1[1])adds carry1=1,carry1 // since we miss the last one
239(pC2[1])adds carry2=1,carry2
240 ;;
241 add result1[LOAD_LATENCY+1]=result1[LOAD_LATENCY+1],carry1
242 add result2[LOAD_LATENCY+1]=result2[LOAD_LATENCY+1],carry2
243 ;;
244 cmp.ltu p6,p0=result1[LOAD_LATENCY+1],carry1
245 cmp.ltu p7,p0=result2[LOAD_LATENCY+1],carry2
246 ;;
247(p6) adds result1[LOAD_LATENCY+1]=1,result1[LOAD_LATENCY+1]
248(p7) adds result2[LOAD_LATENCY+1]=1,result2[LOAD_LATENCY+1]
249 ;;
250 add result1[0]=result1[LOAD_LATENCY+1],result2[LOAD_LATENCY+1]
251 ;;
252 cmp.ltu p6,p0=result1[0],result2[LOAD_LATENCY+1]
253 ;;
254(p6) adds result1[0]=1,result1[0]
255 ;;
256.do_csum_exit:
257 //
258 // now fold 64 into 16 bits taking care of carry
259 // that's not very good because it has lots of sequentiality
260 //
261 mov tmp3=0xffff
262 zxt4 tmp1=result1[0]
263 shr.u tmp2=result1[0],32
264 ;;
265 add result1[0]=tmp1,tmp2
266 ;;
267 and tmp1=result1[0],tmp3
268 shr.u tmp2=result1[0],16
269 ;;
270 add result1[0]=tmp1,tmp2
271 ;;
272 and tmp1=result1[0],tmp3
273 shr.u tmp2=result1[0],16
274 ;;
275 add result1[0]=tmp1,tmp2
276 ;;
277 and tmp1=result1[0],tmp3
278 shr.u tmp2=result1[0],16
279 ;;
280 add ret0=tmp1,tmp2
281 mov pr=saved_pr,0xffffffffffff0000
282 ;;
283 // if buf was odd then swap bytes
284 mov ar.pfs=saved_pfs // restore ar.ec
285(p15) mux1 ret0=ret0,@rev // reverse word
286 ;;
287 mov ar.lc=saved_lc
288(p15) shr.u ret0=ret0,64-16 // + shift back to position = swap bytes
289 br.ret.sptk.many rp
290
291// I (Jun Nakajima) wrote an equivalent code (see below), but it was
292// not much better than the original. So keep the original there so that
293// someone else can challenge.
294//
295// shr.u word1[0]=result1[0],32
296// zxt4 result1[0]=result1[0]
297// ;;
298// add result1[0]=result1[0],word1[0]
299// ;;
300// zxt2 result2[0]=result1[0]
301// extr.u word1[0]=result1[0],16,16
302// shr.u carry1=result1[0],32
303// ;;
304// add result2[0]=result2[0],word1[0]
305// ;;
306// add result2[0]=result2[0],carry1
307// ;;
308// extr.u ret0=result2[0],16,16
309// ;;
310// add ret0=ret0,result2[0]
311// ;;
312// zxt2 ret0=ret0
313// mov ar.pfs=saved_pfs // restore ar.ec
314// mov pr=saved_pr,0xffffffffffff0000
315// ;;
316// // if buf was odd then swap bytes
317// mov ar.lc=saved_lc
318//(p15) mux1 ret0=ret0,@rev // reverse word
319// ;;
320//(p15) shr.u ret0=ret0,64-16 // + shift back to position = swap bytes
321// br.ret.sptk.many rp
322
323END(do_csum)
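
The head/tail masking described in the theory-of-operations comment can be written in plain C as below. This is a little-endian conceptual sketch only: it performs solely aligned 8-byte loads and masks off the bytes outside [buf, buf+len), then folds the 64-bit sum down to 16 bits the way the exit path does; the software pipelining, the carry pipelining, and the byte swap for odd buffer addresses are all omitted.

#include <stdint.h>

static uint64_t add_c(uint64_t sum, uint64_t w)
{
	sum += w;
	return sum + (sum < w);			/* end-around carry */
}

uint64_t do_csum_sketch(const unsigned char *buf, long len)
{
	uintptr_t start = (uintptr_t)buf, end = start + len;
	const uint64_t *first = (const uint64_t *)(start & ~(uintptr_t)7);
	const uint64_t *last  = (const uint64_t *)((end - 1) & ~(uintptr_t)7);
	unsigned int firstoff = start & 7, lastoff = end & 7;
	uint64_t hmask = ~(uint64_t)0 << (8 * firstoff);	/* drop bytes before buf */
	uint64_t tmask = lastoff ? ~(uint64_t)0 >> (8 * (8 - lastoff))
				 : ~(uint64_t)0;		/* drop bytes past buf+len */
	const uint64_t *p;
	uint64_t sum;

	if (len <= 0)
		return 0;
	if (first == last) {
		sum = *first & hmask & tmask;	/* whole buffer within one word */
	} else {
		sum = *first & hmask;
		for (p = first + 1; p < last; p++)
			sum = add_c(sum, *p);
		sum = add_c(sum, *last & tmask);
	}
	/* fold 64 bits down to 16, as .do_csum_exit does */
	sum = (sum & 0xffffffff) + (sum >> 32);
	sum = (sum & 0xffff) + (sum >> 16);
	sum = (sum & 0xffff) + (sum >> 16);
	sum = (sum & 0xffff) + (sum >> 16);
	return sum;
}
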
diff --git a/arch/ia64/lib/flush.S b/arch/ia64/lib/flush.S
new file mode 100644
index 000000000000..29c802b19669
--- /dev/null
+++ b/arch/ia64/lib/flush.S
@@ -0,0 +1,39 @@
1/*
2 * Cache flushing routines.
3 *
4 * Copyright (C) 1999-2001 Hewlett-Packard Co
5 * Copyright (C) 1999-2001 David Mosberger-Tang <davidm@hpl.hp.com>
6 */
7#include <asm/asmmacro.h>
8#include <asm/page.h>
9
10 /*
11 * flush_icache_range(start,end)
12 * Must flush range from start to end-1 but nothing else (need to
13 * be careful not to touch addresses that may be unmapped).
14 */
15GLOBAL_ENTRY(flush_icache_range)
16 .prologue
17 alloc r2=ar.pfs,2,0,0,0
18 sub r8=in1,in0,1
19 ;;
20 shr.u r8=r8,5 // we flush 32 bytes per iteration
21 .save ar.lc, r3
22 mov r3=ar.lc // save ar.lc
23 ;;
24
25 .body
26
27 mov ar.lc=r8
28 ;;
29.Loop: fc in0 // issuable on M0 only
30 add in0=32,in0
31 br.cloop.sptk.few .Loop
32 ;;
33 sync.i
34 ;;
35 srlz.i
36 ;;
37 mov ar.lc=r3 // restore ar.lc
38 br.ret.sptk.many rp
39END(flush_icache_range)
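
In C, the loop above amounts to the following (a sketch only; flush_line() and serialize_instruction_stream() are hypothetical placeholders for the fc instruction and the sync.i/srlz.i sequence):

/* hypothetical placeholders for the fc and sync.i/srlz.i instructions */
extern void flush_line(unsigned long addr);
extern void serialize_instruction_stream(void);

void flush_icache_range_sketch(unsigned long start, unsigned long end)
{
	unsigned long iters = ((end - start - 1) >> 5) + 1;	/* 32 bytes per fc */

	while (iters--) {
		flush_line(start);	/* flushes the whole line containing start */
		start += 32;
	}
	serialize_instruction_stream();
}
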
diff --git a/arch/ia64/lib/idiv32.S b/arch/ia64/lib/idiv32.S
new file mode 100644
index 000000000000..2ac28bf0a662
--- /dev/null
+++ b/arch/ia64/lib/idiv32.S
@@ -0,0 +1,83 @@
1/*
2 * Copyright (C) 2000 Hewlett-Packard Co
3 * Copyright (C) 2000 David Mosberger-Tang <davidm@hpl.hp.com>
4 *
5 * 32-bit integer division.
6 *
7 * This code is based on the application note entitled "Divide, Square Root
8 * and Remainder Algorithms for the IA-64 Architecture". This document
9 * is available as Intel document number 248725-002 or via the web at
10 * http://developer.intel.com/software/opensource/numerics/
11 *
12 * For more details on the theory behind these algorithms, see "IA-64
13 * and Elementary Functions" by Peter Markstein; HP Professional Books
14 * (http://www.hp.com/go/retailbooks/)
15 */
16
17#include <asm/asmmacro.h>
18
19#ifdef MODULO
20# define OP mod
21#else
22# define OP div
23#endif
24
25#ifdef UNSIGNED
26# define SGN u
27# define EXTEND zxt4
28# define INT_TO_FP(a,b) fcvt.xuf.s1 a=b
29# define FP_TO_INT(a,b) fcvt.fxu.trunc.s1 a=b
30#else
31# define SGN
32# define EXTEND sxt4
33# define INT_TO_FP(a,b) fcvt.xf a=b
34# define FP_TO_INT(a,b) fcvt.fx.trunc.s1 a=b
35#endif
36
37#define PASTE1(a,b) a##b
38#define PASTE(a,b) PASTE1(a,b)
39#define NAME PASTE(PASTE(__,SGN),PASTE(OP,si3))
40
41GLOBAL_ENTRY(NAME)
42 .regstk 2,0,0,0
43 // Transfer inputs to FP registers.
44 mov r2 = 0xffdd // r2 = -34 + 65535 (fp reg format bias)
45 EXTEND in0 = in0 // in0 = a
46 EXTEND in1 = in1 // in1 = b
47 ;;
48 setf.sig f8 = in0
49 setf.sig f9 = in1
50#ifdef MODULO
51 sub in1 = r0, in1 // in1 = -b
52#endif
53 ;;
54 // Convert the inputs to FP, to avoid FP software-assist faults.
55 INT_TO_FP(f8, f8)
56 INT_TO_FP(f9, f9)
57 ;;
58 setf.exp f7 = r2 // f7 = 2^-34
59 frcpa.s1 f6, p6 = f8, f9 // y0 = frcpa(b)
60 ;;
61(p6) fmpy.s1 f8 = f8, f6 // q0 = a*y0
62(p6) fnma.s1 f6 = f9, f6, f1 // e0 = -b*y0 + 1
63 ;;
64#ifdef MODULO
65 setf.sig f9 = in1 // f9 = -b
66#endif
67(p6) fma.s1 f8 = f6, f8, f8 // q1 = e0*q0 + q0
68(p6) fma.s1 f6 = f6, f6, f7 // e1 = e0*e0 + 2^-34
69 ;;
70#ifdef MODULO
71 setf.sig f7 = in0
72#endif
73(p6) fma.s1 f6 = f6, f8, f8 // q2 = e1*q1 + q1
74 ;;
75 FP_TO_INT(f6, f6) // q = trunc(q2)
76 ;;
77#ifdef MODULO
78 xma.l f6 = f6, f9, f7 // r = q*(-b) + a
79 ;;
80#endif
81 getf.sig r8 = f6 // transfer result to result register
82 br.ret.sptk.many rp
83END(NAME)
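
The sequence of frcpa/fma operations above is the classic reciprocal-refinement division from the Intel application note. Rendered with ordinary C doubles it looks roughly like this; it is a sketch only, since frcpa actually yields a coarse initial reciprocal approximation and sets a predicate for the special cases (1.0/b merely stands in for it here), and the real code works in the 82-bit floating-point register format. The 2^-34 bias makes trunc(q2) round the right way for any 32-bit a and non-zero b.

#include <stdint.h>

uint32_t udiv32_sketch(uint32_t a, uint32_t b)
{
	/* b must be non-zero */
	double fa = a, fb = b;
	double y0 = 1.0 / fb;		/* stand-in for frcpa's coarse approximation */
	double q0 = fa * y0;		/* first quotient estimate */
	double e0 = 1.0 - fb * y0;	/* relative error of y0 */
	double q1 = q0 + e0 * q0;	/* first refinement (fma in the real code) */
	double e1 = e0 * e0 + 0x1p-34;	/* squared error, biased by 2^-34 */
	double q2 = q1 + e1 * q1;	/* second refinement */

	return (uint32_t)q2;		/* trunc(q2) == a / b */
}
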
diff --git a/arch/ia64/lib/idiv64.S b/arch/ia64/lib/idiv64.S
new file mode 100644
index 000000000000..f69bd2b0987a
--- /dev/null
+++ b/arch/ia64/lib/idiv64.S
@@ -0,0 +1,80 @@
1/*
2 * Copyright (C) 1999-2000 Hewlett-Packard Co
3 * Copyright (C) 1999-2000 David Mosberger-Tang <davidm@hpl.hp.com>
4 *
5 * 64-bit integer division.
6 *
7 * This code is based on the application note entitled "Divide, Square Root
8 * and Remainder Algorithms for the IA-64 Architecture". This document
9 * is available as Intel document number 248725-002 or via the web at
10 * http://developer.intel.com/software/opensource/numerics/
11 *
12 * For more details on the theory behind these algorithms, see "IA-64
13 * and Elementary Functions" by Peter Markstein; HP Professional Books
14 * (http://www.hp.com/go/retailbooks/)
15 */
16
17#include <asm/asmmacro.h>
18
19#ifdef MODULO
20# define OP mod
21#else
22# define OP div
23#endif
24
25#ifdef UNSIGNED
26# define SGN u
27# define INT_TO_FP(a,b) fcvt.xuf.s1 a=b
28# define FP_TO_INT(a,b) fcvt.fxu.trunc.s1 a=b
29#else
30# define SGN
31# define INT_TO_FP(a,b) fcvt.xf a=b
32# define FP_TO_INT(a,b) fcvt.fx.trunc.s1 a=b
33#endif
34
35#define PASTE1(a,b) a##b
36#define PASTE(a,b) PASTE1(a,b)
37#define NAME PASTE(PASTE(__,SGN),PASTE(OP,di3))
38
39GLOBAL_ENTRY(NAME)
40 .regstk 2,0,0,0
41 // Transfer inputs to FP registers.
42 setf.sig f8 = in0
43 setf.sig f9 = in1
44 ;;
45 // Convert the inputs to FP, to avoid FP software-assist faults.
46 INT_TO_FP(f8, f8)
47 INT_TO_FP(f9, f9)
48 ;;
49 frcpa.s1 f11, p6 = f8, f9 // y0 = frcpa(b)
50 ;;
51(p6) fmpy.s1 f7 = f8, f11 // q0 = a*y0
52(p6) fnma.s1 f6 = f9, f11, f1 // e0 = -b*y0 + 1
53 ;;
54(p6) fma.s1 f10 = f7, f6, f7 // q1 = q0*e0 + q0
55(p6) fmpy.s1 f7 = f6, f6 // e1 = e0*e0
56 ;;
57#ifdef MODULO
58 sub in1 = r0, in1 // in1 = -b
59#endif
60(p6) fma.s1 f10 = f10, f7, f10 // q2 = q1*e1 + q1
61(p6) fma.s1 f6 = f11, f6, f11 // y1 = y0*e0 + y0
62 ;;
63(p6) fma.s1 f6 = f6, f7, f6 // y2 = y1*e1 + y1
64(p6) fnma.s1 f7 = f9, f10, f8 // r = -b*q2 + a
65 ;;
66#ifdef MODULO
67 setf.sig f8 = in0 // f8 = a
68 setf.sig f9 = in1 // f9 = -b
69#endif
70(p6) fma.s1 f11 = f7, f6, f10 // q3 = r*y2 + q2
71 ;;
72 FP_TO_INT(f11, f11) // q = trunc(q3)
73 ;;
74#ifdef MODULO
75 xma.l f11 = f11, f9, f8 // r = q*(-b) + a
76 ;;
77#endif
78 getf.sig r8 = f11 // transfer result to result register
79 br.ret.sptk.many rp
80END(NAME)
diff --git a/arch/ia64/lib/io.c b/arch/ia64/lib/io.c
new file mode 100644
index 000000000000..8949e44091ac
--- /dev/null
+++ b/arch/ia64/lib/io.c
@@ -0,0 +1,165 @@
1#include <linux/config.h>
2#include <linux/module.h>
3#include <linux/types.h>
4
5#include <asm/io.h>
6
7/*
8 * Copy data from IO memory space to "real" memory space.
9 * This needs to be optimized.
10 */
11void memcpy_fromio(void *to, const volatile void __iomem *from, long count)
12{
13 char *dst = to;
14
15 while (count) {
16 count--;
17 *dst++ = readb(from++);
18 }
19}
20EXPORT_SYMBOL(memcpy_fromio);
21
22/*
23 * Copy data from "real" memory space to IO memory space.
24 * This needs to be optimized.
25 */
26void memcpy_toio(volatile void __iomem *to, const void *from, long count)
27{
28 const char *src = from;
29
30 while (count) {
31 count--;
32 writeb(*src++, to++);
33 }
34}
35EXPORT_SYMBOL(memcpy_toio);
36
37/*
38 * "memset" on IO memory space.
39 * This needs to be optimized.
40 */
41void memset_io(volatile void __iomem *dst, int c, long count)
42{
43 unsigned char ch = (char)(c & 0xff);
44
45 while (count) {
46 count--;
47 writeb(ch, dst);
48 dst++;
49 }
50}
51EXPORT_SYMBOL(memset_io);
52
53#ifdef CONFIG_IA64_GENERIC
54
55#undef __ia64_inb
56#undef __ia64_inw
57#undef __ia64_inl
58#undef __ia64_outb
59#undef __ia64_outw
60#undef __ia64_outl
61#undef __ia64_readb
62#undef __ia64_readw
63#undef __ia64_readl
64#undef __ia64_readq
65#undef __ia64_readb_relaxed
66#undef __ia64_readw_relaxed
67#undef __ia64_readl_relaxed
68#undef __ia64_readq_relaxed
69#undef __ia64_writeb
70#undef __ia64_writew
71#undef __ia64_writel
72#undef __ia64_writeq
73#undef __ia64_mmiowb
74
75unsigned int
76__ia64_inb (unsigned long port)
77{
78 return ___ia64_inb(port);
79}
80
81unsigned int
82__ia64_inw (unsigned long port)
83{
84 return ___ia64_inw(port);
85}
86
87unsigned int
88__ia64_inl (unsigned long port)
89{
90 return ___ia64_inl(port);
91}
92
93void
94__ia64_outb (unsigned char val, unsigned long port)
95{
96 ___ia64_outb(val, port);
97}
98
99void
100__ia64_outw (unsigned short val, unsigned long port)
101{
102 ___ia64_outw(val, port);
103}
104
105void
106__ia64_outl (unsigned int val, unsigned long port)
107{
108 ___ia64_outl(val, port);
109}
110
111unsigned char
112__ia64_readb (void __iomem *addr)
113{
114 return ___ia64_readb (addr);
115}
116
117unsigned short
118__ia64_readw (void __iomem *addr)
119{
120 return ___ia64_readw (addr);
121}
122
123unsigned int
124__ia64_readl (void __iomem *addr)
125{
126 return ___ia64_readl (addr);
127}
128
129unsigned long
130__ia64_readq (void __iomem *addr)
131{
132 return ___ia64_readq (addr);
133}
134
135unsigned char
136__ia64_readb_relaxed (void __iomem *addr)
137{
138 return ___ia64_readb (addr);
139}
140
141unsigned short
142__ia64_readw_relaxed (void __iomem *addr)
143{
144 return ___ia64_readw (addr);
145}
146
147unsigned int
148__ia64_readl_relaxed (void __iomem *addr)
149{
150 return ___ia64_readl (addr);
151}
152
153unsigned long
154__ia64_readq_relaxed (void __iomem *addr)
155{
156 return ___ia64_readq (addr);
157}
158
159void
160__ia64_mmiowb(void)
161{
162 ___ia64_mmiowb();
163}
164
165#endif /* CONFIG_IA64_GENERIC */
diff --git a/arch/ia64/lib/ip_fast_csum.S b/arch/ia64/lib/ip_fast_csum.S
new file mode 100644
index 000000000000..19674ca2acfc
--- /dev/null
+++ b/arch/ia64/lib/ip_fast_csum.S
@@ -0,0 +1,90 @@
1/*
2 * Optimized version of the ip_fast_csum() function
3 * Used for calculating IP header checksum
4 *
5 * Return: 16bit checksum, complemented
6 *
7 * Inputs:
8 * in0: address of buffer to checksum (char *)
9 * in1: length of the buffer (int)
10 *
11 * Copyright (C) 2002 Intel Corp.
12 * Copyright (C) 2002 Ken Chen <kenneth.w.chen@intel.com>
13 */
14
15#include <asm/asmmacro.h>
16
17/*
18 * Since we know that most likely this function is called with buf aligned
19 * on a 4-byte boundary and 20 bytes in length, we can execute rather quickly
20 * versus calling the generic version of do_csum, which has lots of overhead in
21 * handling various alignments and sizes. However, due to the lack of constraints
22 * put on the function input arguments, cases with alignment not on a 4-byte boundary or
23 * size not equal to 20 bytes will be handled by the generic do_csum function.
24 */
25
26#define in0 r32
27#define in1 r33
28#define ret0 r8
29
30GLOBAL_ENTRY(ip_fast_csum)
31 .prologue
32 .body
33 cmp.ne p6,p7=5,in1 // size other than 20 byte?
34 and r14=3,in0 // is it aligned on 4-byte?
35 add r15=4,in0 // second source pointer
36 ;;
37 cmp.ne.or.andcm p6,p7=r14,r0
38 ;;
39(p7) ld4 r20=[in0],8
40(p7) ld4 r21=[r15],8
41(p6) br.spnt .generic
42 ;;
43 ld4 r22=[in0],8
44 ld4 r23=[r15],8
45 ;;
46 ld4 r24=[in0]
47 add r20=r20,r21
48 add r22=r22,r23
49 ;;
50 add r20=r20,r22
51 ;;
52 add r20=r20,r24
53 ;;
54 shr.u ret0=r20,16 // now need to add the carry
55 zxt2 r20=r20
56 ;;
57 add r20=ret0,r20
58 ;;
59 shr.u ret0=r20,16 // add carry again
60 zxt2 r20=r20
61 ;;
62 add r20=ret0,r20
63 ;;
64 shr.u ret0=r20,16
65 zxt2 r20=r20
66 ;;
67 add r20=ret0,r20
68 ;;
69 andcm ret0=-1,r20
70 .restore sp // reset frame state
71 br.ret.sptk.many b0
72 ;;
73
74.generic:
75 .prologue
76 .save ar.pfs, r35
77 alloc r35=ar.pfs,2,2,2,0
78 .save rp, r34
79 mov r34=b0
80 .body
81 dep.z out1=in1,2,30
82 mov out0=in0
83 ;;
84 br.call.sptk.many b0=do_csum
85 ;;
86 andcm ret0=-1,ret0
87 mov ar.pfs=r35
88 mov b0=r34
89 br.ret.sptk.many b0
90END(ip_fast_csum)
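
The fast path above, for a 4-byte-aligned 20-byte header (ihl == 5), corresponds to roughly the following C (a sketch, not the kernel's generic implementation): sum the five 32-bit words into a wide accumulator, fold the carries back into 16 bits the way the assembly does, and return the one's complement.

#include <stdint.h>

uint16_t ip_fast_csum_sketch(const void *iph, unsigned int ihl)
{
	const uint32_t *p = iph;
	uint64_t sum = 0;
	unsigned int i;

	for (i = 0; i < ihl; i++)		/* ihl is in 32-bit words */
		sum += p[i];
	sum = (sum & 0xffff) + (sum >> 16);	/* fold carries, as the asm does */
	sum = (sum & 0xffff) + (sum >> 16);
	sum = (sum & 0xffff) + (sum >> 16);
	return (uint16_t)~sum;			/* complemented 16-bit checksum */
}
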
diff --git a/arch/ia64/lib/memcpy.S b/arch/ia64/lib/memcpy.S
new file mode 100644
index 000000000000..448908d80b69
--- /dev/null
+++ b/arch/ia64/lib/memcpy.S
@@ -0,0 +1,301 @@
1/*
2 *
3 * Optimized version of the standard memcpy() function
4 *
5 * Inputs:
6 * in0: destination address
7 * in1: source address
8 * in2: number of bytes to copy
9 * Output:
10 * no return value
11 *
12 * Copyright (C) 2000-2001 Hewlett-Packard Co
13 * Stephane Eranian <eranian@hpl.hp.com>
14 * David Mosberger-Tang <davidm@hpl.hp.com>
15 */
16#include <asm/asmmacro.h>
17
18GLOBAL_ENTRY(memcpy)
19
20# define MEM_LAT 21 /* latency to memory */
21
22# define dst r2
23# define src r3
24# define retval r8
25# define saved_pfs r9
26# define saved_lc r10
27# define saved_pr r11
28# define cnt r16
29# define src2 r17
30# define t0 r18
31# define t1 r19
32# define t2 r20
33# define t3 r21
34# define t4 r22
35# define src_end r23
36
37# define N (MEM_LAT + 4)
38# define Nrot ((N + 7) & ~7)
39
40 /*
41 * First, check if everything (src, dst, len) is a multiple of eight. If
42 * so, we handle everything with no taken branches (other than the loop
43 * itself) and a small icache footprint. Otherwise, we jump off to
44 * the more general copy routine handling arbitrary
45 * sizes/alignment etc.
46 */
47 .prologue
48 .save ar.pfs, saved_pfs
49 alloc saved_pfs=ar.pfs,3,Nrot,0,Nrot
50 .save ar.lc, saved_lc
51 mov saved_lc=ar.lc
52 or t0=in0,in1
53 ;;
54
55 or t0=t0,in2
56 .save pr, saved_pr
57 mov saved_pr=pr
58
59 .body
60
61 cmp.eq p6,p0=in2,r0 // zero length?
62 mov retval=in0 // return dst
63(p6) br.ret.spnt.many rp // zero length, return immediately
64 ;;
65
66 mov dst=in0 // copy because of rotation
67 shr.u cnt=in2,3 // number of 8-byte words to copy
68 mov pr.rot=1<<16
69 ;;
70
71 adds cnt=-1,cnt // br.ctop is repeat/until
72 cmp.gtu p7,p0=16,in2 // copying less than 16 bytes?
73 mov ar.ec=N
74 ;;
75
76 and t0=0x7,t0
77 mov ar.lc=cnt
78 ;;
79 cmp.ne p6,p0=t0,r0
80
81 mov src=in1 // copy because of rotation
82(p7) br.cond.spnt.few .memcpy_short
83(p6) br.cond.spnt.few .memcpy_long
84 ;;
85 nop.m 0
86 ;;
87 nop.m 0
88 nop.i 0
89 ;;
90 nop.m 0
91 ;;
92 .rotr val[N]
93 .rotp p[N]
94 .align 32
951: { .mib
96(p[0]) ld8 val[0]=[src],8
97 nop.i 0
98 brp.loop.imp 1b, 2f
99}
1002: { .mfb
101(p[N-1])st8 [dst]=val[N-1],8
102 nop.f 0
103 br.ctop.dptk.few 1b
104}
105 ;;
106 mov ar.lc=saved_lc
107 mov pr=saved_pr,-1
108 mov ar.pfs=saved_pfs
109 br.ret.sptk.many rp
110
111 /*
112 * Small (<16 bytes) unaligned copying is done via a simple byte-at-a-time
113 * copy loop. This performs relatively poorly on Itanium, but it doesn't
114 * get used very often (gcc inlines small copies) and due to atomicity
115 * issues, we want to avoid read-modify-write of entire words.
116 */
117 .align 32
118.memcpy_short:
119 adds cnt=-1,in2 // br.ctop is repeat/until
120 mov ar.ec=MEM_LAT
121 brp.loop.imp 1f, 2f
122 ;;
123 mov ar.lc=cnt
124 ;;
125 nop.m 0
126 ;;
127 nop.m 0
128 nop.i 0
129 ;;
130 nop.m 0
131 ;;
132 nop.m 0
133 ;;
134 /*
135 * It is faster to put a stop bit in the loop here because it makes
136 * the pipeline shorter (and latency is what matters on short copies).
137 */
138 .align 32
1391: { .mib
140(p[0]) ld1 val[0]=[src],1
141 nop.i 0
142 brp.loop.imp 1b, 2f
143} ;;
1442: { .mfb
145(p[MEM_LAT-1])st1 [dst]=val[MEM_LAT-1],1
146 nop.f 0
147 br.ctop.dptk.few 1b
148} ;;
149 mov ar.lc=saved_lc
150 mov pr=saved_pr,-1
151 mov ar.pfs=saved_pfs
152 br.ret.sptk.many rp
153
154 /*
155 * Large (>= 16 bytes) copying is done in a fancy way. Latency isn't
156 * an overriding concern here, but throughput is. We first do
157 * sub-word copying until the destination is aligned, then we check
158 * if the source is also aligned. If so, we do a simple load/store-loop
159 * until there are less than 8 bytes left over and then we do the tail,
160 * by storing the last few bytes using sub-word copying. If the source
161 * is not aligned, we branch off to the non-congruent loop.
162 *
163 * stage: op:
164 * 0 ld
165 * :
166 * MEM_LAT+3 shrp
167 * MEM_LAT+4 st
168 *
169 * On Itanium, the pipeline itself runs without stalls. However, br.ctop
170 * seems to introduce an unavoidable bubble in the pipeline so the overall
171 * latency is 2 cycles/iteration. This gives us a _copy_ throughput
172 * of 4 byte/cycle. Still not bad.
173 */
174# undef N
175# undef Nrot
176# define N (MEM_LAT + 5) /* number of stages */
177# define Nrot ((N+1 + 2 + 7) & ~7) /* number of rotating regs */
178
179#define LOG_LOOP_SIZE 6
180
181.memcpy_long:
182 alloc t3=ar.pfs,3,Nrot,0,Nrot // resize register frame
183 and t0=-8,src // t0 = src & ~7
184 and t2=7,src // t2 = src & 7
185 ;;
186 ld8 t0=[t0] // t0 = 1st source word
187 adds src2=7,src // src2 = (src + 7)
188 sub t4=r0,dst // t4 = -dst
189 ;;
190 and src2=-8,src2 // src2 = (src + 7) & ~7
191 shl t2=t2,3 // t2 = 8*(src & 7)
192 shl t4=t4,3 // t4 = 8*(dst & 7)
193 ;;
194 ld8 t1=[src2] // t1 = 1st source word if src is 8-byte aligned, 2nd otherwise
195 sub t3=64,t2 // t3 = 64-8*(src & 7)
196 shr.u t0=t0,t2
197 ;;
198 add src_end=src,in2
199 shl t1=t1,t3
200 mov pr=t4,0x38 // (p5,p4,p3)=(dst & 7)
201 ;;
202 or t0=t0,t1
203 mov cnt=r0
204 adds src_end=-1,src_end
205 ;;
206(p3) st1 [dst]=t0,1
207(p3) shr.u t0=t0,8
208(p3) adds cnt=1,cnt
209 ;;
210(p4) st2 [dst]=t0,2
211(p4) shr.u t0=t0,16
212(p4) adds cnt=2,cnt
213 ;;
214(p5) st4 [dst]=t0,4
215(p5) adds cnt=4,cnt
216 and src_end=-8,src_end // src_end = last word of source buffer
217 ;;
218
219 // At this point, dst is aligned to 8 bytes and there are at least 16-7=9 bytes left to copy:
220
2211:{ add src=cnt,src // make src point to remainder of source buffer
222 sub cnt=in2,cnt // cnt = number of bytes left to copy
223 mov t4=ip
224 } ;;
225 and src2=-8,src // align source pointer
226 adds t4=.memcpy_loops-1b,t4
227 mov ar.ec=N
228
229 and t0=7,src // t0 = src & 7
230 shr.u t2=cnt,3 // t2 = number of 8-byte words left to copy
231 shl cnt=cnt,3 // move bits 0-2 to 3-5
232 ;;
233
234 .rotr val[N+1], w[2]
235 .rotp p[N]
236
237 cmp.ne p6,p0=t0,r0 // is src aligned, too?
238 shl t0=t0,LOG_LOOP_SIZE // t0 = 8*(src & 7)
239 adds t2=-1,t2 // br.ctop is repeat/until
240 ;;
241 add t4=t0,t4
242 mov pr=cnt,0x38 // set (p5,p4,p3) to # of last-word bytes to copy
243 mov ar.lc=t2
244 ;;
245 nop.m 0
246 ;;
247 nop.m 0
248 nop.i 0
249 ;;
250 nop.m 0
251 ;;
252(p6) ld8 val[1]=[src2],8 // prime the pump...
253 mov b6=t4
254 br.sptk.few b6
255 ;;
256
257.memcpy_tail:
258 // At this point, (p5,p4,p3) are set to the number of bytes left to copy (which is
259 // less than 8) and t0 contains the last few bytes of the src buffer:
260(p5) st4 [dst]=t0,4
261(p5) shr.u t0=t0,32
262 mov ar.lc=saved_lc
263 ;;
264(p4) st2 [dst]=t0,2
265(p4) shr.u t0=t0,16
266 mov ar.pfs=saved_pfs
267 ;;
268(p3) st1 [dst]=t0
269 mov pr=saved_pr,-1
270 br.ret.sptk.many rp
271
272///////////////////////////////////////////////////////
273 .align 64
274
275#define COPY(shift,index) \
276 1: { .mib \
277 (p[0]) ld8 val[0]=[src2],8; \
278 (p[MEM_LAT+3]) shrp w[0]=val[MEM_LAT+3],val[MEM_LAT+4-index],shift; \
279 brp.loop.imp 1b, 2f \
280 }; \
281 2: { .mfb \
282 (p[MEM_LAT+4]) st8 [dst]=w[1],8; \
283 nop.f 0; \
284 br.ctop.dptk.few 1b; \
285 }; \
286 ;; \
287 ld8 val[N-1]=[src_end]; /* load last word (may be same as val[N]) */ \
288 ;; \
289 shrp t0=val[N-1],val[N-index],shift; \
290 br .memcpy_tail
291.memcpy_loops:
292 COPY(0, 1) /* no point special casing this---it doesn't go any faster without shrp */
293 COPY(8, 0)
294 COPY(16, 0)
295 COPY(24, 0)
296 COPY(32, 0)
297 COPY(40, 0)
298 COPY(48, 0)
299 COPY(56, 0)
300
301END(memcpy)
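
The non-congruent path above (the COPY() cases) produces each aligned destination word by combining two adjacent aligned source words with a pair shift, which is what shrp does in one instruction. A little-endian C sketch of that inner idea, with the software pipelining stripped away, might look like the following; it assumes a non-zero shift (the congruent case is dispatched separately, as in the assembly) and, unlike the real code, does not bother to load the final source word from within the buffer.

#include <stdint.h>
#include <stddef.h>

/* dst is 8-byte aligned, src is not; copy nwords 8-byte words.
 * NB: this sketch may read up to 7 bytes past the end of the source,
 * which memcpy.S avoids by loading the last word from src_end. */
static void shrp_copy_sketch(uint64_t *dst, const unsigned char *src, size_t nwords)
{
	const uint64_t *s = (const uint64_t *)((uintptr_t)src & ~(uintptr_t)7);
	unsigned int shift = ((uintptr_t)src & 7) * 8;	/* assumed non-zero here */
	uint64_t lo = *s++;				/* prime the pump */

	while (nwords--) {
		uint64_t hi = *s++;
		/* little-endian shrp: discard 'shift' low bits of lo,
		 * fill the top of the result from the next word */
		*dst++ = (lo >> shift) | (hi << (64 - shift));
		lo = hi;
	}
}
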
diff --git a/arch/ia64/lib/memcpy_mck.S b/arch/ia64/lib/memcpy_mck.S
new file mode 100644
index 000000000000..6f26ef7cc236
--- /dev/null
+++ b/arch/ia64/lib/memcpy_mck.S
@@ -0,0 +1,661 @@
1/*
2 * Itanium 2-optimized version of memcpy and copy_user function
3 *
4 * Inputs:
5 * in0: destination address
6 * in1: source address
7 * in2: number of bytes to copy
8 * Output:
9 * 0 on success, or the number of bytes NOT copied if an error occurred.
10 *
11 * Copyright (C) 2002 Intel Corp.
12 * Copyright (C) 2002 Ken Chen <kenneth.w.chen@intel.com>
13 */
14#include <linux/config.h>
15#include <asm/asmmacro.h>
16#include <asm/page.h>
17
18#define EK(y...) EX(y)
19
20/* McKinley specific optimization */
21
22#define retval r8
23#define saved_pfs r31
24#define saved_lc r10
25#define saved_pr r11
26#define saved_in0 r14
27#define saved_in1 r15
28#define saved_in2 r16
29
30#define src0 r2
31#define src1 r3
32#define dst0 r17
33#define dst1 r18
34#define cnt r9
35
36/* r19-r30 are temp for each code section */
37#define PREFETCH_DIST 8
38#define src_pre_mem r19
39#define dst_pre_mem r20
40#define src_pre_l2 r21
41#define dst_pre_l2 r22
42#define t1 r23
43#define t2 r24
44#define t3 r25
45#define t4 r26
46#define t5 t1 // alias!
47#define t6 t2 // alias!
48#define t7 t3 // alias!
49#define n8 r27
50#define t9 t5 // alias!
51#define t10 t4 // alias!
52#define t11 t7 // alias!
53#define t12 t6 // alias!
54#define t14 t10 // alias!
55#define t13 r28
56#define t15 r29
57#define tmp r30
58
59/* defines for long_copy block */
60#define A 0
61#define B (PREFETCH_DIST)
62#define C (B + PREFETCH_DIST)
63#define D (C + 1)
64#define N (D + 1)
65#define Nrot ((N + 7) & ~7)
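/*
 * For illustration: with PREFETCH_DIST == 8 the stages above work out to
 * A = 0, B = 8, C = 16, D = 17 and N = 18, so Nrot = (18 + 7) & ~7 = 24
 * rotating registers are reserved for the software-pipelined line copy.
 */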
66
67/* alias */
68#define in0 r32
69#define in1 r33
70#define in2 r34
71
72GLOBAL_ENTRY(memcpy)
73 and r28=0x7,in0
74 and r29=0x7,in1
75 mov f6=f0
76 br.cond.sptk .common_code
77 ;;
78GLOBAL_ENTRY(__copy_user)
79 .prologue
80// check dest alignment
81 and r28=0x7,in0
82 and r29=0x7,in1
83 mov f6=f1
84 mov saved_in0=in0 // save dest pointer
85 mov saved_in1=in1 // save src pointer
86 mov saved_in2=in2 // save len
87 ;;
88.common_code:
89 cmp.gt p15,p0=8,in2 // check for small size
90 cmp.ne p13,p0=0,r28 // check dest alignment
91 cmp.ne p14,p0=0,r29 // check src alignment
92 add src0=0,in1
93 sub r30=8,r28 // for .align_dest
94 mov retval=r0 // initialize return value
95 ;;
96 add dst0=0,in0
97 add dst1=1,in0 // dest odd index
98 cmp.le p6,p0 = 1,r30 // for .align_dest
99(p15) br.cond.dpnt .memcpy_short
100(p13) br.cond.dpnt .align_dest
101(p14) br.cond.dpnt .unaligned_src
102 ;;
103
104// both dest and src are aligned on 8-byte boundary
105.aligned_src:
106 .save ar.pfs, saved_pfs
107 alloc saved_pfs=ar.pfs,3,Nrot-3,0,Nrot
108 .save pr, saved_pr
109 mov saved_pr=pr
110
111	shr.u	cnt=in2,7	// number of 128-byte cache lines
112 ;;
113 cmp.lt p6,p0=2*PREFETCH_DIST,cnt
114 cmp.lt p7,p8=1,cnt
115 .save ar.lc, saved_lc
116 mov saved_lc=ar.lc
117 .body
118 add cnt=-1,cnt
119 add src_pre_mem=0,in1 // prefetch src pointer
120 add dst_pre_mem=0,in0 // prefetch dest pointer
121 ;;
122(p7) mov ar.lc=cnt // prefetch count
123(p8) mov ar.lc=r0
124(p6) br.cond.dpnt .long_copy
125 ;;
126
127.prefetch:
128 lfetch.fault [src_pre_mem], 128
129 lfetch.fault.excl [dst_pre_mem], 128
130 br.cloop.dptk.few .prefetch
131 ;;
132
133.medium_copy:
134	and	tmp=31,in2	// remaining length after the 32-byte iterations
135	shr.u	r29=in2,5	// number of 32-byte iterations
136 add dst1=8,dst0 // 2nd dest pointer
137 ;;
138 add cnt=-1,r29 // ctop iteration adjustment
139 cmp.eq p10,p0=r29,r0 // do we really need to loop?
140 add src1=8,src0 // 2nd src pointer
141 cmp.le p6,p0=8,tmp
142 ;;
143 cmp.le p7,p0=16,tmp
144 mov ar.lc=cnt // loop setup
145 cmp.eq p16,p17 = r0,r0
146 mov ar.ec=2
147(p10) br.dpnt.few .aligned_src_tail
148 ;;
149 TEXT_ALIGN(32)
1501:
151EX(.ex_handler, (p16) ld8 r34=[src0],16)
152EK(.ex_handler, (p16) ld8 r38=[src1],16)
153EX(.ex_handler, (p17) st8 [dst0]=r33,16)
154EK(.ex_handler, (p17) st8 [dst1]=r37,16)
155 ;;
156EX(.ex_handler, (p16) ld8 r32=[src0],16)
157EK(.ex_handler, (p16) ld8 r36=[src1],16)
158EX(.ex_handler, (p16) st8 [dst0]=r34,16)
159EK(.ex_handler, (p16) st8 [dst1]=r38,16)
160 br.ctop.dptk.few 1b
161 ;;
162
163.aligned_src_tail:
164EX(.ex_handler, (p6) ld8 t1=[src0])
165 mov ar.lc=saved_lc
166 mov ar.pfs=saved_pfs
167EX(.ex_hndlr_s, (p7) ld8 t2=[src1],8)
168 cmp.le p8,p0=24,tmp
169 and r21=-8,tmp
170 ;;
171EX(.ex_hndlr_s, (p8) ld8 t3=[src1])
172EX(.ex_handler, (p6) st8 [dst0]=t1) // store byte 1
173 and in2=7,tmp // remaining length
174EX(.ex_hndlr_d, (p7) st8 [dst1]=t2,8) // store byte 2
175 add src0=src0,r21 // setting up src pointer
176 add dst0=dst0,r21 // setting up dest pointer
177 ;;
178EX(.ex_handler, (p8) st8 [dst1]=t3) // store byte 3
179 mov pr=saved_pr,-1
180 br.dptk.many .memcpy_short
181 ;;
182
183/* code taken from copy_page_mck */
184.long_copy:
185 .rotr v[2*PREFETCH_DIST]
186 .rotp p[N]
187
188 mov src_pre_mem = src0
189 mov pr.rot = 0x10000
190 mov ar.ec = 1 // special unrolled loop
191
192 mov dst_pre_mem = dst0
193
194 add src_pre_l2 = 8*8, src0
195 add dst_pre_l2 = 8*8, dst0
196 ;;
197 add src0 = 8, src_pre_mem // first t1 src
198 mov ar.lc = 2*PREFETCH_DIST - 1
199 shr.u cnt=in2,7 // number of lines
200 add src1 = 3*8, src_pre_mem // first t3 src
201 add dst0 = 8, dst_pre_mem // first t1 dst
202 add dst1 = 3*8, dst_pre_mem // first t3 dst
203 ;;
204 and tmp=127,in2 // remaining bytes after this block
205 add cnt = -(2*PREFETCH_DIST) - 1, cnt
206 // same as .line_copy loop, but with all predicated-off instructions removed:
207.prefetch_loop:
208EX(.ex_hndlr_lcpy_1, (p[A]) ld8 v[A] = [src_pre_mem], 128) // M0
209EK(.ex_hndlr_lcpy_1, (p[B]) st8 [dst_pre_mem] = v[B], 128) // M2
210 br.ctop.sptk .prefetch_loop
211 ;;
212 cmp.eq p16, p0 = r0, r0 // reset p16 to 1
213 mov ar.lc = cnt
214 mov ar.ec = N // # of stages in pipeline
215 ;;
216.line_copy:
217EX(.ex_handler, (p[D]) ld8 t2 = [src0], 3*8) // M0
218EK(.ex_handler, (p[D]) ld8 t4 = [src1], 3*8) // M1
219EX(.ex_handler_lcpy, (p[B]) st8 [dst_pre_mem] = v[B], 128) // M2 prefetch dst from memory
220EK(.ex_handler_lcpy, (p[D]) st8 [dst_pre_l2] = n8, 128) // M3 prefetch dst from L2
221 ;;
222EX(.ex_handler_lcpy, (p[A]) ld8 v[A] = [src_pre_mem], 128) // M0 prefetch src from memory
223EK(.ex_handler_lcpy, (p[C]) ld8 n8 = [src_pre_l2], 128) // M1 prefetch src from L2
224EX(.ex_handler, (p[D]) st8 [dst0] = t1, 8) // M2
225EK(.ex_handler, (p[D]) st8 [dst1] = t3, 8) // M3
226 ;;
227EX(.ex_handler, (p[D]) ld8 t5 = [src0], 8)
228EK(.ex_handler, (p[D]) ld8 t7 = [src1], 3*8)
229EX(.ex_handler, (p[D]) st8 [dst0] = t2, 3*8)
230EK(.ex_handler, (p[D]) st8 [dst1] = t4, 3*8)
231 ;;
232EX(.ex_handler, (p[D]) ld8 t6 = [src0], 3*8)
233EK(.ex_handler, (p[D]) ld8 t10 = [src1], 8)
234EX(.ex_handler, (p[D]) st8 [dst0] = t5, 8)
235EK(.ex_handler, (p[D]) st8 [dst1] = t7, 3*8)
236 ;;
237EX(.ex_handler, (p[D]) ld8 t9 = [src0], 3*8)
238EK(.ex_handler, (p[D]) ld8 t11 = [src1], 3*8)
239EX(.ex_handler, (p[D]) st8 [dst0] = t6, 3*8)
240EK(.ex_handler, (p[D]) st8 [dst1] = t10, 8)
241 ;;
242EX(.ex_handler, (p[D]) ld8 t12 = [src0], 8)
243EK(.ex_handler, (p[D]) ld8 t14 = [src1], 8)
244EX(.ex_handler, (p[D]) st8 [dst0] = t9, 3*8)
245EK(.ex_handler, (p[D]) st8 [dst1] = t11, 3*8)
246 ;;
247EX(.ex_handler, (p[D]) ld8 t13 = [src0], 4*8)
248EK(.ex_handler, (p[D]) ld8 t15 = [src1], 4*8)
249EX(.ex_handler, (p[D]) st8 [dst0] = t12, 8)
250EK(.ex_handler, (p[D]) st8 [dst1] = t14, 8)
251 ;;
252EX(.ex_handler, (p[C]) ld8 t1 = [src0], 8)
253EK(.ex_handler, (p[C]) ld8 t3 = [src1], 8)
254EX(.ex_handler, (p[D]) st8 [dst0] = t13, 4*8)
255EK(.ex_handler, (p[D]) st8 [dst1] = t15, 4*8)
256 br.ctop.sptk .line_copy
257 ;;
258
259 add dst0=-8,dst0
260 add src0=-8,src0
261 mov in2=tmp
262 .restore sp
263 br.sptk.many .medium_copy
264 ;;
265
266#define BLOCK_SIZE 128*32
267#define blocksize r23
268#define curlen r24
269
270// dest is on 8-byte boundary, src is not. We need to do
271// ld8-ld8, shrp, then st8.  At most 8 bytes are copied per cycle.
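// For illustration, the per-word combine in C terms ("lo"/"hi" are the two
// adjacent aligned source words and k = src & 7, 0 < k < 8 -- illustrative
// names, not registers used below):
//
//	out = (lo >> (8*k)) | (hi << (64 - 8*k));	/* little endian */
//
// which is what the shrp in the COPYU() bodies computes, one 8-byte word
// per cycle.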
272.unaligned_src:
273 .prologue
274 .save ar.pfs, saved_pfs
275 alloc saved_pfs=ar.pfs,3,5,0,8
276 .save ar.lc, saved_lc
277 mov saved_lc=ar.lc
278 .save pr, saved_pr
279 mov saved_pr=pr
280 .body
281.4k_block:
282 mov saved_in0=dst0 // need to save all input arguments
283 mov saved_in2=in2
284 mov blocksize=BLOCK_SIZE
285 ;;
286 cmp.lt p6,p7=blocksize,in2
287 mov saved_in1=src0
288 ;;
289(p6) mov in2=blocksize
290 ;;
291	shr.u	r21=in2,7	// number of 128-byte cache lines
292	shr.u	r22=in2,4	// number of 16-byte iterations
293	and	curlen=15,in2	// remaining length after the iterations
294 and r30=7,src0 // source alignment
295 ;;
296 cmp.lt p7,p8=1,r21
297 add cnt=-1,r21
298 ;;
299
300 add src_pre_mem=0,src0 // prefetch src pointer
301 add dst_pre_mem=0,dst0 // prefetch dest pointer
302 and src0=-8,src0 // 1st src pointer
303(p7) mov ar.lc = r21
304(p8) mov ar.lc = r0
305 ;;
306 TEXT_ALIGN(32)
3071: lfetch.fault [src_pre_mem], 128
308 lfetch.fault.excl [dst_pre_mem], 128
309 br.cloop.dptk.few 1b
310 ;;
311
312 shladd dst1=r22,3,dst0 // 2nd dest pointer
313 shladd src1=r22,3,src0 // 2nd src pointer
314 cmp.eq p8,p9=r22,r0 // do we really need to loop?
315 cmp.le p6,p7=8,curlen; // have at least 8 byte remaining?
316 add cnt=-1,r22 // ctop iteration adjustment
317 ;;
318EX(.ex_handler, (p9) ld8 r33=[src0],8) // loop primer
319EK(.ex_handler, (p9) ld8 r37=[src1],8)
320(p8) br.dpnt.few .noloop
321 ;;
322
323// The jump address is calculated based on src alignment. The COPYU
324// macro below needs to confine its size to a power of two, so an entry
325// can be calculated using shl instead of an expensive multiply. The
326// size is then hard coded by the following #define to match the
327// actual size. This makes it somewhat tedious when the COPYU macro gets
328// changed and this needs to be adjusted to match.
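// For illustration, the branch-target computation below behaves like the
// following C sketch ("jump_table" and "align" are illustrative names):
//
//	target = (char *)&jump_table + (align - 1) * (1 << LOOP_SIZE);	/* align = src & 7, 1..7 */
//
// e.g. a source misaligned by 3 bytes lands on the COPYU(24) entry.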
329#define LOOP_SIZE 6
3301:
331 mov r29=ip // jmp_table thread
332 mov ar.lc=cnt
333 ;;
334 add r29=.jump_table - 1b - (.jmp1-.jump_table), r29
335 shl r28=r30, LOOP_SIZE // jmp_table thread
336 mov ar.ec=2 // loop setup
337 ;;
338 add r29=r29,r28 // jmp_table thread
339 cmp.eq p16,p17=r0,r0
340 ;;
341 mov b6=r29 // jmp_table thread
342 ;;
343 br.cond.sptk.few b6
344
345// for 8-15 byte case
346// We will skip the loop, but need to replicate the side effect
347// that the loop produces.
348.noloop:
349EX(.ex_handler, (p6) ld8 r37=[src1],8)
350 add src0=8,src0
351(p6) shl r25=r30,3
352 ;;
353EX(.ex_handler, (p6) ld8 r27=[src1])
354(p6) shr.u r28=r37,r25
355(p6) sub r26=64,r25
356 ;;
357(p6) shl r27=r27,r26
358 ;;
359(p6) or r21=r28,r27
360
361.unaligned_src_tail:
362/* check if we have more than blocksize to copy, if so go back */
363 cmp.gt p8,p0=saved_in2,blocksize
364 ;;
365(p8) add dst0=saved_in0,blocksize
366(p8) add src0=saved_in1,blocksize
367(p8) sub in2=saved_in2,blocksize
368(p8) br.dpnt .4k_block
369 ;;
370
371/* we have up to 15 bytes to copy in the tail;
372 * part of the work is already done in the jump table code.
373 * we are in the following state:
374 * src side:
375 *
376 * xxxxxx xx <----- r21 has xxxxxxxx already
377 * -------- -------- --------
378 * 0 8 16
379 * ^
380 * |
381 * src1
382 *
383 * dst
384 * -------- -------- --------
385 * ^
386 * |
387 * dst1
388 */
389EX(.ex_handler, (p6) st8 [dst1]=r21,8)	// more than 8 bytes to copy
390(p6) add curlen=-8,curlen // update length
391 mov ar.pfs=saved_pfs
392 ;;
393 mov ar.lc=saved_lc
394 mov pr=saved_pr,-1
395 mov in2=curlen // remaining length
396 mov dst0=dst1 // dest pointer
397 add src0=src1,r30 // forward by src alignment
398 ;;
399
400// 7 bytes or smaller.
401.memcpy_short:
402 cmp.le p8,p9 = 1,in2
403 cmp.le p10,p11 = 2,in2
404 cmp.le p12,p13 = 3,in2
405 cmp.le p14,p15 = 4,in2
406 add src1=1,src0 // second src pointer
407 add dst1=1,dst0 // second dest pointer
408 ;;
409
410EX(.ex_handler_short, (p8) ld1 t1=[src0],2)
411EK(.ex_handler_short, (p10) ld1 t2=[src1],2)
412(p9) br.ret.dpnt rp // 0 byte copy
413 ;;
414
415EX(.ex_handler_short, (p8) st1 [dst0]=t1,2)
416EK(.ex_handler_short, (p10) st1 [dst1]=t2,2)
417(p11) br.ret.dpnt rp // 1 byte copy
418
419EX(.ex_handler_short, (p12) ld1 t3=[src0],2)
420EK(.ex_handler_short, (p14) ld1 t4=[src1],2)
421(p13) br.ret.dpnt rp // 2 byte copy
422 ;;
423
424 cmp.le p6,p7 = 5,in2
425 cmp.le p8,p9 = 6,in2
426 cmp.le p10,p11 = 7,in2
427
428EX(.ex_handler_short, (p12) st1 [dst0]=t3,2)
429EK(.ex_handler_short, (p14) st1 [dst1]=t4,2)
430(p15) br.ret.dpnt rp // 3 byte copy
431 ;;
432
433EX(.ex_handler_short, (p6) ld1 t5=[src0],2)
434EK(.ex_handler_short, (p8) ld1 t6=[src1],2)
435(p7) br.ret.dpnt rp // 4 byte copy
436 ;;
437
438EX(.ex_handler_short, (p6) st1 [dst0]=t5,2)
439EK(.ex_handler_short, (p8) st1 [dst1]=t6,2)
440(p9) br.ret.dptk rp // 5 byte copy
441
442EX(.ex_handler_short, (p10) ld1 t7=[src0],2)
443(p11) br.ret.dptk rp // 6 byte copy
444 ;;
445
446EX(.ex_handler_short, (p10) st1 [dst0]=t7,2)
447 br.ret.dptk rp // done all cases
448
449
450/* Align dest to nearest 8-byte boundary. We know we have at
451 * least 7 bytes to copy, enough to crawl to 8-byte boundary.
452 * The actual number of bytes to crawl depends on the dest alignment.
453 * 7 bytes or less are taken care of at .memcpy_short
454
455 * src0 - source even index
456 * src1 - source odd index
457 * dst0 - dest even index
458 * dst1 - dest odd index
459 * r30 - distance to 8-byte boundary
460 */
461
462.align_dest:
463 add src1=1,in1 // source odd index
464 cmp.le p7,p0 = 2,r30 // for .align_dest
465 cmp.le p8,p0 = 3,r30 // for .align_dest
466EX(.ex_handler_short, (p6) ld1 t1=[src0],2)
467 cmp.le p9,p0 = 4,r30 // for .align_dest
468 cmp.le p10,p0 = 5,r30
469 ;;
470EX(.ex_handler_short, (p7) ld1 t2=[src1],2)
471EK(.ex_handler_short, (p8) ld1 t3=[src0],2)
472 cmp.le p11,p0 = 6,r30
473EX(.ex_handler_short, (p6) st1 [dst0] = t1,2)
474 cmp.le p12,p0 = 7,r30
475 ;;
476EX(.ex_handler_short, (p9) ld1 t4=[src1],2)
477EK(.ex_handler_short, (p10) ld1 t5=[src0],2)
478EX(.ex_handler_short, (p7) st1 [dst1] = t2,2)
479EK(.ex_handler_short, (p8) st1 [dst0] = t3,2)
480 ;;
481EX(.ex_handler_short, (p11) ld1 t6=[src1],2)
482EK(.ex_handler_short, (p12) ld1 t7=[src0],2)
483 cmp.eq p6,p7=r28,r29
484EX(.ex_handler_short, (p9) st1 [dst1] = t4,2)
485EK(.ex_handler_short, (p10) st1 [dst0] = t5,2)
486 sub in2=in2,r30
487 ;;
488EX(.ex_handler_short, (p11) st1 [dst1] = t6,2)
489EK(.ex_handler_short, (p12) st1 [dst0] = t7)
490 add dst0=in0,r30 // setup arguments
491 add src0=in1,r30
492(p6) br.cond.dptk .aligned_src
493(p7) br.cond.dpnt .unaligned_src
494 ;;
495
496/* main loop body in jump table format */
497#define COPYU(shift) \
4981: \
499EX(.ex_handler, (p16) ld8 r32=[src0],8); /* 1 */ \
500EK(.ex_handler, (p16) ld8 r36=[src1],8); \
501 (p17) shrp r35=r33,r34,shift;; /* 1 */ \
502EX(.ex_handler, (p6) ld8 r22=[src1]); /* common, prime for tail section */ \
503 nop.m 0; \
504 (p16) shrp r38=r36,r37,shift; \
505EX(.ex_handler, (p17) st8 [dst0]=r35,8); /* 1 */ \
506EK(.ex_handler, (p17) st8 [dst1]=r39,8); \
507 br.ctop.dptk.few 1b;; \
508 (p7) add src1=-8,src1; /* back out for <8 byte case */ \
509 shrp r21=r22,r38,shift; /* speculative work */ \
510 br.sptk.few .unaligned_src_tail /* branch out of jump table */ \
511 ;;
512 TEXT_ALIGN(32)
513.jump_table:
514 COPYU(8) // unaligned cases
515.jmp1:
516 COPYU(16)
517 COPYU(24)
518 COPYU(32)
519 COPYU(40)
520 COPYU(48)
521 COPYU(56)
522
523#undef A
524#undef B
525#undef C
526#undef D
527END(memcpy)
528
529/*
530 * Due to lack of local tag support in gcc 2.x assembler, it is not clear which
531 * instruction failed in the bundle. The exception algorithm is that we
532 * first figure out the faulting address, then detect whether any
533 * progress has been made on the copy; if so, we redo the copy from the last
534 * known copied location up to the faulting address (exclusive). In the
535 * copy_from_user case, the remaining bytes in the kernel buffer are zeroed.
536 *
537 * Take copy_from_user as an example: the code issues multiple loads in a
538 * bundle, and those loads could span two pages, so the
539 * faulting address is calculated as page_round_down(max(src0, src1)).
540 * This is based on the knowledge that if we can access one byte in a page, we
541 * can access any byte in that page.
542 *
543 * predicates used in the exception handler:
544 * p6-p7: direction
545 * p10-p11: src faulting addr calculation
546 * p12-p13: dst faulting addr calculation
547 */
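/*
 * For illustration, the faulting-address estimate described above amounts to
 * the C expression (fault_addr is an illustrative name):
 *
 *	fault_addr = max(src0, src1) & ~((1UL << PAGE_SHIFT) - 1);
 *
 * i.e. the larger of the two load pointers rounded down to its page base,
 * which is what the "dep F = r0,...,0,PAGE_SHIFT" below computes.
 */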
548
549#define A r19
550#define B r20
551#define C r21
552#define D r22
553#define F r28
554
555#define memset_arg0 r32
556#define memset_arg2 r33
557
558#define saved_retval loc0
559#define saved_rtlink loc1
560#define saved_pfs_stack loc2
561
562.ex_hndlr_s:
563 add src0=8,src0
564 br.sptk .ex_handler
565 ;;
566.ex_hndlr_d:
567 add dst0=8,dst0
568 br.sptk .ex_handler
569 ;;
570.ex_hndlr_lcpy_1:
571 mov src1=src_pre_mem
572 mov dst1=dst_pre_mem
573 cmp.gtu p10,p11=src_pre_mem,saved_in1
574 cmp.gtu p12,p13=dst_pre_mem,saved_in0
575 ;;
576(p10) add src0=8,saved_in1
577(p11) mov src0=saved_in1
578(p12) add dst0=8,saved_in0
579(p13) mov dst0=saved_in0
580 br.sptk .ex_handler
581.ex_handler_lcpy:
582	// in the line_copy block, the preload addresses should always be ahead
583	// of the other two src/dst pointers.  Furthermore, src1/dst1 should
584	// always be ahead of src0/dst0.
585 mov src1=src_pre_mem
586 mov dst1=dst_pre_mem
587.ex_handler:
588 mov pr=saved_pr,-1 // first restore pr, lc, and pfs
589 mov ar.lc=saved_lc
590 mov ar.pfs=saved_pfs
591 ;;
592.ex_handler_short: // fault occurred in sections that didn't change pr, lc, pfs
593 cmp.ltu p6,p7=saved_in0, saved_in1 // get the copy direction
594 cmp.ltu p10,p11=src0,src1
595 cmp.ltu p12,p13=dst0,dst1
596 fcmp.eq p8,p0=f6,f0 // is it memcpy?
597 mov tmp = dst0
598 ;;
599(p11) mov src1 = src0 // pick the larger of the two
600(p13) mov dst0 = dst1 // make dst0 the smaller one
601(p13) mov dst1 = tmp // and dst1 the larger one
602 ;;
603(p6) dep F = r0,dst1,0,PAGE_SHIFT // usr dst round down to page boundary
604(p7) dep F = r0,src1,0,PAGE_SHIFT // usr src round down to page boundary
605 ;;
606(p6) cmp.le p14,p0=dst0,saved_in0 // no progress has been made on store
607(p7) cmp.le p14,p0=src0,saved_in1 // no progress has been made on load
608 mov retval=saved_in2
609(p8) ld1 tmp=[src1] // force an oops for memcpy call
610(p8) st1 [dst1]=r0 // force an oops for memcpy call
611(p14) br.ret.sptk.many rp
612
613/*
614 * The remaining byte to copy is calculated as:
615 *
616 * A = (faulting_addr - orig_src) -> len to faulting ld address
617 * or
618 * (faulting_addr - orig_dst) -> len to faulting st address
619 * B = (cur_dst - orig_dst) -> len copied so far
620 * C = A - B -> len need to be copied
621 * D = orig_len - A -> len need to be zeroed
622 */
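/*
 * Worked example (hypothetical numbers): with saved_in2 = 100, a faulting
 * page boundary at A = 64 bytes into the source and B = 56 bytes already
 * stored, C = 64 - 56 = 8 bytes are re-copied and, in the copy_from_user
 * case, D = 100 - 64 = 36 bytes of the kernel buffer are zeroed.
 */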
623(p6) sub A = F, saved_in0
624(p7) sub A = F, saved_in1
625 clrrrb
626 ;;
627 alloc saved_pfs_stack=ar.pfs,3,3,3,0
628 sub B = dst0, saved_in0 // how many byte copied so far
629 ;;
630 sub C = A, B
631 sub D = saved_in2, A
632 ;;
633	cmp.gt	p8,p0=C,r0	// any bytes left to copy?
634 add memset_arg0=saved_in0, A
635(p6) mov memset_arg2=0 // copy_to_user should not call memset
636(p7) mov memset_arg2=D // copy_from_user need to have kbuf zeroed
637 mov r8=0
638 mov saved_retval = D
639 mov saved_rtlink = b0
640
641 add out0=saved_in0, B
642 add out1=saved_in1, B
643 mov out2=C
644(p8) br.call.sptk.few b0=__copy_user // recursive call
645 ;;
646
647 add saved_retval=saved_retval,r8 // above might return non-zero value
648	cmp.gt	p8,p0=memset_arg2,r0	// any bytes left to zero?
649 mov out0=memset_arg0 // *s
650 mov out1=r0 // c
651 mov out2=memset_arg2 // n
652(p8) br.call.sptk.few b0=memset
653 ;;
654
655 mov retval=saved_retval
656 mov ar.pfs=saved_pfs_stack
657 mov b0=saved_rtlink
658 br.ret.sptk.many rp
659
660/* end of McKinley specific optimization */
661END(__copy_user)
diff --git a/arch/ia64/lib/memset.S b/arch/ia64/lib/memset.S
new file mode 100644
index 000000000000..bd8cf907fe22
--- /dev/null
+++ b/arch/ia64/lib/memset.S
@@ -0,0 +1,362 @@
1/* Optimized version of the standard memset() function.
2
3 Copyright (c) 2002 Hewlett-Packard Co/CERN
4 Sverre Jarp <Sverre.Jarp@cern.ch>
5
6 Return: dest
7
8 Inputs:
9 in0: dest
10 in1: value
11 in2: count
12
13   The algorithm is fairly straightforward: set byte by byte until we
14   get to a 16B-aligned address, then loop on 128B chunks using an
15   early store as prefetching, then loop on 32B chunks, then clear remaining
16   words, and finally clear remaining bytes.
17 Since a stf.spill f0 can store 16B in one go, we use this instruction
18 to get peak speed when value = 0. */
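/* For illustration, the phases above sketched in C for a hypothetical
   memset(dest, value, cnt) (the real code below overlaps and predicates
   these steps, and very small counts take a separate byte-store path):

	while ((unsigned long)dest & 15) { *dest++ = value; cnt--; }
	while (cnt >= 128) { store_line(dest, value); dest += 128; cnt -= 128; }
	while (cnt >= 32)  { store32(dest, value);    dest += 32;  cnt -= 32;  }
	while (cnt >= 8)   { store8(dest, value);     dest += 8;   cnt -= 8;   }
	while (cnt--)        *dest++ = value;

   store_line/store32/store8 are illustrative helpers, not real functions. */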
19
20#include <asm/asmmacro.h>
21#undef ret
22
23#define dest in0
24#define value in1
25#define cnt in2
26
27#define tmp r31
28#define save_lc r30
29#define ptr0 r29
30#define ptr1 r28
31#define ptr2 r27
32#define ptr3 r26
33#define ptr9 r24
34#define loopcnt r23
35#define linecnt r22
36#define bytecnt r21
37
38#define fvalue f6
39
40// This routine uses only scratch predicate registers (p6 - p15)
41#define p_scr p6 // default register for same-cycle branches
42#define p_nz p7
43#define p_zr p8
44#define p_unalgn p9
45#define p_y p11
46#define p_n p12
47#define p_yy p13
48#define p_nn p14
49
50#define MIN1 15
51#define MIN1P1HALF 8
52#define LINE_SIZE 128
53#define LSIZE_SH 7 // shift amount
54#define PREF_AHEAD 8
55
56GLOBAL_ENTRY(memset)
57{ .mmi
58 .prologue
59 alloc tmp = ar.pfs, 3, 0, 0, 0
60 .body
61 lfetch.nt1 [dest] //
62 .save ar.lc, save_lc
63 mov.i save_lc = ar.lc
64} { .mmi
65 mov ret0 = dest // return value
66 cmp.ne p_nz, p_zr = value, r0 // use stf.spill if value is zero
67 cmp.eq p_scr, p0 = cnt, r0
68;; }
69{ .mmi
70 and ptr2 = -(MIN1+1), dest // aligned address
71 and tmp = MIN1, dest // prepare to check for correct alignment
72 tbit.nz p_y, p_n = dest, 0 // Do we have an odd address? (M_B_U)
73} { .mib
74 mov ptr1 = dest
75 mux1 value = value, @brcst // create 8 identical bytes in word
76(p_scr) br.ret.dpnt.many rp // return immediately if count = 0
77;; }
78{ .mib
79 cmp.ne p_unalgn, p0 = tmp, r0 //
80} { .mib
81 sub bytecnt = (MIN1+1), tmp // NB: # of bytes to move is 1 higher than loopcnt
82 cmp.gt p_scr, p0 = 16, cnt // is it a minimalistic task?
83(p_scr) br.cond.dptk.many .move_bytes_unaligned // go move just a few (M_B_U)
84;; }
85{ .mmi
86(p_unalgn) add ptr1 = (MIN1+1), ptr2 // after alignment
87(p_unalgn) add ptr2 = MIN1P1HALF, ptr2 // after alignment
88(p_unalgn) tbit.nz.unc p_y, p_n = bytecnt, 3 // should we do a st8 ?
89;; }
90{ .mib
91(p_y) add cnt = -8, cnt //
92(p_unalgn) tbit.nz.unc p_yy, p_nn = bytecnt, 2 // should we do a st4 ?
93} { .mib
94(p_y) st8 [ptr2] = value,-4 //
95(p_n) add ptr2 = 4, ptr2 //
96;; }
97{ .mib
98(p_yy) add cnt = -4, cnt //
99(p_unalgn) tbit.nz.unc p_y, p_n = bytecnt, 1 // should we do a st2 ?
100} { .mib
101(p_yy) st4 [ptr2] = value,-2 //
102(p_nn) add ptr2 = 2, ptr2 //
103;; }
104{ .mmi
105 mov tmp = LINE_SIZE+1 // for compare
106(p_y) add cnt = -2, cnt //
107(p_unalgn) tbit.nz.unc p_yy, p_nn = bytecnt, 0 // should we do a st1 ?
108} { .mmi
109 setf.sig fvalue=value // transfer value to FLP side
110(p_y) st2 [ptr2] = value,-1 //
111(p_n) add ptr2 = 1, ptr2 //
112;; }
113
114{ .mmi
115(p_yy) st1 [ptr2] = value //
116 cmp.gt p_scr, p0 = tmp, cnt // is it a minimalistic task?
117} { .mbb
118(p_yy) add cnt = -1, cnt //
119(p_scr) br.cond.dpnt.many .fraction_of_line // go move just a few
120;; }
121
122{ .mib
123 nop.m 0
124 shr.u linecnt = cnt, LSIZE_SH
125(p_zr) br.cond.dptk.many .l1b // Jump to use stf.spill
126;; }
127
128 TEXT_ALIGN(32) // --------------------- // L1A: store ahead into cache lines; fill later
129{ .mmi
130 and tmp = -(LINE_SIZE), cnt // compute end of range
131 mov ptr9 = ptr1 // used for prefetching
132 and cnt = (LINE_SIZE-1), cnt // remainder
133} { .mmi
134 mov loopcnt = PREF_AHEAD-1 // default prefetch loop
135 cmp.gt p_scr, p0 = PREF_AHEAD, linecnt // check against actual value
136;; }
137{ .mmi
138(p_scr) add loopcnt = -1, linecnt //
139 add ptr2 = 8, ptr1 // start of stores (beyond prefetch stores)
140 add ptr1 = tmp, ptr1 // first address beyond total range
141;; }
142{ .mmi
143 add tmp = -1, linecnt // next loop count
144 mov.i ar.lc = loopcnt //
145;; }
146.pref_l1a:
147{ .mib
148 stf8 [ptr9] = fvalue, 128 // Do stores one cache line apart
149 nop.i 0
150 br.cloop.dptk.few .pref_l1a
151;; }
152{ .mmi
153 add ptr0 = 16, ptr2 // Two stores in parallel
154 mov.i ar.lc = tmp //
155;; }
156.l1ax:
157 { .mmi
158 stf8 [ptr2] = fvalue, 8
159 stf8 [ptr0] = fvalue, 8
160 ;; }
161 { .mmi
162 stf8 [ptr2] = fvalue, 24
163 stf8 [ptr0] = fvalue, 24
164 ;; }
165 { .mmi
166 stf8 [ptr2] = fvalue, 8
167 stf8 [ptr0] = fvalue, 8
168 ;; }
169 { .mmi
170 stf8 [ptr2] = fvalue, 24
171 stf8 [ptr0] = fvalue, 24
172 ;; }
173 { .mmi
174 stf8 [ptr2] = fvalue, 8
175 stf8 [ptr0] = fvalue, 8
176 ;; }
177 { .mmi
178 stf8 [ptr2] = fvalue, 24
179 stf8 [ptr0] = fvalue, 24
180 ;; }
181 { .mmi
182 stf8 [ptr2] = fvalue, 8
183 stf8 [ptr0] = fvalue, 32
184 cmp.lt p_scr, p0 = ptr9, ptr1 // do we need more prefetching?
185 ;; }
186{ .mmb
187 stf8 [ptr2] = fvalue, 24
188(p_scr) stf8 [ptr9] = fvalue, 128
189 br.cloop.dptk.few .l1ax
190;; }
191{ .mbb
192 cmp.le p_scr, p0 = 8, cnt // just a few bytes left ?
193(p_scr) br.cond.dpnt.many .fraction_of_line // Branch no. 2
194 br.cond.dpnt.many .move_bytes_from_alignment // Branch no. 3
195;; }
196
197 TEXT_ALIGN(32)
198.l1b: // ------------------------------------ // L1B: store ahead into cache lines; fill later
199{ .mmi
200 and tmp = -(LINE_SIZE), cnt // compute end of range
201 mov ptr9 = ptr1 // used for prefetching
202 and cnt = (LINE_SIZE-1), cnt // remainder
203} { .mmi
204 mov loopcnt = PREF_AHEAD-1 // default prefetch loop
205 cmp.gt p_scr, p0 = PREF_AHEAD, linecnt // check against actual value
206;; }
207{ .mmi
208(p_scr) add loopcnt = -1, linecnt
209 add ptr2 = 16, ptr1 // start of stores (beyond prefetch stores)
210 add ptr1 = tmp, ptr1 // first address beyond total range
211;; }
212{ .mmi
213 add tmp = -1, linecnt // next loop count
214 mov.i ar.lc = loopcnt
215;; }
216.pref_l1b:
217{ .mib
218 stf.spill [ptr9] = f0, 128 // Do stores one cache line apart
219 nop.i 0
220 br.cloop.dptk.few .pref_l1b
221;; }
222{ .mmi
223 add ptr0 = 16, ptr2 // Two stores in parallel
224 mov.i ar.lc = tmp
225;; }
226.l1bx:
227 { .mmi
228 stf.spill [ptr2] = f0, 32
229 stf.spill [ptr0] = f0, 32
230 ;; }
231 { .mmi
232 stf.spill [ptr2] = f0, 32
233 stf.spill [ptr0] = f0, 32
234 ;; }
235 { .mmi
236 stf.spill [ptr2] = f0, 32
237 stf.spill [ptr0] = f0, 64
238 cmp.lt p_scr, p0 = ptr9, ptr1 // do we need more prefetching?
239 ;; }
240{ .mmb
241 stf.spill [ptr2] = f0, 32
242(p_scr) stf.spill [ptr9] = f0, 128
243 br.cloop.dptk.few .l1bx
244;; }
245{ .mib
246 cmp.gt p_scr, p0 = 8, cnt // just a few bytes left ?
247(p_scr) br.cond.dpnt.many .move_bytes_from_alignment //
248;; }
249
250.fraction_of_line:
251{ .mib
252 add ptr2 = 16, ptr1
253 shr.u loopcnt = cnt, 5 // loopcnt = cnt / 32
254;; }
255{ .mib
256 cmp.eq p_scr, p0 = loopcnt, r0
257 add loopcnt = -1, loopcnt
258(p_scr) br.cond.dpnt.many .store_words
259;; }
260{ .mib
261 and cnt = 0x1f, cnt // compute the remaining cnt
262 mov.i ar.lc = loopcnt
263;; }
264 TEXT_ALIGN(32)
265.l2: // ------------------------------------ // L2A: store 32B in 2 cycles
266{ .mmb
267 stf8 [ptr1] = fvalue, 8
268 stf8 [ptr2] = fvalue, 8
269;; } { .mmb
270 stf8 [ptr1] = fvalue, 24
271 stf8 [ptr2] = fvalue, 24
272 br.cloop.dptk.many .l2
273;; }
274.store_words:
275{ .mib
276 cmp.gt p_scr, p0 = 8, cnt // just a few bytes left ?
277(p_scr) br.cond.dpnt.many .move_bytes_from_alignment // Branch
278;; }
279
280{ .mmi
281 stf8 [ptr1] = fvalue, 8 // store
282 cmp.le p_y, p_n = 16, cnt
283 add cnt = -8, cnt // subtract
284;; }
285{ .mmi
286(p_y) stf8 [ptr1] = fvalue, 8 // store
287(p_y) cmp.le.unc p_yy, p_nn = 16, cnt
288(p_y) add cnt = -8, cnt // subtract
289;; }
290{ .mmi // store
291(p_yy) stf8 [ptr1] = fvalue, 8
292(p_yy) add cnt = -8, cnt // subtract
293;; }
294
295.move_bytes_from_alignment:
296{ .mib
297 cmp.eq p_scr, p0 = cnt, r0
298 tbit.nz.unc p_y, p0 = cnt, 2 // should we terminate with a st4 ?
299(p_scr) br.cond.dpnt.few .restore_and_exit
300;; }
301{ .mib
302(p_y) st4 [ptr1] = value,4
303 tbit.nz.unc p_yy, p0 = cnt, 1 // should we terminate with a st2 ?
304;; }
305{ .mib
306(p_yy) st2 [ptr1] = value,2
307 tbit.nz.unc p_y, p0 = cnt, 0 // should we terminate with a st1 ?
308;; }
309
310{ .mib
311(p_y) st1 [ptr1] = value
312;; }
313.restore_and_exit:
314{ .mib
315 nop.m 0
316 mov.i ar.lc = save_lc
317 br.ret.sptk.many rp
318;; }
319
320.move_bytes_unaligned:
321{ .mmi
322 .pred.rel "mutex",p_y, p_n
323 .pred.rel "mutex",p_yy, p_nn
324(p_n) cmp.le p_yy, p_nn = 4, cnt
325(p_y) cmp.le p_yy, p_nn = 5, cnt
326(p_n) add ptr2 = 2, ptr1
327} { .mmi
328(p_y) add ptr2 = 3, ptr1
329(p_y) st1 [ptr1] = value, 1 // fill 1 (odd-aligned) byte [15, 14 (or less) left]
330(p_y) add cnt = -1, cnt
331;; }
332{ .mmi
333(p_yy) cmp.le.unc p_y, p0 = 8, cnt
334 add ptr3 = ptr1, cnt // prepare last store
335 mov.i ar.lc = save_lc
336} { .mmi
337(p_yy) st2 [ptr1] = value, 4 // fill 2 (aligned) bytes
338(p_yy)	st2	[ptr2] = value, 4	// fill 2 (aligned) bytes [11, 10 (or less) left]
339(p_yy) add cnt = -4, cnt
340;; }
341{ .mmi
342(p_y) cmp.le.unc p_yy, p0 = 8, cnt
343 add ptr3 = -1, ptr3 // last store
344 tbit.nz p_scr, p0 = cnt, 1 // will there be a st2 at the end ?
345} { .mmi
346(p_y) st2 [ptr1] = value, 4 // fill 2 (aligned) bytes
347(p_y) st2 [ptr2] = value, 4 // fill 2 (aligned) bytes [7, 6 (or less) left]
348(p_y) add cnt = -4, cnt
349;; }
350{ .mmi
351(p_yy) st2 [ptr1] = value, 4 // fill 2 (aligned) bytes
352(p_yy) st2 [ptr2] = value, 4 // fill 2 (aligned) bytes [3, 2 (or less) left]
353 tbit.nz p_y, p0 = cnt, 0 // will there be a st1 at the end ?
354} { .mmi
355(p_yy) add cnt = -4, cnt
356;; }
357{ .mmb
358(p_scr) st2 [ptr1] = value // fill 2 (aligned) bytes
359(p_y) st1 [ptr3] = value // fill last byte (using ptr3)
360 br.ret.sptk.many rp
361}
362END(memset)
diff --git a/arch/ia64/lib/strlen.S b/arch/ia64/lib/strlen.S
new file mode 100644
index 000000000000..e0cdac0a85b8
--- /dev/null
+++ b/arch/ia64/lib/strlen.S
@@ -0,0 +1,192 @@
1/*
2 *
3 * Optimized version of the standard strlen() function
4 *
5 *
6 * Inputs:
7 * in0 address of string
8 *
9 * Outputs:
10 * ret0 the number of characters in the string (0 if empty string)
11 * does not count the \0
12 *
13 * Copyright (C) 1999, 2001 Hewlett-Packard Co
14 * Stephane Eranian <eranian@hpl.hp.com>
15 *
16 * 09/24/99 S.Eranian add speculation recovery code
17 */
18
19#include <asm/asmmacro.h>
20
21//
22//
23// This is an enhanced version of the basic strlen. It includes a combination
24// of compute zero index (czx), parallel comparisons, speculative loads and
25// loop unrolling using rotating registers.
26//
27// General Ideas about the algorithm:
28// The goal is to look at the string in chunks of 8 bytes,
29// so we need to do a few extra checks at the beginning because the
30// string may not be 8-byte aligned. In this case we load the 8-byte
31// quantity which includes the start of the string and mask the unused
32// bytes with 0xff to avoid confusing czx.
33// We use speculative loads and software pipelining to hide memory
34// latency and do read ahead safely. This way we defer any exception.
35//
36// Because we don't want the kernel to be relying on particular
37// settings of the DCR register, we provide recovery code in case
38// speculation fails. The recovery code is going to "redo" the work using
39// only normal loads. If we still get a fault then we generate a
40// kernel panic. Otherwise we return the strlen as usual.
41//
42// The fact that speculation may fail can be caused, for instance, by
43// the DCR.dm bit being set. In this case TLB misses are deferred, i.e.,
44// a NaT bit will be set if the translation is not present. The normal
45// load, on the other hand, will cause the translation to be inserted
46// if the mapping exists.
47//
48// It should be noted that we execute recovery code only when we need
49// to use the data that has been speculatively loaded: we don't execute
50// recovery code on pure read ahead data.
51//
52// Remarks:
53// - the cmp r0,r0 is used as a fast way to initialize a predicate
54// register to 1. This is required to make sure that we get the parallel
55// compare correct.
56//
57// - we don't use the epilogue counter to exit the loop but we need to set
58// it to zero beforehand.
59//
60//	- after the loop we must test for NaT values because neither the
61//	  czx nor cmp instruction raises a NaT consumption fault. We must be
62//	  careful not to look too far for a NaT we don't care about.
63// For instance we don't need to look at a NaT in val2 if the zero byte
64// was in val1.
65//
66// - Clearly performance tuning is required.
67//
68//
69//
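// For illustration, the initial masking described above in C terms (word,
// mask and k are illustrative names, k = in0 & 7 with 0 < k < 8; k == 0
// needs no masking):
//
//	word = *(unsigned long *)(in0 & ~7UL);	// aligned 8-byte load
//	mask = ~0UL >> (64 - 8*k);		// selects the low k bytes (little endian)
//	word |= mask;				// bytes before the string become 0xff
//
// so czx1.r cannot report a zero byte among the k garbage bytes.
//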
70#define saved_pfs r11
71#define tmp r10
72#define base r16
73#define orig r17
74#define saved_pr r18
75#define src r19
76#define mask r20
77#define val r21
78#define val1 r22
79#define val2 r23
80
81GLOBAL_ENTRY(strlen)
82 .prologue
83 .save ar.pfs, saved_pfs
84 alloc saved_pfs=ar.pfs,11,0,0,8 // rotating must be multiple of 8
85
86 .rotr v[2], w[2] // declares our 4 aliases
87
88 extr.u tmp=in0,0,3 // tmp=least significant 3 bits
89	mov orig=in0		// keep track of the initial byte address
90 dep src=0,in0,0,3 // src=8byte-aligned in0 address
91 .save pr, saved_pr
92 mov saved_pr=pr // preserve predicates (rotation)
93 ;;
94
95 .body
96
97 ld8 v[1]=[src],8 // must not speculate: can fail here
98 shl tmp=tmp,3 // multiply by 8bits/byte
99 mov mask=-1 // our mask
100 ;;
101 ld8.s w[1]=[src],8 // speculatively load next
102 cmp.eq p6,p0=r0,r0 // sets p6 to true for cmp.and
103 sub tmp=64,tmp // how many bits to shift our mask on the right
104 ;;
105 shr.u mask=mask,tmp // zero enough bits to hold v[1] valuable part
106 mov ar.ec=r0 // clear epilogue counter (saved in ar.pfs)
107 ;;
108 add base=-16,src // keep track of aligned base
109 or v[1]=v[1],mask // now we have a safe initial byte pattern
110 ;;
1111:
112 ld8.s v[0]=[src],8 // speculatively load next
113 czx1.r val1=v[1] // search 0 byte from right
114 czx1.r val2=w[1] // search 0 byte from right following 8bytes
115 ;;
116 ld8.s w[0]=[src],8 // speculatively load next to next
117 cmp.eq.and p6,p0=8,val1 // p6 = p6 and val1==8
118	cmp.eq.and p6,p0=8,val2	// p6 = p6 and val2==8
119(p6) br.wtop.dptk 1b // loop until p6 == 0
120 ;;
121 //
122	// We must try the recovery code iff
123 // val1_is_nat || (val1==8 && val2_is_nat)
124 //
125 // XXX Fixme
126 // - there must be a better way of doing the test
127 //
128 cmp.eq p8,p9=8,val1 // p6 = val1 had zero (disambiguate)
129 tnat.nz p6,p7=val1 // test NaT on val1
130(p6) br.cond.spnt .recover // jump to recovery if val1 is NaT
131 ;;
132 //
133 // if we come here p7 is true, i.e., initialized for // cmp
134 //
135 cmp.eq.and p7,p0=8,val1// val1==8?
136 tnat.nz.and p7,p0=val2 // test NaT if val2
137(p7) br.cond.spnt .recover // jump to recovery if val2 is NaT
138 ;;
139(p8) mov val1=val2 // the other test got us out of the loop
140(p8) adds src=-16,src // correct position when 3 ahead
141(p9) adds src=-24,src // correct position when 4 ahead
142 ;;
143 sub ret0=src,orig // distance from base
144 sub tmp=8,val1 // which byte in word
145 mov pr=saved_pr,0xffffffffffff0000
146 ;;
147 sub ret0=ret0,tmp // adjust
148 mov ar.pfs=saved_pfs // because of ar.ec, restore no matter what
149 br.ret.sptk.many rp // end of normal execution
150
151 //
152 // Outlined recovery code when speculation failed
153 //
154 // This time we don't use speculation and rely on the normal exception
155	// mechanism. That's why the loop is not as good as the previous one
156 // because read ahead is not possible
157 //
158 // IMPORTANT:
159 // Please note that in the case of strlen() as opposed to strlen_user()
160 // we don't use the exception mechanism, as this function is not
161 // supposed to fail. If that happens it means we have a bug and the
162	// code will cause a kernel fault.
163 //
164 // XXX Fixme
165 // - today we restart from the beginning of the string instead
166 // of trying to continue where we left off.
167 //
168.recover:
169 ld8 val=[base],8 // will fail if unrecoverable fault
170 ;;
171 or val=val,mask // remask first bytes
172 cmp.eq p0,p6=r0,r0 // nullify first ld8 in loop
173 ;;
174 //
175 // ar.ec is still zero here
176 //
1772:
178(p6) ld8 val=[base],8 // will fail if unrecoverable fault
179 ;;
180 czx1.r val1=val // search 0 byte from right
181 ;;
182 cmp.eq p6,p0=8,val1 // val1==8 ?
183(p6) br.wtop.dptk 2b // loop until p6 == 0
184 ;; // (avoid WAW on p63)
185 sub ret0=base,orig // distance from base
186 sub tmp=8,val1
187 mov pr=saved_pr,0xffffffffffff0000
188 ;;
189 sub ret0=ret0,tmp // length=now - back -1
190 mov ar.pfs=saved_pfs // because of ar.ec, restore no matter what
191 br.ret.sptk.many rp // end of successful recovery code
192END(strlen)
diff --git a/arch/ia64/lib/strlen_user.S b/arch/ia64/lib/strlen_user.S
new file mode 100644
index 000000000000..c71eded4285e
--- /dev/null
+++ b/arch/ia64/lib/strlen_user.S
@@ -0,0 +1,198 @@
1/*
2 * Optimized version of the strlen_user() function
3 *
4 * Inputs:
5 * in0 address of buffer
6 *
7 * Outputs:
8 * ret0 0 in case of fault, strlen(buffer)+1 otherwise
9 *
10 * Copyright (C) 1998, 1999, 2001 Hewlett-Packard Co
11 * David Mosberger-Tang <davidm@hpl.hp.com>
12 * Stephane Eranian <eranian@hpl.hp.com>
13 *
14 * 01/19/99 S.Eranian heavily enhanced version (see details below)
15 * 09/24/99 S.Eranian added speculation recovery code
16 */
17
18#include <asm/asmmacro.h>
19
20//
21// int strlen_user(char *)
22// ------------------------
23// Returns:
24// - length of string + 1
25// - 0 in case an exception is raised
26//
27// This is an enhanced version of the basic strlen_user. It includes a
28// combination of compute zero index (czx), parallel comparisons, speculative
29// loads and loop unrolling using rotating registers.
30//
31// General Ideas about the algorithm:
32// The goal is to look at the string in chunks of 8 bytes,
33// so we need to do a few extra checks at the beginning because the
34// string may not be 8-byte aligned. In this case we load the 8-byte
35// quantity which includes the start of the string and mask the unused
36// bytes with 0xff to avoid confusing czx.
37// We use speculative loads and software pipelining to hide memory
38// latency and do read ahead safely. This way we defer any exception.
39//
40// Because we don't want the kernel to be relying on particular
41// settings of the DCR register, we provide recovery code in case
42// speculation fails. The recovery code is going to "redo" the work using
43// only normal loads. If we still get a fault then we return an
44// error (ret0=0). Otherwise we return the strlen+1 as usual.
45// The fact that speculation may fail can be caused, for instance, by
46// the DCR.dm bit being set. In this case TLB misses are deferred, i.e.,
47// a NaT bit will be set if the translation is not present. The normal
48// load, on the other hand, will cause the translation to be inserted
49// if the mapping exists.
50//
51// It should be noted that we execute recovery code only when we need
52// to use the data that has been speculatively loaded: we don't execute
53// recovery code on pure read ahead data.
54//
55// Remarks:
56// - the cmp r0,r0 is used as a fast way to initialize a predicate
57// register to 1. This is required to make sure that we get the parallel
58// compare correct.
59//
60// - we don't use the epilogue counter to exit the loop but we need to set
61// it to zero beforehand.
62//
63//	- after the loop we must test for NaT values because neither the
64//	  czx nor cmp instruction raises a NaT consumption fault. We must be
65//	  careful not to look too far for a NaT we don't care about.
66// For instance we don't need to look at a NaT in val2 if the zero byte
67// was in val1.
68//
69// - Clearly performance tuning is required.
70//
71
72#define saved_pfs r11
73#define tmp r10
74#define base r16
75#define orig r17
76#define saved_pr r18
77#define src r19
78#define mask r20
79#define val r21
80#define val1 r22
81#define val2 r23
82
83GLOBAL_ENTRY(__strlen_user)
84 .prologue
85 .save ar.pfs, saved_pfs
86 alloc saved_pfs=ar.pfs,11,0,0,8
87
88 .rotr v[2], w[2] // declares our 4 aliases
89
90 extr.u tmp=in0,0,3 // tmp=least significant 3 bits
91	mov orig=in0		// keep track of the initial byte address
92 dep src=0,in0,0,3 // src=8byte-aligned in0 address
93 .save pr, saved_pr
94 mov saved_pr=pr // preserve predicates (rotation)
95 ;;
96
97 .body
98
99 ld8.s v[1]=[src],8 // load the initial 8bytes (must speculate)
100 shl tmp=tmp,3 // multiply by 8bits/byte
101 mov mask=-1 // our mask
102 ;;
103 ld8.s w[1]=[src],8 // load next 8 bytes in 2nd pipeline
104 cmp.eq p6,p0=r0,r0 // sets p6 (required because of // cmp.and)
105 sub tmp=64,tmp // how many bits to shift our mask on the right
106 ;;
107 shr.u mask=mask,tmp // zero enough bits to hold v[1] valuable part
108 mov ar.ec=r0 // clear epilogue counter (saved in ar.pfs)
109 ;;
110 add base=-16,src // keep track of aligned base
111 chk.s v[1], .recover // if already NaT, then directly skip to recover
112 or v[1]=v[1],mask // now we have a safe initial byte pattern
113 ;;
1141:
115 ld8.s v[0]=[src],8 // speculatively load next
116 czx1.r val1=v[1] // search 0 byte from right
117 czx1.r val2=w[1] // search 0 byte from right following 8bytes
118 ;;
119 ld8.s w[0]=[src],8 // speculatively load next to next
120 cmp.eq.and p6,p0=8,val1 // p6 = p6 and val1==8
121	cmp.eq.and p6,p0=8,val2	// p6 = p6 and val2==8
122(p6) br.wtop.dptk.few 1b // loop until p6 == 0
123 ;;
124 //
125	// We must try the recovery code iff
126 // val1_is_nat || (val1==8 && val2_is_nat)
127 //
128 // XXX Fixme
129 // - there must be a better way of doing the test
130 //
131 cmp.eq p8,p9=8,val1 // p6 = val1 had zero (disambiguate)
132 tnat.nz p6,p7=val1 // test NaT on val1
133(p6) br.cond.spnt .recover // jump to recovery if val1 is NaT
134 ;;
135 //
136 // if we come here p7 is true, i.e., initialized for // cmp
137 //
138 cmp.eq.and p7,p0=8,val1// val1==8?
139 tnat.nz.and p7,p0=val2 // test NaT if val2
140(p7) br.cond.spnt .recover // jump to recovery if val2 is NaT
141 ;;
142(p8) mov val1=val2 // val2 contains the value
143(p8) adds src=-16,src // correct position when 3 ahead
144(p9) adds src=-24,src // correct position when 4 ahead
145 ;;
146 sub ret0=src,orig // distance from origin
147 sub tmp=7,val1 // 7=8-1 because this strlen returns strlen+1
148 mov pr=saved_pr,0xffffffffffff0000
149 ;;
150 sub ret0=ret0,tmp // length=now - back -1
151 mov ar.pfs=saved_pfs // because of ar.ec, restore no matter what
152 br.ret.sptk.many rp // end of normal execution
153
154 //
155 // Outlined recovery code when speculation failed
156 //
157 // This time we don't use speculation and rely on the normal exception
158	// mechanism. That's why the loop is not as good as the previous one
159 // because read ahead is not possible
160 //
161 // XXX Fixme
162 // - today we restart from the beginning of the string instead
163 // of trying to continue where we left off.
164 //
165.recover:
166 EX(.Lexit1, ld8 val=[base],8) // load the initial bytes
167 ;;
168 or val=val,mask // remask first bytes
169 cmp.eq p0,p6=r0,r0 // nullify first ld8 in loop
170 ;;
171 //
172 // ar.ec is still zero here
173 //
1742:
175 EX(.Lexit1, (p6) ld8 val=[base],8)
176 ;;
177 czx1.r val1=val // search 0 byte from right
178 ;;
179 cmp.eq p6,p0=8,val1 // val1==8 ?
180(p6) br.wtop.dptk.few 2b // loop until p6 == 0
181 ;;
182 sub ret0=base,orig // distance from base
183 sub tmp=7,val1 // 7=8-1 because this strlen returns strlen+1
184 mov pr=saved_pr,0xffffffffffff0000
185 ;;
186 sub ret0=ret0,tmp // length=now - back -1
187 mov ar.pfs=saved_pfs // because of ar.ec, restore no matter what
188 br.ret.sptk.many rp // end of successful recovery code
189
190 //
191 // We failed even on the normal load (called from exception handler)
192 //
193.Lexit1:
194 mov ret0=0
195 mov pr=saved_pr,0xffffffffffff0000
196 mov ar.pfs=saved_pfs // because of ar.ec, restore no matter what
197 br.ret.sptk.many rp
198END(__strlen_user)
diff --git a/arch/ia64/lib/strncpy_from_user.S b/arch/ia64/lib/strncpy_from_user.S
new file mode 100644
index 000000000000..a504381f31eb
--- /dev/null
+++ b/arch/ia64/lib/strncpy_from_user.S
@@ -0,0 +1,44 @@
1/*
2 * Just like strncpy() except that if a fault occurs during copying,
3 * -EFAULT is returned.
4 *
5 * Inputs:
6 * in0: address of destination buffer
7 * in1: address of string to be copied
8 * in2: length of buffer in bytes
9 * Outputs:
10 * r8: -EFAULT in case of fault or number of bytes copied if no fault
11 *
12 * Copyright (C) 1998-2001 Hewlett-Packard Co
13 * Copyright (C) 1998-2001 David Mosberger-Tang <davidm@hpl.hp.com>
14 *
15 * 00/03/06 D. Mosberger	Fixed to return proper return value (bug found
16 *				by Andreas Schwab <schwab@suse.de>).
17 */
18
19#include <asm/asmmacro.h>
20
21GLOBAL_ENTRY(__strncpy_from_user)
22 alloc r2=ar.pfs,3,0,0,0
23 mov r8=0
24 mov r9=in1
25 ;;
26 add r10=in1,in2
27 cmp.eq p6,p0=r0,in2
28(p6) br.ret.spnt.many rp
29
30 // XXX braindead copy loop---this needs to be optimized
31.Loop1:
32 EX(.Lexit, ld1 r8=[in1],1)
33 ;;
34 EX(.Lexit, st1 [in0]=r8,1)
35 cmp.ne p6,p7=r8,r0
36 ;;
37(p6) cmp.ne.unc p8,p0=in1,r10
38(p8) br.cond.dpnt.few .Loop1
39 ;;
40(p6) mov r8=in2 // buffer filled up---return buffer length
41(p7) sub r8=in1,r9,1 // return string length (excluding NUL character)
42[.Lexit:]
43 br.ret.sptk.many rp
44END(__strncpy_from_user)
diff --git a/arch/ia64/lib/strnlen_user.S b/arch/ia64/lib/strnlen_user.S
new file mode 100644
index 000000000000..d09066b1e49d
--- /dev/null
+++ b/arch/ia64/lib/strnlen_user.S
@@ -0,0 +1,45 @@
1/*
2 * Returns 0 if an exception occurs before a NUL byte or the supplied limit (N)
3 * is reached, a value greater than N if the string is longer than the limit,
4 * else strlen(buffer)+1.
5 *
6 * Inputs:
7 * in0: address of buffer
8 * in1: string length limit N
9 * Outputs:
10 * r8: 0 in case of fault, strlen(buffer)+1 otherwise
11 *
12 * Copyright (C) 1999, 2001 David Mosberger-Tang <davidm@hpl.hp.com>
13 */
14
15#include <asm/asmmacro.h>
16
17GLOBAL_ENTRY(__strnlen_user)
18 .prologue
19 alloc r2=ar.pfs,2,0,0,0
20 .save ar.lc, r16
21 mov r16=ar.lc // preserve ar.lc
22
23 .body
24
25 add r3=-1,in1
26 ;;
27 mov ar.lc=r3
28 mov r9=0
29 ;;
30 // XXX braindead strlen loop---this needs to be optimized
31.Loop1:
32 EXCLR(.Lexit, ld1 r8=[in0],1)
33 add r9=1,r9
34 ;;
35 cmp.eq p6,p0=r8,r0
36(p6) br.cond.dpnt .Lexit
37 br.cloop.dptk.few .Loop1
38
39 add r9=1,in1 // NUL not found---return N+1
40 ;;
41.Lexit:
42 mov r8=r9
43 mov ar.lc=r16 // restore ar.lc
44 br.ret.sptk.many rp
45END(__strnlen_user)
diff --git a/arch/ia64/lib/swiotlb.c b/arch/ia64/lib/swiotlb.c
new file mode 100644
index 000000000000..ab7b3ad99a7f
--- /dev/null
+++ b/arch/ia64/lib/swiotlb.c
@@ -0,0 +1,658 @@
1/*
2 * Dynamic DMA mapping support.
3 *
4 * This implementation is for IA-64 platforms that do not support
5 * I/O TLBs (aka DMA address translation hardware).
6 * Copyright (C) 2000 Asit Mallick <Asit.K.Mallick@intel.com>
7 * Copyright (C) 2000 Goutham Rao <goutham.rao@intel.com>
8 * Copyright (C) 2000, 2003 Hewlett-Packard Co
9 * David Mosberger-Tang <davidm@hpl.hp.com>
10 *
11 * 03/05/07 davidm Switch from PCI-DMA to generic device DMA API.
12 * 00/12/13 davidm Rename to swiotlb.c and add mark_clean() to avoid
13 * unnecessary i-cache flushing.
14 * 04/07/.. ak Better overflow handling. Assorted fixes.
15 */
16
17#include <linux/cache.h>
18#include <linux/mm.h>
19#include <linux/module.h>
20#include <linux/pci.h>
21#include <linux/spinlock.h>
22#include <linux/string.h>
23#include <linux/types.h>
24#include <linux/ctype.h>
25
26#include <asm/io.h>
27#include <asm/pci.h>
28#include <asm/dma.h>
29
30#include <linux/init.h>
31#include <linux/bootmem.h>
32
33#define OFFSET(val,align) ((unsigned long) \
34 ( (val) & ( (align) - 1)))
35
36#define SG_ENT_VIRT_ADDRESS(sg) (page_address((sg)->page) + (sg)->offset)
37#define SG_ENT_PHYS_ADDRESS(SG) virt_to_phys(SG_ENT_VIRT_ADDRESS(SG))
38
39/*
40 * Maximum allowable number of contiguous slabs to map,
41 * must be a power of 2. What is the appropriate value ?
42 * The complexity of {map,unmap}_single is linearly dependent on this value.
43 */
44#define IO_TLB_SEGSIZE 128
45
46/*
47 * log of the size of each IO TLB slab. The number of slabs is command line
48 * controllable.
49 */
50#define IO_TLB_SHIFT 11
51
52int swiotlb_force;
53
54/*
55 * Used to do a quick range check in swiotlb_unmap_single and
56 * swiotlb_sync_single_*, to see if the memory was in fact allocated by this
57 * API.
58 */
59static char *io_tlb_start, *io_tlb_end;
60
61/*
62 * The number of IO TLB blocks (in groups of 64) between io_tlb_start and
63 * io_tlb_end. This is command line adjustable via setup_io_tlb_npages.
64 */
65static unsigned long io_tlb_nslabs;
66
67/*
68 * When the IOMMU overflows we return a fallback buffer. This sets the size.
69 */
70static unsigned long io_tlb_overflow = 32*1024;
71
72void *io_tlb_overflow_buffer;
73
74/*
75 * This is a free list describing the number of free entries available from
76 * each index
77 */
78static unsigned int *io_tlb_list;
79static unsigned int io_tlb_index;
80
81/*
82 * We need to save away the original address corresponding to a mapped entry
83 * for the sync operations.
84 */
85static unsigned char **io_tlb_orig_addr;
86
87/*
88 * Protect the above data structures in the map and unmap calls
89 */
90static DEFINE_SPINLOCK(io_tlb_lock);
91
92static int __init
93setup_io_tlb_npages(char *str)
94{
95 if (isdigit(*str)) {
96 io_tlb_nslabs = simple_strtoul(str, &str, 0) <<
97 (PAGE_SHIFT - IO_TLB_SHIFT);
98 /* avoid tail segment of size < IO_TLB_SEGSIZE */
99 io_tlb_nslabs = ALIGN(io_tlb_nslabs, IO_TLB_SEGSIZE);
100 }
101 if (*str == ',')
102 ++str;
103 if (!strcmp(str, "force"))
104 swiotlb_force = 1;
105 return 1;
106}
107__setup("swiotlb=", setup_io_tlb_npages);
108/* make io_tlb_overflow tunable too? */
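/*
 * Example (illustrative): booting with "swiotlb=16384" requests 16384 pages
 * of bounce space, which setup_io_tlb_npages() converts to
 * 16384 << (PAGE_SHIFT - IO_TLB_SHIFT) slabs of (1 << IO_TLB_SHIFT) bytes
 * each and rounds up to a multiple of IO_TLB_SEGSIZE.  "swiotlb=16384,force"
 * additionally sets swiotlb_force, so every mapping is bounced.
 */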
109
110/*
111 * Statically reserve bounce buffer space and initialize bounce buffer data
112 * structures for the software IO TLB used to implement the PCI DMA API.
113 */
114void
115swiotlb_init_with_default_size (size_t default_size)
116{
117 unsigned long i;
118
119 if (!io_tlb_nslabs) {
120 io_tlb_nslabs = (default_size >> PAGE_SHIFT);
121 io_tlb_nslabs = ALIGN(io_tlb_nslabs, IO_TLB_SEGSIZE);
122 }
123
124 /*
125 * Get IO TLB memory from the low pages
126 */
127 io_tlb_start = alloc_bootmem_low_pages(io_tlb_nslabs *
128 (1 << IO_TLB_SHIFT));
129 if (!io_tlb_start)
130 panic("Cannot allocate SWIOTLB buffer");
131 io_tlb_end = io_tlb_start + io_tlb_nslabs * (1 << IO_TLB_SHIFT);
132
133 /*
134 * Allocate and initialize the free list array. This array is used
135 * to find contiguous free memory regions of size up to IO_TLB_SEGSIZE
136 * between io_tlb_start and io_tlb_end.
137 */
138 io_tlb_list = alloc_bootmem(io_tlb_nslabs * sizeof(int));
139 for (i = 0; i < io_tlb_nslabs; i++)
140 io_tlb_list[i] = IO_TLB_SEGSIZE - OFFSET(i, IO_TLB_SEGSIZE);
141 io_tlb_index = 0;
142 io_tlb_orig_addr = alloc_bootmem(io_tlb_nslabs * sizeof(char *));
143
144 /*
145 * Get the overflow emergency buffer
146 */
147 io_tlb_overflow_buffer = alloc_bootmem_low(io_tlb_overflow);
148 printk(KERN_INFO "Placing software IO TLB between 0x%lx - 0x%lx\n",
149 virt_to_phys(io_tlb_start), virt_to_phys(io_tlb_end));
150}
151
152void
153swiotlb_init (void)
154{
155 swiotlb_init_with_default_size(64 * (1<<20)); /* default to 64MB */
156}
157
158static inline int
159address_needs_mapping(struct device *hwdev, dma_addr_t addr)
160{
161 dma_addr_t mask = 0xffffffff;
162 /* If the device has a mask, use it, otherwise default to 32 bits */
163 if (hwdev && hwdev->dma_mask)
164 mask = *hwdev->dma_mask;
165 return (addr & ~mask) != 0;
166}
167
168/*
169 * Allocates bounce buffer and returns its kernel virtual address.
170 */
171static void *
172map_single(struct device *hwdev, char *buffer, size_t size, int dir)
173{
174 unsigned long flags;
175 char *dma_addr;
176 unsigned int nslots, stride, index, wrap;
177 int i;
178
179 /*
180 * For mappings greater than a page, we limit the stride (and
181 * hence alignment) to a page size.
182 */
183 nslots = ALIGN(size, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT;
184 if (size > PAGE_SIZE)
185 stride = (1 << (PAGE_SHIFT - IO_TLB_SHIFT));
186 else
187 stride = 1;
188
189 if (!nslots)
190 BUG();
191
192 /*
193	 * Find a suitable number of IO TLB entries that will fit this
194	 * request and allocate a buffer from that IO TLB pool.
195 */
196 spin_lock_irqsave(&io_tlb_lock, flags);
197 {
198 wrap = index = ALIGN(io_tlb_index, stride);
199
200 if (index >= io_tlb_nslabs)
201 wrap = index = 0;
202
203 do {
204 /*
205 * If we find a slot that indicates we have 'nslots'
206 * number of contiguous buffers, we allocate the
207 * buffers from that slot and mark the entries as '0'
208 * indicating unavailable.
209 */
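			/*
			 * Worked example (hypothetical): with IO_TLB_SEGSIZE == 128
			 * and a fully free segment, io_tlb_list[] reads 128, 127,
			 * ..., 1.  A request for nslots == 4 at index 0 zeroes
			 * entries 0..3; entry 4 still reads 124, which is exactly
			 * the number of contiguous free slots left in that segment.
			 */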
210 if (io_tlb_list[index] >= nslots) {
211 int count = 0;
212
213 for (i = index; i < (int) (index + nslots); i++)
214 io_tlb_list[i] = 0;
215 for (i = index - 1; (OFFSET(i, IO_TLB_SEGSIZE) != IO_TLB_SEGSIZE -1) && io_tlb_list[i]; i--)
216 io_tlb_list[i] = ++count;
217 dma_addr = io_tlb_start + (index << IO_TLB_SHIFT);
218
219 /*
220 * Update the indices to avoid searching in
221 * the next round.
222 */
223 io_tlb_index = ((index + nslots) < io_tlb_nslabs
224 ? (index + nslots) : 0);
225
226 goto found;
227 }
228 index += stride;
229 if (index >= io_tlb_nslabs)
230 index = 0;
231 } while (index != wrap);
232
233 spin_unlock_irqrestore(&io_tlb_lock, flags);
234 return NULL;
235 }
236 found:
237 spin_unlock_irqrestore(&io_tlb_lock, flags);
238
239 /*
240 * Save away the mapping from the original address to the DMA address.
241 * This is needed when we sync the memory. Then we sync the buffer if
242 * needed.
243 */
244 io_tlb_orig_addr[index] = buffer;
245 if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL)
246 memcpy(dma_addr, buffer, size);
247
248 return dma_addr;
249}
250
251/*
252 * dma_addr is the kernel virtual address of the bounce buffer to unmap.
253 */
254static void
255unmap_single(struct device *hwdev, char *dma_addr, size_t size, int dir)
256{
257 unsigned long flags;
258 int i, count, nslots = ALIGN(size, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT;
259 int index = (dma_addr - io_tlb_start) >> IO_TLB_SHIFT;
260 char *buffer = io_tlb_orig_addr[index];
261
262 /*
263 * First, sync the memory before unmapping the entry
264 */
265 if (buffer && ((dir == DMA_FROM_DEVICE) || (dir == DMA_BIDIRECTIONAL)))
266 /*
267		 * bounce... copy the data back into the original buffer and
268 * delete the bounce buffer.
269 */
270 memcpy(buffer, dma_addr, size);
271
272 /*
273 * Return the buffer to the free list by setting the corresponding
274	 * entries to indicate the number of contiguous entries available.
275 * While returning the entries to the free list, we merge the entries
276 * with slots below and above the pool being returned.
277 */
278 spin_lock_irqsave(&io_tlb_lock, flags);
279 {
280 count = ((index + nslots) < ALIGN(index + 1, IO_TLB_SEGSIZE) ?
281 io_tlb_list[index + nslots] : 0);
282 /*
283 * Step 1: return the slots to the free list, merging the
284		 * slots with succeeding slots
285 */
286 for (i = index + nslots - 1; i >= index; i--)
287 io_tlb_list[i] = ++count;
288 /*
289 * Step 2: merge the returned slots with the preceding slots,
290 * if available (non zero)
291 */
292 for (i = index - 1; (OFFSET(i, IO_TLB_SEGSIZE) != IO_TLB_SEGSIZE -1) && io_tlb_list[i]; i--)
293 io_tlb_list[i] = ++count;
294 }
295 spin_unlock_irqrestore(&io_tlb_lock, flags);
296}
297
298static void
299sync_single(struct device *hwdev, char *dma_addr, size_t size, int dir)
300{
301 int index = (dma_addr - io_tlb_start) >> IO_TLB_SHIFT;
302 char *buffer = io_tlb_orig_addr[index];
303
304 /*
305 * bounce... copy the data back into/from the original buffer
306 * XXX How do you handle DMA_BIDIRECTIONAL here ?
307 */
308 if (dir == DMA_FROM_DEVICE)
309 memcpy(buffer, dma_addr, size);
310 else if (dir == DMA_TO_DEVICE)
311 memcpy(dma_addr, buffer, size);
312 else
313 BUG();
314}
315
316void *
317swiotlb_alloc_coherent(struct device *hwdev, size_t size,
318 dma_addr_t *dma_handle, int flags)
319{
320 unsigned long dev_addr;
321 void *ret;
322 int order = get_order(size);
323
324 /*
325 * XXX fix me: the DMA API should pass us an explicit DMA mask
326 * instead, or use ZONE_DMA32 (ia64 overloads ZONE_DMA to be a ~32
327 * bit range instead of a 16MB one).
328 */
329 flags |= GFP_DMA;
330
331 ret = (void *)__get_free_pages(flags, order);
332 if (ret && address_needs_mapping(hwdev, virt_to_phys(ret))) {
333 /*
334 * The allocated memory isn't reachable by the device.
335 * Fall back on swiotlb_map_single().
336 */
337 free_pages((unsigned long) ret, order);
338 ret = NULL;
339 }
340 if (!ret) {
341 /*
342 * We are either out of memory or the device can't DMA
343 * to GFP_DMA memory; fall back on
344 * swiotlb_map_single(), which will grab memory from
345 * the lowest available address range.
346 */
347 dma_addr_t handle;
348 handle = swiotlb_map_single(NULL, NULL, size, DMA_FROM_DEVICE);
349 if (dma_mapping_error(handle))
350 return NULL;
351
352 ret = phys_to_virt(handle);
353 }
354
355 memset(ret, 0, size);
356 dev_addr = virt_to_phys(ret);
357
358 /* Confirm address can be DMA'd by device */
359 if (address_needs_mapping(hwdev, dev_addr)) {
360 printk("hwdev DMA mask = 0x%016Lx, dev_addr = 0x%016lx\n",
361 (unsigned long long)*hwdev->dma_mask, dev_addr);
362 panic("swiotlb_alloc_coherent: allocated memory is out of "
363 "range for device");
364 }
365 *dma_handle = dev_addr;
366 return ret;
367}
368
369void
370swiotlb_free_coherent(struct device *hwdev, size_t size, void *vaddr,
371 dma_addr_t dma_handle)
372{
373 if (!(vaddr >= (void *)io_tlb_start
374 && vaddr < (void *)io_tlb_end))
375 free_pages((unsigned long) vaddr, get_order(size));
376 else
377 /* DMA_TO_DEVICE to avoid memcpy in unmap_single */
378 swiotlb_unmap_single (hwdev, dma_handle, size, DMA_TO_DEVICE);
379}
380
381static void
382swiotlb_full(struct device *dev, size_t size, int dir, int do_panic)
383{
384 /*
385 * Ran out of IOMMU space for this operation. This is very bad.
386	 * Unfortunately the drivers cannot handle this operation properly
387	 * unless they check for pci_dma_mapping_error (most don't).
388	 * When the mapping is small enough, return a static buffer to limit
389 * the damage, or panic when the transfer is too big.
390 */
391 printk(KERN_ERR "PCI-DMA: Out of SW-IOMMU space for %lu bytes at "
392 "device %s\n", size, dev ? dev->bus_id : "?");
393
394 if (size > io_tlb_overflow && do_panic) {
395 if (dir == PCI_DMA_FROMDEVICE || dir == PCI_DMA_BIDIRECTIONAL)
396 panic("PCI-DMA: Memory would be corrupted\n");
397 if (dir == PCI_DMA_TODEVICE || dir == PCI_DMA_BIDIRECTIONAL)
398 panic("PCI-DMA: Random memory would be DMAed\n");
399 }
400}
401
402/*
403 * Map a single buffer of the indicated size for DMA in streaming mode. The
404 * PCI address to use is returned.
405 *
406 * Once the device is given the dma address, the device owns this memory until
407	 * either swiotlb_unmap_single or swiotlb_sync_single_for_* is performed.
408 */
409dma_addr_t
410swiotlb_map_single(struct device *hwdev, void *ptr, size_t size, int dir)
411{
412 unsigned long dev_addr = virt_to_phys(ptr);
413 void *map;
414
415 if (dir == DMA_NONE)
416 BUG();
417 /*
418 * If the pointer passed in happens to be in the device's DMA window,
419 * we can safely return the device addr and not worry about bounce
420 * buffering it.
421 */
422 if (!address_needs_mapping(hwdev, dev_addr) && !swiotlb_force)
423 return dev_addr;
424
425 /*
426 * Oh well, have to allocate and map a bounce buffer.
427 */
428 map = map_single(hwdev, ptr, size, dir);
429 if (!map) {
430 swiotlb_full(hwdev, size, dir, 1);
431 map = io_tlb_overflow_buffer;
432 }
433
434 dev_addr = virt_to_phys(map);
435
436 /*
437 * Ensure that the address returned is DMA'ble
438 */
439 if (address_needs_mapping(hwdev, dev_addr))
440 panic("map_single: bounce buffer is not DMA'ble");
441
442 return dev_addr;
443}
444
445/*
446 * Since DMA is i-cache coherent, any (complete) pages that were written via
447 * DMA can be marked as "clean" so that lazy_mmu_prot_update() doesn't have to
448 * flush them when they get mapped into an executable vm-area.
449 */
450static void
451mark_clean(void *addr, size_t size)
452{
453 unsigned long pg_addr, end;
454
455 pg_addr = PAGE_ALIGN((unsigned long) addr);
456 end = (unsigned long) addr + size;
457 while (pg_addr + PAGE_SIZE <= end) {
458 struct page *page = virt_to_page(pg_addr);
459 set_bit(PG_arch_1, &page->flags);
460 pg_addr += PAGE_SIZE;
461 }
462}
463
464/*
465 * Unmap a single streaming mode DMA translation. The dma_addr and size must
466 * match what was provided for in a previous swiotlb_map_single call. All
467 * other usages are undefined.
468 *
469 * After this call, reads by the cpu to the buffer are guaranteed to see
470 * whatever the device wrote there.
471 */
472void
473swiotlb_unmap_single(struct device *hwdev, dma_addr_t dev_addr, size_t size,
474 int dir)
475{
476 char *dma_addr = phys_to_virt(dev_addr);
477
478 if (dir == DMA_NONE)
479 BUG();
480 if (dma_addr >= io_tlb_start && dma_addr < io_tlb_end)
481 unmap_single(hwdev, dma_addr, size, dir);
482 else if (dir == DMA_FROM_DEVICE)
483 mark_clean(dma_addr, size);
484}
485
486/*
487 * Make physical memory consistent for a single streaming mode DMA translation
488 * after a transfer.
489 *
490 * If you perform a swiotlb_map_single() but wish to interrogate the buffer
491	 * using the cpu, yet do not wish to tear down the PCI dma mapping, you must
492 * call this function before doing so. At the next point you give the PCI dma
493 * address back to the card, you must first perform a
494	 * swiotlb_sync_single_for_device, and then the device again owns the buffer.
495 */
496void
497swiotlb_sync_single_for_cpu(struct device *hwdev, dma_addr_t dev_addr,
498 size_t size, int dir)
499{
500 char *dma_addr = phys_to_virt(dev_addr);
501
502 if (dir == DMA_NONE)
503 BUG();
504 if (dma_addr >= io_tlb_start && dma_addr < io_tlb_end)
505 sync_single(hwdev, dma_addr, size, dir);
506 else if (dir == DMA_FROM_DEVICE)
507 mark_clean(dma_addr, size);
508}
509
510void
511swiotlb_sync_single_for_device(struct device *hwdev, dma_addr_t dev_addr,
512 size_t size, int dir)
513{
514 char *dma_addr = phys_to_virt(dev_addr);
515
516 if (dir == DMA_NONE)
517 BUG();
518 if (dma_addr >= io_tlb_start && dma_addr < io_tlb_end)
519 sync_single(hwdev, dma_addr, size, dir);
520 else if (dir == DMA_FROM_DEVICE)
521 mark_clean(dma_addr, size);
522}
523
524/*
525 * Map a set of buffers described by scatterlist in streaming mode for DMA.
526 * This is the scatter-gather version of the above swiotlb_map_single
527 * interface. Here the scatter gather list elements are each tagged with the
528 * appropriate dma address and length. They are obtained via
529 * sg_dma_{address,length}(SG).
530 *
531 * NOTE: An implementation may be able to use a smaller number of
532 * DMA address/length pairs than there are SG table elements.
533 * (for example via virtual mapping capabilities)
534 * The routine returns the number of addr/length pairs actually
535 * used, at most nents.
536 *
537 * Device ownership issues as mentioned above for swiotlb_map_single are the
538 * same here.
539 */
540int
541swiotlb_map_sg(struct device *hwdev, struct scatterlist *sg, int nelems,
542 int dir)
543{
544 void *addr;
545 unsigned long dev_addr;
546 int i;
547
548 if (dir == DMA_NONE)
549 BUG();
550
551 for (i = 0; i < nelems; i++, sg++) {
552 addr = SG_ENT_VIRT_ADDRESS(sg);
553 dev_addr = virt_to_phys(addr);
554 if (swiotlb_force || address_needs_mapping(hwdev, dev_addr)) {
555 sg->dma_address = (dma_addr_t) virt_to_phys(map_single(hwdev, addr, sg->length, dir));
556 if (!sg->dma_address) {
557 /* Don't panic here, we expect map_sg users
558 to do proper error handling. */
559 swiotlb_full(hwdev, sg->length, dir, 0);
560 swiotlb_unmap_sg(hwdev, sg - i, i, dir);
561 sg[0].dma_length = 0;
562 return 0;
563 }
564 } else
565 sg->dma_address = dev_addr;
566 sg->dma_length = sg->length;
567 }
568 return nelems;
569}
570
571/*
572 * Unmap a set of streaming mode DMA translations. Again, cpu read rules
573 * concerning calls here are the same as for swiotlb_unmap_single() above.
574 */
575void
576swiotlb_unmap_sg(struct device *hwdev, struct scatterlist *sg, int nelems,
577 int dir)
578{
579 int i;
580
581 if (dir == DMA_NONE)
582 BUG();
583
584 for (i = 0; i < nelems; i++, sg++)
585 if (sg->dma_address != SG_ENT_PHYS_ADDRESS(sg))
586 unmap_single(hwdev, (void *) phys_to_virt(sg->dma_address), sg->dma_length, dir);
587 else if (dir == DMA_FROM_DEVICE)
588 mark_clean(SG_ENT_VIRT_ADDRESS(sg), sg->dma_length);
589}
590
591/*
592 * Make physical memory consistent for a set of streaming mode DMA translations
593 * after a transfer.
594 *
595 * The same as swiotlb_sync_single_* but for a scatter-gather list, same rules
596 * and usage.
597 */
598void
599swiotlb_sync_sg_for_cpu(struct device *hwdev, struct scatterlist *sg,
600 int nelems, int dir)
601{
602 int i;
603
604 if (dir == DMA_NONE)
605 BUG();
606
607 for (i = 0; i < nelems; i++, sg++)
608 if (sg->dma_address != SG_ENT_PHYS_ADDRESS(sg))
609			sync_single(hwdev, phys_to_virt(sg->dma_address),
610 sg->dma_length, dir);
611}
612
613void
614swiotlb_sync_sg_for_device(struct device *hwdev, struct scatterlist *sg,
615 int nelems, int dir)
616{
617 int i;
618
619 if (dir == DMA_NONE)
620 BUG();
621
622 for (i = 0; i < nelems; i++, sg++)
623 if (sg->dma_address != SG_ENT_PHYS_ADDRESS(sg))
624			sync_single(hwdev, phys_to_virt(sg->dma_address),
625 sg->dma_length, dir);
626}
627
628int
629swiotlb_dma_mapping_error(dma_addr_t dma_addr)
630{
631 return (dma_addr == virt_to_phys(io_tlb_overflow_buffer));
632}
633
634/*
635 * Return whether the given PCI device DMA address mask can be supported
636 * properly. For example, if your device can only drive the low 24-bits
637 * during PCI bus mastering, then you would pass 0x00ffffff as the mask to
638 * this function.
639 */
640int
641swiotlb_dma_supported (struct device *hwdev, u64 mask)
642{
643 return (virt_to_phys (io_tlb_end) - 1) <= mask;
644}
645
646EXPORT_SYMBOL(swiotlb_init);
647EXPORT_SYMBOL(swiotlb_map_single);
648EXPORT_SYMBOL(swiotlb_unmap_single);
649EXPORT_SYMBOL(swiotlb_map_sg);
650EXPORT_SYMBOL(swiotlb_unmap_sg);
651EXPORT_SYMBOL(swiotlb_sync_single_for_cpu);
652EXPORT_SYMBOL(swiotlb_sync_single_for_device);
653EXPORT_SYMBOL(swiotlb_sync_sg_for_cpu);
654EXPORT_SYMBOL(swiotlb_sync_sg_for_device);
655EXPORT_SYMBOL(swiotlb_dma_mapping_error);
656EXPORT_SYMBOL(swiotlb_alloc_coherent);
657EXPORT_SYMBOL(swiotlb_free_coherent);
658EXPORT_SYMBOL(swiotlb_dma_supported);
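For orientation, a minimal driver-side sketch of how the streaming-DMA entry points exported above fit together (illustrative only: `example_rx_dma`, `dev`, `buf` and `len` are hypothetical, and drivers normally reach these routines through the generic dma_map_single()/dma_sync_single_*() wrappers rather than calling them directly):

static int example_rx_dma(struct device *dev, void *buf, size_t len)
{
	dma_addr_t handle;

	handle = swiotlb_map_single(dev, buf, len, DMA_FROM_DEVICE);
	if (swiotlb_dma_mapping_error(handle))
		return -ENOMEM;		/* bounce pool exhausted */

	/* ... device DMAs into the buffer ... */

	swiotlb_sync_single_for_cpu(dev, handle, len, DMA_FROM_DEVICE);
	/* the CPU may now read the data without tearing down the mapping */

	swiotlb_sync_single_for_device(dev, handle, len, DMA_FROM_DEVICE);
	/* ... another transfer ... */

	swiotlb_unmap_single(dev, handle, len, DMA_FROM_DEVICE);
	return 0;
}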
diff --git a/arch/ia64/lib/xor.S b/arch/ia64/lib/xor.S
new file mode 100644
index 000000000000..54e3f7eab8e9
--- /dev/null
+++ b/arch/ia64/lib/xor.S
@@ -0,0 +1,184 @@
1/*
2 * arch/ia64/lib/xor.S
3 *
4 * Optimized RAID-5 checksumming functions for IA-64.
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2, or (at your option)
9 * any later version.
10 *
11 * You should have received a copy of the GNU General Public License
12 * (for example /usr/src/linux/COPYING); if not, write to the Free
13 * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
14 */
15
16#include <asm/asmmacro.h>
17
18GLOBAL_ENTRY(xor_ia64_2)
19 .prologue
20 .fframe 0
21 .save ar.pfs, r31
22 alloc r31 = ar.pfs, 3, 0, 13, 16
23 .save ar.lc, r30
24 mov r30 = ar.lc
25 .save pr, r29
26 mov r29 = pr
27 ;;
28 .body
29 mov r8 = in1
30 mov ar.ec = 6 + 2
31 shr in0 = in0, 3
32 ;;
33 adds in0 = -1, in0
34 mov r16 = in1
35 mov r17 = in2
36 ;;
37 mov ar.lc = in0
38 mov pr.rot = 1 << 16
39 ;;
40 .rotr s1[6+1], s2[6+1], d[2]
41 .rotp p[6+2]
420:
43(p[0]) ld8.nta s1[0] = [r16], 8
44(p[0]) ld8.nta s2[0] = [r17], 8
45(p[6]) xor d[0] = s1[6], s2[6]
46(p[6+1])st8.nta [r8] = d[1], 8
47 nop.f 0
48 br.ctop.dptk.few 0b
49 ;;
50 mov ar.lc = r30
51 mov pr = r29, -1
52 br.ret.sptk.few rp
53END(xor_ia64_2)
54
55GLOBAL_ENTRY(xor_ia64_3)
56 .prologue
57 .fframe 0
58 .save ar.pfs, r31
59 alloc r31 = ar.pfs, 4, 0, 20, 24
60 .save ar.lc, r30
61 mov r30 = ar.lc
62 .save pr, r29
63 mov r29 = pr
64 ;;
65 .body
66 mov r8 = in1
67 mov ar.ec = 6 + 2
68 shr in0 = in0, 3
69 ;;
70 adds in0 = -1, in0
71 mov r16 = in1
72 mov r17 = in2
73 ;;
74 mov r18 = in3
75 mov ar.lc = in0
76 mov pr.rot = 1 << 16
77 ;;
78 .rotr s1[6+1], s2[6+1], s3[6+1], d[2]
79 .rotp p[6+2]
800:
81(p[0]) ld8.nta s1[0] = [r16], 8
82(p[0]) ld8.nta s2[0] = [r17], 8
83(p[6]) xor d[0] = s1[6], s2[6]
84 ;;
85(p[0]) ld8.nta s3[0] = [r18], 8
86(p[6+1])st8.nta [r8] = d[1], 8
87(p[6]) xor d[0] = d[0], s3[6]
88 br.ctop.dptk.few 0b
89 ;;
90 mov ar.lc = r30
91 mov pr = r29, -1
92 br.ret.sptk.few rp
93END(xor_ia64_3)
94
95GLOBAL_ENTRY(xor_ia64_4)
96 .prologue
97 .fframe 0
98 .save ar.pfs, r31
99 alloc r31 = ar.pfs, 5, 0, 27, 32
100 .save ar.lc, r30
101 mov r30 = ar.lc
102 .save pr, r29
103 mov r29 = pr
104 ;;
105 .body
106 mov r8 = in1
107 mov ar.ec = 6 + 2
108 shr in0 = in0, 3
109 ;;
110 adds in0 = -1, in0
111 mov r16 = in1
112 mov r17 = in2
113 ;;
114 mov r18 = in3
115 mov ar.lc = in0
116 mov pr.rot = 1 << 16
117 mov r19 = in4
118 ;;
119 .rotr s1[6+1], s2[6+1], s3[6+1], s4[6+1], d[2]
120 .rotp p[6+2]
1210:
122(p[0]) ld8.nta s1[0] = [r16], 8
123(p[0]) ld8.nta s2[0] = [r17], 8
124(p[6]) xor d[0] = s1[6], s2[6]
125(p[0]) ld8.nta s3[0] = [r18], 8
126(p[0]) ld8.nta s4[0] = [r19], 8
127(p[6]) xor r20 = s3[6], s4[6]
128 ;;
129(p[6+1])st8.nta [r8] = d[1], 8
130(p[6]) xor d[0] = d[0], r20
131 br.ctop.dptk.few 0b
132 ;;
133 mov ar.lc = r30
134 mov pr = r29, -1
135 br.ret.sptk.few rp
136END(xor_ia64_4)
137
138GLOBAL_ENTRY(xor_ia64_5)
139 .prologue
140 .fframe 0
141 .save ar.pfs, r31
142 alloc r31 = ar.pfs, 6, 0, 34, 40
143 .save ar.lc, r30
144 mov r30 = ar.lc
145 .save pr, r29
146 mov r29 = pr
147 ;;
148 .body
149 mov r8 = in1
150 mov ar.ec = 6 + 2
151 shr in0 = in0, 3
152 ;;
153 adds in0 = -1, in0
154 mov r16 = in1
155 mov r17 = in2
156 ;;
157 mov r18 = in3
158 mov ar.lc = in0
159 mov pr.rot = 1 << 16
160 mov r19 = in4
161 mov r20 = in5
162 ;;
163 .rotr s1[6+1], s2[6+1], s3[6+1], s4[6+1], s5[6+1], d[2]
164 .rotp p[6+2]
1650:
166(p[0]) ld8.nta s1[0] = [r16], 8
167(p[0]) ld8.nta s2[0] = [r17], 8
168(p[6]) xor d[0] = s1[6], s2[6]
169(p[0]) ld8.nta s3[0] = [r18], 8
170(p[0]) ld8.nta s4[0] = [r19], 8
171(p[6]) xor r21 = s3[6], s4[6]
172 ;;
173(p[0]) ld8.nta s5[0] = [r20], 8
174(p[6+1])st8.nta [r8] = d[1], 8
175(p[6]) xor d[0] = d[0], r21
176 ;;
177(p[6]) xor d[0] = d[0], s5[6]
178 nop.f 0
179 br.ctop.dptk.few 0b
180 ;;
181 mov ar.lc = r30
182 mov pr = r29, -1
183 br.ret.sptk.few rp
184END(xor_ia64_5)
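Read as plain C, each of the pipelined loops above reduces to a word-wise XOR into the first buffer; a reference sketch for the two-source case (`xor_2_reference` is a hypothetical helper and assumes `bytes` is a multiple of 8, mirroring the "shr in0 = in0, 3" word count above):

static void xor_2_reference(unsigned long bytes, unsigned long *p1,
			    const unsigned long *p2)
{
	unsigned long i, words = bytes >> 3;	/* byte count -> 8-byte words */

	for (i = 0; i < words; i++)
		p1[i] ^= p2[i];			/* d = s1 ^ s2, stored back to p1 */
}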
diff --git a/arch/ia64/mm/Makefile b/arch/ia64/mm/Makefile
new file mode 100644
index 000000000000..7078f67887ec
--- /dev/null
+++ b/arch/ia64/mm/Makefile
@@ -0,0 +1,12 @@
1#
2# Makefile for the ia64-specific parts of the memory manager.
3#
4
5obj-y := init.o fault.o tlb.o extable.o
6
7obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o
8obj-$(CONFIG_NUMA) += numa.o
9obj-$(CONFIG_DISCONTIGMEM) += discontig.o
10ifndef CONFIG_DISCONTIGMEM
11obj-y += contig.o
12endif
diff --git a/arch/ia64/mm/contig.c b/arch/ia64/mm/contig.c
new file mode 100644
index 000000000000..6daf15ac8940
--- /dev/null
+++ b/arch/ia64/mm/contig.c
@@ -0,0 +1,299 @@
1/*
2 * This file is subject to the terms and conditions of the GNU General Public
3 * License. See the file "COPYING" in the main directory of this archive
4 * for more details.
5 *
6 * Copyright (C) 1998-2003 Hewlett-Packard Co
7 * David Mosberger-Tang <davidm@hpl.hp.com>
8 * Stephane Eranian <eranian@hpl.hp.com>
9 * Copyright (C) 2000, Rohit Seth <rohit.seth@intel.com>
10 * Copyright (C) 1999 VA Linux Systems
11 * Copyright (C) 1999 Walt Drummond <drummond@valinux.com>
12 * Copyright (C) 2003 Silicon Graphics, Inc. All rights reserved.
13 *
14 * Routines used by ia64 machines with contiguous (or virtually contiguous)
15 * memory.
16 */
17#include <linux/config.h>
18#include <linux/bootmem.h>
19#include <linux/efi.h>
20#include <linux/mm.h>
21#include <linux/swap.h>
22
23#include <asm/meminit.h>
24#include <asm/pgalloc.h>
25#include <asm/pgtable.h>
26#include <asm/sections.h>
27#include <asm/mca.h>
28
29#ifdef CONFIG_VIRTUAL_MEM_MAP
30static unsigned long num_dma_physpages;
31#endif
32
33/**
34 * show_mem - display a memory statistics summary
35 *
36 * Just walks the pages in the system and describes where they're allocated.
37 */
38void
39show_mem (void)
40{
41 int i, total = 0, reserved = 0;
42 int shared = 0, cached = 0;
43
44 printk("Mem-info:\n");
45 show_free_areas();
46
47 printk("Free swap: %6ldkB\n", nr_swap_pages<<(PAGE_SHIFT-10));
48 i = max_mapnr;
49 while (i-- > 0) {
50 if (!pfn_valid(i))
51 continue;
52 total++;
53 if (PageReserved(mem_map+i))
54 reserved++;
55 else if (PageSwapCache(mem_map+i))
56 cached++;
57 else if (page_count(mem_map + i))
58 shared += page_count(mem_map + i) - 1;
59 }
60 printk("%d pages of RAM\n", total);
61 printk("%d reserved pages\n", reserved);
62 printk("%d pages shared\n", shared);
63 printk("%d pages swap cached\n", cached);
64 printk("%ld pages in page table cache\n", pgtable_cache_size);
65}
66
67/* physical address where the bootmem map is located */
68unsigned long bootmap_start;
69
70/**
71 * find_max_pfn - adjust the maximum page number callback
72 * @start: start of range
73 * @end: end of range
74 * @arg: address of pointer to global max_pfn variable
75 *
76 * Passed as a callback function to efi_memmap_walk() to determine the highest
77 * available page frame number in the system.
78 */
79int
80find_max_pfn (unsigned long start, unsigned long end, void *arg)
81{
82 unsigned long *max_pfnp = arg, pfn;
83
84 pfn = (PAGE_ALIGN(end - 1) - PAGE_OFFSET) >> PAGE_SHIFT;
85 if (pfn > *max_pfnp)
86 *max_pfnp = pfn;
87 return 0;
88}
89
90/**
91 * find_bootmap_location - callback to find a memory area for the bootmap
92 * @start: start of region
93 * @end: end of region
94 * @arg: unused callback data
95 *
96 * Find a place to put the bootmap and return its starting address in
97 * bootmap_start. This address must be page-aligned.
98 */
99int
100find_bootmap_location (unsigned long start, unsigned long end, void *arg)
101{
102 unsigned long needed = *(unsigned long *)arg;
103 unsigned long range_start, range_end, free_start;
104 int i;
105
106#if IGNORE_PFN0
107 if (start == PAGE_OFFSET) {
108 start += PAGE_SIZE;
109 if (start >= end)
110 return 0;
111 }
112#endif
113
114 free_start = PAGE_OFFSET;
115
116 for (i = 0; i < num_rsvd_regions; i++) {
117 range_start = max(start, free_start);
118 range_end = min(end, rsvd_region[i].start & PAGE_MASK);
119
120 free_start = PAGE_ALIGN(rsvd_region[i].end);
121
122 if (range_end <= range_start)
123 continue; /* skip over empty range */
124
125 if (range_end - range_start >= needed) {
126 bootmap_start = __pa(range_start);
127 return -1; /* done */
128 }
129
130 /* nothing more available in this segment */
131 if (range_end == end)
132 return 0;
133 }
134 return 0;
135}
136
137/**
138 * find_memory - setup memory map
139 *
140 * Walk the EFI memory map and find usable memory for the system, taking
141 * into account reserved areas.
142 */
143void
144find_memory (void)
145{
146 unsigned long bootmap_size;
147
148 reserve_memory();
149
150 /* first find highest page frame number */
151 max_pfn = 0;
152 efi_memmap_walk(find_max_pfn, &max_pfn);
153
154 /* how many bytes to cover all the pages */
155 bootmap_size = bootmem_bootmap_pages(max_pfn) << PAGE_SHIFT;
156
157 /* look for a location to hold the bootmap */
158 bootmap_start = ~0UL;
159 efi_memmap_walk(find_bootmap_location, &bootmap_size);
160 if (bootmap_start == ~0UL)
161 panic("Cannot find %ld bytes for bootmap\n", bootmap_size);
162
163 bootmap_size = init_bootmem(bootmap_start >> PAGE_SHIFT, max_pfn);
164
165 /* Free all available memory, then mark bootmem-map as being in use. */
166 efi_memmap_walk(filter_rsvd_memory, free_bootmem);
167 reserve_bootmem(bootmap_start, bootmap_size);
168
169 find_initrd();
170}
171
172#ifdef CONFIG_SMP
173/**
174 * per_cpu_init - setup per-cpu variables
175 *
176 * Allocate and setup per-cpu data areas.
177 */
178void *
179per_cpu_init (void)
180{
181 void *cpu_data;
182 int cpu;
183
184 /*
185	 * get_free_pages() cannot be used before cpu_init() is done. BSP
186 * allocates "NR_CPUS" pages for all CPUs to avoid that AP calls
187 * get_zeroed_page().
188 */
189 if (smp_processor_id() == 0) {
190 cpu_data = __alloc_bootmem(PERCPU_PAGE_SIZE * NR_CPUS,
191 PERCPU_PAGE_SIZE, __pa(MAX_DMA_ADDRESS));
192 for (cpu = 0; cpu < NR_CPUS; cpu++) {
193 memcpy(cpu_data, __phys_per_cpu_start, __per_cpu_end - __per_cpu_start);
194 __per_cpu_offset[cpu] = (char *) cpu_data - __per_cpu_start;
195 cpu_data += PERCPU_PAGE_SIZE;
196 per_cpu(local_per_cpu_offset, cpu) = __per_cpu_offset[cpu];
197 }
198 }
199 return __per_cpu_start + __per_cpu_offset[smp_processor_id()];
200}
201#endif /* CONFIG_SMP */
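/*
 * Illustration only: after the copies above, the address of a per-cpu
 * variable for CPU n is simply its link-time address plus
 * __per_cpu_offset[n], so each CPU ends up with a private PERCPU_PAGE_SIZE
 * copy of the static per-cpu data set up here.
 */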
202
203static int
204count_pages (u64 start, u64 end, void *arg)
205{
206 unsigned long *count = arg;
207
208 *count += (end - start) >> PAGE_SHIFT;
209 return 0;
210}
211
212#ifdef CONFIG_VIRTUAL_MEM_MAP
213static int
214count_dma_pages (u64 start, u64 end, void *arg)
215{
216 unsigned long *count = arg;
217
218 if (start < MAX_DMA_ADDRESS)
219 *count += (min(end, MAX_DMA_ADDRESS) - start) >> PAGE_SHIFT;
220 return 0;
221}
222#endif
223
224/*
225 * Set up the page tables.
226 */
227
228void
229paging_init (void)
230{
231 unsigned long max_dma;
232 unsigned long zones_size[MAX_NR_ZONES];
233#ifdef CONFIG_VIRTUAL_MEM_MAP
234 unsigned long zholes_size[MAX_NR_ZONES];
235 unsigned long max_gap;
236#endif
237
238 /* initialize mem_map[] */
239
240 memset(zones_size, 0, sizeof(zones_size));
241
242 num_physpages = 0;
243 efi_memmap_walk(count_pages, &num_physpages);
244
245 max_dma = virt_to_phys((void *) MAX_DMA_ADDRESS) >> PAGE_SHIFT;
246
247#ifdef CONFIG_VIRTUAL_MEM_MAP
248 memset(zholes_size, 0, sizeof(zholes_size));
249
250 num_dma_physpages = 0;
251 efi_memmap_walk(count_dma_pages, &num_dma_physpages);
252
253 if (max_low_pfn < max_dma) {
254 zones_size[ZONE_DMA] = max_low_pfn;
255 zholes_size[ZONE_DMA] = max_low_pfn - num_dma_physpages;
256 } else {
257 zones_size[ZONE_DMA] = max_dma;
258 zholes_size[ZONE_DMA] = max_dma - num_dma_physpages;
259 if (num_physpages > num_dma_physpages) {
260 zones_size[ZONE_NORMAL] = max_low_pfn - max_dma;
261 zholes_size[ZONE_NORMAL] =
262 ((max_low_pfn - max_dma) -
263 (num_physpages - num_dma_physpages));
264 }
265 }
266
267 max_gap = 0;
268 efi_memmap_walk(find_largest_hole, (u64 *)&max_gap);
269 if (max_gap < LARGE_GAP) {
270 vmem_map = (struct page *) 0;
271 free_area_init_node(0, &contig_page_data, zones_size, 0,
272 zholes_size);
273 } else {
274 unsigned long map_size;
275
276 /* allocate virtual_mem_map */
277
278 map_size = PAGE_ALIGN(max_low_pfn * sizeof(struct page));
279 vmalloc_end -= map_size;
280 vmem_map = (struct page *) vmalloc_end;
281 efi_memmap_walk(create_mem_map_page_table, NULL);
282
283 NODE_DATA(0)->node_mem_map = vmem_map;
284 free_area_init_node(0, &contig_page_data, zones_size,
285 0, zholes_size);
286
287 printk("Virtual mem_map starts at 0x%p\n", mem_map);
288 }
289#else /* !CONFIG_VIRTUAL_MEM_MAP */
290 if (max_low_pfn < max_dma)
291 zones_size[ZONE_DMA] = max_low_pfn;
292 else {
293 zones_size[ZONE_DMA] = max_dma;
294 zones_size[ZONE_NORMAL] = max_low_pfn - max_dma;
295 }
296 free_area_init(zones_size);
297#endif /* !CONFIG_VIRTUAL_MEM_MAP */
298 zero_page_memmap_ptr = virt_to_page(ia64_imva(empty_zero_page));
299}
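As a worked illustration of the zone split above (hypothetical numbers; on ia64 ZONE_DMA covers the ~4GB device-addressable range, as noted in the swiotlb code earlier): with 6GB of installed memory and max_dma falling at the 4GB boundary,

	zones_size[ZONE_DMA]    = max_dma                        /* page frames below 4GB */
	zones_size[ZONE_NORMAL] = max_low_pfn - max_dma          /* the remaining ~2GB    */
	zholes_size[ZONE_DMA]   = max_dma - num_dma_physpages    /* EFI holes below 4GB   */

with the hole accounting only present when CONFIG_VIRTUAL_MEM_MAP is enabled.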
diff --git a/arch/ia64/mm/discontig.c b/arch/ia64/mm/discontig.c
new file mode 100644
index 000000000000..3456a9b6971e
--- /dev/null
+++ b/arch/ia64/mm/discontig.c
@@ -0,0 +1,737 @@
1/*
2 * Copyright (c) 2000, 2003 Silicon Graphics, Inc. All rights reserved.
3 * Copyright (c) 2001 Intel Corp.
4 * Copyright (c) 2001 Tony Luck <tony.luck@intel.com>
5 * Copyright (c) 2002 NEC Corp.
6 * Copyright (c) 2002 Kimio Suganuma <k-suganuma@da.jp.nec.com>
7 * Copyright (c) 2004 Silicon Graphics, Inc
8 * Russ Anderson <rja@sgi.com>
9 * Jesse Barnes <jbarnes@sgi.com>
10 * Jack Steiner <steiner@sgi.com>
11 */
12
13/*
14 * Platform initialization for Discontig Memory
15 */
16
17#include <linux/kernel.h>
18#include <linux/mm.h>
19#include <linux/swap.h>
20#include <linux/bootmem.h>
21#include <linux/acpi.h>
22#include <linux/efi.h>
23#include <linux/nodemask.h>
24#include <asm/pgalloc.h>
25#include <asm/tlb.h>
26#include <asm/meminit.h>
27#include <asm/numa.h>
28#include <asm/sections.h>
29
30/*
31 * Track per-node information needed to setup the boot memory allocator, the
32 * per-node areas, and the real VM.
33 */
34struct early_node_data {
35 struct ia64_node_data *node_data;
36 pg_data_t *pgdat;
37 unsigned long pernode_addr;
38 unsigned long pernode_size;
39 struct bootmem_data bootmem_data;
40 unsigned long num_physpages;
41 unsigned long num_dma_physpages;
42 unsigned long min_pfn;
43 unsigned long max_pfn;
44};
45
46static struct early_node_data mem_data[MAX_NUMNODES] __initdata;
47
48/**
49 * reassign_cpu_only_nodes - called from find_memory to move CPU-only nodes to a memory node
50 *
51 * This function will move nodes with only CPUs (no memory)
52 * to a node with memory which is at the minimum numa_slit distance.
53 * Any reassignments will result in the compression of the nodes
54 * and renumbering the nid values where appropriate.
55 * The static declarations below avoid a large stack frame, at the
56 * cost of making the code non-re-entrant.
57 */
58static void __init reassign_cpu_only_nodes(void)
59{
60 struct node_memblk_s *p;
61 int i, j, k, nnode, nid, cpu, cpunid, pxm;
62 u8 cslit, slit;
63 static DECLARE_BITMAP(nodes_with_mem, MAX_NUMNODES) __initdata;
64 static u8 numa_slit_fix[MAX_NUMNODES * MAX_NUMNODES] __initdata;
65 static int node_flip[MAX_NUMNODES] __initdata;
66 static int old_nid_map[NR_CPUS] __initdata;
67
68 for (nnode = 0, p = &node_memblk[0]; p < &node_memblk[num_node_memblks]; p++)
69 if (!test_bit(p->nid, (void *) nodes_with_mem)) {
70 set_bit(p->nid, (void *) nodes_with_mem);
71 nnode++;
72 }
73
74 /*
75 * All nids with memory.
76 */
77 if (nnode == num_online_nodes())
78 return;
79
80 /*
81 * Change nids and attempt to migrate CPU-only nodes
82 * to the best numa_slit (closest neighbor) possible.
83 * For reassigned CPU nodes a nid can't be arrived at
84 * until after this loop because the target nid's new
85 * identity might not have been established yet. So
86 * new nid values are fabricated above num_online_nodes() and
87 * mapped back later to their true value.
88 */
89 /* MCD - This code is a bit complicated, but may be unnecessary now.
90 * We can now handle much more interesting node-numbering.
91 * The old requirement that 0 <= nid <= numnodes <= MAX_NUMNODES
92 * and that there be no holes in the numbering 0..numnodes
93 * has become simply 0 <= nid <= MAX_NUMNODES.
94 */
95 nid = 0;
96 for_each_online_node(i) {
97 if (test_bit(i, (void *) nodes_with_mem)) {
98 /*
99 * Save original nid value for numa_slit
100 * fixup and node_cpuid reassignments.
101 */
102 node_flip[nid] = i;
103
104 if (i == nid) {
105 nid++;
106 continue;
107 }
108
109 for (p = &node_memblk[0]; p < &node_memblk[num_node_memblks]; p++)
110 if (p->nid == i)
111 p->nid = nid;
112
113 cpunid = nid;
114 nid++;
115 } else
116 cpunid = MAX_NUMNODES;
117
118 for (cpu = 0; cpu < NR_CPUS; cpu++)
119 if (node_cpuid[cpu].nid == i) {
120 /*
121 * For nodes not being reassigned just
122 * fix the cpu's nid and reverse pxm map
123 */
124 if (cpunid < MAX_NUMNODES) {
125 pxm = nid_to_pxm_map[i];
126 pxm_to_nid_map[pxm] =
127 node_cpuid[cpu].nid = cpunid;
128 continue;
129 }
130
131 /*
132 * For nodes being reassigned, find best node by
133 * numa_slit information and then make a temporary
134 * nid value based on current nid and num_online_nodes().
135 */
136 slit = 0xff;
137 k = 2*num_online_nodes();
138 for_each_online_node(j) {
139 if (i == j)
140 continue;
141 else if (test_bit(j, (void *) nodes_with_mem)) {
142 cslit = numa_slit[i * num_online_nodes() + j];
143 if (cslit < slit) {
144 k = num_online_nodes() + j;
145 slit = cslit;
146 }
147 }
148 }
149
150 /* save old nid map so we can update the pxm */
151 old_nid_map[cpu] = node_cpuid[cpu].nid;
152 node_cpuid[cpu].nid = k;
153 }
154 }
155
156 /*
157 * Fixup temporary nid values for CPU-only nodes.
158 */
159 for (cpu = 0; cpu < NR_CPUS; cpu++)
160 if (node_cpuid[cpu].nid == (2*num_online_nodes())) {
161 pxm = nid_to_pxm_map[old_nid_map[cpu]];
162 pxm_to_nid_map[pxm] = node_cpuid[cpu].nid = nnode - 1;
163 } else {
164 for (i = 0; i < nnode; i++) {
165 if (node_flip[i] != (node_cpuid[cpu].nid - num_online_nodes()))
166 continue;
167
168 pxm = nid_to_pxm_map[old_nid_map[cpu]];
169 pxm_to_nid_map[pxm] = node_cpuid[cpu].nid = i;
170 break;
171 }
172 }
173
174 /*
175 * Fix numa_slit by compressing from larger
176 * nid array to reduced nid array.
177 */
178 for (i = 0; i < nnode; i++)
179 for (j = 0; j < nnode; j++)
180 numa_slit_fix[i * nnode + j] =
181 numa_slit[node_flip[i] * num_online_nodes() + node_flip[j]];
182
183 memcpy(numa_slit, numa_slit_fix, sizeof (numa_slit));
184
185 nodes_clear(node_online_map);
186 for (i = 0; i < nnode; i++)
187 node_set_online(i);
188
189 return;
190}
191
192/*
193 * To prevent cache aliasing effects, align per-node structures so that they
194 * start at addresses that are strided by node number.
195 */
196#define NODEDATA_ALIGN(addr, node) \
197 ((((addr) + 1024*1024-1) & ~(1024*1024-1)) + (node)*PERCPU_PAGE_SIZE)
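/*
 * Worked example (illustration only, assuming PERCPU_PAGE_SIZE == 64KB):
 * NODEDATA_ALIGN(0x4080000, 0) == 0x4100000 (rounded up to the next 1MB
 * boundary), while NODEDATA_ALIGN(0x4080000, 2) == 0x4120000, i.e. the same
 * boundary plus 2 * PERCPU_PAGE_SIZE, so per-node structures on different
 * nodes start at different cache-index strides.
 */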
198
199/**
200 * build_node_maps - callback to setup bootmem structs for each node
201 * @start: physical start of range
202 * @len: length of range
203 * @node: node where this range resides
204 *
205 * We allocate a struct bootmem_data for each piece of memory that we wish to
206 * treat as a virtually contiguous block (i.e. each node). Each such block
207 * must start on an %IA64_GRANULE_SIZE boundary, so we round the address down
208 * if necessary. Any non-existent pages will simply be part of the virtual
209 * memmap. We also update min_low_pfn and max_low_pfn here as we receive
210 * memory ranges from the caller.
211 */
212static int __init build_node_maps(unsigned long start, unsigned long len,
213 int node)
214{
215 unsigned long cstart, epfn, end = start + len;
216 struct bootmem_data *bdp = &mem_data[node].bootmem_data;
217
218 epfn = GRANULEROUNDUP(end) >> PAGE_SHIFT;
219 cstart = GRANULEROUNDDOWN(start);
220
221 if (!bdp->node_low_pfn) {
222 bdp->node_boot_start = cstart;
223 bdp->node_low_pfn = epfn;
224 } else {
225 bdp->node_boot_start = min(cstart, bdp->node_boot_start);
226 bdp->node_low_pfn = max(epfn, bdp->node_low_pfn);
227 }
228
229 min_low_pfn = min(min_low_pfn, bdp->node_boot_start>>PAGE_SHIFT);
230 max_low_pfn = max(max_low_pfn, bdp->node_low_pfn);
231
232 return 0;
233}
234
235/**
236 * early_nr_phys_cpus_node - return number of physical cpus on a given node
237 * @node: node to check
238 *
239 * Count the number of physical cpus on @node. These are cpus that actually
240 * exist. We can't use nr_cpus_node() yet because
241 * acpi_boot_init() (which builds the node_to_cpu_mask array) hasn't been
242 * called yet.
243 */
244static int early_nr_phys_cpus_node(int node)
245{
246 int cpu, n = 0;
247
248 for (cpu = 0; cpu < NR_CPUS; cpu++)
249 if (node == node_cpuid[cpu].nid)
250 if ((cpu == 0) || node_cpuid[cpu].phys_id)
251 n++;
252
253 return n;
254}
255
256
257/**
258 * early_nr_cpus_node - return number of cpus on a given node
259 * @node: node to check
260 *
261 * Count the number of cpus on @node. We can't use nr_cpus_node() yet because
262 * acpi_boot_init() (which builds the node_to_cpu_mask array) hasn't been
263 * called yet. Note that node 0 will also count all non-existent cpus.
264 */
265static int early_nr_cpus_node(int node)
266{
267 int cpu, n = 0;
268
269 for (cpu = 0; cpu < NR_CPUS; cpu++)
270 if (node == node_cpuid[cpu].nid)
271 n++;
272
273 return n;
274}
275
276/**
277 * find_pernode_space - allocate memory for memory map and per-node structures
278 * @start: physical start of range
279 * @len: length of range
280 * @node: node where this range resides
281 *
282 * This routine reserves space for the per-cpu data struct, the list of
283 * pg_data_ts and the per-node data struct. Each node will have something like
284 * the following in the first chunk of addr. space large enough to hold it.
285 *
286 * ________________________
287 * | |
288 * |~~~~~~~~~~~~~~~~~~~~~~~~| <-- NODEDATA_ALIGN(start, node) for the first
289 * | PERCPU_PAGE_SIZE * | start and length big enough
290 * | cpus_on_this_node | Node 0 will also have entries for all non-existent cpus.
291 * |------------------------|
292 * | local pg_data_t * |
293 * |------------------------|
294 * | local ia64_node_data |
295 * |------------------------|
296 * | ??? |
297 * |________________________|
298 *
299 * Once this space has been set aside, the bootmem maps are initialized. We
300 * could probably move the allocation of the per-cpu and ia64_node_data space
301 * outside of this function and use alloc_bootmem_node(), but doing it here
302 * is straightforward and we get the alignments we want so...
303 */
304static int __init find_pernode_space(unsigned long start, unsigned long len,
305 int node)
306{
307 unsigned long epfn, cpu, cpus, phys_cpus;
308 unsigned long pernodesize = 0, pernode, pages, mapsize;
309 void *cpu_data;
310 struct bootmem_data *bdp = &mem_data[node].bootmem_data;
311
312 epfn = (start + len) >> PAGE_SHIFT;
313
314 pages = bdp->node_low_pfn - (bdp->node_boot_start >> PAGE_SHIFT);
315 mapsize = bootmem_bootmap_pages(pages) << PAGE_SHIFT;
316
317 /*
318 * Make sure this memory falls within this node's usable memory
319	 * since we may have thrown some away in build_node_maps().
320 */
321 if (start < bdp->node_boot_start || epfn > bdp->node_low_pfn)
322 return 0;
323
324 /* Don't setup this node's local space twice... */
325 if (mem_data[node].pernode_addr)
326 return 0;
327
328 /*
329 * Calculate total size needed, incl. what's necessary
330 * for good alignment and alias prevention.
331 */
332 cpus = early_nr_cpus_node(node);
333 phys_cpus = early_nr_phys_cpus_node(node);
334 pernodesize += PERCPU_PAGE_SIZE * cpus;
335 pernodesize += node * L1_CACHE_BYTES;
336 pernodesize += L1_CACHE_ALIGN(sizeof(pg_data_t));
337 pernodesize += L1_CACHE_ALIGN(sizeof(struct ia64_node_data));
338 pernodesize = PAGE_ALIGN(pernodesize);
339 pernode = NODEDATA_ALIGN(start, node);
340
341 /* Is this range big enough for what we want to store here? */
342 if (start + len > (pernode + pernodesize + mapsize)) {
343 mem_data[node].pernode_addr = pernode;
344 mem_data[node].pernode_size = pernodesize;
345 memset(__va(pernode), 0, pernodesize);
346
347 cpu_data = (void *)pernode;
348 pernode += PERCPU_PAGE_SIZE * cpus;
349 pernode += node * L1_CACHE_BYTES;
350
351 mem_data[node].pgdat = __va(pernode);
352 pernode += L1_CACHE_ALIGN(sizeof(pg_data_t));
353
354 mem_data[node].node_data = __va(pernode);
355 pernode += L1_CACHE_ALIGN(sizeof(struct ia64_node_data));
356
357 mem_data[node].pgdat->bdata = bdp;
358 pernode += L1_CACHE_ALIGN(sizeof(pg_data_t));
359
360 /*
361 * Copy the static per-cpu data into the region we
362 * just set aside and then setup __per_cpu_offset
363 * for each CPU on this node.
364 */
365 for (cpu = 0; cpu < NR_CPUS; cpu++) {
366 if (node == node_cpuid[cpu].nid) {
367 memcpy(__va(cpu_data), __phys_per_cpu_start,
368 __per_cpu_end - __per_cpu_start);
369 __per_cpu_offset[cpu] = (char*)__va(cpu_data) -
370 __per_cpu_start;
371 cpu_data += PERCPU_PAGE_SIZE;
372 }
373 }
374 }
375
376 return 0;
377}
378
379/**
380 * free_node_bootmem - free bootmem allocator memory for use
381 * @start: physical start of range
382 * @len: length of range
383 * @node: node where this range resides
384 *
385 * Simply calls the bootmem allocator to free the specified range from
386 * the given pg_data_t's bdata struct. After this function has been called
387 * for all the entries in the EFI memory map, the bootmem allocator will
388 * be ready to service allocation requests.
389 */
390static int __init free_node_bootmem(unsigned long start, unsigned long len,
391 int node)
392{
393 free_bootmem_node(mem_data[node].pgdat, start, len);
394
395 return 0;
396}
397
398/**
399 * reserve_pernode_space - reserve memory for per-node space
400 *
401 * Reserve the space used by the bootmem maps & per-node space in the boot
402 * allocator so that when we actually create the real mem maps we don't
403 * use their memory.
404 */
405static void __init reserve_pernode_space(void)
406{
407 unsigned long base, size, pages;
408 struct bootmem_data *bdp;
409 int node;
410
411 for_each_online_node(node) {
412 pg_data_t *pdp = mem_data[node].pgdat;
413
414 bdp = pdp->bdata;
415
416 /* First the bootmem_map itself */
417 pages = bdp->node_low_pfn - (bdp->node_boot_start>>PAGE_SHIFT);
418 size = bootmem_bootmap_pages(pages) << PAGE_SHIFT;
419 base = __pa(bdp->node_bootmem_map);
420 reserve_bootmem_node(pdp, base, size);
421
422 /* Now the per-node space */
423 size = mem_data[node].pernode_size;
424 base = __pa(mem_data[node].pernode_addr);
425 reserve_bootmem_node(pdp, base, size);
426 }
427}
428
429/**
430 * initialize_pernode_data - fixup per-cpu & per-node pointers
431 *
432 * Each node's per-node area has a copy of the global pg_data_t list, so
433 * we copy that to each node here, as well as setting the per-cpu pointer
434 * to the local node data structure. The active_cpus field of the per-node
435 * structure gets set up by the platform_cpu_init() function later.
436 */
437static void __init initialize_pernode_data(void)
438{
439 int cpu, node;
440 pg_data_t *pgdat_list[MAX_NUMNODES];
441
442 for_each_online_node(node)
443 pgdat_list[node] = mem_data[node].pgdat;
444
445 /* Copy the pg_data_t list to each node and init the node field */
446 for_each_online_node(node) {
447 memcpy(mem_data[node].node_data->pg_data_ptrs, pgdat_list,
448 sizeof(pgdat_list));
449 }
450
451 /* Set the node_data pointer for each per-cpu struct */
452 for (cpu = 0; cpu < NR_CPUS; cpu++) {
453 node = node_cpuid[cpu].nid;
454 per_cpu(cpu_info, cpu).node_data = mem_data[node].node_data;
455 }
456}
457
458/**
459 * find_memory - walk the EFI memory map and setup the bootmem allocator
460 *
461 * Called early in boot to set up the bootmem allocator, and to
462 * allocate the per-cpu and per-node structures.
463 */
464void __init find_memory(void)
465{
466 int node;
467
468 reserve_memory();
469
470 if (num_online_nodes() == 0) {
471 printk(KERN_ERR "node info missing!\n");
472 node_set_online(0);
473 }
474
475 min_low_pfn = -1;
476 max_low_pfn = 0;
477
478 if (num_online_nodes() > 1)
479 reassign_cpu_only_nodes();
480
481 /* These actually end up getting called by call_pernode_memory() */
482 efi_memmap_walk(filter_rsvd_memory, build_node_maps);
483 efi_memmap_walk(filter_rsvd_memory, find_pernode_space);
484
485 /*
486 * Initialize the boot memory maps in reverse order since that's
487 * what the bootmem allocator expects
488 */
489 for (node = MAX_NUMNODES - 1; node >= 0; node--) {
490 unsigned long pernode, pernodesize, map;
491 struct bootmem_data *bdp;
492
493 if (!node_online(node))
494 continue;
495
496 bdp = &mem_data[node].bootmem_data;
497 pernode = mem_data[node].pernode_addr;
498 pernodesize = mem_data[node].pernode_size;
499 map = pernode + pernodesize;
500
501 /* Sanity check... */
502 if (!pernode)
503 panic("pernode space for node %d "
504 "could not be allocated!", node);
505
506 init_bootmem_node(mem_data[node].pgdat,
507 map>>PAGE_SHIFT,
508 bdp->node_boot_start>>PAGE_SHIFT,
509 bdp->node_low_pfn);
510 }
511
512 efi_memmap_walk(filter_rsvd_memory, free_node_bootmem);
513
514 reserve_pernode_space();
515 initialize_pernode_data();
516
517 max_pfn = max_low_pfn;
518
519 find_initrd();
520}
521
522/**
523 * per_cpu_init - setup per-cpu variables
524 *
525 * find_pernode_space() does most of this already; we just need to set
526 * local_per_cpu_offset
527 */
528void *per_cpu_init(void)
529{
530 int cpu;
531
532 if (smp_processor_id() == 0) {
533 for (cpu = 0; cpu < NR_CPUS; cpu++) {
534 per_cpu(local_per_cpu_offset, cpu) =
535 __per_cpu_offset[cpu];
536 }
537 }
538
539 return __per_cpu_start + __per_cpu_offset[smp_processor_id()];
540}
541
542/**
543 * show_mem - give short summary of memory stats
544 *
545 * Shows a simple page count of reserved and used pages in the system.
546 * For discontig machines, it does this on a per-pgdat basis.
547 */
548void show_mem(void)
549{
550 int i, total_reserved = 0;
551 int total_shared = 0, total_cached = 0;
552 unsigned long total_present = 0;
553 pg_data_t *pgdat;
554
555 printk("Mem-info:\n");
556 show_free_areas();
557 printk("Free swap: %6ldkB\n", nr_swap_pages<<(PAGE_SHIFT-10));
558 for_each_pgdat(pgdat) {
559 unsigned long present = pgdat->node_present_pages;
560 int shared = 0, cached = 0, reserved = 0;
561 printk("Node ID: %d\n", pgdat->node_id);
562 for(i = 0; i < pgdat->node_spanned_pages; i++) {
563 if (!ia64_pfn_valid(pgdat->node_start_pfn+i))
564 continue;
565 if (PageReserved(pgdat->node_mem_map+i))
566 reserved++;
567 else if (PageSwapCache(pgdat->node_mem_map+i))
568 cached++;
569 else if (page_count(pgdat->node_mem_map+i))
570 shared += page_count(pgdat->node_mem_map+i)-1;
571 }
572 total_present += present;
573 total_reserved += reserved;
574 total_cached += cached;
575 total_shared += shared;
576 printk("\t%ld pages of RAM\n", present);
577 printk("\t%d reserved pages\n", reserved);
578 printk("\t%d pages shared\n", shared);
579 printk("\t%d pages swap cached\n", cached);
580 }
581 printk("%ld pages of RAM\n", total_present);
582 printk("%d reserved pages\n", total_reserved);
583 printk("%d pages shared\n", total_shared);
584 printk("%d pages swap cached\n", total_cached);
585 printk("Total of %ld pages in page table cache\n", pgtable_cache_size);
586 printk("%d free buffer pages\n", nr_free_buffer_pages());
587}
588
589/**
590 * call_pernode_memory - use SRAT to call callback functions with node info
591 * @start: physical start of range
592 * @len: length of range
593 * @arg: function to call for each range
594 *
595 * efi_memmap_walk() knows nothing about layout of memory across nodes. Find
596 * out to which node a block of memory belongs. Ignore memory that we cannot
597 * identify, and split blocks that run across multiple nodes.
598 *
599 * Take this opportunity to round the start address up and the end address
600 * down to page boundaries.
601 */
602void call_pernode_memory(unsigned long start, unsigned long len, void *arg)
603{
604 unsigned long rs, re, end = start + len;
605 void (*func)(unsigned long, unsigned long, int);
606 int i;
607
608 start = PAGE_ALIGN(start);
609 end &= PAGE_MASK;
610 if (start >= end)
611 return;
612
613 func = arg;
614
615 if (!num_node_memblks) {
616 /* No SRAT table, so assume one node (node 0) */
617 if (start < end)
618 (*func)(start, end - start, 0);
619 return;
620 }
621
622 for (i = 0; i < num_node_memblks; i++) {
623 rs = max(start, node_memblk[i].start_paddr);
624 re = min(end, node_memblk[i].start_paddr +
625 node_memblk[i].size);
626
627 if (rs < re)
628 (*func)(rs, re - rs, node_memblk[i].nid);
629
630 if (re == end)
631 break;
632 }
633}
634
635/**
636 * count_node_pages - callback to build per-node memory info structures
637 * @start: physical start of range
638 * @len: length of range
639 * @node: node where this range resides
640 *
641 * Each node has its own number of physical pages, DMAable pages, start, and
642 * end page frame number. This routine will be called by call_pernode_memory()
643 * for each piece of usable memory and will setup these values for each node.
644 * Very similar to build_node_maps().
645 */
646static __init int count_node_pages(unsigned long start, unsigned long len, int node)
647{
648 unsigned long end = start + len;
649
650 mem_data[node].num_physpages += len >> PAGE_SHIFT;
651 if (start <= __pa(MAX_DMA_ADDRESS))
652 mem_data[node].num_dma_physpages +=
653 (min(end, __pa(MAX_DMA_ADDRESS)) - start) >>PAGE_SHIFT;
654 start = GRANULEROUNDDOWN(start);
655 start = ORDERROUNDDOWN(start);
656 end = GRANULEROUNDUP(end);
657 mem_data[node].max_pfn = max(mem_data[node].max_pfn,
658 end >> PAGE_SHIFT);
659 mem_data[node].min_pfn = min(mem_data[node].min_pfn,
660 start >> PAGE_SHIFT);
661
662 return 0;
663}
664
665/**
666 * paging_init - setup page tables
667 *
668 * paging_init() sets up the page tables for each node of the system and frees
669 * the bootmem allocator memory for general use.
670 */
671void __init paging_init(void)
672{
673 unsigned long max_dma;
674 unsigned long zones_size[MAX_NR_ZONES];
675 unsigned long zholes_size[MAX_NR_ZONES];
676 unsigned long pfn_offset = 0;
677 int node;
678
679 max_dma = virt_to_phys((void *) MAX_DMA_ADDRESS) >> PAGE_SHIFT;
680
681 /* so min() will work in count_node_pages */
682 for_each_online_node(node)
683 mem_data[node].min_pfn = ~0UL;
684
685 efi_memmap_walk(filter_rsvd_memory, count_node_pages);
686
687 for_each_online_node(node) {
688 memset(zones_size, 0, sizeof(zones_size));
689 memset(zholes_size, 0, sizeof(zholes_size));
690
691 num_physpages += mem_data[node].num_physpages;
692
693 if (mem_data[node].min_pfn >= max_dma) {
694 /* All of this node's memory is above ZONE_DMA */
695 zones_size[ZONE_NORMAL] = mem_data[node].max_pfn -
696 mem_data[node].min_pfn;
697 zholes_size[ZONE_NORMAL] = mem_data[node].max_pfn -
698 mem_data[node].min_pfn -
699 mem_data[node].num_physpages;
700 } else if (mem_data[node].max_pfn < max_dma) {
701 /* All of this node's memory is in ZONE_DMA */
702 zones_size[ZONE_DMA] = mem_data[node].max_pfn -
703 mem_data[node].min_pfn;
704 zholes_size[ZONE_DMA] = mem_data[node].max_pfn -
705 mem_data[node].min_pfn -
706 mem_data[node].num_dma_physpages;
707 } else {
708 /* This node has memory in both zones */
709 zones_size[ZONE_DMA] = max_dma -
710 mem_data[node].min_pfn;
711 zholes_size[ZONE_DMA] = zones_size[ZONE_DMA] -
712 mem_data[node].num_dma_physpages;
713 zones_size[ZONE_NORMAL] = mem_data[node].max_pfn -
714 max_dma;
715 zholes_size[ZONE_NORMAL] = zones_size[ZONE_NORMAL] -
716 (mem_data[node].num_physpages -
717 mem_data[node].num_dma_physpages);
718 }
719
720 if (node == 0) {
721 vmalloc_end -=
722 PAGE_ALIGN(max_low_pfn * sizeof(struct page));
723 vmem_map = (struct page *) vmalloc_end;
724
725 efi_memmap_walk(create_mem_map_page_table, NULL);
726 printk("Virtual mem_map starts at 0x%p\n", vmem_map);
727 }
728
729 pfn_offset = mem_data[node].min_pfn;
730
731 NODE_DATA(node)->node_mem_map = vmem_map + pfn_offset;
732 free_area_init_node(node, NODE_DATA(node), zones_size,
733 pfn_offset, zholes_size);
734 }
735
736 zero_page_memmap_ptr = virt_to_page(ia64_imva(empty_zero_page));
737}
diff --git a/arch/ia64/mm/extable.c b/arch/ia64/mm/extable.c
new file mode 100644
index 000000000000..6d259e34f359
--- /dev/null
+++ b/arch/ia64/mm/extable.c
@@ -0,0 +1,90 @@
1/*
2 * Kernel exception handling table support. Derived from arch/alpha/mm/extable.c.
3 *
4 * Copyright (C) 1998, 1999, 2001-2002, 2004 Hewlett-Packard Co
5 * David Mosberger-Tang <davidm@hpl.hp.com>
6 */
7
8#include <linux/config.h>
9#include <linux/sort.h>
10
11#include <asm/uaccess.h>
12#include <asm/module.h>
13
14static int cmp_ex(const void *a, const void *b)
15{
16 const struct exception_table_entry *l = a, *r = b;
17 u64 lip = (u64) &l->addr + l->addr;
18 u64 rip = (u64) &r->addr + r->addr;
19
20 /* avoid overflow */
21 if (lip > rip)
22 return 1;
23 if (lip < rip)
24 return -1;
25 return 0;
26}
27
28static void swap_ex(void *a, void *b, int size)
29{
30 struct exception_table_entry *l = a, *r = b, tmp;
31 u64 delta = (u64) r - (u64) l;
32
33 tmp = *l;
34 l->addr = r->addr + delta;
35 l->cont = r->cont + delta;
36 r->addr = tmp.addr - delta;
37 r->cont = tmp.cont - delta;
38}
39
40/*
41 * Sort the exception table. It's usually already sorted, but there
42 * may be unordered entries due to multiple text sections (such as the
43 * .init text section). Note that the exception-table entries contain
44 * location-relative addresses, which requires a bit of care during
45 * sorting to avoid overflows in the offset members (e.g., it would
46 * not be safe to make a temporary copy of an exception-table entry on
47 * the stack, because the stack may be more than 2GB away from the
48 * exception-table).
49 */
50void sort_extable (struct exception_table_entry *start,
51 struct exception_table_entry *finish)
52{
53 sort(start, finish - start, sizeof(struct exception_table_entry),
54 cmp_ex, swap_ex);
55}
56
57const struct exception_table_entry *
58search_extable (const struct exception_table_entry *first,
59 const struct exception_table_entry *last,
60 unsigned long ip)
61{
62 const struct exception_table_entry *mid;
63 unsigned long mid_ip;
64 long diff;
65
66 while (first <= last) {
67 mid = &first[(last - first)/2];
68 mid_ip = (u64) &mid->addr + mid->addr;
69 diff = mid_ip - ip;
70 if (diff == 0)
71 return mid;
72 else if (diff < 0)
73 first = mid + 1;
74 else
75 last = mid - 1;
76 }
77 return NULL;
78}
79
80void
81ia64_handle_exception (struct pt_regs *regs, const struct exception_table_entry *e)
82{
83 long fix = (u64) &e->cont + e->cont;
84
85 regs->r8 = -EFAULT;
86 if (fix & 4)
87 regs->r9 = 0;
88 regs->cr_iip = fix & ~0xf;
89 ia64_psr(regs)->ri = fix & 0x3; /* set continuation slot number */
90}
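The entries handled here hold self-relative offsets rather than absolute pointers, which is why swap_ex() has to rebias them by the distance an entry moves. A minimal sketch of recovering an absolute address from an entry, mirroring cmp_ex() and search_extable() above (`extable_entry_ip` is a hypothetical helper name):

static u64 extable_entry_ip(const struct exception_table_entry *e)
{
	/* the stored value is an offset from the address of the field itself */
	return (u64) &e->addr + e->addr;
}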
diff --git a/arch/ia64/mm/fault.c b/arch/ia64/mm/fault.c
new file mode 100644
index 000000000000..da859125aaef
--- /dev/null
+++ b/arch/ia64/mm/fault.c
@@ -0,0 +1,261 @@
1/*
2 * MMU fault handling support.
3 *
4 * Copyright (C) 1998-2002 Hewlett-Packard Co
5 * David Mosberger-Tang <davidm@hpl.hp.com>
6 */
7#include <linux/sched.h>
8#include <linux/kernel.h>
9#include <linux/mm.h>
10#include <linux/smp_lock.h>
11#include <linux/interrupt.h>
12
13#include <asm/pgtable.h>
14#include <asm/processor.h>
15#include <asm/system.h>
16#include <asm/uaccess.h>
17
18extern void die (char *, struct pt_regs *, long);
19
20/*
21 * This routine is analogous to expand_stack() but instead grows the
22 * register backing store (which grows towards higher addresses).
23 * Since the register backing store is accessed sequentially, we
24 * disallow growing the RBS by more than a page at a time. Note that
25 * the VM_GROWSUP flag can be set on any VM area but that's fine
26 * because the total process size is still limited by RLIMIT_STACK and
27 * RLIMIT_AS.
28 */
29static inline long
30expand_backing_store (struct vm_area_struct *vma, unsigned long address)
31{
32 unsigned long grow;
33
34 grow = PAGE_SIZE >> PAGE_SHIFT;
35 if (address - vma->vm_start > current->signal->rlim[RLIMIT_STACK].rlim_cur
36 || (((vma->vm_mm->total_vm + grow) << PAGE_SHIFT) > current->signal->rlim[RLIMIT_AS].rlim_cur))
37 return -ENOMEM;
38 vma->vm_end += PAGE_SIZE;
39 vma->vm_mm->total_vm += grow;
40 if (vma->vm_flags & VM_LOCKED)
41 vma->vm_mm->locked_vm += grow;
42 __vm_stat_account(vma->vm_mm, vma->vm_flags, vma->vm_file, grow);
43 return 0;
44}
45
46/*
47 * Return TRUE if ADDRESS points at a page in the kernel's mapped segment
48 * (inside region 5, on ia64) and that page is present.
49 */
50static int
51mapped_kernel_page_is_present (unsigned long address)
52{
53 pgd_t *pgd;
54 pud_t *pud;
55 pmd_t *pmd;
56 pte_t *ptep, pte;
57
58 pgd = pgd_offset_k(address);
59 if (pgd_none(*pgd) || pgd_bad(*pgd))
60 return 0;
61
62 pud = pud_offset(pgd, address);
63 if (pud_none(*pud) || pud_bad(*pud))
64 return 0;
65
66 pmd = pmd_offset(pud, address);
67 if (pmd_none(*pmd) || pmd_bad(*pmd))
68 return 0;
69
70 ptep = pte_offset_kernel(pmd, address);
71 if (!ptep)
72 return 0;
73
74 pte = *ptep;
75 return pte_present(pte);
76}
77
78void
79ia64_do_page_fault (unsigned long address, unsigned long isr, struct pt_regs *regs)
80{
81 int signal = SIGSEGV, code = SEGV_MAPERR;
82 struct vm_area_struct *vma, *prev_vma;
83 struct mm_struct *mm = current->mm;
84 struct siginfo si;
85 unsigned long mask;
86
87 /*
88 * If we're in an interrupt or have no user context, we must not take the fault.
89 */
90 if (in_atomic() || !mm)
91 goto no_context;
92
93#ifdef CONFIG_VIRTUAL_MEM_MAP
94 /*
95 * If fault is in region 5 and we are in the kernel, we may already
96 * have the mmap_sem (pfn_valid macro is called during mmap). There
97 * is no vma for region 5 addresses anyway, so skip getting the semaphore
98 * and go directly to the exception handling code.
99 */
100
101 if ((REGION_NUMBER(address) == 5) && !user_mode(regs))
102 goto bad_area_no_up;
103#endif
104
105 down_read(&mm->mmap_sem);
106
107 vma = find_vma_prev(mm, address, &prev_vma);
108 if (!vma)
109 goto bad_area;
110
111 /* find_vma_prev() returns vma such that address < vma->vm_end or NULL */
112 if (address < vma->vm_start)
113 goto check_expansion;
114
115 good_area:
116 code = SEGV_ACCERR;
117
118 /* OK, we've got a good vm_area for this memory area. Check the access permissions: */
119
120# define VM_READ_BIT 0
121# define VM_WRITE_BIT 1
122# define VM_EXEC_BIT 2
123
124# if (((1 << VM_READ_BIT) != VM_READ || (1 << VM_WRITE_BIT) != VM_WRITE) \
125 || (1 << VM_EXEC_BIT) != VM_EXEC)
126# error File is out of sync with <linux/mm.h>. Please update.
127# endif
128
129 mask = ( (((isr >> IA64_ISR_X_BIT) & 1UL) << VM_EXEC_BIT)
130 | (((isr >> IA64_ISR_W_BIT) & 1UL) << VM_WRITE_BIT)
131 | (((isr >> IA64_ISR_R_BIT) & 1UL) << VM_READ_BIT));
132
133 if ((vma->vm_flags & mask) != mask)
134 goto bad_area;
135
136 survive:
137 /*
138 * If for any reason at all we couldn't handle the fault, make
139 * sure we exit gracefully rather than endlessly redo the
140 * fault.
141 */
142 switch (handle_mm_fault(mm, vma, address, (mask & VM_WRITE) != 0)) {
143 case VM_FAULT_MINOR:
144 ++current->min_flt;
145 break;
146 case VM_FAULT_MAJOR:
147 ++current->maj_flt;
148 break;
149 case VM_FAULT_SIGBUS:
150 /*
151 * We ran out of memory, or some other thing happened
152 * to us that made us unable to handle the page fault
153 * gracefully.
154 */
155 signal = SIGBUS;
156 goto bad_area;
157 case VM_FAULT_OOM:
158 goto out_of_memory;
159 default:
160 BUG();
161 }
162 up_read(&mm->mmap_sem);
163 return;
164
165 check_expansion:
166 if (!(prev_vma && (prev_vma->vm_flags & VM_GROWSUP) && (address == prev_vma->vm_end))) {
167 if (!(vma->vm_flags & VM_GROWSDOWN))
168 goto bad_area;
169 if (REGION_NUMBER(address) != REGION_NUMBER(vma->vm_start)
170 || REGION_OFFSET(address) >= RGN_MAP_LIMIT)
171 goto bad_area;
172 if (expand_stack(vma, address))
173 goto bad_area;
174 } else {
175 vma = prev_vma;
176 if (REGION_NUMBER(address) != REGION_NUMBER(vma->vm_start)
177 || REGION_OFFSET(address) >= RGN_MAP_LIMIT)
178 goto bad_area;
179 if (expand_backing_store(vma, address))
180 goto bad_area;
181 }
182 goto good_area;
183
184 bad_area:
185 up_read(&mm->mmap_sem);
186#ifdef CONFIG_VIRTUAL_MEM_MAP
187 bad_area_no_up:
188#endif
189 if ((isr & IA64_ISR_SP)
190 || ((isr & IA64_ISR_NA) && (isr & IA64_ISR_CODE_MASK) == IA64_ISR_CODE_LFETCH))
191 {
192 /*
193 * This fault was due to a speculative load or lfetch.fault, set the "ed"
194 * bit in the psr to ensure forward progress. (Target register will get a
195 * NaT for ld.s, lfetch will be canceled.)
196 */
197 ia64_psr(regs)->ed = 1;
198 return;
199 }
200 if (user_mode(regs)) {
201 si.si_signo = signal;
202 si.si_errno = 0;
203 si.si_code = code;
204 si.si_addr = (void __user *) address;
205 si.si_isr = isr;
206 si.si_flags = __ISR_VALID;
207 force_sig_info(signal, &si, current);
208 return;
209 }
210
211 no_context:
212 if (isr & IA64_ISR_SP) {
213 /*
214 * This fault was due to a speculative load set the "ed" bit in the psr to
215 * ensure forward progress (target register will get a NaT).
216 */
217 ia64_psr(regs)->ed = 1;
218 return;
219 }
220
221 if (ia64_done_with_exception(regs))
222 return;
223
224 /*
225 * Since we have no vma's for region 5, we might get here even if the address is
226 * valid, due to the VHPT walker inserting a non present translation that becomes
227 * stale. If that happens, the non present fault handler already purged the stale
228 * translation, which fixed the problem. So, we check to see if the translation is
229 * valid, and return if it is.
230 */
231 if (REGION_NUMBER(address) == 5 && mapped_kernel_page_is_present(address))
232 return;
233
234 /*
235 * Oops. The kernel tried to access some bad page. We'll have to terminate things
236 * with extreme prejudice.
237 */
238 bust_spinlocks(1);
239
240 if (address < PAGE_SIZE)
241 printk(KERN_ALERT "Unable to handle kernel NULL pointer dereference (address %016lx)\n", address);
242 else
243 printk(KERN_ALERT "Unable to handle kernel paging request at "
244 "virtual address %016lx\n", address);
245 die("Oops", regs, isr);
246 bust_spinlocks(0);
247 do_exit(SIGKILL);
248 return;
249
250 out_of_memory:
251 up_read(&mm->mmap_sem);
252 if (current->pid == 1) {
253 yield();
254 down_read(&mm->mmap_sem);
255 goto survive;
256 }
257 printk(KERN_CRIT "VM: killing process %s\n", current->comm);
258 if (user_mode(regs))
259 do_exit(SIGKILL);
260 goto no_context;
261}
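A condensed sketch of the permission test used in the fault path above (`fault_access_permitted` is hypothetical; `mask` is the VM_READ/VM_WRITE/VM_EXEC combination built from the ISR bits): every right demanded by the faulting access must be present in the VMA, otherwise the fault is reported as SEGV_ACCERR.

static int fault_access_permitted(unsigned long vm_flags, unsigned long mask)
{
	/* e.g. a store sets VM_WRITE in mask; a read-only VMA then fails */
	return (vm_flags & mask) == mask;
}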
diff --git a/arch/ia64/mm/hugetlbpage.c b/arch/ia64/mm/hugetlbpage.c
new file mode 100644
index 000000000000..40ad8328ffd5
--- /dev/null
+++ b/arch/ia64/mm/hugetlbpage.c
@@ -0,0 +1,357 @@
1/*
2 * IA-64 Huge TLB Page Support for Kernel.
3 *
4 * Copyright (C) 2002-2004 Rohit Seth <rohit.seth@intel.com>
5 * Copyright (C) 2003-2004 Ken Chen <kenneth.w.chen@intel.com>
6 *
7 * Sep, 2003: add numa support
8 * Feb, 2004: dynamic hugetlb page size via boot parameter
9 */
10
11#include <linux/config.h>
12#include <linux/init.h>
13#include <linux/fs.h>
14#include <linux/mm.h>
15#include <linux/hugetlb.h>
16#include <linux/pagemap.h>
17#include <linux/smp_lock.h>
18#include <linux/slab.h>
19#include <linux/sysctl.h>
20#include <asm/mman.h>
21#include <asm/pgalloc.h>
22#include <asm/tlb.h>
23#include <asm/tlbflush.h>
24
25unsigned int hpage_shift=HPAGE_SHIFT_DEFAULT;
26
27static pte_t *
28huge_pte_alloc (struct mm_struct *mm, unsigned long addr)
29{
30 unsigned long taddr = htlbpage_to_page(addr);
31 pgd_t *pgd;
32 pud_t *pud;
33 pmd_t *pmd;
34 pte_t *pte = NULL;
35
36 pgd = pgd_offset(mm, taddr);
37 pud = pud_alloc(mm, pgd, taddr);
38 if (pud) {
39 pmd = pmd_alloc(mm, pud, taddr);
40 if (pmd)
41 pte = pte_alloc_map(mm, pmd, taddr);
42 }
43 return pte;
44}
45
46static pte_t *
47huge_pte_offset (struct mm_struct *mm, unsigned long addr)
48{
49 unsigned long taddr = htlbpage_to_page(addr);
50 pgd_t *pgd;
51 pud_t *pud;
52 pmd_t *pmd;
53 pte_t *pte = NULL;
54
55 pgd = pgd_offset(mm, taddr);
56 if (pgd_present(*pgd)) {
57 pud = pud_offset(pgd, taddr);
58 if (pud_present(*pud)) {
59 pmd = pmd_offset(pud, taddr);
60 if (pmd_present(*pmd))
61 pte = pte_offset_map(pmd, taddr);
62 }
63 }
64
65 return pte;
66}
67
68#define mk_pte_huge(entry) { pte_val(entry) |= _PAGE_P; }
69
70static void
71set_huge_pte (struct mm_struct *mm, struct vm_area_struct *vma,
72 struct page *page, pte_t * page_table, int write_access)
73{
74 pte_t entry;
75
76 add_mm_counter(mm, rss, HPAGE_SIZE / PAGE_SIZE);
77 if (write_access) {
78 entry =
79 pte_mkwrite(pte_mkdirty(mk_pte(page, vma->vm_page_prot)));
80 } else
81 entry = pte_wrprotect(mk_pte(page, vma->vm_page_prot));
82 entry = pte_mkyoung(entry);
83 mk_pte_huge(entry);
84 set_pte(page_table, entry);
85 return;
86}
87/*
88 * This function checks for proper alignment of input addr and len parameters.
89 */
90int is_aligned_hugepage_range(unsigned long addr, unsigned long len)
91{
92 if (len & ~HPAGE_MASK)
93 return -EINVAL;
94 if (addr & ~HPAGE_MASK)
95 return -EINVAL;
96 if (REGION_NUMBER(addr) != REGION_HPAGE)
97 return -EINVAL;
98
99 return 0;
100}
101
102int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src,
103 struct vm_area_struct *vma)
104{
105 pte_t *src_pte, *dst_pte, entry;
106 struct page *ptepage;
107 unsigned long addr = vma->vm_start;
108 unsigned long end = vma->vm_end;
109
110 while (addr < end) {
111 dst_pte = huge_pte_alloc(dst, addr);
112 if (!dst_pte)
113 goto nomem;
114 src_pte = huge_pte_offset(src, addr);
115 entry = *src_pte;
116 ptepage = pte_page(entry);
117 get_page(ptepage);
118 set_pte(dst_pte, entry);
119 add_mm_counter(dst, rss, HPAGE_SIZE / PAGE_SIZE);
120 addr += HPAGE_SIZE;
121 }
122 return 0;
123nomem:
124 return -ENOMEM;
125}
126
127int
128follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma,
129 struct page **pages, struct vm_area_struct **vmas,
130 unsigned long *st, int *length, int i)
131{
132 pte_t *ptep, pte;
133 unsigned long start = *st;
134 unsigned long pstart;
135 int len = *length;
136 struct page *page;
137
138 do {
139 pstart = start & HPAGE_MASK;
140 ptep = huge_pte_offset(mm, start);
141 pte = *ptep;
142
143back1:
144 page = pte_page(pte);
145 if (pages) {
146 page += ((start & ~HPAGE_MASK) >> PAGE_SHIFT);
147 get_page(page);
148 pages[i] = page;
149 }
150 if (vmas)
151 vmas[i] = vma;
152 i++;
153 len--;
154 start += PAGE_SIZE;
155 if (((start & HPAGE_MASK) == pstart) && len &&
156 (start < vma->vm_end))
157 goto back1;
158 } while (len && start < vma->vm_end);
159 *length = len;
160 *st = start;
161 return i;
162}
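
In the loop above, page += ((start & ~HPAGE_MASK) >> PAGE_SHIFT) selects the struct page of the base page at offset start within the huge page, relying on the huge page's constituent pages being contiguous in the mem_map. A worked example, assuming purely for illustration 16KB base pages and 256MB huge pages (both are configurable on ia64):

#include <stdio.h>
#include <stdint.h>

/* Assumed values for illustration only. */
#define EX_PAGE_SHIFT	14				/* 16KB base pages  */
#define EX_HPAGE_SHIFT	28				/* 256MB huge pages */
#define EX_HPAGE_MASK	(~((1ULL << EX_HPAGE_SHIFT) - 1))

int main(void)
{
	uint64_t hpage_base = 0x8000000000000000ULL;	/* assumed hugetlb region base */
	uint64_t start = hpage_base + 0x24000;		/* an address inside the huge page */

	/* Index of the base page within the huge page, as in follow_hugetlb_page(). */
	unsigned long idx = (start & ~EX_HPAGE_MASK) >> EX_PAGE_SHIFT;
	printf("offset %#llx -> base-page index %lu\n",
	       (unsigned long long)(start & ~EX_HPAGE_MASK), idx);	/* 0x24000 -> 9 */
	return 0;
}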
163
164struct page *follow_huge_addr(struct mm_struct *mm, unsigned long addr, int write)
165{
166 struct page *page;
167 pte_t *ptep;
168
169 if (REGION_NUMBER(addr) != REGION_HPAGE)
170 return ERR_PTR(-EINVAL);
171
172 ptep = huge_pte_offset(mm, addr);
173 if (!ptep || pte_none(*ptep))
174 return NULL;
175 page = pte_page(*ptep);
176 page += ((addr & ~HPAGE_MASK) >> PAGE_SHIFT);
177 return page;
178}
179int pmd_huge(pmd_t pmd)
180{
181 return 0;
182}
183struct page *
184follow_huge_pmd(struct mm_struct *mm, unsigned long address, pmd_t *pmd, int write)
185{
186 return NULL;
187}
188
189/*
190 * Same as generic free_pgtables(), except the PGDIR_* constants and pgd_offset
191 * are hugetlb-region specific.
192 */
193void hugetlb_free_pgtables(struct mmu_gather *tlb, struct vm_area_struct *prev,
194 unsigned long start, unsigned long end)
195{
196 unsigned long first = start & HUGETLB_PGDIR_MASK;
197 unsigned long last = end + HUGETLB_PGDIR_SIZE - 1;
198 struct mm_struct *mm = tlb->mm;
199
200 if (!prev) {
201 prev = mm->mmap;
202 if (!prev)
203 goto no_mmaps;
204 if (prev->vm_end > start) {
205 if (last > prev->vm_start)
206 last = prev->vm_start;
207 goto no_mmaps;
208 }
209 }
210 for (;;) {
211 struct vm_area_struct *next = prev->vm_next;
212
213 if (next) {
214 if (next->vm_start < start) {
215 prev = next;
216 continue;
217 }
218 if (last > next->vm_start)
219 last = next->vm_start;
220 }
221 if (prev->vm_end > first)
222 first = prev->vm_end;
223 break;
224 }
225no_mmaps:
226 if (last < first) /* for arches with discontiguous pgd indices */
227 return;
228 clear_page_range(tlb, first, last);
229}
230
231void unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start, unsigned long end)
232{
233 struct mm_struct *mm = vma->vm_mm;
234 unsigned long address;
235 pte_t *pte;
236 struct page *page;
237
238 BUG_ON(start & (HPAGE_SIZE - 1));
239 BUG_ON(end & (HPAGE_SIZE - 1));
240
241 for (address = start; address < end; address += HPAGE_SIZE) {
242 pte = huge_pte_offset(mm, address);
243 if (pte_none(*pte))
244 continue;
245 page = pte_page(*pte);
246 put_page(page);
247 pte_clear(mm, address, pte);
248 }
249 add_mm_counter(mm, rss, - ((end - start) >> PAGE_SHIFT));
250 flush_tlb_range(vma, start, end);
251}
252
253int hugetlb_prefault(struct address_space *mapping, struct vm_area_struct *vma)
254{
255 struct mm_struct *mm = current->mm;
256 unsigned long addr;
257 int ret = 0;
258
259 BUG_ON(vma->vm_start & ~HPAGE_MASK);
260 BUG_ON(vma->vm_end & ~HPAGE_MASK);
261
262 spin_lock(&mm->page_table_lock);
263 for (addr = vma->vm_start; addr < vma->vm_end; addr += HPAGE_SIZE) {
264 unsigned long idx;
265 pte_t *pte = huge_pte_alloc(mm, addr);
266 struct page *page;
267
268 if (!pte) {
269 ret = -ENOMEM;
270 goto out;
271 }
272 if (!pte_none(*pte))
273 continue;
274
275 idx = ((addr - vma->vm_start) >> HPAGE_SHIFT)
276 + (vma->vm_pgoff >> (HPAGE_SHIFT - PAGE_SHIFT));
277 page = find_get_page(mapping, idx);
278 if (!page) {
279 /* charge the fs quota first */
280 if (hugetlb_get_quota(mapping)) {
281 ret = -ENOMEM;
282 goto out;
283 }
284 page = alloc_huge_page();
285 if (!page) {
286 hugetlb_put_quota(mapping);
287 ret = -ENOMEM;
288 goto out;
289 }
290 ret = add_to_page_cache(page, mapping, idx, GFP_ATOMIC);
291 if (! ret) {
292 unlock_page(page);
293 } else {
294 hugetlb_put_quota(mapping);
295 page_cache_release(page);
296 goto out;
297 }
298 }
299 set_huge_pte(mm, vma, page, pte, vma->vm_flags & VM_WRITE);
300 }
301out:
302 spin_unlock(&mm->page_table_lock);
303 return ret;
304}
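
The page-cache index computed in hugetlb_prefault() works entirely in huge-page units: the offset of addr within the mapping plus the file offset, where vm_pgoff is kept in base-page units and therefore gets shifted down by (HPAGE_SHIFT - PAGE_SHIFT). A small worked example with the same assumed shifts as before (16KB base pages, 256MB huge pages) and made-up mapping parameters:

#include <stdio.h>

#define EX_PAGE_SHIFT	14	/* assumed: 16KB base pages  */
#define EX_HPAGE_SHIFT	28	/* assumed: 256MB huge pages */

int main(void)
{
	unsigned long long vm_start = 0x8000000000000000ULL;	/* assumed mapping start */
	unsigned long long vm_pgoff =				/* file offset of 3 huge pages, in base pages */
		3ULL << (EX_HPAGE_SHIFT - EX_PAGE_SHIFT);
	unsigned long long addr = vm_start + (2ULL << EX_HPAGE_SHIFT);	/* 3rd huge page of the mapping */

	/* Same arithmetic as hugetlb_prefault(): index into the file, in huge-page units. */
	unsigned long long idx = ((addr - vm_start) >> EX_HPAGE_SHIFT)
			       + (vm_pgoff >> (EX_HPAGE_SHIFT - EX_PAGE_SHIFT));
	printf("page-cache index = %llu\n", idx);	/* 2 + 3 = 5 */
	return 0;
}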
305
306unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr, unsigned long len,
307 unsigned long pgoff, unsigned long flags)
308{
309 struct vm_area_struct *vmm;
310
311 if (len > RGN_MAP_LIMIT)
312 return -ENOMEM;
313 if (len & ~HPAGE_MASK)
314 return -EINVAL;
315 /* This code assumes that REGION_HPAGE != 0. */
316 if ((REGION_NUMBER(addr) != REGION_HPAGE) || (addr & (HPAGE_SIZE - 1)))
317 addr = HPAGE_REGION_BASE;
318 else
319 addr = ALIGN(addr, HPAGE_SIZE);
320 for (vmm = find_vma(current->mm, addr); ; vmm = vmm->vm_next) {
321 /* At this point: (!vmm || addr < vmm->vm_end). */
322 if (REGION_OFFSET(addr) + len > RGN_MAP_LIMIT)
323 return -ENOMEM;
324 if (!vmm || (addr + len) <= vmm->vm_start)
325 return addr;
326 addr = ALIGN(vmm->vm_end, HPAGE_SIZE);
327 }
328}
329
330static int __init hugetlb_setup_sz(char *str)
331{
332 u64 tr_pages;
333 unsigned long long size;
334
335 if (ia64_pal_vm_page_size(&tr_pages, NULL) != 0)
336 /*
337 * shouldn't happen, but just in case.
338 */
339 tr_pages = 0x15557000UL;
340
341 size = memparse(str, &str);
342 if (*str || (size & (size-1)) || !(tr_pages & size) ||
343 size <= PAGE_SIZE ||
344 size >= (1UL << PAGE_SHIFT << MAX_ORDER)) {
345 printk(KERN_WARNING "Invalid huge page size specified\n");
346 return 1;
347 }
348
349 hpage_shift = __ffs(size);
350 /*
351 * The boot cpu has already executed ia64_mmu_init with HPAGE_SHIFT_DEFAULT;
352 * override the hugetlb region register here with the new page shift.
353 */
354 ia64_set_rr(HPAGE_REGION_BASE, hpage_shift << 2);
355 return 1;
356}
357__setup("hugepagesz=", hugetlb_setup_sz);
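
hugetlb_setup_sz() accepts a "hugepagesz=" value only if it is a power of two, is one of the translation-register page sizes reported by PAL (a bitmask of supported sizes; the hard-coded fallback 0x15557000 above is used if the PAL call fails), and lies strictly between PAGE_SIZE and the MAX_ORDER allocation limit; the hugetlb region register is then reprogrammed with the new shift. A standalone sketch of just the validation step, using the fallback mask from the code, an assumed 16KB base page, and omitting the MAX_ORDER check:

#include <stdio.h>
#include <stdbool.h>

#define EX_PAGE_SIZE	(1UL << 14)		/* assumed 16KB base pages */
#define EX_TR_PAGES	0x15557000UL		/* fallback mask of supported TR page sizes */

/* Mirror of the checks in hugetlb_setup_sz(), minus the MAX_ORDER limit. */
static bool hugepagesz_ok(unsigned long size)
{
	if (size & (size - 1))			/* must be a power of two    */
		return false;
	if (!(EX_TR_PAGES & size))		/* must be a supported size  */
		return false;
	if (size <= EX_PAGE_SIZE)		/* must exceed the base page */
		return false;
	return true;
}

int main(void)
{
	unsigned long sizes[] = { 1UL << 24, 1UL << 17, 1UL << 13, 3UL << 20 };
	for (unsigned i = 0; i < 4; i++)
		printf("%lu bytes: %s\n", sizes[i],
		       hugepagesz_ok(sizes[i]) ? "accepted" : "rejected");
	return 0;
}

With this mask, 16MB is accepted, while 128KB (not a supported TR size), 8KB (not above the base page), and 3MB (not a power of two) are rejected.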
diff --git a/arch/ia64/mm/init.c b/arch/ia64/mm/init.c
new file mode 100644
index 000000000000..65cf839573ea
--- /dev/null
+++ b/arch/ia64/mm/init.c
@@ -0,0 +1,597 @@
1/*
2 * Initialize MMU support.
3 *
4 * Copyright (C) 1998-2003 Hewlett-Packard Co
5 * David Mosberger-Tang <davidm@hpl.hp.com>
6 */
7#include <linux/config.h>
8#include <linux/kernel.h>
9#include <linux/init.h>
10
11#include <linux/bootmem.h>
12#include <linux/efi.h>
13#include <linux/elf.h>
14#include <linux/mm.h>
15#include <linux/mmzone.h>
16#include <linux/module.h>
17#include <linux/personality.h>
18#include <linux/reboot.h>
19#include <linux/slab.h>
20#include <linux/swap.h>
21#include <linux/proc_fs.h>
22#include <linux/bitops.h>
23
24#include <asm/a.out.h>
25#include <asm/dma.h>
26#include <asm/ia32.h>
27#include <asm/io.h>
28#include <asm/machvec.h>
29#include <asm/numa.h>
30#include <asm/patch.h>
31#include <asm/pgalloc.h>
32#include <asm/sal.h>
33#include <asm/sections.h>
34#include <asm/system.h>
35#include <asm/tlb.h>
36#include <asm/uaccess.h>
37#include <asm/unistd.h>
38#include <asm/mca.h>
39
40DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
41
42extern void ia64_tlb_init (void);
43
44unsigned long MAX_DMA_ADDRESS = PAGE_OFFSET + 0x100000000UL;
45
46#ifdef CONFIG_VIRTUAL_MEM_MAP
47unsigned long vmalloc_end = VMALLOC_END_INIT;
48EXPORT_SYMBOL(vmalloc_end);
49struct page *vmem_map;
50EXPORT_SYMBOL(vmem_map);
51#endif
52
53static int pgt_cache_water[2] = { 25, 50 };
54
55struct page *zero_page_memmap_ptr; /* map entry for zero page */
56EXPORT_SYMBOL(zero_page_memmap_ptr);
57
58void
59check_pgt_cache (void)
60{
61 int low, high;
62
63 low = pgt_cache_water[0];
64 high = pgt_cache_water[1];
65
66 preempt_disable();
67 if (pgtable_cache_size > (u64) high) {
68 do {
69 if (pgd_quicklist)
70 free_page((unsigned long)pgd_alloc_one_fast(NULL));
71 if (pmd_quicklist)
72 free_page((unsigned long)pmd_alloc_one_fast(NULL, 0));
73 } while (pgtable_cache_size > (u64) low);
74 }
75 preempt_enable();
76}
77
78void
79lazy_mmu_prot_update (pte_t pte)
80{
81 unsigned long addr;
82 struct page *page;
83
84 if (!pte_exec(pte))
85 return; /* not an executable page... */
86
87 page = pte_page(pte);
88 addr = (unsigned long) page_address(page);
89
90 if (test_bit(PG_arch_1, &page->flags))
91 return; /* i-cache is already coherent with d-cache */
92
93 flush_icache_range(addr, addr + PAGE_SIZE);
94 set_bit(PG_arch_1, &page->flags); /* mark page as clean */
95}
96
97inline void
98ia64_set_rbs_bot (void)
99{
100 unsigned long stack_size = current->signal->rlim[RLIMIT_STACK].rlim_max & -16;
101
102 if (stack_size > MAX_USER_STACK_SIZE)
103 stack_size = MAX_USER_STACK_SIZE;
104 current->thread.rbs_bot = STACK_TOP - stack_size;
105}
106
107/*
108 * This performs some platform-dependent address space initialization.
109 * On IA-64, we want to setup the VM area for the register backing
110 * store (which grows upwards) and install the gateway page which is
111 * used for signal trampolines, etc.
112 */
113void
114ia64_init_addr_space (void)
115{
116 struct vm_area_struct *vma;
117
118 ia64_set_rbs_bot();
119
120 /*
121 * If we're out of memory and kmem_cache_alloc() returns NULL, we simply ignore
122 * the problem. When the process attempts to write to the register backing store
123 * for the first time, it will get a SEGFAULT in this case.
124 */
125 vma = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
126 if (vma) {
127 memset(vma, 0, sizeof(*vma));
128 vma->vm_mm = current->mm;
129 vma->vm_start = current->thread.rbs_bot & PAGE_MASK;
130 vma->vm_end = vma->vm_start + PAGE_SIZE;
131 vma->vm_page_prot = protection_map[VM_DATA_DEFAULT_FLAGS & 0x7];
132 vma->vm_flags = VM_DATA_DEFAULT_FLAGS | VM_GROWSUP;
133 down_write(&current->mm->mmap_sem);
134 if (insert_vm_struct(current->mm, vma)) {
135 up_write(&current->mm->mmap_sem);
136 kmem_cache_free(vm_area_cachep, vma);
137 return;
138 }
139 up_write(&current->mm->mmap_sem);
140 }
141
142 /* map NaT-page at address zero to speed up speculative dereferencing of NULL: */
143 if (!(current->personality & MMAP_PAGE_ZERO)) {
144 vma = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
145 if (vma) {
146 memset(vma, 0, sizeof(*vma));
147 vma->vm_mm = current->mm;
148 vma->vm_end = PAGE_SIZE;
149 vma->vm_page_prot = __pgprot(pgprot_val(PAGE_READONLY) | _PAGE_MA_NAT);
150 vma->vm_flags = VM_READ | VM_MAYREAD | VM_IO | VM_RESERVED;
151 down_write(&current->mm->mmap_sem);
152 if (insert_vm_struct(current->mm, vma)) {
153 up_write(&current->mm->mmap_sem);
154 kmem_cache_free(vm_area_cachep, vma);
155 return;
156 }
157 up_write(&current->mm->mmap_sem);
158 }
159 }
160}
161
162void
163free_initmem (void)
164{
165 unsigned long addr, eaddr;
166
167 addr = (unsigned long) ia64_imva(__init_begin);
168 eaddr = (unsigned long) ia64_imva(__init_end);
169 while (addr < eaddr) {
170 ClearPageReserved(virt_to_page(addr));
171 set_page_count(virt_to_page(addr), 1);
172 free_page(addr);
173 ++totalram_pages;
174 addr += PAGE_SIZE;
175 }
176 printk(KERN_INFO "Freeing unused kernel memory: %ldkB freed\n",
177 (__init_end - __init_begin) >> 10);
178}
179
180void
181free_initrd_mem (unsigned long start, unsigned long end)
182{
183 struct page *page;
184 /*
185 * EFI uses 4KB pages while the kernel can use 4KB or bigger.
186 * Thus EFI and the kernel may have different page sizes. It is
187 * therefore possible to have the initrd share the same page as
188 * the end of the kernel (given current setup).
189 *
190 * To avoid freeing/using the wrong page (kernel sized) we:
191 * - align up the beginning of initrd
192 * - align down the end of initrd
193 *
194 * | |
195 * |=============| a000
196 * | |
197 * | |
198 * | | 9000
199 * |/////////////|
200 * |/////////////|
201 * |=============| 8000
202 * |///INITRD////|
203 * |/////////////|
204 * |/////////////| 7000
205 * | |
206 * |KKKKKKKKKKKKK|
207 * |=============| 6000
208 * |KKKKKKKKKKKKK|
209 * |KKKKKKKKKKKKK|
210 * K=kernel using 8KB pages
211 *
212 * In this example, we must free page 8000 ONLY. So we must align up
213 * initrd_start and keep initrd_end as is.
214 */
215 start = PAGE_ALIGN(start);
216 end = end & PAGE_MASK;
217
218 if (start < end)
219 printk(KERN_INFO "Freeing initrd memory: %ldkB freed\n", (end - start) >> 10);
220
221 for (; start < end; start += PAGE_SIZE) {
222 if (!virt_addr_valid(start))
223 continue;
224 page = virt_to_page(start);
225 ClearPageReserved(page);
226 set_page_count(page, 1);
227 free_page(start);
228 ++totalram_pages;
229 }
230}
231
232/*
233 * This installs a clean page in the kernel's page table.
234 */
235struct page *
236put_kernel_page (struct page *page, unsigned long address, pgprot_t pgprot)
237{
238 pgd_t *pgd;
239 pud_t *pud;
240 pmd_t *pmd;
241 pte_t *pte;
242
243 if (!PageReserved(page))
244 printk(KERN_ERR "put_kernel_page: page at 0x%p not in reserved memory\n",
245 page_address(page));
246
247 pgd = pgd_offset_k(address); /* note: this is NOT pgd_offset()! */
248
249 spin_lock(&init_mm.page_table_lock);
250 {
251 pud = pud_alloc(&init_mm, pgd, address);
252 if (!pud)
253 goto out;
254
255 pmd = pmd_alloc(&init_mm, pud, address);
256 if (!pmd)
257 goto out;
258 pte = pte_alloc_map(&init_mm, pmd, address);
259 if (!pte)
260 goto out;
261 if (!pte_none(*pte)) {
262 pte_unmap(pte);
263 goto out;
264 }
265 set_pte(pte, mk_pte(page, pgprot));
266 pte_unmap(pte);
267 }
268 out: spin_unlock(&init_mm.page_table_lock);
269 /* no need for flush_tlb */
270 return page;
271}
272
273static void
274setup_gate (void)
275{
276 struct page *page;
277
278 /*
279 * Map the gate page twice: once read-only to export the ELF headers etc. and once
280 * as an execute-only page to enable privilege promotion via "epc":
281 */
282 page = virt_to_page(ia64_imva(__start_gate_section));
283 put_kernel_page(page, GATE_ADDR, PAGE_READONLY);
284#ifdef HAVE_BUGGY_SEGREL
285 page = virt_to_page(ia64_imva(__start_gate_section + PAGE_SIZE));
286 put_kernel_page(page, GATE_ADDR + PAGE_SIZE, PAGE_GATE);
287#else
288 put_kernel_page(page, GATE_ADDR + PERCPU_PAGE_SIZE, PAGE_GATE);
289#endif
290 ia64_patch_gate();
291}
292
293void __devinit
294ia64_mmu_init (void *my_cpu_data)
295{
296 unsigned long psr, pta, impl_va_bits;
297 extern void __devinit tlb_init (void);
298
299#ifdef CONFIG_DISABLE_VHPT
300# define VHPT_ENABLE_BIT 0
301#else
302# define VHPT_ENABLE_BIT 1
303#endif
304
305 /* Pin mapping for percpu area into TLB */
306 psr = ia64_clear_ic();
307 ia64_itr(0x2, IA64_TR_PERCPU_DATA, PERCPU_ADDR,
308 pte_val(pfn_pte(__pa(my_cpu_data) >> PAGE_SHIFT, PAGE_KERNEL)),
309 PERCPU_PAGE_SHIFT);
310
311 ia64_set_psr(psr);
312 ia64_srlz_i();
313
314 /*
315 * Check if the virtually mapped linear page table (VMLPT) overlaps with a mapped
316 * address space. The IA-64 architecture guarantees that at least 50 bits of
317 * virtual address space are implemented but if we pick a large enough page size
318 * (e.g., 64KB), the mapped address space is big enough that it will overlap with
319 * VMLPT. I assume that once we run on machines big enough to warrant 64KB pages,
320 * IMPL_VA_MSB will be significantly bigger, so this is unlikely to become a
321 * problem in practice. Alternatively, we could truncate the top of the mapped
322 * address space to not permit mappings that would overlap with the VMLPT.
323 * --davidm 00/12/06
324 */
325# define pte_bits 3
326# define mapped_space_bits (3*(PAGE_SHIFT - pte_bits) + PAGE_SHIFT)
327 /*
328 * The virtual page table has to cover the entire implemented address space within
329 * a region even though not all of this space may be mappable. The reason for
330 * this is that the Access bit and Dirty bit fault handlers perform
331 * non-speculative accesses to the virtual page table, so the address range of the
332 * virtual page table itself needs to be covered by the virtual page table.
333 */
334# define vmlpt_bits (impl_va_bits - PAGE_SHIFT + pte_bits)
335# define POW2(n) (1ULL << (n))
336
337 impl_va_bits = ffz(~(local_cpu_data->unimpl_va_mask | (7UL << 61)));
338
339 if (impl_va_bits < 51 || impl_va_bits > 61)
340 panic("CPU has bogus IMPL_VA_MSB value of %lu!\n", impl_va_bits - 1);
341
342 /* place the VMLPT at the end of each page-table mapped region: */
343 pta = POW2(61) - POW2(vmlpt_bits);
344
345 if (POW2(mapped_space_bits) >= pta)
346 panic("mm/init: overlap between virtually mapped linear page table and "
347 "mapped kernel space!");
348 /*
349 * Set the (virtually mapped linear) page table address. Bit
350 * 8 selects between the short and long format, bits 2-7 the
351 * size of the table, and bit 0 whether the VHPT walker is
352 * enabled.
353 */
354 ia64_set_pta(pta | (0 << 8) | (vmlpt_bits << 2) | VHPT_ENABLE_BIT);
355
356 ia64_tlb_init();
357
358#ifdef CONFIG_HUGETLB_PAGE
359 ia64_set_rr(HPAGE_REGION_BASE, HPAGE_SHIFT << 2);
360 ia64_srlz_d();
361#endif
362}
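
The PTA computation above is easier to follow with concrete numbers. Assuming, purely for illustration, 16KB pages (PAGE_SHIFT = 14), 8-byte PTEs (pte_bits = 3) and impl_va_bits = 51: the per-region VMLPT needs vmlpt_bits = 51 - 14 + 3 = 40 bits, the page-table-mapped space covers mapped_space_bits = 3*(14 - 3) + 14 = 47 bits, and the VMLPT is placed at the top of the region at 2^61 - 2^40; since 2^47 is well below that base, the overlap check passes. A small sketch of the same arithmetic:

#include <stdio.h>
#include <stdint.h>

#define POW2(n)	(1ULL << (n))

int main(void)
{
	/* Assumed configuration: 16KB pages, 8-byte PTEs, 51 implemented VA bits. */
	unsigned page_shift = 14, pte_bits = 3, impl_va_bits = 51;

	unsigned mapped_space_bits = 3 * (page_shift - pte_bits) + page_shift;
	unsigned vmlpt_bits = impl_va_bits - page_shift + pte_bits;
	uint64_t pta = POW2(61) - POW2(vmlpt_bits);	/* VMLPT base within the region */

	printf("mapped_space_bits = %u, vmlpt_bits = %u\n", mapped_space_bits, vmlpt_bits);
	printf("pta base = %#llx, overlap = %s\n", (unsigned long long)pta,
	       POW2(mapped_space_bits) >= pta ? "yes (would panic)" : "no");
	/* The value written to the PTA register additionally ORs in (vmlpt_bits << 2)
	 * and the VHPT enable bit, matching ia64_set_pta() above. */
	return 0;
}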
363
364#ifdef CONFIG_VIRTUAL_MEM_MAP
365
366int
367create_mem_map_page_table (u64 start, u64 end, void *arg)
368{
369 unsigned long address, start_page, end_page;
370 struct page *map_start, *map_end;
371 int node;
372 pgd_t *pgd;
373 pud_t *pud;
374 pmd_t *pmd;
375 pte_t *pte;
376
377 map_start = vmem_map + (__pa(start) >> PAGE_SHIFT);
378 map_end = vmem_map + (__pa(end) >> PAGE_SHIFT);
379
380 start_page = (unsigned long) map_start & PAGE_MASK;
381 end_page = PAGE_ALIGN((unsigned long) map_end);
382 node = paddr_to_nid(__pa(start));
383
384 for (address = start_page; address < end_page; address += PAGE_SIZE) {
385 pgd = pgd_offset_k(address);
386 if (pgd_none(*pgd))
387 pgd_populate(&init_mm, pgd, alloc_bootmem_pages_node(NODE_DATA(node), PAGE_SIZE));
388 pud = pud_offset(pgd, address);
389
390 if (pud_none(*pud))
391 pud_populate(&init_mm, pud, alloc_bootmem_pages_node(NODE_DATA(node), PAGE_SIZE));
392 pmd = pmd_offset(pud, address);
393
394 if (pmd_none(*pmd))
395 pmd_populate_kernel(&init_mm, pmd, alloc_bootmem_pages_node(NODE_DATA(node), PAGE_SIZE));
396 pte = pte_offset_kernel(pmd, address);
397
398 if (pte_none(*pte))
399 set_pte(pte, pfn_pte(__pa(alloc_bootmem_pages_node(NODE_DATA(node), PAGE_SIZE)) >> PAGE_SHIFT,
400 PAGE_KERNEL));
401 }
402 return 0;
403}
404
405struct memmap_init_callback_data {
406 struct page *start;
407 struct page *end;
408 int nid;
409 unsigned long zone;
410};
411
412static int
413virtual_memmap_init (u64 start, u64 end, void *arg)
414{
415 struct memmap_init_callback_data *args;
416 struct page *map_start, *map_end;
417
418 args = (struct memmap_init_callback_data *) arg;
419 map_start = vmem_map + (__pa(start) >> PAGE_SHIFT);
420 map_end = vmem_map + (__pa(end) >> PAGE_SHIFT);
421
422 if (map_start < args->start)
423 map_start = args->start;
424 if (map_end > args->end)
425 map_end = args->end;
426
427 /*
428 * We have to initialize "out of bounds" struct page elements that fit completely
429 * on the same pages that were allocated for the "in bounds" elements because they
430 * may be referenced later (and found to be "reserved").
431 */
432 map_start -= ((unsigned long) map_start & (PAGE_SIZE - 1)) / sizeof(struct page);
433 map_end += ((PAGE_ALIGN((unsigned long) map_end) - (unsigned long) map_end)
434 / sizeof(struct page));
435
436 if (map_start < map_end)
437 memmap_init_zone((unsigned long)(map_end - map_start),
438 args->nid, args->zone, page_to_pfn(map_start));
439 return 0;
440}
441
442void
443memmap_init (unsigned long size, int nid, unsigned long zone,
444 unsigned long start_pfn)
445{
446 if (!vmem_map)
447 memmap_init_zone(size, nid, zone, start_pfn);
448 else {
449 struct page *start;
450 struct memmap_init_callback_data args;
451
452 start = pfn_to_page(start_pfn);
453 args.start = start;
454 args.end = start + size;
455 args.nid = nid;
456 args.zone = zone;
457
458 efi_memmap_walk(virtual_memmap_init, &args);
459 }
460}
461
462int
463ia64_pfn_valid (unsigned long pfn)
464{
465 char byte;
466 struct page *pg = pfn_to_page(pfn);
467
468 return (__get_user(byte, (char __user *) pg) == 0)
469 && ((((u64)pg & PAGE_MASK) == (((u64)(pg + 1) - 1) & PAGE_MASK))
470 || (__get_user(byte, (char __user *) (pg + 1) - 1) == 0));
471}
472EXPORT_SYMBOL(ia64_pfn_valid);
473
474int
475find_largest_hole (u64 start, u64 end, void *arg)
476{
477 u64 *max_gap = arg;
478
479 static u64 last_end = PAGE_OFFSET;
480
481 /* NOTE: this algorithm assumes efi memmap table is ordered */
482
483 if (*max_gap < (start - last_end))
484 *max_gap = start - last_end;
485 last_end = end;
486 return 0;
487}
488#endif /* CONFIG_VIRTUAL_MEM_MAP */
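
find_largest_hole() assumes the EFI memory map is walked in ascending address order and simply tracks the largest gap between the end of one chunk and the start of the next. A toy illustration of the same update rule, with made-up ranges and a zero starting point instead of PAGE_OFFSET:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	/* Hypothetical, ordered (start, end) memory chunks. */
	uint64_t chunks[][2] = {
		{ 0x00000000, 0x04000000 },
		{ 0x08000000, 0x0c000000 },
		{ 0x20000000, 0x24000000 },
	};
	uint64_t last_end = 0, max_gap = 0;

	for (unsigned i = 0; i < 3; i++) {
		if (chunks[i][0] - last_end > max_gap)	/* same update as find_largest_hole() */
			max_gap = chunks[i][0] - last_end;
		last_end = chunks[i][1];
	}
	printf("largest hole = %#llx bytes\n", (unsigned long long)max_gap);	/* 0x14000000 */
	return 0;
}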
489
490static int
491count_reserved_pages (u64 start, u64 end, void *arg)
492{
493 unsigned long num_reserved = 0;
494 unsigned long *count = arg;
495
496 for (; start < end; start += PAGE_SIZE)
497 if (PageReserved(virt_to_page(start)))
498 ++num_reserved;
499 *count += num_reserved;
500 return 0;
501}
502
503/*
504 * Boot command-line option "nolwsys" can be used to disable the use of any light-weight
505 * system call handler. When this option is in effect, all fsyscalls will end up bubbling
506 * down into the kernel and calling the normal (heavy-weight) syscall handler. This is
507 * useful for performance testing, but conceivably could also come in handy for debugging
508 * purposes.
509 */
510
511static int nolwsys;
512
513static int __init
514nolwsys_setup (char *s)
515{
516 nolwsys = 1;
517 return 1;
518}
519
520__setup("nolwsys", nolwsys_setup);
521
522void
523mem_init (void)
524{
525 long reserved_pages, codesize, datasize, initsize;
526 unsigned long num_pgt_pages;
527 pg_data_t *pgdat;
528 int i;
529 static struct kcore_list kcore_mem, kcore_vmem, kcore_kernel;
530
531#ifdef CONFIG_PCI
532 /*
533 * This needs to be called _after_ the command line has been parsed but _before_
534 * any drivers that may need the PCI DMA interface are initialized or bootmem has
535 * been freed.
536 */
537 platform_dma_init();
538#endif
539
540#ifndef CONFIG_DISCONTIGMEM
541 if (!mem_map)
542 BUG();
543 max_mapnr = max_low_pfn;
544#endif
545
546 high_memory = __va(max_low_pfn * PAGE_SIZE);
547
548 kclist_add(&kcore_mem, __va(0), max_low_pfn * PAGE_SIZE);
549 kclist_add(&kcore_vmem, (void *)VMALLOC_START, VMALLOC_END-VMALLOC_START);
550 kclist_add(&kcore_kernel, _stext, _end - _stext);
551
552 for_each_pgdat(pgdat)
553 totalram_pages += free_all_bootmem_node(pgdat);
554
555 reserved_pages = 0;
556 efi_memmap_walk(count_reserved_pages, &reserved_pages);
557
558 codesize = (unsigned long) _etext - (unsigned long) _stext;
559 datasize = (unsigned long) _edata - (unsigned long) _etext;
560 initsize = (unsigned long) __init_end - (unsigned long) __init_begin;
561
562 printk(KERN_INFO "Memory: %luk/%luk available (%luk code, %luk reserved, "
563 "%luk data, %luk init)\n", (unsigned long) nr_free_pages() << (PAGE_SHIFT - 10),
564 num_physpages << (PAGE_SHIFT - 10), codesize >> 10,
565 reserved_pages << (PAGE_SHIFT - 10), datasize >> 10, initsize >> 10);
566
567 /*
568 * Allow for enough (cached) page table pages so that we can map the entire memory
569 * at least once. Each task also needs a couple of page table pages, so add in a
570 * fudge factor for that (don't use "threads-max" here; that would be wrong!).
571 * Don't allow the cache to be more than 10% of total memory, though.
572 */
573# define NUM_TASKS 500 /* typical number of tasks */
574 num_pgt_pages = nr_free_pages() / PTRS_PER_PGD + NUM_TASKS;
575 if (num_pgt_pages > nr_free_pages() / 10)
576 num_pgt_pages = nr_free_pages() / 10;
577 if (num_pgt_pages > (u64) pgt_cache_water[1])
578 pgt_cache_water[1] = num_pgt_pages;
579
580 /*
581 * For fsyscall entry points with no light-weight handler, use the ordinary
582 * (heavy-weight) handler, but mark it by setting bit 0, so the fsyscall entry
583 * code can tell them apart.
584 */
585 for (i = 0; i < NR_syscalls; ++i) {
586 extern unsigned long fsyscall_table[NR_syscalls];
587 extern unsigned long sys_call_table[NR_syscalls];
588
589 if (!fsyscall_table[i] || nolwsys)
590 fsyscall_table[i] = sys_call_table[i] | 1;
591 }
592 setup_gate();
593
594#ifdef CONFIG_IA32_SUPPORT
595 ia32_mem_init();
596#endif
597}
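
The fsyscall table fix-up at the end of mem_init() works because ia64 instruction bundles are 16-byte aligned, so bit 0 of a valid handler address is always clear; ORing in 1 lets the fsyscall entry path tell "fall back to the heavy-weight handler" apart from a real light-weight entry point without a separate flag table. A minimal sketch of that tagging convention, with made-up addresses:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint64_t light = 0xa000000000020c40ULL;	/* hypothetical light-weight handler   */
	uint64_t heavy = 0xa000000000101230ULL;	/* hypothetical sys_call_table entry   */

	/* As in mem_init(): missing light-weight handlers get the heavy one, tagged with bit 0. */
	uint64_t entry_present = light;
	uint64_t entry_missing = heavy | 1;

	printf("present: %#llx (tag %llu)\n", (unsigned long long)entry_present,
	       (unsigned long long)(entry_present & 1));
	printf("missing: %#llx (tag %llu)\n", (unsigned long long)entry_missing,
	       (unsigned long long)(entry_missing & 1));
	return 0;
}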
diff --git a/arch/ia64/mm/numa.c b/arch/ia64/mm/numa.c
new file mode 100644
index 000000000000..77118bbf3d8b
--- /dev/null
+++ b/arch/ia64/mm/numa.c
@@ -0,0 +1,49 @@
1/*
2 * This file is subject to the terms and conditions of the GNU General Public
3 * License. See the file "COPYING" in the main directory of this archive
4 * for more details.
5 *
6 * This file contains NUMA specific variables and functions which can
7 * be split away from DISCONTIGMEM and are used on NUMA machines with
8 * contiguous memory.
9 *
10 * 2002/08/07 Erich Focht <efocht@ess.nec.de>
11 */
12
13#include <linux/config.h>
14#include <linux/cpu.h>
15#include <linux/kernel.h>
16#include <linux/mm.h>
17#include <linux/node.h>
18#include <linux/init.h>
19#include <linux/bootmem.h>
20#include <asm/mmzone.h>
21#include <asm/numa.h>
22
23
24/*
25 * The following structures are usually initialized by ACPI or
26 * similar mechanisms and describe the NUMA characteristics of the machine.
27 */
28int num_node_memblks;
29struct node_memblk_s node_memblk[NR_NODE_MEMBLKS];
30struct node_cpuid_s node_cpuid[NR_CPUS];
31/*
32 * This is a matrix with "distances" between nodes, they should be
33 * proportional to the memory access latency ratios.
34 */
35u8 numa_slit[MAX_NUMNODES * MAX_NUMNODES];
36
37/* Identify which cnode a physical address resides on */
38int
39paddr_to_nid(unsigned long paddr)
40{
41 int i;
42
43 for (i = 0; i < num_node_memblks; i++)
44 if (paddr >= node_memblk[i].start_paddr &&
45 paddr < node_memblk[i].start_paddr + node_memblk[i].size)
46 break;
47
48 return (i < num_node_memblks) ? node_memblk[i].nid : (num_node_memblks ? -1 : 0);
49}
diff --git a/arch/ia64/mm/tlb.c b/arch/ia64/mm/tlb.c
new file mode 100644
index 000000000000..464557e4ed82
--- /dev/null
+++ b/arch/ia64/mm/tlb.c
@@ -0,0 +1,190 @@
1/*
2 * TLB support routines.
3 *
4 * Copyright (C) 1998-2001, 2003 Hewlett-Packard Co
5 * David Mosberger-Tang <davidm@hpl.hp.com>
6 *
7 * 08/02/00 A. Mallick <asit.k.mallick@intel.com>
8 * Modified RID allocation for SMP
9 * Goutham Rao <goutham.rao@intel.com>
10 * IPI based ptc implementation and A-step IPI implementation.
11 */
12#include <linux/config.h>
13#include <linux/module.h>
14#include <linux/init.h>
15#include <linux/kernel.h>
16#include <linux/sched.h>
17#include <linux/smp.h>
18#include <linux/mm.h>
19
20#include <asm/delay.h>
21#include <asm/mmu_context.h>
22#include <asm/pgalloc.h>
23#include <asm/pal.h>
24#include <asm/tlbflush.h>
25
26static struct {
27 unsigned long mask; /* mask of supported purge page-sizes */
28 unsigned long max_bits; /* log2() of largest supported purge page-size */
29} purge;
30
31struct ia64_ctx ia64_ctx = {
32 .lock = SPIN_LOCK_UNLOCKED,
33 .next = 1,
34 .limit = (1 << 15) - 1, /* start out with the safe (architected) limit */
35 .max_ctx = ~0U
36};
37
38DEFINE_PER_CPU(u8, ia64_need_tlb_flush);
39
40/*
41 * Acquire the ia64_ctx.lock before calling this function!
42 */
43void
44wrap_mmu_context (struct mm_struct *mm)
45{
46 unsigned long tsk_context, max_ctx = ia64_ctx.max_ctx;
47 struct task_struct *tsk;
48 int i;
49
50 if (ia64_ctx.next > max_ctx)
51 ia64_ctx.next = 300; /* skip daemons */
52 ia64_ctx.limit = max_ctx + 1;
53
54 /*
55 * Scan all tasks' mm->context and set a proper safe range
56 */
57
58 read_lock(&tasklist_lock);
59 repeat:
60 for_each_process(tsk) {
61 if (!tsk->mm)
62 continue;
63 tsk_context = tsk->mm->context;
64 if (tsk_context == ia64_ctx.next) {
65 if (++ia64_ctx.next >= ia64_ctx.limit) {
66 /* empty range: reset the range limit and start over */
67 if (ia64_ctx.next > max_ctx)
68 ia64_ctx.next = 300;
69 ia64_ctx.limit = max_ctx + 1;
70 goto repeat;
71 }
72 }
73 if ((tsk_context > ia64_ctx.next) && (tsk_context < ia64_ctx.limit))
74 ia64_ctx.limit = tsk_context;
75 }
76 read_unlock(&tasklist_lock);
77 /* can't call flush_tlb_all() here because of race condition with O(1) scheduler [EF] */
78 {
79 int cpu = get_cpu(); /* prevent preemption/migration */
80 for (i = 0; i < NR_CPUS; ++i)
81 if (cpu_online(i) && (i != cpu))
82 per_cpu(ia64_need_tlb_flush, i) = 1;
83 put_cpu();
84 }
85 local_flush_tlb_all();
86}
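
The effect of the scan in wrap_mmu_context() is to pick a [next, limit) window of context numbers (region IDs) that no live mm is currently using: next is bumped past any context it collides with, and limit is pulled down to the lowest in-use context above next. A self-contained simulation of that single pass over a hypothetical set of in-use contexts, in the scan order given; the kernel version additionally restarts the scan when next reaches limit:

#include <stdio.h>

int main(void)
{
	/* Hypothetical contexts currently owned by live tasks, in scan order. */
	unsigned long in_use[] = { 300, 301, 1000, 302 };
	unsigned long max_ctx = (1UL << 15) - 1;	/* architected safe limit from ia64_ctx */
	unsigned long next = 300, limit = max_ctx + 1;

	for (unsigned i = 0; i < sizeof(in_use) / sizeof(in_use[0]); i++) {
		if (in_use[i] == next)
			next++;				/* skip over a context that is taken */
		if (in_use[i] > next && in_use[i] < limit)
			limit = in_use[i];		/* shrink the usable window */
	}
	printf("allocatable context window: [%lu, %lu)\n", next, limit);	/* [303, 1000) here */
	return 0;
}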
87
88void
89ia64_global_tlb_purge (unsigned long start, unsigned long end, unsigned long nbits)
90{
91 static DEFINE_SPINLOCK(ptcg_lock);
92
93 /* HW requires global serialization of ptc.ga. */
94 spin_lock(&ptcg_lock);
95 {
96 do {
97 /*
98 * Flush ALAT entries also.
99 */
100 ia64_ptcga(start, (nbits<<2));
101 ia64_srlz_i();
102 start += (1UL << nbits);
103 } while (start < end);
104 }
105 spin_unlock(&ptcg_lock);
106}
107
108void
109local_flush_tlb_all (void)
110{
111 unsigned long i, j, flags, count0, count1, stride0, stride1, addr;
112
113 addr = local_cpu_data->ptce_base;
114 count0 = local_cpu_data->ptce_count[0];
115 count1 = local_cpu_data->ptce_count[1];
116 stride0 = local_cpu_data->ptce_stride[0];
117 stride1 = local_cpu_data->ptce_stride[1];
118
119 local_irq_save(flags);
120 for (i = 0; i < count0; ++i) {
121 for (j = 0; j < count1; ++j) {
122 ia64_ptce(addr);
123 addr += stride1;
124 }
125 addr += stride0;
126 }
127 local_irq_restore(flags);
128 ia64_srlz_i(); /* srlz.i implies srlz.d */
129}
130
131void
132flush_tlb_range (struct vm_area_struct *vma, unsigned long start, unsigned long end)
133{
134 struct mm_struct *mm = vma->vm_mm;
135 unsigned long size = end - start;
136 unsigned long nbits;
137
138 if (mm != current->active_mm) {
139 /* this does happen, but perhaps it's not worth optimizing for? */
140#ifdef CONFIG_SMP
141 flush_tlb_all();
142#else
143 mm->context = 0;
144#endif
145 return;
146 }
147
148 nbits = ia64_fls(size + 0xfff);
149 while (unlikely (((1UL << nbits) & purge.mask) == 0) && (nbits < purge.max_bits))
150 ++nbits;
151 if (nbits > purge.max_bits)
152 nbits = purge.max_bits;
153 start &= ~((1UL << nbits) - 1);
154
155# ifdef CONFIG_SMP
156 platform_global_tlb_purge(start, end, nbits);
157# else
158 do {
159 ia64_ptcl(start, (nbits<<2));
160 start += (1UL << nbits);
161 } while (start < end);
162# endif
163
164 ia64_srlz_i(); /* srlz.i implies srlz.d */
165}
166EXPORT_SYMBOL(flush_tlb_range);
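
The nbits computation in flush_tlb_range() rounds the flush size up to the nearest purge page size the CPU supports: ia64_fls(size + 0xfff) gives the index of the most significant bit, and the loop then bumps nbits until (1 << nbits) appears in purge.mask. A worked example using the fallback mask from ia64_tlb_init() below (0x115557000) and two hypothetical flush lengths:

#include <stdio.h>

/* Most-significant set bit index, standing in for ia64_fls(). */
static unsigned fls64_idx(unsigned long long x)
{
	unsigned i = 0;
	while (x >>= 1)
		i++;
	return i;
}

int main(void)
{
	unsigned long long purge_mask = 0x115557000ULL;	/* fallback mask from ia64_tlb_init() */
	unsigned max_bits = fls64_idx(purge_mask);
	unsigned long long sizes[] = { 0x5000, 0x30000 };	/* hypothetical flush lengths */

	for (unsigned i = 0; i < 2; i++) {
		unsigned nbits = fls64_idx(sizes[i] + 0xfff);
		while (((1ULL << nbits) & purge_mask) == 0 && nbits < max_bits)
			nbits++;		/* round up to a supported purge page size */
		printf("size %#llx -> purge in %#llx-byte steps\n",
		       sizes[i], 1ULL << nbits);	/* 0x5000 -> 16KB, 0x30000 -> 256KB */
	}
	return 0;
}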
167
168void __devinit
169ia64_tlb_init (void)
170{
171 ia64_ptce_info_t ptce_info;
172 unsigned long tr_pgbits;
173 long status;
174
175 if ((status = ia64_pal_vm_page_size(&tr_pgbits, &purge.mask)) != 0) {
176 printk(KERN_ERR "PAL_VM_PAGE_SIZE failed with status=%ld;"
177 "defaulting to architected purge page-sizes.\n", status);
178 purge.mask = 0x115557000UL;
179 }
180 purge.max_bits = ia64_fls(purge.mask);
181
182 ia64_get_ptce(&ptce_info);
183 local_cpu_data->ptce_base = ptce_info.base;
184 local_cpu_data->ptce_count[0] = ptce_info.count[0];
185 local_cpu_data->ptce_count[1] = ptce_info.count[1];
186 local_cpu_data->ptce_stride[0] = ptce_info.stride[0];
187 local_cpu_data->ptce_stride[1] = ptce_info.stride[1];
188
189 local_flush_tlb_all(); /* nuke left overs from bootstrapping... */
190}
diff --git a/arch/ia64/module.lds b/arch/ia64/module.lds
new file mode 100644
index 000000000000..6481f42fbd8c
--- /dev/null
+++ b/arch/ia64/module.lds
@@ -0,0 +1,13 @@
1SECTIONS {
2 /* Group unwind sections into a single section: */
3 .IA_64.unwind_info : { *(.IA_64.unwind_info*) }
4 .IA_64.unwind : { *(.IA_64.unwind*) }
5 /*
6 * Create place-holder sections to hold the PLTs, GOT, and
7 * official procedure-descriptors (.opd).
8 */
9 .core.plt : { BYTE(0) }
10 .init.plt : { BYTE(0) }
11 .got : { BYTE(0) }
12 .opd : { BYTE(0) }
13}
diff --git a/arch/ia64/oprofile/Kconfig b/arch/ia64/oprofile/Kconfig
new file mode 100644
index 000000000000..56e6f614b04a
--- /dev/null
+++ b/arch/ia64/oprofile/Kconfig
@@ -0,0 +1,26 @@
1
2menu "Profiling support"
3 depends on EXPERIMENTAL
4
5config PROFILING
6 bool "Profiling support (EXPERIMENTAL)"
7 help
8 Say Y here to enable the extended profiling support mechanisms used
9 by profilers such as OProfile.
10
11config OPROFILE
12 tristate "OProfile system profiling (EXPERIMENTAL)"
13 depends on PROFILING
14 help
15 OProfile is a profiling system capable of profiling the
16 whole system, including the kernel, kernel modules, libraries,
17 and applications.
18
19 Due to firmware bugs, you may need to use the "nohalt" boot
20 option if you're using OProfile with the hardware performance
21 counters.
22
23 If unsure, say N.
24
25endmenu
26
diff --git a/arch/ia64/oprofile/Makefile b/arch/ia64/oprofile/Makefile
new file mode 100644
index 000000000000..aad27a718ee0
--- /dev/null
+++ b/arch/ia64/oprofile/Makefile
@@ -0,0 +1,10 @@
1obj-$(CONFIG_OPROFILE) += oprofile.o
2
3DRIVER_OBJS := $(addprefix ../../../drivers/oprofile/, \
4 oprof.o cpu_buffer.o buffer_sync.o \
5 event_buffer.o oprofile_files.o \
6 oprofilefs.o oprofile_stats.o \
7 timer_int.o )
8
9oprofile-y := $(DRIVER_OBJS) init.o backtrace.o
10oprofile-$(CONFIG_PERFMON) += perfmon.o
diff --git a/arch/ia64/oprofile/backtrace.c b/arch/ia64/oprofile/backtrace.c
new file mode 100644
index 000000000000..b7dabbfb0d61
--- /dev/null
+++ b/arch/ia64/oprofile/backtrace.c
@@ -0,0 +1,150 @@
1/**
2 * @file backtrace.c
3 *
4 * @remark Copyright 2004 Silicon Graphics Inc. All Rights Reserved.
5 * @remark Read the file COPYING
6 *
7 * @author Greg Banks <gnb@melbourne.sgi.com>
8 * @author Keith Owens <kaos@melbourne.sgi.com>
9 * Based on work done for the ia64 port of the SGI kernprof patch, which is
10 * Copyright (c) 2003-2004 Silicon Graphics Inc. All Rights Reserved.
11 */
12
13#include <linux/oprofile.h>
14#include <linux/sched.h>
15#include <linux/mm.h>
16#include <asm/ptrace.h>
17#include <asm/system.h>
18
19/*
20 * For IA64 we need to perform a complex little dance to get both
21 * the struct pt_regs and a synthetic struct switch_stack in place
22 * to allow the unwind code to work. This dance requires the code that uses
23 * the unwinder to be called from a function invoked by unw_init_running().
24 * There we only get a single void* data pointer, so use this struct
25 * to hold all the data we need during the unwind.
26 */
27typedef struct
28{
29 unsigned int depth;
30 struct pt_regs *regs;
31 struct unw_frame_info frame;
32 u64 *prev_pfs_loc; /* state for WAR for old spinlock ool code */
33} ia64_backtrace_t;
34
35#if __GNUC__ < 3 || (__GNUC__ == 3 && __GNUC_MINOR__ < 3)
36/*
37 * Returns non-zero if the PC is in the spinlock contention out-of-line code
38 * with non-standard calling sequence (on older compilers).
39 */
40static __inline__ int in_old_ool_spinlock_code(unsigned long pc)
41{
42 extern const char ia64_spinlock_contention_pre3_4[] __attribute__ ((weak));
43 extern const char ia64_spinlock_contention_pre3_4_end[] __attribute__ ((weak));
44 unsigned long sc_start = (unsigned long)ia64_spinlock_contention_pre3_4;
45 unsigned long sc_end = (unsigned long)ia64_spinlock_contention_pre3_4_end;
46 return (sc_start && sc_end && pc >= sc_start && pc < sc_end);
47}
48#else
49/* Newer spinlock code does a proper br.call and works fine with the unwinder */
50#define in_old_ool_spinlock_code(pc) 0
51#endif
52
53/* Returns non-zero if the PC is in the Interrupt Vector Table */
54static __inline__ int in_ivt_code(unsigned long pc)
55{
56 extern char ia64_ivt[];
57 return (pc >= (u_long)ia64_ivt && pc < (u_long)ia64_ivt+32768);
58}
59
60/*
61 * Unwind to next stack frame.
62 */
63static __inline__ int next_frame(ia64_backtrace_t *bt)
64{
65 /*
66 * Avoid unsightly console message from unw_unwind() when attempting
67 * to unwind through the Interrupt Vector Table which has no unwind
68 * information.
69 */
70 if (in_ivt_code(bt->frame.ip))
71 return 0;
72
73 /*
74 * WAR for spinlock contention from leaf functions. ia64_spinlock_contention_pre3_4
75 * has ar.pfs == r0. Leaf functions do not modify ar.pfs so ar.pfs remains
76 * as 0, stopping the backtrace. Record the previous ar.pfs when the current
77 * IP is in ia64_spinlock_contention_pre3_4 then unwind, if pfs_loc has not changed
78 * after unwind then use pt_regs.ar_pfs which is where the real ar.pfs is for
79 * leaf functions.
80 */
81 if (bt->prev_pfs_loc && bt->regs && bt->frame.pfs_loc == bt->prev_pfs_loc)
82 bt->frame.pfs_loc = &bt->regs->ar_pfs;
83 bt->prev_pfs_loc = (in_old_ool_spinlock_code(bt->frame.ip) ? bt->frame.pfs_loc : NULL);
84
85 return unw_unwind(&bt->frame) == 0;
86}
87
88
89static void do_ia64_backtrace(struct unw_frame_info *info, void *vdata)
90{
91 ia64_backtrace_t *bt = vdata;
92 struct switch_stack *sw;
93 int count = 0;
94 u_long pc, sp;
95
96 sw = (struct switch_stack *)(info+1);
97 /* padding from unw_init_running */
98 sw = (struct switch_stack *)(((unsigned long)sw + 15) & ~15);
99
100 unw_init_frame_info(&bt->frame, current, sw);
101
102 /* skip over interrupt frame and oprofile calls */
103 do {
104 unw_get_sp(&bt->frame, &sp);
105 if (sp >= (u_long)bt->regs)
106 break;
107 if (!next_frame(bt))
108 return;
109 } while (count++ < 200);
110
111 /* finally, grab the actual sample */
112 while (bt->depth-- && next_frame(bt)) {
113 unw_get_ip(&bt->frame, &pc);
114 oprofile_add_trace(pc);
115 if (unw_is_intr_frame(&bt->frame)) {
116 /*
117 * Interrupt received on kernel stack; this can
118 * happen when timer interrupt fires while processing
119 * a softirq from the tail end of a hardware interrupt
120 * which interrupted a system call. Don't laugh, it
121 * happens! Splice the backtrace into two parts to
122 * avoid spurious cycles in the gprof output.
123 */
124 /* TODO: split rather than drop the 2nd half */
125 break;
126 }
127 }
128}
129
130void
131ia64_backtrace(struct pt_regs * const regs, unsigned int depth)
132{
133 ia64_backtrace_t bt;
134 unsigned long flags;
135
136 /*
137 * On IA64 there is little hope of getting backtraces from
138 * user space programs -- the problems of getting the unwind
139 * information from arbitrary user programs are extreme.
140 */
141 if (user_mode(regs))
142 return;
143
144 bt.depth = depth;
145 bt.regs = regs;
146 bt.prev_pfs_loc = NULL;
147 local_irq_save(flags);
148 unw_init_running(do_ia64_backtrace, &bt);
149 local_irq_restore(flags);
150}
diff --git a/arch/ia64/oprofile/init.c b/arch/ia64/oprofile/init.c
new file mode 100644
index 000000000000..125a602a660d
--- /dev/null
+++ b/arch/ia64/oprofile/init.c
@@ -0,0 +1,38 @@
1/**
2 * @file init.c
3 *
4 * @remark Copyright 2002 OProfile authors
5 * @remark Read the file COPYING
6 *
7 * @author John Levon <levon@movementarian.org>
8 */
9
10#include <linux/kernel.h>
11#include <linux/oprofile.h>
12#include <linux/init.h>
13#include <linux/errno.h>
14
15extern int perfmon_init(struct oprofile_operations * ops);
16extern void perfmon_exit(void);
17extern void ia64_backtrace(struct pt_regs * const regs, unsigned int depth);
18
19int __init oprofile_arch_init(struct oprofile_operations * ops)
20{
21 int ret = -ENODEV;
22
23#ifdef CONFIG_PERFMON
24 /* perfmon_init() can fail, but we have no way to report it */
25 ret = perfmon_init(ops);
26#endif
27 ops->backtrace = ia64_backtrace;
28
29 return ret;
30}
31
32
33void oprofile_arch_exit(void)
34{
35#ifdef CONFIG_PERFMON
36 perfmon_exit();
37#endif
38}
diff --git a/arch/ia64/oprofile/perfmon.c b/arch/ia64/oprofile/perfmon.c
new file mode 100644
index 000000000000..b7975a469fb8
--- /dev/null
+++ b/arch/ia64/oprofile/perfmon.c
@@ -0,0 +1,100 @@
1/**
2 * @file perfmon.c
3 *
4 * @remark Copyright 2003 OProfile authors
5 * @remark Read the file COPYING
6 *
7 * @author John Levon <levon@movementarian.org>
8 */
9
10#include <linux/kernel.h>
11#include <linux/config.h>
12#include <linux/oprofile.h>
13#include <linux/sched.h>
14#include <asm/perfmon.h>
15#include <asm/ptrace.h>
16#include <asm/errno.h>
17
18static int allow_ints;
19
20static int
21perfmon_handler(struct task_struct *task, void *buf, pfm_ovfl_arg_t *arg,
22 struct pt_regs *regs, unsigned long stamp)
23{
24 int event = arg->pmd_eventid;
25
26 arg->ovfl_ctrl.bits.reset_ovfl_pmds = 1;
27
28 /* the owner of the oprofile event buffer may have exited
29 * without perfmon being shut down (e.g. SIGSEGV)
30 */
31 if (allow_ints)
32 oprofile_add_sample(regs, event);
33 return 0;
34}
35
36
37static int perfmon_start(void)
38{
39 allow_ints = 1;
40 return 0;
41}
42
43
44static void perfmon_stop(void)
45{
46 allow_ints = 0;
47}
48
49
50#define OPROFILE_FMT_UUID { \
51 0x77, 0x7a, 0x6e, 0x61, 0x20, 0x65, 0x73, 0x69, 0x74, 0x6e, 0x72, 0x20, 0x61, 0x65, 0x0a, 0x6c }
52
53static pfm_buffer_fmt_t oprofile_fmt = {
54 .fmt_name = "oprofile_format",
55 .fmt_uuid = OPROFILE_FMT_UUID,
56 .fmt_handler = perfmon_handler,
57};
58
59
60static char * get_cpu_type(void)
61{
62 __u8 family = local_cpu_data->family;
63
64 switch (family) {
65 case 0x07:
66 return "ia64/itanium";
67 case 0x1f:
68 return "ia64/itanium2";
69 default:
70 return "ia64/ia64";
71 }
72}
73
74
75/* all the ops are handled via userspace for IA64 perfmon */
76
77static int using_perfmon;
78
79int perfmon_init(struct oprofile_operations * ops)
80{
81 int ret = pfm_register_buffer_fmt(&oprofile_fmt);
82 if (ret)
83 return -ENODEV;
84
85 ops->cpu_type = get_cpu_type();
86 ops->start = perfmon_start;
87 ops->stop = perfmon_stop;
88 using_perfmon = 1;
89 printk(KERN_INFO "oprofile: using perfmon.\n");
90 return 0;
91}
92
93
94void perfmon_exit(void)
95{
96 if (!using_perfmon)
97 return;
98
99 pfm_unregister_buffer_fmt(oprofile_fmt.fmt_uuid);
100}
diff --git a/arch/ia64/pci/Makefile b/arch/ia64/pci/Makefile
new file mode 100644
index 000000000000..e66889e6922a
--- /dev/null
+++ b/arch/ia64/pci/Makefile
@@ -0,0 +1,4 @@
1#
2# Makefile for the ia64-specific parts of the pci bus
3#
4obj-y := pci.o
diff --git a/arch/ia64/pci/pci.c b/arch/ia64/pci/pci.c
new file mode 100644
index 000000000000..88641e5095b5
--- /dev/null
+++ b/arch/ia64/pci/pci.c
@@ -0,0 +1,735 @@
1/*
2 * pci.c - Low-Level PCI Access in IA-64
3 *
4 * Derived from bios32.c of i386 tree.
5 *
6 * (c) Copyright 2002, 2005 Hewlett-Packard Development Company, L.P.
7 * David Mosberger-Tang <davidm@hpl.hp.com>
8 * Bjorn Helgaas <bjorn.helgaas@hp.com>
9 * Copyright (C) 2004 Silicon Graphics, Inc.
10 *
11 * Note: Above list of copyright holders is incomplete...
12 */
13#include <linux/config.h>
14
15#include <linux/acpi.h>
16#include <linux/types.h>
17#include <linux/kernel.h>
18#include <linux/pci.h>
19#include <linux/init.h>
20#include <linux/ioport.h>
21#include <linux/slab.h>
22#include <linux/smp_lock.h>
23#include <linux/spinlock.h>
24
25#include <asm/machvec.h>
26#include <asm/page.h>
27#include <asm/segment.h>
28#include <asm/system.h>
29#include <asm/io.h>
30#include <asm/sal.h>
31#include <asm/smp.h>
32#include <asm/irq.h>
33#include <asm/hw_irq.h>
34
35
36static int pci_routeirq;
37
38/*
39 * Low-level SAL-based PCI configuration access functions. Note that SAL
40 * calls are already serialized (via sal_lock), so we don't need another
41 * synchronization mechanism here.
42 */
43
44#define PCI_SAL_ADDRESS(seg, bus, devfn, reg) \
45 (((u64) seg << 24) | (bus << 16) | (devfn << 8) | (reg))
46
47/* SAL 3.2 adds support for extended config space. */
48
49#define PCI_SAL_EXT_ADDRESS(seg, bus, devfn, reg) \
50 (((u64) seg << 28) | (bus << 20) | (devfn << 12) | (reg))
51
52static int
53pci_sal_read (unsigned int seg, unsigned int bus, unsigned int devfn,
54 int reg, int len, u32 *value)
55{
56 u64 addr, data = 0;
57 int mode, result;
58
59 if (!value || (seg > 65535) || (bus > 255) || (devfn > 255) || (reg > 4095))
60 return -EINVAL;
61
62 if ((seg | reg) <= 255) {
63 addr = PCI_SAL_ADDRESS(seg, bus, devfn, reg);
64 mode = 0;
65 } else {
66 addr = PCI_SAL_EXT_ADDRESS(seg, bus, devfn, reg);
67 mode = 1;
68 }
69 result = ia64_sal_pci_config_read(addr, mode, len, &data);
70 if (result != 0)
71 return -EINVAL;
72
73 *value = (u32) data;
74 return 0;
75}
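
The SAL config accessors pack segment/bus/devfn/register into a single address, using the compact encoding when both the segment and the register fit in 8 bits, and the SAL 3.2 extended encoding otherwise. A worked example of the two encodings with hypothetical device coordinates:

#include <stdio.h>
#include <stdint.h>

#define EX_PCI_SAL_ADDRESS(seg, bus, devfn, reg) \
	(((uint64_t)(seg) << 24) | ((bus) << 16) | ((devfn) << 8) | (reg))
#define EX_PCI_SAL_EXT_ADDRESS(seg, bus, devfn, reg) \
	(((uint64_t)(seg) << 28) | ((bus) << 20) | ((devfn) << 12) | (reg))

int main(void)
{
	/* Hypothetical device: segment 0, bus 2, device 3 function 1 (devfn 0x19). */
	unsigned seg = 0, bus = 2, devfn = 0x19;

	unsigned reg = 0x44;			/* fits in 8 bits -> compact encoding */
	printf("reg %#x  -> addr %#llx (mode 0)\n", reg,
	       (unsigned long long)EX_PCI_SAL_ADDRESS(seg, bus, devfn, reg));	/* 0x21944 */

	reg = 0x144;				/* extended config space -> SAL 3.2 encoding */
	printf("reg %#x -> addr %#llx (mode 1)\n", reg,
	       (unsigned long long)EX_PCI_SAL_EXT_ADDRESS(seg, bus, devfn, reg));	/* 0x219144 */
	return 0;
}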
76
77static int
78pci_sal_write (unsigned int seg, unsigned int bus, unsigned int devfn,
79 int reg, int len, u32 value)
80{
81 u64 addr;
82 int mode, result;
83
84 if ((seg > 65535) || (bus > 255) || (devfn > 255) || (reg > 4095))
85 return -EINVAL;
86
87 if ((seg | reg) <= 255) {
88 addr = PCI_SAL_ADDRESS(seg, bus, devfn, reg);
89 mode = 0;
90 } else {
91 addr = PCI_SAL_EXT_ADDRESS(seg, bus, devfn, reg);
92 mode = 1;
93 }
94 result = ia64_sal_pci_config_write(addr, mode, len, value);
95 if (result != 0)
96 return -EINVAL;
97 return 0;
98}
99
100static struct pci_raw_ops pci_sal_ops = {
101 .read = pci_sal_read,
102 .write = pci_sal_write
103};
104
105struct pci_raw_ops *raw_pci_ops = &pci_sal_ops;
106
107static int
108pci_read (struct pci_bus *bus, unsigned int devfn, int where, int size, u32 *value)
109{
110 return raw_pci_ops->read(pci_domain_nr(bus), bus->number,
111 devfn, where, size, value);
112}
113
114static int
115pci_write (struct pci_bus *bus, unsigned int devfn, int where, int size, u32 value)
116{
117 return raw_pci_ops->write(pci_domain_nr(bus), bus->number,
118 devfn, where, size, value);
119}
120
121struct pci_ops pci_root_ops = {
122 .read = pci_read,
123 .write = pci_write,
124};
125
126#ifdef CONFIG_NUMA
127extern acpi_status acpi_map_iosapic(acpi_handle, u32, void *, void **);
128static void acpi_map_iosapics(void)
129{
130 acpi_get_devices(NULL, acpi_map_iosapic, NULL, NULL);
131}
132#else
133static void acpi_map_iosapics(void)
134{
135 return;
136}
137#endif /* CONFIG_NUMA */
138
139static int __init
140pci_acpi_init (void)
141{
142 struct pci_dev *dev = NULL;
143
144 printk(KERN_INFO "PCI: Using ACPI for IRQ routing\n");
145
146 acpi_map_iosapics();
147
148 if (pci_routeirq) {
149 /*
150 * PCI IRQ routing is set up by pci_enable_device(), but we
151 * also do it here in case there are still broken drivers that
152 * don't use pci_enable_device().
153 */
154 printk(KERN_INFO "PCI: Routing interrupts for all devices because \"pci=routeirq\" specified\n");
155 for_each_pci_dev(dev)
156 acpi_pci_irq_enable(dev);
157 } else
158 printk(KERN_INFO "PCI: If a device doesn't work, try \"pci=routeirq\". If it helps, post a report\n");
159
160 return 0;
161}
162
163subsys_initcall(pci_acpi_init);
164
165/* Called by ACPI when it finds a new root bus. */
166
167static struct pci_controller * __devinit
168alloc_pci_controller (int seg)
169{
170 struct pci_controller *controller;
171
172 controller = kmalloc(sizeof(*controller), GFP_KERNEL);
173 if (!controller)
174 return NULL;
175
176 memset(controller, 0, sizeof(*controller));
177 controller->segment = seg;
178 return controller;
179}
180
181static u64 __devinit
182add_io_space (struct acpi_resource_address64 *addr)
183{
184 u64 offset;
185 int sparse = 0;
186 int i;
187
188 if (addr->address_translation_offset == 0)
189 return IO_SPACE_BASE(0); /* part of legacy IO space */
190
191 if (addr->attribute.io.translation_attribute == ACPI_SPARSE_TRANSLATION)
192 sparse = 1;
193
194 offset = (u64) ioremap(addr->address_translation_offset, 0);
195 for (i = 0; i < num_io_spaces; i++)
196 if (io_space[i].mmio_base == offset &&
197 io_space[i].sparse == sparse)
198 return IO_SPACE_BASE(i);
199
200 if (num_io_spaces == MAX_IO_SPACES) {
201 printk("Too many IO port spaces\n");
202 return ~0;
203 }
204
205 i = num_io_spaces++;
206 io_space[i].mmio_base = offset;
207 io_space[i].sparse = sparse;
208
209 return IO_SPACE_BASE(i);
210}
211
212static acpi_status __devinit
213count_window (struct acpi_resource *resource, void *data)
214{
215 unsigned int *windows = (unsigned int *) data;
216 struct acpi_resource_address64 addr;
217 acpi_status status;
218
219 status = acpi_resource_to_address64(resource, &addr);
220 if (ACPI_SUCCESS(status))
221 if (addr.resource_type == ACPI_MEMORY_RANGE ||
222 addr.resource_type == ACPI_IO_RANGE)
223 (*windows)++;
224
225 return AE_OK;
226}
227
228struct pci_root_info {
229 struct pci_controller *controller;
230 char *name;
231};
232
233static __devinit acpi_status add_window(struct acpi_resource *res, void *data)
234{
235 struct pci_root_info *info = data;
236 struct pci_window *window;
237 struct acpi_resource_address64 addr;
238 acpi_status status;
239 unsigned long flags, offset = 0;
240 struct resource *root;
241
242 status = acpi_resource_to_address64(res, &addr);
243 if (!ACPI_SUCCESS(status))
244 return AE_OK;
245
246 if (!addr.address_length)
247 return AE_OK;
248
249 if (addr.resource_type == ACPI_MEMORY_RANGE) {
250 flags = IORESOURCE_MEM;
251 root = &iomem_resource;
252 offset = addr.address_translation_offset;
253 } else if (addr.resource_type == ACPI_IO_RANGE) {
254 flags = IORESOURCE_IO;
255 root = &ioport_resource;
256 offset = add_io_space(&addr);
257 if (offset == ~0)
258 return AE_OK;
259 } else
260 return AE_OK;
261
262 window = &info->controller->window[info->controller->windows++];
263 window->resource.name = info->name;
264 window->resource.flags = flags;
265 window->resource.start = addr.min_address_range + offset;
266 window->resource.end = addr.max_address_range + offset;
267 window->resource.child = NULL;
268 window->offset = offset;
269
270 if (insert_resource(root, &window->resource)) {
271 printk(KERN_ERR "alloc 0x%lx-0x%lx from %s for %s failed\n",
272 window->resource.start, window->resource.end,
273 root->name, info->name);
274 }
275
276 return AE_OK;
277}
278
279static void __devinit
280pcibios_setup_root_windows(struct pci_bus *bus, struct pci_controller *ctrl)
281{
282 int i, j;
283
284 j = 0;
285 for (i = 0; i < ctrl->windows; i++) {
286 struct resource *res = &ctrl->window[i].resource;
287 /* HP's firmware has a hack to work around a Windows bug.
288 * Ignore these tiny memory ranges */
289 if ((res->flags & IORESOURCE_MEM) &&
290 (res->end - res->start < 16))
291 continue;
292 if (j >= PCI_BUS_NUM_RESOURCES) {
293 printk("Ignoring range [%lx-%lx] (%lx)\n", res->start,
294 res->end, res->flags);
295 continue;
296 }
297 bus->resource[j++] = res;
298 }
299}
300
301struct pci_bus * __devinit
302pci_acpi_scan_root(struct acpi_device *device, int domain, int bus)
303{
304 struct pci_root_info info;
305 struct pci_controller *controller;
306 unsigned int windows = 0;
307 struct pci_bus *pbus;
308 char *name;
309
310 controller = alloc_pci_controller(domain);
311 if (!controller)
312 goto out1;
313
314 controller->acpi_handle = device->handle;
315
316 acpi_walk_resources(device->handle, METHOD_NAME__CRS, count_window,
317 &windows);
318 controller->window = kmalloc(sizeof(*controller->window) * windows,
319 GFP_KERNEL);
320 if (!controller->window)
321 goto out2;
322
323 name = kmalloc(16, GFP_KERNEL);
324 if (!name)
325 goto out3;
326
327 sprintf(name, "PCI Bus %04x:%02x", domain, bus);
328 info.controller = controller;
329 info.name = name;
330 acpi_walk_resources(device->handle, METHOD_NAME__CRS, add_window,
331 &info);
332
333 pbus = pci_scan_bus(bus, &pci_root_ops, controller);
334 if (pbus)
335 pcibios_setup_root_windows(pbus, controller);
336
337 return pbus;
338
339out3:
340 kfree(controller->window);
341out2:
342 kfree(controller);
343out1:
344 return NULL;
345}
346
347void pcibios_resource_to_bus(struct pci_dev *dev,
348 struct pci_bus_region *region, struct resource *res)
349{
350 struct pci_controller *controller = PCI_CONTROLLER(dev);
351 unsigned long offset = 0;
352 int i;
353
354 for (i = 0; i < controller->windows; i++) {
355 struct pci_window *window = &controller->window[i];
356 if (!(window->resource.flags & res->flags))
357 continue;
358 if (window->resource.start > res->start)
359 continue;
360 if (window->resource.end < res->end)
361 continue;
362 offset = window->offset;
363 break;
364 }
365
366 region->start = res->start - offset;
367 region->end = res->end - offset;
368}
369EXPORT_SYMBOL(pcibios_resource_to_bus);
370
371void pcibios_bus_to_resource(struct pci_dev *dev,
372 struct resource *res, struct pci_bus_region *region)
373{
374 struct pci_controller *controller = PCI_CONTROLLER(dev);
375 unsigned long offset = 0;
376 int i;
377
378 for (i = 0; i < controller->windows; i++) {
379 struct pci_window *window = &controller->window[i];
380 if (!(window->resource.flags & res->flags))
381 continue;
382 if (window->resource.start - window->offset > region->start)
383 continue;
384 if (window->resource.end - window->offset < region->end)
385 continue;
386 offset = window->offset;
387 break;
388 }
389
390 res->start = region->start + offset;
391 res->end = region->end + offset;
392}
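
Both translation helpers above search the host bridge's windows for the one that contains the resource (or bus region) and then apply that window's offset, i.e. the difference between CPU physical addresses and PCI bus addresses behind that bridge. A small worked example with a hypothetical memory window:

#include <stdio.h>

int main(void)
{
	/* Hypothetical window: CPU addresses 0xc0000000..0xcfffffff appear on the
	 * bus at 0x80000000..0x8fffffff, i.e. offset = 0x40000000. */
	unsigned long win_start = 0xc0000000UL, win_end = 0xcfffffffUL;
	unsigned long offset = 0x40000000UL;

	unsigned long res_start = 0xc0100000UL, res_end = 0xc01fffffUL;	/* a BAR, CPU view */

	if (res_start >= win_start && res_end <= win_end)	/* window matches */
		printf("bus region: %#lx-%#lx\n",
		       res_start - offset, res_end - offset);	/* 0x80100000-0x801fffff */
	return 0;
}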
393
394static void __devinit pcibios_fixup_device_resources(struct pci_dev *dev)
395{
396 struct pci_bus_region region;
397 int i;
398 int limit = (dev->hdr_type == PCI_HEADER_TYPE_NORMAL) ? \
399 PCI_BRIDGE_RESOURCES : PCI_NUM_RESOURCES;
400
401 for (i = 0; i < limit; i++) {
402 if (!dev->resource[i].flags)
403 continue;
404 region.start = dev->resource[i].start;
405 region.end = dev->resource[i].end;
406 pcibios_bus_to_resource(dev, &dev->resource[i], &region);
407 pci_claim_resource(dev, i);
408 }
409}
410
411/*
412 * Called after each bus is probed, but before its children are examined.
413 */
414void __devinit
415pcibios_fixup_bus (struct pci_bus *b)
416{
417 struct pci_dev *dev;
418
419 list_for_each_entry(dev, &b->devices, bus_list)
420 pcibios_fixup_device_resources(dev);
421
422 return;
423}
424
425void __devinit
426pcibios_update_irq (struct pci_dev *dev, int irq)
427{
428 pci_write_config_byte(dev, PCI_INTERRUPT_LINE, irq);
429
430 /* ??? FIXME -- record old value for shutdown. */
431}
432
433static inline int
434pcibios_enable_resources (struct pci_dev *dev, int mask)
435{
436 u16 cmd, old_cmd;
437 int idx;
438 struct resource *r;
439
440 if (!dev)
441 return -EINVAL;
442
443 pci_read_config_word(dev, PCI_COMMAND, &cmd);
444 old_cmd = cmd;
445 for (idx=0; idx<6; idx++) {
446 /* Only set up the desired resources. */
447 if (!(mask & (1 << idx)))
448 continue;
449
450 r = &dev->resource[idx];
451 if (!r->start && r->end) {
452 printk(KERN_ERR
453 "PCI: Device %s not available because of resource collisions\n",
454 pci_name(dev));
455 return -EINVAL;
456 }
457 if (r->flags & IORESOURCE_IO)
458 cmd |= PCI_COMMAND_IO;
459 if (r->flags & IORESOURCE_MEM)
460 cmd |= PCI_COMMAND_MEMORY;
461 }
462 if (dev->resource[PCI_ROM_RESOURCE].start)
463 cmd |= PCI_COMMAND_MEMORY;
464 if (cmd != old_cmd) {
465 printk("PCI: Enabling device %s (%04x -> %04x)\n", pci_name(dev), old_cmd, cmd);
466 pci_write_config_word(dev, PCI_COMMAND, cmd);
467 }
468 return 0;
469}
470
471int
472pcibios_enable_device (struct pci_dev *dev, int mask)
473{
474 int ret;
475
476 ret = pcibios_enable_resources(dev, mask);
477 if (ret < 0)
478 return ret;
479
480 return acpi_pci_irq_enable(dev);
481}
482
483#ifdef CONFIG_ACPI_DEALLOCATE_IRQ
484void
485pcibios_disable_device (struct pci_dev *dev)
486{
487 acpi_pci_irq_disable(dev);
488}
489#endif /* CONFIG_ACPI_DEALLOCATE_IRQ */
490
491void
492pcibios_align_resource (void *data, struct resource *res,
493 unsigned long size, unsigned long align)
494{
495}
496
497/*
498 * PCI BIOS setup, always defaults to SAL interface
499 */
500char * __init
501pcibios_setup (char *str)
502{
503 if (!strcmp(str, "routeirq"))
504 pci_routeirq = 1;
505 return NULL;
506}
507
508int
509pci_mmap_page_range (struct pci_dev *dev, struct vm_area_struct *vma,
510 enum pci_mmap_state mmap_state, int write_combine)
511{
512 /*
513 * I/O space cannot be accessed via normal processor loads and
514 * stores on this platform.
515 */
516 if (mmap_state == pci_mmap_io)
517 /*
518 * XXX we could relax this for I/O spaces for which ACPI
519 * indicates that the space is 1-to-1 mapped. But at the
520 * moment, we don't support multiple PCI address spaces and
521 * the legacy I/O space is not 1-to-1 mapped, so this is moot.
522 */
523 return -EINVAL;
524
525 /*
526 * Leave vm_pgoff as-is, the PCI space address is the physical
527 * address on this platform.
528 */
529 vma->vm_flags |= (VM_SHM | VM_RESERVED | VM_IO);
530
531 if (write_combine && efi_range_is_wc(vma->vm_start,
532 vma->vm_end - vma->vm_start))
533 vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot);
534 else
535 vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
536
537 if (remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff,
538 vma->vm_end - vma->vm_start, vma->vm_page_prot))
539 return -EAGAIN;
540
541 return 0;
542}
543
544/**
545 * ia64_pci_get_legacy_mem - generic legacy mem routine
546 * @bus: bus to get legacy memory base address for
547 *
548 * Find the base of legacy memory for @bus. This is typically the first
549 * megabyte of bus address space for @bus or is simply 0 on platforms whose
550 * chipsets support legacy I/O and memory routing. Returns the base address
551 * or an error pointer if an error occurred.
552 *
553 * This is the ia64 generic version of this routine. Other platforms
554 * are free to override it with a machine vector.
555 */
556char *ia64_pci_get_legacy_mem(struct pci_bus *bus)
557{
558 return (char *)__IA64_UNCACHED_OFFSET;
559}
560
561/**
562 * pci_mmap_legacy_page_range - map legacy memory space to userland
563 * @bus: bus whose legacy space we're mapping
564 * @vma: vma passed in by mmap
565 *
566 * Map legacy memory space for this device back to userspace using a machine
567 * vector to get the base address.
568 */
569int
570pci_mmap_legacy_page_range(struct pci_bus *bus, struct vm_area_struct *vma)
571{
572 char *addr;
573
574 addr = pci_get_legacy_mem(bus);
575 if (IS_ERR(addr))
576 return PTR_ERR(addr);
577
578 vma->vm_pgoff += (unsigned long)addr >> PAGE_SHIFT;
579 vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
580 vma->vm_flags |= (VM_SHM | VM_RESERVED | VM_IO);
581
582 if (remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff,
583 vma->vm_end - vma->vm_start, vma->vm_page_prot))
584 return -EAGAIN;
585
586 return 0;
587}
588
589/**
590 * ia64_pci_legacy_read - read from legacy I/O space
591 * @bus: bus to read
592 * @port: legacy port value
593 * @val: caller allocated storage for returned value
594 * @size: number of bytes to read
595 *
596 * Simply reads @size bytes from @port and puts the result in @val.
597 *
598 * Again, this (and the write routine) are generic versions that can be
599 * overridden by the platform. This is necessary on platforms that don't
600 * support legacy I/O routing or that hard fail on legacy I/O timeouts.
601 */
602int ia64_pci_legacy_read(struct pci_bus *bus, u16 port, u32 *val, u8 size)
603{
604 int ret = size;
605
606 switch (size) {
607 case 1:
608 *val = inb(port);
609 break;
610 case 2:
611 *val = inw(port);
612 break;
613 case 4:
614 *val = inl(port);
615 break;
616 default:
617 ret = -EINVAL;
618 break;
619 }
620
621 return ret;
622}
623
624/**
625 * ia64_pci_legacy_write - perform a legacy I/O write
626 * @bus: bus pointer
627 * @port: port to write
628 * @val: value to write
629 * @size: number of bytes to write from @val
630 *
631 * Simply writes @size bytes of @val to @port.
632 */
633int ia64_pci_legacy_write(struct pci_dev *bus, u16 port, u32 val, u8 size)
634{
635 int ret = 0;
636
637 switch (size) {
638 case 1:
639 outb(val, port);
640 break;
641 case 2:
642 outw(val, port);
643 break;
644 case 4:
645 outl(val, port);
646 break;
647 default:
648 ret = -EINVAL;
649 break;
650 }
651
652 return ret;
653}
654
655/**
656 * pci_cacheline_size - determine cacheline size for PCI devices
657 * @dev: void
658 *
659 * We want to use the line-size of the outer-most cache. We assume
660 * that this line-size is the same for all CPUs.
661 *
662 * Code mostly taken from arch/ia64/kernel/palinfo.c:cache_info().
663 *
664 * RETURNS: An appropriate -ERRNO error value on error, or zero for success.
665 */
666static unsigned long
667pci_cacheline_size (void)
668{
669 u64 levels, unique_caches;
670 s64 status;
671 pal_cache_config_info_t cci;
672 static u8 cacheline_size;
673
674 if (cacheline_size)
675 return cacheline_size;
676
677 status = ia64_pal_cache_summary(&levels, &unique_caches);
678 if (status != 0) {
679 printk(KERN_ERR "%s: ia64_pal_cache_summary() failed (status=%ld)\n",
680 __FUNCTION__, status);
681 return SMP_CACHE_BYTES;
682 }
683
684 status = ia64_pal_cache_config_info(levels - 1, /* cache_type (data_or_unified)= */ 2,
685 &cci);
686 if (status != 0) {
687 printk(KERN_ERR "%s: ia64_pal_cache_config_info() failed (status=%ld)\n",
688 __FUNCTION__, status);
689 return SMP_CACHE_BYTES;
690 }
691 cacheline_size = 1 << cci.pcci_line_size;
692 return cacheline_size;
693}
694
695/**
696 * pcibios_prep_mwi - helper function for drivers/pci/pci.c:pci_set_mwi()
697 * @dev: the PCI device for which MWI is enabled
698 *
699 * For ia64, we can get the cacheline sizes from PAL.
700 *
701 * RETURNS: An appropriate -ERRNO error value on error, or zero for success.
702 */
703int
704pcibios_prep_mwi (struct pci_dev *dev)
705{
706 unsigned long desired_linesize, current_linesize;
707 int rc = 0;
708 u8 pci_linesize;
709
710 desired_linesize = pci_cacheline_size();
711
712 pci_read_config_byte(dev, PCI_CACHE_LINE_SIZE, &pci_linesize);
713 current_linesize = 4 * pci_linesize;
714 if (desired_linesize != current_linesize) {
715 printk(KERN_WARNING "PCI: slot %s has incorrect PCI cache line size of %lu bytes,",
716 pci_name(dev), current_linesize);
717 if (current_linesize > desired_linesize) {
718 printk(" expected %lu bytes instead\n", desired_linesize);
719 rc = -EINVAL;
720 } else {
721 printk(" correcting to %lu\n", desired_linesize);
722 pci_write_config_byte(dev, PCI_CACHE_LINE_SIZE, desired_linesize / 4);
723 }
724 }
725 return rc;
726}
727
728int pci_vector_resources(int last, int nr_released)
729{
730 int count = nr_released;
731
732 count += (IA64_LAST_DEVICE_VECTOR - last);
733
734 return count;
735}
diff --git a/arch/ia64/scripts/check-gas b/arch/ia64/scripts/check-gas
new file mode 100755
index 000000000000..2499e0b2243d
--- /dev/null
+++ b/arch/ia64/scripts/check-gas
@@ -0,0 +1,15 @@
1#!/bin/sh
2dir=$(dirname $0)
3CC=$1
4OBJDUMP=$2
5tmp=${TMPDIR:-/tmp}
6out=$tmp/out$$.o
7$CC -c $dir/check-gas-asm.S -o $out
8res=$($OBJDUMP -r --section .data $out | fgrep 00004 | tr -s ' ' |cut -f3 -d' ')
9rm -f $out
10if [ $res != ".text" ]; then
11 echo buggy
12else
13 echo good
14fi
15exit 0
diff --git a/arch/ia64/scripts/check-gas-asm.S b/arch/ia64/scripts/check-gas-asm.S
new file mode 100644
index 000000000000..010e1d227e5d
--- /dev/null
+++ b/arch/ia64/scripts/check-gas-asm.S
@@ -0,0 +1,2 @@
1[1:] nop 0
2 .xdata4 ".data", 0, 1b-.
diff --git a/arch/ia64/scripts/check-model.c b/arch/ia64/scripts/check-model.c
new file mode 100644
index 000000000000..e1d4e86e3d63
--- /dev/null
+++ b/arch/ia64/scripts/check-model.c
@@ -0,0 +1 @@
int __attribute__ ((__model__ (__small__))) x;
diff --git a/arch/ia64/scripts/check-segrel.S b/arch/ia64/scripts/check-segrel.S
new file mode 100644
index 000000000000..3be4e3dbeb83
--- /dev/null
+++ b/arch/ia64/scripts/check-segrel.S
@@ -0,0 +1,4 @@
1 .rodata
2 data4 @segrel(start)
3 .data
4start:
diff --git a/arch/ia64/scripts/check-segrel.lds b/arch/ia64/scripts/check-segrel.lds
new file mode 100644
index 000000000000..1c2f13e181d0
--- /dev/null
+++ b/arch/ia64/scripts/check-segrel.lds
@@ -0,0 +1,11 @@
1SECTIONS {
2 . = SIZEOF_HEADERS;
3 .rodata : { *(.rodata) } :ro
4 . = 0xa0000;
5 .data : { *(.data) } :dat
6 /DISCARD/ : { *(*) }
7}
8PHDRS {
9 ro PT_LOAD FILEHDR PHDRS;
10 dat PT_LOAD;
11}
diff --git a/arch/ia64/scripts/check-serialize.S b/arch/ia64/scripts/check-serialize.S
new file mode 100644
index 000000000000..0400c106806c
--- /dev/null
+++ b/arch/ia64/scripts/check-serialize.S
@@ -0,0 +1,2 @@
1 .serialize.data
2 .serialize.instruction
diff --git a/arch/ia64/scripts/check-text-align.S b/arch/ia64/scripts/check-text-align.S
new file mode 100644
index 000000000000..03f586abb734
--- /dev/null
+++ b/arch/ia64/scripts/check-text-align.S
@@ -0,0 +1,6 @@
1 .proc foo
2 .prologue
3foo: .save rp, r2
4 nop 0
5 .align 64
6 .endp foo
diff --git a/arch/ia64/scripts/toolchain-flags b/arch/ia64/scripts/toolchain-flags
new file mode 100755
index 000000000000..3f0c2adacb70
--- /dev/null
+++ b/arch/ia64/scripts/toolchain-flags
@@ -0,0 +1,53 @@
1#!/bin/sh
2#
3# Check whether linker can handle cross-segment @segrel():
4#
5CPPFLAGS=""
6CC=$1
7OBJDUMP=$2
8READELF=$3
9dir=$(dirname $0)
10tmp=${TMPDIR:-/tmp}
11out=$tmp/out$$
12
13# Check whether cross-segment segment-relative relocs work fine. We need
14# that for building the gate DSO:
15
16$CC -nostdlib -static -Wl,-T$dir/check-segrel.lds $dir/check-segrel.S -o $out
17res=$($OBJDUMP --full --section .rodata $out | fgrep 000 | cut -f3 -d' ')
18rm -f $out
19if [ $res != 00000a00 ]; then
20 CPPFLAGS="$CPPFLAGS -DHAVE_BUGGY_SEGREL"
21 cat >&2 <<EOF
22warning: your linker cannot handle cross-segment segment-relative relocations.
23 please upgrade to a newer version (it is safe to use this linker, but
24 the kernel will be bigger than strictly necessary).
25EOF
26fi
27
28# Check whether .align inside a function works as expected.
29
30$CC -c $dir/check-text-align.S -o $out
31$READELF -u $out | fgrep -q 'prologue(rlen=12)'
32res=$?
33rm -f $out
34if [ $res -eq 0 ]; then
35 CPPFLAGS="$CPPFLAGS -DHAVE_WORKING_TEXT_ALIGN"
36fi
37
38if ! $CC -c $dir/check-model.c -o $out 2>&1 | grep __model__ | grep -q attrib
39then
40 CPPFLAGS="$CPPFLAGS -DHAVE_MODEL_SMALL_ATTRIBUTE"
41fi
42rm -f $out
43
44# Check whether assembler supports .serialize.{data,instruction} directive.
45
46$CC -c $dir/check-serialize.S -o $out 2>/dev/null
47res=$?
48rm -f $out
49if [ $res -eq 0 ]; then
50 CPPFLAGS="$CPPFLAGS -DHAVE_SERIALIZE_DIRECTIVE"
51fi
52
53echo $CPPFLAGS
diff --git a/arch/ia64/scripts/unwcheck.py b/arch/ia64/scripts/unwcheck.py
new file mode 100755
index 000000000000..c27849889e19
--- /dev/null
+++ b/arch/ia64/scripts/unwcheck.py
@@ -0,0 +1,64 @@
1#!/usr/bin/env python
2#
3# Usage: unwcheck.py FILE
4#
5# This script checks the unwind info of each function in file FILE
6# and verifies that the sum of the region-lengths matches the total
7# length of the function.
8#
9# Based on a shell/awk script originally written by Harish Patil,
10# which was converted to Perl by Matthew Chapman, which was converted
11# to Python by David Mosberger.
12#
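# Illustrative note (not part of the original script): the slot count
# computed below assumes the IA-64 bundle format of 3 instruction slots
# per 16-byte bundle. For example, a function covering 0x40 bytes of
# text corresponds to 3 * 0x40 / 16 = 12 slots, which must equal the
# sum of the rlen values reported for its unwind regions.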
13import os
14import re
15import sys
16
17if len(sys.argv) != 2:
18 print "Usage: %s FILE" % sys.argv[0]
19 sys.exit(2)
20
21readelf = os.getenv("READELF", "readelf")
22
23start_pattern = re.compile("<([^>]*)>: \[0x([0-9a-f]+)-0x([0-9a-f]+)\]")
24rlen_pattern = re.compile(".*rlen=([0-9]+)")
25
26def check_func (func, slots, rlen_sum):
27 if slots != rlen_sum:
28 global num_errors
29 num_errors += 1
30 if not func: func = "[%#x-%#x]" % (start, end)
31 print "ERROR: %s: %lu slots, total region length = %lu" % (func, slots, rlen_sum)
32 return
33
34num_funcs = 0
35num_errors = 0
36func = False
37slots = 0
38rlen_sum = 0
39for line in os.popen("%s -u %s" % (readelf, sys.argv[1])):
40 m = start_pattern.match(line)
41 if m:
42 check_func(func, slots, rlen_sum)
43
44 func = m.group(1)
45 start = long(m.group(2), 16)
46 end = long(m.group(3), 16)
47 slots = 3 * (end - start) / 16
48 rlen_sum = 0L
49 num_funcs += 1
50 else:
51 m = rlen_pattern.match(line)
52 if m:
53 rlen_sum += long(m.group(1))
54check_func(func, slots, rlen_sum)
55
56if num_errors == 0:
57 print "No errors detected in %u functions." % num_funcs
58else:
59 if num_errors > 1:
60 err="errors"
61 else:
62 err="error"
63 print "%u %s detected in %u functions." % (num_errors, err, num_funcs)
64 sys.exit(1)
diff --git a/arch/ia64/sn/Makefile b/arch/ia64/sn/Makefile
new file mode 100644
index 000000000000..a269f6d84c29
--- /dev/null
+++ b/arch/ia64/sn/Makefile
@@ -0,0 +1,14 @@
1# arch/ia64/sn/Makefile
2#
3# This file is subject to the terms and conditions of the GNU General Public
4# License. See the file "COPYING" in the main directory of this archive
5# for more details.
6#
7# Copyright (C) 2004 Silicon Graphics, Inc. All Rights Reserved.
8#
9# Makefile for the sn ia64 subplatform
10#
11
12CPPFLAGS += -I$(srctree)/arch/ia64/sn/include
13
14obj-y += kernel/ pci/
diff --git a/arch/ia64/sn/include/ioerror.h b/arch/ia64/sn/include/ioerror.h
new file mode 100644
index 000000000000..e68f2b0789a7
--- /dev/null
+++ b/arch/ia64/sn/include/ioerror.h
@@ -0,0 +1,81 @@
1/*
2 * This file is subject to the terms and conditions of the GNU General Public
3 * License. See the file "COPYING" in the main directory of this archive
4 * for more details.
5 *
6 * Copyright (C) 1992 - 1997, 2000-2003 Silicon Graphics, Inc. All rights reserved.
7 */
8#ifndef _ASM_IA64_SN_IOERROR_H
9#define _ASM_IA64_SN_IOERROR_H
10
11/*
12 * IO error structure.
13 *
14 * This structure would expand to hold the information retrieved from
15 * all IO related error registers.
16 *
17 * This structure is defined to hold all system specific
18 * information related to a single error.
19 *
20 * This serves a couple of purposes.
21 * - Error handling often involves translating one form of address to other
22 * form. So, instead of having different data structures at each level,
23 * we have a single structure, and the appropriate fields get filled in
24 * at each layer.
25 * - This provides a way to dump all error related information in any layer
26 * of error handling (debugging aid).
27 *
28 * A second possibility is to allow each layer to define its own error
29 * data structure, and fill in the proper fields. This has the advantage
30 * of isolating the layers.
31 * A big concern is the potential stack usage (and overflow), if each layer
32 * defines these structures on stack (assuming we don't want to do kmalloc).
33 *
34 * Any layer wishing to pass extra information to a layer next to it in
35 * error handling hierarchy, can do so as a separate parameter.
36 */
37
38typedef struct io_error_s {
39 /* Bit fields indicating which structure fields are valid */
40 union {
41 struct {
42 unsigned ievb_errortype:1;
43 unsigned ievb_widgetnum:1;
44 unsigned ievb_widgetdev:1;
45 unsigned ievb_srccpu:1;
46 unsigned ievb_srcnode:1;
47 unsigned ievb_errnode:1;
48 unsigned ievb_sysioaddr:1;
49 unsigned ievb_xtalkaddr:1;
50 unsigned ievb_busspace:1;
51 unsigned ievb_busaddr:1;
52 unsigned ievb_vaddr:1;
53 unsigned ievb_memaddr:1;
54 unsigned ievb_epc:1;
55 unsigned ievb_ef:1;
56 unsigned ievb_tnum:1;
57 } iev_b;
58 unsigned iev_a;
59 } ie_v;
60
61 short ie_errortype; /* error type: extra info about error */
62 short ie_widgetnum; /* Widget number that's in error */
63 short ie_widgetdev; /* Device within widget in error */
64 cpuid_t ie_srccpu; /* CPU on srcnode generating error */
65 cnodeid_t ie_srcnode; /* Node which caused the error */
66 cnodeid_t ie_errnode; /* Node where error was noticed */
67 iopaddr_t ie_sysioaddr; /* Sys specific IO address */
68 iopaddr_t ie_xtalkaddr; /* Xtalk (48bit) addr of Error */
69 iopaddr_t ie_busspace; /* Bus specific address space */
70 iopaddr_t ie_busaddr; /* Bus specific address */
71 caddr_t ie_vaddr; /* Virtual address of error */
72 iopaddr_t ie_memaddr; /* Physical memory address */
73 caddr_t ie_epc; /* pc when error reported */
74 caddr_t ie_ef; /* eframe when error reported */
75 short ie_tnum; /* Xtalk TNUM field */
76} ioerror_t;
77
78#define IOERROR_INIT(e) do { (e)->ie_v.iev_a = 0; } while (0)
79#define IOERROR_SETVALUE(e,f,v) do { (e)->ie_ ## f = (v); (e)->ie_v.iev_b.ievb_ ## f = 1; } while (0)
80
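/*
 * Usage sketch (illustrative, not part of the original header): a layer
 * that has decoded, say, the widget number records it with
 *
 *	ioerror_t e;
 *	IOERROR_INIT(&e);
 *	IOERROR_SETVALUE(&e, widgetnum, wid);
 *
 * which stores wid in ie_widgetnum and sets the ievb_widgetnum valid
 * bit, so later layers can tell which fields have been filled in.
 */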
81#endif /* _ASM_IA64_SN_IOERROR_H */
diff --git a/arch/ia64/sn/include/pci/pcibr_provider.h b/arch/ia64/sn/include/pci/pcibr_provider.h
new file mode 100644
index 000000000000..b1f05ffec70b
--- /dev/null
+++ b/arch/ia64/sn/include/pci/pcibr_provider.h
@@ -0,0 +1,149 @@
1/*
2 * This file is subject to the terms and conditions of the GNU General Public
3 * License. See the file "COPYING" in the main directory of this archive
4 * for more details.
5 *
6 * Copyright (C) 1992-1997,2000-2004 Silicon Graphics, Inc. All rights reserved.
7 */
8#ifndef _ASM_IA64_SN_PCI_PCIBR_PROVIDER_H
9#define _ASM_IA64_SN_PCI_PCIBR_PROVIDER_H
10
11/* Workarounds */
12#define PV907516 (1 << 1) /* TIOCP: Don't write the write buffer flush reg */
13
14#define BUSTYPE_MASK 0x1
15
16/* Macros given a pcibus structure */
17#define IS_PCIX(ps) ((ps)->pbi_bridge_mode & BUSTYPE_MASK)
18#define IS_PCI_BRIDGE_ASIC(asic) (asic == PCIIO_ASIC_TYPE_PIC || \
19 asic == PCIIO_ASIC_TYPE_TIOCP)
20#define IS_PIC_SOFT(ps) (ps->pbi_bridge_type == PCIBR_BRIDGETYPE_PIC)
21
22
23/*
24 * The different PCI Bridge types supported on the SGI Altix platforms
25 */
26#define PCIBR_BRIDGETYPE_UNKNOWN -1
27#define PCIBR_BRIDGETYPE_PIC 2
28#define PCIBR_BRIDGETYPE_TIOCP 3
29
30/*
31 * Bridge 64bit Direct Map Attributes
32 */
33#define PCI64_ATTR_PREF (1ull << 59)
34#define PCI64_ATTR_PREC (1ull << 58)
35#define PCI64_ATTR_VIRTUAL (1ull << 57)
36#define PCI64_ATTR_BAR (1ull << 56)
37#define PCI64_ATTR_SWAP (1ull << 55)
38#define PCI64_ATTR_VIRTUAL1 (1ull << 54)
39
40#define PCI32_LOCAL_BASE 0
41#define PCI32_MAPPED_BASE 0x40000000
42#define PCI32_DIRECT_BASE 0x80000000
43
44#define IS_PCI32_MAPPED(x) ((uint64_t)(x) < PCI32_DIRECT_BASE && \
45 (uint64_t)(x) >= PCI32_MAPPED_BASE)
46#define IS_PCI32_DIRECT(x) ((uint64_t)(x) >= PCI32_MAPPED_BASE)
47
48
49/*
50 * Bridge PMU Address Translation Entry Attributes
51 */
52#define PCI32_ATE_V (0x1 << 0)
53#define PCI32_ATE_CO (0x1 << 1)
54#define PCI32_ATE_PREC (0x1 << 2)
55#define PCI32_ATE_PREF (0x1 << 3)
56#define PCI32_ATE_BAR (0x1 << 4)
57#define PCI32_ATE_ADDR_SHFT 12
58
59#define MINIMAL_ATES_REQUIRED(addr, size) \
60 (IOPG(IOPGOFF(addr) + (size) - 1) == IOPG((size) - 1))
61
62#define MINIMAL_ATE_FLAG(addr, size) \
63 (MINIMAL_ATES_REQUIRED((uint64_t)addr, size) ? 1 : 0)
64
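/*
 * Worked example (illustrative, assuming 4K I/O pages, IOPFNSHIFT == 12):
 * a 0x400-byte mapping starting at page offset 0x800 ends at offset
 * 0xbff, still within one I/O page, so MINIMAL_ATES_REQUIRED() is true.
 * The same 0x400-byte mapping starting at offset 0xf00 reaches offset
 * 0x12ff in the next page, so an extra ATE is needed and the macro
 * evaluates false.
 */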
65/* bit 29 of the pci address is the SWAP bit */
66#define ATE_SWAPSHIFT 29
67#define ATE_SWAP_ON(x) ((x) |= (1 << ATE_SWAPSHIFT))
68#define ATE_SWAP_OFF(x) ((x) &= ~(1 << ATE_SWAPSHIFT))
69
70/*
71 * I/O page size
72 */
73#if PAGE_SIZE < 16384
74#define IOPFNSHIFT 12 /* 4K per mapped page */
75#else
76#define IOPFNSHIFT 14 /* 16K per mapped page */
77#endif
78
79#define IOPGSIZE (1 << IOPFNSHIFT)
80#define IOPG(x) ((x) >> IOPFNSHIFT)
81#define IOPGOFF(x) ((x) & (IOPGSIZE-1))
82
83#define PCIBR_DEV_SWAP_DIR (1ull << 19)
84#define PCIBR_CTRL_PAGE_SIZE (0x1 << 21)
85
86/*
87 * PMU resources.
88 */
89struct ate_resource{
90 uint64_t *ate;
91 uint64_t num_ate;
92 uint64_t lowest_free_index;
93};
94
95struct pcibus_info {
96 struct pcibus_bussoft pbi_buscommon; /* common header */
97 uint32_t pbi_moduleid;
98 short pbi_bridge_type;
99 short pbi_bridge_mode;
100
101 struct ate_resource pbi_int_ate_resource;
102 uint64_t pbi_int_ate_size;
103
104 uint64_t pbi_dir_xbase;
105 char pbi_hub_xid;
106
107 uint64_t pbi_devreg[8];
108 spinlock_t pbi_lock;
109
110 uint32_t pbi_valid_devices;
111 uint32_t pbi_enabled_devices;
112};
113
114/*
115 * pcibus_info structure locking macros
116 */
117inline static unsigned long
118pcibr_lock(struct pcibus_info *pcibus_info)
119{
120 unsigned long flag;
121 spin_lock_irqsave(&pcibus_info->pbi_lock, flag);
122 return(flag);
123}
124#define pcibr_unlock(pcibus_info, flag) spin_unlock_irqrestore(&pcibus_info->pbi_lock, flag)
125
126extern void *pcibr_bus_fixup(struct pcibus_bussoft *);
127extern uint64_t pcibr_dma_map(struct pcidev_info *, unsigned long, size_t, unsigned int);
128extern void pcibr_dma_unmap(struct pcidev_info *, dma_addr_t, int);
129
130/*
131 * prototypes for the bridge asic register access routines in pcibr_reg.c
132 */
133extern void pcireg_control_bit_clr(struct pcibus_info *, uint64_t);
134extern void pcireg_control_bit_set(struct pcibus_info *, uint64_t);
135extern uint64_t pcireg_tflush_get(struct pcibus_info *);
136extern uint64_t pcireg_intr_status_get(struct pcibus_info *);
137extern void pcireg_intr_enable_bit_clr(struct pcibus_info *, uint64_t);
138extern void pcireg_intr_enable_bit_set(struct pcibus_info *, uint64_t);
139extern void pcireg_intr_addr_addr_set(struct pcibus_info *, int, uint64_t);
140extern void pcireg_force_intr_set(struct pcibus_info *, int);
141extern uint64_t pcireg_wrb_flush_get(struct pcibus_info *, int);
142extern void pcireg_int_ate_set(struct pcibus_info *, int, uint64_t);
143extern uint64_t * pcireg_int_ate_addr(struct pcibus_info *, int);
144extern void pcibr_force_interrupt(struct sn_irq_info *sn_irq_info);
145extern void pcibr_change_devices_irq(struct sn_irq_info *sn_irq_info);
146extern int pcibr_ate_alloc(struct pcibus_info *, int);
147extern void pcibr_ate_free(struct pcibus_info *, int);
148extern void ate_write(struct pcibus_info *, int, int, uint64_t);
149#endif
diff --git a/arch/ia64/sn/include/pci/pcibus_provider_defs.h b/arch/ia64/sn/include/pci/pcibus_provider_defs.h
new file mode 100644
index 000000000000..07065615bbea
--- /dev/null
+++ b/arch/ia64/sn/include/pci/pcibus_provider_defs.h
@@ -0,0 +1,43 @@
1/*
2 * This file is subject to the terms and conditions of the GNU General Public
3 * License. See the file "COPYING" in the main directory of this archive
4 * for more details.
5 *
6 * Copyright (C) 1992 - 1997, 2000-2004 Silicon Graphics, Inc. All rights reserved.
7 */
8#ifndef _ASM_IA64_SN_PCI_PCIBUS_PROVIDER_H
9#define _ASM_IA64_SN_PCI_PCIBUS_PROVIDER_H
10
11/*
12 * SN pci asic types. Do not ever renumber these or reuse values. The
13 * values must agree with what prom thinks they are.
14 */
15
16#define PCIIO_ASIC_TYPE_UNKNOWN 0
17#define PCIIO_ASIC_TYPE_PPB 1
18#define PCIIO_ASIC_TYPE_PIC 2
19#define PCIIO_ASIC_TYPE_TIOCP 3
20
21/*
22 * Common pciio bus provider data. There should be one of these as the
23 * first field in any pciio based provider soft structure (e.g. pcibr_soft,
24 * tioca_soft, etc).
25 */
26
27struct pcibus_bussoft {
28 uint32_t bs_asic_type; /* chipset type */
29 uint32_t bs_xid; /* xwidget id */
30 uint64_t bs_persist_busnum; /* Persistent Bus Number */
31 uint64_t bs_legacy_io; /* legacy io pio addr */
32 uint64_t bs_legacy_mem; /* legacy mem pio addr */
33 uint64_t bs_base; /* widget base */
34 struct xwidget_info *bs_xwidget_info;
35};
36
37/*
38 * DMA mapping flags
39 */
40
41#define SN_PCIDMA_CONSISTENT 0x0001
42
43#endif /* _ASM_IA64_SN_PCI_PCIBUS_PROVIDER_H */
diff --git a/arch/ia64/sn/include/pci/pcidev.h b/arch/ia64/sn/include/pci/pcidev.h
new file mode 100644
index 000000000000..81eb95d3bf47
--- /dev/null
+++ b/arch/ia64/sn/include/pci/pcidev.h
@@ -0,0 +1,54 @@
1/*
2 * This file is subject to the terms and conditions of the GNU General Public
3 * License. See the file "COPYING" in the main directory of this archive
4 * for more details.
5 *
6 * Copyright (C) 1992 - 1997, 2000-2004 Silicon Graphics, Inc. All rights reserved.
7 */
8#ifndef _ASM_IA64_SN_PCI_PCIDEV_H
9#define _ASM_IA64_SN_PCI_PCIDEV_H
10
11#include <linux/pci.h>
12
13extern struct sn_irq_info **sn_irq;
14
15#define SN_PCIDEV_INFO(pci_dev) \
16 ((struct pcidev_info *)(pci_dev)->sysdata)
17
18/*
19 * Given a pci_bus, return the sn pcibus_bussoft struct. Note that
20 * this only works for root busses, not for busses represented by PPB's.
21 */
22
23#define SN_PCIBUS_BUSSOFT(pci_bus) \
24 ((struct pcibus_bussoft *)(PCI_CONTROLLER((pci_bus))->platform_data))
25
26/*
27 * Given a struct pci_dev, return the sn pcibus_bussoft struct. Note
28 * that this is not equivalent to SN_PCIBUS_BUSSOFT(pci_dev->bus) due
29 * to possible PPB's in the path.
30 */
31
32#define SN_PCIDEV_BUSSOFT(pci_dev) \
33 (SN_PCIDEV_INFO(pci_dev)->pdi_host_pcidev_info->pdi_pcibus_info)
34
35#define PCIIO_BUS_NONE 255 /* bus 255 reserved */
36#define PCIIO_SLOT_NONE 255
37#define PCIIO_FUNC_NONE 255
38#define PCIIO_VENDOR_ID_NONE (-1)
39
40struct pcidev_info {
41 uint64_t pdi_pio_mapped_addr[7]; /* 6 BARs PLUS 1 ROM */
42 uint64_t pdi_slot_host_handle; /* Bus and devfn Host pci_dev */
43
44 struct pcibus_bussoft *pdi_pcibus_info; /* Kernel common bus soft */
45 struct pcidev_info *pdi_host_pcidev_info; /* Kernel Host pci_dev */
46 struct pci_dev *pdi_linux_pcidev; /* Kernel pci_dev */
47
48 struct sn_irq_info *pdi_sn_irq_info;
49};
50
51extern void sn_irq_fixup(struct pci_dev *pci_dev,
52 struct sn_irq_info *sn_irq_info);
53
54#endif /* _ASM_IA64_SN_PCI_PCIDEV_H */
diff --git a/arch/ia64/sn/include/pci/pic.h b/arch/ia64/sn/include/pci/pic.h
new file mode 100644
index 000000000000..fd18acecb1e6
--- /dev/null
+++ b/arch/ia64/sn/include/pci/pic.h
@@ -0,0 +1,261 @@
1/*
2 * This file is subject to the terms and conditions of the GNU General Public
3 * License. See the file "COPYING" in the main directory of this archive
4 * for more details.
5 *
6 * Copyright (C) 1992 - 1997, 2000-2003 Silicon Graphics, Inc. All rights reserved.
7 */
8#ifndef _ASM_IA64_SN_PCI_PIC_H
9#define _ASM_IA64_SN_PCI_PIC_H
10
11/*
12 * PIC AS DEVICE ZERO
13 * ------------------
14 *
15 * PIC handles PCI/X busses. PCI/X requires that the 'bridge' (i.e. PIC)
16 * be designated as 'device 0'. That is a departure from earlier SGI
17 * PCI bridges. Because of that we use config space 1 to access the
18 * config space of the first actual PCI device on the bus.
19 * Here's what the PIC manual says:
20 *
21 * The current PCI-X bus specification now defines that the parent
22 * host's bus bridge (PIC for example) must be device 0 on bus 0. PIC
23 * reduced the total number of devices from 8 to 4 and removed the
24 * device registers and windows, now only supporting devices 0,1,2, and
25 * 3. PIC did leave all 8 configuration space windows. The reason was
26 * there was nothing to gain by removing them. Herein lies the problem.
27 * The device numbering we do using 0 through 3 is unrelated to the device
28 * numbering which PCI-X requires in configuration space. In the past we
29 * correlated Config space and our device space 0 <-> 0, 1 <-> 1, etc.
30 * PCI-X requires we start at 1, not 0 and currently the PX brick
31 * does associate our:
32 *
33 * device 0 with configuration space window 1,
34 * device 1 with configuration space window 2,
35 * device 2 with configuration space window 3,
36 * device 3 with configuration space window 4.
37 *
38 * The net effect is that all config space accesses are off-by-one with
39 * relation to other per-slot accesses on the PIC.
40 * Here is a table that shows some of that:
41 *
42 * Internal Slot#
43 * |
44 * | 0 1 2 3
45 * ----------|---------------------------------------
46 * config | 0x21000 0x22000 0x23000 0x24000
47 * |
48 * even rrb | 0[0] n/a 1[0] n/a [] == implied even/odd
49 * |
50 * odd rrb | n/a 0[1] n/a 1[1]
51 * |
52 * int dev | 00 01 10 11
53 * |
54 * ext slot# | 1 2 3 4
55 * ----------|---------------------------------------
56 */
57
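/*
 * Illustrative example (not in the original header): per the table
 * above, internal device n (0..3) uses configuration space window n+1,
 * whose space begins at 0x21000 + n * 0x1000; internal device 0 is
 * therefore reached through config window 1 at 0x21000 and internal
 * device 3 through window 4 at 0x24000.
 */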
58#define PIC_ATE_TARGETID_SHFT 8
59#define PIC_HOST_INTR_ADDR 0x0000FFFFFFFFFFFFUL
60#define PIC_PCI64_ATTR_TARG_SHFT 60
61
62
63/*****************************************************************************
64 *********************** PIC MMR structure mapping ***************************
65 *****************************************************************************/
66
67/* NOTE: PIC WAR. PV#854697. PIC does not allow writes just to [31:0]
68 * of a 64-bit register. When writing PIC registers, always write the
69 * entire 64 bits.
70 */
71
72struct pic {
73
74 /* 0x000000-0x00FFFF -- Local Registers */
75
76 /* 0x000000-0x000057 -- Standard Widget Configuration */
77 uint64_t p_wid_id; /* 0x000000 */
78 uint64_t p_wid_stat; /* 0x000008 */
79 uint64_t p_wid_err_upper; /* 0x000010 */
80 uint64_t p_wid_err_lower; /* 0x000018 */
81 #define p_wid_err p_wid_err_lower
82 uint64_t p_wid_control; /* 0x000020 */
83 uint64_t p_wid_req_timeout; /* 0x000028 */
84 uint64_t p_wid_int_upper; /* 0x000030 */
85 uint64_t p_wid_int_lower; /* 0x000038 */
86 #define p_wid_int p_wid_int_lower
87 uint64_t p_wid_err_cmdword; /* 0x000040 */
88 uint64_t p_wid_llp; /* 0x000048 */
89 uint64_t p_wid_tflush; /* 0x000050 */
90
91 /* 0x000058-0x00007F -- Bridge-specific Widget Configuration */
92 uint64_t p_wid_aux_err; /* 0x000058 */
93 uint64_t p_wid_resp_upper; /* 0x000060 */
94 uint64_t p_wid_resp_lower; /* 0x000068 */
95 #define p_wid_resp p_wid_resp_lower
96 uint64_t p_wid_tst_pin_ctrl; /* 0x000070 */
97 uint64_t p_wid_addr_lkerr; /* 0x000078 */
98
99 /* 0x000080-0x00008F -- PMU & MAP */
100 uint64_t p_dir_map; /* 0x000080 */
101 uint64_t _pad_000088; /* 0x000088 */
102
103 /* 0x000090-0x00009F -- SSRAM */
104 uint64_t p_map_fault; /* 0x000090 */
105 uint64_t _pad_000098; /* 0x000098 */
106
107 /* 0x0000A0-0x0000AF -- Arbitration */
108 uint64_t p_arb; /* 0x0000A0 */
109 uint64_t _pad_0000A8; /* 0x0000A8 */
110
111 /* 0x0000B0-0x0000BF -- Number In A Can or ATE Parity Error */
112 uint64_t p_ate_parity_err; /* 0x0000B0 */
113 uint64_t _pad_0000B8; /* 0x0000B8 */
114
115 /* 0x0000C0-0x0000FF -- PCI/GIO */
116 uint64_t p_bus_timeout; /* 0x0000C0 */
117 uint64_t p_pci_cfg; /* 0x0000C8 */
118 uint64_t p_pci_err_upper; /* 0x0000D0 */
119 uint64_t p_pci_err_lower; /* 0x0000D8 */
120 #define p_pci_err p_pci_err_lower
121 uint64_t _pad_0000E0[4]; /* 0x0000{E0..F8} */
122
123 /* 0x000100-0x0001FF -- Interrupt */
124 uint64_t p_int_status; /* 0x000100 */
125 uint64_t p_int_enable; /* 0x000108 */
126 uint64_t p_int_rst_stat; /* 0x000110 */
127 uint64_t p_int_mode; /* 0x000118 */
128 uint64_t p_int_device; /* 0x000120 */
129 uint64_t p_int_host_err; /* 0x000128 */
130 uint64_t p_int_addr[8]; /* 0x0001{30,,,68} */
131 uint64_t p_err_int_view; /* 0x000170 */
132 uint64_t p_mult_int; /* 0x000178 */
133 uint64_t p_force_always[8]; /* 0x0001{80,,,B8} */
134 uint64_t p_force_pin[8]; /* 0x0001{C0,,,F8} */
135
136 /* 0x000200-0x000298 -- Device */
137 uint64_t p_device[4]; /* 0x0002{00,,,18} */
138 uint64_t _pad_000220[4]; /* 0x0002{20,,,38} */
139 uint64_t p_wr_req_buf[4]; /* 0x0002{40,,,58} */
140 uint64_t _pad_000260[4]; /* 0x0002{60,,,78} */
141 uint64_t p_rrb_map[2]; /* 0x0002{80,,,88} */
142 #define p_even_resp p_rrb_map[0] /* 0x000280 */
143 #define p_odd_resp p_rrb_map[1] /* 0x000288 */
144 uint64_t p_resp_status; /* 0x000290 */
145 uint64_t p_resp_clear; /* 0x000298 */
146
147 uint64_t _pad_0002A0[12]; /* 0x0002{A0..F8} */
148
149 /* 0x000300-0x0003F8 -- Buffer Address Match Registers */
150 struct {
151 uint64_t upper; /* 0x0003{00,,,F0} */
152 uint64_t lower; /* 0x0003{08,,,F8} */
153 } p_buf_addr_match[16];
154
155 /* 0x000400-0x0005FF -- Performance Monitor Registers (even only) */
156 struct {
157 uint64_t flush_w_touch; /* 0x000{400,,,5C0} */
158 uint64_t flush_wo_touch; /* 0x000{408,,,5C8} */
159 uint64_t inflight; /* 0x000{410,,,5D0} */
160 uint64_t prefetch; /* 0x000{418,,,5D8} */
161 uint64_t total_pci_retry; /* 0x000{420,,,5E0} */
162 uint64_t max_pci_retry; /* 0x000{428,,,5E8} */
163 uint64_t max_latency; /* 0x000{430,,,5F0} */
164 uint64_t clear_all; /* 0x000{438,,,5F8} */
165 } p_buf_count[8];
166
167
168 /* 0x000600-0x0009FF -- PCI/X registers */
169 uint64_t p_pcix_bus_err_addr; /* 0x000600 */
170 uint64_t p_pcix_bus_err_attr; /* 0x000608 */
171 uint64_t p_pcix_bus_err_data; /* 0x000610 */
172 uint64_t p_pcix_pio_split_addr; /* 0x000618 */
173 uint64_t p_pcix_pio_split_attr; /* 0x000620 */
174 uint64_t p_pcix_dma_req_err_attr; /* 0x000628 */
175 uint64_t p_pcix_dma_req_err_addr; /* 0x000630 */
176 uint64_t p_pcix_timeout; /* 0x000638 */
177
178 uint64_t _pad_000640[120]; /* 0x000{640,,,9F8} */
179
180 /* 0x000A00-0x000BFF -- PCI/X Read&Write Buffer */
181 struct {
182 uint64_t p_buf_addr; /* 0x000{A00,,,AF0} */
183 uint64_t p_buf_attr; /* 0X000{A08,,,AF8} */
184 } p_pcix_read_buf_64[16];
185
186 struct {
187 uint64_t p_buf_addr; /* 0x000{B00,,,BE0} */
188 uint64_t p_buf_attr; /* 0x000{B08,,,BE8} */
189 uint64_t p_buf_valid; /* 0x000{B10,,,BF0} */
190 uint64_t __pad1; /* 0x000{B18,,,BF8} */
191 } p_pcix_write_buf_64[8];
192
193 /* End of Local Registers -- Start of Address Map space */
194
195 char _pad_000c00[0x010000 - 0x000c00];
196
197 /* 0x010000-0x011fff -- Internal ATE RAM (Auto Parity Generation) */
198 uint64_t p_int_ate_ram[1024]; /* 0x010000-0x011fff */
199
200 /* 0x012000-0x013fff -- Internal ATE RAM (Manual Parity Generation) */
201 uint64_t p_int_ate_ram_mp[1024]; /* 0x012000-0x013fff */
202
203 char _pad_014000[0x18000 - 0x014000];
204
205 /* 0x18000-0x197F8 -- PIC Write Request Ram */
206 uint64_t p_wr_req_lower[256]; /* 0x18000 - 0x187F8 */
207 uint64_t p_wr_req_upper[256]; /* 0x18800 - 0x18FF8 */
208 uint64_t p_wr_req_parity[256]; /* 0x19000 - 0x197F8 */
209
210 char _pad_019800[0x20000 - 0x019800];
211
212 /* 0x020000-0x027FFF -- PCI Device Configuration Spaces */
213 union {
214 uint8_t c[0x1000 / 1]; /* 0x02{0000,,,7FFF} */
215 uint16_t s[0x1000 / 2]; /* 0x02{0000,,,7FFF} */
216 uint32_t l[0x1000 / 4]; /* 0x02{0000,,,7FFF} */
217 uint64_t d[0x1000 / 8]; /* 0x02{0000,,,7FFF} */
218 union {
219 uint8_t c[0x100 / 1];
220 uint16_t s[0x100 / 2];
221 uint32_t l[0x100 / 4];
222 uint64_t d[0x100 / 8];
223 } f[8];
224 } p_type0_cfg_dev[8]; /* 0x02{0000,,,7FFF} */
225
226 /* 0x028000-0x028FFF -- PCI Type 1 Configuration Space */
227 union {
228 uint8_t c[0x1000 / 1]; /* 0x028000-0x029000 */
229 uint16_t s[0x1000 / 2]; /* 0x028000-0x029000 */
230 uint32_t l[0x1000 / 4]; /* 0x028000-0x029000 */
231 uint64_t d[0x1000 / 8]; /* 0x028000-0x029000 */
232 union {
233 uint8_t c[0x100 / 1];
234 uint16_t s[0x100 / 2];
235 uint32_t l[0x100 / 4];
236 uint64_t d[0x100 / 8];
237 } f[8];
238 } p_type1_cfg; /* 0x028000-0x029000 */
239
240 char _pad_029000[0x030000-0x029000];
241
242 /* 0x030000-0x030007 -- PCI Interrupt Acknowledge Cycle */
243 union {
244 uint8_t c[8 / 1];
245 uint16_t s[8 / 2];
246 uint32_t l[8 / 4];
247 uint64_t d[8 / 8];
248 } p_pci_iack; /* 0x030000-0x030007 */
249
250 char _pad_030007[0x040000-0x030008];
251
252	/* 0x040000-0x040007 -- PCIX Special Cycle */
253 union {
254 uint8_t c[8 / 1];
255 uint16_t s[8 / 2];
256 uint32_t l[8 / 4];
257 uint64_t d[8 / 8];
258 } p_pcix_cycle; /* 0x040000-0x040007 */
259};
260
261#endif /* _ASM_IA64_SN_PCI_PIC_H */
diff --git a/arch/ia64/sn/include/pci/tiocp.h b/arch/ia64/sn/include/pci/tiocp.h
new file mode 100644
index 000000000000..f07c83b2bf6e
--- /dev/null
+++ b/arch/ia64/sn/include/pci/tiocp.h
@@ -0,0 +1,256 @@
1/*
2 * This file is subject to the terms and conditions of the GNU General Public
3 * License. See the file "COPYING" in the main directory of this archive
4 * for more details.
5 *
6 * Copyright (C) 2003-2004 Silicon Graphics, Inc. All rights reserved.
7 */
8#ifndef _ASM_IA64_SN_PCI_TIOCP_H
9#define _ASM_IA64_SN_PCI_TIOCP_H
10
11#define TIOCP_HOST_INTR_ADDR 0x003FFFFFFFFFFFFFUL
12#define TIOCP_PCI64_CMDTYPE_MEM (0x1ull << 60)
13
14
15/*****************************************************************************
16 *********************** TIOCP MMR structure mapping ***************************
17 *****************************************************************************/
18
19struct tiocp{
20
21 /* 0x000000-0x00FFFF -- Local Registers */
22
23 /* 0x000000-0x000057 -- (Legacy Widget Space) Configuration */
24 uint64_t cp_id; /* 0x000000 */
25 uint64_t cp_stat; /* 0x000008 */
26 uint64_t cp_err_upper; /* 0x000010 */
27 uint64_t cp_err_lower; /* 0x000018 */
28 #define cp_err cp_err_lower
29 uint64_t cp_control; /* 0x000020 */
30 uint64_t cp_req_timeout; /* 0x000028 */
31 uint64_t cp_intr_upper; /* 0x000030 */
32 uint64_t cp_intr_lower; /* 0x000038 */
33 #define cp_intr cp_intr_lower
34 uint64_t cp_err_cmdword; /* 0x000040 */
35 uint64_t _pad_000048; /* 0x000048 */
36 uint64_t cp_tflush; /* 0x000050 */
37
38 /* 0x000058-0x00007F -- Bridge-specific Configuration */
39 uint64_t cp_aux_err; /* 0x000058 */
40 uint64_t cp_resp_upper; /* 0x000060 */
41 uint64_t cp_resp_lower; /* 0x000068 */
42 #define cp_resp cp_resp_lower
43 uint64_t cp_tst_pin_ctrl; /* 0x000070 */
44 uint64_t cp_addr_lkerr; /* 0x000078 */
45
46 /* 0x000080-0x00008F -- PMU & MAP */
47 uint64_t cp_dir_map; /* 0x000080 */
48 uint64_t _pad_000088; /* 0x000088 */
49
50 /* 0x000090-0x00009F -- SSRAM */
51 uint64_t cp_map_fault; /* 0x000090 */
52 uint64_t _pad_000098; /* 0x000098 */
53
54 /* 0x0000A0-0x0000AF -- Arbitration */
55 uint64_t cp_arb; /* 0x0000A0 */
56 uint64_t _pad_0000A8; /* 0x0000A8 */
57
58 /* 0x0000B0-0x0000BF -- Number In A Can or ATE Parity Error */
59 uint64_t cp_ate_parity_err; /* 0x0000B0 */
60 uint64_t _pad_0000B8; /* 0x0000B8 */
61
62 /* 0x0000C0-0x0000FF -- PCI/GIO */
63 uint64_t cp_bus_timeout; /* 0x0000C0 */
64 uint64_t cp_pci_cfg; /* 0x0000C8 */
65 uint64_t cp_pci_err_upper; /* 0x0000D0 */
66 uint64_t cp_pci_err_lower; /* 0x0000D8 */
67 #define cp_pci_err cp_pci_err_lower
68 uint64_t _pad_0000E0[4]; /* 0x0000{E0..F8} */
69
70 /* 0x000100-0x0001FF -- Interrupt */
71 uint64_t cp_int_status; /* 0x000100 */
72 uint64_t cp_int_enable; /* 0x000108 */
73 uint64_t cp_int_rst_stat; /* 0x000110 */
74 uint64_t cp_int_mode; /* 0x000118 */
75 uint64_t cp_int_device; /* 0x000120 */
76 uint64_t cp_int_host_err; /* 0x000128 */
77 uint64_t cp_int_addr[8]; /* 0x0001{30,,,68} */
78 uint64_t cp_err_int_view; /* 0x000170 */
79 uint64_t cp_mult_int; /* 0x000178 */
80 uint64_t cp_force_always[8]; /* 0x0001{80,,,B8} */
81 uint64_t cp_force_pin[8]; /* 0x0001{C0,,,F8} */
82
83 /* 0x000200-0x000298 -- Device */
84 uint64_t cp_device[4]; /* 0x0002{00,,,18} */
85 uint64_t _pad_000220[4]; /* 0x0002{20,,,38} */
86 uint64_t cp_wr_req_buf[4]; /* 0x0002{40,,,58} */
87 uint64_t _pad_000260[4]; /* 0x0002{60,,,78} */
88 uint64_t cp_rrb_map[2]; /* 0x0002{80,,,88} */
89 #define cp_even_resp cp_rrb_map[0] /* 0x000280 */
90 #define cp_odd_resp cp_rrb_map[1] /* 0x000288 */
91 uint64_t cp_resp_status; /* 0x000290 */
92 uint64_t cp_resp_clear; /* 0x000298 */
93
94 uint64_t _pad_0002A0[12]; /* 0x0002{A0..F8} */
95
96 /* 0x000300-0x0003F8 -- Buffer Address Match Registers */
97 struct {
98 uint64_t upper; /* 0x0003{00,,,F0} */
99 uint64_t lower; /* 0x0003{08,,,F8} */
100 } cp_buf_addr_match[16];
101
102 /* 0x000400-0x0005FF -- Performance Monitor Registers (even only) */
103 struct {
104 uint64_t flush_w_touch; /* 0x000{400,,,5C0} */
105 uint64_t flush_wo_touch; /* 0x000{408,,,5C8} */
106 uint64_t inflight; /* 0x000{410,,,5D0} */
107 uint64_t prefetch; /* 0x000{418,,,5D8} */
108 uint64_t total_pci_retry; /* 0x000{420,,,5E0} */
109 uint64_t max_pci_retry; /* 0x000{428,,,5E8} */
110 uint64_t max_latency; /* 0x000{430,,,5F0} */
111 uint64_t clear_all; /* 0x000{438,,,5F8} */
112 } cp_buf_count[8];
113
114
115 /* 0x000600-0x0009FF -- PCI/X registers */
116 uint64_t cp_pcix_bus_err_addr; /* 0x000600 */
117 uint64_t cp_pcix_bus_err_attr; /* 0x000608 */
118 uint64_t cp_pcix_bus_err_data; /* 0x000610 */
119 uint64_t cp_pcix_pio_split_addr; /* 0x000618 */
120 uint64_t cp_pcix_pio_split_attr; /* 0x000620 */
121 uint64_t cp_pcix_dma_req_err_attr; /* 0x000628 */
122 uint64_t cp_pcix_dma_req_err_addr; /* 0x000630 */
123 uint64_t cp_pcix_timeout; /* 0x000638 */
124
125 uint64_t _pad_000640[24]; /* 0x000{640,,,6F8} */
126
127 /* 0x000700-0x000737 -- Debug Registers */
128 uint64_t cp_ct_debug_ctl; /* 0x000700 */
129 uint64_t cp_br_debug_ctl; /* 0x000708 */
130 uint64_t cp_mux3_debug_ctl; /* 0x000710 */
131 uint64_t cp_mux4_debug_ctl; /* 0x000718 */
132 uint64_t cp_mux5_debug_ctl; /* 0x000720 */
133 uint64_t cp_mux6_debug_ctl; /* 0x000728 */
134 uint64_t cp_mux7_debug_ctl; /* 0x000730 */
135
136 uint64_t _pad_000738[89]; /* 0x000{738,,,9F8} */
137
138 /* 0x000A00-0x000BFF -- PCI/X Read&Write Buffer */
139 struct {
140 uint64_t cp_buf_addr; /* 0x000{A00,,,AF0} */
141 uint64_t cp_buf_attr; /* 0X000{A08,,,AF8} */
142 } cp_pcix_read_buf_64[16];
143
144 struct {
145 uint64_t cp_buf_addr; /* 0x000{B00,,,BE0} */
146 uint64_t cp_buf_attr; /* 0x000{B08,,,BE8} */
147 uint64_t cp_buf_valid; /* 0x000{B10,,,BF0} */
148 uint64_t __pad1; /* 0x000{B18,,,BF8} */
149 } cp_pcix_write_buf_64[8];
150
151 /* End of Local Registers -- Start of Address Map space */
152
153 char _pad_000c00[0x010000 - 0x000c00];
154
155 /* 0x010000-0x011FF8 -- Internal ATE RAM (Auto Parity Generation) */
156 uint64_t cp_int_ate_ram[1024]; /* 0x010000-0x011FF8 */
157
158 char _pad_012000[0x14000 - 0x012000];
159
160 /* 0x014000-0x015FF8 -- Internal ATE RAM (Manual Parity Generation) */
161 uint64_t cp_int_ate_ram_mp[1024]; /* 0x014000-0x015FF8 */
162
163 char _pad_016000[0x18000 - 0x016000];
164
165 /* 0x18000-0x197F8 -- TIOCP Write Request Ram */
166 uint64_t cp_wr_req_lower[256]; /* 0x18000 - 0x187F8 */
167 uint64_t cp_wr_req_upper[256]; /* 0x18800 - 0x18FF8 */
168 uint64_t cp_wr_req_parity[256]; /* 0x19000 - 0x197F8 */
169
170 char _pad_019800[0x1C000 - 0x019800];
171
172 /* 0x1C000-0x1EFF8 -- TIOCP Read Response Ram */
173 uint64_t cp_rd_resp_lower[512]; /* 0x1C000 - 0x1CFF8 */
174 uint64_t cp_rd_resp_upper[512]; /* 0x1D000 - 0x1DFF8 */
175 uint64_t cp_rd_resp_parity[512]; /* 0x1E000 - 0x1EFF8 */
176
177 char _pad_01F000[0x20000 - 0x01F000];
178
179 /* 0x020000-0x021FFF -- Host Device (CP) Configuration Space (not used) */
180 char _pad_020000[0x021000 - 0x20000];
181
182 /* 0x021000-0x027FFF -- PCI Device Configuration Spaces */
183 union {
184 uint8_t c[0x1000 / 1]; /* 0x02{0000,,,7FFF} */
185 uint16_t s[0x1000 / 2]; /* 0x02{0000,,,7FFF} */
186 uint32_t l[0x1000 / 4]; /* 0x02{0000,,,7FFF} */
187 uint64_t d[0x1000 / 8]; /* 0x02{0000,,,7FFF} */
188 union {
189 uint8_t c[0x100 / 1];
190 uint16_t s[0x100 / 2];
191 uint32_t l[0x100 / 4];
192 uint64_t d[0x100 / 8];
193 } f[8];
194 } cp_type0_cfg_dev[7]; /* 0x02{1000,,,7FFF} */
195
196 /* 0x028000-0x028FFF -- PCI Type 1 Configuration Space */
197 union {
198 uint8_t c[0x1000 / 1]; /* 0x028000-0x029000 */
199 uint16_t s[0x1000 / 2]; /* 0x028000-0x029000 */
200 uint32_t l[0x1000 / 4]; /* 0x028000-0x029000 */
201 uint64_t d[0x1000 / 8]; /* 0x028000-0x029000 */
202 union {
203 uint8_t c[0x100 / 1];
204 uint16_t s[0x100 / 2];
205 uint32_t l[0x100 / 4];
206 uint64_t d[0x100 / 8];
207 } f[8];
208 } cp_type1_cfg; /* 0x028000-0x029000 */
209
210 char _pad_029000[0x030000-0x029000];
211
212 /* 0x030000-0x030007 -- PCI Interrupt Acknowledge Cycle */
213 union {
214 uint8_t c[8 / 1];
215 uint16_t s[8 / 2];
216 uint32_t l[8 / 4];
217 uint64_t d[8 / 8];
218 } cp_pci_iack; /* 0x030000-0x030007 */
219
220 char _pad_030007[0x040000-0x030008];
221
222 /* 0x040000-0x040007 -- PCIX Special Cycle */
223 union {
224 uint8_t c[8 / 1];
225 uint16_t s[8 / 2];
226 uint32_t l[8 / 4];
227 uint64_t d[8 / 8];
228 } cp_pcix_cycle; /* 0x040000-0x040007 */
229
230 char _pad_040007[0x200000-0x040008];
231
232 /* 0x200000-0x7FFFFF -- PCI/GIO Device Spaces */
233 union {
234 uint8_t c[0x100000 / 1];
235 uint16_t s[0x100000 / 2];
236 uint32_t l[0x100000 / 4];
237 uint64_t d[0x100000 / 8];
238 } cp_devio_raw[6]; /* 0x200000-0x7FFFFF */
239
240 #define cp_devio(n) cp_devio_raw[((n)<2)?(n*2):(n+2)]
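	/*
	 * Illustrative mapping (not in the original header): cp_devio(0)
	 * and cp_devio(1) select cp_devio_raw[0] and cp_devio_raw[2],
	 * while cp_devio(2) and cp_devio(3) select cp_devio_raw[4] and
	 * cp_devio_raw[5].
	 */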
241
242 char _pad_800000[0xA00000-0x800000];
243
244 /* 0xA00000-0xBFFFFF -- PCI/GIO Device Spaces w/flush */
245 union {
246 uint8_t c[0x100000 / 1];
247 uint16_t s[0x100000 / 2];
248 uint32_t l[0x100000 / 4];
249 uint64_t d[0x100000 / 8];
250 } cp_devio_raw_flush[6]; /* 0xA00000-0xBFFFFF */
251
252 #define cp_devio_flush(n) cp_devio_raw_flush[((n)<2)?(n*2):(n+2)]
253
254};
255
256#endif /* _ASM_IA64_SN_PCI_TIOCP_H */
diff --git a/arch/ia64/sn/include/tio.h b/arch/ia64/sn/include/tio.h
new file mode 100644
index 000000000000..0139124dd54a
--- /dev/null
+++ b/arch/ia64/sn/include/tio.h
@@ -0,0 +1,37 @@
1/*
2 * This file is subject to the terms and conditions of the GNU General Public
3 * License. See the file "COPYING" in the main directory of this archive
4 * for more details.
5 *
6 * Copyright (C) 2000-2004 Silicon Graphics, Inc. All rights reserved.
7 */
8
9#ifndef _ASM_IA64_SN_TIO_H
10#define _ASM_IA64_SN_TIO_H
11
12#define TIO_MMR_ADDR_MOD
13
14#define TIO_NODE_ID TIO_MMR_ADDR_MOD(0x0000000090060e80)
15
16#define TIO_ITTE_BASE 0xb0008800 /* base of translation table entries */
17#define TIO_ITTE(bigwin) (TIO_ITTE_BASE + 8*(bigwin))
18
19#define TIO_ITTE_OFFSET_BITS 8 /* size of offset field */
20#define TIO_ITTE_OFFSET_MASK ((1<<TIO_ITTE_OFFSET_BITS)-1)
21#define TIO_ITTE_OFFSET_SHIFT 0
22
23#define TIO_ITTE_WIDGET_BITS 2 /* size of widget field */
24#define TIO_ITTE_WIDGET_MASK ((1<<TIO_ITTE_WIDGET_BITS)-1)
25#define TIO_ITTE_WIDGET_SHIFT 12
26#define TIO_ITTE_VALID_MASK 0x1
27#define TIO_ITTE_VALID_SHIFT 16
28
29
30#define TIO_ITTE_PUT(nasid, bigwin, widget, addr, valid) \
31 REMOTE_HUB_S((nasid), TIO_ITTE(bigwin), \
32 (((((addr) >> TIO_BWIN_SIZE_BITS) & \
33 TIO_ITTE_OFFSET_MASK) << TIO_ITTE_OFFSET_SHIFT) | \
34 (((widget) & TIO_ITTE_WIDGET_MASK) << TIO_ITTE_WIDGET_SHIFT)) | \
35 (( (valid) & TIO_ITTE_VALID_MASK) << TIO_ITTE_VALID_SHIFT))
36
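/*
 * Example (illustrative, not part of the original header): TIO_ITTE(2)
 * resolves to 0xb0008810, and TIO_ITTE_PUT(nasid, 2, widget, addr, 1)
 * writes an entry whose low 8 bits hold (addr >> TIO_BWIN_SIZE_BITS),
 * whose bits 12-13 hold the widget number, and whose bit 16 marks the
 * entry valid.
 */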
37#endif /* _ASM_IA64_SN_TIO_H */
diff --git a/arch/ia64/sn/include/xtalk/hubdev.h b/arch/ia64/sn/include/xtalk/hubdev.h
new file mode 100644
index 000000000000..868e7ecae84b
--- /dev/null
+++ b/arch/ia64/sn/include/xtalk/hubdev.h
@@ -0,0 +1,67 @@
1/*
2 * This file is subject to the terms and conditions of the GNU General Public
3 * License. See the file "COPYING" in the main directory of this archive
4 * for more details.
5 *
6 * Copyright (C) 1992 - 1997, 2000-2004 Silicon Graphics, Inc. All rights reserved.
7 */
8#ifndef _ASM_IA64_SN_XTALK_HUBDEV_H
9#define _ASM_IA64_SN_XTALK_HUBDEV_H
10
11#define HUB_WIDGET_ID_MAX 0xf
12#define DEV_PER_WIDGET (2*2*8)
13#define IIO_ITTE_WIDGET_BITS 4 /* size of widget field */
14#define IIO_ITTE_WIDGET_MASK ((1<<IIO_ITTE_WIDGET_BITS)-1)
15#define IIO_ITTE_WIDGET_SHIFT 8
16
17/*
18 * Use the top big window as a surrogate for the first small window
19 */
20#define SWIN0_BIGWIN HUB_NUM_BIG_WINDOW
21#define IIO_NUM_ITTES 7
22#define HUB_NUM_BIG_WINDOW (IIO_NUM_ITTES - 1)
23
24struct sn_flush_device_list {
25 int sfdl_bus;
26 int sfdl_slot;
27 int sfdl_pin;
28 struct bar_list {
29 unsigned long start;
30 unsigned long end;
31 } sfdl_bar_list[6];
32 unsigned long sfdl_force_int_addr;
33 unsigned long sfdl_flush_value;
34 volatile unsigned long *sfdl_flush_addr;
35 uint64_t sfdl_persistent_busnum;
36 struct pcibus_info *sfdl_pcibus_info;
37 spinlock_t sfdl_flush_lock;
38};
39
40/*
41 * **widget_p - Used as an array[wid_num][device] of sn_flush_device_list.
42 */
43struct sn_flush_nasid_entry {
44	struct sn_flush_device_list **widget_p;	/* Used as an array of wid_num */
45 uint64_t iio_itte[8];
46};
47
48struct hubdev_info {
49 geoid_t hdi_geoid;
50 short hdi_nasid;
51 short hdi_peer_nasid; /* Dual Porting Peer */
52
53 struct sn_flush_nasid_entry hdi_flush_nasid_list;
54 struct xwidget_info hdi_xwidget_info[HUB_WIDGET_ID_MAX + 1];
55
56
57 void *hdi_nodepda;
58 void *hdi_node_vertex;
59 void *hdi_xtalk_vertex;
60};
61
62extern void hubdev_init_node(nodepda_t *, cnodeid_t);
63extern void hub_error_init(struct hubdev_info *);
64extern void ice_error_init(struct hubdev_info *);
65
66
67#endif /* _ASM_IA64_SN_XTALK_HUBDEV_H */
diff --git a/arch/ia64/sn/include/xtalk/xbow.h b/arch/ia64/sn/include/xtalk/xbow.h
new file mode 100644
index 000000000000..ec56b3432f17
--- /dev/null
+++ b/arch/ia64/sn/include/xtalk/xbow.h
@@ -0,0 +1,291 @@
1/*
2 * This file is subject to the terms and conditions of the GNU General Public
3 * License. See the file "COPYING" in the main directory of this archive
4 * for more details.
5 *
6 * Copyright (C) 1992-1997,2000-2004 Silicon Graphics, Inc. All Rights Reserved.
7 */
8#ifndef _ASM_IA64_SN_XTALK_XBOW_H
9#define _ASM_IA64_SN_XTALK_XBOW_H
10
11#define XBOW_PORT_8 0x8
12#define XBOW_PORT_C 0xc
13#define XBOW_PORT_F 0xf
14
15#define MAX_XBOW_PORTS 8 /* number of ports on xbow chip */
16#define BASE_XBOW_PORT XBOW_PORT_8 /* Lowest external port */
17
18#define XBOW_CREDIT 4
19
20#define MAX_XBOW_NAME 16
21
22/* Register set for each xbow link */
23typedef volatile struct xb_linkregs_s {
24/*
25 * we access these through synergy unswizzled space, so the address
26 * gets twiddled (i.e. references to 0x4 actually go to 0x0 and vv.)
27 * That's why we put the register first and filler second.
28 */
29 uint32_t link_ibf;
30 uint32_t filler0; /* filler for proper alignment */
31 uint32_t link_control;
32 uint32_t filler1;
33 uint32_t link_status;
34 uint32_t filler2;
35 uint32_t link_arb_upper;
36 uint32_t filler3;
37 uint32_t link_arb_lower;
38 uint32_t filler4;
39 uint32_t link_status_clr;
40 uint32_t filler5;
41 uint32_t link_reset;
42 uint32_t filler6;
43 uint32_t link_aux_status;
44 uint32_t filler7;
45} xb_linkregs_t;
46
47typedef volatile struct xbow_s {
48 /* standard widget configuration 0x000000-0x000057 */
49 struct widget_cfg xb_widget; /* 0x000000 */
50
51 /* helper fieldnames for accessing bridge widget */
52
53#define xb_wid_id xb_widget.w_id
54#define xb_wid_stat xb_widget.w_status
55#define xb_wid_err_upper xb_widget.w_err_upper_addr
56#define xb_wid_err_lower xb_widget.w_err_lower_addr
57#define xb_wid_control xb_widget.w_control
58#define xb_wid_req_timeout xb_widget.w_req_timeout
59#define xb_wid_int_upper xb_widget.w_intdest_upper_addr
60#define xb_wid_int_lower xb_widget.w_intdest_lower_addr
61#define xb_wid_err_cmdword xb_widget.w_err_cmd_word
62#define xb_wid_llp xb_widget.w_llp_cfg
63#define xb_wid_stat_clr xb_widget.w_tflush
64
65/*
66 * we access these through synergy unswizzled space, so the address
67 * gets twiddled (i.e. references to 0x4 actually go to 0x0 and vv.)
68 * That's why we put the register first and filler second.
69 */
70 /* xbow-specific widget configuration 0x000058-0x0000FF */
71 uint32_t xb_wid_arb_reload; /* 0x00005C */
72 uint32_t _pad_000058;
73 uint32_t xb_perf_ctr_a; /* 0x000064 */
74 uint32_t _pad_000060;
75 uint32_t xb_perf_ctr_b; /* 0x00006c */
76 uint32_t _pad_000068;
77 uint32_t xb_nic; /* 0x000074 */
78 uint32_t _pad_000070;
79
80 /* Xbridge only */
81 uint32_t xb_w0_rst_fnc; /* 0x00007C */
82 uint32_t _pad_000078;
83 uint32_t xb_l8_rst_fnc; /* 0x000084 */
84 uint32_t _pad_000080;
85 uint32_t xb_l9_rst_fnc; /* 0x00008c */
86 uint32_t _pad_000088;
87 uint32_t xb_la_rst_fnc; /* 0x000094 */
88 uint32_t _pad_000090;
89 uint32_t xb_lb_rst_fnc; /* 0x00009c */
90 uint32_t _pad_000098;
91 uint32_t xb_lc_rst_fnc; /* 0x0000a4 */
92 uint32_t _pad_0000a0;
93 uint32_t xb_ld_rst_fnc; /* 0x0000ac */
94 uint32_t _pad_0000a8;
95 uint32_t xb_le_rst_fnc; /* 0x0000b4 */
96 uint32_t _pad_0000b0;
97 uint32_t xb_lf_rst_fnc; /* 0x0000bc */
98 uint32_t _pad_0000b8;
99 uint32_t xb_lock; /* 0x0000c4 */
100 uint32_t _pad_0000c0;
101 uint32_t xb_lock_clr; /* 0x0000cc */
102 uint32_t _pad_0000c8;
103 /* end of Xbridge only */
104 uint32_t _pad_0000d0[12];
105
106 /* Link Specific Registers, port 8..15 0x000100-0x000300 */
107 xb_linkregs_t xb_link_raw[MAX_XBOW_PORTS];
108#define xb_link(p) xb_link_raw[(p) & (MAX_XBOW_PORTS - 1)]
109
110} xbow_t;
111
112#define XB_FLAGS_EXISTS 0x1 /* device exists */
113#define XB_FLAGS_MASTER 0x2
114#define XB_FLAGS_SLAVE 0x0
115#define XB_FLAGS_GBR 0x4
116#define XB_FLAGS_16BIT 0x8
117#define XB_FLAGS_8BIT 0x0
118
119/* is widget port number valid? (based on version 7.0 of xbow spec) */
120#define XBOW_WIDGET_IS_VALID(wid) ((wid) >= XBOW_PORT_8 && (wid) <= XBOW_PORT_F)
121
122/* whether to use upper or lower arbitration register, given source widget id */
123#define XBOW_ARB_IS_UPPER(wid) ((wid) >= XBOW_PORT_8 && (wid) <= XBOW_PORT_B)
124#define XBOW_ARB_IS_LOWER(wid) ((wid) >= XBOW_PORT_C && (wid) <= XBOW_PORT_F)
125
126/* offset of arbitration register, given source widget id */
127#define XBOW_ARB_OFF(wid) (XBOW_ARB_IS_UPPER(wid) ? 0x1c : 0x24)
128
129#define XBOW_WID_ID WIDGET_ID
130#define XBOW_WID_STAT WIDGET_STATUS
131#define XBOW_WID_ERR_UPPER WIDGET_ERR_UPPER_ADDR
132#define XBOW_WID_ERR_LOWER WIDGET_ERR_LOWER_ADDR
133#define XBOW_WID_CONTROL WIDGET_CONTROL
134#define XBOW_WID_REQ_TO WIDGET_REQ_TIMEOUT
135#define XBOW_WID_INT_UPPER WIDGET_INTDEST_UPPER_ADDR
136#define XBOW_WID_INT_LOWER WIDGET_INTDEST_LOWER_ADDR
137#define XBOW_WID_ERR_CMDWORD WIDGET_ERR_CMD_WORD
138#define XBOW_WID_LLP WIDGET_LLP_CFG
139#define XBOW_WID_STAT_CLR WIDGET_TFLUSH
140#define XBOW_WID_ARB_RELOAD 0x5c
141#define XBOW_WID_PERF_CTR_A 0x64
142#define XBOW_WID_PERF_CTR_B 0x6c
143#define XBOW_WID_NIC 0x74
144
145/* Xbridge only */
146#define XBOW_W0_RST_FNC 0x00007C
147#define XBOW_L8_RST_FNC 0x000084
148#define XBOW_L9_RST_FNC 0x00008c
149#define XBOW_LA_RST_FNC 0x000094
150#define XBOW_LB_RST_FNC 0x00009c
151#define XBOW_LC_RST_FNC 0x0000a4
152#define XBOW_LD_RST_FNC 0x0000ac
153#define XBOW_LE_RST_FNC 0x0000b4
154#define XBOW_LF_RST_FNC 0x0000bc
155#define XBOW_RESET_FENCE(x) ((x) > 7 && (x) < 16) ? \
156 (XBOW_W0_RST_FNC + ((x) - 7) * 8) : \
157 ((x) == 0) ? XBOW_W0_RST_FNC : 0
158#define XBOW_LOCK 0x0000c4
159#define XBOW_LOCK_CLR 0x0000cc
160/* End of Xbridge only */
161
162/* used only in ide, but defined here within the reserved portion */
163/* of the widget0 address space (before 0xf4) */
164#define XBOW_WID_UNDEF 0xe4
165
166/* xbow link register set base, legal value for x is 0x8..0xf */
167#define XB_LINK_BASE 0x100
168#define XB_LINK_OFFSET 0x40
169#define XB_LINK_REG_BASE(x) (XB_LINK_BASE + ((x) & (MAX_XBOW_PORTS - 1)) * XB_LINK_OFFSET)
170
171#define XB_LINK_IBUF_FLUSH(x) (XB_LINK_REG_BASE(x) + 0x4)
172#define XB_LINK_CTRL(x) (XB_LINK_REG_BASE(x) + 0xc)
173#define XB_LINK_STATUS(x) (XB_LINK_REG_BASE(x) + 0x14)
174#define XB_LINK_ARB_UPPER(x) (XB_LINK_REG_BASE(x) + 0x1c)
175#define XB_LINK_ARB_LOWER(x) (XB_LINK_REG_BASE(x) + 0x24)
176#define XB_LINK_STATUS_CLR(x) (XB_LINK_REG_BASE(x) + 0x2c)
177#define XB_LINK_RESET(x) (XB_LINK_REG_BASE(x) + 0x34)
178#define XB_LINK_AUX_STATUS(x) (XB_LINK_REG_BASE(x) + 0x3c)
179
180/* link_control(x) */
181#define XB_CTRL_LINKALIVE_IE 0x80000000 /* link comes alive */
182 /* reserved: 0x40000000 */
183#define XB_CTRL_PERF_CTR_MODE_MSK 0x30000000 /* perf counter mode */
184#define XB_CTRL_IBUF_LEVEL_MSK 0x0e000000 /* input packet buffer level */
185#define XB_CTRL_8BIT_MODE 0x01000000 /* force link into 8 bit mode */
186#define XB_CTRL_BAD_LLP_PKT 0x00800000 /* force bad LLP packet */
187#define XB_CTRL_WIDGET_CR_MSK 0x007c0000 /* LLP widget credit mask */
188#define XB_CTRL_WIDGET_CR_SHFT 18 /* LLP widget credit shift */
189#define XB_CTRL_ILLEGAL_DST_IE 0x00020000 /* illegal destination */
190#define XB_CTRL_OALLOC_IBUF_IE 0x00010000 /* overallocated input buffer */
191 /* reserved: 0x0000fe00 */
192#define XB_CTRL_BNDWDTH_ALLOC_IE 0x00000100 /* bandwidth alloc */
193#define XB_CTRL_RCV_CNT_OFLOW_IE 0x00000080 /* rcv retry overflow */
194#define XB_CTRL_XMT_CNT_OFLOW_IE 0x00000040 /* xmt retry overflow */
195#define XB_CTRL_XMT_MAX_RTRY_IE 0x00000020 /* max transmit retry */
196#define XB_CTRL_RCV_IE 0x00000010 /* receive */
197#define XB_CTRL_XMT_RTRY_IE 0x00000008 /* transmit retry */
198 /* reserved: 0x00000004 */
199#define XB_CTRL_MAXREQ_TOUT_IE 0x00000002 /* maximum request timeout */
200#define XB_CTRL_SRC_TOUT_IE 0x00000001 /* source timeout */
201
202/* link_status(x) */
203#define XB_STAT_LINKALIVE XB_CTRL_LINKALIVE_IE
204 /* reserved: 0x7ff80000 */
205#define XB_STAT_MULTI_ERR 0x00040000 /* multi error */
206#define XB_STAT_ILLEGAL_DST_ERR XB_CTRL_ILLEGAL_DST_IE
207#define XB_STAT_OALLOC_IBUF_ERR XB_CTRL_OALLOC_IBUF_IE
208#define XB_STAT_BNDWDTH_ALLOC_ID_MSK 0x0000ff00 /* port bitmask */
209#define XB_STAT_RCV_CNT_OFLOW_ERR XB_CTRL_RCV_CNT_OFLOW_IE
210#define XB_STAT_XMT_CNT_OFLOW_ERR XB_CTRL_XMT_CNT_OFLOW_IE
211#define XB_STAT_XMT_MAX_RTRY_ERR XB_CTRL_XMT_MAX_RTRY_IE
212#define XB_STAT_RCV_ERR XB_CTRL_RCV_IE
213#define XB_STAT_XMT_RTRY_ERR XB_CTRL_XMT_RTRY_IE
214 /* reserved: 0x00000004 */
215#define XB_STAT_MAXREQ_TOUT_ERR XB_CTRL_MAXREQ_TOUT_IE
216#define XB_STAT_SRC_TOUT_ERR XB_CTRL_SRC_TOUT_IE
217
218/* link_aux_status(x) */
219#define XB_AUX_STAT_RCV_CNT 0xff000000
220#define XB_AUX_STAT_XMT_CNT 0x00ff0000
221#define XB_AUX_STAT_TOUT_DST 0x0000ff00
222#define XB_AUX_LINKFAIL_RST_BAD 0x00000040
223#define XB_AUX_STAT_PRESENT 0x00000020
224#define XB_AUX_STAT_PORT_WIDTH 0x00000010
225 /* reserved: 0x0000000f */
226
227/*
228 * link_arb_upper/link_arb_lower(x), (reg) should be the link_arb_upper
229 * register if (x) is 0x8..0xb, link_arb_lower if (x) is 0xc..0xf
230 */
231#define XB_ARB_GBR_MSK 0x1f
232#define XB_ARB_RR_MSK 0x7
233#define XB_ARB_GBR_SHFT(x) (((x) & 0x3) * 8)
234#define XB_ARB_RR_SHFT(x) (((x) & 0x3) * 8 + 5)
235#define XB_ARB_GBR_CNT(reg,x) ((reg) >> XB_ARB_GBR_SHFT(x) & XB_ARB_GBR_MSK)
236#define XB_ARB_RR_CNT(reg,x) ((reg) >> XB_ARB_RR_SHFT(x) & XB_ARB_RR_MSK)
237
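/*
 * Illustrative example (not in the original header): for source widget
 * 0xa the upper arbitration register is used (widgets 0x8..0xb), and
 * since 0xa & 0x3 == 2 the GBR count occupies bits 16-20 and the RR
 * count bits 21-23 of that register.
 */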
238/* XBOW_WID_STAT */
239#define XB_WID_STAT_LINK_INTR_SHFT (24)
240#define XB_WID_STAT_LINK_INTR_MASK (0xFF << XB_WID_STAT_LINK_INTR_SHFT)
241#define XB_WID_STAT_LINK_INTR(x) (0x1 << (((x)&7) + XB_WID_STAT_LINK_INTR_SHFT))
242#define XB_WID_STAT_WIDGET0_INTR 0x00800000
243#define XB_WID_STAT_SRCID_MASK 0x000003c0 /* Xbridge only */
244#define XB_WID_STAT_REG_ACC_ERR 0x00000020
245#define XB_WID_STAT_RECV_TOUT 0x00000010 /* Xbridge only */
246#define XB_WID_STAT_ARB_TOUT 0x00000008 /* Xbridge only */
247#define XB_WID_STAT_XTALK_ERR 0x00000004
248#define XB_WID_STAT_DST_TOUT 0x00000002 /* Xbridge only */
249#define XB_WID_STAT_MULTI_ERR 0x00000001
250
251#define XB_WID_STAT_SRCID_SHFT 6
252
253/* XBOW_WID_CONTROL */
254#define XB_WID_CTRL_REG_ACC_IE XB_WID_STAT_REG_ACC_ERR
255#define XB_WID_CTRL_RECV_TOUT XB_WID_STAT_RECV_TOUT
256#define XB_WID_CTRL_ARB_TOUT XB_WID_STAT_ARB_TOUT
257#define XB_WID_CTRL_XTALK_IE XB_WID_STAT_XTALK_ERR
258
259/* XBOW_WID_INT_UPPER */
260/* defined in xwidget.h for WIDGET_INTDEST_UPPER_ADDR */
261
262/* XBOW WIDGET part number, in the ID register */
263#define XBOW_WIDGET_PART_NUM 0x0 /* crossbow */
264#define XXBOW_WIDGET_PART_NUM 0xd000 /* Xbridge */
265#define XBOW_WIDGET_MFGR_NUM 0x0
266#define XXBOW_WIDGET_MFGR_NUM 0x0
267#define PXBOW_WIDGET_PART_NUM 0xd100 /* PIC */
268
269#define XBOW_REV_1_0 0x1 /* xbow rev 1.0 is "1" */
270#define XBOW_REV_1_1 0x2 /* xbow rev 1.1 is "2" */
271#define XBOW_REV_1_2 0x3 /* xbow rev 1.2 is "3" */
272#define XBOW_REV_1_3 0x4 /* xbow rev 1.3 is "4" */
273#define XBOW_REV_2_0 0x5 /* xbow rev 2.0 is "5" */
274
275#define XXBOW_PART_REV_1_0 (XXBOW_WIDGET_PART_NUM << 4 | 0x1 )
276#define XXBOW_PART_REV_2_0 (XXBOW_WIDGET_PART_NUM << 4 | 0x2 )
277
278/* XBOW_WID_ARB_RELOAD */
279#define XBOW_WID_ARB_RELOAD_INT 0x3f /* GBR reload interval */
280
281#define IS_XBRIDGE_XBOW(wid) \
282 (XWIDGET_PART_NUM(wid) == XXBOW_WIDGET_PART_NUM && \
283 XWIDGET_MFG_NUM(wid) == XXBOW_WIDGET_MFGR_NUM)
284
285#define IS_PIC_XBOW(wid) \
286 (XWIDGET_PART_NUM(wid) == PXBOW_WIDGET_PART_NUM && \
287 XWIDGET_MFG_NUM(wid) == XXBOW_WIDGET_MFGR_NUM)
288
289#define XBOW_WAR_ENABLED(pv, widid) ((1 << XWIDGET_REV_NUM(widid)) & pv)
290
291#endif /* _ASM_IA64_SN_XTALK_XBOW_H */
diff --git a/arch/ia64/sn/include/xtalk/xwidgetdev.h b/arch/ia64/sn/include/xtalk/xwidgetdev.h
new file mode 100644
index 000000000000..c5f4bc5cc033
--- /dev/null
+++ b/arch/ia64/sn/include/xtalk/xwidgetdev.h
@@ -0,0 +1,70 @@
1/*
2 * This file is subject to the terms and conditions of the GNU General Public
3 * License. See the file "COPYING" in the main directory of this archive
4 * for more details.
5 *
6 * Copyright (C) 1992-1997,2000-2003 Silicon Graphics, Inc. All Rights Reserved.
7 */
8#ifndef _ASM_IA64_SN_XTALK_XWIDGET_H
9#define _ASM_IA64_SN_XTALK_XWIDGET_H
10
11/* WIDGET_ID */
12#define WIDGET_REV_NUM 0xf0000000
13#define WIDGET_PART_NUM 0x0ffff000
14#define WIDGET_MFG_NUM 0x00000ffe
15#define WIDGET_REV_NUM_SHFT 28
16#define WIDGET_PART_NUM_SHFT 12
17#define WIDGET_MFG_NUM_SHFT 1
18
19#define XWIDGET_PART_NUM(widgetid) (((widgetid) & WIDGET_PART_NUM) >> WIDGET_PART_NUM_SHFT)
20#define XWIDGET_REV_NUM(widgetid) (((widgetid) & WIDGET_REV_NUM) >> WIDGET_REV_NUM_SHFT)
21#define XWIDGET_MFG_NUM(widgetid) (((widgetid) & WIDGET_MFG_NUM) >> WIDGET_MFG_NUM_SHFT)
22#define XWIDGET_PART_REV_NUM(widgetid) ((XWIDGET_PART_NUM(widgetid) << 4) | \
23 XWIDGET_REV_NUM(widgetid))
24#define XWIDGET_PART_REV_NUM_REV(partrev) (partrev & 0xf)
25
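Editor's note (not part of the original header): the XWIDGET_* accessors above simply mask and shift fields out of the WIDGET_ID register. A stand-alone sketch of the decode, reusing the field defines above and the Xbridge part number and XBOW_WAR_ENABLED macro from xbow.h earlier; the ID value and the WAR bit-vector are hypothetical:

#include <stdio.h>
#include <stdint.h>

#define WIDGET_REV_NUM		0xf0000000
#define WIDGET_PART_NUM		0x0ffff000
#define WIDGET_MFG_NUM		0x00000ffe
#define WIDGET_REV_NUM_SHFT	28
#define WIDGET_PART_NUM_SHFT	12
#define WIDGET_MFG_NUM_SHFT	1

#define XWIDGET_PART_NUM(id)	(((id) & WIDGET_PART_NUM) >> WIDGET_PART_NUM_SHFT)
#define XWIDGET_REV_NUM(id)	(((id) & WIDGET_REV_NUM) >> WIDGET_REV_NUM_SHFT)
#define XWIDGET_MFG_NUM(id)	(((id) & WIDGET_MFG_NUM) >> WIDGET_MFG_NUM_SHFT)
/* copied from xbow.h above */
#define XBOW_WAR_ENABLED(pv, widid) ((1 << XWIDGET_REV_NUM(widid)) & pv)

int main(void)
{
	/* hypothetical WIDGET_ID: part 0xd000 (Xbridge), rev 2, mfg 0 */
	uint32_t wid = (0x2u << WIDGET_REV_NUM_SHFT) |
		       (0xd000u << WIDGET_PART_NUM_SHFT);

	printf("part=0x%x rev=0x%x mfg=0x%x\n",
	       (unsigned)XWIDGET_PART_NUM(wid),
	       (unsigned)XWIDGET_REV_NUM(wid),
	       (unsigned)XWIDGET_MFG_NUM(wid));

	/* pv = 0x4 would mean "workaround applies to rev 2 parts" */
	printf("WAR enabled: %s\n", XBOW_WAR_ENABLED(0x4, wid) ? "yes" : "no");
	return 0;
}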
26/* widget configuration registers */
27struct widget_cfg{
28 uint32_t w_id; /* 0x04 */
29 uint32_t w_pad_0; /* 0x00 */
30 uint32_t w_status; /* 0x0c */
31 uint32_t w_pad_1; /* 0x08 */
32 uint32_t w_err_upper_addr; /* 0x14 */
33 uint32_t w_pad_2; /* 0x10 */
34 uint32_t w_err_lower_addr; /* 0x1c */
35 uint32_t w_pad_3; /* 0x18 */
36 uint32_t w_control; /* 0x24 */
37 uint32_t w_pad_4; /* 0x20 */
38 uint32_t w_req_timeout; /* 0x2c */
39 uint32_t w_pad_5; /* 0x28 */
40 uint32_t w_intdest_upper_addr; /* 0x34 */
41 uint32_t w_pad_6; /* 0x30 */
42 uint32_t w_intdest_lower_addr; /* 0x3c */
43 uint32_t w_pad_7; /* 0x38 */
44 uint32_t w_err_cmd_word; /* 0x44 */
45 uint32_t w_pad_8; /* 0x40 */
46 uint32_t w_llp_cfg; /* 0x4c */
47 uint32_t w_pad_9; /* 0x48 */
48 uint32_t w_tflush; /* 0x54 */
49 uint32_t w_pad_10; /* 0x50 */
50};
51
52/*
53 * Crosstalk Widget Hardware Identification, as defined in the Crosstalk spec.
54 */
55struct xwidget_hwid{
56 int mfg_num;
57 int rev_num;
58 int part_num;
59};
60
61struct xwidget_info{
62
63 struct xwidget_hwid xwi_hwid; /* Widget Identification */
64 char xwi_masterxid; /* Hub's Widget Port Number */
65 void *xwi_hubinfo; /* Hub's provider private info */
66 uint64_t *xwi_hub_provider; /* prom provider functions */
67 void *xwi_vertex;
68};
69
70#endif /* _ASM_IA64_SN_XTALK_XWIDGET_H */
diff --git a/arch/ia64/sn/kernel/Makefile b/arch/ia64/sn/kernel/Makefile
new file mode 100644
index 000000000000..6c7f4d9e8ea0
--- /dev/null
+++ b/arch/ia64/sn/kernel/Makefile
@@ -0,0 +1,12 @@
1# arch/ia64/sn/kernel/Makefile
2#
3# This file is subject to the terms and conditions of the GNU General Public
4# License. See the file "COPYING" in the main directory of this archive
5# for more details.
6#
7# Copyright (C) 1999,2001-2003 Silicon Graphics, Inc. All Rights Reserved.
8#
9
10obj-y += setup.o bte.o bte_error.o irq.o mca.o idle.o \
11 huberror.o io_init.o iomv.o klconflib.o sn2/
12obj-$(CONFIG_IA64_GENERIC) += machvec.o
diff --git a/arch/ia64/sn/kernel/bte.c b/arch/ia64/sn/kernel/bte.c
new file mode 100644
index 000000000000..ce0bc4085eae
--- /dev/null
+++ b/arch/ia64/sn/kernel/bte.c
@@ -0,0 +1,453 @@
1/*
2 * This file is subject to the terms and conditions of the GNU General Public
3 * License. See the file "COPYING" in the main directory of this archive
4 * for more details.
5 *
6 * Copyright (c) 2000-2003 Silicon Graphics, Inc. All Rights Reserved.
7 */
8
9#include <linux/config.h>
10#include <linux/module.h>
11#include <asm/sn/nodepda.h>
12#include <asm/sn/addrs.h>
13#include <asm/sn/arch.h>
14#include <asm/sn/sn_cpuid.h>
15#include <asm/sn/pda.h>
16#include <asm/sn/shubio.h>
17#include <asm/nodedata.h>
18#include <asm/delay.h>
19
20#include <linux/bootmem.h>
21#include <linux/string.h>
22#include <linux/sched.h>
23
24#include <asm/sn/bte.h>
25
26#ifndef L1_CACHE_MASK
27#define L1_CACHE_MASK (L1_CACHE_BYTES - 1)
28#endif
29
30/* two interfaces on two btes */
31#define MAX_INTERFACES_TO_TRY 4
32
33static struct bteinfo_s *bte_if_on_node(nasid_t nasid, int interface)
34{
35 nodepda_t *tmp_nodepda;
36
37 tmp_nodepda = NODEPDA(nasid_to_cnodeid(nasid));
38 return &tmp_nodepda->bte_if[interface];
39
40}
41
42/************************************************************************
43 * Block Transfer Engine copy related functions.
44 *
45 ***********************************************************************/
46
47/*
48 * bte_copy(src, dest, len, mode, notification)
49 *
50 * Use the block transfer engine to move kernel memory from src to dest
51 * using the assigned mode.
52 *
53 * Parameters:
54 * src - physical address of the transfer source.
55 * dest - physical address of the transfer destination.
56 * len - number of bytes to transfer from source to dest.
57 * mode - hardware defined. See reference information
58 * for IBCT0/1 in the SHUB Programmers Reference
59 * notification - kernel virtual address of the notification cache
60 * line. If NULL, the default is used and
61 * the bte_copy is synchronous.
62 *
63 * NOTE: This function requires src, dest, and len to
64 * be cacheline aligned.
65 */
66bte_result_t bte_copy(u64 src, u64 dest, u64 len, u64 mode, void *notification)
67{
68 u64 transfer_size;
69 u64 transfer_stat;
70 struct bteinfo_s *bte;
71 bte_result_t bte_status;
72 unsigned long irq_flags;
73 unsigned long itc_end = 0;
74 struct bteinfo_s *btes_to_try[MAX_INTERFACES_TO_TRY];
75 int bte_if_index;
76 int bte_pri, bte_sec;
77
78 BTE_PRINTK(("bte_copy(0x%lx, 0x%lx, 0x%lx, 0x%lx, 0x%p)\n",
79 src, dest, len, mode, notification));
80
81 if (len == 0) {
82 return BTE_SUCCESS;
83 }
84
85 BUG_ON((len & L1_CACHE_MASK) ||
86 (src & L1_CACHE_MASK) || (dest & L1_CACHE_MASK));
87 BUG_ON(!(len < ((BTE_LEN_MASK + 1) << L1_CACHE_SHIFT)));
88
89 /* CPU 0 (per node) tries bte0 first, CPU 1 tries bte1 first */
90 if (cpuid_to_subnode(smp_processor_id()) == 0) {
91 bte_pri = 0;
92 bte_sec = 1;
93 } else {
94 bte_pri = 1;
95 bte_sec = 0;
96 }
97
98 if (mode & BTE_USE_DEST) {
99 /* try remote then local */
100 btes_to_try[0] = bte_if_on_node(NASID_GET(dest), bte_pri);
101 btes_to_try[1] = bte_if_on_node(NASID_GET(dest), bte_sec);
102 if (mode & BTE_USE_ANY) {
103 btes_to_try[2] = bte_if_on_node(get_nasid(), bte_pri);
104 btes_to_try[3] = bte_if_on_node(get_nasid(), bte_sec);
105 } else {
106 btes_to_try[2] = NULL;
107 btes_to_try[3] = NULL;
108 }
109 } else {
110 /* try local then remote */
111 btes_to_try[0] = bte_if_on_node(get_nasid(), bte_pri);
112 btes_to_try[1] = bte_if_on_node(get_nasid(), bte_sec);
113 if (mode & BTE_USE_ANY) {
114 btes_to_try[2] = bte_if_on_node(NASID_GET(dest), bte_pri);
115 btes_to_try[3] = bte_if_on_node(NASID_GET(dest), bte_sec);
116 } else {
117 btes_to_try[2] = NULL;
118 btes_to_try[3] = NULL;
119 }
120 }
121
122retry_bteop:
123 do {
124 local_irq_save(irq_flags);
125
126 bte_if_index = 0;
127
128 /* Attempt to lock one of the BTE interfaces. */
129 while (bte_if_index < MAX_INTERFACES_TO_TRY) {
130 bte = btes_to_try[bte_if_index++];
131
132 if (bte == NULL) {
133 continue;
134 }
135
136 if (spin_trylock(&bte->spinlock)) {
137 if (!(*bte->most_rcnt_na & BTE_WORD_AVAILABLE) ||
138 (BTE_LNSTAT_LOAD(bte) & BTE_ACTIVE)) {
139 /* Got the lock but BTE still busy */
140 spin_unlock(&bte->spinlock);
141 } else {
142 /* we got the lock and it's not busy */
143 break;
144 }
145 }
146 bte = NULL;
147 }
148
149 if (bte != NULL) {
150 break;
151 }
152
153 local_irq_restore(irq_flags);
154
155 if (!(mode & BTE_WACQUIRE)) {
156 return BTEFAIL_NOTAVAIL;
157 }
158 } while (1);
159
160 if (notification == NULL) {
161 /* User does not want to be notified. */
162 bte->most_rcnt_na = &bte->notify;
163 } else {
164 bte->most_rcnt_na = notification;
165 }
166
167 /* Calculate the number of cache lines to transfer. */
168 transfer_size = ((len >> L1_CACHE_SHIFT) & BTE_LEN_MASK);
169
170 /* Initialize the notification to a known value. */
171 *bte->most_rcnt_na = BTE_WORD_BUSY;
172
173 /* Set the status reg busy bit and transfer length */
174 BTE_PRINTKV(("IBLS = 0x%lx\n", IBLS_BUSY | transfer_size));
175 BTE_LNSTAT_STORE(bte, IBLS_BUSY | transfer_size);
176
177 /* Set the source and destination registers */
178 BTE_PRINTKV(("IBSA = 0x%lx)\n", (TO_PHYS(src))));
179 BTE_SRC_STORE(bte, TO_PHYS(src));
180 BTE_PRINTKV(("IBDA = 0x%lx)\n", (TO_PHYS(dest))));
181 BTE_DEST_STORE(bte, TO_PHYS(dest));
182
183 /* Set the notification register */
184 BTE_PRINTKV(("IBNA = 0x%lx)\n",
185 TO_PHYS(ia64_tpa((unsigned long)bte->most_rcnt_na))));
186 BTE_NOTIF_STORE(bte,
187 TO_PHYS(ia64_tpa((unsigned long)bte->most_rcnt_na)));
188
189 /* Initiate the transfer */
190 BTE_PRINTK(("IBCT = 0x%lx)\n", BTE_VALID_MODE(mode)));
191 BTE_CTRL_STORE(bte, BTE_VALID_MODE(mode));
192
193 itc_end = ia64_get_itc() + (40000000 * local_cpu_data->cyc_per_usec);
194
195 spin_unlock_irqrestore(&bte->spinlock, irq_flags);
196
197 if (notification != NULL) {
198 return BTE_SUCCESS;
199 }
200
201 while ((transfer_stat = *bte->most_rcnt_na) == BTE_WORD_BUSY) {
202 if (ia64_get_itc() > itc_end) {
203 BTE_PRINTK(("BTE timeout nasid 0x%x bte%d IBLS = 0x%lx na 0x%lx\n",
204 NASID_GET(bte->bte_base_addr), bte->bte_num,
205 BTE_LNSTAT_LOAD(bte), *bte->most_rcnt_na) );
206 bte->bte_error_count++;
207 bte->bh_error = IBLS_ERROR;
208 bte_error_handler((unsigned long)NODEPDA(bte->bte_cnode));
209 *bte->most_rcnt_na = BTE_WORD_AVAILABLE;
210 goto retry_bteop;
211 }
212 }
213
214 BTE_PRINTKV((" Delay Done. IBLS = 0x%lx, most_rcnt_na = 0x%lx\n",
215 BTE_LNSTAT_LOAD(bte), *bte->most_rcnt_na));
216
217 if (transfer_stat & IBLS_ERROR) {
218 bte_status = transfer_stat & ~IBLS_ERROR;
219 } else {
220 bte_status = BTE_SUCCESS;
221 }
222 *bte->most_rcnt_na = BTE_WORD_AVAILABLE;
223
224 BTE_PRINTK(("Returning status is 0x%lx and most_rcnt_na is 0x%lx\n",
225 BTE_LNSTAT_LOAD(bte), *bte->most_rcnt_na));
226
227 return bte_status;
228}
229
230EXPORT_SYMBOL(bte_copy);
231
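Editor's sketch (not part of the original file): the synchronous use of bte_copy() described in the comment block above. This assumes the usual <asm/sn/bte.h> and <linux/errno.h> kernel context; the wrapper name and error mapping are illustrative only, and the caller remains responsible for passing a valid hardware mode and cacheline-aligned physical addresses.

/*
 * Hedged example: move len bytes from src_phys to dst_phys and block
 * until the transfer finishes.  src_phys, dst_phys and len must all be
 * L1-cacheline aligned; a NULL notification pointer makes bte_copy()
 * spin on the notification word itself.
 */
static int example_bte_move(u64 src_phys, u64 dst_phys, u64 len, u64 mode)
{
	bte_result_t rc;

	rc = bte_copy(src_phys, dst_phys, len, mode, NULL);

	return (rc == BTE_SUCCESS) ? 0 : -EIO;
}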
232/*
233 * bte_unaligned_copy(src, dest, len, mode)
234 *
235 * use the block transfer engine to move kernel
236 * memory from src to dest using the assigned mode.
237 *
238 * Parameters:
239 * src - physical address of the transfer source.
240 * dest - physical address of the transfer destination.
241 * len - number of bytes to transfer from source to dest.
242 * mode - hardware defined. See reference information
243 * for IBCT0/1 in the SGI documentation.
244 *
245 * NOTE: If the source, dest, and len are all cache line aligned,
246 * then it would be _FAR_ preferable to use bte_copy instead.
247 */
248bte_result_t bte_unaligned_copy(u64 src, u64 dest, u64 len, u64 mode)
249{
250 int destFirstCacheOffset;
251 u64 headBteSource;
252 u64 headBteLen;
253 u64 headBcopySrcOffset;
254 u64 headBcopyDest;
255 u64 headBcopyLen;
256 u64 footBteSource;
257 u64 footBteLen;
258 u64 footBcopyDest;
259 u64 footBcopyLen;
260 bte_result_t rv;
261 char *bteBlock, *bteBlock_unaligned;
262
263 if (len == 0) {
264 return BTE_SUCCESS;
265 }
266
267 /* temporary buffer used during unaligned transfers */
268 bteBlock_unaligned = kmalloc(len + 3 * L1_CACHE_BYTES,
269 GFP_KERNEL | GFP_DMA);
270 if (bteBlock_unaligned == NULL) {
271 return BTEFAIL_NOTAVAIL;
272 }
273 bteBlock = (char *)L1_CACHE_ALIGN((u64) bteBlock_unaligned);
274
275 headBcopySrcOffset = src & L1_CACHE_MASK;
276 destFirstCacheOffset = dest & L1_CACHE_MASK;
277
278 /*
279 * At this point, the transfer is broken into
280 * (up to) three sections. The first section is
281 * from the start address to the first physical
282 * cache line, the second is from the first physical
283 * cache line to the last complete cache line,
284 * and the third is from the last cache line to the
285 * end of the buffer. The first and third sections
286 * are handled by bte copying into a temporary buffer
287 * and then bcopy'ing the necessary section into the
288 * final location. The middle section is handled with
289 * a standard bte copy.
290 *
291 * One nasty exception to the above rule is when the
292 * source and destination are not symmetrically
293 * mis-aligned. If the source offset from the first
294 * cache line is different from the destination offset,
295 * we make the first section be the entire transfer
296 * and then bcopy the entire block into place.
297 */
298 if (headBcopySrcOffset == destFirstCacheOffset) {
299
300 /*
301 * Both the source and destination are the same
302 * distance from a cache line boundary so we can
303 * use the bte to transfer the bulk of the
304 * data.
305 */
306 headBteSource = src & ~L1_CACHE_MASK;
307 headBcopyDest = dest;
308 if (headBcopySrcOffset) {
309 headBcopyLen =
310 (len >
311 (L1_CACHE_BYTES -
312 headBcopySrcOffset) ? L1_CACHE_BYTES
313 - headBcopySrcOffset : len);
314 headBteLen = L1_CACHE_BYTES;
315 } else {
316 headBcopyLen = 0;
317 headBteLen = 0;
318 }
319
320 if (len > headBcopyLen) {
321 footBcopyLen = (len - headBcopyLen) & L1_CACHE_MASK;
322 footBteLen = L1_CACHE_BYTES;
323
324 footBteSource = src + len - footBcopyLen;
325 footBcopyDest = dest + len - footBcopyLen;
326
327 if (footBcopyDest == (headBcopyDest + headBcopyLen)) {
328 /*
329 * We have two contiguous bcopy
330 * blocks. Merge them.
331 */
332 headBcopyLen += footBcopyLen;
333 headBteLen += footBteLen;
334 } else if (footBcopyLen > 0) {
335 rv = bte_copy(footBteSource,
336 ia64_tpa((unsigned long)bteBlock),
337 footBteLen, mode, NULL);
338 if (rv != BTE_SUCCESS) {
339 kfree(bteBlock_unaligned);
340 return rv;
341 }
342
343 memcpy(__va(footBcopyDest),
344 (char *)bteBlock, footBcopyLen);
345 }
346 } else {
347 footBcopyLen = 0;
348 footBteLen = 0;
349 }
350
351 if (len > (headBcopyLen + footBcopyLen)) {
352 /* now transfer the middle. */
353 rv = bte_copy((src + headBcopyLen),
354 (dest +
355 headBcopyLen),
356 (len - headBcopyLen -
357 footBcopyLen), mode, NULL);
358 if (rv != BTE_SUCCESS) {
359 kfree(bteBlock_unaligned);
360 return rv;
361 }
362
363 }
364 } else {
365
366 /*
367 * The transfer is not symmetric; we will
368 * allocate a buffer large enough for all the
369 * data, bte_copy into that buffer and then
370 * bcopy to the destination.
371 */
372
373 /* Add the leader from source */
374 headBteLen = len + (src & L1_CACHE_MASK);
375 /* Add the trailing bytes from footer. */
376 headBteLen += L1_CACHE_BYTES - (headBteLen & L1_CACHE_MASK);
377 headBteSource = src & ~L1_CACHE_MASK;
378 headBcopySrcOffset = src & L1_CACHE_MASK;
379 headBcopyDest = dest;
380 headBcopyLen = len;
381 }
382
383 if (headBcopyLen > 0) {
384 rv = bte_copy(headBteSource,
385 ia64_tpa((unsigned long)bteBlock), headBteLen,
386 mode, NULL);
387 if (rv != BTE_SUCCESS) {
388 kfree(bteBlock_unaligned);
389 return rv;
390 }
391
392 memcpy(__va(headBcopyDest), ((char *)bteBlock +
393 headBcopySrcOffset), headBcopyLen);
394 }
395 kfree(bteBlock_unaligned);
396 return BTE_SUCCESS;
397}
398
399EXPORT_SYMBOL(bte_unaligned_copy);
400
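Editor's note (not part of the original file): the head/middle/foot split that bte_unaligned_copy() describes in its long comment can be checked with a little arithmetic. The sketch below assumes a 128-byte L1 cache line (the actual value comes from L1_CACHE_BYTES, which is not shown in this hunk) and uses made-up addresses where source and destination share the same cache-line offset, i.e. the symmetric case:

#include <stdio.h>
#include <stdint.h>

#define CL_BYTES 128			/* assumed L1_CACHE_BYTES */
#define CL_MASK  (CL_BYTES - 1)

int main(void)
{
	/* src 0x1040 and its destination both sit 0x40 bytes into a line */
	uint64_t src = 0x1040, len = 0x300;
	uint64_t head, foot, middle;

	/* head: bytes from src up to the next cache-line boundary */
	head = (src & CL_MASK) ? CL_BYTES - (src & CL_MASK) : 0;
	if (head > len)
		head = len;
	/* foot: trailing bytes past the last full cache line */
	foot = (len - head) & CL_MASK;
	/* middle: the fully aligned part handled by a plain bte_copy() */
	middle = len - head - foot;

	printf("head=%llu middle=%llu foot=%llu\n",
	       (unsigned long long)head,
	       (unsigned long long)middle,
	       (unsigned long long)foot);	/* head=64 middle=640 foot=64 */
	return 0;
}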
401/************************************************************************
402 * Block Transfer Engine initialization functions.
403 *
404 ***********************************************************************/
405
406/*
407 * bte_init_node(nodepda, cnode)
408 *
409 * Initialize the nodepda structure with BTE base addresses and
410 * spinlocks.
411 */
412void bte_init_node(nodepda_t * mynodepda, cnodeid_t cnode)
413{
414 int i;
415
416 /*
417 * Indicate that all the block transfer engines on this node
418 * are available.
419 */
420
421 /*
422 * Allocate one bte_recover_t structure per node. It holds
423 * the recovery lock for the node. All the bte interface structures
424 * will point at this one bte_recover structure to get the lock.
425 */
426 spin_lock_init(&mynodepda->bte_recovery_lock);
427 init_timer(&mynodepda->bte_recovery_timer);
428 mynodepda->bte_recovery_timer.function = bte_error_handler;
429 mynodepda->bte_recovery_timer.data = (unsigned long)mynodepda;
430
431 for (i = 0; i < BTES_PER_NODE; i++) {
432 /* Which link status register should we use? */
433 unsigned long link_status = (i == 0 ? IIO_IBLS0 : IIO_IBLS1);
434 mynodepda->bte_if[i].bte_base_addr = (u64 *)
435 REMOTE_HUB_ADDR(cnodeid_to_nasid(cnode), link_status);
436
437 /*
438 * Initialize the notification and spinlock
439 * so the first transfer can occur.
440 */
441 mynodepda->bte_if[i].most_rcnt_na =
442 &(mynodepda->bte_if[i].notify);
443 mynodepda->bte_if[i].notify = BTE_WORD_AVAILABLE;
444 spin_lock_init(&mynodepda->bte_if[i].spinlock);
445
446 mynodepda->bte_if[i].bte_cnode = cnode;
447 mynodepda->bte_if[i].bte_error_count = 0;
448 mynodepda->bte_if[i].bte_num = i;
449 mynodepda->bte_if[i].cleanup_active = 0;
450 mynodepda->bte_if[i].bh_error = 0;
451 }
452
453}
diff --git a/arch/ia64/sn/kernel/bte_error.c b/arch/ia64/sn/kernel/bte_error.c
new file mode 100644
index 000000000000..fd104312c6bd
--- /dev/null
+++ b/arch/ia64/sn/kernel/bte_error.c
@@ -0,0 +1,198 @@
1/*
2 * This file is subject to the terms and conditions of the GNU General Public
3 * License. See the file "COPYING" in the main directory of this archive
4 * for more details.
5 *
6 * Copyright (c) 2000-2004 Silicon Graphics, Inc. All Rights Reserved.
7 */
8
9#include <linux/types.h>
10#include <asm/sn/sn_sal.h>
11#include "ioerror.h"
12#include <asm/sn/addrs.h>
13#include <asm/sn/shubio.h>
14#include <asm/sn/geo.h>
15#include "xtalk/xwidgetdev.h"
16#include "xtalk/hubdev.h"
17#include <asm/sn/bte.h>
18#include <asm/param.h>
19
20/*
21 * Bte error handling is done in two parts. The first captures
22 * any crb related errors. Since there can be multiple crbs per
23 * interface and multiple interfaces active, we need to wait until
24 * all active crbs are completed. This is the first job of the
25 * second part error handler. When all bte related CRBs are cleanly
26 * completed, it resets the interfaces and gets them ready for new
27 * transfers to be queued.
28 */
29
30void bte_error_handler(unsigned long);
31
32/*
33 * Wait until all BTE related CRBs are completed
34 * and then reset the interfaces.
35 */
36void bte_error_handler(unsigned long _nodepda)
37{
38 struct nodepda_s *err_nodepda = (struct nodepda_s *)_nodepda;
39 spinlock_t *recovery_lock = &err_nodepda->bte_recovery_lock;
40 struct timer_list *recovery_timer = &err_nodepda->bte_recovery_timer;
41 nasid_t nasid;
42 int i;
43 int valid_crbs;
44 unsigned long irq_flags;
45 volatile u64 *notify;
46 bte_result_t bh_error;
47 ii_imem_u_t imem; /* II IMEM Register */
48 ii_icrb0_d_u_t icrbd; /* II CRB Register D */
49 ii_ibcr_u_t ibcr;
50 ii_icmr_u_t icmr;
51 ii_ieclr_u_t ieclr;
52
53 BTE_PRINTK(("bte_error_handler(%p) - %d\n", err_nodepda,
54 smp_processor_id()));
55
56 spin_lock_irqsave(recovery_lock, irq_flags);
57
58 if ((err_nodepda->bte_if[0].bh_error == BTE_SUCCESS) &&
59 (err_nodepda->bte_if[1].bh_error == BTE_SUCCESS)) {
60 BTE_PRINTK(("eh:%p:%d Nothing to do.\n", err_nodepda,
61 smp_processor_id()));
62 spin_unlock_irqrestore(recovery_lock, irq_flags);
63 return;
64 }
65 /*
66 * Lock all interfaces on this node to prevent new transfers
67 * from being queued.
68 */
69 for (i = 0; i < BTES_PER_NODE; i++) {
70 if (err_nodepda->bte_if[i].cleanup_active) {
71 continue;
72 }
73 spin_lock(&err_nodepda->bte_if[i].spinlock);
74 BTE_PRINTK(("eh:%p:%d locked %d\n", err_nodepda,
75 smp_processor_id(), i));
76 err_nodepda->bte_if[i].cleanup_active = 1;
77 }
78
79 /* Determine information about our hub */
80 nasid = cnodeid_to_nasid(err_nodepda->bte_if[0].bte_cnode);
81
82 /*
83 * A BTE transfer can use multiple CRBs. We need to make sure
84 * that all the BTE CRBs are complete (or timed out) before
85 * attempting to clean up the error. Resetting the BTE while
86 * there are still BTE CRBs active will hang the BTE.
87 * We should look at all the CRBs to see if they are allocated
88 * to the BTE and see if they are still active. When none
89 * are active, we can continue with the cleanup.
90 *
91 * We also want to make sure that the local NI port is up.
92 * When a router resets the NI port can go down, while it
93 * goes through the LLP handshake, but then comes back up.
94 */
95 icmr.ii_icmr_regval = REMOTE_HUB_L(nasid, IIO_ICMR);
96 if (icmr.ii_icmr_fld_s.i_crb_mark != 0) {
97 /*
98 * There are errors which still need to be cleaned up by
99 * hubiio_crb_error_handler
100 */
101 mod_timer(recovery_timer, jiffies + (HZ * 5));
102 BTE_PRINTK(("eh:%p:%d Marked Giving up\n", err_nodepda,
103 smp_processor_id()));
104 spin_unlock_irqrestore(recovery_lock, irq_flags);
105 return;
106 }
107 if (icmr.ii_icmr_fld_s.i_crb_vld != 0) {
108
109 valid_crbs = icmr.ii_icmr_fld_s.i_crb_vld;
110
111 for (i = 0; i < IIO_NUM_CRBS; i++) {
112 if (!((1 << i) & valid_crbs)) {
113 /* This crb was not marked as valid, ignore */
114 continue;
115 }
116 icrbd.ii_icrb0_d_regval =
117 REMOTE_HUB_L(nasid, IIO_ICRB_D(i));
118 if (icrbd.d_bteop) {
119 mod_timer(recovery_timer, jiffies + (HZ * 5));
120 BTE_PRINTK(("eh:%p:%d Valid %d, Giving up\n",
121 err_nodepda, smp_processor_id(),
122 i));
123 spin_unlock_irqrestore(recovery_lock,
124 irq_flags);
125 return;
126 }
127 }
128 }
129
130 BTE_PRINTK(("eh:%p:%d Cleaning up\n", err_nodepda, smp_processor_id()));
131 /* Reenable both bte interfaces */
132 imem.ii_imem_regval = REMOTE_HUB_L(nasid, IIO_IMEM);
133 imem.ii_imem_fld_s.i_b0_esd = imem.ii_imem_fld_s.i_b1_esd = 1;
134 REMOTE_HUB_S(nasid, IIO_IMEM, imem.ii_imem_regval);
135
136 /* Clear BTE0/1 error bits */
137 ieclr.ii_ieclr_regval = 0;
138 if (err_nodepda->bte_if[0].bh_error != BTE_SUCCESS)
139 ieclr.ii_ieclr_fld_s.i_e_bte_0 = 1;
140 if (err_nodepda->bte_if[1].bh_error != BTE_SUCCESS)
141 ieclr.ii_ieclr_fld_s.i_e_bte_1 = 1;
142 REMOTE_HUB_S(nasid, IIO_IECLR, ieclr.ii_ieclr_regval);
143
144 /* Reinitialize both BTE state machines. */
145 ibcr.ii_ibcr_regval = REMOTE_HUB_L(nasid, IIO_IBCR);
146 ibcr.ii_ibcr_fld_s.i_soft_reset = 1;
147 REMOTE_HUB_S(nasid, IIO_IBCR, ibcr.ii_ibcr_regval);
148
149 for (i = 0; i < BTES_PER_NODE; i++) {
150 bh_error = err_nodepda->bte_if[i].bh_error;
151 if (bh_error != BTE_SUCCESS) {
152 /* There is an error which needs to be notified */
153 notify = err_nodepda->bte_if[i].most_rcnt_na;
154 BTE_PRINTK(("cnode %d bte %d error=0x%lx\n",
155 err_nodepda->bte_if[i].bte_cnode,
156 err_nodepda->bte_if[i].bte_num,
157 IBLS_ERROR | (u64) bh_error));
158 *notify = IBLS_ERROR | bh_error;
159 err_nodepda->bte_if[i].bh_error = BTE_SUCCESS;
160 }
161
162 err_nodepda->bte_if[i].cleanup_active = 0;
163 BTE_PRINTK(("eh:%p:%d Unlocked %d\n", err_nodepda,
164 smp_processor_id(), i));
165 spin_unlock(&err_nodepda->bte_if[i].spinlock);
166 }
167
168 del_timer(recovery_timer);
169
170 spin_unlock_irqrestore(recovery_lock, irq_flags);
171}
172
173/*
174 * First part error handler. This is called whenever any error CRB interrupt
175 * is generated by the II.
176 */
177void
178bte_crb_error_handler(cnodeid_t cnode, int btenum,
179 int crbnum, ioerror_t * ioe, int bteop)
180{
181 struct bteinfo_s *bte;
182
183
184 bte = &(NODEPDA(cnode)->bte_if[btenum]);
185
186 /*
187 * The caller has already figured out the error type; we save that
188 * in the bte handle structure for the thread exercising the
189 * interface to consume.
190 */
191 bte->bh_error = ioe->ie_errortype + BTEFAIL_OFFSET;
192 bte->bte_error_count++;
193
194 BTE_PRINTK(("Got an error on cnode %d bte %d: HW error type 0x%x\n",
195 bte->bte_cnode, bte->bte_num, ioe->ie_errortype));
196 bte_error_handler((unsigned long) NODEPDA(cnode));
197}
198
diff --git a/arch/ia64/sn/kernel/huberror.c b/arch/ia64/sn/kernel/huberror.c
new file mode 100644
index 000000000000..2bdf684c5066
--- /dev/null
+++ b/arch/ia64/sn/kernel/huberror.c
@@ -0,0 +1,201 @@
1/*
2 * This file is subject to the terms and conditions of the GNU General Public
3 * License. See the file "COPYING" in the main directory of this archive
4 * for more details.
5 *
6 * Copyright (C) 1992 - 1997, 2000,2002-2004 Silicon Graphics, Inc. All rights reserved.
7 */
8
9#include <linux/types.h>
10#include <linux/interrupt.h>
11#include <linux/pci.h>
12#include <asm/delay.h>
13#include <asm/sn/sn_sal.h>
14#include "ioerror.h"
15#include <asm/sn/addrs.h>
16#include <asm/sn/shubio.h>
17#include <asm/sn/geo.h>
18#include "xtalk/xwidgetdev.h"
19#include "xtalk/hubdev.h"
20#include <asm/sn/bte.h>
21
22void hubiio_crb_error_handler(struct hubdev_info *hubdev_info);
23extern void bte_crb_error_handler(cnodeid_t, int, int, ioerror_t *,
24 int);
25static irqreturn_t hub_eint_handler(int irq, void *arg, struct pt_regs *ep)
26{
27 struct hubdev_info *hubdev_info;
28 struct ia64_sal_retval ret_stuff;
29 nasid_t nasid;
30
31 ret_stuff.status = 0;
32 ret_stuff.v0 = 0;
33 hubdev_info = (struct hubdev_info *)arg;
34 nasid = hubdev_info->hdi_nasid;
35 SAL_CALL_NOLOCK(ret_stuff, SN_SAL_HUB_ERROR_INTERRUPT,
36 (u64) nasid, 0, 0, 0, 0, 0, 0);
37
38 if ((int)ret_stuff.v0)
39 panic("hubii_eint_handler(): Fatal TIO Error");
40
41 if (!(nasid & 1)) /* Not a TIO, handle CRB errors */
42 (void)hubiio_crb_error_handler(hubdev_info);
43
44 return IRQ_HANDLED;
45}
46
47/*
48 * Free the hub CRB "crbnum" which encountered an error.
49 * The assumption is that error handling was successfully done,
50 * and we now want to return the CRB back to Hub for normal usage.
51 *
52 * In order to free the CRB, all that's needed is to de-allocate it
53 *
54 * Assumption:
55 * No other processor is mucking around with the hub control register.
56 * So, upper layer has to single thread this.
57 */
58void hubiio_crb_free(struct hubdev_info *hubdev_info, int crbnum)
59{
60 ii_icrb0_b_u_t icrbb;
61
62 /*
63 * The hardware does NOT clear the mark bit, so it must get cleared
64 * here to be sure the error is not processed twice.
65 */
66 icrbb.ii_icrb0_b_regval = REMOTE_HUB_L(hubdev_info->hdi_nasid,
67 IIO_ICRB_B(crbnum));
68 icrbb.b_mark = 0;
69 REMOTE_HUB_S(hubdev_info->hdi_nasid, IIO_ICRB_B(crbnum),
70 icrbb.ii_icrb0_b_regval);
71 /*
72 * Deallocate the register and wait till the hub indicates it's done.
73 */
74 REMOTE_HUB_S(hubdev_info->hdi_nasid, IIO_ICDR, (IIO_ICDR_PND | crbnum));
75 while (REMOTE_HUB_L(hubdev_info->hdi_nasid, IIO_ICDR) & IIO_ICDR_PND)
76 udelay(1);
77
78}
79
80/*
81 * hubiio_crb_error_handler
82 *
83 * This routine gets invoked when a hub gets an error
84 * interrupt. So, the routine is running in interrupt context
85 * at error interrupt level.
86 * Action:
87 * It's responsible for identifying ALL the CRBs that are marked
88 * with error, and processing them.
89 *
90 * If you find a CRB that's marked with an error, map it to the
91 * reason it caused the error, and invoke the appropriate error handler.
92 *
93 * XXX Be aware of the information in the context register.
94 *
95 * NOTE:
96 * Use REMOTE_HUB_* macro instead of LOCAL_HUB_* so that the interrupt
97 * handler can be run on any node. (not necessarily the node
98 * corresponding to the hub that encountered error).
99 */
100
101void hubiio_crb_error_handler(struct hubdev_info *hubdev_info)
102{
103 nasid_t nasid;
104 ii_icrb0_a_u_t icrba; /* II CRB Register A */
105 ii_icrb0_b_u_t icrbb; /* II CRB Register B */
106 ii_icrb0_c_u_t icrbc; /* II CRB Register C */
107 ii_icrb0_d_u_t icrbd; /* II CRB Register D */
108 ii_icrb0_e_u_t icrbe; /* II CRB Register D */
109 int i;
110 int num_errors = 0; /* Num of errors handled */
111 ioerror_t ioerror;
112
113 nasid = hubdev_info->hdi_nasid;
114
115 /*
116 * XXX - Add locking for any recovery actions
117 */
118 /*
119 * Scan through all CRBs in the Hub, and handle the errors
120 * in any of the CRBs marked.
121 */
122 for (i = 0; i < IIO_NUM_CRBS; i++) {
123 /* Check this crb entry to see if it is in error. */
124 icrbb.ii_icrb0_b_regval = REMOTE_HUB_L(nasid, IIO_ICRB_B(i));
125
126 if (icrbb.b_mark == 0) {
127 continue;
128 }
129
130 icrba.ii_icrb0_a_regval = REMOTE_HUB_L(nasid, IIO_ICRB_A(i));
131
132 IOERROR_INIT(&ioerror);
133
134 /* read other CRB error registers. */
135 icrbc.ii_icrb0_c_regval = REMOTE_HUB_L(nasid, IIO_ICRB_C(i));
136 icrbd.ii_icrb0_d_regval = REMOTE_HUB_L(nasid, IIO_ICRB_D(i));
137 icrbe.ii_icrb0_e_regval = REMOTE_HUB_L(nasid, IIO_ICRB_E(i));
138
139 IOERROR_SETVALUE(&ioerror, errortype, icrbb.b_ecode);
140
141 /* Check if this error is due to BTE operation,
142 * and handle it separately.
143 */
144 if (icrbd.d_bteop ||
145 ((icrbb.b_initiator == IIO_ICRB_INIT_BTE0 ||
146 icrbb.b_initiator == IIO_ICRB_INIT_BTE1) &&
147 (icrbb.b_imsgtype == IIO_ICRB_IMSGT_BTE ||
148 icrbb.b_imsgtype == IIO_ICRB_IMSGT_SN1NET))) {
149
150 int bte_num;
151
152 if (icrbd.d_bteop)
153 bte_num = icrbc.c_btenum;
154 else /* b_initiator bit 2 gives BTE number */
155 bte_num = (icrbb.b_initiator & 0x4) >> 2;
156
157 hubiio_crb_free(hubdev_info, i);
158
159 bte_crb_error_handler(nasid_to_cnodeid(nasid), bte_num,
160 i, &ioerror, icrbd.d_bteop);
161 num_errors++;
162 continue;
163 }
164 }
165}
166
167/*
168 * Function : hub_error_init
169 * Purpose : initialize the error handling requirements for a given hub.
170 * Parameters : cnode, the compact nodeid.
171 * Assumptions : Called only once per hub, either by a local cpu or by a
172 * remote cpu when this hub is headless (cpuless).
173 * Returns : None
174 */
175void hub_error_init(struct hubdev_info *hubdev_info)
176{
177 if (request_irq(SGI_II_ERROR, (void *)hub_eint_handler, SA_SHIRQ,
178 "SN_hub_error", (void *)hubdev_info))
179 printk("hub_error_init: Failed to request_irq for 0x%p\n",
180 hubdev_info);
181 return;
182}
183
184
185/*
186 * Function : ice_error_init
187 * Purpose : initialize the error handling requirements for a given tio.
188 * Parameters : cnode, the compact nodeid.
189 * Assumptions : Called only once per tio.
190 * Returns : None
191 */
192void ice_error_init(struct hubdev_info *hubdev_info)
193{
194 if (request_irq
195 (SGI_TIO_ERROR, (void *)hub_eint_handler, SA_SHIRQ, "SN_TIO_error",
196 (void *)hubdev_info))
197 printk("ice_error_init: request_irq() error hubdev_info 0x%p\n",
198 hubdev_info);
199 return;
200}
201
diff --git a/arch/ia64/sn/kernel/idle.c b/arch/ia64/sn/kernel/idle.c
new file mode 100644
index 000000000000..49d178f022b5
--- /dev/null
+++ b/arch/ia64/sn/kernel/idle.c
@@ -0,0 +1,30 @@
1/*
2 * This file is subject to the terms and conditions of the GNU General Public
3 * License. See the file "COPYING" in the main directory of this archive
4 * for more details.
5 *
6 * Copyright (c) 2001-2004 Silicon Graphics, Inc. All rights reserved.
7 */
8
9#include <asm/sn/leds.h>
10
11void snidle(int state)
12{
13 if (state) {
14 if (pda->idle_flag == 0) {
15 /*
16 * Turn the activity LED off.
17 */
18 set_led_bits(0, LED_CPU_ACTIVITY);
19 }
20
21 pda->idle_flag = 1;
22 } else {
23 /*
24 * Turn the activity LED on.
25 */
26 set_led_bits(LED_CPU_ACTIVITY, LED_CPU_ACTIVITY);
27
28 pda->idle_flag = 0;
29 }
30}
diff --git a/arch/ia64/sn/kernel/io_init.c b/arch/ia64/sn/kernel/io_init.c
new file mode 100644
index 000000000000..001880812b7c
--- /dev/null
+++ b/arch/ia64/sn/kernel/io_init.c
@@ -0,0 +1,411 @@
1/*
2 * This file is subject to the terms and conditions of the GNU General Public
3 * License. See the file "COPYING" in the main directory of this archive
4 * for more details.
5 *
6 * Copyright (C) 1992 - 1997, 2000-2004 Silicon Graphics, Inc. All rights reserved.
7 */
8
9#include <linux/bootmem.h>
10#include <linux/nodemask.h>
11#include <asm/sn/types.h>
12#include <asm/sn/sn_sal.h>
13#include <asm/sn/addrs.h>
14#include "pci/pcibus_provider_defs.h"
15#include "pci/pcidev.h"
16#include "pci/pcibr_provider.h"
17#include "xtalk/xwidgetdev.h"
18#include <asm/sn/geo.h>
19#include "xtalk/hubdev.h"
20#include <asm/sn/io.h>
21#include <asm/sn/simulator.h>
22
23char master_baseio_wid;
24nasid_t master_nasid = INVALID_NASID; /* Partition Master */
25
26struct slab_info {
27 struct hubdev_info hubdev;
28};
29
30struct brick {
31 moduleid_t id; /* Module ID of this module */
32 struct slab_info slab_info[MAX_SLABS + 1];
33};
34
35int sn_ioif_inited = 0; /* SN I/O infrastructure initialized? */
36
37/*
38 * Retrieve the DMA Flush List given nasid. This list is needed
39 * to implement the WAR - Flush DMA data on PIO Reads.
40 */
41static inline uint64_t
42sal_get_widget_dmaflush_list(u64 nasid, u64 widget_num, u64 address)
43{
44
45 struct ia64_sal_retval ret_stuff;
46 ret_stuff.status = 0;
47 ret_stuff.v0 = 0;
48
49 SAL_CALL_NOLOCK(ret_stuff,
50 (u64) SN_SAL_IOIF_GET_WIDGET_DMAFLUSH_LIST,
51 (u64) nasid, (u64) widget_num, (u64) address, 0, 0, 0,
52 0);
53 return ret_stuff.v0;
54
55}
56
57/*
58 * Retrieve the hub device info structure for the given nasid.
59 */
60static inline uint64_t sal_get_hubdev_info(u64 handle, u64 address)
61{
62
63 struct ia64_sal_retval ret_stuff;
64 ret_stuff.status = 0;
65 ret_stuff.v0 = 0;
66
67 SAL_CALL_NOLOCK(ret_stuff,
68 (u64) SN_SAL_IOIF_GET_HUBDEV_INFO,
69 (u64) handle, (u64) address, 0, 0, 0, 0, 0);
70 return ret_stuff.v0;
71}
72
73/*
74 * Retrieve the pci bus information given the bus number.
75 */
76static inline uint64_t sal_get_pcibus_info(u64 segment, u64 busnum, u64 address)
77{
78
79 struct ia64_sal_retval ret_stuff;
80 ret_stuff.status = 0;
81 ret_stuff.v0 = 0;
82
83 SAL_CALL_NOLOCK(ret_stuff,
84 (u64) SN_SAL_IOIF_GET_PCIBUS_INFO,
85 (u64) segment, (u64) busnum, (u64) address, 0, 0, 0, 0);
86 return ret_stuff.v0;
87}
88
89/*
90 * Retrieve the pci device information given the bus and device|function number.
91 */
92static inline uint64_t
93sal_get_pcidev_info(u64 segment, u64 bus_number, u64 devfn, u64 pci_dev,
94 u64 sn_irq_info)
95{
96 struct ia64_sal_retval ret_stuff;
97 ret_stuff.status = 0;
98 ret_stuff.v0 = 0;
99
100 SAL_CALL_NOLOCK(ret_stuff,
101 (u64) SN_SAL_IOIF_GET_PCIDEV_INFO,
102 (u64) segment, (u64) bus_number, (u64) devfn,
103 (u64) pci_dev,
104 sn_irq_info, 0, 0);
105 return ret_stuff.v0;
106}
107
108/*
109 * sn_alloc_pci_sysdata() - This routine allocates a pci controller
110 * which is expected as the pci_dev and pci_bus sysdata by the Linux
111 * PCI infrastructure.
112 */
113static inline struct pci_controller *sn_alloc_pci_sysdata(void)
114{
115 struct pci_controller *pci_sysdata;
116
117 pci_sysdata = kmalloc(sizeof(*pci_sysdata), GFP_KERNEL);
118 if (!pci_sysdata)
119 BUG();
120
121 memset(pci_sysdata, 0, sizeof(*pci_sysdata));
122 return pci_sysdata;
123}
124
125/*
126 * sn_fixup_ionodes() - This routine initializes the HUB data structure for
127 * each node in the system.
128 */
129static void sn_fixup_ionodes(void)
130{
131
132 struct sn_flush_device_list *sn_flush_device_list;
133 struct hubdev_info *hubdev;
134 uint64_t status;
135 uint64_t nasid;
136 int i, widget;
137
138 for (i = 0; i < numionodes; i++) {
139 hubdev = (struct hubdev_info *)(NODEPDA(i)->pdinfo);
140 nasid = cnodeid_to_nasid(i);
141 status = sal_get_hubdev_info(nasid, (uint64_t) __pa(hubdev));
142 if (status)
143 continue;
144
145 for (widget = 0; widget <= HUB_WIDGET_ID_MAX; widget++)
146 hubdev->hdi_xwidget_info[widget].xwi_hubinfo = hubdev;
147
148 if (!hubdev->hdi_flush_nasid_list.widget_p)
149 continue;
150
151 hubdev->hdi_flush_nasid_list.widget_p =
152 kmalloc((HUB_WIDGET_ID_MAX + 1) *
153 sizeof(struct sn_flush_device_list *), GFP_KERNEL);
154
155 memset(hubdev->hdi_flush_nasid_list.widget_p, 0x0,
156 (HUB_WIDGET_ID_MAX + 1) *
157 sizeof(struct sn_flush_device_list *));
158
159 for (widget = 0; widget <= HUB_WIDGET_ID_MAX; widget++) {
160 sn_flush_device_list = kmalloc(DEV_PER_WIDGET *
161 sizeof(struct
162 sn_flush_device_list),
163 GFP_KERNEL);
164 memset(sn_flush_device_list, 0x0,
165 DEV_PER_WIDGET *
166 sizeof(struct sn_flush_device_list));
167
168 status =
169 sal_get_widget_dmaflush_list(nasid, widget,
170 (uint64_t)
171 __pa
172 (sn_flush_device_list));
173 if (status) {
174 kfree(sn_flush_device_list);
175 continue;
176 }
177
178 hubdev->hdi_flush_nasid_list.widget_p[widget] =
179 sn_flush_device_list;
180 }
181
182 if (!(i & 1))
183 hub_error_init(hubdev);
184 else
185 ice_error_init(hubdev);
186 }
187
188}
189
190/*
191 * sn_pci_fixup_slot() - This routine sets up a slot's resources
192 * consistent with the Linux PCI abstraction layer. Resources acquired
193 * from our PCI provider include PIO maps to BAR space and interrupt
194 * objects.
195 */
196static void sn_pci_fixup_slot(struct pci_dev *dev)
197{
198 int idx;
199 int segment = 0;
200 uint64_t size;
201 struct sn_irq_info *sn_irq_info;
202 struct pci_dev *host_pci_dev;
203 int status = 0;
204
205 dev->sysdata = kmalloc(sizeof(struct pcidev_info), GFP_KERNEL);
206 if (SN_PCIDEV_INFO(dev) <= 0)
207 BUG(); /* Cannot afford to run out of memory */
208 memset(SN_PCIDEV_INFO(dev), 0, sizeof(struct pcidev_info));
209
210 sn_irq_info = kmalloc(sizeof(struct sn_irq_info), GFP_KERNEL);
211 if (sn_irq_info <= 0)
212 BUG(); /* Cannot afford to run out of memory */
213 memset(sn_irq_info, 0, sizeof(struct sn_irq_info));
214
215 /* Call to retrieve pci device information needed by kernel. */
216 status = sal_get_pcidev_info((u64) segment, (u64) dev->bus->number,
217 dev->devfn,
218 (u64) __pa(SN_PCIDEV_INFO(dev)),
219 (u64) __pa(sn_irq_info));
220 if (status)
221 BUG(); /* Cannot get platform pci device information */
222
223 /* Copy over PIO Mapped Addresses */
224 for (idx = 0; idx <= PCI_ROM_RESOURCE; idx++) {
225 unsigned long start, end, addr;
226
227 if (!SN_PCIDEV_INFO(dev)->pdi_pio_mapped_addr[idx])
228 continue;
229
230 start = dev->resource[idx].start;
231 end = dev->resource[idx].end;
232 size = end - start;
233 addr = SN_PCIDEV_INFO(dev)->pdi_pio_mapped_addr[idx];
234 addr = ((addr << 4) >> 4) | __IA64_UNCACHED_OFFSET;
235 dev->resource[idx].start = addr;
236 dev->resource[idx].end = addr + size;
237 if (dev->resource[idx].flags & IORESOURCE_IO)
238 dev->resource[idx].parent = &ioport_resource;
239 else
240 dev->resource[idx].parent = &iomem_resource;
241 }
242
243 /* set up host bus linkages */
244 host_pci_dev =
245 pci_find_slot(SN_PCIDEV_INFO(dev)->pdi_slot_host_handle >> 32,
246 SN_PCIDEV_INFO(dev)->
247 pdi_slot_host_handle & 0xffffffff);
248 SN_PCIDEV_INFO(dev)->pdi_host_pcidev_info =
249 SN_PCIDEV_INFO(host_pci_dev);
250 SN_PCIDEV_INFO(dev)->pdi_linux_pcidev = dev;
251 SN_PCIDEV_INFO(dev)->pdi_pcibus_info = SN_PCIBUS_BUSSOFT(dev->bus);
252
253 /* Only set up IRQ stuff if this device has a host bus context */
254 if (SN_PCIDEV_BUSSOFT(dev) && sn_irq_info->irq_irq) {
255 SN_PCIDEV_INFO(dev)->pdi_sn_irq_info = sn_irq_info;
256 dev->irq = SN_PCIDEV_INFO(dev)->pdi_sn_irq_info->irq_irq;
257 sn_irq_fixup(dev, sn_irq_info);
258 }
259}
260
261/*
262 * sn_pci_controller_fixup() - This routine sets up a bus's resources
263 * consistent with the Linux PCI abstraction layer.
264 */
265static void sn_pci_controller_fixup(int segment, int busnum)
266{
267 int status = 0;
268 int nasid, cnode;
269 struct pci_bus *bus;
270 struct pci_controller *controller;
271 struct pcibus_bussoft *prom_bussoft_ptr;
272 struct hubdev_info *hubdev_info;
273 void *provider_soft;
274
275 status =
276 sal_get_pcibus_info((u64) segment, (u64) busnum,
277 (u64) ia64_tpa(&prom_bussoft_ptr));
278 if (status > 0) {
279 return; /* bus # does not exist */
280 }
281
282 prom_bussoft_ptr = __va(prom_bussoft_ptr);
283 controller = sn_alloc_pci_sysdata();
284 /* controller non-zero is BUG'd in sn_alloc_pci_sysdata */
285
286 bus = pci_scan_bus(busnum, &pci_root_ops, controller);
287 if (bus == NULL) {
288 return; /* error, or bus already scanned */
289 }
290
291 /*
292 * Per-provider fixup. Copies the contents from prom to local
293 * area and links SN_PCIBUS_BUSSOFT().
294 *
295 * Note: Provider is responsible for ensuring that prom_bussoft_ptr
296 * represents an asic-type that it can handle.
297 */
298
299 if (prom_bussoft_ptr->bs_asic_type == PCIIO_ASIC_TYPE_PPB) {
300 return; /* no further fixup necessary */
301 }
302
303 provider_soft = pcibr_bus_fixup(prom_bussoft_ptr);
304 if (provider_soft == NULL) {
305 return; /* fixup failed or not applicable */
306 }
307
308 /*
309 * Generic bus fixup goes here. Don't reference prom_bussoft_ptr
310 * after this point.
311 */
312
313 bus->sysdata = controller;
314 PCI_CONTROLLER(bus)->platform_data = provider_soft;
315
316 nasid = NASID_GET(SN_PCIBUS_BUSSOFT(bus)->bs_base);
317 cnode = nasid_to_cnodeid(nasid);
318 hubdev_info = (struct hubdev_info *)(NODEPDA(cnode)->pdinfo);
319 SN_PCIBUS_BUSSOFT(bus)->bs_xwidget_info =
320 &(hubdev_info->hdi_xwidget_info[SN_PCIBUS_BUSSOFT(bus)->bs_xid]);
321}
322
323/*
324 * Ugly hack to get PCI setup until we have a proper ACPI namespace.
325 */
326
327#define PCI_BUSES_TO_SCAN 256
328
329static int __init sn_pci_init(void)
330{
331 int i = 0;
332 struct pci_dev *pci_dev = NULL;
333 extern void sn_init_cpei_timer(void);
334#ifdef CONFIG_PROC_FS
335 extern void register_sn_procfs(void);
336#endif
337
338 if (!ia64_platform_is("sn2") || IS_RUNNING_ON_SIMULATOR())
339 return 0;
340
341 /*
342 * This is needed to avoid bounce limit checks in the blk layer
343 */
344 ia64_max_iommu_merge_mask = ~PAGE_MASK;
345 sn_fixup_ionodes();
346 sn_irq = kmalloc(sizeof(struct sn_irq_info *) * NR_IRQS, GFP_KERNEL);
347 if (sn_irq <= 0)
348 BUG(); /* Cannot afford to run out of memory. */
349 memset(sn_irq, 0, sizeof(struct sn_irq_info *) * NR_IRQS);
350
351 sn_init_cpei_timer();
352
353#ifdef CONFIG_PROC_FS
354 register_sn_procfs();
355#endif
356
357 for (i = 0; i < PCI_BUSES_TO_SCAN; i++) {
358 sn_pci_controller_fixup(0, i);
359 }
360
361 /*
362 * Generic Linux PCI Layer has created the pci_bus and pci_dev
363 * structures - time for us to add our SN Platform specific
364 * information.
365 */
366
367 while ((pci_dev =
368 pci_find_device(PCI_ANY_ID, PCI_ANY_ID, pci_dev)) != NULL) {
369 sn_pci_fixup_slot(pci_dev);
370 }
371
372 sn_ioif_inited = 1; /* sn I/O infrastructure now initialized */
373
374 return 0;
375}
376
377/*
378 * hubdev_init_node() - Creates the HUB data structure and links it to its
379 * own NODE specific data area.
380 */
381void hubdev_init_node(nodepda_t * npda, cnodeid_t node)
382{
383
384 struct hubdev_info *hubdev_info;
385
386 if (node >= num_online_nodes()) /* Headless/memless IO nodes */
387 hubdev_info =
388 (struct hubdev_info *)alloc_bootmem_node(NODE_DATA(0),
389 sizeof(struct
390 hubdev_info));
391 else
392 hubdev_info =
393 (struct hubdev_info *)alloc_bootmem_node(NODE_DATA(node),
394 sizeof(struct
395 hubdev_info));
396 npda->pdinfo = (void *)hubdev_info;
397
398}
399
400geoid_t
401cnodeid_get_geoid(cnodeid_t cnode)
402{
403
404 struct hubdev_info *hubdev;
405
406 hubdev = (struct hubdev_info *)(NODEPDA(cnode)->pdinfo);
407 return hubdev->hdi_geoid;
408
409}
410
411subsys_initcall(sn_pci_init);
diff --git a/arch/ia64/sn/kernel/iomv.c b/arch/ia64/sn/kernel/iomv.c
new file mode 100644
index 000000000000..fec6d8b8237b
--- /dev/null
+++ b/arch/ia64/sn/kernel/iomv.c
@@ -0,0 +1,70 @@
1/*
2 * This file is subject to the terms and conditions of the GNU General Public
3 * License. See the file "COPYING" in the main directory of this archive
4 * for more details.
5 *
6 * Copyright (C) 2000-2003 Silicon Graphics, Inc. All rights reserved.
7 */
8
9#include <linux/module.h>
10#include <asm/io.h>
11#include <asm/delay.h>
12#include <asm/sn/nodepda.h>
13#include <asm/sn/simulator.h>
14#include <asm/sn/pda.h>
15#include <asm/sn/sn_cpuid.h>
16#include <asm/sn/shub_mmr.h>
17
18/**
19 * sn_io_addr - convert an in/out port to an i/o address
20 * @port: port to convert
21 *
22 * Legacy in/out instructions are converted to ld/st instructions
23 * on IA64. This routine will convert a port number into a valid
24 * SN i/o address. Used by sn_in*() and sn_out*().
25 */
26void *sn_io_addr(unsigned long port)
27{
28 if (!IS_RUNNING_ON_SIMULATOR()) {
29 /* On sn2, legacy I/O ports don't point at anything */
30 if (port < (64 * 1024))
31 return NULL;
32 return ((void *)(port | __IA64_UNCACHED_OFFSET));
33 } else {
34 /* but the simulator uses them... */
35 unsigned long addr;
36
37 /*
38 * word align port, but need more than 10 bits
39 * for accessing registers in bedrock local block
40 * (so we don't do port&0xfff)
41 */
42 addr = (is_shub2() ? 0xc00000028c000000UL : 0xc0000087cc000000UL) | ((port >> 2) << 12);
43 if ((port >= 0x1f0 && port <= 0x1f7) || port == 0x3f6 || port == 0x3f7)
44 addr |= port;
45 return (void *)addr;
46 }
47}
48
49EXPORT_SYMBOL(sn_io_addr);
50
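Editor's note (not part of the original file): the simulator branch of sn_io_addr() above shifts the word-aligned port number into bits 12 and up, and only keeps the low port bits for the legacy IDE registers. A stand-alone sketch of that arithmetic; the base constant is copied from the shub2 branch of the code and the port choice is just an example:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint64_t base = 0xc00000028c000000ULL;	/* shub2 simulator base */
	unsigned long port = 0x1f0;		/* first legacy IDE port */
	uint64_t addr;

	/* word-align the port into bits 12 and up ... */
	addr = base | ((uint64_t)(port >> 2) << 12);
	/* ... but keep the low bits for the IDE register ports */
	if ((port >= 0x1f0 && port <= 0x1f7) || port == 0x3f6 || port == 0x3f7)
		addr |= port;

	printf("port 0x%lx -> 0x%llx\n", port, (unsigned long long)addr);
	return 0;
}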
51/**
52 * __sn_mmiowb - I/O space memory barrier
53 *
54 * See include/asm-ia64/io.h and Documentation/DocBook/deviceiobook.tmpl
55 * for details.
56 *
57 * On SN2, we wait for the PIO_WRITE_STATUS SHub register to clear.
58 * See PV 871084 for details about the WAR about zero value.
59 *
60 */
61void __sn_mmiowb(void)
62{
63 volatile unsigned long *adr = pda->pio_write_status_addr;
64 unsigned long val = pda->pio_write_status_val;
65
66 while ((*adr & SH_PIO_WRITE_STATUS_PENDING_WRITE_COUNT_MASK) != val)
67 cpu_relax();
68}
69
70EXPORT_SYMBOL(__sn_mmiowb);
diff --git a/arch/ia64/sn/kernel/irq.c b/arch/ia64/sn/kernel/irq.c
new file mode 100644
index 000000000000..3be44724f6c8
--- /dev/null
+++ b/arch/ia64/sn/kernel/irq.c
@@ -0,0 +1,431 @@
1/*
2 * Platform dependent support for SGI SN
3 *
4 * This file is subject to the terms and conditions of the GNU General Public
5 * License. See the file "COPYING" in the main directory of this archive
6 * for more details.
7 *
8 * Copyright (c) 2000-2004 Silicon Graphics, Inc. All Rights Reserved.
9 */
10
11#include <linux/irq.h>
12#include <asm/sn/intr.h>
13#include <asm/sn/addrs.h>
14#include <asm/sn/arch.h>
15#include "xtalk/xwidgetdev.h"
16#include "pci/pcibus_provider_defs.h"
17#include "pci/pcidev.h"
18#include "pci/pcibr_provider.h"
19#include <asm/sn/shub_mmr.h>
20#include <asm/sn/sn_sal.h>
21
22static void force_interrupt(int irq);
23static void register_intr_pda(struct sn_irq_info *sn_irq_info);
24static void unregister_intr_pda(struct sn_irq_info *sn_irq_info);
25
26extern int sn_force_interrupt_flag;
27extern int sn_ioif_inited;
28struct sn_irq_info **sn_irq;
29
30static inline uint64_t sn_intr_alloc(nasid_t local_nasid, int local_widget,
31 u64 sn_irq_info,
32 int req_irq, nasid_t req_nasid,
33 int req_slice)
34{
35 struct ia64_sal_retval ret_stuff;
36 ret_stuff.status = 0;
37 ret_stuff.v0 = 0;
38
39 SAL_CALL_NOLOCK(ret_stuff, (u64) SN_SAL_IOIF_INTERRUPT,
40 (u64) SAL_INTR_ALLOC, (u64) local_nasid,
41 (u64) local_widget, (u64) sn_irq_info, (u64) req_irq,
42 (u64) req_nasid, (u64) req_slice);
43 return ret_stuff.status;
44}
45
46static inline void sn_intr_free(nasid_t local_nasid, int local_widget,
47 struct sn_irq_info *sn_irq_info)
48{
49 struct ia64_sal_retval ret_stuff;
50 ret_stuff.status = 0;
51 ret_stuff.v0 = 0;
52
53 SAL_CALL_NOLOCK(ret_stuff, (u64) SN_SAL_IOIF_INTERRUPT,
54 (u64) SAL_INTR_FREE, (u64) local_nasid,
55 (u64) local_widget, (u64) sn_irq_info->irq_irq,
56 (u64) sn_irq_info->irq_cookie, 0, 0);
57}
58
59static unsigned int sn_startup_irq(unsigned int irq)
60{
61 return 0;
62}
63
64static void sn_shutdown_irq(unsigned int irq)
65{
66}
67
68static void sn_disable_irq(unsigned int irq)
69{
70}
71
72static void sn_enable_irq(unsigned int irq)
73{
74}
75
76static void sn_ack_irq(unsigned int irq)
77{
78 uint64_t event_occurred, mask = 0;
79 int nasid;
80
81 irq = irq & 0xff;
82 nasid = get_nasid();
83 event_occurred =
84 HUB_L((uint64_t *) GLOBAL_MMR_ADDR(nasid, SH_EVENT_OCCURRED));
85 if (event_occurred & SH_EVENT_OCCURRED_UART_INT_MASK) {
86 mask |= (1 << SH_EVENT_OCCURRED_UART_INT_SHFT);
87 }
88 if (event_occurred & SH_EVENT_OCCURRED_IPI_INT_MASK) {
89 mask |= (1 << SH_EVENT_OCCURRED_IPI_INT_SHFT);
90 }
91 if (event_occurred & SH_EVENT_OCCURRED_II_INT0_MASK) {
92 mask |= (1 << SH_EVENT_OCCURRED_II_INT0_SHFT);
93 }
94 if (event_occurred & SH_EVENT_OCCURRED_II_INT1_MASK) {
95 mask |= (1 << SH_EVENT_OCCURRED_II_INT1_SHFT);
96 }
97 HUB_S((uint64_t *) GLOBAL_MMR_ADDR(nasid, SH_EVENT_OCCURRED_ALIAS),
98 mask);
99 __set_bit(irq, (volatile void *)pda->sn_in_service_ivecs);
100
101 move_irq(irq);
102}
103
104static void sn_end_irq(unsigned int irq)
105{
106 int nasid;
107 int ivec;
108 uint64_t event_occurred;
109
110 ivec = irq & 0xff;
111 if (ivec == SGI_UART_VECTOR) {
112 nasid = get_nasid();
113 event_occurred = HUB_L((uint64_t *) GLOBAL_MMR_ADDR
114 (nasid, SH_EVENT_OCCURRED));
115 /* If the UART bit is set here, we may have received an
116 * interrupt from the UART that the driver missed. To
117 * make sure, we IPI ourselves to force us to look again.
118 */
119 if (event_occurred & SH_EVENT_OCCURRED_UART_INT_MASK) {
120 platform_send_ipi(smp_processor_id(), SGI_UART_VECTOR,
121 IA64_IPI_DM_INT, 0);
122 }
123 }
124 __clear_bit(ivec, (volatile void *)pda->sn_in_service_ivecs);
125 if (sn_force_interrupt_flag)
126 force_interrupt(irq);
127}
128
129static void sn_set_affinity_irq(unsigned int irq, cpumask_t mask)
130{
131 struct sn_irq_info *sn_irq_info = sn_irq[irq];
132 struct sn_irq_info *tmp_sn_irq_info;
133 int cpuid, cpuphys;
134 nasid_t t_nasid; /* nasid to target */
135 int t_slice; /* slice to target */
136
137 /* allocate a temp sn_irq_info struct to get new target info */
138 tmp_sn_irq_info = kmalloc(sizeof(*tmp_sn_irq_info), GFP_KERNEL);
139 if (!tmp_sn_irq_info)
140 return;
141
142 cpuid = first_cpu(mask);
143 cpuphys = cpu_physical_id(cpuid);
144 t_nasid = cpuid_to_nasid(cpuid);
145 t_slice = cpuid_to_slice(cpuid);
146
147 while (sn_irq_info) {
148 int status;
149 int local_widget;
150 uint64_t bridge = (uint64_t) sn_irq_info->irq_bridge;
151 nasid_t local_nasid = NASID_GET(bridge);
152
153 if (!bridge)
154 break; /* irq is not a device interrupt */
155
156 if (local_nasid & 1)
157 local_widget = TIO_SWIN_WIDGETNUM(bridge);
158 else
159 local_widget = SWIN_WIDGETNUM(bridge);
160
161 /* Free the old PROM sn_irq_info structure */
162 sn_intr_free(local_nasid, local_widget, sn_irq_info);
163
164 /* allocate a new PROM sn_irq_info struct */
165 status = sn_intr_alloc(local_nasid, local_widget,
166 __pa(tmp_sn_irq_info), irq, t_nasid,
167 t_slice);
168
169 if (status == 0) {
170 /* Update kernels sn_irq_info with new target info */
171 unregister_intr_pda(sn_irq_info);
172 sn_irq_info->irq_cpuid = cpuid;
173 sn_irq_info->irq_nasid = t_nasid;
174 sn_irq_info->irq_slice = t_slice;
175 sn_irq_info->irq_xtalkaddr =
176 tmp_sn_irq_info->irq_xtalkaddr;
177 sn_irq_info->irq_cookie = tmp_sn_irq_info->irq_cookie;
178 register_intr_pda(sn_irq_info);
179
180 if (IS_PCI_BRIDGE_ASIC(sn_irq_info->irq_bridge_type)) {
181 pcibr_change_devices_irq(sn_irq_info);
182 }
183
184 sn_irq_info = sn_irq_info->irq_next;
185
186#ifdef CONFIG_SMP
187 set_irq_affinity_info((irq & 0xff), cpuphys, 0);
188#endif
189 } else {
190 break; /* snp_affinity failed the intr_alloc */
191 }
192 }
193 kfree(tmp_sn_irq_info);
194}
195
196struct hw_interrupt_type irq_type_sn = {
197 "SN hub",
198 sn_startup_irq,
199 sn_shutdown_irq,
200 sn_enable_irq,
201 sn_disable_irq,
202 sn_ack_irq,
203 sn_end_irq,
204 sn_set_affinity_irq
205};
206
207unsigned int sn_local_vector_to_irq(u8 vector)
208{
209 return (CPU_VECTOR_TO_IRQ(smp_processor_id(), vector));
210}
211
212void sn_irq_init(void)
213{
214 int i;
215 irq_desc_t *base_desc = irq_desc;
216
217 for (i = 0; i < NR_IRQS; i++) {
218 if (base_desc[i].handler == &no_irq_type) {
219 base_desc[i].handler = &irq_type_sn;
220 }
221 }
222}
223
224static void register_intr_pda(struct sn_irq_info *sn_irq_info)
225{
226 int irq = sn_irq_info->irq_irq;
227 int cpu = sn_irq_info->irq_cpuid;
228
229 if (pdacpu(cpu)->sn_last_irq < irq) {
230 pdacpu(cpu)->sn_last_irq = irq;
231 }
232
233 if (pdacpu(cpu)->sn_first_irq == 0 || pdacpu(cpu)->sn_first_irq > irq) {
234 pdacpu(cpu)->sn_first_irq = irq;
235 }
236}
237
238static void unregister_intr_pda(struct sn_irq_info *sn_irq_info)
239{
240 int irq = sn_irq_info->irq_irq;
241 int cpu = sn_irq_info->irq_cpuid;
242 struct sn_irq_info *tmp_irq_info;
243 int i, foundmatch;
244
245 if (pdacpu(cpu)->sn_last_irq == irq) {
246 foundmatch = 0;
247 for (i = pdacpu(cpu)->sn_last_irq - 1; i; i--) {
248 tmp_irq_info = sn_irq[i];
249 while (tmp_irq_info) {
250 if (tmp_irq_info->irq_cpuid == cpu) {
251 foundmatch++;
252 break;
253 }
254 tmp_irq_info = tmp_irq_info->irq_next;
255 }
256 if (foundmatch) {
257 break;
258 }
259 }
260 pdacpu(cpu)->sn_last_irq = i;
261 }
262
263 if (pdacpu(cpu)->sn_first_irq == irq) {
264 foundmatch = 0;
265 for (i = pdacpu(cpu)->sn_first_irq + 1; i < NR_IRQS; i++) {
266 tmp_irq_info = sn_irq[i];
267 while (tmp_irq_info) {
268 if (tmp_irq_info->irq_cpuid == cpu) {
269 foundmatch++;
270 break;
271 }
272 tmp_irq_info = tmp_irq_info->irq_next;
273 }
274 if (foundmatch) {
275 break;
276 }
277 }
278 pdacpu(cpu)->sn_first_irq = ((i == NR_IRQS) ? 0 : i);
279 }
280}
281
282struct sn_irq_info *sn_irq_alloc(nasid_t local_nasid, int local_widget, int irq,
283 nasid_t nasid, int slice)
284{
285 struct sn_irq_info *sn_irq_info;
286 int status;
287
288 sn_irq_info = kmalloc(sizeof(*sn_irq_info), GFP_KERNEL);
289 if (sn_irq_info == NULL)
290 return NULL;
291
292 memset(sn_irq_info, 0x0, sizeof(*sn_irq_info));
293
294 status =
295 sn_intr_alloc(local_nasid, local_widget, __pa(sn_irq_info), irq,
296 nasid, slice);
297
298 if (status) {
299 kfree(sn_irq_info);
300 return NULL;
301 } else {
302 return sn_irq_info;
303 }
304}
305
306void sn_irq_free(struct sn_irq_info *sn_irq_info)
307{
308 uint64_t bridge = (uint64_t) sn_irq_info->irq_bridge;
309 nasid_t local_nasid = NASID_GET(bridge);
310 int local_widget;
311
312 if (local_nasid & 1) /* tio check */
313 local_widget = TIO_SWIN_WIDGETNUM(bridge);
314 else
315 local_widget = SWIN_WIDGETNUM(bridge);
316
317 sn_intr_free(local_nasid, local_widget, sn_irq_info);
318
319 kfree(sn_irq_info);
320}
321
322void sn_irq_fixup(struct pci_dev *pci_dev, struct sn_irq_info *sn_irq_info)
323{
324 nasid_t nasid = sn_irq_info->irq_nasid;
325 int slice = sn_irq_info->irq_slice;
326 int cpu = nasid_slice_to_cpuid(nasid, slice);
327
328 sn_irq_info->irq_cpuid = cpu;
329 sn_irq_info->irq_pciioinfo = SN_PCIDEV_INFO(pci_dev);
330
331 /* link it into the sn_irq[irq] list */
332 sn_irq_info->irq_next = sn_irq[sn_irq_info->irq_irq];
333 sn_irq[sn_irq_info->irq_irq] = sn_irq_info;
334
335 (void)register_intr_pda(sn_irq_info);
336}
337
338static void force_interrupt(int irq)
339{
340 struct sn_irq_info *sn_irq_info;
341
342 if (!sn_ioif_inited)
343 return;
344 sn_irq_info = sn_irq[irq];
345 while (sn_irq_info) {
346 if (IS_PCI_BRIDGE_ASIC(sn_irq_info->irq_bridge_type) &&
347 (sn_irq_info->irq_bridge != NULL)) {
348 pcibr_force_interrupt(sn_irq_info);
349 }
350 sn_irq_info = sn_irq_info->irq_next;
351 }
352}
353
354/*
355 * Check for lost interrupts. If the PIC int_status reg. says that
356 * an interrupt has been sent, but not handled, and the interrupt
357 * is not pending in either the cpu irr regs or in the soft irr regs,
358 * and the interrupt is not in service, then the interrupt may have
359 * been lost. Force an interrupt on that pin. It is possible that
360 * the interrupt is in flight, so we may generate a spurious interrupt,
361 * but we should never miss a real lost interrupt.
362 */
363static void sn_check_intr(int irq, struct sn_irq_info *sn_irq_info)
364{
365 uint64_t regval;
366 int irr_reg_num;
367 int irr_bit;
368 uint64_t irr_reg;
369 struct pcidev_info *pcidev_info;
370 struct pcibus_info *pcibus_info;
371
372 pcidev_info = (struct pcidev_info *)sn_irq_info->irq_pciioinfo;
373 if (!pcidev_info)
374 return;
375
376 pcibus_info =
377 (struct pcibus_info *)pcidev_info->pdi_host_pcidev_info->
378 pdi_pcibus_info;
379 regval = pcireg_intr_status_get(pcibus_info);
380
381 irr_reg_num = irq_to_vector(irq) / 64;
382 irr_bit = irq_to_vector(irq) % 64;
383 switch (irr_reg_num) {
384 case 0:
385 irr_reg = ia64_getreg(_IA64_REG_CR_IRR0);
386 break;
387 case 1:
388 irr_reg = ia64_getreg(_IA64_REG_CR_IRR1);
389 break;
390 case 2:
391 irr_reg = ia64_getreg(_IA64_REG_CR_IRR2);
392 break;
393 case 3:
394 irr_reg = ia64_getreg(_IA64_REG_CR_IRR3);
395 break;
396 }
397 if (!test_bit(irr_bit, &irr_reg)) {
398 if (!test_bit(irq, pda->sn_soft_irr)) {
399 if (!test_bit(irq, pda->sn_in_service_ivecs)) {
400 regval &= 0xff;
401 if (sn_irq_info->irq_int_bit & regval &
402 sn_irq_info->irq_last_intr) {
403 regval &=
404 ~(sn_irq_info->
405 irq_int_bit & regval);
406 pcibr_force_interrupt(sn_irq_info);
407 }
408 }
409 }
410 }
411 sn_irq_info->irq_last_intr = regval;
412}
413
414void sn_lb_int_war_check(void)
415{
416 int i;
417
418 if (!sn_ioif_inited || pda->sn_first_irq == 0)
419 return;
420 for (i = pda->sn_first_irq; i <= pda->sn_last_irq; i++) {
421 struct sn_irq_info *sn_irq_info = sn_irq[i];
422 while (sn_irq_info) {
423 /* Only call for PCI bridges that are fully initialized. */
424 if (IS_PCI_BRIDGE_ASIC(sn_irq_info->irq_bridge_type) &&
425 (sn_irq_info->irq_bridge != NULL)) {
426 sn_check_intr(i, sn_irq_info);
427 }
428 sn_irq_info = sn_irq_info->irq_next;
429 }
430 }
431}
diff --git a/arch/ia64/sn/kernel/klconflib.c b/arch/ia64/sn/kernel/klconflib.c
new file mode 100644
index 000000000000..0f11a3299cd2
--- /dev/null
+++ b/arch/ia64/sn/kernel/klconflib.c
@@ -0,0 +1,108 @@
1/*
2 * This file is subject to the terms and conditions of the GNU General Public
3 * License. See the file "COPYING" in the main directory of this archive
4 * for more details.
5 *
6 * Copyright (C) 1992 - 1997, 2000-2004 Silicon Graphics, Inc. All rights reserved.
7 */
8
9#include <linux/types.h>
10#include <linux/ctype.h>
11#include <linux/string.h>
12#include <linux/kernel.h>
13#include <asm/sn/types.h>
14#include <asm/sn/module.h>
15#include <asm/sn/l1.h>
16
17char brick_types[MAX_BRICK_TYPES + 1] = "cri.xdpn%#=vo^kjbf890123456789...";
18/*
19 * Format a module id for printing.
20 *
21 * There are three possible formats:
22 *
23 * MODULE_FORMAT_BRIEF is the brief 6-character format, including
24 * the actual brick-type as recorded in the
25 * moduleid_t, eg. 002c15 for a C-brick, or
26 * 101#17 for a PX-brick.
27 *
28 * MODULE_FORMAT_LONG is the hwgraph format, eg. rack/002/bay/15
 29 *	                        or rack/101/bay/17 (note that the brick
30 * type does not appear in this format).
31 *
32 * MODULE_FORMAT_LCD is like MODULE_FORMAT_BRIEF, except that it
33 * ensures that the module id provided appears
34 * exactly as it would on the LCD display of
35 * the corresponding brick, eg. still 002c15
36 * for a C-brick, but 101p17 for a PX-brick.
37 *
38 * maule (9/13/04): Removed top-level check for (fmt == MODULE_FORMAT_LCD)
39 * making MODULE_FORMAT_LCD equivalent to MODULE_FORMAT_BRIEF. It was
40 * decided that all callers should assume the returned string should be what
41 * is displayed on the brick L1 LCD.
42 */
43void
44format_module_id(char *buffer, moduleid_t m, int fmt)
45{
46 int rack, position;
47 unsigned char brickchar;
48
49 rack = MODULE_GET_RACK(m);
50 brickchar = MODULE_GET_BTCHAR(m);
51
52 /* Be sure we use the same brick type character as displayed
53 * on the brick's LCD
54 */
55 switch (brickchar)
56 {
57 case L1_BRICKTYPE_GA:
58 case L1_BRICKTYPE_OPUS_TIO:
59 brickchar = L1_BRICKTYPE_C;
60 break;
61
62 case L1_BRICKTYPE_PX:
63 case L1_BRICKTYPE_PE:
64 case L1_BRICKTYPE_PA:
65 case L1_BRICKTYPE_SA: /* we can move this to the "I's" later
66 * if that makes more sense
67 */
68 brickchar = L1_BRICKTYPE_P;
69 break;
70
71 case L1_BRICKTYPE_IX:
72 case L1_BRICKTYPE_IA:
73
74 brickchar = L1_BRICKTYPE_I;
75 break;
76 }
77
78 position = MODULE_GET_BPOS(m);
79
80 if ((fmt == MODULE_FORMAT_BRIEF) || (fmt == MODULE_FORMAT_LCD)) {
81 /* Brief module number format, eg. 002c15 */
82
83 /* Decompress the rack number */
84 *buffer++ = '0' + RACK_GET_CLASS(rack);
85 *buffer++ = '0' + RACK_GET_GROUP(rack);
86 *buffer++ = '0' + RACK_GET_NUM(rack);
87
88 /* Add the brick type */
89 *buffer++ = brickchar;
90 }
91 else if (fmt == MODULE_FORMAT_LONG) {
92 /* Fuller hwgraph format, eg. rack/002/bay/15 */
93
94 strcpy(buffer, "rack" "/"); buffer += strlen(buffer);
95
96 *buffer++ = '0' + RACK_GET_CLASS(rack);
97 *buffer++ = '0' + RACK_GET_GROUP(rack);
98 *buffer++ = '0' + RACK_GET_NUM(rack);
99
100 strcpy(buffer, "/" "bay" "/"); buffer += strlen(buffer);
101 }
102
103 /* Add the bay position, using at least two digits */
104 if (position < 10)
105 *buffer++ = '0';
106 sprintf(buffer, "%d", position);
107
108}
diff --git a/arch/ia64/sn/kernel/machvec.c b/arch/ia64/sn/kernel/machvec.c
new file mode 100644
index 000000000000..02bb9155840c
--- /dev/null
+++ b/arch/ia64/sn/kernel/machvec.c
@@ -0,0 +1,11 @@
1/*
2 * This file is subject to the terms and conditions of the GNU General Public
3 * License. See the file "COPYING" in the main directory of this archive
4 * for more details.
5 *
6 * Copyright (c) 2002-2003 Silicon Graphics, Inc. All Rights Reserved.
7 */
8
9#define MACHVEC_PLATFORM_NAME sn2
10#define MACHVEC_PLATFORM_HEADER <asm/machvec_sn2.h>
11#include <asm/machvec_init.h>
diff --git a/arch/ia64/sn/kernel/mca.c b/arch/ia64/sn/kernel/mca.c
new file mode 100644
index 000000000000..857774bb2c9a
--- /dev/null
+++ b/arch/ia64/sn/kernel/mca.c
@@ -0,0 +1,135 @@
1/*
2 * This file is subject to the terms and conditions of the GNU General Public
3 * License. See the file "COPYING" in the main directory of this archive
4 * for more details.
5 *
6 * Copyright (c) 2000-2004 Silicon Graphics, Inc. All Rights Reserved.
7 */
8
9#include <linux/types.h>
10#include <linux/kernel.h>
11#include <linux/timer.h>
12#include <linux/vmalloc.h>
13#include <asm/mca.h>
14#include <asm/sal.h>
15#include <asm/sn/sn_sal.h>
16
17/*
18 * Interval for calling SAL to poll for errors that do NOT cause error
19 * interrupts. SAL will raise a CPEI if any errors are present that
20 * need to be logged.
21 */
22#define CPEI_INTERVAL (5*HZ)
23
24struct timer_list sn_cpei_timer;
25void sn_init_cpei_timer(void);
26
 27/* Printing oemdata from mca uses data that is not passed through SAL; it is
28 * global. Only one user at a time.
29 */
30static DECLARE_MUTEX(sn_oemdata_mutex);
31static u8 **sn_oemdata;
32static u64 *sn_oemdata_size, sn_oemdata_bufsize;
33
34/*
35 * print_hook
36 *
37 * This function is the callback routine that SAL calls to log error
38 * info for platform errors. buf is appended to sn_oemdata, resizing as
39 * required.
40 */
41static int print_hook(const char *fmt, ...)
42{
43 char buf[400];
44 int len;
45 va_list args;
46 va_start(args, fmt);
47 vsnprintf(buf, sizeof(buf), fmt, args);
48 va_end(args);
49 len = strlen(buf);
50 while (*sn_oemdata_size + len + 1 > sn_oemdata_bufsize) {
51 u8 *newbuf = vmalloc(sn_oemdata_bufsize += 1000);
52 if (!newbuf) {
53 printk(KERN_ERR "%s: unable to extend sn_oemdata\n",
54 __FUNCTION__);
55 return 0;
56 }
57 memcpy(newbuf, *sn_oemdata, *sn_oemdata_size);
58 vfree(*sn_oemdata);
59 *sn_oemdata = newbuf;
60 }
61 memcpy(*sn_oemdata + *sn_oemdata_size, buf, len + 1);
62 *sn_oemdata_size += len;
63 return 0;
64}
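/*
 * Illustrative walk-through of the buffer growth above (hypothetical sizes,
 * not from a real error record): with sn_oemdata_bufsize starting at 0, a
 * first 50-byte message makes the while test fire (0 + 50 + 1 > 0), so a
 * 1000-byte buffer is vmalloc()ed and the old contents copied over; the
 * message is then appended and *sn_oemdata_size becomes 50, leaving room for
 * later messages until the next 1000-byte growth step.
 */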
65
66static void sn_cpei_handler(int irq, void *devid, struct pt_regs *regs)
67{
68 /*
69 * this function's sole purpose is to call SAL when we receive
70 * a CE interrupt from SHUB or when the timer routine decides
71 * we need to call SAL to check for CEs.
72 */
73
74 /* CALL SAL_LOG_CE */
75
76 ia64_sn_plat_cpei_handler();
77}
78
79static void sn_cpei_timer_handler(unsigned long dummy)
80{
81 sn_cpei_handler(-1, NULL, NULL);
82 mod_timer(&sn_cpei_timer, jiffies + CPEI_INTERVAL);
83}
84
85void sn_init_cpei_timer(void)
86{
87 init_timer(&sn_cpei_timer);
88 sn_cpei_timer.expires = jiffies + CPEI_INTERVAL;
89 sn_cpei_timer.function = sn_cpei_timer_handler;
90 add_timer(&sn_cpei_timer);
91}
92
93static int
94sn_platform_plat_specific_err_print(const u8 * sect_header, u8 ** oemdata,
95 u64 * oemdata_size)
96{
97 down(&sn_oemdata_mutex);
98 sn_oemdata = oemdata;
99 sn_oemdata_size = oemdata_size;
100 sn_oemdata_bufsize = 0;
101 ia64_sn_plat_specific_err_print(print_hook, (char *)sect_header);
102 up(&sn_oemdata_mutex);
103 return 0;
104}
105
106/* Callback when userspace salinfo wants to decode oem data via the platform
107 * kernel and/or prom.
108 */
109int sn_salinfo_platform_oemdata(const u8 *sect_header, u8 **oemdata, u64 *oemdata_size)
110{
111 efi_guid_t guid = *(efi_guid_t *)sect_header;
112 int valid = 0;
113 *oemdata_size = 0;
114 vfree(*oemdata);
115 *oemdata = NULL;
116 if (efi_guidcmp(guid, SAL_PLAT_SPECIFIC_ERR_SECT_GUID) == 0) {
117 sal_log_plat_specific_err_info_t *psei = (sal_log_plat_specific_err_info_t *)sect_header;
118 valid = psei->valid.oem_data;
119 } else if (efi_guidcmp(guid, SAL_PLAT_MEM_DEV_ERR_SECT_GUID) == 0) {
120 sal_log_mem_dev_err_info_t *mdei = (sal_log_mem_dev_err_info_t *)sect_header;
121 valid = mdei->valid.oem_data;
122 }
123 if (valid)
124 return sn_platform_plat_specific_err_print(sect_header, oemdata, oemdata_size);
125 else
126 return 0;
127}
128
129static int __init sn_salinfo_init(void)
130{
131 salinfo_platform_oemdata = &sn_salinfo_platform_oemdata;
132 return 0;
133}
134
135module_init(sn_salinfo_init)
diff --git a/arch/ia64/sn/kernel/setup.c b/arch/ia64/sn/kernel/setup.c
new file mode 100644
index 000000000000..f0306b516afb
--- /dev/null
+++ b/arch/ia64/sn/kernel/setup.c
@@ -0,0 +1,621 @@
1/*
2 * This file is subject to the terms and conditions of the GNU General Public
3 * License. See the file "COPYING" in the main directory of this archive
4 * for more details.
5 *
6 * Copyright (C) 1999,2001-2004 Silicon Graphics, Inc. All rights reserved.
7 */
8
9#include <linux/config.h>
10#include <linux/module.h>
11#include <linux/init.h>
12#include <linux/delay.h>
13#include <linux/kernel.h>
14#include <linux/kdev_t.h>
15#include <linux/string.h>
16#include <linux/tty.h>
17#include <linux/console.h>
18#include <linux/timex.h>
19#include <linux/sched.h>
20#include <linux/ioport.h>
21#include <linux/mm.h>
22#include <linux/serial.h>
23#include <linux/irq.h>
24#include <linux/bootmem.h>
25#include <linux/mmzone.h>
26#include <linux/interrupt.h>
27#include <linux/acpi.h>
28#include <linux/compiler.h>
29#include <linux/sched.h>
30#include <linux/root_dev.h>
31#include <linux/nodemask.h>
32
33#include <asm/io.h>
34#include <asm/sal.h>
35#include <asm/machvec.h>
36#include <asm/system.h>
37#include <asm/processor.h>
38#include <asm/sn/arch.h>
39#include <asm/sn/addrs.h>
40#include <asm/sn/pda.h>
41#include <asm/sn/nodepda.h>
42#include <asm/sn/sn_cpuid.h>
43#include <asm/sn/simulator.h>
44#include <asm/sn/leds.h>
45#include <asm/sn/bte.h>
46#include <asm/sn/shub_mmr.h>
47#include <asm/sn/clksupport.h>
48#include <asm/sn/sn_sal.h>
49#include <asm/sn/geo.h>
50#include "xtalk/xwidgetdev.h"
51#include "xtalk/hubdev.h"
52#include <asm/sn/klconfig.h>
53
54
55DEFINE_PER_CPU(struct pda_s, pda_percpu);
56
57#define MAX_PHYS_MEMORY (1UL << 49) /* 1 TB */
58
59lboard_t *root_lboard[MAX_COMPACT_NODES];
60
61extern void bte_init_node(nodepda_t *, cnodeid_t);
62
63extern void sn_timer_init(void);
64extern unsigned long last_time_offset;
65extern void (*ia64_mark_idle) (int);
66extern void snidle(int);
67extern unsigned char acpi_kbd_controller_present;
68
69unsigned long sn_rtc_cycles_per_second;
70EXPORT_SYMBOL(sn_rtc_cycles_per_second);
71
72DEFINE_PER_CPU(struct sn_hub_info_s, __sn_hub_info);
73EXPORT_PER_CPU_SYMBOL(__sn_hub_info);
74
75partid_t sn_partid = -1;
76EXPORT_SYMBOL(sn_partid);
77char sn_system_serial_number_string[128];
78EXPORT_SYMBOL(sn_system_serial_number_string);
79u64 sn_partition_serial_number;
80EXPORT_SYMBOL(sn_partition_serial_number);
81u8 sn_partition_id;
82EXPORT_SYMBOL(sn_partition_id);
83u8 sn_system_size;
84EXPORT_SYMBOL(sn_system_size);
85u8 sn_sharing_domain_size;
86EXPORT_SYMBOL(sn_sharing_domain_size);
87u8 sn_coherency_id;
88EXPORT_SYMBOL(sn_coherency_id);
89u8 sn_region_size;
90EXPORT_SYMBOL(sn_region_size);
91
92short physical_node_map[MAX_PHYSNODE_ID];
93
94EXPORT_SYMBOL(physical_node_map);
95
96int numionodes;
97
98static void sn_init_pdas(char **);
99static void scan_for_ionodes(void);
100
101static nodepda_t *nodepdaindr[MAX_COMPACT_NODES];
102
103/*
 104 * The format of "screen_info" is strange, a holdover from early i386-setup
105 * code. This is just enough to make the console code think we're on a
106 * VGA color display.
107 */
108struct screen_info sn_screen_info = {
109 .orig_x = 0,
110 .orig_y = 0,
111 .orig_video_mode = 3,
112 .orig_video_cols = 80,
113 .orig_video_ega_bx = 3,
114 .orig_video_lines = 25,
115 .orig_video_isVGA = 1,
116 .orig_video_points = 16
117};
118
119/*
120 * This is here so we can use the CMOS detection in ide-probe.c to
121 * determine what drives are present. In theory, we don't need this
122 * as the auto-detection could be done via ide-probe.c:do_probe() but
123 * in practice that would be much slower, which is painful when
124 * running in the simulator. Note that passing zeroes in DRIVE_INFO
125 * is sufficient (the IDE driver will autodetect the drive geometry).
126 */
127#ifdef CONFIG_IA64_GENERIC
128extern char drive_info[4 * 16];
129#else
130char drive_info[4 * 16];
131#endif
132
133/*
134 * Get nasid of current cpu early in boot before nodepda is initialized
135 */
136static int
137boot_get_nasid(void)
138{
139 int nasid;
140
141 if (ia64_sn_get_sapic_info(get_sapicid(), &nasid, NULL, NULL))
142 BUG();
143 return nasid;
144}
145
146/*
147 * This routine can only be used during init, since
148 * smp_boot_data is an init data structure.
149 * We have to use smp_boot_data.cpu_phys_id to find
150 * the physical id of the processor because the normal
151 * cpu_physical_id() relies on data structures that
152 * may not be initialized yet.
153 */
154
155static int __init pxm_to_nasid(int pxm)
156{
157 int i;
158 int nid;
159
160 nid = pxm_to_nid_map[pxm];
161 for (i = 0; i < num_node_memblks; i++) {
162 if (node_memblk[i].nid == nid) {
163 return NASID_GET(node_memblk[i].start_paddr);
164 }
165 }
166 return -1;
167}
168
169/**
170 * early_sn_setup - early setup routine for SN platforms
171 *
172 * Sets up an initial console to aid debugging. Intended primarily
173 * for bringup. See start_kernel() in init/main.c.
174 */
175
176void __init early_sn_setup(void)
177{
178 efi_system_table_t *efi_systab;
179 efi_config_table_t *config_tables;
180 struct ia64_sal_systab *sal_systab;
181 struct ia64_sal_desc_entry_point *ep;
182 char *p;
183 int i, j;
184
185 /*
 186	 * Parse enough of the SAL tables to locate the SAL entry point. Since console
187 * IO on SN2 is done via SAL calls, early_printk won't work without this.
188 *
189 * This code duplicates some of the ACPI table parsing that is in efi.c & sal.c.
 190	 * Any changes to those files may have to be made here as well.
191 */
192 efi_systab = (efi_system_table_t *) __va(ia64_boot_param->efi_systab);
193 config_tables = __va(efi_systab->tables);
194 for (i = 0; i < efi_systab->nr_tables; i++) {
195 if (efi_guidcmp(config_tables[i].guid, SAL_SYSTEM_TABLE_GUID) ==
196 0) {
197 sal_systab = __va(config_tables[i].table);
198 p = (char *)(sal_systab + 1);
199 for (j = 0; j < sal_systab->entry_count; j++) {
200 if (*p == SAL_DESC_ENTRY_POINT) {
201 ep = (struct ia64_sal_desc_entry_point
202 *)p;
203 ia64_sal_handler_init(__va
204 (ep->sal_proc),
205 __va(ep->gp));
206 return;
207 }
208 p += SAL_DESC_SIZE(*p);
209 }
210 }
211 }
212 /* Uh-oh, SAL not available?? */
213 printk(KERN_ERR "failed to find SAL entry point\n");
214}
215
216extern int platform_intr_list[];
217extern nasid_t master_nasid;
218static int shub_1_1_found __initdata;
219
220/*
221 * sn_check_for_wars
222 *
223 * Set flag for enabling shub specific wars
224 */
225
226static inline int __init is_shub_1_1(int nasid)
227{
228 unsigned long id;
229 int rev;
230
231 if (is_shub2())
232 return 0;
233 id = REMOTE_HUB_L(nasid, SH1_SHUB_ID);
234 rev = (id & SH1_SHUB_ID_REVISION_MASK) >> SH1_SHUB_ID_REVISION_SHFT;
235 return rev <= 2;
236}
237
238static void __init sn_check_for_wars(void)
239{
240 int cnode;
241
242 if (is_shub2()) {
243 /* none yet */
244 } else {
245 for_each_online_node(cnode) {
246 if (is_shub_1_1(cnodeid_to_nasid(cnode)))
247 sn_hub_info->shub_1_1_found = 1;
248 }
249 }
250}
251
252/**
253 * sn_setup - SN platform setup routine
254 * @cmdline_p: kernel command line
255 *
256 * Handles platform setup for SN machines. This includes determining
257 * the RTC frequency (via a SAL call), initializing secondary CPUs, and
258 * setting up per-node data areas. The console is also initialized here.
259 */
260void __init sn_setup(char **cmdline_p)
261{
262 long status, ticks_per_sec, drift;
263 int pxm;
264 int major = sn_sal_rev_major(), minor = sn_sal_rev_minor();
265 extern void sn_cpu_init(void);
266
267 /*
 268	 * If the generic code has enabled vga console support - let's
269 * get rid of it again. This is a kludge for the fact that ACPI
 270	 * currently has no way of informing us if legacy VGA is available
271 * or not.
272 */
273#if defined(CONFIG_VT) && defined(CONFIG_VGA_CONSOLE)
274 if (conswitchp == &vga_con) {
275 printk(KERN_DEBUG "SGI: Disabling VGA console\n");
276#ifdef CONFIG_DUMMY_CONSOLE
277 conswitchp = &dummy_con;
278#else
279 conswitchp = NULL;
280#endif /* CONFIG_DUMMY_CONSOLE */
281 }
282#endif /* def(CONFIG_VT) && def(CONFIG_VGA_CONSOLE) */
283
284 MAX_DMA_ADDRESS = PAGE_OFFSET + MAX_PHYS_MEMORY;
285
286 memset(physical_node_map, -1, sizeof(physical_node_map));
287 for (pxm = 0; pxm < MAX_PXM_DOMAINS; pxm++)
288 if (pxm_to_nid_map[pxm] != -1)
289 physical_node_map[pxm_to_nasid(pxm)] =
290 pxm_to_nid_map[pxm];
291
292 /*
293 * Old PROMs do not provide an ACPI FADT. Disable legacy keyboard
294 * support here so we don't have to listen to failed keyboard probe
295 * messages.
296 */
297 if ((major < 2 || (major == 2 && minor <= 9)) &&
298 acpi_kbd_controller_present) {
299 printk(KERN_INFO "Disabling legacy keyboard support as prom "
300 "is too old and doesn't provide FADT\n");
301 acpi_kbd_controller_present = 0;
302 }
303
304 printk("SGI SAL version %x.%02x\n", major, minor);
305
306 /*
307 * Confirm the SAL we're running on is recent enough...
308 */
309 if ((major < SN_SAL_MIN_MAJOR) || (major == SN_SAL_MIN_MAJOR &&
310 minor < SN_SAL_MIN_MINOR)) {
311 printk(KERN_ERR "This kernel needs SGI SAL version >= "
312 "%x.%02x\n", SN_SAL_MIN_MAJOR, SN_SAL_MIN_MINOR);
313 panic("PROM version too old\n");
314 }
315
316 master_nasid = boot_get_nasid();
317
318 status =
319 ia64_sal_freq_base(SAL_FREQ_BASE_REALTIME_CLOCK, &ticks_per_sec,
320 &drift);
321 if (status != 0 || ticks_per_sec < 100000) {
322 printk(KERN_WARNING
323 "unable to determine platform RTC clock frequency, guessing.\n");
324 /* PROM gives wrong value for clock freq. so guess */
325 sn_rtc_cycles_per_second = 1000000000000UL / 30000UL;
326 } else
327 sn_rtc_cycles_per_second = ticks_per_sec;
328
329 platform_intr_list[ACPI_INTERRUPT_CPEI] = IA64_CPE_VECTOR;
330
331 /*
332 * we set the default root device to /dev/hda
333 * to make simulation easy
334 */
335 ROOT_DEV = Root_HDA1;
336
337 /*
338 * Create the PDAs and NODEPDAs for all the cpus.
339 */
340 sn_init_pdas(cmdline_p);
341
342 ia64_mark_idle = &snidle;
343
344 /*
345 * For the bootcpu, we do this here. All other cpus will make the
346 * call as part of cpu_init in slave cpu initialization.
347 */
348 sn_cpu_init();
349
350#ifdef CONFIG_SMP
351 init_smp_config();
352#endif
353 screen_info = sn_screen_info;
354
355 sn_timer_init();
356}
357
358/**
359 * sn_init_pdas - setup node data areas
360 *
361 * One time setup for Node Data Area. Called by sn_setup().
362 */
363static void __init sn_init_pdas(char **cmdline_p)
364{
365 cnodeid_t cnode;
366
367 memset(pda->cnodeid_to_nasid_table, -1,
368 sizeof(pda->cnodeid_to_nasid_table));
369 for_each_online_node(cnode)
370 pda->cnodeid_to_nasid_table[cnode] =
371 pxm_to_nasid(nid_to_pxm_map[cnode]);
372
373 numionodes = num_online_nodes();
374 scan_for_ionodes();
375
376 /*
 377	 * Allocate & initialize the nodepda for each node.
378 */
379 for_each_online_node(cnode) {
380 nodepdaindr[cnode] =
381 alloc_bootmem_node(NODE_DATA(cnode), sizeof(nodepda_t));
382 memset(nodepdaindr[cnode], 0, sizeof(nodepda_t));
383 memset(nodepdaindr[cnode]->phys_cpuid, -1,
384 sizeof(nodepdaindr[cnode]->phys_cpuid));
385 }
386
387 /*
388 * Allocate & initialize nodepda for TIOs. For now, put them on node 0.
389 */
390 for (cnode = num_online_nodes(); cnode < numionodes; cnode++) {
391 nodepdaindr[cnode] =
392 alloc_bootmem_node(NODE_DATA(0), sizeof(nodepda_t));
393 memset(nodepdaindr[cnode], 0, sizeof(nodepda_t));
394 }
395
396 /*
397 * Now copy the array of nodepda pointers to each nodepda.
398 */
399 for (cnode = 0; cnode < numionodes; cnode++)
400 memcpy(nodepdaindr[cnode]->pernode_pdaindr, nodepdaindr,
401 sizeof(nodepdaindr));
402
403 /*
404 * Set up IO related platform-dependent nodepda fields.
405 * The following routine actually sets up the hubinfo struct
406 * in nodepda.
407 */
408 for_each_online_node(cnode) {
409 bte_init_node(nodepdaindr[cnode], cnode);
410 }
411
412 /*
413 * Initialize the per node hubdev. This includes IO Nodes and
414 * headless/memless nodes.
415 */
416 for (cnode = 0; cnode < numionodes; cnode++) {
417 hubdev_init_node(nodepdaindr[cnode], cnode);
418 }
419}
420
421/**
422 * sn_cpu_init - initialize per-cpu data areas
423 * @cpuid: cpuid of the caller
424 *
425 * Called during cpu initialization on each cpu as it starts.
426 * Currently, initializes the per-cpu data area for SNIA.
427 * Also sets up a few fields in the nodepda. Also known as
428 * platform_cpu_init() by the ia64 machvec code.
429 */
430void __init sn_cpu_init(void)
431{
432 int cpuid;
433 int cpuphyid;
434 int nasid;
435 int subnode;
436 int slice;
437 int cnode;
438 int i;
439 static int wars_have_been_checked;
440
441 memset(pda, 0, sizeof(pda));
442 if (ia64_sn_get_sn_info(0, &sn_hub_info->shub2, &sn_hub_info->nasid_bitmask, &sn_hub_info->nasid_shift,
443 &sn_system_size, &sn_sharing_domain_size, &sn_partition_id,
444 &sn_coherency_id, &sn_region_size))
445 BUG();
446 sn_hub_info->as_shift = sn_hub_info->nasid_shift - 2;
447
448 /*
449 * The boot cpu makes this call again after platform initialization is
450 * complete.
451 */
452 if (nodepdaindr[0] == NULL)
453 return;
454
455 cpuid = smp_processor_id();
456 cpuphyid = get_sapicid();
457
458 if (ia64_sn_get_sapic_info(cpuphyid, &nasid, &subnode, &slice))
459 BUG();
460
461 for (i=0; i < MAX_NUMNODES; i++) {
462 if (nodepdaindr[i]) {
463 nodepdaindr[i]->phys_cpuid[cpuid].nasid = nasid;
464 nodepdaindr[i]->phys_cpuid[cpuid].slice = slice;
465 nodepdaindr[i]->phys_cpuid[cpuid].subnode = subnode;
466 }
467 }
468
469 cnode = nasid_to_cnodeid(nasid);
470
471 pda->p_nodepda = nodepdaindr[cnode];
472 pda->led_address =
473 (typeof(pda->led_address)) (LED0 + (slice << LED_CPU_SHIFT));
474 pda->led_state = LED_ALWAYS_SET;
475 pda->hb_count = HZ / 2;
476 pda->hb_state = 0;
477 pda->idle_flag = 0;
478
479 if (cpuid != 0) {
480 memcpy(pda->cnodeid_to_nasid_table,
481 pdacpu(0)->cnodeid_to_nasid_table,
482 sizeof(pda->cnodeid_to_nasid_table));
483 }
484
485 /*
486 * Check for WARs.
487 * Only needs to be done once, on BSP.
 488	 * Has to be done after the loop above, because it uses pda->cnodeid_to_nasid_table[i].
 489	 * Has to be done before the assignment below.
490 */
491 if (!wars_have_been_checked) {
492 sn_check_for_wars();
493 wars_have_been_checked = 1;
494 }
495 sn_hub_info->shub_1_1_found = shub_1_1_found;
496
497 /*
498 * Set up addresses of PIO/MEM write status registers.
499 */
500 {
501 u64 pio1[] = {SH1_PIO_WRITE_STATUS_0, 0, SH1_PIO_WRITE_STATUS_1, 0};
502 u64 pio2[] = {SH2_PIO_WRITE_STATUS_0, SH2_PIO_WRITE_STATUS_1,
503 SH2_PIO_WRITE_STATUS_2, SH2_PIO_WRITE_STATUS_3};
504 u64 *pio;
505 pio = is_shub1() ? pio1 : pio2;
506 pda->pio_write_status_addr = (volatile unsigned long *) LOCAL_MMR_ADDR(pio[slice]);
507 pda->pio_write_status_val = is_shub1() ? SH_PIO_WRITE_STATUS_PENDING_WRITE_COUNT_MASK : 0;
508 }
509
510 /*
511 * WAR addresses for SHUB 1.x.
512 */
513 if (local_node_data->active_cpu_count++ == 0 && is_shub1()) {
514 int buddy_nasid;
515 buddy_nasid =
516 cnodeid_to_nasid(numa_node_id() ==
517 num_online_nodes() - 1 ? 0 : numa_node_id() + 1);
518 pda->pio_shub_war_cam_addr =
519 (volatile unsigned long *)GLOBAL_MMR_ADDR(nasid,
520 SH1_PI_CAM_CONTROL);
521 }
522}
523
524/*
525 * Scan klconfig for ionodes. Add the nasids to the
526 * physical_node_map and the pda and increment numionodes.
527 */
528
529static void __init scan_for_ionodes(void)
530{
531 int nasid = 0;
532 lboard_t *brd;
533
534 /* Setup ionodes with memory */
535 for (nasid = 0; nasid < MAX_PHYSNODE_ID; nasid += 2) {
536 char *klgraph_header;
537 cnodeid_t cnodeid;
538
539 if (physical_node_map[nasid] == -1)
540 continue;
541
542 cnodeid = -1;
543 klgraph_header = __va(ia64_sn_get_klconfig_addr(nasid));
544 if (!klgraph_header) {
545 if (IS_RUNNING_ON_SIMULATOR())
546 continue;
547 BUG(); /* All nodes must have klconfig tables! */
548 }
549 cnodeid = nasid_to_cnodeid(nasid);
550 root_lboard[cnodeid] = (lboard_t *)
551 NODE_OFFSET_TO_LBOARD((nasid),
552 ((kl_config_hdr_t
553 *) (klgraph_header))->
554 ch_board_info);
555 }
556
557 /* Scan headless/memless IO Nodes. */
558 for (nasid = 0; nasid < MAX_PHYSNODE_ID; nasid += 2) {
559 /* if there's no nasid, don't try to read the klconfig on the node */
560 if (physical_node_map[nasid] == -1)
561 continue;
562 brd = find_lboard_any((lboard_t *)
563 root_lboard[nasid_to_cnodeid(nasid)],
564 KLTYPE_SNIA);
565 if (brd) {
566 brd = KLCF_NEXT_ANY(brd); /* Skip this node's lboard */
567 if (!brd)
568 continue;
569 }
570
571 brd = find_lboard_any(brd, KLTYPE_SNIA);
572
573 while (brd) {
574 pda->cnodeid_to_nasid_table[numionodes] =
575 brd->brd_nasid;
576 physical_node_map[brd->brd_nasid] = numionodes;
577 root_lboard[numionodes] = brd;
578 numionodes++;
579 brd = KLCF_NEXT_ANY(brd);
580 if (!brd)
581 break;
582
583 brd = find_lboard_any(brd, KLTYPE_SNIA);
584 }
585 }
586
587 /* Scan for TIO nodes. */
588 for (nasid = 0; nasid < MAX_PHYSNODE_ID; nasid += 2) {
589 /* if there's no nasid, don't try to read the klconfig on the node */
590 if (physical_node_map[nasid] == -1)
591 continue;
592 brd = find_lboard_any((lboard_t *)
593 root_lboard[nasid_to_cnodeid(nasid)],
594 KLTYPE_TIO);
595 while (brd) {
596 pda->cnodeid_to_nasid_table[numionodes] =
597 brd->brd_nasid;
598 physical_node_map[brd->brd_nasid] = numionodes;
599 root_lboard[numionodes] = brd;
600 numionodes++;
601 brd = KLCF_NEXT_ANY(brd);
602 if (!brd)
603 break;
604
605 brd = find_lboard_any(brd, KLTYPE_TIO);
606 }
607 }
608
609}
610
611int
612nasid_slice_to_cpuid(int nasid, int slice)
613{
614 long cpu;
615
616 for (cpu=0; cpu < NR_CPUS; cpu++)
617 if (nodepda->phys_cpuid[cpu].nasid == nasid && nodepda->phys_cpuid[cpu].slice == slice)
618 return cpu;
619
620 return -1;
621}
diff --git a/arch/ia64/sn/kernel/sn2/Makefile b/arch/ia64/sn/kernel/sn2/Makefile
new file mode 100644
index 000000000000..170bde4549da
--- /dev/null
+++ b/arch/ia64/sn/kernel/sn2/Makefile
@@ -0,0 +1,13 @@
1# arch/ia64/sn/kernel/sn2/Makefile
2#
3# This file is subject to the terms and conditions of the GNU General Public
4# License. See the file "COPYING" in the main directory of this archive
5# for more details.
6#
7# Copyright (C) 1999,2001-2002 Silicon Graphics, Inc. All rights reserved.
8#
9# sn2 specific kernel files
10#
11
12obj-y += cache.o io.o ptc_deadlock.o sn2_smp.o sn_proc_fs.o \
13 prominfo_proc.o timer.o timer_interrupt.o sn_hwperf.o
diff --git a/arch/ia64/sn/kernel/sn2/cache.c b/arch/ia64/sn/kernel/sn2/cache.c
new file mode 100644
index 000000000000..bc3cfa17cd0f
--- /dev/null
+++ b/arch/ia64/sn/kernel/sn2/cache.c
@@ -0,0 +1,34 @@
1/*
2 * This file is subject to the terms and conditions of the GNU General Public
3 * License. See the file "COPYING" in the main directory of this archive
4 * for more details.
5 *
6 * Copyright (C) 2001-2003 Silicon Graphics, Inc. All rights reserved.
7 *
8 */
9#include <linux/module.h>
10#include <asm/pgalloc.h>
11
12/**
 13 * sn_flush_all_caches - flush a range of addresses from all caches (incl. L4)
14 * @flush_addr: identity mapped region 7 address to start flushing
15 * @bytes: number of bytes to flush
16 *
17 * Flush a range of addresses from all caches including L4.
18 * All addresses fully or partially contained within
19 * @flush_addr to @flush_addr + @bytes are flushed
 20 * from all caches.
21 */
22void
23sn_flush_all_caches(long flush_addr, long bytes)
24{
25 flush_icache_range(flush_addr, flush_addr+bytes);
26 /*
27 * The last call may have returned before the caches
28 * were actually flushed, so we call it again to make
29 * sure.
30 */
31 flush_icache_range(flush_addr, flush_addr+bytes);
32 mb();
33}
34EXPORT_SYMBOL(sn_flush_all_caches);
diff --git a/arch/ia64/sn/kernel/sn2/io.c b/arch/ia64/sn/kernel/sn2/io.c
new file mode 100644
index 000000000000..a12c0586de38
--- /dev/null
+++ b/arch/ia64/sn/kernel/sn2/io.c
@@ -0,0 +1,101 @@
1/*
2 * This file is subject to the terms and conditions of the GNU General Public
3 * License. See the file "COPYING" in the main directory of this archive
4 * for more details.
5 *
6 * Copyright (C) 2003 Silicon Graphics, Inc. All rights reserved.
7 *
8 * The generic kernel requires function pointers to these routines, so
9 * we wrap the inlines from asm/ia64/sn/sn2/io.h here.
10 */
11
12#include <asm/sn/io.h>
13
14#ifdef CONFIG_IA64_GENERIC
15
16#undef __sn_inb
17#undef __sn_inw
18#undef __sn_inl
19#undef __sn_outb
20#undef __sn_outw
21#undef __sn_outl
22#undef __sn_readb
23#undef __sn_readw
24#undef __sn_readl
25#undef __sn_readq
26#undef __sn_readb_relaxed
27#undef __sn_readw_relaxed
28#undef __sn_readl_relaxed
29#undef __sn_readq_relaxed
30
31unsigned int __sn_inb(unsigned long port)
32{
33 return ___sn_inb(port);
34}
35
36unsigned int __sn_inw(unsigned long port)
37{
38 return ___sn_inw(port);
39}
40
41unsigned int __sn_inl(unsigned long port)
42{
43 return ___sn_inl(port);
44}
45
46void __sn_outb(unsigned char val, unsigned long port)
47{
48 ___sn_outb(val, port);
49}
50
51void __sn_outw(unsigned short val, unsigned long port)
52{
53 ___sn_outw(val, port);
54}
55
56void __sn_outl(unsigned int val, unsigned long port)
57{
58 ___sn_outl(val, port);
59}
60
61unsigned char __sn_readb(void __iomem *addr)
62{
63 return ___sn_readb(addr);
64}
65
66unsigned short __sn_readw(void __iomem *addr)
67{
68 return ___sn_readw(addr);
69}
70
71unsigned int __sn_readl(void __iomem *addr)
72{
73 return ___sn_readl(addr);
74}
75
76unsigned long __sn_readq(void __iomem *addr)
77{
78 return ___sn_readq(addr);
79}
80
81unsigned char __sn_readb_relaxed(void __iomem *addr)
82{
83 return ___sn_readb_relaxed(addr);
84}
85
86unsigned short __sn_readw_relaxed(void __iomem *addr)
87{
88 return ___sn_readw_relaxed(addr);
89}
90
91unsigned int __sn_readl_relaxed(void __iomem *addr)
92{
93 return ___sn_readl_relaxed(addr);
94}
95
96unsigned long __sn_readq_relaxed(void __iomem *addr)
97{
98 return ___sn_readq_relaxed(addr);
99}
100
101#endif
diff --git a/arch/ia64/sn/kernel/sn2/prominfo_proc.c b/arch/ia64/sn/kernel/sn2/prominfo_proc.c
new file mode 100644
index 000000000000..81c63b2f8ae9
--- /dev/null
+++ b/arch/ia64/sn/kernel/sn2/prominfo_proc.c
@@ -0,0 +1,279 @@
1/*
2 * This file is subject to the terms and conditions of the GNU General Public
3 * License. See the file "COPYING" in the main directory of this archive
4 * for more details.
5 *
6 * Copyright (C) 1999,2001-2004 Silicon Graphics, Inc. All Rights Reserved.
7 *
8 * Module to export the system's Firmware Interface Tables, including
9 * PROM revision numbers and banners, in /proc
10 */
11#include <linux/config.h>
12#include <linux/module.h>
13#include <linux/slab.h>
14#include <linux/proc_fs.h>
15#include <linux/nodemask.h>
16#include <asm/system.h>
17#include <asm/io.h>
18#include <asm/sn/sn_sal.h>
19#include <asm/sn/sn_cpuid.h>
20#include <asm/sn/addrs.h>
21
22MODULE_DESCRIPTION("PROM version reporting for /proc");
23MODULE_AUTHOR("Chad Talbott");
24MODULE_LICENSE("GPL");
25
26/* Standard Intel FIT entry types */
27#define FIT_ENTRY_FIT_HEADER 0x00 /* FIT header entry */
28#define FIT_ENTRY_PAL_B 0x01 /* PAL_B entry */
29/* Entries 0x02 through 0x0D reserved by Intel */
30#define FIT_ENTRY_PAL_A_PROC 0x0E /* Processor-specific PAL_A entry */
31#define FIT_ENTRY_PAL_A 0x0F /* PAL_A entry, same as... */
32#define FIT_ENTRY_PAL_A_GEN 0x0F /* ...Generic PAL_A entry */
33#define FIT_ENTRY_UNUSED 0x7F /* Unused (reserved by Intel?) */
34/* OEM-defined entries range from 0x10 to 0x7E. */
35#define FIT_ENTRY_SAL_A 0x10 /* SAL_A entry */
36#define FIT_ENTRY_SAL_B 0x11 /* SAL_B entry */
37#define FIT_ENTRY_SALRUNTIME 0x12 /* SAL runtime entry */
38#define FIT_ENTRY_EFI 0x1F /* EFI entry */
39#define FIT_ENTRY_FPSWA 0x20 /* embedded fpswa entry */
40#define FIT_ENTRY_VMLINUX 0x21 /* embedded vmlinux entry */
41
42#define FIT_MAJOR_SHIFT (32 + 8)
43#define FIT_MAJOR_MASK ((1 << 8) - 1)
44#define FIT_MINOR_SHIFT 32
45#define FIT_MINOR_MASK ((1 << 8) - 1)
46
47#define FIT_MAJOR(q) \
48 ((unsigned) ((q) >> FIT_MAJOR_SHIFT) & FIT_MAJOR_MASK)
49#define FIT_MINOR(q) \
50 ((unsigned) ((q) >> FIT_MINOR_SHIFT) & FIT_MINOR_MASK)
51
52#define FIT_TYPE_SHIFT (32 + 16)
53#define FIT_TYPE_MASK ((1 << 7) - 1)
54
55#define FIT_TYPE(q) \
56 ((unsigned) ((q) >> FIT_TYPE_SHIFT) & FIT_TYPE_MASK)
57
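/*
 * Worked decode of a hypothetical FIT entry word through the macros above
 * (the value is illustrative only, not taken from real firmware):
 *
 *	q = fentry[1] = 0x0010010200000080
 *	FIT_TYPE(q)  = (q >> 48) & 0x7f = 0x10  -> FIT_ENTRY_SAL_A
 *	FIT_MAJOR(q) = (q >> 40) & 0xff = 0x01
 *	FIT_MINOR(q) = (q >> 32) & 0xff = 0x02  -> version 1.02
 *	size field   = (q & 0xffffff) * 16 = 0x80 * 16 = 2048 bytes
 *	(the multiply by sixteen is done later, in dump_fit_entry())
 */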
58struct fit_type_map_t {
59 unsigned char type;
60 const char *name;
61};
62
63static const struct fit_type_map_t fit_entry_types[] = {
64 {FIT_ENTRY_FIT_HEADER, "FIT Header"},
65 {FIT_ENTRY_PAL_A_GEN, "Generic PAL_A"},
66 {FIT_ENTRY_PAL_A_PROC, "Processor-specific PAL_A"},
67 {FIT_ENTRY_PAL_A, "PAL_A"},
68 {FIT_ENTRY_PAL_B, "PAL_B"},
69 {FIT_ENTRY_SAL_A, "SAL_A"},
70 {FIT_ENTRY_SAL_B, "SAL_B"},
71 {FIT_ENTRY_SALRUNTIME, "SAL runtime"},
72 {FIT_ENTRY_EFI, "EFI"},
73 {FIT_ENTRY_VMLINUX, "Embedded Linux"},
74 {FIT_ENTRY_FPSWA, "Embedded FPSWA"},
75 {FIT_ENTRY_UNUSED, "Unused"},
76 {0xff, "Error"},
77};
78
79static const char *fit_type_name(unsigned char type)
80{
81 struct fit_type_map_t const *mapp;
82
83 for (mapp = fit_entry_types; mapp->type != 0xff; mapp++)
84 if (type == mapp->type)
85 return mapp->name;
86
87 if ((type > FIT_ENTRY_PAL_A) && (type < FIT_ENTRY_UNUSED))
88 return "OEM type";
89 if ((type > FIT_ENTRY_PAL_B) && (type < FIT_ENTRY_PAL_A))
90 return "Reserved";
91
92 return "Unknown type";
93}
94
95static int
96get_fit_entry(unsigned long nasid, int index, unsigned long *fentry,
97 char *banner, int banlen)
98{
99 return ia64_sn_get_fit_compt(nasid, index, fentry, banner, banlen);
100}
101
102
103/*
104 * These two routines display the FIT table for each node.
105 */
106static int dump_fit_entry(char *page, unsigned long *fentry)
107{
108 unsigned type;
109
110 type = FIT_TYPE(fentry[1]);
111 return sprintf(page, "%02x %-25s %x.%02x %016lx %u\n",
112 type,
113 fit_type_name(type),
114 FIT_MAJOR(fentry[1]), FIT_MINOR(fentry[1]),
115 fentry[0],
116 /* mult by sixteen to get size in bytes */
117 (unsigned)(fentry[1] & 0xffffff) * 16);
118}
119
120
121/*
122 * We assume that the fit table will be small enough that we can print
123 * the whole thing into one page. (This is true for our default 16kB
124 * pages -- each entry is about 60 chars wide when printed.) I read
125 * somewhere that the maximum size of the FIT is 128 entries, so we're
126 * OK except for 4kB pages (and no one is going to do that on SN
127 * anyway).
128 */
129static int
130dump_fit(char *page, unsigned long nasid)
131{
132 unsigned long fentry[2];
133 int index;
134 char *p;
135
136 p = page;
137 for (index=0;;index++) {
138 BUG_ON(index * 60 > PAGE_SIZE);
139 if (get_fit_entry(nasid, index, fentry, NULL, 0))
140 break;
141 p += dump_fit_entry(p, fentry);
142 }
143
144 return p - page;
145}
146
147static int
148dump_version(char *page, unsigned long nasid)
149{
150 unsigned long fentry[2];
151 char banner[128];
152 int index;
153 int len;
154
155 for (index = 0; ; index++) {
156 if (get_fit_entry(nasid, index, fentry, banner,
157 sizeof(banner)))
158 return 0;
159 if (FIT_TYPE(fentry[1]) == FIT_ENTRY_SAL_A)
160 break;
161 }
162
163 len = sprintf(page, "%x.%02x\n", FIT_MAJOR(fentry[1]),
164 FIT_MINOR(fentry[1]));
165 page += len;
166
167 if (banner[0])
168 len += snprintf(page, PAGE_SIZE-len, "%s\n", banner);
169
170 return len;
171}
172
173/* same as in proc_misc.c */
174static int
175proc_calc_metrics(char *page, char **start, off_t off, int count, int *eof,
176 int len)
177{
178 if (len <= off + count)
179 *eof = 1;
180 *start = page + off;
181 len -= off;
182 if (len > count)
183 len = count;
184 if (len < 0)
185 len = 0;
186 return len;
187}
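/*
 * Example of the clamping above (hypothetical numbers): for a 100-byte
 * result read in 60-byte chunks, the first call (off = 0, count = 60)
 * leaves *eof clear and returns 60; the second call (off = 60) sets *eof
 * (100 <= 60 + 60), points *start at page + 60 and returns the remaining
 * 40 bytes.
 */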
188
189static int
190read_version_entry(char *page, char **start, off_t off, int count, int *eof,
191 void *data)
192{
193 int len = 0;
194
195 /* data holds the NASID of the node */
196 len = dump_version(page, (unsigned long)data);
197 len = proc_calc_metrics(page, start, off, count, eof, len);
198 return len;
199}
200
201static int
202read_fit_entry(char *page, char **start, off_t off, int count, int *eof,
203 void *data)
204{
205 int len = 0;
206
207 /* data holds the NASID of the node */
208 len = dump_fit(page, (unsigned long)data);
209 len = proc_calc_metrics(page, start, off, count, eof, len);
210
211 return len;
212}
213
214/* module entry points */
215int __init prominfo_init(void);
216void __exit prominfo_exit(void);
217
218module_init(prominfo_init);
219module_exit(prominfo_exit);
220
221static struct proc_dir_entry **proc_entries;
222static struct proc_dir_entry *sgi_prominfo_entry;
223
224#define NODE_NAME_LEN 11
225
226int __init prominfo_init(void)
227{
228 struct proc_dir_entry **entp;
229 struct proc_dir_entry *p;
230 cnodeid_t cnodeid;
231 unsigned long nasid;
232 char name[NODE_NAME_LEN];
233
234 if (!ia64_platform_is("sn2"))
235 return 0;
236
237 proc_entries = kmalloc(num_online_nodes() * sizeof(struct proc_dir_entry *),
238 GFP_KERNEL);
239
240 sgi_prominfo_entry = proc_mkdir("sgi_prominfo", NULL);
241
242 entp = proc_entries;
243 for_each_online_node(cnodeid) {
244 sprintf(name, "node%d", cnodeid);
245 *entp = proc_mkdir(name, sgi_prominfo_entry);
246 nasid = cnodeid_to_nasid(cnodeid);
247 p = create_proc_read_entry(
248 "fit", 0, *entp, read_fit_entry,
249 (void *)nasid);
250 if (p)
251 p->owner = THIS_MODULE;
252 p = create_proc_read_entry(
253 "version", 0, *entp, read_version_entry,
254 (void *)nasid);
255 if (p)
256 p->owner = THIS_MODULE;
257 entp++;
258 }
259
260 return 0;
261}
262
263void __exit prominfo_exit(void)
264{
265 struct proc_dir_entry **entp;
266 unsigned cnodeid;
267 char name[NODE_NAME_LEN];
268
269 entp = proc_entries;
270 for_each_online_node(cnodeid) {
271 remove_proc_entry("fit", *entp);
272 remove_proc_entry("version", *entp);
273 sprintf(name, "node%d", cnodeid);
274 remove_proc_entry(name, sgi_prominfo_entry);
275 entp++;
276 }
277 remove_proc_entry("sgi_prominfo", NULL);
278 kfree(proc_entries);
279}
diff --git a/arch/ia64/sn/kernel/sn2/ptc_deadlock.S b/arch/ia64/sn/kernel/sn2/ptc_deadlock.S
new file mode 100644
index 000000000000..7947312801ec
--- /dev/null
+++ b/arch/ia64/sn/kernel/sn2/ptc_deadlock.S
@@ -0,0 +1,82 @@
1/*
2 * This file is subject to the terms and conditions of the GNU General Public
3 * License. See the file "COPYING" in the main directory of this archive
4 * for more details.
5 *
6 * Copyright (C) 2000-2004 Silicon Graphics, Inc. All rights reserved.
7 */
8
9#include <asm/sn/shub_mmr.h>
10
11#define DEADLOCKBIT SH_PIO_WRITE_STATUS_WRITE_DEADLOCK_SHFT
12#define WRITECOUNTMASK SH_PIO_WRITE_STATUS_PENDING_WRITE_COUNT_MASK
13#define ALIAS_OFFSET (SH1_PIO_WRITE_STATUS_0_ALIAS-SH1_PIO_WRITE_STATUS_0)
14
15
16 .global sn2_ptc_deadlock_recovery_core
17 .proc sn2_ptc_deadlock_recovery_core
18
19sn2_ptc_deadlock_recovery_core:
20 .regstk 6,0,0,0
21
22 ptc0 = in0
23 data0 = in1
24 ptc1 = in2
25 data1 = in3
26 piowc = in4
27 zeroval = in5
28 piowcphy = r30
29 psrsave = r2
30 scr1 = r16
31 scr2 = r17
32 mask = r18
33
34
35 extr.u piowcphy=piowc,0,61;; // Convert piowc to uncached physical address
36 dep piowcphy=-1,piowcphy,63,1
37 movl mask=WRITECOUNTMASK
38
391:
40 add scr2=ALIAS_OFFSET,piowc // Address of WRITE_STATUS alias register
41 mov scr1=7;; // Clear DEADLOCK, WRITE_ERROR, MULTI_WRITE_ERROR
42 st8.rel [scr2]=scr1;;
43
445: ld8.acq scr1=[piowc];; // Wait for PIOs to complete.
45 and scr2=scr1,mask;; // mask of writecount bits
46 cmp.ne p6,p0=zeroval,scr2
47(p6) br.cond.sptk 5b
48
49
50
51 ////////////// BEGIN PHYSICAL MODE ////////////////////
52 mov psrsave=psr // Disable IC (no PMIs)
53 rsm psr.i | psr.dt | psr.ic;;
54 srlz.i;;
55
56 st8.rel [ptc0]=data0 // Write PTC0 & wait for completion.
57
585: ld8.acq scr1=[piowcphy];; // Wait for PIOs to complete.
59 and scr2=scr1,mask;; // mask of writecount bits
60 cmp.ne p6,p0=zeroval,scr2
61(p6) br.cond.sptk 5b;;
62
63 tbit.nz p8,p7=scr1,DEADLOCKBIT;;// Test for DEADLOCK
64(p7) cmp.ne p7,p0=r0,ptc1;; // Test for non-null ptc1
65
66(p7) st8.rel [ptc1]=data1;; // Now write PTC1.
67
685: ld8.acq scr1=[piowcphy];; // Wait for PIOs to complete.
69 and scr2=scr1,mask;; // mask of writecount bits
70 cmp.ne p6,p0=zeroval,scr2
71(p6) br.cond.sptk 5b
72
73 tbit.nz p8,p0=scr1,DEADLOCKBIT;;// Test for DEADLOCK
74
75 mov psr.l=psrsave;; // Reenable IC
76 srlz.i;;
77 ////////////// END PHYSICAL MODE ////////////////////
78
79(p8) br.cond.spnt 1b;; // Repeat if DEADLOCK occurred.
80
81 br.ret.sptk rp
82 .endp sn2_ptc_deadlock_recovery_core
diff --git a/arch/ia64/sn/kernel/sn2/sn2_smp.c b/arch/ia64/sn/kernel/sn2/sn2_smp.c
new file mode 100644
index 000000000000..7af05a7ac743
--- /dev/null
+++ b/arch/ia64/sn/kernel/sn2/sn2_smp.c
@@ -0,0 +1,295 @@
1/*
2 * SN2 Platform specific SMP Support
3 *
4 * This file is subject to the terms and conditions of the GNU General Public
5 * License. See the file "COPYING" in the main directory of this archive
6 * for more details.
7 *
8 * Copyright (C) 2000-2004 Silicon Graphics, Inc. All rights reserved.
9 */
10
11#include <linux/init.h>
12#include <linux/kernel.h>
13#include <linux/spinlock.h>
14#include <linux/threads.h>
15#include <linux/sched.h>
16#include <linux/smp.h>
17#include <linux/interrupt.h>
18#include <linux/irq.h>
19#include <linux/mmzone.h>
20#include <linux/module.h>
21#include <linux/bitops.h>
22#include <linux/nodemask.h>
23
24#include <asm/processor.h>
25#include <asm/irq.h>
26#include <asm/sal.h>
27#include <asm/system.h>
28#include <asm/delay.h>
29#include <asm/io.h>
30#include <asm/smp.h>
31#include <asm/tlb.h>
32#include <asm/numa.h>
33#include <asm/hw_irq.h>
34#include <asm/current.h>
35#include <asm/sn/sn_cpuid.h>
36#include <asm/sn/sn_sal.h>
37#include <asm/sn/addrs.h>
38#include <asm/sn/shub_mmr.h>
39#include <asm/sn/nodepda.h>
40#include <asm/sn/rw_mmr.h>
41
42void sn2_ptc_deadlock_recovery(volatile unsigned long *, unsigned long data0,
43 volatile unsigned long *, unsigned long data1);
44
45static __cacheline_aligned DEFINE_SPINLOCK(sn2_global_ptc_lock);
46
47static unsigned long sn2_ptc_deadlock_count;
48
49static inline unsigned long wait_piowc(void)
50{
51 volatile unsigned long *piows, zeroval;
52 unsigned long ws;
53
54 piows = pda->pio_write_status_addr;
55 zeroval = pda->pio_write_status_val;
56 do {
57 cpu_relax();
58 } while (((ws = *piows) & SH_PIO_WRITE_STATUS_PENDING_WRITE_COUNT_MASK) != zeroval);
59 return ws;
60}
61
62void sn_tlb_migrate_finish(struct mm_struct *mm)
63{
64 if (mm == current->mm)
65 flush_tlb_mm(mm);
66}
67
68/**
69 * sn2_global_tlb_purge - globally purge translation cache of virtual address range
70 * @start: start of virtual address range
71 * @end: end of virtual address range
72 * @nbits: specifies number of bytes to purge per instruction (num = 1<<(nbits & 0xfc))
73 *
74 * Purges the translation caches of all processors of the given virtual address
75 * range.
76 *
77 * Note:
78 * - cpu_vm_mask is a bit mask that indicates which cpus have loaded the context.
79 * - cpu_vm_mask is converted into a nodemask of the nodes containing the
80 * cpus in cpu_vm_mask.
81 * - if only one bit is set in cpu_vm_mask & it is the current cpu,
82 * then only the local TLB needs to be flushed. This flushing can be done
83 * using ptc.l. This is the common case & avoids the global spinlock.
84 * - if multiple cpus have loaded the context, then flushing has to be
85 * done with ptc.g/MMRs under protection of the global ptc_lock.
86 */
87
88void
89sn2_global_tlb_purge(unsigned long start, unsigned long end,
90 unsigned long nbits)
91{
92 int i, shub1, cnode, mynasid, cpu, lcpu = 0, nasid, flushed = 0;
93 volatile unsigned long *ptc0, *ptc1;
94 unsigned long flags = 0, data0 = 0, data1 = 0;
95 struct mm_struct *mm = current->active_mm;
96 short nasids[MAX_NUMNODES], nix;
97 nodemask_t nodes_flushed;
98
99 nodes_clear(nodes_flushed);
100 i = 0;
101
102 for_each_cpu_mask(cpu, mm->cpu_vm_mask) {
103 cnode = cpu_to_node(cpu);
104 node_set(cnode, nodes_flushed);
105 lcpu = cpu;
106 i++;
107 }
108
109 preempt_disable();
110
111 if (likely(i == 1 && lcpu == smp_processor_id())) {
112 do {
113 ia64_ptcl(start, nbits << 2);
114 start += (1UL << nbits);
115 } while (start < end);
116 ia64_srlz_i();
117 preempt_enable();
118 return;
119 }
120
121 if (atomic_read(&mm->mm_users) == 1) {
122 flush_tlb_mm(mm);
123 preempt_enable();
124 return;
125 }
126
127 nix = 0;
128 for_each_node_mask(cnode, nodes_flushed)
129 nasids[nix++] = cnodeid_to_nasid(cnode);
130
131 shub1 = is_shub1();
132 if (shub1) {
133 data0 = (1UL << SH1_PTC_0_A_SHFT) |
134 (nbits << SH1_PTC_0_PS_SHFT) |
135 ((ia64_get_rr(start) >> 8) << SH1_PTC_0_RID_SHFT) |
136 (1UL << SH1_PTC_0_START_SHFT);
137 ptc0 = (long *)GLOBAL_MMR_PHYS_ADDR(0, SH1_PTC_0);
138 ptc1 = (long *)GLOBAL_MMR_PHYS_ADDR(0, SH1_PTC_1);
139 } else {
140 data0 = (1UL << SH2_PTC_A_SHFT) |
141 (nbits << SH2_PTC_PS_SHFT) |
142 (1UL << SH2_PTC_START_SHFT);
143 ptc0 = (long *)GLOBAL_MMR_PHYS_ADDR(0, SH2_PTC +
144 ((ia64_get_rr(start) >> 8) << SH2_PTC_RID_SHFT) );
145 ptc1 = NULL;
146 }
147
148
149 mynasid = get_nasid();
150
151 spin_lock_irqsave(&sn2_global_ptc_lock, flags);
152
153 do {
154 if (shub1)
155 data1 = start | (1UL << SH1_PTC_1_START_SHFT);
156 else
157 data0 = (data0 & ~SH2_PTC_ADDR_MASK) | (start & SH2_PTC_ADDR_MASK);
158 for (i = 0; i < nix; i++) {
159 nasid = nasids[i];
160 if (unlikely(nasid == mynasid)) {
161 ia64_ptcga(start, nbits << 2);
162 ia64_srlz_i();
163 } else {
164 ptc0 = CHANGE_NASID(nasid, ptc0);
165 if (ptc1)
166 ptc1 = CHANGE_NASID(nasid, ptc1);
167 pio_atomic_phys_write_mmrs(ptc0, data0, ptc1,
168 data1);
169 flushed = 1;
170 }
171 }
172
173 if (flushed
174 && (wait_piowc() &
175 SH_PIO_WRITE_STATUS_WRITE_DEADLOCK_MASK)) {
176 sn2_ptc_deadlock_recovery(ptc0, data0, ptc1, data1);
177 }
178
179 start += (1UL << nbits);
180
181 } while (start < end);
182
183 spin_unlock_irqrestore(&sn2_global_ptc_lock, flags);
184
185 preempt_enable();
186}
187
188/*
189 * sn2_ptc_deadlock_recovery
190 *
 191 * Recover from PTC deadlock conditions. Recovery requires stepping through each
192 * TLB flush transaction. The recovery sequence is somewhat tricky & is
193 * coded in assembly language.
194 */
195void sn2_ptc_deadlock_recovery(volatile unsigned long *ptc0, unsigned long data0,
196 volatile unsigned long *ptc1, unsigned long data1)
197{
198 extern void sn2_ptc_deadlock_recovery_core(volatile unsigned long *, unsigned long,
199 volatile unsigned long *, unsigned long, volatile unsigned long *, unsigned long);
200 int cnode, mycnode, nasid;
201 volatile unsigned long *piows;
202 volatile unsigned long zeroval;
203
204 sn2_ptc_deadlock_count++;
205
206 piows = pda->pio_write_status_addr;
207 zeroval = pda->pio_write_status_val;
208
209 mycnode = numa_node_id();
210
211 for_each_online_node(cnode) {
212 if (is_headless_node(cnode) || cnode == mycnode)
213 continue;
214 nasid = cnodeid_to_nasid(cnode);
215 ptc0 = CHANGE_NASID(nasid, ptc0);
216 if (ptc1)
217 ptc1 = CHANGE_NASID(nasid, ptc1);
218 sn2_ptc_deadlock_recovery_core(ptc0, data0, ptc1, data1, piows, zeroval);
219 }
220}
221
222/**
223 * sn_send_IPI_phys - send an IPI to a Nasid and slice
224 * @nasid: nasid to receive the interrupt (may be outside partition)
225 * @physid: physical cpuid to receive the interrupt.
226 * @vector: command to send
227 * @delivery_mode: delivery mechanism
228 *
229 * Sends an IPI (interprocessor interrupt) to the processor specified by
230 * @physid
231 *
232 * @delivery_mode can be one of the following
233 *
234 * %IA64_IPI_DM_INT - pend an interrupt
235 * %IA64_IPI_DM_PMI - pend a PMI
236 * %IA64_IPI_DM_NMI - pend an NMI
237 * %IA64_IPI_DM_INIT - pend an INIT interrupt
238 */
239void sn_send_IPI_phys(int nasid, long physid, int vector, int delivery_mode)
240{
241 long val;
242 unsigned long flags = 0;
243 volatile long *p;
244
245 p = (long *)GLOBAL_MMR_PHYS_ADDR(nasid, SH_IPI_INT);
246 val = (1UL << SH_IPI_INT_SEND_SHFT) |
247 (physid << SH_IPI_INT_PID_SHFT) |
248 ((long)delivery_mode << SH_IPI_INT_TYPE_SHFT) |
249 ((long)vector << SH_IPI_INT_IDX_SHFT) |
250 (0x000feeUL << SH_IPI_INT_BASE_SHFT);
251
252 mb();
253 if (enable_shub_wars_1_1()) {
254 spin_lock_irqsave(&sn2_global_ptc_lock, flags);
255 }
256 pio_phys_write_mmr(p, val);
257 if (enable_shub_wars_1_1()) {
258 wait_piowc();
259 spin_unlock_irqrestore(&sn2_global_ptc_lock, flags);
260 }
261
262}
263
264EXPORT_SYMBOL(sn_send_IPI_phys);
265
266/**
267 * sn2_send_IPI - send an IPI to a processor
268 * @cpuid: target of the IPI
269 * @vector: command to send
270 * @delivery_mode: delivery mechanism
271 * @redirect: redirect the IPI?
272 *
273 * Sends an IPI (InterProcessor Interrupt) to the processor specified by
274 * @cpuid. @vector specifies the command to send, while @delivery_mode can
275 * be one of the following
276 *
277 * %IA64_IPI_DM_INT - pend an interrupt
278 * %IA64_IPI_DM_PMI - pend a PMI
279 * %IA64_IPI_DM_NMI - pend an NMI
280 * %IA64_IPI_DM_INIT - pend an INIT interrupt
281 */
282void sn2_send_IPI(int cpuid, int vector, int delivery_mode, int redirect)
283{
284 long physid;
285 int nasid;
286
287 physid = cpu_physical_id(cpuid);
288 nasid = cpuid_to_nasid(cpuid);
289
290 /* the following is used only when starting cpus at boot time */
291 if (unlikely(nasid == -1))
292 ia64_sn_get_sapic_info(physid, &nasid, NULL, NULL);
293
294 sn_send_IPI_phys(nasid, physid, vector, delivery_mode);
295}
diff --git a/arch/ia64/sn/kernel/sn2/sn_hwperf.c b/arch/ia64/sn/kernel/sn2/sn_hwperf.c
new file mode 100644
index 000000000000..197356460ee1
--- /dev/null
+++ b/arch/ia64/sn/kernel/sn2/sn_hwperf.c
@@ -0,0 +1,690 @@
1/*
2 * This file is subject to the terms and conditions of the GNU General Public
3 * License. See the file "COPYING" in the main directory of this archive
4 * for more details.
5 *
6 * Copyright (C) 2004-2005 Silicon Graphics, Inc. All rights reserved.
7 *
8 * SGI Altix topology and hardware performance monitoring API.
9 * Mark Goodwin <markgw@sgi.com>.
10 *
11 * Creates /proc/sgi_sn/sn_topology (read-only) to export
12 * info about Altix nodes, routers, CPUs and NumaLink
13 * interconnection/topology.
14 *
15 * Also creates a dynamic misc device named "sn_hwperf"
16 * that supports an ioctl interface to call down into SAL
17 * to discover hw objects, topology and to read/write
18 * memory mapped registers, e.g. for performance monitoring.
19 * The "sn_hwperf" device is registered only after the procfs
20 * file is first opened, i.e. only if/when it's needed.
21 *
22 * This API is used by SGI Performance Co-Pilot and other
23 * tools, see http://oss.sgi.com/projects/pcp
24 */
25
26#include <linux/fs.h>
27#include <linux/slab.h>
28#include <linux/vmalloc.h>
29#include <linux/seq_file.h>
30#include <linux/miscdevice.h>
31#include <linux/cpumask.h>
32#include <linux/smp_lock.h>
33#include <linux/nodemask.h>
34#include <asm/processor.h>
35#include <asm/topology.h>
36#include <asm/smp.h>
37#include <asm/semaphore.h>
38#include <asm/segment.h>
39#include <asm/uaccess.h>
40#include <asm/sal.h>
41#include <asm/sn/io.h>
42#include <asm/sn/sn_sal.h>
43#include <asm/sn/module.h>
44#include <asm/sn/geo.h>
45#include <asm/sn/sn2/sn_hwperf.h>
46
47static void *sn_hwperf_salheap = NULL;
48static int sn_hwperf_obj_cnt = 0;
49static nasid_t sn_hwperf_master_nasid = INVALID_NASID;
50static int sn_hwperf_init(void);
51static DECLARE_MUTEX(sn_hwperf_init_mutex);
52
53static int sn_hwperf_enum_objects(int *nobj, struct sn_hwperf_object_info **ret)
54{
55 int e;
56 u64 sz;
57 struct sn_hwperf_object_info *objbuf = NULL;
58
59 if ((e = sn_hwperf_init()) < 0) {
60 printk("sn_hwperf_init failed: err %d\n", e);
61 goto out;
62 }
63
64 sz = sn_hwperf_obj_cnt * sizeof(struct sn_hwperf_object_info);
65 if ((objbuf = (struct sn_hwperf_object_info *) vmalloc(sz)) == NULL) {
66 printk("sn_hwperf_enum_objects: vmalloc(%d) failed\n", (int)sz);
67 e = -ENOMEM;
68 goto out;
69 }
70
71 e = ia64_sn_hwperf_op(sn_hwperf_master_nasid, SN_HWPERF_ENUM_OBJECTS,
72 0, sz, (u64) objbuf, 0, 0, NULL);
73 if (e != SN_HWPERF_OP_OK) {
74 e = -EINVAL;
75 vfree(objbuf);
76 }
77
78out:
79 *nobj = sn_hwperf_obj_cnt;
80 *ret = objbuf;
81 return e;
82}
83
84static int sn_hwperf_geoid_to_cnode(char *location)
85{
86 int cnode;
87 geoid_t geoid;
88 moduleid_t module_id;
89 char type;
90 int rack, slot, slab;
91 int this_rack, this_slot, this_slab;
92
93 if (sscanf(location, "%03d%c%02d#%d", &rack, &type, &slot, &slab) != 4)
94 return -1;
95
96 for (cnode = 0; cnode < numionodes; cnode++) {
97 geoid = cnodeid_get_geoid(cnode);
98 module_id = geo_module(geoid);
99 this_rack = MODULE_GET_RACK(module_id);
100 this_slot = MODULE_GET_BPOS(module_id);
101 this_slab = geo_slab(geoid);
102 if (rack == this_rack && slot == this_slot && slab == this_slab)
103 break;
104 }
105
106 return cnode < numionodes ? cnode : -1;
107}
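/*
 * Example of the location parse above (hypothetical string): sscanf() with
 * "%03d%c%02d#%d" turns "001c14#0" into rack = 1, type = 'c', slot = 14,
 * slab = 0, so the loop matches the cnode whose geoid reports rack 1,
 * bay (slot) 14, slab 0.
 */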
108
109static int sn_hwperf_obj_to_cnode(struct sn_hwperf_object_info * obj)
110{
111 if (!obj->sn_hwp_this_part)
112 return -1;
113 return sn_hwperf_geoid_to_cnode(obj->location);
114}
115
116static int sn_hwperf_generic_ordinal(struct sn_hwperf_object_info *obj,
117 struct sn_hwperf_object_info *objs)
118{
119 int ordinal;
120 struct sn_hwperf_object_info *p;
121
122 for (ordinal=0, p=objs; p != obj; p++) {
123 if (SN_HWPERF_FOREIGN(p))
124 continue;
125 if (SN_HWPERF_SAME_OBJTYPE(p, obj))
126 ordinal++;
127 }
128
129 return ordinal;
130}
131
132static const char *slabname_node = "node"; /* SHub asic */
133static const char *slabname_ionode = "ionode"; /* TIO asic */
134static const char *slabname_router = "router"; /* NL3R or NL4R */
135static const char *slabname_other = "other"; /* unknown asic */
136
137static const char *sn_hwperf_get_slabname(struct sn_hwperf_object_info *obj,
138 struct sn_hwperf_object_info *objs, int *ordinal)
139{
140 int isnode;
141 const char *slabname = slabname_other;
142
143 if ((isnode = SN_HWPERF_IS_NODE(obj)) || SN_HWPERF_IS_IONODE(obj)) {
144 slabname = isnode ? slabname_node : slabname_ionode;
145 *ordinal = sn_hwperf_obj_to_cnode(obj);
146 }
147 else {
148 *ordinal = sn_hwperf_generic_ordinal(obj, objs);
149 if (SN_HWPERF_IS_ROUTER(obj))
150 slabname = slabname_router;
151 }
152
153 return slabname;
154}
155
156static int sn_topology_show(struct seq_file *s, void *d)
157{
158 int sz;
159 int pt;
160 int e;
161 int i;
162 int j;
163 const char *slabname;
164 int ordinal;
165 cpumask_t cpumask;
166 char slice;
167 struct cpuinfo_ia64 *c;
168 struct sn_hwperf_port_info *ptdata;
169 struct sn_hwperf_object_info *p;
170 struct sn_hwperf_object_info *obj = d; /* this object */
171 struct sn_hwperf_object_info *objs = s->private; /* all objects */
172
173 if (obj == objs) {
174 seq_printf(s, "# sn_topology version 1\n");
175 seq_printf(s, "# objtype ordinal location partition"
176 " [attribute value [, ...]]\n");
177 }
178
179 if (SN_HWPERF_FOREIGN(obj)) {
180 /* private in another partition: not interesting */
181 return 0;
182 }
183
184 for (i = 0; obj->name[i]; i++) {
185 if (obj->name[i] == ' ')
186 obj->name[i] = '_';
187 }
188
189 slabname = sn_hwperf_get_slabname(obj, objs, &ordinal);
190 seq_printf(s, "%s %d %s %s asic %s", slabname, ordinal, obj->location,
191 obj->sn_hwp_this_part ? "local" : "shared", obj->name);
192
193 if (!SN_HWPERF_IS_NODE(obj) && !SN_HWPERF_IS_IONODE(obj))
194 seq_putc(s, '\n');
195 else {
196 seq_printf(s, ", nasid 0x%x", cnodeid_to_nasid(ordinal));
197 for (i=0; i < numionodes; i++) {
198 seq_printf(s, i ? ":%d" : ", dist %d",
199 node_distance(ordinal, i));
200 }
201 seq_putc(s, '\n');
202
203 /*
204 * CPUs on this node, if any
205 */
206 cpumask = node_to_cpumask(ordinal);
207 for_each_online_cpu(i) {
208 if (cpu_isset(i, cpumask)) {
209 slice = 'a' + cpuid_to_slice(i);
210 c = cpu_data(i);
211 seq_printf(s, "cpu %d %s%c local"
212 " freq %luMHz, arch ia64",
213 i, obj->location, slice,
214 c->proc_freq / 1000000);
215 for_each_online_cpu(j) {
216 seq_printf(s, j ? ":%d" : ", dist %d",
217 node_distance(
218 cpuid_to_cnodeid(i),
219 cpuid_to_cnodeid(j)));
220 }
221 seq_putc(s, '\n');
222 }
223 }
224 }
225
226 if (obj->ports) {
227 /*
228 * numalink ports
229 */
230 sz = obj->ports * sizeof(struct sn_hwperf_port_info);
231 if ((ptdata = vmalloc(sz)) == NULL)
232 return -ENOMEM;
233 e = ia64_sn_hwperf_op(sn_hwperf_master_nasid,
234 SN_HWPERF_ENUM_PORTS, obj->id, sz,
235 (u64) ptdata, 0, 0, NULL);
236 if (e != SN_HWPERF_OP_OK)
237 return -EINVAL;
238 for (ordinal=0, p=objs; p != obj; p++) {
239 if (!SN_HWPERF_FOREIGN(p))
240 ordinal += p->ports;
241 }
242 for (pt = 0; pt < obj->ports; pt++) {
243 for (p = objs, i = 0; i < sn_hwperf_obj_cnt; i++, p++) {
244 if (ptdata[pt].conn_id == p->id) {
245 break;
246 }
247 }
248 seq_printf(s, "numalink %d %s-%d",
249 ordinal+pt, obj->location, ptdata[pt].port);
250
251 if (i >= sn_hwperf_obj_cnt) {
252 /* no connection */
253 seq_puts(s, " local endpoint disconnected"
254 ", protocol unknown\n");
255 continue;
256 }
257
258 if (obj->sn_hwp_this_part && p->sn_hwp_this_part)
259 /* both ends local to this partition */
260 seq_puts(s, " local");
261 else if (!obj->sn_hwp_this_part && !p->sn_hwp_this_part)
262			/* both ends of the link in a foreign partition */
263 seq_puts(s, " foreign");
264 else
265 /* link straddles a partition */
266 seq_puts(s, " shared");
267
268 /*
269 * Unlikely, but strictly should query the LLP config
270 * registers because an NL4R can be configured to run
271 * NL3 protocol, even when not talking to an NL3 router.
272 * Ditto for node-node.
273 */
274 seq_printf(s, " endpoint %s-%d, protocol %s\n",
275 p->location, ptdata[pt].conn_port,
276 (SN_HWPERF_IS_NL3ROUTER(obj) ||
277 SN_HWPERF_IS_NL3ROUTER(p)) ? "LLP3" : "LLP4");
278 }
279 vfree(ptdata);
280 }
281
282 return 0;
283}
284
285static void *sn_topology_start(struct seq_file *s, loff_t * pos)
286{
287 struct sn_hwperf_object_info *objs = s->private;
288
289 if (*pos < sn_hwperf_obj_cnt)
290 return (void *)(objs + *pos);
291
292 return NULL;
293}
294
295static void *sn_topology_next(struct seq_file *s, void *v, loff_t * pos)
296{
297 ++*pos;
298 return sn_topology_start(s, pos);
299}
300
301static void sn_topology_stop(struct seq_file *m, void *v)
302{
303 return;
304}
305
306/*
307 * /proc/sgi_sn/sn_topology, read-only using seq_file
308 */
309static struct seq_operations sn_topology_seq_ops = {
310 .start = sn_topology_start,
311 .next = sn_topology_next,
312 .stop = sn_topology_stop,
313 .show = sn_topology_show
314};
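The four callbacks above make up the standard seq_file iterator over the object buffer that sn_topology_open() (at the end of this file) stashes in seq->private. Purely as an illustration, and not part of this commit, a userspace reader treats /proc/sgi_sn/sn_topology as an ordinary text file; a minimal sketch:

	/* Illustrative sketch only: dump the SN topology records from userspace. */
	#include <stdio.h>

	int main(void)
	{
		char line[256];
		FILE *f = fopen("/proc/sgi_sn/sn_topology", "r");

		if (!f) {
			perror("sn_topology");
			return 1;
		}
		/* one line per object, as formatted by sn_topology_show() */
		while (fgets(line, sizeof(line), f))
			fputs(line, stdout);
		fclose(f);
		return 0;
	}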
315
316struct sn_hwperf_op_info {
317 u64 op;
318 struct sn_hwperf_ioctl_args *a;
319 void *p;
320 int *v0;
321 int ret;
322};
323
324static void sn_hwperf_call_sal(void *info)
325{
326 struct sn_hwperf_op_info *op_info = info;
327 int r;
328
329 r = ia64_sn_hwperf_op(sn_hwperf_master_nasid, op_info->op,
330 op_info->a->arg, op_info->a->sz,
331 (u64) op_info->p, 0, 0, op_info->v0);
332 op_info->ret = r;
333}
334
335static int sn_hwperf_op_cpu(struct sn_hwperf_op_info *op_info)
336{
337 u32 cpu;
338 u32 use_ipi;
339 int r = 0;
340 cpumask_t save_allowed;
341
342 cpu = (op_info->a->arg & SN_HWPERF_ARG_CPU_MASK) >> 32;
343 use_ipi = op_info->a->arg & SN_HWPERF_ARG_USE_IPI_MASK;
344 op_info->a->arg &= SN_HWPERF_ARG_OBJID_MASK;
345
346 if (cpu != SN_HWPERF_ARG_ANY_CPU) {
347 if (cpu >= num_online_cpus() || !cpu_online(cpu)) {
348 r = -EINVAL;
349 goto out;
350 }
351 }
352
353 if (cpu == SN_HWPERF_ARG_ANY_CPU || cpu == get_cpu()) {
354 /* don't care, or already on correct cpu */
355 sn_hwperf_call_sal(op_info);
356 }
357 else {
358 if (use_ipi) {
359 /* use an interprocessor interrupt to call SAL */
360 smp_call_function_single(cpu, sn_hwperf_call_sal,
361 op_info, 1, 1);
362 }
363 else {
364 /* migrate the task before calling SAL */
365 save_allowed = current->cpus_allowed;
366 set_cpus_allowed(current, cpumask_of_cpu(cpu));
367 sn_hwperf_call_sal(op_info);
368 set_cpus_allowed(current, save_allowed);
369 }
370 }
371 r = op_info->ret;
372
373out:
374 return r;
375}
376
377/* map SAL hwperf error code to system error code */
378static int sn_hwperf_map_err(int hwperf_err)
379{
380 int e;
381
382 switch(hwperf_err) {
383 case SN_HWPERF_OP_OK:
384 e = 0;
385 break;
386
387 case SN_HWPERF_OP_NOMEM:
388 e = -ENOMEM;
389 break;
390
391 case SN_HWPERF_OP_NO_PERM:
392 e = -EPERM;
393 break;
394
395 case SN_HWPERF_OP_IO_ERROR:
396 e = -EIO;
397 break;
398
399 case SN_HWPERF_OP_BUSY:
400 case SN_HWPERF_OP_RECONFIGURE:
401 e = -EAGAIN;
402 break;
403
404 case SN_HWPERF_OP_INVAL:
405 default:
406 e = -EINVAL;
407 break;
408 }
409
410 return e;
411}
412
413/*
414 * ioctl for "sn_hwperf" misc device
415 */
416static int
417sn_hwperf_ioctl(struct inode *in, struct file *fp, u32 op, u64 arg)
418{
419 struct sn_hwperf_ioctl_args a;
420 struct cpuinfo_ia64 *cdata;
421 struct sn_hwperf_object_info *objs;
422 struct sn_hwperf_object_info *cpuobj;
423 struct sn_hwperf_op_info op_info;
424 void *p = NULL;
425 int nobj;
426 char slice;
427 int node;
428 int r;
429 int v0;
430 int i;
431 int j;
432
433 unlock_kernel();
434
435 /* only user requests are allowed here */
436 if ((op & SN_HWPERF_OP_MASK) < 10) {
437 r = -EINVAL;
438 goto error;
439 }
440 r = copy_from_user(&a, (const void __user *)arg,
441 sizeof(struct sn_hwperf_ioctl_args));
442 if (r != 0) {
443 r = -EFAULT;
444 goto error;
445 }
446
447 /*
448 * Allocate memory to hold a kernel copy of the user buffer. The
449 * buffer contents are either copied in or out (or both) of user
450 * space depending on the flags encoded in the requested operation.
451 */
452 if (a.ptr) {
453 p = vmalloc(a.sz);
454 if (!p) {
455 r = -ENOMEM;
456 goto error;
457 }
458 }
459
460 if (op & SN_HWPERF_OP_MEM_COPYIN) {
461 r = copy_from_user(p, (const void __user *)a.ptr, a.sz);
462 if (r != 0) {
463 r = -EFAULT;
464 goto error;
465 }
466 }
467
468 switch (op) {
469 case SN_HWPERF_GET_CPU_INFO:
470 if (a.sz == sizeof(u64)) {
471 /* special case to get size needed */
472 *(u64 *) p = (u64) num_online_cpus() *
473 sizeof(struct sn_hwperf_object_info);
474 } else
475 if (a.sz < num_online_cpus() * sizeof(struct sn_hwperf_object_info)) {
476 r = -ENOMEM;
477 goto error;
478 } else
479 if ((r = sn_hwperf_enum_objects(&nobj, &objs)) == 0) {
480 memset(p, 0, a.sz);
481 for (i = 0; i < nobj; i++) {
482 node = sn_hwperf_obj_to_cnode(objs + i);
483 for_each_online_cpu(j) {
484 if (node != cpu_to_node(j))
485 continue;
486 cpuobj = (struct sn_hwperf_object_info *) p + j;
487 slice = 'a' + cpuid_to_slice(j);
488 cdata = cpu_data(j);
489 cpuobj->id = j;
490 snprintf(cpuobj->name,
491 sizeof(cpuobj->name),
492 "CPU %luMHz %s",
493 cdata->proc_freq / 1000000,
494 cdata->vendor);
495 snprintf(cpuobj->location,
496 sizeof(cpuobj->location),
497 "%s%c", objs[i].location,
498 slice);
499 }
500 }
501
502 vfree(objs);
503 }
504 break;
505
506 case SN_HWPERF_GET_NODE_NASID:
507 if (a.sz != sizeof(u64) ||
508 (node = a.arg) < 0 || node >= numionodes) {
509 r = -EINVAL;
510 goto error;
511 }
512 *(u64 *)p = (u64)cnodeid_to_nasid(node);
513 break;
514
515 case SN_HWPERF_GET_OBJ_NODE:
516 if (a.sz != sizeof(u64) || a.arg < 0) {
517 r = -EINVAL;
518 goto error;
519 }
520 if ((r = sn_hwperf_enum_objects(&nobj, &objs)) == 0) {
521 if (a.arg >= nobj) {
522 r = -EINVAL;
523 vfree(objs);
524 goto error;
525 }
526 if (objs[(i = a.arg)].id != a.arg) {
527 for (i = 0; i < nobj; i++) {
528 if (objs[i].id == a.arg)
529 break;
530 }
531 }
532 if (i == nobj) {
533 r = -EINVAL;
534 vfree(objs);
535 goto error;
536 }
537 *(u64 *)p = (u64)sn_hwperf_obj_to_cnode(objs + i);
538 vfree(objs);
539 }
540 break;
541
542 case SN_HWPERF_GET_MMRS:
543 case SN_HWPERF_SET_MMRS:
544 case SN_HWPERF_OBJECT_DISTANCE:
545 op_info.p = p;
546 op_info.a = &a;
547 op_info.v0 = &v0;
548 op_info.op = op;
549 r = sn_hwperf_op_cpu(&op_info);
550 if (r) {
551 r = sn_hwperf_map_err(r);
552 goto error;
553 }
554 break;
555
556 default:
557 /* all other ops are a direct SAL call */
558 r = ia64_sn_hwperf_op(sn_hwperf_master_nasid, op,
559 a.arg, a.sz, (u64) p, 0, 0, &v0);
560 if (r) {
561 r = sn_hwperf_map_err(r);
562 goto error;
563 }
564 a.v0 = v0;
565 break;
566 }
567
568 if (op & SN_HWPERF_OP_MEM_COPYOUT) {
569 r = copy_to_user((void __user *)a.ptr, p, a.sz);
570 if (r != 0) {
571 r = -EFAULT;
572 goto error;
573 }
574 }
575
576error:
577 vfree(p);
578
579 lock_kernel();
580 return r;
581}
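For illustration only (not part of this commit): a userspace caller drives the ioctl above by filling a struct sn_hwperf_ioctl_args (the arg/sz/ptr/v0 fields referenced in the code) and issuing the request on the misc device registered below. The header path, the exact struct layout, and the assumption that SN_HWPERF_GET_NODE_NASID encodes the copy-out flag are assumptions here; only the field and macro names come from the function above.

	/* Illustrative sketch: ask for the NASID of compact node 0 (assumes 64-bit userland). */
	#include <stdio.h>
	#include <fcntl.h>
	#include <unistd.h>
	#include <sys/ioctl.h>
	#include <sn/sn_hwperf.h>		/* assumed header exporting the ioctl interface */

	int main(void)
	{
		struct sn_hwperf_ioctl_args a = { 0 };
		unsigned long long nasid = 0;
		int fd = open("/dev/sn_hwperf", O_RDONLY);

		if (fd < 0)
			return 1;
		a.arg = 0;				/* compact node id */
		a.sz = sizeof(nasid);
		a.ptr = (unsigned long)&nasid;		/* kernel copies the result back on OP_MEM_COPYOUT */
		if (ioctl(fd, SN_HWPERF_GET_NODE_NASID, &a) == 0)
			printf("nasid 0x%llx\n", nasid);
		close(fd);
		return 0;
	}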
582
583static struct file_operations sn_hwperf_fops = {
584 .ioctl = sn_hwperf_ioctl,
585};
586
587static struct miscdevice sn_hwperf_dev = {
588 MISC_DYNAMIC_MINOR,
589 "sn_hwperf",
590 &sn_hwperf_fops
591};
592
593static int sn_hwperf_init(void)
594{
595 u64 v;
596 int salr;
597 int e = 0;
598
599 /* single threaded, once-only initialization */
600 down(&sn_hwperf_init_mutex);
601 if (sn_hwperf_salheap) {
602 up(&sn_hwperf_init_mutex);
603 return e;
604 }
605
606 /*
607 * The PROM code needs a fixed reference node. For convenience the
608 * same node as the console I/O is used.
609 */
610 sn_hwperf_master_nasid = (nasid_t) ia64_sn_get_console_nasid();
611
612 /*
613 * Request the needed size and install the PROM scratch area.
614 * The PROM keeps various tracking bits in this memory area.
615 */
616 salr = ia64_sn_hwperf_op(sn_hwperf_master_nasid,
617 (u64) SN_HWPERF_GET_HEAPSIZE, 0,
618 (u64) sizeof(u64), (u64) &v, 0, 0, NULL);
619 if (salr != SN_HWPERF_OP_OK) {
620 e = -EINVAL;
621 goto out;
622 }
623
624 if ((sn_hwperf_salheap = vmalloc(v)) == NULL) {
625 e = -ENOMEM;
626 goto out;
627 }
628 salr = ia64_sn_hwperf_op(sn_hwperf_master_nasid,
629 SN_HWPERF_INSTALL_HEAP, 0, v,
630 (u64) sn_hwperf_salheap, 0, 0, NULL);
631 if (salr != SN_HWPERF_OP_OK) {
632 e = -EINVAL;
633 goto out;
634 }
635
636 salr = ia64_sn_hwperf_op(sn_hwperf_master_nasid,
637 SN_HWPERF_OBJECT_COUNT, 0,
638 sizeof(u64), (u64) &v, 0, 0, NULL);
639 if (salr != SN_HWPERF_OP_OK) {
640 e = -EINVAL;
641 goto out;
642 }
643 sn_hwperf_obj_cnt = (int)v;
644
645out:
646 if (e < 0 && sn_hwperf_salheap) {
647 vfree(sn_hwperf_salheap);
648 sn_hwperf_salheap = NULL;
649 sn_hwperf_obj_cnt = 0;
650 }
651
652 if (!e) {
653 /*
654 * Register a dynamic misc device for ioctl. Platforms
655 * supporting hotplug will create /dev/sn_hwperf, else
656		 * users can look up the minor number in /proc/misc.
657 */
658 if ((e = misc_register(&sn_hwperf_dev)) != 0) {
659 printk(KERN_ERR "sn_hwperf_init: misc register "
660 "for \"sn_hwperf\" failed, err %d\n", e);
661 }
662 }
663
664 up(&sn_hwperf_init_mutex);
665 return e;
666}
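As the comment above notes, the misc device gets a dynamic minor, so on systems without hotplug support the device node has to be created by hand from the entry in /proc/misc. A minimal userspace sketch of that lookup, assuming the usual "minor name" format of /proc/misc and character major 10 for misc devices; the node path is just an example:

	/* Illustrative sketch: find the dynamic minor for "sn_hwperf" and create a node. */
	#include <stdio.h>
	#include <string.h>
	#include <sys/types.h>
	#include <sys/stat.h>
	#include <sys/sysmacros.h>

	int main(void)
	{
		char name[64];
		int minor;
		FILE *f = fopen("/proc/misc", "r");

		if (!f)
			return 1;
		while (fscanf(f, "%d %63s", &minor, name) == 2) {
			if (strcmp(name, "sn_hwperf") == 0) {
				/* misc devices sit on character major 10 */
				if (mknod("/dev/sn_hwperf", S_IFCHR | 0600, makedev(10, minor)))
					perror("mknod");
				break;
			}
		}
		fclose(f);
		return 0;
	}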
667
668int sn_topology_open(struct inode *inode, struct file *file)
669{
670 int e;
671 struct seq_file *seq;
672 struct sn_hwperf_object_info *objbuf;
673 int nobj;
674
675 if ((e = sn_hwperf_enum_objects(&nobj, &objbuf)) == 0) {
676 e = seq_open(file, &sn_topology_seq_ops);
677 seq = file->private_data;
678 seq->private = objbuf;
679 }
680
681 return e;
682}
683
684int sn_topology_release(struct inode *inode, struct file *file)
685{
686 struct seq_file *seq = file->private_data;
687
688 vfree(seq->private);
689 return seq_release(inode, file);
690}
diff --git a/arch/ia64/sn/kernel/sn2/sn_proc_fs.c b/arch/ia64/sn/kernel/sn2/sn_proc_fs.c
new file mode 100644
index 000000000000..6a80fca807b9
--- /dev/null
+++ b/arch/ia64/sn/kernel/sn2/sn_proc_fs.c
@@ -0,0 +1,149 @@
1/*
2 * This file is subject to the terms and conditions of the GNU General Public
3 * License. See the file "COPYING" in the main directory of this archive
4 * for more details.
5 *
6 * Copyright (C) 2000-2004 Silicon Graphics, Inc. All rights reserved.
7 */
8#include <linux/config.h>
9#include <asm/uaccess.h>
10
11#ifdef CONFIG_PROC_FS
12#include <linux/proc_fs.h>
13#include <linux/seq_file.h>
14#include <asm/sn/sn_sal.h>
15
16static int partition_id_show(struct seq_file *s, void *p)
17{
18 seq_printf(s, "%d\n", sn_local_partid());
19 return 0;
20}
21
22static int partition_id_open(struct inode *inode, struct file *file)
23{
24 return single_open(file, partition_id_show, NULL);
25}
26
27static int system_serial_number_show(struct seq_file *s, void *p)
28{
29 seq_printf(s, "%s\n", sn_system_serial_number());
30 return 0;
31}
32
33static int system_serial_number_open(struct inode *inode, struct file *file)
34{
35 return single_open(file, system_serial_number_show, NULL);
36}
37
38static int licenseID_show(struct seq_file *s, void *p)
39{
40 seq_printf(s, "0x%lx\n", sn_partition_serial_number_val());
41 return 0;
42}
43
44static int licenseID_open(struct inode *inode, struct file *file)
45{
46 return single_open(file, licenseID_show, NULL);
47}
48
49/*
50 * Enable forced interrupt by default.
51 * When set, the sn interrupt handler writes the force interrupt register on
52 * the bridge chip. The hardware will then send an interrupt message if the
53 * interrupt line is active. This mimics a level sensitive interrupt.
54 */
55int sn_force_interrupt_flag = 1;
56
57static int sn_force_interrupt_show(struct seq_file *s, void *p)
58{
59 seq_printf(s, "Force interrupt is %s\n",
60 sn_force_interrupt_flag ? "enabled" : "disabled");
61 return 0;
62}
63
64static ssize_t sn_force_interrupt_write_proc(struct file *file,
65 const char __user *buffer, size_t count, loff_t *data)
66{
67 char val;
68
69 if (copy_from_user(&val, buffer, 1))
70 return -EFAULT;
71
72 sn_force_interrupt_flag = (val == '0') ? 0 : 1;
73 return count;
74}
75
76static int sn_force_interrupt_open(struct inode *inode, struct file *file)
77{
78 return single_open(file, sn_force_interrupt_show, NULL);
79}
80
81static int coherence_id_show(struct seq_file *s, void *p)
82{
83 seq_printf(s, "%d\n", partition_coherence_id());
84
85 return 0;
86}
87
88static int coherence_id_open(struct inode *inode, struct file *file)
89{
90 return single_open(file, coherence_id_show, NULL);
91}
92
93static struct proc_dir_entry *sn_procfs_create_entry(
94 const char *name, struct proc_dir_entry *parent,
95 int (*openfunc)(struct inode *, struct file *),
96 int (*releasefunc)(struct inode *, struct file *))
97{
98 struct proc_dir_entry *e = create_proc_entry(name, 0444, parent);
99
100 if (e) {
101 e->proc_fops = (struct file_operations *)kmalloc(
102 sizeof(struct file_operations), GFP_KERNEL);
103 if (e->proc_fops) {
104 memset(e->proc_fops, 0, sizeof(struct file_operations));
105 e->proc_fops->open = openfunc;
106 e->proc_fops->read = seq_read;
107 e->proc_fops->llseek = seq_lseek;
108 e->proc_fops->release = releasefunc;
109 }
110 }
111
112 return e;
113}
114
115/* /proc/sgi_sn/sn_topology uses seq_file, see sn_hwperf.c */
116extern int sn_topology_open(struct inode *, struct file *);
117extern int sn_topology_release(struct inode *, struct file *);
118
119void register_sn_procfs(void)
120{
121 static struct proc_dir_entry *sgi_proc_dir = NULL;
122 struct proc_dir_entry *e;
123
124 BUG_ON(sgi_proc_dir != NULL);
125 if (!(sgi_proc_dir = proc_mkdir("sgi_sn", NULL)))
126 return;
127
128 sn_procfs_create_entry("partition_id", sgi_proc_dir,
129 partition_id_open, single_release);
130
131 sn_procfs_create_entry("system_serial_number", sgi_proc_dir,
132 system_serial_number_open, single_release);
133
134 sn_procfs_create_entry("licenseID", sgi_proc_dir,
135 licenseID_open, single_release);
136
137 e = sn_procfs_create_entry("sn_force_interrupt", sgi_proc_dir,
138 sn_force_interrupt_open, single_release);
139 if (e)
140 e->proc_fops->write = sn_force_interrupt_write_proc;
141
142 sn_procfs_create_entry("coherence_id", sgi_proc_dir,
143 coherence_id_open, single_release);
144
145 sn_procfs_create_entry("sn_topology", sgi_proc_dir,
146 sn_topology_open, sn_topology_release);
147}
148
149#endif /* CONFIG_PROC_FS */
diff --git a/arch/ia64/sn/kernel/sn2/timer.c b/arch/ia64/sn/kernel/sn2/timer.c
new file mode 100644
index 000000000000..deb9baf4d473
--- /dev/null
+++ b/arch/ia64/sn/kernel/sn2/timer.c
@@ -0,0 +1,36 @@
1/*
2 * linux/arch/ia64/sn/kernel/sn2/timer.c
3 *
4 * Copyright (C) 2003 Silicon Graphics, Inc.
5 * Copyright (C) 2003 Hewlett-Packard Co
6 * David Mosberger <davidm@hpl.hp.com>: updated for new timer-interpolation infrastructure
7 */
8
9#include <linux/init.h>
10#include <linux/kernel.h>
11#include <linux/sched.h>
12#include <linux/time.h>
13#include <linux/interrupt.h>
14
15#include <asm/hw_irq.h>
16#include <asm/system.h>
17
18#include <asm/sn/leds.h>
19#include <asm/sn/shub_mmr.h>
20#include <asm/sn/clksupport.h>
21
22extern unsigned long sn_rtc_cycles_per_second;
23
24static struct time_interpolator sn2_interpolator = {
25 .drift = -1,
26 .shift = 10,
27 .mask = (1LL << 55) - 1,
28 .source = TIME_SOURCE_MMIO64
29};
30
31void __init sn_timer_init(void)
32{
33 sn2_interpolator.frequency = sn_rtc_cycles_per_second;
34 sn2_interpolator.addr = RTC_COUNTER_ADDR;
35 register_time_interpolator(&sn2_interpolator);
36}
diff --git a/arch/ia64/sn/kernel/sn2/timer_interrupt.c b/arch/ia64/sn/kernel/sn2/timer_interrupt.c
new file mode 100644
index 000000000000..cde7375390b0
--- /dev/null
+++ b/arch/ia64/sn/kernel/sn2/timer_interrupt.c
@@ -0,0 +1,63 @@
1/*
2 *
3 *
4 * Copyright (c) 2003 Silicon Graphics, Inc. All Rights Reserved.
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms of version 2 of the GNU General Public License
8 * as published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope that it would be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
13 *
14 * Further, this software is distributed without any warranty that it is
15 * free of the rightful claim of any third person regarding infringement
16 * or the like. Any license provided herein, whether implied or
17 * otherwise, applies only to this software file. Patent licenses, if
18 * any, provided herein do not apply to combinations of this program with
19 * other software, or any other product whatsoever.
20 *
21 * You should have received a copy of the GNU General Public
22 * License along with this program; if not, write the Free Software
23 * Foundation, Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA.
24 *
25 * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
26 * Mountain View, CA 94043, or:
27 *
28 * http://www.sgi.com
29 *
30 * For further information regarding this notice, see:
31 *
32 * http://oss.sgi.com/projects/GenInfo/NoticeExplan
33 */
34
35#include <linux/interrupt.h>
36#include <asm/sn/pda.h>
37#include <asm/sn/leds.h>
38
39extern void sn_lb_int_war_check(void);
40extern irqreturn_t timer_interrupt(int irq, void *dev_id, struct pt_regs *regs);
41
42#define SN_LB_INT_WAR_INTERVAL 100
43
44void sn_timer_interrupt(int irq, void *dev_id, struct pt_regs *regs)
45{
46 /* LED blinking */
47 if (!pda->hb_count--) {
48 pda->hb_count = HZ / 2;
49 set_led_bits(pda->hb_state ^=
50 LED_CPU_HEARTBEAT, LED_CPU_HEARTBEAT);
51 }
52
53 if (enable_shub_wars_1_1()) {
54 /* Bugfix code for SHUB 1.1 */
55 if (pda->pio_shub_war_cam_addr)
56 *pda->pio_shub_war_cam_addr = 0x8000000000000010UL;
57 }
58 if (pda->sn_lb_int_war_ticks == 0)
59 sn_lb_int_war_check();
60 pda->sn_lb_int_war_ticks++;
61 if (pda->sn_lb_int_war_ticks >= SN_LB_INT_WAR_INTERVAL)
62 pda->sn_lb_int_war_ticks = 0;
63}
diff --git a/arch/ia64/sn/pci/Makefile b/arch/ia64/sn/pci/Makefile
new file mode 100644
index 000000000000..b5dca0097a8e
--- /dev/null
+++ b/arch/ia64/sn/pci/Makefile
@@ -0,0 +1,10 @@
1#
2# This file is subject to the terms and conditions of the GNU General Public
3# License. See the file "COPYING" in the main directory of this archive
4# for more details.
5#
6# Copyright (C) 2000-2004 Silicon Graphics, Inc. All Rights Reserved.
7#
8# Makefile for the sn pci general routines.
9
10obj-y := pci_dma.o pcibr/
diff --git a/arch/ia64/sn/pci/pci_dma.c b/arch/ia64/sn/pci/pci_dma.c
new file mode 100644
index 000000000000..f680824f819d
--- /dev/null
+++ b/arch/ia64/sn/pci/pci_dma.c
@@ -0,0 +1,363 @@
1/*
2 * This file is subject to the terms and conditions of the GNU General Public
3 * License. See the file "COPYING" in the main directory of this archive
4 * for more details.
5 *
6 * Copyright (C) 2000,2002-2005 Silicon Graphics, Inc. All rights reserved.
7 *
8 * Routines for PCI DMA mapping. See Documentation/DMA-API.txt for
9 * a description of how these routines should be used.
10 */
11
12#include <linux/module.h>
13#include <asm/dma.h>
14#include <asm/sn/sn_sal.h>
15#include "pci/pcibus_provider_defs.h"
16#include "pci/pcidev.h"
17#include "pci/pcibr_provider.h"
18
19#define SG_ENT_VIRT_ADDRESS(sg) (page_address((sg)->page) + (sg)->offset)
20#define SG_ENT_PHYS_ADDRESS(SG) virt_to_phys(SG_ENT_VIRT_ADDRESS(SG))
21
22/**
23 * sn_dma_supported - test a DMA mask
24 * @dev: device to test
25 * @mask: DMA mask to test
26 *
27 * Return whether the given PCI device DMA address mask can be supported
28 * properly. For example, if your device can only drive the low 24-bits
29 * during PCI bus mastering, then you would pass 0x00ffffff as the mask to
30 * this function. Of course, SN only supports devices that have 32 or more
31 * address bits when using the PMU.
32 */
33int sn_dma_supported(struct device *dev, u64 mask)
34{
35 BUG_ON(dev->bus != &pci_bus_type);
36
37 if (mask < 0x7fffffff)
38 return 0;
39 return 1;
40}
41EXPORT_SYMBOL(sn_dma_supported);
42
43/**
44 * sn_dma_set_mask - set the DMA mask
45 * @dev: device to set
46 * @dma_mask: new mask
47 *
48 * Set @dev's DMA mask if the hw supports it.
49 */
50int sn_dma_set_mask(struct device *dev, u64 dma_mask)
51{
52 BUG_ON(dev->bus != &pci_bus_type);
53
54 if (!sn_dma_supported(dev, dma_mask))
55 return 0;
56
57 *dev->dma_mask = dma_mask;
58 return 1;
59}
60EXPORT_SYMBOL(sn_dma_set_mask);
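sn_dma_supported() and sn_dma_set_mask() back the generic DMA-mask negotiation for PCI devices on SN; the wiring through the ia64 machine vector is not shown in this file, so treat that dispatch as an assumption. A minimal, illustrative driver-side sketch of the negotiation these routines serve (the probe function and error handling are examples only):

	/* Illustrative sketch: a driver negotiating its DMA mask at probe time. */
	#include <linux/pci.h>
	#include <linux/dma-mapping.h>

	static int example_probe(struct pci_dev *pdev, const struct pci_device_id *id)
	{
		/* Prefer full 64-bit addressing; fall back to 32 bits. */
		if (pci_set_dma_mask(pdev, DMA_64BIT_MASK) &&
		    pci_set_dma_mask(pdev, DMA_32BIT_MASK)) {
			/* sn_dma_supported() rejects masks below 32 bits */
			dev_err(&pdev->dev, "no usable DMA mask\n");
			return -EIO;
		}
		return 0;
	}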
61
62/**
63 * sn_dma_alloc_coherent - allocate memory for coherent DMA
64 * @dev: device to allocate for
65 * @size: size of the region
66 * @dma_handle: DMA (bus) address
67 * @flags: memory allocation flags
68 *
69 * dma_alloc_coherent() returns a pointer to a memory region suitable for
70 * coherent DMA traffic to/from a PCI device. On SN platforms, this means
71 * that @dma_handle will have the %PCIIO_DMA_CMD flag set.
72 *
73 * This interface is usually used for "command" streams (e.g. the command
74 * queue for a SCSI controller). See Documentation/DMA-API.txt for
75 * more information.
76 */
77void *sn_dma_alloc_coherent(struct device *dev, size_t size,
78 dma_addr_t * dma_handle, int flags)
79{
80 void *cpuaddr;
81 unsigned long phys_addr;
82 struct pcidev_info *pcidev_info = SN_PCIDEV_INFO(to_pci_dev(dev));
83
84 BUG_ON(dev->bus != &pci_bus_type);
85
86 /*
87 * Allocate the memory.
88 * FIXME: We should be doing alloc_pages_node for the node closest
89 * to the PCI device.
90 */
91 if (!(cpuaddr = (void *)__get_free_pages(GFP_ATOMIC, get_order(size))))
92 return NULL;
93
94 memset(cpuaddr, 0x0, size);
95
96 /* physical addr. of the memory we just got */
97 phys_addr = __pa(cpuaddr);
98
99 /*
100 * 64 bit address translations should never fail.
101 * 32 bit translations can fail if there are insufficient mapping
102 * resources.
103 */
104
105 *dma_handle = pcibr_dma_map(pcidev_info, phys_addr, size,
106 SN_PCIDMA_CONSISTENT);
107 if (!*dma_handle) {
108 printk(KERN_ERR "%s: out of ATEs\n", __FUNCTION__);
109 free_pages((unsigned long)cpuaddr, get_order(size));
110 return NULL;
111 }
112
113 return cpuaddr;
114}
115EXPORT_SYMBOL(sn_dma_alloc_coherent);
116
117/**
118 * sn_dma_free_coherent - free memory associated with coherent DMAable region
119 * @dev: device to free for
120 * @size: size to free
121 * @cpu_addr: kernel virtual address to free
122 * @dma_handle: DMA address associated with this region
123 *
124 * Frees the memory allocated by dma_alloc_coherent(), potentially unmapping
125 * any associated IOMMU mappings.
126 */
127void sn_dma_free_coherent(struct device *dev, size_t size, void *cpu_addr,
128 dma_addr_t dma_handle)
129{
130 struct pcidev_info *pcidev_info = SN_PCIDEV_INFO(to_pci_dev(dev));
131
132 BUG_ON(dev->bus != &pci_bus_type);
133
134 pcibr_dma_unmap(pcidev_info, dma_handle, 0);
135 free_pages((unsigned long)cpu_addr, get_order(size));
136}
137EXPORT_SYMBOL(sn_dma_free_coherent);
138
139/**
140 * sn_dma_map_single - map a single page for DMA
141 * @dev: device to map for
142 * @cpu_addr: kernel virtual address of the region to map
143 * @size: size of the region
144 * @direction: DMA direction
145 *
146 * Map the region pointed to by @cpu_addr for DMA and return the
147 * DMA address.
148 *
149 * We map this to the one step pcibr_dmamap_trans interface rather than
150 * the two step pcibr_dmamap_alloc/pcibr_dmamap_addr because we have
151 * no way of saving the dmamap handle from the alloc to later free
152 * (which is pretty much unacceptable).
153 *
154 * TODO: simplify our interface;
155 *       figure out how to save the dmamap handle so we can use the two-step interface.
156 */
157dma_addr_t sn_dma_map_single(struct device *dev, void *cpu_addr, size_t size,
158 int direction)
159{
160 dma_addr_t dma_addr;
161 unsigned long phys_addr;
162 struct pcidev_info *pcidev_info = SN_PCIDEV_INFO(to_pci_dev(dev));
163
164 BUG_ON(dev->bus != &pci_bus_type);
165
166 phys_addr = __pa(cpu_addr);
167 dma_addr = pcibr_dma_map(pcidev_info, phys_addr, size, 0);
168 if (!dma_addr) {
169 printk(KERN_ERR "%s: out of ATEs\n", __FUNCTION__);
170 return 0;
171 }
172 return dma_addr;
173}
174EXPORT_SYMBOL(sn_dma_map_single);
175
176/**
177 * sn_dma_unmap_single - unmap a DMA-mapped page
178 * @dev: device to sync
179 * @dma_addr: DMA address to sync
180 * @size: size of region
181 * @direction: DMA direction
182 *
183 * This routine is supposed to sync the DMA region specified
184 * by @dma_addr into the coherence domain.  On SN, we're always cache
185 * coherent, so we just need to free any ATEs associated with this mapping.
186 */
187void sn_dma_unmap_single(struct device *dev, dma_addr_t dma_addr, size_t size,
188 int direction)
189{
190 struct pcidev_info *pcidev_info = SN_PCIDEV_INFO(to_pci_dev(dev));
191
192 BUG_ON(dev->bus != &pci_bus_type);
193 pcibr_dma_unmap(pcidev_info, dma_addr, direction);
194}
195EXPORT_SYMBOL(sn_dma_unmap_single);
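sn_dma_map_single()/sn_dma_unmap_single() above implement the streaming half of the DMA API for SN: the platform is fully cache coherent, so "unmap" only releases any ATEs held by the mapping. For illustration only, a hedged sketch of the driver-side pattern these routines serve, written against the generic dma_map_single()/dma_unmap_single() entry points (the function and buffer names are examples):

	/* Illustrative sketch: map one buffer for a device-to-memory transfer, then release it. */
	#include <linux/device.h>
	#include <linux/dma-mapping.h>

	static int example_start_io(struct device *dev, void *buf, size_t len)
	{
		dma_addr_t handle;

		handle = dma_map_single(dev, buf, len, DMA_FROM_DEVICE);
		if (!handle)	/* sn_dma_map_single() returns 0 when it runs out of ATEs */
			return -ENOMEM;

		/* ... program the hardware with 'handle' and wait for completion ... */

		dma_unmap_single(dev, handle, len, DMA_FROM_DEVICE);
		return 0;
	}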
196
197/**
198 * sn_dma_unmap_sg - unmap a DMA scatterlist
199 * @dev: device to unmap
200 * @sg: scatterlist to unmap
201 * @nhwentries: number of scatterlist entries
202 * @direction: DMA direction
203 *
204 * Unmap a set of streaming mode DMA translations.
205 */
206void sn_dma_unmap_sg(struct device *dev, struct scatterlist *sg,
207 int nhwentries, int direction)
208{
209 int i;
210 struct pcidev_info *pcidev_info = SN_PCIDEV_INFO(to_pci_dev(dev));
211
212 BUG_ON(dev->bus != &pci_bus_type);
213
214 for (i = 0; i < nhwentries; i++, sg++) {
215 pcibr_dma_unmap(pcidev_info, sg->dma_address, direction);
216 sg->dma_address = (dma_addr_t) NULL;
217 sg->dma_length = 0;
218 }
219}
220EXPORT_SYMBOL(sn_dma_unmap_sg);
221
222/**
223 * sn_dma_map_sg - map a scatterlist for DMA
224 * @dev: device to map for
225 * @sg: scatterlist to map
226 * @nhwentries: number of entries
227 * @direction: direction of the DMA transaction
228 *
229 * Maps each entry of @sg for DMA.
230 */
231int sn_dma_map_sg(struct device *dev, struct scatterlist *sg, int nhwentries,
232 int direction)
233{
234 unsigned long phys_addr;
235 struct scatterlist *saved_sg = sg;
236 struct pcidev_info *pcidev_info = SN_PCIDEV_INFO(to_pci_dev(dev));
237 int i;
238
239 BUG_ON(dev->bus != &pci_bus_type);
240
241 /*
242 * Setup a DMA address for each entry in the scatterlist.
243 */
244 for (i = 0; i < nhwentries; i++, sg++) {
245 phys_addr = SG_ENT_PHYS_ADDRESS(sg);
246 sg->dma_address = pcibr_dma_map(pcidev_info, phys_addr,
247 sg->length, 0);
248
249 if (!sg->dma_address) {
250 printk(KERN_ERR "%s: out of ATEs\n", __FUNCTION__);
251
252 /*
253 * Free any successfully allocated entries.
254 */
255 if (i > 0)
256 sn_dma_unmap_sg(dev, saved_sg, i, direction);
257 return 0;
258 }
259
260 sg->dma_length = sg->length;
261 }
262
263 return nhwentries;
264}
265EXPORT_SYMBOL(sn_dma_map_sg);
266
267void sn_dma_sync_single_for_cpu(struct device *dev, dma_addr_t dma_handle,
268 size_t size, int direction)
269{
270 BUG_ON(dev->bus != &pci_bus_type);
271}
272EXPORT_SYMBOL(sn_dma_sync_single_for_cpu);
273
274void sn_dma_sync_single_for_device(struct device *dev, dma_addr_t dma_handle,
275 size_t size, int direction)
276{
277 BUG_ON(dev->bus != &pci_bus_type);
278}
279EXPORT_SYMBOL(sn_dma_sync_single_for_device);
280
281void sn_dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg,
282 int nelems, int direction)
283{
284 BUG_ON(dev->bus != &pci_bus_type);
285}
286EXPORT_SYMBOL(sn_dma_sync_sg_for_cpu);
287
288void sn_dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg,
289 int nelems, int direction)
290{
291 BUG_ON(dev->bus != &pci_bus_type);
292}
293EXPORT_SYMBOL(sn_dma_sync_sg_for_device);
294
295int sn_dma_mapping_error(dma_addr_t dma_addr)
296{
297 return 0;
298}
299EXPORT_SYMBOL(sn_dma_mapping_error);
300
301char *sn_pci_get_legacy_mem(struct pci_bus *bus)
302{
303 if (!SN_PCIBUS_BUSSOFT(bus))
304 return ERR_PTR(-ENODEV);
305
306 return (char *)(SN_PCIBUS_BUSSOFT(bus)->bs_legacy_mem | __IA64_UNCACHED_OFFSET);
307}
308
309int sn_pci_legacy_read(struct pci_bus *bus, u16 port, u32 *val, u8 size)
310{
311 unsigned long addr;
312 int ret;
313
314 if (!SN_PCIBUS_BUSSOFT(bus))
315 return -ENODEV;
316
317 addr = SN_PCIBUS_BUSSOFT(bus)->bs_legacy_io | __IA64_UNCACHED_OFFSET;
318 addr += port;
319
320 ret = ia64_sn_probe_mem(addr, (long)size, (void *)val);
321
322 if (ret == 2)
323 return -EINVAL;
324
325 if (ret == 1)
326 *val = -1;
327
328 return size;
329}
330
331int sn_pci_legacy_write(struct pci_bus *bus, u16 port, u32 val, u8 size)
332{
333 int ret = size;
334 unsigned long paddr;
335 unsigned long *addr;
336
337 if (!SN_PCIBUS_BUSSOFT(bus)) {
338 ret = -ENODEV;
339 goto out;
340 }
341
342 /* Put the phys addr in uncached space */
343 paddr = SN_PCIBUS_BUSSOFT(bus)->bs_legacy_io | __IA64_UNCACHED_OFFSET;
344 paddr += port;
345 addr = (unsigned long *)paddr;
346
347 switch (size) {
348 case 1:
349 *(volatile u8 *)(addr) = (u8)(val);
350 break;
351 case 2:
352 *(volatile u16 *)(addr) = (u16)(val);
353 break;
354 case 4:
355 *(volatile u32 *)(addr) = (u32)(val);
356 break;
357 default:
358 ret = -EINVAL;
359 break;
360 }
361 out:
362 return ret;
363}
diff --git a/arch/ia64/sn/pci/pcibr/Makefile b/arch/ia64/sn/pci/pcibr/Makefile
new file mode 100644
index 000000000000..1850c4a94c41
--- /dev/null
+++ b/arch/ia64/sn/pci/pcibr/Makefile
@@ -0,0 +1,11 @@
1#
2# This file is subject to the terms and conditions of the GNU General Public
3# License. See the file "COPYING" in the main directory of this archive
4# for more details.
5#
6# Copyright (C) 2002-2004 Silicon Graphics, Inc. All Rights Reserved.
7#
8# Makefile for the sn2 io routines.
9
10obj-y += pcibr_dma.o pcibr_reg.o \
11 pcibr_ate.o pcibr_provider.o
diff --git a/arch/ia64/sn/pci/pcibr/pcibr_ate.c b/arch/ia64/sn/pci/pcibr/pcibr_ate.c
new file mode 100644
index 000000000000..9d6854666f9b
--- /dev/null
+++ b/arch/ia64/sn/pci/pcibr/pcibr_ate.c
@@ -0,0 +1,188 @@
1/*
2 * This file is subject to the terms and conditions of the GNU General Public
3 * License. See the file "COPYING" in the main directory of this archive
4 * for more details.
5 *
6 * Copyright (C) 2001-2004 Silicon Graphics, Inc. All rights reserved.
7 */
8
9#include <linux/types.h>
10#include <asm/sn/sn_sal.h>
11#include "pci/pcibus_provider_defs.h"
12#include "pci/pcidev.h"
13#include "pci/pcibr_provider.h"
14
15int pcibr_invalidate_ate = 0; /* by default don't invalidate ATE on free */
16
17/*
18 * mark_ate: Mark the ate as either free or in use.
19 */
20static void mark_ate(struct ate_resource *ate_resource, int start, int number,
21 uint64_t value)
22{
23
24 uint64_t *ate = ate_resource->ate;
25 int index;
26 int length = 0;
27
28 for (index = start; length < number; index++, length++)
29 ate[index] = value;
30
31}
32
33/*
34 * find_free_ate: Find the first free ate index starting from the given
35 *	     index for the desired consecutive count.
36 */
37static int find_free_ate(struct ate_resource *ate_resource, int start,
38 int count)
39{
40
41 uint64_t *ate = ate_resource->ate;
42 int index;
43 int start_free;
44
45 for (index = start; index < ate_resource->num_ate;) {
46 if (!ate[index]) {
47 int i;
48 int free;
49 free = 0;
50 start_free = index; /* Found start free ate */
51 for (i = start_free; i < ate_resource->num_ate; i++) {
52 if (!ate[i]) { /* This is free */
53 if (++free == count)
54 return start_free;
55 } else {
56 index = i + 1;
57 break;
58 }
59 }
60 } else
61 index++; /* Try next ate */
62 }
63
64 return -1;
65}
66
67/*
68 * free_ate_resource: Free the requested number of ATEs.
69 */
70static inline void free_ate_resource(struct ate_resource *ate_resource,
71 int start)
72{
73
74 mark_ate(ate_resource, start, ate_resource->ate[start], 0);
75 if ((ate_resource->lowest_free_index > start) ||
76 (ate_resource->lowest_free_index < 0))
77 ate_resource->lowest_free_index = start;
78
79}
80
81/*
82 * alloc_ate_resource: Allocate the requested number of ATEs.
83 */
84static inline int alloc_ate_resource(struct ate_resource *ate_resource,
85 int ate_needed)
86{
87
88 int start_index;
89
90 /*
91 * Check for ate exhaustion.
92 */
93 if (ate_resource->lowest_free_index < 0)
94 return -1;
95
96 /*
97	 * Find the required number of free consecutive ates.
98 */
99 start_index =
100 find_free_ate(ate_resource, ate_resource->lowest_free_index,
101 ate_needed);
102 if (start_index >= 0)
103 mark_ate(ate_resource, start_index, ate_needed, ate_needed);
104
105 ate_resource->lowest_free_index =
106 find_free_ate(ate_resource, ate_resource->lowest_free_index, 1);
107
108 return start_index;
109}
110
111/*
112 * Allocate "count" contiguous Bridge Address Translation Entries
113 * on the specified bridge to be used for PCI to XTALK mappings.
114 * Indices in rm map range from 1..num_entries.  Indices returned
115 * to caller range from 0..num_entries-1.
116 *
117 * Return the start index on success, -1 on failure.
118 */
119int pcibr_ate_alloc(struct pcibus_info *pcibus_info, int count)
120{
121 int status = 0;
122 uint64_t flag;
123
124 flag = pcibr_lock(pcibus_info);
125 status = alloc_ate_resource(&pcibus_info->pbi_int_ate_resource, count);
126
127 if (status < 0) {
128 /* Failed to allocate */
129 pcibr_unlock(pcibus_info, flag);
130 return -1;
131 }
132
133 pcibr_unlock(pcibus_info, flag);
134
135 return status;
136}
137
138/*
139 * Setup an Address Translation Entry as specified. Use either the Bridge
140 * internal maps or the external map RAM, as appropriate.
141 */
142static inline uint64_t *pcibr_ate_addr(struct pcibus_info *pcibus_info,
143 int ate_index)
144{
145 if (ate_index < pcibus_info->pbi_int_ate_size) {
146 return pcireg_int_ate_addr(pcibus_info, ate_index);
147 }
148 panic("pcibr_ate_addr: invalid ate_index 0x%x", ate_index);
149}
150
151/*
152 * Update the ate.
153 */
154void inline
155ate_write(struct pcibus_info *pcibus_info, int ate_index, int count,
156 volatile uint64_t ate)
157{
158 while (count-- > 0) {
159 if (ate_index < pcibus_info->pbi_int_ate_size) {
160 pcireg_int_ate_set(pcibus_info, ate_index, ate);
161 } else {
162 panic("ate_write: invalid ate_index 0x%x", ate_index);
163 }
164 ate_index++;
165 ate += IOPGSIZE;
166 }
167
168 pcireg_tflush_get(pcibus_info); /* wait until Bridge PIO complete */
169}
170
171void pcibr_ate_free(struct pcibus_info *pcibus_info, int index)
172{
173
174 volatile uint64_t ate;
175 int count;
176 uint64_t flags;
177
178 if (pcibr_invalidate_ate) {
179 /* For debugging purposes, clear the valid bit in the ATE */
180 ate = *pcibr_ate_addr(pcibus_info, index);
181 count = pcibus_info->pbi_int_ate_resource.ate[index];
182 ate_write(pcibus_info, index, count, (ate & ~PCI32_ATE_V));
183 }
184
185 flags = pcibr_lock(pcibus_info);
186 free_ate_resource(&pcibus_info->pbi_int_ate_resource, index);
187 pcibr_unlock(pcibus_info, flags);
188}
diff --git a/arch/ia64/sn/pci/pcibr/pcibr_dma.c b/arch/ia64/sn/pci/pcibr/pcibr_dma.c
new file mode 100644
index 000000000000..b1d66ac065c8
--- /dev/null
+++ b/arch/ia64/sn/pci/pcibr/pcibr_dma.c
@@ -0,0 +1,379 @@
1/*
2 * This file is subject to the terms and conditions of the GNU General Public
3 * License. See the file "COPYING" in the main directory of this archive
4 * for more details.
5 *
6 * Copyright (C) 2001-2004 Silicon Graphics, Inc. All rights reserved.
7 */
8
9#include <linux/types.h>
10#include <linux/pci.h>
11#include <asm/sn/sn_sal.h>
12#include <asm/sn/geo.h>
13#include "xtalk/xwidgetdev.h"
14#include "xtalk/hubdev.h"
15#include "pci/pcibus_provider_defs.h"
16#include "pci/pcidev.h"
17#include "pci/tiocp.h"
18#include "pci/pic.h"
19#include "pci/pcibr_provider.h"
20#include "pci/tiocp.h"
21#include "tio.h"
22#include <asm/sn/addrs.h>
23
24extern int sn_ioif_inited;
25
26/* =====================================================================
27 * DMA MANAGEMENT
28 *
29 * The Bridge ASIC provides three methods of doing DMA: via a "direct map"
30 * register available in 32-bit PCI space (which selects a contiguous 2G
31 * address space on some other widget), via "direct" addressing via 64-bit
32 * PCI space (all destination information comes from the PCI address,
33 * including transfer attributes), and via a "mapped" region that allows
34 * a bunch of different small mappings to be established with the PMU.
35 *
36 * For efficiency, we prefer the 32-bit direct mapping facility,
37 * since it requires no resource allocations. The advantage of using the
38 * PMU over the 64-bit direct is that single-cycle PCI addressing can be
39 * used; the advantage of using 64-bit direct over PMU addressing is that
40 * we do not have to allocate entries in the PMU.
41 */
42
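The three methods described above are chosen per mapping by pcibr_dma_map() at the end of this file (which also rejects DMA masks below 32 bits). As a simplified restatement only, not code from this commit, the streaming-case selection looks roughly like this sketch, reusing the helpers defined below:

	/* Illustrative sketch of the method selection done by pcibr_dma_map(). */
	static uint64_t example_pick_dma_method(struct pcidev_info *info,
						uint64_t paddr, size_t size)
	{
		struct pci_dev *pcidev = info->pdi_linux_pcidev;
		uint64_t handle;

		if (pcidev->dma_mask == ~0UL)	/* 64-bit card: direct 64 always succeeds */
			return pcibr_dmatrans_direct64(info, paddr, PCI64_ATTR_PREF);

		/* 32-63 bit card: try the 2G direct window first */
		handle = pcibr_dmatrans_direct32(info, paddr, size, 0);
		if (!handle)			/* outside the window: fall back to PMU ATEs */
			handle = pcibr_dmamap_ate32(info, paddr, size, PCI32_ATE_PREF);
		return handle;
	}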
43static uint64_t
44pcibr_dmamap_ate32(struct pcidev_info *info,
45 uint64_t paddr, size_t req_size, uint64_t flags)
46{
47
48 struct pcidev_info *pcidev_info = info->pdi_host_pcidev_info;
49 struct pcibus_info *pcibus_info = (struct pcibus_info *)pcidev_info->
50 pdi_pcibus_info;
51 uint8_t internal_device = (PCI_SLOT(pcidev_info->pdi_host_pcidev_info->
52 pdi_linux_pcidev->devfn)) - 1;
53 int ate_count;
54 int ate_index;
55 uint64_t ate_flags = flags | PCI32_ATE_V;
56 uint64_t ate;
57 uint64_t pci_addr;
58 uint64_t xio_addr;
59 uint64_t offset;
60
61	/* PIC in PCI-X mode does not support 32-bit PageMap mode */
62 if (IS_PIC_SOFT(pcibus_info) && IS_PCIX(pcibus_info)) {
63 return 0;
64 }
65
66 /* Calculate the number of ATEs needed. */
67 if (!(MINIMAL_ATE_FLAG(paddr, req_size))) {
68 ate_count = IOPG((IOPGSIZE - 1) /* worst case start offset */
69 +req_size /* max mapping bytes */
70 - 1) + 1; /* round UP */
71 } else { /* assume requested target is page aligned */
72 ate_count = IOPG(req_size /* max mapping bytes */
73 - 1) + 1; /* round UP */
74 }
75
76 /* Get the number of ATEs required. */
77 ate_index = pcibr_ate_alloc(pcibus_info, ate_count);
78 if (ate_index < 0)
79 return 0;
80
81 /* In PCI-X mode, Prefetch not supported */
82 if (IS_PCIX(pcibus_info))
83 ate_flags &= ~(PCI32_ATE_PREF);
84
85 xio_addr =
86 IS_PIC_SOFT(pcibus_info) ? PHYS_TO_DMA(paddr) :
87 PHYS_TO_TIODMA(paddr);
88 offset = IOPGOFF(xio_addr);
89 ate = ate_flags | (xio_addr - offset);
90
91 /* If PIC, put the targetid in the ATE */
92 if (IS_PIC_SOFT(pcibus_info)) {
93 ate |= (pcibus_info->pbi_hub_xid << PIC_ATE_TARGETID_SHFT);
94 }
95 ate_write(pcibus_info, ate_index, ate_count, ate);
96
97 /*
98 * Set up the DMA mapped Address.
99 */
100 pci_addr = PCI32_MAPPED_BASE + offset + IOPGSIZE * ate_index;
101
102 /*
103 * If swap was set in device in pcibr_endian_set()
104 * we need to turn swapping on.
105 */
106 if (pcibus_info->pbi_devreg[internal_device] & PCIBR_DEV_SWAP_DIR)
107 ATE_SWAP_ON(pci_addr);
108
109 return pci_addr;
110}
111
112static uint64_t
113pcibr_dmatrans_direct64(struct pcidev_info * info, uint64_t paddr,
114 uint64_t dma_attributes)
115{
116 struct pcibus_info *pcibus_info = (struct pcibus_info *)
117 ((info->pdi_host_pcidev_info)->pdi_pcibus_info);
118 uint64_t pci_addr;
119
120 /* Translate to Crosstalk View of Physical Address */
121 pci_addr = (IS_PIC_SOFT(pcibus_info) ? PHYS_TO_DMA(paddr) :
122 PHYS_TO_TIODMA(paddr)) | dma_attributes;
123
124 /* Handle Bus mode */
125 if (IS_PCIX(pcibus_info))
126 pci_addr &= ~PCI64_ATTR_PREF;
127
128 /* Handle Bridge Chipset differences */
129 if (IS_PIC_SOFT(pcibus_info)) {
130 pci_addr |=
131 ((uint64_t) pcibus_info->
132 pbi_hub_xid << PIC_PCI64_ATTR_TARG_SHFT);
133 } else
134 pci_addr |= TIOCP_PCI64_CMDTYPE_MEM;
135
136 /* If PCI mode, func zero uses VCHAN0, every other func uses VCHAN1 */
137 if (!IS_PCIX(pcibus_info) && PCI_FUNC(info->pdi_linux_pcidev->devfn))
138 pci_addr |= PCI64_ATTR_VIRTUAL;
139
140 return pci_addr;
141
142}
143
144static uint64_t
145pcibr_dmatrans_direct32(struct pcidev_info * info,
146 uint64_t paddr, size_t req_size, uint64_t flags)
147{
148
149 struct pcidev_info *pcidev_info = info->pdi_host_pcidev_info;
150 struct pcibus_info *pcibus_info = (struct pcibus_info *)pcidev_info->
151 pdi_pcibus_info;
152 uint64_t xio_addr;
153
154 uint64_t xio_base;
155 uint64_t offset;
156 uint64_t endoff;
157
158 if (IS_PCIX(pcibus_info)) {
159 return 0;
160 }
161
162 xio_addr = IS_PIC_SOFT(pcibus_info) ? PHYS_TO_DMA(paddr) :
163 PHYS_TO_TIODMA(paddr);
164
165 xio_base = pcibus_info->pbi_dir_xbase;
166 offset = xio_addr - xio_base;
167 endoff = req_size + offset;
168 if ((req_size > (1ULL << 31)) || /* Too Big */
169 (xio_addr < xio_base) || /* Out of range for mappings */
170 (endoff > (1ULL << 31))) { /* Too Big */
171 return 0;
172 }
173
174 return PCI32_DIRECT_BASE | offset;
175
176}
177
178/*
179 * Wrapper routine for freeing DMA maps
180 * DMA mappings for Direct 64 and 32 do not have any DMA maps.
181 */
182void
183pcibr_dma_unmap(struct pcidev_info *pcidev_info, dma_addr_t dma_handle,
184 int direction)
185{
186 struct pcibus_info *pcibus_info = (struct pcibus_info *)pcidev_info->
187 pdi_pcibus_info;
188
189 if (IS_PCI32_MAPPED(dma_handle)) {
190 int ate_index;
191
192 ate_index =
193 IOPG((ATE_SWAP_OFF(dma_handle) - PCI32_MAPPED_BASE));
194 pcibr_ate_free(pcibus_info, ate_index);
195 }
196}
197
198/*
199 * On SN systems there is a race condition between a PIO read response and
200 * DMAs.  In rare cases, the read response may beat the DMA, causing the
201 * driver to think that data in memory is complete and meaningful. This code
202 * eliminates that race. This routine is called by the PIO read routines
203 * after doing the read. For PIC this routine then forces a fake interrupt
204 * on another line, which is logically associated with the slot that the PIO
205 * is addressed to. It then spins while watching the memory location that
206 * the interrupt is targeted to.  When the interrupt response arrives, we
207 * are sure that the DMA has landed in memory and it is safe for the driver
208 * to proceed. For TIOCP use the Device(x) Write Request Buffer Flush
209 * Bridge register since it ensures the data has entered the coherence domain,
210 * unlike the PIC Device(x) Write Request Buffer Flush register.
211 */
212
213void sn_dma_flush(uint64_t addr)
214{
215 nasid_t nasid;
216 int is_tio;
217 int wid_num;
218 int i, j;
219 int bwin;
220 uint64_t flags;
221 struct hubdev_info *hubinfo;
222 volatile struct sn_flush_device_list *p;
223 struct sn_flush_nasid_entry *flush_nasid_list;
224
225 if (!sn_ioif_inited)
226 return;
227
228 nasid = NASID_GET(addr);
229 if (-1 == nasid_to_cnodeid(nasid))
230 return;
231
232 hubinfo = (NODEPDA(nasid_to_cnodeid(nasid)))->pdinfo;
233
234 if (!hubinfo) {
235 BUG();
236 }
237 is_tio = (nasid & 1);
238 if (is_tio) {
239 wid_num = TIO_SWIN_WIDGETNUM(addr);
240 bwin = TIO_BWIN_WINDOWNUM(addr);
241 } else {
242 wid_num = SWIN_WIDGETNUM(addr);
243 bwin = BWIN_WINDOWNUM(addr);
244 }
245
246 flush_nasid_list = &hubinfo->hdi_flush_nasid_list;
247 if (flush_nasid_list->widget_p == NULL)
248 return;
249 if (bwin > 0) {
250 uint64_t itte = flush_nasid_list->iio_itte[bwin];
251
252 if (is_tio) {
253 wid_num = (itte >> TIO_ITTE_WIDGET_SHIFT) &
254 TIO_ITTE_WIDGET_MASK;
255 } else {
256 wid_num = (itte >> IIO_ITTE_WIDGET_SHIFT) &
257 IIO_ITTE_WIDGET_MASK;
258 }
259 }
260 if (flush_nasid_list->widget_p == NULL)
261 return;
262 if (flush_nasid_list->widget_p[wid_num] == NULL)
263 return;
264 p = &flush_nasid_list->widget_p[wid_num][0];
265
266 /* find a matching BAR */
267 for (i = 0; i < DEV_PER_WIDGET; i++) {
268 for (j = 0; j < PCI_ROM_RESOURCE; j++) {
269 if (p->sfdl_bar_list[j].start == 0)
270 break;
271 if (addr >= p->sfdl_bar_list[j].start
272 && addr <= p->sfdl_bar_list[j].end)
273 break;
274 }
275 if (j < PCI_ROM_RESOURCE && p->sfdl_bar_list[j].start != 0)
276 break;
277 p++;
278 }
279
280 /* if no matching BAR, return without doing anything. */
281 if (i == DEV_PER_WIDGET)
282 return;
283
284 /*
285 * For TIOCP use the Device(x) Write Request Buffer Flush Bridge
286 * register since it ensures the data has entered the coherence
287 * domain, unlike PIC
288 */
289 if (is_tio) {
290 uint32_t tio_id = REMOTE_HUB_L(nasid, TIO_NODE_ID);
291 uint32_t revnum = XWIDGET_PART_REV_NUM(tio_id);
292
293 /* TIOCP BRINGUP WAR (PV907516): Don't write buffer flush reg */
294 if ((1 << XWIDGET_PART_REV_NUM_REV(revnum)) & PV907516) {
295 return;
296 } else {
297 pcireg_wrb_flush_get(p->sfdl_pcibus_info,
298 (p->sfdl_slot - 1));
299 }
300 } else {
301 spin_lock_irqsave(&((struct sn_flush_device_list *)p)->
302 sfdl_flush_lock, flags);
303
304 p->sfdl_flush_value = 0;
305
306 /* force an interrupt. */
307 *(volatile uint32_t *)(p->sfdl_force_int_addr) = 1;
308
309 /* wait for the interrupt to come back. */
310 while (*(p->sfdl_flush_addr) != 0x10f) ;
311
312 /* okay, everything is synched up. */
313 spin_unlock_irqrestore((spinlock_t *)&p->sfdl_flush_lock, flags);
314 }
315 return;
316}
317
318/*
319 * Wrapper DMA interface. Called from pci_dma.c routines.
320 */
321
322uint64_t
323pcibr_dma_map(struct pcidev_info * pcidev_info, unsigned long phys_addr,
324 size_t size, unsigned int flags)
325{
326 dma_addr_t dma_handle;
327 struct pci_dev *pcidev = pcidev_info->pdi_linux_pcidev;
328
329 if (flags & SN_PCIDMA_CONSISTENT) {
330 /* sn_pci_alloc_consistent interfaces */
331 if (pcidev->dev.coherent_dma_mask == ~0UL) {
332 dma_handle =
333 pcibr_dmatrans_direct64(pcidev_info, phys_addr,
334 PCI64_ATTR_BAR);
335 } else {
336 dma_handle =
337 (dma_addr_t) pcibr_dmamap_ate32(pcidev_info,
338 phys_addr, size,
339 PCI32_ATE_BAR);
340 }
341 } else {
342 /* map_sg/map_single interfaces */
343
344 /* SN cannot support DMA addresses smaller than 32 bits. */
345 if (pcidev->dma_mask < 0x7fffffff) {
346 return 0;
347 }
348
349 if (pcidev->dma_mask == ~0UL) {
350 /*
351 * Handle the most common case: 64 bit cards. This
352 * call should always succeed.
353 */
354
355 dma_handle =
356 pcibr_dmatrans_direct64(pcidev_info, phys_addr,
357 PCI64_ATTR_PREF);
358 } else {
359 /* Handle 32-63 bit cards via direct mapping */
360 dma_handle =
361 pcibr_dmatrans_direct32(pcidev_info, phys_addr,
362 size, 0);
363 if (!dma_handle) {
364 /*
365 * It is a 32 bit card and we cannot do direct mapping,
366 * so we use an ATE.
367 */
368
369 dma_handle =
370 pcibr_dmamap_ate32(pcidev_info, phys_addr,
371 size, PCI32_ATE_PREF);
372 }
373 }
374 }
375
376 return dma_handle;
377}
378
379EXPORT_SYMBOL(sn_dma_flush);
diff --git a/arch/ia64/sn/pci/pcibr/pcibr_provider.c b/arch/ia64/sn/pci/pcibr/pcibr_provider.c
new file mode 100644
index 000000000000..92bd278cf7ff
--- /dev/null
+++ b/arch/ia64/sn/pci/pcibr/pcibr_provider.c
@@ -0,0 +1,170 @@
1/*
2 * This file is subject to the terms and conditions of the GNU General Public
3 * License. See the file "COPYING" in the main directory of this archive
4 * for more details.
5 *
6 * Copyright (C) 2001-2004 Silicon Graphics, Inc. All rights reserved.
7 */
8
9#include <linux/types.h>
10#include <linux/interrupt.h>
11#include <linux/pci.h>
12#include <asm/sn/sn_sal.h>
13#include "xtalk/xwidgetdev.h"
14#include <asm/sn/geo.h>
15#include "xtalk/hubdev.h"
16#include "pci/pcibus_provider_defs.h"
17#include "pci/pcidev.h"
18#include "pci/pcibr_provider.h"
19#include <asm/sn/addrs.h>
20
21
22static int sal_pcibr_error_interrupt(struct pcibus_info *soft)
23{
24 struct ia64_sal_retval ret_stuff;
25 uint64_t busnum;
26 int segment;
27 ret_stuff.status = 0;
28 ret_stuff.v0 = 0;
29
30 segment = 0;
31 busnum = soft->pbi_buscommon.bs_persist_busnum;
32 SAL_CALL_NOLOCK(ret_stuff,
33 (u64) SN_SAL_IOIF_ERROR_INTERRUPT,
34 (u64) segment, (u64) busnum, 0, 0, 0, 0, 0);
35
36 return (int)ret_stuff.v0;
37}
38
39/*
40 * PCI Bridge Error interrupt handler. Gets invoked whenever a PCI
41 * bridge sends an error interrupt.
42 */
43static irqreturn_t
44pcibr_error_intr_handler(int irq, void *arg, struct pt_regs *regs)
45{
46 struct pcibus_info *soft = (struct pcibus_info *)arg;
47
48 if (sal_pcibr_error_interrupt(soft) < 0) {
49 panic("pcibr_error_intr_handler(): Fatal Bridge Error");
50 }
51 return IRQ_HANDLED;
52}
53
54void *
55pcibr_bus_fixup(struct pcibus_bussoft *prom_bussoft)
56{
57 int nasid, cnode, j;
58 struct hubdev_info *hubdev_info;
59 struct pcibus_info *soft;
60 struct sn_flush_device_list *sn_flush_device_list;
61
62 if (! IS_PCI_BRIDGE_ASIC(prom_bussoft->bs_asic_type)) {
63 return NULL;
64 }
65
66 /*
67 * Allocate kernel bus soft and copy from prom.
68 */
69
70 soft = kmalloc(sizeof(struct pcibus_info), GFP_KERNEL);
71 if (!soft) {
72 return NULL;
73 }
74
75 memcpy(soft, prom_bussoft, sizeof(struct pcibus_info));
76 soft->pbi_buscommon.bs_base =
77 (((u64) soft->pbi_buscommon.
78 bs_base << 4) >> 4) | __IA64_UNCACHED_OFFSET;
79
80 spin_lock_init(&soft->pbi_lock);
81
82 /*
83 * register the bridge's error interrupt handler
84 */
85 if (request_irq(SGI_PCIBR_ERROR, (void *)pcibr_error_intr_handler,
86 SA_SHIRQ, "PCIBR error", (void *)(soft))) {
87 printk(KERN_WARNING
88 "pcibr cannot allocate interrupt for error handler\n");
89 }
90
91 /*
92 * Update the Bridge with the "kernel" pagesize
93 */
94 if (PAGE_SIZE < 16384) {
95 pcireg_control_bit_clr(soft, PCIBR_CTRL_PAGE_SIZE);
96 } else {
97 pcireg_control_bit_set(soft, PCIBR_CTRL_PAGE_SIZE);
98 }
99
100 nasid = NASID_GET(soft->pbi_buscommon.bs_base);
101 cnode = nasid_to_cnodeid(nasid);
102 hubdev_info = (struct hubdev_info *)(NODEPDA(cnode)->pdinfo);
103
104 if (hubdev_info->hdi_flush_nasid_list.widget_p) {
105 sn_flush_device_list = hubdev_info->hdi_flush_nasid_list.
106 widget_p[(int)soft->pbi_buscommon.bs_xid];
107 if (sn_flush_device_list) {
108 for (j = 0; j < DEV_PER_WIDGET;
109 j++, sn_flush_device_list++) {
110 if (sn_flush_device_list->sfdl_slot == -1)
111 continue;
112 if (sn_flush_device_list->
113 sfdl_persistent_busnum ==
114 soft->pbi_buscommon.bs_persist_busnum)
115 sn_flush_device_list->sfdl_pcibus_info =
116 soft;
117 }
118 }
119 }
120
121 /* Setup the PMU ATE map */
122 soft->pbi_int_ate_resource.lowest_free_index = 0;
123 soft->pbi_int_ate_resource.ate =
124 kmalloc(soft->pbi_int_ate_size * sizeof(uint64_t), GFP_KERNEL);
125 memset(soft->pbi_int_ate_resource.ate, 0,
126 (soft->pbi_int_ate_size * sizeof(uint64_t)));
127
128 return soft;
129}
130
131void pcibr_force_interrupt(struct sn_irq_info *sn_irq_info)
132{
133 struct pcidev_info *pcidev_info;
134 struct pcibus_info *pcibus_info;
135 int bit = sn_irq_info->irq_int_bit;
136
137 pcidev_info = (struct pcidev_info *)sn_irq_info->irq_pciioinfo;
138 if (pcidev_info) {
139 pcibus_info =
140 (struct pcibus_info *)pcidev_info->pdi_host_pcidev_info->
141 pdi_pcibus_info;
142 pcireg_force_intr_set(pcibus_info, bit);
143 }
144}
145
146void pcibr_change_devices_irq(struct sn_irq_info *sn_irq_info)
147{
148 struct pcidev_info *pcidev_info;
149 struct pcibus_info *pcibus_info;
150 int bit = sn_irq_info->irq_int_bit;
151 uint64_t xtalk_addr = sn_irq_info->irq_xtalkaddr;
152
153 pcidev_info = (struct pcidev_info *)sn_irq_info->irq_pciioinfo;
154 if (pcidev_info) {
155 pcibus_info =
156 (struct pcibus_info *)pcidev_info->pdi_host_pcidev_info->
157 pdi_pcibus_info;
158
159 /* Disable the device's IRQ */
160 pcireg_intr_enable_bit_clr(pcibus_info, bit);
161
162 /* Change the device's IRQ */
163 pcireg_intr_addr_addr_set(pcibus_info, bit, xtalk_addr);
164
165 /* Re-enable the device's IRQ */
166 pcireg_intr_enable_bit_set(pcibus_info, bit);
167
168 pcibr_force_interrupt(sn_irq_info);
169 }
170}
diff --git a/arch/ia64/sn/pci/pcibr/pcibr_reg.c b/arch/ia64/sn/pci/pcibr/pcibr_reg.c
new file mode 100644
index 000000000000..74a74a7d2a13
--- /dev/null
+++ b/arch/ia64/sn/pci/pcibr/pcibr_reg.c
@@ -0,0 +1,282 @@
1/*
2 * This file is subject to the terms and conditions of the GNU General Public
3 * License. See the file "COPYING" in the main directory of this archive
4 * for more details.
5 *
6 * Copyright (C) 2004 Silicon Graphics, Inc. All rights reserved.
7 */
8
9#include <linux/types.h>
10#include <linux/interrupt.h>
11#include "pci/pcibus_provider_defs.h"
12#include "pci/pcidev.h"
13#include "pci/tiocp.h"
14#include "pci/pic.h"
15#include "pci/pcibr_provider.h"
16
17union br_ptr {
18 struct tiocp tio;
19 struct pic pic;
20};
21
22/*
23 * Control Register Access -- Read/Write 0000_0020
24 */
25void pcireg_control_bit_clr(struct pcibus_info *pcibus_info, uint64_t bits)
26{
27 union br_ptr *ptr = (union br_ptr *)pcibus_info->pbi_buscommon.bs_base;
28
29 if (pcibus_info) {
30 switch (pcibus_info->pbi_bridge_type) {
31 case PCIBR_BRIDGETYPE_TIOCP:
32 ptr->tio.cp_control &= ~bits;
33 break;
34 case PCIBR_BRIDGETYPE_PIC:
35 ptr->pic.p_wid_control &= ~bits;
36 break;
37 default:
38 panic
39 ("pcireg_control_bit_clr: unknown bridgetype bridge 0x%p",
40 (void *)ptr);
41 }
42 }
43}
44
45void pcireg_control_bit_set(struct pcibus_info *pcibus_info, uint64_t bits)
46{
47 union br_ptr *ptr = (union br_ptr *)pcibus_info->pbi_buscommon.bs_base;
48
49 if (pcibus_info) {
50 switch (pcibus_info->pbi_bridge_type) {
51 case PCIBR_BRIDGETYPE_TIOCP:
52 ptr->tio.cp_control |= bits;
53 break;
54 case PCIBR_BRIDGETYPE_PIC:
55 ptr->pic.p_wid_control |= bits;
56 break;
57 default:
58 panic
59 ("pcireg_control_bit_set: unknown bridgetype bridge 0x%p",
60 (void *)ptr);
61 }
62 }
63}
64
65/*
66 * PCI/PCIX Target Flush Register Access -- Read Only 0000_0050
67 */
68uint64_t pcireg_tflush_get(struct pcibus_info *pcibus_info)
69{
70 union br_ptr *ptr = (union br_ptr *)pcibus_info->pbi_buscommon.bs_base;
71 uint64_t ret = 0;
72
73 if (pcibus_info) {
74 switch (pcibus_info->pbi_bridge_type) {
75 case PCIBR_BRIDGETYPE_TIOCP:
76 ret = ptr->tio.cp_tflush;
77 break;
78 case PCIBR_BRIDGETYPE_PIC:
79 ret = ptr->pic.p_wid_tflush;
80 break;
81 default:
82 panic
83 ("pcireg_tflush_get: unknown bridgetype bridge 0x%p",
84 (void *)ptr);
85 }
86 }
87
88 /* Read of the Target Flush should always return zero */
89 if (ret != 0)
90 panic("pcireg_tflush_get:Target Flush failed\n");
91
92 return ret;
93}
94
95/*
96 * Interrupt Status Register Access -- Read Only 0000_0100
97 */
98uint64_t pcireg_intr_status_get(struct pcibus_info * pcibus_info)
99{
100 union br_ptr *ptr = (union br_ptr *)pcibus_info->pbi_buscommon.bs_base;
101 uint64_t ret = 0;
102
103 if (pcibus_info) {
104 switch (pcibus_info->pbi_bridge_type) {
105 case PCIBR_BRIDGETYPE_TIOCP:
106 ret = ptr->tio.cp_int_status;
107 break;
108 case PCIBR_BRIDGETYPE_PIC:
109 ret = ptr->pic.p_int_status;
110 break;
111 default:
112 panic
113 ("pcireg_intr_status_get: unknown bridgetype bridge 0x%p",
114 (void *)ptr);
115 }
116 }
117 return ret;
118}
119
120/*
121 * Interrupt Enable Register Access -- Read/Write 0000_0108
122 */
123void pcireg_intr_enable_bit_clr(struct pcibus_info *pcibus_info, uint64_t bits)
124{
125 union br_ptr *ptr = (union br_ptr *)pcibus_info->pbi_buscommon.bs_base;
126
127 if (pcibus_info) {
128 switch (pcibus_info->pbi_bridge_type) {
129 case PCIBR_BRIDGETYPE_TIOCP:
130 ptr->tio.cp_int_enable &= ~bits;
131 break;
132 case PCIBR_BRIDGETYPE_PIC:
133 ptr->pic.p_int_enable &= ~bits;
134 break;
135 default:
136 panic
137 ("pcireg_intr_enable_bit_clr: unknown bridgetype bridge 0x%p",
138 (void *)ptr);
139 }
140 }
141}
142
void pcireg_intr_enable_bit_set(struct pcibus_info *pcibus_info, uint64_t bits)
{
	union br_ptr *ptr;

	if (pcibus_info) {
		ptr = (union br_ptr *)pcibus_info->pbi_buscommon.bs_base;
		switch (pcibus_info->pbi_bridge_type) {
		case PCIBR_BRIDGETYPE_TIOCP:
			ptr->tio.cp_int_enable |= bits;
			break;
		case PCIBR_BRIDGETYPE_PIC:
			ptr->pic.p_int_enable |= bits;
			break;
		default:
			panic("pcireg_intr_enable_bit_set: unknown bridgetype bridge 0x%p",
			      (void *)ptr);
		}
	}
}

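/*
 * Usage sketch (added by the editor; not part of the original file, kept
 * under #if 0 so it is never compiled).  It shows one possible
 * mask-work-unmask pattern built on the interrupt enable helpers above;
 * EXAMPLE_INTR_BIT and example_service_device() are hypothetical.
 */
#if 0
#define EXAMPLE_INTR_BIT	(1UL << 2)	/* hypothetical */

static void example_masked_service(struct pcibus_info *pcibus_info)
{
	/* Mask the interrupt source at the bridge... */
	pcireg_intr_enable_bit_clr(pcibus_info, EXAMPLE_INTR_BIT);

	example_service_device();	/* hypothetical work */

	/* ...and re-enable it when the work is done. */
	pcireg_intr_enable_bit_set(pcibus_info, EXAMPLE_INTR_BIT);
}
#endif
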
/*
 * Intr Host Address Register (int_addr) -- Read/Write 0000_0130 - 0000_0168
 */
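/*
 * Note (added by the editor): the helper below does a read-modify-write so
 * that only the bits covered by the bridge's host interrupt address mask
 * (TIOCP_HOST_INTR_ADDR or PIC_HOST_INTR_ADDR) are replaced; any other
 * fields in the int_addr register are left untouched.
 */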
void pcireg_intr_addr_addr_set(struct pcibus_info *pcibus_info, int int_n,
			       uint64_t addr)
{
	union br_ptr *ptr;

	if (pcibus_info) {
		ptr = (union br_ptr *)pcibus_info->pbi_buscommon.bs_base;
		switch (pcibus_info->pbi_bridge_type) {
		case PCIBR_BRIDGETYPE_TIOCP:
			ptr->tio.cp_int_addr[int_n] &= ~TIOCP_HOST_INTR_ADDR;
			ptr->tio.cp_int_addr[int_n] |=
			    (addr & TIOCP_HOST_INTR_ADDR);
			break;
		case PCIBR_BRIDGETYPE_PIC:
			ptr->pic.p_int_addr[int_n] &= ~PIC_HOST_INTR_ADDR;
			ptr->pic.p_int_addr[int_n] |=
			    (addr & PIC_HOST_INTR_ADDR);
			break;
		default:
			panic("pcireg_intr_addr_addr_set: unknown bridgetype bridge 0x%p",
			      (void *)ptr);
		}
	}
}

/*
 * Force Interrupt Register Access -- Write Only 0000_01C0 - 0000_01F8
 */
void pcireg_force_intr_set(struct pcibus_info *pcibus_info, int int_n)
{
	union br_ptr *ptr;

	if (pcibus_info) {
		ptr = (union br_ptr *)pcibus_info->pbi_buscommon.bs_base;
		switch (pcibus_info->pbi_bridge_type) {
		case PCIBR_BRIDGETYPE_TIOCP:
			ptr->tio.cp_force_pin[int_n] = 1;
			break;
		case PCIBR_BRIDGETYPE_PIC:
			ptr->pic.p_force_pin[int_n] = 1;
			break;
		default:
			panic("pcireg_force_intr_set: unknown bridgetype bridge 0x%p",
			      (void *)ptr);
		}
	}
}

/*
 * Device(x) Write Buffer Flush Reg Access -- Read Only 0000_0240 - 0000_0258
 */
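/*
 * Note (added by the editor): unlike pcireg_tflush_get() above, this helper
 * does not panic on a non-zero value; it simply returns what was read and
 * leaves any "should be zero" check to the caller.
 */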
uint64_t pcireg_wrb_flush_get(struct pcibus_info *pcibus_info, int device)
{
	union br_ptr *ptr;
	uint64_t ret = 0;

	if (pcibus_info) {
		ptr = (union br_ptr *)pcibus_info->pbi_buscommon.bs_base;
		switch (pcibus_info->pbi_bridge_type) {
		case PCIBR_BRIDGETYPE_TIOCP:
			ret = ptr->tio.cp_wr_req_buf[device];
			break;
		case PCIBR_BRIDGETYPE_PIC:
			ret = ptr->pic.p_wr_req_buf[device];
			break;
		default:
			panic("pcireg_wrb_flush_get: unknown bridgetype bridge 0x%p",
			      (void *)ptr);
		}
	}
	/* A read of the Write Buffer Flush register should always return zero */
	return ret;
}

void pcireg_int_ate_set(struct pcibus_info *pcibus_info, int ate_index,
			uint64_t val)
{
	union br_ptr *ptr;

	if (pcibus_info) {
		ptr = (union br_ptr *)pcibus_info->pbi_buscommon.bs_base;
		switch (pcibus_info->pbi_bridge_type) {
		case PCIBR_BRIDGETYPE_TIOCP:
			ptr->tio.cp_int_ate_ram[ate_index] = val;
			break;
		case PCIBR_BRIDGETYPE_PIC:
			ptr->pic.p_int_ate_ram[ate_index] = val;
			break;
		default:
			panic("pcireg_int_ate_set: unknown bridgetype bridge 0x%p",
			      (void *)ptr);
		}
	}
}

uint64_t *pcireg_int_ate_addr(struct pcibus_info *pcibus_info, int ate_index)
{
	union br_ptr *ptr;
	uint64_t *ret = NULL;

	if (pcibus_info) {
		ptr = (union br_ptr *)pcibus_info->pbi_buscommon.bs_base;
		switch (pcibus_info->pbi_bridge_type) {
		case PCIBR_BRIDGETYPE_TIOCP:
			ret = (uint64_t *)&ptr->tio.cp_int_ate_ram[ate_index];
			break;
		case PCIBR_BRIDGETYPE_PIC:
			ret = (uint64_t *)&ptr->pic.p_int_ate_ram[ate_index];
			break;
		default:
			panic("pcireg_int_ate_addr: unknown bridgetype bridge 0x%p",
			      (void *)ptr);
		}
	}
	return ret;
}
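
/*
 * Usage sketch (added by the editor; not part of the original file, kept
 * under #if 0 so it is never compiled).  A caller populating address
 * translation entries could either go through pcireg_int_ate_set() one
 * entry at a time or fetch the register address with pcireg_int_ate_addr()
 * and write it directly; example_ate_value() is hypothetical.
 */
#if 0
static void example_write_ate(struct pcibus_info *pcibus_info, int ate_index)
{
	uint64_t *ate_addr;

	/* Route 1: let the helper pick the right bridge register. */
	pcireg_int_ate_set(pcibus_info, ate_index, example_ate_value());

	/* Route 2: get the register address and write it ourselves. */
	ate_addr = pcireg_int_ate_addr(pcibus_info, ate_index);
	if (ate_addr)
		*ate_addr = example_ate_value();
}
#endif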