diff options
author | Linus Torvalds <torvalds@ppc970.osdl.org> | 2005-04-16 18:20:36 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@ppc970.osdl.org> | 2005-04-16 18:20:36 -0400 |
commit | 1da177e4c3f41524e886b7f1b8a0c1fc7321cac2 (patch) | |
tree | 0bba044c4ce775e45a88a51686b5d9f90697ea9d /arch/ia64/kernel |
Linux-2.6.12-rc2
Initial git repository build. I'm not bothering with the full history,
even though we have it. We can create a separate "historical" git
archive of that later if we want to, and in the meantime it's about
3.2GB when imported into git - space that would just make the early
git days unnecessarily complicated, when we don't have a lot of good
infrastructure for it.
Let it rip!
Diffstat (limited to 'arch/ia64/kernel')
58 files changed, 35362 insertions, 0 deletions
diff --git a/arch/ia64/kernel/Makefile b/arch/ia64/kernel/Makefile new file mode 100644 index 000000000000..c1a02bbc252c --- /dev/null +++ b/arch/ia64/kernel/Makefile | |||
@@ -0,0 +1,52 @@ | |||
1 | # | ||
2 | # Makefile for the linux kernel. | ||
3 | # | ||
4 | |||
5 | extra-y := head.o init_task.o vmlinux.lds | ||
6 | |||
7 | obj-y := acpi.o entry.o efi.o efi_stub.o gate-data.o fsys.o ia64_ksyms.o irq.o irq_ia64.o \ | ||
8 | irq_lsapic.o ivt.o machvec.o pal.o patch.o process.o perfmon.o ptrace.o sal.o \ | ||
9 | salinfo.o semaphore.o setup.o signal.o sys_ia64.o time.o traps.o unaligned.o \ | ||
10 | unwind.o mca.o mca_asm.o topology.o | ||
11 | |||
12 | obj-$(CONFIG_IA64_BRL_EMU) += brl_emu.o | ||
13 | obj-$(CONFIG_IA64_GENERIC) += acpi-ext.o | ||
14 | obj-$(CONFIG_IA64_HP_ZX1) += acpi-ext.o | ||
15 | obj-$(CONFIG_IA64_HP_ZX1_SWIOTLB) += acpi-ext.o | ||
16 | obj-$(CONFIG_IA64_PALINFO) += palinfo.o | ||
17 | obj-$(CONFIG_IOSAPIC) += iosapic.o | ||
18 | obj-$(CONFIG_MODULES) += module.o | ||
19 | obj-$(CONFIG_SMP) += smp.o smpboot.o domain.o | ||
20 | obj-$(CONFIG_PERFMON) += perfmon_default_smpl.o | ||
21 | obj-$(CONFIG_IA64_CYCLONE) += cyclone.o | ||
22 | obj-$(CONFIG_IA64_MCA_RECOVERY) += mca_recovery.o | ||
23 | mca_recovery-y += mca_drv.o mca_drv_asm.o | ||
24 | |||
25 | # The gate DSO image is built using a special linker script. | ||
26 | targets += gate.so gate-syms.o | ||
27 | |||
28 | extra-y += gate.so gate-syms.o gate.lds gate.o | ||
29 | |||
30 | # fp_emulate() expects f2-f5,f16-f31 to contain the user-level state. | ||
31 | CFLAGS_traps.o += -mfixed-range=f2-f5,f16-f31 | ||
32 | |||
33 | CPPFLAGS_gate.lds := -P -C -U$(ARCH) | ||
34 | |||
35 | quiet_cmd_gate = GATE $@ | ||
36 | cmd_gate = $(CC) -nostdlib $(GATECFLAGS_$(@F)) -Wl,-T,$(filter-out FORCE,$^) -o $@ | ||
37 | |||
38 | GATECFLAGS_gate.so = -shared -s -Wl,-soname=linux-gate.so.1 | ||
39 | $(obj)/gate.so: $(obj)/gate.lds $(obj)/gate.o FORCE | ||
40 | $(call if_changed,gate) | ||
41 | |||
42 | $(obj)/built-in.o: $(obj)/gate-syms.o | ||
43 | $(obj)/built-in.o: ld_flags += -R $(obj)/gate-syms.o | ||
44 | |||
45 | GATECFLAGS_gate-syms.o = -r | ||
46 | $(obj)/gate-syms.o: $(obj)/gate.lds $(obj)/gate.o FORCE | ||
47 | $(call if_changed,gate) | ||
48 | |||
49 | # gate-data.o contains the gate DSO image as data in section .data.gate. | ||
50 | # We must build gate.so before we can assemble it. | ||
51 | # Note: kbuild does not track this dependency due to usage of .incbin | ||
52 | $(obj)/gate-data.o: $(obj)/gate.so | ||
diff --git a/arch/ia64/kernel/acpi-ext.c b/arch/ia64/kernel/acpi-ext.c new file mode 100644 index 000000000000..2623df5e2633 --- /dev/null +++ b/arch/ia64/kernel/acpi-ext.c | |||
@@ -0,0 +1,100 @@ | |||
1 | /* | ||
2 | * arch/ia64/kernel/acpi-ext.c | ||
3 | * | ||
4 | * Copyright (C) 2003 Hewlett-Packard | ||
5 | * Copyright (C) Alex Williamson | ||
6 | * Copyright (C) Bjorn Helgaas | ||
7 | * | ||
8 | * Vendor specific extensions to ACPI. | ||
9 | */ | ||
10 | |||
11 | #include <linux/config.h> | ||
12 | #include <linux/module.h> | ||
13 | #include <linux/types.h> | ||
14 | #include <linux/acpi.h> | ||
15 | #include <linux/efi.h> | ||
16 | |||
17 | #include <asm/acpi-ext.h> | ||
18 | |||
/*
 * Header found at the start of a vendor-defined resource's data: a one-byte
 * GUID sub-type followed by the GUID itself.  The payload follows directly
 * after this header.
 */
struct acpi_vendor_descriptor {
	u8				guid_id;
	efi_guid_t			guid;
};

/*
 * Search context passed to acpi_vendor_resource_match():
 *   descriptor - the header to look for (input)
 *   data       - allocated copy of the matching payload (output)
 *   length     - number of bytes in data (output)
 */
struct acpi_vendor_info {
	struct acpi_vendor_descriptor	*descriptor;
	u8				*data;
	u32				length;
};
29 | |||
30 | acpi_status | ||
31 | acpi_vendor_resource_match(struct acpi_resource *resource, void *context) | ||
32 | { | ||
33 | struct acpi_vendor_info *info = (struct acpi_vendor_info *) context; | ||
34 | struct acpi_resource_vendor *vendor; | ||
35 | struct acpi_vendor_descriptor *descriptor; | ||
36 | u32 length; | ||
37 | |||
38 | if (resource->id != ACPI_RSTYPE_VENDOR) | ||
39 | return AE_OK; | ||
40 | |||
41 | vendor = (struct acpi_resource_vendor *) &resource->data; | ||
42 | descriptor = (struct acpi_vendor_descriptor *) vendor->reserved; | ||
43 | if (vendor->length <= sizeof(*info->descriptor) || | ||
44 | descriptor->guid_id != info->descriptor->guid_id || | ||
45 | efi_guidcmp(descriptor->guid, info->descriptor->guid)) | ||
46 | return AE_OK; | ||
47 | |||
48 | length = vendor->length - sizeof(struct acpi_vendor_descriptor); | ||
49 | info->data = acpi_os_allocate(length); | ||
50 | if (!info->data) | ||
51 | return AE_NO_MEMORY; | ||
52 | |||
53 | memcpy(info->data, vendor->reserved + sizeof(struct acpi_vendor_descriptor), length); | ||
54 | info->length = length; | ||
55 | return AE_CTRL_TERMINATE; | ||
56 | } | ||
57 | |||
58 | acpi_status | ||
59 | acpi_find_vendor_resource(acpi_handle obj, struct acpi_vendor_descriptor *id, | ||
60 | u8 **data, u32 *length) | ||
61 | { | ||
62 | struct acpi_vendor_info info; | ||
63 | |||
64 | info.descriptor = id; | ||
65 | info.data = NULL; | ||
66 | |||
67 | acpi_walk_resources(obj, METHOD_NAME__CRS, acpi_vendor_resource_match, &info); | ||
68 | if (!info.data) | ||
69 | return AE_NOT_FOUND; | ||
70 | |||
71 | *data = info.data; | ||
72 | *length = info.length; | ||
73 | return AE_OK; | ||
74 | } | ||
75 | |||
/*
 * Vendor descriptor (GUID sub-type 2) that hp_acpi_csr_space() hands to
 * acpi_find_vendor_resource() to locate the CSR resource.
 */
struct acpi_vendor_descriptor hp_ccsr_descriptor = {
	.guid_id = 2,
	.guid = EFI_GUID(0x69e9adf9, 0x924f, 0xab5f, 0xf6, 0x4a, 0x24, 0xd2, 0x01, 0x37, 0x0e, 0xad)
};
80 | |||
81 | acpi_status | ||
82 | hp_acpi_csr_space(acpi_handle obj, u64 *csr_base, u64 *csr_length) | ||
83 | { | ||
84 | acpi_status status; | ||
85 | u8 *data; | ||
86 | u32 length; | ||
87 | |||
88 | status = acpi_find_vendor_resource(obj, &hp_ccsr_descriptor, &data, &length); | ||
89 | |||
90 | if (ACPI_FAILURE(status) || length != 16) | ||
91 | return AE_NOT_FOUND; | ||
92 | |||
93 | memcpy(csr_base, data, sizeof(*csr_base)); | ||
94 | memcpy(csr_length, data + 8, sizeof(*csr_length)); | ||
95 | acpi_os_free(data); | ||
96 | |||
97 | return AE_OK; | ||
98 | } | ||
99 | |||
100 | EXPORT_SYMBOL(hp_acpi_csr_space); | ||
diff --git a/arch/ia64/kernel/acpi.c b/arch/ia64/kernel/acpi.c new file mode 100644 index 000000000000..a8e99c56a768 --- /dev/null +++ b/arch/ia64/kernel/acpi.c | |||
@@ -0,0 +1,841 @@ | |||
1 | /* | ||
2 | * acpi.c - Architecture-Specific Low-Level ACPI Support | ||
3 | * | ||
4 | * Copyright (C) 1999 VA Linux Systems | ||
5 | * Copyright (C) 1999,2000 Walt Drummond <drummond@valinux.com> | ||
6 | * Copyright (C) 2000, 2002-2003 Hewlett-Packard Co. | ||
7 | * David Mosberger-Tang <davidm@hpl.hp.com> | ||
8 | * Copyright (C) 2000 Intel Corp. | ||
9 | * Copyright (C) 2000,2001 J.I. Lee <jung-ik.lee@intel.com> | ||
10 | * Copyright (C) 2001 Paul Diefenbaugh <paul.s.diefenbaugh@intel.com> | ||
11 | * Copyright (C) 2001 Jenna Hall <jenna.s.hall@intel.com> | ||
12 | * Copyright (C) 2001 Takayoshi Kochi <t-kochi@bq.jp.nec.com> | ||
13 | * Copyright (C) 2002 Erich Focht <efocht@ess.nec.de> | ||
14 | * | ||
15 | * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ | ||
16 | * | ||
17 | * This program is free software; you can redistribute it and/or modify | ||
18 | * it under the terms of the GNU General Public License as published by | ||
19 | * the Free Software Foundation; either version 2 of the License, or | ||
20 | * (at your option) any later version. | ||
21 | * | ||
22 | * This program is distributed in the hope that it will be useful, | ||
23 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
24 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
25 | * GNU General Public License for more details. | ||
26 | * | ||
27 | * You should have received a copy of the GNU General Public License | ||
28 | * along with this program; if not, write to the Free Software | ||
29 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | ||
30 | * | ||
31 | * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ | ||
32 | */ | ||
33 | |||
34 | #include <linux/config.h> | ||
35 | #include <linux/module.h> | ||
36 | #include <linux/init.h> | ||
37 | #include <linux/kernel.h> | ||
38 | #include <linux/sched.h> | ||
39 | #include <linux/smp.h> | ||
40 | #include <linux/string.h> | ||
41 | #include <linux/types.h> | ||
42 | #include <linux/irq.h> | ||
43 | #include <linux/acpi.h> | ||
44 | #include <linux/efi.h> | ||
45 | #include <linux/mmzone.h> | ||
46 | #include <linux/nodemask.h> | ||
47 | #include <asm/io.h> | ||
48 | #include <asm/iosapic.h> | ||
49 | #include <asm/machvec.h> | ||
50 | #include <asm/page.h> | ||
51 | #include <asm/system.h> | ||
52 | #include <asm/numa.h> | ||
53 | #include <asm/sal.h> | ||
54 | #include <asm/cyclone.h> | ||
55 | |||
/*
 * True when a MADT sub-table entry must be rejected: the pointer is NULL,
 * the entry would extend beyond 'end' (the address just past the table), or
 * the length recorded in its header differs from the size of the structure
 * it is being interpreted as.
 *
 * Every use of an argument is parenthesized so that arbitrary expressions
 * can be passed safely.
 */
#define BAD_MADT_ENTRY(entry, end) (						\
		(!(entry)) || (unsigned long)(entry) + sizeof(*(entry)) > (end) ||	\
		((acpi_table_entry_header *)(entry))->length != sizeof(*(entry)))
59 | |||
60 | #define PREFIX "ACPI: " | ||
61 | |||
/* Power-management hooks; left NULL here and exported for others to set. */
void (*pm_idle) (void);
EXPORT_SYMBOL(pm_idle);
void (*pm_power_off) (void);
EXPORT_SYMBOL(pm_power_off);

/* Defaults that acpi_parse_fadt() adjusts from the FADT boot-arch flags. */
unsigned char acpi_kbd_controller_present = 1;
unsigned char acpi_legacy_devices;

/*
 * Maps an ACPI processor id to its SAPIC id ((id << 8) | eid).  Every slot
 * starts out as -1 (all bits set in a u16), meaning "no mapping".
 */
#define MAX_SAPICS 256
u16 ia64_acpiid_to_sapicid[MAX_SAPICS] =
	{ [0 ... MAX_SAPICS - 1] = -1 };
EXPORT_SYMBOL(ia64_acpiid_to_sapicid);
74 | |||
75 | const char * | ||
76 | acpi_get_sysname (void) | ||
77 | { | ||
78 | #ifdef CONFIG_IA64_GENERIC | ||
79 | unsigned long rsdp_phys; | ||
80 | struct acpi20_table_rsdp *rsdp; | ||
81 | struct acpi_table_xsdt *xsdt; | ||
82 | struct acpi_table_header *hdr; | ||
83 | |||
84 | rsdp_phys = acpi_find_rsdp(); | ||
85 | if (!rsdp_phys) { | ||
86 | printk(KERN_ERR "ACPI 2.0 RSDP not found, default to \"dig\"\n"); | ||
87 | return "dig"; | ||
88 | } | ||
89 | |||
90 | rsdp = (struct acpi20_table_rsdp *) __va(rsdp_phys); | ||
91 | if (strncmp(rsdp->signature, RSDP_SIG, sizeof(RSDP_SIG) - 1)) { | ||
92 | printk(KERN_ERR "ACPI 2.0 RSDP signature incorrect, default to \"dig\"\n"); | ||
93 | return "dig"; | ||
94 | } | ||
95 | |||
96 | xsdt = (struct acpi_table_xsdt *) __va(rsdp->xsdt_address); | ||
97 | hdr = &xsdt->header; | ||
98 | if (strncmp(hdr->signature, XSDT_SIG, sizeof(XSDT_SIG) - 1)) { | ||
99 | printk(KERN_ERR "ACPI 2.0 XSDT signature incorrect, default to \"dig\"\n"); | ||
100 | return "dig"; | ||
101 | } | ||
102 | |||
103 | if (!strcmp(hdr->oem_id, "HP")) { | ||
104 | return "hpzx1"; | ||
105 | } | ||
106 | else if (!strcmp(hdr->oem_id, "SGI")) { | ||
107 | return "sn2"; | ||
108 | } | ||
109 | |||
110 | return "dig"; | ||
111 | #else | ||
112 | # if defined (CONFIG_IA64_HP_SIM) | ||
113 | return "hpsim"; | ||
114 | # elif defined (CONFIG_IA64_HP_ZX1) | ||
115 | return "hpzx1"; | ||
116 | # elif defined (CONFIG_IA64_HP_ZX1_SWIOTLB) | ||
117 | return "hpzx1_swiotlb"; | ||
118 | # elif defined (CONFIG_IA64_SGI_SN2) | ||
119 | return "sn2"; | ||
120 | # elif defined (CONFIG_IA64_DIG) | ||
121 | return "dig"; | ||
122 | # else | ||
123 | # error Unknown platform. Fix acpi.c. | ||
124 | # endif | ||
125 | #endif | ||
126 | } | ||
127 | |||
128 | #ifdef CONFIG_ACPI_BOOT | ||
129 | |||
130 | #define ACPI_MAX_PLATFORM_INTERRUPTS 256 | ||
131 | |||
/*
 * Array to record platform interrupt vectors for generic interrupt routing.
 * Indexed by platform interrupt type; every slot starts as -1 and is filled
 * in by acpi_parse_plat_int_src(), then read back by acpi_request_vector().
 */
int platform_intr_list[ACPI_MAX_PLATFORM_INTERRUPTS] = {
	[0 ... ACPI_MAX_PLATFORM_INTERRUPTS - 1] = -1
};
136 | |||
137 | enum acpi_irq_model_id acpi_irq_model = ACPI_IRQ_MODEL_IOSAPIC; | ||
138 | |||
139 | /* | ||
140 | * Interrupt routing API for device drivers. Provides interrupt vector for | ||
141 | * a generic platform event. Currently only CPEI is implemented. | ||
142 | */ | ||
143 | int | ||
144 | acpi_request_vector (u32 int_type) | ||
145 | { | ||
146 | int vector = -1; | ||
147 | |||
148 | if (int_type < ACPI_MAX_PLATFORM_INTERRUPTS) { | ||
149 | /* corrected platform error interrupt */ | ||
150 | vector = platform_intr_list[int_type]; | ||
151 | } else | ||
152 | printk(KERN_ERR "acpi_request_vector(): invalid interrupt type\n"); | ||
153 | return vector; | ||
154 | } | ||
155 | |||
/*
 * Return the kernel virtual address for the ACPI table at physical address
 * @phys_addr.  @size is not used: the address is simply translated with
 * __va().
 */
char *
__acpi_map_table (unsigned long phys_addr, unsigned long size)
{
	char *table = __va(phys_addr);

	return table;
}
161 | |||
162 | /* -------------------------------------------------------------------------- | ||
163 | Boot-time Table Parsing | ||
164 | -------------------------------------------------------------------------- */ | ||
165 | |||
/* LSAPIC entries seen by acpi_parse_lsapic(), enabled or not. */
static int total_cpus __initdata;
/* LSAPIC entries seen by acpi_parse_lsapic() that have the enabled flag set. */
static int available_cpus __initdata;
/* Virtual address of the MADT, set by acpi_parse_madt(). */
struct acpi_table_madt *	acpi_madt __initdata;
/* Set by acpi_parse_madt(); kept outside __initdata as it is used later. */
static u8 has_8259;
170 | |||
171 | |||
172 | static int __init | ||
173 | acpi_parse_lapic_addr_ovr ( | ||
174 | acpi_table_entry_header *header, const unsigned long end) | ||
175 | { | ||
176 | struct acpi_table_lapic_addr_ovr *lapic; | ||
177 | |||
178 | lapic = (struct acpi_table_lapic_addr_ovr *) header; | ||
179 | |||
180 | if (BAD_MADT_ENTRY(lapic, end)) | ||
181 | return -EINVAL; | ||
182 | |||
183 | if (lapic->address) { | ||
184 | iounmap(ipi_base_addr); | ||
185 | ipi_base_addr = ioremap(lapic->address, 0); | ||
186 | } | ||
187 | return 0; | ||
188 | } | ||
189 | |||
190 | |||
191 | static int __init | ||
192 | acpi_parse_lsapic (acpi_table_entry_header *header, const unsigned long end) | ||
193 | { | ||
194 | struct acpi_table_lsapic *lsapic; | ||
195 | |||
196 | lsapic = (struct acpi_table_lsapic *) header; | ||
197 | |||
198 | if (BAD_MADT_ENTRY(lsapic, end)) | ||
199 | return -EINVAL; | ||
200 | |||
201 | if (lsapic->flags.enabled) { | ||
202 | #ifdef CONFIG_SMP | ||
203 | smp_boot_data.cpu_phys_id[available_cpus] = (lsapic->id << 8) | lsapic->eid; | ||
204 | #endif | ||
205 | ia64_acpiid_to_sapicid[lsapic->acpi_id] = (lsapic->id << 8) | lsapic->eid; | ||
206 | ++available_cpus; | ||
207 | } | ||
208 | |||
209 | total_cpus++; | ||
210 | return 0; | ||
211 | } | ||
212 | |||
213 | |||
214 | static int __init | ||
215 | acpi_parse_lapic_nmi (acpi_table_entry_header *header, const unsigned long end) | ||
216 | { | ||
217 | struct acpi_table_lapic_nmi *lacpi_nmi; | ||
218 | |||
219 | lacpi_nmi = (struct acpi_table_lapic_nmi*) header; | ||
220 | |||
221 | if (BAD_MADT_ENTRY(lacpi_nmi, end)) | ||
222 | return -EINVAL; | ||
223 | |||
224 | /* TBD: Support lapic_nmi entries */ | ||
225 | return 0; | ||
226 | } | ||
227 | |||
228 | |||
229 | static int __init | ||
230 | acpi_parse_iosapic (acpi_table_entry_header *header, const unsigned long end) | ||
231 | { | ||
232 | struct acpi_table_iosapic *iosapic; | ||
233 | |||
234 | iosapic = (struct acpi_table_iosapic *) header; | ||
235 | |||
236 | if (BAD_MADT_ENTRY(iosapic, end)) | ||
237 | return -EINVAL; | ||
238 | |||
239 | iosapic_init(iosapic->address, iosapic->global_irq_base); | ||
240 | |||
241 | return 0; | ||
242 | } | ||
243 | |||
244 | |||
245 | static int __init | ||
246 | acpi_parse_plat_int_src ( | ||
247 | acpi_table_entry_header *header, const unsigned long end) | ||
248 | { | ||
249 | struct acpi_table_plat_int_src *plintsrc; | ||
250 | int vector; | ||
251 | |||
252 | plintsrc = (struct acpi_table_plat_int_src *) header; | ||
253 | |||
254 | if (BAD_MADT_ENTRY(plintsrc, end)) | ||
255 | return -EINVAL; | ||
256 | |||
257 | /* | ||
258 | * Get vector assignment for this interrupt, set attributes, | ||
259 | * and program the IOSAPIC routing table. | ||
260 | */ | ||
261 | vector = iosapic_register_platform_intr(plintsrc->type, | ||
262 | plintsrc->global_irq, | ||
263 | plintsrc->iosapic_vector, | ||
264 | plintsrc->eid, | ||
265 | plintsrc->id, | ||
266 | (plintsrc->flags.polarity == 1) ? IOSAPIC_POL_HIGH : IOSAPIC_POL_LOW, | ||
267 | (plintsrc->flags.trigger == 1) ? IOSAPIC_EDGE : IOSAPIC_LEVEL); | ||
268 | |||
269 | platform_intr_list[plintsrc->type] = vector; | ||
270 | return 0; | ||
271 | } | ||
272 | |||
273 | |||
274 | static int __init | ||
275 | acpi_parse_int_src_ovr ( | ||
276 | acpi_table_entry_header *header, const unsigned long end) | ||
277 | { | ||
278 | struct acpi_table_int_src_ovr *p; | ||
279 | |||
280 | p = (struct acpi_table_int_src_ovr *) header; | ||
281 | |||
282 | if (BAD_MADT_ENTRY(p, end)) | ||
283 | return -EINVAL; | ||
284 | |||
285 | iosapic_override_isa_irq(p->bus_irq, p->global_irq, | ||
286 | (p->flags.polarity == 1) ? IOSAPIC_POL_HIGH : IOSAPIC_POL_LOW, | ||
287 | (p->flags.trigger == 1) ? IOSAPIC_EDGE : IOSAPIC_LEVEL); | ||
288 | return 0; | ||
289 | } | ||
290 | |||
291 | |||
292 | static int __init | ||
293 | acpi_parse_nmi_src (acpi_table_entry_header *header, const unsigned long end) | ||
294 | { | ||
295 | struct acpi_table_nmi_src *nmi_src; | ||
296 | |||
297 | nmi_src = (struct acpi_table_nmi_src*) header; | ||
298 | |||
299 | if (BAD_MADT_ENTRY(nmi_src, end)) | ||
300 | return -EINVAL; | ||
301 | |||
302 | /* TBD: Support nimsrc entries */ | ||
303 | return 0; | ||
304 | } | ||
305 | |||
306 | static void __init | ||
307 | acpi_madt_oem_check (char *oem_id, char *oem_table_id) | ||
308 | { | ||
309 | if (!strncmp(oem_id, "IBM", 3) && | ||
310 | (!strncmp(oem_table_id, "SERMOW", 6))) { | ||
311 | |||
312 | /* | ||
313 | * Unfortunately ITC_DRIFT is not yet part of the | ||
314 | * official SAL spec, so the ITC_DRIFT bit is not | ||
315 | * set by the BIOS on this hardware. | ||
316 | */ | ||
317 | sal_platform_features |= IA64_SAL_PLATFORM_FEATURE_ITC_DRIFT; | ||
318 | |||
319 | cyclone_setup(); | ||
320 | } | ||
321 | } | ||
322 | |||
323 | static int __init | ||
324 | acpi_parse_madt (unsigned long phys_addr, unsigned long size) | ||
325 | { | ||
326 | if (!phys_addr || !size) | ||
327 | return -EINVAL; | ||
328 | |||
329 | acpi_madt = (struct acpi_table_madt *) __va(phys_addr); | ||
330 | |||
331 | /* remember the value for reference after free_initmem() */ | ||
332 | #ifdef CONFIG_ITANIUM | ||
333 | has_8259 = 1; /* Firmware on old Itanium systems is broken */ | ||
334 | #else | ||
335 | has_8259 = acpi_madt->flags.pcat_compat; | ||
336 | #endif | ||
337 | iosapic_system_init(has_8259); | ||
338 | |||
339 | /* Get base address of IPI Message Block */ | ||
340 | |||
341 | if (acpi_madt->lapic_address) | ||
342 | ipi_base_addr = ioremap(acpi_madt->lapic_address, 0); | ||
343 | |||
344 | printk(KERN_INFO PREFIX "Local APIC address %p\n", ipi_base_addr); | ||
345 | |||
346 | acpi_madt_oem_check(acpi_madt->header.oem_id, | ||
347 | acpi_madt->header.oem_table_id); | ||
348 | |||
349 | return 0; | ||
350 | } | ||
351 | |||
352 | |||
353 | #ifdef CONFIG_ACPI_NUMA | ||
354 | |||
355 | #undef SLIT_DEBUG | ||
356 | |||
/* Number of 32-bit words in the proximity-domain bitmap below. */
#define PXM_FLAG_LEN ((MAX_PXM_DOMAINS + 1)/32)

static int __initdata srat_num_cpus;			/* number of cpus */
/* Bitmap with one bit per proximity domain seen in the SRAT. */
static u32 __devinitdata pxm_flag[PXM_FLAG_LEN];
#define pxm_bit_set(bit)	(set_bit(bit,(void *)pxm_flag))
#define pxm_bit_test(bit)	(test_bit(bit,(void *)pxm_flag))
/* maps to convert between proximity domain and logical node ID */
int __devinitdata pxm_to_nid_map[MAX_PXM_DOMAINS];
int __initdata nid_to_pxm_map[MAX_NUMNODES];
/* SLIT accepted by acpi_numa_slit_init(); NULL if none was accepted. */
static struct acpi_table_slit __initdata *slit_table;
367 | |||
368 | /* | ||
369 | * ACPI 2.0 SLIT (System Locality Information Table) | ||
370 | * http://devresource.hp.com/devresource/Docs/TechPapers/IA64/slit.pdf | ||
371 | */ | ||
372 | void __init | ||
373 | acpi_numa_slit_init (struct acpi_table_slit *slit) | ||
374 | { | ||
375 | u32 len; | ||
376 | |||
377 | len = sizeof(struct acpi_table_header) + 8 | ||
378 | + slit->localities * slit->localities; | ||
379 | if (slit->header.length != len) { | ||
380 | printk(KERN_ERR "ACPI 2.0 SLIT: size mismatch: %d expected, %d actual\n", | ||
381 | len, slit->header.length); | ||
382 | memset(numa_slit, 10, sizeof(numa_slit)); | ||
383 | return; | ||
384 | } | ||
385 | slit_table = slit; | ||
386 | } | ||
387 | |||
388 | void __init | ||
389 | acpi_numa_processor_affinity_init (struct acpi_table_processor_affinity *pa) | ||
390 | { | ||
391 | /* record this node in proximity bitmap */ | ||
392 | pxm_bit_set(pa->proximity_domain); | ||
393 | |||
394 | node_cpuid[srat_num_cpus].phys_id = (pa->apic_id << 8) | (pa->lsapic_eid); | ||
395 | /* nid should be overridden as logical node id later */ | ||
396 | node_cpuid[srat_num_cpus].nid = pa->proximity_domain; | ||
397 | srat_num_cpus++; | ||
398 | } | ||
399 | |||
400 | void __init | ||
401 | acpi_numa_memory_affinity_init (struct acpi_table_memory_affinity *ma) | ||
402 | { | ||
403 | unsigned long paddr, size; | ||
404 | u8 pxm; | ||
405 | struct node_memblk_s *p, *q, *pend; | ||
406 | |||
407 | pxm = ma->proximity_domain; | ||
408 | |||
409 | /* fill node memory chunk structure */ | ||
410 | paddr = ma->base_addr_hi; | ||
411 | paddr = (paddr << 32) | ma->base_addr_lo; | ||
412 | size = ma->length_hi; | ||
413 | size = (size << 32) | ma->length_lo; | ||
414 | |||
415 | /* Ignore disabled entries */ | ||
416 | if (!ma->flags.enabled) | ||
417 | return; | ||
418 | |||
419 | /* record this node in proximity bitmap */ | ||
420 | pxm_bit_set(pxm); | ||
421 | |||
422 | /* Insertion sort based on base address */ | ||
423 | pend = &node_memblk[num_node_memblks]; | ||
424 | for (p = &node_memblk[0]; p < pend; p++) { | ||
425 | if (paddr < p->start_paddr) | ||
426 | break; | ||
427 | } | ||
428 | if (p < pend) { | ||
429 | for (q = pend - 1; q >= p; q--) | ||
430 | *(q + 1) = *q; | ||
431 | } | ||
432 | p->start_paddr = paddr; | ||
433 | p->size = size; | ||
434 | p->nid = pxm; | ||
435 | num_node_memblks++; | ||
436 | } | ||
437 | |||
438 | void __init | ||
439 | acpi_numa_arch_fixup (void) | ||
440 | { | ||
441 | int i, j, node_from, node_to; | ||
442 | |||
443 | /* If there's no SRAT, fix the phys_id and mark node 0 online */ | ||
444 | if (srat_num_cpus == 0) { | ||
445 | node_set_online(0); | ||
446 | node_cpuid[0].phys_id = hard_smp_processor_id(); | ||
447 | return; | ||
448 | } | ||
449 | |||
450 | /* | ||
451 | * MCD - This can probably be dropped now. No need for pxm ID to node ID | ||
452 | * mapping with sparse node numbering iff MAX_PXM_DOMAINS <= MAX_NUMNODES. | ||
453 | */ | ||
454 | /* calculate total number of nodes in system from PXM bitmap */ | ||
455 | memset(pxm_to_nid_map, -1, sizeof(pxm_to_nid_map)); | ||
456 | memset(nid_to_pxm_map, -1, sizeof(nid_to_pxm_map)); | ||
457 | nodes_clear(node_online_map); | ||
458 | for (i = 0; i < MAX_PXM_DOMAINS; i++) { | ||
459 | if (pxm_bit_test(i)) { | ||
460 | int nid = num_online_nodes(); | ||
461 | pxm_to_nid_map[i] = nid; | ||
462 | nid_to_pxm_map[nid] = i; | ||
463 | node_set_online(nid); | ||
464 | } | ||
465 | } | ||
466 | |||
467 | /* set logical node id in memory chunk structure */ | ||
468 | for (i = 0; i < num_node_memblks; i++) | ||
469 | node_memblk[i].nid = pxm_to_nid_map[node_memblk[i].nid]; | ||
470 | |||
471 | /* assign memory bank numbers for each chunk on each node */ | ||
472 | for_each_online_node(i) { | ||
473 | int bank; | ||
474 | |||
475 | bank = 0; | ||
476 | for (j = 0; j < num_node_memblks; j++) | ||
477 | if (node_memblk[j].nid == i) | ||
478 | node_memblk[j].bank = bank++; | ||
479 | } | ||
480 | |||
481 | /* set logical node id in cpu structure */ | ||
482 | for (i = 0; i < srat_num_cpus; i++) | ||
483 | node_cpuid[i].nid = pxm_to_nid_map[node_cpuid[i].nid]; | ||
484 | |||
485 | printk(KERN_INFO "Number of logical nodes in system = %d\n", num_online_nodes()); | ||
486 | printk(KERN_INFO "Number of memory chunks in system = %d\n", num_node_memblks); | ||
487 | |||
488 | if (!slit_table) return; | ||
489 | memset(numa_slit, -1, sizeof(numa_slit)); | ||
490 | for (i=0; i<slit_table->localities; i++) { | ||
491 | if (!pxm_bit_test(i)) | ||
492 | continue; | ||
493 | node_from = pxm_to_nid_map[i]; | ||
494 | for (j=0; j<slit_table->localities; j++) { | ||
495 | if (!pxm_bit_test(j)) | ||
496 | continue; | ||
497 | node_to = pxm_to_nid_map[j]; | ||
498 | node_distance(node_from, node_to) = | ||
499 | slit_table->entry[i*slit_table->localities + j]; | ||
500 | } | ||
501 | } | ||
502 | |||
503 | #ifdef SLIT_DEBUG | ||
504 | printk("ACPI 2.0 SLIT locality table:\n"); | ||
505 | for_each_online_node(i) { | ||
506 | for_each_online_node(j) | ||
507 | printk("%03d ", node_distance(i,j)); | ||
508 | printk("\n"); | ||
509 | } | ||
510 | #endif | ||
511 | } | ||
512 | #endif /* CONFIG_ACPI_NUMA */ | ||
513 | |||
514 | unsigned int | ||
515 | acpi_register_gsi (u32 gsi, int edge_level, int active_high_low) | ||
516 | { | ||
517 | if (has_8259 && gsi < 16) | ||
518 | return isa_irq_to_vector(gsi); | ||
519 | |||
520 | return iosapic_register_intr(gsi, | ||
521 | (active_high_low == ACPI_ACTIVE_HIGH) ? IOSAPIC_POL_HIGH : IOSAPIC_POL_LOW, | ||
522 | (edge_level == ACPI_EDGE_SENSITIVE) ? IOSAPIC_EDGE : IOSAPIC_LEVEL); | ||
523 | } | ||
524 | EXPORT_SYMBOL(acpi_register_gsi); | ||
525 | |||
526 | #ifdef CONFIG_ACPI_DEALLOCATE_IRQ | ||
/* Undo the registration of @gsi by forwarding to iosapic_unregister_intr(). */
void
acpi_unregister_gsi (u32 gsi)
{
	iosapic_unregister_intr(gsi);
}
532 | EXPORT_SYMBOL(acpi_unregister_gsi); | ||
533 | #endif /* CONFIG_ACPI_DEALLOCATE_IRQ */ | ||
534 | |||
535 | static int __init | ||
536 | acpi_parse_fadt (unsigned long phys_addr, unsigned long size) | ||
537 | { | ||
538 | struct acpi_table_header *fadt_header; | ||
539 | struct fadt_descriptor_rev2 *fadt; | ||
540 | |||
541 | if (!phys_addr || !size) | ||
542 | return -EINVAL; | ||
543 | |||
544 | fadt_header = (struct acpi_table_header *) __va(phys_addr); | ||
545 | if (fadt_header->revision != 3) | ||
546 | return -ENODEV; /* Only deal with ACPI 2.0 FADT */ | ||
547 | |||
548 | fadt = (struct fadt_descriptor_rev2 *) fadt_header; | ||
549 | |||
550 | if (!(fadt->iapc_boot_arch & BAF_8042_KEYBOARD_CONTROLLER)) | ||
551 | acpi_kbd_controller_present = 0; | ||
552 | |||
553 | if (fadt->iapc_boot_arch & BAF_LEGACY_DEVICES) | ||
554 | acpi_legacy_devices = 1; | ||
555 | |||
556 | acpi_register_gsi(fadt->sci_int, ACPI_LEVEL_SENSITIVE, ACPI_ACTIVE_LOW); | ||
557 | return 0; | ||
558 | } | ||
559 | |||
560 | |||
561 | unsigned long __init | ||
562 | acpi_find_rsdp (void) | ||
563 | { | ||
564 | unsigned long rsdp_phys = 0; | ||
565 | |||
566 | if (efi.acpi20) | ||
567 | rsdp_phys = __pa(efi.acpi20); | ||
568 | else if (efi.acpi) | ||
569 | printk(KERN_WARNING PREFIX "v1.0/r0.71 tables no longer supported\n"); | ||
570 | return rsdp_phys; | ||
571 | } | ||
572 | |||
573 | |||
574 | int __init | ||
575 | acpi_boot_init (void) | ||
576 | { | ||
577 | |||
578 | /* | ||
579 | * MADT | ||
580 | * ---- | ||
581 | * Parse the Multiple APIC Description Table (MADT), if exists. | ||
582 | * Note that this table provides platform SMP configuration | ||
583 | * information -- the successor to MPS tables. | ||
584 | */ | ||
585 | |||
586 | if (acpi_table_parse(ACPI_APIC, acpi_parse_madt) < 1) { | ||
587 | printk(KERN_ERR PREFIX "Can't find MADT\n"); | ||
588 | goto skip_madt; | ||
589 | } | ||
590 | |||
591 | /* Local APIC */ | ||
592 | |||
593 | if (acpi_table_parse_madt(ACPI_MADT_LAPIC_ADDR_OVR, acpi_parse_lapic_addr_ovr, 0) < 0) | ||
594 | printk(KERN_ERR PREFIX "Error parsing LAPIC address override entry\n"); | ||
595 | |||
596 | if (acpi_table_parse_madt(ACPI_MADT_LSAPIC, acpi_parse_lsapic, NR_CPUS) < 1) | ||
597 | printk(KERN_ERR PREFIX "Error parsing MADT - no LAPIC entries\n"); | ||
598 | |||
599 | if (acpi_table_parse_madt(ACPI_MADT_LAPIC_NMI, acpi_parse_lapic_nmi, 0) < 0) | ||
600 | printk(KERN_ERR PREFIX "Error parsing LAPIC NMI entry\n"); | ||
601 | |||
602 | /* I/O APIC */ | ||
603 | |||
604 | if (acpi_table_parse_madt(ACPI_MADT_IOSAPIC, acpi_parse_iosapic, NR_IOSAPICS) < 1) | ||
605 | printk(KERN_ERR PREFIX "Error parsing MADT - no IOSAPIC entries\n"); | ||
606 | |||
607 | /* System-Level Interrupt Routing */ | ||
608 | |||
609 | if (acpi_table_parse_madt(ACPI_MADT_PLAT_INT_SRC, acpi_parse_plat_int_src, ACPI_MAX_PLATFORM_INTERRUPTS) < 0) | ||
610 | printk(KERN_ERR PREFIX "Error parsing platform interrupt source entry\n"); | ||
611 | |||
612 | if (acpi_table_parse_madt(ACPI_MADT_INT_SRC_OVR, acpi_parse_int_src_ovr, 0) < 0) | ||
613 | printk(KERN_ERR PREFIX "Error parsing interrupt source overrides entry\n"); | ||
614 | |||
615 | if (acpi_table_parse_madt(ACPI_MADT_NMI_SRC, acpi_parse_nmi_src, 0) < 0) | ||
616 | printk(KERN_ERR PREFIX "Error parsing NMI SRC entry\n"); | ||
617 | skip_madt: | ||
618 | |||
619 | /* | ||
620 | * FADT says whether a legacy keyboard controller is present. | ||
621 | * The FADT also contains an SCI_INT line, by which the system | ||
622 | * gets interrupts such as power and sleep buttons. If it's not | ||
623 | * on a Legacy interrupt, it needs to be setup. | ||
624 | */ | ||
625 | if (acpi_table_parse(ACPI_FADT, acpi_parse_fadt) < 1) | ||
626 | printk(KERN_ERR PREFIX "Can't find FADT\n"); | ||
627 | |||
628 | #ifdef CONFIG_SMP | ||
629 | if (available_cpus == 0) { | ||
630 | printk(KERN_INFO "ACPI: Found 0 CPUS; assuming 1\n"); | ||
631 | printk(KERN_INFO "CPU 0 (0x%04x)", hard_smp_processor_id()); | ||
632 | smp_boot_data.cpu_phys_id[available_cpus] = hard_smp_processor_id(); | ||
633 | available_cpus = 1; /* We've got at least one of these, no? */ | ||
634 | } | ||
635 | smp_boot_data.cpu_count = available_cpus; | ||
636 | |||
637 | smp_build_cpu_map(); | ||
638 | # ifdef CONFIG_ACPI_NUMA | ||
639 | if (srat_num_cpus == 0) { | ||
640 | int cpu, i = 1; | ||
641 | for (cpu = 0; cpu < smp_boot_data.cpu_count; cpu++) | ||
642 | if (smp_boot_data.cpu_phys_id[cpu] != hard_smp_processor_id()) | ||
643 | node_cpuid[i++].phys_id = smp_boot_data.cpu_phys_id[cpu]; | ||
644 | } | ||
645 | build_cpu_to_node_map(); | ||
646 | # endif | ||
647 | #endif | ||
648 | /* Make boot-up look pretty */ | ||
649 | printk(KERN_INFO "%d CPUs available, %d CPUs total\n", available_cpus, total_cpus); | ||
650 | return 0; | ||
651 | } | ||
652 | |||
653 | int | ||
654 | acpi_gsi_to_irq (u32 gsi, unsigned int *irq) | ||
655 | { | ||
656 | int vector; | ||
657 | |||
658 | if (has_8259 && gsi < 16) | ||
659 | *irq = isa_irq_to_vector(gsi); | ||
660 | else { | ||
661 | vector = gsi_to_vector(gsi); | ||
662 | if (vector == -1) | ||
663 | return -1; | ||
664 | |||
665 | *irq = vector; | ||
666 | } | ||
667 | return 0; | ||
668 | } | ||
669 | |||
670 | /* | ||
671 | * ACPI based hotplug CPU support | ||
672 | */ | ||
673 | #ifdef CONFIG_ACPI_HOTPLUG_CPU | ||
674 | static | ||
675 | int | ||
676 | acpi_map_cpu2node(acpi_handle handle, int cpu, long physid) | ||
677 | { | ||
678 | #ifdef CONFIG_ACPI_NUMA | ||
679 | int pxm_id; | ||
680 | |||
681 | pxm_id = acpi_get_pxm(handle); | ||
682 | |||
683 | /* | ||
684 | * Assuming that the container driver would have set the proximity | ||
685 | * domain and would have initialized pxm_to_nid_map[pxm_id] && pxm_flag | ||
686 | */ | ||
687 | node_cpuid[cpu].nid = (pxm_id < 0) ? 0: | ||
688 | pxm_to_nid_map[pxm_id]; | ||
689 | |||
690 | node_cpuid[cpu].phys_id = physid; | ||
691 | #endif | ||
692 | return(0); | ||
693 | } | ||
694 | |||
695 | |||
696 | int | ||
697 | acpi_map_lsapic(acpi_handle handle, int *pcpu) | ||
698 | { | ||
699 | struct acpi_buffer buffer = {ACPI_ALLOCATE_BUFFER, NULL}; | ||
700 | union acpi_object *obj; | ||
701 | struct acpi_table_lsapic *lsapic; | ||
702 | cpumask_t tmp_map; | ||
703 | long physid; | ||
704 | int cpu; | ||
705 | |||
706 | if (ACPI_FAILURE(acpi_evaluate_object(handle, "_MAT", NULL, &buffer))) | ||
707 | return -EINVAL; | ||
708 | |||
709 | if (!buffer.length || !buffer.pointer) | ||
710 | return -EINVAL; | ||
711 | |||
712 | obj = buffer.pointer; | ||
713 | if (obj->type != ACPI_TYPE_BUFFER || | ||
714 | obj->buffer.length < sizeof(*lsapic)) { | ||
715 | acpi_os_free(buffer.pointer); | ||
716 | return -EINVAL; | ||
717 | } | ||
718 | |||
719 | lsapic = (struct acpi_table_lsapic *)obj->buffer.pointer; | ||
720 | |||
721 | if ((lsapic->header.type != ACPI_MADT_LSAPIC) || | ||
722 | (!lsapic->flags.enabled)) { | ||
723 | acpi_os_free(buffer.pointer); | ||
724 | return -EINVAL; | ||
725 | } | ||
726 | |||
727 | physid = ((lsapic->id <<8) | (lsapic->eid)); | ||
728 | |||
729 | acpi_os_free(buffer.pointer); | ||
730 | buffer.length = ACPI_ALLOCATE_BUFFER; | ||
731 | buffer.pointer = NULL; | ||
732 | |||
733 | cpus_complement(tmp_map, cpu_present_map); | ||
734 | cpu = first_cpu(tmp_map); | ||
735 | if(cpu >= NR_CPUS) | ||
736 | return -EINVAL; | ||
737 | |||
738 | acpi_map_cpu2node(handle, cpu, physid); | ||
739 | |||
740 | cpu_set(cpu, cpu_present_map); | ||
741 | ia64_cpu_to_sapicid[cpu] = physid; | ||
742 | ia64_acpiid_to_sapicid[lsapic->acpi_id] = ia64_cpu_to_sapicid[cpu]; | ||
743 | |||
744 | *pcpu = cpu; | ||
745 | return(0); | ||
746 | } | ||
747 | EXPORT_SYMBOL(acpi_map_lsapic); | ||
748 | |||
749 | |||
750 | int | ||
751 | acpi_unmap_lsapic(int cpu) | ||
752 | { | ||
753 | int i; | ||
754 | |||
755 | for (i=0; i<MAX_SAPICS; i++) { | ||
756 | if (ia64_acpiid_to_sapicid[i] == ia64_cpu_to_sapicid[cpu]) { | ||
757 | ia64_acpiid_to_sapicid[i] = -1; | ||
758 | break; | ||
759 | } | ||
760 | } | ||
761 | ia64_cpu_to_sapicid[cpu] = -1; | ||
762 | cpu_clear(cpu,cpu_present_map); | ||
763 | |||
764 | #ifdef CONFIG_ACPI_NUMA | ||
765 | /* NUMA specific cleanup's */ | ||
766 | #endif | ||
767 | |||
768 | return(0); | ||
769 | } | ||
770 | EXPORT_SYMBOL(acpi_unmap_lsapic); | ||
771 | #endif /* CONFIG_ACPI_HOTPLUG_CPU */ | ||
772 | |||
773 | |||
774 | #ifdef CONFIG_ACPI_NUMA | ||
775 | acpi_status __init | ||
776 | acpi_map_iosapic (acpi_handle handle, u32 depth, void *context, void **ret) | ||
777 | { | ||
778 | struct acpi_buffer buffer = {ACPI_ALLOCATE_BUFFER, NULL}; | ||
779 | union acpi_object *obj; | ||
780 | struct acpi_table_iosapic *iosapic; | ||
781 | unsigned int gsi_base; | ||
782 | int node; | ||
783 | |||
784 | /* Only care about objects w/ a method that returns the MADT */ | ||
785 | if (ACPI_FAILURE(acpi_evaluate_object(handle, "_MAT", NULL, &buffer))) | ||
786 | return AE_OK; | ||
787 | |||
788 | if (!buffer.length || !buffer.pointer) | ||
789 | return AE_OK; | ||
790 | |||
791 | obj = buffer.pointer; | ||
792 | if (obj->type != ACPI_TYPE_BUFFER || | ||
793 | obj->buffer.length < sizeof(*iosapic)) { | ||
794 | acpi_os_free(buffer.pointer); | ||
795 | return AE_OK; | ||
796 | } | ||
797 | |||
798 | iosapic = (struct acpi_table_iosapic *)obj->buffer.pointer; | ||
799 | |||
800 | if (iosapic->header.type != ACPI_MADT_IOSAPIC) { | ||
801 | acpi_os_free(buffer.pointer); | ||
802 | return AE_OK; | ||
803 | } | ||
804 | |||
805 | gsi_base = iosapic->global_irq_base; | ||
806 | |||
807 | acpi_os_free(buffer.pointer); | ||
808 | buffer.length = ACPI_ALLOCATE_BUFFER; | ||
809 | buffer.pointer = NULL; | ||
810 | |||
811 | /* | ||
812 | * OK, it's an IOSAPIC MADT entry, look for a _PXM method to tell | ||
813 | * us which node to associate this with. | ||
814 | */ | ||
815 | if (ACPI_FAILURE(acpi_evaluate_object(handle, "_PXM", NULL, &buffer))) | ||
816 | return AE_OK; | ||
817 | |||
818 | if (!buffer.length || !buffer.pointer) | ||
819 | return AE_OK; | ||
820 | |||
821 | obj = buffer.pointer; | ||
822 | |||
823 | if (obj->type != ACPI_TYPE_INTEGER || | ||
824 | obj->integer.value >= MAX_PXM_DOMAINS) { | ||
825 | acpi_os_free(buffer.pointer); | ||
826 | return AE_OK; | ||
827 | } | ||
828 | |||
829 | node = pxm_to_nid_map[obj->integer.value]; | ||
830 | acpi_os_free(buffer.pointer); | ||
831 | |||
832 | if (node >= MAX_NUMNODES || !node_online(node) || | ||
833 | cpus_empty(node_to_cpumask(node))) | ||
834 | return AE_OK; | ||
835 | |||
836 | /* We know a gsi to node mapping! */ | ||
837 | map_iosapic_to_node(gsi_base, node); | ||
838 | return AE_OK; | ||
839 | } | ||
840 | #endif /* CONFIG_ACPI_NUMA */ | ||
841 | #endif /* CONFIG_ACPI_BOOT */ | ||
diff --git a/arch/ia64/kernel/asm-offsets.c b/arch/ia64/kernel/asm-offsets.c new file mode 100644 index 000000000000..7d1ae2982c53 --- /dev/null +++ b/arch/ia64/kernel/asm-offsets.c | |||
@@ -0,0 +1,239 @@ | |||
1 | /* | ||
2 | * Generate definitions needed by assembly language modules. | ||
3 | * This code generates raw asm output which is post-processed | ||
4 | * to extract and format the required data. | ||
5 | */ | ||
6 | |||
7 | #include <linux/config.h> | ||
8 | |||
9 | #include <linux/sched.h> | ||
10 | |||
11 | #include <asm-ia64/processor.h> | ||
12 | #include <asm-ia64/ptrace.h> | ||
13 | #include <asm-ia64/siginfo.h> | ||
14 | #include <asm-ia64/sigcontext.h> | ||
15 | #include <asm-ia64/mca.h> | ||
16 | |||
17 | #include "../kernel/sigframe.h" | ||
18 | |||
/*
 * DEFINE(sym, val) emits an asm statement whose text is a "->" marker
 * followed by the name sym, the immediate operand val and the source
 * spelling of val.  As the file header says, this raw asm output is
 * post-processed to extract the data, so the exact text matters.
 */
19 | #define DEFINE(sym, val) \ | ||
20 | asm volatile("\n->" #sym " %0 " #val : : "i" (val)) | ||
21 | |||
/* BLANK() emits a bare "->" marker with no symbol or value. */
22 | #define BLANK() asm volatile("\n->" : : ) | ||
23 | |||
24 | void foo(void) | ||
25 | { | ||
26 | DEFINE(IA64_TASK_SIZE, sizeof (struct task_struct)); | ||
27 | DEFINE(IA64_THREAD_INFO_SIZE, sizeof (struct thread_info)); | ||
28 | DEFINE(IA64_PT_REGS_SIZE, sizeof (struct pt_regs)); | ||
29 | DEFINE(IA64_SWITCH_STACK_SIZE, sizeof (struct switch_stack)); | ||
30 | DEFINE(IA64_SIGINFO_SIZE, sizeof (struct siginfo)); | ||
31 | DEFINE(IA64_CPU_SIZE, sizeof (struct cpuinfo_ia64)); | ||
32 | DEFINE(SIGFRAME_SIZE, sizeof (struct sigframe)); | ||
33 | DEFINE(UNW_FRAME_INFO_SIZE, sizeof (struct unw_frame_info)); | ||
34 | |||
35 | BLANK(); | ||
36 | |||
37 | DEFINE(TI_FLAGS, offsetof(struct thread_info, flags)); | ||
38 | DEFINE(TI_PRE_COUNT, offsetof(struct thread_info, preempt_count)); | ||
39 | |||
40 | BLANK(); | ||
41 | |||
42 | DEFINE(IA64_TASK_BLOCKED_OFFSET,offsetof (struct task_struct, blocked)); | ||
43 | DEFINE(IA64_TASK_CLEAR_CHILD_TID_OFFSET,offsetof (struct task_struct, clear_child_tid)); | ||
44 | DEFINE(IA64_TASK_GROUP_LEADER_OFFSET, offsetof (struct task_struct, group_leader)); | ||
45 | DEFINE(IA64_TASK_PENDING_OFFSET,offsetof (struct task_struct, pending)); | ||
46 | DEFINE(IA64_TASK_PID_OFFSET, offsetof (struct task_struct, pid)); | ||
47 | DEFINE(IA64_TASK_REAL_PARENT_OFFSET, offsetof (struct task_struct, real_parent)); | ||
48 | DEFINE(IA64_TASK_SIGHAND_OFFSET,offsetof (struct task_struct, sighand)); | ||
49 | DEFINE(IA64_TASK_SIGNAL_OFFSET,offsetof (struct task_struct, signal)); | ||
50 | DEFINE(IA64_TASK_TGID_OFFSET, offsetof (struct task_struct, tgid)); | ||
51 | DEFINE(IA64_TASK_THREAD_KSP_OFFSET, offsetof (struct task_struct, thread.ksp)); | ||
52 | DEFINE(IA64_TASK_THREAD_ON_USTACK_OFFSET, offsetof (struct task_struct, thread.on_ustack)); | ||
53 | |||
54 | BLANK(); | ||
55 | |||
56 | DEFINE(IA64_SIGHAND_SIGLOCK_OFFSET,offsetof (struct sighand_struct, siglock)); | ||
57 | |||
58 | BLANK(); | ||
59 | |||
60 | DEFINE(IA64_SIGNAL_GROUP_STOP_COUNT_OFFSET,offsetof (struct signal_struct, | ||
61 | group_stop_count)); | ||
62 | DEFINE(IA64_SIGNAL_SHARED_PENDING_OFFSET,offsetof (struct signal_struct, shared_pending)); | ||
63 | |||
64 | BLANK(); | ||
65 | |||
66 | DEFINE(IA64_PT_REGS_B6_OFFSET, offsetof (struct pt_regs, b6)); | ||
67 | DEFINE(IA64_PT_REGS_B7_OFFSET, offsetof (struct pt_regs, b7)); | ||
68 | DEFINE(IA64_PT_REGS_AR_CSD_OFFSET, offsetof (struct pt_regs, ar_csd)); | ||
69 | DEFINE(IA64_PT_REGS_AR_SSD_OFFSET, offsetof (struct pt_regs, ar_ssd)); | ||
70 | DEFINE(IA64_PT_REGS_R8_OFFSET, offsetof (struct pt_regs, r8)); | ||
71 | DEFINE(IA64_PT_REGS_R9_OFFSET, offsetof (struct pt_regs, r9)); | ||
72 | DEFINE(IA64_PT_REGS_R10_OFFSET, offsetof (struct pt_regs, r10)); | ||
73 | DEFINE(IA64_PT_REGS_R11_OFFSET, offsetof (struct pt_regs, r11)); | ||
74 | DEFINE(IA64_PT_REGS_CR_IPSR_OFFSET, offsetof (struct pt_regs, cr_ipsr)); | ||
75 | DEFINE(IA64_PT_REGS_CR_IIP_OFFSET, offsetof (struct pt_regs, cr_iip)); | ||
76 | DEFINE(IA64_PT_REGS_CR_IFS_OFFSET, offsetof (struct pt_regs, cr_ifs)); | ||
77 | DEFINE(IA64_PT_REGS_AR_UNAT_OFFSET, offsetof (struct pt_regs, ar_unat)); | ||
78 | DEFINE(IA64_PT_REGS_AR_PFS_OFFSET, offsetof (struct pt_regs, ar_pfs)); | ||
79 | DEFINE(IA64_PT_REGS_AR_RSC_OFFSET, offsetof (struct pt_regs, ar_rsc)); | ||
80 | DEFINE(IA64_PT_REGS_AR_RNAT_OFFSET, offsetof (struct pt_regs, ar_rnat)); | ||
81 | |||
82 | DEFINE(IA64_PT_REGS_AR_BSPSTORE_OFFSET, offsetof (struct pt_regs, ar_bspstore)); | ||
83 | DEFINE(IA64_PT_REGS_PR_OFFSET, offsetof (struct pt_regs, pr)); | ||
84 | DEFINE(IA64_PT_REGS_B0_OFFSET, offsetof (struct pt_regs, b0)); | ||
85 | DEFINE(IA64_PT_REGS_LOADRS_OFFSET, offsetof (struct pt_regs, loadrs)); | ||
86 | DEFINE(IA64_PT_REGS_R1_OFFSET, offsetof (struct pt_regs, r1)); | ||
87 | DEFINE(IA64_PT_REGS_R12_OFFSET, offsetof (struct pt_regs, r12)); | ||
88 | DEFINE(IA64_PT_REGS_R13_OFFSET, offsetof (struct pt_regs, r13)); | ||
89 | DEFINE(IA64_PT_REGS_AR_FPSR_OFFSET, offsetof (struct pt_regs, ar_fpsr)); | ||
90 | DEFINE(IA64_PT_REGS_R15_OFFSET, offsetof (struct pt_regs, r15)); | ||
91 | DEFINE(IA64_PT_REGS_R14_OFFSET, offsetof (struct pt_regs, r14)); | ||
92 | DEFINE(IA64_PT_REGS_R2_OFFSET, offsetof (struct pt_regs, r2)); | ||
93 | DEFINE(IA64_PT_REGS_R3_OFFSET, offsetof (struct pt_regs, r3)); | ||
94 | DEFINE(IA64_PT_REGS_R16_OFFSET, offsetof (struct pt_regs, r16)); | ||
95 | DEFINE(IA64_PT_REGS_R17_OFFSET, offsetof (struct pt_regs, r17)); | ||
96 | DEFINE(IA64_PT_REGS_R18_OFFSET, offsetof (struct pt_regs, r18)); | ||
97 | DEFINE(IA64_PT_REGS_R19_OFFSET, offsetof (struct pt_regs, r19)); | ||
98 | DEFINE(IA64_PT_REGS_R20_OFFSET, offsetof (struct pt_regs, r20)); | ||
99 | DEFINE(IA64_PT_REGS_R21_OFFSET, offsetof (struct pt_regs, r21)); | ||
100 | DEFINE(IA64_PT_REGS_R22_OFFSET, offsetof (struct pt_regs, r22)); | ||
101 | DEFINE(IA64_PT_REGS_R23_OFFSET, offsetof (struct pt_regs, r23)); | ||
102 | DEFINE(IA64_PT_REGS_R24_OFFSET, offsetof (struct pt_regs, r24)); | ||
103 | DEFINE(IA64_PT_REGS_R25_OFFSET, offsetof (struct pt_regs, r25)); | ||
104 | DEFINE(IA64_PT_REGS_R26_OFFSET, offsetof (struct pt_regs, r26)); | ||
105 | DEFINE(IA64_PT_REGS_R27_OFFSET, offsetof (struct pt_regs, r27)); | ||
106 | DEFINE(IA64_PT_REGS_R28_OFFSET, offsetof (struct pt_regs, r28)); | ||
107 | DEFINE(IA64_PT_REGS_R29_OFFSET, offsetof (struct pt_regs, r29)); | ||
108 | DEFINE(IA64_PT_REGS_R30_OFFSET, offsetof (struct pt_regs, r30)); | ||
109 | DEFINE(IA64_PT_REGS_R31_OFFSET, offsetof (struct pt_regs, r31)); | ||
110 | DEFINE(IA64_PT_REGS_AR_CCV_OFFSET, offsetof (struct pt_regs, ar_ccv)); | ||
111 | DEFINE(IA64_PT_REGS_F6_OFFSET, offsetof (struct pt_regs, f6)); | ||
112 | DEFINE(IA64_PT_REGS_F7_OFFSET, offsetof (struct pt_regs, f7)); | ||
113 | DEFINE(IA64_PT_REGS_F8_OFFSET, offsetof (struct pt_regs, f8)); | ||
114 | DEFINE(IA64_PT_REGS_F9_OFFSET, offsetof (struct pt_regs, f9)); | ||
115 | DEFINE(IA64_PT_REGS_F10_OFFSET, offsetof (struct pt_regs, f10)); | ||
116 | DEFINE(IA64_PT_REGS_F11_OFFSET, offsetof (struct pt_regs, f11)); | ||
117 | |||
118 | BLANK(); | ||
119 | |||
120 | DEFINE(IA64_SWITCH_STACK_CALLER_UNAT_OFFSET, offsetof (struct switch_stack, caller_unat)); | ||
121 | DEFINE(IA64_SWITCH_STACK_AR_FPSR_OFFSET, offsetof (struct switch_stack, ar_fpsr)); | ||
122 | DEFINE(IA64_SWITCH_STACK_F2_OFFSET, offsetof (struct switch_stack, f2)); | ||
123 | DEFINE(IA64_SWITCH_STACK_F3_OFFSET, offsetof (struct switch_stack, f3)); | ||
124 | DEFINE(IA64_SWITCH_STACK_F4_OFFSET, offsetof (struct switch_stack, f4)); | ||
125 | DEFINE(IA64_SWITCH_STACK_F5_OFFSET, offsetof (struct switch_stack, f5)); | ||
126 | DEFINE(IA64_SWITCH_STACK_F12_OFFSET, offsetof (struct switch_stack, f12)); | ||
127 | DEFINE(IA64_SWITCH_STACK_F13_OFFSET, offsetof (struct switch_stack, f13)); | ||
128 | DEFINE(IA64_SWITCH_STACK_F14_OFFSET, offsetof (struct switch_stack, f14)); | ||
129 | DEFINE(IA64_SWITCH_STACK_F15_OFFSET, offsetof (struct switch_stack, f15)); | ||
130 | DEFINE(IA64_SWITCH_STACK_F16_OFFSET, offsetof (struct switch_stack, f16)); | ||
131 | DEFINE(IA64_SWITCH_STACK_F17_OFFSET, offsetof (struct switch_stack, f17)); | ||
132 | DEFINE(IA64_SWITCH_STACK_F18_OFFSET, offsetof (struct switch_stack, f18)); | ||
133 | DEFINE(IA64_SWITCH_STACK_F19_OFFSET, offsetof (struct switch_stack, f19)); | ||
134 | DEFINE(IA64_SWITCH_STACK_F20_OFFSET, offsetof (struct switch_stack, f20)); | ||
135 | DEFINE(IA64_SWITCH_STACK_F21_OFFSET, offsetof (struct switch_stack, f21)); | ||
136 | DEFINE(IA64_SWITCH_STACK_F22_OFFSET, offsetof (struct switch_stack, f22)); | ||
137 | DEFINE(IA64_SWITCH_STACK_F23_OFFSET, offsetof (struct switch_stack, f23)); | ||
138 | DEFINE(IA64_SWITCH_STACK_F24_OFFSET, offsetof (struct switch_stack, f24)); | ||
139 | DEFINE(IA64_SWITCH_STACK_F25_OFFSET, offsetof (struct switch_stack, f25)); | ||
140 | DEFINE(IA64_SWITCH_STACK_F26_OFFSET, offsetof (struct switch_stack, f26)); | ||
141 | DEFINE(IA64_SWITCH_STACK_F27_OFFSET, offsetof (struct switch_stack, f27)); | ||
142 | DEFINE(IA64_SWITCH_STACK_F28_OFFSET, offsetof (struct switch_stack, f28)); | ||
143 | DEFINE(IA64_SWITCH_STACK_F29_OFFSET, offsetof (struct switch_stack, f29)); | ||
144 | DEFINE(IA64_SWITCH_STACK_F30_OFFSET, offsetof (struct switch_stack, f30)); | ||
145 | DEFINE(IA64_SWITCH_STACK_F31_OFFSET, offsetof (struct switch_stack, f31)); | ||
146 | DEFINE(IA64_SWITCH_STACK_R4_OFFSET, offsetof (struct switch_stack, r4)); | ||
147 | DEFINE(IA64_SWITCH_STACK_R5_OFFSET, offsetof (struct switch_stack, r5)); | ||
148 | DEFINE(IA64_SWITCH_STACK_R6_OFFSET, offsetof (struct switch_stack, r6)); | ||
149 | DEFINE(IA64_SWITCH_STACK_R7_OFFSET, offsetof (struct switch_stack, r7)); | ||
150 | DEFINE(IA64_SWITCH_STACK_B0_OFFSET, offsetof (struct switch_stack, b0)); | ||
151 | DEFINE(IA64_SWITCH_STACK_B1_OFFSET, offsetof (struct switch_stack, b1)); | ||
152 | DEFINE(IA64_SWITCH_STACK_B2_OFFSET, offsetof (struct switch_stack, b2)); | ||
153 | DEFINE(IA64_SWITCH_STACK_B3_OFFSET, offsetof (struct switch_stack, b3)); | ||
154 | DEFINE(IA64_SWITCH_STACK_B4_OFFSET, offsetof (struct switch_stack, b4)); | ||
155 | DEFINE(IA64_SWITCH_STACK_B5_OFFSET, offsetof (struct switch_stack, b5)); | ||
156 | DEFINE(IA64_SWITCH_STACK_AR_PFS_OFFSET, offsetof (struct switch_stack, ar_pfs)); | ||
157 | DEFINE(IA64_SWITCH_STACK_AR_LC_OFFSET, offsetof (struct switch_stack, ar_lc)); | ||
158 | DEFINE(IA64_SWITCH_STACK_AR_UNAT_OFFSET, offsetof (struct switch_stack, ar_unat)); | ||
159 | DEFINE(IA64_SWITCH_STACK_AR_RNAT_OFFSET, offsetof (struct switch_stack, ar_rnat)); | ||
160 | DEFINE(IA64_SWITCH_STACK_AR_BSPSTORE_OFFSET, offsetof (struct switch_stack, ar_bspstore)); | ||
161 | DEFINE(IA64_SWITCH_STACK_PR_OFFSET, offsetof (struct switch_stack, pr)); | ||
162 | |||
163 | BLANK(); | ||
164 | |||
165 | DEFINE(IA64_SIGCONTEXT_IP_OFFSET, offsetof (struct sigcontext, sc_ip)); | ||
166 | DEFINE(IA64_SIGCONTEXT_AR_BSP_OFFSET, offsetof (struct sigcontext, sc_ar_bsp)); | ||
167 | DEFINE(IA64_SIGCONTEXT_AR_FPSR_OFFSET, offsetof (struct sigcontext, sc_ar_fpsr)); | ||
168 | DEFINE(IA64_SIGCONTEXT_AR_RNAT_OFFSET, offsetof (struct sigcontext, sc_ar_rnat)); | ||
169 | DEFINE(IA64_SIGCONTEXT_AR_UNAT_OFFSET, offsetof (struct sigcontext, sc_ar_unat)); | ||
170 | DEFINE(IA64_SIGCONTEXT_B0_OFFSET, offsetof (struct sigcontext, sc_br[0])); | ||
171 | DEFINE(IA64_SIGCONTEXT_CFM_OFFSET, offsetof (struct sigcontext, sc_cfm)); | ||
172 | DEFINE(IA64_SIGCONTEXT_FLAGS_OFFSET, offsetof (struct sigcontext, sc_flags)); | ||
173 | DEFINE(IA64_SIGCONTEXT_FR6_OFFSET, offsetof (struct sigcontext, sc_fr[6])); | ||
174 | DEFINE(IA64_SIGCONTEXT_PR_OFFSET, offsetof (struct sigcontext, sc_pr)); | ||
175 | DEFINE(IA64_SIGCONTEXT_R12_OFFSET, offsetof (struct sigcontext, sc_gr[12])); | ||
176 | DEFINE(IA64_SIGCONTEXT_RBS_BASE_OFFSET,offsetof (struct sigcontext, sc_rbs_base)); | ||
177 | DEFINE(IA64_SIGCONTEXT_LOADRS_OFFSET, offsetof (struct sigcontext, sc_loadrs)); | ||
178 | |||
179 | BLANK(); | ||
180 | |||
181 | DEFINE(IA64_SIGPENDING_SIGNAL_OFFSET, offsetof (struct sigpending, signal)); | ||
182 | |||
183 | BLANK(); | ||
184 | |||
185 | DEFINE(IA64_SIGFRAME_ARG0_OFFSET, offsetof (struct sigframe, arg0)); | ||
186 | DEFINE(IA64_SIGFRAME_ARG1_OFFSET, offsetof (struct sigframe, arg1)); | ||
187 | DEFINE(IA64_SIGFRAME_ARG2_OFFSET, offsetof (struct sigframe, arg2)); | ||
188 | DEFINE(IA64_SIGFRAME_HANDLER_OFFSET, offsetof (struct sigframe, handler)); | ||
189 | DEFINE(IA64_SIGFRAME_SIGCONTEXT_OFFSET, offsetof (struct sigframe, sc)); | ||
190 | BLANK(); | ||
191 | /* for assembly files which can't include sched.h: */ | ||
192 | DEFINE(IA64_CLONE_VFORK, CLONE_VFORK); | ||
193 | DEFINE(IA64_CLONE_VM, CLONE_VM); | ||
194 | |||
195 | BLANK(); | ||
196 | DEFINE(IA64_CPUINFO_NSEC_PER_CYC_OFFSET, | ||
197 | offsetof (struct cpuinfo_ia64, nsec_per_cyc)); | ||
198 | DEFINE(IA64_CPUINFO_PTCE_BASE_OFFSET, | ||
199 | offsetof (struct cpuinfo_ia64, ptce_base)); | ||
200 | DEFINE(IA64_CPUINFO_PTCE_COUNT_OFFSET, | ||
201 | offsetof (struct cpuinfo_ia64, ptce_count)); | ||
202 | DEFINE(IA64_CPUINFO_PTCE_STRIDE_OFFSET, | ||
203 | offsetof (struct cpuinfo_ia64, ptce_stride)); | ||
204 | BLANK(); | ||
205 | DEFINE(IA64_TIMESPEC_TV_NSEC_OFFSET, | ||
206 | offsetof (struct timespec, tv_nsec)); | ||
207 | |||
208 | DEFINE(CLONE_SETTLS_BIT, 19); | ||
209 | #if CLONE_SETTLS != (1<<19) | ||
210 | # error "CLONE_SETTLS_BIT incorrect, please fix" | ||
211 | #endif | ||
212 | |||
213 | BLANK(); | ||
214 | DEFINE(IA64_MCA_CPU_PROC_STATE_DUMP_OFFSET, | ||
215 | offsetof (struct ia64_mca_cpu, proc_state_dump)); | ||
216 | DEFINE(IA64_MCA_CPU_STACK_OFFSET, | ||
217 | offsetof (struct ia64_mca_cpu, stack)); | ||
218 | DEFINE(IA64_MCA_CPU_STACKFRAME_OFFSET, | ||
219 | offsetof (struct ia64_mca_cpu, stackframe)); | ||
220 | DEFINE(IA64_MCA_CPU_RBSTORE_OFFSET, | ||
221 | offsetof (struct ia64_mca_cpu, rbstore)); | ||
222 | DEFINE(IA64_MCA_CPU_INIT_STACK_OFFSET, | ||
223 | offsetof (struct ia64_mca_cpu, init_stack)); | ||
224 | BLANK(); | ||
225 | /* used by fsys_gettimeofday in arch/ia64/kernel/fsys.S */ | ||
226 | DEFINE(IA64_TIME_INTERPOLATOR_ADDRESS_OFFSET, offsetof (struct time_interpolator, addr)); | ||
227 | DEFINE(IA64_TIME_INTERPOLATOR_SOURCE_OFFSET, offsetof (struct time_interpolator, source)); | ||
228 | DEFINE(IA64_TIME_INTERPOLATOR_SHIFT_OFFSET, offsetof (struct time_interpolator, shift)); | ||
229 | DEFINE(IA64_TIME_INTERPOLATOR_NSEC_OFFSET, offsetof (struct time_interpolator, nsec_per_cyc)); | ||
230 | DEFINE(IA64_TIME_INTERPOLATOR_OFFSET_OFFSET, offsetof (struct time_interpolator, offset)); | ||
231 | DEFINE(IA64_TIME_INTERPOLATOR_LAST_CYCLE_OFFSET, offsetof (struct time_interpolator, last_cycle)); | ||
232 | DEFINE(IA64_TIME_INTERPOLATOR_LAST_COUNTER_OFFSET, offsetof (struct time_interpolator, last_counter)); | ||
233 | DEFINE(IA64_TIME_INTERPOLATOR_JITTER_OFFSET, offsetof (struct time_interpolator, jitter)); | ||
234 | DEFINE(IA64_TIME_INTERPOLATOR_MASK_OFFSET, offsetof (struct time_interpolator, mask)); | ||
235 | DEFINE(IA64_TIME_SOURCE_CPU, TIME_SOURCE_CPU); | ||
236 | DEFINE(IA64_TIME_SOURCE_MMIO64, TIME_SOURCE_MMIO64); | ||
237 | DEFINE(IA64_TIME_SOURCE_MMIO32, TIME_SOURCE_MMIO32); | ||
238 | DEFINE(IA64_TIMESPEC_TV_NSEC_OFFSET, offsetof (struct timespec, tv_nsec)); | ||
239 | } | ||
diff --git a/arch/ia64/kernel/brl_emu.c b/arch/ia64/kernel/brl_emu.c new file mode 100644 index 000000000000..0b286ca164f9 --- /dev/null +++ b/arch/ia64/kernel/brl_emu.c | |||
@@ -0,0 +1,234 @@ | |||
1 | /* | ||
2 | * Emulation of the "brl" instruction for IA64 processors that | ||
3 | * don't support it in hardware. | ||
4 | * Author: Stephan Zeisset, Intel Corp. <Stephan.Zeisset@intel.com> | ||
5 | * | ||
6 | * 02/22/02 D. Mosberger Clear si_flgs, si_isr, and si_imm to avoid | ||
7 | * leaking kernel bits. | ||
8 | */ | ||
9 | |||
10 | #include <linux/kernel.h> | ||
11 | #include <linux/sched.h> | ||
12 | #include <asm/uaccess.h> | ||
13 | #include <asm/processor.h> | ||
14 | |||
15 | extern char ia64_set_b1, ia64_set_b2, ia64_set_b3, ia64_set_b4, ia64_set_b5; | ||
16 | |||
/*
 * Result handed back by ia64_emulate_brl().  That function sets fkt to
 * (unsigned long) -1 on failure, to 0, or to the address of one of the
 * ia64_set_bN symbols, and stores the return address in arg1 when it
 * emulates a long call.
 */
struct illegal_op_return {
	unsigned long fkt;
	unsigned long arg1;
	unsigned long arg2;
	unsigned long arg3;
};
20 | |||
/*
 * A virtual address is valid only when its unimplemented bits all
 * repeat the most significant implemented bit.  unimpl_va_mask covers
 * the unimplemented bits plus that top implemented bit, so the masked
 * address has to be either all 0's or equal to the mask itself;
 * anything else is an unimplemented address.
 */
#define unimplemented_virtual_address(va) (						\
	!(((va) & local_cpu_data->unimpl_va_mask) == 0 ||				\
	  ((va) & local_cpu_data->unimpl_va_mask) == local_cpu_data->unimpl_va_mask)	\
)

/*
 * A physical address is valid only when all of its unimplemented bits
 * are 0.  unimpl_pa_mask covers exactly those bits, so any bit left
 * after masking marks the address as unimplemented.
 */
#define unimplemented_physical_address(pa) (			\
	!!((pa) & local_cpu_data->unimpl_pa_mask)		\
)
43 | |||
44 | /* | ||
45 | * Handle an illegal operation fault that was caused by an | ||
46 | * unimplemented "brl" instruction. | ||
47 | * If we are not successful (e.g because the illegal operation | ||
48 | * wasn't caused by a "brl" after all), we return -1. | ||
49 | * If we are successful, we return either 0 or the address | ||
50 | * of a "fixup" function for manipulating preserved register | ||
51 | * state. | ||
52 | */ | ||
53 | |||
54 | struct illegal_op_return | ||
55 | ia64_emulate_brl (struct pt_regs *regs, unsigned long ar_ec) | ||
56 | { | ||
57 | unsigned long bundle[2]; | ||
58 | unsigned long opcode, btype, qp, offset, cpl; | ||
59 | unsigned long next_ip; | ||
60 | struct siginfo siginfo; | ||
61 | struct illegal_op_return rv; | ||
62 | long tmp_taken, unimplemented_address; | ||
63 | |||
64 | rv.fkt = (unsigned long) -1; | ||
65 | |||
66 | /* | ||
67 | * Decode the instruction bundle. | ||
68 | */ | ||
69 | |||
70 | if (copy_from_user(bundle, (void *) (regs->cr_iip), sizeof(bundle))) | ||
71 | return rv; | ||
72 | |||
73 | next_ip = (unsigned long) regs->cr_iip + 16; | ||
74 | |||
75 | /* "brl" must be in slot 2. */ | ||
76 | if (ia64_psr(regs)->ri != 1) return rv; | ||
77 | |||
78 | /* Must be "mlx" template */ | ||
79 | if ((bundle[0] & 0x1e) != 0x4) return rv; | ||
80 | |||
81 | opcode = (bundle[1] >> 60); | ||
82 | btype = ((bundle[1] >> 29) & 0x7); | ||
83 | qp = ((bundle[1] >> 23) & 0x3f); | ||
84 | offset = ((bundle[1] & 0x0800000000000000L) << 4) | ||
85 | | ((bundle[1] & 0x00fffff000000000L) >> 32) | ||
86 | | ((bundle[1] & 0x00000000007fffffL) << 40) | ||
87 | | ((bundle[0] & 0xffff000000000000L) >> 24); | ||
88 | |||
89 | tmp_taken = regs->pr & (1L << qp); | ||
90 | |||
91 | switch(opcode) { | ||
92 | |||
93 | case 0xC: | ||
94 | /* | ||
95 | * Long Branch. | ||
96 | */ | ||
97 | if (btype != 0) return rv; | ||
98 | rv.fkt = 0; | ||
99 | if (!(tmp_taken)) { | ||
100 | /* | ||
101 | * Qualifying predicate is 0. | ||
102 | * Skip instruction. | ||
103 | */ | ||
104 | regs->cr_iip = next_ip; | ||
105 | ia64_psr(regs)->ri = 0; | ||
106 | return rv; | ||
107 | } | ||
108 | break; | ||
109 | |||
110 | case 0xD: | ||
111 | /* | ||
112 | * Long Call. | ||
113 | */ | ||
114 | rv.fkt = 0; | ||
115 | if (!(tmp_taken)) { | ||
116 | /* | ||
117 | * Qualifying predicate is 0. | ||
118 | * Skip instruction. | ||
119 | */ | ||
120 | regs->cr_iip = next_ip; | ||
121 | ia64_psr(regs)->ri = 0; | ||
122 | return rv; | ||
123 | } | ||
124 | |||
125 | /* | ||
126 | * BR[btype] = IP+16 | ||
127 | */ | ||
128 | switch(btype) { | ||
129 | case 0: | ||
130 | regs->b0 = next_ip; | ||
131 | break; | ||
132 | case 1: | ||
133 | rv.fkt = (unsigned long) &ia64_set_b1; | ||
134 | break; | ||
135 | case 2: | ||
136 | rv.fkt = (unsigned long) &ia64_set_b2; | ||
137 | break; | ||
138 | case 3: | ||
139 | rv.fkt = (unsigned long) &ia64_set_b3; | ||
140 | break; | ||
141 | case 4: | ||
142 | rv.fkt = (unsigned long) &ia64_set_b4; | ||
143 | break; | ||
144 | case 5: | ||
145 | rv.fkt = (unsigned long) &ia64_set_b5; | ||
146 | break; | ||
147 | case 6: | ||
148 | regs->b6 = next_ip; | ||
149 | break; | ||
150 | case 7: | ||
151 | regs->b7 = next_ip; | ||
152 | break; | ||
153 | } | ||
154 | rv.arg1 = next_ip; | ||
155 | |||
156 | /* | ||
157 | * AR[PFS].pfm = CFM | ||
158 | * AR[PFS].pec = AR[EC] | ||
159 | * AR[PFS].ppl = PSR.cpl | ||
160 | */ | ||
161 | cpl = ia64_psr(regs)->cpl; | ||
162 | regs->ar_pfs = ((regs->cr_ifs & 0x3fffffffff) | ||
163 | | (ar_ec << 52) | (cpl << 62)); | ||
164 | |||
165 | /* | ||
166 | * CFM.sof -= CFM.sol | ||
167 | * CFM.sol = 0 | ||
168 | * CFM.sor = 0 | ||
169 | * CFM.rrb.gr = 0 | ||
170 | * CFM.rrb.fr = 0 | ||
171 | * CFM.rrb.pr = 0 | ||
172 | */ | ||
173 | regs->cr_ifs = ((regs->cr_ifs & 0xffffffc00000007f) | ||
174 | - ((regs->cr_ifs >> 7) & 0x7f)); | ||
175 | |||
176 | break; | ||
177 | |||
178 | default: | ||
179 | /* | ||
180 | * Unknown opcode. | ||
181 | */ | ||
182 | return rv; | ||
183 | |||
184 | } | ||
185 | |||
186 | regs->cr_iip += offset; | ||
187 | ia64_psr(regs)->ri = 0; | ||
188 | |||
189 | if (ia64_psr(regs)->it == 0) | ||
190 | unimplemented_address = unimplemented_physical_address(regs->cr_iip); | ||
191 | else | ||
192 | unimplemented_address = unimplemented_virtual_address(regs->cr_iip); | ||
193 | |||
194 | if (unimplemented_address) { | ||
195 | /* | ||
196 | * The target address contains unimplemented bits. | ||
197 | */ | ||
198 | printk(KERN_DEBUG "Woah! Unimplemented Instruction Address Trap!\n"); | ||
199 | siginfo.si_signo = SIGILL; | ||
200 | siginfo.si_errno = 0; | ||
201 | siginfo.si_flags = 0; | ||
202 | siginfo.si_isr = 0; | ||
203 | siginfo.si_imm = 0; | ||
204 | siginfo.si_code = ILL_BADIADDR; | ||
205 | force_sig_info(SIGILL, &siginfo, current); | ||
206 | } else if (ia64_psr(regs)->tb) { | ||
207 | /* | ||
208 | * Branch Tracing is enabled. | ||
209 | * Force a taken branch signal. | ||
210 | */ | ||
211 | siginfo.si_signo = SIGTRAP; | ||
212 | siginfo.si_errno = 0; | ||
213 | siginfo.si_code = TRAP_BRANCH; | ||
214 | siginfo.si_flags = 0; | ||
215 | siginfo.si_isr = 0; | ||
216 | siginfo.si_addr = 0; | ||
217 | siginfo.si_imm = 0; | ||
218 | force_sig_info(SIGTRAP, &siginfo, current); | ||
219 | } else if (ia64_psr(regs)->ss) { | ||
220 | /* | ||
221 | * Single Step is enabled. | ||
222 | * Force a trace signal. | ||
223 | */ | ||
224 | siginfo.si_signo = SIGTRAP; | ||
225 | siginfo.si_errno = 0; | ||
226 | siginfo.si_code = TRAP_TRACE; | ||
227 | siginfo.si_flags = 0; | ||
228 | siginfo.si_isr = 0; | ||
229 | siginfo.si_addr = 0; | ||
230 | siginfo.si_imm = 0; | ||
231 | force_sig_info(SIGTRAP, &siginfo, current); | ||
232 | } | ||
233 | return rv; | ||
234 | } | ||
diff --git a/arch/ia64/kernel/cyclone.c b/arch/ia64/kernel/cyclone.c new file mode 100644 index 000000000000..768c7e46957c --- /dev/null +++ b/arch/ia64/kernel/cyclone.c | |||
@@ -0,0 +1,109 @@ | |||
1 | #include <linux/module.h> | ||
2 | #include <linux/smp.h> | ||
3 | #include <linux/time.h> | ||
4 | #include <linux/errno.h> | ||
5 | #include <asm/io.h> | ||
6 | |||
7 | /* IBM Summit (EXA) Cyclone counter code*/ | ||
8 | #define CYCLONE_CBAR_ADDR 0xFEB00CD0 | ||
9 | #define CYCLONE_PMCC_OFFSET 0x51A0 | ||
10 | #define CYCLONE_MPMC_OFFSET 0x51D0 | ||
11 | #define CYCLONE_MPCS_OFFSET 0x51A8 | ||
12 | #define CYCLONE_TIMER_FREQ 100000000 | ||
13 | |||
14 | int use_cyclone; | ||
15 | void __init cyclone_setup(void) | ||
16 | { | ||
17 | use_cyclone = 1; | ||
18 | } | ||
19 | |||
20 | |||
21 | struct time_interpolator cyclone_interpolator = { | ||
22 | .source = TIME_SOURCE_MMIO64, | ||
23 | .shift = 16, | ||
24 | .frequency = CYCLONE_TIMER_FREQ, | ||
25 | .drift = -100, | ||
26 | .mask = (1LL << 40) - 1 | ||
27 | }; | ||
28 | |||
29 | int __init init_cyclone_clock(void) | ||
30 | { | ||
31 | u64* reg; | ||
32 | u64 base; /* saved cyclone base address */ | ||
33 | u64 offset; /* offset from pageaddr to cyclone_timer register */ | ||
34 | int i; | ||
35 | u32* volatile cyclone_timer; /* Cyclone MPMC0 register */ | ||
36 | |||
37 | if (!use_cyclone) | ||
38 | return -ENODEV; | ||
39 | |||
40 | printk(KERN_INFO "Summit chipset: Starting Cyclone Counter.\n"); | ||
41 | |||
42 | /* find base address */ | ||
43 | offset = (CYCLONE_CBAR_ADDR); | ||
44 | reg = (u64*)ioremap_nocache(offset, sizeof(u64)); | ||
45 | if(!reg){ | ||
46 | printk(KERN_ERR "Summit chipset: Could not find valid CBAR register.\n"); | ||
47 | use_cyclone = 0; | ||
48 | return -ENODEV; | ||
49 | } | ||
50 | base = readq(reg); | ||
51 | if(!base){ | ||
52 | printk(KERN_ERR "Summit chipset: Could not find valid CBAR value.\n"); | ||
53 | use_cyclone = 0; | ||
54 | return -ENODEV; | ||
55 | } | ||
56 | iounmap(reg); | ||
57 | |||
58 | /* setup PMCC */ | ||
59 | offset = (base + CYCLONE_PMCC_OFFSET); | ||
60 | reg = (u64*)ioremap_nocache(offset, sizeof(u64)); | ||
61 | if(!reg){ | ||
62 | printk(KERN_ERR "Summit chipset: Could not find valid PMCC register.\n"); | ||
63 | use_cyclone = 0; | ||
64 | return -ENODEV; | ||
65 | } | ||
66 | writel(0x00000001,reg); | ||
67 | iounmap(reg); | ||
68 | |||
69 | /* setup MPCS */ | ||
70 | offset = (base + CYCLONE_MPCS_OFFSET); | ||
71 | reg = (u64*)ioremap_nocache(offset, sizeof(u64)); | ||
72 | if(!reg){ | ||
73 | printk(KERN_ERR "Summit chipset: Could not find valid MPCS register.\n"); | ||
74 | use_cyclone = 0; | ||
75 | return -ENODEV; | ||
76 | } | ||
77 | writel(0x00000001,reg); | ||
78 | iounmap(reg); | ||
79 | |||
80 | /* map in cyclone_timer */ | ||
81 | offset = (base + CYCLONE_MPMC_OFFSET); | ||
82 | cyclone_timer = (u32*)ioremap_nocache(offset, sizeof(u32)); | ||
83 | if(!cyclone_timer){ | ||
84 | printk(KERN_ERR "Summit chipset: Could not find valid MPMC register.\n"); | ||
85 | use_cyclone = 0; | ||
86 | return -ENODEV; | ||
87 | } | ||
88 | |||
89 | /*quick test to make sure its ticking*/ | ||
90 | for(i=0; i<3; i++){ | ||
91 | u32 old = readl(cyclone_timer); | ||
92 | int stall = 100; | ||
93 | while(stall--) barrier(); | ||
94 | if(readl(cyclone_timer) == old){ | ||
95 | printk(KERN_ERR "Summit chipset: Counter not counting! DISABLED\n"); | ||
96 | iounmap(cyclone_timer); | ||
97 | cyclone_timer = 0; | ||
98 | use_cyclone = 0; | ||
99 | return -ENODEV; | ||
100 | } | ||
101 | } | ||
102 | /* initialize last tick */ | ||
103 | cyclone_interpolator.addr = cyclone_timer; | ||
104 | register_time_interpolator(&cyclone_interpolator); | ||
105 | |||
106 | return 0; | ||
107 | } | ||
108 | |||
109 | __initcall(init_cyclone_clock); | ||
diff --git a/arch/ia64/kernel/domain.c b/arch/ia64/kernel/domain.c new file mode 100644 index 000000000000..fe532c970438 --- /dev/null +++ b/arch/ia64/kernel/domain.c | |||
@@ -0,0 +1,382 @@ | |||
1 | /* | ||
2 | * arch/ia64/kernel/domain.c | ||
3 | * Architecture specific sched-domains builder. | ||
4 | * | ||
5 | * Copyright (C) 2004 Jesse Barnes | ||
6 | * Copyright (C) 2004 Silicon Graphics, Inc. | ||
7 | */ | ||
8 | |||
9 | #include <linux/sched.h> | ||
10 | #include <linux/percpu.h> | ||
11 | #include <linux/slab.h> | ||
12 | #include <linux/cpumask.h> | ||
13 | #include <linux/init.h> | ||
14 | #include <linux/topology.h> | ||
15 | #include <linux/nodemask.h> | ||
16 | |||
17 | #define SD_NODES_PER_DOMAIN 6 | ||
18 | |||
19 | #ifdef CONFIG_NUMA | ||
20 | /** | ||
21 | * find_next_best_node - find the next node to include in a sched_domain | ||
22 | * @node: node whose sched_domain we're building | ||
23 | * @used_nodes: nodes already in the sched_domain | ||
24 | * | ||
25 | * Find the next node to include in a given scheduling domain. Simply | ||
26 | * finds the closest node not already in the @used_nodes map. | ||
27 | * | ||
28 | * Should use nodemask_t. | ||
29 | */ | ||
30 | static int __devinit find_next_best_node(int node, unsigned long *used_nodes) | ||
31 | { | ||
32 | int i, n, val, min_val, best_node = 0; | ||
33 | |||
34 | min_val = INT_MAX; | ||
35 | |||
36 | for (i = 0; i < MAX_NUMNODES; i++) { | ||
37 | /* Start at @node */ | ||
38 | n = (node + i) % MAX_NUMNODES; | ||
39 | |||
40 | if (!nr_cpus_node(n)) | ||
41 | continue; | ||
42 | |||
43 | /* Skip already used nodes */ | ||
44 | if (test_bit(n, used_nodes)) | ||
45 | continue; | ||
46 | |||
47 | /* Simple min distance search */ | ||
48 | val = node_distance(node, n); | ||
49 | |||
50 | if (val < min_val) { | ||
51 | min_val = val; | ||
52 | best_node = n; | ||
53 | } | ||
54 | } | ||
55 | |||
56 | set_bit(best_node, used_nodes); | ||
57 | return best_node; | ||
58 | } | ||
59 | |||
60 | /** | ||
61 | * sched_domain_node_span - get a cpumask for a node's sched_domain | ||
62 | * @node: node whose cpumask we're constructing | ||
63 | * @size: number of nodes to include in this span | ||
64 | * | ||
65 | * Given a node, construct a good cpumask for its sched_domain to span. It | ||
66 | * should be one that prevents unnecessary balancing, but also spreads tasks | ||
67 | * out optimally. | ||
68 | */ | ||
69 | static cpumask_t __devinit sched_domain_node_span(int node) | ||
70 | { | ||
71 | int i; | ||
72 | cpumask_t span, nodemask; | ||
73 | DECLARE_BITMAP(used_nodes, MAX_NUMNODES); | ||
74 | |||
75 | cpus_clear(span); | ||
76 | bitmap_zero(used_nodes, MAX_NUMNODES); | ||
77 | |||
78 | nodemask = node_to_cpumask(node); | ||
79 | cpus_or(span, span, nodemask); | ||
80 | set_bit(node, used_nodes); | ||
81 | |||
82 | for (i = 1; i < SD_NODES_PER_DOMAIN; i++) { | ||
83 | int next_node = find_next_best_node(node, used_nodes); | ||
84 | nodemask = node_to_cpumask(next_node); | ||
85 | cpus_or(span, span, nodemask); | ||
86 | } | ||
87 | |||
88 | return span; | ||
89 | } | ||
90 | #endif | ||
91 | |||
92 | /* | ||
93 | * At the moment, CONFIG_SCHED_SMT is never defined, but leave it in so we | ||
94 | * can switch it on easily if needed. | ||
95 | */ | ||
96 | #ifdef CONFIG_SCHED_SMT | ||
97 | static DEFINE_PER_CPU(struct sched_domain, cpu_domains); | ||
98 | static struct sched_group sched_group_cpus[NR_CPUS]; | ||
99 | static int __devinit cpu_to_cpu_group(int cpu) | ||
100 | { | ||
101 | return cpu; | ||
102 | } | ||
103 | #endif | ||
104 | |||
105 | static DEFINE_PER_CPU(struct sched_domain, phys_domains); | ||
106 | static struct sched_group sched_group_phys[NR_CPUS]; | ||
107 | static int __devinit cpu_to_phys_group(int cpu) | ||
108 | { | ||
109 | #ifdef CONFIG_SCHED_SMT | ||
110 | return first_cpu(cpu_sibling_map[cpu]); | ||
111 | #else | ||
112 | return cpu; | ||
113 | #endif | ||
114 | } | ||
115 | |||
116 | #ifdef CONFIG_NUMA | ||
117 | /* | ||
118 | * The init_sched_build_groups can't handle what we want to do with node | ||
119 | * groups, so roll our own. Now each node has its own list of groups which | ||
120 | * gets dynamically allocated. | ||
121 | */ | ||
122 | static DEFINE_PER_CPU(struct sched_domain, node_domains); | ||
123 | static struct sched_group *sched_group_nodes[MAX_NUMNODES]; | ||
124 | |||
125 | static DEFINE_PER_CPU(struct sched_domain, allnodes_domains); | ||
126 | static struct sched_group sched_group_allnodes[MAX_NUMNODES]; | ||
127 | |||
128 | static int __devinit cpu_to_allnodes_group(int cpu) | ||
129 | { | ||
130 | return cpu_to_node(cpu); | ||
131 | } | ||
132 | #endif | ||
133 | |||
134 | /* | ||
135 | * Set up scheduler domains and groups. Callers must hold the hotplug lock. | ||
136 | */ | ||
137 | void __devinit arch_init_sched_domains(void) | ||
138 | { | ||
139 | int i; | ||
140 | cpumask_t cpu_default_map; | ||
141 | |||
142 | /* | ||
143 | * Setup mask for cpus without special case scheduling requirements. | ||
144 | * For now this just excludes isolated cpus, but could be used to | ||
145 | * exclude other special cases in the future. | ||
146 | */ | ||
147 | cpus_complement(cpu_default_map, cpu_isolated_map); | ||
148 | cpus_and(cpu_default_map, cpu_default_map, cpu_online_map); | ||
149 | |||
150 | /* | ||
151 | * Set up domains. Isolated domains just stay on the dummy domain. | ||
152 | */ | ||
153 | for_each_cpu_mask(i, cpu_default_map) { | ||
154 | int group; | ||
155 | struct sched_domain *sd = NULL, *p; | ||
156 | cpumask_t nodemask = node_to_cpumask(cpu_to_node(i)); | ||
157 | |||
158 | cpus_and(nodemask, nodemask, cpu_default_map); | ||
159 | |||
160 | #ifdef CONFIG_NUMA | ||
161 | if (num_online_cpus() | ||
162 | > SD_NODES_PER_DOMAIN*cpus_weight(nodemask)) { | ||
163 | sd = &per_cpu(allnodes_domains, i); | ||
164 | *sd = SD_ALLNODES_INIT; | ||
165 | sd->span = cpu_default_map; | ||
166 | group = cpu_to_allnodes_group(i); | ||
167 | sd->groups = &sched_group_allnodes[group]; | ||
168 | p = sd; | ||
169 | } else | ||
170 | p = NULL; | ||
171 | |||
172 | sd = &per_cpu(node_domains, i); | ||
173 | *sd = SD_NODE_INIT; | ||
174 | sd->span = sched_domain_node_span(cpu_to_node(i)); | ||
175 | sd->parent = p; | ||
176 | cpus_and(sd->span, sd->span, cpu_default_map); | ||
177 | #endif | ||
178 | |||
179 | p = sd; | ||
180 | sd = &per_cpu(phys_domains, i); | ||
181 | group = cpu_to_phys_group(i); | ||
182 | *sd = SD_CPU_INIT; | ||
183 | sd->span = nodemask; | ||
184 | sd->parent = p; | ||
185 | sd->groups = &sched_group_phys[group]; | ||
186 | |||
187 | #ifdef CONFIG_SCHED_SMT | ||
188 | p = sd; | ||
189 | sd = &per_cpu(cpu_domains, i); | ||
190 | group = cpu_to_cpu_group(i); | ||
191 | *sd = SD_SIBLING_INIT; | ||
192 | sd->span = cpu_sibling_map[i]; | ||
193 | cpus_and(sd->span, sd->span, cpu_default_map); | ||
194 | sd->parent = p; | ||
195 | sd->groups = &sched_group_cpus[group]; | ||
196 | #endif | ||
197 | } | ||
198 | |||
199 | #ifdef CONFIG_SCHED_SMT | ||
200 | /* Set up CPU (sibling) groups */ | ||
201 | for_each_cpu_mask(i, cpu_default_map) { | ||
202 | cpumask_t this_sibling_map = cpu_sibling_map[i]; | ||
203 | cpus_and(this_sibling_map, this_sibling_map, cpu_default_map); | ||
204 | if (i != first_cpu(this_sibling_map)) | ||
205 | continue; | ||
206 | |||
207 | init_sched_build_groups(sched_group_cpus, this_sibling_map, | ||
208 | &cpu_to_cpu_group); | ||
209 | } | ||
210 | #endif | ||
211 | |||
212 | /* Set up physical groups */ | ||
213 | for (i = 0; i < MAX_NUMNODES; i++) { | ||
214 | cpumask_t nodemask = node_to_cpumask(i); | ||
215 | |||
216 | cpus_and(nodemask, nodemask, cpu_default_map); | ||
217 | if (cpus_empty(nodemask)) | ||
218 | continue; | ||
219 | |||
220 | init_sched_build_groups(sched_group_phys, nodemask, | ||
221 | &cpu_to_phys_group); | ||
222 | } | ||
223 | |||
224 | #ifdef CONFIG_NUMA | ||
225 | init_sched_build_groups(sched_group_allnodes, cpu_default_map, | ||
226 | &cpu_to_allnodes_group); | ||
227 | |||
228 | for (i = 0; i < MAX_NUMNODES; i++) { | ||
229 | /* Set up node groups */ | ||
230 | struct sched_group *sg, *prev; | ||
231 | cpumask_t nodemask = node_to_cpumask(i); | ||
232 | cpumask_t domainspan; | ||
233 | cpumask_t covered = CPU_MASK_NONE; | ||
234 | int j; | ||
235 | |||
236 | cpus_and(nodemask, nodemask, cpu_default_map); | ||
237 | if (cpus_empty(nodemask)) | ||
238 | continue; | ||
239 | |||
240 | domainspan = sched_domain_node_span(i); | ||
241 | cpus_and(domainspan, domainspan, cpu_default_map); | ||
242 | |||
243 | sg = kmalloc(sizeof(struct sched_group), GFP_KERNEL); | ||
244 | sched_group_nodes[i] = sg; | ||
245 | for_each_cpu_mask(j, nodemask) { | ||
246 | struct sched_domain *sd; | ||
247 | sd = &per_cpu(node_domains, j); | ||
248 | sd->groups = sg; | ||
249 | if (sd->groups == NULL) { | ||
250 | /* Turn off balancing if we have no groups */ | ||
251 | sd->flags = 0; | ||
252 | } | ||
253 | } | ||
254 | if (!sg) { | ||
255 | printk(KERN_WARNING | ||
256 | "Can not alloc domain group for node %d\n", i); | ||
257 | continue; | ||
258 | } | ||
259 | sg->cpu_power = 0; | ||
260 | sg->cpumask = nodemask; | ||
261 | cpus_or(covered, covered, nodemask); | ||
262 | prev = sg; | ||
263 | |||
264 | for (j = 0; j < MAX_NUMNODES; j++) { | ||
265 | cpumask_t tmp, notcovered; | ||
266 | int n = (i + j) % MAX_NUMNODES; | ||
267 | |||
268 | cpus_complement(notcovered, covered); | ||
269 | cpus_and(tmp, notcovered, cpu_default_map); | ||
270 | cpus_and(tmp, tmp, domainspan); | ||
271 | if (cpus_empty(tmp)) | ||
272 | break; | ||
273 | |||
274 | nodemask = node_to_cpumask(n); | ||
275 | cpus_and(tmp, tmp, nodemask); | ||
276 | if (cpus_empty(tmp)) | ||
277 | continue; | ||
278 | |||
279 | sg = kmalloc(sizeof(struct sched_group), GFP_KERNEL); | ||
280 | if (!sg) { | ||
281 | printk(KERN_WARNING | ||
282 | "Can not alloc domain group for node %d\n", j); | ||
283 | break; | ||
284 | } | ||
285 | sg->cpu_power = 0; | ||
286 | sg->cpumask = tmp; | ||
287 | cpus_or(covered, covered, tmp); | ||
288 | prev->next = sg; | ||
289 | prev = sg; | ||
290 | } | ||
291 | prev->next = sched_group_nodes[i]; | ||
292 | } | ||
293 | #endif | ||
294 | |||
295 | /* Calculate CPU power for physical packages and nodes */ | ||
296 | for_each_cpu_mask(i, cpu_default_map) { | ||
297 | int power; | ||
298 | struct sched_domain *sd; | ||
299 | #ifdef CONFIG_SCHED_SMT | ||
300 | sd = &per_cpu(cpu_domains, i); | ||
301 | power = SCHED_LOAD_SCALE; | ||
302 | sd->groups->cpu_power = power; | ||
303 | #endif | ||
304 | |||
305 | sd = &per_cpu(phys_domains, i); | ||
306 | power = SCHED_LOAD_SCALE + SCHED_LOAD_SCALE * | ||
307 | (cpus_weight(sd->groups->cpumask)-1) / 10; | ||
308 | sd->groups->cpu_power = power; | ||
309 | |||
310 | #ifdef CONFIG_NUMA | ||
311 | sd = &per_cpu(allnodes_domains, i); | ||
312 | if (sd->groups) { | ||
313 | power = SCHED_LOAD_SCALE + SCHED_LOAD_SCALE * | ||
314 | (cpus_weight(sd->groups->cpumask)-1) / 10; | ||
315 | sd->groups->cpu_power = power; | ||
316 | } | ||
317 | #endif | ||
318 | } | ||
319 | |||
320 | #ifdef CONFIG_NUMA | ||
321 | for (i = 0; i < MAX_NUMNODES; i++) { | ||
322 | struct sched_group *sg = sched_group_nodes[i]; | ||
323 | int j; | ||
324 | |||
325 | if (sg == NULL) | ||
326 | continue; | ||
327 | next_sg: | ||
328 | for_each_cpu_mask(j, sg->cpumask) { | ||
329 | struct sched_domain *sd; | ||
330 | int power; | ||
331 | |||
332 | sd = &per_cpu(phys_domains, j); | ||
333 | if (j != first_cpu(sd->groups->cpumask)) { | ||
334 | /* | ||
335 | * Only add "power" once for each | ||
336 | * physical package. | ||
337 | */ | ||
338 | continue; | ||
339 | } | ||
340 | power = SCHED_LOAD_SCALE + SCHED_LOAD_SCALE * | ||
341 | (cpus_weight(sd->groups->cpumask)-1) / 10; | ||
342 | |||
343 | sg->cpu_power += power; | ||
344 | } | ||
345 | sg = sg->next; | ||
346 | if (sg != sched_group_nodes[i]) | ||
347 | goto next_sg; | ||
348 | } | ||
349 | #endif | ||
350 | |||
351 | /* Attach the domains */ | ||
352 | for_each_online_cpu(i) { | ||
353 | struct sched_domain *sd; | ||
354 | #ifdef CONFIG_SCHED_SMT | ||
355 | sd = &per_cpu(cpu_domains, i); | ||
356 | #else | ||
357 | sd = &per_cpu(phys_domains, i); | ||
358 | #endif | ||
359 | cpu_attach_domain(sd, i); | ||
360 | } | ||
361 | } | ||
362 | |||
363 | void __devinit arch_destroy_sched_domains(void) | ||
364 | { | ||
365 | #ifdef CONFIG_NUMA | ||
366 | int i; | ||
367 | for (i = 0; i < MAX_NUMNODES; i++) { | ||
368 | struct sched_group *oldsg, *sg = sched_group_nodes[i]; | ||
369 | if (sg == NULL) | ||
370 | continue; | ||
371 | sg = sg->next; | ||
372 | next_sg: | ||
373 | oldsg = sg; | ||
374 | sg = sg->next; | ||
375 | kfree(oldsg); | ||
376 | if (oldsg != sched_group_nodes[i]) | ||
377 | goto next_sg; | ||
378 | sched_group_nodes[i] = NULL; | ||
379 | } | ||
380 | #endif | ||
381 | } | ||
382 | |||
diff --git a/arch/ia64/kernel/efi.c b/arch/ia64/kernel/efi.c new file mode 100644 index 000000000000..4a3b1aac43e7 --- /dev/null +++ b/arch/ia64/kernel/efi.c | |||
@@ -0,0 +1,832 @@ | |||
1 | /* | ||
2 | * Extensible Firmware Interface | ||
3 | * | ||
4 | * Based on Extensible Firmware Interface Specification version 0.9 April 30, 1999 | ||
5 | * | ||
6 | * Copyright (C) 1999 VA Linux Systems | ||
7 | * Copyright (C) 1999 Walt Drummond <drummond@valinux.com> | ||
8 | * Copyright (C) 1999-2003 Hewlett-Packard Co. | ||
9 | * David Mosberger-Tang <davidm@hpl.hp.com> | ||
10 | * Stephane Eranian <eranian@hpl.hp.com> | ||
11 | * | ||
12 | * All EFI Runtime Services are not implemented yet as EFI only | ||
13 | * supports physical mode addressing on SoftSDV. This is to be fixed | ||
14 | * in a future version. --drummond 1999-07-20 | ||
15 | * | ||
16 | * Implemented EFI runtime services and virtual mode calls. --davidm | ||
17 | * | ||
18 | * Goutham Rao: <goutham.rao@intel.com> | ||
19 | * Skip non-WB memory and ignore empty memory ranges. | ||
20 | */ | ||
21 | #include <linux/config.h> | ||
22 | #include <linux/module.h> | ||
23 | #include <linux/kernel.h> | ||
24 | #include <linux/init.h> | ||
25 | #include <linux/types.h> | ||
26 | #include <linux/time.h> | ||
27 | #include <linux/efi.h> | ||
28 | |||
29 | #include <asm/io.h> | ||
30 | #include <asm/kregs.h> | ||
31 | #include <asm/meminit.h> | ||
32 | #include <asm/pgtable.h> | ||
33 | #include <asm/processor.h> | ||
34 | #include <asm/mca.h> | ||
35 | |||
36 | #define EFI_DEBUG 0 | ||
37 | |||
38 | extern efi_status_t efi_call_phys (void *, ...); | ||
39 | |||
40 | struct efi efi; | ||
41 | EXPORT_SYMBOL(efi); | ||
42 | static efi_runtime_services_t *runtime; | ||
43 | static unsigned long mem_limit = ~0UL, max_addr = ~0UL; | ||
44 | |||
45 | #define efi_call_virt(f, args...) (*(f))(args) | ||
46 | |||
/*
 * Generate prefix##_get_time(): saves the scratch FP registers, calls
 * runtime->get_time through efi_call_##prefix with pointer arguments passed
 * through adjust_arg (a NULL tc stays NULL), then restores the registers.
 */
#define STUB_GET_TIME(prefix, adjust_arg) \
static efi_status_t \
prefix##_get_time (efi_time_t *tm, efi_time_cap_t *tc) \
{ \
	struct ia64_fpreg fpsave[6]; \
	efi_time_cap_t *tc_arg = NULL; \
	efi_status_t rc; \
 \
	if (tc) \
		tc_arg = adjust_arg(tc); \
	ia64_save_scratch_fpregs(fpsave); \
	rc = efi_call_##prefix((efi_get_time_t *) __va(runtime->get_time), \
			       adjust_arg(tm), tc_arg); \
	ia64_load_scratch_fpregs(fpsave); \
	return rc; \
}
62 | |||
/*
 * Generate prefix##_set_time(): calls runtime->set_time through
 * efi_call_##prefix with tm passed through adjust_arg, bracketed by a
 * save/restore of the scratch FP registers.
 */
#define STUB_SET_TIME(prefix, adjust_arg) \
static efi_status_t \
prefix##_set_time (efi_time_t *tm) \
{ \
	struct ia64_fpreg fpsave[6]; \
	efi_status_t rc; \
 \
	ia64_save_scratch_fpregs(fpsave); \
	rc = efi_call_##prefix((efi_set_time_t *) __va(runtime->set_time), \
			       adjust_arg(tm)); \
	ia64_load_scratch_fpregs(fpsave); \
	return rc; \
}
75 | |||
/*
 * Generate prefix##_get_wakeup_time(): calls runtime->get_wakeup_time
 * through efi_call_##prefix with all three pointers passed through
 * adjust_arg, bracketed by a save/restore of the scratch FP registers.
 */
#define STUB_GET_WAKEUP_TIME(prefix, adjust_arg) \
static efi_status_t \
prefix##_get_wakeup_time (efi_bool_t *enabled, efi_bool_t *pending, efi_time_t *tm) \
{ \
	struct ia64_fpreg fpsave[6]; \
	efi_status_t rc; \
 \
	ia64_save_scratch_fpregs(fpsave); \
	rc = efi_call_##prefix((efi_get_wakeup_time_t *) __va(runtime->get_wakeup_time), \
			       adjust_arg(enabled), adjust_arg(pending), adjust_arg(tm)); \
	ia64_load_scratch_fpregs(fpsave); \
	return rc; \
}
89 | |||
/*
 * Generate prefix##_set_wakeup_time(): calls runtime->set_wakeup_time
 * through efi_call_##prefix.  tm is passed through adjust_arg only when it
 * is non-NULL; the call is bracketed by a save/restore of the scratch FP
 * registers.
 */
#define STUB_SET_WAKEUP_TIME(prefix, adjust_arg) \
static efi_status_t \
prefix##_set_wakeup_time (efi_bool_t enabled, efi_time_t *tm) \
{ \
	struct ia64_fpreg fpsave[6]; \
	efi_time_t *tm_arg = NULL; \
	efi_status_t rc; \
 \
	if (tm) \
		tm_arg = adjust_arg(tm); \
	ia64_save_scratch_fpregs(fpsave); \
	rc = efi_call_##prefix((efi_set_wakeup_time_t *) __va(runtime->set_wakeup_time), \
			       enabled, tm_arg); \
	ia64_load_scratch_fpregs(fpsave); \
	return rc; \
}
106 | |||
/*
 * Generate prefix##_get_variable(): calls runtime->get_variable through
 * efi_call_##prefix.  All pointers are passed through adjust_arg, except
 * that a NULL attr is forwarded as NULL; the call is bracketed by a
 * save/restore of the scratch FP registers.
 */
#define STUB_GET_VARIABLE(prefix, adjust_arg) \
static efi_status_t \
prefix##_get_variable (efi_char16_t *name, efi_guid_t *vendor, u32 *attr, \
		       unsigned long *data_size, void *data) \
{ \
	struct ia64_fpreg fpsave[6]; \
	u32 *attr_arg = NULL; \
	efi_status_t rc; \
 \
	if (attr) \
		attr_arg = adjust_arg(attr); \
	ia64_save_scratch_fpregs(fpsave); \
	rc = efi_call_##prefix((efi_get_variable_t *) __va(runtime->get_variable), \
			       adjust_arg(name), adjust_arg(vendor), attr_arg, \
			       adjust_arg(data_size), adjust_arg(data)); \
	ia64_load_scratch_fpregs(fpsave); \
	return rc; \
}
125 | |||
/*
 * Generate prefix##_get_next_variable(): calls runtime->get_next_variable
 * through efi_call_##prefix with all pointers passed through adjust_arg,
 * bracketed by a save/restore of the scratch FP registers.
 */
#define STUB_GET_NEXT_VARIABLE(prefix, adjust_arg) \
static efi_status_t \
prefix##_get_next_variable (unsigned long *name_size, efi_char16_t *name, efi_guid_t *vendor) \
{ \
	struct ia64_fpreg fpsave[6]; \
	efi_status_t rc; \
 \
	ia64_save_scratch_fpregs(fpsave); \
	rc = efi_call_##prefix((efi_get_next_variable_t *) __va(runtime->get_next_variable), \
			       adjust_arg(name_size), adjust_arg(name), adjust_arg(vendor)); \
	ia64_load_scratch_fpregs(fpsave); \
	return rc; \
}
139 | |||
/*
 * Generate prefix##_set_variable(): calls runtime->set_variable through
 * efi_call_##prefix.  Pointer arguments go through adjust_arg, attr and
 * data_size are passed by value; the call is bracketed by a save/restore of
 * the scratch FP registers.
 */
#define STUB_SET_VARIABLE(prefix, adjust_arg) \
static efi_status_t \
prefix##_set_variable (efi_char16_t *name, efi_guid_t *vendor, unsigned long attr, \
		       unsigned long data_size, void *data) \
{ \
	struct ia64_fpreg fpsave[6]; \
	efi_status_t rc; \
 \
	ia64_save_scratch_fpregs(fpsave); \
	rc = efi_call_##prefix((efi_set_variable_t *) __va(runtime->set_variable), \
			       adjust_arg(name), adjust_arg(vendor), attr, data_size, \
			       adjust_arg(data)); \
	ia64_load_scratch_fpregs(fpsave); \
	return rc; \
}
155 | |||
/*
 * Generate prefix##_get_next_high_mono_count(): calls
 * runtime->get_next_high_mono_count through efi_call_##prefix with count
 * passed through adjust_arg, bracketed by a save/restore of the scratch FP
 * registers.
 */
#define STUB_GET_NEXT_HIGH_MONO_COUNT(prefix, adjust_arg) \
static efi_status_t \
prefix##_get_next_high_mono_count (u32 *count) \
{ \
	struct ia64_fpreg fpsave[6]; \
	efi_status_t rc; \
 \
	ia64_save_scratch_fpregs(fpsave); \
	rc = efi_call_##prefix((efi_get_next_high_mono_count_t *) \
			       __va(runtime->get_next_high_mono_count), adjust_arg(count)); \
	ia64_load_scratch_fpregs(fpsave); \
	return rc; \
}
169 | |||
/*
 * Generate prefix##_reset_system(): calls runtime->reset_system through
 * efi_call_##prefix.  data is passed through adjust_arg only when it is
 * non-NULL.  The scratch FP registers are saved first and restored in case
 * the call returns.
 */
#define STUB_RESET_SYSTEM(prefix, adjust_arg) \
static void \
prefix##_reset_system (int reset_type, efi_status_t status, \
		       unsigned long data_size, efi_char16_t *data) \
{ \
	struct ia64_fpreg fpsave[6]; \
	efi_char16_t *data_arg = NULL; \
 \
	if (data) \
		data_arg = adjust_arg(data); \
 \
	ia64_save_scratch_fpregs(fpsave); \
	efi_call_##prefix((efi_reset_system_t *) __va(runtime->reset_system), \
			  reset_type, status, data_size, data_arg); \
	/* should not return, but just in case... */ \
	ia64_load_scratch_fpregs(fpsave); \
}
187 | |||
188 | #define phys_ptr(arg) ((__typeof__(arg)) ia64_tpa(arg)) | ||
189 | |||
190 | STUB_GET_TIME(phys, phys_ptr) | ||
191 | STUB_SET_TIME(phys, phys_ptr) | ||
192 | STUB_GET_WAKEUP_TIME(phys, phys_ptr) | ||
193 | STUB_SET_WAKEUP_TIME(phys, phys_ptr) | ||
194 | STUB_GET_VARIABLE(phys, phys_ptr) | ||
195 | STUB_GET_NEXT_VARIABLE(phys, phys_ptr) | ||
196 | STUB_SET_VARIABLE(phys, phys_ptr) | ||
197 | STUB_GET_NEXT_HIGH_MONO_COUNT(phys, phys_ptr) | ||
198 | STUB_RESET_SYSTEM(phys, phys_ptr) | ||
199 | |||
200 | #define id(arg) arg | ||
201 | |||
202 | STUB_GET_TIME(virt, id) | ||
203 | STUB_SET_TIME(virt, id) | ||
204 | STUB_GET_WAKEUP_TIME(virt, id) | ||
205 | STUB_SET_WAKEUP_TIME(virt, id) | ||
206 | STUB_GET_VARIABLE(virt, id) | ||
207 | STUB_GET_NEXT_VARIABLE(virt, id) | ||
208 | STUB_SET_VARIABLE(virt, id) | ||
209 | STUB_GET_NEXT_HIGH_MONO_COUNT(virt, id) | ||
210 | STUB_RESET_SYSTEM(virt, id) | ||
211 | |||
212 | void | ||
213 | efi_gettimeofday (struct timespec *ts) | ||
214 | { | ||
215 | efi_time_t tm; | ||
216 | |||
217 | memset(ts, 0, sizeof(ts)); | ||
218 | if ((*efi.get_time)(&tm, NULL) != EFI_SUCCESS) | ||
219 | return; | ||
220 | |||
221 | ts->tv_sec = mktime(tm.year, tm.month, tm.day, tm.hour, tm.minute, tm.second); | ||
222 | ts->tv_nsec = tm.nanosecond; | ||
223 | } | ||
224 | |||
225 | static int | ||
226 | is_available_memory (efi_memory_desc_t *md) | ||
227 | { | ||
228 | if (!(md->attribute & EFI_MEMORY_WB)) | ||
229 | return 0; | ||
230 | |||
231 | switch (md->type) { | ||
232 | case EFI_LOADER_CODE: | ||
233 | case EFI_LOADER_DATA: | ||
234 | case EFI_BOOT_SERVICES_CODE: | ||
235 | case EFI_BOOT_SERVICES_DATA: | ||
236 | case EFI_CONVENTIONAL_MEMORY: | ||
237 | return 1; | ||
238 | } | ||
239 | return 0; | ||
240 | } | ||
241 | |||
242 | /* | ||
243 | * Trim descriptor MD so its starts at address START_ADDR. If the descriptor covers | ||
244 | * memory that is normally available to the kernel, issue a warning that some memory | ||
245 | * is being ignored. | ||
246 | */ | ||
247 | static void | ||
248 | trim_bottom (efi_memory_desc_t *md, u64 start_addr) | ||
249 | { | ||
250 | u64 num_skipped_pages; | ||
251 | |||
252 | if (md->phys_addr >= start_addr || !md->num_pages) | ||
253 | return; | ||
254 | |||
255 | num_skipped_pages = (start_addr - md->phys_addr) >> EFI_PAGE_SHIFT; | ||
256 | if (num_skipped_pages > md->num_pages) | ||
257 | num_skipped_pages = md->num_pages; | ||
258 | |||
259 | if (is_available_memory(md)) | ||
260 | printk(KERN_NOTICE "efi.%s: ignoring %luKB of memory at 0x%lx due to granule hole " | ||
261 | "at 0x%lx\n", __FUNCTION__, | ||
262 | (num_skipped_pages << EFI_PAGE_SHIFT) >> 10, | ||
263 | md->phys_addr, start_addr - IA64_GRANULE_SIZE); | ||
264 | /* | ||
265 | * NOTE: Don't set md->phys_addr to START_ADDR because that could cause the memory | ||
266 | * descriptor list to become unsorted. In such a case, md->num_pages will be | ||
267 | * zero, so the Right Thing will happen. | ||
268 | */ | ||
269 | md->phys_addr += num_skipped_pages << EFI_PAGE_SHIFT; | ||
270 | md->num_pages -= num_skipped_pages; | ||
271 | } | ||
272 | |||
273 | static void | ||
274 | trim_top (efi_memory_desc_t *md, u64 end_addr) | ||
275 | { | ||
276 | u64 num_dropped_pages, md_end_addr; | ||
277 | |||
278 | md_end_addr = md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT); | ||
279 | |||
280 | if (md_end_addr <= end_addr || !md->num_pages) | ||
281 | return; | ||
282 | |||
283 | num_dropped_pages = (md_end_addr - end_addr) >> EFI_PAGE_SHIFT; | ||
284 | if (num_dropped_pages > md->num_pages) | ||
285 | num_dropped_pages = md->num_pages; | ||
286 | |||
287 | if (is_available_memory(md)) | ||
288 | printk(KERN_NOTICE "efi.%s: ignoring %luKB of memory at 0x%lx due to granule hole " | ||
289 | "at 0x%lx\n", __FUNCTION__, | ||
290 | (num_dropped_pages << EFI_PAGE_SHIFT) >> 10, | ||
291 | md->phys_addr, end_addr); | ||
292 | md->num_pages -= num_dropped_pages; | ||
293 | } | ||
294 | |||
295 | /* | ||
296 | * Walks the EFI memory map and calls CALLBACK once for each EFI memory descriptor that | ||
297 | * has memory that is available for OS use. | ||
298 | */ | ||
299 | void | ||
300 | efi_memmap_walk (efi_freemem_callback_t callback, void *arg) | ||
301 | { | ||
302 | int prev_valid = 0; | ||
303 | struct range { | ||
304 | u64 start; | ||
305 | u64 end; | ||
306 | } prev, curr; | ||
307 | void *efi_map_start, *efi_map_end, *p, *q; | ||
308 | efi_memory_desc_t *md, *check_md; | ||
309 | u64 efi_desc_size, start, end, granule_addr, last_granule_addr, first_non_wb_addr = 0; | ||
310 | unsigned long total_mem = 0; | ||
311 | |||
312 | efi_map_start = __va(ia64_boot_param->efi_memmap); | ||
313 | efi_map_end = efi_map_start + ia64_boot_param->efi_memmap_size; | ||
314 | efi_desc_size = ia64_boot_param->efi_memdesc_size; | ||
315 | |||
316 | for (p = efi_map_start; p < efi_map_end; p += efi_desc_size) { | ||
317 | md = p; | ||
318 | |||
319 | /* skip over non-WB memory descriptors; that's all we're interested in... */ | ||
320 | if (!(md->attribute & EFI_MEMORY_WB)) | ||
321 | continue; | ||
322 | |||
323 | /* | ||
324 | * granule_addr is the base of md's first granule. | ||
325 | * [granule_addr - first_non_wb_addr) is guaranteed to | ||
326 | * be contiguous WB memory. | ||
327 | */ | ||
328 | granule_addr = GRANULEROUNDDOWN(md->phys_addr); | ||
329 | first_non_wb_addr = max(first_non_wb_addr, granule_addr); | ||
330 | |||
331 | if (first_non_wb_addr < md->phys_addr) { | ||
332 | trim_bottom(md, granule_addr + IA64_GRANULE_SIZE); | ||
333 | granule_addr = GRANULEROUNDDOWN(md->phys_addr); | ||
334 | first_non_wb_addr = max(first_non_wb_addr, granule_addr); | ||
335 | } | ||
336 | |||
337 | for (q = p; q < efi_map_end; q += efi_desc_size) { | ||
338 | check_md = q; | ||
339 | |||
340 | if ((check_md->attribute & EFI_MEMORY_WB) && | ||
341 | (check_md->phys_addr == first_non_wb_addr)) | ||
342 | first_non_wb_addr += check_md->num_pages << EFI_PAGE_SHIFT; | ||
343 | else | ||
344 | break; /* non-WB or hole */ | ||
345 | } | ||
346 | |||
347 | last_granule_addr = GRANULEROUNDDOWN(first_non_wb_addr); | ||
348 | if (last_granule_addr < md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT)) | ||
349 | trim_top(md, last_granule_addr); | ||
350 | |||
351 | if (is_available_memory(md)) { | ||
352 | if (md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT) >= max_addr) { | ||
353 | if (md->phys_addr >= max_addr) | ||
354 | continue; | ||
355 | md->num_pages = (max_addr - md->phys_addr) >> EFI_PAGE_SHIFT; | ||
356 | first_non_wb_addr = max_addr; | ||
357 | } | ||
358 | |||
359 | if (total_mem >= mem_limit) | ||
360 | continue; | ||
361 | |||
362 | if (total_mem + (md->num_pages << EFI_PAGE_SHIFT) > mem_limit) { | ||
363 | unsigned long limit_addr = md->phys_addr; | ||
364 | |||
365 | limit_addr += mem_limit - total_mem; | ||
366 | limit_addr = GRANULEROUNDDOWN(limit_addr); | ||
367 | |||
368 | if (md->phys_addr > limit_addr) | ||
369 | continue; | ||
370 | |||
371 | md->num_pages = (limit_addr - md->phys_addr) >> | ||
372 | EFI_PAGE_SHIFT; | ||
373 | first_non_wb_addr = max_addr = md->phys_addr + | ||
374 | (md->num_pages << EFI_PAGE_SHIFT); | ||
375 | } | ||
376 | total_mem += (md->num_pages << EFI_PAGE_SHIFT); | ||
377 | |||
378 | if (md->num_pages == 0) | ||
379 | continue; | ||
380 | |||
381 | curr.start = PAGE_OFFSET + md->phys_addr; | ||
382 | curr.end = curr.start + (md->num_pages << EFI_PAGE_SHIFT); | ||
383 | |||
384 | if (!prev_valid) { | ||
385 | prev = curr; | ||
386 | prev_valid = 1; | ||
387 | } else { | ||
388 | if (curr.start < prev.start) | ||
389 | printk(KERN_ERR "Oops: EFI memory table not ordered!\n"); | ||
390 | |||
391 | if (prev.end == curr.start) { | ||
392 | /* merge two consecutive memory ranges */ | ||
393 | prev.end = curr.end; | ||
394 | } else { | ||
395 | start = PAGE_ALIGN(prev.start); | ||
396 | end = prev.end & PAGE_MASK; | ||
397 | if ((end > start) && (*callback)(start, end, arg) < 0) | ||
398 | return; | ||
399 | prev = curr; | ||
400 | } | ||
401 | } | ||
402 | } | ||
403 | } | ||
404 | if (prev_valid) { | ||
405 | start = PAGE_ALIGN(prev.start); | ||
406 | end = prev.end & PAGE_MASK; | ||
407 | if (end > start) | ||
408 | (*callback)(start, end, arg); | ||
409 | } | ||
410 | } | ||
411 | |||
412 | /* | ||
413 | * Look for the PAL_CODE region reported by EFI and maps it using an | ||
414 | * ITR to enable safe PAL calls in virtual mode. See IA-64 Processor | ||
415 | * Abstraction Layer chapter 11 in ADAG | ||
416 | */ | ||
417 | |||
418 | void * | ||
419 | efi_get_pal_addr (void) | ||
420 | { | ||
421 | void *efi_map_start, *efi_map_end, *p; | ||
422 | efi_memory_desc_t *md; | ||
423 | u64 efi_desc_size; | ||
424 | int pal_code_count = 0; | ||
425 | u64 vaddr, mask; | ||
426 | |||
427 | efi_map_start = __va(ia64_boot_param->efi_memmap); | ||
428 | efi_map_end = efi_map_start + ia64_boot_param->efi_memmap_size; | ||
429 | efi_desc_size = ia64_boot_param->efi_memdesc_size; | ||
430 | |||
431 | for (p = efi_map_start; p < efi_map_end; p += efi_desc_size) { | ||
432 | md = p; | ||
433 | if (md->type != EFI_PAL_CODE) | ||
434 | continue; | ||
435 | |||
436 | if (++pal_code_count > 1) { | ||
437 | printk(KERN_ERR "Too many EFI Pal Code memory ranges, dropped @ %lx\n", | ||
438 | md->phys_addr); | ||
439 | continue; | ||
440 | } | ||
441 | /* | ||
442 | * The only ITLB entry in region 7 that is used is the one installed by | ||
443 | * __start(). That entry covers a 64MB range. | ||
444 | */ | ||
445 | mask = ~((1 << KERNEL_TR_PAGE_SHIFT) - 1); | ||
446 | vaddr = PAGE_OFFSET + md->phys_addr; | ||
447 | |||
448 | /* | ||
449 | * We must check that the PAL mapping won't overlap with the kernel | ||
450 | * mapping. | ||
451 | * | ||
452 | * PAL code is guaranteed to be aligned on a power of 2 between 4k and | ||
453 | * 256KB and that only one ITR is needed to map it. This implies that the | ||
454 | * PAL code is always aligned on its size, i.e., the closest matching page | ||
455 | * size supported by the TLB. Therefore PAL code is guaranteed never to | ||
456 | * cross a 64MB unless it is bigger than 64MB (very unlikely!). So for | ||
457 | * now the following test is enough to determine whether or not we need a | ||
458 | * dedicated ITR for the PAL code. | ||
459 | */ | ||
460 | if ((vaddr & mask) == (KERNEL_START & mask)) { | ||
461 | printk(KERN_INFO "%s: no need to install ITR for PAL code\n", | ||
462 | __FUNCTION__); | ||
463 | continue; | ||
464 | } | ||
465 | |||
466 | if (md->num_pages << EFI_PAGE_SHIFT > IA64_GRANULE_SIZE) | ||
467 | panic("Woah! PAL code size bigger than a granule!"); | ||
468 | |||
469 | #if EFI_DEBUG | ||
470 | mask = ~((1 << IA64_GRANULE_SHIFT) - 1); | ||
471 | |||
472 | printk(KERN_INFO "CPU %d: mapping PAL code [0x%lx-0x%lx) into [0x%lx-0x%lx)\n", | ||
473 | smp_processor_id(), md->phys_addr, | ||
474 | md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT), | ||
475 | vaddr & mask, (vaddr & mask) + IA64_GRANULE_SIZE); | ||
476 | #endif | ||
477 | return __va(md->phys_addr); | ||
478 | } | ||
479 | printk(KERN_WARNING "%s: no PAL-code memory-descriptor found", | ||
480 | __FUNCTION__); | ||
481 | return NULL; | ||
482 | } | ||
483 | |||
484 | void | ||
485 | efi_map_pal_code (void) | ||
486 | { | ||
487 | void *pal_vaddr = efi_get_pal_addr (); | ||
488 | u64 psr; | ||
489 | |||
490 | if (!pal_vaddr) | ||
491 | return; | ||
492 | |||
493 | /* | ||
494 | * Cannot write to CRx with PSR.ic=1 | ||
495 | */ | ||
496 | psr = ia64_clear_ic(); | ||
497 | ia64_itr(0x1, IA64_TR_PALCODE, GRANULEROUNDDOWN((unsigned long) pal_vaddr), | ||
498 | pte_val(pfn_pte(__pa(pal_vaddr) >> PAGE_SHIFT, PAGE_KERNEL)), | ||
499 | IA64_GRANULE_SHIFT); | ||
500 | ia64_set_psr(psr); /* restore psr */ | ||
501 | ia64_srlz_i(); | ||
502 | } | ||
503 | |||
504 | void __init | ||
505 | efi_init (void) | ||
506 | { | ||
507 | void *efi_map_start, *efi_map_end; | ||
508 | efi_config_table_t *config_tables; | ||
509 | efi_char16_t *c16; | ||
510 | u64 efi_desc_size; | ||
511 | char *cp, *end, vendor[100] = "unknown"; | ||
512 | extern char saved_command_line[]; | ||
513 | int i; | ||
514 | |||
515 | /* it's too early to be able to use the standard kernel command line support... */ | ||
516 | for (cp = saved_command_line; *cp; ) { | ||
517 | if (memcmp(cp, "mem=", 4) == 0) { | ||
518 | cp += 4; | ||
519 | mem_limit = memparse(cp, &end); | ||
520 | if (end != cp) | ||
521 | break; | ||
522 | cp = end; | ||
523 | } else if (memcmp(cp, "max_addr=", 9) == 0) { | ||
524 | cp += 9; | ||
525 | max_addr = GRANULEROUNDDOWN(memparse(cp, &end)); | ||
526 | if (end != cp) | ||
527 | break; | ||
528 | cp = end; | ||
529 | } else { | ||
530 | while (*cp != ' ' && *cp) | ||
531 | ++cp; | ||
532 | while (*cp == ' ') | ||
533 | ++cp; | ||
534 | } | ||
535 | } | ||
536 | if (max_addr != ~0UL) | ||
537 | printk(KERN_INFO "Ignoring memory above %luMB\n", max_addr >> 20); | ||
538 | |||
539 | efi.systab = __va(ia64_boot_param->efi_systab); | ||
540 | |||
541 | /* | ||
542 | * Verify the EFI Table | ||
543 | */ | ||
544 | if (efi.systab == NULL) | ||
545 | panic("Woah! Can't find EFI system table.\n"); | ||
546 | if (efi.systab->hdr.signature != EFI_SYSTEM_TABLE_SIGNATURE) | ||
547 | panic("Woah! EFI system table signature incorrect\n"); | ||
548 | if ((efi.systab->hdr.revision ^ EFI_SYSTEM_TABLE_REVISION) >> 16 != 0) | ||
549 | printk(KERN_WARNING "Warning: EFI system table major version mismatch: " | ||
550 | "got %d.%02d, expected %d.%02d\n", | ||
551 | efi.systab->hdr.revision >> 16, efi.systab->hdr.revision & 0xffff, | ||
552 | EFI_SYSTEM_TABLE_REVISION >> 16, EFI_SYSTEM_TABLE_REVISION & 0xffff); | ||
553 | |||
554 | config_tables = __va(efi.systab->tables); | ||
555 | |||
556 | /* Show what we know for posterity */ | ||
557 | c16 = __va(efi.systab->fw_vendor); | ||
558 | if (c16) { | ||
559 | for (i = 0;i < (int) sizeof(vendor) && *c16; ++i) | ||
560 | vendor[i] = *c16++; | ||
561 | vendor[i] = '\0'; | ||
562 | } | ||
563 | |||
564 | printk(KERN_INFO "EFI v%u.%.02u by %s:", | ||
565 | efi.systab->hdr.revision >> 16, efi.systab->hdr.revision & 0xffff, vendor); | ||
566 | |||
567 | for (i = 0; i < (int) efi.systab->nr_tables; i++) { | ||
568 | if (efi_guidcmp(config_tables[i].guid, MPS_TABLE_GUID) == 0) { | ||
569 | efi.mps = __va(config_tables[i].table); | ||
570 | printk(" MPS=0x%lx", config_tables[i].table); | ||
571 | } else if (efi_guidcmp(config_tables[i].guid, ACPI_20_TABLE_GUID) == 0) { | ||
572 | efi.acpi20 = __va(config_tables[i].table); | ||
573 | printk(" ACPI 2.0=0x%lx", config_tables[i].table); | ||
574 | } else if (efi_guidcmp(config_tables[i].guid, ACPI_TABLE_GUID) == 0) { | ||
575 | efi.acpi = __va(config_tables[i].table); | ||
576 | printk(" ACPI=0x%lx", config_tables[i].table); | ||
577 | } else if (efi_guidcmp(config_tables[i].guid, SMBIOS_TABLE_GUID) == 0) { | ||
578 | efi.smbios = __va(config_tables[i].table); | ||
579 | printk(" SMBIOS=0x%lx", config_tables[i].table); | ||
580 | } else if (efi_guidcmp(config_tables[i].guid, SAL_SYSTEM_TABLE_GUID) == 0) { | ||
581 | efi.sal_systab = __va(config_tables[i].table); | ||
582 | printk(" SALsystab=0x%lx", config_tables[i].table); | ||
583 | } else if (efi_guidcmp(config_tables[i].guid, HCDP_TABLE_GUID) == 0) { | ||
584 | efi.hcdp = __va(config_tables[i].table); | ||
585 | printk(" HCDP=0x%lx", config_tables[i].table); | ||
586 | } | ||
587 | } | ||
588 | printk("\n"); | ||
589 | |||
590 | runtime = __va(efi.systab->runtime); | ||
591 | efi.get_time = phys_get_time; | ||
592 | efi.set_time = phys_set_time; | ||
593 | efi.get_wakeup_time = phys_get_wakeup_time; | ||
594 | efi.set_wakeup_time = phys_set_wakeup_time; | ||
595 | efi.get_variable = phys_get_variable; | ||
596 | efi.get_next_variable = phys_get_next_variable; | ||
597 | efi.set_variable = phys_set_variable; | ||
598 | efi.get_next_high_mono_count = phys_get_next_high_mono_count; | ||
599 | efi.reset_system = phys_reset_system; | ||
600 | |||
601 | efi_map_start = __va(ia64_boot_param->efi_memmap); | ||
602 | efi_map_end = efi_map_start + ia64_boot_param->efi_memmap_size; | ||
603 | efi_desc_size = ia64_boot_param->efi_memdesc_size; | ||
604 | |||
605 | #if EFI_DEBUG | ||
606 | /* print EFI memory map: */ | ||
607 | { | ||
608 | efi_memory_desc_t *md; | ||
609 | void *p; | ||
610 | |||
611 | for (i = 0, p = efi_map_start; p < efi_map_end; ++i, p += efi_desc_size) { | ||
612 | md = p; | ||
613 | printk("mem%02u: type=%u, attr=0x%lx, range=[0x%016lx-0x%016lx) (%luMB)\n", | ||
614 | i, md->type, md->attribute, md->phys_addr, | ||
615 | md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT), | ||
616 | md->num_pages >> (20 - EFI_PAGE_SHIFT)); | ||
617 | } | ||
618 | } | ||
619 | #endif | ||
620 | |||
621 | efi_map_pal_code(); | ||
622 | efi_enter_virtual_mode(); | ||
623 | } | ||
624 | |||
625 | void | ||
626 | efi_enter_virtual_mode (void) | ||
627 | { | ||
628 | void *efi_map_start, *efi_map_end, *p; | ||
629 | efi_memory_desc_t *md; | ||
630 | efi_status_t status; | ||
631 | u64 efi_desc_size; | ||
632 | |||
633 | efi_map_start = __va(ia64_boot_param->efi_memmap); | ||
634 | efi_map_end = efi_map_start + ia64_boot_param->efi_memmap_size; | ||
635 | efi_desc_size = ia64_boot_param->efi_memdesc_size; | ||
636 | |||
637 | for (p = efi_map_start; p < efi_map_end; p += efi_desc_size) { | ||
638 | md = p; | ||
639 | if (md->attribute & EFI_MEMORY_RUNTIME) { | ||
640 | /* | ||
641 | * Some descriptors have multiple bits set, so the order of | ||
642 | * the tests is relevant. | ||
643 | */ | ||
644 | if (md->attribute & EFI_MEMORY_WB) { | ||
645 | md->virt_addr = (u64) __va(md->phys_addr); | ||
646 | } else if (md->attribute & EFI_MEMORY_UC) { | ||
647 | md->virt_addr = (u64) ioremap(md->phys_addr, 0); | ||
648 | } else if (md->attribute & EFI_MEMORY_WC) { | ||
649 | #if 0 | ||
650 | md->virt_addr = ia64_remap(md->phys_addr, (_PAGE_A | _PAGE_P | ||
651 | | _PAGE_D | ||
652 | | _PAGE_MA_WC | ||
653 | | _PAGE_PL_0 | ||
654 | | _PAGE_AR_RW)); | ||
655 | #else | ||
656 | printk(KERN_INFO "EFI_MEMORY_WC mapping\n"); | ||
657 | md->virt_addr = (u64) ioremap(md->phys_addr, 0); | ||
658 | #endif | ||
659 | } else if (md->attribute & EFI_MEMORY_WT) { | ||
660 | #if 0 | ||
661 | md->virt_addr = ia64_remap(md->phys_addr, (_PAGE_A | _PAGE_P | ||
662 | | _PAGE_D | _PAGE_MA_WT | ||
663 | | _PAGE_PL_0 | ||
664 | | _PAGE_AR_RW)); | ||
665 | #else | ||
666 | printk(KERN_INFO "EFI_MEMORY_WT mapping\n"); | ||
667 | md->virt_addr = (u64) ioremap(md->phys_addr, 0); | ||
668 | #endif | ||
669 | } | ||
670 | } | ||
671 | } | ||
672 | |||
673 | status = efi_call_phys(__va(runtime->set_virtual_address_map), | ||
674 | ia64_boot_param->efi_memmap_size, | ||
675 | efi_desc_size, ia64_boot_param->efi_memdesc_version, | ||
676 | ia64_boot_param->efi_memmap); | ||
677 | if (status != EFI_SUCCESS) { | ||
678 | printk(KERN_WARNING "warning: unable to switch EFI into virtual mode " | ||
679 | "(status=%lu)\n", status); | ||
680 | return; | ||
681 | } | ||
682 | |||
683 | /* | ||
684 | * Now that EFI is in virtual mode, we call the EFI functions more efficiently: | ||
685 | */ | ||
686 | efi.get_time = virt_get_time; | ||
687 | efi.set_time = virt_set_time; | ||
688 | efi.get_wakeup_time = virt_get_wakeup_time; | ||
689 | efi.set_wakeup_time = virt_set_wakeup_time; | ||
690 | efi.get_variable = virt_get_variable; | ||
691 | efi.get_next_variable = virt_get_next_variable; | ||
692 | efi.set_variable = virt_set_variable; | ||
693 | efi.get_next_high_mono_count = virt_get_next_high_mono_count; | ||
694 | efi.reset_system = virt_reset_system; | ||
695 | } | ||
696 | |||
697 | /* | ||
698 | * Walk the EFI memory map looking for the I/O port range. There can only be one entry of | ||
699 | * this type, other I/O port ranges should be described via ACPI. | ||
700 | */ | ||
701 | u64 | ||
702 | efi_get_iobase (void) | ||
703 | { | ||
704 | void *efi_map_start, *efi_map_end, *p; | ||
705 | efi_memory_desc_t *md; | ||
706 | u64 efi_desc_size; | ||
707 | |||
708 | efi_map_start = __va(ia64_boot_param->efi_memmap); | ||
709 | efi_map_end = efi_map_start + ia64_boot_param->efi_memmap_size; | ||
710 | efi_desc_size = ia64_boot_param->efi_memdesc_size; | ||
711 | |||
712 | for (p = efi_map_start; p < efi_map_end; p += efi_desc_size) { | ||
713 | md = p; | ||
714 | if (md->type == EFI_MEMORY_MAPPED_IO_PORT_SPACE) { | ||
715 | if (md->attribute & EFI_MEMORY_UC) | ||
716 | return md->phys_addr; | ||
717 | } | ||
718 | } | ||
719 | return 0; | ||
720 | } | ||
721 | |||
722 | u32 | ||
723 | efi_mem_type (unsigned long phys_addr) | ||
724 | { | ||
725 | void *efi_map_start, *efi_map_end, *p; | ||
726 | efi_memory_desc_t *md; | ||
727 | u64 efi_desc_size; | ||
728 | |||
729 | efi_map_start = __va(ia64_boot_param->efi_memmap); | ||
730 | efi_map_end = efi_map_start + ia64_boot_param->efi_memmap_size; | ||
731 | efi_desc_size = ia64_boot_param->efi_memdesc_size; | ||
732 | |||
733 | for (p = efi_map_start; p < efi_map_end; p += efi_desc_size) { | ||
734 | md = p; | ||
735 | |||
736 | if (phys_addr - md->phys_addr < (md->num_pages << EFI_PAGE_SHIFT)) | ||
737 | return md->type; | ||
738 | } | ||
739 | return 0; | ||
740 | } | ||
741 | |||
742 | u64 | ||
743 | efi_mem_attributes (unsigned long phys_addr) | ||
744 | { | ||
745 | void *efi_map_start, *efi_map_end, *p; | ||
746 | efi_memory_desc_t *md; | ||
747 | u64 efi_desc_size; | ||
748 | |||
749 | efi_map_start = __va(ia64_boot_param->efi_memmap); | ||
750 | efi_map_end = efi_map_start + ia64_boot_param->efi_memmap_size; | ||
751 | efi_desc_size = ia64_boot_param->efi_memdesc_size; | ||
752 | |||
753 | for (p = efi_map_start; p < efi_map_end; p += efi_desc_size) { | ||
754 | md = p; | ||
755 | |||
756 | if (phys_addr - md->phys_addr < (md->num_pages << EFI_PAGE_SHIFT)) | ||
757 | return md->attribute; | ||
758 | } | ||
759 | return 0; | ||
760 | } | ||
761 | EXPORT_SYMBOL(efi_mem_attributes); | ||
762 | |||
763 | int | ||
764 | valid_phys_addr_range (unsigned long phys_addr, unsigned long *size) | ||
765 | { | ||
766 | void *efi_map_start, *efi_map_end, *p; | ||
767 | efi_memory_desc_t *md; | ||
768 | u64 efi_desc_size; | ||
769 | |||
770 | efi_map_start = __va(ia64_boot_param->efi_memmap); | ||
771 | efi_map_end = efi_map_start + ia64_boot_param->efi_memmap_size; | ||
772 | efi_desc_size = ia64_boot_param->efi_memdesc_size; | ||
773 | |||
774 | for (p = efi_map_start; p < efi_map_end; p += efi_desc_size) { | ||
775 | md = p; | ||
776 | |||
777 | if (phys_addr - md->phys_addr < (md->num_pages << EFI_PAGE_SHIFT)) { | ||
778 | if (!(md->attribute & EFI_MEMORY_WB)) | ||
779 | return 0; | ||
780 | |||
781 | if (*size > md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT) - phys_addr) | ||
782 | *size = md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT) - phys_addr; | ||
783 | return 1; | ||
784 | } | ||
785 | } | ||
786 | return 0; | ||
787 | } | ||
788 | |||
789 | int __init | ||
790 | efi_uart_console_only(void) | ||
791 | { | ||
792 | efi_status_t status; | ||
793 | char *s, name[] = "ConOut"; | ||
794 | efi_guid_t guid = EFI_GLOBAL_VARIABLE_GUID; | ||
795 | efi_char16_t *utf16, name_utf16[32]; | ||
796 | unsigned char data[1024]; | ||
797 | unsigned long size = sizeof(data); | ||
798 | struct efi_generic_dev_path *hdr, *end_addr; | ||
799 | int uart = 0; | ||
800 | |||
801 | /* Convert to UTF-16 */ | ||
802 | utf16 = name_utf16; | ||
803 | s = name; | ||
804 | while (*s) | ||
805 | *utf16++ = *s++ & 0x7f; | ||
806 | *utf16 = 0; | ||
807 | |||
808 | status = efi.get_variable(name_utf16, &guid, NULL, &size, data); | ||
809 | if (status != EFI_SUCCESS) { | ||
810 | printk(KERN_ERR "No EFI %s variable?\n", name); | ||
811 | return 0; | ||
812 | } | ||
813 | |||
814 | hdr = (struct efi_generic_dev_path *) data; | ||
815 | end_addr = (struct efi_generic_dev_path *) ((u8 *) data + size); | ||
816 | while (hdr < end_addr) { | ||
817 | if (hdr->type == EFI_DEV_MSG && | ||
818 | hdr->sub_type == EFI_DEV_MSG_UART) | ||
819 | uart = 1; | ||
820 | else if (hdr->type == EFI_DEV_END_PATH || | ||
821 | hdr->type == EFI_DEV_END_PATH2) { | ||
822 | if (!uart) | ||
823 | return 0; | ||
824 | if (hdr->sub_type == EFI_DEV_END_ENTIRE) | ||
825 | return 1; | ||
826 | uart = 0; | ||
827 | } | ||
828 | hdr = (struct efi_generic_dev_path *) ((u8 *) hdr + hdr->length); | ||
829 | } | ||
830 | printk(KERN_ERR "Malformed %s value\n", name); | ||
831 | return 0; | ||
832 | } | ||
diff --git a/arch/ia64/kernel/efi_stub.S b/arch/ia64/kernel/efi_stub.S new file mode 100644 index 000000000000..5a7fe70212a9 --- /dev/null +++ b/arch/ia64/kernel/efi_stub.S | |||
@@ -0,0 +1,86 @@ | |||
1 | /* | ||
2 | * EFI call stub. | ||
3 | * | ||
4 | * Copyright (C) 1999-2001 Hewlett-Packard Co | ||
5 | * David Mosberger <davidm@hpl.hp.com> | ||
6 | * | ||
7 | * This stub allows us to make EFI calls in physical mode with interrupts | ||
8 | * turned off. We need this because we can't call SetVirtualMap() until | ||
9 | * the kernel has booted far enough to allow allocation of struct vma_struct | ||
10 | * entries (which we would need to map stuff with memory attributes other | ||
11 | * than uncached or writeback...). Since the GetTime() service gets called | ||
12 | * earlier than that, we need to be able to make physical mode EFI calls from | ||
13 | * the kernel. | ||
14 | */ | ||
15 | |||
16 | /* | ||
17 | * PSR settings as per SAL spec (Chapter 8 in the "IA-64 System | ||
18 | * Abstraction Layer Specification", revision 2.6e). Note that | ||
19 | * psr.dfl and psr.dfh MUST be cleared, despite what this manual says. | ||
20 | * Otherwise, SAL dies whenever it's trying to do an IA-32 BIOS call | ||
21 | * (the br.ia instruction fails unless psr.dfl and psr.dfh are | ||
22 | * cleared). Fortunately, SAL promises not to touch the floating | ||
23 | * point regs, so at least we don't have to save f2-f127. | ||
24 | */ | ||
/*
 * PSR_BITS_TO_CLEAR: mask that efi_call_phys removes (andcm) from its saved
 * PSR copy to obtain the PSR value it passes on in r16 for the switch to
 * physical mode.
 */
25 | #define PSR_BITS_TO_CLEAR \ | ||
26 | (IA64_PSR_I | IA64_PSR_IT | IA64_PSR_DT | IA64_PSR_RT | \ | ||
27 | IA64_PSR_DD | IA64_PSR_SS | IA64_PSR_RI | IA64_PSR_ED | \ | ||
28 | IA64_PSR_DFL | IA64_PSR_DFH) | ||
29 | |||
/*
 * PSR_BITS_TO_SET: mask that efi_call_phys ORs into its saved PSR copy (loc3)
 * before the clear mask above is applied.
 */
30 | #define PSR_BITS_TO_SET \ | ||
31 | (IA64_PSR_BN) | ||
32 | |||
33 | #include <asm/processor.h> | ||
34 | #include <asm/asmmacro.h> | ||
35 | |||
36 | /* | ||
37 | * Inputs: | ||
38 | * in0 = address of function descriptor of EFI routine to call | ||
39 | * in1..in7 = arguments to routine | ||
40 | * | ||
41 | * Outputs: | ||
42 | * r8 = EFI_STATUS returned by called function | ||
43 | */ | ||
44 | |||
/*
 * Register frame (see the alloc below: 8 inputs, 7 locals, 7 outputs):
 *   loc0 = rp, loc1 = ar.pfs, loc2 = gp, loc3 = psr with PSR_BITS_TO_SET or'ed in,
 *   loc4 = ar.rsc, loc5/loc6 = r19/r20 as found after ia64_switch_mode_phys.
 *
 * Sequence: load entry point and gp from the function descriptor at in0,
 * switch to physical mode through ia64_switch_mode_phys (r16 = target psr),
 * copy in1..in7 to out0..out6, call the EFI routine through b6, go back to
 * virtual mode through ia64_switch_mode_virt (r16 = loc3), then restore
 * ar.rsc, ar.pfs, rp and gp and return.
 */
45 | GLOBAL_ENTRY(efi_call_phys) | ||
46 | .prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(8) | ||
47 | alloc loc1=ar.pfs,8,7,7,0 | ||
48 | ld8 r2=[in0],8 // load EFI function's entry point | ||
49 | mov loc0=rp | ||
50 | .body | ||
51 | ;; | ||
52 | mov loc2=gp // save global pointer | ||
53 | mov loc4=ar.rsc // save RSE configuration | ||
54 | mov ar.rsc=0 // put RSE in enforced lazy, LE mode | ||
55 | ;; | ||
56 | ld8 gp=[in0] // load EFI function's global pointer | ||
57 | movl r16=PSR_BITS_TO_CLEAR | ||
58 | mov loc3=psr // save processor status word | ||
59 | movl r17=PSR_BITS_TO_SET | ||
60 | ;; | ||
61 | or loc3=loc3,r17 | ||
62 | mov b6=r2 | ||
63 | ;; | ||
64 | andcm r16=loc3,r16 // get psr with IT, DT, and RT bits cleared | ||
65 | br.call.sptk.many rp=ia64_switch_mode_phys | ||
/* forward the caller's arguments: in1..in7 become out0..out6 of the EFI call */
66 | .ret0: mov out4=in5 | ||
67 | mov out0=in1 | ||
68 | mov out1=in2 | ||
69 | mov out2=in3 | ||
70 | mov out3=in4 | ||
71 | mov out5=in6 | ||
72 | mov out6=in7 | ||
/*
 * NOTE(review): r19/r20 are parked in loc5/loc6 across the EFI call and put
 * back before ia64_switch_mode_virt; presumably they carry state produced by
 * ia64_switch_mode_phys -- confirm against that routine.
 */
73 | mov loc5=r19 | ||
74 | mov loc6=r20 | ||
75 | br.call.sptk.many rp=b6 // call the EFI function | ||
76 | .ret1: mov ar.rsc=0 // put RSE in enforced lazy, LE mode | ||
77 | mov r16=loc3 | ||
78 | mov r19=loc5 | ||
79 | mov r20=loc6 | ||
80 | br.call.sptk.many rp=ia64_switch_mode_virt // return to virtual mode | ||
81 | .ret2: mov ar.rsc=loc4 // restore RSE configuration | ||
82 | mov ar.pfs=loc1 | ||
83 | mov rp=loc0 | ||
84 | mov gp=loc2 | ||
85 | br.ret.sptk.many rp | ||
86 | END(efi_call_phys) | ||
diff --git a/arch/ia64/kernel/entry.S b/arch/ia64/kernel/entry.S new file mode 100644 index 000000000000..0272c010a3ba --- /dev/null +++ b/arch/ia64/kernel/entry.S | |||
@@ -0,0 +1,1587 @@ | |||
1 | /* | ||
2 | * ia64/kernel/entry.S | ||
3 | * | ||
4 | * Kernel entry points. | ||
5 | * | ||
6 | * Copyright (C) 1998-2003, 2005 Hewlett-Packard Co | ||
7 | * David Mosberger-Tang <davidm@hpl.hp.com> | ||
8 | * Copyright (C) 1999, 2002-2003 | ||
9 | * Asit Mallick <Asit.K.Mallick@intel.com> | ||
10 | * Don Dugger <Don.Dugger@intel.com> | ||
11 | * Suresh Siddha <suresh.b.siddha@intel.com> | ||
12 | * Fenghua Yu <fenghua.yu@intel.com> | ||
13 | * Copyright (C) 1999 VA Linux Systems | ||
14 | * Copyright (C) 1999 Walt Drummond <drummond@valinux.com> | ||
15 | */ | ||
16 | /* | ||
17 | * ia64_switch_to now places correct virtual mapping in TR2 for | ||
18 | * kernel stack. This allows us to handle interrupts without changing | ||
19 | * to physical mode. | ||
20 | * | ||
21 | * Jonathan Nicklin <nicklin@missioncriticallinux.com> | ||
22 | * Patrick O'Rourke <orourke@missioncriticallinux.com> | ||
23 | * 11/07/2000 | ||
24 | */ | ||
25 | /* | ||
26 | * Global (preserved) predicate usage on syscall entry/exit path: | ||
27 | * | ||
28 | * pKStk: See entry.h. | ||
29 | * pUStk: See entry.h. | ||
30 | * pSys: See entry.h. | ||
31 | * pNonSys: !pSys | ||
32 | */ | ||
33 | |||
34 | #include <linux/config.h> | ||
35 | |||
36 | #include <asm/asmmacro.h> | ||
37 | #include <asm/cache.h> | ||
38 | #include <asm/errno.h> | ||
39 | #include <asm/kregs.h> | ||
40 | #include <asm/offsets.h> | ||
41 | #include <asm/pgtable.h> | ||
42 | #include <asm/percpu.h> | ||
43 | #include <asm/processor.h> | ||
44 | #include <asm/thread_info.h> | ||
45 | #include <asm/unistd.h> | ||
46 | |||
47 | #include "minstate.h" | ||
48 | |||
49 | /* | ||
50 | * execve() is special because in case of success, we need to | ||
51 | * setup a null register window frame. | ||
52 | */ | ||
/*
 * Flow: calls sys_execve with out0..out2 = in0..in2 (filename, argv, envp) and
 * out3 = sp+16 (regs).  loc0 holds rp and loc1 holds ar.pfs across the call.
 * Afterwards p6 is set when the 32-bit result in r8 is >= 0 and p7 otherwise;
 * with p7 the sign-extended result is returned immediately.  With p6, ar.pfs
 * is cleared and the registers listed further down are zeroed before the
 * return.
 */
53 | ENTRY(ia64_execve) | ||
54 | /* | ||
55 | * Allocate 8 input registers since ptrace() may clobber them | ||
56 | */ | ||
57 | .prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(8) | ||
58 | alloc loc1=ar.pfs,8,2,4,0 | ||
59 | mov loc0=rp | ||
60 | .body | ||
61 | mov out0=in0 // filename | ||
62 | ;; // stop bit between alloc and call | ||
63 | mov out1=in1 // argv | ||
64 | mov out2=in2 // envp | ||
65 | add out3=16,sp // regs | ||
66 | br.call.sptk.many rp=sys_execve | ||
67 | .ret0: | ||
68 | #ifdef CONFIG_IA32_SUPPORT | ||
69 | /* | ||
70 | * Check if we're returning to ia32 mode. If so, we need to restore ia32 registers | ||
71 | * from pt_regs. | ||
72 | */ | ||
73 | adds r16=PT(CR_IPSR)+16,sp | ||
74 | ;; | ||
75 | ld8 r16=[r16] | ||
76 | #endif | ||
77 | cmp4.ge p6,p7=r8,r0 | ||
78 | mov ar.pfs=loc1 // restore ar.pfs | ||
79 | sxt4 r8=r8 // return 64-bit result | ||
80 | ;; | ||
/* park f0 at [sp]; the ldf.fill's in the scrub sequence below reload it into f12, f14, ... */
81 | stf.spill [sp]=f0 | ||
82 | (p6) cmp.ne pKStk,pUStk=r0,r0 // a successful execve() lands us in user-mode... | ||
83 | mov rp=loc0 | ||
84 | (p6) mov ar.pfs=r0 // clear ar.pfs on success | ||
85 | (p7) br.ret.sptk.many rp | ||
86 | |||
87 | /* | ||
88 | * In theory, we'd have to zap this state only to prevent leaking of | ||
89 | * security sensitive state (e.g., if current->mm->dumpable is zero). However, | ||
90 | * this executes in less than 20 cycles even on Itanium, so it's not worth | ||
91 | * optimizing for...). | ||
92 | */ | ||
93 | mov ar.unat=0; mov ar.lc=0 | ||
94 | mov r4=0; mov f2=f0; mov b1=r0 | ||
95 | mov r5=0; mov f3=f0; mov b2=r0 | ||
96 | mov r6=0; mov f4=f0; mov b3=r0 | ||
97 | mov r7=0; mov f5=f0; mov b4=r0 | ||
98 | ldf.fill f12=[sp]; mov f13=f0; mov b5=r0 | ||
99 | ldf.fill f14=[sp]; ldf.fill f15=[sp]; mov f16=f0 | ||
100 | ldf.fill f17=[sp]; ldf.fill f18=[sp]; mov f19=f0 | ||
101 | ldf.fill f20=[sp]; ldf.fill f21=[sp]; mov f22=f0 | ||
102 | ldf.fill f23=[sp]; ldf.fill f24=[sp]; mov f25=f0 | ||
103 | ldf.fill f26=[sp]; ldf.fill f27=[sp]; mov f28=f0 | ||
104 | ldf.fill f29=[sp]; ldf.fill f30=[sp]; mov f31=f0 | ||
105 | #ifdef CONFIG_IA32_SUPPORT | ||
/* r16 still holds the cr.ipsr value loaded from pt_regs above; if its IS bit is set, return via ia64_ret_from_ia32_execve */
106 | tbit.nz p6,p0=r16, IA64_PSR_IS_BIT | ||
107 | movl loc0=ia64_ret_from_ia32_execve | ||
108 | ;; | ||
109 | (p6) mov rp=loc0 | ||
110 | #endif | ||
111 | br.ret.sptk.many rp | ||
112 | END(ia64_execve) | ||
113 | |||
114 | /* | ||
115 | * sys_clone2(u64 flags, u64 ustack_base, u64 ustack_size, u64 parent_tidptr, u64 child_tidptr, | ||
116 | * u64 tls) | ||
117 | */ | ||
/*
 * Saves a switch_stack, then calls do_fork with
 *   out0 = flags, out1 = ustack_base, out2 = &regs, out3 = ustack_size,
 *   out4 = parent_tidptr, out5 = child_tidptr.
 * When CLONE_SETTLS is set in flags (p6), the tls argument (in5) is first
 * written to the r16 slot of pt_regs (r2).  loc0/loc1 keep rp and ar.pfs
 * across the call; the switch stack is popped before returning.
 */
118 | GLOBAL_ENTRY(sys_clone2) | ||
119 | /* | ||
120 | * Allocate 8 input registers since ptrace() may clobber them | ||
121 | */ | ||
122 | .prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(8) | ||
123 | alloc r16=ar.pfs,8,2,6,0 | ||
124 | DO_SAVE_SWITCH_STACK | ||
125 | adds r2=PT(R16)+IA64_SWITCH_STACK_SIZE+16,sp | ||
126 | mov loc0=rp | ||
127 | mov loc1=r16 // save ar.pfs across do_fork | ||
128 | .body | ||
129 | mov out1=in1 | ||
130 | mov out3=in2 | ||
131 | tbit.nz p6,p0=in0,CLONE_SETTLS_BIT | ||
132 | mov out4=in3 // parent_tidptr: valid only w/CLONE_PARENT_SETTID | ||
133 | ;; | ||
134 | (p6) st8 [r2]=in5 // store TLS in r16 for copy_thread() | ||
135 | mov out5=in4 // child_tidptr: valid only w/CLONE_CHILD_SETTID or CLONE_CHILD_CLEARTID | ||
136 | adds out2=IA64_SWITCH_STACK_SIZE+16,sp // out2 = &regs | ||
137 | mov out0=in0 // out0 = clone_flags | ||
138 | br.call.sptk.many rp=do_fork | ||
139 | .ret1: .restore sp | ||
140 | adds sp=IA64_SWITCH_STACK_SIZE,sp // pop the switch stack | ||
141 | mov ar.pfs=loc1 | ||
142 | mov rp=loc0 | ||
143 | br.ret.sptk.many rp | ||
144 | END(sys_clone2) | ||
145 | |||
146 | /* | ||
147 | * sys_clone(u64 flags, u64 ustack_base, u64 parent_tidptr, u64 child_tidptr, u64 tls) | ||
148 | * Deprecated. Use sys_clone2() instead. | ||
149 | */ | ||
/*
 * Same shape as sys_clone2, except that there is no stack-size argument: out3
 * is the constant 16 and the remaining arguments shift down by one
 * (out4 = in2 = parent_tidptr, out5 = in3 = child_tidptr, tls = in4).
 */
150 | GLOBAL_ENTRY(sys_clone) | ||
151 | /* | ||
152 | * Allocate 8 input registers since ptrace() may clobber them | ||
153 | */ | ||
154 | .prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(8) | ||
155 | alloc r16=ar.pfs,8,2,6,0 | ||
156 | DO_SAVE_SWITCH_STACK | ||
157 | adds r2=PT(R16)+IA64_SWITCH_STACK_SIZE+16,sp | ||
158 | mov loc0=rp | ||
159 | mov loc1=r16 // save ar.pfs across do_fork | ||
160 | .body | ||
161 | mov out1=in1 | ||
162 | mov out3=16 // stacksize (compensates for 16-byte scratch area) | ||
163 | tbit.nz p6,p0=in0,CLONE_SETTLS_BIT | ||
164 | mov out4=in2 // parent_tidptr: valid only w/CLONE_PARENT_SETTID | ||
165 | ;; | ||
166 | (p6) st8 [r2]=in4 // store TLS in the pt_regs r16 slot (r2 = &PT(R16), as in sys_clone2); NOTE(review): presumably copy_thread() moves it to r13 (tp) -- confirm | ||
167 | mov out5=in3 // child_tidptr: valid only w/CLONE_CHILD_SETTID or CLONE_CHILD_CLEARTID | ||
168 | adds out2=IA64_SWITCH_STACK_SIZE+16,sp // out2 = &regs | ||
169 | mov out0=in0 // out0 = clone_flags | ||
170 | br.call.sptk.many rp=do_fork | ||
171 | .ret2: .restore sp | ||
172 | adds sp=IA64_SWITCH_STACK_SIZE,sp // pop the switch stack | ||
173 | mov ar.pfs=loc1 | ||
174 | mov rp=loc0 | ||
175 | br.ret.sptk.many rp | ||
176 | END(sys_clone) | ||
177 | |||
178 | /* | ||
179 | * prev_task <- ia64_switch_to(struct task_struct *next) | ||
180 | * With Ingo's new scheduler, interrupts are disabled when this routine gets | ||
181 | * called. The code starting at .map relies on this. The rest of the code | ||
182 | * doesn't care about the interrupt masking status. | ||
183 | */ | ||
/*
 * Registers: in0 = next task, r13 = outgoing ("current") task, r8 = return
 * value (the outgoing task).  r20 is the physical address of next and r26 its
 * granule number.
 *
 * Predicates: after the two compares below, p6 is set only when next is not
 * init_task AND its granule differs from the one remembered in
 * IA64_KR(CURRENT_STACK).  In that case .map turns psr.ic off, wires a
 * granule-sized PAGE_KERNEL translation for next into
 * dtr[IA64_TR_CURRENT_STACK], records the granule in IA64_KR(CURRENT_STACK)
 * and branches back to .done, where psr.ic is turned on again.
 */
184 | GLOBAL_ENTRY(ia64_switch_to) | ||
185 | .prologue | ||
186 | alloc r16=ar.pfs,1,0,0,0 | ||
187 | DO_SAVE_SWITCH_STACK | ||
188 | .body | ||
189 | |||
190 | adds r22=IA64_TASK_THREAD_KSP_OFFSET,r13 | ||
191 | movl r25=init_task | ||
192 | mov r27=IA64_KR(CURRENT_STACK) | ||
193 | adds r21=IA64_TASK_THREAD_KSP_OFFSET,in0 | ||
194 | dep r20=0,in0,61,3 // physical address of "next" | ||
195 | ;; | ||
196 | st8 [r22]=sp // save kernel stack pointer of old task | ||
197 | shr.u r26=r20,IA64_GRANULE_SHIFT | ||
198 | cmp.eq p7,p6=r25,in0 | ||
199 | ;; | ||
200 | /* | ||
201 | * If we've already mapped this task's page, we can skip doing it again. | ||
202 | */ | ||
203 | (p6) cmp.eq p7,p6=r26,r27 | ||
204 | (p6) br.cond.dpnt .map | ||
205 | ;; | ||
206 | .done: | ||
207 | (p6) ssm psr.ic // if we had to map, reenable the psr.ic bit FIRST!!! | ||
208 | ;; | ||
209 | (p6) srlz.d | ||
210 | ld8 sp=[r21] // load kernel stack pointer of new task | ||
211 | mov IA64_KR(CURRENT)=in0 // update "current" application register | ||
212 | mov r8=r13 // return pointer to previously running task | ||
213 | mov r13=in0 // set "current" pointer | ||
214 | ;; | ||
215 | DO_LOAD_SWITCH_STACK | ||
216 | |||
217 | #ifdef CONFIG_SMP | ||
218 | sync.i // ensure "fc"s done by this CPU are visible on other CPUs | ||
219 | #endif | ||
220 | br.ret.sptk.many rp // boogie on out in new context | ||
221 | |||
222 | .map: | ||
223 | rsm psr.ic // interrupts (psr.i) are already disabled here | ||
224 | movl r25=PAGE_KERNEL | ||
225 | ;; | ||
226 | srlz.d | ||
227 | or r23=r25,r20 // construct PA | page properties | ||
228 | mov r25=IA64_GRANULE_SHIFT<<2 | ||
229 | ;; | ||
230 | mov cr.itir=r25 | ||
231 | mov cr.ifa=in0 // VA of next task... | ||
232 | ;; | ||
233 | mov r25=IA64_TR_CURRENT_STACK | ||
234 | mov IA64_KR(CURRENT_STACK)=r26 // remember last page we mapped... | ||
235 | ;; | ||
236 | itr.d dtr[r25]=r23 // wire in new mapping... | ||
237 | br.cond.sptk .done | ||
238 | END(ia64_switch_to) | ||
239 | |||
240 | /* | ||
241 | * Note that interrupts are enabled during save_switch_stack and load_switch_stack. This | ||
242 | * means that we may get an interrupt with "sp" pointing to the new kernel stack while | ||
243 | * ar.bspstore is still pointing to the old kernel backing store area. Since ar.rsc, | ||
244 | * ar.rnat, ar.bsp, and ar.bspstore are all preserved by interrupts, this is not a | ||
245 | * problem. Also, we don't need to specify unwind information for preserved registers | ||
246 | * that are not modified in save_switch_stack as the right unwind information is already | ||
247 | * specified at the call-site of save_switch_stack. | ||
248 | */ | ||
249 | |||
250 | /* | ||
251 | * save_switch_stack: | ||
252 | * - r16 holds ar.pfs | ||
253 | * - b7 holds address to return to | ||
254 | * - rp (b0) holds return address to save | ||
255 | */ | ||
/*
 * Pointer usage in the body:
 *   r14, r15 - walk the r4-r7, b0-b5, ar.pfs and ar.lc slots of the
 *              switch_stack located at sp+16; r14 is later re-pointed at the
 *              CALLER_UNAT and AR_FPSR slots.
 *   r2, r3   - first serve as prefetch pointers, then alternate through the
 *              floating-point spill slots (f2/f3 ... f30/f31, 32 bytes
 *              apart), and finally store ar.unat, ar.rnat and ar.bspstore
 *              (r2) and the predicates (r3).
 * The RSE is put into enforced lazy mode (ar.rsc=0) for the duration and
 * switched back to eager mode (ar.rsc=3) just before branching to b7.
 */
256 | GLOBAL_ENTRY(save_switch_stack) | ||
257 | .prologue | ||
258 | .altrp b7 | ||
259 | flushrs // flush dirty regs to backing store (must be first in insn group) | ||
260 | .save @priunat,r17 | ||
261 | mov r17=ar.unat // preserve caller's ar.unat (stored into the SW(CALLER_UNAT) slot below) | ||
262 | .body | ||
263 | #ifdef CONFIG_ITANIUM | ||
264 | adds r2=16+128,sp | ||
265 | adds r3=16+64,sp | ||
266 | adds r14=SW(R4)+16,sp | ||
267 | ;; | ||
268 | st8.spill [r14]=r4,16 // spill r4 | ||
269 | lfetch.fault.excl.nt1 [r3],128 | ||
270 | ;; | ||
271 | lfetch.fault.excl.nt1 [r2],128 | ||
272 | lfetch.fault.excl.nt1 [r3],128 | ||
273 | ;; | ||
274 | lfetch.fault.excl [r2] | ||
275 | lfetch.fault.excl [r3] | ||
276 | adds r15=SW(R5)+16,sp | ||
277 | #else | ||
278 | add r2=16+3*128,sp | ||
279 | add r3=16,sp | ||
280 | add r14=SW(R4)+16,sp | ||
281 | ;; | ||
282 | st8.spill [r14]=r4,SW(R6)-SW(R4) // spill r4 and prefetch offset 0x1c0 | ||
283 | lfetch.fault.excl.nt1 [r3],128 // prefetch offset 0x010 | ||
284 | ;; | ||
285 | lfetch.fault.excl.nt1 [r3],128 // prefetch offset 0x090 | ||
286 | lfetch.fault.excl.nt1 [r2],128 // prefetch offset 0x190 | ||
287 | ;; | ||
288 | lfetch.fault.excl.nt1 [r3] // prefetch offset 0x110 | ||
289 | lfetch.fault.excl.nt1 [r2] // prefetch offset 0x210 | ||
290 | adds r15=SW(R5)+16,sp | ||
291 | #endif | ||
292 | ;; | ||
293 | st8.spill [r15]=r5,SW(R7)-SW(R5) // spill r5 | ||
294 | mov.m ar.rsc=0 // put RSE in mode: enforced lazy, little endian, pl 0 | ||
295 | add r2=SW(F2)+16,sp // r2 = &sw->f2 | ||
296 | ;; | ||
297 | st8.spill [r14]=r6,SW(B0)-SW(R6) // spill r6 | ||
298 | mov.m r18=ar.fpsr // preserve fpsr | ||
299 | add r3=SW(F3)+16,sp // r3 = &sw->f3 | ||
300 | ;; | ||
301 | stf.spill [r2]=f2,32 | ||
302 | mov.m r19=ar.rnat | ||
303 | mov r21=b0 | ||
304 | |||
305 | stf.spill [r3]=f3,32 | ||
306 | st8.spill [r15]=r7,SW(B2)-SW(R7) // spill r7 | ||
307 | mov r22=b1 | ||
308 | ;; | ||
309 | // since we're done with the spills, read and save ar.unat: | ||
310 | mov.m r29=ar.unat | ||
311 | mov.m r20=ar.bspstore | ||
312 | mov r23=b2 | ||
313 | stf.spill [r2]=f4,32 | ||
314 | stf.spill [r3]=f5,32 | ||
315 | mov r24=b3 | ||
316 | ;; | ||
317 | st8 [r14]=r21,SW(B1)-SW(B0) // save b0 | ||
318 | st8 [r15]=r23,SW(B3)-SW(B2) // save b2 | ||
319 | mov r25=b4 | ||
320 | mov r26=b5 | ||
321 | ;; | ||
322 | st8 [r14]=r22,SW(B4)-SW(B1) // save b1 | ||
323 | st8 [r15]=r24,SW(AR_PFS)-SW(B3) // save b3 | ||
324 | mov r21=ar.lc // I-unit | ||
325 | stf.spill [r2]=f12,32 | ||
326 | stf.spill [r3]=f13,32 | ||
327 | ;; | ||
328 | st8 [r14]=r25,SW(B5)-SW(B4) // save b4 | ||
329 | st8 [r15]=r16,SW(AR_LC)-SW(AR_PFS) // save ar.pfs | ||
330 | stf.spill [r2]=f14,32 | ||
331 | stf.spill [r3]=f15,32 | ||
332 | ;; | ||
333 | st8 [r14]=r26 // save b5 | ||
334 | st8 [r15]=r21 // save ar.lc | ||
335 | stf.spill [r2]=f16,32 | ||
336 | stf.spill [r3]=f17,32 | ||
337 | ;; | ||
338 | stf.spill [r2]=f18,32 | ||
339 | stf.spill [r3]=f19,32 | ||
340 | ;; | ||
341 | stf.spill [r2]=f20,32 | ||
342 | stf.spill [r3]=f21,32 | ||
343 | ;; | ||
344 | stf.spill [r2]=f22,32 | ||
345 | stf.spill [r3]=f23,32 | ||
346 | ;; | ||
347 | stf.spill [r2]=f24,32 | ||
348 | stf.spill [r3]=f25,32 | ||
349 | ;; | ||
350 | stf.spill [r2]=f26,32 | ||
351 | stf.spill [r3]=f27,32 | ||
352 | ;; | ||
353 | stf.spill [r2]=f28,32 | ||
354 | stf.spill [r3]=f29,32 | ||
355 | ;; | ||
356 | stf.spill [r2]=f30,SW(AR_UNAT)-SW(F30) | ||
357 | stf.spill [r3]=f31,SW(PR)-SW(F31) | ||
358 | add r14=SW(CALLER_UNAT)+16,sp | ||
359 | ;; | ||
360 | st8 [r2]=r29,SW(AR_RNAT)-SW(AR_UNAT) // save ar.unat | ||
361 | st8 [r14]=r17,SW(AR_FPSR)-SW(CALLER_UNAT) // save caller_unat | ||
362 | mov r21=pr | ||
363 | ;; | ||
364 | st8 [r2]=r19,SW(AR_BSPSTORE)-SW(AR_RNAT) // save ar.rnat | ||
365 | st8 [r3]=r21 // save predicate registers | ||
366 | ;; | ||
367 | st8 [r2]=r20 // save ar.bspstore | ||
368 | st8 [r14]=r18 // save fpsr | ||
369 | mov ar.rsc=3 // put RSE back into eager mode, pl 0 | ||
370 | br.cond.sptk.many b7 | ||
371 | END(save_switch_stack) | ||
372 | |||
373 | /* | ||
374 | * load_switch_stack: | ||
375 | * - "invala" MUST be done at call site (normally in DO_LOAD_SWITCH_STACK) | ||
376 | * - b7 holds address to return to | ||
377 | * - must not touch r8-r11 | ||
378 | */ | ||
/*
 * The saved values are first loaded into scratch registers (r16-r19 and
 * r21-r30) and then moved into b0-b5, ar.bspstore, ar.unat, ar.pfs, ar.lc,
 * pr, ar.rnat and ar.fpsr, interleaved with the ldf.fill's that reload
 * f2-f5 and f12-f31.  ar.unat is written twice: first with the unat saved in
 * the switch_stack, so that the ld8.fill's of r4-r7 pick up their NaT bits,
 * and afterwards with the caller's unat.  The RSE is in enforced lazy mode
 * (ar.rsc=0) while this runs and is set back to eager mode (ar.rsc=3) right
 * before branching to b7.
 */
379 | ENTRY(load_switch_stack) | ||
380 | .prologue | ||
381 | .altrp b7 | ||
382 | |||
383 | .body | ||
384 | lfetch.fault.nt1 [sp] | ||
385 | adds r2=SW(AR_BSPSTORE)+16,sp | ||
386 | adds r3=SW(AR_UNAT)+16,sp | ||
387 | mov ar.rsc=0 // put RSE into enforced lazy mode | ||
388 | adds r14=SW(CALLER_UNAT)+16,sp | ||
389 | adds r15=SW(AR_FPSR)+16,sp | ||
390 | ;; | ||
391 | ld8 r27=[r2],(SW(B0)-SW(AR_BSPSTORE)) // bspstore | ||
392 | ld8 r29=[r3],(SW(B1)-SW(AR_UNAT)) // unat | ||
393 | ;; | ||
394 | ld8 r21=[r2],16 // restore b0 | ||
395 | ld8 r22=[r3],16 // restore b1 | ||
396 | ;; | ||
397 | ld8 r23=[r2],16 // restore b2 | ||
398 | ld8 r24=[r3],16 // restore b3 | ||
399 | ;; | ||
400 | ld8 r25=[r2],16 // restore b4 | ||
401 | ld8 r26=[r3],16 // restore b5 | ||
402 | ;; | ||
403 | ld8 r16=[r2],(SW(PR)-SW(AR_PFS)) // ar.pfs | ||
404 | ld8 r17=[r3],(SW(AR_RNAT)-SW(AR_LC)) // ar.lc | ||
405 | ;; | ||
406 | ld8 r28=[r2] // restore pr | ||
407 | ld8 r30=[r3] // restore rnat | ||
408 | ;; | ||
409 | ld8 r18=[r14],16 // restore caller's unat | ||
410 | ld8 r19=[r15],24 // restore fpsr | ||
411 | ;; | ||
412 | ldf.fill f2=[r14],32 | ||
413 | ldf.fill f3=[r15],32 | ||
414 | ;; | ||
415 | ldf.fill f4=[r14],32 | ||
416 | ldf.fill f5=[r15],32 | ||
417 | ;; | ||
418 | ldf.fill f12=[r14],32 | ||
419 | ldf.fill f13=[r15],32 | ||
420 | ;; | ||
421 | ldf.fill f14=[r14],32 | ||
422 | ldf.fill f15=[r15],32 | ||
423 | ;; | ||
424 | ldf.fill f16=[r14],32 | ||
425 | ldf.fill f17=[r15],32 | ||
426 | ;; | ||
427 | ldf.fill f18=[r14],32 | ||
428 | ldf.fill f19=[r15],32 | ||
429 | mov b0=r21 | ||
430 | ;; | ||
431 | ldf.fill f20=[r14],32 | ||
432 | ldf.fill f21=[r15],32 | ||
433 | mov b1=r22 | ||
434 | ;; | ||
435 | ldf.fill f22=[r14],32 | ||
436 | ldf.fill f23=[r15],32 | ||
437 | mov b2=r23 | ||
438 | ;; | ||
439 | mov ar.bspstore=r27 | ||
440 | mov ar.unat=r29 // establish unat holding the NaT bits for r4-r7 | ||
441 | mov b3=r24 | ||
442 | ;; | ||
443 | ldf.fill f24=[r14],32 | ||
444 | ldf.fill f25=[r15],32 | ||
445 | mov b4=r25 | ||
446 | ;; | ||
447 | ldf.fill f26=[r14],32 | ||
448 | ldf.fill f27=[r15],32 | ||
449 | mov b5=r26 | ||
450 | ;; | ||
451 | ldf.fill f28=[r14],32 | ||
452 | ldf.fill f29=[r15],32 | ||
453 | mov ar.pfs=r16 | ||
454 | ;; | ||
455 | ldf.fill f30=[r14],32 | ||
456 | ldf.fill f31=[r15],24 | ||
457 | mov ar.lc=r17 | ||
458 | ;; | ||
459 | ld8.fill r4=[r14],16 | ||
460 | ld8.fill r5=[r15],16 | ||
461 | mov pr=r28,-1 | ||
462 | ;; | ||
463 | ld8.fill r6=[r14],16 | ||
464 | ld8.fill r7=[r15],16 | ||
465 | |||
466 | mov ar.unat=r18 // restore caller's unat | ||
467 | mov ar.rnat=r30 // must restore after bspstore but before rsc! | ||
468 | mov ar.fpsr=r19 // restore fpsr | ||
469 | mov ar.rsc=3 // put RSE back into eager mode, pl 0 | ||
470 | br.cond.sptk.many b7 | ||
471 | END(load_switch_stack) | ||
472 | |||
473 | GLOBAL_ENTRY(__ia64_syscall) | ||
474 | .regstk 6,0,0,0 | ||
475 | mov r15=in5 // the sixth argument (in5) is the syscall number; put it in r15. After the break, r10 == -1 causes r8 to be stored in errno and -1 to be returned in r8 | ||
476 | break __BREAK_SYSCALL | ||
477 | movl r2=errno | ||
478 | cmp.eq p6,p7=-1,r10 | ||
479 | ;; | ||
480 | (p6) st4 [r2]=r8 | ||
481 | (p6) mov r8=-1 | ||
482 | br.ret.sptk.many rp | ||
483 | END(__ia64_syscall) | ||
484 | |||
485 | GLOBAL_ENTRY(execve) | ||
486 | mov r15=__NR_execve // r15 = syscall number for the break below; no argument registers are modified here | ||
487 | break __BREAK_SYSCALL | ||
488 | br.ret.sptk.many rp | ||
489 | END(execve) | ||
490 | |||
491 | GLOBAL_ENTRY(clone) | ||
492 | mov r15=__NR_clone // r15 = syscall number for the break below; no argument registers are modified here | ||
493 | break __BREAK_SYSCALL | ||
494 | br.ret.sptk.many rp | ||
495 | END(clone) | ||
496 | |||
497 | /* | ||
498 | * Invoke a system call, calling syscall_trace_enter before and syscall_trace_leave after the call. | ||
499 | * We MUST preserve the current register frame throughout this routine | ||
500 | * because some system calls (such as ia64_execve) directly | ||
501 | * manipulate ar.pfs. | ||
502 | */ | ||
503 | GLOBAL_ENTRY(ia64_trace_syscall) | ||
504 | PT_REGS_UNWIND_INFO(0) | ||
505 | /* | ||
506 | * We need to preserve the scratch registers f6-f11 in case the system | ||
507 | * call is sigreturn. They are spilled to pt_regs here and filled back after syscall_trace_enter returns. | ||
508 | */ | ||
509 | adds r16=PT(F6)+16,sp | ||
510 | adds r17=PT(F7)+16,sp | ||
511 | ;; | ||
512 | stf.spill [r16]=f6,32 | ||
513 | stf.spill [r17]=f7,32 | ||
514 | ;; | ||
515 | stf.spill [r16]=f8,32 | ||
516 | stf.spill [r17]=f9,32 | ||
517 | ;; | ||
518 | stf.spill [r16]=f10 | ||
519 | stf.spill [r17]=f11 | ||
520 | br.call.sptk.many rp=syscall_trace_enter // give parent a chance to catch syscall args | ||
521 | adds r16=PT(F6)+16,sp | ||
522 | adds r17=PT(F7)+16,sp | ||
523 | ;; | ||
524 | ldf.fill f6=[r16],32 | ||
525 | ldf.fill f7=[r17],32 | ||
526 | ;; | ||
527 | ldf.fill f8=[r16],32 | ||
528 | ldf.fill f9=[r17],32 | ||
529 | ;; | ||
530 | ldf.fill f10=[r16] | ||
531 | ldf.fill f11=[r17] | ||
532 | // the syscall number may have changed, so re-load it from pt_regs.r15 and re-calculate the | ||
533 | // syscall entry-point (sys_ni_syscall is used when the number is out of range): | ||
534 | adds r15=PT(R15)+16,sp // r15 = &pt_regs.r15 (syscall #) | ||
535 | ;; | ||
536 | ld8 r15=[r15] | ||
537 | mov r3=NR_syscalls - 1 | ||
538 | ;; | ||
539 | adds r15=-1024,r15 | ||
540 | movl r16=sys_call_table | ||
541 | ;; | ||
542 | shladd r20=r15,3,r16 // r20 = sys_call_table + 8*(syscall-1024) | ||
543 | cmp.leu p6,p7=r15,r3 | ||
544 | ;; | ||
545 | (p6) ld8 r20=[r20] // load address of syscall entry point | ||
546 | (p7) movl r20=sys_ni_syscall | ||
547 | ;; | ||
548 | mov b6=r20 | ||
549 | br.call.sptk.many rp=b6 // do the syscall | ||
550 | .strace_check_retval: | ||
551 | cmp.lt p6,p0=r8,r0 // r8 < 0: syscall may have failed (strace_error checks pt_regs.r8) | ||
552 | adds r2=PT(R8)+16,sp // r2 = &pt_regs.r8 | ||
553 | adds r3=PT(R10)+16,sp // r3 = &pt_regs.r10 | ||
554 | mov r10=0 | ||
555 | (p6) br.cond.sptk strace_error // syscall failed -> | ||
556 | ;; // avoid RAW on r10 | ||
557 | .strace_save_retval: | ||
558 | .mem.offset 0,0; st8.spill [r2]=r8 // store return value in slot for r8 | ||
559 | .mem.offset 8,0; st8.spill [r3]=r10 // store error indication (0, or -1 from strace_error) in slot for r10 | ||
560 | br.call.sptk.many rp=syscall_trace_leave // give parent a chance to catch return value | ||
561 | .ret3: br.cond.sptk .work_pending_syscall_end | ||
562 | |||
563 | strace_error: | ||
564 | ld8 r3=[r2] // load pt_regs.r8 | ||
565 | sub r9=0,r8 // negate return value to get errno value | ||
566 | ;; | ||
567 | cmp.ne p6,p0=r3,r0 // is pt_regs.r8!=0? | ||
568 | adds r3=16,r2 // r3=&pt_regs.r10 | ||
569 | ;; | ||
570 | (p6) mov r10=-1 | ||
571 | (p6) mov r8=r9 | ||
572 | br.cond.sptk .strace_save_retval | ||
573 | END(ia64_trace_syscall) | ||
574 | |||
575 | /* | ||
576 | * When traced and returning from sigreturn, we invoke syscall_trace_leave but then | ||
577 | * go straight to ia64_leave_kernel rather than ia64_leave_syscall. | ||
578 | */ | ||
579 | GLOBAL_ENTRY(ia64_strace_leave_kernel) | ||
580 | PT_REGS_UNWIND_INFO(0) | ||
581 | { /* | ||
582 | * Some versions of gas generate bad unwind info if the first instruction of a | ||
583 | * procedure doesn't go into the first slot of a bundle. This is a workaround: the two nops pad the explicit bundle. | ||
584 | */ | ||
585 | nop.m 0 | ||
586 | nop.i 0 | ||
587 | br.call.sptk.many rp=syscall_trace_leave // give parent a chance to catch return value | ||
588 | } | ||
589 | .ret4: br.cond.sptk ia64_leave_kernel | ||
590 | END(ia64_strace_leave_kernel) | ||
591 | |||
592 | GLOBAL_ENTRY(ia64_ret_from_clone) | ||
593 | PT_REGS_UNWIND_INFO(0) | ||
594 | { /* | ||
595 | * Some versions of gas generate bad unwind info if the first instruction of a | ||
596 | * procedure doesn't go into the first slot of a bundle. This is a workaround. | ||
597 | */ | ||
598 | nop.m 0 | ||
599 | nop.i 0 | ||
600 | /* | ||
601 | * We need to call schedule_tail() to complete the scheduling process. | ||
602 | * Called by ia64_switch_to() after do_fork()->copy_thread(). r8 contains the | ||
603 | * address of the previously executing task (ia64_invoke_schedule_tail passes it on to schedule_tail). Afterwards r8 is set to 0 and, if a _TIF_SYSCALL_TRACEAUDIT flag is set, control goes to .strace_check_retval; otherwise execution falls through into ia64_ret_from_syscall. | ||
604 | */ | ||
605 | br.call.sptk.many rp=ia64_invoke_schedule_tail | ||
606 | } | ||
607 | .ret8: | ||
608 | adds r2=TI_FLAGS+IA64_TASK_SIZE,r13 | ||
609 | ;; | ||
610 | ld4 r2=[r2] | ||
611 | ;; | ||
612 | mov r8=0 | ||
613 | and r2=_TIF_SYSCALL_TRACEAUDIT,r2 | ||
614 | ;; | ||
615 | cmp.ne p6,p0=r2,r0 | ||
616 | (p6) br.cond.spnt .strace_check_retval | ||
617 | ;; // added stop bits to prevent r8 dependency | ||
618 | END(ia64_ret_from_clone) | ||
619 | // fall through | ||
620 | GLOBAL_ENTRY(ia64_ret_from_syscall) | ||
621 | PT_REGS_UNWIND_INFO(0) | ||
622 | cmp.ge p6,p7=r8,r0 // r8 >= 0: syscall executed successfully (p6); otherwise p7 | ||
623 | adds r2=PT(R8)+16,sp // r2 = &pt_regs.r8 (read by handle_syscall_error) | ||
624 | mov r10=r0 // clear error indication in r10 | ||
625 | (p7) br.cond.spnt handle_syscall_error // negative r8: handle potential syscall failure | ||
626 | END(ia64_ret_from_syscall) | ||
627 | // fall through | ||
628 | /* | ||
629 | * ia64_leave_syscall(): Same as ia64_leave_kernel, except that it doesn't | ||
630 | * need to switch to bank 0 and doesn't restore the scratch registers. | ||
631 | * To avoid leaking kernel bits, the scratch registers are set to | ||
632 | * the following known-to-be-safe values: | ||
633 | * | ||
634 | * r1: restored (global pointer) | ||
635 | * r2: cleared | ||
636 | * r3: 1 (when returning to user-level) | ||
637 | * r8-r11: restored (syscall return value(s)) | ||
638 | * r12: restored (user-level stack pointer) | ||
639 | * r13: restored (user-level thread pointer) | ||
640 | * r14: cleared | ||
641 | * r15: restored (syscall #) | ||
642 | * r16-r17: cleared | ||
643 | * r18: user-level b6 | ||
644 | * r19: cleared | ||
645 | * r20: user-level ar.fpsr | ||
646 | * r21: user-level b0 | ||
647 | * r22: cleared | ||
648 | * r23: user-level ar.bspstore | ||
649 | * r24: user-level ar.rnat | ||
650 | * r25: user-level ar.unat | ||
651 | * r26: user-level ar.pfs | ||
652 | * r27: user-level ar.rsc | ||
653 | * r28: user-level ip | ||
654 | * r29: user-level psr | ||
655 | * r30: user-level cfm | ||
656 | * r31: user-level pr | ||
657 | * f6-f11: cleared | ||
658 | * pr: restored (user-level pr) | ||
659 | * b0: restored (user-level rp) | ||
660 | * b6: restored | ||
661 | * b7: cleared | ||
662 | * ar.unat: restored (user-level ar.unat) | ||
663 | * ar.pfs: restored (user-level ar.pfs) | ||
664 | * ar.rsc: restored (user-level ar.rsc) | ||
665 | * ar.rnat: restored (user-level ar.rnat) | ||
666 | * ar.bspstore: restored (user-level ar.bspstore) | ||
667 | * ar.fpsr: restored (user-level ar.fpsr) | ||
668 | * ar.ccv: cleared | ||
669 | * ar.csd: cleared | ||
670 | * ar.ssd: cleared | ||
671 | */ | ||
672 | ENTRY(ia64_leave_syscall) | ||
673 | PT_REGS_UNWIND_INFO(0) | ||
674 | /* | ||
675 | * work.need_resched etc. mustn't get changed by this CPU before it returns to | ||
676 | * user- or fsys-mode, hence we disable interrupts early on. | ||
677 | * | ||
678 | * p6 controls whether current_thread_info()->flags needs to be checked for | ||
679 | * extra work. We always check for extra work when returning to user-level. | ||
680 | * With CONFIG_PREEMPT, we also check for extra work when the preempt_count | ||
681 | * is 0. After extra work processing has been completed, execution | ||
682 | * resumes at .work_processed_syscall with p6 set to 1 if the extra-work-check | ||
683 | * needs to be redone. | ||
684 | */ | ||
685 | #ifdef CONFIG_PREEMPT | ||
686 | rsm psr.i // disable interrupts | ||
687 | cmp.eq pLvSys,p0=r0,r0 // pLvSys=1: leave from syscall | ||
688 | (pKStk) adds r20=TI_PRE_COUNT+IA64_TASK_SIZE,r13 | ||
689 | ;; | ||
690 | .pred.rel.mutex pUStk,pKStk | ||
691 | (pKStk) ld4 r21=[r20] // r21 <- preempt_count | ||
692 | (pUStk) mov r21=0 // r21 <- 0 | ||
693 | ;; | ||
694 | cmp.eq p6,p0=r21,r0 // p6 <- pUStk || (preempt_count == 0) | ||
695 | #else /* !CONFIG_PREEMPT */ | ||
696 | (pUStk) rsm psr.i | ||
697 | cmp.eq pLvSys,p0=r0,r0 // pLvSys=1: leave from syscall | ||
698 | (pUStk) cmp.eq.unc p6,p0=r0,r0 // p6 <- pUStk | ||
699 | #endif | ||
700 | .work_processed_syscall: | ||
701 | adds r2=PT(LOADRS)+16,r12 | ||
702 | adds r3=PT(AR_BSPSTORE)+16,r12 | ||
703 | adds r18=TI_FLAGS+IA64_TASK_SIZE,r13 | ||
704 | ;; | ||
705 | (p6) ld4 r31=[r18] // load current_thread_info()->flags | ||
706 | ld8 r19=[r2],PT(B6)-PT(LOADRS) // load ar.rsc value for "loadrs" | ||
707 | mov b7=r0 // clear b7 | ||
708 | ;; | ||
709 | ld8 r23=[r3],PT(R11)-PT(AR_BSPSTORE) // load ar.bspstore (may be garbage) | ||
710 | ld8 r18=[r2],PT(R9)-PT(B6) // load b6 | ||
711 | (p6) and r15=TIF_WORK_MASK,r31 // any work other than TIF_SYSCALL_TRACE? | ||
712 | ;; | ||
713 | mov r16=ar.bsp // M2 get existing backing store pointer | ||
714 | (p6) cmp4.ne.unc p6,p0=r15, r0 // any special work pending? | ||
715 | (p6) br.cond.spnt .work_pending_syscall | ||
716 | ;; | ||
717 | // start restoring the state saved on the kernel stack (struct pt_regs): | ||
718 | ld8 r9=[r2],PT(CR_IPSR)-PT(R9) | ||
719 | ld8 r11=[r3],PT(CR_IIP)-PT(R11) | ||
720 | mov f6=f0 // clear f6 | ||
721 | ;; | ||
722 | invala // M0|1 invalidate ALAT | ||
723 | rsm psr.i | psr.ic // M2 initiate turning off of interrupt and interruption collection | ||
724 | mov f9=f0 // clear f9 | ||
725 | |||
726 | ld8 r29=[r2],16 // load cr.ipsr | ||
727 | ld8 r28=[r3],16 // load cr.iip | ||
728 | mov f8=f0 // clear f8 | ||
729 | ;; | ||
730 | ld8 r30=[r2],16 // M0|1 load cr.ifs | ||
731 | mov.m ar.ssd=r0 // M2 clear ar.ssd | ||
732 | cmp.eq p9,p0=r0,r0 // set p9 to indicate that we should restore cr.ifs | ||
733 | ;; | ||
734 | ld8 r25=[r3],16 // M0|1 load ar.unat | ||
735 | mov.m ar.csd=r0 // M2 clear ar.csd | ||
736 | mov r22=r0 // clear r22 | ||
737 | ;; | ||
738 | ld8 r26=[r2],PT(B0)-PT(AR_PFS) // M0|1 load ar.pfs | ||
739 | (pKStk) mov r22=psr // M2 read PSR now that interrupts are disabled | ||
740 | mov f10=f0 // clear f10 | ||
741 | ;; | ||
742 | ld8 r21=[r2],PT(AR_RNAT)-PT(B0) // load b0 | ||
743 | ld8 r27=[r3],PT(PR)-PT(AR_RSC) // load ar.rsc | ||
744 | mov f11=f0 // clear f11 | ||
745 | ;; | ||
746 | ld8 r24=[r2],PT(AR_FPSR)-PT(AR_RNAT) // load ar.rnat (may be garbage) | ||
747 | ld8 r31=[r3],PT(R1)-PT(PR) // load predicates | ||
748 | (pUStk) add r14=IA64_TASK_THREAD_ON_USTACK_OFFSET,r13 | ||
749 | ;; | ||
750 | ld8 r20=[r2],PT(R12)-PT(AR_FPSR) // load ar.fpsr | ||
751 | ld8.fill r1=[r3],16 // load r1 | ||
752 | addl r17=THIS_CPU(ia64_phys_stacked_size_p8),r0 // r17 = &cpu_data->phys_stacked_size_p8 (r3 is still in use as a pt_regs pointer) | ||
753 | ;; | ||
754 | srlz.d // M0 ensure interruption collection is off | ||
755 | ld8.fill r13=[r3],16 | ||
756 | mov f7=f0 // clear f7 | ||
757 | ;; | ||
758 | ld8.fill r12=[r2] // restore r12 (sp) | ||
759 | ld8.fill r15=[r3] // restore r15 | ||
760 | (pUStk) mov r3=1 // r3 = 1 for the on_ustack store below; the load above reads r3 before this write (WAR within a group) | ||
761 | ;; | ||
762 | (pUStk) ld4 r17=[r17] // r17 = cpu_data->phys_stacked_size_p8, as expected by dont_preserve_current_frame | ||
763 | (pUStk) st1 [r14]=r3 // store 1 at the IA64_TASK_THREAD_ON_USTACK_OFFSET byte of the task | ||
764 | mov b6=r18 // I0 restore b6 | ||
765 | ;; | ||
766 | mov r14=r0 // clear r14 | ||
767 | shr.u r18=r19,16 // I0|1 get byte size of existing "dirty" partition | ||
768 | (pKStk) br.cond.dpnt.many skip_rbs_switch | ||
769 | |||
770 | mov.m ar.ccv=r0 // clear ar.ccv | ||
771 | (pNonSys) br.cond.dpnt.many dont_preserve_current_frame | ||
772 | br.cond.sptk.many rbs_switch | ||
773 | END(ia64_leave_syscall) | ||
774 | |||
775 | #ifdef CONFIG_IA32_SUPPORT | ||
776 | GLOBAL_ENTRY(ia64_ret_from_ia32_execve) | ||
777 | PT_REGS_UNWIND_INFO(0) | ||
778 | adds r2=PT(R8)+16,sp // r2 = &pt_regs.r8 | ||
779 | adds r3=PT(R10)+16,sp // r3 = &pt_regs.r10 | ||
780 | ;; | ||
781 | .mem.offset 0,0 | ||
782 | st8.spill [r2]=r8 // store return value in slot for r8 and set unat bit | ||
783 | .mem.offset 8,0 | ||
784 | st8.spill [r3]=r0 // clear error indication in slot for r10 and set unat bit | ||
785 | END(ia64_ret_from_ia32_execve) | ||
786 | // fall through | ||
787 | #endif /* CONFIG_IA32_SUPPORT */ | ||
788 | GLOBAL_ENTRY(ia64_leave_kernel) | ||
789 | PT_REGS_UNWIND_INFO(0) | ||
790 | /* | ||
791 | * work.need_resched etc. mustn't get changed by this CPU before it returns to | ||
792 | * user- or fsys-mode, hence we disable interrupts early on. | ||
793 | * | ||
794 | * p6 controls whether current_thread_info()->flags needs to be checked for | ||
795 | * extra work. We always check for extra work when returning to user-level. | ||
796 | * With CONFIG_PREEMPT, we also check for extra work when the preempt_count | ||
797 | * is 0. After extra work processing has been completed, execution | ||
798 | * resumes at .work_processed_kernel (or, when pLvSys is set, at .work_processed_syscall) with p6 set to 1 if the extra-work-check | ||
799 | * needs to be redone. | ||
800 | */ | ||
801 | #ifdef CONFIG_PREEMPT | ||
802 | rsm psr.i // disable interrupts | ||
803 | cmp.eq p0,pLvSys=r0,r0 // pLvSys=0: leave from kernel | ||
804 | (pKStk) adds r20=TI_PRE_COUNT+IA64_TASK_SIZE,r13 | ||
805 | ;; | ||
806 | .pred.rel.mutex pUStk,pKStk | ||
807 | (pKStk) ld4 r21=[r20] // r21 <- preempt_count | ||
808 | (pUStk) mov r21=0 // r21 <- 0 | ||
809 | ;; | ||
810 | cmp.eq p6,p0=r21,r0 // p6 <- pUStk || (preempt_count == 0) | ||
811 | #else | ||
812 | (pUStk) rsm psr.i | ||
813 | cmp.eq p0,pLvSys=r0,r0 // pLvSys=0: leave from kernel | ||
814 | (pUStk) cmp.eq.unc p6,p0=r0,r0 // p6 <- pUStk | ||
815 | #endif | ||
816 | .work_processed_kernel: | ||
817 | adds r17=TI_FLAGS+IA64_TASK_SIZE,r13 | ||
818 | ;; | ||
819 | (p6) ld4 r31=[r17] // load current_thread_info()->flags | ||
820 | adds r21=PT(PR)+16,r12 | ||
821 | ;; | ||
822 | |||
823 | lfetch [r21],PT(CR_IPSR)-PT(PR) | ||
824 | adds r2=PT(B6)+16,r12 | ||
825 | adds r3=PT(R16)+16,r12 | ||
826 | ;; | ||
827 | lfetch [r21] | ||
828 | ld8 r28=[r2],8 // load b6 | ||
829 | adds r29=PT(R24)+16,r12 | ||
830 | |||
831 | ld8.fill r16=[r3],PT(AR_CSD)-PT(R16) | ||
832 | adds r30=PT(AR_CCV)+16,r12 | ||
833 | (p6) and r19=TIF_WORK_MASK,r31 // any work other than TIF_SYSCALL_TRACE? | ||
834 | ;; | ||
835 | ld8.fill r24=[r29] | ||
836 | ld8 r15=[r30] // load ar.ccv | ||
837 | (p6) cmp4.ne.unc p6,p0=r19, r0 // any special work pending? | ||
838 | ;; | ||
839 | ld8 r29=[r2],16 // load b7 | ||
840 | ld8 r30=[r3],16 // load ar.csd | ||
841 | (p6) br.cond.spnt .work_pending | ||
842 | ;; | ||
843 | ld8 r31=[r2],16 // load ar.ssd | ||
844 | ld8.fill r8=[r3],16 | ||
845 | ;; | ||
846 | ld8.fill r9=[r2],16 | ||
847 | ld8.fill r10=[r3],PT(R17)-PT(R10) | ||
848 | ;; | ||
849 | ld8.fill r11=[r2],PT(R18)-PT(R11) | ||
850 | ld8.fill r17=[r3],16 | ||
851 | ;; | ||
852 | ld8.fill r18=[r2],16 | ||
853 | ld8.fill r19=[r3],16 | ||
854 | ;; | ||
855 | ld8.fill r20=[r2],16 | ||
856 | ld8.fill r21=[r3],16 | ||
857 | mov ar.csd=r30 | ||
858 | mov ar.ssd=r31 | ||
859 | ;; | ||
860 | rsm psr.i | psr.ic // initiate turning off of interrupt and interruption collection | ||
861 | invala // invalidate ALAT | ||
862 | ;; | ||
863 | ld8.fill r22=[r2],24 | ||
864 | ld8.fill r23=[r3],24 | ||
865 | mov b6=r28 | ||
866 | ;; | ||
867 | ld8.fill r25=[r2],16 | ||
868 | ld8.fill r26=[r3],16 | ||
869 | mov b7=r29 | ||
870 | ;; | ||
871 | ld8.fill r27=[r2],16 | ||
872 | ld8.fill r28=[r3],16 | ||
873 | ;; | ||
874 | ld8.fill r29=[r2],16 | ||
875 | ld8.fill r30=[r3],24 | ||
876 | ;; | ||
877 | ld8.fill r31=[r2],PT(F9)-PT(R31) | ||
878 | adds r3=PT(F10)-PT(F6),r3 | ||
879 | ;; | ||
880 | ldf.fill f9=[r2],PT(F6)-PT(F9) | ||
881 | ldf.fill f10=[r3],PT(F8)-PT(F10) | ||
882 | ;; | ||
883 | ldf.fill f6=[r2],PT(F7)-PT(F6) | ||
884 | ;; | ||
885 | ldf.fill f7=[r2],PT(F11)-PT(F7) | ||
886 | ldf.fill f8=[r3],32 | ||
887 | ;; | ||
888 | srlz.i // ensure interruption collection is off | ||
889 | mov ar.ccv=r15 | ||
890 | ;; | ||
891 | ldf.fill f11=[r2] | ||
892 | bsw.0 // switch back to bank 0 (no stop bit required beforehand...) | ||
893 | ;; | ||
894 | (pUStk) mov r18=IA64_KR(CURRENT)// M2 (12 cycle read latency) | ||
895 | adds r16=PT(CR_IPSR)+16,r12 | ||
896 | adds r17=PT(CR_IIP)+16,r12 | ||
897 | |||
898 | (pKStk) mov r22=psr // M2 read PSR now that interrupts are disabled | ||
899 | nop.i 0 | ||
900 | nop.i 0 | ||
901 | ;; | ||
902 | ld8 r29=[r16],16 // load cr.ipsr | ||
903 | ld8 r28=[r17],16 // load cr.iip | ||
904 | ;; | ||
905 | ld8 r30=[r16],16 // load cr.ifs | ||
906 | ld8 r25=[r17],16 // load ar.unat | ||
907 | ;; | ||
908 | ld8 r26=[r16],16 // load ar.pfs | ||
909 | ld8 r27=[r17],16 // load ar.rsc | ||
910 | cmp.eq p9,p0=r0,r0 // set p9 to indicate that we should restore cr.ifs | ||
911 | ;; | ||
912 | ld8 r24=[r16],16 // load ar.rnat (may be garbage) | ||
913 | ld8 r23=[r17],16 // load ar.bspstore (may be garbage) | ||
914 | ;; | ||
915 | ld8 r31=[r16],16 // load predicates | ||
916 | ld8 r21=[r17],16 // load b0 | ||
917 | ;; | ||
918 | ld8 r19=[r16],16 // load ar.rsc value for "loadrs" | ||
919 | ld8.fill r1=[r17],16 // load r1 | ||
920 | ;; | ||
921 | ld8.fill r12=[r16],16 | ||
922 | ld8.fill r13=[r17],16 | ||
923 | (pUStk) adds r18=IA64_TASK_THREAD_ON_USTACK_OFFSET,r18 | ||
924 | ;; | ||
925 | ld8 r20=[r16],16 // ar.fpsr | ||
926 | ld8.fill r15=[r17],16 | ||
927 | ;; | ||
928 | ld8.fill r14=[r16],16 | ||
929 | ld8.fill r2=[r17] | ||
930 | (pUStk) mov r17=1 | ||
931 | ;; | ||
932 | ld8.fill r3=[r16] | ||
933 | (pUStk) st1 [r18]=r17 // restore current->thread.on_ustack | ||
934 | shr.u r18=r19,16 // get byte size of existing "dirty" partition | ||
935 | ;; | ||
936 | mov r16=ar.bsp // get existing backing store pointer | ||
937 | addl r17=THIS_CPU(ia64_phys_stacked_size_p8),r0 | ||
938 | ;; | ||
939 | ld4 r17=[r17] // r17 = cpu_data->phys_stacked_size_p8 | ||
940 | (pKStk) br.cond.dpnt skip_rbs_switch | ||
941 | |||
942 | /* | ||
943 | * Restore user backing store. | ||
944 | * | ||
945 | * NOTE: alloc, loadrs, and cover can't be predicated. | ||
946 | */ | ||
947 | (pNonSys) br.cond.dpnt dont_preserve_current_frame | ||
948 | |||
949 | rbs_switch: | ||
950 | cover // add current frame into dirty partition and set cr.ifs | ||
951 | ;; | ||
952 | mov r19=ar.bsp // get new backing store pointer | ||
953 | sub r16=r16,r18 // krbs = old bsp - size of dirty partition | ||
954 | cmp.ne p9,p0=r0,r0 // clear p9 to skip restore of cr.ifs | ||
955 | ;; | ||
956 | sub r19=r19,r16 // calculate total byte size of dirty partition | ||
957 | add r18=64,r18 // don't force in0-in7 into memory... | ||
958 | ;; | ||
959 | shl r19=r19,16 // shift size of dirty partition into loadrs position | ||
960 | ;; | ||
961 | dont_preserve_current_frame: | ||
962 | /* | ||
963 | * To prevent leaking bits between the kernel and user-space, | ||
964 | * we must clear the stacked registers in the "invalid" partition here. | ||
965 | * Not pretty, but at least it's fast (3.34 registers/cycle on Itanium, | ||
966 | * 5 registers/cycle on McKinley). | ||
967 | */ | ||
968 | # define pRecurse p6 | ||
969 | # define pReturn p7 | ||
970 | #ifdef CONFIG_ITANIUM | ||
971 | # define Nregs 10 | ||
972 | #else | ||
973 | # define Nregs 14 | ||
974 | #endif | ||
975 | alloc loc0=ar.pfs,2,Nregs-2,2,0 | ||
976 | shr.u loc1=r18,9 // RNaTslots <= floor(dirtySize / (64*8)) | ||
977 | sub r17=r17,r18 // r17 = (physStackedSize + 8) - dirtySize | ||
978 | ;; | ||
979 | mov ar.rsc=r19 // load ar.rsc to be used for "loadrs" | ||
980 | shladd in0=loc1,3,r17 | ||
981 | mov in1=0 | ||
982 | ;; | ||
983 | TEXT_ALIGN(32) | ||
984 | rse_clear_invalid: | ||
985 | #ifdef CONFIG_ITANIUM | ||
986 | // cycle 0 | ||
987 | { .mii | ||
988 | alloc loc0=ar.pfs,2,Nregs-2,2,0 | ||
989 | cmp.lt pRecurse,p0=Nregs*8,in0 // if more than Nregs regs left to clear, (re)curse | ||
990 | add out0=-Nregs*8,in0 | ||
991 | }{ .mfb | ||
992 | add out1=1,in1 // increment recursion count | ||
993 | nop.f 0 | ||
994 | nop.b 0 // can't do br.call here because of alloc (WAW on CFM) | ||
995 | ;; | ||
996 | }{ .mfi // cycle 1 | ||
997 | mov loc1=0 | ||
998 | nop.f 0 | ||
999 | mov loc2=0 | ||
1000 | }{ .mib | ||
1001 | mov loc3=0 | ||
1002 | mov loc4=0 | ||
1003 | (pRecurse) br.call.sptk.many b0=rse_clear_invalid | ||
1004 | |||
1005 | }{ .mfi // cycle 2 | ||
1006 | mov loc5=0 | ||
1007 | nop.f 0 | ||
1008 | cmp.ne pReturn,p0=r0,in1 // if recursion count != 0, we need to do a br.ret | ||
1009 | }{ .mib | ||
1010 | mov loc6=0 | ||
1011 | mov loc7=0 | ||
1012 | (pReturn) br.ret.sptk.many b0 | ||
1013 | } | ||
1014 | #else /* !CONFIG_ITANIUM */ | ||
1015 | alloc loc0=ar.pfs,2,Nregs-2,2,0 | ||
1016 | cmp.lt pRecurse,p0=Nregs*8,in0 // if more than Nregs regs left to clear, (re)curse | ||
1017 | add out0=-Nregs*8,in0 | ||
1018 | add out1=1,in1 // increment recursion count | ||
1019 | mov loc1=0 | ||
1020 | mov loc2=0 | ||
1021 | ;; | ||
1022 | mov loc3=0 | ||
1023 | mov loc4=0 | ||
1024 | mov loc5=0 | ||
1025 | mov loc6=0 | ||
1026 | mov loc7=0 | ||
1027 | (pRecurse) br.call.sptk.few b0=rse_clear_invalid | ||
1028 | ;; | ||
1029 | mov loc8=0 | ||
1030 | mov loc9=0 | ||
1031 | cmp.ne pReturn,p0=r0,in1 // if recursion count != 0, we need to do a br.ret | ||
1032 | mov loc10=0 | ||
1033 | mov loc11=0 | ||
1034 | (pReturn) br.ret.sptk.many b0 | ||
1035 | #endif /* !CONFIG_ITANIUM */ | ||
1036 | # undef pRecurse | ||
1037 | # undef pReturn | ||
1038 | ;; | ||
1039 | alloc r17=ar.pfs,0,0,0,0 // drop current register frame | ||
1040 | ;; | ||
1041 | loadrs | ||
1042 | ;; | ||
1043 | skip_rbs_switch: | ||
1044 | mov ar.unat=r25 // M2 | ||
1045 | (pKStk) extr.u r22=r22,21,1 // I0 extract current value of psr.pp from r22 | ||
1046 | (pLvSys)mov r19=r0 // A clear r19 for leave_syscall, no-op otherwise | ||
1047 | ;; | ||
1048 | (pUStk) mov ar.bspstore=r23 // M2 | ||
1049 | (pKStk) dep r29=r22,r29,21,1 // I0 update ipsr.pp with psr.pp | ||
1050 | (pLvSys)mov r16=r0 // A clear r16 for leave_syscall, no-op otherwise | ||
1051 | ;; | ||
1052 | mov cr.ipsr=r29 // M2 | ||
1053 | mov ar.pfs=r26 // I0 | ||
1054 | (pLvSys)mov r17=r0 // A clear r17 for leave_syscall, no-op otherwise | ||
1055 | |||
1056 | (p9) mov cr.ifs=r30 // M2 | ||
1057 | mov b0=r21 // I0 | ||
1058 | (pLvSys)mov r18=r0 // A clear r18 for leave_syscall, no-op otherwise | ||
1059 | |||
1060 | mov ar.fpsr=r20 // M2 | ||
1061 | mov cr.iip=r28 // M2 | ||
1062 | nop 0 | ||
1063 | ;; | ||
1064 | (pUStk) mov ar.rnat=r24 // M2 must happen with RSE in lazy mode | ||
1065 | nop 0 | ||
1066 | (pLvSys)mov r2=r0 | ||
1067 | |||
1068 | mov ar.rsc=r27 // M2 | ||
1069 | mov pr=r31,-1 // I0 | ||
1070 | rfi // B | ||
1071 | |||
1072 | /* | ||
1073 | * On entry: | ||
1074 | * r20 = &current->thread_info->pre_count (if CONFIG_PREEMPT) | ||
1075 | * r31 = current->thread_info->flags | ||
1076 | * On exit: | ||
1077 | * p6 = TRUE if work-pending-check needs to be redone | ||
1078 | */ | ||
1079 | .work_pending_syscall: | ||
1080 | add r2=-8,r2 | ||
1081 | add r3=-8,r3 | ||
1082 | ;; | ||
1083 | st8 [r2]=r8 | ||
1084 | st8 [r3]=r10 | ||
1085 | .work_pending: | ||
1086 | tbit.nz p6,p0=r31,TIF_SIGDELAYED // signal delayed from MCA/INIT/NMI/PMI context? | ||
1087 | (p6) br.cond.sptk.few .sigdelayed | ||
1088 | ;; | ||
1089 | tbit.z p6,p0=r31,TIF_NEED_RESCHED // current_thread_info()->need_resched==0? | ||
1090 | (p6) br.cond.sptk.few .notify | ||
1091 | #ifdef CONFIG_PREEMPT | ||
1092 | (pKStk) dep r21=-1,r0,PREEMPT_ACTIVE_BIT,1 | ||
1093 | ;; | ||
1094 | (pKStk) st4 [r20]=r21 | ||
1095 | ssm psr.i // enable interrupts | ||
1096 | #endif | ||
1097 | br.call.spnt.many rp=schedule | ||
1098 | .ret9: cmp.eq p6,p0=r0,r0 // p6 <- 1 | ||
1099 | rsm psr.i // disable interrupts | ||
1100 | ;; | ||
1101 | #ifdef CONFIG_PREEMPT | ||
1102 | (pKStk) adds r20=TI_PRE_COUNT+IA64_TASK_SIZE,r13 | ||
1103 | ;; | ||
1104 | (pKStk) st4 [r20]=r0 // preempt_count() <- 0 | ||
1105 | #endif | ||
1106 | (pLvSys)br.cond.sptk.few .work_pending_syscall_end | ||
1107 | br.cond.sptk.many .work_processed_kernel // re-check | ||
1108 | |||
1109 | .notify: | ||
1110 | (pUStk) br.call.spnt.many rp=notify_resume_user | ||
1111 | .ret10: cmp.ne p6,p0=r0,r0 // p6 <- 0 | ||
1112 | (pLvSys)br.cond.sptk.few .work_pending_syscall_end | ||
1113 | br.cond.sptk.many .work_processed_kernel // don't re-check | ||
1114 | |||
1115 | // There is a delayed signal that was detected in MCA/INIT/NMI/PMI context where | ||
1116 | // it could not be delivered. Deliver it now. The signal might be for us and | ||
1117 | // may set TIF_SIGPENDING, so redrive ia64_leave_* after processing the delayed | ||
1118 | // signal. | ||
1119 | |||
1120 | .sigdelayed: | ||
1121 | br.call.sptk.many rp=do_sigdelayed | ||
1122 | cmp.eq p6,p0=r0,r0 // p6 <- 1, always re-check | ||
1123 | (pLvSys)br.cond.sptk.few .work_pending_syscall_end | ||
1124 | br.cond.sptk.many .work_processed_kernel // re-check | ||
1125 | |||
1126 | .work_pending_syscall_end: | ||
1127 | adds r2=PT(R8)+16,r12 | ||
1128 | adds r3=PT(R10)+16,r12 | ||
1129 | ;; | ||
1130 | ld8 r8=[r2] | ||
1131 | ld8 r10=[r3] | ||
1132 | br.cond.sptk.many .work_processed_syscall // re-check | ||
1133 | |||
1134 | END(ia64_leave_kernel) | ||
1135 | |||
1136 | ENTRY(handle_syscall_error) | ||
1137 | /* | ||
1138 | * Some system calls (e.g., ptrace, mmap) can return arbitrary values which could | ||
1139 | * lead us to mistake a negative return value as a failed syscall. Those syscalls | ||
1140 | * must deposit a non-zero value in pt_regs.r8 to indicate an error. If | ||
1141 | * pt_regs.r8 is zero, we assume that the call completed successfully. On entry, r2 holds the address of pt_regs.r8. | ||
1142 | */ | ||
1143 | PT_REGS_UNWIND_INFO(0) | ||
1144 | ld8 r3=[r2] // load pt_regs.r8 | ||
1145 | ;; | ||
1146 | cmp.eq p6,p7=r3,r0 // is pt_regs.r8==0? | ||
1147 | ;; | ||
1148 | (p7) mov r10=-1 | ||
1149 | (p7) sub r8=0,r8 // negate return value to get errno | ||
1150 | br.cond.sptk ia64_leave_syscall | ||
1151 | END(handle_syscall_error) | ||
1152 | |||
1153 | /* | ||
1154 | * Invoke schedule_tail(task) while preserving in0-in7, which may be needed | ||
1155 | * in case a system call gets restarted. The task argument is taken from r8. | ||
1156 | */ | ||
1157 | GLOBAL_ENTRY(ia64_invoke_schedule_tail) | ||
1158 | .prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(8) | ||
1159 | alloc loc1=ar.pfs,8,2,1,0 | ||
1160 | mov loc0=rp | ||
1161 | mov out0=r8 // address of previous task (r8 on entry) | ||
1162 | ;; | ||
1163 | br.call.sptk.many rp=schedule_tail | ||
1164 | .ret11: mov ar.pfs=loc1 | ||
1165 | mov rp=loc0 | ||
1166 | br.ret.sptk.many rp | ||
1167 | END(ia64_invoke_schedule_tail) | ||
1168 | |||
1169 | /* | ||
1170 | * Set up the stack and call do_notify_resume_user(). Note that pSys and pNonSys need to | ||
1171 | * be set up by the caller. We declare 8 input registers so the system call | ||
1172 | * args get preserved, in case we need to restart a system call. | ||
1173 | */ | ||
1174 | ENTRY(notify_resume_user) | ||
1175 | .prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(8) | ||
1176 | alloc loc1=ar.pfs,8,2,3,0 // preserve all eight input regs in case of syscall restart! | ||
1177 | mov r9=ar.unat | ||
1178 | mov loc0=rp // save return address | ||
1179 | mov out0=0 // there is no "oldset" | ||
1180 | adds out1=8,sp // out1=&sigscratch->ar_pfs | ||
1181 | (pSys) mov out2=1 // out2==1 => we're in a syscall | ||
1182 | ;; | ||
1183 | (pNonSys) mov out2=0 // out2==0 => not a syscall | ||
1184 | .fframe 16 | ||
1185 | .spillpsp ar.unat, 16 // (note that offset is relative to psp+0x10!) | ||
1186 | st8 [sp]=r9,-16 // allocate space for ar.unat and save it | ||
1187 | st8 [out1]=loc1,-8 // save ar.pfs, out1=&sigscratch | ||
1188 | .body | ||
1189 | br.call.sptk.many rp=do_notify_resume_user | ||
1190 | .ret15: .restore sp | ||
1191 | adds sp=16,sp // pop scratch stack space | ||
1192 | ;; | ||
1193 | ld8 r9=[sp] // load new unat from sigscratch->scratch_unat | ||
1194 | mov rp=loc0 | ||
1195 | ;; | ||
1196 | mov ar.unat=r9 | ||
1197 | mov ar.pfs=loc1 | ||
1198 | br.ret.sptk.many rp | ||
1199 | END(notify_resume_user) | ||
1200 | |||
1201 | GLOBAL_ENTRY(sys_rt_sigsuspend) | ||
1202 | .prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(8) | ||
1203 | alloc loc1=ar.pfs,8,2,3,0 // preserve all eight input regs in case of syscall restart! | ||
1204 | mov r9=ar.unat | ||
1205 | mov loc0=rp // save return address | ||
1206 | mov out0=in0 // mask | ||
1207 | mov out1=in1 // sigsetsize | ||
1208 | adds out2=8,sp // out2=&sigscratch->ar_pfs | ||
1209 | ;; | ||
1210 | .fframe 16 | ||
1211 | .spillpsp ar.unat, 16 // (note that offset is relative to psp+0x10!) | ||
1212 | st8 [sp]=r9,-16 // allocate space for ar.unat and save it | ||
1213 | st8 [out2]=loc1,-8 // save ar.pfs, out2=&sigscratch | ||
1214 | .body | ||
1215 | br.call.sptk.many rp=ia64_rt_sigsuspend | ||
1216 | .ret17: .restore sp | ||
1217 | adds sp=16,sp // pop scratch stack space | ||
1218 | ;; | ||
1219 | ld8 r9=[sp] // load new unat from the sigscratch slot where ar.unat was saved above | ||
1220 | mov rp=loc0 | ||
1221 | ;; | ||
1222 | mov ar.unat=r9 | ||
1223 | mov ar.pfs=loc1 | ||
1224 | br.ret.sptk.many rp | ||
1225 | END(sys_rt_sigsuspend) | ||
1226 | |||
1227 | ENTRY(sys_rt_sigreturn) | ||
1228 | PT_REGS_UNWIND_INFO(0) | ||
1229 | /* | ||
1230 | * Allocate 8 input registers since ptrace() may clobber them | ||
1231 | */ | ||
1232 | alloc r2=ar.pfs,8,0,1,0 | ||
1233 | .prologue | ||
1234 | PT_REGS_SAVES(16) | ||
1235 | adds sp=-16,sp | ||
1236 | .body | ||
1237 | cmp.eq pNonSys,pSys=r0,r0 // sigreturn isn't a normal syscall... | ||
1238 | ;; | ||
1239 | /* | ||
1240 | * leave_kernel() restores f6-f11 from pt_regs, but since the streamlined | ||
1241 | * syscall-entry path does not save them we save them here instead. Note: we | ||
1242 | * don't need to save any other registers that are not saved by the streamlined | ||
1243 | * syscall path, because restore_sigcontext() restores them. | ||
1244 | */ | ||
1245 | adds r16=PT(F6)+32,sp | ||
1246 | adds r17=PT(F7)+32,sp | ||
1247 | ;; | ||
1248 | stf.spill [r16]=f6,32 | ||
1249 | stf.spill [r17]=f7,32 | ||
1250 | ;; | ||
1251 | stf.spill [r16]=f8,32 | ||
1252 | stf.spill [r17]=f9,32 | ||
1253 | ;; | ||
1254 | stf.spill [r16]=f10 | ||
1255 | stf.spill [r17]=f11 | ||
1256 | adds out0=16,sp // out0 = &sigscratch | ||
1257 | br.call.sptk.many rp=ia64_rt_sigreturn | ||
1258 | .ret19: .restore sp 0 | ||
1259 | adds sp=16,sp | ||
1260 | ;; | ||
1261 | ld8 r9=[sp] // load new ar.unat | ||
1262 | mov.sptk b7=r8,ia64_leave_kernel | ||
1263 | ;; | ||
1264 | mov ar.unat=r9 | ||
1265 | br.many b7 | ||
1266 | END(sys_rt_sigreturn) | ||
1267 | |||
1268 | GLOBAL_ENTRY(ia64_prepare_handle_unaligned) /* brackets the call to ia64_handle_unaligned with a switch_stack save and reload */ | ||
1269 | .prologue | ||
1270 | /* | ||
1271 | * r16 = fake ar.pfs, we simply need to make sure privilege is still 0 | ||
1272 | */ | ||
1273 | mov r16=r0 | ||
1274 | DO_SAVE_SWITCH_STACK /* sp -= IA64_SWITCH_STACK_SIZE, then branch through save_switch_stack (macro in entry.h) */ | ||
1275 | br.call.sptk.many rp=ia64_handle_unaligned // stack frame setup in ivt | ||
1276 | .ret21: .body | ||
1277 | DO_LOAD_SWITCH_STACK /* branch through load_switch_stack, then sp += IA64_SWITCH_STACK_SIZE (macro in entry.h) */ | ||
1278 | br.cond.sptk.many rp // goes to ia64_leave_kernel | ||
1279 | END(ia64_prepare_handle_unaligned) | ||
1280 | |||
1281 | // | ||
1282 | // unw_init_running(void (*callback)(info, arg), void *arg) | ||
1283 | // | ||
1284 | # define EXTRA_FRAME_SIZE ((UNW_FRAME_INFO_SIZE+15)&~15) /* UNW_FRAME_INFO_SIZE rounded up to a multiple of 16 */ | ||
1285 | |||
1286 | GLOBAL_ENTRY(unw_init_running) /* saves a switch_stack, builds a frame-info block above it, and calls callback(&info, arg) */ | ||
1287 | .prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(2) | ||
1288 | alloc loc1=ar.pfs,2,3,3,0 | ||
1289 | ;; | ||
1290 | ld8 loc2=[in0],8 /* loc2 = first word at *callback (moved to b6 below); in0 += 8 -- presumably a function descriptor {entry, gp} */ | ||
1291 | mov loc0=rp /* keep the return address in loc0 */ | ||
1292 | mov r16=loc1 /* r16 = ar.pfs value captured by alloc above */ | ||
1293 | DO_SAVE_SWITCH_STACK | ||
1294 | .body | ||
1295 | |||
1296 | .prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(2) | ||
1297 | .fframe IA64_SWITCH_STACK_SIZE+EXTRA_FRAME_SIZE | ||
1298 | SWITCH_STACK_SAVES(EXTRA_FRAME_SIZE) | ||
1299 | adds sp=-EXTRA_FRAME_SIZE,sp /* room for the frame info passed as &info below */ | ||
1300 | .body | ||
1301 | ;; | ||
1302 | adds out0=16,sp // &info | ||
1303 | mov out1=r13 // current | ||
1304 | adds out2=16+EXTRA_FRAME_SIZE,sp // &switch_stack | ||
1305 | br.call.sptk.many rp=unw_init_frame_info | ||
1306 | 1: adds out0=16,sp // &info | ||
1307 | mov b6=loc2 /* b6 = word loaded from *callback at function entry */ | ||
1308 | mov loc2=gp // save gp across indirect function call | ||
1309 | ;; | ||
1310 | ld8 gp=[in0] /* gp = second word at *callback (in0 was post-incremented by 8) */ | ||
1311 | mov out1=in1 // arg | ||
1312 | br.call.sptk.many rp=b6 // invoke the callback function | ||
1313 | 1: mov gp=loc2 // restore gp | ||
1314 | |||
1315 | // For now, we don't allow changing registers from within | ||
1316 | // unw_init_running; if we ever want to allow that, we'd | ||
1317 | // have to do a load_switch_stack here: | ||
1318 | .restore sp | ||
1319 | adds sp=IA64_SWITCH_STACK_SIZE+EXTRA_FRAME_SIZE,sp /* drop frame info and switch_stack area in one step */ | ||
1320 | |||
1321 | mov ar.pfs=loc1 | ||
1322 | mov rp=loc0 | ||
1323 | br.ret.sptk.many rp | ||
1324 | END(unw_init_running) | ||
1325 | |||
1326 | .rodata | ||
1327 | .align 8 | ||
1328 | .globl sys_call_table | ||
1329 | sys_call_table: | ||
1330 | data8 sys_ni_syscall // This must be sys_ni_syscall! See ivt.S. | ||
1331 | data8 sys_exit // 1025 | ||
1332 | data8 sys_read | ||
1333 | data8 sys_write | ||
1334 | data8 sys_open | ||
1335 | data8 sys_close | ||
1336 | data8 sys_creat // 1030 | ||
1337 | data8 sys_link | ||
1338 | data8 sys_unlink | ||
1339 | data8 ia64_execve | ||
1340 | data8 sys_chdir | ||
1341 | data8 sys_fchdir // 1035 | ||
1342 | data8 sys_utimes | ||
1343 | data8 sys_mknod | ||
1344 | data8 sys_chmod | ||
1345 | data8 sys_chown | ||
1346 | data8 sys_lseek // 1040 | ||
1347 | data8 sys_getpid | ||
1348 | data8 sys_getppid | ||
1349 | data8 sys_mount | ||
1350 | data8 sys_umount | ||
1351 | data8 sys_setuid // 1045 | ||
1352 | data8 sys_getuid | ||
1353 | data8 sys_geteuid | ||
1354 | data8 sys_ptrace | ||
1355 | data8 sys_access | ||
1356 | data8 sys_sync // 1050 | ||
1357 | data8 sys_fsync | ||
1358 | data8 sys_fdatasync | ||
1359 | data8 sys_kill | ||
1360 | data8 sys_rename | ||
1361 | data8 sys_mkdir // 1055 | ||
1362 | data8 sys_rmdir | ||
1363 | data8 sys_dup | ||
1364 | data8 sys_pipe | ||
1365 | data8 sys_times | ||
1366 | data8 ia64_brk // 1060 | ||
1367 | data8 sys_setgid | ||
1368 | data8 sys_getgid | ||
1369 | data8 sys_getegid | ||
1370 | data8 sys_acct | ||
1371 | data8 sys_ioctl // 1065 | ||
1372 | data8 sys_fcntl | ||
1373 | data8 sys_umask | ||
1374 | data8 sys_chroot | ||
1375 | data8 sys_ustat | ||
1376 | data8 sys_dup2 // 1070 | ||
1377 | data8 sys_setreuid | ||
1378 | data8 sys_setregid | ||
1379 | data8 sys_getresuid | ||
1380 | data8 sys_setresuid | ||
1381 | data8 sys_getresgid // 1075 | ||
1382 | data8 sys_setresgid | ||
1383 | data8 sys_getgroups | ||
1384 | data8 sys_setgroups | ||
1385 | data8 sys_getpgid | ||
1386 | data8 sys_setpgid // 1080 | ||
1387 | data8 sys_setsid | ||
1388 | data8 sys_getsid | ||
1389 | data8 sys_sethostname | ||
1390 | data8 sys_setrlimit | ||
1391 | data8 sys_getrlimit // 1085 | ||
1392 | data8 sys_getrusage | ||
1393 | data8 sys_gettimeofday | ||
1394 | data8 sys_settimeofday | ||
1395 | data8 sys_select | ||
1396 | data8 sys_poll // 1090 | ||
1397 | data8 sys_symlink | ||
1398 | data8 sys_readlink | ||
1399 | data8 sys_uselib | ||
1400 | data8 sys_swapon | ||
1401 | data8 sys_swapoff // 1095 | ||
1402 | data8 sys_reboot | ||
1403 | data8 sys_truncate | ||
1404 | data8 sys_ftruncate | ||
1405 | data8 sys_fchmod | ||
1406 | data8 sys_fchown // 1100 | ||
1407 | data8 ia64_getpriority | ||
1408 | data8 sys_setpriority | ||
1409 | data8 sys_statfs | ||
1410 | data8 sys_fstatfs | ||
1411 | data8 sys_gettid // 1105 | ||
1412 | data8 sys_semget | ||
1413 | data8 sys_semop | ||
1414 | data8 sys_semctl | ||
1415 | data8 sys_msgget | ||
1416 | data8 sys_msgsnd // 1110 | ||
1417 | data8 sys_msgrcv | ||
1418 | data8 sys_msgctl | ||
1419 | data8 sys_shmget | ||
1420 | data8 ia64_shmat | ||
1421 | data8 sys_shmdt // 1115 | ||
1422 | data8 sys_shmctl | ||
1423 | data8 sys_syslog | ||
1424 | data8 sys_setitimer | ||
1425 | data8 sys_getitimer | ||
1426 | data8 sys_ni_syscall // 1120 /* was: ia64_oldstat */ | ||
1427 | data8 sys_ni_syscall /* was: ia64_oldlstat */ | ||
1428 | data8 sys_ni_syscall /* was: ia64_oldfstat */ | ||
1429 | data8 sys_vhangup | ||
1430 | data8 sys_lchown | ||
1431 | data8 sys_remap_file_pages // 1125 | ||
1432 | data8 sys_wait4 | ||
1433 | data8 sys_sysinfo | ||
1434 | data8 sys_clone | ||
1435 | data8 sys_setdomainname | ||
1436 | data8 sys_newuname // 1130 | ||
1437 | data8 sys_adjtimex | ||
1438 | data8 sys_ni_syscall /* was: ia64_create_module */ | ||
1439 | data8 sys_init_module | ||
1440 | data8 sys_delete_module | ||
1441 | data8 sys_ni_syscall // 1135 /* was: sys_get_kernel_syms */ | ||
1442 | data8 sys_ni_syscall /* was: sys_query_module */ | ||
1443 | data8 sys_quotactl | ||
1444 | data8 sys_bdflush | ||
1445 | data8 sys_sysfs | ||
1446 | data8 sys_personality // 1140 | ||
1447 | data8 sys_ni_syscall // sys_afs_syscall | ||
1448 | data8 sys_setfsuid | ||
1449 | data8 sys_setfsgid | ||
1450 | data8 sys_getdents | ||
1451 | data8 sys_flock // 1145 | ||
1452 | data8 sys_readv | ||
1453 | data8 sys_writev | ||
1454 | data8 sys_pread64 | ||
1455 | data8 sys_pwrite64 | ||
1456 | data8 sys_sysctl // 1150 | ||
1457 | data8 sys_mmap | ||
1458 | data8 sys_munmap | ||
1459 | data8 sys_mlock | ||
1460 | data8 sys_mlockall | ||
1461 | data8 sys_mprotect // 1155 | ||
1462 | data8 ia64_mremap | ||
1463 | data8 sys_msync | ||
1464 | data8 sys_munlock | ||
1465 | data8 sys_munlockall | ||
1466 | data8 sys_sched_getparam // 1160 | ||
1467 | data8 sys_sched_setparam | ||
1468 | data8 sys_sched_getscheduler | ||
1469 | data8 sys_sched_setscheduler | ||
1470 | data8 sys_sched_yield | ||
1471 | data8 sys_sched_get_priority_max // 1165 | ||
1472 | data8 sys_sched_get_priority_min | ||
1473 | data8 sys_sched_rr_get_interval | ||
1474 | data8 sys_nanosleep | ||
1475 | data8 sys_nfsservctl | ||
1476 | data8 sys_prctl // 1170 | ||
1477 | data8 sys_getpagesize | ||
1478 | data8 sys_mmap2 | ||
1479 | data8 sys_pciconfig_read | ||
1480 | data8 sys_pciconfig_write | ||
1481 | data8 sys_perfmonctl // 1175 | ||
1482 | data8 sys_sigaltstack | ||
1483 | data8 sys_rt_sigaction | ||
1484 | data8 sys_rt_sigpending | ||
1485 | data8 sys_rt_sigprocmask | ||
1486 | data8 sys_rt_sigqueueinfo // 1180 | ||
1487 | data8 sys_rt_sigreturn | ||
1488 | data8 sys_rt_sigsuspend | ||
1489 | data8 sys_rt_sigtimedwait | ||
1490 | data8 sys_getcwd | ||
1491 | data8 sys_capget // 1185 | ||
1492 | data8 sys_capset | ||
1493 | data8 sys_sendfile64 | ||
1494 | data8 sys_ni_syscall // sys_getpmsg (STREAMS) | ||
1495 | data8 sys_ni_syscall // sys_putpmsg (STREAMS) | ||
1496 | data8 sys_socket // 1190 | ||
1497 | data8 sys_bind | ||
1498 | data8 sys_connect | ||
1499 | data8 sys_listen | ||
1500 | data8 sys_accept | ||
1501 | data8 sys_getsockname // 1195 | ||
1502 | data8 sys_getpeername | ||
1503 | data8 sys_socketpair | ||
1504 | data8 sys_send | ||
1505 | data8 sys_sendto | ||
1506 | data8 sys_recv // 1200 | ||
1507 | data8 sys_recvfrom | ||
1508 | data8 sys_shutdown | ||
1509 | data8 sys_setsockopt | ||
1510 | data8 sys_getsockopt | ||
1511 | data8 sys_sendmsg // 1205 | ||
1512 | data8 sys_recvmsg | ||
1513 | data8 sys_pivot_root | ||
1514 | data8 sys_mincore | ||
1515 | data8 sys_madvise | ||
1516 | data8 sys_newstat // 1210 | ||
1517 | data8 sys_newlstat | ||
1518 | data8 sys_newfstat | ||
1519 | data8 sys_clone2 | ||
1520 | data8 sys_getdents64 | ||
1521 | data8 sys_getunwind // 1215 | ||
1522 | data8 sys_readahead | ||
1523 | data8 sys_setxattr | ||
1524 | data8 sys_lsetxattr | ||
1525 | data8 sys_fsetxattr | ||
1526 | data8 sys_getxattr // 1220 | ||
1527 | data8 sys_lgetxattr | ||
1528 | data8 sys_fgetxattr | ||
1529 | data8 sys_listxattr | ||
1530 | data8 sys_llistxattr | ||
1531 | data8 sys_flistxattr // 1225 | ||
1532 | data8 sys_removexattr | ||
1533 | data8 sys_lremovexattr | ||
1534 | data8 sys_fremovexattr | ||
1535 | data8 sys_tkill | ||
1536 | data8 sys_futex // 1230 | ||
1537 | data8 sys_sched_setaffinity | ||
1538 | data8 sys_sched_getaffinity | ||
1539 | data8 sys_set_tid_address | ||
1540 | data8 sys_fadvise64_64 | ||
1541 | data8 sys_tgkill // 1235 | ||
1542 | data8 sys_exit_group | ||
1543 | data8 sys_lookup_dcookie | ||
1544 | data8 sys_io_setup | ||
1545 | data8 sys_io_destroy | ||
1546 | data8 sys_io_getevents // 1240 | ||
1547 | data8 sys_io_submit | ||
1548 | data8 sys_io_cancel | ||
1549 | data8 sys_epoll_create | ||
1550 | data8 sys_epoll_ctl | ||
1551 | data8 sys_epoll_wait // 1245 | ||
1552 | data8 sys_restart_syscall | ||
1553 | data8 sys_semtimedop | ||
1554 | data8 sys_timer_create | ||
1555 | data8 sys_timer_settime | ||
1556 | data8 sys_timer_gettime // 1250 | ||
1557 | data8 sys_timer_getoverrun | ||
1558 | data8 sys_timer_delete | ||
1559 | data8 sys_clock_settime | ||
1560 | data8 sys_clock_gettime | ||
1561 | data8 sys_clock_getres // 1255 | ||
1562 | data8 sys_clock_nanosleep | ||
1563 | data8 sys_fstatfs64 | ||
1564 | data8 sys_statfs64 | ||
1565 | data8 sys_mbind | ||
1566 | data8 sys_get_mempolicy // 1260 | ||
1567 | data8 sys_set_mempolicy | ||
1568 | data8 sys_mq_open | ||
1569 | data8 sys_mq_unlink | ||
1570 | data8 sys_mq_timedsend | ||
1571 | data8 sys_mq_timedreceive // 1265 | ||
1572 | data8 sys_mq_notify | ||
1573 | data8 sys_mq_getsetattr | ||
1574 | data8 sys_ni_syscall // reserved for kexec_load | ||
1575 | data8 sys_ni_syscall // reserved for vserver | ||
1576 | data8 sys_waitid // 1270 | ||
1577 | data8 sys_add_key | ||
1578 | data8 sys_request_key | ||
1579 | data8 sys_keyctl | ||
1580 | data8 sys_ni_syscall | ||
1581 | data8 sys_ni_syscall // 1275 | ||
1582 | data8 sys_ni_syscall | ||
1583 | data8 sys_ni_syscall | ||
1584 | data8 sys_ni_syscall | ||
1585 | data8 sys_ni_syscall | ||
1586 | |||
1587 | .org sys_call_table + 8*NR_syscalls // guard against failures to increase NR_syscalls | ||
diff --git a/arch/ia64/kernel/entry.h b/arch/ia64/kernel/entry.h new file mode 100644 index 000000000000..6d4ecec989b5 --- /dev/null +++ b/arch/ia64/kernel/entry.h | |||
@@ -0,0 +1,82 @@ | |||
1 | #include <linux/config.h> | ||
2 | |||
3 | /* | ||
4 | * Preserved registers that are shared between code in ivt.S and | ||
5 | * entry.S. Be careful not to step on these! | ||
6 | */ | ||
7 | #define PRED_LEAVE_SYSCALL 1 /* TRUE iff leave from syscall */ | ||
8 | #define PRED_KERNEL_STACK 2 /* returning to kernel-stacks? */ | ||
9 | #define PRED_USER_STACK 3 /* returning to user-stacks? */ | ||
10 | #define PRED_SYSCALL 4 /* inside a system call? */ | ||
11 | #define PRED_NON_SYSCALL 5 /* complement of PRED_SYSCALL */ | ||
12 | |||
13 | #ifdef __ASSEMBLY__ | ||
14 | # define PASTE2(x,y) x##y /* raw token paste */ | ||
15 | # define PASTE(x,y) PASTE2(x,y) /* extra level so x and y are macro-expanded before pasting */ | ||
16 | |||
17 | # define pLvSys PASTE(p,PRED_LEAVE_SYSCALL) /* expands to p1 */ | ||
18 | # define pKStk PASTE(p,PRED_KERNEL_STACK) /* expands to p2 */ | ||
19 | # define pUStk PASTE(p,PRED_USER_STACK) /* expands to p3 */ | ||
20 | # define pSys PASTE(p,PRED_SYSCALL) /* expands to p4 */ | ||
21 | # define pNonSys PASTE(p,PRED_NON_SYSCALL) /* expands to p5 */ | ||
22 | #endif | ||
23 | |||
24 | #define PT(f) (IA64_PT_REGS_##f##_OFFSET) /* PT(F6) -> (IA64_PT_REGS_F6_OFFSET) */ | ||
25 | #define SW(f) (IA64_SWITCH_STACK_##f##_OFFSET) /* SW(PR) -> (IA64_SWITCH_STACK_PR_OFFSET) */ | ||
26 | |||
27 | #define PT_REGS_SAVES(off) /* unwind directives only: frame size plus save slots of rp, ar.pfs, ar.unat, ar.fpsr, pr at PT(x)+16+(off) */ \ | ||
28 | .unwabi 3, 'i'; \ | ||
29 | .fframe IA64_PT_REGS_SIZE+16+(off); \ | ||
30 | .spillsp rp, PT(CR_IIP)+16+(off); \ | ||
31 | .spillsp ar.pfs, PT(CR_IFS)+16+(off); \ | ||
32 | .spillsp ar.unat, PT(AR_UNAT)+16+(off); \ | ||
33 | .spillsp ar.fpsr, PT(AR_FPSR)+16+(off); \ | ||
34 | .spillsp pr, PT(PR)+16+(off); | ||
35 | |||
36 | #define PT_REGS_UNWIND_INFO(off) /* PT_REGS_SAVES(off) wrapped in its own .prologue/.body pair */ \ | ||
37 | .prologue; \ | ||
38 | PT_REGS_SAVES(off); \ | ||
39 | .body | ||
40 | |||
41 | #define SWITCH_STACK_SAVES(off) /* unwind directives only: save slot of each switch_stack register at SW(x)+16+(off) */ \ | ||
42 | .savesp ar.unat,SW(CALLER_UNAT)+16+(off); \ | ||
43 | .savesp ar.fpsr,SW(AR_FPSR)+16+(off); \ | ||
44 | .spillsp f2,SW(F2)+16+(off); .spillsp f3,SW(F3)+16+(off); \ | ||
45 | .spillsp f4,SW(F4)+16+(off); .spillsp f5,SW(F5)+16+(off); \ | ||
46 | .spillsp f16,SW(F16)+16+(off); .spillsp f17,SW(F17)+16+(off); \ | ||
47 | .spillsp f18,SW(F18)+16+(off); .spillsp f19,SW(F19)+16+(off); \ | ||
48 | .spillsp f20,SW(F20)+16+(off); .spillsp f21,SW(F21)+16+(off); \ | ||
49 | .spillsp f22,SW(F22)+16+(off); .spillsp f23,SW(F23)+16+(off); \ | ||
50 | .spillsp f24,SW(F24)+16+(off); .spillsp f25,SW(F25)+16+(off); \ | ||
51 | .spillsp f26,SW(F26)+16+(off); .spillsp f27,SW(F27)+16+(off); \ | ||
52 | .spillsp f28,SW(F28)+16+(off); .spillsp f29,SW(F29)+16+(off); \ | ||
53 | .spillsp f30,SW(F30)+16+(off); .spillsp f31,SW(F31)+16+(off); \ | ||
54 | .spillsp r4,SW(R4)+16+(off); .spillsp r5,SW(R5)+16+(off); \ | ||
55 | .spillsp r6,SW(R6)+16+(off); .spillsp r7,SW(R7)+16+(off); \ | ||
56 | .spillsp b0,SW(B0)+16+(off); .spillsp b1,SW(B1)+16+(off); \ | ||
57 | .spillsp b2,SW(B2)+16+(off); .spillsp b3,SW(B3)+16+(off); \ | ||
58 | .spillsp b4,SW(B4)+16+(off); .spillsp b5,SW(B5)+16+(off); \ | ||
59 | .spillsp ar.pfs,SW(AR_PFS)+16+(off); .spillsp ar.lc,SW(AR_LC)+16+(off); \ | ||
60 | .spillsp @priunat,SW(AR_UNAT)+16+(off); \ | ||
61 | .spillsp ar.rnat,SW(AR_RNAT)+16+(off); \ | ||
62 | .spillsp ar.bspstore,SW(AR_BSPSTORE)+16+(off); \ | ||
63 | .spillsp pr,SW(PR)+16+(off) | ||
64 | |||
65 | #define DO_SAVE_SWITCH_STACK /* reserve IA64_SWITCH_STACK_SIZE bytes and branch to save_switch_stack; execution resumes at local label 1 */ \ | ||
66 | movl r28=1f; /* r28 = address of label 1 below */ \ | ||
67 | ;; \ | ||
68 | .fframe IA64_SWITCH_STACK_SIZE; \ | ||
69 | adds sp=-IA64_SWITCH_STACK_SIZE,sp; \ | ||
70 | mov.ret.sptk b7=r28,1f; /* b7 = r28; presumably where save_switch_stack branches back to -- confirm there */ \ | ||
71 | SWITCH_STACK_SAVES(0); \ | ||
72 | br.cond.sptk.many save_switch_stack; \ | ||
73 | 1: | ||
74 | |||
75 | #define DO_LOAD_SWITCH_STACK /* branch to load_switch_stack, then release the IA64_SWITCH_STACK_SIZE bytes at label 1 */ \ | ||
76 | movl r28=1f; /* r28 = address of label 1 below */ \ | ||
77 | ;; \ | ||
78 | invala; \ | ||
79 | mov.ret.sptk b7=r28,1f; /* b7 = r28; presumably where load_switch_stack branches back to -- confirm there */ \ | ||
80 | br.cond.sptk.many load_switch_stack; \ | ||
81 | 1: .restore sp; \ | ||
82 | adds sp=IA64_SWITCH_STACK_SIZE,sp | ||
diff --git a/arch/ia64/kernel/fsys.S b/arch/ia64/kernel/fsys.S new file mode 100644 index 000000000000..0d8650f7fce7 --- /dev/null +++ b/arch/ia64/kernel/fsys.S | |||
@@ -0,0 +1,884 @@ | |||
1 | /* | ||
2 | * This file contains the light-weight system call handlers (fsyscall-handlers). | ||
3 | * | ||
4 | * Copyright (C) 2003 Hewlett-Packard Co | ||
5 | * David Mosberger-Tang <davidm@hpl.hp.com> | ||
6 | * | ||
7 | * 25-Sep-03 davidm Implement fsys_rt_sigprocmask(). | ||
8 | * 18-Feb-03 louisk Implement fsys_gettimeofday(). | ||
9 | * 28-Feb-03 davidm Fixed several bugs in fsys_gettimeofday(). Tuned it some more, | ||
10 | * probably broke it along the way... ;-) | ||
11 | * 13-Jul-04 clameter Implement fsys_clock_gettime and revise fsys_gettimeofday to make | ||
12 | * it capable of using memory based clocks without falling back to C code. | ||
13 | */ | ||
14 | |||
15 | #include <asm/asmmacro.h> | ||
16 | #include <asm/errno.h> | ||
17 | #include <asm/offsets.h> | ||
18 | #include <asm/percpu.h> | ||
19 | #include <asm/thread_info.h> | ||
20 | #include <asm/sal.h> | ||
21 | #include <asm/signal.h> | ||
22 | #include <asm/system.h> | ||
23 | #include <asm/unistd.h> | ||
24 | |||
25 | #include "entry.h" | ||
26 | |||
27 | /* | ||
28 | * See Documentation/ia64/fsys.txt for details on fsyscalls. | ||
29 | * | ||
30 | * On entry to an fsyscall handler: | ||
31 | * r10 = 0 (i.e., defaults to "successful syscall return") | ||
32 | * r11 = saved ar.pfs (a user-level value) | ||
33 | * r15 = system call number | ||
34 | * r16 = "current" task pointer (in normal kernel-mode, this is in r13) | ||
35 | * r32-r39 = system call arguments | ||
36 | * b6 = return address (a user-level value) | ||
37 | * ar.pfs = previous frame-state (a user-level value) | ||
38 | * PSR.be = cleared to zero (i.e., little-endian byte order is in effect) | ||
39 | * all other registers may contain values passed in from user-mode | ||
40 | * | ||
41 | * On return from an fsyscall handler: | ||
42 | * r11 = saved ar.pfs (as passed into the fsyscall handler) | ||
43 | * r15 = system call number (as passed into the fsyscall handler) | ||
44 | * r32-r39 = system call arguments (as passed into the fsyscall handler) | ||
45 | * b6 = return address (as passed into the fsyscall handler) | ||
46 | * ar.pfs = previous frame-state (as passed into the fsyscall handler) | ||
47 | */ | ||
48 | |||
49 | ENTRY(fsys_ni_syscall) /* fsyscall handler that always fails with ENOSYS */ | ||
50 | .prologue | ||
51 | .altrp b6 /* return address lives in b6 (see the on-entry register notes above) */ | ||
52 | .body | ||
53 | mov r8=ENOSYS /* r8 = error code */ | ||
54 | mov r10=-1 /* r10 = -1 flags failure; the entry default r10 = 0 means success */ | ||
55 | FSYS_RETURN | ||
56 | END(fsys_ni_syscall) | ||
57 | |||
58 | ENTRY(fsys_getpid) /* returns current->tgid in r8, or diverts to fsys_fallback_syscall when any TIF_ALLWORK_MASK bit is set */ | ||
59 | .prologue | ||
60 | .altrp b6 | ||
61 | .body | ||
62 | add r9=TI_FLAGS+IA64_TASK_SIZE,r16 /* r9 = address of the flags word, TI_FLAGS+IA64_TASK_SIZE past the task pointer in r16 */ | ||
63 | ;; | ||
64 | ld4 r9=[r9] /* r9 = flags */ | ||
65 | add r8=IA64_TASK_TGID_OFFSET,r16 | ||
66 | ;; | ||
67 | and r9=TIF_ALLWORK_MASK,r9 /* keep only the work-pending bits */ | ||
68 | ld4 r8=[r8] // r8 = current->tgid | ||
69 | ;; | ||
70 | cmp.ne p8,p0=0,r9 /* p8 = at least one work bit is set */ | ||
71 | (p8) br.spnt.many fsys_fallback_syscall | ||
72 | FSYS_RETURN | ||
73 | END(fsys_getpid) | ||
74 | |||
75 | ENTRY(fsys_getppid) /* returns current->group_leader->real_parent->tgid in r8 */ | ||
76 | .prologue | ||
77 | .altrp b6 | ||
78 | .body | ||
79 | add r17=IA64_TASK_GROUP_LEADER_OFFSET,r16 | ||
80 | ;; | ||
81 | ld8 r17=[r17] // r17 = current->group_leader | ||
82 | add r9=TI_FLAGS+IA64_TASK_SIZE,r16 | ||
83 | ;; | ||
84 | |||
85 | ld4 r9=[r9] | ||
86 | add r17=IA64_TASK_REAL_PARENT_OFFSET,r17 // r17 = &current->group_leader->real_parent | ||
87 | ;; | ||
88 | and r9=TIF_ALLWORK_MASK,r9 | ||
89 | |||
90 | 1: ld8 r18=[r17] // r18 = current->group_leader->real_parent | ||
91 | ;; | ||
92 | cmp.ne p8,p0=0,r9 | ||
93 | add r8=IA64_TASK_TGID_OFFSET,r18 // r8 = &current->group_leader->real_parent->tgid | ||
94 | ;; | ||
95 | |||
96 | /* | ||
97 | * The .acq is needed to ensure that the read of tgid has returned its data before | ||
98 | * we re-check "real_parent". | ||
99 | */ | ||
100 | ld4.acq r8=[r8] // r8 = current->group_leader->real_parent->tgid | ||
101 | #ifdef CONFIG_SMP | ||
102 | /* | ||
103 | * Re-read current->group_leader->real_parent. | ||
104 | */ | ||
105 | ld8 r19=[r17] // r19 = current->group_leader->real_parent | ||
106 | (p8) br.spnt.many fsys_fallback_syscall | ||
107 | ;; | ||
108 | cmp.ne p6,p0=r18,r19 // did real_parent change? | ||
109 | mov r19=0 // i must not leak kernel bits... | ||
110 | (p6) br.cond.spnt.few 1b // yes -> redo the read of tgid and the check | ||
111 | ;; | ||
112 | mov r17=0 // i must not leak kernel bits... | ||
113 | mov r18=0 // i must not leak kernel bits... | ||
114 | #else /* NOTE(review): p8 (work pending) is only tested in the CONFIG_SMP branch above; this branch never reaches fsys_fallback_syscall -- confirm that is intended */ | ||
115 | mov r17=0 // i must not leak kernel bits... | ||
116 | mov r18=0 // i must not leak kernel bits... | ||
117 | mov r19=0 // i must not leak kernel bits... | ||
118 | #endif | ||
119 | FSYS_RETURN | ||
120 | END(fsys_getppid) | ||
121 | |||
122 | ENTRY(fsys_set_tid_address) /* stores r32 at current + IA64_TASK_CLEAR_CHILD_TID_OFFSET and returns the word at IA64_TASK_PID_OFFSET in r8 */ | ||
123 | .prologue | ||
124 | .altrp b6 | ||
125 | .body | ||
126 | add r9=TI_FLAGS+IA64_TASK_SIZE,r16 | ||
127 | ;; | ||
128 | ld4 r9=[r9] | ||
129 | tnat.z p6,p7=r32 // check argument register for being NaT (p6 = not NaT, p7 = NaT) | ||
130 | ;; | ||
131 | and r9=TIF_ALLWORK_MASK,r9 | ||
132 | add r8=IA64_TASK_PID_OFFSET,r16 | ||
133 | add r18=IA64_TASK_CLEAR_CHILD_TID_OFFSET,r16 | ||
134 | ;; | ||
135 | ld4 r8=[r8] /* r8 = result: 4-byte load from current + IA64_TASK_PID_OFFSET */ | ||
136 | cmp.ne p8,p0=0,r9 /* p8 = work pending */ | ||
137 | mov r17=-1 | ||
138 | ;; | ||
139 | (p6) st8 [r18]=r32 /* normal case: record the user pointer */ | ||
140 | (p7) st8 [r18]=r17 /* NaT argument: store -1 instead */ | ||
141 | (p8) br.spnt.many fsys_fallback_syscall /* NOTE(review): the stores above precede this branch, so they appear to happen on the fallback path too */ | ||
142 | ;; | ||
143 | mov r17=0 // i must not leak kernel bits... | ||
144 | mov r18=0 // i must not leak kernel bits... | ||
145 | FSYS_RETURN | ||
146 | END(fsys_set_tid_address) | ||
147 | |||
148 | /* | ||
149 | * Ensure that the time interpolator structure is compatible with the asm code | ||
150 | */ | ||
151 | #if IA64_TIME_INTERPOLATOR_SOURCE_OFFSET !=0 || IA64_TIME_INTERPOLATOR_SHIFT_OFFSET != 2 \ | ||
152 | || IA64_TIME_INTERPOLATOR_JITTER_OFFSET != 3 || IA64_TIME_INTERPOLATOR_NSEC_OFFSET != 4 | ||
153 | #error fsys_gettimeofday incompatible with changes to struct time_interpolator | ||
154 | #endif | ||
155 | #define CLOCK_REALTIME 0 | ||
156 | #define CLOCK_MONOTONIC 1 /* fsys_clock_gettime shifts the clock id left by 15, so this becomes CLOCK_ADD_MONOTONIC */ | ||
157 | #define CLOCK_DIVIDE_BY_1000 0x4000 /* bit 14: copied into p14 by "mov pr = r30,0xc000" in .gettime */ | ||
158 | #define CLOCK_ADD_MONOTONIC 0x8000 /* bit 15: copied into p15 by the same instruction */ | ||
159 | |||
160 | ENTRY(fsys_gettimeofday) /* light-weight gettimeofday; its .gettime tail is also entered from fsys_clock_gettime */ | ||
161 | .prologue | ||
162 | .altrp b6 | ||
163 | .body | ||
164 | mov r31 = r32 /* r31 = result pointer, as .gettime expects */ | ||
165 | tnat.nz p6,p0 = r33 // guard against NaT argument | ||
166 | (p6) br.cond.spnt.few .fail_einval | ||
167 | mov r30 = CLOCK_DIVIDE_BY_1000 /* select the divide-by-1000 step (p14) in .gettime */ | ||
168 | ;; | ||
169 | .gettime: | ||
170 | // Register map | ||
171 | // Incoming r31 = pointer to address where to place result | ||
172 | // r30 = flags determining how time is processed | ||
173 | // r2,r3 = temp r4-r7 preserved | ||
174 | // r8 = result nanoseconds | ||
175 | // r9 = result seconds | ||
176 | // r10 = temporary storage for clock difference | ||
177 | // r11 = preserved: saved ar.pfs | ||
178 | // r12 = preserved: memory stack | ||
179 | // r13 = preserved: thread pointer | ||
180 | // r14 = address of mask / mask | ||
181 | // r15 = preserved: system call number | ||
182 | // r16 = preserved: current task pointer | ||
183 | // r17 = wall to monotonic use | ||
184 | // r18 = time_interpolator->offset | ||
185 | // r19 = address of wall_to_monotonic | ||
186 | // r20 = pointer to struct time_interpolator / pointer to time_interpolator->address | ||
187 | // r21 = shift factor | ||
188 | // r22 = address of time interpolator->last_counter | ||
189 | // r23 = address of time_interpolator->last_cycle | ||
190 | // r24 = address of time_interpolator->offset | ||
191 | // r25 = last_cycle value | ||
192 | // r26 = last_counter value | ||
193 | // r27 = pointer to xtime | ||
194 | // r28 = sequence number at the beginning of critical section | ||
195 | // r29 = address of seqlock | ||
196 | // r30 = time processing flags / memory address | ||
197 | // r31 = pointer to result | ||
198 | // Predicates | ||
199 | // p6,p7 short term use | ||
200 | // p8 = timesource ar.itc | ||
201 | // p9 = timesource mmio64 | ||
202 | // p10 = timesource mmio32 | ||
203 | // p11 = timesource not to be handled by asm code | ||
204 | // p12 = memory time source ( = p9 | p10) | ||
205 | // p13 = do cmpxchg with time_interpolator_last_cycle | ||
206 | // p14 = Divide by 1000 | ||
207 | // p15 = Add monotonic | ||
208 | // | ||
209 | // Note that instructions are optimized for McKinley. McKinley can process two | ||
210 | // bundles simultaneously and therefore we continuously try to feed the CPU | ||
211 | // two bundles and then a stop. | ||
212 | tnat.nz p6,p0 = r31 // branch deferred since it does not fit into bundle structure | ||
213 | mov pr = r30,0xc000 // Set predicates according to function (mask 0xc000 touches only p14 and p15) | ||
214 | add r2 = TI_FLAGS+IA64_TASK_SIZE,r16 | ||
215 | movl r20 = time_interpolator | ||
216 | ;; | ||
217 | ld8 r20 = [r20] // get pointer to time_interpolator structure | ||
218 | movl r29 = xtime_lock | ||
219 | ld4 r2 = [r2] // process work pending flags | ||
220 | movl r27 = xtime | ||
221 | ;; // only one bundle here | ||
222 | ld8 r21 = [r20] // first quad with control information | ||
223 | and r2 = TIF_ALLWORK_MASK,r2 | ||
224 | (p6) br.cond.spnt.few .fail_einval // deferred branch | ||
225 | ;; | ||
226 | add r10 = IA64_TIME_INTERPOLATOR_ADDRESS_OFFSET,r20 | ||
227 | extr r3 = r21,32,32 // time_interpolator->nsec_per_cyc | ||
228 | extr r8 = r21,0,16 // time_interpolator->source | ||
229 | cmp.ne p6, p0 = 0, r2 // Fallback if work is scheduled | ||
230 | (p6) br.cond.spnt.many fsys_fallback_syscall | ||
231 | ;; | ||
232 | cmp.eq p8,p12 = 0,r8 // Check for cpu timer | ||
233 | cmp.eq p9,p0 = 1,r8 // MMIO64 ? | ||
234 | extr r2 = r21,24,8 // time_interpolator->jitter | ||
235 | cmp.eq p10,p0 = 2,r8 // MMIO32 ? | ||
236 | cmp.ltu p11,p0 = 2,r8 // function or other clock | ||
237 | (p11) br.cond.spnt.many fsys_fallback_syscall | ||
238 | ;; | ||
239 | setf.sig f7 = r3 // Setup for scaling of counter | ||
240 | (p15) movl r19 = wall_to_monotonic | ||
241 | (p12) ld8 r30 = [r10] | ||
242 | cmp.ne p13,p0 = r2,r0 // need jitter compensation? | ||
243 | extr r21 = r21,16,8 // shift factor | ||
244 | ;; | ||
245 | .time_redo: | ||
246 | .pred.rel.mutex p8,p9,p10 | ||
247 | ld4.acq r28 = [r29] // xtime_lock.sequence. Must come first for locking purposes | ||
248 | (p8) mov r2 = ar.itc // CPU_TIMER. 36 clocks latency!!! | ||
249 | add r22 = IA64_TIME_INTERPOLATOR_LAST_COUNTER_OFFSET,r20 | ||
250 | (p9) ld8 r2 = [r30] // readq(ti->address). Could also have latency issues.. | ||
251 | (p10) ld4 r2 = [r30] // readw(ti->address) -- NOTE(review): ld4 is a 4-byte load, so "readw" looks like a misnomer | ||
252 | (p13) add r23 = IA64_TIME_INTERPOLATOR_LAST_CYCLE_OFFSET,r20 | ||
253 | ;; // could be removed by moving the last add upward | ||
254 | ld8 r26 = [r22] // time_interpolator->last_counter | ||
255 | (p13) ld8 r25 = [r23] // time interpolator->last_cycle | ||
256 | add r24 = IA64_TIME_INTERPOLATOR_OFFSET_OFFSET,r20 | ||
257 | (p15) ld8 r17 = [r19],IA64_TIMESPEC_TV_NSEC_OFFSET /* first word of wall_to_monotonic (added to seconds below); r19 advances to tv_nsec */ | ||
258 | ld8 r9 = [r27],IA64_TIMESPEC_TV_NSEC_OFFSET /* first word of xtime -> result seconds; r27 advances to tv_nsec */ | ||
259 | add r14 = IA64_TIME_INTERPOLATOR_MASK_OFFSET, r20 | ||
260 | ;; | ||
261 | ld8 r18 = [r24] // time_interpolator->offset | ||
262 | ld8 r8 = [r27],-IA64_TIMESPEC_TV_NSEC_OFFSET // xtime.tv_nsec | ||
263 | (p13) sub r3 = r25,r2 // Diff needed before comparison (thanks davidm) | ||
264 | ;; | ||
265 | ld8 r14 = [r14] // time_interpolator->mask | ||
266 | (p13) cmp.gt.unc p6,p7 = r3,r0 // check if it is less than last. p6,p7 cleared | ||
267 | sub r10 = r2,r26 // current_counter - last_counter | ||
268 | ;; | ||
269 | (p6) sub r10 = r25,r26 // time we got was less than last_cycle | ||
270 | (p7) mov ar.ccv = r25 // more than last_cycle. Prep for cmpxchg | ||
271 | ;; | ||
272 | and r10 = r10,r14 // Apply mask | ||
273 | ;; | ||
274 | setf.sig f8 = r10 | ||
275 | nop.i 123 | ||
276 | ;; | ||
277 | (p7) cmpxchg8.rel r3 = [r23],r2,ar.ccv | ||
278 | EX(.fail_efault, probe.w.fault r31, 3) // This takes 5 cycles and we have spare time | ||
279 | xmpy.l f8 = f8,f7 // nsec_per_cyc*(counter-last_counter) | ||
280 | (p15) add r9 = r9,r17 // Add wall to monotonic.secs to result secs | ||
281 | ;; | ||
282 | (p15) ld8 r17 = [r19],-IA64_TIMESPEC_TV_NSEC_OFFSET /* wall_to_monotonic tv_nsec; r19 steps back to the start */ | ||
283 | (p7) cmp.ne p7,p0 = r25,r3 // if cmpxchg not successful redo | ||
284 | // simulate tbit.nz.or p7,p0 = r28,0 | ||
285 | and r28 = ~1,r28 // Make sequence even to force retry if odd | ||
286 | getf.sig r2 = f8 | ||
287 | mf | ||
288 | add r8 = r8,r18 // Add time interpolator offset | ||
289 | ;; | ||
290 | ld4 r10 = [r29] // xtime_lock.sequence | ||
291 | (p15) add r8 = r8, r17 // Add monotonic.nsecs to nsecs | ||
292 | shr.u r2 = r2,r21 | ||
293 | ;; // overloaded 3 bundles! | ||
294 | // End critical section. | ||
295 | add r8 = r8,r2 // Add scaled counter delta (r2) to the nsecs accumulated from xtime.tv_nsec | ||
296 | cmp4.ne.or p7,p0 = r28,r10 | ||
297 | (p7) br.cond.dpnt.few .time_redo // sequence number changed ? | ||
298 | // Now r8=tv->tv_nsec and r9=tv->tv_sec | ||
299 | mov r10 = r0 | ||
300 | movl r2 = 1000000000 | ||
301 | add r23 = IA64_TIMESPEC_TV_NSEC_OFFSET, r31 | ||
302 | (p14) movl r3 = 2361183241434822607 // Prep for / 1000 hack: ceil(2^68 / 125) | ||
303 | ;; | ||
304 | .time_normalize: | ||
305 | mov r21 = r8 | ||
306 | cmp.ge p6,p0 = r8,r2 | ||
307 | (p14) shr.u r20 = r8, 3 // We can repeat this if necessary just wasting some time | ||
308 | ;; | ||
309 | (p14) setf.sig f8 = r20 | ||
310 | (p6) sub r8 = r8,r2 | ||
311 | (p6) add r9 = 1,r9 // two nops before the branch. | ||
312 | (p14) setf.sig f7 = r3 // Chances for repeats are 1 in 10000 for gettod | ||
313 | (p6) br.cond.dpnt.few .time_normalize | ||
314 | ;; | ||
315 | // Divided by 8 through shift. Now divide by 125 | ||
316 | // The compiler was able to do that with a multiply | ||
317 | // and a shift and we do the same | ||
318 | EX(.fail_efault, probe.w.fault r23, 3) // This also costs 5 cycles | ||
319 | (p14) xmpy.hu f8 = f8, f7 // xmpy has 5 cycles latency so use it... | ||
320 | ;; | ||
321 | mov r8 = r0 | ||
322 | (p14) getf.sig r2 = f8 | ||
323 | ;; | ||
324 | (p14) shr.u r21 = r2, 4 /* high 64 bits of the product, >> 4: completes the division by 125 */ | ||
325 | ;; | ||
326 | EX(.fail_efault, st8 [r31] = r9) | ||
327 | EX(.fail_efault, st8 [r23] = r21) | ||
328 | FSYS_RETURN | ||
329 | .fail_einval: | ||
330 | mov r8 = EINVAL | ||
331 | mov r10 = -1 | ||
332 | FSYS_RETURN | ||
333 | .fail_efault: | ||
334 | mov r8 = EFAULT | ||
335 | mov r10 = -1 | ||
336 | FSYS_RETURN | ||
337 | END(fsys_gettimeofday) | ||
338 | |||
339 | ENTRY(fsys_clock_gettime) /* handles CLOCK_REALTIME and CLOCK_MONOTONIC by jumping into the .gettime code of fsys_gettimeofday */ | ||
340 | .prologue | ||
341 | .altrp b6 | ||
342 | .body | ||
343 | cmp4.ltu p6, p0 = CLOCK_MONOTONIC, r32 | ||
344 | // Fallback if this is not CLOCK_REALTIME or CLOCK_MONOTONIC | ||
345 | (p6) br.spnt.few fsys_fallback_syscall | ||
346 | mov r31 = r33 /* r31 = result pointer, as .gettime expects */ | ||
347 | shl r30 = r32,15 /* clock id 1 -> 0x8000 (CLOCK_ADD_MONOTONIC), clock id 0 -> 0; the divide-by-1000 flag is never set here */ | ||
348 | br.many .gettime | ||
349 | END(fsys_clock_gettime) | ||
350 | |||
351 | /* | ||
352 | * long fsys_rt_sigprocmask (int how, sigset_t *set, sigset_t *oset, size_t sigsetsize). | ||
353 | */ | ||
354 | #if _NSIG_WORDS != 1 | ||
355 | # error Sorry, fsys_rt_sigprocmask() needs to be updated for _NSIG_WORDS != 1. | ||
356 | #endif | ||
357 | ENTRY(fsys_rt_sigprocmask) | ||
358 | .prologue | ||
359 | .altrp b6 | ||
360 | .body | ||
361 | |||
362 | add r2=IA64_TASK_BLOCKED_OFFSET,r16 | ||
363 | add r9=TI_FLAGS+IA64_TASK_SIZE,r16 | ||
364 | cmp4.ltu p6,p0=SIG_SETMASK,r32 | ||
365 | |||
366 | cmp.ne p15,p0=r0,r34 // oset != NULL? | ||
367 | tnat.nz p8,p0=r34 | ||
368 | add r31=IA64_TASK_SIGHAND_OFFSET,r16 | ||
369 | ;; | ||
370 | ld8 r3=[r2] // read/prefetch current->blocked | ||
371 | ld4 r9=[r9] | ||
372 | tnat.nz.or p6,p0=r35 | ||
373 | |||
374 | cmp.ne.or p6,p0=_NSIG_WORDS*8,r35 | ||
375 | tnat.nz.or p6,p0=r32 | ||
376 | (p6) br.spnt.few .fail_einval // fail with EINVAL | ||
377 | ;; | ||
378 | #ifdef CONFIG_SMP | ||
379 | ld8 r31=[r31] // r31 <- current->sighand | ||
380 | #endif | ||
381 | and r9=TIF_ALLWORK_MASK,r9 | ||
382 | tnat.nz.or p8,p0=r33 | ||
383 | ;; | ||
384 | cmp.ne p7,p0=0,r9 | ||
385 | cmp.eq p6,p0=r0,r33 // set == NULL? | ||
386 | add r31=IA64_SIGHAND_SIGLOCK_OFFSET,r31 // r31 <- current->sighand->siglock | ||
387 | (p8) br.spnt.few .fail_efault // fail with EFAULT | ||
388 | (p7) br.spnt.many fsys_fallback_syscall // got pending kernel work... | ||
389 | (p6) br.dpnt.many .store_mask // -> short-circuit to just reading the signal mask | ||
390 | |||
391 | /* Argh, we actually have to do some work and _update_ the signal mask: */ | ||
392 | |||
393 | EX(.fail_efault, probe.r.fault r33, 3) // verify user has read-access to *set | ||
394 | EX(.fail_efault, ld8 r14=[r33]) // r14 <- *set | ||
395 | mov r17=(1 << (SIGKILL - 1)) | (1 << (SIGSTOP - 1)) | ||
396 | ;; | ||
397 | |||
398 | rsm psr.i // mask interrupt delivery | ||
399 | mov ar.ccv=0 | ||
400 | andcm r14=r14,r17 // filter out SIGKILL & SIGSTOP | ||
401 | |||
402 | #ifdef CONFIG_SMP | ||
403 | mov r17=1 | ||
404 | ;; | ||
405 | cmpxchg4.acq r18=[r31],r17,ar.ccv // try to acquire the lock | ||
406 | mov r8=EINVAL // default to EINVAL | ||
407 | ;; | ||
408 | ld8 r3=[r2] // re-read current->blocked now that we hold the lock | ||
409 | cmp4.ne p6,p0=r18,r0 | ||
410 | (p6) br.cond.spnt.many .lock_contention | ||
411 | ;; | ||
412 | #else | ||
413 | ld8 r3=[r2] // re-read current->blocked now that we hold the lock | ||
414 | mov r8=EINVAL // default to EINVAL | ||
415 | #endif | ||
416 | add r18=IA64_TASK_PENDING_OFFSET+IA64_SIGPENDING_SIGNAL_OFFSET,r16 | ||
417 | add r19=IA64_TASK_SIGNAL_OFFSET,r16 | ||
418 | cmp4.eq p6,p0=SIG_BLOCK,r32 | ||
419 | ;; | ||
420 | ld8 r19=[r19] // r19 <- current->signal | ||
421 | cmp4.eq p7,p0=SIG_UNBLOCK,r32 | ||
422 | cmp4.eq p8,p0=SIG_SETMASK,r32 | ||
423 | ;; | ||
424 | ld8 r18=[r18] // r18 <- current->pending.signal | ||
425 | .pred.rel.mutex p6,p7,p8 | ||
426 | (p6) or r14=r3,r14 // SIG_BLOCK | ||
427 | (p7) andcm r14=r3,r14 // SIG_UNBLOCK | ||
428 | |||
429 | (p8) mov r14=r14 // SIG_SETMASK | ||
430 | (p6) mov r8=0 // clear error code | ||
431 | // recalc_sigpending() | ||
432 | add r17=IA64_SIGNAL_GROUP_STOP_COUNT_OFFSET,r19 | ||
433 | |||
434 | add r19=IA64_SIGNAL_SHARED_PENDING_OFFSET+IA64_SIGPENDING_SIGNAL_OFFSET,r19 | ||
435 | ;; | ||
436 | ld4 r17=[r17] // r17 <- current->signal->group_stop_count | ||
437 | (p7) mov r8=0 // clear error code | ||
438 | |||
439 | ld8 r19=[r19] // r19 <- current->signal->shared_pending.signal | ||
440 | ;; | ||
441 | cmp4.gt p6,p7=r17,r0 // p6/p7 <- (current->signal->group_stop_count > 0)? | ||
442 | (p8) mov r8=0 // clear error code | ||
443 | |||
444 | or r18=r18,r19 // r18 <- current->pending | current->signal->shared_pending | ||
445 | ;; | ||
446 | // r18 <- (current->pending | current->signal->shared_pending) & ~current->blocked: | ||
447 | andcm r18=r18,r14 | ||
448 | add r9=TI_FLAGS+IA64_TASK_SIZE,r16 | ||
449 | ;; | ||
450 | |||
451 | (p7) cmp.ne.or.andcm p6,p7=r18,r0 // p6/p7 <- signal pending | ||
452 | mov r19=0 // i must not leak kernel bits... | ||
453 | (p6) br.cond.dpnt.many .sig_pending | ||
454 | ;; | ||
455 | |||
456 | 1: ld4 r17=[r9] // r17 <- current->thread_info->flags | ||
457 | ;; | ||
458 | mov ar.ccv=r17 | ||
459 | and r18=~_TIF_SIGPENDING,r17 // r18 <- r17 & ~(1 << TIF_SIGPENDING) | ||
460 | ;; | ||
461 | |||
462 | st8 [r2]=r14 // update current->blocked with new mask | ||
463 | cmpxchg4.acq r8=[r9],r18,ar.ccv // current->thread_info->flags <- r18; old flags land in scratch r8 (r14 must keep the new mask) | ||
464 | ;; | ||
465 | cmp.ne p6,p0=r17,r8 // update failed? | ||
466 | (p6) br.cond.spnt.few 1b // yes -> retry (the st8 above re-stores the intact mask from r14) | ||
467 | |||
468 | #ifdef CONFIG_SMP | ||
469 | st4.rel [r31]=r0 // release the lock | ||
470 | #endif | ||
471 | ssm psr.i | ||
472 | ;; | ||
473 | |||
474 | srlz.d // ensure psr.i is set again | ||
475 | mov r18=0 // i must not leak kernel bits... | ||
476 | |||
477 | .store_mask: | ||
478 | EX(.fail_efault, (p15) probe.w.fault r34, 3) // verify user has write-access to *oset | ||
479 | EX(.fail_efault, (p15) st8 [r34]=r3) | ||
480 | mov r2=0 // i must not leak kernel bits... | ||
481 | mov r3=0 // i must not leak kernel bits... | ||
482 | mov r8=0 // return 0 | ||
483 | mov r9=0 // i must not leak kernel bits... | ||
484 | mov r14=0 // i must not leak kernel bits... | ||
485 | mov r17=0 // i must not leak kernel bits... | ||
486 | mov r31=0 // i must not leak kernel bits... | ||
487 | FSYS_RETURN | ||
488 | |||
489 | .sig_pending: | ||
490 | #ifdef CONFIG_SMP | ||
491 | st4.rel [r31]=r0 // release the lock | ||
492 | #endif | ||
493 | ssm psr.i | ||
494 | ;; | ||
495 | srlz.d | ||
496 | br.sptk.many fsys_fallback_syscall // with signal pending, do the heavy-weight syscall | ||
497 | |||
498 | #ifdef CONFIG_SMP | ||
499 | .lock_contention: | ||
500 | /* Rather than spinning here, fall back on doing a heavy-weight syscall. */ | ||
501 | ssm psr.i | ||
502 | ;; | ||
503 | srlz.d | ||
504 | br.sptk.many fsys_fallback_syscall | ||
505 | #endif | ||
506 | END(fsys_rt_sigprocmask) | ||
507 | |||
508 | ENTRY(fsys_fallback_syscall) | ||
509 | .prologue | ||
510 | .altrp b6 | ||
511 | .body | ||
512 | /* | ||
513 | * We only get here from light-weight syscall handlers. Thus, we already | ||
514 | * know that r15 contains a valid syscall number. No need to re-check. | ||
515 | */ | ||
516 | adds r17=-1024,r15 // r17 <- syscall number - 1024 (index into sys_call_table) | ||
517 | movl r14=sys_call_table | ||
518 | ;; | ||
519 | rsm psr.i // mask interrupt delivery | ||
520 | shladd r18=r17,3,r14 // r18 <- sys_call_table + 8*r17 | ||
521 | ;; | ||
522 | ld8 r18=[r18] // load normal (heavy-weight) syscall entry-point | ||
523 | mov r29=psr // read psr (12 cyc load latency) | ||
524 | mov r27=ar.rsc // r27, r21, r26: entry state expected by fsys_bubble_down | ||
525 | mov r21=ar.fpsr | ||
526 | mov r26=ar.pfs | ||
527 | END(fsys_fallback_syscall) | ||
528 | /* FALL THROUGH */ | ||
529 | GLOBAL_ENTRY(fsys_bubble_down) | ||
530 | .prologue | ||
531 | .altrp b6 | ||
532 | .body | ||
533 | /* | ||
534 | * We get here for syscalls that don't have a lightweight handler. For those, we | ||
535 | * need to bubble down into the kernel and that requires setting up a minimal | ||
536 | * pt_regs structure, and initializing the CPU state more or less as if an | ||
537 | * interruption had occurred. To make syscall-restarts work, we setup pt_regs | ||
538 | * such that cr_iip points to the second instruction in __kernel_syscall_via_break. | ||
539 | * Decrementing the IP hence will restart the syscall via break and not | ||
540 | * decrementing IP will return us to the caller, as usual. Note that we preserve | ||
541 | * the value of psr.pp rather than initializing it from dcr.pp. This makes it | ||
542 | * possible to distinguish fsyscall execution from other privileged execution. | ||
543 | * | ||
544 | * On entry: | ||
545 | * - normal fsyscall handler register usage, except that we also have: | ||
546 | * - r18: address of syscall entry point | ||
547 | * - r21: ar.fpsr | ||
548 | * - r26: ar.pfs | ||
549 | * - r27: ar.rsc | ||
550 | * - r29: psr | ||
551 | */ | ||
552 | # define PSR_PRESERVED_BITS (IA64_PSR_UP | IA64_PSR_MFL | IA64_PSR_MFH | IA64_PSR_PK \ | ||
553 | | IA64_PSR_DT | IA64_PSR_PP | IA64_PSR_SP | IA64_PSR_RT \ | ||
554 | | IA64_PSR_IC) | ||
555 | /* | ||
556 | * Reading psr.l gives us only bits 0-31, psr.it, and psr.mc. The rest we have | ||
557 | * to synthesize. | ||
558 | */ | ||
559 | # define PSR_ONE_BITS ((3 << IA64_PSR_CPL0_BIT) | (0x1 << IA64_PSR_RI_BIT) \ | ||
560 | | IA64_PSR_BN | IA64_PSR_I) | ||
561 | |||
562 | invala | ||
563 | movl r8=PSR_ONE_BITS | ||
564 | |||
565 | mov r25=ar.unat // save ar.unat (5 cyc) | ||
566 | movl r9=PSR_PRESERVED_BITS | ||
567 | |||
568 | mov ar.rsc=0 // set enforced lazy mode, pl 0, little-endian, loadrs=0 | ||
569 | movl r28=__kernel_syscall_via_break | ||
570 | ;; | ||
571 | mov r23=ar.bspstore // save ar.bspstore (12 cyc) | ||
572 | mov r31=pr // save pr (2 cyc) | ||
573 | mov r20=r1 // save caller's gp in r20 | ||
574 | ;; | ||
575 | mov r2=r16 // copy current task addr to addl-addressable register | ||
576 | and r9=r9,r29 | ||
577 | mov r19=b6 // save b6 (2 cyc) | ||
578 | ;; | ||
579 | mov psr.l=r9 // slam the door (17 cyc to srlz.i) | ||
580 | or r29=r8,r29 // construct cr.ipsr value to save | ||
581 | addl r22=IA64_RBS_OFFSET,r2 // compute base of RBS | ||
582 | ;; | ||
583 | // GAS reports a spurious RAW hazard on the read of ar.rnat because it thinks | ||
584 | // we may be reading ar.itc after writing to psr.l. Avoid that message with | ||
585 | // this directive: | ||
586 | dv_serialize_data | ||
587 | mov.m r24=ar.rnat // read ar.rnat (5 cyc lat) | ||
588 | lfetch.fault.excl.nt1 [r22] | ||
589 | adds r16=IA64_TASK_THREAD_ON_USTACK_OFFSET,r2 | ||
590 | |||
591 | // ensure previous insn group is issued before we stall for srlz.i: | ||
592 | ;; | ||
593 | srlz.i // ensure new psr.l has been established | ||
594 | ///////////////////////////////////////////////////////////////////////////// | ||
595 | ////////// from this point on, execution is not interruptible anymore | ||
596 | ///////////////////////////////////////////////////////////////////////////// | ||
597 | addl r1=IA64_STK_OFFSET-IA64_PT_REGS_SIZE,r2 // compute base of memory stack | ||
598 | cmp.ne pKStk,pUStk=r0,r0 // set pKStk <- 0, pUStk <- 1 | ||
599 | ;; | ||
600 | st1 [r16]=r0 // clear current->thread.on_ustack flag | ||
601 | mov ar.bspstore=r22 // switch to kernel RBS | ||
602 | mov b6=r18 // copy syscall entry-point to b6 (7 cyc) | ||
603 | add r3=TI_FLAGS+IA64_TASK_SIZE,r2 | ||
604 | ;; | ||
605 | ld4 r3=[r3] // r3 = current_thread_info()->flags | ||
606 | mov r18=ar.bsp // save (kernel) ar.bsp (12 cyc) | ||
607 | mov ar.rsc=0x3 // set eager mode, pl 0, little-endian, loadrs=0 | ||
608 | br.call.sptk.many b7=ia64_syscall_setup | ||
609 | ;; | ||
610 | ssm psr.i | ||
611 | movl r2=ia64_ret_from_syscall | ||
612 | ;; | ||
613 | mov rp=r2 // set the real return addr | ||
614 | tbit.z p8,p0=r3,TIF_SYSCALL_TRACE // p8 <- TIF_SYSCALL_TRACE bit clear? | ||
615 | ;; | ||
616 | (p10) br.cond.spnt.many ia64_ret_from_syscall // p10==true means out registers are more than 8 | ||
617 | (p8) br.call.sptk.many b6=b6 // ignore this return addr | ||
618 | br.cond.sptk ia64_trace_syscall // TIF_SYSCALL_TRACE set -> go through ia64_trace_syscall | ||
619 | END(fsys_bubble_down) | ||
620 | |||
621 | .rodata | ||
622 | .align 8 | ||
623 | .globl fsyscall_table | ||
624 | |||
625 | data8 fsys_bubble_down | ||
626 | fsyscall_table: | ||
627 | data8 fsys_ni_syscall | ||
628 | data8 0 // exit // 1025 | ||
629 | data8 0 // read | ||
630 | data8 0 // write | ||
631 | data8 0 // open | ||
632 | data8 0 // close | ||
633 | data8 0 // creat // 1030 | ||
634 | data8 0 // link | ||
635 | data8 0 // unlink | ||
636 | data8 0 // execve | ||
637 | data8 0 // chdir | ||
638 | data8 0 // fchdir // 1035 | ||
639 | data8 0 // utimes | ||
640 | data8 0 // mknod | ||
641 | data8 0 // chmod | ||
642 | data8 0 // chown | ||
643 | data8 0 // lseek // 1040 | ||
644 | data8 fsys_getpid // getpid | ||
645 | data8 fsys_getppid // getppid | ||
646 | data8 0 // mount | ||
647 | data8 0 // umount | ||
648 | data8 0 // setuid // 1045 | ||
649 | data8 0 // getuid | ||
650 | data8 0 // geteuid | ||
651 | data8 0 // ptrace | ||
652 | data8 0 // access | ||
653 | data8 0 // sync // 1050 | ||
654 | data8 0 // fsync | ||
655 | data8 0 // fdatasync | ||
656 | data8 0 // kill | ||
657 | data8 0 // rename | ||
658 | data8 0 // mkdir // 1055 | ||
659 | data8 0 // rmdir | ||
660 | data8 0 // dup | ||
661 | data8 0 // pipe | ||
662 | data8 0 // times | ||
663 | data8 0 // brk // 1060 | ||
664 | data8 0 // setgid | ||
665 | data8 0 // getgid | ||
666 | data8 0 // getegid | ||
667 | data8 0 // acct | ||
668 | data8 0 // ioctl // 1065 | ||
669 | data8 0 // fcntl | ||
670 | data8 0 // umask | ||
671 | data8 0 // chroot | ||
672 | data8 0 // ustat | ||
673 | data8 0 // dup2 // 1070 | ||
674 | data8 0 // setreuid | ||
675 | data8 0 // setregid | ||
676 | data8 0 // getresuid | ||
677 | data8 0 // setresuid | ||
678 | data8 0 // getresgid // 1075 | ||
679 | data8 0 // setresgid | ||
680 | data8 0 // getgroups | ||
681 | data8 0 // setgroups | ||
682 | data8 0 // getpgid | ||
683 | data8 0 // setpgid // 1080 | ||
684 | data8 0 // setsid | ||
685 | data8 0 // getsid | ||
686 | data8 0 // sethostname | ||
687 | data8 0 // setrlimit | ||
688 | data8 0 // getrlimit // 1085 | ||
689 | data8 0 // getrusage | ||
690 | data8 fsys_gettimeofday // gettimeofday | ||
691 | data8 0 // settimeofday | ||
692 | data8 0 // select | ||
693 | data8 0 // poll // 1090 | ||
694 | data8 0 // symlink | ||
695 | data8 0 // readlink | ||
696 | data8 0 // uselib | ||
697 | data8 0 // swapon | ||
698 | data8 0 // swapoff // 1095 | ||
699 | data8 0 // reboot | ||
700 | data8 0 // truncate | ||
701 | data8 0 // ftruncate | ||
702 | data8 0 // fchmod | ||
703 | data8 0 // fchown // 1100 | ||
704 | data8 0 // getpriority | ||
705 | data8 0 // setpriority | ||
706 | data8 0 // statfs | ||
707 | data8 0 // fstatfs | ||
708 | data8 0 // gettid // 1105 | ||
709 | data8 0 // semget | ||
710 | data8 0 // semop | ||
711 | data8 0 // semctl | ||
712 | data8 0 // msgget | ||
713 | data8 0 // msgsnd // 1110 | ||
714 | data8 0 // msgrcv | ||
715 | data8 0 // msgctl | ||
716 | data8 0 // shmget | ||
717 | data8 0 // shmat | ||
718 | data8 0 // shmdt // 1115 | ||
719 | data8 0 // shmctl | ||
720 | data8 0 // syslog | ||
721 | data8 0 // setitimer | ||
722 | data8 0 // getitimer | ||
723 | data8 0 // 1120 | ||
724 | data8 0 | ||
725 | data8 0 | ||
726 | data8 0 // vhangup | ||
727 | data8 0 // lchown | ||
728 | data8 0 // remap_file_pages // 1125 | ||
729 | data8 0 // wait4 | ||
730 | data8 0 // sysinfo | ||
731 | data8 0 // clone | ||
732 | data8 0 // setdomainname | ||
733 | data8 0 // newuname // 1130 | ||
734 | data8 0 // adjtimex | ||
735 | data8 0 | ||
736 | data8 0 // init_module | ||
737 | data8 0 // delete_module | ||
738 | data8 0 // 1135 | ||
739 | data8 0 | ||
740 | data8 0 // quotactl | ||
741 | data8 0 // bdflush | ||
742 | data8 0 // sysfs | ||
743 | data8 0 // personality // 1140 | ||
744 | data8 0 // afs_syscall | ||
745 | data8 0 // setfsuid | ||
746 | data8 0 // setfsgid | ||
747 | data8 0 // getdents | ||
748 | data8 0 // flock // 1145 | ||
749 | data8 0 // readv | ||
750 | data8 0 // writev | ||
751 | data8 0 // pread64 | ||
752 | data8 0 // pwrite64 | ||
753 | data8 0 // sysctl // 1150 | ||
754 | data8 0 // mmap | ||
755 | data8 0 // munmap | ||
756 | data8 0 // mlock | ||
757 | data8 0 // mlockall | ||
758 | data8 0 // mprotect // 1155 | ||
759 | data8 0 // mremap | ||
760 | data8 0 // msync | ||
761 | data8 0 // munlock | ||
762 | data8 0 // munlockall | ||
763 | data8 0 // sched_getparam // 1160 | ||
764 | data8 0 // sched_setparam | ||
765 | data8 0 // sched_getscheduler | ||
766 | data8 0 // sched_setscheduler | ||
767 | data8 0 // sched_yield | ||
768 | data8 0 // sched_get_priority_max // 1165 | ||
769 | data8 0 // sched_get_priority_min | ||
770 | data8 0 // sched_rr_get_interval | ||
771 | data8 0 // nanosleep | ||
772 | data8 0 // nfsservctl | ||
773 | data8 0 // prctl // 1170 | ||
774 | data8 0 // getpagesize | ||
775 | data8 0 // mmap2 | ||
776 | data8 0 // pciconfig_read | ||
777 | data8 0 // pciconfig_write | ||
778 | data8 0 // perfmonctl // 1175 | ||
779 | data8 0 // sigaltstack | ||
780 | data8 0 // rt_sigaction | ||
781 | data8 0 // rt_sigpending | ||
782 | data8 fsys_rt_sigprocmask // rt_sigprocmask | ||
783 | data8 0 // rt_sigqueueinfo // 1180 | ||
784 | data8 0 // rt_sigreturn | ||
785 | data8 0 // rt_sigsuspend | ||
786 | data8 0 // rt_sigtimedwait | ||
787 | data8 0 // getcwd | ||
788 | data8 0 // capget // 1185 | ||
789 | data8 0 // capset | ||
790 | data8 0 // sendfile | ||
791 | data8 0 | ||
792 | data8 0 | ||
793 | data8 0 // socket // 1190 | ||
794 | data8 0 // bind | ||
795 | data8 0 // connect | ||
796 | data8 0 // listen | ||
797 | data8 0 // accept | ||
798 | data8 0 // getsockname // 1195 | ||
799 | data8 0 // getpeername | ||
800 | data8 0 // socketpair | ||
801 | data8 0 // send | ||
802 | data8 0 // sendto | ||
803 | data8 0 // recv // 1200 | ||
804 | data8 0 // recvfrom | ||
805 | data8 0 // shutdown | ||
806 | data8 0 // setsockopt | ||
807 | data8 0 // getsockopt | ||
808 | data8 0 // sendmsg // 1205 | ||
809 | data8 0 // recvmsg | ||
810 | data8 0 // pivot_root | ||
811 | data8 0 // mincore | ||
812 | data8 0 // madvise | ||
813 | data8 0 // newstat // 1210 | ||
814 | data8 0 // newlstat | ||
815 | data8 0 // newfstat | ||
816 | data8 0 // clone2 | ||
817 | data8 0 // getdents64 | ||
818 | data8 0 // getunwind // 1215 | ||
819 | data8 0 // readahead | ||
820 | data8 0 // setxattr | ||
821 | data8 0 // lsetxattr | ||
822 | data8 0 // fsetxattr | ||
823 | data8 0 // getxattr // 1220 | ||
824 | data8 0 // lgetxattr | ||
825 | data8 0 // fgetxattr | ||
826 | data8 0 // listxattr | ||
827 | data8 0 // llistxattr | ||
828 | data8 0 // flistxattr // 1225 | ||
829 | data8 0 // removexattr | ||
830 | data8 0 // lremovexattr | ||
831 | data8 0 // fremovexattr | ||
832 | data8 0 // tkill | ||
833 | data8 0 // futex // 1230 | ||
834 | data8 0 // sched_setaffinity | ||
835 | data8 0 // sched_getaffinity | ||
836 | data8 fsys_set_tid_address // set_tid_address | ||
837 | data8 0 // fadvise64_64 | ||
838 | data8 0 // tgkill // 1235 | ||
839 | data8 0 // exit_group | ||
840 | data8 0 // lookup_dcookie | ||
841 | data8 0 // io_setup | ||
842 | data8 0 // io_destroy | ||
843 | data8 0 // io_getevents // 1240 | ||
844 | data8 0 // io_submit | ||
845 | data8 0 // io_cancel | ||
846 | data8 0 // epoll_create | ||
847 | data8 0 // epoll_ctl | ||
848 | data8 0 // epoll_wait // 1245 | ||
849 | data8 0 // restart_syscall | ||
850 | data8 0 // semtimedop | ||
851 | data8 0 // timer_create | ||
852 | data8 0 // timer_settime | ||
853 | data8 0 // timer_gettime // 1250 | ||
854 | data8 0 // timer_getoverrun | ||
855 | data8 0 // timer_delete | ||
856 | data8 0 // clock_settime | ||
857 | data8 fsys_clock_gettime // clock_gettime | ||
858 | data8 0 // clock_getres // 1255 | ||
859 | data8 0 // clock_nanosleep | ||
860 | data8 0 // fstatfs64 | ||
861 | data8 0 // statfs64 | ||
862 | data8 0 | ||
863 | data8 0 // 1260 | ||
864 | data8 0 | ||
865 | data8 0 // mq_open | ||
866 | data8 0 // mq_unlink | ||
867 | data8 0 // mq_timedsend | ||
868 | data8 0 // mq_timedreceive // 1265 | ||
869 | data8 0 // mq_notify | ||
870 | data8 0 // mq_getsetattr | ||
871 | data8 0 // kexec_load | ||
872 | data8 0 | ||
873 | data8 0 // 1270 | ||
874 | data8 0 | ||
875 | data8 0 | ||
876 | data8 0 | ||
877 | data8 0 | ||
878 | data8 0 // 1275 | ||
879 | data8 0 | ||
880 | data8 0 | ||
881 | data8 0 | ||
882 | data8 0 | ||
883 | |||
884 | .org fsyscall_table + 8*NR_syscalls // guard against failures to increase NR_syscalls | ||
diff --git a/arch/ia64/kernel/gate-data.S b/arch/ia64/kernel/gate-data.S new file mode 100644 index 000000000000..258c0a3238fb --- /dev/null +++ b/arch/ia64/kernel/gate-data.S | |||
@@ -0,0 +1,3 @@ | |||
1 | .section .data.gate, "aw" | ||
2 | |||
3 | .incbin "arch/ia64/kernel/gate.so" | ||
diff --git a/arch/ia64/kernel/gate.S b/arch/ia64/kernel/gate.S new file mode 100644 index 000000000000..facf75acdc85 --- /dev/null +++ b/arch/ia64/kernel/gate.S | |||
@@ -0,0 +1,372 @@ | |||
1 | /* | ||
2 | * This file contains the code that gets mapped at the upper end of each task's text | ||
3 | * region. It contains the signal trampoline code and the system-call entry stubs. | ||
4 | * | ||
5 | * Copyright (C) 1999-2003 Hewlett-Packard Co | ||
6 | * David Mosberger-Tang <davidm@hpl.hp.com> | ||
7 | */ | ||
8 | |||
9 | #include <linux/config.h> | ||
10 | |||
11 | #include <asm/asmmacro.h> | ||
12 | #include <asm/errno.h> | ||
13 | #include <asm/offsets.h> | ||
14 | #include <asm/sigcontext.h> | ||
15 | #include <asm/system.h> | ||
16 | #include <asm/unistd.h> | ||
17 | |||
18 | /* | ||
19 | * We can't easily refer to symbols inside the kernel. To avoid full runtime relocation, | ||
20 | * complications with the linker (which likes to create PLT stubs for branches | ||
21 | * to targets outside the shared object) and to avoid multi-phase kernel builds, we | ||
22 | * simply create minimalistic "patch lists" in special ELF sections. | ||
23 | */ | ||
24 | .section ".data.patch.fsyscall_table", "a" | ||
25 | .previous | ||
26 | #define LOAD_FSYSCALL_TABLE(reg) \ | ||
27 | [1:] movl reg=0; \ | ||
28 | .xdata4 ".data.patch.fsyscall_table", 1b-. | ||
29 | |||
30 | .section ".data.patch.brl_fsys_bubble_down", "a" | ||
31 | .previous | ||
32 | #define BRL_COND_FSYS_BUBBLE_DOWN(pr) \ | ||
33 | [1:](pr)brl.cond.sptk 0; \ | ||
34 | .xdata4 ".data.patch.brl_fsys_bubble_down", 1b-. | ||
35 | |||
36 | GLOBAL_ENTRY(__kernel_syscall_via_break) | ||
37 | .prologue | ||
38 | .altrp b6 | ||
39 | .body | ||
40 | /* | ||
41 | * Note: for (fast) syscall restart to work, the break instruction must be | ||
42 | * the first one in the bundle addressed by __kernel_syscall_via_break. | ||
43 | */ | ||
44 | { .mib | ||
45 | break 0x100000 // slot 0: enter the kernel via break | ||
46 | nop.i 0 | ||
47 | br.ret.sptk.many b6 // slot 2: return to the caller through b6 | ||
48 | } | ||
49 | END(__kernel_syscall_via_break) | ||
50 | |||
51 | /* | ||
52 | * On entry: | ||
53 | * r11 = saved ar.pfs | ||
54 | * r15 = system call # | ||
55 | * b0 = saved return address | ||
56 | * b6 = return address | ||
57 | * On exit: | ||
58 | * r11 = saved ar.pfs | ||
59 | * r15 = system call # | ||
60 | * b0 = saved return address | ||
61 | * all other "scratch" registers: undefined | ||
62 | * all "preserved" registers: same as on entry | ||
63 | */ | ||
64 | |||
65 | GLOBAL_ENTRY(__kernel_syscall_via_epc) | ||
66 | .prologue | ||
67 | .altrp b6 | ||
68 | .body | ||
69 | { | ||
70 | /* | ||
71 | * Note: the kernel cannot assume that the first two instructions in this | ||
72 | * bundle get executed. The remaining code must be safe even if | ||
73 | * they do not get executed. | ||
74 | */ | ||
75 | adds r17=-1024,r15 | ||
76 | mov r10=0 // default to successful syscall execution | ||
77 | epc | ||
78 | } | ||
79 | ;; | ||
80 | rsm psr.be // note: on McKinley "rsm psr.be/srlz.d" is slightly faster than "rum psr.be" | ||
81 | LOAD_FSYSCALL_TABLE(r14) | ||
82 | |||
83 | mov r16=IA64_KR(CURRENT) // 12 cycle read latency | ||
84 | tnat.nz p10,p9=r15 | ||
85 | mov r19=NR_syscalls-1 // r19 <- highest valid table index | ||
86 | ;; | ||
87 | shladd r18=r17,3,r14 // r18 <- r14 + 8*r17 (table slot for this syscall) | ||
88 | |||
89 | srlz.d | ||
90 | cmp.ne p8,p0=r0,r0 // p8 <- FALSE | ||
91 | /* Note: if r17 is a NaT, p6 will be set to zero. */ | ||
92 | cmp.geu p6,p7=r19,r17 // (syscall >= 1024 && syscall < 1024+NR_syscalls)? | ||
93 | ;; | ||
94 | (p6) ld8 r18=[r18] | ||
95 | mov r21=ar.fpsr | ||
96 | add r14=-8,r14 // r14 <- addr of fsys_bubble_down entry | ||
97 | ;; | ||
98 | (p6) mov b7=r18 | ||
99 | (p6) tbit.z p8,p0=r18,0 | ||
100 | (p8) br.dptk.many b7 | ||
101 | |||
102 | (p6) rsm psr.i | ||
103 | mov r27=ar.rsc | ||
104 | mov r26=ar.pfs | ||
105 | ;; | ||
106 | mov r29=psr // read psr (12 cyc load latency) | ||
107 | /* | ||
108 | * brl.cond doesn't work as intended because the linker would convert this branch | ||
109 | * into a branch to a PLT. Perhaps there will be a way to avoid this with some | ||
110 | * future version of the linker. In the meantime, we just use an indirect branch | ||
111 | * instead. | ||
112 | */ | ||
113 | #ifdef CONFIG_ITANIUM | ||
114 | (p6) ld8 r14=[r14] // r14 <- fsys_bubble_down | ||
115 | ;; | ||
116 | (p6) mov b7=r14 | ||
117 | (p6) br.sptk.many b7 | ||
118 | #else | ||
119 | BRL_COND_FSYS_BUBBLE_DOWN(p6) | ||
120 | #endif | ||
121 | |||
122 | mov r10=-1 | ||
123 | (p10) mov r8=EINVAL | ||
124 | (p9) mov r8=ENOSYS | ||
125 | FSYS_RETURN | ||
126 | END(__kernel_syscall_via_epc) | ||
127 | |||
128 | # define ARG0_OFF (16 + IA64_SIGFRAME_ARG0_OFFSET) | ||
129 | # define ARG1_OFF (16 + IA64_SIGFRAME_ARG1_OFFSET) | ||
130 | # define ARG2_OFF (16 + IA64_SIGFRAME_ARG2_OFFSET) | ||
131 | # define SIGHANDLER_OFF (16 + IA64_SIGFRAME_HANDLER_OFFSET) | ||
132 | # define SIGCONTEXT_OFF (16 + IA64_SIGFRAME_SIGCONTEXT_OFFSET) | ||
133 | |||
134 | # define FLAGS_OFF IA64_SIGCONTEXT_FLAGS_OFFSET | ||
135 | # define CFM_OFF IA64_SIGCONTEXT_CFM_OFFSET | ||
136 | # define FR6_OFF IA64_SIGCONTEXT_FR6_OFFSET | ||
137 | # define BSP_OFF IA64_SIGCONTEXT_AR_BSP_OFFSET | ||
138 | # define RNAT_OFF IA64_SIGCONTEXT_AR_RNAT_OFFSET | ||
139 | # define UNAT_OFF IA64_SIGCONTEXT_AR_UNAT_OFFSET | ||
140 | # define FPSR_OFF IA64_SIGCONTEXT_AR_FPSR_OFFSET | ||
141 | # define PR_OFF IA64_SIGCONTEXT_PR_OFFSET | ||
142 | # define RP_OFF IA64_SIGCONTEXT_IP_OFFSET | ||
143 | # define SP_OFF IA64_SIGCONTEXT_R12_OFFSET | ||
144 | # define RBS_BASE_OFF IA64_SIGCONTEXT_RBS_BASE_OFFSET | ||
145 | # define LOADRS_OFF IA64_SIGCONTEXT_LOADRS_OFFSET | ||
146 | # define base0 r2 | ||
147 | # define base1 r3 | ||
148 | /* | ||
149 | * When we get here, the memory stack looks like this: | ||
150 | * | ||
151 | * +===============================+ | ||
152 | * | | | ||
153 | * // struct sigframe // | ||
154 | * | | | ||
155 | * +-------------------------------+ <-- sp+16 | ||
156 | * | 16 byte of scratch | | ||
157 | * | space | | ||
158 | * +-------------------------------+ <-- sp | ||
159 | * | ||
160 | * The register stack looks _exactly_ the way it looked at the time the signal | ||
161 | * occurred. In other words, we're treading on a potential mine-field: each | ||
162 | * incoming general register may be a NaT value (including sp, in which case the | ||
163 | * process ends up dying with a SIGSEGV). | ||
164 | * | ||
165 | * The first thing we need to do is a cover to get the registers onto the backing | ||
166 | * store. Once that is done, we invoke the signal handler which may modify some | ||
167 | * of the machine state. After returning from the signal handler, we return | ||
168 | * control to the previous context by executing a sigreturn system call. A signal | ||
169 | * handler may call the rt_sigreturn() function to directly return to a given | ||
170 | * sigcontext. However, the user-level sigreturn() needs to do much more than | ||
171 | * calling the rt_sigreturn() system call as it needs to unwind the stack to | ||
172 | * restore preserved registers that may have been saved on the signal handler's | ||
173 | * call stack. | ||
174 | */ | ||
175 | |||
176 | #define SIGTRAMP_SAVES \ | ||
177 | .unwabi 3, 's'; /* mark this as a sigtramp handler (saves scratch regs) */ \ | ||
178 | .unwabi @svr4, 's'; /* backwards compatibility with old unwinders (remove in v2.7) */ \ | ||
179 | .savesp ar.unat, UNAT_OFF+SIGCONTEXT_OFF; \ | ||
180 | .savesp ar.fpsr, FPSR_OFF+SIGCONTEXT_OFF; \ | ||
181 | .savesp pr, PR_OFF+SIGCONTEXT_OFF; \ | ||
182 | .savesp rp, RP_OFF+SIGCONTEXT_OFF; \ | ||
183 | .savesp ar.pfs, CFM_OFF+SIGCONTEXT_OFF; \ | ||
184 | .vframesp SP_OFF+SIGCONTEXT_OFF | ||
185 | |||
186 | GLOBAL_ENTRY(__kernel_sigtramp) | ||
187 | // describe the state that is active when we get here: | ||
188 | .prologue | ||
189 | SIGTRAMP_SAVES | ||
190 | .body | ||
191 | |||
192 | .label_state 1 | ||
193 | |||
194 | adds base0=SIGHANDLER_OFF,sp | ||
195 | adds base1=RBS_BASE_OFF+SIGCONTEXT_OFF,sp | ||
196 | br.call.sptk.many rp=1f | ||
197 | 1: | ||
198 | ld8 r17=[base0],(ARG0_OFF-SIGHANDLER_OFF) // get pointer to signal handler's plabel | ||
199 | ld8 r15=[base1] // get address of new RBS base (or NULL) | ||
200 | cover // push args in interrupted frame onto backing store | ||
201 | ;; | ||
202 | cmp.ne p1,p0=r15,r0 // do we need to switch rbs? (note: pr is saved by kernel) | ||
203 | mov.m r9=ar.bsp // fetch ar.bsp | ||
204 | .spillsp.p p1, ar.rnat, RNAT_OFF+SIGCONTEXT_OFF | ||
205 | (p1) br.cond.spnt setup_rbs // yup -> (clobbers p8, r14-r16, and r18-r20) | ||
206 | back_from_setup_rbs: | ||
207 | alloc r8=ar.pfs,0,0,3,0 | ||
208 | ld8 out0=[base0],16 // load arg0 (signum) | ||
209 | adds base1=(ARG1_OFF-(RBS_BASE_OFF+SIGCONTEXT_OFF)),base1 | ||
210 | ;; | ||
211 | ld8 out1=[base1] // load arg1 (siginfop) | ||
212 | ld8 r10=[r17],8 // get signal handler entry point | ||
213 | ;; | ||
214 | ld8 out2=[base0] // load arg2 (sigcontextp) | ||
215 | ld8 gp=[r17] // get signal handler's global pointer | ||
216 | adds base0=(BSP_OFF+SIGCONTEXT_OFF),sp | ||
217 | ;; | ||
218 | .spillsp ar.bsp, BSP_OFF+SIGCONTEXT_OFF | ||
219 | st8 [base0]=r9 // save sc_ar_bsp | ||
220 | adds base0=(FR6_OFF+SIGCONTEXT_OFF),sp | ||
221 | adds base1=(FR6_OFF+16+SIGCONTEXT_OFF),sp | ||
222 | ;; | ||
223 | stf.spill [base0]=f6,32 | ||
224 | stf.spill [base1]=f7,32 | ||
225 | ;; | ||
226 | stf.spill [base0]=f8,32 | ||
227 | stf.spill [base1]=f9,32 | ||
228 | mov b6=r10 | ||
229 | ;; | ||
230 | stf.spill [base0]=f10,32 | ||
231 | stf.spill [base1]=f11,32 | ||
232 | ;; | ||
233 | stf.spill [base0]=f12,32 | ||
234 | stf.spill [base1]=f13,32 | ||
235 | ;; | ||
236 | stf.spill [base0]=f14,32 | ||
237 | stf.spill [base1]=f15,32 | ||
238 | br.call.sptk.many rp=b6 // call the signal handler | ||
239 | .ret0: adds base0=(BSP_OFF+SIGCONTEXT_OFF),sp | ||
240 | ;; | ||
241 | ld8 r15=[base0] // fetch sc_ar_bsp | ||
242 | mov r14=ar.bsp | ||
243 | ;; | ||
244 | cmp.ne p1,p0=r14,r15 // do we need to restore the rbs? | ||
245 | (p1) br.cond.spnt restore_rbs // yup -> (clobbers r14-r18, f6 & f7) | ||
246 | ;; | ||
247 | back_from_restore_rbs: | ||
248 | adds base0=(FR6_OFF+SIGCONTEXT_OFF),sp | ||
249 | adds base1=(FR6_OFF+16+SIGCONTEXT_OFF),sp | ||
250 | ;; | ||
251 | ldf.fill f6=[base0],32 | ||
252 | ldf.fill f7=[base1],32 | ||
253 | ;; | ||
254 | ldf.fill f8=[base0],32 | ||
255 | ldf.fill f9=[base1],32 | ||
256 | ;; | ||
257 | ldf.fill f10=[base0],32 | ||
258 | ldf.fill f11=[base1],32 | ||
259 | ;; | ||
260 | ldf.fill f12=[base0],32 | ||
261 | ldf.fill f13=[base1],32 | ||
262 | ;; | ||
263 | ldf.fill f14=[base0],32 | ||
264 | ldf.fill f15=[base1],32 | ||
265 | mov r15=__NR_rt_sigreturn | ||
266 | .restore sp // pop .prologue | ||
267 | break __BREAK_SYSCALL | ||
268 | |||
269 | .prologue | ||
270 | SIGTRAMP_SAVES | ||
271 | setup_rbs: | ||
272 | mov ar.rsc=0 // put RSE into enforced lazy mode | ||
273 | ;; | ||
274 | .save ar.rnat, r19 | ||
275 | mov r19=ar.rnat // save RNaT before switching backing store area | ||
276 | adds r14=(RNAT_OFF+SIGCONTEXT_OFF),sp | ||
277 | |||
278 | mov r18=ar.bspstore | ||
279 | mov ar.bspstore=r15 // switch over to new register backing store area | ||
280 | ;; | ||
281 | |||
282 | .spillsp ar.rnat, RNAT_OFF+SIGCONTEXT_OFF | ||
283 | st8 [r14]=r19 // save sc_ar_rnat | ||
284 | .body | ||
285 | mov.m r16=ar.bsp // sc_loadrs <- (new bsp - new bspstore) << 16 | ||
286 | adds r14=(LOADRS_OFF+SIGCONTEXT_OFF),sp | ||
287 | ;; | ||
288 | invala | ||
289 | sub r15=r16,r15 | ||
290 | extr.u r20=r18,3,6 | ||
291 | ;; | ||
292 | mov ar.rsc=0xf // set RSE into eager mode, pl 3 | ||
293 | cmp.eq p8,p0=63,r20 | ||
294 | shl r15=r15,16 | ||
295 | ;; | ||
296 | st8 [r14]=r15 // save sc_loadrs | ||
297 | (p8) st8 [r18]=r19 // if bspstore points at RNaT slot, store RNaT there now | ||
298 | .restore sp // pop .prologue | ||
299 | br.cond.sptk back_from_setup_rbs | ||
300 | |||
301 | .prologue | ||
302 | SIGTRAMP_SAVES | ||
303 | .spillsp ar.rnat, RNAT_OFF+SIGCONTEXT_OFF | ||
304 | .body | ||
305 | restore_rbs: | ||
306 | // On input: | ||
307 | // r14 = bsp1 (bsp at the time of return from signal handler) | ||
308 | // r15 = bsp0 (bsp at the time the signal occurred) | ||
309 | // | ||
310 | // Here, we need to calculate bspstore0, the value that ar.bspstore needs | ||
311 | // to be set to, based on bsp0 and the size of the dirty partition on | ||
312 | // the alternate stack (sc_loadrs >> 16). This can be done with the | ||
313 | // following algorithm: | ||
314 | // | ||
315 | // bspstore0 = rse_skip_regs(bsp0, -rse_num_regs(bsp1 - (loadrs >> 19), bsp1)); | ||
316 | // | ||
317 | // This is what the code below does. | ||
318 | // | ||
319 | alloc r2=ar.pfs,0,0,0,0 // alloc null frame | ||
320 | adds r16=(LOADRS_OFF+SIGCONTEXT_OFF),sp | ||
321 | adds r18=(RNAT_OFF+SIGCONTEXT_OFF),sp | ||
322 | ;; | ||
323 | ld8 r17=[r16] | ||
324 | ld8 r16=[r18] // get new rnat | ||
325 | extr.u r18=r15,3,6 // r18 <- rse_slot_num(bsp0) | ||
326 | ;; | ||
327 | mov ar.rsc=r17 // put RSE into enforced lazy mode | ||
328 | shr.u r17=r17,16 | ||
329 | ;; | ||
330 | sub r14=r14,r17 // r14 (bspstore1) <- bsp1 - (sc_loadrs >> 16) | ||
331 | shr.u r17=r17,3 // r17 <- (sc_loadrs >> 19) | ||
332 | ;; | ||
333 | loadrs // restore dirty partition | ||
334 | extr.u r14=r14,3,6 // r14 <- rse_slot_num(bspstore1) | ||
335 | ;; | ||
336 | add r14=r14,r17 // r14 <- rse_slot_num(bspstore1) + (sc_loadrs >> 19) | ||
337 | ;; | ||
338 | shr.u r14=r14,6 // r14 <- (rse_slot_num(bspstore1) + (sc_loadrs >> 19))/0x40 | ||
339 | ;; | ||
340 | sub r14=r14,r17 // r14 <- -rse_num_regs(bspstore1, bsp1) | ||
341 | movl r17=0x8208208208208209 | ||
342 | ;; | ||
343 | add r18=r18,r14 // r18 (delta) <- rse_slot_num(bsp0) - rse_num_regs(bspstore1,bsp1) | ||
344 | setf.sig f7=r17 | ||
345 | cmp.lt p7,p0=r14,r0 // p7 <- (r14 < 0)? | ||
346 | ;; | ||
347 | (p7) adds r18=-62,r18 // delta -= 62 | ||
348 | ;; | ||
349 | setf.sig f6=r18 | ||
350 | ;; | ||
351 | xmpy.h f6=f6,f7 | ||
352 | ;; | ||
353 | getf.sig r17=f6 | ||
354 | ;; | ||
355 | add r17=r17,r18 | ||
356 | shr r18=r18,63 | ||
357 | ;; | ||
358 | shr r17=r17,5 | ||
359 | ;; | ||
360 | sub r17=r17,r18 // r17 = delta/63 | ||
361 | ;; | ||
362 | add r17=r14,r17 // r17 <- delta/63 - rse_num_regs(bspstore1, bsp1) | ||
363 | ;; | ||
364 | shladd r15=r17,3,r15 // r15 <- bsp0 + 8*(delta/63 - rse_num_regs(bspstore1, bsp1)) | ||
365 | ;; | ||
366 | mov ar.bspstore=r15 // switch back to old register backing store area | ||
367 | ;; | ||
368 | mov ar.rnat=r16 // restore RNaT | ||
369 | mov ar.rsc=0xf // (will be restored later on from sc_ar_rsc) | ||
370 | // invala not necessary as that will happen when returning to user-mode | ||
371 | br.cond.sptk back_from_restore_rbs | ||
372 | END(__kernel_sigtramp) | ||
diff --git a/arch/ia64/kernel/gate.lds.S b/arch/ia64/kernel/gate.lds.S new file mode 100644 index 000000000000..e1e4aba9ecd0 --- /dev/null +++ b/arch/ia64/kernel/gate.lds.S | |||
@@ -0,0 +1,95 @@ | |||
1 | /* | ||
2 | * Linker script for gate DSO. The gate pages are an ELF shared object prelinked to its | ||
3 | * virtual address, with only one read-only segment and one execute-only segment (both fit | ||
4 | * in one page). This script controls its layout. | ||
5 | */ | ||
6 | |||
7 | #include <linux/config.h> | ||
8 | |||
9 | #include <asm/system.h> | ||
10 | |||
11 | SECTIONS | ||
12 | { | ||
13 | . = GATE_ADDR + SIZEOF_HEADERS; | ||
14 | |||
15 | .hash : { *(.hash) } :readable | ||
16 | .dynsym : { *(.dynsym) } | ||
17 | .dynstr : { *(.dynstr) } | ||
18 | .gnu.version : { *(.gnu.version) } | ||
19 | .gnu.version_d : { *(.gnu.version_d) } | ||
20 | .gnu.version_r : { *(.gnu.version_r) } | ||
21 | .dynamic : { *(.dynamic) } :readable :dynamic | ||
22 | |||
23 | /* | ||
24 | * This linker script is used both with -r and with -shared. For the layouts to match, | ||
25 | * we need to skip more than enough space for the dynamic symbol table et al. If this | ||
26 | * amount is insufficient, ld -shared will barf. Just increase it here. | ||
27 | */ | ||
28 | . = GATE_ADDR + 0x500; | ||
29 | |||
30 | .data.patch : { | ||
31 | __start_gate_mckinley_e9_patchlist = .; | ||
32 | *(.data.patch.mckinley_e9) | ||
33 | __end_gate_mckinley_e9_patchlist = .; | ||
34 | |||
35 | __start_gate_vtop_patchlist = .; | ||
36 | *(.data.patch.vtop) | ||
37 | __end_gate_vtop_patchlist = .; | ||
38 | |||
39 | __start_gate_fsyscall_patchlist = .; | ||
40 | *(.data.patch.fsyscall_table) | ||
41 | __end_gate_fsyscall_patchlist = .; | ||
42 | |||
43 | __start_gate_brl_fsys_bubble_down_patchlist = .; | ||
44 | *(.data.patch.brl_fsys_bubble_down) | ||
45 | __end_gate_brl_fsys_bubble_down_patchlist = .; | ||
46 | } :readable | ||
47 | .IA_64.unwind_info : { *(.IA_64.unwind_info*) } | ||
48 | .IA_64.unwind : { *(.IA_64.unwind*) } :readable :unwind | ||
49 | #ifdef HAVE_BUGGY_SEGREL | ||
50 | .text (GATE_ADDR + PAGE_SIZE) : { *(.text) *(.text.*) } :readable | ||
51 | #else | ||
52 | . = ALIGN (PERCPU_PAGE_SIZE) + (. & (PERCPU_PAGE_SIZE - 1)); | ||
53 | .text : { *(.text) *(.text.*) } :epc | ||
54 | #endif | ||
55 | |||
56 | /DISCARD/ : { | ||
57 | *(.got.plt) *(.got) | ||
58 | *(.data .data.* .gnu.linkonce.d.*) | ||
59 | *(.dynbss) | ||
60 | *(.bss .bss.* .gnu.linkonce.b.*) | ||
61 | *(__ex_table) | ||
62 | } | ||
63 | } | ||
64 | |||
65 | /* | ||
66 | * We must supply the ELF program headers explicitly to get just one read-only | ||
67 | * PT_LOAD segment (plus one execute-only PT_LOAD segment unless HAVE_BUGGY_SEGREL is defined), and set the flags explicitly (PF_R, or PF_X for the epc segment). | ||
68 | */ | ||
69 | PHDRS | ||
70 | { | ||
71 | readable PT_LOAD FILEHDR PHDRS FLAGS(4); /* PF_R */ | ||
72 | #ifndef HAVE_BUGGY_SEGREL | ||
73 | epc PT_LOAD FILEHDR PHDRS FLAGS(1); /* PF_X */ | ||
74 | #endif | ||
75 | dynamic PT_DYNAMIC FLAGS(4); /* PF_R */ | ||
76 | unwind 0x70000001; /* PT_IA_64_UNWIND, but ld doesn't match the name */ | ||
77 | } | ||
78 | |||
79 | /* | ||
80 | * This controls what symbols we export from the DSO. | ||
81 | */ | ||
82 | VERSION | ||
83 | { | ||
84 | LINUX_2.5 { | ||
85 | global: | ||
86 | __kernel_syscall_via_break; | ||
87 | __kernel_syscall_via_epc; | ||
88 | __kernel_sigtramp; | ||
89 | |||
90 | local: *; | ||
91 | }; | ||
92 | } | ||
93 | |||
94 | /* The ELF entry point can be used to set the AT_SYSINFO value. */ | ||
95 | ENTRY(__kernel_syscall_via_epc) | ||
diff --git a/arch/ia64/kernel/head.S b/arch/ia64/kernel/head.S new file mode 100644 index 000000000000..105c7fec8c6d --- /dev/null +++ b/arch/ia64/kernel/head.S | |||
@@ -0,0 +1,996 @@ | |||
1 | /* | ||
2 | * Here is where the ball gets rolling as far as the kernel is concerned. | ||
3 | * When control is transferred to _start, the bootloader has already | ||
4 | * loaded us to the correct address. All that's left to do here is | ||
5 | * to set up the kernel's global pointer and jump to the kernel | ||
6 | * entry point. | ||
7 | * | ||
8 | * Copyright (C) 1998-2001, 2003, 2005 Hewlett-Packard Co | ||
9 | * David Mosberger-Tang <davidm@hpl.hp.com> | ||
10 | * Stephane Eranian <eranian@hpl.hp.com> | ||
11 | * Copyright (C) 1999 VA Linux Systems | ||
12 | * Copyright (C) 1999 Walt Drummond <drummond@valinux.com> | ||
13 | * Copyright (C) 1999 Intel Corp. | ||
14 | * Copyright (C) 1999 Asit Mallick <Asit.K.Mallick@intel.com> | ||
15 | * Copyright (C) 1999 Don Dugger <Don.Dugger@intel.com> | ||
16 | * Copyright (C) 2002 Fenghua Yu <fenghua.yu@intel.com> | ||
17 | * -Optimize __ia64_save_fpu() and __ia64_load_fpu() for Itanium 2. | ||
18 | */ | ||
19 | |||
20 | #include <linux/config.h> | ||
21 | |||
22 | #include <asm/asmmacro.h> | ||
23 | #include <asm/fpu.h> | ||
24 | #include <asm/kregs.h> | ||
25 | #include <asm/mmu_context.h> | ||
26 | #include <asm/offsets.h> | ||
27 | #include <asm/pal.h> | ||
28 | #include <asm/pgtable.h> | ||
29 | #include <asm/processor.h> | ||
30 | #include <asm/ptrace.h> | ||
31 | #include <asm/system.h> | ||
32 | |||
33 | .section __special_page_section,"ax" | ||
34 | |||
35 | .global empty_zero_page | ||
36 | empty_zero_page: | ||
37 | .skip PAGE_SIZE | ||
38 | |||
39 | .global swapper_pg_dir | ||
40 | swapper_pg_dir: | ||
41 | .skip PAGE_SIZE | ||
42 | |||
43 | .rodata | ||
44 | halt_msg: | ||
45 | stringz "Halting kernel\n" | ||
46 | |||
47 | .text | ||
48 | |||
49 | .global start_ap | ||
50 | |||
51 | /* | ||
52 | * Start the kernel. When the bootloader passes control to _start(), r28 | ||
53 | * holds the address of the boot parameter area. Execution reaches | ||
54 | * here in physical mode. | ||
55 | */ | ||
56 | GLOBAL_ENTRY(_start) | ||
57 | start_ap: | ||
58 | .prologue | ||
59 | .save rp, r0 // terminate unwind chain with a NULL rp | ||
60 | .body | ||
61 | |||
62 | rsm psr.i | psr.ic | ||
63 | ;; | ||
64 | srlz.i | ||
65 | ;; | ||
66 | /* | ||
67 | * Initialize kernel region registers: | ||
68 | * rr[0]: VHPT enabled, page size = PAGE_SHIFT | ||
69 | * rr[1]: VHPT enabled, page size = PAGE_SHIFT | ||
70 | * rr[2]: VHPT enabled, page size = PAGE_SHIFT | ||
71 | * rr[3]: VHPT enabled, page size = PAGE_SHIFT | ||
72 | * rr[4]: VHPT enabled, page size = PAGE_SHIFT | ||
73 | * rr[5]: VHPT enabled, page size = PAGE_SHIFT | ||
74 | * rr[6]: VHPT disabled, page size = IA64_GRANULE_SHIFT | ||
75 | * rr[7]: VHPT disabled, page size = IA64_GRANULE_SHIFT | ||
76 | * We initialize all of them to prevent inadvertently assuming | ||
77 | * something about the state of address translation early in boot. | ||
78 | */ | ||
79 | mov r6=((ia64_rid(IA64_REGION_ID_KERNEL, (0<<61)) << 8) | (PAGE_SHIFT << 2) | 1) | ||
80 | movl r7=(0<<61) | ||
81 | mov r8=((ia64_rid(IA64_REGION_ID_KERNEL, (1<<61)) << 8) | (PAGE_SHIFT << 2) | 1) | ||
82 | movl r9=(1<<61) | ||
83 | mov r10=((ia64_rid(IA64_REGION_ID_KERNEL, (2<<61)) << 8) | (PAGE_SHIFT << 2) | 1) | ||
84 | movl r11=(2<<61) | ||
85 | mov r12=((ia64_rid(IA64_REGION_ID_KERNEL, (3<<61)) << 8) | (PAGE_SHIFT << 2) | 1) | ||
86 | movl r13=(3<<61) | ||
87 | mov r14=((ia64_rid(IA64_REGION_ID_KERNEL, (4<<61)) << 8) | (PAGE_SHIFT << 2) | 1) | ||
88 | movl r15=(4<<61) | ||
89 | mov r16=((ia64_rid(IA64_REGION_ID_KERNEL, (5<<61)) << 8) | (PAGE_SHIFT << 2) | 1) | ||
90 | movl r17=(5<<61) | ||
91 | mov r18=((ia64_rid(IA64_REGION_ID_KERNEL, (6<<61)) << 8) | (IA64_GRANULE_SHIFT << 2)) | ||
92 | movl r19=(6<<61) | ||
93 | mov r20=((ia64_rid(IA64_REGION_ID_KERNEL, (7<<61)) << 8) | (IA64_GRANULE_SHIFT << 2)) | ||
94 | movl r21=(7<<61) | ||
95 | ;; | ||
96 | mov rr[r7]=r6 | ||
97 | mov rr[r9]=r8 | ||
98 | mov rr[r11]=r10 | ||
99 | mov rr[r13]=r12 | ||
100 | mov rr[r15]=r14 | ||
101 | mov rr[r17]=r16 | ||
102 | mov rr[r19]=r18 | ||
103 | mov rr[r21]=r20 | ||
104 | ;; | ||
105 | /* | ||
106 | * Now pin mappings into the TLB for kernel text and data | ||
107 | */ | ||
108 | mov r18=KERNEL_TR_PAGE_SHIFT<<2 | ||
109 | movl r17=KERNEL_START | ||
110 | ;; | ||
111 | mov cr.itir=r18 | ||
112 | mov cr.ifa=r17 | ||
113 | mov r16=IA64_TR_KERNEL | ||
114 | mov r3=ip | ||
115 | movl r18=PAGE_KERNEL | ||
116 | ;; | ||
117 | dep r2=0,r3,0,KERNEL_TR_PAGE_SHIFT | ||
118 | ;; | ||
119 | or r18=r2,r18 | ||
120 | ;; | ||
121 | srlz.i | ||
122 | ;; | ||
123 | itr.i itr[r16]=r18 | ||
124 | ;; | ||
125 | itr.d dtr[r16]=r18 | ||
126 | ;; | ||
127 | srlz.i | ||
128 | |||
129 | /* | ||
130 | * Switch into virtual mode: | ||
131 | */ | ||
132 | movl r16=(IA64_PSR_IT|IA64_PSR_IC|IA64_PSR_DT|IA64_PSR_RT|IA64_PSR_DFH|IA64_PSR_BN \ | ||
133 | |IA64_PSR_DI) | ||
134 | ;; | ||
135 | mov cr.ipsr=r16 | ||
136 | movl r17=1f | ||
137 | ;; | ||
138 | mov cr.iip=r17 | ||
139 | mov cr.ifs=r0 | ||
140 | ;; | ||
141 | rfi | ||
142 | ;; | ||
143 | 1: // now we are in virtual mode | ||
144 | |||
145 | // set IVT entry point---can't access I/O ports without it | ||
146 | movl r3=ia64_ivt | ||
147 | ;; | ||
148 | mov cr.iva=r3 | ||
149 | movl r2=FPSR_DEFAULT | ||
150 | ;; | ||
151 | srlz.i | ||
152 | movl gp=__gp | ||
153 | |||
154 | mov ar.fpsr=r2 | ||
155 | ;; | ||
156 | |||
157 | #define isAP p2 // are we an Application Processor? | ||
158 | #define isBP p3 // are we the Bootstrap Processor? | ||
159 | |||
160 | #ifdef CONFIG_SMP | ||
161 | /* | ||
162 | * Find the init_task for the currently booting CPU. At poweron, and in | ||
163 | * UP mode, task_for_booting_cpu is NULL. | ||
164 | */ | ||
165 | movl r3=task_for_booting_cpu | ||
166 | ;; | ||
167 | ld8 r3=[r3] | ||
168 | movl r2=init_task | ||
169 | ;; | ||
170 | cmp.eq isBP,isAP=r3,r0 | ||
171 | ;; | ||
172 | (isAP) mov r2=r3 | ||
173 | #else | ||
174 | movl r2=init_task | ||
175 | cmp.eq isBP,isAP=r0,r0 | ||
176 | #endif | ||
177 | ;; | ||
178 | tpa r3=r2 // r3 == phys addr of task struct | ||
179 | mov r16=-1 | ||
180 | (isBP) br.cond.dpnt .load_current // BP stack is on region 5 --- no need to map it | ||
181 | |||
182 | // load mapping for stack (virtaddr in r2, physaddr in r3) | ||
183 | rsm psr.ic | ||
184 | movl r17=PAGE_KERNEL | ||
185 | ;; | ||
186 | srlz.d | ||
187 | dep r18=0,r3,0,12 | ||
188 | ;; | ||
189 | or r18=r17,r18 | ||
190 | dep r2=-1,r3,61,3 // IMVA of task | ||
191 | ;; | ||
192 | mov r17=rr[r2] | ||
193 | shr.u r16=r3,IA64_GRANULE_SHIFT | ||
194 | ;; | ||
195 | dep r17=0,r17,8,24 | ||
196 | ;; | ||
197 | mov cr.itir=r17 | ||
198 | mov cr.ifa=r2 | ||
199 | |||
200 | mov r19=IA64_TR_CURRENT_STACK | ||
201 | ;; | ||
202 | itr.d dtr[r19]=r18 | ||
203 | ;; | ||
204 | ssm psr.ic | ||
205 | srlz.d | ||
206 | ;; | ||
207 | |||
208 | .load_current: | ||
209 | // load the "current" pointer (r13) and ar.k6 with the current task | ||
210 | mov IA64_KR(CURRENT)=r2 // virtual address | ||
211 | mov IA64_KR(CURRENT_STACK)=r16 | ||
212 | mov r13=r2 | ||
213 | /* | ||
214 | * Reserve space at the top of the stack for "struct pt_regs". Kernel threads | ||
215 | * don't store interesting values in that structure, but the space still needs | ||
216 | * to be there because time-critical stuff such as the context switching can | ||
217 | * be implemented more efficiently (for example, __switch_to() | ||
218 | * always sets the psr.dfh bit of the task it is switching to). | ||
219 | */ | ||
220 | addl r12=IA64_STK_OFFSET-IA64_PT_REGS_SIZE-16,r2 | ||
221 | addl r2=IA64_RBS_OFFSET,r2 // initialize the RSE | ||
222 | mov ar.rsc=0 // place RSE in enforced lazy mode | ||
223 | ;; | ||
224 | loadrs // clear the dirty partition | ||
225 | ;; | ||
226 | mov ar.bspstore=r2 // establish the new RSE stack | ||
227 | ;; | ||
228 | mov ar.rsc=0x3 // place RSE in eager mode | ||
229 | |||
230 | (isBP) dep r28=-1,r28,61,3 // make address virtual | ||
231 | (isBP) movl r2=ia64_boot_param | ||
232 | ;; | ||
233 | (isBP) st8 [r2]=r28 // save the address of the boot param area passed by the bootloader | ||
234 | |||
235 | #ifdef CONFIG_SMP | ||
236 | (isAP) br.call.sptk.many rp=start_secondary | ||
237 | .ret0: | ||
238 | (isAP) br.cond.sptk self | ||
239 | #endif | ||
240 | |||
241 | // This is executed by the bootstrap processor (bsp) only: | ||
242 | |||
243 | #ifdef CONFIG_IA64_FW_EMU | ||
244 | // initialize PAL & SAL emulator: | ||
245 | br.call.sptk.many rp=sys_fw_init | ||
246 | .ret1: | ||
247 | #endif | ||
248 | br.call.sptk.many rp=start_kernel | ||
249 | .ret2: addl r3=@ltoff(halt_msg),gp | ||
250 | ;; | ||
251 | alloc r2=ar.pfs,8,0,2,0 | ||
252 | ;; | ||
253 | ld8 out0=[r3] | ||
254 | br.call.sptk.many b0=console_print | ||
255 | |||
256 | self: hint @pause | ||
257 | br.sptk.many self // endless loop | ||
258 | END(_start) | ||
259 | |||
260 | GLOBAL_ENTRY(ia64_save_debug_regs) | ||
261 | alloc r16=ar.pfs,1,0,0,0 | ||
262 | mov r20=ar.lc // preserve ar.lc; it is reused below as the br.cloop loop counter | ||
263 | mov ar.lc=IA64_NUM_DBG_REGS-1 | ||
264 | mov r18=0 | ||
265 | add r19=IA64_NUM_DBG_REGS*8,in0 | ||
266 | ;; | ||
267 | 1: mov r16=dbr[r18] | ||
268 | #ifdef CONFIG_ITANIUM | ||
269 | ;; | ||
270 | srlz.d | ||
271 | #endif | ||
272 | mov r17=ibr[r18] | ||
273 | add r18=1,r18 | ||
274 | ;; | ||
275 | st8.nta [in0]=r16,8 | ||
276 | st8.nta [r19]=r17,8 | ||
277 | br.cloop.sptk.many 1b | ||
278 | ;; | ||
279 | mov ar.lc=r20 // restore the ar.lc value saved on entry | ||
280 | br.ret.sptk.many rp | ||
281 | END(ia64_save_debug_regs) | ||
282 | |||
283 | GLOBAL_ENTRY(ia64_load_debug_regs) | ||
284 | alloc r16=ar.pfs,1,0,0,0 | ||
285 | lfetch.nta [in0] | ||
286 | mov r20=ar.lc // preserve ar.lc; it is reused below as the br.cloop loop counter | ||
287 | add r19=IA64_NUM_DBG_REGS*8,in0 | ||
288 | mov ar.lc=IA64_NUM_DBG_REGS-1 | ||
289 | mov r18=-1 | ||
290 | ;; | ||
291 | 1: ld8.nta r16=[in0],8 | ||
292 | ld8.nta r17=[r19],8 | ||
293 | add r18=1,r18 | ||
294 | ;; | ||
295 | mov dbr[r18]=r16 | ||
296 | #ifdef CONFIG_ITANIUM | ||
297 | ;; | ||
298 | srlz.d // Errata 132 (NoFix status) | ||
299 | #endif | ||
300 | mov ibr[r18]=r17 | ||
301 | br.cloop.sptk.many 1b | ||
302 | ;; | ||
303 | mov ar.lc=r20 // restore the ar.lc value saved on entry | ||
304 | br.ret.sptk.many rp | ||
305 | END(ia64_load_debug_regs) | ||
306 | |||
307 | GLOBAL_ENTRY(__ia64_save_fpu) | ||
308 | alloc r2=ar.pfs,1,4,0,0 | ||
309 | adds loc0=96*16-16,in0 | ||
310 | adds loc1=96*16-16-128,in0 | ||
311 | ;; | ||
312 | stf.spill.nta [loc0]=f127,-256 | ||
313 | stf.spill.nta [loc1]=f119,-256 | ||
314 | ;; | ||
315 | stf.spill.nta [loc0]=f111,-256 | ||
316 | stf.spill.nta [loc1]=f103,-256 | ||
317 | ;; | ||
318 | stf.spill.nta [loc0]=f95,-256 | ||
319 | stf.spill.nta [loc1]=f87,-256 | ||
320 | ;; | ||
321 | stf.spill.nta [loc0]=f79,-256 | ||
322 | stf.spill.nta [loc1]=f71,-256 | ||
323 | ;; | ||
324 | stf.spill.nta [loc0]=f63,-256 | ||
325 | stf.spill.nta [loc1]=f55,-256 | ||
326 | adds loc2=96*16-32,in0 | ||
327 | ;; | ||
328 | stf.spill.nta [loc0]=f47,-256 | ||
329 | stf.spill.nta [loc1]=f39,-256 | ||
330 | adds loc3=96*16-32-128,in0 | ||
331 | ;; | ||
332 | stf.spill.nta [loc2]=f126,-256 | ||
333 | stf.spill.nta [loc3]=f118,-256 | ||
334 | ;; | ||
335 | stf.spill.nta [loc2]=f110,-256 | ||
336 | stf.spill.nta [loc3]=f102,-256 | ||
337 | ;; | ||
338 | stf.spill.nta [loc2]=f94,-256 | ||
339 | stf.spill.nta [loc3]=f86,-256 | ||
340 | ;; | ||
341 | stf.spill.nta [loc2]=f78,-256 | ||
342 | stf.spill.nta [loc3]=f70,-256 | ||
343 | ;; | ||
344 | stf.spill.nta [loc2]=f62,-256 | ||
345 | stf.spill.nta [loc3]=f54,-256 | ||
346 | adds loc0=96*16-48,in0 | ||
347 | ;; | ||
348 | stf.spill.nta [loc2]=f46,-256 | ||
349 | stf.spill.nta [loc3]=f38,-256 | ||
350 | adds loc1=96*16-48-128,in0 | ||
351 | ;; | ||
352 | stf.spill.nta [loc0]=f125,-256 | ||
353 | stf.spill.nta [loc1]=f117,-256 | ||
354 | ;; | ||
355 | stf.spill.nta [loc0]=f109,-256 | ||
356 | stf.spill.nta [loc1]=f101,-256 | ||
357 | ;; | ||
358 | stf.spill.nta [loc0]=f93,-256 | ||
359 | stf.spill.nta [loc1]=f85,-256 | ||
360 | ;; | ||
361 | stf.spill.nta [loc0]=f77,-256 | ||
362 | stf.spill.nta [loc1]=f69,-256 | ||
363 | ;; | ||
364 | stf.spill.nta [loc0]=f61,-256 | ||
365 | stf.spill.nta [loc1]=f53,-256 | ||
366 | adds loc2=96*16-64,in0 | ||
367 | ;; | ||
368 | stf.spill.nta [loc0]=f45,-256 | ||
369 | stf.spill.nta [loc1]=f37,-256 | ||
370 | adds loc3=96*16-64-128,in0 | ||
371 | ;; | ||
372 | stf.spill.nta [loc2]=f124,-256 | ||
373 | stf.spill.nta [loc3]=f116,-256 | ||
374 | ;; | ||
375 | stf.spill.nta [loc2]=f108,-256 | ||
376 | stf.spill.nta [loc3]=f100,-256 | ||
377 | ;; | ||
378 | stf.spill.nta [loc2]=f92,-256 | ||
379 | stf.spill.nta [loc3]=f84,-256 | ||
380 | ;; | ||
381 | stf.spill.nta [loc2]=f76,-256 | ||
382 | stf.spill.nta [loc3]=f68,-256 | ||
383 | ;; | ||
384 | stf.spill.nta [loc2]=f60,-256 | ||
385 | stf.spill.nta [loc3]=f52,-256 | ||
386 | adds loc0=96*16-80,in0 | ||
387 | ;; | ||
388 | stf.spill.nta [loc2]=f44,-256 | ||
389 | stf.spill.nta [loc3]=f36,-256 | ||
390 | adds loc1=96*16-80-128,in0 | ||
391 | ;; | ||
392 | stf.spill.nta [loc0]=f123,-256 | ||
393 | stf.spill.nta [loc1]=f115,-256 | ||
394 | ;; | ||
395 | stf.spill.nta [loc0]=f107,-256 | ||
396 | stf.spill.nta [loc1]=f99,-256 | ||
397 | ;; | ||
398 | stf.spill.nta [loc0]=f91,-256 | ||
399 | stf.spill.nta [loc1]=f83,-256 | ||
400 | ;; | ||
401 | stf.spill.nta [loc0]=f75,-256 | ||
402 | stf.spill.nta [loc1]=f67,-256 | ||
403 | ;; | ||
404 | stf.spill.nta [loc0]=f59,-256 | ||
405 | stf.spill.nta [loc1]=f51,-256 | ||
406 | adds loc2=96*16-96,in0 | ||
407 | ;; | ||
408 | stf.spill.nta [loc0]=f43,-256 | ||
409 | stf.spill.nta [loc1]=f35,-256 | ||
410 | adds loc3=96*16-96-128,in0 | ||
411 | ;; | ||
412 | stf.spill.nta [loc2]=f122,-256 | ||
413 | stf.spill.nta [loc3]=f114,-256 | ||
414 | ;; | ||
415 | stf.spill.nta [loc2]=f106,-256 | ||
416 | stf.spill.nta [loc3]=f98,-256 | ||
417 | ;; | ||
418 | stf.spill.nta [loc2]=f90,-256 | ||
419 | stf.spill.nta [loc3]=f82,-256 | ||
420 | ;; | ||
421 | stf.spill.nta [loc2]=f74,-256 | ||
422 | stf.spill.nta [loc3]=f66,-256 | ||
423 | ;; | ||
424 | stf.spill.nta [loc2]=f58,-256 | ||
425 | stf.spill.nta [loc3]=f50,-256 | ||
426 | adds loc0=96*16-112,in0 | ||
427 | ;; | ||
428 | stf.spill.nta [loc2]=f42,-256 | ||
429 | stf.spill.nta [loc3]=f34,-256 | ||
430 | adds loc1=96*16-112-128,in0 | ||
431 | ;; | ||
432 | stf.spill.nta [loc0]=f121,-256 | ||
433 | stf.spill.nta [loc1]=f113,-256 | ||
434 | ;; | ||
435 | stf.spill.nta [loc0]=f105,-256 | ||
436 | stf.spill.nta [loc1]=f97,-256 | ||
437 | ;; | ||
438 | stf.spill.nta [loc0]=f89,-256 | ||
439 | stf.spill.nta [loc1]=f81,-256 | ||
440 | ;; | ||
441 | stf.spill.nta [loc0]=f73,-256 | ||
442 | stf.spill.nta [loc1]=f65,-256 | ||
443 | ;; | ||
444 | stf.spill.nta [loc0]=f57,-256 | ||
445 | stf.spill.nta [loc1]=f49,-256 | ||
446 | adds loc2=96*16-128,in0 | ||
447 | ;; | ||
448 | stf.spill.nta [loc0]=f41,-256 | ||
449 | stf.spill.nta [loc1]=f33,-256 | ||
450 | adds loc3=96*16-128-128,in0 | ||
451 | ;; | ||
452 | stf.spill.nta [loc2]=f120,-256 | ||
453 | stf.spill.nta [loc3]=f112,-256 | ||
454 | ;; | ||
455 | stf.spill.nta [loc2]=f104,-256 | ||
456 | stf.spill.nta [loc3]=f96,-256 | ||
457 | ;; | ||
458 | stf.spill.nta [loc2]=f88,-256 | ||
459 | stf.spill.nta [loc3]=f80,-256 | ||
460 | ;; | ||
461 | stf.spill.nta [loc2]=f72,-256 | ||
462 | stf.spill.nta [loc3]=f64,-256 | ||
463 | ;; | ||
464 | stf.spill.nta [loc2]=f56,-256 | ||
465 | stf.spill.nta [loc3]=f48,-256 | ||
466 | ;; | ||
467 | stf.spill.nta [loc2]=f40 | ||
468 | stf.spill.nta [loc3]=f32 | ||
469 | br.ret.sptk.many rp | ||
470 | END(__ia64_save_fpu) | ||
471 | |||
472 | GLOBAL_ENTRY(__ia64_load_fpu) | ||
473 | alloc r2=ar.pfs,1,2,0,0 | ||
474 | adds r3=128,in0 | ||
475 | adds r14=256,in0 | ||
476 | adds r15=384,in0 | ||
477 | mov loc0=512 | ||
478 | mov loc1=-1024+16 | ||
479 | ;; | ||
480 | ldf.fill.nta f32=[in0],loc0 | ||
481 | ldf.fill.nta f40=[ r3],loc0 | ||
482 | ldf.fill.nta f48=[r14],loc0 | ||
483 | ldf.fill.nta f56=[r15],loc0 | ||
484 | ;; | ||
485 | ldf.fill.nta f64=[in0],loc0 | ||
486 | ldf.fill.nta f72=[ r3],loc0 | ||
487 | ldf.fill.nta f80=[r14],loc0 | ||
488 | ldf.fill.nta f88=[r15],loc0 | ||
489 | ;; | ||
490 | ldf.fill.nta f96=[in0],loc1 | ||
491 | ldf.fill.nta f104=[ r3],loc1 | ||
492 | ldf.fill.nta f112=[r14],loc1 | ||
493 | ldf.fill.nta f120=[r15],loc1 | ||
494 | ;; | ||
495 | ldf.fill.nta f33=[in0],loc0 | ||
496 | ldf.fill.nta f41=[ r3],loc0 | ||
497 | ldf.fill.nta f49=[r14],loc0 | ||
498 | ldf.fill.nta f57=[r15],loc0 | ||
499 | ;; | ||
500 | ldf.fill.nta f65=[in0],loc0 | ||
501 | ldf.fill.nta f73=[ r3],loc0 | ||
502 | ldf.fill.nta f81=[r14],loc0 | ||
503 | ldf.fill.nta f89=[r15],loc0 | ||
504 | ;; | ||
505 | ldf.fill.nta f97=[in0],loc1 | ||
506 | ldf.fill.nta f105=[ r3],loc1 | ||
507 | ldf.fill.nta f113=[r14],loc1 | ||
508 | ldf.fill.nta f121=[r15],loc1 | ||
509 | ;; | ||
510 | ldf.fill.nta f34=[in0],loc0 | ||
511 | ldf.fill.nta f42=[ r3],loc0 | ||
512 | ldf.fill.nta f50=[r14],loc0 | ||
513 | ldf.fill.nta f58=[r15],loc0 | ||
514 | ;; | ||
515 | ldf.fill.nta f66=[in0],loc0 | ||
516 | ldf.fill.nta f74=[ r3],loc0 | ||
517 | ldf.fill.nta f82=[r14],loc0 | ||
518 | ldf.fill.nta f90=[r15],loc0 | ||
519 | ;; | ||
520 | ldf.fill.nta f98=[in0],loc1 | ||
521 | ldf.fill.nta f106=[ r3],loc1 | ||
522 | ldf.fill.nta f114=[r14],loc1 | ||
523 | ldf.fill.nta f122=[r15],loc1 | ||
524 | ;; | ||
525 | ldf.fill.nta f35=[in0],loc0 | ||
526 | ldf.fill.nta f43=[ r3],loc0 | ||
527 | ldf.fill.nta f51=[r14],loc0 | ||
528 | ldf.fill.nta f59=[r15],loc0 | ||
529 | ;; | ||
530 | ldf.fill.nta f67=[in0],loc0 | ||
531 | ldf.fill.nta f75=[ r3],loc0 | ||
532 | ldf.fill.nta f83=[r14],loc0 | ||
533 | ldf.fill.nta f91=[r15],loc0 | ||
534 | ;; | ||
535 | ldf.fill.nta f99=[in0],loc1 | ||
536 | ldf.fill.nta f107=[ r3],loc1 | ||
537 | ldf.fill.nta f115=[r14],loc1 | ||
538 | ldf.fill.nta f123=[r15],loc1 | ||
539 | ;; | ||
540 | ldf.fill.nta f36=[in0],loc0 | ||
541 | ldf.fill.nta f44=[ r3],loc0 | ||
542 | ldf.fill.nta f52=[r14],loc0 | ||
543 | ldf.fill.nta f60=[r15],loc0 | ||
544 | ;; | ||
545 | ldf.fill.nta f68=[in0],loc0 | ||
546 | ldf.fill.nta f76=[ r3],loc0 | ||
547 | ldf.fill.nta f84=[r14],loc0 | ||
548 | ldf.fill.nta f92=[r15],loc0 | ||
549 | ;; | ||
550 | ldf.fill.nta f100=[in0],loc1 | ||
551 | ldf.fill.nta f108=[ r3],loc1 | ||
552 | ldf.fill.nta f116=[r14],loc1 | ||
553 | ldf.fill.nta f124=[r15],loc1 | ||
554 | ;; | ||
555 | ldf.fill.nta f37=[in0],loc0 | ||
556 | ldf.fill.nta f45=[ r3],loc0 | ||
557 | ldf.fill.nta f53=[r14],loc0 | ||
558 | ldf.fill.nta f61=[r15],loc0 | ||
559 | ;; | ||
560 | ldf.fill.nta f69=[in0],loc0 | ||
561 | ldf.fill.nta f77=[ r3],loc0 | ||
562 | ldf.fill.nta f85=[r14],loc0 | ||
563 | ldf.fill.nta f93=[r15],loc0 | ||
564 | ;; | ||
565 | ldf.fill.nta f101=[in0],loc1 | ||
566 | ldf.fill.nta f109=[ r3],loc1 | ||
567 | ldf.fill.nta f117=[r14],loc1 | ||
568 | ldf.fill.nta f125=[r15],loc1 | ||
569 | ;; | ||
570 | ldf.fill.nta f38 =[in0],loc0 | ||
571 | ldf.fill.nta f46 =[ r3],loc0 | ||
572 | ldf.fill.nta f54 =[r14],loc0 | ||
573 | ldf.fill.nta f62 =[r15],loc0 | ||
574 | ;; | ||
575 | ldf.fill.nta f70 =[in0],loc0 | ||
576 | ldf.fill.nta f78 =[ r3],loc0 | ||
577 | ldf.fill.nta f86 =[r14],loc0 | ||
578 | ldf.fill.nta f94 =[r15],loc0 | ||
579 | ;; | ||
580 | ldf.fill.nta f102=[in0],loc1 | ||
581 | ldf.fill.nta f110=[ r3],loc1 | ||
582 | ldf.fill.nta f118=[r14],loc1 | ||
583 | ldf.fill.nta f126=[r15],loc1 | ||
584 | ;; | ||
585 | ldf.fill.nta f39 =[in0],loc0 | ||
586 | ldf.fill.nta f47 =[ r3],loc0 | ||
587 | ldf.fill.nta f55 =[r14],loc0 | ||
588 | ldf.fill.nta f63 =[r15],loc0 | ||
589 | ;; | ||
590 | ldf.fill.nta f71 =[in0],loc0 | ||
591 | ldf.fill.nta f79 =[ r3],loc0 | ||
592 | ldf.fill.nta f87 =[r14],loc0 | ||
593 | ldf.fill.nta f95 =[r15],loc0 | ||
594 | ;; | ||
595 | ldf.fill.nta f103=[in0] | ||
596 | ldf.fill.nta f111=[ r3] | ||
597 | ldf.fill.nta f119=[r14] | ||
598 | ldf.fill.nta f127=[r15] | ||
599 | br.ret.sptk.many rp | ||
600 | END(__ia64_load_fpu) | ||
601 | |||
602 | GLOBAL_ENTRY(__ia64_init_fpu) | ||
603 | stf.spill [sp]=f0 // M3 | ||
604 | mov f32=f0 // F | ||
605 | nop.b 0 | ||
606 | |||
607 | ldfps f33,f34=[sp] // M0 | ||
608 | ldfps f35,f36=[sp] // M1 | ||
609 | mov f37=f0 // F | ||
610 | ;; | ||
611 | |||
612 | setf.s f38=r0 // M2 | ||
613 | setf.s f39=r0 // M3 | ||
614 | mov f40=f0 // F | ||
615 | |||
616 | ldfps f41,f42=[sp] // M0 | ||
617 | ldfps f43,f44=[sp] // M1 | ||
618 | mov f45=f0 // F | ||
619 | |||
620 | setf.s f46=r0 // M2 | ||
621 | setf.s f47=r0 // M3 | ||
622 | mov f48=f0 // F | ||
623 | |||
624 | ldfps f49,f50=[sp] // M0 | ||
625 | ldfps f51,f52=[sp] // M1 | ||
626 | mov f53=f0 // F | ||
627 | |||
628 | setf.s f54=r0 // M2 | ||
629 | setf.s f55=r0 // M3 | ||
630 | mov f56=f0 // F | ||
631 | |||
632 | ldfps f57,f58=[sp] // M0 | ||
633 | ldfps f59,f60=[sp] // M1 | ||
634 | mov f61=f0 // F | ||
635 | |||
636 | setf.s f62=r0 // M2 | ||
637 | setf.s f63=r0 // M3 | ||
638 | mov f64=f0 // F | ||
639 | |||
640 | ldfps f65,f66=[sp] // M0 | ||
641 | ldfps f67,f68=[sp] // M1 | ||
642 | mov f69=f0 // F | ||
643 | |||
644 | setf.s f70=r0 // M2 | ||
645 | setf.s f71=r0 // M3 | ||
646 | mov f72=f0 // F | ||
647 | |||
648 | ldfps f73,f74=[sp] // M0 | ||
649 | ldfps f75,f76=[sp] // M1 | ||
650 | mov f77=f0 // F | ||
651 | |||
652 | setf.s f78=r0 // M2 | ||
653 | setf.s f79=r0 // M3 | ||
654 | mov f80=f0 // F | ||
655 | |||
656 | ldfps f81,f82=[sp] // M0 | ||
657 | ldfps f83,f84=[sp] // M1 | ||
658 | mov f85=f0 // F | ||
659 | |||
660 | setf.s f86=r0 // M2 | ||
661 | setf.s f87=r0 // M3 | ||
662 | mov f88=f0 // F | ||
663 | |||
664 | /* | ||
665 | * When the instructions are cached, it would be faster to initialize | ||
666 | * the remaining registers with simple mov instructions (F-unit). | ||
667 | * This gets the time down to ~29 cycles. However, this would use up | ||
668 | * 33 bundles, whereas continuing with the above pattern yields | ||
669 | * 10 bundles and ~30 cycles. | ||
670 | */ | ||
671 | |||
672 | ldfps f89,f90=[sp] // M0 | ||
673 | ldfps f91,f92=[sp] // M1 | ||
674 | mov f93=f0 // F | ||
675 | |||
676 | setf.s f94=r0 // M2 | ||
677 | setf.s f95=r0 // M3 | ||
678 | mov f96=f0 // F | ||
679 | |||
680 | ldfps f97,f98=[sp] // M0 | ||
681 | ldfps f99,f100=[sp] // M1 | ||
682 | mov f101=f0 // F | ||
683 | |||
684 | setf.s f102=r0 // M2 | ||
685 | setf.s f103=r0 // M3 | ||
686 | mov f104=f0 // F | ||
687 | |||
688 | ldfps f105,f106=[sp] // M0 | ||
689 | ldfps f107,f108=[sp] // M1 | ||
690 | mov f109=f0 // F | ||
691 | |||
692 | setf.s f110=r0 // M2 | ||
693 | setf.s f111=r0 // M3 | ||
694 | mov f112=f0 // F | ||
695 | |||
696 | ldfps f113,f114=[sp] // M0 | ||
697 | ldfps f115,f116=[sp] // M1 | ||
698 | mov f117=f0 // F | ||
699 | |||
700 | setf.s f118=r0 // M2 | ||
701 | setf.s f119=r0 // M3 | ||
702 | mov f120=f0 // F | ||
703 | |||
704 | ldfps f121,f122=[sp] // M0 | ||
705 | ldfps f123,f124=[sp] // M1 | ||
706 | mov f125=f0 // F | ||
707 | |||
708 | setf.s f126=r0 // M2 | ||
709 | setf.s f127=r0 // M3 | ||
710 | br.ret.sptk.many rp // B | ||
711 | END(__ia64_init_fpu) | ||
712 | |||
713 | /* | ||
714 | * Switch execution mode from virtual to physical | ||
715 | * | ||
716 | * Inputs: | ||
717 | * r16 = new psr to establish | ||
718 | * Output: | ||
719 | * r19 = old virtual address of ar.bsp | ||
720 | * r20 = old virtual address of sp | ||
721 | * | ||
722 | * Note: RSE must already be in enforced lazy mode | ||
723 | */ | ||
724 | GLOBAL_ENTRY(ia64_switch_mode_phys) | ||
725 | { | ||
726 | alloc r2=ar.pfs,0,0,0,0 | ||
727 | rsm psr.i | psr.ic // disable interrupts and interrupt collection | ||
728 | mov r15=ip | ||
729 | } | ||
730 | ;; | ||
731 | { | ||
732 | flushrs // must be first insn in group | ||
733 | srlz.i | ||
734 | } | ||
735 | ;; | ||
736 | mov cr.ipsr=r16 // set new PSR | ||
737 | add r3=1f-ia64_switch_mode_phys,r15 | ||
738 | |||
739 | mov r19=ar.bsp | ||
740 | mov r20=sp | ||
741 | mov r14=rp // get return address into a general register | ||
742 | ;; | ||
743 | |||
744 | // going to physical mode, use tpa to translate virt->phys | ||
745 | tpa r17=r19 | ||
746 | tpa r3=r3 | ||
747 | tpa sp=sp | ||
748 | tpa r14=r14 | ||
749 | ;; | ||
750 | |||
751 | mov r18=ar.rnat // save ar.rnat | ||
752 | mov ar.bspstore=r17 // this steps on ar.rnat | ||
753 | mov cr.iip=r3 | ||
754 | mov cr.ifs=r0 | ||
755 | ;; | ||
756 | mov ar.rnat=r18 // restore ar.rnat | ||
757 | rfi // must be last insn in group | ||
758 | ;; | ||
759 | 1: mov rp=r14 | ||
760 | br.ret.sptk.many rp | ||
761 | END(ia64_switch_mode_phys) | ||
762 | |||
763 | /* | ||
764 | * Switch execution mode from physical to virtual | ||
765 | * | ||
766 | * Inputs: | ||
767 | * r16 = new psr to establish | ||
768 | * r19 = new bspstore to establish | ||
769 | * r20 = new sp to establish | ||
770 | * | ||
771 | * Note: RSE must already be in enforced lazy mode | ||
772 | */ | ||
773 | GLOBAL_ENTRY(ia64_switch_mode_virt) | ||
774 | { | ||
775 | alloc r2=ar.pfs,0,0,0,0 | ||
776 | rsm psr.i | psr.ic // disable interrupts and interrupt collection | ||
777 | mov r15=ip | ||
778 | } | ||
779 | ;; | ||
780 | { | ||
781 | flushrs // must be first insn in group | ||
782 | srlz.i | ||
783 | } | ||
784 | ;; | ||
785 | mov cr.ipsr=r16 // set new PSR | ||
786 | add r3=1f-ia64_switch_mode_virt,r15 | ||
787 | |||
788 | mov r14=rp // get return address into a general register | ||
789 | ;; | ||
790 | |||
791 | // going to virtual | ||
792 | // - for code addresses, set upper bits of addr to KERNEL_START | ||
793 | // - for stack addresses, copy from input argument | ||
794 | movl r18=KERNEL_START | ||
795 | dep r3=0,r3,KERNEL_TR_PAGE_SHIFT,64-KERNEL_TR_PAGE_SHIFT | ||
796 | dep r14=0,r14,KERNEL_TR_PAGE_SHIFT,64-KERNEL_TR_PAGE_SHIFT | ||
797 | mov sp=r20 | ||
798 | ;; | ||
799 | or r3=r3,r18 | ||
800 | or r14=r14,r18 | ||
801 | ;; | ||
802 | |||
803 | mov r18=ar.rnat // save ar.rnat | ||
804 | mov ar.bspstore=r19 // this steps on ar.rnat | ||
805 | mov cr.iip=r3 | ||
806 | mov cr.ifs=r0 | ||
807 | ;; | ||
808 | mov ar.rnat=r18 // restore ar.rnat | ||
809 | rfi // must be last insn in group | ||
810 | ;; | ||
811 | 1: mov rp=r14 | ||
812 | br.ret.sptk.many rp | ||
813 | END(ia64_switch_mode_virt) | ||
814 | |||
815 | GLOBAL_ENTRY(ia64_delay_loop) | ||
816 | .prologue | ||
817 | { nop 0 // work around GAS unwind info generation bug... | ||
818 | .save ar.lc,r2 | ||
819 | mov r2=ar.lc | ||
820 | .body | ||
821 | ;; | ||
822 | mov ar.lc=r32 | ||
823 | } | ||
824 | ;; | ||
825 | // force loop to be 32-byte aligned (GAS bug means we cannot use .align | ||
826 | // inside function body without corrupting unwind info). | ||
827 | { nop 0 } | ||
828 | 1: br.cloop.sptk.few 1b | ||
829 | ;; | ||
830 | mov ar.lc=r2 | ||
831 | br.ret.sptk.many rp | ||
832 | END(ia64_delay_loop) | ||
833 | |||
834 | /* | ||
835 | * Return a CPU-local timestamp in nano-seconds. This timestamp is | ||
836 | * NOT synchronized across CPUs, so its return value must never be | ||
837 | * compared against the values returned on another CPU. The usage in | ||
838 | * kernel/sched.c ensures that. | ||
839 | * | ||
840 | * The return-value of sched_clock() is NOT supposed to wrap-around. | ||
841 | * If it did, it would cause some scheduling hiccups (at the worst). | ||
842 | * Fortunately, with a 64-bit cycle-counter ticking at 100GHz, even | ||
843 | * that would happen only once every 5+ years. | ||
844 | * | ||
845 | * The code below basically calculates: | ||
846 | * | ||
847 | * (ia64_get_itc() * local_cpu_data->nsec_per_cyc) >> IA64_NSEC_PER_CYC_SHIFT | ||
848 | * | ||
849 | * except that the multiplication and the shift are done with 128-bit | ||
850 | * intermediate precision so that we can produce a full 64-bit result. | ||
851 | */ | ||
852 | GLOBAL_ENTRY(sched_clock) | ||
853 | addl r8=THIS_CPU(cpu_info) + IA64_CPUINFO_NSEC_PER_CYC_OFFSET,r0 | ||
854 | mov.m r9=ar.itc // r9 <- cycle-counter (35 cyc) | ||
855 | ;; | ||
856 | ldf8 f8=[r8] | ||
857 | ;; | ||
858 | setf.sig f9=r9 // f9 <- cycle count; certain to stall, so issue it _after_ ldf8... | ||
859 | ;; | ||
860 | xmpy.lu f10=f9,f8 // f10 <- low 64 bits of 128-bit product (4 cyc) | ||
861 | xmpy.hu f11=f9,f8 // f11 <- high 64 bits of 128-bit product | ||
862 | ;; | ||
863 | getf.sig r8=f10 // r8 <- low 64 bits of product (5 cyc) | ||
864 | getf.sig r9=f11 | ||
865 | ;; | ||
866 | shrp r8=r9,r8,IA64_NSEC_PER_CYC_SHIFT | ||
867 | br.ret.sptk.many rp | ||
868 | END(sched_clock) | ||
869 | |||
870 | GLOBAL_ENTRY(start_kernel_thread) /* Entry stub: calls kernel_thread_helper(r9, r11), then sys_exit() with whatever is in r8 afterwards (presumably the helper's return value -- confirm). */ | ||
871 | .prologue | ||
872 | .save rp, r0 // this is the end of the call-chain | ||
873 | .body | ||
874 | alloc r2 = ar.pfs, 0, 0, 2, 0 /* frame with two output registers (out0, out1) */ | ||
875 | mov out0 = r9 | ||
876 | mov out1 = r11;; | ||
877 | br.call.sptk.many rp = kernel_thread_helper;; | ||
878 | mov out0 = r8 | ||
879 | br.call.sptk.many rp = sys_exit;; | ||
880 | 1: br.sptk.few 1b // not reached; spins forever should sys_exit ever return | ||
881 | END(start_kernel_thread) | ||
882 | |||
883 | #ifdef CONFIG_IA64_BRL_EMU | ||
884 | |||
885 | /* | ||
886 | * Assembly routines used by brl_emu.c to set preserved register state. SET_REG(reg) defines ia64_set_<reg>, which moves its first argument (r32) into <reg>; it is instantiated below for b1 through b5. | ||
887 | */ | ||
888 | |||
889 | #define SET_REG(reg) \ | ||
890 | GLOBAL_ENTRY(ia64_set_##reg); \ | ||
891 | alloc r16=ar.pfs,1,0,0,0; \ | ||
892 | mov reg=r32; \ | ||
893 | ;; \ | ||
894 | br.ret.sptk.many rp; \ | ||
895 | END(ia64_set_##reg) | ||
896 | |||
897 | SET_REG(b1); | ||
898 | SET_REG(b2); | ||
899 | SET_REG(b3); | ||
900 | SET_REG(b4); | ||
901 | SET_REG(b5); | ||
902 | |||
903 | #endif /* CONFIG_IA64_BRL_EMU */ | ||
904 | |||
905 | #ifdef CONFIG_SMP | ||
906 | /* | ||
907 | * This routine handles spinlock contention. It uses a non-standard calling | ||
908 | * convention to avoid converting leaf routines into interior routines. Because | ||
909 | * of this special convention, there are several restrictions: | ||
910 | * | ||
911 | * - do not use gp relative variables, this code is called from the kernel | ||
912 | * and from modules, r1 is undefined. | ||
913 | * - do not use stacked registers, the caller owns them. | ||
914 | * - do not use the scratch stack space, the caller owns it. | ||
915 | * - do not use any registers other than the ones listed below | ||
916 | * | ||
917 | * Inputs: | ||
918 | * ar.pfs - saved CFM of caller | ||
919 | * ar.ccv - 0 (and available for use) | ||
920 | * r27 - flags from spin_lock_irqsave or 0. Must be preserved. | ||
921 | * r28 - available for use. | ||
922 | * r29 - available for use. | ||
923 | * r30 - available for use. | ||
924 | * r31 - address of lock, available for use. | ||
925 | * b6 - return address | ||
926 | * p14 - available for use. | ||
927 | * p15 - used to track flag status. | ||
928 | * | ||
929 | * If you patch this code to use more registers, do not forget to update | ||
930 | * the clobber lists for spin_lock() in include/asm-ia64/spinlock.h. | ||
931 | */ | ||
932 | |||
933 | #if __GNUC__ < 3 || (__GNUC__ == 3 && __GNUC_MINOR__ < 3) | ||
934 | |||
935 | GLOBAL_ENTRY(ia64_spinlock_contention_pre3_4) /* Spin until the lock word reads zero, then branch back to the caller so it can retry the acquire. NOTE(review): the name says "pre3_4" but the enclosing #if selects GCC older than 3.3 -- confirm the intended cutoff. */ | ||
936 | .prologue | ||
937 | .save ar.pfs, r0 // this code effectively has a zero frame size | ||
938 | .save rp, r28 | ||
939 | .body | ||
940 | nop 0 | ||
941 | tbit.nz p15,p0=r27,IA64_PSR_I_BIT /* p15 = the PSR.i bit is set in the flags passed in r27 */ | ||
942 | .restore sp // pop existing prologue after next insn | ||
943 | mov b6 = r28 /* in this variant the return address is taken from r28 and moved to b6 */ | ||
944 | .prologue | ||
945 | .save ar.pfs, r0 | ||
946 | .altrp b6 | ||
947 | .body | ||
948 | ;; | ||
949 | (p15) ssm psr.i // reenable interrupts if they were on | ||
950 | // DavidM says that srlz.d is slow and is not required in this case | ||
951 | .wait: | ||
952 | // exponential backoff, kdb, lockmeter etc. go in here | ||
953 | hint @pause | ||
954 | ld4 r30=[r31] // don't use ld4.bias; if it's contended, we won't write the word | ||
955 | nop 0 | ||
956 | ;; | ||
957 | cmp4.ne p14,p0=r30,r0 /* p14 = lock word is still non-zero */ | ||
958 | (p14) br.cond.sptk.few .wait | ||
959 | (p15) rsm psr.i // disable interrupts if we reenabled them | ||
960 | br.cond.sptk.few b6 // lock is now free, try to acquire | ||
961 | .global ia64_spinlock_contention_pre3_4_end // for kernprof | ||
962 | ia64_spinlock_contention_pre3_4_end: | ||
963 | END(ia64_spinlock_contention_pre3_4) | ||
964 | |||
965 | #else | ||
966 | |||
967 | GLOBAL_ENTRY(ia64_spinlock_contention) /* Spin until the lock word reads zero, then take it with cmpxchg4.acq; returns through b6 only once the lock has been acquired. */ | ||
968 | .prologue | ||
969 | .altrp b6 | ||
970 | .body | ||
971 | tbit.nz p15,p0=r27,IA64_PSR_I_BIT /* p15 = the PSR.i bit is set in the flags passed in r27 */ | ||
972 | ;; | ||
973 | .wait: | ||
974 | (p15) ssm psr.i // reenable interrupts if they were on | ||
975 | // DavidM says that srlz.d is slow and is not required in this case | ||
976 | .wait2: | ||
977 | // exponential backoff, kdb, lockmeter etc. go in here | ||
978 | hint @pause | ||
979 | ld4 r30=[r31] // don't use ld4.bias; if it's contended, we won't write the word | ||
980 | ;; | ||
981 | cmp4.ne p14,p0=r30,r0 /* p14 = lock word is still non-zero */ | ||
982 | mov r30 = 1 /* value to store if the cmpxchg below succeeds */ | ||
983 | (p14) br.cond.sptk.few .wait2 | ||
984 | (p15) rsm psr.i // disable interrupts if we reenabled them | ||
985 | ;; | ||
986 | cmpxchg4.acq r30=[r31], r30, ar.ccv /* store 1 if the lock word equals ar.ccv (0 per the calling convention documented above); r30 = previous lock word */ | ||
987 | ;; | ||
988 | cmp4.ne p14,p0=r0,r30 /* p14 = previous value was non-zero, i.e. the exchange did not take the lock */ | ||
989 | (p14) br.cond.sptk.few .wait | ||
990 | |||
991 | br.ret.sptk.many b6 // lock is now taken | ||
992 | END(ia64_spinlock_contention) | ||
993 | |||
994 | #endif | ||
995 | |||
996 | #endif /* CONFIG_SMP */ | ||
diff --git a/arch/ia64/kernel/ia64_ksyms.c b/arch/ia64/kernel/ia64_ksyms.c new file mode 100644 index 000000000000..7bbf019c9867 --- /dev/null +++ b/arch/ia64/kernel/ia64_ksyms.c | |||
@@ -0,0 +1,127 @@ | |||
1 | /* | ||
2 | * Architecture-specific kernel symbols | ||
3 | * | ||
4 | * Don't put any exports here unless it's defined in an assembler file. | ||
5 | * All other exports should be put directly after the definition. | ||
6 | */ | ||
7 | |||
8 | #include <linux/config.h> | ||
9 | #include <linux/module.h> | ||
10 | |||
11 | #include <linux/string.h> | ||
12 | EXPORT_SYMBOL(memset); | ||
13 | EXPORT_SYMBOL(memchr); | ||
14 | EXPORT_SYMBOL(memcmp); | ||
15 | EXPORT_SYMBOL(memcpy); | ||
16 | EXPORT_SYMBOL(memmove); | ||
17 | EXPORT_SYMBOL(memscan); | ||
18 | EXPORT_SYMBOL(strcat); | ||
19 | EXPORT_SYMBOL(strchr); | ||
20 | EXPORT_SYMBOL(strcmp); | ||
21 | EXPORT_SYMBOL(strcpy); | ||
22 | EXPORT_SYMBOL(strlen); | ||
23 | EXPORT_SYMBOL(strncat); | ||
24 | EXPORT_SYMBOL(strncmp); | ||
25 | EXPORT_SYMBOL(strncpy); | ||
26 | EXPORT_SYMBOL(strnlen); | ||
27 | EXPORT_SYMBOL(strrchr); | ||
28 | EXPORT_SYMBOL(strstr); | ||
29 | EXPORT_SYMBOL(strpbrk); | ||
30 | |||
31 | #include <asm/checksum.h> | ||
32 | EXPORT_SYMBOL(ip_fast_csum); /* hand-coded assembly */ | ||
33 | |||
34 | #include <asm/semaphore.h> | ||
35 | EXPORT_SYMBOL(__down); | ||
36 | EXPORT_SYMBOL(__down_interruptible); | ||
37 | EXPORT_SYMBOL(__down_trylock); | ||
38 | EXPORT_SYMBOL(__up); | ||
39 | |||
40 | #include <asm/page.h> | ||
41 | EXPORT_SYMBOL(clear_page); | ||
42 | |||
43 | #ifdef CONFIG_VIRTUAL_MEM_MAP | ||
44 | #include <linux/bootmem.h> | ||
45 | EXPORT_SYMBOL(max_low_pfn); /* defined by bootmem.c, but not exported by generic code */ | ||
46 | #endif | ||
47 | |||
48 | #include <asm/processor.h> | ||
49 | EXPORT_SYMBOL(per_cpu__cpu_info); | ||
50 | #ifdef CONFIG_SMP | ||
51 | EXPORT_SYMBOL(per_cpu__local_per_cpu_offset); | ||
52 | #endif | ||
53 | |||
54 | #include <asm/uaccess.h> | ||
55 | EXPORT_SYMBOL(__copy_user); | ||
56 | EXPORT_SYMBOL(__do_clear_user); | ||
57 | EXPORT_SYMBOL(__strlen_user); | ||
58 | EXPORT_SYMBOL(__strncpy_from_user); | ||
59 | EXPORT_SYMBOL(__strnlen_user); | ||
60 | |||
61 | #include <asm/unistd.h> | ||
62 | EXPORT_SYMBOL(__ia64_syscall); | ||
63 | |||
64 | /* Integer division/modulo helpers from arch/ia64/lib. The void(void) prototypes below exist only so EXPORT_SYMBOL can name the symbols; presumably the real routines are assembly with their own argument convention -- do not call them through these declarations. */ | ||
65 | extern void __divsi3(void); | ||
66 | extern void __udivsi3(void); | ||
67 | extern void __modsi3(void); | ||
68 | extern void __umodsi3(void); | ||
69 | extern void __divdi3(void); | ||
70 | extern void __udivdi3(void); | ||
71 | extern void __moddi3(void); | ||
72 | extern void __umoddi3(void); | ||
73 | |||
74 | EXPORT_SYMBOL(__divsi3); | ||
75 | EXPORT_SYMBOL(__udivsi3); | ||
76 | EXPORT_SYMBOL(__modsi3); | ||
77 | EXPORT_SYMBOL(__umodsi3); | ||
78 | EXPORT_SYMBOL(__divdi3); | ||
79 | EXPORT_SYMBOL(__udivdi3); | ||
80 | EXPORT_SYMBOL(__moddi3); | ||
81 | EXPORT_SYMBOL(__umoddi3); | ||
82 | |||
83 | #if defined(CONFIG_MD_RAID5) || defined(CONFIG_MD_RAID5_MODULE) | ||
84 | extern void xor_ia64_2(void); | ||
85 | extern void xor_ia64_3(void); | ||
86 | extern void xor_ia64_4(void); | ||
87 | extern void xor_ia64_5(void); | ||
88 | |||
89 | EXPORT_SYMBOL(xor_ia64_2); | ||
90 | EXPORT_SYMBOL(xor_ia64_3); | ||
91 | EXPORT_SYMBOL(xor_ia64_4); | ||
92 | EXPORT_SYMBOL(xor_ia64_5); | ||
93 | #endif | ||
94 | |||
95 | #include <asm/pal.h> | ||
96 | EXPORT_SYMBOL(ia64_pal_call_phys_stacked); | ||
97 | EXPORT_SYMBOL(ia64_pal_call_phys_static); | ||
98 | EXPORT_SYMBOL(ia64_pal_call_stacked); | ||
99 | EXPORT_SYMBOL(ia64_pal_call_static); | ||
100 | EXPORT_SYMBOL(ia64_load_scratch_fpregs); | ||
101 | EXPORT_SYMBOL(ia64_save_scratch_fpregs); | ||
102 | |||
103 | #include <asm/unwind.h> | ||
104 | EXPORT_SYMBOL(unw_init_running); | ||
105 | |||
106 | #ifdef ASM_SUPPORTED | ||
107 | # ifdef CONFIG_SMP | ||
108 | # if __GNUC__ < 3 || (__GNUC__ == 3 && __GNUC_MINOR__ < 3) | ||
109 | /* | ||
110 | * This is not a normal routine and we don't want a function descriptor for it, so we use | ||
111 | * a fake declaration here. | ||
112 | */ | ||
113 | extern char ia64_spinlock_contention_pre3_4; | ||
114 | EXPORT_SYMBOL(ia64_spinlock_contention_pre3_4); | ||
115 | # else | ||
116 | /* | ||
117 | * This is not a normal routine and we don't want a function descriptor for it, so we use | ||
118 | * a fake declaration here. | ||
119 | */ | ||
120 | extern char ia64_spinlock_contention; | ||
121 | EXPORT_SYMBOL(ia64_spinlock_contention); | ||
122 | # endif | ||
123 | # endif | ||
124 | #endif | ||
125 | |||
126 | extern char ia64_ivt[]; | ||
127 | EXPORT_SYMBOL(ia64_ivt); | ||
diff --git a/arch/ia64/kernel/init_task.c b/arch/ia64/kernel/init_task.c new file mode 100644 index 000000000000..b69c397ed1bf --- /dev/null +++ b/arch/ia64/kernel/init_task.c | |||
@@ -0,0 +1,46 @@ | |||
1 | /* | ||
2 | * This is where we statically allocate and initialize the initial | ||
3 | * task. | ||
4 | * | ||
5 | * Copyright (C) 1999, 2002-2003 Hewlett-Packard Co | ||
6 | * David Mosberger-Tang <davidm@hpl.hp.com> | ||
7 | */ | ||
8 | |||
9 | #include <linux/init.h> | ||
10 | #include <linux/mm.h> | ||
11 | #include <linux/module.h> | ||
12 | #include <linux/sched.h> | ||
13 | #include <linux/init_task.h> | ||
14 | #include <linux/mqueue.h> | ||
15 | |||
16 | #include <asm/uaccess.h> | ||
17 | #include <asm/pgtable.h> | ||
18 | |||
19 | static struct fs_struct init_fs = INIT_FS; /* statically initialised resources; presumably referenced by the INIT_TASK initialiser below -- confirm in linux/init_task.h */ | ||
20 | static struct files_struct init_files = INIT_FILES; | ||
21 | static struct signal_struct init_signals = INIT_SIGNALS(init_signals); | ||
22 | static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand); | ||
23 | struct mm_struct init_mm = INIT_MM(init_mm); /* the only one of these with external linkage; exported below */ | ||
24 | |||
25 | EXPORT_SYMBOL(init_mm); | ||
26 | |||
27 | /* | ||
28 | * Initial task structure. | ||
29 | * | ||
30 | * We need to make sure that this is properly aligned due to the way process stacks are | ||
31 | * handled. This is done by having a special ".data.init_task" section. The union overlays the task_struct + thread_info pair with a KERNEL_STACK_SIZE-byte array, and the asm("init_task") label gives the whole object the assembler-level name "init_task", which is what EXPORT_SYMBOL(init_task) below refers to. | ||
32 | */ | ||
33 | #define init_thread_info init_task_mem.s.thread_info | ||
34 | |||
35 | union { | ||
36 | struct { | ||
37 | struct task_struct task; | ||
38 | struct thread_info thread_info; | ||
39 | } s; | ||
40 | unsigned long stack[KERNEL_STACK_SIZE/sizeof (unsigned long)]; | ||
41 | } init_task_mem asm ("init_task") __attribute__((section(".data.init_task"))) = {{ | ||
42 | .task = INIT_TASK(init_task_mem.s.task), | ||
43 | .thread_info = INIT_THREAD_INFO(init_task_mem.s.task) | ||
44 | }}; | ||
45 | |||
46 | EXPORT_SYMBOL(init_task); | ||
diff --git a/arch/ia64/kernel/iosapic.c b/arch/ia64/kernel/iosapic.c new file mode 100644 index 000000000000..c15be5c38f56 --- /dev/null +++ b/arch/ia64/kernel/iosapic.c | |||
@@ -0,0 +1,827 @@ | |||
1 | /* | ||
2 | * I/O SAPIC support. | ||
3 | * | ||
4 | * Copyright (C) 1999 Intel Corp. | ||
5 | * Copyright (C) 1999 Asit Mallick <asit.k.mallick@intel.com> | ||
6 | * Copyright (C) 2000-2002 J.I. Lee <jung-ik.lee@intel.com> | ||
7 | * Copyright (C) 1999-2000, 2002-2003 Hewlett-Packard Co. | ||
8 | * David Mosberger-Tang <davidm@hpl.hp.com> | ||
9 | * Copyright (C) 1999 VA Linux Systems | ||
10 | * Copyright (C) 1999,2000 Walt Drummond <drummond@valinux.com> | ||
11 | * | ||
12 | * 00/04/19 D. Mosberger Rewritten to mirror more closely the x86 I/O APIC code. | ||
13 | * In particular, we now have separate handlers for edge | ||
14 | * and level triggered interrupts. | ||
15 | * 00/10/27 Asit Mallick, Goutham Rao <goutham.rao@intel.com> IRQ vector allocation | ||
16 | * PCI to vector mapping, shared PCI interrupts. | ||
17 | * 00/10/27 D. Mosberger Document things a bit more to make them more understandable. | ||
18 | * Clean up much of the old IOSAPIC cruft. | ||
19 | * 01/07/27 J.I. Lee PCI irq routing, Platform/Legacy interrupts and fixes for | ||
20 | * ACPI S5(SoftOff) support. | ||
21 | * 02/01/23 J.I. Lee iosapic pgm fixes for PCI irq routing from _PRT | ||
22 | * 02/01/07 E. Focht <efocht@ess.nec.de> Redirectable interrupt vectors in | ||
23 | * iosapic_set_affinity(), initializations for | ||
24 | * /proc/irq/#/smp_affinity | ||
25 | * 02/04/02 P. Diefenbaugh Cleaned up ACPI PCI IRQ routing. | ||
26 | * 02/04/18 J.I. Lee bug fix in iosapic_init_pci_irq | ||
27 | * 02/04/30 J.I. Lee bug fix in find_iosapic to fix ACPI PCI IRQ to IOSAPIC mapping | ||
28 | * error | ||
29 | * 02/07/29 T. Kochi Allocate interrupt vectors dynamically | ||
30 | * 02/08/04 T. Kochi Cleaned up terminology (irq, global system interrupt, vector, etc.) | ||
31 | * 02/09/20 D. Mosberger Simplified by taking advantage of ACPI's pci_irq code. | ||
32 | * 03/02/19 B. Helgaas Make pcat_compat system-wide, not per-IOSAPIC. | ||
33 | * Remove iosapic_address & gsi_base from external interfaces. | ||
34 | * Rationalize __init/__devinit attributes. | ||
35 | * 04/12/04 Ashok Raj <ashok.raj@intel.com> Intel Corporation 2004 | ||
36 | * Updated to work with irq migration necessary for CPU Hotplug | ||
37 | */ | ||
38 | /* | ||
39 | * Here is what the interrupt logic between a PCI device and the kernel looks like: | ||
40 | * | ||
41 | * (1) A PCI device raises one of the four interrupt pins (INTA, INTB, INTC, INTD). The | ||
42 | * device is uniquely identified by its bus- and slot-number (the function | ||
43 | * number does not matter here because all functions share the same interrupt | ||
44 | * lines). | ||
45 | * | ||
46 | * (2) The motherboard routes the interrupt line to a pin on an IOSAPIC controller. | ||
47 | * Multiple interrupt lines may have to share the same IOSAPIC pin (if they're level | ||
48 | * triggered and use the same polarity). Each interrupt line has a unique Global | ||
49 | * System Interrupt (GSI) number which can be calculated as the sum of the controller's | ||
50 | * base GSI number and the IOSAPIC pin number to which the line connects. | ||
51 | * | ||
52 | * (3) The IOSAPIC uses internal routing table entries (RTEs) to map the IOSAPIC pin | ||
53 | * into the IA-64 interrupt vector. This interrupt vector is then sent to the CPU. | ||
54 | * | ||
55 | * (4) The kernel recognizes an interrupt as an IRQ. The IRQ interface is used as an | ||
56 | * architecture-independent interrupt handling mechanism in Linux. As an | ||
57 | * IRQ is a number, we have to have IA-64 interrupt vector number <-> IRQ number | ||
58 | * mapping. On smaller systems, we use one-to-one mapping between IA-64 vector and | ||
59 | * IRQ. A platform can implement platform_irq_to_vector(irq) and | ||
60 | * platform_local_vector_to_irq(vector) APIs to differentiate the mapping. | ||
61 | * Please see also include/asm-ia64/hw_irq.h for those APIs. | ||
62 | * | ||
63 | * To sum up, there are three levels of mappings involved: | ||
64 | * | ||
65 | * PCI pin -> global system interrupt (GSI) -> IA-64 vector <-> IRQ | ||
66 | * | ||
67 | * Note: The term "IRQ" is loosely used everywhere in Linux kernel to describe interrupts. | ||
68 | * Now we use "IRQ" only for Linux IRQ's. ISA IRQ (isa_irq) is the only exception in this | ||
69 | * source code. | ||
70 | */ | ||
71 | #include <linux/config.h> | ||
72 | |||
73 | #include <linux/acpi.h> | ||
74 | #include <linux/init.h> | ||
75 | #include <linux/irq.h> | ||
76 | #include <linux/kernel.h> | ||
77 | #include <linux/list.h> | ||
78 | #include <linux/pci.h> | ||
79 | #include <linux/smp.h> | ||
80 | #include <linux/smp_lock.h> | ||
81 | #include <linux/string.h> | ||
82 | |||
83 | #include <asm/delay.h> | ||
84 | #include <asm/hw_irq.h> | ||
85 | #include <asm/io.h> | ||
86 | #include <asm/iosapic.h> | ||
87 | #include <asm/machvec.h> | ||
88 | #include <asm/processor.h> | ||
89 | #include <asm/ptrace.h> | ||
90 | #include <asm/system.h> | ||
91 | |||
92 | |||
93 | #undef DEBUG_INTERRUPT_ROUTING | ||
94 | |||
95 | #ifdef DEBUG_INTERRUPT_ROUTING | ||
96 | #define DBG(fmt...) printk(fmt) | ||
97 | #else | ||
98 | #define DBG(fmt...) | ||
99 | #endif | ||
100 | |||
101 | static DEFINE_SPINLOCK(iosapic_lock); | ||
102 | |||
103 | /* Indexed by IA-64 vector: each entry records the IOSAPIC pin that generates that vector. */ | ||
104 | |||
105 | static struct iosapic_intr_info { | ||
106 | char __iomem *addr; /* base address of IOSAPIC */ | ||
107 | u32 low32; /* current value of low word of Redirection table entry */ | ||
108 | unsigned int gsi_base; /* first GSI assigned to this IOSAPIC */ | ||
109 | char rte_index; /* IOSAPIC RTE index (-1 => not an IOSAPIC interrupt); NOTE(review): the -1 sentinel relies on plain char being signed on this target */ | ||
110 | unsigned char dmode : 3; /* delivery mode (see iosapic.h) */ | ||
111 | unsigned char polarity: 1; /* interrupt polarity (see iosapic.h) */ | ||
112 | unsigned char trigger : 1; /* trigger mode (see iosapic.h) */ | ||
113 | int refcnt; /* reference counter */ | ||
114 | } iosapic_intr_info[IA64_NUM_VECTORS]; | ||
115 | |||
116 | static struct iosapic { /* one entry per known IOSAPIC controller; find_iosapic() searches entries [0, num_iosapic) */ | ||
117 | char __iomem *addr; /* base address of IOSAPIC */ | ||
118 | unsigned int gsi_base; /* first GSI assigned to this IOSAPIC */ | ||
119 | unsigned short num_rte; /* number of RTE in this IOSAPIC */ | ||
120 | #ifdef CONFIG_NUMA | ||
121 | unsigned short node; /* numa node association via pxm */ | ||
122 | #endif | ||
123 | } iosapic_lists[NR_IOSAPICS]; | ||
124 | |||
125 | static int num_iosapic; | ||
126 | |||
127 | static unsigned char pcat_compat __initdata; /* 8259 compatibility flag */ | ||
128 | |||
129 | |||
130 | /* | ||
131 | * Find an IOSAPIC associated with a GSI | ||
132 | */ | ||
133 | static inline int | ||
134 | find_iosapic (unsigned int gsi) | ||
135 | { | ||
136 | int i; | ||
137 | |||
138 | for (i = 0; i < num_iosapic; i++) { | ||
139 | if ((unsigned) (gsi - iosapic_lists[i].gsi_base) < iosapic_lists[i].num_rte) | ||
140 | return i; | ||
141 | } | ||
142 | |||
143 | return -1; | ||
144 | } | ||
145 | |||
146 | static inline int | ||
147 | _gsi_to_vector (unsigned int gsi) | ||
148 | { | ||
149 | struct iosapic_intr_info *info; | ||
150 | |||
151 | for (info = iosapic_intr_info; info < iosapic_intr_info + IA64_NUM_VECTORS; ++info) | ||
152 | if (info->gsi_base + info->rte_index == gsi) | ||
153 | return info - iosapic_intr_info; | ||
154 | return -1; | ||
155 | } | ||
156 | |||
157 | /* | ||
158 | * Translate GSI number to the corresponding IA-64 interrupt vector. If no | ||
159 | * entry exists, return -1. This is the externally visible wrapper around _gsi_to_vector(). | ||
160 | */ | ||
161 | inline int | ||
162 | gsi_to_vector (unsigned int gsi) | ||
163 | { | ||
164 | return _gsi_to_vector(gsi); | ||
165 | } | ||
166 | |||
167 | int | ||
168 | gsi_to_irq (unsigned int gsi) | ||
169 | { | ||
170 | /* | ||
171 | * XXX fix me: this assumes an identity mapping between IA-64 vector and Linux irq | ||
172 | * numbers, so the vector found for the GSI (or -1 if none) is returned as the irq. | ||
173 | */ | ||
174 | return _gsi_to_vector(gsi); | ||
175 | } | ||
176 | |||
177 | static void | ||
178 | set_rte (unsigned int vector, unsigned int dest, int mask) | ||
179 | { | ||
180 | unsigned long pol, trigger, dmode; | ||
181 | u32 low32, high32; | ||
182 | char __iomem *addr; | ||
183 | int rte_index; | ||
184 | char redir; | ||
185 | |||
186 | DBG(KERN_DEBUG"IOSAPIC: routing vector %d to 0x%x\n", vector, dest); | ||
187 | |||
188 | rte_index = iosapic_intr_info[vector].rte_index; | ||
189 | if (rte_index < 0) | ||
190 | return; /* not an IOSAPIC interrupt */ | ||
191 | |||
192 | addr = iosapic_intr_info[vector].addr; | ||
193 | pol = iosapic_intr_info[vector].polarity; | ||
194 | trigger = iosapic_intr_info[vector].trigger; | ||
195 | dmode = iosapic_intr_info[vector].dmode; | ||
196 | vector &= (~IA64_IRQ_REDIRECTED); | ||
197 | |||
198 | redir = (dmode == IOSAPIC_LOWEST_PRIORITY) ? 1 : 0; | ||
199 | |||
200 | #ifdef CONFIG_SMP | ||
201 | { | ||
202 | unsigned int irq; | ||
203 | |||
204 | for (irq = 0; irq < NR_IRQS; ++irq) | ||
205 | if (irq_to_vector(irq) == vector) { | ||
206 | set_irq_affinity_info(irq, (int)(dest & 0xffff), redir); | ||
207 | break; | ||
208 | } | ||
209 | } | ||
210 | #endif | ||
211 | |||
212 | low32 = ((pol << IOSAPIC_POLARITY_SHIFT) | | ||
213 | (trigger << IOSAPIC_TRIGGER_SHIFT) | | ||
214 | (dmode << IOSAPIC_DELIVERY_SHIFT) | | ||
215 | ((mask ? 1 : 0) << IOSAPIC_MASK_SHIFT) | | ||
216 | vector); | ||
217 | |||
218 | /* dest contains both id and eid */ | ||
219 | high32 = (dest << IOSAPIC_DEST_SHIFT); | ||
220 | |||
221 | iosapic_write(addr, IOSAPIC_RTE_HIGH(rte_index), high32); | ||
222 | iosapic_write(addr, IOSAPIC_RTE_LOW(rte_index), low32); | ||
223 | iosapic_intr_info[vector].low32 = low32; | ||
224 | } | ||
225 | |||
226 | static void | ||
227 | nop (unsigned int vector) | ||
228 | { | ||
229 | /* Intentionally empty: fills hw_interrupt_type slots that need no action (aliased below as iosapic_ack_level_irq, iosapic_disable_edge_irq and iosapic_end_edge_irq). */ | ||
230 | } | ||
231 | |||
232 | static void | ||
233 | mask_irq (unsigned int irq) | ||
234 | { | ||
235 | unsigned long flags; | ||
236 | char __iomem *addr; | ||
237 | u32 low32; | ||
238 | int rte_index; | ||
239 | ia64_vector vec = irq_to_vector(irq); | ||
240 | |||
241 | addr = iosapic_intr_info[vec].addr; | ||
242 | rte_index = iosapic_intr_info[vec].rte_index; | ||
243 | |||
244 | if (rte_index < 0) | ||
245 | return; /* not an IOSAPIC interrupt! */ | ||
246 | |||
247 | spin_lock_irqsave(&iosapic_lock, flags); | ||
248 | { | ||
249 | /* set only the mask bit */ | ||
250 | low32 = iosapic_intr_info[vec].low32 |= IOSAPIC_MASK; | ||
251 | iosapic_write(addr, IOSAPIC_RTE_LOW(rte_index), low32); | ||
252 | } | ||
253 | spin_unlock_irqrestore(&iosapic_lock, flags); | ||
254 | } | ||
255 | |||
256 | static void | ||
257 | unmask_irq (unsigned int irq) | ||
258 | { | ||
259 | unsigned long flags; | ||
260 | char __iomem *addr; | ||
261 | u32 low32; | ||
262 | int rte_index; | ||
263 | ia64_vector vec = irq_to_vector(irq); | ||
264 | |||
265 | addr = iosapic_intr_info[vec].addr; | ||
266 | rte_index = iosapic_intr_info[vec].rte_index; | ||
267 | if (rte_index < 0) | ||
268 | return; /* not an IOSAPIC interrupt! */ | ||
269 | |||
270 | spin_lock_irqsave(&iosapic_lock, flags); | ||
271 | { | ||
272 | low32 = iosapic_intr_info[vec].low32 &= ~IOSAPIC_MASK; | ||
273 | iosapic_write(addr, IOSAPIC_RTE_LOW(rte_index), low32); | ||
274 | } | ||
275 | spin_unlock_irqrestore(&iosapic_lock, flags); | ||
276 | } | ||
277 | |||
278 | |||
279 | static void | ||
280 | iosapic_set_affinity (unsigned int irq, cpumask_t mask) | ||
281 | { | ||
282 | #ifdef CONFIG_SMP | ||
283 | unsigned long flags; | ||
284 | u32 high32, low32; | ||
285 | int dest, rte_index; | ||
286 | char __iomem *addr; | ||
287 | int redir = (irq & IA64_IRQ_REDIRECTED) ? 1 : 0; | ||
288 | ia64_vector vec; | ||
289 | |||
290 | irq &= (~IA64_IRQ_REDIRECTED); | ||
291 | vec = irq_to_vector(irq); | ||
292 | |||
293 | if (cpus_empty(mask)) | ||
294 | return; | ||
295 | |||
296 | dest = cpu_physical_id(first_cpu(mask)); | ||
297 | |||
298 | rte_index = iosapic_intr_info[vec].rte_index; | ||
299 | addr = iosapic_intr_info[vec].addr; | ||
300 | |||
301 | if (rte_index < 0) | ||
302 | return; /* not an IOSAPIC interrupt */ | ||
303 | |||
304 | set_irq_affinity_info(irq, dest, redir); | ||
305 | |||
306 | /* dest contains both id and eid */ | ||
307 | high32 = dest << IOSAPIC_DEST_SHIFT; | ||
308 | |||
309 | spin_lock_irqsave(&iosapic_lock, flags); | ||
310 | { | ||
311 | low32 = iosapic_intr_info[vec].low32 & ~(7 << IOSAPIC_DELIVERY_SHIFT); | ||
312 | |||
313 | if (redir) | ||
314 | /* change delivery mode to lowest priority */ | ||
315 | low32 |= (IOSAPIC_LOWEST_PRIORITY << IOSAPIC_DELIVERY_SHIFT); | ||
316 | else | ||
317 | /* change delivery mode to fixed */ | ||
318 | low32 |= (IOSAPIC_FIXED << IOSAPIC_DELIVERY_SHIFT); | ||
319 | |||
320 | iosapic_intr_info[vec].low32 = low32; | ||
321 | iosapic_write(addr, IOSAPIC_RTE_HIGH(rte_index), high32); | ||
322 | iosapic_write(addr, IOSAPIC_RTE_LOW(rte_index), low32); | ||
323 | } | ||
324 | spin_unlock_irqrestore(&iosapic_lock, flags); | ||
325 | #endif | ||
326 | } | ||
327 | |||
328 | /* | ||
329 | * Handlers for level-triggered interrupts. | ||
330 | */ | ||
331 | |||
332 | static unsigned int | ||
333 | iosapic_startup_level_irq (unsigned int irq) | ||
334 | { | ||
335 | unmask_irq(irq); /* starting a level-triggered irq is just unmasking its pin */ | ||
336 | return 0; /* always 0 */ | ||
337 | } | ||
338 | |||
339 | static void | ||
340 | iosapic_end_level_irq (unsigned int irq) | ||
341 | { | ||
342 | ia64_vector vec = irq_to_vector(irq); | ||
343 | |||
344 | move_irq(irq); /* runs before the EOI; presumably applies a pending affinity change -- confirm */ | ||
345 | iosapic_eoi(iosapic_intr_info[vec].addr, vec); /* end-of-interrupt for this vector, sent to the IOSAPIC recorded for it */ | ||
346 | } | ||
347 | |||
348 | #define iosapic_shutdown_level_irq mask_irq | ||
349 | #define iosapic_enable_level_irq unmask_irq | ||
350 | #define iosapic_disable_level_irq mask_irq | ||
351 | #define iosapic_ack_level_irq nop | ||
352 | |||
353 | struct hw_interrupt_type irq_type_iosapic_level = { /* operations installed by register_intr() for vectors whose trigger is not IOSAPIC_EDGE */ | ||
354 | .typename = "IO-SAPIC-level", | ||
355 | .startup = iosapic_startup_level_irq, | ||
356 | .shutdown = iosapic_shutdown_level_irq, | ||
357 | .enable = iosapic_enable_level_irq, | ||
358 | .disable = iosapic_disable_level_irq, | ||
359 | .ack = iosapic_ack_level_irq, /* nop: nothing is done at ack time; the EOI is issued from .end */ | ||
360 | .end = iosapic_end_level_irq, | ||
361 | .set_affinity = iosapic_set_affinity | ||
362 | }; | ||
363 | |||
364 | /* | ||
365 | * Handlers for edge-triggered interrupts. | ||
366 | */ | ||
367 | |||
368 | static unsigned int | ||
369 | iosapic_startup_edge_irq (unsigned int irq) | ||
370 | { | ||
371 | unmask_irq(irq); | ||
372 | /* | ||
373 | * IOSAPIC simply drops interrupts pended while the | ||
374 | * corresponding pin was masked, so we can't know whether an | ||
375 | * interrupt is pending already; 0 is returned unconditionally. Let's hope not... | ||
376 | */ | ||
377 | return 0; | ||
378 | } | ||
379 | |||
380 | static void | ||
381 | iosapic_ack_edge_irq (unsigned int irq) | ||
382 | { | ||
383 | irq_desc_t *idesc = irq_descp(irq); | ||
384 | |||
385 | move_irq(irq); | ||
386 | /* | ||
387 | * Once IRQ_PENDING has been recorded on a disabled irq (both | ||
388 | * IRQ_PENDING and IRQ_DISABLED are set), we can mask the interrupt | ||
389 | * for real. This prevents IRQ storms from unhandled devices. | ||
390 | */ | ||
391 | if ((idesc->status & (IRQ_PENDING|IRQ_DISABLED)) == (IRQ_PENDING|IRQ_DISABLED)) | ||
392 | mask_irq(irq); | ||
393 | } | ||
394 | |||
395 | #define iosapic_enable_edge_irq unmask_irq | ||
396 | #define iosapic_disable_edge_irq nop | ||
397 | #define iosapic_end_edge_irq nop | ||
398 | |||
399 | struct hw_interrupt_type irq_type_iosapic_edge = { /* operations installed by register_intr() for vectors whose trigger is IOSAPIC_EDGE */ | ||
400 | .typename = "IO-SAPIC-edge", | ||
401 | .startup = iosapic_startup_edge_irq, | ||
402 | .shutdown = iosapic_disable_edge_irq, /* NOTE(review): wired to the no-op disable handler, so shutdown does not mask the pin -- confirm this is intended */ | ||
403 | .enable = iosapic_enable_edge_irq, | ||
404 | .disable = iosapic_disable_edge_irq, /* nop: masking is deferred to iosapic_ack_edge_irq() */ | ||
405 | .ack = iosapic_ack_edge_irq, | ||
406 | .end = iosapic_end_edge_irq, | ||
407 | .set_affinity = iosapic_set_affinity | ||
408 | }; | ||
409 | |||
410 | unsigned int | ||
411 | iosapic_version (char __iomem *addr) | ||
412 | { | ||
413 | /* | ||
414 | * The IOSAPIC Version Register returns a 32-bit value laid out like: | ||
415 | * { | ||
416 | * unsigned int version : 8; | ||
417 | * unsigned int reserved1 : 8; | ||
418 | * unsigned int max_redir : 8; | ||
419 | * unsigned int reserved2 : 8; | ||
420 | * } | ||
421 | */ | ||
422 | return iosapic_read(addr, IOSAPIC_VERSION); /* raw register value; field extraction is left to the caller */ | ||
423 | } | ||
424 | |||
425 | /* | ||
426 | * if the given vector is already owned by other, | ||
427 | * assign a new vector for the other and make the vector available | ||
428 | */ | ||
429 | static void __init | ||
430 | iosapic_reassign_vector (int vector) | ||
431 | { | ||
432 | int new_vector; | ||
433 | |||
434 | if (iosapic_intr_info[vector].rte_index >= 0 || iosapic_intr_info[vector].addr | ||
435 | || iosapic_intr_info[vector].gsi_base || iosapic_intr_info[vector].dmode | ||
436 | || iosapic_intr_info[vector].polarity || iosapic_intr_info[vector].trigger) | ||
437 | { | ||
438 | new_vector = assign_irq_vector(AUTO_ASSIGN); | ||
439 | printk(KERN_INFO "Reassigning vector %d to %d\n", vector, new_vector); | ||
440 | memcpy(&iosapic_intr_info[new_vector], &iosapic_intr_info[vector], | ||
441 | sizeof(struct iosapic_intr_info)); | ||
442 | memset(&iosapic_intr_info[vector], 0, sizeof(struct iosapic_intr_info)); | ||
443 | iosapic_intr_info[vector].rte_index = -1; | ||
444 | } | ||
445 | } | ||
446 | |||
447 | static void | ||
448 | register_intr (unsigned int gsi, int vector, unsigned char delivery, | ||
449 | unsigned long polarity, unsigned long trigger) | ||
450 | { | ||
451 | irq_desc_t *idesc; | ||
452 | struct hw_interrupt_type *irq_type; | ||
453 | int rte_index; | ||
454 | int index; | ||
455 | unsigned long gsi_base; | ||
456 | void __iomem *iosapic_address; | ||
457 | |||
458 | index = find_iosapic(gsi); | ||
459 | if (index < 0) { | ||
460 | printk(KERN_WARNING "%s: No IOSAPIC for GSI %u\n", __FUNCTION__, gsi); | ||
461 | return; | ||
462 | } | ||
463 | |||
464 | iosapic_address = iosapic_lists[index].addr; | ||
465 | gsi_base = iosapic_lists[index].gsi_base; | ||
466 | |||
467 | rte_index = gsi - gsi_base; | ||
468 | iosapic_intr_info[vector].rte_index = rte_index; | ||
469 | iosapic_intr_info[vector].polarity = polarity; | ||
470 | iosapic_intr_info[vector].dmode = delivery; | ||
471 | iosapic_intr_info[vector].addr = iosapic_address; | ||
472 | iosapic_intr_info[vector].gsi_base = gsi_base; | ||
473 | iosapic_intr_info[vector].trigger = trigger; | ||
474 | iosapic_intr_info[vector].refcnt++; | ||
475 | |||
476 | if (trigger == IOSAPIC_EDGE) | ||
477 | irq_type = &irq_type_iosapic_edge; | ||
478 | else | ||
479 | irq_type = &irq_type_iosapic_level; | ||
480 | |||
481 | idesc = irq_descp(vector); | ||
482 | if (idesc->handler != irq_type) { | ||
483 | if (idesc->handler != &no_irq_type) | ||
484 | printk(KERN_WARNING "%s: changing vector %d from %s to %s\n", | ||
485 | __FUNCTION__, vector, idesc->handler->typename, irq_type->typename); | ||
486 | idesc->handler = irq_type; | ||
487 | } | ||
488 | } | ||
489 | |||
490 | static unsigned int | ||
491 | get_target_cpu (unsigned int gsi, int vector) /* Choose the physical CPU id that the interrupt for gsi/vector should be routed to. */ | ||
492 | { | ||
493 | #ifdef CONFIG_SMP | ||
494 | static int cpu = -1; /* last CPU handed out by the round-robin fallback; persists across calls */ | ||
495 | |||
496 | /* | ||
497 | * If the platform supports redirection via XTP, let it | ||
498 | * distribute interrupts. | ||
499 | */ | ||
500 | if (smp_int_redirect & SMP_IRQ_REDIRECTION) | ||
501 | return cpu_physical_id(smp_processor_id()); | ||
502 | |||
503 | /* | ||
504 | * Some interrupts (ACPI SCI, for instance) are registered | ||
505 | * before the BSP is marked as online. | ||
506 | */ | ||
507 | if (!cpu_online(smp_processor_id())) | ||
508 | return cpu_physical_id(smp_processor_id()); | ||
509 | |||
510 | #ifdef CONFIG_NUMA | ||
511 | { | ||
512 | int num_cpus, cpu_index, iosapic_index, numa_cpu, i = 0; | ||
513 | cpumask_t cpu_mask; | ||
514 | |||
515 | iosapic_index = find_iosapic(gsi); | ||
516 | if (iosapic_index < 0 || | ||
517 | iosapic_lists[iosapic_index].node == MAX_NUMNODES) /* presumably MAX_NUMNODES means "no node association" -- confirm */ | ||
518 | goto skip_numa_setup; | ||
519 | |||
520 | cpu_mask = node_to_cpumask(iosapic_lists[iosapic_index].node); | ||
521 | |||
522 | for_each_cpu_mask(numa_cpu, cpu_mask) { | ||
523 | if (!cpu_online(numa_cpu)) | ||
524 | cpu_clear(numa_cpu, cpu_mask); | ||
525 | } | ||
526 | |||
527 | num_cpus = cpus_weight(cpu_mask); | ||
528 | |||
529 | if (!num_cpus) | ||
530 | goto skip_numa_setup; | ||
531 | |||
532 | /* Use vector assignment to distribute across cpus in node */ | ||
533 | cpu_index = vector % num_cpus; | ||
534 | |||
535 | for (numa_cpu = first_cpu(cpu_mask) ; i < cpu_index ; i++) /* step to the cpu_index-th online CPU of the node */ | ||
536 | numa_cpu = next_cpu(numa_cpu, cpu_mask); | ||
537 | |||
538 | if (numa_cpu != NR_CPUS) | ||
539 | return cpu_physical_id(numa_cpu); | ||
540 | } | ||
541 | skip_numa_setup: | ||
542 | #endif | ||
543 | /* | ||
544 | * Otherwise, round-robin interrupt vectors across all the | ||
545 | * processors. (It'd be nice if we could be smarter in the | ||
546 | * case of NUMA.) | ||
547 | */ | ||
548 | do { | ||
549 | if (++cpu >= NR_CPUS) | ||
550 | cpu = 0; | ||
551 | } while (!cpu_online(cpu)); | ||
552 | |||
553 | return cpu_physical_id(cpu); | ||
554 | #else | ||
555 | return cpu_physical_id(smp_processor_id()); | ||
556 | #endif | ||
557 | } | ||
558 | |||
559 | /* | ||
560 | * ACPI can describe IOSAPIC interrupts via static tables and namespace | ||
561 | * methods. This provides an interface to register those interrupts and | ||
562 | * program the IOSAPIC RTE. | ||
563 | */ | ||
564 | int | ||
565 | iosapic_register_intr (unsigned int gsi, | ||
566 | unsigned long polarity, unsigned long trigger) | ||
567 | { | ||
568 | int vector; | ||
569 | unsigned int dest; | ||
570 | unsigned long flags; | ||
571 | |||
572 | /* | ||
573 | * If this GSI has already been registered (i.e., it's a | ||
574 | * shared interrupt, or we lost a race to register it), | ||
575 | * don't touch the RTE. | ||
576 | */ | ||
577 | spin_lock_irqsave(&iosapic_lock, flags); | ||
578 | { | ||
579 | vector = gsi_to_vector(gsi); | ||
580 | if (vector > 0) { | ||
581 | iosapic_intr_info[vector].refcnt++; | ||
582 | spin_unlock_irqrestore(&iosapic_lock, flags); | ||
583 | return vector; | ||
584 | } | ||
585 | |||
586 | vector = assign_irq_vector(AUTO_ASSIGN); | ||
587 | dest = get_target_cpu(gsi, vector); | ||
588 | register_intr(gsi, vector, IOSAPIC_LOWEST_PRIORITY, | ||
589 | polarity, trigger); | ||
590 | |||
591 | set_rte(vector, dest, 1); | ||
592 | } | ||
593 | spin_unlock_irqrestore(&iosapic_lock, flags); | ||
594 | |||
595 | printk(KERN_INFO "GSI %u (%s, %s) -> CPU %d (0x%04x) vector %d\n", | ||
596 | gsi, (trigger == IOSAPIC_EDGE ? "edge" : "level"), | ||
597 | (polarity == IOSAPIC_POL_HIGH ? "high" : "low"), | ||
598 | cpu_logical_id(dest), dest, vector); | ||
599 | |||
600 | return vector; | ||
601 | } | ||
602 | |||
603 | #ifdef CONFIG_ACPI_DEALLOCATE_IRQ | ||
/*
 * Drop one reference on the vector registered for @gsi.
 *
 * When the reference count reaches zero and no handler is installed on
 * the irq, the irq descriptor is pointed back at no_irq_type, the
 * iosapic_intr_info[] entry is cleared and marked unused, and the vector
 * is returned via free_irq_vector().  If handlers are still installed
 * the reference is restored and the GSI stays registered.
 *
 * Locking: the irq descriptor lock is taken first (with interrupts
 * disabled), then iosapic_lock.
 */
void
iosapic_unregister_intr (unsigned int gsi)
{
	unsigned long flags;
	int irq, vector;
	irq_desc_t *idesc;
	int rte_index;
	unsigned long trigger, polarity;

	/*
	 * If the irq associated with the gsi is not found,
	 * iosapic_unregister_intr() is unbalanced. We need to check
	 * this again after getting locks.
	 */
	irq = gsi_to_irq(gsi);
	if (irq < 0) {
		printk(KERN_ERR "iosapic_unregister_intr(%u) unbalanced\n", gsi);
		WARN_ON(1);
		return;
	}
	vector = irq_to_vector(irq);

	idesc = irq_descp(irq);
	spin_lock_irqsave(&idesc->lock, flags);
	spin_lock(&iosapic_lock);
	{
		/* Re-check under the locks: a negative rte_index marks an unused entry. */
		rte_index = iosapic_intr_info[vector].rte_index;
		if (rte_index < 0) {
			spin_unlock(&iosapic_lock);
			spin_unlock_irqrestore(&idesc->lock, flags);
			printk(KERN_ERR "iosapic_unregister_intr(%u) unbalanced\n", gsi);
			WARN_ON(1);
			return;
		}

		/* Other users remain: only the reference is dropped. */
		if (--iosapic_intr_info[vector].refcnt > 0) {
			spin_unlock(&iosapic_lock);
			spin_unlock_irqrestore(&idesc->lock, flags);
			return;
		}

		/*
		 * If interrupt handlers still exist on the irq
		 * associated with the gsi, don't unregister the
		 * interrupt.
		 */
		if (idesc->action) {
			/* undo the decrement above so the count stays balanced */
			iosapic_intr_info[vector].refcnt++;
			spin_unlock(&iosapic_lock);
			spin_unlock_irqrestore(&idesc->lock, flags);
			printk(KERN_WARNING "Cannot unregister GSI. IRQ %u is still in use.\n", irq);
			return;
		}

		/* Clear the interrupt controller descriptor. */
		idesc->handler = &no_irq_type;

		/* Saved only for the informational message printed below. */
		trigger = iosapic_intr_info[vector].trigger;
		polarity = iosapic_intr_info[vector].polarity;

		/* Clear the interrupt information. */
		memset(&iosapic_intr_info[vector], 0, sizeof(struct iosapic_intr_info));
		iosapic_intr_info[vector].rte_index = -1;	/* mark as unused */
	}
	spin_unlock(&iosapic_lock);
	spin_unlock_irqrestore(&idesc->lock, flags);

	/* Free the interrupt vector */
	free_irq_vector(vector);

	/* NOTE(review): "unregisterd" in the message below is a typo for "unregistered". */
	printk(KERN_INFO "GSI %u (%s, %s) -> vector %d unregisterd.\n",
	       gsi, (trigger == IOSAPIC_EDGE ? "edge" : "level"),
	       (polarity == IOSAPIC_POL_HIGH ? "high" : "low"),
	       vector);
}
679 | #endif /* CONFIG_ACPI_DEALLOCATE_IRQ */ | ||
680 | |||
681 | /* | ||
682 | * ACPI calls this when it finds an entry for a platform interrupt. | ||
683 | * Note that the irq_base and IOSAPIC address must be set in iosapic_init(). | ||
684 | */ | ||
685 | int __init | ||
686 | iosapic_register_platform_intr (u32 int_type, unsigned int gsi, | ||
687 | int iosapic_vector, u16 eid, u16 id, | ||
688 | unsigned long polarity, unsigned long trigger) | ||
689 | { | ||
690 | static const char * const name[] = {"unknown", "PMI", "INIT", "CPEI"}; | ||
691 | unsigned char delivery; | ||
692 | int vector, mask = 0; | ||
693 | unsigned int dest = ((id << 8) | eid) & 0xffff; | ||
694 | |||
695 | switch (int_type) { | ||
696 | case ACPI_INTERRUPT_PMI: | ||
697 | vector = iosapic_vector; | ||
698 | /* | ||
699 | * since PMI vector is alloc'd by FW(ACPI) not by kernel, | ||
700 | * we need to make sure the vector is available | ||
701 | */ | ||
702 | iosapic_reassign_vector(vector); | ||
703 | delivery = IOSAPIC_PMI; | ||
704 | break; | ||
705 | case ACPI_INTERRUPT_INIT: | ||
706 | vector = assign_irq_vector(AUTO_ASSIGN); | ||
707 | delivery = IOSAPIC_INIT; | ||
708 | break; | ||
709 | case ACPI_INTERRUPT_CPEI: | ||
710 | vector = IA64_CPE_VECTOR; | ||
711 | delivery = IOSAPIC_LOWEST_PRIORITY; | ||
712 | mask = 1; | ||
713 | break; | ||
714 | default: | ||
715 | printk(KERN_ERR "iosapic_register_platform_irq(): invalid int type 0x%x\n", int_type); | ||
716 | return -1; | ||
717 | } | ||
718 | |||
719 | register_intr(gsi, vector, delivery, polarity, trigger); | ||
720 | |||
721 | printk(KERN_INFO "PLATFORM int %s (0x%x): GSI %u (%s, %s) -> CPU %d (0x%04x) vector %d\n", | ||
722 | int_type < ARRAY_SIZE(name) ? name[int_type] : "unknown", | ||
723 | int_type, gsi, (trigger == IOSAPIC_EDGE ? "edge" : "level"), | ||
724 | (polarity == IOSAPIC_POL_HIGH ? "high" : "low"), | ||
725 | cpu_logical_id(dest), dest, vector); | ||
726 | |||
727 | set_rte(vector, dest, mask); | ||
728 | return vector; | ||
729 | } | ||
730 | |||
731 | |||
732 | /* | ||
733 | * ACPI calls this when it finds an entry for a legacy ISA IRQ override. | ||
734 | * Note that the gsi_base and IOSAPIC address must be set in iosapic_init(). | ||
735 | */ | ||
736 | void __init | ||
737 | iosapic_override_isa_irq (unsigned int isa_irq, unsigned int gsi, | ||
738 | unsigned long polarity, | ||
739 | unsigned long trigger) | ||
740 | { | ||
741 | int vector; | ||
742 | unsigned int dest = cpu_physical_id(smp_processor_id()); | ||
743 | |||
744 | vector = isa_irq_to_vector(isa_irq); | ||
745 | |||
746 | register_intr(gsi, vector, IOSAPIC_LOWEST_PRIORITY, polarity, trigger); | ||
747 | |||
748 | DBG("ISA: IRQ %u -> GSI %u (%s,%s) -> CPU %d (0x%04x) vector %d\n", | ||
749 | isa_irq, gsi, trigger == IOSAPIC_EDGE ? "edge" : "level", | ||
750 | polarity == IOSAPIC_POL_HIGH ? "high" : "low", | ||
751 | cpu_logical_id(dest), dest, vector); | ||
752 | |||
753 | set_rte(vector, dest, 1); | ||
754 | } | ||
755 | |||
756 | void __init | ||
757 | iosapic_system_init (int system_pcat_compat) | ||
758 | { | ||
759 | int vector; | ||
760 | |||
761 | for (vector = 0; vector < IA64_NUM_VECTORS; ++vector) | ||
762 | iosapic_intr_info[vector].rte_index = -1; /* mark as unused */ | ||
763 | |||
764 | pcat_compat = system_pcat_compat; | ||
765 | if (pcat_compat) { | ||
766 | /* | ||
767 | * Disable the compatibility mode interrupts (8259 style), needs IN/OUT support | ||
768 | * enabled. | ||
769 | */ | ||
770 | printk(KERN_INFO "%s: Disabling PC-AT compatible 8259 interrupts\n", __FUNCTION__); | ||
771 | outb(0xff, 0xA1); | ||
772 | outb(0xff, 0x21); | ||
773 | } | ||
774 | } | ||
775 | |||
776 | void __init | ||
777 | iosapic_init (unsigned long phys_addr, unsigned int gsi_base) | ||
778 | { | ||
779 | int num_rte; | ||
780 | unsigned int isa_irq, ver; | ||
781 | char __iomem *addr; | ||
782 | |||
783 | addr = ioremap(phys_addr, 0); | ||
784 | ver = iosapic_version(addr); | ||
785 | |||
786 | /* | ||
787 | * The MAX_REDIR register holds the highest input pin | ||
788 | * number (starting from 0). | ||
789 | * We add 1 so that we can use it for number of pins (= RTEs) | ||
790 | */ | ||
791 | num_rte = ((ver >> 16) & 0xff) + 1; | ||
792 | |||
793 | iosapic_lists[num_iosapic].addr = addr; | ||
794 | iosapic_lists[num_iosapic].gsi_base = gsi_base; | ||
795 | iosapic_lists[num_iosapic].num_rte = num_rte; | ||
796 | #ifdef CONFIG_NUMA | ||
797 | iosapic_lists[num_iosapic].node = MAX_NUMNODES; | ||
798 | #endif | ||
799 | num_iosapic++; | ||
800 | |||
801 | if ((gsi_base == 0) && pcat_compat) { | ||
802 | /* | ||
803 | * Map the legacy ISA devices into the IOSAPIC data. Some of these may | ||
804 | * get reprogrammed later on with data from the ACPI Interrupt Source | ||
805 | * Override table. | ||
806 | */ | ||
807 | for (isa_irq = 0; isa_irq < 16; ++isa_irq) | ||
808 | iosapic_override_isa_irq(isa_irq, isa_irq, IOSAPIC_POL_HIGH, IOSAPIC_EDGE); | ||
809 | } | ||
810 | } | ||
811 | |||
812 | #ifdef CONFIG_NUMA | ||
813 | void __init | ||
814 | map_iosapic_to_node(unsigned int gsi_base, int node) | ||
815 | { | ||
816 | int index; | ||
817 | |||
818 | index = find_iosapic(gsi_base); | ||
819 | if (index < 0) { | ||
820 | printk(KERN_WARNING "%s: No IOSAPIC for GSI %u\n", | ||
821 | __FUNCTION__, gsi_base); | ||
822 | return; | ||
823 | } | ||
824 | iosapic_lists[index].node = node; | ||
825 | return; | ||
826 | } | ||
827 | #endif | ||
diff --git a/arch/ia64/kernel/irq.c b/arch/ia64/kernel/irq.c new file mode 100644 index 000000000000..28f2aadc38d0 --- /dev/null +++ b/arch/ia64/kernel/irq.c | |||
@@ -0,0 +1,238 @@ | |||
1 | /* | ||
2 | * linux/arch/ia64/kernel/irq.c | ||
3 | * | ||
4 | * Copyright (C) 1992, 1998 Linus Torvalds, Ingo Molnar | ||
5 | * | ||
6 | * This file contains the code used by various IRQ handling routines: | ||
7 | * asking for different IRQ's should be done through these routines | ||
8 | * instead of just grabbing them. Thus setups with different IRQ numbers | ||
9 | * shouldn't result in any weird surprises, and installing new handlers | ||
10 | * should be easier. | ||
11 | * | ||
12 | * Copyright (C) Ashok Raj<ashok.raj@intel.com>, Intel Corporation 2004 | ||
13 | * | ||
14 | * 4/14/2004: Added code to handle cpu migration and do safe irq | ||
15 | * migration without losing interrupts for iosapic | ||
16 | * architecture. | ||
17 | */ | ||
18 | |||
19 | #include <asm/delay.h> | ||
20 | #include <asm/uaccess.h> | ||
21 | #include <linux/module.h> | ||
22 | #include <linux/seq_file.h> | ||
23 | #include <linux/interrupt.h> | ||
24 | #include <linux/kernel_stat.h> | ||
25 | |||
26 | /* | ||
27 | * 'what should we do if we get a hw irq event on an illegal vector'. | ||
28 | * each architecture has to answer this themselves. | ||
29 | */ | ||
30 | void ack_bad_irq(unsigned int irq) | ||
31 | { | ||
32 | printk(KERN_ERR "Unexpected irq vector 0x%x on CPU %u!\n", irq, smp_processor_id()); | ||
33 | } | ||
34 | |||
35 | #ifdef CONFIG_IA64_GENERIC | ||
36 | unsigned int __ia64_local_vector_to_irq (ia64_vector vec) | ||
37 | { | ||
38 | return (unsigned int) vec; | ||
39 | } | ||
40 | #endif | ||
41 | |||
/*
 * Interrupt statistics:
 */

/* Reported as the "ERR:" line by show_interrupts(). */
atomic_t irq_err_count;
47 | |||
48 | /* | ||
49 | * /proc/interrupts printing: | ||
50 | */ | ||
51 | |||
52 | int show_interrupts(struct seq_file *p, void *v) | ||
53 | { | ||
54 | int i = *(loff_t *) v, j; | ||
55 | struct irqaction * action; | ||
56 | unsigned long flags; | ||
57 | |||
58 | if (i == 0) { | ||
59 | seq_printf(p, " "); | ||
60 | for (j=0; j<NR_CPUS; j++) | ||
61 | if (cpu_online(j)) | ||
62 | seq_printf(p, "CPU%d ",j); | ||
63 | seq_putc(p, '\n'); | ||
64 | } | ||
65 | |||
66 | if (i < NR_IRQS) { | ||
67 | spin_lock_irqsave(&irq_desc[i].lock, flags); | ||
68 | action = irq_desc[i].action; | ||
69 | if (!action) | ||
70 | goto skip; | ||
71 | seq_printf(p, "%3d: ",i); | ||
72 | #ifndef CONFIG_SMP | ||
73 | seq_printf(p, "%10u ", kstat_irqs(i)); | ||
74 | #else | ||
75 | for (j = 0; j < NR_CPUS; j++) | ||
76 | if (cpu_online(j)) | ||
77 | seq_printf(p, "%10u ", kstat_cpu(j).irqs[i]); | ||
78 | #endif | ||
79 | seq_printf(p, " %14s", irq_desc[i].handler->typename); | ||
80 | seq_printf(p, " %s", action->name); | ||
81 | |||
82 | for (action=action->next; action; action = action->next) | ||
83 | seq_printf(p, ", %s", action->name); | ||
84 | |||
85 | seq_putc(p, '\n'); | ||
86 | skip: | ||
87 | spin_unlock_irqrestore(&irq_desc[i].lock, flags); | ||
88 | } else if (i == NR_IRQS) | ||
89 | seq_printf(p, "ERR: %10u\n", atomic_read(&irq_err_count)); | ||
90 | return 0; | ||
91 | } | ||
92 | |||
93 | #ifdef CONFIG_SMP | ||
/*
 * This is updated when the user sets irq affinity via /proc
 * (see proc_set_irq_affinity()); move_irq() consumes the mask and
 * clears it again.
 */
static cpumask_t __cacheline_aligned pending_irq_cpumask[NR_IRQS];
/* One bit per irq; move_irq() tests it to decide on IA64_IRQ_REDIRECTED. */
static unsigned long pending_irq_redir[BITS_TO_LONGS(NR_IRQS)];

/* Per-irq redirect value recorded by set_irq_affinity_info(). */
static char irq_redir [NR_IRQS]; // = { [0 ... NR_IRQS-1] = 1 };
101 | |||
102 | /* | ||
103 | * Arch specific routine for deferred write to iosapic rte to reprogram | ||
104 | * intr destination. | ||
105 | */ | ||
106 | void proc_set_irq_affinity(unsigned int irq, cpumask_t mask_val) | ||
107 | { | ||
108 | pending_irq_cpumask[irq] = mask_val; | ||
109 | } | ||
110 | |||
111 | void set_irq_affinity_info (unsigned int irq, int hwid, int redir) | ||
112 | { | ||
113 | cpumask_t mask = CPU_MASK_NONE; | ||
114 | |||
115 | cpu_set(cpu_logical_id(hwid), mask); | ||
116 | |||
117 | if (irq < NR_IRQS) { | ||
118 | irq_affinity[irq] = mask; | ||
119 | irq_redir[irq] = (char) (redir & 0xff); | ||
120 | } | ||
121 | } | ||
122 | |||
123 | |||
124 | void move_irq(int irq) | ||
125 | { | ||
126 | /* note - we hold desc->lock */ | ||
127 | cpumask_t tmp; | ||
128 | irq_desc_t *desc = irq_descp(irq); | ||
129 | int redir = test_bit(irq, pending_irq_redir); | ||
130 | |||
131 | if (unlikely(!desc->handler->set_affinity)) | ||
132 | return; | ||
133 | |||
134 | if (!cpus_empty(pending_irq_cpumask[irq])) { | ||
135 | cpus_and(tmp, pending_irq_cpumask[irq], cpu_online_map); | ||
136 | if (unlikely(!cpus_empty(tmp))) { | ||
137 | desc->handler->set_affinity(irq | (redir ? IA64_IRQ_REDIRECTED : 0), | ||
138 | pending_irq_cpumask[irq]); | ||
139 | } | ||
140 | cpus_clear(pending_irq_cpumask[irq]); | ||
141 | } | ||
142 | } | ||
143 | |||
144 | |||
145 | #endif /* CONFIG_SMP */ | ||
146 | |||
147 | #ifdef CONFIG_HOTPLUG_CPU | ||
/*
 * Non-zero entries mark irqs that migrate_irqs() retargeted and that
 * fixup_irqs() still has to replay; ia64_process_pending_intr() clears
 * the entry for any irq it has already handled.
 */
unsigned int vectors_in_migration[NR_IRQS];
149 | |||
150 | /* | ||
151 | * Since cpu_online_map is already updated, we just need to check for | ||
152 | * affinity that has zeros | ||
153 | */ | ||
154 | static void migrate_irqs(void) | ||
155 | { | ||
156 | cpumask_t mask; | ||
157 | irq_desc_t *desc; | ||
158 | int irq, new_cpu; | ||
159 | |||
160 | for (irq=0; irq < NR_IRQS; irq++) { | ||
161 | desc = irq_descp(irq); | ||
162 | |||
163 | /* | ||
164 | * No handling for now. | ||
165 | * TBD: Implement a disable function so we can now | ||
166 | * tell CPU not to respond to these local intr sources. | ||
167 | * such as ITV,CPEI,MCA etc. | ||
168 | */ | ||
169 | if (desc->status == IRQ_PER_CPU) | ||
170 | continue; | ||
171 | |||
172 | cpus_and(mask, irq_affinity[irq], cpu_online_map); | ||
173 | if (any_online_cpu(mask) == NR_CPUS) { | ||
174 | /* | ||
175 | * Save it for phase 2 processing | ||
176 | */ | ||
177 | vectors_in_migration[irq] = irq; | ||
178 | |||
179 | new_cpu = any_online_cpu(cpu_online_map); | ||
180 | mask = cpumask_of_cpu(new_cpu); | ||
181 | |||
182 | /* | ||
183 | * Al three are essential, currently WARN_ON.. maybe panic? | ||
184 | */ | ||
185 | if (desc->handler && desc->handler->disable && | ||
186 | desc->handler->enable && desc->handler->set_affinity) { | ||
187 | desc->handler->disable(irq); | ||
188 | desc->handler->set_affinity(irq, mask); | ||
189 | desc->handler->enable(irq); | ||
190 | } else { | ||
191 | WARN_ON((!(desc->handler) || !(desc->handler->disable) || | ||
192 | !(desc->handler->enable) || | ||
193 | !(desc->handler->set_affinity))); | ||
194 | } | ||
195 | } | ||
196 | } | ||
197 | } | ||
198 | |||
199 | void fixup_irqs(void) | ||
200 | { | ||
201 | unsigned int irq; | ||
202 | extern void ia64_process_pending_intr(void); | ||
203 | |||
204 | ia64_set_itv(1<<16); | ||
205 | /* | ||
206 | * Phase 1: Locate irq's bound to this cpu and | ||
207 | * relocate them for cpu removal. | ||
208 | */ | ||
209 | migrate_irqs(); | ||
210 | |||
211 | /* | ||
212 | * Phase 2: Perform interrupt processing for all entries reported in | ||
213 | * local APIC. | ||
214 | */ | ||
215 | ia64_process_pending_intr(); | ||
216 | |||
217 | /* | ||
218 | * Phase 3: Now handle any interrupts not captured in local APIC. | ||
219 | * This is to account for cases that device interrupted during the time the | ||
220 | * rte was being disabled and re-programmed. | ||
221 | */ | ||
222 | for (irq=0; irq < NR_IRQS; irq++) { | ||
223 | if (vectors_in_migration[irq]) { | ||
224 | vectors_in_migration[irq]=0; | ||
225 | __do_IRQ(irq, NULL); | ||
226 | } | ||
227 | } | ||
228 | |||
229 | /* | ||
230 | * Now let processor die. We do irq disable and max_xtp() to | ||
231 | * ensure there is no more interrupts routed to this processor. | ||
232 | * But the local timer interrupt can have 1 pending which we | ||
233 | * take care in timer_interrupt(). | ||
234 | */ | ||
235 | max_xtp(); | ||
236 | local_irq_disable(); | ||
237 | } | ||
238 | #endif | ||
diff --git a/arch/ia64/kernel/irq_ia64.c b/arch/ia64/kernel/irq_ia64.c new file mode 100644 index 000000000000..5ba06ebe355b --- /dev/null +++ b/arch/ia64/kernel/irq_ia64.c | |||
@@ -0,0 +1,278 @@ | |||
1 | /* | ||
2 | * linux/arch/ia64/kernel/irq_ia64.c | ||
3 | * | ||
4 | * Copyright (C) 1998-2001 Hewlett-Packard Co | ||
5 | * Stephane Eranian <eranian@hpl.hp.com> | ||
6 | * David Mosberger-Tang <davidm@hpl.hp.com> | ||
7 | * | ||
8 | * 6/10/99: Updated to bring in sync with x86 version to facilitate | ||
9 | * support for SMP and different interrupt controllers. | ||
10 | * | ||
11 | * 09/15/00 Goutham Rao <goutham.rao@intel.com> Implemented pci_irq_to_vector | ||
12 | * PCI to vector allocation routine. | ||
13 | * 04/14/2004 Ashok Raj <ashok.raj@intel.com> | ||
14 | * Added CPU Hotplug handling for IPF. | ||
15 | */ | ||
16 | |||
17 | #include <linux/config.h> | ||
18 | #include <linux/module.h> | ||
19 | |||
20 | #include <linux/jiffies.h> | ||
21 | #include <linux/errno.h> | ||
22 | #include <linux/init.h> | ||
23 | #include <linux/interrupt.h> | ||
24 | #include <linux/ioport.h> | ||
25 | #include <linux/kernel_stat.h> | ||
26 | #include <linux/slab.h> | ||
27 | #include <linux/ptrace.h> | ||
28 | #include <linux/random.h> /* for rand_initialize_irq() */ | ||
29 | #include <linux/signal.h> | ||
30 | #include <linux/smp.h> | ||
31 | #include <linux/smp_lock.h> | ||
32 | #include <linux/threads.h> | ||
33 | #include <linux/bitops.h> | ||
34 | |||
35 | #include <asm/delay.h> | ||
36 | #include <asm/intrinsics.h> | ||
37 | #include <asm/io.h> | ||
38 | #include <asm/hw_irq.h> | ||
39 | #include <asm/machvec.h> | ||
40 | #include <asm/pgtable.h> | ||
41 | #include <asm/system.h> | ||
42 | |||
43 | #ifdef CONFIG_PERFMON | ||
44 | # include <asm/perfmon.h> | ||
45 | #endif | ||
46 | |||
47 | #define IRQ_DEBUG 0 | ||
48 | |||
/*
 * default base addr of IPI table; ia64_send_ipi() adds a per-CPU offset
 * to this address and writes the IPI data there.
 */
void __iomem *ipi_base_addr = ((void __iomem *)
			       (__IA64_UNCACHED_OFFSET | IA64_IPI_DEFAULT_BASE_ADDR));
52 | |||
/*
 * Legacy IRQ to IA-64 vector translation table.
 * Indexed by ISA irq number (0-15); the entry is the IA-64 vector.
 */
__u8 isa_irq_to_vector_map[16] = {
	/* 8259 IRQ translation, first 16 entries */
	0x2f, 0x20, 0x2e, 0x2d, 0x2c, 0x2b, 0x2a, 0x29,
	0x28, 0x27, 0x26, 0x25, 0x24, 0x23, 0x22, 0x21
};
EXPORT_SYMBOL(isa_irq_to_vector_map);
62 | |||
/*
 * Allocation bitmap for device vectors: bit N set means vector
 * IA64_FIRST_DEVICE_VECTOR + N is in use (see assign_irq_vector() and
 * free_irq_vector()).
 */
static unsigned long ia64_vector_mask[BITS_TO_LONGS(IA64_NUM_DEVICE_VECTORS)];
64 | |||
65 | int | ||
66 | assign_irq_vector (int irq) | ||
67 | { | ||
68 | int pos, vector; | ||
69 | again: | ||
70 | pos = find_first_zero_bit(ia64_vector_mask, IA64_NUM_DEVICE_VECTORS); | ||
71 | vector = IA64_FIRST_DEVICE_VECTOR + pos; | ||
72 | if (vector > IA64_LAST_DEVICE_VECTOR) | ||
73 | /* XXX could look for sharable vectors instead of panic'ing... */ | ||
74 | panic("assign_irq_vector: out of interrupt vectors!"); | ||
75 | if (test_and_set_bit(pos, ia64_vector_mask)) | ||
76 | goto again; | ||
77 | return vector; | ||
78 | } | ||
79 | |||
80 | void | ||
81 | free_irq_vector (int vector) | ||
82 | { | ||
83 | int pos; | ||
84 | |||
85 | if (vector < IA64_FIRST_DEVICE_VECTOR || vector > IA64_LAST_DEVICE_VECTOR) | ||
86 | return; | ||
87 | |||
88 | pos = vector - IA64_FIRST_DEVICE_VECTOR; | ||
89 | if (!test_and_clear_bit(pos, ia64_vector_mask)) | ||
90 | printk(KERN_WARNING "%s: double free!\n", __FUNCTION__); | ||
91 | } | ||
92 | |||
/*
 * True when @vec is the reschedule IPI vector; always false on
 * uniprocessor builds.  The interrupt loops below only EOI such
 * vectors and do not dispatch them through __do_IRQ().
 */
#ifdef CONFIG_SMP
#	define IS_RESCHEDULE(vec)	(vec == IA64_IPI_RESCHEDULE)
#else
#	define IS_RESCHEDULE(vec)	(0)
#endif
/*
 * That's where the IVT branches when we get an external
 * interrupt. This branches to the correct hardware IRQ handler via
 * function ptr.
 *
 * @vector is the first vector to service; after each one the next
 * pending vector is fetched with ia64_get_ivr() and the loop continues
 * until the spurious-interrupt vector is returned.  @regs is handed on
 * to __do_IRQ().
 */
void
ia64_handle_irq (ia64_vector vector, struct pt_regs *regs)
{
	unsigned long saved_tpr;

#if IRQ_DEBUG
	{
		unsigned long bsp, sp;

		/*
		 * Note: if the interrupt happened while executing in
		 * the context switch routine (ia64_switch_to), we may
		 * get a spurious stack overflow here.  This is
		 * because the register and the memory stack are not
		 * switched atomically.
		 */
		bsp = ia64_getreg(_IA64_REG_AR_BSP);
		sp = ia64_getreg(_IA64_REG_SP);

		if ((sp - bsp) < 1024) {
			/* rate limiting: the counter restarts after 5 seconds of silence */
			static unsigned char count;
			static long last_time;

			if (jiffies - last_time > 5*HZ)
				count = 0;
			if (++count < 5) {
				last_time = jiffies;
				printk("ia64_handle_irq: DANGER: less than "
				       "1KB of free stack space!!\n"
				       "(bsp=0x%lx, sp=%lx)\n", bsp, sp);
			}
		}
	}
#endif /* IRQ_DEBUG */

	/*
	 * Always set TPR to limit maximum interrupt nesting depth to
	 * 16 (without this, it would be ~240, which could easily lead
	 * to kernel stack overflows).
	 */
	irq_enter();
	saved_tpr = ia64_getreg(_IA64_REG_CR_TPR);
	ia64_srlz_d();
	while (vector != IA64_SPURIOUS_INT_VECTOR) {
		/* reschedule IPIs are not dispatched, only EOI'd below */
		if (!IS_RESCHEDULE(vector)) {
			ia64_setreg(_IA64_REG_CR_TPR, vector);
			ia64_srlz_d();

			__do_IRQ(local_vector_to_irq(vector), regs);

			/*
			 * Disable interrupts and send EOI:
			 */
			local_irq_disable();
			ia64_setreg(_IA64_REG_CR_TPR, saved_tpr);
		}
		ia64_eoi();
		/* fetch the next pending vector, if any */
		vector = ia64_get_ivr();
	}
	/*
	 * This must be done *after* the ia64_eoi().  For example, the keyboard softirq
	 * handler needs to be able to wait for further keyboard interrupts, which can't
	 * come through until ia64_eoi() has been done.
	 */
	irq_exit();
}
169 | |||
170 | #ifdef CONFIG_HOTPLUG_CPU | ||
/*
 * This function emulates interrupt processing when a cpu is about to be
 * brought down: it reads pending vectors with ia64_get_ivr() and
 * services them with the same TPR/EOI sequence as ia64_handle_irq(),
 * until the spurious-interrupt vector is returned.
 */
void ia64_process_pending_intr(void)
{
	ia64_vector vector;
	unsigned long saved_tpr;
	extern unsigned int vectors_in_migration[NR_IRQS];

	vector = ia64_get_ivr();

	 irq_enter();
	 saved_tpr = ia64_getreg(_IA64_REG_CR_TPR);
	 ia64_srlz_d();

	 /*
	  * Perform normal interrupt style processing
	  */
	while (vector != IA64_SPURIOUS_INT_VECTOR) {
		if (!IS_RESCHEDULE(vector)) {
			ia64_setreg(_IA64_REG_CR_TPR, vector);
			ia64_srlz_d();

			/*
			 * Dispatch through __do_IRQ() the way ia64_handle_irq()
			 * would from a real interrupt, but with NULL for
			 * pt_regs.  The original author noted that this is
			 * expected, not known, to work and that the code could
			 * probably be shared with ia64_handle_irq().
			 *
			 * The irq is handled here, so it no longer needs the
			 * phase 3 replay in fixup_irqs().
			 */
			vectors_in_migration[local_vector_to_irq(vector)]=0;
			__do_IRQ(local_vector_to_irq(vector), NULL);

			/*
			 * Disable interrupts and send EOI
			 */
			local_irq_disable();
			ia64_setreg(_IA64_REG_CR_TPR, saved_tpr);
		}
		ia64_eoi();
		vector = ia64_get_ivr();
	}
	irq_exit();
}
215 | #endif | ||
216 | |||
217 | |||
218 | #ifdef CONFIG_SMP | ||
/* IPI handler; defined outside this file. */
extern irqreturn_t handle_IPI (int irq, void *dev_id, struct pt_regs *regs);

/* irqaction that init_IRQ() registers for IA64_IPI_VECTOR. */
static struct irqaction ipi_irqaction = {
	.handler =	handle_IPI,
	.flags =	SA_INTERRUPT,
	.name =		"IPI"
};
226 | #endif | ||
227 | |||
228 | void | ||
229 | register_percpu_irq (ia64_vector vec, struct irqaction *action) | ||
230 | { | ||
231 | irq_desc_t *desc; | ||
232 | unsigned int irq; | ||
233 | |||
234 | for (irq = 0; irq < NR_IRQS; ++irq) | ||
235 | if (irq_to_vector(irq) == vec) { | ||
236 | desc = irq_descp(irq); | ||
237 | desc->status |= IRQ_PER_CPU; | ||
238 | desc->handler = &irq_type_ia64_lsapic; | ||
239 | if (action) | ||
240 | setup_irq(irq, action); | ||
241 | } | ||
242 | } | ||
243 | |||
244 | void __init | ||
245 | init_IRQ (void) | ||
246 | { | ||
247 | register_percpu_irq(IA64_SPURIOUS_INT_VECTOR, NULL); | ||
248 | #ifdef CONFIG_SMP | ||
249 | register_percpu_irq(IA64_IPI_VECTOR, &ipi_irqaction); | ||
250 | #endif | ||
251 | #ifdef CONFIG_PERFMON | ||
252 | pfm_init_percpu(); | ||
253 | #endif | ||
254 | platform_irq_init(); | ||
255 | } | ||
256 | |||
257 | void | ||
258 | ia64_send_ipi (int cpu, int vector, int delivery_mode, int redirect) | ||
259 | { | ||
260 | void __iomem *ipi_addr; | ||
261 | unsigned long ipi_data; | ||
262 | unsigned long phys_cpu_id; | ||
263 | |||
264 | #ifdef CONFIG_SMP | ||
265 | phys_cpu_id = cpu_physical_id(cpu); | ||
266 | #else | ||
267 | phys_cpu_id = (ia64_getreg(_IA64_REG_CR_LID) >> 16) & 0xffff; | ||
268 | #endif | ||
269 | |||
270 | /* | ||
271 | * cpu number is in 8bit ID and 8bit EID | ||
272 | */ | ||
273 | |||
274 | ipi_data = (delivery_mode << 8) | (vector & 0xff); | ||
275 | ipi_addr = ipi_base_addr + ((phys_cpu_id << 4) | ((redirect & 1) << 3)); | ||
276 | |||
277 | writeq(ipi_data, ipi_addr); | ||
278 | } | ||
diff --git a/arch/ia64/kernel/irq_lsapic.c b/arch/ia64/kernel/irq_lsapic.c new file mode 100644 index 000000000000..ea14e6a04409 --- /dev/null +++ b/arch/ia64/kernel/irq_lsapic.c | |||
@@ -0,0 +1,37 @@ | |||
1 | /* | ||
2 | * LSAPIC Interrupt Controller | ||
3 | * | ||
4 | * This takes care of interrupts that are generated by the CPU's | ||
5 | * internal Streamlined Advanced Programmable Interrupt Controller | ||
6 | * (LSAPIC), such as the ITC and IPI interrupts. | ||
7 | * | ||
8 | * Copyright (C) 1999 VA Linux Systems | ||
9 | * Copyright (C) 1999 Walt Drummond <drummond@valinux.com> | ||
10 | * Copyright (C) 2000 Hewlett-Packard Co | ||
11 | * Copyright (C) 2000 David Mosberger-Tang <davidm@hpl.hp.com> | ||
12 | */ | ||
13 | |||
14 | #include <linux/sched.h> | ||
15 | #include <linux/irq.h> | ||
16 | |||
/*
 * Startup hook for LSAPIC interrupts: there is nothing to start, so
 * report 0 regardless of @irq.
 */
static unsigned int
lsapic_noop_startup (unsigned int irq)
{
	(void) irq;

	return 0U;
}
22 | |||
/*
 * Shared do-nothing hook used for the shutdown/enable/disable/ack/end
 * operations of the LSAPIC interrupt type.
 */
static void
lsapic_noop (unsigned int irq)
{
	(void) irq;	/* intentionally empty */
}
28 | |||
29 | struct hw_interrupt_type irq_type_ia64_lsapic = { | ||
30 | .typename = "LSAPIC", | ||
31 | .startup = lsapic_noop_startup, | ||
32 | .shutdown = lsapic_noop, | ||
33 | .enable = lsapic_noop, | ||
34 | .disable = lsapic_noop, | ||
35 | .ack = lsapic_noop, | ||
36 | .end = lsapic_noop | ||
37 | }; | ||
diff --git a/arch/ia64/kernel/ivt.S b/arch/ia64/kernel/ivt.S new file mode 100644 index 000000000000..d9c05d53435b --- /dev/null +++ b/arch/ia64/kernel/ivt.S | |||
@@ -0,0 +1,1619 @@ | |||
1 | /* | ||
2 | * arch/ia64/kernel/ivt.S | ||
3 | * | ||
4 | * Copyright (C) 1998-2001, 2003 Hewlett-Packard Co | ||
5 | * Stephane Eranian <eranian@hpl.hp.com> | ||
6 | * David Mosberger <davidm@hpl.hp.com> | ||
7 | * Copyright (C) 2000, 2002-2003 Intel Co | ||
8 | * Asit Mallick <asit.k.mallick@intel.com> | ||
9 | * Suresh Siddha <suresh.b.siddha@intel.com> | ||
10 | * Kenneth Chen <kenneth.w.chen@intel.com> | ||
11 | * Fenghua Yu <fenghua.yu@intel.com> | ||
12 | * | ||
13 | * 00/08/23 Asit Mallick <asit.k.mallick@intel.com> TLB handling for SMP | ||
14 | * 00/12/20 David Mosberger-Tang <davidm@hpl.hp.com> DTLB/ITLB handler now uses virtual PT. | ||
15 | */ | ||
16 | /* | ||
17 | * This file defines the interruption vector table used by the CPU. | ||
18 | * It does not include one entry per possible cause of interruption. | ||
19 | * | ||
20 | * The first 20 entries of the table contain 64 bundles each while the | ||
21 | * remaining 48 entries contain only 16 bundles each. | ||
22 | * | ||
23 | * The 64 bundles are used to allow inlining the whole handler for critical | ||
24 | * interruptions like TLB misses. | ||
25 | * | ||
26 | * For each entry, the comment is as follows: | ||
27 | * | ||
28 | * // 0x1c00 Entry 7 (size 64 bundles) Data Key Miss (12,51) | ||
29 | * entry offset ----/ / / / / | ||
30 | * entry number ---------/ / / / | ||
31 | * size of the entry -------------/ / / | ||
32 | * vector name -------------------------------------/ / | ||
33 | * interruptions triggering this vector ----------------------/ | ||
34 | * | ||
35 | * The table is 32KB in size and must be aligned on 32KB boundary. | ||
36 | * (The CPU ignores the 15 lower bits of the address) | ||
37 | * | ||
38 | * Table is based upon EAS2.6 (Oct 1999) | ||
39 | */ | ||
40 | |||
41 | #include <linux/config.h> | ||
42 | |||
43 | #include <asm/asmmacro.h> | ||
44 | #include <asm/break.h> | ||
45 | #include <asm/ia32.h> | ||
46 | #include <asm/kregs.h> | ||
47 | #include <asm/offsets.h> | ||
48 | #include <asm/pgtable.h> | ||
49 | #include <asm/processor.h> | ||
50 | #include <asm/ptrace.h> | ||
51 | #include <asm/system.h> | ||
52 | #include <asm/thread_info.h> | ||
53 | #include <asm/unistd.h> | ||
54 | #include <asm/errno.h> | ||
55 | |||
/* Extra PSR bits that the handlers in this file set together with psr.ic (currently psr.ac). */
56 | #if 1 | ||
57 | # define PSR_DEFAULT_BITS psr.ac | ||
58 | #else | ||
59 | # define PSR_DEFAULT_BITS 0 | ||
60 | #endif | ||
61 | |||
62 | #if 0 | ||
63 | /* | ||
64 | * This lets you track the last eight faults that occurred on the CPU. Make sure ar.k2 isn't | ||
65 | * needed for something else before enabling this... Each fault shifts ar.k2 left by 8 bits and adds its number; r16 is used as scratch. | ||
66 | */ | ||
67 | # define DBG_FAULT(i) mov r16=ar.k2;; shl r16=r16,8;; add r16=(i),r16;;mov ar.k2=r16 | ||
68 | #else | ||
69 | # define DBG_FAULT(i) | ||
70 | #endif | ||
71 | |||
72 | #define MINSTATE_VIRT /* needed by minstate.h */ | ||
73 | #include "minstate.h" | ||
74 | |||
/* FAULT(n): used by vectors that have no inline handler; hands vector number n to dispatch_to_fault_handler. */
75 | #define FAULT(n) \ | ||
76 | mov r31=pr; /* prepare to save predicates */ \ | ||
77 | mov r19=n;; /* r19 = vector number for dispatch_to_fault_handler */ \ | ||
78 | br.sptk.many dispatch_to_fault_handler | ||
79 | |||
80 | .section .text.ivt,"ax" | ||
81 | |||
82 | .align 32768 // align on 32KB boundary | ||
83 | .global ia64_ivt | ||
84 | ia64_ivt: | ||
85 | ///////////////////////////////////////////////////////////////////////////////////////// | ||
86 | // 0x0000 Entry 0 (size 64 bundles) VHPT Translation (8,20,47) | ||
87 | ENTRY(vhpt_miss) | ||
88 | DBG_FAULT(0) | ||
89 | /* | ||
90 | * The VHPT vector is invoked when the TLB entry for the virtual page table | ||
91 | * is missing. This happens only as a result of a previous | ||
92 | * (the "original") TLB miss, which may either be caused by an instruction | ||
93 | * fetch or a data access (or non-access). | ||
94 | * | ||
95 | * What we do here is normal TLB miss handling for the _original_ miss, followed | ||
96 | * by inserting the TLB entry for the virtual page table page that the VHPT | ||
97 | * walker was attempting to access. The latter gets inserted as long | ||
98 | * as both L1 and L2 have valid mappings for the faulting address. | ||
99 | * The TLB entry for the original miss gets inserted only if | ||
100 | * the L3 entry indicates that the page is present. | ||
101 | * | ||
102 | * do_page_fault gets invoked in the following cases: | ||
103 | * - the faulting virtual address uses unimplemented address bits | ||
104 | * - the faulting virtual address has no L1, L2, or L3 mapping | ||
105 | */ | ||
106 | mov r16=cr.ifa // get address that caused the TLB miss | ||
107 | #ifdef CONFIG_HUGETLB_PAGE | ||
108 | movl r18=PAGE_SHIFT | ||
109 | mov r25=cr.itir | ||
110 | #endif | ||
111 | ;; | ||
112 | rsm psr.dt // use physical addressing for data | ||
113 | mov r31=pr // save the predicate registers | ||
114 | mov r19=IA64_KR(PT_BASE) // get page table base address | ||
115 | shl r21=r16,3 // shift bit 60 into sign bit | ||
116 | shr.u r17=r16,61 // get the region number into r17 | ||
117 | ;; | ||
118 | shr r22=r21,3 | ||
119 | #ifdef CONFIG_HUGETLB_PAGE | ||
120 | extr.u r26=r25,2,6 | ||
121 | ;; | ||
122 | cmp.ne p8,p0=r18,r26 | ||
123 | sub r27=r26,r18 | ||
124 | ;; | ||
125 | (p8) dep r25=r18,r25,2,6 | ||
126 | (p8) shr r22=r22,r27 | ||
127 | #endif | ||
128 | ;; | ||
129 | cmp.eq p6,p7=5,r17 // is IFA pointing into region 5? | ||
130 | shr.u r18=r22,PGDIR_SHIFT // get bits 33-63 of the faulting address | ||
131 | ;; | ||
132 | (p7) dep r17=r17,r19,(PAGE_SHIFT-3),3 // put region number bits in place | ||
133 | |||
134 | srlz.d | ||
135 | LOAD_PHYSICAL(p6, r19, swapper_pg_dir) // region 5 is rooted at swapper_pg_dir | ||
136 | |||
137 | .pred.rel "mutex", p6, p7 | ||
138 | (p6) shr.u r21=r21,PGDIR_SHIFT+PAGE_SHIFT | ||
139 | (p7) shr.u r21=r21,PGDIR_SHIFT+PAGE_SHIFT-3 | ||
140 | ;; | ||
141 | (p6) dep r17=r18,r19,3,(PAGE_SHIFT-3) // r17=PTA + IFA(33,42)*8 | ||
142 | (p7) dep r17=r18,r17,3,(PAGE_SHIFT-6) // r17=PTA + (((IFA(61,63) << 7) | IFA(33,39))*8) | ||
143 | cmp.eq p7,p6=0,r21 // unused address bits all zeroes? | ||
144 | shr.u r18=r22,PMD_SHIFT // shift L2 index into position | ||
145 | ;; | ||
146 | ld8 r17=[r17] // fetch the L1 entry (may be 0) | ||
147 | ;; | ||
148 | (p7) cmp.eq p6,p7=r17,r0 // was L1 entry NULL? | ||
149 | dep r17=r18,r17,3,(PAGE_SHIFT-3) // compute address of L2 page table entry | ||
150 | ;; | ||
151 | (p7) ld8 r20=[r17] // fetch the L2 entry (may be 0) | ||
152 | shr.u r19=r22,PAGE_SHIFT // shift L3 index into position | ||
153 | ;; | ||
154 | (p7) cmp.eq.or.andcm p6,p7=r20,r0 // was L2 entry NULL? | ||
155 | dep r21=r19,r20,3,(PAGE_SHIFT-3) // compute address of L3 page table entry | ||
156 | ;; | ||
157 | (p7) ld8 r18=[r21] // read the L3 PTE | ||
158 | mov r19=cr.isr // cr.isr bit 32 (tested below) tells us if this is an insn miss | ||
159 | ;; | ||
160 | (p7) tbit.z p6,p7=r18,_PAGE_P_BIT // page present bit cleared? | ||
161 | mov r22=cr.iha // get the VHPT address that caused the TLB miss | ||
162 | ;; // avoid RAW on p7 | ||
163 | (p7) tbit.nz.unc p10,p11=r19,32 // is it an instruction TLB miss? | ||
164 | dep r23=0,r20,0,PAGE_SHIFT // clear low bits to get page address | ||
165 | ;; | ||
166 | (p10) itc.i r18 // insert the instruction TLB entry | ||
167 | (p11) itc.d r18 // insert the data TLB entry | ||
168 | (p6) br.cond.spnt.many page_fault // handle bad address/page not present (page fault) | ||
169 | mov cr.ifa=r22 | ||
170 | |||
171 | #ifdef CONFIG_HUGETLB_PAGE | ||
172 | (p8) mov cr.itir=r25 // change to default page-size for VHPT | ||
173 | #endif | ||
174 | |||
175 | /* | ||
176 | * Now compute and insert the TLB entry for the virtual page table. We never | ||
177 | * execute in a page table page so there is no need to set the exception deferral | ||
178 | * bit. | ||
179 | */ | ||
180 | adds r24=__DIRTY_BITS_NO_ED|_PAGE_PL_0|_PAGE_AR_RW,r23 | ||
181 | ;; | ||
182 | (p7) itc.d r24 | ||
183 | ;; | ||
184 | #ifdef CONFIG_SMP | ||
185 | /* | ||
186 | * Tell the assembler's dependency-violation checker that the above "itc" instructions | ||
187 | * cannot possibly affect the following loads: | ||
188 | */ | ||
189 | dv_serialize_data | ||
190 | |||
191 | /* | ||
192 | * Re-check L2 and L3 pagetable. If they changed, we may have received a ptc.g | ||
193 | * between reading the pagetable and the "itc". If so, flush the entry we | ||
194 | * inserted and retry. | ||
195 | */ | ||
196 | ld8 r25=[r21] // read L3 PTE again | ||
197 | ld8 r26=[r17] // read L2 entry again | ||
198 | ;; | ||
199 | cmp.ne p6,p7=r26,r20 // did L2 entry change | ||
200 | mov r27=PAGE_SHIFT<<2 | ||
201 | ;; | ||
202 | (p6) ptc.l r22,r27 // purge PTE page translation | ||
203 | (p7) cmp.ne.or.andcm p6,p7=r25,r18 // did L3 PTE change | ||
204 | ;; | ||
205 | (p6) ptc.l r16,r27 // purge translation | ||
206 | #endif | ||
207 | |||
208 | mov pr=r31,-1 // restore predicate registers | ||
209 | rfi | ||
210 | END(vhpt_miss) | ||
211 | |||
212 | .org ia64_ivt+0x400 | ||
213 | ///////////////////////////////////////////////////////////////////////////////////////// | ||
214 | // 0x0400 Entry 1 (size 64 bundles) ITLB (21) | ||
215 | ENTRY(itlb_miss) | ||
216 | DBG_FAULT(1) | ||
217 | /* | ||
218 | * The ITLB handler accesses the L3 PTE via the virtually mapped linear | ||
219 | * page table. If a nested TLB miss occurs, we switch into physical | ||
220 | * mode, walk the page table, and then re-execute the L3 PTE read | ||
221 | * and go on normally after that. | ||
222 | */ | ||
223 | mov r16=cr.ifa // get virtual address | ||
224 | mov r29=b0 // save b0 | ||
225 | mov r31=pr // save predicates | ||
226 | .itlb_fault: | ||
227 | mov r17=cr.iha // get virtual address of L3 PTE | ||
228 | movl r30=1f // load nested fault continuation point | ||
229 | ;; | ||
230 | 1: ld8 r18=[r17] // read L3 PTE | ||
231 | ;; | ||
232 | mov b0=r29 | ||
233 | tbit.z p6,p0=r18,_PAGE_P_BIT // page present bit cleared? | ||
234 | (p6) br.cond.spnt page_fault | ||
235 | ;; | ||
236 | itc.i r18 | ||
237 | ;; | ||
238 | #ifdef CONFIG_SMP | ||
239 | /* | ||
240 | * Tell the assembler's dependency-violation checker that the above "itc" instructions | ||
241 | * cannot possibly affect the following loads: | ||
242 | */ | ||
243 | dv_serialize_data | ||
244 | |||
245 | ld8 r19=[r17] // read L3 PTE again and see if same | ||
246 | mov r20=PAGE_SHIFT<<2 // setup page size for purge | ||
247 | ;; | ||
248 | cmp.ne p7,p0=r18,r19 | ||
249 | ;; | ||
250 | (p7) ptc.l r16,r20 | ||
251 | #endif | ||
252 | mov pr=r31,-1 | ||
253 | rfi | ||
254 | END(itlb_miss) | ||
255 | |||
256 | .org ia64_ivt+0x0800 | ||
257 | ///////////////////////////////////////////////////////////////////////////////////////// | ||
258 | // 0x0800 Entry 2 (size 64 bundles) DTLB (9,48) | ||
259 | ENTRY(dtlb_miss) | ||
260 | DBG_FAULT(2) | ||
261 | /* | ||
262 | * The DTLB handler accesses the L3 PTE via the virtually mapped linear | ||
263 | * page table. If a nested TLB miss occurs, we switch into physical | ||
264 | * mode, walk the page table, and then re-execute the L3 PTE read | ||
265 | * and go on normally after that. | ||
266 | */ | ||
267 | mov r16=cr.ifa // get virtual address | ||
268 | mov r29=b0 // save b0 | ||
269 | mov r31=pr // save predicates | ||
270 | dtlb_fault: | ||
271 | mov r17=cr.iha // get virtual address of L3 PTE | ||
272 | movl r30=1f // load nested fault continuation point | ||
273 | ;; | ||
274 | 1: ld8 r18=[r17] // read L3 PTE | ||
275 | ;; | ||
276 | mov b0=r29 | ||
277 | tbit.z p6,p0=r18,_PAGE_P_BIT // page present bit cleared? | ||
278 | (p6) br.cond.spnt page_fault | ||
279 | ;; | ||
280 | itc.d r18 | ||
281 | ;; | ||
282 | #ifdef CONFIG_SMP | ||
283 | /* | ||
284 | * Tell the assembler's dependency-violation checker that the above "itc" instructions | ||
285 | * cannot possibly affect the following loads: | ||
286 | */ | ||
287 | dv_serialize_data | ||
288 | |||
289 | ld8 r19=[r17] // read L3 PTE again and see if same | ||
290 | mov r20=PAGE_SHIFT<<2 // setup page size for purge | ||
291 | ;; | ||
292 | cmp.ne p7,p0=r18,r19 | ||
293 | ;; | ||
294 | (p7) ptc.l r16,r20 | ||
295 | #endif | ||
296 | mov pr=r31,-1 | ||
297 | rfi | ||
298 | END(dtlb_miss) | ||
299 | |||
300 | .org ia64_ivt+0x0c00 | ||
301 | ///////////////////////////////////////////////////////////////////////////////////////// | ||
302 | // 0x0c00 Entry 3 (size 64 bundles) Alt ITLB (19) | ||
303 | ENTRY(alt_itlb_miss) | ||
304 | DBG_FAULT(3) | ||
/*
 * Builds the TLB entry directly from the faulting address instead of reading a PTE:
 * the address is masked down to its physical-address bits, PAGE_KERNEL is OR-ed in,
 * and bit 4 (uncached) is set when address bit 61 is clear. Faults taken with
 * psr.cpl != 0 branch to page_fault. With CONFIG_DISABLE_VHPT, accesses to regions
 * 0-5 instead compute the PTE address with thash and continue at .itlb_fault.
 */
305 | mov r16=cr.ifa // get address that caused the TLB miss | ||
306 | movl r17=PAGE_KERNEL | ||
307 | mov r21=cr.ipsr | ||
308 | movl r19=(((1 << IA64_MAX_PHYS_BITS) - 1) & ~0xfff) | ||
309 | mov r31=pr | ||
310 | ;; | ||
311 | #ifdef CONFIG_DISABLE_VHPT | ||
312 | shr.u r22=r16,61 // get the region number into r22 | ||
313 | ;; | ||
314 | cmp.gt p8,p0=6,r22 // access to region 0-5 | ||
315 | ;; | ||
316 | (p8) thash r17=r16 | ||
317 | ;; | ||
318 | (p8) mov cr.iha=r17 | ||
319 | (p8) mov r29=b0 // save b0 | ||
320 | (p8) br.cond.dptk .itlb_fault | ||
321 | #endif | ||
322 | extr.u r23=r21,IA64_PSR_CPL0_BIT,2 // extract psr.cpl | ||
323 | and r19=r19,r16 // clear ed, reserved bits, and PTE control bits | ||
324 | shr.u r18=r16,57 // move address bit 61 to bit 4 | ||
325 | ;; | ||
326 | andcm r18=0x10,r18 // bit 4=~address-bit(61) | ||
327 | cmp.ne p8,p0=r0,r23 // psr.cpl != 0? | ||
328 | or r19=r17,r19 // insert PTE control bits into r19 | ||
329 | ;; | ||
330 | or r19=r19,r18 // set bit 4 (uncached) if the access was to region 6 | ||
331 | (p8) br.cond.spnt page_fault | ||
332 | ;; | ||
333 | itc.i r19 // insert the TLB entry | ||
334 | mov pr=r31,-1 | ||
335 | rfi | ||
336 | END(alt_itlb_miss) | ||
337 | |||
338 | .org ia64_ivt+0x1000 | ||
339 | ///////////////////////////////////////////////////////////////////////////////////////// | ||
340 | // 0x1000 Entry 4 (size 64 bundles) Alt DTLB (7,46) | ||
341 | ENTRY(alt_dtlb_miss) | ||
342 | DBG_FAULT(4) | ||
/*
 * Builds the TLB entry directly from the faulting address instead of reading a PTE
 * (physical-address bits | PAGE_KERNEL, plus bit 4 when address bit 61 is clear).
 * Faults taken with psr.cpl != 0 branch to page_fault. If the isr speculation bit is
 * set, or the non-access bit is set with isr.code == IA64_ISR_CODE_LFETCH, no entry is
 * inserted and the ED bit is set in cr.ipsr instead. With CONFIG_DISABLE_VHPT,
 * accesses to regions 0-5 compute the PTE address with thash and continue at
 * dtlb_fault.
 */
343 | mov r16=cr.ifa // get address that caused the TLB miss | ||
344 | movl r17=PAGE_KERNEL | ||
345 | mov r20=cr.isr | ||
346 | movl r19=(((1 << IA64_MAX_PHYS_BITS) - 1) & ~0xfff) | ||
347 | mov r21=cr.ipsr | ||
348 | mov r31=pr | ||
349 | ;; | ||
350 | #ifdef CONFIG_DISABLE_VHPT | ||
351 | shr.u r22=r16,61 // get the region number into r22 | ||
352 | ;; | ||
353 | cmp.gt p8,p0=6,r22 // access to region 0-5 | ||
354 | ;; | ||
355 | (p8) thash r17=r16 | ||
356 | ;; | ||
357 | (p8) mov cr.iha=r17 | ||
358 | (p8) mov r29=b0 // save b0 | ||
359 | (p8) br.cond.dptk dtlb_fault | ||
360 | #endif | ||
361 | extr.u r23=r21,IA64_PSR_CPL0_BIT,2 // extract psr.cpl | ||
362 | and r22=IA64_ISR_CODE_MASK,r20 // get the isr.code field | ||
363 | tbit.nz p6,p7=r20,IA64_ISR_SP_BIT // is speculation bit on? | ||
364 | shr.u r18=r16,57 // move address bit 61 to bit 4 | ||
365 | and r19=r19,r16 // clear ed, reserved bits, and PTE control bits | ||
366 | tbit.nz p9,p0=r20,IA64_ISR_NA_BIT // is non-access bit on? | ||
367 | ;; | ||
368 | andcm r18=0x10,r18 // bit 4=~address-bit(61) | ||
369 | cmp.ne p8,p0=r0,r23 | ||
370 | (p9) cmp.eq.or.andcm p6,p7=IA64_ISR_CODE_LFETCH,r22 // check isr.code field | ||
371 | (p8) br.cond.spnt page_fault | ||
372 | |||
373 | dep r21=-1,r21,IA64_PSR_ED_BIT,1 | ||
374 | or r19=r19,r17 // insert PTE control bits into r19 | ||
375 | ;; | ||
376 | or r19=r19,r18 // set bit 4 (uncached) if the access was to region 6 | ||
377 | (p6) mov cr.ipsr=r21 | ||
378 | ;; | ||
379 | (p7) itc.d r19 // insert the TLB entry | ||
380 | mov pr=r31,-1 | ||
381 | rfi | ||
382 | END(alt_dtlb_miss) | ||
383 | |||
384 | .org ia64_ivt+0x1400 | ||
385 | ///////////////////////////////////////////////////////////////////////////////////////// | ||
386 | // 0x1400 Entry 5 (size 64 bundles) Data nested TLB (6,45) | ||
387 | ENTRY(nested_dtlb_miss) | ||
388 | /* | ||
389 | * In the absence of kernel bugs, we get here when the virtually mapped linear | ||
390 | * page table is accessed non-speculatively (e.g., in the Dirty-bit, Instruction | ||
391 | * Access-bit, or Data Access-bit faults). If the DTLB entry for the virtual page | ||
392 | * table is missing, a nested TLB miss fault is triggered and control is | ||
393 | * transferred to this point. When this happens, we lookup the pte for the | ||
394 | * faulting address by walking the page table in physical mode and return to the | ||
395 | * continuation point passed in register r30 (or call page_fault if the address is | ||
396 | * not mapped). | ||
397 | * | ||
398 | * Input: r16: faulting address | ||
399 | * r29: saved b0 | ||
400 | * r30: continuation address | ||
401 | * r31: saved pr | ||
402 | * | ||
403 | * Output: r17: physical address of L3 PTE of faulting address | ||
404 | * r29: saved b0 | ||
405 | * r30: continuation address | ||
406 | * r31: saved pr | ||
407 | * | ||
408 | * Clobbered: b0, r18, r19, r21, p6, p7, psr.dt (cleared) | ||
409 | */ | ||
410 | rsm psr.dt // switch to using physical data addressing | ||
411 | mov r19=IA64_KR(PT_BASE) // get the page table base address | ||
412 | shl r21=r16,3 // shift bit 60 into sign bit | ||
413 | ;; | ||
414 | shr.u r17=r16,61 // get the region number into r17 | ||
415 | ;; | ||
416 | cmp.eq p6,p7=5,r17 // is faulting address in region 5? | ||
417 | shr.u r18=r16,PGDIR_SHIFT // get bits 33-63 of faulting address | ||
418 | ;; | ||
419 | (p7) dep r17=r17,r19,(PAGE_SHIFT-3),3 // put region number bits in place | ||
420 | |||
421 | srlz.d | ||
422 | LOAD_PHYSICAL(p6, r19, swapper_pg_dir) // region 5 is rooted at swapper_pg_dir | ||
423 | |||
424 | .pred.rel "mutex", p6, p7 | ||
425 | (p6) shr.u r21=r21,PGDIR_SHIFT+PAGE_SHIFT | ||
426 | (p7) shr.u r21=r21,PGDIR_SHIFT+PAGE_SHIFT-3 | ||
427 | ;; | ||
428 | (p6) dep r17=r18,r19,3,(PAGE_SHIFT-3) // r17=PTA + IFA(33,42)*8 | ||
429 | (p7) dep r17=r18,r17,3,(PAGE_SHIFT-6) // r17=PTA + (((IFA(61,63) << 7) | IFA(33,39))*8) | ||
430 | cmp.eq p7,p6=0,r21 // unused address bits all zeroes? | ||
431 | shr.u r18=r16,PMD_SHIFT // shift L2 index into position | ||
432 | ;; | ||
433 | ld8 r17=[r17] // fetch the L1 entry (may be 0) | ||
434 | ;; | ||
435 | (p7) cmp.eq p6,p7=r17,r0 // was L1 entry NULL? | ||
436 | dep r17=r18,r17,3,(PAGE_SHIFT-3) // compute address of L2 page table entry | ||
437 | ;; | ||
438 | (p7) ld8 r17=[r17] // fetch the L2 entry (may be 0) | ||
439 | shr.u r19=r16,PAGE_SHIFT // shift L3 index into position | ||
440 | ;; | ||
441 | (p7) cmp.eq.or.andcm p6,p7=r17,r0 // was L2 entry NULL? | ||
442 | dep r17=r19,r17,3,(PAGE_SHIFT-3) // compute address of L3 page table entry | ||
443 | (p6) br.cond.spnt page_fault | ||
444 | mov b0=r30 | ||
445 | br.sptk.many b0 // return to continuation point | ||
446 | END(nested_dtlb_miss) | ||
447 | |||
448 | .org ia64_ivt+0x1800 | ||
449 | ///////////////////////////////////////////////////////////////////////////////////////// | ||
450 | // 0x1800 Entry 6 (size 64 bundles) Instruction Key Miss (24) | ||
// No inline handler: FAULT(6) saves pr in r31, puts 6 in r19 and branches to dispatch_to_fault_handler.
451 | ENTRY(ikey_miss) | ||
452 | DBG_FAULT(6) | ||
453 | FAULT(6) | ||
454 | END(ikey_miss) | ||
455 | |||
456 | //----------------------------------------------------------------------------------- | ||
457 | // call ia64_do_page_fault (predicates are in r31, psr.dt may be off, r16 is faulting address) | ||
458 | ENTRY(page_fault) | ||
459 | ssm psr.dt | ||
460 | ;; | ||
461 | srlz.i | ||
462 | ;; | ||
463 | SAVE_MIN_WITH_COVER | ||
464 | alloc r15=ar.pfs,0,0,3,0 | ||
465 | mov out0=cr.ifa | ||
466 | mov out1=cr.isr | ||
467 | adds r3=8,r2 // set up second base pointer | ||
468 | ;; | ||
469 | ssm psr.ic | PSR_DEFAULT_BITS | ||
470 | ;; | ||
471 | srlz.i // guarantee that interruption collection is on | ||
472 | ;; | ||
473 | (p15) ssm psr.i // restore psr.i | ||
474 | movl r14=ia64_leave_kernel | ||
475 | ;; | ||
476 | SAVE_REST | ||
477 | mov rp=r14 | ||
478 | ;; | ||
479 | adds out2=16,r12 // out2 = pointer to pt_regs | ||
480 | br.call.sptk.many b6=ia64_do_page_fault // ignore return address | ||
481 | END(page_fault) | ||
482 | |||
483 | .org ia64_ivt+0x1c00 | ||
484 | ///////////////////////////////////////////////////////////////////////////////////////// | ||
485 | // 0x1c00 Entry 7 (size 64 bundles) Data Key Miss (12,51) | ||
// No inline handler: FAULT(7) saves pr in r31, puts 7 in r19 and branches to dispatch_to_fault_handler.
486 | ENTRY(dkey_miss) | ||
487 | DBG_FAULT(7) | ||
488 | FAULT(7) | ||
489 | END(dkey_miss) | ||
490 | |||
491 | .org ia64_ivt+0x2000 | ||
492 | ///////////////////////////////////////////////////////////////////////////////////////// | ||
493 | // 0x2000 Entry 8 (size 64 bundles) Dirty-bit (54) | ||
494 | ENTRY(dirty_bit) | ||
495 | DBG_FAULT(8) | ||
496 | /* | ||
497 | * What we do here is to simply turn on the dirty bit in the PTE. We need to | ||
498 | * update both the page-table and the TLB entry. To efficiently access the PTE, | ||
499 | * we address it through the virtual page table. Most likely, the TLB entry for | ||
500 | * the relevant virtual page table page is still present in the TLB so we can | ||
501 | * normally do this without additional TLB misses. In case the necessary virtual | ||
502 | * page table TLB entry isn't present, we take a nested TLB miss hit where we look | ||
503 | * up the physical address of the L3 PTE and then continue at label 1 below. | ||
504 | */ | ||
505 | mov r16=cr.ifa // get the address that caused the fault | ||
506 | movl r30=1f // load continuation point in case of nested fault | ||
507 | ;; | ||
508 | thash r17=r16 // compute virtual address of L3 PTE | ||
509 | mov r29=b0 // save b0 in case of nested fault | ||
510 | mov r31=pr // save pr | ||
511 | #ifdef CONFIG_SMP | ||
512 | mov r28=ar.ccv // save ar.ccv | ||
513 | ;; | ||
514 | 1: ld8 r18=[r17] | ||
515 | ;; // avoid RAW on r18 | ||
516 | mov ar.ccv=r18 // set compare value for cmpxchg | ||
517 | or r25=_PAGE_D|_PAGE_A,r18 // set the dirty and accessed bits | ||
518 | ;; | ||
519 | cmpxchg8.acq r26=[r17],r25,ar.ccv | ||
520 | mov r24=PAGE_SHIFT<<2 | ||
521 | ;; | ||
522 | cmp.eq p6,p7=r26,r18 | ||
523 | ;; | ||
524 | (p6) itc.d r25 // install updated PTE | ||
525 | ;; | ||
526 | /* | ||
527 | * Tell the assembler's dependency-violation checker that the above "itc" instructions | ||
528 | * cannot possibly affect the following loads: | ||
529 | */ | ||
530 | dv_serialize_data | ||
531 | |||
532 | ld8 r18=[r17] // read PTE again | ||
533 | ;; | ||
534 | cmp.eq p6,p7=r18,r25 // is it same as the newly installed | ||
535 | ;; | ||
536 | (p7) ptc.l r16,r24 | ||
537 | mov b0=r29 // restore b0 | ||
538 | mov ar.ccv=r28 | ||
539 | #else | ||
540 | ;; | ||
541 | 1: ld8 r18=[r17] | ||
542 | ;; // avoid RAW on r18 | ||
543 | or r18=_PAGE_D|_PAGE_A,r18 // set the dirty and accessed bits | ||
544 | mov b0=r29 // restore b0 | ||
545 | ;; | ||
546 | st8 [r17]=r18 // store back updated PTE | ||
547 | itc.d r18 // install updated PTE | ||
548 | #endif | ||
549 | mov pr=r31,-1 // restore pr | ||
550 | rfi | ||
551 | END(dirty_bit) | ||
552 | |||
553 | .org ia64_ivt+0x2400 | ||
554 | ///////////////////////////////////////////////////////////////////////////////////////// | ||
555 | // 0x2400 Entry 9 (size 64 bundles) Instruction Access-bit (27) | ||
556 | ENTRY(iaccess_bit) | ||
557 | DBG_FAULT(9) | ||
558 | // Like Entry 8, except for instruction access: only the accessed bit is set, and the entry is installed with itc.i | ||
559 | mov r16=cr.ifa // get the address that caused the fault | ||
560 | movl r30=1f // load continuation point in case of nested fault | ||
561 | mov r31=pr // save predicates | ||
562 | #ifdef CONFIG_ITANIUM | ||
563 | /* | ||
564 | * Erratum 10 (IFA may contain incorrect address) has "NoFix" status. | ||
565 | */ | ||
566 | mov r17=cr.ipsr | ||
567 | ;; | ||
568 | mov r18=cr.iip | ||
569 | tbit.z p6,p0=r17,IA64_PSR_IS_BIT // IA64 instruction set? | ||
570 | ;; | ||
571 | (p6) mov r16=r18 // if so, use cr.iip instead of cr.ifa | ||
572 | #endif /* CONFIG_ITANIUM */ | ||
573 | ;; | ||
574 | thash r17=r16 // compute virtual address of L3 PTE | ||
575 | mov r29=b0 // save b0 in case of nested fault | ||
576 | #ifdef CONFIG_SMP | ||
577 | mov r28=ar.ccv // save ar.ccv | ||
578 | ;; | ||
579 | 1: ld8 r18=[r17] | ||
580 | ;; | ||
581 | mov ar.ccv=r18 // set compare value for cmpxchg | ||
582 | or r25=_PAGE_A,r18 // set the accessed bit | ||
583 | ;; | ||
584 | cmpxchg8.acq r26=[r17],r25,ar.ccv | ||
585 | mov r24=PAGE_SHIFT<<2 | ||
586 | ;; | ||
587 | cmp.eq p6,p7=r26,r18 | ||
588 | ;; | ||
589 | (p6) itc.i r25 // install updated PTE | ||
590 | ;; | ||
591 | /* | ||
592 | * Tell the assembler's dependency-violation checker that the above "itc" instructions | ||
593 | * cannot possibly affect the following loads: | ||
594 | */ | ||
595 | dv_serialize_data | ||
596 | |||
597 | ld8 r18=[r17] // read PTE again | ||
598 | ;; | ||
599 | cmp.eq p6,p7=r18,r25 // is it same as the newly installed | ||
600 | ;; | ||
601 | (p7) ptc.l r16,r24 | ||
602 | mov b0=r29 // restore b0 | ||
603 | mov ar.ccv=r28 | ||
604 | #else /* !CONFIG_SMP */ | ||
605 | ;; | ||
606 | 1: ld8 r18=[r17] | ||
607 | ;; | ||
608 | or r18=_PAGE_A,r18 // set the accessed bit | ||
609 | mov b0=r29 // restore b0 | ||
610 | ;; | ||
611 | st8 [r17]=r18 // store back updated PTE | ||
612 | itc.i r18 // install updated PTE | ||
613 | #endif /* !CONFIG_SMP */ | ||
614 | mov pr=r31,-1 | ||
615 | rfi | ||
616 | END(iaccess_bit) | ||
617 | |||
618 | .org ia64_ivt+0x2800 | ||
619 | ///////////////////////////////////////////////////////////////////////////////////////// | ||
620 | // 0x2800 Entry 10 (size 64 bundles) Data Access-bit (15,55) | ||
621 | ENTRY(daccess_bit) | ||
622 | DBG_FAULT(10) | ||
623 | // Like Entry 8, except for data access: only the accessed bit is set | ||
624 | mov r16=cr.ifa // get the address that caused the fault | ||
625 | movl r30=1f // load continuation point in case of nested fault | ||
626 | ;; | ||
627 | thash r17=r16 // compute virtual address of L3 PTE | ||
628 | mov r31=pr | ||
629 | mov r29=b0 // save b0 in case of nested fault | ||
630 | #ifdef CONFIG_SMP | ||
631 | mov r28=ar.ccv // save ar.ccv | ||
632 | ;; | ||
633 | 1: ld8 r18=[r17] | ||
634 | ;; // avoid RAW on r18 | ||
635 | mov ar.ccv=r18 // set compare value for cmpxchg | ||
636 | or r25=_PAGE_A,r18 // set the accessed bit | ||
637 | ;; | ||
638 | cmpxchg8.acq r26=[r17],r25,ar.ccv | ||
639 | mov r24=PAGE_SHIFT<<2 | ||
640 | ;; | ||
641 | cmp.eq p6,p7=r26,r18 | ||
642 | ;; | ||
643 | (p6) itc.d r25 // install updated PTE | ||
644 | /* | ||
645 | * Tell the assembler's dependency-violation checker that the above "itc" instructions | ||
646 | * cannot possibly affect the following loads: | ||
647 | */ | ||
648 | dv_serialize_data | ||
649 | ;; | ||
650 | ld8 r18=[r17] // read PTE again | ||
651 | ;; | ||
652 | cmp.eq p6,p7=r18,r25 // is it same as the newly installed | ||
653 | ;; | ||
654 | (p7) ptc.l r16,r24 | ||
655 | mov ar.ccv=r28 | ||
656 | #else | ||
657 | ;; | ||
658 | 1: ld8 r18=[r17] | ||
659 | ;; // avoid RAW on r18 | ||
660 | or r18=_PAGE_A,r18 // set the accessed bit | ||
661 | ;; | ||
662 | st8 [r17]=r18 // store back updated PTE | ||
663 | itc.d r18 // install updated PTE | ||
664 | #endif | ||
665 | mov b0=r29 // restore b0 | ||
666 | mov pr=r31,-1 | ||
667 | rfi | ||
668 | END(daccess_bit) | ||
669 | |||
670 | .org ia64_ivt+0x2c00 | ||
671 | ///////////////////////////////////////////////////////////////////////////////////////// | ||
672 | // 0x2c00 Entry 11 (size 64 bundles) Break instruction (33) | ||
673 | ENTRY(break_fault) | ||
674 | /* | ||
675 | * The streamlined system call entry/exit paths only save/restore the initial part | ||
676 | * of pt_regs. This implies that the callers of system-calls must adhere to the | ||
677 | * normal procedure calling conventions. | ||
678 | * | ||
679 | * Registers to be saved & restored: | ||
680 | * CR registers: cr.ipsr, cr.iip, cr.ifs | ||
681 | * AR registers: ar.unat, ar.pfs, ar.rsc, ar.rnat, ar.bspstore, ar.fpsr | ||
682 | * others: pr, b0, b6, loadrs, r1, r11, r12, r13, r15 | ||
683 | * Registers to be restored only: | ||
684 | * r8-r11: output value from the system call. | ||
685 | * | ||
686 | * During system call exit, scratch registers (including r15) are modified/cleared | ||
687 | * to prevent leaking bits from kernel to user level. | ||
688 | */ | ||
689 | DBG_FAULT(11) | ||
690 | mov r16=IA64_KR(CURRENT) // r16 = current task; 12 cycle read lat. | ||
691 | mov r17=cr.iim | ||
692 | mov r18=__IA64_BREAK_SYSCALL | ||
693 | mov r21=ar.fpsr | ||
694 | mov r29=cr.ipsr | ||
695 | mov r19=b6 | ||
696 | mov r25=ar.unat | ||
697 | mov r27=ar.rsc | ||
698 | mov r26=ar.pfs | ||
699 | mov r28=cr.iip | ||
700 | mov r31=pr // prepare to save predicates | ||
701 | mov r20=r1 | ||
702 | ;; | ||
703 | adds r16=IA64_TASK_THREAD_ON_USTACK_OFFSET,r16 | ||
704 | cmp.eq p0,p7=r18,r17 // is this a system call? (p7 <- false, if so) | ||
705 | (p7) br.cond.spnt non_syscall | ||
706 | ;; | ||
707 | ld1 r17=[r16] // load current->thread.on_ustack flag | ||
708 | st1 [r16]=r0 // clear current->thread.on_ustack flag | ||
709 | add r1=-IA64_TASK_THREAD_ON_USTACK_OFFSET,r16 // set r1 for MINSTATE_START_SAVE_MIN_VIRT | ||
710 | ;; | ||
711 | invala | ||
712 | |||
713 | /* adjust return address so we skip over the break instruction: */ | ||
714 | |||
715 | extr.u r8=r29,41,2 // extract ei field from cr.ipsr | ||
716 | ;; | ||
717 | cmp.eq p6,p7=2,r8 // ipsr.ei==2? | ||
718 | mov r2=r1 // setup r2 for ia64_syscall_setup | ||
719 | ;; | ||
720 | (p6) mov r8=0 // clear ei to 0 | ||
721 | (p6) adds r28=16,r28 // switch cr.iip to next bundle (cr.ipsr.ei wrapped) | ||
722 | (p7) adds r8=1,r8 // increment ei to next slot | ||
723 | ;; | ||
724 | cmp.eq pKStk,pUStk=r0,r17 // are we in kernel mode already? | ||
725 | dep r29=r8,r29,41,2 // insert new ei into cr.ipsr | ||
726 | ;; | ||
727 | |||
728 | // switch from user to kernel RBS: | ||
729 | MINSTATE_START_SAVE_MIN_VIRT | ||
730 | br.call.sptk.many b7=ia64_syscall_setup | ||
731 | ;; | ||
732 | MINSTATE_END_SAVE_MIN_VIRT // switch to bank 1 | ||
733 | ssm psr.ic | PSR_DEFAULT_BITS | ||
734 | ;; | ||
735 | srlz.i // guarantee that interruption collection is on | ||
736 | mov r3=NR_syscalls - 1 | ||
737 | ;; | ||
738 | (p15) ssm psr.i // restore psr.i | ||
739 | // p10==true means out registers are more than 8 or r15's Nat is true | ||
740 | (p10) br.cond.spnt.many ia64_ret_from_syscall | ||
741 | ;; | ||
742 | movl r16=sys_call_table | ||
743 | |||
744 | adds r15=-1024,r15 // r15 contains the syscall number---subtract 1024 | ||
745 | movl r2=ia64_ret_from_syscall | ||
746 | ;; | ||
747 | shladd r20=r15,3,r16 // r20 = sys_call_table + 8*(syscall-1024) | ||
748 | cmp.leu p6,p7=r15,r3 // (syscall >= 1024 && syscall < 1024 + NR_syscalls) ? (unsigned compare) | ||
749 | mov rp=r2 // set the real return addr | ||
750 | ;; | ||
751 | (p6) ld8 r20=[r20] // load address of syscall entry point | ||
752 | (p7) movl r20=sys_ni_syscall | ||
753 | |||
754 | add r2=TI_FLAGS+IA64_TASK_SIZE,r13 | ||
755 | ;; | ||
756 | ld4 r2=[r2] // r2 = current_thread_info()->flags | ||
757 | ;; | ||
758 | and r2=_TIF_SYSCALL_TRACEAUDIT,r2 // mask trace or audit | ||
759 | ;; | ||
760 | cmp.eq p8,p0=r2,r0 | ||
761 | mov b6=r20 | ||
762 | ;; | ||
763 | (p8) br.call.sptk.many b6=b6 // ignore this return addr | ||
764 | br.cond.sptk ia64_trace_syscall | ||
765 | // NOT REACHED | ||
766 | END(break_fault) | ||
767 | |||
768 | .org ia64_ivt+0x3000 | ||
769 | ///////////////////////////////////////////////////////////////////////////////////////// | ||
770 | // 0x3000 Entry 12 (size 64 bundles) External Interrupt (4) | ||
// Saves register state (SAVE_MIN_WITH_COVER, SAVE_REST), then calls ia64_handle_irq with
// cr.ivr and a pointer to pt_regs as arguments; rp is pointed at ia64_leave_kernel first.
771 | ENTRY(interrupt) | ||
772 | DBG_FAULT(12) | ||
773 | mov r31=pr // prepare to save predicates | ||
774 | ;; | ||
775 | SAVE_MIN_WITH_COVER // uses r31; defines r2 and r3 | ||
776 | ssm psr.ic | PSR_DEFAULT_BITS | ||
777 | ;; | ||
778 | adds r3=8,r2 // set up second base pointer for SAVE_REST | ||
779 | srlz.i // ensure everybody knows psr.ic is back on | ||
780 | ;; | ||
781 | SAVE_REST | ||
782 | ;; | ||
783 | alloc r14=ar.pfs,0,0,2,0 // must be first in an insn group | ||
784 | mov out0=cr.ivr // pass cr.ivr as first arg | ||
785 | add out1=16,sp // pass pointer to pt_regs as second arg | ||
786 | ;; | ||
787 | srlz.d // make sure we see the effect of cr.ivr | ||
788 | movl r14=ia64_leave_kernel | ||
789 | ;; | ||
790 | mov rp=r14 | ||
791 | br.call.sptk.many b6=ia64_handle_irq | ||
792 | END(interrupt) | ||
793 | |||
794 | .org ia64_ivt+0x3400 | ||
795 | ///////////////////////////////////////////////////////////////////////////////////////// | ||
796 | // 0x3400 Entry 13 (size 64 bundles) Reserved | ||
797 | DBG_FAULT(13) | ||
798 | FAULT(13) | ||
799 | |||
800 | .org ia64_ivt+0x3800 | ||
801 | ///////////////////////////////////////////////////////////////////////////////////////// | ||
802 | // 0x3800 Entry 14 (size 64 bundles) Reserved | ||
803 | DBG_FAULT(14) | ||
804 | FAULT(14) | ||
805 | |||
806 | /* | ||
807 | * There is no particular reason for this code to be here, other than that | ||
808 | * there happens to be space here that would go unused otherwise. If this | ||
809 | * fault ever gets "unreserved", simply move the following code to a more | ||
810 | * suitable spot... | ||
811 | * | ||
812 | * ia64_syscall_setup() is a separate subroutine so that it can | ||
813 | * allocate stacked registers so it can safely demine any | ||
814 | * potential NaT values from the input registers. | ||
815 | * | ||
816 | * On entry: | ||
817 | * - executing on bank 0 or bank 1 register set (doesn't matter) | ||
818 | * - r1: stack pointer | ||
819 | * - r2: current task pointer | ||
820 | * - r3: preserved | ||
821 | * - r11: original contents (saved ar.pfs to be saved) | ||
822 | * - r12: original contents (sp to be saved) | ||
823 | * - r13: original contents (tp to be saved) | ||
824 | * - r15: original contents (syscall # to be saved) | ||
825 | * - r18: saved bsp (after switching to kernel stack) | ||
826 | * - r19: saved b6 | ||
827 | * - r20: saved r1 (gp) | ||
828 | * - r21: saved ar.fpsr | ||
829 | * - r22: kernel's register backing store base (krbs_base) | ||
830 | * - r23: saved ar.bspstore | ||
831 | * - r24: saved ar.rnat | ||
832 | * - r25: saved ar.unat | ||
833 | * - r26: saved ar.pfs | ||
834 | * - r27: saved ar.rsc | ||
835 | * - r28: saved cr.iip | ||
836 | * - r29: saved cr.ipsr | ||
837 | * - r31: saved pr | ||
838 | * - b0: original contents (to be saved) | ||
839 | * On exit: | ||
840 | * - executing on bank 1 registers | ||
841 | * - psr.ic enabled, interrupts restored | ||
842 | * - p10: TRUE if syscall is invoked with more than 8 out | ||
843 | * registers or r15's Nat is true | ||
844 | * - r1: kernel's gp | ||
845 | * - r3: preserved (same as on entry) | ||
846 | * - r8: -EINVAL if p10 is true | ||
847 | * - r12: points to kernel stack | ||
848 | * - r13: points to current task | ||
849 | * - p15: TRUE if interrupts need to be re-enabled | ||
850 | * - ar.fpsr: set to kernel settings | ||
851 | */ | ||
852 | GLOBAL_ENTRY(ia64_syscall_setup) | ||
853 | #if PT(B6) != 0 | ||
854 | # error This code assumes that b6 is the first field in pt_regs. | ||
855 | #endif | ||
856 | st8 [r1]=r19 // save b6 | ||
857 | add r16=PT(CR_IPSR),r1 // initialize first base pointer | ||
858 | add r17=PT(R11),r1 // initialize second base pointer | ||
859 | ;; | ||
860 | alloc r19=ar.pfs,8,0,0,0 // ensure in0-in7 are writable | ||
861 | st8 [r16]=r29,PT(AR_PFS)-PT(CR_IPSR) // save cr.ipsr | ||
862 | tnat.nz p8,p0=in0 | ||
863 | |||
864 | st8.spill [r17]=r11,PT(CR_IIP)-PT(R11) // save r11 | ||
865 | tnat.nz p9,p0=in1 | ||
866 | (pKStk) mov r18=r0 // make sure r18 isn't NaT | ||
867 | ;; | ||
868 | |||
869 | st8 [r16]=r26,PT(CR_IFS)-PT(AR_PFS) // save ar.pfs | ||
870 | st8 [r17]=r28,PT(AR_UNAT)-PT(CR_IIP) // save cr.iip | ||
871 | mov r28=b0 // save b0 (2 cyc) | ||
872 | ;; | ||
873 | |||
874 | st8 [r17]=r25,PT(AR_RSC)-PT(AR_UNAT) // save ar.unat | ||
875 | dep r19=0,r19,38,26 // clear all bits but 0..37 [I0] | ||
876 | (p8) mov in0=-1 | ||
877 | ;; | ||
878 | |||
879 | st8 [r16]=r19,PT(AR_RNAT)-PT(CR_IFS) // store ar.pfs.pfm in cr.ifs | ||
880 | extr.u r11=r19,7,7 // I0 // get sol of ar.pfs | ||
881 | and r8=0x7f,r19 // A // get sof of ar.pfs | ||
882 | |||
883 | st8 [r17]=r27,PT(AR_BSPSTORE)-PT(AR_RSC)// save ar.rsc | ||
884 | tbit.nz p15,p0=r29,IA64_PSR_I_BIT // I0 | ||
885 | (p9) mov in1=-1 | ||
886 | ;; | ||
887 | |||
888 | (pUStk) sub r18=r18,r22 // r18=RSE.ndirty*8 | ||
889 | tnat.nz p10,p0=in2 | ||
890 | add r11=8,r11 | ||
891 | ;; | ||
892 | (pKStk) adds r16=PT(PR)-PT(AR_RNAT),r16 // skip over ar_rnat field | ||
893 | (pKStk) adds r17=PT(B0)-PT(AR_BSPSTORE),r17 // skip over ar_bspstore field | ||
894 | tnat.nz p11,p0=in3 | ||
895 | ;; | ||
896 | (p10) mov in2=-1 | ||
897 | tnat.nz p12,p0=in4 // [I0] | ||
898 | (p11) mov in3=-1 | ||
899 | ;; | ||
900 | (pUStk) st8 [r16]=r24,PT(PR)-PT(AR_RNAT) // save ar.rnat | ||
901 | (pUStk) st8 [r17]=r23,PT(B0)-PT(AR_BSPSTORE) // save ar.bspstore | ||
902 | shl r18=r18,16 // compute ar.rsc to be used for "loadrs" | ||
903 | ;; | ||
904 | st8 [r16]=r31,PT(LOADRS)-PT(PR) // save predicates | ||
905 | st8 [r17]=r28,PT(R1)-PT(B0) // save b0 | ||
906 | tnat.nz p13,p0=in5 // [I0] | ||
907 | ;; | ||
908 | st8 [r16]=r18,PT(R12)-PT(LOADRS) // save ar.rsc value for "loadrs" | ||
909 | st8.spill [r17]=r20,PT(R13)-PT(R1) // save original r1 | ||
910 | (p12) mov in4=-1 | ||
911 | ;; | ||
912 | |||
913 | .mem.offset 0,0; st8.spill [r16]=r12,PT(AR_FPSR)-PT(R12) // save r12 | ||
914 | .mem.offset 8,0; st8.spill [r17]=r13,PT(R15)-PT(R13) // save r13 | ||
915 | (p13) mov in5=-1 | ||
916 | ;; | ||
917 | st8 [r16]=r21,PT(R8)-PT(AR_FPSR) // save ar.fpsr | ||
918 | tnat.nz p14,p0=in6 | ||
919 | cmp.lt p10,p9=r11,r8 // frame size can't be more than local+8 | ||
920 | ;; | ||
921 | stf8 [r16]=f1 // ensure pt_regs.r8 != 0 (see handle_syscall_error) | ||
922 | (p9) tnat.nz p10,p0=r15 | ||
923 | adds r12=-16,r1 // switch to kernel memory stack (with 16 bytes of scratch) | ||
924 | |||
925 | st8.spill [r17]=r15 // save r15 | ||
926 | tnat.nz p8,p0=in7 | ||
927 | nop.i 0 | ||
928 | |||
929 | mov r13=r2 // establish `current' | ||
930 | movl r1=__gp // establish kernel global pointer | ||
931 | ;; | ||
932 | (p14) mov in6=-1 | ||
933 | (p8) mov in7=-1 | ||
934 | nop.i 0 | ||
935 | |||
936 | cmp.eq pSys,pNonSys=r0,r0 // set pSys=1, pNonSys=0 | ||
937 | movl r17=FPSR_DEFAULT | ||
938 | ;; | ||
939 | mov.m ar.fpsr=r17 // set ar.fpsr to kernel default value | ||
940 | (p10) mov r8=-EINVAL | ||
941 | br.ret.sptk.many b7 | ||
942 | END(ia64_syscall_setup) | ||
943 | |||
944 | .org ia64_ivt+0x3c00 | ||
945 | ///////////////////////////////////////////////////////////////////////////////////////// | ||
946 | // 0x3c00 Entry 15 (size 64 bundles) Reserved | ||
947 | DBG_FAULT(15) | ||
948 | FAULT(15) | ||
949 | |||
950 | /* | ||
951 | * Squatting in this space ... | ||
952 | * | ||
953 | * This special case dispatcher for illegal operation faults allows preserved | ||
954 | * registers to be modified through a callback function (asm only) that is handed | ||
955 | * back from the fault handler in r8. Up to three arguments can be passed to the | ||
956 | * callback function by returning an aggregate with the callback as its first | ||
957 | * element, followed by the arguments. | ||
958 | */ | ||
959 | ENTRY(dispatch_illegal_op_fault) | ||
960 | .prologue | ||
961 | .body | ||
962 | SAVE_MIN_WITH_COVER | ||
963 | ssm psr.ic | PSR_DEFAULT_BITS | ||
964 | ;; | ||
965 | srlz.i // guarantee that interruption collection is on | ||
966 | ;; | ||
967 | (p15) ssm psr.i // restore psr.i | ||
968 | adds r3=8,r2 // set up second base pointer for SAVE_REST | ||
969 | ;; | ||
970 | alloc r14=ar.pfs,0,0,1,0 // must be first in insn group | ||
971 | mov out0=ar.ec | ||
972 | ;; | ||
973 | SAVE_REST | ||
974 | PT_REGS_UNWIND_INFO(0) | ||
975 | ;; | ||
976 | br.call.sptk.many rp=ia64_illegal_op_fault | ||
977 | .ret0: ;; | ||
978 | alloc r14=ar.pfs,0,0,3,0 // must be first in insn group | ||
979 | mov out0=r9 | ||
980 | mov out1=r10 | ||
981 | mov out2=r11 | ||
982 | movl r15=ia64_leave_kernel | ||
983 | ;; | ||
984 | mov rp=r15 | ||
985 | mov b6=r8 | ||
986 | ;; | ||
987 | cmp.ne p6,p0=0,r8 | ||
988 | (p6) br.call.dpnt.many b6=b6 // call returns to ia64_leave_kernel | ||
989 | br.sptk.many ia64_leave_kernel | ||
990 | END(dispatch_illegal_op_fault) | ||
991 | |||
992 | .org ia64_ivt+0x4000 | ||
993 | ///////////////////////////////////////////////////////////////////////////////////////// | ||
994 | // 0x4000 Entry 16 (size 64 bundles) Reserved | ||
995 | DBG_FAULT(16) | ||
996 | FAULT(16) | ||
997 | |||
998 | .org ia64_ivt+0x4400 | ||
999 | ///////////////////////////////////////////////////////////////////////////////////////// | ||
1000 | // 0x4400 Entry 17 (size 64 bundles) Reserved | ||
1001 | DBG_FAULT(17) | ||
1002 | FAULT(17) | ||
1003 | |||
1004 | ENTRY(non_syscall) | ||
1005 | SAVE_MIN_WITH_COVER | ||
1006 | |||
1007 | // There is no particular reason for this code to be here, other than that | ||
1008 | // there happens to be space here that would go unused otherwise. If this | ||
1009 | // fault ever gets "unreserved", simply move the following code to a more | ||
1010 | // suitable spot... | ||
1011 | |||
1012 | alloc r14=ar.pfs,0,0,2,0 | ||
1013 | mov out0=cr.iim | ||
1014 | add out1=16,sp | ||
1015 | adds r3=8,r2 // set up second base pointer for SAVE_REST | ||
1016 | |||
1017 | ssm psr.ic | PSR_DEFAULT_BITS | ||
1018 | ;; | ||
1019 | srlz.i // guarantee that interruption collection is on | ||
1020 | ;; | ||
1021 | (p15) ssm psr.i // restore psr.i | ||
1022 | movl r15=ia64_leave_kernel | ||
1023 | ;; | ||
1024 | SAVE_REST | ||
1025 | mov rp=r15 | ||
1026 | ;; | ||
1027 | br.call.sptk.many b6=ia64_bad_break // avoid WAW on CFM and ignore return addr | ||
1028 | END(non_syscall) | ||
1029 | |||
1030 | .org ia64_ivt+0x4800 | ||
1031 | ///////////////////////////////////////////////////////////////////////////////////////// | ||
1032 | // 0x4800 Entry 18 (size 64 bundles) Reserved | ||
1033 | DBG_FAULT(18) | ||
1034 | FAULT(18) | ||
1035 | |||
1036 | /* | ||
1037 | * There is no particular reason for this code to be here, other than that | ||
1038 | * there happens to be space here that would go unused otherwise. If this | ||
1039 | * fault ever gets "unreserved", simply move the following code to a more | ||
1040 | * suitable spot... | ||
1041 | */ | ||
1042 | |||
1043 | ENTRY(dispatch_unaligned_handler) | ||
1044 | SAVE_MIN_WITH_COVER | ||
1045 | ;; | ||
1046 | alloc r14=ar.pfs,0,0,2,0 // now it's safe (must be first in insn group!) | ||
1047 | mov out0=cr.ifa | ||
1048 | adds out1=16,sp | ||
1049 | |||
1050 | ssm psr.ic | PSR_DEFAULT_BITS | ||
1051 | ;; | ||
1052 | srlz.i // guarantee that interruption collection is on | ||
1053 | ;; | ||
1054 | (p15) ssm psr.i // restore psr.i | ||
1055 | adds r3=8,r2 // set up second base pointer | ||
1056 | ;; | ||
1057 | SAVE_REST | ||
1058 | movl r14=ia64_leave_kernel | ||
1059 | ;; | ||
1060 | mov rp=r14 | ||
1061 | br.sptk.many ia64_prepare_handle_unaligned | ||
1062 | END(dispatch_unaligned_handler) | ||
1063 | |||
1064 | .org ia64_ivt+0x4c00 | ||
1065 | ///////////////////////////////////////////////////////////////////////////////////////// | ||
1066 | // 0x4c00 Entry 19 (size 64 bundles) Reserved | ||
1067 | DBG_FAULT(19) | ||
1068 | FAULT(19) | ||
1069 | |||
1070 | /* | ||
1071 | * There is no particular reason for this code to be here, other than that | ||
1072 | * there happens to be space here that would go unused otherwise. If this | ||
1073 | * fault ever gets "unreserved", simply move the following code to a more | ||
1074 | * suitable spot... | ||
1075 | */ | ||
1076 | |||
1077 | ENTRY(dispatch_to_fault_handler) | ||
1078 | /* | ||
1079 | * Input: | ||
1080 | * psr.ic: off | ||
1081 | * r19: fault vector number (e.g., 24 for General Exception) | ||
1082 | * r31: contains saved predicates (pr) | ||
1083 | */ | ||
1084 | SAVE_MIN_WITH_COVER_R19 | ||
1085 | alloc r14=ar.pfs,0,0,5,0 | ||
1086 | mov out0=r15 | ||
1087 | mov out1=cr.isr | ||
1088 | mov out2=cr.ifa | ||
1089 | mov out3=cr.iim | ||
1090 | mov out4=cr.itir | ||
1091 | ;; | ||
1092 | ssm psr.ic | PSR_DEFAULT_BITS | ||
1093 | ;; | ||
1094 | srlz.i // guarantee that interruption collection is on | ||
1095 | ;; | ||
1096 | (p15) ssm psr.i // restore psr.i | ||
1097 | adds r3=8,r2 // set up second base pointer for SAVE_REST | ||
1098 | ;; | ||
1099 | SAVE_REST | ||
1100 | movl r14=ia64_leave_kernel | ||
1101 | ;; | ||
1102 | mov rp=r14 | ||
1103 | br.call.sptk.many b6=ia64_fault | ||
1104 | END(dispatch_to_fault_handler) | ||
1105 | |||
1106 | // | ||
1107 | // --- End of long entries, Beginning of short entries | ||
1108 | // | ||
1109 | |||
1110 | .org ia64_ivt+0x5000 | ||
1111 | ///////////////////////////////////////////////////////////////////////////////////////// | ||
1112 | // 0x5000 Entry 20 (size 16 bundles) Page Not Present (10,22,49) | ||
1113 | ENTRY(page_not_present) | ||
1114 | DBG_FAULT(20) | ||
1115 | mov r16=cr.ifa | ||
1116 | rsm psr.dt | ||
1117 | /* | ||
1118 | * The Linux page fault handler doesn't expect non-present pages to be in | ||
1119 | * the TLB. Flush the existing entry now, so we meet that expectation. | ||
1120 | */ | ||
1121 | mov r17=PAGE_SHIFT<<2 | ||
1122 | ;; | ||
1123 | ptc.l r16,r17 | ||
1124 | ;; | ||
1125 | mov r31=pr | ||
1126 | srlz.d | ||
1127 | br.sptk.many page_fault | ||
1128 | END(page_not_present) | ||
1129 | |||
1130 | .org ia64_ivt+0x5100 | ||
1131 | ///////////////////////////////////////////////////////////////////////////////////////// | ||
1132 | // 0x5100 Entry 21 (size 16 bundles) Key Permission (13,25,52) | ||
1133 | ENTRY(key_permission) | ||
1134 | DBG_FAULT(21) | ||
1135 | mov r16=cr.ifa | ||
1136 | rsm psr.dt | ||
1137 | mov r31=pr | ||
1138 | ;; | ||
1139 | srlz.d | ||
1140 | br.sptk.many page_fault | ||
1141 | END(key_permission) | ||
1142 | |||
1143 | .org ia64_ivt+0x5200 | ||
1144 | ///////////////////////////////////////////////////////////////////////////////////////// | ||
1145 | // 0x5200 Entry 22 (size 16 bundles) Instruction Access Rights (26) | ||
1146 | ENTRY(iaccess_rights) | ||
1147 | DBG_FAULT(22) | ||
1148 | mov r16=cr.ifa | ||
1149 | rsm psr.dt | ||
1150 | mov r31=pr | ||
1151 | ;; | ||
1152 | srlz.d | ||
1153 | br.sptk.many page_fault | ||
1154 | END(iaccess_rights) | ||
1155 | |||
1156 | .org ia64_ivt+0x5300 | ||
1157 | ///////////////////////////////////////////////////////////////////////////////////////// | ||
1158 | // 0x5300 Entry 23 (size 16 bundles) Data Access Rights (14,53) | ||
1159 | ENTRY(daccess_rights) | ||
1160 | DBG_FAULT(23) | ||
1161 | mov r16=cr.ifa | ||
1162 | rsm psr.dt | ||
1163 | mov r31=pr | ||
1164 | ;; | ||
1165 | srlz.d | ||
1166 | br.sptk.many page_fault | ||
1167 | END(daccess_rights) | ||
1168 | |||
1169 | .org ia64_ivt+0x5400 | ||
1170 | ///////////////////////////////////////////////////////////////////////////////////////// | ||
1171 | // 0x5400 Entry 24 (size 16 bundles) General Exception (5,32,34,36,38,39) | ||
1172 | ENTRY(general_exception) | ||
1173 | DBG_FAULT(24) | ||
1174 | mov r16=cr.isr | ||
1175 | mov r31=pr | ||
1176 | ;; | ||
1177 | cmp4.eq p6,p0=0,r16 | ||
1178 | (p6) br.sptk.many dispatch_illegal_op_fault | ||
1179 | ;; | ||
1180 | mov r19=24 // fault number | ||
1181 | br.sptk.many dispatch_to_fault_handler | ||
1182 | END(general_exception) | ||
1183 | |||
1184 | .org ia64_ivt+0x5500 | ||
1185 | ///////////////////////////////////////////////////////////////////////////////////////// | ||
1186 | // 0x5500 Entry 25 (size 16 bundles) Disabled FP-Register (35) | ||
1187 | ENTRY(disabled_fp_reg) | ||
1188 | DBG_FAULT(25) | ||
1189 | rsm psr.dfh // ensure we can access fph | ||
1190 | ;; | ||
1191 | srlz.d | ||
1192 | mov r31=pr | ||
1193 | mov r19=25 | ||
1194 | br.sptk.many dispatch_to_fault_handler | ||
1195 | END(disabled_fp_reg) | ||
1196 | |||
1197 | .org ia64_ivt+0x5600 | ||
1198 | ///////////////////////////////////////////////////////////////////////////////////////// | ||
1199 | // 0x5600 Entry 26 (size 16 bundles) Nat Consumption (11,23,37,50) | ||
1200 | ENTRY(nat_consumption) | ||
1201 | DBG_FAULT(26) | ||
1202 | FAULT(26) | ||
1203 | END(nat_consumption) | ||
1204 | |||
1205 | .org ia64_ivt+0x5700 | ||
1206 | ///////////////////////////////////////////////////////////////////////////////////////// | ||
1207 | // 0x5700 Entry 27 (size 16 bundles) Speculation (40) | ||
1208 | ENTRY(speculation_vector) | ||
1209 | DBG_FAULT(27) | ||
1210 | /* | ||
1211 | * A [f]chk.[as] instruction needs to take the branch to the recovery code but | ||
1212 | * this part of the architecture is not implemented in hardware on some CPUs, such | ||
1213 | * as Itanium. Thus, in general we need to emulate the behavior. IIM contains | ||
1214 | * the relative target (not yet sign extended). So after sign extending it we | ||
1215 | * simply add it to IIP. We also need to reset the EI field of the IPSR to zero, | ||
1216 | * i.e., the slot to restart into. | ||
1217 | * | ||
1218 | * cr.iim contains zero_ext(imm21) | ||
1219 | */ | ||
1220 | mov r18=cr.iim | ||
1221 | ;; | ||
1222 | mov r17=cr.iip | ||
1223 | shl r18=r18,43 // put sign bit in position (43=64-21) | ||
1224 | ;; | ||
1225 | |||
1226 | mov r16=cr.ipsr | ||
1227 | shr r18=r18,39 // sign extend (39=43-4) | ||
1228 | ;; | ||
1229 | |||
1230 | add r17=r17,r18 // now add the offset | ||
1231 | ;; | ||
1232 | mov cr.iip=r17 | ||
1233 | dep r16=0,r16,41,2 // clear EI | ||
1234 | ;; | ||
1235 | |||
1236 | mov cr.ipsr=r16 | ||
1237 | ;; | ||
1238 | |||
1239 | rfi // and go back | ||
1240 | END(speculation_vector) | ||
1241 | |||
1242 | .org ia64_ivt+0x5800 | ||
1243 | ///////////////////////////////////////////////////////////////////////////////////////// | ||
1244 | // 0x5800 Entry 28 (size 16 bundles) Reserved | ||
1245 | DBG_FAULT(28) | ||
1246 | FAULT(28) | ||
1247 | |||
1248 | .org ia64_ivt+0x5900 | ||
1249 | ///////////////////////////////////////////////////////////////////////////////////////// | ||
1250 | // 0x5900 Entry 29 (size 16 bundles) Debug (16,28,56) | ||
1251 | ENTRY(debug_vector) | ||
1252 | DBG_FAULT(29) | ||
1253 | FAULT(29) | ||
1254 | END(debug_vector) | ||
1255 | |||
1256 | .org ia64_ivt+0x5a00 | ||
1257 | ///////////////////////////////////////////////////////////////////////////////////////// | ||
1258 | // 0x5a00 Entry 30 (size 16 bundles) Unaligned Reference (57) | ||
1259 | ENTRY(unaligned_access) | ||
1260 | DBG_FAULT(30) | ||
1261 | mov r16=cr.ipsr | ||
1262 | mov r31=pr // prepare to save predicates | ||
1263 | ;; | ||
1264 | br.sptk.many dispatch_unaligned_handler | ||
1265 | END(unaligned_access) | ||
1266 | |||
1267 | .org ia64_ivt+0x5b00 | ||
1268 | ///////////////////////////////////////////////////////////////////////////////////////// | ||
1269 | // 0x5b00 Entry 31 (size 16 bundles) Unsupported Data Reference (57) | ||
1270 | ENTRY(unsupported_data_reference) | ||
1271 | DBG_FAULT(31) | ||
1272 | FAULT(31) | ||
1273 | END(unsupported_data_reference) | ||
1274 | |||
1275 | .org ia64_ivt+0x5c00 | ||
1276 | ///////////////////////////////////////////////////////////////////////////////////////// | ||
1277 | // 0x5c00 Entry 32 (size 16 bundles) Floating-Point Fault (64) | ||
1278 | ENTRY(floating_point_fault) | ||
1279 | DBG_FAULT(32) | ||
1280 | FAULT(32) | ||
1281 | END(floating_point_fault) | ||
1282 | |||
1283 | .org ia64_ivt+0x5d00 | ||
1284 | ///////////////////////////////////////////////////////////////////////////////////////// | ||
1285 | // 0x5d00 Entry 33 (size 16 bundles) Floating Point Trap (66) | ||
1286 | ENTRY(floating_point_trap) | ||
1287 | DBG_FAULT(33) | ||
1288 | FAULT(33) | ||
1289 | END(floating_point_trap) | ||
1290 | |||
1291 | .org ia64_ivt+0x5e00 | ||
1292 | ///////////////////////////////////////////////////////////////////////////////////////// | ||
1293 | // 0x5e00 Entry 34 (size 16 bundles) Lower Privilege Transfer Trap (66) | ||
1294 | ENTRY(lower_privilege_trap) | ||
1295 | DBG_FAULT(34) | ||
1296 | FAULT(34) | ||
1297 | END(lower_privilege_trap) | ||
1298 | |||
1299 | .org ia64_ivt+0x5f00 | ||
1300 | ///////////////////////////////////////////////////////////////////////////////////////// | ||
1301 | // 0x5f00 Entry 35 (size 16 bundles) Taken Branch Trap (68) | ||
1302 | ENTRY(taken_branch_trap) | ||
1303 | DBG_FAULT(35) | ||
1304 | FAULT(35) | ||
1305 | END(taken_branch_trap) | ||
1306 | |||
1307 | .org ia64_ivt+0x6000 | ||
1308 | ///////////////////////////////////////////////////////////////////////////////////////// | ||
1309 | // 0x6000 Entry 36 (size 16 bundles) Single Step Trap (69) | ||
1310 | ENTRY(single_step_trap) | ||
1311 | DBG_FAULT(36) | ||
1312 | FAULT(36) | ||
1313 | END(single_step_trap) | ||
1314 | |||
1315 | .org ia64_ivt+0x6100 | ||
1316 | ///////////////////////////////////////////////////////////////////////////////////////// | ||
1317 | // 0x6100 Entry 37 (size 16 bundles) Reserved | ||
1318 | DBG_FAULT(37) | ||
1319 | FAULT(37) | ||
1320 | |||
1321 | .org ia64_ivt+0x6200 | ||
1322 | ///////////////////////////////////////////////////////////////////////////////////////// | ||
1323 | // 0x6200 Entry 38 (size 16 bundles) Reserved | ||
1324 | DBG_FAULT(38) | ||
1325 | FAULT(38) | ||
1326 | |||
1327 | .org ia64_ivt+0x6300 | ||
1328 | ///////////////////////////////////////////////////////////////////////////////////////// | ||
1329 | // 0x6300 Entry 39 (size 16 bundles) Reserved | ||
1330 | DBG_FAULT(39) | ||
1331 | FAULT(39) | ||
1332 | |||
1333 | .org ia64_ivt+0x6400 | ||
1334 | ///////////////////////////////////////////////////////////////////////////////////////// | ||
1335 | // 0x6400 Entry 40 (size 16 bundles) Reserved | ||
1336 | DBG_FAULT(40) | ||
1337 | FAULT(40) | ||
1338 | |||
1339 | .org ia64_ivt+0x6500 | ||
1340 | ///////////////////////////////////////////////////////////////////////////////////////// | ||
1341 | // 0x6500 Entry 41 (size 16 bundles) Reserved | ||
1342 | DBG_FAULT(41) | ||
1343 | FAULT(41) | ||
1344 | |||
1345 | .org ia64_ivt+0x6600 | ||
1346 | ///////////////////////////////////////////////////////////////////////////////////////// | ||
1347 | // 0x6600 Entry 42 (size 16 bundles) Reserved | ||
1348 | DBG_FAULT(42) | ||
1349 | FAULT(42) | ||
1350 | |||
1351 | .org ia64_ivt+0x6700 | ||
1352 | ///////////////////////////////////////////////////////////////////////////////////////// | ||
1353 | // 0x6700 Entry 43 (size 16 bundles) Reserved | ||
1354 | DBG_FAULT(43) | ||
1355 | FAULT(43) | ||
1356 | |||
1357 | .org ia64_ivt+0x6800 | ||
1358 | ///////////////////////////////////////////////////////////////////////////////////////// | ||
1359 | // 0x6800 Entry 44 (size 16 bundles) Reserved | ||
1360 | DBG_FAULT(44) | ||
1361 | FAULT(44) | ||
1362 | |||
1363 | .org ia64_ivt+0x6900 | ||
1364 | ///////////////////////////////////////////////////////////////////////////////////////// | ||
1365 | // 0x6900 Entry 45 (size 16 bundles) IA-32 Exception (17,18,29,41,42,43,44,58,60,61,62,72,73,75,76,77) | ||
1366 | ENTRY(ia32_exception) | ||
1367 | DBG_FAULT(45) | ||
1368 | FAULT(45) | ||
1369 | END(ia32_exception) | ||
1370 | |||
1371 | .org ia64_ivt+0x6a00 | ||
1372 | ///////////////////////////////////////////////////////////////////////////////////////// | ||
1373 | // 0x6a00 Entry 46 (size 16 bundles) IA-32 Intercept (30,31,59,70,71) | ||
1374 | ENTRY(ia32_intercept) | ||
1375 | DBG_FAULT(46) | ||
1376 | #ifdef CONFIG_IA32_SUPPORT | ||
1377 | mov r31=pr | ||
1378 | mov r16=cr.isr | ||
1379 | ;; | ||
1380 | extr.u r17=r16,16,8 // get ISR.code | ||
1381 | mov r18=ar.eflag | ||
1382 | mov r19=cr.iim // old eflag value | ||
1383 | ;; | ||
1384 | cmp.ne p6,p0=2,r17 | ||
1385 | (p6) br.cond.spnt 1f // not a system flag fault | ||
1386 | xor r16=r18,r19 | ||
1387 | ;; | ||
1388 | extr.u r17=r16,18,1 // get the eflags.ac bit | ||
1389 | ;; | ||
1390 | cmp.eq p6,p0=0,r17 | ||
1391 | (p6) br.cond.spnt 1f // eflags.ac bit didn't change | ||
1392 | ;; | ||
1393 | mov pr=r31,-1 // restore predicate registers | ||
1394 | rfi | ||
1395 | |||
1396 | 1: | ||
1397 | #endif // CONFIG_IA32_SUPPORT | ||
1398 | FAULT(46) | ||
1399 | END(ia32_intercept) | ||
1400 | |||
1401 | .org ia64_ivt+0x6b00 | ||
1402 | ///////////////////////////////////////////////////////////////////////////////////////// | ||
1403 | // 0x6b00 Entry 47 (size 16 bundles) IA-32 Interrupt (74) | ||
1404 | ENTRY(ia32_interrupt) | ||
1405 | DBG_FAULT(47) | ||
1406 | #ifdef CONFIG_IA32_SUPPORT | ||
1407 | mov r31=pr | ||
1408 | br.sptk.many dispatch_to_ia32_handler | ||
1409 | #else | ||
1410 | FAULT(47) | ||
1411 | #endif | ||
1412 | END(ia32_interrupt) | ||
1413 | |||
1414 | .org ia64_ivt+0x6c00 | ||
1415 | ///////////////////////////////////////////////////////////////////////////////////////// | ||
1416 | // 0x6c00 Entry 48 (size 16 bundles) Reserved | ||
1417 | DBG_FAULT(48) | ||
1418 | FAULT(48) | ||
1419 | |||
1420 | .org ia64_ivt+0x6d00 | ||
1421 | ///////////////////////////////////////////////////////////////////////////////////////// | ||
1422 | // 0x6d00 Entry 49 (size 16 bundles) Reserved | ||
1423 | DBG_FAULT(49) | ||
1424 | FAULT(49) | ||
1425 | |||
1426 | .org ia64_ivt+0x6e00 | ||
1427 | ///////////////////////////////////////////////////////////////////////////////////////// | ||
1428 | // 0x6e00 Entry 50 (size 16 bundles) Reserved | ||
1429 | DBG_FAULT(50) | ||
1430 | FAULT(50) | ||
1431 | |||
1432 | .org ia64_ivt+0x6f00 | ||
1433 | ///////////////////////////////////////////////////////////////////////////////////////// | ||
1434 | // 0x6f00 Entry 51 (size 16 bundles) Reserved | ||
1435 | DBG_FAULT(51) | ||
1436 | FAULT(51) | ||
1437 | |||
1438 | .org ia64_ivt+0x7000 | ||
1439 | ///////////////////////////////////////////////////////////////////////////////////////// | ||
1440 | // 0x7000 Entry 52 (size 16 bundles) Reserved | ||
1441 | DBG_FAULT(52) | ||
1442 | FAULT(52) | ||
1443 | |||
1444 | .org ia64_ivt+0x7100 | ||
1445 | ///////////////////////////////////////////////////////////////////////////////////////// | ||
1446 | // 0x7100 Entry 53 (size 16 bundles) Reserved | ||
1447 | DBG_FAULT(53) | ||
1448 | FAULT(53) | ||
1449 | |||
1450 | .org ia64_ivt+0x7200 | ||
1451 | ///////////////////////////////////////////////////////////////////////////////////////// | ||
1452 | // 0x7200 Entry 54 (size 16 bundles) Reserved | ||
1453 | DBG_FAULT(54) | ||
1454 | FAULT(54) | ||
1455 | |||
1456 | .org ia64_ivt+0x7300 | ||
1457 | ///////////////////////////////////////////////////////////////////////////////////////// | ||
1458 | // 0x7300 Entry 55 (size 16 bundles) Reserved | ||
1459 | DBG_FAULT(55) | ||
1460 | FAULT(55) | ||
1461 | |||
1462 | .org ia64_ivt+0x7400 | ||
1463 | ///////////////////////////////////////////////////////////////////////////////////////// | ||
1464 | // 0x7400 Entry 56 (size 16 bundles) Reserved | ||
1465 | DBG_FAULT(56) | ||
1466 | FAULT(56) | ||
1467 | |||
1468 | .org ia64_ivt+0x7500 | ||
1469 | ///////////////////////////////////////////////////////////////////////////////////////// | ||
1470 | // 0x7500 Entry 57 (size 16 bundles) Reserved | ||
1471 | DBG_FAULT(57) | ||
1472 | FAULT(57) | ||
1473 | |||
1474 | .org ia64_ivt+0x7600 | ||
1475 | ///////////////////////////////////////////////////////////////////////////////////////// | ||
1476 | // 0x7600 Entry 58 (size 16 bundles) Reserved | ||
1477 | DBG_FAULT(58) | ||
1478 | FAULT(58) | ||
1479 | |||
1480 | .org ia64_ivt+0x7700 | ||
1481 | ///////////////////////////////////////////////////////////////////////////////////////// | ||
1482 | // 0x7700 Entry 59 (size 16 bundles) Reserved | ||
1483 | DBG_FAULT(59) | ||
1484 | FAULT(59) | ||
1485 | |||
1486 | .org ia64_ivt+0x7800 | ||
1487 | ///////////////////////////////////////////////////////////////////////////////////////// | ||
1488 | // 0x7800 Entry 60 (size 16 bundles) Reserved | ||
1489 | DBG_FAULT(60) | ||
1490 | FAULT(60) | ||
1491 | |||
1492 | .org ia64_ivt+0x7900 | ||
1493 | ///////////////////////////////////////////////////////////////////////////////////////// | ||
1494 | // 0x7900 Entry 61 (size 16 bundles) Reserved | ||
1495 | DBG_FAULT(61) | ||
1496 | FAULT(61) | ||
1497 | |||
1498 | .org ia64_ivt+0x7a00 | ||
1499 | ///////////////////////////////////////////////////////////////////////////////////////// | ||
1500 | // 0x7a00 Entry 62 (size 16 bundles) Reserved | ||
1501 | DBG_FAULT(62) | ||
1502 | FAULT(62) | ||
1503 | |||
1504 | .org ia64_ivt+0x7b00 | ||
1505 | ///////////////////////////////////////////////////////////////////////////////////////// | ||
1506 | // 0x7b00 Entry 63 (size 16 bundles) Reserved | ||
1507 | DBG_FAULT(63) | ||
1508 | FAULT(63) | ||
1509 | |||
1510 | .org ia64_ivt+0x7c00 | ||
1511 | ///////////////////////////////////////////////////////////////////////////////////////// | ||
1512 | // 0x7c00 Entry 64 (size 16 bundles) Reserved | ||
1513 | DBG_FAULT(64) | ||
1514 | FAULT(64) | ||
1515 | |||
1516 | .org ia64_ivt+0x7d00 | ||
1517 | ///////////////////////////////////////////////////////////////////////////////////////// | ||
1518 | // 0x7d00 Entry 65 (size 16 bundles) Reserved | ||
1519 | DBG_FAULT(65) | ||
1520 | FAULT(65) | ||
1521 | |||
1522 | .org ia64_ivt+0x7e00 | ||
1523 | ///////////////////////////////////////////////////////////////////////////////////////// | ||
1524 | // 0x7e00 Entry 66 (size 16 bundles) Reserved | ||
1525 | DBG_FAULT(66) | ||
1526 | FAULT(66) | ||
1527 | |||
1528 | .org ia64_ivt+0x7f00 | ||
1529 | ///////////////////////////////////////////////////////////////////////////////////////// | ||
1530 | // 0x7f00 Entry 67 (size 16 bundles) Reserved | ||
1531 | DBG_FAULT(67) | ||
1532 | FAULT(67) | ||
1533 | |||
1534 | #ifdef CONFIG_IA32_SUPPORT | ||
1535 | |||
1536 | /* | ||
1537 | * There is no particular reason for this code to be here, other than that | ||
1538 | * there happens to be space here that would go unused otherwise. If this | ||
1539 | * fault ever gets "unreserved", simply move the following code to a more | ||
1540 | * suitable spot... | ||
1541 | */ | ||
1542 | |||
1543 | // IA32 interrupt entry point | ||
1544 | |||
1545 | ENTRY(dispatch_to_ia32_handler) | ||
1546 | SAVE_MIN | ||
1547 | ;; | ||
1548 | mov r14=cr.isr | ||
1549 | ssm psr.ic | PSR_DEFAULT_BITS | ||
1550 | ;; | ||
1551 | srlz.i // guarantee that interruption collection is on | ||
1552 | ;; | ||
1553 | (p15) ssm psr.i | ||
1554 | adds r3=8,r2 // Base pointer for SAVE_REST | ||
1555 | ;; | ||
1556 | SAVE_REST | ||
1557 | ;; | ||
1558 | mov r15=0x80 | ||
1559 | shr r14=r14,16 // Get interrupt number | ||
1560 | ;; | ||
1561 | cmp.ne p6,p0=r14,r15 | ||
1562 | (p6) br.call.dpnt.many b6=non_ia32_syscall | ||
1563 | |||
1564 | adds r14=IA64_PT_REGS_R8_OFFSET + 16,sp // 16 byte hole per SW conventions | ||
1565 | adds r15=IA64_PT_REGS_R1_OFFSET + 16,sp | ||
1566 | ;; | ||
1567 | cmp.eq pSys,pNonSys=r0,r0 // set pSys=1, pNonSys=0 | ||
1568 | ld8 r8=[r14] // get r8 | ||
1569 | ;; | ||
1570 | st8 [r15]=r8 // save original EAX in r1 (IA32 procs don't use the GP) | ||
1571 | ;; | ||
1572 | alloc r15=ar.pfs,0,0,6,0 // must first in an insn group | ||
1573 | ;; | ||
1574 | ld4 r8=[r14],8 // r8 == eax (syscall number) | ||
1575 | mov r15=IA32_NR_syscalls | ||
1576 | ;; | ||
1577 | cmp.ltu.unc p6,p7=r8,r15 | ||
1578 | ld4 out1=[r14],8 // r9 == ecx | ||
1579 | ;; | ||
1580 | ld4 out2=[r14],8 // r10 == edx | ||
1581 | ;; | ||
1582 | ld4 out0=[r14] // r11 == ebx | ||
1583 | adds r14=(IA64_PT_REGS_R13_OFFSET) + 16,sp | ||
1584 | ;; | ||
1585 | ld4 out5=[r14],PT(R14)-PT(R13) // r13 == ebp | ||
1586 | ;; | ||
1587 | ld4 out3=[r14],PT(R15)-PT(R14) // r14 == esi | ||
1588 | adds r2=TI_FLAGS+IA64_TASK_SIZE,r13 | ||
1589 | ;; | ||
1590 | ld4 out4=[r14] // r15 == edi | ||
1591 | movl r16=ia32_syscall_table | ||
1592 | ;; | ||
1593 | (p6) shladd r16=r8,3,r16 // force ni_syscall if not valid syscall number | ||
1594 | ld4 r2=[r2] // r2 = current_thread_info()->flags | ||
1595 | ;; | ||
1596 | ld8 r16=[r16] | ||
1597 | and r2=_TIF_SYSCALL_TRACEAUDIT,r2 // mask trace or audit | ||
1598 | ;; | ||
1599 | mov b6=r16 | ||
1600 | movl r15=ia32_ret_from_syscall | ||
1601 | cmp.eq p8,p0=r2,r0 | ||
1602 | ;; | ||
1603 | mov rp=r15 | ||
1604 | (p8) br.call.sptk.many b6=b6 | ||
1605 | br.cond.sptk ia32_trace_syscall | ||
1606 | |||
1607 | non_ia32_syscall: | ||
1608 | alloc r15=ar.pfs,0,0,2,0 | ||
1609 | mov out0=r14 // interrupt # | ||
1610 | add out1=16,sp // pointer to pt_regs | ||
1611 | ;; // avoid WAW on CFM | ||
1612 | br.call.sptk.many rp=ia32_bad_interrupt | ||
1613 | .ret1: movl r15=ia64_leave_kernel | ||
1614 | ;; | ||
1615 | mov rp=r15 | ||
1616 | br.ret.sptk.many rp | ||
1617 | END(dispatch_to_ia32_handler) | ||
1618 | |||
1619 | #endif /* CONFIG_IA32_SUPPORT */ | ||
diff --git a/arch/ia64/kernel/machvec.c b/arch/ia64/kernel/machvec.c new file mode 100644 index 000000000000..c3a04ee7f4f6 --- /dev/null +++ b/arch/ia64/kernel/machvec.c | |||
@@ -0,0 +1,70 @@ | |||
1 | #include <linux/config.h> | ||
2 | #include <linux/module.h> | ||
3 | |||
4 | #include <asm/machvec.h> | ||
5 | #include <asm/system.h> | ||
6 | |||
7 | #ifdef CONFIG_IA64_GENERIC | ||
8 | |||
9 | #include <linux/kernel.h> | ||
10 | #include <linux/string.h> | ||
11 | |||
12 | #include <asm/page.h> | ||
13 | |||
14 | struct ia64_machine_vector ia64_mv; | ||
15 | EXPORT_SYMBOL(ia64_mv); | ||
16 | |||
17 | static struct ia64_machine_vector * | ||
18 | lookup_machvec (const char *name) | ||
19 | { | ||
20 | extern struct ia64_machine_vector machvec_start[]; | ||
21 | extern struct ia64_machine_vector machvec_end[]; | ||
22 | struct ia64_machine_vector *mv; | ||
23 | |||
24 | for (mv = machvec_start; mv < machvec_end; ++mv) | ||
25 | if (strcmp (mv->name, name) == 0) | ||
26 | return mv; | ||
27 | |||
28 | return 0; | ||
29 | } | ||
30 | |||
31 | void | ||
32 | machvec_init (const char *name) | ||
33 | { | ||
34 | struct ia64_machine_vector *mv; | ||
35 | |||
36 | mv = lookup_machvec(name); | ||
37 | if (!mv) { | ||
38 | panic("generic kernel failed to find machine vector for platform %s!", name); | ||
39 | } | ||
40 | ia64_mv = *mv; | ||
41 | printk(KERN_INFO "booting generic kernel on platform %s\n", name); | ||
42 | } | ||
43 | |||
44 | #endif /* CONFIG_IA64_GENERIC */ | ||
45 | |||
/* Setup hook that deliberately does nothing; @arg is ignored. */
void
machvec_setup (char **arg)
{
	/* intentionally empty */
}
50 | EXPORT_SYMBOL(machvec_setup); | ||
51 | |||
/* Timer-interrupt hook that deliberately does nothing; all arguments are ignored. */
void
machvec_timer_interrupt (int irq, void *dev_id, struct pt_regs *regs)
{
	/* intentionally empty */
}
56 | EXPORT_SYMBOL(machvec_timer_interrupt); | ||
57 | |||
58 | void | ||
59 | machvec_dma_sync_single (struct device *hwdev, dma_addr_t dma_handle, size_t size, int dir) | ||
60 | { | ||
61 | mb(); | ||
62 | } | ||
63 | EXPORT_SYMBOL(machvec_dma_sync_single); | ||
64 | |||
/*
 * DMA sync hook for a scatter/gather list.  The only action taken is a call
 * to mb(); none of the arguments are examined.
 */
void
machvec_dma_sync_sg (struct device *hwdev, struct scatterlist *sg, int n, int dir)
{
	mb();
}
70 | EXPORT_SYMBOL(machvec_dma_sync_sg); | ||
diff --git a/arch/ia64/kernel/mca.c b/arch/ia64/kernel/mca.c new file mode 100644 index 000000000000..4d6c7b8f667b --- /dev/null +++ b/arch/ia64/kernel/mca.c | |||
@@ -0,0 +1,1470 @@ | |||
1 | /* | ||
2 | * File: mca.c | ||
3 | * Purpose: Generic MCA handling layer | ||
4 | * | ||
5 | * Updated for latest kernel | ||
6 | * Copyright (C) 2003 Hewlett-Packard Co | ||
7 | * David Mosberger-Tang <davidm@hpl.hp.com> | ||
8 | * | ||
9 | * Copyright (C) 2002 Dell Inc. | ||
10 | * Copyright (C) Matt Domsch (Matt_Domsch@dell.com) | ||
11 | * | ||
12 | * Copyright (C) 2002 Intel | ||
13 | * Copyright (C) Jenna Hall (jenna.s.hall@intel.com) | ||
14 | * | ||
15 | * Copyright (C) 2001 Intel | ||
16 | * Copyright (C) Fred Lewis (frederick.v.lewis@intel.com) | ||
17 | * | ||
18 | * Copyright (C) 2000 Intel | ||
19 | * Copyright (C) Chuck Fleckenstein (cfleck@co.intel.com) | ||
20 | * | ||
21 | * Copyright (C) 1999, 2004 Silicon Graphics, Inc. | ||
22 | * Copyright (C) Vijay Chander(vijay@engr.sgi.com) | ||
23 | * | ||
24 | * 03/04/15 D. Mosberger Added INIT backtrace support. | ||
25 | * 02/03/25 M. Domsch GUID cleanups | ||
26 | * | ||
27 | * 02/01/04 J. Hall Aligned MCA stack to 16 bytes, added platform vs. CPU | ||
28 | * error flag, set SAL default return values, changed | ||
29 | * error record structure to linked list, added init call | ||
30 | * to sal_get_state_info_size(). | ||
31 | * | ||
32 | * 01/01/03 F. Lewis Added setup of CMCI and CPEI IRQs, logging of corrected | ||
33 | * platform errors, completed code for logging of | ||
34 | * corrected & uncorrected machine check errors, and | ||
35 | * updated for conformance with Nov. 2000 revision of the | ||
36 | * SAL 3.0 spec. | ||
37 | * 00/03/29 C. Fleckenstein Fixed PAL/SAL update issues, began MCA bug fixes, logging issues, | ||
38 | * added min save state dump, added INIT handler. | ||
39 | * | ||
40 | * 2003-12-08 Keith Owens <kaos@sgi.com> | ||
41 | * smp_call_function() must not be called from interrupt context (can | ||
42 | * deadlock on tasklist_lock). Use keventd to call smp_call_function(). | ||
43 | * | ||
44 | * 2004-02-01 Keith Owens <kaos@sgi.com> | ||
45 | * Avoid deadlock when using printk() for MCA and INIT records. | ||
46 | * Delete all record printing code, moved to salinfo_decode in user space. | ||
47 | * Mark variables and functions static where possible. | ||
48 | * Delete dead variables and functions. | ||
49 | * Reorder to remove the need for forward declarations and to consolidate | ||
50 | * related code. | ||
51 | */ | ||
52 | #include <linux/config.h> | ||
53 | #include <linux/types.h> | ||
54 | #include <linux/init.h> | ||
55 | #include <linux/sched.h> | ||
56 | #include <linux/interrupt.h> | ||
57 | #include <linux/irq.h> | ||
58 | #include <linux/kallsyms.h> | ||
59 | #include <linux/smp_lock.h> | ||
60 | #include <linux/bootmem.h> | ||
61 | #include <linux/acpi.h> | ||
62 | #include <linux/timer.h> | ||
63 | #include <linux/module.h> | ||
64 | #include <linux/kernel.h> | ||
65 | #include <linux/smp.h> | ||
66 | #include <linux/workqueue.h> | ||
67 | |||
68 | #include <asm/delay.h> | ||
69 | #include <asm/machvec.h> | ||
70 | #include <asm/meminit.h> | ||
71 | #include <asm/page.h> | ||
72 | #include <asm/ptrace.h> | ||
73 | #include <asm/system.h> | ||
74 | #include <asm/sal.h> | ||
75 | #include <asm/mca.h> | ||
76 | |||
77 | #include <asm/irq.h> | ||
78 | #include <asm/hw_irq.h> | ||
79 | |||
80 | #if defined(IA64_MCA_DEBUG_INFO) | ||
81 | # define IA64_MCA_DEBUG(fmt...) printk(fmt) | ||
82 | #else | ||
83 | # define IA64_MCA_DEBUG(fmt...) | ||
84 | #endif | ||
85 | |||
86 | /* Used by mca_asm.S */ | ||
87 | ia64_mca_sal_to_os_state_t ia64_sal_to_os_handoff_state; | ||
88 | ia64_mca_os_to_sal_state_t ia64_os_to_sal_handoff_state; | ||
89 | u64 ia64_mca_serialize; | ||
90 | DEFINE_PER_CPU(u64, ia64_mca_data); /* == __per_cpu_mca[smp_processor_id()] */ | ||
91 | DEFINE_PER_CPU(u64, ia64_mca_per_cpu_pte); /* PTE to map per-CPU area */ | ||
92 | DEFINE_PER_CPU(u64, ia64_mca_pal_pte); /* PTE to map PAL code */ | ||
93 | DEFINE_PER_CPU(u64, ia64_mca_pal_base); /* vaddr PAL code granule */ | ||
94 | |||
95 | unsigned long __per_cpu_mca[NR_CPUS]; | ||
96 | |||
97 | /* In mca_asm.S */ | ||
98 | extern void ia64_monarch_init_handler (void); | ||
99 | extern void ia64_slave_init_handler (void); | ||
100 | |||
101 | static ia64_mc_info_t ia64_mc_info; | ||
102 | |||
103 | #define MAX_CPE_POLL_INTERVAL (15*60*HZ) /* 15 minutes */ | ||
104 | #define MIN_CPE_POLL_INTERVAL (2*60*HZ) /* 2 minutes */ | ||
105 | #define CMC_POLL_INTERVAL (1*60*HZ) /* 1 minute */ | ||
106 | #define CPE_HISTORY_LENGTH 5 | ||
107 | #define CMC_HISTORY_LENGTH 5 | ||
108 | |||
109 | static struct timer_list cpe_poll_timer; | ||
110 | static struct timer_list cmc_poll_timer; | ||
111 | /* | ||
112 | * This variable tells whether we are currently in polling mode. | ||
113 | * Start with this in the wrong state so we won't play w/ timers | ||
114 | * before the system is ready. | ||
115 | */ | ||
116 | static int cmc_polling_enabled = 1; | ||
117 | |||
118 | /* | ||
119 | * Clearing this variable prevents CPE polling from getting activated | ||
120 | * in mca_late_init. Use it if your system doesn't provide a CPEI, | ||
121 | * but encounters problems retrieving CPE logs. This should only be | ||
122 | * necessary for debugging. | ||
123 | */ | ||
124 | static int cpe_poll_enabled = 1; | ||
125 | |||
126 | extern void salinfo_log_wakeup(int type, u8 *buffer, u64 size, int irqsafe); | ||
127 | |||
128 | static int mca_init; | ||
129 | |||
130 | /* | ||
131 | * IA64_MCA log support | ||
132 | */ | ||
133 | #define IA64_MAX_LOGS 2 /* Double-buffering for nested MCAs */ | ||
134 | #define IA64_MAX_LOG_TYPES 4 /* MCA, INIT, CMC, CPE */ | ||
135 | |||
136 | typedef struct ia64_state_log_s | ||
137 | { | ||
138 | spinlock_t isl_lock; | ||
139 | int isl_index; | ||
140 | unsigned long isl_count; | ||
141 | ia64_err_rec_t *isl_log[IA64_MAX_LOGS]; /* need space to store header + error log */ | ||
142 | } ia64_state_log_t; | ||
143 | |||
144 | static ia64_state_log_t ia64_state_log[IA64_MAX_LOG_TYPES]; | ||
145 | |||
/*
 * Accessors for the double-buffered log state kept in ia64_state_log[].
 * "it" is the SAL info type and is used directly as the array index.
 * isl_index names the "next" buffer; the "current" buffer is the other one
 * (1 - isl_index).
 *
 * Every expression macro is fully parenthesized and every multi-statement
 * macro is wrapped in do { } while (0), so they expand safely inside larger
 * expressions and unbraced if/else bodies.
 *
 * IA64_LOG_LOCK/IA64_LOG_UNLOCK expect the caller to have a local variable
 * named "s" that holds the saved interrupt flags.
 */
#define IA64_LOG_ALLOCATE(it, size) \
	do { \
		ia64_state_log[it].isl_log[IA64_LOG_CURR_INDEX(it)] = \
			(ia64_err_rec_t *)alloc_bootmem(size); \
		ia64_state_log[it].isl_log[IA64_LOG_NEXT_INDEX(it)] = \
			(ia64_err_rec_t *)alloc_bootmem(size); \
	} while (0)
#define IA64_LOG_LOCK_INIT(it) spin_lock_init(&ia64_state_log[it].isl_lock)
#define IA64_LOG_LOCK(it)      spin_lock_irqsave(&ia64_state_log[it].isl_lock, s)
#define IA64_LOG_UNLOCK(it)    spin_unlock_irqrestore(&ia64_state_log[it].isl_lock,s)
/* Still an lvalue: callers assign to IA64_LOG_NEXT_INDEX(it). */
#define IA64_LOG_NEXT_INDEX(it)    (ia64_state_log[it].isl_index)
#define IA64_LOG_CURR_INDEX(it)    (1 - ia64_state_log[it].isl_index)
/* Swap the next/current buffers and count one more logged record. */
#define IA64_LOG_INDEX_INC(it) \
	do { \
		ia64_state_log[it].isl_index = 1 - ia64_state_log[it].isl_index; \
		ia64_state_log[it].isl_count++; \
	} while (0)
/* Swap the buffers back without touching the record count. */
#define IA64_LOG_INDEX_DEC(it) \
	(ia64_state_log[it].isl_index = 1 - ia64_state_log[it].isl_index)
#define IA64_LOG_NEXT_BUFFER(it)   ((void *)(ia64_state_log[it].isl_log[IA64_LOG_NEXT_INDEX(it)]))
#define IA64_LOG_CURR_BUFFER(it)   ((void *)(ia64_state_log[it].isl_log[IA64_LOG_CURR_INDEX(it)]))
#define IA64_LOG_COUNT(it)         (ia64_state_log[it].isl_count)
164 | |||
165 | /* | ||
166 | * ia64_log_init | ||
167 | * Reset the OS ia64 log buffer | ||
168 | * Inputs : info_type (SAL_INFO_TYPE_{MCA,INIT,CMC,CPE}) | ||
169 | * Outputs : None | ||
170 | */ | ||
171 | static void | ||
172 | ia64_log_init(int sal_info_type) | ||
173 | { | ||
174 | u64 max_size = 0; | ||
175 | |||
176 | IA64_LOG_NEXT_INDEX(sal_info_type) = 0; | ||
177 | IA64_LOG_LOCK_INIT(sal_info_type); | ||
178 | |||
179 | // SAL will tell us the maximum size of any error record of this type | ||
180 | max_size = ia64_sal_get_state_info_size(sal_info_type); | ||
181 | if (!max_size) | ||
182 | /* alloc_bootmem() doesn't like zero-sized allocations! */ | ||
183 | return; | ||
184 | |||
185 | // set up OS data structures to hold error info | ||
186 | IA64_LOG_ALLOCATE(sal_info_type, max_size); | ||
187 | memset(IA64_LOG_CURR_BUFFER(sal_info_type), 0, max_size); | ||
188 | memset(IA64_LOG_NEXT_BUFFER(sal_info_type), 0, max_size); | ||
189 | } | ||
190 | |||
191 | /* | ||
192 | * ia64_log_get | ||
193 | * | ||
194 | * Get the current MCA log from SAL and copy it into the OS log buffer. | ||
195 | * | ||
196 | * Inputs : info_type (SAL_INFO_TYPE_{MCA,INIT,CMC,CPE}) | ||
197 | * irq_safe whether you can use printk at this point | ||
198 | * Outputs : size (total record length) | ||
199 | * *buffer (ptr to error record) | ||
200 | * | ||
201 | */ | ||
202 | static u64 | ||
203 | ia64_log_get(int sal_info_type, u8 **buffer, int irq_safe) | ||
204 | { | ||
205 | sal_log_record_header_t *log_buffer; | ||
206 | u64 total_len = 0; | ||
207 | int s; | ||
208 | |||
209 | IA64_LOG_LOCK(sal_info_type); | ||
210 | |||
211 | /* Get the process state information */ | ||
212 | log_buffer = IA64_LOG_NEXT_BUFFER(sal_info_type); | ||
213 | |||
214 | total_len = ia64_sal_get_state_info(sal_info_type, (u64 *)log_buffer); | ||
215 | |||
216 | if (total_len) { | ||
217 | IA64_LOG_INDEX_INC(sal_info_type); | ||
218 | IA64_LOG_UNLOCK(sal_info_type); | ||
219 | if (irq_safe) { | ||
220 | IA64_MCA_DEBUG("%s: SAL error record type %d retrieved. " | ||
221 | "Record length = %ld\n", __FUNCTION__, sal_info_type, total_len); | ||
222 | } | ||
223 | *buffer = (u8 *) log_buffer; | ||
224 | return total_len; | ||
225 | } else { | ||
226 | IA64_LOG_UNLOCK(sal_info_type); | ||
227 | return 0; | ||
228 | } | ||
229 | } | ||
230 | |||
231 | /* | ||
232 | * ia64_mca_log_sal_error_record | ||
233 | * | ||
234 | * This function retrieves a specified error record type from SAL | ||
235 | * and wakes up any processes waiting for error records. | ||
236 | * | ||
237 | * Inputs : sal_info_type (Type of error record MCA/CMC/CPE/INIT) | ||
238 | */ | ||
239 | static void | ||
240 | ia64_mca_log_sal_error_record(int sal_info_type) | ||
241 | { | ||
242 | u8 *buffer; | ||
243 | sal_log_record_header_t *rh; | ||
244 | u64 size; | ||
245 | int irq_safe = sal_info_type != SAL_INFO_TYPE_MCA && sal_info_type != SAL_INFO_TYPE_INIT; | ||
246 | #ifdef IA64_MCA_DEBUG_INFO | ||
247 | static const char * const rec_name[] = { "MCA", "INIT", "CMC", "CPE" }; | ||
248 | #endif | ||
249 | |||
250 | size = ia64_log_get(sal_info_type, &buffer, irq_safe); | ||
251 | if (!size) | ||
252 | return; | ||
253 | |||
254 | salinfo_log_wakeup(sal_info_type, buffer, size, irq_safe); | ||
255 | |||
256 | if (irq_safe) | ||
257 | IA64_MCA_DEBUG("CPU %d: SAL log contains %s error record\n", | ||
258 | smp_processor_id(), | ||
259 | sal_info_type < ARRAY_SIZE(rec_name) ? rec_name[sal_info_type] : "UNKNOWN"); | ||
260 | |||
261 | /* Clear logs from corrected errors in case there's no user-level logger */ | ||
262 | rh = (sal_log_record_header_t *)buffer; | ||
263 | if (rh->severity == sal_log_severity_corrected) | ||
264 | ia64_sal_clear_state_info(sal_info_type); | ||
265 | } | ||
266 | |||
267 | /* | ||
268 | * platform dependent error handling | ||
269 | */ | ||
270 | #ifndef PLATFORM_MCA_HANDLERS | ||
271 | |||
272 | #ifdef CONFIG_ACPI | ||
273 | |||
274 | static int cpe_vector = -1; | ||
275 | |||
276 | static irqreturn_t | ||
277 | ia64_mca_cpe_int_handler (int cpe_irq, void *arg, struct pt_regs *ptregs) | ||
278 | { | ||
279 | static unsigned long cpe_history[CPE_HISTORY_LENGTH]; | ||
280 | static int index; | ||
281 | static DEFINE_SPINLOCK(cpe_history_lock); | ||
282 | |||
283 | IA64_MCA_DEBUG("%s: received interrupt vector = %#x on CPU %d\n", | ||
284 | __FUNCTION__, cpe_irq, smp_processor_id()); | ||
285 | |||
286 | /* SAL spec states this should run w/ interrupts enabled */ | ||
287 | local_irq_enable(); | ||
288 | |||
289 | /* Get the CPE error record and log it */ | ||
290 | ia64_mca_log_sal_error_record(SAL_INFO_TYPE_CPE); | ||
291 | |||
292 | spin_lock(&cpe_history_lock); | ||
293 | if (!cpe_poll_enabled && cpe_vector >= 0) { | ||
294 | |||
295 | int i, count = 1; /* we know 1 happened now */ | ||
296 | unsigned long now = jiffies; | ||
297 | |||
298 | for (i = 0; i < CPE_HISTORY_LENGTH; i++) { | ||
299 | if (now - cpe_history[i] <= HZ) | ||
300 | count++; | ||
301 | } | ||
302 | |||
303 | IA64_MCA_DEBUG(KERN_INFO "CPE threshold %d/%d\n", count, CPE_HISTORY_LENGTH); | ||
304 | if (count >= CPE_HISTORY_LENGTH) { | ||
305 | |||
306 | cpe_poll_enabled = 1; | ||
307 | spin_unlock(&cpe_history_lock); | ||
308 | disable_irq_nosync(local_vector_to_irq(IA64_CPE_VECTOR)); | ||
309 | |||
310 | /* | ||
311 | * Corrected errors will still be corrected, but | ||
312 | * make sure there's a log somewhere that indicates | ||
313 | * something is generating more than we can handle. | ||
314 | */ | ||
315 | printk(KERN_WARNING "WARNING: Switching to polling CPE handler; error records may be lost\n"); | ||
316 | |||
317 | mod_timer(&cpe_poll_timer, jiffies + MIN_CPE_POLL_INTERVAL); | ||
318 | |||
319 | /* lock already released, get out now */ | ||
320 | return IRQ_HANDLED; | ||
321 | } else { | ||
322 | cpe_history[index++] = now; | ||
323 | if (index == CPE_HISTORY_LENGTH) | ||
324 | index = 0; | ||
325 | } | ||
326 | } | ||
327 | spin_unlock(&cpe_history_lock); | ||
328 | return IRQ_HANDLED; | ||
329 | } | ||
330 | |||
331 | #endif /* CONFIG_ACPI */ | ||
332 | |||
333 | static void | ||
334 | show_min_state (pal_min_state_area_t *minstate) | ||
335 | { | ||
336 | u64 iip = minstate->pmsa_iip + ((struct ia64_psr *)(&minstate->pmsa_ipsr))->ri; | ||
337 | u64 xip = minstate->pmsa_xip + ((struct ia64_psr *)(&minstate->pmsa_xpsr))->ri; | ||
338 | |||
339 | printk("NaT bits\t%016lx\n", minstate->pmsa_nat_bits); | ||
340 | printk("pr\t\t%016lx\n", minstate->pmsa_pr); | ||
341 | printk("b0\t\t%016lx ", minstate->pmsa_br0); print_symbol("%s\n", minstate->pmsa_br0); | ||
342 | printk("ar.rsc\t\t%016lx\n", minstate->pmsa_rsc); | ||
343 | printk("cr.iip\t\t%016lx ", iip); print_symbol("%s\n", iip); | ||
344 | printk("cr.ipsr\t\t%016lx\n", minstate->pmsa_ipsr); | ||
345 | printk("cr.ifs\t\t%016lx\n", minstate->pmsa_ifs); | ||
346 | printk("xip\t\t%016lx ", xip); print_symbol("%s\n", xip); | ||
347 | printk("xpsr\t\t%016lx\n", minstate->pmsa_xpsr); | ||
348 | printk("xfs\t\t%016lx\n", minstate->pmsa_xfs); | ||
349 | printk("b1\t\t%016lx ", minstate->pmsa_br1); | ||
350 | print_symbol("%s\n", minstate->pmsa_br1); | ||
351 | |||
352 | printk("\nstatic registers r0-r15:\n"); | ||
353 | printk(" r0- 3 %016lx %016lx %016lx %016lx\n", | ||
354 | 0UL, minstate->pmsa_gr[0], minstate->pmsa_gr[1], minstate->pmsa_gr[2]); | ||
355 | printk(" r4- 7 %016lx %016lx %016lx %016lx\n", | ||
356 | minstate->pmsa_gr[3], minstate->pmsa_gr[4], | ||
357 | minstate->pmsa_gr[5], minstate->pmsa_gr[6]); | ||
358 | printk(" r8-11 %016lx %016lx %016lx %016lx\n", | ||
359 | minstate->pmsa_gr[7], minstate->pmsa_gr[8], | ||
360 | minstate->pmsa_gr[9], minstate->pmsa_gr[10]); | ||
361 | printk("r12-15 %016lx %016lx %016lx %016lx\n", | ||
362 | minstate->pmsa_gr[11], minstate->pmsa_gr[12], | ||
363 | minstate->pmsa_gr[13], minstate->pmsa_gr[14]); | ||
364 | |||
365 | printk("\nbank 0:\n"); | ||
366 | printk("r16-19 %016lx %016lx %016lx %016lx\n", | ||
367 | minstate->pmsa_bank0_gr[0], minstate->pmsa_bank0_gr[1], | ||
368 | minstate->pmsa_bank0_gr[2], minstate->pmsa_bank0_gr[3]); | ||
369 | printk("r20-23 %016lx %016lx %016lx %016lx\n", | ||
370 | minstate->pmsa_bank0_gr[4], minstate->pmsa_bank0_gr[5], | ||
371 | minstate->pmsa_bank0_gr[6], minstate->pmsa_bank0_gr[7]); | ||
372 | printk("r24-27 %016lx %016lx %016lx %016lx\n", | ||
373 | minstate->pmsa_bank0_gr[8], minstate->pmsa_bank0_gr[9], | ||
374 | minstate->pmsa_bank0_gr[10], minstate->pmsa_bank0_gr[11]); | ||
375 | printk("r28-31 %016lx %016lx %016lx %016lx\n", | ||
376 | minstate->pmsa_bank0_gr[12], minstate->pmsa_bank0_gr[13], | ||
377 | minstate->pmsa_bank0_gr[14], minstate->pmsa_bank0_gr[15]); | ||
378 | |||
379 | printk("\nbank 1:\n"); | ||
380 | printk("r16-19 %016lx %016lx %016lx %016lx\n", | ||
381 | minstate->pmsa_bank1_gr[0], minstate->pmsa_bank1_gr[1], | ||
382 | minstate->pmsa_bank1_gr[2], minstate->pmsa_bank1_gr[3]); | ||
383 | printk("r20-23 %016lx %016lx %016lx %016lx\n", | ||
384 | minstate->pmsa_bank1_gr[4], minstate->pmsa_bank1_gr[5], | ||
385 | minstate->pmsa_bank1_gr[6], minstate->pmsa_bank1_gr[7]); | ||
386 | printk("r24-27 %016lx %016lx %016lx %016lx\n", | ||
387 | minstate->pmsa_bank1_gr[8], minstate->pmsa_bank1_gr[9], | ||
388 | minstate->pmsa_bank1_gr[10], minstate->pmsa_bank1_gr[11]); | ||
389 | printk("r28-31 %016lx %016lx %016lx %016lx\n", | ||
390 | minstate->pmsa_bank1_gr[12], minstate->pmsa_bank1_gr[13], | ||
391 | minstate->pmsa_bank1_gr[14], minstate->pmsa_bank1_gr[15]); | ||
392 | } | ||
393 | |||
394 | static void | ||
395 | fetch_min_state (pal_min_state_area_t *ms, struct pt_regs *pt, struct switch_stack *sw) | ||
396 | { | ||
397 | u64 *dst_banked, *src_banked, bit, shift, nat_bits; | ||
398 | int i; | ||
399 | |||
400 | /* | ||
401 | * First, update the pt-regs and switch-stack structures with the contents stored | ||
402 | * in the min-state area: | ||
403 | */ | ||
404 | if (((struct ia64_psr *) &ms->pmsa_ipsr)->ic == 0) { | ||
405 | pt->cr_ipsr = ms->pmsa_xpsr; | ||
406 | pt->cr_iip = ms->pmsa_xip; | ||
407 | pt->cr_ifs = ms->pmsa_xfs; | ||
408 | } else { | ||
409 | pt->cr_ipsr = ms->pmsa_ipsr; | ||
410 | pt->cr_iip = ms->pmsa_iip; | ||
411 | pt->cr_ifs = ms->pmsa_ifs; | ||
412 | } | ||
413 | pt->ar_rsc = ms->pmsa_rsc; | ||
414 | pt->pr = ms->pmsa_pr; | ||
415 | pt->r1 = ms->pmsa_gr[0]; | ||
416 | pt->r2 = ms->pmsa_gr[1]; | ||
417 | pt->r3 = ms->pmsa_gr[2]; | ||
418 | sw->r4 = ms->pmsa_gr[3]; | ||
419 | sw->r5 = ms->pmsa_gr[4]; | ||
420 | sw->r6 = ms->pmsa_gr[5]; | ||
421 | sw->r7 = ms->pmsa_gr[6]; | ||
422 | pt->r8 = ms->pmsa_gr[7]; | ||
423 | pt->r9 = ms->pmsa_gr[8]; | ||
424 | pt->r10 = ms->pmsa_gr[9]; | ||
425 | pt->r11 = ms->pmsa_gr[10]; | ||
426 | pt->r12 = ms->pmsa_gr[11]; | ||
427 | pt->r13 = ms->pmsa_gr[12]; | ||
428 | pt->r14 = ms->pmsa_gr[13]; | ||
429 | pt->r15 = ms->pmsa_gr[14]; | ||
430 | dst_banked = &pt->r16; /* r16-r31 are contiguous in struct pt_regs */ | ||
431 | src_banked = ms->pmsa_bank1_gr; | ||
432 | for (i = 0; i < 16; ++i) | ||
433 | dst_banked[i] = src_banked[i]; | ||
434 | pt->b0 = ms->pmsa_br0; | ||
435 | sw->b1 = ms->pmsa_br1; | ||
436 | |||
437 | /* construct the NaT bits for the pt-regs structure: */ | ||
438 | # define PUT_NAT_BIT(dst, addr) \ | ||
439 | do { \ | ||
440 | bit = nat_bits & 1; nat_bits >>= 1; \ | ||
441 | shift = ((unsigned long) addr >> 3) & 0x3f; \ | ||
442 | dst = ((dst) & ~(1UL << shift)) | (bit << shift); \ | ||
443 | } while (0) | ||
444 | |||
445 | /* Rotate the saved NaT bits such that bit 0 corresponds to pmsa_gr[0]: */ | ||
446 | shift = ((unsigned long) &ms->pmsa_gr[0] >> 3) & 0x3f; | ||
447 | nat_bits = (ms->pmsa_nat_bits >> shift) | (ms->pmsa_nat_bits << (64 - shift)); | ||
448 | |||
449 | PUT_NAT_BIT(sw->caller_unat, &pt->r1); | ||
450 | PUT_NAT_BIT(sw->caller_unat, &pt->r2); | ||
451 | PUT_NAT_BIT(sw->caller_unat, &pt->r3); | ||
452 | PUT_NAT_BIT(sw->ar_unat, &sw->r4); | ||
453 | PUT_NAT_BIT(sw->ar_unat, &sw->r5); | ||
454 | PUT_NAT_BIT(sw->ar_unat, &sw->r6); | ||
455 | PUT_NAT_BIT(sw->ar_unat, &sw->r7); | ||
456 | PUT_NAT_BIT(sw->caller_unat, &pt->r8); PUT_NAT_BIT(sw->caller_unat, &pt->r9); | ||
457 | PUT_NAT_BIT(sw->caller_unat, &pt->r10); PUT_NAT_BIT(sw->caller_unat, &pt->r11); | ||
458 | PUT_NAT_BIT(sw->caller_unat, &pt->r12); PUT_NAT_BIT(sw->caller_unat, &pt->r13); | ||
459 | PUT_NAT_BIT(sw->caller_unat, &pt->r14); PUT_NAT_BIT(sw->caller_unat, &pt->r15); | ||
460 | nat_bits >>= 16; /* skip over bank0 NaT bits */ | ||
461 | PUT_NAT_BIT(sw->caller_unat, &pt->r16); PUT_NAT_BIT(sw->caller_unat, &pt->r17); | ||
462 | PUT_NAT_BIT(sw->caller_unat, &pt->r18); PUT_NAT_BIT(sw->caller_unat, &pt->r19); | ||
463 | PUT_NAT_BIT(sw->caller_unat, &pt->r20); PUT_NAT_BIT(sw->caller_unat, &pt->r21); | ||
464 | PUT_NAT_BIT(sw->caller_unat, &pt->r22); PUT_NAT_BIT(sw->caller_unat, &pt->r23); | ||
465 | PUT_NAT_BIT(sw->caller_unat, &pt->r24); PUT_NAT_BIT(sw->caller_unat, &pt->r25); | ||
466 | PUT_NAT_BIT(sw->caller_unat, &pt->r26); PUT_NAT_BIT(sw->caller_unat, &pt->r27); | ||
467 | PUT_NAT_BIT(sw->caller_unat, &pt->r28); PUT_NAT_BIT(sw->caller_unat, &pt->r29); | ||
468 | PUT_NAT_BIT(sw->caller_unat, &pt->r30); PUT_NAT_BIT(sw->caller_unat, &pt->r31); | ||
469 | } | ||
470 | |||
471 | static void | ||
472 | init_handler_platform (pal_min_state_area_t *ms, | ||
473 | struct pt_regs *pt, struct switch_stack *sw) | ||
474 | { | ||
475 | struct unw_frame_info info; | ||
476 | |||
477 | /* if a kernel debugger is available call it here else just dump the registers */ | ||
478 | |||
479 | /* | ||
480 | * Wait for a bit. On some machines (e.g., HP's zx2000 and zx6000, INIT can be | ||
481 | * generated via the BMC's command-line interface, but since the console is on the | ||
482 | * same serial line, the user will need some time to switch out of the BMC before | ||
483 | * the dump begins. | ||
484 | */ | ||
485 | printk("Delaying for 5 seconds...\n"); | ||
486 | udelay(5*1000000); | ||
487 | show_min_state(ms); | ||
488 | |||
489 | printk("Backtrace of current task (pid %d, %s)\n", current->pid, current->comm); | ||
490 | fetch_min_state(ms, pt, sw); | ||
491 | unw_init_from_interruption(&info, current, pt, sw); | ||
492 | ia64_do_show_stack(&info, NULL); | ||
493 | |||
494 | #ifdef CONFIG_SMP | ||
495 | /* read_trylock() would be handy... */ | ||
496 | if (!tasklist_lock.write_lock) | ||
497 | read_lock(&tasklist_lock); | ||
498 | #endif | ||
499 | { | ||
500 | struct task_struct *g, *t; | ||
501 | do_each_thread (g, t) { | ||
502 | if (t == current) | ||
503 | continue; | ||
504 | |||
505 | printk("\nBacktrace of pid %d (%s)\n", t->pid, t->comm); | ||
506 | show_stack(t, NULL); | ||
507 | } while_each_thread (g, t); | ||
508 | } | ||
509 | #ifdef CONFIG_SMP | ||
510 | if (!tasklist_lock.write_lock) | ||
511 | read_unlock(&tasklist_lock); | ||
512 | #endif | ||
513 | |||
514 | printk("\nINIT dump complete. Please reboot now.\n"); | ||
515 | while (1); /* hang city if no debugger */ | ||
516 | } | ||
517 | |||
518 | #ifdef CONFIG_ACPI | ||
519 | /* | ||
520 | * ia64_mca_register_cpev | ||
521 | * | ||
522 | * Register the corrected platform error vector with SAL. | ||
523 | * | ||
524 | * Inputs | ||
525 | * cpev Corrected Platform Error Vector number | ||
526 | * | ||
527 | * Outputs | ||
528 | * None | ||
529 | */ | ||
530 | static void | ||
531 | ia64_mca_register_cpev (int cpev) | ||
532 | { | ||
533 | /* Register the CPE interrupt vector with SAL */ | ||
534 | struct ia64_sal_retval isrv; | ||
535 | |||
536 | isrv = ia64_sal_mc_set_params(SAL_MC_PARAM_CPE_INT, SAL_MC_PARAM_MECHANISM_INT, cpev, 0, 0); | ||
537 | if (isrv.status) { | ||
538 | printk(KERN_ERR "Failed to register Corrected Platform " | ||
539 | "Error interrupt vector with SAL (status %ld)\n", isrv.status); | ||
540 | return; | ||
541 | } | ||
542 | |||
543 | IA64_MCA_DEBUG("%s: corrected platform error " | ||
544 | "vector %#x registered\n", __FUNCTION__, cpev); | ||
545 | } | ||
546 | #endif /* CONFIG_ACPI */ | ||
547 | |||
548 | #endif /* PLATFORM_MCA_HANDLERS */ | ||
549 | |||
550 | /* | ||
551 | * ia64_mca_cmc_vector_setup | ||
552 | * | ||
553 | * Setup the corrected machine check vector register in the processor. | ||
554 | * (The interrupt is masked on boot. ia64_mca_late_init unmask this.) | ||
555 | * This function is invoked on a per-processor basis. | ||
556 | * | ||
557 | * Inputs | ||
558 | * None | ||
559 | * | ||
560 | * Outputs | ||
561 | * None | ||
562 | */ | ||
563 | void | ||
564 | ia64_mca_cmc_vector_setup (void) | ||
565 | { | ||
566 | cmcv_reg_t cmcv; | ||
567 | |||
568 | cmcv.cmcv_regval = 0; | ||
569 | cmcv.cmcv_mask = 1; /* Mask/disable interrupt at first */ | ||
570 | cmcv.cmcv_vector = IA64_CMC_VECTOR; | ||
571 | ia64_setreg(_IA64_REG_CR_CMCV, cmcv.cmcv_regval); | ||
572 | |||
573 | IA64_MCA_DEBUG("%s: CPU %d corrected " | ||
574 | "machine check vector %#x registered.\n", | ||
575 | __FUNCTION__, smp_processor_id(), IA64_CMC_VECTOR); | ||
576 | |||
577 | IA64_MCA_DEBUG("%s: CPU %d CMCV = %#016lx\n", | ||
578 | __FUNCTION__, smp_processor_id(), ia64_getreg(_IA64_REG_CR_CMCV)); | ||
579 | } | ||
580 | |||
581 | /* | ||
582 | * ia64_mca_cmc_vector_disable | ||
583 | * | ||
584 | * Mask the corrected machine check vector register in the processor. | ||
585 | * This function is invoked on a per-processor basis. | ||
586 | * | ||
587 | * Inputs | ||
588 | * dummy(unused) | ||
589 | * | ||
590 | * Outputs | ||
591 | * None | ||
592 | */ | ||
593 | static void | ||
594 | ia64_mca_cmc_vector_disable (void *dummy) | ||
595 | { | ||
596 | cmcv_reg_t cmcv; | ||
597 | |||
598 | cmcv.cmcv_regval = ia64_getreg(_IA64_REG_CR_CMCV); | ||
599 | |||
600 | cmcv.cmcv_mask = 1; /* Mask/disable interrupt */ | ||
601 | ia64_setreg(_IA64_REG_CR_CMCV, cmcv.cmcv_regval); | ||
602 | |||
603 | IA64_MCA_DEBUG("%s: CPU %d corrected " | ||
604 | "machine check vector %#x disabled.\n", | ||
605 | __FUNCTION__, smp_processor_id(), cmcv.cmcv_vector); | ||
606 | } | ||
607 | |||
608 | /* | ||
609 | * ia64_mca_cmc_vector_enable | ||
610 | * | ||
611 | * Unmask the corrected machine check vector register in the processor. | ||
612 | * This function is invoked on a per-processor basis. | ||
613 | * | ||
614 | * Inputs | ||
615 | * dummy(unused) | ||
616 | * | ||
617 | * Outputs | ||
618 | * None | ||
619 | */ | ||
620 | static void | ||
621 | ia64_mca_cmc_vector_enable (void *dummy) | ||
622 | { | ||
623 | cmcv_reg_t cmcv; | ||
624 | |||
625 | cmcv.cmcv_regval = ia64_getreg(_IA64_REG_CR_CMCV); | ||
626 | |||
627 | cmcv.cmcv_mask = 0; /* Unmask/enable interrupt */ | ||
628 | ia64_setreg(_IA64_REG_CR_CMCV, cmcv.cmcv_regval); | ||
629 | |||
630 | IA64_MCA_DEBUG("%s: CPU %d corrected " | ||
631 | "machine check vector %#x enabled.\n", | ||
632 | __FUNCTION__, smp_processor_id(), cmcv.cmcv_vector); | ||
633 | } | ||
634 | |||
635 | /* | ||
636 | * ia64_mca_cmc_vector_disable_keventd | ||
637 | * | ||
638 | * Called via keventd (smp_call_function() is not safe in interrupt context) to | ||
639 | * disable the cmc interrupt vector. | ||
640 | */ | ||
641 | static void | ||
642 | ia64_mca_cmc_vector_disable_keventd(void *unused) | ||
643 | { | ||
644 | on_each_cpu(ia64_mca_cmc_vector_disable, NULL, 1, 0); | ||
645 | } | ||
646 | |||
647 | /* | ||
648 | * ia64_mca_cmc_vector_enable_keventd | ||
649 | * | ||
650 | * Called via keventd (smp_call_function() is not safe in interrupt context) to | ||
651 | * enable the cmc interrupt vector. | ||
652 | */ | ||
653 | static void | ||
654 | ia64_mca_cmc_vector_enable_keventd(void *unused) | ||
655 | { | ||
656 | on_each_cpu(ia64_mca_cmc_vector_enable, NULL, 1, 0); | ||
657 | } | ||
658 | |||
659 | /* | ||
660 | * ia64_mca_wakeup_ipi_wait | ||
661 | * | ||
662 | * Wait for the inter-cpu interrupt to be sent by the | ||
663 | * monarch processor once it is done with handling the | ||
664 | * MCA. | ||
665 | * | ||
666 | * Inputs : None | ||
667 | * Outputs : None | ||
668 | */ | ||
669 | static void | ||
670 | ia64_mca_wakeup_ipi_wait(void) | ||
671 | { | ||
672 | int irr_num = (IA64_MCA_WAKEUP_VECTOR >> 6); | ||
673 | int irr_bit = (IA64_MCA_WAKEUP_VECTOR & 0x3f); | ||
674 | u64 irr = 0; | ||
675 | |||
676 | do { | ||
677 | switch(irr_num) { | ||
678 | case 0: | ||
679 | irr = ia64_getreg(_IA64_REG_CR_IRR0); | ||
680 | break; | ||
681 | case 1: | ||
682 | irr = ia64_getreg(_IA64_REG_CR_IRR1); | ||
683 | break; | ||
684 | case 2: | ||
685 | irr = ia64_getreg(_IA64_REG_CR_IRR2); | ||
686 | break; | ||
687 | case 3: | ||
688 | irr = ia64_getreg(_IA64_REG_CR_IRR3); | ||
689 | break; | ||
690 | } | ||
691 | cpu_relax(); | ||
692 | } while (!(irr & (1UL << irr_bit))) ; | ||
693 | } | ||
694 | |||
695 | /* | ||
696 | * ia64_mca_wakeup | ||
697 | * | ||
698 | * Send an inter-cpu interrupt to wake-up a particular cpu | ||
699 | * and mark that cpu to be out of rendez. | ||
700 | * | ||
701 | * Inputs : cpuid | ||
702 | * Outputs : None | ||
703 | */ | ||
704 | static void | ||
705 | ia64_mca_wakeup(int cpu) | ||
706 | { | ||
707 | platform_send_ipi(cpu, IA64_MCA_WAKEUP_VECTOR, IA64_IPI_DM_INT, 0); | ||
708 | ia64_mc_info.imi_rendez_checkin[cpu] = IA64_MCA_RENDEZ_CHECKIN_NOTDONE; | ||
709 | |||
710 | } | ||
711 | |||
712 | /* | ||
713 | * ia64_mca_wakeup_all | ||
714 | * | ||
715 | * Wakeup all the cpus which have rendez'ed previously. | ||
716 | * | ||
717 | * Inputs : None | ||
718 | * Outputs : None | ||
719 | */ | ||
720 | static void | ||
721 | ia64_mca_wakeup_all(void) | ||
722 | { | ||
723 | int cpu; | ||
724 | |||
725 | /* Clear the Rendez checkin flag for all cpus */ | ||
726 | for(cpu = 0; cpu < NR_CPUS; cpu++) { | ||
727 | if (!cpu_online(cpu)) | ||
728 | continue; | ||
729 | if (ia64_mc_info.imi_rendez_checkin[cpu] == IA64_MCA_RENDEZ_CHECKIN_DONE) | ||
730 | ia64_mca_wakeup(cpu); | ||
731 | } | ||
732 | |||
733 | } | ||
734 | |||
735 | /* | ||
736 | * ia64_mca_rendez_int_handler | ||
737 | * | ||
738 | * This is the handler used to put slave processors into a spinloop | ||
739 | * while the monarch processor does the mca handling; the monarch later | ||
740 | * wakes each slave up once it is done. | ||
741 | * | ||
742 | * Inputs : rendez_irq, arg, ptregs (none of them is used) | ||
743 | * Outputs : IRQ_HANDLED | ||
744 | */ | ||
745 | static irqreturn_t | ||
746 | ia64_mca_rendez_int_handler(int rendez_irq, void *arg, struct pt_regs *ptregs) | ||
747 | { | ||
748 | unsigned long flags; | ||
749 | int cpu = smp_processor_id(); | ||
750 | |||
751 | /* Mask all interrupts */ | ||
752 | local_irq_save(flags); | ||
753 | /* Mark this cpu as checked in; ia64_mca_wakeup_all() only wakes cpus flagged DONE */ | ||
754 | ia64_mc_info.imi_rendez_checkin[cpu] = IA64_MCA_RENDEZ_CHECKIN_DONE; | ||
755 | /* Register with the SAL monarch that the slave has | ||
756 | * reached SAL | ||
757 | */ | ||
758 | ia64_sal_mc_rendez(); | ||
759 | |||
760 | /* Wait for the wakeup IPI from the monarch | ||
761 | * This waiting is done by polling on the wakeup-interrupt | ||
762 | * vector bit in the processor's IRRs | ||
763 | */ | ||
764 | ia64_mca_wakeup_ipi_wait(); | ||
765 | |||
766 | /* Restore the interrupt state saved above */ | ||
767 | local_irq_restore(flags); | ||
768 | return IRQ_HANDLED; | ||
769 | } | ||
770 | |||
771 | /* | ||
772 | * ia64_mca_wakeup_int_handler | ||
773 | * | ||
774 | * The interrupt handler for processing the inter-cpu interrupt to the | ||
775 | * slave cpu which was spinning in the rendez loop. | ||
776 | * Since this spinning is done by turning off the interrupts and | ||
777 | * polling on the wakeup-interrupt bit in the IRR, there is | ||
778 | * nothing useful to be done in the handler. | ||
779 | * | ||
780 | * Inputs : wakeup_irq (Wakeup-interrupt bit) | ||
781 | * arg (Interrupt handler specific argument) | ||
782 | * ptregs (Exception frame at the time of the interrupt) | ||
783 | * Outputs : IRQ_HANDLED (unconditionally; the arguments are not used) | ||
784 | * | ||
785 | */ | ||
786 | static irqreturn_t | ||
787 | ia64_mca_wakeup_int_handler(int wakeup_irq, void *arg, struct pt_regs *ptregs) | ||
788 | { | ||
789 | return IRQ_HANDLED; | ||
790 | } | ||
791 | |||
792 | /* | ||
793 | * ia64_return_to_sal_check | ||
794 | * | ||
795 | * This function is called before going back from the OS_MCA handler | ||
796 | * to the OS_MCA dispatch code which finally takes the control back | ||
797 | * to the SAL. | ||
798 | * The main purpose of this routine is to setup the OS_MCA to SAL | ||
799 | * return state which can be used by the OS_MCA dispatch code | ||
800 | * just before going back to SAL. | ||
801 | * | ||
802 | * Inputs : recover (non-zero: report IA64_MCA_CORRECTED, zero: IA64_MCA_COLD_BOOT) | ||
803 | * Outputs : None (fills in ia64_os_to_sal_handoff_state) | ||
804 | */ | ||
805 | |||
806 | static void | ||
807 | ia64_return_to_sal_check(int recover) | ||
808 | { | ||
809 | |||
810 | /* Copy over some relevant stuff from the sal_to_os_mca_handoff | ||
811 | * so that it can be used at the time of os_mca_to_sal_handoff | ||
812 | */ | ||
813 | ia64_os_to_sal_handoff_state.imots_sal_gp = | ||
814 | ia64_sal_to_os_handoff_state.imsto_sal_gp; | ||
815 | |||
816 | ia64_os_to_sal_handoff_state.imots_sal_check_ra = | ||
817 | ia64_sal_to_os_handoff_state.imsto_sal_check_ra; | ||
818 | /* Status reported back to SAL depends on whether the error was recovered */ | ||
819 | if (recover) | ||
820 | ia64_os_to_sal_handoff_state.imots_os_status = IA64_MCA_CORRECTED; | ||
821 | else | ||
822 | ia64_os_to_sal_handoff_state.imots_os_status = IA64_MCA_COLD_BOOT; | ||
823 | |||
824 | /* Default = tell SAL to return to same context */ | ||
825 | ia64_os_to_sal_handoff_state.imots_context = IA64_MCA_SAME_CONTEXT; | ||
826 | /* Hand back the same min-state area that SAL passed in */ | ||
827 | ia64_os_to_sal_handoff_state.imots_new_min_state = | ||
828 | (u64 *)ia64_sal_to_os_handoff_state.pal_min_state; | ||
829 | |||
830 | } | ||
831 | |||
832 | /* Optional hook for extra MCA recovery; NULL while no extension is registered */ | ||
833 | int (*ia64_mca_ucmc_extension) | ||
834 | (void*,ia64_mca_sal_to_os_state_t*,ia64_mca_os_to_sal_state_t*) | ||
835 | = NULL; | ||
836 | /* Install fn as the MCA recovery extension. Returns 0 on success, 1 if one is already installed. */ | ||
837 | int | ||
838 | ia64_reg_MCA_extension(void *fn) | ||
839 | { | ||
840 | if (ia64_mca_ucmc_extension) | ||
841 | return 1; | ||
842 | /* fn arrives as void *, so its signature is not checked here */ | ||
843 | ia64_mca_ucmc_extension = fn; | ||
844 | return 0; | ||
845 | } | ||
846 | /* Remove the MCA recovery extension, if one is installed, by resetting the hook to NULL. */ | ||
847 | void | ||
848 | ia64_unreg_MCA_extension(void) | ||
849 | { | ||
850 | if (ia64_mca_ucmc_extension) | ||
851 | ia64_mca_ucmc_extension = NULL; | ||
852 | } | ||
853 | /* Export the extension (un)registration entry points */ | ||
854 | EXPORT_SYMBOL(ia64_reg_MCA_extension); | ||
855 | EXPORT_SYMBOL(ia64_unreg_MCA_extension); | ||
856 | |||
857 | /* | ||
858 | * ia64_mca_ucmc_handler | ||
859 | * | ||
860 | * This is the uncorrectable machine check handler called from OS_MCA | ||
861 | * dispatch code which is in turn called from SAL_CHECK(). | ||
862 | * This is the place where the core of OS MCA handling is done. | ||
863 | * Right now the logs are extracted and displayed in a well-defined | ||
864 | * format. This handler code is supposed to be run only on the | ||
865 | * monarch processor. If the error is judged recoverable, the record is | ||
866 | * marked as corrected and the SAL MCA state info is cleared. | ||
867 | * Monarch also has the duty of sending wakeup-IPIs to pull the | ||
868 | * slave processors out of rendezvous spinloop. | ||
869 | * | ||
870 | * Inputs : None | ||
871 | * Outputs : None | ||
872 | */ | ||
873 | void | ||
874 | ia64_mca_ucmc_handler(void) | ||
875 | { | ||
876 | pal_processor_state_info_t *psp = (pal_processor_state_info_t *) | ||
877 | &ia64_sal_to_os_handoff_state.proc_state_param; | ||
878 | int recover; | ||
879 | |||
880 | /* Get the MCA error record and log it */ | ||
881 | ia64_mca_log_sal_error_record(SAL_INFO_TYPE_MCA); | ||
882 | |||
883 | /* Recoverable if a TLB error is the only error in this SAL error record */ | ||
884 | recover = (psp->tc && !(psp->cc || psp->bc || psp->rc || psp->uc)) | ||
885 | /* otherwise ask the registered recovery extension, if there is one */ | ||
886 | || (ia64_mca_ucmc_extension | ||
887 | && ia64_mca_ucmc_extension( | ||
888 | IA64_LOG_CURR_BUFFER(SAL_INFO_TYPE_MCA), | ||
889 | &ia64_sal_to_os_handoff_state, | ||
890 | &ia64_os_to_sal_handoff_state)); | ||
891 | |||
892 | if (recover) { | ||
893 | sal_log_record_header_t *rh = IA64_LOG_CURR_BUFFER(SAL_INFO_TYPE_MCA); | ||
894 | rh->severity = sal_log_severity_corrected; | ||
895 | ia64_sal_clear_state_info(SAL_INFO_TYPE_MCA); | ||
896 | } | ||
897 | /* | ||
898 | * Wakeup all the processors which are spinning in the rendezvous | ||
899 | * loop. | ||
900 | */ | ||
901 | ia64_mca_wakeup_all(); | ||
902 | |||
903 | /* Set up the OS_MCA to SAL return state */ | ||
904 | ia64_return_to_sal_check(recover); | ||
905 | } | ||
906 | /* Work items, queued with schedule_work(), that run the CMC vector disable/enable routines */ | ||
907 | static DECLARE_WORK(cmc_disable_work, ia64_mca_cmc_vector_disable_keventd, NULL); | ||
908 | static DECLARE_WORK(cmc_enable_work, ia64_mca_cmc_vector_enable_keventd, NULL); | ||
909 | |||
910 | /* | ||
911 | * ia64_mca_cmc_int_handler | ||
912 | * | ||
913 | * This is the corrected machine check interrupt handler. | ||
914 | * It logs the CMC record and, if too many CMCs arrived within the last | ||
915 | * second (HZ jiffies), switches CMC handling over to timer-driven polling. | ||
916 | * | ||
917 | * Inputs | ||
918 | * interrupt number | ||
919 | * client data arg ptr | ||
920 | * saved registers ptr | ||
921 | * | ||
922 | * Outputs | ||
923 | * IRQ_HANDLED | ||
924 | */ | ||
925 | static irqreturn_t | ||
926 | ia64_mca_cmc_int_handler(int cmc_irq, void *arg, struct pt_regs *ptregs) | ||
927 | { | ||
928 | static unsigned long cmc_history[CMC_HISTORY_LENGTH]; | ||
929 | static int index; | ||
930 | static DEFINE_SPINLOCK(cmc_history_lock); | ||
931 | |||
932 | IA64_MCA_DEBUG("%s: received interrupt vector = %#x on CPU %d\n", | ||
933 | __FUNCTION__, cmc_irq, smp_processor_id()); | ||
934 | |||
935 | /* SAL spec states this should run w/ interrupts enabled */ | ||
936 | local_irq_enable(); | ||
937 | |||
938 | /* Get the CMC error record and log it */ | ||
939 | ia64_mca_log_sal_error_record(SAL_INFO_TYPE_CMC); | ||
940 | |||
941 | spin_lock(&cmc_history_lock); | ||
942 | if (!cmc_polling_enabled) { | ||
943 | int i, count = 1; /* we know 1 happened now */ | ||
944 | unsigned long now = jiffies; | ||
945 | /* Count the recorded CMC timestamps that fall within the last second */ | ||
946 | for (i = 0; i < CMC_HISTORY_LENGTH; i++) { | ||
947 | if (now - cmc_history[i] <= HZ) | ||
948 | count++; | ||
949 | } | ||
950 | |||
951 | IA64_MCA_DEBUG(KERN_INFO "CMC threshold %d/%d\n", count, CMC_HISTORY_LENGTH); | ||
952 | if (count >= CMC_HISTORY_LENGTH) { | ||
953 | /* Threshold reached: flag polling mode and queue the vector-disable work */ | ||
954 | cmc_polling_enabled = 1; | ||
955 | spin_unlock(&cmc_history_lock); | ||
956 | schedule_work(&cmc_disable_work); | ||
957 | |||
958 | /* | ||
959 | * Corrected errors will still be corrected, but | ||
960 | * make sure there's a log somewhere that indicates | ||
961 | * something is generating more than we can handle. | ||
962 | */ | ||
963 | printk(KERN_WARNING "WARNING: Switching to polling CMC handler; error records may be lost\n"); | ||
964 | |||
965 | mod_timer(&cmc_poll_timer, jiffies + CMC_POLL_INTERVAL); | ||
966 | |||
967 | /* lock already released, get out now */ | ||
968 | return IRQ_HANDLED; | ||
969 | } else { | ||
970 | cmc_history[index++] = now; | ||
971 | if (index == CMC_HISTORY_LENGTH) | ||
972 | index = 0; | ||
973 | } | ||
974 | } | ||
975 | spin_unlock(&cmc_history_lock); | ||
976 | return IRQ_HANDLED; | ||
977 | } | ||
978 | |||
979 | /* | ||
980 | * ia64_mca_cmc_int_caller | ||
981 | * | ||
982 | * Triggered by sw interrupt from CMC polling routine. Calls | ||
983 | * real interrupt handler and either triggers a sw interrupt | ||
984 | * on the next cpu or does cleanup at the end. | ||
985 | * | ||
986 | * Inputs | ||
987 | * interrupt number | ||
988 | * client data arg ptr | ||
989 | * saved registers ptr | ||
990 | * Outputs | ||
991 | * IRQ_HANDLED | ||
992 | */ | ||
993 | static irqreturn_t | ||
994 | ia64_mca_cmc_int_caller(int cmc_irq, void *arg, struct pt_regs *ptregs) | ||
995 | { | ||
996 | static int start_count = -1; | ||
997 | unsigned int cpuid; | ||
998 | |||
999 | cpuid = smp_processor_id(); | ||
1000 | |||
1001 | /* First cpu of a cascade (start_count still -1): snapshot the CMC log count */ | ||
1002 | if (start_count == -1) | ||
1003 | start_count = IA64_LOG_COUNT(SAL_INFO_TYPE_CMC); | ||
1004 | |||
1005 | ia64_mca_cmc_int_handler(cmc_irq, arg, ptregs); | ||
1006 | /* Advance to the next online cpu, if there is one */ | ||
1007 | for (++cpuid ; cpuid < NR_CPUS && !cpu_online(cpuid) ; cpuid++); | ||
1008 | |||
1009 | if (cpuid < NR_CPUS) { | ||
1010 | platform_send_ipi(cpuid, IA64_CMCP_VECTOR, IA64_IPI_DM_INT, 0); | ||
1011 | } else { | ||
1012 | /* If the log count did not change during this cascade, switch out of polling mode */ | ||
1013 | if (start_count == IA64_LOG_COUNT(SAL_INFO_TYPE_CMC)) { | ||
1014 | |||
1015 | printk(KERN_WARNING "Returning to interrupt driven CMC handler\n"); | ||
1016 | schedule_work(&cmc_enable_work); | ||
1017 | cmc_polling_enabled = 0; | ||
1018 | |||
1019 | } else { | ||
1020 | /* The log count changed: stay in polling mode and re-arm the timer */ | ||
1021 | mod_timer(&cmc_poll_timer, jiffies + CMC_POLL_INTERVAL); | ||
1022 | } | ||
1023 | /* End of cascade: make the next one take a fresh snapshot */ | ||
1024 | start_count = -1; | ||
1025 | } | ||
1026 | |||
1027 | return IRQ_HANDLED; | ||
1028 | } | ||
1029 | |||
1030 | /* | ||
1031 | * ia64_mca_cmc_poll | ||
1032 | * | ||
1033 | * Poll for Corrected Machine Checks (CMCs) by sending IA64_CMCP_VECTOR | ||
1034 | * to the first online cpu. | ||
1035 | * Inputs : dummy(unused) | ||
1036 | * Outputs : None | ||
1037 | * | ||
1038 | */ | ||
1039 | static void | ||
1040 | ia64_mca_cmc_poll (unsigned long dummy) | ||
1041 | { | ||
1042 | /* Trigger a CMC interrupt cascade */ | ||
1043 | platform_send_ipi(first_cpu(cpu_online_map), IA64_CMCP_VECTOR, IA64_IPI_DM_INT, 0); | ||
1044 | } | ||
1045 | |||
1046 | /* | ||
1047 | * ia64_mca_cpe_int_caller | ||
1048 | * | ||
1049 | * Triggered by sw interrupt from CPE polling routine. Calls | ||
1050 | * real interrupt handler and either triggers a sw interrupt | ||
1051 | * on the next cpu or does cleanup at the end. | ||
1052 | * | ||
1053 | * Inputs | ||
1054 | * interrupt number | ||
1055 | * client data arg ptr | ||
1056 | * saved registers ptr | ||
1057 | * Outputs | ||
1058 | * IRQ_HANDLED | ||
1059 | */ | ||
1060 | #ifdef CONFIG_ACPI | ||
1061 | |||
1062 | static irqreturn_t | ||
1063 | ia64_mca_cpe_int_caller(int cpe_irq, void *arg, struct pt_regs *ptregs) | ||
1064 | { | ||
1065 | static int start_count = -1; | ||
1066 | static int poll_time = MIN_CPE_POLL_INTERVAL; | ||
1067 | unsigned int cpuid; | ||
1068 | |||
1069 | cpuid = smp_processor_id(); | ||
1070 | |||
1071 | /* First cpu of a cascade (start_count still -1): snapshot the CPE log count */ | ||
1072 | if (start_count == -1) | ||
1073 | start_count = IA64_LOG_COUNT(SAL_INFO_TYPE_CPE); | ||
1074 | |||
1075 | ia64_mca_cpe_int_handler(cpe_irq, arg, ptregs); | ||
1076 | /* Advance to the next online cpu, if there is one */ | ||
1077 | for (++cpuid ; cpuid < NR_CPUS && !cpu_online(cpuid) ; cpuid++); | ||
1078 | |||
1079 | if (cpuid < NR_CPUS) { | ||
1080 | platform_send_ipi(cpuid, IA64_CPEP_VECTOR, IA64_IPI_DM_INT, 0); | ||
1081 | } else { | ||
1082 | /* | ||
1083 | * If a log was recorded, increase our polling frequency, | ||
1084 | * otherwise, backoff or return to interrupt mode. | ||
1085 | * Interrupt mode is only possible when cpe_vector >= 0. | ||
1086 | */ | ||
1087 | if (start_count != IA64_LOG_COUNT(SAL_INFO_TYPE_CPE)) { | ||
1088 | poll_time = max(MIN_CPE_POLL_INTERVAL, poll_time / 2); | ||
1089 | } else if (cpe_vector < 0) { | ||
1090 | poll_time = min(MAX_CPE_POLL_INTERVAL, poll_time * 2); | ||
1091 | } else { | ||
1092 | poll_time = MIN_CPE_POLL_INTERVAL; | ||
1093 | |||
1094 | printk(KERN_WARNING "Returning to interrupt driven CPE handler\n"); | ||
1095 | enable_irq(local_vector_to_irq(IA64_CPE_VECTOR)); | ||
1096 | cpe_poll_enabled = 0; | ||
1097 | } | ||
1098 | /* Re-arm the poll timer only while polling is still enabled */ | ||
1099 | if (cpe_poll_enabled) | ||
1100 | mod_timer(&cpe_poll_timer, jiffies + poll_time); | ||
1101 | start_count = -1; | ||
1102 | } | ||
1103 | |||
1104 | return IRQ_HANDLED; | ||
1105 | } | ||
1106 | |||
1107 | #endif /* CONFIG_ACPI */ | ||
1107 | |||
1108 | /* | ||
1109 | * ia64_mca_cpe_poll | ||
1110 | * | ||
1111 | * Poll for Corrected Platform Errors (CPEs): send IA64_CPEP_VECTOR to the | ||
1112 | * first online cpu; from there it will trickle through all the online cpus. | ||
1113 | * | ||
1114 | * Inputs : dummy(unused) | ||
1115 | * Outputs : None | ||
1116 | * | ||
1117 | */ | ||
1118 | static void | ||
1119 | ia64_mca_cpe_poll (unsigned long dummy) | ||
1120 | { | ||
1121 | /* Trigger a CPE interrupt cascade */ | ||
1122 | platform_send_ipi(first_cpu(cpu_online_map), IA64_CPEP_VECTOR, IA64_IPI_DM_INT, 0); | ||
1123 | } | ||
1124 | |||
1125 | /* | ||
1126 | * C portion of the OS INIT handler | ||
1127 | * | ||
1128 | * Called from ia64_monarch_init_handler | ||
1129 | * | ||
1130 | * Inputs: pt - pointer to pt_regs where processor info was saved. | ||
1131 | * sw - pointer to a switch_stack, passed on to init_handler_platform(). | ||
1132 | * Returns: | ||
1133 | * Nothing (the function is void); the min-state pointer, pt and sw | ||
1134 | * are handed to init_handler_platform() for the platform specific work. | ||
1135 | * | ||
1136 | */ | ||
1137 | void | ||
1138 | ia64_init_handler (struct pt_regs *pt, struct switch_stack *sw) | ||
1139 | { | ||
1140 | pal_min_state_area_t *ms; | ||
1141 | |||
1142 | oops_in_progress = 1; /* avoid deadlock in printk, but it makes recovery dodgy */ | ||
1143 | console_loglevel = 15; /* make sure printks make it to console */ | ||
1144 | |||
1145 | printk(KERN_INFO "Entered OS INIT handler. PSP=%lx\n", | ||
1146 | ia64_sal_to_os_handoff_state.proc_state_param); | ||
1147 | |||
1148 | /* | ||
1149 | * Address of minstate area provided by PAL is physical, | ||
1150 | * uncacheable (bit 63 set). Convert to Linux virtual | ||
1151 | * address in region 6. | ||
1152 | */ | ||
1153 | ms = (pal_min_state_area_t *)(ia64_sal_to_os_handoff_state.pal_min_state | (6ul<<61)); | ||
1154 | |||
1155 | init_handler_platform(ms, pt, sw); /* call platform specific routines */ | ||
1156 | } | ||
1157 | /* Handler for the "disable_cpe_poll" boot option: clears cpe_poll_enabled (str is unused). */ | ||
1158 | static int __init | ||
1159 | ia64_mca_disable_cpe_polling(char *str) | ||
1160 | { | ||
1161 | cpe_poll_enabled = 0; | ||
1162 | return 1; | ||
1163 | } | ||
1164 | |||
1165 | __setup("disable_cpe_poll", ia64_mca_disable_cpe_polling); | ||
1166 | /* irqaction descriptors registered by ia64_mca_init() via register_percpu_irq() */ | ||
1167 | static struct irqaction cmci_irqaction = { | ||
1168 | .handler = ia64_mca_cmc_int_handler, | ||
1169 | .flags = SA_INTERRUPT, | ||
1170 | .name = "cmc_hndlr" | ||
1171 | }; | ||
1172 | /* CMC polling cascade (IA64_CMCP_VECTOR) */ | ||
1173 | static struct irqaction cmcp_irqaction = { | ||
1174 | .handler = ia64_mca_cmc_int_caller, | ||
1175 | .flags = SA_INTERRUPT, | ||
1176 | .name = "cmc_poll" | ||
1177 | }; | ||
1178 | /* MCA rendezvous (IA64_MCA_RENDEZ_VECTOR) */ | ||
1179 | static struct irqaction mca_rdzv_irqaction = { | ||
1180 | .handler = ia64_mca_rendez_int_handler, | ||
1181 | .flags = SA_INTERRUPT, | ||
1182 | .name = "mca_rdzv" | ||
1183 | }; | ||
1184 | /* MCA wakeup (IA64_MCA_WAKEUP_VECTOR) */ | ||
1185 | static struct irqaction mca_wkup_irqaction = { | ||
1186 | .handler = ia64_mca_wakeup_int_handler, | ||
1187 | .flags = SA_INTERRUPT, | ||
1188 | .name = "mca_wkup" | ||
1189 | }; | ||
1190 | |||
1191 | #ifdef CONFIG_ACPI | ||
1192 | static struct irqaction mca_cpe_irqaction = { | ||
1193 | .handler = ia64_mca_cpe_int_handler, | ||
1194 | .flags = SA_INTERRUPT, | ||
1195 | .name = "cpe_hndlr" | ||
1196 | }; | ||
1197 | /* CPE polling cascade (registered on IA64_CPEP_VECTOR by ia64_mca_init) */ | ||
1198 | static struct irqaction mca_cpep_irqaction = { | ||
1199 | .handler = ia64_mca_cpe_int_caller, | ||
1200 | .flags = SA_INTERRUPT, | ||
1201 | .name = "cpe_poll" | ||
1202 | }; | ||
1203 | #endif /* CONFIG_ACPI */ | ||
1204 | |||
1205 | /* Do per-CPU MCA-related initialization. | ||
1206 | * cpu_data: address of this cpu's per-CPU page; its PTE is stashed below. */ | ||
1207 | void __devinit | ||
1208 | ia64_mca_cpu_init(void *cpu_data) | ||
1209 | { | ||
1210 | void *pal_vaddr; | ||
1211 | |||
1212 | if (smp_processor_id() == 0) { | ||
1213 | void *mca_data; | ||
1214 | int cpu; | ||
1215 | /* Allocate NR_CPUS ia64_mca_cpu areas and record each physical address */ | ||
1216 | mca_data = alloc_bootmem(sizeof(struct ia64_mca_cpu) | ||
1217 | * NR_CPUS); | ||
1218 | for (cpu = 0; cpu < NR_CPUS; cpu++) { | ||
1219 | __per_cpu_mca[cpu] = __pa(mca_data); | ||
1220 | mca_data += sizeof(struct ia64_mca_cpu); | ||
1221 | } | ||
1222 | } | ||
1223 | |||
1224 | /* | ||
1225 | * The MCA info structure was allocated earlier and its | ||
1226 | * physical address saved in __per_cpu_mca[cpu]. Copy that | ||
1227 | * address to ia64_mca_data so we can access it as a per-CPU | ||
1228 | * variable. | ||
1229 | */ | ||
1230 | __get_cpu_var(ia64_mca_data) = __per_cpu_mca[smp_processor_id()]; | ||
1231 | |||
1232 | /* | ||
1233 | * Stash away a copy of the PTE needed to map the per-CPU page. | ||
1234 | * We may need it during MCA recovery. | ||
1235 | */ | ||
1236 | __get_cpu_var(ia64_mca_per_cpu_pte) = | ||
1237 | pte_val(mk_pte_phys(__pa(cpu_data), PAGE_KERNEL)); | ||
1238 | |||
1239 | /* | ||
1240 | * Also, stash away a copy of the PAL address and the PTE | ||
1241 | * needed to map it. Skipped when efi_get_pal_addr() returns NULL. | ||
1242 | */ | ||
1243 | pal_vaddr = efi_get_pal_addr(); | ||
1244 | if (!pal_vaddr) | ||
1245 | return; | ||
1246 | __get_cpu_var(ia64_mca_pal_base) = | ||
1247 | GRANULEROUNDDOWN((unsigned long) pal_vaddr); | ||
1248 | __get_cpu_var(ia64_mca_pal_pte) = pte_val(mk_pte_phys(__pa(pal_vaddr), | ||
1249 | PAGE_KERNEL)); | ||
1250 | } | ||
1251 | |||
1252 | /* | ||
1253 | * ia64_mca_init | ||
1254 | * | ||
1255 | * Do all the system level mca specific initialization. | ||
1256 | * | ||
1257 | * 1. Register spinloop and wakeup request interrupt vectors | ||
1258 | * | ||
1259 | * 2. Register OS_MCA handler entry point | ||
1260 | * | ||
1261 | * 3. Register OS_INIT handler entry point | ||
1262 | * | ||
1263 | * 4. Initialize MCA/INIT/CMC/CPE related log buffers maintained by the OS. | ||
1264 | * | ||
1265 | * Note that this initialization is done very early before some kernel | ||
1266 | * services are available. If any SAL registration fails, an error is | ||
1267 | * printed and the function returns early without setting mca_init. | ||
1268 | * Inputs : None | ||
1269 | * | ||
1270 | * Outputs : None | ||
1271 | */ | ||
1272 | void __init | ||
1273 | ia64_mca_init(void) | ||
1274 | { | ||
1275 | ia64_fptr_t *mon_init_ptr = (ia64_fptr_t *)ia64_monarch_init_handler; | ||
1276 | ia64_fptr_t *slave_init_ptr = (ia64_fptr_t *)ia64_slave_init_handler; | ||
1277 | ia64_fptr_t *mca_hldlr_ptr = (ia64_fptr_t *)ia64_os_mca_dispatch; | ||
1278 | int i; | ||
1279 | s64 rc; | ||
1280 | struct ia64_sal_retval isrv; | ||
1281 | u64 timeout = IA64_MCA_RENDEZ_TIMEOUT; /* platform specific */ | ||
1282 | |||
1283 | IA64_MCA_DEBUG("%s: begin\n", __FUNCTION__); | ||
1284 | |||
1285 | /* Clear the Rendez checkin flag for all cpus */ | ||
1286 | for(i = 0 ; i < NR_CPUS; i++) | ||
1287 | ia64_mc_info.imi_rendez_checkin[i] = IA64_MCA_RENDEZ_CHECKIN_NOTDONE; | ||
1288 | |||
1289 | /* | ||
1290 | * Register the rendezvous spinloop and wakeup mechanism with SAL | ||
1291 | */ | ||
1292 | |||
1293 | /* Register the rendezvous interrupt vector with SAL; on status -2 retry with the timeout returned in v0 */ | ||
1294 | while (1) { | ||
1295 | isrv = ia64_sal_mc_set_params(SAL_MC_PARAM_RENDEZ_INT, | ||
1296 | SAL_MC_PARAM_MECHANISM_INT, | ||
1297 | IA64_MCA_RENDEZ_VECTOR, | ||
1298 | timeout, | ||
1299 | SAL_MC_PARAM_RZ_ALWAYS); | ||
1300 | rc = isrv.status; | ||
1301 | if (rc == 0) | ||
1302 | break; | ||
1303 | if (rc == -2) { | ||
1304 | printk(KERN_INFO "Increasing MCA rendezvous timeout from " | ||
1305 | "%ld to %ld milliseconds\n", timeout, isrv.v0); | ||
1306 | timeout = isrv.v0; | ||
1307 | continue; | ||
1308 | } | ||
1309 | printk(KERN_ERR "Failed to register rendezvous interrupt " | ||
1310 | "with SAL (status %ld)\n", rc); | ||
1311 | return; | ||
1312 | } | ||
1313 | |||
1314 | /* Register the wakeup interrupt vector with SAL */ | ||
1315 | isrv = ia64_sal_mc_set_params(SAL_MC_PARAM_RENDEZ_WAKEUP, | ||
1316 | SAL_MC_PARAM_MECHANISM_INT, | ||
1317 | IA64_MCA_WAKEUP_VECTOR, | ||
1318 | 0, 0); | ||
1319 | rc = isrv.status; | ||
1320 | if (rc) { | ||
1321 | printk(KERN_ERR "Failed to register wakeup interrupt with SAL " | ||
1322 | "(status %ld)\n", rc); | ||
1323 | return; | ||
1324 | } | ||
1325 | |||
1326 | IA64_MCA_DEBUG("%s: registered MCA rendezvous spinloop and wakeup mech.\n", __FUNCTION__); | ||
1327 | |||
1328 | ia64_mc_info.imi_mca_handler = ia64_tpa(mca_hldlr_ptr->fp); | ||
1329 | /* | ||
1330 | * XXX - disable SAL checksum by setting size to 0; should be | ||
1331 | * ia64_tpa(ia64_os_mca_dispatch_end) - ia64_tpa(ia64_os_mca_dispatch); | ||
1332 | */ | ||
1333 | ia64_mc_info.imi_mca_handler_size = 0; | ||
1334 | |||
1335 | /* Register the os mca handler with SAL */ | ||
1336 | if ((rc = ia64_sal_set_vectors(SAL_VECTOR_OS_MCA, | ||
1337 | ia64_mc_info.imi_mca_handler, | ||
1338 | ia64_tpa(mca_hldlr_ptr->gp), | ||
1339 | ia64_mc_info.imi_mca_handler_size, | ||
1340 | 0, 0, 0))) | ||
1341 | { | ||
1342 | printk(KERN_ERR "Failed to register OS MCA handler with SAL " | ||
1343 | "(status %ld)\n", rc); | ||
1344 | return; | ||
1345 | } | ||
1346 | |||
1347 | IA64_MCA_DEBUG("%s: registered OS MCA handler with SAL at 0x%lx, gp = 0x%lx\n", __FUNCTION__, | ||
1348 | ia64_mc_info.imi_mca_handler, ia64_tpa(mca_hldlr_ptr->gp)); | ||
1349 | |||
1350 | /* | ||
1351 | * XXX - disable SAL checksum by setting size to 0, should be | ||
1352 | * size of the actual init handler in mca_asm.S. | ||
1353 | */ | ||
1354 | ia64_mc_info.imi_monarch_init_handler = ia64_tpa(mon_init_ptr->fp); | ||
1355 | ia64_mc_info.imi_monarch_init_handler_size = 0; | ||
1356 | ia64_mc_info.imi_slave_init_handler = ia64_tpa(slave_init_ptr->fp); | ||
1357 | ia64_mc_info.imi_slave_init_handler_size = 0; | ||
1358 | |||
1359 | IA64_MCA_DEBUG("%s: OS INIT handler at %lx\n", __FUNCTION__, | ||
1360 | ia64_mc_info.imi_monarch_init_handler); | ||
1361 | |||
1362 | /* Register the monarch and slave os init handlers with SAL */ | ||
1363 | if ((rc = ia64_sal_set_vectors(SAL_VECTOR_OS_INIT, | ||
1364 | ia64_mc_info.imi_monarch_init_handler, | ||
1365 | ia64_tpa(ia64_getreg(_IA64_REG_GP)), | ||
1366 | ia64_mc_info.imi_monarch_init_handler_size, | ||
1367 | ia64_mc_info.imi_slave_init_handler, | ||
1368 | ia64_tpa(ia64_getreg(_IA64_REG_GP)), | ||
1369 | ia64_mc_info.imi_slave_init_handler_size))) | ||
1370 | { | ||
1371 | printk(KERN_ERR "Failed to register m/s INIT handlers with SAL " | ||
1372 | "(status %ld)\n", rc); | ||
1373 | return; | ||
1374 | } | ||
1375 | |||
1376 | IA64_MCA_DEBUG("%s: registered OS INIT handler with SAL\n", __FUNCTION__); | ||
1377 | |||
1378 | /* | ||
1379 | * Configure the CMCI/P vector and handler. Interrupts for CMC are | ||
1380 | * per-processor, so AP CMC interrupts are setup in smp_callin() (smpboot.c). | ||
1381 | */ | ||
1382 | register_percpu_irq(IA64_CMC_VECTOR, &cmci_irqaction); | ||
1383 | register_percpu_irq(IA64_CMCP_VECTOR, &cmcp_irqaction); | ||
1384 | ia64_mca_cmc_vector_setup(); /* Setup vector on BSP */ | ||
1385 | |||
1386 | /* Setup the MCA rendezvous interrupt vector */ | ||
1387 | register_percpu_irq(IA64_MCA_RENDEZ_VECTOR, &mca_rdzv_irqaction); | ||
1388 | |||
1389 | /* Setup the MCA wakeup interrupt vector */ | ||
1390 | register_percpu_irq(IA64_MCA_WAKEUP_VECTOR, &mca_wkup_irqaction); | ||
1391 | |||
1392 | #ifdef CONFIG_ACPI | ||
1393 | /* Request the CPEI vector from ACPI and register the CPE polling handler */ | ||
1394 | cpe_vector = acpi_request_vector(ACPI_INTERRUPT_CPEI); | ||
1395 | register_percpu_irq(IA64_CPEP_VECTOR, &mca_cpep_irqaction); | ||
1396 | #endif | ||
1397 | |||
1398 | /* Initialize the areas set aside by the OS to buffer the | ||
1399 | * platform/processor error states for MCA/INIT/CMC/CPE | ||
1400 | * handling. | ||
1401 | */ | ||
1402 | ia64_log_init(SAL_INFO_TYPE_MCA); | ||
1403 | ia64_log_init(SAL_INFO_TYPE_INIT); | ||
1404 | ia64_log_init(SAL_INFO_TYPE_CMC); | ||
1405 | ia64_log_init(SAL_INFO_TYPE_CPE); | ||
1406 | /* Tell ia64_mca_late_init() that the early setup succeeded */ | ||
1407 | mca_init = 1; | ||
1408 | printk(KERN_INFO "MCA related initialization done\n"); | ||
1409 | } | ||
1410 | |||
1411 | /* | ||
1412 | * ia64_mca_late_init | ||
1413 | * | ||
1414 | * Opportunity to setup things that require initialization later | ||
1415 | * than ia64_mca_init. Setup a timer to poll for CPEs if the | ||
1416 | * platform doesn't support an interrupt driven mechanism. | ||
1417 | * Does nothing if ia64_mca_init did not set mca_init. | ||
1418 | * Inputs : None | ||
1419 | * Outputs : 0 (in every case) | ||
1420 | */ | ||
1421 | static int __init | ||
1422 | ia64_mca_late_init(void) | ||
1423 | { | ||
1424 | if (!mca_init) | ||
1425 | return 0; | ||
1426 | |||
1427 | /* Set up the CMC polling timer */ | ||
1428 | init_timer(&cmc_poll_timer); | ||
1429 | cmc_poll_timer.function = ia64_mca_cmc_poll; | ||
1430 | |||
1431 | /* Start in interrupt mode: queue the work item that enables the CMC vector */ | ||
1432 | cmc_polling_enabled = 0; | ||
1433 | schedule_work(&cmc_enable_work); | ||
1434 | |||
1435 | IA64_MCA_DEBUG("%s: CMCI/P setup and enabled.\n", __FUNCTION__); | ||
1436 | |||
1437 | #ifdef CONFIG_ACPI | ||
1438 | /* Set up the CPE polling timer */ | ||
1439 | init_timer(&cpe_poll_timer); | ||
1440 | cpe_poll_timer.function = ia64_mca_cpe_poll; | ||
1441 | |||
1442 | { | ||
1443 | irq_desc_t *desc; | ||
1444 | unsigned int irq; | ||
1445 | |||
1446 | if (cpe_vector >= 0) { | ||
1447 | /* If platform supports CPEI, enable the irq. */ | ||
1448 | cpe_poll_enabled = 0; | ||
1449 | for (irq = 0; irq < NR_IRQS; ++irq) | ||
1450 | if (irq_to_vector(irq) == cpe_vector) { | ||
1451 | desc = irq_descp(irq); | ||
1452 | desc->status |= IRQ_PER_CPU; | ||
1453 | setup_irq(irq, &mca_cpe_irqaction); | ||
1454 | } | ||
1455 | ia64_mca_register_cpev(cpe_vector); | ||
1456 | IA64_MCA_DEBUG("%s: CPEI/P setup and enabled.\n", __FUNCTION__); | ||
1457 | } else { | ||
1458 | /* If platform doesn't support CPEI, start the first poll now (unless polling was disabled). */ | ||
1459 | if (cpe_poll_enabled) { | ||
1460 | ia64_mca_cpe_poll(0UL); | ||
1461 | IA64_MCA_DEBUG("%s: CPEP setup and enabled.\n", __FUNCTION__); | ||
1462 | } | ||
1463 | } | ||
1464 | } | ||
1465 | #endif | ||
1466 | |||
1467 | return 0; | ||
1468 | } | ||
1469 | |||
1470 | device_initcall(ia64_mca_late_init); | ||
diff --git a/arch/ia64/kernel/mca_asm.S b/arch/ia64/kernel/mca_asm.S new file mode 100644 index 000000000000..cf3f8014f9ad --- /dev/null +++ b/arch/ia64/kernel/mca_asm.S | |||
@@ -0,0 +1,928 @@ | |||
1 | // | ||
2 | // assembly portion of the IA64 MCA handling | ||
3 | // | ||
4 | // Mods by cfleck to integrate into kernel build | ||
5 | // 00/03/15 davidm Added various stop bits to get a clean compile | ||
6 | // | ||
7 | // 00/03/29 cfleck Added code to save INIT handoff state in pt_regs format, switch to temp | ||
8 | // kstack, switch modes, jump to C INIT handler | ||
9 | // | ||
10 | // 02/01/04 J.Hall <jenna.s.hall@intel.com> | ||
11 | // Before entering virtual mode code: | ||
12 | // 1. Check for TLB CPU error | ||
13 | // 2. Restore current thread pointer to kr6 | ||
14 | // 3. Move stack ptr 16 bytes to conform to C calling convention | ||
15 | // | ||
16 | // 04/11/12 Russ Anderson <rja@sgi.com> | ||
17 | // Added per cpu MCA/INIT stack save areas. | ||
18 | // | ||
19 | #include <linux/config.h> | ||
20 | #include <linux/threads.h> | ||
21 | |||
22 | #include <asm/asmmacro.h> | ||
23 | #include <asm/pgtable.h> | ||
24 | #include <asm/processor.h> | ||
25 | #include <asm/mca_asm.h> | ||
26 | #include <asm/mca.h> | ||
27 | |||
28 | /* | ||
29 | * When we get a machine check, the kernel stack pointer is no longer | ||
30 | * valid, so we need to set a new stack pointer. | ||
31 | */ | ||
32 | #define MINSTATE_PHYS /* Make sure stack access is physical for MINSTATE */ | ||
33 | |||
34 | /* | ||
35 | * Needed for return context to SAL | ||
36 | */ | ||
37 | #define IA64_MCA_SAME_CONTEXT 0 | ||
38 | #define IA64_MCA_COLD_BOOT -2 | ||
39 | |||
40 | #include "minstate.h" | ||
41 | |||
42 | /* | ||
43 | * SAL_TO_OS_MCA_HANDOFF_STATE (SAL 3.0 spec) | ||
44 | * 1. GR1 = OS GP | ||
45 | * 2. GR8 = PAL_PROC physical address | ||
46 | * 3. GR9 = SAL_PROC physical address | ||
47 | * 4. GR10 = SAL GP (physical) | ||
48 | * 5. GR11 = Rendez state | ||
49 | * 6. GR12 = Return address to location within SAL_CHECK | ||
50 | */ | ||
51 | #define SAL_TO_OS_MCA_HANDOFF_STATE_SAVE(_tmp) \ | ||
52 | LOAD_PHYSICAL(p0, _tmp, ia64_sal_to_os_handoff_state);; \ | ||
53 | st8 [_tmp]=r1,0x08;; \ | ||
54 | st8 [_tmp]=r8,0x08;; \ | ||
55 | st8 [_tmp]=r9,0x08;; \ | ||
56 | st8 [_tmp]=r10,0x08;; \ | ||
57 | st8 [_tmp]=r11,0x08;; \ | ||
58 | st8 [_tmp]=r12,0x08;; \ | ||
59 | st8 [_tmp]=r17,0x08;; \ | ||
60 | st8 [_tmp]=r18,0x08 | ||
61 | |||
62 | /* | ||
63 | * OS_MCA_TO_SAL_HANDOFF_STATE (SAL 3.0 spec) | ||
64 | * (p6) is executed if we never entered virtual mode (TLB error) | ||
65 | * (p7) is executed if we entered virtual mode as expected (normal case) | ||
66 | * 1. GR8 = OS_MCA return status | ||
67 | * 2. GR9 = SAL GP (physical) | ||
68 | * 3. GR10 = 0/1 returning same/new context | ||
69 | * 4. GR22 = New min state save area pointer | ||
70 | * returns ptr to SAL rtn save loc in _tmp | ||
71 | */ | ||
72 | #define OS_MCA_TO_SAL_HANDOFF_STATE_RESTORE(_tmp) \ | ||
73 | movl _tmp=ia64_os_to_sal_handoff_state;; \ | ||
74 | DATA_VA_TO_PA(_tmp);; \ | ||
75 | ld8 r8=[_tmp],0x08;; \ | ||
76 | ld8 r9=[_tmp],0x08;; \ | ||
77 | ld8 r10=[_tmp],0x08;; \ | ||
78 | ld8 r22=[_tmp],0x08;; | ||
79 | // now _tmp is pointing to SAL rtn save location | ||
80 | |||
81 | /* | ||
82 | * COLD_BOOT_HANDOFF_STATE() sets ia64_mca_os_to_sal_state | ||
83 | * imots_os_status=IA64_MCA_COLD_BOOT | ||
84 | * imots_sal_gp=SAL GP | ||
85 | * imots_context=IA64_MCA_SAME_CONTEXT | ||
86 | * imots_new_min_state=Min state save area pointer | ||
87 | * imots_sal_check_ra=Return address to location within SAL_CHECK | ||
88 | * | ||
89 | */ | ||
90 | #define COLD_BOOT_HANDOFF_STATE(sal_to_os_handoff,os_to_sal_handoff,tmp)\ | ||
91 | movl tmp=IA64_MCA_COLD_BOOT; \ | ||
92 | movl sal_to_os_handoff=__pa(ia64_sal_to_os_handoff_state); \ | ||
93 | movl os_to_sal_handoff=__pa(ia64_os_to_sal_handoff_state);; \ | ||
94 | st8 [os_to_sal_handoff]=tmp,8;; \ | ||
95 | ld8 tmp=[sal_to_os_handoff],48;; \ | ||
96 | st8 [os_to_sal_handoff]=tmp,8;; \ | ||
97 | movl tmp=IA64_MCA_SAME_CONTEXT;; \ | ||
98 | st8 [os_to_sal_handoff]=tmp,8;; \ | ||
99 | ld8 tmp=[sal_to_os_handoff],-8;; \ | ||
100 | st8 [os_to_sal_handoff]=tmp,8;; \ | ||
101 | ld8 tmp=[sal_to_os_handoff];; \ | ||
102 | st8 [os_to_sal_handoff]=tmp;; | ||
103 | |||
104 | #define GET_IA64_MCA_DATA(reg) \ | ||
105 | GET_THIS_PADDR(reg, ia64_mca_data) \ | ||
106 | ;; \ | ||
107 | ld8 reg=[reg] | ||
108 | |||
109 | .global ia64_os_mca_dispatch | ||
110 | .global ia64_os_mca_dispatch_end | ||
111 | .global ia64_sal_to_os_handoff_state | ||
112 | .global ia64_os_to_sal_handoff_state | ||
113 | |||
114 | .text | ||
115 | .align 16 | ||
116 | |||
117 | ia64_os_mca_dispatch: | ||
118 | |||
119 | // Serialize all MCA processing | ||
120 | mov r3=1;; | ||
121 | LOAD_PHYSICAL(p0,r2,ia64_mca_serialize);; | ||
122 | ia64_os_mca_spin: | ||
123 | xchg8 r4=[r2],r3;; | ||
124 | cmp.ne p6,p0=r4,r0 | ||
125 | (p6) br ia64_os_mca_spin | ||
126 | |||
127 | // Save the SAL to OS MCA handoff state as defined | ||
128 | // by SAL SPEC 3.0 | ||
129 | // NOTE : The order in which the state gets saved | ||
130 | // is dependent on the way the C-structure | ||
131 | // for ia64_mca_sal_to_os_state_t has been | ||
132 | // defined in include/asm/mca.h | ||
133 | SAL_TO_OS_MCA_HANDOFF_STATE_SAVE(r2) | ||
134 | ;; | ||
135 | |||
136 | // LOG PROCESSOR STATE INFO FROM HERE ON.. | ||
137 | begin_os_mca_dump: | ||
138 | br ia64_os_mca_proc_state_dump;; | ||
139 | |||
140 | ia64_os_mca_done_dump: | ||
141 | |||
142 | LOAD_PHYSICAL(p0,r16,ia64_sal_to_os_handoff_state+56) | ||
143 | ;; | ||
144 | ld8 r18=[r16] // Get processor state parameter on exiting PALE_CHECK. | ||
145 | ;; | ||
146 | tbit.nz p6,p7=r18,60 | ||
147 | (p7) br.spnt done_tlb_purge_and_reload | ||
148 | |||
149 | // The following code purges TC and TR entries. Then reload all TC entries. | ||
150 | // Purge percpu data TC entries. | ||
151 | begin_tlb_purge_and_reload: | ||
152 | |||
153 | #define O(member) IA64_CPUINFO_##member##_OFFSET | ||
154 | |||
155 | GET_THIS_PADDR(r2, cpu_info) // load phys addr of cpu_info into r2 | ||
156 | ;; | ||
157 | addl r17=O(PTCE_STRIDE),r2 | ||
158 | addl r2=O(PTCE_BASE),r2 | ||
159 | ;; | ||
160 | ld8 r18=[r2],(O(PTCE_COUNT)-O(PTCE_BASE));; // r18=ptce_base | ||
161 | ld4 r19=[r2],4 // r19=ptce_count[0] | ||
162 | ld4 r21=[r17],4 // r21=ptce_stride[0] | ||
163 | ;; | ||
164 | ld4 r20=[r2] // r20=ptce_count[1] | ||
165 | ld4 r22=[r17] // r22=ptce_stride[1] | ||
166 | mov r24=0 | ||
167 | ;; | ||
168 | adds r20=-1,r20 | ||
169 | ;; | ||
170 | #undef O | ||
171 | |||
172 | 2: | ||
173 | cmp.ltu p6,p7=r24,r19 | ||
174 | (p7) br.cond.dpnt.few 4f | ||
175 | mov ar.lc=r20 | ||
176 | 3: | ||
177 | ptc.e r18 | ||
178 | ;; | ||
179 | add r18=r22,r18 | ||
180 | br.cloop.sptk.few 3b | ||
181 | ;; | ||
182 | add r18=r21,r18 | ||
183 | add r24=1,r24 | ||
184 | ;; | ||
185 | br.sptk.few 2b | ||
186 | 4: | ||
187 | srlz.i // srlz.i implies srlz.d | ||
188 | ;; | ||
189 | |||
190 | // Now purge addresses formerly mapped by TR registers | ||
191 | // 1. Purge ITR&DTR for kernel. | ||
192 | movl r16=KERNEL_START | ||
193 | mov r18=KERNEL_TR_PAGE_SHIFT<<2 | ||
194 | ;; | ||
195 | ptr.i r16, r18 | ||
196 | ptr.d r16, r18 | ||
197 | ;; | ||
198 | srlz.i | ||
199 | ;; | ||
200 | srlz.d | ||
201 | ;; | ||
202 | // 2. Purge DTR for PERCPU data. | ||
203 | movl r16=PERCPU_ADDR | ||
204 | mov r18=PERCPU_PAGE_SHIFT<<2 | ||
205 | ;; | ||
206 | ptr.d r16,r18 | ||
207 | ;; | ||
208 | srlz.d | ||
209 | ;; | ||
210 | // 3. Purge ITR for PAL code. | ||
211 | GET_THIS_PADDR(r2, ia64_mca_pal_base) | ||
212 | ;; | ||
213 | ld8 r16=[r2] | ||
214 | mov r18=IA64_GRANULE_SHIFT<<2 | ||
215 | ;; | ||
216 | ptr.i r16,r18 | ||
217 | ;; | ||
218 | srlz.i | ||
219 | ;; | ||
220 | // 4. Purge DTR for stack. | ||
221 | mov r16=IA64_KR(CURRENT_STACK) | ||
222 | ;; | ||
223 | shl r16=r16,IA64_GRANULE_SHIFT | ||
224 | movl r19=PAGE_OFFSET | ||
225 | ;; | ||
226 | add r16=r19,r16 | ||
227 | mov r18=IA64_GRANULE_SHIFT<<2 | ||
228 | ;; | ||
229 | ptr.d r16,r18 | ||
230 | ;; | ||
231 | srlz.i | ||
232 | ;; | ||
233 | // Finally reload the TR registers. | ||
234 | // 1. Reload DTR/ITR registers for kernel. | ||
235 | mov r18=KERNEL_TR_PAGE_SHIFT<<2 | ||
236 | movl r17=KERNEL_START | ||
237 | ;; | ||
238 | mov cr.itir=r18 | ||
239 | mov cr.ifa=r17 | ||
240 | mov r16=IA64_TR_KERNEL | ||
241 | mov r19=ip | ||
242 | movl r18=PAGE_KERNEL | ||
243 | ;; | ||
244 | dep r17=0,r19,0, KERNEL_TR_PAGE_SHIFT | ||
245 | ;; | ||
246 | or r18=r17,r18 | ||
247 | ;; | ||
248 | itr.i itr[r16]=r18 | ||
249 | ;; | ||
250 | itr.d dtr[r16]=r18 | ||
251 | ;; | ||
252 | srlz.i | ||
253 | srlz.d | ||
254 | ;; | ||
255 | // 2. Reload DTR register for PERCPU data. | ||
256 | GET_THIS_PADDR(r2, ia64_mca_per_cpu_pte) | ||
257 | ;; | ||
258 | movl r16=PERCPU_ADDR // vaddr | ||
259 | movl r18=PERCPU_PAGE_SHIFT<<2 | ||
260 | ;; | ||
261 | mov cr.itir=r18 | ||
262 | mov cr.ifa=r16 | ||
263 | ;; | ||
264 | ld8 r18=[r2] // load per-CPU PTE | ||
265 | mov r16=IA64_TR_PERCPU_DATA; | ||
266 | ;; | ||
267 | itr.d dtr[r16]=r18 | ||
268 | ;; | ||
269 | srlz.d | ||
270 | ;; | ||
271 | // 3. Reload ITR for PAL code. | ||
272 | GET_THIS_PADDR(r2, ia64_mca_pal_pte) | ||
273 | ;; | ||
274 | ld8 r18=[r2] // load PAL PTE | ||
275 | ;; | ||
276 | GET_THIS_PADDR(r2, ia64_mca_pal_base) | ||
277 | ;; | ||
278 | ld8 r16=[r2] // load PAL vaddr | ||
279 | mov r19=IA64_GRANULE_SHIFT<<2 | ||
280 | ;; | ||
281 | mov cr.itir=r19 | ||
282 | mov cr.ifa=r16 | ||
283 | mov r20=IA64_TR_PALCODE | ||
284 | ;; | ||
285 | itr.i itr[r20]=r18 | ||
286 | ;; | ||
287 | srlz.i | ||
288 | ;; | ||
289 | // 4. Reload DTR for stack. | ||
290 | mov r16=IA64_KR(CURRENT_STACK) | ||
291 | ;; | ||
292 | shl r16=r16,IA64_GRANULE_SHIFT | ||
293 | movl r19=PAGE_OFFSET | ||
294 | ;; | ||
295 | add r18=r19,r16 | ||
296 | movl r20=PAGE_KERNEL | ||
297 | ;; | ||
298 | add r16=r20,r16 | ||
299 | mov r19=IA64_GRANULE_SHIFT<<2 | ||
300 | ;; | ||
301 | mov cr.itir=r19 | ||
302 | mov cr.ifa=r18 | ||
303 | mov r20=IA64_TR_CURRENT_STACK | ||
304 | ;; | ||
305 | itr.d dtr[r20]=r16 | ||
306 | ;; | ||
307 | srlz.d | ||
308 | ;; | ||
309 | br.sptk.many done_tlb_purge_and_reload | ||
310 | err: | ||
311 | COLD_BOOT_HANDOFF_STATE(r20,r21,r22) | ||
312 | br.sptk.many ia64_os_mca_done_restore | ||
313 | |||
314 | done_tlb_purge_and_reload: | ||
315 | |||
316 | // Setup new stack frame for OS_MCA handling | ||
317 | GET_IA64_MCA_DATA(r2) | ||
318 | ;; | ||
319 | add r3 = IA64_MCA_CPU_STACKFRAME_OFFSET, r2 | ||
320 | add r2 = IA64_MCA_CPU_RBSTORE_OFFSET, r2 | ||
321 | ;; | ||
322 | rse_switch_context(r6,r3,r2);; // RSC management in this new context | ||
323 | |||
324 | GET_IA64_MCA_DATA(r2) | ||
325 | ;; | ||
326 | add r2 = IA64_MCA_CPU_STACK_OFFSET+IA64_MCA_STACK_SIZE-16, r2 | ||
327 | ;; | ||
328 | mov r12=r2 // establish new stack-pointer | ||
329 | |||
330 | // Enter virtual mode from physical mode | ||
331 | VIRTUAL_MODE_ENTER(r2, r3, ia64_os_mca_virtual_begin, r4) | ||
332 | ia64_os_mca_virtual_begin: | ||
333 | |||
334 | // Call virtual mode handler | ||
335 | movl r2=ia64_mca_ucmc_handler;; | ||
336 | mov b6=r2;; | ||
337 | br.call.sptk.many b0=b6;; | ||
338 | .ret0: | ||
339 | // Revert back to physical mode before going back to SAL | ||
340 | PHYSICAL_MODE_ENTER(r2, r3, ia64_os_mca_virtual_end, r4) | ||
341 | ia64_os_mca_virtual_end: | ||
342 | |||
343 | // restore the original stack frame here | ||
344 | GET_IA64_MCA_DATA(r2) | ||
345 | ;; | ||
346 | add r2 = IA64_MCA_CPU_STACKFRAME_OFFSET, r2 | ||
347 | ;; | ||
348 | movl r4=IA64_PSR_MC | ||
349 | ;; | ||
350 | rse_return_context(r4,r3,r2) // switch from interrupt context for RSE | ||
351 | |||
352 | // let us restore all the registers from our PSI structure | ||
353 | mov r8=gp | ||
354 | ;; | ||
355 | begin_os_mca_restore: | ||
356 | br ia64_os_mca_proc_state_restore;; | ||
357 | |||
358 | ia64_os_mca_done_restore: | ||
359 | OS_MCA_TO_SAL_HANDOFF_STATE_RESTORE(r2);; | ||
360 | // branch back to SAL_CHECK | ||
361 | ld8 r3=[r2];; | ||
362 | mov b0=r3;; // SAL_CHECK return address | ||
363 | |||
364 | // release lock | ||
365 | movl r3=ia64_mca_serialize;; | ||
366 | DATA_VA_TO_PA(r3);; | ||
367 | st8.rel [r3]=r0 | ||
368 | |||
369 | br b0 | ||
370 | ;; | ||
371 | ia64_os_mca_dispatch_end: | ||
372 | //EndMain////////////////////////////////////////////////////////////////////// | ||
373 | |||
374 | |||
375 | //++ | ||
376 | // Name: | ||
377 | // ia64_os_mca_proc_state_dump() | ||
378 | // | ||
379 | // Stub Description: | ||
380 | // | ||
381 | // This stub dumps the processor state during MCHK to a data area | ||
382 | // | ||
383 | //-- | ||
384 | |||
385 | ia64_os_mca_proc_state_dump: | ||
386 | // Save bank 1 GRs 16-31 which will be used by c-language code when we switch | ||
387 | // to virtual addressing mode. | ||
388 | GET_IA64_MCA_DATA(r2) | ||
389 | ;; | ||
390 | add r2 = IA64_MCA_CPU_PROC_STATE_DUMP_OFFSET, r2 | ||
391 | ;; | ||
392 | // save ar.NaT | ||
393 | mov r5=ar.unat // ar.unat | ||
394 | |||
395 | // save banked GRs 16-31 along with NaT bits | ||
396 | bsw.1;; | ||
397 | st8.spill [r2]=r16,8;; | ||
398 | st8.spill [r2]=r17,8;; | ||
399 | st8.spill [r2]=r18,8;; | ||
400 | st8.spill [r2]=r19,8;; | ||
401 | st8.spill [r2]=r20,8;; | ||
402 | st8.spill [r2]=r21,8;; | ||
403 | st8.spill [r2]=r22,8;; | ||
404 | st8.spill [r2]=r23,8;; | ||
405 | st8.spill [r2]=r24,8;; | ||
406 | st8.spill [r2]=r25,8;; | ||
407 | st8.spill [r2]=r26,8;; | ||
408 | st8.spill [r2]=r27,8;; | ||
409 | st8.spill [r2]=r28,8;; | ||
410 | st8.spill [r2]=r29,8;; | ||
411 | st8.spill [r2]=r30,8;; | ||
412 | st8.spill [r2]=r31,8;; | ||
413 | |||
414 | mov r4=ar.unat;; | ||
415 | st8 [r2]=r4,8 // save User NaT bits for r16-r31 | ||
416 | mov ar.unat=r5 // restore original unat | ||
417 | bsw.0;; | ||
418 | |||
419 | //save BRs | ||
420 | add r4=8,r2 // duplicate r2 in r4 | ||
421 | add r6=2*8,r2 // duplicate r2 in r6 | ||
422 | |||
423 | mov r3=b0 | ||
424 | mov r5=b1 | ||
425 | mov r7=b2;; | ||
426 | st8 [r2]=r3,3*8 | ||
427 | st8 [r4]=r5,3*8 | ||
428 | st8 [r6]=r7,3*8;; | ||
429 | |||
430 | mov r3=b3 | ||
431 | mov r5=b4 | ||
432 | mov r7=b5;; | ||
433 | st8 [r2]=r3,3*8 | ||
434 | st8 [r4]=r5,3*8 | ||
435 | st8 [r6]=r7,3*8;; | ||
436 | |||
437 | mov r3=b6 | ||
438 | mov r5=b7;; | ||
439 | st8 [r2]=r3,2*8 | ||
440 | st8 [r4]=r5,2*8;; | ||
441 | |||
442 | cSaveCRs: | ||
443 | // save CRs | ||
444 | add r4=8,r2 // duplicate r2 in r4 | ||
445 | add r6=2*8,r2 // duplicate r2 in r6 | ||
446 | |||
447 | mov r3=cr.dcr | ||
448 | mov r5=cr.itm | ||
449 | mov r7=cr.iva;; | ||
450 | |||
451 | st8 [r2]=r3,8*8 | ||
452 | st8 [r4]=r5,3*8 | ||
453 | st8 [r6]=r7,3*8;; // 48 byte increments | ||
454 | |||
455 | mov r3=cr.pta;; | ||
456 | st8 [r2]=r3,8*8;; // 64 byte increments | ||
457 | |||
458 | // if PSR.ic=1, reading interruption registers causes an illegal operation fault | ||
459 | mov r3=psr;; | ||
460 | tbit.nz.unc p6,p0=r3,PSR_IC;; // PSI Valid Log bit pos. test | ||
461 | (p6) st8 [r2]=r0,9*8+160 // increment by 232 byte inc. | ||
462 | begin_skip_intr_regs: | ||
463 | (p6) br SkipIntrRegs;; | ||
464 | |||
465 | add r4=8,r2 // duplicate r2 in r4 | ||
466 | add r6=2*8,r2 // duplicate r2 in r6 | ||
467 | |||
468 | mov r3=cr.ipsr | ||
469 | mov r5=cr.isr | ||
470 | mov r7=r0;; | ||
471 | st8 [r2]=r3,3*8 | ||
472 | st8 [r4]=r5,3*8 | ||
473 | st8 [r6]=r7,3*8;; | ||
474 | |||
475 | mov r3=cr.iip | ||
476 | mov r5=cr.ifa | ||
477 | mov r7=cr.itir;; | ||
478 | st8 [r2]=r3,3*8 | ||
479 | st8 [r4]=r5,3*8 | ||
480 | st8 [r6]=r7,3*8;; | ||
481 | |||
482 | mov r3=cr.iipa | ||
483 | mov r5=cr.ifs | ||
484 | mov r7=cr.iim;; | ||
485 | st8 [r2]=r3,3*8 | ||
486 | st8 [r4]=r5,3*8 | ||
487 | st8 [r6]=r7,3*8;; | ||
488 | |||
489 | mov r3=cr25;; // cr.iha | ||
490 | st8 [r2]=r3,160;; // 160 byte increment | ||
491 | |||
492 | SkipIntrRegs: | ||
493 | st8 [r2]=r0,152;; // another 152 byte inc. | ||
494 | |||
495 | add r4=8,r2 // duplicate r2 in r4 | ||
496 | add r6=2*8,r2 // duplicate r2 in r6 | ||
497 | |||
498 | mov r3=cr.lid | ||
499 | // mov r5=cr.ivr // cr.ivr, don't read it | ||
500 | mov r7=cr.tpr;; | ||
501 | st8 [r2]=r3,3*8 | ||
502 | st8 [r4]=r5,3*8 | ||
503 | st8 [r6]=r7,3*8;; | ||
504 | |||
505 | mov r3=r0 // cr.eoi => cr67 | ||
506 | mov r5=r0 // cr.irr0 => cr68 | ||
507 | mov r7=r0;; // cr.irr1 => cr69 | ||
508 | st8 [r2]=r3,3*8 | ||
509 | st8 [r4]=r5,3*8 | ||
510 | st8 [r6]=r7,3*8;; | ||
511 | |||
512 | mov r3=r0 // cr.irr2 => cr70 | ||
513 | mov r5=r0 // cr.irr3 => cr71 | ||
514 | mov r7=cr.itv;; | ||
515 | st8 [r2]=r3,3*8 | ||
516 | st8 [r4]=r5,3*8 | ||
517 | st8 [r6]=r7,3*8;; | ||
518 | |||
519 | mov r3=cr.pmv | ||
520 | mov r5=cr.cmcv;; | ||
521 | st8 [r2]=r3,7*8 | ||
522 | st8 [r4]=r5,7*8;; | ||
523 | |||
524 | mov r3=r0 // cr.lrr0 => cr80 | ||
525 | mov r5=r0;; // cr.lrr1 => cr81 | ||
526 | st8 [r2]=r3,23*8 | ||
527 | st8 [r4]=r5,23*8;; | ||
528 | |||
529 | adds r2=25*8,r2;; | ||
530 | |||
531 | cSaveARs: | ||
532 | // save ARs | ||
533 | add r4=8,r2 // duplicate r2 in r4 | ||
534 | add r6=2*8,r2 // duplicate r2 in r6 | ||
535 | |||
536 | mov r3=ar.k0 | ||
537 | mov r5=ar.k1 | ||
538 | mov r7=ar.k2;; | ||
539 | st8 [r2]=r3,3*8 | ||
540 | st8 [r4]=r5,3*8 | ||
541 | st8 [r6]=r7,3*8;; | ||
542 | |||
543 | mov r3=ar.k3 | ||
544 | mov r5=ar.k4 | ||
545 | mov r7=ar.k5;; | ||
546 | st8 [r2]=r3,3*8 | ||
547 | st8 [r4]=r5,3*8 | ||
548 | st8 [r6]=r7,3*8;; | ||
549 | |||
550 | mov r3=ar.k6 | ||
551 | mov r5=ar.k7 | ||
552 | mov r7=r0;; // ar.kr8 | ||
553 | st8 [r2]=r3,10*8 | ||
554 | st8 [r4]=r5,10*8 | ||
555 | st8 [r6]=r7,10*8;; // increment by 10*8 = 80 bytes | ||
556 | |||
557 | mov r3=ar.rsc | ||
558 | mov ar.rsc=r0 // put RSE in enforced lazy mode | ||
559 | mov r5=ar.bsp | ||
560 | ;; | ||
561 | mov r7=ar.bspstore;; | ||
562 | st8 [r2]=r3,3*8 | ||
563 | st8 [r4]=r5,3*8 | ||
564 | st8 [r6]=r7,3*8;; | ||
565 | |||
566 | mov r3=ar.rnat;; | ||
567 | st8 [r2]=r3,8*13 // increment by 13x8 bytes | ||
568 | |||
569 | mov r3=ar.ccv;; | ||
570 | st8 [r2]=r3,8*4 | ||
571 | |||
572 | mov r3=ar.unat;; | ||
573 | st8 [r2]=r3,8*4 | ||
574 | |||
575 | mov r3=ar.fpsr;; | ||
576 | st8 [r2]=r3,8*4 | ||
577 | |||
578 | mov r3=ar.itc;; | ||
579 | st8 [r2]=r3,160 // 160 | ||
580 | |||
581 | mov r3=ar.pfs;; | ||
582 | st8 [r2]=r3,8 | ||
583 | |||
584 | mov r3=ar.lc;; | ||
585 | st8 [r2]=r3,8 | ||
586 | |||
587 | mov r3=ar.ec;; | ||
588 | st8 [r2]=r3 | ||
589 | add r2=8*62,r2 //padding | ||
590 | |||
591 | // save RRs | ||
592 | mov ar.lc=0x08-1 | ||
593 | movl r4=0x00;; | ||
594 | |||
595 | cStRR: | ||
596 | dep.z r5=r4,61,3;; | ||
597 | mov r3=rr[r5];; | ||
598 | st8 [r2]=r3,8 | ||
599 | add r4=1,r4 | ||
600 | br.cloop.sptk.few cStRR | ||
601 | ;; | ||
602 | end_os_mca_dump: | ||
603 | br ia64_os_mca_done_dump;; | ||
604 | |||
605 | //EndStub////////////////////////////////////////////////////////////////////// | ||
606 | |||
607 | |||
608 | //++ | ||
609 | // Name: | ||
610 | // ia64_os_mca_proc_state_restore() | ||
611 | // | ||
612 | // Stub Description: | ||
613 | // | ||
614 | // This is a stub to restore the saved processor state during MCHK | ||
615 | // | ||
616 | //-- | ||
617 | |||
618 | ia64_os_mca_proc_state_restore: | ||
619 | |||
620 | // Restore bank1 GR16-31 | ||
621 | GET_IA64_MCA_DATA(r2) | ||
622 | ;; | ||
623 | add r2 = IA64_MCA_CPU_PROC_STATE_DUMP_OFFSET, r2 | ||
624 | |||
625 | restore_GRs: // restore bank-1 GRs 16-31 | ||
626 | bsw.1;; | ||
627 | add r3=16*8,r2;; // to get to NaT of GR 16-31 | ||
628 | ld8 r3=[r3];; | ||
629 | mov ar.unat=r3;; // first restore NaT | ||
630 | |||
631 | ld8.fill r16=[r2],8;; | ||
632 | ld8.fill r17=[r2],8;; | ||
633 | ld8.fill r18=[r2],8;; | ||
634 | ld8.fill r19=[r2],8;; | ||
635 | ld8.fill r20=[r2],8;; | ||
636 | ld8.fill r21=[r2],8;; | ||
637 | ld8.fill r22=[r2],8;; | ||
638 | ld8.fill r23=[r2],8;; | ||
639 | ld8.fill r24=[r2],8;; | ||
640 | ld8.fill r25=[r2],8;; | ||
641 | ld8.fill r26=[r2],8;; | ||
642 | ld8.fill r27=[r2],8;; | ||
643 | ld8.fill r28=[r2],8;; | ||
644 | ld8.fill r29=[r2],8;; | ||
645 | ld8.fill r30=[r2],8;; | ||
646 | ld8.fill r31=[r2],8;; | ||
647 | |||
648 | ld8 r3=[r2],8;; // increment to skip NaT | ||
649 | bsw.0;; | ||
650 | |||
651 | restore_BRs: | ||
652 | add r4=8,r2 // duplicate r2 in r4 | ||
653 | add r6=2*8,r2;; // duplicate r2 in r6 | ||
654 | |||
655 | ld8 r3=[r2],3*8 | ||
656 | ld8 r5=[r4],3*8 | ||
657 | ld8 r7=[r6],3*8;; | ||
658 | mov b0=r3 | ||
659 | mov b1=r5 | ||
660 | mov b2=r7;; | ||
661 | |||
662 | ld8 r3=[r2],3*8 | ||
663 | ld8 r5=[r4],3*8 | ||
664 | ld8 r7=[r6],3*8;; | ||
665 | mov b3=r3 | ||
666 | mov b4=r5 | ||
667 | mov b5=r7;; | ||
668 | |||
669 | ld8 r3=[r2],2*8 | ||
670 | ld8 r5=[r4],2*8;; | ||
671 | mov b6=r3 | ||
672 | mov b7=r5;; | ||
673 | |||
674 | restore_CRs: | ||
675 | add r4=8,r2 // duplicate r2 in r4 | ||
676 | add r6=2*8,r2;; // duplicate r2 in r6 | ||
677 | |||
678 | ld8 r3=[r2],8*8 | ||
679 | ld8 r5=[r4],3*8 | ||
680 | ld8 r7=[r6],3*8;; // 48 byte increments | ||
681 | mov cr.dcr=r3 | ||
682 | mov cr.itm=r5 | ||
683 | mov cr.iva=r7;; | ||
684 | |||
685 | ld8 r3=[r2],8*8;; // 64 byte increments | ||
686 | // mov cr.pta=r3 | ||
687 | |||
688 | |||
689 | // if PSR.ic=1, reading interruption registers causes an illegal operation fault | ||
690 | mov r3=psr;; | ||
691 | tbit.nz.unc p6,p0=r3,PSR_IC;; // PSI Valid Log bit pos. test | ||
692 | (p6) st8 [r2]=r0,9*8+160 // increment by 232 byte inc. | ||
693 | |||
694 | begin_rskip_intr_regs: | ||
695 | (p6) br rSkipIntrRegs;; | ||
696 | |||
697 | add r4=8,r2 // duplicate r2 in r4 | ||
698 | add r6=2*8,r2;; // duplicate r2 in r6 | ||
699 | |||
700 | ld8 r3=[r2],3*8 | ||
701 | ld8 r5=[r4],3*8 | ||
702 | ld8 r7=[r6],3*8;; | ||
703 | mov cr.ipsr=r3 | ||
704 | // mov cr.isr=r5 // cr.isr is read only | ||
705 | |||
706 | ld8 r3=[r2],3*8 | ||
707 | ld8 r5=[r4],3*8 | ||
708 | ld8 r7=[r6],3*8;; | ||
709 | mov cr.iip=r3 | ||
710 | mov cr.ifa=r5 | ||
711 | mov cr.itir=r7;; | ||
712 | |||
713 | ld8 r3=[r2],3*8 | ||
714 | ld8 r5=[r4],3*8 | ||
715 | ld8 r7=[r6],3*8;; | ||
716 | mov cr.iipa=r3 | ||
717 | mov cr.ifs=r5 | ||
718 | mov cr.iim=r7 | ||
719 | |||
720 | ld8 r3=[r2],160;; // 160 byte increment | ||
721 | mov cr.iha=r3 | ||
722 | |||
723 | rSkipIntrRegs: | ||
724 | ld8 r3=[r2],152;; // another 152 byte inc. | ||
725 | |||
726 | add r4=8,r2 // duplicate r2 in r4 | ||
727 | add r6=2*8,r2;; // duplicate r2 in r6 | ||
728 | |||
729 | ld8 r3=[r2],8*3 | ||
730 | ld8 r5=[r4],8*3 | ||
731 | ld8 r7=[r6],8*3;; | ||
732 | mov cr.lid=r3 | ||
733 | // mov cr.ivr=r5 // cr.ivr is read only | ||
734 | mov cr.tpr=r7;; | ||
735 | |||
736 | ld8 r3=[r2],8*3 | ||
737 | ld8 r5=[r4],8*3 | ||
738 | ld8 r7=[r6],8*3;; | ||
739 | // mov cr.eoi=r3 | ||
740 | // mov cr.irr0=r5 // cr.irr0 is read only | ||
741 | // mov cr.irr1=r7;; // cr.irr1 is read only | ||
742 | |||
743 | ld8 r3=[r2],8*3 | ||
744 | ld8 r5=[r4],8*3 | ||
745 | ld8 r7=[r6],8*3;; | ||
746 | // mov cr.irr2=r3 // cr.irr2 is read only | ||
747 | // mov cr.irr3=r5 // cr.irr3 is read only | ||
748 | mov cr.itv=r7;; | ||
749 | |||
750 | ld8 r3=[r2],8*7 | ||
751 | ld8 r5=[r4],8*7;; | ||
752 | mov cr.pmv=r3 | ||
753 | mov cr.cmcv=r5;; | ||
754 | |||
755 | ld8 r3=[r2],8*23 | ||
756 | ld8 r5=[r4],8*23;; | ||
757 | adds r2=8*23,r2 | ||
758 | adds r4=8*23,r4;; | ||
759 | // mov cr.lrr0=r3 | ||
760 | // mov cr.lrr1=r5 | ||
761 | |||
762 | adds r2=8*2,r2;; | ||
763 | |||
764 | restore_ARs: | ||
765 | add r4=8,r2 // duplicate r2 in r4 | ||
766 | add r6=2*8,r2;; // duplicate r2 in r6 | ||
767 | |||
768 | ld8 r3=[r2],3*8 | ||
769 | ld8 r5=[r4],3*8 | ||
770 | ld8 r7=[r6],3*8;; | ||
771 | mov ar.k0=r3 | ||
772 | mov ar.k1=r5 | ||
773 | mov ar.k2=r7;; | ||
774 | |||
775 | ld8 r3=[r2],3*8 | ||
776 | ld8 r5=[r4],3*8 | ||
777 | ld8 r7=[r6],3*8;; | ||
778 | mov ar.k3=r3 | ||
779 | mov ar.k4=r5 | ||
780 | mov ar.k5=r7;; | ||
781 | |||
782 | ld8 r3=[r2],10*8 | ||
783 | ld8 r5=[r4],10*8 | ||
784 | ld8 r7=[r6],10*8;; | ||
785 | mov ar.k6=r3 | ||
786 | mov ar.k7=r5 | ||
787 | ;; | ||
788 | |||
789 | ld8 r3=[r2],3*8 | ||
790 | ld8 r5=[r4],3*8 | ||
791 | ld8 r7=[r6],3*8;; | ||
792 | // mov ar.rsc=r3 | ||
793 | // mov ar.bsp=r5 // ar.bsp is read only | ||
794 | mov ar.rsc=r0 // make sure that RSE is in enforced lazy mode | ||
795 | ;; | ||
796 | mov ar.bspstore=r7;; | ||
797 | |||
798 | ld8 r9=[r2],8*13;; | ||
799 | mov ar.rnat=r9 | ||
800 | |||
801 | mov ar.rsc=r3 | ||
802 | ld8 r3=[r2],8*4;; | ||
803 | mov ar.ccv=r3 | ||
804 | |||
805 | ld8 r3=[r2],8*4;; | ||
806 | mov ar.unat=r3 | ||
807 | |||
808 | ld8 r3=[r2],8*4;; | ||
809 | mov ar.fpsr=r3 | ||
810 | |||
811 | ld8 r3=[r2],160;; // 160 | ||
812 | // mov ar.itc=r3 | ||
813 | |||
814 | ld8 r3=[r2],8;; | ||
815 | mov ar.pfs=r3 | ||
816 | |||
817 | ld8 r3=[r2],8;; | ||
818 | mov ar.lc=r3 | ||
819 | |||
820 | ld8 r3=[r2];; | ||
821 | mov ar.ec=r3 | ||
822 | add r2=8*62,r2;; // padding | ||
823 | |||
824 | restore_RRs: | ||
825 | mov r5=ar.lc | ||
826 | mov ar.lc=0x08-1 | ||
827 | movl r4=0x00;; | ||
828 | cStRRr: | ||
829 | dep.z r7=r4,61,3 | ||
830 | ld8 r3=[r2],8;; | ||
831 | mov rr[r7]=r3 // what are its access privileges? | ||
832 | add r4=1,r4 | ||
833 | br.cloop.sptk.few cStRRr | ||
834 | ;; | ||
835 | mov ar.lc=r5 | ||
836 | ;; | ||
837 | end_os_mca_restore: | ||
838 | br ia64_os_mca_done_restore;; | ||
839 | |||
840 | //EndStub////////////////////////////////////////////////////////////////////// | ||
841 | |||
842 | |||
843 | // ok, the issue here is that we need to save state information so | ||
844 | // it can be useable by the kernel debugger and show regs routines. | ||
845 | // In order to do this, our best bet is save the current state (plus | ||
846 | // the state information obtain from the MIN_STATE_AREA) into a pt_regs | ||
847 | // format. This way we can pass it on in a useable format. | ||
848 | // | ||
849 | |||
850 | // | ||
851 | // SAL to OS entry point for INIT on the monarch processor | ||
852 | // This has been defined for registration purposes with SAL | ||
853 | // as a part of ia64_mca_init. | ||
854 | // | ||
855 | // When we get here, the following registers have been | ||
856 | // set by the SAL for our use | ||
857 | // | ||
858 | // 1. GR1 = OS INIT GP | ||
859 | // 2. GR8 = PAL_PROC physical address | ||
860 | // 3. GR9 = SAL_PROC physical address | ||
861 | // 4. GR10 = SAL GP (physical) | ||
862 | // 5. GR11 = Init Reason | ||
863 | // 0 = Received INIT for event other than crash dump switch | ||
864 | // 1 = Received wakeup at the end of an OS_MCA corrected machine check | ||
865 | // 2 = Received INIT due to CrashDump switch assertion | ||
866 | // | ||
867 | // 6. GR12 = Return address to location within SAL_INIT procedure | ||
868 | |||
869 | |||
870 | GLOBAL_ENTRY(ia64_monarch_init_handler) | ||
871 | .prologue | ||
872 | // stash the information the SAL passed to os | ||
873 | SAL_TO_OS_MCA_HANDOFF_STATE_SAVE(r2) | ||
874 | ;; | ||
875 | SAVE_MIN_WITH_COVER | ||
876 | ;; | ||
877 | mov r8=cr.ifa | ||
878 | mov r9=cr.isr | ||
879 | adds r3=8,r2 // set up second base pointer | ||
880 | ;; | ||
881 | SAVE_REST | ||
882 | |||
883 | // ok, enough should be saved at this point to be dangerous, and supply | ||
884 | // information for a dump | ||
885 | // We need to switch to Virtual mode before hitting the C functions. | ||
886 | |||
887 | movl r2=IA64_PSR_IT|IA64_PSR_IC|IA64_PSR_DT|IA64_PSR_RT|IA64_PSR_DFH|IA64_PSR_BN | ||
888 | mov r3=psr // get the current psr, minimum enabled at this point | ||
889 | ;; | ||
890 | or r2=r2,r3 | ||
891 | ;; | ||
892 | movl r3=IVirtual_Switch | ||
893 | ;; | ||
894 | mov cr.iip=r3 // short return to set the appropriate bits | ||
895 | mov cr.ipsr=r2 // need to do an rfi to set appropriate bits | ||
896 | ;; | ||
897 | rfi | ||
898 | ;; | ||
899 | IVirtual_Switch: | ||
900 | // | ||
901 | // We should now be running virtual | ||
902 | // | ||
903 | // Let's call the C handler to get the rest of the state info | ||
904 | // | ||
905 | alloc r14=ar.pfs,0,0,2,0 // now it's safe (must be first in insn group!) | ||
906 | ;; | ||
907 | adds out0=16,sp // out0 = pointer to pt_regs | ||
908 | ;; | ||
909 | DO_SAVE_SWITCH_STACK | ||
910 | .body | ||
911 | adds out1=16,sp // out1 = pointer to switch_stack | ||
912 | |||
913 | br.call.sptk.many rp=ia64_init_handler | ||
914 | .ret1: | ||
915 | |||
916 | return_from_init: | ||
917 | br.sptk return_from_init | ||
918 | END(ia64_monarch_init_handler) | ||
919 | |||
920 | // | ||
921 | // SAL to OS entry point for INIT on the slave processor | ||
922 | // This has been defined for registration purposes with SAL | ||
923 | // as a part of ia64_mca_init. | ||
924 | // | ||
925 | |||
926 | GLOBAL_ENTRY(ia64_slave_init_handler) | ||
927 | 1: br.sptk 1b | ||
928 | END(ia64_slave_init_handler) | ||
diff --git a/arch/ia64/kernel/mca_drv.c b/arch/ia64/kernel/mca_drv.c new file mode 100644 index 000000000000..ab478172c349 --- /dev/null +++ b/arch/ia64/kernel/mca_drv.c | |||
@@ -0,0 +1,639 @@ | |||
1 | /* | ||
2 | * File: mca_drv.c | ||
3 | * Purpose: Generic MCA handling layer | ||
4 | * | ||
5 | * Copyright (C) 2004 FUJITSU LIMITED | ||
6 | * Copyright (C) Hidetoshi Seto (seto.hidetoshi@jp.fujitsu.com) | ||
7 | */ | ||
8 | #include <linux/config.h> | ||
9 | #include <linux/types.h> | ||
10 | #include <linux/init.h> | ||
11 | #include <linux/sched.h> | ||
12 | #include <linux/interrupt.h> | ||
13 | #include <linux/irq.h> | ||
14 | #include <linux/kallsyms.h> | ||
15 | #include <linux/smp_lock.h> | ||
16 | #include <linux/bootmem.h> | ||
17 | #include <linux/acpi.h> | ||
18 | #include <linux/timer.h> | ||
19 | #include <linux/module.h> | ||
20 | #include <linux/kernel.h> | ||
21 | #include <linux/smp.h> | ||
22 | #include <linux/workqueue.h> | ||
23 | #include <linux/mm.h> | ||
24 | |||
25 | #include <asm/delay.h> | ||
26 | #include <asm/machvec.h> | ||
27 | #include <asm/page.h> | ||
28 | #include <asm/ptrace.h> | ||
29 | #include <asm/system.h> | ||
30 | #include <asm/sal.h> | ||
31 | #include <asm/mca.h> | ||
32 | |||
33 | #include <asm/irq.h> | ||
34 | #include <asm/hw_irq.h> | ||
35 | |||
36 | #include "mca_drv.h" | ||
37 | |||
38 | /* max size of SAL error record (default); exposed as the sal_rec_max module parameter and read by init_record_index_pools() */ | ||
39 | static int sal_rec_max = 10000; | ||
40 | |||
41 | /* hand-off state pointers; assigned from the arguments of mca_try_to_recover() on every call */ | ||
42 | static ia64_mca_sal_to_os_state_t *sal_to_os_handoff_state; | ||
43 | static ia64_mca_os_to_sal_state_t *os_to_sal_handoff_state; | ||
44 | |||
45 | /* from mca_drv_asm.S */ | ||
46 | extern void *mca_handler_bhhook(void); | ||
47 | |||
/* held by mca_handler_bh() around its call to mca_page_isolate() */
48 | static DEFINE_SPINLOCK(mca_bh_lock); | ||
49 | |||
/* result type of is_mca_global() */
50 | typedef enum { | ||
51 | MCA_IS_LOCAL = 0, | ||
52 | MCA_IS_GLOBAL = 1 | ||
53 | } mca_type_t; | ||
54 | |||
/* capacity of the page_isolate[] table below */
55 | #define MAX_PAGE_ISOLATE 1024 | ||
56 | |||
/* pages registered by mca_page_isolate(); num_page_isolate entries are in use */
57 | static struct page *page_isolate[MAX_PAGE_ISOLATE]; | ||
58 | static int num_page_isolate = 0; | ||
59 | |||
/* result type of mca_page_isolate() */
60 | typedef enum { | ||
61 | ISOLATE_NG = 0, | ||
62 | ISOLATE_OK = 1 | ||
63 | } isolate_status_t; | ||
64 | |||
65 | /* | ||
66 | * This pool keeps pointers to the section part of SAL error record | ||
 * It is allocated by init_record_index_pools() and consumed by
 * LOG_INDEX_ADD_SECT_PTR(), which advances cur_idx modulo max_idx.
67 | */ | ||
68 | static struct { | ||
69 | slidx_list_t *buffer; /* section pointer list pool */ | ||
70 | int cur_idx; /* Current index of section pointer list pool */ | ||
71 | int max_idx; /* Maximum index of section pointer list pool */ | ||
72 | } slidx_pool; | ||
73 | |||
74 | /** | ||
75 | * mca_page_isolate - isolate a poisoned page in order not to use it later | ||
76 | * @paddr: poisoned memory location | ||
77 | * | ||
78 | * Return value: | ||
79 | * ISOLATE_OK / ISOLATE_NG | ||
80 | */ | ||
81 | |||
82 | static isolate_status_t | ||
83 | mca_page_isolate(unsigned long paddr) | ||
84 | { | ||
85 | int i; | ||
86 | struct page *p; | ||
87 | |||
88 | /* whether physical address is valid or not */ | ||
89 | if ( !ia64_phys_addr_valid(paddr) ) | ||
90 | return ISOLATE_NG; | ||
91 | |||
92 | /* convert physical address to physical page number */ | ||
93 | p = pfn_to_page(paddr>>PAGE_SHIFT); | ||
94 | |||
95 | /* check whether a page number have been already registered or not */ | ||
96 | for( i = 0; i < num_page_isolate; i++ ) | ||
97 | if( page_isolate[i] == p ) | ||
98 | return ISOLATE_OK; /* already listed */ | ||
99 | |||
100 | /* limitation check */ | ||
101 | if( num_page_isolate == MAX_PAGE_ISOLATE ) | ||
102 | return ISOLATE_NG; | ||
103 | |||
104 | /* kick pages having attribute 'SLAB' or 'Reserved' */ | ||
105 | if( PageSlab(p) || PageReserved(p) ) | ||
106 | return ISOLATE_NG; | ||
107 | |||
108 | /* add attribute 'Reserved' and register the page */ | ||
109 | SetPageReserved(p); | ||
110 | page_isolate[num_page_isolate++] = p; | ||
111 | |||
112 | return ISOLATE_OK; | ||
113 | } | ||
114 | |||
115 | /** | ||
116 | * mca_hanlder_bh - Kill the process which occurred memory read error | ||
117 | * @paddr: poisoned address received from MCA Handler | ||
118 | */ | ||
119 | |||
120 | void | ||
121 | mca_handler_bh(unsigned long paddr) | ||
122 | { | ||
123 | printk(KERN_DEBUG "OS_MCA: process [pid: %d](%s) encounters MCA.\n", | ||
124 | current->pid, current->comm); | ||
125 | |||
126 | spin_lock(&mca_bh_lock); | ||
127 | if (mca_page_isolate(paddr) == ISOLATE_OK) { | ||
128 | printk(KERN_DEBUG "Page isolation: ( %lx ) success.\n", paddr); | ||
129 | } else { | ||
130 | printk(KERN_DEBUG "Page isolation: ( %lx ) failure.\n", paddr); | ||
131 | } | ||
132 | spin_unlock(&mca_bh_lock); | ||
133 | |||
134 | /* This process is about to be killed itself */ | ||
135 | force_sig(SIGKILL, current); | ||
136 | schedule(); | ||
137 | } | ||
138 | |||
139 | /** | ||
140 | * mca_make_peidx - Make index of processor error section | ||
141 | * @slpi: pointer to record of processor error section | ||
142 | * @peidx: pointer to index of processor error section | ||
143 | */ | ||
144 | |||
145 | static void | ||
146 | mca_make_peidx(sal_log_processor_info_t *slpi, peidx_table_t *peidx) | ||
147 | { | ||
148 | /* | ||
149 | * calculate the start address of | ||
150 | * "struct cpuid_info" and "sal_processor_static_info_t". | ||
151 | */ | ||
152 | u64 total_check_num = slpi->valid.num_cache_check | ||
153 | + slpi->valid.num_tlb_check | ||
154 | + slpi->valid.num_bus_check | ||
155 | + slpi->valid.num_reg_file_check | ||
156 | + slpi->valid.num_ms_check; | ||
157 | u64 head_size = sizeof(sal_log_mod_error_info_t) * total_check_num | ||
158 | + sizeof(sal_log_processor_info_t); | ||
159 | u64 mid_size = slpi->valid.cpuid_info * sizeof(struct sal_cpuid_info); | ||
160 | |||
161 | peidx_head(peidx) = slpi; | ||
162 | peidx_mid(peidx) = (struct sal_cpuid_info *) | ||
163 | (slpi->valid.cpuid_info ? ((char*)slpi + head_size) : NULL); | ||
164 | peidx_bottom(peidx) = (sal_processor_static_info_t *) | ||
165 | (slpi->valid.psi_static_struct ? | ||
166 | ((char*)slpi + head_size + mid_size) : NULL); | ||
167 | } | ||
168 | |||
169 | /** | ||
170 | * mca_make_slidx - Make index of SAL error record | ||
171 | * @buffer: pointer to SAL error record | ||
172 | * @slidx: pointer to index of SAL error record | ||
173 | * | ||
174 | * Return value: | ||
175 | * 1 if record has platform error / 0 if not | ||
176 | */ | ||
177 | #define LOG_INDEX_ADD_SECT_PTR(sect, ptr) \ | ||
178 | { slidx_list_t *hl = &slidx_pool.buffer[slidx_pool.cur_idx]; \ | ||
179 | hl->hdr = ptr; \ | ||
180 | list_add(&hl->list, &(sect)); \ | ||
181 | slidx_pool.cur_idx = (slidx_pool.cur_idx + 1)%slidx_pool.max_idx; } | ||
182 | |||
183 | static int | ||
184 | mca_make_slidx(void *buffer, slidx_table_t *slidx) | ||
185 | { | ||
186 | int platform_err = 0; | ||
187 | int record_len = ((sal_log_record_header_t*)buffer)->len; | ||
188 | u32 ercd_pos; | ||
189 | int sects; | ||
190 | sal_log_section_hdr_t *sp; | ||
191 | |||
192 | /* | ||
193 | * Initialize index referring current record | ||
194 | */ | ||
195 | INIT_LIST_HEAD(&(slidx->proc_err)); | ||
196 | INIT_LIST_HEAD(&(slidx->mem_dev_err)); | ||
197 | INIT_LIST_HEAD(&(slidx->sel_dev_err)); | ||
198 | INIT_LIST_HEAD(&(slidx->pci_bus_err)); | ||
199 | INIT_LIST_HEAD(&(slidx->smbios_dev_err)); | ||
200 | INIT_LIST_HEAD(&(slidx->pci_comp_err)); | ||
201 | INIT_LIST_HEAD(&(slidx->plat_specific_err)); | ||
202 | INIT_LIST_HEAD(&(slidx->host_ctlr_err)); | ||
203 | INIT_LIST_HEAD(&(slidx->plat_bus_err)); | ||
204 | INIT_LIST_HEAD(&(slidx->unsupported)); | ||
205 | |||
206 | /* | ||
207 | * Extract a Record Header | ||
208 | */ | ||
209 | slidx->header = buffer; | ||
210 | |||
211 | /* | ||
212 | * Extract each section records | ||
213 | * (arranged from "int ia64_log_platform_info_print()") | ||
214 | */ | ||
215 | for (ercd_pos = sizeof(sal_log_record_header_t), sects = 0; | ||
216 | ercd_pos < record_len; ercd_pos += sp->len, sects++) { | ||
217 | sp = (sal_log_section_hdr_t *)((char*)buffer + ercd_pos); | ||
218 | if (!efi_guidcmp(sp->guid, SAL_PROC_DEV_ERR_SECT_GUID)) { | ||
219 | LOG_INDEX_ADD_SECT_PTR(slidx->proc_err, sp); | ||
220 | } else if (!efi_guidcmp(sp->guid, SAL_PLAT_MEM_DEV_ERR_SECT_GUID)) { | ||
221 | platform_err = 1; | ||
222 | LOG_INDEX_ADD_SECT_PTR(slidx->mem_dev_err, sp); | ||
223 | } else if (!efi_guidcmp(sp->guid, SAL_PLAT_SEL_DEV_ERR_SECT_GUID)) { | ||
224 | platform_err = 1; | ||
225 | LOG_INDEX_ADD_SECT_PTR(slidx->sel_dev_err, sp); | ||
226 | } else if (!efi_guidcmp(sp->guid, SAL_PLAT_PCI_BUS_ERR_SECT_GUID)) { | ||
227 | platform_err = 1; | ||
228 | LOG_INDEX_ADD_SECT_PTR(slidx->pci_bus_err, sp); | ||
229 | } else if (!efi_guidcmp(sp->guid, SAL_PLAT_SMBIOS_DEV_ERR_SECT_GUID)) { | ||
230 | platform_err = 1; | ||
231 | LOG_INDEX_ADD_SECT_PTR(slidx->smbios_dev_err, sp); | ||
232 | } else if (!efi_guidcmp(sp->guid, SAL_PLAT_PCI_COMP_ERR_SECT_GUID)) { | ||
233 | platform_err = 1; | ||
234 | LOG_INDEX_ADD_SECT_PTR(slidx->pci_comp_err, sp); | ||
235 | } else if (!efi_guidcmp(sp->guid, SAL_PLAT_SPECIFIC_ERR_SECT_GUID)) { | ||
236 | platform_err = 1; | ||
237 | LOG_INDEX_ADD_SECT_PTR(slidx->plat_specific_err, sp); | ||
238 | } else if (!efi_guidcmp(sp->guid, SAL_PLAT_HOST_CTLR_ERR_SECT_GUID)) { | ||
239 | platform_err = 1; | ||
240 | LOG_INDEX_ADD_SECT_PTR(slidx->host_ctlr_err, sp); | ||
241 | } else if (!efi_guidcmp(sp->guid, SAL_PLAT_BUS_ERR_SECT_GUID)) { | ||
242 | platform_err = 1; | ||
243 | LOG_INDEX_ADD_SECT_PTR(slidx->plat_bus_err, sp); | ||
244 | } else { | ||
245 | LOG_INDEX_ADD_SECT_PTR(slidx->unsupported, sp); | ||
246 | } | ||
247 | } | ||
248 | slidx->n_sections = sects; | ||
249 | |||
250 | return platform_err; | ||
251 | } | ||
252 | |||
253 | /** | ||
254 | * init_record_index_pools - Initialize pool of lists for SAL record index | ||
255 | * | ||
256 | * Return value: | ||
257 | * 0 on Success / -ENOMEM on Failure | ||
258 | */ | ||
259 | static int | ||
260 | init_record_index_pools(void) | ||
261 | { | ||
262 | int i; | ||
263 | int rec_max_size; /* Maximum size of SAL error records */ | ||
264 | int sect_min_size; /* Minimum size of SAL error sections */ | ||
265 | /* minimum size table of each section */ | ||
266 | static int sal_log_sect_min_sizes[] = { | ||
267 | sizeof(sal_log_processor_info_t) + sizeof(sal_processor_static_info_t), | ||
268 | sizeof(sal_log_mem_dev_err_info_t), | ||
269 | sizeof(sal_log_sel_dev_err_info_t), | ||
270 | sizeof(sal_log_pci_bus_err_info_t), | ||
271 | sizeof(sal_log_smbios_dev_err_info_t), | ||
272 | sizeof(sal_log_pci_comp_err_info_t), | ||
273 | sizeof(sal_log_plat_specific_err_info_t), | ||
274 | sizeof(sal_log_host_ctlr_err_info_t), | ||
275 | sizeof(sal_log_plat_bus_err_info_t), | ||
276 | }; | ||
277 | |||
278 | /* | ||
279 | * MCA handler cannot allocate new memory on flight, | ||
280 | * so we preallocate enough memory to handle a SAL record. | ||
281 | * | ||
282 | * Initialize a handling set of slidx_pool: | ||
283 | * 1. Pick up the max size of SAL error records | ||
284 | * 2. Pick up the min size of SAL error sections | ||
285 | * 3. Allocate the pool as enough to 2 SAL records | ||
286 | * (now we can estimate the maxinum of section in a record.) | ||
287 | */ | ||
288 | |||
289 | /* - 1 - */ | ||
290 | rec_max_size = sal_rec_max; | ||
291 | |||
292 | /* - 2 - */ | ||
293 | sect_min_size = sal_log_sect_min_sizes[0]; | ||
294 | for (i = 1; i < sizeof sal_log_sect_min_sizes/sizeof(size_t); i++) | ||
295 | if (sect_min_size > sal_log_sect_min_sizes[i]) | ||
296 | sect_min_size = sal_log_sect_min_sizes[i]; | ||
297 | |||
298 | /* - 3 - */ | ||
299 | slidx_pool.max_idx = (rec_max_size/sect_min_size) * 2 + 1; | ||
300 | slidx_pool.buffer = (slidx_list_t *) kmalloc(slidx_pool.max_idx * sizeof(slidx_list_t), GFP_KERNEL); | ||
301 | |||
302 | return slidx_pool.buffer ? 0 : -ENOMEM; | ||
303 | } | ||
304 | |||
305 | |||
306 | /***************************************************************************** | ||
307 | * Recovery functions * | ||
308 | *****************************************************************************/ | ||
309 | |||
310 | /** | ||
311 | * is_mca_global - Check whether this MCA is global or not | ||
312 | * @peidx: pointer of index of processor error section | ||
313 | * @pbci: pointer to pal_bus_check_info_t | ||
314 | * | ||
315 | * Return value: | ||
316 | * MCA_IS_LOCAL / MCA_IS_GLOBAL | ||
317 | */ | ||
318 | |||
319 | static mca_type_t | ||
320 | is_mca_global(peidx_table_t *peidx, pal_bus_check_info_t *pbci) | ||
321 | { | ||
322 | pal_processor_state_info_t *psp = (pal_processor_state_info_t*)peidx_psp(peidx); | ||
323 | |||
324 | /* | ||
325 | * PAL can request a rendezvous, if the MCA has a global scope. | ||
326 | * If "rz_always" flag is set, SAL requests MCA rendezvous | ||
327 | * in spite of global MCA. | ||
328 | * Therefore it is local MCA when rendezvous has not been requested. | ||
329 | * Failed to rendezvous, the system must be down. | ||
330 | */ | ||
331 | switch (sal_to_os_handoff_state->imsto_rendez_state) { | ||
332 | case -1: /* SAL rendezvous unsuccessful */ | ||
333 | return MCA_IS_GLOBAL; | ||
334 | case 0: /* SAL rendezvous not required */ | ||
335 | return MCA_IS_LOCAL; | ||
336 | case 1: /* SAL rendezvous successful int */ | ||
337 | case 2: /* SAL rendezvous successful int with init */ | ||
338 | default: | ||
339 | break; | ||
340 | } | ||
341 | |||
342 | /* | ||
343 | * If One or more Cache/TLB/Reg_File/Uarch_Check is here, | ||
344 | * it would be a local MCA. (i.e. processor internal error) | ||
345 | */ | ||
346 | if (psp->tc || psp->cc || psp->rc || psp->uc) | ||
347 | return MCA_IS_LOCAL; | ||
348 | |||
349 | /* | ||
350 | * Bus_Check structure with Bus_Check.ib (internal bus error) flag set | ||
351 | * would be a global MCA. (e.g. a system bus address parity error) | ||
352 | */ | ||
353 | if (!pbci || pbci->ib) | ||
354 | return MCA_IS_GLOBAL; | ||
355 | |||
356 | /* | ||
357 | * Bus_Check structure with Bus_Check.eb (external bus error) flag set | ||
358 | * could be either a local MCA or a global MCA. | ||
359 | * | ||
360 | * Referring Bus_Check.bsi: | ||
361 | * 0: Unknown/unclassified | ||
362 | * 1: BERR# | ||
363 | * 2: BINIT# | ||
364 | * 3: Hard Fail | ||
365 | * (FIXME: Are these SGI specific or generic bsi values?) | ||
366 | */ | ||
367 | if (pbci->eb) | ||
368 | switch (pbci->bsi) { | ||
369 | case 0: | ||
370 | /* e.g. a load from poisoned memory */ | ||
371 | return MCA_IS_LOCAL; | ||
372 | case 1: | ||
373 | case 2: | ||
374 | case 3: | ||
375 | return MCA_IS_GLOBAL; | ||
376 | } | ||
377 | |||
378 | return MCA_IS_GLOBAL; | ||
379 | } | ||
380 | |||
381 | /** | ||
382 | * recover_from_read_error - Try to recover the errors which type are "read"s. | ||
383 | * @slidx: pointer of index of SAL error record | ||
384 | * @peidx: pointer of index of processor error section | ||
385 | * @pbci: pointer of pal_bus_check_info | ||
386 | * | ||
387 | * Return value: | ||
388 | * 1 on Success / 0 on Failure | ||
389 | */ | ||
390 | |||
391 | static int | ||
392 | recover_from_read_error(slidx_table_t *slidx, peidx_table_t *peidx, pal_bus_check_info_t *pbci) | ||
393 | { | ||
394 | sal_log_mod_error_info_t *smei; | ||
395 | pal_min_state_area_t *pmsa; | ||
396 | struct ia64_psr *psr1, *psr2; | ||
397 | ia64_fptr_t *mca_hdlr_bh = (ia64_fptr_t*)mca_handler_bhhook; | ||
398 | |||
399 | /* Is target address valid? */ | ||
400 | if (!pbci->tv) | ||
401 | return 0; | ||
402 | |||
403 | /* | ||
404 | * cpu read or memory-mapped io read | ||
405 | * | ||
406 | * offending process affected process OS MCA do | ||
407 | * kernel mode kernel mode down system | ||
408 | * kernel mode user mode kill the process | ||
409 | * user mode kernel mode down system (*) | ||
410 | * user mode user mode kill the process | ||
411 | * | ||
412 | * (*) You could terminate offending user-mode process | ||
413 | * if (pbci->pv && pbci->pl != 0) *and* if you sure | ||
414 | * the process not have any locks of kernel. | ||
415 | */ | ||
416 | |||
417 | psr1 =(struct ia64_psr *)&(peidx_minstate_area(peidx)->pmsa_ipsr); | ||
418 | |||
419 | /* | ||
420 | * Check the privilege level of interrupted context. | ||
421 | * If it is user-mode, then terminate affected process. | ||
422 | */ | ||
423 | if (psr1->cpl != 0) { | ||
424 | smei = peidx_bus_check(peidx, 0); | ||
425 | if (smei->valid.target_identifier) { | ||
426 | /* | ||
427 | * setup for resume to bottom half of MCA, | ||
428 | * "mca_handler_bhhook" | ||
429 | */ | ||
430 | pmsa = (pal_min_state_area_t *)(sal_to_os_handoff_state->pal_min_state | (6ul<<61)); | ||
431 | /* pass to bhhook as 1st argument (gr8) */ | ||
432 | pmsa->pmsa_gr[8-1] = smei->target_identifier; | ||
433 | /* set interrupted return address (but no use) */ | ||
434 | pmsa->pmsa_br0 = pmsa->pmsa_iip; | ||
435 | /* change resume address to bottom half */ | ||
436 | pmsa->pmsa_iip = mca_hdlr_bh->fp; | ||
437 | pmsa->pmsa_gr[1-1] = mca_hdlr_bh->gp; | ||
438 | /* set cpl with kernel mode */ | ||
439 | psr2 = (struct ia64_psr *)&pmsa->pmsa_ipsr; | ||
440 | psr2->cpl = 0; | ||
441 | psr2->ri = 0; | ||
442 | |||
443 | return 1; | ||
444 | } | ||
445 | |||
446 | } | ||
447 | |||
448 | return 0; | ||
449 | } | ||
450 | |||
451 | /** | ||
452 | * recover_from_platform_error - Recover from platform error. | ||
453 | * @slidx: pointer of index of SAL error record | ||
454 | * @peidx: pointer of index of processor error section | ||
455 | * @pbci: pointer of pal_bus_check_info | ||
456 | * | ||
457 | * Return value: | ||
458 | * 1 on Success / 0 on Failure | ||
459 | */ | ||
460 | |||
461 | static int | ||
462 | recover_from_platform_error(slidx_table_t *slidx, peidx_table_t *peidx, pal_bus_check_info_t *pbci) | ||
463 | { | ||
464 | int status = 0; | ||
465 | pal_processor_state_info_t *psp = (pal_processor_state_info_t*)peidx_psp(peidx); | ||
466 | |||
467 | if (psp->bc && pbci->eb && pbci->bsi == 0) { | ||
468 | switch(pbci->type) { | ||
469 | case 1: /* partial read */ | ||
470 | case 3: /* full line(cpu) read */ | ||
471 | case 9: /* I/O space read */ | ||
472 | status = recover_from_read_error(slidx, peidx, pbci); | ||
473 | break; | ||
474 | case 0: /* unknown */ | ||
475 | case 2: /* partial write */ | ||
476 | case 4: /* full line write */ | ||
477 | case 5: /* implicit or explicit write-back operation */ | ||
478 | case 6: /* snoop probe */ | ||
479 | case 7: /* incoming or outgoing ptc.g */ | ||
480 | case 8: /* write coalescing transactions */ | ||
481 | case 10: /* I/O space write */ | ||
482 | case 11: /* inter-processor interrupt message(IPI) */ | ||
483 | case 12: /* interrupt acknowledge or external task priority cycle */ | ||
484 | default: | ||
485 | break; | ||
486 | } | ||
487 | } | ||
488 | |||
489 | return status; | ||
490 | } | ||
491 | |||
492 | /** | ||
493 | * recover_from_processor_error | ||
494 | * @platform: whether there are some platform error section or not | ||
495 | * @slidx: pointer of index of SAL error record | ||
496 | * @peidx: pointer of index of processor error section | ||
497 | * @pbci: pointer of pal_bus_check_info | ||
498 | * | ||
499 | * Return value: | ||
500 | * 1 on Success / 0 on Failure | ||
501 | */ | ||
502 | /* | ||
503 | * Later we try to recover when below all conditions are satisfied. | ||
504 | * 1. Only one processor error section is exist. | ||
505 | * 2. BUS_CHECK is exist and the others are not exist.(Except TLB_CHECK) | ||
506 | * 3. The entry of BUS_CHECK_INFO is 1. | ||
507 | * 4. "External bus error" flag is set and the others are not set. | ||
508 | */ | ||
509 | |||
510 | static int | ||
511 | recover_from_processor_error(int platform, slidx_table_t *slidx, peidx_table_t *peidx, pal_bus_check_info_t *pbci) | ||
512 | { | ||
513 | pal_processor_state_info_t *psp = (pal_processor_state_info_t*)peidx_psp(peidx); | ||
514 | |||
515 | /* | ||
516 | * We cannot recover errors with other than bus_check. | ||
517 | */ | ||
518 | if (psp->cc || psp->rc || psp->uc) | ||
519 | return 0; | ||
520 | |||
521 | /* | ||
522 | * If there is no bus error, record is weird but we need not to recover. | ||
523 | */ | ||
524 | if (psp->bc == 0 || pbci == NULL) | ||
525 | return 1; | ||
526 | |||
527 | /* | ||
528 | * Sorry, we cannot handle so many. | ||
529 | */ | ||
530 | if (peidx_bus_check_num(peidx) > 1) | ||
531 | return 0; | ||
532 | /* | ||
533 | * Well, here is only one bus error. | ||
534 | */ | ||
535 | if (pbci->ib || pbci->cc) | ||
536 | return 0; | ||
537 | if (pbci->eb && pbci->bsi > 0) | ||
538 | return 0; | ||
539 | if (psp->ci == 0) | ||
540 | return 0; | ||
541 | |||
542 | /* | ||
543 | * This is a local MCA and estimated as recoverble external bus error. | ||
544 | * (e.g. a load from poisoned memory) | ||
545 | * This means "there are some platform errors". | ||
546 | */ | ||
547 | if (platform) | ||
548 | return recover_from_platform_error(slidx, peidx, pbci); | ||
549 | /* | ||
550 | * On account of strange SAL error record, we cannot recover. | ||
551 | */ | ||
552 | return 0; | ||
553 | } | ||
554 | |||
555 | /** | ||
556 | * mca_try_to_recover - Try to recover from MCA | ||
557 | * @rec: pointer to a SAL error record | ||
558 | * | ||
559 | * Return value: | ||
560 | * 1 on Success / 0 on Failure | ||
561 | */ | ||
562 | |||
563 | static int | ||
564 | mca_try_to_recover(void *rec, | ||
565 | ia64_mca_sal_to_os_state_t *sal_to_os_state, | ||
566 | ia64_mca_os_to_sal_state_t *os_to_sal_state) | ||
567 | { | ||
568 | int platform_err; | ||
569 | int n_proc_err; | ||
570 | slidx_table_t slidx; | ||
571 | peidx_table_t peidx; | ||
572 | pal_bus_check_info_t pbci; | ||
573 | |||
574 | /* handoff state from/to mca.c */ | ||
575 | sal_to_os_handoff_state = sal_to_os_state; | ||
576 | os_to_sal_handoff_state = os_to_sal_state; | ||
577 | |||
578 | /* Make index of SAL error record */ | ||
579 | platform_err = mca_make_slidx(rec, &slidx); | ||
580 | |||
581 | /* Count processor error sections */ | ||
582 | n_proc_err = slidx_count(&slidx, proc_err); | ||
583 | |||
584 | /* Now, OS can recover when there is one processor error section */ | ||
585 | if (n_proc_err > 1) | ||
586 | return 0; | ||
587 | else if (n_proc_err == 0) { | ||
588 | /* Weird SAL record ... We need not to recover */ | ||
589 | |||
590 | return 1; | ||
591 | } | ||
592 | |||
593 | /* Make index of processor error section */ | ||
594 | mca_make_peidx((sal_log_processor_info_t*)slidx_first_entry(&slidx.proc_err)->hdr, &peidx); | ||
595 | |||
596 | /* Extract Processor BUS_CHECK[0] */ | ||
597 | *((u64*)&pbci) = peidx_check_info(&peidx, bus_check, 0); | ||
598 | |||
599 | /* Check whether MCA is global or not */ | ||
600 | if (is_mca_global(&peidx, &pbci)) | ||
601 | return 0; | ||
602 | |||
603 | /* Try to recover a processor error */ | ||
604 | return recover_from_processor_error(platform_err, &slidx, &peidx, &pbci); | ||
605 | } | ||
606 | |||
607 | /* | ||
608 | * ============================================================================= | ||
609 | */ | ||
610 | |||
611 | int __init mca_external_handler_init(void) | ||
612 | { | ||
613 | if (init_record_index_pools()) | ||
614 | return -ENOMEM; | ||
615 | |||
616 | /* register external mca handlers */ | ||
617 | if (ia64_reg_MCA_extension(mca_try_to_recover)){ | ||
618 | printk(KERN_ERR "ia64_reg_MCA_extension failed.\n"); | ||
619 | kfree(slidx_pool.buffer); | ||
620 | return -EFAULT; | ||
621 | } | ||
622 | return 0; | ||
623 | } | ||
624 | |||
625 | void __exit mca_external_handler_exit(void) | ||
626 | { | ||
627 | /* unregister external mca handlers */ | ||
628 | ia64_unreg_MCA_extension(); | ||
629 | kfree(slidx_pool.buffer); | ||
630 | } | ||
631 | |||
/* module entry and exit points */
632 | module_init(mca_external_handler_init); | ||
633 | module_exit(mca_external_handler_exit); | ||
634 | |||
/*
 * sal_rec_max is registered with mode 0644.  Within this file it is only
 * read by init_record_index_pools(), which mca_external_handler_init() calls.
 */
635 | module_param(sal_rec_max, int, 0644); | ||
636 | MODULE_PARM_DESC(sal_rec_max, "Max size of SAL error record"); | ||
637 | |||
638 | MODULE_DESCRIPTION("ia64 platform dependent mca handler driver"); | ||
639 | MODULE_LICENSE("GPL"); | ||
diff --git a/arch/ia64/kernel/mca_drv.h b/arch/ia64/kernel/mca_drv.h new file mode 100644 index 000000000000..0227b761f2c4 --- /dev/null +++ b/arch/ia64/kernel/mca_drv.h | |||
@@ -0,0 +1,113 @@ | |||
1 | /* | ||
2 | * File: mca_drv.h | ||
3 | * Purpose: Define helpers for Generic MCA handling | ||
4 | * | ||
5 | * Copyright (C) 2004 FUJITSU LIMITED | ||
6 | * Copyright (C) Hidetoshi Seto (seto.hidetoshi@jp.fujitsu.com) | ||
7 | */ | ||
8 | /* | ||
9 | * Processor error section: | ||
10 | * | ||
11 | * +-sal_log_processor_info_t *info-------------+ | ||
12 | * | sal_log_section_hdr_t header; | | ||
13 | * | ... | | ||
14 | * | sal_log_mod_error_info_t info[0]; | | ||
15 | * +-+----------------+-------------------------+ | ||
16 | * | CACHE_CHECK | ^ num_cache_check v | ||
17 | * +----------------+ | ||
18 | * | TLB_CHECK | ^ num_tlb_check v | ||
19 | * +----------------+ | ||
20 | * | BUS_CHECK | ^ num_bus_check v | ||
21 | * +----------------+ | ||
22 | * | REG_FILE_CHECK | ^ num_reg_file_check v | ||
23 | * +----------------+ | ||
24 | * | MS_CHECK | ^ num_ms_check v | ||
25 | * +-struct cpuid_info *id----------------------+ | ||
26 | * | regs[5]; | | ||
27 | * | reserved; | | ||
28 | * +-sal_processor_static_info_t *regs----------+ | ||
29 | * | valid; | | ||
30 | * | ... | | ||
31 | * | fr[128]; | | ||
32 | * +--------------------------------------------+ | ||
33 | */ | ||
34 | |||
35 | /* peidx: index of processor error section */ | ||
36 | typedef struct peidx_table { | ||
37 | sal_log_processor_info_t *info; | ||
38 | struct sal_cpuid_info *id; | ||
39 | sal_processor_static_info_t *regs; | ||
40 | } peidx_table_t; | ||
41 | |||
42 | #define peidx_head(p) (((p)->info)) | ||
43 | #define peidx_mid(p) (((p)->id)) | ||
44 | #define peidx_bottom(p) (((p)->regs)) | ||
45 | |||
46 | #define peidx_psp(p) (&(peidx_head(p)->proc_state_parameter)) | ||
47 | #define peidx_field_valid(p) (&(peidx_head(p)->valid)) | ||
48 | #define peidx_minstate_area(p) (&(peidx_bottom(p)->min_state_area)) | ||
49 | |||
50 | #define peidx_cache_check_num(p) (peidx_head(p)->valid.num_cache_check) | ||
51 | #define peidx_tlb_check_num(p) (peidx_head(p)->valid.num_tlb_check) | ||
52 | #define peidx_bus_check_num(p) (peidx_head(p)->valid.num_bus_check) | ||
53 | #define peidx_reg_file_check_num(p) (peidx_head(p)->valid.num_reg_file_check) | ||
54 | #define peidx_ms_check_num(p) (peidx_head(p)->valid.num_ms_check) | ||
55 | |||
56 | #define peidx_cache_check_idx(p, n) (n) | ||
57 | #define peidx_tlb_check_idx(p, n) (peidx_cache_check_idx(p, peidx_cache_check_num(p)) + n) | ||
58 | #define peidx_bus_check_idx(p, n) (peidx_tlb_check_idx(p, peidx_tlb_check_num(p)) + n) | ||
59 | #define peidx_reg_file_check_idx(p, n) (peidx_bus_check_idx(p, peidx_bus_check_num(p)) + n) | ||
60 | #define peidx_ms_check_idx(p, n) (peidx_reg_file_check_idx(p, peidx_reg_file_check_num(p)) + n) | ||
61 | |||
62 | #define peidx_mod_error_info(p, name, n) \ | ||
63 | ({ int __idx = peidx_##name##_idx(p, n); \ | ||
64 | sal_log_mod_error_info_t *__ret = NULL; \ | ||
65 | if (peidx_##name##_num(p) > n) /*BUG*/ \ | ||
66 | __ret = &(peidx_head(p)->info[__idx]); \ | ||
67 | __ret; }) | ||
68 | |||
69 | #define peidx_cache_check(p, n) peidx_mod_error_info(p, cache_check, n) | ||
70 | #define peidx_tlb_check(p, n) peidx_mod_error_info(p, tlb_check, n) | ||
71 | #define peidx_bus_check(p, n) peidx_mod_error_info(p, bus_check, n) | ||
72 | #define peidx_reg_file_check(p, n) peidx_mod_error_info(p, reg_file_check, n) | ||
73 | #define peidx_ms_check(p, n) peidx_mod_error_info(p, ms_check, n) | ||
74 | |||
75 | #define peidx_check_info(proc, name, n) \ | ||
76 | ({ \ | ||
77 | sal_log_mod_error_info_t *__info = peidx_mod_error_info(proc, name, n);\ | ||
78 | u64 __temp = __info && __info->valid.check_info \ | ||
79 | ? __info->check_info : 0; \ | ||
80 | __temp; }) | ||
81 | |||
82 | /* slidx: index of SAL log error record */ | ||
83 | |||
84 | typedef struct slidx_list { | ||
85 | struct list_head list; | ||
86 | sal_log_section_hdr_t *hdr; | ||
87 | } slidx_list_t; | ||
88 | |||
89 | typedef struct slidx_table { | ||
90 | sal_log_record_header_t *header; | ||
91 | int n_sections; /* # of section headers */ | ||
92 | struct list_head proc_err; | ||
93 | struct list_head mem_dev_err; | ||
94 | struct list_head sel_dev_err; | ||
95 | struct list_head pci_bus_err; | ||
96 | struct list_head smbios_dev_err; | ||
97 | struct list_head pci_comp_err; | ||
98 | struct list_head plat_specific_err; | ||
99 | struct list_head host_ctlr_err; | ||
100 | struct list_head plat_bus_err; | ||
101 | struct list_head unsupported; /* list of unsupported sections */ | ||
102 | } slidx_table_t; | ||
103 | |||
104 | #define slidx_foreach_entry(pos, head) \ | ||
105 | list_for_each_entry(pos, head, list) | ||
106 | #define slidx_first_entry(head) \ | ||
107 | (((head)->next != (head)) ? list_entry((head)->next, typeof(slidx_list_t), list) : NULL) | ||
108 | #define slidx_count(slidx, sec) \ | ||
109 | ({ int __count = 0; \ | ||
110 | slidx_list_t *__pos; \ | ||
111 | slidx_foreach_entry(__pos, &((slidx)->sec)) { __count++; }\ | ||
112 | __count; }) | ||
113 | |||
diff --git a/arch/ia64/kernel/mca_drv_asm.S b/arch/ia64/kernel/mca_drv_asm.S new file mode 100644 index 000000000000..bcfa05acc561 --- /dev/null +++ b/arch/ia64/kernel/mca_drv_asm.S | |||
@@ -0,0 +1,45 @@ | |||
1 | /* | ||
2 | * File: mca_drv_asm.S | ||
3 | * Purpose: Assembly portion of Generic MCA handling | ||
4 | * | ||
5 | * Copyright (C) 2004 FUJITSU LIMITED | ||
6 | * Copyright (C) Hidetoshi Seto (seto.hidetoshi@jp.fujitsu.com) | ||
7 | */ | ||
8 | #include <linux/config.h> | ||
9 | #include <linux/threads.h> | ||
10 | |||
11 | #include <asm/asmmacro.h> | ||
12 | #include <asm/processor.h> | ||
13 | |||
/*
 * mca_handler_bhhook - assembly trampoline into mca_handler_bh()
 *
 * recover_from_read_error() (mca_drv.c) points the resume address of the
 * min-state area at this routine and stores the target identifier in gr8.
 * The code allocates a new register frame, loads r13 from IA64_KR(CURRENT)
 * and r12 from the IA64_TASK_THREAD_KSP_OFFSET field of that task, then
 * calls mca_handler_bh() with r8 as its argument.  The original comments
 * mark the call as not returning; the instructions after the call restore
 * ar.pfs and rp, set r8 from r0 and return, in case it does.
 */
14 | GLOBAL_ENTRY(mca_handler_bhhook) | ||
15 | invala // clear RSE ? | ||
16 | ;; // | ||
17 | cover // | ||
18 | ;; // | ||
19 | clrrrb // | ||
20 | ;; | ||
21 | alloc r16=ar.pfs,0,2,1,0 // make a new frame | ||
22 | ;; | ||
23 | mov r13=IA64_KR(CURRENT) // current task pointer | ||
24 | ;; | ||
25 | adds r12=IA64_TASK_THREAD_KSP_OFFSET,r13 | ||
26 | ;; | ||
27 | ld8 r12=[r12] // stack pointer | ||
28 | ;; | ||
29 | mov loc0=r16 | ||
30 | movl loc1=mca_handler_bh // recovery C function | ||
31 | ;; | ||
32 | mov out0=r8 // poisoned address | ||
33 | mov b6=loc1 | ||
34 | ;; | ||
// loc1 is reused: it held the callee address (now in b6), from here on it holds rp
35 | mov loc1=rp | ||
36 | ;; | ||
37 | br.call.sptk.many rp=b6 // not return ... | ||
38 | ;; | ||
39 | mov ar.pfs=loc0 | ||
40 | mov rp=loc1 | ||
41 | ;; | ||
42 | mov r8=r0 | ||
43 | br.ret.sptk.many rp | ||
44 | ;; | ||
45 | END(mca_handler_bhhook) | ||
diff --git a/arch/ia64/kernel/minstate.h b/arch/ia64/kernel/minstate.h new file mode 100644 index 000000000000..1dbc7b2497c9 --- /dev/null +++ b/arch/ia64/kernel/minstate.h | |||
@@ -0,0 +1,251 @@ | |||
1 | #include <linux/config.h> | ||
2 | |||
3 | #include <asm/cache.h> | ||
4 | |||
5 | #include "entry.h" | ||
6 | |||
7 | /* | ||
8 | * For ivt.s we want to access the stack virtually so we don't have to disable translation | ||
9 | * on interrupts. | ||
10 | * | ||
11 | * On entry: | ||
12 | * r1: pointer to current task (ar.k6) | ||
13 | */ | ||
14 | #define MINSTATE_START_SAVE_MIN_VIRT \ | ||
15 | (pUStk) mov ar.rsc=0; /* set enforced lazy mode, pl 0, little-endian, loadrs=0 */ \ | ||
16 | ;; \ | ||
17 | (pUStk) mov.m r24=ar.rnat; \ | ||
18 | (pUStk) addl r22=IA64_RBS_OFFSET,r1; /* compute base of RBS */ \ | ||
19 | (pKStk) mov r1=sp; /* get sp */ \ | ||
20 | ;; \ | ||
21 | (pUStk) lfetch.fault.excl.nt1 [r22]; \ | ||
22 | (pUStk) addl r1=IA64_STK_OFFSET-IA64_PT_REGS_SIZE,r1; /* compute base of memory stack */ \ | ||
23 | (pUStk) mov r23=ar.bspstore; /* save ar.bspstore */ \ | ||
24 | ;; \ | ||
25 | (pUStk) mov ar.bspstore=r22; /* switch to kernel RBS */ \ | ||
26 | (pKStk) addl r1=-IA64_PT_REGS_SIZE,r1; /* if in kernel mode, use sp (r12) */ \ | ||
27 | ;; \ | ||
28 | (pUStk) mov r18=ar.bsp; \ | ||
29 | (pUStk) mov ar.rsc=0x3; /* set eager mode, pl 0, little-endian, loadrs=0 */ | ||
30 | |||
31 | #define MINSTATE_END_SAVE_MIN_VIRT \ | ||
32 | bsw.1; /* switch back to bank 1 (must be last in insn group) */ \ | ||
33 | ;; | ||
34 | |||
35 | /* | ||
36 | * For mca_asm.S we want to access the stack physically since the state is saved before we | ||
37 | * go virtual and don't want to destroy the iip or ipsr. | ||
38 | */ | ||
39 | #define MINSTATE_START_SAVE_MIN_PHYS \ | ||
40 | (pKStk) mov r3=IA64_KR(PER_CPU_DATA);; \ | ||
41 | (pKStk) addl r3=THIS_CPU(ia64_mca_data),r3;; \ | ||
42 | (pKStk) ld8 r3 = [r3];; \ | ||
43 | (pKStk) addl r3=IA64_MCA_CPU_INIT_STACK_OFFSET,r3;; \ | ||
44 | (pKStk) addl sp=IA64_STK_OFFSET-IA64_PT_REGS_SIZE,r3; \ | ||
45 | (pUStk) mov ar.rsc=0; /* set enforced lazy mode, pl 0, little-endian, loadrs=0 */ \ | ||
46 | (pUStk) addl r22=IA64_RBS_OFFSET,r1; /* compute base of register backing store */ \ | ||
47 | ;; \ | ||
48 | (pUStk) mov r24=ar.rnat; \ | ||
49 | (pUStk) addl r1=IA64_STK_OFFSET-IA64_PT_REGS_SIZE,r1; /* compute base of memory stack */ \ | ||
50 | (pUStk) mov r23=ar.bspstore; /* save ar.bspstore */ \ | ||
51 | (pUStk) dep r22=-1,r22,61,3; /* compute kernel virtual addr of RBS */ \ | ||
52 | ;; \ | ||
53 | (pKStk) addl r1=-IA64_PT_REGS_SIZE,r1; /* if in kernel mode, use sp (r12) */ \ | ||
54 | (pUStk) mov ar.bspstore=r22; /* switch to kernel RBS */ \ | ||
55 | ;; \ | ||
56 | (pUStk) mov r18=ar.bsp; \ | ||
57 | (pUStk) mov ar.rsc=0x3; /* set eager mode, pl 0, little-endian, loadrs=0 */ \ | ||
58 | |||
59 | #define MINSTATE_END_SAVE_MIN_PHYS \ | ||
60 | dep r12=-1,r12,61,3; /* make sp a kernel virtual address */ \ | ||
61 | ;; | ||
62 | |||
63 | #ifdef MINSTATE_VIRT | ||
64 | # define MINSTATE_GET_CURRENT(reg) mov reg=IA64_KR(CURRENT) | ||
65 | # define MINSTATE_START_SAVE_MIN MINSTATE_START_SAVE_MIN_VIRT | ||
66 | # define MINSTATE_END_SAVE_MIN MINSTATE_END_SAVE_MIN_VIRT | ||
67 | #endif | ||
68 | |||
69 | #ifdef MINSTATE_PHYS | ||
70 | # define MINSTATE_GET_CURRENT(reg) mov reg=IA64_KR(CURRENT);; tpa reg=reg | ||
71 | # define MINSTATE_START_SAVE_MIN MINSTATE_START_SAVE_MIN_PHYS | ||
72 | # define MINSTATE_END_SAVE_MIN MINSTATE_END_SAVE_MIN_PHYS | ||
73 | #endif | ||
74 | |||
75 | /* | ||
76 | * DO_SAVE_MIN switches to the kernel stacks (if necessary) and saves | ||
77 | * the minimum state necessary that allows us to turn psr.ic back | ||
78 | * on. | ||
79 | * | ||
80 | * Assumed state upon entry: | ||
81 | * psr.ic: off | ||
82 | * r31: contains saved predicates (pr) | ||
83 | * | ||
84 | * Upon exit, the state is as follows: | ||
85 | * psr.ic: off | ||
86 | * r2 = points to &pt_regs.r16 | ||
87 | * r8 = contents of ar.ccv | ||
88 | * r9 = contents of ar.csd | ||
89 | * r10 = contents of ar.ssd | ||
90 | * r11 = FPSR_DEFAULT | ||
91 | * r12 = kernel sp (kernel virtual address) | ||
92 | * r13 = points to current task_struct (kernel virtual address) | ||
93 | * p15 = TRUE if psr.i is set in cr.ipsr | ||
94 | * predicate registers (other than p2, p3, and p15), b6, r3, r14, r15: | ||
95 | * preserved | ||
96 | * | ||
97 | * Note that psr.ic is NOT turned on by this macro. This is so that | ||
98 | * we can pass interruption state as arguments to a handler. | ||
99 | */ | ||
100 | #define DO_SAVE_MIN(COVER,SAVE_IFS,EXTRA) \ | ||
101 | MINSTATE_GET_CURRENT(r16); /* M (or M;;I) */ \ | ||
102 | mov r27=ar.rsc; /* M */ \ | ||
103 | mov r20=r1; /* A */ \ | ||
104 | mov r25=ar.unat; /* M */ \ | ||
105 | mov r29=cr.ipsr; /* M */ \ | ||
106 | mov r26=ar.pfs; /* I */ \ | ||
107 | mov r28=cr.iip; /* M */ \ | ||
108 | mov r21=ar.fpsr; /* M */ \ | ||
109 | COVER; /* B;; (or nothing) */ \ | ||
110 | ;; \ | ||
111 | adds r16=IA64_TASK_THREAD_ON_USTACK_OFFSET,r16; \ | ||
112 | ;; \ | ||
113 | ld1 r17=[r16]; /* load current->thread.on_ustack flag */ \ | ||
114 | st1 [r16]=r0; /* clear current->thread.on_ustack flag */ \ | ||
115 | adds r1=-IA64_TASK_THREAD_ON_USTACK_OFFSET,r16 \ | ||
116 | /* switch from user to kernel RBS: */ \ | ||
117 | ;; \ | ||
118 | invala; /* M */ \ | ||
119 | SAVE_IFS; \ | ||
120 | cmp.eq pKStk,pUStk=r0,r17; /* are we in kernel mode already? */ \ | ||
121 | ;; \ | ||
122 | MINSTATE_START_SAVE_MIN \ | ||
123 | adds r17=2*L1_CACHE_BYTES,r1; /* really: biggest cache-line size */ \ | ||
124 | adds r16=PT(CR_IPSR),r1; \ | ||
125 | ;; \ | ||
126 | lfetch.fault.excl.nt1 [r17],L1_CACHE_BYTES; \ | ||
127 | st8 [r16]=r29; /* save cr.ipsr */ \ | ||
128 | ;; \ | ||
129 | lfetch.fault.excl.nt1 [r17]; \ | ||
130 | tbit.nz p15,p0=r29,IA64_PSR_I_BIT; \ | ||
131 | mov r29=b0 \ | ||
132 | ;; \ | ||
133 | adds r16=PT(R8),r1; /* initialize first base pointer */ \ | ||
134 | adds r17=PT(R9),r1; /* initialize second base pointer */ \ | ||
135 | (pKStk) mov r18=r0; /* make sure r18 isn't NaT */ \ | ||
136 | ;; \ | ||
137 | .mem.offset 0,0; st8.spill [r16]=r8,16; \ | ||
138 | .mem.offset 8,0; st8.spill [r17]=r9,16; \ | ||
139 | ;; \ | ||
140 | .mem.offset 0,0; st8.spill [r16]=r10,24; \ | ||
141 | .mem.offset 8,0; st8.spill [r17]=r11,24; \ | ||
142 | ;; \ | ||
143 | st8 [r16]=r28,16; /* save cr.iip */ \ | ||
144 | st8 [r17]=r30,16; /* save cr.ifs */ \ | ||
145 | (pUStk) sub r18=r18,r22; /* r18=RSE.ndirty*8 */ \ | ||
146 | mov r8=ar.ccv; \ | ||
147 | mov r9=ar.csd; \ | ||
148 | mov r10=ar.ssd; \ | ||
149 | movl r11=FPSR_DEFAULT; /* L-unit */ \ | ||
150 | ;; \ | ||
151 | st8 [r16]=r25,16; /* save ar.unat */ \ | ||
152 | st8 [r17]=r26,16; /* save ar.pfs */ \ | ||
153 | shl r18=r18,16; /* compute ar.rsc to be used for "loadrs" */ \ | ||
154 | ;; \ | ||
155 | st8 [r16]=r27,16; /* save ar.rsc */ \ | ||
156 | (pUStk) st8 [r17]=r24,16; /* save ar.rnat */ \ | ||
157 | (pKStk) adds r17=16,r17; /* skip over ar_rnat field */ \ | ||
158 | ;; /* avoid RAW on r16 & r17 */ \ | ||
159 | (pUStk) st8 [r16]=r23,16; /* save ar.bspstore */ \ | ||
160 | st8 [r17]=r31,16; /* save predicates */ \ | ||
161 | (pKStk) adds r16=16,r16; /* skip over ar_bspstore field */ \ | ||
162 | ;; \ | ||
163 | st8 [r16]=r29,16; /* save b0 */ \ | ||
164 | st8 [r17]=r18,16; /* save ar.rsc value for "loadrs" */ \ | ||
165 | cmp.eq pNonSys,pSys=r0,r0 /* initialize pSys=0, pNonSys=1 */ \ | ||
166 | ;; \ | ||
167 | .mem.offset 0,0; st8.spill [r16]=r20,16; /* save original r1 */ \ | ||
168 | .mem.offset 8,0; st8.spill [r17]=r12,16; \ | ||
169 | adds r12=-16,r1; /* switch to kernel memory stack (with 16 bytes of scratch) */ \ | ||
170 | ;; \ | ||
171 | .mem.offset 0,0; st8.spill [r16]=r13,16; \ | ||
172 | .mem.offset 8,0; st8.spill [r17]=r21,16; /* save ar.fpsr */ \ | ||
173 | mov r13=IA64_KR(CURRENT); /* establish `current' */ \ | ||
174 | ;; \ | ||
175 | .mem.offset 0,0; st8.spill [r16]=r15,16; \ | ||
176 | .mem.offset 8,0; st8.spill [r17]=r14,16; \ | ||
177 | ;; \ | ||
178 | .mem.offset 0,0; st8.spill [r16]=r2,16; \ | ||
179 | .mem.offset 8,0; st8.spill [r17]=r3,16; \ | ||
180 | adds r2=IA64_PT_REGS_R16_OFFSET,r1; \ | ||
181 | ;; \ | ||
182 | EXTRA; \ | ||
183 | movl r1=__gp; /* establish kernel global pointer */ \ | ||
184 | ;; \ | ||
185 | MINSTATE_END_SAVE_MIN | ||
186 | |||
187 | /* | ||
188 | * SAVE_REST saves the remainder of pt_regs (with psr.ic on). | ||
189 | * | ||
190 | * Assumed state upon entry: | ||
191 | * psr.ic: on | ||
192 | * r2: points to &pt_regs.r16 | ||
193 | * r3: points to &pt_regs.r17 | ||
194 | * r8: contents of ar.ccv | ||
195 | * r9: contents of ar.csd | ||
196 | * r10: contents of ar.ssd | ||
197 | * r11: FPSR_DEFAULT | ||
198 | * | ||
199 | * Registers r14 and r15 are guaranteed not to be touched by SAVE_REST. | ||
200 | */ | ||
201 | #define SAVE_REST \ | ||
202 | .mem.offset 0,0; st8.spill [r2]=r16,16; \ | ||
203 | .mem.offset 8,0; st8.spill [r3]=r17,16; \ | ||
204 | ;; \ | ||
205 | .mem.offset 0,0; st8.spill [r2]=r18,16; \ | ||
206 | .mem.offset 8,0; st8.spill [r3]=r19,16; \ | ||
207 | ;; \ | ||
208 | .mem.offset 0,0; st8.spill [r2]=r20,16; \ | ||
209 | .mem.offset 8,0; st8.spill [r3]=r21,16; \ | ||
210 | mov r18=b6; \ | ||
211 | ;; \ | ||
212 | .mem.offset 0,0; st8.spill [r2]=r22,16; \ | ||
213 | .mem.offset 8,0; st8.spill [r3]=r23,16; \ | ||
214 | mov r19=b7; \ | ||
215 | ;; \ | ||
216 | .mem.offset 0,0; st8.spill [r2]=r24,16; \ | ||
217 | .mem.offset 8,0; st8.spill [r3]=r25,16; \ | ||
218 | ;; \ | ||
219 | .mem.offset 0,0; st8.spill [r2]=r26,16; \ | ||
220 | .mem.offset 8,0; st8.spill [r3]=r27,16; \ | ||
221 | ;; \ | ||
222 | .mem.offset 0,0; st8.spill [r2]=r28,16; \ | ||
223 | .mem.offset 8,0; st8.spill [r3]=r29,16; \ | ||
224 | ;; \ | ||
225 | .mem.offset 0,0; st8.spill [r2]=r30,16; \ | ||
226 | .mem.offset 8,0; st8.spill [r3]=r31,32; \ | ||
227 | ;; \ | ||
228 | mov ar.fpsr=r11; /* M-unit */ \ | ||
229 | st8 [r2]=r8,8; /* ar.ccv */ \ | ||
230 | adds r24=PT(B6)-PT(F7),r3; \ | ||
231 | ;; \ | ||
232 | stf.spill [r2]=f6,32; \ | ||
233 | stf.spill [r3]=f7,32; \ | ||
234 | ;; \ | ||
235 | stf.spill [r2]=f8,32; \ | ||
236 | stf.spill [r3]=f9,32; \ | ||
237 | ;; \ | ||
238 | stf.spill [r2]=f10; \ | ||
239 | stf.spill [r3]=f11; \ | ||
240 | adds r25=PT(B7)-PT(F11),r3; \ | ||
241 | ;; \ | ||
242 | st8 [r24]=r18,16; /* b6 */ \ | ||
243 | st8 [r25]=r19,16; /* b7 */ \ | ||
244 | ;; \ | ||
245 | st8 [r24]=r9; /* ar.csd */ \ | ||
246 | st8 [r25]=r10; /* ar.ssd */ \ | ||
247 | ;; | ||
248 | |||
249 | #define SAVE_MIN_WITH_COVER DO_SAVE_MIN(cover, mov r30=cr.ifs,) | ||
250 | #define SAVE_MIN_WITH_COVER_R19 DO_SAVE_MIN(cover, mov r30=cr.ifs, mov r15=r19) | ||
251 | #define SAVE_MIN DO_SAVE_MIN( , mov r30=r0, ) | ||
diff --git a/arch/ia64/kernel/module.c b/arch/ia64/kernel/module.c new file mode 100644 index 000000000000..febc091c2f02 --- /dev/null +++ b/arch/ia64/kernel/module.c | |||
@@ -0,0 +1,952 @@ | |||
1 | /* | ||
2 | * IA-64-specific support for kernel module loader. | ||
3 | * | ||
4 | * Copyright (C) 2003 Hewlett-Packard Co | ||
5 | * David Mosberger-Tang <davidm@hpl.hp.com> | ||
6 | * | ||
7 | * Loosely based on patch by Rusty Russell. | ||
8 | */ | ||
9 | |||
10 | /* relocs tested so far: | ||
11 | |||
12 | DIR64LSB | ||
13 | FPTR64LSB | ||
14 | GPREL22 | ||
15 | LDXMOV | ||
16 | LDXMOV | ||
17 | LTOFF22 | ||
18 | LTOFF22X | ||
19 | LTOFF22X | ||
20 | LTOFF_FPTR22 | ||
21 | PCREL21B (for br.call only; br.cond is not supported out of modules!) | ||
22 | PCREL60B (for brl.cond only; brl.call is not supported for modules!) | ||
23 | PCREL64LSB | ||
24 | SECREL32LSB | ||
25 | SEGREL64LSB | ||
26 | */ | ||
27 | |||
28 | #include <linux/config.h> | ||
29 | |||
30 | #include <linux/kernel.h> | ||
31 | #include <linux/sched.h> | ||
32 | #include <linux/elf.h> | ||
33 | #include <linux/moduleloader.h> | ||
34 | #include <linux/string.h> | ||
35 | #include <linux/vmalloc.h> | ||
36 | |||
37 | #include <asm/patch.h> | ||
38 | #include <asm/unaligned.h> | ||
39 | |||
40 | #define ARCH_MODULE_DEBUG 0 | ||
41 | |||
42 | #if ARCH_MODULE_DEBUG | ||
43 | # define DEBUGP printk | ||
44 | # define inline | ||
45 | #else | ||
46 | # define DEBUGP(fmt , a...) | ||
47 | #endif | ||
48 | |||
49 | #ifdef CONFIG_ITANIUM | ||
50 | # define USE_BRL 0 | ||
51 | #else | ||
52 | # define USE_BRL 1 | ||
53 | #endif | ||
54 | |||
55 | #define MAX_LTOFF ((uint64_t) (1 << 22)) /* max. allowable linkage-table offset */ | ||
56 | |||
57 | /* Define some relocation helper macros/types: */ | ||
58 | |||
59 | #define FORMAT_SHIFT 0 | ||
60 | #define FORMAT_BITS 3 | ||
61 | #define FORMAT_MASK ((1 << FORMAT_BITS) - 1) | ||
62 | #define VALUE_SHIFT 3 | ||
63 | #define VALUE_BITS 5 | ||
64 | #define VALUE_MASK ((1 << VALUE_BITS) - 1) | ||
65 | |||
66 | enum reloc_target_format { | ||
67 | /* direct encoded formats: */ | ||
68 | RF_NONE = 0, | ||
69 | RF_INSN14 = 1, | ||
70 | RF_INSN22 = 2, | ||
71 | RF_INSN64 = 3, | ||
72 | RF_32MSB = 4, | ||
73 | RF_32LSB = 5, | ||
74 | RF_64MSB = 6, | ||
75 | RF_64LSB = 7, | ||
76 | |||
77 | /* formats that cannot be directly decoded: */ | ||
78 | RF_INSN60, | ||
79 | RF_INSN21B, /* imm21 form 1 */ | ||
80 | RF_INSN21M, /* imm21 form 2 */ | ||
81 | RF_INSN21F /* imm21 form 3 */ | ||
82 | }; | ||
83 | |||
84 | enum reloc_value_formula { | ||
85 | RV_DIRECT = 4, /* S + A */ | ||
86 | RV_GPREL = 5, /* @gprel(S + A) */ | ||
87 | RV_LTREL = 6, /* @ltoff(S + A) */ | ||
88 | RV_PLTREL = 7, /* @pltoff(S + A) */ | ||
89 | RV_FPTR = 8, /* @fptr(S + A) */ | ||
90 | RV_PCREL = 9, /* S + A - P */ | ||
91 | RV_LTREL_FPTR = 10, /* @ltoff(@fptr(S + A)) */ | ||
92 | RV_SEGREL = 11, /* @segrel(S + A) */ | ||
93 | RV_SECREL = 12, /* @secrel(S + A) */ | ||
94 | RV_BDREL = 13, /* BD + A */ | ||
95 | RV_LTV = 14, /* S + A (like RV_DIRECT, except frozen at static link-time) */ | ||
96 | RV_PCREL2 = 15, /* S + A - P */ | ||
97 | RV_SPECIAL = 16, /* various (see below) */ | ||
98 | RV_RSVD17 = 17, | ||
99 | RV_TPREL = 18, /* @tprel(S + A) */ | ||
100 | RV_LTREL_TPREL = 19, /* @ltoff(@tprel(S + A)) */ | ||
101 | RV_DTPMOD = 20, /* @dtpmod(S + A) */ | ||
102 | RV_LTREL_DTPMOD = 21, /* @ltoff(@dtpmod(S + A)) */ | ||
103 | RV_DTPREL = 22, /* @dtprel(S + A) */ | ||
104 | RV_LTREL_DTPREL = 23, /* @ltoff(@dtprel(S + A)) */ | ||
105 | RV_RSVD24 = 24, | ||
106 | RV_RSVD25 = 25, | ||
107 | RV_RSVD26 = 26, | ||
108 | RV_RSVD27 = 27 | ||
109 | /* 28-31 reserved for implementation-specific purposes. */ | ||
110 | }; | ||
111 | |||
112 | #define N(reloc) [R_IA64_##reloc] = #reloc | ||
113 | |||
114 | static const char *reloc_name[256] = { | ||
115 | N(NONE), N(IMM14), N(IMM22), N(IMM64), | ||
116 | N(DIR32MSB), N(DIR32LSB), N(DIR64MSB), N(DIR64LSB), | ||
117 | N(GPREL22), N(GPREL64I), N(GPREL32MSB), N(GPREL32LSB), | ||
118 | N(GPREL64MSB), N(GPREL64LSB), N(LTOFF22), N(LTOFF64I), | ||
119 | N(PLTOFF22), N(PLTOFF64I), N(PLTOFF64MSB), N(PLTOFF64LSB), | ||
120 | N(FPTR64I), N(FPTR32MSB), N(FPTR32LSB), N(FPTR64MSB), | ||
121 | N(FPTR64LSB), N(PCREL60B), N(PCREL21B), N(PCREL21M), | ||
122 | N(PCREL21F), N(PCREL32MSB), N(PCREL32LSB), N(PCREL64MSB), | ||
123 | N(PCREL64LSB), N(LTOFF_FPTR22), N(LTOFF_FPTR64I), N(LTOFF_FPTR32MSB), | ||
124 | N(LTOFF_FPTR32LSB), N(LTOFF_FPTR64MSB), N(LTOFF_FPTR64LSB), N(SEGREL32MSB), | ||
125 | N(SEGREL32LSB), N(SEGREL64MSB), N(SEGREL64LSB), N(SECREL32MSB), | ||
126 | N(SECREL32LSB), N(SECREL64MSB), N(SECREL64LSB), N(REL32MSB), | ||
127 | N(REL32LSB), N(REL64MSB), N(REL64LSB), N(LTV32MSB), | ||
128 | N(LTV32LSB), N(LTV64MSB), N(LTV64LSB), N(PCREL21BI), | ||
129 | N(PCREL22), N(PCREL64I), N(IPLTMSB), N(IPLTLSB), | ||
130 | N(COPY), N(LTOFF22X), N(LDXMOV), N(TPREL14), | ||
131 | N(TPREL22), N(TPREL64I), N(TPREL64MSB), N(TPREL64LSB), | ||
132 | N(LTOFF_TPREL22), N(DTPMOD64MSB), N(DTPMOD64LSB), N(LTOFF_DTPMOD22), | ||
133 | N(DTPREL14), N(DTPREL22), N(DTPREL64I), N(DTPREL32MSB), | ||
134 | N(DTPREL32LSB), N(DTPREL64MSB), N(DTPREL64LSB), N(LTOFF_DTPREL22) | ||
135 | }; | ||
136 | |||
137 | #undef N | ||
138 | |||
139 | struct got_entry { | ||
140 | uint64_t val; | ||
141 | }; | ||
142 | |||
143 | struct fdesc { | ||
144 | uint64_t ip; | ||
145 | uint64_t gp; | ||
146 | }; | ||
147 | |||
148 | /* Opaque struct for insns, to protect against derefs. */ | ||
149 | struct insn; | ||
150 | |||
151 | static inline uint64_t /* address of the 16-byte-aligned bundle that INSN refers to */ | ||
152 | bundle (const struct insn *insn) | ||
153 | { | ||
154 | return (uint64_t) insn & ~0xfUL; /* drop the low 4 bits (slot tag) */ | ||
155 | } | ||
156 | |||
157 | static inline int /* slot number encoded in the low bits of an insn pointer */ | ||
158 | slot (const struct insn *insn) | ||
159 | { | ||
160 | return (uint64_t) insn & 0x3; /* low 2 bits of the tagged pointer */ | ||
161 | } | ||
162 | |||
163 | static int /* patch VAL into the imm64 field at INSN; returns 1 on success, 0 on error */ | ||
164 | apply_imm64 (struct module *mod, struct insn *insn, uint64_t val) | ||
165 | { | ||
166 | if (slot(insn) != 2) { /* only a slot-2 insn pointer is accepted */ | ||
167 | printk(KERN_ERR "%s: invalid slot number %d for IMM64\n", | ||
168 | mod->name, slot(insn)); | ||
169 | return 0; | ||
170 | } | ||
171 | ia64_patch_imm64((u64) insn, val); | ||
172 | return 1; | ||
173 | } | ||
174 | |||
175 | static int /* patch VAL into the imm60 field at INSN; returns 1 on success, 0 on error */ | ||
176 | apply_imm60 (struct module *mod, struct insn *insn, uint64_t val) | ||
177 | { | ||
178 | if (slot(insn) != 2) { /* only a slot-2 insn pointer is accepted */ | ||
179 | printk(KERN_ERR "%s: invalid slot number %d for IMM60\n", | ||
180 | mod->name, slot(insn)); | ||
181 | return 0; | ||
182 | } | ||
183 | if (val + ((uint64_t) 1 << 59) >= (1UL << 60)) { /* accept only -2^59 <= (int64_t) val < 2^59; the unsigned add folds both bounds into one test */ | ||
184 | printk(KERN_ERR "%s: value %ld out of IMM60 range\n", mod->name, (int64_t) val); | ||
185 | return 0; | ||
186 | } | ||
187 | ia64_patch_imm60((u64) insn, val); | ||
188 | return 1; | ||
189 | } | ||
190 | |||
191 | static int /* scatter VAL into the imm22 bit-fields of the insn at INSN; returns 1 on success, 0 on error */ | ||
192 | apply_imm22 (struct module *mod, struct insn *insn, uint64_t val) | ||
193 | { | ||
194 | if (val + (1 << 21) >= (1 << 22)) { /* accept only -2^21 <= (int64_t) val < 2^21 (single unsigned compare) */ | ||
195 | printk(KERN_ERR "%s: value %li out of IMM22 range\n", mod->name, (int64_t)val); | ||
196 | return 0; | ||
197 | } | ||
198 | ia64_patch((u64) insn, 0x01fffcfe000UL, ( ((val & 0x200000UL) << 15) /* bit 21 -> 36 */ | ||
199 | | ((val & 0x1f0000UL) << 6) /* bit 16 -> 22 */ | ||
200 | | ((val & 0x00ff80UL) << 20) /* bit 7 -> 27 */ | ||
201 | | ((val & 0x00007fUL) << 13) /* bit 0 -> 13 */)); | ||
202 | return 1; | ||
203 | } | ||
204 | |||
205 | static int /* scatter VAL into the imm21 (form 1) bit-fields of the insn at INSN; returns 1 on success, 0 on error */ | ||
206 | apply_imm21b (struct module *mod, struct insn *insn, uint64_t val) | ||
207 | { | ||
208 | if (val + (1 << 20) >= (1 << 21)) { /* accept only -2^20 <= (int64_t) val < 2^20 (single unsigned compare) */ | ||
209 | printk(KERN_ERR "%s: value %li out of IMM21b range\n", mod->name, (int64_t)val); | ||
210 | return 0; | ||
211 | } | ||
212 | ia64_patch((u64) insn, 0x11ffffe000UL, ( ((val & 0x100000UL) << 16) /* bit 20 -> 36 */ | ||
213 | | ((val & 0x0fffffUL) << 13) /* bit 0 -> 13 */)); | ||
214 | return 1; | ||
215 | } | ||
216 | |||
217 | #if USE_BRL | ||
218 | |||
219 | struct plt_entry { | ||
220 | /* Two 16-byte instruction bundles per PLT entry (brl variant). */ | ||
221 | unsigned char bundle[2][16]; | ||
222 | }; | ||
223 | |||
224 | static const struct plt_entry ia64_plt_template = { | ||
225 | { | ||
226 | { | ||
227 | 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, /* [MLX] nop.m 0 */ | ||
228 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x20, /* movl gp=TARGET_GP */ | ||
229 | 0x00, 0x00, 0x00, 0x60 | ||
230 | }, | ||
231 | { | ||
232 | 0x05, 0x00, 0x00, 0x00, 0x01, 0x00, /* [MLX] nop.m 0 */ | ||
233 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* brl.many TARGET_IP */ | ||
234 | 0x08, 0x00, 0x00, 0xc0 | ||
235 | } | ||
236 | } | ||
237 | }; | ||
238 | |||
239 | static int /* fill a PLT entry copied from the template; returns 1 on success, 0 if either immediate could not be applied */ | ||
240 | patch_plt (struct module *mod, struct plt_entry *plt, long target_ip, unsigned long target_gp) | ||
241 | { | ||
242 | if (apply_imm64(mod, (struct insn *) (plt->bundle[0] + 2), target_gp) /* "+ 2" tags the pointer with slot number 2, see slot() */ | ||
243 | && apply_imm60(mod, (struct insn *) (plt->bundle[1] + 2), | ||
244 | (target_ip - (int64_t) plt->bundle[1]) / 16)) /* displacement from bundle[1], counted in 16-byte bundles */ | ||
245 | return 1; | ||
246 | return 0; | ||
247 | } | ||
248 | |||
249 | unsigned long /* decode the branch target address stored in PLT entry PLT (inverse of patch_plt) */ | ||
250 | plt_target (struct plt_entry *plt) | ||
251 | { | ||
252 | uint64_t b0, b1, *b = (uint64_t *) plt->bundle[1]; /* bundle[1] is the one patched with the ip displacement */ | ||
253 | long off; | ||
254 | |||
255 | b0 = b[0]; b1 = b[1]; | ||
256 | off = ( ((b1 & 0x00fffff000000000UL) >> 36) /* imm20b -> bit 0 */ | ||
257 | | ((b0 >> 48) << 20) | ((b1 & 0x7fffffUL) << 36) /* imm39 -> bit 20 */ | ||
258 | | ((b1 & 0x0800000000000000UL) << 0)); /* i -> bit 59 */ | ||
259 | return (long) plt->bundle[1] + 16*off; /* off counts 16-byte bundles relative to bundle[1] */ | ||
260 | } | ||
261 | |||
262 | #else /* !USE_BRL */ | ||
263 | |||
264 | struct plt_entry { | ||
265 | /* Three instruction bundles in PLT. */ | ||
266 | unsigned char bundle[3][16]; | ||
267 | }; | ||
268 | |||
269 | static const struct plt_entry ia64_plt_template = { | ||
270 | { | ||
271 | { | ||
272 | 0x05, 0x00, 0x00, 0x00, 0x01, 0x00, /* [MLX] nop.m 0 */ | ||
273 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* movl r16=TARGET_IP */ | ||
274 | 0x02, 0x00, 0x00, 0x60 | ||
275 | }, | ||
276 | { | ||
277 | 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, /* [MLX] nop.m 0 */ | ||
278 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x20, /* movl gp=TARGET_GP */ | ||
279 | 0x00, 0x00, 0x00, 0x60 | ||
280 | }, | ||
281 | { | ||
282 | 0x11, 0x00, 0x00, 0x00, 0x01, 0x00, /* [MIB] nop.m 0 */ | ||
283 | 0x60, 0x80, 0x04, 0x80, 0x03, 0x00, /* mov b6=r16 */ | ||
284 | 0x60, 0x00, 0x80, 0x00 /* br.few b6 */ | ||
285 | } | ||
286 | } | ||
287 | }; | ||
288 | |||
289 | static int /* fill a PLT entry copied from the template; returns 1 on success, 0 if either immediate could not be applied */ | ||
290 | patch_plt (struct module *mod, struct plt_entry *plt, long target_ip, unsigned long target_gp) | ||
291 | { | ||
292 | if (apply_imm64(mod, (struct insn *) (plt->bundle[0] + 2), target_ip) /* "+ 2" tags the pointer with slot number 2, see slot() */ | ||
293 | && apply_imm64(mod, (struct insn *) (plt->bundle[1] + 2), target_gp)) | ||
294 | return 1; | ||
295 | return 0; | ||
296 | } | ||
297 | |||
298 | unsigned long /* reassemble the 64-bit target ip from the immediate fields of the first PLT bundle */ | ||
299 | plt_target (struct plt_entry *plt) | ||
300 | { | ||
301 | uint64_t b0, b1, *b = (uint64_t *) plt->bundle[0]; /* bundle[0] is the one patch_plt() fills with target_ip */ | ||
302 | |||
303 | b0 = b[0]; b1 = b[1]; | ||
304 | return ( ((b1 & 0x000007f000000000) >> 36) /* imm7b -> bit 0 */ | ||
305 | | ((b1 & 0x07fc000000000000) >> 43) /* imm9d -> bit 7 */ | ||
306 | | ((b1 & 0x0003e00000000000) >> 29) /* imm5c -> bit 16 */ | ||
307 | | ((b1 & 0x0000100000000000) >> 23) /* ic -> bit 21 */ | ||
308 | | ((b0 >> 46) << 22) | ((b1 & 0x7fffff) << 40) /* imm41 -> bit 22 */ | ||
309 | | ((b1 & 0x0800000000000000) << 4)); /* i -> bit 63 */ | ||
310 | } | ||
311 | |||
312 | #endif /* !USE_BRL */ | ||
313 | |||
314 | void * /* allocate SIZE bytes for a module with vmalloc(); NULL for a zero-sized request */ | ||
315 | module_alloc (unsigned long size) | ||
316 | { | ||
317 | if (!size) /* zero-sized request: nothing to allocate */ | ||
318 | return NULL; | ||
319 | return vmalloc(size); | ||
320 | } | ||
321 | |||
322 | void /* vfree() MODULE_REGION; when it is the init region, unregister the init unwind table first */ | ||
323 | module_free (struct module *mod, void *module_region) | ||
324 | { | ||
325 | if (mod->arch.init_unw_table && module_region == mod->module_init) { /* freeing the init region while its unwind table is still registered */ | ||
326 | unw_remove_unwind_table(mod->arch.init_unw_table); | ||
327 | mod->arch.init_unw_table = NULL; /* mark it as removed */ | ||
328 | } | ||
329 | vfree(module_region); | ||
330 | } | ||
331 | |||
332 | /* Have we already seen one of these relocations?  Returns 1 if an entry before index NUM has the same r_info and r_addend as rela[NUM], else 0. */ | ||
333 | /* FIXME: we could look in other sections, too --RR */ | ||
334 | static int | ||
335 | duplicate_reloc (const Elf64_Rela *rela, unsigned int num) | ||
336 | { | ||
337 | unsigned int i; | ||
338 | |||
339 | for (i = 0; i < num; i++) { /* linear scan of the entries preceding rela[num] */ | ||
340 | if (rela[i].r_info == rela[num].r_info && rela[i].r_addend == rela[num].r_addend) | ||
341 | return 1; | ||
342 | } | ||
343 | return 0; | ||
344 | } | ||
345 | |||
346 | /* Count how many GOT entries we may need: one per distinct (r_info, r_addend) pair among the LTOFF-type relocations in RELA[0..NUM-1]. */ | ||
347 | static unsigned int | ||
348 | count_gots (const Elf64_Rela *rela, unsigned int num) | ||
349 | { | ||
350 | unsigned int i, ret = 0; | ||
351 | |||
352 | /* Sure, this is order(n^2), but it's usually short, and not | ||
353 | time critical */ | ||
354 | for (i = 0; i < num; i++) { | ||
355 | switch (ELF64_R_TYPE(rela[i].r_info)) { | ||
356 | case R_IA64_LTOFF22: | ||
357 | case R_IA64_LTOFF22X: | ||
358 | case R_IA64_LTOFF64I: | ||
359 | case R_IA64_LTOFF_FPTR22: | ||
360 | case R_IA64_LTOFF_FPTR64I: | ||
361 | case R_IA64_LTOFF_FPTR32MSB: | ||
362 | case R_IA64_LTOFF_FPTR32LSB: | ||
363 | case R_IA64_LTOFF_FPTR64MSB: | ||
364 | case R_IA64_LTOFF_FPTR64LSB: | ||
365 | if (!duplicate_reloc(rela, i)) /* count each distinct relocation once */ | ||
366 | ret++; | ||
367 | break; | ||
368 | } | ||
369 | } | ||
370 | return ret; | ||
371 | } | ||
372 | |||
373 | /* Count how many PLT entries we may need: one per distinct (r_info, r_addend) pair among the PCREL21B/PLTOFF/IPLT relocations in RELA[0..NUM-1]. */ | ||
374 | static unsigned int | ||
375 | count_plts (const Elf64_Rela *rela, unsigned int num) | ||
376 | { | ||
377 | unsigned int i, ret = 0; | ||
378 | |||
379 | /* Sure, this is order(n^2), but it's usually short, and not | ||
380 | time critical */ | ||
381 | for (i = 0; i < num; i++) { | ||
382 | switch (ELF64_R_TYPE(rela[i].r_info)) { | ||
383 | case R_IA64_PCREL21B: | ||
384 | case R_IA64_PLTOFF22: | ||
385 | case R_IA64_PLTOFF64I: | ||
386 | case R_IA64_PLTOFF64MSB: | ||
387 | case R_IA64_PLTOFF64LSB: | ||
388 | case R_IA64_IPLTMSB: | ||
389 | case R_IA64_IPLTLSB: | ||
390 | if (!duplicate_reloc(rela, i)) /* count each distinct relocation once */ | ||
391 | ret++; | ||
392 | break; | ||
393 | } | ||
394 | } | ||
395 | return ret; | ||
396 | } | ||
397 | |||
398 | /* We need to create a function descriptor for any internal function | ||
399 | which is referenced.  Returns an upper bound: one per distinct relocation of the types below. */ | ||
400 | static unsigned int | ||
401 | count_fdescs (const Elf64_Rela *rela, unsigned int num) | ||
402 | { | ||
403 | unsigned int i, ret = 0; | ||
404 | |||
405 | /* Sure, this is order(n^2), but it's usually short, and not time critical. */ | ||
406 | for (i = 0; i < num; i++) { | ||
407 | switch (ELF64_R_TYPE(rela[i].r_info)) { | ||
408 | case R_IA64_FPTR64I: | ||
409 | case R_IA64_FPTR32LSB: | ||
410 | case R_IA64_FPTR32MSB: | ||
411 | case R_IA64_FPTR64LSB: | ||
412 | case R_IA64_FPTR64MSB: | ||
413 | case R_IA64_LTOFF_FPTR22: | ||
414 | case R_IA64_LTOFF_FPTR32LSB: | ||
415 | case R_IA64_LTOFF_FPTR32MSB: | ||
416 | case R_IA64_LTOFF_FPTR64I: | ||
417 | case R_IA64_LTOFF_FPTR64LSB: | ||
418 | case R_IA64_LTOFF_FPTR64MSB: | ||
419 | case R_IA64_IPLTMSB: | ||
420 | case R_IA64_IPLTLSB: | ||
421 | /* | ||
422 | * Jumps to static functions sometimes go straight to their | ||
423 | * offset. Of course, that may not be possible if the jump is | ||
424 | * from init -> core or vice versa, so we need to generate an | ||
425 | * FDESC (and PLT etc) for that. | ||
426 | */ | ||
427 | case R_IA64_PCREL21B: | ||
428 | if (!duplicate_reloc(rela, i)) /* count each distinct relocation once */ | ||
429 | ret++; | ||
430 | break; | ||
431 | } | ||
432 | } | ||
433 | return ret; | ||
434 | } | ||
435 | |||
436 | int /* size the module's PLT, GOT and function-descriptor sections; returns 0, or -ENOEXEC if a required section is missing */ | ||
437 | module_frob_arch_sections (Elf_Ehdr *ehdr, Elf_Shdr *sechdrs, char *secstrings, | ||
438 | struct module *mod) | ||
439 | { | ||
440 | unsigned long core_plts = 0, init_plts = 0, gots = 0, fdescs = 0; | ||
441 | Elf64_Shdr *s, *sechdrs_end = sechdrs + ehdr->e_shnum; | ||
442 | |||
443 | /* | ||
444 | * Locate, by name, the sections that will hold the PLTs (.core.plt and .init.plt), | ||
445 | * the linkage table (.got), the function descriptors (.opd) and the unwind info. | ||
446 | */ | ||
447 | for (s = sechdrs; s < sechdrs_end; ++s) | ||
448 | if (strcmp(".core.plt", secstrings + s->sh_name) == 0) | ||
449 | mod->arch.core_plt = s; | ||
450 | else if (strcmp(".init.plt", secstrings + s->sh_name) == 0) | ||
451 | mod->arch.init_plt = s; | ||
452 | else if (strcmp(".got", secstrings + s->sh_name) == 0) | ||
453 | mod->arch.got = s; | ||
454 | else if (strcmp(".opd", secstrings + s->sh_name) == 0) | ||
455 | mod->arch.opd = s; | ||
456 | else if (strcmp(".IA_64.unwind", secstrings + s->sh_name) == 0) | ||
457 | mod->arch.unwind = s; | ||
458 | |||
459 | if (!mod->arch.core_plt || !mod->arch.init_plt || !mod->arch.got || !mod->arch.opd) { /* the unwind section is not required here */ | ||
460 | printk(KERN_ERR "%s: sections missing\n", mod->name); | ||
461 | return -ENOEXEC; | ||
462 | } | ||
463 | |||
464 | /* GOT and PLTs can occur in any relocated section... */ | ||
465 | for (s = sechdrs + 1; s < sechdrs_end; ++s) { /* starts at section index 1 */ | ||
466 | const Elf64_Rela *rels = (void *)ehdr + s->sh_offset; | ||
467 | unsigned long numrels = s->sh_size/sizeof(Elf64_Rela); | ||
468 | |||
469 | if (s->sh_type != SHT_RELA) /* only RELA sections carry relocations to count */ | ||
470 | continue; | ||
471 | |||
472 | gots += count_gots(rels, numrels); | ||
473 | fdescs += count_fdescs(rels, numrels); | ||
474 | if (strstr(secstrings + s->sh_name, ".init")) /* a section name containing ".init" is charged to the init PLT */ | ||
475 | init_plts += count_plts(rels, numrels); | ||
476 | else | ||
477 | core_plts += count_plts(rels, numrels); | ||
478 | } | ||
479 | |||
480 | mod->arch.core_plt->sh_type = SHT_NOBITS; /* from here on: rewrite the four section headers using the counts gathered above */ | ||
481 | mod->arch.core_plt->sh_flags = SHF_EXECINSTR | SHF_ALLOC; | ||
482 | mod->arch.core_plt->sh_addralign = 16; | ||
483 | mod->arch.core_plt->sh_size = core_plts * sizeof(struct plt_entry); | ||
484 | mod->arch.init_plt->sh_type = SHT_NOBITS; | ||
485 | mod->arch.init_plt->sh_flags = SHF_EXECINSTR | SHF_ALLOC; | ||
486 | mod->arch.init_plt->sh_addralign = 16; | ||
487 | mod->arch.init_plt->sh_size = init_plts * sizeof(struct plt_entry); | ||
488 | mod->arch.got->sh_type = SHT_NOBITS; | ||
489 | mod->arch.got->sh_flags = ARCH_SHF_SMALL | SHF_ALLOC; | ||
490 | mod->arch.got->sh_addralign = 8; | ||
491 | mod->arch.got->sh_size = gots * sizeof(struct got_entry); | ||
492 | mod->arch.opd->sh_type = SHT_NOBITS; | ||
493 | mod->arch.opd->sh_flags = SHF_ALLOC; | ||
494 | mod->arch.opd->sh_addralign = 8; | ||
495 | mod->arch.opd->sh_size = fdescs * sizeof(struct fdesc); | ||
496 | DEBUGP("%s: core.plt=%lx, init.plt=%lx, got=%lx, fdesc=%lx\n", | ||
497 | __FUNCTION__, mod->arch.core_plt->sh_size, mod->arch.init_plt->sh_size, | ||
498 | mod->arch.got->sh_size, mod->arch.opd->sh_size); | ||
499 | return 0; | ||
500 | } | ||
501 | |||
502 | static inline int /* non-zero iff ADDR lies inside the module's init region */ | ||
503 | in_init (const struct module *mod, uint64_t addr) | ||
504 | { | ||
505 | return addr - (uint64_t) mod->module_init < mod->init_size; /* an addr below the base wraps to a huge unsigned value and fails the test */ | ||
506 | } | ||
507 | |||
508 | static inline int /* non-zero iff ADDR lies inside the module's core region */ | ||
509 | in_core (const struct module *mod, uint64_t addr) | ||
510 | { | ||
511 | return addr - (uint64_t) mod->module_core < mod->core_size; /* an addr below the base wraps to a huge unsigned value and fails the test */ | ||
512 | } | ||
513 | |||
514 | static inline int /* non-zero iff VALUE is an address inside this module (init or core region) */ | ||
515 | is_internal (const struct module *mod, uint64_t value) | ||
516 | { | ||
517 | return in_init(mod, value) || in_core(mod, value); | ||
518 | } | ||
519 | |||
520 | /* | ||
521 | * Get gp-relative offset for the linkage-table entry of VALUE, adding a new entry if VALUE is not in the table yet. | ||
522 | */ | ||
523 | static uint64_t | ||
524 | get_ltoff (struct module *mod, uint64_t value, int *okp) | ||
525 | { | ||
526 | struct got_entry *got, *e; | ||
527 | |||
528 | if (!*okp) /* *okp already cleared: return 0 without touching the table */ | ||
529 | return 0; | ||
530 | |||
531 | got = (void *) mod->arch.got->sh_addr; | ||
532 | for (e = got; e < got + mod->arch.next_got_entry; ++e) /* search the entries handed out so far */ | ||
533 | if (e->val == value) | ||
534 | goto found; | ||
535 | |||
536 | /* Not enough GOT entries? */ | ||
537 | if (e >= (struct got_entry *) (mod->arch.got->sh_addr + mod->arch.got->sh_size)) | ||
538 | BUG(); | ||
539 | |||
540 | e->val = value; /* e now points one past the last used entry: claim it */ | ||
541 | ++mod->arch.next_got_entry; | ||
542 | found: | ||
543 | return (uint64_t) e - mod->arch.gp; | ||
544 | } | ||
545 | |||
546 | static inline int /* non-zero iff VALUE is within MAX_LTOFF/2 bytes either side of the module's gp */ | ||
547 | gp_addressable (struct module *mod, uint64_t value) | ||
548 | { | ||
549 | return value - mod->arch.gp + MAX_LTOFF/2 < MAX_LTOFF; /* gp - MAX_LTOFF/2 <= value < gp + MAX_LTOFF/2, as one unsigned compare */ | ||
550 | } | ||
551 | |||
552 | /* Get PC-relative PLT entry for this value. Returns 0 on failure.  The init PLT is used when INSN is in the init region, the core PLT otherwise. */ | ||
553 | static uint64_t | ||
554 | get_plt (struct module *mod, const struct insn *insn, uint64_t value, int *okp) | ||
555 | { | ||
556 | struct plt_entry *plt, *plt_end; | ||
557 | uint64_t target_ip, target_gp; | ||
558 | |||
559 | if (!*okp) /* *okp already cleared: do nothing */ | ||
560 | return 0; | ||
561 | |||
562 | if (in_init(mod, (uint64_t) insn)) { | ||
563 | plt = (void *) mod->arch.init_plt->sh_addr; | ||
564 | plt_end = (void *) plt + mod->arch.init_plt->sh_size; | ||
565 | } else { | ||
566 | plt = (void *) mod->arch.core_plt->sh_addr; | ||
567 | plt_end = (void *) plt + mod->arch.core_plt->sh_size; | ||
568 | } | ||
569 | |||
570 | /* "value" is a pointer to a function-descriptor; fetch the target ip/gp from it: */ | ||
571 | target_ip = ((uint64_t *) value)[0]; | ||
572 | target_gp = ((uint64_t *) value)[1]; | ||
573 | |||
574 | /* Look for existing PLT entry. */ | ||
575 | while (plt->bundle[0][0]) { /* both templates start with a non-zero byte, so a zero first byte means "unused slot" (assumes the section starts out zeroed -- TODO confirm) */ | ||
576 | if (plt_target(plt) == target_ip) | ||
577 | goto found; | ||
578 | if (++plt >= plt_end) /* ran off the end of the PLT without finding a match or a free slot */ | ||
579 | BUG(); | ||
580 | } | ||
581 | *plt = ia64_plt_template; /* claim the free slot, then patch in ip and gp */ | ||
582 | if (!patch_plt(mod, plt, target_ip, target_gp)) { | ||
583 | *okp = 0; | ||
584 | return 0; | ||
585 | } | ||
586 | #if ARCH_MODULE_DEBUG | ||
587 | if (plt_target(plt) != target_ip) { | ||
588 | printk("%s: mistargeted PLT: wanted %lx, got %lx\n", | ||
589 | __FUNCTION__, target_ip, plt_target(plt)); | ||
590 | *okp = 0; | ||
591 | return 0; | ||
592 | } | ||
593 | #endif | ||
594 | found: | ||
595 | return (uint64_t) plt; | ||
596 | } | ||
597 | |||
598 | /* Get function descriptor for VALUE. */ | ||
599 | static uint64_t | ||
600 | get_fdesc (struct module *mod, uint64_t value, int *okp) | ||
601 | { | ||
602 | struct fdesc *fdesc = (void *) mod->arch.opd->sh_addr; | ||
603 | |||
604 | if (!*okp) | ||
605 | return 0; | ||
606 | |||
607 | if (!value) { | ||
608 | printk(KERN_ERR "%s: fdesc for zero requested!\n", mod->name); | ||
609 | return 0; | ||
610 | } | ||
611 | |||
612 | if (!is_internal(mod, value)) | ||
613 | /* | ||
614 | * If it's not a module-local entry-point, "value" already points to a | ||
615 | * function-descriptor. | ||
616 | */ | ||
617 | return value; | ||
618 | |||
619 | /* Look for existing function descriptor. */ | ||
620 | while (fdesc->ip) { | ||
621 | if (fdesc->ip == value) | ||
622 | return (uint64_t)fdesc; | ||
623 | if ((uint64_t) ++fdesc >= mod->arch.opd->sh_addr + mod->arch.opd->sh_size) | ||
624 | BUG(); | ||
625 | } | ||
626 | |||
627 | /* Create new one */ | ||
628 | fdesc->ip = value; | ||
629 | fdesc->gp = mod->arch.gp; | ||
630 | return (uint64_t) fdesc; | ||
631 | } | ||
632 | |||
633 | static inline int | ||
634 | do_reloc (struct module *mod, uint8_t r_type, Elf64_Sym *sym, uint64_t addend, | ||
635 | Elf64_Shdr *sec, void *location) | ||
636 | { | ||
637 | enum reloc_target_format format = (r_type >> FORMAT_SHIFT) & FORMAT_MASK; | ||
638 | enum reloc_value_formula formula = (r_type >> VALUE_SHIFT) & VALUE_MASK; | ||
639 | uint64_t val; | ||
640 | int ok = 1; | ||
641 | |||
642 | val = sym->st_value + addend; | ||
643 | |||
644 | switch (formula) { | ||
645 | case RV_SEGREL: /* segment base is arbitrarily chosen to be 0 for kernel modules */ | ||
646 | case RV_DIRECT: | ||
647 | break; | ||
648 | |||
649 | case RV_GPREL: val -= mod->arch.gp; break; | ||
650 | case RV_LTREL: val = get_ltoff(mod, val, &ok); break; | ||
651 | case RV_PLTREL: val = get_plt(mod, location, val, &ok); break; | ||
652 | case RV_FPTR: val = get_fdesc(mod, val, &ok); break; | ||
653 | case RV_SECREL: val -= sec->sh_addr; break; | ||
654 | case RV_LTREL_FPTR: val = get_ltoff(mod, get_fdesc(mod, val, &ok), &ok); break; | ||
655 | |||
656 | case RV_PCREL: | ||
657 | switch (r_type) { | ||
658 | case R_IA64_PCREL21B: | ||
659 | if ((in_init(mod, val) && in_core(mod, (uint64_t)location)) || | ||
660 | (in_core(mod, val) && in_init(mod, (uint64_t)location))) { | ||
661 | /* | ||
662 | * Init section may have been allocated far away from core, | ||
663 | * if the branch won't reach, then allocate a plt for it. | ||
664 | */ | ||
665 | uint64_t delta = ((int64_t)val - (int64_t)location) / 16; | ||
666 | if (delta + (1 << 20) >= (1 << 21)) { | ||
667 | val = get_fdesc(mod, val, &ok); | ||
668 | val = get_plt(mod, location, val, &ok); | ||
669 | } | ||
670 | } else if (!is_internal(mod, val)) | ||
671 | val = get_plt(mod, location, val, &ok); | ||
672 | /* FALL THROUGH */ | ||
673 | default: | ||
674 | val -= bundle(location); | ||
675 | break; | ||
676 | |||
677 | case R_IA64_PCREL32MSB: | ||
678 | case R_IA64_PCREL32LSB: | ||
679 | case R_IA64_PCREL64MSB: | ||
680 | case R_IA64_PCREL64LSB: | ||
681 | val -= (uint64_t) location; | ||
682 | break; | ||
683 | |||
684 | } | ||
685 | switch (r_type) { | ||
686 | case R_IA64_PCREL60B: format = RF_INSN60; break; | ||
687 | case R_IA64_PCREL21B: format = RF_INSN21B; break; | ||
688 | case R_IA64_PCREL21M: format = RF_INSN21M; break; | ||
689 | case R_IA64_PCREL21F: format = RF_INSN21F; break; | ||
690 | default: break; | ||
691 | } | ||
692 | break; | ||
693 | |||
694 | case RV_BDREL: | ||
695 | val -= (uint64_t) (in_init(mod, val) ? mod->module_init : mod->module_core); | ||
696 | break; | ||
697 | |||
698 | case RV_LTV: | ||
699 | /* can link-time value relocs happen here? */ | ||
700 | BUG(); | ||
701 | break; | ||
702 | |||
703 | case RV_PCREL2: | ||
704 | if (r_type == R_IA64_PCREL21BI) { | ||
705 | if (!is_internal(mod, val)) { | ||
706 | printk(KERN_ERR "%s: %s reloc against non-local symbol (%lx)\n", | ||
707 | __FUNCTION__, reloc_name[r_type], val); | ||
708 | return -ENOEXEC; | ||
709 | } | ||
710 | format = RF_INSN21B; | ||
711 | } | ||
712 | val -= bundle(location); | ||
713 | break; | ||
714 | |||
715 | case RV_SPECIAL: | ||
716 | switch (r_type) { | ||
717 | case R_IA64_IPLTMSB: | ||
718 | case R_IA64_IPLTLSB: | ||
719 | val = get_fdesc(mod, get_plt(mod, location, val, &ok), &ok); | ||
720 | format = RF_64LSB; | ||
721 | if (r_type == R_IA64_IPLTMSB) | ||
722 | format = RF_64MSB; | ||
723 | break; | ||
724 | |||
725 | case R_IA64_SUB: | ||
726 | val = addend - sym->st_value; | ||
727 | format = RF_INSN64; | ||
728 | break; | ||
729 | |||
730 | case R_IA64_LTOFF22X: | ||
731 | if (gp_addressable(mod, val)) | ||
732 | val -= mod->arch.gp; | ||
733 | else | ||
734 | val = get_ltoff(mod, val, &ok); | ||
735 | format = RF_INSN22; | ||
736 | break; | ||
737 | |||
738 | case R_IA64_LDXMOV: | ||
739 | if (gp_addressable(mod, val)) { | ||
740 | /* turn "ld8" into "mov": */ | ||
741 | DEBUGP("%s: patching ld8 at %p to mov\n", __FUNCTION__, location); | ||
742 | ia64_patch((u64) location, 0x1fff80fe000UL, 0x10000000000UL); | ||
743 | } | ||
744 | return 0; | ||
745 | |||
746 | default: | ||
747 | if (reloc_name[r_type]) | ||
748 | printk(KERN_ERR "%s: special reloc %s not supported", | ||
749 | mod->name, reloc_name[r_type]); | ||
750 | else | ||
751 | printk(KERN_ERR "%s: unknown special reloc %x\n", | ||
752 | mod->name, r_type); | ||
753 | return -ENOEXEC; | ||
754 | } | ||
755 | break; | ||
756 | |||
757 | case RV_TPREL: | ||
758 | case RV_LTREL_TPREL: | ||
759 | case RV_DTPMOD: | ||
760 | case RV_LTREL_DTPMOD: | ||
761 | case RV_DTPREL: | ||
762 | case RV_LTREL_DTPREL: | ||
763 | printk(KERN_ERR "%s: %s reloc not supported\n", | ||
764 | mod->name, reloc_name[r_type] ? reloc_name[r_type] : "?"); | ||
765 | return -ENOEXEC; | ||
766 | |||
767 | default: | ||
768 | printk(KERN_ERR "%s: unknown reloc %x\n", mod->name, r_type); | ||
769 | return -ENOEXEC; | ||
770 | } | ||
771 | |||
772 | if (!ok) | ||
773 | return -ENOEXEC; | ||
774 | |||
775 | DEBUGP("%s: [%p]<-%016lx = %s(%lx)\n", __FUNCTION__, location, val, | ||
776 | reloc_name[r_type] ? reloc_name[r_type] : "?", sym->st_value + addend); | ||
777 | |||
778 | switch (format) { | ||
779 | case RF_INSN21B: ok = apply_imm21b(mod, location, (int64_t) val / 16); break; | ||
780 | case RF_INSN22: ok = apply_imm22(mod, location, val); break; | ||
781 | case RF_INSN64: ok = apply_imm64(mod, location, val); break; | ||
782 | case RF_INSN60: ok = apply_imm60(mod, location, (int64_t) val / 16); break; | ||
783 | case RF_32LSB: put_unaligned(val, (uint32_t *) location); break; | ||
784 | case RF_64LSB: put_unaligned(val, (uint64_t *) location); break; | ||
785 | case RF_32MSB: /* ia64 Linux is little-endian... */ | ||
786 | case RF_64MSB: /* ia64 Linux is little-endian... */ | ||
787 | case RF_INSN14: /* must be within-module, i.e., resolved by "ld -r" */ | ||
788 | case RF_INSN21M: /* must be within-module, i.e., resolved by "ld -r" */ | ||
789 | case RF_INSN21F: /* must be within-module, i.e., resolved by "ld -r" */ | ||
790 | printk(KERN_ERR "%s: format %u needed by %s reloc is not supported\n", | ||
791 | mod->name, format, reloc_name[r_type] ? reloc_name[r_type] : "?"); | ||
792 | return -ENOEXEC; | ||
793 | |||
794 | default: | ||
795 | printk(KERN_ERR "%s: relocation %s resulted in unknown format %u\n", | ||
796 | mod->name, reloc_name[r_type] ? reloc_name[r_type] : "?", format); | ||
797 | return -ENOEXEC; | ||
798 | } | ||
799 | return ok ? 0 : -ENOEXEC; | ||
800 | } | ||
801 | |||
802 | int | ||
803 | apply_relocate_add (Elf64_Shdr *sechdrs, const char *strtab, unsigned int symindex, | ||
804 | unsigned int relsec, struct module *mod) | ||
805 | { | ||
806 | unsigned int i, n = sechdrs[relsec].sh_size / sizeof(Elf64_Rela); | ||
807 | Elf64_Rela *rela = (void *) sechdrs[relsec].sh_addr; | ||
808 | Elf64_Shdr *target_sec; | ||
809 | int ret; | ||
810 | |||
811 | DEBUGP("%s: applying section %u (%u relocs) to %u\n", __FUNCTION__, | ||
812 | relsec, n, sechdrs[relsec].sh_info); | ||
813 | |||
814 | target_sec = sechdrs + sechdrs[relsec].sh_info; | ||
815 | |||
816 | if (target_sec->sh_entsize == ~0UL) | ||
817 | /* | ||
818 | * If target section wasn't allocated, we don't need to relocate it. | ||
819 | * Happens, e.g., for debug sections. | ||
820 | */ | ||
821 | return 0; | ||
822 | |||
823 | if (!mod->arch.gp) { | ||
824 | /* | ||
825 | * XXX Should have an arch-hook for running this after final section | ||
826 | * addresses have been selected... | ||
827 | */ | ||
828 | /* See if gp can cover the entire core module: */ | ||
829 | uint64_t gp = (uint64_t) mod->module_core + MAX_LTOFF / 2; | ||
830 | if (mod->core_size >= MAX_LTOFF) | ||
831 | /* | ||
832 | * This takes advantage of fact that SHF_ARCH_SMALL gets allocated | ||
833 | * at the end of the module. | ||
834 | */ | ||
835 | gp = (uint64_t) mod->module_core + mod->core_size - MAX_LTOFF / 2; | ||
836 | mod->arch.gp = gp; | ||
837 | DEBUGP("%s: placing gp at 0x%lx\n", __FUNCTION__, gp); | ||
838 | } | ||
839 | |||
840 | for (i = 0; i < n; i++) { | ||
841 | ret = do_reloc(mod, ELF64_R_TYPE(rela[i].r_info), | ||
842 | ((Elf64_Sym *) sechdrs[symindex].sh_addr | ||
843 | + ELF64_R_SYM(rela[i].r_info)), | ||
844 | rela[i].r_addend, target_sec, | ||
845 | (void *) target_sec->sh_addr + rela[i].r_offset); | ||
846 | if (ret < 0) | ||
847 | return ret; | ||
848 | } | ||
849 | return 0; | ||
850 | } | ||
851 | |||
852 | int | ||
853 | apply_relocate (Elf64_Shdr *sechdrs, const char *strtab, unsigned int symindex, | ||
854 | unsigned int relsec, struct module *mod) | ||
855 | { | ||
856 | printk(KERN_ERR "module %s: REL relocs in section %u unsupported\n", mod->name, relsec); | ||
857 | return -ENOEXEC; | ||
858 | } | ||
859 | |||
860 | /* | ||
861 | * Modules contain a single unwind table which covers both the core and the init text | ||
862 | * sections but since the two are not contiguous, we need to split this table up such that | ||
863 | * we can register (and unregister) each "segment" seperately. Fortunately, this sounds | ||
864 | * more complicated than it really is. | ||
865 | */ | ||
866 | static void | ||
867 | register_unwind_table (struct module *mod) | ||
868 | { | ||
869 | struct unw_table_entry *start = (void *) mod->arch.unwind->sh_addr; | ||
870 | struct unw_table_entry *end = start + mod->arch.unwind->sh_size / sizeof (*start); | ||
871 | struct unw_table_entry tmp, *e1, *e2, *core, *init; | ||
872 | unsigned long num_init = 0, num_core = 0; | ||
873 | |||
874 | /* First, count how many init and core unwind-table entries there are. */ | ||
875 | for (e1 = start; e1 < end; ++e1) | ||
876 | if (in_init(mod, e1->start_offset)) | ||
877 | ++num_init; | ||
878 | else | ||
879 | ++num_core; | ||
880 | /* | ||
881 | * Second, sort the table such that all unwind-table entries for the init and core | ||
882 | * text sections are nicely separated. We do this with a stupid bubble sort | ||
883 | * (unwind tables don't get ridiculously huge). | ||
884 | */ | ||
885 | for (e1 = start; e1 < end; ++e1) { | ||
886 | for (e2 = e1 + 1; e2 < end; ++e2) { | ||
887 | if (e2->start_offset < e1->start_offset) { | ||
888 | tmp = *e1; | ||
889 | *e1 = *e2; | ||
890 | *e2 = tmp; | ||
891 | } | ||
892 | } | ||
893 | } | ||
894 | /* | ||
895 | * Third, locate the init and core segments in the unwind table: | ||
896 | */ | ||
897 | if (in_init(mod, start->start_offset)) { | ||
898 | init = start; | ||
899 | core = start + num_init; | ||
900 | } else { | ||
901 | core = start; | ||
902 | init = start + num_core; | ||
903 | } | ||
904 | |||
905 | DEBUGP("%s: name=%s, gp=%lx, num_init=%lu, num_core=%lu\n", __FUNCTION__, | ||
906 | mod->name, mod->arch.gp, num_init, num_core); | ||
907 | |||
908 | /* | ||
909 | * Fourth, register both tables (if not empty). | ||
910 | */ | ||
911 | if (num_core > 0) { | ||
912 | mod->arch.core_unw_table = unw_add_unwind_table(mod->name, 0, mod->arch.gp, | ||
913 | core, core + num_core); | ||
914 | DEBUGP("%s: core: handle=%p [%p-%p)\n", __FUNCTION__, | ||
915 | mod->arch.core_unw_table, core, core + num_core); | ||
916 | } | ||
917 | if (num_init > 0) { | ||
918 | mod->arch.init_unw_table = unw_add_unwind_table(mod->name, 0, mod->arch.gp, | ||
919 | init, init + num_init); | ||
920 | DEBUGP("%s: init: handle=%p [%p-%p)\n", __FUNCTION__, | ||
921 | mod->arch.init_unw_table, init, init + num_init); | ||
922 | } | ||
923 | } | ||
924 | |||
925 | int | ||
926 | module_finalize (const Elf_Ehdr *hdr, const Elf_Shdr *sechdrs, struct module *mod) | ||
927 | { | ||
928 | DEBUGP("%s: init: entry=%p\n", __FUNCTION__, mod->init); | ||
929 | if (mod->arch.unwind) | ||
930 | register_unwind_table(mod); | ||
931 | return 0; | ||
932 | } | ||
933 | |||
934 | void | ||
935 | module_arch_cleanup (struct module *mod) | ||
936 | { | ||
937 | if (mod->arch.init_unw_table) | ||
938 | unw_remove_unwind_table(mod->arch.init_unw_table); | ||
939 | if (mod->arch.core_unw_table) | ||
940 | unw_remove_unwind_table(mod->arch.core_unw_table); | ||
941 | } | ||
942 | |||
943 | #ifdef CONFIG_SMP | ||
944 | void | ||
945 | percpu_modcopy (void *pcpudst, const void *src, unsigned long size) | ||
946 | { | ||
947 | unsigned int i; | ||
948 | for (i = 0; i < NR_CPUS; i++) | ||
949 | if (cpu_possible(i)) | ||
950 | memcpy(pcpudst + __per_cpu_offset[i], src, size); | ||
951 | } | ||
952 | #endif /* CONFIG_SMP */ | ||
diff --git a/arch/ia64/kernel/pal.S b/arch/ia64/kernel/pal.S new file mode 100644 index 000000000000..5018c7f2e7a8 --- /dev/null +++ b/arch/ia64/kernel/pal.S | |||
@@ -0,0 +1,302 @@ | |||
1 | /* | ||
2 | * PAL Firmware support | ||
3 | * IA-64 Processor Programmers Reference Vol 2 | ||
4 | * | ||
5 | * Copyright (C) 1999 Don Dugger <don.dugger@intel.com> | ||
6 | * Copyright (C) 1999 Walt Drummond <drummond@valinux.com> | ||
7 | * Copyright (C) 1999-2001, 2003 Hewlett-Packard Co | ||
8 | * David Mosberger <davidm@hpl.hp.com> | ||
9 | * Stephane Eranian <eranian@hpl.hp.com> | ||
10 | * | ||
11 | * 05/22/2000 eranian Added support for stacked register calls | ||
12 | * 05/24/2000 eranian Added support for physical mode static calls | ||
13 | */ | ||
14 | |||
15 | #include <asm/asmmacro.h> | ||
16 | #include <asm/processor.h> | ||
17 | |||
18 | .data | ||
19 | pal_entry_point: | ||
20 | data8 ia64_pal_default_handler | ||
21 | .text | ||
22 | |||
23 | /* | ||
24 | * Set the PAL entry point address. This could be written in C code, but we do it here | ||
25 | * to keep it all in one module (besides, it's so trivial that it's | ||
26 | * not a big deal). The address is stored in the pal_entry_point data word. | ||
27 | * | ||
28 | * in0 Address of the PAL entry point (text address, NOT a function descriptor). | ||
29 | */ | ||
30 | GLOBAL_ENTRY(ia64_pal_handler_init) | ||
31 | alloc r3=ar.pfs,1,0,0,0 /* frame with a single input register (in0) */ | ||
32 | movl r2=pal_entry_point /* r2 <- address of the pal_entry_point word */ | ||
33 | ;; | ||
34 | st8 [r2]=in0 /* pal_entry_point <- in0 */ | ||
35 | br.ret.sptk.many rp | ||
36 | END(ia64_pal_handler_init) | ||
37 | |||
38 | /* | ||
39 | * Default PAL call handler. This needs to be coded in assembly because it uses | ||
40 | * the static calling convention, i.e., the RSE may not be used and calls are | ||
41 | * done via "br.cond" (not "br.call"). It is the initial value of pal_entry_point. | ||
42 | */ | ||
43 | GLOBAL_ENTRY(ia64_pal_default_handler) | ||
44 | mov r8=-1 /* r8 <- -1; presumably the PAL status return register -- confirm */ | ||
45 | br.cond.sptk.many rp /* "return" by branching to rp, per the static convention */ | ||
46 | END(ia64_pal_default_handler) | ||
47 | |||
48 | /* | ||
49 | * Make a PAL call using the static calling convention. | ||
50 | * | ||
51 | * in0 Index of PAL service | ||
52 | * in1 - in3 Remaining PAL arguments | ||
53 | * in4 1 ==> clear psr.ic, 0 ==> don't clear psr.ic | ||
54 | * | ||
55 | */ | ||
56 | GLOBAL_ENTRY(ia64_pal_call_static) | ||
57 | .prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(5) | ||
58 | alloc loc1 = ar.pfs,5,5,0,0 | ||
59 | movl loc2 = pal_entry_point | ||
60 | 1: { | ||
61 | mov r28 = in0 /* r28 <- PAL service index */ | ||
62 | mov r29 = in1 /* r29..r31 <- remaining PAL arguments */ | ||
63 | mov r8 = ip /* r8 <- address of this bundle (label 1b) */ | ||
64 | } | ||
65 | ;; | ||
66 | ld8 loc2 = [loc2] // loc2 <- entry point | ||
67 | tbit.nz p6,p7 = in4, 0 /* p6 <- bit 0 of in4 set (clear psr.ic too), p7 <- otherwise */ | ||
68 | adds r8 = 1f-1b,r8 /* r8 <- address of label 1f, the return point below */ | ||
69 | mov loc4=ar.rsc // save RSE configuration | ||
70 | ;; | ||
71 | mov ar.rsc=0 // put RSE in enforced lazy, LE mode | ||
72 | mov loc3 = psr /* save psr so it can be restored after the call */ | ||
73 | mov loc0 = rp | ||
74 | .body | ||
75 | mov r30 = in2 | ||
76 | |||
77 | (p6) rsm psr.i | psr.ic | ||
78 | mov r31 = in3 | ||
79 | mov b7 = loc2 | ||
80 | |||
81 | (p7) rsm psr.i | ||
82 | ;; | ||
83 | (p6) srlz.i | ||
84 | mov rp = r8 /* PAL returns by branching to rp (static convention) */ | ||
85 | br.cond.sptk.many b7 | ||
86 | 1: mov psr.l = loc3 | ||
87 | mov ar.rsc = loc4 // restore RSE configuration | ||
88 | mov ar.pfs = loc1 | ||
89 | mov rp = loc0 | ||
90 | ;; | ||
91 | srlz.d // serialize restoration of psr.l | ||
92 | br.ret.sptk.many b0 | ||
93 | END(ia64_pal_call_static) | ||
94 | |||
95 | /* | ||
96 | * Make a PAL call using the stacked registers calling convention. | ||
97 | * | ||
98 | * Inputs: | ||
99 | * in0 Index of PAL service | ||
100 | * in1 - in3 Remaining PAL arguments | ||
101 | */ | ||
102 | GLOBAL_ENTRY(ia64_pal_call_stacked) | ||
103 | .prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(4) | ||
104 | alloc loc1 = ar.pfs,4,4,4,0 | ||
105 | movl loc2 = pal_entry_point | ||
106 | |||
107 | mov r28 = in0 // Index MUST be copied to r28 | ||
108 | mov out0 = in0 // AND in0 of PAL function | ||
109 | mov loc0 = rp | ||
110 | .body | ||
111 | ;; | ||
112 | ld8 loc2 = [loc2] // loc2 <- entry point | ||
113 | mov out1 = in1 /* forward the three remaining arguments unchanged */ | ||
114 | mov out2 = in2 | ||
115 | mov out3 = in3 | ||
116 | mov loc3 = psr /* save psr so it can be restored after the call */ | ||
117 | ;; | ||
118 | rsm psr.i | ||
119 | mov b7 = loc2 | ||
120 | ;; | ||
121 | br.call.sptk.many rp=b7 // now make the call | ||
122 | .ret0: mov psr.l = loc3 | ||
123 | mov ar.pfs = loc1 | ||
124 | mov rp = loc0 | ||
125 | ;; | ||
126 | srlz.d // serialize restoration of psr.l | ||
127 | br.ret.sptk.many b0 | ||
128 | END(ia64_pal_call_stacked) | ||
129 | |||
130 | /* | ||
131 | * Make a physical mode PAL call using the static registers calling convention. | ||
132 | * | ||
133 | * Inputs: | ||
134 | * in0 Index of PAL service | ||
135 | * in1 - in3 Remaining PAL arguments | ||
136 | * | ||
137 | * PSR_LP, PSR_TB, PSR_ID, PSR_DA are never set by the kernel. | ||
138 | * So we don't need to clear them. | ||
139 | */ | ||
140 | #define PAL_PSR_BITS_TO_CLEAR \ | ||
141 | (IA64_PSR_I | IA64_PSR_IT | IA64_PSR_DT | IA64_PSR_DB | IA64_PSR_RT | \ | ||
142 | IA64_PSR_DD | IA64_PSR_SS | IA64_PSR_RI | IA64_PSR_ED | \ | ||
143 | IA64_PSR_DFL | IA64_PSR_DFH) | ||
144 | |||
145 | #define PAL_PSR_BITS_TO_SET \ | ||
146 | (IA64_PSR_BN) | ||
147 | |||
148 | |||
149 | GLOBAL_ENTRY(ia64_pal_call_phys_static) | ||
150 | .prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(4) | ||
151 | alloc loc1 = ar.pfs,4,7,0,0 | ||
152 | movl loc2 = pal_entry_point | ||
153 | 1: { | ||
154 | mov r28 = in0 // copy procedure index | ||
155 | mov r8 = ip // save ip to compute branch | ||
156 | mov loc0 = rp // save rp | ||
157 | } | ||
158 | .body | ||
159 | ;; | ||
160 | ld8 loc2 = [loc2] // loc2 <- entry point | ||
161 | mov r29 = in1 // first argument | ||
162 | mov r30 = in2 // copy arg2 | ||
163 | mov r31 = in3 // copy arg3 | ||
164 | ;; | ||
165 | mov loc3 = psr // save psr | ||
166 | adds r8 = 1f-1b,r8 // calculate return address for call | ||
167 | ;; | ||
168 | mov loc4=ar.rsc // save RSE configuration | ||
169 | dep.z loc2=loc2,0,61 // convert pal entry point to physical | ||
170 | tpa r8=r8 // convert rp to physical | ||
171 | ;; | ||
172 | mov b7 = loc2 // install target to branch reg | ||
173 | mov ar.rsc=0 // put RSE in enforced lazy, LE mode | ||
174 | movl r16=PAL_PSR_BITS_TO_CLEAR | ||
175 | movl r17=PAL_PSR_BITS_TO_SET | ||
176 | ;; | ||
177 | or loc3=loc3,r17 // add in psr the bits to set | ||
178 | ;; | ||
179 | andcm r16=loc3,r16 // removes bits to clear from psr | ||
180 | br.call.sptk.many rp=ia64_switch_mode_phys | ||
181 | .ret1: mov rp = r8 // install return address (physical) | ||
182 | mov loc5 = r19 /* keep r19/r20 as left by ia64_switch_mode_phys; handed back */ | ||
183 | mov loc6 = r20 /* to ia64_switch_mode_virt after the PAL call */ | ||
184 | br.cond.sptk.many b7 | ||
185 | 1: | ||
186 | mov ar.rsc=0 // put RSE in enforced lazy, LE mode | ||
187 | mov r16=loc3 // r16= original psr (with PAL_PSR_BITS_TO_SET or'ed in above) | ||
188 | mov r19=loc5 | ||
189 | mov r20=loc6 | ||
190 | br.call.sptk.many rp=ia64_switch_mode_virt // return to virtual mode | ||
191 | .ret2: | ||
192 | mov psr.l = loc3 // restore init PSR | ||
193 | |||
194 | mov ar.pfs = loc1 | ||
195 | mov rp = loc0 | ||
196 | ;; | ||
197 | mov ar.rsc=loc4 // restore RSE configuration | ||
198 | srlz.d // serialize restoration of psr.l | ||
199 | br.ret.sptk.many b0 | ||
200 | END(ia64_pal_call_phys_static) | ||
201 | |||
202 | /* | ||
203 | * Make a PAL call using the stacked registers in physical mode. | ||
204 | * | ||
205 | * Inputs: | ||
206 | * in0 Index of PAL service | ||
207 | * in1 - in3 Remaining PAL arguments | ||
208 | */ | ||
209 | GLOBAL_ENTRY(ia64_pal_call_phys_stacked) | ||
210 | .prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(5) | ||
211 | alloc loc1 = ar.pfs,5,7,4,0 /* NOTE(review): 5 inputs allocated, only in0-in3 are read below */ | ||
212 | movl loc2 = pal_entry_point | ||
213 | 1: { | ||
214 | mov r28 = in0 // copy procedure index | ||
215 | mov loc0 = rp // save rp | ||
216 | } | ||
217 | .body | ||
218 | ;; | ||
219 | ld8 loc2 = [loc2] // loc2 <- entry point | ||
220 | mov out0 = in0 // procedure index, also passed as first stacked argument | ||
221 | mov out1 = in1 // copy arg1 | ||
222 | mov out2 = in2 // copy arg2 | ||
223 | mov out3 = in3 // copy arg3 | ||
224 | ;; | ||
225 | mov loc3 = psr // save psr | ||
226 | ;; | ||
227 | mov loc4=ar.rsc // save RSE configuration | ||
228 | dep.z loc2=loc2,0,61 // convert pal entry point to physical | ||
229 | ;; | ||
230 | mov ar.rsc=0 // put RSE in enforced lazy, LE mode | ||
231 | movl r16=PAL_PSR_BITS_TO_CLEAR | ||
232 | movl r17=PAL_PSR_BITS_TO_SET | ||
233 | ;; | ||
234 | or loc3=loc3,r17 // add in psr the bits to set | ||
235 | mov b7 = loc2 // install target to branch reg | ||
236 | ;; | ||
237 | andcm r16=loc3,r16 // removes bits to clear from psr | ||
238 | br.call.sptk.many rp=ia64_switch_mode_phys | ||
239 | .ret6: | ||
240 | mov loc5 = r19 /* keep r19/r20 as left by ia64_switch_mode_phys; handed back */ | ||
241 | mov loc6 = r20 /* to ia64_switch_mode_virt after the PAL call */ | ||
242 | br.call.sptk.many rp=b7 // now make the call | ||
243 | .ret7: | ||
244 | mov ar.rsc=0 // put RSE in enforced lazy, LE mode | ||
245 | mov r16=loc3 // r16= original psr (with PAL_PSR_BITS_TO_SET or'ed in above) | ||
246 | mov r19=loc5 | ||
247 | mov r20=loc6 | ||
248 | br.call.sptk.many rp=ia64_switch_mode_virt // return to virtual mode | ||
249 | |||
250 | .ret8: mov psr.l = loc3 // restore init PSR | ||
251 | mov ar.pfs = loc1 | ||
252 | mov rp = loc0 | ||
253 | ;; | ||
254 | mov ar.rsc=loc4 // restore RSE configuration | ||
255 | srlz.d // serialize restoration of psr.l | ||
256 | br.ret.sptk.many b0 | ||
257 | END(ia64_pal_call_phys_stacked) | ||
258 | |||
259 | /* | ||
260 | * Save the scratch fp regs which aren't saved in pt_regs already (f10-f15). | ||
261 | * | ||
262 | * NOTE: We need to do this since firmware (SAL and PAL) may use any of the scratch | ||
263 | * regs in the fp-low partition. | ||
264 | * | ||
265 | * Inputs: | ||
266 | * in0 Address of stack storage for fp regs | ||
267 | */ | ||
268 | GLOBAL_ENTRY(ia64_save_scratch_fpregs) | ||
269 | alloc r3=ar.pfs,1,0,0,0 | ||
270 | add r2=16,in0 /* second store pointer, 16 bytes past the first */ | ||
271 | ;; | ||
272 | stf.spill [in0] = f10,32 /* the two pointers alternate; each advances by 32 per store */ | ||
273 | stf.spill [r2] = f11,32 | ||
274 | ;; | ||
275 | stf.spill [in0] = f12,32 | ||
276 | stf.spill [r2] = f13,32 | ||
277 | ;; | ||
278 | stf.spill [in0] = f14,32 | ||
279 | stf.spill [r2] = f15,32 | ||
280 | br.ret.sptk.many rp | ||
281 | END(ia64_save_scratch_fpregs) | ||
282 | |||
283 | /* | ||
284 | * Load the scratch fp regs (f10-f15); same storage layout as ia64_save_scratch_fpregs | ||
285 | * | ||
286 | * Inputs: | ||
287 | * in0 Address of stack storage for fp regs | ||
288 | */ | ||
289 | GLOBAL_ENTRY(ia64_load_scratch_fpregs) | ||
290 | alloc r3=ar.pfs,1,0,0,0 | ||
291 | add r2=16,in0 /* second load pointer, 16 bytes past the first */ | ||
292 | ;; | ||
293 | ldf.fill f10 = [in0],32 /* the two pointers alternate; each advances by 32 per load */ | ||
294 | ldf.fill f11 = [r2],32 | ||
295 | ;; | ||
296 | ldf.fill f12 = [in0],32 | ||
297 | ldf.fill f13 = [r2],32 | ||
298 | ;; | ||
299 | ldf.fill f14 = [in0],32 | ||
300 | ldf.fill f15 = [r2],32 | ||
301 | br.ret.sptk.many rp | ||
302 | END(ia64_load_scratch_fpregs) | ||
302 | END(ia64_load_scratch_fpregs) | ||
diff --git a/arch/ia64/kernel/palinfo.c b/arch/ia64/kernel/palinfo.c new file mode 100644 index 000000000000..25e7c8344564 --- /dev/null +++ b/arch/ia64/kernel/palinfo.c | |||
@@ -0,0 +1,1023 @@ | |||
1 | /* | ||
2 | * palinfo.c | ||
3 | * | ||
4 | * Prints processor specific information reported by PAL. | ||
5 | * This code is based on specification of PAL as of the | ||
6 | * Intel IA-64 Architecture Software Developer's Manual v1.0. | ||
7 | * | ||
8 | * | ||
9 | * Copyright (C) 2000-2001, 2003 Hewlett-Packard Co | ||
10 | * Stephane Eranian <eranian@hpl.hp.com> | ||
11 | * Copyright (C) 2004 Intel Corporation | ||
12 | * Ashok Raj <ashok.raj@intel.com> | ||
13 | * | ||
14 | * 05/26/2000 S.Eranian initial release | ||
15 | * 08/21/2000 S.Eranian updated to July 2000 PAL specs | ||
16 | * 02/05/2001 S.Eranian fixed module support | ||
17 | * 10/23/2001 S.Eranian updated pal_perf_mon_info bug fixes | ||
18 | * 03/24/2004 Ashok Raj updated to work with CPU Hotplug | ||
19 | */ | ||
20 | #include <linux/config.h> | ||
21 | #include <linux/types.h> | ||
22 | #include <linux/errno.h> | ||
23 | #include <linux/init.h> | ||
24 | #include <linux/proc_fs.h> | ||
25 | #include <linux/mm.h> | ||
26 | #include <linux/module.h> | ||
27 | #include <linux/efi.h> | ||
28 | #include <linux/notifier.h> | ||
29 | #include <linux/cpu.h> | ||
30 | #include <linux/cpumask.h> | ||
31 | |||
32 | #include <asm/pal.h> | ||
33 | #include <asm/sal.h> | ||
34 | #include <asm/page.h> | ||
35 | #include <asm/processor.h> | ||
36 | #include <linux/smp.h> | ||
37 | |||
38 | MODULE_AUTHOR("Stephane Eranian <eranian@hpl.hp.com>"); | ||
39 | MODULE_DESCRIPTION("/proc interface to IA-64 PAL"); | ||
40 | MODULE_LICENSE("GPL"); | ||
41 | |||
42 | #define PALINFO_VERSION "0.5" | ||
43 | |||
44 | typedef int (*palinfo_func_t)(char*); /* handler: formats into the given buffer, returns an int (byte count in the handlers visible here) */ | ||
45 | |||
46 | typedef struct { | ||
47 | const char *name; /* name of the proc entry */ | ||
48 | palinfo_func_t proc_read; /* function to call for reading */ | ||
49 | struct proc_dir_entry *entry; /* registered entry (removal) */ | ||
50 | } palinfo_entry_t; | ||
51 | |||
52 | |||
53 | /* | ||
54 | * String tables used to pretty-print the values reported by PAL | ||
55 | */ | ||
56 | |||
57 | static char *cache_types[] = { /* indexed by (cache type + unified flag) in cache_info() */ | ||
58 | "", /* not used */ | ||
59 | "Instruction", | ||
60 | "Data", | ||
61 | "Data/Instruction" /* unified */ | ||
62 | }; | ||
63 | |||
64 | static const char *cache_mattrib[]={ /* indexed by the cache attribute field in cache_info() */ | ||
65 | "WriteThrough", | ||
66 | "WriteBack", | ||
67 | "", /* reserved */ | ||
68 | "" /* reserved */ | ||
69 | }; | ||
70 | |||
71 | static const char *cache_st_hints[]={ /* indexed by bit position in the store-hints field */ | ||
72 | "Temporal, level 1", | ||
73 | "Reserved", | ||
74 | "Reserved", | ||
75 | "Non-temporal, all levels", | ||
76 | "Reserved", | ||
77 | "Reserved", | ||
78 | "Reserved", | ||
79 | "Reserved" | ||
80 | }; | ||
81 | |||
82 | static const char *cache_ld_hints[]={ /* indexed by bit position in the load-hints field */ | ||
83 | "Temporal, level 1", | ||
84 | "Non-temporal, level 1", | ||
85 | "Reserved", | ||
86 | "Non-temporal, all levels", | ||
87 | "Reserved", | ||
88 | "Reserved", | ||
89 | "Reserved", | ||
90 | "Reserved" | ||
91 | }; | ||
92 | |||
93 | static const char *rse_hints[]={ | ||
94 | "enforced lazy", | ||
95 | "eager stores", | ||
96 | "eager loads", | ||
97 | "eager loads and stores" | ||
98 | }; | ||
99 | |||
100 | #define RSE_HINTS_COUNT ARRAY_SIZE(rse_hints) | ||
101 | |||
102 | static const char *mem_attrib[]={ /* one name per 3-bit encoding, see the per-entry comments */ | ||
103 | "WB", /* 000 */ | ||
104 | "SW", /* 001 */ | ||
105 | "010", /* 010 */ | ||
106 | "011", /* 011 */ | ||
107 | "UC", /* 100 */ | ||
108 | "UCE", /* 101 */ | ||
109 | "WC", /* 110 */ | ||
110 | "NaTPage" /* 111 */ | ||
111 | }; | ||
112 | |||
113 | /* | ||
114 | * Take a 64bit vector and produces a string such that | ||
115 | * if bit n is set then 2^n in clear text is generated. The adjustment | ||
116 | * to the right unit is also done. | ||
117 | * | ||
118 | * Input: | ||
119 | * - a pointer to a buffer to hold the string | ||
120 | * - a 64-bit vector | ||
121 | * Ouput: | ||
122 | * - a pointer to the end of the buffer | ||
123 | * | ||
124 | */ | ||
125 | static char * | ||
126 | bitvector_process(char *p, u64 vector) | ||
127 | { | ||
128 | int i,j; | ||
129 | const char *units[]={ "", "K", "M", "G", "T" }; | ||
130 | |||
131 | for (i=0, j=0; i < 64; i++ , j=i/10) { | ||
132 | if (vector & 0x1) { | ||
133 | p += sprintf(p, "%d%s ", 1 << (i-j*10), units[j]); | ||
134 | } | ||
135 | vector >>= 1; | ||
136 | } | ||
137 | return p; | ||
138 | } | ||
139 | |||
140 | /* | ||
141 | * Take a 64bit vector and produces a string such that | ||
142 | * if bit n is set then register n is present. The function | ||
143 | * takes into account consecutive registers and prints out ranges. | ||
144 | * | ||
145 | * Input: | ||
146 | * - a pointer to a buffer to hold the string | ||
147 | * - a 64-bit vector | ||
148 | * Ouput: | ||
149 | * - a pointer to the end of the buffer | ||
150 | * | ||
151 | */ | ||
152 | static char * | ||
153 | bitregister_process(char *p, u64 *reg_info, int max) | ||
154 | { | ||
155 | int i, begin, skip = 0; | ||
156 | u64 value = reg_info[0]; | ||
157 | |||
158 | value >>= i = begin = ffs(value) - 1; | ||
159 | |||
160 | for(; i < max; i++ ) { | ||
161 | |||
162 | if (i != 0 && (i%64) == 0) value = *++reg_info; | ||
163 | |||
164 | if ((value & 0x1) == 0 && skip == 0) { | ||
165 | if (begin <= i - 2) | ||
166 | p += sprintf(p, "%d-%d ", begin, i-1); | ||
167 | else | ||
168 | p += sprintf(p, "%d ", i-1); | ||
169 | skip = 1; | ||
170 | begin = -1; | ||
171 | } else if ((value & 0x1) && skip == 1) { | ||
172 | skip = 0; | ||
173 | begin = i; | ||
174 | } | ||
175 | value >>=1; | ||
176 | } | ||
177 | if (begin > -1) { | ||
178 | if (begin < 127) | ||
179 | p += sprintf(p, "%d-127", begin); | ||
180 | else | ||
181 | p += sprintf(p, "127"); | ||
182 | } | ||
183 | |||
184 | return p; | ||
185 | } | ||
186 | |||
187 | static int | ||
188 | power_info(char *page) | ||
189 | { | ||
190 | s64 status; | ||
191 | char *p = page; | ||
192 | u64 halt_info_buffer[8]; | ||
193 | pal_power_mgmt_info_u_t *halt_info =(pal_power_mgmt_info_u_t *)halt_info_buffer; | ||
194 | int i; | ||
195 | |||
196 | status = ia64_pal_halt_info(halt_info); | ||
197 | if (status != 0) return 0; | ||
198 | |||
199 | for (i=0; i < 8 ; i++ ) { | ||
200 | if (halt_info[i].pal_power_mgmt_info_s.im == 1) { | ||
201 | p += sprintf(p, "Power level %d:\n" | ||
202 | "\tentry_latency : %d cycles\n" | ||
203 | "\texit_latency : %d cycles\n" | ||
204 | "\tpower consumption : %d mW\n" | ||
205 | "\tCache+TLB coherency : %s\n", i, | ||
206 | halt_info[i].pal_power_mgmt_info_s.entry_latency, | ||
207 | halt_info[i].pal_power_mgmt_info_s.exit_latency, | ||
208 | halt_info[i].pal_power_mgmt_info_s.power_consumption, | ||
209 | halt_info[i].pal_power_mgmt_info_s.co ? "Yes" : "No"); | ||
210 | } else { | ||
211 | p += sprintf(p,"Power level %d: not implemented\n",i); | ||
212 | } | ||
213 | } | ||
214 | return p - page; | ||
215 | } | ||
216 | |||
217 | static int | ||
218 | cache_info(char *page) | ||
219 | { | ||
220 | char *p = page; | ||
221 | u64 i, levels, unique_caches; | ||
222 | pal_cache_config_info_t cci; | ||
223 | int j, k; | ||
224 | s64 status; | ||
225 | |||
226 | if ((status = ia64_pal_cache_summary(&levels, &unique_caches)) != 0) { | ||
227 | printk(KERN_ERR "ia64_pal_cache_summary=%ld\n", status); | ||
228 | return 0; | ||
229 | } | ||
230 | |||
231 | p += sprintf(p, "Cache levels : %ld\nUnique caches : %ld\n\n", levels, unique_caches); | ||
232 | |||
233 | for (i=0; i < levels; i++) { | ||
234 | |||
235 | for (j=2; j >0 ; j--) { | ||
236 | |||
237 | /* even without unification some level may not be present */ | ||
238 | if ((status=ia64_pal_cache_config_info(i,j, &cci)) != 0) { | ||
239 | continue; | ||
240 | } | ||
241 | p += sprintf(p, | ||
242 | "%s Cache level %lu:\n" | ||
243 | "\tSize : %lu bytes\n" | ||
244 | "\tAttributes : ", | ||
245 | cache_types[j+cci.pcci_unified], i+1, | ||
246 | cci.pcci_cache_size); | ||
247 | |||
248 | if (cci.pcci_unified) p += sprintf(p, "Unified "); | ||
249 | |||
250 | p += sprintf(p, "%s\n", cache_mattrib[cci.pcci_cache_attr]); | ||
251 | |||
252 | p += sprintf(p, | ||
253 | "\tAssociativity : %d\n" | ||
254 | "\tLine size : %d bytes\n" | ||
255 | "\tStride : %d bytes\n", | ||
256 | cci.pcci_assoc, 1<<cci.pcci_line_size, 1<<cci.pcci_stride); | ||
257 | if (j == 1) | ||
258 | p += sprintf(p, "\tStore latency : N/A\n"); | ||
259 | else | ||
260 | p += sprintf(p, "\tStore latency : %d cycle(s)\n", | ||
261 | cci.pcci_st_latency); | ||
262 | |||
263 | p += sprintf(p, | ||
264 | "\tLoad latency : %d cycle(s)\n" | ||
265 | "\tStore hints : ", cci.pcci_ld_latency); | ||
266 | |||
267 | for(k=0; k < 8; k++ ) { | ||
268 | if ( cci.pcci_st_hints & 0x1) | ||
269 | p += sprintf(p, "[%s]", cache_st_hints[k]); | ||
270 | cci.pcci_st_hints >>=1; | ||
271 | } | ||
272 | p += sprintf(p, "\n\tLoad hints : "); | ||
273 | |||
274 | for(k=0; k < 8; k++ ) { | ||
275 | if (cci.pcci_ld_hints & 0x1) | ||
276 | p += sprintf(p, "[%s]", cache_ld_hints[k]); | ||
277 | cci.pcci_ld_hints >>=1; | ||
278 | } | ||
279 | p += sprintf(p, | ||
280 | "\n\tAlias boundary : %d byte(s)\n" | ||
281 | "\tTag LSB : %d\n" | ||
282 | "\tTag MSB : %d\n", | ||
283 | 1<<cci.pcci_alias_boundary, cci.pcci_tag_lsb, | ||
284 | cci.pcci_tag_msb); | ||
285 | |||
286 | /* when unified, data(j=2) is enough */ | ||
287 | if (cci.pcci_unified) break; | ||
288 | } | ||
289 | } | ||
290 | return p - page; | ||
291 | } | ||
292 | |||
293 | |||
294 | static int | ||
295 | vm_info(char *page) | ||
296 | { | ||
297 | char *p = page; | ||
298 | u64 tr_pages =0, vw_pages=0, tc_pages; | ||
299 | u64 attrib; | ||
300 | pal_vm_info_1_u_t vm_info_1; | ||
301 | pal_vm_info_2_u_t vm_info_2; | ||
302 | pal_tc_info_u_t tc_info; | ||
303 | ia64_ptce_info_t ptce; | ||
304 | const char *sep; | ||
305 | int i, j; | ||
306 | s64 status; | ||
307 | |||
308 | if ((status = ia64_pal_vm_summary(&vm_info_1, &vm_info_2)) !=0) { | ||
309 | printk(KERN_ERR "ia64_pal_vm_summary=%ld\n", status); | ||
310 | return 0; | ||
311 | } | ||
312 | |||
313 | |||
314 | p += sprintf(p, | ||
315 | "Physical Address Space : %d bits\n" | ||
316 | "Virtual Address Space : %d bits\n" | ||
317 | "Protection Key Registers(PKR) : %d\n" | ||
318 | "Implemented bits in PKR.key : %d\n" | ||
319 | "Hash Tag ID : 0x%x\n" | ||
320 | "Size of RR.rid : %d\n", | ||
321 | vm_info_1.pal_vm_info_1_s.phys_add_size, | ||
322 | vm_info_2.pal_vm_info_2_s.impl_va_msb+1, vm_info_1.pal_vm_info_1_s.max_pkr+1, | ||
323 | vm_info_1.pal_vm_info_1_s.key_size, vm_info_1.pal_vm_info_1_s.hash_tag_id, | ||
324 | vm_info_2.pal_vm_info_2_s.rid_size); | ||
325 | |||
326 | if (ia64_pal_mem_attrib(&attrib) != 0) | ||
327 | return 0; | ||
328 | |||
329 | p += sprintf(p, "Supported memory attributes : "); | ||
330 | sep = ""; | ||
331 | for (i = 0; i < 8; i++) { | ||
332 | if (attrib & (1 << i)) { | ||
333 | p += sprintf(p, "%s%s", sep, mem_attrib[i]); | ||
334 | sep = ", "; | ||
335 | } | ||
336 | } | ||
337 | p += sprintf(p, "\n"); | ||
338 | |||
339 | if ((status = ia64_pal_vm_page_size(&tr_pages, &vw_pages)) !=0) { | ||
340 | printk(KERN_ERR "ia64_pal_vm_page_size=%ld\n", status); | ||
341 | return 0; | ||
342 | } | ||
343 | |||
344 | p += sprintf(p, | ||
345 | "\nTLB walker : %simplemented\n" | ||
346 | "Number of DTR : %d\n" | ||
347 | "Number of ITR : %d\n" | ||
348 | "TLB insertable page sizes : ", | ||
349 | vm_info_1.pal_vm_info_1_s.vw ? "" : "not ", | ||
350 | vm_info_1.pal_vm_info_1_s.max_dtr_entry+1, | ||
351 | vm_info_1.pal_vm_info_1_s.max_itr_entry+1); | ||
352 | |||
353 | |||
354 | p = bitvector_process(p, tr_pages); | ||
355 | |||
356 | p += sprintf(p, "\nTLB purgeable page sizes : "); | ||
357 | |||
358 | p = bitvector_process(p, vw_pages); | ||
359 | |||
360 | if ((status=ia64_get_ptce(&ptce)) != 0) { | ||
361 | printk(KERN_ERR "ia64_get_ptce=%ld\n", status); | ||
362 | return 0; | ||
363 | } | ||
364 | |||
365 | p += sprintf(p, | ||
366 | "\nPurge base address : 0x%016lx\n" | ||
367 | "Purge outer loop count : %d\n" | ||
368 | "Purge inner loop count : %d\n" | ||
369 | "Purge outer loop stride : %d\n" | ||
370 | "Purge inner loop stride : %d\n", | ||
371 | ptce.base, ptce.count[0], ptce.count[1], ptce.stride[0], ptce.stride[1]); | ||
372 | |||
373 | p += sprintf(p, | ||
374 | "TC Levels : %d\n" | ||
375 | "Unique TC(s) : %d\n", | ||
376 | vm_info_1.pal_vm_info_1_s.num_tc_levels, | ||
377 | vm_info_1.pal_vm_info_1_s.max_unique_tcs); | ||
378 | |||
379 | for(i=0; i < vm_info_1.pal_vm_info_1_s.num_tc_levels; i++) { | ||
380 | for (j=2; j>0 ; j--) { | ||
381 | tc_pages = 0; /* just in case */ | ||
382 | |||
383 | |||
384 | /* even without unification, some levels may not be present */ | ||
385 | if ((status=ia64_pal_vm_info(i,j, &tc_info, &tc_pages)) != 0) { | ||
386 | continue; | ||
387 | } | ||
388 | |||
389 | p += sprintf(p, | ||
390 | "\n%s Translation Cache Level %d:\n" | ||
391 | "\tHash sets : %d\n" | ||
392 | "\tAssociativity : %d\n" | ||
393 | "\tNumber of entries : %d\n" | ||
394 | "\tFlags : ", | ||
395 | cache_types[j+tc_info.tc_unified], i+1, tc_info.tc_num_sets, | ||
396 | tc_info.tc_associativity, tc_info.tc_num_entries); | ||
397 | |||
398 | if (tc_info.tc_pf) p += sprintf(p, "PreferredPageSizeOptimized "); | ||
399 | if (tc_info.tc_unified) p += sprintf(p, "Unified "); | ||
400 | if (tc_info.tc_reduce_tr) p += sprintf(p, "TCReduction"); | ||
401 | |||
402 | p += sprintf(p, "\n\tSupported page sizes: "); | ||
403 | |||
404 | p = bitvector_process(p, tc_pages); | ||
405 | |||
406 | /* when unified date (j=2) is enough */ | ||
407 | if (tc_info.tc_unified) break; | ||
408 | } | ||
409 | } | ||
410 | p += sprintf(p, "\n"); | ||
411 | |||
412 | return p - page; | ||
413 | } | ||
414 | |||
415 | |||
416 | static int | ||
417 | register_info(char *page) | ||
418 | { | ||
419 | char *p = page; | ||
420 | u64 reg_info[2]; | ||
421 | u64 info; | ||
422 | u64 phys_stacked; | ||
423 | pal_hints_u_t hints; | ||
424 | u64 iregs, dregs; | ||
425 | char *info_type[]={ | ||
426 | "Implemented AR(s)", | ||
427 | "AR(s) with read side-effects", | ||
428 | "Implemented CR(s)", | ||
429 | "CR(s) with read side-effects", | ||
430 | }; | ||
431 | |||
432 | for(info=0; info < 4; info++) { | ||
433 | |||
434 | if (ia64_pal_register_info(info, ®_info[0], ®_info[1]) != 0) return 0; | ||
435 | |||
436 | p += sprintf(p, "%-32s : ", info_type[info]); | ||
437 | |||
438 | p = bitregister_process(p, reg_info, 128); | ||
439 | |||
440 | p += sprintf(p, "\n"); | ||
441 | } | ||
442 | |||
443 | if (ia64_pal_rse_info(&phys_stacked, &hints) != 0) return 0; | ||
444 | |||
445 | p += sprintf(p, | ||
446 | "RSE stacked physical registers : %ld\n" | ||
447 | "RSE load/store hints : %ld (%s)\n", | ||
448 | phys_stacked, hints.ph_data, | ||
449 | hints.ph_data < RSE_HINTS_COUNT ? rse_hints[hints.ph_data]: "(??)"); | ||
450 | |||
451 | if (ia64_pal_debug_info(&iregs, &dregs)) | ||
452 | return 0; | ||
453 | |||
454 | p += sprintf(p, | ||
455 | "Instruction debug register pairs : %ld\n" | ||
456 | "Data debug register pairs : %ld\n", iregs, dregs); | ||
457 | |||
458 | return p - page; | ||
459 | } | ||
460 | |||
/*
 * Names of the processor feature bits, indexed by bit number (0..63).
 * Bits without a name stay NULL and are skipped when printing.
 */
static const char *proc_features[]={
	[41] = "XIP,XPSR,XFS implemented",
	[42] = "XR1-XR3 implemented",
	[43] = "Disable dynamic predicate prediction",
	[44] = "Disable processor physical number",
	[45] = "Disable dynamic data cache prefetch",
	[46] = "Disable dynamic inst cache prefetch",
	[47] = "Disable dynamic branch prediction",
	[56] = "Disable BINIT on processor time-out",
	[57] = "Disable dynamic power management (DPM)",
	[58] = "Disable coherency",
	[59] = "Disable cache",
	[60] = "Enable CMCI promotion",
	[61] = "Enable MCA to BINIT promotion",
	[62] = "Enable MCA promotion",
	[63] = "Enable BERR promotion"
};
484 | |||
485 | |||
486 | static int | ||
487 | processor_info(char *page) | ||
488 | { | ||
489 | char *p = page; | ||
490 | const char **v = proc_features; | ||
491 | u64 avail=1, status=1, control=1; | ||
492 | int i; | ||
493 | s64 ret; | ||
494 | |||
495 | if ((ret=ia64_pal_proc_get_features(&avail, &status, &control)) != 0) return 0; | ||
496 | |||
497 | for(i=0; i < 64; i++, v++,avail >>=1, status >>=1, control >>=1) { | ||
498 | if ( ! *v ) continue; | ||
499 | p += sprintf(p, "%-40s : %s%s %s\n", *v, | ||
500 | avail & 0x1 ? "" : "NotImpl", | ||
501 | avail & 0x1 ? (status & 0x1 ? "On" : "Off"): "", | ||
502 | avail & 0x1 ? (control & 0x1 ? "Ctrl" : "NoCtrl"): ""); | ||
503 | } | ||
504 | return p - page; | ||
505 | } | ||
506 | |||
/*
 * Names of the bus feature bits, indexed by bit number (0..63).
 * Bits without a name stay NULL and are skipped when printing.
 */
static const char *bus_features[]={
	[29] = "Request Bus Parking",
	[30] = "Bus Lock Mask",
	[31] = "Enable Half Transfer",
	[52] = "Enable Cache Line Repl. Shared",
	[53] = "Enable Cache Line Repl. Exclusive",
	[54] = "Disable Transaction Queuing",
	[55] = "Disable Response Error Checking",
	[56] = "Disable Bus Error Checking",
	[57] = "Disable Bus Requester Internal Error Signalling",
	[58] = "Disable Bus Requester Error Signalling",
	[59] = "Disable Bus Initialization Event Checking",
	[60] = "Disable Bus Initialization Event Signalling",
	[61] = "Disable Bus Address Error Checking",
	[62] = "Disable Bus Address Error Signalling",
	[63] = "Disable Bus Data Error Checking"
};
531 | |||
532 | |||
533 | static int | ||
534 | bus_info(char *page) | ||
535 | { | ||
536 | char *p = page; | ||
537 | const char **v = bus_features; | ||
538 | pal_bus_features_u_t av, st, ct; | ||
539 | u64 avail, status, control; | ||
540 | int i; | ||
541 | s64 ret; | ||
542 | |||
543 | if ((ret=ia64_pal_bus_get_features(&av, &st, &ct)) != 0) return 0; | ||
544 | |||
545 | avail = av.pal_bus_features_val; | ||
546 | status = st.pal_bus_features_val; | ||
547 | control = ct.pal_bus_features_val; | ||
548 | |||
549 | for(i=0; i < 64; i++, v++, avail >>=1, status >>=1, control >>=1) { | ||
550 | if ( ! *v ) continue; | ||
551 | p += sprintf(p, "%-48s : %s%s %s\n", *v, | ||
552 | avail & 0x1 ? "" : "NotImpl", | ||
553 | avail & 0x1 ? (status & 0x1 ? "On" : "Off"): "", | ||
554 | avail & 0x1 ? (control & 0x1 ? "Ctrl" : "NoCtrl"): ""); | ||
555 | } | ||
556 | return p - page; | ||
557 | } | ||
558 | |||
559 | static int | ||
560 | version_info(char *page) | ||
561 | { | ||
562 | pal_version_u_t min_ver, cur_ver; | ||
563 | char *p = page; | ||
564 | |||
565 | /* The PAL_VERSION call is advertised as being able to support | ||
566 | * both physical and virtual mode calls. This seems to be a documentation | ||
567 | * bug rather than firmware bug. In fact, it does only support physical mode. | ||
568 | * So now the code reflects this fact and the pal_version() has been updated | ||
569 | * accordingly. | ||
570 | */ | ||
571 | if (ia64_pal_version(&min_ver, &cur_ver) != 0) return 0; | ||
572 | |||
573 | p += sprintf(p, | ||
574 | "PAL_vendor : 0x%02x (min=0x%02x)\n" | ||
575 | "PAL_A : %x.%x.%x (min=%x.%x.%x)\n" | ||
576 | "PAL_B : %x.%x.%x (min=%x.%x.%x)\n", | ||
577 | cur_ver.pal_version_s.pv_pal_vendor, min_ver.pal_version_s.pv_pal_vendor, | ||
578 | |||
579 | cur_ver.pal_version_s.pv_pal_a_model>>4, | ||
580 | cur_ver.pal_version_s.pv_pal_a_model&0xf, cur_ver.pal_version_s.pv_pal_a_rev, | ||
581 | min_ver.pal_version_s.pv_pal_a_model>>4, | ||
582 | min_ver.pal_version_s.pv_pal_a_model&0xf, min_ver.pal_version_s.pv_pal_a_rev, | ||
583 | |||
584 | cur_ver.pal_version_s.pv_pal_b_model>>4, | ||
585 | cur_ver.pal_version_s.pv_pal_b_model&0xf, cur_ver.pal_version_s.pv_pal_b_rev, | ||
586 | min_ver.pal_version_s.pv_pal_b_model>>4, | ||
587 | min_ver.pal_version_s.pv_pal_b_model&0xf, min_ver.pal_version_s.pv_pal_b_rev); | ||
588 | return p - page; | ||
589 | } | ||
590 | |||
591 | static int | ||
592 | perfmon_info(char *page) | ||
593 | { | ||
594 | char *p = page; | ||
595 | u64 pm_buffer[16]; | ||
596 | pal_perf_mon_info_u_t pm_info; | ||
597 | |||
598 | if (ia64_pal_perf_mon_info(pm_buffer, &pm_info) != 0) return 0; | ||
599 | |||
600 | p += sprintf(p, | ||
601 | "PMC/PMD pairs : %d\n" | ||
602 | "Counter width : %d bits\n" | ||
603 | "Cycle event number : %d\n" | ||
604 | "Retired event number : %d\n" | ||
605 | "Implemented PMC : ", | ||
606 | pm_info.pal_perf_mon_info_s.generic, pm_info.pal_perf_mon_info_s.width, | ||
607 | pm_info.pal_perf_mon_info_s.cycles, pm_info.pal_perf_mon_info_s.retired); | ||
608 | |||
609 | p = bitregister_process(p, pm_buffer, 256); | ||
610 | p += sprintf(p, "\nImplemented PMD : "); | ||
611 | p = bitregister_process(p, pm_buffer+4, 256); | ||
612 | p += sprintf(p, "\nCycles count capable : "); | ||
613 | p = bitregister_process(p, pm_buffer+8, 256); | ||
614 | p += sprintf(p, "\nRetired bundles count capable : "); | ||
615 | |||
616 | #ifdef CONFIG_ITANIUM | ||
617 | /* | ||
618 | * PAL_PERF_MON_INFO reports that only PMC4 can be used to count CPU_CYCLES | ||
619 | * which is wrong, both PMC4 and PMD5 support it. | ||
620 | */ | ||
621 | if (pm_buffer[12] == 0x10) pm_buffer[12]=0x30; | ||
622 | #endif | ||
623 | |||
624 | p = bitregister_process(p, pm_buffer+12, 256); | ||
625 | |||
626 | p += sprintf(p, "\n"); | ||
627 | |||
628 | return p - page; | ||
629 | } | ||
630 | |||
631 | static int | ||
632 | frequency_info(char *page) | ||
633 | { | ||
634 | char *p = page; | ||
635 | struct pal_freq_ratio proc, itc, bus; | ||
636 | u64 base; | ||
637 | |||
638 | if (ia64_pal_freq_base(&base) == -1) | ||
639 | p += sprintf(p, "Output clock : not implemented\n"); | ||
640 | else | ||
641 | p += sprintf(p, "Output clock : %ld ticks/s\n", base); | ||
642 | |||
643 | if (ia64_pal_freq_ratios(&proc, &bus, &itc) != 0) return 0; | ||
644 | |||
645 | p += sprintf(p, | ||
646 | "Processor/Clock ratio : %ld/%ld\n" | ||
647 | "Bus/Clock ratio : %ld/%ld\n" | ||
648 | "ITC/Clock ratio : %ld/%ld\n", | ||
649 | proc.num, proc.den, bus.num, bus.den, itc.num, itc.den); | ||
650 | |||
651 | return p - page; | ||
652 | } | ||
653 | |||
654 | static int | ||
655 | tr_info(char *page) | ||
656 | { | ||
657 | char *p = page; | ||
658 | s64 status; | ||
659 | pal_tr_valid_u_t tr_valid; | ||
660 | u64 tr_buffer[4]; | ||
661 | pal_vm_info_1_u_t vm_info_1; | ||
662 | pal_vm_info_2_u_t vm_info_2; | ||
663 | u64 i, j; | ||
664 | u64 max[3], pgm; | ||
665 | struct ifa_reg { | ||
666 | u64 valid:1; | ||
667 | u64 ig:11; | ||
668 | u64 vpn:52; | ||
669 | } *ifa_reg; | ||
670 | struct itir_reg { | ||
671 | u64 rv1:2; | ||
672 | u64 ps:6; | ||
673 | u64 key:24; | ||
674 | u64 rv2:32; | ||
675 | } *itir_reg; | ||
676 | struct gr_reg { | ||
677 | u64 p:1; | ||
678 | u64 rv1:1; | ||
679 | u64 ma:3; | ||
680 | u64 a:1; | ||
681 | u64 d:1; | ||
682 | u64 pl:2; | ||
683 | u64 ar:3; | ||
684 | u64 ppn:38; | ||
685 | u64 rv2:2; | ||
686 | u64 ed:1; | ||
687 | u64 ig:11; | ||
688 | } *gr_reg; | ||
689 | struct rid_reg { | ||
690 | u64 ig1:1; | ||
691 | u64 rv1:1; | ||
692 | u64 ig2:6; | ||
693 | u64 rid:24; | ||
694 | u64 rv2:32; | ||
695 | } *rid_reg; | ||
696 | |||
697 | if ((status = ia64_pal_vm_summary(&vm_info_1, &vm_info_2)) !=0) { | ||
698 | printk(KERN_ERR "ia64_pal_vm_summary=%ld\n", status); | ||
699 | return 0; | ||
700 | } | ||
701 | max[0] = vm_info_1.pal_vm_info_1_s.max_itr_entry+1; | ||
702 | max[1] = vm_info_1.pal_vm_info_1_s.max_dtr_entry+1; | ||
703 | |||
704 | for (i=0; i < 2; i++ ) { | ||
705 | for (j=0; j < max[i]; j++) { | ||
706 | |||
707 | status = ia64_pal_tr_read(j, i, tr_buffer, &tr_valid); | ||
708 | if (status != 0) { | ||
709 | printk(KERN_ERR "palinfo: pal call failed on tr[%lu:%lu]=%ld\n", | ||
710 | i, j, status); | ||
711 | continue; | ||
712 | } | ||
713 | |||
714 | ifa_reg = (struct ifa_reg *)&tr_buffer[2]; | ||
715 | |||
716 | if (ifa_reg->valid == 0) continue; | ||
717 | |||
718 | gr_reg = (struct gr_reg *)tr_buffer; | ||
719 | itir_reg = (struct itir_reg *)&tr_buffer[1]; | ||
720 | rid_reg = (struct rid_reg *)&tr_buffer[3]; | ||
721 | |||
722 | pgm = -1 << (itir_reg->ps - 12); | ||
723 | p += sprintf(p, | ||
724 | "%cTR%lu: av=%d pv=%d dv=%d mv=%d\n" | ||
725 | "\tppn : 0x%lx\n" | ||
726 | "\tvpn : 0x%lx\n" | ||
727 | "\tps : ", | ||
728 | "ID"[i], j, | ||
729 | tr_valid.pal_tr_valid_s.access_rights_valid, | ||
730 | tr_valid.pal_tr_valid_s.priv_level_valid, | ||
731 | tr_valid.pal_tr_valid_s.dirty_bit_valid, | ||
732 | tr_valid.pal_tr_valid_s.mem_attr_valid, | ||
733 | (gr_reg->ppn & pgm)<< 12, (ifa_reg->vpn & pgm)<< 12); | ||
734 | |||
735 | p = bitvector_process(p, 1<< itir_reg->ps); | ||
736 | |||
737 | p += sprintf(p, | ||
738 | "\n\tpl : %d\n" | ||
739 | "\tar : %d\n" | ||
740 | "\trid : %x\n" | ||
741 | "\tp : %d\n" | ||
742 | "\tma : %d\n" | ||
743 | "\td : %d\n", | ||
744 | gr_reg->pl, gr_reg->ar, rid_reg->rid, gr_reg->p, gr_reg->ma, | ||
745 | gr_reg->d); | ||
746 | } | ||
747 | } | ||
748 | return p - page; | ||
749 | } | ||
750 | |||
751 | |||
752 | |||
753 | /* | ||
754 | * List {name,function} pairs for every entry in /proc/pal/cpu* | ||
755 | */ | ||
756 | static palinfo_entry_t palinfo_entries[]={ | ||
757 | { "version_info", version_info, }, | ||
758 | { "vm_info", vm_info, }, | ||
759 | { "cache_info", cache_info, }, | ||
760 | { "power_info", power_info, }, | ||
761 | { "register_info", register_info, }, | ||
762 | { "processor_info", processor_info, }, | ||
763 | { "perfmon_info", perfmon_info, }, | ||
764 | { "frequency_info", frequency_info, }, | ||
765 | { "bus_info", bus_info }, | ||
766 | { "tr_info", tr_info, } | ||
767 | }; | ||
768 | |||
/* Number of per-cpu proc files; fully parenthesized so it is safe inside any expression. */
#define NR_PALINFO_ENTRIES	((int) ARRAY_SIZE(palinfo_entries))
770 | |||
771 | /* | ||
772 | * this array is used to keep track of the proc entries we create. This is | ||
773 | * required in the module mode when we need to remove all entries. The procfs code | ||
774 | * does not do recursion of deletion | ||
775 | * | ||
776 | * Notes: | ||
777 | * - +1 accounts for the cpuN directory entry in /proc/pal | ||
778 | */ | ||
779 | #define NR_PALINFO_PROC_ENTRIES (NR_CPUS*(NR_PALINFO_ENTRIES+1)) | ||
780 | |||
781 | static struct proc_dir_entry *palinfo_proc_entries[NR_PALINFO_PROC_ENTRIES]; | ||
782 | static struct proc_dir_entry *palinfo_dir; | ||
783 | |||
784 | /* | ||
785 | * This data structure is used to pass which cpu,function is being requested | ||
786 | * It must fit in a 64bit quantity to be passed to the proc callback routine | ||
787 | * | ||
788 | * In SMP mode, when we get a request for another CPU, we must call that | ||
789 | * other CPU using IPI and wait for the result before returning. | ||
790 | */ | ||
791 | typedef union { | ||
792 | u64 value; | ||
793 | struct { | ||
794 | unsigned req_cpu: 32; /* for which CPU this info is */ | ||
795 | unsigned func_id: 32; /* which function is requested */ | ||
796 | } pal_func_cpu; | ||
797 | } pal_func_cpu_u_t; | ||
798 | |||
799 | #define req_cpu pal_func_cpu.req_cpu | ||
800 | #define func_id pal_func_cpu.func_id | ||
801 | |||
802 | #ifdef CONFIG_SMP | ||
803 | |||
804 | /* | ||
805 | * used to hold information about final function to call | ||
806 | */ | ||
807 | typedef struct { | ||
808 | palinfo_func_t func; /* pointer to function to call */ | ||
809 | char *page; /* buffer to store results */ | ||
810 | int ret; /* return value from call */ | ||
811 | } palinfo_smp_data_t; | ||
812 | |||
813 | |||
814 | /* | ||
815 | * this function does the actual final call and is called | ||
816 | * from the smp code, i.e., this is the palinfo callback routine | ||
817 | */ | ||
818 | static void | ||
819 | palinfo_smp_call(void *info) | ||
820 | { | ||
821 | palinfo_smp_data_t *data = (palinfo_smp_data_t *)info; | ||
822 | if (data == NULL) { | ||
823 | printk(KERN_ERR "palinfo: data pointer is NULL\n"); | ||
824 | data->ret = 0; /* no output */ | ||
825 | return; | ||
826 | } | ||
827 | /* does this actual call */ | ||
828 | data->ret = (*data->func)(data->page); | ||
829 | } | ||
830 | |||
831 | /* | ||
832 | * function called to trigger the IPI, we need to access a remote CPU | ||
833 | * Return: | ||
834 | * 0 : error or nothing to output | ||
835 | * otherwise how many bytes in the "page" buffer were written | ||
836 | */ | ||
837 | static | ||
838 | int palinfo_handle_smp(pal_func_cpu_u_t *f, char *page) | ||
839 | { | ||
840 | palinfo_smp_data_t ptr; | ||
841 | int ret; | ||
842 | |||
843 | ptr.func = palinfo_entries[f->func_id].proc_read; | ||
844 | ptr.page = page; | ||
845 | ptr.ret = 0; /* just in case */ | ||
846 | |||
847 | |||
848 | /* will send IPI to other CPU and wait for completion of remote call */ | ||
849 | if ((ret=smp_call_function_single(f->req_cpu, palinfo_smp_call, &ptr, 0, 1))) { | ||
850 | printk(KERN_ERR "palinfo: remote CPU call from %d to %d on function %d: " | ||
851 | "error %d\n", smp_processor_id(), f->req_cpu, f->func_id, ret); | ||
852 | return 0; | ||
853 | } | ||
854 | return ptr.ret; | ||
855 | } | ||
856 | #else /* ! CONFIG_SMP */ | ||
857 | static | ||
858 | int palinfo_handle_smp(pal_func_cpu_u_t *f, char *page) | ||
859 | { | ||
860 | printk(KERN_ERR "palinfo: should not be called with non SMP kernel\n"); | ||
861 | return 0; | ||
862 | } | ||
863 | #endif /* CONFIG_SMP */ | ||
864 | |||
865 | /* | ||
866 | * Entry point routine: all calls go through this function | ||
867 | */ | ||
868 | static int | ||
869 | palinfo_read_entry(char *page, char **start, off_t off, int count, int *eof, void *data) | ||
870 | { | ||
871 | int len=0; | ||
872 | pal_func_cpu_u_t *f = (pal_func_cpu_u_t *)&data; | ||
873 | |||
874 | /* | ||
875 | * in SMP mode, we may need to call another CPU to get correct | ||
876 | * information. PAL, by definition, is processor specific | ||
877 | */ | ||
878 | if (f->req_cpu == get_cpu()) | ||
879 | len = (*palinfo_entries[f->func_id].proc_read)(page); | ||
880 | else | ||
881 | len = palinfo_handle_smp(f, page); | ||
882 | |||
883 | put_cpu(); | ||
884 | |||
885 | if (len <= off+count) *eof = 1; | ||
886 | |||
887 | *start = page + off; | ||
888 | len -= off; | ||
889 | |||
890 | if (len>count) len = count; | ||
891 | if (len<0) len = 0; | ||
892 | |||
893 | return len; | ||
894 | } | ||
895 | |||
896 | static void | ||
897 | create_palinfo_proc_entries(unsigned int cpu) | ||
898 | { | ||
899 | # define CPUSTR "cpu%d" | ||
900 | |||
901 | pal_func_cpu_u_t f; | ||
902 | struct proc_dir_entry **pdir; | ||
903 | struct proc_dir_entry *cpu_dir; | ||
904 | int j; | ||
905 | char cpustr[sizeof(CPUSTR)]; | ||
906 | |||
907 | |||
908 | /* | ||
909 | * we keep track of created entries in a depth-first order for | ||
910 | * cleanup purposes. Each entry is stored into palinfo_proc_entries | ||
911 | */ | ||
912 | sprintf(cpustr,CPUSTR, cpu); | ||
913 | |||
914 | cpu_dir = proc_mkdir(cpustr, palinfo_dir); | ||
915 | |||
916 | f.req_cpu = cpu; | ||
917 | |||
918 | /* | ||
919 | * Compute the location to store per cpu entries | ||
920 | * We dont store the top level entry in this list, but | ||
921 | * remove it finally after removing all cpu entries. | ||
922 | */ | ||
923 | pdir = &palinfo_proc_entries[cpu*(NR_PALINFO_ENTRIES+1)]; | ||
924 | *pdir++ = cpu_dir; | ||
925 | for (j=0; j < NR_PALINFO_ENTRIES; j++) { | ||
926 | f.func_id = j; | ||
927 | *pdir = create_proc_read_entry( | ||
928 | palinfo_entries[j].name, 0, cpu_dir, | ||
929 | palinfo_read_entry, (void *)f.value); | ||
930 | if (*pdir) | ||
931 | (*pdir)->owner = THIS_MODULE; | ||
932 | pdir++; | ||
933 | } | ||
934 | } | ||
935 | |||
936 | static void | ||
937 | remove_palinfo_proc_entries(unsigned int hcpu) | ||
938 | { | ||
939 | int j; | ||
940 | struct proc_dir_entry *cpu_dir, **pdir; | ||
941 | |||
942 | pdir = &palinfo_proc_entries[hcpu*(NR_PALINFO_ENTRIES+1)]; | ||
943 | cpu_dir = *pdir; | ||
944 | *pdir++=NULL; | ||
945 | for (j=0; j < (NR_PALINFO_ENTRIES); j++) { | ||
946 | if ((*pdir)) { | ||
947 | remove_proc_entry ((*pdir)->name, cpu_dir); | ||
948 | *pdir ++= NULL; | ||
949 | } | ||
950 | } | ||
951 | |||
952 | if (cpu_dir) { | ||
953 | remove_proc_entry(cpu_dir->name, palinfo_dir); | ||
954 | } | ||
955 | } | ||
956 | |||
957 | static int __devinit palinfo_cpu_callback(struct notifier_block *nfb, | ||
958 | unsigned long action, | ||
959 | void *hcpu) | ||
960 | { | ||
961 | unsigned int hotcpu = (unsigned long)hcpu; | ||
962 | |||
963 | switch (action) { | ||
964 | case CPU_ONLINE: | ||
965 | create_palinfo_proc_entries(hotcpu); | ||
966 | break; | ||
967 | #ifdef CONFIG_HOTPLUG_CPU | ||
968 | case CPU_DEAD: | ||
969 | remove_palinfo_proc_entries(hotcpu); | ||
970 | break; | ||
971 | #endif | ||
972 | } | ||
973 | return NOTIFY_OK; | ||
974 | } | ||
975 | |||
976 | static struct notifier_block palinfo_cpu_notifier = | ||
977 | { | ||
978 | .notifier_call = palinfo_cpu_callback, | ||
979 | .priority = 0, | ||
980 | }; | ||
981 | |||
982 | static int __init | ||
983 | palinfo_init(void) | ||
984 | { | ||
985 | int i = 0; | ||
986 | |||
987 | printk(KERN_INFO "PAL Information Facility v%s\n", PALINFO_VERSION); | ||
988 | palinfo_dir = proc_mkdir("pal", NULL); | ||
989 | |||
990 | /* Create palinfo dirs in /proc for all online cpus */ | ||
991 | for_each_online_cpu(i) { | ||
992 | create_palinfo_proc_entries(i); | ||
993 | } | ||
994 | |||
995 | /* Register for future delivery via notify registration */ | ||
996 | register_cpu_notifier(&palinfo_cpu_notifier); | ||
997 | |||
998 | return 0; | ||
999 | } | ||
1000 | |||
1001 | static void __exit | ||
1002 | palinfo_exit(void) | ||
1003 | { | ||
1004 | int i = 0; | ||
1005 | |||
1006 | /* remove all nodes: depth first pass. Could optimize this */ | ||
1007 | for_each_online_cpu(i) { | ||
1008 | remove_palinfo_proc_entries(i); | ||
1009 | } | ||
1010 | |||
1011 | /* | ||
1012 | * Remove the top level entry finally | ||
1013 | */ | ||
1014 | remove_proc_entry(palinfo_dir->name, NULL); | ||
1015 | |||
1016 | /* | ||
1017 | * Unregister from cpu notifier callbacks | ||
1018 | */ | ||
1019 | unregister_cpu_notifier(&palinfo_cpu_notifier); | ||
1020 | } | ||
1021 | |||
1022 | module_init(palinfo_init); | ||
1023 | module_exit(palinfo_exit); | ||
diff --git a/arch/ia64/kernel/patch.c b/arch/ia64/kernel/patch.c new file mode 100644 index 000000000000..367804a605fa --- /dev/null +++ b/arch/ia64/kernel/patch.c | |||
@@ -0,0 +1,189 @@ | |||
1 | /* | ||
2 | * Instruction-patching support. | ||
3 | * | ||
4 | * Copyright (C) 2003 Hewlett-Packard Co | ||
5 | * David Mosberger-Tang <davidm@hpl.hp.com> | ||
6 | */ | ||
7 | #include <linux/init.h> | ||
8 | #include <linux/string.h> | ||
9 | |||
10 | #include <asm/patch.h> | ||
11 | #include <asm/processor.h> | ||
12 | #include <asm/sections.h> | ||
13 | #include <asm/system.h> | ||
14 | #include <asm/unistd.h> | ||
15 | |||
16 | /* | ||
17 | * This was adapted from code written by Tony Luck: | ||
18 | * | ||
19 | * The 64-bit value in a "movl reg=value" is scattered between the two words of the bundle | ||
20 | * like this: | ||
21 | * | ||
22 | * 6 6 5 4 3 2 1 | ||
23 | * 3210987654321098765432109876543210987654321098765432109876543210 | ||
24 | * ABBBBBBBBBBBBBBBBBBBBBBBCCCCCCCCCCCCCCCCCCDEEEEEFFFFFFFFFGGGGGGG | ||
25 | * | ||
26 | * CCCCCCCCCCCCCCCCCCxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx | ||
27 | * xxxxAFFFFFFFFFEEEEEDxGGGGGGGxxxxxxxxxxxxxBBBBBBBBBBBBBBBBBBBBBBB | ||
28 | */ | ||
29 | static u64 | ||
30 | get_imm64 (u64 insn_addr) | ||
31 | { | ||
32 | u64 *p = (u64 *) (insn_addr & -16); /* mask out slot number */ | ||
33 | |||
34 | return ( (p[1] & 0x0800000000000000UL) << 4) | /*A*/ | ||
35 | ((p[1] & 0x00000000007fffffUL) << 40) | /*B*/ | ||
36 | ((p[0] & 0xffffc00000000000UL) >> 24) | /*C*/ | ||
37 | ((p[1] & 0x0000100000000000UL) >> 23) | /*D*/ | ||
38 | ((p[1] & 0x0003e00000000000UL) >> 29) | /*E*/ | ||
39 | ((p[1] & 0x07fc000000000000UL) >> 43) | /*F*/ | ||
40 | ((p[1] & 0x000007f000000000UL) >> 36); /*G*/ | ||
41 | } | ||
42 | |||
43 | /* Patch instruction with "val" where "mask" has 1 bits. */ | ||
44 | void | ||
45 | ia64_patch (u64 insn_addr, u64 mask, u64 val) | ||
46 | { | ||
47 | u64 m0, m1, v0, v1, b0, b1, *b = (u64 *) (insn_addr & -16); | ||
48 | # define insn_mask ((1UL << 41) - 1) | ||
49 | unsigned long shift; | ||
50 | |||
51 | b0 = b[0]; b1 = b[1]; | ||
52 | shift = 5 + 41 * (insn_addr % 16); /* 5 bits of template, then 3 x 41-bit instructions */ | ||
53 | if (shift >= 64) { | ||
54 | m1 = mask << (shift - 64); | ||
55 | v1 = val << (shift - 64); | ||
56 | } else { | ||
57 | m0 = mask << shift; m1 = mask >> (64 - shift); | ||
58 | v0 = val << shift; v1 = val >> (64 - shift); | ||
59 | b[0] = (b0 & ~m0) | (v0 & m0); | ||
60 | } | ||
61 | b[1] = (b1 & ~m1) | (v1 & m1); | ||
62 | } | ||
63 | |||
64 | void | ||
65 | ia64_patch_imm64 (u64 insn_addr, u64 val) | ||
66 | { | ||
67 | ia64_patch(insn_addr, | ||
68 | 0x01fffefe000UL, ( ((val & 0x8000000000000000UL) >> 27) /* bit 63 -> 36 */ | ||
69 | | ((val & 0x0000000000200000UL) << 0) /* bit 21 -> 21 */ | ||
70 | | ((val & 0x00000000001f0000UL) << 6) /* bit 16 -> 22 */ | ||
71 | | ((val & 0x000000000000ff80UL) << 20) /* bit 7 -> 27 */ | ||
72 | | ((val & 0x000000000000007fUL) << 13) /* bit 0 -> 13 */)); | ||
73 | ia64_patch(insn_addr - 1, 0x1ffffffffffUL, val >> 22); | ||
74 | } | ||
75 | |||
76 | void | ||
77 | ia64_patch_imm60 (u64 insn_addr, u64 val) | ||
78 | { | ||
79 | ia64_patch(insn_addr, | ||
80 | 0x011ffffe000UL, ( ((val & 0x0800000000000000UL) >> 23) /* bit 59 -> 36 */ | ||
81 | | ((val & 0x00000000000fffffUL) << 13) /* bit 0 -> 13 */)); | ||
82 | ia64_patch(insn_addr - 1, 0x1fffffffffcUL, val >> 18); | ||
83 | } | ||
84 | |||
85 | /* | ||
86 | * We sometimes need to load the physical address of a kernel | ||
87 | * object. Often we can convert the virtual address to physical | ||
88 | * at execution time, but sometimes (either for performance reasons | ||
89 | * or during error recovery) we cannot do this. Patch the marked | ||
90 | * bundles to load the physical address. | ||
91 | */ | ||
92 | void __init | ||
93 | ia64_patch_vtop (unsigned long start, unsigned long end) | ||
94 | { | ||
95 | s32 *offp = (s32 *) start; | ||
96 | u64 ip; | ||
97 | |||
98 | while (offp < (s32 *) end) { | ||
99 | ip = (u64) offp + *offp; | ||
100 | |||
101 | /* replace virtual address with corresponding physical address: */ | ||
102 | ia64_patch_imm64(ip, ia64_tpa(get_imm64(ip))); | ||
103 | ia64_fc((void *) ip); | ||
104 | ++offp; | ||
105 | } | ||
106 | ia64_sync_i(); | ||
107 | ia64_srlz_i(); | ||
108 | } | ||
109 | |||
110 | void | ||
111 | ia64_patch_mckinley_e9 (unsigned long start, unsigned long end) | ||
112 | { | ||
113 | static int first_time = 1; | ||
114 | int need_workaround; | ||
115 | s32 *offp = (s32 *) start; | ||
116 | u64 *wp; | ||
117 | |||
118 | need_workaround = (local_cpu_data->family == 0x1f && local_cpu_data->model == 0); | ||
119 | |||
120 | if (first_time) { | ||
121 | first_time = 0; | ||
122 | if (need_workaround) | ||
123 | printk(KERN_INFO "Leaving McKinley Errata 9 workaround enabled\n"); | ||
124 | else | ||
125 | printk(KERN_INFO "McKinley Errata 9 workaround not needed; " | ||
126 | "disabling it\n"); | ||
127 | } | ||
128 | if (need_workaround) | ||
129 | return; | ||
130 | |||
131 | while (offp < (s32 *) end) { | ||
132 | wp = (u64 *) ia64_imva((char *) offp + *offp); | ||
133 | wp[0] = 0x0000000100000000UL; /* nop.m 0; nop.i 0; nop.i 0 */ | ||
134 | wp[1] = 0x0004000000000200UL; | ||
135 | wp[2] = 0x0000000100000011UL; /* nop.m 0; nop.i 0; br.ret.sptk.many b6 */ | ||
136 | wp[3] = 0x0084006880000200UL; | ||
137 | ia64_fc(wp); ia64_fc(wp + 2); | ||
138 | ++offp; | ||
139 | } | ||
140 | ia64_sync_i(); | ||
141 | ia64_srlz_i(); | ||
142 | } | ||
143 | |||
144 | static void | ||
145 | patch_fsyscall_table (unsigned long start, unsigned long end) | ||
146 | { | ||
147 | extern unsigned long fsyscall_table[NR_syscalls]; | ||
148 | s32 *offp = (s32 *) start; | ||
149 | u64 ip; | ||
150 | |||
151 | while (offp < (s32 *) end) { | ||
152 | ip = (u64) ia64_imva((char *) offp + *offp); | ||
153 | ia64_patch_imm64(ip, (u64) fsyscall_table); | ||
154 | ia64_fc((void *) ip); | ||
155 | ++offp; | ||
156 | } | ||
157 | ia64_sync_i(); | ||
158 | ia64_srlz_i(); | ||
159 | } | ||
160 | |||
161 | static void | ||
162 | patch_brl_fsys_bubble_down (unsigned long start, unsigned long end) | ||
163 | { | ||
164 | extern char fsys_bubble_down[]; | ||
165 | s32 *offp = (s32 *) start; | ||
166 | u64 ip; | ||
167 | |||
168 | while (offp < (s32 *) end) { | ||
169 | ip = (u64) offp + *offp; | ||
170 | ia64_patch_imm60((u64) ia64_imva((void *) ip), | ||
171 | (u64) (fsys_bubble_down - (ip & -16)) / 16); | ||
172 | ia64_fc((void *) ip); | ||
173 | ++offp; | ||
174 | } | ||
175 | ia64_sync_i(); | ||
176 | ia64_srlz_i(); | ||
177 | } | ||
178 | |||
179 | void | ||
180 | ia64_patch_gate (void) | ||
181 | { | ||
182 | # define START(name) ((unsigned long) __start_gate_##name##_patchlist) | ||
183 | # define END(name) ((unsigned long)__end_gate_##name##_patchlist) | ||
184 | |||
185 | patch_fsyscall_table(START(fsyscall), END(fsyscall)); | ||
186 | patch_brl_fsys_bubble_down(START(brl_fsys_bubble_down), END(brl_fsys_bubble_down)); | ||
187 | ia64_patch_vtop(START(vtop), END(vtop)); | ||
188 | ia64_patch_mckinley_e9(START(mckinley_e9), END(mckinley_e9)); | ||
189 | } | ||
diff --git a/arch/ia64/kernel/perfmon.c b/arch/ia64/kernel/perfmon.c new file mode 100644 index 000000000000..71147be3279c --- /dev/null +++ b/arch/ia64/kernel/perfmon.c | |||
@@ -0,0 +1,6676 @@ | |||
1 | /* | ||
2 | * This file implements the perfmon-2 subsystem which is used | ||
3 | * to program the IA-64 Performance Monitoring Unit (PMU). | ||
4 | * | ||
5 | * The initial version of perfmon.c was written by | ||
6 | * Ganesh Venkitachalam, IBM Corp. | ||
7 | * | ||
8 | * Then it was modified for perfmon-1.x by Stephane Eranian and | ||
9 | * David Mosberger, Hewlett Packard Co. | ||
10 | * | ||
11 | * Version Perfmon-2.x is a rewrite of perfmon-1.x | ||
12 | * by Stephane Eranian, Hewlett Packard Co. | ||
13 | * | ||
14 | * Copyright (C) 1999-2003, 2005 Hewlett Packard Co | ||
15 | * Stephane Eranian <eranian@hpl.hp.com> | ||
16 | * David Mosberger-Tang <davidm@hpl.hp.com> | ||
17 | * | ||
18 | * More information about perfmon available at: | ||
19 | * http://www.hpl.hp.com/research/linux/perfmon | ||
20 | */ | ||
21 | |||
22 | #include <linux/config.h> | ||
23 | #include <linux/module.h> | ||
24 | #include <linux/kernel.h> | ||
25 | #include <linux/sched.h> | ||
26 | #include <linux/interrupt.h> | ||
27 | #include <linux/smp_lock.h> | ||
28 | #include <linux/proc_fs.h> | ||
29 | #include <linux/seq_file.h> | ||
30 | #include <linux/init.h> | ||
31 | #include <linux/vmalloc.h> | ||
32 | #include <linux/mm.h> | ||
33 | #include <linux/sysctl.h> | ||
34 | #include <linux/list.h> | ||
35 | #include <linux/file.h> | ||
36 | #include <linux/poll.h> | ||
37 | #include <linux/vfs.h> | ||
38 | #include <linux/pagemap.h> | ||
39 | #include <linux/mount.h> | ||
40 | #include <linux/version.h> | ||
41 | #include <linux/bitops.h> | ||
42 | |||
43 | #include <asm/errno.h> | ||
44 | #include <asm/intrinsics.h> | ||
45 | #include <asm/page.h> | ||
46 | #include <asm/perfmon.h> | ||
47 | #include <asm/processor.h> | ||
48 | #include <asm/signal.h> | ||
49 | #include <asm/system.h> | ||
50 | #include <asm/uaccess.h> | ||
51 | #include <asm/delay.h> | ||
52 | |||
53 | #ifdef CONFIG_PERFMON | ||
/*
 * perfmon context state
 * (presumably the values held in pfm_context_t.ctx_state -- confirm at use sites)
 */
#define PFM_CTX_UNLOADED	1	/* context is not loaded onto any task */
#define PFM_CTX_LOADED		2	/* context is loaded onto a task */
#define PFM_CTX_MASKED		3	/* context is loaded but monitoring is masked due to overflow */
#define PFM_CTX_ZOMBIE		4	/* owner of the context is closing it */

/* NOTE(review): looks like a sentinel that no real activation number equals -- confirm */
#define PFM_INVALID_ACTIVATION	(~0UL)

/*
 * depth of message queue
 *
 * The queue is a ring indexed by ctx_msgq_head/ctx_msgq_tail; it is empty
 * when both indices are equal.
 */
#define PFM_MAX_MSGS		32
#define PFM_CTXQ_EMPTY(g)	((g)->ctx_msgq_head == (g)->ctx_msgq_tail)
69 | |||
/*
 * type of a PMU register (bitmask).
 * bitmask structure:
 * 	bit0   : register implemented
 * 	bit1   : end marker
 * 	bit2-3 : reserved
 * 	bit4   : pmc has pmc.pm
 * 	bit5   : pmc controls a counter (has pmc.oi), pmd is used as counter
 * 	bit6-7 : register type
 * 	bit8-31: reserved
 *
 * Every type other than NOTIMPL and END includes PFM_REG_IMPL, and
 * PFM_REG_COUNTING additionally includes all bits of PFM_REG_MONITOR.
 */
#define PFM_REG_NOTIMPL		0x0 /* not implemented at all */
#define PFM_REG_IMPL		0x1 /* register implemented */
#define PFM_REG_END		0x2 /* end marker */
#define PFM_REG_MONITOR		(0x1<<4|PFM_REG_IMPL) /* a PMC with a pmc.pm field only */
#define PFM_REG_COUNTING	(0x2<<4|PFM_REG_MONITOR) /* a monitor + pmc.oi+ PMD used as a counter */
#define PFM_REG_CONTROL		(0x4<<4|PFM_REG_IMPL) /* PMU control register */
#define	PFM_REG_CONFIG		(0x8<<4|PFM_REG_IMPL) /* configuration register */
#define PFM_REG_BUFFER	 	(0xc<<4|PFM_REG_IMPL) /* PMD used as buffer */

/* nonzero when descriptor i carries the end-marker bit */
#define PMC_IS_LAST(i)	(pmu_conf->pmc_desc[i].type & PFM_REG_END)
#define PMD_IS_LAST(i)	(pmu_conf->pmd_desc[i].type & PFM_REG_END)
92 | |||
/*
 * nonzero when PFM_REGFL_OVFL_NOTIFY is set in the flags of ctx_pmds[i]
 * (despite the PMC_ prefix, the software PMD state is what is tested)
 */
#define PMC_OVFL_NOTIFY(ctx, i)	((ctx)->ctx_pmds[i].flags &  PFM_REGFL_OVFL_NOTIFY)

/*
 * i assumed unsigned
 *
 * True when i is below the table size and descriptor i has PFM_REG_IMPL
 * set.  The bounds test comes first so the descriptor table is never
 * indexed out of range.  The argument is parenthesized so that an
 * expression such as (a | b) is compared as a whole.
 */
#define PMC_IS_IMPL(i)	  ((i) < PMU_MAX_PMCS && (pmu_conf->pmc_desc[i].type & PFM_REG_IMPL))
#define PMD_IS_IMPL(i)	  ((i) < PMU_MAX_PMDS && (pmu_conf->pmd_desc[i].type & PFM_REG_IMPL))
98 | |||
/*
 * XXX: these assume that register i is implemented
 *
 * Each test requires every bit of the named type to be set.  Because the
 * type values share bits, a PFM_REG_COUNTING register also satisfies
 * PMC_IS_MONITOR(), and a PFM_REG_BUFFER type (0xc<<4) also satisfies
 * PMC_IS_CONTROL().
 */
#define PMD_IS_COUNTING(i) ((pmu_conf->pmd_desc[i].type & PFM_REG_COUNTING) == PFM_REG_COUNTING)
#define PMC_IS_COUNTING(i) ((pmu_conf->pmc_desc[i].type & PFM_REG_COUNTING) == PFM_REG_COUNTING)
#define PMC_IS_MONITOR(i)  ((pmu_conf->pmc_desc[i].type & PFM_REG_MONITOR)  == PFM_REG_MONITOR)
#define PMC_IS_CONTROL(i)  ((pmu_conf->pmc_desc[i].type & PFM_REG_CONTROL)  == PFM_REG_CONTROL)

/* accessors for fields of descriptor i; the *_DEP macros only expose word 0 of the 4-word bitmask */
#define PMC_DFL_VAL(i)     pmu_conf->pmc_desc[i].default_value
#define PMC_RSVD_MASK(i)   pmu_conf->pmc_desc[i].reserved_mask
#define PMD_PMD_DEP(i)	   pmu_conf->pmd_desc[i].dep_pmd[0]
#define PMC_PMD_DEP(i)	   pmu_conf->pmc_desc[i].dep_pmd[0]
109 | |||
/* both debug register files are sized by IA64_NUM_DBG_REGS */
#define PFM_NUM_IBRS	  IA64_NUM_DBG_REGS
#define PFM_NUM_DBRS	  IA64_NUM_DBG_REGS

/* convenience tests/accessors on a pfm_context_t pointer */
#define CTX_OVFL_NOBLOCK(c)	((c)->ctx_fl_block == 0)
#define CTX_HAS_SMPL(c)		((c)->ctx_fl_is_sampling)
#define PFM_CTX_TASK(h)		(h)->ctx_task

#define PMU_PMC_OI		5 /* position of pmc.oi bit */
118 | |||
/*
 * XXX: does not support more than 64 PMDs
 *
 * The "mark as used" macros expand to a single parenthesized assignment
 * expression, so they behave the same whether used as a statement or
 * inside a larger expression.
 */
#define CTX_USED_PMD(ctx, mask) ((ctx)->ctx_used_pmds[0] |= (mask))
#define CTX_IS_USED_PMD(ctx, c) (((ctx)->ctx_used_pmds[0] & (1UL << (c))) != 0UL)

#define CTX_USED_MONITOR(ctx, mask) ((ctx)->ctx_used_monitors[0] |= (mask))

/*
 * NOTE(review): ctx_used_ibrs/ctx_used_dbrs are declared as unsigned int[1]
 * in pfm_context_t, so only register numbers 0..31 are representable here
 * even though the index/shift arithmetic is written for 64-bit words.
 */
#define CTX_USED_IBR(ctx,n) 	((ctx)->ctx_used_ibrs[(n)>>6] |= 1UL<< ((n) % 64))
#define CTX_USED_DBR(ctx,n) 	((ctx)->ctx_used_dbrs[(n)>>6] |= 1UL<< ((n) % 64))
#define CTX_USES_DBREGS(ctx)	(((pfm_context_t *)(ctx))->ctx_fl_using_dbreg==1)
#define PFM_CODE_RR	0	/* requesting code range restriction */
#define PFM_DATA_RR	1	/* requesting data range restriction */

/* manipulate the pfm_syst_info word of the CPU the caller is running on */
#define PFM_CPUINFO_CLEAR(v)	(pfm_get_cpu_var(pfm_syst_info) &= ~(v))
#define PFM_CPUINFO_SET(v)	(pfm_get_cpu_var(pfm_syst_info) |= (v))
#define PFM_CPUINFO_GET()	pfm_get_cpu_var(pfm_syst_info)

/* single-bit mask for register number x, used to build dependency bitmasks */
#define RDEP(x)	(1UL<<(x))
136 | |||
/*
 * context protection macros
 * in SMP:
 * 	- we need to protect against CPU concurrency (spin_lock)
 * 	- we need to protect against PMU overflow interrupts (local_irq_disable)
 * in UP:
 * 	- we need to protect against PMU overflow interrupts (local_irq_disable)
 *
 * spin_lock_irqsave()/spin_unlock_irqrestore():
 * 	in SMP: local_irq_disable + spin_lock
 * 	in UP : local_irq_disable
 *
 * spin_lock()/spin_unlock():
 * 	in UP : removed automatically
 * 	in SMP: protect against context accesses from other CPU. interrupts
 * 	        are not masked. This is useful for the PMU interrupt handler
 * 	        because we know we will not get PMU concurrency in that code.
 *
 * In every macro below, c is a pfm_context_t pointer and f is the
 * unsigned long used to save/restore the interrupt flags.
 */

/* lock ctx_lock with interrupts disabled; traces before and after via DPRINT */
#define PROTECT_CTX(c, f) \
	do {  \
		DPRINT(("spinlock_irq_save ctx %p by [%d]\n", c, current->pid)); \
		spin_lock_irqsave(&(c)->ctx_lock, f); \
		DPRINT(("spinlocked ctx %p by [%d]\n", c, current->pid)); \
	} while(0)

/* counterpart of PROTECT_CTX(): trace, then unlock and restore interrupt flags */
#define UNPROTECT_CTX(c, f) \
	do { \
		DPRINT(("spinlock_irq_restore ctx %p by [%d]\n", c, current->pid)); \
		spin_unlock_irqrestore(&(c)->ctx_lock, f); \
	} while(0)

/* same as PROTECT_CTX() without the debug traces */
#define PROTECT_CTX_NOPRINT(c, f) \
	do {  \
		spin_lock_irqsave(&(c)->ctx_lock, f); \
	} while(0)


/* same as UNPROTECT_CTX() without the debug trace */
#define UNPROTECT_CTX_NOPRINT(c, f) \
	do { \
		spin_unlock_irqrestore(&(c)->ctx_lock, f); \
	} while(0)


/* lock ctx_lock without touching the interrupt state */
#define PROTECT_CTX_NOIRQ(c) \
	do {  \
		spin_lock(&(c)->ctx_lock); \
	} while(0)

/* counterpart of PROTECT_CTX_NOIRQ() */
#define UNPROTECT_CTX_NOIRQ(c) \
	do { \
		spin_unlock(&(c)->ctx_lock); \
	} while(0)
189 | |||
190 | |||
/*
 * Activation number helpers.
 *
 * In SMP, GET/INC operate on the per-CPU pmu_activation_number and
 * SET_ACTIVATION() copies the current value into ctx_last_activation.
 * Without SMP, all three expand to an empty statement.
 *
 * NOTE(review): the !SMP variants are declared with one parameter while
 * the SMP GET/INC variants take none, and the !SMP GET_ACTIVATION() cannot
 * be used as an expression -- confirm no caller relies on its value in UP
 * builds.
 */
#ifdef CONFIG_SMP

#define GET_ACTIVATION()	pfm_get_cpu_var(pmu_activation_number)
#define INC_ACTIVATION()	pfm_get_cpu_var(pmu_activation_number)++
#define SET_ACTIVATION(c)	(c)->ctx_last_activation = GET_ACTIVATION()

#else /* !CONFIG_SMP */
#define SET_ACTIVATION(t) 	do {} while(0)
#define GET_ACTIVATION(t) 	do {} while(0)
#define INC_ACTIVATION(t) 	do {} while(0)
#endif /* CONFIG_SMP */
202 | |||
/* record task t and context c as the PMU owner on the current CPU */
#define SET_PMU_OWNER(t, c)	do { pfm_get_cpu_var(pmu_owner) = (t); pfm_get_cpu_var(pmu_ctx) = (c); } while(0)
#define GET_PMU_OWNER()		pfm_get_cpu_var(pmu_owner)
#define GET_PMU_CTX()		pfm_get_cpu_var(pmu_ctx)

/* take/release the global session lock; g holds the saved interrupt flags */
#define LOCK_PFS(g)	    	spin_lock_irqsave(&pfm_sessions.pfs_lock, g)
#define UNLOCK_PFS(g)	    	spin_unlock_irqrestore(&pfm_sessions.pfs_lock, g)

/* replace the PFM_REG_RETFL_MASK bits of flags with val */
#define PFM_REG_RETFLAG_SET(flags, val)	do { (flags) &= ~PFM_REG_RETFL_MASK; (flags) |= (val); } while(0)

/*
 * cmp0 must be the value of pmc0
 *
 * Nonzero when any bit other than bit 0 is set.  The argument is
 * parenthesized so that an expression such as (a | b) is masked as a whole.
 */
#define PMC0_HAS_OVFL(cmp0)  ((cmp0) & ~0x1UL)

#define PFMFS_MAGIC 0xa0b4d889
218 | |||
/*
 * debugging
 *
 * DPRINT((fmt, args...)) takes a fully parenthesized printk argument list.
 * When pfm_sysctl.debug > 0 it prints a "function.line: CPU [pid]" prefix
 * followed by the message.  DPRINT_ovfl() additionally requires
 * pfm_sysctl.debug_ovfl > 0.  The argument list is only evaluated when
 * the message is actually printed.
 *
 * When PFM_DEBUGGING is not defined both macros compile to nothing, so
 * the rest of the file still builds.
 */
#define PFM_DEBUGGING 1
#ifdef PFM_DEBUGGING
#define DPRINT(a) \
	do { \
		if (unlikely(pfm_sysctl.debug >0)) { printk("%s.%d: CPU%d [%d] ", __FUNCTION__, __LINE__, smp_processor_id(), current->pid); printk a; } \
	} while (0)

#define DPRINT_ovfl(a) \
	do { \
		if (unlikely(pfm_sysctl.debug > 0 && pfm_sysctl.debug_ovfl >0)) { printk("%s.%d: CPU%d [%d] ", __FUNCTION__, __LINE__, smp_processor_id(), current->pid); printk a; } \
	} while (0)
#else
#define DPRINT(a)	do { } while (0)
#define DPRINT_ovfl(a)	do { } while (0)
#endif
234 | |||
/*
 * 64-bit software counter structure
 *
 * the next_reset_type is applied to the next call to pfm_reset_regs()
 *
 * One instance exists per PMD in pfm_context_t.ctx_pmds[].  The four-word
 * arrays are register bitmasks.
 */
typedef struct {
	unsigned long	val;		/* virtual 64bit counter value */
	unsigned long	lval;		/* last reset value */
	unsigned long	long_reset;	/* reset value on sampling overflow */
	unsigned long	short_reset;    /* reset value on overflow */
	unsigned long	reset_pmds[4];  /* which other pmds to reset when this counter overflows */
	unsigned long	smpl_pmds[4];   /* which pmds are accessed when counter overflow */
	unsigned long	seed;		/* seed for random-number generator */
	unsigned long	mask;		/* mask for random-number generator */
	unsigned int 	flags;		/* notify/do not notify */
	unsigned long	eventid;	/* overflow event identifier */
} pfm_counter_t;
252 | |||
/*
 * context flags
 *
 * Packed into one 32-bit word (10 bits in use, 22 reserved).  The fields
 * are reached through the ctx_fl_* aliases on pfm_context_t.
 */
typedef struct {
	unsigned int block:1;		/* when 1, task will block on user notifications */
	unsigned int system:1;		/* do system wide monitoring */
	unsigned int using_dbreg:1;	/* using range restrictions (debug registers) */
	unsigned int is_sampling:1;	/* true if using a custom format */
	unsigned int excl_idle:1;	/* exclude idle task in system wide session */
	unsigned int going_zombie:1;	/* context is zombie (MASKED+blocking) */
	unsigned int trap_reason:2;	/* reason for going into pfm_handle_work() */
	unsigned int no_msg:1;		/* no message sent on overflow */
	unsigned int can_restart:1;	/* allowed to issue a PFM_RESTART */
	unsigned int reserved:22;
} pfm_context_flags_t;

/* values of the 2-bit trap_reason field */
#define PFM_TRAP_REASON_NONE		0x0	/* default value */
#define PFM_TRAP_REASON_BLOCK		0x1	/* we need to block on overflow */
#define PFM_TRAP_REASON_RESET		0x2	/* we need to reset PMDs */
272 | |||
273 | |||
/*
 * perfmon context: encapsulates all the state of a monitoring session
 *
 * ctx_lock is the lock taken by the PROTECT_CTX*() macros.  The four-word
 * unsigned long arrays are register bitmasks.
 */

typedef struct pfm_context {
	spinlock_t		ctx_lock;		/* context protection */

	pfm_context_flags_t	ctx_flags;		/* bitmask of flags  (block reason incl.) */
	unsigned int		ctx_state;		/* state: active/inactive (no bitfield) */

	struct task_struct 	*ctx_task;		/* task to which context is attached */

	unsigned long		ctx_ovfl_regs[4];	/* which registers overflowed (notification) */

	struct semaphore	ctx_restart_sem;   	/* use for blocking notification mode */

	unsigned long		ctx_used_pmds[4];	/* bitmask of PMD used            */
	unsigned long		ctx_all_pmds[4];	/* bitmask of all accessible PMDs */
	unsigned long		ctx_reload_pmds[4];	/* bitmask of force reload PMD on ctxsw in */

	unsigned long		ctx_all_pmcs[4];	/* bitmask of all accessible PMCs */
	unsigned long		ctx_reload_pmcs[4];	/* bitmask of force reload PMC on ctxsw in */
	unsigned long		ctx_used_monitors[4];	/* bitmask of monitor PMC being used */

	unsigned long		ctx_pmcs[IA64_NUM_PMC_REGS];	/*  saved copies of PMC values */

	/*
	 * NOTE(review): these two are 32-bit words, whereas CTX_USED_IBR()/
	 * CTX_USED_DBR() index and shift as if they were 64-bit words --
	 * only register numbers 0..31 fit.
	 */
	unsigned int		ctx_used_ibrs[1];		/* bitmask of used IBR (speedup ctxsw in) */
	unsigned int		ctx_used_dbrs[1];		/* bitmask of used DBR (speedup ctxsw in) */
	unsigned long		ctx_dbrs[IA64_NUM_DBG_REGS];	/* DBR values (cache) when not loaded */
	unsigned long		ctx_ibrs[IA64_NUM_DBG_REGS];	/* IBR values (cache) when not loaded */

	pfm_counter_t		ctx_pmds[IA64_NUM_PMD_REGS]; /* software state for PMDS */

	u64			ctx_saved_psr_up;	/* only contains psr.up value */

	unsigned long		ctx_last_activation;	/* context last activation number for last_cpu */
	unsigned int		ctx_last_cpu;		/* CPU id of current or last CPU used (SMP only) */
	unsigned int		ctx_cpu;		/* cpu to which perfmon is applied (system wide) */

	int			ctx_fd;			/* file descriptor used by this context */
	pfm_ovfl_arg_t		ctx_ovfl_arg;		/* argument to custom buffer format handler */

	pfm_buffer_fmt_t	*ctx_buf_fmt;		/* buffer format callbacks */
	void			*ctx_smpl_hdr;		/* points to sampling buffer header kernel vaddr */
	unsigned long		ctx_smpl_size;		/* size of sampling buffer */
	void			*ctx_smpl_vaddr;	/* user level virtual address of smpl buffer */

	/* notification message ring: PFM_MAX_MSGS slots indexed by head/tail */
	wait_queue_head_t 	ctx_msgq_wait;
	pfm_msg_t		ctx_msgq[PFM_MAX_MSGS];
	int			ctx_msgq_head;
	int			ctx_msgq_tail;
	struct fasync_struct	*ctx_async_queue;

	wait_queue_head_t 	ctx_zombieq;		/* termination cleanup wait queue */
} pfm_context_t;
329 | |||
/*
 * used to verify that a struct file really belongs to perfmon:
 * true when its f_op points at pfm_file_ops
 */
#define PFM_IS_FILE(f)		((f)->f_op == &pfm_file_ops)

/* perfmon context stored in the thread structure of task t */
#define PFM_GET_CTX(t)	 	((pfm_context_t *)(t)->thread.pfm_context)

/*
 * ctx_last_cpu accessors; without SMP both expand to an empty statement,
 * so GET_LAST_CPU() yields no value in that configuration.
 */
#ifdef CONFIG_SMP
#define SET_LAST_CPU(ctx, v)	(ctx)->ctx_last_cpu = (v)
#define GET_LAST_CPU(ctx)	(ctx)->ctx_last_cpu
#else
#define SET_LAST_CPU(ctx, v)	do {} while(0)
#define GET_LAST_CPU(ctx)	do {} while(0)
#endif
345 | |||
346 | |||
/* shorthands for the pfm_context_flags_t bitfields embedded in pfm_context_t */
#define ctx_fl_block		ctx_flags.block
#define ctx_fl_system		ctx_flags.system
#define ctx_fl_using_dbreg	ctx_flags.using_dbreg
#define ctx_fl_is_sampling	ctx_flags.is_sampling
#define ctx_fl_excl_idle	ctx_flags.excl_idle
#define ctx_fl_going_zombie	ctx_flags.going_zombie
#define ctx_fl_trap_reason	ctx_flags.trap_reason
#define ctx_fl_no_msg		ctx_flags.no_msg
#define ctx_fl_can_restart	ctx_flags.can_restart

/*
 * set/read thread.pfm_needs_checking of task t.
 * The setter deliberately has no trailing semicolon after while(0): the
 * caller supplies it, which keeps the macro usable as the body of an
 * un-braced if/else.
 */
#define PFM_SET_WORK_PENDING(t, v)	do { (t)->thread.pfm_needs_checking = v; } while(0)
#define PFM_GET_WORK_PENDING(t)		(t)->thread.pfm_needs_checking
359 | |||
/*
 * global information about all sessions
 * mostly used to synchronize between system wide and per-process
 *
 * pfs_lock is the lock taken by LOCK_PFS()/UNLOCK_PFS().
 */
typedef struct {
	spinlock_t		pfs_lock;		   /* lock the structure */

	unsigned int		pfs_task_sessions;	   /* number of per task sessions */
	unsigned int		pfs_sys_sessions;	   /* number of per system wide sessions */
	unsigned int		pfs_sys_use_dbregs;	   /* incremented when a system wide session uses debug regs */
	unsigned int		pfs_ptrace_use_dbregs;	   /* incremented when a process uses debug regs */
	struct task_struct	*pfs_sys_session[NR_CPUS]; /* point to task owning a system-wide session */
} pfm_session_t;
373 | |||
/*
 * information about a PMC or PMD.
 * dep_pmd[]: a bitmask of dependent PMD registers
 * dep_pmc[]: a bitmask of dependent PMC registers
 *
 * type is a combination of the PFM_REG_* bits; pm_pos is the bit position
 * read by PMC_PM().  read_check/write_check are optional per-register
 * callbacks (presumably invoked before a register read/write is accepted
 * -- confirm at call sites).
 */
typedef int (*pfm_reg_check_t)(struct task_struct *task, pfm_context_t *ctx, unsigned int cnum, unsigned long *val, struct pt_regs *regs);
typedef struct {
	unsigned int		type;
	int			pm_pos;
	unsigned long		default_value;	/* power-on default value */
	unsigned long		reserved_mask;	/* bitmask of reserved bits */
	pfm_reg_check_t		read_check;
	pfm_reg_check_t		write_check;
	unsigned long		dep_pmd[4];
	unsigned long		dep_pmc[4];
} pfm_reg_desc_t;

/* assume cnum is a valid monitor; extracts bit pm_pos of val (0 or 1) */
#define PMC_PM(cnum, val)	(((val) >> (pmu_conf->pmc_desc[cnum].pm_pos)) & 0x1)
393 | |||
/*
 * This structure is initialized at boot time and contains
 * a description of the PMU main characteristics.
 *
 * If the probe function is defined, detection is based
 * on its return value:
 * 	- 0 means recognized PMU
 * 	- anything else means not supported
 * When the probe function is not defined, then the pmu_family field
 * is used and it must match the host CPU family such that:
 * 	- cpu->family & config->pmu_family != 0
 *
 * The selected description is reachable through the pmu_conf pointer.
 * ovfl_val is used as a mask for the hardware-implemented counter bits
 * by pfm_read_soft_counter()/pfm_write_soft_counter().
 */
typedef struct {
	unsigned long  ovfl_val;	/* overflow value for counters */

	pfm_reg_desc_t *pmc_desc;	/* detailed PMC register dependencies descriptions */
	pfm_reg_desc_t *pmd_desc;	/* detailed PMD register dependencies descriptions */

	unsigned int   num_pmcs;	/* number of PMCS: computed at init time */
	unsigned int   num_pmds;	/* number of PMDS: computed at init time */
	unsigned long  impl_pmcs[4];	/* bitmask of implemented PMCS */
	unsigned long  impl_pmds[4];	/* bitmask of implemented PMDS */

	char	      *pmu_name;	/* PMU family name */
	unsigned int  pmu_family;	/* cpuid family pattern used to identify pmu */
	unsigned int  flags;		/* pmu specific flags */
	unsigned int  num_ibrs;		/* number of IBRS: computed at init time */
	unsigned int  num_dbrs;		/* number of DBRS: computed at init time */
	unsigned int  num_counters;	/* PMC/PMD counting pairs : computed at init time */
	int           (*probe)(void);   /* customized probe routine */
	unsigned int  use_rr_dbregs:1;	/* set if debug registers used for range restriction */
} pmu_config_t;
/*
 * PMU specific flags (values for pmu_config_t.flags)
 */
#define PFM_PMU_IRQ_RESEND	1	/* PMU needs explicit IRQ resend */
430 | |||
/*
 * debug register related type definitions
 *
 * Both layouts split one 64-bit word into a 56-bit mask, a 4-bit plm
 * field and 4 more bits; dbreg_t overlays them on the raw value.
 */
typedef struct {
	unsigned long ibr_mask:56;
	unsigned long ibr_plm:4;
	unsigned long ibr_ig:3;
	unsigned long ibr_x:1;
} ibr_mask_reg_t;

typedef struct {
	unsigned long dbr_mask:56;
	unsigned long dbr_plm:4;
	unsigned long dbr_ig:2;
	unsigned long dbr_w:1;
	unsigned long dbr_r:1;
} dbr_mask_reg_t;

typedef union {
	unsigned long  val;	/* raw register value */
	ibr_mask_reg_t ibr;	/* view as an instruction debug mask register */
	dbr_mask_reg_t dbr;	/* view as a data debug mask register */
} dbreg_t;
454 | |||
455 | |||
/*
 * perfmon command descriptions
 *
 * One entry per command in pfm_cmd_tab[], indexed by command number (see
 * the PFM_CMD_*() accessors below).
 */
typedef struct {
	int		(*cmd_func)(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs);	/* handler */
	char		*cmd_name;	/* printable name */
	int		cmd_flags;	/* combination of the PFM_CMD_* flags below */
	unsigned int	cmd_narg;	/* argument count; PFM_CMD_ARG_MANY presumably means variable -- confirm */
	size_t		cmd_argsize;	/* size of one argument */
	int		(*cmd_getsize)(void *arg, size_t *sz);	/* optional size callback */
} pfm_cmd_desc_t;

#define PFM_CMD_FD		0x01	/* command requires a file descriptor */
#define PFM_CMD_ARG_READ	0x02	/* command must read argument(s) */
#define PFM_CMD_ARG_RW		0x04	/* command must read/write argument(s) */
#define PFM_CMD_STOP		0x08	/* command does not work on zombie context */


/* accessors into pfm_cmd_tab[]; cmd is not range-checked here */
#define PFM_CMD_NAME(cmd)	pfm_cmd_tab[(cmd)].cmd_name
#define PFM_CMD_READ_ARG(cmd)	(pfm_cmd_tab[(cmd)].cmd_flags & PFM_CMD_ARG_READ)
#define PFM_CMD_RW_ARG(cmd)	(pfm_cmd_tab[(cmd)].cmd_flags & PFM_CMD_ARG_RW)
#define PFM_CMD_USE_FD(cmd)	(pfm_cmd_tab[(cmd)].cmd_flags & PFM_CMD_FD)
#define PFM_CMD_STOPPED(cmd)	(pfm_cmd_tab[(cmd)].cmd_flags & PFM_CMD_STOP)

#define PFM_CMD_ARG_MANY	-1 /* cannot be zero */
481 | |||
/*
 * tunables; debug, debug_ovfl, fastctxsw and expert_mode are exported
 * through pfm_ctl_table[]
 */
typedef struct {
	int	debug;		/* turn on/off debugging via syslog */
	int	debug_ovfl;	/* turn on/off debug printk in overflow handler */
	int	fastctxsw;	/* turn on/off fast (unsecure) ctxsw */
	int	expert_mode;	/* turn on/off value checking */
	int 	debug_pfm_read;
} pfm_sysctl_t;

/*
 * overflow interrupt statistics; one instance per CPU in pfm_stats[]
 */
typedef struct {
	unsigned long pfm_spurious_ovfl_intr_count;	/* keep track of spurious ovfl interrupts */
	unsigned long pfm_replay_ovfl_intr_count;	/* keep track of replayed ovfl interrupts */
	unsigned long pfm_ovfl_intr_count; 		/* keep track of ovfl interrupts */
	unsigned long pfm_ovfl_intr_cycles;		/* cycles spent processing ovfl interrupts */
	unsigned long pfm_ovfl_intr_cycles_min;		/* min cycles spent processing ovfl interrupts */
	unsigned long pfm_ovfl_intr_cycles_max;		/* max cycles spent processing ovfl interrupts */
	unsigned long pfm_smpl_handler_calls;
	unsigned long pfm_smpl_handler_cycles;
	char pad[SMP_CACHE_BYTES] ____cacheline_aligned;	/* presumably keeps per-CPU entries on separate cache lines */
} pfm_stats_t;
501 | |||
/*
 * perfmon internal variables
 */
static pfm_stats_t		pfm_stats[NR_CPUS];
static pfm_session_t		pfm_sessions;	/* global sessions information */

static struct proc_dir_entry 	*perfmon_dir;
static pfm_uuid_t		pfm_null_uuid = {0,};

static spinlock_t		pfm_buffer_fmt_lock;
static LIST_HEAD(pfm_buffer_fmt_list);

static pmu_config_t		*pmu_conf;	/* description of the PMU in use */

/* sysctl() controls */
static pfm_sysctl_t pfm_sysctl;
int pfm_debug_var;

/*
 * Leaf entries, nested under pfm_sysctl_dir ("perfmon") which is itself
 * nested under pfm_sysctl_root ("kernel").  Each entry binds a name to one
 * int of pfm_sysctl, handled by proc_dointvec.
 *
 * NOTE(review): "debug" and "debug_ovfl" use mode 0666, i.e. writable by
 * any user, while the other two are 0600 -- confirm this is intended.
 */
static ctl_table pfm_ctl_table[]={
	{1, "debug", &pfm_sysctl.debug, sizeof(int), 0666, NULL, &proc_dointvec, NULL,},
	{2, "debug_ovfl", &pfm_sysctl.debug_ovfl, sizeof(int), 0666, NULL, &proc_dointvec, NULL,},
	{3, "fastctxsw", &pfm_sysctl.fastctxsw, sizeof(int), 0600, NULL, &proc_dointvec, NULL,},
	{4, "expert_mode", &pfm_sysctl.expert_mode, sizeof(int), 0600, NULL, &proc_dointvec, NULL,},
	{ 0, },
};
static ctl_table pfm_sysctl_dir[] = {
	{1, "perfmon", NULL, 0, 0755, pfm_ctl_table, },
 	{0,},
};
static ctl_table pfm_sysctl_root[] = {
	{1, "kernel", NULL, 0, 0755, pfm_sysctl_dir, },
 	{0,},
};
static struct ctl_table_header *pfm_sysctl_header;

/* forward declarations of functions defined later in this file */
static int pfm_context_unload(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs);
static int pfm_flush(struct file *filp);

/* per-CPU variable access: the first form takes no CPU argument, the second takes one (b) */
#define pfm_get_cpu_var(v)		__ia64_per_cpu_var(v)
#define pfm_get_cpu_data(a,b)		per_cpu(a, b)
542 | |||
543 | static inline void | ||
544 | pfm_put_task(struct task_struct *task) | ||
545 | { | ||
546 | if (task != current) put_task_struct(task); | ||
547 | } | ||
548 | |||
549 | static inline void | ||
550 | pfm_set_task_notify(struct task_struct *task) | ||
551 | { | ||
552 | struct thread_info *info; | ||
553 | |||
554 | info = (struct thread_info *) ((char *) task + IA64_TASK_SIZE); | ||
555 | set_bit(TIF_NOTIFY_RESUME, &info->flags); | ||
556 | } | ||
557 | |||
/*
 * Clear TIF_NOTIFY_RESUME via clear_thread_flag().  Unlike
 * pfm_set_task_notify() this takes no task argument.
 */
static inline void
pfm_clear_task_notify(void)
{
	clear_thread_flag(TIF_NOTIFY_RESUME);
}
563 | |||
/*
 * Mark the page backing vmalloc address a as reserved.
 */
static inline void
pfm_reserve_page(unsigned long a)
{
	struct page *pg = vmalloc_to_page((void *)a);

	SetPageReserved(pg);
}

/*
 * Clear the reserved mark on the page backing vmalloc address a.
 */
static inline void
pfm_unreserve_page(unsigned long a)
{
	struct page *pg = vmalloc_to_page((void*)a);

	ClearPageReserved(pg);
}
574 | |||
575 | static inline unsigned long | ||
576 | pfm_protect_ctx_ctxsw(pfm_context_t *x) | ||
577 | { | ||
578 | spin_lock(&(x)->ctx_lock); | ||
579 | return 0UL; | ||
580 | } | ||
581 | |||
582 | static inline unsigned long | ||
583 | pfm_unprotect_ctx_ctxsw(pfm_context_t *x, unsigned long f) | ||
584 | { | ||
585 | spin_unlock(&(x)->ctx_lock); | ||
586 | } | ||
587 | |||
/*
 * Thin wrapper around do_munmap(); the acct argument is accepted but
 * ignored.
 * NOTE(review): the result is returned as unsigned int -- confirm callers
 * handle an error value from do_munmap() correctly.
 */
static inline unsigned int
pfm_do_munmap(struct mm_struct *mm, unsigned long addr, size_t len, int acct)
{
	return do_munmap(mm, addr, len);
}
593 | |||
/*
 * Thin wrapper around get_unmapped_area(); the exec argument is accepted
 * but ignored.
 */
static inline unsigned long 
pfm_get_unmapped_area(struct file *file, unsigned long addr, unsigned long len, unsigned long pgoff, unsigned long flags, unsigned long exec)
{
	return get_unmapped_area(file, addr, len, pgoff, flags);
}
599 | |||
600 | |||
/*
 * get_sb callback of the "pfmfs" filesystem type: builds a pseudo
 * superblock named "pfm:" with magic PFMFS_MAGIC.  flags, dev_name and
 * data are not used.
 */
static struct super_block *
pfmfs_get_sb(struct file_system_type *fs_type, int flags, const char *dev_name, void *data)
{
	return get_sb_pseudo(fs_type, "pfm:", NULL, PFMFS_MAGIC);
}

/* filesystem type descriptor for pfmfs */
static struct file_system_type pfm_fs_type = {
	.name     = "pfmfs",
	.get_sb   = pfmfs_get_sb,
	.kill_sb  = kill_anon_super,
};
612 | |||
/*
 * per-CPU state, accessed through pfm_get_cpu_var()
 */
DEFINE_PER_CPU(unsigned long, pfm_syst_info);		/* see PFM_CPUINFO_*() */
DEFINE_PER_CPU(struct task_struct *, pmu_owner);	/* see SET_PMU_OWNER()/GET_PMU_OWNER() */
DEFINE_PER_CPU(pfm_context_t  *, pmu_ctx);		/* see SET_PMU_OWNER()/GET_PMU_CTX() */
DEFINE_PER_CPU(unsigned long, pmu_activation_number);	/* see GET_ACTIVATION()/INC_ACTIVATION() */


/* forward declaration */
static struct file_operations pfm_file_ops;

/*
 * forward declarations
 */
#ifndef CONFIG_SMP
static void pfm_lazy_save_regs (struct task_struct *ta);
#endif

void dump_pmu_state(const char *);
static int pfm_write_ibr_dbr(int mode, pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs);
631 | |||
632 | #include "perfmon_itanium.h" | ||
633 | #include "perfmon_mckinley.h" | ||
634 | #include "perfmon_generic.h" | ||
635 | |||
/*
 * NULL-terminated list of the known PMU descriptions.  The generic
 * description has to stay last (presumably because the list is searched
 * in order and the generic entry is the fallback -- confirm in the probe
 * code).
 */
static pmu_config_t *pmu_confs[]={
	&pmu_conf_mck,
	&pmu_conf_ita,
	&pmu_conf_gen, /* must be last */
	NULL
};


static int pfm_end_notify_user(pfm_context_t *ctx);
645 | |||
/*
 * The four helpers below clear (ia64_rsm) or set (ia64_ssm) one PSR bit
 * and then issue ia64_srlz_i().
 */

/* clear psr.pp */
static inline void
pfm_clear_psr_pp(void)
{
	ia64_rsm(IA64_PSR_PP);
	ia64_srlz_i();
}

/* set psr.pp */
static inline void
pfm_set_psr_pp(void)
{
	ia64_ssm(IA64_PSR_PP);
	ia64_srlz_i();
}

/* clear psr.up */
static inline void
pfm_clear_psr_up(void)
{
	ia64_rsm(IA64_PSR_UP);
	ia64_srlz_i();
}

/* set psr.up */
static inline void
pfm_set_psr_up(void)
{
	ia64_ssm(IA64_PSR_UP);
	ia64_srlz_i();
}
673 | |||
674 | static inline unsigned long | ||
675 | pfm_get_psr(void) | ||
676 | { | ||
677 | unsigned long tmp; | ||
678 | tmp = ia64_getreg(_IA64_REG_PSR); | ||
679 | ia64_srlz_i(); | ||
680 | return tmp; | ||
681 | } | ||
682 | |||
/*
 * Write val to the PSR_L register, then issue ia64_srlz_i().
 */
static inline void
pfm_set_psr_l(unsigned long val)
{
	ia64_setreg(_IA64_REG_PSR_L, val);
	ia64_srlz_i();
}

/*
 * Freeze the PMU: write 1 to PMC0, then issue ia64_srlz_d().
 */
static inline void
pfm_freeze_pmu(void)
{
	ia64_set_pmc(0,1UL);
	ia64_srlz_d();
}

/*
 * Unfreeze the PMU: write 0 to PMC0, then issue ia64_srlz_d().
 */
static inline void
pfm_unfreeze_pmu(void)
{
	ia64_set_pmc(0,0UL);
	ia64_srlz_d();
}
703 | |||
/*
 * Load the first nibrs instruction debug registers from ibrs[].
 *
 * @ibrs:  values to load, ibrs[i] goes to IBR i
 * @nibrs: number of registers to load
 *
 * Each write is followed by ia64_dv_serialize_instruction(); a final
 * ia64_srlz_i() is issued once all registers are written.  The index is
 * unsigned so that it has the same type as the bound it is compared with.
 */
static inline void
pfm_restore_ibrs(unsigned long *ibrs, unsigned int nibrs)
{
	unsigned int i;

	for (i=0; i < nibrs; i++) {
		ia64_set_ibr(i, ibrs[i]);
		ia64_dv_serialize_instruction();
	}
	ia64_srlz_i();
}
715 | |||
/*
 * Load the first ndbrs data debug registers from dbrs[].
 *
 * @dbrs:  values to load, dbrs[i] goes to DBR i
 * @ndbrs: number of registers to load
 *
 * Each write is followed by ia64_dv_serialize_data(); a final
 * ia64_srlz_d() is issued once all registers are written.  The index is
 * unsigned so that it has the same type as the bound it is compared with.
 */
static inline void
pfm_restore_dbrs(unsigned long *dbrs, unsigned int ndbrs)
{
	unsigned int i;

	for (i=0; i < ndbrs; i++) {
		ia64_set_dbr(i, dbrs[i]);
		ia64_dv_serialize_data();
	}
	ia64_srlz_d();
}
727 | |||
728 | /* | ||
729 | * PMD[i] must be a counter. no check is made | ||
730 | */ | ||
731 | static inline unsigned long | ||
732 | pfm_read_soft_counter(pfm_context_t *ctx, int i) | ||
733 | { | ||
734 | return ctx->ctx_pmds[i].val + (ia64_get_pmd(i) & pmu_conf->ovfl_val); | ||
735 | } | ||
736 | |||
737 | /* | ||
738 | * PMD[i] must be a counter. no check is made | ||
739 | */ | ||
740 | static inline void | ||
741 | pfm_write_soft_counter(pfm_context_t *ctx, int i, unsigned long val) | ||
742 | { | ||
743 | unsigned long ovfl_val = pmu_conf->ovfl_val; | ||
744 | |||
745 | ctx->ctx_pmds[i].val = val & ~ovfl_val; | ||
746 | /* | ||
747 | * writing to unimplemented part is ignore, so we do not need to | ||
748 | * mask off top part | ||
749 | */ | ||
750 | ia64_set_pmd(i, val & ovfl_val); | ||
751 | } | ||
752 | |||
753 | static pfm_msg_t * | ||
754 | pfm_get_new_msg(pfm_context_t *ctx) | ||
755 | { | ||
756 | int idx, next; | ||
757 | |||
758 | next = (ctx->ctx_msgq_tail+1) % PFM_MAX_MSGS; | ||
759 | |||
760 | DPRINT(("ctx_fd=%p head=%d tail=%d\n", ctx, ctx->ctx_msgq_head, ctx->ctx_msgq_tail)); | ||
761 | if (next == ctx->ctx_msgq_head) return NULL; | ||
762 | |||
763 | idx = ctx->ctx_msgq_tail; | ||
764 | ctx->ctx_msgq_tail = next; | ||
765 | |||
766 | DPRINT(("ctx=%p head=%d tail=%d msg=%d\n", ctx, ctx->ctx_msgq_head, ctx->ctx_msgq_tail, idx)); | ||
767 | |||
768 | return ctx->ctx_msgq+idx; | ||
769 | } | ||
770 | |||
771 | static pfm_msg_t * | ||
772 | pfm_get_next_msg(pfm_context_t *ctx) | ||
773 | { | ||
774 | pfm_msg_t *msg; | ||
775 | |||
776 | DPRINT(("ctx=%p head=%d tail=%d\n", ctx, ctx->ctx_msgq_head, ctx->ctx_msgq_tail)); | ||
777 | |||
778 | if (PFM_CTXQ_EMPTY(ctx)) return NULL; | ||
779 | |||
780 | /* | ||
781 | * get oldest message | ||
782 | */ | ||
783 | msg = ctx->ctx_msgq+ctx->ctx_msgq_head; | ||
784 | |||
785 | /* | ||
786 | * and move forward | ||
787 | */ | ||
788 | ctx->ctx_msgq_head = (ctx->ctx_msgq_head+1) % PFM_MAX_MSGS; | ||
789 | |||
790 | DPRINT(("ctx=%p head=%d tail=%d type=%d\n", ctx, ctx->ctx_msgq_head, ctx->ctx_msgq_tail, msg->pfm_gen_msg.msg_type)); | ||
791 | |||
792 | return msg; | ||
793 | } | ||
794 | |||
795 | static void | ||
796 | pfm_reset_msgq(pfm_context_t *ctx) | ||
797 | { | ||
798 | ctx->ctx_msgq_head = ctx->ctx_msgq_tail = 0; | ||
799 | DPRINT(("ctx=%p msgq reset\n", ctx)); | ||
800 | } | ||
801 | |||
802 | static void * | ||
803 | pfm_rvmalloc(unsigned long size) | ||
804 | { | ||
805 | void *mem; | ||
806 | unsigned long addr; | ||
807 | |||
808 | size = PAGE_ALIGN(size); | ||
809 | mem = vmalloc(size); | ||
810 | if (mem) { | ||
811 | //printk("perfmon: CPU%d pfm_rvmalloc(%ld)=%p\n", smp_processor_id(), size, mem); | ||
812 | memset(mem, 0, size); | ||
813 | addr = (unsigned long)mem; | ||
814 | while (size > 0) { | ||
815 | pfm_reserve_page(addr); | ||
816 | addr+=PAGE_SIZE; | ||
817 | size-=PAGE_SIZE; | ||
818 | } | ||
819 | } | ||
820 | return mem; | ||
821 | } | ||
822 | |||
823 | static void | ||
824 | pfm_rvfree(void *mem, unsigned long size) | ||
825 | { | ||
826 | unsigned long addr; | ||
827 | |||
828 | if (mem) { | ||
829 | DPRINT(("freeing physical buffer @%p size=%lu\n", mem, size)); | ||
830 | addr = (unsigned long) mem; | ||
831 | while ((long) size > 0) { | ||
832 | pfm_unreserve_page(addr); | ||
833 | addr+=PAGE_SIZE; | ||
834 | size-=PAGE_SIZE; | ||
835 | } | ||
836 | vfree(mem); | ||
837 | } | ||
838 | return; | ||
839 | } | ||
840 | |||
841 | static pfm_context_t * | ||
842 | pfm_context_alloc(void) | ||
843 | { | ||
844 | pfm_context_t *ctx; | ||
845 | |||
846 | /* | ||
847 | * allocate context descriptor | ||
848 | * must be able to free with interrupts disabled | ||
849 | */ | ||
850 | ctx = kmalloc(sizeof(pfm_context_t), GFP_KERNEL); | ||
851 | if (ctx) { | ||
852 | memset(ctx, 0, sizeof(pfm_context_t)); | ||
853 | DPRINT(("alloc ctx @%p\n", ctx)); | ||
854 | } | ||
855 | return ctx; | ||
856 | } | ||
857 | |||
858 | static void | ||
859 | pfm_context_free(pfm_context_t *ctx) | ||
860 | { | ||
861 | if (ctx) { | ||
862 | DPRINT(("free ctx @%p\n", ctx)); | ||
863 | kfree(ctx); | ||
864 | } | ||
865 | } | ||
866 | |||
867 | static void | ||
868 | pfm_mask_monitoring(struct task_struct *task) | ||
869 | { | ||
870 | pfm_context_t *ctx = PFM_GET_CTX(task); | ||
871 | struct thread_struct *th = &task->thread; | ||
872 | unsigned long mask, val, ovfl_mask; | ||
873 | int i; | ||
874 | |||
875 | DPRINT_ovfl(("masking monitoring for [%d]\n", task->pid)); | ||
876 | |||
877 | ovfl_mask = pmu_conf->ovfl_val; | ||
878 | /* | ||
879 | * monitoring can only be masked as a result of a valid | ||
880 | * counter overflow. In UP, it means that the PMU still | ||
881 | * has an owner. Note that the owner can be different | ||
882 | * from the current task. However the PMU state belongs | ||
883 | * to the owner. | ||
884 | * In SMP, a valid overflow only happens when task is | ||
885 | * current. Therefore if we come here, we know that | ||
886 | * the PMU state belongs to the current task, therefore | ||
887 | * we can access the live registers. | ||
888 | * | ||
889 | * So in both cases, the live register contains the owner's | ||
890 | * state. We can ONLY touch the PMU registers and NOT the PSR. | ||
891 | * | ||
892 | * As a consequence to this call, the thread->pmds[] array | ||
893 | * contains stale information which must be ignored | ||
894 | * when context is reloaded AND monitoring is active (see | ||
895 | * pfm_restart). | ||
896 | */ | ||
897 | mask = ctx->ctx_used_pmds[0]; | ||
898 | for (i = 0; mask; i++, mask>>=1) { | ||
899 | /* skip non used pmds */ | ||
900 | if ((mask & 0x1) == 0) continue; | ||
901 | val = ia64_get_pmd(i); | ||
902 | |||
903 | if (PMD_IS_COUNTING(i)) { | ||
904 | /* | ||
905 | * we rebuild the full 64 bit value of the counter | ||
906 | */ | ||
907 | ctx->ctx_pmds[i].val += (val & ovfl_mask); | ||
908 | } else { | ||
909 | ctx->ctx_pmds[i].val = val; | ||
910 | } | ||
911 | DPRINT_ovfl(("pmd[%d]=0x%lx hw_pmd=0x%lx\n", | ||
912 | i, | ||
913 | ctx->ctx_pmds[i].val, | ||
914 | val & ovfl_mask)); | ||
915 | } | ||
916 | /* | ||
917 | * mask monitoring by setting the privilege level to 0 | ||
918 | * we cannot use psr.pp/psr.up for this, it is controlled by | ||
919 | * the user | ||
920 | * | ||
921 | * if task is current, modify actual registers, otherwise modify | ||
922 | * thread save state, i.e., what will be restored in pfm_load_regs() | ||
923 | */ | ||
924 | mask = ctx->ctx_used_monitors[0] >> PMU_FIRST_COUNTER; | ||
925 | for(i= PMU_FIRST_COUNTER; mask; i++, mask>>=1) { | ||
926 | if ((mask & 0x1) == 0UL) continue; | ||
927 | ia64_set_pmc(i, th->pmcs[i] & ~0xfUL); | ||
928 | th->pmcs[i] &= ~0xfUL; | ||
929 | DPRINT_ovfl(("pmc[%d]=0x%lx\n", i, th->pmcs[i])); | ||
930 | } | ||
931 | /* | ||
932 | * make all of this visible | ||
933 | */ | ||
934 | ia64_srlz_d(); | ||
935 | } | ||
936 | |||
937 | /* | ||
938 | * must always be done with task == current | ||
939 | * | ||
940 | * context must be in MASKED state when calling | ||
941 | */ | ||
942 | static void | ||
943 | pfm_restore_monitoring(struct task_struct *task) | ||
944 | { | ||
945 | pfm_context_t *ctx = PFM_GET_CTX(task); | ||
946 | struct thread_struct *th = &task->thread; | ||
947 | unsigned long mask, ovfl_mask; | ||
948 | unsigned long psr, val; | ||
949 | int i, is_system; | ||
950 | |||
951 | is_system = ctx->ctx_fl_system; | ||
952 | ovfl_mask = pmu_conf->ovfl_val; | ||
953 | |||
954 | if (task != current) { | ||
955 | printk(KERN_ERR "perfmon.%d: invalid task[%d] current[%d]\n", __LINE__, task->pid, current->pid); | ||
956 | return; | ||
957 | } | ||
958 | if (ctx->ctx_state != PFM_CTX_MASKED) { | ||
959 | printk(KERN_ERR "perfmon.%d: task[%d] current[%d] invalid state=%d\n", __LINE__, | ||
960 | task->pid, current->pid, ctx->ctx_state); | ||
961 | return; | ||
962 | } | ||
963 | psr = pfm_get_psr(); | ||
964 | /* | ||
965 | * monitoring is masked via the PMC. | ||
966 | * As we restore their value, we do not want each counter to | ||
967 | * restart right away. We stop monitoring using the PSR, | ||
968 | * restore the PMC (and PMD) and then re-establish the psr | ||
969 | * as it was. Note that there can be no pending overflow at | ||
970 | * this point, because monitoring was MASKED. | ||
971 | * | ||
972 | * system-wide session are pinned and self-monitoring | ||
973 | */ | ||
974 | if (is_system && (PFM_CPUINFO_GET() & PFM_CPUINFO_DCR_PP)) { | ||
975 | /* disable dcr pp */ | ||
976 | ia64_setreg(_IA64_REG_CR_DCR, ia64_getreg(_IA64_REG_CR_DCR) & ~IA64_DCR_PP); | ||
977 | pfm_clear_psr_pp(); | ||
978 | } else { | ||
979 | pfm_clear_psr_up(); | ||
980 | } | ||
981 | /* | ||
982 | * first, we restore the PMD | ||
983 | */ | ||
984 | mask = ctx->ctx_used_pmds[0]; | ||
985 | for (i = 0; mask; i++, mask>>=1) { | ||
986 | /* skip non used pmds */ | ||
987 | if ((mask & 0x1) == 0) continue; | ||
988 | |||
989 | if (PMD_IS_COUNTING(i)) { | ||
990 | /* | ||
991 | * we split the 64bit value according to | ||
992 | * counter width | ||
993 | */ | ||
994 | val = ctx->ctx_pmds[i].val & ovfl_mask; | ||
995 | ctx->ctx_pmds[i].val &= ~ovfl_mask; | ||
996 | } else { | ||
997 | val = ctx->ctx_pmds[i].val; | ||
998 | } | ||
999 | ia64_set_pmd(i, val); | ||
1000 | |||
1001 | DPRINT(("pmd[%d]=0x%lx hw_pmd=0x%lx\n", | ||
1002 | i, | ||
1003 | ctx->ctx_pmds[i].val, | ||
1004 | val)); | ||
1005 | } | ||
1006 | /* | ||
1007 | * restore the PMCs | ||
1008 | */ | ||
1009 | mask = ctx->ctx_used_monitors[0] >> PMU_FIRST_COUNTER; | ||
1010 | for(i= PMU_FIRST_COUNTER; mask; i++, mask>>=1) { | ||
1011 | if ((mask & 0x1) == 0UL) continue; | ||
1012 | th->pmcs[i] = ctx->ctx_pmcs[i]; | ||
1013 | ia64_set_pmc(i, th->pmcs[i]); | ||
1014 | DPRINT(("[%d] pmc[%d]=0x%lx\n", task->pid, i, th->pmcs[i])); | ||
1015 | } | ||
1016 | ia64_srlz_d(); | ||
1017 | |||
1018 | /* | ||
1019 | * must restore DBR/IBR because could be modified while masked | ||
1020 | * XXX: need to optimize | ||
1021 | */ | ||
1022 | if (ctx->ctx_fl_using_dbreg) { | ||
1023 | pfm_restore_ibrs(ctx->ctx_ibrs, pmu_conf->num_ibrs); | ||
1024 | pfm_restore_dbrs(ctx->ctx_dbrs, pmu_conf->num_dbrs); | ||
1025 | } | ||
1026 | |||
1027 | /* | ||
1028 | * now restore PSR | ||
1029 | */ | ||
1030 | if (is_system && (PFM_CPUINFO_GET() & PFM_CPUINFO_DCR_PP)) { | ||
1031 | /* enable dcr pp */ | ||
1032 | ia64_setreg(_IA64_REG_CR_DCR, ia64_getreg(_IA64_REG_CR_DCR) | IA64_DCR_PP); | ||
1033 | ia64_srlz_i(); | ||
1034 | } | ||
1035 | pfm_set_psr_l(psr); | ||
1036 | } | ||
1037 | |||
/*
 * Read every PMD whose bit is set in mask from the hardware into
 * pmds[], indexed by register number. A data serialization is issued
 * before the registers are read.
 */
static inline void
pfm_save_pmds(unsigned long *pmds, unsigned long mask)
{
	int reg = 0;

	ia64_srlz_d();

	while (mask) {
		if (mask & 0x1)
			pmds[reg] = ia64_get_pmd(reg);
		reg++;
		mask >>= 1;
	}
}
1049 | |||
1050 | /* | ||
1051 | * reload from thread state (used for ctxw only) | ||
1052 | */ | ||
1053 | static inline void | ||
1054 | pfm_restore_pmds(unsigned long *pmds, unsigned long mask) | ||
1055 | { | ||
1056 | int i; | ||
1057 | unsigned long val, ovfl_val = pmu_conf->ovfl_val; | ||
1058 | |||
1059 | for (i=0; mask; i++, mask>>=1) { | ||
1060 | if ((mask & 0x1) == 0) continue; | ||
1061 | val = PMD_IS_COUNTING(i) ? pmds[i] & ovfl_val : pmds[i]; | ||
1062 | ia64_set_pmd(i, val); | ||
1063 | } | ||
1064 | ia64_srlz_d(); | ||
1065 | } | ||
1066 | |||
1067 | /* | ||
1068 | * propagate PMD from context to thread-state | ||
1069 | */ | ||
1070 | static inline void | ||
1071 | pfm_copy_pmds(struct task_struct *task, pfm_context_t *ctx) | ||
1072 | { | ||
1073 | struct thread_struct *thread = &task->thread; | ||
1074 | unsigned long ovfl_val = pmu_conf->ovfl_val; | ||
1075 | unsigned long mask = ctx->ctx_all_pmds[0]; | ||
1076 | unsigned long val; | ||
1077 | int i; | ||
1078 | |||
1079 | DPRINT(("mask=0x%lx\n", mask)); | ||
1080 | |||
1081 | for (i=0; mask; i++, mask>>=1) { | ||
1082 | |||
1083 | val = ctx->ctx_pmds[i].val; | ||
1084 | |||
1085 | /* | ||
1086 | * We break up the 64 bit value into 2 pieces | ||
1087 | * the lower bits go to the machine state in the | ||
1088 | * thread (will be reloaded on ctxsw in). | ||
1089 | * The upper part stays in the soft-counter. | ||
1090 | */ | ||
1091 | if (PMD_IS_COUNTING(i)) { | ||
1092 | ctx->ctx_pmds[i].val = val & ~ovfl_val; | ||
1093 | val &= ovfl_val; | ||
1094 | } | ||
1095 | thread->pmds[i] = val; | ||
1096 | |||
1097 | DPRINT(("pmd[%d]=0x%lx soft_val=0x%lx\n", | ||
1098 | i, | ||
1099 | thread->pmds[i], | ||
1100 | ctx->ctx_pmds[i].val)); | ||
1101 | } | ||
1102 | } | ||
1103 | |||
1104 | /* | ||
1105 | * propagate PMC from context to thread-state | ||
1106 | */ | ||
1107 | static inline void | ||
1108 | pfm_copy_pmcs(struct task_struct *task, pfm_context_t *ctx) | ||
1109 | { | ||
1110 | struct thread_struct *thread = &task->thread; | ||
1111 | unsigned long mask = ctx->ctx_all_pmcs[0]; | ||
1112 | int i; | ||
1113 | |||
1114 | DPRINT(("mask=0x%lx\n", mask)); | ||
1115 | |||
1116 | for (i=0; mask; i++, mask>>=1) { | ||
1117 | /* masking 0 with ovfl_val yields 0 */ | ||
1118 | thread->pmcs[i] = ctx->ctx_pmcs[i]; | ||
1119 | DPRINT(("pmc[%d]=0x%lx\n", i, thread->pmcs[i])); | ||
1120 | } | ||
1121 | } | ||
1122 | |||
1123 | |||
1124 | |||
/*
 * Write every PMC whose bit is set in mask from pmcs[] to the hardware,
 * then issue a data serialization.
 */
static inline void
pfm_restore_pmcs(unsigned long *pmcs, unsigned long mask)
{
	int reg = 0;

	while (mask) {
		if (mask & 0x1)
			ia64_set_pmc(reg, pmcs[reg]);
		reg++;
		mask >>= 1;
	}
	ia64_srlz_d();
}
1136 | |||
1137 | static inline int | ||
1138 | pfm_uuid_cmp(pfm_uuid_t a, pfm_uuid_t b) | ||
1139 | { | ||
1140 | return memcmp(a, b, sizeof(pfm_uuid_t)); | ||
1141 | } | ||
1142 | |||
1143 | static inline int | ||
1144 | pfm_buf_fmt_exit(pfm_buffer_fmt_t *fmt, struct task_struct *task, void *buf, struct pt_regs *regs) | ||
1145 | { | ||
1146 | int ret = 0; | ||
1147 | if (fmt->fmt_exit) ret = (*fmt->fmt_exit)(task, buf, regs); | ||
1148 | return ret; | ||
1149 | } | ||
1150 | |||
1151 | static inline int | ||
1152 | pfm_buf_fmt_getsize(pfm_buffer_fmt_t *fmt, struct task_struct *task, unsigned int flags, int cpu, void *arg, unsigned long *size) | ||
1153 | { | ||
1154 | int ret = 0; | ||
1155 | if (fmt->fmt_getsize) ret = (*fmt->fmt_getsize)(task, flags, cpu, arg, size); | ||
1156 | return ret; | ||
1157 | } | ||
1158 | |||
1159 | |||
1160 | static inline int | ||
1161 | pfm_buf_fmt_validate(pfm_buffer_fmt_t *fmt, struct task_struct *task, unsigned int flags, | ||
1162 | int cpu, void *arg) | ||
1163 | { | ||
1164 | int ret = 0; | ||
1165 | if (fmt->fmt_validate) ret = (*fmt->fmt_validate)(task, flags, cpu, arg); | ||
1166 | return ret; | ||
1167 | } | ||
1168 | |||
1169 | static inline int | ||
1170 | pfm_buf_fmt_init(pfm_buffer_fmt_t *fmt, struct task_struct *task, void *buf, unsigned int flags, | ||
1171 | int cpu, void *arg) | ||
1172 | { | ||
1173 | int ret = 0; | ||
1174 | if (fmt->fmt_init) ret = (*fmt->fmt_init)(task, buf, flags, cpu, arg); | ||
1175 | return ret; | ||
1176 | } | ||
1177 | |||
1178 | static inline int | ||
1179 | pfm_buf_fmt_restart(pfm_buffer_fmt_t *fmt, struct task_struct *task, pfm_ovfl_ctrl_t *ctrl, void *buf, struct pt_regs *regs) | ||
1180 | { | ||
1181 | int ret = 0; | ||
1182 | if (fmt->fmt_restart) ret = (*fmt->fmt_restart)(task, ctrl, buf, regs); | ||
1183 | return ret; | ||
1184 | } | ||
1185 | |||
1186 | static inline int | ||
1187 | pfm_buf_fmt_restart_active(pfm_buffer_fmt_t *fmt, struct task_struct *task, pfm_ovfl_ctrl_t *ctrl, void *buf, struct pt_regs *regs) | ||
1188 | { | ||
1189 | int ret = 0; | ||
1190 | if (fmt->fmt_restart_active) ret = (*fmt->fmt_restart_active)(task, ctrl, buf, regs); | ||
1191 | return ret; | ||
1192 | } | ||
1193 | |||
1194 | static pfm_buffer_fmt_t * | ||
1195 | __pfm_find_buffer_fmt(pfm_uuid_t uuid) | ||
1196 | { | ||
1197 | struct list_head * pos; | ||
1198 | pfm_buffer_fmt_t * entry; | ||
1199 | |||
1200 | list_for_each(pos, &pfm_buffer_fmt_list) { | ||
1201 | entry = list_entry(pos, pfm_buffer_fmt_t, fmt_list); | ||
1202 | if (pfm_uuid_cmp(uuid, entry->fmt_uuid) == 0) | ||
1203 | return entry; | ||
1204 | } | ||
1205 | return NULL; | ||
1206 | } | ||
1207 | |||
1208 | /* | ||
1209 | * find a buffer format based on its uuid | ||
1210 | */ | ||
1211 | static pfm_buffer_fmt_t * | ||
1212 | pfm_find_buffer_fmt(pfm_uuid_t uuid) | ||
1213 | { | ||
1214 | pfm_buffer_fmt_t * fmt; | ||
1215 | spin_lock(&pfm_buffer_fmt_lock); | ||
1216 | fmt = __pfm_find_buffer_fmt(uuid); | ||
1217 | spin_unlock(&pfm_buffer_fmt_lock); | ||
1218 | return fmt; | ||
1219 | } | ||
1220 | |||
1221 | int | ||
1222 | pfm_register_buffer_fmt(pfm_buffer_fmt_t *fmt) | ||
1223 | { | ||
1224 | int ret = 0; | ||
1225 | |||
1226 | /* some sanity checks */ | ||
1227 | if (fmt == NULL || fmt->fmt_name == NULL) return -EINVAL; | ||
1228 | |||
1229 | /* we need at least a handler */ | ||
1230 | if (fmt->fmt_handler == NULL) return -EINVAL; | ||
1231 | |||
1232 | /* | ||
1233 | * XXX: need check validity of fmt_arg_size | ||
1234 | */ | ||
1235 | |||
1236 | spin_lock(&pfm_buffer_fmt_lock); | ||
1237 | |||
1238 | if (__pfm_find_buffer_fmt(fmt->fmt_uuid)) { | ||
1239 | printk(KERN_ERR "perfmon: duplicate sampling format: %s\n", fmt->fmt_name); | ||
1240 | ret = -EBUSY; | ||
1241 | goto out; | ||
1242 | } | ||
1243 | list_add(&fmt->fmt_list, &pfm_buffer_fmt_list); | ||
1244 | printk(KERN_INFO "perfmon: added sampling format %s\n", fmt->fmt_name); | ||
1245 | |||
1246 | out: | ||
1247 | spin_unlock(&pfm_buffer_fmt_lock); | ||
1248 | return ret; | ||
1249 | } | ||
1250 | EXPORT_SYMBOL(pfm_register_buffer_fmt); | ||
1251 | |||
1252 | int | ||
1253 | pfm_unregister_buffer_fmt(pfm_uuid_t uuid) | ||
1254 | { | ||
1255 | pfm_buffer_fmt_t *fmt; | ||
1256 | int ret = 0; | ||
1257 | |||
1258 | spin_lock(&pfm_buffer_fmt_lock); | ||
1259 | |||
1260 | fmt = __pfm_find_buffer_fmt(uuid); | ||
1261 | if (!fmt) { | ||
1262 | printk(KERN_ERR "perfmon: cannot unregister format, not found\n"); | ||
1263 | ret = -EINVAL; | ||
1264 | goto out; | ||
1265 | } | ||
1266 | list_del_init(&fmt->fmt_list); | ||
1267 | printk(KERN_INFO "perfmon: removed sampling format: %s\n", fmt->fmt_name); | ||
1268 | |||
1269 | out: | ||
1270 | spin_unlock(&pfm_buffer_fmt_lock); | ||
1271 | return ret; | ||
1272 | |||
1273 | } | ||
1274 | EXPORT_SYMBOL(pfm_unregister_buffer_fmt); | ||
1275 | |||
1276 | static int | ||
1277 | pfm_reserve_session(struct task_struct *task, int is_syswide, unsigned int cpu) | ||
1278 | { | ||
1279 | unsigned long flags; | ||
1280 | /* | ||
1281 | * validy checks on cpu_mask have been done upstream | ||
1282 | */ | ||
1283 | LOCK_PFS(flags); | ||
1284 | |||
1285 | DPRINT(("in sys_sessions=%u task_sessions=%u dbregs=%u syswide=%d cpu=%u\n", | ||
1286 | pfm_sessions.pfs_sys_sessions, | ||
1287 | pfm_sessions.pfs_task_sessions, | ||
1288 | pfm_sessions.pfs_sys_use_dbregs, | ||
1289 | is_syswide, | ||
1290 | cpu)); | ||
1291 | |||
1292 | if (is_syswide) { | ||
1293 | /* | ||
1294 | * cannot mix system wide and per-task sessions | ||
1295 | */ | ||
1296 | if (pfm_sessions.pfs_task_sessions > 0UL) { | ||
1297 | DPRINT(("system wide not possible, %u conflicting task_sessions\n", | ||
1298 | pfm_sessions.pfs_task_sessions)); | ||
1299 | goto abort; | ||
1300 | } | ||
1301 | |||
1302 | if (pfm_sessions.pfs_sys_session[cpu]) goto error_conflict; | ||
1303 | |||
1304 | DPRINT(("reserving system wide session on CPU%u currently on CPU%u\n", cpu, smp_processor_id())); | ||
1305 | |||
1306 | pfm_sessions.pfs_sys_session[cpu] = task; | ||
1307 | |||
1308 | pfm_sessions.pfs_sys_sessions++ ; | ||
1309 | |||
1310 | } else { | ||
1311 | if (pfm_sessions.pfs_sys_sessions) goto abort; | ||
1312 | pfm_sessions.pfs_task_sessions++; | ||
1313 | } | ||
1314 | |||
1315 | DPRINT(("out sys_sessions=%u task_sessions=%u dbregs=%u syswide=%d cpu=%u\n", | ||
1316 | pfm_sessions.pfs_sys_sessions, | ||
1317 | pfm_sessions.pfs_task_sessions, | ||
1318 | pfm_sessions.pfs_sys_use_dbregs, | ||
1319 | is_syswide, | ||
1320 | cpu)); | ||
1321 | |||
1322 | UNLOCK_PFS(flags); | ||
1323 | |||
1324 | return 0; | ||
1325 | |||
1326 | error_conflict: | ||
1327 | DPRINT(("system wide not possible, conflicting session [%d] on CPU%d\n", | ||
1328 | pfm_sessions.pfs_sys_session[cpu]->pid, | ||
1329 | smp_processor_id())); | ||
1330 | abort: | ||
1331 | UNLOCK_PFS(flags); | ||
1332 | |||
1333 | return -EBUSY; | ||
1334 | |||
1335 | } | ||
1336 | |||
1337 | static int | ||
1338 | pfm_unreserve_session(pfm_context_t *ctx, int is_syswide, unsigned int cpu) | ||
1339 | { | ||
1340 | unsigned long flags; | ||
1341 | /* | ||
1342 | * validy checks on cpu_mask have been done upstream | ||
1343 | */ | ||
1344 | LOCK_PFS(flags); | ||
1345 | |||
1346 | DPRINT(("in sys_sessions=%u task_sessions=%u dbregs=%u syswide=%d cpu=%u\n", | ||
1347 | pfm_sessions.pfs_sys_sessions, | ||
1348 | pfm_sessions.pfs_task_sessions, | ||
1349 | pfm_sessions.pfs_sys_use_dbregs, | ||
1350 | is_syswide, | ||
1351 | cpu)); | ||
1352 | |||
1353 | |||
1354 | if (is_syswide) { | ||
1355 | pfm_sessions.pfs_sys_session[cpu] = NULL; | ||
1356 | /* | ||
1357 | * would not work with perfmon+more than one bit in cpu_mask | ||
1358 | */ | ||
1359 | if (ctx && ctx->ctx_fl_using_dbreg) { | ||
1360 | if (pfm_sessions.pfs_sys_use_dbregs == 0) { | ||
1361 | printk(KERN_ERR "perfmon: invalid release for ctx %p sys_use_dbregs=0\n", ctx); | ||
1362 | } else { | ||
1363 | pfm_sessions.pfs_sys_use_dbregs--; | ||
1364 | } | ||
1365 | } | ||
1366 | pfm_sessions.pfs_sys_sessions--; | ||
1367 | } else { | ||
1368 | pfm_sessions.pfs_task_sessions--; | ||
1369 | } | ||
1370 | DPRINT(("out sys_sessions=%u task_sessions=%u dbregs=%u syswide=%d cpu=%u\n", | ||
1371 | pfm_sessions.pfs_sys_sessions, | ||
1372 | pfm_sessions.pfs_task_sessions, | ||
1373 | pfm_sessions.pfs_sys_use_dbregs, | ||
1374 | is_syswide, | ||
1375 | cpu)); | ||
1376 | |||
1377 | UNLOCK_PFS(flags); | ||
1378 | |||
1379 | return 0; | ||
1380 | } | ||
1381 | |||
1382 | /* | ||
1383 | * removes virtual mapping of the sampling buffer. | ||
1384 | * IMPORTANT: cannot be called with interrupts disable, e.g. inside | ||
1385 | * a PROTECT_CTX() section. | ||
1386 | */ | ||
1387 | static int | ||
1388 | pfm_remove_smpl_mapping(struct task_struct *task, void *vaddr, unsigned long size) | ||
1389 | { | ||
1390 | int r; | ||
1391 | |||
1392 | /* sanity checks */ | ||
1393 | if (task->mm == NULL || size == 0UL || vaddr == NULL) { | ||
1394 | printk(KERN_ERR "perfmon: pfm_remove_smpl_mapping [%d] invalid context mm=%p\n", task->pid, task->mm); | ||
1395 | return -EINVAL; | ||
1396 | } | ||
1397 | |||
1398 | DPRINT(("smpl_vaddr=%p size=%lu\n", vaddr, size)); | ||
1399 | |||
1400 | /* | ||
1401 | * does the actual unmapping | ||
1402 | */ | ||
1403 | down_write(&task->mm->mmap_sem); | ||
1404 | |||
1405 | DPRINT(("down_write done smpl_vaddr=%p size=%lu\n", vaddr, size)); | ||
1406 | |||
1407 | r = pfm_do_munmap(task->mm, (unsigned long)vaddr, size, 0); | ||
1408 | |||
1409 | up_write(&task->mm->mmap_sem); | ||
1410 | if (r !=0) { | ||
1411 | printk(KERN_ERR "perfmon: [%d] unable to unmap sampling buffer @%p size=%lu\n", task->pid, vaddr, size); | ||
1412 | } | ||
1413 | |||
1414 | DPRINT(("do_unmap(%p, %lu)=%d\n", vaddr, size, r)); | ||
1415 | |||
1416 | return 0; | ||
1417 | } | ||
1418 | |||
/*
 * Free the actual physical storage used by the sampling buffer.
 *
 * Currently compiled out (#if 0). Returns 0 on success, or -EINVAL when
 * the context has no sampling buffer.
 */
#if 0
static int
pfm_free_smpl_buffer(pfm_context_t *ctx)
{
	pfm_buffer_fmt_t *fmt;

	if (ctx->ctx_smpl_hdr == NULL) {
		printk(KERN_ERR "perfmon: pfm_free_smpl_buffer [%d] no buffer\n", current->pid);
		return -EINVAL;
	}

	/*
	 * the buffer format is not used anymore after this point
	 */
	fmt = ctx->ctx_buf_fmt;

	DPRINT(("sampling buffer @%p size %lu vaddr=%p\n",
		ctx->ctx_smpl_hdr,
		ctx->ctx_smpl_size,
		ctx->ctx_smpl_vaddr));

	pfm_buf_fmt_exit(fmt, current, NULL, NULL);

	/*
	 * release the buffer and forget about it
	 */
	pfm_rvfree(ctx->ctx_smpl_hdr, ctx->ctx_smpl_size);

	ctx->ctx_smpl_hdr  = NULL;
	ctx->ctx_smpl_size = 0UL;

	return 0;
}
#endif
1457 | |||
1458 | static inline void | ||
1459 | pfm_exit_smpl_buffer(pfm_buffer_fmt_t *fmt) | ||
1460 | { | ||
1461 | if (fmt == NULL) return; | ||
1462 | |||
1463 | pfm_buf_fmt_exit(fmt, current, NULL, NULL); | ||
1464 | |||
1465 | } | ||
1466 | |||
1467 | /* | ||
1468 | * pfmfs should _never_ be mounted by userland - too much of security hassle, | ||
1469 | * no real gain from having the whole whorehouse mounted. So we don't need | ||
1470 | * any operations on the root directory. However, we need a non-trivial | ||
1471 | * d_name - pfm: will go nicely and kill the special-casing in procfs. | ||
1472 | */ | ||
1473 | static struct vfsmount *pfmfs_mnt; | ||
1474 | |||
1475 | static int __init | ||
1476 | init_pfm_fs(void) | ||
1477 | { | ||
1478 | int err = register_filesystem(&pfm_fs_type); | ||
1479 | if (!err) { | ||
1480 | pfmfs_mnt = kern_mount(&pfm_fs_type); | ||
1481 | err = PTR_ERR(pfmfs_mnt); | ||
1482 | if (IS_ERR(pfmfs_mnt)) | ||
1483 | unregister_filesystem(&pfm_fs_type); | ||
1484 | else | ||
1485 | err = 0; | ||
1486 | } | ||
1487 | return err; | ||
1488 | } | ||
1489 | |||
1490 | static void __exit | ||
1491 | exit_pfm_fs(void) | ||
1492 | { | ||
1493 | unregister_filesystem(&pfm_fs_type); | ||
1494 | mntput(pfmfs_mnt); | ||
1495 | } | ||
1496 | |||
1497 | static ssize_t | ||
1498 | pfm_read(struct file *filp, char __user *buf, size_t size, loff_t *ppos) | ||
1499 | { | ||
1500 | pfm_context_t *ctx; | ||
1501 | pfm_msg_t *msg; | ||
1502 | ssize_t ret; | ||
1503 | unsigned long flags; | ||
1504 | DECLARE_WAITQUEUE(wait, current); | ||
1505 | if (PFM_IS_FILE(filp) == 0) { | ||
1506 | printk(KERN_ERR "perfmon: pfm_poll: bad magic [%d]\n", current->pid); | ||
1507 | return -EINVAL; | ||
1508 | } | ||
1509 | |||
1510 | ctx = (pfm_context_t *)filp->private_data; | ||
1511 | if (ctx == NULL) { | ||
1512 | printk(KERN_ERR "perfmon: pfm_read: NULL ctx [%d]\n", current->pid); | ||
1513 | return -EINVAL; | ||
1514 | } | ||
1515 | |||
1516 | /* | ||
1517 | * check even when there is no message | ||
1518 | */ | ||
1519 | if (size < sizeof(pfm_msg_t)) { | ||
1520 | DPRINT(("message is too small ctx=%p (>=%ld)\n", ctx, sizeof(pfm_msg_t))); | ||
1521 | return -EINVAL; | ||
1522 | } | ||
1523 | |||
1524 | PROTECT_CTX(ctx, flags); | ||
1525 | |||
1526 | /* | ||
1527 | * put ourselves on the wait queue | ||
1528 | */ | ||
1529 | add_wait_queue(&ctx->ctx_msgq_wait, &wait); | ||
1530 | |||
1531 | |||
1532 | for(;;) { | ||
1533 | /* | ||
1534 | * check wait queue | ||
1535 | */ | ||
1536 | |||
1537 | set_current_state(TASK_INTERRUPTIBLE); | ||
1538 | |||
1539 | DPRINT(("head=%d tail=%d\n", ctx->ctx_msgq_head, ctx->ctx_msgq_tail)); | ||
1540 | |||
1541 | ret = 0; | ||
1542 | if(PFM_CTXQ_EMPTY(ctx) == 0) break; | ||
1543 | |||
1544 | UNPROTECT_CTX(ctx, flags); | ||
1545 | |||
1546 | /* | ||
1547 | * check non-blocking read | ||
1548 | */ | ||
1549 | ret = -EAGAIN; | ||
1550 | if(filp->f_flags & O_NONBLOCK) break; | ||
1551 | |||
1552 | /* | ||
1553 | * check pending signals | ||
1554 | */ | ||
1555 | if(signal_pending(current)) { | ||
1556 | ret = -EINTR; | ||
1557 | break; | ||
1558 | } | ||
1559 | /* | ||
1560 | * no message, so wait | ||
1561 | */ | ||
1562 | schedule(); | ||
1563 | |||
1564 | PROTECT_CTX(ctx, flags); | ||
1565 | } | ||
1566 | DPRINT(("[%d] back to running ret=%ld\n", current->pid, ret)); | ||
1567 | set_current_state(TASK_RUNNING); | ||
1568 | remove_wait_queue(&ctx->ctx_msgq_wait, &wait); | ||
1569 | |||
1570 | if (ret < 0) goto abort; | ||
1571 | |||
1572 | ret = -EINVAL; | ||
1573 | msg = pfm_get_next_msg(ctx); | ||
1574 | if (msg == NULL) { | ||
1575 | printk(KERN_ERR "perfmon: pfm_read no msg for ctx=%p [%d]\n", ctx, current->pid); | ||
1576 | goto abort_locked; | ||
1577 | } | ||
1578 | |||
1579 | DPRINT(("[%d] fd=%d type=%d\n", current->pid, msg->pfm_gen_msg.msg_ctx_fd, msg->pfm_gen_msg.msg_type)); | ||
1580 | |||
1581 | ret = -EFAULT; | ||
1582 | if(copy_to_user(buf, msg, sizeof(pfm_msg_t)) == 0) ret = sizeof(pfm_msg_t); | ||
1583 | |||
1584 | abort_locked: | ||
1585 | UNPROTECT_CTX(ctx, flags); | ||
1586 | abort: | ||
1587 | return ret; | ||
1588 | } | ||
1589 | |||
1590 | static ssize_t | ||
1591 | pfm_write(struct file *file, const char __user *ubuf, | ||
1592 | size_t size, loff_t *ppos) | ||
1593 | { | ||
1594 | DPRINT(("pfm_write called\n")); | ||
1595 | return -EINVAL; | ||
1596 | } | ||
1597 | |||
1598 | static unsigned int | ||
1599 | pfm_poll(struct file *filp, poll_table * wait) | ||
1600 | { | ||
1601 | pfm_context_t *ctx; | ||
1602 | unsigned long flags; | ||
1603 | unsigned int mask = 0; | ||
1604 | |||
1605 | if (PFM_IS_FILE(filp) == 0) { | ||
1606 | printk(KERN_ERR "perfmon: pfm_poll: bad magic [%d]\n", current->pid); | ||
1607 | return 0; | ||
1608 | } | ||
1609 | |||
1610 | ctx = (pfm_context_t *)filp->private_data; | ||
1611 | if (ctx == NULL) { | ||
1612 | printk(KERN_ERR "perfmon: pfm_poll: NULL ctx [%d]\n", current->pid); | ||
1613 | return 0; | ||
1614 | } | ||
1615 | |||
1616 | |||
1617 | DPRINT(("pfm_poll ctx_fd=%d before poll_wait\n", ctx->ctx_fd)); | ||
1618 | |||
1619 | poll_wait(filp, &ctx->ctx_msgq_wait, wait); | ||
1620 | |||
1621 | PROTECT_CTX(ctx, flags); | ||
1622 | |||
1623 | if (PFM_CTXQ_EMPTY(ctx) == 0) | ||
1624 | mask = POLLIN | POLLRDNORM; | ||
1625 | |||
1626 | UNPROTECT_CTX(ctx, flags); | ||
1627 | |||
1628 | DPRINT(("pfm_poll ctx_fd=%d mask=0x%x\n", ctx->ctx_fd, mask)); | ||
1629 | |||
1630 | return mask; | ||
1631 | } | ||
1632 | |||
1633 | static int | ||
1634 | pfm_ioctl(struct inode *inode, struct file *file, unsigned int cmd, unsigned long arg) | ||
1635 | { | ||
1636 | DPRINT(("pfm_ioctl called\n")); | ||
1637 | return -EINVAL; | ||
1638 | } | ||
1639 | |||
1640 | /* | ||
1641 | * interrupt cannot be masked when coming here | ||
1642 | */ | ||
1643 | static inline int | ||
1644 | pfm_do_fasync(int fd, struct file *filp, pfm_context_t *ctx, int on) | ||
1645 | { | ||
1646 | int ret; | ||
1647 | |||
1648 | ret = fasync_helper (fd, filp, on, &ctx->ctx_async_queue); | ||
1649 | |||
1650 | DPRINT(("pfm_fasync called by [%d] on ctx_fd=%d on=%d async_queue=%p ret=%d\n", | ||
1651 | current->pid, | ||
1652 | fd, | ||
1653 | on, | ||
1654 | ctx->ctx_async_queue, ret)); | ||
1655 | |||
1656 | return ret; | ||
1657 | } | ||
1658 | |||
1659 | static int | ||
1660 | pfm_fasync(int fd, struct file *filp, int on) | ||
1661 | { | ||
1662 | pfm_context_t *ctx; | ||
1663 | int ret; | ||
1664 | |||
1665 | if (PFM_IS_FILE(filp) == 0) { | ||
1666 | printk(KERN_ERR "perfmon: pfm_fasync bad magic [%d]\n", current->pid); | ||
1667 | return -EBADF; | ||
1668 | } | ||
1669 | |||
1670 | ctx = (pfm_context_t *)filp->private_data; | ||
1671 | if (ctx == NULL) { | ||
1672 | printk(KERN_ERR "perfmon: pfm_fasync NULL ctx [%d]\n", current->pid); | ||
1673 | return -EBADF; | ||
1674 | } | ||
1675 | /* | ||
1676 | * we cannot mask interrupts during this call because this may | ||
1677 | * may go to sleep if memory is not readily avalaible. | ||
1678 | * | ||
1679 | * We are protected from the conetxt disappearing by the get_fd()/put_fd() | ||
1680 | * done in caller. Serialization of this function is ensured by caller. | ||
1681 | */ | ||
1682 | ret = pfm_do_fasync(fd, filp, ctx, on); | ||
1683 | |||
1684 | |||
1685 | DPRINT(("pfm_fasync called on ctx_fd=%d on=%d async_queue=%p ret=%d\n", | ||
1686 | fd, | ||
1687 | on, | ||
1688 | ctx->ctx_async_queue, ret)); | ||
1689 | |||
1690 | return ret; | ||
1691 | } | ||
1692 | |||
1693 | #ifdef CONFIG_SMP | ||
1694 | /* | ||
1695 | * this function is exclusively called from pfm_close(). | ||
1696 | * The context is not protected at that time, nor are interrupts | ||
1697 | * on the remote CPU. That's necessary to avoid deadlocks. | ||
1698 | */ | ||
1699 | static void | ||
1700 | pfm_syswide_force_stop(void *info) | ||
1701 | { | ||
1702 | pfm_context_t *ctx = (pfm_context_t *)info; | ||
1703 | struct pt_regs *regs = ia64_task_regs(current); | ||
1704 | struct task_struct *owner; | ||
1705 | unsigned long flags; | ||
1706 | int ret; | ||
1707 | |||
1708 | if (ctx->ctx_cpu != smp_processor_id()) { | ||
1709 | printk(KERN_ERR "perfmon: pfm_syswide_force_stop for CPU%d but on CPU%d\n", | ||
1710 | ctx->ctx_cpu, | ||
1711 | smp_processor_id()); | ||
1712 | return; | ||
1713 | } | ||
1714 | owner = GET_PMU_OWNER(); | ||
1715 | if (owner != ctx->ctx_task) { | ||
1716 | printk(KERN_ERR "perfmon: pfm_syswide_force_stop CPU%d unexpected owner [%d] instead of [%d]\n", | ||
1717 | smp_processor_id(), | ||
1718 | owner->pid, ctx->ctx_task->pid); | ||
1719 | return; | ||
1720 | } | ||
1721 | if (GET_PMU_CTX() != ctx) { | ||
1722 | printk(KERN_ERR "perfmon: pfm_syswide_force_stop CPU%d unexpected ctx %p instead of %p\n", | ||
1723 | smp_processor_id(), | ||
1724 | GET_PMU_CTX(), ctx); | ||
1725 | return; | ||
1726 | } | ||
1727 | |||
1728 | DPRINT(("on CPU%d forcing system wide stop for [%d]\n", smp_processor_id(), ctx->ctx_task->pid)); | ||
1729 | /* | ||
1730 | * the context is already protected in pfm_close(), we simply | ||
1731 | * need to mask interrupts to avoid a PMU interrupt race on | ||
1732 | * this CPU | ||
1733 | */ | ||
1734 | local_irq_save(flags); | ||
1735 | |||
1736 | ret = pfm_context_unload(ctx, NULL, 0, regs); | ||
1737 | if (ret) { | ||
1738 | DPRINT(("context_unload returned %d\n", ret)); | ||
1739 | } | ||
1740 | |||
1741 | /* | ||
1742 | * unmask interrupts, PMU interrupts are now spurious here | ||
1743 | */ | ||
1744 | local_irq_restore(flags); | ||
1745 | } | ||
1746 | |||
1747 | static void | ||
1748 | pfm_syswide_cleanup_other_cpu(pfm_context_t *ctx) | ||
1749 | { | ||
1750 | int ret; | ||
1751 | |||
1752 | DPRINT(("calling CPU%d for cleanup\n", ctx->ctx_cpu)); | ||
1753 | ret = smp_call_function_single(ctx->ctx_cpu, pfm_syswide_force_stop, ctx, 0, 1); | ||
1754 | DPRINT(("called CPU%d for cleanup ret=%d\n", ctx->ctx_cpu, ret)); | ||
1755 | } | ||
1756 | #endif /* CONFIG_SMP */ | ||
1757 | |||
1758 | /* | ||
1759 | * called for each close(). Partially free resources. | ||
1760 | * When caller is self-monitoring, the context is unloaded. | ||
 *
 * Installed as the .flush handler in pfm_file_ops.
 *
 * Steps, in order:
 *   1. drop the file from the context's async queue if FASYNC is set;
 *   2. with the context protected, unload it when the attached task is
 *      the caller (through an IPI to ctx->ctx_cpu for a system-wide
 *      context whose owner is now running on a different CPU);
 *   3. after unprotecting, remove the caller's user mapping of the
 *      sampling buffer, if there is one and the caller still has an mm.
 *
 * Returns -EBADF when filp is not a perfmon file or has no context
 * attached, 0 otherwise. The result of the unload is not propagated.
1761 | */ | ||
1762 | static int | ||
1763 | pfm_flush(struct file *filp) | ||
1764 | { | ||
1765 | pfm_context_t *ctx; | ||
1766 | struct task_struct *task; | ||
1767 | struct pt_regs *regs; | ||
1768 | unsigned long flags; | ||
1769 | unsigned long smpl_buf_size = 0UL; | ||
1770 | void *smpl_buf_vaddr = NULL; | ||
1771 | int state, is_system; | ||
1772 | |||
1773 | if (PFM_IS_FILE(filp) == 0) { | ||
1774 | DPRINT(("bad magic for\n")); | ||
1775 | return -EBADF; | ||
1776 | } | ||
1777 | |||
1778 | ctx = (pfm_context_t *)filp->private_data; | ||
1779 | if (ctx == NULL) { | ||
1780 | printk(KERN_ERR "perfmon: pfm_flush: NULL ctx [%d]\n", current->pid); | ||
1781 | return -EBADF; | ||
1782 | } | ||
1783 | |||
1784 | /* | ||
1785 | * remove our file from the async queue, if we use this mode. | ||
1786 | * This can be done without the context being protected. We come | ||
1787 | * here when the context has become unreachable by other tasks. | ||
1788 | * | ||
1789 | * We may still have active monitoring at this point and we may | ||
1790 | * end up in pfm_overflow_handler(). However, fasync_helper() | ||
1791 | * operates with interrupts disabled and it cleans up the | ||
1792 | * queue. If the PMU handler is called prior to entering | ||
1793 | * fasync_helper() then it will send a signal. If it is | ||
1794 | * invoked after, it will find an empty queue and no | ||
1795 | * signal will be sent. In both cases, we are safe | ||
1796 | */ | ||
1797 | if (filp->f_flags & FASYNC) { | ||
1798 | DPRINT(("cleaning up async_queue=%p\n", ctx->ctx_async_queue)); | ||
1799 | pfm_do_fasync (-1, filp, ctx, 0); | ||
1800 | } | ||
1801 | |||
1802 | PROTECT_CTX(ctx, flags); | ||
1803 | |||
/* snapshot taken under protection; state is only used for the debug print below */
1804 | state = ctx->ctx_state; | ||
1805 | is_system = ctx->ctx_fl_system; | ||
1806 | |||
1807 | task = PFM_CTX_TASK(ctx); | ||
/* NOTE(review): computed even when task is NULL (UNLOADED); regs is only used on the task == current path */
1808 | regs = ia64_task_regs(task); | ||
1809 | |||
1810 | DPRINT(("ctx_state=%d is_current=%d\n", | ||
1811 | state, | ||
1812 | task == current ? 1 : 0)); | ||
1813 | |||
1814 | /* | ||
1815 | * if state == UNLOADED, then task is NULL | ||
1816 | */ | ||
1817 | |||
1818 | /* | ||
1819 | * we must stop and unload because we are losing access to the context. | ||
1820 | */ | ||
1821 | if (task == current) { | ||
1822 | #ifdef CONFIG_SMP | ||
1823 | /* | ||
1824 | * the task IS the owner but it migrated to another CPU: that's bad | ||
1825 | * but we must handle this cleanly. Unfortunately, the kernel does | ||
1826 | * not provide a mechanism to block migration (while the context is loaded). | ||
1827 | * | ||
1828 | * We need to release the resource on the ORIGINAL cpu. | ||
1829 | */ | ||
1830 | if (is_system && ctx->ctx_cpu != smp_processor_id()) { | ||
1831 | |||
1832 | DPRINT(("should be running on CPU%d\n", ctx->ctx_cpu)); | ||
1833 | /* | ||
1834 | * keep context protected but unmask interrupt for IPI | ||
1835 | */ | ||
1836 | local_irq_restore(flags); | ||
1837 | |||
1838 | pfm_syswide_cleanup_other_cpu(ctx); | ||
1839 | |||
1840 | /* | ||
1841 | * restore interrupt masking | ||
1842 | */ | ||
1843 | local_irq_save(flags); | ||
1844 | |||
1845 | /* | ||
1846 | * context is unloaded at this point | ||
1847 | */ | ||
1848 | } else | ||
1849 | #endif /* CONFIG_SMP */ | ||
1850 | { | ||
1851 | |||
1852 | DPRINT(("forcing unload\n")); | ||
1853 | /* | ||
1854 | * stop and unload, returning with state UNLOADED | ||
1855 | * and session unreserved. | ||
1856 | */ | ||
1857 | pfm_context_unload(ctx, NULL, 0, regs); | ||
1858 | |||
1859 | DPRINT(("ctx_state=%d\n", ctx->ctx_state)); | ||
1860 | } | ||
1861 | } | ||
1862 | |||
1863 | /* | ||
1864 | * remove virtual mapping, if any, for the calling task. | ||
1865 | * cannot reset ctx field until last user is calling close(). | ||
1866 | * | ||
1867 | * ctx_smpl_vaddr must never be cleared because it is needed | ||
1868 | * by every task with access to the context | ||
1869 | * | ||
1870 | * When called from do_exit(), the mm context is gone already, therefore | ||
1871 | * mm is NULL, i.e., the VMA is already gone and we do not have to | ||
1872 | * do anything here | ||
1873 | */ | ||
1874 | if (ctx->ctx_smpl_vaddr && current->mm) { | ||
1875 | smpl_buf_vaddr = ctx->ctx_smpl_vaddr; | ||
1876 | smpl_buf_size = ctx->ctx_smpl_size; | ||
1877 | } | ||
1878 | |||
1879 | UNPROTECT_CTX(ctx, flags); | ||
1880 | |||
1881 | /* | ||
1882 | * if there was a mapping, then we systematically remove it | ||
1883 | * at this point. Cannot be done inside critical section | ||
1884 | * because some VM function reenables interrupts. | ||
1885 | * | ||
1886 | */ | ||
1887 | if (smpl_buf_vaddr) pfm_remove_smpl_mapping(current, smpl_buf_vaddr, smpl_buf_size); | ||
1888 | |||
1889 | return 0; | ||
1890 | } | ||
1891 | /* | ||
1892 | * called either on explicit close() or from exit_files(). | ||
1893 | * Only the LAST user of the file gets to this point, i.e., it is | ||
1894 | * called only ONCE. | ||
1895 | * | ||
1896 | * IMPORTANT: we get called ONLY when the refcnt on the file gets to zero | ||
1897 | * (fput()),i.e, last task to access the file. Nobody else can access the | ||
1898 | * file at this point. | ||
1899 | * | ||
1900 | * When called from exit_files(), the VMA has been freed because exit_mm() | ||
1901 | * is executed before exit_files(). | ||
1902 | * | ||
1903 | * When called from exit_files(), the current task is not yet ZOMBIE but we | ||
1904 | * flush the PMU state to the context. | ||
 *
 * Installed as the .release handler in pfm_file_ops.
 *
 * Outline:
 *   - context already UNLOADED: go straight to the teardown at "doit";
 *   - context MASKED in blocking mode: flag it as going zombie, wake the
 *     monitored task through ctx_restart_sem and sleep on ctx_zombieq
 *     until that task reports completion;
 *   - otherwise, when the context is attached to another task: on SMP
 *     mark it PFM_CTX_ZOMBIE and defer freeing to that task; on UP
 *     unload it directly;
 *   - teardown: detach and later free the sampling buffer, unreserve the
 *     session for a ZOMBIE context, cut filp->private_data, and free the
 *     context unless freeing was deferred.
 *
 * Returns -EBADF when filp is not a perfmon file or has no context
 * attached, 0 otherwise.
1905 | */ | ||
1906 | static int | ||
1907 | pfm_close(struct inode *inode, struct file *filp) | ||
1908 | { | ||
1909 | pfm_context_t *ctx; | ||
1910 | struct task_struct *task; | ||
1911 | struct pt_regs *regs; | ||
1912 | DECLARE_WAITQUEUE(wait, current); | ||
1913 | unsigned long flags; | ||
1914 | unsigned long smpl_buf_size = 0UL; | ||
1915 | void *smpl_buf_addr = NULL; | ||
1916 | int free_possible = 1; | ||
1917 | int state, is_system; | ||
1918 | |||
1919 | DPRINT(("pfm_close called private=%p\n", filp->private_data)); | ||
1920 | |||
1921 | if (PFM_IS_FILE(filp) == 0) { | ||
1922 | DPRINT(("bad magic\n")); | ||
1923 | return -EBADF; | ||
1924 | } | ||
1925 | |||
1926 | ctx = (pfm_context_t *)filp->private_data; | ||
1927 | if (ctx == NULL) { | ||
1928 | printk(KERN_ERR "perfmon: pfm_close: NULL ctx [%d]\n", current->pid); | ||
1929 | return -EBADF; | ||
1930 | } | ||
1931 | |||
1932 | PROTECT_CTX(ctx, flags); | ||
1933 | |||
1934 | state = ctx->ctx_state; | ||
/* NOTE(review): is_system is assigned here but not read again in this function */
1935 | is_system = ctx->ctx_fl_system; | ||
1936 | |||
1937 | task = PFM_CTX_TASK(ctx); | ||
1938 | regs = ia64_task_regs(task); | ||
1939 | |||
1940 | DPRINT(("ctx_state=%d is_current=%d\n", | ||
1941 | state, | ||
1942 | task == current ? 1 : 0)); | ||
1943 | |||
1944 | /* | ||
1945 | * if task == current, then pfm_flush() unloaded the context | ||
1946 | */ | ||
1947 | if (state == PFM_CTX_UNLOADED) goto doit; | ||
1948 | |||
1949 | /* | ||
1950 | * context is loaded/masked and task != current, we need to | ||
1951 | * either force an unload or go zombie | ||
1952 | */ | ||
1953 | |||
1954 | /* | ||
1955 | * The task is currently blocked or will block after an overflow. | ||
1956 | * we must force it to wakeup to get out of the | ||
1957 | * MASKED state and transition to the unloaded state by itself. | ||
1958 | * | ||
1959 | * This situation is only possible for per-task mode | ||
1960 | */ | ||
1961 | if (state == PFM_CTX_MASKED && CTX_OVFL_NOBLOCK(ctx) == 0) { | ||
1962 | |||
1963 | /* | ||
1964 | * set a "partial" zombie state to be checked | ||
1965 | * upon return from down() in pfm_handle_work(). | ||
1966 | * | ||
1967 | * We cannot use the ZOMBIE state, because it is checked | ||
1968 | * by pfm_load_regs() which is called upon wakeup from down(). | ||
1969 | * In such case, it would free the context and then we would | ||
1970 | * return to pfm_handle_work() which would access the | ||
1971 | * stale context. Instead, we set a flag invisible to pfm_load_regs() | ||
1972 | * but visible to pfm_handle_work(). | ||
1973 | * | ||
1974 | * For some window of time, we have a zombie context with | ||
1975 | * ctx_state = MASKED and not ZOMBIE | ||
1976 | */ | ||
1977 | ctx->ctx_fl_going_zombie = 1; | ||
1978 | |||
1979 | /* | ||
1980 | * force task to wake up from MASKED state | ||
1981 | */ | ||
1982 | up(&ctx->ctx_restart_sem); | ||
1983 | |||
1984 | DPRINT(("waking up ctx_state=%d\n", state)); | ||
1985 | |||
1986 | /* | ||
1987 | * put ourself to sleep waiting for the other | ||
1988 | * task to report completion | ||
1989 | * | ||
1990 | * the context is protected by mutex, therefore there | ||
1991 | * is no risk of being notified of completion before | ||
1992 | * being actually on the waitq. | ||
1993 | */ | ||
/*
 * NOTE(review): the sleep is TASK_INTERRUPTIBLE, so presumably a pending
 * signal can end schedule() before the monitored task has reported
 * completion; confirm whether the context can still be attached when we
 * reach "doit" with free_possible == 1 in that case.
 */
1994 | set_current_state(TASK_INTERRUPTIBLE); | ||
1995 | add_wait_queue(&ctx->ctx_zombieq, &wait); | ||
1996 | |||
1997 | UNPROTECT_CTX(ctx, flags); | ||
1998 | |||
1999 | /* | ||
2000 | * XXX: check for signals : | ||
2001 | * - ok for explicit close | ||
2002 | * - not ok when coming from exit_files() | ||
2003 | */ | ||
2004 | schedule(); | ||
2005 | |||
2006 | |||
2007 | PROTECT_CTX(ctx, flags); | ||
2008 | |||
2009 | |||
2010 | remove_wait_queue(&ctx->ctx_zombieq, &wait); | ||
2011 | set_current_state(TASK_RUNNING); | ||
2012 | |||
2013 | /* | ||
2014 | * context is unloaded at this point | ||
2015 | */ | ||
/* NOTE(review): this prints the local snapshot taken before sleeping, not the current ctx->ctx_state */
2016 | DPRINT(("after zombie wakeup ctx_state=%d for\n", state)); | ||
2017 | } | ||
2018 | else if (task != current) { | ||
2019 | #ifdef CONFIG_SMP | ||
2020 | /* | ||
2021 | * switch context to zombie state | ||
2022 | */ | ||
2023 | ctx->ctx_state = PFM_CTX_ZOMBIE; | ||
2024 | |||
2025 | DPRINT(("zombie ctx for [%d]\n", task->pid)); | ||
2026 | /* | ||
2027 | * cannot free the context on the spot. deferred until | ||
2028 | * the task notices the ZOMBIE state | ||
2029 | */ | ||
2030 | free_possible = 0; | ||
2031 | #else | ||
2032 | pfm_context_unload(ctx, NULL, 0, regs); | ||
2033 | #endif | ||
2034 | } | ||
2035 | |||
2036 | doit: | ||
2037 | /* reload state, may have changed during opening of critical section */ | ||
2038 | state = ctx->ctx_state; | ||
2039 | |||
2040 | /* | ||
2041 | * the context is still attached to a task (possibly current) | ||
2042 | * we cannot destroy it right now | ||
2043 | */ | ||
2044 | |||
2045 | /* | ||
2046 | * we must free the sampling buffer right here because | ||
2047 | * we cannot rely on it being cleaned up later by the | ||
2048 | * monitored task. It is not possible to free vmalloc'ed | ||
2049 | * memory in pfm_load_regs(). Instead, we remove the buffer | ||
2050 | * now. should there be subsequent PMU overflow originally | ||
2051 | * meant for sampling, they will be converted to spurious | ||
2052 | * and that's fine because the monitoring tool is gone anyway. | ||
2053 | */ | ||
2054 | if (ctx->ctx_smpl_hdr) { | ||
2055 | smpl_buf_addr = ctx->ctx_smpl_hdr; | ||
2056 | smpl_buf_size = ctx->ctx_smpl_size; | ||
2057 | /* no more sampling */ | ||
2058 | ctx->ctx_smpl_hdr = NULL; | ||
2059 | ctx->ctx_fl_is_sampling = 0; | ||
2060 | } | ||
2061 | |||
2062 | DPRINT(("ctx_state=%d free_possible=%d addr=%p size=%lu\n", | ||
2063 | state, | ||
2064 | free_possible, | ||
2065 | smpl_buf_addr, | ||
2066 | smpl_buf_size)); | ||
2067 | |||
2068 | if (smpl_buf_addr) pfm_exit_smpl_buffer(ctx->ctx_buf_fmt); | ||
2069 | |||
2070 | /* | ||
2071 | * UNLOADED means that the session has already been unreserved. | ||
2072 | */ | ||
2073 | if (state == PFM_CTX_ZOMBIE) { | ||
2074 | pfm_unreserve_session(ctx, ctx->ctx_fl_system , ctx->ctx_cpu); | ||
2075 | } | ||
2076 | |||
2077 | /* | ||
2078 | * disconnect file descriptor from context must be done | ||
2079 | * before we unlock. | ||
2080 | */ | ||
2081 | filp->private_data = NULL; | ||
2082 | |||
2083 | /* | ||
2084 | * if we free on the spot, the context is now completely unreachable | ||
2085 | * from the callers side. The monitored task side is also cut, so we | ||
2086 | * can freely cut. | ||
2087 | * | ||
2088 | * If we have a deferred free, only the caller side is disconnected. | ||
2089 | */ | ||
2090 | UNPROTECT_CTX(ctx, flags); | ||
2091 | |||
2092 | /* | ||
2093 | * All memory free operations (especially for vmalloc'ed memory) | ||
2094 | * MUST be done with interrupts ENABLED. | ||
2095 | */ | ||
2096 | if (smpl_buf_addr) pfm_rvfree(smpl_buf_addr, smpl_buf_size); | ||
2097 | |||
2098 | /* | ||
2099 | * return the memory used by the context | ||
2100 | */ | ||
2101 | if (free_possible) pfm_context_free(ctx); | ||
2102 | |||
2103 | return 0; | ||
2104 | } | ||
2105 | |||
2106 | static int | ||
2107 | pfm_no_open(struct inode *irrelevant, struct file *dontcare) | ||
2108 | { | ||
2109 | DPRINT(("pfm_no_open called\n")); | ||
2110 | return -ENXIO; | ||
2111 | } | ||
2112 | |||
2113 | |||
2114 | |||
2115 | static struct file_operations pfm_file_ops = { | ||
2116 | .llseek = no_llseek, | ||
2117 | .read = pfm_read, | ||
2118 | .write = pfm_write, | ||
2119 | .poll = pfm_poll, | ||
2120 | .ioctl = pfm_ioctl, | ||
2121 | .open = pfm_no_open, /* special open code to disallow open via /proc */ | ||
2122 | .fasync = pfm_fasync, | ||
2123 | .release = pfm_close, | ||
2124 | .flush = pfm_flush | ||
2125 | }; | ||
2126 | |||
/*
 * d_delete hook for pfmfs dentries: unconditionally returns 1.
 */
static int
pfmfs_delete_dentry(struct dentry *dentry)
{
	const int always = 1;

	return always;
}
2132 | |||
2133 | static struct dentry_operations pfmfs_dentry_operations = { | ||
2134 | .d_delete = pfmfs_delete_dentry, | ||
2135 | }; | ||
2136 | |||
2137 | |||
2138 | static int | ||
2139 | pfm_alloc_fd(struct file **cfile) | ||
2140 | { | ||
2141 | int fd, ret = 0; | ||
2142 | struct file *file = NULL; | ||
2143 | struct inode * inode; | ||
2144 | char name[32]; | ||
2145 | struct qstr this; | ||
2146 | |||
2147 | fd = get_unused_fd(); | ||
2148 | if (fd < 0) return -ENFILE; | ||
2149 | |||
2150 | ret = -ENFILE; | ||
2151 | |||
2152 | file = get_empty_filp(); | ||
2153 | if (!file) goto out; | ||
2154 | |||
2155 | /* | ||
2156 | * allocate a new inode | ||
2157 | */ | ||
2158 | inode = new_inode(pfmfs_mnt->mnt_sb); | ||
2159 | if (!inode) goto out; | ||
2160 | |||
2161 | DPRINT(("new inode ino=%ld @%p\n", inode->i_ino, inode)); | ||
2162 | |||
2163 | inode->i_mode = S_IFCHR|S_IRUGO; | ||
2164 | inode->i_uid = current->fsuid; | ||
2165 | inode->i_gid = current->fsgid; | ||
2166 | |||
2167 | sprintf(name, "[%lu]", inode->i_ino); | ||
2168 | this.name = name; | ||
2169 | this.len = strlen(name); | ||
2170 | this.hash = inode->i_ino; | ||
2171 | |||
2172 | ret = -ENOMEM; | ||
2173 | |||
2174 | /* | ||
2175 | * allocate a new dcache entry | ||
2176 | */ | ||
2177 | file->f_dentry = d_alloc(pfmfs_mnt->mnt_sb->s_root, &this); | ||
2178 | if (!file->f_dentry) goto out; | ||
2179 | |||
2180 | file->f_dentry->d_op = &pfmfs_dentry_operations; | ||
2181 | |||
2182 | d_add(file->f_dentry, inode); | ||
2183 | file->f_vfsmnt = mntget(pfmfs_mnt); | ||
2184 | file->f_mapping = inode->i_mapping; | ||
2185 | |||
2186 | file->f_op = &pfm_file_ops; | ||
2187 | file->f_mode = FMODE_READ; | ||
2188 | file->f_flags = O_RDONLY; | ||
2189 | file->f_pos = 0; | ||
2190 | |||
2191 | /* | ||
2192 | * may have to delay until context is attached? | ||
2193 | */ | ||
2194 | fd_install(fd, file); | ||
2195 | |||
2196 | /* | ||
2197 | * the file structure we will use | ||
2198 | */ | ||
2199 | *cfile = file; | ||
2200 | |||
2201 | return fd; | ||
2202 | out: | ||
2203 | if (file) put_filp(file); | ||
2204 | put_unused_fd(fd); | ||
2205 | return ret; | ||
2206 | } | ||
2207 | |||
2208 | static void | ||
2209 | pfm_free_fd(int fd, struct file *file) | ||
2210 | { | ||
2211 | struct files_struct *files = current->files; | ||
2212 | |||
2213 | /* | ||
2214 | * there ie no fd_uninstall(), so we do it here | ||
2215 | */ | ||
2216 | spin_lock(&files->file_lock); | ||
2217 | files->fd[fd] = NULL; | ||
2218 | spin_unlock(&files->file_lock); | ||
2219 | |||
2220 | if (file) put_filp(file); | ||
2221 | put_unused_fd(fd); | ||
2222 | } | ||
2223 | |||
2224 | static int | ||
2225 | pfm_remap_buffer(struct vm_area_struct *vma, unsigned long buf, unsigned long addr, unsigned long size) | ||
2226 | { | ||
2227 | DPRINT(("CPU%d buf=0x%lx addr=0x%lx size=%ld\n", smp_processor_id(), buf, addr, size)); | ||
2228 | |||
2229 | while (size > 0) { | ||
2230 | unsigned long pfn = ia64_tpa(buf) >> PAGE_SHIFT; | ||
2231 | |||
2232 | |||
2233 | if (remap_pfn_range(vma, addr, pfn, PAGE_SIZE, PAGE_READONLY)) | ||
2234 | return -ENOMEM; | ||
2235 | |||
2236 | addr += PAGE_SIZE; | ||
2237 | buf += PAGE_SIZE; | ||
2238 | size -= PAGE_SIZE; | ||
2239 | } | ||
2240 | return 0; | ||
2241 | } | ||
2242 | |||
2243 | /* | ||
2244 | * allocate a sampling buffer and remaps it into the user address space of the task | ||
2245 | */ | ||
2246 | static int | ||
2247 | pfm_smpl_buffer_alloc(struct task_struct *task, pfm_context_t *ctx, unsigned long rsize, void **user_vaddr) | ||
2248 | { | ||
2249 | struct mm_struct *mm = task->mm; | ||
2250 | struct vm_area_struct *vma = NULL; | ||
2251 | unsigned long size; | ||
2252 | void *smpl_buf; | ||
2253 | |||
2254 | |||
2255 | /* | ||
2256 | * the fixed header + requested size and align to page boundary | ||
2257 | */ | ||
2258 | size = PAGE_ALIGN(rsize); | ||
2259 | |||
2260 | DPRINT(("sampling buffer rsize=%lu size=%lu bytes\n", rsize, size)); | ||
2261 | |||
2262 | /* | ||
2263 | * check requested size to avoid Denial-of-service attacks | ||
2264 | * XXX: may have to refine this test | ||
2265 | * Check against address space limit. | ||
2266 | * | ||
2267 | * if ((mm->total_vm << PAGE_SHIFT) + len> task->rlim[RLIMIT_AS].rlim_cur) | ||
2268 | * return -ENOMEM; | ||
2269 | */ | ||
2270 | if (size > task->signal->rlim[RLIMIT_MEMLOCK].rlim_cur) | ||
2271 | return -ENOMEM; | ||
2272 | |||
2273 | /* | ||
2274 | * We do the easy to undo allocations first. | ||
2275 | * | ||
2276 | * pfm_rvmalloc(), clears the buffer, so there is no leak | ||
2277 | */ | ||
2278 | smpl_buf = pfm_rvmalloc(size); | ||
2279 | if (smpl_buf == NULL) { | ||
2280 | DPRINT(("Can't allocate sampling buffer\n")); | ||
2281 | return -ENOMEM; | ||
2282 | } | ||
2283 | |||
2284 | DPRINT(("smpl_buf @%p\n", smpl_buf)); | ||
2285 | |||
2286 | /* allocate vma */ | ||
2287 | vma = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL); | ||
2288 | if (!vma) { | ||
2289 | DPRINT(("Cannot allocate vma\n")); | ||
2290 | goto error_kmem; | ||
2291 | } | ||
2292 | memset(vma, 0, sizeof(*vma)); | ||
2293 | |||
2294 | /* | ||
2295 | * partially initialize the vma for the sampling buffer | ||
2296 | */ | ||
2297 | vma->vm_mm = mm; | ||
2298 | vma->vm_flags = VM_READ| VM_MAYREAD |VM_RESERVED; | ||
2299 | vma->vm_page_prot = PAGE_READONLY; /* XXX may need to change */ | ||
2300 | |||
2301 | /* | ||
2302 | * Now we have everything we need and we can initialize | ||
2303 | * and connect all the data structures | ||
2304 | */ | ||
2305 | |||
2306 | ctx->ctx_smpl_hdr = smpl_buf; | ||
2307 | ctx->ctx_smpl_size = size; /* aligned size */ | ||
2308 | |||
2309 | /* | ||
2310 | * Let's do the difficult operations next. | ||
2311 | * | ||
2312 | * now we atomically find some area in the address space and | ||
2313 | * remap the buffer in it. | ||
2314 | */ | ||
2315 | down_write(&task->mm->mmap_sem); | ||
2316 | |||
2317 | /* find some free area in address space, must have mmap sem held */ | ||
2318 | vma->vm_start = pfm_get_unmapped_area(NULL, 0, size, 0, MAP_PRIVATE|MAP_ANONYMOUS, 0); | ||
2319 | if (vma->vm_start == 0UL) { | ||
2320 | DPRINT(("Cannot find unmapped area for size %ld\n", size)); | ||
2321 | up_write(&task->mm->mmap_sem); | ||
2322 | goto error; | ||
2323 | } | ||
2324 | vma->vm_end = vma->vm_start + size; | ||
2325 | vma->vm_pgoff = vma->vm_start >> PAGE_SHIFT; | ||
2326 | |||
2327 | DPRINT(("aligned size=%ld, hdr=%p mapped @0x%lx\n", size, ctx->ctx_smpl_hdr, vma->vm_start)); | ||
2328 | |||
2329 | /* can only be applied to current task, need to have the mm semaphore held when called */ | ||
2330 | if (pfm_remap_buffer(vma, (unsigned long)smpl_buf, vma->vm_start, size)) { | ||
2331 | DPRINT(("Can't remap buffer\n")); | ||
2332 | up_write(&task->mm->mmap_sem); | ||
2333 | goto error; | ||
2334 | } | ||
2335 | |||
2336 | /* | ||
2337 | * now insert the vma in the vm list for the process, must be | ||
2338 | * done with mmap lock held | ||
2339 | */ | ||
2340 | insert_vm_struct(mm, vma); | ||
2341 | |||
2342 | mm->total_vm += size >> PAGE_SHIFT; | ||
2343 | vm_stat_account(vma); | ||
2344 | up_write(&task->mm->mmap_sem); | ||
2345 | |||
2346 | /* | ||
2347 | * keep track of user level virtual address | ||
2348 | */ | ||
2349 | ctx->ctx_smpl_vaddr = (void *)vma->vm_start; | ||
2350 | *(unsigned long *)user_vaddr = vma->vm_start; | ||
2351 | |||
2352 | return 0; | ||
2353 | |||
2354 | error: | ||
2355 | kmem_cache_free(vm_area_cachep, vma); | ||
2356 | error_kmem: | ||
2357 | pfm_rvfree(smpl_buf, size); | ||
2358 | |||
2359 | return -ENOMEM; | ||
2360 | } | ||
2361 | |||
2362 | /* | ||
2363 | * XXX: do something better here | ||
2364 | */ | ||
2365 | static int | ||
2366 | pfm_bad_permissions(struct task_struct *task) | ||
2367 | { | ||
2368 | /* inspired by ptrace_attach() */ | ||
2369 | DPRINT(("cur: uid=%d gid=%d task: euid=%d suid=%d uid=%d egid=%d sgid=%d\n", | ||
2370 | current->uid, | ||
2371 | current->gid, | ||
2372 | task->euid, | ||
2373 | task->suid, | ||
2374 | task->uid, | ||
2375 | task->egid, | ||
2376 | task->sgid)); | ||
2377 | |||
2378 | return ((current->uid != task->euid) | ||
2379 | || (current->uid != task->suid) | ||
2380 | || (current->uid != task->uid) | ||
2381 | || (current->gid != task->egid) | ||
2382 | || (current->gid != task->sgid) | ||
2383 | || (current->gid != task->gid)) && !capable(CAP_SYS_PTRACE); | ||
2384 | } | ||
2385 | |||
2386 | static int | ||
2387 | pfarg_is_sane(struct task_struct *task, pfarg_context_t *pfx) | ||
2388 | { | ||
2389 | int ctx_flags; | ||
2390 | |||
2391 | /* valid signal */ | ||
2392 | |||
2393 | ctx_flags = pfx->ctx_flags; | ||
2394 | |||
2395 | if (ctx_flags & PFM_FL_SYSTEM_WIDE) { | ||
2396 | |||
2397 | /* | ||
2398 | * cannot block in this mode | ||
2399 | */ | ||
2400 | if (ctx_flags & PFM_FL_NOTIFY_BLOCK) { | ||
2401 | DPRINT(("cannot use blocking mode when in system wide monitoring\n")); | ||
2402 | return -EINVAL; | ||
2403 | } | ||
2404 | } else { | ||
2405 | } | ||
2406 | /* probably more to add here */ | ||
2407 | |||
2408 | return 0; | ||
2409 | } | ||
2410 | |||
2411 | static int | ||
2412 | pfm_setup_buffer_fmt(struct task_struct *task, pfm_context_t *ctx, unsigned int ctx_flags, | ||
2413 | unsigned int cpu, pfarg_context_t *arg) | ||
2414 | { | ||
2415 | pfm_buffer_fmt_t *fmt = NULL; | ||
2416 | unsigned long size = 0UL; | ||
2417 | void *uaddr = NULL; | ||
2418 | void *fmt_arg = NULL; | ||
2419 | int ret = 0; | ||
2420 | #define PFM_CTXARG_BUF_ARG(a) (pfm_buffer_fmt_t *)(a+1) | ||
2421 | |||
2422 | /* invoke and lock buffer format, if found */ | ||
2423 | fmt = pfm_find_buffer_fmt(arg->ctx_smpl_buf_id); | ||
2424 | if (fmt == NULL) { | ||
2425 | DPRINT(("[%d] cannot find buffer format\n", task->pid)); | ||
2426 | return -EINVAL; | ||
2427 | } | ||
2428 | |||
2429 | /* | ||
2430 | * buffer argument MUST be contiguous to pfarg_context_t | ||
2431 | */ | ||
2432 | if (fmt->fmt_arg_size) fmt_arg = PFM_CTXARG_BUF_ARG(arg); | ||
2433 | |||
2434 | ret = pfm_buf_fmt_validate(fmt, task, ctx_flags, cpu, fmt_arg); | ||
2435 | |||
2436 | DPRINT(("[%d] after validate(0x%x,%d,%p)=%d\n", task->pid, ctx_flags, cpu, fmt_arg, ret)); | ||
2437 | |||
2438 | if (ret) goto error; | ||
2439 | |||
2440 | /* link buffer format and context */ | ||
2441 | ctx->ctx_buf_fmt = fmt; | ||
2442 | |||
2443 | /* | ||
2444 | * check if buffer format wants to use perfmon buffer allocation/mapping service | ||
2445 | */ | ||
2446 | ret = pfm_buf_fmt_getsize(fmt, task, ctx_flags, cpu, fmt_arg, &size); | ||
2447 | if (ret) goto error; | ||
2448 | |||
2449 | if (size) { | ||
2450 | /* | ||
2451 | * buffer is always remapped into the caller's address space | ||
2452 | */ | ||
2453 | ret = pfm_smpl_buffer_alloc(current, ctx, size, &uaddr); | ||
2454 | if (ret) goto error; | ||
2455 | |||
2456 | /* keep track of user address of buffer */ | ||
2457 | arg->ctx_smpl_vaddr = uaddr; | ||
2458 | } | ||
2459 | ret = pfm_buf_fmt_init(fmt, task, ctx->ctx_smpl_hdr, ctx_flags, cpu, fmt_arg); | ||
2460 | |||
2461 | error: | ||
2462 | return ret; | ||
2463 | } | ||
2464 | |||
2465 | static void | ||
2466 | pfm_reset_pmu_state(pfm_context_t *ctx) | ||
2467 | { | ||
2468 | int i; | ||
2469 | |||
2470 | /* | ||
2471 | * install reset values for PMC. | ||
2472 | */ | ||
2473 | for (i=1; PMC_IS_LAST(i) == 0; i++) { | ||
2474 | if (PMC_IS_IMPL(i) == 0) continue; | ||
2475 | ctx->ctx_pmcs[i] = PMC_DFL_VAL(i); | ||
2476 | DPRINT(("pmc[%d]=0x%lx\n", i, ctx->ctx_pmcs[i])); | ||
2477 | } | ||
2478 | /* | ||
2479 | * PMD registers are set to 0UL when the context in memset() | ||
2480 | */ | ||
2481 | |||
2482 | /* | ||
2483 | * On context switched restore, we must restore ALL pmc and ALL pmd even | ||
2484 | * when they are not actively used by the task. In UP, the incoming process | ||
2485 | * may otherwise pick up left over PMC, PMD state from the previous process. | ||
2486 | * As opposed to PMD, stale PMC can cause harm to the incoming | ||
2487 | * process because they may change what is being measured. | ||
2488 | * Therefore, we must systematically reinstall the entire | ||
2489 | * PMC state. In SMP, the same thing is possible on the | ||
2490 | * same CPU but also on between 2 CPUs. | ||
2491 | * | ||
2492 | * The problem with PMD is information leaking especially | ||
2493 | * to user level when psr.sp=0 | ||
2494 | * | ||
2495 | * There is unfortunately no easy way to avoid this problem | ||
2496 | * on either UP or SMP. This definitively slows down the | ||
2497 | * pfm_load_regs() function. | ||
2498 | */ | ||
2499 | |||
2500 | /* | ||
2501 | * bitmask of all PMCs accessible to this context | ||
2502 | * | ||
2503 | * PMC0 is treated differently. | ||
2504 | */ | ||
2505 | ctx->ctx_all_pmcs[0] = pmu_conf->impl_pmcs[0] & ~0x1; | ||
2506 | |||
2507 | /* | ||
2508 | * bitmask of all PMDs that are accesible to this context | ||
2509 | */ | ||
2510 | ctx->ctx_all_pmds[0] = pmu_conf->impl_pmds[0]; | ||
2511 | |||
2512 | DPRINT(("<%d> all_pmcs=0x%lx all_pmds=0x%lx\n", ctx->ctx_fd, ctx->ctx_all_pmcs[0],ctx->ctx_all_pmds[0])); | ||
2513 | |||
2514 | /* | ||
2515 | * useful in case of re-enable after disable | ||
2516 | */ | ||
2517 | ctx->ctx_used_ibrs[0] = 0UL; | ||
2518 | ctx->ctx_used_dbrs[0] = 0UL; | ||
2519 | } | ||
2520 | |||
2521 | static int | ||
2522 | pfm_ctx_getsize(void *arg, size_t *sz) | ||
2523 | { | ||
2524 | pfarg_context_t *req = (pfarg_context_t *)arg; | ||
2525 | pfm_buffer_fmt_t *fmt; | ||
2526 | |||
2527 | *sz = 0; | ||
2528 | |||
2529 | if (!pfm_uuid_cmp(req->ctx_smpl_buf_id, pfm_null_uuid)) return 0; | ||
2530 | |||
2531 | fmt = pfm_find_buffer_fmt(req->ctx_smpl_buf_id); | ||
2532 | if (fmt == NULL) { | ||
2533 | DPRINT(("cannot find buffer format\n")); | ||
2534 | return -EINVAL; | ||
2535 | } | ||
2536 | /* get just enough to copy in user parameters */ | ||
2537 | *sz = fmt->fmt_arg_size; | ||
2538 | DPRINT(("arg_size=%lu\n", *sz)); | ||
2539 | |||
2540 | return 0; | ||
2541 | } | ||
2542 | |||
2543 | |||
2544 | |||
2545 | /* | ||
2546 | * cannot attach if : | ||
2547 | * - kernel task | ||
2548 | * - task not owned by caller | ||
2549 | * - task incompatible with context mode | ||
2550 | */ | ||
2551 | static int | ||
2552 | pfm_task_incompatible(pfm_context_t *ctx, struct task_struct *task) | ||
2553 | { | ||
2554 | /* | ||
2555 | * no kernel task or task not owner by caller | ||
2556 | */ | ||
2557 | if (task->mm == NULL) { | ||
2558 | DPRINT(("task [%d] has not memory context (kernel thread)\n", task->pid)); | ||
2559 | return -EPERM; | ||
2560 | } | ||
2561 | if (pfm_bad_permissions(task)) { | ||
2562 | DPRINT(("no permission to attach to [%d]\n", task->pid)); | ||
2563 | return -EPERM; | ||
2564 | } | ||
2565 | /* | ||
2566 | * cannot block in self-monitoring mode | ||
2567 | */ | ||
2568 | if (CTX_OVFL_NOBLOCK(ctx) == 0 && task == current) { | ||
2569 | DPRINT(("cannot load a blocking context on self for [%d]\n", task->pid)); | ||
2570 | return -EINVAL; | ||
2571 | } | ||
2572 | |||
2573 | if (task->exit_state == EXIT_ZOMBIE) { | ||
2574 | DPRINT(("cannot attach to zombie task [%d]\n", task->pid)); | ||
2575 | return -EBUSY; | ||
2576 | } | ||
2577 | |||
2578 | /* | ||
2579 | * always ok for self | ||
2580 | */ | ||
2581 | if (task == current) return 0; | ||
2582 | |||
2583 | if ((task->state != TASK_STOPPED) && (task->state != TASK_TRACED)) { | ||
2584 | DPRINT(("cannot attach to non-stopped task [%d] state=%ld\n", task->pid, task->state)); | ||
2585 | return -EBUSY; | ||
2586 | } | ||
2587 | /* | ||
2588 | * make sure the task is off any CPU | ||
2589 | */ | ||
2590 | wait_task_inactive(task); | ||
2591 | |||
2592 | /* more to come... */ | ||
2593 | |||
2594 | return 0; | ||
2595 | } | ||
2596 | |||
2597 | static int | ||
2598 | pfm_get_task(pfm_context_t *ctx, pid_t pid, struct task_struct **task) | ||
2599 | { | ||
2600 | struct task_struct *p = current; | ||
2601 | int ret; | ||
2602 | |||
2603 | /* XXX: need to add more checks here */ | ||
2604 | if (pid < 2) return -EPERM; | ||
2605 | |||
2606 | if (pid != current->pid) { | ||
2607 | |||
2608 | read_lock(&tasklist_lock); | ||
2609 | |||
2610 | p = find_task_by_pid(pid); | ||
2611 | |||
2612 | /* make sure task cannot go away while we operate on it */ | ||
2613 | if (p) get_task_struct(p); | ||
2614 | |||
2615 | read_unlock(&tasklist_lock); | ||
2616 | |||
2617 | if (p == NULL) return -ESRCH; | ||
2618 | } | ||
2619 | |||
2620 | ret = pfm_task_incompatible(ctx, p); | ||
2621 | if (ret == 0) { | ||
2622 | *task = p; | ||
2623 | } else if (p != current) { | ||
2624 | pfm_put_task(p); | ||
2625 | } | ||
2626 | return ret; | ||
2627 | } | ||
2628 | |||
2629 | |||
2630 | |||
2631 | static int | ||
2632 | pfm_context_create(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs) | ||
2633 | { | ||
2634 | pfarg_context_t *req = (pfarg_context_t *)arg; | ||
2635 | struct file *filp; | ||
2636 | int ctx_flags; | ||
2637 | int ret; | ||
2638 | |||
2639 | /* let's check the arguments first */ | ||
2640 | ret = pfarg_is_sane(current, req); | ||
2641 | if (ret < 0) return ret; | ||
2642 | |||
2643 | ctx_flags = req->ctx_flags; | ||
2644 | |||
2645 | ret = -ENOMEM; | ||
2646 | |||
2647 | ctx = pfm_context_alloc(); | ||
2648 | if (!ctx) goto error; | ||
2649 | |||
2650 | ret = pfm_alloc_fd(&filp); | ||
2651 | if (ret < 0) goto error_file; | ||
2652 | |||
2653 | req->ctx_fd = ctx->ctx_fd = ret; | ||
2654 | |||
2655 | /* | ||
2656 | * attach context to file | ||
2657 | */ | ||
2658 | filp->private_data = ctx; | ||
2659 | |||
2660 | /* | ||
2661 | * does the user want to sample? | ||
2662 | */ | ||
2663 | if (pfm_uuid_cmp(req->ctx_smpl_buf_id, pfm_null_uuid)) { | ||
2664 | ret = pfm_setup_buffer_fmt(current, ctx, ctx_flags, 0, req); | ||
2665 | if (ret) goto buffer_error; | ||
2666 | } | ||
2667 | |||
2668 | /* | ||
2669 | * init context protection lock | ||
2670 | */ | ||
2671 | spin_lock_init(&ctx->ctx_lock); | ||
2672 | |||
2673 | /* | ||
2674 | * context is unloaded | ||
2675 | */ | ||
2676 | ctx->ctx_state = PFM_CTX_UNLOADED; | ||
2677 | |||
2678 | /* | ||
2679 | * initialization of context's flags | ||
2680 | */ | ||
2681 | ctx->ctx_fl_block = (ctx_flags & PFM_FL_NOTIFY_BLOCK) ? 1 : 0; | ||
2682 | ctx->ctx_fl_system = (ctx_flags & PFM_FL_SYSTEM_WIDE) ? 1: 0; | ||
2683 | ctx->ctx_fl_is_sampling = ctx->ctx_buf_fmt ? 1 : 0; /* assume record() is defined */ | ||
2684 | ctx->ctx_fl_no_msg = (ctx_flags & PFM_FL_OVFL_NO_MSG) ? 1: 0; | ||
2685 | /* | ||
2686 | * will move to set properties | ||
2687 | * ctx->ctx_fl_excl_idle = (ctx_flags & PFM_FL_EXCL_IDLE) ? 1: 0; | ||
2688 | */ | ||
2689 | |||
2690 | /* | ||
2691 | * init restart semaphore to locked | ||
2692 | */ | ||
2693 | sema_init(&ctx->ctx_restart_sem, 0); | ||
2694 | |||
2695 | /* | ||
2696 | * activation is used in SMP only | ||
2697 | */ | ||
2698 | ctx->ctx_last_activation = PFM_INVALID_ACTIVATION; | ||
2699 | SET_LAST_CPU(ctx, -1); | ||
2700 | |||
2701 | /* | ||
2702 | * initialize notification message queue | ||
2703 | */ | ||
2704 | ctx->ctx_msgq_head = ctx->ctx_msgq_tail = 0; | ||
2705 | init_waitqueue_head(&ctx->ctx_msgq_wait); | ||
2706 | init_waitqueue_head(&ctx->ctx_zombieq); | ||
2707 | |||
2708 | DPRINT(("ctx=%p flags=0x%x system=%d notify_block=%d excl_idle=%d no_msg=%d ctx_fd=%d \n", | ||
2709 | ctx, | ||
2710 | ctx_flags, | ||
2711 | ctx->ctx_fl_system, | ||
2712 | ctx->ctx_fl_block, | ||
2713 | ctx->ctx_fl_excl_idle, | ||
2714 | ctx->ctx_fl_no_msg, | ||
2715 | ctx->ctx_fd)); | ||
2716 | |||
2717 | /* | ||
2718 | * initialize soft PMU state | ||
2719 | */ | ||
2720 | pfm_reset_pmu_state(ctx); | ||
2721 | |||
2722 | return 0; | ||
2723 | |||
2724 | buffer_error: | ||
2725 | pfm_free_fd(ctx->ctx_fd, filp); | ||
2726 | |||
2727 | if (ctx->ctx_buf_fmt) { | ||
2728 | pfm_buf_fmt_exit(ctx->ctx_buf_fmt, current, NULL, regs); | ||
2729 | } | ||
2730 | error_file: | ||
2731 | pfm_context_free(ctx); | ||
2732 | |||
2733 | error: | ||
2734 | return ret; | ||
2735 | } | ||
2736 | |||
2737 | static inline unsigned long | ||
2738 | pfm_new_counter_value (pfm_counter_t *reg, int is_long_reset) | ||
2739 | { | ||
2740 | unsigned long val = is_long_reset ? reg->long_reset : reg->short_reset; | ||
2741 | unsigned long new_seed, old_seed = reg->seed, mask = reg->mask; | ||
2742 | extern unsigned long carta_random32 (unsigned long seed); | ||
2743 | |||
2744 | if (reg->flags & PFM_REGFL_RANDOM) { | ||
2745 | new_seed = carta_random32(old_seed); | ||
2746 | val -= (old_seed & mask); /* counter values are negative numbers! */ | ||
2747 | if ((mask >> 32) != 0) | ||
2748 | /* construct a full 64-bit random value: */ | ||
2749 | new_seed |= carta_random32(old_seed >> 32) << 32; | ||
2750 | reg->seed = new_seed; | ||
2751 | } | ||
2752 | reg->lval = val; | ||
2753 | return val; | ||
2754 | } | ||
2755 | |||
2756 | static void | ||
2757 | pfm_reset_regs_masked(pfm_context_t *ctx, unsigned long *ovfl_regs, int is_long_reset) | ||
2758 | { | ||
2759 | unsigned long mask = ovfl_regs[0]; | ||
2760 | unsigned long reset_others = 0UL; | ||
2761 | unsigned long val; | ||
2762 | int i; | ||
2763 | |||
2764 | /* | ||
2765 | * now restore reset value on sampling overflowed counters | ||
2766 | */ | ||
2767 | mask >>= PMU_FIRST_COUNTER; | ||
2768 | for(i = PMU_FIRST_COUNTER; mask; i++, mask >>= 1) { | ||
2769 | |||
2770 | if ((mask & 0x1UL) == 0UL) continue; | ||
2771 | |||
2772 | ctx->ctx_pmds[i].val = val = pfm_new_counter_value(ctx->ctx_pmds+ i, is_long_reset); | ||
2773 | reset_others |= ctx->ctx_pmds[i].reset_pmds[0]; | ||
2774 | |||
2775 | DPRINT_ovfl((" %s reset ctx_pmds[%d]=%lx\n", is_long_reset ? "long" : "short", i, val)); | ||
2776 | } | ||
2777 | |||
2778 | /* | ||
2779 | * Now take care of resetting the other registers | ||
2780 | */ | ||
2781 | for(i = 0; reset_others; i++, reset_others >>= 1) { | ||
2782 | |||
2783 | if ((reset_others & 0x1) == 0) continue; | ||
2784 | |||
2785 | ctx->ctx_pmds[i].val = val = pfm_new_counter_value(ctx->ctx_pmds + i, is_long_reset); | ||
2786 | |||
2787 | DPRINT_ovfl(("%s reset_others pmd[%d]=%lx\n", | ||
2788 | is_long_reset ? "long" : "short", i, val)); | ||
2789 | } | ||
2790 | } | ||
2791 | |||
2792 | static void | ||
2793 | pfm_reset_regs(pfm_context_t *ctx, unsigned long *ovfl_regs, int is_long_reset) | ||
2794 | { | ||
2795 | unsigned long mask = ovfl_regs[0]; | ||
2796 | unsigned long reset_others = 0UL; | ||
2797 | unsigned long val; | ||
2798 | int i; | ||
2799 | |||
2800 | DPRINT_ovfl(("ovfl_regs=0x%lx is_long_reset=%d\n", ovfl_regs[0], is_long_reset)); | ||
2801 | |||
2802 | if (ctx->ctx_state == PFM_CTX_MASKED) { | ||
2803 | pfm_reset_regs_masked(ctx, ovfl_regs, is_long_reset); | ||
2804 | return; | ||
2805 | } | ||
2806 | |||
2807 | /* | ||
2808 | * now restore reset value on sampling overflowed counters | ||
2809 | */ | ||
2810 | mask >>= PMU_FIRST_COUNTER; | ||
2811 | for(i = PMU_FIRST_COUNTER; mask; i++, mask >>= 1) { | ||
2812 | |||
2813 | if ((mask & 0x1UL) == 0UL) continue; | ||
2814 | |||
2815 | val = pfm_new_counter_value(ctx->ctx_pmds+ i, is_long_reset); | ||
2816 | reset_others |= ctx->ctx_pmds[i].reset_pmds[0]; | ||
2817 | |||
2818 | DPRINT_ovfl((" %s reset ctx_pmds[%d]=%lx\n", is_long_reset ? "long" : "short", i, val)); | ||
2819 | |||
2820 | pfm_write_soft_counter(ctx, i, val); | ||
2821 | } | ||
2822 | |||
2823 | /* | ||
2824 | * Now take care of resetting the other registers | ||
2825 | */ | ||
2826 | for(i = 0; reset_others; i++, reset_others >>= 1) { | ||
2827 | |||
2828 | if ((reset_others & 0x1) == 0) continue; | ||
2829 | |||
2830 | val = pfm_new_counter_value(ctx->ctx_pmds + i, is_long_reset); | ||
2831 | |||
2832 | if (PMD_IS_COUNTING(i)) { | ||
2833 | pfm_write_soft_counter(ctx, i, val); | ||
2834 | } else { | ||
2835 | ia64_set_pmd(i, val); | ||
2836 | } | ||
2837 | DPRINT_ovfl(("%s reset_others pmd[%d]=%lx\n", | ||
2838 | is_long_reset ? "long" : "short", i, val)); | ||
2839 | } | ||
2840 | ia64_srlz_d(); | ||
2841 | } | ||
2842 | |||
2843 | static int | ||
2844 | pfm_write_pmcs(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs) | ||
2845 | { | ||
2846 | struct thread_struct *thread = NULL; | ||
2847 | struct task_struct *task; | ||
2848 | pfarg_reg_t *req = (pfarg_reg_t *)arg; | ||
2849 | unsigned long value, pmc_pm; | ||
2850 | unsigned long smpl_pmds, reset_pmds, impl_pmds; | ||
2851 | unsigned int cnum, reg_flags, flags, pmc_type; | ||
2852 | int i, can_access_pmu = 0, is_loaded, is_system, expert_mode; | ||
2853 | int is_monitor, is_counting, state; | ||
2854 | int ret = -EINVAL; | ||
2855 | pfm_reg_check_t wr_func; | ||
2856 | #define PFM_CHECK_PMC_PM(x, y, z) ((x)->ctx_fl_system ^ PMC_PM(y, z)) | ||
2857 | |||
2858 | state = ctx->ctx_state; | ||
2859 | is_loaded = state == PFM_CTX_LOADED ? 1 : 0; | ||
2860 | is_system = ctx->ctx_fl_system; | ||
2861 | task = ctx->ctx_task; | ||
2862 | impl_pmds = pmu_conf->impl_pmds[0]; | ||
2863 | |||
2864 | if (state == PFM_CTX_ZOMBIE) return -EINVAL; | ||
2865 | |||
2866 | if (is_loaded) { | ||
2867 | thread = &task->thread; | ||
2868 | /* | ||
2869 | * In system wide and when the context is loaded, access can only happen | ||
2870 | * when the caller is running on the CPU being monitored by the session. | ||
2871 | * It does not have to be the owner (ctx_task) of the context per se. | ||
2872 | */ | ||
2873 | if (is_system && ctx->ctx_cpu != smp_processor_id()) { | ||
2874 | DPRINT(("should be running on CPU%d\n", ctx->ctx_cpu)); | ||
2875 | return -EBUSY; | ||
2876 | } | ||
2877 | can_access_pmu = GET_PMU_OWNER() == task || is_system ? 1 : 0; | ||
2878 | } | ||
2879 | expert_mode = pfm_sysctl.expert_mode; | ||
2880 | |||
2881 | for (i = 0; i < count; i++, req++) { | ||
2882 | |||
2883 | cnum = req->reg_num; | ||
2884 | reg_flags = req->reg_flags; | ||
2885 | value = req->reg_value; | ||
2886 | smpl_pmds = req->reg_smpl_pmds[0]; | ||
2887 | reset_pmds = req->reg_reset_pmds[0]; | ||
2888 | flags = 0; | ||
2889 | |||
2890 | |||
2891 | if (cnum >= PMU_MAX_PMCS) { | ||
2892 | DPRINT(("pmc%u is invalid\n", cnum)); | ||
2893 | goto error; | ||
2894 | } | ||
2895 | |||
2896 | pmc_type = pmu_conf->pmc_desc[cnum].type; | ||
2897 | pmc_pm = (value >> pmu_conf->pmc_desc[cnum].pm_pos) & 0x1; | ||
2898 | is_counting = (pmc_type & PFM_REG_COUNTING) == PFM_REG_COUNTING ? 1 : 0; | ||
2899 | is_monitor = (pmc_type & PFM_REG_MONITOR) == PFM_REG_MONITOR ? 1 : 0; | ||
2900 | |||
2901 | /* | ||
2902 | * we reject all non implemented PMC as well | ||
2903 | * as attempts to modify PMC[0-3] which are used | ||
2904 | * as status registers by the PMU | ||
2905 | */ | ||
2906 | if ((pmc_type & PFM_REG_IMPL) == 0 || (pmc_type & PFM_REG_CONTROL) == PFM_REG_CONTROL) { | ||
2907 | DPRINT(("pmc%u is unimplemented or no-access pmc_type=%x\n", cnum, pmc_type)); | ||
2908 | goto error; | ||
2909 | } | ||
2910 | wr_func = pmu_conf->pmc_desc[cnum].write_check; | ||
2911 | /* | ||
2912 | * If the PMC is a monitor, then if the value is not the default: | ||
2913 | * - system-wide session: PMCx.pm=1 (privileged monitor) | ||
2914 | * - per-task : PMCx.pm=0 (user monitor) | ||
2915 | */ | ||
2916 | if (is_monitor && value != PMC_DFL_VAL(cnum) && is_system ^ pmc_pm) { | ||
2917 | DPRINT(("pmc%u pmc_pm=%lu is_system=%d\n", | ||
2918 | cnum, | ||
2919 | pmc_pm, | ||
2920 | is_system)); | ||
2921 | goto error; | ||
2922 | } | ||
2923 | |||
2924 | if (is_counting) { | ||
2925 | /* | ||
2926 | * enforce generation of overflow interrupt. Necessary on all | ||
2927 | * CPUs. | ||
2928 | */ | ||
2929 | value |= 1 << PMU_PMC_OI; | ||
2930 | |||
2931 | if (reg_flags & PFM_REGFL_OVFL_NOTIFY) { | ||
2932 | flags |= PFM_REGFL_OVFL_NOTIFY; | ||
2933 | } | ||
2934 | |||
2935 | if (reg_flags & PFM_REGFL_RANDOM) flags |= PFM_REGFL_RANDOM; | ||
2936 | |||
2937 | /* verify validity of smpl_pmds */ | ||
2938 | if ((smpl_pmds & impl_pmds) != smpl_pmds) { | ||
2939 | DPRINT(("invalid smpl_pmds 0x%lx for pmc%u\n", smpl_pmds, cnum)); | ||
2940 | goto error; | ||
2941 | } | ||
2942 | |||
2943 | /* verify validity of reset_pmds */ | ||
2944 | if ((reset_pmds & impl_pmds) != reset_pmds) { | ||
2945 | DPRINT(("invalid reset_pmds 0x%lx for pmc%u\n", reset_pmds, cnum)); | ||
2946 | goto error; | ||
2947 | } | ||
2948 | } else { | ||
2949 | if (reg_flags & (PFM_REGFL_OVFL_NOTIFY|PFM_REGFL_RANDOM)) { | ||
2950 | DPRINT(("cannot set ovfl_notify or random on pmc%u\n", cnum)); | ||
2951 | goto error; | ||
2952 | } | ||
2953 | /* eventid on non-counting monitors are ignored */ | ||
2954 | } | ||
2955 | |||
2956 | /* | ||
2957 | * execute write checker, if any | ||
2958 | */ | ||
2959 | if (likely(expert_mode == 0 && wr_func)) { | ||
2960 | ret = (*wr_func)(task, ctx, cnum, &value, regs); | ||
2961 | if (ret) goto error; | ||
2962 | ret = -EINVAL; | ||
2963 | } | ||
2964 | |||
2965 | /* | ||
2966 | * no error on this register | ||
2967 | */ | ||
2968 | PFM_REG_RETFLAG_SET(req->reg_flags, 0); | ||
2969 | |||
2970 | /* | ||
2971 | * Now we commit the changes to the software state | ||
2972 | */ | ||
2973 | |||
2974 | /* | ||
2975 | * update overflow information | ||
2976 | */ | ||
2977 | if (is_counting) { | ||
2978 | /* | ||
2979 | * full flag update each time a register is programmed | ||
2980 | */ | ||
2981 | ctx->ctx_pmds[cnum].flags = flags; | ||
2982 | |||
2983 | ctx->ctx_pmds[cnum].reset_pmds[0] = reset_pmds; | ||
2984 | ctx->ctx_pmds[cnum].smpl_pmds[0] = smpl_pmds; | ||
2985 | ctx->ctx_pmds[cnum].eventid = req->reg_smpl_eventid; | ||
2986 | |||
2987 | /* | ||
2988 | * Mark all PMDS to be accessed as used. | ||
2989 | * | ||
2990 | * We do not keep track of PMC because we have to | ||
2991 | * systematically restore ALL of them. | ||
2992 | * | ||
2993 | * We do not update the used_monitors mask, because | ||
2994 | * if we have not programmed them, then will be in | ||
2995 | * a quiescent state, therefore we will not need to | ||
2996 | * mask/restore then when context is MASKED. | ||
2997 | */ | ||
2998 | CTX_USED_PMD(ctx, reset_pmds); | ||
2999 | CTX_USED_PMD(ctx, smpl_pmds); | ||
3000 | /* | ||
3001 | * make sure we do not try to reset on | ||
3002 | * restart because we have established new values | ||
3003 | */ | ||
3004 | if (state == PFM_CTX_MASKED) ctx->ctx_ovfl_regs[0] &= ~1UL << cnum; | ||
3005 | } | ||
3006 | /* | ||
3007 | * Needed in case the user does not initialize the equivalent | ||
3008 | * PMD. Clearing is done indirectly via pfm_reset_pmu_state() so there is no | ||
3009 | * possible leak here. | ||
3010 | */ | ||
3011 | CTX_USED_PMD(ctx, pmu_conf->pmc_desc[cnum].dep_pmd[0]); | ||
3012 | |||
3013 | /* | ||
3014 | * keep track of the monitor PMC that we are using. | ||
3015 | * we save the value of the pmc in ctx_pmcs[] and if | ||
3016 | * the monitoring is not stopped for the context we also | ||
3017 | * place it in the saved state area so that it will be | ||
3018 | * picked up later by the context switch code. | ||
3019 | * | ||
3020 | * The value in ctx_pmcs[] can only be changed in pfm_write_pmcs(). | ||
3021 | * | ||
3022 | * The value in thread->pmcs[] may be modified on overflow, i.e., when | ||
3023 | * monitoring needs to be stopped. | ||
3024 | */ | ||
3025 | if (is_monitor) CTX_USED_MONITOR(ctx, 1UL << cnum); | ||
3026 | |||
3027 | /* | ||
3028 | * update context state | ||
3029 | */ | ||
3030 | ctx->ctx_pmcs[cnum] = value; | ||
3031 | |||
3032 | if (is_loaded) { | ||
3033 | /* | ||
3034 | * write thread state | ||
3035 | */ | ||
3036 | if (is_system == 0) thread->pmcs[cnum] = value; | ||
3037 | |||
3038 | /* | ||
3039 | * write hardware register if we can | ||
3040 | */ | ||
3041 | if (can_access_pmu) { | ||
3042 | ia64_set_pmc(cnum, value); | ||
3043 | } | ||
3044 | #ifdef CONFIG_SMP | ||
3045 | else { | ||
3046 | /* | ||
3047 | * per-task SMP only here | ||
3048 | * | ||
3049 | * we are guaranteed that the task is not running on the other CPU, | ||
3050 | * we indicate that this PMD will need to be reloaded if the task | ||
3051 | * is rescheduled on the CPU it ran last on. | ||
3052 | */ | ||
3053 | ctx->ctx_reload_pmcs[0] |= 1UL << cnum; | ||
3054 | } | ||
3055 | #endif | ||
3056 | } | ||
3057 | |||
3058 | DPRINT(("pmc[%u]=0x%lx ld=%d apmu=%d flags=0x%x all_pmcs=0x%lx used_pmds=0x%lx eventid=%ld smpl_pmds=0x%lx reset_pmds=0x%lx reloads_pmcs=0x%lx used_monitors=0x%lx ovfl_regs=0x%lx\n", | ||
3059 | cnum, | ||
3060 | value, | ||
3061 | is_loaded, | ||
3062 | can_access_pmu, | ||
3063 | flags, | ||
3064 | ctx->ctx_all_pmcs[0], | ||
3065 | ctx->ctx_used_pmds[0], | ||
3066 | ctx->ctx_pmds[cnum].eventid, | ||
3067 | smpl_pmds, | ||
3068 | reset_pmds, | ||
3069 | ctx->ctx_reload_pmcs[0], | ||
3070 | ctx->ctx_used_monitors[0], | ||
3071 | ctx->ctx_ovfl_regs[0])); | ||
3072 | } | ||
3073 | |||
3074 | /* | ||
3075 | * make sure the changes are visible | ||
3076 | */ | ||
3077 | if (can_access_pmu) ia64_srlz_d(); | ||
3078 | |||
3079 | return 0; | ||
3080 | error: | ||
3081 | PFM_REG_RETFLAG_SET(req->reg_flags, PFM_REG_RETFL_EINVAL); | ||
3082 | return ret; | ||
3083 | } | ||
3084 | |||
3085 | static int | ||
3086 | pfm_write_pmds(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs) | ||
3087 | { | ||
3088 | struct thread_struct *thread = NULL; | ||
3089 | struct task_struct *task; | ||
3090 | pfarg_reg_t *req = (pfarg_reg_t *)arg; | ||
3091 | unsigned long value, hw_value, ovfl_mask; | ||
3092 | unsigned int cnum; | ||
3093 | int i, can_access_pmu = 0, state; | ||
3094 | int is_counting, is_loaded, is_system, expert_mode; | ||
3095 | int ret = -EINVAL; | ||
3096 | pfm_reg_check_t wr_func; | ||
3097 | |||
3098 | |||
3099 | state = ctx->ctx_state; | ||
3100 | is_loaded = state == PFM_CTX_LOADED ? 1 : 0; | ||
3101 | is_system = ctx->ctx_fl_system; | ||
3102 | ovfl_mask = pmu_conf->ovfl_val; | ||
3103 | task = ctx->ctx_task; | ||
3104 | |||
3105 | if (unlikely(state == PFM_CTX_ZOMBIE)) return -EINVAL; | ||
3106 | |||
3107 | /* | ||
3108 | * on both UP and SMP, we can only write to the PMC when the task is | ||
3109 | * the owner of the local PMU. | ||
3110 | */ | ||
3111 | if (likely(is_loaded)) { | ||
3112 | thread = &task->thread; | ||
3113 | /* | ||
3114 | * In system wide and when the context is loaded, access can only happen | ||
3115 | * when the caller is running on the CPU being monitored by the session. | ||
3116 | * It does not have to be the owner (ctx_task) of the context per se. | ||
3117 | */ | ||
3118 | if (unlikely(is_system && ctx->ctx_cpu != smp_processor_id())) { | ||
3119 | DPRINT(("should be running on CPU%d\n", ctx->ctx_cpu)); | ||
3120 | return -EBUSY; | ||
3121 | } | ||
3122 | can_access_pmu = GET_PMU_OWNER() == task || is_system ? 1 : 0; | ||
3123 | } | ||
3124 | expert_mode = pfm_sysctl.expert_mode; | ||
3125 | |||
3126 | for (i = 0; i < count; i++, req++) { | ||
3127 | |||
3128 | cnum = req->reg_num; | ||
3129 | value = req->reg_value; | ||
3130 | |||
3131 | if (!PMD_IS_IMPL(cnum)) { | ||
3132 | DPRINT(("pmd[%u] is unimplemented or invalid\n", cnum)); | ||
3133 | goto abort_mission; | ||
3134 | } | ||
3135 | is_counting = PMD_IS_COUNTING(cnum); | ||
3136 | wr_func = pmu_conf->pmd_desc[cnum].write_check; | ||
3137 | |||
3138 | /* | ||
3139 | * execute write checker, if any | ||
3140 | */ | ||
3141 | if (unlikely(expert_mode == 0 && wr_func)) { | ||
3142 | unsigned long v = value; | ||
3143 | |||
3144 | ret = (*wr_func)(task, ctx, cnum, &v, regs); | ||
3145 | if (ret) goto abort_mission; | ||
3146 | |||
3147 | value = v; | ||
3148 | ret = -EINVAL; | ||
3149 | } | ||
3150 | |||
3151 | /* | ||
3152 | * no error on this register | ||
3153 | */ | ||
3154 | PFM_REG_RETFLAG_SET(req->reg_flags, 0); | ||
3155 | |||
3156 | /* | ||
3157 | * now commit changes to software state | ||
3158 | */ | ||
3159 | hw_value = value; | ||
3160 | |||
3161 | /* | ||
3162 | * update virtualized (64bits) counter | ||
3163 | */ | ||
3164 | if (is_counting) { | ||
3165 | /* | ||
3166 | * write context state | ||
3167 | */ | ||
3168 | ctx->ctx_pmds[cnum].lval = value; | ||
3169 | |||
3170 | /* | ||
3171 | * when context is load we use the split value | ||
3172 | */ | ||
3173 | if (is_loaded) { | ||
3174 | hw_value = value & ovfl_mask; | ||
3175 | value = value & ~ovfl_mask; | ||
3176 | } | ||
3177 | } | ||
3178 | /* | ||
3179 | * update reset values (not just for counters) | ||
3180 | */ | ||
3181 | ctx->ctx_pmds[cnum].long_reset = req->reg_long_reset; | ||
3182 | ctx->ctx_pmds[cnum].short_reset = req->reg_short_reset; | ||
3183 | |||
3184 | /* | ||
3185 | * update randomization parameters (not just for counters) | ||
3186 | */ | ||
3187 | ctx->ctx_pmds[cnum].seed = req->reg_random_seed; | ||
3188 | ctx->ctx_pmds[cnum].mask = req->reg_random_mask; | ||
3189 | |||
3190 | /* | ||
3191 | * update context value | ||
3192 | */ | ||
3193 | ctx->ctx_pmds[cnum].val = value; | ||
3194 | |||
3195 | /* | ||
3196 | * Keep track of what we use | ||
3197 | * | ||
3198 | * We do not keep track of PMC because we have to | ||
3199 | * systematically restore ALL of them. | ||
3200 | */ | ||
3201 | CTX_USED_PMD(ctx, PMD_PMD_DEP(cnum)); | ||
3202 | |||
3203 | /* | ||
3204 | * mark this PMD register used as well | ||
3205 | */ | ||
3206 | CTX_USED_PMD(ctx, RDEP(cnum)); | ||
3207 | |||
3208 | /* | ||
3209 | * make sure we do not try to reset on | ||
3210 | * restart because we have established new values | ||
3211 | */ | ||
3212 | if (is_counting && state == PFM_CTX_MASKED) { | ||
3213 | ctx->ctx_ovfl_regs[0] &= ~1UL << cnum; | ||
3214 | } | ||
3215 | |||
3216 | if (is_loaded) { | ||
3217 | /* | ||
3218 | * write thread state | ||
3219 | */ | ||
3220 | if (is_system == 0) thread->pmds[cnum] = hw_value; | ||
3221 | |||
3222 | /* | ||
3223 | * write hardware register if we can | ||
3224 | */ | ||
3225 | if (can_access_pmu) { | ||
3226 | ia64_set_pmd(cnum, hw_value); | ||
3227 | } else { | ||
3228 | #ifdef CONFIG_SMP | ||
3229 | /* | ||
3230 | * we are guaranteed that the task is not running on the other CPU, | ||
3231 | * we indicate that this PMD will need to be reloaded if the task | ||
3232 | * is rescheduled on the CPU it ran last on. | ||
3233 | */ | ||
3234 | ctx->ctx_reload_pmds[0] |= 1UL << cnum; | ||
3235 | #endif | ||
3236 | } | ||
3237 | } | ||
3238 | |||
3239 | DPRINT(("pmd[%u]=0x%lx ld=%d apmu=%d, hw_value=0x%lx ctx_pmd=0x%lx short_reset=0x%lx " | ||
3240 | "long_reset=0x%lx notify=%c seed=0x%lx mask=0x%lx used_pmds=0x%lx reset_pmds=0x%lx reload_pmds=0x%lx all_pmds=0x%lx ovfl_regs=0x%lx\n", | ||
3241 | cnum, | ||
3242 | value, | ||
3243 | is_loaded, | ||
3244 | can_access_pmu, | ||
3245 | hw_value, | ||
3246 | ctx->ctx_pmds[cnum].val, | ||
3247 | ctx->ctx_pmds[cnum].short_reset, | ||
3248 | ctx->ctx_pmds[cnum].long_reset, | ||
3249 | PMC_OVFL_NOTIFY(ctx, cnum) ? 'Y':'N', | ||
3250 | ctx->ctx_pmds[cnum].seed, | ||
3251 | ctx->ctx_pmds[cnum].mask, | ||
3252 | ctx->ctx_used_pmds[0], | ||
3253 | ctx->ctx_pmds[cnum].reset_pmds[0], | ||
3254 | ctx->ctx_reload_pmds[0], | ||
3255 | ctx->ctx_all_pmds[0], | ||
3256 | ctx->ctx_ovfl_regs[0])); | ||
3257 | } | ||
3258 | |||
3259 | /* | ||
3260 | * make changes visible | ||
3261 | */ | ||
3262 | if (can_access_pmu) ia64_srlz_d(); | ||
3263 | |||
3264 | return 0; | ||
3265 | |||
3266 | abort_mission: | ||
3267 | /* | ||
3268 | * for now, we have only one possibility for error | ||
3269 | */ | ||
3270 | PFM_REG_RETFLAG_SET(req->reg_flags, PFM_REG_RETFL_EINVAL); | ||
3271 | return ret; | ||
3272 | } | ||
3273 | |||
3274 | /* | ||
3275 | * By the way of PROTECT_CONTEXT(), interrupts are masked while we are in this function. | ||
3276 | * Therefore we know, we do not have to worry about the PMU overflow interrupt. If an | ||
3277 | * interrupt is delivered during the call, it will be kept pending until we leave, making | ||
3278 | * it appears as if it had been generated at the UNPROTECT_CONTEXT(). At least we are | ||
3279 | * guaranteed to return consistent data to the user, it may simply be old. It is not | ||
3280 | * trivial to treat the overflow while inside the call because you may end up in | ||
3281 | * some module sampling buffer code causing deadlocks. | ||
3282 | */ | ||
3283 | static int | ||
3284 | pfm_read_pmds(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs) | ||
3285 | { | ||
3286 | struct thread_struct *thread = NULL; | ||
3287 | struct task_struct *task; | ||
3288 | unsigned long val = 0UL, lval, ovfl_mask, sval; | ||
3289 | pfarg_reg_t *req = (pfarg_reg_t *)arg; | ||
3290 | unsigned int cnum, reg_flags = 0; | ||
3291 | int i, can_access_pmu = 0, state; | ||
3292 | int is_loaded, is_system, is_counting, expert_mode; | ||
3293 | int ret = -EINVAL; | ||
3294 | pfm_reg_check_t rd_func; | ||
3295 | |||
3296 | /* | ||
3297 | * access is possible when loaded only for | ||
3298 | * self-monitoring tasks or in UP mode | ||
3299 | */ | ||
3300 | |||
3301 | state = ctx->ctx_state; | ||
3302 | is_loaded = state == PFM_CTX_LOADED ? 1 : 0; | ||
3303 | is_system = ctx->ctx_fl_system; | ||
3304 | ovfl_mask = pmu_conf->ovfl_val; | ||
3305 | task = ctx->ctx_task; | ||
3306 | |||
3307 | if (state == PFM_CTX_ZOMBIE) return -EINVAL; | ||
3308 | |||
3309 | if (likely(is_loaded)) { | ||
3310 | thread = &task->thread; | ||
3311 | /* | ||
3312 | * In system wide and when the context is loaded, access can only happen | ||
3313 | * when the caller is running on the CPU being monitored by the session. | ||
3314 | * It does not have to be the owner (ctx_task) of the context per se. | ||
3315 | */ | ||
3316 | if (unlikely(is_system && ctx->ctx_cpu != smp_processor_id())) { | ||
3317 | DPRINT(("should be running on CPU%d\n", ctx->ctx_cpu)); | ||
3318 | return -EBUSY; | ||
3319 | } | ||
3320 | /* | ||
3321 | * this can be true when not self-monitoring only in UP | ||
3322 | */ | ||
3323 | can_access_pmu = GET_PMU_OWNER() == task || is_system ? 1 : 0; | ||
3324 | |||
3325 | if (can_access_pmu) ia64_srlz_d(); | ||
3326 | } | ||
3327 | expert_mode = pfm_sysctl.expert_mode; | ||
3328 | |||
3329 | DPRINT(("ld=%d apmu=%d ctx_state=%d\n", | ||
3330 | is_loaded, | ||
3331 | can_access_pmu, | ||
3332 | state)); | ||
3333 | |||
3334 | /* | ||
3335 | * on both UP and SMP, we can only read the PMD from the hardware register when | ||
3336 | * the task is the owner of the local PMU. | ||
3337 | */ | ||
3338 | |||
3339 | for (i = 0; i < count; i++, req++) { | ||
3340 | |||
3341 | cnum = req->reg_num; | ||
3342 | reg_flags = req->reg_flags; | ||
3343 | |||
3344 | if (unlikely(!PMD_IS_IMPL(cnum))) goto error; | ||
3345 | /* | ||
3346 | * we can only read the register that we use. That includes | ||
3347 | * the one we explicitely initialize AND the one we want included | ||
3348 | * in the sampling buffer (smpl_regs). | ||
3349 | * | ||
3350 | * Having this restriction allows optimization in the ctxsw routine | ||
3351 | * without compromising security (leaks) | ||
3352 | */ | ||
3353 | if (unlikely(!CTX_IS_USED_PMD(ctx, cnum))) goto error; | ||
3354 | |||
3355 | sval = ctx->ctx_pmds[cnum].val; | ||
3356 | lval = ctx->ctx_pmds[cnum].lval; | ||
3357 | is_counting = PMD_IS_COUNTING(cnum); | ||
3358 | |||
3359 | /* | ||
3360 | * If the task is not the current one, then we check if the | ||
3361 | * PMU state is still in the local live register due to lazy ctxsw. | ||
3362 | * If true, then we read directly from the registers. | ||
3363 | */ | ||
3364 | if (can_access_pmu){ | ||
3365 | val = ia64_get_pmd(cnum); | ||
3366 | } else { | ||
3367 | /* | ||
3368 | * context has been saved | ||
3369 | * if context is zombie, then task does not exist anymore. | ||
3370 | * In this case, we use the full value saved in the context (pfm_flush_regs()). | ||
3371 | */ | ||
3372 | val = is_loaded ? thread->pmds[cnum] : 0UL; | ||
3373 | } | ||
3374 | rd_func = pmu_conf->pmd_desc[cnum].read_check; | ||
3375 | |||
3376 | if (is_counting) { | ||
3377 | /* | ||
3378 | * XXX: need to check for overflow when loaded | ||
3379 | */ | ||
3380 | val &= ovfl_mask; | ||
3381 | val += sval; | ||
3382 | } | ||
3383 | |||
3384 | /* | ||
3385 | * execute read checker, if any | ||
3386 | */ | ||
3387 | if (unlikely(expert_mode == 0 && rd_func)) { | ||
3388 | unsigned long v = val; | ||
3389 | ret = (*rd_func)(ctx->ctx_task, ctx, cnum, &v, regs); | ||
3390 | if (ret) goto error; | ||
3391 | val = v; | ||
3392 | ret = -EINVAL; | ||
3393 | } | ||
3394 | |||
3395 | PFM_REG_RETFLAG_SET(reg_flags, 0); | ||
3396 | |||
3397 | DPRINT(("pmd[%u]=0x%lx\n", cnum, val)); | ||
3398 | |||
3399 | /* | ||
3400 | * update register return value, abort all if problem during copy. | ||
3401 | * we only modify the reg_flags field. no check mode is fine because | ||
3402 | * access has been verified upfront in sys_perfmonctl(). | ||
3403 | */ | ||
3404 | req->reg_value = val; | ||
3405 | req->reg_flags = reg_flags; | ||
3406 | req->reg_last_reset_val = lval; | ||
3407 | } | ||
3408 | |||
3409 | return 0; | ||
3410 | |||
3411 | error: | ||
3412 | PFM_REG_RETFLAG_SET(req->reg_flags, PFM_REG_RETFL_EINVAL); | ||
3413 | return ret; | ||
3414 | } | ||
3415 | |||
3416 | int | ||
3417 | pfm_mod_write_pmcs(struct task_struct *task, void *req, unsigned int nreq, struct pt_regs *regs) | ||
3418 | { | ||
3419 | pfm_context_t *ctx; | ||
3420 | |||
3421 | if (req == NULL) return -EINVAL; | ||
3422 | |||
3423 | ctx = GET_PMU_CTX(); | ||
3424 | |||
3425 | if (ctx == NULL) return -EINVAL; | ||
3426 | |||
3427 | /* | ||
3428 | * for now limit to current task, which is enough when calling | ||
3429 | * from overflow handler | ||
3430 | */ | ||
3431 | if (task != current && ctx->ctx_fl_system == 0) return -EBUSY; | ||
3432 | |||
3433 | return pfm_write_pmcs(ctx, req, nreq, regs); | ||
3434 | } | ||
3435 | EXPORT_SYMBOL(pfm_mod_write_pmcs); | ||
3436 | |||
3437 | int | ||
3438 | pfm_mod_read_pmds(struct task_struct *task, void *req, unsigned int nreq, struct pt_regs *regs) | ||
3439 | { | ||
3440 | pfm_context_t *ctx; | ||
3441 | |||
3442 | if (req == NULL) return -EINVAL; | ||
3443 | |||
3444 | ctx = GET_PMU_CTX(); | ||
3445 | |||
3446 | if (ctx == NULL) return -EINVAL; | ||
3447 | |||
3448 | /* | ||
3449 | * for now limit to current task, which is enough when calling | ||
3450 | * from overflow handler | ||
3451 | */ | ||
3452 | if (task != current && ctx->ctx_fl_system == 0) return -EBUSY; | ||
3453 | |||
3454 | return pfm_read_pmds(ctx, req, nreq, regs); | ||
3455 | } | ||
3456 | EXPORT_SYMBOL(pfm_mod_read_pmds); | ||
3457 | |||
3458 | /* | ||
3459 | * Only call this function when a process it trying to | ||
3460 | * write the debug registers (reading is always allowed) | ||
3461 | */ | ||
3462 | int | ||
3463 | pfm_use_debug_registers(struct task_struct *task) | ||
3464 | { | ||
3465 | pfm_context_t *ctx = task->thread.pfm_context; | ||
3466 | unsigned long flags; | ||
3467 | int ret = 0; | ||
3468 | |||
3469 | if (pmu_conf->use_rr_dbregs == 0) return 0; | ||
3470 | |||
3471 | DPRINT(("called for [%d]\n", task->pid)); | ||
3472 | |||
3473 | /* | ||
3474 | * do it only once | ||
3475 | */ | ||
3476 | if (task->thread.flags & IA64_THREAD_DBG_VALID) return 0; | ||
3477 | |||
3478 | /* | ||
3479 | * Even on SMP, we do not need to use an atomic here because | ||
3480 | * the only way in is via ptrace() and this is possible only when the | ||
3481 | * process is stopped. Even in the case where the ctxsw out is not totally | ||
3482 | * completed by the time we come here, there is no way the 'stopped' process | ||
3483 | * could be in the middle of fiddling with the pfm_write_ibr_dbr() routine. | ||
3484 | * So this is always safe. | ||
3485 | */ | ||
3486 | if (ctx && ctx->ctx_fl_using_dbreg == 1) return -1; | ||
3487 | |||
3488 | LOCK_PFS(flags); | ||
3489 | |||
3490 | /* | ||
3491 | * We cannot allow setting breakpoints when system wide monitoring | ||
3492 | * sessions are using the debug registers. | ||
3493 | */ | ||
3494 | if (pfm_sessions.pfs_sys_use_dbregs> 0) | ||
3495 | ret = -1; | ||
3496 | else | ||
3497 | pfm_sessions.pfs_ptrace_use_dbregs++; | ||
3498 | |||
3499 | DPRINT(("ptrace_use_dbregs=%u sys_use_dbregs=%u by [%d] ret = %d\n", | ||
3500 | pfm_sessions.pfs_ptrace_use_dbregs, | ||
3501 | pfm_sessions.pfs_sys_use_dbregs, | ||
3502 | task->pid, ret)); | ||
3503 | |||
3504 | UNLOCK_PFS(flags); | ||
3505 | |||
3506 | return ret; | ||
3507 | } | ||
3508 | |||
3509 | /* | ||
3510 | * This function is called for every task that exits with the | ||
3511 | * IA64_THREAD_DBG_VALID set. This indicates a task which was | ||
3512 | * able to use the debug registers for debugging purposes via | ||
3513 | * ptrace(). Therefore we know it was not using them for | ||
3514 | * perfmormance monitoring, so we only decrement the number | ||
3515 | * of "ptraced" debug register users to keep the count up to date | ||
3516 | */ | ||
3517 | int | ||
3518 | pfm_release_debug_registers(struct task_struct *task) | ||
3519 | { | ||
3520 | unsigned long flags; | ||
3521 | int ret; | ||
3522 | |||
3523 | if (pmu_conf->use_rr_dbregs == 0) return 0; | ||
3524 | |||
3525 | LOCK_PFS(flags); | ||
3526 | if (pfm_sessions.pfs_ptrace_use_dbregs == 0) { | ||
3527 | printk(KERN_ERR "perfmon: invalid release for [%d] ptrace_use_dbregs=0\n", task->pid); | ||
3528 | ret = -1; | ||
3529 | } else { | ||
3530 | pfm_sessions.pfs_ptrace_use_dbregs--; | ||
3531 | ret = 0; | ||
3532 | } | ||
3533 | UNLOCK_PFS(flags); | ||
3534 | |||
3535 | return ret; | ||
3536 | } | ||
3537 | |||
3538 | static int | ||
3539 | pfm_restart(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs) | ||
3540 | { | ||
3541 | struct task_struct *task; | ||
3542 | pfm_buffer_fmt_t *fmt; | ||
3543 | pfm_ovfl_ctrl_t rst_ctrl; | ||
3544 | int state, is_system; | ||
3545 | int ret = 0; | ||
3546 | |||
3547 | state = ctx->ctx_state; | ||
3548 | fmt = ctx->ctx_buf_fmt; | ||
3549 | is_system = ctx->ctx_fl_system; | ||
3550 | task = PFM_CTX_TASK(ctx); | ||
3551 | |||
3552 | switch(state) { | ||
3553 | case PFM_CTX_MASKED: | ||
3554 | break; | ||
3555 | case PFM_CTX_LOADED: | ||
3556 | if (CTX_HAS_SMPL(ctx) && fmt->fmt_restart_active) break; | ||
3557 | /* fall through */ | ||
3558 | case PFM_CTX_UNLOADED: | ||
3559 | case PFM_CTX_ZOMBIE: | ||
3560 | DPRINT(("invalid state=%d\n", state)); | ||
3561 | return -EBUSY; | ||
3562 | default: | ||
3563 | DPRINT(("state=%d, cannot operate (no active_restart handler)\n", state)); | ||
3564 | return -EINVAL; | ||
3565 | } | ||
3566 | |||
3567 | /* | ||
3568 | * In system wide and when the context is loaded, access can only happen | ||
3569 | * when the caller is running on the CPU being monitored by the session. | ||
3570 | * It does not have to be the owner (ctx_task) of the context per se. | ||
3571 | */ | ||
3572 | if (is_system && ctx->ctx_cpu != smp_processor_id()) { | ||
3573 | DPRINT(("should be running on CPU%d\n", ctx->ctx_cpu)); | ||
3574 | return -EBUSY; | ||
3575 | } | ||
3576 | |||
3577 | /* sanity check */ | ||
3578 | if (unlikely(task == NULL)) { | ||
3579 | printk(KERN_ERR "perfmon: [%d] pfm_restart no task\n", current->pid); | ||
3580 | return -EINVAL; | ||
3581 | } | ||
3582 | |||
3583 | if (task == current || is_system) { | ||
3584 | |||
3585 | fmt = ctx->ctx_buf_fmt; | ||
3586 | |||
3587 | DPRINT(("restarting self %d ovfl=0x%lx\n", | ||
3588 | task->pid, | ||
3589 | ctx->ctx_ovfl_regs[0])); | ||
3590 | |||
3591 | if (CTX_HAS_SMPL(ctx)) { | ||
3592 | |||
3593 | prefetch(ctx->ctx_smpl_hdr); | ||
3594 | |||
3595 | rst_ctrl.bits.mask_monitoring = 0; | ||
3596 | rst_ctrl.bits.reset_ovfl_pmds = 0; | ||
3597 | |||
3598 | if (state == PFM_CTX_LOADED) | ||
3599 | ret = pfm_buf_fmt_restart_active(fmt, task, &rst_ctrl, ctx->ctx_smpl_hdr, regs); | ||
3600 | else | ||
3601 | ret = pfm_buf_fmt_restart(fmt, task, &rst_ctrl, ctx->ctx_smpl_hdr, regs); | ||
3602 | } else { | ||
3603 | rst_ctrl.bits.mask_monitoring = 0; | ||
3604 | rst_ctrl.bits.reset_ovfl_pmds = 1; | ||
3605 | } | ||
3606 | |||
3607 | if (ret == 0) { | ||
3608 | if (rst_ctrl.bits.reset_ovfl_pmds) | ||
3609 | pfm_reset_regs(ctx, ctx->ctx_ovfl_regs, PFM_PMD_LONG_RESET); | ||
3610 | |||
3611 | if (rst_ctrl.bits.mask_monitoring == 0) { | ||
3612 | DPRINT(("resuming monitoring for [%d]\n", task->pid)); | ||
3613 | |||
3614 | if (state == PFM_CTX_MASKED) pfm_restore_monitoring(task); | ||
3615 | } else { | ||
3616 | DPRINT(("keeping monitoring stopped for [%d]\n", task->pid)); | ||
3617 | |||
3618 | // cannot use pfm_stop_monitoring(task, regs); | ||
3619 | } | ||
3620 | } | ||
3621 | /* | ||
3622 | * clear overflowed PMD mask to remove any stale information | ||
3623 | */ | ||
3624 | ctx->ctx_ovfl_regs[0] = 0UL; | ||
3625 | |||
3626 | /* | ||
3627 | * back to LOADED state | ||
3628 | */ | ||
3629 | ctx->ctx_state = PFM_CTX_LOADED; | ||
3630 | |||
3631 | /* | ||
3632 | * XXX: not really useful for self monitoring | ||
3633 | */ | ||
3634 | ctx->ctx_fl_can_restart = 0; | ||
3635 | |||
3636 | return 0; | ||
3637 | } | ||
3638 | |||
3639 | /* | ||
3640 | * restart another task | ||
3641 | */ | ||
3642 | |||
3643 | /* | ||
3644 | * When PFM_CTX_MASKED, we cannot issue a restart before the previous | ||
3645 | * one is seen by the task. | ||
3646 | */ | ||
3647 | if (state == PFM_CTX_MASKED) { | ||
3648 | if (ctx->ctx_fl_can_restart == 0) return -EINVAL; | ||
3649 | /* | ||
3650 | * will prevent subsequent restart before this one is | ||
3651 | * seen by other task | ||
3652 | */ | ||
3653 | ctx->ctx_fl_can_restart = 0; | ||
3654 | } | ||
3655 | |||
3656 | /* | ||
3657 | * if blocking, then post the semaphore is PFM_CTX_MASKED, i.e. | ||
3658 | * the task is blocked or on its way to block. That's the normal | ||
3659 | * restart path. If the monitoring is not masked, then the task | ||
3660 | * can be actively monitoring and we cannot directly intervene. | ||
3661 | * Therefore we use the trap mechanism to catch the task and | ||
3662 | * force it to reset the buffer/reset PMDs. | ||
3663 | * | ||
3664 | * if non-blocking, then we ensure that the task will go into | ||
3665 | * pfm_handle_work() before returning to user mode. | ||
3666 | * | ||
3667 | * We cannot explicitely reset another task, it MUST always | ||
3668 | * be done by the task itself. This works for system wide because | ||
3669 | * the tool that is controlling the session is logically doing | ||
3670 | * "self-monitoring". | ||
3671 | */ | ||
3672 | if (CTX_OVFL_NOBLOCK(ctx) == 0 && state == PFM_CTX_MASKED) { | ||
3673 | DPRINT(("unblocking [%d] \n", task->pid)); | ||
3674 | up(&ctx->ctx_restart_sem); | ||
3675 | } else { | ||
3676 | DPRINT(("[%d] armed exit trap\n", task->pid)); | ||
3677 | |||
3678 | ctx->ctx_fl_trap_reason = PFM_TRAP_REASON_RESET; | ||
3679 | |||
3680 | PFM_SET_WORK_PENDING(task, 1); | ||
3681 | |||
3682 | pfm_set_task_notify(task); | ||
3683 | |||
3684 | /* | ||
3685 | * XXX: send reschedule if task runs on another CPU | ||
3686 | */ | ||
3687 | } | ||
3688 | return 0; | ||
3689 | } | ||
3690 | |||
3691 | static int | ||
3692 | pfm_debug(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs) | ||
3693 | { | ||
3694 | unsigned int m = *(unsigned int *)arg; | ||
3695 | |||
3696 | pfm_sysctl.debug = m == 0 ? 0 : 1; | ||
3697 | |||
3698 | pfm_debug_var = pfm_sysctl.debug; | ||
3699 | |||
3700 | printk(KERN_INFO "perfmon debugging %s (timing reset)\n", pfm_sysctl.debug ? "on" : "off"); | ||
3701 | |||
3702 | if (m == 0) { | ||
3703 | memset(pfm_stats, 0, sizeof(pfm_stats)); | ||
3704 | for(m=0; m < NR_CPUS; m++) pfm_stats[m].pfm_ovfl_intr_cycles_min = ~0UL; | ||
3705 | } | ||
3706 | return 0; | ||
3707 | } | ||
3708 | |||
3709 | /* | ||
3710 | * arg can be NULL and count can be zero for this function | ||
3711 | */ | ||
3712 | static int | ||
3713 | pfm_write_ibr_dbr(int mode, pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs) | ||
3714 | { | ||
3715 | struct thread_struct *thread = NULL; | ||
3716 | struct task_struct *task; | ||
3717 | pfarg_dbreg_t *req = (pfarg_dbreg_t *)arg; | ||
3718 | unsigned long flags; | ||
3719 | dbreg_t dbreg; | ||
3720 | unsigned int rnum; | ||
3721 | int first_time; | ||
3722 | int ret = 0, state; | ||
3723 | int i, can_access_pmu = 0; | ||
3724 | int is_system, is_loaded; | ||
3725 | |||
3726 | if (pmu_conf->use_rr_dbregs == 0) return -EINVAL; | ||
3727 | |||
3728 | state = ctx->ctx_state; | ||
3729 | is_loaded = state == PFM_CTX_LOADED ? 1 : 0; | ||
3730 | is_system = ctx->ctx_fl_system; | ||
3731 | task = ctx->ctx_task; | ||
3732 | |||
3733 | if (state == PFM_CTX_ZOMBIE) return -EINVAL; | ||
3734 | |||
3735 | /* | ||
3736 | * on both UP and SMP, we can only write to the PMC when the task is | ||
3737 | * the owner of the local PMU. | ||
3738 | */ | ||
3739 | if (is_loaded) { | ||
3740 | thread = &task->thread; | ||
3741 | /* | ||
3742 | * In system wide and when the context is loaded, access can only happen | ||
3743 | * when the caller is running on the CPU being monitored by the session. | ||
3744 | * It does not have to be the owner (ctx_task) of the context per se. | ||
3745 | */ | ||
3746 | if (unlikely(is_system && ctx->ctx_cpu != smp_processor_id())) { | ||
3747 | DPRINT(("should be running on CPU%d\n", ctx->ctx_cpu)); | ||
3748 | return -EBUSY; | ||
3749 | } | ||
3750 | can_access_pmu = GET_PMU_OWNER() == task || is_system ? 1 : 0; | ||
3751 | } | ||
3752 | |||
3753 | /* | ||
3754 | * we do not need to check for ipsr.db because we do clear ibr.x, dbr.r, and dbr.w | ||
3755 | * ensuring that no real breakpoint can be installed via this call. | ||
3756 | * | ||
3757 | * IMPORTANT: regs can be NULL in this function | ||
3758 | */ | ||
3759 | |||
3760 | first_time = ctx->ctx_fl_using_dbreg == 0; | ||
3761 | |||
3762 | /* | ||
3763 | * don't bother if we are loaded and task is being debugged | ||
3764 | */ | ||
3765 | if (is_loaded && (thread->flags & IA64_THREAD_DBG_VALID) != 0) { | ||
3766 | DPRINT(("debug registers already in use for [%d]\n", task->pid)); | ||
3767 | return -EBUSY; | ||
3768 | } | ||
3769 | |||
3770 | /* | ||
3771 | * check for debug registers in system wide mode | ||
3772 | * | ||
3773 | * If though a check is done in pfm_context_load(), | ||
3774 | * we must repeat it here, in case the registers are | ||
3775 | * written after the context is loaded | ||
3776 | */ | ||
3777 | if (is_loaded) { | ||
3778 | LOCK_PFS(flags); | ||
3779 | |||
3780 | if (first_time && is_system) { | ||
3781 | if (pfm_sessions.pfs_ptrace_use_dbregs) | ||
3782 | ret = -EBUSY; | ||
3783 | else | ||
3784 | pfm_sessions.pfs_sys_use_dbregs++; | ||
3785 | } | ||
3786 | UNLOCK_PFS(flags); | ||
3787 | } | ||
3788 | |||
3789 | if (ret != 0) return ret; | ||
3790 | |||
3791 | /* | ||
3792 | * mark ourself as user of the debug registers for | ||
3793 | * perfmon purposes. | ||
3794 | */ | ||
3795 | ctx->ctx_fl_using_dbreg = 1; | ||
3796 | |||
3797 | /* | ||
3798 | * clear hardware registers to make sure we don't | ||
3799 | * pick up stale state. | ||
3800 | * | ||
3801 | * for a system wide session, we do not use | ||
3802 | * thread.dbr, thread.ibr because this process | ||
3803 | * never leaves the current CPU and the state | ||
3804 | * is shared by all processes running on it | ||
3805 | */ | ||
3806 | if (first_time && can_access_pmu) { | ||
3807 | DPRINT(("[%d] clearing ibrs, dbrs\n", task->pid)); | ||
3808 | for (i=0; i < pmu_conf->num_ibrs; i++) { | ||
3809 | ia64_set_ibr(i, 0UL); | ||
3810 | ia64_dv_serialize_instruction(); | ||
3811 | } | ||
3812 | ia64_srlz_i(); | ||
3813 | for (i=0; i < pmu_conf->num_dbrs; i++) { | ||
3814 | ia64_set_dbr(i, 0UL); | ||
3815 | ia64_dv_serialize_data(); | ||
3816 | } | ||
3817 | ia64_srlz_d(); | ||
3818 | } | ||
3819 | |||
3820 | /* | ||
3821 | * Now install the values into the registers | ||
3822 | */ | ||
3823 | for (i = 0; i < count; i++, req++) { | ||
3824 | |||
3825 | rnum = req->dbreg_num; | ||
3826 | dbreg.val = req->dbreg_value; | ||
3827 | |||
3828 | ret = -EINVAL; | ||
3829 | |||
3830 | if ((mode == PFM_CODE_RR && rnum >= PFM_NUM_IBRS) || ((mode == PFM_DATA_RR) && rnum >= PFM_NUM_DBRS)) { | ||
3831 | DPRINT(("invalid register %u val=0x%lx mode=%d i=%d count=%d\n", | ||
3832 | rnum, dbreg.val, mode, i, count)); | ||
3833 | |||
3834 | goto abort_mission; | ||
3835 | } | ||
3836 | |||
3837 | /* | ||
3838 | * make sure we do not install enabled breakpoint | ||
3839 | */ | ||
3840 | if (rnum & 0x1) { | ||
3841 | if (mode == PFM_CODE_RR) | ||
3842 | dbreg.ibr.ibr_x = 0; | ||
3843 | else | ||
3844 | dbreg.dbr.dbr_r = dbreg.dbr.dbr_w = 0; | ||
3845 | } | ||
3846 | |||
3847 | PFM_REG_RETFLAG_SET(req->dbreg_flags, 0); | ||
3848 | |||
3849 | /* | ||
3850 | * Debug registers, just like PMC, can only be modified | ||
3851 | * by a kernel call. Moreover, perfmon() access to those | ||
3852 | * registers are centralized in this routine. The hardware | ||
3853 | * does not modify the value of these registers, therefore, | ||
3854 | * if we save them as they are written, we can avoid having | ||
3855 | * to save them on context switch out. This is made possible | ||
3856 | * by the fact that when perfmon uses debug registers, ptrace() | ||
3857 | * won't be able to modify them concurrently. | ||
3858 | */ | ||
3859 | if (mode == PFM_CODE_RR) { | ||
3860 | CTX_USED_IBR(ctx, rnum); | ||
3861 | |||
3862 | if (can_access_pmu) { | ||
3863 | ia64_set_ibr(rnum, dbreg.val); | ||
3864 | ia64_dv_serialize_instruction(); | ||
3865 | } | ||
3866 | |||
3867 | ctx->ctx_ibrs[rnum] = dbreg.val; | ||
3868 | |||
3869 | DPRINT(("write ibr%u=0x%lx used_ibrs=0x%x ld=%d apmu=%d\n", | ||
3870 | rnum, dbreg.val, ctx->ctx_used_ibrs[0], is_loaded, can_access_pmu)); | ||
3871 | } else { | ||
3872 | CTX_USED_DBR(ctx, rnum); | ||
3873 | |||
3874 | if (can_access_pmu) { | ||
3875 | ia64_set_dbr(rnum, dbreg.val); | ||
3876 | ia64_dv_serialize_data(); | ||
3877 | } | ||
3878 | ctx->ctx_dbrs[rnum] = dbreg.val; | ||
3879 | |||
3880 | DPRINT(("write dbr%u=0x%lx used_dbrs=0x%x ld=%d apmu=%d\n", | ||
3881 | rnum, dbreg.val, ctx->ctx_used_dbrs[0], is_loaded, can_access_pmu)); | ||
3882 | } | ||
3883 | } | ||
3884 | |||
3885 | return 0; | ||
3886 | |||
3887 | abort_mission: | ||
3888 | /* | ||
3889 | * in case it was our first attempt, we undo the global modifications | ||
3890 | */ | ||
3891 | if (first_time) { | ||
3892 | LOCK_PFS(flags); | ||
3893 | if (ctx->ctx_fl_system) { | ||
3894 | pfm_sessions.pfs_sys_use_dbregs--; | ||
3895 | } | ||
3896 | UNLOCK_PFS(flags); | ||
3897 | ctx->ctx_fl_using_dbreg = 0; | ||
3898 | } | ||
3899 | /* | ||
3900 | * install error return flag | ||
3901 | */ | ||
3902 | PFM_REG_RETFLAG_SET(req->dbreg_flags, PFM_REG_RETFL_EINVAL); | ||
3903 | |||
3904 | return ret; | ||
3905 | } | ||
3906 | |||
3907 | static int | ||
3908 | pfm_write_ibrs(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs) | ||
3909 | { | ||
3910 | return pfm_write_ibr_dbr(PFM_CODE_RR, ctx, arg, count, regs); | ||
3911 | } | ||
3912 | |||
3913 | static int | ||
3914 | pfm_write_dbrs(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs) | ||
3915 | { | ||
3916 | return pfm_write_ibr_dbr(PFM_DATA_RR, ctx, arg, count, regs); | ||
3917 | } | ||
3918 | |||
3919 | int | ||
3920 | pfm_mod_write_ibrs(struct task_struct *task, void *req, unsigned int nreq, struct pt_regs *regs) | ||
3921 | { | ||
3922 | pfm_context_t *ctx; | ||
3923 | |||
3924 | if (req == NULL) return -EINVAL; | ||
3925 | |||
3926 | ctx = GET_PMU_CTX(); | ||
3927 | |||
3928 | if (ctx == NULL) return -EINVAL; | ||
3929 | |||
3930 | /* | ||
3931 | * for now limit to current task, which is enough when calling | ||
3932 | * from overflow handler | ||
3933 | */ | ||
3934 | if (task != current && ctx->ctx_fl_system == 0) return -EBUSY; | ||
3935 | |||
3936 | return pfm_write_ibrs(ctx, req, nreq, regs); | ||
3937 | } | ||
3938 | EXPORT_SYMBOL(pfm_mod_write_ibrs); | ||
3939 | |||
3940 | int | ||
3941 | pfm_mod_write_dbrs(struct task_struct *task, void *req, unsigned int nreq, struct pt_regs *regs) | ||
3942 | { | ||
3943 | pfm_context_t *ctx; | ||
3944 | |||
3945 | if (req == NULL) return -EINVAL; | ||
3946 | |||
3947 | ctx = GET_PMU_CTX(); | ||
3948 | |||
3949 | if (ctx == NULL) return -EINVAL; | ||
3950 | |||
3951 | /* | ||
3952 | * for now limit to current task, which is enough when calling | ||
3953 | * from overflow handler | ||
3954 | */ | ||
3955 | if (task != current && ctx->ctx_fl_system == 0) return -EBUSY; | ||
3956 | |||
3957 | return pfm_write_dbrs(ctx, req, nreq, regs); | ||
3958 | } | ||
3959 | EXPORT_SYMBOL(pfm_mod_write_dbrs); | ||
3960 | |||
3961 | |||
3962 | static int | ||
3963 | pfm_get_features(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs) | ||
3964 | { | ||
3965 | pfarg_features_t *req = (pfarg_features_t *)arg; | ||
3966 | |||
3967 | req->ft_version = PFM_VERSION; | ||
3968 | return 0; | ||
3969 | } | ||
3970 | |||
3971 | static int | ||
3972 | pfm_stop(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs) | ||
3973 | { | ||
3974 | struct pt_regs *tregs; | ||
3975 | struct task_struct *task = PFM_CTX_TASK(ctx); | ||
3976 | int state, is_system; | ||
3977 | |||
3978 | state = ctx->ctx_state; | ||
3979 | is_system = ctx->ctx_fl_system; | ||
3980 | |||
3981 | /* | ||
3982 | * context must be attached to issue the stop command (includes LOADED,MASKED,ZOMBIE) | ||
3983 | */ | ||
3984 | if (state == PFM_CTX_UNLOADED) return -EINVAL; | ||
3985 | |||
3986 | /* | ||
3987 | * In system wide and when the context is loaded, access can only happen | ||
3988 | * when the caller is running on the CPU being monitored by the session. | ||
3989 | * It does not have to be the owner (ctx_task) of the context per se. | ||
3990 | */ | ||
3991 | if (is_system && ctx->ctx_cpu != smp_processor_id()) { | ||
3992 | DPRINT(("should be running on CPU%d\n", ctx->ctx_cpu)); | ||
3993 | return -EBUSY; | ||
3994 | } | ||
3995 | DPRINT(("task [%d] ctx_state=%d is_system=%d\n", | ||
3996 | PFM_CTX_TASK(ctx)->pid, | ||
3997 | state, | ||
3998 | is_system)); | ||
3999 | /* | ||
4000 | * in system mode, we need to update the PMU directly | ||
4001 | * and the user level state of the caller, which may not | ||
4002 | * necessarily be the creator of the context. | ||
4003 | */ | ||
4004 | if (is_system) { | ||
4005 | /* | ||
4006 | * Update local PMU first | ||
4007 | * | ||
4008 | * disable dcr pp | ||
4009 | */ | ||
4010 | ia64_setreg(_IA64_REG_CR_DCR, ia64_getreg(_IA64_REG_CR_DCR) & ~IA64_DCR_PP); | ||
4011 | ia64_srlz_i(); | ||
4012 | |||
4013 | /* | ||
4014 | * update local cpuinfo | ||
4015 | */ | ||
4016 | PFM_CPUINFO_CLEAR(PFM_CPUINFO_DCR_PP); | ||
4017 | |||
4018 | /* | ||
4019 | * stop monitoring, does srlz.i | ||
4020 | */ | ||
4021 | pfm_clear_psr_pp(); | ||
4022 | |||
4023 | /* | ||
4024 | * stop monitoring in the caller | ||
4025 | */ | ||
4026 | ia64_psr(regs)->pp = 0; | ||
4027 | |||
4028 | return 0; | ||
4029 | } | ||
4030 | /* | ||
4031 | * per-task mode | ||
4032 | */ | ||
4033 | |||
4034 | if (task == current) { | ||
4035 | /* stop monitoring at kernel level */ | ||
4036 | pfm_clear_psr_up(); | ||
4037 | |||
4038 | /* | ||
4039 | * stop monitoring at the user level | ||
4040 | */ | ||
4041 | ia64_psr(regs)->up = 0; | ||
4042 | } else { | ||
4043 | tregs = ia64_task_regs(task); | ||
4044 | |||
4045 | /* | ||
4046 | * stop monitoring at the user level | ||
4047 | */ | ||
4048 | ia64_psr(tregs)->up = 0; | ||
4049 | |||
4050 | /* | ||
4051 | * monitoring disabled in kernel at next reschedule | ||
4052 | */ | ||
4053 | ctx->ctx_saved_psr_up = 0; | ||
4054 | DPRINT(("task=[%d]\n", task->pid)); | ||
4055 | } | ||
4056 | return 0; | ||
4057 | } | ||
4058 | |||
4059 | |||
4060 | static int | ||
4061 | pfm_start(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs) | ||
4062 | { | ||
4063 | struct pt_regs *tregs; | ||
4064 | int state, is_system; | ||
4065 | |||
4066 | state = ctx->ctx_state; | ||
4067 | is_system = ctx->ctx_fl_system; | ||
4068 | |||
4069 | if (state != PFM_CTX_LOADED) return -EINVAL; | ||
4070 | |||
4071 | /* | ||
4072 | * In system wide and when the context is loaded, access can only happen | ||
4073 | * when the caller is running on the CPU being monitored by the session. | ||
4074 | * It does not have to be the owner (ctx_task) of the context per se. | ||
4075 | */ | ||
4076 | if (is_system && ctx->ctx_cpu != smp_processor_id()) { | ||
4077 | DPRINT(("should be running on CPU%d\n", ctx->ctx_cpu)); | ||
4078 | return -EBUSY; | ||
4079 | } | ||
4080 | |||
4081 | /* | ||
4082 | * in system mode, we need to update the PMU directly | ||
4083 | * and the user level state of the caller, which may not | ||
4084 | * necessarily be the creator of the context. | ||
4085 | */ | ||
4086 | if (is_system) { | ||
4087 | |||
4088 | /* | ||
4089 | * set user level psr.pp for the caller | ||
4090 | */ | ||
4091 | ia64_psr(regs)->pp = 1; | ||
4092 | |||
4093 | /* | ||
4094 | * now update the local PMU and cpuinfo | ||
4095 | */ | ||
4096 | PFM_CPUINFO_SET(PFM_CPUINFO_DCR_PP); | ||
4097 | |||
4098 | /* | ||
4099 | * start monitoring at kernel level | ||
4100 | */ | ||
4101 | pfm_set_psr_pp(); | ||
4102 | |||
4103 | /* enable dcr pp */ | ||
4104 | ia64_setreg(_IA64_REG_CR_DCR, ia64_getreg(_IA64_REG_CR_DCR) | IA64_DCR_PP); | ||
4105 | ia64_srlz_i(); | ||
4106 | |||
4107 | return 0; | ||
4108 | } | ||
4109 | |||
4110 | /* | ||
4111 | * per-process mode | ||
4112 | */ | ||
4113 | |||
4114 | if (ctx->ctx_task == current) { | ||
4115 | |||
4116 | /* start monitoring at kernel level */ | ||
4117 | pfm_set_psr_up(); | ||
4118 | |||
4119 | /* | ||
4120 | * activate monitoring at user level | ||
4121 | */ | ||
4122 | ia64_psr(regs)->up = 1; | ||
4123 | |||
4124 | } else { | ||
4125 | tregs = ia64_task_regs(ctx->ctx_task); | ||
4126 | |||
4127 | /* | ||
4128 | * start monitoring at the kernel level the next | ||
4129 | * time the task is scheduled | ||
4130 | */ | ||
4131 | ctx->ctx_saved_psr_up = IA64_PSR_UP; | ||
4132 | |||
4133 | /* | ||
4134 | * activate monitoring at user level | ||
4135 | */ | ||
4136 | ia64_psr(tregs)->up = 1; | ||
4137 | } | ||
4138 | return 0; | ||
4139 | } | ||
4140 | |||
4141 | static int | ||
4142 | pfm_get_pmc_reset(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs) | ||
4143 | { | ||
4144 | pfarg_reg_t *req = (pfarg_reg_t *)arg; | ||
4145 | unsigned int cnum; | ||
4146 | int i; | ||
4147 | int ret = -EINVAL; | ||
4148 | |||
4149 | for (i = 0; i < count; i++, req++) { | ||
4150 | |||
4151 | cnum = req->reg_num; | ||
4152 | |||
4153 | if (!PMC_IS_IMPL(cnum)) goto abort_mission; | ||
4154 | |||
4155 | req->reg_value = PMC_DFL_VAL(cnum); | ||
4156 | |||
4157 | PFM_REG_RETFLAG_SET(req->reg_flags, 0); | ||
4158 | |||
4159 | DPRINT(("pmc_reset_val pmc[%u]=0x%lx\n", cnum, req->reg_value)); | ||
4160 | } | ||
4161 | return 0; | ||
4162 | |||
4163 | abort_mission: | ||
4164 | PFM_REG_RETFLAG_SET(req->reg_flags, PFM_REG_RETFL_EINVAL); | ||
4165 | return ret; | ||
4166 | } | ||
4167 | |||
4168 | static int | ||
4169 | pfm_check_task_exist(pfm_context_t *ctx) | ||
4170 | { | ||
4171 | struct task_struct *g, *t; | ||
4172 | int ret = -ESRCH; | ||
4173 | |||
4174 | read_lock(&tasklist_lock); | ||
4175 | |||
4176 | do_each_thread (g, t) { | ||
4177 | if (t->thread.pfm_context == ctx) { | ||
4178 | ret = 0; | ||
4179 | break; | ||
4180 | } | ||
4181 | } while_each_thread (g, t); | ||
4182 | |||
4183 | read_unlock(&tasklist_lock); | ||
4184 | |||
4185 | DPRINT(("pfm_check_task_exist: ret=%d ctx=%p\n", ret, ctx)); | ||
4186 | |||
4187 | return ret; | ||
4188 | } | ||
4189 | |||
4190 | static int | ||
4191 | pfm_context_load(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs) | ||
4192 | { | ||
4193 | struct task_struct *task; | ||
4194 | struct thread_struct *thread; | ||
4195 | struct pfm_context_t *old; | ||
4196 | unsigned long flags; | ||
4197 | #ifndef CONFIG_SMP | ||
4198 | struct task_struct *owner_task = NULL; | ||
4199 | #endif | ||
4200 | pfarg_load_t *req = (pfarg_load_t *)arg; | ||
4201 | unsigned long *pmcs_source, *pmds_source; | ||
4202 | int the_cpu; | ||
4203 | int ret = 0; | ||
4204 | int state, is_system, set_dbregs = 0; | ||
4205 | |||
4206 | state = ctx->ctx_state; | ||
4207 | is_system = ctx->ctx_fl_system; | ||
4208 | /* | ||
4209 | * can only load from unloaded or terminated state | ||
4210 | */ | ||
4211 | if (state != PFM_CTX_UNLOADED) { | ||
4212 | DPRINT(("cannot load to [%d], invalid ctx_state=%d\n", | ||
4213 | req->load_pid, | ||
4214 | ctx->ctx_state)); | ||
4215 | return -EINVAL; | ||
4216 | } | ||
4217 | |||
4218 | DPRINT(("load_pid [%d] using_dbreg=%d\n", req->load_pid, ctx->ctx_fl_using_dbreg)); | ||
4219 | |||
4220 | if (CTX_OVFL_NOBLOCK(ctx) == 0 && req->load_pid == current->pid) { | ||
4221 | DPRINT(("cannot use blocking mode on self\n")); | ||
4222 | return -EINVAL; | ||
4223 | } | ||
4224 | |||
4225 | ret = pfm_get_task(ctx, req->load_pid, &task); | ||
4226 | if (ret) { | ||
4227 | DPRINT(("load_pid [%d] get_task=%d\n", req->load_pid, ret)); | ||
4228 | return ret; | ||
4229 | } | ||
4230 | |||
4231 | ret = -EINVAL; | ||
4232 | |||
4233 | /* | ||
4234 | * system wide is self monitoring only | ||
4235 | */ | ||
4236 | if (is_system && task != current) { | ||
4237 | DPRINT(("system wide is self monitoring only load_pid=%d\n", | ||
4238 | req->load_pid)); | ||
4239 | goto error; | ||
4240 | } | ||
4241 | |||
4242 | thread = &task->thread; | ||
4243 | |||
4244 | ret = 0; | ||
4245 | /* | ||
4246 | * cannot load a context which is using range restrictions, | ||
4247 | * into a task that is being debugged. | ||
4248 | */ | ||
4249 | if (ctx->ctx_fl_using_dbreg) { | ||
4250 | if (thread->flags & IA64_THREAD_DBG_VALID) { | ||
4251 | ret = -EBUSY; | ||
4252 | DPRINT(("load_pid [%d] task is debugged, cannot load range restrictions\n", req->load_pid)); | ||
4253 | goto error; | ||
4254 | } | ||
4255 | LOCK_PFS(flags); | ||
4256 | |||
4257 | if (is_system) { | ||
4258 | if (pfm_sessions.pfs_ptrace_use_dbregs) { | ||
4259 | DPRINT(("cannot load [%d] dbregs in use\n", task->pid)); | ||
4260 | ret = -EBUSY; | ||
4261 | } else { | ||
4262 | pfm_sessions.pfs_sys_use_dbregs++; | ||
4263 | DPRINT(("load [%d] increased sys_use_dbreg=%u\n", task->pid, pfm_sessions.pfs_sys_use_dbregs)); | ||
4264 | set_dbregs = 1; | ||
4265 | } | ||
4266 | } | ||
4267 | |||
4268 | UNLOCK_PFS(flags); | ||
4269 | |||
4270 | if (ret) goto error; | ||
4271 | } | ||
4272 | |||
4273 | /* | ||
4274 | * SMP system-wide monitoring implies self-monitoring. | ||
4275 | * | ||
4276 | * The programming model expects the task to | ||
4277 | * be pinned on a CPU throughout the session. | ||
4278 | * Here we take note of the current CPU at the | ||
4279 | * time the context is loaded. No call from | ||
4280 | * another CPU will be allowed. | ||
4281 | * | ||
4282 | * The pinning via shed_setaffinity() | ||
4283 | * must be done by the calling task prior | ||
4284 | * to this call. | ||
4285 | * | ||
4286 | * systemwide: keep track of CPU this session is supposed to run on | ||
4287 | */ | ||
4288 | the_cpu = ctx->ctx_cpu = smp_processor_id(); | ||
4289 | |||
4290 | ret = -EBUSY; | ||
4291 | /* | ||
4292 | * now reserve the session | ||
4293 | */ | ||
4294 | ret = pfm_reserve_session(current, is_system, the_cpu); | ||
4295 | if (ret) goto error; | ||
4296 | |||
4297 | /* | ||
4298 | * task is necessarily stopped at this point. | ||
4299 | * | ||
4300 | * If the previous context was zombie, then it got removed in | ||
4301 | * pfm_save_regs(). Therefore we should not see it here. | ||
4302 | * If we see a context, then this is an active context | ||
4303 | * | ||
4304 | * XXX: needs to be atomic | ||
4305 | */ | ||
4306 | DPRINT(("before cmpxchg() old_ctx=%p new_ctx=%p\n", | ||
4307 | thread->pfm_context, ctx)); | ||
4308 | |||
4309 | old = ia64_cmpxchg(acq, &thread->pfm_context, NULL, ctx, sizeof(pfm_context_t *)); | ||
4310 | if (old != NULL) { | ||
4311 | DPRINT(("load_pid [%d] already has a context\n", req->load_pid)); | ||
4312 | goto error_unres; | ||
4313 | } | ||
4314 | |||
4315 | pfm_reset_msgq(ctx); | ||
4316 | |||
4317 | ctx->ctx_state = PFM_CTX_LOADED; | ||
4318 | |||
4319 | /* | ||
4320 | * link context to task | ||
4321 | */ | ||
4322 | ctx->ctx_task = task; | ||
4323 | |||
4324 | if (is_system) { | ||
4325 | /* | ||
4326 | * we load as stopped | ||
4327 | */ | ||
4328 | PFM_CPUINFO_SET(PFM_CPUINFO_SYST_WIDE); | ||
4329 | PFM_CPUINFO_CLEAR(PFM_CPUINFO_DCR_PP); | ||
4330 | |||
4331 | if (ctx->ctx_fl_excl_idle) PFM_CPUINFO_SET(PFM_CPUINFO_EXCL_IDLE); | ||
4332 | } else { | ||
4333 | thread->flags |= IA64_THREAD_PM_VALID; | ||
4334 | } | ||
4335 | |||
4336 | /* | ||
4337 | * propagate into thread-state | ||
4338 | */ | ||
4339 | pfm_copy_pmds(task, ctx); | ||
4340 | pfm_copy_pmcs(task, ctx); | ||
4341 | |||
4342 | pmcs_source = thread->pmcs; | ||
4343 | pmds_source = thread->pmds; | ||
4344 | |||
4345 | /* | ||
4346 | * always the case for system-wide | ||
4347 | */ | ||
4348 | if (task == current) { | ||
4349 | |||
4350 | if (is_system == 0) { | ||
4351 | |||
4352 | /* allow user level control */ | ||
4353 | ia64_psr(regs)->sp = 0; | ||
4354 | DPRINT(("clearing psr.sp for [%d]\n", task->pid)); | ||
4355 | |||
4356 | SET_LAST_CPU(ctx, smp_processor_id()); | ||
4357 | INC_ACTIVATION(); | ||
4358 | SET_ACTIVATION(ctx); | ||
4359 | #ifndef CONFIG_SMP | ||
4360 | /* | ||
4361 | * push the other task out, if any | ||
4362 | */ | ||
4363 | owner_task = GET_PMU_OWNER(); | ||
4364 | if (owner_task) pfm_lazy_save_regs(owner_task); | ||
4365 | #endif | ||
4366 | } | ||
4367 | /* | ||
4368 | * load all PMD from ctx to PMU (as opposed to thread state) | ||
4369 | * restore all PMC from ctx to PMU | ||
4370 | */ | ||
4371 | pfm_restore_pmds(pmds_source, ctx->ctx_all_pmds[0]); | ||
4372 | pfm_restore_pmcs(pmcs_source, ctx->ctx_all_pmcs[0]); | ||
4373 | |||
4374 | ctx->ctx_reload_pmcs[0] = 0UL; | ||
4375 | ctx->ctx_reload_pmds[0] = 0UL; | ||
4376 | |||
4377 | /* | ||
4378 | * guaranteed safe by earlier check against DBG_VALID | ||
4379 | */ | ||
4380 | if (ctx->ctx_fl_using_dbreg) { | ||
4381 | pfm_restore_ibrs(ctx->ctx_ibrs, pmu_conf->num_ibrs); | ||
4382 | pfm_restore_dbrs(ctx->ctx_dbrs, pmu_conf->num_dbrs); | ||
4383 | } | ||
4384 | /* | ||
4385 | * set new ownership | ||
4386 | */ | ||
4387 | SET_PMU_OWNER(task, ctx); | ||
4388 | |||
4389 | DPRINT(("context loaded on PMU for [%d]\n", task->pid)); | ||
4390 | } else { | ||
4391 | /* | ||
4392 | * when not current, task MUST be stopped, so this is safe | ||
4393 | */ | ||
4394 | regs = ia64_task_regs(task); | ||
4395 | |||
4396 | /* force a full reload */ | ||
4397 | ctx->ctx_last_activation = PFM_INVALID_ACTIVATION; | ||
4398 | SET_LAST_CPU(ctx, -1); | ||
4399 | |||
4400 | /* initial saved psr (stopped) */ | ||
4401 | ctx->ctx_saved_psr_up = 0UL; | ||
4402 | ia64_psr(regs)->up = ia64_psr(regs)->pp = 0; | ||
4403 | } | ||
4404 | |||
4405 | ret = 0; | ||
4406 | |||
4407 | error_unres: | ||
4408 | if (ret) pfm_unreserve_session(ctx, ctx->ctx_fl_system, the_cpu); | ||
4409 | error: | ||
4410 | /* | ||
4411 | * we must undo the dbregs setting (for system-wide) | ||
4412 | */ | ||
4413 | if (ret && set_dbregs) { | ||
4414 | LOCK_PFS(flags); | ||
4415 | pfm_sessions.pfs_sys_use_dbregs--; | ||
4416 | UNLOCK_PFS(flags); | ||
4417 | } | ||
4418 | /* | ||
4419 | * release task, there is now a link with the context | ||
4420 | */ | ||
4421 | if (is_system == 0 && task != current) { | ||
4422 | pfm_put_task(task); | ||
4423 | |||
4424 | if (ret == 0) { | ||
4425 | ret = pfm_check_task_exist(ctx); | ||
4426 | if (ret) { | ||
4427 | ctx->ctx_state = PFM_CTX_UNLOADED; | ||
4428 | ctx->ctx_task = NULL; | ||
4429 | } | ||
4430 | } | ||
4431 | } | ||
4432 | return ret; | ||
4433 | } | ||
4434 | |||
4435 | /* | ||
4436 | * in this function, we do not need to increase the use count | ||
4437 | * for the task via get_task_struct(), because we hold the | ||
4438 | * context lock. If the task were to disappear while having | ||
4439 | * a context attached, it would go through pfm_exit_thread() | ||
4440 | * which also grabs the context lock and would therefore be blocked | ||
4441 | * until we are here. | ||
4442 | */ | ||
4443 | static void pfm_flush_pmds(struct task_struct *, pfm_context_t *ctx); | ||
4444 | |||
4445 | static int | ||
4446 | pfm_context_unload(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs) | ||
4447 | { | ||
4448 | struct task_struct *task = PFM_CTX_TASK(ctx); | ||
4449 | struct pt_regs *tregs; | ||
4450 | int prev_state, is_system; | ||
4451 | int ret; | ||
4452 | |||
4453 | DPRINT(("ctx_state=%d task [%d]\n", ctx->ctx_state, task ? task->pid : -1)); | ||
4454 | |||
4455 | prev_state = ctx->ctx_state; | ||
4456 | is_system = ctx->ctx_fl_system; | ||
4457 | |||
4458 | /* | ||
4459 | * unload only when necessary | ||
4460 | */ | ||
4461 | if (prev_state == PFM_CTX_UNLOADED) { | ||
4462 | DPRINT(("ctx_state=%d, nothing to do\n", prev_state)); | ||
4463 | return 0; | ||
4464 | } | ||
4465 | |||
4466 | /* | ||
4467 | * clear psr and dcr bits | ||
4468 | */ | ||
4469 | ret = pfm_stop(ctx, NULL, 0, regs); | ||
4470 | if (ret) return ret; | ||
4471 | |||
4472 | ctx->ctx_state = PFM_CTX_UNLOADED; | ||
4473 | |||
4474 | /* | ||
4475 | * in system mode, we need to update the PMU directly | ||
4476 | * and the user level state of the caller, which may not | ||
4477 | * necessarily be the creator of the context. | ||
4478 | */ | ||
4479 | if (is_system) { | ||
4480 | |||
4481 | /* | ||
4482 | * Update cpuinfo | ||
4483 | * | ||
4484 | * local PMU is taken care of in pfm_stop() | ||
4485 | */ | ||
4486 | PFM_CPUINFO_CLEAR(PFM_CPUINFO_SYST_WIDE); | ||
4487 | PFM_CPUINFO_CLEAR(PFM_CPUINFO_EXCL_IDLE); | ||
4488 | |||
4489 | /* | ||
4490 | * save PMDs in context | ||
4491 | * release ownership | ||
4492 | */ | ||
4493 | pfm_flush_pmds(current, ctx); | ||
4494 | |||
4495 | /* | ||
4496 | * at this point we are done with the PMU | ||
4497 | * so we can unreserve the resource. | ||
4498 | */ | ||
4499 | if (prev_state != PFM_CTX_ZOMBIE) | ||
4500 | pfm_unreserve_session(ctx, 1 , ctx->ctx_cpu); | ||
4501 | |||
4502 | /* | ||
4503 | * disconnect context from task | ||
4504 | */ | ||
4505 | task->thread.pfm_context = NULL; | ||
4506 | /* | ||
4507 | * disconnect task from context | ||
4508 | */ | ||
4509 | ctx->ctx_task = NULL; | ||
4510 | |||
4511 | /* | ||
4512 | * There is nothing more to cleanup here. | ||
4513 | */ | ||
4514 | return 0; | ||
4515 | } | ||
4516 | |||
4517 | /* | ||
4518 | * per-task mode | ||
4519 | */ | ||
4520 | tregs = task == current ? regs : ia64_task_regs(task); | ||
4521 | |||
4522 | if (task == current) { | ||
4523 | /* | ||
4524 | * cancel user level control | ||
4525 | */ | ||
4526 | ia64_psr(regs)->sp = 1; | ||
4527 | |||
4528 | DPRINT(("setting psr.sp for [%d]\n", task->pid)); | ||
4529 | } | ||
4530 | /* | ||
4531 | * save PMDs to context | ||
4532 | * release ownership | ||
4533 | */ | ||
4534 | pfm_flush_pmds(task, ctx); | ||
4535 | |||
4536 | /* | ||
4537 | * at this point we are done with the PMU | ||
4538 | * so we can unreserve the resource. | ||
4539 | * | ||
4540 | * when state was ZOMBIE, we have already unreserved. | ||
4541 | */ | ||
4542 | if (prev_state != PFM_CTX_ZOMBIE) | ||
4543 | pfm_unreserve_session(ctx, 0 , ctx->ctx_cpu); | ||
4544 | |||
4545 | /* | ||
4546 | * reset activation counter and psr | ||
4547 | */ | ||
4548 | ctx->ctx_last_activation = PFM_INVALID_ACTIVATION; | ||
4549 | SET_LAST_CPU(ctx, -1); | ||
4550 | |||
4551 | /* | ||
4552 | * PMU state will not be restored | ||
4553 | */ | ||
4554 | task->thread.flags &= ~IA64_THREAD_PM_VALID; | ||
4555 | |||
4556 | /* | ||
4557 | * break links between context and task | ||
4558 | */ | ||
4559 | task->thread.pfm_context = NULL; | ||
4560 | ctx->ctx_task = NULL; | ||
4561 | |||
4562 | PFM_SET_WORK_PENDING(task, 0); | ||
4563 | |||
4564 | ctx->ctx_fl_trap_reason = PFM_TRAP_REASON_NONE; | ||
4565 | ctx->ctx_fl_can_restart = 0; | ||
4566 | ctx->ctx_fl_going_zombie = 0; | ||
4567 | |||
4568 | DPRINT(("disconnected [%d] from context\n", task->pid)); | ||
4569 | |||
4570 | return 0; | ||
4571 | } | ||
4572 | |||
4573 | |||
4574 | /* | ||
4575 | * called only from exit_thread(): task == current | ||
4576 | * we come here only if current has a context attached (loaded or masked) | ||
4577 | */ | ||
4578 | void | ||
4579 | pfm_exit_thread(struct task_struct *task) | ||
4580 | { | ||
4581 | pfm_context_t *ctx; | ||
4582 | unsigned long flags; | ||
4583 | struct pt_regs *regs = ia64_task_regs(task); | ||
4584 | int ret, state; | ||
4585 | int free_ok = 0; | ||
4586 | |||
4587 | ctx = PFM_GET_CTX(task); | ||
4588 | |||
4589 | PROTECT_CTX(ctx, flags); | ||
4590 | |||
4591 | DPRINT(("state=%d task [%d]\n", ctx->ctx_state, task->pid)); | ||
4592 | |||
4593 | state = ctx->ctx_state; | ||
4594 | switch(state) { | ||
4595 | case PFM_CTX_UNLOADED: | ||
4596 | /* | ||
4597 | * only comes to thios function if pfm_context is not NULL, i.e., cannot | ||
4598 | * be in unloaded state | ||
4599 | */ | ||
4600 | printk(KERN_ERR "perfmon: pfm_exit_thread [%d] ctx unloaded\n", task->pid); | ||
4601 | break; | ||
4602 | case PFM_CTX_LOADED: | ||
4603 | case PFM_CTX_MASKED: | ||
4604 | ret = pfm_context_unload(ctx, NULL, 0, regs); | ||
4605 | if (ret) { | ||
4606 | printk(KERN_ERR "perfmon: pfm_exit_thread [%d] state=%d unload failed %d\n", task->pid, state, ret); | ||
4607 | } | ||
4608 | DPRINT(("ctx unloaded for current state was %d\n", state)); | ||
4609 | |||
4610 | pfm_end_notify_user(ctx); | ||
4611 | break; | ||
4612 | case PFM_CTX_ZOMBIE: | ||
4613 | ret = pfm_context_unload(ctx, NULL, 0, regs); | ||
4614 | if (ret) { | ||
4615 | printk(KERN_ERR "perfmon: pfm_exit_thread [%d] state=%d unload failed %d\n", task->pid, state, ret); | ||
4616 | } | ||
4617 | free_ok = 1; | ||
4618 | break; | ||
4619 | default: | ||
4620 | printk(KERN_ERR "perfmon: pfm_exit_thread [%d] unexpected state=%d\n", task->pid, state); | ||
4621 | break; | ||
4622 | } | ||
4623 | UNPROTECT_CTX(ctx, flags); | ||
4624 | |||
4625 | { u64 psr = pfm_get_psr(); | ||
4626 | BUG_ON(psr & (IA64_PSR_UP|IA64_PSR_PP)); | ||
4627 | BUG_ON(GET_PMU_OWNER()); | ||
4628 | BUG_ON(ia64_psr(regs)->up); | ||
4629 | BUG_ON(ia64_psr(regs)->pp); | ||
4630 | } | ||
4631 | |||
4632 | /* | ||
4633 | * All memory free operations (especially for vmalloc'ed memory) | ||
4634 | * MUST be done with interrupts ENABLED. | ||
4635 | */ | ||
4636 | if (free_ok) pfm_context_free(ctx); | ||
4637 | } | ||
4638 | |||
/*
 * Command dispatch table used by sys_perfmonctl(), which indexes it
 * directly with the command number passed by the caller.
 *
 * functions MUST be listed in the increasing order of their index (see perfmon.h)
 *
 * Indices without a command are filled with PFM_CMD_NONE. Its first
 * initializer is NULL; presumably that is the cmd_func member (the
 * pfm_cmd_desc_t layout is not visible here), in which case
 * sys_perfmonctl() rejects such a command with -EINVAL.
 */
/* command with arguments: handler, its name as a string, flags, argument count, size of one argument, optional extra-size callback */
#define PFM_CMD(name, flags, arg_count, arg_type, getsz) { name, #name, flags, arg_count, sizeof(arg_type), getsz }
/* command without arguments: count and size are 0, no extra-size callback */
#define PFM_CMD_S(name, flags) { name, #name, flags, 0, 0, NULL }
/* flag combinations shared by several entries */
#define PFM_CMD_PCLRWS (PFM_CMD_FD|PFM_CMD_ARG_RW|PFM_CMD_STOP)
#define PFM_CMD_PCLRW (PFM_CMD_FD|PFM_CMD_ARG_RW)
/* placeholder for an unused command index */
#define PFM_CMD_NONE { NULL, "no-cmd", 0, 0, 0, NULL}

static pfm_cmd_desc_t pfm_cmd_tab[]={
/* 0 */PFM_CMD_NONE,
/* 1 */PFM_CMD(pfm_write_pmcs, PFM_CMD_PCLRWS, PFM_CMD_ARG_MANY, pfarg_reg_t, NULL),
/* 2 */PFM_CMD(pfm_write_pmds, PFM_CMD_PCLRWS, PFM_CMD_ARG_MANY, pfarg_reg_t, NULL),
/* 3 */PFM_CMD(pfm_read_pmds, PFM_CMD_PCLRWS, PFM_CMD_ARG_MANY, pfarg_reg_t, NULL),
/* 4 */PFM_CMD_S(pfm_stop, PFM_CMD_PCLRWS),
/* 5 */PFM_CMD_S(pfm_start, PFM_CMD_PCLRWS),
/* 6 */PFM_CMD_NONE,
/* 7 */PFM_CMD_NONE,
/* 8 */PFM_CMD(pfm_context_create, PFM_CMD_ARG_RW, 1, pfarg_context_t, pfm_ctx_getsize),
/* 9 */PFM_CMD_NONE,
/* 10 */PFM_CMD_S(pfm_restart, PFM_CMD_PCLRW),
/* 11 */PFM_CMD_NONE,
/* 12 */PFM_CMD(pfm_get_features, PFM_CMD_ARG_RW, 1, pfarg_features_t, NULL),
/* 13 */PFM_CMD(pfm_debug, 0, 1, unsigned int, NULL),
/* 14 */PFM_CMD_NONE,
/* 15 */PFM_CMD(pfm_get_pmc_reset, PFM_CMD_ARG_RW, PFM_CMD_ARG_MANY, pfarg_reg_t, NULL),
/* 16 */PFM_CMD(pfm_context_load, PFM_CMD_PCLRWS, 1, pfarg_load_t, NULL),
/* 17 */PFM_CMD_S(pfm_context_unload, PFM_CMD_PCLRWS),
/* 18 */PFM_CMD_NONE,
/* 19 */PFM_CMD_NONE,
/* 20 */PFM_CMD_NONE,
/* 21 */PFM_CMD_NONE,
/* 22 */PFM_CMD_NONE,
/* 23 */PFM_CMD_NONE,
/* 24 */PFM_CMD_NONE,
/* 25 */PFM_CMD_NONE,
/* 26 */PFM_CMD_NONE,
/* 27 */PFM_CMD_NONE,
/* 28 */PFM_CMD_NONE,
/* 29 */PFM_CMD_NONE,
/* 30 */PFM_CMD_NONE,
/* 31 */PFM_CMD_NONE,
/* 32 */PFM_CMD(pfm_write_ibrs, PFM_CMD_PCLRWS, PFM_CMD_ARG_MANY, pfarg_dbreg_t, NULL),
/* 33 */PFM_CMD(pfm_write_dbrs, PFM_CMD_PCLRWS, PFM_CMD_ARG_MANY, pfarg_dbreg_t, NULL)
};
/* number of entries in pfm_cmd_tab; sys_perfmonctl() rejects any cmd >= this value */
#define PFM_CMD_COUNT (sizeof(pfm_cmd_tab)/sizeof(pfm_cmd_desc_t))
4685 | |||
4686 | static int | ||
4687 | pfm_check_task_state(pfm_context_t *ctx, int cmd, unsigned long flags) | ||
4688 | { | ||
4689 | struct task_struct *task; | ||
4690 | int state, old_state; | ||
4691 | |||
4692 | recheck: | ||
4693 | state = ctx->ctx_state; | ||
4694 | task = ctx->ctx_task; | ||
4695 | |||
4696 | if (task == NULL) { | ||
4697 | DPRINT(("context %d no task, state=%d\n", ctx->ctx_fd, state)); | ||
4698 | return 0; | ||
4699 | } | ||
4700 | |||
4701 | DPRINT(("context %d state=%d [%d] task_state=%ld must_stop=%d\n", | ||
4702 | ctx->ctx_fd, | ||
4703 | state, | ||
4704 | task->pid, | ||
4705 | task->state, PFM_CMD_STOPPED(cmd))); | ||
4706 | |||
4707 | /* | ||
4708 | * self-monitoring always ok. | ||
4709 | * | ||
4710 | * for system-wide the caller can either be the creator of the | ||
4711 | * context (to one to which the context is attached to) OR | ||
4712 | * a task running on the same CPU as the session. | ||
4713 | */ | ||
4714 | if (task == current || ctx->ctx_fl_system) return 0; | ||
4715 | |||
4716 | /* | ||
4717 | * if context is UNLOADED we are safe to go | ||
4718 | */ | ||
4719 | if (state == PFM_CTX_UNLOADED) return 0; | ||
4720 | |||
4721 | /* | ||
4722 | * no command can operate on a zombie context | ||
4723 | */ | ||
4724 | if (state == PFM_CTX_ZOMBIE) { | ||
4725 | DPRINT(("cmd %d state zombie cannot operate on context\n", cmd)); | ||
4726 | return -EINVAL; | ||
4727 | } | ||
4728 | |||
4729 | /* | ||
4730 | * context is LOADED or MASKED. Some commands may need to have | ||
4731 | * the task stopped. | ||
4732 | * | ||
4733 | * We could lift this restriction for UP but it would mean that | ||
4734 | * the user has no guarantee the task would not run between | ||
4735 | * two successive calls to perfmonctl(). That's probably OK. | ||
4736 | * If this user wants to ensure the task does not run, then | ||
4737 | * the task must be stopped. | ||
4738 | */ | ||
4739 | if (PFM_CMD_STOPPED(cmd)) { | ||
4740 | if ((task->state != TASK_STOPPED) && (task->state != TASK_TRACED)) { | ||
4741 | DPRINT(("[%d] task not in stopped state\n", task->pid)); | ||
4742 | return -EBUSY; | ||
4743 | } | ||
4744 | /* | ||
4745 | * task is now stopped, wait for ctxsw out | ||
4746 | * | ||
4747 | * This is an interesting point in the code. | ||
4748 | * We need to unprotect the context because | ||
4749 | * the pfm_save_regs() routines needs to grab | ||
4750 | * the same lock. There are danger in doing | ||
4751 | * this because it leaves a window open for | ||
4752 | * another task to get access to the context | ||
4753 | * and possibly change its state. The one thing | ||
4754 | * that is not possible is for the context to disappear | ||
4755 | * because we are protected by the VFS layer, i.e., | ||
4756 | * get_fd()/put_fd(). | ||
4757 | */ | ||
4758 | old_state = state; | ||
4759 | |||
4760 | UNPROTECT_CTX(ctx, flags); | ||
4761 | |||
4762 | wait_task_inactive(task); | ||
4763 | |||
4764 | PROTECT_CTX(ctx, flags); | ||
4765 | |||
4766 | /* | ||
4767 | * we must recheck to verify if state has changed | ||
4768 | */ | ||
4769 | if (ctx->ctx_state != old_state) { | ||
4770 | DPRINT(("old_state=%d new_state=%d\n", old_state, ctx->ctx_state)); | ||
4771 | goto recheck; | ||
4772 | } | ||
4773 | } | ||
4774 | return 0; | ||
4775 | } | ||
4776 | |||
4777 | /* | ||
4778 | * system-call entry point (must return long) | ||
4779 | */ | ||
4780 | asmlinkage long | ||
4781 | sys_perfmonctl (int fd, int cmd, void __user *arg, int count) | ||
4782 | { | ||
4783 | struct file *file = NULL; | ||
4784 | pfm_context_t *ctx = NULL; | ||
4785 | unsigned long flags = 0UL; | ||
4786 | void *args_k = NULL; | ||
4787 | long ret; /* will expand int return types */ | ||
4788 | size_t base_sz, sz, xtra_sz = 0; | ||
4789 | int narg, completed_args = 0, call_made = 0, cmd_flags; | ||
4790 | int (*func)(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs); | ||
4791 | int (*getsize)(void *arg, size_t *sz); | ||
4792 | #define PFM_MAX_ARGSIZE 4096 | ||
4793 | |||
4794 | /* | ||
4795 | * reject any call if perfmon was disabled at initialization | ||
4796 | */ | ||
4797 | if (unlikely(pmu_conf == NULL)) return -ENOSYS; | ||
4798 | |||
4799 | if (unlikely(cmd < 0 || cmd >= PFM_CMD_COUNT)) { | ||
4800 | DPRINT(("invalid cmd=%d\n", cmd)); | ||
4801 | return -EINVAL; | ||
4802 | } | ||
4803 | |||
4804 | func = pfm_cmd_tab[cmd].cmd_func; | ||
4805 | narg = pfm_cmd_tab[cmd].cmd_narg; | ||
4806 | base_sz = pfm_cmd_tab[cmd].cmd_argsize; | ||
4807 | getsize = pfm_cmd_tab[cmd].cmd_getsize; | ||
4808 | cmd_flags = pfm_cmd_tab[cmd].cmd_flags; | ||
4809 | |||
4810 | if (unlikely(func == NULL)) { | ||
4811 | DPRINT(("invalid cmd=%d\n", cmd)); | ||
4812 | return -EINVAL; | ||
4813 | } | ||
4814 | |||
4815 | DPRINT(("cmd=%s idx=%d narg=0x%x argsz=%lu count=%d\n", | ||
4816 | PFM_CMD_NAME(cmd), | ||
4817 | cmd, | ||
4818 | narg, | ||
4819 | base_sz, | ||
4820 | count)); | ||
4821 | |||
4822 | /* | ||
4823 | * check if number of arguments matches what the command expects | ||
4824 | */ | ||
4825 | if (unlikely((narg == PFM_CMD_ARG_MANY && count <= 0) || (narg > 0 && narg != count))) | ||
4826 | return -EINVAL; | ||
4827 | |||
4828 | restart_args: | ||
4829 | sz = xtra_sz + base_sz*count; | ||
4830 | /* | ||
4831 | * limit abuse to min page size | ||
4832 | */ | ||
4833 | if (unlikely(sz > PFM_MAX_ARGSIZE)) { | ||
4834 | printk(KERN_ERR "perfmon: [%d] argument too big %lu\n", current->pid, sz); | ||
4835 | return -E2BIG; | ||
4836 | } | ||
4837 | |||
4838 | /* | ||
4839 | * allocate default-sized argument buffer | ||
4840 | */ | ||
4841 | if (likely(count && args_k == NULL)) { | ||
4842 | args_k = kmalloc(PFM_MAX_ARGSIZE, GFP_KERNEL); | ||
4843 | if (args_k == NULL) return -ENOMEM; | ||
4844 | } | ||
4845 | |||
4846 | ret = -EFAULT; | ||
4847 | |||
4848 | /* | ||
4849 | * copy arguments | ||
4850 | * | ||
4851 | * assume sz = 0 for command without parameters | ||
4852 | */ | ||
4853 | if (sz && copy_from_user(args_k, arg, sz)) { | ||
4854 | DPRINT(("cannot copy_from_user %lu bytes @%p\n", sz, arg)); | ||
4855 | goto error_args; | ||
4856 | } | ||
4857 | |||
4858 | /* | ||
4859 | * check if command supports extra parameters | ||
4860 | */ | ||
4861 | if (completed_args == 0 && getsize) { | ||
4862 | /* | ||
4863 | * get extra parameters size (based on main argument) | ||
4864 | */ | ||
4865 | ret = (*getsize)(args_k, &xtra_sz); | ||
4866 | if (ret) goto error_args; | ||
4867 | |||
4868 | completed_args = 1; | ||
4869 | |||
4870 | DPRINT(("restart_args sz=%lu xtra_sz=%lu\n", sz, xtra_sz)); | ||
4871 | |||
4872 | /* retry if necessary */ | ||
4873 | if (likely(xtra_sz)) goto restart_args; | ||
4874 | } | ||
4875 | |||
4876 | if (unlikely((cmd_flags & PFM_CMD_FD) == 0)) goto skip_fd; | ||
4877 | |||
4878 | ret = -EBADF; | ||
4879 | |||
4880 | file = fget(fd); | ||
4881 | if (unlikely(file == NULL)) { | ||
4882 | DPRINT(("invalid fd %d\n", fd)); | ||
4883 | goto error_args; | ||
4884 | } | ||
4885 | if (unlikely(PFM_IS_FILE(file) == 0)) { | ||
4886 | DPRINT(("fd %d not related to perfmon\n", fd)); | ||
4887 | goto error_args; | ||
4888 | } | ||
4889 | |||
4890 | ctx = (pfm_context_t *)file->private_data; | ||
4891 | if (unlikely(ctx == NULL)) { | ||
4892 | DPRINT(("no context for fd %d\n", fd)); | ||
4893 | goto error_args; | ||
4894 | } | ||
4895 | prefetch(&ctx->ctx_state); | ||
4896 | |||
4897 | PROTECT_CTX(ctx, flags); | ||
4898 | |||
4899 | /* | ||
4900 | * check task is stopped | ||
4901 | */ | ||
4902 | ret = pfm_check_task_state(ctx, cmd, flags); | ||
4903 | if (unlikely(ret)) goto abort_locked; | ||
4904 | |||
4905 | skip_fd: | ||
4906 | ret = (*func)(ctx, args_k, count, ia64_task_regs(current)); | ||
4907 | |||
4908 | call_made = 1; | ||
4909 | |||
4910 | abort_locked: | ||
4911 | if (likely(ctx)) { | ||
4912 | DPRINT(("context unlocked\n")); | ||
4913 | UNPROTECT_CTX(ctx, flags); | ||
4914 | fput(file); | ||
4915 | } | ||
4916 | |||
4917 | /* copy argument back to user, if needed */ | ||
4918 | if (call_made && PFM_CMD_RW_ARG(cmd) && copy_to_user(arg, args_k, base_sz*count)) ret = -EFAULT; | ||
4919 | |||
4920 | error_args: | ||
4921 | if (args_k) kfree(args_k); | ||
4922 | |||
4923 | DPRINT(("cmd=%s ret=%ld\n", PFM_CMD_NAME(cmd), ret)); | ||
4924 | |||
4925 | return ret; | ||
4926 | } | ||
4927 | |||
4928 | static void | ||
4929 | pfm_resume_after_ovfl(pfm_context_t *ctx, unsigned long ovfl_regs, struct pt_regs *regs) | ||
4930 | { | ||
4931 | pfm_buffer_fmt_t *fmt = ctx->ctx_buf_fmt; | ||
4932 | pfm_ovfl_ctrl_t rst_ctrl; | ||
4933 | int state; | ||
4934 | int ret = 0; | ||
4935 | |||
4936 | state = ctx->ctx_state; | ||
4937 | /* | ||
4938 | * Unlock sampling buffer and reset index atomically | ||
4939 | * XXX: not really needed when blocking | ||
4940 | */ | ||
4941 | if (CTX_HAS_SMPL(ctx)) { | ||
4942 | |||
4943 | rst_ctrl.bits.mask_monitoring = 0; | ||
4944 | rst_ctrl.bits.reset_ovfl_pmds = 0; | ||
4945 | |||
4946 | if (state == PFM_CTX_LOADED) | ||
4947 | ret = pfm_buf_fmt_restart_active(fmt, current, &rst_ctrl, ctx->ctx_smpl_hdr, regs); | ||
4948 | else | ||
4949 | ret = pfm_buf_fmt_restart(fmt, current, &rst_ctrl, ctx->ctx_smpl_hdr, regs); | ||
4950 | } else { | ||
4951 | rst_ctrl.bits.mask_monitoring = 0; | ||
4952 | rst_ctrl.bits.reset_ovfl_pmds = 1; | ||
4953 | } | ||
4954 | |||
4955 | if (ret == 0) { | ||
4956 | if (rst_ctrl.bits.reset_ovfl_pmds) { | ||
4957 | pfm_reset_regs(ctx, &ovfl_regs, PFM_PMD_LONG_RESET); | ||
4958 | } | ||
4959 | if (rst_ctrl.bits.mask_monitoring == 0) { | ||
4960 | DPRINT(("resuming monitoring\n")); | ||
4961 | if (ctx->ctx_state == PFM_CTX_MASKED) pfm_restore_monitoring(current); | ||
4962 | } else { | ||
4963 | DPRINT(("stopping monitoring\n")); | ||
4964 | //pfm_stop_monitoring(current, regs); | ||
4965 | } | ||
4966 | ctx->ctx_state = PFM_CTX_LOADED; | ||
4967 | } | ||
4968 | } | ||
4969 | |||
4970 | /* | ||
4971 | * context MUST BE LOCKED when calling | ||
4972 | * can only be called for current | ||
4973 | */ | ||
4974 | static void | ||
4975 | pfm_context_force_terminate(pfm_context_t *ctx, struct pt_regs *regs) | ||
4976 | { | ||
4977 | int ret; | ||
4978 | |||
4979 | DPRINT(("entering for [%d]\n", current->pid)); | ||
4980 | |||
4981 | ret = pfm_context_unload(ctx, NULL, 0, regs); | ||
4982 | if (ret) { | ||
4983 | printk(KERN_ERR "pfm_context_force_terminate: [%d] unloaded failed with %d\n", current->pid, ret); | ||
4984 | } | ||
4985 | |||
4986 | /* | ||
4987 | * and wakeup controlling task, indicating we are now disconnected | ||
4988 | */ | ||
4989 | wake_up_interruptible(&ctx->ctx_zombieq); | ||
4990 | |||
4991 | /* | ||
4992 | * given that context is still locked, the controlling | ||
4993 | * task will only get access when we return from | ||
4994 | * pfm_handle_work(). | ||
4995 | */ | ||
4996 | } | ||
4997 | |||
4998 | static int pfm_ovfl_notify_user(pfm_context_t *ctx, unsigned long ovfl_pmds); | ||
4999 | |||
5000 | void | ||
5001 | pfm_handle_work(void) | ||
5002 | { | ||
5003 | pfm_context_t *ctx; | ||
5004 | struct pt_regs *regs; | ||
5005 | unsigned long flags; | ||
5006 | unsigned long ovfl_regs; | ||
5007 | unsigned int reason; | ||
5008 | int ret; | ||
5009 | |||
5010 | ctx = PFM_GET_CTX(current); | ||
5011 | if (ctx == NULL) { | ||
5012 | printk(KERN_ERR "perfmon: [%d] has no PFM context\n", current->pid); | ||
5013 | return; | ||
5014 | } | ||
5015 | |||
5016 | PROTECT_CTX(ctx, flags); | ||
5017 | |||
5018 | PFM_SET_WORK_PENDING(current, 0); | ||
5019 | |||
5020 | pfm_clear_task_notify(); | ||
5021 | |||
5022 | regs = ia64_task_regs(current); | ||
5023 | |||
5024 | /* | ||
5025 | * extract reason for being here and clear | ||
5026 | */ | ||
5027 | reason = ctx->ctx_fl_trap_reason; | ||
5028 | ctx->ctx_fl_trap_reason = PFM_TRAP_REASON_NONE; | ||
5029 | ovfl_regs = ctx->ctx_ovfl_regs[0]; | ||
5030 | |||
5031 | DPRINT(("reason=%d state=%d\n", reason, ctx->ctx_state)); | ||
5032 | |||
5033 | /* | ||
5034 | * must be done before we check for simple-reset mode | ||
5035 | */ | ||
5036 | if (ctx->ctx_fl_going_zombie || ctx->ctx_state == PFM_CTX_ZOMBIE) goto do_zombie; | ||
5037 | |||
5038 | |||
5039 | //if (CTX_OVFL_NOBLOCK(ctx)) goto skip_blocking; | ||
5040 | if (reason == PFM_TRAP_REASON_RESET) goto skip_blocking; | ||
5041 | |||
5042 | UNPROTECT_CTX(ctx, flags); | ||
5043 | |||
5044 | /* | ||
5045 | * pfm_handle_work() is currently called with interrupts disabled. | ||
5046 | * The down_interruptible call may sleep, therefore we | ||
5047 | * must re-enable interrupts to avoid deadlocks. It is | ||
5048 | * safe to do so because this function is called ONLY | ||
5049 | * when returning to user level (PUStk=1), in which case | ||
5050 | * there is no risk of kernel stack overflow due to deep | ||
5051 | * interrupt nesting. | ||
5052 | */ | ||
5053 | BUG_ON(flags & IA64_PSR_I); | ||
5054 | local_irq_enable(); | ||
5055 | |||
5056 | DPRINT(("before block sleeping\n")); | ||
5057 | |||
5058 | /* | ||
5059 | * may go through without blocking on SMP systems | ||
5060 | * if restart has been received already by the time we call down() | ||
5061 | */ | ||
5062 | ret = down_interruptible(&ctx->ctx_restart_sem); | ||
5063 | |||
5064 | DPRINT(("after block sleeping ret=%d\n", ret)); | ||
5065 | |||
5066 | /* | ||
5067 | * disable interrupts to restore state we had upon entering | ||
5068 | * this function | ||
5069 | */ | ||
5070 | local_irq_disable(); | ||
5071 | |||
5072 | PROTECT_CTX(ctx, flags); | ||
5073 | |||
5074 | /* | ||
5075 | * we need to read the ovfl_regs only after wake-up | ||
5076 | * because we may have had pfm_write_pmds() in between | ||
5077 | * and that can changed PMD values and therefore | ||
5078 | * ovfl_regs is reset for these new PMD values. | ||
5079 | */ | ||
5080 | ovfl_regs = ctx->ctx_ovfl_regs[0]; | ||
5081 | |||
5082 | if (ctx->ctx_fl_going_zombie) { | ||
5083 | do_zombie: | ||
5084 | DPRINT(("context is zombie, bailing out\n")); | ||
5085 | pfm_context_force_terminate(ctx, regs); | ||
5086 | goto nothing_to_do; | ||
5087 | } | ||
5088 | /* | ||
5089 | * in case of interruption of down() we don't restart anything | ||
5090 | */ | ||
5091 | if (ret < 0) goto nothing_to_do; | ||
5092 | |||
5093 | skip_blocking: | ||
5094 | pfm_resume_after_ovfl(ctx, ovfl_regs, regs); | ||
5095 | ctx->ctx_ovfl_regs[0] = 0UL; | ||
5096 | |||
5097 | nothing_to_do: | ||
5098 | |||
5099 | UNPROTECT_CTX(ctx, flags); | ||
5100 | } | ||
5101 | |||
5102 | static int | ||
5103 | pfm_notify_user(pfm_context_t *ctx, pfm_msg_t *msg) | ||
5104 | { | ||
5105 | if (ctx->ctx_state == PFM_CTX_ZOMBIE) { | ||
5106 | DPRINT(("ignoring overflow notification, owner is zombie\n")); | ||
5107 | return 0; | ||
5108 | } | ||
5109 | |||
5110 | DPRINT(("waking up somebody\n")); | ||
5111 | |||
5112 | if (msg) wake_up_interruptible(&ctx->ctx_msgq_wait); | ||
5113 | |||
5114 | /* | ||
5115 | * safe, we are not in intr handler, nor in ctxsw when | ||
5116 | * we come here | ||
5117 | */ | ||
5118 | kill_fasync (&ctx->ctx_async_queue, SIGIO, POLL_IN); | ||
5119 | |||
5120 | return 0; | ||
5121 | } | ||
5122 | |||
5123 | static int | ||
5124 | pfm_ovfl_notify_user(pfm_context_t *ctx, unsigned long ovfl_pmds) | ||
5125 | { | ||
5126 | pfm_msg_t *msg = NULL; | ||
5127 | |||
5128 | if (ctx->ctx_fl_no_msg == 0) { | ||
5129 | msg = pfm_get_new_msg(ctx); | ||
5130 | if (msg == NULL) { | ||
5131 | printk(KERN_ERR "perfmon: pfm_ovfl_notify_user no more notification msgs\n"); | ||
5132 | return -1; | ||
5133 | } | ||
5134 | |||
5135 | msg->pfm_ovfl_msg.msg_type = PFM_MSG_OVFL; | ||
5136 | msg->pfm_ovfl_msg.msg_ctx_fd = ctx->ctx_fd; | ||
5137 | msg->pfm_ovfl_msg.msg_active_set = 0; | ||
5138 | msg->pfm_ovfl_msg.msg_ovfl_pmds[0] = ovfl_pmds; | ||
5139 | msg->pfm_ovfl_msg.msg_ovfl_pmds[1] = 0UL; | ||
5140 | msg->pfm_ovfl_msg.msg_ovfl_pmds[2] = 0UL; | ||
5141 | msg->pfm_ovfl_msg.msg_ovfl_pmds[3] = 0UL; | ||
5142 | msg->pfm_ovfl_msg.msg_tstamp = 0UL; | ||
5143 | } | ||
5144 | |||
5145 | DPRINT(("ovfl msg: msg=%p no_msg=%d fd=%d ovfl_pmds=0x%lx\n", | ||
5146 | msg, | ||
5147 | ctx->ctx_fl_no_msg, | ||
5148 | ctx->ctx_fd, | ||
5149 | ovfl_pmds)); | ||
5150 | |||
5151 | return pfm_notify_user(ctx, msg); | ||
5152 | } | ||
5153 | |||
5154 | static int | ||
5155 | pfm_end_notify_user(pfm_context_t *ctx) | ||
5156 | { | ||
5157 | pfm_msg_t *msg; | ||
5158 | |||
5159 | msg = pfm_get_new_msg(ctx); | ||
5160 | if (msg == NULL) { | ||
5161 | printk(KERN_ERR "perfmon: pfm_end_notify_user no more notification msgs\n"); | ||
5162 | return -1; | ||
5163 | } | ||
5164 | /* no leak */ | ||
5165 | memset(msg, 0, sizeof(*msg)); | ||
5166 | |||
5167 | msg->pfm_end_msg.msg_type = PFM_MSG_END; | ||
5168 | msg->pfm_end_msg.msg_ctx_fd = ctx->ctx_fd; | ||
5169 | msg->pfm_ovfl_msg.msg_tstamp = 0UL; | ||
5170 | |||
5171 | DPRINT(("end msg: msg=%p no_msg=%d ctx_fd=%d\n", | ||
5172 | msg, | ||
5173 | ctx->ctx_fl_no_msg, | ||
5174 | ctx->ctx_fd)); | ||
5175 | |||
5176 | return pfm_notify_user(ctx, msg); | ||
5177 | } | ||
5178 | |||
5179 | /* | ||
5180 | * main overflow processing routine. | ||
5181 | * it can be called from the interrupt path or explicitly during the context switch code | ||
5182 | */ | ||
5183 | static void | ||
5184 | pfm_overflow_handler(struct task_struct *task, pfm_context_t *ctx, u64 pmc0, struct pt_regs *regs) | ||
5185 | { | ||
5186 | pfm_ovfl_arg_t *ovfl_arg; | ||
5187 | unsigned long mask; | ||
5188 | unsigned long old_val, ovfl_val, new_val; | ||
5189 | unsigned long ovfl_notify = 0UL, ovfl_pmds = 0UL, smpl_pmds = 0UL, reset_pmds; | ||
5190 | unsigned long tstamp; | ||
5191 | pfm_ovfl_ctrl_t ovfl_ctrl; | ||
5192 | unsigned int i, has_smpl; | ||
5193 | int must_notify = 0; | ||
5194 | |||
5195 | if (unlikely(ctx->ctx_state == PFM_CTX_ZOMBIE)) goto stop_monitoring; | ||
5196 | |||
5197 | /* | ||
5198 | * sanity test. Should never happen | ||
5199 | */ | ||
5200 | if (unlikely((pmc0 & 0x1) == 0)) goto sanity_check; | ||
5201 | |||
5202 | tstamp = ia64_get_itc(); | ||
5203 | mask = pmc0 >> PMU_FIRST_COUNTER; | ||
5204 | ovfl_val = pmu_conf->ovfl_val; | ||
5205 | has_smpl = CTX_HAS_SMPL(ctx); | ||
5206 | |||
5207 | DPRINT_ovfl(("pmc0=0x%lx pid=%d iip=0x%lx, %s " | ||
5208 | "used_pmds=0x%lx\n", | ||
5209 | pmc0, | ||
5210 | task ? task->pid: -1, | ||
5211 | (regs ? regs->cr_iip : 0), | ||
5212 | CTX_OVFL_NOBLOCK(ctx) ? "nonblocking" : "blocking", | ||
5213 | ctx->ctx_used_pmds[0])); | ||
5214 | |||
5215 | |||
5216 | /* | ||
5217 | * first we update the virtual counters | ||
5218 | * assume there was a prior ia64_srlz_d() issued | ||
5219 | */ | ||
5220 | for (i = PMU_FIRST_COUNTER; mask ; i++, mask >>= 1) { | ||
5221 | |||
5222 | /* skip pmd which did not overflow */ | ||
5223 | if ((mask & 0x1) == 0) continue; | ||
5224 | |||
5225 | /* | ||
5226 | * Note that the pmd is not necessarily 0 at this point as qualified events | ||
5227 | * may have happened before the PMU was frozen. The residual count is not | ||
5228 | * taken into consideration here but will be with any read of the pmd via | ||
5229 | * pfm_read_pmds(). | ||
5230 | */ | ||
5231 | old_val = new_val = ctx->ctx_pmds[i].val; | ||
5232 | new_val += 1 + ovfl_val; | ||
5233 | ctx->ctx_pmds[i].val = new_val; | ||
5234 | |||
5235 | /* | ||
5236 | * check for overflow condition | ||
5237 | */ | ||
5238 | if (likely(old_val > new_val)) { | ||
5239 | ovfl_pmds |= 1UL << i; | ||
5240 | if (PMC_OVFL_NOTIFY(ctx, i)) ovfl_notify |= 1UL << i; | ||
5241 | } | ||
5242 | |||
5243 | DPRINT_ovfl(("ctx_pmd[%d].val=0x%lx old_val=0x%lx pmd=0x%lx ovfl_pmds=0x%lx ovfl_notify=0x%lx\n", | ||
5244 | i, | ||
5245 | new_val, | ||
5246 | old_val, | ||
5247 | ia64_get_pmd(i) & ovfl_val, | ||
5248 | ovfl_pmds, | ||
5249 | ovfl_notify)); | ||
5250 | } | ||
5251 | |||
5252 | /* | ||
5253 | * there was no 64-bit overflow, nothing else to do | ||
5254 | */ | ||
5255 | if (ovfl_pmds == 0UL) return; | ||
5256 | |||
5257 | /* | ||
5258 | * reset all control bits | ||
5259 | */ | ||
5260 | ovfl_ctrl.val = 0; | ||
5261 | reset_pmds = 0UL; | ||
5262 | |||
5263 | /* | ||
5264 | * if a sampling format module exists, then we "cache" the overflow by | ||
5265 | * calling the module's handler() routine. | ||
5266 | */ | ||
5267 | if (has_smpl) { | ||
5268 | unsigned long start_cycles, end_cycles; | ||
5269 | unsigned long pmd_mask; | ||
5270 | int j, k, ret = 0; | ||
5271 | int this_cpu = smp_processor_id(); | ||
5272 | |||
5273 | pmd_mask = ovfl_pmds >> PMU_FIRST_COUNTER; | ||
5274 | ovfl_arg = &ctx->ctx_ovfl_arg; | ||
5275 | |||
5276 | prefetch(ctx->ctx_smpl_hdr); | ||
5277 | |||
5278 | for(i=PMU_FIRST_COUNTER; pmd_mask && ret == 0; i++, pmd_mask >>=1) { | ||
5279 | |||
5280 | mask = 1UL << i; | ||
5281 | |||
5282 | if ((pmd_mask & 0x1) == 0) continue; | ||
5283 | |||
5284 | ovfl_arg->ovfl_pmd = (unsigned char )i; | ||
5285 | ovfl_arg->ovfl_notify = ovfl_notify & mask ? 1 : 0; | ||
5286 | ovfl_arg->active_set = 0; | ||
5287 | ovfl_arg->ovfl_ctrl.val = 0; /* module must fill in all fields */ | ||
5288 | ovfl_arg->smpl_pmds[0] = smpl_pmds = ctx->ctx_pmds[i].smpl_pmds[0]; | ||
5289 | |||
5290 | ovfl_arg->pmd_value = ctx->ctx_pmds[i].val; | ||
5291 | ovfl_arg->pmd_last_reset = ctx->ctx_pmds[i].lval; | ||
5292 | ovfl_arg->pmd_eventid = ctx->ctx_pmds[i].eventid; | ||
5293 | |||
5294 | /* | ||
5295 | * copy values of pmds of interest. Sampling format may copy them | ||
5296 | * into sampling buffer. | ||
5297 | */ | ||
5298 | if (smpl_pmds) { | ||
5299 | for(j=0, k=0; smpl_pmds; j++, smpl_pmds >>=1) { | ||
5300 | if ((smpl_pmds & 0x1) == 0) continue; | ||
5301 | ovfl_arg->smpl_pmds_values[k++] = PMD_IS_COUNTING(j) ? pfm_read_soft_counter(ctx, j) : ia64_get_pmd(j); | ||
5302 | DPRINT_ovfl(("smpl_pmd[%d]=pmd%u=0x%lx\n", k-1, j, ovfl_arg->smpl_pmds_values[k-1])); | ||
5303 | } | ||
5304 | } | ||
5305 | |||
5306 | pfm_stats[this_cpu].pfm_smpl_handler_calls++; | ||
5307 | |||
5308 | start_cycles = ia64_get_itc(); | ||
5309 | |||
5310 | /* | ||
5311 | * call custom buffer format record (handler) routine | ||
5312 | */ | ||
5313 | ret = (*ctx->ctx_buf_fmt->fmt_handler)(task, ctx->ctx_smpl_hdr, ovfl_arg, regs, tstamp); | ||
5314 | |||
5315 | end_cycles = ia64_get_itc(); | ||
5316 | |||
5317 | /* | ||
5318 | * For those controls, we take the union because they have | ||
5319 | * an all or nothing behavior. | ||
5320 | */ | ||
5321 | ovfl_ctrl.bits.notify_user |= ovfl_arg->ovfl_ctrl.bits.notify_user; | ||
5322 | ovfl_ctrl.bits.block_task |= ovfl_arg->ovfl_ctrl.bits.block_task; | ||
5323 | ovfl_ctrl.bits.mask_monitoring |= ovfl_arg->ovfl_ctrl.bits.mask_monitoring; | ||
5324 | /* | ||
5325 | * build the bitmask of pmds to reset now | ||
5326 | */ | ||
5327 | if (ovfl_arg->ovfl_ctrl.bits.reset_ovfl_pmds) reset_pmds |= mask; | ||
5328 | |||
5329 | pfm_stats[this_cpu].pfm_smpl_handler_cycles += end_cycles - start_cycles; | ||
5330 | } | ||
5331 | /* | ||
5332 | * when the module cannot handle the rest of the overflows, we abort right here | ||
5333 | */ | ||
5334 | if (ret && pmd_mask) { | ||
5335 | DPRINT(("handler aborts leftover ovfl_pmds=0x%lx\n", | ||
5336 | pmd_mask<<PMU_FIRST_COUNTER)); | ||
5337 | } | ||
5338 | /* | ||
5339 | * remove the pmds we reset now from the set of pmds to reset in pfm_restart() | ||
5340 | */ | ||
5341 | ovfl_pmds &= ~reset_pmds; | ||
5342 | } else { | ||
5343 | /* | ||
5344 | * when no sampling module is used, then the default | ||
5345 | * is to notify on overflow if requested by user | ||
5346 | */ | ||
5347 | ovfl_ctrl.bits.notify_user = ovfl_notify ? 1 : 0; | ||
5348 | ovfl_ctrl.bits.block_task = ovfl_notify ? 1 : 0; | ||
5349 | ovfl_ctrl.bits.mask_monitoring = ovfl_notify ? 1 : 0; /* XXX: change for saturation */ | ||
5350 | ovfl_ctrl.bits.reset_ovfl_pmds = ovfl_notify ? 0 : 1; | ||
5351 | /* | ||
5352 | * if needed, we reset all overflowed pmds | ||
5353 | */ | ||
5354 | if (ovfl_notify == 0) reset_pmds = ovfl_pmds; | ||
5355 | } | ||
5356 | |||
5357 | DPRINT_ovfl(("ovfl_pmds=0x%lx reset_pmds=0x%lx\n", ovfl_pmds, reset_pmds)); | ||
5358 | |||
5359 | /* | ||
5360 | * reset the requested PMD registers using the short reset values | ||
5361 | */ | ||
5362 | if (reset_pmds) { | ||
5363 | unsigned long bm = reset_pmds; | ||
5364 | pfm_reset_regs(ctx, &bm, PFM_PMD_SHORT_RESET); | ||
5365 | } | ||
5366 | |||
5367 | if (ovfl_notify && ovfl_ctrl.bits.notify_user) { | ||
5368 | /* | ||
5369 | * keep track of what to reset when unblocking | ||
5370 | */ | ||
5371 | ctx->ctx_ovfl_regs[0] = ovfl_pmds; | ||
5372 | |||
5373 | /* | ||
5374 | * check for blocking context | ||
5375 | */ | ||
5376 | if (CTX_OVFL_NOBLOCK(ctx) == 0 && ovfl_ctrl.bits.block_task) { | ||
5377 | |||
5378 | ctx->ctx_fl_trap_reason = PFM_TRAP_REASON_BLOCK; | ||
5379 | |||
5380 | /* | ||
5381 | * set the perfmon specific checking pending work for the task | ||
5382 | */ | ||
5383 | PFM_SET_WORK_PENDING(task, 1); | ||
5384 | |||
5385 | /* | ||
5386 | * when coming from ctxsw, current still points to the | ||
5387 | * previous task, therefore we must work with task and not current. | ||
5388 | */ | ||
5389 | pfm_set_task_notify(task); | ||
5390 | } | ||
5391 | /* | ||
5392 | * defer until state is changed (shorten spin window). the context is locked | ||
5393 | * anyway, so the signal receiver would come spin for nothing. | ||
5394 | */ | ||
5395 | must_notify = 1; | ||
5396 | } | ||
5397 | |||
5398 | DPRINT_ovfl(("owner [%d] pending=%ld reason=%u ovfl_pmds=0x%lx ovfl_notify=0x%lx masked=%d\n", | ||
5399 | GET_PMU_OWNER() ? GET_PMU_OWNER()->pid : -1, | ||
5400 | PFM_GET_WORK_PENDING(task), | ||
5401 | ctx->ctx_fl_trap_reason, | ||
5402 | ovfl_pmds, | ||
5403 | ovfl_notify, | ||
5404 | ovfl_ctrl.bits.mask_monitoring ? 1 : 0)); | ||
5405 | /* | ||
5406 | * in case monitoring must be stopped, we toggle the psr bits | ||
5407 | */ | ||
5408 | if (ovfl_ctrl.bits.mask_monitoring) { | ||
5409 | pfm_mask_monitoring(task); | ||
5410 | ctx->ctx_state = PFM_CTX_MASKED; | ||
5411 | ctx->ctx_fl_can_restart = 1; | ||
5412 | } | ||
5413 | |||
5414 | /* | ||
5415 | * send notification now | ||
5416 | */ | ||
5417 | if (must_notify) pfm_ovfl_notify_user(ctx, ovfl_notify); | ||
5418 | |||
5419 | return; | ||
5420 | |||
5421 | sanity_check: | ||
5422 | printk(KERN_ERR "perfmon: CPU%d overflow handler [%d] pmc0=0x%lx\n", | ||
5423 | smp_processor_id(), | ||
5424 | task ? task->pid : -1, | ||
5425 | pmc0); | ||
5426 | return; | ||
5427 | |||
5428 | stop_monitoring: | ||
5429 | /* | ||
5430 | * in SMP, zombie context is never restored but reclaimed in pfm_load_regs(). | ||
5431 | * Moreover, zombies are also reclaimed in pfm_save_regs(). Therefore we can | ||
5432 | * come here as zombie only if the task is the current task. In which case, we | ||
5433 | * can access the PMU hardware directly. | ||
5434 | * | ||
5435 | * Note that zombies do have PM_VALID set. So here we do the minimal. | ||
5436 | * | ||
5437 | * In case the context was zombified it could not be reclaimed at the time | ||
5438 | * the monitoring program exited. At this point, the PMU reservation has been | ||
5439 | * returned, the sampling buffer has been freed. We must convert this call | ||
5440 | * into a spurious interrupt. However, we must also avoid infinite overflows | ||
5441 | * by stopping monitoring for this task. We can only come here for a per-task | ||
5442 | * context. All we need to do is to stop monitoring using the psr bits which | ||
5443 | * are always task private. By re-enabling secure monitoring, we ensure that | ||
5444 | * the monitored task will not be able to re-activate monitoring. | ||
5445 | * The task will eventually be context switched out, at which point the context | ||
5446 | * will be reclaimed (that includes releasing ownership of the PMU). | ||
5447 | * | ||
5448 | * So there might be a window of time where the number of per-task session is zero | ||
5449 | * yet one PMU might have a owner and get at most one overflow interrupt for a zombie | ||
5450 | * context. This is safe because if a per-task session comes in, it will push this one | ||
5451 | * out and by virtue of pfm_save_regs(), this one will disappear. If a system wide | ||
5452 | * session is forced on that CPU, given that we use task pinning, pfm_save_regs() will | ||
5453 | * also push our zombie context out. | ||
5454 | * | ||
5455 | * Overall pretty hairy stuff.... | ||
5456 | */ | ||
5457 | DPRINT(("ctx is zombie for [%d], converted to spurious\n", task ? task->pid: -1)); | ||
5458 | pfm_clear_psr_up(); | ||
5459 | ia64_psr(regs)->up = 0; | ||
5460 | ia64_psr(regs)->sp = 1; | ||
5461 | return; | ||
5462 | } | ||
5463 | |||
5464 | static int | ||
5465 | pfm_do_interrupt_handler(int irq, void *arg, struct pt_regs *regs) | ||
5466 | { | ||
5467 | struct task_struct *task; | ||
5468 | pfm_context_t *ctx; | ||
5469 | unsigned long flags; | ||
5470 | u64 pmc0; | ||
5471 | int this_cpu = smp_processor_id(); | ||
5472 | int retval = 0; | ||
5473 | |||
5474 | pfm_stats[this_cpu].pfm_ovfl_intr_count++; | ||
5475 | |||
5476 | /* | ||
5477 | * srlz.d done before arriving here | ||
5478 | */ | ||
5479 | pmc0 = ia64_get_pmc(0); | ||
5480 | |||
5481 | task = GET_PMU_OWNER(); | ||
5482 | ctx = GET_PMU_CTX(); | ||
5483 | |||
5484 | /* | ||
5485 | * if we have some pending bits set | ||
5486 | * assumes : if any PMC0.bit[63-1] is set, then PMC0.fr = 1 | ||
5487 | */ | ||
5488 | if (PMC0_HAS_OVFL(pmc0) && task) { | ||
5489 | /* | ||
5490 | * we assume that pmc0.fr is always set here | ||
5491 | */ | ||
5492 | |||
5493 | /* sanity check */ | ||
5494 | if (!ctx) goto report_spurious1; | ||
5495 | |||
5496 | if (ctx->ctx_fl_system == 0 && (task->thread.flags & IA64_THREAD_PM_VALID) == 0) | ||
5497 | goto report_spurious2; | ||
5498 | |||
5499 | PROTECT_CTX_NOPRINT(ctx, flags); | ||
5500 | |||
5501 | pfm_overflow_handler(task, ctx, pmc0, regs); | ||
5502 | |||
5503 | UNPROTECT_CTX_NOPRINT(ctx, flags); | ||
5504 | |||
5505 | } else { | ||
5506 | pfm_stats[this_cpu].pfm_spurious_ovfl_intr_count++; | ||
5507 | retval = -1; | ||
5508 | } | ||
5509 | /* | ||
5510 | * keep it unfrozen at all times | ||
5511 | */ | ||
5512 | pfm_unfreeze_pmu(); | ||
5513 | |||
5514 | return retval; | ||
5515 | |||
5516 | report_spurious1: | ||
5517 | printk(KERN_INFO "perfmon: spurious overflow interrupt on CPU%d: process %d has no PFM context\n", | ||
5518 | this_cpu, task->pid); | ||
5519 | pfm_unfreeze_pmu(); | ||
5520 | return -1; | ||
5521 | report_spurious2: | ||
5522 | printk(KERN_INFO "perfmon: spurious overflow interrupt on CPU%d: process %d, invalid flag\n", | ||
5523 | this_cpu, | ||
5524 | task->pid); | ||
5525 | pfm_unfreeze_pmu(); | ||
5526 | return -1; | ||
5527 | } | ||
5528 | |||
5529 | static irqreturn_t | ||
5530 | pfm_interrupt_handler(int irq, void *arg, struct pt_regs *regs) | ||
5531 | { | ||
5532 | unsigned long start_cycles, total_cycles; | ||
5533 | unsigned long min, max; | ||
5534 | int this_cpu; | ||
5535 | int ret; | ||
5536 | |||
5537 | this_cpu = get_cpu(); | ||
5538 | min = pfm_stats[this_cpu].pfm_ovfl_intr_cycles_min; | ||
5539 | max = pfm_stats[this_cpu].pfm_ovfl_intr_cycles_max; | ||
5540 | |||
5541 | start_cycles = ia64_get_itc(); | ||
5542 | |||
5543 | ret = pfm_do_interrupt_handler(irq, arg, regs); | ||
5544 | |||
5545 | total_cycles = ia64_get_itc(); | ||
5546 | |||
5547 | /* | ||
5548 | * don't measure spurious interrupts | ||
5549 | */ | ||
5550 | if (likely(ret == 0)) { | ||
5551 | total_cycles -= start_cycles; | ||
5552 | |||
5553 | if (total_cycles < min) pfm_stats[this_cpu].pfm_ovfl_intr_cycles_min = total_cycles; | ||
5554 | if (total_cycles > max) pfm_stats[this_cpu].pfm_ovfl_intr_cycles_max = total_cycles; | ||
5555 | |||
5556 | pfm_stats[this_cpu].pfm_ovfl_intr_cycles += total_cycles; | ||
5557 | } | ||
5558 | put_cpu_no_resched(); | ||
5559 | return IRQ_HANDLED; | ||
5560 | } | ||
5561 | |||
/*
 * /proc/perfmon interface, for debug only
 *
 * PFM_PROC_SHOW_HEADER is the iterator cookie meaning "print the global
 * header". Per-CPU cookies handed out by pfm_proc_start() are in the range
 * 1..NR_CPUS, so NR_CPUS+1 cannot collide with them.
 */

#define PFM_PROC_SHOW_HEADER	((void *)(unsigned long)(NR_CPUS + 1))
5567 | |||
5568 | static void * | ||
5569 | pfm_proc_start(struct seq_file *m, loff_t *pos) | ||
5570 | { | ||
5571 | if (*pos == 0) { | ||
5572 | return PFM_PROC_SHOW_HEADER; | ||
5573 | } | ||
5574 | |||
5575 | while (*pos <= NR_CPUS) { | ||
5576 | if (cpu_online(*pos - 1)) { | ||
5577 | return (void *)*pos; | ||
5578 | } | ||
5579 | ++*pos; | ||
5580 | } | ||
5581 | return NULL; | ||
5582 | } | ||
5583 | |||
5584 | static void * | ||
5585 | pfm_proc_next(struct seq_file *m, void *v, loff_t *pos) | ||
5586 | { | ||
5587 | ++*pos; | ||
5588 | return pfm_proc_start(m, pos); | ||
5589 | } | ||
5590 | |||
/*
 * seq_file stop callback: intentionally empty, the start/next callbacks
 * above acquire nothing that would need releasing here.
 */
static void
pfm_proc_stop(struct seq_file *m, void *v)
{
	/* nothing to do */
}
5595 | |||
5596 | static void | ||
5597 | pfm_proc_show_header(struct seq_file *m) | ||
5598 | { | ||
5599 | struct list_head * pos; | ||
5600 | pfm_buffer_fmt_t * entry; | ||
5601 | unsigned long flags; | ||
5602 | |||
5603 | seq_printf(m, | ||
5604 | "perfmon version : %u.%u\n" | ||
5605 | "model : %s\n" | ||
5606 | "fastctxsw : %s\n" | ||
5607 | "expert mode : %s\n" | ||
5608 | "ovfl_mask : 0x%lx\n" | ||
5609 | "PMU flags : 0x%x\n", | ||
5610 | PFM_VERSION_MAJ, PFM_VERSION_MIN, | ||
5611 | pmu_conf->pmu_name, | ||
5612 | pfm_sysctl.fastctxsw > 0 ? "Yes": "No", | ||
5613 | pfm_sysctl.expert_mode > 0 ? "Yes": "No", | ||
5614 | pmu_conf->ovfl_val, | ||
5615 | pmu_conf->flags); | ||
5616 | |||
5617 | LOCK_PFS(flags); | ||
5618 | |||
5619 | seq_printf(m, | ||
5620 | "proc_sessions : %u\n" | ||
5621 | "sys_sessions : %u\n" | ||
5622 | "sys_use_dbregs : %u\n" | ||
5623 | "ptrace_use_dbregs : %u\n", | ||
5624 | pfm_sessions.pfs_task_sessions, | ||
5625 | pfm_sessions.pfs_sys_sessions, | ||
5626 | pfm_sessions.pfs_sys_use_dbregs, | ||
5627 | pfm_sessions.pfs_ptrace_use_dbregs); | ||
5628 | |||
5629 | UNLOCK_PFS(flags); | ||
5630 | |||
5631 | spin_lock(&pfm_buffer_fmt_lock); | ||
5632 | |||
5633 | list_for_each(pos, &pfm_buffer_fmt_list) { | ||
5634 | entry = list_entry(pos, pfm_buffer_fmt_t, fmt_list); | ||
5635 | seq_printf(m, "format : %02x-%02x-%02x-%02x-%02x-%02x-%02x-%02x-%02x-%02x-%02x-%02x-%02x-%02x-%02x-%02x %s\n", | ||
5636 | entry->fmt_uuid[0], | ||
5637 | entry->fmt_uuid[1], | ||
5638 | entry->fmt_uuid[2], | ||
5639 | entry->fmt_uuid[3], | ||
5640 | entry->fmt_uuid[4], | ||
5641 | entry->fmt_uuid[5], | ||
5642 | entry->fmt_uuid[6], | ||
5643 | entry->fmt_uuid[7], | ||
5644 | entry->fmt_uuid[8], | ||
5645 | entry->fmt_uuid[9], | ||
5646 | entry->fmt_uuid[10], | ||
5647 | entry->fmt_uuid[11], | ||
5648 | entry->fmt_uuid[12], | ||
5649 | entry->fmt_uuid[13], | ||
5650 | entry->fmt_uuid[14], | ||
5651 | entry->fmt_uuid[15], | ||
5652 | entry->fmt_name); | ||
5653 | } | ||
5654 | spin_unlock(&pfm_buffer_fmt_lock); | ||
5655 | |||
5656 | } | ||
5657 | |||
5658 | static int | ||
5659 | pfm_proc_show(struct seq_file *m, void *v) | ||
5660 | { | ||
5661 | unsigned long psr; | ||
5662 | unsigned int i; | ||
5663 | int cpu; | ||
5664 | |||
5665 | if (v == PFM_PROC_SHOW_HEADER) { | ||
5666 | pfm_proc_show_header(m); | ||
5667 | return 0; | ||
5668 | } | ||
5669 | |||
5670 | /* show info for CPU (v - 1) */ | ||
5671 | |||
5672 | cpu = (long)v - 1; | ||
5673 | seq_printf(m, | ||
5674 | "CPU%-2d overflow intrs : %lu\n" | ||
5675 | "CPU%-2d overflow cycles : %lu\n" | ||
5676 | "CPU%-2d overflow min : %lu\n" | ||
5677 | "CPU%-2d overflow max : %lu\n" | ||
5678 | "CPU%-2d smpl handler calls : %lu\n" | ||
5679 | "CPU%-2d smpl handler cycles : %lu\n" | ||
5680 | "CPU%-2d spurious intrs : %lu\n" | ||
5681 | "CPU%-2d replay intrs : %lu\n" | ||
5682 | "CPU%-2d syst_wide : %d\n" | ||
5683 | "CPU%-2d dcr_pp : %d\n" | ||
5684 | "CPU%-2d exclude idle : %d\n" | ||
5685 | "CPU%-2d owner : %d\n" | ||
5686 | "CPU%-2d context : %p\n" | ||
5687 | "CPU%-2d activations : %lu\n", | ||
5688 | cpu, pfm_stats[cpu].pfm_ovfl_intr_count, | ||
5689 | cpu, pfm_stats[cpu].pfm_ovfl_intr_cycles, | ||
5690 | cpu, pfm_stats[cpu].pfm_ovfl_intr_cycles_min, | ||
5691 | cpu, pfm_stats[cpu].pfm_ovfl_intr_cycles_max, | ||
5692 | cpu, pfm_stats[cpu].pfm_smpl_handler_calls, | ||
5693 | cpu, pfm_stats[cpu].pfm_smpl_handler_cycles, | ||
5694 | cpu, pfm_stats[cpu].pfm_spurious_ovfl_intr_count, | ||
5695 | cpu, pfm_stats[cpu].pfm_replay_ovfl_intr_count, | ||
5696 | cpu, pfm_get_cpu_data(pfm_syst_info, cpu) & PFM_CPUINFO_SYST_WIDE ? 1 : 0, | ||
5697 | cpu, pfm_get_cpu_data(pfm_syst_info, cpu) & PFM_CPUINFO_DCR_PP ? 1 : 0, | ||
5698 | cpu, pfm_get_cpu_data(pfm_syst_info, cpu) & PFM_CPUINFO_EXCL_IDLE ? 1 : 0, | ||
5699 | cpu, pfm_get_cpu_data(pmu_owner, cpu) ? pfm_get_cpu_data(pmu_owner, cpu)->pid: -1, | ||
5700 | cpu, pfm_get_cpu_data(pmu_ctx, cpu), | ||
5701 | cpu, pfm_get_cpu_data(pmu_activation_number, cpu)); | ||
5702 | |||
5703 | if (num_online_cpus() == 1 && pfm_sysctl.debug > 0) { | ||
5704 | |||
5705 | psr = pfm_get_psr(); | ||
5706 | |||
5707 | ia64_srlz_d(); | ||
5708 | |||
5709 | seq_printf(m, | ||
5710 | "CPU%-2d psr : 0x%lx\n" | ||
5711 | "CPU%-2d pmc0 : 0x%lx\n", | ||
5712 | cpu, psr, | ||
5713 | cpu, ia64_get_pmc(0)); | ||
5714 | |||
5715 | for (i=0; PMC_IS_LAST(i) == 0; i++) { | ||
5716 | if (PMC_IS_COUNTING(i) == 0) continue; | ||
5717 | seq_printf(m, | ||
5718 | "CPU%-2d pmc%u : 0x%lx\n" | ||
5719 | "CPU%-2d pmd%u : 0x%lx\n", | ||
5720 | cpu, i, ia64_get_pmc(i), | ||
5721 | cpu, i, ia64_get_pmd(i)); | ||
5722 | } | ||
5723 | } | ||
5724 | return 0; | ||
5725 | } | ||
5726 | |||
5727 | struct seq_operations pfm_seq_ops = { | ||
5728 | .start = pfm_proc_start, | ||
5729 | .next = pfm_proc_next, | ||
5730 | .stop = pfm_proc_stop, | ||
5731 | .show = pfm_proc_show | ||
5732 | }; | ||
5733 | |||
5734 | static int | ||
5735 | pfm_proc_open(struct inode *inode, struct file *file) | ||
5736 | { | ||
5737 | return seq_open(file, &pfm_seq_ops); | ||
5738 | } | ||
5739 | |||
5740 | |||
5741 | /* | ||
5742 | * we come here as soon as local_cpu_data->pfm_syst_wide is set. this happens | ||
5743 | * during pfm_enable() hence before pfm_start(). We cannot assume monitoring | ||
5744 | * is active or inactive based on mode. We must rely on the value in | ||
5745 | * local_cpu_data->pfm_syst_info | ||
5746 | */ | ||
5747 | void | ||
5748 | pfm_syst_wide_update_task(struct task_struct *task, unsigned long info, int is_ctxswin) | ||
5749 | { | ||
5750 | struct pt_regs *regs; | ||
5751 | unsigned long dcr; | ||
5752 | unsigned long dcr_pp; | ||
5753 | |||
5754 | dcr_pp = info & PFM_CPUINFO_DCR_PP ? 1 : 0; | ||
5755 | |||
5756 | /* | ||
5757 | * pid 0 is guaranteed to be the idle task. There is one such task with pid 0 | ||
5758 | * on every CPU, so we can rely on the pid to identify the idle task. | ||
5759 | */ | ||
5760 | if ((info & PFM_CPUINFO_EXCL_IDLE) == 0 || task->pid) { | ||
5761 | regs = ia64_task_regs(task); | ||
5762 | ia64_psr(regs)->pp = is_ctxswin ? dcr_pp : 0; | ||
5763 | return; | ||
5764 | } | ||
5765 | /* | ||
5766 | * if monitoring has started | ||
5767 | */ | ||
5768 | if (dcr_pp) { | ||
5769 | dcr = ia64_getreg(_IA64_REG_CR_DCR); | ||
5770 | /* | ||
5771 | * context switching in? | ||
5772 | */ | ||
5773 | if (is_ctxswin) { | ||
5774 | /* mask monitoring for the idle task */ | ||
5775 | ia64_setreg(_IA64_REG_CR_DCR, dcr & ~IA64_DCR_PP); | ||
5776 | pfm_clear_psr_pp(); | ||
5777 | ia64_srlz_i(); | ||
5778 | return; | ||
5779 | } | ||
5780 | /* | ||
5781 | * context switching out | ||
5782 | * restore monitoring for next task | ||
5783 | * | ||
5784 | * Due to inlining this odd if-then-else construction generates | ||
5785 | * better code. | ||
5786 | */ | ||
5787 | ia64_setreg(_IA64_REG_CR_DCR, dcr |IA64_DCR_PP); | ||
5788 | pfm_set_psr_pp(); | ||
5789 | ia64_srlz_i(); | ||
5790 | } | ||
5791 | } | ||
5792 | |||
5793 | #ifdef CONFIG_SMP | ||
5794 | |||
5795 | static void | ||
5796 | pfm_force_cleanup(pfm_context_t *ctx, struct pt_regs *regs) | ||
5797 | { | ||
5798 | struct task_struct *task = ctx->ctx_task; | ||
5799 | |||
5800 | ia64_psr(regs)->up = 0; | ||
5801 | ia64_psr(regs)->sp = 1; | ||
5802 | |||
5803 | if (GET_PMU_OWNER() == task) { | ||
5804 | DPRINT(("cleared ownership for [%d]\n", ctx->ctx_task->pid)); | ||
5805 | SET_PMU_OWNER(NULL, NULL); | ||
5806 | } | ||
5807 | |||
5808 | /* | ||
5809 | * disconnect the task from the context and vice-versa | ||
5810 | */ | ||
5811 | PFM_SET_WORK_PENDING(task, 0); | ||
5812 | |||
5813 | task->thread.pfm_context = NULL; | ||
5814 | task->thread.flags &= ~IA64_THREAD_PM_VALID; | ||
5815 | |||
5816 | DPRINT(("force cleanup for [%d]\n", task->pid)); | ||
5817 | } | ||
5818 | |||
5819 | |||
5820 | /* | ||
5821 | * in 2.6, interrupts are masked when we come here and the runqueue lock is held | ||
5822 | */ | ||
5823 | void | ||
5824 | pfm_save_regs(struct task_struct *task) | ||
5825 | { | ||
5826 | pfm_context_t *ctx; | ||
5827 | struct thread_struct *t; | ||
5828 | unsigned long flags; | ||
5829 | u64 psr; | ||
5830 | |||
5831 | |||
5832 | ctx = PFM_GET_CTX(task); | ||
5833 | if (ctx == NULL) return; | ||
5834 | t = &task->thread; | ||
5835 | |||
5836 | /* | ||
5837 | * we always come here with interrupts ALREADY disabled by | ||
5838 | * the scheduler. So we simply need to protect against concurrent | ||
5839 | * access, not CPU concurrency. | ||
5840 | */ | ||
5841 | flags = pfm_protect_ctx_ctxsw(ctx); | ||
5842 | |||
5843 | if (ctx->ctx_state == PFM_CTX_ZOMBIE) { | ||
5844 | struct pt_regs *regs = ia64_task_regs(task); | ||
5845 | |||
5846 | pfm_clear_psr_up(); | ||
5847 | |||
5848 | pfm_force_cleanup(ctx, regs); | ||
5849 | |||
5850 | BUG_ON(ctx->ctx_smpl_hdr); | ||
5851 | |||
5852 | pfm_unprotect_ctx_ctxsw(ctx, flags); | ||
5853 | |||
5854 | pfm_context_free(ctx); | ||
5855 | return; | ||
5856 | } | ||
5857 | |||
5858 | /* | ||
5859 | * save current PSR: needed because we modify it | ||
5860 | */ | ||
5861 | ia64_srlz_d(); | ||
5862 | psr = pfm_get_psr(); | ||
5863 | |||
5864 | BUG_ON(psr & (IA64_PSR_I)); | ||
5865 | |||
5866 | /* | ||
5867 | * stop monitoring: | ||
5868 | * This is the last instruction which may generate an overflow | ||
5869 | * | ||
5870 | * We do not need to set psr.sp because, it is irrelevant in kernel. | ||
5871 | * It will be restored from ipsr when going back to user level | ||
5872 | */ | ||
5873 | pfm_clear_psr_up(); | ||
5874 | |||
5875 | /* | ||
5876 | * keep a copy of psr.up (for reload) | ||
5877 | */ | ||
5878 | ctx->ctx_saved_psr_up = psr & IA64_PSR_UP; | ||
5879 | |||
5880 | /* | ||
5881 | * release ownership of this PMU. | ||
5882 | * PM interrupts are masked, so nothing | ||
5883 | * can happen. | ||
5884 | */ | ||
5885 | SET_PMU_OWNER(NULL, NULL); | ||
5886 | |||
5887 | /* | ||
5888 | * we systematically save the PMD as we have no | ||
5889 | * guarantee we will be schedule at that same | ||
5890 | * CPU again. | ||
5891 | */ | ||
5892 | pfm_save_pmds(t->pmds, ctx->ctx_used_pmds[0]); | ||
5893 | |||
5894 | /* | ||
5895 | * save pmc0 ia64_srlz_d() done in pfm_save_pmds() | ||
5896 | * we will need it on the restore path to check | ||
5897 | * for pending overflow. | ||
5898 | */ | ||
5899 | t->pmcs[0] = ia64_get_pmc(0); | ||
5900 | |||
5901 | /* | ||
5902 | * unfreeze PMU if had pending overflows | ||
5903 | */ | ||
5904 | if (t->pmcs[0] & ~0x1UL) pfm_unfreeze_pmu(); | ||
5905 | |||
5906 | /* | ||
5907 | * finally, allow context access. | ||
5908 | * interrupts will still be masked after this call. | ||
5909 | */ | ||
5910 | pfm_unprotect_ctx_ctxsw(ctx, flags); | ||
5911 | } | ||
5912 | |||
5913 | #else /* !CONFIG_SMP */ | ||
5914 | void | ||
5915 | pfm_save_regs(struct task_struct *task) | ||
5916 | { | ||
5917 | pfm_context_t *ctx; | ||
5918 | u64 psr; | ||
5919 | |||
5920 | ctx = PFM_GET_CTX(task); | ||
5921 | if (ctx == NULL) return; | ||
5922 | |||
5923 | /* | ||
5924 | * save current PSR: needed because we modify it | ||
5925 | */ | ||
5926 | psr = pfm_get_psr(); | ||
5927 | |||
5928 | BUG_ON(psr & (IA64_PSR_I)); | ||
5929 | |||
5930 | /* | ||
5931 | * stop monitoring: | ||
5932 | * This is the last instruction which may generate an overflow | ||
5933 | * | ||
5934 | * We do not need to set psr.sp because, it is irrelevant in kernel. | ||
5935 | * It will be restored from ipsr when going back to user level | ||
5936 | */ | ||
5937 | pfm_clear_psr_up(); | ||
5938 | |||
5939 | /* | ||
5940 | * keep a copy of psr.up (for reload) | ||
5941 | */ | ||
5942 | ctx->ctx_saved_psr_up = psr & IA64_PSR_UP; | ||
5943 | } | ||
5944 | |||
5945 | static void | ||
5946 | pfm_lazy_save_regs (struct task_struct *task) | ||
5947 | { | ||
5948 | pfm_context_t *ctx; | ||
5949 | struct thread_struct *t; | ||
5950 | unsigned long flags; | ||
5951 | |||
5952 | { u64 psr = pfm_get_psr(); | ||
5953 | BUG_ON(psr & IA64_PSR_UP); | ||
5954 | } | ||
5955 | |||
5956 | ctx = PFM_GET_CTX(task); | ||
5957 | t = &task->thread; | ||
5958 | |||
5959 | /* | ||
5960 | * we need to mask PMU overflow here to | ||
5961 | * make sure that we maintain pmc0 until | ||
5962 | * we save it. overflow interrupts are | ||
5963 | * treated as spurious if there is no | ||
5964 | * owner. | ||
5965 | * | ||
5966 | * XXX: I don't think this is necessary | ||
5967 | */ | ||
5968 | PROTECT_CTX(ctx,flags); | ||
5969 | |||
5970 | /* | ||
5971 | * release ownership of this PMU. | ||
5972 | * must be done before we save the registers. | ||
5973 | * | ||
5974 | * after this call any PMU interrupt is treated | ||
5975 | * as spurious. | ||
5976 | */ | ||
5977 | SET_PMU_OWNER(NULL, NULL); | ||
5978 | |||
5979 | /* | ||
5980 | * save all the pmds we use | ||
5981 | */ | ||
5982 | pfm_save_pmds(t->pmds, ctx->ctx_used_pmds[0]); | ||
5983 | |||
5984 | /* | ||
5985 | * save pmc0 ia64_srlz_d() done in pfm_save_pmds() | ||
5986 | * it is needed to check for pended overflow | ||
5987 | * on the restore path | ||
5988 | */ | ||
5989 | t->pmcs[0] = ia64_get_pmc(0); | ||
5990 | |||
5991 | /* | ||
5992 | * unfreeze PMU if had pending overflows | ||
5993 | */ | ||
5994 | if (t->pmcs[0] & ~0x1UL) pfm_unfreeze_pmu(); | ||
5995 | |||
5996 | /* | ||
5997 | * now get can unmask PMU interrupts, they will | ||
5998 | * be treated as purely spurious and we will not | ||
5999 | * lose any information | ||
6000 | */ | ||
6001 | UNPROTECT_CTX(ctx,flags); | ||
6002 | } | ||
6003 | #endif /* CONFIG_SMP */ | ||
6004 | |||
6005 | #ifdef CONFIG_SMP | ||
6006 | /* | ||
6007 | * in 2.6, interrupts are masked when we come here and the runqueue lock is held | ||
6008 | */ | ||
6009 | void | ||
6010 | pfm_load_regs (struct task_struct *task) | ||
6011 | { | ||
6012 | pfm_context_t *ctx; | ||
6013 | struct thread_struct *t; | ||
6014 | unsigned long pmc_mask = 0UL, pmd_mask = 0UL; | ||
6015 | unsigned long flags; | ||
6016 | u64 psr, psr_up; | ||
6017 | int need_irq_resend; | ||
6018 | |||
6019 | ctx = PFM_GET_CTX(task); | ||
6020 | if (unlikely(ctx == NULL)) return; | ||
6021 | |||
6022 | BUG_ON(GET_PMU_OWNER()); | ||
6023 | |||
6024 | t = &task->thread; | ||
6025 | /* | ||
6026 | * possible on unload | ||
6027 | */ | ||
6028 | if (unlikely((t->flags & IA64_THREAD_PM_VALID) == 0)) return; | ||
6029 | |||
6030 | /* | ||
6031 | * we always come here with interrupts ALREADY disabled by | ||
6032 | * the scheduler. So we simply need to protect against concurrent | ||
6033 | * access, not CPU concurrency. | ||
6034 | */ | ||
6035 | flags = pfm_protect_ctx_ctxsw(ctx); | ||
6036 | psr = pfm_get_psr(); | ||
6037 | |||
6038 | need_irq_resend = pmu_conf->flags & PFM_PMU_IRQ_RESEND; | ||
6039 | |||
6040 | BUG_ON(psr & (IA64_PSR_UP|IA64_PSR_PP)); | ||
6041 | BUG_ON(psr & IA64_PSR_I); | ||
6042 | |||
6043 | if (unlikely(ctx->ctx_state == PFM_CTX_ZOMBIE)) { | ||
6044 | struct pt_regs *regs = ia64_task_regs(task); | ||
6045 | |||
6046 | BUG_ON(ctx->ctx_smpl_hdr); | ||
6047 | |||
6048 | pfm_force_cleanup(ctx, regs); | ||
6049 | |||
6050 | pfm_unprotect_ctx_ctxsw(ctx, flags); | ||
6051 | |||
6052 | /* | ||
6053 | * this one (kmalloc'ed) is fine with interrupts disabled | ||
6054 | */ | ||
6055 | pfm_context_free(ctx); | ||
6056 | |||
6057 | return; | ||
6058 | } | ||
6059 | |||
6060 | /* | ||
6061 | * we restore ALL the debug registers to avoid picking up | ||
6062 | * stale state. | ||
6063 | */ | ||
6064 | if (ctx->ctx_fl_using_dbreg) { | ||
6065 | pfm_restore_ibrs(ctx->ctx_ibrs, pmu_conf->num_ibrs); | ||
6066 | pfm_restore_dbrs(ctx->ctx_dbrs, pmu_conf->num_dbrs); | ||
6067 | } | ||
6068 | /* | ||
6069 | * retrieve saved psr.up | ||
6070 | */ | ||
6071 | psr_up = ctx->ctx_saved_psr_up; | ||
6072 | |||
6073 | /* | ||
6074 | * if we were the last user of the PMU on that CPU, | ||
6075 | * then nothing to do except restore psr | ||
6076 | */ | ||
6077 | if (GET_LAST_CPU(ctx) == smp_processor_id() && ctx->ctx_last_activation == GET_ACTIVATION()) { | ||
6078 | |||
6079 | /* | ||
6080 | * retrieve partial reload masks (due to user modifications) | ||
6081 | */ | ||
6082 | pmc_mask = ctx->ctx_reload_pmcs[0]; | ||
6083 | pmd_mask = ctx->ctx_reload_pmds[0]; | ||
6084 | |||
6085 | } else { | ||
6086 | /* | ||
6087 | * To avoid leaking information to the user level when psr.sp=0, | ||
6088 | * we must reload ALL implemented pmds (even the ones we don't use). | ||
6089 | * In the kernel we only allow PFM_READ_PMDS on registers which | ||
6090 | * we initialized or requested (sampling) so there is no risk there. | ||
6091 | */ | ||
6092 | pmd_mask = pfm_sysctl.fastctxsw ? ctx->ctx_used_pmds[0] : ctx->ctx_all_pmds[0]; | ||
6093 | |||
6094 | /* | ||
6095 | * ALL accessible PMCs are systematically reloaded, unused registers | ||
6096 | * get their default (from pfm_reset_pmu_state()) values to avoid picking | ||
6097 | * up stale configuration. | ||
6098 | * | ||
6099 | * PMC0 is never in the mask. It is always restored separately. | ||
6100 | */ | ||
6101 | pmc_mask = ctx->ctx_all_pmcs[0]; | ||
6102 | } | ||
6103 | /* | ||
6104 | * when context is MASKED, we will restore PMC with plm=0 | ||
6105 | * and PMD with stale information, but that's ok, nothing | ||
6106 | * will be captured. | ||
6107 | * | ||
6108 | * XXX: optimize here | ||
6109 | */ | ||
6110 | if (pmd_mask) pfm_restore_pmds(t->pmds, pmd_mask); | ||
6111 | if (pmc_mask) pfm_restore_pmcs(t->pmcs, pmc_mask); | ||
6112 | |||
6113 | /* | ||
6114 | * check for pending overflow at the time the state | ||
6115 | * was saved. | ||
6116 | */ | ||
6117 | if (unlikely(PMC0_HAS_OVFL(t->pmcs[0]))) { | ||
6118 | /* | ||
6119 | * reload pmc0 with the overflow information | ||
6120 | * On McKinley PMU, this will trigger a PMU interrupt | ||
6121 | */ | ||
6122 | ia64_set_pmc(0, t->pmcs[0]); | ||
6123 | ia64_srlz_d(); | ||
6124 | t->pmcs[0] = 0UL; | ||
6125 | |||
6126 | /* | ||
6127 | * will replay the PMU interrupt | ||
6128 | */ | ||
6129 | if (need_irq_resend) hw_resend_irq(NULL, IA64_PERFMON_VECTOR); | ||
6130 | |||
6131 | pfm_stats[smp_processor_id()].pfm_replay_ovfl_intr_count++; | ||
6132 | } | ||
6133 | |||
6134 | /* | ||
6135 | * we just did a reload, so we reset the partial reload fields | ||
6136 | */ | ||
6137 | ctx->ctx_reload_pmcs[0] = 0UL; | ||
6138 | ctx->ctx_reload_pmds[0] = 0UL; | ||
6139 | |||
6140 | SET_LAST_CPU(ctx, smp_processor_id()); | ||
6141 | |||
6142 | /* | ||
6143 | * dump activation value for this PMU | ||
6144 | */ | ||
6145 | INC_ACTIVATION(); | ||
6146 | /* | ||
6147 | * record current activation for this context | ||
6148 | */ | ||
6149 | SET_ACTIVATION(ctx); | ||
6150 | |||
6151 | /* | ||
6152 | * establish new ownership. | ||
6153 | */ | ||
6154 | SET_PMU_OWNER(task, ctx); | ||
6155 | |||
6156 | /* | ||
6157 | * restore the psr.up bit. measurement | ||
6158 | * is active again. | ||
6159 | * no PMU interrupt can happen at this point | ||
6160 | * because we still have interrupts disabled. | ||
6161 | */ | ||
6162 | if (likely(psr_up)) pfm_set_psr_up(); | ||
6163 | |||
6164 | /* | ||
6165 | * allow concurrent access to context | ||
6166 | */ | ||
6167 | pfm_unprotect_ctx_ctxsw(ctx, flags); | ||
6168 | } | ||
6169 | #else /* !CONFIG_SMP */ | ||
6170 | /* | ||
6171 | * reload PMU state for UP kernels | ||
6172 | * in 2.5 we come here with interrupts disabled | ||
6173 | */ | ||
6174 | void | ||
6175 | pfm_load_regs (struct task_struct *task) | ||
6176 | { | ||
6177 | struct thread_struct *t; | ||
6178 | pfm_context_t *ctx; | ||
6179 | struct task_struct *owner; | ||
6180 | unsigned long pmd_mask, pmc_mask; | ||
6181 | u64 psr, psr_up; | ||
6182 | int need_irq_resend; | ||
6183 | |||
6184 | owner = GET_PMU_OWNER(); | ||
6185 | ctx = PFM_GET_CTX(task); | ||
6186 | t = &task->thread; | ||
6187 | psr = pfm_get_psr(); | ||
6188 | |||
6189 | BUG_ON(psr & (IA64_PSR_UP|IA64_PSR_PP)); | ||
6190 | BUG_ON(psr & IA64_PSR_I); | ||
6191 | |||
6192 | /* | ||
6193 | * we restore ALL the debug registers to avoid picking up | ||
6194 | * stale state. | ||
6195 | * | ||
6196 | * This must be done even when the task is still the owner | ||
6197 | * as the registers may have been modified via ptrace() | ||
6198 | * (not perfmon) by the previous task. | ||
6199 | */ | ||
6200 | if (ctx->ctx_fl_using_dbreg) { | ||
6201 | pfm_restore_ibrs(ctx->ctx_ibrs, pmu_conf->num_ibrs); | ||
6202 | pfm_restore_dbrs(ctx->ctx_dbrs, pmu_conf->num_dbrs); | ||
6203 | } | ||
6204 | |||
6205 | /* | ||
6206 | * retrieved saved psr.up | ||
6207 | */ | ||
6208 | psr_up = ctx->ctx_saved_psr_up; | ||
6209 | need_irq_resend = pmu_conf->flags & PFM_PMU_IRQ_RESEND; | ||
6210 | |||
6211 | /* | ||
6212 | * short path, our state is still there, just | ||
6213 | * need to restore psr and we go | ||
6214 | * | ||
6215 | * we do not touch either PMC nor PMD. the psr is not touched | ||
6216 | * by the overflow_handler. So we are safe w.r.t. to interrupt | ||
6217 | * concurrency even without interrupt masking. | ||
6218 | */ | ||
6219 | if (likely(owner == task)) { | ||
6220 | if (likely(psr_up)) pfm_set_psr_up(); | ||
6221 | return; | ||
6222 | } | ||
6223 | |||
6224 | /* | ||
6225 | * someone else is still using the PMU, first push it out and | ||
6226 | * then we'll be able to install our stuff ! | ||
6227 | * | ||
6228 | * Upon return, there will be no owner for the current PMU | ||
6229 | */ | ||
6230 | if (owner) pfm_lazy_save_regs(owner); | ||
6231 | |||
6232 | /* | ||
6233 | * To avoid leaking information to the user level when psr.sp=0, | ||
6234 | * we must reload ALL implemented pmds (even the ones we don't use). | ||
6235 | * In the kernel we only allow PFM_READ_PMDS on registers which | ||
6236 | * we initialized or requested (sampling) so there is no risk there. | ||
6237 | */ | ||
6238 | pmd_mask = pfm_sysctl.fastctxsw ? ctx->ctx_used_pmds[0] : ctx->ctx_all_pmds[0]; | ||
6239 | |||
6240 | /* | ||
6241 | * ALL accessible PMCs are systematically reloaded, unused registers | ||
6242 | * get their default (from pfm_reset_pmu_state()) values to avoid picking | ||
6243 | * up stale configuration. | ||
6244 | * | ||
6245 | * PMC0 is never in the mask. It is always restored separately | ||
6246 | */ | ||
6247 | pmc_mask = ctx->ctx_all_pmcs[0]; | ||
6248 | |||
6249 | pfm_restore_pmds(t->pmds, pmd_mask); | ||
6250 | pfm_restore_pmcs(t->pmcs, pmc_mask); | ||
6251 | |||
6252 | /* | ||
6253 | * check for pending overflow at the time the state | ||
6254 | * was saved. | ||
6255 | */ | ||
6256 | if (unlikely(PMC0_HAS_OVFL(t->pmcs[0]))) { | ||
6257 | /* | ||
6258 | * reload pmc0 with the overflow information | ||
6259 | * On McKinley PMU, this will trigger a PMU interrupt | ||
6260 | */ | ||
6261 | ia64_set_pmc(0, t->pmcs[0]); | ||
6262 | ia64_srlz_d(); | ||
6263 | |||
6264 | t->pmcs[0] = 0UL; | ||
6265 | |||
6266 | /* | ||
6267 | * will replay the PMU interrupt | ||
6268 | */ | ||
6269 | if (need_irq_resend) hw_resend_irq(NULL, IA64_PERFMON_VECTOR); | ||
6270 | |||
6271 | pfm_stats[smp_processor_id()].pfm_replay_ovfl_intr_count++; | ||
6272 | } | ||
6273 | |||
6274 | /* | ||
6275 | * establish new ownership. | ||
6276 | */ | ||
6277 | SET_PMU_OWNER(task, ctx); | ||
6278 | |||
6279 | /* | ||
6280 | * restore the psr.up bit. measurement | ||
6281 | * is active again. | ||
6282 | * no PMU interrupt can happen at this point | ||
6283 | * because we still have interrupts disabled. | ||
6284 | */ | ||
6285 | if (likely(psr_up)) pfm_set_psr_up(); | ||
6286 | } | ||
6287 | #endif /* CONFIG_SMP */ | ||
6288 | |||
6289 | /* | ||
6290 | * pfm_flush_pmds: fold the current value of every used PMD (read from the live PMU when it can be accessed, otherwise from task->thread.pmds[]) into ctx->ctx_pmds[].val; counting PMDs are rebuilt as software value + hardware bits, plus one wrap (1 + ovfl_val) when their overflow bit is set in pmc0. This function assumes monitoring is stopped. | ||
6291 | */ | ||
6292 | static void | ||
6293 | pfm_flush_pmds(struct task_struct *task, pfm_context_t *ctx) | ||
6294 | { | ||
6295 | u64 pmc0; | ||
6296 | unsigned long mask2, val, pmd_val, ovfl_val; | ||
6297 | int i, can_access_pmu = 0; | ||
6298 | int is_self; | ||
6299 | |||
6300 | /* | ||
6301 | * is the caller the task being monitored (or which initiated the | ||
6302 | * session for system wide measurements) | ||
6303 | */ | ||
6304 | is_self = ctx->ctx_task == task ? 1 : 0; | ||
6305 | |||
6306 | /* | ||
6307 | * can access PMU if task is the owner of the PMU state on the current CPU | ||
6308 | * or if we are running on the CPU bound to the context in system-wide mode | ||
6309 | * (that is not necessarily the task the context is attached to in this mode). | ||
6310 | * In system-wide we always have can_access_pmu true because a task running on an | ||
6311 | * invalid processor is flagged earlier in the call stack (see pfm_stop). | ||
6312 | */ | ||
6313 | can_access_pmu = (GET_PMU_OWNER() == task) || (ctx->ctx_fl_system && ctx->ctx_cpu == smp_processor_id()); | ||
6314 | if (can_access_pmu) { | ||
6315 | /* | ||
6316 | * Mark the PMU as not owned | ||
6317 | * This will cause the interrupt handler to do nothing in case an overflow | ||
6318 | * interrupt was in-flight | ||
6319 | * This also guarantees that pmc0 will contain the final state | ||
6320 | * It virtually gives us full control on overflow processing from that point | ||
6321 | * on. | ||
6322 | */ | ||
6323 | SET_PMU_OWNER(NULL, NULL); | ||
6324 | DPRINT(("releasing ownership\n")); | ||
6325 | |||
6326 | /* | ||
6327 | * read current overflow status: | ||
6328 | * | ||
6329 | * we are guaranteed to read the final stable state | ||
6330 | */ | ||
6331 | ia64_srlz_d(); | ||
6332 | pmc0 = ia64_get_pmc(0); /* slow */ | ||
6333 | |||
6334 | /* | ||
6335 | * reset freeze bit, overflow status information destroyed | ||
6336 | */ | ||
6337 | pfm_unfreeze_pmu(); | ||
6338 | } else { | ||
6339 | pmc0 = task->thread.pmcs[0]; /* PMU not accessible: use the overflow status saved in the thread state */ | ||
6340 | /* | ||
6341 | * clear whatever overflow status bits there were | ||
6342 | */ | ||
6343 | task->thread.pmcs[0] = 0; | ||
6344 | } | ||
6345 | ovfl_val = pmu_conf->ovfl_val; | ||
6346 | /* | ||
6347 | * we save all the used pmds | ||
6348 | * we take care of overflows for counting PMDs | ||
6349 | * | ||
6350 | * XXX: sampling situation is not taken into account here | ||
6351 | */ | ||
6352 | mask2 = ctx->ctx_used_pmds[0]; /* only word 0 of the used-PMD bitmask is walked */ | ||
6353 | |||
6354 | DPRINT(("is_self=%d ovfl_val=0x%lx mask2=0x%lx\n", is_self, ovfl_val, mask2)); | ||
6355 | |||
6356 | for (i = 0; mask2; i++, mask2>>=1) { | ||
6357 | |||
6358 | /* skip non used pmds */ | ||
6359 | if ((mask2 & 0x1) == 0) continue; | ||
6360 | |||
6361 | /* | ||
6362 | * can access PMU always true in system wide mode | ||
6363 | */ | ||
6364 | val = pmd_val = can_access_pmu ? ia64_get_pmd(i) : task->thread.pmds[i]; | ||
6365 | |||
6366 | if (PMD_IS_COUNTING(i)) { | ||
6367 | DPRINT(("[%d] pmd[%d] ctx_pmd=0x%lx hw_pmd=0x%lx\n", | ||
6368 | task->pid, | ||
6369 | i, | ||
6370 | ctx->ctx_pmds[i].val, | ||
6371 | val & ovfl_val)); | ||
6372 | |||
6373 | /* | ||
6374 | * we rebuild the full 64 bit value of the counter | ||
6375 | */ | ||
6376 | val = ctx->ctx_pmds[i].val + (val & ovfl_val); | ||
6377 | |||
6378 | /* | ||
6379 | * now everything is in ctx_pmds[] and we need | ||
6380 | * to clear the saved context from save_regs() such that | ||
6381 | * pfm_read_pmds() gets the correct value | ||
6382 | */ | ||
6383 | pmd_val = 0UL; | ||
6384 | |||
6385 | /* | ||
6386 | * take care of overflow inline | ||
6387 | */ | ||
6388 | if (pmc0 & (1UL << i)) { | ||
6389 | val += 1 + ovfl_val; | ||
6390 | DPRINT(("[%d] pmd[%d] overflowed\n", task->pid, i)); | ||
6391 | } | ||
6392 | } | ||
6393 | |||
6394 | DPRINT(("[%d] ctx_pmd[%d]=0x%lx pmd_val=0x%lx\n", task->pid, i, val, pmd_val)); | ||
6395 | |||
6396 | if (is_self) task->thread.pmds[i] = pmd_val; /* counting PMDs are stored as 0 here (their value now lives in ctx_pmds[]); other PMDs keep the raw value */ | ||
6397 | |||
6398 | ctx->ctx_pmds[i].val = val; | ||
6399 | } | ||
6400 | } | ||
6401 | |||
6402 | static struct irqaction perfmon_irqaction = { /* irqaction for the PMU interrupt; registered on IA64_PERFMON_VECTOR by pfm_init_percpu() */ | ||
6403 | .handler = pfm_interrupt_handler, | ||
6404 | .flags = SA_INTERRUPT, | ||
6405 | .name = "perfmon" | ||
6406 | }; | ||
6407 | |||
6408 | /* | ||
6409 | * perfmon initialization routine, called from the initcall() table | ||
6410 | */ | ||
6411 | static int init_pfm_fs(void); | ||
6412 | |||
6413 | static int __init /* select the PMU description for this CPU: walks the NULL-terminated pmu_confs[] array and stores the first match in pmu_conf; returns 0 on success, -1 if nothing matches */ | ||
6414 | pfm_probe_pmu(void) | ||
6415 | { | ||
6416 | pmu_config_t **p; | ||
6417 | int family; | ||
6418 | |||
6419 | family = local_cpu_data->family; | ||
6420 | p = pmu_confs; | ||
6421 | |||
6422 | while(*p) { | ||
6423 | if ((*p)->probe) { /* an entry with a probe() callback is matched only by that callback returning 0 */ | ||
6424 | if ((*p)->probe() == 0) goto found; | ||
6425 | } else if ((*p)->pmu_family == family || (*p)->pmu_family == 0xff) { /* otherwise match on CPU family; 0xff matches any family */ | ||
6426 | goto found; | ||
6427 | } | ||
6428 | p++; | ||
6429 | } | ||
6430 | return -1; | ||
6431 | found: | ||
6432 | pmu_conf = *p; | ||
6433 | return 0; | ||
6434 | } | ||
6435 | |||
6436 | static struct file_operations pfm_proc_fops = { /* file operations installed on the /proc/perfmon entry by pfm_init(); reads go through the seq_file helpers */ | ||
6437 | .open = pfm_proc_open, | ||
6438 | .read = seq_read, | ||
6439 | .llseek = seq_lseek, | ||
6440 | .release = seq_release, | ||
6441 | }; | ||
6442 | |||
6443 | int __init /* perfmon init: probes the PMU, builds the impl_pmcs/impl_pmds bitmaps and register counts, sanity-checks them, creates /proc/perfmon and the sysctl table, initializes locks and per-CPU stats. Returns 0 on success, -ENODEV when no PMU description matches, -1 on any later failure (pmu_conf is then reset to NULL). */ | ||
6444 | pfm_init(void) | ||
6445 | { | ||
6446 | unsigned int n, n_counters, i; | ||
6447 | |||
6448 | printk("perfmon: version %u.%u IRQ %u\n", | ||
6449 | PFM_VERSION_MAJ, | ||
6450 | PFM_VERSION_MIN, | ||
6451 | IA64_PERFMON_VECTOR); | ||
6452 | |||
6453 | if (pfm_probe_pmu()) { | ||
6454 | printk(KERN_INFO "perfmon: disabled, there is no support for processor family %d\n", | ||
6455 | local_cpu_data->family); | ||
6456 | return -ENODEV; | ||
6457 | } | ||
6458 | |||
6459 | /* | ||
6460 | * compute the number of implemented PMD/PMC from the | ||
6461 | * description tables | ||
6462 | */ | ||
6463 | n = 0; | ||
6464 | for (i=0; PMC_IS_LAST(i) == 0; i++) { | ||
6465 | if (PMC_IS_IMPL(i) == 0) continue; | ||
6466 | pmu_conf->impl_pmcs[i>>6] |= 1UL << (i&63); | ||
6467 | n++; | ||
6468 | } | ||
6469 | pmu_conf->num_pmcs = n; | ||
6470 | |||
6471 | n = 0; n_counters = 0; | ||
6472 | for (i=0; PMD_IS_LAST(i) == 0; i++) { | ||
6473 | if (PMD_IS_IMPL(i) == 0) continue; | ||
6474 | pmu_conf->impl_pmds[i>>6] |= 1UL << (i&63); | ||
6475 | n++; | ||
6476 | if (PMD_IS_COUNTING(i)) n_counters++; | ||
6477 | } | ||
6478 | pmu_conf->num_pmds = n; | ||
6479 | pmu_conf->num_counters = n_counters; | ||
6480 | |||
6481 | /* | ||
6482 | * sanity checks on the number of debug registers | ||
6483 | */ | ||
6484 | if (pmu_conf->use_rr_dbregs) { | ||
6485 | if (pmu_conf->num_ibrs > IA64_NUM_DBG_REGS) { | ||
6486 | printk(KERN_INFO "perfmon: unsupported number of code debug registers (%u)\n", pmu_conf->num_ibrs); | ||
6487 | pmu_conf = NULL; | ||
6488 | return -1; | ||
6489 | } | ||
6490 | if (pmu_conf->num_dbrs > IA64_NUM_DBG_REGS) { | ||
6491 | printk(KERN_INFO "perfmon: unsupported number of data debug registers (%u)\n", pmu_conf->num_dbrs); /* report the value that failed the check (num_dbrs, not num_ibrs) */ | ||
6492 | pmu_conf = NULL; | ||
6493 | return -1; | ||
6494 | } | ||
6495 | } | ||
6496 | |||
6497 | printk("perfmon: %s PMU detected, %u PMCs, %u PMDs, %u counters (%lu bits)\n", | ||
6498 | pmu_conf->pmu_name, | ||
6499 | pmu_conf->num_pmcs, | ||
6500 | pmu_conf->num_pmds, | ||
6501 | pmu_conf->num_counters, | ||
6502 | ffz(pmu_conf->ovfl_val)); | ||
6503 | |||
6504 | /* sanity check */ | ||
6505 | if (pmu_conf->num_pmds >= IA64_NUM_PMD_REGS || pmu_conf->num_pmcs >= IA64_NUM_PMC_REGS) { | ||
6506 | printk(KERN_ERR "perfmon: not enough pmc/pmd, perfmon disabled\n"); | ||
6507 | pmu_conf = NULL; | ||
6508 | return -1; | ||
6509 | } | ||
6510 | |||
6511 | /* | ||
6512 | * create /proc/perfmon (mostly for debugging purposes) | ||
6513 | */ | ||
6514 | perfmon_dir = create_proc_entry("perfmon", S_IRUGO, NULL); | ||
6515 | if (perfmon_dir == NULL) { | ||
6516 | printk(KERN_ERR "perfmon: cannot create /proc entry, perfmon disabled\n"); | ||
6517 | pmu_conf = NULL; | ||
6518 | return -1; | ||
6519 | } | ||
6520 | /* | ||
6521 | * install customized file operations for /proc/perfmon entry | ||
6522 | */ | ||
6523 | perfmon_dir->proc_fops = &pfm_proc_fops; | ||
6524 | |||
6525 | /* | ||
6526 | * create /proc/sys/kernel/perfmon (for debugging purposes) | ||
6527 | */ | ||
6528 | pfm_sysctl_header = register_sysctl_table(pfm_sysctl_root, 0); | ||
6529 | |||
6530 | /* | ||
6531 | * initialize all our spinlocks | ||
6532 | */ | ||
6533 | spin_lock_init(&pfm_sessions.pfs_lock); | ||
6534 | spin_lock_init(&pfm_buffer_fmt_lock); | ||
6535 | |||
6536 | init_pfm_fs(); /* NOTE(review): return value is ignored */ | ||
6537 | |||
6538 | for(i=0; i < NR_CPUS; i++) pfm_stats[i].pfm_ovfl_intr_cycles_min = ~0UL; | ||
6539 | |||
6540 | return 0; | ||
6541 | } | ||
6542 | |||
6543 | __initcall(pfm_init); | ||
6544 | |||
6545 | /* | ||
6546 | * per-CPU PMU setup: stops any active measurement, unfreezes the PMU, registers perfmon_irqaction once (on CPU 0 only) and writes IA64_PERFMON_VECTOR into the PMV control register. This function is called before pfm_init() | ||
6547 | */ | ||
6548 | void | ||
6549 | pfm_init_percpu (void) | ||
6550 | { | ||
6551 | /* | ||
6552 | * make sure no measurement is active | ||
6553 | * (may inherit programmed PMCs from EFI). | ||
6554 | */ | ||
6555 | pfm_clear_psr_pp(); | ||
6556 | pfm_clear_psr_up(); | ||
6557 | |||
6558 | /* | ||
6559 | * we run with the PMU not frozen at all times | ||
6560 | */ | ||
6561 | pfm_unfreeze_pmu(); | ||
6562 | |||
6563 | if (smp_processor_id() == 0) /* the irqaction is registered only once, from CPU 0 */ | ||
6564 | register_percpu_irq(IA64_PERFMON_VECTOR, &perfmon_irqaction); | ||
6565 | |||
6566 | ia64_setreg(_IA64_REG_CR_PMV, IA64_PERFMON_VECTOR); | ||
6567 | ia64_srlz_d(); | ||
6568 | } | ||
6569 | |||
6570 | /* | ||
6571 | * used for debug purposes only: prints the PMU owner, psr/dcr monitoring bits and every implemented PMC/PMD (hardware value next to the copy saved in current->thread) for the current CPU, with interrupts disabled. Prints nothing when no monitoring is flagged (cpuinfo == 0, user psr.pp == 0, dcr.pp == 0). Side effect: clears psr.up and psr.pp in the saved user registers of current. `from` is the caller name shown in the trace. | ||
6572 | */ | ||
6573 | void | ||
6574 | dump_pmu_state(const char *from) | ||
6575 | { | ||
6576 | struct task_struct *task; | ||
6577 | struct thread_struct *t; | ||
6578 | struct pt_regs *regs; | ||
6579 | pfm_context_t *ctx; | ||
6580 | unsigned long psr, dcr, info, flags; | ||
6581 | int i, this_cpu; | ||
6582 | |||
6583 | local_irq_save(flags); | ||
6584 | |||
6585 | this_cpu = smp_processor_id(); | ||
6586 | regs = ia64_task_regs(current); | ||
6587 | info = PFM_CPUINFO_GET(); | ||
6588 | dcr = ia64_getreg(_IA64_REG_CR_DCR); | ||
6589 | |||
6590 | if (info == 0 && ia64_psr(regs)->pp == 0 && (dcr & IA64_DCR_PP) == 0) { | ||
6591 | local_irq_restore(flags); | ||
6592 | return; | ||
6593 | } | ||
6594 | |||
6595 | printk("CPU%d from %s() current [%d] iip=0x%lx %s\n", | ||
6596 | this_cpu, | ||
6597 | from, | ||
6598 | current->pid, | ||
6599 | regs->cr_iip, | ||
6600 | current->comm); | ||
6601 | |||
6602 | task = GET_PMU_OWNER(); | ||
6603 | ctx = GET_PMU_CTX(); | ||
6604 | |||
6605 | printk("->CPU%d owner [%d] ctx=%p\n", this_cpu, task ? task->pid : -1, ctx); | ||
6606 | |||
6607 | psr = pfm_get_psr(); | ||
6608 | |||
6609 | printk("->CPU%d pmc0=0x%lx psr.pp=%d psr.up=%d dcr.pp=%d syst_info=0x%lx user_psr.up=%d user_psr.pp=%d\n", | ||
6610 | this_cpu, | ||
6611 | ia64_get_pmc(0), | ||
6612 | psr & IA64_PSR_PP ? 1 : 0, | ||
6613 | psr & IA64_PSR_UP ? 1 : 0, | ||
6614 | dcr & IA64_DCR_PP ? 1 : 0, | ||
6615 | info, | ||
6616 | ia64_psr(regs)->up, | ||
6617 | ia64_psr(regs)->pp); | ||
6618 | |||
6619 | ia64_psr(regs)->up = 0; /* side effect: monitoring bits are cleared in the saved user psr */ | ||
6620 | ia64_psr(regs)->pp = 0; | ||
6621 | |||
6622 | t = &current->thread; | ||
6623 | |||
6624 | for (i=1; PMC_IS_LAST(i) == 0; i++) { | ||
6625 | if (PMC_IS_IMPL(i) == 0) continue; | ||
6626 | printk("->CPU%d pmc[%d]=0x%lx thread_pmc[%d]=0x%lx\n", this_cpu, i, ia64_get_pmc(i), i, t->pmcs[i]); | ||
6627 | } | ||
6628 | |||
6629 | for (i=1; PMD_IS_LAST(i) == 0; i++) { | ||
6630 | if (PMD_IS_IMPL(i) == 0) continue; | ||
6631 | printk("->CPU%d pmd[%d]=0x%lx thread_pmd[%d]=0x%lx\n", this_cpu, i, ia64_get_pmd(i), i, t->pmds[i]); | ||
6632 | } | ||
6633 | |||
6634 | if (ctx) { | ||
6635 | printk("->CPU%d ctx_state=%d vaddr=%p addr=%p msgq_head=%d msgq_tail=%d saved_psr_up=0x%lx\n", | ||
6636 | this_cpu, | ||
6637 | ctx->ctx_state, | ||
6638 | ctx->ctx_smpl_vaddr, | ||
6639 | ctx->ctx_smpl_hdr, | ||
6640 | ctx->ctx_msgq_head, | ||
6641 | ctx->ctx_msgq_tail, | ||
6642 | ctx->ctx_saved_psr_up); | ||
6643 | } | ||
6644 | local_irq_restore(flags); | ||
6645 | } | ||
6646 | |||
6647 | /* | ||
6648 | * called from process.c:copy_thread(). task is new child. Detaches the child from any perfmon context by clearing thread.pfm_context and the work-pending flag; regs is not used here. | ||
6649 | */ | ||
6650 | void | ||
6651 | pfm_inherit(struct task_struct *task, struct pt_regs *regs) | ||
6652 | { | ||
6653 | struct thread_struct *thread; | ||
6654 | |||
6655 | DPRINT(("perfmon: pfm_inherit clearing state for [%d]\n", task->pid)); | ||
6656 | |||
6657 | thread = &task->thread; | ||
6658 | |||
6659 | /* | ||
6660 | * cut links inherited from parent (current) | ||
6661 | */ | ||
6662 | thread->pfm_context = NULL; | ||
6663 | |||
6664 | PFM_SET_WORK_PENDING(task, 0); | ||
6665 | |||
6666 | /* | ||
6667 | * the psr bits are already set properly in copy_thread() | ||
6668 | */ | ||
6669 | } | ||
6670 | #else /* !CONFIG_PERFMON */ | ||
6671 | asmlinkage long /* stub compiled when CONFIG_PERFMON is not set: ignores its arguments and always returns -ENOSYS */ | ||
6672 | sys_perfmonctl (int fd, int cmd, void *arg, int count) | ||
6673 | { | ||
6674 | return -ENOSYS; | ||
6675 | } | ||
6676 | #endif /* CONFIG_PERFMON */ | ||
diff --git a/arch/ia64/kernel/perfmon_default_smpl.c b/arch/ia64/kernel/perfmon_default_smpl.c new file mode 100644 index 000000000000..965d29004555 --- /dev/null +++ b/arch/ia64/kernel/perfmon_default_smpl.c | |||
@@ -0,0 +1,306 @@ | |||
1 | /* | ||
2 | * Copyright (C) 2002-2003 Hewlett-Packard Co | ||
3 | * Stephane Eranian <eranian@hpl.hp.com> | ||
4 | * | ||
5 | * This file implements the default sampling buffer format | ||
6 | * for the Linux/ia64 perfmon-2 subsystem. | ||
7 | */ | ||
8 | #include <linux/kernel.h> | ||
9 | #include <linux/types.h> | ||
10 | #include <linux/module.h> | ||
11 | #include <linux/config.h> | ||
12 | #include <linux/init.h> | ||
13 | #include <asm/delay.h> | ||
14 | #include <linux/smp.h> | ||
15 | |||
16 | #include <asm/perfmon.h> | ||
17 | #include <asm/perfmon_default_smpl.h> | ||
18 | |||
19 | MODULE_AUTHOR("Stephane Eranian <eranian@hpl.hp.com>"); | ||
20 | MODULE_DESCRIPTION("perfmon default sampling format"); | ||
21 | MODULE_LICENSE("GPL"); | ||
22 | |||
23 | MODULE_PARM(debug, "i"); | ||
24 | MODULE_PARM_DESC(debug, "debug"); | ||
25 | |||
26 | MODULE_PARM(debug_ovfl, "i"); | ||
27 | MODULE_PARM_DESC(debug_ovfl, "debug ovfl"); | ||
28 | |||
29 | |||
30 | #define DEFAULT_DEBUG 1 /* compile-time switch: while defined, DPRINT()/DPRINT_ovfl() expand to a printk guarded by the debug / debug_ovfl variables; otherwise they expand to nothing */ | ||
31 | |||
32 | #ifdef DEFAULT_DEBUG | ||
33 | #define DPRINT(a) \ | ||
34 | do { \ | ||
35 | if (unlikely(debug >0)) { printk("%s.%d: CPU%d ", __FUNCTION__, __LINE__, smp_processor_id()); printk a; } \ | ||
36 | } while (0) | ||
37 | |||
38 | #define DPRINT_ovfl(a) \ | ||
39 | do { \ | ||
40 | if (unlikely(debug_ovfl >0)) { printk("%s.%d: CPU%d ", __FUNCTION__, __LINE__, smp_processor_id()); printk a; } \ | ||
41 | } while (0) | ||
42 | |||
43 | #else | ||
44 | #define DPRINT(a) | ||
45 | #define DPRINT_ovfl(a) | ||
46 | #endif | ||
47 | |||
48 | static int debug, debug_ovfl; /* runtime switches tested by DPRINT() and DPRINT_ovfl(); exposed as module parameters above */ | ||
49 | |||
50 | static int /* fmt_validate hook: rejects a NULL argument block or a requested buf_size below PFM_DEFAULT_SMPL_MIN_BUF_SIZE with -EINVAL; returns 0 otherwise. flags and cpu are only traced. */ | ||
51 | default_validate(struct task_struct *task, unsigned int flags, int cpu, void *data) | ||
52 | { | ||
53 | pfm_default_smpl_arg_t *arg = (pfm_default_smpl_arg_t*)data; | ||
54 | int ret = 0; | ||
55 | |||
56 | if (data == NULL) { | ||
57 | DPRINT(("[%d] no argument passed\n", task->pid)); | ||
58 | return -EINVAL; | ||
59 | } | ||
60 | |||
61 | DPRINT(("[%d] validate flags=0x%x CPU%d\n", task->pid, flags, cpu)); | ||
62 | |||
63 | /* | ||
64 | * must hold at least the buffer header + one minimally sized entry | ||
65 | */ | ||
66 | if (arg->buf_size < PFM_DEFAULT_SMPL_MIN_BUF_SIZE) return -EINVAL; | ||
67 | |||
68 | DPRINT(("buf_size=%lu\n", arg->buf_size)); | ||
69 | |||
70 | return ret; | ||
71 | } | ||
72 | |||
73 | static int /* fmt_getsize hook: stores the requested buffer size (arg->buf_size) into *size and returns 0; task, flags and cpu are unused */ | ||
74 | default_get_size(struct task_struct *task, unsigned int flags, int cpu, void *data, unsigned long *size) | ||
75 | { | ||
76 | pfm_default_smpl_arg_t *arg = (pfm_default_smpl_arg_t *)data; | ||
77 | |||
78 | /* | ||
79 | * size has been validated in default_validate | ||
80 | */ | ||
81 | *size = arg->buf_size; | ||
82 | |||
83 | return 0; | ||
84 | } | ||
85 | |||
86 | static int /* fmt_init hook: writes a fresh header at the start of buf (version, size taken from arg->buf_size, write offset just past the header, zeroed overflow and sample counts); always returns 0 */ | ||
87 | default_init(struct task_struct *task, void *buf, unsigned int flags, int cpu, void *data) | ||
88 | { | ||
89 | pfm_default_smpl_hdr_t *hdr; | ||
90 | pfm_default_smpl_arg_t *arg = (pfm_default_smpl_arg_t *)data; | ||
91 | |||
92 | hdr = (pfm_default_smpl_hdr_t *)buf; | ||
93 | |||
94 | hdr->hdr_version = PFM_DEFAULT_SMPL_VERSION; | ||
95 | hdr->hdr_buf_size = arg->buf_size; | ||
96 | hdr->hdr_cur_offs = sizeof(*hdr); | ||
97 | hdr->hdr_overflows = 0UL; | ||
98 | hdr->hdr_count = 0UL; | ||
99 | |||
100 | DPRINT(("[%d] buffer=%p buf_size=%lu hdr_size=%lu hdr_version=%u cur_offs=%lu\n", | ||
101 | task->pid, | ||
102 | buf, | ||
103 | hdr->hdr_buf_size, | ||
104 | sizeof(*hdr), | ||
105 | hdr->hdr_version, | ||
106 | hdr->hdr_cur_offs)); | ||
107 | |||
108 | return 0; | ||
109 | } | ||
110 | |||
111 | static int /* fmt_handler hook: appends one sample (entry header followed by the npmds values from arg->smpl_pmds_values) at hdr_cur_offs and fills arg->ovfl_ctrl. Returns 0 when the sample was stored and room remains for another maximum-size entry; -1 when the buffer is full, either before storing or right after storing the last sample that fits; -EINVAL when buf, arg, regs or task is NULL. */ | ||
112 | default_handler(struct task_struct *task, void *buf, pfm_ovfl_arg_t *arg, struct pt_regs *regs, unsigned long stamp) | ||
113 | { | ||
114 | pfm_default_smpl_hdr_t *hdr; | ||
115 | pfm_default_smpl_entry_t *ent; | ||
116 | void *cur, *last; | ||
117 | unsigned long *e, entry_size; | ||
118 | unsigned int npmds, i; | ||
119 | unsigned char ovfl_pmd; | ||
120 | unsigned char ovfl_notify; | ||
121 | |||
122 | if (unlikely(buf == NULL || arg == NULL|| regs == NULL || task == NULL)) { | ||
123 | DPRINT(("[%d] invalid arguments buf=%p arg=%p\n", task ? task->pid : -1, buf, arg)); /* task may be the NULL argument that brought us here: do not dereference it blindly */ | ||
124 | return -EINVAL; | ||
125 | } | ||
126 | |||
127 | hdr = (pfm_default_smpl_hdr_t *)buf; | ||
128 | cur = buf+hdr->hdr_cur_offs; | ||
129 | last = buf+hdr->hdr_buf_size; | ||
130 | ovfl_pmd = arg->ovfl_pmd; | ||
131 | ovfl_notify = arg->ovfl_notify; | ||
132 | |||
133 | /* | ||
134 | * precheck for sanity | ||
135 | */ | ||
136 | if ((last - cur) < PFM_DEFAULT_MAX_ENTRY_SIZE) goto full; | ||
137 | |||
138 | npmds = hweight64(arg->smpl_pmds[0]); | ||
139 | |||
140 | ent = (pfm_default_smpl_entry_t *)cur; | ||
141 | |||
142 | prefetch(arg->smpl_pmds_values); | ||
143 | |||
144 | entry_size = sizeof(*ent) + (npmds << 3); | ||
145 | |||
146 | /* position for first pmd */ | ||
147 | e = (unsigned long *)(ent+1); | ||
148 | |||
149 | hdr->hdr_count++; | ||
150 | |||
151 | DPRINT_ovfl(("[%d] count=%lu cur=%p last=%p free_bytes=%lu ovfl_pmd=%d ovfl_notify=%d npmds=%u\n", | ||
152 | task->pid, | ||
153 | hdr->hdr_count, | ||
154 | cur, last, | ||
155 | last-cur, | ||
156 | ovfl_pmd, | ||
157 | ovfl_notify, npmds)); | ||
158 | |||
159 | /* | ||
160 | * current = task running at the time of the overflow. | ||
161 | * | ||
162 | * per-task mode: | ||
163 | * - this is usually the task being monitored. | ||
164 | * Under certain conditions, it might be a different task | ||
165 | * | ||
166 | * system-wide: | ||
167 | * - this is not necessarily the task controlling the session | ||
168 | */ | ||
169 | ent->pid = current->pid; | ||
170 | ent->ovfl_pmd = ovfl_pmd; | ||
171 | ent->last_reset_val = arg->pmd_last_reset; //pmd[0].reg_last_reset_val; | ||
172 | |||
173 | /* | ||
174 | * where did the fault happen (includes slot number) | ||
175 | */ | ||
176 | ent->ip = regs->cr_iip | ((regs->cr_ipsr >> 41) & 0x3); | ||
177 | |||
178 | ent->tstamp = stamp; | ||
179 | ent->cpu = smp_processor_id(); | ||
180 | ent->set = arg->active_set; | ||
181 | ent->tgid = current->tgid; | ||
182 | |||
183 | /* | ||
184 | * selectively store PMDs in increasing index number | ||
185 | */ | ||
186 | if (npmds) { | ||
187 | unsigned long *val = arg->smpl_pmds_values; | ||
188 | for(i=0; i < npmds; i++) { | ||
189 | *e++ = *val++; | ||
190 | } | ||
191 | } | ||
192 | |||
193 | /* | ||
194 | * update position for next entry | ||
195 | */ | ||
196 | hdr->hdr_cur_offs += entry_size; | ||
197 | cur += entry_size; | ||
198 | |||
199 | /* | ||
200 | * post check to avoid losing the last sample | ||
201 | */ | ||
202 | if ((last - cur) < PFM_DEFAULT_MAX_ENTRY_SIZE) goto full; | ||
203 | |||
204 | /* | ||
205 | * keep same ovfl_pmds, ovfl_notify | ||
206 | */ | ||
207 | arg->ovfl_ctrl.bits.notify_user = 0; | ||
208 | arg->ovfl_ctrl.bits.block_task = 0; | ||
209 | arg->ovfl_ctrl.bits.mask_monitoring = 0; | ||
210 | arg->ovfl_ctrl.bits.reset_ovfl_pmds = 1; /* reset before returning from interrupt handler */ | ||
211 | |||
212 | return 0; | ||
213 | full: | ||
214 | DPRINT_ovfl(("sampling buffer full free=%lu, count=%lu, ovfl_notify=%d\n", last-cur, hdr->hdr_count, ovfl_notify)); | ||
215 | |||
216 | /* | ||
217 | * increment number of buffer overflow. | ||
218 | * important to detect duplicate set of samples. | ||
219 | */ | ||
220 | hdr->hdr_overflows++; | ||
221 | |||
222 | /* | ||
223 | * if no notification requested, then we saturate the buffer | ||
224 | */ | ||
225 | if (ovfl_notify == 0) { | ||
226 | arg->ovfl_ctrl.bits.notify_user = 0; | ||
227 | arg->ovfl_ctrl.bits.block_task = 0; | ||
228 | arg->ovfl_ctrl.bits.mask_monitoring = 1; | ||
229 | arg->ovfl_ctrl.bits.reset_ovfl_pmds = 0; | ||
230 | } else { | ||
231 | arg->ovfl_ctrl.bits.notify_user = 1; | ||
232 | arg->ovfl_ctrl.bits.block_task = 1; /* ignored for non-blocking context */ | ||
233 | arg->ovfl_ctrl.bits.mask_monitoring = 1; | ||
234 | arg->ovfl_ctrl.bits.reset_ovfl_pmds = 0; /* no reset now */ | ||
235 | } | ||
236 | return -1; /* we are full, sorry */ | ||
237 | } | ||
238 | |||
239 | static int /* restart hook (used for both fmt_restart and fmt_restart_active in default_fmt): rewinds the buffer by resetting hdr_count and hdr_cur_offs, then asks the caller to unmask monitoring and reset the overflowed PMDs; always returns 0. task and regs are unused. */ | ||
240 | default_restart(struct task_struct *task, pfm_ovfl_ctrl_t *ctrl, void *buf, struct pt_regs *regs) | ||
241 | { | ||
242 | pfm_default_smpl_hdr_t *hdr; | ||
243 | |||
244 | hdr = (pfm_default_smpl_hdr_t *)buf; | ||
245 | |||
246 | hdr->hdr_count = 0UL; | ||
247 | hdr->hdr_cur_offs = sizeof(*hdr); | ||
248 | |||
249 | ctrl->bits.mask_monitoring = 0; | ||
250 | ctrl->bits.reset_ovfl_pmds = 1; /* uses long-reset values */ | ||
251 | |||
252 | return 0; | ||
253 | } | ||
254 | |||
255 | static int /* fmt_exit hook: only emits a debug trace; always returns 0 */ | ||
256 | default_exit(struct task_struct *task, void *buf, struct pt_regs *regs) | ||
257 | { | ||
258 | DPRINT(("[%d] exit(%p)\n", task->pid, buf)); | ||
259 | return 0; | ||
260 | } | ||
261 | |||
262 | static pfm_buffer_fmt_t default_fmt={ /* descriptor of the default sampling format; registered with pfm_register_buffer_fmt() at module init and unregistered by uuid at module exit */ | ||
263 | .fmt_name = "default_format", | ||
264 | .fmt_uuid = PFM_DEFAULT_SMPL_UUID, | ||
265 | .fmt_arg_size = sizeof(pfm_default_smpl_arg_t), | ||
266 | .fmt_validate = default_validate, | ||
267 | .fmt_getsize = default_get_size, | ||
268 | .fmt_init = default_init, | ||
269 | .fmt_handler = default_handler, | ||
270 | .fmt_restart = default_restart, | ||
271 | .fmt_restart_active = default_restart, /* same routine as fmt_restart */ | ||
272 | .fmt_exit = default_exit, | ||
273 | }; | ||
274 | |||
275 | static int __init /* module init: registers default_fmt, logs the outcome, and returns the result of pfm_register_buffer_fmt() (0 on success) */ | ||
276 | pfm_default_smpl_init_module(void) | ||
277 | { | ||
278 | int ret; | ||
279 | |||
280 | ret = pfm_register_buffer_fmt(&default_fmt); | ||
281 | if (ret == 0) { | ||
282 | printk("perfmon_default_smpl: %s v%u.%u registered\n", | ||
283 | default_fmt.fmt_name, | ||
284 | PFM_DEFAULT_SMPL_VERSION_MAJ, | ||
285 | PFM_DEFAULT_SMPL_VERSION_MIN); | ||
286 | } else { | ||
287 | printk("perfmon_default_smpl: %s cannot register ret=%d\n", | ||
288 | default_fmt.fmt_name, | ||
289 | ret); | ||
290 | } | ||
291 | |||
292 | return ret; | ||
293 | } | ||
294 | |||
295 | static void __exit /* module exit: unregisters the format by uuid and logs the return code; the result is not otherwise acted upon */ | ||
296 | pfm_default_smpl_cleanup_module(void) | ||
297 | { | ||
298 | int ret; | ||
299 | ret = pfm_unregister_buffer_fmt(default_fmt.fmt_uuid); | ||
300 | |||
301 | printk("perfmon_default_smpl: unregister %s=%d\n", default_fmt.fmt_name, ret); | ||
302 | } | ||
303 | |||
304 | module_init(pfm_default_smpl_init_module); | ||
305 | module_exit(pfm_default_smpl_cleanup_module); | ||
306 | |||
diff --git a/arch/ia64/kernel/perfmon_generic.h b/arch/ia64/kernel/perfmon_generic.h new file mode 100644 index 000000000000..67489478041e --- /dev/null +++ b/arch/ia64/kernel/perfmon_generic.h | |||
@@ -0,0 +1,45 @@ | |||
1 | /* | ||
2 | * This file contains the generic PMU register description tables | ||
3 | * and pmc checker used by perfmon.c. | ||
4 | * | ||
5 | * Copyright (C) 2002-2003 Hewlett Packard Co | ||
6 | * Stephane Eranian <eranian@hpl.hp.com> | ||
7 | */ | ||
8 | |||
9 | static pfm_reg_desc_t pfm_gen_pmc_desc[PMU_MAX_PMCS]={ /* generic PMU: one entry per PMC number; pmc0-3 are control registers, pmc4-7 are counting registers each listing RDEP() of its own number; the list is closed by a PFM_REG_END entry */ | ||
10 | /* pmc0 */ { PFM_REG_CONTROL , 0, 0x1UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, | ||
11 | /* pmc1 */ { PFM_REG_CONTROL , 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, | ||
12 | /* pmc2 */ { PFM_REG_CONTROL , 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, | ||
13 | /* pmc3 */ { PFM_REG_CONTROL , 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, | ||
14 | /* pmc4 */ { PFM_REG_COUNTING, 0, 0x0UL, -1UL, NULL, NULL, {RDEP(4),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, | ||
15 | /* pmc5 */ { PFM_REG_COUNTING, 0, 0x0UL, -1UL, NULL, NULL, {RDEP(5),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, | ||
16 | /* pmc6 */ { PFM_REG_COUNTING, 0, 0x0UL, -1UL, NULL, NULL, {RDEP(6),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, | ||
17 | /* pmc7 */ { PFM_REG_COUNTING, 0, 0x0UL, -1UL, NULL, NULL, {RDEP(7),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, | ||
18 | { PFM_REG_END , 0, 0x0UL, -1UL, NULL, NULL, {0,}, {0,}}, /* end marker */ | ||
19 | }; | ||
20 | |||
21 | static pfm_reg_desc_t pfm_gen_pmd_desc[PMU_MAX_PMDS]={ /* generic PMU: one entry per PMD number; pmd0-3 are marked not implemented, pmd4-7 are counting registers each listing RDEP() of its own number; the list is closed by a PFM_REG_END entry */ | ||
22 | /* pmd0 */ { PFM_REG_NOTIMPL , 0, 0x0UL, -1UL, NULL, NULL, {0,}, {0,}}, | ||
23 | /* pmd1 */ { PFM_REG_NOTIMPL , 0, 0x0UL, -1UL, NULL, NULL, {0,}, {0,}}, | ||
24 | /* pmd2 */ { PFM_REG_NOTIMPL , 0, 0x0UL, -1UL, NULL, NULL, {0,}, {0,}}, | ||
25 | /* pmd3 */ { PFM_REG_NOTIMPL , 0, 0x0UL, -1UL, NULL, NULL, {0,}, {0,}}, | ||
26 | /* pmd4 */ { PFM_REG_COUNTING, 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {RDEP(4),0UL, 0UL, 0UL}}, | ||
27 | /* pmd5 */ { PFM_REG_COUNTING, 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {RDEP(5),0UL, 0UL, 0UL}}, | ||
28 | /* pmd6 */ { PFM_REG_COUNTING, 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {RDEP(6),0UL, 0UL, 0UL}}, | ||
29 | /* pmd7 */ { PFM_REG_COUNTING, 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {RDEP(7),0UL, 0UL, 0UL}}, | ||
30 | { PFM_REG_END , 0, 0x0UL, -1UL, NULL, NULL, {0,}, {0,}}, /* end marker */ | ||
31 | }; | ||
32 | |||
33 | /* | ||
34 | * PMU description for the generic (fallback) case. impl_pmcs, impl_pmds are computed at runtime to minimize errors! | ||
35 | */ | ||
36 | static pmu_config_t pmu_conf_gen={ | ||
37 | .pmu_name = "Generic", | ||
38 | .pmu_family = 0xff, /* any */ | ||
39 | .ovfl_val = (1UL << 32) - 1, /* low 32 bits set */ | ||
40 | .num_ibrs = 0, /* does not use */ | ||
41 | .num_dbrs = 0, /* does not use */ | ||
42 | .pmd_desc = pfm_gen_pmd_desc, | ||
43 | .pmc_desc = pfm_gen_pmc_desc | ||
44 | }; | ||
45 | |||
diff --git a/arch/ia64/kernel/perfmon_itanium.h b/arch/ia64/kernel/perfmon_itanium.h new file mode 100644 index 000000000000..d1d508a0fbd3 --- /dev/null +++ b/arch/ia64/kernel/perfmon_itanium.h | |||
@@ -0,0 +1,115 @@ | |||
1 | /* | ||
2 | * This file contains the Itanium PMU register description tables | ||
3 | * and pmc checker used by perfmon.c. | ||
4 | * | ||
5 | * Copyright (C) 2002-2003 Hewlett Packard Co | ||
6 | * Stephane Eranian <eranian@hpl.hp.com> | ||
7 | */ | ||
8 | static int pfm_ita_pmc_check(struct task_struct *task, pfm_context_t *ctx, unsigned int cnum, unsigned long *val, struct pt_regs *regs); | ||
9 | |||
10 | static pfm_reg_desc_t pfm_ita_pmc_desc[PMU_MAX_PMCS]={ /* Itanium PMU: one entry per PMC number (pmc0-pmc13), closed by a PFM_REG_END entry; pmc11 and pmc13 carry the pfm_ita_pmc_check() write checker */ | ||
11 | /* pmc0 */ { PFM_REG_CONTROL , 0, 0x1UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, | ||
12 | /* pmc1 */ { PFM_REG_CONTROL , 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, | ||
13 | /* pmc2 */ { PFM_REG_CONTROL , 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, | ||
14 | /* pmc3 */ { PFM_REG_CONTROL , 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, | ||
15 | /* pmc4 */ { PFM_REG_COUNTING, 6, 0x0UL, -1UL, NULL, NULL, {RDEP(4),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, | ||
16 | /* pmc5 */ { PFM_REG_COUNTING, 6, 0x0UL, -1UL, NULL, NULL, {RDEP(5),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, | ||
17 | /* pmc6 */ { PFM_REG_COUNTING, 6, 0x0UL, -1UL, NULL, NULL, {RDEP(6),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, | ||
18 | /* pmc7 */ { PFM_REG_COUNTING, 6, 0x0UL, -1UL, NULL, NULL, {RDEP(7),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, | ||
19 | /* pmc8 */ { PFM_REG_CONFIG , 0, 0xf00000003ffffff8UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, | ||
20 | /* pmc9 */ { PFM_REG_CONFIG , 0, 0xf00000003ffffff8UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, | ||
21 | /* pmc10 */ { PFM_REG_MONITOR , 6, 0x0UL, -1UL, NULL, NULL, {RDEP(0)|RDEP(1),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, | ||
22 | /* pmc11 */ { PFM_REG_MONITOR , 6, 0x0000000010000000UL, -1UL, NULL, pfm_ita_pmc_check, {RDEP(2)|RDEP(3)|RDEP(17),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, | ||
23 | /* pmc12 */ { PFM_REG_MONITOR , 6, 0x0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(9)|RDEP(10)|RDEP(11)|RDEP(12)|RDEP(13)|RDEP(14)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, | ||
24 | /* pmc13 */ { PFM_REG_CONFIG , 0, 0x0003ffff00000001UL, -1UL, NULL, pfm_ita_pmc_check, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, | ||
25 | { PFM_REG_END , 0, 0x0UL, -1UL, NULL, NULL, {0,}, {0,}}, /* end marker */ | ||
26 | }; | ||
27 | |||
28 | static pfm_reg_desc_t pfm_ita_pmd_desc[PMU_MAX_PMDS]={ /* Itanium PMU: one entry per PMD number (pmd0-pmd17), closed by a PFM_REG_END entry; pmd4-7 are counting registers, all others are PFM_REG_BUFFER */ | ||
29 | /* pmd0 */ { PFM_REG_BUFFER , 0, 0UL, -1UL, NULL, NULL, {RDEP(1),0UL, 0UL, 0UL}, {RDEP(10),0UL, 0UL, 0UL}}, | ||
30 | /* pmd1 */ { PFM_REG_BUFFER , 0, 0UL, -1UL, NULL, NULL, {RDEP(0),0UL, 0UL, 0UL}, {RDEP(10),0UL, 0UL, 0UL}}, | ||
31 | /* pmd2 */ { PFM_REG_BUFFER , 0, 0UL, -1UL, NULL, NULL, {RDEP(3)|RDEP(17),0UL, 0UL, 0UL}, {RDEP(11),0UL, 0UL, 0UL}}, | ||
32 | /* pmd3 */ { PFM_REG_BUFFER , 0, 0UL, -1UL, NULL, NULL, {RDEP(2)|RDEP(17),0UL, 0UL, 0UL}, {RDEP(11),0UL, 0UL, 0UL}}, | ||
33 | /* pmd4 */ { PFM_REG_COUNTING, 0, 0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {RDEP(4),0UL, 0UL, 0UL}}, | ||
34 | /* pmd5 */ { PFM_REG_COUNTING, 0, 0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {RDEP(5),0UL, 0UL, 0UL}}, | ||
35 | /* pmd6 */ { PFM_REG_COUNTING, 0, 0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {RDEP(6),0UL, 0UL, 0UL}}, | ||
36 | /* pmd7 */ { PFM_REG_COUNTING, 0, 0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {RDEP(7),0UL, 0UL, 0UL}}, | ||
37 | /* pmd8 */ { PFM_REG_BUFFER , 0, 0UL, -1UL, NULL, NULL, {RDEP(9)|RDEP(10)|RDEP(11)|RDEP(12)|RDEP(13)|RDEP(14)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}}, | ||
38 | /* pmd9 */ { PFM_REG_BUFFER , 0, 0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(10)|RDEP(11)|RDEP(12)|RDEP(13)|RDEP(14)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}}, | ||
39 | /* pmd10 */ { PFM_REG_BUFFER , 0, 0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(9)|RDEP(11)|RDEP(12)|RDEP(13)|RDEP(14)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}}, | ||
40 | /* pmd11 */ { PFM_REG_BUFFER , 0, 0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(9)|RDEP(10)|RDEP(12)|RDEP(13)|RDEP(14)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}}, | ||
41 | /* pmd12 */ { PFM_REG_BUFFER , 0, 0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(9)|RDEP(10)|RDEP(11)|RDEP(13)|RDEP(14)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}}, | ||
42 | /* pmd13 */ { PFM_REG_BUFFER , 0, 0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(9)|RDEP(10)|RDEP(11)|RDEP(12)|RDEP(14)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}}, | ||
43 | /* pmd14 */ { PFM_REG_BUFFER , 0, 0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(9)|RDEP(10)|RDEP(11)|RDEP(12)|RDEP(13)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}}, | ||
44 | /* pmd15 */ { PFM_REG_BUFFER , 0, 0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(9)|RDEP(10)|RDEP(11)|RDEP(12)|RDEP(13)|RDEP(14)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}}, | ||
45 | /* pmd16 */ { PFM_REG_BUFFER , 0, 0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(9)|RDEP(10)|RDEP(11)|RDEP(12)|RDEP(13)|RDEP(14)|RDEP(15),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}}, | ||
46 | /* pmd17 */ { PFM_REG_BUFFER , 0, 0UL, -1UL, NULL, NULL, {RDEP(2)|RDEP(3),0UL, 0UL, 0UL}, {RDEP(11),0UL, 0UL, 0UL}}, | ||
47 | { PFM_REG_END , 0, 0UL, -1UL, NULL, NULL, {0,}, {0,}}, /* end marker */ | ||
48 | }; | ||
49 | |||
50 | static int /* PMC write checker attached to pmc11 and pmc13 in pfm_ita_pmc_desc. When the context is loaded or masked and perfmon is not yet using the debug registers, a pmc13 value with bit 0 clear, or a pmc11 value with bit 28 clear, triggers pfm_write_ibr_dbr() with a count of 0 (first argument 1 for pmc13, 0 for pmc11). Returns -EINVAL if ctx is NULL or the task has IA64_THREAD_DBG_VALID set, the error from pfm_write_ibr_dbr() if any, otherwise 0. */ | ||
51 | pfm_ita_pmc_check(struct task_struct *task, pfm_context_t *ctx, unsigned int cnum, unsigned long *val, struct pt_regs *regs) | ||
52 | { | ||
53 | int ret; | ||
54 | int is_loaded; | ||
55 | |||
56 | /* sanity check */ | ||
57 | if (ctx == NULL) return -EINVAL; | ||
58 | |||
59 | is_loaded = ctx->ctx_state == PFM_CTX_LOADED || ctx->ctx_state == PFM_CTX_MASKED; | ||
60 | |||
61 | /* | ||
62 | * we must clear the (instruction) debug registers if pmc13.ta bit is cleared | ||
63 | * before they are written (fl_using_dbreg==0) to avoid picking up stale information. | ||
64 | */ | ||
65 | if (cnum == 13 && is_loaded && ((*val & 0x1) == 0UL) && ctx->ctx_fl_using_dbreg == 0) { | ||
66 | |||
67 | DPRINT(("pmc[%d]=0x%lx has active pmc13.ta cleared, clearing ibr\n", cnum, *val)); | ||
68 | |||
69 | /* don't mix debug with perfmon */ | ||
70 | if (task && (task->thread.flags & IA64_THREAD_DBG_VALID) != 0) return -EINVAL; | ||
71 | |||
72 | /* | ||
73 | * a count of 0 will mark the debug registers as in use and also | ||
74 | * ensure that they are properly cleared. | ||
75 | */ | ||
76 | ret = pfm_write_ibr_dbr(1, ctx, NULL, 0, regs); | ||
77 | if (ret) return ret; | ||
78 | } | ||
79 | |||
80 | /* | ||
81 | * we must clear the (data) debug registers if pmc11.pt bit is cleared | ||
82 | * before they are written (fl_using_dbreg==0) to avoid picking up stale information. | ||
83 | */ | ||
84 | if (cnum == 11 && is_loaded && ((*val >> 28)& 0x1) == 0 && ctx->ctx_fl_using_dbreg == 0) { | ||
85 | |||
86 | DPRINT(("pmc[%d]=0x%lx has active pmc11.pt cleared, clearing dbr\n", cnum, *val)); | ||
87 | |||
88 | /* don't mix debug with perfmon */ | ||
89 | if (task && (task->thread.flags & IA64_THREAD_DBG_VALID) != 0) return -EINVAL; | ||
90 | |||
91 | /* | ||
92 | * a count of 0 will mark the debug registers as in use and also | ||
93 | * ensure that they are properly cleared. | ||
94 | */ | ||
95 | ret = pfm_write_ibr_dbr(0, ctx, NULL, 0, regs); | ||
96 | if (ret) return ret; | ||
97 | } | ||
98 | return 0; | ||
99 | } | ||
100 | |||
101 | /* | ||
102 | * PMU description for Itanium (pmu_family 0x7). impl_pmcs, impl_pmds are computed at runtime to minimize errors! | ||
103 | */ | ||
104 | static pmu_config_t pmu_conf_ita={ | ||
105 | .pmu_name = "Itanium", | ||
106 | .pmu_family = 0x7, | ||
107 | .ovfl_val = (1UL << 32) - 1, /* low 32 bits set */ | ||
108 | .pmd_desc = pfm_ita_pmd_desc, | ||
109 | .pmc_desc = pfm_ita_pmc_desc, | ||
110 | .num_ibrs = 8, | ||
111 | .num_dbrs = 8, | ||
112 | .use_rr_dbregs = 1, /* debug registers are used for range restrictions */ | ||
113 | }; | ||
114 | |||
115 | |||
diff --git a/arch/ia64/kernel/perfmon_mckinley.h b/arch/ia64/kernel/perfmon_mckinley.h new file mode 100644 index 000000000000..9becccda2897 --- /dev/null +++ b/arch/ia64/kernel/perfmon_mckinley.h | |||
@@ -0,0 +1,187 @@ | |||
1 | /* | ||
2 | * This file contains the McKinley PMU register description tables | ||
3 | * and pmc checker used by perfmon.c. | ||
4 | * | ||
5 | * Copyright (C) 2002-2003 Hewlett Packard Co | ||
6 | * Stephane Eranian <eranian@hpl.hp.com> | ||
7 | */ | ||
8 | static int pfm_mck_pmc_check(struct task_struct *task, pfm_context_t *ctx, unsigned int cnum, unsigned long *val, struct pt_regs *regs); /* forward declaration: the table below stores a pointer to this function */ | ||
9 | |||
10 | static pfm_reg_desc_t pfm_mck_pmc_desc[PMU_MAX_PMCS]={ /* McKinley PMC descriptors: one initializer per pmc0..pmc15 followed by a PFM_REG_END marker. The field layout comes from pfm_reg_desc_t, which is defined outside this file section. NOTE(review): presumably the 3rd and 4th values are what PMC_DFL_VAL() and PMC_RSVD_MASK() read - confirm against perfmon.c. */ | ||
11 | /* pmc0 */ { PFM_REG_CONTROL , 0, 0x1UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, | ||
12 | /* pmc1 */ { PFM_REG_CONTROL , 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, | ||
13 | /* pmc2 */ { PFM_REG_CONTROL , 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, | ||
14 | /* pmc3 */ { PFM_REG_CONTROL , 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, | ||
15 | /* pmc4 */ { PFM_REG_COUNTING, 6, 0x0000000000800000UL, 0xfffff7fUL, NULL, pfm_mck_pmc_check, {RDEP(4),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, | ||
16 | /* pmc5 */ { PFM_REG_COUNTING, 6, 0x0UL, 0xfffff7fUL, NULL, pfm_mck_pmc_check, {RDEP(5),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, | ||
17 | /* pmc6 */ { PFM_REG_COUNTING, 6, 0x0UL, 0xfffff7fUL, NULL, pfm_mck_pmc_check, {RDEP(6),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, | ||
18 | /* pmc7 */ { PFM_REG_COUNTING, 6, 0x0UL, 0xfffff7fUL, NULL, pfm_mck_pmc_check, {RDEP(7),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, | ||
19 | /* pmc8 */ { PFM_REG_CONFIG , 0, 0xffffffff3fffffffUL, 0xffffffff3ffffffbUL, NULL, pfm_mck_pmc_check, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, | ||
20 | /* pmc9 */ { PFM_REG_CONFIG , 0, 0xffffffff3ffffffcUL, 0xffffffff3ffffffbUL, NULL, pfm_mck_pmc_check, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, | ||
21 | /* pmc10 */ { PFM_REG_MONITOR , 4, 0x0UL, 0xffffUL, NULL, pfm_mck_pmc_check, {RDEP(0)|RDEP(1),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, | ||
22 | /* pmc11 */ { PFM_REG_MONITOR , 6, 0x0UL, 0x30f01cf, NULL, pfm_mck_pmc_check, {RDEP(2)|RDEP(3)|RDEP(17),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, | ||
23 | /* pmc12 */ { PFM_REG_MONITOR , 6, 0x0UL, 0xffffUL, NULL, pfm_mck_pmc_check, {RDEP(8)|RDEP(9)|RDEP(10)|RDEP(11)|RDEP(12)|RDEP(13)|RDEP(14)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, | ||
24 | /* pmc13 */ { PFM_REG_CONFIG , 0, 0x00002078fefefefeUL, 0x1e00018181818UL, NULL, pfm_mck_pmc_check, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, | ||
25 | /* pmc14 */ { PFM_REG_CONFIG , 0, 0x0db60db60db60db6UL, 0x2492UL, NULL, pfm_mck_pmc_check, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, | ||
26 | /* pmc15 */ { PFM_REG_CONFIG , 0, 0x00000000fffffff0UL, 0xfUL, NULL, pfm_mck_pmc_check, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, | ||
27 | { PFM_REG_END , 0, 0x0UL, -1UL, NULL, NULL, {0,}, {0,}}, /* end marker */ | ||
28 | }; | ||
29 | |||
30 | static pfm_reg_desc_t pfm_mck_pmd_desc[PMU_MAX_PMDS]={ /* McKinley PMD descriptors: one initializer per pmd0..pmd17 followed by a PFM_REG_END marker. pmd4-pmd7 are typed PFM_REG_COUNTING, all others PFM_REG_BUFFER. NOTE(review): the two trailing RDEP() groups look like dependent-PMD and dependent-PMC bitmasks - confirm against the pfm_reg_desc_t definition. */ | ||
31 | /* pmd0 */ { PFM_REG_BUFFER , 0, 0x0UL, -1UL, NULL, NULL, {RDEP(1),0UL, 0UL, 0UL}, {RDEP(10),0UL, 0UL, 0UL}}, | ||
32 | /* pmd1 */ { PFM_REG_BUFFER , 0, 0x0UL, -1UL, NULL, NULL, {RDEP(0),0UL, 0UL, 0UL}, {RDEP(10),0UL, 0UL, 0UL}}, | ||
33 | /* pmd2 */ { PFM_REG_BUFFER , 0, 0x0UL, -1UL, NULL, NULL, {RDEP(3)|RDEP(17),0UL, 0UL, 0UL}, {RDEP(11),0UL, 0UL, 0UL}}, | ||
34 | /* pmd3 */ { PFM_REG_BUFFER , 0, 0x0UL, -1UL, NULL, NULL, {RDEP(2)|RDEP(17),0UL, 0UL, 0UL}, {RDEP(11),0UL, 0UL, 0UL}}, | ||
35 | /* pmd4 */ { PFM_REG_COUNTING, 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {RDEP(4),0UL, 0UL, 0UL}}, | ||
36 | /* pmd5 */ { PFM_REG_COUNTING, 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {RDEP(5),0UL, 0UL, 0UL}}, | ||
37 | /* pmd6 */ { PFM_REG_COUNTING, 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {RDEP(6),0UL, 0UL, 0UL}}, | ||
38 | /* pmd7 */ { PFM_REG_COUNTING, 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {RDEP(7),0UL, 0UL, 0UL}}, | ||
39 | /* pmd8 */ { PFM_REG_BUFFER , 0, 0x0UL, -1UL, NULL, NULL, {RDEP(9)|RDEP(10)|RDEP(11)|RDEP(12)|RDEP(13)|RDEP(14)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}}, | ||
40 | /* pmd9 */ { PFM_REG_BUFFER , 0, 0x0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(10)|RDEP(11)|RDEP(12)|RDEP(13)|RDEP(14)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}}, | ||
41 | /* pmd10 */ { PFM_REG_BUFFER , 0, 0x0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(9)|RDEP(11)|RDEP(12)|RDEP(13)|RDEP(14)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}}, | ||
42 | /* pmd11 */ { PFM_REG_BUFFER , 0, 0x0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(9)|RDEP(10)|RDEP(12)|RDEP(13)|RDEP(14)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}}, | ||
43 | /* pmd12 */ { PFM_REG_BUFFER , 0, 0x0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(9)|RDEP(10)|RDEP(11)|RDEP(13)|RDEP(14)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}}, | ||
44 | /* pmd13 */ { PFM_REG_BUFFER , 0, 0x0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(9)|RDEP(10)|RDEP(11)|RDEP(12)|RDEP(14)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}}, | ||
45 | /* pmd14 */ { PFM_REG_BUFFER , 0, 0x0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(9)|RDEP(10)|RDEP(11)|RDEP(12)|RDEP(13)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}}, | ||
46 | /* pmd15 */ { PFM_REG_BUFFER , 0, 0x0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(9)|RDEP(10)|RDEP(11)|RDEP(12)|RDEP(13)|RDEP(14)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}}, | ||
47 | /* pmd16 */ { PFM_REG_BUFFER , 0, 0x0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(9)|RDEP(10)|RDEP(11)|RDEP(12)|RDEP(13)|RDEP(14)|RDEP(15),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}}, | ||
48 | /* pmd17 */ { PFM_REG_BUFFER , 0, 0x0UL, -1UL, NULL, NULL, {RDEP(2)|RDEP(3),0UL, 0UL, 0UL}, {RDEP(11),0UL, 0UL, 0UL}}, | ||
49 | { PFM_REG_END , 0, 0x0UL, -1UL, NULL, NULL, {0,}, {0,}}, /* end marker */ | ||
50 | }; | ||
51 | |||
52 | /* | ||
53 | * PMC reserved fields must have their power-up values preserved. Rewrites *val in place: bits selected by PMC_RSVD_MASK(cnum) are taken from the caller's value, all other bits from PMC_DFL_VAL(cnum). Always returns 0; regs is not used. | ||
54 | */ | ||
55 | static int | ||
56 | pfm_mck_reserved(unsigned int cnum, unsigned long *val, struct pt_regs *regs) | ||
57 | { | ||
58 | unsigned long tmp1, tmp2, ival = *val; | ||
59 | |||
60 | /* remove reserved areas from user value (keep only the bits set in the mask) */ | ||
61 | tmp1 = ival & PMC_RSVD_MASK(cnum); | ||
62 | |||
63 | /* get reserved fields values (default value restricted to the bits clear in the mask) */ | ||
64 | tmp2 = PMC_DFL_VAL(cnum) & ~PMC_RSVD_MASK(cnum); | ||
65 | |||
66 | *val = tmp1 | tmp2; | ||
67 | |||
68 | DPRINT(("pmc[%d]=0x%lx, mask=0x%lx, reset=0x%lx, val=0x%lx\n", | ||
69 | cnum, ival, PMC_RSVD_MASK(cnum), PMC_DFL_VAL(cnum), *val)); | ||
70 | return 0; | ||
71 | } | ||
72 | |||
73 | /* Checker for McKinley PMC writes: adjusts *val in place and vets the resulting configuration. | ||
74 | * task can be NULL if the context is unloaded. Returns 0 when the value is accepted; -EINVAL when ctx is NULL, when the task has IA64_THREAD_DBG_VALID set while the debug registers must be claimed, or when the pmc8/pmc13/pmc14 combination check fails; a non-zero result from pfm_write_ibr_dbr() is returned unchanged. | ||
75 | */ | ||
76 | static int | ||
77 | pfm_mck_pmc_check(struct task_struct *task, pfm_context_t *ctx, unsigned int cnum, unsigned long *val, struct pt_regs *regs) | ||
78 | { | ||
79 | int ret = 0, check_case1 = 0; | ||
80 | unsigned long val8 = 0, val14 = 0, val13 = 0; | ||
81 | int is_loaded; | ||
82 | |||
83 | /* first preserve the reserved fields; NOTE(review): this rewrites *val even when the ctx check below then fails */ | ||
84 | pfm_mck_reserved(cnum, val, regs); | ||
85 | |||
86 | /* sanity check */ | ||
87 | if (ctx == NULL) return -EINVAL; | ||
88 | |||
89 | is_loaded = ctx->ctx_state == PFM_CTX_LOADED || ctx->ctx_state == PFM_CTX_MASKED; | ||
90 | |||
91 | /* | ||
92 | * we must clear the debug registers if pmc13 has a value which enable | ||
93 | * memory pipeline event constraints. In this case we need to clear the | ||
94 | * debug registers if they have not yet been accessed. This is required | ||
95 | * to avoid picking stale state. | ||
96 | * PMC13 is "active" if: | ||
97 | * one of the pmc13.cfg_dbrpXX field is different from 0x3 | ||
98 | * AND | ||
99 | * at the corresponding pmc13.ena_dbrpXX is set. | ||
100 | */ | ||
101 | DPRINT(("cnum=%u val=0x%lx, using_dbreg=%d loaded=%d\n", cnum, *val, ctx->ctx_fl_using_dbreg, is_loaded)); | ||
102 | |||
103 | if (cnum == 13 && is_loaded | ||
104 | && (*val & 0x1e00000000000UL) && (*val & 0x18181818UL) != 0x18181818UL && ctx->ctx_fl_using_dbreg == 0) { | ||
105 | |||
106 | DPRINT(("pmc[%d]=0x%lx has active pmc13 settings, clearing dbr\n", cnum, *val)); | ||
107 | |||
108 | /* don't mix debug with perfmon */ | ||
109 | if (task && (task->thread.flags & IA64_THREAD_DBG_VALID) != 0) return -EINVAL; | ||
110 | |||
111 | /* | ||
112 | * a count of 0 will mark the debug registers as in use and also | ||
113 | * ensure that they are properly cleared. | ||
114 | */ | ||
115 | ret = pfm_write_ibr_dbr(PFM_DATA_RR, ctx, NULL, 0, regs); | ||
116 | if (ret) return ret; | ||
117 | } | ||
118 | /* | ||
119 | * we must clear the (instruction) debug registers if any pmc14.ibrpX bit is enabled | ||
120 | * before they are written (fl_using_dbreg==0) to avoid picking up stale information. | ||
121 | */ | ||
122 | if (cnum == 14 && is_loaded && ((*val & 0x2222UL) != 0x2222UL) && ctx->ctx_fl_using_dbreg == 0) { | ||
123 | |||
124 | DPRINT(("pmc[%d]=0x%lx has active pmc14 settings, clearing ibr\n", cnum, *val)); | ||
125 | |||
126 | /* don't mix debug with perfmon */ | ||
127 | if (task && (task->thread.flags & IA64_THREAD_DBG_VALID) != 0) return -EINVAL; | ||
128 | |||
129 | /* | ||
130 | * a count of 0 will mark the debug registers as in use and also | ||
131 | * ensure that they are properly cleared. | ||
132 | */ | ||
133 | ret = pfm_write_ibr_dbr(PFM_CODE_RR, ctx, NULL, 0, regs); | ||
134 | if (ret) return ret; | ||
135 | |||
136 | } | ||
137 | |||
138 | switch(cnum) { /* for pmc8/13/14, combine the new value with the other two values saved in ctx->ctx_pmcs[] */ | ||
139 | case 4: *val |= 1UL << 23; /* force power enable bit */ | ||
140 | break; | ||
141 | case 8: val8 = *val; | ||
142 | val13 = ctx->ctx_pmcs[13]; | ||
143 | val14 = ctx->ctx_pmcs[14]; | ||
144 | check_case1 = 1; | ||
145 | break; | ||
146 | case 13: val8 = ctx->ctx_pmcs[8]; | ||
147 | val13 = *val; | ||
148 | val14 = ctx->ctx_pmcs[14]; | ||
149 | check_case1 = 1; | ||
150 | break; | ||
151 | case 14: val8 = ctx->ctx_pmcs[8]; | ||
152 | val13 = ctx->ctx_pmcs[13]; | ||
153 | val14 = *val; | ||
154 | check_case1 = 1; | ||
155 | break; | ||
156 | } | ||
157 | /* check illegal configuration which can produce inconsistencies in tagging | ||
158 | * i-side events in L1D and L2 caches | ||
159 | */ | ||
160 | if (check_case1) { | ||
161 | ret = ((val13 >> 45) & 0xf) == 0 | ||
162 | && ((val8 & 0x1) == 0) | ||
163 | && ((((val14>>1) & 0x3) == 0x2 || ((val14>>1) & 0x3) == 0x0) | ||
164 | ||(((val14>>4) & 0x3) == 0x2 || ((val14>>4) & 0x3) == 0x0)); | ||
165 | |||
166 | if (ret) DPRINT((KERN_DEBUG "perfmon: failure check_case1\n")); | ||
167 | } | ||
168 | |||
169 | return ret ? -EINVAL : 0; /* ret is a boolean here: non-zero means the illegal combination was detected */ | ||
170 | } | ||
171 | |||
172 | /* | ||
173 | * PMU configuration for the "Itanium 2" PMU (family 0x1f). impl_pmcs, impl_pmds are not set in this initializer: they are computed at runtime to minimize errors! | ||
174 | */ | ||
175 | static pmu_config_t pmu_conf_mck={ | ||
176 | .pmu_name = "Itanium 2", | ||
177 | .pmu_family = 0x1f, | ||
178 | .flags = PFM_PMU_IRQ_RESEND, | ||
179 | .ovfl_val = (1UL << 47) - 1, /* low 47 bits set */ | ||
180 | .pmd_desc = pfm_mck_pmd_desc, | ||
181 | .pmc_desc = pfm_mck_pmc_desc, | ||
182 | .num_ibrs = 8, | ||
183 | .num_dbrs = 8, | ||
184 | .use_rr_dbregs = 1 /* debug registers are used for range restrictions */ | ||
185 | }; | ||
186 | |||
187 | |||
diff --git a/arch/ia64/kernel/process.c b/arch/ia64/kernel/process.c new file mode 100644 index 000000000000..91293388dd29 --- /dev/null +++ b/arch/ia64/kernel/process.c | |||
@@ -0,0 +1,800 @@ | |||
1 | /* | ||
2 | * Architecture-specific setup. | ||
3 | * | ||
4 | * Copyright (C) 1998-2003 Hewlett-Packard Co | ||
5 | * David Mosberger-Tang <davidm@hpl.hp.com> | ||
6 | */ | ||
7 | #define __KERNEL_SYSCALLS__ /* see <asm/unistd.h> */ | ||
8 | #include <linux/config.h> | ||
9 | |||
10 | #include <linux/cpu.h> | ||
11 | #include <linux/pm.h> | ||
12 | #include <linux/elf.h> | ||
13 | #include <linux/errno.h> | ||
14 | #include <linux/kallsyms.h> | ||
15 | #include <linux/kernel.h> | ||
16 | #include <linux/mm.h> | ||
17 | #include <linux/module.h> | ||
18 | #include <linux/notifier.h> | ||
19 | #include <linux/personality.h> | ||
20 | #include <linux/sched.h> | ||
21 | #include <linux/slab.h> | ||
22 | #include <linux/smp_lock.h> | ||
23 | #include <linux/stddef.h> | ||
24 | #include <linux/thread_info.h> | ||
25 | #include <linux/unistd.h> | ||
26 | #include <linux/efi.h> | ||
27 | #include <linux/interrupt.h> | ||
28 | #include <linux/delay.h> | ||
29 | |||
30 | #include <asm/cpu.h> | ||
31 | #include <asm/delay.h> | ||
32 | #include <asm/elf.h> | ||
33 | #include <asm/ia32.h> | ||
34 | #include <asm/irq.h> | ||
35 | #include <asm/pgalloc.h> | ||
36 | #include <asm/processor.h> | ||
37 | #include <asm/sal.h> | ||
38 | #include <asm/tlbflush.h> | ||
39 | #include <asm/uaccess.h> | ||
40 | #include <asm/unwind.h> | ||
41 | #include <asm/user.h> | ||
42 | |||
43 | #include "entry.h" | ||
44 | |||
45 | #ifdef CONFIG_PERFMON | ||
46 | # include <asm/perfmon.h> | ||
47 | #endif | ||
48 | |||
49 | #include "sigframe.h" | ||
50 | |||
51 | void (*ia64_mark_idle)(int); /* optional hook: cpu_idle() calls it with 1 before idling and with 0 before scheduling, when non-NULL */ | ||
52 | static cpumask_t cpu_idle_map; /* bits are set by cpu_idle_wait() and cleared per CPU inside the cpu_idle() loop */ | ||
53 | |||
54 | unsigned long boot_option_idle_override = 0; /* exported below; not read anywhere in this part of the file */ | ||
55 | EXPORT_SYMBOL(boot_option_idle_override); | ||
56 | |||
57 | void | ||
58 | ia64_do_show_stack (struct unw_frame_info *info, void *arg) | ||
59 | { /* Prints a call trace: one ip/sp/bsp entry per frame, unwinding from info until ip reads 0 or unw_unwind() returns a negative value. arg is not used. */ | ||
60 | unsigned long ip, sp, bsp; | ||
61 | char buf[128]; /* don't make it so big that it overflows the stack! */ | ||
62 | |||
63 | printk("\nCall Trace:\n"); | ||
64 | do { | ||
65 | unw_get_ip(info, &ip); | ||
66 | if (ip == 0) | ||
67 | break; | ||
68 | |||
69 | unw_get_sp(info, &sp); | ||
70 | unw_get_bsp(info, &bsp); | ||
71 | snprintf(buf, sizeof(buf), | ||
72 | " [<%016lx>] %%s\n" | ||
73 | " sp=%016lx bsp=%016lx\n", | ||
74 | ip, sp, bsp); | ||
75 | print_symbol(buf, ip); /* buf is passed as the format: the escaped %%s above survives snprintf as %s, presumably filled with the symbol for ip */ | ||
76 | } while (unw_unwind(info) >= 0); | ||
77 | } | ||
78 | |||
79 | void | ||
80 | show_stack (struct task_struct *task, unsigned long *sp) | ||
81 | { /* Prints a call trace via ia64_do_show_stack(): for the running context when task is NULL, otherwise for the given task. sp is not used. */ | ||
82 | if (!task) | ||
83 | unw_init_running(ia64_do_show_stack, NULL); | ||
84 | else { | ||
85 | struct unw_frame_info info; | ||
86 | |||
87 | unw_init_from_blocked_task(&info, task); | ||
88 | ia64_do_show_stack(&info, NULL); | ||
89 | } | ||
90 | } | ||
91 | |||
92 | void | ||
93 | dump_stack (void) | ||
94 | { /* Prints the call trace of the running context; equivalent to show_stack(NULL, NULL). */ | ||
95 | show_stack(NULL, NULL); | ||
96 | } | ||
97 | |||
98 | EXPORT_SYMBOL(dump_stack); | ||
99 | |||
100 | void | ||
101 | show_regs (struct pt_regs *regs) | ||
102 | { /* Prints the register state saved in regs; for a user-mode frame it also prints the stacked registers (r32 and up), otherwise it prints a kernel call trace. */ | ||
103 | unsigned long ip = regs->cr_iip + ia64_psr(regs)->ri; /* instruction address plus the psr.ri field */ | ||
104 | |||
105 | print_modules(); | ||
106 | printk("\nPid: %d, CPU %d, comm: %20s\n", current->pid, smp_processor_id(), current->comm); | ||
107 | printk("psr : %016lx ifs : %016lx ip : [<%016lx>] %s\n", | ||
108 | regs->cr_ipsr, regs->cr_ifs, ip, print_tainted()); | ||
109 | print_symbol("ip is at %s\n", ip); | ||
110 | printk("unat: %016lx pfs : %016lx rsc : %016lx\n", | ||
111 | regs->ar_unat, regs->ar_pfs, regs->ar_rsc); | ||
112 | printk("rnat: %016lx bsps: %016lx pr : %016lx\n", | ||
113 | regs->ar_rnat, regs->ar_bspstore, regs->pr); | ||
114 | printk("ldrs: %016lx ccv : %016lx fpsr: %016lx\n", | ||
115 | regs->loadrs, regs->ar_ccv, regs->ar_fpsr); | ||
116 | printk("csd : %016lx ssd : %016lx\n", regs->ar_csd, regs->ar_ssd); | ||
117 | printk("b0 : %016lx b6 : %016lx b7 : %016lx\n", regs->b0, regs->b6, regs->b7); | ||
118 | printk("f6 : %05lx%016lx f7 : %05lx%016lx\n", | ||
119 | regs->f6.u.bits[1], regs->f6.u.bits[0], | ||
120 | regs->f7.u.bits[1], regs->f7.u.bits[0]); | ||
121 | printk("f8 : %05lx%016lx f9 : %05lx%016lx\n", | ||
122 | regs->f8.u.bits[1], regs->f8.u.bits[0], | ||
123 | regs->f9.u.bits[1], regs->f9.u.bits[0]); | ||
124 | printk("f10 : %05lx%016lx f11 : %05lx%016lx\n", | ||
125 | regs->f10.u.bits[1], regs->f10.u.bits[0], | ||
126 | regs->f11.u.bits[1], regs->f11.u.bits[0]); | ||
127 | |||
128 | printk("r1 : %016lx r2 : %016lx r3 : %016lx\n", regs->r1, regs->r2, regs->r3); | ||
129 | printk("r8 : %016lx r9 : %016lx r10 : %016lx\n", regs->r8, regs->r9, regs->r10); | ||
130 | printk("r11 : %016lx r12 : %016lx r13 : %016lx\n", regs->r11, regs->r12, regs->r13); | ||
131 | printk("r14 : %016lx r15 : %016lx r16 : %016lx\n", regs->r14, regs->r15, regs->r16); | ||
132 | printk("r17 : %016lx r18 : %016lx r19 : %016lx\n", regs->r17, regs->r18, regs->r19); | ||
133 | printk("r20 : %016lx r21 : %016lx r22 : %016lx\n", regs->r20, regs->r21, regs->r22); | ||
134 | printk("r23 : %016lx r24 : %016lx r25 : %016lx\n", regs->r23, regs->r24, regs->r25); | ||
135 | printk("r26 : %016lx r27 : %016lx r28 : %016lx\n", regs->r26, regs->r27, regs->r28); | ||
136 | printk("r29 : %016lx r30 : %016lx r31 : %016lx\n", regs->r29, regs->r30, regs->r31); | ||
137 | |||
138 | if (user_mode(regs)) { | ||
139 | /* print the stacked registers */ | ||
140 | unsigned long val, *bsp, ndirty; | ||
141 | int i, sof, is_nat = 0; /* NOTE(review): is_nat is never updated, so the '*' NaT marker below is never printed */ | ||
142 | |||
143 | sof = regs->cr_ifs & 0x7f; /* size of frame */ | ||
144 | ndirty = (regs->loadrs >> 19); | ||
145 | bsp = ia64_rse_skip_regs((unsigned long *) regs->ar_bspstore, ndirty); | ||
146 | for (i = 0; i < sof; ++i) { | ||
147 | get_user(val, (unsigned long __user *) ia64_rse_skip_regs(bsp, i)); /* NOTE(review): the get_user() result is ignored; on a fault val may be printed without having been read */ | ||
148 | printk("r%-3u:%c%016lx%s", 32 + i, is_nat ? '*' : ' ', val, | ||
149 | ((i == sof - 1) || (i % 3) == 2) ? "\n" : " "); /* three registers per output line */ | ||
150 | } | ||
151 | } else | ||
152 | show_stack(NULL, NULL); | ||
153 | } | ||
154 | |||
155 | void | ||
156 | do_notify_resume_user (sigset_t *oldset, struct sigscratch *scr, long in_syscall) | ||
157 | { /* Handles pending work for the current task: perfmon work (CONFIG_PERFMON) and signal delivery. When fsys_mode() is true it only sets psr.lp in the saved frame and returns. */ | ||
158 | if (fsys_mode(current, &scr->pt)) { | ||
159 | /* defer signal-handling etc. until we return to privilege-level 0. */ | ||
160 | if (!ia64_psr(&scr->pt)->lp) | ||
161 | ia64_psr(&scr->pt)->lp = 1; | ||
162 | return; | ||
163 | } | ||
164 | |||
165 | #ifdef CONFIG_PERFMON | ||
166 | if (current->thread.pfm_needs_checking) | ||
167 | pfm_handle_work(); | ||
168 | #endif | ||
169 | |||
170 | /* deal with pending signal delivery */ | ||
171 | if (test_thread_flag(TIF_SIGPENDING)) | ||
172 | ia64_do_signal(oldset, scr, in_syscall); | ||
173 | } | ||
174 | |||
175 | static int pal_halt = 1; /* when non-zero, default_idle() may call safe_halt(); cleared by the nohalt option */ | ||
176 | static int __init nohalt_setup(char * str) | ||
177 | { /* Handler registered for the nohalt option below: clears pal_halt and returns 1; str is ignored. */ | ||
178 | pal_halt = 0; | ||
179 | return 1; | ||
180 | } | ||
181 | __setup("nohalt", nohalt_setup); | ||
182 | |||
183 | /* | ||
184 | * We use this if we don't have any better idle routine. Loops until need_resched() is true, halting the CPU with safe_halt() when pal_halt is set and neither psr.pp nor psr.up is on, and calling cpu_relax() otherwise. | ||
185 | */ | ||
186 | void | ||
187 | default_idle (void) | ||
188 | { | ||
189 | unsigned long pmu_active = ia64_getreg(_IA64_REG_PSR) & (IA64_PSR_PP | IA64_PSR_UP); /* sampled once, before the loop */ | ||
190 | |||
191 | while (!need_resched()) | ||
192 | if (pal_halt && !pmu_active) | ||
193 | safe_halt(); | ||
194 | else | ||
195 | cpu_relax(); | ||
196 | } | ||
197 | |||
198 | #ifdef CONFIG_HOTPLUG_CPU | ||
199 | /* We don't actually take CPU down, just spin without interrupts. Marks this CPU's cpu_state as CPU_DEAD, spins until it becomes CPU_UP_PREPARE, then puts the CPU back into cpu_online_map and re-enables interrupts. */ | ||
200 | static inline void play_dead(void) | ||
201 | { | ||
202 | extern void ia64_cpu_local_tick (void); | ||
203 | /* Ack it */ | ||
204 | __get_cpu_var(cpu_state) = CPU_DEAD; | ||
205 | |||
206 | /* We shouldn't have to disable interrupts while dead, but | ||
207 | * some interrupts just don't seem to go away, and this makes | ||
208 | * it "work" for testing purposes. */ | ||
209 | max_xtp(); | ||
210 | local_irq_disable(); | ||
211 | /* Death loop */ | ||
212 | while (__get_cpu_var(cpu_state) != CPU_UP_PREPARE) | ||
213 | cpu_relax(); | ||
214 | |||
215 | /* | ||
216 | * Enable timer interrupts from now on | ||
217 | * Not required if we put processor in SAL_BOOT_RENDEZ mode. | ||
218 | */ | ||
219 | local_flush_tlb_all(); | ||
220 | cpu_set(smp_processor_id(), cpu_online_map); | ||
221 | wmb(); | ||
222 | ia64_cpu_local_tick (); | ||
223 | local_irq_enable(); | ||
224 | } | ||
225 | #else | ||
226 | static inline void play_dead(void) | ||
227 | { /* without CONFIG_HOTPLUG_CPU this must never be reached */ | ||
228 | BUG(); | ||
229 | } | ||
230 | #endif /* CONFIG_HOTPLUG_CPU */ | ||
231 | |||
232 | |||
233 | void cpu_idle_wait(void) | ||
234 | { /* Sets the cpu_idle_map bit of every online CPU, then sleeps in one-second steps until no online CPU still has its bit set (cpu_idle() clears the bit of the CPU it runs on). */ | ||
235 | int cpu; | ||
236 | cpumask_t map; | ||
237 | |||
238 | for_each_online_cpu(cpu) | ||
239 | cpu_set(cpu, cpu_idle_map); | ||
240 | |||
241 | wmb(); | ||
242 | do { | ||
243 | ssleep(1); | ||
244 | cpus_and(map, cpu_idle_map, cpu_online_map); /* map = bits still set among the online CPUs */ | ||
245 | } while (!cpus_empty(map)); | ||
246 | } | ||
247 | EXPORT_SYMBOL_GPL(cpu_idle_wait); | ||
248 | |||
249 | void __attribute__((noreturn)) | ||
250 | cpu_idle (void) | ||
251 | { /* Idle loop, never returns: runs pm_idle (or default_idle when pm_idle is NULL) while nothing needs scheduling, then calls schedule(), and calls play_dead() when this CPU has gone offline. */ | ||
252 | void (*mark_idle)(int) = ia64_mark_idle; /* hook value is sampled once, at entry */ | ||
253 | int cpu = smp_processor_id(); | ||
254 | |||
255 | /* endless idle loop with no priority at all */ | ||
256 | while (1) { | ||
257 | #ifdef CONFIG_SMP | ||
258 | if (!need_resched()) | ||
259 | min_xtp(); | ||
260 | #endif | ||
261 | while (!need_resched()) { | ||
262 | void (*idle)(void); | ||
263 | |||
264 | if (mark_idle) | ||
265 | (*mark_idle)(1); | ||
266 | |||
267 | if (cpu_isset(cpu, cpu_idle_map)) | ||
268 | cpu_clear(cpu, cpu_idle_map); /* clears the bit that cpu_idle_wait() set and polls for */ | ||
269 | rmb(); | ||
270 | idle = pm_idle; /* re-read on every pass through the inner loop */ | ||
271 | if (!idle) | ||
272 | idle = default_idle; | ||
273 | (*idle)(); | ||
274 | } | ||
275 | |||
276 | if (mark_idle) | ||
277 | (*mark_idle)(0); | ||
278 | |||
279 | #ifdef CONFIG_SMP | ||
280 | normal_xtp(); | ||
281 | #endif | ||
282 | schedule(); | ||
283 | check_pgt_cache(); | ||
284 | if (cpu_is_offline(smp_processor_id())) | ||
285 | play_dead(); | ||
286 | } | ||
287 | } | ||
288 | |||
289 | void | ||
290 | ia64_save_extra (struct task_struct *task) | ||
291 | { /* Saves optional per-task state: debug registers (IA64_THREAD_DBG_VALID), perfmon registers (IA64_THREAD_PM_VALID, CONFIG_PERFMON) and IA-32 state (CONFIG_IA32_SUPPORT). Counterpart of ia64_load_extra(). */ | ||
292 | #ifdef CONFIG_PERFMON | ||
293 | unsigned long info; | ||
294 | #endif | ||
295 | |||
296 | if ((task->thread.flags & IA64_THREAD_DBG_VALID) != 0) | ||
297 | ia64_save_debug_regs(&task->thread.dbr[0]); | ||
298 | |||
299 | #ifdef CONFIG_PERFMON | ||
300 | if ((task->thread.flags & IA64_THREAD_PM_VALID) != 0) | ||
301 | pfm_save_regs(task); | ||
302 | |||
303 | info = __get_cpu_var(pfm_syst_info); | ||
304 | if (info & PFM_CPUINFO_SYST_WIDE) | ||
305 | pfm_syst_wide_update_task(task, info, 0); /* last argument is 0 here and 1 in ia64_load_extra() */ | ||
306 | #endif | ||
307 | |||
308 | #ifdef CONFIG_IA32_SUPPORT | ||
309 | if (IS_IA32_PROCESS(ia64_task_regs(task))) | ||
310 | ia32_save_state(task); | ||
311 | #endif | ||
312 | } | ||
313 | |||
314 | void | ||
315 | ia64_load_extra (struct task_struct *task) | ||
316 | { /* Loads optional per-task state: debug registers (IA64_THREAD_DBG_VALID), perfmon registers (IA64_THREAD_PM_VALID, CONFIG_PERFMON) and IA-32 state (CONFIG_IA32_SUPPORT). Counterpart of ia64_save_extra(). */ | ||
317 | #ifdef CONFIG_PERFMON | ||
318 | unsigned long info; | ||
319 | #endif | ||
320 | |||
321 | if ((task->thread.flags & IA64_THREAD_DBG_VALID) != 0) | ||
322 | ia64_load_debug_regs(&task->thread.dbr[0]); | ||
323 | |||
324 | #ifdef CONFIG_PERFMON | ||
325 | if ((task->thread.flags & IA64_THREAD_PM_VALID) != 0) | ||
326 | pfm_load_regs(task); | ||
327 | |||
328 | info = __get_cpu_var(pfm_syst_info); | ||
329 | if (info & PFM_CPUINFO_SYST_WIDE) | ||
330 | pfm_syst_wide_update_task(task, info, 1); /* last argument is 1 here and 0 in ia64_save_extra() */ | ||
331 | #endif | ||
332 | |||
333 | #ifdef CONFIG_IA32_SUPPORT | ||
334 | if (IS_IA32_PROCESS(ia64_task_regs(task))) | ||
335 | ia32_load_state(task); | ||
336 | #endif | ||
337 | } | ||
338 | |||
339 | /* | ||
340 | * Copy the state of an ia-64 thread from the current task to the new task p. Returns 0 on success, or the non-zero result of the IA-32 helpers (CONFIG_IA32_SUPPORT). | ||
341 | * | ||
342 | * We get here through the following call chain: | ||
343 | * | ||
344 | * from user-level: from kernel: | ||
345 | * | ||
346 | * <clone syscall> <some kernel call frames> | ||
347 | * sys_clone : | ||
348 | * do_fork do_fork | ||
349 | * copy_thread copy_thread | ||
350 | * | ||
351 | * This means that the stack layout is as follows: | ||
352 | * | ||
353 | * +---------------------+ (highest addr) | ||
354 | * | struct pt_regs | | ||
355 | * +---------------------+ | ||
356 | * | struct switch_stack | | ||
357 | * +---------------------+ | ||
358 | * | | | ||
359 | * | memory stack | | ||
360 | * | | <-- sp (lowest addr) | ||
361 | * +---------------------+ | ||
362 | * | ||
363 | * Observe that we copy the unat values that are in pt_regs and switch_stack. Spilling an | ||
364 | * integer to address X causes bit N in ar.unat to be set to the NaT bit of the register, | ||
365 | * with N=(X & 0x1ff)/8. Thus, copying the unat value preserves the NaT bits ONLY if the | ||
366 | * pt_regs structure in the parent is congruent to that of the child, modulo 512. Since | ||
367 | * the stack is page aligned and the page size is at least 4KB, this is always the case, | ||
368 | * so there is nothing to worry about. | ||
369 | */ | ||
370 | int | ||
371 | copy_thread (int nr, unsigned long clone_flags, | ||
372 | unsigned long user_stack_base, unsigned long user_stack_size, | ||
373 | struct task_struct *p, struct pt_regs *regs) | ||
374 | { | ||
375 | extern char ia64_ret_from_clone, ia32_ret_from_clone; | ||
376 | struct switch_stack *child_stack, *stack; | ||
377 | unsigned long rbs, child_rbs, rbs_size; | ||
378 | struct pt_regs *child_ptregs; | ||
379 | int retval = 0; | ||
380 | |||
381 | #ifdef CONFIG_SMP | ||
382 | /* | ||
383 | * For SMP idle threads, fork_by_hand() calls do_fork with | ||
384 | * NULL regs. | ||
385 | */ | ||
386 | if (!regs) | ||
387 | return 0; | ||
388 | #endif | ||
389 | |||
390 | stack = ((struct switch_stack *) regs) - 1; /* parent's switch_stack sits directly below its pt_regs (see layout above) */ | ||
391 | |||
392 | child_ptregs = (struct pt_regs *) ((unsigned long) p + IA64_STK_OFFSET) - 1; | ||
393 | child_stack = (struct switch_stack *) child_ptregs - 1; | ||
394 | |||
395 | /* copy parent's switch_stack & pt_regs to child (one memcpy: the two structures are adjacent): */ | ||
396 | memcpy(child_stack, stack, sizeof(*child_ptregs) + sizeof(*child_stack)); | ||
397 | |||
398 | rbs = (unsigned long) current + IA64_RBS_OFFSET; | ||
399 | child_rbs = (unsigned long) p + IA64_RBS_OFFSET; | ||
400 | rbs_size = stack->ar_bspstore - rbs; | ||
401 | |||
402 | /* copy the parent's register backing store to the child: */ | ||
403 | memcpy((void *) child_rbs, (void *) rbs, rbs_size); | ||
404 | |||
405 | if (likely(user_mode(child_ptregs))) { | ||
406 | if ((clone_flags & CLONE_SETTLS) && !IS_IA32_PROCESS(regs)) | ||
407 | child_ptregs->r13 = regs->r16; /* see sys_clone2() in entry.S */ | ||
408 | if (user_stack_base) { | ||
409 | child_ptregs->r12 = user_stack_base + user_stack_size - 16; | ||
410 | child_ptregs->ar_bspstore = user_stack_base; | ||
411 | child_ptregs->ar_rnat = 0; | ||
412 | child_ptregs->loadrs = 0; | ||
413 | } | ||
414 | } else { | ||
415 | /* | ||
416 | * Note: we simply preserve the relative position of | ||
417 | * the stack pointer here. There is no need to | ||
418 | * allocate a scratch area here, since that will have | ||
419 | * been taken care of by the caller of sys_clone() | ||
420 | * already. | ||
421 | */ | ||
422 | child_ptregs->r12 = (unsigned long) child_ptregs - 16; /* kernel sp */ | ||
423 | child_ptregs->r13 = (unsigned long) p; /* set `current' pointer */ | ||
424 | } | ||
425 | child_stack->ar_bspstore = child_rbs + rbs_size; | ||
426 | if (IS_IA32_PROCESS(regs)) | ||
427 | child_stack->b0 = (unsigned long) &ia32_ret_from_clone; | ||
428 | else | ||
429 | child_stack->b0 = (unsigned long) &ia64_ret_from_clone; | ||
430 | |||
431 | /* copy parts of thread_struct: */ | ||
432 | p->thread.ksp = (unsigned long) child_stack - 16; | ||
433 | |||
434 | /* stop some PSR bits from being inherited. | ||
435 | * the psr.up/psr.pp bits must be cleared on fork but inherited on execve() | ||
436 | * therefore we must specify them explicitly here and not include them in | ||
437 | * IA64_PSR_BITS_TO_CLEAR. | ||
438 | */ | ||
439 | child_ptregs->cr_ipsr = ((child_ptregs->cr_ipsr | IA64_PSR_BITS_TO_SET) | ||
440 | & ~(IA64_PSR_BITS_TO_CLEAR | IA64_PSR_PP | IA64_PSR_UP)); | ||
441 | |||
442 | /* | ||
443 | * NOTE: The calling convention considers all floating point | ||
444 | * registers in the high partition (fph) to be scratch. Since | ||
445 | * the only way to get to this point is through a system call, | ||
446 | * we know that the values in fph are all dead. Hence, there | ||
447 | * is no need to inherit the fph state from the parent to the | ||
448 | * child and all we have to do is to make sure that | ||
449 | * IA64_THREAD_FPH_VALID is cleared in the child. | ||
450 | * | ||
451 | * XXX We could push this optimization a bit further by | ||
452 | * clearing IA64_THREAD_FPH_VALID on ANY system call. | ||
453 | * However, it's not clear this is worth doing. Also, it | ||
454 | * would be a slight deviation from the normal Linux system | ||
455 | * call behavior where scratch registers are preserved across | ||
456 | * system calls (unless used by the system call itself). | ||
457 | */ | ||
458 | # define THREAD_FLAGS_TO_CLEAR (IA64_THREAD_FPH_VALID | IA64_THREAD_DBG_VALID \ | ||
459 | | IA64_THREAD_PM_VALID) | ||
460 | # define THREAD_FLAGS_TO_SET 0 | ||
461 | p->thread.flags = ((current->thread.flags & ~THREAD_FLAGS_TO_CLEAR) | ||
462 | | THREAD_FLAGS_TO_SET); | ||
463 | ia64_drop_fpu(p); /* don't pick up stale state from a CPU's fph */ | ||
464 | #ifdef CONFIG_IA32_SUPPORT | ||
465 | /* | ||
466 | * If we're cloning an IA32 task then save the IA32 extra | ||
467 | * state from the current task to the new task | ||
468 | */ | ||
469 | if (IS_IA32_PROCESS(ia64_task_regs(current))) { | ||
470 | ia32_save_state(p); | ||
471 | if (clone_flags & CLONE_SETTLS) | ||
472 | retval = ia32_clone_tls(p, child_ptregs); | ||
473 | |||
474 | /* Copy partially mapped page list */ | ||
475 | if (!retval) | ||
476 | retval = ia32_copy_partial_page_list(p, clone_flags); | ||
477 | } | ||
478 | #endif | ||
479 | |||
480 | #ifdef CONFIG_PERFMON | ||
481 | if (current->thread.pfm_context) | ||
482 | pfm_inherit(p, child_ptregs); | ||
483 | #endif | ||
484 | return retval; | ||
485 | } | ||
486 | |||
487 | static void | ||
488 | do_copy_task_regs (struct task_struct *task, struct unw_frame_info *info, void *arg) | ||
489 | { /* Fills the elf_gregset_t that arg points to with the user-level register state of task, in the coredump layout listed below. The buffer is zeroed first and stays zeroed (or partly filled) if unwinding to user level or syncing the user RBS fails. */ | ||
490 | unsigned long mask, sp, nat_bits = 0, ip, ar_rnat, urbs_end, cfm; | ||
491 | elf_greg_t *dst = arg; | ||
492 | struct pt_regs *pt; | ||
493 | char nat; | ||
494 | int i; | ||
495 | |||
496 | memset(dst, 0, sizeof(elf_gregset_t)); /* don't leak any kernel bits to user-level */ | ||
497 | |||
498 | if (unw_unwind_to_user(info) < 0) | ||
499 | return; | ||
500 | |||
501 | unw_get_sp(info, &sp); | ||
502 | pt = (struct pt_regs *) (sp + 16); /* pt_regs is taken to start 16 bytes above the unwound sp */ | ||
503 | |||
504 | urbs_end = ia64_get_user_rbs_end(task, pt, &cfm); | ||
505 | |||
506 | if (ia64_sync_user_rbs(task, info->sw, pt->ar_bspstore, urbs_end) < 0) | ||
507 | return; | ||
508 | |||
509 | ia64_peek(task, info->sw, urbs_end, (long) ia64_rse_rnat_addr((long *) urbs_end), | ||
510 | &ar_rnat); | ||
511 | |||
512 | /* | ||
513 | * coredump format: | ||
514 | * r0-r31 | ||
515 | * NaT bits (for r0-r31; bit N == 1 iff rN is a NaT) | ||
516 | * predicate registers (p0-p63) | ||
517 | * b0-b7 | ||
518 | * ip cfm user-mask | ||
519 | * ar.rsc ar.bsp ar.bspstore ar.rnat | ||
520 | * ar.ccv ar.unat ar.fpsr ar.pfs ar.lc ar.ec | ||
521 | */ | ||
522 | |||
523 | /* r0 is zero (dst[0] stays 0 from the memset); mask tracks bit i of nat_bits */ | ||
524 | for (i = 1, mask = (1UL << i); i < 32; ++i) { | ||
525 | unw_get_gr(info, i, &dst[i], &nat); | ||
526 | if (nat) | ||
527 | nat_bits |= mask; | ||
528 | mask <<= 1; | ||
529 | } | ||
530 | dst[32] = nat_bits; | ||
531 | unw_get_pr(info, &dst[33]); | ||
532 | |||
533 | for (i = 0; i < 8; ++i) | ||
534 | unw_get_br(info, i, &dst[34 + i]); | ||
535 | |||
536 | unw_get_rp(info, &ip); | ||
537 | dst[42] = ip + ia64_psr(pt)->ri; | ||
538 | dst[43] = cfm; | ||
539 | dst[44] = pt->cr_ipsr & IA64_PSR_UM; | ||
540 | |||
541 | unw_get_ar(info, UNW_AR_RSC, &dst[45]); | ||
542 | /* | ||
543 | * For bsp and bspstore, unw_get_ar() would return the kernel | ||
544 | * addresses, but we need the user-level addresses instead: | ||
545 | */ | ||
546 | dst[46] = urbs_end; /* note: by convention PT_AR_BSP points to the end of the urbs! */ | ||
547 | dst[47] = pt->ar_bspstore; | ||
548 | dst[48] = ar_rnat; | ||
549 | unw_get_ar(info, UNW_AR_CCV, &dst[49]); | ||
550 | unw_get_ar(info, UNW_AR_UNAT, &dst[50]); | ||
551 | unw_get_ar(info, UNW_AR_FPSR, &dst[51]); | ||
552 | dst[52] = pt->ar_pfs; /* UNW_AR_PFS is == to pt->cr_ifs for interrupt frames */ | ||
553 | unw_get_ar(info, UNW_AR_LC, &dst[53]); | ||
554 | unw_get_ar(info, UNW_AR_EC, &dst[54]); | ||
555 | unw_get_ar(info, UNW_AR_CSD, &dst[55]); /* ar.csd and ar.ssd follow the fields listed in the format comment above */ | ||
556 | unw_get_ar(info, UNW_AR_SSD, &dst[56]); | ||
557 | } | ||
558 | |||
559 | void | ||
560 | do_dump_task_fpu (struct task_struct *task, struct unw_frame_info *info, void *arg) | ||
561 | { /* Fills the elf_fpregset_t that arg points to with the floating-point registers of task: slots 2-31 via the unwinder, and slots from 32 on from task->thread.fph when IA64_THREAD_FPH_VALID is set. The buffer is zeroed first and left zeroed if unwinding to user level fails. */ | ||
562 | elf_fpreg_t *dst = arg; | ||
563 | int i; | ||
564 | |||
565 | memset(dst, 0, sizeof(elf_fpregset_t)); /* don't leak any "random" bits */ | ||
566 | |||
567 | if (unw_unwind_to_user(info) < 0) | ||
568 | return; | ||
569 | |||
570 | /* f0 is 0.0, f1 is 1.0 (not written here: slots 0 and 1 keep the zero fill) */ | ||
571 | |||
572 | for (i = 2; i < 32; ++i) | ||
573 | unw_get_fr(info, i, dst + i); | ||
574 | |||
575 | ia64_flush_fph(task); | ||
576 | if ((task->thread.flags & IA64_THREAD_FPH_VALID) != 0) | ||
577 | memcpy(dst + 32, task->thread.fph, 96*16); /* 96 entries of 16 bytes each */ | ||
578 | } | ||
579 | |||
580 | void | ||
581 | do_copy_regs (struct unw_frame_info *info, void *arg) | ||
582 | { /* Two-argument wrapper that runs do_copy_task_regs() on the current task; used as the callback for unw_init_running(). */ | ||
583 | do_copy_task_regs(current, info, arg); | ||
584 | } | ||
585 | |||
586 | void | ||
587 | do_dump_fpu (struct unw_frame_info *info, void *arg) | ||
588 | { /* Two-argument wrapper that runs do_dump_task_fpu() on the current task; used as the callback for unw_init_running(). */ | ||
589 | do_dump_task_fpu(current, info, arg); | ||
590 | } | ||
591 | |||
592 | int | ||
593 | dump_task_regs(struct task_struct *task, elf_gregset_t *regs) | ||
594 | { /* Copy TASK's registers into REGS via do_copy_task_regs(); always returns 1. */ | ||
595 | struct unw_frame_info tcore_info; | ||
596 | |||
597 | if (current == task) { /* dumping ourselves: unw_init_running() supplies the frame info to the callback */ | ||
598 | unw_init_running(do_copy_regs, regs); | ||
599 | } else { /* another task: build the frame info from its blocked state */ | ||
600 | memset(&tcore_info, 0, sizeof(tcore_info)); | ||
601 | unw_init_from_blocked_task(&tcore_info, task); | ||
602 | do_copy_task_regs(task, &tcore_info, regs); | ||
603 | } | ||
604 | return 1; | ||
605 | } | ||
606 | |||
607 | void | ||
608 | ia64_elf_core_copy_regs (struct pt_regs *pt, elf_gregset_t dst) | ||
609 | { /* Fill DST with the current task's registers through the do_copy_regs() unwinder callback; PT is not used here. */ | ||
610 | unw_init_running(do_copy_regs, dst); | ||
611 | } | ||
612 | |||
613 | int | ||
614 | dump_task_fpu (struct task_struct *task, elf_fpregset_t *dst) | ||
615 | { /* Copy TASK's floating-point registers into DST via do_dump_task_fpu(); always returns 1. */ | ||
616 | struct unw_frame_info tcore_info; | ||
617 | |||
618 | if (current == task) { /* dumping ourselves: unw_init_running() supplies the frame info to the callback */ | ||
619 | unw_init_running(do_dump_fpu, dst); | ||
620 | } else { /* another task: build the frame info from its blocked state */ | ||
621 | memset(&tcore_info, 0, sizeof(tcore_info)); | ||
622 | unw_init_from_blocked_task(&tcore_info, task); | ||
623 | do_dump_task_fpu(task, &tcore_info, dst); | ||
624 | } | ||
625 | return 1; | ||
626 | } | ||
627 | |||
628 | int | ||
629 | dump_fpu (struct pt_regs *pt, elf_fpregset_t dst) | ||
630 | { /* Fill DST with the current task's floating-point registers; PT is not used here. */ | ||
631 | unw_init_running(do_dump_fpu, dst); | ||
632 | return 1; /* f0-f31 are always valid so we always return 1 */ | ||
633 | } | ||
634 | |||
635 | long | ||
636 | sys_execve (char __user *filename, char __user * __user *argv, char __user * __user *envp, | ||
637 | struct pt_regs *regs) | ||
638 | { /* execve() entry point: obtain FILENAME with getname(), run do_execve(), and return its result (or the getname() error). */ | ||
639 | char *fname; | ||
640 | int error; | ||
641 | |||
642 | fname = getname(filename); | ||
643 | error = PTR_ERR(fname); /* only returned if IS_ERR(fname) below; otherwise overwritten by do_execve() */ | ||
644 | if (IS_ERR(fname)) | ||
645 | goto out; | ||
646 | error = do_execve(fname, argv, envp, regs); | ||
647 | putname(fname); /* release the name obtained from getname() */ | ||
648 | out: | ||
649 | return error; | ||
650 | } | ||
651 | |||
652 | pid_t | ||
653 | kernel_thread (int (*fn)(void *), void *arg, unsigned long flags) | ||
654 | { /* Fork a thread that starts in start_kernel_thread with FN in r9 and ARG in r11; returns the result of do_fork(). */ | ||
655 | extern void start_kernel_thread (void); | ||
656 | unsigned long *helper_fptr = (unsigned long *) &start_kernel_thread; /* read as a two-word descriptor: [0] = entry IP, [1] = GP */ | ||
657 | struct { | ||
658 | struct switch_stack sw; | ||
659 | struct pt_regs pt; | ||
660 | } regs; /* switch_stack placed directly in front of pt_regs; only &regs.pt is handed to do_fork() */ | ||
661 | |||
662 | memset(&regs, 0, sizeof(regs)); | ||
663 | regs.pt.cr_iip = helper_fptr[0]; /* set entry point (IP) */ | ||
664 | regs.pt.r1 = helper_fptr[1]; /* set GP */ | ||
665 | regs.pt.r9 = (unsigned long) fn; /* 1st argument */ | ||
666 | regs.pt.r11 = (unsigned long) arg; /* 2nd argument */ | ||
667 | /* Preserve PSR bits, except for bits 32-34 and 37-45, which we can't read. */ | ||
668 | regs.pt.cr_ipsr = ia64_getreg(_IA64_REG_PSR) | IA64_PSR_BN; | ||
669 | regs.pt.cr_ifs = 1UL << 63; /* mark as valid, empty frame */ | ||
670 | regs.sw.ar_fpsr = regs.pt.ar_fpsr = ia64_getreg(_IA64_REG_AR_FPSR); | ||
671 | regs.sw.ar_bspstore = (unsigned long) current + IA64_RBS_OFFSET; | ||
672 | regs.sw.pr = (1 << PRED_KERNEL_STACK); | ||
673 | return do_fork(flags | CLONE_VM | CLONE_UNTRACED, 0, &regs.pt, 0, NULL, NULL); /* CLONE_VM and CLONE_UNTRACED are always added to FLAGS */ | ||
674 | } | ||
675 | EXPORT_SYMBOL(kernel_thread); | ||
676 | |||
677 | /* This gets called from kernel_thread() via ia64_invoke_thread_helper(). */ | ||
678 | int | ||
679 | kernel_thread_helper (int (*fn)(void *), void *arg) | ||
680 | { /* Runs FN(ARG) and returns its result, first resetting IA-32 per-thread state if the task is flagged as an IA-32 process. */ | ||
681 | #ifdef CONFIG_IA32_SUPPORT | ||
682 | if (IS_IA32_PROCESS(ia64_task_regs(current))) { | ||
683 | /* A kernel thread is always a 64-bit process. */ | ||
684 | current->thread.map_base = DEFAULT_MAP_BASE; | ||
685 | current->thread.task_size = DEFAULT_TASK_SIZE; | ||
686 | ia64_set_kr(IA64_KR_IO_BASE, current->thread.old_iob); /* presumably the values saved when the task switched to IA-32 mode; TODO confirm */ | ||
687 | ia64_set_kr(IA64_KR_TSSD, current->thread.old_k1); | ||
688 | } | ||
689 | #endif | ||
690 | return (*fn)(arg); | ||
691 | } | ||
692 | |||
693 | /* | ||
694 | * Flush thread state. This is called when a thread does an execve(). | ||
695 | */ | ||
696 | void | ||
697 | flush_thread (void) | ||
698 | { | ||
699 | /* drop floating-point and debug-register state if it exists: */ | ||
700 | current->thread.flags &= ~(IA64_THREAD_FPH_VALID | IA64_THREAD_DBG_VALID); | ||
701 | ia64_drop_fpu(current); | ||
702 | if (IS_IA32_PROCESS(ia64_task_regs(current))) /* IA-32 tasks additionally release their partial-page list */ | ||
703 | ia32_drop_partial_page_list(current); | ||
704 | } | ||
705 | |||
706 | /* | ||
707 | * Clean up state associated with current thread. This is called when | ||
708 | * the thread calls exit(). | ||
709 | */ | ||
710 | void | ||
711 | exit_thread (void) | ||
712 | { | ||
713 | ia64_drop_fpu(current); | ||
714 | #ifdef CONFIG_PERFMON | ||
715 | /* if needed, stop monitoring and flush state to perfmon context */ | ||
716 | if (current->thread.pfm_context) | ||
717 | pfm_exit_thread(current); | ||
718 | |||
719 | /* free debug register resources */ | ||
720 | if (current->thread.flags & IA64_THREAD_DBG_VALID) | ||
721 | pfm_release_debug_registers(current); | ||
722 | #endif | ||
723 | if (IS_IA32_PROCESS(ia64_task_regs(current))) /* IA-32 tasks additionally release their partial-page list */ | ||
724 | ia32_drop_partial_page_list(current); | ||
725 | } | ||
726 | |||
727 | unsigned long | ||
728 | get_wchan (struct task_struct *p) | ||
729 | { /* Return the first unwound IP of P that is outside the scheduler functions, or 0 if none is found. */ | ||
730 | struct unw_frame_info info; | ||
731 | unsigned long ip; | ||
732 | int count = 0; | ||
733 | |||
734 | /* | ||
735 | * Note: p may not be a blocked task (it could be current or | ||
736 | * another process running on some other CPU). Rather than | ||
737 | * trying to determine if p is really blocked, we just assume | ||
738 | * it's blocked and rely on the unwind routines to fail | ||
739 | * gracefully if the process wasn't really blocked after all. | ||
740 | * --davidm 99/12/15 | ||
741 | */ | ||
742 | unw_init_from_blocked_task(&info, p); | ||
743 | do { | ||
744 | if (unw_unwind(&info) < 0) /* cannot unwind any further */ | ||
745 | return 0; | ||
746 | unw_get_ip(&info, &ip); | ||
747 | if (!in_sched_functions(ip)) | ||
748 | return ip; | ||
749 | } while (count++ < 16); /* post-increment: the body runs at most 17 times */ | ||
750 | return 0; | ||
751 | } | ||
752 | |||
753 | void | ||
754 | cpu_halt (void) | ||
755 | { /* Pick the halt state with the lowest reported power consumption and enter it forever; returns only if the halt info cannot be read. */ | ||
756 | pal_power_mgmt_info_u_t power_info[8]; | ||
757 | unsigned long min_power; | ||
758 | int i, min_power_state; | ||
759 | |||
760 | if (ia64_pal_halt_info(power_info) != 0) /* non-zero status: give up without halting */ | ||
761 | return; | ||
762 | |||
763 | min_power_state = 0; /* state 0 is the starting candidate; its "im" field is not checked */ | ||
764 | min_power = power_info[0].pal_power_mgmt_info_s.power_consumption; | ||
765 | for (i = 1; i < 8; ++i) | ||
766 | if (power_info[i].pal_power_mgmt_info_s.im /* presumably an "implemented" flag for state i; TODO confirm */ | ||
767 | && power_info[i].pal_power_mgmt_info_s.power_consumption < min_power) { | ||
768 | min_power = power_info[i].pal_power_mgmt_info_s.power_consumption; | ||
769 | min_power_state = i; | ||
770 | } | ||
771 | |||
772 | while (1) /* re-enter the halt state every time ia64_pal_halt() returns */ | ||
773 | ia64_pal_halt(min_power_state); | ||
774 | } | ||
775 | |||
776 | void | ||
777 | machine_restart (char *restart_cmd) | ||
778 | { /* Request a warm reset through the EFI reset_system service; RESTART_CMD is not used. */ | ||
779 | (*efi.reset_system)(EFI_RESET_WARM, 0, 0, NULL); | ||
780 | } | ||
781 | |||
782 | EXPORT_SYMBOL(machine_restart); | ||
783 | |||
784 | void | ||
785 | machine_halt (void) | ||
786 | { /* Halting the machine is simply cpu_halt() on the calling CPU. */ | ||
787 | cpu_halt(); | ||
788 | } | ||
789 | |||
790 | EXPORT_SYMBOL(machine_halt); | ||
791 | |||
792 | void | ||
793 | machine_power_off (void) | ||
794 | { /* Call the pm_power_off hook if one is installed, then fall back to machine_halt(). */ | ||
795 | if (pm_power_off) | ||
796 | pm_power_off(); | ||
797 | machine_halt(); /* reached when no hook is installed or the hook returned */ | ||
798 | } | ||
799 | |||
800 | EXPORT_SYMBOL(machine_power_off); | ||
diff --git a/arch/ia64/kernel/ptrace.c b/arch/ia64/kernel/ptrace.c new file mode 100644 index 000000000000..55789fcd7210 --- /dev/null +++ b/arch/ia64/kernel/ptrace.c | |||
@@ -0,0 +1,1627 @@ | |||
1 | /* | ||
2 | * Kernel support for the ptrace() and syscall tracing interfaces. | ||
3 | * | ||
4 | * Copyright (C) 1999-2005 Hewlett-Packard Co | ||
5 | * David Mosberger-Tang <davidm@hpl.hp.com> | ||
6 | * | ||
7 | * Derived from the x86 and Alpha versions. | ||
8 | */ | ||
9 | #include <linux/config.h> | ||
10 | #include <linux/kernel.h> | ||
11 | #include <linux/sched.h> | ||
12 | #include <linux/slab.h> | ||
13 | #include <linux/mm.h> | ||
14 | #include <linux/errno.h> | ||
15 | #include <linux/ptrace.h> | ||
16 | #include <linux/smp_lock.h> | ||
17 | #include <linux/user.h> | ||
18 | #include <linux/security.h> | ||
19 | #include <linux/audit.h> | ||
20 | |||
21 | #include <asm/pgtable.h> | ||
22 | #include <asm/processor.h> | ||
23 | #include <asm/ptrace_offsets.h> | ||
24 | #include <asm/rse.h> | ||
25 | #include <asm/system.h> | ||
26 | #include <asm/uaccess.h> | ||
27 | #include <asm/unwind.h> | ||
28 | #ifdef CONFIG_PERFMON | ||
29 | #include <asm/perfmon.h> | ||
30 | #endif | ||
31 | |||
32 | #include "entry.h" | ||
33 | |||
34 | /* | ||
35 | * Bits in the PSR that we allow ptrace() to change: | ||
36 | * be, up, ac, mfl, mfh (the user mask; five bits total) | ||
37 | * db (debug breakpoint fault; one bit) | ||
38 | * id (instruction debug fault disable; one bit) | ||
39 | * dd (data debug fault disable; one bit) | ||
40 | * ri (restart instruction; two bits) | ||
41 | * is (instruction set; one bit) | ||
42 | */ | ||
43 | #define IPSR_MASK (IA64_PSR_UM | IA64_PSR_DB | IA64_PSR_IS \ | ||
44 | | IA64_PSR_ID | IA64_PSR_DD | IA64_PSR_RI) | ||
45 | |||
46 | #define MASK(nbits) ((1UL << (nbits)) - 1) /* mask with NBITS bits set; NBITS must be smaller than the width of unsigned long */ | ||
47 | #define PFM_MASK MASK(38) /* low 38 bits set */ | ||
48 | |||
49 | #define PTRACE_DEBUG 0 /* set to non-zero to turn dprintk() into printk() */ | ||
50 | |||
51 | #if PTRACE_DEBUG | ||
52 | # define dprintk(format...) printk(format) | ||
53 | # define inline /* debug builds: the "inline" keyword expands to nothing in the rest of this file */ | ||
54 | #else | ||
55 | # define dprintk(format...) /* expands to nothing */ | ||
56 | #endif | ||
57 | |||
58 | /* Return TRUE if PT was created due to kernel-entry via a system-call. */ | ||
59 | |||
60 | static inline int | ||
61 | in_syscall (struct pt_regs *pt) | ||
62 | { | ||
63 | return (long) pt->cr_ifs >= 0; /* non-negative as a signed value <=> bit 63 of cr_ifs is clear */ | ||
64 | } | ||
65 | |||
66 | /* | ||
67 | * Collect the NaT bits for r1-r31 from scratch_unat and return a NaT | ||
68 | * bitset where bit i is set iff the NaT bit of register i is set. | ||
69 | */ | ||
70 | unsigned long | ||
71 | ia64_get_scratch_nat_bits (struct pt_regs *pt, unsigned long scratch_unat) | ||
72 | { | ||
73 | # define GET_BITS(first, last, unat) \ | ||
74 | ({ \ | ||
75 | unsigned long bit = ia64_unat_pos(&pt->r##first); /* UNAT bit position for the pt_regs slot of r<first> */ \ | ||
76 | unsigned long nbits = (last - first + 1); \ | ||
77 | unsigned long mask = MASK(nbits) << first; /* result bits first..last */ \ | ||
78 | unsigned long dist; \ | ||
79 | if (bit < first) \ | ||
80 | dist = 64 + bit - first; /* keep the rotate distance non-negative */ \ | ||
81 | else \ | ||
82 | dist = bit - first; \ | ||
83 | ia64_rotr(unat, dist) & mask; /* rotate bit position BIT down to position FIRST, then keep first..last */ \ | ||
84 | }) | ||
85 | unsigned long val; | ||
86 | |||
87 | /* | ||
88 | * Registers that are stored consecutively in struct pt_regs | ||
89 | * can be handled in parallel. If the register order in | ||
90 | * struct pt_regs changes, this code MUST be updated. | ||
91 | */ | ||
92 | val = GET_BITS( 1, 1, scratch_unat); | ||
93 | val |= GET_BITS( 2, 3, scratch_unat); | ||
94 | val |= GET_BITS(12, 13, scratch_unat); | ||
95 | val |= GET_BITS(14, 14, scratch_unat); | ||
96 | val |= GET_BITS(15, 15, scratch_unat); | ||
97 | val |= GET_BITS( 8, 11, scratch_unat); | ||
98 | val |= GET_BITS(16, 31, scratch_unat); | ||
99 | return val; /* bits 0 and 4-7 are never set: no GET_BITS call covers r0 or r4-r7 */ | ||
100 | |||
101 | # undef GET_BITS | ||
102 | } | ||
103 | |||
104 | /* | ||
105 | * Set the NaT bits for the scratch registers according to NAT and | ||
106 | * return the resulting unat (assuming the scratch registers are | ||
107 | * stored in PT). | ||
108 | */ | ||
109 | unsigned long | ||
110 | ia64_put_scratch_nat_bits (struct pt_regs *pt, unsigned long nat) | ||
111 | { | ||
112 | # define PUT_BITS(first, last, nat) \ | ||
113 | ({ \ | ||
114 | unsigned long bit = ia64_unat_pos(&pt->r##first); /* UNAT bit position for the pt_regs slot of r<first> */ \ | ||
115 | unsigned long nbits = (last - first + 1); \ | ||
116 | unsigned long mask = MASK(nbits) << first; /* NAT bits first..last */ \ | ||
117 | long dist; \ | ||
118 | if (bit < first) \ | ||
119 | dist = 64 + bit - first; /* keep the rotate distance non-negative */ \ | ||
120 | else \ | ||
121 | dist = bit - first; \ | ||
122 | ia64_rotl(nat & mask, dist); /* rotate bit position FIRST up to position BIT */ \ | ||
123 | }) | ||
124 | unsigned long scratch_unat; | ||
125 | |||
126 | /* | ||
127 | * Registers that are stored consecutively in struct pt_regs | ||
128 | * can be handled in parallel. If the register order in | ||
129 | * struct pt_regs changes, this code MUST be updated. | ||
130 | */ | ||
131 | scratch_unat = PUT_BITS( 1, 1, nat); | ||
132 | scratch_unat |= PUT_BITS( 2, 3, nat); | ||
133 | scratch_unat |= PUT_BITS(12, 13, nat); | ||
134 | scratch_unat |= PUT_BITS(14, 14, nat); | ||
135 | scratch_unat |= PUT_BITS(15, 15, nat); | ||
136 | scratch_unat |= PUT_BITS( 8, 11, nat); | ||
137 | scratch_unat |= PUT_BITS(16, 31, nat); | ||
138 | |||
139 | return scratch_unat; /* NAT bits 0 and 4-7 are ignored: no PUT_BITS call covers r0 or r4-r7 */ | ||
140 | |||
141 | # undef PUT_BITS | ||
142 | } | ||
143 | |||
144 | #define IA64_MLX_TEMPLATE 0x2 | ||
145 | #define IA64_MOVL_OPCODE 6 | ||
146 | |||
147 | void | ||
148 | ia64_increment_ip (struct pt_regs *regs) | ||
149 | { /* Advance REGS to the next instruction slot: bump psr.ri, moving cr_iip to the next 16-byte bundle when needed. */ | ||
150 | unsigned long w0, ri = ia64_psr(regs)->ri + 1; | ||
151 | |||
152 | if (ri > 2) { /* stepped past slot 2: continue at slot 0 of the next bundle */ | ||
153 | ri = 0; | ||
154 | regs->cr_iip += 16; | ||
155 | } else if (ri == 2) { | ||
156 | get_user(w0, (char __user *) regs->cr_iip + 0); /* NOTE(review): the get_user() result is not checked, so w0 is only meaningful if the read succeeded */ | ||
157 | if (((w0 >> 1) & 0xf) == IA64_MLX_TEMPLATE) { /* compares bits 1-4 of the value read at cr_iip with the MLX template code */ | ||
158 | /* | ||
159 | * rfi'ing to slot 2 of an MLX bundle causes | ||
160 | * an illegal operation fault. We don't want | ||
161 | * that to happen... | ||
162 | */ | ||
163 | ri = 0; | ||
164 | regs->cr_iip += 16; | ||
165 | } | ||
166 | } | ||
167 | ia64_psr(regs)->ri = ri; | ||
168 | } | ||
169 | |||
170 | void | ||
171 | ia64_decrement_ip (struct pt_regs *regs) | ||
172 | { /* Step REGS back by one instruction slot: lower psr.ri, moving cr_iip to the previous 16-byte bundle when ri was 0. */ | ||
173 | unsigned long w0, ri = ia64_psr(regs)->ri - 1; /* wraps when ri is 0; that case is overwritten below */ | ||
174 | |||
175 | if (ia64_psr(regs)->ri == 0) { | ||
176 | regs->cr_iip -= 16; | ||
177 | ri = 2; | ||
178 | get_user(w0, (char __user *) regs->cr_iip + 0); /* NOTE(review): the get_user() result is not checked, so w0 is only meaningful if the read succeeded */ | ||
179 | if (((w0 >> 1) & 0xf) == IA64_MLX_TEMPLATE) { /* compares bits 1-4 of the value read at cr_iip with the MLX template code */ | ||
180 | /* | ||
181 | * rfi'ing to slot 2 of an MLX bundle causes | ||
182 | * an illegal operation fault. We don't want | ||
183 | * that to happen... | ||
184 | */ | ||
185 | ri = 1; | ||
186 | } | ||
187 | } | ||
188 | ia64_psr(regs)->ri = ri; | ||
189 | } | ||
190 | |||
191 | /* | ||
192 | * This routine is used to read the rnat bits that are stored on the | ||
193 | * kernel backing store. Since, in general, the alignment of the user | ||
194 | * and kernel are different, this is not completely trivial. In | ||
195 | * essence, we need to construct the user RNAT based on up to two | ||
196 | * kernel RNAT values and/or the RNAT value saved in the child's | ||
197 | * pt_regs. | ||
198 | * | ||
199 | * user rbs | ||
200 | * | ||
201 | * +--------+ <-- lowest address | ||
202 | * | slot62 | | ||
203 | * +--------+ | ||
204 | * | rnat | 0x....1f8 | ||
205 | * +--------+ | ||
206 | * | slot00 | \ | ||
207 | * +--------+ | | ||
208 | * | slot01 | > child_regs->ar_rnat | ||
209 | * +--------+ | | ||
210 | * | slot02 | / kernel rbs | ||
211 | * +--------+ +--------+ | ||
212 | * <- child_regs->ar_bspstore | slot61 | <-- krbs | ||
213 | * +- - - - + +--------+ | ||
214 | * | slot62 | | ||
215 | * +- - - - + +--------+ | ||
216 | * | rnat | | ||
217 | * +- - - - + +--------+ | ||
218 | * vrnat | slot00 | | ||
219 | * +- - - - + +--------+ | ||
220 | * = = | ||
221 | * +--------+ | ||
222 | * | slot00 | \ | ||
223 | * +--------+ | | ||
224 | * | slot01 | > child_stack->ar_rnat | ||
225 | * +--------+ | | ||
226 | * | slot02 | / | ||
227 | * +--------+ | ||
228 | * <--- child_stack->ar_bspstore | ||
229 | * | ||
230 | * The way to think of this code is as follows: bit 0 in the user rnat | ||
231 | * corresponds to some bit N (0 <= N <= 62) in one of the kernel rnat | ||
232 | * values. The kernel rnat value holding this bit is stored in | ||
233 | * variable rnat0. rnat1 is loaded with the kernel rnat value that | ||
234 | * forms the upper bits of the user rnat value. | ||
235 | * | ||
236 | * Boundary cases: | ||
237 | * | ||
238 | * o when reading the rnat "below" the first rnat slot on the kernel | ||
239 | * backing store, rnat0/rnat1 are set to 0 and the low order bits are | ||
240 | * merged in from pt->ar_rnat. | ||
241 | * | ||
242 | * o when reading the rnat "above" the last rnat slot on the kernel | ||
243 | * backing store, rnat0/rnat1 gets its value from sw->ar_rnat. | ||
244 | */ | ||
245 | static unsigned long | ||
246 | get_rnat (struct task_struct *task, struct switch_stack *sw, | ||
247 | unsigned long *krbs, unsigned long *urnat_addr, | ||
248 | unsigned long *urbs_end) | ||
249 | { | ||
250 | unsigned long rnat0 = 0, rnat1 = 0, urnat = 0, *slot0_kaddr; | ||
251 | unsigned long umask = 0, mask, m; | ||
252 | unsigned long *kbsp, *ubspstore, *rnat0_kaddr, *rnat1_kaddr, shift; | ||
253 | long num_regs, nbits; | ||
254 | struct pt_regs *pt; | ||
255 | |||
256 | pt = ia64_task_regs(task); | ||
257 | kbsp = (unsigned long *) sw->ar_bspstore; | ||
258 | ubspstore = (unsigned long *) pt->ar_bspstore; | ||
259 | |||
260 | if (urbs_end < urnat_addr) /* the user RBS ends before this rnat slot: only part of the 63 bits is wanted */ | ||
261 | nbits = ia64_rse_num_regs(urnat_addr - 63, urbs_end); | ||
262 | else | ||
263 | nbits = 63; | ||
264 | mask = MASK(nbits); /* user rnat bits still to be filled in */ | ||
265 | /* | ||
266 | * First, figure out which bit number slot 0 in user-land maps | ||
267 | * to in the kernel rnat. Do this by figuring out how many | ||
268 | * register slots we're beyond the user's backingstore and | ||
269 | * then computing the equivalent address in kernel space. | ||
270 | */ | ||
271 | num_regs = ia64_rse_num_regs(ubspstore, urnat_addr + 1); | ||
272 | slot0_kaddr = ia64_rse_skip_regs(krbs, num_regs); | ||
273 | shift = ia64_rse_slot_num(slot0_kaddr); | ||
274 | rnat1_kaddr = ia64_rse_rnat_addr(slot0_kaddr); | ||
275 | rnat0_kaddr = rnat1_kaddr - 64; /* 64 words below rnat1_kaddr */ | ||
276 | |||
277 | if (ubspstore + 63 > urnat_addr) { | ||
278 | /* some bits need to be merged in from pt->ar_rnat */ | ||
279 | umask = MASK(ia64_rse_slot_num(ubspstore)) & mask; | ||
280 | urnat = (pt->ar_rnat & umask); | ||
281 | mask &= ~umask; | ||
282 | if (!mask) /* every wanted bit came from pt->ar_rnat */ | ||
283 | return urnat; | ||
284 | } | ||
285 | |||
286 | m = mask << shift; | ||
287 | if (rnat0_kaddr >= kbsp) /* not below the kernel bspstore: take the value from sw->ar_rnat */ | ||
288 | rnat0 = sw->ar_rnat; | ||
289 | else if (rnat0_kaddr > krbs) /* inside the kernel RBS: read the slot; otherwise rnat0 stays 0 */ | ||
290 | rnat0 = *rnat0_kaddr; | ||
291 | urnat |= (rnat0 & m) >> shift; | ||
292 | |||
293 | m = mask >> (63 - shift); | ||
294 | if (rnat1_kaddr >= kbsp) /* same selection as for rnat0 */ | ||
295 | rnat1 = sw->ar_rnat; | ||
296 | else if (rnat1_kaddr > krbs) | ||
297 | rnat1 = *rnat1_kaddr; | ||
298 | urnat |= (rnat1 & m) << (63 - shift); | ||
299 | return urnat; | ||
300 | } | ||
301 | |||
302 | /* | ||
303 | * The reverse of get_rnat. | ||
304 | */ | ||
305 | static void | ||
306 | put_rnat (struct task_struct *task, struct switch_stack *sw, | ||
307 | unsigned long *krbs, unsigned long *urnat_addr, unsigned long urnat, | ||
308 | unsigned long *urbs_end) | ||
309 | { | ||
310 | unsigned long rnat0 = 0, rnat1 = 0, *slot0_kaddr, umask = 0, mask, m; | ||
311 | unsigned long *kbsp, *ubspstore, *rnat0_kaddr, *rnat1_kaddr, shift; | ||
312 | long num_regs, nbits; | ||
313 | struct pt_regs *pt; | ||
314 | unsigned long cfm, *urbs_kargs; | ||
315 | |||
316 | pt = ia64_task_regs(task); | ||
317 | kbsp = (unsigned long *) sw->ar_bspstore; | ||
318 | ubspstore = (unsigned long *) pt->ar_bspstore; | ||
319 | |||
320 | urbs_kargs = urbs_end; | ||
321 | if (in_syscall(pt)) { | ||
322 | /* | ||
323 | * If entered via syscall, don't allow user to set rnat bits | ||
324 | * for syscall args. | ||
325 | */ | ||
326 | cfm = pt->cr_ifs; | ||
327 | urbs_kargs = ia64_rse_skip_regs(urbs_end, -(cfm & 0x7f)); /* back up by the low 7 bits of cfm - presumably the frame size; TODO confirm */ | ||
328 | } | ||
329 | |||
330 | if (urbs_kargs >= urnat_addr) | ||
331 | nbits = 63; | ||
332 | else { | ||
333 | if ((urnat_addr - 63) >= urbs_kargs) /* no slot covered by this rnat word lies below urbs_kargs: nothing to write */ | ||
334 | return; | ||
335 | nbits = ia64_rse_num_regs(urnat_addr - 63, urbs_kargs); | ||
336 | } | ||
337 | mask = MASK(nbits); /* user rnat bits that may be written */ | ||
338 | |||
339 | /* | ||
340 | * First, figure out which bit number slot 0 in user-land maps | ||
341 | * to in the kernel rnat. Do this by figuring out how many | ||
342 | * register slots we're beyond the user's backingstore and | ||
343 | * then computing the equivalent address in kernel space. | ||
344 | */ | ||
345 | num_regs = ia64_rse_num_regs(ubspstore, urnat_addr + 1); | ||
346 | slot0_kaddr = ia64_rse_skip_regs(krbs, num_regs); | ||
347 | shift = ia64_rse_slot_num(slot0_kaddr); | ||
348 | rnat1_kaddr = ia64_rse_rnat_addr(slot0_kaddr); | ||
349 | rnat0_kaddr = rnat1_kaddr - 64; /* 64 words below rnat1_kaddr */ | ||
350 | |||
351 | if (ubspstore + 63 > urnat_addr) { | ||
352 | /* some bits need to be placed in pt->ar_rnat: */ | ||
353 | umask = MASK(ia64_rse_slot_num(ubspstore)) & mask; | ||
354 | pt->ar_rnat = (pt->ar_rnat & ~umask) | (urnat & umask); | ||
355 | mask &= ~umask; | ||
356 | if (!mask) /* every writable bit went into pt->ar_rnat */ | ||
357 | return; | ||
358 | } | ||
359 | /* | ||
360 | * Note: Section 11.1 of the EAS guarantees that bit 63 of an | ||
361 | * rnat slot is ignored. so we don't have to clear it here. | ||
362 | */ | ||
363 | rnat0 = (urnat << shift); | ||
364 | m = mask << shift; | ||
365 | if (rnat0_kaddr >= kbsp) /* not below the kernel bspstore: update sw->ar_rnat */ | ||
366 | sw->ar_rnat = (sw->ar_rnat & ~m) | (rnat0 & m); | ||
367 | else if (rnat0_kaddr > krbs) /* inside the kernel RBS: update the slot; otherwise these bits are dropped */ | ||
368 | *rnat0_kaddr = ((*rnat0_kaddr & ~m) | (rnat0 & m)); | ||
369 | |||
370 | rnat1 = (urnat >> (63 - shift)); | ||
371 | m = mask >> (63 - shift); | ||
372 | if (rnat1_kaddr >= kbsp) /* same selection as for rnat0 */ | ||
373 | sw->ar_rnat = (sw->ar_rnat & ~m) | (rnat1 & m); | ||
374 | else if (rnat1_kaddr > krbs) | ||
375 | *rnat1_kaddr = ((*rnat1_kaddr & ~m) | (rnat1 & m)); | ||
376 | } | ||
377 | |||
378 | static inline int | ||
379 | on_kernel_rbs (unsigned long addr, unsigned long bspstore, | ||
380 | unsigned long urbs_end) | ||
381 | { /* Non-zero iff ADDR lies in [BSPSTORE, address of the rnat slot that goes with URBS_END], both ends inclusive. */ | ||
382 | unsigned long *rnat_addr = ia64_rse_rnat_addr((unsigned long *) | ||
383 | urbs_end); | ||
384 | return (addr >= bspstore && addr <= (unsigned long) rnat_addr); | ||
385 | } | ||
386 | |||
387 | /* | ||
388 | * Read a word from the user-level backing store of task CHILD. ADDR | ||
389 | * is the user-level address to read the word from, VAL a pointer to | ||
390 | * the return value, and USER_RBS_END gives the end of the user-level | ||
391 | * backing store (i.e., it's the address that would be in ar.bsp after | ||
392 | * the user executed a "cover" instruction). | ||
393 | * | ||
394 | * This routine takes care of accessing the kernel register backing | ||
395 | * store for those registers that got spilled there. It also takes | ||
396 | * care of calculating the appropriate RNaT collection words. | ||
397 | */ | ||
398 | long | ||
399 | ia64_peek (struct task_struct *child, struct switch_stack *child_stack, | ||
400 | unsigned long user_rbs_end, unsigned long addr, long *val) | ||
401 | { /* Returns 0 with the word stored in *VAL, or -EIO if the read from the child's memory came up short. */ | ||
402 | unsigned long *bspstore, *krbs, regnum, *laddr, *urbs_end, *rnat_addr; | ||
403 | struct pt_regs *child_regs; | ||
404 | size_t copied; | ||
405 | long ret; | ||
406 | |||
407 | urbs_end = (long *) user_rbs_end; | ||
408 | laddr = (unsigned long *) addr; | ||
409 | child_regs = ia64_task_regs(child); | ||
410 | bspstore = (unsigned long *) child_regs->ar_bspstore; | ||
411 | krbs = (unsigned long *) child + IA64_RBS_OFFSET/8; /* IA64_RBS_OFFSET bytes past the task pointer (the /8 is for word-pointer arithmetic) */ | ||
412 | if (on_kernel_rbs(addr, (unsigned long) bspstore, | ||
413 | (unsigned long) urbs_end)) | ||
414 | { | ||
415 | /* | ||
416 | * Attempt to read the RBS in an area that's actually | ||
417 | * on the kernel RBS => read the corresponding bits in | ||
418 | * the kernel RBS. | ||
419 | */ | ||
420 | rnat_addr = ia64_rse_rnat_addr(laddr); | ||
421 | ret = get_rnat(child, child_stack, krbs, rnat_addr, urbs_end); | ||
422 | |||
423 | if (laddr == rnat_addr) { | ||
424 | /* return NaT collection word itself */ | ||
425 | *val = ret; | ||
426 | return 0; | ||
427 | } | ||
428 | |||
429 | if (((1UL << ia64_rse_slot_num(laddr)) & ret) != 0) { /* the NaT bit for this slot is set */ | ||
430 | /* | ||
431 | * It is implementation dependent whether the | ||
432 | * data portion of a NaT value gets saved on a | ||
433 | * st8.spill or RSE spill (e.g., see EAS 2.6, | ||
434 | * 4.4.4.6 Register Spill and Fill). To get | ||
435 | * consistent behavior across all possible | ||
436 | * IA-64 implementations, we return zero in | ||
437 | * this case. | ||
438 | */ | ||
439 | *val = 0; | ||
440 | return 0; | ||
441 | } | ||
442 | |||
443 | if (laddr < urbs_end) { | ||
444 | /* | ||
445 | * The desired word is on the kernel RBS and | ||
446 | * is not a NaT. | ||
447 | */ | ||
448 | regnum = ia64_rse_num_regs(bspstore, laddr); | ||
449 | *val = *ia64_rse_skip_regs(krbs, regnum); | ||
450 | return 0; | ||
451 | } | ||
452 | } | ||
453 | copied = access_process_vm(child, addr, &ret, sizeof(ret), 0); /* every other case: read the word from the child's user memory */ | ||
454 | if (copied != sizeof(ret)) | ||
455 | return -EIO; | ||
456 | *val = ret; | ||
457 | return 0; | ||
458 | } | ||
459 | |||
460 | long | ||
461 | ia64_poke (struct task_struct *child, struct switch_stack *child_stack, | ||
462 | unsigned long user_rbs_end, unsigned long addr, long val) | ||
463 | { /* Write VAL at user-level address ADDR of CHILD, redirecting to the kernel RBS where needed; returns 0, or -EIO on a short write to the child's memory. */ | ||
464 | unsigned long *bspstore, *krbs, regnum, *laddr; | ||
465 | unsigned long *urbs_end = (long *) user_rbs_end; | ||
466 | struct pt_regs *child_regs; | ||
467 | |||
468 | laddr = (unsigned long *) addr; | ||
469 | child_regs = ia64_task_regs(child); | ||
470 | bspstore = (unsigned long *) child_regs->ar_bspstore; | ||
471 | krbs = (unsigned long *) child + IA64_RBS_OFFSET/8; /* IA64_RBS_OFFSET bytes past the task pointer (the /8 is for word-pointer arithmetic) */ | ||
472 | if (on_kernel_rbs(addr, (unsigned long) bspstore, | ||
473 | (unsigned long) urbs_end)) | ||
474 | { | ||
475 | /* | ||
476 | * Attempt to write the RBS in an area that's actually | ||
477 | * on the kernel RBS => write the corresponding bits | ||
478 | * in the kernel RBS. | ||
479 | */ | ||
480 | if (ia64_rse_is_rnat_slot(laddr)) | ||
481 | put_rnat(child, child_stack, krbs, laddr, val, | ||
482 | urbs_end); | ||
483 | else { | ||
484 | if (laddr < urbs_end) { /* a non-rnat slot at or beyond URBS_END is silently left unwritten; 0 is still returned */ | ||
485 | regnum = ia64_rse_num_regs(bspstore, laddr); | ||
486 | *ia64_rse_skip_regs(krbs, regnum) = val; | ||
487 | } | ||
488 | } | ||
489 | } else if (access_process_vm(child, addr, &val, sizeof(val), 1) | ||
490 | != sizeof(val)) | ||
491 | return -EIO; | ||
492 | return 0; | ||
493 | } | ||
494 | |||
495 | /* | ||
496 | * Calculate the address of the end of the user-level register backing | ||
497 | * store. This is the address that would have been stored in ar.bsp | ||
498 | * if the user had executed a "cover" instruction right before | ||
499 | * entering the kernel. If CFMP is not NULL, it is used to return the | ||
500 | * "current frame mask" that was active at the time the kernel was | ||
501 | * entered. | ||
502 | */ | ||
503 | unsigned long | ||
504 | ia64_get_user_rbs_end (struct task_struct *child, struct pt_regs *pt, | ||
505 | unsigned long *cfmp) | ||
506 | { | ||
507 | unsigned long *krbs, *bspstore, cfm = pt->cr_ifs; | ||
508 | long ndirty; | ||
509 | |||
510 | krbs = (unsigned long *) child + IA64_RBS_OFFSET/8; /* IA64_RBS_OFFSET bytes past the task pointer (the /8 is for word-pointer arithmetic) */ | ||
511 | bspstore = (unsigned long *) pt->ar_bspstore; | ||
512 | ndirty = ia64_rse_num_regs(krbs, krbs + (pt->loadrs >> 19)); /* presumably loadrs holds a byte count shifted left by 16, so >> 19 gives 8-byte words; TODO confirm */ | ||
513 | |||
514 | if (in_syscall(pt)) | ||
515 | ndirty += (cfm & 0x7f); /* add the low 7 bits of cfm - presumably the frame size; TODO confirm */ | ||
516 | else | ||
517 | cfm &= ~(1UL << 63); /* clear valid bit */ | ||
518 | |||
519 | if (cfmp) | ||
520 | *cfmp = cfm; | ||
521 | return (unsigned long) ia64_rse_skip_regs(bspstore, ndirty); /* user bspstore advanced by NDIRTY registers */ | ||
522 | } | ||
523 | |||
524 | /* | ||
525 | * Synchronize (i.e., write) the RSE backing store living in kernel | ||
526 | * space to the VM of the CHILD task. SW is the pointer to the | ||
527 | * child's switch_stack structure. USER_RBS_START and USER_RBS_END | ||
528 | * are the user-level addresses at which the range to be written | ||
529 | * starts and ends. | ||
530 | */ | ||
531 | long | ||
532 | ia64_sync_user_rbs (struct task_struct *child, struct switch_stack *sw, | ||
533 | unsigned long user_rbs_start, unsigned long user_rbs_end) | ||
534 | { /* Returns 0 on success, a negative ia64_peek() result, or -EIO on a short write; stops at the first failure. */ | ||
535 | unsigned long addr, val; | ||
536 | long ret; | ||
537 | |||
538 | /* now copy word for word from kernel rbs to user rbs: */ | ||
539 | for (addr = user_rbs_start; addr < user_rbs_end; addr += 8) { | ||
540 | ret = ia64_peek(child, sw, user_rbs_end, addr, &val); | ||
541 | if (ret < 0) | ||
542 | return ret; | ||
543 | if (access_process_vm(child, addr, &val, sizeof(val), 1) | ||
544 | != sizeof(val)) | ||
545 | return -EIO; | ||
546 | } | ||
547 | return 0; | ||
548 | } | ||
549 | |||
550 | static inline int | ||
551 | thread_matches (struct task_struct *thread, unsigned long addr) | ||
552 | { /* Returns 1 if THREAD passes ptrace_check_attach() and ADDR falls in the on_kernel_rbs() range of its backing store, else 0. */ | ||
553 | unsigned long thread_rbs_end; | ||
554 | struct pt_regs *thread_regs; | ||
555 | |||
556 | if (ptrace_check_attach(thread, 0) < 0) | ||
557 | /* | ||
558 | * If the thread is not in an attachable state, we'll | ||
559 | * ignore it. The net effect is that if ADDR happens | ||
560 | * to overlap with the portion of the thread's | ||
561 | * register backing store that is currently residing | ||
562 | * on the thread's kernel stack, then ptrace() may end | ||
563 | * up accessing a stale value. But if the thread | ||
564 | * isn't stopped, that's a problem anyhow, so we're | ||
565 | * doing as well as we can... | ||
566 | */ | ||
567 | return 0; | ||
568 | |||
569 | thread_regs = ia64_task_regs(thread); | ||
570 | thread_rbs_end = ia64_get_user_rbs_end(thread, thread_regs, NULL); /* NULL: the frame mask is not needed here */ | ||
571 | if (!on_kernel_rbs(addr, thread_regs->ar_bspstore, thread_rbs_end)) | ||
572 | return 0; | ||
573 | |||
574 | return 1; /* looks like we've got a winner */ | ||
575 | } | ||
576 | |||
577 | /* | ||
578 | * GDB apparently wants to be able to read the register-backing store | ||
579 | * of any thread when attached to a given process. If we are peeking | ||
580 | * or poking an address that happens to reside in the kernel-backing | ||
581 | * store of another thread, we need to attach to that thread, because | ||
582 | * otherwise we end up accessing stale data. | ||
583 | * | ||
584 | * tasklist_lock must be read-locked before calling this routine! | ||
585 | */ | ||
586 | static struct task_struct * | ||
587 | find_thread_for_addr (struct task_struct *child, unsigned long addr) | ||
588 | { /* Returns the thread sharing CHILD's mm for which thread_matches(ADDR) holds, or CHILD itself if there is none. */ | ||
589 | struct task_struct *g, *p; | ||
590 | struct mm_struct *mm; | ||
591 | int mm_users; | ||
592 | |||
593 | if (!(mm = get_task_mm(child))) /* no mm to compare against: nothing to search */ | ||
594 | return child; | ||
595 | |||
596 | /* -1 because of our get_task_mm(): */ | ||
597 | mm_users = atomic_read(&mm->mm_users) - 1; | ||
598 | if (mm_users <= 1) | ||
599 | goto out; /* not multi-threaded */ | ||
600 | |||
601 | /* | ||
602 | * First, traverse the child's thread-list. Good for scalability with | ||
603 | * NPTL-threads. | ||
604 | */ | ||
605 | p = child; | ||
606 | do { | ||
607 | if (thread_matches(p, addr)) { | ||
608 | child = p; | ||
609 | goto out; | ||
610 | } | ||
611 | if (mm_users-- <= 1) /* all users of the mm have been examined */ | ||
612 | goto out; | ||
613 | } while ((p = next_thread(p)) != child); | ||
614 | |||
615 | do_each_thread(g, p) { /* fallback: scan every thread in the system */ | ||
616 | if (p->mm != mm) /* skip threads that do not share the mm; testing child->mm here would never skip anything */ | ||
617 | continue; | ||
618 | |||
619 | if (thread_matches(p, addr)) { | ||
620 | child = p; | ||
621 | goto out; | ||
622 | } | ||
623 | } while_each_thread(g, p); | ||
624 | out: | ||
625 | mmput(mm); /* drop the reference taken by get_task_mm() */ | ||
626 | return child; | ||
627 | } | ||
628 | |||
629 | /* | ||
630 | * Write f32-f127 back to task->thread.fph if it has been modified. | ||
631 | */ | ||
632 | inline void | ||
633 | ia64_flush_fph (struct task_struct *task) | ||
634 | { | ||
635 | struct ia64_psr *psr = ia64_psr(ia64_task_regs(task)); | ||
636 | |||
637 | if (ia64_is_local_fpu_owner(task) && psr->mfh) { /* nothing to do unless TASK is the local FPU owner and psr.mfh is set */ | ||
638 | psr->mfh = 0; | ||
639 | task->thread.flags |= IA64_THREAD_FPH_VALID; | ||
640 | ia64_save_fpu(&task->thread.fph[0]); | ||
641 | } | ||
642 | } | ||
643 | |||
644 | /* | ||
645 | * Sync the fph state of the task so that it can be manipulated | ||
646 | * through thread.fph. If necessary, f32-f127 are written back to | ||
647 | * thread.fph or, if the fph state hasn't been used before, thread.fph | ||
648 | * is cleared to zeroes. Also, access to f32-f127 is disabled to | ||
649 | * ensure that the task picks up the state from thread.fph when it | ||
650 | * executes again. | ||
651 | */ | ||
652 | void | ||
653 | ia64_sync_fph (struct task_struct *task) | ||
654 | { | ||
655 | struct ia64_psr *psr = ia64_psr(ia64_task_regs(task)); | ||
656 | |||
657 | ia64_flush_fph(task); | ||
658 | if (!(task->thread.flags & IA64_THREAD_FPH_VALID)) { | ||
659 | task->thread.flags |= IA64_THREAD_FPH_VALID; | ||
660 | memset(&task->thread.fph, 0, sizeof(task->thread.fph)); | ||
661 | } | ||
662 | ia64_drop_fpu(task); | ||
663 | psr->dfh = 1; | ||
664 | } | ||
665 | |||
666 | static int | ||
667 | access_fr (struct unw_frame_info *info, int regnum, int hi, | ||
668 | unsigned long *data, int write_access) | ||
669 | { | ||
670 | struct ia64_fpreg fpval; | ||
671 | int ret; | ||
672 | |||
673 | ret = unw_get_fr(info, regnum, &fpval); | ||
674 | if (ret < 0) | ||
675 | return ret; | ||
676 | |||
677 | if (write_access) { | ||
678 | fpval.u.bits[hi] = *data; | ||
679 | ret = unw_set_fr(info, regnum, fpval); | ||
680 | } else | ||
681 | *data = fpval.u.bits[hi]; | ||
682 | return ret; | ||
683 | } | ||
684 | |||
685 | /* | ||
686 | * Change the machine-state of CHILD such that it will return via the normal | ||
687 | * kernel exit-path, rather than the syscall-exit path. | ||
688 | */ | ||
689 | static void | ||
690 | convert_to_non_syscall (struct task_struct *child, struct pt_regs *pt, | ||
691 | unsigned long cfm) | ||
692 | { | ||
693 | struct unw_frame_info info, prev_info; | ||
694 | unsigned long ip, pr; | ||
695 | |||
696 | unw_init_from_blocked_task(&info, child); | ||
697 | while (1) { | ||
698 | prev_info = info; | ||
699 | if (unw_unwind(&info) < 0) | ||
700 | return; | ||
701 | if (unw_get_rp(&info, &ip) < 0) | ||
702 | return; | ||
703 | if (ip < FIXADDR_USER_END) | ||
704 | break; | ||
705 | } | ||
706 | |||
707 | unw_get_pr(&prev_info, &pr); | ||
708 | pr &= ~(1UL << PRED_SYSCALL); | ||
709 | pr |= (1UL << PRED_NON_SYSCALL); | ||
710 | unw_set_pr(&prev_info, pr); | ||
711 | |||
712 | pt->cr_ifs = (1UL << 63) | cfm; | ||
713 | } | ||
714 | |||
715 | static int | ||
716 | access_nat_bits (struct task_struct *child, struct pt_regs *pt, | ||
717 | struct unw_frame_info *info, | ||
718 | unsigned long *data, int write_access) | ||
719 | { | ||
720 | unsigned long regnum, nat_bits, scratch_unat, dummy = 0; | ||
721 | char nat = 0; | ||
722 | |||
723 | if (write_access) { | ||
724 | nat_bits = *data; | ||
725 | scratch_unat = ia64_put_scratch_nat_bits(pt, nat_bits); | ||
726 | if (unw_set_ar(info, UNW_AR_UNAT, scratch_unat) < 0) { | ||
727 | dprintk("ptrace: failed to set ar.unat\n"); | ||
728 | return -1; | ||
729 | } | ||
730 | for (regnum = 4; regnum <= 7; ++regnum) { | ||
731 | unw_get_gr(info, regnum, &dummy, &nat); | ||
732 | unw_set_gr(info, regnum, dummy, | ||
733 | (nat_bits >> regnum) & 1); | ||
734 | } | ||
735 | } else { | ||
736 | if (unw_get_ar(info, UNW_AR_UNAT, &scratch_unat) < 0) { | ||
737 | dprintk("ptrace: failed to read ar.unat\n"); | ||
738 | return -1; | ||
739 | } | ||
740 | nat_bits = ia64_get_scratch_nat_bits(pt, scratch_unat); | ||
741 | for (regnum = 4; regnum <= 7; ++regnum) { | ||
742 | unw_get_gr(info, regnum, &dummy, &nat); | ||
743 | nat_bits |= (nat != 0) << regnum; | ||
744 | } | ||
745 | *data = nat_bits; | ||
746 | } | ||
747 | return 0; | ||
748 | } | ||
749 | |||
/*
 * Read or write one 8-byte word of CHILD's "user area", i.e. the register
 * state addressed by the PT_* offsets.
 *
 *   child         task whose saved state is accessed
 *   addr          PT_* offset of the word; must be a multiple of 8
 *   data          on a read the word is stored here, on a write it is
 *                 taken from here
 *   write_access  non-zero for a write, zero for a read
 *
 * Depending on ADDR the word lives in thread.fph, in pt_regs, in the
 * switch_stack, in thread.dbr/ibr, or has to be reached through the
 * unwinder.  Branches that resolve to a plain memory slot set PTR and
 * share the load/store at the bottom; the others return directly.
 *
 * Returns 0 on success and a negative value (-1, or the status of the
 * failing unwinder call) on failure.
 */
static int
access_uarea (struct task_struct *child, unsigned long addr,
	      unsigned long *data, int write_access)
{
	unsigned long *ptr, regnum, urbs_end, rnat_addr, cfm;
	struct switch_stack *sw;
	struct pt_regs *pt;
	/*
	 * Address of member REG inside *PT, as a void pointer.  The callers
	 * below add byte offsets to the result, which relies on GNU C's
	 * byte-granular arithmetic on "void *".
	 */
#	define pt_reg_addr(pt, reg)	((void *)			    \
					 ((unsigned long) (pt)		    \
					  + offsetof(struct pt_regs, reg)))


	pt = ia64_task_regs(child);
	sw = (struct switch_stack *) (child->thread.ksp + 16);

	if ((addr & 0x7) != 0) {
		dprintk("ptrace: unaligned register address 0x%lx\n", addr);
		return -1;
	}

	if (addr < PT_F127 + 16) {
		/*
		 * accessing fph: ADDR is used directly as the byte offset
		 * into thread.fph.  A write needs the full sync (so the task
		 * reloads the new values); a read only needs the live copy
		 * flushed to memory.
		 */
		if (write_access)
			ia64_sync_fph(child);
		else
			ia64_flush_fph(child);
		ptr = (unsigned long *)
			((unsigned long) &child->thread.fph + addr);
	} else if ((addr >= PT_F10) && (addr < PT_F11 + 16)) {
		/* scratch registers untouched by kernel (saved in pt_regs) */
		ptr = pt_reg_addr(pt, f10) + (addr - PT_F10);
	} else if (addr >= PT_F12 && addr < PT_F15 + 16) {
		/*
		 * Scratch registers untouched by kernel (saved in
		 * switch_stack).
		 *
		 * NOTE(review): the offset into the switch_stack is computed
		 * relative to PT_NAT_BITS + 32; presumably this lines f12-f15
		 * up with their slots in struct switch_stack -- confirm
		 * against the PT_* offset definitions.
		 */
		ptr = (unsigned long *) ((long) sw
					 + (addr - PT_NAT_BITS - 32));
	} else if (addr < PT_AR_LC + 8) {
		/* preserved state: reached through the unwinder */
		struct unw_frame_info info;
		char nat = 0;
		int ret;

		unw_init_from_blocked_task(&info, child);
		if (unw_unwind_to_user(&info) < 0)
			return -1;

		switch (addr) {
		      case PT_NAT_BITS:
			return access_nat_bits(child, pt, &info,
					       data, write_access);

		      case PT_R4: case PT_R5: case PT_R6: case PT_R7:
			if (write_access) {
				/*
				 * read NaT bit first, so that the write
				 * below stores the value with the register's
				 * current NaT bit:
				 */
				unsigned long dummy;

				ret = unw_get_gr(&info, (addr - PT_R4)/8 + 4,
						 &dummy, &nat);
				if (ret < 0)
					return ret;
			}
			return unw_access_gr(&info, (addr - PT_R4)/8 + 4, data,
					     &nat, write_access);

		      case PT_B1: case PT_B2: case PT_B3:
		      case PT_B4: case PT_B5:
			return unw_access_br(&info, (addr - PT_B1)/8 + 1, data,
					     write_access);

		      case PT_AR_EC:
			return unw_access_ar(&info, UNW_AR_EC, data,
					     write_access);

		      case PT_AR_LC:
			return unw_access_ar(&info, UNW_AR_LC, data,
					     write_access);

		      default:
			/*
			 * f2-f5 and f16-f31: each register takes 16 bytes;
			 * bit 3 of ADDR selects which 8-byte half is
			 * accessed.
			 */
			if (addr >= PT_F2 && addr < PT_F5 + 16)
				return access_fr(&info, (addr - PT_F2)/16 + 2,
						 (addr & 8) != 0, data,
						 write_access);
			else if (addr >= PT_F16 && addr < PT_F31 + 16)
				return access_fr(&info,
						 (addr - PT_F16)/16 + 16,
						 (addr & 8) != 0,
						 data, write_access);
			else {
				dprintk("ptrace: rejecting access to register "
					"address 0x%lx\n", addr);
				return -1;
			}
		}
	} else if (addr < PT_F9+16) {
		/* scratch state */
		switch (addr) {
		      case PT_AR_BSP:
			/*
			 * By convention, we use PT_AR_BSP to refer to
			 * the end of the user-level backing store.
			 * Use ia64_rse_skip_regs(PT_AR_BSP, -CFM.sof)
			 * to get the real value of ar.bsp at the time
			 * the kernel was entered.
			 *
			 * Furthermore, when changing the contents of
			 * PT_AR_BSP (or PT_CFM) we MUST copy any
			 * user-level stacked registers that are
			 * stored on the kernel stack back to
			 * user-space because otherwise, we might end
			 * up clobbering kernel stacked registers.
			 * Also, if this happens while the task is
			 * blocked in a system call, we convert the
			 * state such that the non-system-call exit
			 * path is used.  This ensures that the proper
			 * state will be picked up when resuming
			 * execution.  However, it *also* means that
			 * once we write PT_AR_BSP/PT_CFM, it won't be
			 * possible to modify the syscall arguments of
			 * the pending system call any longer.  This
			 * shouldn't be an issue because modifying
			 * PT_AR_BSP/PT_CFM generally implies that
			 * we're either abandoning the pending system
			 * call or that we defer its re-execution
			 * (e.g., due to GDB doing an inferior
			 * function call).
			 */
			urbs_end = ia64_get_user_rbs_end(child, pt, &cfm);
			if (write_access) {
				/* writing the current value is a no-op */
				if (*data != urbs_end) {
					if (ia64_sync_user_rbs(child, sw,
							       pt->ar_bspstore,
							       urbs_end) < 0)
						return -1;
					if (in_syscall(pt))
						convert_to_non_syscall(child,
								       pt,
								       cfm);
					/*
					 * Simulate user-level write
					 * of ar.bsp:
					 */
					pt->loadrs = 0;
					pt->ar_bspstore = *data;
				}
			} else
				*data = urbs_end;
			return 0;

		      case PT_CFM:
			urbs_end = ia64_get_user_rbs_end(child, pt, &cfm);
			if (write_access) {
				/*
				 * Only the bits covered by PFM_MASK can be
				 * changed; if none of them differ there is
				 * nothing to do.
				 */
				if (((cfm ^ *data) & PFM_MASK) != 0) {
					if (ia64_sync_user_rbs(child, sw,
							       pt->ar_bspstore,
							       urbs_end) < 0)
						return -1;
					if (in_syscall(pt))
						convert_to_non_syscall(child,
								       pt,
								       cfm);
					pt->cr_ifs = ((pt->cr_ifs & ~PFM_MASK)
						      | (*data & PFM_MASK));
				}
			} else
				*data = cfm;
			return 0;

		      case PT_CR_IPSR:
			/*
			 * Only the bits in IPSR_MASK are visible to, and
			 * changeable by, the tracer.
			 */
			if (write_access)
				pt->cr_ipsr = ((*data & IPSR_MASK)
					       | (pt->cr_ipsr & ~IPSR_MASK));
			else
				*data = (pt->cr_ipsr & IPSR_MASK);
			return 0;

		      case PT_AR_RNAT:
			/*
			 * ar.rnat is accessed as the RNaT collection word
			 * that belongs to the end of the user backing store.
			 */
			urbs_end = ia64_get_user_rbs_end(child, pt, NULL);
			rnat_addr = (long) ia64_rse_rnat_addr((long *)
							      urbs_end);
			if (write_access)
				return ia64_poke(child, sw, urbs_end,
						 rnat_addr, *data);
			else
				return ia64_peek(child, sw, urbs_end,
						 rnat_addr, data);

		      /*
		       * The remaining cases map ADDR onto a member of
		       * pt_regs.  Where several PT_* offsets share one
		       * case, the distance from the first offset is added
		       * to the address of the first member of the group.
		       */
		      case PT_R1:
			ptr = pt_reg_addr(pt, r1);
			break;
		      case PT_R2:  case PT_R3:
			ptr = pt_reg_addr(pt, r2) + (addr - PT_R2);
			break;
		      case PT_R8:  case PT_R9:  case PT_R10: case PT_R11:
			ptr = pt_reg_addr(pt, r8) + (addr - PT_R8);
			break;
		      case PT_R12: case PT_R13:
			ptr = pt_reg_addr(pt, r12) + (addr - PT_R12);
			break;
		      case PT_R14:
			ptr = pt_reg_addr(pt, r14);
			break;
		      case PT_R15:
			ptr = pt_reg_addr(pt, r15);
			break;
		      case PT_R16: case PT_R17: case PT_R18: case PT_R19:
		      case PT_R20: case PT_R21: case PT_R22: case PT_R23:
		      case PT_R24: case PT_R25: case PT_R26: case PT_R27:
		      case PT_R28: case PT_R29: case PT_R30: case PT_R31:
			ptr = pt_reg_addr(pt, r16) + (addr - PT_R16);
			break;
		      case PT_B0:
			ptr = pt_reg_addr(pt, b0);
			break;
		      case PT_B6:
			ptr = pt_reg_addr(pt, b6);
			break;
		      case PT_B7:
			ptr = pt_reg_addr(pt, b7);
			break;
		      case PT_F6:  case PT_F6+8: case PT_F7: case PT_F7+8:
		      case PT_F8:  case PT_F8+8: case PT_F9: case PT_F9+8:
			ptr = pt_reg_addr(pt, f6) + (addr - PT_F6);
			break;
		      case PT_AR_BSPSTORE:
			ptr = pt_reg_addr(pt, ar_bspstore);
			break;
		      case PT_AR_RSC:
			ptr = pt_reg_addr(pt, ar_rsc);
			break;
		      case PT_AR_UNAT:
			ptr = pt_reg_addr(pt, ar_unat);
			break;
		      case PT_AR_PFS:
			ptr = pt_reg_addr(pt, ar_pfs);
			break;
		      case PT_AR_CCV:
			ptr = pt_reg_addr(pt, ar_ccv);
			break;
		      case PT_AR_FPSR:
			ptr = pt_reg_addr(pt, ar_fpsr);
			break;
		      case PT_CR_IIP:
			ptr = pt_reg_addr(pt, cr_iip);
			break;
		      case PT_PR:
			ptr = pt_reg_addr(pt, pr);
			break;
			/* scratch register */

		      default:
			/* disallow accessing anything else... */
			dprintk("ptrace: rejecting access to register "
				"address 0x%lx\n", addr);
			return -1;
		}
	} else if (addr <= PT_AR_SSD) {
		/* ar.csd/ar.ssd: stored in pt_regs starting at ar_csd */
		ptr = pt_reg_addr(pt, ar_csd) + (addr - PT_AR_CSD);
	} else {
		/* access debug registers */

		if (addr >= PT_IBR) {
			regnum = (addr - PT_IBR) >> 3;
			ptr = &child->thread.ibr[0];
		} else {
			/*
			 * An ADDR below PT_DBR makes the unsigned
			 * subtraction wrap to a huge register number, which
			 * the range check below rejects.
			 */
			regnum = (addr - PT_DBR) >> 3;
			ptr = &child->thread.dbr[0];
		}

		if (regnum >= 8) {
			dprintk("ptrace: rejecting access to register "
				"address 0x%lx\n", addr);
			return -1;
		}
#ifdef CONFIG_PERFMON
		/*
		 * Check if debug registers are used by perfmon. This
		 * test must be done once we know that we can do the
		 * operation, i.e. the arguments are all valid, but
		 * before we start modifying the state.
		 *
		 * Perfmon needs to keep a count of how many processes
		 * are trying to modify the debug registers for system
		 * wide monitoring sessions.
		 *
		 * We also include read access here, because they may
		 * cause the PMU-installed debug register state
		 * (dbr[], ibr[]) to be reset. The two arrays are also
		 * used by perfmon, but we do not use
		 * IA64_THREAD_DBG_VALID. The registers are restored
		 * by the PMU context switch code.
		 */
		if (pfm_use_debug_registers(child)) return -1;
#endif

		/* First use: start from all-zero debug registers. */
		if (!(child->thread.flags & IA64_THREAD_DBG_VALID)) {
			child->thread.flags |= IA64_THREAD_DBG_VALID;
			memset(child->thread.dbr, 0,
			       sizeof(child->thread.dbr));
			memset(child->thread.ibr, 0,
			       sizeof(child->thread.ibr));
		}

		ptr += regnum;

		if ((regnum & 1) && write_access) {
			/*
			 * don't let the user set kernel-level breakpoints:
			 * bits 56-58 of the value written to an odd-numbered
			 * register are forced to zero.
			 */
			*ptr = *data & ~(7UL << 56);
			return 0;
		}
	}
	/* Common tail for every branch that resolved to a memory slot. */
	if (write_access)
		*ptr = *data;
	else
		*data = *ptr;
	return 0;
}
1068 | |||
1069 | static long | ||
1070 | ptrace_getregs (struct task_struct *child, struct pt_all_user_regs __user *ppr) | ||
1071 | { | ||
1072 | unsigned long psr, ec, lc, rnat, bsp, cfm, nat_bits, val; | ||
1073 | struct unw_frame_info info; | ||
1074 | struct ia64_fpreg fpval; | ||
1075 | struct switch_stack *sw; | ||
1076 | struct pt_regs *pt; | ||
1077 | long ret, retval = 0; | ||
1078 | char nat = 0; | ||
1079 | int i; | ||
1080 | |||
1081 | if (!access_ok(VERIFY_WRITE, ppr, sizeof(struct pt_all_user_regs))) | ||
1082 | return -EIO; | ||
1083 | |||
1084 | pt = ia64_task_regs(child); | ||
1085 | sw = (struct switch_stack *) (child->thread.ksp + 16); | ||
1086 | unw_init_from_blocked_task(&info, child); | ||
1087 | if (unw_unwind_to_user(&info) < 0) { | ||
1088 | return -EIO; | ||
1089 | } | ||
1090 | |||
1091 | if (((unsigned long) ppr & 0x7) != 0) { | ||
1092 | dprintk("ptrace:unaligned register address %p\n", ppr); | ||
1093 | return -EIO; | ||
1094 | } | ||
1095 | |||
1096 | if (access_uarea(child, PT_CR_IPSR, &psr, 0) < 0 | ||
1097 | || access_uarea(child, PT_AR_EC, &ec, 0) < 0 | ||
1098 | || access_uarea(child, PT_AR_LC, &lc, 0) < 0 | ||
1099 | || access_uarea(child, PT_AR_RNAT, &rnat, 0) < 0 | ||
1100 | || access_uarea(child, PT_AR_BSP, &bsp, 0) < 0 | ||
1101 | || access_uarea(child, PT_CFM, &cfm, 0) | ||
1102 | || access_uarea(child, PT_NAT_BITS, &nat_bits, 0)) | ||
1103 | return -EIO; | ||
1104 | |||
1105 | /* control regs */ | ||
1106 | |||
1107 | retval |= __put_user(pt->cr_iip, &ppr->cr_iip); | ||
1108 | retval |= __put_user(psr, &ppr->cr_ipsr); | ||
1109 | |||
1110 | /* app regs */ | ||
1111 | |||
1112 | retval |= __put_user(pt->ar_pfs, &ppr->ar[PT_AUR_PFS]); | ||
1113 | retval |= __put_user(pt->ar_rsc, &ppr->ar[PT_AUR_RSC]); | ||
1114 | retval |= __put_user(pt->ar_bspstore, &ppr->ar[PT_AUR_BSPSTORE]); | ||
1115 | retval |= __put_user(pt->ar_unat, &ppr->ar[PT_AUR_UNAT]); | ||
1116 | retval |= __put_user(pt->ar_ccv, &ppr->ar[PT_AUR_CCV]); | ||
1117 | retval |= __put_user(pt->ar_fpsr, &ppr->ar[PT_AUR_FPSR]); | ||
1118 | |||
1119 | retval |= __put_user(ec, &ppr->ar[PT_AUR_EC]); | ||
1120 | retval |= __put_user(lc, &ppr->ar[PT_AUR_LC]); | ||
1121 | retval |= __put_user(rnat, &ppr->ar[PT_AUR_RNAT]); | ||
1122 | retval |= __put_user(bsp, &ppr->ar[PT_AUR_BSP]); | ||
1123 | retval |= __put_user(cfm, &ppr->cfm); | ||
1124 | |||
1125 | /* gr1-gr3 */ | ||
1126 | |||
1127 | retval |= __copy_to_user(&ppr->gr[1], &pt->r1, sizeof(long)); | ||
1128 | retval |= __copy_to_user(&ppr->gr[2], &pt->r2, sizeof(long) *2); | ||
1129 | |||
1130 | /* gr4-gr7 */ | ||
1131 | |||
1132 | for (i = 4; i < 8; i++) { | ||
1133 | if (unw_access_gr(&info, i, &val, &nat, 0) < 0) | ||
1134 | return -EIO; | ||
1135 | retval |= __put_user(val, &ppr->gr[i]); | ||
1136 | } | ||
1137 | |||
1138 | /* gr8-gr11 */ | ||
1139 | |||
1140 | retval |= __copy_to_user(&ppr->gr[8], &pt->r8, sizeof(long) * 4); | ||
1141 | |||
1142 | /* gr12-gr15 */ | ||
1143 | |||
1144 | retval |= __copy_to_user(&ppr->gr[12], &pt->r12, sizeof(long) * 2); | ||
1145 | retval |= __copy_to_user(&ppr->gr[14], &pt->r14, sizeof(long)); | ||
1146 | retval |= __copy_to_user(&ppr->gr[15], &pt->r15, sizeof(long)); | ||
1147 | |||
1148 | /* gr16-gr31 */ | ||
1149 | |||
1150 | retval |= __copy_to_user(&ppr->gr[16], &pt->r16, sizeof(long) * 16); | ||
1151 | |||
1152 | /* b0 */ | ||
1153 | |||
1154 | retval |= __put_user(pt->b0, &ppr->br[0]); | ||
1155 | |||
1156 | /* b1-b5 */ | ||
1157 | |||
1158 | for (i = 1; i < 6; i++) { | ||
1159 | if (unw_access_br(&info, i, &val, 0) < 0) | ||
1160 | return -EIO; | ||
1161 | __put_user(val, &ppr->br[i]); | ||
1162 | } | ||
1163 | |||
1164 | /* b6-b7 */ | ||
1165 | |||
1166 | retval |= __put_user(pt->b6, &ppr->br[6]); | ||
1167 | retval |= __put_user(pt->b7, &ppr->br[7]); | ||
1168 | |||
1169 | /* fr2-fr5 */ | ||
1170 | |||
1171 | for (i = 2; i < 6; i++) { | ||
1172 | if (unw_get_fr(&info, i, &fpval) < 0) | ||
1173 | return -EIO; | ||
1174 | retval |= __copy_to_user(&ppr->fr[i], &fpval, sizeof (fpval)); | ||
1175 | } | ||
1176 | |||
1177 | /* fr6-fr11 */ | ||
1178 | |||
1179 | retval |= __copy_to_user(&ppr->fr[6], &pt->f6, | ||
1180 | sizeof(struct ia64_fpreg) * 6); | ||
1181 | |||
1182 | /* fp scratch regs(12-15) */ | ||
1183 | |||
1184 | retval |= __copy_to_user(&ppr->fr[12], &sw->f12, | ||
1185 | sizeof(struct ia64_fpreg) * 4); | ||
1186 | |||
1187 | /* fr16-fr31 */ | ||
1188 | |||
1189 | for (i = 16; i < 32; i++) { | ||
1190 | if (unw_get_fr(&info, i, &fpval) < 0) | ||
1191 | return -EIO; | ||
1192 | retval |= __copy_to_user(&ppr->fr[i], &fpval, sizeof (fpval)); | ||
1193 | } | ||
1194 | |||
1195 | /* fph */ | ||
1196 | |||
1197 | ia64_flush_fph(child); | ||
1198 | retval |= __copy_to_user(&ppr->fr[32], &child->thread.fph, | ||
1199 | sizeof(ppr->fr[32]) * 96); | ||
1200 | |||
1201 | /* preds */ | ||
1202 | |||
1203 | retval |= __put_user(pt->pr, &ppr->pr); | ||
1204 | |||
1205 | /* nat bits */ | ||
1206 | |||
1207 | retval |= __put_user(nat_bits, &ppr->nat); | ||
1208 | |||
1209 | ret = retval ? -EIO : 0; | ||
1210 | return ret; | ||
1211 | } | ||
1212 | |||
1213 | static long | ||
1214 | ptrace_setregs (struct task_struct *child, struct pt_all_user_regs __user *ppr) | ||
1215 | { | ||
1216 | unsigned long psr, ec, lc, rnat, bsp, cfm, nat_bits, val = 0; | ||
1217 | struct unw_frame_info info; | ||
1218 | struct switch_stack *sw; | ||
1219 | struct ia64_fpreg fpval; | ||
1220 | struct pt_regs *pt; | ||
1221 | long ret, retval = 0; | ||
1222 | int i; | ||
1223 | |||
1224 | memset(&fpval, 0, sizeof(fpval)); | ||
1225 | |||
1226 | if (!access_ok(VERIFY_READ, ppr, sizeof(struct pt_all_user_regs))) | ||
1227 | return -EIO; | ||
1228 | |||
1229 | pt = ia64_task_regs(child); | ||
1230 | sw = (struct switch_stack *) (child->thread.ksp + 16); | ||
1231 | unw_init_from_blocked_task(&info, child); | ||
1232 | if (unw_unwind_to_user(&info) < 0) { | ||
1233 | return -EIO; | ||
1234 | } | ||
1235 | |||
1236 | if (((unsigned long) ppr & 0x7) != 0) { | ||
1237 | dprintk("ptrace:unaligned register address %p\n", ppr); | ||
1238 | return -EIO; | ||
1239 | } | ||
1240 | |||
1241 | /* control regs */ | ||
1242 | |||
1243 | retval |= __get_user(pt->cr_iip, &ppr->cr_iip); | ||
1244 | retval |= __get_user(psr, &ppr->cr_ipsr); | ||
1245 | |||
1246 | /* app regs */ | ||
1247 | |||
1248 | retval |= __get_user(pt->ar_pfs, &ppr->ar[PT_AUR_PFS]); | ||
1249 | retval |= __get_user(pt->ar_rsc, &ppr->ar[PT_AUR_RSC]); | ||
1250 | retval |= __get_user(pt->ar_bspstore, &ppr->ar[PT_AUR_BSPSTORE]); | ||
1251 | retval |= __get_user(pt->ar_unat, &ppr->ar[PT_AUR_UNAT]); | ||
1252 | retval |= __get_user(pt->ar_ccv, &ppr->ar[PT_AUR_CCV]); | ||
1253 | retval |= __get_user(pt->ar_fpsr, &ppr->ar[PT_AUR_FPSR]); | ||
1254 | |||
1255 | retval |= __get_user(ec, &ppr->ar[PT_AUR_EC]); | ||
1256 | retval |= __get_user(lc, &ppr->ar[PT_AUR_LC]); | ||
1257 | retval |= __get_user(rnat, &ppr->ar[PT_AUR_RNAT]); | ||
1258 | retval |= __get_user(bsp, &ppr->ar[PT_AUR_BSP]); | ||
1259 | retval |= __get_user(cfm, &ppr->cfm); | ||
1260 | |||
1261 | /* gr1-gr3 */ | ||
1262 | |||
1263 | retval |= __copy_from_user(&pt->r1, &ppr->gr[1], sizeof(long)); | ||
1264 | retval |= __copy_from_user(&pt->r2, &ppr->gr[2], sizeof(long) * 2); | ||
1265 | |||
1266 | /* gr4-gr7 */ | ||
1267 | |||
1268 | for (i = 4; i < 8; i++) { | ||
1269 | retval |= __get_user(val, &ppr->gr[i]); | ||
1270 | /* NaT bit will be set via PT_NAT_BITS: */ | ||
1271 | if (unw_set_gr(&info, i, val, 0) < 0) | ||
1272 | return -EIO; | ||
1273 | } | ||
1274 | |||
1275 | /* gr8-gr11 */ | ||
1276 | |||
1277 | retval |= __copy_from_user(&pt->r8, &ppr->gr[8], sizeof(long) * 4); | ||
1278 | |||
1279 | /* gr12-gr15 */ | ||
1280 | |||
1281 | retval |= __copy_from_user(&pt->r12, &ppr->gr[12], sizeof(long) * 2); | ||
1282 | retval |= __copy_from_user(&pt->r14, &ppr->gr[14], sizeof(long)); | ||
1283 | retval |= __copy_from_user(&pt->r15, &ppr->gr[15], sizeof(long)); | ||
1284 | |||
1285 | /* gr16-gr31 */ | ||
1286 | |||
1287 | retval |= __copy_from_user(&pt->r16, &ppr->gr[16], sizeof(long) * 16); | ||
1288 | |||
1289 | /* b0 */ | ||
1290 | |||
1291 | retval |= __get_user(pt->b0, &ppr->br[0]); | ||
1292 | |||
1293 | /* b1-b5 */ | ||
1294 | |||
1295 | for (i = 1; i < 6; i++) { | ||
1296 | retval |= __get_user(val, &ppr->br[i]); | ||
1297 | unw_set_br(&info, i, val); | ||
1298 | } | ||
1299 | |||
1300 | /* b6-b7 */ | ||
1301 | |||
1302 | retval |= __get_user(pt->b6, &ppr->br[6]); | ||
1303 | retval |= __get_user(pt->b7, &ppr->br[7]); | ||
1304 | |||
1305 | /* fr2-fr5 */ | ||
1306 | |||
1307 | for (i = 2; i < 6; i++) { | ||
1308 | retval |= __copy_from_user(&fpval, &ppr->fr[i], sizeof(fpval)); | ||
1309 | if (unw_set_fr(&info, i, fpval) < 0) | ||
1310 | return -EIO; | ||
1311 | } | ||
1312 | |||
1313 | /* fr6-fr11 */ | ||
1314 | |||
1315 | retval |= __copy_from_user(&pt->f6, &ppr->fr[6], | ||
1316 | sizeof(ppr->fr[6]) * 6); | ||
1317 | |||
1318 | /* fp scratch regs(12-15) */ | ||
1319 | |||
1320 | retval |= __copy_from_user(&sw->f12, &ppr->fr[12], | ||
1321 | sizeof(ppr->fr[12]) * 4); | ||
1322 | |||
1323 | /* fr16-fr31 */ | ||
1324 | |||
1325 | for (i = 16; i < 32; i++) { | ||
1326 | retval |= __copy_from_user(&fpval, &ppr->fr[i], | ||
1327 | sizeof(fpval)); | ||
1328 | if (unw_set_fr(&info, i, fpval) < 0) | ||
1329 | return -EIO; | ||
1330 | } | ||
1331 | |||
1332 | /* fph */ | ||
1333 | |||
1334 | ia64_sync_fph(child); | ||
1335 | retval |= __copy_from_user(&child->thread.fph, &ppr->fr[32], | ||
1336 | sizeof(ppr->fr[32]) * 96); | ||
1337 | |||
1338 | /* preds */ | ||
1339 | |||
1340 | retval |= __get_user(pt->pr, &ppr->pr); | ||
1341 | |||
1342 | /* nat bits */ | ||
1343 | |||
1344 | retval |= __get_user(nat_bits, &ppr->nat); | ||
1345 | |||
1346 | retval |= access_uarea(child, PT_CR_IPSR, &psr, 1); | ||
1347 | retval |= access_uarea(child, PT_AR_EC, &ec, 1); | ||
1348 | retval |= access_uarea(child, PT_AR_LC, &lc, 1); | ||
1349 | retval |= access_uarea(child, PT_AR_RNAT, &rnat, 1); | ||
1350 | retval |= access_uarea(child, PT_AR_BSP, &bsp, 1); | ||
1351 | retval |= access_uarea(child, PT_CFM, &cfm, 1); | ||
1352 | retval |= access_uarea(child, PT_NAT_BITS, &nat_bits, 1); | ||
1353 | |||
1354 | ret = retval ? -EIO : 0; | ||
1355 | return ret; | ||
1356 | } | ||
1357 | |||
1358 | /* | ||
1359 | * Called by kernel/ptrace.c when detaching.. | ||
1360 | * | ||
1361 | * Make sure the single step bit is not set. | ||
1362 | */ | ||
1363 | void | ||
1364 | ptrace_disable (struct task_struct *child) | ||
1365 | { | ||
1366 | struct ia64_psr *child_psr = ia64_psr(ia64_task_regs(child)); | ||
1367 | |||
1368 | /* make sure the single step/taken-branch trap bits are not set: */ | ||
1369 | child_psr->ss = 0; | ||
1370 | child_psr->tb = 0; | ||
1371 | } | ||
1372 | |||
/*
 * ia64 entry point of the ptrace() system call.
 *
 *   request  PTRACE_* operation
 *   pid      target task (ignored for PTRACE_TRACEME)
 *   addr     address / user-area offset, depending on REQUEST
 *   data     value, signal number or user pointer, depending on REQUEST
 *
 * Runs under the big kernel lock.  Requests not handled explicitly here
 * are passed on to ptrace_request().  Returns the peeked word for the
 * PEEK requests, otherwise 0 or a negative errno.
 */
asmlinkage long
sys_ptrace (long request, pid_t pid, unsigned long addr, unsigned long data)
{
	struct pt_regs *pt;
	unsigned long urbs_end, peek_or_poke;
	struct task_struct *child;
	struct switch_stack *sw;
	long ret;

	lock_kernel();
	ret = -EPERM;
	if (request == PTRACE_TRACEME) {
		/* are we already being traced? */
		if (current->ptrace & PT_PTRACED)
			goto out;
		ret = security_ptrace(current->parent, current);
		if (ret)
			goto out;
		current->ptrace |= PT_PTRACED;
		ret = 0;
		goto out;
	}

	peek_or_poke = (request == PTRACE_PEEKTEXT
			|| request == PTRACE_PEEKDATA
			|| request == PTRACE_POKETEXT
			|| request == PTRACE_POKEDATA);
	ret = -ESRCH;
	/*
	 * Look up the target under tasklist_lock and take a reference
	 * before dropping the lock.  For memory peeks/pokes the target may
	 * be replaced by the thread selected by find_thread_for_addr().
	 */
	read_lock(&tasklist_lock);
	{
		child = find_task_by_pid(pid);
		if (child) {
			if (peek_or_poke)
				child = find_thread_for_addr(child, addr);
			get_task_struct(child);
		}
	}
	read_unlock(&tasklist_lock);
	if (!child)
		goto out;
	ret = -EPERM;
	if (pid == 1)		/* no messing around with init! */
		goto out_tsk;

	if (request == PTRACE_ATTACH) {
		ret = ptrace_attach(child);
		goto out_tsk;
	}

	/* Every remaining request requires CHILD to be attached. */
	ret = ptrace_check_attach(child, request == PTRACE_KILL);
	if (ret < 0)
		goto out_tsk;

	pt = ia64_task_regs(child);
	sw = (struct switch_stack *) (child->thread.ksp + 16);

	switch (request) {
	      case PTRACE_PEEKTEXT:
	      case PTRACE_PEEKDATA:
		/* read word at location addr */
		urbs_end = ia64_get_user_rbs_end(child, pt, NULL);
		ret = ia64_peek(child, sw, urbs_end, addr, &data);
		if (ret == 0) {
			ret = data;
			/* ensure "ret" is not mistaken as an error code: */
			force_successful_syscall_return();
		}
		goto out_tsk;

	      case PTRACE_POKETEXT:
	      case PTRACE_POKEDATA:
		/* write the word at location addr */
		urbs_end = ia64_get_user_rbs_end(child, pt, NULL);
		ret = ia64_poke(child, sw, urbs_end, addr, data);
		goto out_tsk;

	      case PTRACE_PEEKUSR:
		/* read the word at addr in the USER area */
		if (access_uarea(child, addr, &data, 0) < 0) {
			ret = -EIO;
			goto out_tsk;
		}
		ret = data;
		/* ensure "ret" is not mistaken as an error code */
		force_successful_syscall_return();
		goto out_tsk;

	      case PTRACE_POKEUSR:
		/* write the word at addr in the USER area */
		if (access_uarea(child, addr, &data, 1) < 0) {
			ret = -EIO;
			goto out_tsk;
		}
		ret = 0;
		goto out_tsk;

	      case PTRACE_OLD_GETSIGINFO:
		/* for backwards-compatibility */
		ret = ptrace_request(child, PTRACE_GETSIGINFO, addr, data);
		goto out_tsk;

	      case PTRACE_OLD_SETSIGINFO:
		/* for backwards-compatibility */
		ret = ptrace_request(child, PTRACE_SETSIGINFO, addr, data);
		goto out_tsk;

	      case PTRACE_SYSCALL:
		/* continue and stop at next (return from) syscall */
	      case PTRACE_CONT:
		/* restart after signal. */
		ret = -EIO;
		/* DATA is the signal to deliver on resume */
		if (data > _NSIG)
			goto out_tsk;
		if (request == PTRACE_SYSCALL)
			set_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
		else
			clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
		child->exit_code = data;

		/*
		 * Make sure the single step/taken-branch trap bits
		 * are not set:
		 */
		ia64_psr(pt)->ss = 0;
		ia64_psr(pt)->tb = 0;

		wake_up_process(child);
		ret = 0;
		goto out_tsk;

	      case PTRACE_KILL:
		/*
		 * Make the child exit.  Best I can do is send it a
		 * sigkill.  Perhaps it should be put in the status
		 * that it wants to exit.
		 */
		if (child->exit_state == EXIT_ZOMBIE)
			/* already dead */
			goto out_tsk;
		child->exit_code = SIGKILL;

		ptrace_disable(child);
		wake_up_process(child);
		ret = 0;
		goto out_tsk;

	      case PTRACE_SINGLESTEP:
		/* let child execute for one instruction */
	      case PTRACE_SINGLEBLOCK:
		ret = -EIO;
		/* DATA is the signal to deliver on resume */
		if (data > _NSIG)
			goto out_tsk;

		clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
		/* SINGLESTEP sets psr.ss, SINGLEBLOCK sets psr.tb */
		if (request == PTRACE_SINGLESTEP) {
			ia64_psr(pt)->ss = 1;
		} else {
			ia64_psr(pt)->tb = 1;
		}
		child->exit_code = data;

		/* give it a chance to run. */
		wake_up_process(child);
		ret = 0;
		goto out_tsk;

	      case PTRACE_DETACH:
		/* detach a process that was attached. */
		ret = ptrace_detach(child, data);
		goto out_tsk;

	      case PTRACE_GETREGS:
		/* DATA is a user pointer to struct pt_all_user_regs */
		ret = ptrace_getregs(child,
				     (struct pt_all_user_regs __user *) data);
		goto out_tsk;

	      case PTRACE_SETREGS:
		/* DATA is a user pointer to struct pt_all_user_regs */
		ret = ptrace_setregs(child,
				     (struct pt_all_user_regs __user *) data);
		goto out_tsk;

	      default:
		ret = ptrace_request(child, request, addr, data);
		goto out_tsk;
	}
  out_tsk:
	/* drop the reference taken by get_task_struct() above */
	put_task_struct(child);
  out:
	unlock_kernel();
	return ret;
}
1564 | |||
1565 | |||
1566 | void | ||
1567 | syscall_trace (void) | ||
1568 | { | ||
1569 | if (!test_thread_flag(TIF_SYSCALL_TRACE)) | ||
1570 | return; | ||
1571 | if (!(current->ptrace & PT_PTRACED)) | ||
1572 | return; | ||
1573 | /* | ||
1574 | * The 0x80 provides a way for the tracing parent to | ||
1575 | * distinguish between a syscall stop and SIGTRAP delivery. | ||
1576 | */ | ||
1577 | ptrace_notify(SIGTRAP | ||
1578 | | ((current->ptrace & PT_TRACESYSGOOD) ? 0x80 : 0)); | ||
1579 | |||
1580 | /* | ||
1581 | * This isn't the same as continuing with a signal, but it | ||
1582 | * will do for normal use. strace only continues with a | ||
1583 | * signal if the stopping signal is not SIGTRAP. -brl | ||
1584 | */ | ||
1585 | if (current->exit_code) { | ||
1586 | send_sig(current->exit_code, current, 1); | ||
1587 | current->exit_code = 0; | ||
1588 | } | ||
1589 | } | ||
1590 | |||
1591 | /* "asmlinkage" so the input arguments are preserved... */ | ||
1592 | |||
1593 | asmlinkage void | ||
1594 | syscall_trace_enter (long arg0, long arg1, long arg2, long arg3, | ||
1595 | long arg4, long arg5, long arg6, long arg7, | ||
1596 | struct pt_regs regs) | ||
1597 | { | ||
1598 | long syscall; | ||
1599 | |||
1600 | if (unlikely(current->audit_context)) { | ||
1601 | if (IS_IA32_PROCESS(®s)) | ||
1602 | syscall = regs.r1; | ||
1603 | else | ||
1604 | syscall = regs.r15; | ||
1605 | |||
1606 | audit_syscall_entry(current, syscall, arg0, arg1, arg2, arg3); | ||
1607 | } | ||
1608 | |||
1609 | if (test_thread_flag(TIF_SYSCALL_TRACE) | ||
1610 | && (current->ptrace & PT_PTRACED)) | ||
1611 | syscall_trace(); | ||
1612 | } | ||
1613 | |||
1614 | /* "asmlinkage" so the input arguments are preserved... */ | ||
1615 | |||
1616 | asmlinkage void | ||
1617 | syscall_trace_leave (long arg0, long arg1, long arg2, long arg3, | ||
1618 | long arg4, long arg5, long arg6, long arg7, | ||
1619 | struct pt_regs regs) | ||
1620 | { | ||
1621 | if (unlikely(current->audit_context)) | ||
1622 | audit_syscall_exit(current, regs.r8); | ||
1623 | |||
1624 | if (test_thread_flag(TIF_SYSCALL_TRACE) | ||
1625 | && (current->ptrace & PT_PTRACED)) | ||
1626 | syscall_trace(); | ||
1627 | } | ||
diff --git a/arch/ia64/kernel/sal.c b/arch/ia64/kernel/sal.c new file mode 100644 index 000000000000..acc0f132f86c --- /dev/null +++ b/arch/ia64/kernel/sal.c | |||
@@ -0,0 +1,302 @@ | |||
1 | /* | ||
2 | * System Abstraction Layer (SAL) interface routines. | ||
3 | * | ||
4 | * Copyright (C) 1998, 1999, 2001, 2003 Hewlett-Packard Co | ||
5 | * David Mosberger-Tang <davidm@hpl.hp.com> | ||
6 | * Copyright (C) 1999 VA Linux Systems | ||
7 | * Copyright (C) 1999 Walt Drummond <drummond@valinux.com> | ||
8 | */ | ||
9 | #include <linux/config.h> | ||
10 | |||
11 | #include <linux/kernel.h> | ||
12 | #include <linux/init.h> | ||
13 | #include <linux/module.h> | ||
14 | #include <linux/spinlock.h> | ||
15 | #include <linux/string.h> | ||
16 | |||
17 | #include <asm/page.h> | ||
18 | #include <asm/sal.h> | ||
19 | #include <asm/pal.h> | ||
20 | |||
21 | __cacheline_aligned DEFINE_SPINLOCK(sal_lock); | ||
22 | unsigned long sal_platform_features; | ||
23 | |||
24 | unsigned short sal_revision; | ||
25 | unsigned short sal_version; | ||
26 | |||
27 | #define SAL_MAJOR(x) ((x) >> 8) | ||
28 | #define SAL_MINOR(x) ((x) & 0xff) | ||
29 | |||
30 | static struct { | ||
31 | void *addr; /* function entry point */ | ||
32 | void *gpval; /* gp value to use */ | ||
33 | } pdesc; | ||
34 | |||
/*
 * Fallback handler: ia64_sal initially points here.  It does nothing and
 * always returns -1.
 */
static long
default_handler (void)
{
	const long status = -1;

	return status;
}
40 | |||
41 | ia64_sal_handler ia64_sal = (ia64_sal_handler) default_handler; | ||
42 | ia64_sal_desc_ptc_t *ia64_ptc_domain_info; | ||
43 | |||
/*
 * ia64_sal_strerror - map a SAL status code to a descriptive string.
 * @status: status value to describe.
 *
 * Status 1 and the codes 0 down to -20 have dedicated messages; every
 * other value yields "Unknown SAL status code".  The returned pointer
 * refers to a string literal.
 */
const char *
ia64_sal_strerror (long status)
{
	/* text[k] describes status -k */
	static const char *const text[] = {
		"Call completed without error",
		"Not implemented",
		"Invalid argument",
		"Call completed with error",
		"Virtual address not registered",
		"No information available",
		"Insufficient space to add the entry",
		"Invalid entry_addr value",
		"Invalid interrupt vector",
		"Requested memory not available",
		"Unable to write to the NVM device",
		"Invalid partition type specified",
		"Invalid NVM_Object id specified",
		"NVM_Object already has the maximum number "
			"of partitions",
		"Insufficient space in partition for the "
			"requested write sub-function",
		"Insufficient data buffer space for the "
			"requested read record sub-function",
		"Scratch buffer required for the write/delete "
			"sub-function",
		"Insufficient space in the NVM_Object for the "
			"requested create sub-function",
		"Invalid value specified in the partition_rec "
			"argument",
		"Record oriented I/O not supported for this "
			"partition",
		"Bad format of record to be written or "
			"required keyword variable not "
			"specified",
	};
	enum { NR_TEXTS = sizeof(text) / sizeof(text[0]) };

	if (status == 1)
		return "Effect a warm boot of the system to complete "
			"the update";

	/* range-check before negating so that -status cannot overflow */
	if (status <= 0 && status > -(long) NR_TEXTS)
		return text[-status];

	return "Unknown SAL status code";
}
85 | |||
86 | void __init | ||
87 | ia64_sal_handler_init (void *entry_point, void *gpval) | ||
88 | { | ||
89 | /* fill in the SAL procedure descriptor and point ia64_sal to it: */ | ||
90 | pdesc.addr = entry_point; | ||
91 | pdesc.gpval = gpval; | ||
92 | ia64_sal = (ia64_sal_handler) &pdesc; | ||
93 | } | ||
94 | |||
95 | static void __init | ||
96 | check_versions (struct ia64_sal_systab *systab) | ||
97 | { | ||
98 | sal_revision = (systab->sal_rev_major << 8) | systab->sal_rev_minor; | ||
99 | sal_version = (systab->sal_b_rev_major << 8) | systab->sal_b_rev_minor; | ||
100 | |||
101 | /* Check for broken firmware */ | ||
102 | if ((sal_revision == SAL_VERSION_CODE(49, 29)) | ||
103 | && (sal_version == SAL_VERSION_CODE(49, 29))) | ||
104 | { | ||
105 | /* | ||
106 | * Old firmware for zx2000 prototypes have this weird version number, | ||
107 | * reset it to something sane. | ||
108 | */ | ||
109 | sal_revision = SAL_VERSION_CODE(2, 8); | ||
110 | sal_version = SAL_VERSION_CODE(0, 0); | ||
111 | } | ||
112 | } | ||
113 | |||
114 | static void __init | ||
115 | sal_desc_entry_point (void *p) | ||
116 | { | ||
117 | struct ia64_sal_desc_entry_point *ep = p; | ||
118 | ia64_pal_handler_init(__va(ep->pal_proc)); | ||
119 | ia64_sal_handler_init(__va(ep->sal_proc), __va(ep->gp)); | ||
120 | } | ||
121 | |||
122 | #ifdef CONFIG_SMP | ||
123 | static void __init | ||
124 | set_smp_redirect (int flag) | ||
125 | { | ||
126 | #ifndef CONFIG_HOTPLUG_CPU | ||
127 | if (no_int_routing) | ||
128 | smp_int_redirect &= ~flag; | ||
129 | else | ||
130 | smp_int_redirect |= flag; | ||
131 | #else | ||
132 | /* | ||
133 | * For CPU Hotplug we dont want to do any chipset supported | ||
134 | * interrupt redirection. The reason is this would require that | ||
135 | * All interrupts be stopped and hard bind the irq to a cpu. | ||
136 | * Later when the interrupt is fired we need to set the redir hint | ||
137 | * on again in the vector. This is combersome for something that the | ||
138 | * user mode irq balancer will solve anyways. | ||
139 | */ | ||
140 | no_int_routing=1; | ||
141 | smp_int_redirect &= ~flag; | ||
142 | #endif | ||
143 | } | ||
144 | #else | ||
145 | #define set_smp_redirect(flag) do { } while (0) | ||
146 | #endif | ||
147 | |||
148 | static void __init | ||
149 | sal_desc_platform_feature (void *p) | ||
150 | { | ||
151 | struct ia64_sal_desc_platform_feature *pf = p; | ||
152 | sal_platform_features = pf->feature_mask; | ||
153 | |||
154 | printk(KERN_INFO "SAL Platform features:"); | ||
155 | if (!sal_platform_features) { | ||
156 | printk(" None\n"); | ||
157 | return; | ||
158 | } | ||
159 | |||
160 | if (sal_platform_features & IA64_SAL_PLATFORM_FEATURE_BUS_LOCK) | ||
161 | printk(" BusLock"); | ||
162 | if (sal_platform_features & IA64_SAL_PLATFORM_FEATURE_IRQ_REDIR_HINT) { | ||
163 | printk(" IRQ_Redirection"); | ||
164 | set_smp_redirect(SMP_IRQ_REDIRECTION); | ||
165 | } | ||
166 | if (sal_platform_features & IA64_SAL_PLATFORM_FEATURE_IPI_REDIR_HINT) { | ||
167 | printk(" IPI_Redirection"); | ||
168 | set_smp_redirect(SMP_IPI_REDIRECTION); | ||
169 | } | ||
170 | if (sal_platform_features & IA64_SAL_PLATFORM_FEATURE_ITC_DRIFT) | ||
171 | printk(" ITC_Drift"); | ||
172 | printk("\n"); | ||
173 | } | ||
174 | |||
175 | #ifdef CONFIG_SMP | ||
176 | static void __init | ||
177 | sal_desc_ap_wakeup (void *p) | ||
178 | { | ||
179 | struct ia64_sal_desc_ap_wakeup *ap = p; | ||
180 | |||
181 | switch (ap->mechanism) { | ||
182 | case IA64_SAL_AP_EXTERNAL_INT: | ||
183 | ap_wakeup_vector = ap->vector; | ||
184 | printk(KERN_INFO "SAL: AP wakeup using external interrupt " | ||
185 | "vector 0x%lx\n", ap_wakeup_vector); | ||
186 | break; | ||
187 | default: | ||
188 | printk(KERN_ERR "SAL: AP wakeup mechanism unsupported!\n"); | ||
189 | break; | ||
190 | } | ||
191 | } | ||
192 | |||
193 | static void __init | ||
194 | chk_nointroute_opt(void) | ||
195 | { | ||
196 | char *cp; | ||
197 | extern char saved_command_line[]; | ||
198 | |||
199 | for (cp = saved_command_line; *cp; ) { | ||
200 | if (memcmp(cp, "nointroute", 10) == 0) { | ||
201 | no_int_routing = 1; | ||
202 | printk ("no_int_routing on\n"); | ||
203 | break; | ||
204 | } else { | ||
205 | while (*cp != ' ' && *cp) | ||
206 | ++cp; | ||
207 | while (*cp == ' ') | ||
208 | ++cp; | ||
209 | } | ||
210 | } | ||
211 | } | ||
212 | |||
213 | #else | ||
214 | static void __init sal_desc_ap_wakeup(void *p) { } | ||
215 | #endif | ||
216 | |||
217 | void __init | ||
218 | ia64_sal_init (struct ia64_sal_systab *systab) | ||
219 | { | ||
220 | char *p; | ||
221 | int i; | ||
222 | |||
223 | if (!systab) { | ||
224 | printk(KERN_WARNING "Hmm, no SAL System Table.\n"); | ||
225 | return; | ||
226 | } | ||
227 | |||
228 | if (strncmp(systab->signature, "SST_", 4) != 0) | ||
229 | printk(KERN_ERR "bad signature in system table!"); | ||
230 | |||
231 | check_versions(systab); | ||
232 | #ifdef CONFIG_SMP | ||
233 | chk_nointroute_opt(); | ||
234 | #endif | ||
235 | |||
236 | /* revisions are coded in BCD, so %x does the job for us */ | ||
237 | printk(KERN_INFO "SAL %x.%x: %.32s %.32s%sversion %x.%x\n", | ||
238 | SAL_MAJOR(sal_revision), SAL_MINOR(sal_revision), | ||
239 | systab->oem_id, systab->product_id, | ||
240 | systab->product_id[0] ? " " : "", | ||
241 | SAL_MAJOR(sal_version), SAL_MINOR(sal_version)); | ||
242 | |||
243 | p = (char *) (systab + 1); | ||
244 | for (i = 0; i < systab->entry_count; i++) { | ||
245 | /* | ||
246 | * The first byte of each entry type contains the type | ||
247 | * descriptor. | ||
248 | */ | ||
249 | switch (*p) { | ||
250 | case SAL_DESC_ENTRY_POINT: | ||
251 | sal_desc_entry_point(p); | ||
252 | break; | ||
253 | case SAL_DESC_PLATFORM_FEATURE: | ||
254 | sal_desc_platform_feature(p); | ||
255 | break; | ||
256 | case SAL_DESC_PTC: | ||
257 | ia64_ptc_domain_info = (ia64_sal_desc_ptc_t *)p; | ||
258 | break; | ||
259 | case SAL_DESC_AP_WAKEUP: | ||
260 | sal_desc_ap_wakeup(p); | ||
261 | break; | ||
262 | } | ||
263 | p += SAL_DESC_SIZE(*p); | ||
264 | } | ||
265 | } | ||
266 | |||
267 | int | ||
268 | ia64_sal_oemcall(struct ia64_sal_retval *isrvp, u64 oemfunc, u64 arg1, | ||
269 | u64 arg2, u64 arg3, u64 arg4, u64 arg5, u64 arg6, u64 arg7) | ||
270 | { | ||
271 | if (oemfunc < IA64_SAL_OEMFUNC_MIN || oemfunc > IA64_SAL_OEMFUNC_MAX) | ||
272 | return -1; | ||
273 | SAL_CALL(*isrvp, oemfunc, arg1, arg2, arg3, arg4, arg5, arg6, arg7); | ||
274 | return 0; | ||
275 | } | ||
276 | EXPORT_SYMBOL(ia64_sal_oemcall); | ||
277 | |||
278 | int | ||
279 | ia64_sal_oemcall_nolock(struct ia64_sal_retval *isrvp, u64 oemfunc, u64 arg1, | ||
280 | u64 arg2, u64 arg3, u64 arg4, u64 arg5, u64 arg6, | ||
281 | u64 arg7) | ||
282 | { | ||
283 | if (oemfunc < IA64_SAL_OEMFUNC_MIN || oemfunc > IA64_SAL_OEMFUNC_MAX) | ||
284 | return -1; | ||
285 | SAL_CALL_NOLOCK(*isrvp, oemfunc, arg1, arg2, arg3, arg4, arg5, arg6, | ||
286 | arg7); | ||
287 | return 0; | ||
288 | } | ||
289 | EXPORT_SYMBOL(ia64_sal_oemcall_nolock); | ||
290 | |||
291 | int | ||
292 | ia64_sal_oemcall_reentrant(struct ia64_sal_retval *isrvp, u64 oemfunc, | ||
293 | u64 arg1, u64 arg2, u64 arg3, u64 arg4, u64 arg5, | ||
294 | u64 arg6, u64 arg7) | ||
295 | { | ||
296 | if (oemfunc < IA64_SAL_OEMFUNC_MIN || oemfunc > IA64_SAL_OEMFUNC_MAX) | ||
297 | return -1; | ||
298 | SAL_CALL_REENTRANT(*isrvp, oemfunc, arg1, arg2, arg3, arg4, arg5, arg6, | ||
299 | arg7); | ||
300 | return 0; | ||
301 | } | ||
302 | EXPORT_SYMBOL(ia64_sal_oemcall_reentrant); | ||
diff --git a/arch/ia64/kernel/salinfo.c b/arch/ia64/kernel/salinfo.c new file mode 100644 index 000000000000..d227fabecd02 --- /dev/null +++ b/arch/ia64/kernel/salinfo.c | |||
@@ -0,0 +1,629 @@ | |||
1 | /* | ||
2 | * salinfo.c | ||
3 | * | ||
4 | * Creates entries in /proc/sal for various system features. | ||
5 | * | ||
6 | * Copyright (c) 2003 Silicon Graphics, Inc. All rights reserved. | ||
7 | * Copyright (c) 2003 Hewlett-Packard Co | ||
8 | * Bjorn Helgaas <bjorn.helgaas@hp.com> | ||
9 | * | ||
10 | * 10/30/2001 jbarnes@sgi.com copied much of Stephane's palinfo | ||
11 | * code to create this file | ||
12 | * Oct 23 2003 kaos@sgi.com | ||
13 | * Replace IPI with set_cpus_allowed() to read a record from the required cpu. | ||
14 | * Redesign salinfo log processing to separate interrupt and user space | ||
15 | * contexts. | ||
16 | * Cache the record across multi-block reads from user space. | ||
17 | * Support > 64 cpus. | ||
18 | * Delete module_exit and MOD_INC/DEC_COUNT, salinfo cannot be a module. | ||
19 | * | ||
20 | * Jan 28 2004 kaos@sgi.com | ||
21 | * Periodically check for outstanding MCA or INIT records. | ||
22 | * | ||
23 | * Dec 5 2004 kaos@sgi.com | ||
24 | * Standardize which records are cleared automatically. | ||
25 | */ | ||
26 | |||
27 | #include <linux/types.h> | ||
28 | #include <linux/proc_fs.h> | ||
29 | #include <linux/module.h> | ||
30 | #include <linux/smp.h> | ||
31 | #include <linux/smp_lock.h> | ||
32 | #include <linux/timer.h> | ||
33 | #include <linux/vmalloc.h> | ||
34 | |||
35 | #include <asm/semaphore.h> | ||
36 | #include <asm/sal.h> | ||
37 | #include <asm/uaccess.h> | ||
38 | |||
39 | MODULE_AUTHOR("Jesse Barnes <jbarnes@sgi.com>"); | ||
40 | MODULE_DESCRIPTION("/proc interface to IA-64 SAL features"); | ||
41 | MODULE_LICENSE("GPL"); | ||
42 | |||
43 | static int salinfo_read(char *page, char **start, off_t off, int count, int *eof, void *data); | ||
44 | |||
45 | typedef struct { | ||
46 | const char *name; /* name of the proc entry */ | ||
47 | unsigned long feature; /* feature bit */ | ||
48 | struct proc_dir_entry *entry; /* registered entry (removal) */ | ||
49 | } salinfo_entry_t; | ||
50 | |||
51 | /* | ||
52 | * List {name,feature} pairs for every entry in /proc/sal/<feature> | ||
53 | * that this module exports | ||
54 | */ | ||
55 | static salinfo_entry_t salinfo_entries[]={ | ||
56 | { "bus_lock", IA64_SAL_PLATFORM_FEATURE_BUS_LOCK, }, | ||
57 | { "irq_redirection", IA64_SAL_PLATFORM_FEATURE_IRQ_REDIR_HINT, }, | ||
58 | { "ipi_redirection", IA64_SAL_PLATFORM_FEATURE_IPI_REDIR_HINT, }, | ||
59 | { "itc_drift", IA64_SAL_PLATFORM_FEATURE_ITC_DRIFT, }, | ||
60 | }; | ||
61 | |||
62 | #define NR_SALINFO_ENTRIES ARRAY_SIZE(salinfo_entries) | ||
63 | |||
64 | static char *salinfo_log_name[] = { | ||
65 | "mca", | ||
66 | "init", | ||
67 | "cmc", | ||
68 | "cpe", | ||
69 | }; | ||
70 | |||
71 | static struct proc_dir_entry *salinfo_proc_entries[ | ||
72 | ARRAY_SIZE(salinfo_entries) + /* /proc/sal/bus_lock */ | ||
73 | ARRAY_SIZE(salinfo_log_name) + /* /proc/sal/{mca,...} */ | ||
74 | (2 * ARRAY_SIZE(salinfo_log_name)) + /* /proc/sal/mca/{event,data} */ | ||
75 | 1]; /* /proc/sal */ | ||
76 | |||
77 | /* Some records we get ourselves, some are accessed as saved data in buffers | ||
78 | * that are owned by mca.c. | ||
79 | */ | ||
80 | struct salinfo_data_saved { | ||
81 | u8* buffer; | ||
82 | u64 size; | ||
83 | u64 id; | ||
84 | int cpu; | ||
85 | }; | ||
86 | |||
87 | /* State transitions. Actions are :- | ||
88 | * Write "read <cpunum>" to the data file. | ||
89 | * Write "clear <cpunum>" to the data file. | ||
90 | * Write "oemdata <cpunum> <offset>" to the data file. | ||
91 | * Read from the data file. | ||
92 | * Close the data file. | ||
93 | * | ||
94 | * Start state is NO_DATA. | ||
95 | * | ||
96 | * NO_DATA | ||
97 | * write "read <cpunum>" -> NO_DATA or LOG_RECORD. | ||
98 | * write "clear <cpunum>" -> NO_DATA or LOG_RECORD. | ||
99 | * write "oemdata <cpunum> <offset>" -> return -EINVAL. | ||
100 | * read data -> return EOF. | ||
101 | * close -> unchanged. Free record areas. | ||
102 | * | ||
103 | * LOG_RECORD | ||
104 | * write "read <cpunum>" -> NO_DATA or LOG_RECORD. | ||
105 | * write "clear <cpunum>" -> NO_DATA or LOG_RECORD. | ||
106 | * write "oemdata <cpunum> <offset>" -> format the oem data, goto OEMDATA. | ||
107 | * read data -> return the INIT/MCA/CMC/CPE record. | ||
108 | * close -> unchanged. Keep record areas. | ||
109 | * | ||
110 | * OEMDATA | ||
111 | * write "read <cpunum>" -> NO_DATA or LOG_RECORD. | ||
112 | * write "clear <cpunum>" -> NO_DATA or LOG_RECORD. | ||
113 | * write "oemdata <cpunum> <offset>" -> format the oem data, goto OEMDATA. | ||
114 | * read data -> return the formatted oemdata. | ||
115 | * close -> unchanged. Keep record areas. | ||
116 | * | ||
117 | * Closing the data file does not change the state. This allows shell scripts | ||
118 | * to manipulate salinfo data, each shell redirection opens the file, does one | ||
119 | * action then closes it again. The record areas are only freed at close when | ||
120 | * the state is NO_DATA. | ||
121 | */ | ||
122 | enum salinfo_state { | ||
123 | STATE_NO_DATA, | ||
124 | STATE_LOG_RECORD, | ||
125 | STATE_OEMDATA, | ||
126 | }; | ||
127 | |||
128 | struct salinfo_data { | ||
129 | volatile cpumask_t cpu_event; /* which cpus have outstanding events */ | ||
130 | struct semaphore sem; /* count of cpus with outstanding events (bits set in cpu_event) */ | ||
131 | u8 *log_buffer; | ||
132 | u64 log_size; | ||
133 | u8 *oemdata; /* decoded oem data */ | ||
134 | u64 oemdata_size; | ||
135 | int open; /* single-open to prevent races */ | ||
136 | u8 type; | ||
137 | u8 saved_num; /* using a saved record? */ | ||
138 | enum salinfo_state state :8; /* processing state */ | ||
139 | u8 padding; | ||
140 | int cpu_check; /* next CPU to check */ | ||
141 | struct salinfo_data_saved data_saved[5];/* save last 5 records from mca.c, must be < 255 */ | ||
142 | }; | ||
143 | |||
144 | static struct salinfo_data salinfo_data[ARRAY_SIZE(salinfo_log_name)]; | ||
145 | |||
/*
 * data_lock serializes updates of the single-open flag (->open) on the
 * open and release paths; data_saved_lock protects the data_saved[]
 * arrays.  Both are statically initialized rather than relying on
 * zero-filled storage being a valid unlocked spinlock.
 */
static DEFINE_SPINLOCK(data_lock);
static DEFINE_SPINLOCK(data_saved_lock);
147 | |||
148 | /** salinfo_platform_oemdata - optional callback to decode oemdata from an error | ||
149 | * record. | ||
150 | * @sect_header: pointer to the start of the section to decode. | ||
151 | * @oemdata: returns vmalloc area containing the decoded output. | ||
152 | * @oemdata_size: returns length of decoded output (strlen). | ||
153 | * | ||
154 | * Description: If user space asks for oem data to be decoded by the kernel | ||
155 | * and/or prom and the platform has set salinfo_platform_oemdata to the address | ||
156 | * of a platform specific routine then call that routine. salinfo_platform_oemdata | ||
157 | * vmalloc's and formats its output area, returning the address of the text | ||
158 | * and its strlen. Returns 0 for success, -ve for error. The callback is | ||
159 | * invoked on the cpu that generated the error record. | ||
160 | */ | ||
161 | int (*salinfo_platform_oemdata)(const u8 *sect_header, u8 **oemdata, u64 *oemdata_size); | ||
162 | |||
163 | struct salinfo_platform_oemdata_parms { | ||
164 | const u8 *efi_guid; | ||
165 | u8 **oemdata; | ||
166 | u64 *oemdata_size; | ||
167 | int ret; | ||
168 | }; | ||
169 | |||
170 | static void | ||
171 | salinfo_platform_oemdata_cpu(void *context) | ||
172 | { | ||
173 | struct salinfo_platform_oemdata_parms *parms = context; | ||
174 | parms->ret = salinfo_platform_oemdata(parms->efi_guid, parms->oemdata, parms->oemdata_size); | ||
175 | } | ||
176 | |||
177 | static void | ||
178 | shift1_data_saved (struct salinfo_data *data, int shift) | ||
179 | { | ||
180 | memcpy(data->data_saved+shift, data->data_saved+shift+1, | ||
181 | (ARRAY_SIZE(data->data_saved) - (shift+1)) * sizeof(data->data_saved[0])); | ||
182 | memset(data->data_saved + ARRAY_SIZE(data->data_saved) - 1, 0, | ||
183 | sizeof(data->data_saved[0])); | ||
184 | } | ||
185 | |||
186 | /* This routine is invoked in interrupt context. Note: mca.c enables | ||
187 | * interrupts before calling this code for CMC/CPE. MCA and INIT events are | ||
188 | * not irq safe, do not call any routines that use spinlocks, they may deadlock. | ||
189 | * MCA and INIT records are recorded, a timer event will look for any | ||
190 | * outstanding events and wake up the user space code. | ||
191 | * | ||
192 | * The buffer passed from mca.c points to the output from ia64_log_get. This is | ||
193 | * a persistent buffer but its contents can change between the interrupt and | ||
194 | * when user space processes the record. Save the record id to identify | ||
195 | * changes. | ||
196 | */ | ||
197 | void | ||
198 | salinfo_log_wakeup(int type, u8 *buffer, u64 size, int irqsafe) | ||
199 | { | ||
200 | struct salinfo_data *data = salinfo_data + type; | ||
201 | struct salinfo_data_saved *data_saved; | ||
202 | unsigned long flags = 0; | ||
203 | int i; | ||
204 | int saved_size = ARRAY_SIZE(data->data_saved); | ||
205 | |||
206 | BUG_ON(type >= ARRAY_SIZE(salinfo_log_name)); | ||
207 | |||
208 | if (irqsafe) | ||
209 | spin_lock_irqsave(&data_saved_lock, flags); | ||
210 | for (i = 0, data_saved = data->data_saved; i < saved_size; ++i, ++data_saved) { | ||
211 | if (!data_saved->buffer) | ||
212 | break; | ||
213 | } | ||
214 | if (i == saved_size) { | ||
215 | if (!data->saved_num) { | ||
216 | shift1_data_saved(data, 0); | ||
217 | data_saved = data->data_saved + saved_size - 1; | ||
218 | } else | ||
219 | data_saved = NULL; | ||
220 | } | ||
221 | if (data_saved) { | ||
222 | data_saved->cpu = smp_processor_id(); | ||
223 | data_saved->id = ((sal_log_record_header_t *)buffer)->id; | ||
224 | data_saved->size = size; | ||
225 | data_saved->buffer = buffer; | ||
226 | } | ||
227 | if (irqsafe) | ||
228 | spin_unlock_irqrestore(&data_saved_lock, flags); | ||
229 | |||
230 | if (!test_and_set_bit(smp_processor_id(), &data->cpu_event)) { | ||
231 | if (irqsafe) | ||
232 | up(&data->sem); | ||
233 | } | ||
234 | } | ||
235 | |||
236 | /* Check for outstanding MCA/INIT records every minute (arbitrary) */ | ||
237 | #define SALINFO_TIMER_DELAY (60*HZ) | ||
238 | static struct timer_list salinfo_timer; | ||
239 | |||
240 | static void | ||
241 | salinfo_timeout_check(struct salinfo_data *data) | ||
242 | { | ||
243 | int i; | ||
244 | if (!data->open) | ||
245 | return; | ||
246 | for (i = 0; i < NR_CPUS; ++i) { | ||
247 | if (test_bit(i, &data->cpu_event)) { | ||
248 | /* double up() is not a problem, user space will see no | ||
249 | * records for the additional "events". | ||
250 | */ | ||
251 | up(&data->sem); | ||
252 | } | ||
253 | } | ||
254 | } | ||
255 | |||
256 | static void | ||
257 | salinfo_timeout (unsigned long arg) | ||
258 | { | ||
259 | salinfo_timeout_check(salinfo_data + SAL_INFO_TYPE_MCA); | ||
260 | salinfo_timeout_check(salinfo_data + SAL_INFO_TYPE_INIT); | ||
261 | salinfo_timer.expires = jiffies + SALINFO_TIMER_DELAY; | ||
262 | add_timer(&salinfo_timer); | ||
263 | } | ||
264 | |||
265 | static int | ||
266 | salinfo_event_open(struct inode *inode, struct file *file) | ||
267 | { | ||
268 | if (!capable(CAP_SYS_ADMIN)) | ||
269 | return -EPERM; | ||
270 | return 0; | ||
271 | } | ||
272 | |||
273 | static ssize_t | ||
274 | salinfo_event_read(struct file *file, char __user *buffer, size_t count, loff_t *ppos) | ||
275 | { | ||
276 | struct inode *inode = file->f_dentry->d_inode; | ||
277 | struct proc_dir_entry *entry = PDE(inode); | ||
278 | struct salinfo_data *data = entry->data; | ||
279 | char cmd[32]; | ||
280 | size_t size; | ||
281 | int i, n, cpu = -1; | ||
282 | |||
283 | retry: | ||
284 | if (down_trylock(&data->sem)) { | ||
285 | if (file->f_flags & O_NONBLOCK) | ||
286 | return -EAGAIN; | ||
287 | if (down_interruptible(&data->sem)) | ||
288 | return -ERESTARTSYS; | ||
289 | } | ||
290 | |||
291 | n = data->cpu_check; | ||
292 | for (i = 0; i < NR_CPUS; i++) { | ||
293 | if (test_bit(n, &data->cpu_event)) { | ||
294 | cpu = n; | ||
295 | break; | ||
296 | } | ||
297 | if (++n == NR_CPUS) | ||
298 | n = 0; | ||
299 | } | ||
300 | |||
301 | if (cpu == -1) | ||
302 | goto retry; | ||
303 | |||
304 | /* events are sticky until the user says "clear" */ | ||
305 | up(&data->sem); | ||
306 | |||
307 | /* for next read, start checking at next CPU */ | ||
308 | data->cpu_check = cpu; | ||
309 | if (++data->cpu_check == NR_CPUS) | ||
310 | data->cpu_check = 0; | ||
311 | |||
312 | snprintf(cmd, sizeof(cmd), "read %d\n", cpu); | ||
313 | |||
314 | size = strlen(cmd); | ||
315 | if (size > count) | ||
316 | size = count; | ||
317 | if (copy_to_user(buffer, cmd, size)) | ||
318 | return -EFAULT; | ||
319 | |||
320 | return size; | ||
321 | } | ||
322 | |||
323 | static struct file_operations salinfo_event_fops = { | ||
324 | .open = salinfo_event_open, | ||
325 | .read = salinfo_event_read, | ||
326 | }; | ||
327 | |||
328 | static int | ||
329 | salinfo_log_open(struct inode *inode, struct file *file) | ||
330 | { | ||
331 | struct proc_dir_entry *entry = PDE(inode); | ||
332 | struct salinfo_data *data = entry->data; | ||
333 | |||
334 | if (!capable(CAP_SYS_ADMIN)) | ||
335 | return -EPERM; | ||
336 | |||
337 | spin_lock(&data_lock); | ||
338 | if (data->open) { | ||
339 | spin_unlock(&data_lock); | ||
340 | return -EBUSY; | ||
341 | } | ||
342 | data->open = 1; | ||
343 | spin_unlock(&data_lock); | ||
344 | |||
345 | if (data->state == STATE_NO_DATA && | ||
346 | !(data->log_buffer = vmalloc(ia64_sal_get_state_info_size(data->type)))) { | ||
347 | data->open = 0; | ||
348 | return -ENOMEM; | ||
349 | } | ||
350 | |||
351 | return 0; | ||
352 | } | ||
353 | |||
354 | static int | ||
355 | salinfo_log_release(struct inode *inode, struct file *file) | ||
356 | { | ||
357 | struct proc_dir_entry *entry = PDE(inode); | ||
358 | struct salinfo_data *data = entry->data; | ||
359 | |||
360 | if (data->state == STATE_NO_DATA) { | ||
361 | vfree(data->log_buffer); | ||
362 | vfree(data->oemdata); | ||
363 | data->log_buffer = NULL; | ||
364 | data->oemdata = NULL; | ||
365 | } | ||
366 | spin_lock(&data_lock); | ||
367 | data->open = 0; | ||
368 | spin_unlock(&data_lock); | ||
369 | return 0; | ||
370 | } | ||
371 | |||
372 | static void | ||
373 | call_on_cpu(int cpu, void (*fn)(void *), void *arg) | ||
374 | { | ||
375 | cpumask_t save_cpus_allowed, new_cpus_allowed; | ||
376 | memcpy(&save_cpus_allowed, ¤t->cpus_allowed, sizeof(save_cpus_allowed)); | ||
377 | memset(&new_cpus_allowed, 0, sizeof(new_cpus_allowed)); | ||
378 | set_bit(cpu, &new_cpus_allowed); | ||
379 | set_cpus_allowed(current, new_cpus_allowed); | ||
380 | (*fn)(arg); | ||
381 | set_cpus_allowed(current, save_cpus_allowed); | ||
382 | } | ||
383 | |||
384 | static void | ||
385 | salinfo_log_read_cpu(void *context) | ||
386 | { | ||
387 | struct salinfo_data *data = context; | ||
388 | sal_log_record_header_t *rh; | ||
389 | data->log_size = ia64_sal_get_state_info(data->type, (u64 *) data->log_buffer); | ||
390 | rh = (sal_log_record_header_t *)(data->log_buffer); | ||
391 | /* Clear corrected errors as they are read from SAL */ | ||
392 | if (rh->severity == sal_log_severity_corrected) | ||
393 | ia64_sal_clear_state_info(data->type); | ||
394 | } | ||
395 | |||
396 | static void | ||
397 | salinfo_log_new_read(int cpu, struct salinfo_data *data) | ||
398 | { | ||
399 | struct salinfo_data_saved *data_saved; | ||
400 | unsigned long flags; | ||
401 | int i; | ||
402 | int saved_size = ARRAY_SIZE(data->data_saved); | ||
403 | |||
404 | data->saved_num = 0; | ||
405 | spin_lock_irqsave(&data_saved_lock, flags); | ||
406 | retry: | ||
407 | for (i = 0, data_saved = data->data_saved; i < saved_size; ++i, ++data_saved) { | ||
408 | if (data_saved->buffer && data_saved->cpu == cpu) { | ||
409 | sal_log_record_header_t *rh = (sal_log_record_header_t *)(data_saved->buffer); | ||
410 | data->log_size = data_saved->size; | ||
411 | memcpy(data->log_buffer, rh, data->log_size); | ||
412 | barrier(); /* id check must not be moved */ | ||
413 | if (rh->id == data_saved->id) { | ||
414 | data->saved_num = i+1; | ||
415 | break; | ||
416 | } | ||
417 | /* saved record changed by mca.c since interrupt, discard it */ | ||
418 | shift1_data_saved(data, i); | ||
419 | goto retry; | ||
420 | } | ||
421 | } | ||
422 | spin_unlock_irqrestore(&data_saved_lock, flags); | ||
423 | |||
424 | if (!data->saved_num) | ||
425 | call_on_cpu(cpu, salinfo_log_read_cpu, data); | ||
426 | if (!data->log_size) { | ||
427 | data->state = STATE_NO_DATA; | ||
428 | clear_bit(cpu, &data->cpu_event); | ||
429 | } else { | ||
430 | data->state = STATE_LOG_RECORD; | ||
431 | } | ||
432 | } | ||
433 | |||
434 | static ssize_t | ||
435 | salinfo_log_read(struct file *file, char __user *buffer, size_t count, loff_t *ppos) | ||
436 | { | ||
437 | struct inode *inode = file->f_dentry->d_inode; | ||
438 | struct proc_dir_entry *entry = PDE(inode); | ||
439 | struct salinfo_data *data = entry->data; | ||
440 | u8 *buf; | ||
441 | u64 bufsize; | ||
442 | |||
443 | if (data->state == STATE_LOG_RECORD) { | ||
444 | buf = data->log_buffer; | ||
445 | bufsize = data->log_size; | ||
446 | } else if (data->state == STATE_OEMDATA) { | ||
447 | buf = data->oemdata; | ||
448 | bufsize = data->oemdata_size; | ||
449 | } else { | ||
450 | buf = NULL; | ||
451 | bufsize = 0; | ||
452 | } | ||
453 | return simple_read_from_buffer(buffer, count, ppos, buf, bufsize); | ||
454 | } | ||
455 | |||
456 | static void | ||
457 | salinfo_log_clear_cpu(void *context) | ||
458 | { | ||
459 | struct salinfo_data *data = context; | ||
460 | ia64_sal_clear_state_info(data->type); | ||
461 | } | ||
462 | |||
463 | static int | ||
464 | salinfo_log_clear(struct salinfo_data *data, int cpu) | ||
465 | { | ||
466 | sal_log_record_header_t *rh; | ||
467 | data->state = STATE_NO_DATA; | ||
468 | if (!test_bit(cpu, &data->cpu_event)) | ||
469 | return 0; | ||
470 | down(&data->sem); | ||
471 | clear_bit(cpu, &data->cpu_event); | ||
472 | if (data->saved_num) { | ||
473 | unsigned long flags; | ||
474 | spin_lock_irqsave(&data_saved_lock, flags); | ||
475 | shift1_data_saved(data, data->saved_num - 1 ); | ||
476 | data->saved_num = 0; | ||
477 | spin_unlock_irqrestore(&data_saved_lock, flags); | ||
478 | } | ||
479 | rh = (sal_log_record_header_t *)(data->log_buffer); | ||
480 | /* Corrected errors have already been cleared from SAL */ | ||
481 | if (rh->severity != sal_log_severity_corrected) | ||
482 | call_on_cpu(cpu, salinfo_log_clear_cpu, data); | ||
483 | /* clearing a record may make a new record visible */ | ||
484 | salinfo_log_new_read(cpu, data); | ||
485 | if (data->state == STATE_LOG_RECORD && | ||
486 | !test_and_set_bit(cpu, &data->cpu_event)) | ||
487 | up(&data->sem); | ||
488 | return 0; | ||
489 | } | ||
490 | |||
491 | static ssize_t | ||
492 | salinfo_log_write(struct file *file, const char __user *buffer, size_t count, loff_t *ppos) | ||
493 | { | ||
494 | struct inode *inode = file->f_dentry->d_inode; | ||
495 | struct proc_dir_entry *entry = PDE(inode); | ||
496 | struct salinfo_data *data = entry->data; | ||
497 | char cmd[32]; | ||
498 | size_t size; | ||
499 | u32 offset; | ||
500 | int cpu; | ||
501 | |||
502 | size = sizeof(cmd); | ||
503 | if (count < size) | ||
504 | size = count; | ||
505 | if (copy_from_user(cmd, buffer, size)) | ||
506 | return -EFAULT; | ||
507 | |||
508 | if (sscanf(cmd, "read %d", &cpu) == 1) { | ||
509 | salinfo_log_new_read(cpu, data); | ||
510 | } else if (sscanf(cmd, "clear %d", &cpu) == 1) { | ||
511 | int ret; | ||
512 | if ((ret = salinfo_log_clear(data, cpu))) | ||
513 | count = ret; | ||
514 | } else if (sscanf(cmd, "oemdata %d %d", &cpu, &offset) == 2) { | ||
515 | if (data->state != STATE_LOG_RECORD && data->state != STATE_OEMDATA) | ||
516 | return -EINVAL; | ||
517 | if (offset > data->log_size - sizeof(efi_guid_t)) | ||
518 | return -EINVAL; | ||
519 | data->state = STATE_OEMDATA; | ||
520 | if (salinfo_platform_oemdata) { | ||
521 | struct salinfo_platform_oemdata_parms parms = { | ||
522 | .efi_guid = data->log_buffer + offset, | ||
523 | .oemdata = &data->oemdata, | ||
524 | .oemdata_size = &data->oemdata_size | ||
525 | }; | ||
526 | call_on_cpu(cpu, salinfo_platform_oemdata_cpu, &parms); | ||
527 | if (parms.ret) | ||
528 | count = parms.ret; | ||
529 | } else | ||
530 | data->oemdata_size = 0; | ||
531 | } else | ||
532 | return -EINVAL; | ||
533 | |||
534 | return count; | ||
535 | } | ||
536 | |||
537 | static struct file_operations salinfo_data_fops = { | ||
538 | .open = salinfo_log_open, | ||
539 | .release = salinfo_log_release, | ||
540 | .read = salinfo_log_read, | ||
541 | .write = salinfo_log_write, | ||
542 | }; | ||
543 | |||
544 | static int __init | ||
545 | salinfo_init(void) | ||
546 | { | ||
547 | struct proc_dir_entry *salinfo_dir; /* /proc/sal dir entry */ | ||
548 | struct proc_dir_entry **sdir = salinfo_proc_entries; /* keeps track of every entry */ | ||
549 | struct proc_dir_entry *dir, *entry; | ||
550 | struct salinfo_data *data; | ||
551 | int i, j, online; | ||
552 | |||
553 | salinfo_dir = proc_mkdir("sal", NULL); | ||
554 | if (!salinfo_dir) | ||
555 | return 0; | ||
556 | |||
557 | for (i=0; i < NR_SALINFO_ENTRIES; i++) { | ||
558 | /* pass the feature bit in question as misc data */ | ||
559 | *sdir++ = create_proc_read_entry (salinfo_entries[i].name, 0, salinfo_dir, | ||
560 | salinfo_read, (void *)salinfo_entries[i].feature); | ||
561 | } | ||
562 | |||
563 | for (i = 0; i < ARRAY_SIZE(salinfo_log_name); i++) { | ||
564 | data = salinfo_data + i; | ||
565 | data->type = i; | ||
566 | sema_init(&data->sem, 0); | ||
567 | dir = proc_mkdir(salinfo_log_name[i], salinfo_dir); | ||
568 | if (!dir) | ||
569 | continue; | ||
570 | |||
571 | entry = create_proc_entry("event", S_IRUSR, dir); | ||
572 | if (!entry) | ||
573 | continue; | ||
574 | entry->data = data; | ||
575 | entry->proc_fops = &salinfo_event_fops; | ||
576 | *sdir++ = entry; | ||
577 | |||
578 | entry = create_proc_entry("data", S_IRUSR | S_IWUSR, dir); | ||
579 | if (!entry) | ||
580 | continue; | ||
581 | entry->data = data; | ||
582 | entry->proc_fops = &salinfo_data_fops; | ||
583 | *sdir++ = entry; | ||
584 | |||
585 | /* we missed any events before now */ | ||
586 | online = 0; | ||
587 | for (j = 0; j < NR_CPUS; j++) | ||
588 | if (cpu_online(j)) { | ||
589 | set_bit(j, &data->cpu_event); | ||
590 | ++online; | ||
591 | } | ||
592 | sema_init(&data->sem, online); | ||
593 | |||
594 | *sdir++ = dir; | ||
595 | } | ||
596 | |||
597 | *sdir++ = salinfo_dir; | ||
598 | |||
599 | init_timer(&salinfo_timer); | ||
600 | salinfo_timer.expires = jiffies + SALINFO_TIMER_DELAY; | ||
601 | salinfo_timer.function = &salinfo_timeout; | ||
602 | add_timer(&salinfo_timer); | ||
603 | |||
604 | return 0; | ||
605 | } | ||
606 | |||
607 | /* | ||
608 | * 'data' contains an integer that corresponds to the feature we're | ||
609 | * testing | ||
610 | */ | ||
611 | static int | ||
612 | salinfo_read(char *page, char **start, off_t off, int count, int *eof, void *data) | ||
613 | { | ||
614 | int len = 0; | ||
615 | |||
616 | len = sprintf(page, (sal_platform_features & (unsigned long)data) ? "1\n" : "0\n"); | ||
617 | |||
618 | if (len <= off+count) *eof = 1; | ||
619 | |||
620 | *start = page + off; | ||
621 | len -= off; | ||
622 | |||
623 | if (len>count) len = count; | ||
624 | if (len<0) len = 0; | ||
625 | |||
626 | return len; | ||
627 | } | ||
628 | |||
629 | module_init(salinfo_init); | ||
diff --git a/arch/ia64/kernel/semaphore.c b/arch/ia64/kernel/semaphore.c new file mode 100644 index 000000000000..2724ef3fbae2 --- /dev/null +++ b/arch/ia64/kernel/semaphore.c | |||
@@ -0,0 +1,165 @@ | |||
1 | /* | ||
2 | * IA-64 semaphore implementation (derived from x86 version). | ||
3 | * | ||
4 | * Copyright (C) 1999-2000, 2002 Hewlett-Packard Co | ||
5 | * David Mosberger-Tang <davidm@hpl.hp.com> | ||
6 | */ | ||
7 | |||
8 | /* | ||
9 | * Semaphores are implemented using a two-way counter: The "count" | ||
10 | * variable is decremented for each process that tries to acquire the | ||
11 | * semaphore, while the "sleepers" variable is a count of such | ||
12 | * acquires. | ||
13 | * | ||
14 | * Notably, the inline "up()" and "down()" functions can efficiently | ||
15 | * test if they need to do any extra work (up needs to do something | ||
16 | * only if count was negative before the increment operation). | ||
17 | * | ||
18 | * "sleepers" and the contention routine ordering is protected | ||
19 | * by the spinlock in the semaphore's waitqueue head. | ||
20 | * | ||
21 | * Note that these functions are only called when there is contention | ||
22 | * on the lock, and as such all this is the "non-critical" part of the | ||
23 | * whole semaphore business. The critical part is the inline stuff in | ||
24 | * <asm/semaphore.h> where we want to avoid any extra jumps and calls. | ||
25 | */ | ||
26 | #include <linux/sched.h> | ||
27 | #include <linux/init.h> | ||
28 | |||
29 | #include <asm/errno.h> | ||
30 | #include <asm/semaphore.h> | ||
31 | |||
32 | /* | ||
33 | * Logic: | ||
34 | * - Only on a boundary condition do we need to care. When we go | ||
35 | * from a negative count to a non-negative, we wake people up. | ||
36 | * - When we go from a non-negative count to a negative do we | ||
37 | * (a) synchronize with the "sleepers" count and (b) make sure | ||
38 | * that we're on the wakeup list before we synchronize so that | ||
39 | * we cannot lose wakeup events. | ||
40 | */ | ||
41 | |||
42 | void | ||
43 | __up (struct semaphore *sem) | ||
44 | { | ||
45 | wake_up(&sem->wait); | ||
46 | } | ||
47 | |||
48 | void __sched __down (struct semaphore *sem) | ||
49 | { | ||
50 | struct task_struct *tsk = current; | ||
51 | DECLARE_WAITQUEUE(wait, tsk); | ||
52 | unsigned long flags; | ||
53 | |||
54 | tsk->state = TASK_UNINTERRUPTIBLE; | ||
55 | spin_lock_irqsave(&sem->wait.lock, flags); | ||
56 | add_wait_queue_exclusive_locked(&sem->wait, &wait); | ||
57 | |||
58 | sem->sleepers++; | ||
59 | for (;;) { | ||
60 | int sleepers = sem->sleepers; | ||
61 | |||
62 | /* | ||
63 | * Add "everybody else" into it. They aren't | ||
64 | * playing, because we own the spinlock in | ||
65 | * the wait_queue_head. | ||
66 | */ | ||
67 | if (!atomic_add_negative(sleepers - 1, &sem->count)) { | ||
68 | sem->sleepers = 0; | ||
69 | break; | ||
70 | } | ||
71 | sem->sleepers = 1; /* us - see -1 above */ | ||
72 | spin_unlock_irqrestore(&sem->wait.lock, flags); | ||
73 | |||
74 | schedule(); | ||
75 | |||
76 | spin_lock_irqsave(&sem->wait.lock, flags); | ||
77 | tsk->state = TASK_UNINTERRUPTIBLE; | ||
78 | } | ||
79 | remove_wait_queue_locked(&sem->wait, &wait); | ||
80 | wake_up_locked(&sem->wait); | ||
81 | spin_unlock_irqrestore(&sem->wait.lock, flags); | ||
82 | tsk->state = TASK_RUNNING; | ||
83 | } | ||
84 | |||
85 | int __sched __down_interruptible (struct semaphore * sem) | ||
86 | { | ||
87 | int retval = 0; | ||
88 | struct task_struct *tsk = current; | ||
89 | DECLARE_WAITQUEUE(wait, tsk); | ||
90 | unsigned long flags; | ||
91 | |||
92 | tsk->state = TASK_INTERRUPTIBLE; | ||
93 | spin_lock_irqsave(&sem->wait.lock, flags); | ||
94 | add_wait_queue_exclusive_locked(&sem->wait, &wait); | ||
95 | |||
96 | sem->sleepers ++; | ||
97 | for (;;) { | ||
98 | int sleepers = sem->sleepers; | ||
99 | |||
100 | /* | ||
101 | * With signals pending, this turns into | ||
102 | * the trylock failure case - we won't be | ||
103 | * sleeping, and we* can't get the lock as | ||
104 | * it has contention. Just correct the count | ||
105 | * and exit. | ||
106 | */ | ||
107 | if (signal_pending(current)) { | ||
108 | retval = -EINTR; | ||
109 | sem->sleepers = 0; | ||
110 | atomic_add(sleepers, &sem->count); | ||
111 | break; | ||
112 | } | ||
113 | |||
114 | /* | ||
115 | * Add "everybody else" into it. They aren't | ||
116 | * playing, because we own the spinlock in | ||
117 | * wait_queue_head. The "-1" is because we're | ||
118 | * still hoping to get the semaphore. | ||
119 | */ | ||
120 | if (!atomic_add_negative(sleepers - 1, &sem->count)) { | ||
121 | sem->sleepers = 0; | ||
122 | break; | ||
123 | } | ||
124 | sem->sleepers = 1; /* us - see -1 above */ | ||
125 | spin_unlock_irqrestore(&sem->wait.lock, flags); | ||
126 | |||
127 | schedule(); | ||
128 | |||
129 | spin_lock_irqsave(&sem->wait.lock, flags); | ||
130 | tsk->state = TASK_INTERRUPTIBLE; | ||
131 | } | ||
132 | remove_wait_queue_locked(&sem->wait, &wait); | ||
133 | wake_up_locked(&sem->wait); | ||
134 | spin_unlock_irqrestore(&sem->wait.lock, flags); | ||
135 | |||
136 | tsk->state = TASK_RUNNING; | ||
137 | return retval; | ||
138 | } | ||
139 | |||
140 | /* | ||
141 | * Trylock failed - make sure we correct for having decremented the | ||
142 | * count. | ||
143 | */ | ||
144 | int | ||
145 | __down_trylock (struct semaphore *sem) | ||
146 | { | ||
147 | unsigned long flags; | ||
148 | int sleepers; | ||
149 | |||
150 | spin_lock_irqsave(&sem->wait.lock, flags); | ||
151 | sleepers = sem->sleepers + 1; | ||
152 | sem->sleepers = 0; | ||
153 | |||
154 | /* | ||
155 | * Add "everybody else" and us into it. They aren't | ||
156 | * playing, because we own the spinlock in the | ||
157 | * wait_queue_head. | ||
158 | */ | ||
159 | if (!atomic_add_negative(sleepers, &sem->count)) { | ||
160 | wake_up_locked(&sem->wait); | ||
161 | } | ||
162 | |||
163 | spin_unlock_irqrestore(&sem->wait.lock, flags); | ||
164 | return 1; | ||
165 | } | ||
diff --git a/arch/ia64/kernel/setup.c b/arch/ia64/kernel/setup.c new file mode 100644 index 000000000000..f05650c801d2 --- /dev/null +++ b/arch/ia64/kernel/setup.c | |||
@@ -0,0 +1,723 @@ | |||
1 | /* | ||
2 | * Architecture-specific setup. | ||
3 | * | ||
4 | * Copyright (C) 1998-2001, 2003-2004 Hewlett-Packard Co | ||
5 | * David Mosberger-Tang <davidm@hpl.hp.com> | ||
6 | * Stephane Eranian <eranian@hpl.hp.com> | ||
7 | * Copyright (C) 2000, Rohit Seth <rohit.seth@intel.com> | ||
8 | * Copyright (C) 1999 VA Linux Systems | ||
9 | * Copyright (C) 1999 Walt Drummond <drummond@valinux.com> | ||
10 | * | ||
11 | * 11/12/01 D.Mosberger Convert get_cpuinfo() to seq_file based show_cpuinfo(). | ||
12 | * 04/04/00 D.Mosberger renamed cpu_initialized to cpu_online_map | ||
13 | * 03/31/00 R.Seth cpu_initialized and current->processor fixes | ||
14 | * 02/04/00 D.Mosberger some more get_cpuinfo fixes... | ||
15 | * 02/01/00 R.Seth fixed get_cpuinfo for SMP | ||
16 | * 01/07/99 S.Eranian added the support for command line argument | ||
17 | * 06/24/99 W.Drummond added boot_cpu_data. | ||
18 | */ | ||
19 | #include <linux/config.h> | ||
20 | #include <linux/module.h> | ||
21 | #include <linux/init.h> | ||
22 | |||
23 | #include <linux/acpi.h> | ||
24 | #include <linux/bootmem.h> | ||
25 | #include <linux/console.h> | ||
26 | #include <linux/delay.h> | ||
27 | #include <linux/kernel.h> | ||
28 | #include <linux/reboot.h> | ||
29 | #include <linux/sched.h> | ||
30 | #include <linux/seq_file.h> | ||
31 | #include <linux/string.h> | ||
32 | #include <linux/threads.h> | ||
33 | #include <linux/tty.h> | ||
34 | #include <linux/serial.h> | ||
35 | #include <linux/serial_core.h> | ||
36 | #include <linux/efi.h> | ||
37 | #include <linux/initrd.h> | ||
38 | |||
39 | #include <asm/ia32.h> | ||
40 | #include <asm/machvec.h> | ||
41 | #include <asm/mca.h> | ||
42 | #include <asm/meminit.h> | ||
43 | #include <asm/page.h> | ||
44 | #include <asm/patch.h> | ||
45 | #include <asm/pgtable.h> | ||
46 | #include <asm/processor.h> | ||
47 | #include <asm/sal.h> | ||
48 | #include <asm/sections.h> | ||
49 | #include <asm/serial.h> | ||
50 | #include <asm/setup.h> | ||
51 | #include <asm/smp.h> | ||
52 | #include <asm/system.h> | ||
53 | #include <asm/unistd.h> | ||
54 | |||
55 | #if defined(CONFIG_SMP) && (IA64_CPU_SIZE > PAGE_SIZE) | ||
56 | # error "struct cpuinfo_ia64 too big!" | ||
57 | #endif | ||
58 | |||
59 | #ifdef CONFIG_SMP | ||
60 | unsigned long __per_cpu_offset[NR_CPUS]; | ||
61 | EXPORT_SYMBOL(__per_cpu_offset); | ||
62 | #endif | ||
63 | |||
64 | DEFINE_PER_CPU(struct cpuinfo_ia64, cpu_info); | ||
65 | DEFINE_PER_CPU(unsigned long, local_per_cpu_offset); | ||
66 | DEFINE_PER_CPU(unsigned long, ia64_phys_stacked_size_p8); | ||
67 | unsigned long ia64_cycles_per_usec; | ||
68 | struct ia64_boot_param *ia64_boot_param; | ||
69 | struct screen_info screen_info; | ||
70 | |||
71 | unsigned long ia64_max_cacheline_size; | ||
72 | unsigned long ia64_iobase; /* virtual address for I/O accesses */ | ||
73 | EXPORT_SYMBOL(ia64_iobase); | ||
74 | struct io_space io_space[MAX_IO_SPACES]; | ||
75 | EXPORT_SYMBOL(io_space); | ||
76 | unsigned int num_io_spaces; | ||
77 | |||
78 | /* | ||
79 | * The merge_mask variable needs to be set to (max(iommu_page_size(iommu)) - 1). This | ||
80 | * mask specifies a mask of address bits that must be 0 in order for two buffers to be | ||
81 | * mergeable by the I/O MMU (i.e., the end address of the first buffer and the start | ||
82 | * address of the second buffer must be aligned to (merge_mask+1) in order to be | ||
83 | * mergeable). By default, we assume there is no I/O MMU which can merge physically | ||
84 | * discontiguous buffers, so we set the merge_mask to ~0UL, which corresponds to a iommu | ||
85 | * page-size of 2^64. | ||
86 | */ | ||
87 | unsigned long ia64_max_iommu_merge_mask = ~0UL; | ||
88 | EXPORT_SYMBOL(ia64_max_iommu_merge_mask); | ||
89 | |||
90 | /* | ||
91 | * We use a special marker for the end of memory and it uses the extra (+1) slot | ||
92 | */ | ||
93 | struct rsvd_region rsvd_region[IA64_MAX_RSVD_REGIONS + 1]; | ||
94 | int num_rsvd_regions; | ||
95 | |||
96 | |||
97 | /* | ||
98 | * Filter incoming memory segments based on the primitive map created from the boot | ||
99 | * parameters. Segments contained in the map are removed from the memory ranges. A | ||
100 | * caller-specified function is called with the memory ranges that remain after filtering. | ||
101 | * This routine does not assume the incoming segments are sorted. | ||
102 | */ | ||
103 | int | ||
104 | filter_rsvd_memory (unsigned long start, unsigned long end, void *arg) | ||
105 | { | ||
106 | unsigned long range_start, range_end, prev_start; | ||
107 | void (*func)(unsigned long, unsigned long, int); | ||
108 | int i; | ||
109 | |||
110 | #if IGNORE_PFN0 | ||
111 | if (start == PAGE_OFFSET) { | ||
112 | printk(KERN_WARNING "warning: skipping physical page 0\n"); | ||
113 | start += PAGE_SIZE; | ||
114 | if (start >= end) return 0; | ||
115 | } | ||
116 | #endif | ||
117 | /* | ||
118 | * lowest possible address(walker uses virtual) | ||
119 | */ | ||
120 | prev_start = PAGE_OFFSET; | ||
121 | func = arg; | ||
122 | |||
123 | for (i = 0; i < num_rsvd_regions; ++i) { | ||
124 | range_start = max(start, prev_start); | ||
125 | range_end = min(end, rsvd_region[i].start); | ||
126 | |||
127 | if (range_start < range_end) | ||
128 | call_pernode_memory(__pa(range_start), range_end - range_start, func); | ||
129 | |||
130 | /* nothing more available in this segment */ | ||
131 | if (range_end == end) return 0; | ||
132 | |||
133 | prev_start = rsvd_region[i].end; | ||
134 | } | ||
135 | /* end of memory marker allows full processing inside loop body */ | ||
136 | return 0; | ||
137 | } | ||
138 | |||
139 | static void | ||
140 | sort_regions (struct rsvd_region *rsvd_region, int max) | ||
141 | { | ||
142 | int j; | ||
143 | |||
144 | /* simple bubble sorting */ | ||
145 | while (max--) { | ||
146 | for (j = 0; j < max; ++j) { | ||
147 | if (rsvd_region[j].start > rsvd_region[j+1].start) { | ||
148 | struct rsvd_region tmp; | ||
149 | tmp = rsvd_region[j]; | ||
150 | rsvd_region[j] = rsvd_region[j + 1]; | ||
151 | rsvd_region[j + 1] = tmp; | ||
152 | } | ||
153 | } | ||
154 | } | ||
155 | } | ||
156 | |||
157 | /** | ||
158 | * reserve_memory - setup reserved memory areas | ||
159 | * | ||
160 | * Setup the reserved memory areas set aside for the boot parameters, | ||
161 | * initrd, etc. There are currently %IA64_MAX_RSVD_REGIONS defined, | ||
162 | * see include/asm-ia64/meminit.h if you need to define more. | ||
163 | */ | ||
164 | void | ||
165 | reserve_memory (void) | ||
166 | { | ||
167 | int n = 0; | ||
168 | |||
169 | /* | ||
170 | * none of the entries in this table overlap | ||
171 | */ | ||
172 | rsvd_region[n].start = (unsigned long) ia64_boot_param; | ||
173 | rsvd_region[n].end = rsvd_region[n].start + sizeof(*ia64_boot_param); | ||
174 | n++; | ||
175 | |||
176 | rsvd_region[n].start = (unsigned long) __va(ia64_boot_param->efi_memmap); | ||
177 | rsvd_region[n].end = rsvd_region[n].start + ia64_boot_param->efi_memmap_size; | ||
178 | n++; | ||
179 | |||
180 | rsvd_region[n].start = (unsigned long) __va(ia64_boot_param->command_line); | ||
181 | rsvd_region[n].end = (rsvd_region[n].start | ||
182 | + strlen(__va(ia64_boot_param->command_line)) + 1); | ||
183 | n++; | ||
184 | |||
185 | rsvd_region[n].start = (unsigned long) ia64_imva((void *)KERNEL_START); | ||
186 | rsvd_region[n].end = (unsigned long) ia64_imva(_end); | ||
187 | n++; | ||
188 | |||
189 | #ifdef CONFIG_BLK_DEV_INITRD | ||
190 | if (ia64_boot_param->initrd_start) { | ||
191 | rsvd_region[n].start = (unsigned long)__va(ia64_boot_param->initrd_start); | ||
192 | rsvd_region[n].end = rsvd_region[n].start + ia64_boot_param->initrd_size; | ||
193 | n++; | ||
194 | } | ||
195 | #endif | ||
196 | |||
197 | /* end of memory marker */ | ||
198 | rsvd_region[n].start = ~0UL; | ||
199 | rsvd_region[n].end = ~0UL; | ||
200 | n++; | ||
201 | |||
202 | num_rsvd_regions = n; | ||
203 | |||
204 | sort_regions(rsvd_region, num_rsvd_regions); | ||
205 | } | ||
206 | |||
/**
 * find_initrd - get initrd parameters from the boot parameter structure
 *
 * If the boot loader passed an initrd, translate its start address to a
 * kernel virtual address, derive the end address from its size and log
 * the result.  Does nothing without CONFIG_BLK_DEV_INITRD.
 */
void
find_initrd (void)
{
#ifdef CONFIG_BLK_DEV_INITRD
	if (!ia64_boot_param->initrd_start)
		return;

	initrd_start = (unsigned long)__va(ia64_boot_param->initrd_start);
	initrd_end = initrd_start + ia64_boot_param->initrd_size;

	printk(KERN_INFO "Initial ramdisk at: 0x%lx (%lu bytes)\n",
	       initrd_start, ia64_boot_param->initrd_size);
#endif
}
226 | |||
227 | static void __init | ||
228 | io_port_init (void) | ||
229 | { | ||
230 | extern unsigned long ia64_iobase; | ||
231 | unsigned long phys_iobase; | ||
232 | |||
233 | /* | ||
234 | * Set `iobase' to the appropriate address in region 6 (uncached access range). | ||
235 | * | ||
236 | * The EFI memory map is the "preferred" location to get the I/O port space base, | ||
237 | * rather the relying on AR.KR0. This should become more clear in future SAL | ||
238 | * specs. We'll fall back to getting it out of AR.KR0 if no appropriate entry is | ||
239 | * found in the memory map. | ||
240 | */ | ||
241 | phys_iobase = efi_get_iobase(); | ||
242 | if (phys_iobase) | ||
243 | /* set AR.KR0 since this is all we use it for anyway */ | ||
244 | ia64_set_kr(IA64_KR_IO_BASE, phys_iobase); | ||
245 | else { | ||
246 | phys_iobase = ia64_get_kr(IA64_KR_IO_BASE); | ||
247 | printk(KERN_INFO "No I/O port range found in EFI memory map, falling back " | ||
248 | "to AR.KR0\n"); | ||
249 | printk(KERN_INFO "I/O port base = 0x%lx\n", phys_iobase); | ||
250 | } | ||
251 | ia64_iobase = (unsigned long) ioremap(phys_iobase, 0); | ||
252 | |||
253 | /* setup legacy IO port space */ | ||
254 | io_space[0].mmio_base = ia64_iobase; | ||
255 | io_space[0].sparse = 1; | ||
256 | num_io_spaces = 1; | ||
257 | } | ||
258 | |||
259 | /** | ||
260 | * early_console_setup - setup debugging console | ||
261 | * | ||
262 | * Consoles started here require little enough setup that we can start using | ||
263 | * them very early in the boot process, either right after the machine | ||
264 | * vector initialization, or even before if the drivers can detect their hw. | ||
265 | * | ||
266 | * Returns non-zero if a console couldn't be setup. | ||
267 | */ | ||
268 | static inline int __init | ||
269 | early_console_setup (char *cmdline) | ||
270 | { | ||
271 | #ifdef CONFIG_SERIAL_SGI_L1_CONSOLE | ||
272 | { | ||
273 | extern int sn_serial_console_early_setup(void); | ||
274 | if (!sn_serial_console_early_setup()) | ||
275 | return 0; | ||
276 | } | ||
277 | #endif | ||
278 | #ifdef CONFIG_EFI_PCDP | ||
279 | if (!efi_setup_pcdp_console(cmdline)) | ||
280 | return 0; | ||
281 | #endif | ||
282 | #ifdef CONFIG_SERIAL_8250_CONSOLE | ||
283 | if (!early_serial_console_init(cmdline)) | ||
284 | return 0; | ||
285 | #endif | ||
286 | |||
287 | return -1; | ||
288 | } | ||
289 | |||
/*
 * Mark the CPU we are running on as online in cpu_online_map (SMP only).
 * Used once an early console has been registered, so that CPU 0 is
 * allowed to printk.
 */
static inline void
mark_bsp_online (void)
{
#ifdef CONFIG_SMP
	cpu_set(smp_processor_id(), cpu_online_map);
#endif
}
298 | |||
299 | void __init | ||
300 | setup_arch (char **cmdline_p) | ||
301 | { | ||
302 | unw_init(); | ||
303 | |||
304 | ia64_patch_vtop((u64) __start___vtop_patchlist, (u64) __end___vtop_patchlist); | ||
305 | |||
306 | *cmdline_p = __va(ia64_boot_param->command_line); | ||
307 | strlcpy(saved_command_line, *cmdline_p, COMMAND_LINE_SIZE); | ||
308 | |||
309 | efi_init(); | ||
310 | io_port_init(); | ||
311 | |||
312 | #ifdef CONFIG_IA64_GENERIC | ||
313 | { | ||
314 | const char *mvec_name = strstr (*cmdline_p, "machvec="); | ||
315 | char str[64]; | ||
316 | |||
317 | if (mvec_name) { | ||
318 | const char *end; | ||
319 | size_t len; | ||
320 | |||
321 | mvec_name += 8; | ||
322 | end = strchr (mvec_name, ' '); | ||
323 | if (end) | ||
324 | len = end - mvec_name; | ||
325 | else | ||
326 | len = strlen (mvec_name); | ||
327 | len = min(len, sizeof (str) - 1); | ||
328 | strncpy (str, mvec_name, len); | ||
329 | str[len] = '\0'; | ||
330 | mvec_name = str; | ||
331 | } else | ||
332 | mvec_name = acpi_get_sysname(); | ||
333 | machvec_init(mvec_name); | ||
334 | } | ||
335 | #endif | ||
336 | |||
337 | if (early_console_setup(*cmdline_p) == 0) | ||
338 | mark_bsp_online(); | ||
339 | |||
340 | #ifdef CONFIG_ACPI_BOOT | ||
341 | /* Initialize the ACPI boot-time table parser */ | ||
342 | acpi_table_init(); | ||
343 | # ifdef CONFIG_ACPI_NUMA | ||
344 | acpi_numa_init(); | ||
345 | # endif | ||
346 | #else | ||
347 | # ifdef CONFIG_SMP | ||
348 | smp_build_cpu_map(); /* happens, e.g., with the Ski simulator */ | ||
349 | # endif | ||
350 | #endif /* CONFIG_APCI_BOOT */ | ||
351 | |||
352 | find_memory(); | ||
353 | |||
354 | /* process SAL system table: */ | ||
355 | ia64_sal_init(efi.sal_systab); | ||
356 | |||
357 | #ifdef CONFIG_SMP | ||
358 | cpu_physical_id(0) = hard_smp_processor_id(); | ||
359 | #endif | ||
360 | |||
361 | cpu_init(); /* initialize the bootstrap CPU */ | ||
362 | |||
363 | #ifdef CONFIG_ACPI_BOOT | ||
364 | acpi_boot_init(); | ||
365 | #endif | ||
366 | |||
367 | #ifdef CONFIG_VT | ||
368 | if (!conswitchp) { | ||
369 | # if defined(CONFIG_DUMMY_CONSOLE) | ||
370 | conswitchp = &dummy_con; | ||
371 | # endif | ||
372 | # if defined(CONFIG_VGA_CONSOLE) | ||
373 | /* | ||
374 | * Non-legacy systems may route legacy VGA MMIO range to system | ||
375 | * memory. vga_con probes the MMIO hole, so memory looks like | ||
376 | * a VGA device to it. The EFI memory map can tell us if it's | ||
377 | * memory so we can avoid this problem. | ||
378 | */ | ||
379 | if (efi_mem_type(0xA0000) != EFI_CONVENTIONAL_MEMORY) | ||
380 | conswitchp = &vga_con; | ||
381 | # endif | ||
382 | } | ||
383 | #endif | ||
384 | |||
385 | /* enable IA-64 Machine Check Abort Handling unless disabled */ | ||
386 | if (!strstr(saved_command_line, "nomca")) | ||
387 | ia64_mca_init(); | ||
388 | |||
389 | platform_setup(cmdline_p); | ||
390 | paging_init(); | ||
391 | } | ||
392 | |||
393 | /* | ||
394 | * Display cpu info for all cpu's. | ||
395 | */ | ||
396 | static int | ||
397 | show_cpuinfo (struct seq_file *m, void *v) | ||
398 | { | ||
399 | #ifdef CONFIG_SMP | ||
400 | # define lpj c->loops_per_jiffy | ||
401 | # define cpunum c->cpu | ||
402 | #else | ||
403 | # define lpj loops_per_jiffy | ||
404 | # define cpunum 0 | ||
405 | #endif | ||
406 | static struct { | ||
407 | unsigned long mask; | ||
408 | const char *feature_name; | ||
409 | } feature_bits[] = { | ||
410 | { 1UL << 0, "branchlong" }, | ||
411 | { 1UL << 1, "spontaneous deferral"}, | ||
412 | { 1UL << 2, "16-byte atomic ops" } | ||
413 | }; | ||
414 | char family[32], features[128], *cp, sep; | ||
415 | struct cpuinfo_ia64 *c = v; | ||
416 | unsigned long mask; | ||
417 | int i; | ||
418 | |||
419 | mask = c->features; | ||
420 | |||
421 | switch (c->family) { | ||
422 | case 0x07: memcpy(family, "Itanium", 8); break; | ||
423 | case 0x1f: memcpy(family, "Itanium 2", 10); break; | ||
424 | default: sprintf(family, "%u", c->family); break; | ||
425 | } | ||
426 | |||
427 | /* build the feature string: */ | ||
428 | memcpy(features, " standard", 10); | ||
429 | cp = features; | ||
430 | sep = 0; | ||
431 | for (i = 0; i < (int) ARRAY_SIZE(feature_bits); ++i) { | ||
432 | if (mask & feature_bits[i].mask) { | ||
433 | if (sep) | ||
434 | *cp++ = sep; | ||
435 | sep = ','; | ||
436 | *cp++ = ' '; | ||
437 | strcpy(cp, feature_bits[i].feature_name); | ||
438 | cp += strlen(feature_bits[i].feature_name); | ||
439 | mask &= ~feature_bits[i].mask; | ||
440 | } | ||
441 | } | ||
442 | if (mask) { | ||
443 | /* print unknown features as a hex value: */ | ||
444 | if (sep) | ||
445 | *cp++ = sep; | ||
446 | sprintf(cp, " 0x%lx", mask); | ||
447 | } | ||
448 | |||
449 | seq_printf(m, | ||
450 | "processor : %d\n" | ||
451 | "vendor : %s\n" | ||
452 | "arch : IA-64\n" | ||
453 | "family : %s\n" | ||
454 | "model : %u\n" | ||
455 | "revision : %u\n" | ||
456 | "archrev : %u\n" | ||
457 | "features :%s\n" /* don't change this---it _is_ right! */ | ||
458 | "cpu number : %lu\n" | ||
459 | "cpu regs : %u\n" | ||
460 | "cpu MHz : %lu.%06lu\n" | ||
461 | "itc MHz : %lu.%06lu\n" | ||
462 | "BogoMIPS : %lu.%02lu\n\n", | ||
463 | cpunum, c->vendor, family, c->model, c->revision, c->archrev, | ||
464 | features, c->ppn, c->number, | ||
465 | c->proc_freq / 1000000, c->proc_freq % 1000000, | ||
466 | c->itc_freq / 1000000, c->itc_freq % 1000000, | ||
467 | lpj*HZ/500000, (lpj*HZ/5000) % 100); | ||
468 | return 0; | ||
469 | } | ||
470 | |||
471 | static void * | ||
472 | c_start (struct seq_file *m, loff_t *pos) | ||
473 | { | ||
474 | #ifdef CONFIG_SMP | ||
475 | while (*pos < NR_CPUS && !cpu_isset(*pos, cpu_online_map)) | ||
476 | ++*pos; | ||
477 | #endif | ||
478 | return *pos < NR_CPUS ? cpu_data(*pos) : NULL; | ||
479 | } | ||
480 | |||
481 | static void * | ||
482 | c_next (struct seq_file *m, void *v, loff_t *pos) | ||
483 | { | ||
484 | ++*pos; | ||
485 | return c_start(m, pos); | ||
486 | } | ||
487 | |||
/* seq_file ->stop(): intentionally empty. */
static void
c_stop (struct seq_file *m, void *v)
{
}
492 | |||
493 | struct seq_operations cpuinfo_op = { | ||
494 | .start = c_start, | ||
495 | .next = c_next, | ||
496 | .stop = c_stop, | ||
497 | .show = show_cpuinfo | ||
498 | }; | ||
499 | |||
500 | void | ||
501 | identify_cpu (struct cpuinfo_ia64 *c) | ||
502 | { | ||
503 | union { | ||
504 | unsigned long bits[5]; | ||
505 | struct { | ||
506 | /* id 0 & 1: */ | ||
507 | char vendor[16]; | ||
508 | |||
509 | /* id 2 */ | ||
510 | u64 ppn; /* processor serial number */ | ||
511 | |||
512 | /* id 3: */ | ||
513 | unsigned number : 8; | ||
514 | unsigned revision : 8; | ||
515 | unsigned model : 8; | ||
516 | unsigned family : 8; | ||
517 | unsigned archrev : 8; | ||
518 | unsigned reserved : 24; | ||
519 | |||
520 | /* id 4: */ | ||
521 | u64 features; | ||
522 | } field; | ||
523 | } cpuid; | ||
524 | pal_vm_info_1_u_t vm1; | ||
525 | pal_vm_info_2_u_t vm2; | ||
526 | pal_status_t status; | ||
527 | unsigned long impl_va_msb = 50, phys_addr_size = 44; /* Itanium defaults */ | ||
528 | int i; | ||
529 | |||
530 | for (i = 0; i < 5; ++i) | ||
531 | cpuid.bits[i] = ia64_get_cpuid(i); | ||
532 | |||
533 | memcpy(c->vendor, cpuid.field.vendor, 16); | ||
534 | #ifdef CONFIG_SMP | ||
535 | c->cpu = smp_processor_id(); | ||
536 | #endif | ||
537 | c->ppn = cpuid.field.ppn; | ||
538 | c->number = cpuid.field.number; | ||
539 | c->revision = cpuid.field.revision; | ||
540 | c->model = cpuid.field.model; | ||
541 | c->family = cpuid.field.family; | ||
542 | c->archrev = cpuid.field.archrev; | ||
543 | c->features = cpuid.field.features; | ||
544 | |||
545 | status = ia64_pal_vm_summary(&vm1, &vm2); | ||
546 | if (status == PAL_STATUS_SUCCESS) { | ||
547 | impl_va_msb = vm2.pal_vm_info_2_s.impl_va_msb; | ||
548 | phys_addr_size = vm1.pal_vm_info_1_s.phys_add_size; | ||
549 | } | ||
550 | c->unimpl_va_mask = ~((7L<<61) | ((1L << (impl_va_msb + 1)) - 1)); | ||
551 | c->unimpl_pa_mask = ~((1L<<63) | ((1L << phys_addr_size) - 1)); | ||
552 | } | ||
553 | |||
/*
 * Intentionally empty; it only exists because start_kernel() requires
 * this function.
 */
void
setup_per_cpu_areas (void)
{
}
559 | |||
560 | static void | ||
561 | get_max_cacheline_size (void) | ||
562 | { | ||
563 | unsigned long line_size, max = 1; | ||
564 | u64 l, levels, unique_caches; | ||
565 | pal_cache_config_info_t cci; | ||
566 | s64 status; | ||
567 | |||
568 | status = ia64_pal_cache_summary(&levels, &unique_caches); | ||
569 | if (status != 0) { | ||
570 | printk(KERN_ERR "%s: ia64_pal_cache_summary() failed (status=%ld)\n", | ||
571 | __FUNCTION__, status); | ||
572 | max = SMP_CACHE_BYTES; | ||
573 | goto out; | ||
574 | } | ||
575 | |||
576 | for (l = 0; l < levels; ++l) { | ||
577 | status = ia64_pal_cache_config_info(l, /* cache_type (data_or_unified)= */ 2, | ||
578 | &cci); | ||
579 | if (status != 0) { | ||
580 | printk(KERN_ERR | ||
581 | "%s: ia64_pal_cache_config_info(l=%lu) failed (status=%ld)\n", | ||
582 | __FUNCTION__, l, status); | ||
583 | max = SMP_CACHE_BYTES; | ||
584 | } | ||
585 | line_size = 1 << cci.pcci_line_size; | ||
586 | if (line_size > max) | ||
587 | max = line_size; | ||
588 | } | ||
589 | out: | ||
590 | if (max > ia64_max_cacheline_size) | ||
591 | ia64_max_cacheline_size = max; | ||
592 | } | ||
593 | |||
594 | /* | ||
595 | * cpu_init() initializes state that is per-CPU. This function acts | ||
596 | * as a 'CPU state barrier', nothing should get across. | ||
597 | */ | ||
598 | void | ||
599 | cpu_init (void) | ||
600 | { | ||
601 | extern void __devinit ia64_mmu_init (void *); | ||
602 | unsigned long num_phys_stacked; | ||
603 | pal_vm_info_2_u_t vmi; | ||
604 | unsigned int max_ctx; | ||
605 | struct cpuinfo_ia64 *cpu_info; | ||
606 | void *cpu_data; | ||
607 | |||
608 | cpu_data = per_cpu_init(); | ||
609 | |||
610 | /* | ||
611 | * We set ar.k3 so that assembly code in MCA handler can compute | ||
612 | * physical addresses of per cpu variables with a simple: | ||
613 | * phys = ar.k3 + &per_cpu_var | ||
614 | */ | ||
615 | ia64_set_kr(IA64_KR_PER_CPU_DATA, | ||
616 | ia64_tpa(cpu_data) - (long) __per_cpu_start); | ||
617 | |||
618 | get_max_cacheline_size(); | ||
619 | |||
620 | /* | ||
621 | * We can't pass "local_cpu_data" to identify_cpu() because we haven't called | ||
622 | * ia64_mmu_init() yet. And we can't call ia64_mmu_init() first because it | ||
623 | * depends on the data returned by identify_cpu(). We break the dependency by | ||
624 | * accessing cpu_data() through the canonical per-CPU address. | ||
625 | */ | ||
626 | cpu_info = cpu_data + ((char *) &__ia64_per_cpu_var(cpu_info) - __per_cpu_start); | ||
627 | identify_cpu(cpu_info); | ||
628 | |||
629 | #ifdef CONFIG_MCKINLEY | ||
630 | { | ||
631 | # define FEATURE_SET 16 | ||
632 | struct ia64_pal_retval iprv; | ||
633 | |||
634 | if (cpu_info->family == 0x1f) { | ||
635 | PAL_CALL_PHYS(iprv, PAL_PROC_GET_FEATURES, 0, FEATURE_SET, 0); | ||
636 | if ((iprv.status == 0) && (iprv.v0 & 0x80) && (iprv.v2 & 0x80)) | ||
637 | PAL_CALL_PHYS(iprv, PAL_PROC_SET_FEATURES, | ||
638 | (iprv.v1 | 0x80), FEATURE_SET, 0); | ||
639 | } | ||
640 | } | ||
641 | #endif | ||
642 | |||
643 | /* Clear the stack memory reserved for pt_regs: */ | ||
644 | memset(ia64_task_regs(current), 0, sizeof(struct pt_regs)); | ||
645 | |||
646 | ia64_set_kr(IA64_KR_FPU_OWNER, 0); | ||
647 | |||
648 | /* | ||
649 | * Initialize the page-table base register to a global | ||
650 | * directory with all zeroes. This ensure that we can handle | ||
651 | * TLB-misses to user address-space even before we created the | ||
652 | * first user address-space. This may happen, e.g., due to | ||
653 | * aggressive use of lfetch.fault. | ||
654 | */ | ||
655 | ia64_set_kr(IA64_KR_PT_BASE, __pa(ia64_imva(empty_zero_page))); | ||
656 | |||
657 | /* | ||
658 | * Initialize default control register to defer all speculative faults. The | ||
659 | * kernel MUST NOT depend on a particular setting of these bits (in other words, | ||
660 | * the kernel must have recovery code for all speculative accesses). Turn on | ||
661 | * dcr.lc as per recommendation by the architecture team. Most IA-32 apps | ||
662 | * shouldn't be affected by this (moral: keep your ia32 locks aligned and you'll | ||
663 | * be fine). | ||
664 | */ | ||
665 | ia64_setreg(_IA64_REG_CR_DCR, ( IA64_DCR_DP | IA64_DCR_DK | IA64_DCR_DX | IA64_DCR_DR | ||
666 | | IA64_DCR_DA | IA64_DCR_DD | IA64_DCR_LC)); | ||
667 | atomic_inc(&init_mm.mm_count); | ||
668 | current->active_mm = &init_mm; | ||
669 | if (current->mm) | ||
670 | BUG(); | ||
671 | |||
672 | ia64_mmu_init(ia64_imva(cpu_data)); | ||
673 | ia64_mca_cpu_init(ia64_imva(cpu_data)); | ||
674 | |||
675 | #ifdef CONFIG_IA32_SUPPORT | ||
676 | ia32_cpu_init(); | ||
677 | #endif | ||
678 | |||
679 | /* Clear ITC to eliminiate sched_clock() overflows in human time. */ | ||
680 | ia64_set_itc(0); | ||
681 | |||
682 | /* disable all local interrupt sources: */ | ||
683 | ia64_set_itv(1 << 16); | ||
684 | ia64_set_lrr0(1 << 16); | ||
685 | ia64_set_lrr1(1 << 16); | ||
686 | ia64_setreg(_IA64_REG_CR_PMV, 1 << 16); | ||
687 | ia64_setreg(_IA64_REG_CR_CMCV, 1 << 16); | ||
688 | |||
689 | /* clear TPR & XTP to enable all interrupt classes: */ | ||
690 | ia64_setreg(_IA64_REG_CR_TPR, 0); | ||
691 | #ifdef CONFIG_SMP | ||
692 | normal_xtp(); | ||
693 | #endif | ||
694 | |||
695 | /* set ia64_ctx.max_rid to the maximum RID that is supported by all CPUs: */ | ||
696 | if (ia64_pal_vm_summary(NULL, &vmi) == 0) | ||
697 | max_ctx = (1U << (vmi.pal_vm_info_2_s.rid_size - 3)) - 1; | ||
698 | else { | ||
699 | printk(KERN_WARNING "cpu_init: PAL VM summary failed, assuming 18 RID bits\n"); | ||
700 | max_ctx = (1U << 15) - 1; /* use architected minimum */ | ||
701 | } | ||
702 | while (max_ctx < ia64_ctx.max_ctx) { | ||
703 | unsigned int old = ia64_ctx.max_ctx; | ||
704 | if (cmpxchg(&ia64_ctx.max_ctx, old, max_ctx) == old) | ||
705 | break; | ||
706 | } | ||
707 | |||
708 | if (ia64_pal_rse_info(&num_phys_stacked, NULL) != 0) { | ||
709 | printk(KERN_WARNING "cpu_init: PAL RSE info failed; assuming 96 physical " | ||
710 | "stacked regs\n"); | ||
711 | num_phys_stacked = 96; | ||
712 | } | ||
713 | /* size of physical stacked register partition plus 8 bytes: */ | ||
714 | __get_cpu_var(ia64_phys_stacked_size_p8) = num_phys_stacked*8 + 8; | ||
715 | platform_cpu_init(); | ||
716 | } | ||
717 | |||
718 | void | ||
719 | check_bugs (void) | ||
720 | { | ||
721 | ia64_patch_mckinley_e9((unsigned long) __start___mckinley_e9_bundles, | ||
722 | (unsigned long) __end___mckinley_e9_bundles); | ||
723 | } | ||
diff --git a/arch/ia64/kernel/sigframe.h b/arch/ia64/kernel/sigframe.h new file mode 100644 index 000000000000..37b986cb86e0 --- /dev/null +++ b/arch/ia64/kernel/sigframe.h | |||
@@ -0,0 +1,25 @@ | |||
1 | struct sigscratch { | ||
2 | unsigned long scratch_unat; /* ar.unat for the general registers saved in pt */ | ||
3 | unsigned long ar_pfs; /* for syscalls, the user-level function-state */ | ||
4 | struct pt_regs pt; | ||
5 | }; | ||
6 | |||
7 | struct sigframe { | ||
8 | /* | ||
9 | * Place signal handler args where user-level unwinder can find them easily. | ||
10 | * DO NOT MOVE THESE. They are part of the IA-64 Linux ABI and there is | ||
11 | * user-level code that depends on their presence! | ||
12 | */ | ||
13 | unsigned long arg0; /* signum */ | ||
14 | unsigned long arg1; /* siginfo pointer */ | ||
15 | unsigned long arg2; /* sigcontext pointer */ | ||
16 | /* | ||
17 | * End of architected state. | ||
18 | */ | ||
19 | |||
20 | void __user *handler; /* pointer to the plabel of the signal handler */ | ||
21 | struct siginfo info; | ||
22 | struct sigcontext sc; | ||
23 | }; | ||
24 | |||
25 | extern long ia64_do_signal (sigset_t *, struct sigscratch *, long); | ||
diff --git a/arch/ia64/kernel/signal.c b/arch/ia64/kernel/signal.c new file mode 100644 index 000000000000..6891d86937d9 --- /dev/null +++ b/arch/ia64/kernel/signal.c | |||
@@ -0,0 +1,691 @@ | |||
1 | /* | ||
2 | * Architecture-specific signal handling support. | ||
3 | * | ||
4 | * Copyright (C) 1999-2004 Hewlett-Packard Co | ||
5 | * David Mosberger-Tang <davidm@hpl.hp.com> | ||
6 | * | ||
7 | * Derived from i386 and Alpha versions. | ||
8 | */ | ||
9 | |||
10 | #include <linux/config.h> | ||
11 | #include <linux/errno.h> | ||
12 | #include <linux/kernel.h> | ||
13 | #include <linux/mm.h> | ||
14 | #include <linux/ptrace.h> | ||
15 | #include <linux/sched.h> | ||
16 | #include <linux/signal.h> | ||
17 | #include <linux/smp.h> | ||
18 | #include <linux/smp_lock.h> | ||
19 | #include <linux/stddef.h> | ||
20 | #include <linux/tty.h> | ||
21 | #include <linux/binfmts.h> | ||
22 | #include <linux/unistd.h> | ||
23 | #include <linux/wait.h> | ||
24 | |||
25 | #include <asm/ia32.h> | ||
26 | #include <asm/intrinsics.h> | ||
27 | #include <asm/uaccess.h> | ||
28 | #include <asm/rse.h> | ||
29 | #include <asm/sigcontext.h> | ||
30 | |||
31 | #include "sigframe.h" | ||
32 | |||
/* Set to non-zero to compile in the printk()s guarded by "#if DEBUG_SIG". */
#define DEBUG_SIG	0
/* minimal alignment for stack pointer */
#define STACK_ALIGN	16
/* Mask of every signal except SIGKILL and SIGSTOP. */
#define _BLOCKABLE	(~(sigmask(SIGKILL) | sigmask(SIGSTOP)))

/*
 * PUT_SIGSET(k,u) stores kernel sigset *k into user sigset *u; GET_SIGSET(k,u)
 * loads *k from *u.  Both evaluate to non-zero on failure.  A single-word
 * sigset_t is moved with one __put_user()/__get_user(), anything larger with
 * a bulk copy.
 */
#if _NSIG_WORDS > 1
# define PUT_SIGSET(k,u)	__copy_to_user((u)->sig, (k)->sig, sizeof(sigset_t))
# define GET_SIGSET(k,u)	__copy_from_user((k)->sig, (u)->sig, sizeof(sigset_t))
#else
# define PUT_SIGSET(k,u)	__put_user((k)->sig[0], &(u)->sig[0])
# define GET_SIGSET(k,u)	__get_user((k)->sig[0], &(u)->sig[0])
#endif
44 | |||
45 | long | ||
46 | ia64_rt_sigsuspend (sigset_t __user *uset, size_t sigsetsize, struct sigscratch *scr) | ||
47 | { | ||
48 | sigset_t oldset, set; | ||
49 | |||
50 | /* XXX: Don't preclude handling different sized sigset_t's. */ | ||
51 | if (sigsetsize != sizeof(sigset_t)) | ||
52 | return -EINVAL; | ||
53 | |||
54 | if (!access_ok(VERIFY_READ, uset, sigsetsize)) | ||
55 | return -EFAULT; | ||
56 | |||
57 | if (GET_SIGSET(&set, uset)) | ||
58 | return -EFAULT; | ||
59 | |||
60 | sigdelsetmask(&set, ~_BLOCKABLE); | ||
61 | |||
62 | spin_lock_irq(¤t->sighand->siglock); | ||
63 | { | ||
64 | oldset = current->blocked; | ||
65 | current->blocked = set; | ||
66 | recalc_sigpending(); | ||
67 | } | ||
68 | spin_unlock_irq(¤t->sighand->siglock); | ||
69 | |||
70 | /* | ||
71 | * The return below usually returns to the signal handler. We need to | ||
72 | * pre-set the correct error code here to ensure that the right values | ||
73 | * get saved in sigcontext by ia64_do_signal. | ||
74 | */ | ||
75 | scr->pt.r8 = EINTR; | ||
76 | scr->pt.r10 = -1; | ||
77 | |||
78 | while (1) { | ||
79 | current->state = TASK_INTERRUPTIBLE; | ||
80 | schedule(); | ||
81 | if (ia64_do_signal(&oldset, scr, 1)) | ||
82 | return -EINTR; | ||
83 | } | ||
84 | } | ||
85 | |||
86 | asmlinkage long | ||
87 | sys_sigaltstack (const stack_t __user *uss, stack_t __user *uoss, long arg2, | ||
88 | long arg3, long arg4, long arg5, long arg6, long arg7, | ||
89 | struct pt_regs regs) | ||
90 | { | ||
91 | return do_sigaltstack(uss, uoss, regs.r12); | ||
92 | } | ||
93 | |||
94 | static long | ||
95 | restore_sigcontext (struct sigcontext __user *sc, struct sigscratch *scr) | ||
96 | { | ||
97 | unsigned long ip, flags, nat, um, cfm; | ||
98 | long err; | ||
99 | |||
100 | /* Always make any pending restarted system calls return -EINTR */ | ||
101 | current_thread_info()->restart_block.fn = do_no_restart_syscall; | ||
102 | |||
103 | /* restore scratch that always needs gets updated during signal delivery: */ | ||
104 | err = __get_user(flags, &sc->sc_flags); | ||
105 | err |= __get_user(nat, &sc->sc_nat); | ||
106 | err |= __get_user(ip, &sc->sc_ip); /* instruction pointer */ | ||
107 | err |= __get_user(cfm, &sc->sc_cfm); | ||
108 | err |= __get_user(um, &sc->sc_um); /* user mask */ | ||
109 | err |= __get_user(scr->pt.ar_rsc, &sc->sc_ar_rsc); | ||
110 | err |= __get_user(scr->pt.ar_unat, &sc->sc_ar_unat); | ||
111 | err |= __get_user(scr->pt.ar_fpsr, &sc->sc_ar_fpsr); | ||
112 | err |= __get_user(scr->pt.ar_pfs, &sc->sc_ar_pfs); | ||
113 | err |= __get_user(scr->pt.pr, &sc->sc_pr); /* predicates */ | ||
114 | err |= __get_user(scr->pt.b0, &sc->sc_br[0]); /* b0 (rp) */ | ||
115 | err |= __get_user(scr->pt.b6, &sc->sc_br[6]); /* b6 */ | ||
116 | err |= __copy_from_user(&scr->pt.r1, &sc->sc_gr[1], 8); /* r1 */ | ||
117 | err |= __copy_from_user(&scr->pt.r8, &sc->sc_gr[8], 4*8); /* r8-r11 */ | ||
118 | err |= __copy_from_user(&scr->pt.r12, &sc->sc_gr[12], 2*8); /* r12-r13 */ | ||
119 | err |= __copy_from_user(&scr->pt.r15, &sc->sc_gr[15], 8); /* r15 */ | ||
120 | |||
121 | scr->pt.cr_ifs = cfm | (1UL << 63); | ||
122 | |||
123 | /* establish new instruction pointer: */ | ||
124 | scr->pt.cr_iip = ip & ~0x3UL; | ||
125 | ia64_psr(&scr->pt)->ri = ip & 0x3; | ||
126 | scr->pt.cr_ipsr = (scr->pt.cr_ipsr & ~IA64_PSR_UM) | (um & IA64_PSR_UM); | ||
127 | |||
128 | scr->scratch_unat = ia64_put_scratch_nat_bits(&scr->pt, nat); | ||
129 | |||
130 | if (!(flags & IA64_SC_FLAG_IN_SYSCALL)) { | ||
131 | /* Restore most scratch-state only when not in syscall. */ | ||
132 | err |= __get_user(scr->pt.ar_ccv, &sc->sc_ar_ccv); /* ar.ccv */ | ||
133 | err |= __get_user(scr->pt.b7, &sc->sc_br[7]); /* b7 */ | ||
134 | err |= __get_user(scr->pt.r14, &sc->sc_gr[14]); /* r14 */ | ||
135 | err |= __copy_from_user(&scr->pt.ar_csd, &sc->sc_ar25, 2*8); /* ar.csd & ar.ssd */ | ||
136 | err |= __copy_from_user(&scr->pt.r2, &sc->sc_gr[2], 2*8); /* r2-r3 */ | ||
137 | err |= __copy_from_user(&scr->pt.r16, &sc->sc_gr[16], 16*8); /* r16-r31 */ | ||
138 | } | ||
139 | |||
140 | if ((flags & IA64_SC_FLAG_FPH_VALID) != 0) { | ||
141 | struct ia64_psr *psr = ia64_psr(&scr->pt); | ||
142 | |||
143 | __copy_from_user(current->thread.fph, &sc->sc_fr[32], 96*16); | ||
144 | psr->mfh = 0; /* drop signal handler's fph contents... */ | ||
145 | if (psr->dfh) | ||
146 | ia64_drop_fpu(current); | ||
147 | else { | ||
148 | /* We already own the local fph, otherwise psr->dfh wouldn't be 0. */ | ||
149 | __ia64_load_fpu(current->thread.fph); | ||
150 | ia64_set_local_fpu_owner(current); | ||
151 | } | ||
152 | } | ||
153 | return err; | ||
154 | } | ||
155 | |||
156 | int | ||
157 | copy_siginfo_to_user (siginfo_t __user *to, siginfo_t *from) | ||
158 | { | ||
159 | if (!access_ok(VERIFY_WRITE, to, sizeof(siginfo_t))) | ||
160 | return -EFAULT; | ||
161 | if (from->si_code < 0) { | ||
162 | if (__copy_to_user(to, from, sizeof(siginfo_t))) | ||
163 | return -EFAULT; | ||
164 | return 0; | ||
165 | } else { | ||
166 | int err; | ||
167 | |||
168 | /* | ||
169 | * If you change siginfo_t structure, please be sure this code is fixed | ||
170 | * accordingly. It should never copy any pad contained in the structure | ||
171 | * to avoid security leaks, but must copy the generic 3 ints plus the | ||
172 | * relevant union member. | ||
173 | */ | ||
174 | err = __put_user(from->si_signo, &to->si_signo); | ||
175 | err |= __put_user(from->si_errno, &to->si_errno); | ||
176 | err |= __put_user((short)from->si_code, &to->si_code); | ||
177 | switch (from->si_code >> 16) { | ||
178 | case __SI_FAULT >> 16: | ||
179 | err |= __put_user(from->si_flags, &to->si_flags); | ||
180 | err |= __put_user(from->si_isr, &to->si_isr); | ||
181 | case __SI_POLL >> 16: | ||
182 | err |= __put_user(from->si_addr, &to->si_addr); | ||
183 | err |= __put_user(from->si_imm, &to->si_imm); | ||
184 | break; | ||
185 | case __SI_TIMER >> 16: | ||
186 | err |= __put_user(from->si_tid, &to->si_tid); | ||
187 | err |= __put_user(from->si_overrun, &to->si_overrun); | ||
188 | err |= __put_user(from->si_ptr, &to->si_ptr); | ||
189 | break; | ||
190 | case __SI_RT >> 16: /* Not generated by the kernel as of now. */ | ||
191 | case __SI_MESGQ >> 16: | ||
192 | err |= __put_user(from->si_uid, &to->si_uid); | ||
193 | err |= __put_user(from->si_pid, &to->si_pid); | ||
194 | err |= __put_user(from->si_ptr, &to->si_ptr); | ||
195 | break; | ||
196 | case __SI_CHLD >> 16: | ||
197 | err |= __put_user(from->si_utime, &to->si_utime); | ||
198 | err |= __put_user(from->si_stime, &to->si_stime); | ||
199 | err |= __put_user(from->si_status, &to->si_status); | ||
200 | default: | ||
201 | err |= __put_user(from->si_uid, &to->si_uid); | ||
202 | err |= __put_user(from->si_pid, &to->si_pid); | ||
203 | break; | ||
204 | } | ||
205 | return err; | ||
206 | } | ||
207 | } | ||
208 | |||
209 | long | ||
210 | ia64_rt_sigreturn (struct sigscratch *scr) | ||
211 | { | ||
212 | extern char ia64_strace_leave_kernel, ia64_leave_kernel; | ||
213 | struct sigcontext __user *sc; | ||
214 | struct siginfo si; | ||
215 | sigset_t set; | ||
216 | long retval; | ||
217 | |||
218 | sc = &((struct sigframe __user *) (scr->pt.r12 + 16))->sc; | ||
219 | |||
220 | /* | ||
221 | * When we return to the previously executing context, r8 and r10 have already | ||
222 | * been setup the way we want them. Indeed, if the signal wasn't delivered while | ||
223 | * in a system call, we must not touch r8 or r10 as otherwise user-level state | ||
224 | * could be corrupted. | ||
225 | */ | ||
226 | retval = (long) &ia64_leave_kernel; | ||
227 | if (test_thread_flag(TIF_SYSCALL_TRACE)) | ||
228 | /* | ||
229 | * strace expects to be notified after sigreturn returns even though the | ||
230 | * context to which we return may not be in the middle of a syscall. | ||
231 | * Thus, the return-value that strace displays for sigreturn is | ||
232 | * meaningless. | ||
233 | */ | ||
234 | retval = (long) &ia64_strace_leave_kernel; | ||
235 | |||
236 | if (!access_ok(VERIFY_READ, sc, sizeof(*sc))) | ||
237 | goto give_sigsegv; | ||
238 | |||
239 | if (GET_SIGSET(&set, &sc->sc_mask)) | ||
240 | goto give_sigsegv; | ||
241 | |||
242 | sigdelsetmask(&set, ~_BLOCKABLE); | ||
243 | |||
244 | spin_lock_irq(¤t->sighand->siglock); | ||
245 | { | ||
246 | current->blocked = set; | ||
247 | recalc_sigpending(); | ||
248 | } | ||
249 | spin_unlock_irq(¤t->sighand->siglock); | ||
250 | |||
251 | if (restore_sigcontext(sc, scr)) | ||
252 | goto give_sigsegv; | ||
253 | |||
254 | #if DEBUG_SIG | ||
255 | printk("SIG return (%s:%d): sp=%lx ip=%lx\n", | ||
256 | current->comm, current->pid, scr->pt.r12, scr->pt.cr_iip); | ||
257 | #endif | ||
258 | /* | ||
259 | * It is more difficult to avoid calling this function than to | ||
260 | * call it and ignore errors. | ||
261 | */ | ||
262 | do_sigaltstack(&sc->sc_stack, NULL, scr->pt.r12); | ||
263 | return retval; | ||
264 | |||
265 | give_sigsegv: | ||
266 | si.si_signo = SIGSEGV; | ||
267 | si.si_errno = 0; | ||
268 | si.si_code = SI_KERNEL; | ||
269 | si.si_pid = current->pid; | ||
270 | si.si_uid = current->uid; | ||
271 | si.si_addr = sc; | ||
272 | force_sig_info(SIGSEGV, &si, current); | ||
273 | return retval; | ||
274 | } | ||
275 | |||
276 | /* | ||
277 | * This does just the minimum required setup of sigcontext. | ||
278 | * Specifically, it only installs data that is either not knowable at | ||
279 | * the user-level or that gets modified before execution in the | ||
280 | * trampoline starts. Everything else is done at the user-level. | ||
281 | */ | ||
282 | static long | ||
283 | setup_sigcontext (struct sigcontext __user *sc, sigset_t *mask, struct sigscratch *scr) | ||
284 | { | ||
285 | unsigned long flags = 0, ifs, cfm, nat; | ||
286 | long err; | ||
287 | |||
288 | ifs = scr->pt.cr_ifs; | ||
289 | |||
290 | if (on_sig_stack((unsigned long) sc)) | ||
291 | flags |= IA64_SC_FLAG_ONSTACK; | ||
292 | if ((ifs & (1UL << 63)) == 0) | ||
293 | /* if cr_ifs doesn't have the valid bit set, we got here through a syscall */ | ||
294 | flags |= IA64_SC_FLAG_IN_SYSCALL; | ||
295 | cfm = ifs & ((1UL << 38) - 1); | ||
296 | ia64_flush_fph(current); | ||
297 | if ((current->thread.flags & IA64_THREAD_FPH_VALID)) { | ||
298 | flags |= IA64_SC_FLAG_FPH_VALID; | ||
299 | __copy_to_user(&sc->sc_fr[32], current->thread.fph, 96*16); | ||
300 | } | ||
301 | |||
302 | nat = ia64_get_scratch_nat_bits(&scr->pt, scr->scratch_unat); | ||
303 | |||
304 | err = __put_user(flags, &sc->sc_flags); | ||
305 | err |= __put_user(nat, &sc->sc_nat); | ||
306 | err |= PUT_SIGSET(mask, &sc->sc_mask); | ||
307 | err |= __put_user(cfm, &sc->sc_cfm); | ||
308 | err |= __put_user(scr->pt.cr_ipsr & IA64_PSR_UM, &sc->sc_um); | ||
309 | err |= __put_user(scr->pt.ar_rsc, &sc->sc_ar_rsc); | ||
310 | err |= __put_user(scr->pt.ar_unat, &sc->sc_ar_unat); /* ar.unat */ | ||
311 | err |= __put_user(scr->pt.ar_fpsr, &sc->sc_ar_fpsr); /* ar.fpsr */ | ||
312 | err |= __put_user(scr->pt.ar_pfs, &sc->sc_ar_pfs); | ||
313 | err |= __put_user(scr->pt.pr, &sc->sc_pr); /* predicates */ | ||
314 | err |= __put_user(scr->pt.b0, &sc->sc_br[0]); /* b0 (rp) */ | ||
315 | err |= __put_user(scr->pt.b6, &sc->sc_br[6]); /* b6 */ | ||
316 | err |= __copy_to_user(&sc->sc_gr[1], &scr->pt.r1, 8); /* r1 */ | ||
317 | err |= __copy_to_user(&sc->sc_gr[8], &scr->pt.r8, 4*8); /* r8-r11 */ | ||
318 | err |= __copy_to_user(&sc->sc_gr[12], &scr->pt.r12, 2*8); /* r12-r13 */ | ||
319 | err |= __copy_to_user(&sc->sc_gr[15], &scr->pt.r15, 8); /* r15 */ | ||
320 | err |= __put_user(scr->pt.cr_iip + ia64_psr(&scr->pt)->ri, &sc->sc_ip); | ||
321 | |||
322 | if (flags & IA64_SC_FLAG_IN_SYSCALL) { | ||
323 | /* Clear scratch registers if the signal interrupted a system call. */ | ||
324 | err |= __put_user(0, &sc->sc_ar_ccv); /* ar.ccv */ | ||
325 | err |= __put_user(0, &sc->sc_br[7]); /* b7 */ | ||
326 | err |= __put_user(0, &sc->sc_gr[14]); /* r14 */ | ||
327 | err |= __clear_user(&sc->sc_ar25, 2*8); /* ar.csd & ar.ssd */ | ||
328 | err |= __clear_user(&sc->sc_gr[2], 2*8); /* r2-r3 */ | ||
329 | err |= __clear_user(&sc->sc_gr[16], 16*8); /* r16-r31 */ | ||
330 | } else { | ||
331 | /* Copy scratch regs to sigcontext if the signal didn't interrupt a syscall. */ | ||
332 | err |= __put_user(scr->pt.ar_ccv, &sc->sc_ar_ccv); /* ar.ccv */ | ||
333 | err |= __put_user(scr->pt.b7, &sc->sc_br[7]); /* b7 */ | ||
334 | err |= __put_user(scr->pt.r14, &sc->sc_gr[14]); /* r14 */ | ||
335 | err |= __copy_to_user(&sc->sc_ar25, &scr->pt.ar_csd, 2*8); /* ar.csd & ar.ssd */ | ||
336 | err |= __copy_to_user(&sc->sc_gr[2], &scr->pt.r2, 2*8); /* r2-r3 */ | ||
337 | err |= __copy_to_user(&sc->sc_gr[16], &scr->pt.r16, 16*8); /* r16-r31 */ | ||
338 | } | ||
339 | return err; | ||
340 | } | ||
341 | |||
342 | /* | ||
343 | * Check whether the register-backing store is already on the signal stack. | ||
344 | */ | ||
345 | static inline int | ||
346 | rbs_on_sig_stack (unsigned long bsp) | ||
347 | { | ||
348 | return (bsp - current->sas_ss_sp < current->sas_ss_size); | ||
349 | } | ||
350 | |||
351 | static long | ||
352 | force_sigsegv_info (int sig, void __user *addr) | ||
353 | { | ||
354 | unsigned long flags; | ||
355 | struct siginfo si; | ||
356 | |||
357 | if (sig == SIGSEGV) { | ||
358 | /* | ||
359 | * Acquiring siglock around the sa_handler-update is almost | ||
360 | * certainly overkill, but this isn't a | ||
361 | * performance-critical path and I'd rather play it safe | ||
362 | * here than having to debug a nasty race if and when | ||
363 | * something changes in kernel/signal.c that would make it | ||
364 | * no longer safe to modify sa_handler without holding the | ||
365 | * lock. | ||
366 | */ | ||
367 | spin_lock_irqsave(¤t->sighand->siglock, flags); | ||
368 | current->sighand->action[sig - 1].sa.sa_handler = SIG_DFL; | ||
369 | spin_unlock_irqrestore(¤t->sighand->siglock, flags); | ||
370 | } | ||
371 | si.si_signo = SIGSEGV; | ||
372 | si.si_errno = 0; | ||
373 | si.si_code = SI_KERNEL; | ||
374 | si.si_pid = current->pid; | ||
375 | si.si_uid = current->uid; | ||
376 | si.si_addr = addr; | ||
377 | force_sig_info(SIGSEGV, &si, current); | ||
378 | return 0; | ||
379 | } | ||
380 | |||
381 | static long | ||
382 | setup_frame (int sig, struct k_sigaction *ka, siginfo_t *info, sigset_t *set, | ||
383 | struct sigscratch *scr) | ||
384 | { | ||
385 | extern char __kernel_sigtramp[]; | ||
386 | unsigned long tramp_addr, new_rbs = 0; | ||
387 | struct sigframe __user *frame; | ||
388 | long err; | ||
389 | |||
390 | frame = (void __user *) scr->pt.r12; | ||
391 | tramp_addr = (unsigned long) __kernel_sigtramp; | ||
392 | if ((ka->sa.sa_flags & SA_ONSTACK) && sas_ss_flags((unsigned long) frame) == 0) { | ||
393 | frame = (void __user *) ((current->sas_ss_sp + current->sas_ss_size) | ||
394 | & ~(STACK_ALIGN - 1)); | ||
395 | /* | ||
396 | * We need to check for the register stack being on the signal stack | ||
397 | * separately, because it's switched separately (memory stack is switched | ||
398 | * in the kernel, register stack is switched in the signal trampoline). | ||
399 | */ | ||
400 | if (!rbs_on_sig_stack(scr->pt.ar_bspstore)) | ||
401 | new_rbs = (current->sas_ss_sp + sizeof(long) - 1) & ~(sizeof(long) - 1); | ||
402 | } | ||
403 | frame = (void __user *) frame - ((sizeof(*frame) + STACK_ALIGN - 1) & ~(STACK_ALIGN - 1)); | ||
404 | |||
405 | if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame))) | ||
406 | return force_sigsegv_info(sig, frame); | ||
407 | |||
408 | err = __put_user(sig, &frame->arg0); | ||
409 | err |= __put_user(&frame->info, &frame->arg1); | ||
410 | err |= __put_user(&frame->sc, &frame->arg2); | ||
411 | err |= __put_user(new_rbs, &frame->sc.sc_rbs_base); | ||
412 | err |= __put_user(0, &frame->sc.sc_loadrs); /* initialize to zero */ | ||
413 | err |= __put_user(ka->sa.sa_handler, &frame->handler); | ||
414 | |||
415 | err |= copy_siginfo_to_user(&frame->info, info); | ||
416 | |||
417 | err |= __put_user(current->sas_ss_sp, &frame->sc.sc_stack.ss_sp); | ||
418 | err |= __put_user(current->sas_ss_size, &frame->sc.sc_stack.ss_size); | ||
419 | err |= __put_user(sas_ss_flags(scr->pt.r12), &frame->sc.sc_stack.ss_flags); | ||
420 | err |= setup_sigcontext(&frame->sc, set, scr); | ||
421 | |||
422 | if (unlikely(err)) | ||
423 | return force_sigsegv_info(sig, frame); | ||
424 | |||
425 | scr->pt.r12 = (unsigned long) frame - 16; /* new stack pointer */ | ||
426 | scr->pt.ar_fpsr = FPSR_DEFAULT; /* reset fpsr for signal handler */ | ||
427 | scr->pt.cr_iip = tramp_addr; | ||
428 | ia64_psr(&scr->pt)->ri = 0; /* start executing in first slot */ | ||
429 | ia64_psr(&scr->pt)->be = 0; /* force little-endian byte-order */ | ||
430 | /* | ||
431 | * Force the interruption function mask to zero. This has no effect when a | ||
432 | * system-call got interrupted by a signal (since, in that case, scr->pt_cr_ifs is | ||
433 | * ignored), but it has the desirable effect of making it possible to deliver a | ||
434 | * signal with an incomplete register frame (which happens when a mandatory RSE | ||
435 | * load faults). Furthermore, it has no negative effect on the getting the user's | ||
436 | * dirty partition preserved, because that's governed by scr->pt.loadrs. | ||
437 | */ | ||
438 | scr->pt.cr_ifs = (1UL << 63); | ||
439 | |||
440 | /* | ||
441 | * Note: this affects only the NaT bits of the scratch regs (the ones saved in | ||
442 | * pt_regs), which is exactly what we want. | ||
443 | */ | ||
444 | scr->scratch_unat = 0; /* ensure NaT bits of r12 is clear */ | ||
445 | |||
446 | #if DEBUG_SIG | ||
447 | printk("SIG deliver (%s:%d): sig=%d sp=%lx ip=%lx handler=%p\n", | ||
448 | current->comm, current->pid, sig, scr->pt.r12, frame->sc.sc_ip, frame->handler); | ||
449 | #endif | ||
450 | return 1; | ||
451 | } | ||
452 | |||
453 | static long | ||
454 | handle_signal (unsigned long sig, struct k_sigaction *ka, siginfo_t *info, sigset_t *oldset, | ||
455 | struct sigscratch *scr) | ||
456 | { | ||
457 | if (IS_IA32_PROCESS(&scr->pt)) { | ||
458 | /* send signal to IA-32 process */ | ||
459 | if (!ia32_setup_frame1(sig, ka, info, oldset, &scr->pt)) | ||
460 | return 0; | ||
461 | } else | ||
462 | /* send signal to IA-64 process */ | ||
463 | if (!setup_frame(sig, ka, info, oldset, scr)) | ||
464 | return 0; | ||
465 | |||
466 | if (!(ka->sa.sa_flags & SA_NODEFER)) { | ||
467 | spin_lock_irq(¤t->sighand->siglock); | ||
468 | { | ||
469 | sigorsets(¤t->blocked, ¤t->blocked, &ka->sa.sa_mask); | ||
470 | sigaddset(¤t->blocked, sig); | ||
471 | recalc_sigpending(); | ||
472 | } | ||
473 | spin_unlock_irq(¤t->sighand->siglock); | ||
474 | } | ||
475 | return 1; | ||
476 | } | ||
477 | |||
478 | /* | ||
479 | * Note that `init' is a special process: it doesn't get signals it doesn't want to | ||
480 | * handle. Thus you cannot kill init even with a SIGKILL even by mistake. | ||
481 | */ | ||
482 | long | ||
483 | ia64_do_signal (sigset_t *oldset, struct sigscratch *scr, long in_syscall) | ||
484 | { | ||
485 | struct k_sigaction ka; | ||
486 | siginfo_t info; | ||
487 | long restart = in_syscall; | ||
488 | long errno = scr->pt.r8; | ||
489 | # define ERR_CODE(c) (IS_IA32_PROCESS(&scr->pt) ? -(c) : (c)) | ||
490 | |||
491 | /* | ||
492 | * In the ia64_leave_kernel code path, we want the common case to go fast, which | ||
493 | * is why we may in certain cases get here from kernel mode. Just return without | ||
494 | * doing anything if so. | ||
495 | */ | ||
496 | if (!user_mode(&scr->pt)) | ||
497 | return 0; | ||
498 | |||
499 | if (!oldset) | ||
500 | oldset = ¤t->blocked; | ||
501 | |||
502 | /* | ||
503 | * This only loops in the rare cases of handle_signal() failing, in which case we | ||
504 | * need to push through a forced SIGSEGV. | ||
505 | */ | ||
506 | while (1) { | ||
507 | int signr = get_signal_to_deliver(&info, &ka, &scr->pt, NULL); | ||
508 | |||
509 | /* | ||
510 | * get_signal_to_deliver() may have run a debugger (via notify_parent()) | ||
511 | * and the debugger may have modified the state (e.g., to arrange for an | ||
512 | * inferior call), thus it's important to check for restarting _after_ | ||
513 | * get_signal_to_deliver(). | ||
514 | */ | ||
515 | if (IS_IA32_PROCESS(&scr->pt)) { | ||
516 | if (in_syscall) { | ||
517 | if (errno >= 0) | ||
518 | restart = 0; | ||
519 | else | ||
520 | errno = -errno; | ||
521 | } | ||
522 | } else if ((long) scr->pt.r10 != -1) | ||
523 | /* | ||
524 | * A system calls has to be restarted only if one of the error codes | ||
525 | * ERESTARTNOHAND, ERESTARTSYS, or ERESTARTNOINTR is returned. If r10 | ||
526 | * isn't -1 then r8 doesn't hold an error code and we don't need to | ||
527 | * restart the syscall, so we can clear the "restart" flag here. | ||
528 | */ | ||
529 | restart = 0; | ||
530 | |||
531 | if (signr <= 0) | ||
532 | break; | ||
533 | |||
534 | if (unlikely(restart)) { | ||
535 | switch (errno) { | ||
536 | case ERESTART_RESTARTBLOCK: | ||
537 | case ERESTARTNOHAND: | ||
538 | scr->pt.r8 = ERR_CODE(EINTR); | ||
539 | /* note: scr->pt.r10 is already -1 */ | ||
540 | break; | ||
541 | |||
542 | case ERESTARTSYS: | ||
543 | if ((ka.sa.sa_flags & SA_RESTART) == 0) { | ||
544 | scr->pt.r8 = ERR_CODE(EINTR); | ||
545 | /* note: scr->pt.r10 is already -1 */ | ||
546 | break; | ||
547 | } | ||
548 | case ERESTARTNOINTR: | ||
549 | if (IS_IA32_PROCESS(&scr->pt)) { | ||
550 | scr->pt.r8 = scr->pt.r1; | ||
551 | scr->pt.cr_iip -= 2; | ||
552 | } else | ||
553 | ia64_decrement_ip(&scr->pt); | ||
554 | restart = 0; /* don't restart twice if handle_signal() fails... */ | ||
555 | } | ||
556 | } | ||
557 | |||
558 | /* | ||
559 | * Whee! Actually deliver the signal. If the delivery failed, we need to | ||
560 | * continue to iterate in this loop so we can deliver the SIGSEGV... | ||
561 | */ | ||
562 | if (handle_signal(signr, &ka, &info, oldset, scr)) | ||
563 | return 1; | ||
564 | } | ||
565 | |||
566 | /* Did we come from a system call? */ | ||
567 | if (restart) { | ||
568 | /* Restart the system call - no handlers present */ | ||
569 | if (errno == ERESTARTNOHAND || errno == ERESTARTSYS || errno == ERESTARTNOINTR | ||
570 | || errno == ERESTART_RESTARTBLOCK) | ||
571 | { | ||
572 | if (IS_IA32_PROCESS(&scr->pt)) { | ||
573 | scr->pt.r8 = scr->pt.r1; | ||
574 | scr->pt.cr_iip -= 2; | ||
575 | if (errno == ERESTART_RESTARTBLOCK) | ||
576 | scr->pt.r8 = 0; /* x86 version of __NR_restart_syscall */ | ||
577 | } else { | ||
578 | /* | ||
579 | * Note: the syscall number is in r15 which is saved in | ||
580 | * pt_regs so all we need to do here is adjust ip so that | ||
581 | * the "break" instruction gets re-executed. | ||
582 | */ | ||
583 | ia64_decrement_ip(&scr->pt); | ||
584 | if (errno == ERESTART_RESTARTBLOCK) | ||
585 | scr->pt.r15 = __NR_restart_syscall; | ||
586 | } | ||
587 | } | ||
588 | } | ||
589 | return 0; | ||
590 | } | ||
591 | |||
592 | /* Set a delayed signal that was detected in MCA/INIT/NMI/PMI context where it | ||
593 | * could not be delivered. It is important that the target process is not | ||
594 | * allowed to do any more work in user space. Possible cases for the target | ||
595 | * process: | ||
596 | * | ||
597 | * - It is sleeping and will wake up soon. Store the data in the current task, | ||
598 | * the signal will be sent when the current task returns from the next | ||
599 | * interrupt. | ||
600 | * | ||
601 | * - It is running in user context. Store the data in the current task, the | ||
602 | * signal will be sent when the current task returns from the next interrupt. | ||
603 | * | ||
604 | * - It is running in kernel context on this or another cpu and will return to | ||
605 | * user context. Store the data in the target task, the signal will be sent | ||
606 | * to itself when the target task returns to user space. | ||
607 | * | ||
608 | * - It is running in kernel context on this cpu and will sleep before | ||
609 | * returning to user context. Because this is also the current task, the | ||
610 | * signal will not get delivered and the task could sleep indefinitely. | ||
611 | * Store the data in the idle task for this cpu, the signal will be sent | ||
612 | * after the idle task processes its next interrupt. | ||
613 | * | ||
614 | * To cover all cases, store the data in the target task, the current task and | ||
615 | * the idle task on this cpu. Whatever happens, the signal will be delivered | ||
616 | * to the target task before it can do any useful user space work. Multiple | ||
617 | * deliveries have no unwanted side effects. | ||
618 | * | ||
619 | * Note: This code is executed in MCA/INIT/NMI/PMI context, with interrupts | ||
620 | * disabled. It must not take any locks nor use kernel structures or services | ||
621 | * that require locks. | ||
622 | */ | ||
623 | |||
624 | /* To ensure that we get the right pid, check its start time. To avoid extra | ||
625 | * include files in thread_info.h, convert the task start_time to unsigned long, | ||
626 | * giving us a cycle time of > 580 years. | ||
627 | */ | ||
628 | static inline unsigned long | ||
629 | start_time_ul(const struct task_struct *t) | ||
630 | { | ||
631 | return t->start_time.tv_sec * NSEC_PER_SEC + t->start_time.tv_nsec; | ||
632 | } | ||
633 | |||
634 | void | ||
635 | set_sigdelayed(pid_t pid, int signo, int code, void __user *addr) | ||
636 | { | ||
637 | struct task_struct *t; | ||
638 | unsigned long start_time = 0; | ||
639 | int i; | ||
640 | |||
641 | for (i = 1; i <= 3; ++i) { | ||
642 | switch (i) { | ||
643 | case 1: | ||
644 | t = find_task_by_pid(pid); | ||
645 | if (t) | ||
646 | start_time = start_time_ul(t); | ||
647 | break; | ||
648 | case 2: | ||
649 | t = current; | ||
650 | break; | ||
651 | default: | ||
652 | t = idle_task(smp_processor_id()); | ||
653 | break; | ||
654 | } | ||
655 | |||
656 | if (!t) | ||
657 | return; | ||
658 | t->thread_info->sigdelayed.signo = signo; | ||
659 | t->thread_info->sigdelayed.code = code; | ||
660 | t->thread_info->sigdelayed.addr = addr; | ||
661 | t->thread_info->sigdelayed.start_time = start_time; | ||
662 | t->thread_info->sigdelayed.pid = pid; | ||
663 | wmb(); | ||
664 | set_tsk_thread_flag(t, TIF_SIGDELAYED); | ||
665 | } | ||
666 | } | ||
667 | |||
668 | /* Called from entry.S when it detects TIF_SIGDELAYED, a delayed signal that | ||
669 | * was detected in MCA/INIT/NMI/PMI context where it could not be delivered. | ||
670 | */ | ||
671 | |||
672 | void | ||
673 | do_sigdelayed(void) | ||
674 | { | ||
675 | struct siginfo siginfo; | ||
676 | pid_t pid; | ||
677 | struct task_struct *t; | ||
678 | |||
679 | clear_thread_flag(TIF_SIGDELAYED); | ||
680 | memset(&siginfo, 0, sizeof(siginfo)); | ||
681 | siginfo.si_signo = current_thread_info()->sigdelayed.signo; | ||
682 | siginfo.si_code = current_thread_info()->sigdelayed.code; | ||
683 | siginfo.si_addr = current_thread_info()->sigdelayed.addr; | ||
684 | pid = current_thread_info()->sigdelayed.pid; | ||
685 | t = find_task_by_pid(pid); | ||
686 | if (!t) | ||
687 | return; | ||
688 | if (current_thread_info()->sigdelayed.start_time != start_time_ul(t)) | ||
689 | return; | ||
690 | force_sig_info(siginfo.si_signo, &siginfo, t); | ||
691 | } | ||
diff --git a/arch/ia64/kernel/smp.c b/arch/ia64/kernel/smp.c new file mode 100644 index 000000000000..953095e2ce15 --- /dev/null +++ b/arch/ia64/kernel/smp.c | |||
@@ -0,0 +1,376 @@ | |||
1 | /* | ||
2 | * SMP Support | ||
3 | * | ||
4 | * Copyright (C) 1999 Walt Drummond <drummond@valinux.com> | ||
5 | * Copyright (C) 1999, 2001, 2003 David Mosberger-Tang <davidm@hpl.hp.com> | ||
6 | * | ||
7 | * Lots of stuff stolen from arch/alpha/kernel/smp.c | ||
8 | * | ||
9 | * 01/05/16 Rohit Seth <rohit.seth@intel.com> IA64-SMP functions. Reorganized | ||
10 | * the existing code (on the lines of x86 port). | ||
11 | * 00/09/11 David Mosberger <davidm@hpl.hp.com> Do loops_per_jiffy | ||
12 | * calibration on each CPU. | ||
13 | * 00/08/23 Asit Mallick <asit.k.mallick@intel.com> fixed logical processor id | ||
14 | * 00/03/31 Rohit Seth <rohit.seth@intel.com> Fixes for Bootstrap Processor | ||
15 | * & cpu_online_map now gets done here (instead of setup.c) | ||
16 | * 99/10/05 davidm Update to bring it in sync with new command-line processing | ||
17 | * scheme. | ||
18 | * 10/13/00 Goutham Rao <goutham.rao@intel.com> Updated smp_call_function and | ||
19 | * smp_call_function_single to resend IPI on timeouts | ||
20 | */ | ||
21 | #include <linux/module.h> | ||
22 | #include <linux/kernel.h> | ||
23 | #include <linux/sched.h> | ||
24 | #include <linux/init.h> | ||
25 | #include <linux/interrupt.h> | ||
26 | #include <linux/smp.h> | ||
27 | #include <linux/kernel_stat.h> | ||
28 | #include <linux/mm.h> | ||
29 | #include <linux/cache.h> | ||
30 | #include <linux/delay.h> | ||
31 | #include <linux/efi.h> | ||
32 | #include <linux/bitops.h> | ||
33 | |||
34 | #include <asm/atomic.h> | ||
35 | #include <asm/current.h> | ||
36 | #include <asm/delay.h> | ||
37 | #include <asm/machvec.h> | ||
38 | #include <asm/io.h> | ||
39 | #include <asm/irq.h> | ||
40 | #include <asm/page.h> | ||
41 | #include <asm/pgalloc.h> | ||
42 | #include <asm/pgtable.h> | ||
43 | #include <asm/processor.h> | ||
44 | #include <asm/ptrace.h> | ||
45 | #include <asm/sal.h> | ||
46 | #include <asm/system.h> | ||
47 | #include <asm/tlbflush.h> | ||
48 | #include <asm/unistd.h> | ||
49 | #include <asm/mca.h> | ||
50 | |||
/*
 * Structure and data for smp_call_function(). This is designed to minimise static memory
 * requirements. It also looks cleaner.
 */
/* Held by smp_call_function*() for the whole time call_data is in use. */
static __cacheline_aligned DEFINE_SPINLOCK(call_lock);

/* Describes one cross-CPU call; the sender keeps it on its own stack. */
struct call_data_struct {
	void (*func) (void *info);	/* function every target CPU runs */
	void *info;			/* argument handed to func */
	long wait;			/* non-zero: targets bump 'finished' when func returns */
	atomic_t started;		/* bumped by each target after it copied func/info/wait */
	atomic_t finished;		/* bumped by each target after func returned (wait only) */
};

/* Request currently in flight; written under call_lock and reset to NULL afterwards. */
static volatile struct call_data_struct *call_data;

/* Bit numbers used in the per-CPU ipi_operation word. */
#define IPI_CALL_FUNC 0
#define IPI_CPU_STOP 1

/* This needs to be cacheline aligned because it is written to by *other* CPUs. */
static DEFINE_PER_CPU(u64, ipi_operation) ____cacheline_aligned;

extern void cpu_halt (void);
74 | |||
/*
 * Take call_lock via spin_lock_irq().  Lets code outside this file hold the lock
 * that smp_call_function*() use; pair with unlock_ipi_calllock().
 */
void
lock_ipi_calllock(void)
{
	spin_lock_irq(&call_lock);
}
80 | |||
/*
 * Release call_lock via spin_unlock_irq(); counterpart of lock_ipi_calllock().
 */
void
unlock_ipi_calllock(void)
{
	spin_unlock_irq(&call_lock);
}
86 | |||
/*
 * Service routine for IPI_CPU_STOP: drop the calling CPU from cpu_online_map,
 * disable its interrupts and halt it.
 */
static void
stop_this_cpu (void)
{
	/*
	 * Remove this CPU:
	 */
	cpu_clear(smp_processor_id(), cpu_online_map);
	/* NOTE(review): max_xtp() presumably steers external interrupts away — confirm. */
	max_xtp();
	local_irq_disable();
	cpu_halt();
}
98 | |||
/*
 * Halt the calling CPU with interrupts disabled.  cpu_halt() is not expected to
 * return; if it does, BUG() and spin forever.
 */
void
cpu_die(void)
{
	max_xtp();
	local_irq_disable();
	cpu_halt();
	/* Should never be here */
	BUG();
	for (;;);
}
109 | |||
110 | irqreturn_t | ||
111 | handle_IPI (int irq, void *dev_id, struct pt_regs *regs) | ||
112 | { | ||
113 | int this_cpu = get_cpu(); | ||
114 | unsigned long *pending_ipis = &__ia64_per_cpu_var(ipi_operation); | ||
115 | unsigned long ops; | ||
116 | |||
117 | mb(); /* Order interrupt and bit testing. */ | ||
118 | while ((ops = xchg(pending_ipis, 0)) != 0) { | ||
119 | mb(); /* Order bit clearing and data access. */ | ||
120 | do { | ||
121 | unsigned long which; | ||
122 | |||
123 | which = ffz(~ops); | ||
124 | ops &= ~(1 << which); | ||
125 | |||
126 | switch (which) { | ||
127 | case IPI_CALL_FUNC: | ||
128 | { | ||
129 | struct call_data_struct *data; | ||
130 | void (*func)(void *info); | ||
131 | void *info; | ||
132 | int wait; | ||
133 | |||
134 | /* release the 'pointer lock' */ | ||
135 | data = (struct call_data_struct *) call_data; | ||
136 | func = data->func; | ||
137 | info = data->info; | ||
138 | wait = data->wait; | ||
139 | |||
140 | mb(); | ||
141 | atomic_inc(&data->started); | ||
142 | /* | ||
143 | * At this point the structure may be gone unless | ||
144 | * wait is true. | ||
145 | */ | ||
146 | (*func)(info); | ||
147 | |||
148 | /* Notify the sending CPU that the task is done. */ | ||
149 | mb(); | ||
150 | if (wait) | ||
151 | atomic_inc(&data->finished); | ||
152 | } | ||
153 | break; | ||
154 | |||
155 | case IPI_CPU_STOP: | ||
156 | stop_this_cpu(); | ||
157 | break; | ||
158 | |||
159 | default: | ||
160 | printk(KERN_CRIT "Unknown IPI on CPU %d: %lu\n", this_cpu, which); | ||
161 | break; | ||
162 | } | ||
163 | } while (ops); | ||
164 | mb(); /* Order data access and bit testing. */ | ||
165 | } | ||
166 | put_cpu(); | ||
167 | return IRQ_HANDLED; | ||
168 | } | ||
169 | |||
/*
 * Post operation bit <op> in <dest_cpu>'s ipi_operation word, then raise
 * IA64_IPI_VECTOR on that CPU.
 *
 * Called with preemption disabled.
 */
static inline void
send_IPI_single (int dest_cpu, int op)
{
	set_bit(op, &per_cpu(ipi_operation, dest_cpu));
	platform_send_ipi(dest_cpu, IA64_IPI_VECTOR, IA64_IPI_DM_INT, 0);
}
179 | |||
180 | /* | ||
181 | * Called with preeemption disabled. | ||
182 | */ | ||
183 | static inline void | ||
184 | send_IPI_allbutself (int op) | ||
185 | { | ||
186 | unsigned int i; | ||
187 | |||
188 | for (i = 0; i < NR_CPUS; i++) { | ||
189 | if (cpu_online(i) && i != smp_processor_id()) | ||
190 | send_IPI_single(i, op); | ||
191 | } | ||
192 | } | ||
193 | |||
194 | /* | ||
195 | * Called with preeemption disabled. | ||
196 | */ | ||
197 | static inline void | ||
198 | send_IPI_all (int op) | ||
199 | { | ||
200 | int i; | ||
201 | |||
202 | for (i = 0; i < NR_CPUS; i++) | ||
203 | if (cpu_online(i)) | ||
204 | send_IPI_single(i, op); | ||
205 | } | ||
206 | |||
/*
 * Post operation <op> to the calling CPU itself.
 *
 * Called with preemption disabled.
 */
static inline void
send_IPI_self (int op)
{
	send_IPI_single(smp_processor_id(), op);
}
215 | |||
/*
 * Raise IA64_IPI_RESCHEDULE on <cpu>.  Unlike send_IPI_single(), no bit is
 * posted in the target's ipi_operation word.
 *
 * Called with preemption disabled.
 */
void
smp_send_reschedule (int cpu)
{
	platform_send_ipi(cpu, IA64_IPI_RESCHEDULE, IA64_IPI_DM_INT, 0);
}
224 | |||
/*
 * Run local_flush_tlb_all() on every CPU through on_each_cpu().  The cast adapts
 * the function to the void (*)(void *) callback type; NULL is passed as argument.
 */
void
smp_flush_tlb_all (void)
{
	on_each_cpu((void (*)(void *))local_flush_tlb_all, NULL, 1, 1);
}
230 | |||
231 | void | ||
232 | smp_flush_tlb_mm (struct mm_struct *mm) | ||
233 | { | ||
234 | /* this happens for the common case of a single-threaded fork(): */ | ||
235 | if (likely(mm == current->active_mm && atomic_read(&mm->mm_users) == 1)) | ||
236 | { | ||
237 | local_finish_flush_tlb_mm(mm); | ||
238 | return; | ||
239 | } | ||
240 | |||
241 | /* | ||
242 | * We could optimize this further by using mm->cpu_vm_mask to track which CPUs | ||
243 | * have been running in the address space. It's not clear that this is worth the | ||
244 | * trouble though: to avoid races, we have to raise the IPI on the target CPU | ||
245 | * anyhow, and once a CPU is interrupted, the cost of local_flush_tlb_all() is | ||
246 | * rather trivial. | ||
247 | */ | ||
248 | on_each_cpu((void (*)(void *))local_finish_flush_tlb_mm, mm, 1, 1); | ||
249 | } | ||
250 | |||
/*
 * Run a function on another CPU
 *  <cpuid>	Logical number of the CPU that should run <func>.
 *  <func>	The function to run. This must be fast and non-blocking.
 *  <info>	An arbitrary pointer to pass to the function.
 *  <nonatomic>	Currently unused.
 *  <wait>	If true, wait until function has completed on other CPUs.
 *  [RETURNS]   0 on success, else a negative status code
 *		(-EBUSY when <cpuid> is the calling CPU).
 *
 * Does not return until the remote CPU is nearly ready to execute <func>
 * or is or has executed.
 */

int
smp_call_function_single (int cpuid, void (*func) (void *info), void *info, int nonatomic,
			  int wait)
{
	struct call_data_struct data;
	int cpus = 1;
	int me = get_cpu(); /* prevent preemption and reschedule on another processor */

	if (cpuid == me) {
		printk("%s: trying to call self\n", __FUNCTION__);
		put_cpu();
		return -EBUSY;
	}

	/* The request lives in this stack frame; the target reaches it through call_data. */
	data.func = func;
	data.info = info;
	atomic_set(&data.started, 0);
	data.wait = wait;
	if (wait)
		atomic_set(&data.finished, 0);

	spin_lock_bh(&call_lock);

	call_data = &data;
	mb();	/* ensure store to call_data precedes setting of IPI_CALL_FUNC */
  	send_IPI_single(cpuid, IPI_CALL_FUNC);

	/* Wait for response */
	/* 'started' is bumped by the target once it has copied func/info/wait. */
	while (atomic_read(&data.started) != cpus)
		cpu_relax();

	if (wait)
		while (atomic_read(&data.finished) != cpus)
			cpu_relax();
	call_data = NULL;

	spin_unlock_bh(&call_lock);
	put_cpu();
	return 0;
}
EXPORT_SYMBOL(smp_call_function_single);
304 | |||
/*
 * this function sends a 'generic call function' IPI to all other CPUs
 * in the system.
 */

/*
 *  [SUMMARY]	Run a function on all other CPUs.
 *  <func>	The function to run. This must be fast and non-blocking.
 *  <info>	An arbitrary pointer to pass to the function.
 *  <nonatomic>	currently unused.
 *  <wait>	If true, wait (atomically) until function has completed on other CPUs.
 *  [RETURNS]   0 on success, else a negative status code.
 *
 * Does not return until remote CPUs are nearly ready to execute <func> or are or have
 * executed.
 *
 * You must not call this function with disabled interrupts or from a
 * hardware interrupt handler or from a bottom half handler.
 */
int
smp_call_function (void (*func) (void *info), void *info, int nonatomic, int wait)
{
	struct call_data_struct data;
	/* Number of CPUs that have to respond: everyone online except the caller. */
	int cpus = num_online_cpus()-1;

	/* Nobody else is online, so there is nothing to call. */
	if (!cpus)
		return 0;

	/* Can deadlock when called with interrupts disabled */
	WARN_ON(irqs_disabled());

	/* The request lives in this stack frame; targets reach it through call_data. */
	data.func = func;
	data.info = info;
	atomic_set(&data.started, 0);
	data.wait = wait;
	if (wait)
		atomic_set(&data.finished, 0);

	spin_lock(&call_lock);

	call_data = &data;
	mb();	/* ensure store to call_data precedes setting of IPI_CALL_FUNC */
	send_IPI_allbutself(IPI_CALL_FUNC);

	/* Wait for response */
	/* Every target bumps 'started' once it has copied func/info/wait. */
	while (atomic_read(&data.started) != cpus)
		cpu_relax();

	if (wait)
		while (atomic_read(&data.finished) != cpus)
			cpu_relax();
	call_data = NULL;

	spin_unlock(&call_lock);
	return 0;
}
EXPORT_SYMBOL(smp_call_function);
362 | |||
/*
 * this function calls the 'stop' function on all other CPUs in the system.
 * It only posts IPI_CPU_STOP; it does not wait for the targets to halt.
 */
void
smp_send_stop (void)
{
	send_IPI_allbutself(IPI_CPU_STOP);
}
371 | |||
/*
 * Changing the profiling timer multiplier is not implemented here.
 *  <multiplier>	Ignored.
 *  [RETURNS]	Always -EINVAL.
 */
int __init
setup_profiling_timer (unsigned int multiplier)
{
	return -EINVAL;
}
diff --git a/arch/ia64/kernel/smpboot.c b/arch/ia64/kernel/smpboot.c new file mode 100644 index 000000000000..5318f0cbfc26 --- /dev/null +++ b/arch/ia64/kernel/smpboot.c | |||
@@ -0,0 +1,692 @@ | |||
1 | /* | ||
2 | * SMP boot-related support | ||
3 | * | ||
4 | * Copyright (C) 1998-2003 Hewlett-Packard Co | ||
5 | * David Mosberger-Tang <davidm@hpl.hp.com> | ||
6 | * | ||
7 | * 01/05/16 Rohit Seth <rohit.seth@intel.com> Moved SMP booting functions from smp.c to here. | ||
8 | * 01/04/27 David Mosberger <davidm@hpl.hp.com> Added ITC synching code. | ||
9 | * 02/07/31 David Mosberger <davidm@hpl.hp.com> Switch over to hotplug-CPU boot-sequence. | ||
10 | * smp_boot_cpus()/smp_commence() is replaced by | ||
11 | * smp_prepare_cpus()/__cpu_up()/smp_cpus_done(). | ||
12 | */ | ||
13 | #include <linux/config.h> | ||
14 | |||
15 | #include <linux/module.h> | ||
16 | #include <linux/acpi.h> | ||
17 | #include <linux/bootmem.h> | ||
18 | #include <linux/cpu.h> | ||
19 | #include <linux/delay.h> | ||
20 | #include <linux/init.h> | ||
21 | #include <linux/interrupt.h> | ||
22 | #include <linux/irq.h> | ||
23 | #include <linux/kernel.h> | ||
24 | #include <linux/kernel_stat.h> | ||
25 | #include <linux/mm.h> | ||
26 | #include <linux/notifier.h> | ||
27 | #include <linux/smp.h> | ||
28 | #include <linux/smp_lock.h> | ||
29 | #include <linux/spinlock.h> | ||
30 | #include <linux/efi.h> | ||
31 | #include <linux/percpu.h> | ||
32 | #include <linux/bitops.h> | ||
33 | |||
34 | #include <asm/atomic.h> | ||
35 | #include <asm/cache.h> | ||
36 | #include <asm/current.h> | ||
37 | #include <asm/delay.h> | ||
38 | #include <asm/ia32.h> | ||
39 | #include <asm/io.h> | ||
40 | #include <asm/irq.h> | ||
41 | #include <asm/machvec.h> | ||
42 | #include <asm/mca.h> | ||
43 | #include <asm/page.h> | ||
44 | #include <asm/pgalloc.h> | ||
45 | #include <asm/pgtable.h> | ||
46 | #include <asm/processor.h> | ||
47 | #include <asm/ptrace.h> | ||
48 | #include <asm/sal.h> | ||
49 | #include <asm/system.h> | ||
50 | #include <asm/tlbflush.h> | ||
51 | #include <asm/unistd.h> | ||
52 | |||
/* Set to non-zero to turn the Dprintk() calls in this file into printk(). */
#define SMP_DEBUG 0

#if SMP_DEBUG
#define Dprintk(x...) printk(x)
#else
#define Dprintk(x...)
#endif


/*
 * ITC synchronization related stuff:
 */
/*
 * Indices into go[], the mailbox shared by sync_master() and get_delta().
 * NOTE(review): the SLAVE slot is SMP_CACHE_BYTES/8 entries away from MASTER,
 * presumably to keep both flags in separate cache lines — confirm.
 */
#define MASTER 0
#define SLAVE (SMP_CACHE_BYTES/8)

#define NUM_ROUNDS 64 /* magic value */
#define NUM_ITERS 5 /* likewise */

/* Held by ia64_sync_itc() around its measurement/adjustment loop. */
static DEFINE_SPINLOCK(itc_sync_lock);
static volatile unsigned long go[SLAVE + 1];

/* Set to non-zero to make ia64_sync_itc() record and print every round. */
#define DEBUG_ITC_SYNC 0

extern void __devinit calibrate_delay (void);
extern void start_ap (void);
extern unsigned long ia64_iobase;

/* Idle task that do_boot_cpu() created for the AP it is starting. */
task_t *task_for_booting_cpu;

/*
 * State for each CPU
 */
DEFINE_PER_CPU(int, cpu_state);

/* Bitmasks of currently online, and possible CPUs */
cpumask_t cpu_online_map;
EXPORT_SYMBOL(cpu_online_map);
cpumask_t cpu_possible_map;
EXPORT_SYMBOL(cpu_possible_map);

/* which logical CPU number maps to which CPU (physical APIC ID) */
volatile int ia64_cpu_to_sapicid[NR_CPUS];
EXPORT_SYMBOL(ia64_cpu_to_sapicid);

/* CPUs that have finished smp_callin(); the boot CPU is added explicitly. */
static volatile cpumask_t cpu_callin_map;

struct smp_boot_data smp_boot_data __initdata;

unsigned long ap_wakeup_vector = -1; /* External Int use to wakeup APs */

/* Set to 1 by the "nointroute" boot option. */
char __initdata no_int_routing;

unsigned char smp_int_redirect; /* are INT and IPI redirectable by the chipset? */
106 | |||
/*
 * Handler for the "nointroute" boot option: sets no_int_routing and logs it.
 *  <str>	Remainder of the option string; unused.
 *  [RETURNS]	Always 1.
 */
static int __init
nointroute (char *str)
{
	no_int_routing = 1;
	printk ("no_int_routing on\n");
	return 1;
}

__setup("nointroute", nointroute);
116 | |||
/*
 * Master side of the ITC synchronization handshake; ia64_sync_itc() runs it on
 * the master CPU through smp_call_function_single().  With interrupts off it
 * answers NUM_ROUNDS*NUM_ITERS requests: whenever go[MASTER] becomes non-zero it
 * clears the flag and replies with its current ITC value in go[SLAVE].
 *  <arg>	Unused.
 */
void
sync_master (void *arg)
{
	unsigned long flags, i;

	/* ia64_sync_itc() spins on this flag until the master is ready. */
	go[MASTER] = 0;

	local_irq_save(flags);
	{
		for (i = 0; i < NUM_ROUNDS*NUM_ITERS; ++i) {
			/* Busy-wait for the next request from the slave. */
			while (!go[MASTER]);
			go[MASTER] = 0;
			go[SLAVE] = ia64_get_itc();
		}
	}
	local_irq_restore(flags);
}
134 | |||
/*
 * Return the number of cycles by which our itc differs from the itc on the master
 * (time-keeper) CPU.  A positive number indicates our itc is ahead of the master,
 * negative that it is behind.
 *
 * Takes NUM_ITERS samples and keeps the one with the shortest roundtrip.
 *  <rt>	Receives the roundtrip time of that sample (best_t1 - best_t0).
 *  <master>	Receives the master's timestamp relative to best_t0.
 */
static inline long
get_delta (long *rt, long *master)
{
	unsigned long best_t0 = 0, best_t1 = ~0UL, best_tm = 0;
	unsigned long tcenter, t0, t1, tm;
	long i;

	for (i = 0; i < NUM_ITERS; ++i) {
		t0 = ia64_get_itc();
		/* Ask the master for its ITC and spin until the reply arrives. */
		go[MASTER] = 1;
		while (!(tm = go[SLAVE]));
		go[SLAVE] = 0;
		t1 = ia64_get_itc();

		/* Keep the sample with the smallest roundtrip seen so far. */
		if (t1 - t0 < best_t1 - best_t0)
			best_t0 = t0, best_t1 = t1, best_tm = tm;
	}

	*rt = best_t1 - best_t0;
	*master = best_tm - best_t0;

	/* average best_t0 and best_t1 without overflow: */
	tcenter = (best_t0/2 + best_t1/2);
	/* Both halves dropped a 0.5, so add the lost cycle back. */
	if (best_t0 % 2 + best_t1 % 2 == 2)
		++tcenter;
	return tcenter - best_tm;
}
167 | |||
168 | /* | ||
169 | * Synchronize ar.itc of the current (slave) CPU with the ar.itc of the MASTER CPU | ||
170 | * (normally the time-keeper CPU). We use a closed loop to eliminate the possibility of | ||
171 | * unaccounted-for errors (such as getting a machine check in the middle of a calibration | ||
172 | * step). The basic idea is for the slave to ask the master what itc value it has and to | ||
173 | * read its own itc before and after the master responds. Each iteration gives us three | ||
174 | * timestamps: | ||
175 | * | ||
176 | * slave master | ||
177 | * | ||
178 | * t0 ---\ | ||
179 | * ---\ | ||
180 | * ---> | ||
181 | * tm | ||
182 | * /--- | ||
183 | * /--- | ||
184 | * t1 <--- | ||
185 | * | ||
186 | * | ||
187 | * The goal is to adjust the slave's ar.itc such that tm falls exactly half-way between t0 | ||
188 | * and t1. If we achieve this, the clocks are synchronized provided the interconnect | ||
189 | * between the slave and the master is symmetric. Even if the interconnect were | ||
190 | * asymmetric, we would still know that the synchronization error is smaller than the | ||
191 | * roundtrip latency (t1 - t0). | ||
192 | * | ||
193 | * When the interconnect is quiet and symmetric, this lets us synchronize the itc to | ||
194 | * within one or two cycles. However, we can only *guarantee* that the synchronization is | ||
195 | * accurate to within a round-trip time, which is typically in the range of several | ||
196 | * hundred cycles (e.g., ~500 cycles). In practice, this means that the itc's are usually | ||
197 | * almost perfectly synchronized, but we shouldn't assume that the accuracy is much better | ||
198 | * than half a micro second or so. | ||
199 | */ | ||
200 | void | ||
201 | ia64_sync_itc (unsigned int master) | ||
202 | { | ||
203 | long i, delta, adj, adjust_latency = 0, done = 0; | ||
204 | unsigned long flags, rt, master_time_stamp, bound; | ||
205 | #if DEBUG_ITC_SYNC | ||
206 | struct { | ||
207 | long rt; /* roundtrip time */ | ||
208 | long master; /* master's timestamp */ | ||
209 | long diff; /* difference between midpoint and master's timestamp */ | ||
210 | long lat; /* estimate of itc adjustment latency */ | ||
211 | } t[NUM_ROUNDS]; | ||
212 | #endif | ||
213 | |||
214 | /* | ||
215 | * Make sure local timer ticks are disabled while we sync. If | ||
216 | * they were enabled, we'd have to worry about nasty issues | ||
217 | * like setting the ITC ahead of (or a long time before) the | ||
218 | * next scheduled tick. | ||
219 | */ | ||
220 | BUG_ON((ia64_get_itv() & (1 << 16)) == 0); | ||
221 | |||
222 | go[MASTER] = 1; | ||
223 | |||
224 | if (smp_call_function_single(master, sync_master, NULL, 1, 0) < 0) { | ||
225 | printk(KERN_ERR "sync_itc: failed to get attention of CPU %u!\n", master); | ||
226 | return; | ||
227 | } | ||
228 | |||
229 | while (go[MASTER]); /* wait for master to be ready */ | ||
230 | |||
231 | spin_lock_irqsave(&itc_sync_lock, flags); | ||
232 | { | ||
233 | for (i = 0; i < NUM_ROUNDS; ++i) { | ||
234 | delta = get_delta(&rt, &master_time_stamp); | ||
235 | if (delta == 0) { | ||
236 | done = 1; /* let's lock on to this... */ | ||
237 | bound = rt; | ||
238 | } | ||
239 | |||
240 | if (!done) { | ||
241 | if (i > 0) { | ||
242 | adjust_latency += -delta; | ||
243 | adj = -delta + adjust_latency/4; | ||
244 | } else | ||
245 | adj = -delta; | ||
246 | |||
247 | ia64_set_itc(ia64_get_itc() + adj); | ||
248 | } | ||
249 | #if DEBUG_ITC_SYNC | ||
250 | t[i].rt = rt; | ||
251 | t[i].master = master_time_stamp; | ||
252 | t[i].diff = delta; | ||
253 | t[i].lat = adjust_latency/4; | ||
254 | #endif | ||
255 | } | ||
256 | } | ||
257 | spin_unlock_irqrestore(&itc_sync_lock, flags); | ||
258 | |||
259 | #if DEBUG_ITC_SYNC | ||
260 | for (i = 0; i < NUM_ROUNDS; ++i) | ||
261 | printk("rt=%5ld master=%5ld diff=%5ld adjlat=%5ld\n", | ||
262 | t[i].rt, t[i].master, t[i].diff, t[i].lat); | ||
263 | #endif | ||
264 | |||
265 | printk(KERN_INFO "CPU %d: synchronized ITC with CPU %u (last diff %ld cycles, " | ||
266 | "maxerr %lu cycles)\n", smp_processor_id(), master, delta, rt); | ||
267 | } | ||
268 | |||
/*
 * Ideally sets up per-cpu profiling hooks.  Doesn't do much now...
 * (the body is empty; the call sites are kept as placeholders).
 */
static inline void __devinit
smp_setup_percpu_timer (void)
{
}
276 | |||
/*
 * Per-AP bring-up, called from start_secondary(): mark the CPU online, set up
 * its CMC vector and perfmon state, enable interrupts, synchronize the ITC with
 * CPU 0 (unless the platform reports ITC drift), calibrate the delay loop and
 * finally set the CPU's bit in cpu_callin_map so the booting CPU can continue.
 */
static void __devinit
smp_callin (void)
{
	int cpuid, phys_id;
	extern void ia64_init_itm(void);

#ifdef CONFIG_PERFMON
	extern void pfm_init_percpu(void);
#endif

	cpuid = smp_processor_id();
	phys_id = hard_smp_processor_id();

	if (cpu_online(cpuid)) {
		printk(KERN_ERR "huh, phys CPU#0x%x, CPU#0x%x already present??\n",
		       phys_id, cpuid);
		BUG();
	}

	/* Go online under call_lock, i.e. not while a cross-CPU call is in flight. */
	lock_ipi_calllock();
	cpu_set(cpuid, cpu_online_map);
	unlock_ipi_calllock();

	smp_setup_percpu_timer();

	ia64_mca_cmc_vector_setup();	/* Setup vector on AP */

#ifdef CONFIG_PERFMON
	pfm_init_percpu();
#endif

	local_irq_enable();

	if (!(sal_platform_features & IA64_SAL_PLATFORM_FEATURE_ITC_DRIFT)) {
		/*
		 * Synchronize the ITC with the BP.  Need to do this after irqs are
		 * enabled because ia64_sync_itc() calls smp_call_function_single(), which
		 * calls spin_unlock_bh(), which calls local_bh_enable(), which bugs out
		 * if irqs are not enabled...
		 */
		Dprintk("Going to syncup ITC with BP.\n");
		ia64_sync_itc(0);
	}

	/*
	 * Get our bogomips.
	 */
	ia64_init_itm();
	calibrate_delay();
	local_cpu_data->loops_per_jiffy = loops_per_jiffy;

#ifdef CONFIG_IA32_SUPPORT
	ia32_gdt_init();
#endif

	/*
	 * Allow the master to continue.
	 */
	cpu_set(cpuid, cpu_callin_map);
	Dprintk("Stack on CPU %d at about %p\n",cpuid, &cpuid);
}
338 | |||
339 | |||
/*
 * Activate a secondary processor.  head.S calls this.
 *  <unused>	Ignored.
 *
 * Initializes the CPU, reports in through smp_callin() and then enters
 * cpu_idle().  The trailing "return 0" only runs if cpu_idle() returns.
 */
int __devinit
start_secondary (void *unused)
{
	/* Early console may use I/O ports */
	ia64_set_kr(IA64_KR_IO_BASE, __pa(ia64_iobase));

	Dprintk("start_secondary: starting CPU 0x%x\n", hard_smp_processor_id());
	efi_map_pal_code();
	cpu_init();
	smp_callin();

	cpu_idle();
	return 0;
}
357 | |||
/*
 * Register frame to use for an idle task: none.
 *  <regs>	Ignored.
 *  [RETURNS]	Always NULL.
 */
struct pt_regs * __devinit idle_regs(struct pt_regs *regs)
{
	return NULL;
}
362 | |||
/* Request/response record exchanged between do_boot_cpu() and do_fork_idle(). */
struct create_idle {
	struct task_struct *idle;	/* out: result of fork_idle() */
	struct completion done;		/* completed once 'idle' has been filled in */
	int cpu;			/* in: CPU the idle task is created for */
};
368 | |||
/*
 * Work function: fork the idle task for c_idle->cpu, store the result in
 * c_idle->idle and signal c_idle->done.
 *  <_c_idle>	Pointer to the struct create_idle describing the request.
 */
void
do_fork_idle(void *_c_idle)
{
	struct create_idle *c_idle = _c_idle;

	c_idle->idle = fork_idle(c_idle->cpu);
	complete(&c_idle->done);
}
377 | |||
/*
 * Start one AP: create its idle task, send it the wakeup vector and wait for it
 * to report in through cpu_callin_map.
 *  <sapicid>	Physical id of the CPU (only used in messages here).
 *  <cpu>	Logical CPU number to start.
 *  [RETURNS]	0 once the AP has called in, -EINVAL if it did not within the
 *		timeout.  Panics if the idle task cannot be forked.
 */
static int __devinit
do_boot_cpu (int sapicid, int cpu)
{
	int timeout;
	struct create_idle c_idle = {
		.cpu = cpu,
		.done = COMPLETION_INITIALIZER(c_idle.done),
	};
	DECLARE_WORK(work, do_fork_idle, &c_idle);
	/*
	 * We can't use kernel_thread since we must avoid to reschedule the child.
	 */
	/* Fork directly when keventd is unavailable or we are keventd ourselves. */
	if (!keventd_up() || current_is_keventd())
		work.func(work.data);
	else {
		schedule_work(&work);
		wait_for_completion(&c_idle.done);
	}

	if (IS_ERR(c_idle.idle))
		panic("failed fork for CPU %d", cpu);
	task_for_booting_cpu = c_idle.idle;

	Dprintk("Sending wakeup vector %lu to AP 0x%x/0x%x.\n", ap_wakeup_vector, cpu, sapicid);

	platform_send_ipi(cpu, ap_wakeup_vector, IA64_IPI_DM_INT, 0);

	/*
	 * Wait 10s total for the AP to start
	 */
	/* 100000 polls, 100us apart. */
	Dprintk("Waiting on callin_map ...");
	for (timeout = 0; timeout < 100000; timeout++) {
		if (cpu_isset(cpu, cpu_callin_map))
			break;  /* It has booted */
		udelay(100);
	}
	Dprintk("\n");

	if (!cpu_isset(cpu, cpu_callin_map)) {
		printk(KERN_ERR "Processor 0x%x/0x%x is stuck.\n", cpu, sapicid);
		/* Forget the mapping so that __cpu_up() rejects this CPU from now on. */
		ia64_cpu_to_sapicid[cpu] = -1;
		cpu_clear(cpu, cpu_online_map);  /* was set in smp_callin() */
		return -EINVAL;
	}
	return 0;
}
424 | |||
/*
 * Handler for the "decay=" boot option.  The value is parsed with get_option()
 * and then discarded, so the option is accepted but has no effect here.
 *  [RETURNS]	Always 1.
 */
static int __init
decay (char *str)
{
	int ticks;
	get_option (&str, &ticks);
	return 1;
}

__setup("decay=", decay);
434 | |||
435 | /* | ||
436 | * Initialize the logical CPU number to SAPICID mapping | ||
437 | */ | ||
438 | void __init | ||
439 | smp_build_cpu_map (void) | ||
440 | { | ||
441 | int sapicid, cpu, i; | ||
442 | int boot_cpu_id = hard_smp_processor_id(); | ||
443 | |||
444 | for (cpu = 0; cpu < NR_CPUS; cpu++) { | ||
445 | ia64_cpu_to_sapicid[cpu] = -1; | ||
446 | #ifdef CONFIG_HOTPLUG_CPU | ||
447 | cpu_set(cpu, cpu_possible_map); | ||
448 | #endif | ||
449 | } | ||
450 | |||
451 | ia64_cpu_to_sapicid[0] = boot_cpu_id; | ||
452 | cpus_clear(cpu_present_map); | ||
453 | cpu_set(0, cpu_present_map); | ||
454 | cpu_set(0, cpu_possible_map); | ||
455 | for (cpu = 1, i = 0; i < smp_boot_data.cpu_count; i++) { | ||
456 | sapicid = smp_boot_data.cpu_phys_id[i]; | ||
457 | if (sapicid == boot_cpu_id) | ||
458 | continue; | ||
459 | cpu_set(cpu, cpu_present_map); | ||
460 | cpu_set(cpu, cpu_possible_map); | ||
461 | ia64_cpu_to_sapicid[cpu] = sapicid; | ||
462 | cpu++; | ||
463 | } | ||
464 | } | ||
465 | |||
#ifdef CONFIG_NUMA

/* on which node is each logical CPU (one cacheline even for 64 CPUs) */
u8 cpu_to_node_map[NR_CPUS] __cacheline_aligned;
EXPORT_SYMBOL(cpu_to_node_map);
/* which logical CPUs are on which nodes */
cpumask_t node_to_cpu_mask[MAX_NUMNODES] __cacheline_aligned;

/*
 * Build cpu to node mapping and initialize the per node cpu masks.
 *
 * A CPU whose physical id is not found in node_cpuid[] is mapped to node 0 in
 * cpu_to_node_map[] but is not added to any node_to_cpu_mask[].
 */
void __init
build_cpu_to_node_map (void)
{
	int cpu, i, node;

	for(node=0; node<MAX_NUMNODES; node++)
		cpus_clear(node_to_cpu_mask[node]);
	for(cpu = 0; cpu < NR_CPUS; ++cpu) {
		/*
		 * All Itanium NUMA platforms I know use ACPI, so maybe we
		 * can drop this ifdef completely.                    [EF]
		 */
#ifdef CONFIG_ACPI_NUMA
		/* Look the CPU up by physical id; -1 means "not found". */
		node = -1;
		for (i = 0; i < NR_CPUS; ++i)
			if (cpu_physical_id(cpu) == node_cpuid[i].phys_id) {
				node = node_cpuid[i].nid;
				break;
			}
#else
#		error Fixme: Dunno how to build CPU-to-node map.
#endif
		cpu_to_node_map[cpu] = (node >= 0) ? node : 0;
		if (node >= 0)
			cpu_set(cpu, node_to_cpu_mask[node]);
	}
}

#endif /* CONFIG_NUMA */
506 | |||
/*
 * Prepare for SMP bring-up: record the boot CPU as online and called-in and fill
 * in its per-CPU data.  No AP is woken up here.
 *  <max_cpus>	0 disables SMP: the online, present and possible maps are then
 *		reduced to CPU 0 only.
 */
void __init
smp_prepare_cpus (unsigned int max_cpus)
{
	int boot_cpu_id = hard_smp_processor_id();

	/*
	 * Initialize the per-CPU profiling counter/multiplier
	 */

	smp_setup_percpu_timer();

	/*
	 * We have the boot CPU online for sure.
	 */
	cpu_set(0, cpu_online_map);
	cpu_set(0, cpu_callin_map);

	local_cpu_data->loops_per_jiffy = loops_per_jiffy;
	ia64_cpu_to_sapicid[0] = boot_cpu_id;

	printk(KERN_INFO "Boot processor id 0x%x/0x%x\n", 0, boot_cpu_id);

	current_thread_info()->cpu = 0;

	/*
	 * If SMP should be disabled, then really disable it!
	 */
	if (!max_cpus) {
		printk(KERN_INFO "SMP mode deactivated.\n");
		cpus_clear(cpu_online_map);
		cpus_clear(cpu_present_map);
		cpus_clear(cpu_possible_map);
		cpu_set(0, cpu_online_map);
		cpu_set(0, cpu_present_map);
		cpu_set(0, cpu_possible_map);
		return;
	}
}
548 | |||
/* Mark the calling (boot) CPU as online and as having called in. */
void __devinit smp_prepare_boot_cpu(void)
{
	cpu_set(smp_processor_id(), cpu_online_map);
	cpu_set(smp_processor_id(), cpu_callin_map);
}
554 | |||
555 | #ifdef CONFIG_HOTPLUG_CPU | ||
556 | extern void fixup_irqs(void); | ||
/*
 * Ask an already-booted CPU to come back: set its cpu_state to CPU_UP_PREPARE and
 * busy-wait until it shows up in cpu_online_map.
 * NOTE(review): presumably the parked CPU polls cpu_state — confirm.
 *  [RETURNS]	Always 0.
 *
 * must be called with cpucontrol mutex held
 */
static int __devinit cpu_enable(unsigned int cpu)
{
	per_cpu(cpu_state,cpu) = CPU_UP_PREPARE;
	wmb();

	while (!cpu_online(cpu))
		cpu_relax();
	return 0;
}
567 | |||
/*
 * Take the calling CPU out of service: fix up its irqs and flush its TLB.
 *  [RETURNS]	0 on success, -EBUSY for CPU 0, which may not be disabled.
 */
int __cpu_disable(void)
{
	int cpu = smp_processor_id();

	/*
	 * dont permit boot processor for now
	 */
	if (cpu == 0)
		return -EBUSY;

	fixup_irqs();
	local_flush_tlb_all();
	printk ("Disabled cpu %u\n", smp_processor_id());
	return 0;
}
583 | |||
/*
 * Wait for <cpu> to report CPU_DEAD in its cpu_state.  Polls up to 100 times,
 * sleeping 100ms between polls, and logs an error if the CPU never gets there.
 */
void __cpu_die(unsigned int cpu)
{
	unsigned int i;

	for (i = 0; i < 100; i++) {
		/* They ack this in play_dead by setting CPU_DEAD */
		if (per_cpu(cpu_state, cpu) == CPU_DEAD)
		{
			/*
			 * TBD: Enable this when physical removal is supported,
			 * or when the processor is put in
			 * SAL_BOOT_RENDEZ mode
			 * cpu_clear(cpu, cpu_callin_map);
			 */
			return;
		}
		msleep(100);
	}
	printk(KERN_ERR "CPU %u didn't die...\n", cpu);
}
604 | #else /* !CONFIG_HOTPLUG_CPU */ | ||
/* Without CPU hotplug there is nothing to re-enable; always returns 0. */
static int __devinit cpu_enable(unsigned int cpu)
{
	return 0;
}
609 | |||
/* Without CPU hotplug a CPU cannot be disabled; always returns -ENOSYS. */
int __cpu_disable(void)
{
	return -ENOSYS;
}
614 | |||
/* Must never be reached without CPU hotplug, since __cpu_disable() always fails. */
void __cpu_die(unsigned int cpu)
{
	/* We said "no" in __cpu_disable */
	BUG();
}
620 | #endif /* CONFIG_HOTPLUG_CPU */ | ||
621 | |||
622 | void | ||
623 | smp_cpus_done (unsigned int dummy) | ||
624 | { | ||
625 | int cpu; | ||
626 | unsigned long bogosum = 0; | ||
627 | |||
628 | /* | ||
629 | * Allow the user to impress friends. | ||
630 | */ | ||
631 | |||
632 | for (cpu = 0; cpu < NR_CPUS; cpu++) | ||
633 | if (cpu_online(cpu)) | ||
634 | bogosum += cpu_data(cpu)->loops_per_jiffy; | ||
635 | |||
636 | printk(KERN_INFO "Total of %d processors activated (%lu.%02lu BogoMIPS).\n", | ||
637 | (int)num_online_cpus(), bogosum/(500000/HZ), (bogosum/(5000/HZ))%100); | ||
638 | } | ||
639 | |||
640 | int __devinit | ||
641 | __cpu_up (unsigned int cpu) | ||
642 | { | ||
643 | int ret; | ||
644 | int sapicid; | ||
645 | |||
646 | sapicid = ia64_cpu_to_sapicid[cpu]; | ||
647 | if (sapicid == -1) | ||
648 | return -EINVAL; | ||
649 | |||
650 | /* | ||
651 | * Already booted.. just enable and get outa idle lool | ||
652 | */ | ||
653 | if (cpu_isset(cpu, cpu_callin_map)) | ||
654 | { | ||
655 | cpu_enable(cpu); | ||
656 | local_irq_enable(); | ||
657 | while (!cpu_isset(cpu, cpu_online_map)) | ||
658 | mb(); | ||
659 | return 0; | ||
660 | } | ||
661 | /* Processor goes to start_secondary(), sets online flag */ | ||
662 | ret = do_boot_cpu(sapicid, cpu); | ||
663 | if (ret < 0) | ||
664 | return ret; | ||
665 | |||
666 | return 0; | ||
667 | } | ||
668 | |||
/*
 * Assume that CPU's have been discovered by some platform-dependent interface.  For
 * SoftSDV/Lion, that would be ACPI.
 *
 * Setup of the IPI irq handler is done in irq.c:init_IRQ_SMP().
 *
 * Registers start_ap with SAL as the OS boot rendezvous vector; a failure is
 * logged and not otherwise handled.
 */
void __init
init_smp_config(void)
{
	/* start_ap is read as a function descriptor: entry address plus gp value. */
	struct fptr {
		unsigned long fp;
		unsigned long gp;
	} *ap_startup;
	long sal_ret;

	/* Tell SAL where to drop the AP's.  */
	ap_startup = (struct fptr *) start_ap;
	/* SAL is handed the translated (ia64_tpa) form of both values. */
	sal_ret = ia64_sal_set_vectors(SAL_VECTOR_OS_BOOT_RENDEZ,
				       ia64_tpa(ap_startup->fp), ia64_tpa(ap_startup->gp), 0, 0, 0, 0);
	if (sal_ret < 0)
		printk(KERN_ERR "SMP: Can't set SAL AP Boot Rendezvous: %s\n",
		       ia64_sal_strerror(sal_ret));
}
692 | |||
diff --git a/arch/ia64/kernel/sys_ia64.c b/arch/ia64/kernel/sys_ia64.c new file mode 100644 index 000000000000..3ac216e1c8bb --- /dev/null +++ b/arch/ia64/kernel/sys_ia64.c | |||
@@ -0,0 +1,298 @@ | |||
1 | /* | ||
2 | * This file contains various system calls that have different calling | ||
3 | * conventions on different platforms. | ||
4 | * | ||
5 | * Copyright (C) 1999-2000, 2002-2003, 2005 Hewlett-Packard Co | ||
6 | * David Mosberger-Tang <davidm@hpl.hp.com> | ||
7 | */ | ||
8 | #include <linux/config.h> | ||
9 | #include <linux/errno.h> | ||
10 | #include <linux/fs.h> | ||
11 | #include <linux/mm.h> | ||
12 | #include <linux/mman.h> | ||
13 | #include <linux/sched.h> | ||
14 | #include <linux/shm.h> | ||
15 | #include <linux/file.h> /* doh, must come after sched.h... */ | ||
16 | #include <linux/smp.h> | ||
17 | #include <linux/smp_lock.h> | ||
18 | #include <linux/syscalls.h> | ||
19 | #include <linux/highuid.h> | ||
20 | #include <linux/hugetlb.h> | ||
21 | |||
22 | #include <asm/shmparam.h> | ||
23 | #include <asm/uaccess.h> | ||
24 | |||
25 | unsigned long | ||
26 | arch_get_unmapped_area (struct file *filp, unsigned long addr, unsigned long len, | ||
27 | unsigned long pgoff, unsigned long flags) | ||
28 | { | ||
29 | long map_shared = (flags & MAP_SHARED); | ||
30 | unsigned long start_addr, align_mask = PAGE_SIZE - 1; | ||
31 | struct mm_struct *mm = current->mm; | ||
32 | struct vm_area_struct *vma; | ||
33 | |||
34 | if (len > RGN_MAP_LIMIT) | ||
35 | return -ENOMEM; | ||
36 | |||
37 | #ifdef CONFIG_HUGETLB_PAGE | ||
38 | if (REGION_NUMBER(addr) == REGION_HPAGE) | ||
39 | addr = 0; | ||
40 | #endif | ||
41 | if (!addr) | ||
42 | addr = mm->free_area_cache; | ||
43 | |||
44 | if (map_shared && (TASK_SIZE > 0xfffffffful)) | ||
45 | /* | ||
46 | * For 64-bit tasks, align shared segments to 1MB to avoid potential | ||
47 | * performance penalty due to virtual aliasing (see ASDM). For 32-bit | ||
48 | * tasks, we prefer to avoid exhausting the address space too quickly by | ||
49 | * limiting alignment to a single page. | ||
50 | */ | ||
51 | align_mask = SHMLBA - 1; | ||
52 | |||
53 | full_search: | ||
54 | start_addr = addr = (addr + align_mask) & ~align_mask; | ||
55 | |||
56 | for (vma = find_vma(mm, addr); ; vma = vma->vm_next) { | ||
57 | /* At this point: (!vma || addr < vma->vm_end). */ | ||
58 | if (TASK_SIZE - len < addr || RGN_MAP_LIMIT - len < REGION_OFFSET(addr)) { | ||
59 | if (start_addr != TASK_UNMAPPED_BASE) { | ||
60 | /* Start a new search --- just in case we missed some holes. */ | ||
61 | addr = TASK_UNMAPPED_BASE; | ||
62 | goto full_search; | ||
63 | } | ||
64 | return -ENOMEM; | ||
65 | } | ||
66 | if (!vma || addr + len <= vma->vm_start) { | ||
67 | /* Remember the address where we stopped this search: */ | ||
68 | mm->free_area_cache = addr + len; | ||
69 | return addr; | ||
70 | } | ||
71 | addr = (vma->vm_end + align_mask) & ~align_mask; | ||
72 | } | ||
73 | } | ||
74 | |||
75 | asmlinkage long | ||
76 | ia64_getpriority (int which, int who) | ||
77 | { | ||
78 | long prio; | ||
79 | |||
80 | prio = sys_getpriority(which, who); | ||
81 | if (prio >= 0) { | ||
82 | force_successful_syscall_return(); | ||
83 | prio = 20 - prio; | ||
84 | } | ||
85 | return prio; | ||
86 | } | ||
87 | |||
88 | /* XXX obsolete, but leave it here until the old libc is gone... */ | ||
89 | asmlinkage unsigned long | ||
90 | sys_getpagesize (void) | ||
91 | { | ||
92 | return PAGE_SIZE; | ||
93 | } | ||
94 | |||
95 | asmlinkage unsigned long | ||
96 | ia64_shmat (int shmid, void __user *shmaddr, int shmflg) | ||
97 | { | ||
98 | unsigned long raddr; | ||
99 | int retval; | ||
100 | |||
101 | retval = do_shmat(shmid, shmaddr, shmflg, &raddr); | ||
102 | if (retval < 0) | ||
103 | return retval; | ||
104 | |||
105 | force_successful_syscall_return(); | ||
106 | return raddr; | ||
107 | } | ||
108 | |||
109 | asmlinkage unsigned long | ||
110 | ia64_brk (unsigned long brk) | ||
111 | { | ||
112 | unsigned long rlim, retval, newbrk, oldbrk; | ||
113 | struct mm_struct *mm = current->mm; | ||
114 | |||
115 | /* | ||
116 | * Most of this replicates the code in sys_brk() except for an additional safety | ||
117 | * check and the clearing of r8. However, we can't call sys_brk() because we need | ||
118 | * to acquire the mmap_sem before we can do the test... | ||
119 | */ | ||
120 | down_write(&mm->mmap_sem); | ||
121 | |||
122 | if (brk < mm->end_code) | ||
123 | goto out; | ||
124 | newbrk = PAGE_ALIGN(brk); | ||
125 | oldbrk = PAGE_ALIGN(mm->brk); | ||
126 | if (oldbrk == newbrk) | ||
127 | goto set_brk; | ||
128 | |||
129 | /* Always allow shrinking brk. */ | ||
130 | if (brk <= mm->brk) { | ||
131 | if (!do_munmap(mm, newbrk, oldbrk-newbrk)) | ||
132 | goto set_brk; | ||
133 | goto out; | ||
134 | } | ||
135 | |||
136 | /* Check against unimplemented/unmapped addresses: */ | ||
137 | if ((newbrk - oldbrk) > RGN_MAP_LIMIT || REGION_OFFSET(newbrk) > RGN_MAP_LIMIT) | ||
138 | goto out; | ||
139 | |||
140 | /* Check against rlimit.. */ | ||
141 | rlim = current->signal->rlim[RLIMIT_DATA].rlim_cur; | ||
142 | if (rlim < RLIM_INFINITY && brk - mm->start_data > rlim) | ||
143 | goto out; | ||
144 | |||
145 | /* Check against existing mmap mappings. */ | ||
146 | if (find_vma_intersection(mm, oldbrk, newbrk+PAGE_SIZE)) | ||
147 | goto out; | ||
148 | |||
149 | /* Ok, looks good - let it rip. */ | ||
150 | if (do_brk(oldbrk, newbrk-oldbrk) != oldbrk) | ||
151 | goto out; | ||
152 | set_brk: | ||
153 | mm->brk = brk; | ||
154 | out: | ||
155 | retval = mm->brk; | ||
156 | up_write(&mm->mmap_sem); | ||
157 | force_successful_syscall_return(); | ||
158 | return retval; | ||
159 | } | ||
160 | |||
161 | /* | ||
162 | * On IA-64, we return the two file descriptors in ret0 and ret1 (r8 | ||
163 | * and r9) as this is faster than doing a copy_to_user(). | ||
164 | */ | ||
165 | asmlinkage long | ||
166 | sys_pipe (void) | ||
167 | { | ||
168 | struct pt_regs *regs = ia64_task_regs(current); | ||
169 | int fd[2]; | ||
170 | int retval; | ||
171 | |||
172 | retval = do_pipe(fd); | ||
173 | if (retval) | ||
174 | goto out; | ||
175 | retval = fd[0]; | ||
176 | regs->r9 = fd[1]; | ||
177 | out: | ||
178 | return retval; | ||
179 | } | ||
180 | |||
181 | static inline unsigned long | ||
182 | do_mmap2 (unsigned long addr, unsigned long len, int prot, int flags, int fd, unsigned long pgoff) | ||
183 | { | ||
184 | unsigned long roff; | ||
185 | struct file *file = NULL; | ||
186 | |||
187 | flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE); | ||
188 | if (!(flags & MAP_ANONYMOUS)) { | ||
189 | file = fget(fd); | ||
190 | if (!file) | ||
191 | return -EBADF; | ||
192 | |||
193 | if (!file->f_op || !file->f_op->mmap) { | ||
194 | addr = -ENODEV; | ||
195 | goto out; | ||
196 | } | ||
197 | } | ||
198 | |||
199 | /* | ||
200 | * A zero mmap always succeeds in Linux, independent of whether or not the | ||
201 | * remaining arguments are valid. | ||
202 | */ | ||
203 | if (len == 0) | ||
204 | goto out; | ||
205 | |||
206 | /* Careful about overflows.. */ | ||
207 | len = PAGE_ALIGN(len); | ||
208 | if (!len || len > TASK_SIZE) { | ||
209 | addr = -EINVAL; | ||
210 | goto out; | ||
211 | } | ||
212 | |||
213 | /* | ||
214 | * Don't permit mappings into unmapped space, the virtual page table of a region, | ||
215 | * or across a region boundary. Note: RGN_MAP_LIMIT is equal to 2^n-PAGE_SIZE | ||
216 | * (for some integer n <= 61) and len > 0. | ||
217 | */ | ||
218 | roff = REGION_OFFSET(addr); | ||
219 | if ((len > RGN_MAP_LIMIT) || (roff > (RGN_MAP_LIMIT - len))) { | ||
220 | addr = -EINVAL; | ||
221 | goto out; | ||
222 | } | ||
223 | |||
224 | down_write(¤t->mm->mmap_sem); | ||
225 | addr = do_mmap_pgoff(file, addr, len, prot, flags, pgoff); | ||
226 | up_write(¤t->mm->mmap_sem); | ||
227 | |||
228 | out: if (file) | ||
229 | fput(file); | ||
230 | return addr; | ||
231 | } | ||
232 | |||
233 | /* | ||
234 | * mmap2() is like mmap() except that the offset is expressed in units | ||
235 | * of PAGE_SIZE (instead of bytes). This allows to mmap2() (pieces | ||
236 | * of) files that are larger than the address space of the CPU. | ||
237 | */ | ||
238 | asmlinkage unsigned long | ||
239 | sys_mmap2 (unsigned long addr, unsigned long len, int prot, int flags, int fd, long pgoff) | ||
240 | { | ||
241 | addr = do_mmap2(addr, len, prot, flags, fd, pgoff); | ||
242 | if (!IS_ERR((void *) addr)) | ||
243 | force_successful_syscall_return(); | ||
244 | return addr; | ||
245 | } | ||
246 | |||
247 | asmlinkage unsigned long | ||
248 | sys_mmap (unsigned long addr, unsigned long len, int prot, int flags, int fd, long off) | ||
249 | { | ||
250 | if (offset_in_page(off) != 0) | ||
251 | return -EINVAL; | ||
252 | |||
253 | addr = do_mmap2(addr, len, prot, flags, fd, off >> PAGE_SHIFT); | ||
254 | if (!IS_ERR((void *) addr)) | ||
255 | force_successful_syscall_return(); | ||
256 | return addr; | ||
257 | } | ||
258 | |||
259 | asmlinkage unsigned long | ||
260 | ia64_mremap (unsigned long addr, unsigned long old_len, unsigned long new_len, unsigned long flags, | ||
261 | unsigned long new_addr) | ||
262 | { | ||
263 | extern unsigned long do_mremap (unsigned long addr, | ||
264 | unsigned long old_len, | ||
265 | unsigned long new_len, | ||
266 | unsigned long flags, | ||
267 | unsigned long new_addr); | ||
268 | |||
269 | down_write(¤t->mm->mmap_sem); | ||
270 | { | ||
271 | addr = do_mremap(addr, old_len, new_len, flags, new_addr); | ||
272 | } | ||
273 | up_write(¤t->mm->mmap_sem); | ||
274 | |||
275 | if (IS_ERR((void *) addr)) | ||
276 | return addr; | ||
277 | |||
278 | force_successful_syscall_return(); | ||
279 | return addr; | ||
280 | } | ||
281 | |||
282 | #ifndef CONFIG_PCI | ||
283 | |||
284 | asmlinkage long | ||
285 | sys_pciconfig_read (unsigned long bus, unsigned long dfn, unsigned long off, unsigned long len, | ||
286 | void *buf) | ||
287 | { | ||
288 | return -ENOSYS; | ||
289 | } | ||
290 | |||
291 | asmlinkage long | ||
292 | sys_pciconfig_write (unsigned long bus, unsigned long dfn, unsigned long off, unsigned long len, | ||
293 | void *buf) | ||
294 | { | ||
295 | return -ENOSYS; | ||
296 | } | ||
297 | |||
298 | #endif /* CONFIG_PCI */ | ||
diff --git a/arch/ia64/kernel/time.c b/arch/ia64/kernel/time.c new file mode 100644 index 000000000000..8b8a5a45b621 --- /dev/null +++ b/arch/ia64/kernel/time.c | |||
@@ -0,0 +1,255 @@ | |||
1 | /* | ||
2 | * linux/arch/ia64/kernel/time.c | ||
3 | * | ||
4 | * Copyright (C) 1998-2003 Hewlett-Packard Co | ||
5 | * Stephane Eranian <eranian@hpl.hp.com> | ||
6 | * David Mosberger <davidm@hpl.hp.com> | ||
7 | * Copyright (C) 1999 Don Dugger <don.dugger@intel.com> | ||
8 | * Copyright (C) 1999-2000 VA Linux Systems | ||
9 | * Copyright (C) 1999-2000 Walt Drummond <drummond@valinux.com> | ||
10 | */ | ||
11 | #include <linux/config.h> | ||
12 | |||
13 | #include <linux/cpu.h> | ||
14 | #include <linux/init.h> | ||
15 | #include <linux/kernel.h> | ||
16 | #include <linux/module.h> | ||
17 | #include <linux/profile.h> | ||
18 | #include <linux/sched.h> | ||
19 | #include <linux/time.h> | ||
20 | #include <linux/interrupt.h> | ||
21 | #include <linux/efi.h> | ||
22 | #include <linux/profile.h> | ||
23 | #include <linux/timex.h> | ||
24 | |||
25 | #include <asm/machvec.h> | ||
26 | #include <asm/delay.h> | ||
27 | #include <asm/hw_irq.h> | ||
28 | #include <asm/ptrace.h> | ||
29 | #include <asm/sal.h> | ||
30 | #include <asm/sections.h> | ||
31 | #include <asm/system.h> | ||
32 | |||
33 | extern unsigned long wall_jiffies; | ||
34 | |||
35 | u64 jiffies_64 __cacheline_aligned_in_smp = INITIAL_JIFFIES; | ||
36 | |||
37 | EXPORT_SYMBOL(jiffies_64); | ||
38 | |||
39 | #define TIME_KEEPER_ID 0 /* smp_processor_id() of time-keeper */ | ||
40 | |||
41 | #ifdef CONFIG_IA64_DEBUG_IRQ | ||
42 | |||
43 | unsigned long last_cli_ip; | ||
44 | EXPORT_SYMBOL(last_cli_ip); | ||
45 | |||
46 | #endif | ||
47 | |||
48 | static struct time_interpolator itc_interpolator = { | ||
49 | .shift = 16, | ||
50 | .mask = 0xffffffffffffffffLL, | ||
51 | .source = TIME_SOURCE_CPU | ||
52 | }; | ||
53 | |||
54 | static irqreturn_t | ||
55 | timer_interrupt (int irq, void *dev_id, struct pt_regs *regs) | ||
56 | { | ||
57 | unsigned long new_itm; | ||
58 | |||
59 | if (unlikely(cpu_is_offline(smp_processor_id()))) { | ||
60 | return IRQ_HANDLED; | ||
61 | } | ||
62 | |||
63 | platform_timer_interrupt(irq, dev_id, regs); | ||
64 | |||
65 | new_itm = local_cpu_data->itm_next; | ||
66 | |||
67 | if (!time_after(ia64_get_itc(), new_itm)) | ||
68 | printk(KERN_ERR "Oops: timer tick before it's due (itc=%lx,itm=%lx)\n", | ||
69 | ia64_get_itc(), new_itm); | ||
70 | |||
71 | profile_tick(CPU_PROFILING, regs); | ||
72 | |||
73 | while (1) { | ||
74 | update_process_times(user_mode(regs)); | ||
75 | |||
76 | new_itm += local_cpu_data->itm_delta; | ||
77 | |||
78 | if (smp_processor_id() == TIME_KEEPER_ID) { | ||
79 | /* | ||
80 | * Here we are in the timer irq handler. We have irqs locally | ||
81 | * disabled, but we don't know if the timer_bh is running on | ||
82 | * another CPU. We need to avoid to SMP race by acquiring the | ||
83 | * xtime_lock. | ||
84 | */ | ||
85 | write_seqlock(&xtime_lock); | ||
86 | do_timer(regs); | ||
87 | local_cpu_data->itm_next = new_itm; | ||
88 | write_sequnlock(&xtime_lock); | ||
89 | } else | ||
90 | local_cpu_data->itm_next = new_itm; | ||
91 | |||
92 | if (time_after(new_itm, ia64_get_itc())) | ||
93 | break; | ||
94 | } | ||
95 | |||
96 | do { | ||
97 | /* | ||
98 | * If we're too close to the next clock tick for | ||
99 | * comfort, we increase the safety margin by | ||
100 | * intentionally dropping the next tick(s). We do NOT | ||
101 | * update itm.next because that would force us to call | ||
102 | * do_timer() which in turn would let our clock run | ||
103 | * too fast (with the potentially devastating effect | ||
104 | * of losing monotony of time). | ||
105 | */ | ||
106 | while (!time_after(new_itm, ia64_get_itc() + local_cpu_data->itm_delta/2)) | ||
107 | new_itm += local_cpu_data->itm_delta; | ||
108 | ia64_set_itm(new_itm); | ||
109 | /* double check, in case we got hit by a (slow) PMI: */ | ||
110 | } while (time_after_eq(ia64_get_itc(), new_itm)); | ||
111 | return IRQ_HANDLED; | ||
112 | } | ||
113 | |||
114 | /* | ||
115 | * Encapsulate access to the itm structure for SMP. | ||
116 | */ | ||
117 | void | ||
118 | ia64_cpu_local_tick (void) | ||
119 | { | ||
120 | int cpu = smp_processor_id(); | ||
121 | unsigned long shift = 0, delta; | ||
122 | |||
123 | /* arrange for the cycle counter to generate a timer interrupt: */ | ||
124 | ia64_set_itv(IA64_TIMER_VECTOR); | ||
125 | |||
126 | delta = local_cpu_data->itm_delta; | ||
127 | /* | ||
128 | * Stagger the timer tick for each CPU so they don't occur all at (almost) the | ||
129 | * same time: | ||
130 | */ | ||
131 | if (cpu) { | ||
132 | unsigned long hi = 1UL << ia64_fls(cpu); | ||
133 | shift = (2*(cpu - hi) + 1) * delta/hi/2; | ||
134 | } | ||
135 | local_cpu_data->itm_next = ia64_get_itc() + delta + shift; | ||
136 | ia64_set_itm(local_cpu_data->itm_next); | ||
137 | } | ||
138 | |||
139 | static int nojitter; | ||
140 | |||
141 | static int __init nojitter_setup(char *str) | ||
142 | { | ||
143 | nojitter = 1; | ||
144 | printk("Jitter checking for ITC timers disabled\n"); | ||
145 | return 1; | ||
146 | } | ||
147 | |||
148 | __setup("nojitter", nojitter_setup); | ||
149 | |||
150 | |||
151 | void __devinit | ||
152 | ia64_init_itm (void) | ||
153 | { | ||
154 | unsigned long platform_base_freq, itc_freq; | ||
155 | struct pal_freq_ratio itc_ratio, proc_ratio; | ||
156 | long status, platform_base_drift, itc_drift; | ||
157 | |||
158 | /* | ||
159 | * According to SAL v2.6, we need to use a SAL call to determine the platform base | ||
160 | * frequency and then a PAL call to determine the frequency ratio between the ITC | ||
161 | * and the base frequency. | ||
162 | */ | ||
163 | status = ia64_sal_freq_base(SAL_FREQ_BASE_PLATFORM, | ||
164 | &platform_base_freq, &platform_base_drift); | ||
165 | if (status != 0) { | ||
166 | printk(KERN_ERR "SAL_FREQ_BASE_PLATFORM failed: %s\n", ia64_sal_strerror(status)); | ||
167 | } else { | ||
168 | status = ia64_pal_freq_ratios(&proc_ratio, NULL, &itc_ratio); | ||
169 | if (status != 0) | ||
170 | printk(KERN_ERR "PAL_FREQ_RATIOS failed with status=%ld\n", status); | ||
171 | } | ||
172 | if (status != 0) { | ||
173 | /* invent "random" values */ | ||
174 | printk(KERN_ERR | ||
175 | "SAL/PAL failed to obtain frequency info---inventing reasonable values\n"); | ||
176 | platform_base_freq = 100000000; | ||
177 | platform_base_drift = -1; /* no drift info */ | ||
178 | itc_ratio.num = 3; | ||
179 | itc_ratio.den = 1; | ||
180 | } | ||
181 | if (platform_base_freq < 40000000) { | ||
182 | printk(KERN_ERR "Platform base frequency %lu bogus---resetting to 75MHz!\n", | ||
183 | platform_base_freq); | ||
184 | platform_base_freq = 75000000; | ||
185 | platform_base_drift = -1; | ||
186 | } | ||
187 | if (!proc_ratio.den) | ||
188 | proc_ratio.den = 1; /* avoid division by zero */ | ||
189 | if (!itc_ratio.den) | ||
190 | itc_ratio.den = 1; /* avoid division by zero */ | ||
191 | |||
192 | itc_freq = (platform_base_freq*itc_ratio.num)/itc_ratio.den; | ||
193 | |||
194 | local_cpu_data->itm_delta = (itc_freq + HZ/2) / HZ; | ||
195 | printk(KERN_DEBUG "CPU %d: base freq=%lu.%03luMHz, ITC ratio=%lu/%lu, " | ||
196 | "ITC freq=%lu.%03luMHz", smp_processor_id(), | ||
197 | platform_base_freq / 1000000, (platform_base_freq / 1000) % 1000, | ||
198 | itc_ratio.num, itc_ratio.den, itc_freq / 1000000, (itc_freq / 1000) % 1000); | ||
199 | |||
200 | if (platform_base_drift != -1) { | ||
201 | itc_drift = platform_base_drift*itc_ratio.num/itc_ratio.den; | ||
202 | printk("+/-%ldppm\n", itc_drift); | ||
203 | } else { | ||
204 | itc_drift = -1; | ||
205 | printk("\n"); | ||
206 | } | ||
207 | |||
208 | local_cpu_data->proc_freq = (platform_base_freq*proc_ratio.num)/proc_ratio.den; | ||
209 | local_cpu_data->itc_freq = itc_freq; | ||
210 | local_cpu_data->cyc_per_usec = (itc_freq + USEC_PER_SEC/2) / USEC_PER_SEC; | ||
211 | local_cpu_data->nsec_per_cyc = ((NSEC_PER_SEC<<IA64_NSEC_PER_CYC_SHIFT) | ||
212 | + itc_freq/2)/itc_freq; | ||
213 | |||
214 | if (!(sal_platform_features & IA64_SAL_PLATFORM_FEATURE_ITC_DRIFT)) { | ||
215 | itc_interpolator.frequency = local_cpu_data->itc_freq; | ||
216 | itc_interpolator.drift = itc_drift; | ||
217 | #ifdef CONFIG_SMP | ||
218 | /* On IA64 in an SMP configuration ITCs are never accurately synchronized. | ||
219 | * Jitter compensation requires a cmpxchg which may limit | ||
220 | * the scalability of the syscalls for retrieving time. | ||
221 | * The ITC synchronization is usually successful to within a few | ||
222 | * ITC ticks but this is not a sure thing. If you need to improve | ||
223 | * timer performance in SMP situations then boot the kernel with the | ||
224 | * "nojitter" option. However, doing so may result in time fluctuating (maybe | ||
225 | * even going backward) if the ITC offsets between the individual CPUs | ||
226 | * are too large. | ||
227 | */ | ||
228 | if (!nojitter) itc_interpolator.jitter = 1; | ||
229 | #endif | ||
230 | register_time_interpolator(&itc_interpolator); | ||
231 | } | ||
232 | |||
233 | /* Setup the CPU local timer tick */ | ||
234 | ia64_cpu_local_tick(); | ||
235 | } | ||
236 | |||
237 | static struct irqaction timer_irqaction = { | ||
238 | .handler = timer_interrupt, | ||
239 | .flags = SA_INTERRUPT, | ||
240 | .name = "timer" | ||
241 | }; | ||
242 | |||
243 | void __init | ||
244 | time_init (void) | ||
245 | { | ||
246 | register_percpu_irq(IA64_TIMER_VECTOR, &timer_irqaction); | ||
247 | efi_gettimeofday(&xtime); | ||
248 | ia64_init_itm(); | ||
249 | |||
250 | /* | ||
251 | * Initialize wall_to_monotonic such that adding it to xtime will yield zero, the | ||
252 | * tv_nsec field must be normalized (i.e., 0 <= nsec < NSEC_PER_SEC). | ||
253 | */ | ||
254 | set_normalized_timespec(&wall_to_monotonic, -xtime.tv_sec, -xtime.tv_nsec); | ||
255 | } | ||
diff --git a/arch/ia64/kernel/topology.c b/arch/ia64/kernel/topology.c new file mode 100644 index 000000000000..f1aafd4c05f9 --- /dev/null +++ b/arch/ia64/kernel/topology.c | |||
@@ -0,0 +1,92 @@ | |||
1 | /* | ||
2 | * This file is subject to the terms and conditions of the GNU General Public | ||
3 | * License. See the file "COPYING" in the main directory of this archive | ||
4 | * for more details. | ||
5 | * | ||
6 | * This file contains NUMA specific variables and functions which can | ||
7 | * be split away from DISCONTIGMEM and are used on NUMA machines with | ||
8 | * contiguous memory. | ||
9 | * 2002/08/07 Erich Focht <efocht@ess.nec.de> | ||
10 | * Populate cpu entries in sysfs for non-numa systems as well | ||
11 | * Intel Corporation - Ashok Raj | ||
12 | */ | ||
13 | |||
14 | #include <linux/config.h> | ||
15 | #include <linux/cpu.h> | ||
16 | #include <linux/kernel.h> | ||
17 | #include <linux/mm.h> | ||
18 | #include <linux/node.h> | ||
19 | #include <linux/init.h> | ||
20 | #include <linux/bootmem.h> | ||
21 | #include <linux/nodemask.h> | ||
22 | #include <asm/mmzone.h> | ||
23 | #include <asm/numa.h> | ||
24 | #include <asm/cpu.h> | ||
25 | |||
26 | #ifdef CONFIG_NUMA | ||
27 | static struct node *sysfs_nodes; | ||
28 | #endif | ||
29 | static struct ia64_cpu *sysfs_cpus; | ||
30 | |||
31 | int arch_register_cpu(int num) | ||
32 | { | ||
33 | struct node *parent = NULL; | ||
34 | |||
35 | #ifdef CONFIG_NUMA | ||
36 | parent = &sysfs_nodes[cpu_to_node(num)]; | ||
37 | #endif /* CONFIG_NUMA */ | ||
38 | |||
39 | return register_cpu(&sysfs_cpus[num].cpu, num, parent); | ||
40 | } | ||
41 | |||
42 | #ifdef CONFIG_HOTPLUG_CPU | ||
43 | |||
44 | void arch_unregister_cpu(int num) | ||
45 | { | ||
46 | struct node *parent = NULL; | ||
47 | |||
48 | #ifdef CONFIG_NUMA | ||
49 | int node = cpu_to_node(num); | ||
50 | parent = &sysfs_nodes[node]; | ||
51 | #endif /* CONFIG_NUMA */ | ||
52 | |||
53 | return unregister_cpu(&sysfs_cpus[num].cpu, parent); | ||
54 | } | ||
55 | EXPORT_SYMBOL(arch_register_cpu); | ||
56 | EXPORT_SYMBOL(arch_unregister_cpu); | ||
57 | #endif /*CONFIG_HOTPLUG_CPU*/ | ||
58 | |||
59 | |||
60 | static int __init topology_init(void) | ||
61 | { | ||
62 | int i, err = 0; | ||
63 | |||
64 | #ifdef CONFIG_NUMA | ||
65 | sysfs_nodes = kmalloc(sizeof(struct node) * MAX_NUMNODES, GFP_KERNEL); | ||
66 | if (!sysfs_nodes) { | ||
67 | err = -ENOMEM; | ||
68 | goto out; | ||
69 | } | ||
70 | memset(sysfs_nodes, 0, sizeof(struct node) * MAX_NUMNODES); | ||
71 | |||
72 | /* MCD - Do we want to register all ONLINE nodes, or all POSSIBLE nodes? */ | ||
73 | for_each_online_node(i) | ||
74 | if ((err = register_node(&sysfs_nodes[i], i, 0))) | ||
75 | goto out; | ||
76 | #endif | ||
77 | |||
78 | sysfs_cpus = kmalloc(sizeof(struct ia64_cpu) * NR_CPUS, GFP_KERNEL); | ||
79 | if (!sysfs_cpus) { | ||
80 | err = -ENOMEM; | ||
81 | goto out; | ||
82 | } | ||
83 | memset(sysfs_cpus, 0, sizeof(struct ia64_cpu) * NR_CPUS); | ||
84 | |||
85 | for_each_present_cpu(i) | ||
86 | if((err = arch_register_cpu(i))) | ||
87 | goto out; | ||
88 | out: | ||
89 | return err; | ||
90 | } | ||
91 | |||
92 | __initcall(topology_init); | ||
diff --git a/arch/ia64/kernel/traps.c b/arch/ia64/kernel/traps.c new file mode 100644 index 000000000000..e82ad78081b3 --- /dev/null +++ b/arch/ia64/kernel/traps.c | |||
@@ -0,0 +1,609 @@ | |||
1 | /* | ||
2 | * Architecture-specific trap handling. | ||
3 | * | ||
4 | * Copyright (C) 1998-2003 Hewlett-Packard Co | ||
5 | * David Mosberger-Tang <davidm@hpl.hp.com> | ||
6 | * | ||
7 | * 05/12/00 grao <goutham.rao@intel.com> : added isr in siginfo for SIGFPE | ||
8 | */ | ||
9 | |||
10 | #include <linux/config.h> | ||
11 | #include <linux/kernel.h> | ||
12 | #include <linux/init.h> | ||
13 | #include <linux/sched.h> | ||
14 | #include <linux/tty.h> | ||
15 | #include <linux/vt_kern.h> /* For unblank_screen() */ | ||
16 | #include <linux/module.h> /* for EXPORT_SYMBOL */ | ||
17 | #include <linux/hardirq.h> | ||
18 | |||
19 | #include <asm/fpswa.h> | ||
20 | #include <asm/ia32.h> | ||
21 | #include <asm/intrinsics.h> | ||
22 | #include <asm/processor.h> | ||
23 | #include <asm/uaccess.h> | ||
24 | |||
25 | extern spinlock_t timerlist_lock; | ||
26 | |||
27 | fpswa_interface_t *fpswa_interface; | ||
28 | EXPORT_SYMBOL(fpswa_interface); | ||
29 | |||
30 | void __init | ||
31 | trap_init (void) | ||
32 | { | ||
33 | if (ia64_boot_param->fpswa) | ||
34 | /* FPSWA fixup: make the interface pointer a kernel virtual address: */ | ||
35 | fpswa_interface = __va(ia64_boot_param->fpswa); | ||
36 | } | ||
37 | |||
38 | /* | ||
39 | * Unlock any spinlocks which will prevent us from getting the message out (timerlist_lock | ||
40 | * is acquired through the console unblank code) | ||
41 | */ | ||
42 | void | ||
43 | bust_spinlocks (int yes) | ||
44 | { | ||
45 | int loglevel_save = console_loglevel; | ||
46 | |||
47 | if (yes) { | ||
48 | oops_in_progress = 1; | ||
49 | return; | ||
50 | } | ||
51 | |||
52 | #ifdef CONFIG_VT | ||
53 | unblank_screen(); | ||
54 | #endif | ||
55 | oops_in_progress = 0; | ||
56 | /* | ||
57 | * OK, the message is on the console. Now we call printk() without | ||
58 | * oops_in_progress set so that printk will give klogd a poke. Hold onto | ||
59 | * your hats... | ||
60 | */ | ||
61 | console_loglevel = 15; /* NMI oopser may have shut the console up */ | ||
62 | printk(" "); | ||
63 | console_loglevel = loglevel_save; | ||
64 | } | ||
65 | |||
66 | void | ||
67 | die (const char *str, struct pt_regs *regs, long err) | ||
68 | { | ||
69 | static struct { | ||
70 | spinlock_t lock; | ||
71 | u32 lock_owner; | ||
72 | int lock_owner_depth; | ||
73 | } die = { | ||
74 | .lock = SPIN_LOCK_UNLOCKED, | ||
75 | .lock_owner = -1, | ||
76 | .lock_owner_depth = 0 | ||
77 | }; | ||
78 | static int die_counter; | ||
79 | |||
80 | if (die.lock_owner != smp_processor_id()) { | ||
81 | console_verbose(); | ||
82 | spin_lock_irq(&die.lock); | ||
83 | die.lock_owner = smp_processor_id(); | ||
84 | die.lock_owner_depth = 0; | ||
85 | bust_spinlocks(1); | ||
86 | } | ||
87 | |||
88 | if (++die.lock_owner_depth < 3) { | ||
89 | printk("%s[%d]: %s %ld [%d]\n", | ||
90 | current->comm, current->pid, str, err, ++die_counter); | ||
91 | show_regs(regs); | ||
92 | } else | ||
93 | printk(KERN_ERR "Recursive die() failure, output suppressed\n"); | ||
94 | |||
95 | bust_spinlocks(0); | ||
96 | die.lock_owner = -1; | ||
97 | spin_unlock_irq(&die.lock); | ||
98 | do_exit(SIGSEGV); | ||
99 | } | ||
100 | |||
/* Call die() with the given arguments unless @regs describes user mode. */
void
die_if_kernel (char *str, struct pt_regs *regs, long err)
{
	if (user_mode(regs))
		return;

	die(str, regs, err);
}
107 | |||
108 | void | ||
109 | ia64_bad_break (unsigned long break_num, struct pt_regs *regs) | ||
110 | { | ||
111 | siginfo_t siginfo; | ||
112 | int sig, code; | ||
113 | |||
114 | /* SIGILL, SIGFPE, SIGSEGV, and SIGBUS want these field initialized: */ | ||
115 | siginfo.si_addr = (void __user *) (regs->cr_iip + ia64_psr(regs)->ri); | ||
116 | siginfo.si_imm = break_num; | ||
117 | siginfo.si_flags = 0; /* clear __ISR_VALID */ | ||
118 | siginfo.si_isr = 0; | ||
119 | |||
120 | switch (break_num) { | ||
121 | case 0: /* unknown error (used by GCC for __builtin_abort()) */ | ||
122 | die_if_kernel("bugcheck!", regs, break_num); | ||
123 | sig = SIGILL; code = ILL_ILLOPC; | ||
124 | break; | ||
125 | |||
126 | case 1: /* integer divide by zero */ | ||
127 | sig = SIGFPE; code = FPE_INTDIV; | ||
128 | break; | ||
129 | |||
130 | case 2: /* integer overflow */ | ||
131 | sig = SIGFPE; code = FPE_INTOVF; | ||
132 | break; | ||
133 | |||
134 | case 3: /* range check/bounds check */ | ||
135 | sig = SIGFPE; code = FPE_FLTSUB; | ||
136 | break; | ||
137 | |||
138 | case 4: /* null pointer dereference */ | ||
139 | sig = SIGSEGV; code = SEGV_MAPERR; | ||
140 | break; | ||
141 | |||
142 | case 5: /* misaligned data */ | ||
143 | sig = SIGSEGV; code = BUS_ADRALN; | ||
144 | break; | ||
145 | |||
146 | case 6: /* decimal overflow */ | ||
147 | sig = SIGFPE; code = __FPE_DECOVF; | ||
148 | break; | ||
149 | |||
150 | case 7: /* decimal divide by zero */ | ||
151 | sig = SIGFPE; code = __FPE_DECDIV; | ||
152 | break; | ||
153 | |||
154 | case 8: /* packed decimal error */ | ||
155 | sig = SIGFPE; code = __FPE_DECERR; | ||
156 | break; | ||
157 | |||
158 | case 9: /* invalid ASCII digit */ | ||
159 | sig = SIGFPE; code = __FPE_INVASC; | ||
160 | break; | ||
161 | |||
162 | case 10: /* invalid decimal digit */ | ||
163 | sig = SIGFPE; code = __FPE_INVDEC; | ||
164 | break; | ||
165 | |||
166 | case 11: /* paragraph stack overflow */ | ||
167 | sig = SIGSEGV; code = __SEGV_PSTKOVF; | ||
168 | break; | ||
169 | |||
170 | case 0x3f000 ... 0x3ffff: /* bundle-update in progress */ | ||
171 | sig = SIGILL; code = __ILL_BNDMOD; | ||
172 | break; | ||
173 | |||
174 | default: | ||
175 | if (break_num < 0x40000 || break_num > 0x100000) | ||
176 | die_if_kernel("Bad break", regs, break_num); | ||
177 | |||
178 | if (break_num < 0x80000) { | ||
179 | sig = SIGILL; code = __ILL_BREAK; | ||
180 | } else { | ||
181 | sig = SIGTRAP; code = TRAP_BRKPT; | ||
182 | } | ||
183 | } | ||
184 | siginfo.si_signo = sig; | ||
185 | siginfo.si_errno = 0; | ||
186 | siginfo.si_code = code; | ||
187 | force_sig_info(sig, &siginfo, current); | ||
188 | } | ||
189 | |||
190 | /* | ||
191 | * disabled_fph_fault() is called when a user-level process attempts to access f32..f127 | ||
192 | * and it doesn't own the fp-high register partition. When this happens, we save the | ||
193 | * current fph partition in the task_struct of the fpu-owner (if necessary) and then load | ||
194 | * the fp-high partition of the current task (if necessary). Note that the kernel has | ||
195 | * access to fph by the time we get here, as the IVT's "Disabled FP-Register" handler takes | ||
196 | * care of clearing psr.dfh. | ||
197 | */ | ||
198 | static inline void | ||
199 | disabled_fph_fault (struct pt_regs *regs) | ||
200 | { | ||
201 | struct ia64_psr *psr = ia64_psr(regs); | ||
202 | |||
203 | /* first, grant user-level access to fph partition: */ | ||
204 | psr->dfh = 0; | ||
205 | #ifndef CONFIG_SMP | ||
206 | { | ||
207 | struct task_struct *fpu_owner | ||
208 | = (struct task_struct *)ia64_get_kr(IA64_KR_FPU_OWNER); | ||
209 | |||
210 | if (ia64_is_local_fpu_owner(current)) | ||
211 | return; | ||
212 | |||
213 | if (fpu_owner) | ||
214 | ia64_flush_fph(fpu_owner); | ||
215 | } | ||
216 | #endif /* !CONFIG_SMP */ | ||
217 | ia64_set_local_fpu_owner(current); | ||
218 | if ((current->thread.flags & IA64_THREAD_FPH_VALID) != 0) { | ||
219 | __ia64_load_fpu(current->thread.fph); | ||
220 | psr->mfh = 0; | ||
221 | } else { | ||
222 | __ia64_init_fpu(); | ||
223 | /* | ||
224 | * Set mfh because the state in thread.fph does not match the state in | ||
225 | * the fph partition. | ||
226 | */ | ||
227 | psr->mfh = 1; | ||
228 | } | ||
229 | } | ||
230 | |||
231 | static inline int | ||
232 | fp_emulate (int fp_fault, void *bundle, long *ipsr, long *fpsr, long *isr, long *pr, long *ifs, | ||
233 | struct pt_regs *regs) | ||
234 | { | ||
235 | fp_state_t fp_state; | ||
236 | fpswa_ret_t ret; | ||
237 | |||
238 | if (!fpswa_interface) | ||
239 | return -1; | ||
240 | |||
241 | memset(&fp_state, 0, sizeof(fp_state_t)); | ||
242 | |||
243 | /* | ||
244 | * compute fp_state. only FP registers f6 - f11 are used by the | ||
245 | * kernel, so set those bits in the mask and set the low volatile | ||
246 | * pointer to point to these registers. | ||
247 | */ | ||
248 | fp_state.bitmask_low64 = 0xfc0; /* bit6..bit11 */ | ||
249 | |||
250 | fp_state.fp_state_low_volatile = (fp_state_low_volatile_t *) ®s->f6; | ||
251 | /* | ||
252 | * unsigned long (*EFI_FPSWA) ( | ||
253 | * unsigned long trap_type, | ||
254 | * void *Bundle, | ||
255 | * unsigned long *pipsr, | ||
256 | * unsigned long *pfsr, | ||
257 | * unsigned long *pisr, | ||
258 | * unsigned long *ppreds, | ||
259 | * unsigned long *pifs, | ||
260 | * void *fp_state); | ||
261 | */ | ||
262 | ret = (*fpswa_interface->fpswa)((unsigned long) fp_fault, bundle, | ||
263 | (unsigned long *) ipsr, (unsigned long *) fpsr, | ||
264 | (unsigned long *) isr, (unsigned long *) pr, | ||
265 | (unsigned long *) ifs, &fp_state); | ||
266 | |||
267 | return ret.status; | ||
268 | } | ||
269 | |||
270 | /* | ||
271 | * Handle floating-point assist faults and traps. | ||
272 | */ | ||
273 | static int | ||
274 | handle_fpu_swa (int fp_fault, struct pt_regs *regs, unsigned long isr) | ||
275 | { | ||
276 | long exception, bundle[2]; | ||
277 | unsigned long fault_ip; | ||
278 | struct siginfo siginfo; | ||
279 | static int fpu_swa_count = 0; | ||
280 | static unsigned long last_time; | ||
281 | |||
282 | fault_ip = regs->cr_iip; | ||
283 | if (!fp_fault && (ia64_psr(regs)->ri == 0)) | ||
284 | fault_ip -= 16; | ||
285 | if (copy_from_user(bundle, (void __user *) fault_ip, sizeof(bundle))) | ||
286 | return -1; | ||
287 | |||
288 | if (jiffies - last_time > 5*HZ) | ||
289 | fpu_swa_count = 0; | ||
290 | if ((fpu_swa_count < 4) && !(current->thread.flags & IA64_THREAD_FPEMU_NOPRINT)) { | ||
291 | last_time = jiffies; | ||
292 | ++fpu_swa_count; | ||
293 | printk(KERN_WARNING | ||
294 | "%s(%d): floating-point assist fault at ip %016lx, isr %016lx\n", | ||
295 | current->comm, current->pid, regs->cr_iip + ia64_psr(regs)->ri, isr); | ||
296 | } | ||
297 | |||
298 | exception = fp_emulate(fp_fault, bundle, ®s->cr_ipsr, ®s->ar_fpsr, &isr, ®s->pr, | ||
299 | ®s->cr_ifs, regs); | ||
300 | if (fp_fault) { | ||
301 | if (exception == 0) { | ||
302 | /* emulation was successful */ | ||
303 | ia64_increment_ip(regs); | ||
304 | } else if (exception == -1) { | ||
305 | printk(KERN_ERR "handle_fpu_swa: fp_emulate() returned -1\n"); | ||
306 | return -1; | ||
307 | } else { | ||
308 | /* is next instruction a trap? */ | ||
309 | if (exception & 2) { | ||
310 | ia64_increment_ip(regs); | ||
311 | } | ||
312 | siginfo.si_signo = SIGFPE; | ||
313 | siginfo.si_errno = 0; | ||
314 | siginfo.si_code = __SI_FAULT; /* default code */ | ||
315 | siginfo.si_addr = (void __user *) (regs->cr_iip + ia64_psr(regs)->ri); | ||
316 | if (isr & 0x11) { | ||
317 | siginfo.si_code = FPE_FLTINV; | ||
318 | } else if (isr & 0x22) { | ||
319 | /* denormal operand gets the same si_code as underflow | ||
320 | * see arch/i386/kernel/traps.c:math_error() */ | ||
321 | siginfo.si_code = FPE_FLTUND; | ||
322 | } else if (isr & 0x44) { | ||
323 | siginfo.si_code = FPE_FLTDIV; | ||
324 | } | ||
325 | siginfo.si_isr = isr; | ||
326 | siginfo.si_flags = __ISR_VALID; | ||
327 | siginfo.si_imm = 0; | ||
328 | force_sig_info(SIGFPE, &siginfo, current); | ||
329 | } | ||
330 | } else { | ||
331 | if (exception == -1) { | ||
332 | printk(KERN_ERR "handle_fpu_swa: fp_emulate() returned -1\n"); | ||
333 | return -1; | ||
334 | } else if (exception != 0) { | ||
335 | /* raise exception */ | ||
336 | siginfo.si_signo = SIGFPE; | ||
337 | siginfo.si_errno = 0; | ||
338 | siginfo.si_code = __SI_FAULT; /* default code */ | ||
339 | siginfo.si_addr = (void __user *) (regs->cr_iip + ia64_psr(regs)->ri); | ||
340 | if (isr & 0x880) { | ||
341 | siginfo.si_code = FPE_FLTOVF; | ||
342 | } else if (isr & 0x1100) { | ||
343 | siginfo.si_code = FPE_FLTUND; | ||
344 | } else if (isr & 0x2200) { | ||
345 | siginfo.si_code = FPE_FLTRES; | ||
346 | } | ||
347 | siginfo.si_isr = isr; | ||
348 | siginfo.si_flags = __ISR_VALID; | ||
349 | siginfo.si_imm = 0; | ||
350 | force_sig_info(SIGFPE, &siginfo, current); | ||
351 | } | ||
352 | } | ||
353 | return 0; | ||
354 | } | ||
355 | |||
/*
 * Value returned by the illegal-operation handler.
 * NOTE(review): fkt looks like a function address for the caller to invoke
 * with arg1..arg3 (0 meaning "nothing to call") -- confirm against the
 * low-level caller.
 */
struct illegal_op_return {
	unsigned long fkt;
	unsigned long arg1;
	unsigned long arg2;
	unsigned long arg3;
};
359 | |||
360 | struct illegal_op_return | ||
361 | ia64_illegal_op_fault (unsigned long ec, long arg1, long arg2, long arg3, | ||
362 | long arg4, long arg5, long arg6, long arg7, | ||
363 | struct pt_regs regs) | ||
364 | { | ||
365 | struct illegal_op_return rv; | ||
366 | struct siginfo si; | ||
367 | char buf[128]; | ||
368 | |||
369 | #ifdef CONFIG_IA64_BRL_EMU | ||
370 | { | ||
371 | extern struct illegal_op_return ia64_emulate_brl (struct pt_regs *, unsigned long); | ||
372 | |||
373 | rv = ia64_emulate_brl(®s, ec); | ||
374 | if (rv.fkt != (unsigned long) -1) | ||
375 | return rv; | ||
376 | } | ||
377 | #endif | ||
378 | |||
379 | sprintf(buf, "IA-64 Illegal operation fault"); | ||
380 | die_if_kernel(buf, ®s, 0); | ||
381 | |||
382 | memset(&si, 0, sizeof(si)); | ||
383 | si.si_signo = SIGILL; | ||
384 | si.si_code = ILL_ILLOPC; | ||
385 | si.si_addr = (void __user *) (regs.cr_iip + ia64_psr(®s)->ri); | ||
386 | force_sig_info(SIGILL, &si, current); | ||
387 | rv.fkt = 0; | ||
388 | return rv; | ||
389 | } | ||
390 | |||
391 | void | ||
392 | ia64_fault (unsigned long vector, unsigned long isr, unsigned long ifa, | ||
393 | unsigned long iim, unsigned long itir, long arg5, long arg6, | ||
394 | long arg7, struct pt_regs regs) | ||
395 | { | ||
396 | unsigned long code, error = isr, iip; | ||
397 | struct siginfo siginfo; | ||
398 | char buf[128]; | ||
399 | int result, sig; | ||
400 | static const char *reason[] = { | ||
401 | "IA-64 Illegal Operation fault", | ||
402 | "IA-64 Privileged Operation fault", | ||
403 | "IA-64 Privileged Register fault", | ||
404 | "IA-64 Reserved Register/Field fault", | ||
405 | "Disabled Instruction Set Transition fault", | ||
406 | "Unknown fault 5", "Unknown fault 6", "Unknown fault 7", "Illegal Hazard fault", | ||
407 | "Unknown fault 9", "Unknown fault 10", "Unknown fault 11", "Unknown fault 12", | ||
408 | "Unknown fault 13", "Unknown fault 14", "Unknown fault 15" | ||
409 | }; | ||
410 | |||
411 | if ((isr & IA64_ISR_NA) && ((isr & IA64_ISR_CODE_MASK) == IA64_ISR_CODE_LFETCH)) { | ||
412 | /* | ||
413 | * This fault was due to lfetch.fault, set "ed" bit in the psr to cancel | ||
414 | * the lfetch. | ||
415 | */ | ||
416 | ia64_psr(®s)->ed = 1; | ||
417 | return; | ||
418 | } | ||
419 | |||
420 | iip = regs.cr_iip + ia64_psr(®s)->ri; | ||
421 | |||
422 | switch (vector) { | ||
423 | case 24: /* General Exception */ | ||
424 | code = (isr >> 4) & 0xf; | ||
425 | sprintf(buf, "General Exception: %s%s", reason[code], | ||
426 | (code == 3) ? ((isr & (1UL << 37)) | ||
427 | ? " (RSE access)" : " (data access)") : ""); | ||
428 | if (code == 8) { | ||
429 | # ifdef CONFIG_IA64_PRINT_HAZARDS | ||
430 | printk("%s[%d]: possible hazard @ ip=%016lx (pr = %016lx)\n", | ||
431 | current->comm, current->pid, | ||
432 | regs.cr_iip + ia64_psr(®s)->ri, regs.pr); | ||
433 | # endif | ||
434 | return; | ||
435 | } | ||
436 | break; | ||
437 | |||
438 | case 25: /* Disabled FP-Register */ | ||
439 | if (isr & 2) { | ||
440 | disabled_fph_fault(®s); | ||
441 | return; | ||
442 | } | ||
443 | sprintf(buf, "Disabled FPL fault---not supposed to happen!"); | ||
444 | break; | ||
445 | |||
446 | case 26: /* NaT Consumption */ | ||
447 | if (user_mode(®s)) { | ||
448 | void __user *addr; | ||
449 | |||
450 | if (((isr >> 4) & 0xf) == 2) { | ||
451 | /* NaT page consumption */ | ||
452 | sig = SIGSEGV; | ||
453 | code = SEGV_ACCERR; | ||
454 | addr = (void __user *) ifa; | ||
455 | } else { | ||
456 | /* register NaT consumption */ | ||
457 | sig = SIGILL; | ||
458 | code = ILL_ILLOPN; | ||
459 | addr = (void __user *) (regs.cr_iip | ||
460 | + ia64_psr(®s)->ri); | ||
461 | } | ||
462 | siginfo.si_signo = sig; | ||
463 | siginfo.si_code = code; | ||
464 | siginfo.si_errno = 0; | ||
465 | siginfo.si_addr = addr; | ||
466 | siginfo.si_imm = vector; | ||
467 | siginfo.si_flags = __ISR_VALID; | ||
468 | siginfo.si_isr = isr; | ||
469 | force_sig_info(sig, &siginfo, current); | ||
470 | return; | ||
471 | } else if (ia64_done_with_exception(®s)) | ||
472 | return; | ||
473 | sprintf(buf, "NaT consumption"); | ||
474 | break; | ||
475 | |||
476 | case 31: /* Unsupported Data Reference */ | ||
477 | if (user_mode(®s)) { | ||
478 | siginfo.si_signo = SIGILL; | ||
479 | siginfo.si_code = ILL_ILLOPN; | ||
480 | siginfo.si_errno = 0; | ||
481 | siginfo.si_addr = (void __user *) iip; | ||
482 | siginfo.si_imm = vector; | ||
483 | siginfo.si_flags = __ISR_VALID; | ||
484 | siginfo.si_isr = isr; | ||
485 | force_sig_info(SIGILL, &siginfo, current); | ||
486 | return; | ||
487 | } | ||
488 | sprintf(buf, "Unsupported data reference"); | ||
489 | break; | ||
490 | |||
491 | case 29: /* Debug */ | ||
492 | case 35: /* Taken Branch Trap */ | ||
493 | case 36: /* Single Step Trap */ | ||
494 | if (fsys_mode(current, ®s)) { | ||
495 | extern char __kernel_syscall_via_break[]; | ||
496 | /* | ||
497 | * Got a trap in fsys-mode: Taken Branch Trap and Single Step trap | ||
498 | * need special handling; Debug trap is not supposed to happen. | ||
499 | */ | ||
500 | if (unlikely(vector == 29)) { | ||
501 | die("Got debug trap in fsys-mode---not supposed to happen!", | ||
502 | ®s, 0); | ||
503 | return; | ||
504 | } | ||
505 | /* re-do the system call via break 0x100000: */ | ||
506 | regs.cr_iip = (unsigned long) __kernel_syscall_via_break; | ||
507 | ia64_psr(®s)->ri = 0; | ||
508 | ia64_psr(®s)->cpl = 3; | ||
509 | return; | ||
510 | } | ||
511 | switch (vector) { | ||
512 | case 29: | ||
513 | siginfo.si_code = TRAP_HWBKPT; | ||
514 | #ifdef CONFIG_ITANIUM | ||
515 | /* | ||
516 | * Erratum 10 (IFA may contain incorrect address) now has | ||
517 | * "NoFix" status. There are no plans for fixing this. | ||
518 | */ | ||
519 | if (ia64_psr(®s)->is == 0) | ||
520 | ifa = regs.cr_iip; | ||
521 | #endif | ||
522 | break; | ||
523 | case 35: siginfo.si_code = TRAP_BRANCH; ifa = 0; break; | ||
524 | case 36: siginfo.si_code = TRAP_TRACE; ifa = 0; break; | ||
525 | } | ||
526 | siginfo.si_signo = SIGTRAP; | ||
527 | siginfo.si_errno = 0; | ||
528 | siginfo.si_addr = (void __user *) ifa; | ||
529 | siginfo.si_imm = 0; | ||
530 | siginfo.si_flags = __ISR_VALID; | ||
531 | siginfo.si_isr = isr; | ||
532 | force_sig_info(SIGTRAP, &siginfo, current); | ||
533 | return; | ||
534 | |||
535 | case 32: /* fp fault */ | ||
536 | case 33: /* fp trap */ | ||
537 | result = handle_fpu_swa((vector == 32) ? 1 : 0, ®s, isr); | ||
538 | if ((result < 0) || (current->thread.flags & IA64_THREAD_FPEMU_SIGFPE)) { | ||
539 | siginfo.si_signo = SIGFPE; | ||
540 | siginfo.si_errno = 0; | ||
541 | siginfo.si_code = FPE_FLTINV; | ||
542 | siginfo.si_addr = (void __user *) iip; | ||
543 | siginfo.si_flags = __ISR_VALID; | ||
544 | siginfo.si_isr = isr; | ||
545 | siginfo.si_imm = 0; | ||
546 | force_sig_info(SIGFPE, &siginfo, current); | ||
547 | } | ||
548 | return; | ||
549 | |||
550 | case 34: | ||
551 | if (isr & 0x2) { | ||
552 | /* Lower-Privilege Transfer Trap */ | ||
553 | /* | ||
554 | * Just clear PSR.lp and then return immediately: all the | ||
555 | * interesting work (e.g., signal delivery is done in the kernel | ||
556 | * exit path). | ||
557 | */ | ||
558 | ia64_psr(®s)->lp = 0; | ||
559 | return; | ||
560 | } else { | ||
561 | /* Unimplemented Instr. Address Trap */ | ||
562 | if (user_mode(®s)) { | ||
563 | siginfo.si_signo = SIGILL; | ||
564 | siginfo.si_code = ILL_BADIADDR; | ||
565 | siginfo.si_errno = 0; | ||
566 | siginfo.si_flags = 0; | ||
567 | siginfo.si_isr = 0; | ||
568 | siginfo.si_imm = 0; | ||
569 | siginfo.si_addr = (void __user *) iip; | ||
570 | force_sig_info(SIGILL, &siginfo, current); | ||
571 | return; | ||
572 | } | ||
573 | sprintf(buf, "Unimplemented Instruction Address fault"); | ||
574 | } | ||
575 | break; | ||
576 | |||
577 | case 45: | ||
578 | #ifdef CONFIG_IA32_SUPPORT | ||
579 | if (ia32_exception(®s, isr) == 0) | ||
580 | return; | ||
581 | #endif | ||
582 | printk(KERN_ERR "Unexpected IA-32 exception (Trap 45)\n"); | ||
583 | printk(KERN_ERR " iip - 0x%lx, ifa - 0x%lx, isr - 0x%lx\n", | ||
584 | iip, ifa, isr); | ||
585 | force_sig(SIGSEGV, current); | ||
586 | break; | ||
587 | |||
588 | case 46: | ||
589 | #ifdef CONFIG_IA32_SUPPORT | ||
590 | if (ia32_intercept(®s, isr) == 0) | ||
591 | return; | ||
592 | #endif | ||
593 | printk(KERN_ERR "Unexpected IA-32 intercept trap (Trap 46)\n"); | ||
594 | printk(KERN_ERR " iip - 0x%lx, ifa - 0x%lx, isr - 0x%lx, iim - 0x%lx\n", | ||
595 | iip, ifa, isr, iim); | ||
596 | force_sig(SIGSEGV, current); | ||
597 | return; | ||
598 | |||
599 | case 47: | ||
600 | sprintf(buf, "IA-32 Interruption Fault (int 0x%lx)", isr >> 16); | ||
601 | break; | ||
602 | |||
603 | default: | ||
604 | sprintf(buf, "Fault %lu", vector); | ||
605 | break; | ||
606 | } | ||
607 | die_if_kernel(buf, ®s, error); | ||
608 | force_sig(SIGILL, current); | ||
609 | } | ||
diff --git a/arch/ia64/kernel/unaligned.c b/arch/ia64/kernel/unaligned.c new file mode 100644 index 000000000000..43b45b65ee5a --- /dev/null +++ b/arch/ia64/kernel/unaligned.c | |||
@@ -0,0 +1,1521 @@ | |||
1 | /* | ||
2 | * Architecture-specific unaligned trap handling. | ||
3 | * | ||
4 | * Copyright (C) 1999-2002, 2004 Hewlett-Packard Co | ||
5 | * Stephane Eranian <eranian@hpl.hp.com> | ||
6 | * David Mosberger-Tang <davidm@hpl.hp.com> | ||
7 | * | ||
8 | * 2002/12/09 Fix rotating register handling (off-by-1 error, missing fr-rotation). Fix | ||
9 | * get_rse_reg() to not leak kernel bits to user-level (reading an out-of-frame | ||
10 | * stacked register returns an undefined value; it does NOT trigger a | ||
11 | * "rsvd register fault"). | ||
12 | * 2001/10/11 Fix unaligned access to rotating registers in s/w pipelined loops. | ||
13 | * 2001/08/13 Correct size of extended floats (float_fsz) from 16 to 10 bytes. | ||
14 | * 2001/01/17 Add support for emulation of unaligned kernel accesses. | ||
15 | */ | ||
16 | #include <linux/kernel.h> | ||
17 | #include <linux/sched.h> | ||
18 | #include <linux/smp_lock.h> | ||
19 | #include <linux/tty.h> | ||
20 | |||
21 | #include <asm/intrinsics.h> | ||
22 | #include <asm/processor.h> | ||
23 | #include <asm/rse.h> | ||
24 | #include <asm/uaccess.h> | ||
25 | #include <asm/unaligned.h> | ||
26 | |||
27 | extern void die_if_kernel(char *str, struct pt_regs *regs, long err) __attribute__ ((noreturn)); | ||
28 | |||
/*
 * Debug scaffolding for the unaligned-trap emulation.  Define
 * DEBUG_UNALIGNED_TRAP (instead of undefining it) to get DPRINT()/DDUMP()
 * output via printk(); otherwise both macros compile to an empty statement
 * and their arguments are not evaluated.
 */
#undef DEBUG_UNALIGNED_TRAP

#ifdef DEBUG_UNALIGNED_TRAP
# define DPRINT(a...)	do { printk("%s %d: ", __FUNCTION__, __LINE__); printk (a); } while (0)
# define DDUMP(str,vp,len)	dump(str, vp, len)

/* Print STR followed by LEN bytes starting at VP as two-digit hex values. */
static void
dump (const char *str, void *vp, size_t len)
{
	unsigned char *cp = vp;
	size_t i;	/* same type as len: avoids a signed/unsigned comparison */

	printk("%s", str);
	for (i = 0; i < len; ++i)
		printk (" %02x", *cp++);
	printk("\n");
}
#else
# define DPRINT(a...)	do { } while (0)
# define DDUMP(str,vp,len)	do { } while (0)
#endif
50 | |||
/* Number of the first stacked general register / first rotating FP register. */
#define IA64_FIRST_STACKED_GR	32
#define IA64_FIRST_ROTATING_FR	32
/* All bits from bit 8 upwards set: OR-ed in to sign-extend a 9-bit immediate. */
#define SIGN_EXT9		0xffffffffffffff00ul

/*
 *  Instruction decoding for the M-unit:
 *
 *  opcode |   m  |   x6    |
 * --------|------|---------|
 * [40-37] | [36] | [35:30] |
 * --------|------|---------|
 *     4   |   1  |    6    | = 11 bits
 * --------------------------
 *
 * Bits [31:30] do not help to tell loads from stores, so only [35:32] of x6
 * are used.  That leaves the 9-bit field [40:32].  The m-bit ([36], i.e. bit 4
 * of that field) is checked later during load/store emulation, which is why
 * it is cleared in the mask -- hence the 'e' in 0x1ef.
 */
#define IA64_OPCODE_MASK	0x1ef
#define IA64_OPCODE_SHIFT	32
71 | |||
/*
 * Table C-28 Integer Load/Store
 *
 * Values are in the 9-bit [40:32] opcode field (see IA64_OPCODE_MASK).
 * [35:32] = 0x6, 0x7, 0xE, 0xF are ignored.
 *
 * ld8.fill and st8.fill MUST be aligned because the RNATs are based on
 * the address (bits [8:3]), so emulation must fail for them.
 */
#define LD_OP			0x080
#define LDS_OP			0x081
#define LDA_OP			0x082
#define LDSA_OP			0x083
#define LDBIAS_OP		0x084
#define LDACQ_OP		0x085
/* 0x086, 0x087 are not relevant */
#define LDCCLR_OP		0x088
#define LDCNC_OP		0x089
#define LDCCLRACQ_OP		0x08a
#define ST_OP			0x08c
#define STREL_OP		0x08d
/* 0x08e, 0x08f are not relevant */

/*
 * Table C-29 Integer Load +Reg
 *
 * The ld->m field (bit [36:36]) tells whether a load/store is of this
 * form, so no separate opcode values are needed.
 */

/*
 * Table C-30 Integer Load/Store +Imm
 *
 * [35:32] = 0x6, 0x7, 0xE, 0xF are ignored.
 *
 * ld8.fill and st8.fill must be aligned because the NaT bits are based on
 * the address, so emulation must fail and the program must be fixed.
 */
#define LD_IMM_OP		0x0a0
#define LDS_IMM_OP		0x0a1
#define LDA_IMM_OP		0x0a2
#define LDSA_IMM_OP		0x0a3
#define LDBIAS_IMM_OP		0x0a4
#define LDACQ_IMM_OP		0x0a5
/* 0x0a6, 0x0a7 are not relevant */
#define LDCCLR_IMM_OP		0x0a8
#define LDCNC_IMM_OP		0x0a9
#define LDCCLRACQ_IMM_OP	0x0aa
#define ST_IMM_OP		0x0ac
#define STREL_IMM_OP		0x0ad
/* 0x0ae, 0x0af are not relevant */
122 | |||
/*
 * Table C-32 Floating-point Load/Store
 *
 * Values are in the 9-bit [40:32] opcode field (see IA64_OPCODE_MASK).
 */
#define LDF_OP			0x0c0
#define LDFS_OP			0x0c1
#define LDFA_OP			0x0c2
#define LDFSA_OP		0x0c3
/* 0x0c6 is irrelevant */
#define LDFCCLR_OP		0x0c8
#define LDFCNC_OP		0x0c9
/* 0x0cb is irrelevant */
#define STF_OP			0x0cc

/*
 * Table C-33 Floating-point Load +Reg
 *
 * The ld->m field (bit [36:36]) tells whether a load/store is of this
 * form, so no separate opcode values are needed.
 */

/*
 * Table C-34 Floating-point Load/Store +Imm
 */
#define LDF_IMM_OP		0x0e0
#define LDFS_IMM_OP		0x0e1
#define LDFA_IMM_OP		0x0e2
#define LDFSA_IMM_OP		0x0e3
/* 0x0e6 is irrelevant */
#define LDFCCLR_IMM_OP		0x0e8
#define LDFCNC_IMM_OP		0x0e9
#define STF_IMM_OP		0x0ec
154 | |||
/*
 * Bit-field view of a load/store instruction slot.  The fields are listed
 * from the least significant bit upwards and add up to 64 bits; the bit
 * ranges noted next to each field are the positions within the instruction.
 */
typedef struct {
	unsigned long	 qp:6;		/* bits  0..5  */
	unsigned long	 r1:7;		/* bits  6..12 */
	unsigned long	 imm:7;		/* bits 13..19 */
	unsigned long	 r3:7;		/* bits 20..26 */
	unsigned long	 x:1;		/* bit  27     */
	unsigned long	 hint:2;	/* bits 28..29 */
	unsigned long	 x6_sz:2;	/* bits 30..31 */
	unsigned long	 x6_op:4;	/* bits 32..35; x6 = x6_sz|x6_op */
	unsigned long	 m:1;		/* bit  36     */
	unsigned long	 op:4;		/* bits 37..40 */
	unsigned long	 pad:23;	/* bits 41..63 */
} load_store_t;
168 | |||
169 | |||
/* Form of the base-register update performed by a load/store. */
typedef enum {
	/* ldXZ r1=[r3],imm(9) */
	UPD_IMMEDIATE,
	/* ldXZ r1=[r3],r2 */
	UPD_REG
} update_t;
174 | |||
/*
 * Register locations in the saved state are kept in tables rather than in
 * large switch/case statements.
 *
 * Each table entry encodes:
 *   bit 0      - 0: the register lives in pt_regs, 1: in switch_stack
 *   bits 1..   - byte offset of the register within that structure
 * A 2-byte value should be enough to hold any such offset.
 *
 * If the calling convention (and thus pt_regs/switch_stack) changes, simply
 * swap RSW for RPT, or vice versa, in the affected table entries.
 */

/* Byte offset of member x inside struct pt_regs / struct switch_stack. */
#define RPO(x)		((size_t) &((struct pt_regs *)0)->x)
#define RSO(x)		((size_t) &((struct switch_stack *)0)->x)

/* Table encodings: pt_regs member (flag bit clear) / switch_stack member (flag bit set). */
#define RPT(x)		(RPO(x) << 1)
#define RSW(x)		((RSO(x) << 1) | 1)

/* Decode a gr_info[] entry. */
#define GR_OFFS(x)	(gr_info[(x)] >> 1)
#define GR_IN_SW(x)	(gr_info[(x)] & 0x1)

/* Decode an fr_info[] entry. */
#define FR_OFFS(x)	(fr_info[(x)] >> 1)
#define FR_IN_SW(x)	(fr_info[(x)] & 0x1)
198 | |||
199 | static u16 gr_info[32]={ | ||
200 | 0, /* r0 is read-only : WE SHOULD NEVER GET THIS */ | ||
201 | |||
202 | RPT(r1), RPT(r2), RPT(r3), | ||
203 | |||
204 | RSW(r4), RSW(r5), RSW(r6), RSW(r7), | ||
205 | |||
206 | RPT(r8), RPT(r9), RPT(r10), RPT(r11), | ||
207 | RPT(r12), RPT(r13), RPT(r14), RPT(r15), | ||
208 | |||
209 | RPT(r16), RPT(r17), RPT(r18), RPT(r19), | ||
210 | RPT(r20), RPT(r21), RPT(r22), RPT(r23), | ||
211 | RPT(r24), RPT(r25), RPT(r26), RPT(r27), | ||
212 | RPT(r28), RPT(r29), RPT(r30), RPT(r31) | ||
213 | }; | ||
214 | |||
215 | static u16 fr_info[32]={ | ||
216 | 0, /* constant : WE SHOULD NEVER GET THIS */ | ||
217 | 0, /* constant : WE SHOULD NEVER GET THIS */ | ||
218 | |||
219 | RSW(f2), RSW(f3), RSW(f4), RSW(f5), | ||
220 | |||
221 | RPT(f6), RPT(f7), RPT(f8), RPT(f9), | ||
222 | RPT(f10), RPT(f11), | ||
223 | |||
224 | RSW(f12), RSW(f13), RSW(f14), | ||
225 | RSW(f15), RSW(f16), RSW(f17), RSW(f18), RSW(f19), | ||
226 | RSW(f20), RSW(f21), RSW(f22), RSW(f23), RSW(f24), | ||
227 | RSW(f25), RSW(f26), RSW(f27), RSW(f28), RSW(f29), | ||
228 | RSW(f30), RSW(f31) | ||
229 | }; | ||
230 | |||
/*
 * Invalidate the ALAT entry for integer register REGNO.
 *
 * ia64_invala_gr() is invoked with a literal register number in every case,
 * hence one case label per register (0-127).  Any other REGNO is silently
 * ignored.
 */
static void
invala_gr (int regno)
{
#	define INVALA(n)	case n: ia64_invala_gr(n); break

	switch (regno) {
		INVALA(  0); INVALA(  1); INVALA(  2); INVALA(  3);
		INVALA(  4); INVALA(  5); INVALA(  6); INVALA(  7);
		INVALA(  8); INVALA(  9); INVALA( 10); INVALA( 11);
		INVALA( 12); INVALA( 13); INVALA( 14); INVALA( 15);
		INVALA( 16); INVALA( 17); INVALA( 18); INVALA( 19);
		INVALA( 20); INVALA( 21); INVALA( 22); INVALA( 23);
		INVALA( 24); INVALA( 25); INVALA( 26); INVALA( 27);
		INVALA( 28); INVALA( 29); INVALA( 30); INVALA( 31);
		INVALA( 32); INVALA( 33); INVALA( 34); INVALA( 35);
		INVALA( 36); INVALA( 37); INVALA( 38); INVALA( 39);
		INVALA( 40); INVALA( 41); INVALA( 42); INVALA( 43);
		INVALA( 44); INVALA( 45); INVALA( 46); INVALA( 47);
		INVALA( 48); INVALA( 49); INVALA( 50); INVALA( 51);
		INVALA( 52); INVALA( 53); INVALA( 54); INVALA( 55);
		INVALA( 56); INVALA( 57); INVALA( 58); INVALA( 59);
		INVALA( 60); INVALA( 61); INVALA( 62); INVALA( 63);
		INVALA( 64); INVALA( 65); INVALA( 66); INVALA( 67);
		INVALA( 68); INVALA( 69); INVALA( 70); INVALA( 71);
		INVALA( 72); INVALA( 73); INVALA( 74); INVALA( 75);
		INVALA( 76); INVALA( 77); INVALA( 78); INVALA( 79);
		INVALA( 80); INVALA( 81); INVALA( 82); INVALA( 83);
		INVALA( 84); INVALA( 85); INVALA( 86); INVALA( 87);
		INVALA( 88); INVALA( 89); INVALA( 90); INVALA( 91);
		INVALA( 92); INVALA( 93); INVALA( 94); INVALA( 95);
		INVALA( 96); INVALA( 97); INVALA( 98); INVALA( 99);
		INVALA(100); INVALA(101); INVALA(102); INVALA(103);
		INVALA(104); INVALA(105); INVALA(106); INVALA(107);
		INVALA(108); INVALA(109); INVALA(110); INVALA(111);
		INVALA(112); INVALA(113); INVALA(114); INVALA(115);
		INVALA(116); INVALA(117); INVALA(118); INVALA(119);
		INVALA(120); INVALA(121); INVALA(122); INVALA(123);
		INVALA(124); INVALA(125); INVALA(126); INVALA(127);
	}
#	undef INVALA
}
257 | |||
/*
 * Invalidate the ALAT entry for floating-point register REGNO.
 *
 * ia64_invala_fr() is invoked with a literal register number in every case,
 * hence one case label per register (0-127).  Any other REGNO is silently
 * ignored.
 */
static void
invala_fr (int regno)
{
#	define INVALA(n)	case n: ia64_invala_fr(n); break

	switch (regno) {
		INVALA(  0); INVALA(  1); INVALA(  2); INVALA(  3);
		INVALA(  4); INVALA(  5); INVALA(  6); INVALA(  7);
		INVALA(  8); INVALA(  9); INVALA( 10); INVALA( 11);
		INVALA( 12); INVALA( 13); INVALA( 14); INVALA( 15);
		INVALA( 16); INVALA( 17); INVALA( 18); INVALA( 19);
		INVALA( 20); INVALA( 21); INVALA( 22); INVALA( 23);
		INVALA( 24); INVALA( 25); INVALA( 26); INVALA( 27);
		INVALA( 28); INVALA( 29); INVALA( 30); INVALA( 31);
		INVALA( 32); INVALA( 33); INVALA( 34); INVALA( 35);
		INVALA( 36); INVALA( 37); INVALA( 38); INVALA( 39);
		INVALA( 40); INVALA( 41); INVALA( 42); INVALA( 43);
		INVALA( 44); INVALA( 45); INVALA( 46); INVALA( 47);
		INVALA( 48); INVALA( 49); INVALA( 50); INVALA( 51);
		INVALA( 52); INVALA( 53); INVALA( 54); INVALA( 55);
		INVALA( 56); INVALA( 57); INVALA( 58); INVALA( 59);
		INVALA( 60); INVALA( 61); INVALA( 62); INVALA( 63);
		INVALA( 64); INVALA( 65); INVALA( 66); INVALA( 67);
		INVALA( 68); INVALA( 69); INVALA( 70); INVALA( 71);
		INVALA( 72); INVALA( 73); INVALA( 74); INVALA( 75);
		INVALA( 76); INVALA( 77); INVALA( 78); INVALA( 79);
		INVALA( 80); INVALA( 81); INVALA( 82); INVALA( 83);
		INVALA( 84); INVALA( 85); INVALA( 86); INVALA( 87);
		INVALA( 88); INVALA( 89); INVALA( 90); INVALA( 91);
		INVALA( 92); INVALA( 93); INVALA( 94); INVALA( 95);
		INVALA( 96); INVALA( 97); INVALA( 98); INVALA( 99);
		INVALA(100); INVALA(101); INVALA(102); INVALA(103);
		INVALA(104); INVALA(105); INVALA(106); INVALA(107);
		INVALA(108); INVALA(109); INVALA(110); INVALA(111);
		INVALA(112); INVALA(113); INVALA(114); INVALA(115);
		INVALA(116); INVALA(117); INVALA(118); INVALA(119);
		INVALA(120); INVALA(121); INVALA(122); INVALA(123);
		INVALA(124); INVALA(125); INVALA(126); INVALA(127);
	}
#	undef INVALA
}
284 | |||
/*
 * Apply a rotating-register base to a register index: returns REG + RRB,
 * reduced by SOR once if the sum is at least SOR.
 */
static inline unsigned long
rotate_reg (unsigned long sor, unsigned long rrb, unsigned long reg)
{
	unsigned long rotated = reg + rrb;

	return (rotated >= sor) ? rotated - sor : rotated;
}
293 | |||
294 | static void | ||
295 | set_rse_reg (struct pt_regs *regs, unsigned long r1, unsigned long val, int nat) | ||
296 | { | ||
297 | struct switch_stack *sw = (struct switch_stack *) regs - 1; | ||
298 | unsigned long *bsp, *bspstore, *addr, *rnat_addr, *ubs_end; | ||
299 | unsigned long *kbs = (void *) current + IA64_RBS_OFFSET; | ||
300 | unsigned long rnats, nat_mask; | ||
301 | unsigned long on_kbs; | ||
302 | long sof = (regs->cr_ifs) & 0x7f; | ||
303 | long sor = 8 * ((regs->cr_ifs >> 14) & 0xf); | ||
304 | long rrb_gr = (regs->cr_ifs >> 18) & 0x7f; | ||
305 | long ridx = r1 - 32; | ||
306 | |||
307 | if (ridx >= sof) { | ||
308 | /* this should never happen, as the "rsvd register fault" has higher priority */ | ||
309 | DPRINT("ignoring write to r%lu; only %lu registers are allocated!\n", r1, sof); | ||
310 | return; | ||
311 | } | ||
312 | |||
313 | if (ridx < sor) | ||
314 | ridx = rotate_reg(sor, rrb_gr, ridx); | ||
315 | |||
316 | DPRINT("r%lu, sw.bspstore=%lx pt.bspstore=%lx sof=%ld sol=%ld ridx=%ld\n", | ||
317 | r1, sw->ar_bspstore, regs->ar_bspstore, sof, (regs->cr_ifs >> 7) & 0x7f, ridx); | ||
318 | |||
319 | on_kbs = ia64_rse_num_regs(kbs, (unsigned long *) sw->ar_bspstore); | ||
320 | addr = ia64_rse_skip_regs((unsigned long *) sw->ar_bspstore, -sof + ridx); | ||
321 | if (addr >= kbs) { | ||
322 | /* the register is on the kernel backing store: easy... */ | ||
323 | rnat_addr = ia64_rse_rnat_addr(addr); | ||
324 | if ((unsigned long) rnat_addr >= sw->ar_bspstore) | ||
325 | rnat_addr = &sw->ar_rnat; | ||
326 | nat_mask = 1UL << ia64_rse_slot_num(addr); | ||
327 | |||
328 | *addr = val; | ||
329 | if (nat) | ||
330 | *rnat_addr |= nat_mask; | ||
331 | else | ||
332 | *rnat_addr &= ~nat_mask; | ||
333 | return; | ||
334 | } | ||
335 | |||
336 | if (!user_stack(current, regs)) { | ||
337 | DPRINT("ignoring kernel write to r%lu; register isn't on the kernel RBS!", r1); | ||
338 | return; | ||
339 | } | ||
340 | |||
341 | bspstore = (unsigned long *)regs->ar_bspstore; | ||
342 | ubs_end = ia64_rse_skip_regs(bspstore, on_kbs); | ||
343 | bsp = ia64_rse_skip_regs(ubs_end, -sof); | ||
344 | addr = ia64_rse_skip_regs(bsp, ridx); | ||
345 | |||
346 | DPRINT("ubs_end=%p bsp=%p addr=%p\n", (void *) ubs_end, (void *) bsp, (void *) addr); | ||
347 | |||
348 | ia64_poke(current, sw, (unsigned long) ubs_end, (unsigned long) addr, val); | ||
349 | |||
350 | rnat_addr = ia64_rse_rnat_addr(addr); | ||
351 | |||
352 | ia64_peek(current, sw, (unsigned long) ubs_end, (unsigned long) rnat_addr, &rnats); | ||
353 | DPRINT("rnat @%p = 0x%lx nat=%d old nat=%ld\n", | ||
354 | (void *) rnat_addr, rnats, nat, (rnats >> ia64_rse_slot_num(addr)) & 1); | ||
355 | |||
356 | nat_mask = 1UL << ia64_rse_slot_num(addr); | ||
357 | if (nat) | ||
358 | rnats |= nat_mask; | ||
359 | else | ||
360 | rnats &= ~nat_mask; | ||
361 | ia64_poke(current, sw, (unsigned long) ubs_end, (unsigned long) rnat_addr, rnats); | ||
362 | |||
363 | DPRINT("rnat changed to @%p = 0x%lx\n", (void *) rnat_addr, rnats); | ||
364 | } | ||
365 | |||
366 | |||
367 | static void | ||
368 | get_rse_reg (struct pt_regs *regs, unsigned long r1, unsigned long *val, int *nat) | ||
369 | { | ||
370 | struct switch_stack *sw = (struct switch_stack *) regs - 1; | ||
371 | unsigned long *bsp, *addr, *rnat_addr, *ubs_end, *bspstore; | ||
372 | unsigned long *kbs = (void *) current + IA64_RBS_OFFSET; | ||
373 | unsigned long rnats, nat_mask; | ||
374 | unsigned long on_kbs; | ||
375 | long sof = (regs->cr_ifs) & 0x7f; | ||
376 | long sor = 8 * ((regs->cr_ifs >> 14) & 0xf); | ||
377 | long rrb_gr = (regs->cr_ifs >> 18) & 0x7f; | ||
378 | long ridx = r1 - 32; | ||
379 | |||
380 | if (ridx >= sof) { | ||
381 | /* read of out-of-frame register returns an undefined value; 0 in our case. */ | ||
382 | DPRINT("ignoring read from r%lu; only %lu registers are allocated!\n", r1, sof); | ||
383 | goto fail; | ||
384 | } | ||
385 | |||
386 | if (ridx < sor) | ||
387 | ridx = rotate_reg(sor, rrb_gr, ridx); | ||
388 | |||
389 | DPRINT("r%lu, sw.bspstore=%lx pt.bspstore=%lx sof=%ld sol=%ld ridx=%ld\n", | ||
390 | r1, sw->ar_bspstore, regs->ar_bspstore, sof, (regs->cr_ifs >> 7) & 0x7f, ridx); | ||
391 | |||
392 | on_kbs = ia64_rse_num_regs(kbs, (unsigned long *) sw->ar_bspstore); | ||
393 | addr = ia64_rse_skip_regs((unsigned long *) sw->ar_bspstore, -sof + ridx); | ||
394 | if (addr >= kbs) { | ||
395 | /* the register is on the kernel backing store: easy... */ | ||
396 | *val = *addr; | ||
397 | if (nat) { | ||
398 | rnat_addr = ia64_rse_rnat_addr(addr); | ||
399 | if ((unsigned long) rnat_addr >= sw->ar_bspstore) | ||
400 | rnat_addr = &sw->ar_rnat; | ||
401 | nat_mask = 1UL << ia64_rse_slot_num(addr); | ||
402 | *nat = (*rnat_addr & nat_mask) != 0; | ||
403 | } | ||
404 | return; | ||
405 | } | ||
406 | |||
407 | if (!user_stack(current, regs)) { | ||
408 | DPRINT("ignoring kernel read of r%lu; register isn't on the RBS!", r1); | ||
409 | goto fail; | ||
410 | } | ||
411 | |||
412 | bspstore = (unsigned long *)regs->ar_bspstore; | ||
413 | ubs_end = ia64_rse_skip_regs(bspstore, on_kbs); | ||
414 | bsp = ia64_rse_skip_regs(ubs_end, -sof); | ||
415 | addr = ia64_rse_skip_regs(bsp, ridx); | ||
416 | |||
417 | DPRINT("ubs_end=%p bsp=%p addr=%p\n", (void *) ubs_end, (void *) bsp, (void *) addr); | ||
418 | |||
419 | ia64_peek(current, sw, (unsigned long) ubs_end, (unsigned long) addr, val); | ||
420 | |||
421 | if (nat) { | ||
422 | rnat_addr = ia64_rse_rnat_addr(addr); | ||
423 | nat_mask = 1UL << ia64_rse_slot_num(addr); | ||
424 | |||
425 | DPRINT("rnat @%p = 0x%lx\n", (void *) rnat_addr, rnats); | ||
426 | |||
427 | ia64_peek(current, sw, (unsigned long) ubs_end, (unsigned long) rnat_addr, &rnats); | ||
428 | *nat = (rnats & nat_mask) != 0; | ||
429 | } | ||
430 | return; | ||
431 | |||
432 | fail: | ||
433 | *val = 0; | ||
434 | if (nat) | ||
435 | *nat = 0; | ||
436 | return; | ||
437 | } | ||
438 | |||
439 | |||
440 | static void | ||
441 | setreg (unsigned long regnum, unsigned long val, int nat, struct pt_regs *regs) | ||
442 | { | ||
443 | struct switch_stack *sw = (struct switch_stack *) regs - 1; | ||
444 | unsigned long addr; | ||
445 | unsigned long bitmask; | ||
446 | unsigned long *unat; | ||
447 | |||
448 | /* | ||
449 | * First takes care of stacked registers | ||
450 | */ | ||
451 | if (regnum >= IA64_FIRST_STACKED_GR) { | ||
452 | set_rse_reg(regs, regnum, val, nat); | ||
453 | return; | ||
454 | } | ||
455 | |||
456 | /* | ||
457 | * Using r0 as a target raises a General Exception fault which has higher priority | ||
458 | * than the Unaligned Reference fault. | ||
459 | */ | ||
460 | |||
461 | /* | ||
462 | * Now look at registers in [0-31] range and init correct UNAT | ||
463 | */ | ||
464 | if (GR_IN_SW(regnum)) { | ||
465 | addr = (unsigned long)sw; | ||
466 | unat = &sw->ar_unat; | ||
467 | } else { | ||
468 | addr = (unsigned long)regs; | ||
469 | unat = &sw->caller_unat; | ||
470 | } | ||
471 | DPRINT("tmp_base=%lx switch_stack=%s offset=%d\n", | ||
472 | addr, unat==&sw->ar_unat ? "yes":"no", GR_OFFS(regnum)); | ||
473 | /* | ||
474 | * add offset from base of struct | ||
475 | * and do it ! | ||
476 | */ | ||
477 | addr += GR_OFFS(regnum); | ||
478 | |||
479 | *(unsigned long *)addr = val; | ||
480 | |||
481 | /* | ||
482 | * We need to clear the corresponding UNAT bit to fully emulate the load | ||
483 | * UNAT bit_pos = GR[r3]{8:3} form EAS-2.4 | ||
484 | */ | ||
485 | bitmask = 1UL << (addr >> 3 & 0x3f); | ||
486 | DPRINT("*0x%lx=0x%lx NaT=%d prev_unat @%p=%lx\n", addr, val, nat, (void *) unat, *unat); | ||
487 | if (nat) { | ||
488 | *unat |= bitmask; | ||
489 | } else { | ||
490 | *unat &= ~bitmask; | ||
491 | } | ||
492 | DPRINT("*0x%lx=0x%lx NaT=%d new unat: %p=%lx\n", addr, val, nat, (void *) unat,*unat); | ||
493 | } | ||
494 | |||
495 | /* | ||
496 | * Return the (rotated) index for floating point register REGNUM (REGNUM must be in the | ||
497 | * range from 32-127, result is in the range from 0-95. | ||
498 | */ | ||
499 | static inline unsigned long | ||
500 | fph_index (struct pt_regs *regs, long regnum) | ||
501 | { | ||
502 | unsigned long rrb_fr = (regs->cr_ifs >> 25) & 0x7f; | ||
503 | return rotate_reg(96, rrb_fr, (regnum - IA64_FIRST_ROTATING_FR)); | ||
504 | } | ||
505 | |||
506 | static void | ||
507 | setfpreg (unsigned long regnum, struct ia64_fpreg *fpval, struct pt_regs *regs) | ||
508 | { | ||
509 | struct switch_stack *sw = (struct switch_stack *)regs - 1; | ||
510 | unsigned long addr; | ||
511 | |||
512 | /* | ||
513 | * From EAS-2.5: FPDisableFault has higher priority than Unaligned | ||
514 | * Fault. Thus, when we get here, we know the partition is enabled. | ||
515 | * To update f32-f127, there are three choices: | ||
516 | * | ||
517 | * (1) save f32-f127 to thread.fph and update the values there | ||
518 | * (2) use a gigantic switch statement to directly access the registers | ||
519 | * (3) generate code on the fly to update the desired register | ||
520 | * | ||
521 | * For now, we are using approach (1). | ||
522 | */ | ||
523 | if (regnum >= IA64_FIRST_ROTATING_FR) { | ||
524 | ia64_sync_fph(current); | ||
525 | current->thread.fph[fph_index(regs, regnum)] = *fpval; | ||
526 | } else { | ||
527 | /* | ||
528 | * pt_regs or switch_stack ? | ||
529 | */ | ||
530 | if (FR_IN_SW(regnum)) { | ||
531 | addr = (unsigned long)sw; | ||
532 | } else { | ||
533 | addr = (unsigned long)regs; | ||
534 | } | ||
535 | |||
536 | DPRINT("tmp_base=%lx offset=%d\n", addr, FR_OFFS(regnum)); | ||
537 | |||
538 | addr += FR_OFFS(regnum); | ||
539 | *(struct ia64_fpreg *)addr = *fpval; | ||
540 | |||
541 | /* | ||
542 | * mark the low partition as being used now | ||
543 | * | ||
544 | * It is highly unlikely that this bit is not already set, but | ||
545 | * let's do it for safety. | ||
546 | */ | ||
547 | regs->cr_ipsr |= IA64_PSR_MFL; | ||
548 | } | ||
549 | } | ||
550 | |||
/*
 * float_spill_f0() and float_spill_f1() generate the spilled versions of the constant
 * floating point registers, which can then be used with stfX.
 *
 * float_spill_f0() writes the spilled form of register number 0 (f0, the constant
 * 0.0 according to getfpreg()) to *final.
 */
static inline void
float_spill_f0 (struct ia64_fpreg *final)
{
	ia64_stf_spill(final, 0);
}
560 | |||
/*
 * Write the spilled form of register number 1 (f1, the constant 1.0 according to
 * getfpreg()) to *final.
 */
static inline void
float_spill_f1 (struct ia64_fpreg *final)
{
	ia64_stf_spill(final, 1);
}
566 | |||
567 | static void | ||
568 | getfpreg (unsigned long regnum, struct ia64_fpreg *fpval, struct pt_regs *regs) | ||
569 | { | ||
570 | struct switch_stack *sw = (struct switch_stack *) regs - 1; | ||
571 | unsigned long addr; | ||
572 | |||
573 | /* | ||
574 | * From EAS-2.5: FPDisableFault has higher priority than | ||
575 | * Unaligned Fault. Thus, when we get here, we know the partition is | ||
576 | * enabled. | ||
577 | * | ||
578 | * When regnum > 31, the register is still live and we need to force a save | ||
579 | * to current->thread.fph to get access to it. See discussion in setfpreg() | ||
580 | * for reasons and other ways of doing this. | ||
581 | */ | ||
582 | if (regnum >= IA64_FIRST_ROTATING_FR) { | ||
583 | ia64_flush_fph(current); | ||
584 | *fpval = current->thread.fph[fph_index(regs, regnum)]; | ||
585 | } else { | ||
586 | /* | ||
587 | * f0 = 0.0, f1= 1.0. Those registers are constant and are thus | ||
588 | * not saved, we must generate their spilled form on the fly | ||
589 | */ | ||
590 | switch(regnum) { | ||
591 | case 0: | ||
592 | float_spill_f0(fpval); | ||
593 | break; | ||
594 | case 1: | ||
595 | float_spill_f1(fpval); | ||
596 | break; | ||
597 | default: | ||
598 | /* | ||
599 | * pt_regs or switch_stack ? | ||
600 | */ | ||
601 | addr = FR_IN_SW(regnum) ? (unsigned long)sw | ||
602 | : (unsigned long)regs; | ||
603 | |||
604 | DPRINT("is_sw=%d tmp_base=%lx offset=0x%x\n", | ||
605 | FR_IN_SW(regnum), addr, FR_OFFS(regnum)); | ||
606 | |||
607 | addr += FR_OFFS(regnum); | ||
608 | *fpval = *(struct ia64_fpreg *)addr; | ||
609 | } | ||
610 | } | ||
611 | } | ||
612 | |||
613 | |||
614 | static void | ||
615 | getreg (unsigned long regnum, unsigned long *val, int *nat, struct pt_regs *regs) | ||
616 | { | ||
617 | struct switch_stack *sw = (struct switch_stack *) regs - 1; | ||
618 | unsigned long addr, *unat; | ||
619 | |||
620 | if (regnum >= IA64_FIRST_STACKED_GR) { | ||
621 | get_rse_reg(regs, regnum, val, nat); | ||
622 | return; | ||
623 | } | ||
624 | |||
625 | /* | ||
626 | * take care of r0 (read-only always evaluate to 0) | ||
627 | */ | ||
628 | if (regnum == 0) { | ||
629 | *val = 0; | ||
630 | if (nat) | ||
631 | *nat = 0; | ||
632 | return; | ||
633 | } | ||
634 | |||
635 | /* | ||
636 | * Now look at registers in [0-31] range and init correct UNAT | ||
637 | */ | ||
638 | if (GR_IN_SW(regnum)) { | ||
639 | addr = (unsigned long)sw; | ||
640 | unat = &sw->ar_unat; | ||
641 | } else { | ||
642 | addr = (unsigned long)regs; | ||
643 | unat = &sw->caller_unat; | ||
644 | } | ||
645 | |||
646 | DPRINT("addr_base=%lx offset=0x%x\n", addr, GR_OFFS(regnum)); | ||
647 | |||
648 | addr += GR_OFFS(regnum); | ||
649 | |||
650 | *val = *(unsigned long *)addr; | ||
651 | |||
652 | /* | ||
653 | * do it only when requested | ||
654 | */ | ||
655 | if (nat) | ||
656 | *nat = (*unat >> (addr >> 3 & 0x3f)) & 0x1UL; | ||
657 | } | ||
658 | |||
659 | static void | ||
660 | emulate_load_updates (update_t type, load_store_t ld, struct pt_regs *regs, unsigned long ifa) | ||
661 | { | ||
662 | /* | ||
663 | * IMPORTANT: | ||
664 | * Given the way we handle unaligned speculative loads, we should | ||
665 | * not get to this point in the code but we keep this sanity check, | ||
666 | * just in case. | ||
667 | */ | ||
668 | if (ld.x6_op == 1 || ld.x6_op == 3) { | ||
669 | printk(KERN_ERR "%s: register update on speculative load, error\n", __FUNCTION__); | ||
670 | die_if_kernel("unaligned reference on speculative load with register update\n", | ||
671 | regs, 30); | ||
672 | } | ||
673 | |||
674 | |||
675 | /* | ||
676 | * at this point, we know that the base register to update is valid i.e., | ||
677 | * it's not r0 | ||
678 | */ | ||
679 | if (type == UPD_IMMEDIATE) { | ||
680 | unsigned long imm; | ||
681 | |||
682 | /* | ||
683 | * Load +Imm: ldXZ r1=[r3],imm(9) | ||
684 | * | ||
685 | * | ||
686 | * form imm9: [13:19] contain the first 7 bits | ||
687 | */ | ||
688 | imm = ld.x << 7 | ld.imm; | ||
689 | |||
690 | /* | ||
691 | * sign extend (1+8bits) if m set | ||
692 | */ | ||
693 | if (ld.m) imm |= SIGN_EXT9; | ||
694 | |||
695 | /* | ||
696 | * ifa == r3 and we know that the NaT bit on r3 was clear so | ||
697 | * we can directly use ifa. | ||
698 | */ | ||
699 | ifa += imm; | ||
700 | |||
701 | setreg(ld.r3, ifa, 0, regs); | ||
702 | |||
703 | DPRINT("ld.x=%d ld.m=%d imm=%ld r3=0x%lx\n", ld.x, ld.m, imm, ifa); | ||
704 | |||
705 | } else if (ld.m) { | ||
706 | unsigned long r2; | ||
707 | int nat_r2; | ||
708 | |||
709 | /* | ||
710 | * Load +Reg Opcode: ldXZ r1=[r3],r2 | ||
711 | * | ||
712 | * Note: that we update r3 even in the case of ldfX.a | ||
713 | * (where the load does not happen) | ||
714 | * | ||
715 | * The way the load algorithm works, we know that r3 does not | ||
716 | * have its NaT bit set (would have gotten NaT consumption | ||
717 | * before getting the unaligned fault). So we can use ifa | ||
718 | * which equals r3 at this point. | ||
719 | * | ||
720 | * IMPORTANT: | ||
721 | * The above statement holds ONLY because we know that we | ||
722 | * never reach this code when trying to do a ldX.s. | ||
723 | * If we ever make it to here on an ldfX.s then | ||
724 | */ | ||
725 | getreg(ld.imm, &r2, &nat_r2, regs); | ||
726 | |||
727 | ifa += r2; | ||
728 | |||
729 | /* | ||
730 | * propagate Nat r2 -> r3 | ||
731 | */ | ||
732 | setreg(ld.r3, ifa, nat_r2, regs); | ||
733 | |||
734 | DPRINT("imm=%d r2=%ld r3=0x%lx nat_r2=%d\n",ld.imm, r2, ifa, nat_r2); | ||
735 | } | ||
736 | } | ||
737 | |||
738 | |||
739 | static int | ||
740 | emulate_load_int (unsigned long ifa, load_store_t ld, struct pt_regs *regs) | ||
741 | { | ||
742 | unsigned int len = 1 << ld.x6_sz; | ||
743 | unsigned long val = 0; | ||
744 | |||
745 | /* | ||
746 | * r0, as target, doesn't need to be checked because Illegal Instruction | ||
747 | * faults have higher priority than unaligned faults. | ||
748 | * | ||
749 | * r0 cannot be found as the base as it would never generate an | ||
750 | * unaligned reference. | ||
751 | */ | ||
752 | |||
753 | /* | ||
754 | * ldX.a we will emulate load and also invalidate the ALAT entry. | ||
755 | * See comment below for explanation on how we handle ldX.a | ||
756 | */ | ||
757 | |||
758 | if (len != 2 && len != 4 && len != 8) { | ||
759 | DPRINT("unknown size: x6=%d\n", ld.x6_sz); | ||
760 | return -1; | ||
761 | } | ||
762 | /* this assumes little-endian byte-order: */ | ||
763 | if (copy_from_user(&val, (void __user *) ifa, len)) | ||
764 | return -1; | ||
765 | setreg(ld.r1, val, 0, regs); | ||
766 | |||
767 | /* | ||
768 | * check for updates on any kind of loads | ||
769 | */ | ||
770 | if (ld.op == 0x5 || ld.m) | ||
771 | emulate_load_updates(ld.op == 0x5 ? UPD_IMMEDIATE: UPD_REG, ld, regs, ifa); | ||
772 | |||
773 | /* | ||
774 | * handling of various loads (based on EAS2.4): | ||
775 | * | ||
776 | * ldX.acq (ordered load): | ||
777 | * - acquire semantics would have been used, so force fence instead. | ||
778 | * | ||
779 | * ldX.c.clr (check load and clear): | ||
780 | * - if we get to this handler, it's because the entry was not in the ALAT. | ||
781 | * Therefore the operation reverts to a normal load | ||
782 | * | ||
783 | * ldX.c.nc (check load no clear): | ||
784 | * - same as previous one | ||
785 | * | ||
786 | * ldX.c.clr.acq (ordered check load and clear): | ||
787 | * - same as above for c.clr part. The load needs to have acquire semantics. So | ||
788 | * we use the fence semantics which is stronger and thus ensures correctness. | ||
789 | * | ||
790 | * ldX.a (advanced load): | ||
791 | * - suppose ldX.a r1=[r3]. If we get to the unaligned trap it's because the | ||
792 | * address doesn't match requested size alignment. This means that we would | ||
793 | * possibly need more than one load to get the result. | ||
794 | * | ||
795 | * The load part can be handled just like a normal load, however the difficult | ||
796 | * part is to get the right thing into the ALAT. The critical piece of information | ||
797 | * in the base address of the load & size. To do that, a ld.a must be executed, | ||
798 | * clearly any address can be pushed into the table by using ld1.a r1=[r3]. Now | ||
799 | * if we use the same target register, we will be okay for the check.a instruction. | ||
800 | * If we look at the store, basically a stX [r3]=r1 checks the ALAT for any entry | ||
801 | * which would overlap within [r3,r3+X] (the size of the load was store in the | ||
802 | * ALAT). If such an entry is found the entry is invalidated. But this is not good | ||
803 | * enough, take the following example: | ||
804 | * r3=3 | ||
805 | * ld4.a r1=[r3] | ||
806 | * | ||
807 | * Could be emulated by doing: | ||
808 | * ld1.a r1=[r3],1 | ||
809 | * store to temporary; | ||
810 | * ld1.a r1=[r3],1 | ||
811 | * store & shift to temporary; | ||
812 | * ld1.a r1=[r3],1 | ||
813 | * store & shift to temporary; | ||
814 | * ld1.a r1=[r3] | ||
815 | * store & shift to temporary; | ||
816 | * r1=temporary | ||
817 | * | ||
818 | * So in this case, you would get the right value is r1 but the wrong info in | ||
819 | * the ALAT. Notice that you could do it in reverse to finish with address 3 | ||
820 | * but you would still get the size wrong. To get the size right, one needs to | ||
821 | * execute exactly the same kind of load. You could do it from a aligned | ||
822 | * temporary location, but you would get the address wrong. | ||
823 | * | ||
824 | * So no matter what, it is not possible to emulate an advanced load | ||
825 | * correctly. But is that really critical ? | ||
826 | * | ||
827 | * We will always convert ld.a into a normal load with ALAT invalidated. This | ||
828 | * will enable compiler to do optimization where certain code path after ld.a | ||
829 | * is not required to have ld.c/chk.a, e.g., code path with no intervening stores. | ||
830 | * | ||
831 | * If there is a store after the advanced load, one must either do a ld.c.* or | ||
832 | * chk.a.* to reuse the value stored in the ALAT. Both can "fail" (meaning no | ||
833 | * entry found in ALAT), and that's perfectly ok because: | ||
834 | * | ||
835 | * - ld.c.*, if the entry is not present a normal load is executed | ||
836 | * - chk.a.*, if the entry is not present, execution jumps to recovery code | ||
837 | * | ||
838 | * In either case, the load can be potentially retried in another form. | ||
839 | * | ||
840 | * ALAT must be invalidated for the register (so that chk.a or ld.c don't pick | ||
841 | * up a stale entry later). The register base update MUST also be performed. | ||
842 | */ | ||
843 | |||
844 | /* | ||
845 | * when the load has the .acq completer then | ||
846 | * use ordering fence. | ||
847 | */ | ||
848 | if (ld.x6_op == 0x5 || ld.x6_op == 0xa) | ||
849 | mb(); | ||
850 | |||
851 | /* | ||
852 | * invalidate ALAT entry in case of advanced load | ||
853 | */ | ||
854 | if (ld.x6_op == 0x2) | ||
855 | invala_gr(ld.r1); | ||
856 | |||
857 | return 0; | ||
858 | } | ||
859 | |||
860 | static int | ||
861 | emulate_store_int (unsigned long ifa, load_store_t ld, struct pt_regs *regs) | ||
862 | { | ||
863 | unsigned long r2; | ||
864 | unsigned int len = 1 << ld.x6_sz; | ||
865 | |||
866 | /* | ||
867 | * if we get to this handler, Nat bits on both r3 and r2 have already | ||
868 | * been checked. so we don't need to do it | ||
869 | * | ||
870 | * extract the value to be stored | ||
871 | */ | ||
872 | getreg(ld.imm, &r2, NULL, regs); | ||
873 | |||
874 | /* | ||
875 | * we rely on the macros in unaligned.h for now i.e., | ||
876 | * we let the compiler figure out how to read memory gracefully. | ||
877 | * | ||
878 | * We need this switch/case because the way the inline function | ||
879 | * works. The code is optimized by the compiler and looks like | ||
880 | * a single switch/case. | ||
881 | */ | ||
882 | DPRINT("st%d [%lx]=%lx\n", len, ifa, r2); | ||
883 | |||
884 | if (len != 2 && len != 4 && len != 8) { | ||
885 | DPRINT("unknown size: x6=%d\n", ld.x6_sz); | ||
886 | return -1; | ||
887 | } | ||
888 | |||
889 | /* this assumes little-endian byte-order: */ | ||
890 | if (copy_to_user((void __user *) ifa, &r2, len)) | ||
891 | return -1; | ||
892 | |||
893 | /* | ||
894 | * stX [r3]=r2,imm(9) | ||
895 | * | ||
896 | * NOTE: | ||
897 | * ld.r3 can never be r0, because r0 would not generate an | ||
898 | * unaligned access. | ||
899 | */ | ||
900 | if (ld.op == 0x5) { | ||
901 | unsigned long imm; | ||
902 | |||
903 | /* | ||
904 | * form imm9: [12:6] contain first 7bits | ||
905 | */ | ||
906 | imm = ld.x << 7 | ld.r1; | ||
907 | /* | ||
908 | * sign extend (8bits) if m set | ||
909 | */ | ||
910 | if (ld.m) imm |= SIGN_EXT9; | ||
911 | /* | ||
912 | * ifa == r3 (NaT is necessarily cleared) | ||
913 | */ | ||
914 | ifa += imm; | ||
915 | |||
916 | DPRINT("imm=%lx r3=%lx\n", imm, ifa); | ||
917 | |||
918 | setreg(ld.r3, ifa, 0, regs); | ||
919 | } | ||
920 | /* | ||
921 | * we don't have alat_invalidate_multiple() so we need | ||
922 | * to do the complete flush :-<< | ||
923 | */ | ||
924 | ia64_invala(); | ||
925 | |||
926 | /* | ||
927 | * stX.rel: use fence instead of release | ||
928 | */ | ||
929 | if (ld.x6_op == 0xd) | ||
930 | mb(); | ||
931 | |||
932 | return 0; | ||
933 | } | ||
934 | |||
/*
 * Size in bytes of a floating point memory operand, indexed by the x6_sz field of
 * the instruction.
 */
static const unsigned char float_fsz[4] = {
	[0] = 10,	/* extended precision (e) */
	[1] = 8,	/* integer (8) */
	[2] = 4,	/* single precision (s) */
	[3] = 8,	/* double precision (d) */
};
944 | |||
/*
 * Convert the extended-precision memory image at INIT into register spill format
 * at FINAL: ia64_ldfe() loads it using register number 6, then ia64_stf_spill()
 * spills that same register.
 */
static inline void
mem2float_extended (struct ia64_fpreg *init, struct ia64_fpreg *final)
{
	ia64_ldfe(6, init);
	/* NOTE(review): presumably an instruction-group stop between load and spill — confirm */
	ia64_stop();
	ia64_stf_spill(final, 6);
}
952 | |||
/*
 * Convert the 8-byte integer memory image at INIT into register spill format at
 * FINAL: ia64_ldf8() loads it using register number 6, then ia64_stf_spill() spills
 * that same register.
 */
static inline void
mem2float_integer (struct ia64_fpreg *init, struct ia64_fpreg *final)
{
	ia64_ldf8(6, init);
	/* NOTE(review): presumably an instruction-group stop between load and spill — confirm */
	ia64_stop();
	ia64_stf_spill(final, 6);
}
960 | |||
/*
 * Convert the single-precision memory image at INIT into register spill format at
 * FINAL: ia64_ldfs() loads it using register number 6, then ia64_stf_spill() spills
 * that same register.
 */
static inline void
mem2float_single (struct ia64_fpreg *init, struct ia64_fpreg *final)
{
	ia64_ldfs(6, init);
	/* NOTE(review): presumably an instruction-group stop between load and spill — confirm */
	ia64_stop();
	ia64_stf_spill(final, 6);
}
968 | |||
/*
 * Convert the double-precision memory image at INIT into register spill format at
 * FINAL: ia64_ldfd() loads it using register number 6, then ia64_stf_spill() spills
 * that same register.
 */
static inline void
mem2float_double (struct ia64_fpreg *init, struct ia64_fpreg *final)
{
	ia64_ldfd(6, init);
	/* NOTE(review): presumably an instruction-group stop between load and spill — confirm */
	ia64_stop();
	ia64_stf_spill(final, 6);
}
976 | |||
/*
 * Convert the spilled register image at INIT into its extended-precision memory
 * form at FINAL: ia64_ldf_fill() refills register number 6 from INIT, then
 * ia64_stfe() stores that register (no spill).
 */
static inline void
float2mem_extended (struct ia64_fpreg *init, struct ia64_fpreg *final)
{
	ia64_ldf_fill(6, init);
	/* NOTE(review): presumably an instruction-group stop between fill and store — confirm */
	ia64_stop();
	ia64_stfe(final, 6);
}
984 | |||
/*
 * Convert the spilled register image at INIT into its 8-byte integer memory form
 * at FINAL: ia64_ldf_fill() refills register number 6 from INIT, then ia64_stf8()
 * stores that register (no spill).
 */
static inline void
float2mem_integer (struct ia64_fpreg *init, struct ia64_fpreg *final)
{
	ia64_ldf_fill(6, init);
	/* NOTE(review): presumably an instruction-group stop between fill and store — confirm */
	ia64_stop();
	ia64_stf8(final, 6);
}
992 | |||
/*
 * Convert the spilled register image at INIT into its single-precision memory form
 * at FINAL: ia64_ldf_fill() refills register number 6 from INIT, then ia64_stfs()
 * stores that register (no spill).
 */
static inline void
float2mem_single (struct ia64_fpreg *init, struct ia64_fpreg *final)
{
	ia64_ldf_fill(6, init);
	/* NOTE(review): presumably an instruction-group stop between fill and store — confirm */
	ia64_stop();
	ia64_stfs(final, 6);
}
1000 | |||
/*
 * Convert the spilled register image at INIT into its double-precision memory form
 * at FINAL: ia64_ldf_fill() refills register number 6 from INIT, then ia64_stfd()
 * stores that register (no spill).
 */
static inline void
float2mem_double (struct ia64_fpreg *init, struct ia64_fpreg *final)
{
	ia64_ldf_fill(6, init);
	/* NOTE(review): presumably an instruction-group stop between fill and store — confirm */
	ia64_stop();
	ia64_stfd(final, 6);
}
1008 | |||
1009 | static int | ||
1010 | emulate_load_floatpair (unsigned long ifa, load_store_t ld, struct pt_regs *regs) | ||
1011 | { | ||
1012 | struct ia64_fpreg fpr_init[2]; | ||
1013 | struct ia64_fpreg fpr_final[2]; | ||
1014 | unsigned long len = float_fsz[ld.x6_sz]; | ||
1015 | |||
1016 | /* | ||
1017 | * fr0 & fr1 don't need to be checked because Illegal Instruction faults have | ||
1018 | * higher priority than unaligned faults. | ||
1019 | * | ||
1020 | * r0 cannot be found as the base as it would never generate an unaligned | ||
1021 | * reference. | ||
1022 | */ | ||
1023 | |||
1024 | /* | ||
1025 | * make sure we get clean buffers | ||
1026 | */ | ||
1027 | memset(&fpr_init, 0, sizeof(fpr_init)); | ||
1028 | memset(&fpr_final, 0, sizeof(fpr_final)); | ||
1029 | |||
1030 | /* | ||
1031 | * ldfpX.a: we don't try to emulate anything but we must | ||
1032 | * invalidate the ALAT entry and execute updates, if any. | ||
1033 | */ | ||
1034 | if (ld.x6_op != 0x2) { | ||
1035 | /* | ||
1036 | * This assumes little-endian byte-order. Note that there is no "ldfpe" | ||
1037 | * instruction: | ||
1038 | */ | ||
1039 | if (copy_from_user(&fpr_init[0], (void __user *) ifa, len) | ||
1040 | || copy_from_user(&fpr_init[1], (void __user *) (ifa + len), len)) | ||
1041 | return -1; | ||
1042 | |||
1043 | DPRINT("ld.r1=%d ld.imm=%d x6_sz=%d\n", ld.r1, ld.imm, ld.x6_sz); | ||
1044 | DDUMP("frp_init =", &fpr_init, 2*len); | ||
1045 | /* | ||
1046 | * XXX fixme | ||
1047 | * Could optimize inlines by using ldfpX & 2 spills | ||
1048 | */ | ||
1049 | switch( ld.x6_sz ) { | ||
1050 | case 0: | ||
1051 | mem2float_extended(&fpr_init[0], &fpr_final[0]); | ||
1052 | mem2float_extended(&fpr_init[1], &fpr_final[1]); | ||
1053 | break; | ||
1054 | case 1: | ||
1055 | mem2float_integer(&fpr_init[0], &fpr_final[0]); | ||
1056 | mem2float_integer(&fpr_init[1], &fpr_final[1]); | ||
1057 | break; | ||
1058 | case 2: | ||
1059 | mem2float_single(&fpr_init[0], &fpr_final[0]); | ||
1060 | mem2float_single(&fpr_init[1], &fpr_final[1]); | ||
1061 | break; | ||
1062 | case 3: | ||
1063 | mem2float_double(&fpr_init[0], &fpr_final[0]); | ||
1064 | mem2float_double(&fpr_init[1], &fpr_final[1]); | ||
1065 | break; | ||
1066 | } | ||
1067 | DDUMP("fpr_final =", &fpr_final, 2*len); | ||
1068 | /* | ||
1069 | * XXX fixme | ||
1070 | * | ||
1071 | * A possible optimization would be to drop fpr_final and directly | ||
1072 | * use the storage from the saved context i.e., the actual final | ||
1073 | * destination (pt_regs, switch_stack or thread structure). | ||
1074 | */ | ||
1075 | setfpreg(ld.r1, &fpr_final[0], regs); | ||
1076 | setfpreg(ld.imm, &fpr_final[1], regs); | ||
1077 | } | ||
1078 | |||
1079 | /* | ||
1080 | * Check for updates: only immediate updates are available for this | ||
1081 | * instruction. | ||
1082 | */ | ||
1083 | if (ld.m) { | ||
1084 | /* | ||
1085 | * the immediate is implicit given the ldsz of the operation: | ||
1086 | * single: 8 (2x4) and for all others it's 16 (2x8) | ||
1087 | */ | ||
1088 | ifa += len<<1; | ||
1089 | |||
1090 | /* | ||
1091 | * IMPORTANT: | ||
1092 | * the fact that we force the NaT of r3 to zero is ONLY valid | ||
1093 | * as long as we don't come here with a ldfpX.s. | ||
1094 | * For this reason we keep this sanity check | ||
1095 | */ | ||
1096 | if (ld.x6_op == 1 || ld.x6_op == 3) | ||
1097 | printk(KERN_ERR "%s: register update on speculative load pair, error\n", | ||
1098 | __FUNCTION__); | ||
1099 | |||
1100 | setreg(ld.r3, ifa, 0, regs); | ||
1101 | } | ||
1102 | |||
1103 | /* | ||
1104 | * Invalidate ALAT entries, if any, for both registers. | ||
1105 | */ | ||
1106 | if (ld.x6_op == 0x2) { | ||
1107 | invala_fr(ld.r1); | ||
1108 | invala_fr(ld.imm); | ||
1109 | } | ||
1110 | return 0; | ||
1111 | } | ||
1112 | |||
1113 | |||
1114 | static int | ||
1115 | emulate_load_float (unsigned long ifa, load_store_t ld, struct pt_regs *regs) | ||
1116 | { | ||
1117 | struct ia64_fpreg fpr_init; | ||
1118 | struct ia64_fpreg fpr_final; | ||
1119 | unsigned long len = float_fsz[ld.x6_sz]; | ||
1120 | |||
1121 | /* | ||
1122 | * fr0 & fr1 don't need to be checked because Illegal Instruction | ||
1123 | * faults have higher priority than unaligned faults. | ||
1124 | * | ||
1125 | * r0 cannot be found as the base as it would never generate an | ||
1126 | * unaligned reference. | ||
1127 | */ | ||
1128 | |||
1129 | /* | ||
1130 | * make sure we get clean buffers | ||
1131 | */ | ||
1132 | memset(&fpr_init,0, sizeof(fpr_init)); | ||
1133 | memset(&fpr_final,0, sizeof(fpr_final)); | ||
1134 | |||
1135 | /* | ||
1136 | * ldfX.a we don't try to emulate anything but we must | ||
1137 | * invalidate the ALAT entry. | ||
1138 | * See comments in ldX for descriptions on how the various loads are handled. | ||
1139 | */ | ||
1140 | if (ld.x6_op != 0x2) { | ||
1141 | if (copy_from_user(&fpr_init, (void __user *) ifa, len)) | ||
1142 | return -1; | ||
1143 | |||
1144 | DPRINT("ld.r1=%d x6_sz=%d\n", ld.r1, ld.x6_sz); | ||
1145 | DDUMP("fpr_init =", &fpr_init, len); | ||
1146 | /* | ||
1147 | * we only do something for x6_op={0,8,9} | ||
1148 | */ | ||
1149 | switch( ld.x6_sz ) { | ||
1150 | case 0: | ||
1151 | mem2float_extended(&fpr_init, &fpr_final); | ||
1152 | break; | ||
1153 | case 1: | ||
1154 | mem2float_integer(&fpr_init, &fpr_final); | ||
1155 | break; | ||
1156 | case 2: | ||
1157 | mem2float_single(&fpr_init, &fpr_final); | ||
1158 | break; | ||
1159 | case 3: | ||
1160 | mem2float_double(&fpr_init, &fpr_final); | ||
1161 | break; | ||
1162 | } | ||
1163 | DDUMP("fpr_final =", &fpr_final, len); | ||
1164 | /* | ||
1165 | * XXX fixme | ||
1166 | * | ||
1167 | * A possible optimization would be to drop fpr_final and directly | ||
1168 | * use the storage from the saved context i.e., the actual final | ||
1169 | * destination (pt_regs, switch_stack or thread structure). | ||
1170 | */ | ||
1171 | setfpreg(ld.r1, &fpr_final, regs); | ||
1172 | } | ||
1173 | |||
1174 | /* | ||
1175 | * check for updates on any loads | ||
1176 | */ | ||
1177 | if (ld.op == 0x7 || ld.m) | ||
1178 | emulate_load_updates(ld.op == 0x7 ? UPD_IMMEDIATE: UPD_REG, ld, regs, ifa); | ||
1179 | |||
1180 | /* | ||
1181 | * invalidate ALAT entry in case of advanced floating point loads | ||
1182 | */ | ||
1183 | if (ld.x6_op == 0x2) | ||
1184 | invala_fr(ld.r1); | ||
1185 | |||
1186 | return 0; | ||
1187 | } | ||
1188 | |||
1189 | |||
1190 | static int | ||
1191 | emulate_store_float (unsigned long ifa, load_store_t ld, struct pt_regs *regs) | ||
1192 | { | ||
1193 | struct ia64_fpreg fpr_init; | ||
1194 | struct ia64_fpreg fpr_final; | ||
1195 | unsigned long len = float_fsz[ld.x6_sz]; | ||
1196 | |||
1197 | /* | ||
1198 | * make sure we get clean buffers | ||
1199 | */ | ||
1200 | memset(&fpr_init,0, sizeof(fpr_init)); | ||
1201 | memset(&fpr_final,0, sizeof(fpr_final)); | ||
1202 | |||
1203 | /* | ||
1204 | * if we get to this handler, Nat bits on both r3 and r2 have already | ||
1205 | * been checked. so we don't need to do it | ||
1206 | * | ||
1207 | * extract the value to be stored | ||
1208 | */ | ||
1209 | getfpreg(ld.imm, &fpr_init, regs); | ||
1210 | /* | ||
1211 | * during this step, we extract the spilled registers from the saved | ||
1212 | * context i.e., we refill. Then we store (no spill) to temporary | ||
1213 | * aligned location | ||
1214 | */ | ||
1215 | switch( ld.x6_sz ) { | ||
1216 | case 0: | ||
1217 | float2mem_extended(&fpr_init, &fpr_final); | ||
1218 | break; | ||
1219 | case 1: | ||
1220 | float2mem_integer(&fpr_init, &fpr_final); | ||
1221 | break; | ||
1222 | case 2: | ||
1223 | float2mem_single(&fpr_init, &fpr_final); | ||
1224 | break; | ||
1225 | case 3: | ||
1226 | float2mem_double(&fpr_init, &fpr_final); | ||
1227 | break; | ||
1228 | } | ||
1229 | DPRINT("ld.r1=%d x6_sz=%d\n", ld.r1, ld.x6_sz); | ||
1230 | DDUMP("fpr_init =", &fpr_init, len); | ||
1231 | DDUMP("fpr_final =", &fpr_final, len); | ||
1232 | |||
1233 | if (copy_to_user((void __user *) ifa, &fpr_final, len)) | ||
1234 | return -1; | ||
1235 | |||
1236 | /* | ||
1237 | * stfX [r3]=r2,imm(9) | ||
1238 | * | ||
1239 | * NOTE: | ||
1240 | * ld.r3 can never be r0, because r0 would not generate an | ||
1241 | * unaligned access. | ||
1242 | */ | ||
1243 | if (ld.op == 0x7) { | ||
1244 | unsigned long imm; | ||
1245 | |||
1246 | /* | ||
1247 | * form imm9: [12:6] contain first 7bits | ||
1248 | */ | ||
1249 | imm = ld.x << 7 | ld.r1; | ||
1250 | /* | ||
1251 | * sign extend (8bits) if m set | ||
1252 | */ | ||
1253 | if (ld.m) | ||
1254 | imm |= SIGN_EXT9; | ||
1255 | /* | ||
1256 | * ifa == r3 (NaT is necessarily cleared) | ||
1257 | */ | ||
1258 | ifa += imm; | ||
1259 | |||
1260 | DPRINT("imm=%lx r3=%lx\n", imm, ifa); | ||
1261 | |||
1262 | setreg(ld.r3, ifa, 0, regs); | ||
1263 | } | ||
1264 | /* | ||
1265 | * we don't have alat_invalidate_multiple() so we need | ||
1266 | * to do the complete flush :-<< | ||
1267 | */ | ||
1268 | ia64_invala(); | ||
1269 | |||
1270 | return 0; | ||
1271 | } | ||
1272 | |||
1273 | /* | ||
1274 | * Make sure we log the unaligned access, so that user/sysadmin can notice it and | ||
1275 | * eventually fix the program. However, we don't want to do that for every access so we | ||
1276 | * pace it with jiffies. This isn't really MP-safe, but it doesn't really have to be | ||
1277 | * either... | ||
1278 | */ | ||
1279 | static int | ||
1280 | within_logging_rate_limit (void) | ||
1281 | { | ||
1282 | static unsigned long count, last_time; | ||
1283 | |||
1284 | if (jiffies - last_time > 5*HZ) | ||
1285 | count = 0; | ||
1286 | if (++count < 5) { | ||
1287 | last_time = jiffies; | ||
1288 | return 1; | ||
1289 | } | ||
1290 | return 0; | ||
1291 | |||
1292 | } | ||
1293 | |||
1294 | void | ||
1295 | ia64_handle_unaligned (unsigned long ifa, struct pt_regs *regs) | ||
1296 | { | ||
1297 | struct ia64_psr *ipsr = ia64_psr(regs); | ||
1298 | mm_segment_t old_fs = get_fs(); | ||
1299 | unsigned long bundle[2]; | ||
1300 | unsigned long opcode; | ||
1301 | struct siginfo si; | ||
1302 | const struct exception_table_entry *eh = NULL; | ||
1303 | union { | ||
1304 | unsigned long l; | ||
1305 | load_store_t insn; | ||
1306 | } u; | ||
1307 | int ret = -1; | ||
1308 | |||
1309 | if (ia64_psr(regs)->be) { | ||
1310 | /* we don't support big-endian accesses */ | ||
1311 | die_if_kernel("big-endian unaligned accesses are not supported", regs, 0); | ||
1312 | goto force_sigbus; | ||
1313 | } | ||
1314 | |||
1315 | /* | ||
1316 | * Treat kernel accesses for which there is an exception handler entry the same as | ||
1317 | * user-level unaligned accesses. Otherwise, a clever program could trick this | ||
1318 | * handler into reading an arbitrary kernel addresses... | ||
1319 | */ | ||
1320 | if (!user_mode(regs)) | ||
1321 | eh = search_exception_tables(regs->cr_iip + ia64_psr(regs)->ri); | ||
1322 | if (user_mode(regs) || eh) { | ||
1323 | if ((current->thread.flags & IA64_THREAD_UAC_SIGBUS) != 0) | ||
1324 | goto force_sigbus; | ||
1325 | |||
1326 | if (!(current->thread.flags & IA64_THREAD_UAC_NOPRINT) | ||
1327 | && within_logging_rate_limit()) | ||
1328 | { | ||
1329 | char buf[200]; /* comm[] is at most 16 bytes... */ | ||
1330 | size_t len; | ||
1331 | |||
1332 | len = sprintf(buf, "%s(%d): unaligned access to 0x%016lx, " | ||
1333 | "ip=0x%016lx\n\r", current->comm, current->pid, | ||
1334 | ifa, regs->cr_iip + ipsr->ri); | ||
1335 | /* | ||
1336 | * Don't call tty_write_message() if we're in the kernel; we might | ||
1337 | * be holding locks... | ||
1338 | */ | ||
1339 | if (user_mode(regs)) | ||
1340 | tty_write_message(current->signal->tty, buf); | ||
1341 | buf[len-1] = '\0'; /* drop '\r' */ | ||
1342 | printk(KERN_WARNING "%s", buf); /* watch for command names containing %s */ | ||
1343 | } | ||
1344 | } else { | ||
1345 | if (within_logging_rate_limit()) | ||
1346 | printk(KERN_WARNING "kernel unaligned access to 0x%016lx, ip=0x%016lx\n", | ||
1347 | ifa, regs->cr_iip + ipsr->ri); | ||
1348 | set_fs(KERNEL_DS); | ||
1349 | } | ||
1350 | |||
1351 | DPRINT("iip=%lx ifa=%lx isr=%lx (ei=%d, sp=%d)\n", | ||
1352 | regs->cr_iip, ifa, regs->cr_ipsr, ipsr->ri, ipsr->it); | ||
1353 | |||
1354 | if (__copy_from_user(bundle, (void __user *) regs->cr_iip, 16)) | ||
1355 | goto failure; | ||
1356 | |||
1357 | /* | ||
1358 | * extract the instruction from the bundle given the slot number | ||
1359 | */ | ||
1360 | switch (ipsr->ri) { | ||
1361 | case 0: u.l = (bundle[0] >> 5); break; | ||
1362 | case 1: u.l = (bundle[0] >> 46) | (bundle[1] << 18); break; | ||
1363 | case 2: u.l = (bundle[1] >> 23); break; | ||
1364 | } | ||
1365 | opcode = (u.l >> IA64_OPCODE_SHIFT) & IA64_OPCODE_MASK; | ||
1366 | |||
1367 | DPRINT("opcode=%lx ld.qp=%d ld.r1=%d ld.imm=%d ld.r3=%d ld.x=%d ld.hint=%d " | ||
1368 | "ld.x6=0x%x ld.m=%d ld.op=%d\n", opcode, u.insn.qp, u.insn.r1, u.insn.imm, | ||
1369 | u.insn.r3, u.insn.x, u.insn.hint, u.insn.x6_sz, u.insn.m, u.insn.op); | ||
1370 | |||
1371 | /* | ||
1372 | * IMPORTANT: | ||
1373 | * Notice that the switch statement DOES not cover all possible instructions | ||
1374 | * that DO generate unaligned references. This is made on purpose because for some | ||
1375 | * instructions it DOES NOT make sense to try and emulate the access. Sometimes it | ||
1376 | * is WRONG to try and emulate. Here is a list of instruction we don't emulate i.e., | ||
1377 | * the program will get a signal and die: | ||
1378 | * | ||
1379 | * load/store: | ||
1380 | * - ldX.spill | ||
1381 | * - stX.spill | ||
1382 | * Reason: RNATs are based on addresses | ||
1383 | * - ld16 | ||
1384 | * - st16 | ||
1385 | * Reason: ld16 and st16 are supposed to occur in a single | ||
1386 | * memory op | ||
1387 | * | ||
1388 | * synchronization: | ||
1389 | * - cmpxchg | ||
1390 | * - fetchadd | ||
1391 | * - xchg | ||
1392 | * Reason: ATOMIC operations cannot be emulated properly using multiple | ||
1393 | * instructions. | ||
1394 | * | ||
1395 | * speculative loads: | ||
1396 | * - ldX.sZ | ||
1397 | * Reason: side effects, code must be ready to deal with failure so simpler | ||
1398 | * to let the load fail. | ||
1399 | * --------------------------------------------------------------------------------- | ||
1400 | * XXX fixme | ||
1401 | * | ||
1402 | * I would like to get rid of this switch case and do something | ||
1403 | * more elegant. | ||
1404 | */ | ||
1405 | switch (opcode) { | ||
1406 | case LDS_OP: | ||
1407 | case LDSA_OP: | ||
1408 | if (u.insn.x) | ||
1409 | /* oops, really a semaphore op (cmpxchg, etc) */ | ||
1410 | goto failure; | ||
1411 | /* no break */ | ||
1412 | case LDS_IMM_OP: | ||
1413 | case LDSA_IMM_OP: | ||
1414 | case LDFS_OP: | ||
1415 | case LDFSA_OP: | ||
1416 | case LDFS_IMM_OP: | ||
1417 | /* | ||
1418 | * The instruction will be retried with deferred exceptions turned on, and | ||
1419 | * we should get Nat bit installed | ||
1420 | * | ||
1421 | * IMPORTANT: When PSR_ED is set, the register & immediate update forms | ||
1422 | * are actually executed even though the operation failed. So we don't | ||
1423 | * need to take care of this. | ||
1424 | */ | ||
1425 | DPRINT("forcing PSR_ED\n"); | ||
1426 | regs->cr_ipsr |= IA64_PSR_ED; | ||
1427 | goto done; | ||
1428 | |||
1429 | case LD_OP: | ||
1430 | case LDA_OP: | ||
1431 | case LDBIAS_OP: | ||
1432 | case LDACQ_OP: | ||
1433 | case LDCCLR_OP: | ||
1434 | case LDCNC_OP: | ||
1435 | case LDCCLRACQ_OP: | ||
1436 | if (u.insn.x) | ||
1437 | /* oops, really a semaphore op (cmpxchg, etc) */ | ||
1438 | goto failure; | ||
1439 | /* no break */ | ||
1440 | case LD_IMM_OP: | ||
1441 | case LDA_IMM_OP: | ||
1442 | case LDBIAS_IMM_OP: | ||
1443 | case LDACQ_IMM_OP: | ||
1444 | case LDCCLR_IMM_OP: | ||
1445 | case LDCNC_IMM_OP: | ||
1446 | case LDCCLRACQ_IMM_OP: | ||
1447 | ret = emulate_load_int(ifa, u.insn, regs); | ||
1448 | break; | ||
1449 | |||
1450 | case ST_OP: | ||
1451 | case STREL_OP: | ||
1452 | if (u.insn.x) | ||
1453 | /* oops, really a semaphore op (cmpxchg, etc) */ | ||
1454 | goto failure; | ||
1455 | /* no break */ | ||
1456 | case ST_IMM_OP: | ||
1457 | case STREL_IMM_OP: | ||
1458 | ret = emulate_store_int(ifa, u.insn, regs); | ||
1459 | break; | ||
1460 | |||
1461 | case LDF_OP: | ||
1462 | case LDFA_OP: | ||
1463 | case LDFCCLR_OP: | ||
1464 | case LDFCNC_OP: | ||
1465 | case LDF_IMM_OP: | ||
1466 | case LDFA_IMM_OP: | ||
1467 | case LDFCCLR_IMM_OP: | ||
1468 | case LDFCNC_IMM_OP: | ||
1469 | if (u.insn.x) | ||
1470 | ret = emulate_load_floatpair(ifa, u.insn, regs); | ||
1471 | else | ||
1472 | ret = emulate_load_float(ifa, u.insn, regs); | ||
1473 | break; | ||
1474 | |||
1475 | case STF_OP: | ||
1476 | case STF_IMM_OP: | ||
1477 | ret = emulate_store_float(ifa, u.insn, regs); | ||
1478 | break; | ||
1479 | |||
1480 | default: | ||
1481 | goto failure; | ||
1482 | } | ||
1483 | DPRINT("ret=%d\n", ret); | ||
1484 | if (ret) | ||
1485 | goto failure; | ||
1486 | |||
1487 | if (ipsr->ri == 2) | ||
1488 | /* | ||
1489 | * given today's architecture this case is not likely to happen because a | ||
1490 | * memory access instruction (M) can never be in the last slot of a | ||
1491 | * bundle. But let's keep it for now. | ||
1492 | */ | ||
1493 | regs->cr_iip += 16; | ||
1494 | ipsr->ri = (ipsr->ri + 1) & 0x3; | ||
1495 | |||
1496 | DPRINT("ipsr->ri=%d iip=%lx\n", ipsr->ri, regs->cr_iip); | ||
1497 | done: | ||
1498 | set_fs(old_fs); /* restore original address limit */ | ||
1499 | return; | ||
1500 | |||
1501 | failure: | ||
1502 | /* something went wrong... */ | ||
1503 | if (!user_mode(regs)) { | ||
1504 | if (eh) { | ||
1505 | ia64_handle_exception(regs, eh); | ||
1506 | goto done; | ||
1507 | } | ||
1508 | die_if_kernel("error during unaligned kernel access\n", regs, ret); | ||
1509 | /* NOT_REACHED */ | ||
1510 | } | ||
1511 | force_sigbus: | ||
1512 | si.si_signo = SIGBUS; | ||
1513 | si.si_errno = 0; | ||
1514 | si.si_code = BUS_ADRALN; | ||
1515 | si.si_addr = (void __user *) ifa; | ||
1516 | si.si_flags = 0; | ||
1517 | si.si_isr = 0; | ||
1518 | si.si_imm = 0; | ||
1519 | force_sig_info(SIGBUS, &si, current); | ||
1520 | goto done; | ||
1521 | } | ||
diff --git a/arch/ia64/kernel/unwind.c b/arch/ia64/kernel/unwind.c new file mode 100644 index 000000000000..d494ff647cac --- /dev/null +++ b/arch/ia64/kernel/unwind.c | |||
@@ -0,0 +1,2306 @@ | |||
1 | /* | ||
2 | * Copyright (C) 1999-2004 Hewlett-Packard Co | ||
3 | * David Mosberger-Tang <davidm@hpl.hp.com> | ||
4 | * Copyright (C) 2003 Fenghua Yu <fenghua.yu@intel.com> | ||
5 | * - Change pt_regs_off() to make it less dependent on pt_regs structure. | ||
6 | */ | ||
7 | /* | ||
8 | * This file implements call frame unwind support for the Linux | ||
9 | * kernel. Parsing and processing the unwind information is | ||
10 | * time-consuming, so this implementation translates the unwind | ||
11 | * descriptors into unwind scripts. These scripts are very simple | ||
12 | * (basically a sequence of assignments) and efficient to execute. | ||
13 | * They are cached for later re-use. Each script is specific for a | ||
14 | * given instruction pointer address and the set of predicate values | ||
15 | * that the script depends on (most unwind descriptors are | ||
16 | * unconditional and scripts often do not depend on predicates at | ||
17 | * all). This code is based on the unwind conventions described in | ||
18 | * the "IA-64 Software Conventions and Runtime Architecture" manual. | ||
19 | * | ||
20 | * SMP conventions: | ||
21 | * o updates to the global unwind data (in structure "unw") are serialized | ||
22 | * by the unw.lock spinlock | ||
23 | * o each unwind script has its own read-write lock; a thread must acquire | ||
24 | * a read lock before executing a script and must acquire a write lock | ||
25 | * before modifying a script | ||
26 | * o if both the unw.lock spinlock and a script's read-write lock must be | ||
27 | * acquired, then the read-write lock must be acquired first. | ||
28 | */ | ||
29 | #include <linux/module.h> | ||
30 | #include <linux/bootmem.h> | ||
31 | #include <linux/elf.h> | ||
32 | #include <linux/kernel.h> | ||
33 | #include <linux/sched.h> | ||
34 | #include <linux/slab.h> | ||
35 | |||
36 | #include <asm/unwind.h> | ||
37 | |||
38 | #include <asm/delay.h> | ||
39 | #include <asm/page.h> | ||
40 | #include <asm/ptrace.h> | ||
41 | #include <asm/ptrace_offsets.h> | ||
42 | #include <asm/rse.h> | ||
43 | #include <asm/sections.h> | ||
44 | #include <asm/system.h> | ||
45 | #include <asm/uaccess.h> | ||
46 | |||
47 | #include "entry.h" | ||
48 | #include "unwind_i.h" | ||
49 | |||
/*
 * Script-cache geometry: the cache holds 2^UNW_LOG_CACHE_SIZE scripts and the
 * hash table that indexes it has twice as many buckets.
 */
#define UNW_LOG_CACHE_SIZE	7	/* each unw_script is ~256 bytes in size */
#define UNW_CACHE_SIZE		(1 << UNW_LOG_CACHE_SIZE)

#define UNW_LOG_HASH_SIZE	(UNW_LOG_CACHE_SIZE + 1)
#define UNW_HASH_SIZE		(1 << UNW_LOG_HASH_SIZE)

#define UNW_STATS	0	/* WARNING: this disables interrupts for long time-spans!! */

/*
 * Debug output.  When UNW_DEBUG is defined its value is the initial verbosity
 * level; UNW_DPRINT(n, ...) prints only if the current level is at least n.
 * When UNW_DEBUG is not defined both macros compile away.
 */
#ifdef UNW_DEBUG
  static unsigned int unw_debug_level = UNW_DEBUG;
  /* Fully parenthesized so the test composes safely with !, &&, ?: and friends. */
# define UNW_DEBUG_ON(n)	(unw_debug_level >= (n))
  /* Do not code a printk level, not all debug lines end in newline */
  /*
   * Wrapped in do/while(0) so the macro is a single statement: a bare
   * "if (...) printk(...)" would capture the "else" of an enclosing
   * unbraced if/else.
   */
# define UNW_DPRINT(n, ...)	do { if (UNW_DEBUG_ON(n)) printk(__VA_ARGS__); } while (0)
  /* Neutralizes the "inline" keyword for the rest of this file in debug builds. */
# define inline
#else /* !UNW_DEBUG */
# define UNW_DEBUG_ON(n)	0
# define UNW_DPRINT(n, ...)	do { } while (0)
#endif /* UNW_DEBUG */

/* STAT(x) emits its argument only when statistics gathering is compiled in. */
#if UNW_STATS
# define STAT(x...)	x
#else
# define STAT(x...)
#endif

/* Allocation helpers for the state stack; GFP_ATOMIC is requested for both kinds of state. */
#define alloc_reg_state()	kmalloc(sizeof(struct unw_reg_state), GFP_ATOMIC)
#define free_reg_state(usr)	kfree(usr)
#define alloc_labeled_state()	kmalloc(sizeof(struct unw_labeled_state), GFP_ATOMIC)
#define free_labeled_state(usr)	kfree(usr)

typedef unsigned long unw_word;
typedef unsigned char unw_hash_index_t;
82 | |||
/*
 * Global unwinder state, held in the single file-scope object "unw" and
 * statically initialized below.  Per the SMP conventions at the top of this
 * file, updates to this structure are serialized by unw.lock.
 */
static struct {
	spinlock_t lock;			/* spinlock for unwind data */

	/* list of unwind tables (one per load-module) */
	struct unw_table *tables;

	unsigned long r0;			/* constant 0 for r0 */

	/* table of registers that prologues can save (and order in which they're saved): */
	const unsigned char save_order[8];

	/* maps a preserved register index (preg_index) to corresponding switch_stack offset: */
	unsigned short sw_off[sizeof(struct unw_frame_info) / 8];

	unsigned short lru_head;		/* index of least-recently used script */
	unsigned short lru_tail;		/* index of most-recently used script */

	/* index into unw_frame_info for preserved register i */
	unsigned short preg_index[UNW_NUM_REGS];

	/*
	 * Offset of rN within struct pt_regs, indexed by N; negative entries are
	 * rejected by pt_regs_off() as not being scratch registers.
	 */
	short pt_regs_offsets[32];

	/* unwind table for the kernel: */
	struct unw_table kernel_table;

	/* unwind table describing the gate page (kernel code that is mapped into user space): */
	size_t gate_table_size;
	unsigned long *gate_table;

	/* hash table that maps instruction pointer to script index: */
	unsigned short hash[UNW_HASH_SIZE];

	/* script cache: */
	struct unw_script cache[UNW_CACHE_SIZE];

	/* printable names of the preserved registers, only present in debug builds */
# ifdef UNW_DEBUG
	const char *preg_name[UNW_NUM_REGS];
# endif
	/* counters and timings, only present when UNW_STATS is non-zero */
# if UNW_STATS
	struct {
		struct {
			int lookups;
			int hinted_hits;
			int normal_hits;
			int collision_chain_traversals;
		} cache;
		struct {
			unsigned long build_time;
			unsigned long run_time;
			unsigned long parse_time;
			int builds;
			int news;
			int collisions;
			int runs;
		} script;
		struct {
			unsigned long init_time;
			unsigned long unwind_time;
			int inits;
			int unwinds;
		} api;
	} stat;
# endif
} unw = {
	/* the table list initially consists of just the kernel's own table */
	.tables = &unw.kernel_table,
	.lock = SPIN_LOCK_UNLOCKED,
	.save_order = {
		UNW_REG_RP, UNW_REG_PFS, UNW_REG_PSP, UNW_REG_PR,
		UNW_REG_UNAT, UNW_REG_LC, UNW_REG_FPSR, UNW_REG_PRI_UNAT_GR
	},
	/* each entry is a byte offset into unw_frame_info divided by 8, i.e. a 64-bit word index */
	.preg_index = {
		offsetof(struct unw_frame_info, pri_unat_loc)/8,	/* PRI_UNAT_GR */
		offsetof(struct unw_frame_info, pri_unat_loc)/8,	/* PRI_UNAT_MEM */
		offsetof(struct unw_frame_info, bsp_loc)/8,
		offsetof(struct unw_frame_info, bspstore_loc)/8,
		offsetof(struct unw_frame_info, pfs_loc)/8,
		offsetof(struct unw_frame_info, rnat_loc)/8,
		offsetof(struct unw_frame_info, psp)/8,
		offsetof(struct unw_frame_info, rp_loc)/8,
		offsetof(struct unw_frame_info, r4)/8,
		offsetof(struct unw_frame_info, r5)/8,
		offsetof(struct unw_frame_info, r6)/8,
		offsetof(struct unw_frame_info, r7)/8,
		offsetof(struct unw_frame_info, unat_loc)/8,
		offsetof(struct unw_frame_info, pr_loc)/8,
		offsetof(struct unw_frame_info, lc_loc)/8,
		offsetof(struct unw_frame_info, fpsr_loc)/8,
		offsetof(struct unw_frame_info, b1_loc)/8,
		offsetof(struct unw_frame_info, b2_loc)/8,
		offsetof(struct unw_frame_info, b3_loc)/8,
		offsetof(struct unw_frame_info, b4_loc)/8,
		offsetof(struct unw_frame_info, b5_loc)/8,
		offsetof(struct unw_frame_info, f2_loc)/8,
		offsetof(struct unw_frame_info, f3_loc)/8,
		offsetof(struct unw_frame_info, f4_loc)/8,
		offsetof(struct unw_frame_info, f5_loc)/8,
		offsetof(struct unw_frame_info, fr_loc[16 - 16])/8,
		offsetof(struct unw_frame_info, fr_loc[17 - 16])/8,
		offsetof(struct unw_frame_info, fr_loc[18 - 16])/8,
		offsetof(struct unw_frame_info, fr_loc[19 - 16])/8,
		offsetof(struct unw_frame_info, fr_loc[20 - 16])/8,
		offsetof(struct unw_frame_info, fr_loc[21 - 16])/8,
		offsetof(struct unw_frame_info, fr_loc[22 - 16])/8,
		offsetof(struct unw_frame_info, fr_loc[23 - 16])/8,
		offsetof(struct unw_frame_info, fr_loc[24 - 16])/8,
		offsetof(struct unw_frame_info, fr_loc[25 - 16])/8,
		offsetof(struct unw_frame_info, fr_loc[26 - 16])/8,
		offsetof(struct unw_frame_info, fr_loc[27 - 16])/8,
		offsetof(struct unw_frame_info, fr_loc[28 - 16])/8,
		offsetof(struct unw_frame_info, fr_loc[29 - 16])/8,
		offsetof(struct unw_frame_info, fr_loc[30 - 16])/8,
		offsetof(struct unw_frame_info, fr_loc[31 - 16])/8,
	},
	/* r0 and r4-r7 have no pt_regs slot and are marked with -1 */
	.pt_regs_offsets = {
		[0] = -1,
		offsetof(struct pt_regs, r1),
		offsetof(struct pt_regs, r2),
		offsetof(struct pt_regs, r3),
		[4] = -1, [5] = -1, [6] = -1, [7] = -1,
		offsetof(struct pt_regs, r8),
		offsetof(struct pt_regs, r9),
		offsetof(struct pt_regs, r10),
		offsetof(struct pt_regs, r11),
		offsetof(struct pt_regs, r12),
		offsetof(struct pt_regs, r13),
		offsetof(struct pt_regs, r14),
		offsetof(struct pt_regs, r15),
		offsetof(struct pt_regs, r16),
		offsetof(struct pt_regs, r17),
		offsetof(struct pt_regs, r18),
		offsetof(struct pt_regs, r19),
		offsetof(struct pt_regs, r20),
		offsetof(struct pt_regs, r21),
		offsetof(struct pt_regs, r22),
		offsetof(struct pt_regs, r23),
		offsetof(struct pt_regs, r24),
		offsetof(struct pt_regs, r25),
		offsetof(struct pt_regs, r26),
		offsetof(struct pt_regs, r27),
		offsetof(struct pt_regs, r28),
		offsetof(struct pt_regs, r29),
		offsetof(struct pt_regs, r30),
		offsetof(struct pt_regs, r31),
	},
	/* every bucket starts out as -1 (stored in an unsigned short); presumably the "empty" marker */
	.hash = { [0 ... UNW_HASH_SIZE - 1] = -1 },
#ifdef UNW_DEBUG
	.preg_name = {
		"pri_unat_gr", "pri_unat_mem", "bsp", "bspstore", "ar.pfs", "ar.rnat", "psp", "rp",
		"r4", "r5", "r6", "r7",
		"ar.unat", "pr", "ar.lc", "ar.fpsr",
		"b1", "b2", "b3", "b4", "b5",
		"f2", "f3", "f4", "f5",
		"f16", "f17", "f18", "f19", "f20", "f21", "f22", "f23",
		"f24", "f25", "f26", "f27", "f28", "f29", "f30", "f31"
	}
#endif
};
240 | |||
241 | static inline int | ||
242 | read_only (void *addr) | ||
243 | { | ||
244 | return (unsigned long) ((char *) addr - (char *) &unw.r0) < sizeof(unw.r0); | ||
245 | } | ||
246 | |||
247 | /* | ||
248 | * Returns offset of rREG in struct pt_regs. | ||
249 | */ | ||
250 | static inline unsigned long | ||
251 | pt_regs_off (unsigned long reg) | ||
252 | { | ||
253 | short off = -1; | ||
254 | |||
255 | if (reg < ARRAY_SIZE(unw.pt_regs_offsets)) | ||
256 | off = unw.pt_regs_offsets[reg]; | ||
257 | |||
258 | if (off < 0) { | ||
259 | UNW_DPRINT(0, "unwind.%s: bad scratch reg r%lu\n", __FUNCTION__, reg); | ||
260 | off = 0; | ||
261 | } | ||
262 | return (unsigned long) off; | ||
263 | } | ||
264 | |||
265 | static inline struct pt_regs * | ||
266 | get_scratch_regs (struct unw_frame_info *info) | ||
267 | { | ||
268 | if (!info->pt) { | ||
269 | /* This should not happen with valid unwind info. */ | ||
270 | UNW_DPRINT(0, "unwind.%s: bad unwind info: resetting info->pt\n", __FUNCTION__); | ||
271 | if (info->flags & UNW_FLAG_INTERRUPT_FRAME) | ||
272 | info->pt = (unsigned long) ((struct pt_regs *) info->psp - 1); | ||
273 | else | ||
274 | info->pt = info->sp - 16; | ||
275 | } | ||
276 | UNW_DPRINT(3, "unwind.%s: sp 0x%lx pt 0x%lx\n", __FUNCTION__, info->sp, info->pt); | ||
277 | return (struct pt_regs *) info->pt; | ||
278 | } | ||
279 | |||
280 | /* Unwind accessors. */ | ||
281 | |||
282 | int | ||
283 | unw_access_gr (struct unw_frame_info *info, int regnum, unsigned long *val, char *nat, int write) | ||
284 | { | ||
285 | unsigned long *addr, *nat_addr, nat_mask = 0, dummy_nat; | ||
286 | struct unw_ireg *ireg; | ||
287 | struct pt_regs *pt; | ||
288 | |||
289 | if ((unsigned) regnum - 1 >= 127) { | ||
290 | if (regnum == 0 && !write) { | ||
291 | *val = 0; /* read r0 always returns 0 */ | ||
292 | *nat = 0; | ||
293 | return 0; | ||
294 | } | ||
295 | UNW_DPRINT(0, "unwind.%s: trying to access non-existent r%u\n", | ||
296 | __FUNCTION__, regnum); | ||
297 | return -1; | ||
298 | } | ||
299 | |||
300 | if (regnum < 32) { | ||
301 | if (regnum >= 4 && regnum <= 7) { | ||
302 | /* access a preserved register */ | ||
303 | ireg = &info->r4 + (regnum - 4); | ||
304 | addr = ireg->loc; | ||
305 | if (addr) { | ||
306 | nat_addr = addr + ireg->nat.off; | ||
307 | switch (ireg->nat.type) { | ||
308 | case UNW_NAT_VAL: | ||
309 | /* simulate getf.sig/setf.sig */ | ||
310 | if (write) { | ||
311 | if (*nat) { | ||
312 | /* write NaTVal and be done with it */ | ||
313 | addr[0] = 0; | ||
314 | addr[1] = 0x1fffe; | ||
315 | return 0; | ||
316 | } | ||
317 | addr[1] = 0x1003e; | ||
318 | } else { | ||
319 | if (addr[0] == 0 && addr[1] == 0x1ffe) { | ||
320 | /* return NaT and be done with it */ | ||
321 | *val = 0; | ||
322 | *nat = 1; | ||
323 | return 0; | ||
324 | } | ||
325 | } | ||
326 | /* fall through */ | ||
327 | case UNW_NAT_NONE: | ||
328 | dummy_nat = 0; | ||
329 | nat_addr = &dummy_nat; | ||
330 | break; | ||
331 | |||
332 | case UNW_NAT_MEMSTK: | ||
333 | nat_mask = (1UL << ((long) addr & 0x1f8)/8); | ||
334 | break; | ||
335 | |||
336 | case UNW_NAT_REGSTK: | ||
337 | nat_addr = ia64_rse_rnat_addr(addr); | ||
338 | if ((unsigned long) addr < info->regstk.limit | ||
339 | || (unsigned long) addr >= info->regstk.top) | ||
340 | { | ||
341 | UNW_DPRINT(0, "unwind.%s: %p outside of regstk " | ||
342 | "[0x%lx-0x%lx)\n", | ||
343 | __FUNCTION__, (void *) addr, | ||
344 | info->regstk.limit, | ||
345 | info->regstk.top); | ||
346 | return -1; | ||
347 | } | ||
348 | if ((unsigned long) nat_addr >= info->regstk.top) | ||
349 | nat_addr = &info->sw->ar_rnat; | ||
350 | nat_mask = (1UL << ia64_rse_slot_num(addr)); | ||
351 | break; | ||
352 | } | ||
353 | } else { | ||
354 | addr = &info->sw->r4 + (regnum - 4); | ||
355 | nat_addr = &info->sw->ar_unat; | ||
356 | nat_mask = (1UL << ((long) addr & 0x1f8)/8); | ||
357 | } | ||
358 | } else { | ||
359 | /* access a scratch register */ | ||
360 | pt = get_scratch_regs(info); | ||
361 | addr = (unsigned long *) ((unsigned long)pt + pt_regs_off(regnum)); | ||
362 | if (info->pri_unat_loc) | ||
363 | nat_addr = info->pri_unat_loc; | ||
364 | else | ||
365 | nat_addr = &info->sw->ar_unat; | ||
366 | nat_mask = (1UL << ((long) addr & 0x1f8)/8); | ||
367 | } | ||
368 | } else { | ||
369 | /* access a stacked register */ | ||
370 | addr = ia64_rse_skip_regs((unsigned long *) info->bsp, regnum - 32); | ||
371 | nat_addr = ia64_rse_rnat_addr(addr); | ||
372 | if ((unsigned long) addr < info->regstk.limit | ||
373 | || (unsigned long) addr >= info->regstk.top) | ||
374 | { | ||
375 | UNW_DPRINT(0, "unwind.%s: ignoring attempt to access register outside " | ||
376 | "of rbs\n", __FUNCTION__); | ||
377 | return -1; | ||
378 | } | ||
379 | if ((unsigned long) nat_addr >= info->regstk.top) | ||
380 | nat_addr = &info->sw->ar_rnat; | ||
381 | nat_mask = (1UL << ia64_rse_slot_num(addr)); | ||
382 | } | ||
383 | |||
384 | if (write) { | ||
385 | if (read_only(addr)) { | ||
386 | UNW_DPRINT(0, "unwind.%s: ignoring attempt to write read-only location\n", | ||
387 | __FUNCTION__); | ||
388 | } else { | ||
389 | *addr = *val; | ||
390 | if (*nat) | ||
391 | *nat_addr |= nat_mask; | ||
392 | else | ||
393 | *nat_addr &= ~nat_mask; | ||
394 | } | ||
395 | } else { | ||
396 | if ((*nat_addr & nat_mask) == 0) { | ||
397 | *val = *addr; | ||
398 | *nat = 0; | ||
399 | } else { | ||
400 | *val = 0; /* if register is a NaT, *addr may contain kernel data! */ | ||
401 | *nat = 1; | ||
402 | } | ||
403 | } | ||
404 | return 0; | ||
405 | } | ||
406 | EXPORT_SYMBOL(unw_access_gr); | ||
407 | |||
408 | int | ||
409 | unw_access_br (struct unw_frame_info *info, int regnum, unsigned long *val, int write) | ||
410 | { | ||
411 | unsigned long *addr; | ||
412 | struct pt_regs *pt; | ||
413 | |||
414 | switch (regnum) { | ||
415 | /* scratch: */ | ||
416 | case 0: pt = get_scratch_regs(info); addr = &pt->b0; break; | ||
417 | case 6: pt = get_scratch_regs(info); addr = &pt->b6; break; | ||
418 | case 7: pt = get_scratch_regs(info); addr = &pt->b7; break; | ||
419 | |||
420 | /* preserved: */ | ||
421 | case 1: case 2: case 3: case 4: case 5: | ||
422 | addr = *(&info->b1_loc + (regnum - 1)); | ||
423 | if (!addr) | ||
424 | addr = &info->sw->b1 + (regnum - 1); | ||
425 | break; | ||
426 | |||
427 | default: | ||
428 | UNW_DPRINT(0, "unwind.%s: trying to access non-existent b%u\n", | ||
429 | __FUNCTION__, regnum); | ||
430 | return -1; | ||
431 | } | ||
432 | if (write) | ||
433 | if (read_only(addr)) { | ||
434 | UNW_DPRINT(0, "unwind.%s: ignoring attempt to write read-only location\n", | ||
435 | __FUNCTION__); | ||
436 | } else | ||
437 | *addr = *val; | ||
438 | else | ||
439 | *val = *addr; | ||
440 | return 0; | ||
441 | } | ||
442 | EXPORT_SYMBOL(unw_access_br); | ||
443 | |||
444 | int | ||
445 | unw_access_fr (struct unw_frame_info *info, int regnum, struct ia64_fpreg *val, int write) | ||
446 | { | ||
447 | struct ia64_fpreg *addr = NULL; | ||
448 | struct pt_regs *pt; | ||
449 | |||
450 | if ((unsigned) (regnum - 2) >= 126) { | ||
451 | UNW_DPRINT(0, "unwind.%s: trying to access non-existent f%u\n", | ||
452 | __FUNCTION__, regnum); | ||
453 | return -1; | ||
454 | } | ||
455 | |||
456 | if (regnum <= 5) { | ||
457 | addr = *(&info->f2_loc + (regnum - 2)); | ||
458 | if (!addr) | ||
459 | addr = &info->sw->f2 + (regnum - 2); | ||
460 | } else if (regnum <= 15) { | ||
461 | if (regnum <= 11) { | ||
462 | pt = get_scratch_regs(info); | ||
463 | addr = &pt->f6 + (regnum - 6); | ||
464 | } | ||
465 | else | ||
466 | addr = &info->sw->f12 + (regnum - 12); | ||
467 | } else if (regnum <= 31) { | ||
468 | addr = info->fr_loc[regnum - 16]; | ||
469 | if (!addr) | ||
470 | addr = &info->sw->f16 + (regnum - 16); | ||
471 | } else { | ||
472 | struct task_struct *t = info->task; | ||
473 | |||
474 | if (write) | ||
475 | ia64_sync_fph(t); | ||
476 | else | ||
477 | ia64_flush_fph(t); | ||
478 | addr = t->thread.fph + (regnum - 32); | ||
479 | } | ||
480 | |||
481 | if (write) | ||
482 | if (read_only(addr)) { | ||
483 | UNW_DPRINT(0, "unwind.%s: ignoring attempt to write read-only location\n", | ||
484 | __FUNCTION__); | ||
485 | } else | ||
486 | *addr = *val; | ||
487 | else | ||
488 | *val = *addr; | ||
489 | return 0; | ||
490 | } | ||
491 | EXPORT_SYMBOL(unw_access_fr); | ||
492 | |||
493 | int | ||
494 | unw_access_ar (struct unw_frame_info *info, int regnum, unsigned long *val, int write) | ||
495 | { | ||
496 | unsigned long *addr; | ||
497 | struct pt_regs *pt; | ||
498 | |||
499 | switch (regnum) { | ||
500 | case UNW_AR_BSP: | ||
501 | addr = info->bsp_loc; | ||
502 | if (!addr) | ||
503 | addr = &info->sw->ar_bspstore; | ||
504 | break; | ||
505 | |||
506 | case UNW_AR_BSPSTORE: | ||
507 | addr = info->bspstore_loc; | ||
508 | if (!addr) | ||
509 | addr = &info->sw->ar_bspstore; | ||
510 | break; | ||
511 | |||
512 | case UNW_AR_PFS: | ||
513 | addr = info->pfs_loc; | ||
514 | if (!addr) | ||
515 | addr = &info->sw->ar_pfs; | ||
516 | break; | ||
517 | |||
518 | case UNW_AR_RNAT: | ||
519 | addr = info->rnat_loc; | ||
520 | if (!addr) | ||
521 | addr = &info->sw->ar_rnat; | ||
522 | break; | ||
523 | |||
524 | case UNW_AR_UNAT: | ||
525 | addr = info->unat_loc; | ||
526 | if (!addr) | ||
527 | addr = &info->sw->ar_unat; | ||
528 | break; | ||
529 | |||
530 | case UNW_AR_LC: | ||
531 | addr = info->lc_loc; | ||
532 | if (!addr) | ||
533 | addr = &info->sw->ar_lc; | ||
534 | break; | ||
535 | |||
536 | case UNW_AR_EC: | ||
537 | if (!info->cfm_loc) | ||
538 | return -1; | ||
539 | if (write) | ||
540 | *info->cfm_loc = | ||
541 | (*info->cfm_loc & ~(0x3fUL << 52)) | ((*val & 0x3f) << 52); | ||
542 | else | ||
543 | *val = (*info->cfm_loc >> 52) & 0x3f; | ||
544 | return 0; | ||
545 | |||
546 | case UNW_AR_FPSR: | ||
547 | addr = info->fpsr_loc; | ||
548 | if (!addr) | ||
549 | addr = &info->sw->ar_fpsr; | ||
550 | break; | ||
551 | |||
552 | case UNW_AR_RSC: | ||
553 | pt = get_scratch_regs(info); | ||
554 | addr = &pt->ar_rsc; | ||
555 | break; | ||
556 | |||
557 | case UNW_AR_CCV: | ||
558 | pt = get_scratch_regs(info); | ||
559 | addr = &pt->ar_ccv; | ||
560 | break; | ||
561 | |||
562 | case UNW_AR_CSD: | ||
563 | pt = get_scratch_regs(info); | ||
564 | addr = &pt->ar_csd; | ||
565 | break; | ||
566 | |||
567 | case UNW_AR_SSD: | ||
568 | pt = get_scratch_regs(info); | ||
569 | addr = &pt->ar_ssd; | ||
570 | break; | ||
571 | |||
572 | default: | ||
573 | UNW_DPRINT(0, "unwind.%s: trying to access non-existent ar%u\n", | ||
574 | __FUNCTION__, regnum); | ||
575 | return -1; | ||
576 | } | ||
577 | |||
578 | if (write) { | ||
579 | if (read_only(addr)) { | ||
580 | UNW_DPRINT(0, "unwind.%s: ignoring attempt to write read-only location\n", | ||
581 | __FUNCTION__); | ||
582 | } else | ||
583 | *addr = *val; | ||
584 | } else | ||
585 | *val = *addr; | ||
586 | return 0; | ||
587 | } | ||
588 | EXPORT_SYMBOL(unw_access_ar); | ||
589 | |||
590 | int | ||
591 | unw_access_pr (struct unw_frame_info *info, unsigned long *val, int write) | ||
592 | { | ||
593 | unsigned long *addr; | ||
594 | |||
595 | addr = info->pr_loc; | ||
596 | if (!addr) | ||
597 | addr = &info->sw->pr; | ||
598 | |||
599 | if (write) { | ||
600 | if (read_only(addr)) { | ||
601 | UNW_DPRINT(0, "unwind.%s: ignoring attempt to write read-only location\n", | ||
602 | __FUNCTION__); | ||
603 | } else | ||
604 | *addr = *val; | ||
605 | } else | ||
606 | *val = *addr; | ||
607 | return 0; | ||
608 | } | ||
609 | EXPORT_SYMBOL(unw_access_pr); | ||
610 | |||
611 | |||
612 | /* Routines to manipulate the state stack. */ | ||
613 | |||
614 | static inline void | ||
615 | push (struct unw_state_record *sr) | ||
616 | { | ||
617 | struct unw_reg_state *rs; | ||
618 | |||
619 | rs = alloc_reg_state(); | ||
620 | if (!rs) { | ||
621 | printk(KERN_ERR "unwind: cannot stack reg state!\n"); | ||
622 | return; | ||
623 | } | ||
624 | memcpy(rs, &sr->curr, sizeof(*rs)); | ||
625 | sr->curr.next = rs; | ||
626 | } | ||
627 | |||
628 | static void | ||
629 | pop (struct unw_state_record *sr) | ||
630 | { | ||
631 | struct unw_reg_state *rs = sr->curr.next; | ||
632 | |||
633 | if (!rs) { | ||
634 | printk(KERN_ERR "unwind: stack underflow!\n"); | ||
635 | return; | ||
636 | } | ||
637 | memcpy(&sr->curr, rs, sizeof(*rs)); | ||
638 | free_reg_state(rs); | ||
639 | } | ||
640 | |||
641 | /* Make a copy of the state stack. Non-recursive to avoid stack overflows. */ | ||
642 | static struct unw_reg_state * | ||
643 | dup_state_stack (struct unw_reg_state *rs) | ||
644 | { | ||
645 | struct unw_reg_state *copy, *prev = NULL, *first = NULL; | ||
646 | |||
647 | while (rs) { | ||
648 | copy = alloc_reg_state(); | ||
649 | if (!copy) { | ||
650 | printk(KERN_ERR "unwind.dup_state_stack: out of memory\n"); | ||
651 | return NULL; | ||
652 | } | ||
653 | memcpy(copy, rs, sizeof(*copy)); | ||
654 | if (first) | ||
655 | prev->next = copy; | ||
656 | else | ||
657 | first = copy; | ||
658 | rs = rs->next; | ||
659 | prev = copy; | ||
660 | } | ||
661 | return first; | ||
662 | } | ||
663 | |||
664 | /* Free all stacked register states (but not RS itself). */ | ||
665 | static void | ||
666 | free_state_stack (struct unw_reg_state *rs) | ||
667 | { | ||
668 | struct unw_reg_state *p, *next; | ||
669 | |||
670 | for (p = rs->next; p != NULL; p = next) { | ||
671 | next = p->next; | ||
672 | free_reg_state(p); | ||
673 | } | ||
674 | rs->next = NULL; | ||
675 | } | ||
676 | |||
677 | /* Unwind decoder routines */ | ||
678 | |||
679 | static enum unw_register_index __attribute_const__ | ||
680 | decode_abreg (unsigned char abreg, int memory) | ||
681 | { | ||
682 | switch (abreg) { | ||
683 | case 0x04 ... 0x07: return UNW_REG_R4 + (abreg - 0x04); | ||
684 | case 0x22 ... 0x25: return UNW_REG_F2 + (abreg - 0x22); | ||
685 | case 0x30 ... 0x3f: return UNW_REG_F16 + (abreg - 0x30); | ||
686 | case 0x41 ... 0x45: return UNW_REG_B1 + (abreg - 0x41); | ||
687 | case 0x60: return UNW_REG_PR; | ||
688 | case 0x61: return UNW_REG_PSP; | ||
689 | case 0x62: return memory ? UNW_REG_PRI_UNAT_MEM : UNW_REG_PRI_UNAT_GR; | ||
690 | case 0x63: return UNW_REG_RP; | ||
691 | case 0x64: return UNW_REG_BSP; | ||
692 | case 0x65: return UNW_REG_BSPSTORE; | ||
693 | case 0x66: return UNW_REG_RNAT; | ||
694 | case 0x67: return UNW_REG_UNAT; | ||
695 | case 0x68: return UNW_REG_FPSR; | ||
696 | case 0x69: return UNW_REG_PFS; | ||
697 | case 0x6a: return UNW_REG_LC; | ||
698 | default: | ||
699 | break; | ||
700 | } | ||
701 | UNW_DPRINT(0, "unwind.%s: bad abreg=0x%x\n", __FUNCTION__, abreg); | ||
702 | return UNW_REG_LC; | ||
703 | } | ||
704 | |||
705 | static void | ||
706 | set_reg (struct unw_reg_info *reg, enum unw_where where, int when, unsigned long val) | ||
707 | { | ||
708 | reg->val = val; | ||
709 | reg->where = where; | ||
710 | if (reg->when == UNW_WHEN_NEVER) | ||
711 | reg->when = when; | ||
712 | } | ||
713 | |||
714 | static void | ||
715 | alloc_spill_area (unsigned long *offp, unsigned long regsize, | ||
716 | struct unw_reg_info *lo, struct unw_reg_info *hi) | ||
717 | { | ||
718 | struct unw_reg_info *reg; | ||
719 | |||
720 | for (reg = hi; reg >= lo; --reg) { | ||
721 | if (reg->where == UNW_WHERE_SPILL_HOME) { | ||
722 | reg->where = UNW_WHERE_PSPREL; | ||
723 | *offp -= regsize; | ||
724 | reg->val = *offp; | ||
725 | } | ||
726 | } | ||
727 | } | ||
728 | |||
729 | static inline void | ||
730 | spill_next_when (struct unw_reg_info **regp, struct unw_reg_info *lim, unw_word t) | ||
731 | { | ||
732 | struct unw_reg_info *reg; | ||
733 | |||
734 | for (reg = *regp; reg <= lim; ++reg) { | ||
735 | if (reg->where == UNW_WHERE_SPILL_HOME) { | ||
736 | reg->when = t; | ||
737 | *regp = reg + 1; | ||
738 | return; | ||
739 | } | ||
740 | } | ||
741 | UNW_DPRINT(0, "unwind.%s: excess spill!\n", __FUNCTION__); | ||
742 | } | ||
743 | |||
744 | static inline void | ||
745 | finish_prologue (struct unw_state_record *sr) | ||
746 | { | ||
747 | struct unw_reg_info *reg; | ||
748 | unsigned long off; | ||
749 | int i; | ||
750 | |||
751 | /* | ||
752 | * First, resolve implicit register save locations (see Section "11.4.2.3 Rules | ||
753 | * for Using Unwind Descriptors", rule 3): | ||
754 | */ | ||
755 | for (i = 0; i < (int) ARRAY_SIZE(unw.save_order); ++i) { | ||
756 | reg = sr->curr.reg + unw.save_order[i]; | ||
757 | if (reg->where == UNW_WHERE_GR_SAVE) { | ||
758 | reg->where = UNW_WHERE_GR; | ||
759 | reg->val = sr->gr_save_loc++; | ||
760 | } | ||
761 | } | ||
762 | |||
763 | /* | ||
764 | * Next, compute when the fp, general, and branch registers get | ||
765 | * saved. This must come before alloc_spill_area() because | ||
766 | * we need to know which registers are spilled to their home | ||
767 | * locations. | ||
768 | */ | ||
769 | if (sr->imask) { | ||
770 | unsigned char kind, mask = 0, *cp = sr->imask; | ||
771 | int t; | ||
772 | static const unsigned char limit[3] = { | ||
773 | UNW_REG_F31, UNW_REG_R7, UNW_REG_B5 | ||
774 | }; | ||
775 | struct unw_reg_info *(regs[3]); | ||
776 | |||
777 | regs[0] = sr->curr.reg + UNW_REG_F2; | ||
778 | regs[1] = sr->curr.reg + UNW_REG_R4; | ||
779 | regs[2] = sr->curr.reg + UNW_REG_B1; | ||
780 | |||
781 | for (t = 0; t < sr->region_len; ++t) { | ||
782 | if ((t & 3) == 0) | ||
783 | mask = *cp++; | ||
784 | kind = (mask >> 2*(3-(t & 3))) & 3; | ||
785 | if (kind > 0) | ||
786 | spill_next_when(®s[kind - 1], sr->curr.reg + limit[kind - 1], | ||
787 | sr->region_start + t); | ||
788 | } | ||
789 | } | ||
790 | /* | ||
791 | * Next, lay out the memory stack spill area: | ||
792 | */ | ||
793 | if (sr->any_spills) { | ||
794 | off = sr->spill_offset; | ||
795 | alloc_spill_area(&off, 16, sr->curr.reg + UNW_REG_F2, sr->curr.reg + UNW_REG_F31); | ||
796 | alloc_spill_area(&off, 8, sr->curr.reg + UNW_REG_B1, sr->curr.reg + UNW_REG_B5); | ||
797 | alloc_spill_area(&off, 8, sr->curr.reg + UNW_REG_R4, sr->curr.reg + UNW_REG_R7); | ||
798 | } | ||
799 | } | ||
800 | |||
801 | /* | ||
802 | * Region header descriptors. | ||
803 | */ | ||
804 | |||
805 | static void | ||
806 | desc_prologue (int body, unw_word rlen, unsigned char mask, unsigned char grsave, | ||
807 | struct unw_state_record *sr) | ||
808 | { | ||
809 | int i, region_start; | ||
810 | |||
811 | if (!(sr->in_body || sr->first_region)) | ||
812 | finish_prologue(sr); | ||
813 | sr->first_region = 0; | ||
814 | |||
815 | /* check if we're done: */ | ||
816 | if (sr->when_target < sr->region_start + sr->region_len) { | ||
817 | sr->done = 1; | ||
818 | return; | ||
819 | } | ||
820 | |||
821 | region_start = sr->region_start + sr->region_len; | ||
822 | |||
823 | for (i = 0; i < sr->epilogue_count; ++i) | ||
824 | pop(sr); | ||
825 | sr->epilogue_count = 0; | ||
826 | sr->epilogue_start = UNW_WHEN_NEVER; | ||
827 | |||
828 | sr->region_start = region_start; | ||
829 | sr->region_len = rlen; | ||
830 | sr->in_body = body; | ||
831 | |||
832 | if (!body) { | ||
833 | push(sr); | ||
834 | |||
835 | for (i = 0; i < 4; ++i) { | ||
836 | if (mask & 0x8) | ||
837 | set_reg(sr->curr.reg + unw.save_order[i], UNW_WHERE_GR, | ||
838 | sr->region_start + sr->region_len - 1, grsave++); | ||
839 | mask <<= 1; | ||
840 | } | ||
841 | sr->gr_save_loc = grsave; | ||
842 | sr->any_spills = 0; | ||
843 | sr->imask = NULL; | ||
844 | sr->spill_offset = 0x10; /* default to psp+16 */ | ||
845 | } | ||
846 | } | ||
847 | |||
848 | /* | ||
849 | * Prologue descriptors. | ||
850 | */ | ||
851 | |||
852 | static inline void | ||
853 | desc_abi (unsigned char abi, unsigned char context, struct unw_state_record *sr) | ||
854 | { | ||
855 | if (abi == 3 && context == 'i') { | ||
856 | sr->flags |= UNW_FLAG_INTERRUPT_FRAME; | ||
857 | UNW_DPRINT(3, "unwind.%s: interrupt frame\n", __FUNCTION__); | ||
858 | } | ||
859 | else | ||
860 | UNW_DPRINT(0, "unwind%s: ignoring unwabi(abi=0x%x,context=0x%x)\n", | ||
861 | __FUNCTION__, abi, context); | ||
862 | } | ||
863 | |||
864 | static inline void | ||
865 | desc_br_gr (unsigned char brmask, unsigned char gr, struct unw_state_record *sr) | ||
866 | { | ||
867 | int i; | ||
868 | |||
869 | for (i = 0; i < 5; ++i) { | ||
870 | if (brmask & 1) | ||
871 | set_reg(sr->curr.reg + UNW_REG_B1 + i, UNW_WHERE_GR, | ||
872 | sr->region_start + sr->region_len - 1, gr++); | ||
873 | brmask >>= 1; | ||
874 | } | ||
875 | } | ||
876 | |||
877 | static inline void | ||
878 | desc_br_mem (unsigned char brmask, struct unw_state_record *sr) | ||
879 | { | ||
880 | int i; | ||
881 | |||
882 | for (i = 0; i < 5; ++i) { | ||
883 | if (brmask & 1) { | ||
884 | set_reg(sr->curr.reg + UNW_REG_B1 + i, UNW_WHERE_SPILL_HOME, | ||
885 | sr->region_start + sr->region_len - 1, 0); | ||
886 | sr->any_spills = 1; | ||
887 | } | ||
888 | brmask >>= 1; | ||
889 | } | ||
890 | } | ||
891 | |||
892 | static inline void | ||
893 | desc_frgr_mem (unsigned char grmask, unw_word frmask, struct unw_state_record *sr) | ||
894 | { | ||
895 | int i; | ||
896 | |||
897 | for (i = 0; i < 4; ++i) { | ||
898 | if ((grmask & 1) != 0) { | ||
899 | set_reg(sr->curr.reg + UNW_REG_R4 + i, UNW_WHERE_SPILL_HOME, | ||
900 | sr->region_start + sr->region_len - 1, 0); | ||
901 | sr->any_spills = 1; | ||
902 | } | ||
903 | grmask >>= 1; | ||
904 | } | ||
905 | for (i = 0; i < 20; ++i) { | ||
906 | if ((frmask & 1) != 0) { | ||
907 | int base = (i < 4) ? UNW_REG_F2 : UNW_REG_F16 - 4; | ||
908 | set_reg(sr->curr.reg + base + i, UNW_WHERE_SPILL_HOME, | ||
909 | sr->region_start + sr->region_len - 1, 0); | ||
910 | sr->any_spills = 1; | ||
911 | } | ||
912 | frmask >>= 1; | ||
913 | } | ||
914 | } | ||
915 | |||
916 | static inline void | ||
917 | desc_fr_mem (unsigned char frmask, struct unw_state_record *sr) | ||
918 | { | ||
919 | int i; | ||
920 | |||
921 | for (i = 0; i < 4; ++i) { | ||
922 | if ((frmask & 1) != 0) { | ||
923 | set_reg(sr->curr.reg + UNW_REG_F2 + i, UNW_WHERE_SPILL_HOME, | ||
924 | sr->region_start + sr->region_len - 1, 0); | ||
925 | sr->any_spills = 1; | ||
926 | } | ||
927 | frmask >>= 1; | ||
928 | } | ||
929 | } | ||
930 | |||
931 | static inline void | ||
932 | desc_gr_gr (unsigned char grmask, unsigned char gr, struct unw_state_record *sr) | ||
933 | { | ||
934 | int i; | ||
935 | |||
936 | for (i = 0; i < 4; ++i) { | ||
937 | if ((grmask & 1) != 0) | ||
938 | set_reg(sr->curr.reg + UNW_REG_R4 + i, UNW_WHERE_GR, | ||
939 | sr->region_start + sr->region_len - 1, gr++); | ||
940 | grmask >>= 1; | ||
941 | } | ||
942 | } | ||
943 | |||
944 | static inline void | ||
945 | desc_gr_mem (unsigned char grmask, struct unw_state_record *sr) | ||
946 | { | ||
947 | int i; | ||
948 | |||
949 | for (i = 0; i < 4; ++i) { | ||
950 | if ((grmask & 1) != 0) { | ||
951 | set_reg(sr->curr.reg + UNW_REG_R4 + i, UNW_WHERE_SPILL_HOME, | ||
952 | sr->region_start + sr->region_len - 1, 0); | ||
953 | sr->any_spills = 1; | ||
954 | } | ||
955 | grmask >>= 1; | ||
956 | } | ||
957 | } | ||
958 | |||
959 | static inline void | ||
960 | desc_mem_stack_f (unw_word t, unw_word size, struct unw_state_record *sr) | ||
961 | { | ||
962 | set_reg(sr->curr.reg + UNW_REG_PSP, UNW_WHERE_NONE, | ||
963 | sr->region_start + min_t(int, t, sr->region_len - 1), 16*size); | ||
964 | } | ||
965 | |||
966 | static inline void | ||
967 | desc_mem_stack_v (unw_word t, struct unw_state_record *sr) | ||
968 | { | ||
969 | sr->curr.reg[UNW_REG_PSP].when = sr->region_start + min_t(int, t, sr->region_len - 1); | ||
970 | } | ||
971 | |||
972 | static inline void | ||
973 | desc_reg_gr (unsigned char reg, unsigned char dst, struct unw_state_record *sr) | ||
974 | { | ||
975 | set_reg(sr->curr.reg + reg, UNW_WHERE_GR, sr->region_start + sr->region_len - 1, dst); | ||
976 | } | ||
977 | |||
978 | static inline void | ||
979 | desc_reg_psprel (unsigned char reg, unw_word pspoff, struct unw_state_record *sr) | ||
980 | { | ||
981 | set_reg(sr->curr.reg + reg, UNW_WHERE_PSPREL, sr->region_start + sr->region_len - 1, | ||
982 | 0x10 - 4*pspoff); | ||
983 | } | ||
984 | |||
985 | static inline void | ||
986 | desc_reg_sprel (unsigned char reg, unw_word spoff, struct unw_state_record *sr) | ||
987 | { | ||
988 | set_reg(sr->curr.reg + reg, UNW_WHERE_SPREL, sr->region_start + sr->region_len - 1, | ||
989 | 4*spoff); | ||
990 | } | ||
991 | |||
992 | static inline void | ||
993 | desc_rp_br (unsigned char dst, struct unw_state_record *sr) | ||
994 | { | ||
995 | sr->return_link_reg = dst; | ||
996 | } | ||
997 | |||
998 | static inline void | ||
999 | desc_reg_when (unsigned char regnum, unw_word t, struct unw_state_record *sr) | ||
1000 | { | ||
1001 | struct unw_reg_info *reg = sr->curr.reg + regnum; | ||
1002 | |||
1003 | if (reg->where == UNW_WHERE_NONE) | ||
1004 | reg->where = UNW_WHERE_GR_SAVE; | ||
1005 | reg->when = sr->region_start + min_t(int, t, sr->region_len - 1); | ||
1006 | } | ||
1007 | |||
1008 | static inline void | ||
1009 | desc_spill_base (unw_word pspoff, struct unw_state_record *sr) | ||
1010 | { | ||
1011 | sr->spill_offset = 0x10 - 4*pspoff; | ||
1012 | } | ||
1013 | |||
1014 | static inline unsigned char * | ||
1015 | desc_spill_mask (unsigned char *imaskp, struct unw_state_record *sr) | ||
1016 | { | ||
1017 | sr->imask = imaskp; | ||
1018 | return imaskp + (2*sr->region_len + 7)/8; | ||
1019 | } | ||
1020 | |||
1021 | /* | ||
1022 | * Body descriptors. | ||
1023 | */ | ||
1024 | static inline void | ||
1025 | desc_epilogue (unw_word t, unw_word ecount, struct unw_state_record *sr) | ||
1026 | { | ||
1027 | sr->epilogue_start = sr->region_start + sr->region_len - 1 - t; | ||
1028 | sr->epilogue_count = ecount + 1; | ||
1029 | } | ||
1030 | |||
1031 | static inline void | ||
1032 | desc_copy_state (unw_word label, struct unw_state_record *sr) | ||
1033 | { | ||
1034 | struct unw_labeled_state *ls; | ||
1035 | |||
1036 | for (ls = sr->labeled_states; ls; ls = ls->next) { | ||
1037 | if (ls->label == label) { | ||
1038 | free_state_stack(&sr->curr); | ||
1039 | memcpy(&sr->curr, &ls->saved_state, sizeof(sr->curr)); | ||
1040 | sr->curr.next = dup_state_stack(ls->saved_state.next); | ||
1041 | return; | ||
1042 | } | ||
1043 | } | ||
1044 | printk(KERN_ERR "unwind: failed to find state labeled 0x%lx\n", label); | ||
1045 | } | ||
1046 | |||
1047 | static inline void | ||
1048 | desc_label_state (unw_word label, struct unw_state_record *sr) | ||
1049 | { | ||
1050 | struct unw_labeled_state *ls; | ||
1051 | |||
1052 | ls = alloc_labeled_state(); | ||
1053 | if (!ls) { | ||
1054 | printk(KERN_ERR "unwind.desc_label_state(): out of memory\n"); | ||
1055 | return; | ||
1056 | } | ||
1057 | ls->label = label; | ||
1058 | memcpy(&ls->saved_state, &sr->curr, sizeof(ls->saved_state)); | ||
1059 | ls->saved_state.next = dup_state_stack(sr->curr.next); | ||
1060 | |||
1061 | /* insert into list of labeled states: */ | ||
1062 | ls->next = sr->labeled_states; | ||
1063 | sr->labeled_states = ls; | ||
1064 | } | ||
1065 | |||
1066 | /* | ||
1067 | * General descriptors. | ||
1068 | */ | ||
1069 | |||
1070 | static inline int | ||
1071 | desc_is_active (unsigned char qp, unw_word t, struct unw_state_record *sr) | ||
1072 | { | ||
1073 | if (sr->when_target <= sr->region_start + min_t(int, t, sr->region_len - 1)) | ||
1074 | return 0; | ||
1075 | if (qp > 0) { | ||
1076 | if ((sr->pr_val & (1UL << qp)) == 0) | ||
1077 | return 0; | ||
1078 | sr->pr_mask |= (1UL << qp); | ||
1079 | } | ||
1080 | return 1; | ||
1081 | } | ||
1082 | |||
1083 | static inline void | ||
1084 | desc_restore_p (unsigned char qp, unw_word t, unsigned char abreg, struct unw_state_record *sr) | ||
1085 | { | ||
1086 | struct unw_reg_info *r; | ||
1087 | |||
1088 | if (!desc_is_active(qp, t, sr)) | ||
1089 | return; | ||
1090 | |||
1091 | r = sr->curr.reg + decode_abreg(abreg, 0); | ||
1092 | r->where = UNW_WHERE_NONE; | ||
1093 | r->when = UNW_WHEN_NEVER; | ||
1094 | r->val = 0; | ||
1095 | } | ||
1096 | |||
1097 | static inline void | ||
1098 | desc_spill_reg_p (unsigned char qp, unw_word t, unsigned char abreg, unsigned char x, | ||
1099 | unsigned char ytreg, struct unw_state_record *sr) | ||
1100 | { | ||
1101 | enum unw_where where = UNW_WHERE_GR; | ||
1102 | struct unw_reg_info *r; | ||
1103 | |||
1104 | if (!desc_is_active(qp, t, sr)) | ||
1105 | return; | ||
1106 | |||
1107 | if (x) | ||
1108 | where = UNW_WHERE_BR; | ||
1109 | else if (ytreg & 0x80) | ||
1110 | where = UNW_WHERE_FR; | ||
1111 | |||
1112 | r = sr->curr.reg + decode_abreg(abreg, 0); | ||
1113 | r->where = where; | ||
1114 | r->when = sr->region_start + min_t(int, t, sr->region_len - 1); | ||
1115 | r->val = (ytreg & 0x7f); | ||
1116 | } | ||
1117 | |||
1118 | static inline void | ||
1119 | desc_spill_psprel_p (unsigned char qp, unw_word t, unsigned char abreg, unw_word pspoff, | ||
1120 | struct unw_state_record *sr) | ||
1121 | { | ||
1122 | struct unw_reg_info *r; | ||
1123 | |||
1124 | if (!desc_is_active(qp, t, sr)) | ||
1125 | return; | ||
1126 | |||
1127 | r = sr->curr.reg + decode_abreg(abreg, 1); | ||
1128 | r->where = UNW_WHERE_PSPREL; | ||
1129 | r->when = sr->region_start + min_t(int, t, sr->region_len - 1); | ||
1130 | r->val = 0x10 - 4*pspoff; | ||
1131 | } | ||
1132 | |||
1133 | static inline void | ||
1134 | desc_spill_sprel_p (unsigned char qp, unw_word t, unsigned char abreg, unw_word spoff, | ||
1135 | struct unw_state_record *sr) | ||
1136 | { | ||
1137 | struct unw_reg_info *r; | ||
1138 | |||
1139 | if (!desc_is_active(qp, t, sr)) | ||
1140 | return; | ||
1141 | |||
1142 | r = sr->curr.reg + decode_abreg(abreg, 1); | ||
1143 | r->where = UNW_WHERE_SPREL; | ||
1144 | r->when = sr->region_start + min_t(int, t, sr->region_len - 1); | ||
1145 | r->val = 4*spoff; | ||
1146 | } | ||
1147 | |||
/*
 * Callback macros consumed by the descriptor decoder included below.  Each
 * UNW_DEC_* macro forwards to the matching desc_*() handler above; the
 * leading format argument (fmt / f) is not used by any of them.
 */

/*
 * NOTE(review): the expansion ends in a semicolon, so "UNW_DEC_BAD_CODE(x);"
 * yields an extra empty statement -- confirm against the use sites in
 * unwind_decoder.c before changing it.
 */
#define UNW_DEC_BAD_CODE(code)			printk(KERN_ERR "unwind: unknown code 0x%02x\n", \
						       code);

/*
 * region headers:
 */
/* the _GR form always opens a prologue (body == 0) */
#define UNW_DEC_PROLOGUE_GR(fmt,r,m,gr,arg)	desc_prologue(0,r,m,gr,arg)
/* the plain form passes an empty mask and 32 as the first save register */
#define UNW_DEC_PROLOGUE(fmt,b,r,arg)		desc_prologue(b,r,0,32,arg)
/*
 * prologue descriptors:
 */
#define UNW_DEC_ABI(fmt,a,c,arg)		desc_abi(a,c,arg)
#define UNW_DEC_BR_GR(fmt,b,g,arg)		desc_br_gr(b,g,arg)
#define UNW_DEC_BR_MEM(fmt,b,arg)		desc_br_mem(b,arg)
#define UNW_DEC_FRGR_MEM(fmt,g,f,arg)		desc_frgr_mem(g,f,arg)
#define UNW_DEC_FR_MEM(fmt,f,arg)		desc_fr_mem(f,arg)
#define UNW_DEC_GR_GR(fmt,m,g,arg)		desc_gr_gr(m,g,arg)
#define UNW_DEC_GR_MEM(fmt,m,arg)		desc_gr_mem(m,arg)
#define UNW_DEC_MEM_STACK_F(fmt,t,s,arg)	desc_mem_stack_f(t,s,arg)
#define UNW_DEC_MEM_STACK_V(fmt,t,arg)		desc_mem_stack_v(t,arg)
#define UNW_DEC_REG_GR(fmt,r,d,arg)		desc_reg_gr(r,d,arg)
#define UNW_DEC_REG_PSPREL(fmt,r,o,arg)		desc_reg_psprel(r,o,arg)
#define UNW_DEC_REG_SPREL(fmt,r,o,arg)		desc_reg_sprel(r,o,arg)
#define UNW_DEC_REG_WHEN(fmt,r,t,arg)		desc_reg_when(r,t,arg)
/* the primary-UNaT variants reuse the generic handlers with a fixed register */
#define UNW_DEC_PRIUNAT_WHEN_GR(fmt,t,arg)	desc_reg_when(UNW_REG_PRI_UNAT_GR,t,arg)
#define UNW_DEC_PRIUNAT_WHEN_MEM(fmt,t,arg)	desc_reg_when(UNW_REG_PRI_UNAT_MEM,t,arg)
#define UNW_DEC_PRIUNAT_GR(fmt,r,arg)		desc_reg_gr(UNW_REG_PRI_UNAT_GR,r,arg)
#define UNW_DEC_PRIUNAT_PSPREL(fmt,o,arg)	desc_reg_psprel(UNW_REG_PRI_UNAT_MEM,o,arg)
#define UNW_DEC_PRIUNAT_SPREL(fmt,o,arg)	desc_reg_sprel(UNW_REG_PRI_UNAT_MEM,o,arg)
#define UNW_DEC_RP_BR(fmt,d,arg)		desc_rp_br(d,arg)
#define UNW_DEC_SPILL_BASE(fmt,o,arg)		desc_spill_base(o,arg)
/* assigns the handler's return value (address past the mask) back to m */
#define UNW_DEC_SPILL_MASK(fmt,m,arg)		(m = desc_spill_mask(m,arg))
/*
 * body descriptors:
 */
#define UNW_DEC_EPILOGUE(fmt,t,c,arg)		desc_epilogue(t,c,arg)
#define UNW_DEC_COPY_STATE(fmt,l,arg)		desc_copy_state(l,arg)
#define UNW_DEC_LABEL_STATE(fmt,l,arg)		desc_label_state(l,arg)
/*
 * general unwind descriptors:
 */
/* the forms without _P pass 0 as the qualifying predicate */
#define UNW_DEC_SPILL_REG_P(f,p,t,a,x,y,arg)	desc_spill_reg_p(p,t,a,x,y,arg)
#define UNW_DEC_SPILL_REG(f,t,a,x,y,arg)	desc_spill_reg_p(0,t,a,x,y,arg)
#define UNW_DEC_SPILL_PSPREL_P(f,p,t,a,o,arg)	desc_spill_psprel_p(p,t,a,o,arg)
#define UNW_DEC_SPILL_PSPREL(f,t,a,o,arg)	desc_spill_psprel_p(0,t,a,o,arg)
#define UNW_DEC_SPILL_SPREL_P(f,p,t,a,o,arg)	desc_spill_sprel_p(p,t,a,o,arg)
#define UNW_DEC_SPILL_SPREL(f,t,a,o,arg)	desc_spill_sprel_p(0,t,a,o,arg)
#define UNW_DEC_RESTORE_P(f,p,t,a,arg)		desc_restore_p(p,t,a,arg)
#define UNW_DEC_RESTORE(f,t,a,arg)		desc_restore_p(0,t,a,arg)

#include "unwind_decoder.c"
1199 | |||
1200 | |||
1201 | /* Unwind scripts. */ | ||
1202 | |||
1203 | static inline unw_hash_index_t | ||
1204 | hash (unsigned long ip) | ||
1205 | { | ||
1206 | # define hashmagic 0x9e3779b97f4a7c16UL /* based on (sqrt(5)/2-1)*2^64 */ | ||
1207 | |||
1208 | return (ip >> 4)*hashmagic >> (64 - UNW_LOG_HASH_SIZE); | ||
1209 | #undef hashmagic | ||
1210 | } | ||
1211 | |||
1212 | static inline long | ||
1213 | cache_match (struct unw_script *script, unsigned long ip, unsigned long pr) | ||
1214 | { | ||
1215 | read_lock(&script->lock); | ||
1216 | if (ip == script->ip && ((pr ^ script->pr_val) & script->pr_mask) == 0) | ||
1217 | /* keep the read lock... */ | ||
1218 | return 1; | ||
1219 | read_unlock(&script->lock); | ||
1220 | return 0; | ||
1221 | } | ||
1222 | |||
1223 | static inline struct unw_script * | ||
1224 | script_lookup (struct unw_frame_info *info) | ||
1225 | { | ||
1226 | struct unw_script *script = unw.cache + info->hint; | ||
1227 | unsigned short index; | ||
1228 | unsigned long ip, pr; | ||
1229 | |||
1230 | if (UNW_DEBUG_ON(0)) | ||
1231 | return NULL; /* Always regenerate scripts in debug mode */ | ||
1232 | |||
1233 | STAT(++unw.stat.cache.lookups); | ||
1234 | |||
1235 | ip = info->ip; | ||
1236 | pr = info->pr; | ||
1237 | |||
1238 | if (cache_match(script, ip, pr)) { | ||
1239 | STAT(++unw.stat.cache.hinted_hits); | ||
1240 | return script; | ||
1241 | } | ||
1242 | |||
1243 | index = unw.hash[hash(ip)]; | ||
1244 | if (index >= UNW_CACHE_SIZE) | ||
1245 | return NULL; | ||
1246 | |||
1247 | script = unw.cache + index; | ||
1248 | while (1) { | ||
1249 | if (cache_match(script, ip, pr)) { | ||
1250 | /* update hint; no locking required as single-word writes are atomic */ | ||
1251 | STAT(++unw.stat.cache.normal_hits); | ||
1252 | unw.cache[info->prev_script].hint = script - unw.cache; | ||
1253 | return script; | ||
1254 | } | ||
1255 | if (script->coll_chain >= UNW_HASH_SIZE) | ||
1256 | return NULL; | ||
1257 | script = unw.cache + script->coll_chain; | ||
1258 | STAT(++unw.stat.cache.collision_chain_traversals); | ||
1259 | } | ||
1260 | } | ||
1261 | |||
1262 | /* | ||
1263 | * On returning, a write lock for the SCRIPT is still being held. | ||
1264 | */ | ||
1265 | static inline struct unw_script * | ||
1266 | script_new (unsigned long ip) | ||
1267 | { | ||
1268 | struct unw_script *script, *prev, *tmp; | ||
1269 | unw_hash_index_t index; | ||
1270 | unsigned short head; | ||
1271 | |||
1272 | STAT(++unw.stat.script.news); | ||
1273 | |||
1274 | /* | ||
1275 | * Can't (easily) use cmpxchg() here because of ABA problem | ||
1276 | * that is intrinsic in cmpxchg()... | ||
1277 | */ | ||
1278 | head = unw.lru_head; | ||
1279 | script = unw.cache + head; | ||
1280 | unw.lru_head = script->lru_chain; | ||
1281 | |||
1282 | /* | ||
1283 | * We'd deadlock here if we interrupted a thread that is holding a read lock on | ||
1284 | * script->lock. Thus, if the write_trylock() fails, we simply bail out. The | ||
1285 | * alternative would be to disable interrupts whenever we hold a read-lock, but | ||
1286 | * that seems silly. | ||
1287 | */ | ||
1288 | if (!write_trylock(&script->lock)) | ||
1289 | return NULL; | ||
1290 | |||
1291 | /* re-insert script at the tail of the LRU chain: */ | ||
1292 | unw.cache[unw.lru_tail].lru_chain = head; | ||
1293 | unw.lru_tail = head; | ||
1294 | |||
1295 | /* remove the old script from the hash table (if it's there): */ | ||
1296 | if (script->ip) { | ||
1297 | index = hash(script->ip); | ||
1298 | tmp = unw.cache + unw.hash[index]; | ||
1299 | prev = NULL; | ||
1300 | while (1) { | ||
1301 | if (tmp == script) { | ||
1302 | if (prev) | ||
1303 | prev->coll_chain = tmp->coll_chain; | ||
1304 | else | ||
1305 | unw.hash[index] = tmp->coll_chain; | ||
1306 | break; | ||
1307 | } else | ||
1308 | prev = tmp; | ||
1309 | if (tmp->coll_chain >= UNW_CACHE_SIZE) | ||
1310 | /* old script wasn't in the hash-table */ | ||
1311 | break; | ||
1312 | tmp = unw.cache + tmp->coll_chain; | ||
1313 | } | ||
1314 | } | ||
1315 | |||
1316 | /* enter new script in the hash table */ | ||
1317 | index = hash(ip); | ||
1318 | script->coll_chain = unw.hash[index]; | ||
1319 | unw.hash[index] = script - unw.cache; | ||
1320 | |||
1321 | script->ip = ip; /* set new IP while we're holding the locks */ | ||
1322 | |||
1323 | STAT(if (script->coll_chain < UNW_CACHE_SIZE) ++unw.stat.script.collisions); | ||
1324 | |||
1325 | script->flags = 0; | ||
1326 | script->hint = 0; | ||
1327 | script->count = 0; | ||
1328 | return script; | ||
1329 | } | ||
1330 | |||
1331 | static void | ||
1332 | script_finalize (struct unw_script *script, struct unw_state_record *sr) | ||
1333 | { | ||
1334 | script->pr_mask = sr->pr_mask; | ||
1335 | script->pr_val = sr->pr_val; | ||
1336 | /* | ||
1337 | * We could down-grade our write-lock on script->lock here but | ||
1338 | * the rwlock API doesn't offer atomic lock downgrading, so | ||
1339 | * we'll just keep the write-lock and release it later when | ||
1340 | * we're done using the script. | ||
1341 | */ | ||
1342 | } | ||
1343 | |||
1344 | static inline void | ||
1345 | script_emit (struct unw_script *script, struct unw_insn insn) | ||
1346 | { | ||
1347 | if (script->count >= UNW_MAX_SCRIPT_LEN) { | ||
1348 | UNW_DPRINT(0, "unwind.%s: script exceeds maximum size of %u instructions!\n", | ||
1349 | __FUNCTION__, UNW_MAX_SCRIPT_LEN); | ||
1350 | return; | ||
1351 | } | ||
1352 | script->insn[script->count++] = insn; | ||
1353 | } | ||
1354 | |||
1355 | static inline void | ||
1356 | emit_nat_info (struct unw_state_record *sr, int i, struct unw_script *script) | ||
1357 | { | ||
1358 | struct unw_reg_info *r = sr->curr.reg + i; | ||
1359 | enum unw_insn_opcode opc; | ||
1360 | struct unw_insn insn; | ||
1361 | unsigned long val = 0; | ||
1362 | |||
1363 | switch (r->where) { | ||
1364 | case UNW_WHERE_GR: | ||
1365 | if (r->val >= 32) { | ||
1366 | /* register got spilled to a stacked register */ | ||
1367 | opc = UNW_INSN_SETNAT_TYPE; | ||
1368 | val = UNW_NAT_REGSTK; | ||
1369 | } else | ||
1370 | /* register got spilled to a scratch register */ | ||
1371 | opc = UNW_INSN_SETNAT_MEMSTK; | ||
1372 | break; | ||
1373 | |||
1374 | case UNW_WHERE_FR: | ||
1375 | opc = UNW_INSN_SETNAT_TYPE; | ||
1376 | val = UNW_NAT_VAL; | ||
1377 | break; | ||
1378 | |||
1379 | case UNW_WHERE_BR: | ||
1380 | opc = UNW_INSN_SETNAT_TYPE; | ||
1381 | val = UNW_NAT_NONE; | ||
1382 | break; | ||
1383 | |||
1384 | case UNW_WHERE_PSPREL: | ||
1385 | case UNW_WHERE_SPREL: | ||
1386 | opc = UNW_INSN_SETNAT_MEMSTK; | ||
1387 | break; | ||
1388 | |||
1389 | default: | ||
1390 | UNW_DPRINT(0, "unwind.%s: don't know how to emit nat info for where = %u\n", | ||
1391 | __FUNCTION__, r->where); | ||
1392 | return; | ||
1393 | } | ||
1394 | insn.opc = opc; | ||
1395 | insn.dst = unw.preg_index[i]; | ||
1396 | insn.val = val; | ||
1397 | script_emit(script, insn); | ||
1398 | } | ||
1399 | |||
1400 | static void | ||
1401 | compile_reg (struct unw_state_record *sr, int i, struct unw_script *script) | ||
1402 | { | ||
1403 | struct unw_reg_info *r = sr->curr.reg + i; | ||
1404 | enum unw_insn_opcode opc; | ||
1405 | unsigned long val, rval; | ||
1406 | struct unw_insn insn; | ||
1407 | long need_nat_info; | ||
1408 | |||
1409 | if (r->where == UNW_WHERE_NONE || r->when >= sr->when_target) | ||
1410 | return; | ||
1411 | |||
1412 | opc = UNW_INSN_MOVE; | ||
1413 | val = rval = r->val; | ||
1414 | need_nat_info = (i >= UNW_REG_R4 && i <= UNW_REG_R7); | ||
1415 | |||
1416 | switch (r->where) { | ||
1417 | case UNW_WHERE_GR: | ||
1418 | if (rval >= 32) { | ||
1419 | opc = UNW_INSN_MOVE_STACKED; | ||
1420 | val = rval - 32; | ||
1421 | } else if (rval >= 4 && rval <= 7) { | ||
1422 | if (need_nat_info) { | ||
1423 | opc = UNW_INSN_MOVE2; | ||
1424 | need_nat_info = 0; | ||
1425 | } | ||
1426 | val = unw.preg_index[UNW_REG_R4 + (rval - 4)]; | ||
1427 | } else if (rval == 0) { | ||
1428 | opc = UNW_INSN_MOVE_CONST; | ||
1429 | val = 0; | ||
1430 | } else { | ||
1431 | /* register got spilled to a scratch register */ | ||
1432 | opc = UNW_INSN_MOVE_SCRATCH; | ||
1433 | val = pt_regs_off(rval); | ||
1434 | } | ||
1435 | break; | ||
1436 | |||
1437 | case UNW_WHERE_FR: | ||
1438 | if (rval <= 5) | ||
1439 | val = unw.preg_index[UNW_REG_F2 + (rval - 2)]; | ||
1440 | else if (rval >= 16 && rval <= 31) | ||
1441 | val = unw.preg_index[UNW_REG_F16 + (rval - 16)]; | ||
1442 | else { | ||
1443 | opc = UNW_INSN_MOVE_SCRATCH; | ||
1444 | if (rval <= 11) | ||
1445 | val = offsetof(struct pt_regs, f6) + 16*(rval - 6); | ||
1446 | else | ||
1447 | UNW_DPRINT(0, "unwind.%s: kernel may not touch f%lu\n", | ||
1448 | __FUNCTION__, rval); | ||
1449 | } | ||
1450 | break; | ||
1451 | |||
1452 | case UNW_WHERE_BR: | ||
1453 | if (rval >= 1 && rval <= 5) | ||
1454 | val = unw.preg_index[UNW_REG_B1 + (rval - 1)]; | ||
1455 | else { | ||
1456 | opc = UNW_INSN_MOVE_SCRATCH; | ||
1457 | if (rval == 0) | ||
1458 | val = offsetof(struct pt_regs, b0); | ||
1459 | else if (rval == 6) | ||
1460 | val = offsetof(struct pt_regs, b6); | ||
1461 | else | ||
1462 | val = offsetof(struct pt_regs, b7); | ||
1463 | } | ||
1464 | break; | ||
1465 | |||
1466 | case UNW_WHERE_SPREL: | ||
1467 | opc = UNW_INSN_ADD_SP; | ||
1468 | break; | ||
1469 | |||
1470 | case UNW_WHERE_PSPREL: | ||
1471 | opc = UNW_INSN_ADD_PSP; | ||
1472 | break; | ||
1473 | |||
1474 | default: | ||
1475 | UNW_DPRINT(0, "unwind%s: register %u has unexpected `where' value of %u\n", | ||
1476 | __FUNCTION__, i, r->where); | ||
1477 | break; | ||
1478 | } | ||
1479 | insn.opc = opc; | ||
1480 | insn.dst = unw.preg_index[i]; | ||
1481 | insn.val = val; | ||
1482 | script_emit(script, insn); | ||
1483 | if (need_nat_info) | ||
1484 | emit_nat_info(sr, i, script); | ||
1485 | |||
1486 | if (i == UNW_REG_PSP) { | ||
1487 | /* | ||
1488 | * info->psp must contain the _value_ of the previous | ||
1489 | * sp, not it's save location. We get this by | ||
1490 | * dereferencing the value we just stored in | ||
1491 | * info->psp: | ||
1492 | */ | ||
1493 | insn.opc = UNW_INSN_LOAD; | ||
1494 | insn.dst = insn.val = unw.preg_index[UNW_REG_PSP]; | ||
1495 | script_emit(script, insn); | ||
1496 | } | ||
1497 | } | ||
1498 | |||
1499 | static inline const struct unw_table_entry * | ||
1500 | lookup (struct unw_table *table, unsigned long rel_ip) | ||
1501 | { | ||
1502 | const struct unw_table_entry *e = NULL; | ||
1503 | unsigned long lo, hi, mid; | ||
1504 | |||
1505 | /* do a binary search for right entry: */ | ||
1506 | for (lo = 0, hi = table->length; lo < hi; ) { | ||
1507 | mid = (lo + hi) / 2; | ||
1508 | e = &table->array[mid]; | ||
1509 | if (rel_ip < e->start_offset) | ||
1510 | hi = mid; | ||
1511 | else if (rel_ip >= e->end_offset) | ||
1512 | lo = mid + 1; | ||
1513 | else | ||
1514 | break; | ||
1515 | } | ||
1516 | if (rel_ip < e->start_offset || rel_ip >= e->end_offset) | ||
1517 | return NULL; | ||
1518 | return e; | ||
1519 | } | ||
1520 | |||
1521 | /* | ||
1522 | * Build an unwind script that unwinds from state OLD_STATE to the | ||
1523 | * entrypoint of the function that called OLD_STATE. | ||
1524 | */ | ||
1525 | static inline struct unw_script * | ||
1526 | build_script (struct unw_frame_info *info) | ||
1527 | { | ||
1528 | const struct unw_table_entry *e = NULL; | ||
1529 | struct unw_script *script = NULL; | ||
1530 | struct unw_labeled_state *ls, *next; | ||
1531 | unsigned long ip = info->ip; | ||
1532 | struct unw_state_record sr; | ||
1533 | struct unw_table *table; | ||
1534 | struct unw_reg_info *r; | ||
1535 | struct unw_insn insn; | ||
1536 | u8 *dp, *desc_end; | ||
1537 | u64 hdr; | ||
1538 | int i; | ||
1539 | STAT(unsigned long start, parse_start;) | ||
1540 | |||
1541 | STAT(++unw.stat.script.builds; start = ia64_get_itc()); | ||
1542 | |||
1543 | /* build state record */ | ||
1544 | memset(&sr, 0, sizeof(sr)); | ||
1545 | for (r = sr.curr.reg; r < sr.curr.reg + UNW_NUM_REGS; ++r) | ||
1546 | r->when = UNW_WHEN_NEVER; | ||
1547 | sr.pr_val = info->pr; | ||
1548 | |||
1549 | UNW_DPRINT(3, "unwind.%s: ip 0x%lx\n", __FUNCTION__, ip); | ||
1550 | script = script_new(ip); | ||
1551 | if (!script) { | ||
1552 | UNW_DPRINT(0, "unwind.%s: failed to create unwind script\n", __FUNCTION__); | ||
1553 | STAT(unw.stat.script.build_time += ia64_get_itc() - start); | ||
1554 | return NULL; | ||
1555 | } | ||
1556 | unw.cache[info->prev_script].hint = script - unw.cache; | ||
1557 | |||
1558 | /* search the kernel's and the modules' unwind tables for IP: */ | ||
1559 | |||
1560 | STAT(parse_start = ia64_get_itc()); | ||
1561 | |||
1562 | for (table = unw.tables; table; table = table->next) { | ||
1563 | if (ip >= table->start && ip < table->end) { | ||
1564 | e = lookup(table, ip - table->segment_base); | ||
1565 | break; | ||
1566 | } | ||
1567 | } | ||
1568 | if (!e) { | ||
1569 | /* no info, return default unwinder (leaf proc, no mem stack, no saved regs) */ | ||
1570 | UNW_DPRINT(1, "unwind.%s: no unwind info for ip=0x%lx (prev ip=0x%lx)\n", | ||
1571 | __FUNCTION__, ip, unw.cache[info->prev_script].ip); | ||
1572 | sr.curr.reg[UNW_REG_RP].where = UNW_WHERE_BR; | ||
1573 | sr.curr.reg[UNW_REG_RP].when = -1; | ||
1574 | sr.curr.reg[UNW_REG_RP].val = 0; | ||
1575 | compile_reg(&sr, UNW_REG_RP, script); | ||
1576 | script_finalize(script, &sr); | ||
1577 | STAT(unw.stat.script.parse_time += ia64_get_itc() - parse_start); | ||
1578 | STAT(unw.stat.script.build_time += ia64_get_itc() - start); | ||
1579 | return script; | ||
1580 | } | ||
1581 | |||
1582 | sr.when_target = (3*((ip & ~0xfUL) - (table->segment_base + e->start_offset))/16 | ||
1583 | + (ip & 0xfUL)); | ||
1584 | hdr = *(u64 *) (table->segment_base + e->info_offset); | ||
1585 | dp = (u8 *) (table->segment_base + e->info_offset + 8); | ||
1586 | desc_end = dp + 8*UNW_LENGTH(hdr); | ||
1587 | |||
1588 | while (!sr.done && dp < desc_end) | ||
1589 | dp = unw_decode(dp, sr.in_body, &sr); | ||
1590 | |||
1591 | if (sr.when_target > sr.epilogue_start) { | ||
1592 | /* | ||
1593 | * sp has been restored and all values on the memory stack below | ||
1594 | * psp also have been restored. | ||
1595 | */ | ||
1596 | sr.curr.reg[UNW_REG_PSP].val = 0; | ||
1597 | sr.curr.reg[UNW_REG_PSP].where = UNW_WHERE_NONE; | ||
1598 | sr.curr.reg[UNW_REG_PSP].when = UNW_WHEN_NEVER; | ||
1599 | for (r = sr.curr.reg; r < sr.curr.reg + UNW_NUM_REGS; ++r) | ||
1600 | if ((r->where == UNW_WHERE_PSPREL && r->val <= 0x10) | ||
1601 | || r->where == UNW_WHERE_SPREL) | ||
1602 | { | ||
1603 | r->val = 0; | ||
1604 | r->where = UNW_WHERE_NONE; | ||
1605 | r->when = UNW_WHEN_NEVER; | ||
1606 | } | ||
1607 | } | ||
1608 | |||
1609 | script->flags = sr.flags; | ||
1610 | |||
1611 | /* | ||
1612 | * If RP didn't get saved, generate entry for the return link | ||
1613 | * register. | ||
1614 | */ | ||
1615 | if (sr.curr.reg[UNW_REG_RP].when >= sr.when_target) { | ||
1616 | sr.curr.reg[UNW_REG_RP].where = UNW_WHERE_BR; | ||
1617 | sr.curr.reg[UNW_REG_RP].when = -1; | ||
1618 | sr.curr.reg[UNW_REG_RP].val = sr.return_link_reg; | ||
1619 | UNW_DPRINT(1, "unwind.%s: using default for rp at ip=0x%lx where=%d val=0x%lx\n", | ||
1620 | __FUNCTION__, ip, sr.curr.reg[UNW_REG_RP].where, | ||
1621 | sr.curr.reg[UNW_REG_RP].val); | ||
1622 | } | ||
1623 | |||
1624 | #ifdef UNW_DEBUG | ||
1625 | UNW_DPRINT(1, "unwind.%s: state record for func 0x%lx, t=%u:\n", | ||
1626 | __FUNCTION__, table->segment_base + e->start_offset, sr.when_target); | ||
1627 | for (r = sr.curr.reg; r < sr.curr.reg + UNW_NUM_REGS; ++r) { | ||
1628 | if (r->where != UNW_WHERE_NONE || r->when != UNW_WHEN_NEVER) { | ||
1629 | UNW_DPRINT(1, " %s <- ", unw.preg_name[r - sr.curr.reg]); | ||
1630 | switch (r->where) { | ||
1631 | case UNW_WHERE_GR: UNW_DPRINT(1, "r%lu", r->val); break; | ||
1632 | case UNW_WHERE_FR: UNW_DPRINT(1, "f%lu", r->val); break; | ||
1633 | case UNW_WHERE_BR: UNW_DPRINT(1, "b%lu", r->val); break; | ||
1634 | case UNW_WHERE_SPREL: UNW_DPRINT(1, "[sp+0x%lx]", r->val); break; | ||
1635 | case UNW_WHERE_PSPREL: UNW_DPRINT(1, "[psp+0x%lx]", r->val); break; | ||
1636 | case UNW_WHERE_NONE: | ||
1637 | UNW_DPRINT(1, "%s+0x%lx", unw.preg_name[r - sr.curr.reg], r->val); | ||
1638 | break; | ||
1639 | |||
1640 | default: | ||
1641 | UNW_DPRINT(1, "BADWHERE(%d)", r->where); | ||
1642 | break; | ||
1643 | } | ||
1644 | UNW_DPRINT(1, "\t\t%d\n", r->when); | ||
1645 | } | ||
1646 | } | ||
1647 | #endif | ||
1648 | |||
1649 | STAT(unw.stat.script.parse_time += ia64_get_itc() - parse_start); | ||
1650 | |||
1651 | /* translate state record into unwinder instructions: */ | ||
1652 | |||
1653 | /* | ||
1654 | * First, set psp if we're dealing with a fixed-size frame; | ||
1655 | * subsequent instructions may depend on this value. | ||
1656 | */ | ||
1657 | if (sr.when_target > sr.curr.reg[UNW_REG_PSP].when | ||
1658 | && (sr.curr.reg[UNW_REG_PSP].where == UNW_WHERE_NONE) | ||
1659 | && sr.curr.reg[UNW_REG_PSP].val != 0) { | ||
1660 | /* new psp is sp plus frame size */ | ||
1661 | insn.opc = UNW_INSN_ADD; | ||
1662 | insn.dst = offsetof(struct unw_frame_info, psp)/8; | ||
1663 | insn.val = sr.curr.reg[UNW_REG_PSP].val; /* frame size */ | ||
1664 | script_emit(script, insn); | ||
1665 | } | ||
1666 | |||
1667 | /* determine where the primary UNaT is: */ | ||
1668 | if (sr.when_target < sr.curr.reg[UNW_REG_PRI_UNAT_GR].when) | ||
1669 | i = UNW_REG_PRI_UNAT_MEM; | ||
1670 | else if (sr.when_target < sr.curr.reg[UNW_REG_PRI_UNAT_MEM].when) | ||
1671 | i = UNW_REG_PRI_UNAT_GR; | ||
1672 | else if (sr.curr.reg[UNW_REG_PRI_UNAT_MEM].when > sr.curr.reg[UNW_REG_PRI_UNAT_GR].when) | ||
1673 | i = UNW_REG_PRI_UNAT_MEM; | ||
1674 | else | ||
1675 | i = UNW_REG_PRI_UNAT_GR; | ||
1676 | |||
1677 | compile_reg(&sr, i, script); | ||
1678 | |||
1679 | for (i = UNW_REG_BSP; i < UNW_NUM_REGS; ++i) | ||
1680 | compile_reg(&sr, i, script); | ||
1681 | |||
1682 | /* free labeled register states & stack: */ | ||
1683 | |||
1684 | STAT(parse_start = ia64_get_itc()); | ||
1685 | for (ls = sr.labeled_states; ls; ls = next) { | ||
1686 | next = ls->next; | ||
1687 | free_state_stack(&ls->saved_state); | ||
1688 | free_labeled_state(ls); | ||
1689 | } | ||
1690 | free_state_stack(&sr.curr); | ||
1691 | STAT(unw.stat.script.parse_time += ia64_get_itc() - parse_start); | ||
1692 | |||
1693 | script_finalize(script, &sr); | ||
1694 | STAT(unw.stat.script.build_time += ia64_get_itc() - start); | ||
1695 | return script; | ||
1696 | } | ||
1697 | |||
1698 | /* | ||
1699 | * Apply the unwinding actions represented by OPS and update SR to | ||
1700 | * reflect the state that existed upon entry to the function that this | ||
1701 | * unwinder represents. | ||
1702 | */ | ||
1703 | static inline void | ||
1704 | run_script (struct unw_script *script, struct unw_frame_info *state) | ||
1705 | { | ||
1706 | struct unw_insn *ip, *limit, next_insn; | ||
1707 | unsigned long opc, dst, val, off; | ||
1708 | unsigned long *s = (unsigned long *) state; | ||
1709 | STAT(unsigned long start;) | ||
1710 | |||
1711 | STAT(++unw.stat.script.runs; start = ia64_get_itc()); | ||
1712 | state->flags = script->flags; | ||
1713 | ip = script->insn; | ||
1714 | limit = script->insn + script->count; | ||
1715 | next_insn = *ip; | ||
1716 | |||
1717 | while (ip++ < limit) { | ||
1718 | opc = next_insn.opc; | ||
1719 | dst = next_insn.dst; | ||
1720 | val = next_insn.val; | ||
1721 | next_insn = *ip; | ||
1722 | |||
1723 | redo: | ||
1724 | switch (opc) { | ||
1725 | case UNW_INSN_ADD: | ||
1726 | s[dst] += val; | ||
1727 | break; | ||
1728 | |||
1729 | case UNW_INSN_MOVE2: | ||
1730 | if (!s[val]) | ||
1731 | goto lazy_init; | ||
1732 | s[dst+1] = s[val+1]; | ||
1733 | s[dst] = s[val]; | ||
1734 | break; | ||
1735 | |||
1736 | case UNW_INSN_MOVE: | ||
1737 | if (!s[val]) | ||
1738 | goto lazy_init; | ||
1739 | s[dst] = s[val]; | ||
1740 | break; | ||
1741 | |||
1742 | case UNW_INSN_MOVE_SCRATCH: | ||
1743 | if (state->pt) { | ||
1744 | s[dst] = (unsigned long) get_scratch_regs(state) + val; | ||
1745 | } else { | ||
1746 | s[dst] = 0; | ||
1747 | UNW_DPRINT(0, "unwind.%s: no state->pt, dst=%ld, val=%ld\n", | ||
1748 | __FUNCTION__, dst, val); | ||
1749 | } | ||
1750 | break; | ||
1751 | |||
1752 | case UNW_INSN_MOVE_CONST: | ||
1753 | if (val == 0) | ||
1754 | s[dst] = (unsigned long) &unw.r0; | ||
1755 | else { | ||
1756 | s[dst] = 0; | ||
1757 | UNW_DPRINT(0, "unwind.%s: UNW_INSN_MOVE_CONST bad val=%ld\n", | ||
1758 | __FUNCTION__, val); | ||
1759 | } | ||
1760 | break; | ||
1761 | |||
1762 | |||
1763 | case UNW_INSN_MOVE_STACKED: | ||
1764 | s[dst] = (unsigned long) ia64_rse_skip_regs((unsigned long *)state->bsp, | ||
1765 | val); | ||
1766 | break; | ||
1767 | |||
1768 | case UNW_INSN_ADD_PSP: | ||
1769 | s[dst] = state->psp + val; | ||
1770 | break; | ||
1771 | |||
1772 | case UNW_INSN_ADD_SP: | ||
1773 | s[dst] = state->sp + val; | ||
1774 | break; | ||
1775 | |||
1776 | case UNW_INSN_SETNAT_MEMSTK: | ||
1777 | if (!state->pri_unat_loc) | ||
1778 | state->pri_unat_loc = &state->sw->ar_unat; | ||
1779 | /* register off. is a multiple of 8, so the least 3 bits (type) are 0 */ | ||
1780 | s[dst+1] = ((unsigned long) state->pri_unat_loc - s[dst]) | UNW_NAT_MEMSTK; | ||
1781 | break; | ||
1782 | |||
1783 | case UNW_INSN_SETNAT_TYPE: | ||
1784 | s[dst+1] = val; | ||
1785 | break; | ||
1786 | |||
1787 | case UNW_INSN_LOAD: | ||
1788 | #ifdef UNW_DEBUG | ||
1789 | if ((s[val] & (local_cpu_data->unimpl_va_mask | 0x7)) != 0 | ||
1790 | || s[val] < TASK_SIZE) | ||
1791 | { | ||
1792 | UNW_DPRINT(0, "unwind.%s: rejecting bad psp=0x%lx\n", | ||
1793 | __FUNCTION__, s[val]); | ||
1794 | break; | ||
1795 | } | ||
1796 | #endif | ||
1797 | s[dst] = *(unsigned long *) s[val]; | ||
1798 | break; | ||
1799 | } | ||
1800 | } | ||
1801 | STAT(unw.stat.script.run_time += ia64_get_itc() - start); | ||
1802 | return; | ||
1803 | |||
1804 | lazy_init: | ||
1805 | off = unw.sw_off[val]; | ||
1806 | s[val] = (unsigned long) state->sw + off; | ||
1807 | if (off >= offsetof(struct switch_stack, r4) && off <= offsetof(struct switch_stack, r7)) | ||
1808 | /* | ||
1809 | * We're initializing a general register: init NaT info, too. Note that | ||
1810 | * the offset is a multiple of 8 which gives us the 3 bits needed for | ||
1811 | * the type field. | ||
1812 | */ | ||
1813 | s[val+1] = (offsetof(struct switch_stack, ar_unat) - off) | UNW_NAT_MEMSTK; | ||
1814 | goto redo; | ||
1815 | } | ||
1816 | |||
1817 | static int | ||
1818 | find_save_locs (struct unw_frame_info *info) | ||
1819 | { | ||
1820 | int have_write_lock = 0; | ||
1821 | struct unw_script *scr; | ||
1822 | unsigned long flags = 0; | ||
1823 | |||
1824 | if ((info->ip & (local_cpu_data->unimpl_va_mask | 0xf)) || info->ip < TASK_SIZE) { | ||
1825 | /* don't let obviously bad addresses pollute the cache */ | ||
1826 | /* FIXME: should really be level 0 but it occurs too often. KAO */ | ||
1827 | UNW_DPRINT(1, "unwind.%s: rejecting bad ip=0x%lx\n", __FUNCTION__, info->ip); | ||
1828 | info->rp_loc = NULL; | ||
1829 | return -1; | ||
1830 | } | ||
1831 | |||
1832 | scr = script_lookup(info); | ||
1833 | if (!scr) { | ||
1834 | spin_lock_irqsave(&unw.lock, flags); | ||
1835 | scr = build_script(info); | ||
1836 | if (!scr) { | ||
1837 | spin_unlock_irqrestore(&unw.lock, flags); | ||
1838 | UNW_DPRINT(0, | ||
1839 | "unwind.%s: failed to locate/build unwind script for ip %lx\n", | ||
1840 | __FUNCTION__, info->ip); | ||
1841 | return -1; | ||
1842 | } | ||
1843 | have_write_lock = 1; | ||
1844 | } | ||
1845 | info->hint = scr->hint; | ||
1846 | info->prev_script = scr - unw.cache; | ||
1847 | |||
1848 | run_script(scr, info); | ||
1849 | |||
1850 | if (have_write_lock) { | ||
1851 | write_unlock(&scr->lock); | ||
1852 | spin_unlock_irqrestore(&unw.lock, flags); | ||
1853 | } else | ||
1854 | read_unlock(&scr->lock); | ||
1855 | return 0; | ||
1856 | } | ||
1857 | |||
1858 | int | ||
1859 | unw_unwind (struct unw_frame_info *info) | ||
1860 | { | ||
1861 | unsigned long prev_ip, prev_sp, prev_bsp; | ||
1862 | unsigned long ip, pr, num_regs; | ||
1863 | STAT(unsigned long start, flags;) | ||
1864 | int retval; | ||
1865 | |||
1866 | STAT(local_irq_save(flags); ++unw.stat.api.unwinds; start = ia64_get_itc()); | ||
1867 | |||
1868 | prev_ip = info->ip; | ||
1869 | prev_sp = info->sp; | ||
1870 | prev_bsp = info->bsp; | ||
1871 | |||
1872 | /* restore the ip */ | ||
1873 | if (!info->rp_loc) { | ||
1874 | /* FIXME: should really be level 0 but it occurs too often. KAO */ | ||
1875 | UNW_DPRINT(1, "unwind.%s: failed to locate return link (ip=0x%lx)!\n", | ||
1876 | __FUNCTION__, info->ip); | ||
1877 | STAT(unw.stat.api.unwind_time += ia64_get_itc() - start; local_irq_restore(flags)); | ||
1878 | return -1; | ||
1879 | } | ||
1880 | ip = info->ip = *info->rp_loc; | ||
1881 | if (ip < GATE_ADDR) { | ||
1882 | UNW_DPRINT(2, "unwind.%s: reached user-space (ip=0x%lx)\n", __FUNCTION__, ip); | ||
1883 | STAT(unw.stat.api.unwind_time += ia64_get_itc() - start; local_irq_restore(flags)); | ||
1884 | return -1; | ||
1885 | } | ||
1886 | |||
1887 | /* restore the cfm: */ | ||
1888 | if (!info->pfs_loc) { | ||
1889 | UNW_DPRINT(0, "unwind.%s: failed to locate ar.pfs!\n", __FUNCTION__); | ||
1890 | STAT(unw.stat.api.unwind_time += ia64_get_itc() - start; local_irq_restore(flags)); | ||
1891 | return -1; | ||
1892 | } | ||
1893 | info->cfm_loc = info->pfs_loc; | ||
1894 | |||
1895 | /* restore the bsp: */ | ||
1896 | pr = info->pr; | ||
1897 | num_regs = 0; | ||
1898 | if ((info->flags & UNW_FLAG_INTERRUPT_FRAME)) { | ||
1899 | info->pt = info->sp + 16; | ||
1900 | if ((pr & (1UL << PRED_NON_SYSCALL)) != 0) | ||
1901 | num_regs = *info->cfm_loc & 0x7f; /* size of frame */ | ||
1902 | info->pfs_loc = | ||
1903 | (unsigned long *) (info->pt + offsetof(struct pt_regs, ar_pfs)); | ||
1904 | UNW_DPRINT(3, "unwind.%s: interrupt_frame pt 0x%lx\n", __FUNCTION__, info->pt); | ||
1905 | } else | ||
1906 | num_regs = (*info->cfm_loc >> 7) & 0x7f; /* size of locals */ | ||
1907 | info->bsp = (unsigned long) ia64_rse_skip_regs((unsigned long *) info->bsp, -num_regs); | ||
1908 | if (info->bsp < info->regstk.limit || info->bsp > info->regstk.top) { | ||
1909 | UNW_DPRINT(0, "unwind.%s: bsp (0x%lx) out of range [0x%lx-0x%lx]\n", | ||
1910 | __FUNCTION__, info->bsp, info->regstk.limit, info->regstk.top); | ||
1911 | STAT(unw.stat.api.unwind_time += ia64_get_itc() - start; local_irq_restore(flags)); | ||
1912 | return -1; | ||
1913 | } | ||
1914 | |||
1915 | /* restore the sp: */ | ||
1916 | info->sp = info->psp; | ||
1917 | if (info->sp < info->memstk.top || info->sp > info->memstk.limit) { | ||
1918 | UNW_DPRINT(0, "unwind.%s: sp (0x%lx) out of range [0x%lx-0x%lx]\n", | ||
1919 | __FUNCTION__, info->sp, info->memstk.top, info->memstk.limit); | ||
1920 | STAT(unw.stat.api.unwind_time += ia64_get_itc() - start; local_irq_restore(flags)); | ||
1921 | return -1; | ||
1922 | } | ||
1923 | |||
1924 | if (info->ip == prev_ip && info->sp == prev_sp && info->bsp == prev_bsp) { | ||
1925 | UNW_DPRINT(0, "unwind.%s: ip, sp, bsp unchanged; stopping here (ip=0x%lx)\n", | ||
1926 | __FUNCTION__, ip); | ||
1927 | STAT(unw.stat.api.unwind_time += ia64_get_itc() - start; local_irq_restore(flags)); | ||
1928 | return -1; | ||
1929 | } | ||
1930 | |||
1931 | /* as we unwind, the saved ar.unat becomes the primary unat: */ | ||
1932 | info->pri_unat_loc = info->unat_loc; | ||
1933 | |||
1934 | /* finally, restore the predicates: */ | ||
1935 | unw_get_pr(info, &info->pr); | ||
1936 | |||
1937 | retval = find_save_locs(info); | ||
1938 | STAT(unw.stat.api.unwind_time += ia64_get_itc() - start; local_irq_restore(flags)); | ||
1939 | return retval; | ||
1940 | } | ||
1941 | EXPORT_SYMBOL(unw_unwind); | ||
1942 | |||
1943 | int | ||
1944 | unw_unwind_to_user (struct unw_frame_info *info) | ||
1945 | { | ||
1946 | unsigned long ip, sp; | ||
1947 | |||
1948 | while (unw_unwind(info) >= 0) { | ||
1949 | if (unw_get_rp(info, &ip) < 0) { | ||
1950 | unw_get_ip(info, &ip); | ||
1951 | UNW_DPRINT(0, "unwind.%s: failed to read return pointer (ip=0x%lx)\n", | ||
1952 | __FUNCTION__, ip); | ||
1953 | return -1; | ||
1954 | } | ||
1955 | unw_get_sp(info, &sp); | ||
1956 | if (sp >= (unsigned long)info->task + IA64_STK_OFFSET) | ||
1957 | break; | ||
1958 | if (ip < FIXADDR_USER_END) | ||
1959 | return 0; | ||
1960 | } | ||
1961 | unw_get_ip(info, &ip); | ||
1962 | UNW_DPRINT(0, "unwind.%s: failed to unwind to user-level (ip=0x%lx)\n", __FUNCTION__, ip); | ||
1963 | return -1; | ||
1964 | } | ||
1965 | EXPORT_SYMBOL(unw_unwind_to_user); | ||
1966 | |||
1967 | static void | ||
1968 | init_frame_info (struct unw_frame_info *info, struct task_struct *t, | ||
1969 | struct switch_stack *sw, unsigned long stktop) | ||
1970 | { | ||
1971 | unsigned long rbslimit, rbstop, stklimit; | ||
1972 | STAT(unsigned long start, flags;) | ||
1973 | |||
1974 | STAT(local_irq_save(flags); ++unw.stat.api.inits; start = ia64_get_itc()); | ||
1975 | |||
1976 | /* | ||
1977 | * Subtle stuff here: we _could_ unwind through the switch_stack frame but we | ||
1978 | * don't want to do that because it would be slow as each preserved register would | ||
1979 | * have to be processed. Instead, what we do here is zero out the frame info and | ||
1980 | * start the unwind process at the function that created the switch_stack frame. | ||
1981 | * When a preserved value in switch_stack needs to be accessed, run_script() will | ||
1982 | * initialize the appropriate pointer on demand. | ||
1983 | */ | ||
1984 | memset(info, 0, sizeof(*info)); | ||
1985 | |||
1986 | rbslimit = (unsigned long) t + IA64_RBS_OFFSET; | ||
1987 | rbstop = sw->ar_bspstore; | ||
1988 | if (rbstop - (unsigned long) t >= IA64_STK_OFFSET) | ||
1989 | rbstop = rbslimit; | ||
1990 | |||
1991 | stklimit = (unsigned long) t + IA64_STK_OFFSET; | ||
1992 | if (stktop <= rbstop) | ||
1993 | stktop = rbstop; | ||
1994 | |||
1995 | info->regstk.limit = rbslimit; | ||
1996 | info->regstk.top = rbstop; | ||
1997 | info->memstk.limit = stklimit; | ||
1998 | info->memstk.top = stktop; | ||
1999 | info->task = t; | ||
2000 | info->sw = sw; | ||
2001 | info->sp = info->psp = stktop; | ||
2002 | info->pr = sw->pr; | ||
2003 | UNW_DPRINT(3, "unwind.%s:\n" | ||
2004 | " task 0x%lx\n" | ||
2005 | " rbs = [0x%lx-0x%lx)\n" | ||
2006 | " stk = [0x%lx-0x%lx)\n" | ||
2007 | " pr 0x%lx\n" | ||
2008 | " sw 0x%lx\n" | ||
2009 | " sp 0x%lx\n", | ||
2010 | __FUNCTION__, (unsigned long) t, rbslimit, rbstop, stktop, stklimit, | ||
2011 | info->pr, (unsigned long) info->sw, info->sp); | ||
2012 | STAT(unw.stat.api.init_time += ia64_get_itc() - start; local_irq_restore(flags)); | ||
2013 | } | ||
2014 | |||
2015 | void | ||
2016 | unw_init_from_interruption (struct unw_frame_info *info, struct task_struct *t, | ||
2017 | struct pt_regs *pt, struct switch_stack *sw) | ||
2018 | { | ||
2019 | unsigned long sof; | ||
2020 | |||
2021 | init_frame_info(info, t, sw, pt->r12); | ||
2022 | info->cfm_loc = &pt->cr_ifs; | ||
2023 | info->unat_loc = &pt->ar_unat; | ||
2024 | info->pfs_loc = &pt->ar_pfs; | ||
2025 | sof = *info->cfm_loc & 0x7f; | ||
2026 | info->bsp = (unsigned long) ia64_rse_skip_regs((unsigned long *) info->regstk.top, -sof); | ||
2027 | info->ip = pt->cr_iip + ia64_psr(pt)->ri; | ||
2028 | info->pt = (unsigned long) pt; | ||
2029 | UNW_DPRINT(3, "unwind.%s:\n" | ||
2030 | " bsp 0x%lx\n" | ||
2031 | " sof 0x%lx\n" | ||
2032 | " ip 0x%lx\n", | ||
2033 | __FUNCTION__, info->bsp, sof, info->ip); | ||
2034 | find_save_locs(info); | ||
2035 | } | ||
2036 | |||
2037 | void | ||
2038 | unw_init_frame_info (struct unw_frame_info *info, struct task_struct *t, struct switch_stack *sw) | ||
2039 | { | ||
2040 | unsigned long sol; | ||
2041 | |||
2042 | init_frame_info(info, t, sw, (unsigned long) (sw + 1) - 16); | ||
2043 | info->cfm_loc = &sw->ar_pfs; | ||
2044 | sol = (*info->cfm_loc >> 7) & 0x7f; | ||
2045 | info->bsp = (unsigned long) ia64_rse_skip_regs((unsigned long *) info->regstk.top, -sol); | ||
2046 | info->ip = sw->b0; | ||
2047 | UNW_DPRINT(3, "unwind.%s:\n" | ||
2048 | " bsp 0x%lx\n" | ||
2049 | " sol 0x%lx\n" | ||
2050 | " ip 0x%lx\n", | ||
2051 | __FUNCTION__, info->bsp, sol, info->ip); | ||
2052 | find_save_locs(info); | ||
2053 | } | ||
2054 | |||
2055 | EXPORT_SYMBOL(unw_init_frame_info); | ||
2056 | |||
2057 | void | ||
2058 | unw_init_from_blocked_task (struct unw_frame_info *info, struct task_struct *t) | ||
2059 | { | ||
2060 | struct switch_stack *sw = (struct switch_stack *) (t->thread.ksp + 16); | ||
2061 | |||
2062 | UNW_DPRINT(1, "unwind.%s\n", __FUNCTION__); | ||
2063 | unw_init_frame_info(info, t, sw); | ||
2064 | } | ||
2065 | EXPORT_SYMBOL(unw_init_from_blocked_task); | ||
2066 | |||
2067 | static void | ||
2068 | init_unwind_table (struct unw_table *table, const char *name, unsigned long segment_base, | ||
2069 | unsigned long gp, const void *table_start, const void *table_end) | ||
2070 | { | ||
2071 | const struct unw_table_entry *start = table_start, *end = table_end; | ||
2072 | |||
2073 | table->name = name; | ||
2074 | table->segment_base = segment_base; | ||
2075 | table->gp = gp; | ||
2076 | table->start = segment_base + start[0].start_offset; | ||
2077 | table->end = segment_base + end[-1].end_offset; | ||
2078 | table->array = start; | ||
2079 | table->length = end - start; | ||
2080 | } | ||
2081 | |||
2082 | void * | ||
2083 | unw_add_unwind_table (const char *name, unsigned long segment_base, unsigned long gp, | ||
2084 | const void *table_start, const void *table_end) | ||
2085 | { | ||
2086 | const struct unw_table_entry *start = table_start, *end = table_end; | ||
2087 | struct unw_table *table; | ||
2088 | unsigned long flags; | ||
2089 | |||
2090 | if (end - start <= 0) { | ||
2091 | UNW_DPRINT(0, "unwind.%s: ignoring attempt to insert empty unwind table\n", | ||
2092 | __FUNCTION__); | ||
2093 | return NULL; | ||
2094 | } | ||
2095 | |||
2096 | table = kmalloc(sizeof(*table), GFP_USER); | ||
2097 | if (!table) | ||
2098 | return NULL; | ||
2099 | |||
2100 | init_unwind_table(table, name, segment_base, gp, table_start, table_end); | ||
2101 | |||
2102 | spin_lock_irqsave(&unw.lock, flags); | ||
2103 | { | ||
2104 | /* keep kernel unwind table at the front (it's searched most commonly): */ | ||
2105 | table->next = unw.tables->next; | ||
2106 | unw.tables->next = table; | ||
2107 | } | ||
2108 | spin_unlock_irqrestore(&unw.lock, flags); | ||
2109 | |||
2110 | return table; | ||
2111 | } | ||
2112 | |||
2113 | void | ||
2114 | unw_remove_unwind_table (void *handle) | ||
2115 | { | ||
2116 | struct unw_table *table, *prev; | ||
2117 | struct unw_script *tmp; | ||
2118 | unsigned long flags; | ||
2119 | long index; | ||
2120 | |||
2121 | if (!handle) { | ||
2122 | UNW_DPRINT(0, "unwind.%s: ignoring attempt to remove non-existent unwind table\n", | ||
2123 | __FUNCTION__); | ||
2124 | return; | ||
2125 | } | ||
2126 | |||
2127 | table = handle; | ||
2128 | if (table == &unw.kernel_table) { | ||
2129 | UNW_DPRINT(0, "unwind.%s: sorry, freeing the kernel's unwind table is a " | ||
2130 | "no-can-do!\n", __FUNCTION__); | ||
2131 | return; | ||
2132 | } | ||
2133 | |||
2134 | spin_lock_irqsave(&unw.lock, flags); | ||
2135 | { | ||
2136 | /* first, delete the table: */ | ||
2137 | |||
2138 | for (prev = (struct unw_table *) &unw.tables; prev; prev = prev->next) | ||
2139 | if (prev->next == table) | ||
2140 | break; | ||
2141 | if (!prev) { | ||
2142 | UNW_DPRINT(0, "unwind.%s: failed to find unwind table %p\n", | ||
2143 | __FUNCTION__, (void *) table); | ||
2144 | spin_unlock_irqrestore(&unw.lock, flags); | ||
2145 | return; | ||
2146 | } | ||
2147 | prev->next = table->next; | ||
2148 | } | ||
2149 | spin_unlock_irqrestore(&unw.lock, flags); | ||
2150 | |||
2151 | /* next, remove hash table entries for this table */ | ||
2152 | |||
2153 | for (index = 0; index <= UNW_HASH_SIZE; ++index) { | ||
2154 | tmp = unw.cache + unw.hash[index]; | ||
2155 | if (unw.hash[index] >= UNW_CACHE_SIZE | ||
2156 | || tmp->ip < table->start || tmp->ip >= table->end) | ||
2157 | continue; | ||
2158 | |||
2159 | write_lock(&tmp->lock); | ||
2160 | { | ||
2161 | if (tmp->ip >= table->start && tmp->ip < table->end) { | ||
2162 | unw.hash[index] = tmp->coll_chain; | ||
2163 | tmp->ip = 0; | ||
2164 | } | ||
2165 | } | ||
2166 | write_unlock(&tmp->lock); | ||
2167 | } | ||
2168 | |||
2169 | kfree(table); | ||
2170 | } | ||
2171 | |||
2172 | static int __init | ||
2173 | create_gate_table (void) | ||
2174 | { | ||
2175 | const struct unw_table_entry *entry, *start, *end; | ||
2176 | unsigned long *lp, segbase = GATE_ADDR; | ||
2177 | size_t info_size, size; | ||
2178 | char *info; | ||
2179 | Elf64_Phdr *punw = NULL, *phdr = (Elf64_Phdr *) (GATE_ADDR + GATE_EHDR->e_phoff); | ||
2180 | int i; | ||
2181 | |||
2182 | for (i = 0; i < GATE_EHDR->e_phnum; ++i, ++phdr) | ||
2183 | if (phdr->p_type == PT_IA_64_UNWIND) { | ||
2184 | punw = phdr; | ||
2185 | break; | ||
2186 | } | ||
2187 | |||
2188 | if (!punw) { | ||
2189 | printk("%s: failed to find gate DSO's unwind table!\n", __FUNCTION__); | ||
2190 | return 0; | ||
2191 | } | ||
2192 | |||
2193 | start = (const struct unw_table_entry *) punw->p_vaddr; | ||
2194 | end = (struct unw_table_entry *) ((char *) start + punw->p_memsz); | ||
2195 | size = 0; | ||
2196 | |||
2197 | unw_add_unwind_table("linux-gate.so", segbase, 0, start, end); | ||
2198 | |||
2199 | for (entry = start; entry < end; ++entry) | ||
2200 | size += 3*8 + 8 + 8*UNW_LENGTH(*(u64 *) (segbase + entry->info_offset)); | ||
2201 | size += 8; /* reserve space for "end of table" marker */ | ||
2202 | |||
2203 | unw.gate_table = kmalloc(size, GFP_KERNEL); | ||
2204 | if (!unw.gate_table) { | ||
2205 | unw.gate_table_size = 0; | ||
2206 | printk(KERN_ERR "%s: unable to create unwind data for gate page!\n", __FUNCTION__); | ||
2207 | return 0; | ||
2208 | } | ||
2209 | unw.gate_table_size = size; | ||
2210 | |||
2211 | lp = unw.gate_table; | ||
2212 | info = (char *) unw.gate_table + size; | ||
2213 | |||
2214 | for (entry = start; entry < end; ++entry, lp += 3) { | ||
2215 | info_size = 8 + 8*UNW_LENGTH(*(u64 *) (segbase + entry->info_offset)); | ||
2216 | info -= info_size; | ||
2217 | memcpy(info, (char *) segbase + entry->info_offset, info_size); | ||
2218 | |||
2219 | lp[0] = segbase + entry->start_offset; /* start */ | ||
2220 | lp[1] = segbase + entry->end_offset; /* end */ | ||
2221 | lp[2] = info - (char *) unw.gate_table; /* info */ | ||
2222 | } | ||
2223 | *lp = 0; /* end-of-table marker */ | ||
2224 | return 0; | ||
2225 | } | ||
2226 | |||
2227 | __initcall(create_gate_table); | ||
2228 | |||
2229 | void __init | ||
2230 | unw_init (void) | ||
2231 | { | ||
2232 | extern char __gp[]; | ||
2233 | extern void unw_hash_index_t_is_too_narrow (void); | ||
2234 | long i, off; | ||
2235 | |||
2236 | if (8*sizeof(unw_hash_index_t) < UNW_LOG_HASH_SIZE) | ||
2237 | unw_hash_index_t_is_too_narrow(); | ||
2238 | |||
2239 | unw.sw_off[unw.preg_index[UNW_REG_PRI_UNAT_GR]] = SW(AR_UNAT); | ||
2240 | unw.sw_off[unw.preg_index[UNW_REG_BSPSTORE]] = SW(AR_BSPSTORE); | ||
2241 | unw.sw_off[unw.preg_index[UNW_REG_PFS]] = SW(AR_UNAT); | ||
2242 | unw.sw_off[unw.preg_index[UNW_REG_RP]] = SW(B0); | ||
2243 | unw.sw_off[unw.preg_index[UNW_REG_UNAT]] = SW(AR_UNAT); | ||
2244 | unw.sw_off[unw.preg_index[UNW_REG_PR]] = SW(PR); | ||
2245 | unw.sw_off[unw.preg_index[UNW_REG_LC]] = SW(AR_LC); | ||
2246 | unw.sw_off[unw.preg_index[UNW_REG_FPSR]] = SW(AR_FPSR); | ||
2247 | for (i = UNW_REG_R4, off = SW(R4); i <= UNW_REG_R7; ++i, off += 8) | ||
2248 | unw.sw_off[unw.preg_index[i]] = off; | ||
2249 | for (i = UNW_REG_B1, off = SW(B1); i <= UNW_REG_B5; ++i, off += 8) | ||
2250 | unw.sw_off[unw.preg_index[i]] = off; | ||
2251 | for (i = UNW_REG_F2, off = SW(F2); i <= UNW_REG_F5; ++i, off += 16) | ||
2252 | unw.sw_off[unw.preg_index[i]] = off; | ||
2253 | for (i = UNW_REG_F16, off = SW(F16); i <= UNW_REG_F31; ++i, off += 16) | ||
2254 | unw.sw_off[unw.preg_index[i]] = off; | ||
2255 | |||
2256 | for (i = 0; i < UNW_CACHE_SIZE; ++i) { | ||
2257 | if (i > 0) | ||
2258 | unw.cache[i].lru_chain = (i - 1); | ||
2259 | unw.cache[i].coll_chain = -1; | ||
2260 | rwlock_init(&unw.cache[i].lock); | ||
2261 | } | ||
2262 | unw.lru_head = UNW_CACHE_SIZE - 1; | ||
2263 | unw.lru_tail = 0; | ||
2264 | |||
2265 | init_unwind_table(&unw.kernel_table, "kernel", KERNEL_START, (unsigned long) __gp, | ||
2266 | __start_unwind, __end_unwind); | ||
2267 | } | ||
2268 | |||
2269 | /* | ||
2270 | * DEPRECATED DEPRECATED DEPRECATED DEPRECATED DEPRECATED DEPRECATED DEPRECATED | ||
2271 | * | ||
2272 | * This system call has been deprecated. The new and improved way to get | ||
2273 | * at the kernel's unwind info is via the gate DSO. The address of the | ||
2274 | * ELF header for this DSO is passed to user-level via AT_SYSINFO_EHDR. | ||
2275 | * | ||
2276 | * DEPRECATED DEPRECATED DEPRECATED DEPRECATED DEPRECATED DEPRECATED DEPRECATED | ||
2277 | * | ||
2278 | * This system call copies the unwind data into the buffer pointed to by BUF and returns | ||
2279 | * the size of the unwind data. If BUF_SIZE is smaller than the size of the unwind data | ||
2280 | * or if BUF is NULL, nothing is copied, but the system call still returns the size of the | ||
2281 | * unwind data. | ||
2282 | * | ||
2283 | * The first portion of the unwind data contains an unwind table and the rest contains the | ||
2284 | * associated unwind info (in no particular order). The unwind table consists of a table | ||
2285 | * of entries of the form: | ||
2286 | * | ||
2287 | * u64 start; (64-bit address of start of function) | ||
2288 | * u64 end; (64-bit address of end of function) | ||
2289 | * u64 info; (BUF-relative offset to unwind info) | ||
2290 | * | ||
2291 | * The end of the unwind table is indicated by an entry with a START address of zero. | ||
2292 | * | ||
2293 | * Please see the IA-64 Software Conventions and Runtime Architecture manual for details | ||
2294 | * on the format of the unwind info. | ||
2295 | * | ||
2296 | * ERRORS | ||
2297 | * EFAULT BUF points outside your accessible address space. | ||
2298 | */ | ||
2299 | asmlinkage long | ||
2300 | sys_getunwind (void __user *buf, size_t buf_size) | ||
2301 | { | ||
2302 | if (buf && buf_size >= unw.gate_table_size) | ||
2303 | if (copy_to_user(buf, unw.gate_table, unw.gate_table_size) != 0) | ||
2304 | return -EFAULT; | ||
2305 | return unw.gate_table_size; | ||
2306 | } | ||
diff --git a/arch/ia64/kernel/unwind_decoder.c b/arch/ia64/kernel/unwind_decoder.c new file mode 100644 index 000000000000..50ac2d82f9bf --- /dev/null +++ b/arch/ia64/kernel/unwind_decoder.c | |||
@@ -0,0 +1,459 @@ | |||
1 | /* | ||
2 | * Copyright (C) 2000 Hewlett-Packard Co | ||
3 | * Copyright (C) 2000 David Mosberger-Tang <davidm@hpl.hp.com> | ||
4 | * | ||
5 | * Generic IA-64 unwind info decoder. | ||
6 | * | ||
7 | * This file is used both by the Linux kernel and objdump. Please keep | ||
8 | * the two copies of this file in sync. | ||
9 | * | ||
10 | * You need to customize the decoder by defining the following | ||
11 | * macros/constants before including this file: | ||
12 | * | ||
13 | * Types: | ||
14 | * unw_word Unsigned integer type with at least 64 bits | ||
15 | * | ||
16 | * Register names: | ||
17 | * UNW_REG_BSP | ||
18 | * UNW_REG_BSPSTORE | ||
19 | * UNW_REG_FPSR | ||
20 | * UNW_REG_LC | ||
21 | * UNW_REG_PFS | ||
22 | * UNW_REG_PR | ||
23 | * UNW_REG_RNAT | ||
24 | * UNW_REG_PSP | ||
25 | * UNW_REG_RP | ||
26 | * UNW_REG_UNAT | ||
27 | * | ||
28 | * Decoder action macros: | ||
29 | * UNW_DEC_BAD_CODE(code) | ||
30 | * UNW_DEC_ABI(fmt,abi,context,arg) | ||
31 | * UNW_DEC_BR_GR(fmt,brmask,gr,arg) | ||
32 | * UNW_DEC_BR_MEM(fmt,brmask,arg) | ||
33 | * UNW_DEC_COPY_STATE(fmt,label,arg) | ||
34 | * UNW_DEC_EPILOGUE(fmt,t,ecount,arg) | ||
35 | * UNW_DEC_FRGR_MEM(fmt,grmask,frmask,arg) | ||
36 | * UNW_DEC_FR_MEM(fmt,frmask,arg) | ||
37 | * UNW_DEC_GR_GR(fmt,grmask,gr,arg) | ||
38 | * UNW_DEC_GR_MEM(fmt,grmask,arg) | ||
39 | * UNW_DEC_LABEL_STATE(fmt,label,arg) | ||
40 | * UNW_DEC_MEM_STACK_F(fmt,t,size,arg) | ||
41 | * UNW_DEC_MEM_STACK_V(fmt,t,arg) | ||
42 | * UNW_DEC_PRIUNAT_GR(fmt,r,arg) | ||
43 | * UNW_DEC_PRIUNAT_WHEN_GR(fmt,t,arg) | ||
44 | * UNW_DEC_PRIUNAT_WHEN_MEM(fmt,t,arg) | ||
45 | * UNW_DEC_PRIUNAT_PSPREL(fmt,pspoff,arg) | ||
46 | * UNW_DEC_PRIUNAT_SPREL(fmt,spoff,arg) | ||
47 | * UNW_DEC_PROLOGUE(fmt,body,rlen,arg) | ||
48 | * UNW_DEC_PROLOGUE_GR(fmt,rlen,mask,grsave,arg) | ||
49 | * UNW_DEC_REG_PSPREL(fmt,reg,pspoff,arg) | ||
50 | * UNW_DEC_REG_GR(fmt,src,dst,arg) | ||
51 | * UNW_DEC_REG_SPREL(fmt,reg,spoff,arg) | ||
52 | * UNW_DEC_REG_WHEN(fmt,reg,t,arg) | ||
53 | * UNW_DEC_RESTORE(fmt,t,abreg,arg) | ||
54 | * UNW_DEC_RESTORE_P(fmt,qp,t,abreg,arg) | ||
 | * UNW_DEC_RP_BR(fmt,dst,arg) | ||
55 | * UNW_DEC_SPILL_BASE(fmt,pspoff,arg) | ||
56 | * UNW_DEC_SPILL_MASK(fmt,imaskp,arg) | ||
57 | * UNW_DEC_SPILL_PSPREL(fmt,t,abreg,pspoff,arg) | ||
58 | * UNW_DEC_SPILL_PSPREL_P(fmt,qp,t,abreg,pspoff,arg) | ||
59 | * UNW_DEC_SPILL_REG(fmt,t,abreg,x,ytreg,arg) | ||
60 | * UNW_DEC_SPILL_REG_P(fmt,qp,t,abreg,x,ytreg,arg) | ||
61 | * UNW_DEC_SPILL_SPREL(fmt,t,abreg,spoff,arg) | ||
62 | * UNW_DEC_SPILL_SPREL_P(fmt,qp,t,abreg,spoff,arg) | ||
63 | */ | ||
64 | |||
65 | static unw_word | ||
66 | unw_decode_uleb128 (unsigned char **dpp) | ||
67 | { | ||
68 | unsigned shift = 0; | ||
69 | unw_word byte, result = 0; | ||
70 | unsigned char *bp = *dpp; | ||
71 | |||
72 | while (1) | ||
73 | { | ||
74 | byte = *bp++; | ||
75 | result |= (byte & 0x7f) << shift; | ||
76 | if ((byte & 0x80) == 0) | ||
77 | break; | ||
78 | shift += 7; | ||
79 | } | ||
80 | *dpp = bp; | ||
81 | return result; | ||
82 | } | ||
83 | |||
84 | static unsigned char * | ||
85 | unw_decode_x1 (unsigned char *dp, unsigned char code, void *arg) | ||
86 | { | ||
87 | unsigned char byte1, abreg; | ||
88 | unw_word t, off; | ||
89 | |||
90 | byte1 = *dp++; | ||
91 | t = unw_decode_uleb128 (&dp); | ||
92 | off = unw_decode_uleb128 (&dp); | ||
93 | abreg = (byte1 & 0x7f); | ||
94 | if (byte1 & 0x80) | ||
95 | UNW_DEC_SPILL_SPREL(X1, t, abreg, off, arg); | ||
96 | else | ||
97 | UNW_DEC_SPILL_PSPREL(X1, t, abreg, off, arg); | ||
98 | return dp; | ||
99 | } | ||
100 | |||
101 | static unsigned char * | ||
102 | unw_decode_x2 (unsigned char *dp, unsigned char code, void *arg) | ||
103 | { | ||
104 | unsigned char byte1, byte2, abreg, x, ytreg; | ||
105 | unw_word t; | ||
106 | |||
107 | byte1 = *dp++; byte2 = *dp++; | ||
108 | t = unw_decode_uleb128 (&dp); | ||
109 | abreg = (byte1 & 0x7f); | ||
110 | ytreg = byte2; | ||
111 | x = (byte1 >> 7) & 1; | ||
112 | if ((byte1 & 0x80) == 0 && ytreg == 0) | ||
113 | UNW_DEC_RESTORE(X2, t, abreg, arg); | ||
114 | else | ||
115 | UNW_DEC_SPILL_REG(X2, t, abreg, x, ytreg, arg); | ||
116 | return dp; | ||
117 | } | ||
118 | |||
119 | static unsigned char * | ||
120 | unw_decode_x3 (unsigned char *dp, unsigned char code, void *arg) | ||
121 | { | ||
122 | unsigned char byte1, byte2, abreg, qp; | ||
123 | unw_word t, off; | ||
124 | |||
125 | byte1 = *dp++; byte2 = *dp++; | ||
126 | t = unw_decode_uleb128 (&dp); | ||
127 | off = unw_decode_uleb128 (&dp); | ||
128 | |||
129 | qp = (byte1 & 0x3f); | ||
130 | abreg = (byte2 & 0x7f); | ||
131 | |||
132 | if (byte1 & 0x80) | ||
133 | UNW_DEC_SPILL_SPREL_P(X3, qp, t, abreg, off, arg); | ||
134 | else | ||
135 | UNW_DEC_SPILL_PSPREL_P(X3, qp, t, abreg, off, arg); | ||
136 | return dp; | ||
137 | } | ||
138 | |||
139 | static unsigned char * | ||
140 | unw_decode_x4 (unsigned char *dp, unsigned char code, void *arg) | ||
141 | { | ||
142 | unsigned char byte1, byte2, byte3, qp, abreg, x, ytreg; | ||
143 | unw_word t; | ||
144 | |||
145 | byte1 = *dp++; byte2 = *dp++; byte3 = *dp++; | ||
146 | t = unw_decode_uleb128 (&dp); | ||
147 | |||
148 | qp = (byte1 & 0x3f); | ||
149 | abreg = (byte2 & 0x7f); | ||
150 | x = (byte2 >> 7) & 1; | ||
151 | ytreg = byte3; | ||
152 | |||
153 | if ((byte2 & 0x80) == 0 && byte3 == 0) | ||
154 | UNW_DEC_RESTORE_P(X4, qp, t, abreg, arg); | ||
155 | else | ||
156 | UNW_DEC_SPILL_REG_P(X4, qp, t, abreg, x, ytreg, arg); | ||
157 | return dp; | ||
158 | } | ||
159 | |||
160 | static unsigned char * | ||
161 | unw_decode_r1 (unsigned char *dp, unsigned char code, void *arg) | ||
162 | { | ||
163 | int body = (code & 0x20) != 0; | ||
164 | unw_word rlen; | ||
165 | |||
166 | rlen = (code & 0x1f); | ||
167 | UNW_DEC_PROLOGUE(R1, body, rlen, arg); | ||
168 | return dp; | ||
169 | } | ||
170 | |||
171 | static unsigned char * | ||
172 | unw_decode_r2 (unsigned char *dp, unsigned char code, void *arg) | ||
173 | { | ||
174 | unsigned char byte1, mask, grsave; | ||
175 | unw_word rlen; | ||
176 | |||
177 | byte1 = *dp++; | ||
178 | |||
179 | mask = ((code & 0x7) << 1) | ((byte1 >> 7) & 1); | ||
180 | grsave = (byte1 & 0x7f); | ||
181 | rlen = unw_decode_uleb128 (&dp); | ||
182 | UNW_DEC_PROLOGUE_GR(R2, rlen, mask, grsave, arg); | ||
183 | return dp; | ||
184 | } | ||
185 | |||
186 | static unsigned char * | ||
187 | unw_decode_r3 (unsigned char *dp, unsigned char code, void *arg) | ||
188 | { | ||
189 | unw_word rlen; | ||
190 | |||
191 | rlen = unw_decode_uleb128 (&dp); | ||
192 | UNW_DEC_PROLOGUE(R3, ((code & 0x3) == 1), rlen, arg); | ||
193 | return dp; | ||
194 | } | ||
195 | |||
196 | static unsigned char * | ||
197 | unw_decode_p1 (unsigned char *dp, unsigned char code, void *arg) | ||
198 | { | ||
199 | unsigned char brmask = (code & 0x1f); | ||
200 | |||
201 | UNW_DEC_BR_MEM(P1, brmask, arg); | ||
202 | return dp; | ||
203 | } | ||
204 | |||
205 | static unsigned char * | ||
206 | unw_decode_p2_p5 (unsigned char *dp, unsigned char code, void *arg) | ||
207 | { | ||
208 | if ((code & 0x10) == 0) | ||
209 | { | ||
210 | unsigned char byte1 = *dp++; | ||
211 | |||
212 | UNW_DEC_BR_GR(P2, ((code & 0xf) << 1) | ((byte1 >> 7) & 1), | ||
213 | (byte1 & 0x7f), arg); | ||
214 | } | ||
215 | else if ((code & 0x08) == 0) | ||
216 | { | ||
217 | unsigned char byte1 = *dp++, r, dst; | ||
218 | |||
219 | r = ((code & 0x7) << 1) | ((byte1 >> 7) & 1); | ||
220 | dst = (byte1 & 0x7f); | ||
221 | switch (r) | ||
222 | { | ||
223 | case 0: UNW_DEC_REG_GR(P3, UNW_REG_PSP, dst, arg); break; | ||
224 | case 1: UNW_DEC_REG_GR(P3, UNW_REG_RP, dst, arg); break; | ||
225 | case 2: UNW_DEC_REG_GR(P3, UNW_REG_PFS, dst, arg); break; | ||
226 | case 3: UNW_DEC_REG_GR(P3, UNW_REG_PR, dst, arg); break; | ||
227 | case 4: UNW_DEC_REG_GR(P3, UNW_REG_UNAT, dst, arg); break; | ||
228 | case 5: UNW_DEC_REG_GR(P3, UNW_REG_LC, dst, arg); break; | ||
229 | case 6: UNW_DEC_RP_BR(P3, dst, arg); break; | ||
230 | case 7: UNW_DEC_REG_GR(P3, UNW_REG_RNAT, dst, arg); break; | ||
231 | case 8: UNW_DEC_REG_GR(P3, UNW_REG_BSP, dst, arg); break; | ||
232 | case 9: UNW_DEC_REG_GR(P3, UNW_REG_BSPSTORE, dst, arg); break; | ||
233 | case 10: UNW_DEC_REG_GR(P3, UNW_REG_FPSR, dst, arg); break; | ||
234 | case 11: UNW_DEC_PRIUNAT_GR(P3, dst, arg); break; | ||
235 | default: UNW_DEC_BAD_CODE(r); break; | ||
236 | } | ||
237 | } | ||
238 | else if ((code & 0x7) == 0) | ||
239 | UNW_DEC_SPILL_MASK(P4, dp, arg); | ||
240 | else if ((code & 0x7) == 1) | ||
241 | { | ||
242 | unw_word grmask, frmask, byte1, byte2, byte3; | ||
243 | |||
244 | byte1 = *dp++; byte2 = *dp++; byte3 = *dp++; | ||
245 | grmask = ((byte1 >> 4) & 0xf); | ||
246 | frmask = ((byte1 & 0xf) << 16) | (byte2 << 8) | byte3; | ||
247 | UNW_DEC_FRGR_MEM(P5, grmask, frmask, arg); | ||
248 | } | ||
249 | else | ||
250 | UNW_DEC_BAD_CODE(code); | ||
251 | return dp; | ||
252 | } | ||
253 | |||
254 | static unsigned char * | ||
255 | unw_decode_p6 (unsigned char *dp, unsigned char code, void *arg) | ||
256 | { | ||
257 | int gregs = (code & 0x10) != 0; | ||
258 | unsigned char mask = (code & 0x0f); | ||
259 | |||
260 | if (gregs) | ||
261 | UNW_DEC_GR_MEM(P6, mask, arg); | ||
262 | else | ||
263 | UNW_DEC_FR_MEM(P6, mask, arg); | ||
264 | return dp; | ||
265 | } | ||
266 | |||
267 | static unsigned char * | ||
268 | unw_decode_p7_p10 (unsigned char *dp, unsigned char code, void *arg) | ||
269 | { | ||
270 | unsigned char r, byte1, byte2; | ||
271 | unw_word t, size; | ||
272 | |||
273 | if ((code & 0x10) == 0) | ||
274 | { | ||
275 | r = (code & 0xf); | ||
276 | t = unw_decode_uleb128 (&dp); | ||
277 | switch (r) | ||
278 | { | ||
279 | case 0: | ||
280 | size = unw_decode_uleb128 (&dp); | ||
281 | UNW_DEC_MEM_STACK_F(P7, t, size, arg); | ||
282 | break; | ||
283 | |||
284 | case 1: UNW_DEC_MEM_STACK_V(P7, t, arg); break; | ||
285 | case 2: UNW_DEC_SPILL_BASE(P7, t, arg); break; | ||
286 | case 3: UNW_DEC_REG_SPREL(P7, UNW_REG_PSP, t, arg); break; | ||
287 | case 4: UNW_DEC_REG_WHEN(P7, UNW_REG_RP, t, arg); break; | ||
288 | case 5: UNW_DEC_REG_PSPREL(P7, UNW_REG_RP, t, arg); break; | ||
289 | case 6: UNW_DEC_REG_WHEN(P7, UNW_REG_PFS, t, arg); break; | ||
290 | case 7: UNW_DEC_REG_PSPREL(P7, UNW_REG_PFS, t, arg); break; | ||
291 | case 8: UNW_DEC_REG_WHEN(P7, UNW_REG_PR, t, arg); break; | ||
292 | case 9: UNW_DEC_REG_PSPREL(P7, UNW_REG_PR, t, arg); break; | ||
293 | case 10: UNW_DEC_REG_WHEN(P7, UNW_REG_LC, t, arg); break; | ||
294 | case 11: UNW_DEC_REG_PSPREL(P7, UNW_REG_LC, t, arg); break; | ||
295 | case 12: UNW_DEC_REG_WHEN(P7, UNW_REG_UNAT, t, arg); break; | ||
296 | case 13: UNW_DEC_REG_PSPREL(P7, UNW_REG_UNAT, t, arg); break; | ||
297 | case 14: UNW_DEC_REG_WHEN(P7, UNW_REG_FPSR, t, arg); break; | ||
298 | case 15: UNW_DEC_REG_PSPREL(P7, UNW_REG_FPSR, t, arg); break; | ||
299 | default: UNW_DEC_BAD_CODE(r); break; | ||
300 | } | ||
301 | } | ||
302 | else | ||
303 | { | ||
304 | switch (code & 0xf) | ||
305 | { | ||
306 | case 0x0: /* p8 */ | ||
307 | { | ||
308 | r = *dp++; | ||
309 | t = unw_decode_uleb128 (&dp); | ||
310 | switch (r) | ||
311 | { | ||
312 | case 1: UNW_DEC_REG_SPREL(P8, UNW_REG_RP, t, arg); break; | ||
313 | case 2: UNW_DEC_REG_SPREL(P8, UNW_REG_PFS, t, arg); break; | ||
314 | case 3: UNW_DEC_REG_SPREL(P8, UNW_REG_PR, t, arg); break; | ||
315 | case 4: UNW_DEC_REG_SPREL(P8, UNW_REG_LC, t, arg); break; | ||
316 | case 5: UNW_DEC_REG_SPREL(P8, UNW_REG_UNAT, t, arg); break; | ||
317 | case 6: UNW_DEC_REG_SPREL(P8, UNW_REG_FPSR, t, arg); break; | ||
318 | case 7: UNW_DEC_REG_WHEN(P8, UNW_REG_BSP, t, arg); break; | ||
319 | case 8: UNW_DEC_REG_PSPREL(P8, UNW_REG_BSP, t, arg); break; | ||
320 | case 9: UNW_DEC_REG_SPREL(P8, UNW_REG_BSP, t, arg); break; | ||
321 | case 10: UNW_DEC_REG_WHEN(P8, UNW_REG_BSPSTORE, t, arg); break; | ||
322 | case 11: UNW_DEC_REG_PSPREL(P8, UNW_REG_BSPSTORE, t, arg); break; | ||
323 | case 12: UNW_DEC_REG_SPREL(P8, UNW_REG_BSPSTORE, t, arg); break; | ||
324 | case 13: UNW_DEC_REG_WHEN(P8, UNW_REG_RNAT, t, arg); break; | ||
325 | case 14: UNW_DEC_REG_PSPREL(P8, UNW_REG_RNAT, t, arg); break; | ||
326 | case 15: UNW_DEC_REG_SPREL(P8, UNW_REG_RNAT, t, arg); break; | ||
327 | case 16: UNW_DEC_PRIUNAT_WHEN_GR(P8, t, arg); break; | ||
328 | case 17: UNW_DEC_PRIUNAT_PSPREL(P8, t, arg); break; | ||
329 | case 18: UNW_DEC_PRIUNAT_SPREL(P8, t, arg); break; | ||
330 | case 19: UNW_DEC_PRIUNAT_WHEN_MEM(P8, t, arg); break; | ||
331 | default: UNW_DEC_BAD_CODE(r); break; | ||
332 | } | ||
333 | } | ||
334 | break; | ||
335 | |||
336 | case 0x1: | ||
337 | byte1 = *dp++; byte2 = *dp++; | ||
338 | UNW_DEC_GR_GR(P9, (byte1 & 0xf), (byte2 & 0x7f), arg); | ||
339 | break; | ||
340 | |||
341 | case 0xf: /* p10 */ | ||
342 | byte1 = *dp++; byte2 = *dp++; | ||
343 | UNW_DEC_ABI(P10, byte1, byte2, arg); | ||
344 | break; | ||
345 | |||
346 | case 0x9: | ||
347 | return unw_decode_x1 (dp, code, arg); | ||
348 | |||
349 | case 0xa: | ||
350 | return unw_decode_x2 (dp, code, arg); | ||
351 | |||
352 | case 0xb: | ||
353 | return unw_decode_x3 (dp, code, arg); | ||
354 | |||
355 | case 0xc: | ||
356 | return unw_decode_x4 (dp, code, arg); | ||
357 | |||
358 | default: | ||
359 | UNW_DEC_BAD_CODE(code); | ||
360 | break; | ||
361 | } | ||
362 | } | ||
363 | return dp; | ||
364 | } | ||
365 | |||
366 | static unsigned char * | ||
367 | unw_decode_b1 (unsigned char *dp, unsigned char code, void *arg) | ||
368 | { | ||
369 | unw_word label = (code & 0x1f); | ||
370 | |||
371 | if ((code & 0x20) != 0) | ||
372 | UNW_DEC_COPY_STATE(B1, label, arg); | ||
373 | else | ||
374 | UNW_DEC_LABEL_STATE(B1, label, arg); | ||
375 | return dp; | ||
376 | } | ||
377 | |||
378 | static unsigned char * | ||
379 | unw_decode_b2 (unsigned char *dp, unsigned char code, void *arg) | ||
380 | { | ||
381 | unw_word t; | ||
382 | |||
383 | t = unw_decode_uleb128 (&dp); | ||
384 | UNW_DEC_EPILOGUE(B2, t, (code & 0x1f), arg); | ||
385 | return dp; | ||
386 | } | ||
387 | |||
388 | static unsigned char * | ||
389 | unw_decode_b3_x4 (unsigned char *dp, unsigned char code, void *arg) | ||
390 | { | ||
391 | unw_word t, ecount, label; | ||
392 | |||
393 | if ((code & 0x10) == 0) | ||
394 | { | ||
395 | t = unw_decode_uleb128 (&dp); | ||
396 | ecount = unw_decode_uleb128 (&dp); | ||
397 | UNW_DEC_EPILOGUE(B3, t, ecount, arg); | ||
398 | } | ||
399 | else if ((code & 0x07) == 0) | ||
400 | { | ||
401 | label = unw_decode_uleb128 (&dp); | ||
402 | if ((code & 0x08) != 0) | ||
403 | UNW_DEC_COPY_STATE(B4, label, arg); | ||
404 | else | ||
405 | UNW_DEC_LABEL_STATE(B4, label, arg); | ||
406 | } | ||
407 | else | ||
408 | switch (code & 0x7) | ||
409 | { | ||
410 | case 1: return unw_decode_x1 (dp, code, arg); | ||
411 | case 2: return unw_decode_x2 (dp, code, arg); | ||
412 | case 3: return unw_decode_x3 (dp, code, arg); | ||
413 | case 4: return unw_decode_x4 (dp, code, arg); | ||
414 | default: UNW_DEC_BAD_CODE(code); break; | ||
415 | } | ||
416 | return dp; | ||
417 | } | ||
418 | |||
419 | typedef unsigned char *(*unw_decoder) (unsigned char *, unsigned char, void *); /* (dp, code, arg) -> address of next descriptor byte */ | ||
420 | |||
421 | static unw_decoder unw_decode_table[2][8] = /* indexed as [inside_body][code >> 5] by unw_decode() */ | ||
422 | { | ||
423 | /* prologue table: */ | ||
424 | { | ||
425 | unw_decode_r1, /* 0 */ | ||
426 | unw_decode_r1, | ||
427 | unw_decode_r2, | ||
428 | unw_decode_r3, | ||
429 | unw_decode_p1, /* 4 */ | ||
430 | unw_decode_p2_p5, | ||
431 | unw_decode_p6, | ||
432 | unw_decode_p7_p10 | ||
433 | }, | ||
434 | { /* body table: entries 0-3 (the R formats) are the same as in the prologue table */ | ||
435 | unw_decode_r1, /* 0 */ | ||
436 | unw_decode_r1, | ||
437 | unw_decode_r2, | ||
438 | unw_decode_r3, | ||
439 | unw_decode_b1, /* 4 */ | ||
440 | unw_decode_b1, | ||
441 | unw_decode_b2, | ||
442 | unw_decode_b3_x4 | ||
443 | } | ||
444 | }; | ||
445 | |||
446 | /* | ||
447 | * Decode one descriptor and return address of next descriptor. | ||
448 | */ | ||
449 | static inline unsigned char * | ||
450 | unw_decode (unsigned char *dp, int inside_body, void *arg) | ||
451 | { | ||
452 | unw_decoder decoder; | ||
453 | unsigned char code; | ||
454 | |||
455 | code = *dp++; | ||
456 | decoder = unw_decode_table[inside_body][code >> 5]; | ||
457 | dp = (*decoder) (dp, code, arg); | ||
458 | return dp; | ||
459 | } | ||
diff --git a/arch/ia64/kernel/unwind_i.h b/arch/ia64/kernel/unwind_i.h new file mode 100644 index 000000000000..96693a6ae370 --- /dev/null +++ b/arch/ia64/kernel/unwind_i.h | |||
@@ -0,0 +1,164 @@ | |||
1 | /* | ||
2 | * Copyright (C) 2000, 2002-2003 Hewlett-Packard Co | ||
3 | * David Mosberger-Tang <davidm@hpl.hp.com> | ||
4 | * | ||
5 | * Kernel unwind support. | ||
6 | */ | ||
7 | |||
8 | #define UNW_VER(x) ((x) >> 48) /* x shifted down by 48 bits; NOTE(review): x is presumably unw_info_block.header for all of these -- confirm */ | ||
9 | #define UNW_FLAG_MASK 0x0000ffff00000000 /* bits 32-47 */ | ||
10 | #define UNW_FLAG_OSMASK 0x0000f00000000000 /* bits 44-47, i.e. the top four bits of UNW_FLAG_MASK */ | ||
11 | #define UNW_FLAG_EHANDLER(x) ((x) & 0x0000000100000000L) /* non-zero iff bit 32 of x is set */ | ||
12 | #define UNW_FLAG_UHANDLER(x) ((x) & 0x0000000200000000L) /* non-zero iff bit 33 of x is set */ | ||
13 | #define UNW_LENGTH(x) ((x) & 0x00000000ffffffffL) /* low 32 bits of x */ | ||
14 | |||
15 | enum unw_register_index { /* one index per tracked register; UNW_NUM_REGS sizes unw_reg_state.reg[] */ | ||
16 | /* primary unat: */ | ||
17 | UNW_REG_PRI_UNAT_GR, | ||
18 | UNW_REG_PRI_UNAT_MEM, | ||
19 | |||
20 | /* register stack */ | ||
21 | UNW_REG_BSP, /* register stack pointer */ | ||
22 | UNW_REG_BSPSTORE, | ||
23 | UNW_REG_PFS, /* previous function state */ | ||
24 | UNW_REG_RNAT, | ||
25 | /* memory stack */ | ||
26 | UNW_REG_PSP, /* previous memory stack pointer */ | ||
27 | /* return pointer: */ | ||
28 | UNW_REG_RP, | ||
29 | |||
30 | /* preserved registers: */ | ||
31 | UNW_REG_R4, UNW_REG_R5, UNW_REG_R6, UNW_REG_R7, | ||
32 | UNW_REG_UNAT, UNW_REG_PR, UNW_REG_LC, UNW_REG_FPSR, | ||
33 | UNW_REG_B1, UNW_REG_B2, UNW_REG_B3, UNW_REG_B4, UNW_REG_B5, | ||
34 | UNW_REG_F2, UNW_REG_F3, UNW_REG_F4, UNW_REG_F5, | ||
35 | UNW_REG_F16, UNW_REG_F17, UNW_REG_F18, UNW_REG_F19, | ||
36 | UNW_REG_F20, UNW_REG_F21, UNW_REG_F22, UNW_REG_F23, | ||
37 | UNW_REG_F24, UNW_REG_F25, UNW_REG_F26, UNW_REG_F27, | ||
38 | UNW_REG_F28, UNW_REG_F29, UNW_REG_F30, UNW_REG_F31, | ||
39 | UNW_NUM_REGS /* number of indices above; must remain the last enumerator */ | ||
40 | }; | ||
41 | |||
42 | struct unw_info_block { /* layout of one block of unwind info: a header word followed by descriptors */ | ||
43 | u64 header; /* NOTE(review): presumably the word the UNW_VER/UNW_FLAG_*/UNW_LENGTH macros pick apart -- confirm */ | ||
44 | u64 desc[0]; /* unwind descriptors (zero-length array: they start right after header) */ | ||
45 | /* personality routine and language-specific data follow behind descriptors */ | ||
46 | }; | ||
47 | |||
48 | struct unw_table { /* one unwind table; tables are chained through next */ | ||
49 | struct unw_table *next; /* must be first member! */ | ||
50 | const char *name; | ||
51 | unsigned long gp; /* global pointer for this load-module */ | ||
52 | unsigned long segment_base; /* base for offsets in the unwind table entries */ | ||
53 | unsigned long start; /* NOTE(review): start/end presumably bound the address range this table covers -- confirm */ | ||
54 | unsigned long end; | ||
55 | const struct unw_table_entry *array; /* the table entries themselves */ | ||
56 | unsigned long length; /* NOTE(review): presumably the number of entries in array -- confirm */ | ||
57 | }; | ||
58 | |||
59 | enum unw_where { /* kind of save location; stored in unw_reg_info.where */ | ||
60 | UNW_WHERE_NONE, /* register isn't saved at all */ | ||
61 | UNW_WHERE_GR, /* register is saved in a general register */ | ||
62 | UNW_WHERE_FR, /* register is saved in a floating-point register */ | ||
63 | UNW_WHERE_BR, /* register is saved in a branch register */ | ||
64 | UNW_WHERE_SPREL, /* register is saved on memstack (sp-relative) */ | ||
65 | UNW_WHERE_PSPREL, /* register is saved on memstack (psp-relative) */ | ||
66 | /* | ||
67 | * At the end of each prologue these locations get resolved to | ||
68 | * UNW_WHERE_PSPREL and UNW_WHERE_GR, respectively: | ||
69 | */ | ||
70 | UNW_WHERE_SPILL_HOME, /* register is saved in its spill home */ | ||
71 | UNW_WHERE_GR_SAVE /* register is saved in next general register */ | ||
72 | }; | ||
73 | |||
74 | #define UNW_WHEN_NEVER 0x7fffffff /* largest positive 32-bit int; NOTE(review): presumably the unw_reg_info.when value of a register that is never saved -- confirm */ | ||
75 | |||
76 | struct unw_reg_info { /* where and when one register gets saved */ | ||
77 | unsigned long val; /* save location: register number or offset */ | ||
78 | enum unw_where where; /* where the register gets saved */ | ||
79 | int when; /* when the register gets saved */ | ||
80 | }; | ||
81 | |||
82 | struct unw_reg_state { /* save info for every tracked register; states are linked into a stack through next */ | ||
83 | struct unw_reg_state *next; /* next (outer) element on state stack */ | ||
84 | struct unw_reg_info reg[UNW_NUM_REGS]; /* register save locations, indexed by enum unw_register_index */ | ||
85 | }; | ||
86 | |||
87 | struct unw_labeled_state { /* a register state stored under a label, kept on a singly linked list */ | ||
88 | struct unw_labeled_state *next; /* next labeled state (or NULL) */ | ||
89 | unsigned long label; /* label for this state */ | ||
90 | struct unw_reg_state saved_state; /* the state stored under this label */ | ||
91 | }; | ||
92 | |||
93 | struct unw_state_record { /* working state kept while scanning unwind descriptors */ | ||
94 | unsigned int first_region : 1; /* is this the first region? */ | ||
95 | unsigned int done : 1; /* are we done scanning descriptors? */ | ||
96 | unsigned int any_spills : 1; /* got any register spills? */ | ||
97 | unsigned int in_body : 1; /* are we inside a body (as opposed to a prologue)? */ | ||
98 | unsigned long flags; /* see UNW_FLAG_* in unwind.h */ | ||
99 | |||
100 | u8 *imask; /* imask of spill_mask record or NULL */ | ||
101 | unsigned long pr_val; /* predicate values */ | ||
102 | unsigned long pr_mask; /* predicate mask */ | ||
103 | long spill_offset; /* psp-relative offset for spill base */ | ||
104 | int region_start; | ||
105 | int region_len; | ||
106 | int epilogue_start; | ||
107 | int epilogue_count; | ||
108 | int when_target; | ||
109 | |||
110 | u8 gr_save_loc; /* next general register to use for saving a register */ | ||
111 | u8 return_link_reg; /* branch register in which the return link is passed */ | ||
112 | |||
113 | struct unw_labeled_state *labeled_states; /* list of all labeled states */ | ||
114 | struct unw_reg_state curr; /* current state */ | ||
115 | }; | ||
116 | |||
117 | enum unw_nat_type { /* how the NaT bit of a saved register is represented */ | ||
118 | UNW_NAT_NONE, /* NaT not represented */ | ||
119 | UNW_NAT_VAL, /* NaT represented by NaT value (fp reg) */ | ||
120 | UNW_NAT_MEMSTK, /* NaT value is in unat word at offset OFF */ | ||
121 | UNW_NAT_REGSTK /* NaT is in rnat */ | ||
122 | }; | ||
123 | |||
124 | enum unw_insn_opcode { /* opcodes of unwind-script instructions; "dst" and "val" below are the struct unw_insn fields */ | ||
125 | UNW_INSN_ADD, /* s[dst] += val */ | ||
126 | UNW_INSN_ADD_PSP, /* s[dst] = (s.psp + val) */ | ||
127 | UNW_INSN_ADD_SP, /* s[dst] = (s.sp + val) */ | ||
128 | UNW_INSN_MOVE, /* s[dst] = s[val] */ | ||
129 | UNW_INSN_MOVE2, /* s[dst] = s[val]; s[dst+1] = s[val+1] */ | ||
130 | UNW_INSN_MOVE_STACKED, /* s[dst] = ia64_rse_skip(*s.bsp, val) */ | ||
131 | UNW_INSN_SETNAT_MEMSTK, /* s[dst+1].nat.type = MEMSTK; | ||
132 | s[dst+1].nat.off = *s.pri_unat - s[dst] */ | ||
133 | UNW_INSN_SETNAT_TYPE, /* s[dst+1].nat.type = val */ | ||
134 | UNW_INSN_LOAD, /* s[dst] = *s[val] */ | ||
135 | UNW_INSN_MOVE_SCRATCH, /* s[dst] = scratch reg "val" */ | ||
136 | UNW_INSN_MOVE_CONST, /* s[dst] = constant reg "val" */ | ||
137 | }; | ||
138 | |||
139 | struct unw_insn { /* one script instruction; the three bit-fields total 4 + 9 + 19 = 32 bits */ | ||
140 | unsigned int opc : 4; /* NOTE(review): presumably an enum unw_insn_opcode value -- confirm */ | ||
141 | unsigned int dst : 9; /* the "dst" operand in the enum unw_insn_opcode comments */ | ||
142 | signed int val : 19; /* the "val" operand; signed */ | ||
143 | }; | ||
144 | |||
145 | /* | ||
146 | * Preserved general static registers (r4-r7) give rise to two script | ||
147 | * instructions; everything else yields at most one instruction; at | ||
148 | * the end of the script, the psp gets popped, accounting for one more | ||
149 | * instruction.  Hence UNW_NUM_REGS + 4 (one extra for each of r4-r7) + 1 (psp pop). | ||
150 | */ | ||
151 | #define UNW_MAX_SCRIPT_LEN (UNW_NUM_REGS + 5) | ||
152 | |||
153 | struct unw_script { /* a script of up to UNW_MAX_SCRIPT_LEN instructions for one ip */ | ||
154 | unsigned long ip; /* ip this script is for */ | ||
155 | unsigned long pr_mask; /* mask of predicates script depends on */ | ||
156 | unsigned long pr_val; /* predicate values this script is for */ | ||
157 | rwlock_t lock; | ||
158 | unsigned int flags; /* see UNW_FLAG_* in unwind.h */ | ||
159 | unsigned short lru_chain; /* used for least-recently-used chain */ | ||
160 | unsigned short coll_chain; /* used for hash collisions */ | ||
161 | unsigned short hint; /* hint for next script to try (or -1, which this unsigned field holds as (unsigned short) -1) */ | ||
162 | unsigned short count; /* number of instructions in script */ | ||
163 | struct unw_insn insn[UNW_MAX_SCRIPT_LEN]; /* the instructions; the first count entries are in use */ | ||
164 | }; | ||
diff --git a/arch/ia64/kernel/vmlinux.lds.S b/arch/ia64/kernel/vmlinux.lds.S new file mode 100644 index 000000000000..b9f0db4c1b04 --- /dev/null +++ b/arch/ia64/kernel/vmlinux.lds.S | |||
@@ -0,0 +1,251 @@ | |||
1 | #include <linux/config.h> | ||
2 | |||
3 | #include <asm/cache.h> | ||
4 | #include <asm/ptrace.h> | ||
5 | #include <asm/system.h> | ||
6 | #include <asm/pgtable.h> | ||
7 | |||
8 | #define LOAD_OFFSET (KERNEL_START - KERNEL_TR_PAGE_SIZE) | ||
9 | #include <asm-generic/vmlinux.lds.h> | ||
10 | |||
11 | OUTPUT_FORMAT("elf64-ia64-little") | ||
12 | OUTPUT_ARCH(ia64) | ||
13 | ENTRY(phys_start) | ||
14 | jiffies = jiffies_64; /* jiffies is an alias for the address of jiffies_64 */ | ||
15 | PHDRS { /* three loadable segments; sections are assigned to them with ":name" below */ | ||
16 | code PT_LOAD; | ||
17 | percpu PT_LOAD; | ||
18 | data PT_LOAD; | ||
19 | } | ||
20 | SECTIONS | ||
21 | { | ||
22 | /* Sections to be discarded */ | ||
23 | /DISCARD/ : { | ||
24 | *(.exit.text) | ||
25 | *(.exit.data) | ||
26 | *(.exitcall.exit) | ||
27 | *(.IA_64.unwind.exit.text) | ||
28 | *(.IA_64.unwind_info.exit.text) | ||
29 | } | ||
30 | |||
31 | v = PAGE_OFFSET; /* this symbol is here to make debugging easier... */ | ||
32 | phys_start = _start - LOAD_OFFSET; /* the ENTRY symbol: _start with LOAD_OFFSET subtracted */ | ||
33 | |||
34 | code : { } :code /* empty output section; ":code" puts the sections that follow into the code segment */ | ||
35 | . = KERNEL_START; | ||
36 | |||
37 | _text = .; | ||
38 | _stext = .; | ||
39 | |||
40 | .text : AT(ADDR(.text) - LOAD_OFFSET) /* AT(): load address = link address - LOAD_OFFSET; same pattern for the sections below */ | ||
41 | { | ||
42 | *(.text.ivt) | ||
43 | *(.text) | ||
44 | SCHED_TEXT | ||
45 | LOCK_TEXT | ||
46 | *(.gnu.linkonce.t*) | ||
47 | } | ||
48 | .text2 : AT(ADDR(.text2) - LOAD_OFFSET) | ||
49 | { *(.text2) } | ||
50 | #ifdef CONFIG_SMP | ||
51 | .text.lock : AT(ADDR(.text.lock) - LOAD_OFFSET) | ||
52 | { *(.text.lock) } | ||
53 | #endif | ||
54 | _etext = .; | ||
55 | |||
56 | /* Read-only data */ | ||
57 | |||
58 | /* Exception table */ | ||
59 | . = ALIGN(16); | ||
60 | __ex_table : AT(ADDR(__ex_table) - LOAD_OFFSET) | ||
61 | { | ||
62 | __start___ex_table = .; | ||
63 | *(__ex_table) | ||
64 | __stop___ex_table = .; | ||
65 | } | ||
66 | |||
67 | .data.patch.vtop : AT(ADDR(.data.patch.vtop) - LOAD_OFFSET) | ||
68 | { | ||
69 | __start___vtop_patchlist = .; | ||
70 | *(.data.patch.vtop) | ||
71 | __end___vtop_patchlist = .; | ||
72 | } | ||
73 | |||
74 | .data.patch.mckinley_e9 : AT(ADDR(.data.patch.mckinley_e9) - LOAD_OFFSET) | ||
75 | { | ||
76 | __start___mckinley_e9_bundles = .; | ||
77 | *(.data.patch.mckinley_e9) | ||
78 | __end___mckinley_e9_bundles = .; | ||
79 | } | ||
80 | |||
81 | /* Global data */ | ||
82 | _data = .; | ||
83 | |||
84 | #if defined(CONFIG_IA64_GENERIC) | ||
85 | /* Machine Vector */ | ||
86 | . = ALIGN(16); | ||
87 | .machvec : AT(ADDR(.machvec) - LOAD_OFFSET) | ||
88 | { | ||
89 | machvec_start = .; | ||
90 | *(.machvec) | ||
91 | machvec_end = .; | ||
92 | } | ||
93 | #endif | ||
94 | |||
95 | /* Unwind info & table: */ | ||
96 | . = ALIGN(8); | ||
97 | .IA_64.unwind_info : AT(ADDR(.IA_64.unwind_info) - LOAD_OFFSET) | ||
98 | { *(.IA_64.unwind_info*) } | ||
99 | .IA_64.unwind : AT(ADDR(.IA_64.unwind) - LOAD_OFFSET) | ||
100 | { | ||
101 | __start_unwind = .; | ||
102 | *(.IA_64.unwind*) | ||
103 | __end_unwind = .; | ||
104 | } | ||
105 | |||
106 | RODATA | ||
107 | |||
108 | .opd : AT(ADDR(.opd) - LOAD_OFFSET) | ||
109 | { *(.opd) } | ||
110 | |||
111 | /* Initialization code and data: */ | ||
112 | |||
113 | . = ALIGN(PAGE_SIZE); | ||
114 | __init_begin = .; /* __init_begin and __init_end (below) are both page-aligned */ | ||
115 | .init.text : AT(ADDR(.init.text) - LOAD_OFFSET) | ||
116 | { | ||
117 | _sinittext = .; | ||
118 | *(.init.text) | ||
119 | _einittext = .; | ||
120 | } | ||
121 | |||
122 | .init.data : AT(ADDR(.init.data) - LOAD_OFFSET) | ||
123 | { *(.init.data) } | ||
124 | |||
125 | .init.ramfs : AT(ADDR(.init.ramfs) - LOAD_OFFSET) | ||
126 | { | ||
127 | __initramfs_start = .; | ||
128 | *(.init.ramfs) | ||
129 | __initramfs_end = .; | ||
130 | } | ||
131 | |||
132 | . = ALIGN(16); | ||
133 | .init.setup : AT(ADDR(.init.setup) - LOAD_OFFSET) | ||
134 | { | ||
135 | __setup_start = .; | ||
136 | *(.init.setup) | ||
137 | __setup_end = .; | ||
138 | } | ||
139 | .initcall.init : AT(ADDR(.initcall.init) - LOAD_OFFSET) | ||
140 | { | ||
141 | __initcall_start = .; /* input sections are laid out in level order, 1 through 7 */ | ||
142 | *(.initcall1.init) | ||
143 | *(.initcall2.init) | ||
144 | *(.initcall3.init) | ||
145 | *(.initcall4.init) | ||
146 | *(.initcall5.init) | ||
147 | *(.initcall6.init) | ||
148 | *(.initcall7.init) | ||
149 | __initcall_end = .; | ||
150 | } | ||
151 | __con_initcall_start = .; | ||
152 | .con_initcall.init : AT(ADDR(.con_initcall.init) - LOAD_OFFSET) | ||
153 | { *(.con_initcall.init) } | ||
154 | __con_initcall_end = .; | ||
155 | __security_initcall_start = .; | ||
156 | .security_initcall.init : AT(ADDR(.security_initcall.init) - LOAD_OFFSET) | ||
157 | { *(.security_initcall.init) } | ||
158 | __security_initcall_end = .; | ||
159 | . = ALIGN(PAGE_SIZE); | ||
160 | __init_end = .; | ||
161 | |||
162 | /* The initial task and kernel stack */ | ||
163 | .data.init_task : AT(ADDR(.data.init_task) - LOAD_OFFSET) | ||
164 | { *(.data.init_task) } | ||
165 | |||
166 | .data.page_aligned : AT(ADDR(.data.page_aligned) - LOAD_OFFSET) | ||
167 | { *(__special_page_section) | ||
168 | __start_gate_section = .; | ||
169 | *(.data.gate) | ||
170 | __stop_gate_section = .; | ||
171 | } | ||
172 | . = ALIGN(PAGE_SIZE); /* make sure the gate page doesn't expose kernel data */ | ||
173 | |||
174 | .data.cacheline_aligned : AT(ADDR(.data.cacheline_aligned) - LOAD_OFFSET) | ||
175 | { *(.data.cacheline_aligned) } | ||
176 | |||
177 | /* Per-cpu data: */ | ||
178 | percpu : { } :percpu /* switch the following sections to the percpu segment */ | ||
179 | . = ALIGN(PERCPU_PAGE_SIZE); | ||
180 | __phys_per_cpu_start = .; | ||
181 | .data.percpu PERCPU_ADDR : AT(__phys_per_cpu_start - LOAD_OFFSET) /* linked at PERCPU_ADDR, load address derived from __phys_per_cpu_start */ | ||
182 | { | ||
183 | __per_cpu_start = .; | ||
184 | *(.data.percpu) | ||
185 | __per_cpu_end = .; | ||
186 | } | ||
187 | . = __phys_per_cpu_start + PERCPU_PAGE_SIZE; /* ensure percpu data fits into percpu page size */ | ||
188 | |||
189 | data : { } :data /* switch the following sections to the data segment */ | ||
190 | .data : AT(ADDR(.data) - LOAD_OFFSET) | ||
191 | { *(.data) *(.data1) *(.gnu.linkonce.d*) CONSTRUCTORS } | ||
192 | |||
193 | . = ALIGN(16); /* gp must be 16-byte aligned for exc. table */ | ||
194 | .got : AT(ADDR(.got) - LOAD_OFFSET) | ||
195 | { *(.got.plt) *(.got) } | ||
196 | __gp = ADDR(.got) + 0x200000; /* __gp sits 0x200000 bytes (2MB) past the start of .got */ | ||
197 | /* We want the small data sections together, so single-instruction offsets | ||
198 | can access them all, and initialized data all before uninitialized, so | ||
199 | we can shorten the on-disk segment size. */ | ||
200 | .sdata : AT(ADDR(.sdata) - LOAD_OFFSET) | ||
201 | { *(.sdata) *(.sdata1) *(.srdata) } | ||
202 | _edata = .; | ||
203 | _bss = .; | ||
204 | .sbss : AT(ADDR(.sbss) - LOAD_OFFSET) | ||
205 | { *(.sbss) *(.scommon) } | ||
206 | .bss : AT(ADDR(.bss) - LOAD_OFFSET) | ||
207 | { *(.bss) *(COMMON) } | ||
208 | |||
209 | _end = .; | ||
210 | |||
211 | code : { } :code /* NOTE(review): switches back to the code segment ahead of the debug sections -- confirm intent */ | ||
212 | /* Stabs debugging sections. */ | ||
213 | .stab 0 : { *(.stab) } | ||
214 | .stabstr 0 : { *(.stabstr) } | ||
215 | .stab.excl 0 : { *(.stab.excl) } | ||
216 | .stab.exclstr 0 : { *(.stab.exclstr) } | ||
217 | .stab.index 0 : { *(.stab.index) } | ||
218 | .stab.indexstr 0 : { *(.stab.indexstr) } | ||
219 | /* DWARF debug sections. | ||
220 | Symbols in the DWARF debugging sections are relative to the beginning | ||
221 | of the section so we begin them at 0. */ | ||
222 | /* DWARF 1 */ | ||
223 | .debug 0 : { *(.debug) } | ||
224 | .line 0 : { *(.line) } | ||
225 | /* GNU DWARF 1 extensions */ | ||
226 | .debug_srcinfo 0 : { *(.debug_srcinfo) } | ||
227 | .debug_sfnames 0 : { *(.debug_sfnames) } | ||
228 | /* DWARF 1.1 and DWARF 2 */ | ||
229 | .debug_aranges 0 : { *(.debug_aranges) } | ||
230 | .debug_pubnames 0 : { *(.debug_pubnames) } | ||
231 | /* DWARF 2 */ | ||
232 | .debug_info 0 : { *(.debug_info) } | ||
233 | .debug_abbrev 0 : { *(.debug_abbrev) } | ||
234 | .debug_line 0 : { *(.debug_line) } | ||
235 | .debug_frame 0 : { *(.debug_frame) } | ||
236 | .debug_str 0 : { *(.debug_str) } | ||
237 | .debug_loc 0 : { *(.debug_loc) } | ||
238 | .debug_macinfo 0 : { *(.debug_macinfo) } | ||
239 | /* SGI/MIPS DWARF 2 extensions */ | ||
240 | .debug_weaknames 0 : { *(.debug_weaknames) } | ||
241 | .debug_funcnames 0 : { *(.debug_funcnames) } | ||
242 | .debug_typenames 0 : { *(.debug_typenames) } | ||
243 | .debug_varnames 0 : { *(.debug_varnames) } | ||
244 | /* These must appear regardless of . */ | ||
245 | /* Discard them for now since Intel SoftSDV cannot handle them. | ||
246 | .comment 0 : { *(.comment) } | ||
247 | .note 0 : { *(.note) } | ||
248 | */ | ||
249 | /DISCARD/ : { *(.comment) } | ||
250 | /DISCARD/ : { *(.note) } | ||
251 | } | ||