author    | Thomas Gleixner <tglx@linutronix.de> | 2010-10-23 05:23:37 -0400
committer | Thomas Gleixner <tglx@linutronix.de> | 2010-10-27 08:30:02 -0400
commit    | 329b84e42e3ee348b114fd0bfe4b2421e6139257 (patch)
tree      | 95fefceaf95025c4f06b0bce21bb49c4b34b3f6e /arch/x86/platform
parent    | 9694d4afc1ebe1e46cacfb78b107cd8f9fb550ba (diff)
x86: Move uv to platform
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Mike Travis <travis@sgi.com>
Diffstat (limited to 'arch/x86/platform')
-rw-r--r-- | arch/x86/platform/Makefile      |    1
-rw-r--r-- | arch/x86/platform/uv/Makefile   |    1
-rw-r--r-- | arch/x86/platform/uv/bios_uv.c  |  215
-rw-r--r-- | arch/x86/platform/uv/tlb_uv.c   | 1661
-rw-r--r-- | arch/x86/platform/uv/uv_irq.c   |  285
-rw-r--r-- | arch/x86/platform/uv/uv_sysfs.c |   76
-rw-r--r-- | arch/x86/platform/uv/uv_time.c  |  423
7 files changed, 2662 insertions, 0 deletions
diff --git a/arch/x86/platform/Makefile b/arch/x86/platform/Makefile
index 06761ed53747..8519b01f1acb 100644
--- a/arch/x86/platform/Makefile
+++ b/arch/x86/platform/Makefile
@@ -4,3 +4,4 @@ obj-y += mrst/
 obj-y += scx200/
 obj-y += sfi/
 obj-y += visws/
+obj-y += uv/
diff --git a/arch/x86/platform/uv/Makefile b/arch/x86/platform/uv/Makefile
new file mode 100644
index 000000000000..6c40995fefb8
--- /dev/null
+++ b/arch/x86/platform/uv/Makefile
@@ -0,0 +1 @@
+obj-$(CONFIG_X86_UV) += tlb_uv.o bios_uv.o uv_irq.o uv_sysfs.o uv_time.o
diff --git a/arch/x86/platform/uv/bios_uv.c b/arch/x86/platform/uv/bios_uv.c
new file mode 100644
index 000000000000..8bc57baaa9ad
--- /dev/null
+++ b/arch/x86/platform/uv/bios_uv.c
@@ -0,0 +1,215 @@
1 | /* | ||
2 | * BIOS run time interface routines. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify | ||
5 | * it under the terms of the GNU General Public License as published by | ||
6 | * the Free Software Foundation; either version 2 of the License, or | ||
7 | * (at your option) any later version. | ||
8 | * | ||
9 | * This program is distributed in the hope that it will be useful, | ||
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
12 | * GNU General Public License for more details. | ||
13 | * | ||
14 | * You should have received a copy of the GNU General Public License | ||
15 | * along with this program; if not, write to the Free Software | ||
16 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | ||
17 | * | ||
18 | * Copyright (c) 2008-2009 Silicon Graphics, Inc. All Rights Reserved. | ||
19 | * Copyright (c) Russ Anderson <rja@sgi.com> | ||
20 | */ | ||
21 | |||
22 | #include <linux/efi.h> | ||
23 | #include <asm/efi.h> | ||
24 | #include <linux/io.h> | ||
25 | #include <asm/uv/bios.h> | ||
26 | #include <asm/uv/uv_hub.h> | ||
27 | |||
28 | static struct uv_systab uv_systab; | ||
29 | |||
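For context (not part of this patch): uv_systab is the kernel's cached copy of the UV system table exported by the EFI firmware. Its layout, as declared in asm/uv/bios.h at the time, is roughly the following sketch:

	struct uv_systab {
		char signature[4];	/* must be "UVST" */
		u32 revision;		/* firmware revision */
		u64 function;		/* BIOS runtime callback entry point */
	};

uv_bios_call() below treats a zero function pointer as "no UV BIOS support".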
30 | s64 uv_bios_call(enum uv_bios_cmd which, u64 a1, u64 a2, u64 a3, u64 a4, u64 a5) | ||
31 | { | ||
32 | struct uv_systab *tab = &uv_systab; | ||
33 | s64 ret; | ||
34 | |||
35 | if (!tab->function) | ||
36 | /* | ||
37 | * BIOS does not support UV systab | ||
38 | */ | ||
39 | return BIOS_STATUS_UNIMPLEMENTED; | ||
40 | |||
41 | ret = efi_call6((void *)__va(tab->function), (u64)which, | ||
42 | a1, a2, a3, a4, a5); | ||
43 | return ret; | ||
44 | } | ||
45 | EXPORT_SYMBOL_GPL(uv_bios_call); | ||
46 | |||
47 | s64 uv_bios_call_irqsave(enum uv_bios_cmd which, u64 a1, u64 a2, u64 a3, | ||
48 | u64 a4, u64 a5) | ||
49 | { | ||
50 | unsigned long bios_flags; | ||
51 | s64 ret; | ||
52 | |||
53 | local_irq_save(bios_flags); | ||
54 | ret = uv_bios_call(which, a1, a2, a3, a4, a5); | ||
55 | local_irq_restore(bios_flags); | ||
56 | |||
57 | return ret; | ||
58 | } | ||
59 | |||
60 | s64 uv_bios_call_reentrant(enum uv_bios_cmd which, u64 a1, u64 a2, u64 a3, | ||
61 | u64 a4, u64 a5) | ||
62 | { | ||
63 | s64 ret; | ||
64 | |||
65 | preempt_disable(); | ||
66 | ret = uv_bios_call(which, a1, a2, a3, a4, a5); | ||
67 | preempt_enable(); | ||
68 | |||
69 | return ret; | ||
70 | } | ||
71 | |||
72 | |||
73 | long sn_partition_id; | ||
74 | EXPORT_SYMBOL_GPL(sn_partition_id); | ||
75 | long sn_coherency_id; | ||
76 | EXPORT_SYMBOL_GPL(sn_coherency_id); | ||
77 | long sn_region_size; | ||
78 | EXPORT_SYMBOL_GPL(sn_region_size); | ||
79 | long system_serial_number; | ||
80 | EXPORT_SYMBOL_GPL(system_serial_number); | ||
81 | int uv_type; | ||
82 | EXPORT_SYMBOL_GPL(uv_type); | ||
83 | |||
84 | |||
85 | s64 uv_bios_get_sn_info(int fc, int *uvtype, long *partid, long *coher, | ||
86 | long *region, long *ssn) | ||
87 | { | ||
88 | s64 ret; | ||
89 | u64 v0, v1; | ||
90 | union partition_info_u part; | ||
91 | |||
92 | ret = uv_bios_call_irqsave(UV_BIOS_GET_SN_INFO, fc, | ||
93 | (u64)(&v0), (u64)(&v1), 0, 0); | ||
94 | if (ret != BIOS_STATUS_SUCCESS) | ||
95 | return ret; | ||
96 | |||
97 | part.val = v0; | ||
98 | if (uvtype) | ||
99 | *uvtype = part.hub_version; | ||
100 | if (partid) | ||
101 | *partid = part.partition_id; | ||
102 | if (coher) | ||
103 | *coher = part.coherence_id; | ||
104 | if (region) | ||
105 | *region = part.region_size; | ||
106 | if (ssn) | ||
107 | *ssn = v1; | ||
108 | return ret; | ||
109 | } | ||
110 | EXPORT_SYMBOL_GPL(uv_bios_get_sn_info); | ||
111 | |||
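A minimal caller sketch for uv_bios_get_sn_info() (illustrative only, not part of this patch; the in-tree user is the UV setup code in arch/x86/kernel/apic/x2apic_uv_x.c, which passes 0 for fc):

	int uvtype;
	long partid, coher, region, ssn;

	if (uv_bios_get_sn_info(0, &uvtype, &partid, &coher, &region, &ssn) ==
			BIOS_STATUS_SUCCESS)
		printk(KERN_INFO "UV: hub version %d, partition %ld\n",
			uvtype, partid);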
112 | int | ||
113 | uv_bios_mq_watchlist_alloc(unsigned long addr, unsigned int mq_size, | ||
114 | unsigned long *intr_mmr_offset) | ||
115 | { | ||
116 | u64 watchlist; | ||
117 | s64 ret; | ||
118 | |||
119 | /* | ||
120 | * bios returns watchlist number or negative error number. | ||
121 | */ | ||
122 | ret = (int)uv_bios_call_irqsave(UV_BIOS_WATCHLIST_ALLOC, addr, | ||
123 | mq_size, (u64)intr_mmr_offset, | ||
124 | (u64)&watchlist, 0); | ||
125 | if (ret < BIOS_STATUS_SUCCESS) | ||
126 | return ret; | ||
127 | |||
128 | return watchlist; | ||
129 | } | ||
130 | EXPORT_SYMBOL_GPL(uv_bios_mq_watchlist_alloc); | ||
131 | |||
132 | int | ||
133 | uv_bios_mq_watchlist_free(int blade, int watchlist_num) | ||
134 | { | ||
135 | return (int)uv_bios_call_irqsave(UV_BIOS_WATCHLIST_FREE, | ||
136 | blade, watchlist_num, 0, 0, 0); | ||
137 | } | ||
138 | EXPORT_SYMBOL_GPL(uv_bios_mq_watchlist_free); | ||
139 | |||
140 | s64 | ||
141 | uv_bios_change_memprotect(u64 paddr, u64 len, enum uv_memprotect perms) | ||
142 | { | ||
143 | return uv_bios_call_irqsave(UV_BIOS_MEMPROTECT, paddr, len, | ||
144 | perms, 0, 0); | ||
145 | } | ||
146 | EXPORT_SYMBOL_GPL(uv_bios_change_memprotect); | ||
147 | |||
148 | s64 | ||
149 | uv_bios_reserved_page_pa(u64 buf, u64 *cookie, u64 *addr, u64 *len) | ||
150 | { | ||
151 | s64 ret; | ||
152 | |||
153 | ret = uv_bios_call_irqsave(UV_BIOS_GET_PARTITION_ADDR, (u64)cookie, | ||
154 | (u64)addr, buf, (u64)len, 0); | ||
155 | return ret; | ||
156 | } | ||
157 | EXPORT_SYMBOL_GPL(uv_bios_reserved_page_pa); | ||
158 | |||
159 | s64 uv_bios_freq_base(u64 clock_type, u64 *ticks_per_second) | ||
160 | { | ||
161 | return uv_bios_call(UV_BIOS_FREQ_BASE, clock_type, | ||
162 | (u64)ticks_per_second, 0, 0, 0); | ||
163 | } | ||
164 | EXPORT_SYMBOL_GPL(uv_bios_freq_base); | ||
165 | |||
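uv_time.c, moved by this same patch, is the main consumer of uv_bios_freq_base(); a sketch of that usage, assuming the BIOS_FREQ_BASE_REALTIME_CLOCK clock-type constant from asm/uv/bios.h:

	u64 ticks_per_sec;
	s64 status;

	status = uv_bios_freq_base(BIOS_FREQ_BASE_REALTIME_CLOCK,
					&ticks_per_sec);
	if (status != BIOS_STATUS_SUCCESS)
		return status;	/* cannot register the UV RTC clocksource */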
166 | /* | ||
167 | * uv_bios_set_legacy_vga_target - Set Legacy VGA I/O Target | ||
168 | * @decode: true to enable target, false to disable target | ||
169 | * @domain: PCI domain number | ||
170 | * @bus: PCI bus number | ||
171 | * | ||
172 | * Returns: | ||
173 | * 0: Success | ||
174 | * -EINVAL: Invalid domain or bus number | ||
175 | * -ENOSYS: Capability not available | ||
176 | * -EBUSY: Legacy VGA I/O cannot be retargeted at this time | ||
177 | */ | ||
178 | int uv_bios_set_legacy_vga_target(bool decode, int domain, int bus) | ||
179 | { | ||
180 | return uv_bios_call(UV_BIOS_SET_LEGACY_VGA_TARGET, | ||
181 | (u64)decode, (u64)domain, (u64)bus, 0, 0); | ||
182 | } | ||
183 | EXPORT_SYMBOL_GPL(uv_bios_set_legacy_vga_target); | ||
184 | |||
185 | |||
186 | #ifdef CONFIG_EFI | ||
187 | void uv_bios_init(void) | ||
188 | { | ||
189 | struct uv_systab *tab; | ||
190 | |||
191 | if ((efi.uv_systab == EFI_INVALID_TABLE_ADDR) || | ||
192 | (efi.uv_systab == (unsigned long)NULL)) { | ||
193 | printk(KERN_CRIT "No EFI UV System Table.\n"); | ||
194 | uv_systab.function = (unsigned long)NULL; | ||
195 | return; | ||
196 | } | ||
197 | |||
198 | tab = (struct uv_systab *)ioremap(efi.uv_systab, | ||
199 | sizeof(struct uv_systab)); | ||
200 | if (strncmp(tab->signature, "UVST", 4) != 0) | ||
201 | printk(KERN_ERR "bad signature in UV system table!"); | ||
202 | |||
203 | /* | ||
204 | * Copy table to permanent spot for later use. | ||
205 | */ | ||
206 | memcpy(&uv_systab, tab, sizeof(struct uv_systab)); | ||
207 | iounmap(tab); | ||
208 | |||
209 | printk(KERN_INFO "EFI UV System Table Revision %d\n", | ||
210 | uv_systab.revision); | ||
211 | } | ||
212 | #else /* !CONFIG_EFI */ | ||
213 | |||
214 | void uv_bios_init(void) { } | ||
215 | #endif | ||
diff --git a/arch/x86/platform/uv/tlb_uv.c b/arch/x86/platform/uv/tlb_uv.c
new file mode 100644
index 000000000000..20ea20a39e2a
--- /dev/null
+++ b/arch/x86/platform/uv/tlb_uv.c
@@ -0,0 +1,1661 @@
1 | /* | ||
2 | * SGI UltraViolet TLB flush routines. | ||
3 | * | ||
4 | * (c) 2008-2010 Cliff Wickman <cpw@sgi.com>, SGI. | ||
5 | * | ||
6 | * This code is released under the GNU General Public License version 2 or | ||
7 | * later. | ||
8 | */ | ||
9 | #include <linux/seq_file.h> | ||
10 | #include <linux/proc_fs.h> | ||
11 | #include <linux/debugfs.h> | ||
12 | #include <linux/kernel.h> | ||
13 | #include <linux/slab.h> | ||
14 | |||
15 | #include <asm/mmu_context.h> | ||
16 | #include <asm/uv/uv.h> | ||
17 | #include <asm/uv/uv_mmrs.h> | ||
18 | #include <asm/uv/uv_hub.h> | ||
19 | #include <asm/uv/uv_bau.h> | ||
20 | #include <asm/apic.h> | ||
21 | #include <asm/idle.h> | ||
22 | #include <asm/tsc.h> | ||
23 | #include <asm/irq_vectors.h> | ||
24 | #include <asm/timer.h> | ||
25 | |||
26 | /* timeouts in nanoseconds (indexed by UVH_AGING_PRESCALE_SEL urgency7 30:28) */ | ||
27 | static int timeout_base_ns[] = { | ||
28 | 20, | ||
29 | 160, | ||
30 | 1280, | ||
31 | 10240, | ||
32 | 81920, | ||
33 | 655360, | ||
34 | 5242880, | ||
35 | 167772160 | ||
36 | }; | ||
37 | static int timeout_us; | ||
38 | static int nobau; | ||
39 | static int baudisabled; | ||
40 | static spinlock_t disable_lock; | ||
41 | static cycles_t congested_cycles; | ||
42 | |||
43 | /* tunables: */ | ||
44 | static int max_bau_concurrent = MAX_BAU_CONCURRENT; | ||
45 | static int max_bau_concurrent_constant = MAX_BAU_CONCURRENT; | ||
46 | static int plugged_delay = PLUGGED_DELAY; | ||
47 | static int plugsb4reset = PLUGSB4RESET; | ||
48 | static int timeoutsb4reset = TIMEOUTSB4RESET; | ||
49 | static int ipi_reset_limit = IPI_RESET_LIMIT; | ||
50 | static int complete_threshold = COMPLETE_THRESHOLD; | ||
51 | static int congested_response_us = CONGESTED_RESPONSE_US; | ||
52 | static int congested_reps = CONGESTED_REPS; | ||
53 | static int congested_period = CONGESTED_PERIOD; | ||
54 | static struct dentry *tunables_dir; | ||
55 | static struct dentry *tunables_file; | ||
56 | |||
57 | static int __init setup_nobau(char *arg) | ||
58 | { | ||
59 | nobau = 1; | ||
60 | return 0; | ||
61 | } | ||
62 | early_param("nobau", setup_nobau); | ||
63 | |||
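In practice, booting with "nobau" on the kernel command line makes uv_flush_tlb_others() further down return the caller's cpumask untouched, so the kernel falls back to ordinary IPI-based TLB shootdowns.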
64 | /* base pnode in this partition */ | ||
65 | static int uv_partition_base_pnode __read_mostly; | ||
66 | /* position of pnode (which is nasid>>1): */ | ||
67 | static int uv_nshift __read_mostly; | ||
68 | static unsigned long uv_mmask __read_mostly; | ||
69 | |||
70 | static DEFINE_PER_CPU(struct ptc_stats, ptcstats); | ||
71 | static DEFINE_PER_CPU(struct bau_control, bau_control); | ||
72 | static DEFINE_PER_CPU(cpumask_var_t, uv_flush_tlb_mask); | ||
73 | |||
74 | /* | ||
75 | * Determine the first node on a uvhub. 'Nodes' are used for kernel | ||
76 | * memory allocation. | ||
77 | */ | ||
78 | static int __init uvhub_to_first_node(int uvhub) | ||
79 | { | ||
80 | int node, b; | ||
81 | |||
82 | for_each_online_node(node) { | ||
83 | b = uv_node_to_blade_id(node); | ||
84 | if (uvhub == b) | ||
85 | return node; | ||
86 | } | ||
87 | return -1; | ||
88 | } | ||
89 | |||
90 | /* | ||
91 | * Determine the apicid of the first cpu on a uvhub. | ||
92 | */ | ||
93 | static int __init uvhub_to_first_apicid(int uvhub) | ||
94 | { | ||
95 | int cpu; | ||
96 | |||
97 | for_each_present_cpu(cpu) | ||
98 | if (uvhub == uv_cpu_to_blade_id(cpu)) | ||
99 | return per_cpu(x86_cpu_to_apicid, cpu); | ||
100 | return -1; | ||
101 | } | ||
102 | |||
103 | /* | ||
104 | * Free a software acknowledge hardware resource by clearing its Pending | ||
105 | * bit. This will return a reply to the sender. | ||
106 | * If the message has timed out, a reply has already been sent by the | ||
107 | * hardware but the resource has not been released. In that case our | ||
108 | * clear of the Timeout bit (as well) will free the resource. No reply will | ||
109 | * be sent (the hardware will only do one reply per message). | ||
110 | */ | ||
111 | static inline void uv_reply_to_message(struct msg_desc *mdp, | ||
112 | struct bau_control *bcp) | ||
113 | { | ||
114 | unsigned long dw; | ||
115 | struct bau_payload_queue_entry *msg; | ||
116 | |||
117 | msg = mdp->msg; | ||
118 | if (!msg->canceled) { | ||
119 | dw = (msg->sw_ack_vector << UV_SW_ACK_NPENDING) | | ||
120 | msg->sw_ack_vector; | ||
121 | uv_write_local_mmr( | ||
122 | UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_ALIAS, dw); | ||
123 | } | ||
124 | msg->replied_to = 1; | ||
125 | msg->sw_ack_vector = 0; | ||
126 | } | ||
127 | |||
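As a concrete example of the reply write above (assuming UV_SW_ACK_NPENDING is 8, the number of software-ack resources per hub): if sw_ack_vector is 0x5, the value written to the ..._ALIAS MMR is (0x5 << 8) | 0x5 = 0x505, which clears both the timeout bits and the pending bits for resources 0 and 2 in a single store.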
128 | /* | ||
129 | * Process the receipt of a RETRY message | ||
130 | */ | ||
131 | static inline void uv_bau_process_retry_msg(struct msg_desc *mdp, | ||
132 | struct bau_control *bcp) | ||
133 | { | ||
134 | int i; | ||
135 | int cancel_count = 0; | ||
136 | int slot2; | ||
137 | unsigned long msg_res; | ||
138 | unsigned long mmr = 0; | ||
139 | struct bau_payload_queue_entry *msg; | ||
140 | struct bau_payload_queue_entry *msg2; | ||
141 | struct ptc_stats *stat; | ||
142 | |||
143 | msg = mdp->msg; | ||
144 | stat = bcp->statp; | ||
145 | stat->d_retries++; | ||
146 | /* | ||
147 | * cancel any message from msg+1 to the retry itself | ||
148 | */ | ||
149 | for (msg2 = msg+1, i = 0; i < DEST_Q_SIZE; msg2++, i++) { | ||
150 | if (msg2 > mdp->va_queue_last) | ||
151 | msg2 = mdp->va_queue_first; | ||
152 | if (msg2 == msg) | ||
153 | break; | ||
154 | |||
155 | /* same conditions for cancellation as uv_do_reset */ | ||
156 | if ((msg2->replied_to == 0) && (msg2->canceled == 0) && | ||
157 | (msg2->sw_ack_vector) && ((msg2->sw_ack_vector & | ||
158 | msg->sw_ack_vector) == 0) && | ||
159 | (msg2->sending_cpu == msg->sending_cpu) && | ||
160 | (msg2->msg_type != MSG_NOOP)) { | ||
161 | slot2 = msg2 - mdp->va_queue_first; | ||
162 | mmr = uv_read_local_mmr | ||
163 | (UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE); | ||
164 | msg_res = msg2->sw_ack_vector; | ||
165 | /* | ||
166 | * This is a message retry; clear the resources held | ||
167 | * by the previous message only if they timed out. | ||
168 | * If it has not timed out we have an unexpected | ||
169 | * situation to report. | ||
170 | */ | ||
171 | if (mmr & (msg_res << UV_SW_ACK_NPENDING)) { | ||
172 | /* | ||
173 | * is the resource timed out? | ||
174 | * make everyone ignore the cancelled message. | ||
175 | */ | ||
176 | msg2->canceled = 1; | ||
177 | stat->d_canceled++; | ||
178 | cancel_count++; | ||
179 | uv_write_local_mmr( | ||
180 | UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_ALIAS, | ||
181 | (msg_res << UV_SW_ACK_NPENDING) | | ||
182 | msg_res); | ||
183 | } | ||
184 | } | ||
185 | } | ||
186 | if (!cancel_count) | ||
187 | stat->d_nocanceled++; | ||
188 | } | ||
189 | |||
190 | /* | ||
191 | * Do all the things a cpu should do for a TLB shootdown message. | ||
192 | * Other cpu's may come here at the same time for this message. | ||
193 | */ | ||
194 | static void uv_bau_process_message(struct msg_desc *mdp, | ||
195 | struct bau_control *bcp) | ||
196 | { | ||
197 | int msg_ack_count; | ||
198 | short socket_ack_count = 0; | ||
199 | struct ptc_stats *stat; | ||
200 | struct bau_payload_queue_entry *msg; | ||
201 | struct bau_control *smaster = bcp->socket_master; | ||
202 | |||
203 | /* | ||
204 | * This must be a normal message, or retry of a normal message | ||
205 | */ | ||
206 | msg = mdp->msg; | ||
207 | stat = bcp->statp; | ||
208 | if (msg->address == TLB_FLUSH_ALL) { | ||
209 | local_flush_tlb(); | ||
210 | stat->d_alltlb++; | ||
211 | } else { | ||
212 | __flush_tlb_one(msg->address); | ||
213 | stat->d_onetlb++; | ||
214 | } | ||
215 | stat->d_requestee++; | ||
216 | |||
217 | /* | ||
218 | * One cpu on each uvhub has the additional job on a RETRY | ||
219 | * of releasing the resource held by the message that is | ||
220 | * being retried. That message is identified by sending | ||
221 | * cpu number. | ||
222 | */ | ||
223 | if (msg->msg_type == MSG_RETRY && bcp == bcp->uvhub_master) | ||
224 | uv_bau_process_retry_msg(mdp, bcp); | ||
225 | |||
226 | /* | ||
227 | * This is a sw_ack message, so we have to reply to it. | ||
228 | * Count each responding cpu on the socket. This avoids | ||
229 | * pinging the count's cache line back and forth between | ||
230 | * the sockets. | ||
231 | */ | ||
232 | socket_ack_count = atomic_add_short_return(1, (struct atomic_short *) | ||
233 | &smaster->socket_acknowledge_count[mdp->msg_slot]); | ||
234 | if (socket_ack_count == bcp->cpus_in_socket) { | ||
235 | /* | ||
236 | * Both sockets dump their completed count total into | ||
237 | * the message's count. | ||
238 | */ | ||
239 | smaster->socket_acknowledge_count[mdp->msg_slot] = 0; | ||
240 | msg_ack_count = atomic_add_short_return(socket_ack_count, | ||
241 | (struct atomic_short *)&msg->acknowledge_count); | ||
242 | |||
243 | if (msg_ack_count == bcp->cpus_in_uvhub) { | ||
244 | /* | ||
245 | * All cpus in uvhub saw it; reply | ||
246 | */ | ||
247 | uv_reply_to_message(mdp, bcp); | ||
248 | } | ||
249 | } | ||
250 | |||
251 | return; | ||
252 | } | ||
253 | |||
254 | /* | ||
255 | * Determine the first cpu on a uvhub. | ||
256 | */ | ||
257 | static int uvhub_to_first_cpu(int uvhub) | ||
258 | { | ||
259 | int cpu; | ||
260 | for_each_present_cpu(cpu) | ||
261 | if (uvhub == uv_cpu_to_blade_id(cpu)) | ||
262 | return cpu; | ||
263 | return -1; | ||
264 | } | ||
265 | |||
266 | /* | ||
267 | * Last resort when we get a large number of destination timeouts is | ||
268 | * to clear resources held by a given cpu. | ||
269 | * Do this with IPI so that all messages in the BAU message queue | ||
270 | * can be identified by their nonzero sw_ack_vector field. | ||
271 | * | ||
272 | * This is entered for a single cpu on the uvhub. | ||
273 | * The sender wants this uvhub to free a specific message's | ||
274 | * sw_ack resources. | ||
275 | */ | ||
276 | static void | ||
277 | uv_do_reset(void *ptr) | ||
278 | { | ||
279 | int i; | ||
280 | int slot; | ||
281 | int count = 0; | ||
282 | unsigned long mmr; | ||
283 | unsigned long msg_res; | ||
284 | struct bau_control *bcp; | ||
285 | struct reset_args *rap; | ||
286 | struct bau_payload_queue_entry *msg; | ||
287 | struct ptc_stats *stat; | ||
288 | |||
289 | bcp = &per_cpu(bau_control, smp_processor_id()); | ||
290 | rap = (struct reset_args *)ptr; | ||
291 | stat = bcp->statp; | ||
292 | stat->d_resets++; | ||
293 | |||
294 | /* | ||
295 | * We're looking for the given sender, and | ||
296 | * will free its sw_ack resource. | ||
297 | * If all cpu's finally responded after the timeout, its | ||
298 | * message 'replied_to' was set. | ||
299 | */ | ||
300 | for (msg = bcp->va_queue_first, i = 0; i < DEST_Q_SIZE; msg++, i++) { | ||
301 | /* uv_do_reset: same conditions for cancellation as | ||
302 | uv_bau_process_retry_msg() */ | ||
303 | if ((msg->replied_to == 0) && | ||
304 | (msg->canceled == 0) && | ||
305 | (msg->sending_cpu == rap->sender) && | ||
306 | (msg->sw_ack_vector) && | ||
307 | (msg->msg_type != MSG_NOOP)) { | ||
308 | /* | ||
309 | * make everyone else ignore this message | ||
310 | */ | ||
311 | msg->canceled = 1; | ||
312 | slot = msg - bcp->va_queue_first; | ||
313 | count++; | ||
314 | /* | ||
315 | * only reset the resource if it is still pending | ||
316 | */ | ||
317 | mmr = uv_read_local_mmr | ||
318 | (UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE); | ||
319 | msg_res = msg->sw_ack_vector; | ||
320 | if (mmr & msg_res) { | ||
321 | stat->d_rcanceled++; | ||
322 | uv_write_local_mmr( | ||
323 | UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_ALIAS, | ||
324 | (msg_res << UV_SW_ACK_NPENDING) | | ||
325 | msg_res); | ||
326 | } | ||
327 | } | ||
328 | } | ||
329 | return; | ||
330 | } | ||
331 | |||
332 | /* | ||
333 | * Use IPI to get all target uvhubs to release resources held by | ||
334 | * a given sending cpu number. | ||
335 | */ | ||
336 | static void uv_reset_with_ipi(struct bau_target_uvhubmask *distribution, | ||
337 | int sender) | ||
338 | { | ||
339 | int uvhub; | ||
340 | int cpu; | ||
341 | cpumask_t mask; | ||
342 | struct reset_args reset_args; | ||
343 | |||
344 | reset_args.sender = sender; | ||
345 | |||
346 | cpus_clear(mask); | ||
347 | /* find a single cpu for each uvhub in this distribution mask */ | ||
348 | for (uvhub = 0; | ||
349 | uvhub < sizeof(struct bau_target_uvhubmask) * BITSPERBYTE; | ||
350 | uvhub++) { | ||
351 | if (!bau_uvhub_isset(uvhub, distribution)) | ||
352 | continue; | ||
353 | /* find a cpu for this uvhub */ | ||
354 | cpu = uvhub_to_first_cpu(uvhub); | ||
355 | cpu_set(cpu, mask); | ||
356 | } | ||
357 | /* IPI all cpus; Preemption is already disabled */ | ||
358 | smp_call_function_many(&mask, uv_do_reset, (void *)&reset_args, 1); | ||
359 | return; | ||
360 | } | ||
361 | |||
362 | static inline unsigned long | ||
363 | cycles_2_us(unsigned long long cyc) | ||
364 | { | ||
365 | unsigned long long ns; | ||
366 | unsigned long us; | ||
367 | ns = (cyc * per_cpu(cyc2ns, smp_processor_id())) | ||
368 | >> CYC2NS_SCALE_FACTOR; | ||
369 | us = ns / 1000; | ||
370 | return us; | ||
371 | } | ||
372 | |||
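As a worked example of cycles_2_us(), assuming CYC2NS_SCALE_FACTOR is 10 (i.e. the per-cpu cyc2ns value is nanoseconds-per-cycle scaled by 1024): on a 2 GHz TSC cyc2ns is about 512, so 4,000,000 cycles become (4,000,000 * 512) >> 10 = 2,000,000 ns, i.e. 2000 microseconds.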
373 | /* | ||
374 | * wait for all cpus on this hub to finish their sends and go quiet | ||
375 | * leaves uvhub_quiesce set so that no new broadcasts are started by | ||
376 | * bau_flush_send_and_wait() | ||
377 | */ | ||
378 | static inline void | ||
379 | quiesce_local_uvhub(struct bau_control *hmaster) | ||
380 | { | ||
381 | atomic_add_short_return(1, (struct atomic_short *) | ||
382 | &hmaster->uvhub_quiesce); | ||
383 | } | ||
384 | |||
385 | /* | ||
386 | * mark this quiet-requestor as done | ||
387 | */ | ||
388 | static inline void | ||
389 | end_uvhub_quiesce(struct bau_control *hmaster) | ||
390 | { | ||
391 | atomic_add_short_return(-1, (struct atomic_short *) | ||
392 | &hmaster->uvhub_quiesce); | ||
393 | } | ||
394 | |||
395 | /* | ||
396 | * Wait for completion of a broadcast software ack message | ||
397 | * return COMPLETE, RETRY(PLUGGED or TIMEOUT) or GIVEUP | ||
398 | */ | ||
399 | static int uv_wait_completion(struct bau_desc *bau_desc, | ||
400 | unsigned long mmr_offset, int right_shift, int this_cpu, | ||
401 | struct bau_control *bcp, struct bau_control *smaster, long try) | ||
402 | { | ||
403 | unsigned long descriptor_status; | ||
404 | cycles_t ttime; | ||
405 | struct ptc_stats *stat = bcp->statp; | ||
406 | struct bau_control *hmaster; | ||
407 | |||
408 | hmaster = bcp->uvhub_master; | ||
409 | |||
410 | /* spin on the status MMR, waiting for it to go idle */ | ||
411 | while ((descriptor_status = (((unsigned long) | ||
412 | uv_read_local_mmr(mmr_offset) >> | ||
413 | right_shift) & UV_ACT_STATUS_MASK)) != | ||
414 | DESC_STATUS_IDLE) { | ||
415 | /* | ||
416 | * Our software ack messages may be blocked because there are | ||
417 | * no swack resources available. As long as none of them | ||
418 | * has timed out hardware will NACK our message and its | ||
419 | * state will stay IDLE. | ||
420 | */ | ||
421 | if (descriptor_status == DESC_STATUS_SOURCE_TIMEOUT) { | ||
422 | stat->s_stimeout++; | ||
423 | return FLUSH_GIVEUP; | ||
424 | } else if (descriptor_status == | ||
425 | DESC_STATUS_DESTINATION_TIMEOUT) { | ||
426 | stat->s_dtimeout++; | ||
427 | ttime = get_cycles(); | ||
428 | |||
429 | /* | ||
430 | * Our retries may be blocked by all destination | ||
431 | * swack resources being consumed, and a timeout | ||
432 | * pending. In that case hardware returns the | ||
433 | * ERROR that looks like a destination timeout. | ||
434 | */ | ||
435 | if (cycles_2_us(ttime - bcp->send_message) < | ||
436 | timeout_us) { | ||
437 | bcp->conseccompletes = 0; | ||
438 | return FLUSH_RETRY_PLUGGED; | ||
439 | } | ||
440 | |||
441 | bcp->conseccompletes = 0; | ||
442 | return FLUSH_RETRY_TIMEOUT; | ||
443 | } else { | ||
444 | /* | ||
445 | * descriptor_status is still BUSY | ||
446 | */ | ||
447 | cpu_relax(); | ||
448 | } | ||
449 | } | ||
450 | bcp->conseccompletes++; | ||
451 | return FLUSH_COMPLETE; | ||
452 | } | ||
453 | |||
454 | static inline cycles_t | ||
455 | sec_2_cycles(unsigned long sec) | ||
456 | { | ||
457 | unsigned long ns; | ||
458 | cycles_t cyc; | ||
459 | |||
460 | ns = sec * 1000000000; | ||
461 | cyc = (ns << CYC2NS_SCALE_FACTOR)/(per_cpu(cyc2ns, smp_processor_id())); | ||
462 | return cyc; | ||
463 | } | ||
464 | |||
465 | /* | ||
466 | * conditionally add 1 to *v, unless *v is >= u | ||
467 | * return 0 if we cannot add 1 to *v because it is >= u | ||
468 | * return 1 if we can add 1 to *v because it is < u | ||
469 | * the add is atomic | ||
470 | * | ||
471 | * This is close to atomic_add_unless(), but this allows the 'u' value | ||
472 | * to be lowered below the current 'v'. atomic_add_unless can only stop | ||
473 | * on equal. | ||
474 | */ | ||
475 | static inline int atomic_inc_unless_ge(spinlock_t *lock, atomic_t *v, int u) | ||
476 | { | ||
477 | spin_lock(lock); | ||
478 | if (atomic_read(v) >= u) { | ||
479 | spin_unlock(lock); | ||
480 | return 0; | ||
481 | } | ||
482 | atomic_inc(v); | ||
483 | spin_unlock(lock); | ||
484 | return 1; | ||
485 | } | ||
486 | |||
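For comparison, the same semantics could be expressed lock-free with a cmpxchg loop; this is only an illustrative sketch using the kernel's atomic_cmpxchg(), while the spinlocked version above is what the driver actually uses:

	static inline int atomic_inc_unless_ge_sketch(atomic_t *v, int u)
	{
		int cur;

		for (;;) {
			cur = atomic_read(v);
			if (cur >= u)
				return 0;	/* at or above the limit, no increment */
			if (atomic_cmpxchg(v, cur, cur + 1) == cur)
				return 1;	/* our increment won the race */
		}
	}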
487 | /* | ||
488 | * Our retries are blocked by all destination swack resources being | ||
489 | * in use, and a timeout is pending. In that case hardware immediately | ||
490 | * returns the ERROR that looks like a destination timeout. | ||
491 | */ | ||
492 | static void | ||
493 | destination_plugged(struct bau_desc *bau_desc, struct bau_control *bcp, | ||
494 | struct bau_control *hmaster, struct ptc_stats *stat) | ||
495 | { | ||
496 | udelay(bcp->plugged_delay); | ||
497 | bcp->plugged_tries++; | ||
498 | if (bcp->plugged_tries >= bcp->plugsb4reset) { | ||
499 | bcp->plugged_tries = 0; | ||
500 | quiesce_local_uvhub(hmaster); | ||
501 | spin_lock(&hmaster->queue_lock); | ||
502 | uv_reset_with_ipi(&bau_desc->distribution, bcp->cpu); | ||
503 | spin_unlock(&hmaster->queue_lock); | ||
504 | end_uvhub_quiesce(hmaster); | ||
505 | bcp->ipi_attempts++; | ||
506 | stat->s_resets_plug++; | ||
507 | } | ||
508 | } | ||
509 | |||
510 | static void | ||
511 | destination_timeout(struct bau_desc *bau_desc, struct bau_control *bcp, | ||
512 | struct bau_control *hmaster, struct ptc_stats *stat) | ||
513 | { | ||
514 | hmaster->max_bau_concurrent = 1; | ||
515 | bcp->timeout_tries++; | ||
516 | if (bcp->timeout_tries >= bcp->timeoutsb4reset) { | ||
517 | bcp->timeout_tries = 0; | ||
518 | quiesce_local_uvhub(hmaster); | ||
519 | spin_lock(&hmaster->queue_lock); | ||
520 | uv_reset_with_ipi(&bau_desc->distribution, bcp->cpu); | ||
521 | spin_unlock(&hmaster->queue_lock); | ||
522 | end_uvhub_quiesce(hmaster); | ||
523 | bcp->ipi_attempts++; | ||
524 | stat->s_resets_timeout++; | ||
525 | } | ||
526 | } | ||
527 | |||
528 | /* | ||
529 | * Completions are taking a very long time due to a congested numalink | ||
530 | * network. | ||
531 | */ | ||
532 | static void | ||
533 | disable_for_congestion(struct bau_control *bcp, struct ptc_stats *stat) | ||
534 | { | ||
535 | int tcpu; | ||
536 | struct bau_control *tbcp; | ||
537 | |||
538 | /* let only one cpu do this disabling */ | ||
539 | spin_lock(&disable_lock); | ||
540 | if (!baudisabled && bcp->period_requests && | ||
541 | ((bcp->period_time / bcp->period_requests) > congested_cycles)) { | ||
542 | /* it becomes this cpu's job to turn on the use of the | ||
543 | BAU again */ | ||
544 | baudisabled = 1; | ||
545 | bcp->set_bau_off = 1; | ||
546 | bcp->set_bau_on_time = get_cycles() + | ||
547 | sec_2_cycles(bcp->congested_period); | ||
548 | stat->s_bau_disabled++; | ||
549 | for_each_present_cpu(tcpu) { | ||
550 | tbcp = &per_cpu(bau_control, tcpu); | ||
551 | tbcp->baudisabled = 1; | ||
552 | } | ||
553 | } | ||
554 | spin_unlock(&disable_lock); | ||
555 | } | ||
556 | |||
557 | /** | ||
558 | * uv_flush_send_and_wait | ||
559 | * | ||
560 | * Send a broadcast and wait for it to complete. | ||
561 | * | ||
562 | * The flush_mask contains the cpus the broadcast is to be sent to including | ||
563 | * cpus that are on the local uvhub. | ||
564 | * | ||
565 | * Returns 0 if all flushing represented in the mask was done. | ||
566 | * Returns 1 if it gives up entirely and the original cpu mask is to be | ||
567 | * returned to the kernel. | ||
568 | */ | ||
569 | int uv_flush_send_and_wait(struct bau_desc *bau_desc, | ||
570 | struct cpumask *flush_mask, struct bau_control *bcp) | ||
571 | { | ||
572 | int right_shift; | ||
573 | int completion_status = 0; | ||
574 | int seq_number = 0; | ||
575 | long try = 0; | ||
576 | int cpu = bcp->uvhub_cpu; | ||
577 | int this_cpu = bcp->cpu; | ||
578 | unsigned long mmr_offset; | ||
579 | unsigned long index; | ||
580 | cycles_t time1; | ||
581 | cycles_t time2; | ||
582 | cycles_t elapsed; | ||
583 | struct ptc_stats *stat = bcp->statp; | ||
584 | struct bau_control *smaster = bcp->socket_master; | ||
585 | struct bau_control *hmaster = bcp->uvhub_master; | ||
586 | |||
587 | if (!atomic_inc_unless_ge(&hmaster->uvhub_lock, | ||
588 | &hmaster->active_descriptor_count, | ||
589 | hmaster->max_bau_concurrent)) { | ||
590 | stat->s_throttles++; | ||
591 | do { | ||
592 | cpu_relax(); | ||
593 | } while (!atomic_inc_unless_ge(&hmaster->uvhub_lock, | ||
594 | &hmaster->active_descriptor_count, | ||
595 | hmaster->max_bau_concurrent)); | ||
596 | } | ||
597 | while (hmaster->uvhub_quiesce) | ||
598 | cpu_relax(); | ||
599 | |||
600 | if (cpu < UV_CPUS_PER_ACT_STATUS) { | ||
601 | mmr_offset = UVH_LB_BAU_SB_ACTIVATION_STATUS_0; | ||
602 | right_shift = cpu * UV_ACT_STATUS_SIZE; | ||
603 | } else { | ||
604 | mmr_offset = UVH_LB_BAU_SB_ACTIVATION_STATUS_1; | ||
605 | right_shift = | ||
606 | ((cpu - UV_CPUS_PER_ACT_STATUS) * UV_ACT_STATUS_SIZE); | ||
607 | } | ||
608 | time1 = get_cycles(); | ||
609 | do { | ||
610 | if (try == 0) { | ||
611 | bau_desc->header.msg_type = MSG_REGULAR; | ||
612 | seq_number = bcp->message_number++; | ||
613 | } else { | ||
614 | bau_desc->header.msg_type = MSG_RETRY; | ||
615 | stat->s_retry_messages++; | ||
616 | } | ||
617 | bau_desc->header.sequence = seq_number; | ||
618 | index = (1UL << UVH_LB_BAU_SB_ACTIVATION_CONTROL_PUSH_SHFT) | | ||
619 | bcp->uvhub_cpu; | ||
620 | bcp->send_message = get_cycles(); | ||
621 | uv_write_local_mmr(UVH_LB_BAU_SB_ACTIVATION_CONTROL, index); | ||
622 | try++; | ||
623 | completion_status = uv_wait_completion(bau_desc, mmr_offset, | ||
624 | right_shift, this_cpu, bcp, smaster, try); | ||
625 | |||
626 | if (completion_status == FLUSH_RETRY_PLUGGED) { | ||
627 | destination_plugged(bau_desc, bcp, hmaster, stat); | ||
628 | } else if (completion_status == FLUSH_RETRY_TIMEOUT) { | ||
629 | destination_timeout(bau_desc, bcp, hmaster, stat); | ||
630 | } | ||
631 | if (bcp->ipi_attempts >= bcp->ipi_reset_limit) { | ||
632 | bcp->ipi_attempts = 0; | ||
633 | completion_status = FLUSH_GIVEUP; | ||
634 | break; | ||
635 | } | ||
636 | cpu_relax(); | ||
637 | } while ((completion_status == FLUSH_RETRY_PLUGGED) || | ||
638 | (completion_status == FLUSH_RETRY_TIMEOUT)); | ||
639 | time2 = get_cycles(); | ||
640 | bcp->plugged_tries = 0; | ||
641 | bcp->timeout_tries = 0; | ||
642 | if ((completion_status == FLUSH_COMPLETE) && | ||
643 | (bcp->conseccompletes > bcp->complete_threshold) && | ||
644 | (hmaster->max_bau_concurrent < | ||
645 | hmaster->max_bau_concurrent_constant)) | ||
646 | hmaster->max_bau_concurrent++; | ||
647 | while (hmaster->uvhub_quiesce) | ||
648 | cpu_relax(); | ||
649 | atomic_dec(&hmaster->active_descriptor_count); | ||
650 | if (time2 > time1) { | ||
651 | elapsed = time2 - time1; | ||
652 | stat->s_time += elapsed; | ||
653 | if ((completion_status == FLUSH_COMPLETE) && (try == 1)) { | ||
654 | bcp->period_requests++; | ||
655 | bcp->period_time += elapsed; | ||
656 | if ((elapsed > congested_cycles) && | ||
657 | (bcp->period_requests > bcp->congested_reps)) { | ||
658 | disable_for_congestion(bcp, stat); | ||
659 | } | ||
660 | } | ||
661 | } else | ||
662 | stat->s_requestor--; | ||
663 | if (completion_status == FLUSH_COMPLETE && try > 1) | ||
664 | stat->s_retriesok++; | ||
665 | else if (completion_status == FLUSH_GIVEUP) { | ||
666 | stat->s_giveup++; | ||
667 | return 1; | ||
668 | } | ||
669 | return 0; | ||
670 | } | ||
671 | |||
672 | /** | ||
673 | * uv_flush_tlb_others - globally purge translation cache of a virtual | ||
674 | * address or all TLB's | ||
675 | * @cpumask: mask of all cpu's in which the address is to be removed | ||
676 | * @mm: mm_struct containing virtual address range | ||
677 | * @va: virtual address to be removed (or TLB_FLUSH_ALL for all TLB's on cpu) | ||
678 | * @cpu: the current cpu | ||
679 | * | ||
680 | * This is the entry point for initiating any UV global TLB shootdown. | ||
681 | * | ||
682 | * Purges the translation caches of all specified processors of the given | ||
683 | * virtual address, or purges all TLB's on specified processors. | ||
684 | * | ||
685 | * The caller has derived the cpumask from the mm_struct. This function | ||
686 | * is called only if there are bits set in the mask. (e.g. flush_tlb_page()) | ||
687 | * | ||
688 | * The cpumask is converted into a uvhubmask of the uvhubs containing | ||
689 | * those cpus. | ||
690 | * | ||
691 | * Note that this function should be called with preemption disabled. | ||
692 | * | ||
693 | * Returns NULL if all remote flushing was done. | ||
694 | * Returns pointer to cpumask if some remote flushing remains to be | ||
695 | * done. The returned pointer is valid till preemption is re-enabled. | ||
696 | */ | ||
697 | const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask, | ||
698 | struct mm_struct *mm, | ||
699 | unsigned long va, unsigned int cpu) | ||
700 | { | ||
701 | int tcpu; | ||
702 | int uvhub; | ||
703 | int locals = 0; | ||
704 | int remotes = 0; | ||
705 | int hubs = 0; | ||
706 | struct bau_desc *bau_desc; | ||
707 | struct cpumask *flush_mask; | ||
708 | struct ptc_stats *stat; | ||
709 | struct bau_control *bcp; | ||
710 | struct bau_control *tbcp; | ||
711 | |||
712 | /* kernel was booted 'nobau' */ | ||
713 | if (nobau) | ||
714 | return cpumask; | ||
715 | |||
716 | bcp = &per_cpu(bau_control, cpu); | ||
717 | stat = bcp->statp; | ||
718 | |||
719 | /* bau was disabled due to slow response */ | ||
720 | if (bcp->baudisabled) { | ||
721 | /* the cpu that disabled it must re-enable it */ | ||
722 | if (bcp->set_bau_off) { | ||
723 | if (get_cycles() >= bcp->set_bau_on_time) { | ||
724 | stat->s_bau_reenabled++; | ||
725 | baudisabled = 0; | ||
726 | for_each_present_cpu(tcpu) { | ||
727 | tbcp = &per_cpu(bau_control, tcpu); | ||
728 | tbcp->baudisabled = 0; | ||
729 | tbcp->period_requests = 0; | ||
730 | tbcp->period_time = 0; | ||
731 | } | ||
732 | } | ||
733 | } | ||
734 | return cpumask; | ||
735 | } | ||
736 | |||
737 | /* | ||
738 | * Each sending cpu has a per-cpu mask which it fills from the caller's | ||
739 | * cpu mask. All cpus are converted to uvhubs and copied to the | ||
740 | * activation descriptor. | ||
741 | */ | ||
742 | flush_mask = (struct cpumask *)per_cpu(uv_flush_tlb_mask, cpu); | ||
743 | /* don't actually do a shootdown of the local cpu */ | ||
744 | cpumask_andnot(flush_mask, cpumask, cpumask_of(cpu)); | ||
745 | if (cpu_isset(cpu, *cpumask)) | ||
746 | stat->s_ntargself++; | ||
747 | |||
748 | bau_desc = bcp->descriptor_base; | ||
749 | bau_desc += UV_ITEMS_PER_DESCRIPTOR * bcp->uvhub_cpu; | ||
750 | bau_uvhubs_clear(&bau_desc->distribution, UV_DISTRIBUTION_SIZE); | ||
751 | |||
752 | /* cpu statistics */ | ||
753 | for_each_cpu(tcpu, flush_mask) { | ||
754 | uvhub = uv_cpu_to_blade_id(tcpu); | ||
755 | bau_uvhub_set(uvhub, &bau_desc->distribution); | ||
756 | if (uvhub == bcp->uvhub) | ||
757 | locals++; | ||
758 | else | ||
759 | remotes++; | ||
760 | } | ||
761 | if ((locals + remotes) == 0) | ||
762 | return NULL; | ||
763 | stat->s_requestor++; | ||
764 | stat->s_ntargcpu += remotes + locals; | ||
765 | stat->s_ntargremotes += remotes; | ||
766 | stat->s_ntarglocals += locals; | ||
767 | remotes = bau_uvhub_weight(&bau_desc->distribution); | ||
768 | |||
769 | /* uvhub statistics */ | ||
770 | hubs = bau_uvhub_weight(&bau_desc->distribution); | ||
771 | if (locals) { | ||
772 | stat->s_ntarglocaluvhub++; | ||
773 | stat->s_ntargremoteuvhub += (hubs - 1); | ||
774 | } else | ||
775 | stat->s_ntargremoteuvhub += hubs; | ||
776 | stat->s_ntarguvhub += hubs; | ||
777 | if (hubs >= 16) | ||
778 | stat->s_ntarguvhub16++; | ||
779 | else if (hubs >= 8) | ||
780 | stat->s_ntarguvhub8++; | ||
781 | else if (hubs >= 4) | ||
782 | stat->s_ntarguvhub4++; | ||
783 | else if (hubs >= 2) | ||
784 | stat->s_ntarguvhub2++; | ||
785 | else | ||
786 | stat->s_ntarguvhub1++; | ||
787 | |||
788 | bau_desc->payload.address = va; | ||
789 | bau_desc->payload.sending_cpu = cpu; | ||
790 | |||
791 | /* | ||
792 | * uv_flush_send_and_wait returns 0 if all cpu's were messaged, | ||
793 | * or 1 if it gave up and the original cpumask should be returned. | ||
794 | */ | ||
795 | if (!uv_flush_send_and_wait(bau_desc, flush_mask, bcp)) | ||
796 | return NULL; | ||
797 | else | ||
798 | return cpumask; | ||
799 | } | ||
800 | |||
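For context, the generic flush path hands off to uv_flush_tlb_others() roughly like this (a sketch of native_flush_tlb_others() in arch/x86/mm/tlb.c at the time; not part of this patch):

	if (is_uv_system()) {
		unsigned int cpu;

		cpu = get_cpu();
		cpumask = uv_flush_tlb_others(cpumask, mm, va, cpu);
		if (cpumask)
			flush_tlb_others_ipi(cpumask, mm, va);
		put_cpu();
		return;
	}
	flush_tlb_others_ipi(cpumask, mm, va);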
801 | /* | ||
802 | * The BAU message interrupt comes here. (registered by set_intr_gate) | ||
803 | * See entry_64.S | ||
804 | * | ||
805 | * We received a broadcast assist message. | ||
806 | * | ||
807 | * Interrupts are disabled; this interrupt could represent | ||
808 | * the receipt of several messages. | ||
809 | * | ||
810 | * All cores/threads on this hub get this interrupt. | ||
811 | * The last one to see it does the software ack. | ||
812 | * (the resource will not be freed until noninterruptable cpus see this | ||
813 | * interrupt; hardware may timeout the s/w ack and reply ERROR) | ||
814 | */ | ||
815 | void uv_bau_message_interrupt(struct pt_regs *regs) | ||
816 | { | ||
817 | int count = 0; | ||
818 | cycles_t time_start; | ||
819 | struct bau_payload_queue_entry *msg; | ||
820 | struct bau_control *bcp; | ||
821 | struct ptc_stats *stat; | ||
822 | struct msg_desc msgdesc; | ||
823 | |||
824 | time_start = get_cycles(); | ||
825 | bcp = &per_cpu(bau_control, smp_processor_id()); | ||
826 | stat = bcp->statp; | ||
827 | msgdesc.va_queue_first = bcp->va_queue_first; | ||
828 | msgdesc.va_queue_last = bcp->va_queue_last; | ||
829 | msg = bcp->bau_msg_head; | ||
830 | while (msg->sw_ack_vector) { | ||
831 | count++; | ||
832 | msgdesc.msg_slot = msg - msgdesc.va_queue_first; | ||
833 | msgdesc.sw_ack_slot = ffs(msg->sw_ack_vector) - 1; | ||
834 | msgdesc.msg = msg; | ||
835 | uv_bau_process_message(&msgdesc, bcp); | ||
836 | msg++; | ||
837 | if (msg > msgdesc.va_queue_last) | ||
838 | msg = msgdesc.va_queue_first; | ||
839 | bcp->bau_msg_head = msg; | ||
840 | } | ||
841 | stat->d_time += (get_cycles() - time_start); | ||
842 | if (!count) | ||
843 | stat->d_nomsg++; | ||
844 | else if (count > 1) | ||
845 | stat->d_multmsg++; | ||
846 | ack_APIC_irq(); | ||
847 | } | ||
848 | |||
849 | /* | ||
850 | * uv_enable_timeouts | ||
851 | * | ||
852 | * Each target uvhub (i.e. a uvhub that has no cpu's) needs to have | ||
853 | * shootdown message timeouts enabled. The timeout does not cause | ||
854 | * an interrupt, but causes an error message to be returned to | ||
855 | * the sender. | ||
856 | */ | ||
857 | static void uv_enable_timeouts(void) | ||
858 | { | ||
859 | int uvhub; | ||
860 | int nuvhubs; | ||
861 | int pnode; | ||
862 | unsigned long mmr_image; | ||
863 | |||
864 | nuvhubs = uv_num_possible_blades(); | ||
865 | |||
866 | for (uvhub = 0; uvhub < nuvhubs; uvhub++) { | ||
867 | if (!uv_blade_nr_possible_cpus(uvhub)) | ||
868 | continue; | ||
869 | |||
870 | pnode = uv_blade_to_pnode(uvhub); | ||
871 | mmr_image = | ||
872 | uv_read_global_mmr64(pnode, UVH_LB_BAU_MISC_CONTROL); | ||
873 | /* | ||
874 | * Set the timeout period and then lock it in, in three | ||
875 | * steps; captures and locks in the period. | ||
876 | * | ||
877 | * To program the period, the SOFT_ACK_MODE must be off. | ||
878 | */ | ||
879 | mmr_image &= ~((unsigned long)1 << | ||
880 | UVH_LB_BAU_MISC_CONTROL_ENABLE_INTD_SOFT_ACK_MODE_SHFT); | ||
881 | uv_write_global_mmr64 | ||
882 | (pnode, UVH_LB_BAU_MISC_CONTROL, mmr_image); | ||
883 | /* | ||
884 | * Set the 4-bit period. | ||
885 | */ | ||
886 | mmr_image &= ~((unsigned long)0xf << | ||
887 | UVH_LB_BAU_MISC_CONTROL_INTD_SOFT_ACK_TIMEOUT_PERIOD_SHFT); | ||
888 | mmr_image |= (UV_INTD_SOFT_ACK_TIMEOUT_PERIOD << | ||
889 | UVH_LB_BAU_MISC_CONTROL_INTD_SOFT_ACK_TIMEOUT_PERIOD_SHFT); | ||
890 | uv_write_global_mmr64 | ||
891 | (pnode, UVH_LB_BAU_MISC_CONTROL, mmr_image); | ||
892 | /* | ||
893 | * Subsequent reversals of the timebase bit (3) cause an | ||
894 | * immediate timeout of one or all INTD resources as | ||
895 | * indicated in bits 2:0 (7 causes all of them to timeout). | ||
896 | */ | ||
897 | mmr_image |= ((unsigned long)1 << | ||
898 | UVH_LB_BAU_MISC_CONTROL_ENABLE_INTD_SOFT_ACK_MODE_SHFT); | ||
899 | uv_write_global_mmr64 | ||
900 | (pnode, UVH_LB_BAU_MISC_CONTROL, mmr_image); | ||
901 | } | ||
902 | } | ||
903 | |||
904 | static void *uv_ptc_seq_start(struct seq_file *file, loff_t *offset) | ||
905 | { | ||
906 | if (*offset < num_possible_cpus()) | ||
907 | return offset; | ||
908 | return NULL; | ||
909 | } | ||
910 | |||
911 | static void *uv_ptc_seq_next(struct seq_file *file, void *data, loff_t *offset) | ||
912 | { | ||
913 | (*offset)++; | ||
914 | if (*offset < num_possible_cpus()) | ||
915 | return offset; | ||
916 | return NULL; | ||
917 | } | ||
918 | |||
919 | static void uv_ptc_seq_stop(struct seq_file *file, void *data) | ||
920 | { | ||
921 | } | ||
922 | |||
923 | static inline unsigned long long | ||
924 | microsec_2_cycles(unsigned long microsec) | ||
925 | { | ||
926 | unsigned long ns; | ||
927 | unsigned long long cyc; | ||
928 | |||
929 | ns = microsec * 1000; | ||
930 | cyc = (ns << CYC2NS_SCALE_FACTOR)/(per_cpu(cyc2ns, smp_processor_id())); | ||
931 | return cyc; | ||
932 | } | ||
933 | |||
934 | /* | ||
935 | * Display the statistics thru /proc. | ||
936 | * 'data' points to the cpu number | ||
937 | */ | ||
938 | static int uv_ptc_seq_show(struct seq_file *file, void *data) | ||
939 | { | ||
940 | struct ptc_stats *stat; | ||
941 | int cpu; | ||
942 | |||
943 | cpu = *(loff_t *)data; | ||
944 | |||
945 | if (!cpu) { | ||
946 | seq_printf(file, | ||
947 | "# cpu sent stime self locals remotes ncpus localhub "); | ||
948 | seq_printf(file, | ||
949 | "remotehub numuvhubs numuvhubs16 numuvhubs8 "); | ||
950 | seq_printf(file, | ||
951 | "numuvhubs4 numuvhubs2 numuvhubs1 dto "); | ||
952 | seq_printf(file, | ||
953 | "retries rok resetp resett giveup sto bz throt "); | ||
954 | seq_printf(file, | ||
955 | "sw_ack recv rtime all "); | ||
956 | seq_printf(file, | ||
957 | "one mult none retry canc nocan reset rcan "); | ||
958 | seq_printf(file, | ||
959 | "disable enable\n"); | ||
960 | } | ||
961 | if (cpu < num_possible_cpus() && cpu_online(cpu)) { | ||
962 | stat = &per_cpu(ptcstats, cpu); | ||
963 | /* source side statistics */ | ||
964 | seq_printf(file, | ||
965 | "cpu %d %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld ", | ||
966 | cpu, stat->s_requestor, cycles_2_us(stat->s_time), | ||
967 | stat->s_ntargself, stat->s_ntarglocals, | ||
968 | stat->s_ntargremotes, stat->s_ntargcpu, | ||
969 | stat->s_ntarglocaluvhub, stat->s_ntargremoteuvhub, | ||
970 | stat->s_ntarguvhub, stat->s_ntarguvhub16); | ||
971 | seq_printf(file, "%ld %ld %ld %ld %ld ", | ||
972 | stat->s_ntarguvhub8, stat->s_ntarguvhub4, | ||
973 | stat->s_ntarguvhub2, stat->s_ntarguvhub1, | ||
974 | stat->s_dtimeout); | ||
975 | seq_printf(file, "%ld %ld %ld %ld %ld %ld %ld %ld ", | ||
976 | stat->s_retry_messages, stat->s_retriesok, | ||
977 | stat->s_resets_plug, stat->s_resets_timeout, | ||
978 | stat->s_giveup, stat->s_stimeout, | ||
979 | stat->s_busy, stat->s_throttles); | ||
980 | |||
981 | /* destination side statistics */ | ||
982 | seq_printf(file, | ||
983 | "%lx %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld ", | ||
984 | uv_read_global_mmr64(uv_cpu_to_pnode(cpu), | ||
985 | UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE), | ||
986 | stat->d_requestee, cycles_2_us(stat->d_time), | ||
987 | stat->d_alltlb, stat->d_onetlb, stat->d_multmsg, | ||
988 | stat->d_nomsg, stat->d_retries, stat->d_canceled, | ||
989 | stat->d_nocanceled, stat->d_resets, | ||
990 | stat->d_rcanceled); | ||
991 | seq_printf(file, "%ld %ld\n", | ||
992 | stat->s_bau_disabled, stat->s_bau_reenabled); | ||
993 | } | ||
994 | |||
995 | return 0; | ||
996 | } | ||
997 | |||
998 | /* | ||
999 | * Display the tunables thru debugfs | ||
1000 | */ | ||
1001 | static ssize_t tunables_read(struct file *file, char __user *userbuf, | ||
1002 | size_t count, loff_t *ppos) | ||
1003 | { | ||
1004 | char *buf; | ||
1005 | int ret; | ||
1006 | |||
1007 | buf = kasprintf(GFP_KERNEL, "%s %s %s\n%d %d %d %d %d %d %d %d %d\n", | ||
1008 | "max_bau_concurrent plugged_delay plugsb4reset", | ||
1009 | "timeoutsb4reset ipi_reset_limit complete_threshold", | ||
1010 | "congested_response_us congested_reps congested_period", | ||
1011 | max_bau_concurrent, plugged_delay, plugsb4reset, | ||
1012 | timeoutsb4reset, ipi_reset_limit, complete_threshold, | ||
1013 | congested_response_us, congested_reps, congested_period); | ||
1014 | |||
1015 | if (!buf) | ||
1016 | return -ENOMEM; | ||
1017 | |||
1018 | ret = simple_read_from_buffer(userbuf, count, ppos, buf, strlen(buf)); | ||
1019 | kfree(buf); | ||
1020 | return ret; | ||
1021 | } | ||
1022 | |||
1023 | /* | ||
1024 | * -1: reset the statistics | ||
1025 | * 0: display meaning of the statistics | ||
1026 | */ | ||
1027 | static ssize_t uv_ptc_proc_write(struct file *file, const char __user *user, | ||
1028 | size_t count, loff_t *data) | ||
1029 | { | ||
1030 | int cpu; | ||
1031 | long input_arg; | ||
1032 | char optstr[64]; | ||
1033 | struct ptc_stats *stat; | ||
1034 | |||
1035 | if (count == 0 || count > sizeof(optstr)) | ||
1036 | return -EINVAL; | ||
1037 | if (copy_from_user(optstr, user, count)) | ||
1038 | return -EFAULT; | ||
1039 | optstr[count - 1] = '\0'; | ||
1040 | if (strict_strtol(optstr, 10, &input_arg) < 0) { | ||
1041 | printk(KERN_DEBUG "%s is invalid\n", optstr); | ||
1042 | return -EINVAL; | ||
1043 | } | ||
1044 | |||
1045 | if (input_arg == 0) { | ||
1046 | printk(KERN_DEBUG "# cpu: cpu number\n"); | ||
1047 | printk(KERN_DEBUG "Sender statistics:\n"); | ||
1048 | printk(KERN_DEBUG | ||
1049 | "sent: number of shootdown messages sent\n"); | ||
1050 | printk(KERN_DEBUG | ||
1051 | "stime: time spent sending messages\n"); | ||
1052 | printk(KERN_DEBUG | ||
1053 | "numuvhubs: number of hubs targeted with shootdown\n"); | ||
1054 | printk(KERN_DEBUG | ||
1055 | "numuvhubs16: number times 16 or more hubs targeted\n"); | ||
1056 | printk(KERN_DEBUG | ||
1057 | "numuvhubs8: number times 8 or more hubs targeted\n"); | ||
1058 | printk(KERN_DEBUG | ||
1059 | "numuvhubs4: number times 4 or more hubs targeted\n"); | ||
1060 | printk(KERN_DEBUG | ||
1061 | "numuvhubs2: number times 2 or more hubs targeted\n"); | ||
1062 | printk(KERN_DEBUG | ||
1063 | "numuvhubs1: number times 1 hub targeted\n"); | ||
1064 | printk(KERN_DEBUG | ||
1065 | "numcpus: number of cpus targeted with shootdown\n"); | ||
1066 | printk(KERN_DEBUG | ||
1067 | "dto: number of destination timeouts\n"); | ||
1068 | printk(KERN_DEBUG | ||
1069 | "retries: destination timeout retries sent\n"); | ||
1070 | printk(KERN_DEBUG | ||
1071 | "rok: : destination timeouts successfully retried\n"); | ||
1072 | printk(KERN_DEBUG | ||
1073 | "resetp: ipi-style resource resets for plugs\n"); | ||
1074 | printk(KERN_DEBUG | ||
1075 | "resett: ipi-style resource resets for timeouts\n"); | ||
1076 | printk(KERN_DEBUG | ||
1077 | "giveup: fall-backs to ipi-style shootdowns\n"); | ||
1078 | printk(KERN_DEBUG | ||
1079 | "sto: number of source timeouts\n"); | ||
1080 | printk(KERN_DEBUG | ||
1081 | "bz: number of stay-busy's\n"); | ||
1082 | printk(KERN_DEBUG | ||
1083 | "throt: number times spun in throttle\n"); | ||
1084 | printk(KERN_DEBUG "Destination side statistics:\n"); | ||
1085 | printk(KERN_DEBUG | ||
1086 | "sw_ack: image of UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE\n"); | ||
1087 | printk(KERN_DEBUG | ||
1088 | "recv: shootdown messages received\n"); | ||
1089 | printk(KERN_DEBUG | ||
1090 | "rtime: time spent processing messages\n"); | ||
1091 | printk(KERN_DEBUG | ||
1092 | "all: shootdown all-tlb messages\n"); | ||
1093 | printk(KERN_DEBUG | ||
1094 | "one: shootdown one-tlb messages\n"); | ||
1095 | printk(KERN_DEBUG | ||
1096 | "mult: interrupts that found multiple messages\n"); | ||
1097 | printk(KERN_DEBUG | ||
1098 | "none: interrupts that found no messages\n"); | ||
1099 | printk(KERN_DEBUG | ||
1100 | "retry: number of retry messages processed\n"); | ||
1101 | printk(KERN_DEBUG | ||
1102 | "canc: number messages canceled by retries\n"); | ||
1103 | printk(KERN_DEBUG | ||
1104 | "nocan: number retries that found nothing to cancel\n"); | ||
1105 | printk(KERN_DEBUG | ||
1106 | "reset: number of ipi-style reset requests processed\n"); | ||
1107 | printk(KERN_DEBUG | ||
1108 | "rcan: number messages canceled by reset requests\n"); | ||
1109 | printk(KERN_DEBUG | ||
1110 | "disable: number times use of the BAU was disabled\n"); | ||
1111 | printk(KERN_DEBUG | ||
1112 | "enable: number times use of the BAU was re-enabled\n"); | ||
1113 | } else if (input_arg == -1) { | ||
1114 | for_each_present_cpu(cpu) { | ||
1115 | stat = &per_cpu(ptcstats, cpu); | ||
1116 | memset(stat, 0, sizeof(struct ptc_stats)); | ||
1117 | } | ||
1118 | } | ||
1119 | |||
1120 | return count; | ||
1121 | } | ||
1122 | |||
1123 | static int local_atoi(const char *name) | ||
1124 | { | ||
1125 | int val = 0; | ||
1126 | |||
1127 | for (;; name++) { | ||
1128 | switch (*name) { | ||
1129 | case '0' ... '9': | ||
1130 | val = 10*val+(*name-'0'); | ||
1131 | break; | ||
1132 | default: | ||
1133 | return val; | ||
1134 | } | ||
1135 | } | ||
1136 | } | ||
1137 | |||
1138 | /* | ||
1139 | * set the tunables | ||
1140 | * 0 values reset them to defaults | ||
1141 | */ | ||
1142 | static ssize_t tunables_write(struct file *file, const char __user *user, | ||
1143 | size_t count, loff_t *data) | ||
1144 | { | ||
1145 | int cpu; | ||
1146 | int cnt = 0; | ||
1147 | int val; | ||
1148 | char *p; | ||
1149 | char *q; | ||
1150 | char instr[64]; | ||
1151 | struct bau_control *bcp; | ||
1152 | |||
1153 | if (count == 0 || count > sizeof(instr)-1) | ||
1154 | return -EINVAL; | ||
1155 | if (copy_from_user(instr, user, count)) | ||
1156 | return -EFAULT; | ||
1157 | |||
1158 | instr[count] = '\0'; | ||
1159 | /* count the fields */ | ||
1160 | p = instr + strspn(instr, WHITESPACE); | ||
1161 | q = p; | ||
1162 | for (; *p; p = q + strspn(q, WHITESPACE)) { | ||
1163 | q = p + strcspn(p, WHITESPACE); | ||
1164 | cnt++; | ||
1165 | if (q == p) | ||
1166 | break; | ||
1167 | } | ||
1168 | if (cnt != 9) { | ||
1169 | printk(KERN_INFO "bau tunable error: should be 9 numbers\n"); | ||
1170 | return -EINVAL; | ||
1171 | } | ||
1172 | |||
1173 | p = instr + strspn(instr, WHITESPACE); | ||
1174 | q = p; | ||
1175 | for (cnt = 0; *p; p = q + strspn(q, WHITESPACE), cnt++) { | ||
1176 | q = p + strcspn(p, WHITESPACE); | ||
1177 | val = local_atoi(p); | ||
1178 | switch (cnt) { | ||
1179 | case 0: | ||
1180 | if (val == 0) { | ||
1181 | max_bau_concurrent = MAX_BAU_CONCURRENT; | ||
1182 | max_bau_concurrent_constant = | ||
1183 | MAX_BAU_CONCURRENT; | ||
1184 | continue; | ||
1185 | } | ||
1186 | bcp = &per_cpu(bau_control, smp_processor_id()); | ||
1187 | if (val < 1 || val > bcp->cpus_in_uvhub) { | ||
1188 | printk(KERN_DEBUG | ||
1189 | "Error: BAU max concurrent %d is invalid\n", | ||
1190 | val); | ||
1191 | return -EINVAL; | ||
1192 | } | ||
1193 | max_bau_concurrent = val; | ||
1194 | max_bau_concurrent_constant = val; | ||
1195 | continue; | ||
1196 | case 1: | ||
1197 | if (val == 0) | ||
1198 | plugged_delay = PLUGGED_DELAY; | ||
1199 | else | ||
1200 | plugged_delay = val; | ||
1201 | continue; | ||
1202 | case 2: | ||
1203 | if (val == 0) | ||
1204 | plugsb4reset = PLUGSB4RESET; | ||
1205 | else | ||
1206 | plugsb4reset = val; | ||
1207 | continue; | ||
1208 | case 3: | ||
1209 | if (val == 0) | ||
1210 | timeoutsb4reset = TIMEOUTSB4RESET; | ||
1211 | else | ||
1212 | timeoutsb4reset = val; | ||
1213 | continue; | ||
1214 | case 4: | ||
1215 | if (val == 0) | ||
1216 | ipi_reset_limit = IPI_RESET_LIMIT; | ||
1217 | else | ||
1218 | ipi_reset_limit = val; | ||
1219 | continue; | ||
1220 | case 5: | ||
1221 | if (val == 0) | ||
1222 | complete_threshold = COMPLETE_THRESHOLD; | ||
1223 | else | ||
1224 | complete_threshold = val; | ||
1225 | continue; | ||
1226 | case 6: | ||
1227 | if (val == 0) | ||
1228 | congested_response_us = CONGESTED_RESPONSE_US; | ||
1229 | else | ||
1230 | congested_response_us = val; | ||
1231 | continue; | ||
1232 | case 7: | ||
1233 | if (val == 0) | ||
1234 | congested_reps = CONGESTED_REPS; | ||
1235 | else | ||
1236 | congested_reps = val; | ||
1237 | continue; | ||
1238 | case 8: | ||
1239 | if (val == 0) | ||
1240 | congested_period = CONGESTED_PERIOD; | ||
1241 | else | ||
1242 | congested_period = val; | ||
1243 | continue; | ||
1244 | } | ||
1245 | if (q == p) | ||
1246 | break; | ||
1247 | } | ||
1248 | for_each_present_cpu(cpu) { | ||
1249 | bcp = &per_cpu(bau_control, cpu); | ||
1250 | bcp->max_bau_concurrent = max_bau_concurrent; | ||
1251 | bcp->max_bau_concurrent_constant = max_bau_concurrent; | ||
1252 | bcp->plugged_delay = plugged_delay; | ||
1253 | bcp->plugsb4reset = plugsb4reset; | ||
1254 | bcp->timeoutsb4reset = timeoutsb4reset; | ||
1255 | bcp->ipi_reset_limit = ipi_reset_limit; | ||
1256 | bcp->complete_threshold = complete_threshold; | ||
1257 | bcp->congested_response_us = congested_response_us; | ||
1258 | bcp->congested_reps = congested_reps; | ||
1259 | bcp->congested_period = congested_period; | ||
1260 | } | ||
1261 | return count; | ||
1262 | } | ||
1263 | |||
1264 | static const struct seq_operations uv_ptc_seq_ops = { | ||
1265 | .start = uv_ptc_seq_start, | ||
1266 | .next = uv_ptc_seq_next, | ||
1267 | .stop = uv_ptc_seq_stop, | ||
1268 | .show = uv_ptc_seq_show | ||
1269 | }; | ||
1270 | |||
1271 | static int uv_ptc_proc_open(struct inode *inode, struct file *file) | ||
1272 | { | ||
1273 | return seq_open(file, &uv_ptc_seq_ops); | ||
1274 | } | ||
1275 | |||
1276 | static int tunables_open(struct inode *inode, struct file *file) | ||
1277 | { | ||
1278 | return 0; | ||
1279 | } | ||
1280 | |||
1281 | static const struct file_operations proc_uv_ptc_operations = { | ||
1282 | .open = uv_ptc_proc_open, | ||
1283 | .read = seq_read, | ||
1284 | .write = uv_ptc_proc_write, | ||
1285 | .llseek = seq_lseek, | ||
1286 | .release = seq_release, | ||
1287 | }; | ||
1288 | |||
1289 | static const struct file_operations tunables_fops = { | ||
1290 | .open = tunables_open, | ||
1291 | .read = tunables_read, | ||
1292 | .write = tunables_write, | ||
1293 | .llseek = default_llseek, | ||
1294 | }; | ||
1295 | |||
1296 | static int __init uv_ptc_init(void) | ||
1297 | { | ||
1298 | struct proc_dir_entry *proc_uv_ptc; | ||
1299 | |||
1300 | if (!is_uv_system()) | ||
1301 | return 0; | ||
1302 | |||
1303 | proc_uv_ptc = proc_create(UV_PTC_BASENAME, 0444, NULL, | ||
1304 | &proc_uv_ptc_operations); | ||
1305 | if (!proc_uv_ptc) { | ||
1306 | printk(KERN_ERR "unable to create %s proc entry\n", | ||
1307 | UV_PTC_BASENAME); | ||
1308 | return -EINVAL; | ||
1309 | } | ||
1310 | |||
1311 | tunables_dir = debugfs_create_dir(UV_BAU_TUNABLES_DIR, NULL); | ||
1312 | if (!tunables_dir) { | ||
1313 | printk(KERN_ERR "unable to create debugfs directory %s\n", | ||
1314 | UV_BAU_TUNABLES_DIR); | ||
1315 | return -EINVAL; | ||
1316 | } | ||
1317 | tunables_file = debugfs_create_file(UV_BAU_TUNABLES_FILE, 0600, | ||
1318 | tunables_dir, NULL, &tunables_fops); | ||
1319 | if (!tunables_file) { | ||
1320 | printk(KERN_ERR "unable to create debugfs file %s\n", | ||
1321 | UV_BAU_TUNABLES_FILE); | ||
1322 | return -EINVAL; | ||
1323 | } | ||
1324 | return 0; | ||
1325 | } | ||
1326 | |||
1327 | /* | ||
1328 | * initialize the sending side's sending buffers | ||
1329 | */ | ||
1330 | static void | ||
1331 | uv_activation_descriptor_init(int node, int pnode) | ||
1332 | { | ||
1333 | int i; | ||
1334 | int cpu; | ||
1335 | unsigned long pa; | ||
1336 | unsigned long m; | ||
1337 | unsigned long n; | ||
1338 | struct bau_desc *bau_desc; | ||
1339 | struct bau_desc *bd2; | ||
1340 | struct bau_control *bcp; | ||
1341 | |||
1342 | /* | ||
1343 | * each bau_desc is 64 bytes; there are 8 (UV_ITEMS_PER_DESCRIPTOR) | ||
1344 | * per cpu; and up to 32 (UV_ADP_SIZE) cpus per uvhub | ||
1345 | */ | ||
1346 | bau_desc = (struct bau_desc *)kmalloc_node(sizeof(struct bau_desc)* | ||
1347 | UV_ADP_SIZE*UV_ITEMS_PER_DESCRIPTOR, GFP_KERNEL, node); | ||
1348 | BUG_ON(!bau_desc); | ||
1349 | |||
1350 | pa = uv_gpa(bau_desc); /* need the real nasid */ | ||
1351 | n = pa >> uv_nshift; | ||
1352 | m = pa & uv_mmask; | ||
1353 | |||
1354 | uv_write_global_mmr64(pnode, UVH_LB_BAU_SB_DESCRIPTOR_BASE, | ||
1355 | (n << UV_DESC_BASE_PNODE_SHIFT | m)); | ||
1356 | |||
1357 | /* | ||
1358 | * initializing all 8 (UV_ITEMS_PER_DESCRIPTOR) descriptors for each | ||
1359 | * cpu even though we only use the first one; one descriptor can | ||
1360 | * describe a broadcast to 256 uv hubs. | ||
1361 | */ | ||
1362 | for (i = 0, bd2 = bau_desc; i < (UV_ADP_SIZE*UV_ITEMS_PER_DESCRIPTOR); | ||
1363 | i++, bd2++) { | ||
1364 | memset(bd2, 0, sizeof(struct bau_desc)); | ||
1365 | bd2->header.sw_ack_flag = 1; | ||
1366 | /* | ||
1367 | * base_dest_nodeid is the nasid (pnode<<1) of the first uvhub | ||
1368 | * in the partition. The bit map will indicate uvhub numbers, | ||
1369 | * which are 0-N in a partition. Pnodes are unique system-wide. | ||
1370 | */ | ||
1371 | bd2->header.base_dest_nodeid = uv_partition_base_pnode << 1; | ||
1372 | bd2->header.dest_subnodeid = 0x10; /* the LB */ | ||
1373 | bd2->header.command = UV_NET_ENDPOINT_INTD; | ||
1374 | bd2->header.int_both = 1; | ||
1375 | /* | ||
1376 | * all others need to be set to zero: | ||
1377 | * fairness chaining multilevel count replied_to | ||
1378 | */ | ||
1379 | } | ||
1380 | for_each_present_cpu(cpu) { | ||
1381 | if (pnode != uv_blade_to_pnode(uv_cpu_to_blade_id(cpu))) | ||
1382 | continue; | ||
1383 | bcp = &per_cpu(bau_control, cpu); | ||
1384 | bcp->descriptor_base = bau_desc; | ||
1385 | } | ||
1386 | } | ||
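A quick check of the sizing comment at the top of uv_activation_descriptor_init(): with a 64-byte struct bau_desc, 8 descriptors (UV_ITEMS_PER_DESCRIPTOR) per cpu and up to 32 cpus (UV_ADP_SIZE) per uvhub, the kmalloc_node() call reserves 64 * 8 * 32 = 16384 bytes, i.e. 16 KB of descriptor space per uvhub.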
1387 | |||
1388 | /* | ||
1389 | * initialize the destination side's receiving buffers | ||
1390 | * entered for each uvhub in the partition | ||
1391 | * - node is first node (kernel memory notion) on the uvhub | ||
1392 | * - pnode is the uvhub's physical identifier | ||
1393 | */ | ||
1394 | static void | ||
1395 | uv_payload_queue_init(int node, int pnode) | ||
1396 | { | ||
1397 | int pn; | ||
1398 | int cpu; | ||
1399 | char *cp; | ||
1400 | unsigned long pa; | ||
1401 | struct bau_payload_queue_entry *pqp; | ||
1402 | struct bau_payload_queue_entry *pqp_malloc; | ||
1403 | struct bau_control *bcp; | ||
1404 | |||
1405 | pqp = (struct bau_payload_queue_entry *) kmalloc_node( | ||
1406 | (DEST_Q_SIZE + 1) * sizeof(struct bau_payload_queue_entry), | ||
1407 | GFP_KERNEL, node); | ||
1408 | BUG_ON(!pqp); | ||
1409 | pqp_malloc = pqp; | ||
1410 | |||
1411 | cp = (char *)pqp + 31; | ||
1412 | pqp = (struct bau_payload_queue_entry *)(((unsigned long)cp >> 5) << 5); | ||
1413 | |||
1414 | for_each_present_cpu(cpu) { | ||
1415 | if (pnode != uv_cpu_to_pnode(cpu)) | ||
1416 | continue; | ||
1417 | /* for every cpu on this pnode: */ | ||
1418 | bcp = &per_cpu(bau_control, cpu); | ||
1419 | bcp->va_queue_first = pqp; | ||
1420 | bcp->bau_msg_head = pqp; | ||
1421 | bcp->va_queue_last = pqp + (DEST_Q_SIZE - 1); | ||
1422 | } | ||
1423 | /* | ||
1424 | * need the pnode of where the memory was really allocated | ||
1425 | */ | ||
1426 | pa = uv_gpa(pqp); | ||
1427 | pn = pa >> uv_nshift; | ||
1428 | uv_write_global_mmr64(pnode, | ||
1429 | UVH_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST, | ||
1430 | ((unsigned long)pn << UV_PAYLOADQ_PNODE_SHIFT) | | ||
1431 | uv_physnodeaddr(pqp)); | ||
1432 | uv_write_global_mmr64(pnode, UVH_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL, | ||
1433 | uv_physnodeaddr(pqp)); | ||
1434 | uv_write_global_mmr64(pnode, UVH_LB_BAU_INTD_PAYLOAD_QUEUE_LAST, | ||
1435 | (unsigned long) | ||
1436 | uv_physnodeaddr(pqp + (DEST_Q_SIZE - 1))); | ||
437 | /* in effect, all msg_types are set to MSG_NOOP */ | ||
1438 | memset(pqp, 0, sizeof(struct bau_payload_queue_entry) * DEST_Q_SIZE); | ||
1439 | } | ||
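The pointer arithmetic near the top of uv_payload_queue_init() (add 31, then shift right and left by 5) simply rounds the allocation up to the next 32-byte boundary. A one-line sketch of the equivalent form using the kernel's ALIGN() helper, shown only for illustration:

	pqp = (struct bau_payload_queue_entry *)ALIGN((unsigned long)pqp_malloc, 32);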
1440 | |||
1441 | /* | ||
1442 | * Initialization of each UV hub's structures | ||
1443 | */ | ||
1444 | static void __init uv_init_uvhub(int uvhub, int vector) | ||
1445 | { | ||
1446 | int node; | ||
1447 | int pnode; | ||
1448 | unsigned long apicid; | ||
1449 | |||
1450 | node = uvhub_to_first_node(uvhub); | ||
1451 | pnode = uv_blade_to_pnode(uvhub); | ||
1452 | uv_activation_descriptor_init(node, pnode); | ||
1453 | uv_payload_queue_init(node, pnode); | ||
1454 | /* | ||
1455 | * the below initialization can't be in firmware because the | ||
1456 | * messaging IRQ will be determined by the OS | ||
1457 | */ | ||
1458 | apicid = uvhub_to_first_apicid(uvhub); | ||
1459 | uv_write_global_mmr64(pnode, UVH_BAU_DATA_CONFIG, | ||
1460 | ((apicid << 32) | vector)); | ||
1461 | } | ||
1462 | |||
1463 | /* | ||
1464 | * We will set BAU_MISC_CONTROL with a timeout period. | ||
1465 | * But the BIOS has set UVH_AGING_PRESCALE_SEL and UVH_TRANSACTION_TIMEOUT. | ||
1466 | * So the destination timeout period has to be calculated from them. | ||
1467 | */ | ||
1468 | static int | ||
1469 | calculate_destination_timeout(void) | ||
1470 | { | ||
1471 | unsigned long mmr_image; | ||
1472 | int mult1; | ||
1473 | int mult2; | ||
1474 | int index; | ||
1475 | int base; | ||
1476 | int ret; | ||
1477 | unsigned long ts_ns; | ||
1478 | |||
1479 | mult1 = UV_INTD_SOFT_ACK_TIMEOUT_PERIOD & BAU_MISC_CONTROL_MULT_MASK; | ||
1480 | mmr_image = uv_read_local_mmr(UVH_AGING_PRESCALE_SEL); | ||
1481 | index = (mmr_image >> BAU_URGENCY_7_SHIFT) & BAU_URGENCY_7_MASK; | ||
1482 | mmr_image = uv_read_local_mmr(UVH_TRANSACTION_TIMEOUT); | ||
1483 | mult2 = (mmr_image >> BAU_TRANS_SHIFT) & BAU_TRANS_MASK; | ||
1484 | base = timeout_base_ns[index]; | ||
1485 | ts_ns = base * mult1 * mult2; | ||
1486 | ret = ts_ns / 1000; | ||
1487 | return ret; | ||
1488 | } | ||
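To make the conversion concrete with purely hypothetical values: if base were 800 ns, mult1 were 3 and mult2 were 10, then ts_ns = 800 * 3 * 10 = 24000 ns and the function would return 24, i.e. a 24 microsecond destination timeout, which uv_init_per_cpu() later converts to cycles via microsec_2_cycles().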
1489 | |||
1490 | /* | ||
1491 | * initialize the bau_control structure for each cpu | ||
1492 | */ | ||
1493 | static void __init uv_init_per_cpu(int nuvhubs) | ||
1494 | { | ||
1495 | int i; | ||
1496 | int cpu; | ||
1497 | int pnode; | ||
1498 | int uvhub; | ||
1499 | int have_hmaster; | ||
1500 | short socket = 0; | ||
1501 | unsigned short socket_mask; | ||
1502 | unsigned char *uvhub_mask; | ||
1503 | struct bau_control *bcp; | ||
1504 | struct uvhub_desc *bdp; | ||
1505 | struct socket_desc *sdp; | ||
1506 | struct bau_control *hmaster = NULL; | ||
1507 | struct bau_control *smaster = NULL; | ||
1508 | struct socket_desc { | ||
1509 | short num_cpus; | ||
1510 | short cpu_number[16]; | ||
1511 | }; | ||
1512 | struct uvhub_desc { | ||
1513 | unsigned short socket_mask; | ||
1514 | short num_cpus; | ||
1515 | short uvhub; | ||
1516 | short pnode; | ||
1517 | struct socket_desc socket[2]; | ||
1518 | }; | ||
1519 | struct uvhub_desc *uvhub_descs; | ||
1520 | |||
1521 | timeout_us = calculate_destination_timeout(); | ||
1522 | |||
1523 | uvhub_descs = (struct uvhub_desc *) | ||
1524 | kmalloc(nuvhubs * sizeof(struct uvhub_desc), GFP_KERNEL); | ||
1525 | memset(uvhub_descs, 0, nuvhubs * sizeof(struct uvhub_desc)); | ||
1526 | uvhub_mask = kzalloc((nuvhubs+7)/8, GFP_KERNEL); | ||
1527 | for_each_present_cpu(cpu) { | ||
1528 | bcp = &per_cpu(bau_control, cpu); | ||
1529 | memset(bcp, 0, sizeof(struct bau_control)); | ||
1530 | pnode = uv_cpu_hub_info(cpu)->pnode; | ||
1531 | uvhub = uv_cpu_hub_info(cpu)->numa_blade_id; | ||
1532 | *(uvhub_mask + (uvhub/8)) |= (1 << (uvhub%8)); | ||
1533 | bdp = &uvhub_descs[uvhub]; | ||
1534 | bdp->num_cpus++; | ||
1535 | bdp->uvhub = uvhub; | ||
1536 | bdp->pnode = pnode; | ||
1537 | /* kludge: 'assuming' one node per socket, and assuming that | ||
1538 | disabling a socket just leaves a gap in node numbers */ | ||
1539 | socket = (cpu_to_node(cpu) & 1); | ||
1540 | bdp->socket_mask |= (1 << socket); | ||
1541 | sdp = &bdp->socket[socket]; | ||
1542 | sdp->cpu_number[sdp->num_cpus] = cpu; | ||
1543 | sdp->num_cpus++; | ||
1544 | } | ||
1545 | for (uvhub = 0; uvhub < nuvhubs; uvhub++) { | ||
1546 | if (!(*(uvhub_mask + (uvhub/8)) & (1 << (uvhub%8)))) | ||
1547 | continue; | ||
1548 | have_hmaster = 0; | ||
1549 | bdp = &uvhub_descs[uvhub]; | ||
1550 | socket_mask = bdp->socket_mask; | ||
1551 | socket = 0; | ||
1552 | while (socket_mask) { | ||
1553 | if (!(socket_mask & 1)) | ||
1554 | goto nextsocket; | ||
1555 | sdp = &bdp->socket[socket]; | ||
1556 | for (i = 0; i < sdp->num_cpus; i++) { | ||
1557 | cpu = sdp->cpu_number[i]; | ||
1558 | bcp = &per_cpu(bau_control, cpu); | ||
1559 | bcp->cpu = cpu; | ||
1560 | if (i == 0) { | ||
1561 | smaster = bcp; | ||
1562 | if (!have_hmaster) { | ||
1563 | have_hmaster++; | ||
1564 | hmaster = bcp; | ||
1565 | } | ||
1566 | } | ||
1567 | bcp->cpus_in_uvhub = bdp->num_cpus; | ||
1568 | bcp->cpus_in_socket = sdp->num_cpus; | ||
1569 | bcp->socket_master = smaster; | ||
1570 | bcp->uvhub = bdp->uvhub; | ||
1571 | bcp->uvhub_master = hmaster; | ||
1572 | bcp->uvhub_cpu = uv_cpu_hub_info(cpu)-> | ||
1573 | blade_processor_id; | ||
1574 | } | ||
1575 | nextsocket: | ||
1576 | socket++; | ||
1577 | socket_mask = (socket_mask >> 1); | ||
1578 | } | ||
1579 | } | ||
1580 | kfree(uvhub_descs); | ||
1581 | kfree(uvhub_mask); | ||
1582 | for_each_present_cpu(cpu) { | ||
1583 | bcp = &per_cpu(bau_control, cpu); | ||
1584 | bcp->baudisabled = 0; | ||
1585 | bcp->statp = &per_cpu(ptcstats, cpu); | ||
1586 | /* time interval to catch a hardware stay-busy bug */ | ||
1587 | bcp->timeout_interval = microsec_2_cycles(2*timeout_us); | ||
1588 | bcp->max_bau_concurrent = max_bau_concurrent; | ||
1589 | bcp->max_bau_concurrent_constant = max_bau_concurrent; | ||
1590 | bcp->plugged_delay = plugged_delay; | ||
1591 | bcp->plugsb4reset = plugsb4reset; | ||
1592 | bcp->timeoutsb4reset = timeoutsb4reset; | ||
1593 | bcp->ipi_reset_limit = ipi_reset_limit; | ||
1594 | bcp->complete_threshold = complete_threshold; | ||
1595 | bcp->congested_response_us = congested_response_us; | ||
1596 | bcp->congested_reps = congested_reps; | ||
1597 | bcp->congested_period = congested_period; | ||
1598 | } | ||
1599 | } | ||
1600 | |||
1601 | /* | ||
1602 | * Initialization of BAU-related structures | ||
1603 | */ | ||
1604 | static int __init uv_bau_init(void) | ||
1605 | { | ||
1606 | int uvhub; | ||
1607 | int pnode; | ||
1608 | int nuvhubs; | ||
1609 | int cur_cpu; | ||
1610 | int vector; | ||
1611 | unsigned long mmr; | ||
1612 | |||
1613 | if (!is_uv_system()) | ||
1614 | return 0; | ||
1615 | |||
1616 | if (nobau) | ||
1617 | return 0; | ||
1618 | |||
1619 | for_each_possible_cpu(cur_cpu) | ||
1620 | zalloc_cpumask_var_node(&per_cpu(uv_flush_tlb_mask, cur_cpu), | ||
1621 | GFP_KERNEL, cpu_to_node(cur_cpu)); | ||
1622 | |||
1623 | uv_nshift = uv_hub_info->m_val; | ||
1624 | uv_mmask = (1UL << uv_hub_info->m_val) - 1; | ||
1625 | nuvhubs = uv_num_possible_blades(); | ||
1626 | spin_lock_init(&disable_lock); | ||
1627 | congested_cycles = microsec_2_cycles(congested_response_us); | ||
1628 | |||
1629 | uv_init_per_cpu(nuvhubs); | ||
1630 | |||
1631 | uv_partition_base_pnode = 0x7fffffff; | ||
1632 | for (uvhub = 0; uvhub < nuvhubs; uvhub++) | ||
1633 | if (uv_blade_nr_possible_cpus(uvhub) && | ||
1634 | (uv_blade_to_pnode(uvhub) < uv_partition_base_pnode)) | ||
1635 | uv_partition_base_pnode = uv_blade_to_pnode(uvhub); | ||
1636 | |||
1637 | vector = UV_BAU_MESSAGE; | ||
1638 | for_each_possible_blade(uvhub) | ||
1639 | if (uv_blade_nr_possible_cpus(uvhub)) | ||
1640 | uv_init_uvhub(uvhub, vector); | ||
1641 | |||
1642 | uv_enable_timeouts(); | ||
1643 | alloc_intr_gate(vector, uv_bau_message_intr1); | ||
1644 | |||
1645 | for_each_possible_blade(uvhub) { | ||
1646 | if (uv_blade_nr_possible_cpus(uvhub)) { | ||
1647 | pnode = uv_blade_to_pnode(uvhub); | ||
1648 | /* INIT the bau */ | ||
1649 | uv_write_global_mmr64(pnode, | ||
1650 | UVH_LB_BAU_SB_ACTIVATION_CONTROL, | ||
1651 | ((unsigned long)1 << 63)); | ||
1652 | mmr = 1; /* should be 1 to broadcast to both sockets */ | ||
1653 | uv_write_global_mmr64(pnode, UVH_BAU_DATA_BROADCAST, | ||
1654 | mmr); | ||
1655 | } | ||
1656 | } | ||
1657 | |||
1658 | return 0; | ||
1659 | } | ||
1660 | core_initcall(uv_bau_init); | ||
1661 | fs_initcall(uv_ptc_init); | ||
diff --git a/arch/x86/platform/uv/uv_irq.c b/arch/x86/platform/uv/uv_irq.c new file mode 100644 index 000000000000..7b24460917d5 --- /dev/null +++ b/arch/x86/platform/uv/uv_irq.c | |||
@@ -0,0 +1,285 @@ | |||
1 | /* | ||
2 | * This file is subject to the terms and conditions of the GNU General Public | ||
3 | * License. See the file "COPYING" in the main directory of this archive | ||
4 | * for more details. | ||
5 | * | ||
6 | * SGI UV IRQ functions | ||
7 | * | ||
8 | * Copyright (C) 2008 Silicon Graphics, Inc. All rights reserved. | ||
9 | */ | ||
10 | |||
11 | #include <linux/module.h> | ||
12 | #include <linux/rbtree.h> | ||
13 | #include <linux/slab.h> | ||
14 | #include <linux/irq.h> | ||
15 | |||
16 | #include <asm/apic.h> | ||
17 | #include <asm/uv/uv_irq.h> | ||
18 | #include <asm/uv/uv_hub.h> | ||
19 | |||
20 | /* MMR offset and pnode of hub sourcing interrupts for a given irq */ | ||
21 | struct uv_irq_2_mmr_pnode { | ||
22 | struct rb_node list; | ||
23 | unsigned long offset; | ||
24 | int pnode; | ||
25 | int irq; | ||
26 | }; | ||
27 | |||
28 | static spinlock_t uv_irq_lock; | ||
29 | static struct rb_root uv_irq_root; | ||
30 | |||
31 | static int uv_set_irq_affinity(struct irq_data *, const struct cpumask *, bool); | ||
32 | |||
33 | static void uv_noop(struct irq_data *data) { } | ||
34 | |||
35 | static void uv_ack_apic(struct irq_data *data) | ||
36 | { | ||
37 | ack_APIC_irq(); | ||
38 | } | ||
39 | |||
40 | static struct irq_chip uv_irq_chip = { | ||
41 | .name = "UV-CORE", | ||
42 | .irq_mask = uv_noop, | ||
43 | .irq_unmask = uv_noop, | ||
44 | .irq_eoi = uv_ack_apic, | ||
45 | .irq_set_affinity = uv_set_irq_affinity, | ||
46 | }; | ||
47 | |||
48 | /* | ||
49 | * Add offset and pnode information of the hub sourcing interrupts to the | ||
50 | * rb tree for a specific irq. | ||
51 | */ | ||
52 | static int uv_set_irq_2_mmr_info(int irq, unsigned long offset, unsigned blade) | ||
53 | { | ||
54 | struct rb_node **link = &uv_irq_root.rb_node; | ||
55 | struct rb_node *parent = NULL; | ||
56 | struct uv_irq_2_mmr_pnode *n; | ||
57 | struct uv_irq_2_mmr_pnode *e; | ||
58 | unsigned long irqflags; | ||
59 | |||
60 | n = kmalloc_node(sizeof(struct uv_irq_2_mmr_pnode), GFP_KERNEL, | ||
61 | uv_blade_to_memory_nid(blade)); | ||
62 | if (!n) | ||
63 | return -ENOMEM; | ||
64 | |||
65 | n->irq = irq; | ||
66 | n->offset = offset; | ||
67 | n->pnode = uv_blade_to_pnode(blade); | ||
68 | spin_lock_irqsave(&uv_irq_lock, irqflags); | ||
69 | /* Find the right place in the rbtree: */ | ||
70 | while (*link) { | ||
71 | parent = *link; | ||
72 | e = rb_entry(parent, struct uv_irq_2_mmr_pnode, list); | ||
73 | |||
74 | if (unlikely(irq == e->irq)) { | ||
75 | /* irq entry exists */ | ||
76 | e->pnode = uv_blade_to_pnode(blade); | ||
77 | e->offset = offset; | ||
78 | spin_unlock_irqrestore(&uv_irq_lock, irqflags); | ||
79 | kfree(n); | ||
80 | return 0; | ||
81 | } | ||
82 | |||
83 | if (irq < e->irq) | ||
84 | link = &(*link)->rb_left; | ||
85 | else | ||
86 | link = &(*link)->rb_right; | ||
87 | } | ||
88 | |||
89 | /* Insert the node into the rbtree. */ | ||
90 | rb_link_node(&n->list, parent, link); | ||
91 | rb_insert_color(&n->list, &uv_irq_root); | ||
92 | |||
93 | spin_unlock_irqrestore(&uv_irq_lock, irqflags); | ||
94 | return 0; | ||
95 | } | ||
96 | |||
97 | /* Retrieve offset and pnode information from the rb tree for a specific irq */ | ||
98 | int uv_irq_2_mmr_info(int irq, unsigned long *offset, int *pnode) | ||
99 | { | ||
100 | struct uv_irq_2_mmr_pnode *e; | ||
101 | struct rb_node *n; | ||
102 | unsigned long irqflags; | ||
103 | |||
104 | spin_lock_irqsave(&uv_irq_lock, irqflags); | ||
105 | n = uv_irq_root.rb_node; | ||
106 | while (n) { | ||
107 | e = rb_entry(n, struct uv_irq_2_mmr_pnode, list); | ||
108 | |||
109 | if (e->irq == irq) { | ||
110 | *offset = e->offset; | ||
111 | *pnode = e->pnode; | ||
112 | spin_unlock_irqrestore(&uv_irq_lock, irqflags); | ||
113 | return 0; | ||
114 | } | ||
115 | |||
116 | if (irq < e->irq) | ||
117 | n = n->rb_left; | ||
118 | else | ||
119 | n = n->rb_right; | ||
120 | } | ||
121 | spin_unlock_irqrestore(&uv_irq_lock, irqflags); | ||
122 | return -1; | ||
123 | } | ||
124 | |||
125 | /* | ||
126 | * Re-target the irq to the specified CPU and enable the specified MMR located | ||
127 | * on the specified blade to allow the sending of MSIs to the specified CPU. | ||
128 | */ | ||
129 | static int | ||
130 | arch_enable_uv_irq(char *irq_name, unsigned int irq, int cpu, int mmr_blade, | ||
131 | unsigned long mmr_offset, int limit) | ||
132 | { | ||
133 | const struct cpumask *eligible_cpu = cpumask_of(cpu); | ||
134 | struct irq_cfg *cfg = get_irq_chip_data(irq); | ||
135 | unsigned long mmr_value; | ||
136 | struct uv_IO_APIC_route_entry *entry; | ||
137 | int mmr_pnode, err; | ||
138 | |||
139 | BUILD_BUG_ON(sizeof(struct uv_IO_APIC_route_entry) != | ||
140 | sizeof(unsigned long)); | ||
141 | |||
142 | err = assign_irq_vector(irq, cfg, eligible_cpu); | ||
143 | if (err != 0) | ||
144 | return err; | ||
145 | |||
146 | if (limit == UV_AFFINITY_CPU) | ||
147 | irq_set_status_flags(irq, IRQ_NO_BALANCING); | ||
148 | else | ||
149 | irq_set_status_flags(irq, IRQ_MOVE_PCNTXT); | ||
150 | |||
151 | set_irq_chip_and_handler_name(irq, &uv_irq_chip, handle_percpu_irq, | ||
152 | irq_name); | ||
153 | |||
154 | mmr_value = 0; | ||
155 | entry = (struct uv_IO_APIC_route_entry *)&mmr_value; | ||
156 | entry->vector = cfg->vector; | ||
157 | entry->delivery_mode = apic->irq_delivery_mode; | ||
158 | entry->dest_mode = apic->irq_dest_mode; | ||
159 | entry->polarity = 0; | ||
160 | entry->trigger = 0; | ||
161 | entry->mask = 0; | ||
162 | entry->dest = apic->cpu_mask_to_apicid(eligible_cpu); | ||
163 | |||
164 | mmr_pnode = uv_blade_to_pnode(mmr_blade); | ||
165 | uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value); | ||
166 | |||
167 | if (cfg->move_in_progress) | ||
168 | send_cleanup_vector(cfg); | ||
169 | |||
170 | return irq; | ||
171 | } | ||
172 | |||
173 | /* | ||
174 | * Disable the specified MMR located on the specified blade so that MSIs are | ||
175 | * no longer allowed to be sent. | ||
176 | */ | ||
177 | static void arch_disable_uv_irq(int mmr_pnode, unsigned long mmr_offset) | ||
178 | { | ||
179 | unsigned long mmr_value; | ||
180 | struct uv_IO_APIC_route_entry *entry; | ||
181 | |||
182 | BUILD_BUG_ON(sizeof(struct uv_IO_APIC_route_entry) != | ||
183 | sizeof(unsigned long)); | ||
184 | |||
185 | mmr_value = 0; | ||
186 | entry = (struct uv_IO_APIC_route_entry *)&mmr_value; | ||
187 | entry->mask = 1; | ||
188 | |||
189 | uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value); | ||
190 | } | ||
191 | |||
192 | static int | ||
193 | uv_set_irq_affinity(struct irq_data *data, const struct cpumask *mask, | ||
194 | bool force) | ||
195 | { | ||
196 | struct irq_cfg *cfg = data->chip_data; | ||
197 | unsigned int dest; | ||
198 | unsigned long mmr_value, mmr_offset; | ||
199 | struct uv_IO_APIC_route_entry *entry; | ||
200 | int mmr_pnode; | ||
201 | |||
202 | if (__ioapic_set_affinity(data, mask, &dest)) | ||
203 | return -1; | ||
204 | |||
205 | mmr_value = 0; | ||
206 | entry = (struct uv_IO_APIC_route_entry *)&mmr_value; | ||
207 | |||
208 | entry->vector = cfg->vector; | ||
209 | entry->delivery_mode = apic->irq_delivery_mode; | ||
210 | entry->dest_mode = apic->irq_dest_mode; | ||
211 | entry->polarity = 0; | ||
212 | entry->trigger = 0; | ||
213 | entry->mask = 0; | ||
214 | entry->dest = dest; | ||
215 | |||
216 | /* Get previously stored MMR and pnode of hub sourcing interrupts */ | ||
217 | if (uv_irq_2_mmr_info(data->irq, &mmr_offset, &mmr_pnode)) | ||
218 | return -1; | ||
219 | |||
220 | uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value); | ||
221 | |||
222 | if (cfg->move_in_progress) | ||
223 | send_cleanup_vector(cfg); | ||
224 | |||
225 | return 0; | ||
226 | } | ||
227 | |||
228 | /* | ||
229 | * Set up a mapping of an available irq and vector, and enable the specified | ||
230 | * MMR that defines the MSI that is to be sent to the specified CPU when an | ||
231 | * interrupt is raised. | ||
232 | */ | ||
233 | int uv_setup_irq(char *irq_name, int cpu, int mmr_blade, | ||
234 | unsigned long mmr_offset, int limit) | ||
235 | { | ||
236 | int irq, ret; | ||
237 | |||
238 | irq = create_irq_nr(NR_IRQS_LEGACY, uv_blade_to_memory_nid(mmr_blade)); | ||
239 | |||
240 | if (irq <= 0) | ||
241 | return -EBUSY; | ||
242 | |||
243 | ret = arch_enable_uv_irq(irq_name, irq, cpu, mmr_blade, mmr_offset, | ||
244 | limit); | ||
245 | if (ret == irq) | ||
246 | uv_set_irq_2_mmr_info(irq, mmr_offset, mmr_blade); | ||
247 | else | ||
248 | destroy_irq(irq); | ||
249 | |||
250 | return ret; | ||
251 | } | ||
252 | EXPORT_SYMBOL_GPL(uv_setup_irq); | ||
253 | |||
254 | /* | ||
255 | * Tear down a mapping of an irq and vector, and disable the specified MMR that | ||
256 | * defined the MSI that was to be sent to the specified CPU when an interrupt | ||
257 | * was raised. | ||
258 | * | ||
259 | * The MMR offset and pnode recorded by uv_setup_irq() are looked up here. | ||
260 | */ | ||
261 | void uv_teardown_irq(unsigned int irq) | ||
262 | { | ||
263 | struct uv_irq_2_mmr_pnode *e; | ||
264 | struct rb_node *n; | ||
265 | unsigned long irqflags; | ||
266 | |||
267 | spin_lock_irqsave(&uv_irq_lock, irqflags); | ||
268 | n = uv_irq_root.rb_node; | ||
269 | while (n) { | ||
270 | e = rb_entry(n, struct uv_irq_2_mmr_pnode, list); | ||
271 | if (e->irq == irq) { | ||
272 | arch_disable_uv_irq(e->pnode, e->offset); | ||
273 | rb_erase(n, &uv_irq_root); | ||
274 | kfree(e); | ||
275 | break; | ||
276 | } | ||
277 | if (irq < e->irq) | ||
278 | n = n->rb_left; | ||
279 | else | ||
280 | n = n->rb_right; | ||
281 | } | ||
282 | spin_unlock_irqrestore(&uv_irq_lock, irqflags); | ||
283 | destroy_irq(irq); | ||
284 | } | ||
285 | EXPORT_SYMBOL_GPL(uv_teardown_irq); | ||
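A hypothetical caller of the two exported functions above might look like the sketch below; the "example-uv" name, the MMR offset and the blade/cpu arguments are illustrative only, and error handling is kept minimal:

	/* Illustrative sketch only -- not part of this patch. */
	static int example_irq = -1;

	static int example_bind_uv_irq(int cpu, int blade, unsigned long mmr_offset)
	{
		int irq;

		/* UV_AFFINITY_CPU pins the irq to the requested cpu (no balancing). */
		irq = uv_setup_irq("example-uv", cpu, blade, mmr_offset,
				   UV_AFFINITY_CPU);
		if (irq < 0)
			return irq;

		example_irq = irq;
		return 0;
	}

	static void example_unbind_uv_irq(void)
	{
		if (example_irq > 0)
			uv_teardown_irq(example_irq);
	}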
diff --git a/arch/x86/platform/uv/uv_sysfs.c b/arch/x86/platform/uv/uv_sysfs.c new file mode 100644 index 000000000000..309c70fb7759 --- /dev/null +++ b/arch/x86/platform/uv/uv_sysfs.c | |||
@@ -0,0 +1,76 @@ | |||
1 | /* | ||
2 | * This file supports the /sys/firmware/sgi_uv interfaces for SGI UV. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify | ||
5 | * it under the terms of the GNU General Public License as published by | ||
6 | * the Free Software Foundation; either version 2 of the License, or | ||
7 | * (at your option) any later version. | ||
8 | * | ||
9 | * This program is distributed in the hope that it will be useful, | ||
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
12 | * GNU General Public License for more details. | ||
13 | * | ||
14 | * You should have received a copy of the GNU General Public License | ||
15 | * along with this program; if not, write to the Free Software | ||
16 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | ||
17 | * | ||
18 | * Copyright (c) 2008 Silicon Graphics, Inc. All Rights Reserved. | ||
19 | * Copyright (c) Russ Anderson | ||
20 | */ | ||
21 | |||
22 | #include <linux/sysdev.h> | ||
23 | #include <asm/uv/bios.h> | ||
24 | #include <asm/uv/uv.h> | ||
25 | |||
26 | struct kobject *sgi_uv_kobj; | ||
27 | |||
28 | static ssize_t partition_id_show(struct kobject *kobj, | ||
29 | struct kobj_attribute *attr, char *buf) | ||
30 | { | ||
31 | return snprintf(buf, PAGE_SIZE, "%ld\n", sn_partition_id); | ||
32 | } | ||
33 | |||
34 | static ssize_t coherence_id_show(struct kobject *kobj, | ||
35 | struct kobj_attribute *attr, char *buf) | ||
36 | { | ||
37 | return snprintf(buf, PAGE_SIZE, "%ld\n", partition_coherence_id()); | ||
38 | } | ||
39 | |||
40 | static struct kobj_attribute partition_id_attr = | ||
41 | __ATTR(partition_id, S_IRUGO, partition_id_show, NULL); | ||
42 | |||
43 | static struct kobj_attribute coherence_id_attr = | ||
44 | __ATTR(coherence_id, S_IRUGO, coherence_id_show, NULL); | ||
45 | |||
46 | |||
47 | static int __init sgi_uv_sysfs_init(void) | ||
48 | { | ||
49 | unsigned long ret; | ||
50 | |||
51 | if (!is_uv_system()) | ||
52 | return -ENODEV; | ||
53 | |||
54 | if (!sgi_uv_kobj) | ||
55 | sgi_uv_kobj = kobject_create_and_add("sgi_uv", firmware_kobj); | ||
56 | if (!sgi_uv_kobj) { | ||
57 | printk(KERN_WARNING "kobject_create_and_add sgi_uv failed\n"); | ||
58 | return -EINVAL; | ||
59 | } | ||
60 | |||
61 | ret = sysfs_create_file(sgi_uv_kobj, &partition_id_attr.attr); | ||
62 | if (ret) { | ||
63 | printk(KERN_WARNING "sysfs_create_file partition_id failed\n"); | ||
64 | return ret; | ||
65 | } | ||
66 | |||
67 | ret = sysfs_create_file(sgi_uv_kobj, &coherence_id_attr.attr); | ||
68 | if (ret) { | ||
69 | printk(KERN_WARNING "sysfs_create_file coherence_id failed\n"); | ||
70 | return ret; | ||
71 | } | ||
72 | |||
73 | return 0; | ||
74 | } | ||
75 | |||
76 | device_initcall(sgi_uv_sysfs_init); | ||
diff --git a/arch/x86/platform/uv/uv_time.c b/arch/x86/platform/uv/uv_time.c new file mode 100644 index 000000000000..56e421bc379b --- /dev/null +++ b/arch/x86/platform/uv/uv_time.c | |||
@@ -0,0 +1,423 @@ | |||
1 | /* | ||
2 | * SGI RTC clock/timer routines. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify | ||
5 | * it under the terms of the GNU General Public License as published by | ||
6 | * the Free Software Foundation; either version 2 of the License, or | ||
7 | * (at your option) any later version. | ||
8 | * | ||
9 | * This program is distributed in the hope that it will be useful, | ||
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
12 | * GNU General Public License for more details. | ||
13 | * | ||
14 | * You should have received a copy of the GNU General Public License | ||
15 | * along with this program; if not, write to the Free Software | ||
16 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | ||
17 | * | ||
18 | * Copyright (c) 2009 Silicon Graphics, Inc. All Rights Reserved. | ||
19 | * Copyright (c) Dimitri Sivanich | ||
20 | */ | ||
21 | #include <linux/clockchips.h> | ||
22 | #include <linux/slab.h> | ||
23 | |||
24 | #include <asm/uv/uv_mmrs.h> | ||
25 | #include <asm/uv/uv_hub.h> | ||
26 | #include <asm/uv/bios.h> | ||
27 | #include <asm/uv/uv.h> | ||
28 | #include <asm/apic.h> | ||
29 | #include <asm/cpu.h> | ||
30 | |||
31 | #define RTC_NAME "sgi_rtc" | ||
32 | |||
33 | static cycle_t uv_read_rtc(struct clocksource *cs); | ||
34 | static int uv_rtc_next_event(unsigned long, struct clock_event_device *); | ||
35 | static void uv_rtc_timer_setup(enum clock_event_mode, | ||
36 | struct clock_event_device *); | ||
37 | |||
38 | static struct clocksource clocksource_uv = { | ||
39 | .name = RTC_NAME, | ||
40 | .rating = 400, | ||
41 | .read = uv_read_rtc, | ||
42 | .mask = (cycle_t)UVH_RTC_REAL_TIME_CLOCK_MASK, | ||
43 | .shift = 10, | ||
44 | .flags = CLOCK_SOURCE_IS_CONTINUOUS, | ||
45 | }; | ||
46 | |||
47 | static struct clock_event_device clock_event_device_uv = { | ||
48 | .name = RTC_NAME, | ||
49 | .features = CLOCK_EVT_FEAT_ONESHOT, | ||
50 | .shift = 20, | ||
51 | .rating = 400, | ||
52 | .irq = -1, | ||
53 | .set_next_event = uv_rtc_next_event, | ||
54 | .set_mode = uv_rtc_timer_setup, | ||
55 | .event_handler = NULL, | ||
56 | }; | ||
57 | |||
58 | static DEFINE_PER_CPU(struct clock_event_device, cpu_ced); | ||
59 | |||
60 | /* There is one of these allocated per node */ | ||
61 | struct uv_rtc_timer_head { | ||
62 | spinlock_t lock; | ||
63 | /* next cpu waiting for timer, local node relative: */ | ||
64 | int next_cpu; | ||
65 | /* number of cpus on this node: */ | ||
66 | int ncpus; | ||
67 | struct { | ||
68 | int lcpu; /* systemwide logical cpu number */ | ||
69 | u64 expires; /* next timer expiration for this cpu */ | ||
70 | } cpu[1]; | ||
71 | }; | ||
72 | |||
73 | /* | ||
74 | * Access to uv_rtc_timer_head via blade id. | ||
75 | */ | ||
76 | static struct uv_rtc_timer_head **blade_info __read_mostly; | ||
77 | |||
78 | static int uv_rtc_evt_enable; | ||
79 | |||
80 | /* | ||
81 | * Hardware interface routines | ||
82 | */ | ||
83 | |||
84 | /* Send IPIs to another node */ | ||
85 | static void uv_rtc_send_IPI(int cpu) | ||
86 | { | ||
87 | unsigned long apicid, val; | ||
88 | int pnode; | ||
89 | |||
90 | apicid = cpu_physical_id(cpu); | ||
91 | pnode = uv_apicid_to_pnode(apicid); | ||
92 | val = (1UL << UVH_IPI_INT_SEND_SHFT) | | ||
93 | (apicid << UVH_IPI_INT_APIC_ID_SHFT) | | ||
94 | (X86_PLATFORM_IPI_VECTOR << UVH_IPI_INT_VECTOR_SHFT); | ||
95 | |||
96 | uv_write_global_mmr64(pnode, UVH_IPI_INT, val); | ||
97 | } | ||
98 | |||
99 | /* Check for an RTC interrupt pending */ | ||
100 | static int uv_intr_pending(int pnode) | ||
101 | { | ||
102 | return uv_read_global_mmr64(pnode, UVH_EVENT_OCCURRED0) & | ||
103 | UVH_EVENT_OCCURRED0_RTC1_MASK; | ||
104 | } | ||
105 | |||
106 | /* Setup interrupt and return non-zero if early expiration occurred. */ | ||
107 | static int uv_setup_intr(int cpu, u64 expires) | ||
108 | { | ||
109 | u64 val; | ||
110 | int pnode = uv_cpu_to_pnode(cpu); | ||
111 | |||
112 | uv_write_global_mmr64(pnode, UVH_RTC1_INT_CONFIG, | ||
113 | UVH_RTC1_INT_CONFIG_M_MASK); | ||
114 | uv_write_global_mmr64(pnode, UVH_INT_CMPB, -1L); | ||
115 | |||
116 | uv_write_global_mmr64(pnode, UVH_EVENT_OCCURRED0_ALIAS, | ||
117 | UVH_EVENT_OCCURRED0_RTC1_MASK); | ||
118 | |||
119 | val = (X86_PLATFORM_IPI_VECTOR << UVH_RTC1_INT_CONFIG_VECTOR_SHFT) | | ||
120 | ((u64)cpu_physical_id(cpu) << UVH_RTC1_INT_CONFIG_APIC_ID_SHFT); | ||
121 | |||
122 | /* Set configuration */ | ||
123 | uv_write_global_mmr64(pnode, UVH_RTC1_INT_CONFIG, val); | ||
124 | /* Initialize comparator value */ | ||
125 | uv_write_global_mmr64(pnode, UVH_INT_CMPB, expires); | ||
126 | |||
127 | if (uv_read_rtc(NULL) <= expires) | ||
128 | return 0; | ||
129 | |||
130 | return !uv_intr_pending(pnode); | ||
131 | } | ||
132 | |||
133 | /* | ||
134 | * Per-cpu timer tracking routines | ||
135 | */ | ||
136 | |||
137 | static __init void uv_rtc_deallocate_timers(void) | ||
138 | { | ||
139 | int bid; | ||
140 | |||
141 | for_each_possible_blade(bid) { | ||
142 | kfree(blade_info[bid]); | ||
143 | } | ||
144 | kfree(blade_info); | ||
145 | } | ||
146 | |||
147 | /* Allocate per-node list of cpu timer expiration times. */ | ||
148 | static __init int uv_rtc_allocate_timers(void) | ||
149 | { | ||
150 | int cpu; | ||
151 | |||
152 | blade_info = kmalloc(uv_possible_blades * sizeof(void *), GFP_KERNEL); | ||
153 | if (!blade_info) | ||
154 | return -ENOMEM; | ||
155 | memset(blade_info, 0, uv_possible_blades * sizeof(void *)); | ||
156 | |||
157 | for_each_present_cpu(cpu) { | ||
158 | int nid = cpu_to_node(cpu); | ||
159 | int bid = uv_cpu_to_blade_id(cpu); | ||
160 | int bcpu = uv_cpu_hub_info(cpu)->blade_processor_id; | ||
161 | struct uv_rtc_timer_head *head = blade_info[bid]; | ||
162 | |||
163 | if (!head) { | ||
164 | head = kmalloc_node(sizeof(struct uv_rtc_timer_head) + | ||
165 | (uv_blade_nr_possible_cpus(bid) * | ||
166 | 2 * sizeof(u64)), | ||
167 | GFP_KERNEL, nid); | ||
168 | if (!head) { | ||
169 | uv_rtc_deallocate_timers(); | ||
170 | return -ENOMEM; | ||
171 | } | ||
172 | spin_lock_init(&head->lock); | ||
173 | head->ncpus = uv_blade_nr_possible_cpus(bid); | ||
174 | head->next_cpu = -1; | ||
175 | blade_info[bid] = head; | ||
176 | } | ||
177 | |||
178 | head->cpu[bcpu].lcpu = cpu; | ||
179 | head->cpu[bcpu].expires = ULLONG_MAX; | ||
180 | } | ||
181 | |||
182 | return 0; | ||
183 | } | ||
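A note on the kmalloc_node() sizing above: each per-cpu slot (an int lcpu plus a u64 expires) occupies 16 bytes once padded to u64 alignment, which is what the ncpus * 2 * sizeof(u64) term accounts for; because the cpu[1] member of struct uv_rtc_timer_head already holds one slot, the allocation comes out one slot larger than strictly necessary, which is harmless.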
184 | |||
185 | /* Find and set the next expiring timer. */ | ||
186 | static void uv_rtc_find_next_timer(struct uv_rtc_timer_head *head, int pnode) | ||
187 | { | ||
188 | u64 lowest = ULLONG_MAX; | ||
189 | int c, bcpu = -1; | ||
190 | |||
191 | head->next_cpu = -1; | ||
192 | for (c = 0; c < head->ncpus; c++) { | ||
193 | u64 exp = head->cpu[c].expires; | ||
194 | if (exp < lowest) { | ||
195 | bcpu = c; | ||
196 | lowest = exp; | ||
197 | } | ||
198 | } | ||
199 | if (bcpu >= 0) { | ||
200 | head->next_cpu = bcpu; | ||
201 | c = head->cpu[bcpu].lcpu; | ||
202 | if (uv_setup_intr(c, lowest)) | ||
203 | /* If we didn't set it up in time, trigger */ | ||
204 | uv_rtc_send_IPI(c); | ||
205 | } else { | ||
206 | uv_write_global_mmr64(pnode, UVH_RTC1_INT_CONFIG, | ||
207 | UVH_RTC1_INT_CONFIG_M_MASK); | ||
208 | } | ||
209 | } | ||
210 | |||
211 | /* | ||
212 | * Set expiration time for current cpu. | ||
213 | * | ||
214 | * Returns -ETIME if we missed the expiration time. | ||
215 | */ | ||
216 | static int uv_rtc_set_timer(int cpu, u64 expires) | ||
217 | { | ||
218 | int pnode = uv_cpu_to_pnode(cpu); | ||
219 | int bid = uv_cpu_to_blade_id(cpu); | ||
220 | struct uv_rtc_timer_head *head = blade_info[bid]; | ||
221 | int bcpu = uv_cpu_hub_info(cpu)->blade_processor_id; | ||
222 | u64 *t = &head->cpu[bcpu].expires; | ||
223 | unsigned long flags; | ||
224 | int next_cpu; | ||
225 | |||
226 | spin_lock_irqsave(&head->lock, flags); | ||
227 | |||
228 | next_cpu = head->next_cpu; | ||
229 | *t = expires; | ||
230 | |||
231 | /* Will this one be next to go off? */ | ||
232 | if (next_cpu < 0 || bcpu == next_cpu || | ||
233 | expires < head->cpu[next_cpu].expires) { | ||
234 | head->next_cpu = bcpu; | ||
235 | if (uv_setup_intr(cpu, expires)) { | ||
236 | *t = ULLONG_MAX; | ||
237 | uv_rtc_find_next_timer(head, pnode); | ||
238 | spin_unlock_irqrestore(&head->lock, flags); | ||
239 | return -ETIME; | ||
240 | } | ||
241 | } | ||
242 | |||
243 | spin_unlock_irqrestore(&head->lock, flags); | ||
244 | return 0; | ||
245 | } | ||
246 | |||
247 | /* | ||
248 | * Unset expiration time for current cpu. | ||
249 | * | ||
250 | * Returns 1 if this timer was pending. | ||
251 | */ | ||
252 | static int uv_rtc_unset_timer(int cpu, int force) | ||
253 | { | ||
254 | int pnode = uv_cpu_to_pnode(cpu); | ||
255 | int bid = uv_cpu_to_blade_id(cpu); | ||
256 | struct uv_rtc_timer_head *head = blade_info[bid]; | ||
257 | int bcpu = uv_cpu_hub_info(cpu)->blade_processor_id; | ||
258 | u64 *t = &head->cpu[bcpu].expires; | ||
259 | unsigned long flags; | ||
260 | int rc = 0; | ||
261 | |||
262 | spin_lock_irqsave(&head->lock, flags); | ||
263 | |||
264 | if ((head->next_cpu == bcpu && uv_read_rtc(NULL) >= *t) || force) | ||
265 | rc = 1; | ||
266 | |||
267 | if (rc) { | ||
268 | *t = ULLONG_MAX; | ||
269 | /* Was the hardware setup for this timer? */ | ||
270 | if (head->next_cpu == bcpu) | ||
271 | uv_rtc_find_next_timer(head, pnode); | ||
272 | } | ||
273 | |||
274 | spin_unlock_irqrestore(&head->lock, flags); | ||
275 | |||
276 | return rc; | ||
277 | } | ||
278 | |||
279 | |||
280 | /* | ||
281 | * Kernel interface routines. | ||
282 | */ | ||
283 | |||
284 | /* | ||
285 | * Read the RTC. | ||
286 | * | ||
287 | * Starting with HUB rev 2.0, the UV RTC register is replicated across all | ||
288 | * cachelines of its own page. This allows faster simultaneous reads | ||
289 | * from a given socket. | ||
290 | */ | ||
291 | static cycle_t uv_read_rtc(struct clocksource *cs) | ||
292 | { | ||
293 | unsigned long offset; | ||
294 | |||
295 | if (uv_get_min_hub_revision_id() == 1) | ||
296 | offset = 0; | ||
297 | else | ||
298 | offset = (uv_blade_processor_id() * L1_CACHE_BYTES) % PAGE_SIZE; | ||
299 | |||
300 | return (cycle_t)uv_read_local_mmr(UVH_RTC | offset); | ||
301 | } | ||
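As a concrete illustration of the offsets used above (assuming the usual x86_64 values of L1_CACHE_BYTES = 64 and PAGE_SIZE = 4096): blade processor 0 reads the RTC copy at offset 0, processor 1 the copy at offset 64, and so on, wrapping back to offset 0 at processor 64; on hub revision 1 every cpu reads offset 0.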
302 | |||
303 | /* | ||
304 | * Program the next event, relative to now | ||
305 | */ | ||
306 | static int uv_rtc_next_event(unsigned long delta, | ||
307 | struct clock_event_device *ced) | ||
308 | { | ||
309 | int ced_cpu = cpumask_first(ced->cpumask); | ||
310 | |||
311 | return uv_rtc_set_timer(ced_cpu, delta + uv_read_rtc(NULL)); | ||
312 | } | ||
313 | |||
314 | /* | ||
315 | * Setup the RTC timer in oneshot mode | ||
316 | */ | ||
317 | static void uv_rtc_timer_setup(enum clock_event_mode mode, | ||
318 | struct clock_event_device *evt) | ||
319 | { | ||
320 | int ced_cpu = cpumask_first(evt->cpumask); | ||
321 | |||
322 | switch (mode) { | ||
323 | case CLOCK_EVT_MODE_PERIODIC: | ||
324 | case CLOCK_EVT_MODE_ONESHOT: | ||
325 | case CLOCK_EVT_MODE_RESUME: | ||
326 | /* Nothing to do here yet */ | ||
327 | break; | ||
328 | case CLOCK_EVT_MODE_UNUSED: | ||
329 | case CLOCK_EVT_MODE_SHUTDOWN: | ||
330 | uv_rtc_unset_timer(ced_cpu, 1); | ||
331 | break; | ||
332 | } | ||
333 | } | ||
334 | |||
335 | static void uv_rtc_interrupt(void) | ||
336 | { | ||
337 | int cpu = smp_processor_id(); | ||
338 | struct clock_event_device *ced = &per_cpu(cpu_ced, cpu); | ||
339 | |||
340 | if (!ced || !ced->event_handler) | ||
341 | return; | ||
342 | |||
343 | if (uv_rtc_unset_timer(cpu, 0) != 1) | ||
344 | return; | ||
345 | |||
346 | ced->event_handler(ced); | ||
347 | } | ||
348 | |||
349 | static int __init uv_enable_evt_rtc(char *str) | ||
350 | { | ||
351 | uv_rtc_evt_enable = 1; | ||
352 | |||
353 | return 1; | ||
354 | } | ||
355 | __setup("uvrtcevt", uv_enable_evt_rtc); | ||
356 | |||
357 | static __init void uv_rtc_register_clockevents(struct work_struct *dummy) | ||
358 | { | ||
359 | struct clock_event_device *ced = &__get_cpu_var(cpu_ced); | ||
360 | |||
361 | *ced = clock_event_device_uv; | ||
362 | ced->cpumask = cpumask_of(smp_processor_id()); | ||
363 | clockevents_register_device(ced); | ||
364 | } | ||
365 | |||
366 | static __init int uv_rtc_setup_clock(void) | ||
367 | { | ||
368 | int rc; | ||
369 | |||
370 | if (!is_uv_system()) | ||
371 | return -ENODEV; | ||
372 | |||
373 | clocksource_uv.mult = clocksource_hz2mult(sn_rtc_cycles_per_second, | ||
374 | clocksource_uv.shift); | ||
375 | |||
376 | /* If single blade, prefer tsc */ | ||
377 | if (uv_num_possible_blades() == 1) | ||
378 | clocksource_uv.rating = 250; | ||
379 | |||
380 | rc = clocksource_register(&clocksource_uv); | ||
381 | if (rc) | ||
382 | printk(KERN_INFO "UV RTC clocksource failed rc %d\n", rc); | ||
383 | else | ||
384 | printk(KERN_INFO "UV RTC clocksource registered freq %lu MHz\n", | ||
385 | sn_rtc_cycles_per_second/(unsigned long)1E6); | ||
386 | |||
387 | if (rc || !uv_rtc_evt_enable || x86_platform_ipi_callback) | ||
388 | return rc; | ||
389 | |||
390 | /* Setup and register clockevents */ | ||
391 | rc = uv_rtc_allocate_timers(); | ||
392 | if (rc) | ||
393 | goto error; | ||
394 | |||
395 | x86_platform_ipi_callback = uv_rtc_interrupt; | ||
396 | |||
397 | clock_event_device_uv.mult = div_sc(sn_rtc_cycles_per_second, | ||
398 | NSEC_PER_SEC, clock_event_device_uv.shift); | ||
399 | |||
400 | clock_event_device_uv.min_delta_ns = NSEC_PER_SEC / | ||
401 | sn_rtc_cycles_per_second; | ||
402 | |||
403 | clock_event_device_uv.max_delta_ns = clocksource_uv.mask * | ||
404 | (NSEC_PER_SEC / sn_rtc_cycles_per_second); | ||
405 | |||
406 | rc = schedule_on_each_cpu(uv_rtc_register_clockevents); | ||
407 | if (rc) { | ||
408 | x86_platform_ipi_callback = NULL; | ||
409 | uv_rtc_deallocate_timers(); | ||
410 | goto error; | ||
411 | } | ||
412 | |||
413 | printk(KERN_INFO "UV RTC clockevents registered\n"); | ||
414 | |||
415 | return 0; | ||
416 | |||
417 | error: | ||
418 | clocksource_unregister(&clocksource_uv); | ||
419 | printk(KERN_INFO "UV RTC clockevents failed rc %d\n", rc); | ||
420 | |||
421 | return rc; | ||
422 | } | ||
423 | arch_initcall(uv_rtc_setup_clock); | ||
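For orientation, the mult/shift values programmed above follow the standard clocksource/clockevent convention: with clocksource_uv.shift = 10, clocksource_hz2mult() chooses mult so that nanoseconds = (cycles * mult) >> 10. Assuming, purely for illustration, an RTC rate of 100 MHz, mult would be about (10^9 << 10) / 10^8 = 10240, i.e. 10 ns per RTC cycle, and min_delta_ns = NSEC_PER_SEC / sn_rtc_cycles_per_second would likewise be 10 ns.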