author | Glenn Elliott <gelliott@cs.unc.edu> | 2012-03-04 19:47:13 -0500
---|---|---
committer | Glenn Elliott <gelliott@cs.unc.edu> | 2012-03-04 19:47:13 -0500
commit | c71c03bda1e86c9d5198c5d83f712e695c4f2a1e (patch) |
tree | ecb166cb3e2b7e2adb3b5e292245fefd23381ac8 /arch/x86/platform/uv |
parent | ea53c912f8a86a8567697115b6a0d8152beee5c8 (diff) |
parent | 6a00f206debf8a5c8899055726ad127dbeeed098 (diff) |
Merge branch 'mpi-master' into wip-k-fmlp
Conflicts:
litmus/sched_cedf.c
Diffstat (limited to 'arch/x86/platform/uv')
-rw-r--r-- | arch/x86/platform/uv/Makefile | 1
-rw-r--r-- | arch/x86/platform/uv/bios_uv.c | 215
-rw-r--r-- | arch/x86/platform/uv/tlb_uv.c | 1857
-rw-r--r-- | arch/x86/platform/uv/uv_irq.c | 285
-rw-r--r-- | arch/x86/platform/uv/uv_sysfs.c | 76
-rw-r--r-- | arch/x86/platform/uv/uv_time.c | 429
6 files changed, 2863 insertions, 0 deletions
diff --git a/arch/x86/platform/uv/Makefile b/arch/x86/platform/uv/Makefile
new file mode 100644
index 000000000000..6c40995fefb8
--- /dev/null
+++ b/arch/x86/platform/uv/Makefile
@@ -0,0 +1 @@
obj-$(CONFIG_X86_UV) += tlb_uv.o bios_uv.o uv_irq.o uv_sysfs.o uv_time.o
diff --git a/arch/x86/platform/uv/bios_uv.c b/arch/x86/platform/uv/bios_uv.c
new file mode 100644
index 000000000000..8bc57baaa9ad
--- /dev/null
+++ b/arch/x86/platform/uv/bios_uv.c
@@ -0,0 +1,215 @@
1 | /* | ||
2 | * BIOS run time interface routines. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify | ||
5 | * it under the terms of the GNU General Public License as published by | ||
6 | * the Free Software Foundation; either version 2 of the License, or | ||
7 | * (at your option) any later version. | ||
8 | * | ||
9 | * This program is distributed in the hope that it will be useful, | ||
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
12 | * GNU General Public License for more details. | ||
13 | * | ||
14 | * You should have received a copy of the GNU General Public License | ||
15 | * along with this program; if not, write to the Free Software | ||
16 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | ||
17 | * | ||
18 | * Copyright (c) 2008-2009 Silicon Graphics, Inc. All Rights Reserved. | ||
19 | * Copyright (c) Russ Anderson <rja@sgi.com> | ||
20 | */ | ||
21 | |||
22 | #include <linux/efi.h> | ||
23 | #include <asm/efi.h> | ||
24 | #include <linux/io.h> | ||
25 | #include <asm/uv/bios.h> | ||
26 | #include <asm/uv/uv_hub.h> | ||
27 | |||
28 | static struct uv_systab uv_systab; | ||
29 | |||
30 | s64 uv_bios_call(enum uv_bios_cmd which, u64 a1, u64 a2, u64 a3, u64 a4, u64 a5) | ||
31 | { | ||
32 | struct uv_systab *tab = &uv_systab; | ||
33 | s64 ret; | ||
34 | |||
35 | if (!tab->function) | ||
36 | /* | ||
37 | * BIOS does not support UV systab | ||
38 | */ | ||
39 | return BIOS_STATUS_UNIMPLEMENTED; | ||
40 | |||
41 | ret = efi_call6((void *)__va(tab->function), (u64)which, | ||
42 | a1, a2, a3, a4, a5); | ||
43 | return ret; | ||
44 | } | ||
45 | EXPORT_SYMBOL_GPL(uv_bios_call); | ||
46 | |||
47 | s64 uv_bios_call_irqsave(enum uv_bios_cmd which, u64 a1, u64 a2, u64 a3, | ||
48 | u64 a4, u64 a5) | ||
49 | { | ||
50 | unsigned long bios_flags; | ||
51 | s64 ret; | ||
52 | |||
53 | local_irq_save(bios_flags); | ||
54 | ret = uv_bios_call(which, a1, a2, a3, a4, a5); | ||
55 | local_irq_restore(bios_flags); | ||
56 | |||
57 | return ret; | ||
58 | } | ||
59 | |||
60 | s64 uv_bios_call_reentrant(enum uv_bios_cmd which, u64 a1, u64 a2, u64 a3, | ||
61 | u64 a4, u64 a5) | ||
62 | { | ||
63 | s64 ret; | ||
64 | |||
65 | preempt_disable(); | ||
66 | ret = uv_bios_call(which, a1, a2, a3, a4, a5); | ||
67 | preempt_enable(); | ||
68 | |||
69 | return ret; | ||
70 | } | ||
71 | |||
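Aside, not part of the patch: the three entry points above differ only in how they guard the EFI call. uv_bios_call() assumes the caller is already in a safe context, uv_bios_call_irqsave() masks local interrupts around the call, and uv_bios_call_reentrant() only disables preemption. A hypothetical wrapper built on them could look like the sketch below; UV_BIOS_GET_FOO and uv_bios_get_foo() are invented names, while the real wrappers further down (uv_bios_freq_base(), uv_bios_change_memprotect(), ...) follow the same pattern.

```c
/* Illustrative sketch only; UV_BIOS_GET_FOO and uv_bios_get_foo() are made up. */
s64 uv_bios_get_foo(u64 *result)
{
	/* irqsave variant: callable where interrupts may be enabled */
	return uv_bios_call_irqsave(UV_BIOS_GET_FOO, (u64)result, 0, 0, 0, 0);
}
```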
72 | |||
73 | long sn_partition_id; | ||
74 | EXPORT_SYMBOL_GPL(sn_partition_id); | ||
75 | long sn_coherency_id; | ||
76 | EXPORT_SYMBOL_GPL(sn_coherency_id); | ||
77 | long sn_region_size; | ||
78 | EXPORT_SYMBOL_GPL(sn_region_size); | ||
79 | long system_serial_number; | ||
80 | EXPORT_SYMBOL_GPL(system_serial_number); | ||
81 | int uv_type; | ||
82 | EXPORT_SYMBOL_GPL(uv_type); | ||
83 | |||
84 | |||
85 | s64 uv_bios_get_sn_info(int fc, int *uvtype, long *partid, long *coher, | ||
86 | long *region, long *ssn) | ||
87 | { | ||
88 | s64 ret; | ||
89 | u64 v0, v1; | ||
90 | union partition_info_u part; | ||
91 | |||
92 | ret = uv_bios_call_irqsave(UV_BIOS_GET_SN_INFO, fc, | ||
93 | (u64)(&v0), (u64)(&v1), 0, 0); | ||
94 | if (ret != BIOS_STATUS_SUCCESS) | ||
95 | return ret; | ||
96 | |||
97 | part.val = v0; | ||
98 | if (uvtype) | ||
99 | *uvtype = part.hub_version; | ||
100 | if (partid) | ||
101 | *partid = part.partition_id; | ||
102 | if (coher) | ||
103 | *coher = part.coherence_id; | ||
104 | if (region) | ||
105 | *region = part.region_size; | ||
106 | if (ssn) | ||
107 | *ssn = v1; | ||
108 | return ret; | ||
109 | } | ||
110 | EXPORT_SYMBOL_GPL(uv_bios_get_sn_info); | ||
111 | |||
112 | int | ||
113 | uv_bios_mq_watchlist_alloc(unsigned long addr, unsigned int mq_size, | ||
114 | unsigned long *intr_mmr_offset) | ||
115 | { | ||
116 | u64 watchlist; | ||
117 | s64 ret; | ||
118 | |||
119 | /* | ||
120 | * BIOS returns the watchlist number or a negative error number. | ||
121 | */ | ||
122 | ret = (int)uv_bios_call_irqsave(UV_BIOS_WATCHLIST_ALLOC, addr, | ||
123 | mq_size, (u64)intr_mmr_offset, | ||
124 | (u64)&watchlist, 0); | ||
125 | if (ret < BIOS_STATUS_SUCCESS) | ||
126 | return ret; | ||
127 | |||
128 | return watchlist; | ||
129 | } | ||
130 | EXPORT_SYMBOL_GPL(uv_bios_mq_watchlist_alloc); | ||
131 | |||
132 | int | ||
133 | uv_bios_mq_watchlist_free(int blade, int watchlist_num) | ||
134 | { | ||
135 | return (int)uv_bios_call_irqsave(UV_BIOS_WATCHLIST_FREE, | ||
136 | blade, watchlist_num, 0, 0, 0); | ||
137 | } | ||
138 | EXPORT_SYMBOL_GPL(uv_bios_mq_watchlist_free); | ||
139 | |||
140 | s64 | ||
141 | uv_bios_change_memprotect(u64 paddr, u64 len, enum uv_memprotect perms) | ||
142 | { | ||
143 | return uv_bios_call_irqsave(UV_BIOS_MEMPROTECT, paddr, len, | ||
144 | perms, 0, 0); | ||
145 | } | ||
146 | EXPORT_SYMBOL_GPL(uv_bios_change_memprotect); | ||
147 | |||
148 | s64 | ||
149 | uv_bios_reserved_page_pa(u64 buf, u64 *cookie, u64 *addr, u64 *len) | ||
150 | { | ||
151 | s64 ret; | ||
152 | |||
153 | ret = uv_bios_call_irqsave(UV_BIOS_GET_PARTITION_ADDR, (u64)cookie, | ||
154 | (u64)addr, buf, (u64)len, 0); | ||
155 | return ret; | ||
156 | } | ||
157 | EXPORT_SYMBOL_GPL(uv_bios_reserved_page_pa); | ||
158 | |||
159 | s64 uv_bios_freq_base(u64 clock_type, u64 *ticks_per_second) | ||
160 | { | ||
161 | return uv_bios_call(UV_BIOS_FREQ_BASE, clock_type, | ||
162 | (u64)ticks_per_second, 0, 0, 0); | ||
163 | } | ||
164 | EXPORT_SYMBOL_GPL(uv_bios_freq_base); | ||
165 | |||
166 | /* | ||
167 | * uv_bios_set_legacy_vga_target - Set Legacy VGA I/O Target | ||
168 | * @decode: true to enable target, false to disable target | ||
169 | * @domain: PCI domain number | ||
170 | * @bus: PCI bus number | ||
171 | * | ||
172 | * Returns: | ||
173 | * 0: Success | ||
174 | * -EINVAL: Invalid domain or bus number | ||
175 | * -ENOSYS: Capability not available | ||
176 | * -EBUSY: Legacy VGA I/O cannot be retargeted at this time | ||
177 | */ | ||
178 | int uv_bios_set_legacy_vga_target(bool decode, int domain, int bus) | ||
179 | { | ||
180 | return uv_bios_call(UV_BIOS_SET_LEGACY_VGA_TARGET, | ||
181 | (u64)decode, (u64)domain, (u64)bus, 0, 0); | ||
182 | } | ||
183 | EXPORT_SYMBOL_GPL(uv_bios_set_legacy_vga_target); | ||
184 | |||
185 | |||
186 | #ifdef CONFIG_EFI | ||
187 | void uv_bios_init(void) | ||
188 | { | ||
189 | struct uv_systab *tab; | ||
190 | |||
191 | if ((efi.uv_systab == EFI_INVALID_TABLE_ADDR) || | ||
192 | (efi.uv_systab == (unsigned long)NULL)) { | ||
193 | printk(KERN_CRIT "No EFI UV System Table.\n"); | ||
194 | uv_systab.function = (unsigned long)NULL; | ||
195 | return; | ||
196 | } | ||
197 | |||
198 | tab = (struct uv_systab *)ioremap(efi.uv_systab, | ||
199 | sizeof(struct uv_systab)); | ||
200 | if (strncmp(tab->signature, "UVST", 4) != 0) | ||
201 | printk(KERN_ERR "bad signature in UV system table!"); | ||
202 | |||
203 | /* | ||
204 | * Copy table to permanent spot for later use. | ||
205 | */ | ||
206 | memcpy(&uv_systab, tab, sizeof(struct uv_systab)); | ||
207 | iounmap(tab); | ||
208 | |||
209 | printk(KERN_INFO "EFI UV System Table Revision %d\n", | ||
210 | uv_systab.revision); | ||
211 | } | ||
212 | #else /* !CONFIG_EFI */ | ||
213 | |||
214 | void uv_bios_init(void) { } | ||
215 | #endif | ||
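Aside, illustrative only: uv_bios_get_sn_info() is the main consumer-facing call in this file. A caller (in the real tree the UV APIC setup code does this at boot, but treat the sketch as an assumption) unpacks the partition data roughly as follows:

```c
/* Illustration only: consuming uv_bios_get_sn_info(); example_read_sn_info()
 * is a made-up function name. */
static void example_read_sn_info(void)
{
	int hub_version;
	long partid, coher, region, ssn;

	if (uv_bios_get_sn_info(0, &hub_version, &partid, &coher,
				&region, &ssn) == BIOS_STATUS_SUCCESS)
		printk(KERN_INFO "UV: hub rev %d partition %ld coherence %ld\n",
		       hub_version, partid, coher);
}
```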
diff --git a/arch/x86/platform/uv/tlb_uv.c b/arch/x86/platform/uv/tlb_uv.c
new file mode 100644
index 000000000000..68e467f69fec
--- /dev/null
+++ b/arch/x86/platform/uv/tlb_uv.c
@@ -0,0 +1,1857 @@
1 | /* | ||
2 | * SGI UltraViolet TLB flush routines. | ||
3 | * | ||
4 | * (c) 2008-2011 Cliff Wickman <cpw@sgi.com>, SGI. | ||
5 | * | ||
6 | * This code is released under the GNU General Public License version 2 or | ||
7 | * later. | ||
8 | */ | ||
9 | #include <linux/seq_file.h> | ||
10 | #include <linux/proc_fs.h> | ||
11 | #include <linux/debugfs.h> | ||
12 | #include <linux/kernel.h> | ||
13 | #include <linux/slab.h> | ||
14 | #include <linux/delay.h> | ||
15 | |||
16 | #include <asm/mmu_context.h> | ||
17 | #include <asm/uv/uv.h> | ||
18 | #include <asm/uv/uv_mmrs.h> | ||
19 | #include <asm/uv/uv_hub.h> | ||
20 | #include <asm/uv/uv_bau.h> | ||
21 | #include <asm/apic.h> | ||
22 | #include <asm/idle.h> | ||
23 | #include <asm/tsc.h> | ||
24 | #include <asm/irq_vectors.h> | ||
25 | #include <asm/timer.h> | ||
26 | |||
27 | /* timeouts in nanoseconds (indexed by UVH_AGING_PRESCALE_SEL urgency7 30:28) */ | ||
28 | static int timeout_base_ns[] = { | ||
29 | 20, | ||
30 | 160, | ||
31 | 1280, | ||
32 | 10240, | ||
33 | 81920, | ||
34 | 655360, | ||
35 | 5242880, | ||
36 | 167772160 | ||
37 | }; | ||
38 | |||
39 | static int timeout_us; | ||
40 | static int nobau; | ||
41 | static int baudisabled; | ||
42 | static spinlock_t disable_lock; | ||
43 | static cycles_t congested_cycles; | ||
44 | |||
45 | /* tunables: */ | ||
46 | static int max_concurr = MAX_BAU_CONCURRENT; | ||
47 | static int max_concurr_const = MAX_BAU_CONCURRENT; | ||
48 | static int plugged_delay = PLUGGED_DELAY; | ||
49 | static int plugsb4reset = PLUGSB4RESET; | ||
50 | static int timeoutsb4reset = TIMEOUTSB4RESET; | ||
51 | static int ipi_reset_limit = IPI_RESET_LIMIT; | ||
52 | static int complete_threshold = COMPLETE_THRESHOLD; | ||
53 | static int congested_respns_us = CONGESTED_RESPONSE_US; | ||
54 | static int congested_reps = CONGESTED_REPS; | ||
55 | static int congested_period = CONGESTED_PERIOD; | ||
56 | |||
57 | static struct tunables tunables[] = { | ||
58 | {&max_concurr, MAX_BAU_CONCURRENT}, /* must be [0] */ | ||
59 | {&plugged_delay, PLUGGED_DELAY}, | ||
60 | {&plugsb4reset, PLUGSB4RESET}, | ||
61 | {&timeoutsb4reset, TIMEOUTSB4RESET}, | ||
62 | {&ipi_reset_limit, IPI_RESET_LIMIT}, | ||
63 | {&complete_threshold, COMPLETE_THRESHOLD}, | ||
64 | {&congested_respns_us, CONGESTED_RESPONSE_US}, | ||
65 | {&congested_reps, CONGESTED_REPS}, | ||
66 | {&congested_period, CONGESTED_PERIOD} | ||
67 | }; | ||
68 | |||
69 | static struct dentry *tunables_dir; | ||
70 | static struct dentry *tunables_file; | ||
71 | |||
72 | /* these correspond to the statistics printed by ptc_seq_show() */ | ||
73 | static char *stat_description[] = { | ||
74 | "sent: number of shootdown messages sent", | ||
75 | "stime: time spent sending messages", | ||
76 | "numuvhubs: number of hubs targeted with shootdown", | ||
77 | "numuvhubs16: number times 16 or more hubs targeted", | ||
78 | "numuvhubs8: number times 8 or more hubs targeted", | ||
79 | "numuvhubs4: number times 4 or more hubs targeted", | ||
80 | "numuvhubs2: number times 2 or more hubs targeted", | ||
81 | "numuvhubs1: number times 1 hub targeted", | ||
82 | "numcpus: number of cpus targeted with shootdown", | ||
83 | "dto: number of destination timeouts", | ||
84 | "retries: destination timeout retries sent", | ||
85 | "rok: : destination timeouts successfully retried", | ||
86 | "resetp: ipi-style resource resets for plugs", | ||
87 | "resett: ipi-style resource resets for timeouts", | ||
88 | "giveup: fall-backs to ipi-style shootdowns", | ||
89 | "sto: number of source timeouts", | ||
90 | "bz: number of stay-busy's", | ||
91 | "throt: number times spun in throttle", | ||
92 | "swack: image of UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE", | ||
93 | "recv: shootdown messages received", | ||
94 | "rtime: time spent processing messages", | ||
95 | "all: shootdown all-tlb messages", | ||
96 | "one: shootdown one-tlb messages", | ||
97 | "mult: interrupts that found multiple messages", | ||
98 | "none: interrupts that found no messages", | ||
99 | "retry: number of retry messages processed", | ||
100 | "canc: number messages canceled by retries", | ||
101 | "nocan: number retries that found nothing to cancel", | ||
102 | "reset: number of ipi-style reset requests processed", | ||
103 | "rcan: number messages canceled by reset requests", | ||
104 | "disable: number times use of the BAU was disabled", | ||
105 | "enable: number times use of the BAU was re-enabled" | ||
106 | }; | ||
107 | |||
108 | static int __init | ||
109 | setup_nobau(char *arg) | ||
110 | { | ||
111 | nobau = 1; | ||
112 | return 0; | ||
113 | } | ||
114 | early_param("nobau", setup_nobau); | ||
115 | |||
116 | /* base pnode in this partition */ | ||
117 | static int uv_base_pnode __read_mostly; | ||
118 | /* position of pnode (which is nasid>>1): */ | ||
119 | static int uv_nshift __read_mostly; | ||
120 | static unsigned long uv_mmask __read_mostly; | ||
121 | |||
122 | static DEFINE_PER_CPU(struct ptc_stats, ptcstats); | ||
123 | static DEFINE_PER_CPU(struct bau_control, bau_control); | ||
124 | static DEFINE_PER_CPU(cpumask_var_t, uv_flush_tlb_mask); | ||
125 | |||
126 | /* | ||
127 | * Determine the first node on a uvhub. 'Nodes' are used for kernel | ||
128 | * memory allocation. | ||
129 | */ | ||
130 | static int __init uvhub_to_first_node(int uvhub) | ||
131 | { | ||
132 | int node, b; | ||
133 | |||
134 | for_each_online_node(node) { | ||
135 | b = uv_node_to_blade_id(node); | ||
136 | if (uvhub == b) | ||
137 | return node; | ||
138 | } | ||
139 | return -1; | ||
140 | } | ||
141 | |||
142 | /* | ||
143 | * Determine the apicid of the first cpu on a uvhub. | ||
144 | */ | ||
145 | static int __init uvhub_to_first_apicid(int uvhub) | ||
146 | { | ||
147 | int cpu; | ||
148 | |||
149 | for_each_present_cpu(cpu) | ||
150 | if (uvhub == uv_cpu_to_blade_id(cpu)) | ||
151 | return per_cpu(x86_cpu_to_apicid, cpu); | ||
152 | return -1; | ||
153 | } | ||
154 | |||
155 | /* | ||
156 | * Free a software acknowledge hardware resource by clearing its Pending | ||
157 | * bit. This will return a reply to the sender. | ||
158 | * If the message has timed out, a reply has already been sent by the | ||
159 | * hardware but the resource has not been released. In that case our | ||
160 | * clear of the Timeout bit (as well) will free the resource. No reply will | ||
161 | * be sent (the hardware will only do one reply per message). | ||
162 | */ | ||
163 | static void reply_to_message(struct msg_desc *mdp, struct bau_control *bcp) | ||
164 | { | ||
165 | unsigned long dw; | ||
166 | struct bau_pq_entry *msg; | ||
167 | |||
168 | msg = mdp->msg; | ||
169 | if (!msg->canceled) { | ||
170 | dw = (msg->swack_vec << UV_SW_ACK_NPENDING) | msg->swack_vec; | ||
171 | write_mmr_sw_ack(dw); | ||
172 | } | ||
173 | msg->replied_to = 1; | ||
174 | msg->swack_vec = 0; | ||
175 | } | ||
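Aside: the write in reply_to_message() places the same software-ack vector in both halves of the MMR word; the low half holds the Pending bits and the half starting at bit UV_SW_ACK_NPENDING holds the matching Timeout bits, so a single store clears both. A minimal sketch of that bit arithmetic, assuming for illustration that UV_SW_ACK_NPENDING is 8:

```c
/* Illustration only, assuming UV_SW_ACK_NPENDING == 8 software-ack resources. */
static unsigned long clear_pending_and_timeout(unsigned long swack_vec)
{
	/* e.g. swack_vec == 0x04 -> 0x0404: one write clears pending bit 2
	 * and the corresponding timeout bit 10. */
	return (swack_vec << UV_SW_ACK_NPENDING) | swack_vec;
}
```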
176 | |||
177 | /* | ||
178 | * Process the receipt of a RETRY message | ||
179 | */ | ||
180 | static void bau_process_retry_msg(struct msg_desc *mdp, | ||
181 | struct bau_control *bcp) | ||
182 | { | ||
183 | int i; | ||
184 | int cancel_count = 0; | ||
185 | unsigned long msg_res; | ||
186 | unsigned long mmr = 0; | ||
187 | struct bau_pq_entry *msg = mdp->msg; | ||
188 | struct bau_pq_entry *msg2; | ||
189 | struct ptc_stats *stat = bcp->statp; | ||
190 | |||
191 | stat->d_retries++; | ||
192 | /* | ||
193 | * cancel any message from msg+1 to the retry itself | ||
194 | */ | ||
195 | for (msg2 = msg+1, i = 0; i < DEST_Q_SIZE; msg2++, i++) { | ||
196 | if (msg2 > mdp->queue_last) | ||
197 | msg2 = mdp->queue_first; | ||
198 | if (msg2 == msg) | ||
199 | break; | ||
200 | |||
201 | /* same conditions for cancellation as do_reset */ | ||
202 | if ((msg2->replied_to == 0) && (msg2->canceled == 0) && | ||
203 | (msg2->swack_vec) && ((msg2->swack_vec & | ||
204 | msg->swack_vec) == 0) && | ||
205 | (msg2->sending_cpu == msg->sending_cpu) && | ||
206 | (msg2->msg_type != MSG_NOOP)) { | ||
207 | mmr = read_mmr_sw_ack(); | ||
208 | msg_res = msg2->swack_vec; | ||
209 | /* | ||
210 | * This is a message retry; clear the resources held | ||
211 | * by the previous message only if they timed out. | ||
212 | * If it has not timed out we have an unexpected | ||
213 | * situation to report. | ||
214 | */ | ||
215 | if (mmr & (msg_res << UV_SW_ACK_NPENDING)) { | ||
216 | unsigned long mr; | ||
217 | /* | ||
218 | * is the resource timed out? | ||
219 | * make everyone ignore the cancelled message. | ||
220 | */ | ||
221 | msg2->canceled = 1; | ||
222 | stat->d_canceled++; | ||
223 | cancel_count++; | ||
224 | mr = (msg_res << UV_SW_ACK_NPENDING) | msg_res; | ||
225 | write_mmr_sw_ack(mr); | ||
226 | } | ||
227 | } | ||
228 | } | ||
229 | if (!cancel_count) | ||
230 | stat->d_nocanceled++; | ||
231 | } | ||
232 | |||
233 | /* | ||
234 | * Do all the things a cpu should do for a TLB shootdown message. | ||
235 | * Other cpus may come here at the same time for this message. | ||
236 | */ | ||
237 | static void bau_process_message(struct msg_desc *mdp, | ||
238 | struct bau_control *bcp) | ||
239 | { | ||
240 | short socket_ack_count = 0; | ||
241 | short *sp; | ||
242 | struct atomic_short *asp; | ||
243 | struct ptc_stats *stat = bcp->statp; | ||
244 | struct bau_pq_entry *msg = mdp->msg; | ||
245 | struct bau_control *smaster = bcp->socket_master; | ||
246 | |||
247 | /* | ||
248 | * This must be a normal message, or retry of a normal message | ||
249 | */ | ||
250 | if (msg->address == TLB_FLUSH_ALL) { | ||
251 | local_flush_tlb(); | ||
252 | stat->d_alltlb++; | ||
253 | } else { | ||
254 | __flush_tlb_one(msg->address); | ||
255 | stat->d_onetlb++; | ||
256 | } | ||
257 | stat->d_requestee++; | ||
258 | |||
259 | /* | ||
260 | * One cpu on each uvhub has the additional job on a RETRY | ||
261 | * of releasing the resource held by the message that is | ||
262 | * being retried. That message is identified by sending | ||
263 | * cpu number. | ||
264 | */ | ||
265 | if (msg->msg_type == MSG_RETRY && bcp == bcp->uvhub_master) | ||
266 | bau_process_retry_msg(mdp, bcp); | ||
267 | |||
268 | /* | ||
269 | * This is a swack message, so we have to reply to it. | ||
270 | * Count each responding cpu on the socket. This avoids | ||
271 | * pinging the count's cache line back and forth between | ||
272 | * the sockets. | ||
273 | */ | ||
274 | sp = &smaster->socket_acknowledge_count[mdp->msg_slot]; | ||
275 | asp = (struct atomic_short *)sp; | ||
276 | socket_ack_count = atom_asr(1, asp); | ||
277 | if (socket_ack_count == bcp->cpus_in_socket) { | ||
278 | int msg_ack_count; | ||
279 | /* | ||
280 | * Both sockets dump their completed count total into | ||
281 | * the message's count. | ||
282 | */ | ||
283 | smaster->socket_acknowledge_count[mdp->msg_slot] = 0; | ||
284 | asp = (struct atomic_short *)&msg->acknowledge_count; | ||
285 | msg_ack_count = atom_asr(socket_ack_count, asp); | ||
286 | |||
287 | if (msg_ack_count == bcp->cpus_in_uvhub) { | ||
288 | /* | ||
289 | * All cpus in uvhub saw it; reply | ||
290 | */ | ||
291 | reply_to_message(mdp, bcp); | ||
292 | } | ||
293 | } | ||
294 | |||
295 | return; | ||
296 | } | ||
297 | |||
298 | /* | ||
299 | * Determine the first cpu on a uvhub. | ||
300 | */ | ||
301 | static int uvhub_to_first_cpu(int uvhub) | ||
302 | { | ||
303 | int cpu; | ||
304 | for_each_present_cpu(cpu) | ||
305 | if (uvhub == uv_cpu_to_blade_id(cpu)) | ||
306 | return cpu; | ||
307 | return -1; | ||
308 | } | ||
309 | |||
310 | /* | ||
311 | * Last resort when we get a large number of destination timeouts is | ||
312 | * to clear resources held by a given cpu. | ||
313 | * Do this with IPI so that all messages in the BAU message queue | ||
314 | * can be identified by their nonzero swack_vec field. | ||
315 | * | ||
316 | * This is entered for a single cpu on the uvhub. | ||
317 | * The sender wants this uvhub to free a specific message's | ||
318 | * swack resources. | ||
319 | */ | ||
320 | static void do_reset(void *ptr) | ||
321 | { | ||
322 | int i; | ||
323 | struct bau_control *bcp = &per_cpu(bau_control, smp_processor_id()); | ||
324 | struct reset_args *rap = (struct reset_args *)ptr; | ||
325 | struct bau_pq_entry *msg; | ||
326 | struct ptc_stats *stat = bcp->statp; | ||
327 | |||
328 | stat->d_resets++; | ||
329 | /* | ||
330 | * We're looking for the given sender, and | ||
331 | * will free its swack resource. | ||
332 | * If all cpus finally responded after the timeout, its | ||
333 | * message 'replied_to' was set. | ||
334 | */ | ||
335 | for (msg = bcp->queue_first, i = 0; i < DEST_Q_SIZE; msg++, i++) { | ||
336 | unsigned long msg_res; | ||
337 | /* do_reset: same conditions for cancellation as | ||
338 | bau_process_retry_msg() */ | ||
339 | if ((msg->replied_to == 0) && | ||
340 | (msg->canceled == 0) && | ||
341 | (msg->sending_cpu == rap->sender) && | ||
342 | (msg->swack_vec) && | ||
343 | (msg->msg_type != MSG_NOOP)) { | ||
344 | unsigned long mmr; | ||
345 | unsigned long mr; | ||
346 | /* | ||
347 | * make everyone else ignore this message | ||
348 | */ | ||
349 | msg->canceled = 1; | ||
350 | /* | ||
351 | * only reset the resource if it is still pending | ||
352 | */ | ||
353 | mmr = read_mmr_sw_ack(); | ||
354 | msg_res = msg->swack_vec; | ||
355 | mr = (msg_res << UV_SW_ACK_NPENDING) | msg_res; | ||
356 | if (mmr & msg_res) { | ||
357 | stat->d_rcanceled++; | ||
358 | write_mmr_sw_ack(mr); | ||
359 | } | ||
360 | } | ||
361 | } | ||
362 | return; | ||
363 | } | ||
364 | |||
365 | /* | ||
366 | * Use IPI to get all target uvhubs to release resources held by | ||
367 | * a given sending cpu number. | ||
368 | */ | ||
369 | static void reset_with_ipi(struct bau_targ_hubmask *distribution, int sender) | ||
370 | { | ||
371 | int uvhub; | ||
372 | int maskbits; | ||
373 | cpumask_t mask; | ||
374 | struct reset_args reset_args; | ||
375 | |||
376 | reset_args.sender = sender; | ||
377 | cpus_clear(mask); | ||
378 | /* find a single cpu for each uvhub in this distribution mask */ | ||
379 | maskbits = sizeof(struct bau_targ_hubmask) * BITSPERBYTE; | ||
380 | for (uvhub = 0; uvhub < maskbits; uvhub++) { | ||
381 | int cpu; | ||
382 | if (!bau_uvhub_isset(uvhub, distribution)) | ||
383 | continue; | ||
384 | /* find a cpu for this uvhub */ | ||
385 | cpu = uvhub_to_first_cpu(uvhub); | ||
386 | cpu_set(cpu, mask); | ||
387 | } | ||
388 | |||
389 | /* IPI all cpus; preemption is already disabled */ | ||
390 | smp_call_function_many(&mask, do_reset, (void *)&reset_args, 1); | ||
391 | return; | ||
392 | } | ||
393 | |||
394 | static inline unsigned long cycles_2_us(unsigned long long cyc) | ||
395 | { | ||
396 | unsigned long long ns; | ||
397 | unsigned long us; | ||
398 | int cpu = smp_processor_id(); | ||
399 | |||
400 | ns = (cyc * per_cpu(cyc2ns, cpu)) >> CYC2NS_SCALE_FACTOR; | ||
401 | us = ns / 1000; | ||
402 | return us; | ||
403 | } | ||
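Aside: cyc2ns is the per-cpu fixed-point TSC-to-nanosecond factor, scaled up by CYC2NS_SCALE_FACTOR bits (assumed to be 10 in this sketch). A worked example of the conversion used above:

```c
/* Illustration with assumed numbers: a 2 GHz TSC gives
 * cyc2ns = (1000000 << 10) / 2000000 kHz = 512, so 2,000,000 cycles
 * -> (2000000 * 512) >> 10 = 1,000,000 ns -> 1000 us. */
static unsigned long example_cycles_2_us(void)
{
	unsigned long long cyc = 2000000, cyc2ns_factor = 512;

	return (unsigned long)(((cyc * cyc2ns_factor) >> 10) / 1000); /* 1000 */
}
```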
404 | |||
405 | /* | ||
406 | * wait for all cpus on this hub to finish their sends and go quiet | ||
407 | * leaves uvhub_quiesce set so that no new broadcasts are started by | ||
408 | * uv_flush_send_and_wait() | ||
409 | */ | ||
410 | static inline void quiesce_local_uvhub(struct bau_control *hmaster) | ||
411 | { | ||
412 | atom_asr(1, (struct atomic_short *)&hmaster->uvhub_quiesce); | ||
413 | } | ||
414 | |||
415 | /* | ||
416 | * mark this quiet-requestor as done | ||
417 | */ | ||
418 | static inline void end_uvhub_quiesce(struct bau_control *hmaster) | ||
419 | { | ||
420 | atom_asr(-1, (struct atomic_short *)&hmaster->uvhub_quiesce); | ||
421 | } | ||
422 | |||
423 | static unsigned long uv1_read_status(unsigned long mmr_offset, int right_shift) | ||
424 | { | ||
425 | unsigned long descriptor_status; | ||
426 | |||
427 | descriptor_status = uv_read_local_mmr(mmr_offset); | ||
428 | descriptor_status >>= right_shift; | ||
429 | descriptor_status &= UV_ACT_STATUS_MASK; | ||
430 | return descriptor_status; | ||
431 | } | ||
432 | |||
433 | /* | ||
434 | * Wait for completion of a broadcast software ack message | ||
435 | * return COMPLETE, RETRY(PLUGGED or TIMEOUT) or GIVEUP | ||
436 | */ | ||
437 | static int uv1_wait_completion(struct bau_desc *bau_desc, | ||
438 | unsigned long mmr_offset, int right_shift, | ||
439 | struct bau_control *bcp, long try) | ||
440 | { | ||
441 | unsigned long descriptor_status; | ||
442 | cycles_t ttm; | ||
443 | struct ptc_stats *stat = bcp->statp; | ||
444 | |||
445 | descriptor_status = uv1_read_status(mmr_offset, right_shift); | ||
446 | /* spin on the status MMR, waiting for it to go idle */ | ||
447 | while ((descriptor_status != DS_IDLE)) { | ||
448 | /* | ||
449 | * Our software ack messages may be blocked because | ||
450 | * there are no swack resources available. As long | ||
451 | * as none of them has timed out hardware will NACK | ||
452 | * our message and its state will stay IDLE. | ||
453 | */ | ||
454 | if (descriptor_status == DS_SOURCE_TIMEOUT) { | ||
455 | stat->s_stimeout++; | ||
456 | return FLUSH_GIVEUP; | ||
457 | } else if (descriptor_status == DS_DESTINATION_TIMEOUT) { | ||
458 | stat->s_dtimeout++; | ||
459 | ttm = get_cycles(); | ||
460 | |||
461 | /* | ||
462 | * Our retries may be blocked by all destination | ||
463 | * swack resources being consumed, and a timeout | ||
464 | * pending. In that case hardware returns the | ||
465 | * ERROR that looks like a destination timeout. | ||
466 | */ | ||
467 | if (cycles_2_us(ttm - bcp->send_message) < timeout_us) { | ||
468 | bcp->conseccompletes = 0; | ||
469 | return FLUSH_RETRY_PLUGGED; | ||
470 | } | ||
471 | |||
472 | bcp->conseccompletes = 0; | ||
473 | return FLUSH_RETRY_TIMEOUT; | ||
474 | } else { | ||
475 | /* | ||
476 | * descriptor_status is still BUSY | ||
477 | */ | ||
478 | cpu_relax(); | ||
479 | } | ||
480 | descriptor_status = uv1_read_status(mmr_offset, right_shift); | ||
481 | } | ||
482 | bcp->conseccompletes++; | ||
483 | return FLUSH_COMPLETE; | ||
484 | } | ||
485 | |||
486 | /* | ||
487 | * UV2 has an extra bit of status in the ACTIVATION_STATUS_2 register. | ||
488 | */ | ||
489 | static unsigned long uv2_read_status(unsigned long offset, int rshft, int cpu) | ||
490 | { | ||
491 | unsigned long descriptor_status; | ||
492 | unsigned long descriptor_status2; | ||
493 | |||
494 | descriptor_status = ((read_lmmr(offset) >> rshft) & UV_ACT_STATUS_MASK); | ||
495 | descriptor_status2 = (read_mmr_uv2_status() >> cpu) & 0x1UL; | ||
496 | descriptor_status = (descriptor_status << 1) | descriptor_status2; | ||
497 | return descriptor_status; | ||
498 | } | ||
499 | |||
500 | static int uv2_wait_completion(struct bau_desc *bau_desc, | ||
501 | unsigned long mmr_offset, int right_shift, | ||
502 | struct bau_control *bcp, long try) | ||
503 | { | ||
504 | unsigned long descriptor_stat; | ||
505 | cycles_t ttm; | ||
506 | int cpu = bcp->uvhub_cpu; | ||
507 | struct ptc_stats *stat = bcp->statp; | ||
508 | |||
509 | descriptor_stat = uv2_read_status(mmr_offset, right_shift, cpu); | ||
510 | |||
511 | /* spin on the status MMR, waiting for it to go idle */ | ||
512 | while (descriptor_stat != UV2H_DESC_IDLE) { | ||
513 | /* | ||
514 | * Our software ack messages may be blocked because | ||
515 | * there are no swack resources available. As long | ||
516 | * as none of them has timed out hardware will NACK | ||
517 | * our message and its state will stay IDLE. | ||
518 | */ | ||
519 | if ((descriptor_stat == UV2H_DESC_SOURCE_TIMEOUT) || | ||
520 | (descriptor_stat == UV2H_DESC_DEST_STRONG_NACK) || | ||
521 | (descriptor_stat == UV2H_DESC_DEST_PUT_ERR)) { | ||
522 | stat->s_stimeout++; | ||
523 | return FLUSH_GIVEUP; | ||
524 | } else if (descriptor_stat == UV2H_DESC_DEST_TIMEOUT) { | ||
525 | stat->s_dtimeout++; | ||
526 | ttm = get_cycles(); | ||
527 | /* | ||
528 | * Our retries may be blocked by all destination | ||
529 | * swack resources being consumed, and a timeout | ||
530 | * pending. In that case hardware returns the | ||
531 | * ERROR that looks like a destination timeout. | ||
532 | */ | ||
533 | if (cycles_2_us(ttm - bcp->send_message) < timeout_us) { | ||
534 | bcp->conseccompletes = 0; | ||
535 | return FLUSH_RETRY_PLUGGED; | ||
536 | } | ||
537 | bcp->conseccompletes = 0; | ||
538 | return FLUSH_RETRY_TIMEOUT; | ||
539 | } else { | ||
540 | /* | ||
541 | * descriptor_stat is still BUSY | ||
542 | */ | ||
543 | cpu_relax(); | ||
544 | } | ||
545 | descriptor_stat = uv2_read_status(mmr_offset, right_shift, cpu); | ||
546 | } | ||
547 | bcp->conseccompletes++; | ||
548 | return FLUSH_COMPLETE; | ||
549 | } | ||
550 | |||
551 | /* | ||
552 | * There are 2 status registers; each an array[32] of 2 bits. Set up for | ||
553 | * which register to read and position in that register based on cpu in | ||
554 | * current hub. | ||
555 | */ | ||
556 | static int wait_completion(struct bau_desc *bau_desc, | ||
557 | struct bau_control *bcp, long try) | ||
558 | { | ||
559 | int right_shift; | ||
560 | unsigned long mmr_offset; | ||
561 | int cpu = bcp->uvhub_cpu; | ||
562 | |||
563 | if (cpu < UV_CPUS_PER_AS) { | ||
564 | mmr_offset = UVH_LB_BAU_SB_ACTIVATION_STATUS_0; | ||
565 | right_shift = cpu * UV_ACT_STATUS_SIZE; | ||
566 | } else { | ||
567 | mmr_offset = UVH_LB_BAU_SB_ACTIVATION_STATUS_1; | ||
568 | right_shift = ((cpu - UV_CPUS_PER_AS) * UV_ACT_STATUS_SIZE); | ||
569 | } | ||
570 | |||
571 | if (is_uv1_hub()) | ||
572 | return uv1_wait_completion(bau_desc, mmr_offset, right_shift, | ||
573 | bcp, try); | ||
574 | else | ||
575 | return uv2_wait_completion(bau_desc, mmr_offset, right_shift, | ||
576 | bcp, try); | ||
577 | } | ||
578 | |||
579 | static inline cycles_t sec_2_cycles(unsigned long sec) | ||
580 | { | ||
581 | unsigned long ns; | ||
582 | cycles_t cyc; | ||
583 | |||
584 | ns = sec * 1000000000; | ||
585 | cyc = (ns << CYC2NS_SCALE_FACTOR)/(per_cpu(cyc2ns, smp_processor_id())); | ||
586 | return cyc; | ||
587 | } | ||
588 | |||
589 | /* | ||
590 | * Our retries are blocked by all destination sw ack resources being | ||
591 | * in use, and a timeout is pending. In that case hardware immediately | ||
592 | * returns the ERROR that looks like a destination timeout. | ||
593 | */ | ||
594 | static void destination_plugged(struct bau_desc *bau_desc, | ||
595 | struct bau_control *bcp, | ||
596 | struct bau_control *hmaster, struct ptc_stats *stat) | ||
597 | { | ||
598 | udelay(bcp->plugged_delay); | ||
599 | bcp->plugged_tries++; | ||
600 | |||
601 | if (bcp->plugged_tries >= bcp->plugsb4reset) { | ||
602 | bcp->plugged_tries = 0; | ||
603 | |||
604 | quiesce_local_uvhub(hmaster); | ||
605 | |||
606 | spin_lock(&hmaster->queue_lock); | ||
607 | reset_with_ipi(&bau_desc->distribution, bcp->cpu); | ||
608 | spin_unlock(&hmaster->queue_lock); | ||
609 | |||
610 | end_uvhub_quiesce(hmaster); | ||
611 | |||
612 | bcp->ipi_attempts++; | ||
613 | stat->s_resets_plug++; | ||
614 | } | ||
615 | } | ||
616 | |||
617 | static void destination_timeout(struct bau_desc *bau_desc, | ||
618 | struct bau_control *bcp, struct bau_control *hmaster, | ||
619 | struct ptc_stats *stat) | ||
620 | { | ||
621 | hmaster->max_concurr = 1; | ||
622 | bcp->timeout_tries++; | ||
623 | if (bcp->timeout_tries >= bcp->timeoutsb4reset) { | ||
624 | bcp->timeout_tries = 0; | ||
625 | |||
626 | quiesce_local_uvhub(hmaster); | ||
627 | |||
628 | spin_lock(&hmaster->queue_lock); | ||
629 | reset_with_ipi(&bau_desc->distribution, bcp->cpu); | ||
630 | spin_unlock(&hmaster->queue_lock); | ||
631 | |||
632 | end_uvhub_quiesce(hmaster); | ||
633 | |||
634 | bcp->ipi_attempts++; | ||
635 | stat->s_resets_timeout++; | ||
636 | } | ||
637 | } | ||
638 | |||
639 | /* | ||
640 | * Completions are taking a very long time due to a congested numalink | ||
641 | * network. | ||
642 | */ | ||
643 | static void disable_for_congestion(struct bau_control *bcp, | ||
644 | struct ptc_stats *stat) | ||
645 | { | ||
646 | /* let only one cpu do this disabling */ | ||
647 | spin_lock(&disable_lock); | ||
648 | |||
649 | if (!baudisabled && bcp->period_requests && | ||
650 | ((bcp->period_time / bcp->period_requests) > congested_cycles)) { | ||
651 | int tcpu; | ||
652 | struct bau_control *tbcp; | ||
653 | /* it becomes this cpu's job to turn on the use of the | ||
654 | BAU again */ | ||
655 | baudisabled = 1; | ||
656 | bcp->set_bau_off = 1; | ||
657 | bcp->set_bau_on_time = get_cycles(); | ||
658 | bcp->set_bau_on_time += sec_2_cycles(bcp->cong_period); | ||
659 | stat->s_bau_disabled++; | ||
660 | for_each_present_cpu(tcpu) { | ||
661 | tbcp = &per_cpu(bau_control, tcpu); | ||
662 | tbcp->baudisabled = 1; | ||
663 | } | ||
664 | } | ||
665 | |||
666 | spin_unlock(&disable_lock); | ||
667 | } | ||
668 | |||
669 | static void count_max_concurr(int stat, struct bau_control *bcp, | ||
670 | struct bau_control *hmaster) | ||
671 | { | ||
672 | bcp->plugged_tries = 0; | ||
673 | bcp->timeout_tries = 0; | ||
674 | if (stat != FLUSH_COMPLETE) | ||
675 | return; | ||
676 | if (bcp->conseccompletes <= bcp->complete_threshold) | ||
677 | return; | ||
678 | if (hmaster->max_concurr >= hmaster->max_concurr_const) | ||
679 | return; | ||
680 | hmaster->max_concurr++; | ||
681 | } | ||
682 | |||
683 | static void record_send_stats(cycles_t time1, cycles_t time2, | ||
684 | struct bau_control *bcp, struct ptc_stats *stat, | ||
685 | int completion_status, int try) | ||
686 | { | ||
687 | cycles_t elapsed; | ||
688 | |||
689 | if (time2 > time1) { | ||
690 | elapsed = time2 - time1; | ||
691 | stat->s_time += elapsed; | ||
692 | |||
693 | if ((completion_status == FLUSH_COMPLETE) && (try == 1)) { | ||
694 | bcp->period_requests++; | ||
695 | bcp->period_time += elapsed; | ||
696 | if ((elapsed > congested_cycles) && | ||
697 | (bcp->period_requests > bcp->cong_reps)) | ||
698 | disable_for_congestion(bcp, stat); | ||
699 | } | ||
700 | } else | ||
701 | stat->s_requestor--; | ||
702 | |||
703 | if (completion_status == FLUSH_COMPLETE && try > 1) | ||
704 | stat->s_retriesok++; | ||
705 | else if (completion_status == FLUSH_GIVEUP) | ||
706 | stat->s_giveup++; | ||
707 | } | ||
708 | |||
709 | /* | ||
710 | * Because of a uv1 hardware bug only a limited number of concurrent | ||
711 | * requests can be made. | ||
712 | */ | ||
713 | static void uv1_throttle(struct bau_control *hmaster, struct ptc_stats *stat) | ||
714 | { | ||
715 | spinlock_t *lock = &hmaster->uvhub_lock; | ||
716 | atomic_t *v; | ||
717 | |||
718 | v = &hmaster->active_descriptor_count; | ||
719 | if (!atomic_inc_unless_ge(lock, v, hmaster->max_concurr)) { | ||
720 | stat->s_throttles++; | ||
721 | do { | ||
722 | cpu_relax(); | ||
723 | } while (!atomic_inc_unless_ge(lock, v, hmaster->max_concurr)); | ||
724 | } | ||
725 | } | ||
726 | |||
727 | /* | ||
728 | * Handle the completion status of a message send. | ||
729 | */ | ||
730 | static void handle_cmplt(int completion_status, struct bau_desc *bau_desc, | ||
731 | struct bau_control *bcp, struct bau_control *hmaster, | ||
732 | struct ptc_stats *stat) | ||
733 | { | ||
734 | if (completion_status == FLUSH_RETRY_PLUGGED) | ||
735 | destination_plugged(bau_desc, bcp, hmaster, stat); | ||
736 | else if (completion_status == FLUSH_RETRY_TIMEOUT) | ||
737 | destination_timeout(bau_desc, bcp, hmaster, stat); | ||
738 | } | ||
739 | |||
740 | /* | ||
741 | * Send a broadcast and wait for it to complete. | ||
742 | * | ||
743 | * The flush_mask contains the cpus the broadcast is to be sent to including | ||
744 | * cpus that are on the local uvhub. | ||
745 | * | ||
746 | * Returns 0 if all flushing represented in the mask was done. | ||
747 | * Returns 1 if it gives up entirely and the original cpu mask is to be | ||
748 | * returned to the kernel. | ||
749 | */ | ||
750 | int uv_flush_send_and_wait(struct bau_desc *bau_desc, | ||
751 | struct cpumask *flush_mask, struct bau_control *bcp) | ||
752 | { | ||
753 | int seq_number = 0; | ||
754 | int completion_stat = 0; | ||
755 | long try = 0; | ||
756 | unsigned long index; | ||
757 | cycles_t time1; | ||
758 | cycles_t time2; | ||
759 | struct ptc_stats *stat = bcp->statp; | ||
760 | struct bau_control *hmaster = bcp->uvhub_master; | ||
761 | |||
762 | if (is_uv1_hub()) | ||
763 | uv1_throttle(hmaster, stat); | ||
764 | |||
765 | while (hmaster->uvhub_quiesce) | ||
766 | cpu_relax(); | ||
767 | |||
768 | time1 = get_cycles(); | ||
769 | do { | ||
770 | if (try == 0) { | ||
771 | bau_desc->header.msg_type = MSG_REGULAR; | ||
772 | seq_number = bcp->message_number++; | ||
773 | } else { | ||
774 | bau_desc->header.msg_type = MSG_RETRY; | ||
775 | stat->s_retry_messages++; | ||
776 | } | ||
777 | |||
778 | bau_desc->header.sequence = seq_number; | ||
779 | index = (1UL << AS_PUSH_SHIFT) | bcp->uvhub_cpu; | ||
780 | bcp->send_message = get_cycles(); | ||
781 | |||
782 | write_mmr_activation(index); | ||
783 | |||
784 | try++; | ||
785 | completion_stat = wait_completion(bau_desc, bcp, try); | ||
786 | |||
787 | handle_cmplt(completion_stat, bau_desc, bcp, hmaster, stat); | ||
788 | |||
789 | if (bcp->ipi_attempts >= bcp->ipi_reset_limit) { | ||
790 | bcp->ipi_attempts = 0; | ||
791 | completion_stat = FLUSH_GIVEUP; | ||
792 | break; | ||
793 | } | ||
794 | cpu_relax(); | ||
795 | } while ((completion_stat == FLUSH_RETRY_PLUGGED) || | ||
796 | (completion_stat == FLUSH_RETRY_TIMEOUT)); | ||
797 | |||
798 | time2 = get_cycles(); | ||
799 | |||
800 | count_max_concurr(completion_stat, bcp, hmaster); | ||
801 | |||
802 | while (hmaster->uvhub_quiesce) | ||
803 | cpu_relax(); | ||
804 | |||
805 | atomic_dec(&hmaster->active_descriptor_count); | ||
806 | |||
807 | record_send_stats(time1, time2, bcp, stat, completion_stat, try); | ||
808 | |||
809 | if (completion_stat == FLUSH_GIVEUP) | ||
810 | return 1; | ||
811 | return 0; | ||
812 | } | ||
813 | |||
814 | /* | ||
815 | * The BAU is disabled. When the disabled time period has expired, the cpu | ||
816 | * that disabled it must re-enable it. | ||
817 | * Return 0 if it is re-enabled for all cpus. | ||
818 | */ | ||
819 | static int check_enable(struct bau_control *bcp, struct ptc_stats *stat) | ||
820 | { | ||
821 | int tcpu; | ||
822 | struct bau_control *tbcp; | ||
823 | |||
824 | if (bcp->set_bau_off) { | ||
825 | if (get_cycles() >= bcp->set_bau_on_time) { | ||
826 | stat->s_bau_reenabled++; | ||
827 | baudisabled = 0; | ||
828 | for_each_present_cpu(tcpu) { | ||
829 | tbcp = &per_cpu(bau_control, tcpu); | ||
830 | tbcp->baudisabled = 0; | ||
831 | tbcp->period_requests = 0; | ||
832 | tbcp->period_time = 0; | ||
833 | } | ||
834 | return 0; | ||
835 | } | ||
836 | } | ||
837 | return -1; | ||
838 | } | ||
839 | |||
840 | static void record_send_statistics(struct ptc_stats *stat, int locals, int hubs, | ||
841 | int remotes, struct bau_desc *bau_desc) | ||
842 | { | ||
843 | stat->s_requestor++; | ||
844 | stat->s_ntargcpu += remotes + locals; | ||
845 | stat->s_ntargremotes += remotes; | ||
846 | stat->s_ntarglocals += locals; | ||
847 | |||
848 | /* uvhub statistics */ | ||
849 | hubs = bau_uvhub_weight(&bau_desc->distribution); | ||
850 | if (locals) { | ||
851 | stat->s_ntarglocaluvhub++; | ||
852 | stat->s_ntargremoteuvhub += (hubs - 1); | ||
853 | } else | ||
854 | stat->s_ntargremoteuvhub += hubs; | ||
855 | |||
856 | stat->s_ntarguvhub += hubs; | ||
857 | |||
858 | if (hubs >= 16) | ||
859 | stat->s_ntarguvhub16++; | ||
860 | else if (hubs >= 8) | ||
861 | stat->s_ntarguvhub8++; | ||
862 | else if (hubs >= 4) | ||
863 | stat->s_ntarguvhub4++; | ||
864 | else if (hubs >= 2) | ||
865 | stat->s_ntarguvhub2++; | ||
866 | else | ||
867 | stat->s_ntarguvhub1++; | ||
868 | } | ||
869 | |||
870 | /* | ||
871 | * Translate a cpu mask to the uvhub distribution mask in the BAU | ||
872 | * activation descriptor. | ||
873 | */ | ||
874 | static int set_distrib_bits(struct cpumask *flush_mask, struct bau_control *bcp, | ||
875 | struct bau_desc *bau_desc, int *localsp, int *remotesp) | ||
876 | { | ||
877 | int cpu; | ||
878 | int pnode; | ||
879 | int cnt = 0; | ||
880 | struct hub_and_pnode *hpp; | ||
881 | |||
882 | for_each_cpu(cpu, flush_mask) { | ||
883 | /* | ||
884 | * The distribution vector is a bit map of pnodes, relative | ||
885 | * to the partition base pnode (and the partition base nasid | ||
886 | * in the header). | ||
887 | * Translate cpu to pnode and hub using a local memory array. | ||
888 | */ | ||
889 | hpp = &bcp->socket_master->thp[cpu]; | ||
890 | pnode = hpp->pnode - bcp->partition_base_pnode; | ||
891 | bau_uvhub_set(pnode, &bau_desc->distribution); | ||
892 | cnt++; | ||
893 | if (hpp->uvhub == bcp->uvhub) | ||
894 | (*localsp)++; | ||
895 | else | ||
896 | (*remotesp)++; | ||
897 | } | ||
898 | if (!cnt) | ||
899 | return 1; | ||
900 | return 0; | ||
901 | } | ||
902 | |||
903 | /* | ||
904 | * globally purge translation cache of a virtual address or all TLBs | ||
905 | * @cpumask: mask of all cpus in which the address is to be removed | ||
906 | * @mm: mm_struct containing virtual address range | ||
907 | * @va: virtual address to be removed (or TLB_FLUSH_ALL for all TLBs on cpu) | ||
908 | * @cpu: the current cpu | ||
909 | * | ||
910 | * This is the entry point for initiating any UV global TLB shootdown. | ||
911 | * | ||
912 | * Purges the translation caches of all specified processors of the given | ||
913 | * virtual address, or purges all TLBs on specified processors. | ||
914 | * | ||
915 | * The caller has derived the cpumask from the mm_struct. This function | ||
916 | * is called only if there are bits set in the mask. (e.g. flush_tlb_page()) | ||
917 | * | ||
918 | * The cpumask is converted into a uvhubmask of the uvhubs containing | ||
919 | * those cpus. | ||
920 | * | ||
921 | * Note that this function should be called with preemption disabled. | ||
922 | * | ||
923 | * Returns NULL if all remote flushing was done. | ||
924 | * Returns pointer to cpumask if some remote flushing remains to be | ||
925 | * done. The returned pointer is valid till preemption is re-enabled. | ||
926 | */ | ||
927 | const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask, | ||
928 | struct mm_struct *mm, unsigned long va, | ||
929 | unsigned int cpu) | ||
930 | { | ||
931 | int locals = 0; | ||
932 | int remotes = 0; | ||
933 | int hubs = 0; | ||
934 | struct bau_desc *bau_desc; | ||
935 | struct cpumask *flush_mask; | ||
936 | struct ptc_stats *stat; | ||
937 | struct bau_control *bcp; | ||
938 | |||
939 | /* kernel was booted 'nobau' */ | ||
940 | if (nobau) | ||
941 | return cpumask; | ||
942 | |||
943 | bcp = &per_cpu(bau_control, cpu); | ||
944 | stat = bcp->statp; | ||
945 | |||
946 | /* bau was disabled due to slow response */ | ||
947 | if (bcp->baudisabled) { | ||
948 | if (check_enable(bcp, stat)) | ||
949 | return cpumask; | ||
950 | } | ||
951 | |||
952 | /* | ||
953 | * Each sending cpu has a per-cpu mask which it fills from the caller's | ||
954 | * cpu mask. All cpus are converted to uvhubs and copied to the | ||
955 | * activation descriptor. | ||
956 | */ | ||
957 | flush_mask = (struct cpumask *)per_cpu(uv_flush_tlb_mask, cpu); | ||
958 | /* don't actually do a shootdown of the local cpu */ | ||
959 | cpumask_andnot(flush_mask, cpumask, cpumask_of(cpu)); | ||
960 | |||
961 | if (cpu_isset(cpu, *cpumask)) | ||
962 | stat->s_ntargself++; | ||
963 | |||
964 | bau_desc = bcp->descriptor_base; | ||
965 | bau_desc += ITEMS_PER_DESC * bcp->uvhub_cpu; | ||
966 | bau_uvhubs_clear(&bau_desc->distribution, UV_DISTRIBUTION_SIZE); | ||
967 | if (set_distrib_bits(flush_mask, bcp, bau_desc, &locals, &remotes)) | ||
968 | return NULL; | ||
969 | |||
970 | record_send_statistics(stat, locals, hubs, remotes, bau_desc); | ||
971 | |||
972 | bau_desc->payload.address = va; | ||
973 | bau_desc->payload.sending_cpu = cpu; | ||
974 | /* | ||
975 | * uv_flush_send_and_wait returns 0 if all cpus were messaged, | ||
976 | * or 1 if it gave up and the original cpumask should be returned. | ||
977 | */ | ||
978 | if (!uv_flush_send_and_wait(bau_desc, flush_mask, bcp)) | ||
979 | return NULL; | ||
980 | else | ||
981 | return cpumask; | ||
982 | } | ||
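Aside: the NULL-or-cpumask return convention matters to the generic flusher. In kernels of this vintage the arch TLB code dispatches roughly as sketched below (an approximation of arch/x86/mm/tlb.c, not part of this patch); when uv_flush_tlb_others() hands back a mask, the remaining cpus are flushed by ordinary IPIs.

```c
/* Sketch of the caller's use of the return value; flush_tlb_others_ipi()
 * is the conventional IPI-based fallback path. */
void native_flush_tlb_others(const struct cpumask *cpumask,
			     struct mm_struct *mm, unsigned long va)
{
	if (is_uv_system()) {
		unsigned int cpu = smp_processor_id();

		cpumask = uv_flush_tlb_others(cpumask, mm, va, cpu);
		if (cpumask)
			flush_tlb_others_ipi(cpumask, mm, va);
		return;
	}
	flush_tlb_others_ipi(cpumask, mm, va);
}
```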
983 | |||
984 | /* | ||
985 | * The BAU message interrupt comes here. (registered by set_intr_gate) | ||
986 | * See entry_64.S | ||
987 | * | ||
988 | * We received a broadcast assist message. | ||
989 | * | ||
990 | * Interrupts are disabled; this interrupt could represent | ||
991 | * the receipt of several messages. | ||
992 | * | ||
993 | * All cores/threads on this hub get this interrupt. | ||
994 | * The last one to see it does the software ack. | ||
995 | * (the resource will not be freed until noninterruptible cpus see this | ||
996 | * interrupt; hardware may timeout the s/w ack and reply ERROR) | ||
997 | */ | ||
998 | void uv_bau_message_interrupt(struct pt_regs *regs) | ||
999 | { | ||
1000 | int count = 0; | ||
1001 | cycles_t time_start; | ||
1002 | struct bau_pq_entry *msg; | ||
1003 | struct bau_control *bcp; | ||
1004 | struct ptc_stats *stat; | ||
1005 | struct msg_desc msgdesc; | ||
1006 | |||
1007 | time_start = get_cycles(); | ||
1008 | |||
1009 | bcp = &per_cpu(bau_control, smp_processor_id()); | ||
1010 | stat = bcp->statp; | ||
1011 | |||
1012 | msgdesc.queue_first = bcp->queue_first; | ||
1013 | msgdesc.queue_last = bcp->queue_last; | ||
1014 | |||
1015 | msg = bcp->bau_msg_head; | ||
1016 | while (msg->swack_vec) { | ||
1017 | count++; | ||
1018 | |||
1019 | msgdesc.msg_slot = msg - msgdesc.queue_first; | ||
1020 | msgdesc.swack_slot = ffs(msg->swack_vec) - 1; | ||
1021 | msgdesc.msg = msg; | ||
1022 | bau_process_message(&msgdesc, bcp); | ||
1023 | |||
1024 | msg++; | ||
1025 | if (msg > msgdesc.queue_last) | ||
1026 | msg = msgdesc.queue_first; | ||
1027 | bcp->bau_msg_head = msg; | ||
1028 | } | ||
1029 | stat->d_time += (get_cycles() - time_start); | ||
1030 | if (!count) | ||
1031 | stat->d_nomsg++; | ||
1032 | else if (count > 1) | ||
1033 | stat->d_multmsg++; | ||
1034 | |||
1035 | ack_APIC_irq(); | ||
1036 | } | ||
1037 | |||
1038 | /* | ||
1039 | * Each target uvhub (i.e. a uvhub that has cpus) needs to have | ||
1040 | * shootdown message timeouts enabled. The timeout does not cause | ||
1041 | * an interrupt, but causes an error message to be returned to | ||
1042 | * the sender. | ||
1043 | */ | ||
1044 | static void __init enable_timeouts(void) | ||
1045 | { | ||
1046 | int uvhub; | ||
1047 | int nuvhubs; | ||
1048 | int pnode; | ||
1049 | unsigned long mmr_image; | ||
1050 | |||
1051 | nuvhubs = uv_num_possible_blades(); | ||
1052 | |||
1053 | for (uvhub = 0; uvhub < nuvhubs; uvhub++) { | ||
1054 | if (!uv_blade_nr_possible_cpus(uvhub)) | ||
1055 | continue; | ||
1056 | |||
1057 | pnode = uv_blade_to_pnode(uvhub); | ||
1058 | mmr_image = read_mmr_misc_control(pnode); | ||
1059 | /* | ||
1060 | * Set the timeout period and then lock it in; the three | ||
1061 | * steps below capture and lock in the period. | ||
1062 | * | ||
1063 | * To program the period, the SOFT_ACK_MODE must be off. | ||
1064 | */ | ||
1065 | mmr_image &= ~(1L << SOFTACK_MSHIFT); | ||
1066 | write_mmr_misc_control(pnode, mmr_image); | ||
1067 | /* | ||
1068 | * Set the 4-bit period. | ||
1069 | */ | ||
1070 | mmr_image &= ~((unsigned long)0xf << SOFTACK_PSHIFT); | ||
1071 | mmr_image |= (SOFTACK_TIMEOUT_PERIOD << SOFTACK_PSHIFT); | ||
1072 | write_mmr_misc_control(pnode, mmr_image); | ||
1073 | /* | ||
1074 | * UV1: | ||
1075 | * Subsequent reversals of the timebase bit (3) cause an | ||
1076 | * immediate timeout of one or all INTD resources as | ||
1077 | * indicated in bits 2:0 (7 causes all of them to timeout). | ||
1078 | */ | ||
1079 | mmr_image |= (1L << SOFTACK_MSHIFT); | ||
1080 | if (is_uv2_hub()) { | ||
1081 | mmr_image |= (1L << UV2_LEG_SHFT); | ||
1082 | mmr_image |= (1L << UV2_EXT_SHFT); | ||
1083 | } | ||
1084 | write_mmr_misc_control(pnode, mmr_image); | ||
1085 | } | ||
1086 | } | ||
1087 | |||
1088 | static void *ptc_seq_start(struct seq_file *file, loff_t *offset) | ||
1089 | { | ||
1090 | if (*offset < num_possible_cpus()) | ||
1091 | return offset; | ||
1092 | return NULL; | ||
1093 | } | ||
1094 | |||
1095 | static void *ptc_seq_next(struct seq_file *file, void *data, loff_t *offset) | ||
1096 | { | ||
1097 | (*offset)++; | ||
1098 | if (*offset < num_possible_cpus()) | ||
1099 | return offset; | ||
1100 | return NULL; | ||
1101 | } | ||
1102 | |||
1103 | static void ptc_seq_stop(struct seq_file *file, void *data) | ||
1104 | { | ||
1105 | } | ||
1106 | |||
1107 | static inline unsigned long long usec_2_cycles(unsigned long microsec) | ||
1108 | { | ||
1109 | unsigned long ns; | ||
1110 | unsigned long long cyc; | ||
1111 | |||
1112 | ns = microsec * 1000; | ||
1113 | cyc = (ns << CYC2NS_SCALE_FACTOR)/(per_cpu(cyc2ns, smp_processor_id())); | ||
1114 | return cyc; | ||
1115 | } | ||
1116 | |||
1117 | /* | ||
1118 | * Display the statistics through /proc/sgi_uv/ptc_statistics | ||
1119 | * 'data' points to the cpu number | ||
1120 | * Note: see the descriptions in stat_description[]. | ||
1121 | */ | ||
1122 | static int ptc_seq_show(struct seq_file *file, void *data) | ||
1123 | { | ||
1124 | struct ptc_stats *stat; | ||
1125 | int cpu; | ||
1126 | |||
1127 | cpu = *(loff_t *)data; | ||
1128 | if (!cpu) { | ||
1129 | seq_printf(file, | ||
1130 | "# cpu sent stime self locals remotes ncpus localhub "); | ||
1131 | seq_printf(file, | ||
1132 | "remotehub numuvhubs numuvhubs16 numuvhubs8 "); | ||
1133 | seq_printf(file, | ||
1134 | "numuvhubs4 numuvhubs2 numuvhubs1 dto retries rok "); | ||
1135 | seq_printf(file, | ||
1136 | "resetp resett giveup sto bz throt swack recv rtime "); | ||
1137 | seq_printf(file, | ||
1138 | "all one mult none retry canc nocan reset rcan "); | ||
1139 | seq_printf(file, | ||
1140 | "disable enable\n"); | ||
1141 | } | ||
1142 | if (cpu < num_possible_cpus() && cpu_online(cpu)) { | ||
1143 | stat = &per_cpu(ptcstats, cpu); | ||
1144 | /* source side statistics */ | ||
1145 | seq_printf(file, | ||
1146 | "cpu %d %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld ", | ||
1147 | cpu, stat->s_requestor, cycles_2_us(stat->s_time), | ||
1148 | stat->s_ntargself, stat->s_ntarglocals, | ||
1149 | stat->s_ntargremotes, stat->s_ntargcpu, | ||
1150 | stat->s_ntarglocaluvhub, stat->s_ntargremoteuvhub, | ||
1151 | stat->s_ntarguvhub, stat->s_ntarguvhub16); | ||
1152 | seq_printf(file, "%ld %ld %ld %ld %ld ", | ||
1153 | stat->s_ntarguvhub8, stat->s_ntarguvhub4, | ||
1154 | stat->s_ntarguvhub2, stat->s_ntarguvhub1, | ||
1155 | stat->s_dtimeout); | ||
1156 | seq_printf(file, "%ld %ld %ld %ld %ld %ld %ld %ld ", | ||
1157 | stat->s_retry_messages, stat->s_retriesok, | ||
1158 | stat->s_resets_plug, stat->s_resets_timeout, | ||
1159 | stat->s_giveup, stat->s_stimeout, | ||
1160 | stat->s_busy, stat->s_throttles); | ||
1161 | |||
1162 | /* destination side statistics */ | ||
1163 | seq_printf(file, | ||
1164 | "%lx %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld ", | ||
1165 | read_gmmr_sw_ack(uv_cpu_to_pnode(cpu)), | ||
1166 | stat->d_requestee, cycles_2_us(stat->d_time), | ||
1167 | stat->d_alltlb, stat->d_onetlb, stat->d_multmsg, | ||
1168 | stat->d_nomsg, stat->d_retries, stat->d_canceled, | ||
1169 | stat->d_nocanceled, stat->d_resets, | ||
1170 | stat->d_rcanceled); | ||
1171 | seq_printf(file, "%ld %ld\n", | ||
1172 | stat->s_bau_disabled, stat->s_bau_reenabled); | ||
1173 | } | ||
1174 | return 0; | ||
1175 | } | ||
1176 | |||
1177 | /* | ||
1178 | * Display the tunables through debugfs | ||
1179 | */ | ||
1180 | static ssize_t tunables_read(struct file *file, char __user *userbuf, | ||
1181 | size_t count, loff_t *ppos) | ||
1182 | { | ||
1183 | char *buf; | ||
1184 | int ret; | ||
1185 | |||
1186 | buf = kasprintf(GFP_KERNEL, "%s %s %s\n%d %d %d %d %d %d %d %d %d\n", | ||
1187 | "max_concur plugged_delay plugsb4reset", | ||
1188 | "timeoutsb4reset ipi_reset_limit complete_threshold", | ||
1189 | "congested_response_us congested_reps congested_period", | ||
1190 | max_concurr, plugged_delay, plugsb4reset, | ||
1191 | timeoutsb4reset, ipi_reset_limit, complete_threshold, | ||
1192 | congested_respns_us, congested_reps, congested_period); | ||
1193 | |||
1194 | if (!buf) | ||
1195 | return -ENOMEM; | ||
1196 | |||
1197 | ret = simple_read_from_buffer(userbuf, count, ppos, buf, strlen(buf)); | ||
1198 | kfree(buf); | ||
1199 | return ret; | ||
1200 | } | ||
1201 | |||
1202 | /* | ||
1203 | * handle a write to /proc/sgi_uv/ptc_statistics | ||
1204 | * -1: reset the statistics | ||
1205 | * 0: display meaning of the statistics | ||
1206 | */ | ||
1207 | static ssize_t ptc_proc_write(struct file *file, const char __user *user, | ||
1208 | size_t count, loff_t *data) | ||
1209 | { | ||
1210 | int cpu; | ||
1211 | int i; | ||
1212 | int elements; | ||
1213 | long input_arg; | ||
1214 | char optstr[64]; | ||
1215 | struct ptc_stats *stat; | ||
1216 | |||
1217 | if (count == 0 || count > sizeof(optstr)) | ||
1218 | return -EINVAL; | ||
1219 | if (copy_from_user(optstr, user, count)) | ||
1220 | return -EFAULT; | ||
1221 | optstr[count - 1] = '\0'; | ||
1222 | |||
1223 | if (strict_strtol(optstr, 10, &input_arg) < 0) { | ||
1224 | printk(KERN_DEBUG "%s is invalid\n", optstr); | ||
1225 | return -EINVAL; | ||
1226 | } | ||
1227 | |||
1228 | if (input_arg == 0) { | ||
1229 | elements = sizeof(stat_description)/sizeof(*stat_description); | ||
1230 | printk(KERN_DEBUG "# cpu: cpu number\n"); | ||
1231 | printk(KERN_DEBUG "Sender statistics:\n"); | ||
1232 | for (i = 0; i < elements; i++) | ||
1233 | printk(KERN_DEBUG "%s\n", stat_description[i]); | ||
1234 | } else if (input_arg == -1) { | ||
1235 | for_each_present_cpu(cpu) { | ||
1236 | stat = &per_cpu(ptcstats, cpu); | ||
1237 | memset(stat, 0, sizeof(struct ptc_stats)); | ||
1238 | } | ||
1239 | } | ||
1240 | |||
1241 | return count; | ||
1242 | } | ||
1243 | |||
1244 | static int local_atoi(const char *name) | ||
1245 | { | ||
1246 | int val = 0; | ||
1247 | |||
1248 | for (;; name++) { | ||
1249 | switch (*name) { | ||
1250 | case '0' ... '9': | ||
1251 | val = 10*val+(*name-'0'); | ||
1252 | break; | ||
1253 | default: | ||
1254 | return val; | ||
1255 | } | ||
1256 | } | ||
1257 | } | ||
1258 | |||
1259 | /* | ||
1260 | * Parse the values written to /sys/kernel/debug/sgi_uv/bau_tunables. | ||
1261 | * Zero values reset them to defaults. | ||
1262 | */ | ||
1263 | static int parse_tunables_write(struct bau_control *bcp, char *instr, | ||
1264 | int count) | ||
1265 | { | ||
1266 | char *p; | ||
1267 | char *q; | ||
1268 | int cnt = 0; | ||
1269 | int val; | ||
1270 | int e = sizeof(tunables) / sizeof(*tunables); | ||
1271 | |||
1272 | p = instr + strspn(instr, WHITESPACE); | ||
1273 | q = p; | ||
1274 | for (; *p; p = q + strspn(q, WHITESPACE)) { | ||
1275 | q = p + strcspn(p, WHITESPACE); | ||
1276 | cnt++; | ||
1277 | if (q == p) | ||
1278 | break; | ||
1279 | } | ||
1280 | if (cnt != e) { | ||
1281 | printk(KERN_INFO "bau tunable error: should be %d values\n", e); | ||
1282 | return -EINVAL; | ||
1283 | } | ||
1284 | |||
1285 | p = instr + strspn(instr, WHITESPACE); | ||
1286 | q = p; | ||
1287 | for (cnt = 0; *p; p = q + strspn(q, WHITESPACE), cnt++) { | ||
1288 | q = p + strcspn(p, WHITESPACE); | ||
1289 | val = local_atoi(p); | ||
1290 | switch (cnt) { | ||
1291 | case 0: | ||
1292 | if (val == 0) { | ||
1293 | max_concurr = MAX_BAU_CONCURRENT; | ||
1294 | max_concurr_const = MAX_BAU_CONCURRENT; | ||
1295 | continue; | ||
1296 | } | ||
1297 | if (val < 1 || val > bcp->cpus_in_uvhub) { | ||
1298 | printk(KERN_DEBUG | ||
1299 | "Error: BAU max concurrent %d is invalid\n", | ||
1300 | val); | ||
1301 | return -EINVAL; | ||
1302 | } | ||
1303 | max_concurr = val; | ||
1304 | max_concurr_const = val; | ||
1305 | continue; | ||
1306 | default: | ||
1307 | if (val == 0) | ||
1308 | *tunables[cnt].tunp = tunables[cnt].deflt; | ||
1309 | else | ||
1310 | *tunables[cnt].tunp = val; | ||
1311 | continue; | ||
1312 | } | ||
1313 | if (q == p) | ||
1314 | break; | ||
1315 | } | ||
1316 | return 0; | ||
1317 | } | ||
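
The two passes above use the classic strspn()/strcspn() walk to first count and then convert whitespace-separated integers. A minimal user-space sketch of the same pattern follows; the WHITESPACE set and the sample input string are stand-ins, not values taken from the kernel headers.

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define WHITESPACE " \t\n"   /* assumed separator set for this sketch */

int main(void)
{
	char instr[] = " 16 64 2048 ";   /* hypothetical sample input */
	char *p, *q;
	int cnt = 0;

	/* pass 1: count whitespace-separated tokens */
	p = instr + strspn(instr, WHITESPACE);
	q = p;
	for (; *p; p = q + strspn(q, WHITESPACE)) {
		q = p + strcspn(p, WHITESPACE);
		if (q == p)
			break;
		cnt++;
	}
	printf("%d tokens\n", cnt);

	/* pass 2: convert each token, mirroring how parse_tunables_write() walks them */
	p = instr + strspn(instr, WHITESPACE);
	q = p;
	for (cnt = 0; *p; p = q + strspn(q, WHITESPACE), cnt++) {
		q = p + strcspn(p, WHITESPACE);
		printf("value %d = %d\n", cnt, atoi(p));
	}
	return 0;
}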
1318 | |||
1319 | /* | ||
1320 | * Handle a write to debugfs. (/sys/kernel/debug/sgi_uv/bau_tunables) | ||
1321 | */ | ||
1322 | static ssize_t tunables_write(struct file *file, const char __user *user, | ||
1323 | size_t count, loff_t *data) | ||
1324 | { | ||
1325 | int cpu; | ||
1326 | int ret; | ||
1327 | char instr[100]; | ||
1328 | struct bau_control *bcp; | ||
1329 | |||
1330 | if (count == 0 || count > sizeof(instr)-1) | ||
1331 | return -EINVAL; | ||
1332 | if (copy_from_user(instr, user, count)) | ||
1333 | return -EFAULT; | ||
1334 | |||
1335 | instr[count] = '\0'; | ||
1336 | |||
1337 | bcp = &per_cpu(bau_control, smp_processor_id()); | ||
1338 | |||
1339 | ret = parse_tunables_write(bcp, instr, count); | ||
1340 | if (ret) | ||
1341 | return ret; | ||
1342 | |||
1343 | for_each_present_cpu(cpu) { | ||
1344 | bcp = &per_cpu(bau_control, cpu); | ||
1345 | bcp->max_concurr = max_concurr; | ||
1346 | bcp->max_concurr_const = max_concurr; | ||
1347 | bcp->plugged_delay = plugged_delay; | ||
1348 | bcp->plugsb4reset = plugsb4reset; | ||
1349 | bcp->timeoutsb4reset = timeoutsb4reset; | ||
1350 | bcp->ipi_reset_limit = ipi_reset_limit; | ||
1351 | bcp->complete_threshold = complete_threshold; | ||
1352 | bcp->cong_response_us = congested_respns_us; | ||
1353 | bcp->cong_reps = congested_reps; | ||
1354 | bcp->cong_period = congested_period; | ||
1355 | } | ||
1356 | return count; | ||
1357 | } | ||
1358 | |||
1359 | static const struct seq_operations uv_ptc_seq_ops = { | ||
1360 | .start = ptc_seq_start, | ||
1361 | .next = ptc_seq_next, | ||
1362 | .stop = ptc_seq_stop, | ||
1363 | .show = ptc_seq_show | ||
1364 | }; | ||
1365 | |||
1366 | static int ptc_proc_open(struct inode *inode, struct file *file) | ||
1367 | { | ||
1368 | return seq_open(file, &uv_ptc_seq_ops); | ||
1369 | } | ||
1370 | |||
1371 | static int tunables_open(struct inode *inode, struct file *file) | ||
1372 | { | ||
1373 | return 0; | ||
1374 | } | ||
1375 | |||
1376 | static const struct file_operations proc_uv_ptc_operations = { | ||
1377 | .open = ptc_proc_open, | ||
1378 | .read = seq_read, | ||
1379 | .write = ptc_proc_write, | ||
1380 | .llseek = seq_lseek, | ||
1381 | .release = seq_release, | ||
1382 | }; | ||
1383 | |||
1384 | static const struct file_operations tunables_fops = { | ||
1385 | .open = tunables_open, | ||
1386 | .read = tunables_read, | ||
1387 | .write = tunables_write, | ||
1388 | .llseek = default_llseek, | ||
1389 | }; | ||
1390 | |||
1391 | static int __init uv_ptc_init(void) | ||
1392 | { | ||
1393 | struct proc_dir_entry *proc_uv_ptc; | ||
1394 | |||
1395 | if (!is_uv_system()) | ||
1396 | return 0; | ||
1397 | |||
1398 | proc_uv_ptc = proc_create(UV_PTC_BASENAME, 0444, NULL, | ||
1399 | &proc_uv_ptc_operations); | ||
1400 | if (!proc_uv_ptc) { | ||
1401 | printk(KERN_ERR "unable to create %s proc entry\n", | ||
1402 | UV_PTC_BASENAME); | ||
1403 | return -EINVAL; | ||
1404 | } | ||
1405 | |||
1406 | tunables_dir = debugfs_create_dir(UV_BAU_TUNABLES_DIR, NULL); | ||
1407 | if (!tunables_dir) { | ||
1408 | printk(KERN_ERR "unable to create debugfs directory %s\n", | ||
1409 | UV_BAU_TUNABLES_DIR); | ||
1410 | return -EINVAL; | ||
1411 | } | ||
1412 | tunables_file = debugfs_create_file(UV_BAU_TUNABLES_FILE, 0600, | ||
1413 | tunables_dir, NULL, &tunables_fops); | ||
1414 | if (!tunables_file) { | ||
1415 | printk(KERN_ERR "unable to create debugfs file %s\n", | ||
1416 | UV_BAU_TUNABLES_FILE); | ||
1417 | return -EINVAL; | ||
1418 | } | ||
1419 | return 0; | ||
1420 | } | ||
1421 | |||
1422 | /* | ||
1423 | * Initialize the sending side's sending buffers. | ||
1424 | */ | ||
1425 | static void activation_descriptor_init(int node, int pnode, int base_pnode) | ||
1426 | { | ||
1427 | int i; | ||
1428 | int cpu; | ||
1429 | unsigned long pa; | ||
1430 | unsigned long m; | ||
1431 | unsigned long n; | ||
1432 | size_t dsize; | ||
1433 | struct bau_desc *bau_desc; | ||
1434 | struct bau_desc *bd2; | ||
1435 | struct bau_control *bcp; | ||
1436 | |||
1437 | /* | ||
1438 | * each bau_desc is 64 bytes; there are 8 (ITEMS_PER_DESC) | ||
1439 | * per cpu; and one per cpu on the uvhub (ADP_SZ) | ||
1440 | */ | ||
1441 | dsize = sizeof(struct bau_desc) * ADP_SZ * ITEMS_PER_DESC; | ||
1442 | bau_desc = kmalloc_node(dsize, GFP_KERNEL, node); | ||
1443 | BUG_ON(!bau_desc); | ||
1444 | |||
1445 | pa = uv_gpa(bau_desc); /* need the real nasid */ | ||
1446 | n = pa >> uv_nshift; | ||
1447 | m = pa & uv_mmask; | ||
1448 | |||
1449 | /* the 14-bit pnode */ | ||
1450 | write_mmr_descriptor_base(pnode, (n << UV_DESC_PSHIFT | m)); | ||
1451 | /* | ||
1452 | * Initializing all 8 (ITEMS_PER_DESC) descriptors for each | ||
1453 | * cpu even though we only use the first one; one descriptor can | ||
1454 | * describe a broadcast to 256 uv hubs. | ||
1455 | */ | ||
1456 | for (i = 0, bd2 = bau_desc; i < (ADP_SZ * ITEMS_PER_DESC); i++, bd2++) { | ||
1457 | memset(bd2, 0, sizeof(struct bau_desc)); | ||
1458 | bd2->header.swack_flag = 1; | ||
1459 | /* | ||
1460 | * The base_dest_nasid set in the message header is the nasid | ||
1461 | * of the first uvhub in the partition. The bit map will | ||
1462 | * indicate destination pnode numbers relative to that base. | ||
1463 | * They may not be consecutive if nasid striding is being used. | ||
1464 | */ | ||
1465 | bd2->header.base_dest_nasid = UV_PNODE_TO_NASID(base_pnode); | ||
1466 | bd2->header.dest_subnodeid = UV_LB_SUBNODEID; | ||
1467 | bd2->header.command = UV_NET_ENDPOINT_INTD; | ||
1468 | bd2->header.int_both = 1; | ||
1469 | /* | ||
1470 | * all others need to be set to zero: | ||
1471 | * fairness chaining multilevel count replied_to | ||
1472 | */ | ||
1473 | } | ||
1474 | for_each_present_cpu(cpu) { | ||
1475 | if (pnode != uv_blade_to_pnode(uv_cpu_to_blade_id(cpu))) | ||
1476 | continue; | ||
1477 | bcp = &per_cpu(bau_control, cpu); | ||
1478 | bcp->descriptor_base = bau_desc; | ||
1479 | } | ||
1480 | } | ||
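
The descriptor base is handed to the hub by splitting the global address into a node number and an on-node offset (pa >> uv_nshift, pa & uv_mmask) and recombining them at the hardware-defined bit position. Below is a small stand-alone sketch of that split-and-pack step; the field widths are made up and only stand in for uv_nshift, uv_mmask and UV_DESC_PSHIFT.

#include <stdio.h>

/* hypothetical field widths -- the real values come from uv_hub_info */
#define NODE_SHIFT  37ULL                       /* stand-in for uv_nshift */
#define NODE_MASK   ((1ULL << NODE_SHIFT) - 1)  /* stand-in for uv_mmask */
#define DESC_PSHIFT 49ULL                       /* stand-in for UV_DESC_PSHIFT */

int main(void)
{
	unsigned long long pa = 0x123456789abcULL;   /* pretend global address */
	unsigned long long n = pa >> NODE_SHIFT;     /* node (nasid) part */
	unsigned long long m = pa & NODE_MASK;       /* offset within the node */
	unsigned long long mmr = (n << DESC_PSHIFT) | m;

	printf("node=%#llx offset=%#llx mmr=%#llx\n", n, m, mmr);
	return 0;
}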
1481 | |||
1482 | /* | ||
1483 | * initialize the destination side's receiving buffers | ||
1484 | * entered for each uvhub in the partition | ||
1485 | * - node is first node (kernel memory notion) on the uvhub | ||
1486 | * - pnode is the uvhub's physical identifier | ||
1487 | */ | ||
1488 | static void pq_init(int node, int pnode) | ||
1489 | { | ||
1490 | int cpu; | ||
1491 | size_t plsize; | ||
1492 | char *cp; | ||
1493 | void *vp; | ||
1494 | unsigned long pn; | ||
1495 | unsigned long first; | ||
1496 | unsigned long pn_first; | ||
1497 | unsigned long last; | ||
1498 | struct bau_pq_entry *pqp; | ||
1499 | struct bau_control *bcp; | ||
1500 | |||
1501 | plsize = (DEST_Q_SIZE + 1) * sizeof(struct bau_pq_entry); | ||
1502 | vp = kmalloc_node(plsize, GFP_KERNEL, node); | ||
1503 | pqp = (struct bau_pq_entry *)vp; | ||
1504 | BUG_ON(!pqp); | ||
1505 | |||
1506 | cp = (char *)pqp + 31; | ||
1507 | pqp = (struct bau_pq_entry *)(((unsigned long)cp >> 5) << 5); | ||
1508 | |||
1509 | for_each_present_cpu(cpu) { | ||
1510 | if (pnode != uv_cpu_to_pnode(cpu)) | ||
1511 | continue; | ||
1512 | /* for every cpu on this pnode: */ | ||
1513 | bcp = &per_cpu(bau_control, cpu); | ||
1514 | bcp->queue_first = pqp; | ||
1515 | bcp->bau_msg_head = pqp; | ||
1516 | bcp->queue_last = pqp + (DEST_Q_SIZE - 1); | ||
1517 | } | ||
1518 | /* | ||
1519 | * need the pnode of where the memory was really allocated | ||
1520 | */ | ||
1521 | pn = uv_gpa(pqp) >> uv_nshift; | ||
1522 | first = uv_physnodeaddr(pqp); | ||
1523 | pn_first = ((unsigned long)pn << UV_PAYLOADQ_PNODE_SHIFT) | first; | ||
1524 | last = uv_physnodeaddr(pqp + (DEST_Q_SIZE - 1)); | ||
1525 | write_mmr_payload_first(pnode, pn_first); | ||
1526 | write_mmr_payload_tail(pnode, first); | ||
1527 | write_mmr_payload_last(pnode, last); | ||
1528 | |||
1529 | /* in effect, all msg_types are set to MSG_NOOP */ | ||
1530 | memset(pqp, 0, sizeof(struct bau_pq_entry) * DEST_Q_SIZE); | ||
1531 | } | ||
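
pq_init() over-allocates by one entry and then rounds the kmalloc'd pointer up to a 32-byte boundary by adding 31 and clearing the low five bits. The same trick in a stand-alone sketch; the 32-byte requirement is the only assumption carried over, the entry size and count are arbitrary.

#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>

int main(void)
{
	size_t entry = 64, nentries = 8;
	/* allocate one spare entry so the aligned pointer still covers nentries */
	void *vp = malloc((nentries + 1) * entry);
	if (!vp)
		return 1;

	/* round up to the next 32-byte boundary: add 31, clear the low 5 bits */
	char *cp = (char *)vp + 31;
	void *aligned = (void *)(((uintptr_t)cp >> 5) << 5);

	printf("raw=%p aligned=%p (mod 32 = %lu)\n",
	       vp, aligned, (unsigned long)((uintptr_t)aligned & 31));
	free(vp);
	return 0;
}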
1532 | |||
1533 | /* | ||
1534 | * Initialization of each UV hub's structures | ||
1535 | */ | ||
1536 | static void __init init_uvhub(int uvhub, int vector, int base_pnode) | ||
1537 | { | ||
1538 | int node; | ||
1539 | int pnode; | ||
1540 | unsigned long apicid; | ||
1541 | |||
1542 | node = uvhub_to_first_node(uvhub); | ||
1543 | pnode = uv_blade_to_pnode(uvhub); | ||
1544 | |||
1545 | activation_descriptor_init(node, pnode, base_pnode); | ||
1546 | |||
1547 | pq_init(node, pnode); | ||
1548 | /* | ||
1549 | * The below initialization can't be in firmware because the | ||
1550 | * messaging IRQ will be determined by the OS. | ||
1551 | */ | ||
1552 | apicid = uvhub_to_first_apicid(uvhub) | uv_apicid_hibits; | ||
1553 | write_mmr_data_config(pnode, ((apicid << 32) | vector)); | ||
1554 | } | ||
1555 | |||
1556 | /* | ||
1557 | * We will set BAU_MISC_CONTROL with a timeout period. | ||
1558 | * But the BIOS has set UVH_AGING_PRESCALE_SEL and UVH_TRANSACTION_TIMEOUT. | ||
1559 | * So the destination timeout period has to be calculated from them. | ||
1560 | */ | ||
1561 | static int calculate_destination_timeout(void) | ||
1562 | { | ||
1563 | unsigned long mmr_image; | ||
1564 | int mult1; | ||
1565 | int mult2; | ||
1566 | int index; | ||
1567 | int base; | ||
1568 | int ret; | ||
1569 | unsigned long ts_ns; | ||
1570 | |||
1571 | if (is_uv1_hub()) { | ||
1572 | mult1 = SOFTACK_TIMEOUT_PERIOD & BAU_MISC_CONTROL_MULT_MASK; | ||
1573 | mmr_image = uv_read_local_mmr(UVH_AGING_PRESCALE_SEL); | ||
1574 | index = (mmr_image >> BAU_URGENCY_7_SHIFT) & BAU_URGENCY_7_MASK; | ||
1575 | mmr_image = uv_read_local_mmr(UVH_TRANSACTION_TIMEOUT); | ||
1576 | mult2 = (mmr_image >> BAU_TRANS_SHIFT) & BAU_TRANS_MASK; | ||
1577 | base = timeout_base_ns[index]; | ||
1578 | ts_ns = base * mult1 * mult2; | ||
1579 | ret = ts_ns / 1000; | ||
1580 | } else { | ||
1581 | /* 4 bits 0/1 for 10/80us, 3 bits of multiplier */ | ||
1582 | mmr_image = uv_read_local_mmr(UVH_AGING_PRESCALE_SEL); | ||
1583 | mmr_image = (mmr_image & UV_SA_MASK) >> UV_SA_SHFT; | ||
1584 | if (mmr_image & (1L << UV2_ACK_UNITS_SHFT)) | ||
1585 | mult1 = 80; | ||
1586 | else | ||
1587 | mult1 = 10; | ||
1588 | base = mmr_image & UV2_ACK_MASK; | ||
1589 | ret = mult1 * base; | ||
1590 | } | ||
1591 | return ret; | ||
1592 | } | ||
1593 | |||
1594 | static void __init init_per_cpu_tunables(void) | ||
1595 | { | ||
1596 | int cpu; | ||
1597 | struct bau_control *bcp; | ||
1598 | |||
1599 | for_each_present_cpu(cpu) { | ||
1600 | bcp = &per_cpu(bau_control, cpu); | ||
1601 | bcp->baudisabled = 0; | ||
1602 | bcp->statp = &per_cpu(ptcstats, cpu); | ||
1603 | /* time interval to catch a hardware stay-busy bug */ | ||
1604 | bcp->timeout_interval = usec_2_cycles(2*timeout_us); | ||
1605 | bcp->max_concurr = max_concurr; | ||
1606 | bcp->max_concurr_const = max_concurr; | ||
1607 | bcp->plugged_delay = plugged_delay; | ||
1608 | bcp->plugsb4reset = plugsb4reset; | ||
1609 | bcp->timeoutsb4reset = timeoutsb4reset; | ||
1610 | bcp->ipi_reset_limit = ipi_reset_limit; | ||
1611 | bcp->complete_threshold = complete_threshold; | ||
1612 | bcp->cong_response_us = congested_respns_us; | ||
1613 | bcp->cong_reps = congested_reps; | ||
1614 | bcp->cong_period = congested_period; | ||
1615 | } | ||
1616 | } | ||
1617 | |||
1618 | /* | ||
1619 | * Scan all cpus to collect blade and socket summaries. | ||
1620 | */ | ||
1621 | static int __init get_cpu_topology(int base_pnode, | ||
1622 | struct uvhub_desc *uvhub_descs, | ||
1623 | unsigned char *uvhub_mask) | ||
1624 | { | ||
1625 | int cpu; | ||
1626 | int pnode; | ||
1627 | int uvhub; | ||
1628 | int socket; | ||
1629 | struct bau_control *bcp; | ||
1630 | struct uvhub_desc *bdp; | ||
1631 | struct socket_desc *sdp; | ||
1632 | |||
1633 | for_each_present_cpu(cpu) { | ||
1634 | bcp = &per_cpu(bau_control, cpu); | ||
1635 | |||
1636 | memset(bcp, 0, sizeof(struct bau_control)); | ||
1637 | |||
1638 | pnode = uv_cpu_hub_info(cpu)->pnode; | ||
1639 | if ((pnode - base_pnode) >= UV_DISTRIBUTION_SIZE) { | ||
1640 | printk(KERN_EMERG | ||
1641 | "cpu %d pnode %d-%d beyond %d; BAU disabled\n", | ||
1642 | cpu, pnode, base_pnode, UV_DISTRIBUTION_SIZE); | ||
1643 | return 1; | ||
1644 | } | ||
1645 | |||
1646 | bcp->osnode = cpu_to_node(cpu); | ||
1647 | bcp->partition_base_pnode = base_pnode; | ||
1648 | |||
1649 | uvhub = uv_cpu_hub_info(cpu)->numa_blade_id; | ||
1650 | *(uvhub_mask + (uvhub/8)) |= (1 << (uvhub%8)); | ||
1651 | bdp = &uvhub_descs[uvhub]; | ||
1652 | |||
1653 | bdp->num_cpus++; | ||
1654 | bdp->uvhub = uvhub; | ||
1655 | bdp->pnode = pnode; | ||
1656 | |||
1657 | /* kludge: 'assuming' one node per socket, and assuming that | ||
1658 | disabling a socket just leaves a gap in node numbers */ | ||
1659 | socket = bcp->osnode & 1; | ||
1660 | bdp->socket_mask |= (1 << socket); | ||
1661 | sdp = &bdp->socket[socket]; | ||
1662 | sdp->cpu_number[sdp->num_cpus] = cpu; | ||
1663 | sdp->num_cpus++; | ||
1664 | if (sdp->num_cpus > MAX_CPUS_PER_SOCKET) { | ||
1665 | printk(KERN_EMERG "%d cpus per socket invalid\n", | ||
1666 | sdp->num_cpus); | ||
1667 | return 1; | ||
1668 | } | ||
1669 | } | ||
1670 | return 0; | ||
1671 | } | ||
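
get_cpu_topology() records which uvhubs were seen in a plain byte array, setting bit (uvhub % 8) of byte (uvhub / 8); summarize_uvhub_sockets() later tests the same bit before visiting a hub. A short sketch of that set/test convention, with an arbitrary hub count and a made-up list of populated hubs:

#include <stdio.h>
#include <string.h>

#define NUVHUBS 20   /* arbitrary hub count for the sketch */

int main(void)
{
	unsigned char uvhub_mask[(NUVHUBS + 7) / 8];
	int seen[] = { 0, 3, 9, 17 };   /* pretend these hubs have cpus */
	int i, uvhub;

	memset(uvhub_mask, 0, sizeof(uvhub_mask));
	for (i = 0; i < (int)(sizeof(seen) / sizeof(seen[0])); i++) {
		uvhub = seen[i];
		uvhub_mask[uvhub / 8] |= 1 << (uvhub % 8);   /* mark hub present */
	}

	for (uvhub = 0; uvhub < NUVHUBS; uvhub++)
		if (uvhub_mask[uvhub / 8] & (1 << (uvhub % 8)))
			printf("uvhub %d present\n", uvhub);
	return 0;
}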
1672 | |||
1673 | /* | ||
1674 | * Each socket is to get a local array of pnodes/hubs. | ||
1675 | */ | ||
1676 | static void make_per_cpu_thp(struct bau_control *smaster) | ||
1677 | { | ||
1678 | int cpu; | ||
1679 | size_t hpsz = sizeof(struct hub_and_pnode) * num_possible_cpus(); | ||
1680 | |||
1681 | smaster->thp = kmalloc_node(hpsz, GFP_KERNEL, smaster->osnode); | ||
1682 | memset(smaster->thp, 0, hpsz); | ||
1683 | for_each_present_cpu(cpu) { | ||
1684 | smaster->thp[cpu].pnode = uv_cpu_hub_info(cpu)->pnode; | ||
1685 | smaster->thp[cpu].uvhub = uv_cpu_hub_info(cpu)->numa_blade_id; | ||
1686 | } | ||
1687 | } | ||
1688 | |||
1689 | /* | ||
1690 | * Initialize all the per_cpu information for the cpus on a given socket, | ||
1691 | * given what has been gathered into the socket_desc struct. | ||
1692 | * Report the chosen hub and socket masters back to the caller. | ||
1693 | */ | ||
1694 | static int scan_sock(struct socket_desc *sdp, struct uvhub_desc *bdp, | ||
1695 | struct bau_control **smasterp, | ||
1696 | struct bau_control **hmasterp) | ||
1697 | { | ||
1698 | int i; | ||
1699 | int cpu; | ||
1700 | struct bau_control *bcp; | ||
1701 | |||
1702 | for (i = 0; i < sdp->num_cpus; i++) { | ||
1703 | cpu = sdp->cpu_number[i]; | ||
1704 | bcp = &per_cpu(bau_control, cpu); | ||
1705 | bcp->cpu = cpu; | ||
1706 | if (i == 0) { | ||
1707 | *smasterp = bcp; | ||
1708 | if (!(*hmasterp)) | ||
1709 | *hmasterp = bcp; | ||
1710 | } | ||
1711 | bcp->cpus_in_uvhub = bdp->num_cpus; | ||
1712 | bcp->cpus_in_socket = sdp->num_cpus; | ||
1713 | bcp->socket_master = *smasterp; | ||
1714 | bcp->uvhub = bdp->uvhub; | ||
1715 | bcp->uvhub_master = *hmasterp; | ||
1716 | bcp->uvhub_cpu = uv_cpu_hub_info(cpu)->blade_processor_id; | ||
1717 | if (bcp->uvhub_cpu >= MAX_CPUS_PER_UVHUB) { | ||
1718 | printk(KERN_EMERG "%d cpus per uvhub invalid\n", | ||
1719 | bcp->uvhub_cpu); | ||
1720 | return 1; | ||
1721 | } | ||
1722 | } | ||
1723 | return 0; | ||
1724 | } | ||
1725 | |||
1726 | /* | ||
1727 | * Summarize the blade and socket topology into the per_cpu structures. | ||
1728 | */ | ||
1729 | static int __init summarize_uvhub_sockets(int nuvhubs, | ||
1730 | struct uvhub_desc *uvhub_descs, | ||
1731 | unsigned char *uvhub_mask) | ||
1732 | { | ||
1733 | int socket; | ||
1734 | int uvhub; | ||
1735 | unsigned short socket_mask; | ||
1736 | |||
1737 | for (uvhub = 0; uvhub < nuvhubs; uvhub++) { | ||
1738 | struct uvhub_desc *bdp; | ||
1739 | struct bau_control *smaster = NULL; | ||
1740 | struct bau_control *hmaster = NULL; | ||
1741 | |||
1742 | if (!(*(uvhub_mask + (uvhub/8)) & (1 << (uvhub%8)))) | ||
1743 | continue; | ||
1744 | |||
1745 | bdp = &uvhub_descs[uvhub]; | ||
1746 | socket_mask = bdp->socket_mask; | ||
1747 | socket = 0; | ||
1748 | while (socket_mask) { | ||
1749 | struct socket_desc *sdp; | ||
1750 | if ((socket_mask & 1)) { | ||
1751 | sdp = &bdp->socket[socket]; | ||
1752 | if (scan_sock(sdp, bdp, &smaster, &hmaster)) | ||
1753 | return 1; | ||
1754 | } | ||
1755 | socket++; | ||
1756 | socket_mask = (socket_mask >> 1); | ||
1757 | make_per_cpu_thp(smaster); | ||
1758 | } | ||
1759 | } | ||
1760 | return 0; | ||
1761 | } | ||
1762 | |||
1763 | /* | ||
1764 | * initialize the bau_control structure for each cpu | ||
1765 | */ | ||
1766 | static int __init init_per_cpu(int nuvhubs, int base_part_pnode) | ||
1767 | { | ||
1768 | unsigned char *uvhub_mask; | ||
1769 | void *vp; | ||
1770 | struct uvhub_desc *uvhub_descs; | ||
1771 | |||
1772 | timeout_us = calculate_destination_timeout(); | ||
1773 | |||
1774 | vp = kmalloc(nuvhubs * sizeof(struct uvhub_desc), GFP_KERNEL); | ||
1775 | uvhub_descs = (struct uvhub_desc *)vp; | ||
1776 | memset(uvhub_descs, 0, nuvhubs * sizeof(struct uvhub_desc)); | ||
1777 | uvhub_mask = kzalloc((nuvhubs+7)/8, GFP_KERNEL); | ||
1778 | |||
1779 | if (get_cpu_topology(base_part_pnode, uvhub_descs, uvhub_mask)) | ||
1780 | return 1; | ||
1781 | |||
1782 | if (summarize_uvhub_sockets(nuvhubs, uvhub_descs, uvhub_mask)) | ||
1783 | return 1; | ||
1784 | |||
1785 | kfree(uvhub_descs); | ||
1786 | kfree(uvhub_mask); | ||
1787 | init_per_cpu_tunables(); | ||
1788 | return 0; | ||
1789 | } | ||
1790 | |||
1791 | /* | ||
1792 | * Initialization of BAU-related structures | ||
1793 | */ | ||
1794 | static int __init uv_bau_init(void) | ||
1795 | { | ||
1796 | int uvhub; | ||
1797 | int pnode; | ||
1798 | int nuvhubs; | ||
1799 | int cur_cpu; | ||
1800 | int cpus; | ||
1801 | int vector; | ||
1802 | cpumask_var_t *mask; | ||
1803 | |||
1804 | if (!is_uv_system()) | ||
1805 | return 0; | ||
1806 | |||
1807 | if (nobau) | ||
1808 | return 0; | ||
1809 | |||
1810 | for_each_possible_cpu(cur_cpu) { | ||
1811 | mask = &per_cpu(uv_flush_tlb_mask, cur_cpu); | ||
1812 | zalloc_cpumask_var_node(mask, GFP_KERNEL, cpu_to_node(cur_cpu)); | ||
1813 | } | ||
1814 | |||
1815 | uv_nshift = uv_hub_info->m_val; | ||
1816 | uv_mmask = (1UL << uv_hub_info->m_val) - 1; | ||
1817 | nuvhubs = uv_num_possible_blades(); | ||
1818 | spin_lock_init(&disable_lock); | ||
1819 | congested_cycles = usec_2_cycles(congested_respns_us); | ||
1820 | |||
1821 | uv_base_pnode = 0x7fffffff; | ||
1822 | for (uvhub = 0; uvhub < nuvhubs; uvhub++) { | ||
1823 | cpus = uv_blade_nr_possible_cpus(uvhub); | ||
1824 | if (cpus && (uv_blade_to_pnode(uvhub) < uv_base_pnode)) | ||
1825 | uv_base_pnode = uv_blade_to_pnode(uvhub); | ||
1826 | } | ||
1827 | |||
1828 | if (init_per_cpu(nuvhubs, uv_base_pnode)) { | ||
1829 | nobau = 1; | ||
1830 | return 0; | ||
1831 | } | ||
1832 | |||
1833 | vector = UV_BAU_MESSAGE; | ||
1834 | for_each_possible_blade(uvhub) | ||
1835 | if (uv_blade_nr_possible_cpus(uvhub)) | ||
1836 | init_uvhub(uvhub, vector, uv_base_pnode); | ||
1837 | |||
1838 | enable_timeouts(); | ||
1839 | alloc_intr_gate(vector, uv_bau_message_intr1); | ||
1840 | |||
1841 | for_each_possible_blade(uvhub) { | ||
1842 | if (uv_blade_nr_possible_cpus(uvhub)) { | ||
1843 | unsigned long val; | ||
1844 | unsigned long mmr; | ||
1845 | pnode = uv_blade_to_pnode(uvhub); | ||
1846 | /* INIT the bau */ | ||
1847 | val = 1L << 63; | ||
1848 | write_gmmr_activation(pnode, val); | ||
1849 | mmr = 1; /* should be 1 to broadcast to both sockets */ | ||
1850 | write_mmr_data_broadcast(pnode, mmr); | ||
1851 | } | ||
1852 | } | ||
1853 | |||
1854 | return 0; | ||
1855 | } | ||
1856 | core_initcall(uv_bau_init); | ||
1857 | fs_initcall(uv_ptc_init); | ||
diff --git a/arch/x86/platform/uv/uv_irq.c b/arch/x86/platform/uv/uv_irq.c new file mode 100644 index 000000000000..374a05d8ad22 --- /dev/null +++ b/arch/x86/platform/uv/uv_irq.c | |||
@@ -0,0 +1,285 @@ | |||
1 | /* | ||
2 | * This file is subject to the terms and conditions of the GNU General Public | ||
3 | * License. See the file "COPYING" in the main directory of this archive | ||
4 | * for more details. | ||
5 | * | ||
6 | * SGI UV IRQ functions | ||
7 | * | ||
8 | * Copyright (C) 2008 Silicon Graphics, Inc. All rights reserved. | ||
9 | */ | ||
10 | |||
11 | #include <linux/module.h> | ||
12 | #include <linux/rbtree.h> | ||
13 | #include <linux/slab.h> | ||
14 | #include <linux/irq.h> | ||
15 | |||
16 | #include <asm/apic.h> | ||
17 | #include <asm/uv/uv_irq.h> | ||
18 | #include <asm/uv/uv_hub.h> | ||
19 | |||
20 | /* MMR offset and pnode of hub sourcing interrupts for a given irq */ | ||
21 | struct uv_irq_2_mmr_pnode { | ||
22 | struct rb_node list; | ||
23 | unsigned long offset; | ||
24 | int pnode; | ||
25 | int irq; | ||
26 | }; | ||
27 | |||
28 | static spinlock_t uv_irq_lock; | ||
29 | static struct rb_root uv_irq_root; | ||
30 | |||
31 | static int uv_set_irq_affinity(struct irq_data *, const struct cpumask *, bool); | ||
32 | |||
33 | static void uv_noop(struct irq_data *data) { } | ||
34 | |||
35 | static void uv_ack_apic(struct irq_data *data) | ||
36 | { | ||
37 | ack_APIC_irq(); | ||
38 | } | ||
39 | |||
40 | static struct irq_chip uv_irq_chip = { | ||
41 | .name = "UV-CORE", | ||
42 | .irq_mask = uv_noop, | ||
43 | .irq_unmask = uv_noop, | ||
44 | .irq_eoi = uv_ack_apic, | ||
45 | .irq_set_affinity = uv_set_irq_affinity, | ||
46 | }; | ||
47 | |||
48 | /* | ||
49 | * Add offset and pnode information of the hub sourcing interrupts to the | ||
50 | * rb tree for a specific irq. | ||
51 | */ | ||
52 | static int uv_set_irq_2_mmr_info(int irq, unsigned long offset, unsigned blade) | ||
53 | { | ||
54 | struct rb_node **link = &uv_irq_root.rb_node; | ||
55 | struct rb_node *parent = NULL; | ||
56 | struct uv_irq_2_mmr_pnode *n; | ||
57 | struct uv_irq_2_mmr_pnode *e; | ||
58 | unsigned long irqflags; | ||
59 | |||
60 | n = kmalloc_node(sizeof(struct uv_irq_2_mmr_pnode), GFP_KERNEL, | ||
61 | uv_blade_to_memory_nid(blade)); | ||
62 | if (!n) | ||
63 | return -ENOMEM; | ||
64 | |||
65 | n->irq = irq; | ||
66 | n->offset = offset; | ||
67 | n->pnode = uv_blade_to_pnode(blade); | ||
68 | spin_lock_irqsave(&uv_irq_lock, irqflags); | ||
69 | /* Find the right place in the rbtree: */ | ||
70 | while (*link) { | ||
71 | parent = *link; | ||
72 | e = rb_entry(parent, struct uv_irq_2_mmr_pnode, list); | ||
73 | |||
74 | if (unlikely(irq == e->irq)) { | ||
75 | /* irq entry exists */ | ||
76 | e->pnode = uv_blade_to_pnode(blade); | ||
77 | e->offset = offset; | ||
78 | spin_unlock_irqrestore(&uv_irq_lock, irqflags); | ||
79 | kfree(n); | ||
80 | return 0; | ||
81 | } | ||
82 | |||
83 | if (irq < e->irq) | ||
84 | link = &(*link)->rb_left; | ||
85 | else | ||
86 | link = &(*link)->rb_right; | ||
87 | } | ||
88 | |||
89 | /* Insert the node into the rbtree. */ | ||
90 | rb_link_node(&n->list, parent, link); | ||
91 | rb_insert_color(&n->list, &uv_irq_root); | ||
92 | |||
93 | spin_unlock_irqrestore(&uv_irq_lock, irqflags); | ||
94 | return 0; | ||
95 | } | ||
96 | |||
97 | /* Retrieve offset and pnode information from the rb tree for a specific irq */ | ||
98 | int uv_irq_2_mmr_info(int irq, unsigned long *offset, int *pnode) | ||
99 | { | ||
100 | struct uv_irq_2_mmr_pnode *e; | ||
101 | struct rb_node *n; | ||
102 | unsigned long irqflags; | ||
103 | |||
104 | spin_lock_irqsave(&uv_irq_lock, irqflags); | ||
105 | n = uv_irq_root.rb_node; | ||
106 | while (n) { | ||
107 | e = rb_entry(n, struct uv_irq_2_mmr_pnode, list); | ||
108 | |||
109 | if (e->irq == irq) { | ||
110 | *offset = e->offset; | ||
111 | *pnode = e->pnode; | ||
112 | spin_unlock_irqrestore(&uv_irq_lock, irqflags); | ||
113 | return 0; | ||
114 | } | ||
115 | |||
116 | if (irq < e->irq) | ||
117 | n = n->rb_left; | ||
118 | else | ||
119 | n = n->rb_right; | ||
120 | } | ||
121 | spin_unlock_irqrestore(&uv_irq_lock, irqflags); | ||
122 | return -1; | ||
123 | } | ||
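
Both uv_set_irq_2_mmr_info() and uv_irq_2_mmr_info() walk the tree the same way: descend left when the irq being looked up is smaller than the node's irq, right otherwise, and stop on an exact match. The kernel keeps the nodes in an rbtree (rb_link_node()/rb_insert_color() handle the balancing); the plain, unbalanced binary-search-tree sketch below only illustrates the keying, update-in-place, and lookup logic, not the rbtree machinery.

#include <stdio.h>
#include <stdlib.h>

struct irq2mmr {                 /* toy stand-in for uv_irq_2_mmr_pnode */
	int irq;
	unsigned long offset;
	int pnode;
	struct irq2mmr *left, *right;
};

static struct irq2mmr *insert(struct irq2mmr *root, int irq,
			      unsigned long offset, int pnode)
{
	if (!root) {
		struct irq2mmr *n = calloc(1, sizeof(*n));
		n->irq = irq;
		n->offset = offset;
		n->pnode = pnode;
		return n;
	}
	if (irq == root->irq) {          /* existing entry: update in place */
		root->offset = offset;
		root->pnode = pnode;
	} else if (irq < root->irq) {
		root->left = insert(root->left, irq, offset, pnode);
	} else {
		root->right = insert(root->right, irq, offset, pnode);
	}
	return root;
}

static int lookup(struct irq2mmr *n, int irq, unsigned long *offset, int *pnode)
{
	while (n) {
		if (irq == n->irq) {
			*offset = n->offset;
			*pnode = n->pnode;
			return 0;
		}
		n = (irq < n->irq) ? n->left : n->right;
	}
	return -1;                       /* not found, as uv_irq_2_mmr_info() reports */
}

int main(void)
{
	struct irq2mmr *root = NULL;
	unsigned long offset;
	int pnode;

	root = insert(root, 42, 0x1000, 3);
	root = insert(root, 17, 0x2000, 1);
	if (!lookup(root, 42, &offset, &pnode))
		printf("irq 42 -> offset %#lx pnode %d\n", offset, pnode);
	return 0;
}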
124 | |||
125 | /* | ||
126 | * Re-target the irq to the specified CPU and enable the specified MMR located | ||
127 | * on the specified blade to allow the sending of MSIs to the specified CPU. | ||
128 | */ | ||
129 | static int | ||
130 | arch_enable_uv_irq(char *irq_name, unsigned int irq, int cpu, int mmr_blade, | ||
131 | unsigned long mmr_offset, int limit) | ||
132 | { | ||
133 | const struct cpumask *eligible_cpu = cpumask_of(cpu); | ||
134 | struct irq_cfg *cfg = irq_get_chip_data(irq); | ||
135 | unsigned long mmr_value; | ||
136 | struct uv_IO_APIC_route_entry *entry; | ||
137 | int mmr_pnode, err; | ||
138 | |||
139 | BUILD_BUG_ON(sizeof(struct uv_IO_APIC_route_entry) != | ||
140 | sizeof(unsigned long)); | ||
141 | |||
142 | err = assign_irq_vector(irq, cfg, eligible_cpu); | ||
143 | if (err != 0) | ||
144 | return err; | ||
145 | |||
146 | if (limit == UV_AFFINITY_CPU) | ||
147 | irq_set_status_flags(irq, IRQ_NO_BALANCING); | ||
148 | else | ||
149 | irq_set_status_flags(irq, IRQ_MOVE_PCNTXT); | ||
150 | |||
151 | irq_set_chip_and_handler_name(irq, &uv_irq_chip, handle_percpu_irq, | ||
152 | irq_name); | ||
153 | |||
154 | mmr_value = 0; | ||
155 | entry = (struct uv_IO_APIC_route_entry *)&mmr_value; | ||
156 | entry->vector = cfg->vector; | ||
157 | entry->delivery_mode = apic->irq_delivery_mode; | ||
158 | entry->dest_mode = apic->irq_dest_mode; | ||
159 | entry->polarity = 0; | ||
160 | entry->trigger = 0; | ||
161 | entry->mask = 0; | ||
162 | entry->dest = apic->cpu_mask_to_apicid(eligible_cpu); | ||
163 | |||
164 | mmr_pnode = uv_blade_to_pnode(mmr_blade); | ||
165 | uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value); | ||
166 | |||
167 | if (cfg->move_in_progress) | ||
168 | send_cleanup_vector(cfg); | ||
169 | |||
170 | return irq; | ||
171 | } | ||
172 | |||
173 | /* | ||
174 | * Disable the specified MMR located on the specified blade so that MSIs are | ||
175 | * no longer allowed to be sent. | ||
176 | */ | ||
177 | static void arch_disable_uv_irq(int mmr_pnode, unsigned long mmr_offset) | ||
178 | { | ||
179 | unsigned long mmr_value; | ||
180 | struct uv_IO_APIC_route_entry *entry; | ||
181 | |||
182 | BUILD_BUG_ON(sizeof(struct uv_IO_APIC_route_entry) != | ||
183 | sizeof(unsigned long)); | ||
184 | |||
185 | mmr_value = 0; | ||
186 | entry = (struct uv_IO_APIC_route_entry *)&mmr_value; | ||
187 | entry->mask = 1; | ||
188 | |||
189 | uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value); | ||
190 | } | ||
191 | |||
192 | static int | ||
193 | uv_set_irq_affinity(struct irq_data *data, const struct cpumask *mask, | ||
194 | bool force) | ||
195 | { | ||
196 | struct irq_cfg *cfg = data->chip_data; | ||
197 | unsigned int dest; | ||
198 | unsigned long mmr_value, mmr_offset; | ||
199 | struct uv_IO_APIC_route_entry *entry; | ||
200 | int mmr_pnode; | ||
201 | |||
202 | if (__ioapic_set_affinity(data, mask, &dest)) | ||
203 | return -1; | ||
204 | |||
205 | mmr_value = 0; | ||
206 | entry = (struct uv_IO_APIC_route_entry *)&mmr_value; | ||
207 | |||
208 | entry->vector = cfg->vector; | ||
209 | entry->delivery_mode = apic->irq_delivery_mode; | ||
210 | entry->dest_mode = apic->irq_dest_mode; | ||
211 | entry->polarity = 0; | ||
212 | entry->trigger = 0; | ||
213 | entry->mask = 0; | ||
214 | entry->dest = dest; | ||
215 | |||
216 | /* Get previously stored MMR and pnode of hub sourcing interrupts */ | ||
217 | if (uv_irq_2_mmr_info(data->irq, &mmr_offset, &mmr_pnode)) | ||
218 | return -1; | ||
219 | |||
220 | uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value); | ||
221 | |||
222 | if (cfg->move_in_progress) | ||
223 | send_cleanup_vector(cfg); | ||
224 | |||
225 | return 0; | ||
226 | } | ||
227 | |||
228 | /* | ||
229 | * Set up a mapping of an available irq and vector, and enable the specified | ||
230 | * MMR that defines the MSI that is to be sent to the specified CPU when an | ||
231 | * interrupt is raised. | ||
232 | */ | ||
233 | int uv_setup_irq(char *irq_name, int cpu, int mmr_blade, | ||
234 | unsigned long mmr_offset, int limit) | ||
235 | { | ||
236 | int irq, ret; | ||
237 | |||
238 | irq = create_irq_nr(NR_IRQS_LEGACY, uv_blade_to_memory_nid(mmr_blade)); | ||
239 | |||
240 | if (irq <= 0) | ||
241 | return -EBUSY; | ||
242 | |||
243 | ret = arch_enable_uv_irq(irq_name, irq, cpu, mmr_blade, mmr_offset, | ||
244 | limit); | ||
245 | if (ret == irq) | ||
246 | uv_set_irq_2_mmr_info(irq, mmr_offset, mmr_blade); | ||
247 | else | ||
248 | destroy_irq(irq); | ||
249 | |||
250 | return ret; | ||
251 | } | ||
252 | EXPORT_SYMBOL_GPL(uv_setup_irq); | ||
253 | |||
254 | /* | ||
255 | * Tear down a mapping of an irq and vector, and disable the specified MMR that | ||
256 | * defined the MSI that was to be sent to the specified CPU when an interrupt | ||
257 | * was raised. | ||
258 | * | ||
259 | * The mmr offset and pnode saved by uv_setup_irq() are looked up in the rb tree. | ||
260 | */ | ||
261 | void uv_teardown_irq(unsigned int irq) | ||
262 | { | ||
263 | struct uv_irq_2_mmr_pnode *e; | ||
264 | struct rb_node *n; | ||
265 | unsigned long irqflags; | ||
266 | |||
267 | spin_lock_irqsave(&uv_irq_lock, irqflags); | ||
268 | n = uv_irq_root.rb_node; | ||
269 | while (n) { | ||
270 | e = rb_entry(n, struct uv_irq_2_mmr_pnode, list); | ||
271 | if (e->irq == irq) { | ||
272 | arch_disable_uv_irq(e->pnode, e->offset); | ||
273 | rb_erase(n, &uv_irq_root); | ||
274 | kfree(e); | ||
275 | break; | ||
276 | } | ||
277 | if (irq < e->irq) | ||
278 | n = n->rb_left; | ||
279 | else | ||
280 | n = n->rb_right; | ||
281 | } | ||
282 | spin_unlock_irqrestore(&uv_irq_lock, irqflags); | ||
283 | destroy_irq(irq); | ||
284 | } | ||
285 | EXPORT_SYMBOL_GPL(uv_teardown_irq); | ||
diff --git a/arch/x86/platform/uv/uv_sysfs.c b/arch/x86/platform/uv/uv_sysfs.c new file mode 100644 index 000000000000..309c70fb7759 --- /dev/null +++ b/arch/x86/platform/uv/uv_sysfs.c | |||
@@ -0,0 +1,76 @@ | |||
1 | /* | ||
2 | * This file supports the /sys/firmware/sgi_uv interfaces for SGI UV. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify | ||
5 | * it under the terms of the GNU General Public License as published by | ||
6 | * the Free Software Foundation; either version 2 of the License, or | ||
7 | * (at your option) any later version. | ||
8 | * | ||
9 | * This program is distributed in the hope that it will be useful, | ||
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
12 | * GNU General Public License for more details. | ||
13 | * | ||
14 | * You should have received a copy of the GNU General Public License | ||
15 | * along with this program; if not, write to the Free Software | ||
16 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | ||
17 | * | ||
18 | * Copyright (c) 2008 Silicon Graphics, Inc. All Rights Reserved. | ||
19 | * Copyright (c) Russ Anderson | ||
20 | */ | ||
21 | |||
22 | #include <linux/sysdev.h> | ||
23 | #include <asm/uv/bios.h> | ||
24 | #include <asm/uv/uv.h> | ||
25 | |||
26 | struct kobject *sgi_uv_kobj; | ||
27 | |||
28 | static ssize_t partition_id_show(struct kobject *kobj, | ||
29 | struct kobj_attribute *attr, char *buf) | ||
30 | { | ||
31 | return snprintf(buf, PAGE_SIZE, "%ld\n", sn_partition_id); | ||
32 | } | ||
33 | |||
34 | static ssize_t coherence_id_show(struct kobject *kobj, | ||
35 | struct kobj_attribute *attr, char *buf) | ||
36 | { | ||
37 | return snprintf(buf, PAGE_SIZE, "%ld\n", partition_coherence_id()); | ||
38 | } | ||
39 | |||
40 | static struct kobj_attribute partition_id_attr = | ||
41 | __ATTR(partition_id, S_IRUGO, partition_id_show, NULL); | ||
42 | |||
43 | static struct kobj_attribute coherence_id_attr = | ||
44 | __ATTR(coherence_id, S_IRUGO, coherence_id_show, NULL); | ||
45 | |||
46 | |||
47 | static int __init sgi_uv_sysfs_init(void) | ||
48 | { | ||
49 | unsigned long ret; | ||
50 | |||
51 | if (!is_uv_system()) | ||
52 | return -ENODEV; | ||
53 | |||
54 | if (!sgi_uv_kobj) | ||
55 | sgi_uv_kobj = kobject_create_and_add("sgi_uv", firmware_kobj); | ||
56 | if (!sgi_uv_kobj) { | ||
57 | printk(KERN_WARNING "kobject_create_and_add sgi_uv failed\n"); | ||
58 | return -EINVAL; | ||
59 | } | ||
60 | |||
61 | ret = sysfs_create_file(sgi_uv_kobj, &partition_id_attr.attr); | ||
62 | if (ret) { | ||
63 | printk(KERN_WARNING "sysfs_create_file partition_id failed\n"); | ||
64 | return ret; | ||
65 | } | ||
66 | |||
67 | ret = sysfs_create_file(sgi_uv_kobj, &coherence_id_attr.attr); | ||
68 | if (ret) { | ||
69 | printk(KERN_WARNING "sysfs_create_file coherence_id failed\n"); | ||
70 | return ret; | ||
71 | } | ||
72 | |||
73 | return 0; | ||
74 | } | ||
75 | |||
76 | device_initcall(sgi_uv_sysfs_init); | ||
diff --git a/arch/x86/platform/uv/uv_time.c b/arch/x86/platform/uv/uv_time.c new file mode 100644 index 000000000000..9f29a01ee1b3 --- /dev/null +++ b/arch/x86/platform/uv/uv_time.c | |||
@@ -0,0 +1,429 @@ | |||
1 | /* | ||
2 | * SGI RTC clock/timer routines. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify | ||
5 | * it under the terms of the GNU General Public License as published by | ||
6 | * the Free Software Foundation; either version 2 of the License, or | ||
7 | * (at your option) any later version. | ||
8 | * | ||
9 | * This program is distributed in the hope that it will be useful, | ||
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
12 | * GNU General Public License for more details. | ||
13 | * | ||
14 | * You should have received a copy of the GNU General Public License | ||
15 | * along with this program; if not, write to the Free Software | ||
16 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | ||
17 | * | ||
18 | * Copyright (c) 2009 Silicon Graphics, Inc. All Rights Reserved. | ||
19 | * Copyright (c) Dimitri Sivanich | ||
20 | */ | ||
21 | #include <linux/clockchips.h> | ||
22 | #include <linux/slab.h> | ||
23 | |||
24 | #include <asm/uv/uv_mmrs.h> | ||
25 | #include <asm/uv/uv_hub.h> | ||
26 | #include <asm/uv/bios.h> | ||
27 | #include <asm/uv/uv.h> | ||
28 | #include <asm/apic.h> | ||
29 | #include <asm/cpu.h> | ||
30 | |||
31 | #define RTC_NAME "sgi_rtc" | ||
32 | |||
33 | static cycle_t uv_read_rtc(struct clocksource *cs); | ||
34 | static int uv_rtc_next_event(unsigned long, struct clock_event_device *); | ||
35 | static void uv_rtc_timer_setup(enum clock_event_mode, | ||
36 | struct clock_event_device *); | ||
37 | |||
38 | static struct clocksource clocksource_uv = { | ||
39 | .name = RTC_NAME, | ||
40 | .rating = 400, | ||
41 | .read = uv_read_rtc, | ||
42 | .mask = (cycle_t)UVH_RTC_REAL_TIME_CLOCK_MASK, | ||
43 | .flags = CLOCK_SOURCE_IS_CONTINUOUS, | ||
44 | }; | ||
45 | |||
46 | static struct clock_event_device clock_event_device_uv = { | ||
47 | .name = RTC_NAME, | ||
48 | .features = CLOCK_EVT_FEAT_ONESHOT, | ||
49 | .shift = 20, | ||
50 | .rating = 400, | ||
51 | .irq = -1, | ||
52 | .set_next_event = uv_rtc_next_event, | ||
53 | .set_mode = uv_rtc_timer_setup, | ||
54 | .event_handler = NULL, | ||
55 | }; | ||
56 | |||
57 | static DEFINE_PER_CPU(struct clock_event_device, cpu_ced); | ||
58 | |||
59 | /* There is one of these allocated per node */ | ||
60 | struct uv_rtc_timer_head { | ||
61 | spinlock_t lock; | ||
62 | /* next cpu waiting for timer, local node relative: */ | ||
63 | int next_cpu; | ||
64 | /* number of cpus on this node: */ | ||
65 | int ncpus; | ||
66 | struct { | ||
67 | int lcpu; /* systemwide logical cpu number */ | ||
68 | u64 expires; /* next timer expiration for this cpu */ | ||
69 | } cpu[1]; | ||
70 | }; | ||
71 | |||
72 | /* | ||
73 | * Access to uv_rtc_timer_head via blade id. | ||
74 | */ | ||
75 | static struct uv_rtc_timer_head **blade_info __read_mostly; | ||
76 | |||
77 | static int uv_rtc_evt_enable; | ||
78 | |||
79 | /* | ||
80 | * Hardware interface routines | ||
81 | */ | ||
82 | |||
83 | /* Send IPIs to another node */ | ||
84 | static void uv_rtc_send_IPI(int cpu) | ||
85 | { | ||
86 | unsigned long apicid, val; | ||
87 | int pnode; | ||
88 | |||
89 | apicid = cpu_physical_id(cpu); | ||
90 | pnode = uv_apicid_to_pnode(apicid); | ||
91 | apicid |= uv_apicid_hibits; | ||
92 | val = (1UL << UVH_IPI_INT_SEND_SHFT) | | ||
93 | (apicid << UVH_IPI_INT_APIC_ID_SHFT) | | ||
94 | (X86_PLATFORM_IPI_VECTOR << UVH_IPI_INT_VECTOR_SHFT); | ||
95 | |||
96 | uv_write_global_mmr64(pnode, UVH_IPI_INT, val); | ||
97 | } | ||
98 | |||
99 | /* Check for an RTC interrupt pending */ | ||
100 | static int uv_intr_pending(int pnode) | ||
101 | { | ||
102 | if (is_uv1_hub()) | ||
103 | return uv_read_global_mmr64(pnode, UVH_EVENT_OCCURRED0) & | ||
104 | UV1H_EVENT_OCCURRED0_RTC1_MASK; | ||
105 | else | ||
106 | return uv_read_global_mmr64(pnode, UV2H_EVENT_OCCURRED2) & | ||
107 | UV2H_EVENT_OCCURRED2_RTC_1_MASK; | ||
108 | } | ||
109 | |||
110 | /* Setup interrupt and return non-zero if early expiration occurred. */ | ||
111 | static int uv_setup_intr(int cpu, u64 expires) | ||
112 | { | ||
113 | u64 val; | ||
114 | unsigned long apicid = cpu_physical_id(cpu) | uv_apicid_hibits; | ||
115 | int pnode = uv_cpu_to_pnode(cpu); | ||
116 | |||
117 | uv_write_global_mmr64(pnode, UVH_RTC1_INT_CONFIG, | ||
118 | UVH_RTC1_INT_CONFIG_M_MASK); | ||
119 | uv_write_global_mmr64(pnode, UVH_INT_CMPB, -1L); | ||
120 | |||
121 | if (is_uv1_hub()) | ||
122 | uv_write_global_mmr64(pnode, UVH_EVENT_OCCURRED0_ALIAS, | ||
123 | UV1H_EVENT_OCCURRED0_RTC1_MASK); | ||
124 | else | ||
125 | uv_write_global_mmr64(pnode, UV2H_EVENT_OCCURRED2_ALIAS, | ||
126 | UV2H_EVENT_OCCURRED2_RTC_1_MASK); | ||
127 | |||
128 | val = (X86_PLATFORM_IPI_VECTOR << UVH_RTC1_INT_CONFIG_VECTOR_SHFT) | | ||
129 | ((u64)apicid << UVH_RTC1_INT_CONFIG_APIC_ID_SHFT); | ||
130 | |||
131 | /* Set configuration */ | ||
132 | uv_write_global_mmr64(pnode, UVH_RTC1_INT_CONFIG, val); | ||
133 | /* Initialize comparator value */ | ||
134 | uv_write_global_mmr64(pnode, UVH_INT_CMPB, expires); | ||
135 | |||
136 | if (uv_read_rtc(NULL) <= expires) | ||
137 | return 0; | ||
138 | |||
139 | return !uv_intr_pending(pnode); | ||
140 | } | ||
141 | |||
142 | /* | ||
143 | * Per-cpu timer tracking routines | ||
144 | */ | ||
145 | |||
146 | static __init void uv_rtc_deallocate_timers(void) | ||
147 | { | ||
148 | int bid; | ||
149 | |||
150 | for_each_possible_blade(bid) { | ||
151 | kfree(blade_info[bid]); | ||
152 | } | ||
153 | kfree(blade_info); | ||
154 | } | ||
155 | |||
156 | /* Allocate per-node list of cpu timer expiration times. */ | ||
157 | static __init int uv_rtc_allocate_timers(void) | ||
158 | { | ||
159 | int cpu; | ||
160 | |||
161 | blade_info = kmalloc(uv_possible_blades * sizeof(void *), GFP_KERNEL); | ||
162 | if (!blade_info) | ||
163 | return -ENOMEM; | ||
164 | memset(blade_info, 0, uv_possible_blades * sizeof(void *)); | ||
165 | |||
166 | for_each_present_cpu(cpu) { | ||
167 | int nid = cpu_to_node(cpu); | ||
168 | int bid = uv_cpu_to_blade_id(cpu); | ||
169 | int bcpu = uv_cpu_hub_info(cpu)->blade_processor_id; | ||
170 | struct uv_rtc_timer_head *head = blade_info[bid]; | ||
171 | |||
172 | if (!head) { | ||
173 | head = kmalloc_node(sizeof(struct uv_rtc_timer_head) + | ||
174 | (uv_blade_nr_possible_cpus(bid) * | ||
175 | 2 * sizeof(u64)), | ||
176 | GFP_KERNEL, nid); | ||
177 | if (!head) { | ||
178 | uv_rtc_deallocate_timers(); | ||
179 | return -ENOMEM; | ||
180 | } | ||
181 | spin_lock_init(&head->lock); | ||
182 | head->ncpus = uv_blade_nr_possible_cpus(bid); | ||
183 | head->next_cpu = -1; | ||
184 | blade_info[bid] = head; | ||
185 | } | ||
186 | |||
187 | head->cpu[bcpu].lcpu = cpu; | ||
188 | head->cpu[bcpu].expires = ULLONG_MAX; | ||
189 | } | ||
190 | |||
191 | return 0; | ||
192 | } | ||
193 | |||
194 | /* Find and set the next expiring timer. */ | ||
195 | static void uv_rtc_find_next_timer(struct uv_rtc_timer_head *head, int pnode) | ||
196 | { | ||
197 | u64 lowest = ULLONG_MAX; | ||
198 | int c, bcpu = -1; | ||
199 | |||
200 | head->next_cpu = -1; | ||
201 | for (c = 0; c < head->ncpus; c++) { | ||
202 | u64 exp = head->cpu[c].expires; | ||
203 | if (exp < lowest) { | ||
204 | bcpu = c; | ||
205 | lowest = exp; | ||
206 | } | ||
207 | } | ||
208 | if (bcpu >= 0) { | ||
209 | head->next_cpu = bcpu; | ||
210 | c = head->cpu[bcpu].lcpu; | ||
211 | if (uv_setup_intr(c, lowest)) | ||
212 | /* If we didn't set it up in time, trigger */ | ||
213 | uv_rtc_send_IPI(c); | ||
214 | } else { | ||
215 | uv_write_global_mmr64(pnode, UVH_RTC1_INT_CONFIG, | ||
216 | UVH_RTC1_INT_CONFIG_M_MASK); | ||
217 | } | ||
218 | } | ||
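
uv_rtc_find_next_timer() is a straight minimum scan over the per-node expiration array, with ULLONG_MAX acting as the "no timer armed" sentinel. The same scan in isolation, with made-up expiration values:

#include <stdio.h>
#include <limits.h>

int main(void)
{
	/* per-cpu expiration times; ULLONG_MAX means no timer armed on that cpu */
	unsigned long long expires[] = {
		ULLONG_MAX, 1500, ULLONG_MAX, 900, 2300
	};
	int ncpus = sizeof(expires) / sizeof(expires[0]);
	unsigned long long lowest = ULLONG_MAX;
	int c, next_cpu = -1;

	for (c = 0; c < ncpus; c++) {
		if (expires[c] < lowest) {
			next_cpu = c;
			lowest = expires[c];
		}
	}
	if (next_cpu >= 0)
		printf("program comparator for cpu %d at %llu\n", next_cpu, lowest);
	else
		printf("no timer pending, disable the RTC interrupt\n");
	return 0;
}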
219 | |||
220 | /* | ||
221 | * Set expiration time for current cpu. | ||
222 | * | ||
223 | * Returns -ETIME if we missed the expiration time. | ||
224 | */ | ||
225 | static int uv_rtc_set_timer(int cpu, u64 expires) | ||
226 | { | ||
227 | int pnode = uv_cpu_to_pnode(cpu); | ||
228 | int bid = uv_cpu_to_blade_id(cpu); | ||
229 | struct uv_rtc_timer_head *head = blade_info[bid]; | ||
230 | int bcpu = uv_cpu_hub_info(cpu)->blade_processor_id; | ||
231 | u64 *t = &head->cpu[bcpu].expires; | ||
232 | unsigned long flags; | ||
233 | int next_cpu; | ||
234 | |||
235 | spin_lock_irqsave(&head->lock, flags); | ||
236 | |||
237 | next_cpu = head->next_cpu; | ||
238 | *t = expires; | ||
239 | |||
240 | /* Will this one be next to go off? */ | ||
241 | if (next_cpu < 0 || bcpu == next_cpu || | ||
242 | expires < head->cpu[next_cpu].expires) { | ||
243 | head->next_cpu = bcpu; | ||
244 | if (uv_setup_intr(cpu, expires)) { | ||
245 | *t = ULLONG_MAX; | ||
246 | uv_rtc_find_next_timer(head, pnode); | ||
247 | spin_unlock_irqrestore(&head->lock, flags); | ||
248 | return -ETIME; | ||
249 | } | ||
250 | } | ||
251 | |||
252 | spin_unlock_irqrestore(&head->lock, flags); | ||
253 | return 0; | ||
254 | } | ||
255 | |||
256 | /* | ||
257 | * Unset expiration time for current cpu. | ||
258 | * | ||
259 | * Returns 1 if this timer was pending. | ||
260 | */ | ||
261 | static int uv_rtc_unset_timer(int cpu, int force) | ||
262 | { | ||
263 | int pnode = uv_cpu_to_pnode(cpu); | ||
264 | int bid = uv_cpu_to_blade_id(cpu); | ||
265 | struct uv_rtc_timer_head *head = blade_info[bid]; | ||
266 | int bcpu = uv_cpu_hub_info(cpu)->blade_processor_id; | ||
267 | u64 *t = &head->cpu[bcpu].expires; | ||
268 | unsigned long flags; | ||
269 | int rc = 0; | ||
270 | |||
271 | spin_lock_irqsave(&head->lock, flags); | ||
272 | |||
273 | if ((head->next_cpu == bcpu && uv_read_rtc(NULL) >= *t) || force) | ||
274 | rc = 1; | ||
275 | |||
276 | if (rc) { | ||
277 | *t = ULLONG_MAX; | ||
278 | /* Was the hardware setup for this timer? */ | ||
279 | if (head->next_cpu == bcpu) | ||
280 | uv_rtc_find_next_timer(head, pnode); | ||
281 | } | ||
282 | |||
283 | spin_unlock_irqrestore(&head->lock, flags); | ||
284 | |||
285 | return rc; | ||
286 | } | ||
287 | |||
288 | |||
289 | /* | ||
290 | * Kernel interface routines. | ||
291 | */ | ||
292 | |||
293 | /* | ||
294 | * Read the RTC. | ||
295 | * | ||
296 | * Starting with HUB rev 2.0, the UV RTC register is replicated across all | ||
297 | * cachelines of its own page. This allows faster simultaneous reads | ||
298 | * from a given socket. | ||
299 | */ | ||
300 | static cycle_t uv_read_rtc(struct clocksource *cs) | ||
301 | { | ||
302 | unsigned long offset; | ||
303 | |||
304 | if (uv_get_min_hub_revision_id() == 1) | ||
305 | offset = 0; | ||
306 | else | ||
307 | offset = (uv_blade_processor_id() * L1_CACHE_BYTES) % PAGE_SIZE; | ||
308 | |||
309 | return (cycle_t)uv_read_local_mmr(UVH_RTC | offset); | ||
310 | } | ||
311 | |||
312 | /* | ||
313 | * Program the next event, relative to now | ||
314 | */ | ||
315 | static int uv_rtc_next_event(unsigned long delta, | ||
316 | struct clock_event_device *ced) | ||
317 | { | ||
318 | int ced_cpu = cpumask_first(ced->cpumask); | ||
319 | |||
320 | return uv_rtc_set_timer(ced_cpu, delta + uv_read_rtc(NULL)); | ||
321 | } | ||
322 | |||
323 | /* | ||
324 | * Setup the RTC timer in oneshot mode | ||
325 | */ | ||
326 | static void uv_rtc_timer_setup(enum clock_event_mode mode, | ||
327 | struct clock_event_device *evt) | ||
328 | { | ||
329 | int ced_cpu = cpumask_first(evt->cpumask); | ||
330 | |||
331 | switch (mode) { | ||
332 | case CLOCK_EVT_MODE_PERIODIC: | ||
333 | case CLOCK_EVT_MODE_ONESHOT: | ||
334 | case CLOCK_EVT_MODE_RESUME: | ||
335 | /* Nothing to do here yet */ | ||
336 | break; | ||
337 | case CLOCK_EVT_MODE_UNUSED: | ||
338 | case CLOCK_EVT_MODE_SHUTDOWN: | ||
339 | uv_rtc_unset_timer(ced_cpu, 1); | ||
340 | break; | ||
341 | } | ||
342 | } | ||
343 | |||
344 | static void uv_rtc_interrupt(void) | ||
345 | { | ||
346 | int cpu = smp_processor_id(); | ||
347 | struct clock_event_device *ced = &per_cpu(cpu_ced, cpu); | ||
348 | |||
349 | if (!ced || !ced->event_handler) | ||
350 | return; | ||
351 | |||
352 | if (uv_rtc_unset_timer(cpu, 0) != 1) | ||
353 | return; | ||
354 | |||
355 | ced->event_handler(ced); | ||
356 | } | ||
357 | |||
358 | static int __init uv_enable_evt_rtc(char *str) | ||
359 | { | ||
360 | uv_rtc_evt_enable = 1; | ||
361 | |||
362 | return 1; | ||
363 | } | ||
364 | __setup("uvrtcevt", uv_enable_evt_rtc); | ||
365 | |||
366 | static __init void uv_rtc_register_clockevents(struct work_struct *dummy) | ||
367 | { | ||
368 | struct clock_event_device *ced = &__get_cpu_var(cpu_ced); | ||
369 | |||
370 | *ced = clock_event_device_uv; | ||
371 | ced->cpumask = cpumask_of(smp_processor_id()); | ||
372 | clockevents_register_device(ced); | ||
373 | } | ||
374 | |||
375 | static __init int uv_rtc_setup_clock(void) | ||
376 | { | ||
377 | int rc; | ||
378 | |||
379 | if (!is_uv_system()) | ||
380 | return -ENODEV; | ||
381 | |||
382 | /* If single blade, prefer tsc */ | ||
383 | if (uv_num_possible_blades() == 1) | ||
384 | clocksource_uv.rating = 250; | ||
385 | |||
386 | rc = clocksource_register_hz(&clocksource_uv, sn_rtc_cycles_per_second); | ||
387 | if (rc) | ||
388 | printk(KERN_INFO "UV RTC clocksource failed rc %d\n", rc); | ||
389 | else | ||
390 | printk(KERN_INFO "UV RTC clocksource registered freq %lu MHz\n", | ||
391 | sn_rtc_cycles_per_second/(unsigned long)1E6); | ||
392 | |||
393 | if (rc || !uv_rtc_evt_enable || x86_platform_ipi_callback) | ||
394 | return rc; | ||
395 | |||
396 | /* Setup and register clockevents */ | ||
397 | rc = uv_rtc_allocate_timers(); | ||
398 | if (rc) | ||
399 | goto error; | ||
400 | |||
401 | x86_platform_ipi_callback = uv_rtc_interrupt; | ||
402 | |||
403 | clock_event_device_uv.mult = div_sc(sn_rtc_cycles_per_second, | ||
404 | NSEC_PER_SEC, clock_event_device_uv.shift); | ||
405 | |||
406 | clock_event_device_uv.min_delta_ns = NSEC_PER_SEC / | ||
407 | sn_rtc_cycles_per_second; | ||
408 | |||
409 | clock_event_device_uv.max_delta_ns = clocksource_uv.mask * | ||
410 | (NSEC_PER_SEC / sn_rtc_cycles_per_second); | ||
411 | |||
412 | rc = schedule_on_each_cpu(uv_rtc_register_clockevents); | ||
413 | if (rc) { | ||
414 | x86_platform_ipi_callback = NULL; | ||
415 | uv_rtc_deallocate_timers(); | ||
416 | goto error; | ||
417 | } | ||
418 | |||
419 | printk(KERN_INFO "UV RTC clockevents registered\n"); | ||
420 | |||
421 | return 0; | ||
422 | |||
423 | error: | ||
424 | clocksource_unregister(&clocksource_uv); | ||
425 | printk(KERN_INFO "UV RTC clockevents failed rc %d\n", rc); | ||
426 | |||
427 | return rc; | ||
428 | } | ||
429 | arch_initcall(uv_rtc_setup_clock); | ||
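
The clockevent is programmed with the usual mult/shift pair: mult is derived from the RTC frequency by div_sc(), roughly (freq << shift) / NSEC_PER_SEC, after which the clockevents core turns a nanosecond delta into device cycles as (delta * mult) >> shift before calling set_next_event(). A small sketch of that arithmetic; the 25 MHz frequency is made up and only stands in for sn_rtc_cycles_per_second.

#include <stdio.h>
#include <stdint.h>

#define NSEC_PER_SEC 1000000000ULL

int main(void)
{
	/* made-up RTC frequency standing in for sn_rtc_cycles_per_second */
	uint64_t freq = 25000000ULL;        /* 25 MHz */
	unsigned int shift = 20;            /* same shift as clock_event_device_uv */

	/* div_sc()-style scaled multiplier: (freq << shift) / NSEC_PER_SEC */
	uint64_t mult = (freq << shift) / NSEC_PER_SEC;

	/* convert a 2 ms delta (in ns) into RTC cycles for the comparator */
	uint64_t delta_ns = 2000000ULL;
	uint64_t cycles = (delta_ns * mult) >> shift;

	printf("mult=%llu, %llu ns -> %llu rtc cycles (expect ~%llu)\n",
	       (unsigned long long)mult,
	       (unsigned long long)delta_ns,
	       (unsigned long long)cycles,
	       (unsigned long long)(delta_ns * freq / NSEC_PER_SEC));
	return 0;
}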