author		Glenn Elliott <gelliott@cs.unc.edu>	2012-03-04 19:47:13 -0500
committer	Glenn Elliott <gelliott@cs.unc.edu>	2012-03-04 19:47:13 -0500
commit		c71c03bda1e86c9d5198c5d83f712e695c4f2a1e (patch)
tree		ecb166cb3e2b7e2adb3b5e292245fefd23381ac8 /arch/x86/kernel/tlb_uv.c
parent		ea53c912f8a86a8567697115b6a0d8152beee5c8 (diff)
parent		6a00f206debf8a5c8899055726ad127dbeeed098 (diff)
Merge branch 'mpi-master' into wip-k-fmlp
Conflicts:
litmus/sched_cedf.c
Diffstat (limited to 'arch/x86/kernel/tlb_uv.c')
-rw-r--r--	arch/x86/kernel/tlb_uv.c	1655
1 file changed, 0 insertions, 1655 deletions
diff --git a/arch/x86/kernel/tlb_uv.c b/arch/x86/kernel/tlb_uv.c
deleted file mode 100644
index 312ef0292815..000000000000
--- a/arch/x86/kernel/tlb_uv.c
+++ /dev/null
@@ -1,1655 +0,0 @@
1 | /* | ||
2 | * SGI UltraViolet TLB flush routines. | ||
3 | * | ||
4 | * (c) 2008-2010 Cliff Wickman <cpw@sgi.com>, SGI. | ||
5 | * | ||
6 | * This code is released under the GNU General Public License version 2 or | ||
7 | * later. | ||
8 | */ | ||
9 | #include <linux/seq_file.h> | ||
10 | #include <linux/proc_fs.h> | ||
11 | #include <linux/debugfs.h> | ||
12 | #include <linux/kernel.h> | ||
13 | #include <linux/slab.h> | ||
14 | |||
15 | #include <asm/mmu_context.h> | ||
16 | #include <asm/uv/uv.h> | ||
17 | #include <asm/uv/uv_mmrs.h> | ||
18 | #include <asm/uv/uv_hub.h> | ||
19 | #include <asm/uv/uv_bau.h> | ||
20 | #include <asm/apic.h> | ||
21 | #include <asm/idle.h> | ||
22 | #include <asm/tsc.h> | ||
23 | #include <asm/irq_vectors.h> | ||
24 | #include <asm/timer.h> | ||
25 | |||
26 | /* timeouts in nanoseconds (indexed by UVH_AGING_PRESCALE_SEL urgency7 30:28) */ | ||
27 | static int timeout_base_ns[] = { | ||
28 | 20, | ||
29 | 160, | ||
30 | 1280, | ||
31 | 10240, | ||
32 | 81920, | ||
33 | 655360, | ||
34 | 5242880, | ||
35 | 167772160 | ||
36 | }; | ||
37 | static int timeout_us; | ||
38 | static int nobau; | ||
39 | static int baudisabled; | ||
40 | static spinlock_t disable_lock; | ||
41 | static cycles_t congested_cycles; | ||
42 | |||
43 | /* tunables: */ | ||
44 | static int max_bau_concurrent = MAX_BAU_CONCURRENT; | ||
45 | static int max_bau_concurrent_constant = MAX_BAU_CONCURRENT; | ||
46 | static int plugged_delay = PLUGGED_DELAY; | ||
47 | static int plugsb4reset = PLUGSB4RESET; | ||
48 | static int timeoutsb4reset = TIMEOUTSB4RESET; | ||
49 | static int ipi_reset_limit = IPI_RESET_LIMIT; | ||
50 | static int complete_threshold = COMPLETE_THRESHOLD; | ||
51 | static int congested_response_us = CONGESTED_RESPONSE_US; | ||
52 | static int congested_reps = CONGESTED_REPS; | ||
53 | static int congested_period = CONGESTED_PERIOD; | ||
54 | static struct dentry *tunables_dir; | ||
55 | static struct dentry *tunables_file; | ||
56 | |||
57 | static int __init setup_nobau(char *arg) | ||
58 | { | ||
59 | nobau = 1; | ||
60 | return 0; | ||
61 | } | ||
62 | early_param("nobau", setup_nobau); | ||
63 | |||
64 | /* base pnode in this partition */ | ||
65 | static int uv_partition_base_pnode __read_mostly; | ||
66 | /* position of pnode (which is nasid>>1): */ | ||
67 | static int uv_nshift __read_mostly; | ||
68 | static unsigned long uv_mmask __read_mostly; | ||
69 | |||
70 | static DEFINE_PER_CPU(struct ptc_stats, ptcstats); | ||
71 | static DEFINE_PER_CPU(struct bau_control, bau_control); | ||
72 | static DEFINE_PER_CPU(cpumask_var_t, uv_flush_tlb_mask); | ||
73 | |||
74 | /* | ||
75 | * Determine the first node on a uvhub. 'Nodes' are used for kernel | ||
76 | * memory allocation. | ||
77 | */ | ||
78 | static int __init uvhub_to_first_node(int uvhub) | ||
79 | { | ||
80 | int node, b; | ||
81 | |||
82 | for_each_online_node(node) { | ||
83 | b = uv_node_to_blade_id(node); | ||
84 | if (uvhub == b) | ||
85 | return node; | ||
86 | } | ||
87 | return -1; | ||
88 | } | ||
89 | |||
90 | /* | ||
91 | * Determine the apicid of the first cpu on a uvhub. | ||
92 | */ | ||
93 | static int __init uvhub_to_first_apicid(int uvhub) | ||
94 | { | ||
95 | int cpu; | ||
96 | |||
97 | for_each_present_cpu(cpu) | ||
98 | if (uvhub == uv_cpu_to_blade_id(cpu)) | ||
99 | return per_cpu(x86_cpu_to_apicid, cpu); | ||
100 | return -1; | ||
101 | } | ||
102 | |||
103 | /* | ||
104 | * Free a software acknowledge hardware resource by clearing its Pending | ||
105 | * bit. This will return a reply to the sender. | ||
106 | * If the message has timed out, a reply has already been sent by the | ||
107 | * hardware but the resource has not been released. In that case our | ||
108 | * clear of the Timeout bit (as well) will free the resource. No reply will | ||
109 | * be sent (the hardware will only do one reply per message). | ||
110 | */ | ||
111 | static inline void uv_reply_to_message(struct msg_desc *mdp, | ||
112 | struct bau_control *bcp) | ||
113 | { | ||
114 | unsigned long dw; | ||
115 | struct bau_payload_queue_entry *msg; | ||
116 | |||
117 | msg = mdp->msg; | ||
118 | if (!msg->canceled) { | ||
119 | dw = (msg->sw_ack_vector << UV_SW_ACK_NPENDING) | | ||
120 | msg->sw_ack_vector; | ||
121 | uv_write_local_mmr( | ||
122 | UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_ALIAS, dw); | ||
123 | } | ||
124 | msg->replied_to = 1; | ||
125 | msg->sw_ack_vector = 0; | ||
126 | } | ||
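
The double-width write above works because the software-ack MMR stacks a timeout bitfield UV_SW_ACK_NPENDING bits above the pending bitfield; writing the vector into both halves of the alias register frees the resource whether or not it timed out. A standalone sketch of that arithmetic, with the NPENDING width assumed to be 8 rather than taken from the real headers:

    /* sketch only: stacked bitfields, assumed UV_SW_ACK_NPENDING = 8 */
    #include <stdio.h>

    int main(void)
    {
        unsigned long vec = 0x05;            /* sw_ack resources 0 and 2 */
        unsigned long dw = (vec << 8) | vec; /* timeout half | pending half */

        printf("alias MMR write: 0x%lx\n", dw); /* prints 0x505 */
        return 0;
    }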
127 | |||
128 | /* | ||
129 | * Process the receipt of a RETRY message | ||
130 | */ | ||
131 | static inline void uv_bau_process_retry_msg(struct msg_desc *mdp, | ||
132 | struct bau_control *bcp) | ||
133 | { | ||
134 | int i; | ||
135 | int cancel_count = 0; | ||
136 | int slot2; | ||
137 | unsigned long msg_res; | ||
138 | unsigned long mmr = 0; | ||
139 | struct bau_payload_queue_entry *msg; | ||
140 | struct bau_payload_queue_entry *msg2; | ||
141 | struct ptc_stats *stat; | ||
142 | |||
143 | msg = mdp->msg; | ||
144 | stat = bcp->statp; | ||
145 | stat->d_retries++; | ||
146 | /* | ||
147 | * cancel any message from msg+1 to the retry itself | ||
148 | */ | ||
149 | for (msg2 = msg+1, i = 0; i < DEST_Q_SIZE; msg2++, i++) { | ||
150 | if (msg2 > mdp->va_queue_last) | ||
151 | msg2 = mdp->va_queue_first; | ||
152 | if (msg2 == msg) | ||
153 | break; | ||
154 | |||
155 | /* same conditions for cancellation as uv_do_reset */ | ||
156 | if ((msg2->replied_to == 0) && (msg2->canceled == 0) && | ||
157 | (msg2->sw_ack_vector) && ((msg2->sw_ack_vector & | ||
158 | msg->sw_ack_vector) == 0) && | ||
159 | (msg2->sending_cpu == msg->sending_cpu) && | ||
160 | (msg2->msg_type != MSG_NOOP)) { | ||
161 | slot2 = msg2 - mdp->va_queue_first; | ||
162 | mmr = uv_read_local_mmr | ||
163 | (UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE); | ||
164 | msg_res = msg2->sw_ack_vector; | ||
165 | /* | ||
166 | * This is a message retry; clear the resources held | ||
167 | * by the previous message only if they timed out. | ||
168 | * If it has not timed out we have an unexpected | ||
169 | * situation to report. | ||
170 | */ | ||
171 | if (mmr & (msg_res << UV_SW_ACK_NPENDING)) { | ||
172 | /* | ||
173 | * is the resource timed out? | ||
174 | * make everyone ignore the cancelled message. | ||
175 | */ | ||
176 | msg2->canceled = 1; | ||
177 | stat->d_canceled++; | ||
178 | cancel_count++; | ||
179 | uv_write_local_mmr( | ||
180 | UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_ALIAS, | ||
181 | (msg_res << UV_SW_ACK_NPENDING) | | ||
182 | msg_res); | ||
183 | } | ||
184 | } | ||
185 | } | ||
186 | if (!cancel_count) | ||
187 | stat->d_nocanceled++; | ||
188 | } | ||
189 | |||
190 | /* | ||
191 | * Do all the things a cpu should do for a TLB shootdown message. | ||
192 | * Other cpus may come here at the same time for this message. | ||
193 | */ | ||
194 | static void uv_bau_process_message(struct msg_desc *mdp, | ||
195 | struct bau_control *bcp) | ||
196 | { | ||
197 | int msg_ack_count; | ||
198 | short socket_ack_count = 0; | ||
199 | struct ptc_stats *stat; | ||
200 | struct bau_payload_queue_entry *msg; | ||
201 | struct bau_control *smaster = bcp->socket_master; | ||
202 | |||
203 | /* | ||
204 | * This must be a normal message, or retry of a normal message | ||
205 | */ | ||
206 | msg = mdp->msg; | ||
207 | stat = bcp->statp; | ||
208 | if (msg->address == TLB_FLUSH_ALL) { | ||
209 | local_flush_tlb(); | ||
210 | stat->d_alltlb++; | ||
211 | } else { | ||
212 | __flush_tlb_one(msg->address); | ||
213 | stat->d_onetlb++; | ||
214 | } | ||
215 | stat->d_requestee++; | ||
216 | |||
217 | /* | ||
218 | * One cpu on each uvhub has the additional job on a RETRY | ||
219 | * of releasing the resource held by the message that is | ||
220 | * being retried. That message is identified by sending | ||
221 | * cpu number. | ||
222 | */ | ||
223 | if (msg->msg_type == MSG_RETRY && bcp == bcp->uvhub_master) | ||
224 | uv_bau_process_retry_msg(mdp, bcp); | ||
225 | |||
226 | /* | ||
227 | * This is a sw_ack message, so we have to reply to it. | ||
228 | * Count each responding cpu on the socket. This avoids | ||
229 | * pinging the count's cache line back and forth between | ||
230 | * the sockets. | ||
231 | */ | ||
232 | socket_ack_count = atomic_add_short_return(1, (struct atomic_short *) | ||
233 | &smaster->socket_acknowledge_count[mdp->msg_slot]); | ||
234 | if (socket_ack_count == bcp->cpus_in_socket) { | ||
235 | /* | ||
236 | * Both sockets dump their completed count total into | ||
237 | * the message's count. | ||
238 | */ | ||
239 | smaster->socket_acknowledge_count[mdp->msg_slot] = 0; | ||
240 | msg_ack_count = atomic_add_short_return(socket_ack_count, | ||
241 | (struct atomic_short *)&msg->acknowledge_count); | ||
242 | |||
243 | if (msg_ack_count == bcp->cpus_in_uvhub) { | ||
244 | /* | ||
245 | * All cpus in uvhub saw it; reply | ||
246 | */ | ||
247 | uv_reply_to_message(mdp, bcp); | ||
248 | } | ||
249 | } | ||
250 | |||
251 | return; | ||
252 | } | ||
253 | |||
254 | /* | ||
255 | * Determine the first cpu on a uvhub. | ||
256 | */ | ||
257 | static int uvhub_to_first_cpu(int uvhub) | ||
258 | { | ||
259 | int cpu; | ||
260 | for_each_present_cpu(cpu) | ||
261 | if (uvhub == uv_cpu_to_blade_id(cpu)) | ||
262 | return cpu; | ||
263 | return -1; | ||
264 | } | ||
265 | |||
266 | /* | ||
267 | * Last resort when we get a large number of destination timeouts is | ||
268 | * to clear resources held by a given cpu. | ||
269 | * Do this with IPI so that all messages in the BAU message queue | ||
270 | * can be identified by their nonzero sw_ack_vector field. | ||
271 | * | ||
272 | * This is entered for a single cpu on the uvhub. | ||
273 | * The sender wants this uvhub to free a specific message's | ||
274 | * sw_ack resources. | ||
275 | */ | ||
276 | static void | ||
277 | uv_do_reset(void *ptr) | ||
278 | { | ||
279 | int i; | ||
280 | int slot; | ||
281 | int count = 0; | ||
282 | unsigned long mmr; | ||
283 | unsigned long msg_res; | ||
284 | struct bau_control *bcp; | ||
285 | struct reset_args *rap; | ||
286 | struct bau_payload_queue_entry *msg; | ||
287 | struct ptc_stats *stat; | ||
288 | |||
289 | bcp = &per_cpu(bau_control, smp_processor_id()); | ||
290 | rap = (struct reset_args *)ptr; | ||
291 | stat = bcp->statp; | ||
292 | stat->d_resets++; | ||
293 | |||
294 | /* | ||
295 | * We're looking for the given sender, and | ||
296 | * will free its sw_ack resource. | ||
297 | * If all cpus finally responded after the timeout, its | ||
298 | * message 'replied_to' was set. | ||
299 | */ | ||
300 | for (msg = bcp->va_queue_first, i = 0; i < DEST_Q_SIZE; msg++, i++) { | ||
301 | /* uv_do_reset: same conditions for cancellation as | ||
302 | uv_bau_process_retry_msg() */ | ||
303 | if ((msg->replied_to == 0) && | ||
304 | (msg->canceled == 0) && | ||
305 | (msg->sending_cpu == rap->sender) && | ||
306 | (msg->sw_ack_vector) && | ||
307 | (msg->msg_type != MSG_NOOP)) { | ||
308 | /* | ||
309 | * make everyone else ignore this message | ||
310 | */ | ||
311 | msg->canceled = 1; | ||
312 | slot = msg - bcp->va_queue_first; | ||
313 | count++; | ||
314 | /* | ||
315 | * only reset the resource if it is still pending | ||
316 | */ | ||
317 | mmr = uv_read_local_mmr | ||
318 | (UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE); | ||
319 | msg_res = msg->sw_ack_vector; | ||
320 | if (mmr & msg_res) { | ||
321 | stat->d_rcanceled++; | ||
322 | uv_write_local_mmr( | ||
323 | UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_ALIAS, | ||
324 | (msg_res << UV_SW_ACK_NPENDING) | | ||
325 | msg_res); | ||
326 | } | ||
327 | } | ||
328 | } | ||
329 | return; | ||
330 | } | ||
331 | |||
332 | /* | ||
333 | * Use IPI to get all target uvhubs to release resources held by | ||
334 | * a given sending cpu number. | ||
335 | */ | ||
336 | static void uv_reset_with_ipi(struct bau_target_uvhubmask *distribution, | ||
337 | int sender) | ||
338 | { | ||
339 | int uvhub; | ||
340 | int cpu; | ||
341 | cpumask_t mask; | ||
342 | struct reset_args reset_args; | ||
343 | |||
344 | reset_args.sender = sender; | ||
345 | |||
346 | cpus_clear(mask); | ||
347 | /* find a single cpu for each uvhub in this distribution mask */ | ||
348 | for (uvhub = 0; | ||
349 | uvhub < sizeof(struct bau_target_uvhubmask) * BITSPERBYTE; | ||
350 | uvhub++) { | ||
351 | if (!bau_uvhub_isset(uvhub, distribution)) | ||
352 | continue; | ||
353 | /* find a cpu for this uvhub */ | ||
354 | cpu = uvhub_to_first_cpu(uvhub); | ||
355 | cpu_set(cpu, mask); | ||
356 | } | ||
357 | /* IPI all cpus; Preemption is already disabled */ | ||
358 | smp_call_function_many(&mask, uv_do_reset, (void *)&reset_args, 1); | ||
359 | return; | ||
360 | } | ||
361 | |||
362 | static inline unsigned long | ||
363 | cycles_2_us(unsigned long long cyc) | ||
364 | { | ||
365 | unsigned long long ns; | ||
366 | unsigned long us; | ||
367 | ns = (cyc * per_cpu(cyc2ns, smp_processor_id())) | ||
368 | >> CYC2NS_SCALE_FACTOR; | ||
369 | us = ns / 1000; | ||
370 | return us; | ||
371 | } | ||
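
cycles_2_us() is plain fixed-point arithmetic: the per-cpu cyc2ns value holds nanoseconds-per-cycle scaled up by 2^CYC2NS_SCALE_FACTOR. A standalone sketch with assumed values (a 10-bit scale factor and a hypothetical 2 GHz TSC, so cyc2ns = 0.5 ns/cycle * 1024 = 512):

    #include <stdio.h>

    int main(void)
    {
        /* assumptions: CYC2NS_SCALE_FACTOR = 10, 2 GHz TSC */
        unsigned long long cyc2ns = 512;   /* 0.5 ns/cycle << 10 */
        unsigned long long cyc = 2000000;  /* cycles measured */
        unsigned long long ns = (cyc * cyc2ns) >> 10;

        printf("%llu us\n", ns / 1000);    /* prints 1000, i.e. 1 ms */
        return 0;
    }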
372 | |||
373 | /* | ||
374 | * wait for all cpus on this hub to finish their sends and go quiet | ||
375 | * leaves uvhub_quiesce set so that no new broadcasts are started by | ||
376 | * bau_flush_send_and_wait() | ||
377 | */ | ||
378 | static inline void | ||
379 | quiesce_local_uvhub(struct bau_control *hmaster) | ||
380 | { | ||
381 | atomic_add_short_return(1, (struct atomic_short *) | ||
382 | &hmaster->uvhub_quiesce); | ||
383 | } | ||
384 | |||
385 | /* | ||
386 | * mark this quiet-requestor as done | ||
387 | */ | ||
388 | static inline void | ||
389 | end_uvhub_quiesce(struct bau_control *hmaster) | ||
390 | { | ||
391 | atomic_add_short_return(-1, (struct atomic_short *) | ||
392 | &hmaster->uvhub_quiesce); | ||
393 | } | ||
394 | |||
395 | /* | ||
396 | * Wait for completion of a broadcast software ack message | ||
397 | * return COMPLETE, RETRY(PLUGGED or TIMEOUT) or GIVEUP | ||
398 | */ | ||
399 | static int uv_wait_completion(struct bau_desc *bau_desc, | ||
400 | unsigned long mmr_offset, int right_shift, int this_cpu, | ||
401 | struct bau_control *bcp, struct bau_control *smaster, long try) | ||
402 | { | ||
403 | unsigned long descriptor_status; | ||
404 | cycles_t ttime; | ||
405 | struct ptc_stats *stat = bcp->statp; | ||
406 | struct bau_control *hmaster; | ||
407 | |||
408 | hmaster = bcp->uvhub_master; | ||
409 | |||
410 | /* spin on the status MMR, waiting for it to go idle */ | ||
411 | while ((descriptor_status = (((unsigned long) | ||
412 | uv_read_local_mmr(mmr_offset) >> | ||
413 | right_shift) & UV_ACT_STATUS_MASK)) != | ||
414 | DESC_STATUS_IDLE) { | ||
415 | /* | ||
416 | * Our software ack messages may be blocked because there are | ||
417 | * no swack resources available. As long as none of them | ||
418 | * has timed out, hardware will NACK our message and its | ||
419 | * state will stay IDLE. | ||
420 | */ | ||
421 | if (descriptor_status == DESC_STATUS_SOURCE_TIMEOUT) { | ||
422 | stat->s_stimeout++; | ||
423 | return FLUSH_GIVEUP; | ||
424 | } else if (descriptor_status == | ||
425 | DESC_STATUS_DESTINATION_TIMEOUT) { | ||
426 | stat->s_dtimeout++; | ||
427 | ttime = get_cycles(); | ||
428 | |||
429 | /* | ||
430 | * Our retries may be blocked by all destination | ||
431 | * swack resources being consumed, and a timeout | ||
432 | * pending. In that case hardware returns the | ||
433 | * ERROR that looks like a destination timeout. | ||
434 | */ | ||
435 | if (cycles_2_us(ttime - bcp->send_message) < | ||
436 | timeout_us) { | ||
437 | bcp->conseccompletes = 0; | ||
438 | return FLUSH_RETRY_PLUGGED; | ||
439 | } | ||
440 | |||
441 | bcp->conseccompletes = 0; | ||
442 | return FLUSH_RETRY_TIMEOUT; | ||
443 | } else { | ||
444 | /* | ||
445 | * descriptor_status is still BUSY | ||
446 | */ | ||
447 | cpu_relax(); | ||
448 | } | ||
449 | } | ||
450 | bcp->conseccompletes++; | ||
451 | return FLUSH_COMPLETE; | ||
452 | } | ||
453 | |||
454 | static inline cycles_t | ||
455 | sec_2_cycles(unsigned long sec) | ||
456 | { | ||
457 | unsigned long ns; | ||
458 | cycles_t cyc; | ||
459 | |||
460 | ns = sec * 1000000000; | ||
461 | cyc = (ns << CYC2NS_SCALE_FACTOR)/(per_cpu(cyc2ns, smp_processor_id())); | ||
462 | return cyc; | ||
463 | } | ||
464 | |||
465 | /* | ||
466 | * conditionally add 1 to *v, unless *v is >= u | ||
467 | * return 0 if we cannot add 1 to *v because it is >= u | ||
468 | * return 1 if we can add 1 to *v because it is < u | ||
469 | * the add is atomic | ||
470 | * | ||
471 | * This is close to atomic_add_unless(), but this allows the 'u' value | ||
472 | * to be lowered below the current 'v'. atomic_add_unless can only stop | ||
473 | * on equal. | ||
474 | */ | ||
475 | static inline int atomic_inc_unless_ge(spinlock_t *lock, atomic_t *v, int u) | ||
476 | { | ||
477 | spin_lock(lock); | ||
478 | if (atomic_read(v) >= u) { | ||
479 | spin_unlock(lock); | ||
480 | return 0; | ||
481 | } | ||
482 | atomic_inc(v); | ||
483 | spin_unlock(lock); | ||
484 | return 1; | ||
485 | } | ||
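
uv_flush_send_and_wait() below spins on this primitive to throttle the number of in-flight descriptors to max_bau_concurrent. A minimal userspace sketch of the same semantics, using a pthread mutex in place of the kernel spinlock:

    #include <pthread.h>

    static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;

    /* add 1 to *v unless *v >= u; returns 1 if added, 0 if at the limit */
    static int inc_unless_ge(int *v, int u)
    {
        int ok = 0;

        pthread_mutex_lock(&lock);
        if (*v < u) {
            (*v)++;
            ok = 1;
        }
        pthread_mutex_unlock(&lock);
        return ok;
    }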
486 | |||
487 | /* | ||
488 | * Our retries are blocked by all destination swack resources being | ||
489 | * in use, and a timeout is pending. In that case hardware immediately | ||
490 | * returns the ERROR that looks like a destination timeout. | ||
491 | */ | ||
492 | static void | ||
493 | destination_plugged(struct bau_desc *bau_desc, struct bau_control *bcp, | ||
494 | struct bau_control *hmaster, struct ptc_stats *stat) | ||
495 | { | ||
496 | udelay(bcp->plugged_delay); | ||
497 | bcp->plugged_tries++; | ||
498 | if (bcp->plugged_tries >= bcp->plugsb4reset) { | ||
499 | bcp->plugged_tries = 0; | ||
500 | quiesce_local_uvhub(hmaster); | ||
501 | spin_lock(&hmaster->queue_lock); | ||
502 | uv_reset_with_ipi(&bau_desc->distribution, bcp->cpu); | ||
503 | spin_unlock(&hmaster->queue_lock); | ||
504 | end_uvhub_quiesce(hmaster); | ||
505 | bcp->ipi_attempts++; | ||
506 | stat->s_resets_plug++; | ||
507 | } | ||
508 | } | ||
509 | |||
510 | static void | ||
511 | destination_timeout(struct bau_desc *bau_desc, struct bau_control *bcp, | ||
512 | struct bau_control *hmaster, struct ptc_stats *stat) | ||
513 | { | ||
514 | hmaster->max_bau_concurrent = 1; | ||
515 | bcp->timeout_tries++; | ||
516 | if (bcp->timeout_tries >= bcp->timeoutsb4reset) { | ||
517 | bcp->timeout_tries = 0; | ||
518 | quiesce_local_uvhub(hmaster); | ||
519 | spin_lock(&hmaster->queue_lock); | ||
520 | uv_reset_with_ipi(&bau_desc->distribution, bcp->cpu); | ||
521 | spin_unlock(&hmaster->queue_lock); | ||
522 | end_uvhub_quiesce(hmaster); | ||
523 | bcp->ipi_attempts++; | ||
524 | stat->s_resets_timeout++; | ||
525 | } | ||
526 | } | ||
527 | |||
528 | /* | ||
529 | * Completions are taking a very long time due to a congested numalink | ||
530 | * network. | ||
531 | */ | ||
532 | static void | ||
533 | disable_for_congestion(struct bau_control *bcp, struct ptc_stats *stat) | ||
534 | { | ||
535 | int tcpu; | ||
536 | struct bau_control *tbcp; | ||
537 | |||
538 | /* let only one cpu do this disabling */ | ||
539 | spin_lock(&disable_lock); | ||
540 | if (!baudisabled && bcp->period_requests && | ||
541 | ((bcp->period_time / bcp->period_requests) > congested_cycles)) { | ||
542 | /* it becomes this cpu's job to turn on the use of the | ||
543 | BAU again */ | ||
544 | baudisabled = 1; | ||
545 | bcp->set_bau_off = 1; | ||
546 | bcp->set_bau_on_time = get_cycles() + | ||
547 | sec_2_cycles(bcp->congested_period); | ||
548 | stat->s_bau_disabled++; | ||
549 | for_each_present_cpu(tcpu) { | ||
550 | tbcp = &per_cpu(bau_control, tcpu); | ||
551 | tbcp->baudisabled = 1; | ||
552 | } | ||
553 | } | ||
554 | spin_unlock(&disable_lock); | ||
555 | } | ||
556 | |||
557 | /** | ||
558 | * uv_flush_send_and_wait | ||
559 | * | ||
560 | * Send a broadcast and wait for it to complete. | ||
561 | * | ||
562 | * The flush_mask contains the cpus the broadcast is to be sent to including | ||
563 | * cpus that are on the local uvhub. | ||
564 | * | ||
565 | * Returns 0 if all flushing represented in the mask was done. | ||
566 | * Returns 1 if it gives up entirely and the original cpu mask is to be | ||
567 | * returned to the kernel. | ||
568 | */ | ||
569 | int uv_flush_send_and_wait(struct bau_desc *bau_desc, | ||
570 | struct cpumask *flush_mask, struct bau_control *bcp) | ||
571 | { | ||
572 | int right_shift; | ||
573 | int completion_status = 0; | ||
574 | int seq_number = 0; | ||
575 | long try = 0; | ||
576 | int cpu = bcp->uvhub_cpu; | ||
577 | int this_cpu = bcp->cpu; | ||
578 | unsigned long mmr_offset; | ||
579 | unsigned long index; | ||
580 | cycles_t time1; | ||
581 | cycles_t time2; | ||
582 | cycles_t elapsed; | ||
583 | struct ptc_stats *stat = bcp->statp; | ||
584 | struct bau_control *smaster = bcp->socket_master; | ||
585 | struct bau_control *hmaster = bcp->uvhub_master; | ||
586 | |||
587 | if (!atomic_inc_unless_ge(&hmaster->uvhub_lock, | ||
588 | &hmaster->active_descriptor_count, | ||
589 | hmaster->max_bau_concurrent)) { | ||
590 | stat->s_throttles++; | ||
591 | do { | ||
592 | cpu_relax(); | ||
593 | } while (!atomic_inc_unless_ge(&hmaster->uvhub_lock, | ||
594 | &hmaster->active_descriptor_count, | ||
595 | hmaster->max_bau_concurrent)); | ||
596 | } | ||
597 | while (hmaster->uvhub_quiesce) | ||
598 | cpu_relax(); | ||
599 | |||
600 | if (cpu < UV_CPUS_PER_ACT_STATUS) { | ||
601 | mmr_offset = UVH_LB_BAU_SB_ACTIVATION_STATUS_0; | ||
602 | right_shift = cpu * UV_ACT_STATUS_SIZE; | ||
603 | } else { | ||
604 | mmr_offset = UVH_LB_BAU_SB_ACTIVATION_STATUS_1; | ||
605 | right_shift = | ||
606 | ((cpu - UV_CPUS_PER_ACT_STATUS) * UV_ACT_STATUS_SIZE); | ||
607 | } | ||
608 | time1 = get_cycles(); | ||
609 | do { | ||
610 | if (try == 0) { | ||
611 | bau_desc->header.msg_type = MSG_REGULAR; | ||
612 | seq_number = bcp->message_number++; | ||
613 | } else { | ||
614 | bau_desc->header.msg_type = MSG_RETRY; | ||
615 | stat->s_retry_messages++; | ||
616 | } | ||
617 | bau_desc->header.sequence = seq_number; | ||
618 | index = (1UL << UVH_LB_BAU_SB_ACTIVATION_CONTROL_PUSH_SHFT) | | ||
619 | bcp->uvhub_cpu; | ||
620 | bcp->send_message = get_cycles(); | ||
621 | uv_write_local_mmr(UVH_LB_BAU_SB_ACTIVATION_CONTROL, index); | ||
622 | try++; | ||
623 | completion_status = uv_wait_completion(bau_desc, mmr_offset, | ||
624 | right_shift, this_cpu, bcp, smaster, try); | ||
625 | |||
626 | if (completion_status == FLUSH_RETRY_PLUGGED) { | ||
627 | destination_plugged(bau_desc, bcp, hmaster, stat); | ||
628 | } else if (completion_status == FLUSH_RETRY_TIMEOUT) { | ||
629 | destination_timeout(bau_desc, bcp, hmaster, stat); | ||
630 | } | ||
631 | if (bcp->ipi_attempts >= bcp->ipi_reset_limit) { | ||
632 | bcp->ipi_attempts = 0; | ||
633 | completion_status = FLUSH_GIVEUP; | ||
634 | break; | ||
635 | } | ||
636 | cpu_relax(); | ||
637 | } while ((completion_status == FLUSH_RETRY_PLUGGED) || | ||
638 | (completion_status == FLUSH_RETRY_TIMEOUT)); | ||
639 | time2 = get_cycles(); | ||
640 | bcp->plugged_tries = 0; | ||
641 | bcp->timeout_tries = 0; | ||
642 | if ((completion_status == FLUSH_COMPLETE) && | ||
643 | (bcp->conseccompletes > bcp->complete_threshold) && | ||
644 | (hmaster->max_bau_concurrent < | ||
645 | hmaster->max_bau_concurrent_constant)) | ||
646 | hmaster->max_bau_concurrent++; | ||
647 | while (hmaster->uvhub_quiesce) | ||
648 | cpu_relax(); | ||
649 | atomic_dec(&hmaster->active_descriptor_count); | ||
650 | if (time2 > time1) { | ||
651 | elapsed = time2 - time1; | ||
652 | stat->s_time += elapsed; | ||
653 | if ((completion_status == FLUSH_COMPLETE) && (try == 1)) { | ||
654 | bcp->period_requests++; | ||
655 | bcp->period_time += elapsed; | ||
656 | if ((elapsed > congested_cycles) && | ||
657 | (bcp->period_requests > bcp->congested_reps)) { | ||
658 | disable_for_congestion(bcp, stat); | ||
659 | } | ||
660 | } | ||
661 | } else | ||
662 | stat->s_requestor--; | ||
663 | if (completion_status == FLUSH_COMPLETE && try > 1) | ||
664 | stat->s_retriesok++; | ||
665 | else if (completion_status == FLUSH_GIVEUP) { | ||
666 | stat->s_giveup++; | ||
667 | return 1; | ||
668 | } | ||
669 | return 0; | ||
670 | } | ||
671 | |||
672 | /** | ||
673 | * uv_flush_tlb_others - globally purge translation cache of a virtual | ||
674 | * address or all TLBs | ||
675 | * @cpumask: mask of all cpus in which the address is to be removed | ||
676 | * @mm: mm_struct containing virtual address range | ||
677 | * @va: virtual address to be removed (or TLB_FLUSH_ALL for all TLBs on cpu) | ||
678 | * @cpu: the current cpu | ||
679 | * | ||
680 | * This is the entry point for initiating any UV global TLB shootdown. | ||
681 | * | ||
682 | * Purges the translation caches of all specified processors of the given | ||
683 | * virtual address, or purges all TLBs on specified processors. | ||
684 | * | ||
685 | * The caller has derived the cpumask from the mm_struct. This function | ||
686 | * is called only if there are bits set in the mask. (e.g. flush_tlb_page()) | ||
687 | * | ||
688 | * The cpumask is converted into a uvhubmask of the uvhubs containing | ||
689 | * those cpus. | ||
690 | * | ||
691 | * Note that this function should be called with preemption disabled. | ||
692 | * | ||
693 | * Returns NULL if all remote flushing was done. | ||
694 | * Returns pointer to cpumask if some remote flushing remains to be | ||
695 | * done. The returned pointer is valid till preemption is re-enabled. | ||
696 | */ | ||
697 | const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask, | ||
698 | struct mm_struct *mm, | ||
699 | unsigned long va, unsigned int cpu) | ||
700 | { | ||
701 | int tcpu; | ||
702 | int uvhub; | ||
703 | int locals = 0; | ||
704 | int remotes = 0; | ||
705 | int hubs = 0; | ||
706 | struct bau_desc *bau_desc; | ||
707 | struct cpumask *flush_mask; | ||
708 | struct ptc_stats *stat; | ||
709 | struct bau_control *bcp; | ||
710 | struct bau_control *tbcp; | ||
711 | |||
712 | /* kernel was booted 'nobau' */ | ||
713 | if (nobau) | ||
714 | return cpumask; | ||
715 | |||
716 | bcp = &per_cpu(bau_control, cpu); | ||
717 | stat = bcp->statp; | ||
718 | |||
719 | /* bau was disabled due to slow response */ | ||
720 | if (bcp->baudisabled) { | ||
721 | /* the cpu that disabled it must re-enable it */ | ||
722 | if (bcp->set_bau_off) { | ||
723 | if (get_cycles() >= bcp->set_bau_on_time) { | ||
724 | stat->s_bau_reenabled++; | ||
725 | baudisabled = 0; | ||
726 | for_each_present_cpu(tcpu) { | ||
727 | tbcp = &per_cpu(bau_control, tcpu); | ||
728 | tbcp->baudisabled = 0; | ||
729 | tbcp->period_requests = 0; | ||
730 | tbcp->period_time = 0; | ||
731 | } | ||
732 | } | ||
733 | } | ||
734 | return cpumask; | ||
735 | } | ||
736 | |||
737 | /* | ||
738 | * Each sending cpu has a per-cpu mask which it fills from the caller's | ||
739 | * cpu mask. All cpus are converted to uvhubs and copied to the | ||
740 | * activation descriptor. | ||
741 | */ | ||
742 | flush_mask = (struct cpumask *)per_cpu(uv_flush_tlb_mask, cpu); | ||
743 | /* don't actually do a shootdown of the local cpu */ | ||
744 | cpumask_andnot(flush_mask, cpumask, cpumask_of(cpu)); | ||
745 | if (cpu_isset(cpu, *cpumask)) | ||
746 | stat->s_ntargself++; | ||
747 | |||
748 | bau_desc = bcp->descriptor_base; | ||
749 | bau_desc += UV_ITEMS_PER_DESCRIPTOR * bcp->uvhub_cpu; | ||
750 | bau_uvhubs_clear(&bau_desc->distribution, UV_DISTRIBUTION_SIZE); | ||
751 | |||
752 | /* cpu statistics */ | ||
753 | for_each_cpu(tcpu, flush_mask) { | ||
754 | uvhub = uv_cpu_to_blade_id(tcpu); | ||
755 | bau_uvhub_set(uvhub, &bau_desc->distribution); | ||
756 | if (uvhub == bcp->uvhub) | ||
757 | locals++; | ||
758 | else | ||
759 | remotes++; | ||
760 | } | ||
761 | if ((locals + remotes) == 0) | ||
762 | return NULL; | ||
763 | stat->s_requestor++; | ||
764 | stat->s_ntargcpu += remotes + locals; | ||
765 | stat->s_ntargremotes += remotes; | ||
766 | stat->s_ntarglocals += locals; | ||
767 | remotes = bau_uvhub_weight(&bau_desc->distribution); | ||
768 | |||
769 | /* uvhub statistics */ | ||
770 | hubs = bau_uvhub_weight(&bau_desc->distribution); | ||
771 | if (locals) { | ||
772 | stat->s_ntarglocaluvhub++; | ||
773 | stat->s_ntargremoteuvhub += (hubs - 1); | ||
774 | } else | ||
775 | stat->s_ntargremoteuvhub += hubs; | ||
776 | stat->s_ntarguvhub += hubs; | ||
777 | if (hubs >= 16) | ||
778 | stat->s_ntarguvhub16++; | ||
779 | else if (hubs >= 8) | ||
780 | stat->s_ntarguvhub8++; | ||
781 | else if (hubs >= 4) | ||
782 | stat->s_ntarguvhub4++; | ||
783 | else if (hubs >= 2) | ||
784 | stat->s_ntarguvhub2++; | ||
785 | else | ||
786 | stat->s_ntarguvhub1++; | ||
787 | |||
788 | bau_desc->payload.address = va; | ||
789 | bau_desc->payload.sending_cpu = cpu; | ||
790 | |||
791 | /* | ||
792 | * uv_flush_send_and_wait returns 0 if all cpus were messaged, | ||
793 | * or 1 if it gave up and the original cpumask should be returned. | ||
794 | */ | ||
795 | if (!uv_flush_send_and_wait(bau_desc, flush_mask, bcp)) | ||
796 | return NULL; | ||
797 | else | ||
798 | return cpumask; | ||
799 | } | ||
800 | |||
801 | /* | ||
802 | * The BAU message interrupt comes here. (registered by set_intr_gate) | ||
803 | * See entry_64.S | ||
804 | * | ||
805 | * We received a broadcast assist message. | ||
806 | * | ||
807 | * Interrupts are disabled; this interrupt could represent | ||
808 | * the receipt of several messages. | ||
809 | * | ||
810 | * All cores/threads on this hub get this interrupt. | ||
811 | * The last one to see it does the software ack. | ||
812 | * (the resource will not be freed until noninterruptible cpus see this | ||
813 | * interrupt; hardware may timeout the s/w ack and reply ERROR) | ||
814 | */ | ||
815 | void uv_bau_message_interrupt(struct pt_regs *regs) | ||
816 | { | ||
817 | int count = 0; | ||
818 | cycles_t time_start; | ||
819 | struct bau_payload_queue_entry *msg; | ||
820 | struct bau_control *bcp; | ||
821 | struct ptc_stats *stat; | ||
822 | struct msg_desc msgdesc; | ||
823 | |||
824 | time_start = get_cycles(); | ||
825 | bcp = &per_cpu(bau_control, smp_processor_id()); | ||
826 | stat = bcp->statp; | ||
827 | msgdesc.va_queue_first = bcp->va_queue_first; | ||
828 | msgdesc.va_queue_last = bcp->va_queue_last; | ||
829 | msg = bcp->bau_msg_head; | ||
830 | while (msg->sw_ack_vector) { | ||
831 | count++; | ||
832 | msgdesc.msg_slot = msg - msgdesc.va_queue_first; | ||
833 | msgdesc.sw_ack_slot = ffs(msg->sw_ack_vector) - 1; | ||
834 | msgdesc.msg = msg; | ||
835 | uv_bau_process_message(&msgdesc, bcp); | ||
836 | msg++; | ||
837 | if (msg > msgdesc.va_queue_last) | ||
838 | msg = msgdesc.va_queue_first; | ||
839 | bcp->bau_msg_head = msg; | ||
840 | } | ||
841 | stat->d_time += (get_cycles() - time_start); | ||
842 | if (!count) | ||
843 | stat->d_nomsg++; | ||
844 | else if (count > 1) | ||
845 | stat->d_multmsg++; | ||
846 | ack_APIC_irq(); | ||
847 | } | ||
848 | |||
849 | /* | ||
850 | * uv_enable_timeouts | ||
851 | * | ||
852 | * Each target uvhub (i.e. a uvhub that has no cpus) needs to have | ||
853 | * shootdown message timeouts enabled. The timeout does not cause | ||
854 | * an interrupt, but causes an error message to be returned to | ||
855 | * the sender. | ||
856 | */ | ||
857 | static void uv_enable_timeouts(void) | ||
858 | { | ||
859 | int uvhub; | ||
860 | int nuvhubs; | ||
861 | int pnode; | ||
862 | unsigned long mmr_image; | ||
863 | |||
864 | nuvhubs = uv_num_possible_blades(); | ||
865 | |||
866 | for (uvhub = 0; uvhub < nuvhubs; uvhub++) { | ||
867 | if (!uv_blade_nr_possible_cpus(uvhub)) | ||
868 | continue; | ||
869 | |||
870 | pnode = uv_blade_to_pnode(uvhub); | ||
871 | mmr_image = | ||
872 | uv_read_global_mmr64(pnode, UVH_LB_BAU_MISC_CONTROL); | ||
873 | /* | ||
874 | * Set the timeout period and then lock it in, in three | ||
875 | * steps; captures and locks in the period. | ||
876 | * | ||
877 | * To program the period, the SOFT_ACK_MODE must be off. | ||
878 | */ | ||
879 | mmr_image &= ~((unsigned long)1 << | ||
880 | UVH_LB_BAU_MISC_CONTROL_ENABLE_INTD_SOFT_ACK_MODE_SHFT); | ||
881 | uv_write_global_mmr64 | ||
882 | (pnode, UVH_LB_BAU_MISC_CONTROL, mmr_image); | ||
883 | /* | ||
884 | * Set the 4-bit period. | ||
885 | */ | ||
886 | mmr_image &= ~((unsigned long)0xf << | ||
887 | UVH_LB_BAU_MISC_CONTROL_INTD_SOFT_ACK_TIMEOUT_PERIOD_SHFT); | ||
888 | mmr_image |= (UV_INTD_SOFT_ACK_TIMEOUT_PERIOD << | ||
889 | UVH_LB_BAU_MISC_CONTROL_INTD_SOFT_ACK_TIMEOUT_PERIOD_SHFT); | ||
890 | uv_write_global_mmr64 | ||
891 | (pnode, UVH_LB_BAU_MISC_CONTROL, mmr_image); | ||
892 | /* | ||
893 | * Subsequent reversals of the timebase bit (3) cause an | ||
894 | * immediate timeout of one or all INTD resources as | ||
895 | * indicated in bits 2:0 (7 causes all of them to timeout). | ||
896 | */ | ||
897 | mmr_image |= ((unsigned long)1 << | ||
898 | UVH_LB_BAU_MISC_CONTROL_ENABLE_INTD_SOFT_ACK_MODE_SHFT); | ||
899 | uv_write_global_mmr64 | ||
900 | (pnode, UVH_LB_BAU_MISC_CONTROL, mmr_image); | ||
901 | } | ||
902 | } | ||
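
The three MMR writes above follow a clear/program/lock sequence: drop the soft-ack mode bit, rewrite the 4-bit period field, then set the mode bit again to capture the period. A generic sketch of the successive register images (the shift values are placeholders, not the real UV layout; the kernel writes the MMR back after each step, this helper just computes the final image):

    #include <stdint.h>

    #define MODE_SHFT   15	/* placeholder for the SOFT_ACK_MODE bit */
    #define PERIOD_SHFT 16	/* placeholder for the 4-bit period field */

    static uint64_t program_period(uint64_t mmr, uint64_t period)
    {
        mmr &= ~(1ULL << MODE_SHFT);          /* step 1: mode off */
        mmr &= ~(0xfULL << PERIOD_SHFT);      /* step 2: clear the field */
        mmr |= (period & 0xf) << PERIOD_SHFT; /*         program the period */
        mmr |= 1ULL << MODE_SHFT;             /* step 3: lock it in */
        return mmr;
    }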
903 | |||
904 | static void *uv_ptc_seq_start(struct seq_file *file, loff_t *offset) | ||
905 | { | ||
906 | if (*offset < num_possible_cpus()) | ||
907 | return offset; | ||
908 | return NULL; | ||
909 | } | ||
910 | |||
911 | static void *uv_ptc_seq_next(struct seq_file *file, void *data, loff_t *offset) | ||
912 | { | ||
913 | (*offset)++; | ||
914 | if (*offset < num_possible_cpus()) | ||
915 | return offset; | ||
916 | return NULL; | ||
917 | } | ||
918 | |||
919 | static void uv_ptc_seq_stop(struct seq_file *file, void *data) | ||
920 | { | ||
921 | } | ||
922 | |||
923 | static inline unsigned long long | ||
924 | microsec_2_cycles(unsigned long microsec) | ||
925 | { | ||
926 | unsigned long ns; | ||
927 | unsigned long long cyc; | ||
928 | |||
929 | ns = microsec * 1000; | ||
930 | cyc = (ns << CYC2NS_SCALE_FACTOR)/(per_cpu(cyc2ns, smp_processor_id())); | ||
931 | return cyc; | ||
932 | } | ||
933 | |||
934 | /* | ||
935 | * Display the statistics thru /proc. | ||
936 | * 'data' points to the cpu number | ||
937 | */ | ||
938 | static int uv_ptc_seq_show(struct seq_file *file, void *data) | ||
939 | { | ||
940 | struct ptc_stats *stat; | ||
941 | int cpu; | ||
942 | |||
943 | cpu = *(loff_t *)data; | ||
944 | |||
945 | if (!cpu) { | ||
946 | seq_printf(file, | ||
947 | "# cpu sent stime self locals remotes ncpus localhub "); | ||
948 | seq_printf(file, | ||
949 | "remotehub numuvhubs numuvhubs16 numuvhubs8 "); | ||
950 | seq_printf(file, | ||
951 | "numuvhubs4 numuvhubs2 numuvhubs1 dto "); | ||
952 | seq_printf(file, | ||
953 | "retries rok resetp resett giveup sto bz throt "); | ||
954 | seq_printf(file, | ||
955 | "sw_ack recv rtime all "); | ||
956 | seq_printf(file, | ||
957 | "one mult none retry canc nocan reset rcan "); | ||
958 | seq_printf(file, | ||
959 | "disable enable\n"); | ||
960 | } | ||
961 | if (cpu < num_possible_cpus() && cpu_online(cpu)) { | ||
962 | stat = &per_cpu(ptcstats, cpu); | ||
963 | /* source side statistics */ | ||
964 | seq_printf(file, | ||
965 | "cpu %d %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld ", | ||
966 | cpu, stat->s_requestor, cycles_2_us(stat->s_time), | ||
967 | stat->s_ntargself, stat->s_ntarglocals, | ||
968 | stat->s_ntargremotes, stat->s_ntargcpu, | ||
969 | stat->s_ntarglocaluvhub, stat->s_ntargremoteuvhub, | ||
970 | stat->s_ntarguvhub, stat->s_ntarguvhub16); | ||
971 | seq_printf(file, "%ld %ld %ld %ld %ld ", | ||
972 | stat->s_ntarguvhub8, stat->s_ntarguvhub4, | ||
973 | stat->s_ntarguvhub2, stat->s_ntarguvhub1, | ||
974 | stat->s_dtimeout); | ||
975 | seq_printf(file, "%ld %ld %ld %ld %ld %ld %ld %ld ", | ||
976 | stat->s_retry_messages, stat->s_retriesok, | ||
977 | stat->s_resets_plug, stat->s_resets_timeout, | ||
978 | stat->s_giveup, stat->s_stimeout, | ||
979 | stat->s_busy, stat->s_throttles); | ||
980 | |||
981 | /* destination side statistics */ | ||
982 | seq_printf(file, | ||
983 | "%lx %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld ", | ||
984 | uv_read_global_mmr64(uv_cpu_to_pnode(cpu), | ||
985 | UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE), | ||
986 | stat->d_requestee, cycles_2_us(stat->d_time), | ||
987 | stat->d_alltlb, stat->d_onetlb, stat->d_multmsg, | ||
988 | stat->d_nomsg, stat->d_retries, stat->d_canceled, | ||
989 | stat->d_nocanceled, stat->d_resets, | ||
990 | stat->d_rcanceled); | ||
991 | seq_printf(file, "%ld %ld\n", | ||
992 | stat->s_bau_disabled, stat->s_bau_reenabled); | ||
993 | } | ||
994 | |||
995 | return 0; | ||
996 | } | ||
997 | |||
998 | /* | ||
999 | * Display the tunables thru debugfs | ||
1000 | */ | ||
1001 | static ssize_t tunables_read(struct file *file, char __user *userbuf, | ||
1002 | size_t count, loff_t *ppos) | ||
1003 | { | ||
1004 | char buf[300]; | ||
1005 | int ret; | ||
1006 | |||
1007 | ret = snprintf(buf, 300, "%s %s %s\n%d %d %d %d %d %d %d %d %d\n", | ||
1008 | "max_bau_concurrent plugged_delay plugsb4reset", | ||
1009 | "timeoutsb4reset ipi_reset_limit complete_threshold", | ||
1010 | "congested_response_us congested_reps congested_period", | ||
1011 | max_bau_concurrent, plugged_delay, plugsb4reset, | ||
1012 | timeoutsb4reset, ipi_reset_limit, complete_threshold, | ||
1013 | congested_response_us, congested_reps, congested_period); | ||
1014 | |||
1015 | return simple_read_from_buffer(userbuf, count, ppos, buf, ret); | ||
1016 | } | ||
1017 | |||
1018 | /* | ||
1019 | * -1: reset the statistics | ||
1020 | * 0: display meaning of the statistics | ||
1021 | */ | ||
1022 | static ssize_t uv_ptc_proc_write(struct file *file, const char __user *user, | ||
1023 | size_t count, loff_t *data) | ||
1024 | { | ||
1025 | int cpu; | ||
1026 | long input_arg; | ||
1027 | char optstr[64]; | ||
1028 | struct ptc_stats *stat; | ||
1029 | |||
1030 | if (count == 0 || count > sizeof(optstr)) | ||
1031 | return -EINVAL; | ||
1032 | if (copy_from_user(optstr, user, count)) | ||
1033 | return -EFAULT; | ||
1034 | optstr[count - 1] = '\0'; | ||
1035 | if (strict_strtol(optstr, 10, &input_arg) < 0) { | ||
1036 | printk(KERN_DEBUG "%s is invalid\n", optstr); | ||
1037 | return -EINVAL; | ||
1038 | } | ||
1039 | |||
1040 | if (input_arg == 0) { | ||
1041 | printk(KERN_DEBUG "# cpu: cpu number\n"); | ||
1042 | printk(KERN_DEBUG "Sender statistics:\n"); | ||
1043 | printk(KERN_DEBUG | ||
1044 | "sent: number of shootdown messages sent\n"); | ||
1045 | printk(KERN_DEBUG | ||
1046 | "stime: time spent sending messages\n"); | ||
1047 | printk(KERN_DEBUG | ||
1048 | "numuvhubs: number of hubs targeted with shootdown\n"); | ||
1049 | printk(KERN_DEBUG | ||
1050 | "numuvhubs16: number times 16 or more hubs targeted\n"); | ||
1051 | printk(KERN_DEBUG | ||
1052 | "numuvhubs8: number times 8 or more hubs targeted\n"); | ||
1053 | printk(KERN_DEBUG | ||
1054 | "numuvhubs4: number times 4 or more hubs targeted\n"); | ||
1055 | printk(KERN_DEBUG | ||
1056 | "numuvhubs2: number times 2 or more hubs targeted\n"); | ||
1057 | printk(KERN_DEBUG | ||
1058 | "numuvhubs1: number times 1 hub targeted\n"); | ||
1059 | printk(KERN_DEBUG | ||
1060 | "numcpus: number of cpus targeted with shootdown\n"); | ||
1061 | printk(KERN_DEBUG | ||
1062 | "dto: number of destination timeouts\n"); | ||
1063 | printk(KERN_DEBUG | ||
1064 | "retries: destination timeout retries sent\n"); | ||
1065 | printk(KERN_DEBUG | ||
1066 | "rok: : destination timeouts successfully retried\n"); | ||
1067 | printk(KERN_DEBUG | ||
1068 | "resetp: ipi-style resource resets for plugs\n"); | ||
1069 | printk(KERN_DEBUG | ||
1070 | "resett: ipi-style resource resets for timeouts\n"); | ||
1071 | printk(KERN_DEBUG | ||
1072 | "giveup: fall-backs to ipi-style shootdowns\n"); | ||
1073 | printk(KERN_DEBUG | ||
1074 | "sto: number of source timeouts\n"); | ||
1075 | printk(KERN_DEBUG | ||
1076 | "bz: number of stay-busy's\n"); | ||
1077 | printk(KERN_DEBUG | ||
1078 | "throt: number times spun in throttle\n"); | ||
1079 | printk(KERN_DEBUG "Destination side statistics:\n"); | ||
1080 | printk(KERN_DEBUG | ||
1081 | "sw_ack: image of UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE\n"); | ||
1082 | printk(KERN_DEBUG | ||
1083 | "recv: shootdown messages received\n"); | ||
1084 | printk(KERN_DEBUG | ||
1085 | "rtime: time spent processing messages\n"); | ||
1086 | printk(KERN_DEBUG | ||
1087 | "all: shootdown all-tlb messages\n"); | ||
1088 | printk(KERN_DEBUG | ||
1089 | "one: shootdown one-tlb messages\n"); | ||
1090 | printk(KERN_DEBUG | ||
1091 | "mult: interrupts that found multiple messages\n"); | ||
1092 | printk(KERN_DEBUG | ||
1093 | "none: interrupts that found no messages\n"); | ||
1094 | printk(KERN_DEBUG | ||
1095 | "retry: number of retry messages processed\n"); | ||
1096 | printk(KERN_DEBUG | ||
1097 | "canc: number messages canceled by retries\n"); | ||
1098 | printk(KERN_DEBUG | ||
1099 | "nocan: number retries that found nothing to cancel\n"); | ||
1100 | printk(KERN_DEBUG | ||
1101 | "reset: number of ipi-style reset requests processed\n"); | ||
1102 | printk(KERN_DEBUG | ||
1103 | "rcan: number messages canceled by reset requests\n"); | ||
1104 | printk(KERN_DEBUG | ||
1105 | "disable: number times use of the BAU was disabled\n"); | ||
1106 | printk(KERN_DEBUG | ||
1107 | "enable: number times use of the BAU was re-enabled\n"); | ||
1108 | } else if (input_arg == -1) { | ||
1109 | for_each_present_cpu(cpu) { | ||
1110 | stat = &per_cpu(ptcstats, cpu); | ||
1111 | memset(stat, 0, sizeof(struct ptc_stats)); | ||
1112 | } | ||
1113 | } | ||
1114 | |||
1115 | return count; | ||
1116 | } | ||
1117 | |||
1118 | static int local_atoi(const char *name) | ||
1119 | { | ||
1120 | int val = 0; | ||
1121 | |||
1122 | for (;; name++) { | ||
1123 | switch (*name) { | ||
1124 | case '0' ... '9': | ||
1125 | val = 10*val+(*name-'0'); | ||
1126 | break; | ||
1127 | default: | ||
1128 | return val; | ||
1129 | } | ||
1130 | } | ||
1131 | } | ||
1132 | |||
1133 | /* | ||
1134 | * set the tunables | ||
1135 | * 0 values reset them to defaults | ||
1136 | */ | ||
1137 | static ssize_t tunables_write(struct file *file, const char __user *user, | ||
1138 | size_t count, loff_t *data) | ||
1139 | { | ||
1140 | int cpu; | ||
1141 | int cnt = 0; | ||
1142 | int val; | ||
1143 | char *p; | ||
1144 | char *q; | ||
1145 | char instr[64]; | ||
1146 | struct bau_control *bcp; | ||
1147 | |||
1148 | if (count == 0 || count > sizeof(instr)-1) | ||
1149 | return -EINVAL; | ||
1150 | if (copy_from_user(instr, user, count)) | ||
1151 | return -EFAULT; | ||
1152 | |||
1153 | instr[count] = '\0'; | ||
1154 | /* count the fields */ | ||
1155 | p = instr + strspn(instr, WHITESPACE); | ||
1156 | q = p; | ||
1157 | for (; *p; p = q + strspn(q, WHITESPACE)) { | ||
1158 | q = p + strcspn(p, WHITESPACE); | ||
1159 | cnt++; | ||
1160 | if (q == p) | ||
1161 | break; | ||
1162 | } | ||
1163 | if (cnt != 9) { | ||
1164 | printk(KERN_INFO "bau tunable error: should be 9 numbers\n"); | ||
1165 | return -EINVAL; | ||
1166 | } | ||
1167 | |||
1168 | p = instr + strspn(instr, WHITESPACE); | ||
1169 | q = p; | ||
1170 | for (cnt = 0; *p; p = q + strspn(q, WHITESPACE), cnt++) { | ||
1171 | q = p + strcspn(p, WHITESPACE); | ||
1172 | val = local_atoi(p); | ||
1173 | switch (cnt) { | ||
1174 | case 0: | ||
1175 | if (val == 0) { | ||
1176 | max_bau_concurrent = MAX_BAU_CONCURRENT; | ||
1177 | max_bau_concurrent_constant = | ||
1178 | MAX_BAU_CONCURRENT; | ||
1179 | continue; | ||
1180 | } | ||
1181 | bcp = &per_cpu(bau_control, smp_processor_id()); | ||
1182 | if (val < 1 || val > bcp->cpus_in_uvhub) { | ||
1183 | printk(KERN_DEBUG | ||
1184 | "Error: BAU max concurrent %d is invalid\n", | ||
1185 | val); | ||
1186 | return -EINVAL; | ||
1187 | } | ||
1188 | max_bau_concurrent = val; | ||
1189 | max_bau_concurrent_constant = val; | ||
1190 | continue; | ||
1191 | case 1: | ||
1192 | if (val == 0) | ||
1193 | plugged_delay = PLUGGED_DELAY; | ||
1194 | else | ||
1195 | plugged_delay = val; | ||
1196 | continue; | ||
1197 | case 2: | ||
1198 | if (val == 0) | ||
1199 | plugsb4reset = PLUGSB4RESET; | ||
1200 | else | ||
1201 | plugsb4reset = val; | ||
1202 | continue; | ||
1203 | case 3: | ||
1204 | if (val == 0) | ||
1205 | timeoutsb4reset = TIMEOUTSB4RESET; | ||
1206 | else | ||
1207 | timeoutsb4reset = val; | ||
1208 | continue; | ||
1209 | case 4: | ||
1210 | if (val == 0) | ||
1211 | ipi_reset_limit = IPI_RESET_LIMIT; | ||
1212 | else | ||
1213 | ipi_reset_limit = val; | ||
1214 | continue; | ||
1215 | case 5: | ||
1216 | if (val == 0) | ||
1217 | complete_threshold = COMPLETE_THRESHOLD; | ||
1218 | else | ||
1219 | complete_threshold = val; | ||
1220 | continue; | ||
1221 | case 6: | ||
1222 | if (val == 0) | ||
1223 | congested_response_us = CONGESTED_RESPONSE_US; | ||
1224 | else | ||
1225 | congested_response_us = val; | ||
1226 | continue; | ||
1227 | case 7: | ||
1228 | if (val == 0) | ||
1229 | congested_reps = CONGESTED_REPS; | ||
1230 | else | ||
1231 | congested_reps = val; | ||
1232 | continue; | ||
1233 | case 8: | ||
1234 | if (val == 0) | ||
1235 | congested_period = CONGESTED_PERIOD; | ||
1236 | else | ||
1237 | congested_period = val; | ||
1238 | continue; | ||
1239 | } | ||
1240 | if (q == p) | ||
1241 | break; | ||
1242 | } | ||
1243 | for_each_present_cpu(cpu) { | ||
1244 | bcp = &per_cpu(bau_control, cpu); | ||
1245 | bcp->max_bau_concurrent = max_bau_concurrent; | ||
1246 | bcp->max_bau_concurrent_constant = max_bau_concurrent; | ||
1247 | bcp->plugged_delay = plugged_delay; | ||
1248 | bcp->plugsb4reset = plugsb4reset; | ||
1249 | bcp->timeoutsb4reset = timeoutsb4reset; | ||
1250 | bcp->ipi_reset_limit = ipi_reset_limit; | ||
1251 | bcp->complete_threshold = complete_threshold; | ||
1252 | bcp->congested_response_us = congested_response_us; | ||
1253 | bcp->congested_reps = congested_reps; | ||
1254 | bcp->congested_period = congested_period; | ||
1255 | } | ||
1256 | return count; | ||
1257 | } | ||
1258 | |||
1259 | static const struct seq_operations uv_ptc_seq_ops = { | ||
1260 | .start = uv_ptc_seq_start, | ||
1261 | .next = uv_ptc_seq_next, | ||
1262 | .stop = uv_ptc_seq_stop, | ||
1263 | .show = uv_ptc_seq_show | ||
1264 | }; | ||
1265 | |||
1266 | static int uv_ptc_proc_open(struct inode *inode, struct file *file) | ||
1267 | { | ||
1268 | return seq_open(file, &uv_ptc_seq_ops); | ||
1269 | } | ||
1270 | |||
1271 | static int tunables_open(struct inode *inode, struct file *file) | ||
1272 | { | ||
1273 | return 0; | ||
1274 | } | ||
1275 | |||
1276 | static const struct file_operations proc_uv_ptc_operations = { | ||
1277 | .open = uv_ptc_proc_open, | ||
1278 | .read = seq_read, | ||
1279 | .write = uv_ptc_proc_write, | ||
1280 | .llseek = seq_lseek, | ||
1281 | .release = seq_release, | ||
1282 | }; | ||
1283 | |||
1284 | static const struct file_operations tunables_fops = { | ||
1285 | .open = tunables_open, | ||
1286 | .read = tunables_read, | ||
1287 | .write = tunables_write, | ||
1288 | }; | ||
1289 | |||
1290 | static int __init uv_ptc_init(void) | ||
1291 | { | ||
1292 | struct proc_dir_entry *proc_uv_ptc; | ||
1293 | |||
1294 | if (!is_uv_system()) | ||
1295 | return 0; | ||
1296 | |||
1297 | proc_uv_ptc = proc_create(UV_PTC_BASENAME, 0444, NULL, | ||
1298 | &proc_uv_ptc_operations); | ||
1299 | if (!proc_uv_ptc) { | ||
1300 | printk(KERN_ERR "unable to create %s proc entry\n", | ||
1301 | UV_PTC_BASENAME); | ||
1302 | return -EINVAL; | ||
1303 | } | ||
1304 | |||
1305 | tunables_dir = debugfs_create_dir(UV_BAU_TUNABLES_DIR, NULL); | ||
1306 | if (!tunables_dir) { | ||
1307 | printk(KERN_ERR "unable to create debugfs directory %s\n", | ||
1308 | UV_BAU_TUNABLES_DIR); | ||
1309 | return -EINVAL; | ||
1310 | } | ||
1311 | tunables_file = debugfs_create_file(UV_BAU_TUNABLES_FILE, 0600, | ||
1312 | tunables_dir, NULL, &tunables_fops); | ||
1313 | if (!tunables_file) { | ||
1314 | printk(KERN_ERR "unable to create debugfs file %s\n", | ||
1315 | UV_BAU_TUNABLES_FILE); | ||
1316 | return -EINVAL; | ||
1317 | } | ||
1318 | return 0; | ||
1319 | } | ||
1320 | |||
1321 | /* | ||
1322 | * initialize the sending side's sending buffers | ||
1323 | */ | ||
1324 | static void | ||
1325 | uv_activation_descriptor_init(int node, int pnode) | ||
1326 | { | ||
1327 | int i; | ||
1328 | int cpu; | ||
1329 | unsigned long pa; | ||
1330 | unsigned long m; | ||
1331 | unsigned long n; | ||
1332 | struct bau_desc *bau_desc; | ||
1333 | struct bau_desc *bd2; | ||
1334 | struct bau_control *bcp; | ||
1335 | |||
1336 | /* | ||
1337 | * each bau_desc is 64 bytes; there are 8 (UV_ITEMS_PER_DESCRIPTOR) | ||
1338 | * per cpu; and up to 32 (UV_ADP_SIZE) cpus per uvhub | ||
1339 | */ | ||
1340 | bau_desc = (struct bau_desc *)kmalloc_node(sizeof(struct bau_desc)* | ||
1341 | UV_ADP_SIZE*UV_ITEMS_PER_DESCRIPTOR, GFP_KERNEL, node); | ||
1342 | BUG_ON(!bau_desc); | ||
1343 | |||
1344 | pa = uv_gpa(bau_desc); /* need the real nasid*/ | ||
1345 | n = pa >> uv_nshift; | ||
1346 | m = pa & uv_mmask; | ||
1347 | |||
1348 | uv_write_global_mmr64(pnode, UVH_LB_BAU_SB_DESCRIPTOR_BASE, | ||
1349 | (n << UV_DESC_BASE_PNODE_SHIFT | m)); | ||
1350 | |||
1351 | /* | ||
1352 | * initializing all 8 (UV_ITEMS_PER_DESCRIPTOR) descriptors for each | ||
1353 | * cpu even though we only use the first one; one descriptor can | ||
1354 | * describe a broadcast to 256 uv hubs. | ||
1355 | */ | ||
1356 | for (i = 0, bd2 = bau_desc; i < (UV_ADP_SIZE*UV_ITEMS_PER_DESCRIPTOR); | ||
1357 | i++, bd2++) { | ||
1358 | memset(bd2, 0, sizeof(struct bau_desc)); | ||
1359 | bd2->header.sw_ack_flag = 1; | ||
1360 | /* | ||
1361 | * base_dest_nodeid is the nasid (pnode<<1) of the first uvhub | ||
1362 | * in the partition. The bit map will indicate uvhub numbers, | ||
1363 | * which are 0-N in a partition. Pnodes are unique system-wide. | ||
1364 | */ | ||
1365 | bd2->header.base_dest_nodeid = uv_partition_base_pnode << 1; | ||
1366 | bd2->header.dest_subnodeid = 0x10; /* the LB */ | ||
1367 | bd2->header.command = UV_NET_ENDPOINT_INTD; | ||
1368 | bd2->header.int_both = 1; | ||
1369 | /* | ||
1370 | * all others need to be set to zero: | ||
1371 | * fairness chaining multilevel count replied_to | ||
1372 | */ | ||
1373 | } | ||
1374 | for_each_present_cpu(cpu) { | ||
1375 | if (pnode != uv_blade_to_pnode(uv_cpu_to_blade_id(cpu))) | ||
1376 | continue; | ||
1377 | bcp = &per_cpu(bau_control, cpu); | ||
1378 | bcp->descriptor_base = bau_desc; | ||
1379 | } | ||
1380 | } | ||
1381 | |||
1382 | /* | ||
1383 | * initialize the destination side's receiving buffers | ||
1384 | * entered for each uvhub in the partition | ||
1385 | * - node is first node (kernel memory notion) on the uvhub | ||
1386 | * - pnode is the uvhub's physical identifier | ||
1387 | */ | ||
1388 | static void | ||
1389 | uv_payload_queue_init(int node, int pnode) | ||
1390 | { | ||
1391 | int pn; | ||
1392 | int cpu; | ||
1393 | char *cp; | ||
1394 | unsigned long pa; | ||
1395 | struct bau_payload_queue_entry *pqp; | ||
1396 | struct bau_payload_queue_entry *pqp_malloc; | ||
1397 | struct bau_control *bcp; | ||
1398 | |||
1399 | pqp = (struct bau_payload_queue_entry *) kmalloc_node( | ||
1400 | (DEST_Q_SIZE + 1) * sizeof(struct bau_payload_queue_entry), | ||
1401 | GFP_KERNEL, node); | ||
1402 | BUG_ON(!pqp); | ||
1403 | pqp_malloc = pqp; | ||
1404 | |||
1405 | cp = (char *)pqp + 31; | ||
1406 | pqp = (struct bau_payload_queue_entry *)(((unsigned long)cp >> 5) << 5); | ||
1407 | |||
1408 | for_each_present_cpu(cpu) { | ||
1409 | if (pnode != uv_cpu_to_pnode(cpu)) | ||
1410 | continue; | ||
1411 | /* for every cpu on this pnode: */ | ||
1412 | bcp = &per_cpu(bau_control, cpu); | ||
1413 | bcp->va_queue_first = pqp; | ||
1414 | bcp->bau_msg_head = pqp; | ||
1415 | bcp->va_queue_last = pqp + (DEST_Q_SIZE - 1); | ||
1416 | } | ||
1417 | /* | ||
1418 | * need the pnode of where the memory was really allocated | ||
1419 | */ | ||
1420 | pa = uv_gpa(pqp); | ||
1421 | pn = pa >> uv_nshift; | ||
1422 | uv_write_global_mmr64(pnode, | ||
1423 | UVH_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST, | ||
1424 | ((unsigned long)pn << UV_PAYLOADQ_PNODE_SHIFT) | | ||
1425 | uv_physnodeaddr(pqp)); | ||
1426 | uv_write_global_mmr64(pnode, UVH_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL, | ||
1427 | uv_physnodeaddr(pqp)); | ||
1428 | uv_write_global_mmr64(pnode, UVH_LB_BAU_INTD_PAYLOAD_QUEUE_LAST, | ||
1429 | (unsigned long) | ||
1430 | uv_physnodeaddr(pqp + (DEST_Q_SIZE - 1))); | ||
1431 | /* in effect, all msg_type's are set to MSG_NOOP */ | ||
1432 | memset(pqp, 0, sizeof(struct bau_payload_queue_entry) * DEST_Q_SIZE); | ||
1433 | } | ||
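
The "+31, shift down 5, shift up 5" dance above is the classic align-up idiom: over-allocate one spare entry, then round the pointer up to the next 32-byte boundary. The same operation written as a standalone helper, equivalent to the shift pair since ((x >> 5) << 5) == (x & ~31):

    #include <stdint.h>

    /* round p up to the next 32-byte boundary */
    static void *align_up_32(void *p)
    {
        return (void *)(((uintptr_t)p + 31) & ~(uintptr_t)31);
    }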
1434 | |||
1435 | /* | ||
1436 | * Initialization of each UV hub's structures | ||
1437 | */ | ||
1438 | static void __init uv_init_uvhub(int uvhub, int vector) | ||
1439 | { | ||
1440 | int node; | ||
1441 | int pnode; | ||
1442 | unsigned long apicid; | ||
1443 | |||
1444 | node = uvhub_to_first_node(uvhub); | ||
1445 | pnode = uv_blade_to_pnode(uvhub); | ||
1446 | uv_activation_descriptor_init(node, pnode); | ||
1447 | uv_payload_queue_init(node, pnode); | ||
1448 | /* | ||
1449 | * the below initialization can't be in firmware because the | ||
1450 | * messaging IRQ will be determined by the OS | ||
1451 | */ | ||
1452 | apicid = uvhub_to_first_apicid(uvhub); | ||
1453 | uv_write_global_mmr64(pnode, UVH_BAU_DATA_CONFIG, | ||
1454 | ((apicid << 32) | vector)); | ||
1455 | } | ||
1456 | |||
1457 | /* | ||
1458 | * We will set BAU_MISC_CONTROL with a timeout period. | ||
1459 | * But the BIOS has set UVH_AGING_PRESCALE_SEL and UVH_TRANSACTION_TIMEOUT. | ||
1460 | * So the destination timeout period has to be calculated from them. | ||
1461 | */ | ||
1462 | static int | ||
1463 | calculate_destination_timeout(void) | ||
1464 | { | ||
1465 | unsigned long mmr_image; | ||
1466 | int mult1; | ||
1467 | int mult2; | ||
1468 | int index; | ||
1469 | int base; | ||
1470 | int ret; | ||
1471 | unsigned long ts_ns; | ||
1472 | |||
1473 | mult1 = UV_INTD_SOFT_ACK_TIMEOUT_PERIOD & BAU_MISC_CONTROL_MULT_MASK; | ||
1474 | mmr_image = uv_read_local_mmr(UVH_AGING_PRESCALE_SEL); | ||
1475 | index = (mmr_image >> BAU_URGENCY_7_SHIFT) & BAU_URGENCY_7_MASK; | ||
1476 | mmr_image = uv_read_local_mmr(UVH_TRANSACTION_TIMEOUT); | ||
1477 | mult2 = (mmr_image >> BAU_TRANS_SHIFT) & BAU_TRANS_MASK; | ||
1478 | base = timeout_base_ns[index]; | ||
1479 | ts_ns = base * mult1 * mult2; | ||
1480 | ret = ts_ns / 1000; | ||
1481 | return ret; | ||
1482 | } | ||
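
Concretely, the destination timeout is base * mult1 * mult2 nanoseconds, with the base picked from timeout_base_ns[] by the urgency7 index. A worked example with made-up field values (index 2, mult1 9, mult2 4; none of these are real MMR contents):

    #include <stdio.h>

    static const long timeout_base_ns[] = {
        20, 160, 1280, 10240, 81920, 655360, 5242880, 167772160
    };

    int main(void)
    {
        int index = 2;   /* assumed urgency7 field */
        long mult1 = 9;  /* assumed soft-ack period multiplier */
        long mult2 = 4;  /* assumed transaction-timeout multiplier */
        long ts_ns = timeout_base_ns[index] * mult1 * mult2; /* 46080 ns */

        printf("timeout = %ld us\n", ts_ns / 1000); /* prints 46 */
        return 0;
    }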
1483 | |||
1484 | /* | ||
1485 | * initialize the bau_control structure for each cpu | ||
1486 | */ | ||
1487 | static void __init uv_init_per_cpu(int nuvhubs) | ||
1488 | { | ||
1489 | int i; | ||
1490 | int cpu; | ||
1491 | int pnode; | ||
1492 | int uvhub; | ||
1493 | int have_hmaster; | ||
1494 | short socket = 0; | ||
1495 | unsigned short socket_mask; | ||
1496 | unsigned char *uvhub_mask; | ||
1497 | struct bau_control *bcp; | ||
1498 | struct uvhub_desc *bdp; | ||
1499 | struct socket_desc *sdp; | ||
1500 | struct bau_control *hmaster = NULL; | ||
1501 | struct bau_control *smaster = NULL; | ||
1502 | struct socket_desc { | ||
1503 | short num_cpus; | ||
1504 | short cpu_number[16]; | ||
1505 | }; | ||
1506 | struct uvhub_desc { | ||
1507 | unsigned short socket_mask; | ||
1508 | short num_cpus; | ||
1509 | short uvhub; | ||
1510 | short pnode; | ||
1511 | struct socket_desc socket[2]; | ||
1512 | }; | ||
1513 | struct uvhub_desc *uvhub_descs; | ||
1514 | |||
1515 | timeout_us = calculate_destination_timeout(); | ||
1516 | |||
1517 | uvhub_descs = (struct uvhub_desc *) | ||
1518 | kmalloc(nuvhubs * sizeof(struct uvhub_desc), GFP_KERNEL); | ||
1519 | memset(uvhub_descs, 0, nuvhubs * sizeof(struct uvhub_desc)); | ||
1520 | uvhub_mask = kzalloc((nuvhubs+7)/8, GFP_KERNEL); | ||
1521 | for_each_present_cpu(cpu) { | ||
1522 | bcp = &per_cpu(bau_control, cpu); | ||
1523 | memset(bcp, 0, sizeof(struct bau_control)); | ||
1524 | pnode = uv_cpu_hub_info(cpu)->pnode; | ||
1525 | uvhub = uv_cpu_hub_info(cpu)->numa_blade_id; | ||
1526 | *(uvhub_mask + (uvhub/8)) |= (1 << (uvhub%8)); | ||
1527 | bdp = &uvhub_descs[uvhub]; | ||
1528 | bdp->num_cpus++; | ||
1529 | bdp->uvhub = uvhub; | ||
1530 | bdp->pnode = pnode; | ||
1531 | /* kludge: 'assuming' one node per socket, and assuming that | ||
1532 | disabling a socket just leaves a gap in node numbers */ | ||
1533 | socket = (cpu_to_node(cpu) & 1); | ||
1534 | bdp->socket_mask |= (1 << socket); | ||
1535 | sdp = &bdp->socket[socket]; | ||
1536 | sdp->cpu_number[sdp->num_cpus] = cpu; | ||
1537 | sdp->num_cpus++; | ||
1538 | } | ||
1539 | for (uvhub = 0; uvhub < nuvhubs; uvhub++) { | ||
1540 | if (!(*(uvhub_mask + (uvhub/8)) & (1 << (uvhub%8)))) | ||
1541 | continue; | ||
1542 | have_hmaster = 0; | ||
1543 | bdp = &uvhub_descs[uvhub]; | ||
1544 | socket_mask = bdp->socket_mask; | ||
1545 | socket = 0; | ||
1546 | while (socket_mask) { | ||
1547 | if (!(socket_mask & 1)) | ||
1548 | goto nextsocket; | ||
1549 | sdp = &bdp->socket[socket]; | ||
1550 | for (i = 0; i < sdp->num_cpus; i++) { | ||
1551 | cpu = sdp->cpu_number[i]; | ||
1552 | bcp = &per_cpu(bau_control, cpu); | ||
1553 | bcp->cpu = cpu; | ||
1554 | if (i == 0) { | ||
1555 | smaster = bcp; | ||
1556 | if (!have_hmaster) { | ||
1557 | have_hmaster++; | ||
1558 | hmaster = bcp; | ||
1559 | } | ||
1560 | } | ||
1561 | bcp->cpus_in_uvhub = bdp->num_cpus; | ||
1562 | bcp->cpus_in_socket = sdp->num_cpus; | ||
1563 | bcp->socket_master = smaster; | ||
1564 | bcp->uvhub = bdp->uvhub; | ||
1565 | bcp->uvhub_master = hmaster; | ||
1566 | bcp->uvhub_cpu = uv_cpu_hub_info(cpu)-> | ||
1567 | blade_processor_id; | ||
1568 | } | ||
1569 | nextsocket: | ||
1570 | socket++; | ||
1571 | socket_mask = (socket_mask >> 1); | ||
1572 | } | ||
1573 | } | ||
1574 | kfree(uvhub_descs); | ||
1575 | kfree(uvhub_mask); | ||
1576 | for_each_present_cpu(cpu) { | ||
1577 | bcp = &per_cpu(bau_control, cpu); | ||
1578 | bcp->baudisabled = 0; | ||
1579 | bcp->statp = &per_cpu(ptcstats, cpu); | ||
1580 | /* time interval to catch a hardware stay-busy bug */ | ||
1581 | bcp->timeout_interval = microsec_2_cycles(2*timeout_us); | ||
1582 | bcp->max_bau_concurrent = max_bau_concurrent; | ||
1583 | bcp->max_bau_concurrent_constant = max_bau_concurrent; | ||
1584 | bcp->plugged_delay = plugged_delay; | ||
1585 | bcp->plugsb4reset = plugsb4reset; | ||
1586 | bcp->timeoutsb4reset = timeoutsb4reset; | ||
1587 | bcp->ipi_reset_limit = ipi_reset_limit; | ||
1588 | bcp->complete_threshold = complete_threshold; | ||
1589 | bcp->congested_response_us = congested_response_us; | ||
1590 | bcp->congested_reps = congested_reps; | ||
1591 | bcp->congested_period = congested_period; | ||
1592 | } | ||
1593 | } | ||
1594 | |||
1595 | /* | ||
1596 | * Initialization of BAU-related structures | ||
1597 | */ | ||
1598 | static int __init uv_bau_init(void) | ||
1599 | { | ||
1600 | int uvhub; | ||
1601 | int pnode; | ||
1602 | int nuvhubs; | ||
1603 | int cur_cpu; | ||
1604 | int vector; | ||
1605 | unsigned long mmr; | ||
1606 | |||
1607 | if (!is_uv_system()) | ||
1608 | return 0; | ||
1609 | |||
1610 | if (nobau) | ||
1611 | return 0; | ||
1612 | |||
1613 | for_each_possible_cpu(cur_cpu) | ||
1614 | zalloc_cpumask_var_node(&per_cpu(uv_flush_tlb_mask, cur_cpu), | ||
1615 | GFP_KERNEL, cpu_to_node(cur_cpu)); | ||
1616 | |||
1617 | uv_nshift = uv_hub_info->m_val; | ||
1618 | uv_mmask = (1UL << uv_hub_info->m_val) - 1; | ||
1619 | nuvhubs = uv_num_possible_blades(); | ||
1620 | spin_lock_init(&disable_lock); | ||
1621 | congested_cycles = microsec_2_cycles(congested_response_us); | ||
1622 | |||
1623 | uv_init_per_cpu(nuvhubs); | ||
1624 | |||
1625 | uv_partition_base_pnode = 0x7fffffff; | ||
1626 | for (uvhub = 0; uvhub < nuvhubs; uvhub++) | ||
1627 | if (uv_blade_nr_possible_cpus(uvhub) && | ||
1628 | (uv_blade_to_pnode(uvhub) < uv_partition_base_pnode)) | ||
1629 | uv_partition_base_pnode = uv_blade_to_pnode(uvhub); | ||
1630 | |||
1631 | vector = UV_BAU_MESSAGE; | ||
1632 | for_each_possible_blade(uvhub) | ||
1633 | if (uv_blade_nr_possible_cpus(uvhub)) | ||
1634 | uv_init_uvhub(uvhub, vector); | ||
1635 | |||
1636 | uv_enable_timeouts(); | ||
1637 | alloc_intr_gate(vector, uv_bau_message_intr1); | ||
1638 | |||
1639 | for_each_possible_blade(uvhub) { | ||
1640 | if (uv_blade_nr_possible_cpus(uvhub)) { | ||
1641 | pnode = uv_blade_to_pnode(uvhub); | ||
1642 | /* INIT the bau */ | ||
1643 | uv_write_global_mmr64(pnode, | ||
1644 | UVH_LB_BAU_SB_ACTIVATION_CONTROL, | ||
1645 | ((unsigned long)1 << 63)); | ||
1646 | mmr = 1; /* should be 1 to broadcast to both sockets */ | ||
1647 | uv_write_global_mmr64(pnode, UVH_BAU_DATA_BROADCAST, | ||
1648 | mmr); | ||
1649 | } | ||
1650 | } | ||
1651 | |||
1652 | return 0; | ||
1653 | } | ||
1654 | core_initcall(uv_bau_init); | ||
1655 | fs_initcall(uv_ptc_init); | ||