author		Linus Torvalds <torvalds@linux-foundation.org>	2010-08-13 13:38:37 -0400
committer	Linus Torvalds <torvalds@linux-foundation.org>	2010-08-13 13:38:37 -0400
commit		36450e9c953b2a6838def5945de8ae508141e834 (patch)
tree		13657a037a6f41bb915f5700a6b590bbe27ea167 /arch/x86/kernel
parent		2f2c779583e9646097b57599f8efeb8eca7bd654 (diff)
parent		93a7ca0c3ebe5d931126f1fb732cb9c4518383d4 (diff)
Merge branch 'x86-uv-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip
* 'x86-uv-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip:
x86, UV: Initialize BAU MMRs only on hubs with cpus
x86, UV: Modularize BAU send and wait
x86, UV: BAU broadcast to the local hub
x86, UV: Correct BAU regular message type
x86, UV: Remove BAU check for stay-busy
x86, UV: Correct BAU discovery of hubs and sockets
x86, UV: Correct BAU software acknowledge
x86, UV: BAU structure rearranging
x86, UV: Shorten access to BAU statistics structure
x86, UV: Disable BAU on network congestion
x86, UV: BAU tunables into a debugfs file
x86, UV: Calculate BAU destination timeout
Diffstat (limited to 'arch/x86/kernel')
-rw-r--r--	arch/x86/kernel/tlb_uv.c	756
1 file changed, 506 insertions, 250 deletions
diff --git a/arch/x86/kernel/tlb_uv.c b/arch/x86/kernel/tlb_uv.c
index 7fea555929e2..59efb5390b37 100644
--- a/arch/x86/kernel/tlb_uv.c
+++ b/arch/x86/kernel/tlb_uv.c
@@ -8,6 +8,7 @@
  */
 #include <linux/seq_file.h>
 #include <linux/proc_fs.h>
+#include <linux/debugfs.h>
 #include <linux/kernel.h>
 #include <linux/slab.h>
 
@@ -22,19 +23,37 @@
 #include <asm/irq_vectors.h>
 #include <asm/timer.h>
 
-struct msg_desc {
-	struct bau_payload_queue_entry *msg;
-	int msg_slot;
-	int sw_ack_slot;
-	struct bau_payload_queue_entry *va_queue_first;
-	struct bau_payload_queue_entry *va_queue_last;
+/* timeouts in nanoseconds (indexed by UVH_AGING_PRESCALE_SEL urgency7 30:28) */
+static int timeout_base_ns[] = {
+		20,
+		160,
+		1280,
+		10240,
+		81920,
+		655360,
+		5242880,
+		167772160
 };
-
-#define UV_INTD_SOFT_ACK_TIMEOUT_PERIOD	0x000000000bUL
-
-static int uv_bau_max_concurrent __read_mostly;
-
+static int timeout_us;
 static int nobau;
+static int baudisabled;
+static spinlock_t disable_lock;
+static cycles_t congested_cycles;
+
+/* tunables: */
+static int max_bau_concurrent = MAX_BAU_CONCURRENT;
+static int max_bau_concurrent_constant = MAX_BAU_CONCURRENT;
+static int plugged_delay = PLUGGED_DELAY;
+static int plugsb4reset = PLUGSB4RESET;
+static int timeoutsb4reset = TIMEOUTSB4RESET;
+static int ipi_reset_limit = IPI_RESET_LIMIT;
+static int complete_threshold = COMPLETE_THRESHOLD;
+static int congested_response_us = CONGESTED_RESPONSE_US;
+static int congested_reps = CONGESTED_REPS;
+static int congested_period = CONGESTED_PERIOD;
+static struct dentry *tunables_dir;
+static struct dentry *tunables_file;
+
 static int __init setup_nobau(char *arg)
 {
 	nobau = 1;
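Note on timeout_base_ns: successive entries grow by a factor of eight (the last step by thirty-two), spanning 20 ns to roughly 168 ms, and a 3-bit urgency field of UVH_AGING_PRESCALE_SEL selects one of them. A reader's sketch of that selection, with an assumed shift/mask (bits 30:28) standing in for the BAU_URGENCY_7_* constants that uv_bau.h defines:

	/* sketch only; 28 and 0x7 are assumptions for BAU_URGENCY_7_SHIFT/MASK */
	static int pick_timeout_base_ns(unsigned long aging_prescale_sel)
	{
		int index = (aging_prescale_sel >> 28) & 0x7;	/* urgency7 */

		return timeout_base_ns[index];
	}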
@@ -52,10 +71,6 @@ static DEFINE_PER_CPU(struct ptc_stats, ptcstats);
 static DEFINE_PER_CPU(struct bau_control, bau_control);
 static DEFINE_PER_CPU(cpumask_var_t, uv_flush_tlb_mask);
 
-struct reset_args {
-	int sender;
-};
-
 /*
  * Determine the first node on a uvhub. 'Nodes' are used for kernel
  * memory allocation.
@@ -126,7 +141,7 @@ static inline void uv_bau_process_retry_msg(struct msg_desc *mdp,
 	struct ptc_stats *stat;
 
 	msg = mdp->msg;
-	stat = &per_cpu(ptcstats, bcp->cpu);
+	stat = bcp->statp;
 	stat->d_retries++;
 	/*
 	 * cancel any message from msg+1 to the retry itself
@@ -146,15 +161,14 @@ static inline void uv_bau_process_retry_msg(struct msg_desc *mdp,
 			slot2 = msg2 - mdp->va_queue_first;
 			mmr = uv_read_local_mmr
 				(UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE);
-			msg_res = ((msg2->sw_ack_vector << 8) |
-				   msg2->sw_ack_vector);
+			msg_res = msg2->sw_ack_vector;
 			/*
 			 * This is a message retry; clear the resources held
 			 * by the previous message only if they timed out.
 			 * If it has not timed out we have an unexpected
 			 * situation to report.
 			 */
-			if (mmr & (msg_res << 8)) {
+			if (mmr & (msg_res << UV_SW_ACK_NPENDING)) {
 				/*
 				 * is the resource timed out?
 				 * make everyone ignore the cancelled message.
@@ -164,9 +178,9 @@ static inline void uv_bau_process_retry_msg(struct msg_desc *mdp,
 				cancel_count++;
 				uv_write_local_mmr(
 				    UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_ALIAS,
-					(msg_res << 8) | msg_res);
-			} else
-				printk(KERN_INFO "note bau retry: no effect\n");
+					(msg_res << UV_SW_ACK_NPENDING) |
+					msg_res);
+			}
 		}
 	}
 	if (!cancel_count)
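Note: the msg_res changes in this hunk and in uv_do_reset() below encode the same software-acknowledge register layout: the low UV_SW_ACK_NPENDING bits of the MMR are the per-resource pending flags, and the next group are the matching timed-out flags, so a resource's timeout bit is its pending bit shifted up by UV_SW_ACK_NPENDING. A reading aid, not kernel code (the value 8 is taken from uv_bau.h and assumed here):

	#define UV_SW_ACK_NPENDING 8	/* assumed value, per uv_bau.h */

	/* does this message's resource show as timed out in the MMR image? */
	static int sw_ack_timed_out(unsigned long mmr, unsigned long msg_res)
	{
		return (mmr & (msg_res << UV_SW_ACK_NPENDING)) != 0;
	}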
@@ -190,7 +204,7 @@ static void uv_bau_process_message(struct msg_desc *mdp,
 	 * This must be a normal message, or retry of a normal message
 	 */
 	msg = mdp->msg;
-	stat = &per_cpu(ptcstats, bcp->cpu);
+	stat = bcp->statp;
 	if (msg->address == TLB_FLUSH_ALL) {
 		local_flush_tlb();
 		stat->d_alltlb++;
@@ -274,7 +288,7 @@ uv_do_reset(void *ptr)
 
 	bcp = &per_cpu(bau_control, smp_processor_id());
 	rap = (struct reset_args *)ptr;
-	stat = &per_cpu(ptcstats, bcp->cpu);
+	stat = bcp->statp;
 	stat->d_resets++;
 
 	/*
@@ -302,13 +316,13 @@ uv_do_reset(void *ptr)
 		 */
 		mmr = uv_read_local_mmr
 				(UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE);
-		msg_res = ((msg->sw_ack_vector << 8) |
-			   msg->sw_ack_vector);
+		msg_res = msg->sw_ack_vector;
 		if (mmr & msg_res) {
 			stat->d_rcanceled++;
 			uv_write_local_mmr(
 			    UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_ALIAS,
-			    msg_res);
+			    (msg_res << UV_SW_ACK_NPENDING) |
+			    msg_res);
 		}
 	}
 }
@@ -386,17 +400,12 @@ static int uv_wait_completion(struct bau_desc *bau_desc,
 	unsigned long mmr_offset, int right_shift, int this_cpu,
 	struct bau_control *bcp, struct bau_control *smaster, long try)
 {
-	int relaxes = 0;
 	unsigned long descriptor_status;
-	unsigned long mmr;
-	unsigned long mask;
 	cycles_t ttime;
-	cycles_t timeout_time;
-	struct ptc_stats *stat = &per_cpu(ptcstats, this_cpu);
+	struct ptc_stats *stat = bcp->statp;
 	struct bau_control *hmaster;
 
 	hmaster = bcp->uvhub_master;
-	timeout_time = get_cycles() + bcp->timeout_interval;
 
 	/* spin on the status MMR, waiting for it to go idle */
 	while ((descriptor_status = (((unsigned long)
@@ -423,7 +432,8 @@ static int uv_wait_completion(struct bau_desc *bau_desc,
 			 * pending. In that case hardware returns the
 			 * ERROR that looks like a destination timeout.
 			 */
-			if (cycles_2_us(ttime - bcp->send_message) < BIOS_TO) {
+			if (cycles_2_us(ttime - bcp->send_message) <
+					timeout_us) {
 				bcp->conseccompletes = 0;
 				return FLUSH_RETRY_PLUGGED;
 			}
@@ -435,26 +445,6 @@ static int uv_wait_completion(struct bau_desc *bau_desc,
 			 * descriptor_status is still BUSY
 			 */
 			cpu_relax();
-			relaxes++;
-			if (relaxes >= 10000) {
-				relaxes = 0;
-				if (get_cycles() > timeout_time) {
-					quiesce_local_uvhub(hmaster);
-
-					/* single-thread the register change */
-					spin_lock(&hmaster->masks_lock);
-					mmr = uv_read_local_mmr(mmr_offset);
-					mask = 0UL;
-					mask |= (3UL < right_shift);
-					mask = ~mask;
-					mmr &= mask;
-					uv_write_local_mmr(mmr_offset, mmr);
-					spin_unlock(&hmaster->masks_lock);
-					end_uvhub_quiesce(hmaster);
-					stat->s_busy++;
-					return FLUSH_GIVEUP;
-				}
-			}
 		}
 	}
 	bcp->conseccompletes++;
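Note on the block removed above: its mask computation read "mask |= (3UL < right_shift)", a less-than comparison where a left shift was presumably intended, so this stay-busy escape path never cleared the two status bits it aimed at. Deleting the whole check makes the point moot; the intended mask would plausibly have been built like this (hypothetical repair, for illustration only):

	/* clear the 2-bit descriptor-status field at 'right_shift' */
	static unsigned long busy_status_clear_mask(int right_shift)
	{
		return ~(3UL << right_shift);
	}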
@@ -494,56 +484,116 @@ static inline int atomic_inc_unless_ge(spinlock_t *lock, atomic_t *v, int u)
 	return 1;
 }
 
+/*
+ * Our retries are blocked by all destination swack resources being
+ * in use, and a timeout is pending. In that case hardware immediately
+ * returns the ERROR that looks like a destination timeout.
+ */
+static void
+destination_plugged(struct bau_desc *bau_desc, struct bau_control *bcp,
+	struct bau_control *hmaster, struct ptc_stats *stat)
+{
+	udelay(bcp->plugged_delay);
+	bcp->plugged_tries++;
+	if (bcp->plugged_tries >= bcp->plugsb4reset) {
+		bcp->plugged_tries = 0;
+		quiesce_local_uvhub(hmaster);
+		spin_lock(&hmaster->queue_lock);
+		uv_reset_with_ipi(&bau_desc->distribution, bcp->cpu);
+		spin_unlock(&hmaster->queue_lock);
+		end_uvhub_quiesce(hmaster);
+		bcp->ipi_attempts++;
+		stat->s_resets_plug++;
+	}
+}
+
+static void
+destination_timeout(struct bau_desc *bau_desc, struct bau_control *bcp,
+	struct bau_control *hmaster, struct ptc_stats *stat)
+{
+	hmaster->max_bau_concurrent = 1;
+	bcp->timeout_tries++;
+	if (bcp->timeout_tries >= bcp->timeoutsb4reset) {
+		bcp->timeout_tries = 0;
+		quiesce_local_uvhub(hmaster);
+		spin_lock(&hmaster->queue_lock);
+		uv_reset_with_ipi(&bau_desc->distribution, bcp->cpu);
+		spin_unlock(&hmaster->queue_lock);
+		end_uvhub_quiesce(hmaster);
+		bcp->ipi_attempts++;
+		stat->s_resets_timeout++;
+	}
+}
+
+/*
+ * Completions are taking a very long time due to a congested numalink
+ * network.
+ */
+static void
+disable_for_congestion(struct bau_control *bcp, struct ptc_stats *stat)
+{
+	int tcpu;
+	struct bau_control *tbcp;
+
+	/* let only one cpu do this disabling */
+	spin_lock(&disable_lock);
+	if (!baudisabled && bcp->period_requests &&
+	    ((bcp->period_time / bcp->period_requests) > congested_cycles)) {
+		/* it becomes this cpu's job to turn on the use of the
+		   BAU again */
+		baudisabled = 1;
+		bcp->set_bau_off = 1;
+		bcp->set_bau_on_time = get_cycles() +
+			sec_2_cycles(bcp->congested_period);
+		stat->s_bau_disabled++;
+		for_each_present_cpu(tcpu) {
+			tbcp = &per_cpu(bau_control, tcpu);
+			tbcp->baudisabled = 1;
+		}
+	}
+	spin_unlock(&disable_lock);
+}
+
 /**
  * uv_flush_send_and_wait
  *
  * Send a broadcast and wait for it to complete.
  *
- * The flush_mask contains the cpus the broadcast is to be sent to, plus
+ * The flush_mask contains the cpus the broadcast is to be sent to including
  * cpus that are on the local uvhub.
  *
- * Returns NULL if all flushing represented in the mask was done. The mask
- * is zeroed.
- * Returns @flush_mask if some remote flushing remains to be done. The
- * mask will have some bits still set, representing any cpus on the local
- * uvhub (not current cpu) and any on remote uvhubs if the broadcast failed.
+ * Returns 0 if all flushing represented in the mask was done.
+ * Returns 1 if it gives up entirely and the original cpu mask is to be
+ * returned to the kernel.
  */
-const struct cpumask *uv_flush_send_and_wait(struct bau_desc *bau_desc,
-					     struct cpumask *flush_mask,
-					     struct bau_control *bcp)
+int uv_flush_send_and_wait(struct bau_desc *bau_desc,
+	struct cpumask *flush_mask, struct bau_control *bcp)
 {
 	int right_shift;
-	int uvhub;
-	int bit;
 	int completion_status = 0;
 	int seq_number = 0;
 	long try = 0;
 	int cpu = bcp->uvhub_cpu;
 	int this_cpu = bcp->cpu;
-	int this_uvhub = bcp->uvhub;
 	unsigned long mmr_offset;
 	unsigned long index;
 	cycles_t time1;
 	cycles_t time2;
-	struct ptc_stats *stat = &per_cpu(ptcstats, bcp->cpu);
+	cycles_t elapsed;
+	struct ptc_stats *stat = bcp->statp;
 	struct bau_control *smaster = bcp->socket_master;
 	struct bau_control *hmaster = bcp->uvhub_master;
 
-	/*
-	 * Spin here while there are hmaster->max_concurrent or more active
-	 * descriptors. This is the per-uvhub 'throttle'.
-	 */
 	if (!atomic_inc_unless_ge(&hmaster->uvhub_lock,
 			&hmaster->active_descriptor_count,
-			hmaster->max_concurrent)) {
+			hmaster->max_bau_concurrent)) {
 		stat->s_throttles++;
 		do {
 			cpu_relax();
 		} while (!atomic_inc_unless_ge(&hmaster->uvhub_lock,
 			&hmaster->active_descriptor_count,
-			hmaster->max_concurrent));
+			hmaster->max_bau_concurrent));
 	}
-
 	while (hmaster->uvhub_quiesce)
 		cpu_relax();
 
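Note: destination_plugged() and destination_timeout(), factored out above, escalate identically: back off, count tries, and past plugsb4reset/timeoutsb4reset failures quiesce the hub and reset the outstanding messages by IPI. The throttle guarding the send path is atomic_inc_unless_ge(), defined earlier in this file; its shape is roughly this sketch (names are illustrative, the real one takes hmaster->uvhub_lock):

	/* increment the active-descriptor count only while below the limit */
	static int inc_unless_ge_sketch(spinlock_t *lock, atomic_t *v, int limit)
	{
		int ok = 0;

		spin_lock(lock);
		if (atomic_read(v) < limit) {
			atomic_inc(v);
			ok = 1;
		}
		spin_unlock(lock);
		return ok;
	}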
@@ -557,23 +607,10 @@ const struct cpumask *uv_flush_send_and_wait(struct bau_desc *bau_desc,
 	}
 	time1 = get_cycles();
 	do {
-		/*
-		 * Every message from any given cpu gets a unique message
-		 * sequence number. But retries use that same number.
-		 * Our message may have timed out at the destination because
-		 * all sw-ack resources are in use and there is a timeout
-		 * pending there. In that case, our last send never got
-		 * placed into the queue and we need to persist until it
-		 * does.
-		 *
-		 * Make any retry a type MSG_RETRY so that the destination will
-		 * free any resource held by a previous message from this cpu.
-		 */
 		if (try == 0) {
-			/* use message type set by the caller the first time */
+			bau_desc->header.msg_type = MSG_REGULAR;
 			seq_number = bcp->message_number++;
 		} else {
-			/* use RETRY type on all the rest; same sequence */
 			bau_desc->header.msg_type = MSG_RETRY;
 			stat->s_retry_messages++;
 		}
@@ -581,50 +618,17 @@ const struct cpumask *uv_flush_send_and_wait(struct bau_desc *bau_desc,
 		index = (1UL << UVH_LB_BAU_SB_ACTIVATION_CONTROL_PUSH_SHFT) |
 			bcp->uvhub_cpu;
 		bcp->send_message = get_cycles();
-
 		uv_write_local_mmr(UVH_LB_BAU_SB_ACTIVATION_CONTROL, index);
-
 		try++;
 		completion_status = uv_wait_completion(bau_desc, mmr_offset,
 			right_shift, this_cpu, bcp, smaster, try);
 
 		if (completion_status == FLUSH_RETRY_PLUGGED) {
-			/*
-			 * Our retries may be blocked by all destination swack
-			 * resources being consumed, and a timeout pending. In
-			 * that case hardware immediately returns the ERROR
-			 * that looks like a destination timeout.
-			 */
-			udelay(TIMEOUT_DELAY);
-			bcp->plugged_tries++;
-			if (bcp->plugged_tries >= PLUGSB4RESET) {
-				bcp->plugged_tries = 0;
-				quiesce_local_uvhub(hmaster);
-				spin_lock(&hmaster->queue_lock);
-				uv_reset_with_ipi(&bau_desc->distribution,
-							this_cpu);
-				spin_unlock(&hmaster->queue_lock);
-				end_uvhub_quiesce(hmaster);
-				bcp->ipi_attempts++;
-				stat->s_resets_plug++;
-			}
+			destination_plugged(bau_desc, bcp, hmaster, stat);
 		} else if (completion_status == FLUSH_RETRY_TIMEOUT) {
-			hmaster->max_concurrent = 1;
-			bcp->timeout_tries++;
-			udelay(TIMEOUT_DELAY);
-			if (bcp->timeout_tries >= TIMEOUTSB4RESET) {
-				bcp->timeout_tries = 0;
-				quiesce_local_uvhub(hmaster);
-				spin_lock(&hmaster->queue_lock);
-				uv_reset_with_ipi(&bau_desc->distribution,
-							this_cpu);
-				spin_unlock(&hmaster->queue_lock);
-				end_uvhub_quiesce(hmaster);
-				bcp->ipi_attempts++;
-				stat->s_resets_timeout++;
-			}
+			destination_timeout(bau_desc, bcp, hmaster, stat);
 		}
-		if (bcp->ipi_attempts >= 3) {
+		if (bcp->ipi_attempts >= bcp->ipi_reset_limit) {
 			bcp->ipi_attempts = 0;
 			completion_status = FLUSH_GIVEUP;
 			break;
@@ -633,49 +637,36 @@ const struct cpumask *uv_flush_send_and_wait(struct bau_desc *bau_desc,
 	} while ((completion_status == FLUSH_RETRY_PLUGGED) ||
 		 (completion_status == FLUSH_RETRY_TIMEOUT));
 	time2 = get_cycles();
-
-	if ((completion_status == FLUSH_COMPLETE) && (bcp->conseccompletes > 5)
-	    && (hmaster->max_concurrent < hmaster->max_concurrent_constant))
-			hmaster->max_concurrent++;
-
-	/*
-	 * hold any cpu not timing out here; no other cpu currently held by
-	 * the 'throttle' should enter the activation code
-	 */
+	bcp->plugged_tries = 0;
+	bcp->timeout_tries = 0;
+	if ((completion_status == FLUSH_COMPLETE) &&
+	    (bcp->conseccompletes > bcp->complete_threshold) &&
+	    (hmaster->max_bau_concurrent <
+					hmaster->max_bau_concurrent_constant))
+			hmaster->max_bau_concurrent++;
 	while (hmaster->uvhub_quiesce)
 		cpu_relax();
 	atomic_dec(&hmaster->active_descriptor_count);
-
-	/* guard against cycles wrap */
-	if (time2 > time1)
-		stat->s_time += (time2 - time1);
-	else
-		stat->s_requestor--; /* don't count this one */
+	if (time2 > time1) {
+		elapsed = time2 - time1;
+		stat->s_time += elapsed;
+		if ((completion_status == FLUSH_COMPLETE) && (try == 1)) {
+			bcp->period_requests++;
+			bcp->period_time += elapsed;
+			if ((elapsed > congested_cycles) &&
+			    (bcp->period_requests > bcp->congested_reps)) {
+				disable_for_congestion(bcp, stat);
+			}
+		}
+	} else
+		stat->s_requestor--;
 	if (completion_status == FLUSH_COMPLETE && try > 1)
 		stat->s_retriesok++;
 	else if (completion_status == FLUSH_GIVEUP) {
-		/*
-		 * Cause the caller to do an IPI-style TLB shootdown on
-		 * the target cpu's, all of which are still in the mask.
-		 */
 		stat->s_giveup++;
-		return flush_mask;
-	}
-
-	/*
-	 * Success, so clear the remote cpu's from the mask so we don't
-	 * use the IPI method of shootdown on them.
-	 */
-	for_each_cpu(bit, flush_mask) {
-		uvhub = uv_cpu_to_blade_id(bit);
-		if (uvhub == this_uvhub)
-			continue;
-		cpumask_clear_cpu(bit, flush_mask);
+		return 1;
 	}
-	if (!cpumask_empty(flush_mask))
-		return flush_mask;
-
-	return NULL;
+	return 0;
 }
 
 /**
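Note: the accounting added above samples only first-try completions (try == 1). Each adds its latency to period_time; once one of them exceeds congested_cycles and more than congested_reps have been sampled, disable_for_congestion() is consulted, and it disables the BAU only if the running average is also worse than congested_cycles. That inner predicate, restated compactly with the names this patch uses:

	/* sketch of the decision inside disable_for_congestion() */
	static int bau_period_congested(struct bau_control *bcp)
	{
		return bcp->period_requests &&
			(bcp->period_time / bcp->period_requests) >
							congested_cycles;
	}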
@@ -707,70 +698,89 @@ const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask,
 			  struct mm_struct *mm,
 			  unsigned long va, unsigned int cpu)
 {
-	int remotes;
 	int tcpu;
 	int uvhub;
 	int locals = 0;
+	int remotes = 0;
+	int hubs = 0;
 	struct bau_desc *bau_desc;
 	struct cpumask *flush_mask;
 	struct ptc_stats *stat;
 	struct bau_control *bcp;
+	struct bau_control *tbcp;
 
+	/* kernel was booted 'nobau' */
 	if (nobau)
 		return cpumask;
 
 	bcp = &per_cpu(bau_control, cpu);
+	stat = bcp->statp;
+
+	/* bau was disabled due to slow response */
+	if (bcp->baudisabled) {
+		/* the cpu that disabled it must re-enable it */
+		if (bcp->set_bau_off) {
+			if (get_cycles() >= bcp->set_bau_on_time) {
+				stat->s_bau_reenabled++;
+				baudisabled = 0;
+				for_each_present_cpu(tcpu) {
+					tbcp = &per_cpu(bau_control, tcpu);
+					tbcp->baudisabled = 0;
+					tbcp->period_requests = 0;
+					tbcp->period_time = 0;
+				}
+			}
+		}
+		return cpumask;
+	}
+
 	/*
 	 * Each sending cpu has a per-cpu mask which it fills from the caller's
-	 * cpu mask. Only remote cpus are converted to uvhubs and copied.
+	 * cpu mask. All cpus are converted to uvhubs and copied to the
+	 * activation descriptor.
 	 */
 	flush_mask = (struct cpumask *)per_cpu(uv_flush_tlb_mask, cpu);
-	/*
-	 * copy cpumask to flush_mask, removing current cpu
-	 * (current cpu should already have been flushed by the caller and
-	 * should never be returned if we return flush_mask)
-	 */
+	/* don't actually do a shootdown of the local cpu */
 	cpumask_andnot(flush_mask, cpumask, cpumask_of(cpu));
 	if (cpu_isset(cpu, *cpumask))
-		locals++;  /* current cpu was targeted */
+		stat->s_ntargself++;
 
 	bau_desc = bcp->descriptor_base;
 	bau_desc += UV_ITEMS_PER_DESCRIPTOR * bcp->uvhub_cpu;
-
 	bau_uvhubs_clear(&bau_desc->distribution, UV_DISTRIBUTION_SIZE);
-	remotes = 0;
+
+	/* cpu statistics */
 	for_each_cpu(tcpu, flush_mask) {
 		uvhub = uv_cpu_to_blade_id(tcpu);
-		if (uvhub == bcp->uvhub) {
-			locals++;
-			continue;
-		}
 		bau_uvhub_set(uvhub, &bau_desc->distribution);
-		remotes++;
-	}
-	if (remotes == 0) {
-		/*
-		 * No off_hub flushing; return status for local hub.
-		 * Return the caller's mask if all were local (the current
-		 * cpu may be in that mask).
-		 */
-		if (locals)
-			return cpumask;
+		if (uvhub == bcp->uvhub)
+			locals++;
 		else
-			return NULL;
+			remotes++;
 	}
-	stat = &per_cpu(ptcstats, cpu);
+	if ((locals + remotes) == 0)
+		return NULL;
 	stat->s_requestor++;
-	stat->s_ntargcpu += remotes;
-	remotes = bau_uvhub_weight(&bau_desc->distribution);
-	stat->s_ntarguvhub += remotes;
-	if (remotes >= 16)
+	stat->s_ntargcpu += remotes + locals;
+	stat->s_ntargremotes += remotes;
+	stat->s_ntarglocals += locals;
+
+	/* uvhub statistics */
+	hubs = bau_uvhub_weight(&bau_desc->distribution);
+	if (locals) {
+		stat->s_ntarglocaluvhub++;
+		stat->s_ntargremoteuvhub += (hubs - 1);
+	} else
+		stat->s_ntargremoteuvhub += hubs;
+	stat->s_ntarguvhub += hubs;
+	if (hubs >= 16)
 		stat->s_ntarguvhub16++;
-	else if (remotes >= 8)
+	else if (hubs >= 8)
 		stat->s_ntarguvhub8++;
-	else if (remotes >= 4)
+	else if (hubs >= 4)
 		stat->s_ntarguvhub4++;
-	else if (remotes >= 2)
+	else if (hubs >= 2)
 		stat->s_ntarguvhub2++;
 	else
 		stat->s_ntarguvhub1++;
@@ -779,10 +789,13 @@ const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask,
 	bau_desc->payload.sending_cpu = cpu;
 
 	/*
-	 * uv_flush_send_and_wait returns null if all cpu's were messaged, or
-	 * the adjusted flush_mask if any cpu's were not messaged.
+	 * uv_flush_send_and_wait returns 0 if all cpu's were messaged,
+	 * or 1 if it gave up and the original cpumask should be returned.
 	 */
-	return uv_flush_send_and_wait(bau_desc, flush_mask, bcp);
+	if (!uv_flush_send_and_wait(bau_desc, flush_mask, bcp))
+		return NULL;
+	else
+		return cpumask;
 }
 
 /*
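Note: with the 0/1 convention inside uv_flush_send_and_wait(), uv_flush_tlb_others() keeps its external contract: NULL means the BAU covered everything, a non-NULL mask tells the caller to fall back to IPIs. The caller in arch/x86/mm/tlb.c follows roughly this shape (a paraphrase, assuming the flush_tlb_others_ipi() helper of that era):

	/* sketch of native_flush_tlb_others(), paraphrased */
	static void flush_others_sketch(const struct cpumask *cpumask,
			struct mm_struct *mm, unsigned long va, unsigned int cpu)
	{
		if (is_uv_system()) {
			cpumask = uv_flush_tlb_others(cpumask, mm, va, cpu);
			if (cpumask)
				flush_tlb_others_ipi(cpumask, mm, va);
			return;
		}
		flush_tlb_others_ipi(cpumask, mm, va);
	}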
@@ -810,7 +823,7 @@ void uv_bau_message_interrupt(struct pt_regs *regs)
 
 	time_start = get_cycles();
 	bcp = &per_cpu(bau_control, smp_processor_id());
-	stat = &per_cpu(ptcstats, smp_processor_id());
+	stat = bcp->statp;
 	msgdesc.va_queue_first = bcp->va_queue_first;
 	msgdesc.va_queue_last = bcp->va_queue_last;
 	msg = bcp->bau_msg_head;
@@ -908,12 +921,12 @@ static void uv_ptc_seq_stop(struct seq_file *file, void *data)
 }
 
 static inline unsigned long long
-millisec_2_cycles(unsigned long millisec)
+microsec_2_cycles(unsigned long microsec)
 {
 	unsigned long ns;
 	unsigned long long cyc;
 
-	ns = millisec * 1000;
+	ns = microsec * 1000;
 	cyc = (ns << CYC2NS_SCALE_FACTOR)/(per_cpu(cyc2ns, smp_processor_id()));
 	return cyc;
 }
@@ -931,15 +944,19 @@ static int uv_ptc_seq_show(struct seq_file *file, void *data)
 
 	if (!cpu) {
 		seq_printf(file,
-		"# cpu sent stime numuvhubs numuvhubs16 numuvhubs8 ");
+		"# cpu sent stime self locals remotes ncpus localhub ");
 		seq_printf(file,
-		"numuvhubs4 numuvhubs2 numuvhubs1 numcpus dto ");
+		"remotehub numuvhubs numuvhubs16 numuvhubs8 ");
+		seq_printf(file,
+		"numuvhubs4 numuvhubs2 numuvhubs1 dto ");
 		seq_printf(file,
 		"retries rok resetp resett giveup sto bz throt ");
 		seq_printf(file,
 		"sw_ack recv rtime all ");
 		seq_printf(file,
-		"one mult none retry canc nocan reset rcan\n");
+		"one mult none retry canc nocan reset rcan ");
+		seq_printf(file,
+		"disable enable\n");
 	}
 	if (cpu < num_possible_cpus() && cpu_online(cpu)) {
 		stat = &per_cpu(ptcstats, cpu);
@@ -947,18 +964,23 @@ static int uv_ptc_seq_show(struct seq_file *file, void *data)
 		seq_printf(file,
 			"cpu %d %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld ",
 			cpu, stat->s_requestor, cycles_2_us(stat->s_time),
-			stat->s_ntarguvhub, stat->s_ntarguvhub16,
+			stat->s_ntargself, stat->s_ntarglocals,
+			stat->s_ntargremotes, stat->s_ntargcpu,
+			stat->s_ntarglocaluvhub, stat->s_ntargremoteuvhub,
+			stat->s_ntarguvhub, stat->s_ntarguvhub16);
+		seq_printf(file, "%ld %ld %ld %ld %ld ",
 			stat->s_ntarguvhub8, stat->s_ntarguvhub4,
 			stat->s_ntarguvhub2, stat->s_ntarguvhub1,
-			stat->s_ntargcpu, stat->s_dtimeout);
+			stat->s_dtimeout);
 		seq_printf(file, "%ld %ld %ld %ld %ld %ld %ld %ld ",
 			stat->s_retry_messages, stat->s_retriesok,
 			stat->s_resets_plug, stat->s_resets_timeout,
 			stat->s_giveup, stat->s_stimeout,
 			stat->s_busy, stat->s_throttles);
+
 		/* destination side statistics */
 		seq_printf(file,
-			"%lx %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld\n",
+			"%lx %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld %ld ",
 			uv_read_global_mmr64(uv_cpu_to_pnode(cpu),
 					UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE),
 			stat->d_requestee, cycles_2_us(stat->d_time),
@@ -966,15 +988,36 @@ static int uv_ptc_seq_show(struct seq_file *file, void *data)
 			stat->d_nomsg, stat->d_retries, stat->d_canceled,
 			stat->d_nocanceled, stat->d_resets,
 			stat->d_rcanceled);
+		seq_printf(file, "%ld %ld\n",
+			stat->s_bau_disabled, stat->s_bau_reenabled);
 	}
 
 	return 0;
 }
 
 /*
+ * Display the tunables thru debugfs
+ */
+static ssize_t tunables_read(struct file *file, char __user *userbuf,
+				size_t count, loff_t *ppos)
+{
+	char buf[300];
+	int ret;
+
+	ret = snprintf(buf, 300, "%s %s %s\n%d %d %d %d %d %d %d %d %d\n",
+		"max_bau_concurrent plugged_delay plugsb4reset",
+		"timeoutsb4reset ipi_reset_limit complete_threshold",
+		"congested_response_us congested_reps congested_period",
+		max_bau_concurrent, plugged_delay, plugsb4reset,
+		timeoutsb4reset, ipi_reset_limit, complete_threshold,
+		congested_response_us, congested_reps, congested_period);
+
+	return simple_read_from_buffer(userbuf, count, ppos, buf, ret);
+}
+
+/*
  * -1: reset the statistics
  *  0: display meaning of the statistics
- * >0: maximum concurrent active descriptors per uvhub (throttle)
  */
 static ssize_t uv_ptc_proc_write(struct file *file, const char __user *user,
 				 size_t count, loff_t *data)
@@ -983,7 +1026,6 @@ static ssize_t uv_ptc_proc_write(struct file *file, const char __user *user,
 	long input_arg;
 	char optstr[64];
 	struct ptc_stats *stat;
-	struct bau_control *bcp;
 
 	if (count == 0 || count > sizeof(optstr))
 		return -EINVAL;
@@ -1059,29 +1101,158 @@ static ssize_t uv_ptc_proc_write(struct file *file, const char __user *user,
 		"reset: number of ipi-style reset requests processed\n");
 		printk(KERN_DEBUG
 		"rcan: number messages canceled by reset requests\n");
+		printk(KERN_DEBUG
+		"disable: number times use of the BAU was disabled\n");
+		printk(KERN_DEBUG
+		"enable: number times use of the BAU was re-enabled\n");
 	} else if (input_arg == -1) {
 		for_each_present_cpu(cpu) {
 			stat = &per_cpu(ptcstats, cpu);
 			memset(stat, 0, sizeof(struct ptc_stats));
 		}
-	} else {
-		uv_bau_max_concurrent = input_arg;
-		bcp = &per_cpu(bau_control, smp_processor_id());
-		if (uv_bau_max_concurrent < 1 ||
-		    uv_bau_max_concurrent > bcp->cpus_in_uvhub) {
-			printk(KERN_DEBUG
-				"Error: BAU max concurrent %d; %d is invalid\n",
-				bcp->max_concurrent, uv_bau_max_concurrent);
-			return -EINVAL;
-		}
-		printk(KERN_DEBUG "Set BAU max concurrent:%d\n",
-		       uv_bau_max_concurrent);
-		for_each_present_cpu(cpu) {
-			bcp = &per_cpu(bau_control, cpu);
-			bcp->max_concurrent = uv_bau_max_concurrent;
-		}
 	}
 
 	return count;
 }
 
+static int local_atoi(const char *name)
+{
+	int val = 0;
+
+	for (;; name++) {
+		switch (*name) {
+		case '0' ... '9':
+			val = 10*val+(*name-'0');
+			break;
+		default:
+			return val;
+		}
+	}
+}
+
+/*
+ * set the tunables
+ * 0 values reset them to defaults
+ */
+static ssize_t tunables_write(struct file *file, const char __user *user,
+				 size_t count, loff_t *data)
+{
+	int cpu;
+	int cnt = 0;
+	int val;
+	char *p;
+	char *q;
+	char instr[64];
+	struct bau_control *bcp;
+
+	if (count == 0 || count > sizeof(instr)-1)
+		return -EINVAL;
+	if (copy_from_user(instr, user, count))
+		return -EFAULT;
+
+	instr[count] = '\0';
+	/* count the fields */
+	p = instr + strspn(instr, WHITESPACE);
+	q = p;
+	for (; *p; p = q + strspn(q, WHITESPACE)) {
+		q = p + strcspn(p, WHITESPACE);
+		cnt++;
+		if (q == p)
+			break;
+	}
+	if (cnt != 9) {
+		printk(KERN_INFO "bau tunable error: should be 9 numbers\n");
+		return -EINVAL;
+	}
+
+	p = instr + strspn(instr, WHITESPACE);
+	q = p;
+	for (cnt = 0; *p; p = q + strspn(q, WHITESPACE), cnt++) {
+		q = p + strcspn(p, WHITESPACE);
+		val = local_atoi(p);
+		switch (cnt) {
+		case 0:
+			if (val == 0) {
+				max_bau_concurrent = MAX_BAU_CONCURRENT;
+				max_bau_concurrent_constant =
+							MAX_BAU_CONCURRENT;
+				continue;
+			}
+			bcp = &per_cpu(bau_control, smp_processor_id());
+			if (val < 1 || val > bcp->cpus_in_uvhub) {
+				printk(KERN_DEBUG
+				"Error: BAU max concurrent %d is invalid\n",
+				val);
+				return -EINVAL;
+			}
+			max_bau_concurrent = val;
+			max_bau_concurrent_constant = val;
+			continue;
+		case 1:
+			if (val == 0)
+				plugged_delay = PLUGGED_DELAY;
+			else
+				plugged_delay = val;
+			continue;
+		case 2:
+			if (val == 0)
+				plugsb4reset = PLUGSB4RESET;
+			else
+				plugsb4reset = val;
+			continue;
+		case 3:
+			if (val == 0)
+				timeoutsb4reset = TIMEOUTSB4RESET;
+			else
+				timeoutsb4reset = val;
+			continue;
+		case 4:
+			if (val == 0)
+				ipi_reset_limit = IPI_RESET_LIMIT;
+			else
+				ipi_reset_limit = val;
+			continue;
+		case 5:
+			if (val == 0)
+				complete_threshold = COMPLETE_THRESHOLD;
+			else
+				complete_threshold = val;
+			continue;
+		case 6:
+			if (val == 0)
+				congested_response_us = CONGESTED_RESPONSE_US;
+			else
+				congested_response_us = val;
+			continue;
+		case 7:
+			if (val == 0)
+				congested_reps = CONGESTED_REPS;
+			else
+				congested_reps = val;
+			continue;
+		case 8:
+			if (val == 0)
+				congested_period = CONGESTED_PERIOD;
+			else
+				congested_period = val;
+			continue;
+		}
+		if (q == p)
+			break;
+	}
+	for_each_present_cpu(cpu) {
+		bcp = &per_cpu(bau_control, cpu);
+		bcp->max_bau_concurrent = max_bau_concurrent;
+		bcp->max_bau_concurrent_constant = max_bau_concurrent;
+		bcp->plugged_delay = plugged_delay;
+		bcp->plugsb4reset = plugsb4reset;
+		bcp->timeoutsb4reset = timeoutsb4reset;
+		bcp->ipi_reset_limit = ipi_reset_limit;
+		bcp->complete_threshold = complete_threshold;
+		bcp->congested_response_us = congested_response_us;
+		bcp->congested_reps = congested_reps;
+		bcp->congested_period = congested_period;
+	}
+	return count;
+}
+
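Note: tunables_write() expects exactly nine whitespace-separated integers, in the order tunables_read() prints; a zero resets that field to its compiled-in default, and the loop at the end propagates the new values to every cpu's bau_control. A userspace sketch of setting them (the values are arbitrary examples, and the path assumes debugfs mounted at /sys/kernel/debug with the UV_BAU_TUNABLES_DIR/FILE names being "sgi_uv"/"bau_tunables"):

	#include <stdio.h>

	int main(void)
	{
		FILE *f = fopen("/sys/kernel/debug/sgi_uv/bau_tunables", "w");

		if (!f)
			return 1;
		/* max_bau_concurrent plugged_delay plugsb4reset
		   timeoutsb4reset ipi_reset_limit complete_threshold
		   congested_response_us congested_reps congested_period */
		fprintf(f, "2 10 3 2 2 5 1000 10 10\n");
		return fclose(f) != 0;
	}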
@@ -1097,6 +1268,11 @@ static int uv_ptc_proc_open(struct inode *inode, struct file *file)
 	return seq_open(file, &uv_ptc_seq_ops);
 }
 
+static int tunables_open(struct inode *inode, struct file *file)
+{
+	return 0;
+}
+
 static const struct file_operations proc_uv_ptc_operations = {
 	.open = uv_ptc_proc_open,
 	.read = seq_read,
@@ -1105,6 +1281,12 @@ static const struct file_operations proc_uv_ptc_operations = {
 	.release = seq_release,
 };
 
+static const struct file_operations tunables_fops = {
+	.open = tunables_open,
+	.read = tunables_read,
+	.write = tunables_write,
+};
+
 static int __init uv_ptc_init(void)
 {
 	struct proc_dir_entry *proc_uv_ptc;
@@ -1119,6 +1301,20 @@ static int __init uv_ptc_init(void)
 		       UV_PTC_BASENAME);
 		return -EINVAL;
 	}
+
+	tunables_dir = debugfs_create_dir(UV_BAU_TUNABLES_DIR, NULL);
+	if (!tunables_dir) {
+		printk(KERN_ERR "unable to create debugfs directory %s\n",
+		       UV_BAU_TUNABLES_DIR);
+		return -EINVAL;
+	}
+	tunables_file = debugfs_create_file(UV_BAU_TUNABLES_FILE, 0600,
+			tunables_dir, NULL, &tunables_fops);
+	if (!tunables_file) {
+		printk(KERN_ERR "unable to create debugfs file %s\n",
+		       UV_BAU_TUNABLES_FILE);
+		return -EINVAL;
+	}
 	return 0;
 }
 
@@ -1259,15 +1455,44 @@ static void __init uv_init_uvhub(int uvhub, int vector)
 }
 
 /*
+ * We will set BAU_MISC_CONTROL with a timeout period.
+ * But the BIOS has set UVH_AGING_PRESCALE_SEL and UVH_TRANSACTION_TIMEOUT.
+ * So the destination timeout period has to be calculated from them.
+ */
+static int
+calculate_destination_timeout(void)
+{
+	unsigned long mmr_image;
+	int mult1;
+	int mult2;
+	int index;
+	int base;
+	int ret;
+	unsigned long ts_ns;
+
+	mult1 = UV_INTD_SOFT_ACK_TIMEOUT_PERIOD & BAU_MISC_CONTROL_MULT_MASK;
+	mmr_image = uv_read_local_mmr(UVH_AGING_PRESCALE_SEL);
+	index = (mmr_image >> BAU_URGENCY_7_SHIFT) & BAU_URGENCY_7_MASK;
+	mmr_image = uv_read_local_mmr(UVH_TRANSACTION_TIMEOUT);
+	mult2 = (mmr_image >> BAU_TRANS_SHIFT) & BAU_TRANS_MASK;
+	base = timeout_base_ns[index];
+	ts_ns = base * mult1 * mult2;
+	ret = ts_ns / 1000;
+	return ret;
+}
+
+/*
  * initialize the bau_control structure for each cpu
  */
 static void uv_init_per_cpu(int nuvhubs)
 {
-	int i, j, k;
+	int i;
 	int cpu;
 	int pnode;
 	int uvhub;
 	short socket = 0;
+	unsigned short socket_mask;
+	unsigned int uvhub_mask;
 	struct bau_control *bcp;
 	struct uvhub_desc *bdp;
 	struct socket_desc *sdp;
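Note: a worked example of calculate_destination_timeout() with made-up register contents (the real masks and shifts live in uv_bau.h): if urgency7 selects index 2 and the two multipliers read back as 3 and 4, then ts_ns = 1280 * 3 * 4 = 15360 and timeout_us = 15. uv_init_per_cpu() below then arms bcp->timeout_interval with twice that, converted to cycles:

	/* illustration only; every input value here is hypothetical */
	static int example_destination_timeout_us(void)
	{
		int index = 2;	/* from UVH_AGING_PRESCALE_SEL urgency7 */
		int mult1 = 3;	/* soft-ack timeout period multiplier   */
		int mult2 = 4;	/* from UVH_TRANSACTION_TIMEOUT         */

		return (timeout_base_ns[index] * mult1 * mult2) / 1000;
	}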
@@ -1278,7 +1503,7 @@ static void uv_init_per_cpu(int nuvhubs)
 		short cpu_number[16];
 	};
 	struct uvhub_desc {
-		short num_sockets;
+		unsigned short socket_mask;
 		short num_cpus;
 		short uvhub;
 		short pnode;
@@ -1286,57 +1511,83 @@ static void uv_init_per_cpu(int nuvhubs)
 	};
 	struct uvhub_desc *uvhub_descs;
 
+	timeout_us = calculate_destination_timeout();
+
 	uvhub_descs = (struct uvhub_desc *)
 		kmalloc(nuvhubs * sizeof(struct uvhub_desc), GFP_KERNEL);
 	memset(uvhub_descs, 0, nuvhubs * sizeof(struct uvhub_desc));
 	for_each_present_cpu(cpu) {
 		bcp = &per_cpu(bau_control, cpu);
 		memset(bcp, 0, sizeof(struct bau_control));
-		spin_lock_init(&bcp->masks_lock);
-		bcp->max_concurrent = uv_bau_max_concurrent;
 		pnode = uv_cpu_hub_info(cpu)->pnode;
 		uvhub = uv_cpu_hub_info(cpu)->numa_blade_id;
+		uvhub_mask |= (1 << uvhub);
 		bdp = &uvhub_descs[uvhub];
 		bdp->num_cpus++;
 		bdp->uvhub = uvhub;
 		bdp->pnode = pnode;
-		/* time interval to catch a hardware stay-busy bug */
-		bcp->timeout_interval = millisec_2_cycles(3);
-		/* kludge: assume uv_hub.h is constant */
-		socket = (cpu_physical_id(cpu)>>5)&1;
-		if (socket >= bdp->num_sockets)
-			bdp->num_sockets = socket+1;
+		/* kludge: 'assuming' one node per socket, and assuming that
+		   disabling a socket just leaves a gap in node numbers */
+		socket = (cpu_to_node(cpu) & 1);;
+		bdp->socket_mask |= (1 << socket);
 		sdp = &bdp->socket[socket];
 		sdp->cpu_number[sdp->num_cpus] = cpu;
 		sdp->num_cpus++;
 	}
-	socket = 0;
-	for_each_possible_blade(uvhub) {
+	uvhub = 0;
+	while (uvhub_mask) {
+		if (!(uvhub_mask & 1))
+			goto nexthub;
 		bdp = &uvhub_descs[uvhub];
-		for (i = 0; i < bdp->num_sockets; i++) {
-			sdp = &bdp->socket[i];
-			for (j = 0; j < sdp->num_cpus; j++) {
-				cpu = sdp->cpu_number[j];
+		socket_mask = bdp->socket_mask;
+		socket = 0;
+		while (socket_mask) {
+			if (!(socket_mask & 1))
+				goto nextsocket;
+			sdp = &bdp->socket[socket];
+			for (i = 0; i < sdp->num_cpus; i++) {
+				cpu = sdp->cpu_number[i];
 				bcp = &per_cpu(bau_control, cpu);
 				bcp->cpu = cpu;
-				if (j == 0) {
+				if (i == 0) {
 					smaster = bcp;
-					if (i == 0)
+					if (socket == 0)
 						hmaster = bcp;
 				}
 				bcp->cpus_in_uvhub = bdp->num_cpus;
 				bcp->cpus_in_socket = sdp->num_cpus;
 				bcp->socket_master = smaster;
+				bcp->uvhub = bdp->uvhub;
 				bcp->uvhub_master = hmaster;
-				for (k = 0; k < DEST_Q_SIZE; k++)
-					bcp->socket_acknowledge_count[k] = 0;
-				bcp->uvhub_cpu =
-					uv_cpu_hub_info(cpu)->blade_processor_id;
+				bcp->uvhub_cpu = uv_cpu_hub_info(cpu)->
+						blade_processor_id;
 			}
+nextsocket:
 			socket++;
+			socket_mask = (socket_mask >> 1);
 		}
+nexthub:
+		uvhub++;
+		uvhub_mask = (uvhub_mask >> 1);
 	}
 	kfree(uvhub_descs);
+	for_each_present_cpu(cpu) {
+		bcp = &per_cpu(bau_control, cpu);
+		bcp->baudisabled = 0;
+		bcp->statp = &per_cpu(ptcstats, cpu);
+		/* time interval to catch a hardware stay-busy bug */
+		bcp->timeout_interval = microsec_2_cycles(2*timeout_us);
+		bcp->max_bau_concurrent = max_bau_concurrent;
+		bcp->max_bau_concurrent_constant = max_bau_concurrent;
+		bcp->plugged_delay = plugged_delay;
+		bcp->plugsb4reset = plugsb4reset;
+		bcp->timeoutsb4reset = timeoutsb4reset;
+		bcp->ipi_reset_limit = ipi_reset_limit;
+		bcp->complete_threshold = complete_threshold;
+		bcp->congested_response_us = congested_response_us;
+		bcp->congested_reps = congested_reps;
+		bcp->congested_period = congested_period;
+	}
 }
 
 /*
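Note: the rewritten discovery above stops assuming hubs and sockets are contiguously numbered: uvhub_mask gets a bit per hub that actually has cpus, each hub's socket_mask a bit per populated socket, and the nested while loops skip the gaps. The traversal pattern, reduced to its core:

	/* visit only the set bits of a population mask (sketch) */
	static int highest_populated_index(unsigned int mask)
	{
		int idx = 0, last = -1;

		while (mask) {
			if (mask & 1)
				last = idx;	/* a populated hub/socket */
			idx++;
			mask >>= 1;
		}
		return last;
	}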
@@ -1361,10 +1612,11 @@ static int __init uv_bau_init(void)
 		zalloc_cpumask_var_node(&per_cpu(uv_flush_tlb_mask, cur_cpu),
 				       GFP_KERNEL, cpu_to_node(cur_cpu));
 
-	uv_bau_max_concurrent = MAX_BAU_CONCURRENT;
 	uv_nshift = uv_hub_info->m_val;
 	uv_mmask = (1UL << uv_hub_info->m_val) - 1;
 	nuvhubs = uv_num_possible_blades();
+	spin_lock_init(&disable_lock);
+	congested_cycles = microsec_2_cycles(congested_response_us);
 
 	uv_init_per_cpu(nuvhubs);
 
@@ -1383,15 +1635,19 @@ static int __init uv_bau_init(void)
 	alloc_intr_gate(vector, uv_bau_message_intr1);
 
 	for_each_possible_blade(uvhub) {
-		pnode = uv_blade_to_pnode(uvhub);
-		/* INIT the bau */
-		uv_write_global_mmr64(pnode, UVH_LB_BAU_SB_ACTIVATION_CONTROL,
-				      ((unsigned long)1 << 63));
-		mmr = 1; /* should be 1 to broadcast to both sockets */
-		uv_write_global_mmr64(pnode, UVH_BAU_DATA_BROADCAST, mmr);
+		if (uv_blade_nr_possible_cpus(uvhub)) {
+			pnode = uv_blade_to_pnode(uvhub);
+			/* INIT the bau */
+			uv_write_global_mmr64(pnode,
+					UVH_LB_BAU_SB_ACTIVATION_CONTROL,
+					((unsigned long)1 << 63));
+			mmr = 1; /* should be 1 to broadcast to both sockets */
+			uv_write_global_mmr64(pnode, UVH_BAU_DATA_BROADCAST,
+					mmr);
+		}
 	}
 
 	return 0;
 }
 core_initcall(uv_bau_init);
-core_initcall(uv_ptc_init);
+fs_initcall(uv_ptc_init);