diff options
author | Robin Holt <holt@sgi.com> | 2009-04-13 17:40:18 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2009-04-13 18:04:33 -0400 |
commit | a374c57b0764432a80303abee3d1afd1939b5a0a (patch) | |
tree | b3e15b590d5342e5de6c2b62da4dd61fb2ad0836 /drivers/misc/sgi-xp/xpc_uv.c | |
parent | a06bba4643ae10ac6b202dade1cde38bc5e08b25 (diff) |
sgi-xpc: prevent false heartbeat failures
The heartbeat timeout functionality in sgi-xpc is currently not synchronized
with the connection time. If a connection is made and the code is in the last
polling window prior to doing a timeout, the next polling window will see
the heartbeat as unchanged and initiate a no-heartbeat disconnect.
Signed-off-by: Robin Holt <holt@sgi.com>
Signed-off-by: Dean Nelson <dcn@sgi.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'drivers/misc/sgi-xp/xpc_uv.c')
-rw-r--r-- | drivers/misc/sgi-xp/xpc_uv.c | 123 |
1 files changed, 43 insertions, 80 deletions
diff --git a/drivers/misc/sgi-xp/xpc_uv.c b/drivers/misc/sgi-xp/xpc_uv.c index f7fff4727edb..97f7cb21a0a2 100644 --- a/drivers/misc/sgi-xp/xpc_uv.c +++ b/drivers/misc/sgi-xp/xpc_uv.c | |||
@@ -46,8 +46,7 @@ struct uv_IO_APIC_route_entry { | |||
46 | }; | 46 | }; |
47 | #endif | 47 | #endif |
48 | 48 | ||
49 | static atomic64_t xpc_heartbeat_uv; | 49 | static struct xpc_heartbeat_uv *xpc_heartbeat_uv; |
50 | static DECLARE_BITMAP(xpc_heartbeating_to_mask_uv, XP_MAX_NPARTITIONS_UV); | ||
51 | 50 | ||
52 | #define XPC_ACTIVATE_MSG_SIZE_UV (1 * GRU_CACHE_LINE_BYTES) | 51 | #define XPC_ACTIVATE_MSG_SIZE_UV (1 * GRU_CACHE_LINE_BYTES) |
53 | #define XPC_ACTIVATE_MQ_SIZE_UV (4 * XP_MAX_NPARTITIONS_UV * \ | 52 | #define XPC_ACTIVATE_MQ_SIZE_UV (4 * XP_MAX_NPARTITIONS_UV * \ |
@@ -423,41 +422,6 @@ xpc_handle_activate_mq_msg_uv(struct xpc_partition *part, | |||
423 | /* syncing of remote_act_state was just done above */ | 422 | /* syncing of remote_act_state was just done above */ |
424 | break; | 423 | break; |
425 | 424 | ||
426 | case XPC_ACTIVATE_MQ_MSG_INC_HEARTBEAT_UV: { | ||
427 | struct xpc_activate_mq_msg_heartbeat_req_uv *msg; | ||
428 | |||
429 | msg = container_of(msg_hdr, | ||
430 | struct xpc_activate_mq_msg_heartbeat_req_uv, | ||
431 | hdr); | ||
432 | part_uv->heartbeat = msg->heartbeat; | ||
433 | break; | ||
434 | } | ||
435 | case XPC_ACTIVATE_MQ_MSG_OFFLINE_HEARTBEAT_UV: { | ||
436 | struct xpc_activate_mq_msg_heartbeat_req_uv *msg; | ||
437 | |||
438 | msg = container_of(msg_hdr, | ||
439 | struct xpc_activate_mq_msg_heartbeat_req_uv, | ||
440 | hdr); | ||
441 | part_uv->heartbeat = msg->heartbeat; | ||
442 | |||
443 | spin_lock_irqsave(&part_uv->flags_lock, irq_flags); | ||
444 | part_uv->flags |= XPC_P_HEARTBEAT_OFFLINE_UV; | ||
445 | spin_unlock_irqrestore(&part_uv->flags_lock, irq_flags); | ||
446 | break; | ||
447 | } | ||
448 | case XPC_ACTIVATE_MQ_MSG_ONLINE_HEARTBEAT_UV: { | ||
449 | struct xpc_activate_mq_msg_heartbeat_req_uv *msg; | ||
450 | |||
451 | msg = container_of(msg_hdr, | ||
452 | struct xpc_activate_mq_msg_heartbeat_req_uv, | ||
453 | hdr); | ||
454 | part_uv->heartbeat = msg->heartbeat; | ||
455 | |||
456 | spin_lock_irqsave(&part_uv->flags_lock, irq_flags); | ||
457 | part_uv->flags &= ~XPC_P_HEARTBEAT_OFFLINE_UV; | ||
458 | spin_unlock_irqrestore(&part_uv->flags_lock, irq_flags); | ||
459 | break; | ||
460 | } | ||
461 | case XPC_ACTIVATE_MQ_MSG_ACTIVATE_REQ_UV: { | 425 | case XPC_ACTIVATE_MQ_MSG_ACTIVATE_REQ_UV: { |
462 | struct xpc_activate_mq_msg_activate_req_uv *msg; | 426 | struct xpc_activate_mq_msg_activate_req_uv *msg; |
463 | 427 | ||
@@ -475,6 +439,7 @@ xpc_handle_activate_mq_msg_uv(struct xpc_partition *part, | |||
475 | part_uv->act_state_req = XPC_P_ASR_ACTIVATE_UV; | 439 | part_uv->act_state_req = XPC_P_ASR_ACTIVATE_UV; |
476 | part->remote_rp_pa = msg->rp_gpa; /* !!! _pa is _gpa */ | 440 | part->remote_rp_pa = msg->rp_gpa; /* !!! _pa is _gpa */ |
477 | part->remote_rp_ts_jiffies = msg_hdr->rp_ts_jiffies; | 441 | part->remote_rp_ts_jiffies = msg_hdr->rp_ts_jiffies; |
442 | part_uv->heartbeat_gpa = msg->heartbeat_gpa; | ||
478 | 443 | ||
479 | if (msg->activate_gru_mq_desc_gpa != | 444 | if (msg->activate_gru_mq_desc_gpa != |
480 | part_uv->activate_gru_mq_desc_gpa) { | 445 | part_uv->activate_gru_mq_desc_gpa) { |
@@ -759,7 +724,7 @@ xpc_send_local_activate_IRQ_uv(struct xpc_partition *part, int act_state_req) | |||
759 | 724 | ||
760 | /* | 725 | /* |
761 | * !!! Make our side think that the remote partition sent an activate | 726 | * !!! Make our side think that the remote partition sent an activate |
762 | * !!! message our way by doing what the activate IRQ handler would | 727 | * !!! mq message our way by doing what the activate IRQ handler would |
763 | * !!! do had one really been sent. | 728 | * !!! do had one really been sent. |
764 | */ | 729 | */ |
765 | 730 | ||
@@ -808,88 +773,80 @@ xpc_get_partition_rsvd_page_pa_uv(void *buf, u64 *cookie, unsigned long *rp_pa, | |||
808 | static int | 773 | static int |
809 | xpc_setup_rsvd_page_sn_uv(struct xpc_rsvd_page *rp) | 774 | xpc_setup_rsvd_page_sn_uv(struct xpc_rsvd_page *rp) |
810 | { | 775 | { |
811 | rp->sn.activate_gru_mq_desc_gpa = | 776 | xpc_heartbeat_uv = |
777 | &xpc_partitions[sn_partition_id].sn.uv.cached_heartbeat; | ||
778 | rp->sn.uv.heartbeat_gpa = uv_gpa(xpc_heartbeat_uv); | ||
779 | rp->sn.uv.activate_gru_mq_desc_gpa = | ||
812 | uv_gpa(xpc_activate_mq_uv->gru_mq_desc); | 780 | uv_gpa(xpc_activate_mq_uv->gru_mq_desc); |
813 | return 0; | 781 | return 0; |
814 | } | 782 | } |
815 | 783 | ||
816 | static void | 784 | static void |
817 | xpc_send_heartbeat_uv(int msg_type) | 785 | xpc_allow_hb_uv(short partid) |
818 | { | 786 | { |
819 | short partid; | 787 | } |
820 | struct xpc_partition *part; | ||
821 | struct xpc_activate_mq_msg_heartbeat_req_uv msg; | ||
822 | |||
823 | /* | ||
824 | * !!! On uv we're broadcasting a heartbeat message every 5 seconds. | ||
825 | * !!! Whereas on sn2 we're bte_copy'ng the heartbeat info every 20 | ||
826 | * !!! seconds. This is an increase in numalink traffic. | ||
827 | * ??? Is this good? | ||
828 | */ | ||
829 | |||
830 | msg.heartbeat = atomic64_inc_return(&xpc_heartbeat_uv); | ||
831 | |||
832 | partid = find_first_bit(xpc_heartbeating_to_mask_uv, | ||
833 | XP_MAX_NPARTITIONS_UV); | ||
834 | |||
835 | while (partid < XP_MAX_NPARTITIONS_UV) { | ||
836 | part = &xpc_partitions[partid]; | ||
837 | 788 | ||
838 | xpc_send_activate_IRQ_part_uv(part, &msg, sizeof(msg), | 789 | static void |
839 | msg_type); | 790 | xpc_disallow_hb_uv(short partid) |
791 | { | ||
792 | } | ||
840 | 793 | ||
841 | partid = find_next_bit(xpc_heartbeating_to_mask_uv, | 794 | static void |
842 | XP_MAX_NPARTITIONS_UV, partid + 1); | 795 | xpc_disallow_all_hbs_uv(void) |
843 | } | 796 | { |
844 | } | 797 | } |
845 | 798 | ||
846 | static void | 799 | static void |
847 | xpc_increment_heartbeat_uv(void) | 800 | xpc_increment_heartbeat_uv(void) |
848 | { | 801 | { |
849 | xpc_send_heartbeat_uv(XPC_ACTIVATE_MQ_MSG_INC_HEARTBEAT_UV); | 802 | xpc_heartbeat_uv->value++; |
850 | } | 803 | } |
851 | 804 | ||
852 | static void | 805 | static void |
853 | xpc_offline_heartbeat_uv(void) | 806 | xpc_offline_heartbeat_uv(void) |
854 | { | 807 | { |
855 | xpc_send_heartbeat_uv(XPC_ACTIVATE_MQ_MSG_OFFLINE_HEARTBEAT_UV); | 808 | xpc_increment_heartbeat_uv(); |
809 | xpc_heartbeat_uv->offline = 1; | ||
856 | } | 810 | } |
857 | 811 | ||
858 | static void | 812 | static void |
859 | xpc_online_heartbeat_uv(void) | 813 | xpc_online_heartbeat_uv(void) |
860 | { | 814 | { |
861 | xpc_send_heartbeat_uv(XPC_ACTIVATE_MQ_MSG_ONLINE_HEARTBEAT_UV); | 815 | xpc_increment_heartbeat_uv(); |
816 | xpc_heartbeat_uv->offline = 0; | ||
862 | } | 817 | } |
863 | 818 | ||
864 | static void | 819 | static void |
865 | xpc_heartbeat_init_uv(void) | 820 | xpc_heartbeat_init_uv(void) |
866 | { | 821 | { |
867 | atomic64_set(&xpc_heartbeat_uv, 0); | 822 | xpc_heartbeat_uv->value = 1; |
868 | bitmap_zero(xpc_heartbeating_to_mask_uv, XP_MAX_NPARTITIONS_UV); | 823 | xpc_heartbeat_uv->offline = 0; |
869 | xpc_heartbeating_to_mask = &xpc_heartbeating_to_mask_uv[0]; | ||
870 | } | 824 | } |
871 | 825 | ||
872 | static void | 826 | static void |
873 | xpc_heartbeat_exit_uv(void) | 827 | xpc_heartbeat_exit_uv(void) |
874 | { | 828 | { |
875 | xpc_send_heartbeat_uv(XPC_ACTIVATE_MQ_MSG_OFFLINE_HEARTBEAT_UV); | 829 | xpc_offline_heartbeat_uv(); |
876 | } | 830 | } |
877 | 831 | ||
878 | static enum xp_retval | 832 | static enum xp_retval |
879 | xpc_get_remote_heartbeat_uv(struct xpc_partition *part) | 833 | xpc_get_remote_heartbeat_uv(struct xpc_partition *part) |
880 | { | 834 | { |
881 | struct xpc_partition_uv *part_uv = &part->sn.uv; | 835 | struct xpc_partition_uv *part_uv = &part->sn.uv; |
882 | enum xp_retval ret = xpNoHeartbeat; | 836 | enum xp_retval ret; |
883 | 837 | ||
884 | if (part_uv->remote_act_state != XPC_P_AS_INACTIVE && | 838 | ret = xp_remote_memcpy(uv_gpa(&part_uv->cached_heartbeat), |
885 | part_uv->remote_act_state != XPC_P_AS_DEACTIVATING) { | 839 | part_uv->heartbeat_gpa, |
840 | sizeof(struct xpc_heartbeat_uv)); | ||
841 | if (ret != xpSuccess) | ||
842 | return ret; | ||
886 | 843 | ||
887 | if (part_uv->heartbeat != part->last_heartbeat || | 844 | if (part_uv->cached_heartbeat.value == part->last_heartbeat && |
888 | (part_uv->flags & XPC_P_HEARTBEAT_OFFLINE_UV)) { | 845 | !part_uv->cached_heartbeat.offline) { |
889 | 846 | ||
890 | part->last_heartbeat = part_uv->heartbeat; | 847 | ret = xpNoHeartbeat; |
891 | ret = xpSuccess; | 848 | } else { |
892 | } | 849 | part->last_heartbeat = part_uv->cached_heartbeat.value; |
893 | } | 850 | } |
894 | return ret; | 851 | return ret; |
895 | } | 852 | } |
@@ -904,8 +861,9 @@ xpc_request_partition_activation_uv(struct xpc_rsvd_page *remote_rp, | |||
904 | 861 | ||
905 | part->remote_rp_pa = remote_rp_gpa; /* !!! _pa here is really _gpa */ | 862 | part->remote_rp_pa = remote_rp_gpa; /* !!! _pa here is really _gpa */ |
906 | part->remote_rp_ts_jiffies = remote_rp->ts_jiffies; | 863 | part->remote_rp_ts_jiffies = remote_rp->ts_jiffies; |
864 | part->sn.uv.heartbeat_gpa = remote_rp->sn.uv.heartbeat_gpa; | ||
907 | part->sn.uv.activate_gru_mq_desc_gpa = | 865 | part->sn.uv.activate_gru_mq_desc_gpa = |
908 | remote_rp->sn.activate_gru_mq_desc_gpa; | 866 | remote_rp->sn.uv.activate_gru_mq_desc_gpa; |
909 | 867 | ||
910 | /* | 868 | /* |
911 | * ??? Is it a good idea to make this conditional on what is | 869 | * ??? Is it a good idea to make this conditional on what is |
@@ -913,8 +871,9 @@ xpc_request_partition_activation_uv(struct xpc_rsvd_page *remote_rp, | |||
913 | */ | 871 | */ |
914 | if (part->sn.uv.remote_act_state == XPC_P_AS_INACTIVE) { | 872 | if (part->sn.uv.remote_act_state == XPC_P_AS_INACTIVE) { |
915 | msg.rp_gpa = uv_gpa(xpc_rsvd_page); | 873 | msg.rp_gpa = uv_gpa(xpc_rsvd_page); |
874 | msg.heartbeat_gpa = xpc_rsvd_page->sn.uv.heartbeat_gpa; | ||
916 | msg.activate_gru_mq_desc_gpa = | 875 | msg.activate_gru_mq_desc_gpa = |
917 | xpc_rsvd_page->sn.activate_gru_mq_desc_gpa; | 876 | xpc_rsvd_page->sn.uv.activate_gru_mq_desc_gpa; |
918 | xpc_send_activate_IRQ_part_uv(part, &msg, sizeof(msg), | 877 | xpc_send_activate_IRQ_part_uv(part, &msg, sizeof(msg), |
919 | XPC_ACTIVATE_MQ_MSG_ACTIVATE_REQ_UV); | 878 | XPC_ACTIVATE_MQ_MSG_ACTIVATE_REQ_UV); |
920 | } | 879 | } |
@@ -1677,6 +1636,10 @@ xpc_init_uv(void) | |||
1677 | xpc_process_activate_IRQ_rcvd = xpc_process_activate_IRQ_rcvd_uv; | 1636 | xpc_process_activate_IRQ_rcvd = xpc_process_activate_IRQ_rcvd_uv; |
1678 | xpc_get_partition_rsvd_page_pa = xpc_get_partition_rsvd_page_pa_uv; | 1637 | xpc_get_partition_rsvd_page_pa = xpc_get_partition_rsvd_page_pa_uv; |
1679 | xpc_setup_rsvd_page_sn = xpc_setup_rsvd_page_sn_uv; | 1638 | xpc_setup_rsvd_page_sn = xpc_setup_rsvd_page_sn_uv; |
1639 | |||
1640 | xpc_allow_hb = xpc_allow_hb_uv; | ||
1641 | xpc_disallow_hb = xpc_disallow_hb_uv; | ||
1642 | xpc_disallow_all_hbs = xpc_disallow_all_hbs_uv; | ||
1680 | xpc_increment_heartbeat = xpc_increment_heartbeat_uv; | 1643 | xpc_increment_heartbeat = xpc_increment_heartbeat_uv; |
1681 | xpc_offline_heartbeat = xpc_offline_heartbeat_uv; | 1644 | xpc_offline_heartbeat = xpc_offline_heartbeat_uv; |
1682 | xpc_online_heartbeat = xpc_online_heartbeat_uv; | 1645 | xpc_online_heartbeat = xpc_online_heartbeat_uv; |