about | summary | refs | log | tree | commit | diff | stats
path: root/drivers/misc/sgi-xp/xpc_uv.c
diff options
context:
space:
mode:
author: Robin Holt <holt@sgi.com> 2009-04-13 17:40:18 -0400
committer: Linus Torvalds <torvalds@linux-foundation.org> 2009-04-13 18:04:33 -0400
commit: a374c57b0764432a80303abee3d1afd1939b5a0a (patch)
tree: b3e15b590d5342e5de6c2b62da4dd61fb2ad0836 /drivers/misc/sgi-xp/xpc_uv.c
parent: a06bba4643ae10ac6b202dade1cde38bc5e08b25 (diff)
sgi-xpc: prevent false heartbeat failures
The heartbeat timeout functionality in sgi-xpc is currently not synchronized with the connection time. If a connection is made while the code is in the last polling window before a timeout, the next polling window will see the heartbeat as unchanged and initiate a no-heartbeat disconnect.

Signed-off-by: Robin Holt <holt@sgi.com>
Signed-off-by: Dean Nelson <dcn@sgi.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'drivers/misc/sgi-xp/xpc_uv.c')
-rw-r--r-- drivers/misc/sgi-xp/xpc_uv.c 123
1 files changed, 43 insertions, 80 deletions
diff --git a/drivers/misc/sgi-xp/xpc_uv.c b/drivers/misc/sgi-xp/xpc_uv.c
index f7fff4727edb..97f7cb21a0a2 100644
--- a/drivers/misc/sgi-xp/xpc_uv.c
+++ b/drivers/misc/sgi-xp/xpc_uv.c
@@ -46,8 +46,7 @@ struct uv_IO_APIC_route_entry {
46}; 46};
47#endif 47#endif
48 48
49static atomic64_t xpc_heartbeat_uv; 49static struct xpc_heartbeat_uv *xpc_heartbeat_uv;
50static DECLARE_BITMAP(xpc_heartbeating_to_mask_uv, XP_MAX_NPARTITIONS_UV);
51 50
52#define XPC_ACTIVATE_MSG_SIZE_UV (1 * GRU_CACHE_LINE_BYTES) 51#define XPC_ACTIVATE_MSG_SIZE_UV (1 * GRU_CACHE_LINE_BYTES)
53#define XPC_ACTIVATE_MQ_SIZE_UV (4 * XP_MAX_NPARTITIONS_UV * \ 52#define XPC_ACTIVATE_MQ_SIZE_UV (4 * XP_MAX_NPARTITIONS_UV * \
@@ -423,41 +422,6 @@ xpc_handle_activate_mq_msg_uv(struct xpc_partition *part,
423 /* syncing of remote_act_state was just done above */ 422 /* syncing of remote_act_state was just done above */
424 break; 423 break;
425 424
426 case XPC_ACTIVATE_MQ_MSG_INC_HEARTBEAT_UV: {
427 struct xpc_activate_mq_msg_heartbeat_req_uv *msg;
428
429 msg = container_of(msg_hdr,
430 struct xpc_activate_mq_msg_heartbeat_req_uv,
431 hdr);
432 part_uv->heartbeat = msg->heartbeat;
433 break;
434 }
435 case XPC_ACTIVATE_MQ_MSG_OFFLINE_HEARTBEAT_UV: {
436 struct xpc_activate_mq_msg_heartbeat_req_uv *msg;
437
438 msg = container_of(msg_hdr,
439 struct xpc_activate_mq_msg_heartbeat_req_uv,
440 hdr);
441 part_uv->heartbeat = msg->heartbeat;
442
443 spin_lock_irqsave(&part_uv->flags_lock, irq_flags);
444 part_uv->flags |= XPC_P_HEARTBEAT_OFFLINE_UV;
445 spin_unlock_irqrestore(&part_uv->flags_lock, irq_flags);
446 break;
447 }
448 case XPC_ACTIVATE_MQ_MSG_ONLINE_HEARTBEAT_UV: {
449 struct xpc_activate_mq_msg_heartbeat_req_uv *msg;
450
451 msg = container_of(msg_hdr,
452 struct xpc_activate_mq_msg_heartbeat_req_uv,
453 hdr);
454 part_uv->heartbeat = msg->heartbeat;
455
456 spin_lock_irqsave(&part_uv->flags_lock, irq_flags);
457 part_uv->flags &= ~XPC_P_HEARTBEAT_OFFLINE_UV;
458 spin_unlock_irqrestore(&part_uv->flags_lock, irq_flags);
459 break;
460 }
461 case XPC_ACTIVATE_MQ_MSG_ACTIVATE_REQ_UV: { 425 case XPC_ACTIVATE_MQ_MSG_ACTIVATE_REQ_UV: {
462 struct xpc_activate_mq_msg_activate_req_uv *msg; 426 struct xpc_activate_mq_msg_activate_req_uv *msg;
463 427
@@ -475,6 +439,7 @@ xpc_handle_activate_mq_msg_uv(struct xpc_partition *part,
475 part_uv->act_state_req = XPC_P_ASR_ACTIVATE_UV; 439 part_uv->act_state_req = XPC_P_ASR_ACTIVATE_UV;
476 part->remote_rp_pa = msg->rp_gpa; /* !!! _pa is _gpa */ 440 part->remote_rp_pa = msg->rp_gpa; /* !!! _pa is _gpa */
477 part->remote_rp_ts_jiffies = msg_hdr->rp_ts_jiffies; 441 part->remote_rp_ts_jiffies = msg_hdr->rp_ts_jiffies;
442 part_uv->heartbeat_gpa = msg->heartbeat_gpa;
478 443
479 if (msg->activate_gru_mq_desc_gpa != 444 if (msg->activate_gru_mq_desc_gpa !=
480 part_uv->activate_gru_mq_desc_gpa) { 445 part_uv->activate_gru_mq_desc_gpa) {
@@ -759,7 +724,7 @@ xpc_send_local_activate_IRQ_uv(struct xpc_partition *part, int act_state_req)
759 724
760 /* 725 /*
761 * !!! Make our side think that the remote partition sent an activate 726 * !!! Make our side think that the remote partition sent an activate
762 * !!! message our way by doing what the activate IRQ handler would 727 * !!! mq message our way by doing what the activate IRQ handler would
763 * !!! do had one really been sent. 728 * !!! do had one really been sent.
764 */ 729 */
765 730
@@ -808,88 +773,80 @@ xpc_get_partition_rsvd_page_pa_uv(void *buf, u64 *cookie, unsigned long *rp_pa,
808static int 773static int
809xpc_setup_rsvd_page_sn_uv(struct xpc_rsvd_page *rp) 774xpc_setup_rsvd_page_sn_uv(struct xpc_rsvd_page *rp)
810{ 775{
811 rp->sn.activate_gru_mq_desc_gpa = 776 xpc_heartbeat_uv =
777 &xpc_partitions[sn_partition_id].sn.uv.cached_heartbeat;
778 rp->sn.uv.heartbeat_gpa = uv_gpa(xpc_heartbeat_uv);
779 rp->sn.uv.activate_gru_mq_desc_gpa =
812 uv_gpa(xpc_activate_mq_uv->gru_mq_desc); 780 uv_gpa(xpc_activate_mq_uv->gru_mq_desc);
813 return 0; 781 return 0;
814} 782}
815 783
816static void 784static void
817xpc_send_heartbeat_uv(int msg_type) 785xpc_allow_hb_uv(short partid)
818{ 786{
819 short partid; 787}
820 struct xpc_partition *part;
821 struct xpc_activate_mq_msg_heartbeat_req_uv msg;
822
823 /*
824 * !!! On uv we're broadcasting a heartbeat message every 5 seconds.
825 * !!! Whereas on sn2 we're bte_copy'ng the heartbeat info every 20
826 * !!! seconds. This is an increase in numalink traffic.
827 * ??? Is this good?
828 */
829
830 msg.heartbeat = atomic64_inc_return(&xpc_heartbeat_uv);
831
832 partid = find_first_bit(xpc_heartbeating_to_mask_uv,
833 XP_MAX_NPARTITIONS_UV);
834
835 while (partid < XP_MAX_NPARTITIONS_UV) {
836 part = &xpc_partitions[partid];
837 788
838 xpc_send_activate_IRQ_part_uv(part, &msg, sizeof(msg), 789static void
839 msg_type); 790xpc_disallow_hb_uv(short partid)
791{
792}
840 793
841 partid = find_next_bit(xpc_heartbeating_to_mask_uv, 794static void
842 XP_MAX_NPARTITIONS_UV, partid + 1); 795xpc_disallow_all_hbs_uv(void)
843 } 796{
844} 797}
845 798
846static void 799static void
847xpc_increment_heartbeat_uv(void) 800xpc_increment_heartbeat_uv(void)
848{ 801{
849 xpc_send_heartbeat_uv(XPC_ACTIVATE_MQ_MSG_INC_HEARTBEAT_UV); 802 xpc_heartbeat_uv->value++;
850} 803}
851 804
852static void 805static void
853xpc_offline_heartbeat_uv(void) 806xpc_offline_heartbeat_uv(void)
854{ 807{
855 xpc_send_heartbeat_uv(XPC_ACTIVATE_MQ_MSG_OFFLINE_HEARTBEAT_UV); 808 xpc_increment_heartbeat_uv();
809 xpc_heartbeat_uv->offline = 1;
856} 810}
857 811
858static void 812static void
859xpc_online_heartbeat_uv(void) 813xpc_online_heartbeat_uv(void)
860{ 814{
861 xpc_send_heartbeat_uv(XPC_ACTIVATE_MQ_MSG_ONLINE_HEARTBEAT_UV); 815 xpc_increment_heartbeat_uv();
816 xpc_heartbeat_uv->offline = 0;
862} 817}
863 818
864static void 819static void
865xpc_heartbeat_init_uv(void) 820xpc_heartbeat_init_uv(void)
866{ 821{
867 atomic64_set(&xpc_heartbeat_uv, 0); 822 xpc_heartbeat_uv->value = 1;
868 bitmap_zero(xpc_heartbeating_to_mask_uv, XP_MAX_NPARTITIONS_UV); 823 xpc_heartbeat_uv->offline = 0;
869 xpc_heartbeating_to_mask = &xpc_heartbeating_to_mask_uv[0];
870} 824}
871 825
872static void 826static void
873xpc_heartbeat_exit_uv(void) 827xpc_heartbeat_exit_uv(void)
874{ 828{
875 xpc_send_heartbeat_uv(XPC_ACTIVATE_MQ_MSG_OFFLINE_HEARTBEAT_UV); 829 xpc_offline_heartbeat_uv();
876} 830}
877 831
878static enum xp_retval 832static enum xp_retval
879xpc_get_remote_heartbeat_uv(struct xpc_partition *part) 833xpc_get_remote_heartbeat_uv(struct xpc_partition *part)
880{ 834{
881 struct xpc_partition_uv *part_uv = &part->sn.uv; 835 struct xpc_partition_uv *part_uv = &part->sn.uv;
882 enum xp_retval ret = xpNoHeartbeat; 836 enum xp_retval ret;
883 837
884 if (part_uv->remote_act_state != XPC_P_AS_INACTIVE && 838 ret = xp_remote_memcpy(uv_gpa(&part_uv->cached_heartbeat),
885 part_uv->remote_act_state != XPC_P_AS_DEACTIVATING) { 839 part_uv->heartbeat_gpa,
840 sizeof(struct xpc_heartbeat_uv));
841 if (ret != xpSuccess)
842 return ret;
886 843
887 if (part_uv->heartbeat != part->last_heartbeat || 844 if (part_uv->cached_heartbeat.value == part->last_heartbeat &&
888 (part_uv->flags & XPC_P_HEARTBEAT_OFFLINE_UV)) { 845 !part_uv->cached_heartbeat.offline) {
889 846
890 part->last_heartbeat = part_uv->heartbeat; 847 ret = xpNoHeartbeat;
891 ret = xpSuccess; 848 } else {
892 } 849 part->last_heartbeat = part_uv->cached_heartbeat.value;
893 } 850 }
894 return ret; 851 return ret;
895} 852}
@@ -904,8 +861,9 @@ xpc_request_partition_activation_uv(struct xpc_rsvd_page *remote_rp,
904 861
905 part->remote_rp_pa = remote_rp_gpa; /* !!! _pa here is really _gpa */ 862 part->remote_rp_pa = remote_rp_gpa; /* !!! _pa here is really _gpa */
906 part->remote_rp_ts_jiffies = remote_rp->ts_jiffies; 863 part->remote_rp_ts_jiffies = remote_rp->ts_jiffies;
864 part->sn.uv.heartbeat_gpa = remote_rp->sn.uv.heartbeat_gpa;
907 part->sn.uv.activate_gru_mq_desc_gpa = 865 part->sn.uv.activate_gru_mq_desc_gpa =
908 remote_rp->sn.activate_gru_mq_desc_gpa; 866 remote_rp->sn.uv.activate_gru_mq_desc_gpa;
909 867
910 /* 868 /*
911 * ??? Is it a good idea to make this conditional on what is 869 * ??? Is it a good idea to make this conditional on what is
@@ -913,8 +871,9 @@ xpc_request_partition_activation_uv(struct xpc_rsvd_page *remote_rp,
913 */ 871 */
914 if (part->sn.uv.remote_act_state == XPC_P_AS_INACTIVE) { 872 if (part->sn.uv.remote_act_state == XPC_P_AS_INACTIVE) {
915 msg.rp_gpa = uv_gpa(xpc_rsvd_page); 873 msg.rp_gpa = uv_gpa(xpc_rsvd_page);
874 msg.heartbeat_gpa = xpc_rsvd_page->sn.uv.heartbeat_gpa;
916 msg.activate_gru_mq_desc_gpa = 875 msg.activate_gru_mq_desc_gpa =
917 xpc_rsvd_page->sn.activate_gru_mq_desc_gpa; 876 xpc_rsvd_page->sn.uv.activate_gru_mq_desc_gpa;
918 xpc_send_activate_IRQ_part_uv(part, &msg, sizeof(msg), 877 xpc_send_activate_IRQ_part_uv(part, &msg, sizeof(msg),
919 XPC_ACTIVATE_MQ_MSG_ACTIVATE_REQ_UV); 878 XPC_ACTIVATE_MQ_MSG_ACTIVATE_REQ_UV);
920 } 879 }
@@ -1677,6 +1636,10 @@ xpc_init_uv(void)
1677 xpc_process_activate_IRQ_rcvd = xpc_process_activate_IRQ_rcvd_uv; 1636 xpc_process_activate_IRQ_rcvd = xpc_process_activate_IRQ_rcvd_uv;
1678 xpc_get_partition_rsvd_page_pa = xpc_get_partition_rsvd_page_pa_uv; 1637 xpc_get_partition_rsvd_page_pa = xpc_get_partition_rsvd_page_pa_uv;
1679 xpc_setup_rsvd_page_sn = xpc_setup_rsvd_page_sn_uv; 1638 xpc_setup_rsvd_page_sn = xpc_setup_rsvd_page_sn_uv;
1639
1640 xpc_allow_hb = xpc_allow_hb_uv;
1641 xpc_disallow_hb = xpc_disallow_hb_uv;
1642 xpc_disallow_all_hbs = xpc_disallow_all_hbs_uv;
1680 xpc_increment_heartbeat = xpc_increment_heartbeat_uv; 1643 xpc_increment_heartbeat = xpc_increment_heartbeat_uv;
1681 xpc_offline_heartbeat = xpc_offline_heartbeat_uv; 1644 xpc_offline_heartbeat = xpc_offline_heartbeat_uv;
1682 xpc_online_heartbeat = xpc_online_heartbeat_uv; 1645 xpc_online_heartbeat = xpc_online_heartbeat_uv;