aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDean Nelson <dcn@sgi.com>2005-11-09 15:41:57 -0500
committerTony Luck <tony.luck@intel.com>2005-11-10 14:32:41 -0500
commit780d09e895032207a6b070a44d392a3c60574b70 (patch)
tree63a01c3898bd6c20f9cce801a250e71dc47e351b
parentbaf47fb66020e5c3fe2386680fa2d79d1f8e0052 (diff)
[IA64] utilize notify_die() for XPC disengage
XPC (as in arch/ia64/sn/kernel/xp*) has a need to notify other partitions (SGI Altix) whenever a partition is going down in order to get them to disengage from accessing the halting partition's memory. If this is not done before the reset of the hardware, the other partitions can find themselves encountering MCAs that bring them down. Signed-off-by: Dean Nelson <dcn@sgi.com> Signed-off-by: Tony Luck <tony.luck@intel.com>
-rw-r--r--arch/ia64/sn/kernel/xpc.h2
-rw-r--r--arch/ia64/sn/kernel/xpc_main.c102
-rw-r--r--arch/ia64/sn/kernel/xpc_partition.c8
3 files changed, 107 insertions, 5 deletions
diff --git a/arch/ia64/sn/kernel/xpc.h b/arch/ia64/sn/kernel/xpc.h
index fbcedc7c27fa..5483a9f227d4 100644
--- a/arch/ia64/sn/kernel/xpc.h
+++ b/arch/ia64/sn/kernel/xpc.h
@@ -163,7 +163,7 @@ struct xpc_vars {
163 u8 version; 163 u8 version;
164 u64 heartbeat; 164 u64 heartbeat;
165 u64 heartbeating_to_mask; 165 u64 heartbeating_to_mask;
166 u64 kdb_status; /* 0 = machine running */ 166 u64 heartbeat_offline; /* if 0, heartbeat should be changing */
167 int act_nasid; 167 int act_nasid;
168 int act_phys_cpuid; 168 int act_phys_cpuid;
169 u64 vars_part_pa; 169 u64 vars_part_pa;
diff --git a/arch/ia64/sn/kernel/xpc_main.c b/arch/ia64/sn/kernel/xpc_main.c
index cece3c7c69be..b617236524c6 100644
--- a/arch/ia64/sn/kernel/xpc_main.c
+++ b/arch/ia64/sn/kernel/xpc_main.c
@@ -57,6 +57,7 @@
57#include <linux/reboot.h> 57#include <linux/reboot.h>
58#include <asm/sn/intr.h> 58#include <asm/sn/intr.h>
59#include <asm/sn/sn_sal.h> 59#include <asm/sn/sn_sal.h>
60#include <asm/kdebug.h>
60#include <asm/uaccess.h> 61#include <asm/uaccess.h>
61#include "xpc.h" 62#include "xpc.h"
62 63
@@ -188,6 +189,11 @@ static struct notifier_block xpc_reboot_notifier = {
188 .notifier_call = xpc_system_reboot, 189 .notifier_call = xpc_system_reboot,
189}; 190};
190 191
192static int xpc_system_die(struct notifier_block *, unsigned long, void *);
193static struct notifier_block xpc_die_notifier = {
194 .notifier_call = xpc_system_die,
195};
196
191 197
192/* 198/*
193 * Timer function to enforce the timelimit on the partition disengage request. 199 * Timer function to enforce the timelimit on the partition disengage request.
@@ -997,6 +1003,9 @@ xpc_do_exit(enum xpc_retval reason)
997 /* take ourselves off of the reboot_notifier_list */ 1003 /* take ourselves off of the reboot_notifier_list */
998 (void) unregister_reboot_notifier(&xpc_reboot_notifier); 1004 (void) unregister_reboot_notifier(&xpc_reboot_notifier);
999 1005
1006 /* take ourselves off of the die_notifier list */
1007 (void) unregister_die_notifier(&xpc_die_notifier);
1008
1000 /* close down protections for IPI operations */ 1009 /* close down protections for IPI operations */
1001 xpc_restrict_IPI_ops(); 1010 xpc_restrict_IPI_ops();
1002 1011
@@ -1011,6 +1020,63 @@ xpc_do_exit(enum xpc_retval reason)
1011 1020
1012 1021
1013/* 1022/*
1023 * Called when the system is about to be either restarted or halted.
1024 */
1025static void
1026xpc_die_disengage(void)
1027{
1028 struct xpc_partition *part;
1029 partid_t partid;
1030 unsigned long engaged;
1031 long time, print_time, disengage_request_timeout;
1032
1033
1034 /* keep xpc_hb_checker thread from doing anything (just in case) */
1035 xpc_exiting = 1;
1036
1037 xpc_vars->heartbeating_to_mask = 0; /* indicate we're deactivated */
1038
1039 for (partid = 1; partid < XP_MAX_PARTITIONS; partid++) {
1040 part = &xpc_partitions[partid];
1041
1042 if (!XPC_SUPPORTS_DISENGAGE_REQUEST(part->
1043 remote_vars_version)) {
1044
1045 /* just in case it was left set by an earlier XPC */
1046 xpc_clear_partition_engaged(1UL << partid);
1047 continue;
1048 }
1049
1050 if (xpc_partition_engaged(1UL << partid) ||
1051 part->act_state != XPC_P_INACTIVE) {
1052 xpc_request_partition_disengage(part);
1053 xpc_mark_partition_disengaged(part);
1054 xpc_IPI_send_disengage(part);
1055 }
1056 }
1057
1058 print_time = rtc_time();
1059 disengage_request_timeout = print_time +
1060 (xpc_disengage_request_timelimit * sn_rtc_cycles_per_second);
1061
1062 /* wait for all other partitions to disengage from us */
1063
1064 while ((engaged = xpc_partition_engaged(-1UL)) &&
1065 (time = rtc_time()) < disengage_request_timeout) {
1066
1067 if (time >= print_time) {
1068 dev_info(xpc_part, "waiting for remote partitions to "
1069 "disengage, engaged=0x%lx\n", engaged);
1070 print_time = time + (XPC_DISENGAGE_PRINTMSG_INTERVAL *
1071 sn_rtc_cycles_per_second);
1072 }
1073 }
1074 dev_info(xpc_part, "finished waiting for remote partitions to "
1075 "disengage, engaged=0x%lx\n", engaged);
1076}
1077
1078
1079/*
1014 * This function is called when the system is being rebooted. 1080 * This function is called when the system is being rebooted.
1015 */ 1081 */
1016static int 1082static int
@@ -1038,6 +1104,33 @@ xpc_system_reboot(struct notifier_block *nb, unsigned long event, void *unused)
1038} 1104}
1039 1105
1040 1106
1107/*
1108 * This function is called when the system is being rebooted.
1109 */
1110static int
1111xpc_system_die(struct notifier_block *nb, unsigned long event, void *unused)
1112{
1113 switch (event) {
1114 case DIE_MACHINE_RESTART:
1115 case DIE_MACHINE_HALT:
1116 xpc_die_disengage();
1117 break;
1118 case DIE_MCA_MONARCH_ENTER:
1119 case DIE_INIT_MONARCH_ENTER:
1120 xpc_vars->heartbeat++;
1121 xpc_vars->heartbeat_offline = 1;
1122 break;
1123 case DIE_MCA_MONARCH_LEAVE:
1124 case DIE_INIT_MONARCH_LEAVE:
1125 xpc_vars->heartbeat++;
1126 xpc_vars->heartbeat_offline = 0;
1127 break;
1128 }
1129
1130 return NOTIFY_DONE;
1131}
1132
1133
1041int __init 1134int __init
1042xpc_init(void) 1135xpc_init(void)
1043{ 1136{
@@ -1154,6 +1247,12 @@ xpc_init(void)
1154 dev_warn(xpc_part, "can't register reboot notifier\n"); 1247 dev_warn(xpc_part, "can't register reboot notifier\n");
1155 } 1248 }
1156 1249
1250 /* add ourselves to the die_notifier list (i.e., ia64die_chain) */
1251 ret = register_die_notifier(&xpc_die_notifier);
1252 if (ret != 0) {
1253 dev_warn(xpc_part, "can't register die notifier\n");
1254 }
1255
1157 1256
1158 /* 1257 /*
1159 * Set the beating to other partitions into motion. This is 1258 * Set the beating to other partitions into motion. This is
@@ -1179,6 +1278,9 @@ xpc_init(void)
1179 /* take ourselves off of the reboot_notifier_list */ 1278 /* take ourselves off of the reboot_notifier_list */
1180 (void) unregister_reboot_notifier(&xpc_reboot_notifier); 1279 (void) unregister_reboot_notifier(&xpc_reboot_notifier);
1181 1280
1281 /* take ourselves off of the die_notifier list */
1282 (void) unregister_die_notifier(&xpc_die_notifier);
1283
1182 del_timer_sync(&xpc_hb_timer); 1284 del_timer_sync(&xpc_hb_timer);
1183 free_irq(SGI_XPC_ACTIVATE, NULL); 1285 free_irq(SGI_XPC_ACTIVATE, NULL);
1184 xpc_restrict_IPI_ops(); 1286 xpc_restrict_IPI_ops();
diff --git a/arch/ia64/sn/kernel/xpc_partition.c b/arch/ia64/sn/kernel/xpc_partition.c
index 581e113d2d37..cdd6431853a1 100644
--- a/arch/ia64/sn/kernel/xpc_partition.c
+++ b/arch/ia64/sn/kernel/xpc_partition.c
@@ -436,13 +436,13 @@ xpc_check_remote_hb(void)
436 } 436 }
437 437
438 dev_dbg(xpc_part, "partid = %d, heartbeat = %ld, last_heartbeat" 438 dev_dbg(xpc_part, "partid = %d, heartbeat = %ld, last_heartbeat"
439 " = %ld, kdb_status = %ld, HB_mask = 0x%lx\n", partid, 439 " = %ld, heartbeat_offline = %ld, HB_mask = 0x%lx\n",
440 remote_vars->heartbeat, part->last_heartbeat, 440 partid, remote_vars->heartbeat, part->last_heartbeat,
441 remote_vars->kdb_status, 441 remote_vars->heartbeat_offline,
442 remote_vars->heartbeating_to_mask); 442 remote_vars->heartbeating_to_mask);
443 443
444 if (((remote_vars->heartbeat == part->last_heartbeat) && 444 if (((remote_vars->heartbeat == part->last_heartbeat) &&
445 (remote_vars->kdb_status == 0)) || 445 (remote_vars->heartbeat_offline == 0)) ||
446 !xpc_hb_allowed(sn_partition_id, remote_vars)) { 446 !xpc_hb_allowed(sn_partition_id, remote_vars)) {
447 447
448 XPC_DEACTIVATE_PARTITION(part, xpcNoHeartbeat); 448 XPC_DEACTIVATE_PARTITION(part, xpcNoHeartbeat);