diff options
author | Dean Nelson <dcn@sgi.com> | 2005-11-09 15:41:57 -0500 |
---|---|---|
committer | Tony Luck <tony.luck@intel.com> | 2005-11-10 14:32:41 -0500 |
commit | 780d09e895032207a6b070a44d392a3c60574b70 (patch) | |
tree | 63a01c3898bd6c20f9cce801a250e71dc47e351b | |
parent | baf47fb66020e5c3fe2386680fa2d79d1f8e0052 (diff) |
[IA64] utilize notify_die() for XPC disengage
XPC (as in arch/ia64/sn/kernel/xp*) has a need to notify other partitions
(SGI Altix) whenever a partition is going down in order to get them to
disengage from accessing the halting partition's memory. If this is not
done before the reset of the hardware, the other partitions can find
themselves encountering MCAs that bring them down.
Signed-off-by: Dean Nelson <dcn@sgi.com>
Signed-off-by: Tony Luck <tony.luck@intel.com>
-rw-r--r-- | arch/ia64/sn/kernel/xpc.h | 2 | ||||
-rw-r--r-- | arch/ia64/sn/kernel/xpc_main.c | 102 | ||||
-rw-r--r-- | arch/ia64/sn/kernel/xpc_partition.c | 8 |
3 files changed, 107 insertions, 5 deletions
diff --git a/arch/ia64/sn/kernel/xpc.h b/arch/ia64/sn/kernel/xpc.h index fbcedc7c27f..5483a9f227d 100644 --- a/arch/ia64/sn/kernel/xpc.h +++ b/arch/ia64/sn/kernel/xpc.h | |||
@@ -163,7 +163,7 @@ struct xpc_vars { | |||
163 | u8 version; | 163 | u8 version; |
164 | u64 heartbeat; | 164 | u64 heartbeat; |
165 | u64 heartbeating_to_mask; | 165 | u64 heartbeating_to_mask; |
166 | u64 kdb_status; /* 0 = machine running */ | 166 | u64 heartbeat_offline; /* if 0, heartbeat should be changing */ |
167 | int act_nasid; | 167 | int act_nasid; |
168 | int act_phys_cpuid; | 168 | int act_phys_cpuid; |
169 | u64 vars_part_pa; | 169 | u64 vars_part_pa; |
diff --git a/arch/ia64/sn/kernel/xpc_main.c b/arch/ia64/sn/kernel/xpc_main.c index cece3c7c69b..b617236524c 100644 --- a/arch/ia64/sn/kernel/xpc_main.c +++ b/arch/ia64/sn/kernel/xpc_main.c | |||
@@ -57,6 +57,7 @@ | |||
57 | #include <linux/reboot.h> | 57 | #include <linux/reboot.h> |
58 | #include <asm/sn/intr.h> | 58 | #include <asm/sn/intr.h> |
59 | #include <asm/sn/sn_sal.h> | 59 | #include <asm/sn/sn_sal.h> |
60 | #include <asm/kdebug.h> | ||
60 | #include <asm/uaccess.h> | 61 | #include <asm/uaccess.h> |
61 | #include "xpc.h" | 62 | #include "xpc.h" |
62 | 63 | ||
@@ -188,6 +189,11 @@ static struct notifier_block xpc_reboot_notifier = { | |||
188 | .notifier_call = xpc_system_reboot, | 189 | .notifier_call = xpc_system_reboot, |
189 | }; | 190 | }; |
190 | 191 | ||
192 | static int xpc_system_die(struct notifier_block *, unsigned long, void *); | ||
193 | static struct notifier_block xpc_die_notifier = { | ||
194 | .notifier_call = xpc_system_die, | ||
195 | }; | ||
196 | |||
191 | 197 | ||
192 | /* | 198 | /* |
193 | * Timer function to enforce the timelimit on the partition disengage request. | 199 | * Timer function to enforce the timelimit on the partition disengage request. |
@@ -997,6 +1003,9 @@ xpc_do_exit(enum xpc_retval reason) | |||
997 | /* take ourselves off of the reboot_notifier_list */ | 1003 | /* take ourselves off of the reboot_notifier_list */ |
998 | (void) unregister_reboot_notifier(&xpc_reboot_notifier); | 1004 | (void) unregister_reboot_notifier(&xpc_reboot_notifier); |
999 | 1005 | ||
1006 | /* take ourselves off of the die_notifier list */ | ||
1007 | (void) unregister_die_notifier(&xpc_die_notifier); | ||
1008 | |||
1000 | /* close down protections for IPI operations */ | 1009 | /* close down protections for IPI operations */ |
1001 | xpc_restrict_IPI_ops(); | 1010 | xpc_restrict_IPI_ops(); |
1002 | 1011 | ||
@@ -1011,6 +1020,63 @@ xpc_do_exit(enum xpc_retval reason) | |||
1011 | 1020 | ||
1012 | 1021 | ||
1013 | /* | 1022 | /* |
1023 | * Called when the system is about to be either restarted or halted. | ||
1024 | */ | ||
1025 | static void | ||
1026 | xpc_die_disengage(void) | ||
1027 | { | ||
1028 | struct xpc_partition *part; | ||
1029 | partid_t partid; | ||
1030 | unsigned long engaged; | ||
1031 | long time, print_time, disengage_request_timeout; | ||
1032 | |||
1033 | |||
1034 | /* keep xpc_hb_checker thread from doing anything (just in case) */ | ||
1035 | xpc_exiting = 1; | ||
1036 | |||
1037 | xpc_vars->heartbeating_to_mask = 0; /* indicate we're deactivated */ | ||
1038 | |||
1039 | for (partid = 1; partid < XP_MAX_PARTITIONS; partid++) { | ||
1040 | part = &xpc_partitions[partid]; | ||
1041 | |||
1042 | if (!XPC_SUPPORTS_DISENGAGE_REQUEST(part-> | ||
1043 | remote_vars_version)) { | ||
1044 | |||
1045 | /* just in case it was left set by an earlier XPC */ | ||
1046 | xpc_clear_partition_engaged(1UL << partid); | ||
1047 | continue; | ||
1048 | } | ||
1049 | |||
1050 | if (xpc_partition_engaged(1UL << partid) || | ||
1051 | part->act_state != XPC_P_INACTIVE) { | ||
1052 | xpc_request_partition_disengage(part); | ||
1053 | xpc_mark_partition_disengaged(part); | ||
1054 | xpc_IPI_send_disengage(part); | ||
1055 | } | ||
1056 | } | ||
1057 | |||
1058 | print_time = rtc_time(); | ||
1059 | disengage_request_timeout = print_time + | ||
1060 | (xpc_disengage_request_timelimit * sn_rtc_cycles_per_second); | ||
1061 | |||
1062 | /* wait for all other partitions to disengage from us */ | ||
1063 | |||
1064 | while ((engaged = xpc_partition_engaged(-1UL)) && | ||
1065 | (time = rtc_time()) < disengage_request_timeout) { | ||
1066 | |||
1067 | if (time >= print_time) { | ||
1068 | dev_info(xpc_part, "waiting for remote partitions to " | ||
1069 | "disengage, engaged=0x%lx\n", engaged); | ||
1070 | print_time = time + (XPC_DISENGAGE_PRINTMSG_INTERVAL * | ||
1071 | sn_rtc_cycles_per_second); | ||
1072 | } | ||
1073 | } | ||
1074 | dev_info(xpc_part, "finished waiting for remote partitions to " | ||
1075 | "disengage, engaged=0x%lx\n", engaged); | ||
1076 | } | ||
1077 | |||
1078 | |||
1079 | /* | ||
1014 | * This function is called when the system is being rebooted. | 1080 | * This function is called when the system is being rebooted. |
1015 | */ | 1081 | */ |
1016 | static int | 1082 | static int |
@@ -1038,6 +1104,33 @@ xpc_system_reboot(struct notifier_block *nb, unsigned long event, void *unused) | |||
1038 | } | 1104 | } |
1039 | 1105 | ||
1040 | 1106 | ||
1107 | /* | ||
1108 | * This function is called for die-chain events: machine restart/halt and MCA/INIT monarch enter/leave. | ||
1109 | */ | ||
1110 | static int | ||
1111 | xpc_system_die(struct notifier_block *nb, unsigned long event, void *unused) | ||
1112 | { | ||
1113 | switch (event) { | ||
1114 | case DIE_MACHINE_RESTART: | ||
1115 | case DIE_MACHINE_HALT: | ||
1116 | xpc_die_disengage(); | ||
1117 | break; | ||
1118 | case DIE_MCA_MONARCH_ENTER: | ||
1119 | case DIE_INIT_MONARCH_ENTER: | ||
1120 | xpc_vars->heartbeat++; | ||
1121 | xpc_vars->heartbeat_offline = 1; | ||
1122 | break; | ||
1123 | case DIE_MCA_MONARCH_LEAVE: | ||
1124 | case DIE_INIT_MONARCH_LEAVE: | ||
1125 | xpc_vars->heartbeat++; | ||
1126 | xpc_vars->heartbeat_offline = 0; | ||
1127 | break; | ||
1128 | } | ||
1129 | |||
1130 | return NOTIFY_DONE; | ||
1131 | } | ||
1132 | |||
1133 | |||
1041 | int __init | 1134 | int __init |
1042 | xpc_init(void) | 1135 | xpc_init(void) |
1043 | { | 1136 | { |
@@ -1154,6 +1247,12 @@ xpc_init(void) | |||
1154 | dev_warn(xpc_part, "can't register reboot notifier\n"); | 1247 | dev_warn(xpc_part, "can't register reboot notifier\n"); |
1155 | } | 1248 | } |
1156 | 1249 | ||
1250 | /* add ourselves to the die_notifier list (i.e., ia64die_chain) */ | ||
1251 | ret = register_die_notifier(&xpc_die_notifier); | ||
1252 | if (ret != 0) { | ||
1253 | dev_warn(xpc_part, "can't register die notifier\n"); | ||
1254 | } | ||
1255 | |||
1157 | 1256 | ||
1158 | /* | 1257 | /* |
1159 | * Set the beating to other partitions into motion. This is | 1258 | * Set the beating to other partitions into motion. This is |
@@ -1179,6 +1278,9 @@ xpc_init(void) | |||
1179 | /* take ourselves off of the reboot_notifier_list */ | 1278 | /* take ourselves off of the reboot_notifier_list */ |
1180 | (void) unregister_reboot_notifier(&xpc_reboot_notifier); | 1279 | (void) unregister_reboot_notifier(&xpc_reboot_notifier); |
1181 | 1280 | ||
1281 | /* take ourselves off of the die_notifier list */ | ||
1282 | (void) unregister_die_notifier(&xpc_die_notifier); | ||
1283 | |||
1182 | del_timer_sync(&xpc_hb_timer); | 1284 | del_timer_sync(&xpc_hb_timer); |
1183 | free_irq(SGI_XPC_ACTIVATE, NULL); | 1285 | free_irq(SGI_XPC_ACTIVATE, NULL); |
1184 | xpc_restrict_IPI_ops(); | 1286 | xpc_restrict_IPI_ops(); |
diff --git a/arch/ia64/sn/kernel/xpc_partition.c b/arch/ia64/sn/kernel/xpc_partition.c index 581e113d2d3..cdd6431853a 100644 --- a/arch/ia64/sn/kernel/xpc_partition.c +++ b/arch/ia64/sn/kernel/xpc_partition.c | |||
@@ -436,13 +436,13 @@ xpc_check_remote_hb(void) | |||
436 | } | 436 | } |
437 | 437 | ||
438 | dev_dbg(xpc_part, "partid = %d, heartbeat = %ld, last_heartbeat" | 438 | dev_dbg(xpc_part, "partid = %d, heartbeat = %ld, last_heartbeat" |
439 | " = %ld, kdb_status = %ld, HB_mask = 0x%lx\n", partid, | 439 | " = %ld, heartbeat_offline = %ld, HB_mask = 0x%lx\n", |
440 | remote_vars->heartbeat, part->last_heartbeat, | 440 | partid, remote_vars->heartbeat, part->last_heartbeat, |
441 | remote_vars->kdb_status, | 441 | remote_vars->heartbeat_offline, |
442 | remote_vars->heartbeating_to_mask); | 442 | remote_vars->heartbeating_to_mask); |
443 | 443 | ||
444 | if (((remote_vars->heartbeat == part->last_heartbeat) && | 444 | if (((remote_vars->heartbeat == part->last_heartbeat) && |
445 | (remote_vars->kdb_status == 0)) || | 445 | (remote_vars->heartbeat_offline == 0)) || |
446 | !xpc_hb_allowed(sn_partition_id, remote_vars)) { | 446 | !xpc_hb_allowed(sn_partition_id, remote_vars)) { |
447 | 447 | ||
448 | XPC_DEACTIVATE_PARTITION(part, xpcNoHeartbeat); | 448 | XPC_DEACTIVATE_PARTITION(part, xpcNoHeartbeat); |