aboutsummaryrefslogtreecommitdiffstats
path: root/arch
diff options
context:
space:
mode:
authorDean Nelson <dcn@sgi.com>2005-09-01 15:01:37 -0400
committerTony Luck <tony.luck@intel.com>2005-09-06 19:15:38 -0400
commita607c38971fd078865fa9bef39e6c1d4435680c8 (patch)
treecb7853f0d74ee6a9cd92ccc721096b57367d0390 /arch
parent4706df3d3c42af802597d82c8b1542c3d52eab23 (diff)
[IA64-SGI] get XPC to cleanly disengage from remote memory references
When XPC is being shut down (i.e., rmmod, reboot), it doesn't ensure that other partitions with which it was connected have completely disengaged from any attempt at cross-partition memory references. This can lead to MCAs in any of these other partitions when the partition is reset. Signed-off-by: Dean Nelson <dcn@sgi.com> Signed-off-by: Tony Luck <tony.luck@intel.com>
Diffstat (limited to 'arch')
-rw-r--r--arch/ia64/sn/kernel/xpc.h288
-rw-r--r--arch/ia64/sn/kernel/xpc_channel.c216
-rw-r--r--arch/ia64/sn/kernel/xpc_main.c242
-rw-r--r--arch/ia64/sn/kernel/xpc_partition.c304
4 files changed, 813 insertions, 237 deletions
diff --git a/arch/ia64/sn/kernel/xpc.h b/arch/ia64/sn/kernel/xpc.h
index d0ee635daf2e..565822ab3d08 100644
--- a/arch/ia64/sn/kernel/xpc.h
+++ b/arch/ia64/sn/kernel/xpc.h
@@ -57,7 +57,7 @@
57#define XPC_NASID_FROM_W_B(_w, _b) (((_w) * 64 + (_b)) * 2) 57#define XPC_NASID_FROM_W_B(_w, _b) (((_w) * 64 + (_b)) * 2)
58 58
59#define XPC_HB_DEFAULT_INTERVAL 5 /* incr HB every x secs */ 59#define XPC_HB_DEFAULT_INTERVAL 5 /* incr HB every x secs */
60#define XPC_HB_CHECK_DEFAULT_TIMEOUT 20 /* check HB every x secs */ 60#define XPC_HB_CHECK_DEFAULT_INTERVAL 20 /* check HB every x secs */
61 61
62/* define the process name of HB checker and the CPU it is pinned to */ 62/* define the process name of HB checker and the CPU it is pinned to */
63#define XPC_HB_CHECK_THREAD_NAME "xpc_hb" 63#define XPC_HB_CHECK_THREAD_NAME "xpc_hb"
@@ -67,11 +67,6 @@
67#define XPC_DISCOVERY_THREAD_NAME "xpc_discovery" 67#define XPC_DISCOVERY_THREAD_NAME "xpc_discovery"
68 68
69 69
70#define XPC_HB_ALLOWED(_p, _v) ((_v)->heartbeating_to_mask & (1UL << (_p)))
71#define XPC_ALLOW_HB(_p, _v) (_v)->heartbeating_to_mask |= (1UL << (_p))
72#define XPC_DISALLOW_HB(_p, _v) (_v)->heartbeating_to_mask &= (~(1UL << (_p)))
73
74
75/* 70/*
76 * Reserved Page provided by SAL. 71 * Reserved Page provided by SAL.
77 * 72 *
@@ -88,14 +83,38 @@ struct xpc_rsvd_page {
88 u8 version; 83 u8 version;
89 u8 pad[6]; /* pad to u64 align */ 84 u8 pad[6]; /* pad to u64 align */
90 volatile u64 vars_pa; 85 volatile u64 vars_pa;
86 struct timespec stamp; /* time when reserved page was initialized */
91 u64 part_nasids[XP_NASID_MASK_WORDS] ____cacheline_aligned; 87 u64 part_nasids[XP_NASID_MASK_WORDS] ____cacheline_aligned;
92 u64 mach_nasids[XP_NASID_MASK_WORDS] ____cacheline_aligned; 88 u64 mach_nasids[XP_NASID_MASK_WORDS] ____cacheline_aligned;
93}; 89};
94#define XPC_RP_VERSION _XPC_VERSION(1,0) /* version 1.0 of the reserved page */
95 90
96#define XPC_RSVD_PAGE_ALIGNED_SIZE \ 91#define XPC_RSVD_PAGE_ALIGNED_SIZE \
97 (L1_CACHE_ALIGN(sizeof(struct xpc_rsvd_page))) 92 (L1_CACHE_ALIGN(sizeof(struct xpc_rsvd_page)))
98 93
94#define XPC_RP_VERSION _XPC_VERSION(1,1) /* version 1.1 of the reserved page */
95
/*
 * Evaluates to nonzero when _version is at least version 1.1 of the reserved
 * page. The argument is parenthesized so that an expression argument (e.g. a
 * conditional expression) is compared as a whole.
 */
#define XPC_SUPPORTS_RP_STAMP(_version) \
			((_version) >= _XPC_VERSION(1,1))
98
/*
 * compare stamps - the return value is:
 *
 *	< 0,  if stamp1 < stamp2
 *	= 0,  if stamp1 == stamp2
 *	> 0,  if stamp1 > stamp2
 *
 * The seconds are compared first and the nanoseconds only break a tie. The
 * fields are compared directly rather than subtracted, since squeezing the
 * difference of two wide fields into an int can yield zero or the wrong sign.
 */
static inline int
xpc_compare_stamps(struct timespec *stamp1, struct timespec *stamp2)
{
	if (stamp1->tv_sec != stamp2->tv_sec) {
		return (stamp1->tv_sec < stamp2->tv_sec) ? -1 : 1;
	}
	if (stamp1->tv_nsec != stamp2->tv_nsec) {
		return (stamp1->tv_nsec < stamp2->tv_nsec) ? -1 : 1;
	}
	return 0;
}
117
99 118
100/* 119/*
101 * Define the structures by which XPC variables can be exported to other 120 * Define the structures by which XPC variables can be exported to other
@@ -121,12 +140,61 @@ struct xpc_vars {
121 u64 vars_part_pa; 140 u64 vars_part_pa;
122 u64 amos_page_pa; /* paddr of page of AMOs from MSPEC driver */ 141 u64 amos_page_pa; /* paddr of page of AMOs from MSPEC driver */
123 AMO_t *amos_page; /* vaddr of page of AMOs from MSPEC driver */ 142 AMO_t *amos_page; /* vaddr of page of AMOs from MSPEC driver */
124 AMO_t *act_amos; /* pointer to the first activation AMO */
125}; 143};
126#define XPC_V_VERSION _XPC_VERSION(3,0) /* version 3.0 of the cross vars */
127 144
128#define XPC_VARS_ALIGNED_SIZE (L1_CACHE_ALIGN(sizeof(struct xpc_vars))) 145#define XPC_VARS_ALIGNED_SIZE (L1_CACHE_ALIGN(sizeof(struct xpc_vars)))
129 146
147#define XPC_V_VERSION _XPC_VERSION(3,1) /* version 3.1 of the cross vars */
148
/*
 * Evaluates to nonzero when _version is at least version 3.1 of the cross
 * vars. The argument is parenthesized so that an expression argument is
 * compared as a whole.
 */
#define XPC_SUPPORTS_DISENGAGE_REQUEST(_version) \
			((_version) >= _XPC_VERSION(3,1))
151
152
153static inline int
154xpc_hb_allowed(partid_t partid, struct xpc_vars *vars)
155{
156 return ((vars->heartbeating_to_mask & (1UL << partid)) != 0);
157}
158
159static inline void
160xpc_allow_hb(partid_t partid, struct xpc_vars *vars)
161{
162 u64 old_mask, new_mask;
163
164 do {
165 old_mask = vars->heartbeating_to_mask;
166 new_mask = (old_mask | (1UL << partid));
167 } while (cmpxchg(&vars->heartbeating_to_mask, old_mask, new_mask) !=
168 old_mask);
169}
170
171static inline void
172xpc_disallow_hb(partid_t partid, struct xpc_vars *vars)
173{
174 u64 old_mask, new_mask;
175
176 do {
177 old_mask = vars->heartbeating_to_mask;
178 new_mask = (old_mask & ~(1UL << partid));
179 } while (cmpxchg(&vars->heartbeating_to_mask, old_mask, new_mask) !=
180 old_mask);
181}
182
183
184/*
185 * The AMOs page consists of a number of AMO variables which are divided into
186 * four groups. The first two groups are used to identify an IRQ's sender.
187 * These two groups consist of 64 and 16 AMO variables respectively. The last
188 * two groups, consisting of just one AMO variable each, are used to identify
189 * the remote partitions that are currently engaged (from the viewpoint of
190 * the XPC running on the remote partition).
191 */
192#define XPC_NOTIFY_IRQ_AMOS 0
193#define XPC_ACTIVATE_IRQ_AMOS (XPC_NOTIFY_IRQ_AMOS + XP_MAX_PARTITIONS)
194#define XPC_ENGAGED_PARTITIONS_AMO (XPC_ACTIVATE_IRQ_AMOS + XP_NASID_MASK_WORDS)
195#define XPC_DISENGAGE_REQUEST_AMO (XPC_ENGAGED_PARTITIONS_AMO + 1)
196
197
130/* 198/*
131 * The following structure describes the per partition specific variables. 199 * The following structure describes the per partition specific variables.
132 * 200 *
@@ -358,7 +426,7 @@ struct xpc_channel {
358 void *key; /* pointer to user's key */ 426 void *key; /* pointer to user's key */
359 427
360 struct semaphore msg_to_pull_sema; /* next msg to pull serialization */ 428 struct semaphore msg_to_pull_sema; /* next msg to pull serialization */
361 struct semaphore teardown_sema; /* wait for teardown completion */ 429 struct semaphore wdisconnect_sema; /* wait for channel disconnect */
362 430
363 struct xpc_openclose_args *local_openclose_args; /* args passed on */ 431 struct xpc_openclose_args *local_openclose_args; /* args passed on */
364 /* opening or closing of channel */ 432 /* opening or closing of channel */
@@ -410,6 +478,7 @@ struct xpc_channel {
410 478
411#define XPC_C_DISCONNECTED 0x00002000 /* channel is disconnected */ 479#define XPC_C_DISCONNECTED 0x00002000 /* channel is disconnected */
412#define XPC_C_DISCONNECTING 0x00004000 /* channel is being disconnected */ 480#define XPC_C_DISCONNECTING 0x00004000 /* channel is being disconnected */
481#define XPC_C_WDISCONNECT 0x00008000 /* waiting for channel disconnect */
413 482
414 483
415 484
@@ -422,6 +491,8 @@ struct xpc_partition {
422 491
423 /* XPC HB infrastructure */ 492 /* XPC HB infrastructure */
424 493
494 u8 remote_rp_version; /* version# of partition's rsvd pg */
495 struct timespec remote_rp_stamp;/* time when rsvd pg was initialized */
425 u64 remote_rp_pa; /* phys addr of partition's rsvd pg */ 496 u64 remote_rp_pa; /* phys addr of partition's rsvd pg */
426 u64 remote_vars_pa; /* phys addr of partition's vars */ 497 u64 remote_vars_pa; /* phys addr of partition's vars */
427 u64 remote_vars_part_pa; /* phys addr of partition's vars part */ 498 u64 remote_vars_part_pa; /* phys addr of partition's vars part */
@@ -432,10 +503,14 @@ struct xpc_partition {
432 u32 act_IRQ_rcvd; /* IRQs since activation */ 503 u32 act_IRQ_rcvd; /* IRQs since activation */
433 spinlock_t act_lock; /* protect updating of act_state */ 504 spinlock_t act_lock; /* protect updating of act_state */
434 u8 act_state; /* from XPC HB viewpoint */ 505 u8 act_state; /* from XPC HB viewpoint */
506 u8 remote_vars_version; /* version# of partition's vars */
435 enum xpc_retval reason; /* reason partition is deactivating */ 507 enum xpc_retval reason; /* reason partition is deactivating */
436 int reason_line; /* line# deactivation initiated from */ 508 int reason_line; /* line# deactivation initiated from */
437 int reactivate_nasid; /* nasid in partition to reactivate */ 509 int reactivate_nasid; /* nasid in partition to reactivate */
438 510
511 unsigned long disengage_request_timeout; /* timeout in XPC_TICKS */
512 struct timer_list disengage_request_timer;
513
439 514
440 /* XPC infrastructure referencing and teardown control */ 515 /* XPC infrastructure referencing and teardown control */
441 516
@@ -454,6 +529,7 @@ struct xpc_partition {
454 529
455 u8 nchannels; /* #of defined channels supported */ 530 u8 nchannels; /* #of defined channels supported */
456 atomic_t nchannels_active; /* #of channels that are not DISCONNECTED */ 531 atomic_t nchannels_active; /* #of channels that are not DISCONNECTED */
532 atomic_t nchannels_engaged;/* #of channels engaged with remote part */
457 struct xpc_channel *channels;/* array of channel structures */ 533 struct xpc_channel *channels;/* array of channel structures */
458 534
459 void *local_GPs_base; /* base address of kmalloc'd space */ 535 void *local_GPs_base; /* base address of kmalloc'd space */
@@ -518,6 +594,7 @@ struct xpc_partition {
518#define XPC_P_TORNDOWN 0x03 /* infrastructure is torndown */ 594#define XPC_P_TORNDOWN 0x03 /* infrastructure is torndown */
519 595
520 596
597
521/* 598/*
522 * struct xpc_partition IPI_timer #of seconds to wait before checking for 599 * struct xpc_partition IPI_timer #of seconds to wait before checking for
523 * dropped IPIs. These occur whenever an IPI amo write doesn't complete until 600 * dropped IPIs. These occur whenever an IPI amo write doesn't complete until
@@ -526,6 +603,13 @@ struct xpc_partition {
526#define XPC_P_DROPPED_IPI_WAIT (0.25 * HZ) 603#define XPC_P_DROPPED_IPI_WAIT (0.25 * HZ)
527 604
528 605
606/* number of seconds to wait for other partitions to disengage */
607#define XPC_DISENGAGE_REQUEST_TIMELIMIT 90
608
609/* interval in seconds to print 'waiting disengagement' messages */
610#define XPC_DISENGAGE_PRINTMSG_INTERVAL 10
611
612
529#define XPC_PARTID(_p) ((partid_t) ((_p) - &xpc_partitions[0])) 613#define XPC_PARTID(_p) ((partid_t) ((_p) - &xpc_partitions[0]))
530 614
531 615
@@ -550,8 +634,6 @@ extern void xpc_activate_partition(struct xpc_partition *);
550 634
551/* found in xpc_partition.c */ 635/* found in xpc_partition.c */
552extern int xpc_exiting; 636extern int xpc_exiting;
553extern int xpc_hb_interval;
554extern int xpc_hb_check_interval;
555extern struct xpc_vars *xpc_vars; 637extern struct xpc_vars *xpc_vars;
556extern struct xpc_rsvd_page *xpc_rsvd_page; 638extern struct xpc_rsvd_page *xpc_rsvd_page;
557extern struct xpc_vars_part *xpc_vars_part; 639extern struct xpc_vars_part *xpc_vars_part;
@@ -561,6 +643,7 @@ extern struct xpc_rsvd_page *xpc_rsvd_page_init(void);
561extern void xpc_allow_IPI_ops(void); 643extern void xpc_allow_IPI_ops(void);
562extern void xpc_restrict_IPI_ops(void); 644extern void xpc_restrict_IPI_ops(void);
563extern int xpc_identify_act_IRQ_sender(void); 645extern int xpc_identify_act_IRQ_sender(void);
646extern int xpc_partition_disengaged(struct xpc_partition *);
564extern enum xpc_retval xpc_mark_partition_active(struct xpc_partition *); 647extern enum xpc_retval xpc_mark_partition_active(struct xpc_partition *);
565extern void xpc_mark_partition_inactive(struct xpc_partition *); 648extern void xpc_mark_partition_inactive(struct xpc_partition *);
566extern void xpc_discovery(void); 649extern void xpc_discovery(void);
@@ -585,8 +668,8 @@ extern void xpc_connected_callout(struct xpc_channel *);
585extern void xpc_deliver_msg(struct xpc_channel *); 668extern void xpc_deliver_msg(struct xpc_channel *);
586extern void xpc_disconnect_channel(const int, struct xpc_channel *, 669extern void xpc_disconnect_channel(const int, struct xpc_channel *,
587 enum xpc_retval, unsigned long *); 670 enum xpc_retval, unsigned long *);
588extern void xpc_disconnected_callout(struct xpc_channel *); 671extern void xpc_disconnecting_callout(struct xpc_channel *);
589extern void xpc_partition_down(struct xpc_partition *, enum xpc_retval); 672extern void xpc_partition_going_down(struct xpc_partition *, enum xpc_retval);
590extern void xpc_teardown_infrastructure(struct xpc_partition *); 673extern void xpc_teardown_infrastructure(struct xpc_partition *);
591 674
592 675
@@ -674,6 +757,157 @@ xpc_part_ref(struct xpc_partition *part)
674 757
675 758
676/* 759/*
760 * This next set of inlines are used to keep track of when a partition is
761 * potentially engaged in accessing memory belonging to another partition.
762 */
763
764static inline void
765xpc_mark_partition_engaged(struct xpc_partition *part)
766{
767 unsigned long irq_flags;
768 AMO_t *amo = (AMO_t *) __va(part->remote_amos_page_pa +
769 (XPC_ENGAGED_PARTITIONS_AMO * sizeof(AMO_t)));
770
771
772 local_irq_save(irq_flags);
773
774 /* set bit corresponding to our partid in remote partition's AMO */
775 FETCHOP_STORE_OP(TO_AMO((u64) &amo->variable), FETCHOP_OR,
776 (1UL << sn_partition_id));
777 /*
778 * We must always use the nofault function regardless of whether we
779 * are on a Shub 1.1 system or a Shub 1.2 slice 0xc processor. If we
780 * didn't, we'd never know that the other partition is down and would
781 * keep sending IPIs and AMOs to it until the heartbeat times out.
782 */
783 (void) xp_nofault_PIOR((u64 *) GLOBAL_MMR_ADDR(NASID_GET(&amo->
784 variable), xp_nofault_PIOR_target));
785
786 local_irq_restore(irq_flags);
787}
788
789static inline void
790xpc_mark_partition_disengaged(struct xpc_partition *part)
791{
792 unsigned long irq_flags;
793 AMO_t *amo = (AMO_t *) __va(part->remote_amos_page_pa +
794 (XPC_ENGAGED_PARTITIONS_AMO * sizeof(AMO_t)));
795
796
797 local_irq_save(irq_flags);
798
799 /* clear bit corresponding to our partid in remote partition's AMO */
800 FETCHOP_STORE_OP(TO_AMO((u64) &amo->variable), FETCHOP_AND,
801 ~(1UL << sn_partition_id));
802 /*
803 * We must always use the nofault function regardless of whether we
804 * are on a Shub 1.1 system or a Shub 1.2 slice 0xc processor. If we
805 * didn't, we'd never know that the other partition is down and would
806 * keep sending IPIs and AMOs to it until the heartbeat times out.
807 */
808 (void) xp_nofault_PIOR((u64 *) GLOBAL_MMR_ADDR(NASID_GET(&amo->
809 variable), xp_nofault_PIOR_target));
810
811 local_irq_restore(irq_flags);
812}
813
814static inline void
815xpc_request_partition_disengage(struct xpc_partition *part)
816{
817 unsigned long irq_flags;
818 AMO_t *amo = (AMO_t *) __va(part->remote_amos_page_pa +
819 (XPC_DISENGAGE_REQUEST_AMO * sizeof(AMO_t)));
820
821
822 local_irq_save(irq_flags);
823
824 /* set bit corresponding to our partid in remote partition's AMO */
825 FETCHOP_STORE_OP(TO_AMO((u64) &amo->variable), FETCHOP_OR,
826 (1UL << sn_partition_id));
827 /*
828 * We must always use the nofault function regardless of whether we
829 * are on a Shub 1.1 system or a Shub 1.2 slice 0xc processor. If we
830 * didn't, we'd never know that the other partition is down and would
831 * keep sending IPIs and AMOs to it until the heartbeat times out.
832 */
833 (void) xp_nofault_PIOR((u64 *) GLOBAL_MMR_ADDR(NASID_GET(&amo->
834 variable), xp_nofault_PIOR_target));
835
836 local_irq_restore(irq_flags);
837}
838
839static inline void
840xpc_cancel_partition_disengage_request(struct xpc_partition *part)
841{
842 unsigned long irq_flags;
843 AMO_t *amo = (AMO_t *) __va(part->remote_amos_page_pa +
844 (XPC_DISENGAGE_REQUEST_AMO * sizeof(AMO_t)));
845
846
847 local_irq_save(irq_flags);
848
849 /* clear bit corresponding to our partid in remote partition's AMO */
850 FETCHOP_STORE_OP(TO_AMO((u64) &amo->variable), FETCHOP_AND,
851 ~(1UL << sn_partition_id));
852 /*
853 * We must always use the nofault function regardless of whether we
854 * are on a Shub 1.1 system or a Shub 1.2 slice 0xc processor. If we
855 * didn't, we'd never know that the other partition is down and would
856 * keep sending IPIs and AMOs to it until the heartbeat times out.
857 */
858 (void) xp_nofault_PIOR((u64 *) GLOBAL_MMR_ADDR(NASID_GET(&amo->
859 variable), xp_nofault_PIOR_target));
860
861 local_irq_restore(irq_flags);
862}
863
864static inline u64
865xpc_partition_engaged(u64 partid_mask)
866{
867 AMO_t *amo = xpc_vars->amos_page + XPC_ENGAGED_PARTITIONS_AMO;
868
869
870 /* return our partition's AMO variable ANDed with partid_mask */
871 return (FETCHOP_LOAD_OP(TO_AMO((u64) &amo->variable), FETCHOP_LOAD) &
872 partid_mask);
873}
874
875static inline u64
876xpc_partition_disengage_requested(u64 partid_mask)
877{
878 AMO_t *amo = xpc_vars->amos_page + XPC_DISENGAGE_REQUEST_AMO;
879
880
881 /* return our partition's AMO variable ANDed with partid_mask */
882 return (FETCHOP_LOAD_OP(TO_AMO((u64) &amo->variable), FETCHOP_LOAD) &
883 partid_mask);
884}
885
886static inline void
887xpc_clear_partition_engaged(u64 partid_mask)
888{
889 AMO_t *amo = xpc_vars->amos_page + XPC_ENGAGED_PARTITIONS_AMO;
890
891
892 /* clear bit(s) based on partid_mask in our partition's AMO */
893 FETCHOP_STORE_OP(TO_AMO((u64) &amo->variable), FETCHOP_AND,
894 ~partid_mask);
895}
896
897static inline void
898xpc_clear_partition_disengage_request(u64 partid_mask)
899{
900 AMO_t *amo = xpc_vars->amos_page + XPC_DISENGAGE_REQUEST_AMO;
901
902
903 /* clear bit(s) based on partid_mask in our partition's AMO */
904 FETCHOP_STORE_OP(TO_AMO((u64) &amo->variable), FETCHOP_AND,
905 ~partid_mask);
906}
907
908
909
910/*
677 * The following set of macros and inlines are used for the sending and 911 * The following set of macros and inlines are used for the sending and
678 * receiving of IPIs (also known as IRQs). There are two flavors of IPIs, 912 * receiving of IPIs (also known as IRQs). There are two flavors of IPIs,
679 * one that is associated with partition activity (SGI_XPC_ACTIVATE) and 913 * one that is associated with partition activity (SGI_XPC_ACTIVATE) and
@@ -722,13 +956,13 @@ xpc_IPI_send(AMO_t *amo, u64 flag, int nasid, int phys_cpuid, int vector)
722 * Flag the appropriate AMO variable and send an IPI to the specified node. 956 * Flag the appropriate AMO variable and send an IPI to the specified node.
723 */ 957 */
724static inline void 958static inline void
725xpc_activate_IRQ_send(u64 amos_page, int from_nasid, int to_nasid, 959xpc_activate_IRQ_send(u64 amos_page_pa, int from_nasid, int to_nasid,
726 int to_phys_cpuid) 960 int to_phys_cpuid)
727{ 961{
728 int w_index = XPC_NASID_W_INDEX(from_nasid); 962 int w_index = XPC_NASID_W_INDEX(from_nasid);
729 int b_index = XPC_NASID_B_INDEX(from_nasid); 963 int b_index = XPC_NASID_B_INDEX(from_nasid);
730 AMO_t *amos = (AMO_t *) __va(amos_page + 964 AMO_t *amos = (AMO_t *) __va(amos_page_pa +
731 (XP_MAX_PARTITIONS * sizeof(AMO_t))); 965 (XPC_ACTIVATE_IRQ_AMOS * sizeof(AMO_t)));
732 966
733 967
734 (void) xpc_IPI_send(&amos[w_index], (1UL << b_index), to_nasid, 968 (void) xpc_IPI_send(&amos[w_index], (1UL << b_index), to_nasid,
@@ -756,6 +990,13 @@ xpc_IPI_send_reactivate(struct xpc_partition *part)
756 xpc_vars->act_nasid, xpc_vars->act_phys_cpuid); 990 xpc_vars->act_nasid, xpc_vars->act_phys_cpuid);
757} 991}
758 992
993static inline void
994xpc_IPI_send_disengage(struct xpc_partition *part)
995{
996 xpc_activate_IRQ_send(part->remote_amos_page_pa, cnodeid_to_nasid(0),
997 part->remote_act_nasid, part->remote_act_phys_cpuid);
998}
999
759 1000
760/* 1001/*
761 * IPIs associated with SGI_XPC_NOTIFY IRQ. 1002 * IPIs associated with SGI_XPC_NOTIFY IRQ.
@@ -903,17 +1144,18 @@ xpc_IPI_send_local_msgrequest(struct xpc_channel *ch)
903 * cacheable mapping for the entire region. This will prevent speculative 1144 * cacheable mapping for the entire region. This will prevent speculative
904 * reading of cached copies of our lines from being issued which will cause 1145 * reading of cached copies of our lines from being issued which will cause
905 * a PI FSB Protocol error to be generated by the SHUB. For XPC, we need 64 1146 * a PI FSB Protocol error to be generated by the SHUB. For XPC, we need 64
906 * (XP_MAX_PARTITIONS) AMO variables for message notification (xpc_main.c) 1147 * (XP_MAX_PARTITIONS) AMO variables for message notification and an
907 * and an additional 16 AMO variables for partition activation (xpc_hb.c). 1148 * additional 16 (XP_NASID_MASK_WORDS) AMO variables for partition activation
1149 * and 2 AMO variables for partition deactivation.
908 */ 1150 */
909static inline AMO_t * 1151static inline AMO_t *
910xpc_IPI_init(partid_t partid) 1152xpc_IPI_init(int index)
911{ 1153{
912 AMO_t *part_amo = xpc_vars->amos_page + partid; 1154 AMO_t *amo = xpc_vars->amos_page + index;
913 1155
914 1156
915 xpc_IPI_receive(part_amo); 1157 (void) xpc_IPI_receive(amo); /* clear AMO variable */
916 return part_amo; 1158 return amo;
917} 1159}
918 1160
919 1161
diff --git a/arch/ia64/sn/kernel/xpc_channel.c b/arch/ia64/sn/kernel/xpc_channel.c
index 94698bea7be0..195ac1b8e262 100644
--- a/arch/ia64/sn/kernel/xpc_channel.c
+++ b/arch/ia64/sn/kernel/xpc_channel.c
@@ -57,6 +57,7 @@ xpc_initialize_channels(struct xpc_partition *part, partid_t partid)
57 57
58 spin_lock_init(&ch->lock); 58 spin_lock_init(&ch->lock);
59 sema_init(&ch->msg_to_pull_sema, 1); /* mutex */ 59 sema_init(&ch->msg_to_pull_sema, 1); /* mutex */
60 sema_init(&ch->wdisconnect_sema, 0); /* event wait */
60 61
61 atomic_set(&ch->n_on_msg_allocate_wq, 0); 62 atomic_set(&ch->n_on_msg_allocate_wq, 0);
62 init_waitqueue_head(&ch->msg_allocate_wq); 63 init_waitqueue_head(&ch->msg_allocate_wq);
@@ -166,6 +167,7 @@ xpc_setup_infrastructure(struct xpc_partition *part)
166 xpc_initialize_channels(part, partid); 167 xpc_initialize_channels(part, partid);
167 168
168 atomic_set(&part->nchannels_active, 0); 169 atomic_set(&part->nchannels_active, 0);
170 atomic_set(&part->nchannels_engaged, 0);
169 171
170 172
171 /* local_IPI_amo were set to 0 by an earlier memset() */ 173 /* local_IPI_amo were set to 0 by an earlier memset() */
@@ -555,8 +557,6 @@ xpc_allocate_msgqueues(struct xpc_channel *ch)
555 sema_init(&ch->notify_queue[i].sema, 0); 557 sema_init(&ch->notify_queue[i].sema, 0);
556 } 558 }
557 559
558 sema_init(&ch->teardown_sema, 0); /* event wait */
559
560 spin_lock_irqsave(&ch->lock, irq_flags); 560 spin_lock_irqsave(&ch->lock, irq_flags);
561 ch->flags |= XPC_C_SETUP; 561 ch->flags |= XPC_C_SETUP;
562 spin_unlock_irqrestore(&ch->lock, irq_flags); 562 spin_unlock_irqrestore(&ch->lock, irq_flags);
@@ -626,6 +626,55 @@ xpc_process_connect(struct xpc_channel *ch, unsigned long *irq_flags)
626 626
627 627
628/* 628/*
629 * Notify those who wanted to be notified upon delivery of their message.
630 */
631static void
632xpc_notify_senders(struct xpc_channel *ch, enum xpc_retval reason, s64 put)
633{
634 struct xpc_notify *notify;
635 u8 notify_type;
636 s64 get = ch->w_remote_GP.get - 1;
637
638
639 while (++get < put && atomic_read(&ch->n_to_notify) > 0) {
640
641 notify = &ch->notify_queue[get % ch->local_nentries];
642
643 /*
644 * See if the notify entry indicates it was associated with
645 * a message who's sender wants to be notified. It is possible
646 * that it is, but someone else is doing or has done the
647 * notification.
648 */
649 notify_type = notify->type;
650 if (notify_type == 0 ||
651 cmpxchg(&notify->type, notify_type, 0) !=
652 notify_type) {
653 continue;
654 }
655
656 DBUG_ON(notify_type != XPC_N_CALL);
657
658 atomic_dec(&ch->n_to_notify);
659
660 if (notify->func != NULL) {
661 dev_dbg(xpc_chan, "notify->func() called, notify=0x%p, "
662 "msg_number=%ld, partid=%d, channel=%d\n",
663 (void *) notify, get, ch->partid, ch->number);
664
665 notify->func(reason, ch->partid, ch->number,
666 notify->key);
667
668 dev_dbg(xpc_chan, "notify->func() returned, "
669 "notify=0x%p, msg_number=%ld, partid=%d, "
670 "channel=%d\n", (void *) notify, get,
671 ch->partid, ch->number);
672 }
673 }
674}
675
676
677/*
629 * Free up message queues and other stuff that were allocated for the specified 678 * Free up message queues and other stuff that were allocated for the specified
630 * channel. 679 * channel.
631 * 680 *
@@ -669,9 +718,6 @@ xpc_free_msgqueues(struct xpc_channel *ch)
669 ch->remote_msgqueue = NULL; 718 ch->remote_msgqueue = NULL;
670 kfree(ch->notify_queue); 719 kfree(ch->notify_queue);
671 ch->notify_queue = NULL; 720 ch->notify_queue = NULL;
672
673 /* in case someone is waiting for the teardown to complete */
674 up(&ch->teardown_sema);
675 } 721 }
676} 722}
677 723
@@ -683,7 +729,7 @@ static void
683xpc_process_disconnect(struct xpc_channel *ch, unsigned long *irq_flags) 729xpc_process_disconnect(struct xpc_channel *ch, unsigned long *irq_flags)
684{ 730{
685 struct xpc_partition *part = &xpc_partitions[ch->partid]; 731 struct xpc_partition *part = &xpc_partitions[ch->partid];
686 u32 ch_flags = ch->flags; 732 u32 channel_was_connected = (ch->flags & XPC_C_WASCONNECTED);
687 733
688 734
689 DBUG_ON(!spin_is_locked(&ch->lock)); 735 DBUG_ON(!spin_is_locked(&ch->lock));
@@ -701,12 +747,13 @@ xpc_process_disconnect(struct xpc_channel *ch, unsigned long *irq_flags)
701 } 747 }
702 DBUG_ON(atomic_read(&ch->kthreads_assigned) != 0); 748 DBUG_ON(atomic_read(&ch->kthreads_assigned) != 0);
703 749
704 /* it's now safe to free the channel's message queues */ 750 if (part->act_state == XPC_P_DEACTIVATING) {
705 751 /* can't proceed until the other side disengages from us */
706 xpc_free_msgqueues(ch); 752 if (xpc_partition_engaged(1UL << ch->partid)) {
707 DBUG_ON(ch->flags & XPC_C_SETUP); 753 return;
754 }
708 755
709 if (part->act_state != XPC_P_DEACTIVATING) { 756 } else {
710 757
711 /* as long as the other side is up do the full protocol */ 758 /* as long as the other side is up do the full protocol */
712 759
@@ -724,16 +771,33 @@ xpc_process_disconnect(struct xpc_channel *ch, unsigned long *irq_flags)
724 } 771 }
725 } 772 }
726 773
774 /* wake those waiting for notify completion */
775 if (atomic_read(&ch->n_to_notify) > 0) {
776 /* >>> we do callout while holding ch->lock */
777 xpc_notify_senders(ch, ch->reason, ch->w_local_GP.put);
778 }
779
727 /* both sides are disconnected now */ 780 /* both sides are disconnected now */
728 781
729 ch->flags = XPC_C_DISCONNECTED; /* clear all flags, but this one */ 782 /* it's now safe to free the channel's message queues */
783 xpc_free_msgqueues(ch);
784
785 /* mark disconnected, clear all other flags except XPC_C_WDISCONNECT */
786 ch->flags = (XPC_C_DISCONNECTED | (ch->flags & XPC_C_WDISCONNECT));
730 787
731 atomic_dec(&part->nchannels_active); 788 atomic_dec(&part->nchannels_active);
732 789
733 if (ch_flags & XPC_C_WASCONNECTED) { 790 if (channel_was_connected) {
734 dev_info(xpc_chan, "channel %d to partition %d disconnected, " 791 dev_info(xpc_chan, "channel %d to partition %d disconnected, "
735 "reason=%d\n", ch->number, ch->partid, ch->reason); 792 "reason=%d\n", ch->number, ch->partid, ch->reason);
736 } 793 }
794
795 /* wake the thread that is waiting for this channel to disconnect */
796 if (ch->flags & XPC_C_WDISCONNECT) {
797 spin_unlock_irqrestore(&ch->lock, *irq_flags);
798 up(&ch->wdisconnect_sema);
799 spin_lock_irqsave(&ch->lock, *irq_flags);
800 }
737} 801}
738 802
739 803
@@ -764,7 +828,7 @@ xpc_process_openclose_IPI(struct xpc_partition *part, int ch_number,
764 /* 828 /*
765 * If RCLOSEREQUEST is set, we're probably waiting for 829 * If RCLOSEREQUEST is set, we're probably waiting for
766 * RCLOSEREPLY. We should find it and a ROPENREQUEST packed 830 * RCLOSEREPLY. We should find it and a ROPENREQUEST packed
767 * with this RCLOSEQREUQEST in the IPI_flags. 831 * with this RCLOSEREQUEST in the IPI_flags.
768 */ 832 */
769 833
770 if (ch->flags & XPC_C_RCLOSEREQUEST) { 834 if (ch->flags & XPC_C_RCLOSEREQUEST) {
@@ -852,7 +916,7 @@ xpc_process_openclose_IPI(struct xpc_partition *part, int ch_number,
852 "channel=%d\n", args->msg_size, args->local_nentries, 916 "channel=%d\n", args->msg_size, args->local_nentries,
853 ch->partid, ch->number); 917 ch->partid, ch->number);
854 918
855 if ((ch->flags & XPC_C_DISCONNECTING) || 919 if ((ch->flags & (XPC_C_DISCONNECTING | XPC_C_WDISCONNECT)) ||
856 part->act_state == XPC_P_DEACTIVATING) { 920 part->act_state == XPC_P_DEACTIVATING) {
857 spin_unlock_irqrestore(&ch->lock, irq_flags); 921 spin_unlock_irqrestore(&ch->lock, irq_flags);
858 return; 922 return;
@@ -1040,55 +1104,6 @@ xpc_connect_channel(struct xpc_channel *ch)
1040 1104
1041 1105
1042/* 1106/*
1043 * Notify those who wanted to be notified upon delivery of their message.
1044 */
1045static void
1046xpc_notify_senders(struct xpc_channel *ch, enum xpc_retval reason, s64 put)
1047{
1048 struct xpc_notify *notify;
1049 u8 notify_type;
1050 s64 get = ch->w_remote_GP.get - 1;
1051
1052
1053 while (++get < put && atomic_read(&ch->n_to_notify) > 0) {
1054
1055 notify = &ch->notify_queue[get % ch->local_nentries];
1056
1057 /*
1058 * See if the notify entry indicates it was associated with
1059 * a message who's sender wants to be notified. It is possible
1060 * that it is, but someone else is doing or has done the
1061 * notification.
1062 */
1063 notify_type = notify->type;
1064 if (notify_type == 0 ||
1065 cmpxchg(&notify->type, notify_type, 0) !=
1066 notify_type) {
1067 continue;
1068 }
1069
1070 DBUG_ON(notify_type != XPC_N_CALL);
1071
1072 atomic_dec(&ch->n_to_notify);
1073
1074 if (notify->func != NULL) {
1075 dev_dbg(xpc_chan, "notify->func() called, notify=0x%p, "
1076 "msg_number=%ld, partid=%d, channel=%d\n",
1077 (void *) notify, get, ch->partid, ch->number);
1078
1079 notify->func(reason, ch->partid, ch->number,
1080 notify->key);
1081
1082 dev_dbg(xpc_chan, "notify->func() returned, "
1083 "notify=0x%p, msg_number=%ld, partid=%d, "
1084 "channel=%d\n", (void *) notify, get,
1085 ch->partid, ch->number);
1086 }
1087 }
1088}
1089
1090
1091/*
1092 * Clear some of the msg flags in the local message queue. 1107 * Clear some of the msg flags in the local message queue.
1093 */ 1108 */
1094static inline void 1109static inline void
@@ -1240,6 +1255,7 @@ xpc_process_channel_activity(struct xpc_partition *part)
1240 u64 IPI_amo, IPI_flags; 1255 u64 IPI_amo, IPI_flags;
1241 struct xpc_channel *ch; 1256 struct xpc_channel *ch;
1242 int ch_number; 1257 int ch_number;
1258 u32 ch_flags;
1243 1259
1244 1260
1245 IPI_amo = xpc_get_IPI_flags(part); 1261 IPI_amo = xpc_get_IPI_flags(part);
@@ -1266,8 +1282,9 @@ xpc_process_channel_activity(struct xpc_partition *part)
1266 xpc_process_openclose_IPI(part, ch_number, IPI_flags); 1282 xpc_process_openclose_IPI(part, ch_number, IPI_flags);
1267 } 1283 }
1268 1284
1285 ch_flags = ch->flags; /* need an atomic snapshot of flags */
1269 1286
1270 if (ch->flags & XPC_C_DISCONNECTING) { 1287 if (ch_flags & XPC_C_DISCONNECTING) {
1271 spin_lock_irqsave(&ch->lock, irq_flags); 1288 spin_lock_irqsave(&ch->lock, irq_flags);
1272 xpc_process_disconnect(ch, &irq_flags); 1289 xpc_process_disconnect(ch, &irq_flags);
1273 spin_unlock_irqrestore(&ch->lock, irq_flags); 1290 spin_unlock_irqrestore(&ch->lock, irq_flags);
@@ -1278,9 +1295,9 @@ xpc_process_channel_activity(struct xpc_partition *part)
1278 continue; 1295 continue;
1279 } 1296 }
1280 1297
1281 if (!(ch->flags & XPC_C_CONNECTED)) { 1298 if (!(ch_flags & XPC_C_CONNECTED)) {
1282 if (!(ch->flags & XPC_C_OPENREQUEST)) { 1299 if (!(ch_flags & XPC_C_OPENREQUEST)) {
1283 DBUG_ON(ch->flags & XPC_C_SETUP); 1300 DBUG_ON(ch_flags & XPC_C_SETUP);
1284 (void) xpc_connect_channel(ch); 1301 (void) xpc_connect_channel(ch);
1285 } else { 1302 } else {
1286 spin_lock_irqsave(&ch->lock, irq_flags); 1303 spin_lock_irqsave(&ch->lock, irq_flags);
@@ -1305,8 +1322,8 @@ xpc_process_channel_activity(struct xpc_partition *part)
1305 1322
1306 1323
1307/* 1324/*
1308 * XPC's heartbeat code calls this function to inform XPC that a partition has 1325 * XPC's heartbeat code calls this function to inform XPC that a partition is
1309 * gone down. XPC responds by tearing down the XPartition Communication 1326 * going down. XPC responds by tearing down the XPartition Communication
1310 * infrastructure used for the just downed partition. 1327 * infrastructure used for the just downed partition.
1311 * 1328 *
1312 * XPC's heartbeat code will never call this function and xpc_partition_up() 1329 * XPC's heartbeat code will never call this function and xpc_partition_up()
@@ -1314,7 +1331,7 @@ xpc_process_channel_activity(struct xpc_partition *part)
1314 * at the same time. 1331 * at the same time.
1315 */ 1332 */
1316void 1333void
1317xpc_partition_down(struct xpc_partition *part, enum xpc_retval reason) 1334xpc_partition_going_down(struct xpc_partition *part, enum xpc_retval reason)
1318{ 1335{
1319 unsigned long irq_flags; 1336 unsigned long irq_flags;
1320 int ch_number; 1337 int ch_number;
@@ -1330,12 +1347,11 @@ xpc_partition_down(struct xpc_partition *part, enum xpc_retval reason)
1330 } 1347 }
1331 1348
1332 1349
1333 /* disconnect all channels associated with the downed partition */ 1350 /* disconnect channels associated with the partition going down */
1334 1351
1335 for (ch_number = 0; ch_number < part->nchannels; ch_number++) { 1352 for (ch_number = 0; ch_number < part->nchannels; ch_number++) {
1336 ch = &part->channels[ch_number]; 1353 ch = &part->channels[ch_number];
1337 1354
1338
1339 xpc_msgqueue_ref(ch); 1355 xpc_msgqueue_ref(ch);
1340 spin_lock_irqsave(&ch->lock, irq_flags); 1356 spin_lock_irqsave(&ch->lock, irq_flags);
1341 1357
@@ -1370,6 +1386,7 @@ xpc_teardown_infrastructure(struct xpc_partition *part)
1370 * this partition. 1386 * this partition.
1371 */ 1387 */
1372 1388
1389 DBUG_ON(atomic_read(&part->nchannels_engaged) != 0);
1373 DBUG_ON(atomic_read(&part->nchannels_active) != 0); 1390 DBUG_ON(atomic_read(&part->nchannels_active) != 0);
1374 DBUG_ON(part->setup_state != XPC_P_SETUP); 1391 DBUG_ON(part->setup_state != XPC_P_SETUP);
1375 part->setup_state = XPC_P_WTEARDOWN; 1392 part->setup_state = XPC_P_WTEARDOWN;
@@ -1506,8 +1523,12 @@ xpc_initiate_disconnect(int ch_number)
1506 1523
1507 spin_lock_irqsave(&ch->lock, irq_flags); 1524 spin_lock_irqsave(&ch->lock, irq_flags);
1508 1525
1509 XPC_DISCONNECT_CHANNEL(ch, xpcUnregistering, 1526 if (!(ch->flags & XPC_C_DISCONNECTED)) {
1527 ch->flags |= XPC_C_WDISCONNECT;
1528
1529 XPC_DISCONNECT_CHANNEL(ch, xpcUnregistering,
1510 &irq_flags); 1530 &irq_flags);
1531 }
1511 1532
1512 spin_unlock_irqrestore(&ch->lock, irq_flags); 1533 spin_unlock_irqrestore(&ch->lock, irq_flags);
1513 1534
@@ -1523,8 +1544,9 @@ xpc_initiate_disconnect(int ch_number)
1523/* 1544/*
1524 * To disconnect a channel, and reflect it back to all who may be waiting. 1545 * To disconnect a channel, and reflect it back to all who may be waiting.
1525 * 1546 *
1526 * >>> An OPEN is not allowed until XPC_C_DISCONNECTING is cleared by 1547 * An OPEN is not allowed until XPC_C_DISCONNECTING is cleared by
1527 * >>> xpc_free_msgqueues(). 1548 * xpc_process_disconnect(), and if set, XPC_C_WDISCONNECT is cleared by
1549 * xpc_disconnect_wait().
1528 * 1550 *
1529 * THE CHANNEL IS TO BE LOCKED BY THE CALLER AND WILL REMAIN LOCKED UPON RETURN. 1551 * THE CHANNEL IS TO BE LOCKED BY THE CALLER AND WILL REMAIN LOCKED UPON RETURN.
1530 */ 1552 */
@@ -1532,7 +1554,7 @@ void
1532xpc_disconnect_channel(const int line, struct xpc_channel *ch, 1554xpc_disconnect_channel(const int line, struct xpc_channel *ch,
1533 enum xpc_retval reason, unsigned long *irq_flags) 1555 enum xpc_retval reason, unsigned long *irq_flags)
1534{ 1556{
1535 u32 flags; 1557 u32 channel_was_connected = (ch->flags & XPC_C_CONNECTED);
1536 1558
1537 1559
1538 DBUG_ON(!spin_is_locked(&ch->lock)); 1560 DBUG_ON(!spin_is_locked(&ch->lock));
@@ -1547,61 +1569,53 @@ xpc_disconnect_channel(const int line, struct xpc_channel *ch,
1547 1569
1548 XPC_SET_REASON(ch, reason, line); 1570 XPC_SET_REASON(ch, reason, line);
1549 1571
1550 flags = ch->flags; 1572 ch->flags |= (XPC_C_CLOSEREQUEST | XPC_C_DISCONNECTING);
1551 /* some of these may not have been set */ 1573 /* some of these may not have been set */
1552 ch->flags &= ~(XPC_C_OPENREQUEST | XPC_C_OPENREPLY | 1574 ch->flags &= ~(XPC_C_OPENREQUEST | XPC_C_OPENREPLY |
1553 XPC_C_ROPENREQUEST | XPC_C_ROPENREPLY | 1575 XPC_C_ROPENREQUEST | XPC_C_ROPENREPLY |
1554 XPC_C_CONNECTING | XPC_C_CONNECTED); 1576 XPC_C_CONNECTING | XPC_C_CONNECTED);
1555 1577
1556 ch->flags |= (XPC_C_CLOSEREQUEST | XPC_C_DISCONNECTING);
1557 xpc_IPI_send_closerequest(ch, irq_flags); 1578 xpc_IPI_send_closerequest(ch, irq_flags);
1558 1579
1559 if (flags & XPC_C_CONNECTED) { 1580 if (channel_was_connected) {
1560 ch->flags |= XPC_C_WASCONNECTED; 1581 ch->flags |= XPC_C_WASCONNECTED;
1561 } 1582 }
1562 1583
1584 spin_unlock_irqrestore(&ch->lock, *irq_flags);
1585
1586 /* wake all idle kthreads so they can exit */
1563 if (atomic_read(&ch->kthreads_idle) > 0) { 1587 if (atomic_read(&ch->kthreads_idle) > 0) {
1564 /* wake all idle kthreads so they can exit */
1565 wake_up_all(&ch->idle_wq); 1588 wake_up_all(&ch->idle_wq);
1566 } 1589 }
1567 1590
1568 spin_unlock_irqrestore(&ch->lock, *irq_flags);
1569
1570
1571 /* wake those waiting to allocate an entry from the local msg queue */ 1591 /* wake those waiting to allocate an entry from the local msg queue */
1572
1573 if (atomic_read(&ch->n_on_msg_allocate_wq) > 0) { 1592 if (atomic_read(&ch->n_on_msg_allocate_wq) > 0) {
1574 wake_up(&ch->msg_allocate_wq); 1593 wake_up(&ch->msg_allocate_wq);
1575 } 1594 }
1576 1595
1577 /* wake those waiting for notify completion */
1578
1579 if (atomic_read(&ch->n_to_notify) > 0) {
1580 xpc_notify_senders(ch, reason, ch->w_local_GP.put);
1581 }
1582
1583 spin_lock_irqsave(&ch->lock, *irq_flags); 1596 spin_lock_irqsave(&ch->lock, *irq_flags);
1584} 1597}
1585 1598
1586 1599
1587void 1600void
1588xpc_disconnected_callout(struct xpc_channel *ch) 1601xpc_disconnecting_callout(struct xpc_channel *ch)
1589{ 1602{
1590 /* 1603 /*
1591 * Let the channel's registerer know that the channel is now 1604 * Let the channel's registerer know that the channel is being
1592 * disconnected. We don't want to do this if the registerer was never 1605 * disconnected. We don't want to do this if the registerer was never
1593 * informed of a connection being made, unless the disconnect was for 1606 * informed of a connection being made.
1594 * abnormal reasons.
1595 */ 1607 */
1596 1608
1597 if (ch->func != NULL) { 1609 if (ch->func != NULL) {
1598 dev_dbg(xpc_chan, "ch->func() called, reason=%d, partid=%d, " 1610 dev_dbg(xpc_chan, "ch->func() called, reason=xpcDisconnecting,"
1599 "channel=%d\n", ch->reason, ch->partid, ch->number); 1611 " partid=%d, channel=%d\n", ch->partid, ch->number);
1600 1612
1601 ch->func(ch->reason, ch->partid, ch->number, NULL, ch->key); 1613 ch->func(xpcDisconnecting, ch->partid, ch->number, NULL,
1614 ch->key);
1602 1615
1603 dev_dbg(xpc_chan, "ch->func() returned, reason=%d, partid=%d, " 1616 dev_dbg(xpc_chan, "ch->func() returned, reason="
1604 "channel=%d\n", ch->reason, ch->partid, ch->number); 1617 "xpcDisconnecting, partid=%d, channel=%d\n",
1618 ch->partid, ch->number);
1605 } 1619 }
1606} 1620}
1607 1621
@@ -1848,7 +1862,7 @@ xpc_send_msg(struct xpc_channel *ch, struct xpc_msg *msg, u8 notify_type,
1848 xpc_notify_func func, void *key) 1862 xpc_notify_func func, void *key)
1849{ 1863{
1850 enum xpc_retval ret = xpcSuccess; 1864 enum xpc_retval ret = xpcSuccess;
1851 struct xpc_notify *notify = NULL; // >>> to keep the compiler happy!! 1865 struct xpc_notify *notify = notify;
1852 s64 put, msg_number = msg->number; 1866 s64 put, msg_number = msg->number;
1853 1867
1854 1868
diff --git a/arch/ia64/sn/kernel/xpc_main.c b/arch/ia64/sn/kernel/xpc_main.c
index bb1d5cf30440..feece200b3c3 100644
--- a/arch/ia64/sn/kernel/xpc_main.c
+++ b/arch/ia64/sn/kernel/xpc_main.c
@@ -54,6 +54,7 @@
54#include <linux/interrupt.h> 54#include <linux/interrupt.h>
55#include <linux/slab.h> 55#include <linux/slab.h>
56#include <linux/delay.h> 56#include <linux/delay.h>
57#include <linux/reboot.h>
57#include <asm/sn/intr.h> 58#include <asm/sn/intr.h>
58#include <asm/sn/sn_sal.h> 59#include <asm/sn/sn_sal.h>
59#include <asm/uaccess.h> 60#include <asm/uaccess.h>
@@ -82,11 +83,13 @@ struct device *xpc_chan = &xpc_chan_dbg_subname;
82 83
83/* systune related variables for /proc/sys directories */ 84/* systune related variables for /proc/sys directories */
84 85
85static int xpc_hb_min = 1; 86static int xpc_hb_interval = XPC_HB_DEFAULT_INTERVAL;
86static int xpc_hb_max = 10; 87static int xpc_hb_min_interval = 1;
88static int xpc_hb_max_interval = 10;
87 89
88static int xpc_hb_check_min = 10; 90static int xpc_hb_check_interval = XPC_HB_CHECK_DEFAULT_INTERVAL;
89static int xpc_hb_check_max = 120; 91static int xpc_hb_check_min_interval = 10;
92static int xpc_hb_check_max_interval = 120;
90 93
91static ctl_table xpc_sys_xpc_hb_dir[] = { 94static ctl_table xpc_sys_xpc_hb_dir[] = {
92 { 95 {
@@ -99,7 +102,8 @@ static ctl_table xpc_sys_xpc_hb_dir[] = {
99 &proc_dointvec_minmax, 102 &proc_dointvec_minmax,
100 &sysctl_intvec, 103 &sysctl_intvec,
101 NULL, 104 NULL,
102 &xpc_hb_min, &xpc_hb_max 105 &xpc_hb_min_interval,
106 &xpc_hb_max_interval
103 }, 107 },
104 { 108 {
105 2, 109 2,
@@ -111,7 +115,8 @@ static ctl_table xpc_sys_xpc_hb_dir[] = {
111 &proc_dointvec_minmax, 115 &proc_dointvec_minmax,
112 &sysctl_intvec, 116 &sysctl_intvec,
113 NULL, 117 NULL,
114 &xpc_hb_check_min, &xpc_hb_check_max 118 &xpc_hb_check_min_interval,
119 &xpc_hb_check_max_interval
115 }, 120 },
116 {0} 121 {0}
117}; 122};
@@ -148,11 +153,11 @@ static DECLARE_WAIT_QUEUE_HEAD(xpc_act_IRQ_wq);
148 153
149static unsigned long xpc_hb_check_timeout; 154static unsigned long xpc_hb_check_timeout;
150 155
151/* xpc_hb_checker thread exited notification */ 156/* used as an indication of when the xpc_hb_checker thread is inactive */
152static DECLARE_MUTEX_LOCKED(xpc_hb_checker_exited); 157static DECLARE_MUTEX_LOCKED(xpc_hb_checker_inactive);
153 158
154/* xpc_discovery thread exited notification */ 159/* used as an indication of when the xpc_discovery thread is inactive */
155static DECLARE_MUTEX_LOCKED(xpc_discovery_exited); 160static DECLARE_MUTEX_LOCKED(xpc_discovery_inactive);
156 161
157 162
158static struct timer_list xpc_hb_timer; 163static struct timer_list xpc_hb_timer;
@@ -161,6 +166,30 @@ static struct timer_list xpc_hb_timer;
161static void xpc_kthread_waitmsgs(struct xpc_partition *, struct xpc_channel *); 166static void xpc_kthread_waitmsgs(struct xpc_partition *, struct xpc_channel *);
162 167
163 168
169static int xpc_system_reboot(struct notifier_block *, unsigned long, void *);
170static struct notifier_block xpc_reboot_notifier = {
171 .notifier_call = xpc_system_reboot,
172};
173
174
175/*
176 * Timer function to enforce the timelimit on the partition disengage request.
177 */
178static void
179xpc_timeout_partition_disengage_request(unsigned long data)
180{
181 struct xpc_partition *part = (struct xpc_partition *) data;
182
183
184 DBUG_ON(XPC_TICKS < part->disengage_request_timeout);
185
186 (void) xpc_partition_disengaged(part);
187
188 DBUG_ON(part->disengage_request_timeout != 0);
189 DBUG_ON(xpc_partition_engaged(1UL << XPC_PARTID(part)) != 0);
190}
191
192
164/* 193/*
165 * Notify the heartbeat check thread that an IRQ has been received. 194 * Notify the heartbeat check thread that an IRQ has been received.
166 */ 195 */
@@ -214,12 +243,6 @@ xpc_hb_checker(void *ignore)
214 243
215 while (!(volatile int) xpc_exiting) { 244 while (!(volatile int) xpc_exiting) {
216 245
217 /* wait for IRQ or timeout */
218 (void) wait_event_interruptible(xpc_act_IRQ_wq,
219 (last_IRQ_count < atomic_read(&xpc_act_IRQ_rcvd) ||
220 jiffies >= xpc_hb_check_timeout ||
221 (volatile int) xpc_exiting));
222
223 dev_dbg(xpc_part, "woke up with %d ticks rem; %d IRQs have " 246 dev_dbg(xpc_part, "woke up with %d ticks rem; %d IRQs have "
224 "been received\n", 247 "been received\n",
225 (int) (xpc_hb_check_timeout - jiffies), 248 (int) (xpc_hb_check_timeout - jiffies),
@@ -240,6 +263,7 @@ xpc_hb_checker(void *ignore)
240 } 263 }
241 264
242 265
266 /* check for outstanding IRQs */
243 new_IRQ_count = atomic_read(&xpc_act_IRQ_rcvd); 267 new_IRQ_count = atomic_read(&xpc_act_IRQ_rcvd);
244 if (last_IRQ_count < new_IRQ_count || force_IRQ != 0) { 268 if (last_IRQ_count < new_IRQ_count || force_IRQ != 0) {
245 force_IRQ = 0; 269 force_IRQ = 0;
@@ -257,13 +281,19 @@ xpc_hb_checker(void *ignore)
257 xpc_hb_check_timeout = jiffies + 281 xpc_hb_check_timeout = jiffies +
258 (xpc_hb_check_interval * HZ); 282 (xpc_hb_check_interval * HZ);
259 } 283 }
284
285 /* wait for IRQ or timeout */
286 (void) wait_event_interruptible(xpc_act_IRQ_wq,
287 (last_IRQ_count < atomic_read(&xpc_act_IRQ_rcvd) ||
288 jiffies >= xpc_hb_check_timeout ||
289 (volatile int) xpc_exiting));
260 } 290 }
261 291
262 dev_dbg(xpc_part, "heartbeat checker is exiting\n"); 292 dev_dbg(xpc_part, "heartbeat checker is exiting\n");
263 293
264 294
265 /* mark this thread as inactive */ 295 /* mark this thread as inactive */
266 up(&xpc_hb_checker_exited); 296 up(&xpc_hb_checker_inactive);
267 return 0; 297 return 0;
268} 298}
269 299
@@ -283,7 +313,7 @@ xpc_initiate_discovery(void *ignore)
283 dev_dbg(xpc_part, "discovery thread is exiting\n"); 313 dev_dbg(xpc_part, "discovery thread is exiting\n");
284 314
285 /* mark this thread as inactive */ 315 /* mark this thread as inactive */
286 up(&xpc_discovery_exited); 316 up(&xpc_discovery_inactive);
287 return 0; 317 return 0;
288} 318}
289 319
@@ -309,7 +339,7 @@ xpc_make_first_contact(struct xpc_partition *part)
309 "partition %d\n", XPC_PARTID(part)); 339 "partition %d\n", XPC_PARTID(part));
310 340
311 /* wait a 1/4 of a second or so */ 341 /* wait a 1/4 of a second or so */
312 msleep_interruptible(250); 342 (void) msleep_interruptible(250);
313 343
314 if (part->act_state == XPC_P_DEACTIVATING) { 344 if (part->act_state == XPC_P_DEACTIVATING) {
315 return part->reason; 345 return part->reason;
@@ -336,7 +366,8 @@ static void
336xpc_channel_mgr(struct xpc_partition *part) 366xpc_channel_mgr(struct xpc_partition *part)
337{ 367{
338 while (part->act_state != XPC_P_DEACTIVATING || 368 while (part->act_state != XPC_P_DEACTIVATING ||
339 atomic_read(&part->nchannels_active) > 0) { 369 atomic_read(&part->nchannels_active) > 0 ||
370 !xpc_partition_disengaged(part)) {
340 371
341 xpc_process_channel_activity(part); 372 xpc_process_channel_activity(part);
342 373
@@ -360,7 +391,8 @@ xpc_channel_mgr(struct xpc_partition *part)
360 (volatile u64) part->local_IPI_amo != 0 || 391 (volatile u64) part->local_IPI_amo != 0 ||
361 ((volatile u8) part->act_state == 392 ((volatile u8) part->act_state ==
362 XPC_P_DEACTIVATING && 393 XPC_P_DEACTIVATING &&
363 atomic_read(&part->nchannels_active) == 0))); 394 atomic_read(&part->nchannels_active) == 0 &&
395 xpc_partition_disengaged(part))));
364 atomic_set(&part->channel_mgr_requests, 1); 396 atomic_set(&part->channel_mgr_requests, 1);
365 397
366 // >>> Does it need to wakeup periodically as well? In case we 398 // >>> Does it need to wakeup periodically as well? In case we
@@ -482,7 +514,7 @@ xpc_activating(void *__partid)
482 return 0; 514 return 0;
483 } 515 }
484 516
485 XPC_ALLOW_HB(partid, xpc_vars); 517 xpc_allow_hb(partid, xpc_vars);
486 xpc_IPI_send_activated(part); 518 xpc_IPI_send_activated(part);
487 519
488 520
@@ -492,6 +524,7 @@ xpc_activating(void *__partid)
492 */ 524 */
493 (void) xpc_partition_up(part); 525 (void) xpc_partition_up(part);
494 526
527 xpc_disallow_hb(partid, xpc_vars);
495 xpc_mark_partition_inactive(part); 528 xpc_mark_partition_inactive(part);
496 529
497 if (part->reason == xpcReactivating) { 530 if (part->reason == xpcReactivating) {
@@ -704,11 +737,14 @@ xpc_daemonize_kthread(void *args)
704 xpc_kthread_waitmsgs(part, ch); 737 xpc_kthread_waitmsgs(part, ch);
705 } 738 }
706 739
707 if (atomic_dec_return(&ch->kthreads_assigned) == 0 && 740 if (atomic_dec_return(&ch->kthreads_assigned) == 0) {
708 ((ch->flags & XPC_C_CONNECTCALLOUT) || 741 if (ch->flags & XPC_C_CONNECTCALLOUT) {
709 (ch->reason != xpcUnregistering && 742 xpc_disconnecting_callout(ch);
710 ch->reason != xpcOtherUnregistering))) { 743 }
711 xpc_disconnected_callout(ch); 744 if (atomic_dec_return(&part->nchannels_engaged) == 0) {
745 xpc_mark_partition_disengaged(part);
746 xpc_IPI_send_disengage(part);
747 }
712 } 748 }
713 749
714 750
@@ -740,6 +776,7 @@ xpc_create_kthreads(struct xpc_channel *ch, int needed)
740 unsigned long irq_flags; 776 unsigned long irq_flags;
741 pid_t pid; 777 pid_t pid;
742 u64 args = XPC_PACK_ARGS(ch->partid, ch->number); 778 u64 args = XPC_PACK_ARGS(ch->partid, ch->number);
779 struct xpc_partition *part = &xpc_partitions[ch->partid];
743 780
744 781
745 while (needed-- > 0) { 782 while (needed-- > 0) {
@@ -770,9 +807,13 @@ xpc_create_kthreads(struct xpc_channel *ch, int needed)
770 * kthread. That kthread is responsible for doing the 807 * kthread. That kthread is responsible for doing the
771 * counterpart to the following before it exits. 808 * counterpart to the following before it exits.
772 */ 809 */
773 (void) xpc_part_ref(&xpc_partitions[ch->partid]); 810 (void) xpc_part_ref(part);
774 xpc_msgqueue_ref(ch); 811 xpc_msgqueue_ref(ch);
775 atomic_inc(&ch->kthreads_assigned); 812 if (atomic_inc_return(&ch->kthreads_assigned) == 1) {
813 if (atomic_inc_return(&part->nchannels_engaged) == 1) {
814 xpc_mark_partition_engaged(part);
815 }
816 }
776 ch->kthreads_created++; // >>> temporary debug only!!! 817 ch->kthreads_created++; // >>> temporary debug only!!!
777 } 818 }
778} 819}
@@ -781,6 +822,7 @@ xpc_create_kthreads(struct xpc_channel *ch, int needed)
781void 822void
782xpc_disconnect_wait(int ch_number) 823xpc_disconnect_wait(int ch_number)
783{ 824{
825 unsigned long irq_flags;
784 partid_t partid; 826 partid_t partid;
785 struct xpc_partition *part; 827 struct xpc_partition *part;
786 struct xpc_channel *ch; 828 struct xpc_channel *ch;
@@ -793,10 +835,13 @@ xpc_disconnect_wait(int ch_number)
793 if (xpc_part_ref(part)) { 835 if (xpc_part_ref(part)) {
794 ch = &part->channels[ch_number]; 836 ch = &part->channels[ch_number];
795 837
796// >>> how do we keep from falling into the window between our check and going 838 if (ch->flags & XPC_C_WDISCONNECT) {
797// >>> down and coming back up where sema is re-inited? 839 if (!(ch->flags & XPC_C_DISCONNECTED)) {
798 if (ch->flags & XPC_C_SETUP) { 840 (void) down(&ch->wdisconnect_sema);
799 (void) down(&ch->teardown_sema); 841 }
842 spin_lock_irqsave(&ch->lock, irq_flags);
843 ch->flags &= ~XPC_C_WDISCONNECT;
844 spin_unlock_irqrestore(&ch->lock, irq_flags);
800 } 845 }
801 846
802 xpc_part_deref(part); 847 xpc_part_deref(part);
@@ -806,62 +851,89 @@ xpc_disconnect_wait(int ch_number)
806 851
807 852
808static void 853static void
809xpc_do_exit(void) 854xpc_do_exit(enum xpc_retval reason)
810{ 855{
811 partid_t partid; 856 partid_t partid;
812 int active_part_count; 857 int active_part_count;
813 struct xpc_partition *part; 858 struct xpc_partition *part;
859 unsigned long printmsg_time;
814 860
815 861
816 /* now it's time to eliminate our heartbeat */ 862 /* a 'rmmod XPC' and a 'reboot' cannot both end up here together */
817 del_timer_sync(&xpc_hb_timer); 863 DBUG_ON(xpc_exiting == 1);
818 xpc_vars->heartbeating_to_mask = 0;
819
820 /* indicate to others that our reserved page is uninitialized */
821 xpc_rsvd_page->vars_pa = 0;
822 864
823 /* 865 /*
824 * Ignore all incoming interrupts. Without interupts the heartbeat 866 * Let the heartbeat checker thread and the discovery thread
825 * checker won't activate any new partitions that may come up. 867 * (if one is running) know that they should exit. Also wake up
826 */ 868 * the heartbeat checker thread in case it's sleeping.
827 free_irq(SGI_XPC_ACTIVATE, NULL);
828
829 /*
830 * Cause the heartbeat checker and the discovery threads to exit.
831 * We don't want them attempting to activate new partitions as we
832 * try to deactivate the existing ones.
833 */ 869 */
834 xpc_exiting = 1; 870 xpc_exiting = 1;
835 wake_up_interruptible(&xpc_act_IRQ_wq); 871 wake_up_interruptible(&xpc_act_IRQ_wq);
836 872
837 /* wait for the heartbeat checker thread to mark itself inactive */ 873 /* ignore all incoming interrupts */
838 down(&xpc_hb_checker_exited); 874 free_irq(SGI_XPC_ACTIVATE, NULL);
839 875
840 /* wait for the discovery thread to mark itself inactive */ 876 /* wait for the discovery thread to mark itself inactive */
841 down(&xpc_discovery_exited); 877 down(&xpc_discovery_inactive);
878
879 /* wait for the heartbeat checker thread to mark itself inactive */
880 down(&xpc_hb_checker_inactive);
842 881
843 882
844 msleep_interruptible(300); 883 /* sleep for a 1/3 of a second or so */
884 (void) msleep_interruptible(300);
845 885
846 886
847 /* wait for all partitions to become inactive */ 887 /* wait for all partitions to become inactive */
848 888
889 printmsg_time = jiffies;
890
849 do { 891 do {
850 active_part_count = 0; 892 active_part_count = 0;
851 893
852 for (partid = 1; partid < XP_MAX_PARTITIONS; partid++) { 894 for (partid = 1; partid < XP_MAX_PARTITIONS; partid++) {
853 part = &xpc_partitions[partid]; 895 part = &xpc_partitions[partid];
854 if (part->act_state != XPC_P_INACTIVE) { 896 if (xpc_partition_disengaged(part) &&
855 active_part_count++; 897 part->act_state == XPC_P_INACTIVE) {
856 898 continue;
857 XPC_DEACTIVATE_PARTITION(part, xpcUnloading);
858 } 899 }
900
901 active_part_count++;
902
903 XPC_DEACTIVATE_PARTITION(part, reason);
904 }
905
906 if (active_part_count == 0) {
907 break;
908 }
909
910 if (jiffies >= printmsg_time) {
911 dev_info(xpc_part, "waiting for partitions to "
912 "deactivate/disengage, active count=%d, remote "
913 "engaged=0x%lx\n", active_part_count,
914 xpc_partition_engaged(1UL << partid));
915
916 printmsg_time = jiffies +
917 (XPC_DISENGAGE_PRINTMSG_INTERVAL * HZ);
859 } 918 }
860 919
861 if (active_part_count) 920 /* sleep for a 1/3 of a second or so */
862 msleep_interruptible(300); 921 (void) msleep_interruptible(300);
863 } while (active_part_count > 0); 922
923 } while (1);
924
925 DBUG_ON(xpc_partition_engaged(-1UL));
926
927
928 /* indicate to others that our reserved page is uninitialized */
929 xpc_rsvd_page->vars_pa = 0;
930
931 /* now it's time to eliminate our heartbeat */
932 del_timer_sync(&xpc_hb_timer);
933 DBUG_ON(xpc_vars->heartbeating_to_mask == 0);
864 934
935 /* take ourselves off of the reboot_notifier_list */
936 (void) unregister_reboot_notifier(&xpc_reboot_notifier);
865 937
866 /* close down protections for IPI operations */ 938 /* close down protections for IPI operations */
867 xpc_restrict_IPI_ops(); 939 xpc_restrict_IPI_ops();
@@ -876,6 +948,34 @@ xpc_do_exit(void)
876} 948}
877 949
878 950
951/*
952 * This function is called when the system is being rebooted.
953 */
954static int
955xpc_system_reboot(struct notifier_block *nb, unsigned long event, void *unused)
956{
957 enum xpc_retval reason;
958
959
960 switch (event) {
961 case SYS_RESTART:
962 reason = xpcSystemReboot;
963 break;
964 case SYS_HALT:
965 reason = xpcSystemHalt;
966 break;
967 case SYS_POWER_OFF:
968 reason = xpcSystemPoweroff;
969 break;
970 default:
971 reason = xpcSystemGoingDown;
972 }
973
974 xpc_do_exit(reason);
975 return NOTIFY_DONE;
976}
977
978
879int __init 979int __init
880xpc_init(void) 980xpc_init(void)
881{ 981{
@@ -920,6 +1020,12 @@ xpc_init(void)
920 spin_lock_init(&part->act_lock); 1020 spin_lock_init(&part->act_lock);
921 part->act_state = XPC_P_INACTIVE; 1021 part->act_state = XPC_P_INACTIVE;
922 XPC_SET_REASON(part, 0, 0); 1022 XPC_SET_REASON(part, 0, 0);
1023
1024 init_timer(&part->disengage_request_timer);
1025 part->disengage_request_timer.function =
1026 xpc_timeout_partition_disengage_request;
1027 part->disengage_request_timer.data = (unsigned long) part;
1028
923 part->setup_state = XPC_P_UNSET; 1029 part->setup_state = XPC_P_UNSET;
924 init_waitqueue_head(&part->teardown_wq); 1030 init_waitqueue_head(&part->teardown_wq);
925 atomic_set(&part->references, 0); 1031 atomic_set(&part->references, 0);
@@ -976,6 +1082,13 @@ xpc_init(void)
976 } 1082 }
977 1083
978 1084
1085 /* add ourselves to the reboot_notifier_list */
1086 ret = register_reboot_notifier(&xpc_reboot_notifier);
1087 if (ret != 0) {
1088 dev_warn(xpc_part, "can't register reboot notifier\n");
1089 }
1090
1091
979 /* 1092 /*
980 * Set the beating to other partitions into motion. This is 1093 * Set the beating to other partitions into motion. This is
981 * the last requirement for other partitions' discovery to 1094 * the last requirement for other partitions' discovery to
@@ -997,6 +1110,9 @@ xpc_init(void)
997 /* indicate to others that our reserved page is uninitialized */ 1110 /* indicate to others that our reserved page is uninitialized */
998 xpc_rsvd_page->vars_pa = 0; 1111 xpc_rsvd_page->vars_pa = 0;
999 1112
1113 /* take ourselves off of the reboot_notifier_list */
1114 (void) unregister_reboot_notifier(&xpc_reboot_notifier);
1115
1000 del_timer_sync(&xpc_hb_timer); 1116 del_timer_sync(&xpc_hb_timer);
1001 free_irq(SGI_XPC_ACTIVATE, NULL); 1117 free_irq(SGI_XPC_ACTIVATE, NULL);
1002 xpc_restrict_IPI_ops(); 1118 xpc_restrict_IPI_ops();
@@ -1018,9 +1134,9 @@ xpc_init(void)
1018 dev_err(xpc_part, "failed while forking discovery thread\n"); 1134 dev_err(xpc_part, "failed while forking discovery thread\n");
1019 1135
1020 /* mark this new thread as a non-starter */ 1136 /* mark this new thread as a non-starter */
1021 up(&xpc_discovery_exited); 1137 up(&xpc_discovery_inactive);
1022 1138
1023 xpc_do_exit(); 1139 xpc_do_exit(xpcUnloading);
1024 return -EBUSY; 1140 return -EBUSY;
1025 } 1141 }
1026 1142
@@ -1039,7 +1155,7 @@ module_init(xpc_init);
1039void __exit 1155void __exit
1040xpc_exit(void) 1156xpc_exit(void)
1041{ 1157{
1042 xpc_do_exit(); 1158 xpc_do_exit(xpcUnloading);
1043} 1159}
1044module_exit(xpc_exit); 1160module_exit(xpc_exit);
1045 1161
diff --git a/arch/ia64/sn/kernel/xpc_partition.c b/arch/ia64/sn/kernel/xpc_partition.c
index 578265ea9e67..79a0fc4c860c 100644
--- a/arch/ia64/sn/kernel/xpc_partition.c
+++ b/arch/ia64/sn/kernel/xpc_partition.c
@@ -76,11 +76,6 @@ char ____cacheline_aligned
76 xpc_remote_copy_buffer[XPC_RSVD_PAGE_ALIGNED_SIZE]; 76 xpc_remote_copy_buffer[XPC_RSVD_PAGE_ALIGNED_SIZE];
77 77
78 78
79/* systune related variables */
80int xpc_hb_interval = XPC_HB_DEFAULT_INTERVAL;
81int xpc_hb_check_interval = XPC_HB_CHECK_DEFAULT_TIMEOUT;
82
83
84/* 79/*
85 * Given a nasid, get the physical address of the partition's reserved page 80 * Given a nasid, get the physical address of the partition's reserved page
86 * for that nasid. This function returns 0 on any error. 81 * for that nasid. This function returns 0 on any error.
@@ -239,16 +234,21 @@ xpc_rsvd_page_init(void)
239 xpc_vars->amos_page = amos_page; /* save for next load of XPC */ 234 xpc_vars->amos_page = amos_page; /* save for next load of XPC */
240 235
241 236
242 /* 237 /* initialize the activate IRQ related AMO variables */
243 * Initialize the activation related AMO variables. 238 for (i = 0; i < XP_NASID_MASK_WORDS; i++) {
244 */ 239 (void) xpc_IPI_init(XPC_ACTIVATE_IRQ_AMOS + i);
245 xpc_vars->act_amos = xpc_IPI_init(XP_MAX_PARTITIONS);
246 for (i = 1; i < XP_NASID_MASK_WORDS; i++) {
247 xpc_IPI_init(i + XP_MAX_PARTITIONS);
248 } 240 }
241
242 /* initialize the engaged remote partitions related AMO variables */
243 (void) xpc_IPI_init(XPC_ENGAGED_PARTITIONS_AMO);
244 (void) xpc_IPI_init(XPC_DISENGAGE_REQUEST_AMO);
245
249 /* export AMO page's physical address to other partitions */ 246 /* export AMO page's physical address to other partitions */
250 xpc_vars->amos_page_pa = ia64_tpa((u64) xpc_vars->amos_page); 247 xpc_vars->amos_page_pa = ia64_tpa((u64) xpc_vars->amos_page);
251 248
249 /* timestamp of when reserved page was initialized */
250 rp->stamp = CURRENT_TIME;
251
252 /* 252 /*
253 * This signifies to the remote partition that our reserved 253 * This signifies to the remote partition that our reserved
254 * page is initialized. 254 * page is initialized.
@@ -387,6 +387,11 @@ xpc_check_remote_hb(void)
387 remote_vars = (struct xpc_vars *) xpc_remote_copy_buffer; 387 remote_vars = (struct xpc_vars *) xpc_remote_copy_buffer;
388 388
389 for (partid = 1; partid < XP_MAX_PARTITIONS; partid++) { 389 for (partid = 1; partid < XP_MAX_PARTITIONS; partid++) {
390
391 if (xpc_exiting) {
392 break;
393 }
394
390 if (partid == sn_partition_id) { 395 if (partid == sn_partition_id) {
391 continue; 396 continue;
392 } 397 }
@@ -417,7 +422,7 @@ xpc_check_remote_hb(void)
417 422
418 if (((remote_vars->heartbeat == part->last_heartbeat) && 423 if (((remote_vars->heartbeat == part->last_heartbeat) &&
419 (remote_vars->kdb_status == 0)) || 424 (remote_vars->kdb_status == 0)) ||
420 !XPC_HB_ALLOWED(sn_partition_id, remote_vars)) { 425 !xpc_hb_allowed(sn_partition_id, remote_vars)) {
421 426
422 XPC_DEACTIVATE_PARTITION(part, xpcNoHeartbeat); 427 XPC_DEACTIVATE_PARTITION(part, xpcNoHeartbeat);
423 continue; 428 continue;
@@ -436,23 +441,23 @@ xpc_check_remote_hb(void)
436 */ 441 */
437static enum xpc_retval 442static enum xpc_retval
438xpc_get_remote_rp(int nasid, u64 *discovered_nasids, 443xpc_get_remote_rp(int nasid, u64 *discovered_nasids,
439 struct xpc_rsvd_page *remote_rp, u64 *remote_rsvd_page_pa) 444 struct xpc_rsvd_page *remote_rp, u64 *remote_rp_pa)
440{ 445{
441 int bres, i; 446 int bres, i;
442 447
443 448
444 /* get the reserved page's physical address */ 449 /* get the reserved page's physical address */
445 450
446 *remote_rsvd_page_pa = xpc_get_rsvd_page_pa(nasid, (u64) remote_rp, 451 *remote_rp_pa = xpc_get_rsvd_page_pa(nasid, (u64) remote_rp,
447 XPC_RSVD_PAGE_ALIGNED_SIZE); 452 XPC_RSVD_PAGE_ALIGNED_SIZE);
448 if (*remote_rsvd_page_pa == 0) { 453 if (*remote_rp_pa == 0) {
449 return xpcNoRsvdPageAddr; 454 return xpcNoRsvdPageAddr;
450 } 455 }
451 456
452 457
453 /* pull over the reserved page structure */ 458 /* pull over the reserved page structure */
454 459
455 bres = xp_bte_copy(*remote_rsvd_page_pa, ia64_tpa((u64) remote_rp), 460 bres = xp_bte_copy(*remote_rp_pa, ia64_tpa((u64) remote_rp),
456 XPC_RSVD_PAGE_ALIGNED_SIZE, 461 XPC_RSVD_PAGE_ALIGNED_SIZE,
457 (BTE_NOTIFY | BTE_WACQUIRE), NULL); 462 (BTE_NOTIFY | BTE_WACQUIRE), NULL);
458 if (bres != BTE_SUCCESS) { 463 if (bres != BTE_SUCCESS) {
@@ -524,6 +529,55 @@ xpc_get_remote_vars(u64 remote_vars_pa, struct xpc_vars *remote_vars)
524 529
525 530
526/* 531/*
532 * Update the remote partition's info.
533 */
534static void
535xpc_update_partition_info(struct xpc_partition *part, u8 remote_rp_version,
536 struct timespec *remote_rp_stamp, u64 remote_rp_pa,
537 u64 remote_vars_pa, struct xpc_vars *remote_vars)
538{
539 part->remote_rp_version = remote_rp_version;
540 dev_dbg(xpc_part, " remote_rp_version = 0x%016lx\n",
541 part->remote_rp_version);
542
543 part->remote_rp_stamp = *remote_rp_stamp;
544 dev_dbg(xpc_part, " remote_rp_stamp (tv_sec = 0x%lx tv_nsec = 0x%lx\n",
545 part->remote_rp_stamp.tv_sec, part->remote_rp_stamp.tv_nsec);
546
547 part->remote_rp_pa = remote_rp_pa;
548 dev_dbg(xpc_part, " remote_rp_pa = 0x%016lx\n", part->remote_rp_pa);
549
550 part->remote_vars_pa = remote_vars_pa;
551 dev_dbg(xpc_part, " remote_vars_pa = 0x%016lx\n",
552 part->remote_vars_pa);
553
554 part->last_heartbeat = remote_vars->heartbeat;
555 dev_dbg(xpc_part, " last_heartbeat = 0x%016lx\n",
556 part->last_heartbeat);
557
558 part->remote_vars_part_pa = remote_vars->vars_part_pa;
559 dev_dbg(xpc_part, " remote_vars_part_pa = 0x%016lx\n",
560 part->remote_vars_part_pa);
561
562 part->remote_act_nasid = remote_vars->act_nasid;
563 dev_dbg(xpc_part, " remote_act_nasid = 0x%x\n",
564 part->remote_act_nasid);
565
566 part->remote_act_phys_cpuid = remote_vars->act_phys_cpuid;
567 dev_dbg(xpc_part, " remote_act_phys_cpuid = 0x%x\n",
568 part->remote_act_phys_cpuid);
569
570 part->remote_amos_page_pa = remote_vars->amos_page_pa;
571 dev_dbg(xpc_part, " remote_amos_page_pa = 0x%lx\n",
572 part->remote_amos_page_pa);
573
574 part->remote_vars_version = remote_vars->version;
575 dev_dbg(xpc_part, " remote_vars_version = 0x%x\n",
576 part->remote_vars_version);
577}
578
579
580/*
527 * Prior code has determined the nasid which generated an IPI. Inspect 581 * Prior code has determined the nasid which generated an IPI. Inspect
528 * that nasid to determine if its partition needs to be activated or 582 * that nasid to determine if its partition needs to be activated or
529 * deactivated. 583 * deactivated.
@@ -542,8 +596,12 @@ xpc_identify_act_IRQ_req(int nasid)
542{ 596{
543 struct xpc_rsvd_page *remote_rp; 597 struct xpc_rsvd_page *remote_rp;
544 struct xpc_vars *remote_vars; 598 struct xpc_vars *remote_vars;
545 u64 remote_rsvd_page_pa; 599 u64 remote_rp_pa;
546 u64 remote_vars_pa; 600 u64 remote_vars_pa;
601 int remote_rp_version;
602 int reactivate = 0;
603 int stamp_diff;
604 struct timespec remote_rp_stamp = { 0, 0 };
547 partid_t partid; 605 partid_t partid;
548 struct xpc_partition *part; 606 struct xpc_partition *part;
549 enum xpc_retval ret; 607 enum xpc_retval ret;
@@ -553,7 +611,7 @@ xpc_identify_act_IRQ_req(int nasid)
553 611
554 remote_rp = (struct xpc_rsvd_page *) xpc_remote_copy_buffer; 612 remote_rp = (struct xpc_rsvd_page *) xpc_remote_copy_buffer;
555 613
556 ret = xpc_get_remote_rp(nasid, NULL, remote_rp, &remote_rsvd_page_pa); 614 ret = xpc_get_remote_rp(nasid, NULL, remote_rp, &remote_rp_pa);
557 if (ret != xpcSuccess) { 615 if (ret != xpcSuccess) {
558 dev_warn(xpc_part, "unable to get reserved page from nasid %d, " 616 dev_warn(xpc_part, "unable to get reserved page from nasid %d, "
559 "which sent interrupt, reason=%d\n", nasid, ret); 617 "which sent interrupt, reason=%d\n", nasid, ret);
@@ -561,6 +619,10 @@ xpc_identify_act_IRQ_req(int nasid)
561 } 619 }
562 620
563 remote_vars_pa = remote_rp->vars_pa; 621 remote_vars_pa = remote_rp->vars_pa;
622 remote_rp_version = remote_rp->version;
623 if (XPC_SUPPORTS_RP_STAMP(remote_rp_version)) {
624 remote_rp_stamp = remote_rp->stamp;
625 }
564 partid = remote_rp->partid; 626 partid = remote_rp->partid;
565 part = &xpc_partitions[partid]; 627 part = &xpc_partitions[partid];
566 628
@@ -586,44 +648,117 @@ xpc_identify_act_IRQ_req(int nasid)
586 "%ld:0x%lx\n", (int) nasid, (int) partid, part->act_IRQ_rcvd, 648 "%ld:0x%lx\n", (int) nasid, (int) partid, part->act_IRQ_rcvd,
587 remote_vars->heartbeat, remote_vars->heartbeating_to_mask); 649 remote_vars->heartbeat, remote_vars->heartbeating_to_mask);
588 650
651 if (xpc_partition_disengaged(part) &&
652 part->act_state == XPC_P_INACTIVE) {
589 653
590 if (part->act_state == XPC_P_INACTIVE) { 654 xpc_update_partition_info(part, remote_rp_version,
655 &remote_rp_stamp, remote_rp_pa,
656 remote_vars_pa, remote_vars);
591 657
592 part->remote_rp_pa = remote_rsvd_page_pa; 658 if (XPC_SUPPORTS_DISENGAGE_REQUEST(part->remote_vars_version)) {
593 dev_dbg(xpc_part, " remote_rp_pa = 0x%016lx\n", 659 if (xpc_partition_disengage_requested(1UL << partid)) {
594 part->remote_rp_pa); 660 /*
661 * Other side is waiting on us to disengage,
662 * even though we already have.
663 */
664 return;
665 }
666 } else {
667 /* other side doesn't support disengage requests */
668 xpc_clear_partition_disengage_request(1UL << partid);
669 }
595 670
596 part->remote_vars_pa = remote_vars_pa; 671 xpc_activate_partition(part);
597 dev_dbg(xpc_part, " remote_vars_pa = 0x%016lx\n", 672 return;
598 part->remote_vars_pa); 673 }
599 674
600 part->last_heartbeat = remote_vars->heartbeat; 675 DBUG_ON(part->remote_rp_version == 0);
601 dev_dbg(xpc_part, " last_heartbeat = 0x%016lx\n", 676 DBUG_ON(part->remote_vars_version == 0);
602 part->last_heartbeat); 677
678 if (!XPC_SUPPORTS_RP_STAMP(part->remote_rp_version)) {
679 DBUG_ON(XPC_SUPPORTS_DISENGAGE_REQUEST(part->
680 remote_vars_version));
681
682 if (!XPC_SUPPORTS_RP_STAMP(remote_rp_version)) {
683 DBUG_ON(XPC_SUPPORTS_DISENGAGE_REQUEST(remote_vars->
684 version));
685 /* see if the other side rebooted */
686 if (part->remote_amos_page_pa ==
687 remote_vars->amos_page_pa &&
688 xpc_hb_allowed(sn_partition_id,
689 remote_vars)) {
690 /* doesn't look that way, so ignore the IPI */
691 return;
692 }
693 }
603 694
604 part->remote_vars_part_pa = remote_vars->vars_part_pa; 695 /*
605 dev_dbg(xpc_part, " remote_vars_part_pa = 0x%016lx\n", 696 * Other side rebooted and previous XPC didn't support the
606 part->remote_vars_part_pa); 697 * disengage request, so we don't need to do anything special.
698 */
607 699
608 part->remote_act_nasid = remote_vars->act_nasid; 700 xpc_update_partition_info(part, remote_rp_version,
609 dev_dbg(xpc_part, " remote_act_nasid = 0x%x\n", 701 &remote_rp_stamp, remote_rp_pa,
610 part->remote_act_nasid); 702 remote_vars_pa, remote_vars);
703 part->reactivate_nasid = nasid;
704 XPC_DEACTIVATE_PARTITION(part, xpcReactivating);
705 return;
706 }
611 707
612 part->remote_act_phys_cpuid = remote_vars->act_phys_cpuid; 708 DBUG_ON(!XPC_SUPPORTS_DISENGAGE_REQUEST(part->remote_vars_version));
613 dev_dbg(xpc_part, " remote_act_phys_cpuid = 0x%x\n",
614 part->remote_act_phys_cpuid);
615 709
616 part->remote_amos_page_pa = remote_vars->amos_page_pa; 710 if (!XPC_SUPPORTS_RP_STAMP(remote_rp_version)) {
617 dev_dbg(xpc_part, " remote_amos_page_pa = 0x%lx\n", 711 DBUG_ON(!XPC_SUPPORTS_DISENGAGE_REQUEST(remote_vars->version));
618 part->remote_amos_page_pa);
619 712
620 xpc_activate_partition(part); 713 /*
714 * Other side rebooted and previous XPC did support the
715 * disengage request, but the new one doesn't.
716 */
621 717
622 } else if (part->remote_amos_page_pa != remote_vars->amos_page_pa || 718 xpc_clear_partition_engaged(1UL << partid);
623 !XPC_HB_ALLOWED(sn_partition_id, remote_vars)) { 719 xpc_clear_partition_disengage_request(1UL << partid);
624 720
721 xpc_update_partition_info(part, remote_rp_version,
722 &remote_rp_stamp, remote_rp_pa,
723 remote_vars_pa, remote_vars);
724 reactivate = 1;
725
726 } else {
727 DBUG_ON(!XPC_SUPPORTS_DISENGAGE_REQUEST(remote_vars->version));
728
729 stamp_diff = xpc_compare_stamps(&part->remote_rp_stamp,
730 &remote_rp_stamp);
731 if (stamp_diff != 0) {
732 DBUG_ON(stamp_diff >= 0);
733
734 /*
735 * Other side rebooted and the previous XPC did support
736 * the disengage request, as does the new one.
737 */
738
739 DBUG_ON(xpc_partition_engaged(1UL << partid));
740 DBUG_ON(xpc_partition_disengage_requested(1UL <<
741 partid));
742
743 xpc_update_partition_info(part, remote_rp_version,
744 &remote_rp_stamp, remote_rp_pa,
745 remote_vars_pa, remote_vars);
746 reactivate = 1;
747 }
748 }
749
750 if (!xpc_partition_disengaged(part)) {
751 /* still waiting on other side to disengage from us */
752 return;
753 }
754
755 if (reactivate) {
625 part->reactivate_nasid = nasid; 756 part->reactivate_nasid = nasid;
626 XPC_DEACTIVATE_PARTITION(part, xpcReactivating); 757 XPC_DEACTIVATE_PARTITION(part, xpcReactivating);
758
759 } else if (XPC_SUPPORTS_DISENGAGE_REQUEST(part->remote_vars_version) &&
760 xpc_partition_disengage_requested(1UL << partid)) {
761 XPC_DEACTIVATE_PARTITION(part, xpcOtherGoingDown);
627 } 762 }
628} 763}
629 764
@@ -646,12 +781,16 @@ xpc_identify_act_IRQ_sender(void)
646 struct xpc_rsvd_page *rp = (struct xpc_rsvd_page *) xpc_rsvd_page; 781 struct xpc_rsvd_page *rp = (struct xpc_rsvd_page *) xpc_rsvd_page;
647 782
648 783
649 act_amos = xpc_vars->act_amos; 784 act_amos = xpc_vars->amos_page + XPC_ACTIVATE_IRQ_AMOS;
650 785
651 786
652 /* scan through act AMO variable looking for non-zero entries */ 787 /* scan through act AMO variable looking for non-zero entries */
653 for (word = 0; word < XP_NASID_MASK_WORDS; word++) { 788 for (word = 0; word < XP_NASID_MASK_WORDS; word++) {
654 789
790 if (xpc_exiting) {
791 break;
792 }
793
655 nasid_mask = xpc_IPI_receive(&act_amos[word]); 794 nasid_mask = xpc_IPI_receive(&act_amos[word]);
656 if (nasid_mask == 0) { 795 if (nasid_mask == 0) {
657 /* no IRQs from nasids in this variable */ 796 /* no IRQs from nasids in this variable */
@@ -688,6 +827,55 @@ xpc_identify_act_IRQ_sender(void)
688 827
689 828
690/* 829/*
830 * See if the other side has responded to a partition disengage request
831 * from us.
832 */
833int
834xpc_partition_disengaged(struct xpc_partition *part)
835{
836 partid_t partid = XPC_PARTID(part);
837 int disengaged;
838
839
840 disengaged = (xpc_partition_engaged(1UL << partid) == 0);
841 if (part->disengage_request_timeout) {
842 if (!disengaged) {
843 if (jiffies < part->disengage_request_timeout) {
844 /* timelimit hasn't been reached yet */
845 return 0;
846 }
847
848 /*
849 * Other side hasn't responded to our disengage
850 * request in a timely fashion, so assume it's dead.
851 */
852
853 xpc_clear_partition_engaged(1UL << partid);
854 disengaged = 1;
855 }
856 part->disengage_request_timeout = 0;
857
858 /* cancel the timer function, provided it's not us */
859 if (!in_interrupt()) {
860 del_singleshot_timer_sync(&part->
861 disengage_request_timer);
862 }
863
864 DBUG_ON(part->act_state != XPC_P_DEACTIVATING &&
865 part->act_state != XPC_P_INACTIVE);
866 if (part->act_state != XPC_P_INACTIVE) {
867 xpc_wakeup_channel_mgr(part);
868 }
869
870 if (XPC_SUPPORTS_DISENGAGE_REQUEST(part->remote_vars_version)) {
871 xpc_cancel_partition_disengage_request(part);
872 }
873 }
874 return disengaged;
875}
876
877
878/*
691 * Mark specified partition as active. 879 * Mark specified partition as active.
692 */ 880 */
693enum xpc_retval 881enum xpc_retval
@@ -721,7 +909,6 @@ xpc_deactivate_partition(const int line, struct xpc_partition *part,
721 enum xpc_retval reason) 909 enum xpc_retval reason)
722{ 910{
723 unsigned long irq_flags; 911 unsigned long irq_flags;
724 partid_t partid = XPC_PARTID(part);
725 912
726 913
727 spin_lock_irqsave(&part->act_lock, irq_flags); 914 spin_lock_irqsave(&part->act_lock, irq_flags);
@@ -749,17 +936,27 @@ xpc_deactivate_partition(const int line, struct xpc_partition *part,
749 936
750 spin_unlock_irqrestore(&part->act_lock, irq_flags); 937 spin_unlock_irqrestore(&part->act_lock, irq_flags);
751 938
752 XPC_DISALLOW_HB(partid, xpc_vars); 939 if (XPC_SUPPORTS_DISENGAGE_REQUEST(part->remote_vars_version)) {
940 xpc_request_partition_disengage(part);
941 xpc_IPI_send_disengage(part);
942
943 /* set a timelimit on the disengage request */
944 part->disengage_request_timeout = jiffies +
945 (XPC_DISENGAGE_REQUEST_TIMELIMIT * HZ);
946 part->disengage_request_timer.expires =
947 part->disengage_request_timeout;
948 add_timer(&part->disengage_request_timer);
949 }
753 950
754 dev_dbg(xpc_part, "bringing partition %d down, reason = %d\n", partid, 951 dev_dbg(xpc_part, "bringing partition %d down, reason = %d\n", partid,
755 reason); 952 reason);
756 953
757 xpc_partition_down(part, reason); 954 xpc_partition_going_down(part, reason);
758} 955}
759 956
760 957
761/* 958/*
762 * Mark specified partition as active. 959 * Mark specified partition as inactive.
763 */ 960 */
764void 961void
765xpc_mark_partition_inactive(struct xpc_partition *part) 962xpc_mark_partition_inactive(struct xpc_partition *part)
@@ -792,7 +989,7 @@ xpc_discovery(void)
792 void *remote_rp_base; 989 void *remote_rp_base;
793 struct xpc_rsvd_page *remote_rp; 990 struct xpc_rsvd_page *remote_rp;
794 struct xpc_vars *remote_vars; 991 struct xpc_vars *remote_vars;
795 u64 remote_rsvd_page_pa; 992 u64 remote_rp_pa;
796 u64 remote_vars_pa; 993 u64 remote_vars_pa;
797 int region; 994 int region;
798 int max_regions; 995 int max_regions;
@@ -877,7 +1074,7 @@ xpc_discovery(void)
877 /* pull over the reserved page structure */ 1074 /* pull over the reserved page structure */
878 1075
879 ret = xpc_get_remote_rp(nasid, discovered_nasids, 1076 ret = xpc_get_remote_rp(nasid, discovered_nasids,
880 remote_rp, &remote_rsvd_page_pa); 1077 remote_rp, &remote_rp_pa);
881 if (ret != xpcSuccess) { 1078 if (ret != xpcSuccess) {
882 dev_dbg(xpc_part, "unable to get reserved page " 1079 dev_dbg(xpc_part, "unable to get reserved page "
883 "from nasid %d, reason=%d\n", nasid, 1080 "from nasid %d, reason=%d\n", nasid,
@@ -948,6 +1145,13 @@ xpc_discovery(void)
948 remote_vars->act_nasid, 1145 remote_vars->act_nasid,
949 remote_vars->act_phys_cpuid); 1146 remote_vars->act_phys_cpuid);
950 1147
1148 if (XPC_SUPPORTS_DISENGAGE_REQUEST(remote_vars->
1149 version)) {
1150 part->remote_amos_page_pa =
1151 remote_vars->amos_page_pa;
1152 xpc_mark_partition_disengaged(part);
1153 xpc_cancel_partition_disengage_request(part);
1154 }
951 xpc_IPI_send_activate(remote_vars); 1155 xpc_IPI_send_activate(remote_vars);
952 } 1156 }
953 } 1157 }