aboutsummaryrefslogtreecommitdiffstats
path: root/include
diff options
context:
space:
mode:
Diffstat (limited to 'include')
-rw-r--r--include/asm-ia64/sn/xp.h485
-rw-r--r--include/asm-ia64/sn/xpc.h1267
2 files changed, 0 insertions, 1752 deletions
diff --git a/include/asm-ia64/sn/xp.h b/include/asm-ia64/sn/xp.h
deleted file mode 100644
index f7711b308e48..000000000000
--- a/include/asm-ia64/sn/xp.h
+++ /dev/null
@@ -1,485 +0,0 @@
1/*
2 * This file is subject to the terms and conditions of the GNU General Public
3 * License. See the file "COPYING" in the main directory of this archive
4 * for more details.
5 *
6 * Copyright (C) 2004-2005 Silicon Graphics, Inc. All rights reserved.
7 */
8
9
10/*
11 * External Cross Partition (XP) structures and defines.
12 */
13
14
15#ifndef _ASM_IA64_SN_XP_H
16#define _ASM_IA64_SN_XP_H
17
18
19#include <linux/cache.h>
20#include <linux/hardirq.h>
21#include <linux/mutex.h>
22#include <asm/sn/types.h>
23#include <asm/sn/bte.h>
24
25
26#ifdef USE_DBUG_ON
27#define DBUG_ON(condition) BUG_ON(condition)
28#else
29#define DBUG_ON(condition)
30#endif
31
32
33/*
34 * Define the maximum number of logically defined partitions the system
35 * can support. It is constrained by the maximum number of hardware
36 * partitionable regions. The term 'region' in this context refers to the
37 * minimum number of nodes that can comprise an access protection grouping.
38 * The access protection is in regards to memory, IPI and IOI.
39 *
40 * The maximum number of hardware partitionable regions is equal to the
41 * maximum number of nodes in the entire system divided by the minimum number
42 * of nodes that comprise an access protection grouping.
43 */
44#define XP_MAX_PARTITIONS 64
45
46
47/*
48 * Define the number of u64s required to represent all the C-brick nasids
49 * as a bitmap. The cross-partition kernel modules deal only with
50 * C-brick nasids, thus the need for bitmaps which don't account for
51 * odd-numbered (non C-brick) nasids.
52 */
53#define XP_MAX_PHYSNODE_ID (MAX_NUMALINK_NODES / 2)
54#define XP_NASID_MASK_BYTES ((XP_MAX_PHYSNODE_ID + 7) / 8)
55#define XP_NASID_MASK_WORDS ((XP_MAX_PHYSNODE_ID + 63) / 64)
56
57
58/*
59 * Wrapper for bte_copy() that should it return a failure status will retry
60 * the bte_copy() once in the hope that the failure was due to a temporary
61 * aberration (i.e., the link going down temporarily).
62 *
63 * src - physical address of the source of the transfer.
64 * vdst - virtual address of the destination of the transfer.
65 * len - number of bytes to transfer from source to destination.
66 * mode - see bte_copy() for definition.
67 * notification - see bte_copy() for definition.
68 *
69 * Note: xp_bte_copy() should never be called while holding a spinlock.
70 */
71static inline bte_result_t
72xp_bte_copy(u64 src, u64 vdst, u64 len, u64 mode, void *notification)
73{
74 bte_result_t ret;
75 u64 pdst = ia64_tpa(vdst);
76
77
78 /*
79 * Ensure that the physically mapped memory is contiguous.
80 *
81 * We do this by ensuring that the memory is from region 7 only.
82 * If the need should arise to use memory from one of the other
83 * regions, then modify the BUG_ON() statement to ensure that the
84 * memory from that region is always physically contiguous.
85 */
86 BUG_ON(REGION_NUMBER(vdst) != RGN_KERNEL);
87
88 ret = bte_copy(src, pdst, len, mode, notification);
89 if ((ret != BTE_SUCCESS) && BTE_ERROR_RETRY(ret)) {
90 if (!in_interrupt()) {
91 cond_resched();
92 }
93 ret = bte_copy(src, pdst, len, mode, notification);
94 }
95
96 return ret;
97}
98
99
100/*
101 * XPC establishes channel connections between the local partition and any
102 * other partition that is currently up. Over these channels, kernel-level
103 * `users' can communicate with their counterparts on the other partitions.
104 *
105 * The maxinum number of channels is limited to eight. For performance reasons,
106 * the internal cross partition structures require sixteen bytes per channel,
107 * and eight allows all of this interface-shared info to fit in one cache line.
108 *
109 * XPC_NCHANNELS reflects the total number of channels currently defined.
110 * If the need for additional channels arises, one can simply increase
111 * XPC_NCHANNELS accordingly. If the day should come where that number
112 * exceeds the MAXIMUM number of channels allowed (eight), then one will need
113 * to make changes to the XPC code to allow for this.
114 */
115#define XPC_MEM_CHANNEL 0 /* memory channel number */
116#define XPC_NET_CHANNEL 1 /* network channel number */
117
118#define XPC_NCHANNELS 2 /* #of defined channels */
119#define XPC_MAX_NCHANNELS 8 /* max #of channels allowed */
120
121#if XPC_NCHANNELS > XPC_MAX_NCHANNELS
122#error XPC_NCHANNELS exceeds MAXIMUM allowed.
123#endif
124
125
126/*
127 * The format of an XPC message is as follows:
128 *
129 * +-------+--------------------------------+
130 * | flags |////////////////////////////////|
131 * +-------+--------------------------------+
132 * | message # |
133 * +----------------------------------------+
134 * | payload (user-defined message) |
135 * | |
136 * :
137 * | |
138 * +----------------------------------------+
139 *
140 * The size of the payload is defined by the user via xpc_connect(). A user-
141 * defined message resides in the payload area.
142 *
143 * The user should have no dealings with the message header, but only the
144 * message's payload. When a message entry is allocated (via xpc_allocate())
145 * a pointer to the payload area is returned and not the actual beginning of
146 * the XPC message. The user then constructs a message in the payload area
147 * and passes that pointer as an argument on xpc_send() or xpc_send_notify().
148 *
149 * The size of a message entry (within a message queue) must be a cacheline
150 * sized multiple in order to facilitate the BTE transfer of messages from one
151 * message queue to another. A macro, XPC_MSG_SIZE(), is provided for the user
152 * that wants to fit as many msg entries as possible in a given memory size
153 * (e.g. a memory page).
154 */
155struct xpc_msg {
156 u8 flags; /* FOR XPC INTERNAL USE ONLY */
157 u8 reserved[7]; /* FOR XPC INTERNAL USE ONLY */
158 s64 number; /* FOR XPC INTERNAL USE ONLY */
159
160 u64 payload; /* user defined portion of message */
161};
162
163
164#define XPC_MSG_PAYLOAD_OFFSET (u64) (&((struct xpc_msg *)0)->payload)
165#define XPC_MSG_SIZE(_payload_size) \
166 L1_CACHE_ALIGN(XPC_MSG_PAYLOAD_OFFSET + (_payload_size))
167
168
169/*
170 * Define the return values and values passed to user's callout functions.
171 * (It is important to add new value codes at the end just preceding
172 * xpcUnknownReason, which must have the highest numerical value.)
173 */
174enum xpc_retval {
175 xpcSuccess = 0,
176
177 xpcNotConnected, /* 1: channel is not connected */
178 xpcConnected, /* 2: channel connected (opened) */
179 xpcRETIRED1, /* 3: (formerly xpcDisconnected) */
180
181 xpcMsgReceived, /* 4: message received */
182 xpcMsgDelivered, /* 5: message delivered and acknowledged */
183
184 xpcRETIRED2, /* 6: (formerly xpcTransferFailed) */
185
186 xpcNoWait, /* 7: operation would require wait */
187 xpcRetry, /* 8: retry operation */
188 xpcTimeout, /* 9: timeout in xpc_allocate_msg_wait() */
189 xpcInterrupted, /* 10: interrupted wait */
190
191 xpcUnequalMsgSizes, /* 11: message size disparity between sides */
192 xpcInvalidAddress, /* 12: invalid address */
193
194 xpcNoMemory, /* 13: no memory available for XPC structures */
195 xpcLackOfResources, /* 14: insufficient resources for operation */
196 xpcUnregistered, /* 15: channel is not registered */
197 xpcAlreadyRegistered, /* 16: channel is already registered */
198
199 xpcPartitionDown, /* 17: remote partition is down */
200 xpcNotLoaded, /* 18: XPC module is not loaded */
201 xpcUnloading, /* 19: this side is unloading XPC module */
202
203 xpcBadMagic, /* 20: XPC MAGIC string not found */
204
205 xpcReactivating, /* 21: remote partition was reactivated */
206
207 xpcUnregistering, /* 22: this side is unregistering channel */
208 xpcOtherUnregistering, /* 23: other side is unregistering channel */
209
210 xpcCloneKThread, /* 24: cloning kernel thread */
211 xpcCloneKThreadFailed, /* 25: cloning kernel thread failed */
212
213 xpcNoHeartbeat, /* 26: remote partition has no heartbeat */
214
215 xpcPioReadError, /* 27: PIO read error */
216 xpcPhysAddrRegFailed, /* 28: registration of phys addr range failed */
217
218 xpcBteDirectoryError, /* 29: maps to BTEFAIL_DIR */
219 xpcBtePoisonError, /* 30: maps to BTEFAIL_POISON */
220 xpcBteWriteError, /* 31: maps to BTEFAIL_WERR */
221 xpcBteAccessError, /* 32: maps to BTEFAIL_ACCESS */
222 xpcBtePWriteError, /* 33: maps to BTEFAIL_PWERR */
223 xpcBtePReadError, /* 34: maps to BTEFAIL_PRERR */
224 xpcBteTimeOutError, /* 35: maps to BTEFAIL_TOUT */
225 xpcBteXtalkError, /* 36: maps to BTEFAIL_XTERR */
226 xpcBteNotAvailable, /* 37: maps to BTEFAIL_NOTAVAIL */
227 xpcBteUnmappedError, /* 38: unmapped BTEFAIL_ error */
228
229 xpcBadVersion, /* 39: bad version number */
230 xpcVarsNotSet, /* 40: the XPC variables are not set up */
231 xpcNoRsvdPageAddr, /* 41: unable to get rsvd page's phys addr */
232 xpcInvalidPartid, /* 42: invalid partition ID */
233 xpcLocalPartid, /* 43: local partition ID */
234
235 xpcOtherGoingDown, /* 44: other side going down, reason unknown */
236 xpcSystemGoingDown, /* 45: system is going down, reason unknown */
237 xpcSystemHalt, /* 46: system is being halted */
238 xpcSystemReboot, /* 47: system is being rebooted */
239 xpcSystemPoweroff, /* 48: system is being powered off */
240
241 xpcDisconnecting, /* 49: channel disconnecting (closing) */
242
243 xpcOpenCloseError, /* 50: channel open/close protocol error */
244
245 xpcDisconnected, /* 51: channel disconnected (closed) */
246
247 xpcBteSh2Start, /* 52: BTE CRB timeout */
248
249 /* 53: 0x1 BTE Error Response Short */
250 xpcBteSh2RspShort = xpcBteSh2Start + BTEFAIL_SH2_RESP_SHORT,
251
252 /* 54: 0x2 BTE Error Response Long */
253 xpcBteSh2RspLong = xpcBteSh2Start + BTEFAIL_SH2_RESP_LONG,
254
255 /* 56: 0x4 BTE Error Response DSB */
256 xpcBteSh2RspDSB = xpcBteSh2Start + BTEFAIL_SH2_RESP_DSP,
257
258 /* 60: 0x8 BTE Error Response Access */
259 xpcBteSh2RspAccess = xpcBteSh2Start + BTEFAIL_SH2_RESP_ACCESS,
260
261 /* 68: 0x10 BTE Error CRB timeout */
262 xpcBteSh2CRBTO = xpcBteSh2Start + BTEFAIL_SH2_CRB_TO,
263
264 /* 84: 0x20 BTE Error NACK limit */
265 xpcBteSh2NACKLimit = xpcBteSh2Start + BTEFAIL_SH2_NACK_LIMIT,
266
267 /* 115: BTE end */
268 xpcBteSh2End = xpcBteSh2Start + BTEFAIL_SH2_ALL,
269
270 xpcUnknownReason /* 116: unknown reason -- must be last in list */
271};
272
273
274/*
275 * Define the callout function types used by XPC to update the user on
276 * connection activity and state changes (via the user function registered by
277 * xpc_connect()) and to notify them of messages received and delivered (via
278 * the user function registered by xpc_send_notify()).
279 *
280 * The two function types are xpc_channel_func and xpc_notify_func and
281 * both share the following arguments, with the exception of "data", which
282 * only xpc_channel_func has.
283 *
284 * Arguments:
285 *
286 * reason - reason code. (See following table.)
287 * partid - partition ID associated with condition.
288 * ch_number - channel # associated with condition.
289 * data - pointer to optional data. (See following table.)
290 * key - pointer to optional user-defined value provided as the "key"
291 * argument to xpc_connect() or xpc_send_notify().
292 *
293 * In the following table the "Optional Data" column applies to callouts made
294 * to functions registered by xpc_connect(). A "NA" in that column indicates
295 * that this reason code can be passed to functions registered by
296 * xpc_send_notify() (i.e. they don't have data arguments).
297 *
298 * Also, the first three reason codes in the following table indicate
299 * success, whereas the others indicate failure. When a failure reason code
300 * is received, one can assume that the channel is not connected.
301 *
302 *
303 * Reason Code | Cause | Optional Data
304 * =====================+================================+=====================
305 * xpcConnected | connection has been established| max #of entries
306 * | to the specified partition on | allowed in message
307 * | the specified channel | queue
308 * ---------------------+--------------------------------+---------------------
309 * xpcMsgReceived | an XPC message arrived from | address of payload
310 * | the specified partition on the |
311 * | specified channel | [the user must call
312 * | | xpc_received() when
313 * | | finished with the
314 * | | payload]
315 * ---------------------+--------------------------------+---------------------
316 * xpcMsgDelivered | notification that the message | NA
317 * | was delivered to the intended |
318 * | recipient and that they have |
319 * | acknowledged its receipt by |
320 * | calling xpc_received() |
321 * =====================+================================+=====================
322 * xpcUnequalMsgSizes | can't connect to the specified | NULL
323 * | partition on the specified |
324 * | channel because of mismatched |
325 * | message sizes |
326 * ---------------------+--------------------------------+---------------------
327 * xpcNoMemory | insufficient memory avaiable | NULL
328 * | to allocate message queue |
329 * ---------------------+--------------------------------+---------------------
330 * xpcLackOfResources | lack of resources to create | NULL
331 * | the necessary kthreads to |
332 * | support the channel |
333 * ---------------------+--------------------------------+---------------------
334 * xpcUnregistering | this side's user has | NULL or NA
335 * | unregistered by calling |
336 * | xpc_disconnect() |
337 * ---------------------+--------------------------------+---------------------
338 * xpcOtherUnregistering| the other side's user has | NULL or NA
339 * | unregistered by calling |
340 * | xpc_disconnect() |
341 * ---------------------+--------------------------------+---------------------
342 * xpcNoHeartbeat | the other side's XPC is no | NULL or NA
343 * | longer heartbeating |
344 * | |
345 * ---------------------+--------------------------------+---------------------
346 * xpcUnloading | this side's XPC module is | NULL or NA
347 * | being unloaded |
348 * | |
349 * ---------------------+--------------------------------+---------------------
350 * xpcOtherUnloading | the other side's XPC module is | NULL or NA
351 * | is being unloaded |
352 * | |
353 * ---------------------+--------------------------------+---------------------
354 * xpcPioReadError | xp_nofault_PIOR() returned an | NULL or NA
355 * | error while sending an IPI |
356 * | |
357 * ---------------------+--------------------------------+---------------------
358 * xpcInvalidAddress | the address either received or | NULL or NA
359 * | sent by the specified partition|
360 * | is invalid |
361 * ---------------------+--------------------------------+---------------------
362 * xpcBteNotAvailable | attempt to pull data from the | NULL or NA
363 * xpcBtePoisonError | specified partition over the |
364 * xpcBteWriteError | specified channel via a |
365 * xpcBteAccessError | bte_copy() failed |
366 * xpcBteTimeOutError | |
367 * xpcBteXtalkError | |
368 * xpcBteDirectoryError | |
369 * xpcBteGenericError | |
370 * xpcBteUnmappedError | |
371 * ---------------------+--------------------------------+---------------------
372 * xpcUnknownReason | the specified channel to the | NULL or NA
373 * | specified partition was |
374 * | unavailable for unknown reasons|
375 * =====================+================================+=====================
376 */
377
378typedef void (*xpc_channel_func)(enum xpc_retval reason, partid_t partid,
379 int ch_number, void *data, void *key);
380
381typedef void (*xpc_notify_func)(enum xpc_retval reason, partid_t partid,
382 int ch_number, void *key);
383
384
385/*
386 * The following is a registration entry. There is a global array of these,
387 * one per channel. It is used to record the connection registration made
388 * by the users of XPC. As long as a registration entry exists, for any
389 * partition that comes up, XPC will attempt to establish a connection on
390 * that channel. Notification that a connection has been made will occur via
391 * the xpc_channel_func function.
392 *
393 * The 'func' field points to the function to call when aynchronous
394 * notification is required for such events as: a connection established/lost,
395 * or an incoming message received, or an error condition encountered. A
396 * non-NULL 'func' field indicates that there is an active registration for
397 * the channel.
398 */
399struct xpc_registration {
400 struct mutex mutex;
401 xpc_channel_func func; /* function to call */
402 void *key; /* pointer to user's key */
403 u16 nentries; /* #of msg entries in local msg queue */
404 u16 msg_size; /* message queue's message size */
405 u32 assigned_limit; /* limit on #of assigned kthreads */
406 u32 idle_limit; /* limit on #of idle kthreads */
407} ____cacheline_aligned;
408
409
410#define XPC_CHANNEL_REGISTERED(_c) (xpc_registrations[_c].func != NULL)
411
412
413/* the following are valid xpc_allocate() flags */
414#define XPC_WAIT 0 /* wait flag */
415#define XPC_NOWAIT 1 /* no wait flag */
416
417
418struct xpc_interface {
419 void (*connect)(int);
420 void (*disconnect)(int);
421 enum xpc_retval (*allocate)(partid_t, int, u32, void **);
422 enum xpc_retval (*send)(partid_t, int, void *);
423 enum xpc_retval (*send_notify)(partid_t, int, void *,
424 xpc_notify_func, void *);
425 void (*received)(partid_t, int, void *);
426 enum xpc_retval (*partid_to_nasids)(partid_t, void *);
427};
428
429
430extern struct xpc_interface xpc_interface;
431
432extern void xpc_set_interface(void (*)(int),
433 void (*)(int),
434 enum xpc_retval (*)(partid_t, int, u32, void **),
435 enum xpc_retval (*)(partid_t, int, void *),
436 enum xpc_retval (*)(partid_t, int, void *, xpc_notify_func,
437 void *),
438 void (*)(partid_t, int, void *),
439 enum xpc_retval (*)(partid_t, void *));
440extern void xpc_clear_interface(void);
441
442
443extern enum xpc_retval xpc_connect(int, xpc_channel_func, void *, u16,
444 u16, u32, u32);
445extern void xpc_disconnect(int);
446
447static inline enum xpc_retval
448xpc_allocate(partid_t partid, int ch_number, u32 flags, void **payload)
449{
450 return xpc_interface.allocate(partid, ch_number, flags, payload);
451}
452
453static inline enum xpc_retval
454xpc_send(partid_t partid, int ch_number, void *payload)
455{
456 return xpc_interface.send(partid, ch_number, payload);
457}
458
459static inline enum xpc_retval
460xpc_send_notify(partid_t partid, int ch_number, void *payload,
461 xpc_notify_func func, void *key)
462{
463 return xpc_interface.send_notify(partid, ch_number, payload, func, key);
464}
465
466static inline void
467xpc_received(partid_t partid, int ch_number, void *payload)
468{
469 return xpc_interface.received(partid, ch_number, payload);
470}
471
472static inline enum xpc_retval
473xpc_partid_to_nasids(partid_t partid, void *nasids)
474{
475 return xpc_interface.partid_to_nasids(partid, nasids);
476}
477
478
479extern u64 xp_nofault_PIOR_target;
480extern int xp_nofault_PIOR(void *);
481extern int xp_error_PIOR(void);
482
483
484#endif /* _ASM_IA64_SN_XP_H */
485
diff --git a/include/asm-ia64/sn/xpc.h b/include/asm-ia64/sn/xpc.h
deleted file mode 100644
index 3c0900ab8003..000000000000
--- a/include/asm-ia64/sn/xpc.h
+++ /dev/null
@@ -1,1267 +0,0 @@
1/*
2 * This file is subject to the terms and conditions of the GNU General Public
3 * License. See the file "COPYING" in the main directory of this archive
4 * for more details.
5 *
6 * Copyright (c) 2004-2007 Silicon Graphics, Inc. All Rights Reserved.
7 */
8
9
10/*
11 * Cross Partition Communication (XPC) structures and macros.
12 */
13
14#ifndef _ASM_IA64_SN_XPC_H
15#define _ASM_IA64_SN_XPC_H
16
17
18#include <linux/interrupt.h>
19#include <linux/sysctl.h>
20#include <linux/device.h>
21#include <linux/mutex.h>
22#include <linux/completion.h>
23#include <asm/pgtable.h>
24#include <asm/processor.h>
25#include <asm/sn/bte.h>
26#include <asm/sn/clksupport.h>
27#include <asm/sn/addrs.h>
28#include <asm/sn/mspec.h>
29#include <asm/sn/shub_mmr.h>
30#include <asm/sn/xp.h>
31
32
33/*
34 * XPC Version numbers consist of a major and minor number. XPC can always
35 * talk to versions with same major #, and never talk to versions with a
36 * different major #.
37 */
38#define _XPC_VERSION(_maj, _min) (((_maj) << 4) | ((_min) & 0xf))
39#define XPC_VERSION_MAJOR(_v) ((_v) >> 4)
40#define XPC_VERSION_MINOR(_v) ((_v) & 0xf)
41
42
43/*
44 * The next macros define word or bit representations for given
45 * C-brick nasid in either the SAL provided bit array representing
46 * nasids in the partition/machine or the AMO_t array used for
47 * inter-partition initiation communications.
48 *
49 * For SN2 machines, C-Bricks are alway even numbered NASIDs. As
50 * such, some space will be saved by insisting that nasid information
51 * passed from SAL always be packed for C-Bricks and the
52 * cross-partition interrupts use the same packing scheme.
53 */
54#define XPC_NASID_W_INDEX(_n) (((_n) / 64) / 2)
55#define XPC_NASID_B_INDEX(_n) (((_n) / 2) & (64 - 1))
56#define XPC_NASID_IN_ARRAY(_n, _p) ((_p)[XPC_NASID_W_INDEX(_n)] & \
57 (1UL << XPC_NASID_B_INDEX(_n)))
58#define XPC_NASID_FROM_W_B(_w, _b) (((_w) * 64 + (_b)) * 2)
59
60#define XPC_HB_DEFAULT_INTERVAL 5 /* incr HB every x secs */
61#define XPC_HB_CHECK_DEFAULT_INTERVAL 20 /* check HB every x secs */
62
63/* define the process name of HB checker and the CPU it is pinned to */
64#define XPC_HB_CHECK_THREAD_NAME "xpc_hb"
65#define XPC_HB_CHECK_CPU 0
66
67/* define the process name of the discovery thread */
68#define XPC_DISCOVERY_THREAD_NAME "xpc_discovery"
69
70
71/*
72 * the reserved page
73 *
74 * SAL reserves one page of memory per partition for XPC. Though a full page
75 * in length (16384 bytes), its starting address is not page aligned, but it
76 * is cacheline aligned. The reserved page consists of the following:
77 *
78 * reserved page header
79 *
80 * The first cacheline of the reserved page contains the header
81 * (struct xpc_rsvd_page). Before SAL initialization has completed,
82 * SAL has set up the following fields of the reserved page header:
83 * SAL_signature, SAL_version, partid, and nasids_size. The other
84 * fields are set up by XPC. (xpc_rsvd_page points to the local
85 * partition's reserved page.)
86 *
87 * part_nasids mask
88 * mach_nasids mask
89 *
90 * SAL also sets up two bitmaps (or masks), one that reflects the actual
91 * nasids in this partition (part_nasids), and the other that reflects
92 * the actual nasids in the entire machine (mach_nasids). We're only
93 * interested in the even numbered nasids (which contain the processors
94 * and/or memory), so we only need half as many bits to represent the
95 * nasids. The part_nasids mask is located starting at the first cacheline
96 * following the reserved page header. The mach_nasids mask follows right
97 * after the part_nasids mask. The size in bytes of each mask is reflected
98 * by the reserved page header field 'nasids_size'. (Local partition's
99 * mask pointers are xpc_part_nasids and xpc_mach_nasids.)
100 *
101 * vars
102 * vars part
103 *
104 * Immediately following the mach_nasids mask are the XPC variables
105 * required by other partitions. First are those that are generic to all
106 * partitions (vars), followed on the next available cacheline by those
107 * which are partition specific (vars part). These are setup by XPC.
108 * (Local partition's vars pointers are xpc_vars and xpc_vars_part.)
109 *
110 * Note: Until vars_pa is set, the partition XPC code has not been initialized.
111 */
112struct xpc_rsvd_page {
113 u64 SAL_signature; /* SAL: unique signature */
114 u64 SAL_version; /* SAL: version */
115 u8 partid; /* SAL: partition ID */
116 u8 version;
117 u8 pad1[6]; /* align to next u64 in cacheline */
118 volatile u64 vars_pa;
119 struct timespec stamp; /* time when reserved page was setup by XPC */
120 u64 pad2[9]; /* align to last u64 in cacheline */
121 u64 nasids_size; /* SAL: size of each nasid mask in bytes */
122};
123
124#define XPC_RP_VERSION _XPC_VERSION(1,1) /* version 1.1 of the reserved page */
125
126#define XPC_SUPPORTS_RP_STAMP(_version) \
127 (_version >= _XPC_VERSION(1,1))
128
129/*
130 * compare stamps - the return value is:
131 *
132 * < 0, if stamp1 < stamp2
133 * = 0, if stamp1 == stamp2
134 * > 0, if stamp1 > stamp2
135 */
136static inline int
137xpc_compare_stamps(struct timespec *stamp1, struct timespec *stamp2)
138{
139 int ret;
140
141
142 if ((ret = stamp1->tv_sec - stamp2->tv_sec) == 0) {
143 ret = stamp1->tv_nsec - stamp2->tv_nsec;
144 }
145 return ret;
146}
147
148
149/*
150 * Define the structures by which XPC variables can be exported to other
151 * partitions. (There are two: struct xpc_vars and struct xpc_vars_part)
152 */
153
154/*
155 * The following structure describes the partition generic variables
156 * needed by other partitions in order to properly initialize.
157 *
158 * struct xpc_vars version number also applies to struct xpc_vars_part.
159 * Changes to either structure and/or related functionality should be
160 * reflected by incrementing either the major or minor version numbers
161 * of struct xpc_vars.
162 */
163struct xpc_vars {
164 u8 version;
165 u64 heartbeat;
166 u64 heartbeating_to_mask;
167 u64 heartbeat_offline; /* if 0, heartbeat should be changing */
168 int act_nasid;
169 int act_phys_cpuid;
170 u64 vars_part_pa;
171 u64 amos_page_pa; /* paddr of page of AMOs from MSPEC driver */
172 AMO_t *amos_page; /* vaddr of page of AMOs from MSPEC driver */
173};
174
175#define XPC_V_VERSION _XPC_VERSION(3,1) /* version 3.1 of the cross vars */
176
177#define XPC_SUPPORTS_DISENGAGE_REQUEST(_version) \
178 (_version >= _XPC_VERSION(3,1))
179
180
181static inline int
182xpc_hb_allowed(partid_t partid, struct xpc_vars *vars)
183{
184 return ((vars->heartbeating_to_mask & (1UL << partid)) != 0);
185}
186
187static inline void
188xpc_allow_hb(partid_t partid, struct xpc_vars *vars)
189{
190 u64 old_mask, new_mask;
191
192 do {
193 old_mask = vars->heartbeating_to_mask;
194 new_mask = (old_mask | (1UL << partid));
195 } while (cmpxchg(&vars->heartbeating_to_mask, old_mask, new_mask) !=
196 old_mask);
197}
198
199static inline void
200xpc_disallow_hb(partid_t partid, struct xpc_vars *vars)
201{
202 u64 old_mask, new_mask;
203
204 do {
205 old_mask = vars->heartbeating_to_mask;
206 new_mask = (old_mask & ~(1UL << partid));
207 } while (cmpxchg(&vars->heartbeating_to_mask, old_mask, new_mask) !=
208 old_mask);
209}
210
211
212/*
213 * The AMOs page consists of a number of AMO variables which are divided into
214 * four groups, The first two groups are used to identify an IRQ's sender.
215 * These two groups consist of 64 and 128 AMO variables respectively. The last
216 * two groups, consisting of just one AMO variable each, are used to identify
217 * the remote partitions that are currently engaged (from the viewpoint of
218 * the XPC running on the remote partition).
219 */
220#define XPC_NOTIFY_IRQ_AMOS 0
221#define XPC_ACTIVATE_IRQ_AMOS (XPC_NOTIFY_IRQ_AMOS + XP_MAX_PARTITIONS)
222#define XPC_ENGAGED_PARTITIONS_AMO (XPC_ACTIVATE_IRQ_AMOS + XP_NASID_MASK_WORDS)
223#define XPC_DISENGAGE_REQUEST_AMO (XPC_ENGAGED_PARTITIONS_AMO + 1)
224
225
226/*
227 * The following structure describes the per partition specific variables.
228 *
229 * An array of these structures, one per partition, will be defined. As a
230 * partition becomes active XPC will copy the array entry corresponding to
231 * itself from that partition. It is desirable that the size of this
232 * structure evenly divide into a cacheline, such that none of the entries
233 * in this array crosses a cacheline boundary. As it is now, each entry
234 * occupies half a cacheline.
235 */
236struct xpc_vars_part {
237 volatile u64 magic;
238
239 u64 openclose_args_pa; /* physical address of open and close args */
240 u64 GPs_pa; /* physical address of Get/Put values */
241
242 u64 IPI_amo_pa; /* physical address of IPI AMO_t structure */
243 int IPI_nasid; /* nasid of where to send IPIs */
244 int IPI_phys_cpuid; /* physical CPU ID of where to send IPIs */
245
246 u8 nchannels; /* #of defined channels supported */
247
248 u8 reserved[23]; /* pad to a full 64 bytes */
249};
250
251/*
252 * The vars_part MAGIC numbers play a part in the first contact protocol.
253 *
254 * MAGIC1 indicates that the per partition specific variables for a remote
255 * partition have been initialized by this partition.
256 *
257 * MAGIC2 indicates that this partition has pulled the remote partititions
258 * per partition variables that pertain to this partition.
259 */
260#define XPC_VP_MAGIC1 0x0053524156435058L /* 'XPCVARS\0'L (little endian) */
261#define XPC_VP_MAGIC2 0x0073726176435058L /* 'XPCvars\0'L (little endian) */
262
263
264/* the reserved page sizes and offsets */
265
266#define XPC_RP_HEADER_SIZE L1_CACHE_ALIGN(sizeof(struct xpc_rsvd_page))
267#define XPC_RP_VARS_SIZE L1_CACHE_ALIGN(sizeof(struct xpc_vars))
268
269#define XPC_RP_PART_NASIDS(_rp) (u64 *) ((u8 *) _rp + XPC_RP_HEADER_SIZE)
270#define XPC_RP_MACH_NASIDS(_rp) (XPC_RP_PART_NASIDS(_rp) + xp_nasid_mask_words)
271#define XPC_RP_VARS(_rp) ((struct xpc_vars *) XPC_RP_MACH_NASIDS(_rp) + xp_nasid_mask_words)
272#define XPC_RP_VARS_PART(_rp) (struct xpc_vars_part *) ((u8 *) XPC_RP_VARS(rp) + XPC_RP_VARS_SIZE)
273
274
275/*
276 * Functions registered by add_timer() or called by kernel_thread() only
277 * allow for a single 64-bit argument. The following macros can be used to
278 * pack and unpack two (32-bit, 16-bit or 8-bit) arguments into or out from
279 * the passed argument.
280 */
281#define XPC_PACK_ARGS(_arg1, _arg2) \
282 ((((u64) _arg1) & 0xffffffff) | \
283 ((((u64) _arg2) & 0xffffffff) << 32))
284
285#define XPC_UNPACK_ARG1(_args) (((u64) _args) & 0xffffffff)
286#define XPC_UNPACK_ARG2(_args) ((((u64) _args) >> 32) & 0xffffffff)
287
288
289
290/*
291 * Define a Get/Put value pair (pointers) used with a message queue.
292 */
293struct xpc_gp {
294 volatile s64 get; /* Get value */
295 volatile s64 put; /* Put value */
296};
297
298#define XPC_GP_SIZE \
299 L1_CACHE_ALIGN(sizeof(struct xpc_gp) * XPC_NCHANNELS)
300
301
302
303/*
304 * Define a structure that contains arguments associated with opening and
305 * closing a channel.
306 */
307struct xpc_openclose_args {
308 u16 reason; /* reason why channel is closing */
309 u16 msg_size; /* sizeof each message entry */
310 u16 remote_nentries; /* #of message entries in remote msg queue */
311 u16 local_nentries; /* #of message entries in local msg queue */
312 u64 local_msgqueue_pa; /* physical address of local message queue */
313};
314
315#define XPC_OPENCLOSE_ARGS_SIZE \
316 L1_CACHE_ALIGN(sizeof(struct xpc_openclose_args) * XPC_NCHANNELS)
317
318
319
320/* struct xpc_msg flags */
321
322#define XPC_M_DONE 0x01 /* msg has been received/consumed */
323#define XPC_M_READY 0x02 /* msg is ready to be sent */
324#define XPC_M_INTERRUPT 0x04 /* send interrupt when msg consumed */
325
326
327#define XPC_MSG_ADDRESS(_payload) \
328 ((struct xpc_msg *)((u8 *)(_payload) - XPC_MSG_PAYLOAD_OFFSET))
329
330
331
332/*
333 * Defines notify entry.
334 *
335 * This is used to notify a message's sender that their message was received
336 * and consumed by the intended recipient.
337 */
338struct xpc_notify {
339 volatile u8 type; /* type of notification */
340
341 /* the following two fields are only used if type == XPC_N_CALL */
342 xpc_notify_func func; /* user's notify function */
343 void *key; /* pointer to user's key */
344};
345
346/* struct xpc_notify type of notification */
347
348#define XPC_N_CALL 0x01 /* notify function provided by user */
349
350
351
352/*
353 * Define the structure that manages all the stuff required by a channel. In
354 * particular, they are used to manage the messages sent across the channel.
355 *
356 * This structure is private to a partition, and is NOT shared across the
357 * partition boundary.
358 *
359 * There is an array of these structures for each remote partition. It is
360 * allocated at the time a partition becomes active. The array contains one
361 * of these structures for each potential channel connection to that partition.
362 *
363 * Each of these structures manages two message queues (circular buffers).
364 * They are allocated at the time a channel connection is made. One of
365 * these message queues (local_msgqueue) holds the locally created messages
366 * that are destined for the remote partition. The other of these message
367 * queues (remote_msgqueue) is a locally cached copy of the remote partition's
368 * own local_msgqueue.
369 *
370 * The following is a description of the Get/Put pointers used to manage these
371 * two message queues. Consider the local_msgqueue to be on one partition
372 * and the remote_msgqueue to be its cached copy on another partition. A
373 * description of what each of the lettered areas contains is included.
374 *
375 *
376 * local_msgqueue remote_msgqueue
377 *
378 * |/////////| |/////////|
379 * w_remote_GP.get --> +---------+ |/////////|
380 * | F | |/////////|
381 * remote_GP.get --> +---------+ +---------+ <-- local_GP->get
382 * | | | |
383 * | | | E |
384 * | | | |
385 * | | +---------+ <-- w_local_GP.get
386 * | B | |/////////|
387 * | | |////D////|
388 * | | |/////////|
389 * | | +---------+ <-- w_remote_GP.put
390 * | | |////C////|
391 * local_GP->put --> +---------+ +---------+ <-- remote_GP.put
392 * | | |/////////|
393 * | A | |/////////|
394 * | | |/////////|
395 * w_local_GP.put --> +---------+ |/////////|
396 * |/////////| |/////////|
397 *
398 *
399 * ( remote_GP.[get|put] are cached copies of the remote
400 * partition's local_GP->[get|put], and thus their values can
401 * lag behind their counterparts on the remote partition. )
402 *
403 *
404 * A - Messages that have been allocated, but have not yet been sent to the
405 * remote partition.
406 *
407 * B - Messages that have been sent, but have not yet been acknowledged by the
408 * remote partition as having been received.
409 *
410 * C - Area that needs to be prepared for the copying of sent messages, by
411 * the clearing of the message flags of any previously received messages.
412 *
413 * D - Area into which sent messages are to be copied from the remote
414 * partition's local_msgqueue and then delivered to their intended
415 * recipients. [ To allow for a multi-message copy, another pointer
416 * (next_msg_to_pull) has been added to keep track of the next message
417 * number needing to be copied (pulled). It chases after w_remote_GP.put.
418 * Any messages lying between w_local_GP.get and next_msg_to_pull have
419 * been copied and are ready to be delivered. ]
420 *
421 * E - Messages that have been copied and delivered, but have not yet been
422 * acknowledged by the recipient as having been received.
423 *
424 * F - Messages that have been acknowledged, but XPC has not yet notified the
425 * sender that the message was received by its intended recipient.
426 * This is also an area that needs to be prepared for the allocating of
427 * new messages, by the clearing of the message flags of the acknowledged
428 * messages.
429 */
430struct xpc_channel {
431 partid_t partid; /* ID of remote partition connected */
432 spinlock_t lock; /* lock for updating this structure */
433 u32 flags; /* general flags */
434
435 enum xpc_retval reason; /* reason why channel is disconnect'g */
436 int reason_line; /* line# disconnect initiated from */
437
438 u16 number; /* channel # */
439
440 u16 msg_size; /* sizeof each msg entry */
441 u16 local_nentries; /* #of msg entries in local msg queue */
442 u16 remote_nentries; /* #of msg entries in remote msg queue*/
443
444 void *local_msgqueue_base; /* base address of kmalloc'd space */
445 struct xpc_msg *local_msgqueue; /* local message queue */
446 void *remote_msgqueue_base; /* base address of kmalloc'd space */
447 struct xpc_msg *remote_msgqueue;/* cached copy of remote partition's */
448 /* local message queue */
449 u64 remote_msgqueue_pa; /* phys addr of remote partition's */
450 /* local message queue */
451
452 atomic_t references; /* #of external references to queues */
453
454 atomic_t n_on_msg_allocate_wq; /* #on msg allocation wait queue */
455 wait_queue_head_t msg_allocate_wq; /* msg allocation wait queue */
456
457 u8 delayed_IPI_flags; /* IPI flags received, but delayed */
458 /* action until channel disconnected */
459
460 /* queue of msg senders who want to be notified when msg received */
461
462 atomic_t n_to_notify; /* #of msg senders to notify */
463 struct xpc_notify *notify_queue;/* notify queue for messages sent */
464
465 xpc_channel_func func; /* user's channel function */
466 void *key; /* pointer to user's key */
467
468 struct mutex msg_to_pull_mutex; /* next msg to pull serialization */
469 struct completion wdisconnect_wait; /* wait for channel disconnect */
470
471 struct xpc_openclose_args *local_openclose_args; /* args passed on */
472 /* opening or closing of channel */
473
474 /* various flavors of local and remote Get/Put values */
475
476 struct xpc_gp *local_GP; /* local Get/Put values */
477 struct xpc_gp remote_GP; /* remote Get/Put values */
478 struct xpc_gp w_local_GP; /* working local Get/Put values */
479 struct xpc_gp w_remote_GP; /* working remote Get/Put values */
480 s64 next_msg_to_pull; /* Put value of next msg to pull */
481
482 /* kthread management related fields */
483
484// >>> rethink having kthreads_assigned_limit and kthreads_idle_limit; perhaps
485// >>> allow the assigned limit be unbounded and let the idle limit be dynamic
486// >>> dependent on activity over the last interval of time
487 atomic_t kthreads_assigned; /* #of kthreads assigned to channel */
488 u32 kthreads_assigned_limit; /* limit on #of kthreads assigned */
489 atomic_t kthreads_idle; /* #of kthreads idle waiting for work */
490 u32 kthreads_idle_limit; /* limit on #of kthreads idle */
491 atomic_t kthreads_active; /* #of kthreads actively working */
492 // >>> following field is temporary
493 u32 kthreads_created; /* total #of kthreads created */
494
495 wait_queue_head_t idle_wq; /* idle kthread wait queue */
496
497} ____cacheline_aligned;
498
499
500/* struct xpc_channel flags */
501
502#define XPC_C_WASCONNECTED 0x00000001 /* channel was connected */
503
504#define XPC_C_ROPENREPLY 0x00000002 /* remote open channel reply */
505#define XPC_C_OPENREPLY 0x00000004 /* local open channel reply */
506#define XPC_C_ROPENREQUEST 0x00000008 /* remote open channel request */
507#define XPC_C_OPENREQUEST 0x00000010 /* local open channel request */
508
509#define XPC_C_SETUP 0x00000020 /* channel's msgqueues are alloc'd */
510#define XPC_C_CONNECTEDCALLOUT 0x00000040 /* connected callout initiated */
511#define XPC_C_CONNECTEDCALLOUT_MADE \
512 0x00000080 /* connected callout completed */
513#define XPC_C_CONNECTED 0x00000100 /* local channel is connected */
514#define XPC_C_CONNECTING 0x00000200 /* channel is being connected */
515
516#define XPC_C_RCLOSEREPLY 0x00000400 /* remote close channel reply */
517#define XPC_C_CLOSEREPLY 0x00000800 /* local close channel reply */
518#define XPC_C_RCLOSEREQUEST 0x00001000 /* remote close channel request */
519#define XPC_C_CLOSEREQUEST 0x00002000 /* local close channel request */
520
521#define XPC_C_DISCONNECTED 0x00004000 /* channel is disconnected */
522#define XPC_C_DISCONNECTING 0x00008000 /* channel is being disconnected */
523#define XPC_C_DISCONNECTINGCALLOUT \
524 0x00010000 /* disconnecting callout initiated */
525#define XPC_C_DISCONNECTINGCALLOUT_MADE \
526 0x00020000 /* disconnecting callout completed */
527#define XPC_C_WDISCONNECT 0x00040000 /* waiting for channel disconnect */
528
529
530
531/*
532 * Manages channels on a partition basis. There is one of these structures
533 * for each partition (a partition will never utilize the structure that
534 * represents itself).
535 */
536struct xpc_partition {
537
538 /* XPC HB infrastructure */
539
540 u8 remote_rp_version; /* version# of partition's rsvd pg */
541 struct timespec remote_rp_stamp;/* time when rsvd pg was initialized */
542 u64 remote_rp_pa; /* phys addr of partition's rsvd pg */
543 u64 remote_vars_pa; /* phys addr of partition's vars */
544 u64 remote_vars_part_pa; /* phys addr of partition's vars part */
545 u64 last_heartbeat; /* HB at last read */
546 u64 remote_amos_page_pa; /* phys addr of partition's amos page */
547 int remote_act_nasid; /* active part's act/deact nasid */
548 int remote_act_phys_cpuid; /* active part's act/deact phys cpuid */
549 u32 act_IRQ_rcvd; /* IRQs since activation */
550 spinlock_t act_lock; /* protect updating of act_state */
551 u8 act_state; /* from XPC HB viewpoint */
552 u8 remote_vars_version; /* version# of partition's vars */
553 enum xpc_retval reason; /* reason partition is deactivating */
554 int reason_line; /* line# deactivation initiated from */
555 int reactivate_nasid; /* nasid in partition to reactivate */
556
557 unsigned long disengage_request_timeout; /* timeout in jiffies */
558 struct timer_list disengage_request_timer;
559
560
561 /* XPC infrastructure referencing and teardown control */
562
563 volatile u8 setup_state; /* infrastructure setup state */
564 wait_queue_head_t teardown_wq; /* kthread waiting to teardown infra */
565 atomic_t references; /* #of references to infrastructure */
566
567
568 /*
569 * NONE OF THE PRECEDING FIELDS OF THIS STRUCTURE WILL BE CLEARED WHEN
570 * XPC SETS UP THE NECESSARY INFRASTRUCTURE TO SUPPORT CROSS PARTITION
571 * COMMUNICATION. ALL OF THE FOLLOWING FIELDS WILL BE CLEARED. (THE
572 * 'nchannels' FIELD MUST BE THE FIRST OF THE FIELDS TO BE CLEARED.)
573 */
574
575
576 u8 nchannels; /* #of defined channels supported */
577 atomic_t nchannels_active; /* #of channels that are not DISCONNECTED */
578 atomic_t nchannels_engaged;/* #of channels engaged with remote part */
579 struct xpc_channel *channels;/* array of channel structures */
580
581 void *local_GPs_base; /* base address of kmalloc'd space */
582 struct xpc_gp *local_GPs; /* local Get/Put values */
583 void *remote_GPs_base; /* base address of kmalloc'd space */
584 struct xpc_gp *remote_GPs;/* copy of remote partition's local Get/Put */
585 /* values */
586 u64 remote_GPs_pa; /* phys address of remote partition's local */
587 /* Get/Put values */
588
589
590 /* fields used to pass args when opening or closing a channel */
591
592 void *local_openclose_args_base; /* base address of kmalloc'd space */
593 struct xpc_openclose_args *local_openclose_args; /* local's args */
594 void *remote_openclose_args_base; /* base address of kmalloc'd space */
595 struct xpc_openclose_args *remote_openclose_args; /* copy of remote's */
596 /* args */
597 u64 remote_openclose_args_pa; /* phys addr of remote's args */
598
599
600 /* IPI sending, receiving and handling related fields */
601
602 int remote_IPI_nasid; /* nasid of where to send IPIs */
603 int remote_IPI_phys_cpuid; /* phys CPU ID of where to send IPIs */
604 AMO_t *remote_IPI_amo_va; /* address of remote IPI AMO_t structure */
605
606 AMO_t *local_IPI_amo_va; /* address of IPI AMO_t structure */
607 u64 local_IPI_amo; /* IPI amo flags yet to be handled */
608 char IPI_owner[8]; /* IPI owner's name */
609 struct timer_list dropped_IPI_timer; /* dropped IPI timer */
610
611 spinlock_t IPI_lock; /* IPI handler lock */
612
613
614 /* channel manager related fields */
615
616 atomic_t channel_mgr_requests; /* #of requests to activate chan mgr */
617 wait_queue_head_t channel_mgr_wq; /* channel mgr's wait queue */
618
619} ____cacheline_aligned;
620
621
622/* struct xpc_partition act_state values (for XPC HB) */
623
624#define XPC_P_INACTIVE 0x00 /* partition is not active */
625#define XPC_P_ACTIVATION_REQ 0x01 /* created thread to activate */
626#define XPC_P_ACTIVATING 0x02 /* activation thread started */
627#define XPC_P_ACTIVE 0x03 /* xpc_partition_up() was called */
628#define XPC_P_DEACTIVATING 0x04 /* partition deactivation initiated */
629
630
631#define XPC_DEACTIVATE_PARTITION(_p, _reason) \
632 xpc_deactivate_partition(__LINE__, (_p), (_reason))
633
634
635/* struct xpc_partition setup_state values */
636
637#define XPC_P_UNSET 0x00 /* infrastructure was never setup */
638#define XPC_P_SETUP 0x01 /* infrastructure is setup */
639#define XPC_P_WTEARDOWN 0x02 /* waiting to teardown infrastructure */
640#define XPC_P_TORNDOWN 0x03 /* infrastructure is torndown */
641
642
643
644/*
645 * struct xpc_partition IPI_timer #of seconds to wait before checking for
646 * dropped IPIs. These occur whenever an IPI amo write doesn't complete until
647 * after the IPI was received.
648 */
649#define XPC_P_DROPPED_IPI_WAIT (0.25 * HZ)
650
651
652/* number of seconds to wait for other partitions to disengage */
653#define XPC_DISENGAGE_REQUEST_DEFAULT_TIMELIMIT 90
654
655/* interval in seconds to print 'waiting disengagement' messages */
656#define XPC_DISENGAGE_PRINTMSG_INTERVAL 10
657
658
659#define XPC_PARTID(_p) ((partid_t) ((_p) - &xpc_partitions[0]))
660
661
662
663/* found in xp_main.c */
664extern struct xpc_registration xpc_registrations[];
665
666
667/* found in xpc_main.c */
668extern struct device *xpc_part;
669extern struct device *xpc_chan;
670extern int xpc_disengage_request_timelimit;
671extern int xpc_disengage_request_timedout;
672extern irqreturn_t xpc_notify_IRQ_handler(int, void *);
673extern void xpc_dropped_IPI_check(struct xpc_partition *);
674extern void xpc_activate_partition(struct xpc_partition *);
675extern void xpc_activate_kthreads(struct xpc_channel *, int);
676extern void xpc_create_kthreads(struct xpc_channel *, int, int);
677extern void xpc_disconnect_wait(int);
678
679
680/* found in xpc_partition.c */
681extern int xpc_exiting;
682extern struct xpc_vars *xpc_vars;
683extern struct xpc_rsvd_page *xpc_rsvd_page;
684extern struct xpc_vars_part *xpc_vars_part;
685extern struct xpc_partition xpc_partitions[XP_MAX_PARTITIONS + 1];
686extern char *xpc_remote_copy_buffer;
687extern void *xpc_remote_copy_buffer_base;
688extern void *xpc_kmalloc_cacheline_aligned(size_t, gfp_t, void **);
689extern struct xpc_rsvd_page *xpc_rsvd_page_init(void);
690extern void xpc_allow_IPI_ops(void);
691extern void xpc_restrict_IPI_ops(void);
692extern int xpc_identify_act_IRQ_sender(void);
693extern int xpc_partition_disengaged(struct xpc_partition *);
694extern enum xpc_retval xpc_mark_partition_active(struct xpc_partition *);
695extern void xpc_mark_partition_inactive(struct xpc_partition *);
696extern void xpc_discovery(void);
697extern void xpc_check_remote_hb(void);
698extern void xpc_deactivate_partition(const int, struct xpc_partition *,
699 enum xpc_retval);
700extern enum xpc_retval xpc_initiate_partid_to_nasids(partid_t, void *);
701
702
703/* found in xpc_channel.c */
704extern void xpc_initiate_connect(int);
705extern void xpc_initiate_disconnect(int);
706extern enum xpc_retval xpc_initiate_allocate(partid_t, int, u32, void **);
707extern enum xpc_retval xpc_initiate_send(partid_t, int, void *);
708extern enum xpc_retval xpc_initiate_send_notify(partid_t, int, void *,
709 xpc_notify_func, void *);
710extern void xpc_initiate_received(partid_t, int, void *);
711extern enum xpc_retval xpc_setup_infrastructure(struct xpc_partition *);
712extern enum xpc_retval xpc_pull_remote_vars_part(struct xpc_partition *);
713extern void xpc_process_channel_activity(struct xpc_partition *);
714extern void xpc_connected_callout(struct xpc_channel *);
715extern void xpc_deliver_msg(struct xpc_channel *);
716extern void xpc_disconnect_channel(const int, struct xpc_channel *,
717 enum xpc_retval, unsigned long *);
718extern void xpc_disconnect_callout(struct xpc_channel *, enum xpc_retval);
719extern void xpc_partition_going_down(struct xpc_partition *, enum xpc_retval);
720extern void xpc_teardown_infrastructure(struct xpc_partition *);
721
722
723
724static inline void
725xpc_wakeup_channel_mgr(struct xpc_partition *part)
726{
727 if (atomic_inc_return(&part->channel_mgr_requests) == 1) {
728 wake_up(&part->channel_mgr_wq);
729 }
730}
731
732
733
734/*
735 * These next two inlines are used to keep us from tearing down a channel's
736 * msg queues while a thread may be referencing them.
737 */
738static inline void
739xpc_msgqueue_ref(struct xpc_channel *ch)
740{
741 atomic_inc(&ch->references);
742}
743
744static inline void
745xpc_msgqueue_deref(struct xpc_channel *ch)
746{
747 s32 refs = atomic_dec_return(&ch->references);
748
749 DBUG_ON(refs < 0);
750 if (refs == 0) {
751 xpc_wakeup_channel_mgr(&xpc_partitions[ch->partid]);
752 }
753}
754
755
756
757#define XPC_DISCONNECT_CHANNEL(_ch, _reason, _irqflgs) \
758 xpc_disconnect_channel(__LINE__, _ch, _reason, _irqflgs)
759
760
761/*
762 * These two inlines are used to keep us from tearing down a partition's
763 * setup infrastructure while a thread may be referencing it.
764 */
765static inline void
766xpc_part_deref(struct xpc_partition *part)
767{
768 s32 refs = atomic_dec_return(&part->references);
769
770
771 DBUG_ON(refs < 0);
772 if (refs == 0 && part->setup_state == XPC_P_WTEARDOWN) {
773 wake_up(&part->teardown_wq);
774 }
775}
776
777static inline int
778xpc_part_ref(struct xpc_partition *part)
779{
780 int setup;
781
782
783 atomic_inc(&part->references);
784 setup = (part->setup_state == XPC_P_SETUP);
785 if (!setup) {
786 xpc_part_deref(part);
787 }
788 return setup;
789}
790
791
792
793/*
794 * The following macro is to be used for the setting of the reason and
795 * reason_line fields in both the struct xpc_channel and struct xpc_partition
796 * structures.
797 */
798#define XPC_SET_REASON(_p, _reason, _line) \
799 { \
800 (_p)->reason = _reason; \
801 (_p)->reason_line = _line; \
802 }
803
804
805
806/*
807 * This next set of inlines are used to keep track of when a partition is
808 * potentially engaged in accessing memory belonging to another partition.
809 */
810
811static inline void
812xpc_mark_partition_engaged(struct xpc_partition *part)
813{
814 unsigned long irq_flags;
815 AMO_t *amo = (AMO_t *) __va(part->remote_amos_page_pa +
816 (XPC_ENGAGED_PARTITIONS_AMO * sizeof(AMO_t)));
817
818
819 local_irq_save(irq_flags);
820
821 /* set bit corresponding to our partid in remote partition's AMO */
822 FETCHOP_STORE_OP(TO_AMO((u64) &amo->variable), FETCHOP_OR,
823 (1UL << sn_partition_id));
824 /*
825 * We must always use the nofault function regardless of whether we
826 * are on a Shub 1.1 system or a Shub 1.2 slice 0xc processor. If we
827 * didn't, we'd never know that the other partition is down and would
828 * keep sending IPIs and AMOs to it until the heartbeat times out.
829 */
830 (void) xp_nofault_PIOR((u64 *) GLOBAL_MMR_ADDR(NASID_GET(&amo->
831 variable), xp_nofault_PIOR_target));
832
833 local_irq_restore(irq_flags);
834}
835
836static inline void
837xpc_mark_partition_disengaged(struct xpc_partition *part)
838{
839 unsigned long irq_flags;
840 AMO_t *amo = (AMO_t *) __va(part->remote_amos_page_pa +
841 (XPC_ENGAGED_PARTITIONS_AMO * sizeof(AMO_t)));
842
843
844 local_irq_save(irq_flags);
845
846 /* clear bit corresponding to our partid in remote partition's AMO */
847 FETCHOP_STORE_OP(TO_AMO((u64) &amo->variable), FETCHOP_AND,
848 ~(1UL << sn_partition_id));
849 /*
850 * We must always use the nofault function regardless of whether we
851 * are on a Shub 1.1 system or a Shub 1.2 slice 0xc processor. If we
852 * didn't, we'd never know that the other partition is down and would
853 * keep sending IPIs and AMOs to it until the heartbeat times out.
854 */
855 (void) xp_nofault_PIOR((u64 *) GLOBAL_MMR_ADDR(NASID_GET(&amo->
856 variable), xp_nofault_PIOR_target));
857
858 local_irq_restore(irq_flags);
859}
860
861static inline void
862xpc_request_partition_disengage(struct xpc_partition *part)
863{
864 unsigned long irq_flags;
865 AMO_t *amo = (AMO_t *) __va(part->remote_amos_page_pa +
866 (XPC_DISENGAGE_REQUEST_AMO * sizeof(AMO_t)));
867
868
869 local_irq_save(irq_flags);
870
871 /* set bit corresponding to our partid in remote partition's AMO */
872 FETCHOP_STORE_OP(TO_AMO((u64) &amo->variable), FETCHOP_OR,
873 (1UL << sn_partition_id));
874 /*
875 * We must always use the nofault function regardless of whether we
876 * are on a Shub 1.1 system or a Shub 1.2 slice 0xc processor. If we
877 * didn't, we'd never know that the other partition is down and would
878 * keep sending IPIs and AMOs to it until the heartbeat times out.
879 */
880 (void) xp_nofault_PIOR((u64 *) GLOBAL_MMR_ADDR(NASID_GET(&amo->
881 variable), xp_nofault_PIOR_target));
882
883 local_irq_restore(irq_flags);
884}
885
886static inline void
887xpc_cancel_partition_disengage_request(struct xpc_partition *part)
888{
889 unsigned long irq_flags;
890 AMO_t *amo = (AMO_t *) __va(part->remote_amos_page_pa +
891 (XPC_DISENGAGE_REQUEST_AMO * sizeof(AMO_t)));
892
893
894 local_irq_save(irq_flags);
895
896 /* clear bit corresponding to our partid in remote partition's AMO */
897 FETCHOP_STORE_OP(TO_AMO((u64) &amo->variable), FETCHOP_AND,
898 ~(1UL << sn_partition_id));
899 /*
900 * We must always use the nofault function regardless of whether we
901 * are on a Shub 1.1 system or a Shub 1.2 slice 0xc processor. If we
902 * didn't, we'd never know that the other partition is down and would
903 * keep sending IPIs and AMOs to it until the heartbeat times out.
904 */
905 (void) xp_nofault_PIOR((u64 *) GLOBAL_MMR_ADDR(NASID_GET(&amo->
906 variable), xp_nofault_PIOR_target));
907
908 local_irq_restore(irq_flags);
909}
910
911static inline u64
912xpc_partition_engaged(u64 partid_mask)
913{
914 AMO_t *amo = xpc_vars->amos_page + XPC_ENGAGED_PARTITIONS_AMO;
915
916
917 /* return our partition's AMO variable ANDed with partid_mask */
918 return (FETCHOP_LOAD_OP(TO_AMO((u64) &amo->variable), FETCHOP_LOAD) &
919 partid_mask);
920}
921
922static inline u64
923xpc_partition_disengage_requested(u64 partid_mask)
924{
925 AMO_t *amo = xpc_vars->amos_page + XPC_DISENGAGE_REQUEST_AMO;
926
927
928 /* return our partition's AMO variable ANDed with partid_mask */
929 return (FETCHOP_LOAD_OP(TO_AMO((u64) &amo->variable), FETCHOP_LOAD) &
930 partid_mask);
931}
932
933static inline void
934xpc_clear_partition_engaged(u64 partid_mask)
935{
936 AMO_t *amo = xpc_vars->amos_page + XPC_ENGAGED_PARTITIONS_AMO;
937
938
939 /* clear bit(s) based on partid_mask in our partition's AMO */
940 FETCHOP_STORE_OP(TO_AMO((u64) &amo->variable), FETCHOP_AND,
941 ~partid_mask);
942}
943
944static inline void
945xpc_clear_partition_disengage_request(u64 partid_mask)
946{
947 AMO_t *amo = xpc_vars->amos_page + XPC_DISENGAGE_REQUEST_AMO;
948
949
950 /* clear bit(s) based on partid_mask in our partition's AMO */
951 FETCHOP_STORE_OP(TO_AMO((u64) &amo->variable), FETCHOP_AND,
952 ~partid_mask);
953}
954
955
956
957/*
958 * The following set of macros and inlines are used for the sending and
959 * receiving of IPIs (also known as IRQs). There are two flavors of IPIs,
960 * one that is associated with partition activity (SGI_XPC_ACTIVATE) and
961 * the other that is associated with channel activity (SGI_XPC_NOTIFY).
962 */
963
964static inline u64
965xpc_IPI_receive(AMO_t *amo)
966{
967 return FETCHOP_LOAD_OP(TO_AMO((u64) &amo->variable), FETCHOP_CLEAR);
968}
969
970
971static inline enum xpc_retval
972xpc_IPI_send(AMO_t *amo, u64 flag, int nasid, int phys_cpuid, int vector)
973{
974 int ret = 0;
975 unsigned long irq_flags;
976
977
978 local_irq_save(irq_flags);
979
980 FETCHOP_STORE_OP(TO_AMO((u64) &amo->variable), FETCHOP_OR, flag);
981 sn_send_IPI_phys(nasid, phys_cpuid, vector, 0);
982
983 /*
984 * We must always use the nofault function regardless of whether we
985 * are on a Shub 1.1 system or a Shub 1.2 slice 0xc processor. If we
986 * didn't, we'd never know that the other partition is down and would
987 * keep sending IPIs and AMOs to it until the heartbeat times out.
988 */
989 ret = xp_nofault_PIOR((u64 *) GLOBAL_MMR_ADDR(NASID_GET(&amo->variable),
990 xp_nofault_PIOR_target));
991
992 local_irq_restore(irq_flags);
993
994 return ((ret == 0) ? xpcSuccess : xpcPioReadError);
995}
996
997
998/*
999 * IPIs associated with SGI_XPC_ACTIVATE IRQ.
1000 */
1001
1002/*
1003 * Flag the appropriate AMO variable and send an IPI to the specified node.
1004 */
1005static inline void
1006xpc_activate_IRQ_send(u64 amos_page_pa, int from_nasid, int to_nasid,
1007 int to_phys_cpuid)
1008{
1009 int w_index = XPC_NASID_W_INDEX(from_nasid);
1010 int b_index = XPC_NASID_B_INDEX(from_nasid);
1011 AMO_t *amos = (AMO_t *) __va(amos_page_pa +
1012 (XPC_ACTIVATE_IRQ_AMOS * sizeof(AMO_t)));
1013
1014
1015 (void) xpc_IPI_send(&amos[w_index], (1UL << b_index), to_nasid,
1016 to_phys_cpuid, SGI_XPC_ACTIVATE);
1017}
1018
1019static inline void
1020xpc_IPI_send_activate(struct xpc_vars *vars)
1021{
1022 xpc_activate_IRQ_send(vars->amos_page_pa, cnodeid_to_nasid(0),
1023 vars->act_nasid, vars->act_phys_cpuid);
1024}
1025
1026static inline void
1027xpc_IPI_send_activated(struct xpc_partition *part)
1028{
1029 xpc_activate_IRQ_send(part->remote_amos_page_pa, cnodeid_to_nasid(0),
1030 part->remote_act_nasid, part->remote_act_phys_cpuid);
1031}
1032
1033static inline void
1034xpc_IPI_send_reactivate(struct xpc_partition *part)
1035{
1036 xpc_activate_IRQ_send(xpc_vars->amos_page_pa, part->reactivate_nasid,
1037 xpc_vars->act_nasid, xpc_vars->act_phys_cpuid);
1038}
1039
1040static inline void
1041xpc_IPI_send_disengage(struct xpc_partition *part)
1042{
1043 xpc_activate_IRQ_send(part->remote_amos_page_pa, cnodeid_to_nasid(0),
1044 part->remote_act_nasid, part->remote_act_phys_cpuid);
1045}
1046
1047
1048/*
1049 * IPIs associated with SGI_XPC_NOTIFY IRQ.
1050 */
1051
1052/*
1053 * Send an IPI to the remote partition that is associated with the
1054 * specified channel.
1055 */
1056#define XPC_NOTIFY_IRQ_SEND(_ch, _ipi_f, _irq_f) \
1057 xpc_notify_IRQ_send(_ch, _ipi_f, #_ipi_f, _irq_f)
1058
1059static inline void
1060xpc_notify_IRQ_send(struct xpc_channel *ch, u8 ipi_flag, char *ipi_flag_string,
1061 unsigned long *irq_flags)
1062{
1063 struct xpc_partition *part = &xpc_partitions[ch->partid];
1064 enum xpc_retval ret;
1065
1066
1067 if (likely(part->act_state != XPC_P_DEACTIVATING)) {
1068 ret = xpc_IPI_send(part->remote_IPI_amo_va,
1069 (u64) ipi_flag << (ch->number * 8),
1070 part->remote_IPI_nasid,
1071 part->remote_IPI_phys_cpuid,
1072 SGI_XPC_NOTIFY);
1073 dev_dbg(xpc_chan, "%s sent to partid=%d, channel=%d, ret=%d\n",
1074 ipi_flag_string, ch->partid, ch->number, ret);
1075 if (unlikely(ret != xpcSuccess)) {
1076 if (irq_flags != NULL) {
1077 spin_unlock_irqrestore(&ch->lock, *irq_flags);
1078 }
1079 XPC_DEACTIVATE_PARTITION(part, ret);
1080 if (irq_flags != NULL) {
1081 spin_lock_irqsave(&ch->lock, *irq_flags);
1082 }
1083 }
1084 }
1085}
1086
1087
1088/*
1089 * Make it look like the remote partition, which is associated with the
1090 * specified channel, sent us an IPI. This faked IPI will be handled
1091 * by xpc_dropped_IPI_check().
1092 */
1093#define XPC_NOTIFY_IRQ_SEND_LOCAL(_ch, _ipi_f) \
1094 xpc_notify_IRQ_send_local(_ch, _ipi_f, #_ipi_f)
1095
1096static inline void
1097xpc_notify_IRQ_send_local(struct xpc_channel *ch, u8 ipi_flag,
1098 char *ipi_flag_string)
1099{
1100 struct xpc_partition *part = &xpc_partitions[ch->partid];
1101
1102
1103 FETCHOP_STORE_OP(TO_AMO((u64) &part->local_IPI_amo_va->variable),
1104 FETCHOP_OR, ((u64) ipi_flag << (ch->number * 8)));
1105 dev_dbg(xpc_chan, "%s sent local from partid=%d, channel=%d\n",
1106 ipi_flag_string, ch->partid, ch->number);
1107}
1108
1109
1110/*
1111 * The sending and receiving of IPIs includes the setting of an AMO variable
1112 * to indicate the reason the IPI was sent. The 64-bit variable is divided
1113 * up into eight bytes, ordered from right to left. Byte zero pertains to
1114 * channel 0, byte one to channel 1, and so on. Each byte is described by
1115 * the following IPI flags.
1116 */
1117
1118#define XPC_IPI_CLOSEREQUEST 0x01
1119#define XPC_IPI_CLOSEREPLY 0x02
1120#define XPC_IPI_OPENREQUEST 0x04
1121#define XPC_IPI_OPENREPLY 0x08
1122#define XPC_IPI_MSGREQUEST 0x10
1123
1124
1125/* given an AMO variable and a channel#, get its associated IPI flags */
1126#define XPC_GET_IPI_FLAGS(_amo, _c) ((u8) (((_amo) >> ((_c) * 8)) & 0xff))
1127#define XPC_SET_IPI_FLAGS(_amo, _c, _f) (_amo) |= ((u64) (_f) << ((_c) * 8))
1128
1129#define XPC_ANY_OPENCLOSE_IPI_FLAGS_SET(_amo) ((_amo) & __IA64_UL_CONST(0x0f0f0f0f0f0f0f0f))
1130#define XPC_ANY_MSG_IPI_FLAGS_SET(_amo) ((_amo) & __IA64_UL_CONST(0x1010101010101010))
1131
1132
1133static inline void
1134xpc_IPI_send_closerequest(struct xpc_channel *ch, unsigned long *irq_flags)
1135{
1136 struct xpc_openclose_args *args = ch->local_openclose_args;
1137
1138
1139 args->reason = ch->reason;
1140
1141 XPC_NOTIFY_IRQ_SEND(ch, XPC_IPI_CLOSEREQUEST, irq_flags);
1142}
1143
1144static inline void
1145xpc_IPI_send_closereply(struct xpc_channel *ch, unsigned long *irq_flags)
1146{
1147 XPC_NOTIFY_IRQ_SEND(ch, XPC_IPI_CLOSEREPLY, irq_flags);
1148}
1149
1150static inline void
1151xpc_IPI_send_openrequest(struct xpc_channel *ch, unsigned long *irq_flags)
1152{
1153 struct xpc_openclose_args *args = ch->local_openclose_args;
1154
1155
1156 args->msg_size = ch->msg_size;
1157 args->local_nentries = ch->local_nentries;
1158
1159 XPC_NOTIFY_IRQ_SEND(ch, XPC_IPI_OPENREQUEST, irq_flags);
1160}
1161
1162static inline void
1163xpc_IPI_send_openreply(struct xpc_channel *ch, unsigned long *irq_flags)
1164{
1165 struct xpc_openclose_args *args = ch->local_openclose_args;
1166
1167
1168 args->remote_nentries = ch->remote_nentries;
1169 args->local_nentries = ch->local_nentries;
1170 args->local_msgqueue_pa = __pa(ch->local_msgqueue);
1171
1172 XPC_NOTIFY_IRQ_SEND(ch, XPC_IPI_OPENREPLY, irq_flags);
1173}
1174
1175static inline void
1176xpc_IPI_send_msgrequest(struct xpc_channel *ch)
1177{
1178 XPC_NOTIFY_IRQ_SEND(ch, XPC_IPI_MSGREQUEST, NULL);
1179}
1180
1181static inline void
1182xpc_IPI_send_local_msgrequest(struct xpc_channel *ch)
1183{
1184 XPC_NOTIFY_IRQ_SEND_LOCAL(ch, XPC_IPI_MSGREQUEST);
1185}
1186
1187
1188/*
1189 * Memory for XPC's AMO variables is allocated by the MSPEC driver. These
1190 * pages are located in the lowest granule. The lowest granule uses 4k pages
1191 * for cached references and an alternate TLB handler to never provide a
1192 * cacheable mapping for the entire region. This will prevent speculative
1193 * reading of cached copies of our lines from being issued which will cause
1194 * a PI FSB Protocol error to be generated by the SHUB. For XPC, we need 64
1195 * AMO variables (based on XP_MAX_PARTITIONS) for message notification and an
1196 * additional 128 AMO variables (based on XP_NASID_MASK_WORDS) for partition
1197 * activation and 2 AMO variables for partition deactivation.
1198 */
1199static inline AMO_t *
1200xpc_IPI_init(int index)
1201{
1202 AMO_t *amo = xpc_vars->amos_page + index;
1203
1204
1205 (void) xpc_IPI_receive(amo); /* clear AMO variable */
1206 return amo;
1207}
1208
1209
1210
1211static inline enum xpc_retval
1212xpc_map_bte_errors(bte_result_t error)
1213{
1214 if (error == BTE_SUCCESS)
1215 return xpcSuccess;
1216
1217 if (is_shub2()) {
1218 if (BTE_VALID_SH2_ERROR(error))
1219 return xpcBteSh2Start + error;
1220 return xpcBteUnmappedError;
1221 }
1222 switch (error) {
1223 case BTE_SUCCESS: return xpcSuccess;
1224 case BTEFAIL_DIR: return xpcBteDirectoryError;
1225 case BTEFAIL_POISON: return xpcBtePoisonError;
1226 case BTEFAIL_WERR: return xpcBteWriteError;
1227 case BTEFAIL_ACCESS: return xpcBteAccessError;
1228 case BTEFAIL_PWERR: return xpcBtePWriteError;
1229 case BTEFAIL_PRERR: return xpcBtePReadError;
1230 case BTEFAIL_TOUT: return xpcBteTimeOutError;
1231 case BTEFAIL_XTERR: return xpcBteXtalkError;
1232 case BTEFAIL_NOTAVAIL: return xpcBteNotAvailable;
1233 default: return xpcBteUnmappedError;
1234 }
1235}
1236
1237
1238
1239/*
1240 * Check to see if there is any channel activity to/from the specified
1241 * partition.
1242 */
1243static inline void
1244xpc_check_for_channel_activity(struct xpc_partition *part)
1245{
1246 u64 IPI_amo;
1247 unsigned long irq_flags;
1248
1249
1250 IPI_amo = xpc_IPI_receive(part->local_IPI_amo_va);
1251 if (IPI_amo == 0) {
1252 return;
1253 }
1254
1255 spin_lock_irqsave(&part->IPI_lock, irq_flags);
1256 part->local_IPI_amo |= IPI_amo;
1257 spin_unlock_irqrestore(&part->IPI_lock, irq_flags);
1258
1259 dev_dbg(xpc_chan, "received IPI from partid=%d, IPI_amo=0x%lx\n",
1260 XPC_PARTID(part), IPI_amo);
1261
1262 xpc_wakeup_channel_mgr(part);
1263}
1264
1265
1266#endif /* _ASM_IA64_SN_XPC_H */
1267