diff options
Diffstat (limited to 'include/asm-x86/uv')
-rw-r--r-- | include/asm-x86/uv/uv_bau.h | 331 |
1 files changed, 331 insertions, 0 deletions
diff --git a/include/asm-x86/uv/uv_bau.h b/include/asm-x86/uv/uv_bau.h new file mode 100644 index 000000000000..f125f86c89ac --- /dev/null +++ b/include/asm-x86/uv/uv_bau.h | |||
@@ -0,0 +1,331 @@ | |||
1 | /* | ||
2 | * This file is subject to the terms and conditions of the GNU General Public | ||
3 | * License. See the file "COPYING" in the main directory of this archive | ||
4 | * for more details. | ||
5 | * | ||
6 | * SGI UV Broadcast Assist Unit definitions | ||
7 | * | ||
8 | * Copyright (C) 2008 Silicon Graphics, Inc. All rights reserved. | ||
9 | */ | ||
10 | |||
11 | #ifndef __ASM_X86_UV_BAU__ | ||
12 | #define __ASM_X86_UV_BAU__ | ||
13 | |||
14 | #include <linux/bitmap.h> | ||
/* number of bits in one byte */
#define BITSPERBYTE	8
16 | |||
17 | /* Broadcast Assist Unit messaging structures */ | ||
18 | |||
19 | /* | ||
20 | * Selective Broadcast activations are induced by software action | ||
21 | * specifying a particular 8-descriptor "set" via a 6-bit index written | ||
22 | * to an MMR. | ||
23 | * Thus there are 64 unique 512-byte sets of SB descriptors - one set for | ||
24 | * each 6-bit index value. These descriptor sets are mapped in sequence | ||
25 | * starting with set 0 located at the address specified in the | ||
26 | * BAU_SB_DESCRIPTOR_BASE register, set 1 is located at BASE + 512, | ||
27 | * set 2 is at BASE + 2*512, set 3 at BASE + 3*512, and so on. | ||
28 | * | ||
29 | * We will use 32 sets, one for sending BAU messages from each of the 32 | |
30 | * cpu's on the node. | ||
31 | * | ||
32 | * TLB shootdown will use the first of the 8 descriptors of each set. | ||
33 | * Each of the descriptors is 64 bytes in size (8*64 = 512 bytes in a set). | ||
34 | */ | ||
35 | |||
/* sizing constants for descriptors, activation status and distribution */
#define UV_ITEMS_PER_DESCRIPTOR		8
#define UV_CPUS_PER_ACT_STATUS		32
#define UV_ACT_STATUS_MASK		0x3
#define UV_ACT_STATUS_SIZE		2
#define UV_ACTIVATION_DESCRIPTOR_SIZE	32
#define UV_DISTRIBUTION_SIZE		256
#define UV_SW_ACK_NPENDING		8

/*
 * Messaging irq; see irq_64.h and include/asm-x86/hw_irq_64.h
 * To be dynamically allocated in the future
 */
#define UV_BAU_MESSAGE			200

#define UV_NET_ENDPOINT_INTD		0x38

/* position of pnode (nasid>>1) in MMR */
#define UV_DESC_BASE_PNODE_SHIFT	49
#define UV_PAYLOADQ_PNODE_SHIFT		49

#define UV_PTC_BASENAME			"sgi_uv/ptc_statistics"

/* physical address of x (via __pa) masked with uv_mmask */
#define uv_physnodeaddr(x)	(__pa((unsigned long)(x)) & uv_mmask)
52 | |||
/* bits in UVH_LB_BAU_SB_ACTIVATION_STATUS_0/1 */
#define DESC_STATUS_IDLE			0
#define DESC_STATUS_ACTIVE			1
#define DESC_STATUS_DESTINATION_TIMEOUT		2
#define DESC_STATUS_SOURCE_TIMEOUT		3

/* source side thresholds at which message retries print a warning */
#define SOURCE_TIMEOUT_LIMIT			20
#define DESTINATION_TIMEOUT_LIMIT		20

/* number of entries in the destination side payload queue */
#define DESTINATION_PAYLOAD_QUEUE_SIZE		17

/* number of destination side software ack resources */
#define DESTINATION_NUM_RESOURCES		8

/* maximum number of cpus on one node */
#define MAX_CPUS_PER_NODE			32
68 | |||
69 | /* Distribution: 32 bytes (256 bits) (bytes 0-0x1f of descriptor) */ | ||
70 | /* If the 'multilevel' flag in the header portion of the descriptor | ||
71 | * has been set to 0, then endpoint multi-unicast mode is selected. | ||
72 | * The distribution specification (32 bytes) is interpreted as a 256-bit | ||
73 | * distribution vector. Adjacent bits correspond to consecutive even numbered | ||
74 | * nodeIDs. The result of adding the index of a given bit to the 15-bit | ||
75 | * 'base_dest_nodeid' field of the header corresponds to the | ||
76 | * destination nodeID associated with that specified bit. */ | ||
77 | struct bau_target_nodemask { | ||
78 | unsigned long bits[BITS_TO_LONGS(256)]; | ||
79 | }; | ||
80 | |||
/*
 * mask of cpu's on a node
 * (during initialization we need to check that unsigned long has
 *  enough bits for max. cpu's per node)
 */
struct bau_local_cpumask {
	unsigned long bits;	/* one bit per cpu, all in a single long */
};
87 | |||
88 | /* | ||
89 | * Payload: 16 bytes (128 bits) (bytes 0x20-0x2f of descriptor) | ||
90 | * only 12 bytes (96 bits) of the payload area are usable. | ||
91 | * An additional 3 bytes (bits 27:4) of the header address are carried | ||
92 | * to the next bytes of the destination payload queue. | ||
93 | * And an additional 2 bytes of the header Suppl_A field are also | ||
94 | * carried to the destination payload queue. | ||
95 | * But the first byte of the Suppl_A becomes bits 127:120 (the 16th byte) | ||
96 | * of the destination payload queue, which is written by the hardware | ||
97 | * with the s/w ack resource bit vector. | ||
98 | * [ effective message contents (16 bytes (128 bits) maximum), not counting | ||
99 | * the s/w ack bit vector ] | ||
100 | */ | ||
101 | |||
/* The payload is software-defined for INTD transactions */
struct bau_msg_payload {
	/* 64 bits: signifies a page or all TLB's of the cpu */
	unsigned long address;

	/* 16 bits: filled in by sender */
	unsigned short sending_cpu;

	/* 16 bits: filled in by destination */
	unsigned short acknowledge_count;

	/* 32 bits: not usable */
	unsigned int reserved1:32;
};
113 | |||
114 | |||
/*
 * Message header: 16 bytes (128 bits) (bytes 0x30-0x3f of descriptor)
 * see table 4.2.3.0.1 in broadcast_assist spec.
 *
 * All fields are declared 'unsigned int'.  With a plain (signed) 'int'
 * bit-field a 1-bit flag can only hold 0 and -1, and wider fields such as
 * 'command' sign-extend when read back; unsigned fields keep the same bit
 * layout while reading back exactly the value that was stored.
 */
struct bau_msg_header {
	unsigned int dest_subnodeid:6;	/* must be zero */
	/* bits 5:0 */
	unsigned int base_dest_nodeid:15; /* nasid>>1 (pnode) of first bit
					     in node_map */
	/* bits 20:6 */
	unsigned int command:8;		/* message type */
	/* bits 28:21 */
	/* 0x38: SN3net EndPoint Message */
	unsigned int rsvd_1:3;		/* must be zero */
	/* bits 31:29 */
	/* int will align on 32 bits */
	unsigned int rsvd_2:9;		/* must be zero */
	/* bits 40:32 */
	/* Suppl_A is 56-41 */
	unsigned int payload_2a:8;	/* becomes byte 16 of msg */
	/* bits 48:41 */		/* not currently using */
	unsigned int payload_2b:8;	/* becomes byte 17 of msg */
	/* bits 56:49 */		/* not currently using */
	/* Address field (96:57) is never used as an
	   address (these are address bits 42:3) */
	unsigned int rsvd_3:1;		/* must be zero */
	/* bit 57 */
	/* address bits 27:4 are payload */
	/* these 24 bits become bytes 12-14 of msg */
	unsigned int replied_to:1;	/* sent as 0 by the source to byte 12 */
	/* bit 58 */

	unsigned int payload_1a:5;	/* not currently used */
	/* bits 63:59 */
	unsigned int payload_1b:8;	/* not currently used */
	/* bits 71:64 */
	unsigned int payload_1c:8;	/* not currently used */
	/* bits 79:72 */
	unsigned int payload_1d:2;	/* not currently used */
	/* bits 81:80 */

	unsigned int rsvd_4:7;		/* must be zero */
	/* bits 88:82 */
	unsigned int sw_ack_flag:1;	/* software acknowledge flag */
	/* bit 89 */
	/* INTD transactions at destination are to
	   wait for software acknowledge */
	unsigned int rsvd_5:6;		/* must be zero */
	/* bits 95:90 */
	unsigned int rsvd_6:5;		/* must be zero */
	/* bits 100:96 */
	unsigned int int_both:1;	/* if 1, interrupt both sockets on
					   the blade */
	/* bit 101 */
	unsigned int fairness:3;	/* usually zero */
	/* bits 104:102 */
	unsigned int multilevel:1;	/* multi-level multicast format */
	/* bit 105 */
	/* 0 for TLB: endpoint multi-unicast messages */
	unsigned int chaining:1;	/* next descriptor is part of this
					   activation */
	/* bit 106 */
	unsigned int rsvd_7:21;		/* must be zero */
	/* bits 127:107 */
};
175 | |||
176 | /* The format of the message to send, plus all accompanying control */ | ||
177 | /* Should be 64 bytes */ | ||
178 | struct bau_activation_descriptor { | ||
179 | struct bau_target_nodemask distribution; | ||
180 | /* message template, consisting of header and payload: */ | ||
181 | struct bau_msg_header header; | ||
182 | struct bau_msg_payload payload; | ||
183 | }; | ||
184 | /* | ||
185 | * -payload-- ---------header------ | ||
186 | * bytes 0-11 bits 41-56 bits 58-81 | ||
187 | * A B (2) C (3) | ||
188 | * | ||
189 | * A/B/C are moved to: | ||
190 | * A C B | ||
191 | * bytes 0-11 bytes 12-14 bytes 16-17 (byte 15 filled in by hw as vector) | ||
192 | * ------------payload queue----------- | ||
193 | */ | ||
194 | |||
195 | /* | ||
196 | * The payload queue on the destination side is an array of these. | ||
197 | * With BAU_MISC_CONTROL set for software acknowledge mode, the messages | ||
198 | * are 32 bytes (2 micropackets) (256 bits) in length, but contain only 17 | ||
199 | * bytes of usable data, including the sw ack vector in byte 15 (bits 127:120) | ||
200 | * (12 bytes come from bau_msg_payload, 3 from payload_1, 2 from | ||
201 | * sw_ack_vector and payload_2) | ||
202 | * "Enabling Software Acknowledgment mode (see Section 4.3.3 Software | ||
203 | * Acknowledge Processing) also selects 32 byte (17 bytes usable) payload | ||
204 | * operation." | ||
205 | */ | ||
struct bau_payload_queue_entry {
	/* 64 bits, bytes 0-7: signifies a page or all TLB's of the cpu */
	unsigned long address;

	/* 16 bits, bytes 8-9: cpu that sent the message */
	unsigned short sending_cpu;

	/* 16 bits, bytes 10-11: filled in by destination */
	unsigned short acknowledge_count;

	/* byte 12: 1 bit, sent as 0 by the source ... */
	unsigned short replied_to:1;
	/* ... and 7 bits not currently in use */
	unsigned short unused1:7;

	/* bytes 13-14: not currently using */
	unsigned char unused2[2];

	/* byte 15 (bits 127:120): filled in by the hardware */
	unsigned char sw_ack_vector;

	/*
	 * not currently using; these three bytes directly follow byte 15,
	 * i.e. bytes 16-18, and number_of_cpus is aligned to byte 20
	 */
	unsigned char unused4[3];

	/* 32 bits, bytes 20-23 (aligned): filled in at destination */
	int number_of_cpus;

	/* bytes 24-31: not using */
	unsigned char unused5[8];
};
237 | |||
238 | /* one for every slot in the destination payload queue */ | ||
239 | struct bau_msg_status { | ||
240 | struct bau_local_cpumask seen_by; /* map of cpu's */ | ||
241 | }; | ||
242 | |||
/* one for every slot in the destination software ack resources */
struct bau_sw_ack_status {
	/* associated message */
	struct bau_payload_queue_entry *msg;

	/* cpu monitoring, or -1 */
	int watcher;
};
248 | |||
/* one on every node and per-cpu; to locate the software tables */
struct bau_control {
	struct bau_activation_descriptor	*descriptor_base;
	struct bau_payload_queue_entry		*bau_msg_head;
	struct bau_payload_queue_entry		*va_queue_first;
	struct bau_payload_queue_entry		*va_queue_last;
	struct bau_msg_status			*msg_statuses;
	int					*watching; /* pointer to array */
};
258 | |||
/*
 * This structure is allocated per_cpu for UV TLB shootdown statistics.
 */
struct ptc_stats {
	/* number of IPI-style flushes */
	unsigned long ptc_i;
	/* number of nodes this cpu sent to */
	unsigned long requestor;
	/* times cpu was remotely requested */
	unsigned long requestee;
	/* times all tlb's on this cpu were flushed */
	unsigned long alltlb;
	/* times just one tlb on this cpu was flushed */
	unsigned long onetlb;
	/* retries on source side timeouts */
	unsigned long s_retry;
	/* retries on destination side timeouts */
	unsigned long d_retry;
	/* nanoseconds spent in uv_flush_tlb_others */
	unsigned long sflush_ns;
	/* nanoseconds spent destination side */
	unsigned long dflush_ns;
	/* successes on retries */
	unsigned long retriesok;
	/* interrupts with no message */
	unsigned long nomsg;
	/* interrupts with multiple messages */
	unsigned long multmsg;
	/* nodes targeted */
	unsigned long ntargeted;
};
277 | |||
278 | static inline int bau_node_isset(int node, struct bau_target_nodemask *dstp) | ||
279 | { | ||
280 | return constant_test_bit(node, &dstp->bits[0]); | ||
281 | } | ||
282 | static inline void bau_node_set(int node, struct bau_target_nodemask *dstp) | ||
283 | { | ||
284 | __set_bit(node, &dstp->bits[0]); | ||
285 | } | ||
286 | static inline void bau_nodes_clear(struct bau_target_nodemask *dstp, int nbits) | ||
287 | { | ||
288 | bitmap_zero(&dstp->bits[0], nbits); | ||
289 | } | ||
290 | |||
291 | static inline void bau_cpubits_clear(struct bau_local_cpumask *dstp, int nbits) | ||
292 | { | ||
293 | bitmap_zero(&dstp->bits, nbits); | ||
294 | } | ||
295 | |||
/*
 * atomic increment of a short integer
 * (rather than using the __sync_add_and_fetch() intrinsic)
 *
 * returns the new value of the variable
 *
 * The locked xaddw adds the register operand to *v and leaves the previous
 * contents of *v in that register.  The return value is derived from that
 * previous value instead of re-reading *v, because a second read is not
 * part of the atomic operation and could observe another cpu's update.
 * The register is chosen by the compiler rather than hard-coding %cx.
 */
static inline short int atomic_inc_short(short int *v)
{
	short int prev = 1;	/* in: the increment; out: old value of *v */

	__asm__ __volatile__("lock ; xaddw %0, %1\n"
			     : "+r" (prev), "+m" (*v)	/* outputs */
			     :				/* inputs */
			     : "memory", "cc");		/* clobbereds */
	return (short int)(prev + 1);
}
310 | |||
/*
 * atomic OR of two long integers
 * (rather than using the __sync_or_and_fetch() intrinsic)
 *
 * ORs v2 into the long that v1 points to with a single locked orq.
 * The lock prefix requires a memory destination, so *v1 is the destination
 * operand and v2 is supplied in a compiler-chosen register; no fixed
 * registers are used, so none need to be listed as clobbered.
 */
static inline void atomic_or_long(unsigned long *v1, unsigned long v2)
{
	__asm__ __volatile__("lock ; orq %1, %0\n"
			     : "+m" (*v1)		/* outputs */
			     : "r" (v2)			/* inputs */
			     : "memory", "cc");		/* clobbereds */
}
323 | |||
/*
 * test the bit for 'cpu' in a struct bau_local_cpumask
 *
 * 'bits' is a single unsigned long (not an array), so its address is
 * taken before handing it to test_bit(), the same way bau_cpubits_clear()
 * passes &dstp->bits to bitmap_zero().
 */
#define cpubit_isset(cpu, bau_local_cpumask) \
	test_bit((cpu), &(bau_local_cpumask).bits)
326 | |||
327 | int uv_flush_tlb_others(cpumask_t *, struct mm_struct *, unsigned long); | ||
328 | void uv_bau_message_intr1(void); | ||
329 | void uv_bau_timeout_intr1(void); | ||
330 | |||
331 | #endif /* __ASM_X86_UV_BAU__ */ | ||