diff options
author | Cliff Wickman <cpw@sgi.com> | 2008-06-12 09:23:48 -0400 |
---|---|---|
committer | Ingo Molnar <mingo@elte.hu> | 2008-07-08 06:23:24 -0400 |
commit | b194b120507276b4f09e2e14f941884e777fc7c8 (patch) | |
tree | 2796da3608f770bae6382941ef95e90e889359d2 /include/asm-x86/uv/uv_bau.h | |
parent | 73e991f45fe7644711c0c9dd357a1a2c6e222707 (diff) |
SGI UV: TLB shootdown using broadcast assist unit, cleanups
TLB shootdown for SGI UV.
v1: 6/2 original
v2: 6/3 corrections/improvements per Ingo's review
v3: 6/4 split atomic operations off to a separate patch (Jeremy's review)
v4: 6/12 include <mach_apic.h> rather than <asm/mach-bigsmp/mach_apic.h>
         (fixes a !SMP build problem that Ingo found)
         fix the index on uv_table_bases[blade]
Signed-off-by: Cliff Wickman <cpw@sgi.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'include/asm-x86/uv/uv_bau.h')
-rw-r--r-- | include/asm-x86/uv/uv_bau.h | 147 |
1 files changed, 76 insertions, 71 deletions
diff --git a/include/asm-x86/uv/uv_bau.h b/include/asm-x86/uv/uv_bau.h
index f125f86c89ac..e52fec822667 100644
--- a/include/asm-x86/uv/uv_bau.h
+++ b/include/asm-x86/uv/uv_bau.h
@@ -14,9 +14,9 @@ | |||
14 | #include <linux/bitmap.h> | 14 | #include <linux/bitmap.h> |
15 | #define BITSPERBYTE 8 | 15 | #define BITSPERBYTE 8 |
16 | 16 | ||
17 | /* Broadcast Assist Unit messaging structures */ | ||
18 | |||
19 | /* | 17 | /* |
18 | * Broadcast Assist Unit messaging structures | ||
19 | * | ||
20 | * Selective Broadcast activations are induced by software action | 20 | * Selective Broadcast activations are induced by software action |
21 | * specifying a particular 8-descriptor "set" via a 6-bit index written | 21 | * specifying a particular 8-descriptor "set" via a 6-bit index written |
22 | * to an MMR. | 22 | * to an MMR. |
@@ -33,54 +33,73 @@ | |||
33 | * Each of the descriptors is 64 bytes in size (8*64 = 512 bytes in a set). | 33 | * Each of the descriptors is 64 bytes in size (8*64 = 512 bytes in a set). |
34 | */ | 34 | */ |
35 | 35 | ||
36 | #define UV_ITEMS_PER_DESCRIPTOR 8 | 36 | #define UV_ITEMS_PER_DESCRIPTOR 8 |
37 | #define UV_CPUS_PER_ACT_STATUS 32 | 37 | #define UV_CPUS_PER_ACT_STATUS 32 |
38 | #define UV_ACT_STATUS_MASK 0x3 | 38 | #define UV_ACT_STATUS_MASK 0x3 |
39 | #define UV_ACT_STATUS_SIZE 2 | 39 | #define UV_ACT_STATUS_SIZE 2 |
40 | #define UV_ACTIVATION_DESCRIPTOR_SIZE 32 | 40 | #define UV_ACTIVATION_DESCRIPTOR_SIZE 32 |
41 | #define UV_DISTRIBUTION_SIZE 256 | 41 | #define UV_DISTRIBUTION_SIZE 256 |
42 | #define UV_SW_ACK_NPENDING 8 | 42 | #define UV_SW_ACK_NPENDING 8 |
43 | #define UV_BAU_MESSAGE 200 /* Messaging irq; see irq_64.h */ | 43 | #define UV_BAU_MESSAGE 200 |
44 | /* and include/asm-x86/hw_irq_64.h */ | 44 | /* |
45 | /* To be dynamically allocated in the future */ | 45 | * Messaging irq; see irq_64.h and include/asm-x86/hw_irq_64.h |
46 | #define UV_NET_ENDPOINT_INTD 0x38 | 46 | * To be dynamically allocated in the future |
47 | #define UV_DESC_BASE_PNODE_SHIFT 49 /* position of pnode (nasid>>1) in MMR */ | 47 | */ |
48 | #define UV_PAYLOADQ_PNODE_SHIFT 49 | 48 | #define UV_NET_ENDPOINT_INTD 0x38 |
49 | 49 | #define UV_DESC_BASE_PNODE_SHIFT 49 | |
50 | #define UV_PTC_BASENAME "sgi_uv/ptc_statistics" | 50 | #define UV_PAYLOADQ_PNODE_SHIFT 49 |
51 | #define uv_physnodeaddr(x) ((__pa((unsigned long)(x)) & uv_mmask)) | 51 | #define UV_PTC_BASENAME "sgi_uv/ptc_statistics" |
52 | 52 | #define uv_physnodeaddr(x) ((__pa((unsigned long)(x)) & uv_mmask)) | |
53 | /* bits in UVH_LB_BAU_SB_ACTIVATION_STATUS_0/1 */ | 53 | |
54 | /* | ||
55 | * bits in UVH_LB_BAU_SB_ACTIVATION_STATUS_0/1 | ||
56 | */ | ||
54 | #define DESC_STATUS_IDLE 0 | 57 | #define DESC_STATUS_IDLE 0 |
55 | #define DESC_STATUS_ACTIVE 1 | 58 | #define DESC_STATUS_ACTIVE 1 |
56 | #define DESC_STATUS_DESTINATION_TIMEOUT 2 | 59 | #define DESC_STATUS_DESTINATION_TIMEOUT 2 |
57 | #define DESC_STATUS_SOURCE_TIMEOUT 3 | 60 | #define DESC_STATUS_SOURCE_TIMEOUT 3 |
58 | 61 | ||
59 | /* source side threshholds at which message retries print a warning */ | 62 | /* |
63 | * source side threshholds at which message retries print a warning | ||
64 | */ | ||
60 | #define SOURCE_TIMEOUT_LIMIT 20 | 65 | #define SOURCE_TIMEOUT_LIMIT 20 |
61 | #define DESTINATION_TIMEOUT_LIMIT 20 | 66 | #define DESTINATION_TIMEOUT_LIMIT 20 |
62 | 67 | ||
63 | /* number of entries in the destination side payload queue */ | 68 | /* |
69 | * number of entries in the destination side payload queue | ||
70 | */ | ||
64 | #define DESTINATION_PAYLOAD_QUEUE_SIZE 17 | 71 | #define DESTINATION_PAYLOAD_QUEUE_SIZE 17 |
65 | /* number of destination side software ack resources */ | 72 | /* |
73 | * number of destination side software ack resources | ||
74 | */ | ||
66 | #define DESTINATION_NUM_RESOURCES 8 | 75 | #define DESTINATION_NUM_RESOURCES 8 |
67 | #define MAX_CPUS_PER_NODE 32 | 76 | #define MAX_CPUS_PER_NODE 32 |
77 | /* | ||
78 | * completion statuses for sending a TLB flush message | ||
79 | */ | ||
80 | #define FLUSH_RETRY 1 | ||
81 | #define FLUSH_GIVEUP 2 | ||
82 | #define FLUSH_COMPLETE 3 | ||
68 | 83 | ||
69 | /* Distribution: 32 bytes (256 bits) (bytes 0-0x1f of descriptor) */ | 84 | /* |
70 | /* If the 'multilevel' flag in the header portion of the descriptor | 85 | * Distribution: 32 bytes (256 bits) (bytes 0-0x1f of descriptor) |
86 | * If the 'multilevel' flag in the header portion of the descriptor | ||
71 | * has been set to 0, then endpoint multi-unicast mode is selected. | 87 | * has been set to 0, then endpoint multi-unicast mode is selected. |
72 | * The distribution specification (32 bytes) is interpreted as a 256-bit | 88 | * The distribution specification (32 bytes) is interpreted as a 256-bit |
73 | * distribution vector. Adjacent bits correspond to consecutive even numbered | 89 | * distribution vector. Adjacent bits correspond to consecutive even numbered |
74 | * nodeIDs. The result of adding the index of a given bit to the 15-bit | 90 | * nodeIDs. The result of adding the index of a given bit to the 15-bit |
75 | * 'base_dest_nodeid' field of the header corresponds to the | 91 | * 'base_dest_nodeid' field of the header corresponds to the |
76 | * destination nodeID associated with that specified bit. */ | 92 | * destination nodeID associated with that specified bit. |
93 | */ | ||
77 | struct bau_target_nodemask { | 94 | struct bau_target_nodemask { |
78 | unsigned long bits[BITS_TO_LONGS(256)]; | 95 | unsigned long bits[BITS_TO_LONGS(256)]; |
79 | }; | 96 | }; |
80 | 97 | ||
81 | /* mask of cpu's on a node */ | 98 | /* |
82 | /* (during initialization we need to check that unsigned long has | 99 | * mask of cpu's on a node |
83 | enough bits for max. cpu's per node) */ | 100 | * (during initialization we need to check that unsigned long has |
101 | * enough bits for max. cpu's per node) | ||
102 | */ | ||
84 | struct bau_local_cpumask { | 103 | struct bau_local_cpumask { |
85 | unsigned long bits; | 104 | unsigned long bits; |
86 | }; | 105 | }; |
@@ -99,7 +118,9 @@ struct bau_local_cpumask { | |||
99 | * the s/w ack bit vector ] | 118 | * the s/w ack bit vector ] |
100 | */ | 119 | */ |
101 | 120 | ||
102 | /* The payload is software-defined for INTD transactions */ | 121 | /* |
122 | * The payload is software-defined for INTD transactions | ||
123 | */ | ||
103 | struct bau_msg_payload { | 124 | struct bau_msg_payload { |
104 | unsigned long address; /* signifies a page or all TLB's | 125 | unsigned long address; /* signifies a page or all TLB's |
105 | of the cpu */ | 126 | of the cpu */ |
@@ -112,8 +133,10 @@ struct bau_msg_payload { | |||
112 | }; | 133 | }; |
113 | 134 | ||
114 | 135 | ||
115 | /* Message header: 16 bytes (128 bits) (bytes 0x30-0x3f of descriptor) */ | 136 | /* |
116 | /* see table 4.2.3.0.1 in broacast_assist spec. */ | 137 | * Message header: 16 bytes (128 bits) (bytes 0x30-0x3f of descriptor) |
138 | * see table 4.2.3.0.1 in broacast_assist spec. | ||
139 | */ | ||
117 | struct bau_msg_header { | 140 | struct bau_msg_header { |
118 | int dest_subnodeid:6; /* must be zero */ | 141 | int dest_subnodeid:6; /* must be zero */ |
119 | /* bits 5:0 */ | 142 | /* bits 5:0 */ |
@@ -173,11 +196,15 @@ struct bau_msg_header { | |||
173 | /* bits 127:107 */ | 196 | /* bits 127:107 */ |
174 | }; | 197 | }; |
175 | 198 | ||
176 | /* The format of the message to send, plus all accompanying control */ | 199 | /* |
177 | /* Should be 64 bytes */ | 200 | * The format of the message to send, plus all accompanying control |
201 | * Should be 64 bytes | ||
202 | */ | ||
178 | struct bau_activation_descriptor { | 203 | struct bau_activation_descriptor { |
179 | struct bau_target_nodemask distribution; | 204 | struct bau_target_nodemask distribution; |
180 | /* message template, consisting of header and payload: */ | 205 | /* |
206 | * message template, consisting of header and payload: | ||
207 | */ | ||
181 | struct bau_msg_header header; | 208 | struct bau_msg_header header; |
182 | struct bau_msg_payload payload; | 209 | struct bau_msg_payload payload; |
183 | }; | 210 | }; |
@@ -235,18 +262,24 @@ struct bau_payload_queue_entry { | |||
235 | /* bytes 24-31 */ | 262 | /* bytes 24-31 */ |
236 | }; | 263 | }; |
237 | 264 | ||
238 | /* one for every slot in the destination payload queue */ | 265 | /* |
266 | * one for every slot in the destination payload queue | ||
267 | */ | ||
239 | struct bau_msg_status { | 268 | struct bau_msg_status { |
240 | struct bau_local_cpumask seen_by; /* map of cpu's */ | 269 | struct bau_local_cpumask seen_by; /* map of cpu's */ |
241 | }; | 270 | }; |
242 | 271 | ||
243 | /* one for every slot in the destination software ack resources */ | 272 | /* |
273 | * one for every slot in the destination software ack resources | ||
274 | */ | ||
244 | struct bau_sw_ack_status { | 275 | struct bau_sw_ack_status { |
245 | struct bau_payload_queue_entry *msg; /* associated message */ | 276 | struct bau_payload_queue_entry *msg; /* associated message */ |
246 | int watcher; /* cpu monitoring, or -1 */ | 277 | int watcher; /* cpu monitoring, or -1 */ |
247 | }; | 278 | }; |
248 | 279 | ||
249 | /* one on every node and per-cpu; to locate the software tables */ | 280 | /* |
281 | * one on every node and per-cpu; to locate the software tables | ||
282 | */ | ||
250 | struct bau_control { | 283 | struct bau_control { |
251 | struct bau_activation_descriptor *descriptor_base; | 284 | struct bau_activation_descriptor *descriptor_base; |
252 | struct bau_payload_queue_entry *bau_msg_head; | 285 | struct bau_payload_queue_entry *bau_msg_head; |
@@ -267,8 +300,8 @@ struct ptc_stats { | |||
267 | unsigned long onetlb; /* times just one tlb on this cpu was flushed */ | 300 | unsigned long onetlb; /* times just one tlb on this cpu was flushed */ |
268 | unsigned long s_retry; /* retries on source side timeouts */ | 301 | unsigned long s_retry; /* retries on source side timeouts */ |
269 | unsigned long d_retry; /* retries on destination side timeouts */ | 302 | unsigned long d_retry; /* retries on destination side timeouts */ |
270 | unsigned long sflush_ns;/* nanoseconds spent in uv_flush_tlb_others */ | 303 | unsigned long sflush; /* cycles spent in uv_flush_tlb_others */ |
271 | unsigned long dflush_ns;/* nanoseconds spent destination side */ | 304 | unsigned long dflush; /* cycles spent on destination side */ |
272 | unsigned long retriesok; /* successes on retries */ | 305 | unsigned long retriesok; /* successes on retries */ |
273 | unsigned long nomsg; /* interrupts with no message */ | 306 | unsigned long nomsg; /* interrupts with no message */ |
274 | unsigned long multmsg; /* interrupts with multiple messages */ | 307 | unsigned long multmsg; /* interrupts with multiple messages */ |
@@ -293,39 +326,11 @@ static inline void bau_cpubits_clear(struct bau_local_cpumask *dstp, int nbits) | |||
293 | bitmap_zero(&dstp->bits, nbits); | 326 | bitmap_zero(&dstp->bits, nbits); |
294 | } | 327 | } |
295 | 328 | ||
296 | /* | ||
297 | * atomic increment of a short integer | ||
298 | * (rather than using the __sync_add_and_fetch() intrinsic) | ||
299 | * | ||
300 | * returns the new value of the variable | ||
301 | */ | ||
302 | static inline short int atomic_inc_short(short int *v) | ||
303 | { | ||
304 | asm volatile("movw $1, %%cx\n" | ||
305 | "lock ; xaddw %%cx, %0\n" | ||
306 | : "+m" (*v) /* outputs */ | ||
307 | : : "%cx", "memory"); /* inputs : clobbereds */ | ||
308 | return *v; | ||
309 | } | ||
310 | |||
311 | /* | ||
312 | * atomic OR of two long integers | ||
313 | * (rather than using the __sync_or_and_fetch() intrinsic) | ||
314 | */ | ||
315 | static inline void atomic_or_long(unsigned long *v1, unsigned long v2) | ||
316 | { | ||
317 | asm volatile("movq %0, %%rax; lea %1, %%rdx\n" | ||
318 | "lock ; orq %%rax, %%rdx\n" | ||
319 | : "+m" (*v1) /* outputs */ | ||
320 | : "m" (v1), "m" (v2) /* inputs */ | ||
321 | : "memory"); /* clobbereds */ | ||
322 | } | ||
323 | |||
324 | #define cpubit_isset(cpu, bau_local_cpumask) \ | 329 | #define cpubit_isset(cpu, bau_local_cpumask) \ |
325 | test_bit((cpu), (bau_local_cpumask).bits) | 330 | test_bit((cpu), (bau_local_cpumask).bits) |
326 | 331 | ||
327 | int uv_flush_tlb_others(cpumask_t *, struct mm_struct *, unsigned long); | 332 | extern int uv_flush_tlb_others(cpumask_t *, struct mm_struct *, unsigned long); |
328 | void uv_bau_message_intr1(void); | 333 | extern void uv_bau_message_intr1(void); |
329 | void uv_bau_timeout_intr1(void); | 334 | extern void uv_bau_timeout_intr1(void); |
330 | 335 | ||
331 | #endif /* __ASM_X86_UV_BAU__ */ | 336 | #endif /* __ASM_X86_UV_BAU__ */ |