aboutsummaryrefslogtreecommitdiffstats
path: root/include/asm-x86/uv/uv_bau.h
diff options
context:
space:
mode:
author Cliff Wickman <cpw@sgi.com> 2008-06-12 09:23:48 -0400
committer Ingo Molnar <mingo@elte.hu> 2008-07-08 06:23:24 -0400
commit b194b120507276b4f09e2e14f941884e777fc7c8 (patch)
tree 2796da3608f770bae6382941ef95e90e889359d2 /include/asm-x86/uv/uv_bau.h
parent 73e991f45fe7644711c0c9dd357a1a2c6e222707 (diff)
SGI UV: TLB shootdown using broadcast assist unit, cleanups
TLB shootdown for SGI UV.

v1: 6/2 original
v2: 6/3 corrections/improvements per Ingo's review
v3: 6/4 split atomic operations off to a separate patch (Jeremy's review)
v4: 6/12 include <mach_apic.h> rather than <asm/mach-bigsmp/mach_apic.h>
    (fixes a !SMP build problem that Ingo found);
    fix the index on uv_table_bases[blade]

Signed-off-by: Cliff Wickman <cpw@sgi.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'include/asm-x86/uv/uv_bau.h')
-rw-r--r-- include/asm-x86/uv/uv_bau.h 147
1 files changed, 76 insertions, 71 deletions
diff --git a/include/asm-x86/uv/uv_bau.h b/include/asm-x86/uv/uv_bau.h
index f125f86c89ac..e52fec822667 100644
--- a/include/asm-x86/uv/uv_bau.h
+++ b/include/asm-x86/uv/uv_bau.h
@@ -14,9 +14,9 @@
14#include <linux/bitmap.h> 14#include <linux/bitmap.h>
15#define BITSPERBYTE 8 15#define BITSPERBYTE 8
16 16
17/* Broadcast Assist Unit messaging structures */
18
19/* 17/*
18 * Broadcast Assist Unit messaging structures
19 *
20 * Selective Broadcast activations are induced by software action 20 * Selective Broadcast activations are induced by software action
21 * specifying a particular 8-descriptor "set" via a 6-bit index written 21 * specifying a particular 8-descriptor "set" via a 6-bit index written
22 * to an MMR. 22 * to an MMR.
@@ -33,54 +33,73 @@
33 * Each of the descriptors is 64 bytes in size (8*64 = 512 bytes in a set). 33 * Each of the descriptors is 64 bytes in size (8*64 = 512 bytes in a set).
34 */ 34 */
35 35
36#define UV_ITEMS_PER_DESCRIPTOR 8 36#define UV_ITEMS_PER_DESCRIPTOR 8
37#define UV_CPUS_PER_ACT_STATUS 32 37#define UV_CPUS_PER_ACT_STATUS 32
38#define UV_ACT_STATUS_MASK 0x3 38#define UV_ACT_STATUS_MASK 0x3
39#define UV_ACT_STATUS_SIZE 2 39#define UV_ACT_STATUS_SIZE 2
40#define UV_ACTIVATION_DESCRIPTOR_SIZE 32 40#define UV_ACTIVATION_DESCRIPTOR_SIZE 32
41#define UV_DISTRIBUTION_SIZE 256 41#define UV_DISTRIBUTION_SIZE 256
42#define UV_SW_ACK_NPENDING 8 42#define UV_SW_ACK_NPENDING 8
43#define UV_BAU_MESSAGE 200 /* Messaging irq; see irq_64.h */ 43#define UV_BAU_MESSAGE 200
44 /* and include/asm-x86/hw_irq_64.h */ 44/*
45 /* To be dynamically allocated in the future */ 45 * Messaging irq; see irq_64.h and include/asm-x86/hw_irq_64.h
46#define UV_NET_ENDPOINT_INTD 0x38 46 * To be dynamically allocated in the future
47#define UV_DESC_BASE_PNODE_SHIFT 49 /* position of pnode (nasid>>1) in MMR */ 47 */
48#define UV_PAYLOADQ_PNODE_SHIFT 49 48#define UV_NET_ENDPOINT_INTD 0x38
49 49#define UV_DESC_BASE_PNODE_SHIFT 49
50#define UV_PTC_BASENAME "sgi_uv/ptc_statistics" 50#define UV_PAYLOADQ_PNODE_SHIFT 49
51#define uv_physnodeaddr(x) ((__pa((unsigned long)(x)) & uv_mmask)) 51#define UV_PTC_BASENAME "sgi_uv/ptc_statistics"
52 52#define uv_physnodeaddr(x) ((__pa((unsigned long)(x)) & uv_mmask))
53/* bits in UVH_LB_BAU_SB_ACTIVATION_STATUS_0/1 */ 53
54/*
55 * bits in UVH_LB_BAU_SB_ACTIVATION_STATUS_0/1
56 */
54#define DESC_STATUS_IDLE 0 57#define DESC_STATUS_IDLE 0
55#define DESC_STATUS_ACTIVE 1 58#define DESC_STATUS_ACTIVE 1
56#define DESC_STATUS_DESTINATION_TIMEOUT 2 59#define DESC_STATUS_DESTINATION_TIMEOUT 2
57#define DESC_STATUS_SOURCE_TIMEOUT 3 60#define DESC_STATUS_SOURCE_TIMEOUT 3
58 61
59/* source side threshholds at which message retries print a warning */ 62/*
63 * source side threshholds at which message retries print a warning
64 */
60#define SOURCE_TIMEOUT_LIMIT 20 65#define SOURCE_TIMEOUT_LIMIT 20
61#define DESTINATION_TIMEOUT_LIMIT 20 66#define DESTINATION_TIMEOUT_LIMIT 20
62 67
63/* number of entries in the destination side payload queue */ 68/*
69 * number of entries in the destination side payload queue
70 */
64#define DESTINATION_PAYLOAD_QUEUE_SIZE 17 71#define DESTINATION_PAYLOAD_QUEUE_SIZE 17
65/* number of destination side software ack resources */ 72/*
73 * number of destination side software ack resources
74 */
66#define DESTINATION_NUM_RESOURCES 8 75#define DESTINATION_NUM_RESOURCES 8
67#define MAX_CPUS_PER_NODE 32 76#define MAX_CPUS_PER_NODE 32
77/*
78 * completion statuses for sending a TLB flush message
79 */
80#define FLUSH_RETRY 1
81#define FLUSH_GIVEUP 2
82#define FLUSH_COMPLETE 3
68 83
69/* Distribution: 32 bytes (256 bits) (bytes 0-0x1f of descriptor) */ 84/*
70/* If the 'multilevel' flag in the header portion of the descriptor 85 * Distribution: 32 bytes (256 bits) (bytes 0-0x1f of descriptor)
86 * If the 'multilevel' flag in the header portion of the descriptor
71 * has been set to 0, then endpoint multi-unicast mode is selected. 87 * has been set to 0, then endpoint multi-unicast mode is selected.
72 * The distribution specification (32 bytes) is interpreted as a 256-bit 88 * The distribution specification (32 bytes) is interpreted as a 256-bit
73 * distribution vector. Adjacent bits correspond to consecutive even numbered 89 * distribution vector. Adjacent bits correspond to consecutive even numbered
74 * nodeIDs. The result of adding the index of a given bit to the 15-bit 90 * nodeIDs. The result of adding the index of a given bit to the 15-bit
75 * 'base_dest_nodeid' field of the header corresponds to the 91 * 'base_dest_nodeid' field of the header corresponds to the
76 * destination nodeID associated with that specified bit. */ 92 * destination nodeID associated with that specified bit.
93 */
77struct bau_target_nodemask { 94struct bau_target_nodemask {
78 unsigned long bits[BITS_TO_LONGS(256)]; 95 unsigned long bits[BITS_TO_LONGS(256)];
79}; 96};
80 97
81/* mask of cpu's on a node */ 98/*
82/* (during initialization we need to check that unsigned long has 99 * mask of cpu's on a node
83 enough bits for max. cpu's per node) */ 100 * (during initialization we need to check that unsigned long has
101 * enough bits for max. cpu's per node)
102 */
84struct bau_local_cpumask { 103struct bau_local_cpumask {
85 unsigned long bits; 104 unsigned long bits;
86}; 105};
@@ -99,7 +118,9 @@ struct bau_local_cpumask {
99 * the s/w ack bit vector ] 118 * the s/w ack bit vector ]
100 */ 119 */
101 120
102/* The payload is software-defined for INTD transactions */ 121/*
122 * The payload is software-defined for INTD transactions
123 */
103struct bau_msg_payload { 124struct bau_msg_payload {
104 unsigned long address; /* signifies a page or all TLB's 125 unsigned long address; /* signifies a page or all TLB's
105 of the cpu */ 126 of the cpu */
@@ -112,8 +133,10 @@ struct bau_msg_payload {
112}; 133};
113 134
114 135
115/* Message header: 16 bytes (128 bits) (bytes 0x30-0x3f of descriptor) */ 136/*
116/* see table 4.2.3.0.1 in broacast_assist spec. */ 137 * Message header: 16 bytes (128 bits) (bytes 0x30-0x3f of descriptor)
138 * see table 4.2.3.0.1 in broacast_assist spec.
139 */
117struct bau_msg_header { 140struct bau_msg_header {
118 int dest_subnodeid:6; /* must be zero */ 141 int dest_subnodeid:6; /* must be zero */
119 /* bits 5:0 */ 142 /* bits 5:0 */
@@ -173,11 +196,15 @@ struct bau_msg_header {
173 /* bits 127:107 */ 196 /* bits 127:107 */
174}; 197};
175 198
176/* The format of the message to send, plus all accompanying control */ 199/*
177/* Should be 64 bytes */ 200 * The format of the message to send, plus all accompanying control
201 * Should be 64 bytes
202 */
178struct bau_activation_descriptor { 203struct bau_activation_descriptor {
179 struct bau_target_nodemask distribution; 204 struct bau_target_nodemask distribution;
180 /* message template, consisting of header and payload: */ 205 /*
206 * message template, consisting of header and payload:
207 */
181 struct bau_msg_header header; 208 struct bau_msg_header header;
182 struct bau_msg_payload payload; 209 struct bau_msg_payload payload;
183}; 210};
@@ -235,18 +262,24 @@ struct bau_payload_queue_entry {
235 /* bytes 24-31 */ 262 /* bytes 24-31 */
236}; 263};
237 264
238/* one for every slot in the destination payload queue */ 265/*
266 * one for every slot in the destination payload queue
267 */
239struct bau_msg_status { 268struct bau_msg_status {
240 struct bau_local_cpumask seen_by; /* map of cpu's */ 269 struct bau_local_cpumask seen_by; /* map of cpu's */
241}; 270};
242 271
243/* one for every slot in the destination software ack resources */ 272/*
273 * one for every slot in the destination software ack resources
274 */
244struct bau_sw_ack_status { 275struct bau_sw_ack_status {
245 struct bau_payload_queue_entry *msg; /* associated message */ 276 struct bau_payload_queue_entry *msg; /* associated message */
246 int watcher; /* cpu monitoring, or -1 */ 277 int watcher; /* cpu monitoring, or -1 */
247}; 278};
248 279
249/* one on every node and per-cpu; to locate the software tables */ 280/*
281 * one on every node and per-cpu; to locate the software tables
282 */
250struct bau_control { 283struct bau_control {
251 struct bau_activation_descriptor *descriptor_base; 284 struct bau_activation_descriptor *descriptor_base;
252 struct bau_payload_queue_entry *bau_msg_head; 285 struct bau_payload_queue_entry *bau_msg_head;
@@ -267,8 +300,8 @@ struct ptc_stats {
267 unsigned long onetlb; /* times just one tlb on this cpu was flushed */ 300 unsigned long onetlb; /* times just one tlb on this cpu was flushed */
268 unsigned long s_retry; /* retries on source side timeouts */ 301 unsigned long s_retry; /* retries on source side timeouts */
269 unsigned long d_retry; /* retries on destination side timeouts */ 302 unsigned long d_retry; /* retries on destination side timeouts */
270 unsigned long sflush_ns;/* nanoseconds spent in uv_flush_tlb_others */ 303 unsigned long sflush; /* cycles spent in uv_flush_tlb_others */
271 unsigned long dflush_ns;/* nanoseconds spent destination side */ 304 unsigned long dflush; /* cycles spent on destination side */
272 unsigned long retriesok; /* successes on retries */ 305 unsigned long retriesok; /* successes on retries */
273 unsigned long nomsg; /* interrupts with no message */ 306 unsigned long nomsg; /* interrupts with no message */
274 unsigned long multmsg; /* interrupts with multiple messages */ 307 unsigned long multmsg; /* interrupts with multiple messages */
@@ -293,39 +326,11 @@ static inline void bau_cpubits_clear(struct bau_local_cpumask *dstp, int nbits)
293 bitmap_zero(&dstp->bits, nbits); 326 bitmap_zero(&dstp->bits, nbits);
294} 327}
295 328
296/*
297 * atomic increment of a short integer
298 * (rather than using the __sync_add_and_fetch() intrinsic)
299 *
300 * returns the new value of the variable
301 */
302static inline short int atomic_inc_short(short int *v)
303{
304 asm volatile("movw $1, %%cx\n"
305 "lock ; xaddw %%cx, %0\n"
306 : "+m" (*v) /* outputs */
307 : : "%cx", "memory"); /* inputs : clobbereds */
308 return *v;
309}
310
311/*
312 * atomic OR of two long integers
313 * (rather than using the __sync_or_and_fetch() intrinsic)
314 */
315static inline void atomic_or_long(unsigned long *v1, unsigned long v2)
316{
317 asm volatile("movq %0, %%rax; lea %1, %%rdx\n"
318 "lock ; orq %%rax, %%rdx\n"
319 : "+m" (*v1) /* outputs */
320 : "m" (v1), "m" (v2) /* inputs */
321 : "memory"); /* clobbereds */
322}
323
324#define cpubit_isset(cpu, bau_local_cpumask) \ 329#define cpubit_isset(cpu, bau_local_cpumask) \
325 test_bit((cpu), (bau_local_cpumask).bits) 330 test_bit((cpu), (bau_local_cpumask).bits)
326 331
327int uv_flush_tlb_others(cpumask_t *, struct mm_struct *, unsigned long); 332extern int uv_flush_tlb_others(cpumask_t *, struct mm_struct *, unsigned long);
328void uv_bau_message_intr1(void); 333extern void uv_bau_message_intr1(void);
329void uv_bau_timeout_intr1(void); 334extern void uv_bau_timeout_intr1(void);
330 335
331#endif /* __ASM_X86_UV_BAU__ */ 336#endif /* __ASM_X86_UV_BAU__ */