author    Ingo Molnar <mingo@elte.hu>  2008-07-08 06:24:13 -0400
committer Ingo Molnar <mingo@elte.hu>  2008-07-08 06:24:13 -0400
commit    e3ae0acf59244ecf5b023ec99cef4b6b29d649bc (patch)
tree      02a8bfdcab61677430ecfe04e9e44e1007c35f0e
parent    4b62ac9a2b859f932afd5625362c927111b7dd9b (diff)
parent    e7eb8726d0e144f0925972c4ecee945e91a42753 (diff)

Merge branch 'x86/uv' into x86/devel
-rw-r--r--  arch/x86/kernel/Makefile          2
-rw-r--r--  arch/x86/kernel/apic_32.c         2
-rw-r--r--  arch/x86/kernel/apic_64.c         2
-rw-r--r--  arch/x86/kernel/entry_64.S        4
-rw-r--r--  arch/x86/kernel/smpboot.c         5
-rw-r--r--  arch/x86/kernel/tlb_64.c          5
-rw-r--r--  arch/x86/kernel/tlb_uv.c        792
-rw-r--r--  include/asm-x86/atomic_64.h      26
-rw-r--r--  include/asm-x86/mpspec.h          7
-rw-r--r--  include/asm-x86/mpspec_def.h      9
-rw-r--r--  include/asm-x86/uv/uv_bau.h     337
11 files changed, 1181 insertions, 10 deletions
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 8ef4dbf32cf9..9a71be827927 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -93,7 +93,7 @@ obj-$(CONFIG_OLPC) += olpc.o
 ###
 # 64 bit specific files
 ifeq ($(CONFIG_X86_64),y)
-        obj-y += genapic_64.o genapic_flat_64.o genx2apic_uv_x.o
+        obj-y += genapic_64.o genapic_flat_64.o genx2apic_uv_x.o tlb_uv.o
         obj-$(CONFIG_X86_PM_TIMER) += pmtimer_64.o
         obj-$(CONFIG_AUDIT) += audit_64.o
 
diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c
index 84ce106b33c8..dd8de26b2786 100644
--- a/arch/x86/kernel/apic_32.c
+++ b/arch/x86/kernel/apic_32.c
@@ -1267,7 +1267,7 @@ int __init APIC_init_uniprocessor(void)
 #ifdef CONFIG_CRASH_DUMP
         boot_cpu_physical_apicid = GET_APIC_ID(read_apic_id());
 #endif
-        phys_cpu_present_map = physid_mask_of_physid(boot_cpu_physical_apicid);
+        physid_set_mask_of_physid(boot_cpu_physical_apicid, &phys_cpu_present_map);
 
         setup_local_APIC();
 
diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c
index e494809fc508..d9d663ffa641 100644
--- a/arch/x86/kernel/apic_64.c
+++ b/arch/x86/kernel/apic_64.c
@@ -918,7 +918,7 @@ int __init APIC_init_uniprocessor(void)
 
         verify_local_APIC();
 
-        phys_cpu_present_map = physid_mask_of_physid(boot_cpu_physical_apicid);
+        physid_set_mask_of_physid(boot_cpu_physical_apicid, &phys_cpu_present_map);
         apic_write(APIC_ID, SET_APIC_ID(boot_cpu_physical_apicid));
 
         setup_local_APIC();
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index e4c5f951e68d..ff15ab552280 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -719,6 +719,10 @@ ENTRY(apic_timer_interrupt)
         apicinterrupt LOCAL_TIMER_VECTOR,smp_apic_timer_interrupt
 END(apic_timer_interrupt)
 
+ENTRY(uv_bau_message_intr1)
+        apicinterrupt 220,uv_bau_message_interrupt
+END(uv_bau_message_intr1)
+
 ENTRY(error_interrupt)
         apicinterrupt ERROR_APIC_VECTOR,smp_error_interrupt
 END(error_interrupt)
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index ae0a7a200421..d3ad4e09455b 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -1090,10 +1090,9 @@ static __init void disable_smp(void)
         smpboot_clear_io_apic_irqs();
 #endif
         if (smp_found_config)
-                phys_cpu_present_map =
-                        physid_mask_of_physid(boot_cpu_physical_apicid);
+                physid_set_mask_of_physid(boot_cpu_physical_apicid, &phys_cpu_present_map);
         else
-                phys_cpu_present_map = physid_mask_of_physid(0);
+                physid_set_mask_of_physid(0, &phys_cpu_present_map);
         map_cpu_to_logical_apicid();
         cpu_set(0, per_cpu(cpu_sibling_map, 0));
         cpu_set(0, per_cpu(cpu_core_map, 0));
diff --git a/arch/x86/kernel/tlb_64.c b/arch/x86/kernel/tlb_64.c
index a1f07d793202..5039d0f097a2 100644
--- a/arch/x86/kernel/tlb_64.c
+++ b/arch/x86/kernel/tlb_64.c
@@ -15,6 +15,8 @@
 #include <asm/proto.h>
 #include <asm/apicdef.h>
 #include <asm/idle.h>
+#include <asm/uv/uv_hub.h>
+#include <asm/uv/uv_bau.h>
 
 #include <mach_ipi.h>
 /*
@@ -162,6 +164,9 @@ void native_flush_tlb_others(const cpumask_t *cpumaskp, struct mm_struct *mm,
         union smp_flush_state *f;
         cpumask_t cpumask = *cpumaskp;
 
+        if (is_uv_system() && uv_flush_tlb_others(&cpumask, mm, va))
+                return;
+
         /* Caller has disabled preemption */
         sender = smp_processor_id() % NUM_INVALIDATE_TLB_VECTORS;
         f = &per_cpu(flush_state, sender);
diff --git a/arch/x86/kernel/tlb_uv.c b/arch/x86/kernel/tlb_uv.c
new file mode 100644
index 000000000000..d0fbb7712ab0
--- /dev/null
+++ b/arch/x86/kernel/tlb_uv.c
@@ -0,0 +1,792 @@
/*
 * SGI UltraViolet TLB flush routines.
 *
 * (c) 2008 Cliff Wickman <cpw@sgi.com>, SGI.
 *
 * This code is released under the GNU General Public License version 2 or
 * later.
 */
#include <linux/mc146818rtc.h>
#include <linux/proc_fs.h>
#include <linux/kernel.h>

#include <asm/mmu_context.h>
#include <asm/uv/uv_mmrs.h>
#include <asm/uv/uv_hub.h>
#include <asm/uv/uv_bau.h>
#include <asm/genapic.h>
#include <asm/idle.h>
#include <asm/tsc.h>

#include <mach_apic.h>

static struct bau_control **uv_bau_table_bases __read_mostly;
static int uv_bau_retry_limit __read_mostly;

/* position of pnode (which is nasid>>1): */
static int uv_nshift __read_mostly;

static unsigned long uv_mmask __read_mostly;

static DEFINE_PER_CPU(struct ptc_stats, ptcstats);
static DEFINE_PER_CPU(struct bau_control, bau_control);

/*
 * Free a software acknowledge hardware resource by clearing its Pending
 * bit. This will return a reply to the sender.
 * If the message has timed out, a reply has already been sent by the
 * hardware but the resource has not been released. In that case our
 * clear of the Timeout bit (as well) will free the resource. No reply will
 * be sent (the hardware will only do one reply per message).
 */
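/*
 * Editor's illustration (not part of the original source): with
 * UV_SW_ACK_NPENDING == 8, freeing resource 3 computes
 * dw = (1 << (3 + 8)) | (1 << 3) = 0x808, so the single MMR write in
 * uv_reply_to_message() below clears both the Timeout and the Pending
 * bit of that one resource.
 */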
static void uv_reply_to_message(int resource,
                                struct bau_payload_queue_entry *msg,
                                struct bau_msg_status *msp)
{
        unsigned long dw;

        dw = (1 << (resource + UV_SW_ACK_NPENDING)) | (1 << resource);
        msg->replied_to = 1;
        msg->sw_ack_vector = 0;
        if (msp)
                msp->seen_by.bits = 0;
        uv_write_local_mmr(UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_ALIAS, dw);
}

/*
 * Do all the things a cpu should do for a TLB shootdown message.
 * Other cpu's may come here at the same time for this message.
 */
static void uv_bau_process_message(struct bau_payload_queue_entry *msg,
                                   int msg_slot, int sw_ack_slot)
{
        unsigned long this_cpu_mask;
        struct bau_msg_status *msp;
        int cpu;

        msp = __get_cpu_var(bau_control).msg_statuses + msg_slot;
        cpu = uv_blade_processor_id();
        msg->number_of_cpus =
                uv_blade_nr_online_cpus(uv_node_to_blade_id(numa_node_id()));
        this_cpu_mask = 1UL << cpu;
        if (msp->seen_by.bits & this_cpu_mask)
                return;
        atomic_or_long(&msp->seen_by.bits, this_cpu_mask);

        if (msg->replied_to == 1)
                return;

        if (msg->address == TLB_FLUSH_ALL) {
                local_flush_tlb();
                __get_cpu_var(ptcstats).alltlb++;
        } else {
                __flush_tlb_one(msg->address);
                __get_cpu_var(ptcstats).onetlb++;
        }

        __get_cpu_var(ptcstats).requestee++;

        atomic_inc_short(&msg->acknowledge_count);
        if (msg->number_of_cpus == msg->acknowledge_count)
                uv_reply_to_message(sw_ack_slot, msg, msp);
}

/*
 * Examine the payload queue on one distribution node to see
 * which messages have not been seen, and which cpu(s) have not seen them.
 *
 * Returns the number of cpu's that have not responded.
 */
static int uv_examine_destination(struct bau_control *bau_tablesp, int sender)
{
        struct bau_payload_queue_entry *msg;
        struct bau_msg_status *msp;
        int count = 0;
        int i;
        int j;

        for (msg = bau_tablesp->va_queue_first, i = 0; i < DEST_Q_SIZE;
             msg++, i++) {
                if ((msg->sending_cpu == sender) && (!msg->replied_to)) {
                        msp = bau_tablesp->msg_statuses + i;
                        printk(KERN_DEBUG
                               "blade %d: address:%#lx %d of %d, not cpu(s): ",
                               i, msg->address, msg->acknowledge_count,
                               msg->number_of_cpus);
                        for (j = 0; j < msg->number_of_cpus; j++) {
                                if (!((1L << j) & msp->seen_by.bits)) {
                                        count++;
                                        printk("%d ", j);
                                }
                        }
                        printk("\n");
                }
        }
        return count;
}

/*
 * Examine the payload queue on all the distribution nodes to see
 * which messages have not been seen, and which cpu(s) have not seen them.
 *
 * Returns the number of cpu's that have not responded.
 */
static int uv_examine_destinations(struct bau_target_nodemask *distribution)
{
        int sender;
        int i;
        int count = 0;

        sender = smp_processor_id();
        for (i = 0; i < sizeof(struct bau_target_nodemask) * BITSPERBYTE; i++) {
                if (!bau_node_isset(i, distribution))
                        continue;
                count += uv_examine_destination(uv_bau_table_bases[i], sender);
        }
        return count;
}

/*
 * wait for completion of a broadcast message
 *
 * return COMPLETE, RETRY or GIVEUP
 */
static int uv_wait_completion(struct bau_desc *bau_desc,
                              unsigned long mmr_offset, int right_shift)
{
        int exams = 0;
        long destination_timeouts = 0;
        long source_timeouts = 0;
        unsigned long descriptor_status;

        while ((descriptor_status = (((unsigned long)
                uv_read_local_mmr(mmr_offset) >>
                        right_shift) & UV_ACT_STATUS_MASK)) !=
                        DESC_STATUS_IDLE) {
                if (descriptor_status == DESC_STATUS_SOURCE_TIMEOUT) {
                        source_timeouts++;
                        if (source_timeouts > SOURCE_TIMEOUT_LIMIT)
                                source_timeouts = 0;
                        __get_cpu_var(ptcstats).s_retry++;
                        return FLUSH_RETRY;
                }
                /*
                 * spin here looking for progress at the destinations
                 */
                if (descriptor_status == DESC_STATUS_DESTINATION_TIMEOUT) {
                        destination_timeouts++;
                        if (destination_timeouts > DESTINATION_TIMEOUT_LIMIT) {
                                /*
                                 * returns number of cpus not responding
                                 */
                                if (uv_examine_destinations
                                    (&bau_desc->distribution) == 0) {
                                        __get_cpu_var(ptcstats).d_retry++;
                                        return FLUSH_RETRY;
                                }
                                exams++;
                                if (exams >= uv_bau_retry_limit) {
                                        printk(KERN_DEBUG
                                        "uv_flush_tlb_others: giving up on cpu %d\n",
                                                smp_processor_id());
                                        return FLUSH_GIVEUP;
                                }
                                /*
                                 * delays can hang the simulator
                                 udelay(1000);
                                 */
                                destination_timeouts = 0;
                        }
                }
        }
        return FLUSH_COMPLETE;
}

/**
 * uv_flush_send_and_wait
 *
 * Send a broadcast and wait for a broadcast message to complete.
 *
 * The cpumaskp mask contains the cpus the broadcast was sent to.
 *
 * Returns 1 if all remote flushing was done. The mask is zeroed.
 * Returns 0 if some remote flushing remains to be done. The mask is left
 * unchanged.
 */
int uv_flush_send_and_wait(int cpu, int this_blade, struct bau_desc *bau_desc,
                           cpumask_t *cpumaskp)
{
        int completion_status = 0;
        int right_shift;
        int tries = 0;
        int blade;
        int bit;
        unsigned long mmr_offset;
        unsigned long index;
        cycles_t time1;
        cycles_t time2;

        if (cpu < UV_CPUS_PER_ACT_STATUS) {
                mmr_offset = UVH_LB_BAU_SB_ACTIVATION_STATUS_0;
                right_shift = cpu * UV_ACT_STATUS_SIZE;
        } else {
                mmr_offset = UVH_LB_BAU_SB_ACTIVATION_STATUS_1;
                right_shift =
                    ((cpu - UV_CPUS_PER_ACT_STATUS) * UV_ACT_STATUS_SIZE);
        }
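        /*
         * Editor's illustration (not in the original): each cpu owns a
         * 2-bit status field and 32 cpus share one MMR, so e.g. cpu 35
         * is tracked in ACTIVATION_STATUS_1 at
         * right_shift = (35 - 32) * 2 = 6.
         */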
        time1 = get_cycles();
        do {
                tries++;
                index = (1UL << UVH_LB_BAU_SB_ACTIVATION_CONTROL_PUSH_SHFT) |
                        cpu;
                uv_write_local_mmr(UVH_LB_BAU_SB_ACTIVATION_CONTROL, index);
                completion_status = uv_wait_completion(bau_desc, mmr_offset,
                                        right_shift);
        } while (completion_status == FLUSH_RETRY);
        time2 = get_cycles();
        __get_cpu_var(ptcstats).sflush += (time2 - time1);
        if (tries > 1)
                __get_cpu_var(ptcstats).retriesok++;

        if (completion_status == FLUSH_GIVEUP) {
                /*
                 * Cause the caller to do an IPI-style TLB shootdown on
                 * the cpu's, all of which are still in the mask.
                 */
                __get_cpu_var(ptcstats).ptc_i++;
                return 0;
        }

        /*
         * Success, so clear the remote cpu's from the mask so we don't
         * use the IPI method of shootdown on them.
         */
        for_each_cpu_mask(bit, *cpumaskp) {
                blade = uv_cpu_to_blade_id(bit);
                if (blade == this_blade)
                        continue;
                cpu_clear(bit, *cpumaskp);
        }
        if (!cpus_empty(*cpumaskp))
                return 0;
        return 1;
}

/**
 * uv_flush_tlb_others - globally purge translation cache of a virtual
 * address or all TLB's
 * @cpumaskp: mask of all cpu's in which the address is to be removed
 * @mm: mm_struct containing virtual address range
 * @va: virtual address to be removed (or TLB_FLUSH_ALL for all TLB's on cpu)
 *
 * This is the entry point for initiating any UV global TLB shootdown.
 *
 * Purges the translation caches of all specified processors of the given
 * virtual address, or purges all TLB's on specified processors.
 *
 * The caller has derived the cpumaskp from the mm_struct and has subtracted
 * the local cpu from the mask. This function is called only if there
 * are bits set in the mask. (e.g. flush_tlb_page())
 *
 * The cpumaskp is converted into a nodemask of the nodes containing
 * the cpus.
 *
 * Returns 1 if all remote flushing was done.
 * Returns 0 if some remote flushing remains to be done.
 */
int uv_flush_tlb_others(cpumask_t *cpumaskp, struct mm_struct *mm,
                        unsigned long va)
{
        int i;
        int bit;
        int blade;
        int cpu;
        int this_blade;
        int locals = 0;
        struct bau_desc *bau_desc;

        cpu = uv_blade_processor_id();
        this_blade = uv_numa_blade_id();
        bau_desc = __get_cpu_var(bau_control).descriptor_base;
        bau_desc += UV_ITEMS_PER_DESCRIPTOR * cpu;

        bau_nodes_clear(&bau_desc->distribution, UV_DISTRIBUTION_SIZE);

        i = 0;
        for_each_cpu_mask(bit, *cpumaskp) {
                blade = uv_cpu_to_blade_id(bit);
                BUG_ON(blade > (UV_DISTRIBUTION_SIZE - 1));
                if (blade == this_blade) {
                        locals++;
                        continue;
                }
                bau_node_set(blade, &bau_desc->distribution);
                i++;
        }
        if (i == 0) {
                /*
                 * no off_node flushing; return status for local node
                 */
                if (locals)
                        return 0;
                else
                        return 1;
        }
        __get_cpu_var(ptcstats).requestor++;
        __get_cpu_var(ptcstats).ntargeted += i;

        bau_desc->payload.address = va;
        bau_desc->payload.sending_cpu = smp_processor_id();

        return uv_flush_send_and_wait(cpu, this_blade, bau_desc, cpumaskp);
}

/*
 * The BAU message interrupt comes here. (registered by set_intr_gate)
 * See entry_64.S
 *
 * We received a broadcast assist message.
 *
 * Interrupts may have been disabled; this interrupt could represent
 * the receipt of several messages.
 *
 * All cores/threads on this node get this interrupt.
 * The last one to see it does the s/w ack.
 * (the resource will not be freed until noninterruptible cpus see this
 * interrupt; hardware will timeout the s/w ack and reply ERROR)
 */
void uv_bau_message_interrupt(struct pt_regs *regs)
{
        struct bau_payload_queue_entry *va_queue_first;
        struct bau_payload_queue_entry *va_queue_last;
        struct bau_payload_queue_entry *msg;
        struct pt_regs *old_regs = set_irq_regs(regs);
        cycles_t time1;
        cycles_t time2;
        int msg_slot;
        int sw_ack_slot;
        int fw;
        int count = 0;
        unsigned long local_pnode;

        ack_APIC_irq();
        exit_idle();
        irq_enter();

        time1 = get_cycles();

        local_pnode = uv_blade_to_pnode(uv_numa_blade_id());

        va_queue_first = __get_cpu_var(bau_control).va_queue_first;
        va_queue_last = __get_cpu_var(bau_control).va_queue_last;

        msg = __get_cpu_var(bau_control).bau_msg_head;
        while (msg->sw_ack_vector) {
                count++;
                fw = msg->sw_ack_vector;
                msg_slot = msg - va_queue_first;
                sw_ack_slot = ffs(fw) - 1;

                uv_bau_process_message(msg, msg_slot, sw_ack_slot);

                msg++;
                if (msg > va_queue_last)
                        msg = va_queue_first;
                __get_cpu_var(bau_control).bau_msg_head = msg;
        }
        if (!count)
                __get_cpu_var(ptcstats).nomsg++;
        else if (count > 1)
                __get_cpu_var(ptcstats).multmsg++;

        time2 = get_cycles();
        __get_cpu_var(ptcstats).dflush += (time2 - time1);

        irq_exit();
        set_irq_regs(old_regs);
}

static void uv_enable_timeouts(void)
{
        int i;
        int blade;
        int last_blade;
        int pnode;
        int cur_cpu = 0;
        unsigned long apicid;

        last_blade = -1;
        for_each_online_node(i) {
                blade = uv_node_to_blade_id(i);
                if (blade == last_blade)
                        continue;
                last_blade = blade;
                apicid = per_cpu(x86_cpu_to_apicid, cur_cpu);
                pnode = uv_blade_to_pnode(blade);
                cur_cpu += uv_blade_nr_possible_cpus(i);
        }
}

static void *uv_ptc_seq_start(struct seq_file *file, loff_t *offset)
{
        if (*offset < num_possible_cpus())
                return offset;
        return NULL;
}

static void *uv_ptc_seq_next(struct seq_file *file, void *data, loff_t *offset)
{
        (*offset)++;
        if (*offset < num_possible_cpus())
                return offset;
        return NULL;
}

static void uv_ptc_seq_stop(struct seq_file *file, void *data)
{
}

/*
 * Display the statistics thru /proc
 * data points to the cpu number
 */
static int uv_ptc_seq_show(struct seq_file *file, void *data)
{
        struct ptc_stats *stat;
        int cpu;

        cpu = *(loff_t *)data;

        if (!cpu) {
                seq_printf(file,
                "# cpu requestor requestee one all sretry dretry ptc_i ");
                seq_printf(file,
                "sw_ack sflush dflush sok dnomsg dmult starget\n");
        }
        if (cpu < num_possible_cpus() && cpu_online(cpu)) {
                stat = &per_cpu(ptcstats, cpu);
                seq_printf(file, "cpu %d %ld %ld %ld %ld %ld %ld %ld ",
                           cpu, stat->requestor,
                           stat->requestee, stat->onetlb, stat->alltlb,
                           stat->s_retry, stat->d_retry, stat->ptc_i);
                seq_printf(file, "%lx %ld %ld %ld %ld %ld %ld\n",
                           uv_read_global_mmr64(uv_blade_to_pnode
                                        (uv_cpu_to_blade_id(cpu)),
                                        UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE),
                           stat->sflush, stat->dflush,
                           stat->retriesok, stat->nomsg,
                           stat->multmsg, stat->ntargeted);
        }

        return 0;
}

/*
 *  0: display meaning of the statistics
 * >0: retry limit
 */
static ssize_t uv_ptc_proc_write(struct file *file, const char __user *user,
                                 size_t count, loff_t *data)
{
        unsigned long newmode;
        char optstr[64];

        if (count == 0 || count > sizeof(optstr))
                return -EINVAL;
        if (copy_from_user(optstr, user, count))
                return -EFAULT;
        optstr[count - 1] = '\0';
        if (strict_strtoul(optstr, 10, &newmode) < 0) {
                printk(KERN_DEBUG "%s is invalid\n", optstr);
                return -EINVAL;
        }

        if (newmode == 0) {
                printk(KERN_DEBUG "# cpu: cpu number\n");
                printk(KERN_DEBUG
                "requestor: times this cpu was the flush requestor\n");
                printk(KERN_DEBUG
                "requestee: times this cpu was requested to flush its TLBs\n");
                printk(KERN_DEBUG
                "one: times requested to flush a single address\n");
                printk(KERN_DEBUG
                "all: times requested to flush all TLB's\n");
                printk(KERN_DEBUG
                "sretry: number of retries of source-side timeouts\n");
                printk(KERN_DEBUG
                "dretry: number of retries of destination-side timeouts\n");
                printk(KERN_DEBUG
                "ptc_i: times UV fell through to IPI-style flushes\n");
                printk(KERN_DEBUG
                "sw_ack: image of UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE\n");
                printk(KERN_DEBUG
                "sflush_us: cycles spent in uv_flush_tlb_others()\n");
                printk(KERN_DEBUG
                "dflush_us: cycles spent in handling flush requests\n");
                printk(KERN_DEBUG "sok: successes on retry\n");
                printk(KERN_DEBUG "dnomsg: interrupts with no message\n");
                printk(KERN_DEBUG
                "dmult: interrupts with multiple messages\n");
                printk(KERN_DEBUG "starget: nodes targeted\n");
        } else {
                uv_bau_retry_limit = newmode;
                printk(KERN_DEBUG "timeout retry limit:%d\n",
                       uv_bau_retry_limit);
        }

        return count;
}

static const struct seq_operations uv_ptc_seq_ops = {
        .start = uv_ptc_seq_start,
        .next = uv_ptc_seq_next,
        .stop = uv_ptc_seq_stop,
        .show = uv_ptc_seq_show
};

static int uv_ptc_proc_open(struct inode *inode, struct file *file)
{
        return seq_open(file, &uv_ptc_seq_ops);
}

static const struct file_operations proc_uv_ptc_operations = {
        .open = uv_ptc_proc_open,
        .read = seq_read,
        .write = uv_ptc_proc_write,
        .llseek = seq_lseek,
        .release = seq_release,
};

static int __init uv_ptc_init(void)
{
        struct proc_dir_entry *proc_uv_ptc;

        if (!is_uv_system())
                return 0;

        if (!proc_mkdir("sgi_uv", NULL))
                return -EINVAL;

        proc_uv_ptc = create_proc_entry(UV_PTC_BASENAME, 0444, NULL);
        if (!proc_uv_ptc) {
                printk(KERN_ERR "unable to create %s proc entry\n",
                       UV_PTC_BASENAME);
                remove_proc_entry("sgi_uv", NULL);
                return -EINVAL;
        }
        proc_uv_ptc->proc_fops = &proc_uv_ptc_operations;
        return 0;
}

/*
 * begin the initialization of the per-blade control structures
 */
static struct bau_control * __init uv_table_bases_init(int blade, int node)
{
        int i;
        int *ip;
        struct bau_msg_status *msp;
        struct bau_control *bau_tabp;

        bau_tabp =
            kmalloc_node(sizeof(struct bau_control), GFP_KERNEL, node);
        BUG_ON(!bau_tabp);

        bau_tabp->msg_statuses =
            kmalloc_node(sizeof(struct bau_msg_status) *
                         DEST_Q_SIZE, GFP_KERNEL, node);
        BUG_ON(!bau_tabp->msg_statuses);

        for (i = 0, msp = bau_tabp->msg_statuses; i < DEST_Q_SIZE; i++, msp++)
                bau_cpubits_clear(&msp->seen_by, (int)
                                  uv_blade_nr_possible_cpus(blade));

        bau_tabp->watching =
            kmalloc_node(sizeof(int) * DEST_NUM_RESOURCES, GFP_KERNEL, node);
        BUG_ON(!bau_tabp->watching);

        for (i = 0, ip = bau_tabp->watching; i < DEST_Q_SIZE; i++, ip++)
                *ip = 0;

        uv_bau_table_bases[blade] = bau_tabp;

        return bau_tabp;
}

/*
 * finish the initialization of the per-blade control structures
 */
static void __init
uv_table_bases_finish(int blade, int node, int cur_cpu,
                      struct bau_control *bau_tablesp,
                      struct bau_desc *adp)
{
        struct bau_control *bcp;
        int i;

        for (i = cur_cpu; i < cur_cpu + uv_blade_nr_possible_cpus(blade); i++) {
                bcp = (struct bau_control *)&per_cpu(bau_control, i);

                bcp->bau_msg_head = bau_tablesp->va_queue_first;
                bcp->va_queue_first = bau_tablesp->va_queue_first;
                bcp->va_queue_last = bau_tablesp->va_queue_last;
                bcp->watching = bau_tablesp->watching;
                bcp->msg_statuses = bau_tablesp->msg_statuses;
                bcp->descriptor_base = adp;
        }
}

/*
 * initialize the sending side's sending buffers
 */
static struct bau_desc * __init
uv_activation_descriptor_init(int node, int pnode)
{
        int i;
        unsigned long pa;
        unsigned long m;
        unsigned long n;
        unsigned long mmr_image;
        struct bau_desc *adp;
        struct bau_desc *ad2;

        adp = (struct bau_desc *)
            kmalloc_node(16384, GFP_KERNEL, node);
        BUG_ON(!adp);

        pa = __pa((unsigned long)adp);
        n = pa >> uv_nshift;
        m = pa & uv_mmask;

        mmr_image = uv_read_global_mmr64(pnode, UVH_LB_BAU_SB_DESCRIPTOR_BASE);
        if (mmr_image) {
                uv_write_global_mmr64(pnode, (unsigned long)
                                      UVH_LB_BAU_SB_DESCRIPTOR_BASE,
                                      (n << UV_DESC_BASE_PNODE_SHIFT | m));
        }

        for (i = 0, ad2 = adp; i < UV_ACTIVATION_DESCRIPTOR_SIZE; i++, ad2++) {
                memset(ad2, 0, sizeof(struct bau_desc));
                ad2->header.sw_ack_flag = 1;
                ad2->header.base_dest_nodeid =
                    uv_blade_to_pnode(uv_cpu_to_blade_id(0));
                ad2->header.command = UV_NET_ENDPOINT_INTD;
                ad2->header.int_both = 1;
                /*
                 * all others need to be set to zero:
                 * fairness chaining multilevel count replied_to
                 */
        }
        return adp;
}

/*
 * initialize the destination side's receiving buffers
 */
static struct bau_payload_queue_entry * __init
uv_payload_queue_init(int node, int pnode, struct bau_control *bau_tablesp)
{
        struct bau_payload_queue_entry *pqp;
        char *cp;

        pqp = (struct bau_payload_queue_entry *) kmalloc_node(
                (DEST_Q_SIZE + 1) * sizeof(struct bau_payload_queue_entry),
                GFP_KERNEL, node);
        BUG_ON(!pqp);

        cp = (char *)pqp + 31;
        pqp = (struct bau_payload_queue_entry *)(((unsigned long)cp >> 5) << 5);
        bau_tablesp->va_queue_first = pqp;
        uv_write_global_mmr64(pnode,
                              UVH_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST,
                              ((unsigned long)pnode <<
                               UV_PAYLOADQ_PNODE_SHIFT) |
                              uv_physnodeaddr(pqp));
        uv_write_global_mmr64(pnode, UVH_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL,
                              uv_physnodeaddr(pqp));
        bau_tablesp->va_queue_last = pqp + (DEST_Q_SIZE - 1);
        uv_write_global_mmr64(pnode, UVH_LB_BAU_INTD_PAYLOAD_QUEUE_LAST,
                              (unsigned long)
                              uv_physnodeaddr(bau_tablesp->va_queue_last));
        memset(pqp, 0, sizeof(struct bau_payload_queue_entry) * DEST_Q_SIZE);

        return pqp;
}

/*
 * Initialization of each UV blade's structures
 */
static int __init uv_init_blade(int blade, int node, int cur_cpu)
{
        int pnode;
        unsigned long pa;
        unsigned long apicid;
        struct bau_desc *adp;
        struct bau_payload_queue_entry *pqp;
        struct bau_control *bau_tablesp;

        bau_tablesp = uv_table_bases_init(blade, node);
        pnode = uv_blade_to_pnode(blade);
        adp = uv_activation_descriptor_init(node, pnode);
        pqp = uv_payload_queue_init(node, pnode, bau_tablesp);
        uv_table_bases_finish(blade, node, cur_cpu, bau_tablesp, adp);
        /*
         * the below initialization can't be in firmware because the
         * messaging IRQ will be determined by the OS
         */
        apicid = per_cpu(x86_cpu_to_apicid, cur_cpu);
        pa = uv_read_global_mmr64(pnode, UVH_BAU_DATA_CONFIG);
        if ((pa & 0xff) != UV_BAU_MESSAGE) {
                uv_write_global_mmr64(pnode, UVH_BAU_DATA_CONFIG,
                                      ((apicid << 32) | UV_BAU_MESSAGE));
        }
        return 0;
}

/*
 * Initialization of BAU-related structures
 */
static int __init uv_bau_init(void)
{
        int blade;
        int node;
        int nblades;
        int last_blade;
        int cur_cpu = 0;

        if (!is_uv_system())
                return 0;

        uv_bau_retry_limit = 1;
        uv_nshift = uv_hub_info->n_val;
        uv_mmask = (1UL << uv_hub_info->n_val) - 1;
        nblades = 0;
        last_blade = -1;
        for_each_online_node(node) {
                blade = uv_node_to_blade_id(node);
                if (blade == last_blade)
                        continue;
                last_blade = blade;
                nblades++;
        }
        uv_bau_table_bases = (struct bau_control **)
            kmalloc(nblades * sizeof(struct bau_control *), GFP_KERNEL);
        BUG_ON(!uv_bau_table_bases);

        last_blade = -1;
        for_each_online_node(node) {
                blade = uv_node_to_blade_id(node);
                if (blade == last_blade)
                        continue;
                last_blade = blade;
                uv_init_blade(blade, node, cur_cpu);
                cur_cpu += uv_blade_nr_possible_cpus(blade);
        }
        set_intr_gate(UV_BAU_MESSAGE, uv_bau_message_intr1);
        uv_enable_timeouts();

        return 0;
}
__initcall(uv_bau_init);
__initcall(uv_ptc_init);
diff --git a/include/asm-x86/atomic_64.h b/include/asm-x86/atomic_64.h
index fe589c153db8..a0095191c02e 100644
--- a/include/asm-x86/atomic_64.h
+++ b/include/asm-x86/atomic_64.h
@@ -425,6 +425,32 @@ static inline int atomic64_add_unless(atomic64_t *v, long a, long u)
         return c != (u);
 }
 
+/**
+ * atomic_inc_short - increment of a short integer
+ * @v: pointer to type short int
+ *
+ * Atomically adds 1 to @v
+ * Returns the new value of @v
+ */
+static inline short int atomic_inc_short(short int *v)
+{
+        asm(LOCK_PREFIX "addw $1, %0" : "+m" (*v));
+        return *v;
+}
+
+/**
+ * atomic_or_long - OR of two long integers
+ * @v1: pointer to type unsigned long
+ * @v2: pointer to type unsigned long
+ *
+ * Atomically ORs @v2 into @v1
+ * (returns nothing; @v1 is updated in place)
+ */
+static inline void atomic_or_long(unsigned long *v1, unsigned long v2)
+{
+        asm(LOCK_PREFIX "orq %1, %0" : "+m" (*v1) : "r" (v2));
+}
+
 #define atomic64_inc_not_zero(v) atomic64_add_unless((v), 1, 0)
 
 /* These are x86-specific, used by some header files */
diff --git a/include/asm-x86/mpspec.h b/include/asm-x86/mpspec.h
index 6ec1a5453b3e..b6995e567fcc 100644
--- a/include/asm-x86/mpspec.h
+++ b/include/asm-x86/mpspec.h
@@ -122,6 +122,7 @@ typedef struct physid_mask physid_mask_t;
 		__physid_mask;						\
 	})
 
+/* Note: will create very large stack frames if physid_mask_t is big */
 #define physid_mask_of_physid(physid)					\
 	({								\
 		physid_mask_t __physid_mask = PHYSID_MASK_NONE;		\
@@ -129,6 +130,12 @@ typedef struct physid_mask physid_mask_t;
 		__physid_mask;						\
 	})
 
+static inline void physid_set_mask_of_physid(int physid, physid_mask_t *map)
+{
+	physids_clear(*map);
+	physid_set(physid, *map);
+}
+
 #define PHYSID_MASK_ALL { {[0 ... PHYSID_ARRAY_SIZE-1] = ~0UL} }
 #define PHYSID_MASK_NONE { {[0 ... PHYSID_ARRAY_SIZE-1] = 0UL} }
 
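Why the inline helper is preferable here (editor's note): physid_mask_of_physid()
materializes a whole physid_mask_t temporary on the caller's stack, while
physid_set_mask_of_physid() clears and sets bits through a pointer. Assuming
physid_mask_t scales as MAX_APICS/8 bytes, the mpspec_def.h change below
(MAX_APICS raised to 32768 when NR_CPUS > 255) would make that temporary
32768/8 = 4096 bytes, which is what the new "very large stack frames" note
warns about.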
diff --git a/include/asm-x86/mpspec_def.h b/include/asm-x86/mpspec_def.h
index dc6ef85e3624..38d1e73b49e4 100644
--- a/include/asm-x86/mpspec_def.h
+++ b/include/asm-x86/mpspec_def.h
@@ -17,10 +17,11 @@
 # define MAX_MPC_ENTRY 1024
 # define MAX_APICS 256
 #else
-/*
- * A maximum of 255 APICs with the current APIC ID architecture.
- */
-# define MAX_APICS 255
+# if NR_CPUS <= 255
+#  define MAX_APICS 255
+# else
+#  define MAX_APICS 32768
+# endif
 #endif
 
 struct intel_mp_floating {
diff --git a/include/asm-x86/uv/uv_bau.h b/include/asm-x86/uv/uv_bau.h
new file mode 100644
index 000000000000..91ac0dfb7588
--- /dev/null
+++ b/include/asm-x86/uv/uv_bau.h
@@ -0,0 +1,337 @@
/*
 * This file is subject to the terms and conditions of the GNU General Public
 * License. See the file "COPYING" in the main directory of this archive
 * for more details.
 *
 * SGI UV Broadcast Assist Unit definitions
 *
 * Copyright (C) 2008 Silicon Graphics, Inc. All rights reserved.
 */

#ifndef __ASM_X86_UV_BAU__
#define __ASM_X86_UV_BAU__

#include <linux/bitmap.h>
#define BITSPERBYTE 8

/*
 * Broadcast Assist Unit messaging structures
 *
 * Selective Broadcast activations are induced by software action
 * specifying a particular 8-descriptor "set" via a 6-bit index written
 * to an MMR.
 * Thus there are 64 unique 512-byte sets of SB descriptors - one set for
 * each 6-bit index value. These descriptor sets are mapped in sequence
 * starting with set 0 located at the address specified in the
 * BAU_SB_DESCRIPTOR_BASE register, set 1 is located at BASE + 512,
 * set 2 is at BASE + 2*512, set 3 at BASE + 3*512, and so on.
 *
 * We will use 31 sets, one for sending BAU messages from each of the 32
 * cpu's on the node.
 *
 * TLB shootdown will use the first of the 8 descriptors of each set.
 * Each of the descriptors is 64 bytes in size (8*64 = 512 bytes in a set).
 */
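/*
 * Worked example (editor's addition, not in the original header): the set
 * for cpu 5 starts at BASE + 5*512 = BASE + 0xa00, and its TLB-shootdown
 * descriptor is the first 64-byte descriptor of that set - which is why
 * tlb_uv.c advances descriptor_base by UV_ITEMS_PER_DESCRIPTOR * cpu
 * (8 descriptors of 64 bytes per set).
 */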

#define UV_ITEMS_PER_DESCRIPTOR 8
#define UV_CPUS_PER_ACT_STATUS 32
#define UV_ACT_STATUS_MASK 0x3
#define UV_ACT_STATUS_SIZE 2
#define UV_ACTIVATION_DESCRIPTOR_SIZE 32
#define UV_DISTRIBUTION_SIZE 256
#define UV_SW_ACK_NPENDING 8
#define UV_BAU_MESSAGE 200
/*
 * Messaging irq; see irq_64.h and include/asm-x86/hw_irq_64.h
 * To be dynamically allocated in the future
 */
#define UV_NET_ENDPOINT_INTD 0x38
#define UV_DESC_BASE_PNODE_SHIFT 49
#define UV_PAYLOADQ_PNODE_SHIFT 49
#define UV_PTC_BASENAME "sgi_uv/ptc_statistics"
#define uv_physnodeaddr(x) ((__pa((unsigned long)(x)) & uv_mmask))

/*
 * bits in UVH_LB_BAU_SB_ACTIVATION_STATUS_0/1
 */
#define DESC_STATUS_IDLE 0
#define DESC_STATUS_ACTIVE 1
#define DESC_STATUS_DESTINATION_TIMEOUT 2
#define DESC_STATUS_SOURCE_TIMEOUT 3

/*
 * source side thresholds at which message retries print a warning
 */
#define SOURCE_TIMEOUT_LIMIT 20
#define DESTINATION_TIMEOUT_LIMIT 20

/*
 * number of entries in the destination side payload queue
 */
#define DEST_Q_SIZE 17
/*
 * number of destination side software ack resources
 */
#define DEST_NUM_RESOURCES 8
#define MAX_CPUS_PER_NODE 32
/*
 * completion statuses for sending a TLB flush message
 */
#define FLUSH_RETRY 1
#define FLUSH_GIVEUP 2
#define FLUSH_COMPLETE 3

/*
 * Distribution: 32 bytes (256 bits) (bytes 0-0x1f of descriptor)
 * If the 'multilevel' flag in the header portion of the descriptor
 * has been set to 0, then endpoint multi-unicast mode is selected.
 * The distribution specification (32 bytes) is interpreted as a 256-bit
 * distribution vector. Adjacent bits correspond to consecutive even numbered
 * nodeIDs. The result of adding the index of a given bit to the 15-bit
 * 'base_dest_nodeid' field of the header corresponds to the
 * destination nodeID associated with that specified bit.
 */
struct bau_target_nodemask {
        unsigned long bits[BITS_TO_LONGS(256)];
};

/*
 * mask of cpu's on a node
 * (during initialization we need to check that unsigned long has
 * enough bits for max. cpu's per node)
 */
struct bau_local_cpumask {
        unsigned long bits;
};
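/*
 * (Editor's note, not in the original: on x86_64 an unsigned long has
 * 64 bits and MAX_CPUS_PER_NODE is 32, so the single-word bitmap above
 * satisfies that check.)
 */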

/*
 * Payload: 16 bytes (128 bits) (bytes 0x20-0x2f of descriptor)
 * only 12 bytes (96 bits) of the payload area are usable.
 * An additional 3 bytes (bits 27:4) of the header address are carried
 * to the next bytes of the destination payload queue.
 * And an additional 2 bytes of the header Suppl_A field are also
 * carried to the destination payload queue.
 * But the first byte of the Suppl_A becomes bits 127:120 (the 16th byte)
 * of the destination payload queue, which is written by the hardware
 * with the s/w ack resource bit vector.
 * [ effective message contents (16 bytes (128 bits) maximum), not counting
 *   the s/w ack bit vector ]
 */

/*
 * The payload is software-defined for INTD transactions
 */
struct bau_msg_payload {
        unsigned long address;          /* signifies a page or all TLB's
                                                of the cpu */
        /* 64 bits */
        unsigned short sending_cpu;     /* filled in by sender */
        /* 16 bits */
        unsigned short acknowledge_count; /* filled in by destination */
        /* 16 bits */
        unsigned int reserved1:32;      /* not usable */
};


/*
 * Message header: 16 bytes (128 bits) (bytes 0x30-0x3f of descriptor)
 * see table 4.2.3.0.1 in broadcast_assist spec.
 */
struct bau_msg_header {
        int dest_subnodeid:6;   /* must be zero */
        /* bits 5:0 */
        int base_dest_nodeid:15; /* nasid>>1 (pnode) of first bit in node_map */
        /* bits 20:6 */
        int command:8;          /* message type */
        /* bits 28:21 */
        /* 0x38: SN3net EndPoint Message */
        int rsvd_1:3;           /* must be zero */
        /* bits 31:29 */
        /* int will align on 32 bits */
        int rsvd_2:9;           /* must be zero */
        /* bits 40:32 */
        /* Suppl_A is 56-41 */
        int payload_2a:8;       /* becomes byte 16 of msg */
        /* bits 48:41 */        /* not currently using */
        int payload_2b:8;       /* becomes byte 17 of msg */
        /* bits 56:49 */        /* not currently using */
        /* Address field (96:57) is never used as an
           address (these are address bits 42:3) */
        int rsvd_3:1;           /* must be zero */
        /* bit 57 */
        /* address bits 27:4 are payload */
        /* these 24 bits become bytes 12-14 of msg */
        int replied_to:1;       /* sent as 0 by the source to byte 12 */
        /* bit 58 */

        int payload_1a:5;       /* not currently used */
        /* bits 63:59 */
        int payload_1b:8;       /* not currently used */
        /* bits 71:64 */
        int payload_1c:8;       /* not currently used */
        /* bits 79:72 */
        int payload_1d:2;       /* not currently used */
        /* bits 81:80 */

        int rsvd_4:7;           /* must be zero */
        /* bits 88:82 */
        int sw_ack_flag:1;      /* software acknowledge flag */
        /* bit 89 */
        /* INTD transactions at destination are to
           wait for software acknowledge */
        int rsvd_5:6;           /* must be zero */
        /* bits 95:90 */
        int rsvd_6:5;           /* must be zero */
        /* bits 100:96 */
        int int_both:1;         /* if 1, interrupt both sockets on the blade */
        /* bit 101 */
        int fairness:3;         /* usually zero */
        /* bits 104:102 */
        int multilevel:1;       /* multi-level multicast format */
        /* bit 105 */
        /* 0 for TLB: endpoint multi-unicast messages */
        int chaining:1;         /* next descriptor is part of this activation */
        /* bit 106 */
        int rsvd_7:21;          /* must be zero */
        /* bits 127:107 */
};

/*
 * The activation descriptor:
 * The format of the message to send, plus all accompanying control
 * Should be 64 bytes
 */
struct bau_desc {
        struct bau_target_nodemask distribution;
        /*
         * message template, consisting of header and payload:
         */
        struct bau_msg_header header;
        struct bau_msg_payload payload;
};
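/*
 * (Editor's size check: distribution 32 bytes + header 16 bytes +
 * payload 16 bytes = 64 bytes, matching the "Should be 64 bytes" above.)
 */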
/*
 * -payload--    ---------header------
 * bytes 0-11    bits 41-56  bits 58-81
 *     A           B  (2)      C (3)
 *
 *            A/B/C are moved to:
 *     A            C          B
 * bytes 0-11  bytes 12-14  bytes 16-17  (byte 15 filled in by hw as vector)
 * ------------payload queue-----------
 */

/*
 * The payload queue on the destination side is an array of these.
 * With BAU_MISC_CONTROL set for software acknowledge mode, the messages
 * are 32 bytes (2 micropackets) (256 bits) in length, but contain only 17
 * bytes of usable data, including the sw ack vector in byte 15 (bits 127:120)
 * (12 bytes come from bau_msg_payload, 3 from payload_1, 2 from
 *  sw_ack_vector and payload_2)
 * "Enabling Software Acknowledgment mode (see Section 4.3.3 Software
 * Acknowledge Processing) also selects 32 byte (17 bytes usable) payload
 * operation."
 */
struct bau_payload_queue_entry {
        unsigned long address;          /* signifies a page or all TLB's
                                                of the cpu */
        /* 64 bits, bytes 0-7 */

        unsigned short sending_cpu;     /* cpu that sent the message */
        /* 16 bits, bytes 8-9 */

        unsigned short acknowledge_count; /* filled in by destination */
        /* 16 bits, bytes 10-11 */

        unsigned short replied_to:1;    /* sent as 0 by the source */
        /* 1 bit */
        unsigned short unused1:7;       /* not currently using */
        /* 7 bits: byte 12) */

        unsigned char unused2[2];       /* not currently using */
        /* bytes 13-14 */

        unsigned char sw_ack_vector;    /* filled in by the hardware */
        /* byte 15 (bits 127:120) */

        unsigned char unused4[3];       /* not currently using bytes 17-19 */
        /* bytes 17-19 */

        int number_of_cpus;             /* filled in at destination */
        /* 32 bits, bytes 20-23 (aligned) */

        unsigned char unused5[8];       /* not using */
        /* bytes 24-31 */
};
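/*
 * (Editor's size check: 8 + 2 + 2 + 1 + 2 + 1 + 3 = 19 bytes through
 * unused4; number_of_cpus is then aligned at byte 20 (4 bytes) and
 * unused5 adds 8, bringing the entry to 32 bytes as stated above.)
 */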

/*
 * one for every slot in the destination payload queue
 */
struct bau_msg_status {
        struct bau_local_cpumask seen_by;       /* map of cpu's */
};

/*
 * one for every slot in the destination software ack resources
 */
struct bau_sw_ack_status {
        struct bau_payload_queue_entry *msg;    /* associated message */
        int watcher;                            /* cpu monitoring, or -1 */
};

/*
 * one on every node and per-cpu; to locate the software tables
 */
struct bau_control {
        struct bau_desc *descriptor_base;
        struct bau_payload_queue_entry *bau_msg_head;
        struct bau_payload_queue_entry *va_queue_first;
        struct bau_payload_queue_entry *va_queue_last;
        struct bau_msg_status *msg_statuses;
        int *watching; /* pointer to array */
};

/*
 * This structure is allocated per_cpu for UV TLB shootdown statistics.
 */
struct ptc_stats {
        unsigned long ptc_i;    /* number of IPI-style flushes */
        unsigned long requestor; /* number of nodes this cpu sent to */
        unsigned long requestee; /* times cpu was remotely requested */
        unsigned long alltlb;   /* times all tlb's on this cpu were flushed */
        unsigned long onetlb;   /* times just one tlb on this cpu was flushed */
        unsigned long s_retry;  /* retries on source side timeouts */
        unsigned long d_retry;  /* retries on destination side timeouts */
        unsigned long sflush;   /* cycles spent in uv_flush_tlb_others */
        unsigned long dflush;   /* cycles spent on destination side */
        unsigned long retriesok; /* successes on retries */
        unsigned long nomsg;    /* interrupts with no message */
        unsigned long multmsg;  /* interrupts with multiple messages */
        unsigned long ntargeted; /* nodes targeted */
};

static inline int bau_node_isset(int node, struct bau_target_nodemask *dstp)
{
        return constant_test_bit(node, &dstp->bits[0]);
}
static inline void bau_node_set(int node, struct bau_target_nodemask *dstp)
{
        __set_bit(node, &dstp->bits[0]);
}
static inline void bau_nodes_clear(struct bau_target_nodemask *dstp, int nbits)
{
        bitmap_zero(&dstp->bits[0], nbits);
}

static inline void bau_cpubits_clear(struct bau_local_cpumask *dstp, int nbits)
{
        bitmap_zero(&dstp->bits, nbits);
}

#define cpubit_isset(cpu, bau_local_cpumask) \
        test_bit((cpu), (bau_local_cpumask).bits)

extern int uv_flush_tlb_others(cpumask_t *, struct mm_struct *, unsigned long);
extern void uv_bau_message_intr1(void);
extern void uv_bau_timeout_intr1(void);

#endif /* __ASM_X86_UV_BAU__ */