author	Cliff Wickman <cpw@sgi.com>	2008-06-02 09:56:14 -0400
committer	Ingo Molnar <mingo@elte.hu>	2008-07-08 06:23:22 -0400
commit	1812924bb1823950c1dc95c478b71b037057356e (patch)
tree	74ecf29e332a320d7850008ca4f8607dace88de6 /arch/x86
parent	d98b940ab29a245de84a1c138b866dcc29217601 (diff)
x86, SGI UV: TLB shootdown using broadcast assist unit
TLB shootdown for SGI UV.

Depends on patch (in tip/x86/irq):
  x86-update-macros-used-by-uv-platform.patch  Jack Steiner  May 29

This patch provides the ability to flush TLB's in cpu's that are not on
the local node.  The hardware mechanism for distributing the flush
messages is the UV's "broadcast assist unit".

The hook to intercept TLB shootdown requests is a 2-line change to
native_flush_tlb_others() (arch/x86/kernel/tlb_64.c).

This code has been tested on a hardware simulator.  The real hardware
is not yet available.

The shootdown statistics are provided through /proc/sgi_uv/ptc_statistics.
The use of /sys was considered, but would have required the use of many
/sys files.  The debugfs was also considered, but these statistics should
be available on an ongoing basis, not just for debugging.

Issues to be fixed later:
- The IRQ for the messaging interrupt is currently hardcoded as 200
  (see UV_BAU_MESSAGE).  It should be dynamically assigned in the future.
- The use of appropriate udelay()'s is untested, as they are a problem
  in the simulator.

Signed-off-by: Cliff Wickman <cpw@sgi.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
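
The interception described above is deliberately small: uv_flush_tlb_others() reports whether the broadcast assist unit handled the flush, and only then is the generic IPI path skipped. A minimal sketch of that pattern (mirroring the tlb_64.c hunk below, with the rest of the function body elided) is:

	/*
	 * Sketch only: uv_flush_tlb_others() returns nonzero when the BAU
	 * delivered the flush to every remote cpu in the mask; it returns
	 * zero (leaving remote cpus in the mask) when the caller must fall
	 * back to the IPI-based shootdown.
	 */
	void native_flush_tlb_others(const cpumask_t *cpumaskp,
				     struct mm_struct *mm, unsigned long va)
	{
		cpumask_t cpumask = *cpumaskp;

		if (is_uv_system() && uv_flush_tlb_others(&cpumask, mm, va))
			return;

		/* ... existing IPI-based flush of the cpus left in the mask ... */
	}
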
Diffstat (limited to 'arch/x86')
-rw-r--r--	arch/x86/kernel/Makefile	|   2
-rw-r--r--	arch/x86/kernel/entry_64.S	|   4
-rw-r--r--	arch/x86/kernel/tlb_64.c	|   5
-rw-r--r--	arch/x86/kernel/tlb_uv.c	| 736
4 files changed, 746 insertions(+), 1 deletion(-)
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 605995e11ea7..4078e98f0125 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -94,7 +94,7 @@ obj-$(CONFIG_OLPC) += olpc.o
 ###
 # 64 bit specific files
 ifeq ($(CONFIG_X86_64),y)
-	obj-y				+= genapic_64.o genapic_flat_64.o genx2apic_uv_x.o
+	obj-y				+= genapic_64.o genapic_flat_64.o genx2apic_uv_x.o tlb_uv.o
 	obj-$(CONFIG_X86_PM_TIMER)	+= pmtimer_64.o
 	obj-$(CONFIG_AUDIT)		+= audit_64.o
 
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index 556a8df522a7..6fd1987466eb 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -720,6 +720,10 @@ ENTRY(apic_timer_interrupt)
 	apicinterrupt LOCAL_TIMER_VECTOR,smp_apic_timer_interrupt
 END(apic_timer_interrupt)
 
+ENTRY(uv_bau_message_intr1)
+	apicinterrupt 220,uv_bau_message_interrupt
+END(uv_bau_message_intr1)
+
 ENTRY(error_interrupt)
 	apicinterrupt ERROR_APIC_VECTOR,smp_error_interrupt
 END(error_interrupt)
diff --git a/arch/x86/kernel/tlb_64.c b/arch/x86/kernel/tlb_64.c
index a1f07d793202..fc132113bdab 100644
--- a/arch/x86/kernel/tlb_64.c
+++ b/arch/x86/kernel/tlb_64.c
@@ -15,6 +15,8 @@
 #include <asm/proto.h>
 #include <asm/apicdef.h>
 #include <asm/idle.h>
+#include <asm/uv/uv_hub.h>
+#include <asm/uv/uv_bau.h>
 
 #include <mach_ipi.h>
 /*
@@ -162,6 +164,9 @@ void native_flush_tlb_others(const cpumask_t *cpumaskp, struct mm_struct *mm,
 	union smp_flush_state *f;
 	cpumask_t cpumask = *cpumaskp;
 
+	if (is_uv_system() && uv_flush_tlb_others(&cpumask, mm, va))
+		return;
+
 	/* Caller has disabled preemption */
 	sender = smp_processor_id() % NUM_INVALIDATE_TLB_VECTORS;
 	f = &per_cpu(flush_state, sender);
diff --git a/arch/x86/kernel/tlb_uv.c b/arch/x86/kernel/tlb_uv.c
new file mode 100644
index 000000000000..28e7c68d9d78
--- /dev/null
+++ b/arch/x86/kernel/tlb_uv.c
@@ -0,0 +1,736 @@
+/*
+ * SGI UltraViolet TLB flush routines.
+ *
+ * (c) 2008 Cliff Wickman <cpw@sgi.com>, SGI.
+ *
+ * This code is released under the GNU General Public License version 2 or
+ * later.
+ */
+#include <linux/mc146818rtc.h>
+#include <linux/proc_fs.h>
+#include <linux/kernel.h>
+
+#include <asm/mach-bigsmp/mach_apic.h>
+#include <asm/mmu_context.h>
+#include <asm/idle.h>
+#include <asm/genapic.h>
+#include <asm/uv/uv_hub.h>
+#include <asm/uv/uv_mmrs.h>
+#include <asm/uv/uv_bau.h>
+
+struct bau_control **uv_bau_table_bases;
+static int uv_bau_retry_limit;
+static int uv_nshift;	/* position of pnode (which is nasid>>1) */
+static unsigned long uv_mmask;
+
+char *status_table[] = {
+	"IDLE",
+	"ACTIVE",
+	"DESTINATION TIMEOUT",
+	"SOURCE TIMEOUT"
+};
+
+DEFINE_PER_CPU(struct ptc_stats, ptcstats);
+DEFINE_PER_CPU(struct bau_control, bau_control);
+
+/*
+ * Free a software acknowledge hardware resource by clearing its Pending
+ * bit. This will return a reply to the sender.
+ * If the message has timed out, a reply has already been sent by the
+ * hardware but the resource has not been released. In that case our
+ * clear of the Timeout bit (as well) will free the resource. No reply will
+ * be sent (the hardware will only do one reply per message).
+ */
+static void
+uv_reply_to_message(int resource,
+		    struct bau_payload_queue_entry *msg,
+		    struct bau_msg_status *msp)
+{
+	int fw;
+
+	fw = (1 << (resource + UV_SW_ACK_NPENDING)) | (1 << resource);
+	msg->replied_to = 1;
+	msg->sw_ack_vector = 0;
+	if (msp)
+		msp->seen_by.bits = 0;
+	uv_write_local_mmr(UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE_ALIAS, fw);
+	return;
+}
+
+/*
+ * Do all the things a cpu should do for a TLB shootdown message.
+ * Other cpu's may come here at the same time for this message.
+ */
+static void
+uv_bau_process_message(struct bau_payload_queue_entry *msg,
+		       int msg_slot, int sw_ack_slot)
+{
+	int cpu;
+	unsigned long this_cpu_mask;
+	struct bau_msg_status *msp;
+
+	msp = __get_cpu_var(bau_control).msg_statuses + msg_slot;
+	cpu = uv_blade_processor_id();
+	msg->number_of_cpus =
+	    uv_blade_nr_online_cpus(uv_node_to_blade_id(numa_node_id()));
+	this_cpu_mask = (unsigned long)1 << cpu;
+	if (msp->seen_by.bits & this_cpu_mask)
+		return;
+	atomic_or_long(&msp->seen_by.bits, this_cpu_mask);
+
+	if (msg->replied_to == 1)
+		return;
+
+	if (msg->address == TLB_FLUSH_ALL) {
+		local_flush_tlb();
+		__get_cpu_var(ptcstats).alltlb++;
+	} else {
+		__flush_tlb_one(msg->address);
+		__get_cpu_var(ptcstats).onetlb++;
+	}
+
+	__get_cpu_var(ptcstats).requestee++;
+
+	atomic_inc_short(&msg->acknowledge_count);
+	if (msg->number_of_cpus == msg->acknowledge_count)
+		uv_reply_to_message(sw_ack_slot, msg, msp);
+	return;
+}
+
+/*
+ * Examine the payload queue on all the distribution nodes to see
+ * which messages have not been seen, and which cpu(s) have not seen them.
+ *
+ * Returns the number of cpu's that have not responded.
+ */
+static int
+uv_examine_destinations(struct bau_target_nodemask *distribution)
+{
+	int sender;
+	int i;
+	int j;
+	int k;
+	int count = 0;
+	struct bau_control *bau_tablesp;
+	struct bau_payload_queue_entry *msg;
+	struct bau_msg_status *msp;
+
+	sender = smp_processor_id();
+	for (i = 0; i < (sizeof(struct bau_target_nodemask) * BITSPERBYTE);
+	     i++) {
+		if (bau_node_isset(i, distribution)) {
+			bau_tablesp = uv_bau_table_bases[i];
+			for (msg = bau_tablesp->va_queue_first, j = 0;
+			     j < DESTINATION_PAYLOAD_QUEUE_SIZE; msg++, j++) {
+				if ((msg->sending_cpu == sender) &&
+				    (!msg->replied_to)) {
+					msp = bau_tablesp->msg_statuses + j;
+					printk(KERN_DEBUG
+					"blade %d: address:%#lx %d of %d, not cpu(s): ",
+					       i, msg->address,
+					       msg->acknowledge_count,
+					       msg->number_of_cpus);
+					for (k = 0; k < msg->number_of_cpus;
+					     k++) {
+						if (!((long)1 << k & msp->
+						      seen_by.bits)) {
+							count++;
+							printk("%d ", k);
+						}
+					}
+					printk("\n");
+				}
+			}
+		}
+	}
+	return count;
+}
+
+/**
+ * uv_flush_tlb_others - globally purge translation cache of a virtual
+ * address or all TLB's
+ * @cpumaskp: mask of all cpu's in which the address is to be removed
+ * @mm: mm_struct containing virtual address range
+ * @va: virtual address to be removed (or TLB_FLUSH_ALL for all TLB's on cpu)
+ *
+ * This is the entry point for initiating any UV global TLB shootdown.
+ *
+ * Purges the translation caches of all specified processors of the given
+ * virtual address, or purges all TLB's on specified processors.
+ *
+ * The caller has derived the cpumaskp from the mm_struct and has subtracted
+ * the local cpu from the mask.  This function is called only if there
+ * are bits set in the mask. (e.g. flush_tlb_page())
+ *
+ * The cpumaskp is converted into a nodemask of the nodes containing
+ * the cpus.
+ */
+int
+uv_flush_tlb_others(cpumask_t *cpumaskp, struct mm_struct *mm, unsigned long va)
+{
+	int i;
+	int blade;
+	int cpu;
+	int bit;
+	int right_shift;
+	int this_blade;
+	int exams = 0;
+	int tries = 0;
+	long source_timeouts = 0;
+	long destination_timeouts = 0;
+	unsigned long index;
+	unsigned long mmr_offset;
+	unsigned long descriptor_status;
+	struct bau_activation_descriptor *bau_desc;
+	ktime_t time1, time2;
+
+	cpu = uv_blade_processor_id();
+	this_blade = uv_numa_blade_id();
+	bau_desc = __get_cpu_var(bau_control).descriptor_base;
+	bau_desc += (UV_ITEMS_PER_DESCRIPTOR * cpu);
+
+	bau_nodes_clear(&bau_desc->distribution, UV_DISTRIBUTION_SIZE);
+
+	i = 0;
+	for_each_cpu_mask(bit, *cpumaskp) {
+		blade = uv_cpu_to_blade_id(bit);
+		if (blade > (UV_DISTRIBUTION_SIZE - 1))
+			BUG();
+		if (blade == this_blade)
+			continue;
+		bau_node_set(blade, &bau_desc->distribution);
+		/* leave the bits for the remote cpu's in the mask until
+		   success; on failure we fall back to the IPI method */
+		i++;
+	}
+	if (i == 0)
+		goto none_to_flush;
+	__get_cpu_var(ptcstats).requestor++;
+	__get_cpu_var(ptcstats).ntargeted += i;
+
+	bau_desc->payload.address = va;
+	bau_desc->payload.sending_cpu = smp_processor_id();
+
+	if (cpu < UV_CPUS_PER_ACT_STATUS) {
+		mmr_offset = UVH_LB_BAU_SB_ACTIVATION_STATUS_0;
+		right_shift = cpu * UV_ACT_STATUS_SIZE;
+	} else {
+		mmr_offset = UVH_LB_BAU_SB_ACTIVATION_STATUS_1;
+		right_shift =
+		    ((cpu - UV_CPUS_PER_ACT_STATUS) * UV_ACT_STATUS_SIZE);
+	}
+	time1 = ktime_get();
+
+retry:
+	tries++;
+	index = ((unsigned long)
+		 1 << UVH_LB_BAU_SB_ACTIVATION_CONTROL_PUSH_SHFT) | cpu;
+	uv_write_local_mmr(UVH_LB_BAU_SB_ACTIVATION_CONTROL, index);
+
+	while ((descriptor_status = (((unsigned long)
+		uv_read_local_mmr(mmr_offset) >>
+		right_shift) & UV_ACT_STATUS_MASK)) !=
+	       DESC_STATUS_IDLE) {
+		if (descriptor_status == DESC_STATUS_SOURCE_TIMEOUT) {
+			source_timeouts++;
+			if (source_timeouts > SOURCE_TIMEOUT_LIMIT)
+				source_timeouts = 0;
+			__get_cpu_var(ptcstats).s_retry++;
+			goto retry;
+		}
+		/* spin here looking for progress at the destinations */
+		if (descriptor_status == DESC_STATUS_DESTINATION_TIMEOUT) {
+			destination_timeouts++;
+			if (destination_timeouts > DESTINATION_TIMEOUT_LIMIT) {
+				/* returns # of cpus not responding */
+				if (uv_examine_destinations
+				    (&bau_desc->distribution) == 0) {
+					__get_cpu_var(ptcstats).d_retry++;
+					goto retry;
+				}
+				exams++;
+				if (exams >= uv_bau_retry_limit) {
+					printk(KERN_DEBUG
+					       "uv_flush_tlb_others");
+					printk("giving up on cpu %d\n",
+					       smp_processor_id());
+					goto unsuccessful;
+				}
+				/* delays can hang up the simulator
+				   udelay(1000);
+				 */
+				destination_timeouts = 0;
+			}
+		}
+	}
+	if (tries > 1)
+		__get_cpu_var(ptcstats).retriesok++;
+	/* on success, clear the remote cpu's from the mask so we don't
+	   use the IPI method of shootdown on them */
+	for_each_cpu_mask(bit, *cpumaskp) {
+		blade = uv_cpu_to_blade_id(bit);
+		if (blade == this_blade)
+			continue;
+		cpu_clear(bit, *cpumaskp);
+	}
+
+unsuccessful:
+	time2 = ktime_get();
+	__get_cpu_var(ptcstats).sflush_ns += (time2.tv64 - time1.tv64);
+
+none_to_flush:
+	if (cpus_empty(*cpumaskp))
+		return 1;
+
+	/* Cause the caller to do an IPI-style TLB shootdown on
+	   the cpu's still in the mask */
+	__get_cpu_var(ptcstats).ptc_i++;
+	return 0;
+}
+
+/*
+ * The BAU message interrupt comes here. (registered by set_intr_gate)
+ * See entry_64.S
+ *
+ * We received a broadcast assist message.
+ *
+ * Interrupts may have been disabled; this interrupt could represent
+ * the receipt of several messages.
+ *
+ * All cores/threads on this node get this interrupt.
+ * The last one to see it does the s/w ack.
+ * (the resource will not be freed until noninterruptable cpus see this
+ *  interrupt; hardware will timeout the s/w ack and reply ERROR)
+ */
+void
+uv_bau_message_interrupt(struct pt_regs *regs)
+{
+	struct bau_payload_queue_entry *pqp;
+	struct bau_payload_queue_entry *msg;
+	struct pt_regs *old_regs = set_irq_regs(regs);
+	ktime_t time1, time2;
+	int msg_slot;
+	int sw_ack_slot;
+	int fw;
+	int count = 0;
+	unsigned long local_pnode;
+
+	ack_APIC_irq();
+	exit_idle();
+	irq_enter();
+
+	time1 = ktime_get();
+
+	local_pnode = uv_blade_to_pnode(uv_numa_blade_id());
+
+	pqp = __get_cpu_var(bau_control).va_queue_first;
+	msg = __get_cpu_var(bau_control).bau_msg_head;
+	while (msg->sw_ack_vector) {
+		count++;
+		fw = msg->sw_ack_vector;
+		msg_slot = msg - pqp;
+		sw_ack_slot = ffs(fw) - 1;
+
+		uv_bau_process_message(msg, msg_slot, sw_ack_slot);
+
+		msg++;
+		if (msg > __get_cpu_var(bau_control).va_queue_last)
+			msg = __get_cpu_var(bau_control).va_queue_first;
+		__get_cpu_var(bau_control).bau_msg_head = msg;
+	}
+	if (!count)
+		__get_cpu_var(ptcstats).nomsg++;
+	else if (count > 1)
+		__get_cpu_var(ptcstats).multmsg++;
+
+	time2 = ktime_get();
+	__get_cpu_var(ptcstats).dflush_ns += (time2.tv64 - time1.tv64);
+
+	irq_exit();
+	set_irq_regs(old_regs);
+	return;
+}
+
+static void
+uv_enable_timeouts(void)
+{
+	int i;
+	int blade;
+	int last_blade;
+	int pnode;
+	int cur_cpu = 0;
+	unsigned long apicid;
+
+	/* better if we had each_online_blade */
+	last_blade = -1;
+	for_each_online_node(i) {
+		blade = uv_node_to_blade_id(i);
+		if (blade == last_blade)
+			continue;
+		last_blade = blade;
+		apicid = per_cpu(x86_cpu_to_apicid, cur_cpu);
+		pnode = uv_blade_to_pnode(blade);
+		cur_cpu += uv_blade_nr_possible_cpus(i);
+	}
+	return;
+}
+
+static void *
+uv_ptc_seq_start(struct seq_file *file, loff_t *offset)
+{
+	if (*offset < num_possible_cpus())
+		return offset;
+	return NULL;
+}
+
+static void *
+uv_ptc_seq_next(struct seq_file *file, void *data, loff_t *offset)
+{
+	(*offset)++;
+	if (*offset < num_possible_cpus())
+		return offset;
+	return NULL;
+}
+
+static void
+uv_ptc_seq_stop(struct seq_file *file, void *data)
+{
+}
+
+/*
+ * Display the statistics thru /proc
+ * data points to the cpu number
+ */
+static int
+uv_ptc_seq_show(struct seq_file *file, void *data)
+{
+	struct ptc_stats *stat;
+	int cpu;
+
+	cpu = *(loff_t *)data;
+
+	if (!cpu) {
+		seq_printf(file,
+		"# cpu requestor requestee one all sretry dretry ptc_i ");
+		seq_printf(file,
+		"sw_ack sflush_us dflush_us sok dnomsg dmult starget\n");
+	}
+	if (cpu < num_possible_cpus() && cpu_online(cpu)) {
+		stat = &per_cpu(ptcstats, cpu);
+		seq_printf(file, "cpu %d %ld %ld %ld %ld %ld %ld %ld ",
+			   cpu, stat->requestor,
+			   stat->requestee, stat->onetlb, stat->alltlb,
+			   stat->s_retry, stat->d_retry, stat->ptc_i);
+		seq_printf(file, "%lx %ld %ld %ld %ld %ld %ld\n",
+			   uv_read_global_mmr64(uv_blade_to_pnode
+					(uv_cpu_to_blade_id(cpu)),
+				UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE),
+			   stat->sflush_ns / 1000, stat->dflush_ns / 1000,
+			   stat->retriesok, stat->nomsg,
+			   stat->multmsg, stat->ntargeted);
+	}
+
+	return 0;
+}
+
+/*
+ *  0: display meaning of the statistics
+ * >0: retry limit
+ */
+static ssize_t
+uv_ptc_proc_write(struct file *file, const char __user *user,
+		  size_t count, loff_t *data)
+{
+	long newmode;
+	char optstr[64];
+
+	if (copy_from_user(optstr, user, count))
+		return -EFAULT;
+	optstr[count - 1] = '\0';
+	if (strict_strtoul(optstr, 10, &newmode) < 0) {
+		printk(KERN_DEBUG "%s is invalid\n", optstr);
+		return -EINVAL;
+	}
+
+	if (newmode == 0) {
+		printk(KERN_DEBUG "# cpu: cpu number\n");
+		printk(KERN_DEBUG
+		"requestor: times this cpu was the flush requestor\n");
+		printk(KERN_DEBUG
+		"requestee: times this cpu was requested to flush its TLBs\n");
+		printk(KERN_DEBUG
+		"one: times requested to flush a single address\n");
+		printk(KERN_DEBUG
+		"all: times requested to flush all TLB's\n");
+		printk(KERN_DEBUG
+		"sretry: number of retries of source-side timeouts\n");
+		printk(KERN_DEBUG
+		"dretry: number of retries of destination-side timeouts\n");
+		printk(KERN_DEBUG
+		"ptc_i: times UV fell through to IPI-style flushes\n");
+		printk(KERN_DEBUG
+		"sw_ack: image of UVH_LB_BAU_INTD_SOFTWARE_ACKNOWLEDGE\n");
+		printk(KERN_DEBUG
+		"sflush_us: microseconds spent in uv_flush_tlb_others()\n");
+		printk(KERN_DEBUG
+		"dflush_us: microseconds spent in handling flush requests\n");
+		printk(KERN_DEBUG "sok: successes on retry\n");
+		printk(KERN_DEBUG "dnomsg: interrupts with no message\n");
+		printk(KERN_DEBUG
+		"dmult: interrupts with multiple messages\n");
+		printk(KERN_DEBUG "starget: nodes targeted\n");
+	} else {
+		uv_bau_retry_limit = newmode;
+		printk(KERN_DEBUG "timeout retry limit:%d\n",
+		       uv_bau_retry_limit);
+	}
+
+	return count;
+}
+
+static const struct seq_operations uv_ptc_seq_ops = {
+	.start = uv_ptc_seq_start,
+	.next = uv_ptc_seq_next,
+	.stop = uv_ptc_seq_stop,
+	.show = uv_ptc_seq_show
+};
+
+static int
+uv_ptc_proc_open(struct inode *inode, struct file *file)
+{
+	return seq_open(file, &uv_ptc_seq_ops);
+}
+
+static const struct file_operations proc_uv_ptc_operations = {
+	.open = uv_ptc_proc_open,
+	.read = seq_read,
+	.write = uv_ptc_proc_write,
+	.llseek = seq_lseek,
+	.release = seq_release,
+};
+
+static struct proc_dir_entry *proc_uv_ptc;
+
+static int __init
+uv_ptc_init(void)
+{
+	static struct proc_dir_entry *sgi_proc_dir;
+
+	sgi_proc_dir = NULL;
+
+	if (!is_uv_system())
+		return 0;
+
+	sgi_proc_dir = proc_mkdir("sgi_uv", NULL);
+	if (!sgi_proc_dir)
+		return -EINVAL;
+
+	proc_uv_ptc = create_proc_entry(UV_PTC_BASENAME, 0444, NULL);
+	if (!proc_uv_ptc) {
+		printk(KERN_ERR "unable to create %s proc entry\n",
+		       UV_PTC_BASENAME);
+		return -EINVAL;
+	}
+	proc_uv_ptc->proc_fops = &proc_uv_ptc_operations;
+	return 0;
+}
+
+static void __exit
+uv_ptc_exit(void)
+{
+	remove_proc_entry(UV_PTC_BASENAME, NULL);
+}
+
+module_init(uv_ptc_init);
+module_exit(uv_ptc_exit);
+
+/*
+ * Initialization of BAU-related structures
+ */
+int __init
+uv_bau_init(void)
+{
+	int i;
+	int j;
+	int blade;
+	int nblades;
+	int *ip;
+	int pnode;
+	int last_blade;
+	int cur_cpu = 0;
+	unsigned long pa;
+	unsigned long n;
+	unsigned long m;
+	unsigned long mmr_image;
+	unsigned long apicid;
+	char *cp;
+	struct bau_control *bau_tablesp;
+	struct bau_activation_descriptor *adp, *ad2;
+	struct bau_payload_queue_entry *pqp;
+	struct bau_msg_status *msp;
+	struct bau_control *bcp;
+
+	if (!is_uv_system())
+		return 0;
+
+	uv_bau_retry_limit = 1;
+
+	if ((sizeof(struct bau_local_cpumask) * BITSPERBYTE) <
+	    MAX_CPUS_PER_NODE) {
+		printk(KERN_ERR
+		       "uv_bau_init: bau_local_cpumask.bits too small\n");
+		BUG();
+	}
+
+	uv_nshift = uv_hub_info->n_val;
+	uv_mmask = ((unsigned long)1 << uv_hub_info->n_val) - 1;
+	nblades = 0;
+	last_blade = -1;
+	for_each_online_node(i) {
+		blade = uv_node_to_blade_id(i);
+		if (blade == last_blade)
+			continue;
+		last_blade = blade;
+		nblades++;
+	}
+
+	uv_bau_table_bases = (struct bau_control **)
+	    kmalloc(nblades * sizeof(struct bau_control *), GFP_KERNEL);
+	if (!uv_bau_table_bases)
+		BUG();
+
+	/* better if we had each_online_blade */
+	last_blade = -1;
+	for_each_online_node(i) {
+		blade = uv_node_to_blade_id(i);
+		if (blade == last_blade)
+			continue;
+		last_blade = blade;
+
+		bau_tablesp =
+		    kmalloc_node(sizeof(struct bau_control), GFP_KERNEL, i);
+		if (!bau_tablesp)
+			BUG();
+
+		bau_tablesp->msg_statuses =
+		    kmalloc_node(sizeof(struct bau_msg_status) *
+				 DESTINATION_PAYLOAD_QUEUE_SIZE, GFP_KERNEL, i);
+		if (!bau_tablesp->msg_statuses)
+			BUG();
+		for (j = 0, msp = bau_tablesp->msg_statuses;
+		     j < DESTINATION_PAYLOAD_QUEUE_SIZE; j++, msp++) {
+			bau_cpubits_clear(&msp->seen_by, (int)
+					  uv_blade_nr_possible_cpus(blade));
+		}
+
+		bau_tablesp->watching =
+		    kmalloc_node(sizeof(int) * DESTINATION_NUM_RESOURCES,
+				 GFP_KERNEL, i);
+		if (!bau_tablesp->watching)
+			BUG();
+		for (j = 0, ip = bau_tablesp->watching;
+		     j < DESTINATION_PAYLOAD_QUEUE_SIZE; j++, ip++) {
+			*ip = 0;
+		}
+
+		uv_bau_table_bases[i] = bau_tablesp;
+
+		pnode = uv_blade_to_pnode(blade);
+
+		if (sizeof(struct bau_activation_descriptor) != 64)
+			BUG();
+
+		adp = (struct bau_activation_descriptor *)
+		    kmalloc_node(16384, GFP_KERNEL, i);
+		if (!adp)
+			BUG();
+		if ((unsigned long)adp & 0xfff)
+			BUG();
+		pa = __pa((unsigned long)adp);
+		n = pa >> uv_nshift;
+		m = pa & uv_mmask;
+
+		mmr_image = uv_read_global_mmr64(pnode,
+					 UVH_LB_BAU_SB_DESCRIPTOR_BASE);
+		if (mmr_image)
+			uv_write_global_mmr64(pnode, (unsigned long)
+					      UVH_LB_BAU_SB_DESCRIPTOR_BASE,
+					      (n << UV_DESC_BASE_PNODE_SHIFT |
+					       m));
+		for (j = 0, ad2 = adp; j < UV_ACTIVATION_DESCRIPTOR_SIZE;
+		     j++, ad2++) {
+			memset(ad2, 0,
+			       sizeof(struct bau_activation_descriptor));
+			ad2->header.sw_ack_flag = 1;
+			ad2->header.base_dest_nodeid =
+			    uv_blade_to_pnode(uv_cpu_to_blade_id(0));
+			ad2->header.command = UV_NET_ENDPOINT_INTD;
+			ad2->header.int_both = 1;
+			/* all others need to be set to zero:
+			   fairness chaining multilevel count replied_to */
+		}
+
+		pqp = (struct bau_payload_queue_entry *)
+		    kmalloc_node((DESTINATION_PAYLOAD_QUEUE_SIZE + 1) *
+				 sizeof(struct bau_payload_queue_entry),
+				 GFP_KERNEL, i);
+		if (!pqp)
+			BUG();
+		if (sizeof(struct bau_payload_queue_entry) != 32)
+			BUG();
+		if ((unsigned long)(&((struct bau_payload_queue_entry *)0)->
+				    sw_ack_vector) != 15)
+			BUG();
+
+		cp = (char *)pqp + 31;
+		pqp = (struct bau_payload_queue_entry *)
+		    (((unsigned long)cp >> 5) << 5);
+		bau_tablesp->va_queue_first = pqp;
+		uv_write_global_mmr64(pnode,
+				      UVH_LB_BAU_INTD_PAYLOAD_QUEUE_FIRST,
+				      ((unsigned long)pnode <<
+				       UV_PAYLOADQ_PNODE_SHIFT) |
+				      uv_physnodeaddr(pqp));
+		uv_write_global_mmr64(pnode, UVH_LB_BAU_INTD_PAYLOAD_QUEUE_TAIL,
+				      uv_physnodeaddr(pqp));
+		bau_tablesp->va_queue_last =
+		    pqp + (DESTINATION_PAYLOAD_QUEUE_SIZE - 1);
+		uv_write_global_mmr64(pnode, UVH_LB_BAU_INTD_PAYLOAD_QUEUE_LAST,
+				      (unsigned long)
+				      uv_physnodeaddr(bau_tablesp->
+						      va_queue_last));
+		memset(pqp, 0, sizeof(struct bau_payload_queue_entry) *
+		       DESTINATION_PAYLOAD_QUEUE_SIZE);
+
+		/* this initialization can't be in firmware because the
+		   messaging IRQ will be determined by the OS */
+		apicid = per_cpu(x86_cpu_to_apicid, cur_cpu);
+		pa = uv_read_global_mmr64(pnode, UVH_BAU_DATA_CONFIG);
+		if ((pa & 0xff) != UV_BAU_MESSAGE) {
+			uv_write_global_mmr64(pnode, UVH_BAU_DATA_CONFIG,
+					      ((apicid << 32) |
+					       UV_BAU_MESSAGE));
+		}
+
+		for (j = cur_cpu; j < (cur_cpu + uv_blade_nr_possible_cpus(i));
+		     j++) {
+			bcp = (struct bau_control *)&per_cpu(bau_control, j);
+			bcp->bau_msg_head = bau_tablesp->va_queue_first;
+			bcp->va_queue_first = bau_tablesp->va_queue_first;
+
+			bcp->va_queue_last = bau_tablesp->va_queue_last;
+			bcp->watching = bau_tablesp->watching;
+			bcp->msg_statuses = bau_tablesp->msg_statuses;
+			bcp->descriptor_base = adp;
+		}
+		cur_cpu += uv_blade_nr_possible_cpus(i);
+	}
+
+	set_intr_gate(UV_BAU_MESSAGE, uv_bau_message_intr1);
+
+	uv_enable_timeouts();
+
+	return 0;
+}
+
+__initcall(uv_bau_init);