author	Linus Torvalds <torvalds@woody.linux-foundation.org>	2007-07-16 13:45:23 -0400
committer	Linus Torvalds <torvalds@woody.linux-foundation.org>	2007-07-16 13:45:23 -0400
commit	02b2318e07f98a7cdf7089a4457a8d62424aa824 (patch)
tree	b40353a9ee6b034e21192ceb5df445fbc5fbdd32 /arch
parent	b91cba52e9b7b3f1c0037908a192d93a869ca9e5 (diff)
parent	d54bc2793ec3405c6b8f217568a82b87bd8a591b (diff)
Merge branch 'master' of master.kernel.org:/pub/scm/linux/kernel/git/davem/sparc-2.6
* 'master' of master.kernel.org:/pub/scm/linux/kernel/git/davem/sparc-2.6: (26 commits)
  [SPARC64]: Fix UP build.
  [SPARC64]: dr-cpu unconfigure support.
  [SERIAL]: Fix console write locking in sparc drivers.
  [SPARC64]: Give more accurate errors in dr_cpu_configure().
  [SPARC64]: Clear cpu_{core,sibling}_map[] in smp_fill_in_sib_core_maps()
  [SPARC64]: Fix leak when DR added cpu does not bootup.
  [SPARC64]: Add ->set_affinity IRQ handlers.
  [SPARC64]: Process dr-cpu events in a kthread instead of workqueue.
  [SPARC64]: More sensible udelay implementation.
  [SPARC64]: SMP build fixes.
  [SPARC64]: mdesc.c needs linux/mm.h
  [SPARC64]: Fix build regressions added by dr-cpu changes.
  [SPARC64]: Unconditionally register vio_bus_type.
  [SPARC64]: Initial LDOM cpu hotplug support.
  [SPARC64]: Fix setting of variables in LDOM guest.
  [SPARC64]: Fix MD property lifetime bugs.
  [SPARC64]: Abstract out mdesc accesses for better MD update handling.
  [SPARC64]: Use more mearningful names for IRQ registry.
  [SPARC64]: Initial domain-services driver.
  [SPARC64]: Export powerd facilities for external entities.
  ...
Diffstat (limited to 'arch')
-rw-r--r--  arch/sparc64/Kconfig                |   15
-rw-r--r--  arch/sparc64/kernel/Makefile        |    3
-rw-r--r--  arch/sparc64/kernel/ds.c            | 1158
-rw-r--r--  arch/sparc64/kernel/hvtramp.S       |  139
-rw-r--r--  arch/sparc64/kernel/irq.c           |   84
-rw-r--r--  arch/sparc64/kernel/ldc.c           | 2373
-rw-r--r--  arch/sparc64/kernel/mdesc.c         |  698
-rw-r--r--  arch/sparc64/kernel/power.c         |   54
-rw-r--r--  arch/sparc64/kernel/process.c       |   21
-rw-r--r--  arch/sparc64/kernel/prom.c          |    2
-rw-r--r--  arch/sparc64/kernel/setup.c         |    5
-rw-r--r--  arch/sparc64/kernel/smp.c           |  251
-rw-r--r--  arch/sparc64/kernel/sparc64_ksyms.c |   16
-rw-r--r--  arch/sparc64/kernel/sysfs.c         |    2
-rw-r--r--  arch/sparc64/kernel/time.c          |   28
-rw-r--r--  arch/sparc64/kernel/vio.c           |  395
-rw-r--r--  arch/sparc64/kernel/viohs.c         |  792
-rw-r--r--  arch/sparc64/lib/Makefile           |    2
-rw-r--r--  arch/sparc64/lib/delay.c            |   46
-rw-r--r--  arch/sparc64/prom/misc.c            |   13
-rw-r--r--  arch/sparc64/prom/p1275.c           |    1
-rw-r--r--  arch/sparc64/prom/tree.c            |   13
22 files changed, 5655 insertions(+), 456 deletions(-)
diff --git a/arch/sparc64/Kconfig b/arch/sparc64/Kconfig
index 6566d13db04f..b84b6af1241e 100644
--- a/arch/sparc64/Kconfig
+++ b/arch/sparc64/Kconfig
@@ -108,6 +108,15 @@ config SECCOMP
 
 source kernel/Kconfig.hz
 
+config HOTPLUG_CPU
+	bool "Support for hot-pluggable CPUs"
+	depends on SMP
+	select HOTPLUG
+	---help---
+	  Say Y here to experiment with turning CPUs off and on.  CPUs
+	  can be controlled through /sys/devices/system/cpu/cpu#.
+	  Say N if you want to disable CPU hotplug.
+
 source "init/Kconfig"
 
 config SYSVIPC_COMPAT
@@ -305,6 +314,12 @@ config SUN_IO
 	bool
 	default y
 
+config SUN_LDOMS
+	bool "Sun Logical Domains support"
+	help
+	  Say Y here if you want to support virtual devices via
+	  Logical Domains.
+
 config PCI
 	bool "PCI support"
 	select ARCH_SUPPORTS_MSI
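
The help text above names the standard sysfs control files. As a quick illustration of what "controlled through /sys/devices/system/cpu/cpu#" means in practice, here is a hypothetical user-space sketch (not part of the patch; it assumes a kernel with CONFIG_HOTPLUG_CPU=y and root privileges):

/* cpu_toggle.c: offline and re-online a CPU via sysfs. */
#include <stdio.h>

static int set_cpu_online(int cpu, int online)
{
	char path[64];
	FILE *f;

	snprintf(path, sizeof(path),
		 "/sys/devices/system/cpu/cpu%d/online", cpu);
	f = fopen(path, "w");
	if (!f)
		return -1;
	fprintf(f, "%d\n", online);
	return fclose(f);
}

int main(void)
{
	if (set_cpu_online(1, 0))	/* unplug cpu1 */
		perror("offline cpu1");
	if (set_cpu_online(1, 1))	/* replug cpu1 */
		perror("online cpu1");
	return 0;
}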
diff --git a/arch/sparc64/kernel/Makefile b/arch/sparc64/kernel/Makefile
index f964bf28d21a..b66876bf410c 100644
--- a/arch/sparc64/kernel/Makefile
+++ b/arch/sparc64/kernel/Makefile
@@ -18,7 +18,7 @@ obj-$(CONFIG_STACKTRACE) += stacktrace.o
 obj-$(CONFIG_PCI)	 += ebus.o isa.o pci_common.o pci_iommu.o \
 			pci_psycho.o pci_sabre.o pci_schizo.o \
 			pci_sun4v.o pci_sun4v_asm.o pci_fire.o
-obj-$(CONFIG_SMP)	 += smp.o trampoline.o
+obj-$(CONFIG_SMP)	 += smp.o trampoline.o hvtramp.o
 obj-$(CONFIG_SPARC32_COMPAT) += sys32.o sys_sparc32.o signal32.o
 obj-$(CONFIG_BINFMT_ELF32) += binfmt_elf32.o
 obj-$(CONFIG_BINFMT_AOUT32) += binfmt_aout32.o
@@ -26,6 +26,7 @@ obj-$(CONFIG_MODULES) += module.o
 obj-$(CONFIG_US3_FREQ) += us3_cpufreq.o
 obj-$(CONFIG_US2E_FREQ) += us2e_cpufreq.o
 obj-$(CONFIG_KPROBES) += kprobes.o
+obj-$(CONFIG_SUN_LDOMS) += ldc.o vio.o viohs.o ds.o
 obj-$(CONFIG_AUDIT) += audit.o
 obj-$(CONFIG_AUDIT)$(CONFIG_SPARC32_COMPAT) += compat_audit.o
 obj-y += $(obj-yy)
diff --git a/arch/sparc64/kernel/ds.c b/arch/sparc64/kernel/ds.c
new file mode 100644
index 000000000000..1c587107cef0
--- /dev/null
+++ b/arch/sparc64/kernel/ds.c
@@ -0,0 +1,1158 @@
1/* ds.c: Domain Services driver for Logical Domains
2 *
3 * Copyright (C) 2007 David S. Miller <davem@davemloft.net>
4 */
5
6#include <linux/kernel.h>
7#include <linux/module.h>
8#include <linux/types.h>
9
10#include <linux/string.h>
11#include <linux/slab.h>
12#include <linux/sched.h>
13#include <linux/delay.h>
14#include <linux/mutex.h>
15#include <linux/kthread.h>
16#include <linux/cpu.h>
17
18#include <asm/ldc.h>
19#include <asm/vio.h>
20#include <asm/power.h>
21#include <asm/mdesc.h>
22#include <asm/head.h>
23#include <asm/irq.h>
24
25#define DRV_MODULE_NAME "ds"
26#define PFX DRV_MODULE_NAME ": "
27#define DRV_MODULE_VERSION "1.0"
28#define DRV_MODULE_RELDATE "Jul 11, 2007"
29
30static char version[] __devinitdata =
31 DRV_MODULE_NAME ".c:v" DRV_MODULE_VERSION " (" DRV_MODULE_RELDATE ")\n";
32MODULE_AUTHOR("David S. Miller (davem@davemloft.net)");
33MODULE_DESCRIPTION("Sun LDOM domain services driver");
34MODULE_LICENSE("GPL");
35MODULE_VERSION(DRV_MODULE_VERSION);
36
37struct ds_msg_tag {
38 __u32 type;
39#define DS_INIT_REQ 0x00
40#define DS_INIT_ACK 0x01
41#define DS_INIT_NACK 0x02
42#define DS_REG_REQ 0x03
43#define DS_REG_ACK 0x04
44#define DS_REG_NACK 0x05
45#define DS_UNREG_REQ 0x06
46#define DS_UNREG_ACK 0x07
47#define DS_UNREG_NACK 0x08
48#define DS_DATA 0x09
49#define DS_NACK 0x0a
50
51 __u32 len;
52};
53
54/* Result codes */
55#define DS_OK 0x00
56#define DS_REG_VER_NACK 0x01
57#define DS_REG_DUP 0x02
58#define DS_INV_HDL 0x03
59#define DS_TYPE_UNKNOWN 0x04
60
61struct ds_version {
62 __u16 major;
63 __u16 minor;
64};
65
66struct ds_ver_req {
67 struct ds_msg_tag tag;
68 struct ds_version ver;
69};
70
71struct ds_ver_ack {
72 struct ds_msg_tag tag;
73 __u16 minor;
74};
75
76struct ds_ver_nack {
77 struct ds_msg_tag tag;
78 __u16 major;
79};
80
81struct ds_reg_req {
82 struct ds_msg_tag tag;
83 __u64 handle;
84 __u16 major;
85 __u16 minor;
86 char svc_id[0];
87};
88
89struct ds_reg_ack {
90 struct ds_msg_tag tag;
91 __u64 handle;
92 __u16 minor;
93};
94
95struct ds_reg_nack {
96 struct ds_msg_tag tag;
97 __u64 handle;
98 __u16 major;
99};
100
101struct ds_unreg_req {
102 struct ds_msg_tag tag;
103 __u64 handle;
104};
105
106struct ds_unreg_ack {
107 struct ds_msg_tag tag;
108 __u64 handle;
109};
110
111struct ds_unreg_nack {
112 struct ds_msg_tag tag;
113 __u64 handle;
114};
115
116struct ds_data {
117 struct ds_msg_tag tag;
118 __u64 handle;
119};
120
121struct ds_data_nack {
122 struct ds_msg_tag tag;
123 __u64 handle;
124 __u64 result;
125};
126
127struct ds_cap_state {
128 __u64 handle;
129
130 void (*data)(struct ldc_channel *lp,
131 struct ds_cap_state *cp,
132 void *buf, int len);
133
134 const char *service_id;
135
136 u8 state;
137#define CAP_STATE_UNKNOWN 0x00
138#define CAP_STATE_REG_SENT 0x01
139#define CAP_STATE_REGISTERED 0x02
140};
141
142static void md_update_data(struct ldc_channel *lp, struct ds_cap_state *cp,
143 void *buf, int len);
144static void domain_shutdown_data(struct ldc_channel *lp,
145 struct ds_cap_state *cp,
146 void *buf, int len);
147static void domain_panic_data(struct ldc_channel *lp,
148 struct ds_cap_state *cp,
149 void *buf, int len);
150#ifdef CONFIG_HOTPLUG_CPU
151static void dr_cpu_data(struct ldc_channel *lp,
152 struct ds_cap_state *cp,
153 void *buf, int len);
154#endif
155static void ds_pri_data(struct ldc_channel *lp,
156 struct ds_cap_state *cp,
157 void *buf, int len);
158static void ds_var_data(struct ldc_channel *lp,
159 struct ds_cap_state *cp,
160 void *buf, int len);
161
162struct ds_cap_state ds_states[] = {
163 {
164 .service_id = "md-update",
165 .data = md_update_data,
166 },
167 {
168 .service_id = "domain-shutdown",
169 .data = domain_shutdown_data,
170 },
171 {
172 .service_id = "domain-panic",
173 .data = domain_panic_data,
174 },
175#ifdef CONFIG_HOTPLUG_CPU
176 {
177 .service_id = "dr-cpu",
178 .data = dr_cpu_data,
179 },
180#endif
181 {
182 .service_id = "pri",
183 .data = ds_pri_data,
184 },
185 {
186 .service_id = "var-config",
187 .data = ds_var_data,
188 },
189 {
190 .service_id = "var-config-backup",
191 .data = ds_var_data,
192 },
193};
194
195static DEFINE_SPINLOCK(ds_lock);
196
197struct ds_info {
198 struct ldc_channel *lp;
199 u8 hs_state;
200#define DS_HS_START 0x01
201#define DS_HS_DONE 0x02
202
203 void *rcv_buf;
204 int rcv_buf_len;
205};
206
207static struct ds_info *ds_info;
208
209static struct ds_cap_state *find_cap(u64 handle)
210{
211 unsigned int index = handle >> 32;
212
213 if (index >= ARRAY_SIZE(ds_states))
214 return NULL;
215 return &ds_states[index];
216}
217
218static struct ds_cap_state *find_cap_by_string(const char *name)
219{
220 int i;
221
222 for (i = 0; i < ARRAY_SIZE(ds_states); i++) {
223 if (strcmp(ds_states[i].service_id, name))
224 continue;
225
226 return &ds_states[i];
227 }
228 return NULL;
229}
230
231static int ds_send(struct ldc_channel *lp, void *data, int len)
232{
233 int err, limit = 1000;
234
235 err = -EINVAL;
236 while (limit-- > 0) {
237 err = ldc_write(lp, data, len);
238 if (!err || (err != -EAGAIN))
239 break;
240 udelay(1);
241 }
242
243 return err;
244}
245
246struct ds_md_update_req {
247 __u64 req_num;
248};
249
250struct ds_md_update_res {
251 __u64 req_num;
252 __u32 result;
253};
254
255static void md_update_data(struct ldc_channel *lp,
256 struct ds_cap_state *dp,
257 void *buf, int len)
258{
259 struct ds_data *dpkt = buf;
260 struct ds_md_update_req *rp;
261 struct {
262 struct ds_data data;
263 struct ds_md_update_res res;
264 } pkt;
265
266 rp = (struct ds_md_update_req *) (dpkt + 1);
267
268 printk(KERN_INFO PFX "Machine description update.\n");
269
270 memset(&pkt, 0, sizeof(pkt));
271 pkt.data.tag.type = DS_DATA;
272 pkt.data.tag.len = sizeof(pkt) - sizeof(struct ds_msg_tag);
273 pkt.data.handle = dp->handle;
274 pkt.res.req_num = rp->req_num;
275 pkt.res.result = DS_OK;
276
277 ds_send(lp, &pkt, sizeof(pkt));
278
279 mdesc_update();
280}
281
282struct ds_shutdown_req {
283 __u64 req_num;
284 __u32 ms_delay;
285};
286
287struct ds_shutdown_res {
288 __u64 req_num;
289 __u32 result;
290 char reason[1];
291};
292
293static void domain_shutdown_data(struct ldc_channel *lp,
294 struct ds_cap_state *dp,
295 void *buf, int len)
296{
297 struct ds_data *dpkt = buf;
298 struct ds_shutdown_req *rp;
299 struct {
300 struct ds_data data;
301 struct ds_shutdown_res res;
302 } pkt;
303
304 rp = (struct ds_shutdown_req *) (dpkt + 1);
305
306 printk(KERN_ALERT PFX "Shutdown request from "
307 "LDOM manager received.\n");
308
309 memset(&pkt, 0, sizeof(pkt));
310 pkt.data.tag.type = DS_DATA;
311 pkt.data.tag.len = sizeof(pkt) - sizeof(struct ds_msg_tag);
312 pkt.data.handle = dp->handle;
313 pkt.res.req_num = rp->req_num;
314 pkt.res.result = DS_OK;
315 pkt.res.reason[0] = 0;
316
317 ds_send(lp, &pkt, sizeof(pkt));
318
319 wake_up_powerd();
320}
321
322struct ds_panic_req {
323 __u64 req_num;
324};
325
326struct ds_panic_res {
327 __u64 req_num;
328 __u32 result;
329 char reason[1];
330};
331
332static void domain_panic_data(struct ldc_channel *lp,
333 struct ds_cap_state *dp,
334 void *buf, int len)
335{
336 struct ds_data *dpkt = buf;
337 struct ds_panic_req *rp;
338 struct {
339 struct ds_data data;
340 struct ds_panic_res res;
341 } pkt;
342
343 rp = (struct ds_panic_req *) (dpkt + 1);
344
345 printk(KERN_ALERT PFX "Panic request from "
346 "LDOM manager received.\n");
347
348 memset(&pkt, 0, sizeof(pkt));
349 pkt.data.tag.type = DS_DATA;
350 pkt.data.tag.len = sizeof(pkt) - sizeof(struct ds_msg_tag);
351 pkt.data.handle = dp->handle;
352 pkt.res.req_num = rp->req_num;
353 pkt.res.result = DS_OK;
354 pkt.res.reason[0] = 0;
355
356 ds_send(lp, &pkt, sizeof(pkt));
357
358 panic("PANIC requested by LDOM manager.");
359}
360
361#ifdef CONFIG_HOTPLUG_CPU
362struct dr_cpu_tag {
363 __u64 req_num;
364 __u32 type;
365#define DR_CPU_CONFIGURE 0x43
366#define DR_CPU_UNCONFIGURE 0x55
367#define DR_CPU_FORCE_UNCONFIGURE 0x46
368#define DR_CPU_STATUS 0x53
369
370/* Responses */
371#define DR_CPU_OK 0x6f
372#define DR_CPU_ERROR 0x65
373
374 __u32 num_records;
375};
376
377struct dr_cpu_resp_entry {
378 __u32 cpu;
379 __u32 result;
380#define DR_CPU_RES_OK 0x00
381#define DR_CPU_RES_FAILURE 0x01
382#define DR_CPU_RES_BLOCKED 0x02
383#define DR_CPU_RES_CPU_NOT_RESPONDING 0x03
384#define DR_CPU_RES_NOT_IN_MD 0x04
385
386 __u32 stat;
387#define DR_CPU_STAT_NOT_PRESENT 0x00
388#define DR_CPU_STAT_UNCONFIGURED 0x01
389#define DR_CPU_STAT_CONFIGURED 0x02
390
391 __u32 str_off;
392};
393
394/* DR cpu requests get queued onto the work list by the
395 * dr_cpu_data() callback. The list is protected by
396 * ds_lock, and processed in order by process_dr_cpu_list().
397 */
398static LIST_HEAD(dr_cpu_work_list);
399static DECLARE_WAIT_QUEUE_HEAD(dr_cpu_wait);
400
401struct dr_cpu_queue_entry {
402 struct list_head list;
403 char req[0];
404};
405
406static void __dr_cpu_send_error(struct ds_cap_state *cp, struct ds_data *data)
407{
408 struct dr_cpu_tag *tag = (struct dr_cpu_tag *) (data + 1);
409 struct ds_info *dp = ds_info;
410 struct {
411 struct ds_data data;
412 struct dr_cpu_tag tag;
413 } pkt;
414 int msg_len;
415
416 memset(&pkt, 0, sizeof(pkt));
417 pkt.data.tag.type = DS_DATA;
418 pkt.data.handle = cp->handle;
419 pkt.tag.req_num = tag->req_num;
420 pkt.tag.type = DR_CPU_ERROR;
421 pkt.tag.num_records = 0;
422
423 msg_len = (sizeof(struct ds_data) +
424 sizeof(struct dr_cpu_tag));
425
426 pkt.data.tag.len = msg_len - sizeof(struct ds_msg_tag);
427
428 ds_send(dp->lp, &pkt, msg_len);
429}
430
431static void dr_cpu_send_error(struct ds_cap_state *cp, struct ds_data *data)
432{
433 unsigned long flags;
434
435 spin_lock_irqsave(&ds_lock, flags);
436 __dr_cpu_send_error(cp, data);
437 spin_unlock_irqrestore(&ds_lock, flags);
438}
439
440#define CPU_SENTINEL 0xffffffff
441
442static void purge_dups(u32 *list, u32 num_ents)
443{
444 unsigned int i;
445
446 for (i = 0; i < num_ents; i++) {
447 u32 cpu = list[i];
448 unsigned int j;
449
450 if (cpu == CPU_SENTINEL)
451 continue;
452
453 for (j = i + 1; j < num_ents; j++) {
454 if (list[j] == cpu)
455 list[j] = CPU_SENTINEL;
456 }
457 }
458}
459
460static int dr_cpu_size_response(int ncpus)
461{
462 return (sizeof(struct ds_data) +
463 sizeof(struct dr_cpu_tag) +
464 (sizeof(struct dr_cpu_resp_entry) * ncpus));
465}
466
467static void dr_cpu_init_response(struct ds_data *resp, u64 req_num,
468 u64 handle, int resp_len, int ncpus,
469 cpumask_t *mask, u32 default_stat)
470{
471 struct dr_cpu_resp_entry *ent;
472 struct dr_cpu_tag *tag;
473 int i, cpu;
474
475 tag = (struct dr_cpu_tag *) (resp + 1);
476 ent = (struct dr_cpu_resp_entry *) (tag + 1);
477
478 resp->tag.type = DS_DATA;
479 resp->tag.len = resp_len - sizeof(struct ds_msg_tag);
480 resp->handle = handle;
481 tag->req_num = req_num;
482 tag->type = DR_CPU_OK;
483 tag->num_records = ncpus;
484
485 i = 0;
486 for_each_cpu_mask(cpu, *mask) {
487 ent[i].cpu = cpu;
488 ent[i].result = DR_CPU_RES_OK;
489 ent[i].stat = default_stat;
490 i++;
491 }
492 BUG_ON(i != ncpus);
493}
494
495static void dr_cpu_mark(struct ds_data *resp, int cpu, int ncpus,
496 u32 res, u32 stat)
497{
498 struct dr_cpu_resp_entry *ent;
499 struct dr_cpu_tag *tag;
500 int i;
501
502 tag = (struct dr_cpu_tag *) (resp + 1);
503 ent = (struct dr_cpu_resp_entry *) (tag + 1);
504
505 for (i = 0; i < ncpus; i++) {
506 if (ent[i].cpu != cpu)
507 continue;
508 ent[i].result = res;
509 ent[i].stat = stat;
510 break;
511 }
512}
513
514static int dr_cpu_configure(struct ds_cap_state *cp, u64 req_num,
515 cpumask_t *mask)
516{
517 struct ds_data *resp;
518 int resp_len, ncpus, cpu;
519 unsigned long flags;
520
521 ncpus = cpus_weight(*mask);
522 resp_len = dr_cpu_size_response(ncpus);
523 resp = kzalloc(resp_len, GFP_KERNEL);
524 if (!resp)
525 return -ENOMEM;
526
527 dr_cpu_init_response(resp, req_num, cp->handle,
528 resp_len, ncpus, mask,
529 DR_CPU_STAT_CONFIGURED);
530
531 mdesc_fill_in_cpu_data(*mask);
532
533 for_each_cpu_mask(cpu, *mask) {
534 int err;
535
536 printk(KERN_INFO PFX "Starting cpu %d...\n", cpu);
537 err = cpu_up(cpu);
538 if (err) {
539 __u32 res = DR_CPU_RES_FAILURE;
540 __u32 stat = DR_CPU_STAT_UNCONFIGURED;
541
542 if (!cpu_present(cpu)) {
543 /* CPU not present in MD */
544 res = DR_CPU_RES_NOT_IN_MD;
545 stat = DR_CPU_STAT_NOT_PRESENT;
546 } else if (err == -ENODEV) {
547 /* CPU did not call in successfully */
548 res = DR_CPU_RES_CPU_NOT_RESPONDING;
549 }
550
551 printk(KERN_INFO PFX "CPU startup failed err=%d\n",
552 err);
553 dr_cpu_mark(resp, cpu, ncpus, res, stat);
554 }
555 }
556
557 spin_lock_irqsave(&ds_lock, flags);
558 ds_send(ds_info->lp, resp, resp_len);
559 spin_unlock_irqrestore(&ds_lock, flags);
560
561 kfree(resp);
562
563 /* Redistribute IRQs, taking into account the new cpus. */
564 fixup_irqs();
565
566 return 0;
567}
568
569static int dr_cpu_unconfigure(struct ds_cap_state *cp, u64 req_num,
570 cpumask_t *mask)
571{
572 struct ds_data *resp;
573 int resp_len, ncpus, cpu;
574 unsigned long flags;
575
576 ncpus = cpus_weight(*mask);
577 resp_len = dr_cpu_size_response(ncpus);
578 resp = kzalloc(resp_len, GFP_KERNEL);
579 if (!resp)
580 return -ENOMEM;
581
582 dr_cpu_init_response(resp, req_num, cp->handle,
583 resp_len, ncpus, mask,
584 DR_CPU_STAT_UNCONFIGURED);
585
586 for_each_cpu_mask(cpu, *mask) {
587 int err;
588
589 printk(KERN_INFO PFX "CPU[%d]: Shutting down cpu %d...\n",
590 smp_processor_id(), cpu);
591 err = cpu_down(cpu);
592 if (err)
593 dr_cpu_mark(resp, cpu, ncpus,
594 DR_CPU_RES_FAILURE,
595 DR_CPU_STAT_CONFIGURED);
596 }
597
598 spin_lock_irqsave(&ds_lock, flags);
599 ds_send(ds_info->lp, resp, resp_len);
600 spin_unlock_irqrestore(&ds_lock, flags);
601
602 kfree(resp);
603
604 return 0;
605}
606
607static void process_dr_cpu_list(struct ds_cap_state *cp)
608{
609 struct dr_cpu_queue_entry *qp, *tmp;
610 unsigned long flags;
611 LIST_HEAD(todo);
612 cpumask_t mask;
613
614 spin_lock_irqsave(&ds_lock, flags);
615 list_splice(&dr_cpu_work_list, &todo);
616 INIT_LIST_HEAD(&dr_cpu_work_list);
617 spin_unlock_irqrestore(&ds_lock, flags);
618
619 list_for_each_entry_safe(qp, tmp, &todo, list) {
620 struct ds_data *data = (struct ds_data *) qp->req;
621 struct dr_cpu_tag *tag = (struct dr_cpu_tag *) (data + 1);
622 u32 *cpu_list = (u32 *) (tag + 1);
623 u64 req_num = tag->req_num;
624 unsigned int i;
625 int err;
626
627 switch (tag->type) {
628 case DR_CPU_CONFIGURE:
629 case DR_CPU_UNCONFIGURE:
630 case DR_CPU_FORCE_UNCONFIGURE:
631 break;
632
633 default:
634 dr_cpu_send_error(cp, data);
635 goto next;
636 }
637
638 purge_dups(cpu_list, tag->num_records);
639
640 cpus_clear(mask);
641 for (i = 0; i < tag->num_records; i++) {
642 if (cpu_list[i] == CPU_SENTINEL)
643 continue;
644
645 if (cpu_list[i] < NR_CPUS)
646 cpu_set(cpu_list[i], mask);
647 }
648
649 if (tag->type == DR_CPU_CONFIGURE)
650 err = dr_cpu_configure(cp, req_num, &mask);
651 else
652 err = dr_cpu_unconfigure(cp, req_num, &mask);
653
654 if (err)
655 dr_cpu_send_error(cp, data);
656
657next:
658 list_del(&qp->list);
659 kfree(qp);
660 }
661}
662
663static int dr_cpu_thread(void *__unused)
664{
665 struct ds_cap_state *cp;
666 DEFINE_WAIT(wait);
667
668 cp = find_cap_by_string("dr-cpu");
669
670 while (1) {
671 prepare_to_wait(&dr_cpu_wait, &wait, TASK_INTERRUPTIBLE);
672 if (list_empty(&dr_cpu_work_list))
673 schedule();
674 finish_wait(&dr_cpu_wait, &wait);
675
676 if (kthread_should_stop())
677 break;
678
679 process_dr_cpu_list(cp);
680 }
681
682 return 0;
683}
684
685static void dr_cpu_data(struct ldc_channel *lp,
686 struct ds_cap_state *dp,
687 void *buf, int len)
688{
689 struct dr_cpu_queue_entry *qp;
690 struct ds_data *dpkt = buf;
691 struct dr_cpu_tag *rp;
692
693 rp = (struct dr_cpu_tag *) (dpkt + 1);
694
695 qp = kmalloc(sizeof(struct dr_cpu_queue_entry) + len, GFP_ATOMIC);
696 if (!qp) {
697 struct ds_cap_state *cp;
698
699 cp = find_cap_by_string("dr-cpu");
700 __dr_cpu_send_error(cp, dpkt);
701 } else {
702 memcpy(&qp->req, buf, len);
703 list_add_tail(&qp->list, &dr_cpu_work_list);
704 wake_up(&dr_cpu_wait);
705 }
706}
707#endif
708
709struct ds_pri_msg {
710 __u64 req_num;
711 __u64 type;
712#define DS_PRI_REQUEST 0x00
713#define DS_PRI_DATA 0x01
714#define DS_PRI_UPDATE 0x02
715};
716
717static void ds_pri_data(struct ldc_channel *lp,
718 struct ds_cap_state *dp,
719 void *buf, int len)
720{
721 struct ds_data *dpkt = buf;
722 struct ds_pri_msg *rp;
723
724 rp = (struct ds_pri_msg *) (dpkt + 1);
725
726 printk(KERN_INFO PFX "PRI REQ [%lx:%lx], len=%d\n",
727 rp->req_num, rp->type, len);
728}
729
730struct ds_var_hdr {
731 __u32 type;
732#define DS_VAR_SET_REQ 0x00
733#define DS_VAR_DELETE_REQ 0x01
734#define DS_VAR_SET_RESP 0x02
735#define DS_VAR_DELETE_RESP 0x03
736};
737
738struct ds_var_set_msg {
739 struct ds_var_hdr hdr;
740 char name_and_value[0];
741};
742
743struct ds_var_delete_msg {
744 struct ds_var_hdr hdr;
745 char name[0];
746};
747
748struct ds_var_resp {
749 struct ds_var_hdr hdr;
750 __u32 result;
751#define DS_VAR_SUCCESS 0x00
752#define DS_VAR_NO_SPACE 0x01
753#define DS_VAR_INVALID_VAR 0x02
754#define DS_VAR_INVALID_VAL 0x03
755#define DS_VAR_NOT_PRESENT 0x04
756};
757
758static DEFINE_MUTEX(ds_var_mutex);
759static int ds_var_doorbell;
760static int ds_var_response;
761
762static void ds_var_data(struct ldc_channel *lp,
763 struct ds_cap_state *dp,
764 void *buf, int len)
765{
766 struct ds_data *dpkt = buf;
767 struct ds_var_resp *rp;
768
769 rp = (struct ds_var_resp *) (dpkt + 1);
770
771 if (rp->hdr.type != DS_VAR_SET_RESP &&
772 rp->hdr.type != DS_VAR_DELETE_RESP)
773 return;
774
775 ds_var_response = rp->result;
776 wmb();
777 ds_var_doorbell = 1;
778}
779
780void ldom_set_var(const char *var, const char *value)
781{
782 struct ds_info *dp = ds_info;
783 struct ds_cap_state *cp;
784
785 cp = find_cap_by_string("var-config");
786 if (cp->state != CAP_STATE_REGISTERED)
787 cp = find_cap_by_string("var-config-backup");
788
789 if (cp->state == CAP_STATE_REGISTERED) {
790 union {
791 struct {
792 struct ds_data data;
793 struct ds_var_set_msg msg;
794 } header;
795 char all[512];
796 } pkt;
797 unsigned long flags;
798 char *base, *p;
799 int msg_len, loops;
800
801 memset(&pkt, 0, sizeof(pkt));
802 pkt.header.data.tag.type = DS_DATA;
803 pkt.header.data.handle = cp->handle;
804 pkt.header.msg.hdr.type = DS_VAR_SET_REQ;
805 base = p = &pkt.header.msg.name_and_value[0];
806 strcpy(p, var);
807 p += strlen(var) + 1;
808 strcpy(p, value);
809 p += strlen(value) + 1;
810
811 msg_len = (sizeof(struct ds_data) +
812 sizeof(struct ds_var_set_msg) +
813 (p - base));
814 msg_len = (msg_len + 3) & ~3;
815 pkt.header.data.tag.len = msg_len - sizeof(struct ds_msg_tag);
816
817 mutex_lock(&ds_var_mutex);
818
819 spin_lock_irqsave(&ds_lock, flags);
820 ds_var_doorbell = 0;
821 ds_var_response = -1;
822
823 ds_send(dp->lp, &pkt, msg_len);
824 spin_unlock_irqrestore(&ds_lock, flags);
825
826 loops = 1000;
827 while (ds_var_doorbell == 0) {
828 if (loops-- < 0)
829 break;
830 barrier();
831 udelay(100);
832 }
833
834 mutex_unlock(&ds_var_mutex);
835
836 if (ds_var_doorbell == 0 ||
837 ds_var_response != DS_VAR_SUCCESS)
838 printk(KERN_ERR PFX "var-config [%s:%s] "
839 "failed, response(%d).\n",
840 var, value,
841 ds_var_response);
842 } else {
843 printk(KERN_ERR PFX "var-config not registered so "
844 "could not set (%s) variable to (%s).\n",
845 var, value);
846 }
847}
848
849void ldom_reboot(const char *boot_command)
850{
851 /* Don't bother with any of this if the boot_command
852 * is empty.
853 */
854 if (boot_command && strlen(boot_command)) {
855 char full_boot_str[256];
856
857 strcpy(full_boot_str, "boot ");
858 strcpy(full_boot_str + strlen("boot "), boot_command);
859
860 ldom_set_var("reboot-command", full_boot_str);
861 }
862 sun4v_mach_sir();
863}
864
865void ldom_power_off(void)
866{
867 sun4v_mach_exit(0);
868}
869
870static void ds_conn_reset(struct ds_info *dp)
871{
872 printk(KERN_ERR PFX "ds_conn_reset() from %p\n",
873 __builtin_return_address(0));
874}
875
876static int register_services(struct ds_info *dp)
877{
878 struct ldc_channel *lp = dp->lp;
879 int i;
880
881 for (i = 0; i < ARRAY_SIZE(ds_states); i++) {
882 struct {
883 struct ds_reg_req req;
884 u8 id_buf[256];
885 } pbuf;
886 struct ds_cap_state *cp = &ds_states[i];
887 int err, msg_len;
888 u64 new_count;
889
890 if (cp->state == CAP_STATE_REGISTERED)
891 continue;
892
893 new_count = sched_clock() & 0xffffffff;
894 cp->handle = ((u64) i << 32) | new_count;
895
896 msg_len = (sizeof(struct ds_reg_req) +
897 strlen(cp->service_id));
898
899 memset(&pbuf, 0, sizeof(pbuf));
900 pbuf.req.tag.type = DS_REG_REQ;
901 pbuf.req.tag.len = (msg_len - sizeof(struct ds_msg_tag));
902 pbuf.req.handle = cp->handle;
903 pbuf.req.major = 1;
904 pbuf.req.minor = 0;
905 strcpy(pbuf.req.svc_id, cp->service_id);
906
907 err = ds_send(lp, &pbuf, msg_len);
908 if (err > 0)
909 cp->state = CAP_STATE_REG_SENT;
910 }
911 return 0;
912}
913
914static int ds_handshake(struct ds_info *dp, struct ds_msg_tag *pkt)
915{
916
917 if (dp->hs_state == DS_HS_START) {
918 if (pkt->type != DS_INIT_ACK)
919 goto conn_reset;
920
921 dp->hs_state = DS_HS_DONE;
922
923 return register_services(dp);
924 }
925
926 if (dp->hs_state != DS_HS_DONE)
927 goto conn_reset;
928
929 if (pkt->type == DS_REG_ACK) {
930 struct ds_reg_ack *ap = (struct ds_reg_ack *) pkt;
931 struct ds_cap_state *cp = find_cap(ap->handle);
932
933 if (!cp) {
934 printk(KERN_ERR PFX "REG ACK for unknown handle %lx\n",
935 ap->handle);
936 return 0;
937 }
938 printk(KERN_INFO PFX "Registered %s service.\n",
939 cp->service_id);
940 cp->state = CAP_STATE_REGISTERED;
941 } else if (pkt->type == DS_REG_NACK) {
942 struct ds_reg_nack *np = (struct ds_reg_nack *) pkt;
943 struct ds_cap_state *cp = find_cap(np->handle);
944
945 if (!cp) {
946 printk(KERN_ERR PFX "REG NACK for "
947 "unknown handle %lx\n",
948 np->handle);
949 return 0;
950 }
951 printk(KERN_INFO PFX "Could not register %s service\n",
952 cp->service_id);
953 cp->state = CAP_STATE_UNKNOWN;
954 }
955
956 return 0;
957
958conn_reset:
959 ds_conn_reset(dp);
960 return -ECONNRESET;
961}
962
963static int ds_data(struct ds_info *dp, struct ds_msg_tag *pkt, int len)
964{
965 struct ds_data *dpkt = (struct ds_data *) pkt;
966 struct ds_cap_state *cp = find_cap(dpkt->handle);
967
968 if (!cp) {
969 struct ds_data_nack nack = {
970 .tag = {
971 .type = DS_NACK,
972 .len = (sizeof(struct ds_data_nack) -
973 sizeof(struct ds_msg_tag)),
974 },
975 .handle = dpkt->handle,
976 .result = DS_INV_HDL,
977 };
978
979 printk(KERN_ERR PFX "Data for unknown handle %lu\n",
980 dpkt->handle);
981 ds_send(dp->lp, &nack, sizeof(nack));
982 } else {
983 cp->data(dp->lp, cp, dpkt, len);
984 }
985 return 0;
986}
987
988static void ds_up(struct ds_info *dp)
989{
990 struct ldc_channel *lp = dp->lp;
991 struct ds_ver_req req;
992 int err;
993
994 req.tag.type = DS_INIT_REQ;
995 req.tag.len = sizeof(req) - sizeof(struct ds_msg_tag);
996 req.ver.major = 1;
997 req.ver.minor = 0;
998
999 err = ds_send(lp, &req, sizeof(req));
1000 if (err > 0)
1001 dp->hs_state = DS_HS_START;
1002}
1003
1004static void ds_event(void *arg, int event)
1005{
1006 struct ds_info *dp = arg;
1007 struct ldc_channel *lp = dp->lp;
1008 unsigned long flags;
1009 int err;
1010
1011 spin_lock_irqsave(&ds_lock, flags);
1012
1013 if (event == LDC_EVENT_UP) {
1014 ds_up(dp);
1015 spin_unlock_irqrestore(&ds_lock, flags);
1016 return;
1017 }
1018
1019 if (event != LDC_EVENT_DATA_READY) {
1020 printk(KERN_WARNING PFX "Unexpected LDC event %d\n", event);
1021 spin_unlock_irqrestore(&ds_lock, flags);
1022 return;
1023 }
1024
1025 err = 0;
1026 while (1) {
1027 struct ds_msg_tag *tag;
1028
1029 err = ldc_read(lp, dp->rcv_buf, sizeof(*tag));
1030
1031 if (unlikely(err < 0)) {
1032 if (err == -ECONNRESET)
1033 ds_conn_reset(dp);
1034 break;
1035 }
1036 if (err == 0)
1037 break;
1038
1039 tag = dp->rcv_buf;
1040 err = ldc_read(lp, tag + 1, tag->len);
1041
1042 if (unlikely(err < 0)) {
1043 if (err == -ECONNRESET)
1044 ds_conn_reset(dp);
1045 break;
1046 }
1047 if (err < tag->len)
1048 break;
1049
1050 if (tag->type < DS_DATA)
1051 err = ds_handshake(dp, dp->rcv_buf);
1052 else
1053 err = ds_data(dp, dp->rcv_buf,
1054 sizeof(*tag) + err);
1055 if (err == -ECONNRESET)
1056 break;
1057 }
1058
1059 spin_unlock_irqrestore(&ds_lock, flags);
1060}
1061
1062static int __devinit ds_probe(struct vio_dev *vdev,
1063 const struct vio_device_id *id)
1064{
1065 static int ds_version_printed;
1066 struct ldc_channel_config ds_cfg = {
1067 .event = ds_event,
1068 .mtu = 4096,
1069 .mode = LDC_MODE_STREAM,
1070 };
1071 struct ldc_channel *lp;
1072 struct ds_info *dp;
1073 int err;
1074
1075 if (ds_version_printed++ == 0)
1076 printk(KERN_INFO "%s", version);
1077
1078 dp = kzalloc(sizeof(*dp), GFP_KERNEL);
1079 err = -ENOMEM;
1080 if (!dp)
1081 goto out_err;
1082
1083 dp->rcv_buf = kzalloc(4096, GFP_KERNEL);
1084 if (!dp->rcv_buf)
1085 goto out_free_dp;
1086
1087 dp->rcv_buf_len = 4096;
1088
1089 ds_cfg.tx_irq = vdev->tx_irq;
1090 ds_cfg.rx_irq = vdev->rx_irq;
1091
1092 lp = ldc_alloc(vdev->channel_id, &ds_cfg, dp);
1093 if (IS_ERR(lp)) {
1094 err = PTR_ERR(lp);
1095 goto out_free_rcv_buf;
1096 }
1097 dp->lp = lp;
1098
1099 err = ldc_bind(lp, "DS");
1100 if (err)
1101 goto out_free_ldc;
1102
1103 ds_info = dp;
1104
1105 start_powerd();
1106
1107 return err;
1108
1109out_free_ldc:
1110 ldc_free(dp->lp);
1111
1112out_free_rcv_buf:
1113 kfree(dp->rcv_buf);
1114
1115out_free_dp:
1116 kfree(dp);
1117
1118out_err:
1119 return err;
1120}
1121
1122static int ds_remove(struct vio_dev *vdev)
1123{
1124 return 0;
1125}
1126
1127static struct vio_device_id ds_match[] = {
1128 {
1129 .type = "domain-services-port",
1130 },
1131 {},
1132};
1133
1134static struct vio_driver ds_driver = {
1135 .id_table = ds_match,
1136 .probe = ds_probe,
1137 .remove = ds_remove,
1138 .driver = {
1139 .name = "ds",
1140 .owner = THIS_MODULE,
1141 }
1142};
1143
1144static int __init ds_init(void)
1145{
1146 int i;
1147
1148 for (i = 0; i < ARRAY_SIZE(ds_states); i++)
1149 ds_states[i].handle = ((u64)i << 32);
1150
1151#ifdef CONFIG_HOTPLUG_CPU
1152 kthread_run(dr_cpu_thread, NULL, "kdrcpud");
1153#endif
1154
1155 return vio_register_driver(&ds_driver);
1156}
1157
1158subsys_initcall(ds_init);
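
To show how the pieces above are meant to be consumed, here is a hedged sketch of a caller of the exported LDOM helpers: the reboot path can hand the OBP boot command to the LDOM manager through the "reboot-command" variable before the soft reset. The caller below is hypothetical (the real call sites live outside this hunk); only ldom_reboot() and ldom_set_var() come from this file, and ldom_domaining_enabled is declared in ldc.c further down.

extern int ldom_domaining_enabled;	/* set when running under LDOMs */

static void machine_restart_sketch(char *cmd)
{
	if (ldom_domaining_enabled)
		ldom_reboot(cmd);	/* stores "reboot-command", then SIR */
	/* ... otherwise fall back to the usual PROM reboot path ... */
}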
diff --git a/arch/sparc64/kernel/hvtramp.S b/arch/sparc64/kernel/hvtramp.S
new file mode 100644
index 000000000000..76a090e2c2a8
--- /dev/null
+++ b/arch/sparc64/kernel/hvtramp.S
@@ -0,0 +1,139 @@
1/* hvtramp.S: Hypervisor start-cpu trampoline code.
2 *
3 * Copyright (C) 2007 David S. Miller <davem@davemloft.net>
4 */
5
6#include <asm/thread_info.h>
7#include <asm/hypervisor.h>
8#include <asm/scratchpad.h>
9#include <asm/spitfire.h>
10#include <asm/hvtramp.h>
11#include <asm/pstate.h>
12#include <asm/ptrace.h>
13#include <asm/asi.h>
14
15 .text
16 .align 8
17 .globl hv_cpu_startup, hv_cpu_startup_end
18
19 /* This code executes directly out of the hypervisor
20 * with physical addressing (va==pa). %o0 contains
21 * our client argument which for Linux points to
22 * a descriptor data structure which defines the
23 * MMU entries we need to load up.
24 *
25 * After we set things up we enable the MMU and call
26 * into the kernel.
27 *
28 * First setup basic privileged cpu state.
29 */
30hv_cpu_startup:
31 wrpr %g0, 0, %gl
32 wrpr %g0, 15, %pil
33 wrpr %g0, 0, %canrestore
34 wrpr %g0, 0, %otherwin
35 wrpr %g0, 6, %cansave
36 wrpr %g0, 6, %cleanwin
37 wrpr %g0, 0, %cwp
38 wrpr %g0, 0, %wstate
39 wrpr %g0, 0, %tl
40
41 sethi %hi(sparc64_ttable_tl0), %g1
42 wrpr %g1, %tba
43
44 mov %o0, %l0
45
46 lduw [%l0 + HVTRAMP_DESCR_CPU], %g1
47 mov SCRATCHPAD_CPUID, %g2
48 stxa %g1, [%g2] ASI_SCRATCHPAD
49
50 ldx [%l0 + HVTRAMP_DESCR_FAULT_INFO_VA], %g2
51 stxa %g2, [%g0] ASI_SCRATCHPAD
52
53 mov 0, %l1
54 lduw [%l0 + HVTRAMP_DESCR_NUM_MAPPINGS], %l2
55 add %l0, HVTRAMP_DESCR_MAPS, %l3
56
571: ldx [%l3 + HVTRAMP_MAPPING_VADDR], %o0
58 clr %o1
59 ldx [%l3 + HVTRAMP_MAPPING_TTE], %o2
60 mov HV_MMU_IMMU | HV_MMU_DMMU, %o3
61 mov HV_FAST_MMU_MAP_PERM_ADDR, %o5
62 ta HV_FAST_TRAP
63
64 brnz,pn %o0, 80f
65 nop
66
67 add %l1, 1, %l1
68 cmp %l1, %l2
69 blt,a,pt %xcc, 1b
70 add %l3, HVTRAMP_MAPPING_SIZE, %l3
71
72 ldx [%l0 + HVTRAMP_DESCR_FAULT_INFO_PA], %o0
73 mov HV_FAST_MMU_FAULT_AREA_CONF, %o5
74 ta HV_FAST_TRAP
75
76 brnz,pn %o0, 80f
77 nop
78
79 wrpr %g0, (PSTATE_PRIV | PSTATE_PEF), %pstate
80
81 ldx [%l0 + HVTRAMP_DESCR_THREAD_REG], %l6
82
83 mov 1, %o0
84 set 1f, %o1
85 mov HV_FAST_MMU_ENABLE, %o5
86 ta HV_FAST_TRAP
87
88 ba,pt %xcc, 80f
89 nop
90
911:
92 wr %g0, 0, %fprs
93 wr %g0, ASI_P, %asi
94
95 mov PRIMARY_CONTEXT, %g7
96 stxa %g0, [%g7] ASI_MMU
97 membar #Sync
98
99 mov SECONDARY_CONTEXT, %g7
100 stxa %g0, [%g7] ASI_MMU
101 membar #Sync
102
103 mov %l6, %g6
104 ldx [%g6 + TI_TASK], %g4
105
106 mov 1, %g5
107 sllx %g5, THREAD_SHIFT, %g5
108 sub %g5, (STACKFRAME_SZ + STACK_BIAS), %g5
109 add %g6, %g5, %sp
110 mov 0, %fp
111
112 call init_irqwork_curcpu
113 nop
114 call hard_smp_processor_id
115 nop
116
117 mov %o0, %o1
118 mov 0, %o0
119 mov 0, %o2
120 call sun4v_init_mondo_queues
121 mov 1, %o3
122
123 call init_cur_cpu_trap
124 mov %g6, %o0
125
126 wrpr %g0, (PSTATE_PRIV | PSTATE_PEF | PSTATE_IE), %pstate
127
128 call smp_callin
129 nop
130 call cpu_idle
131 mov 0, %o0
132 call cpu_panic
133 nop
134
13580: ba,pt %xcc, 80b
136 nop
137
138 .align 8
139hv_cpu_startup_end:
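
The HVTRAMP_DESCR_* and HVTRAMP_MAPPING_* offsets the trampoline reads through %o0/%l0 describe a descriptor that the boot CPU fills in before asking the hypervisor to start the target CPU. The real layout lives in asm/hvtramp.h, which is not part of this hunk; the C sketch below is inferred purely from the offset names used above and is illustrative only:

/* Hypothetical mirror of the descriptor hv_cpu_startup walks. */
struct hvtramp_mapping_sketch {
	unsigned long	vaddr;		/* HVTRAMP_MAPPING_VADDR */
	unsigned long	tte;		/* HVTRAMP_MAPPING_TTE */
};

struct hvtramp_descr_sketch {
	unsigned int	cpu;		/* HVTRAMP_DESCR_CPU */
	unsigned int	num_mappings;	/* HVTRAMP_DESCR_NUM_MAPPINGS */
	unsigned long	fault_info_va;	/* HVTRAMP_DESCR_FAULT_INFO_VA */
	unsigned long	fault_info_pa;	/* HVTRAMP_DESCR_FAULT_INFO_PA */
	unsigned long	thread_reg;	/* HVTRAMP_DESCR_THREAD_REG */
	struct hvtramp_mapping_sketch maps[1];	/* num_mappings entries */
};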
diff --git a/arch/sparc64/kernel/irq.c b/arch/sparc64/kernel/irq.c
index 6b6165d36fd8..8cb3358674f5 100644
--- a/arch/sparc64/kernel/irq.c
+++ b/arch/sparc64/kernel/irq.c
@@ -293,6 +293,11 @@ static void sun4u_irq_enable(unsigned int virt_irq)
 	}
 }
 
+static void sun4u_set_affinity(unsigned int virt_irq, cpumask_t mask)
+{
+	sun4u_irq_enable(virt_irq);
+}
+
 static void sun4u_irq_disable(unsigned int virt_irq)
 {
 	struct irq_handler_data *data = get_irq_chip_data(virt_irq);
@@ -309,6 +314,10 @@ static void sun4u_irq_disable(unsigned int virt_irq)
 static void sun4u_irq_end(unsigned int virt_irq)
 {
 	struct irq_handler_data *data = get_irq_chip_data(virt_irq);
+	struct irq_desc *desc = irq_desc + virt_irq;
+
+	if (unlikely(desc->status & (IRQ_DISABLED|IRQ_INPROGRESS)))
+		return;
 
 	if (likely(data))
 		upa_writeq(ICLR_IDLE, data->iclr);
@@ -340,6 +349,24 @@ static void sun4v_irq_enable(unsigned int virt_irq)
 	}
 }
 
+static void sun4v_set_affinity(unsigned int virt_irq, cpumask_t mask)
+{
+	struct ino_bucket *bucket = virt_irq_to_bucket(virt_irq);
+	unsigned int ino = bucket - &ivector_table[0];
+
+	if (likely(bucket)) {
+		unsigned long cpuid;
+		int err;
+
+		cpuid = irq_choose_cpu(virt_irq);
+
+		err = sun4v_intr_settarget(ino, cpuid);
+		if (err != HV_EOK)
+			printk("sun4v_intr_settarget(%x,%lu): err(%d)\n",
+			       ino, cpuid, err);
+	}
+}
+
 static void sun4v_irq_disable(unsigned int virt_irq)
 {
 	struct ino_bucket *bucket = virt_irq_to_bucket(virt_irq);
@@ -373,6 +400,10 @@ static void sun4v_irq_end(unsigned int virt_irq)
 {
 	struct ino_bucket *bucket = virt_irq_to_bucket(virt_irq);
 	unsigned int ino = bucket - &ivector_table[0];
+	struct irq_desc *desc = irq_desc + virt_irq;
+
+	if (unlikely(desc->status & (IRQ_DISABLED|IRQ_INPROGRESS)))
+		return;
 
 	if (likely(bucket)) {
 		int err;
@@ -418,6 +449,28 @@ static void sun4v_virq_enable(unsigned int virt_irq)
 	}
 }
 
+static void sun4v_virt_set_affinity(unsigned int virt_irq, cpumask_t mask)
+{
+	struct ino_bucket *bucket = virt_irq_to_bucket(virt_irq);
+	unsigned int ino = bucket - &ivector_table[0];
+
+	if (likely(bucket)) {
+		unsigned long cpuid, dev_handle, dev_ino;
+		int err;
+
+		cpuid = irq_choose_cpu(virt_irq);
+
+		dev_handle = ino & IMAP_IGN;
+		dev_ino = ino & IMAP_INO;
+
+		err = sun4v_vintr_set_target(dev_handle, dev_ino, cpuid);
+		if (err != HV_EOK)
+			printk("sun4v_vintr_set_target(%lx,%lx,%lu): "
+			       "err(%d)\n",
+			       dev_handle, dev_ino, cpuid, err);
+	}
+}
+
 static void sun4v_virq_disable(unsigned int virt_irq)
 {
 	struct ino_bucket *bucket = virt_irq_to_bucket(virt_irq);
@@ -443,6 +496,10 @@ static void sun4v_virq_end(unsigned int virt_irq)
 {
 	struct ino_bucket *bucket = virt_irq_to_bucket(virt_irq);
 	unsigned int ino = bucket - &ivector_table[0];
+	struct irq_desc *desc = irq_desc + virt_irq;
+
+	if (unlikely(desc->status & (IRQ_DISABLED|IRQ_INPROGRESS)))
+		return;
 
 	if (likely(bucket)) {
 		unsigned long dev_handle, dev_ino;
@@ -477,6 +534,7 @@ static struct irq_chip sun4u_irq = {
 	.enable		= sun4u_irq_enable,
 	.disable	= sun4u_irq_disable,
 	.end		= sun4u_irq_end,
+	.set_affinity	= sun4u_set_affinity,
 };
 
 static struct irq_chip sun4u_irq_ack = {
@@ -485,6 +543,7 @@ static struct irq_chip sun4u_irq_ack = {
 	.disable	= sun4u_irq_disable,
 	.ack		= run_pre_handler,
 	.end		= sun4u_irq_end,
+	.set_affinity	= sun4u_set_affinity,
 };
 
 static struct irq_chip sun4v_irq = {
@@ -492,6 +551,7 @@ static struct irq_chip sun4v_irq = {
 	.enable		= sun4v_irq_enable,
 	.disable	= sun4v_irq_disable,
 	.end		= sun4v_irq_end,
+	.set_affinity	= sun4v_set_affinity,
 };
 
 static struct irq_chip sun4v_irq_ack = {
@@ -500,6 +560,7 @@ static struct irq_chip sun4v_irq_ack = {
 	.disable	= sun4v_irq_disable,
 	.ack		= run_pre_handler,
 	.end		= sun4v_irq_end,
+	.set_affinity	= sun4v_set_affinity,
 };
 
 #ifdef CONFIG_PCI_MSI
@@ -511,6 +572,7 @@ static struct irq_chip sun4v_msi = {
 	.disable	= sun4v_msi_disable,
 	.ack		= run_pre_handler,
 	.end		= sun4v_irq_end,
+	.set_affinity	= sun4v_set_affinity,
 };
 #endif
 
@@ -519,6 +581,7 @@ static struct irq_chip sun4v_virq = {
 	.enable		= sun4v_virq_enable,
 	.disable	= sun4v_virq_disable,
 	.end		= sun4v_virq_end,
+	.set_affinity	= sun4v_virt_set_affinity,
 };
 
 static struct irq_chip sun4v_virq_ack = {
@@ -527,6 +590,7 @@ static struct irq_chip sun4v_virq_ack = {
 	.disable	= sun4v_virq_disable,
 	.ack		= run_pre_handler,
 	.end		= sun4v_virq_end,
+	.set_affinity	= sun4v_virt_set_affinity,
 };
 
 void irq_install_pre_handler(int virt_irq,
@@ -739,6 +803,26 @@ void handler_irq(int irq, struct pt_regs *regs)
 	set_irq_regs(old_regs);
 }
 
+#ifdef CONFIG_HOTPLUG_CPU
+void fixup_irqs(void)
+{
+	unsigned int irq;
+
+	for (irq = 0; irq < NR_IRQS; irq++) {
+		unsigned long flags;
+
+		spin_lock_irqsave(&irq_desc[irq].lock, flags);
+		if (irq_desc[irq].action &&
+		    !(irq_desc[irq].status & IRQ_PER_CPU)) {
+			if (irq_desc[irq].chip->set_affinity)
+				irq_desc[irq].chip->set_affinity(irq,
+					irq_desc[irq].affinity);
+		}
+		spin_unlock_irqrestore(&irq_desc[irq].lock, flags);
+	}
+}
+#endif
+
 struct sun5_timer {
 	u64	count0;
 	u64	limit0;
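
The new ->set_affinity hooks are reached from two directions: fixup_irqs() above replays each descriptor's recorded affinity after a CPU comes or goes, and user space can retarget an interrupt through the generic /proc/irq interface. A hypothetical user-space sketch of the latter path (the proc file is standard kernel infrastructure, not something this patch adds):

#include <stdio.h>

/* Write a hex cpumask such as "2" to /proc/irq/<irq>/smp_affinity;
 * on this platform the request ends up in chip->set_affinity, e.g.
 * sun4v_set_affinity() above.
 */
static int set_irq_affinity(int irq, const char *hexmask)
{
	char path[64];
	FILE *f;

	snprintf(path, sizeof(path), "/proc/irq/%d/smp_affinity", irq);
	f = fopen(path, "w");
	if (!f)
		return -1;
	fprintf(f, "%s\n", hexmask);
	return fclose(f);
}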
diff --git a/arch/sparc64/kernel/ldc.c b/arch/sparc64/kernel/ldc.c
new file mode 100644
index 000000000000..85a2be0b0962
--- /dev/null
+++ b/arch/sparc64/kernel/ldc.c
@@ -0,0 +1,2373 @@
1/* ldc.c: Logical Domain Channel link-layer protocol driver.
2 *
3 * Copyright (C) 2007 David S. Miller <davem@davemloft.net>
4 */
5
6#include <linux/kernel.h>
7#include <linux/module.h>
8#include <linux/slab.h>
9#include <linux/spinlock.h>
10#include <linux/delay.h>
11#include <linux/errno.h>
12#include <linux/string.h>
13#include <linux/scatterlist.h>
14#include <linux/interrupt.h>
15#include <linux/list.h>
16#include <linux/init.h>
17
18#include <asm/hypervisor.h>
19#include <asm/iommu.h>
20#include <asm/page.h>
21#include <asm/ldc.h>
22#include <asm/mdesc.h>
23
24#define DRV_MODULE_NAME "ldc"
25#define PFX DRV_MODULE_NAME ": "
26#define DRV_MODULE_VERSION "1.0"
27#define DRV_MODULE_RELDATE "June 25, 2007"
28
29static char version[] __devinitdata =
30 DRV_MODULE_NAME ".c:v" DRV_MODULE_VERSION " (" DRV_MODULE_RELDATE ")\n";
31#define LDC_PACKET_SIZE 64
32
33/* Packet header layout for unreliable and reliable mode frames.
34 * When in RAW mode, packets are simply straight 64-byte payloads
35 * with no headers.
36 */
37struct ldc_packet {
38 u8 type;
39#define LDC_CTRL 0x01
40#define LDC_DATA 0x02
41#define LDC_ERR 0x10
42
43 u8 stype;
44#define LDC_INFO 0x01
45#define LDC_ACK 0x02
46#define LDC_NACK 0x04
47
48 u8 ctrl;
49#define LDC_VERS 0x01 /* Link Version */
50#define LDC_RTS 0x02 /* Request To Send */
51#define LDC_RTR 0x03 /* Ready To Receive */
52#define LDC_RDX 0x04 /* Ready for Data eXchange */
53#define LDC_CTRL_MSK 0x0f
54
55 u8 env;
56#define LDC_LEN 0x3f
57#define LDC_FRAG_MASK 0xc0
58#define LDC_START 0x40
59#define LDC_STOP 0x80
60
61 u32 seqid;
62
63 union {
64 u8 u_data[LDC_PACKET_SIZE - 8];
65 struct {
66 u32 pad;
67 u32 ackid;
68 u8 r_data[LDC_PACKET_SIZE - 8 - 8];
69 } r;
70 } u;
71};
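/* Editorial note (not part of the original file): the framing math
 * follows from the struct above.  Every frame is LDC_PACKET_SIZE (64)
 * bytes, so an unreliable-mode frame carries 64 - 8 == 56 payload
 * bytes (u_data), while reliable/stream frames lose another 8 bytes
 * to pad/ackid and carry 64 - 8 - 8 == 48 bytes (r_data).  The low
 * six bits of env (LDC_LEN) hold the payload length of a single
 * frame, and LDC_START/LDC_STOP bracket the first and last fragments
 * of a multi-frame message.
 */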
72
73struct ldc_version {
74 u16 major;
75 u16 minor;
76};
77
78/* Ordered from highest major version to lowest. */
79static struct ldc_version ver_arr[] = {
80 { .major = 1, .minor = 0 },
81};
82
83#define LDC_DEFAULT_MTU (4 * LDC_PACKET_SIZE)
84#define LDC_DEFAULT_NUM_ENTRIES (PAGE_SIZE / LDC_PACKET_SIZE)
85
86struct ldc_channel;
87
88struct ldc_mode_ops {
89 int (*write)(struct ldc_channel *, const void *, unsigned int);
90 int (*read)(struct ldc_channel *, void *, unsigned int);
91};
92
93static const struct ldc_mode_ops raw_ops;
94static const struct ldc_mode_ops nonraw_ops;
95static const struct ldc_mode_ops stream_ops;
96
97int ldom_domaining_enabled;
98
99struct ldc_iommu {
100 /* Protects arena alloc/free. */
101 spinlock_t lock;
102 struct iommu_arena arena;
103 struct ldc_mtable_entry *page_table;
104};
105
106struct ldc_channel {
107 /* Protects all operations that depend upon channel state. */
108 spinlock_t lock;
109
110 unsigned long id;
111
112 u8 *mssbuf;
113 u32 mssbuf_len;
114 u32 mssbuf_off;
115
116 struct ldc_packet *tx_base;
117 unsigned long tx_head;
118 unsigned long tx_tail;
119 unsigned long tx_num_entries;
120 unsigned long tx_ra;
121
122 unsigned long tx_acked;
123
124 struct ldc_packet *rx_base;
125 unsigned long rx_head;
126 unsigned long rx_tail;
127 unsigned long rx_num_entries;
128 unsigned long rx_ra;
129
130 u32 rcv_nxt;
131 u32 snd_nxt;
132
133 unsigned long chan_state;
134
135 struct ldc_channel_config cfg;
136 void *event_arg;
137
138 const struct ldc_mode_ops *mops;
139
140 struct ldc_iommu iommu;
141
142 struct ldc_version ver;
143
144 u8 hs_state;
145#define LDC_HS_CLOSED 0x00
146#define LDC_HS_OPEN 0x01
147#define LDC_HS_GOTVERS 0x02
148#define LDC_HS_SENTRTR 0x03
149#define LDC_HS_GOTRTR 0x04
150#define LDC_HS_COMPLETE 0x10
151
152 u8 flags;
153#define LDC_FLAG_ALLOCED_QUEUES 0x01
154#define LDC_FLAG_REGISTERED_QUEUES 0x02
155#define LDC_FLAG_REGISTERED_IRQS 0x04
156#define LDC_FLAG_RESET 0x10
157
158 u8 mss;
159 u8 state;
160
161#define LDC_IRQ_NAME_MAX 32
162 char rx_irq_name[LDC_IRQ_NAME_MAX];
163 char tx_irq_name[LDC_IRQ_NAME_MAX];
164
165 struct hlist_head mh_list;
166
167 struct hlist_node list;
168};
169
170#define ldcdbg(TYPE, f, a...) \
171do { if (lp->cfg.debug & LDC_DEBUG_##TYPE) \
172 printk(KERN_INFO PFX "ID[%lu] " f, lp->id, ## a); \
173} while (0)
174
175static const char *state_to_str(u8 state)
176{
177 switch (state) {
178 case LDC_STATE_INVALID:
179 return "INVALID";
180 case LDC_STATE_INIT:
181 return "INIT";
182 case LDC_STATE_BOUND:
183 return "BOUND";
184 case LDC_STATE_READY:
185 return "READY";
186 case LDC_STATE_CONNECTED:
187 return "CONNECTED";
188 default:
189 return "<UNKNOWN>";
190 }
191}
192
193static void ldc_set_state(struct ldc_channel *lp, u8 state)
194{
195 ldcdbg(STATE, "STATE (%s) --> (%s)\n",
196 state_to_str(lp->state),
197 state_to_str(state));
198
199 lp->state = state;
200}
201
202static unsigned long __advance(unsigned long off, unsigned long num_entries)
203{
204 off += LDC_PACKET_SIZE;
205 if (off == (num_entries * LDC_PACKET_SIZE))
206 off = 0;
207
208 return off;
209}
210
211static unsigned long rx_advance(struct ldc_channel *lp, unsigned long off)
212{
213 return __advance(off, lp->rx_num_entries);
214}
215
216static unsigned long tx_advance(struct ldc_channel *lp, unsigned long off)
217{
218 return __advance(off, lp->tx_num_entries);
219}
220
221static struct ldc_packet *handshake_get_tx_packet(struct ldc_channel *lp,
222 unsigned long *new_tail)
223{
224 struct ldc_packet *p;
225 unsigned long t;
226
227 t = tx_advance(lp, lp->tx_tail);
228 if (t == lp->tx_head)
229 return NULL;
230
231 *new_tail = t;
232
233 p = lp->tx_base;
234 return p + (lp->tx_tail / LDC_PACKET_SIZE);
235}
236
237/* When we are in reliable or stream mode, we have to track the next packet
238 * we haven't gotten an ACK for in the TX queue using tx_acked. We have
239 * to be careful not to stomp over the queue past that point. During
240 * the handshake, we don't have TX data packets pending in the queue
241 * and that's why handshake_get_tx_packet() need not be mindful of
242 * lp->tx_acked.
243 */
244static unsigned long head_for_data(struct ldc_channel *lp)
245{
246 if (lp->cfg.mode == LDC_MODE_STREAM)
247 return lp->tx_acked;
248 return lp->tx_head;
249}
250
251static int tx_has_space_for(struct ldc_channel *lp, unsigned int size)
252{
253 unsigned long limit, tail, new_tail, diff;
254 unsigned int mss;
255
256 limit = head_for_data(lp);
257 tail = lp->tx_tail;
258 new_tail = tx_advance(lp, tail);
259 if (new_tail == limit)
260 return 0;
261
262 if (limit > new_tail)
263 diff = limit - new_tail;
264 else
265 diff = (limit +
266 ((lp->tx_num_entries * LDC_PACKET_SIZE) - new_tail));
267 diff /= LDC_PACKET_SIZE;
268 mss = lp->mss;
269
270 if (diff * mss < size)
271 return 0;
272
273 return 1;
274}
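/* Worked example (illustrative, not part of the original file):
 * tx_has_space_for() deliberately leaves one slot unused.  If limit ==
 * head_for_data() is at offset 0 and tx_tail sits in the last slot,
 * tx_advance() wraps new_tail to 0 == limit and the queue reports
 * full - the usual ring-buffer convention for distinguishing a full
 * queue from an empty one without a separate element count.
 */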
275
276static struct ldc_packet *data_get_tx_packet(struct ldc_channel *lp,
277 unsigned long *new_tail)
278{
279 struct ldc_packet *p;
280 unsigned long h, t;
281
282 h = head_for_data(lp);
283 t = tx_advance(lp, lp->tx_tail);
284 if (t == h)
285 return NULL;
286
287 *new_tail = t;
288
289 p = lp->tx_base;
290 return p + (lp->tx_tail / LDC_PACKET_SIZE);
291}
292
293static int set_tx_tail(struct ldc_channel *lp, unsigned long tail)
294{
295 unsigned long orig_tail = lp->tx_tail;
296 int limit = 1000;
297
298 lp->tx_tail = tail;
299 while (limit-- > 0) {
300 unsigned long err;
301
302 err = sun4v_ldc_tx_set_qtail(lp->id, tail);
303 if (!err)
304 return 0;
305
306 if (err != HV_EWOULDBLOCK) {
307 lp->tx_tail = orig_tail;
308 return -EINVAL;
309 }
310 udelay(1);
311 }
312
313 lp->tx_tail = orig_tail;
314 return -EBUSY;
315}
316
317/* This just updates the head value in the hypervisor using
318 * a polling loop with a timeout. The caller takes care of
319 * updating software state representing the head change, if any.
320 */
321static int __set_rx_head(struct ldc_channel *lp, unsigned long head)
322{
323 int limit = 1000;
324
325 while (limit-- > 0) {
326 unsigned long err;
327
328 err = sun4v_ldc_rx_set_qhead(lp->id, head);
329 if (!err)
330 return 0;
331
332 if (err != HV_EWOULDBLOCK)
333 return -EINVAL;
334
335 udelay(1);
336 }
337
338 return -EBUSY;
339}
340
341static int send_tx_packet(struct ldc_channel *lp,
342 struct ldc_packet *p,
343 unsigned long new_tail)
344{
345 BUG_ON(p != (lp->tx_base + (lp->tx_tail / LDC_PACKET_SIZE)));
346
347 return set_tx_tail(lp, new_tail);
348}
349
350static struct ldc_packet *handshake_compose_ctrl(struct ldc_channel *lp,
351 u8 stype, u8 ctrl,
352 void *data, int dlen,
353 unsigned long *new_tail)
354{
355 struct ldc_packet *p = handshake_get_tx_packet(lp, new_tail);
356
357 if (p) {
358 memset(p, 0, sizeof(*p));
359 p->type = LDC_CTRL;
360 p->stype = stype;
361 p->ctrl = ctrl;
362 if (data)
363 memcpy(p->u.u_data, data, dlen);
364 }
365 return p;
366}
367
368static int start_handshake(struct ldc_channel *lp)
369{
370 struct ldc_packet *p;
371 struct ldc_version *ver;
372 unsigned long new_tail;
373
374 ver = &ver_arr[0];
375
376 ldcdbg(HS, "SEND VER INFO maj[%u] min[%u]\n",
377 ver->major, ver->minor);
378
379 p = handshake_compose_ctrl(lp, LDC_INFO, LDC_VERS,
380 ver, sizeof(*ver), &new_tail);
381 if (p) {
382 int err = send_tx_packet(lp, p, new_tail);
383 if (!err)
384 lp->flags &= ~LDC_FLAG_RESET;
385 return err;
386 }
387 return -EBUSY;
388}
389
390static int send_version_nack(struct ldc_channel *lp,
391 u16 major, u16 minor)
392{
393 struct ldc_packet *p;
394 struct ldc_version ver;
395 unsigned long new_tail;
396
397 ver.major = major;
398 ver.minor = minor;
399
400 p = handshake_compose_ctrl(lp, LDC_NACK, LDC_VERS,
401 &ver, sizeof(ver), &new_tail);
402 if (p) {
403 ldcdbg(HS, "SEND VER NACK maj[%u] min[%u]\n",
404 ver.major, ver.minor);
405
406 return send_tx_packet(lp, p, new_tail);
407 }
408 return -EBUSY;
409}
410
411static int send_version_ack(struct ldc_channel *lp,
412 struct ldc_version *vp)
413{
414 struct ldc_packet *p;
415 unsigned long new_tail;
416
417 p = handshake_compose_ctrl(lp, LDC_ACK, LDC_VERS,
418 vp, sizeof(*vp), &new_tail);
419 if (p) {
420 ldcdbg(HS, "SEND VER ACK maj[%u] min[%u]\n",
421 vp->major, vp->minor);
422
423 return send_tx_packet(lp, p, new_tail);
424 }
425 return -EBUSY;
426}
427
428static int send_rts(struct ldc_channel *lp)
429{
430 struct ldc_packet *p;
431 unsigned long new_tail;
432
433 p = handshake_compose_ctrl(lp, LDC_INFO, LDC_RTS, NULL, 0,
434 &new_tail);
435 if (p) {
436 p->env = lp->cfg.mode;
437 p->seqid = 0;
438 lp->rcv_nxt = 0;
439
440 ldcdbg(HS, "SEND RTS env[0x%x] seqid[0x%x]\n",
441 p->env, p->seqid);
442
443 return send_tx_packet(lp, p, new_tail);
444 }
445 return -EBUSY;
446}
447
448static int send_rtr(struct ldc_channel *lp)
449{
450 struct ldc_packet *p;
451 unsigned long new_tail;
452
453 p = handshake_compose_ctrl(lp, LDC_INFO, LDC_RTR, NULL, 0,
454 &new_tail);
455 if (p) {
456 p->env = lp->cfg.mode;
457 p->seqid = 0;
458
459 ldcdbg(HS, "SEND RTR env[0x%x] seqid[0x%x]\n",
460 p->env, p->seqid);
461
462 return send_tx_packet(lp, p, new_tail);
463 }
464 return -EBUSY;
465}
466
467static int send_rdx(struct ldc_channel *lp)
468{
469 struct ldc_packet *p;
470 unsigned long new_tail;
471
472 p = handshake_compose_ctrl(lp, LDC_INFO, LDC_RDX, NULL, 0,
473 &new_tail);
474 if (p) {
475 p->env = 0;
476 p->seqid = ++lp->snd_nxt;
477 p->u.r.ackid = lp->rcv_nxt;
478
479 ldcdbg(HS, "SEND RDX env[0x%x] seqid[0x%x] ackid[0x%x]\n",
480 p->env, p->seqid, p->u.r.ackid);
481
482 return send_tx_packet(lp, p, new_tail);
483 }
484 return -EBUSY;
485}
486
487static int send_data_nack(struct ldc_channel *lp, struct ldc_packet *data_pkt)
488{
489 struct ldc_packet *p;
490 unsigned long new_tail;
491 int err;
492
493 p = data_get_tx_packet(lp, &new_tail);
494 if (!p)
495 return -EBUSY;
496 memset(p, 0, sizeof(*p));
497 p->type = data_pkt->type;
498 p->stype = LDC_NACK;
499 p->ctrl = data_pkt->ctrl & LDC_CTRL_MSK;
500 p->seqid = lp->snd_nxt + 1;
501 p->u.r.ackid = lp->rcv_nxt;
502
503 ldcdbg(HS, "SEND DATA NACK type[0x%x] ctl[0x%x] seq[0x%x] ack[0x%x]\n",
504 p->type, p->ctrl, p->seqid, p->u.r.ackid);
505
506 err = send_tx_packet(lp, p, new_tail);
507 if (!err)
508 lp->snd_nxt++;
509
510 return err;
511}
512
513static int ldc_abort(struct ldc_channel *lp)
514{
515 unsigned long hv_err;
516
517 ldcdbg(STATE, "ABORT\n");
518
519 /* We report but do not act upon the hypervisor errors because
520 * there really isn't much we can do if they fail at this point.
521 */
522 hv_err = sun4v_ldc_tx_qconf(lp->id, lp->tx_ra, lp->tx_num_entries);
523 if (hv_err)
524 printk(KERN_ERR PFX "ldc_abort: "
525 "sun4v_ldc_tx_qconf(%lx,%lx,%lx) failed, err=%lu\n",
526 lp->id, lp->tx_ra, lp->tx_num_entries, hv_err);
527
528 hv_err = sun4v_ldc_tx_get_state(lp->id,
529 &lp->tx_head,
530 &lp->tx_tail,
531 &lp->chan_state);
532 if (hv_err)
533 printk(KERN_ERR PFX "ldc_abort: "
534 "sun4v_ldc_tx_get_state(%lx,...) failed, err=%lu\n",
535 lp->id, hv_err);
536
537 hv_err = sun4v_ldc_rx_qconf(lp->id, lp->rx_ra, lp->rx_num_entries);
538 if (hv_err)
539 printk(KERN_ERR PFX "ldc_abort: "
540 "sun4v_ldc_rx_qconf(%lx,%lx,%lx) failed, err=%lu\n",
541 lp->id, lp->rx_ra, lp->rx_num_entries, hv_err);
542
543 /* Refetch the RX queue state as well, because we could be invoked
544 * here in the queue processing context.
545 */
546 hv_err = sun4v_ldc_rx_get_state(lp->id,
547 &lp->rx_head,
548 &lp->rx_tail,
549 &lp->chan_state);
550 if (hv_err)
551 printk(KERN_ERR PFX "ldc_abort: "
552 "sun4v_ldc_rx_get_state(%lx,...) failed, err=%lu\n",
553 lp->id, hv_err);
554
555 return -ECONNRESET;
556}
557
558static struct ldc_version *find_by_major(u16 major)
559{
560 struct ldc_version *ret = NULL;
561 int i;
562
563 for (i = 0; i < ARRAY_SIZE(ver_arr); i++) {
564 struct ldc_version *v = &ver_arr[i];
565 if (v->major <= major) {
566 ret = v;
567 break;
568 }
569 }
570 return ret;
571}
572
573static int process_ver_info(struct ldc_channel *lp, struct ldc_version *vp)
574{
575 struct ldc_version *vap;
576 int err;
577
578 ldcdbg(HS, "GOT VERSION INFO major[%x] minor[%x]\n",
579 vp->major, vp->minor);
580
581 if (lp->hs_state == LDC_HS_GOTVERS) {
582 lp->hs_state = LDC_HS_OPEN;
583 memset(&lp->ver, 0, sizeof(lp->ver));
584 }
585
586 vap = find_by_major(vp->major);
587 if (!vap) {
588 err = send_version_nack(lp, 0, 0);
589 } else if (vap->major != vp->major) {
590 err = send_version_nack(lp, vap->major, vap->minor);
591 } else {
592 struct ldc_version ver = *vp;
593 if (ver.minor > vap->minor)
594 ver.minor = vap->minor;
595 err = send_version_ack(lp, &ver);
596 if (!err) {
597 lp->ver = ver;
598 lp->hs_state = LDC_HS_GOTVERS;
599 }
600 }
601 if (err)
602 return ldc_abort(lp);
603
604 return 0;
605}
606
607static int process_ver_ack(struct ldc_channel *lp, struct ldc_version *vp)
608{
609 ldcdbg(HS, "GOT VERSION ACK major[%x] minor[%x]\n",
610 vp->major, vp->minor);
611
612 if (lp->hs_state == LDC_HS_GOTVERS) {
613 if (lp->ver.major != vp->major ||
614 lp->ver.minor != vp->minor)
615 return ldc_abort(lp);
616 } else {
617 lp->ver = *vp;
618 lp->hs_state = LDC_HS_GOTVERS;
619 }
620 if (send_rts(lp))
621 return ldc_abort(lp);
622 return 0;
623}
624
625static int process_ver_nack(struct ldc_channel *lp, struct ldc_version *vp)
626{
627 struct ldc_version *vap;
628
629 if ((vp->major == 0 && vp->minor == 0) ||
630 !(vap = find_by_major(vp->major))) {
631 return ldc_abort(lp);
632 } else {
633 struct ldc_packet *p;
634 unsigned long new_tail;
635
636 p = handshake_compose_ctrl(lp, LDC_INFO, LDC_VERS,
637 vap, sizeof(*vap),
638 &new_tail);
639 if (p)
640 return send_tx_packet(lp, p, new_tail);
641 else
642 return ldc_abort(lp);
643 }
644}
645
646static int process_version(struct ldc_channel *lp,
647 struct ldc_packet *p)
648{
649 struct ldc_version *vp;
650
651 vp = (struct ldc_version *) p->u.u_data;
652
653 switch (p->stype) {
654 case LDC_INFO:
655 return process_ver_info(lp, vp);
656
657 case LDC_ACK:
658 return process_ver_ack(lp, vp);
659
660 case LDC_NACK:
661 return process_ver_nack(lp, vp);
662
663 default:
664 return ldc_abort(lp);
665 }
666}
667
668static int process_rts(struct ldc_channel *lp,
669 struct ldc_packet *p)
670{
671 ldcdbg(HS, "GOT RTS stype[%x] seqid[%x] env[%x]\n",
672 p->stype, p->seqid, p->env);
673
674 if (p->stype != LDC_INFO ||
675 lp->hs_state != LDC_HS_GOTVERS ||
676 p->env != lp->cfg.mode)
677 return ldc_abort(lp);
678
679 lp->snd_nxt = p->seqid;
680 lp->rcv_nxt = p->seqid;
681 lp->hs_state = LDC_HS_SENTRTR;
682 if (send_rtr(lp))
683 return ldc_abort(lp);
684
685 return 0;
686}
687
688static int process_rtr(struct ldc_channel *lp,
689 struct ldc_packet *p)
690{
691 ldcdbg(HS, "GOT RTR stype[%x] seqid[%x] env[%x]\n",
692 p->stype, p->seqid, p->env);
693
694 if (p->stype != LDC_INFO ||
695 p->env != lp->cfg.mode)
696 return ldc_abort(lp);
697
698 lp->snd_nxt = p->seqid;
699 lp->hs_state = LDC_HS_COMPLETE;
700 ldc_set_state(lp, LDC_STATE_CONNECTED);
701 send_rdx(lp);
702
703 return LDC_EVENT_UP;
704}
705
706static int rx_seq_ok(struct ldc_channel *lp, u32 seqid)
707{
708 return lp->rcv_nxt + 1 == seqid;
709}
710
711static int process_rdx(struct ldc_channel *lp,
712 struct ldc_packet *p)
713{
714 ldcdbg(HS, "GOT RDX stype[%x] seqid[%x] env[%x] ackid[%x]\n",
715 p->stype, p->seqid, p->env, p->u.r.ackid);
716
717 if (p->stype != LDC_INFO ||
718 !(rx_seq_ok(lp, p->seqid)))
719 return ldc_abort(lp);
720
721 lp->rcv_nxt = p->seqid;
722
723 lp->hs_state = LDC_HS_COMPLETE;
724 ldc_set_state(lp, LDC_STATE_CONNECTED);
725
726 return LDC_EVENT_UP;
727}
728
729static int process_control_frame(struct ldc_channel *lp,
730 struct ldc_packet *p)
731{
732 switch (p->ctrl) {
733 case LDC_VERS:
734 return process_version(lp, p);
735
736 case LDC_RTS:
737 return process_rts(lp, p);
738
739 case LDC_RTR:
740 return process_rtr(lp, p);
741
742 case LDC_RDX:
743 return process_rdx(lp, p);
744
745 default:
746 return ldc_abort(lp);
747 }
748}
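
For orientation, the handlers above implement the whole four-way handshake. The toy trace below is hand-written from the code paths (it is not kernel output); "A" is the side that called ldc_connect() and "B" its peer:

#include <stdio.h>

int main(void)
{
	static const char *trace[] = {
		"A->B CTRL/INFO VERS   B: process_ver_info -> send ACK, GOTVERS",
		"B->A CTRL/ACK  VERS   A: process_ver_ack  -> send_rts, GOTVERS",
		"A->B CTRL/INFO RTS    B: process_rts      -> send_rtr, SENTRTR",
		"B->A CTRL/INFO RTR    A: process_rtr      -> send_rdx, COMPLETE, EVENT_UP",
		"A->B CTRL/INFO RDX    B: process_rdx      -> COMPLETE, EVENT_UP",
	};
	unsigned int i;

	for (i = 0; i < sizeof(trace) / sizeof(trace[0]); i++)
		puts(trace[i]);
	return 0;
}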
749
750static int process_error_frame(struct ldc_channel *lp,
751 struct ldc_packet *p)
752{
753 return ldc_abort(lp);
754}
755
756static int process_data_ack(struct ldc_channel *lp,
757 struct ldc_packet *ack)
758{
759 unsigned long head = lp->tx_acked;
760 u32 ackid = ack->u.r.ackid;
761
762 while (1) {
763 struct ldc_packet *p = lp->tx_base + (head / LDC_PACKET_SIZE);
764
765 head = tx_advance(lp, head);
766
767 if (p->seqid == ackid) {
768 lp->tx_acked = head;
769 return 0;
770 }
771 if (head == lp->tx_tail)
772 return ldc_abort(lp);
773 }
774
775 return 0;
776}
777
778static void send_events(struct ldc_channel *lp, unsigned int event_mask)
779{
780 if (event_mask & LDC_EVENT_RESET)
781 lp->cfg.event(lp->event_arg, LDC_EVENT_RESET);
782 if (event_mask & LDC_EVENT_UP)
783 lp->cfg.event(lp->event_arg, LDC_EVENT_UP);
784 if (event_mask & LDC_EVENT_DATA_READY)
785 lp->cfg.event(lp->event_arg, LDC_EVENT_DATA_READY);
786}
787
788static irqreturn_t ldc_rx(int irq, void *dev_id)
789{
790 struct ldc_channel *lp = dev_id;
791 unsigned long orig_state, hv_err, flags;
792 unsigned int event_mask;
793
794 spin_lock_irqsave(&lp->lock, flags);
795
796 orig_state = lp->chan_state;
797 hv_err = sun4v_ldc_rx_get_state(lp->id,
798 &lp->rx_head,
799 &lp->rx_tail,
800 &lp->chan_state);
801
802 ldcdbg(RX, "RX state[0x%02lx:0x%02lx] head[0x%04lx] tail[0x%04lx]\n",
803 orig_state, lp->chan_state, lp->rx_head, lp->rx_tail);
804
805 event_mask = 0;
806
807 if (lp->cfg.mode == LDC_MODE_RAW &&
808 lp->chan_state == LDC_CHANNEL_UP) {
809 lp->hs_state = LDC_HS_COMPLETE;
810 ldc_set_state(lp, LDC_STATE_CONNECTED);
811
812 event_mask |= LDC_EVENT_UP;
813
814 orig_state = lp->chan_state;
815 }
816
817 /* If we are in reset state, flush the RX queue and ignore
818 * everything.
819 */
820 if (lp->flags & LDC_FLAG_RESET) {
821 (void) __set_rx_head(lp, lp->rx_tail);
822 goto out;
823 }
824
825 /* Once we finish the handshake, we let the ldc_read()
826 * paths do all of the control frame and state management.
827 * Just trigger the callback.
828 */
829 if (lp->hs_state == LDC_HS_COMPLETE) {
830handshake_complete:
831 if (lp->chan_state != orig_state) {
832 unsigned int event = LDC_EVENT_RESET;
833
834 if (lp->chan_state == LDC_CHANNEL_UP)
835 event = LDC_EVENT_UP;
836
837 event_mask |= event;
838 }
839 if (lp->rx_head != lp->rx_tail)
840 event_mask |= LDC_EVENT_DATA_READY;
841
842 goto out;
843 }
844
845 if (lp->chan_state != orig_state)
846 goto out;
847
848 while (lp->rx_head != lp->rx_tail) {
849 struct ldc_packet *p;
850 unsigned long new;
851 int err;
852
853 p = lp->rx_base + (lp->rx_head / LDC_PACKET_SIZE);
854
855 switch (p->type) {
856 case LDC_CTRL:
857 err = process_control_frame(lp, p);
858 if (err > 0)
859 event_mask |= err;
860 break;
861
862 case LDC_DATA:
863 event_mask |= LDC_EVENT_DATA_READY;
864 err = 0;
865 break;
866
867 case LDC_ERR:
868 err = process_error_frame(lp, p);
869 break;
870
871 default:
872 err = ldc_abort(lp);
873 break;
874 }
875
876 if (err < 0)
877 break;
878
879 new = lp->rx_head;
880 new += LDC_PACKET_SIZE;
881 if (new == (lp->rx_num_entries * LDC_PACKET_SIZE))
882 new = 0;
883 lp->rx_head = new;
884
885 err = __set_rx_head(lp, new);
886 if (err < 0) {
887 (void) ldc_abort(lp);
888 break;
889 }
890 if (lp->hs_state == LDC_HS_COMPLETE)
891 goto handshake_complete;
892 }
893
894out:
895 spin_unlock_irqrestore(&lp->lock, flags);
896
897 send_events(lp, event_mask);
898
899 return IRQ_HANDLED;
900}
901
902static irqreturn_t ldc_tx(int irq, void *dev_id)
903{
904 struct ldc_channel *lp = dev_id;
905 unsigned long flags, hv_err, orig_state;
906 unsigned int event_mask = 0;
907
908 spin_lock_irqsave(&lp->lock, flags);
909
910 orig_state = lp->chan_state;
911 hv_err = sun4v_ldc_tx_get_state(lp->id,
912 &lp->tx_head,
913 &lp->tx_tail,
914 &lp->chan_state);
915
916 ldcdbg(TX, " TX state[0x%02lx:0x%02lx] head[0x%04lx] tail[0x%04lx]\n",
917 orig_state, lp->chan_state, lp->tx_head, lp->tx_tail);
918
919 if (lp->cfg.mode == LDC_MODE_RAW &&
920 lp->chan_state == LDC_CHANNEL_UP) {
921 lp->hs_state = LDC_HS_COMPLETE;
922 ldc_set_state(lp, LDC_STATE_CONNECTED);
923
924 event_mask |= LDC_EVENT_UP;
925 }
926
927 spin_unlock_irqrestore(&lp->lock, flags);
928
929 send_events(lp, event_mask);
930
931 return IRQ_HANDLED;
932}
933
934/* XXX ldc_alloc() and ldc_free() need to run under a mutex so
935 * XXX that addition and removal from the ldc_channel_list has
936 * XXX atomicity, otherwise the __ldc_channel_exists() check is
937 * XXX totally pointless as another thread can slip into ldc_alloc()
938 * XXX and add a channel with the same ID. There also needs to be
939 * XXX a spinlock for ldc_channel_list.
940 */
941static HLIST_HEAD(ldc_channel_list);
942
943static int __ldc_channel_exists(unsigned long id)
944{
945 struct ldc_channel *lp;
946 struct hlist_node *n;
947
948 hlist_for_each_entry(lp, n, &ldc_channel_list, list) {
949 if (lp->id == id)
950 return 1;
951 }
952 return 0;
953}
954
955static int alloc_queue(const char *name, unsigned long num_entries,
956 struct ldc_packet **base, unsigned long *ra)
957{
958 unsigned long size, order;
959 void *q;
960
961 size = num_entries * LDC_PACKET_SIZE;
962 order = get_order(size);
963
964 q = (void *) __get_free_pages(GFP_KERNEL, order);
965 if (!q) {
966 printk(KERN_ERR PFX "Alloc of %s queue failed with "
967 "size=%lu order=%lu\n", name, size, order);
968 return -ENOMEM;
969 }
970
971 memset(q, 0, PAGE_SIZE << order);
972
973 *base = q;
974 *ra = __pa(q);
975
976 return 0;
977}
978
979static void free_queue(unsigned long num_entries, struct ldc_packet *q)
980{
981 unsigned long size, order;
982
983 if (!q)
984 return;
985
986 size = num_entries * LDC_PACKET_SIZE;
987 order = get_order(size);
988
989 free_pages((unsigned long)q, order);
990}
991
992/* XXX Make this configurable... XXX */
993#define LDC_IOTABLE_SIZE (8 * 1024)
994
995static int ldc_iommu_init(struct ldc_channel *lp)
996{
997 unsigned long sz, num_tsb_entries, tsbsize, order;
998 struct ldc_iommu *iommu = &lp->iommu;
999 struct ldc_mtable_entry *table;
1000 unsigned long hv_err;
1001 int err;
1002
1003 num_tsb_entries = LDC_IOTABLE_SIZE;
1004 tsbsize = num_tsb_entries * sizeof(struct ldc_mtable_entry);
1005
1006 spin_lock_init(&iommu->lock);
1007
1008 sz = num_tsb_entries / 8;
1009 sz = (sz + 7UL) & ~7UL;
1010 iommu->arena.map = kzalloc(sz, GFP_KERNEL);
1011 if (!iommu->arena.map) {
1012 printk(KERN_ERR PFX "Alloc of arena map failed, sz=%lu\n", sz);
1013 return -ENOMEM;
1014 }
1015
1016 iommu->arena.limit = num_tsb_entries;
1017
1018 order = get_order(tsbsize);
1019
1020 table = (struct ldc_mtable_entry *)
1021 __get_free_pages(GFP_KERNEL, order);
1022 err = -ENOMEM;
1023 if (!table) {
1024 printk(KERN_ERR PFX "Alloc of MTE table failed, "
1025 "size=%lu order=%lu\n", tsbsize, order);
1026 goto out_free_map;
1027 }
1028
1029 memset(table, 0, PAGE_SIZE << order);
1030
1031 iommu->page_table = table;
1032
1033 hv_err = sun4v_ldc_set_map_table(lp->id, __pa(table),
1034 num_tsb_entries);
1035 err = -EINVAL;
1036 if (hv_err)
1037 goto out_free_table;
1038
1039 return 0;
1040
1041out_free_table:
1042 free_pages((unsigned long) table, order);
1043 iommu->page_table = NULL;
1044
1045out_free_map:
1046 kfree(iommu->arena.map);
1047 iommu->arena.map = NULL;
1048
1049 return err;
1050}
1051
1052static void ldc_iommu_release(struct ldc_channel *lp)
1053{
1054 struct ldc_iommu *iommu = &lp->iommu;
1055 unsigned long num_tsb_entries, tsbsize, order;
1056
1057 (void) sun4v_ldc_set_map_table(lp->id, 0, 0);
1058
1059 num_tsb_entries = iommu->arena.limit;
1060 tsbsize = num_tsb_entries * sizeof(struct ldc_mtable_entry);
1061 order = get_order(tsbsize);
1062
1063 free_pages((unsigned long) iommu->page_table, order);
1064 iommu->page_table = NULL;
1065
1066 kfree(iommu->arena.map);
1067 iommu->arena.map = NULL;
1068}
1069
1070struct ldc_channel *ldc_alloc(unsigned long id,
1071 const struct ldc_channel_config *cfgp,
1072 void *event_arg)
1073{
1074 struct ldc_channel *lp;
1075 const struct ldc_mode_ops *mops;
1076 unsigned long dummy1, dummy2, hv_err;
1077 u8 mss, *mssbuf;
1078 int err;
1079
1080 err = -ENODEV;
1081 if (!ldom_domaining_enabled)
1082 goto out_err;
1083
1084 err = -EINVAL;
1085 if (!cfgp)
1086 goto out_err;
1087
1088 switch (cfgp->mode) {
1089 case LDC_MODE_RAW:
1090 mops = &raw_ops;
1091 mss = LDC_PACKET_SIZE;
1092 break;
1093
1094 case LDC_MODE_UNRELIABLE:
1095 mops = &nonraw_ops;
1096 mss = LDC_PACKET_SIZE - 8;
1097 break;
1098
1099 case LDC_MODE_STREAM:
1100 mops = &stream_ops;
1101 mss = LDC_PACKET_SIZE - 8 - 8;
1102 break;
1103
1104 default:
1105 goto out_err;
1106 }
1107
1108 if (!cfgp->event || !event_arg || !cfgp->rx_irq || !cfgp->tx_irq)
1109 goto out_err;
1110
1111 hv_err = sun4v_ldc_tx_qinfo(id, &dummy1, &dummy2);
1112 err = -ENODEV;
1113 if (hv_err == HV_ECHANNEL)
1114 goto out_err;
1115
1116 err = -EEXIST;
1117 if (__ldc_channel_exists(id))
1118 goto out_err;
1119
1120 mssbuf = NULL;
1121
1122 lp = kzalloc(sizeof(*lp), GFP_KERNEL);
1123 err = -ENOMEM;
1124 if (!lp)
1125 goto out_err;
1126
1127 spin_lock_init(&lp->lock);
1128
1129 lp->id = id;
1130
1131 err = ldc_iommu_init(lp);
1132 if (err)
1133 goto out_free_ldc;
1134
1135 lp->mops = mops;
1136 lp->mss = mss;
1137
1138 lp->cfg = *cfgp;
1139 if (!lp->cfg.mtu)
1140 lp->cfg.mtu = LDC_DEFAULT_MTU;
1141
1142 if (lp->cfg.mode == LDC_MODE_STREAM) {
1143 mssbuf = kzalloc(lp->cfg.mtu, GFP_KERNEL);
1144 if (!mssbuf) {
1145 err = -ENOMEM;
1146 goto out_free_iommu;
1147 }
1148 lp->mssbuf = mssbuf;
1149 }
1150
1151 lp->event_arg = event_arg;
1152
1153 /* XXX allow setting via ldc_channel_config to override defaults
1154 * XXX or use some formula based upon mtu
1155 */
1156 lp->tx_num_entries = LDC_DEFAULT_NUM_ENTRIES;
1157 lp->rx_num_entries = LDC_DEFAULT_NUM_ENTRIES;
1158
1159 err = alloc_queue("TX", lp->tx_num_entries,
1160 &lp->tx_base, &lp->tx_ra);
1161 if (err)
1162 goto out_free_mssbuf;
1163
1164 err = alloc_queue("RX", lp->rx_num_entries,
1165 &lp->rx_base, &lp->rx_ra);
1166 if (err)
1167 goto out_free_txq;
1168
1169 lp->flags |= LDC_FLAG_ALLOCED_QUEUES;
1170
1171 lp->hs_state = LDC_HS_CLOSED;
1172 ldc_set_state(lp, LDC_STATE_INIT);
1173
1174 INIT_HLIST_NODE(&lp->list);
1175 hlist_add_head(&lp->list, &ldc_channel_list);
1176
1177 INIT_HLIST_HEAD(&lp->mh_list);
1178
1179 return lp;
1180
1181out_free_txq:
1182 free_queue(lp->tx_num_entries, lp->tx_base);
1183
1184out_free_mssbuf:
1185 if (mssbuf)
1186 kfree(mssbuf);
1187
1188out_free_iommu:
1189 ldc_iommu_release(lp);
1190
1191out_free_ldc:
1192 kfree(lp);
1193
1194out_err:
1195 return ERR_PTR(err);
1196}
1197EXPORT_SYMBOL(ldc_alloc);
1198
1199void ldc_free(struct ldc_channel *lp)
1200{
1201 if (lp->flags & LDC_FLAG_REGISTERED_IRQS) {
1202 free_irq(lp->cfg.rx_irq, lp);
1203 free_irq(lp->cfg.tx_irq, lp);
1204 }
1205
1206 if (lp->flags & LDC_FLAG_REGISTERED_QUEUES) {
1207 sun4v_ldc_tx_qconf(lp->id, 0, 0);
1208 sun4v_ldc_rx_qconf(lp->id, 0, 0);
1209 lp->flags &= ~LDC_FLAG_REGISTERED_QUEUES;
1210 }
1211 if (lp->flags & LDC_FLAG_ALLOCED_QUEUES) {
1212 free_queue(lp->tx_num_entries, lp->tx_base);
1213 free_queue(lp->rx_num_entries, lp->rx_base);
1214 lp->flags &= ~LDC_FLAG_ALLOCED_QUEUES;
1215 }
1216
1217 hlist_del(&lp->list);
1218
1219 if (lp->mssbuf)
1220 kfree(lp->mssbuf);
1221
1222 ldc_iommu_release(lp);
1223
1224 kfree(lp);
1225}
1226EXPORT_SYMBOL(ldc_free);
1227
1228/* Bind the channel. This registers the LDC queues with
1229 * the hypervisor and puts the channel into a pseudo-listening
1230 * state. This does not initiate a handshake, ldc_connect() does
1231 * that.
1232 */
1233int ldc_bind(struct ldc_channel *lp, const char *name)
1234{
1235 unsigned long hv_err, flags;
1236 int err = -EINVAL;
1237
1238 spin_lock_irqsave(&lp->lock, flags);
1239
1240 if (!name)
1241 goto out_err;
1242
1243 if (lp->state != LDC_STATE_INIT)
1244 goto out_err;
1245
1246 snprintf(lp->rx_irq_name, LDC_IRQ_NAME_MAX, "%s RX", name);
1247 snprintf(lp->tx_irq_name, LDC_IRQ_NAME_MAX, "%s TX", name);
1248
1249 err = request_irq(lp->cfg.rx_irq, ldc_rx,
1250 IRQF_SAMPLE_RANDOM | IRQF_SHARED,
1251 lp->rx_irq_name, lp);
1252 if (err)
1253 goto out_err;
1254
1255 err = request_irq(lp->cfg.tx_irq, ldc_tx,
1256 IRQF_SAMPLE_RANDOM | IRQF_SHARED,
1257 lp->tx_irq_name, lp);
1258 if (err)
1259 goto out_free_rx_irq;
1260
1261
1262 lp->flags |= LDC_FLAG_REGISTERED_IRQS;
1263
1264 err = -ENODEV;
1265 hv_err = sun4v_ldc_tx_qconf(lp->id, 0, 0);
1266 if (hv_err)
1267 goto out_free_tx_irq;
1268
1269 hv_err = sun4v_ldc_tx_qconf(lp->id, lp->tx_ra, lp->tx_num_entries);
1270 if (hv_err)
1271 goto out_free_tx_irq;
1272
1273 hv_err = sun4v_ldc_rx_qconf(lp->id, 0, 0);
1274 if (hv_err)
1275 goto out_unmap_tx;
1276
1277 hv_err = sun4v_ldc_rx_qconf(lp->id, lp->rx_ra, lp->rx_num_entries);
1278 if (hv_err)
1279 goto out_unmap_tx;
1280
1281 lp->flags |= LDC_FLAG_REGISTERED_QUEUES;
1282
1283 hv_err = sun4v_ldc_tx_get_state(lp->id,
1284 &lp->tx_head,
1285 &lp->tx_tail,
1286 &lp->chan_state);
1287 err = -EBUSY;
1288 if (hv_err)
1289 goto out_unmap_rx;
1290
1291 lp->tx_acked = lp->tx_head;
1292
1293 lp->hs_state = LDC_HS_OPEN;
1294 ldc_set_state(lp, LDC_STATE_BOUND);
1295
1296 spin_unlock_irqrestore(&lp->lock, flags);
1297
1298 return 0;
1299
1300out_unmap_rx:
1301 lp->flags &= ~LDC_FLAG_REGISTERED_QUEUES;
1302 sun4v_ldc_rx_qconf(lp->id, 0, 0);
1303
1304out_unmap_tx:
1305 sun4v_ldc_tx_qconf(lp->id, 0, 0);
1306
1307out_free_tx_irq:
1308 lp->flags &= ~LDC_FLAG_REGISTERED_IRQS;
1309 free_irq(lp->cfg.tx_irq, lp);
1310
1311out_free_rx_irq:
1312 free_irq(lp->cfg.rx_irq, lp);
1313
1314out_err:
1315 spin_unlock_irqrestore(&lp->lock, flags);
1316
1317 return err;
1318}
1319EXPORT_SYMBOL(ldc_bind);
1320
1321int ldc_connect(struct ldc_channel *lp)
1322{
1323 unsigned long flags;
1324 int err;
1325
1326 if (lp->cfg.mode == LDC_MODE_RAW)
1327 return -EINVAL;
1328
1329 spin_lock_irqsave(&lp->lock, flags);
1330
1331 if (!(lp->flags & LDC_FLAG_ALLOCED_QUEUES) ||
1332 !(lp->flags & LDC_FLAG_REGISTERED_QUEUES) ||
1333 lp->hs_state != LDC_HS_OPEN)
1334 err = -EINVAL;
1335 else
1336 err = start_handshake(lp);
1337
1338 spin_unlock_irqrestore(&lp->lock, flags);
1339
1340 return err;
1341}
1342EXPORT_SYMBOL(ldc_connect);
1343
1344int ldc_disconnect(struct ldc_channel *lp)
1345{
1346 unsigned long hv_err, flags;
1347 int err;
1348
1349 if (lp->cfg.mode == LDC_MODE_RAW)
1350 return -EINVAL;
1351
1352 if (!(lp->flags & LDC_FLAG_ALLOCED_QUEUES) ||
1353 !(lp->flags & LDC_FLAG_REGISTERED_QUEUES))
1354 return -EINVAL;
1355
1356 spin_lock_irqsave(&lp->lock, flags);
1357
1358 err = -ENODEV;
1359 hv_err = sun4v_ldc_tx_qconf(lp->id, 0, 0);
1360 if (hv_err)
1361 goto out_err;
1362
1363 hv_err = sun4v_ldc_tx_qconf(lp->id, lp->tx_ra, lp->tx_num_entries);
1364 if (hv_err)
1365 goto out_err;
1366
1367 hv_err = sun4v_ldc_rx_qconf(lp->id, 0, 0);
1368 if (hv_err)
1369 goto out_err;
1370
1371 hv_err = sun4v_ldc_rx_qconf(lp->id, lp->rx_ra, lp->rx_num_entries);
1372 if (hv_err)
1373 goto out_err;
1374
1375 ldc_set_state(lp, LDC_STATE_BOUND);
1376 lp->hs_state = LDC_HS_OPEN;
1377 lp->flags |= LDC_FLAG_RESET;
1378
1379 spin_unlock_irqrestore(&lp->lock, flags);
1380
1381 return 0;
1382
1383out_err:
1384 sun4v_ldc_tx_qconf(lp->id, 0, 0);
1385 sun4v_ldc_rx_qconf(lp->id, 0, 0);
1386 free_irq(lp->cfg.tx_irq, lp);
1387 free_irq(lp->cfg.rx_irq, lp);
1388 lp->flags &= ~(LDC_FLAG_REGISTERED_IRQS |
1389 LDC_FLAG_REGISTERED_QUEUES);
1390 ldc_set_state(lp, LDC_STATE_INIT);
1391
1392 spin_unlock_irqrestore(&lp->lock, flags);
1393
1394 return err;
1395}
1396EXPORT_SYMBOL(ldc_disconnect);
1397
1398int ldc_state(struct ldc_channel *lp)
1399{
1400 return lp->state;
1401}
1402EXPORT_SYMBOL(ldc_state);
1403
1404static int write_raw(struct ldc_channel *lp, const void *buf, unsigned int size)
1405{
1406 struct ldc_packet *p;
1407 unsigned long new_tail;
1408 int err;
1409
1410 if (size > LDC_PACKET_SIZE)
1411 return -EMSGSIZE;
1412
1413 p = data_get_tx_packet(lp, &new_tail);
1414 if (!p)
1415 return -EAGAIN;
1416
1417 memcpy(p, buf, size);
1418
1419 err = send_tx_packet(lp, p, new_tail);
1420 if (!err)
1421 err = size;
1422
1423 return err;
1424}
1425
1426static int read_raw(struct ldc_channel *lp, void *buf, unsigned int size)
1427{
1428 struct ldc_packet *p;
1429 unsigned long hv_err, new;
1430 int err;
1431
1432 if (size < LDC_PACKET_SIZE)
1433 return -EINVAL;
1434
1435 hv_err = sun4v_ldc_rx_get_state(lp->id,
1436 &lp->rx_head,
1437 &lp->rx_tail,
1438 &lp->chan_state);
1439 if (hv_err)
1440 return ldc_abort(lp);
1441
1442 if (lp->chan_state == LDC_CHANNEL_DOWN ||
1443 lp->chan_state == LDC_CHANNEL_RESETTING)
1444 return -ECONNRESET;
1445
1446 if (lp->rx_head == lp->rx_tail)
1447 return 0;
1448
1449 p = lp->rx_base + (lp->rx_head / LDC_PACKET_SIZE);
1450 memcpy(buf, p, LDC_PACKET_SIZE);
1451
1452 new = rx_advance(lp, lp->rx_head);
1453 lp->rx_head = new;
1454
1455 err = __set_rx_head(lp, new);
1456 if (err < 0)
1457 err = -ECONNRESET;
1458 else
1459 err = LDC_PACKET_SIZE;
1460
1461 return err;
1462}
1463
1464static const struct ldc_mode_ops raw_ops = {
1465 .write = write_raw,
1466 .read = read_raw,
1467};
1468
1469static int write_nonraw(struct ldc_channel *lp, const void *buf,
1470 unsigned int size)
1471{
1472 unsigned long hv_err, tail;
1473 unsigned int copied;
1474 u32 seq;
1475 int err;
1476
1477 hv_err = sun4v_ldc_tx_get_state(lp->id, &lp->tx_head, &lp->tx_tail,
1478 &lp->chan_state);
1479 if (unlikely(hv_err))
1480 return -EBUSY;
1481
1482 if (unlikely(lp->chan_state != LDC_CHANNEL_UP))
1483 return ldc_abort(lp);
1484
1485 if (!tx_has_space_for(lp, size))
1486 return -EAGAIN;
1487
1488 seq = lp->snd_nxt;
1489 copied = 0;
1490 tail = lp->tx_tail;
1491 while (copied < size) {
1492 struct ldc_packet *p = lp->tx_base + (tail / LDC_PACKET_SIZE);
1493 u8 *data = ((lp->cfg.mode == LDC_MODE_UNRELIABLE) ?
1494 p->u.u_data :
1495 p->u.r.r_data);
1496 int data_len;
1497
1498 p->type = LDC_DATA;
1499 p->stype = LDC_INFO;
1500 p->ctrl = 0;
1501
1502 data_len = size - copied;
1503 if (data_len > lp->mss)
1504 data_len = lp->mss;
1505
1506 BUG_ON(data_len > LDC_LEN);
1507
1508 p->env = (data_len |
1509 (copied == 0 ? LDC_START : 0) |
1510 (data_len == size - copied ? LDC_STOP : 0));
1511
1512 p->seqid = ++seq;
1513
1514 ldcdbg(DATA, "SENT DATA [%02x:%02x:%02x:%02x:%08x]\n",
1515 p->type,
1516 p->stype,
1517 p->ctrl,
1518 p->env,
1519 p->seqid);
1520
1521 memcpy(data, buf, data_len);
1522 buf += data_len;
1523 copied += data_len;
1524
1525 tail = tx_advance(lp, tail);
1526 }
1527
1528 err = set_tx_tail(lp, tail);
1529 if (!err) {
1530 lp->snd_nxt = seq;
1531 err = size;
1532 }
1533
1534 return err;
1535}
1536
1537static int rx_bad_seq(struct ldc_channel *lp, struct ldc_packet *p,
1538 struct ldc_packet *first_frag)
1539{
1540 int err;
1541
1542 if (first_frag)
1543 lp->rcv_nxt = first_frag->seqid - 1;
1544
1545 err = send_data_nack(lp, p);
1546 if (err)
1547 return err;
1548
1549 err = __set_rx_head(lp, lp->rx_tail);
1550 if (err < 0)
1551 return ldc_abort(lp);
1552
1553 return 0;
1554}
1555
1556static int data_ack_nack(struct ldc_channel *lp, struct ldc_packet *p)
1557{
1558 if (p->stype & LDC_ACK) {
1559 int err = process_data_ack(lp, p);
1560 if (err)
1561 return err;
1562 }
1563 if (p->stype & LDC_NACK)
1564 return ldc_abort(lp);
1565
1566 return 0;
1567}
1568
1569static int rx_data_wait(struct ldc_channel *lp, unsigned long cur_head)
1570{
1571 unsigned long dummy;
1572 int limit = 1000;
1573
1574 ldcdbg(DATA, "DATA WAIT cur_head[%lx] rx_head[%lx] rx_tail[%lx]\n",
1575 cur_head, lp->rx_head, lp->rx_tail);
1576 while (limit-- > 0) {
1577 unsigned long hv_err;
1578
1579 hv_err = sun4v_ldc_rx_get_state(lp->id,
1580 &dummy,
1581 &lp->rx_tail,
1582 &lp->chan_state);
1583 if (hv_err)
1584 return ldc_abort(lp);
1585
1586 if (lp->chan_state == LDC_CHANNEL_DOWN ||
1587 lp->chan_state == LDC_CHANNEL_RESETTING)
1588 return -ECONNRESET;
1589
1590 if (cur_head != lp->rx_tail) {
1591 ldcdbg(DATA, "DATA WAIT DONE "
1592 "head[%lx] tail[%lx] chan_state[%lx]\n",
1593 dummy, lp->rx_tail, lp->chan_state);
1594 return 0;
1595 }
1596
1597 udelay(1);
1598 }
1599 return -EAGAIN;
1600}
1601
1602static int rx_set_head(struct ldc_channel *lp, unsigned long head)
1603{
1604 int err = __set_rx_head(lp, head);
1605
1606 if (err < 0)
1607 return ldc_abort(lp);
1608
1609 lp->rx_head = head;
1610 return 0;
1611}
1612
1613static void send_data_ack(struct ldc_channel *lp)
1614{
1615 unsigned long new_tail;
1616 struct ldc_packet *p;
1617
1618 p = data_get_tx_packet(lp, &new_tail);
1619 if (likely(p)) {
1620 int err;
1621
1622 memset(p, 0, sizeof(*p));
1623 p->type = LDC_DATA;
1624 p->stype = LDC_ACK;
1625 p->ctrl = 0;
1626 p->seqid = lp->snd_nxt + 1;
1627 p->u.r.ackid = lp->rcv_nxt;
1628
1629 err = send_tx_packet(lp, p, new_tail);
1630 if (!err)
1631 lp->snd_nxt++;
1632 }
1633}
1634
1635static int read_nonraw(struct ldc_channel *lp, void *buf, unsigned int size)
1636{
1637 struct ldc_packet *first_frag;
1638 unsigned long hv_err, new;
1639 int err, copied;
1640
1641 hv_err = sun4v_ldc_rx_get_state(lp->id,
1642 &lp->rx_head,
1643 &lp->rx_tail,
1644 &lp->chan_state);
1645 if (hv_err)
1646 return ldc_abort(lp);
1647
1648 if (lp->chan_state == LDC_CHANNEL_DOWN ||
1649 lp->chan_state == LDC_CHANNEL_RESETTING)
1650 return -ECONNRESET;
1651
1652 if (lp->rx_head == lp->rx_tail)
1653 return 0;
1654
1655 first_frag = NULL;
1656 copied = err = 0;
1657 new = lp->rx_head;
1658 while (1) {
1659 struct ldc_packet *p;
1660 int pkt_len;
1661
1662 BUG_ON(new == lp->rx_tail);
1663 p = lp->rx_base + (new / LDC_PACKET_SIZE);
1664
1665 ldcdbg(RX, "RX read pkt[%02x:%02x:%02x:%02x:%08x:%08x] "
1666 "rcv_nxt[%08x]\n",
1667 p->type,
1668 p->stype,
1669 p->ctrl,
1670 p->env,
1671 p->seqid,
1672 p->u.r.ackid,
1673 lp->rcv_nxt);
1674
1675 if (unlikely(!rx_seq_ok(lp, p->seqid))) {
1676 err = rx_bad_seq(lp, p, first_frag);
1677 copied = 0;
1678 break;
1679 }
1680
1681 if (p->type & LDC_CTRL) {
1682 err = process_control_frame(lp, p);
1683 if (err < 0)
1684 break;
1685 err = 0;
1686 }
1687
1688 lp->rcv_nxt = p->seqid;
1689
1690 if (!(p->type & LDC_DATA)) {
1691 new = rx_advance(lp, new);
1692 goto no_data;
1693 }
1694 if (p->stype & (LDC_ACK | LDC_NACK)) {
1695 err = data_ack_nack(lp, p);
1696 if (err)
1697 break;
1698 }
1699 if (!(p->stype & LDC_INFO)) {
1700 new = rx_advance(lp, new);
1701 err = rx_set_head(lp, new);
1702 if (err)
1703 break;
1704 goto no_data;
1705 }
1706
1707 pkt_len = p->env & LDC_LEN;
1708
1709 /* Every initial packet starts with the START bit set.
1710 *
1711 * Singleton packets will have both START+STOP set.
1712 *
1713 * Fragments will have START set in the first frame, STOP
1714 * set in the last frame, and neither bit set in middle
1715 * frames of the packet.
1716 *
1717 * Therefore if we are at the beginning of a packet and
1718 * we don't see START, or we are in the middle of a fragmented
1719 * packet and do see START, we are unsynchronized and should
1720 * flush the RX queue.
1721 */
1722 if ((first_frag == NULL && !(p->env & LDC_START)) ||
1723 (first_frag != NULL && (p->env & LDC_START))) {
1724 if (!first_frag)
1725 new = rx_advance(lp, new);
1726
1727 err = rx_set_head(lp, new);
1728 if (err)
1729 break;
1730
1731 if (!first_frag)
1732 goto no_data;
1733 }
1734 if (!first_frag)
1735 first_frag = p;
1736
1737 if (pkt_len > size - copied) {
1738 /* User didn't give us a big enough buffer,
1739 * what to do? This is a pretty serious error.
1740 *
1741 * Since we haven't updated the RX ring head to
1742 * consume any of the packets, signal the error
1743 * to the user and just leave the RX ring alone.
1744 *
1745 * This seems the best behavior because this allows
1746 * a user of the LDC layer to start with a small
1747 * RX buffer for ldc_read() calls and use -EMSGSIZE
1748	 * as a cue to enlarge its read buffer.
1749 */
1750 err = -EMSGSIZE;
1751 break;
1752 }
1753
1754 /* Ok, we are gonna eat this one. */
1755 new = rx_advance(lp, new);
1756
1757 memcpy(buf,
1758 (lp->cfg.mode == LDC_MODE_UNRELIABLE ?
1759 p->u.u_data : p->u.r.r_data), pkt_len);
1760 buf += pkt_len;
1761 copied += pkt_len;
1762
1763 if (p->env & LDC_STOP)
1764 break;
1765
1766no_data:
1767 if (new == lp->rx_tail) {
1768 err = rx_data_wait(lp, new);
1769 if (err)
1770 break;
1771 }
1772 }
1773
1774 if (!err)
1775 err = rx_set_head(lp, new);
1776
1777 if (err && first_frag)
1778 lp->rcv_nxt = first_frag->seqid - 1;
1779
1780 if (!err) {
1781 err = copied;
1782 if (err > 0 && lp->cfg.mode != LDC_MODE_UNRELIABLE)
1783 send_data_ack(lp);
1784 }
1785
1786 return err;
1787}
1788
1789static const struct ldc_mode_ops nonraw_ops = {
1790 .write = write_nonraw,
1791 .read = read_nonraw,
1792};
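
write_nonraw() fragments a buffer into mss-sized frames whose env byte carries the length plus START/STOP markers, which read_nonraw() uses to resynchronize. A minimal user-space sketch of the envelope computation; the LDC_LEN/LDC_START/LDC_STOP values are assumptions taken from the matching header, which is not part of this hunk, and mss = 56 corresponds to LDC_MODE_UNRELIABLE above assuming LDC_PACKET_SIZE is 64:

#include <stdio.h>

#define LDC_LEN   0x3f	/* assumed header values */
#define LDC_START 0x40
#define LDC_STOP  0x80

int main(void)
{
	unsigned int size = 200, mss = 56;	/* LDC_PACKET_SIZE - 8 */
	unsigned int copied = 0;

	while (copied < size) {
		unsigned int data_len = size - copied;
		unsigned char env;

		if (data_len > mss)
			data_len = mss;
		/* Same expression as write_nonraw(): first frame gets
		 * START, last frame gets STOP, a singleton gets both.
		 */
		env = (data_len |
		       (copied == 0 ? LDC_START : 0) |
		       (data_len == size - copied ? LDC_STOP : 0));
		printf("frame: len=%u env=0x%02x\n", data_len, env);
		copied += data_len;
	}
	/* Prints: 56/0x78, 56/0x38, 56/0x38, 32/0xa0 */
	return 0;
}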
1793
1794static int write_stream(struct ldc_channel *lp, const void *buf,
1795 unsigned int size)
1796{
1797 if (size > lp->cfg.mtu)
1798 size = lp->cfg.mtu;
1799 return write_nonraw(lp, buf, size);
1800}
1801
1802static int read_stream(struct ldc_channel *lp, void *buf, unsigned int size)
1803{
1804 if (!lp->mssbuf_len) {
1805 int err = read_nonraw(lp, lp->mssbuf, lp->cfg.mtu);
1806 if (err < 0)
1807 return err;
1808
1809 lp->mssbuf_len = err;
1810 lp->mssbuf_off = 0;
1811 }
1812
1813 if (size > lp->mssbuf_len)
1814 size = lp->mssbuf_len;
1815 memcpy(buf, lp->mssbuf + lp->mssbuf_off, size);
1816
1817 lp->mssbuf_off += size;
1818 lp->mssbuf_len -= size;
1819
1820 return size;
1821}
1822
1823static const struct ldc_mode_ops stream_ops = {
1824 .write = write_stream,
1825 .read = read_stream,
1826};
1827
1828int ldc_write(struct ldc_channel *lp, const void *buf, unsigned int size)
1829{
1830 unsigned long flags;
1831 int err;
1832
1833 if (!buf)
1834 return -EINVAL;
1835
1836 if (!size)
1837 return 0;
1838
1839 spin_lock_irqsave(&lp->lock, flags);
1840
1841 if (lp->hs_state != LDC_HS_COMPLETE)
1842 err = -ENOTCONN;
1843 else
1844 err = lp->mops->write(lp, buf, size);
1845
1846 spin_unlock_irqrestore(&lp->lock, flags);
1847
1848 return err;
1849}
1850EXPORT_SYMBOL(ldc_write);
1851
1852int ldc_read(struct ldc_channel *lp, void *buf, unsigned int size)
1853{
1854 unsigned long flags;
1855 int err;
1856
1857 if (!buf)
1858 return -EINVAL;
1859
1860 if (!size)
1861 return 0;
1862
1863 spin_lock_irqsave(&lp->lock, flags);
1864
1865 if (lp->hs_state != LDC_HS_COMPLETE)
1866 err = -ENOTCONN;
1867 else
1868 err = lp->mops->read(lp, buf, size);
1869
1870 spin_unlock_irqrestore(&lp->lock, flags);
1871
1872 return err;
1873}
1874EXPORT_SYMBOL(ldc_read);
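
Putting the exported calls together, a hypothetical client looks roughly like this. All my_* names are invented for the sketch; the channel id and IRQ cookies would really come from the machine description, the callback signature mirrors the calls in send_events(), and a -EMSGSIZE return from ldc_read() is the cue to enlarge the buffer (see read_nonraw()):

#include <linux/err.h>
#include <asm/ldc.h>	/* assumed header location for this API */

struct my_driver {
	struct ldc_channel *lp;
};

static struct my_driver my_drv;		/* hypothetical driver state */

static void my_event(void *arg, int event)
{
	struct my_driver *dp = arg;

	if (event == LDC_EVENT_DATA_READY) {
		char buf[512];
		int len;

		/* Drain complete packets; 0 means queue empty, and
		 * -EMSGSIZE would mean this buffer needs to grow.
		 */
		while ((len = ldc_read(dp->lp, buf, sizeof(buf))) > 0)
			;
	}
}

static int my_probe(unsigned long channel_id,
		    unsigned int rx_irq, unsigned int tx_irq)
{
	struct ldc_channel_config cfg = {
		.event	= my_event,
		.mode	= LDC_MODE_UNRELIABLE,
		.rx_irq	= rx_irq,
		.tx_irq	= tx_irq,	/* cfg.mtu == 0 -> LDC_DEFAULT_MTU */
	};
	struct ldc_channel *lp;
	int err;

	lp = ldc_alloc(channel_id, &cfg, &my_drv);
	if (IS_ERR(lp))
		return PTR_ERR(lp);
	my_drv.lp = lp;

	err = ldc_bind(lp, "my-chan");	/* registers queues and IRQs */
	if (!err)
		err = ldc_connect(lp);	/* starts the handshake above */
	if (err)
		ldc_free(lp);
	return err;
}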
1875
1876static long arena_alloc(struct ldc_iommu *iommu, unsigned long npages)
1877{
1878 struct iommu_arena *arena = &iommu->arena;
1879 unsigned long n, i, start, end, limit;
1880 int pass;
1881
1882 limit = arena->limit;
1883 start = arena->hint;
1884 pass = 0;
1885
1886again:
1887 n = find_next_zero_bit(arena->map, limit, start);
1888 end = n + npages;
1889 if (unlikely(end >= limit)) {
1890 if (likely(pass < 1)) {
1891 limit = start;
1892 start = 0;
1893 pass++;
1894 goto again;
1895 } else {
1896 /* Scanned the whole thing, give up. */
1897 return -1;
1898 }
1899 }
1900
1901 for (i = n; i < end; i++) {
1902 if (test_bit(i, arena->map)) {
1903 start = i + 1;
1904 goto again;
1905 }
1906 }
1907
1908 for (i = n; i < end; i++)
1909 __set_bit(i, arena->map);
1910
1911 arena->hint = end;
1912
1913 return n;
1914}
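
arena_alloc() is a first-fit bitmap search with one wrap-around retry that starts from a moving hint. The same logic as a stand-alone model, with a plain byte array in place of the kernel bitmap helpers (note it mirrors the original's conservative end >= limit test):

#include <stdio.h>

#define LIMIT 16UL

static unsigned char map[LIMIT];	/* 0 = free, 1 = allocated */
static unsigned long hint;

static long toy_arena_alloc(unsigned long npages)
{
	unsigned long limit = LIMIT, start = hint, n, end, i;
	int pass = 0;

again:
	for (n = start; n < limit && map[n]; n++)
		;			/* find_next_zero_bit() equivalent */
	end = n + npages;
	if (end >= limit) {		/* mirrors the kernel's end >= limit */
		if (pass < 1) {
			limit = start;	/* retry once, wrapped to zero */
			start = 0;
			pass++;
			goto again;
		}
		return -1;		/* scanned the whole thing, give up */
	}
	for (i = n; i < end; i++) {
		if (map[i]) {		/* hole too small, skip past it */
			start = i + 1;
			goto again;
		}
	}
	for (i = n; i < end; i++)
		map[i] = 1;
	hint = end;
	return (long) n;
}

int main(void)
{
	printf("%ld\n", toy_arena_alloc(3));	/* 0 */
	printf("%ld\n", toy_arena_alloc(4));	/* 3 */
	map[1] = map[2] = 0;			/* release two entries */
	printf("%ld\n", toy_arena_alloc(9));	/* -1: no room even after wrap */
	printf("%ld\n", toy_arena_alloc(2));	/* 7: continues from the hint */
	return 0;
}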
1915
1916#define COOKIE_PGSZ_CODE 0xf000000000000000ULL
1917#define COOKIE_PGSZ_CODE_SHIFT 60ULL
1918
1919static u64 pagesize_code(void)
1920{
1921 switch (PAGE_SIZE) {
1922 default:
1923 case (8ULL * 1024ULL):
1924 return 0;
1925 case (64ULL * 1024ULL):
1926 return 1;
1927 case (512ULL * 1024ULL):
1928 return 2;
1929 case (4ULL * 1024ULL * 1024ULL):
1930 return 3;
1931 case (32ULL * 1024ULL * 1024ULL):
1932 return 4;
1933 case (256ULL * 1024ULL * 1024ULL):
1934 return 5;
1935 }
1936}
1937
1938static u64 make_cookie(u64 index, u64 pgsz_code, u64 page_offset)
1939{
1940 return ((pgsz_code << COOKIE_PGSZ_CODE_SHIFT) |
1941 (index << PAGE_SHIFT) |
1942 page_offset);
1943}
1944
1945static u64 cookie_to_index(u64 cookie, unsigned long *shift)
1946{
1947 u64 szcode = cookie >> COOKIE_PGSZ_CODE_SHIFT;
1948
1949 cookie &= ~COOKIE_PGSZ_CODE;
1950
1951 *shift = szcode * 3;
1952
1953 return (cookie >> (13ULL + (szcode * 3ULL)));
1954}
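
A cookie therefore packs the page-size code into bits 63:60 and the map-table index above the in-page offset; cookie_to_index() undoes this with a shift of 13 + 3 * szcode, because each page size supported by pagesize_code() grows by a factor of 8. A worked round trip, assuming sparc64's 8K base pages (PAGE_SHIFT = 13, page-size code 0):

#include <stdio.h>

#define COOKIE_PGSZ_CODE	0xf000000000000000ULL
#define COOKIE_PGSZ_CODE_SHIFT	60ULL
#define PAGE_SHIFT		13	/* 8K base pages (assumption) */

static unsigned long long make_cookie(unsigned long long index,
				      unsigned long long pgsz_code,
				      unsigned long long page_offset)
{
	return (pgsz_code << COOKIE_PGSZ_CODE_SHIFT) |
	       (index << PAGE_SHIFT) | page_offset;
}

static unsigned long long cookie_to_index(unsigned long long cookie,
					  unsigned long *shift)
{
	unsigned long long szcode = cookie >> COOKIE_PGSZ_CODE_SHIFT;

	cookie &= ~COOKIE_PGSZ_CODE;
	*shift = szcode * 3;
	return cookie >> (13ULL + (szcode * 3ULL));
}

int main(void)
{
	unsigned long shift;
	unsigned long long c = make_cookie(5, 0, 0x10);

	printf("cookie = 0x%llx\n", c);				/* 0xa010 */
	printf("index  = %llu\n", cookie_to_index(c, &shift));	/* 5 */
	printf("offset = 0x%llx\n", c & ((1ULL << 13) - 1));	/* 0x10 */
	return 0;
}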
1955
1956static struct ldc_mtable_entry *alloc_npages(struct ldc_iommu *iommu,
1957 unsigned long npages)
1958{
1959 long entry;
1960
1961 entry = arena_alloc(iommu, npages);
1962 if (unlikely(entry < 0))
1963 return NULL;
1964
1965 return iommu->page_table + entry;
1966}
1967
1968static u64 perm_to_mte(unsigned int map_perm)
1969{
1970 u64 mte_base;
1971
1972 mte_base = pagesize_code();
1973
1974 if (map_perm & LDC_MAP_SHADOW) {
1975 if (map_perm & LDC_MAP_R)
1976 mte_base |= LDC_MTE_COPY_R;
1977 if (map_perm & LDC_MAP_W)
1978 mte_base |= LDC_MTE_COPY_W;
1979 }
1980 if (map_perm & LDC_MAP_DIRECT) {
1981 if (map_perm & LDC_MAP_R)
1982 mte_base |= LDC_MTE_READ;
1983 if (map_perm & LDC_MAP_W)
1984 mte_base |= LDC_MTE_WRITE;
1985 if (map_perm & LDC_MAP_X)
1986 mte_base |= LDC_MTE_EXEC;
1987 }
1988 if (map_perm & LDC_MAP_IO) {
1989 if (map_perm & LDC_MAP_R)
1990 mte_base |= LDC_MTE_IOMMU_R;
1991 if (map_perm & LDC_MAP_W)
1992 mte_base |= LDC_MTE_IOMMU_W;
1993 }
1994
1995 return mte_base;
1996}
1997
1998static int pages_in_region(unsigned long base, long len)
1999{
2000 int count = 0;
2001
2002 do {
2003 unsigned long new = (base + PAGE_SIZE) & PAGE_MASK;
2004
2005 len -= (new - base);
2006 base = new;
2007 count++;
2008 } while (len > 0);
2009
2010 return count;
2011}
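
pages_in_region() counts the pages touched by [base, base + len), which is not simply len / PAGE_SIZE: a tiny region straddling a page boundary costs two map-table entries. A quick stand-alone check, assuming 8K pages:

#include <stdio.h>

#define PAGE_SIZE 8192UL		/* sparc64 base page (assumption) */
#define PAGE_MASK (~(PAGE_SIZE - 1))

static int pages_in_region(unsigned long base, long len)
{
	int count = 0;

	do {	/* same loop as above: step to each next page boundary */
		unsigned long new = (base + PAGE_SIZE) & PAGE_MASK;

		len -= (new - base);
		base = new;
		count++;
	} while (len > 0);

	return count;
}

int main(void)
{
	printf("%d\n", pages_in_region(0x2000, 16));	/* 1: one page */
	printf("%d\n", pages_in_region(0x3ff8, 16));	/* 2: straddles 0x4000 */
	printf("%d\n", pages_in_region(0x2000, 8192));	/* 1: exactly one page */
	return 0;
}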
2012
2013struct cookie_state {
2014 struct ldc_mtable_entry *page_table;
2015 struct ldc_trans_cookie *cookies;
2016 u64 mte_base;
2017 u64 prev_cookie;
2018 u32 pte_idx;
2019 u32 nc;
2020};
2021
2022static void fill_cookies(struct cookie_state *sp, unsigned long pa,
2023 unsigned long off, unsigned long len)
2024{
2025 do {
2026 unsigned long tlen, new = pa + PAGE_SIZE;
2027 u64 this_cookie;
2028
2029 sp->page_table[sp->pte_idx].mte = sp->mte_base | pa;
2030
2031 tlen = PAGE_SIZE;
2032 if (off)
2033 tlen = PAGE_SIZE - off;
2034 if (tlen > len)
2035 tlen = len;
2036
2037 this_cookie = make_cookie(sp->pte_idx,
2038 pagesize_code(), off);
2039
2040 off = 0;
2041
2042 if (this_cookie == sp->prev_cookie) {
2043 sp->cookies[sp->nc - 1].cookie_size += tlen;
2044 } else {
2045 sp->cookies[sp->nc].cookie_addr = this_cookie;
2046 sp->cookies[sp->nc].cookie_size = tlen;
2047 sp->nc++;
2048 }
2049 sp->prev_cookie = this_cookie + tlen;
2050
2051 sp->pte_idx++;
2052
2053 len -= tlen;
2054 pa = new;
2055 } while (len > 0);
2056}
2057
2058static int sg_count_one(struct scatterlist *sg)
2059{
2060 unsigned long base = page_to_pfn(sg->page) << PAGE_SHIFT;
2061 long len = sg->length;
2062
2063 if ((sg->offset | len) & (8UL - 1))
2064 return -EFAULT;
2065
2066 return pages_in_region(base + sg->offset, len);
2067}
2068
2069static int sg_count_pages(struct scatterlist *sg, int num_sg)
2070{
2071 int count;
2072 int i;
2073
2074 count = 0;
2075 for (i = 0; i < num_sg; i++) {
2076 int err = sg_count_one(sg + i);
2077 if (err < 0)
2078 return err;
2079 count += err;
2080 }
2081
2082 return count;
2083}
2084
2085int ldc_map_sg(struct ldc_channel *lp,
2086 struct scatterlist *sg, int num_sg,
2087 struct ldc_trans_cookie *cookies, int ncookies,
2088 unsigned int map_perm)
2089{
2090 unsigned long i, npages, flags;
2091 struct ldc_mtable_entry *base;
2092 struct cookie_state state;
2093 struct ldc_iommu *iommu;
2094 int err;
2095
2096 if (map_perm & ~LDC_MAP_ALL)
2097 return -EINVAL;
2098
2099 err = sg_count_pages(sg, num_sg);
2100 if (err < 0)
2101 return err;
2102
2103 npages = err;
2104 if (err > ncookies)
2105 return -EMSGSIZE;
2106
2107 iommu = &lp->iommu;
2108
2109 spin_lock_irqsave(&iommu->lock, flags);
2110 base = alloc_npages(iommu, npages);
2111 spin_unlock_irqrestore(&iommu->lock, flags);
2112
2113 if (!base)
2114 return -ENOMEM;
2115
2116 state.page_table = iommu->page_table;
2117 state.cookies = cookies;
2118 state.mte_base = perm_to_mte(map_perm);
2119 state.prev_cookie = ~(u64)0;
2120 state.pte_idx = (base - iommu->page_table);
2121 state.nc = 0;
2122
2123 for (i = 0; i < num_sg; i++)
2124 fill_cookies(&state, page_to_pfn(sg[i].page) << PAGE_SHIFT,
2125 sg[i].offset, sg[i].length);
2126
2127 return state.nc;
2128}
2129EXPORT_SYMBOL(ldc_map_sg);
2130
2131int ldc_map_single(struct ldc_channel *lp,
2132 void *buf, unsigned int len,
2133 struct ldc_trans_cookie *cookies, int ncookies,
2134 unsigned int map_perm)
2135{
2136 unsigned long npages, pa, flags;
2137 struct ldc_mtable_entry *base;
2138 struct cookie_state state;
2139 struct ldc_iommu *iommu;
2140
2141 if ((map_perm & ~LDC_MAP_ALL) || (ncookies < 1))
2142 return -EINVAL;
2143
2144 pa = __pa(buf);
2145 if ((pa | len) & (8UL - 1))
2146 return -EFAULT;
2147
2148 npages = pages_in_region(pa, len);
2149
2150 iommu = &lp->iommu;
2151
2152 spin_lock_irqsave(&iommu->lock, flags);
2153 base = alloc_npages(iommu, npages);
2154 spin_unlock_irqrestore(&iommu->lock, flags);
2155
2156 if (!base)
2157 return -ENOMEM;
2158
2159 state.page_table = iommu->page_table;
2160 state.cookies = cookies;
2161 state.mte_base = perm_to_mte(map_perm);
2162 state.prev_cookie = ~(u64)0;
2163 state.pte_idx = (base - iommu->page_table);
2164 state.nc = 0;
2165 fill_cookies(&state, (pa & PAGE_MASK), (pa & ~PAGE_MASK), len);
2166 BUG_ON(state.nc != 1);
2167
2168 return state.nc;
2169}
2170EXPORT_SYMBOL(ldc_map_single);
2171
2172static void free_npages(unsigned long id, struct ldc_iommu *iommu,
2173 u64 cookie, u64 size)
2174{
2175 struct iommu_arena *arena = &iommu->arena;
2176 unsigned long i, shift, index, npages;
2177 struct ldc_mtable_entry *base;
2178
2179 npages = PAGE_ALIGN(((cookie & ~PAGE_MASK) + size)) >> PAGE_SHIFT;
2180 index = cookie_to_index(cookie, &shift);
2181 base = iommu->page_table + index;
2182
2183 BUG_ON(index > arena->limit ||
2184 (index + npages) > arena->limit);
2185
2186 for (i = 0; i < npages; i++) {
2187 if (base->cookie)
2188 sun4v_ldc_revoke(id, cookie + (i << shift),
2189 base->cookie);
2190 base->mte = 0;
2191 __clear_bit(index + i, arena->map);
2192 }
2193}
2194
2195void ldc_unmap(struct ldc_channel *lp, struct ldc_trans_cookie *cookies,
2196 int ncookies)
2197{
2198 struct ldc_iommu *iommu = &lp->iommu;
2199 unsigned long flags;
2200 int i;
2201
2202 spin_lock_irqsave(&iommu->lock, flags);
2203 for (i = 0; i < ncookies; i++) {
2204 u64 addr = cookies[i].cookie_addr;
2205 u64 size = cookies[i].cookie_size;
2206
2207 free_npages(lp->id, iommu, addr, size);
2208 }
2209 spin_unlock_irqrestore(&iommu->lock, flags);
2210}
2211EXPORT_SYMBOL(ldc_unmap);
2212
2213int ldc_copy(struct ldc_channel *lp, int copy_dir,
2214 void *buf, unsigned int len, unsigned long offset,
2215 struct ldc_trans_cookie *cookies, int ncookies)
2216{
2217 unsigned int orig_len;
2218 unsigned long ra;
2219 int i;
2220
2221 if (copy_dir != LDC_COPY_IN && copy_dir != LDC_COPY_OUT) {
2222 printk(KERN_ERR PFX "ldc_copy: ID[%lu] Bad copy_dir[%d]\n",
2223 lp->id, copy_dir);
2224 return -EINVAL;
2225 }
2226
2227 ra = __pa(buf);
2228 if ((ra | len | offset) & (8UL - 1)) {
2229 printk(KERN_ERR PFX "ldc_copy: ID[%lu] Unaligned buffer "
2230 "ra[%lx] len[%x] offset[%lx]\n",
2231 lp->id, ra, len, offset);
2232 return -EFAULT;
2233 }
2234
2235 if (lp->hs_state != LDC_HS_COMPLETE ||
2236 (lp->flags & LDC_FLAG_RESET)) {
2237 printk(KERN_ERR PFX "ldc_copy: ID[%lu] Link down hs_state[%x] "
2238 "flags[%x]\n", lp->id, lp->hs_state, lp->flags);
2239 return -ECONNRESET;
2240 }
2241
2242 orig_len = len;
2243 for (i = 0; i < ncookies; i++) {
2244 unsigned long cookie_raddr = cookies[i].cookie_addr;
2245 unsigned long this_len = cookies[i].cookie_size;
2246 unsigned long actual_len;
2247
2248 if (unlikely(offset)) {
2249 unsigned long this_off = offset;
2250
2251 if (this_off > this_len)
2252 this_off = this_len;
2253
2254 offset -= this_off;
2255 this_len -= this_off;
2256 if (!this_len)
2257 continue;
2258 cookie_raddr += this_off;
2259 }
2260
2261 if (this_len > len)
2262 this_len = len;
2263
2264 while (1) {
2265 unsigned long hv_err;
2266
2267 hv_err = sun4v_ldc_copy(lp->id, copy_dir,
2268 cookie_raddr, ra,
2269 this_len, &actual_len);
2270 if (unlikely(hv_err)) {
2271 printk(KERN_ERR PFX "ldc_copy: ID[%lu] "
2272 "HV error %lu\n",
2273 lp->id, hv_err);
2274 if (lp->hs_state != LDC_HS_COMPLETE ||
2275 (lp->flags & LDC_FLAG_RESET))
2276 return -ECONNRESET;
2277 else
2278 return -EFAULT;
2279 }
2280
2281 cookie_raddr += actual_len;
2282 ra += actual_len;
2283 len -= actual_len;
2284 if (actual_len == this_len)
2285 break;
2286
2287 this_len -= actual_len;
2288 }
2289
2290 if (!len)
2291 break;
2292 }
2293
2294	/* It is the caller's policy what to do about short copies.
2295 * For example, a networking driver can declare the
2296 * packet a runt and drop it.
2297 */
2298
2299 return orig_len - len;
2300}
2301EXPORT_SYMBOL(ldc_copy);
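
ldc_copy() is the bulk path: the peer exports memory as cookies (via ldc_map_sg(), ldc_map_single() or ldc_alloc_exp_dring()) and this side moves bytes against them, subject to the 8-byte alignment checks above. A hedged sketch of a hypothetical receive helper; my_pull_payload is invented, the cookies would arrive in some driver-specific descriptor, and treating a short copy as an error is one caller policy among others, as the comment above notes:

#include <asm/ldc.h>	/* assumed header location */

static int my_pull_payload(struct ldc_channel *lp,
			   struct ldc_trans_cookie *cookies, int ncookies,
			   void *buf, unsigned int len)
{
	/* buf, len and the cookie offset must all be 8-byte aligned. */
	int copied = ldc_copy(lp, LDC_COPY_IN, buf, len, 0,
			      cookies, ncookies);

	if (copied < 0)
		return copied;	/* -EINVAL, -EFAULT or -ECONNRESET */
	if (copied < len)
		return -EIO;	/* caller policy: reject short copies */
	return 0;
}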
2302
2303void *ldc_alloc_exp_dring(struct ldc_channel *lp, unsigned int len,
2304 struct ldc_trans_cookie *cookies, int *ncookies,
2305 unsigned int map_perm)
2306{
2307 void *buf;
2308 int err;
2309
2310 if (len & (8UL - 1))
2311 return ERR_PTR(-EINVAL);
2312
2313 buf = kzalloc(len, GFP_KERNEL);
2314 if (!buf)
2315 return ERR_PTR(-ENOMEM);
2316
2317 err = ldc_map_single(lp, buf, len, cookies, *ncookies, map_perm);
2318 if (err < 0) {
2319 kfree(buf);
2320 return ERR_PTR(err);
2321 }
2322 *ncookies = err;
2323
2324 return buf;
2325}
2326EXPORT_SYMBOL(ldc_alloc_exp_dring);
2327
2328void ldc_free_exp_dring(struct ldc_channel *lp, void *buf, unsigned int len,
2329 struct ldc_trans_cookie *cookies, int ncookies)
2330{
2331 ldc_unmap(lp, cookies, ncookies);
2332 kfree(buf);
2333}
2334EXPORT_SYMBOL(ldc_free_exp_dring);
2335
2336static int __init ldc_init(void)
2337{
2338 unsigned long major, minor;
2339 struct mdesc_handle *hp;
2340 const u64 *v;
2341 u64 mp;
2342
2343 hp = mdesc_grab();
2344 if (!hp)
2345 return -ENODEV;
2346
2347 mp = mdesc_node_by_name(hp, MDESC_NODE_NULL, "platform");
2348 if (mp == MDESC_NODE_NULL)
2349 return -ENODEV;
2350
2351 v = mdesc_get_property(hp, mp, "domaining-enabled", NULL);
2352 if (!v)
2353 return -ENODEV;
2354
2355 major = 1;
2356 minor = 0;
2357 if (sun4v_hvapi_register(HV_GRP_LDOM, major, &minor)) {
2358 printk(KERN_INFO PFX "Could not register LDOM hvapi.\n");
2359 return -ENODEV;
2360 }
2361
2362 printk(KERN_INFO "%s", version);
2363
2364 if (!*v) {
2365 printk(KERN_INFO PFX "Domaining disabled.\n");
2366 return -ENODEV;
2367 }
2368 ldom_domaining_enabled = 1;
2369
2370 return 0;
2371}
2372
2373core_initcall(ldc_init);
diff --git a/arch/sparc64/kernel/mdesc.c b/arch/sparc64/kernel/mdesc.c
index f0e16045fb16..62a389793949 100644
--- a/arch/sparc64/kernel/mdesc.c
+++ b/arch/sparc64/kernel/mdesc.c
@@ -6,6 +6,9 @@
6#include <linux/types.h> 6#include <linux/types.h>
7#include <linux/bootmem.h> 7#include <linux/bootmem.h>
8#include <linux/log2.h> 8#include <linux/log2.h>
9#include <linux/list.h>
10#include <linux/slab.h>
11#include <linux/mm.h>
9 12
10#include <asm/hypervisor.h> 13#include <asm/hypervisor.h>
11#include <asm/mdesc.h> 14#include <asm/mdesc.h>
@@ -29,7 +32,7 @@ struct mdesc_hdr {
29 u32 node_sz; /* node block size */ 32 u32 node_sz; /* node block size */
30 u32 name_sz; /* name block size */ 33 u32 name_sz; /* name block size */
31 u32 data_sz; /* data block size */ 34 u32 data_sz; /* data block size */
32}; 35} __attribute__((aligned(16)));
33 36
34struct mdesc_elem { 37struct mdesc_elem {
35 u8 tag; 38 u8 tag;
@@ -53,306 +56,402 @@ struct mdesc_elem {
53 } d; 56 } d;
54}; 57};
55 58
56static struct mdesc_hdr *main_mdesc; 59struct mdesc_mem_ops {
57static struct mdesc_node *allnodes; 60 struct mdesc_handle *(*alloc)(unsigned int mdesc_size);
58 61 void (*free)(struct mdesc_handle *handle);
59static struct mdesc_node *allnodes_tail; 62};
60static unsigned int unique_id;
61 63
62static struct mdesc_node **mdesc_hash; 64struct mdesc_handle {
63static unsigned int mdesc_hash_size; 65 struct list_head list;
66 struct mdesc_mem_ops *mops;
67 void *self_base;
68 atomic_t refcnt;
69 unsigned int handle_size;
70 struct mdesc_hdr mdesc;
71};
64 72
65static inline unsigned int node_hashfn(u64 node) 73static void mdesc_handle_init(struct mdesc_handle *hp,
74 unsigned int handle_size,
75 void *base)
66{ 76{
67 return ((unsigned int) (node ^ (node >> 8) ^ (node >> 16))) 77 BUG_ON(((unsigned long)&hp->mdesc) & (16UL - 1));
68 & (mdesc_hash_size - 1); 78
79 memset(hp, 0, handle_size);
80 INIT_LIST_HEAD(&hp->list);
81 hp->self_base = base;
82 atomic_set(&hp->refcnt, 1);
83 hp->handle_size = handle_size;
69} 84}
70 85
71static inline void hash_node(struct mdesc_node *mp) 86static struct mdesc_handle *mdesc_bootmem_alloc(unsigned int mdesc_size)
72{ 87{
73 struct mdesc_node **head = &mdesc_hash[node_hashfn(mp->node)]; 88 struct mdesc_handle *hp;
89 unsigned int handle_size, alloc_size;
74 90
75 mp->hash_next = *head; 91 handle_size = (sizeof(struct mdesc_handle) -
76 *head = mp; 92 sizeof(struct mdesc_hdr) +
93 mdesc_size);
94 alloc_size = PAGE_ALIGN(handle_size);
77 95
78 if (allnodes_tail) { 96 hp = __alloc_bootmem(alloc_size, PAGE_SIZE, 0UL);
79 allnodes_tail->allnodes_next = mp; 97 if (hp)
80 allnodes_tail = mp; 98 mdesc_handle_init(hp, handle_size, hp);
81 } else { 99
82 allnodes = allnodes_tail = mp; 100 return hp;
83 }
84} 101}
85 102
86static struct mdesc_node *find_node(u64 node) 103static void mdesc_bootmem_free(struct mdesc_handle *hp)
87{ 104{
88 struct mdesc_node *mp = mdesc_hash[node_hashfn(node)]; 105 unsigned int alloc_size, handle_size = hp->handle_size;
106 unsigned long start, end;
107
108 BUG_ON(atomic_read(&hp->refcnt) != 0);
109 BUG_ON(!list_empty(&hp->list));
89 110
90 while (mp) { 111 alloc_size = PAGE_ALIGN(handle_size);
91 if (mp->node == node)
92 return mp;
93 112
94 mp = mp->hash_next; 113 start = (unsigned long) hp;
114 end = start + alloc_size;
115
116 while (start < end) {
117 struct page *p;
118
119 p = virt_to_page(start);
120 ClearPageReserved(p);
121 __free_page(p);
122 start += PAGE_SIZE;
95 } 123 }
96 return NULL;
97} 124}
98 125
99struct property *md_find_property(const struct mdesc_node *mp, 126static struct mdesc_mem_ops bootmem_mdesc_memops = {
100 const char *name, 127 .alloc = mdesc_bootmem_alloc,
101 int *lenp) 128 .free = mdesc_bootmem_free,
129};
130
131static struct mdesc_handle *mdesc_kmalloc(unsigned int mdesc_size)
102{ 132{
103 struct property *pp; 133 unsigned int handle_size;
134 void *base;
104 135
105 for (pp = mp->properties; pp != 0; pp = pp->next) { 136 handle_size = (sizeof(struct mdesc_handle) -
106 if (strcasecmp(pp->name, name) == 0) { 137 sizeof(struct mdesc_hdr) +
107 if (lenp) 138 mdesc_size);
108 *lenp = pp->length; 139
109 break; 140 base = kmalloc(handle_size + 15, GFP_KERNEL);
110 } 141 if (base) {
142 struct mdesc_handle *hp;
143 unsigned long addr;
144
145 addr = (unsigned long)base;
146 addr = (addr + 15UL) & ~15UL;
147 hp = (struct mdesc_handle *) addr;
148
149 mdesc_handle_init(hp, handle_size, base);
150 return hp;
111 } 151 }
112 return pp; 152
153 return NULL;
113} 154}
114EXPORT_SYMBOL(md_find_property);
115 155
116/* 156static void mdesc_kfree(struct mdesc_handle *hp)
117 * Find a property with a given name for a given node
118 * and return the value.
119 */
120const void *md_get_property(const struct mdesc_node *mp, const char *name,
121 int *lenp)
122{ 157{
123 struct property *pp = md_find_property(mp, name, lenp); 158 BUG_ON(atomic_read(&hp->refcnt) != 0);
124 return pp ? pp->value : NULL; 159 BUG_ON(!list_empty(&hp->list));
160
161 kfree(hp->self_base);
125} 162}
126EXPORT_SYMBOL(md_get_property);
127 163
128struct mdesc_node *md_find_node_by_name(struct mdesc_node *from, 164static struct mdesc_mem_ops kmalloc_mdesc_memops = {
129 const char *name) 165 .alloc = mdesc_kmalloc,
166 .free = mdesc_kfree,
167};
168
169static struct mdesc_handle *mdesc_alloc(unsigned int mdesc_size,
170 struct mdesc_mem_ops *mops)
130{ 171{
131 struct mdesc_node *mp; 172 struct mdesc_handle *hp = mops->alloc(mdesc_size);
132 173
133 mp = from ? from->allnodes_next : allnodes; 174 if (hp)
134 for (; mp != NULL; mp = mp->allnodes_next) { 175 hp->mops = mops;
135 if (strcmp(mp->name, name) == 0)
136 break;
137 }
138 return mp;
139}
140EXPORT_SYMBOL(md_find_node_by_name);
141 176
142static unsigned int mdesc_early_allocated; 177 return hp;
178}
143 179
144static void * __init mdesc_early_alloc(unsigned long size) 180static void mdesc_free(struct mdesc_handle *hp)
145{ 181{
146 void *ret; 182 hp->mops->free(hp);
183}
147 184
148 ret = __alloc_bootmem(size, SMP_CACHE_BYTES, 0UL); 185static struct mdesc_handle *cur_mdesc;
149 if (ret == NULL) { 186static LIST_HEAD(mdesc_zombie_list);
150 prom_printf("MDESC: alloc of %lu bytes failed.\n", size); 187static DEFINE_SPINLOCK(mdesc_lock);
151 prom_halt();
152 }
153 188
154 memset(ret, 0, size); 189struct mdesc_handle *mdesc_grab(void)
190{
191 struct mdesc_handle *hp;
192 unsigned long flags;
155 193
156 mdesc_early_allocated += size; 194 spin_lock_irqsave(&mdesc_lock, flags);
195 hp = cur_mdesc;
196 if (hp)
197 atomic_inc(&hp->refcnt);
198 spin_unlock_irqrestore(&mdesc_lock, flags);
157 199
158 return ret; 200 return hp;
159} 201}
202EXPORT_SYMBOL(mdesc_grab);
160 203
161static unsigned int __init count_arcs(struct mdesc_elem *ep) 204void mdesc_release(struct mdesc_handle *hp)
162{ 205{
163 unsigned int ret = 0; 206 unsigned long flags;
164 207
165 ep++; 208 spin_lock_irqsave(&mdesc_lock, flags);
166 while (ep->tag != MD_NODE_END) { 209 if (atomic_dec_and_test(&hp->refcnt)) {
167 if (ep->tag == MD_PROP_ARC) 210 list_del_init(&hp->list);
168 ret++; 211 hp->mops->free(hp);
169 ep++;
170 } 212 }
171 return ret; 213 spin_unlock_irqrestore(&mdesc_lock, flags);
172} 214}
215EXPORT_SYMBOL(mdesc_release);
173 216
174static void __init mdesc_node_alloc(u64 node, struct mdesc_elem *ep, const char *names) 217static void do_mdesc_update(struct work_struct *work)
175{ 218{
176 unsigned int num_arcs = count_arcs(ep); 219 unsigned long len, real_len, status;
177 struct mdesc_node *mp; 220 struct mdesc_handle *hp, *orig_hp;
221 unsigned long flags;
222
223 (void) sun4v_mach_desc(0UL, 0UL, &len);
224
225 hp = mdesc_alloc(len, &kmalloc_mdesc_memops);
226 if (!hp) {
227 printk(KERN_ERR "MD: mdesc alloc fails\n");
228 return;
229 }
230
231 status = sun4v_mach_desc(__pa(&hp->mdesc), len, &real_len);
232 if (status != HV_EOK || real_len > len) {
233 printk(KERN_ERR "MD: mdesc reread fails with %lu\n",
234 status);
235 atomic_dec(&hp->refcnt);
236 mdesc_free(hp);
237 return;
238 }
178 239
179 mp = mdesc_early_alloc(sizeof(*mp) + 240 spin_lock_irqsave(&mdesc_lock, flags);
180 (num_arcs * sizeof(struct mdesc_arc))); 241 orig_hp = cur_mdesc;
181 mp->name = names + ep->name_offset; 242 cur_mdesc = hp;
182 mp->node = node;
183 mp->unique_id = unique_id++;
184 mp->num_arcs = num_arcs;
185 243
186 hash_node(mp); 244 if (atomic_dec_and_test(&orig_hp->refcnt))
245 mdesc_free(orig_hp);
246 else
247 list_add(&orig_hp->list, &mdesc_zombie_list);
248 spin_unlock_irqrestore(&mdesc_lock, flags);
187} 249}
188 250
189static inline struct mdesc_elem *node_block(struct mdesc_hdr *mdesc) 251static DECLARE_WORK(mdesc_update_work, do_mdesc_update);
252
253void mdesc_update(void)
254{
255 schedule_work(&mdesc_update_work);
256}
257
258static struct mdesc_elem *node_block(struct mdesc_hdr *mdesc)
190{ 259{
191 return (struct mdesc_elem *) (mdesc + 1); 260 return (struct mdesc_elem *) (mdesc + 1);
192} 261}
193 262
194static inline void *name_block(struct mdesc_hdr *mdesc) 263static void *name_block(struct mdesc_hdr *mdesc)
195{ 264{
196 return ((void *) node_block(mdesc)) + mdesc->node_sz; 265 return ((void *) node_block(mdesc)) + mdesc->node_sz;
197} 266}
198 267
199static inline void *data_block(struct mdesc_hdr *mdesc) 268static void *data_block(struct mdesc_hdr *mdesc)
200{ 269{
201 return ((void *) name_block(mdesc)) + mdesc->name_sz; 270 return ((void *) name_block(mdesc)) + mdesc->name_sz;
202} 271}
203 272
204/* In order to avoid recursion (the graph can be very deep) we use a 273u64 mdesc_node_by_name(struct mdesc_handle *hp,
205 * two pass algorithm. First we allocate all the nodes and hash them. 274 u64 from_node, const char *name)
206 * Then we iterate over each node, filling in the arcs and properties.
207 */
208static void __init build_all_nodes(struct mdesc_hdr *mdesc)
209{ 275{
210 struct mdesc_elem *start, *ep; 276 struct mdesc_elem *ep = node_block(&hp->mdesc);
211 struct mdesc_node *mp; 277 const char *names = name_block(&hp->mdesc);
212 const char *names; 278 u64 last_node = hp->mdesc.node_sz / 16;
213 void *data; 279 u64 ret;
214 u64 last_node; 280
215 281 if (from_node == MDESC_NODE_NULL)
216 start = ep = node_block(mdesc); 282 from_node = 0;
217 last_node = mdesc->node_sz / 16; 283
284 if (from_node >= last_node)
285 return MDESC_NODE_NULL;
286
287 ret = ep[from_node].d.val;
288 while (ret < last_node) {
289 if (ep[ret].tag != MD_NODE)
290 return MDESC_NODE_NULL;
291 if (!strcmp(names + ep[ret].name_offset, name))
292 break;
293 ret = ep[ret].d.val;
294 }
295 if (ret >= last_node)
296 ret = MDESC_NODE_NULL;
297 return ret;
298}
299EXPORT_SYMBOL(mdesc_node_by_name);
218 300
219 names = name_block(mdesc); 301const void *mdesc_get_property(struct mdesc_handle *hp, u64 node,
302 const char *name, int *lenp)
303{
304 const char *names = name_block(&hp->mdesc);
305 u64 last_node = hp->mdesc.node_sz / 16;
306 void *data = data_block(&hp->mdesc);
307 struct mdesc_elem *ep;
220 308
221 while (1) { 309 if (node == MDESC_NODE_NULL || node >= last_node)
222 u64 node = ep - start; 310 return NULL;
223 311
224 if (ep->tag == MD_LIST_END) 312 ep = node_block(&hp->mdesc) + node;
313 ep++;
314 for (; ep->tag != MD_NODE_END; ep++) {
315 void *val = NULL;
316 int len = 0;
317
318 switch (ep->tag) {
319 case MD_PROP_VAL:
320 val = &ep->d.val;
321 len = 8;
225 break; 322 break;
226 323
227 if (ep->tag != MD_NODE) { 324 case MD_PROP_STR:
228 prom_printf("MDESC: Inconsistent element list.\n"); 325 case MD_PROP_DATA:
229 prom_halt(); 326 val = data + ep->d.data.data_offset;
230 } 327 len = ep->d.data.data_len;
231 328 break;
232 mdesc_node_alloc(node, ep, names);
233 329
234 if (ep->d.val >= last_node) { 330 default:
235 printk("MDESC: Warning, early break out of node scan.\n");
236 printk("MDESC: Next node [%lu] last_node [%lu].\n",
237 node, last_node);
238 break; 331 break;
239 } 332 }
333 if (!val)
334 continue;
240 335
241 ep = start + ep->d.val; 336 if (!strcmp(names + ep->name_offset, name)) {
337 if (lenp)
338 *lenp = len;
339 return val;
340 }
242 } 341 }
243 342
244 data = data_block(mdesc); 343 return NULL;
245 for (mp = allnodes; mp; mp = mp->allnodes_next) { 344}
246 struct mdesc_elem *ep = start + mp->node; 345EXPORT_SYMBOL(mdesc_get_property);
247 struct property **link = &mp->properties;
248 unsigned int this_arc = 0;
249
250 ep++;
251 while (ep->tag != MD_NODE_END) {
252 switch (ep->tag) {
253 case MD_PROP_ARC: {
254 struct mdesc_node *target;
255
256 if (this_arc >= mp->num_arcs) {
257 prom_printf("MDESC: ARC overrun [%u:%u]\n",
258 this_arc, mp->num_arcs);
259 prom_halt();
260 }
261 target = find_node(ep->d.val);
262 if (!target) {
263 printk("MDESC: Warning, arc points to "
264 "missing node, ignoring.\n");
265 break;
266 }
267 mp->arcs[this_arc].name =
268 (names + ep->name_offset);
269 mp->arcs[this_arc].arc = target;
270 this_arc++;
271 break;
272 }
273 346
274 case MD_PROP_VAL: 347u64 mdesc_next_arc(struct mdesc_handle *hp, u64 from, const char *arc_type)
275 case MD_PROP_STR: 348{
276 case MD_PROP_DATA: { 349 struct mdesc_elem *ep, *base = node_block(&hp->mdesc);
277 struct property *p = mdesc_early_alloc(sizeof(*p)); 350 const char *names = name_block(&hp->mdesc);
278 351 u64 last_node = hp->mdesc.node_sz / 16;
279 p->unique_id = unique_id++;
280 p->name = (char *) names + ep->name_offset;
281 if (ep->tag == MD_PROP_VAL) {
282 p->value = &ep->d.val;
283 p->length = 8;
284 } else {
285 p->value = data + ep->d.data.data_offset;
286 p->length = ep->d.data.data_len;
287 }
288 *link = p;
289 link = &p->next;
290 break;
291 }
292 352
293 case MD_NOOP: 353 if (from == MDESC_NODE_NULL || from >= last_node)
294 break; 354 return MDESC_NODE_NULL;
295 355
296 default: 356 ep = base + from;
297 printk("MDESC: Warning, ignoring unknown tag type %02x\n", 357
298 ep->tag); 358 ep++;
299 } 359 for (; ep->tag != MD_NODE_END; ep++) {
300 ep++; 360 if (ep->tag != MD_PROP_ARC)
301 } 361 continue;
362
363 if (strcmp(names + ep->name_offset, arc_type))
364 continue;
365
366 return ep - base;
302 } 367 }
368
369 return MDESC_NODE_NULL;
303} 370}
371EXPORT_SYMBOL(mdesc_next_arc);
304 372
305static unsigned int __init count_nodes(struct mdesc_hdr *mdesc) 373u64 mdesc_arc_target(struct mdesc_handle *hp, u64 arc)
306{ 374{
307 struct mdesc_elem *ep = node_block(mdesc); 375 struct mdesc_elem *ep, *base = node_block(&hp->mdesc);
308 struct mdesc_elem *end; 376
309 unsigned int cnt = 0; 377 ep = base + arc;
310 378
311 end = ((void *)ep) + mdesc->node_sz; 379 return ep->d.val;
312 while (ep < end) { 380}
313 if (ep->tag == MD_NODE) 381EXPORT_SYMBOL(mdesc_arc_target);
314 cnt++; 382
315 ep++; 383const char *mdesc_node_name(struct mdesc_handle *hp, u64 node)
316 } 384{
317 return cnt; 385 struct mdesc_elem *ep, *base = node_block(&hp->mdesc);
386 const char *names = name_block(&hp->mdesc);
387 u64 last_node = hp->mdesc.node_sz / 16;
388
389 if (node == MDESC_NODE_NULL || node >= last_node)
390 return NULL;
391
392 ep = base + node;
393 if (ep->tag != MD_NODE)
394 return NULL;
395
396 return names + ep->name_offset;
318} 397}
398EXPORT_SYMBOL(mdesc_node_name);
319 399
320static void __init report_platform_properties(void) 400static void __init report_platform_properties(void)
321{ 401{
322 struct mdesc_node *pn = md_find_node_by_name(NULL, "platform"); 402 struct mdesc_handle *hp = mdesc_grab();
403 u64 pn = mdesc_node_by_name(hp, MDESC_NODE_NULL, "platform");
323 const char *s; 404 const char *s;
324 const u64 *v; 405 const u64 *v;
325 406
326 if (!pn) { 407 if (pn == MDESC_NODE_NULL) {
327 prom_printf("No platform node in machine-description.\n"); 408 prom_printf("No platform node in machine-description.\n");
328 prom_halt(); 409 prom_halt();
329 } 410 }
330 411
331 s = md_get_property(pn, "banner-name", NULL); 412 s = mdesc_get_property(hp, pn, "banner-name", NULL);
332 printk("PLATFORM: banner-name [%s]\n", s); 413 printk("PLATFORM: banner-name [%s]\n", s);
333 s = md_get_property(pn, "name", NULL); 414 s = mdesc_get_property(hp, pn, "name", NULL);
334 printk("PLATFORM: name [%s]\n", s); 415 printk("PLATFORM: name [%s]\n", s);
335 416
336 v = md_get_property(pn, "hostid", NULL); 417 v = mdesc_get_property(hp, pn, "hostid", NULL);
337 if (v) 418 if (v)
338 printk("PLATFORM: hostid [%08lx]\n", *v); 419 printk("PLATFORM: hostid [%08lx]\n", *v);
339 v = md_get_property(pn, "serial#", NULL); 420 v = mdesc_get_property(hp, pn, "serial#", NULL);
340 if (v) 421 if (v)
341 printk("PLATFORM: serial# [%08lx]\n", *v); 422 printk("PLATFORM: serial# [%08lx]\n", *v);
342 v = md_get_property(pn, "stick-frequency", NULL); 423 v = mdesc_get_property(hp, pn, "stick-frequency", NULL);
343 printk("PLATFORM: stick-frequency [%08lx]\n", *v); 424 printk("PLATFORM: stick-frequency [%08lx]\n", *v);
344 v = md_get_property(pn, "mac-address", NULL); 425 v = mdesc_get_property(hp, pn, "mac-address", NULL);
345 if (v) 426 if (v)
346 printk("PLATFORM: mac-address [%lx]\n", *v); 427 printk("PLATFORM: mac-address [%lx]\n", *v);
347 v = md_get_property(pn, "watchdog-resolution", NULL); 428 v = mdesc_get_property(hp, pn, "watchdog-resolution", NULL);
348 if (v) 429 if (v)
349 printk("PLATFORM: watchdog-resolution [%lu ms]\n", *v); 430 printk("PLATFORM: watchdog-resolution [%lu ms]\n", *v);
350 v = md_get_property(pn, "watchdog-max-timeout", NULL); 431 v = mdesc_get_property(hp, pn, "watchdog-max-timeout", NULL);
351 if (v) 432 if (v)
352 printk("PLATFORM: watchdog-max-timeout [%lu ms]\n", *v); 433 printk("PLATFORM: watchdog-max-timeout [%lu ms]\n", *v);
353 v = md_get_property(pn, "max-cpus", NULL); 434 v = mdesc_get_property(hp, pn, "max-cpus", NULL);
354 if (v) 435 if (v)
355 printk("PLATFORM: max-cpus [%lu]\n", *v); 436 printk("PLATFORM: max-cpus [%lu]\n", *v);
437
438#ifdef CONFIG_SMP
439 {
440 int max_cpu, i;
441
442 if (v) {
443 max_cpu = *v;
444 if (max_cpu > NR_CPUS)
445 max_cpu = NR_CPUS;
446 } else {
447 max_cpu = NR_CPUS;
448 }
449 for (i = 0; i < max_cpu; i++)
450 cpu_set(i, cpu_possible_map);
451 }
452#endif
453
454 mdesc_release(hp);
356} 455}
357 456
358static int inline find_in_proplist(const char *list, const char *match, int len) 457static int inline find_in_proplist(const char *list, const char *match, int len)
@@ -369,15 +468,17 @@ static int inline find_in_proplist(const char *list, const char *match, int len)
369 return 0; 468 return 0;
370} 469}
371 470
372static void __init fill_in_one_cache(cpuinfo_sparc *c, struct mdesc_node *mp) 471static void __devinit fill_in_one_cache(cpuinfo_sparc *c,
472 struct mdesc_handle *hp,
473 u64 mp)
373{ 474{
374 const u64 *level = md_get_property(mp, "level", NULL); 475 const u64 *level = mdesc_get_property(hp, mp, "level", NULL);
375 const u64 *size = md_get_property(mp, "size", NULL); 476 const u64 *size = mdesc_get_property(hp, mp, "size", NULL);
376 const u64 *line_size = md_get_property(mp, "line-size", NULL); 477 const u64 *line_size = mdesc_get_property(hp, mp, "line-size", NULL);
377 const char *type; 478 const char *type;
378 int type_len; 479 int type_len;
379 480
380 type = md_get_property(mp, "type", &type_len); 481 type = mdesc_get_property(hp, mp, "type", &type_len);
381 482
382 switch (*level) { 483 switch (*level) {
383 case 1: 484 case 1:
@@ -400,48 +501,45 @@ static void __init fill_in_one_cache(cpuinfo_sparc *c, struct mdesc_node *mp)
400 } 501 }
401 502
402 if (*level == 1) { 503 if (*level == 1) {
403 unsigned int i; 504 u64 a;
404
405 for (i = 0; i < mp->num_arcs; i++) {
406 struct mdesc_node *t = mp->arcs[i].arc;
407 505
408 if (strcmp(mp->arcs[i].name, "fwd")) 506 mdesc_for_each_arc(a, hp, mp, MDESC_ARC_TYPE_FWD) {
409 continue; 507 u64 target = mdesc_arc_target(hp, a);
508 const char *name = mdesc_node_name(hp, target);
410 509
411 if (!strcmp(t->name, "cache")) 510 if (!strcmp(name, "cache"))
412 fill_in_one_cache(c, t); 511 fill_in_one_cache(c, hp, target);
413 } 512 }
414 } 513 }
415} 514}
416 515
417static void __init mark_core_ids(struct mdesc_node *mp, int core_id) 516static void __devinit mark_core_ids(struct mdesc_handle *hp, u64 mp,
517 int core_id)
418{ 518{
419 unsigned int i; 519 u64 a;
420 520
421 for (i = 0; i < mp->num_arcs; i++) { 521 mdesc_for_each_arc(a, hp, mp, MDESC_ARC_TYPE_BACK) {
422 struct mdesc_node *t = mp->arcs[i].arc; 522 u64 t = mdesc_arc_target(hp, a);
523 const char *name;
423 const u64 *id; 524 const u64 *id;
424 525
425 if (strcmp(mp->arcs[i].name, "back")) 526 name = mdesc_node_name(hp, t);
426 continue; 527 if (!strcmp(name, "cpu")) {
427 528 id = mdesc_get_property(hp, t, "id", NULL);
428 if (!strcmp(t->name, "cpu")) {
429 id = md_get_property(t, "id", NULL);
430 if (*id < NR_CPUS) 529 if (*id < NR_CPUS)
431 cpu_data(*id).core_id = core_id; 530 cpu_data(*id).core_id = core_id;
432 } else { 531 } else {
433 unsigned int j; 532 u64 j;
434 533
435 for (j = 0; j < t->num_arcs; j++) { 534 mdesc_for_each_arc(j, hp, t, MDESC_ARC_TYPE_BACK) {
436 struct mdesc_node *n = t->arcs[j].arc; 535 u64 n = mdesc_arc_target(hp, j);
536 const char *n_name;
437 537
438 if (strcmp(t->arcs[j].name, "back")) 538 n_name = mdesc_node_name(hp, n);
539 if (strcmp(n_name, "cpu"))
439 continue; 540 continue;
440 541
441 if (strcmp(n->name, "cpu")) 542 id = mdesc_get_property(hp, n, "id", NULL);
442 continue;
443
444 id = md_get_property(n, "id", NULL);
445 if (*id < NR_CPUS) 543 if (*id < NR_CPUS)
446 cpu_data(*id).core_id = core_id; 544 cpu_data(*id).core_id = core_id;
447 } 545 }
@@ -449,78 +547,81 @@ static void __init mark_core_ids(struct mdesc_node *mp, int core_id)
449 } 547 }
450} 548}
451 549
452static void __init set_core_ids(void) 550static void __devinit set_core_ids(struct mdesc_handle *hp)
453{ 551{
454 struct mdesc_node *mp;
455 int idx; 552 int idx;
553 u64 mp;
456 554
457 idx = 1; 555 idx = 1;
458 md_for_each_node_by_name(mp, "cache") { 556 mdesc_for_each_node_by_name(hp, mp, "cache") {
459 const u64 *level = md_get_property(mp, "level", NULL); 557 const u64 *level;
460 const char *type; 558 const char *type;
461 int len; 559 int len;
462 560
561 level = mdesc_get_property(hp, mp, "level", NULL);
463 if (*level != 1) 562 if (*level != 1)
464 continue; 563 continue;
465 564
466 type = md_get_property(mp, "type", &len); 565 type = mdesc_get_property(hp, mp, "type", &len);
467 if (!find_in_proplist(type, "instn", len)) 566 if (!find_in_proplist(type, "instn", len))
468 continue; 567 continue;
469 568
470 mark_core_ids(mp, idx); 569 mark_core_ids(hp, mp, idx);
471 570
472 idx++; 571 idx++;
473 } 572 }
474} 573}
475 574
476static void __init mark_proc_ids(struct mdesc_node *mp, int proc_id) 575static void __devinit mark_proc_ids(struct mdesc_handle *hp, u64 mp,
576 int proc_id)
477{ 577{
478 int i; 578 u64 a;
479 579
480 for (i = 0; i < mp->num_arcs; i++) { 580 mdesc_for_each_arc(a, hp, mp, MDESC_ARC_TYPE_BACK) {
481 struct mdesc_node *t = mp->arcs[i].arc; 581 u64 t = mdesc_arc_target(hp, a);
582 const char *name;
482 const u64 *id; 583 const u64 *id;
483 584
484 if (strcmp(mp->arcs[i].name, "back")) 585 name = mdesc_node_name(hp, t);
485 continue; 586 if (strcmp(name, "cpu"))
486
487 if (strcmp(t->name, "cpu"))
488 continue; 587 continue;
489 588
490 id = md_get_property(t, "id", NULL); 589 id = mdesc_get_property(hp, t, "id", NULL);
491 if (*id < NR_CPUS) 590 if (*id < NR_CPUS)
492 cpu_data(*id).proc_id = proc_id; 591 cpu_data(*id).proc_id = proc_id;
493 } 592 }
494} 593}
495 594
496static void __init __set_proc_ids(const char *exec_unit_name) 595static void __devinit __set_proc_ids(struct mdesc_handle *hp,
596 const char *exec_unit_name)
497{ 597{
498 struct mdesc_node *mp;
499 int idx; 598 int idx;
599 u64 mp;
500 600
501 idx = 0; 601 idx = 0;
502 md_for_each_node_by_name(mp, exec_unit_name) { 602 mdesc_for_each_node_by_name(hp, mp, exec_unit_name) {
503 const char *type; 603 const char *type;
504 int len; 604 int len;
505 605
506 type = md_get_property(mp, "type", &len); 606 type = mdesc_get_property(hp, mp, "type", &len);
507 if (!find_in_proplist(type, "int", len) && 607 if (!find_in_proplist(type, "int", len) &&
508 !find_in_proplist(type, "integer", len)) 608 !find_in_proplist(type, "integer", len))
509 continue; 609 continue;
510 610
511 mark_proc_ids(mp, idx); 611 mark_proc_ids(hp, mp, idx);
512 612
513 idx++; 613 idx++;
514 } 614 }
515} 615}
516 616
517static void __init set_proc_ids(void) 617static void __devinit set_proc_ids(struct mdesc_handle *hp)
518{ 618{
519 __set_proc_ids("exec_unit"); 619 __set_proc_ids(hp, "exec_unit");
520 __set_proc_ids("exec-unit"); 620 __set_proc_ids(hp, "exec-unit");
521} 621}
522 622
523static void __init get_one_mondo_bits(const u64 *p, unsigned int *mask, unsigned char def) 623static void __devinit get_one_mondo_bits(const u64 *p, unsigned int *mask,
624 unsigned char def)
524{ 625{
525 u64 val; 626 u64 val;
526 627
@@ -538,35 +639,37 @@ use_default:
538 *mask = ((1U << def) * 64U) - 1U; 639 *mask = ((1U << def) * 64U) - 1U;
539} 640}
540 641
541static void __init get_mondo_data(struct mdesc_node *mp, struct trap_per_cpu *tb) 642static void __devinit get_mondo_data(struct mdesc_handle *hp, u64 mp,
643 struct trap_per_cpu *tb)
542{ 644{
543 const u64 *val; 645 const u64 *val;
544 646
545 val = md_get_property(mp, "q-cpu-mondo-#bits", NULL); 647 val = mdesc_get_property(hp, mp, "q-cpu-mondo-#bits", NULL);
546 get_one_mondo_bits(val, &tb->cpu_mondo_qmask, 7); 648 get_one_mondo_bits(val, &tb->cpu_mondo_qmask, 7);
547 649
548 val = md_get_property(mp, "q-dev-mondo-#bits", NULL); 650 val = mdesc_get_property(hp, mp, "q-dev-mondo-#bits", NULL);
549 get_one_mondo_bits(val, &tb->dev_mondo_qmask, 7); 651 get_one_mondo_bits(val, &tb->dev_mondo_qmask, 7);
550 652
551 val = md_get_property(mp, "q-resumable-#bits", NULL); 653 val = mdesc_get_property(hp, mp, "q-resumable-#bits", NULL);
552 get_one_mondo_bits(val, &tb->resum_qmask, 6); 654 get_one_mondo_bits(val, &tb->resum_qmask, 6);
553 655
554 val = md_get_property(mp, "q-nonresumable-#bits", NULL); 656 val = mdesc_get_property(hp, mp, "q-nonresumable-#bits", NULL);
555 get_one_mondo_bits(val, &tb->nonresum_qmask, 2); 657 get_one_mondo_bits(val, &tb->nonresum_qmask, 2);
556} 658}
557 659
558static void __init mdesc_fill_in_cpu_data(void) 660void __devinit mdesc_fill_in_cpu_data(cpumask_t mask)
559{ 661{
560 struct mdesc_node *mp; 662 struct mdesc_handle *hp = mdesc_grab();
663 u64 mp;
561 664
562 ncpus_probed = 0; 665 ncpus_probed = 0;
563 md_for_each_node_by_name(mp, "cpu") { 666 mdesc_for_each_node_by_name(hp, mp, "cpu") {
564 const u64 *id = md_get_property(mp, "id", NULL); 667 const u64 *id = mdesc_get_property(hp, mp, "id", NULL);
565 const u64 *cfreq = md_get_property(mp, "clock-frequency", NULL); 668 const u64 *cfreq = mdesc_get_property(hp, mp, "clock-frequency", NULL);
566 struct trap_per_cpu *tb; 669 struct trap_per_cpu *tb;
567 cpuinfo_sparc *c; 670 cpuinfo_sparc *c;
568 unsigned int i;
569 int cpuid; 671 int cpuid;
672 u64 a;
570 673
571 ncpus_probed++; 674 ncpus_probed++;
572 675
@@ -575,6 +678,8 @@ static void __init mdesc_fill_in_cpu_data(void)
575#ifdef CONFIG_SMP 678#ifdef CONFIG_SMP
576 if (cpuid >= NR_CPUS) 679 if (cpuid >= NR_CPUS)
577 continue; 680 continue;
681 if (!cpu_isset(cpuid, mask))
682 continue;
578#else 683#else
579 /* On uniprocessor we only want the values for the 684 /* On uniprocessor we only want the values for the
580 * real physical cpu the kernel booted onto, however 685 * real physical cpu the kernel booted onto, however
@@ -589,35 +694,30 @@ static void __init mdesc_fill_in_cpu_data(void)
589 c->clock_tick = *cfreq; 694 c->clock_tick = *cfreq;
590 695
591 tb = &trap_block[cpuid]; 696 tb = &trap_block[cpuid];
592 get_mondo_data(mp, tb); 697 get_mondo_data(hp, mp, tb);
593
594 for (i = 0; i < mp->num_arcs; i++) {
595 struct mdesc_node *t = mp->arcs[i].arc;
596 unsigned int j;
597 698
598 if (strcmp(mp->arcs[i].name, "fwd")) 699 mdesc_for_each_arc(a, hp, mp, MDESC_ARC_TYPE_FWD) {
599 continue; 700 u64 j, t = mdesc_arc_target(hp, a);
701 const char *t_name;
600 702
601 if (!strcmp(t->name, "cache")) { 703 t_name = mdesc_node_name(hp, t);
602 fill_in_one_cache(c, t); 704 if (!strcmp(t_name, "cache")) {
705 fill_in_one_cache(c, hp, t);
603 continue; 706 continue;
604 } 707 }
605 708
606 for (j = 0; j < t->num_arcs; j++) { 709 mdesc_for_each_arc(j, hp, t, MDESC_ARC_TYPE_FWD) {
607 struct mdesc_node *n; 710 u64 n = mdesc_arc_target(hp, j);
711 const char *n_name;
608 712
609 n = t->arcs[j].arc; 713 n_name = mdesc_node_name(hp, n);
610 if (strcmp(t->arcs[j].name, "fwd")) 714 if (!strcmp(n_name, "cache"))
611 continue; 715 fill_in_one_cache(c, hp, n);
612
613 if (!strcmp(n->name, "cache"))
614 fill_in_one_cache(c, n);
615 } 716 }
616 } 717 }
617 718
618#ifdef CONFIG_SMP 719#ifdef CONFIG_SMP
619 cpu_set(cpuid, cpu_present_map); 720 cpu_set(cpuid, cpu_present_map);
620 cpu_set(cpuid, phys_cpu_present_map);
621#endif 721#endif
622 722
623 c->core_id = 0; 723 c->core_id = 0;
@@ -628,45 +728,43 @@ static void __init mdesc_fill_in_cpu_data(void)
628 sparc64_multi_core = 1; 728 sparc64_multi_core = 1;
629#endif 729#endif
630 730
631 set_core_ids(); 731 set_core_ids(hp);
632 set_proc_ids(); 732 set_proc_ids(hp);
633 733
634 smp_fill_in_sib_core_maps(); 734 smp_fill_in_sib_core_maps();
735
736 mdesc_release(hp);
635} 737}
636 738
637void __init sun4v_mdesc_init(void) 739void __init sun4v_mdesc_init(void)
638{ 740{
741 struct mdesc_handle *hp;
639 unsigned long len, real_len, status; 742 unsigned long len, real_len, status;
743 cpumask_t mask;
640 744
641 (void) sun4v_mach_desc(0UL, 0UL, &len); 745 (void) sun4v_mach_desc(0UL, 0UL, &len);
642 746
643 printk("MDESC: Size is %lu bytes.\n", len); 747 printk("MDESC: Size is %lu bytes.\n", len);
644 748
645 main_mdesc = mdesc_early_alloc(len); 749 hp = mdesc_alloc(len, &bootmem_mdesc_memops);
750 if (hp == NULL) {
751 prom_printf("MDESC: alloc of %lu bytes failed.\n", len);
752 prom_halt();
753 }
646 754
647 status = sun4v_mach_desc(__pa(main_mdesc), len, &real_len); 755 status = sun4v_mach_desc(__pa(&hp->mdesc), len, &real_len);
648 if (status != HV_EOK || real_len > len) { 756 if (status != HV_EOK || real_len > len) {
649 prom_printf("sun4v_mach_desc fails, err(%lu), " 757 prom_printf("sun4v_mach_desc fails, err(%lu), "
650 "len(%lu), real_len(%lu)\n", 758 "len(%lu), real_len(%lu)\n",
651 status, len, real_len); 759 status, len, real_len);
760 mdesc_free(hp);
652 prom_halt(); 761 prom_halt();
653 } 762 }
654 763
655 len = count_nodes(main_mdesc); 764 cur_mdesc = hp;
656 printk("MDESC: %lu nodes.\n", len);
657
658 len = roundup_pow_of_two(len);
659
660 mdesc_hash = mdesc_early_alloc(len * sizeof(struct mdesc_node *));
661 mdesc_hash_size = len;
662
663 printk("MDESC: Hash size %lu entries.\n", len);
664
665 build_all_nodes(main_mdesc);
666
667 printk("MDESC: Built graph with %u bytes of memory.\n",
668 mdesc_early_allocated);
669 765
670 report_platform_properties(); 766 report_platform_properties();
671 mdesc_fill_in_cpu_data(); 767
768 cpus_setall(mask);
769 mdesc_fill_in_cpu_data(mask);
672} 770}
diff --git a/arch/sparc64/kernel/power.c b/arch/sparc64/kernel/power.c
index 5d6adea3967f..8dd4294ad21e 100644
--- a/arch/sparc64/kernel/power.c
+++ b/arch/sparc64/kernel/power.c
@@ -1,7 +1,6 @@
1/* $Id: power.c,v 1.10 2001/12/11 01:57:16 davem Exp $ 1/* power.c: Power management driver.
2 * power.c: Power management driver.
3 * 2 *
4 * Copyright (C) 1999 David S. Miller (davem@redhat.com) 3 * Copyright (C) 1999, 2007 David S. Miller (davem@davemloft.net)
5 */ 4 */
6 5
7#include <linux/kernel.h> 6#include <linux/kernel.h>
@@ -19,6 +18,7 @@
19#include <asm/prom.h> 18#include <asm/prom.h>
20#include <asm/of_device.h> 19#include <asm/of_device.h>
21#include <asm/io.h> 20#include <asm/io.h>
21#include <asm/power.h>
22#include <asm/sstate.h> 22#include <asm/sstate.h>
23 23
24#include <linux/unistd.h> 24#include <linux/unistd.h>
@@ -29,24 +29,26 @@
29 */ 29 */
30int scons_pwroff = 1; 30int scons_pwroff = 1;
31 31
32#ifdef CONFIG_PCI
33#include <linux/pci.h>
34static void __iomem *power_reg; 32static void __iomem *power_reg;
35 33
36static DECLARE_WAIT_QUEUE_HEAD(powerd_wait); 34static DECLARE_WAIT_QUEUE_HEAD(powerd_wait);
37static int button_pressed; 35static int button_pressed;
38 36
39static irqreturn_t power_handler(int irq, void *dev_id) 37void wake_up_powerd(void)
40{ 38{
41 if (button_pressed == 0) { 39 if (button_pressed == 0) {
42 button_pressed = 1; 40 button_pressed = 1;
43 wake_up(&powerd_wait); 41 wake_up(&powerd_wait);
44 } 42 }
43}
44
45static irqreturn_t power_handler(int irq, void *dev_id)
46{
47 wake_up_powerd();
45 48
46 /* FIXME: Check registers for status... */ 49 /* FIXME: Check registers for status... */
47 return IRQ_HANDLED; 50 return IRQ_HANDLED;
48} 51}
49#endif /* CONFIG_PCI */
50 52
51extern void machine_halt(void); 53extern void machine_halt(void);
52extern void machine_alt_power_off(void); 54extern void machine_alt_power_off(void);
@@ -56,19 +58,18 @@ void machine_power_off(void)
56{ 58{
57 sstate_poweroff(); 59 sstate_poweroff();
58 if (!serial_console || scons_pwroff) { 60 if (!serial_console || scons_pwroff) {
59#ifdef CONFIG_PCI
60 if (power_reg) { 61 if (power_reg) {
61 /* Both register bits seem to have the 62 /* Both register bits seem to have the
62 * same effect, so until I figure out 63 * same effect, so until I figure out
63 * what the difference is... 64 * what the difference is...
64 */ 65 */
65 writel(AUXIO_PCIO_CPWR_OFF | AUXIO_PCIO_SPWR_OFF, power_reg); 66 writel(AUXIO_PCIO_CPWR_OFF | AUXIO_PCIO_SPWR_OFF, power_reg);
66 } else 67 } else {
67#endif /* CONFIG_PCI */
68 if (poweroff_method != NULL) { 68 if (poweroff_method != NULL) {
69 poweroff_method(); 69 poweroff_method();
70 /* not reached */ 70 /* not reached */
71 } 71 }
72 }
72 } 73 }
73 machine_halt(); 74 machine_halt();
74} 75}
@@ -76,7 +77,6 @@ void machine_power_off(void)
76void (*pm_power_off)(void) = machine_power_off; 77void (*pm_power_off)(void) = machine_power_off;
77EXPORT_SYMBOL(pm_power_off); 78EXPORT_SYMBOL(pm_power_off);
78 79
79#ifdef CONFIG_PCI
80static int powerd(void *__unused) 80static int powerd(void *__unused)
81{ 81{
82 static char *envp[] = { "HOME=/", "TERM=linux", "PATH=/sbin:/usr/sbin:/bin:/usr/bin", NULL }; 82 static char *envp[] = { "HOME=/", "TERM=linux", "PATH=/sbin:/usr/sbin:/bin:/usr/bin", NULL };
@@ -86,7 +86,7 @@ static int powerd(void *__unused)
86 daemonize("powerd"); 86 daemonize("powerd");
87 87
88 add_wait_queue(&powerd_wait, &wait); 88 add_wait_queue(&powerd_wait, &wait);
89again: 89
90 for (;;) { 90 for (;;) {
91 set_task_state(current, TASK_INTERRUPTIBLE); 91 set_task_state(current, TASK_INTERRUPTIBLE);
92 if (button_pressed) 92 if (button_pressed)
@@ -100,16 +100,28 @@ again:
100 /* Ok, down we go... */ 100 /* Ok, down we go... */
101 button_pressed = 0; 101 button_pressed = 0;
102 if (kernel_execve("/sbin/shutdown", argv, envp) < 0) { 102 if (kernel_execve("/sbin/shutdown", argv, envp) < 0) {
103 printk("powerd: shutdown execution failed\n"); 103 printk(KERN_ERR "powerd: shutdown execution failed\n");
104 add_wait_queue(&powerd_wait, &wait); 104 machine_power_off();
105 goto again;
106 } 105 }
107 return 0; 106 return 0;
108} 107}
109 108
109int start_powerd(void)
110{
111 int err;
112
113 err = kernel_thread(powerd, NULL, CLONE_FS);
114 if (err < 0)
115 printk(KERN_ERR "power: Failed to start power daemon.\n");
116 else
117 printk(KERN_INFO "power: powerd running.\n");
118
119 return err;
120}
121
110static int __init has_button_interrupt(unsigned int irq, struct device_node *dp) 122static int __init has_button_interrupt(unsigned int irq, struct device_node *dp)
111{ 123{
112 if (irq == PCI_IRQ_NONE) 124 if (irq == 0xffffffff)
113 return 0; 125 return 0;
114 if (!of_find_property(dp, "button", NULL)) 126 if (!of_find_property(dp, "button", NULL))
115 return 0; 127 return 0;
@@ -130,17 +142,14 @@ static int __devinit power_probe(struct of_device *op, const struct of_device_id
130 poweroff_method = machine_halt; /* able to use the standard halt */ 142 poweroff_method = machine_halt; /* able to use the standard halt */
131 143
132 if (has_button_interrupt(irq, op->node)) { 144 if (has_button_interrupt(irq, op->node)) {
133 if (kernel_thread(powerd, NULL, CLONE_FS) < 0) { 145 if (start_powerd() < 0)
134 printk("Failed to start power daemon.\n");
135 return 0; 146 return 0;
136 }
137 printk("powerd running.\n");
138 147
139 if (request_irq(irq, 148 if (request_irq(irq,
140 power_handler, 0, "power", NULL) < 0) 149 power_handler, 0, "power", NULL) < 0)
141 printk("power: Error, cannot register IRQ handler.\n"); 150 printk(KERN_ERR "power: Cannot setup IRQ handler.\n");
142 } else { 151 } else {
143 printk("not using powerd.\n"); 152 printk(KERN_INFO "power: Not using powerd.\n");
144 } 153 }
145 154
146 return 0; 155 return 0;
@@ -164,4 +173,3 @@ void __init power_init(void)
164 of_register_driver(&power_driver, &of_bus_type); 173 of_register_driver(&power_driver, &of_bus_type);
165 return; 174 return;
166} 175}
167#endif /* CONFIG_PCI */
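
The powerd machinery is now built unconditionally and split into start_powerd()/wake_up_powerd() so that non-PCI callers (e.g. the LDOM domain-services code elsewhere in this series) can trigger an orderly shutdown. A hypothetical external caller might look like this (a sketch; it assumes asm/power.h declares the two exported functions):

	#include <asm/power.h>

	static int example_shutdown_init(void)
	{
		/* Spawn the kernel thread that will exec /sbin/shutdown. */
		return start_powerd();
	}

	static void example_shutdown_request(void)
	{
		/* Wake powerd, as the button IRQ handler above does. */
		wake_up_powerd();
	}
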
diff --git a/arch/sparc64/kernel/process.c b/arch/sparc64/kernel/process.c
index f5f97e2c669c..93557507ec9f 100644
--- a/arch/sparc64/kernel/process.c
+++ b/arch/sparc64/kernel/process.c
@@ -29,6 +29,7 @@
29#include <linux/compat.h> 29#include <linux/compat.h>
30#include <linux/tick.h> 30#include <linux/tick.h>
31#include <linux/init.h> 31#include <linux/init.h>
32#include <linux/cpu.h>
32 33
33#include <asm/oplib.h> 34#include <asm/oplib.h>
34#include <asm/uaccess.h> 35#include <asm/uaccess.h>
@@ -49,7 +50,7 @@
49 50
50/* #define VERBOSE_SHOWREGS */ 51/* #define VERBOSE_SHOWREGS */
51 52
52static void sparc64_yield(void) 53static void sparc64_yield(int cpu)
53{ 54{
54 if (tlb_type != hypervisor) 55 if (tlb_type != hypervisor)
55 return; 56 return;
@@ -57,7 +58,7 @@ static void sparc64_yield(void)
57 clear_thread_flag(TIF_POLLING_NRFLAG); 58 clear_thread_flag(TIF_POLLING_NRFLAG);
58 smp_mb__after_clear_bit(); 59 smp_mb__after_clear_bit();
59 60
60 while (!need_resched()) { 61 while (!need_resched() && !cpu_is_offline(cpu)) {
61 unsigned long pstate; 62 unsigned long pstate;
62 63
63 /* Disable interrupts. */ 64 /* Disable interrupts. */
@@ -68,7 +69,7 @@ static void sparc64_yield(void)
68 : "=&r" (pstate) 69 : "=&r" (pstate)
69 : "i" (PSTATE_IE)); 70 : "i" (PSTATE_IE));
70 71
71 if (!need_resched()) 72 if (!need_resched() && !cpu_is_offline(cpu))
72 sun4v_cpu_yield(); 73 sun4v_cpu_yield();
73 74
74 /* Re-enable interrupts. */ 75 /* Re-enable interrupts. */
@@ -86,15 +87,25 @@ static void sparc64_yield(void)
86/* The idle loop on sparc64. */ 87/* The idle loop on sparc64. */
87void cpu_idle(void) 88void cpu_idle(void)
88{ 89{
90 int cpu = smp_processor_id();
91
89 set_thread_flag(TIF_POLLING_NRFLAG); 92 set_thread_flag(TIF_POLLING_NRFLAG);
90 93
91 while(1) { 94 while(1) {
92 tick_nohz_stop_sched_tick(); 95 tick_nohz_stop_sched_tick();
93 while (!need_resched()) 96
94 sparc64_yield(); 97 while (!need_resched() && !cpu_is_offline(cpu))
98 sparc64_yield(cpu);
99
95 tick_nohz_restart_sched_tick(); 100 tick_nohz_restart_sched_tick();
96 101
97 preempt_enable_no_resched(); 102 preempt_enable_no_resched();
103
104#ifdef CONFIG_HOTPLUG_CPU
105 if (cpu_is_offline(cpu))
106 cpu_play_dead();
107#endif
108
98 schedule(); 109 schedule();
99 preempt_disable(); 110 preempt_disable();
100 } 111 }
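
The idle loop now polls cpu_is_offline() alongside need_resched(), so a CPU being unplugged falls out of sparc64_yield() and into cpu_play_dead(). The flag dance that makes the polling safe is worth restating in isolation (a simplified sketch: the real sparc64_yield() also masks %pstate.IE around the hypervisor yield):

	/* Once TIF_POLLING_NRFLAG is clear, remote wakers must send an
	 * IPI instead of relying on the polling flag, so one final
	 * re-check before sleeping closes the lost-wakeup window.
	 */
	static void yield_once(int cpu)
	{
		clear_thread_flag(TIF_POLLING_NRFLAG);
		smp_mb__after_clear_bit();

		if (!need_resched() && !cpu_is_offline(cpu))
			sun4v_cpu_yield();

		set_thread_flag(TIF_POLLING_NRFLAG);
	}
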
diff --git a/arch/sparc64/kernel/prom.c b/arch/sparc64/kernel/prom.c
index 61036b346664..5d220302cd50 100644
--- a/arch/sparc64/kernel/prom.c
+++ b/arch/sparc64/kernel/prom.c
@@ -1808,7 +1808,7 @@ static void __init of_fill_in_cpu_data(void)
1808 1808
1809#ifdef CONFIG_SMP 1809#ifdef CONFIG_SMP
1810 cpu_set(cpuid, cpu_present_map); 1810 cpu_set(cpuid, cpu_present_map);
1811 cpu_set(cpuid, phys_cpu_present_map); 1811 cpu_set(cpuid, cpu_possible_map);
1812#endif 1812#endif
1813 } 1813 }
1814 1814
diff --git a/arch/sparc64/kernel/setup.c b/arch/sparc64/kernel/setup.c
index 7490cc670a53..dc928e49e341 100644
--- a/arch/sparc64/kernel/setup.c
+++ b/arch/sparc64/kernel/setup.c
@@ -442,7 +442,6 @@ static int show_cpuinfo(struct seq_file *m, void *__unused)
442 "D$ parity tl1\t: %u\n" 442 "D$ parity tl1\t: %u\n"
443 "I$ parity tl1\t: %u\n" 443 "I$ parity tl1\t: %u\n"
444#ifndef CONFIG_SMP 444#ifndef CONFIG_SMP
445 "Cpu0Bogo\t: %lu.%02lu\n"
446 "Cpu0ClkTck\t: %016lx\n" 445 "Cpu0ClkTck\t: %016lx\n"
447#endif 446#endif
448 , 447 ,
@@ -455,10 +454,8 @@ static int show_cpuinfo(struct seq_file *m, void *__unused)
455 ncpus_probed, 454 ncpus_probed,
456 num_online_cpus(), 455 num_online_cpus(),
457 dcache_parity_tl1_occurred, 456 dcache_parity_tl1_occurred,
458 icache_parity_tl1_occurred 457 icache_parity_tl1_occurred,
459#ifndef CONFIG_SMP 458#ifndef CONFIG_SMP
460 , cpu_data(0).udelay_val/(500000/HZ),
461 (cpu_data(0).udelay_val/(5000/HZ)) % 100,
462 cpu_data(0).clock_tick 459 cpu_data(0).clock_tick
463#endif 460#endif
464 ); 461 );
diff --git a/arch/sparc64/kernel/smp.c b/arch/sparc64/kernel/smp.c
index 40e40f968d61..b448d33321c6 100644
--- a/arch/sparc64/kernel/smp.c
+++ b/arch/sparc64/kernel/smp.c
@@ -1,6 +1,6 @@
1/* smp.c: Sparc64 SMP support. 1/* smp.c: Sparc64 SMP support.
2 * 2 *
3 * Copyright (C) 1997 David S. Miller (davem@caip.rutgers.edu) 3 * Copyright (C) 1997, 2007 David S. Miller (davem@davemloft.net)
4 */ 4 */
5 5
6#include <linux/module.h> 6#include <linux/module.h>
@@ -28,6 +28,8 @@
28#include <asm/tlbflush.h> 28#include <asm/tlbflush.h>
29#include <asm/mmu_context.h> 29#include <asm/mmu_context.h>
30#include <asm/cpudata.h> 30#include <asm/cpudata.h>
31#include <asm/hvtramp.h>
32#include <asm/io.h>
31 33
32#include <asm/irq.h> 34#include <asm/irq.h>
33#include <asm/irq_regs.h> 35#include <asm/irq_regs.h>
@@ -41,22 +43,26 @@
41#include <asm/sections.h> 43#include <asm/sections.h>
42#include <asm/prom.h> 44#include <asm/prom.h>
43#include <asm/mdesc.h> 45#include <asm/mdesc.h>
46#include <asm/ldc.h>
47#include <asm/hypervisor.h>
44 48
45extern void calibrate_delay(void); 49extern void calibrate_delay(void);
46 50
47int sparc64_multi_core __read_mostly; 51int sparc64_multi_core __read_mostly;
48 52
49/* Please don't make this stuff initdata!!! --DaveM */ 53cpumask_t cpu_possible_map __read_mostly = CPU_MASK_NONE;
50unsigned char boot_cpu_id;
51
52cpumask_t cpu_online_map __read_mostly = CPU_MASK_NONE; 54cpumask_t cpu_online_map __read_mostly = CPU_MASK_NONE;
53cpumask_t phys_cpu_present_map __read_mostly = CPU_MASK_NONE;
54cpumask_t cpu_sibling_map[NR_CPUS] __read_mostly = 55cpumask_t cpu_sibling_map[NR_CPUS] __read_mostly =
55 { [0 ... NR_CPUS-1] = CPU_MASK_NONE }; 56 { [0 ... NR_CPUS-1] = CPU_MASK_NONE };
56cpumask_t cpu_core_map[NR_CPUS] __read_mostly = 57cpumask_t cpu_core_map[NR_CPUS] __read_mostly =
57 { [0 ... NR_CPUS-1] = CPU_MASK_NONE }; 58 { [0 ... NR_CPUS-1] = CPU_MASK_NONE };
59
60EXPORT_SYMBOL(cpu_possible_map);
61EXPORT_SYMBOL(cpu_online_map);
62EXPORT_SYMBOL(cpu_sibling_map);
63EXPORT_SYMBOL(cpu_core_map);
64
58static cpumask_t smp_commenced_mask; 65static cpumask_t smp_commenced_mask;
59static cpumask_t cpu_callout_map;
60 66
61void smp_info(struct seq_file *m) 67void smp_info(struct seq_file *m)
62{ 68{
@@ -73,18 +79,17 @@ void smp_bogo(struct seq_file *m)
73 79
74 for_each_online_cpu(i) 80 for_each_online_cpu(i)
75 seq_printf(m, 81 seq_printf(m,
76 "Cpu%dBogo\t: %lu.%02lu\n"
77 "Cpu%dClkTck\t: %016lx\n", 82 "Cpu%dClkTck\t: %016lx\n",
78 i, cpu_data(i).udelay_val / (500000/HZ),
79 (cpu_data(i).udelay_val / (5000/HZ)) % 100,
80 i, cpu_data(i).clock_tick); 83 i, cpu_data(i).clock_tick);
81} 84}
82 85
86static __cacheline_aligned_in_smp DEFINE_SPINLOCK(call_lock);
87
83extern void setup_sparc64_timer(void); 88extern void setup_sparc64_timer(void);
84 89
85static volatile unsigned long callin_flag = 0; 90static volatile unsigned long callin_flag = 0;
86 91
87void __init smp_callin(void) 92void __devinit smp_callin(void)
88{ 93{
89 int cpuid = hard_smp_processor_id(); 94 int cpuid = hard_smp_processor_id();
90 95
@@ -102,8 +107,6 @@ void __init smp_callin(void)
102 107
103 local_irq_enable(); 108 local_irq_enable();
104 109
105 calibrate_delay();
106 cpu_data(cpuid).udelay_val = loops_per_jiffy;
107 callin_flag = 1; 110 callin_flag = 1;
108 __asm__ __volatile__("membar #Sync\n\t" 111 __asm__ __volatile__("membar #Sync\n\t"
109 "flush %%g6" : : : "memory"); 112 "flush %%g6" : : : "memory");
@@ -120,7 +123,9 @@ void __init smp_callin(void)
120 while (!cpu_isset(cpuid, smp_commenced_mask)) 123 while (!cpu_isset(cpuid, smp_commenced_mask))
121 rmb(); 124 rmb();
122 125
126 spin_lock(&call_lock);
123 cpu_set(cpuid, cpu_online_map); 127 cpu_set(cpuid, cpu_online_map);
128 spin_unlock(&call_lock);
124 129
125 /* idle thread is expected to have preempt disabled */ 130 /* idle thread is expected to have preempt disabled */
126 preempt_disable(); 131 preempt_disable();
@@ -268,6 +273,67 @@ static void smp_synchronize_one_tick(int cpu)
268 spin_unlock_irqrestore(&itc_sync_lock, flags); 273 spin_unlock_irqrestore(&itc_sync_lock, flags);
269} 274}
270 275
276#if defined(CONFIG_SUN_LDOMS) && defined(CONFIG_HOTPLUG_CPU)
277/* XXX Put this in some common place. XXX */
278static unsigned long kimage_addr_to_ra(void *p)
279{
280 unsigned long val = (unsigned long) p;
281
282 return kern_base + (val - KERNBASE);
283}
284
285static void ldom_startcpu_cpuid(unsigned int cpu, unsigned long thread_reg)
286{
287 extern unsigned long sparc64_ttable_tl0;
288 extern unsigned long kern_locked_tte_data;
289 extern int bigkernel;
290 struct hvtramp_descr *hdesc;
291 unsigned long trampoline_ra;
292 struct trap_per_cpu *tb;
293 u64 tte_vaddr, tte_data;
294 unsigned long hv_err;
295
296 hdesc = kzalloc(sizeof(*hdesc), GFP_KERNEL);
297 if (!hdesc) {
298 printk(KERN_ERR "ldom_startcpu_cpuid: Cannot allocate "
299 "hvtramp_descr.\n");
300 return;
301 }
302
303 hdesc->cpu = cpu;
304 hdesc->num_mappings = (bigkernel ? 2 : 1);
305
306 tb = &trap_block[cpu];
307 tb->hdesc = hdesc;
308
309 hdesc->fault_info_va = (unsigned long) &tb->fault_info;
310 hdesc->fault_info_pa = kimage_addr_to_ra(&tb->fault_info);
311
312 hdesc->thread_reg = thread_reg;
313
314 tte_vaddr = (unsigned long) KERNBASE;
315 tte_data = kern_locked_tte_data;
316
317 hdesc->maps[0].vaddr = tte_vaddr;
318 hdesc->maps[0].tte = tte_data;
319 if (bigkernel) {
320 tte_vaddr += 0x400000;
321 tte_data += 0x400000;
322 hdesc->maps[1].vaddr = tte_vaddr;
323 hdesc->maps[1].tte = tte_data;
324 }
325
326 trampoline_ra = kimage_addr_to_ra(hv_cpu_startup);
327
328 hv_err = sun4v_cpu_start(cpu, trampoline_ra,
329 kimage_addr_to_ra(&sparc64_ttable_tl0),
330 __pa(hdesc));
331 if (hv_err)
332 printk(KERN_ERR "ldom_startcpu_cpuid: sun4v_cpu_start() "
333 "gives error %lu\n", hv_err);
334}
335#endif
336
271extern void sun4v_init_mondo_queues(int use_bootmem, int cpu, int alloc, int load); 337extern void sun4v_init_mondo_queues(int use_bootmem, int cpu, int alloc, int load);
272 338
273extern unsigned long sparc64_cpu_startup; 339extern unsigned long sparc64_cpu_startup;
@@ -280,6 +346,7 @@ static struct thread_info *cpu_new_thread = NULL;
280 346
281static int __devinit smp_boot_one_cpu(unsigned int cpu) 347static int __devinit smp_boot_one_cpu(unsigned int cpu)
282{ 348{
349 struct trap_per_cpu *tb = &trap_block[cpu];
283 unsigned long entry = 350 unsigned long entry =
284 (unsigned long)(&sparc64_cpu_startup); 351 (unsigned long)(&sparc64_cpu_startup);
285 unsigned long cookie = 352 unsigned long cookie =
@@ -290,20 +357,25 @@ static int __devinit smp_boot_one_cpu(unsigned int cpu)
290 p = fork_idle(cpu); 357 p = fork_idle(cpu);
291 callin_flag = 0; 358 callin_flag = 0;
292 cpu_new_thread = task_thread_info(p); 359 cpu_new_thread = task_thread_info(p);
293 cpu_set(cpu, cpu_callout_map);
294 360
295 if (tlb_type == hypervisor) { 361 if (tlb_type == hypervisor) {
296 /* Alloc the mondo queues, cpu will load them. */ 362 /* Alloc the mondo queues, cpu will load them. */
297 sun4v_init_mondo_queues(0, cpu, 1, 0); 363 sun4v_init_mondo_queues(0, cpu, 1, 0);
298 364
299 prom_startcpu_cpuid(cpu, entry, cookie); 365#if defined(CONFIG_SUN_LDOMS) && defined(CONFIG_HOTPLUG_CPU)
366 if (ldom_domaining_enabled)
367 ldom_startcpu_cpuid(cpu,
368 (unsigned long) cpu_new_thread);
369 else
370#endif
371 prom_startcpu_cpuid(cpu, entry, cookie);
300 } else { 372 } else {
301 struct device_node *dp = of_find_node_by_cpuid(cpu); 373 struct device_node *dp = of_find_node_by_cpuid(cpu);
302 374
303 prom_startcpu(dp->node, entry, cookie); 375 prom_startcpu(dp->node, entry, cookie);
304 } 376 }
305 377
306 for (timeout = 0; timeout < 5000000; timeout++) { 378 for (timeout = 0; timeout < 50000; timeout++) {
307 if (callin_flag) 379 if (callin_flag)
308 break; 380 break;
309 udelay(100); 381 udelay(100);
@@ -313,11 +385,15 @@ static int __devinit smp_boot_one_cpu(unsigned int cpu)
313 ret = 0; 385 ret = 0;
314 } else { 386 } else {
315 printk("Processor %d is stuck.\n", cpu); 387 printk("Processor %d is stuck.\n", cpu);
316 cpu_clear(cpu, cpu_callout_map);
317 ret = -ENODEV; 388 ret = -ENODEV;
318 } 389 }
319 cpu_new_thread = NULL; 390 cpu_new_thread = NULL;
320 391
392 if (tb->hdesc) {
393 kfree(tb->hdesc);
394 tb->hdesc = NULL;
395 }
396
321 return ret; 397 return ret;
322} 398}
323 399
@@ -720,7 +796,6 @@ struct call_data_struct {
720 int wait; 796 int wait;
721}; 797};
722 798
723static __cacheline_aligned_in_smp DEFINE_SPINLOCK(call_lock);
724static struct call_data_struct *call_data; 799static struct call_data_struct *call_data;
725 800
726extern unsigned long xcall_call_function; 801extern unsigned long xcall_call_function;
@@ -1152,34 +1227,14 @@ void smp_penguin_jailcell(int irq, struct pt_regs *regs)
1152 preempt_enable(); 1227 preempt_enable();
1153} 1228}
1154 1229
1155void __init smp_tick_init(void)
1156{
1157 boot_cpu_id = hard_smp_processor_id();
1158}
1159
1160/* /proc/profile writes can call this, don't __init it please. */ 1230/* /proc/profile writes can call this, don't __init it please. */
1161int setup_profiling_timer(unsigned int multiplier) 1231int setup_profiling_timer(unsigned int multiplier)
1162{ 1232{
1163 return -EINVAL; 1233 return -EINVAL;
1164} 1234}
1165 1235
1166/* Constrain the number of cpus to max_cpus. */
1167void __init smp_prepare_cpus(unsigned int max_cpus) 1236void __init smp_prepare_cpus(unsigned int max_cpus)
1168{ 1237{
1169 int i;
1170
1171 if (num_possible_cpus() > max_cpus) {
1172 for_each_possible_cpu(i) {
1173 if (i != boot_cpu_id) {
1174 cpu_clear(i, phys_cpu_present_map);
1175 cpu_clear(i, cpu_present_map);
1176 if (num_possible_cpus() <= max_cpus)
1177 break;
1178 }
1179 }
1180 }
1181
1182 cpu_data(boot_cpu_id).udelay_val = loops_per_jiffy;
1183} 1238}
1184 1239
1185void __devinit smp_prepare_boot_cpu(void) 1240void __devinit smp_prepare_boot_cpu(void)
@@ -1190,30 +1245,32 @@ void __devinit smp_fill_in_sib_core_maps(void)
1190{ 1245{
1191 unsigned int i; 1246 unsigned int i;
1192 1247
1193 for_each_possible_cpu(i) { 1248 for_each_present_cpu(i) {
1194 unsigned int j; 1249 unsigned int j;
1195 1250
1251 cpus_clear(cpu_core_map[i]);
1196 if (cpu_data(i).core_id == 0) { 1252 if (cpu_data(i).core_id == 0) {
1197 cpu_set(i, cpu_core_map[i]); 1253 cpu_set(i, cpu_core_map[i]);
1198 continue; 1254 continue;
1199 } 1255 }
1200 1256
1201 for_each_possible_cpu(j) { 1257 for_each_present_cpu(j) {
1202 if (cpu_data(i).core_id == 1258 if (cpu_data(i).core_id ==
1203 cpu_data(j).core_id) 1259 cpu_data(j).core_id)
1204 cpu_set(j, cpu_core_map[i]); 1260 cpu_set(j, cpu_core_map[i]);
1205 } 1261 }
1206 } 1262 }
1207 1263
1208 for_each_possible_cpu(i) { 1264 for_each_present_cpu(i) {
1209 unsigned int j; 1265 unsigned int j;
1210 1266
1267 cpus_clear(cpu_sibling_map[i]);
1211 if (cpu_data(i).proc_id == -1) { 1268 if (cpu_data(i).proc_id == -1) {
1212 cpu_set(i, cpu_sibling_map[i]); 1269 cpu_set(i, cpu_sibling_map[i]);
1213 continue; 1270 continue;
1214 } 1271 }
1215 1272
1216 for_each_possible_cpu(j) { 1273 for_each_present_cpu(j) {
1217 if (cpu_data(i).proc_id == 1274 if (cpu_data(i).proc_id ==
1218 cpu_data(j).proc_id) 1275 cpu_data(j).proc_id)
1219 cpu_set(j, cpu_sibling_map[i]); 1276 cpu_set(j, cpu_sibling_map[i]);
@@ -1242,18 +1299,112 @@ int __cpuinit __cpu_up(unsigned int cpu)
1242 return ret; 1299 return ret;
1243} 1300}
1244 1301
1245void __init smp_cpus_done(unsigned int max_cpus) 1302#ifdef CONFIG_HOTPLUG_CPU
1303void cpu_play_dead(void)
1304{
1305 int cpu = smp_processor_id();
1306 unsigned long pstate;
1307
1308 idle_task_exit();
1309
1310 if (tlb_type == hypervisor) {
1311 struct trap_per_cpu *tb = &trap_block[cpu];
1312
1313 sun4v_cpu_qconf(HV_CPU_QUEUE_CPU_MONDO,
1314 tb->cpu_mondo_pa, 0);
1315 sun4v_cpu_qconf(HV_CPU_QUEUE_DEVICE_MONDO,
1316 tb->dev_mondo_pa, 0);
1317 sun4v_cpu_qconf(HV_CPU_QUEUE_RES_ERROR,
1318 tb->resum_mondo_pa, 0);
1319 sun4v_cpu_qconf(HV_CPU_QUEUE_NONRES_ERROR,
1320 tb->nonresum_mondo_pa, 0);
1321 }
1322
1323 cpu_clear(cpu, smp_commenced_mask);
1324 membar_safe("#Sync");
1325
1326 local_irq_disable();
1327
1328 __asm__ __volatile__(
1329 "rdpr %%pstate, %0\n\t"
1330 "wrpr %0, %1, %%pstate"
1331 : "=r" (pstate)
1332 : "i" (PSTATE_IE));
1333
1334 while (1)
1335 barrier();
1336}
1337
1338int __cpu_disable(void)
1246{ 1339{
1247 unsigned long bogosum = 0; 1340 int cpu = smp_processor_id();
1341 cpuinfo_sparc *c;
1248 int i; 1342 int i;
1249 1343
1250 for_each_online_cpu(i) 1344 for_each_cpu_mask(i, cpu_core_map[cpu])
1251 bogosum += cpu_data(i).udelay_val; 1345 cpu_clear(cpu, cpu_core_map[i]);
1252 printk("Total of %ld processors activated " 1346 cpus_clear(cpu_core_map[cpu]);
1253 "(%lu.%02lu BogoMIPS).\n", 1347
1254 (long) num_online_cpus(), 1348 for_each_cpu_mask(i, cpu_sibling_map[cpu])
1255 bogosum/(500000/HZ), 1349 cpu_clear(cpu, cpu_sibling_map[i]);
1256 (bogosum/(5000/HZ))%100); 1350 cpus_clear(cpu_sibling_map[cpu]);
1351
1352 c = &cpu_data(cpu);
1353
1354 c->core_id = 0;
1355 c->proc_id = -1;
1356
1357 spin_lock(&call_lock);
1358 cpu_clear(cpu, cpu_online_map);
1359 spin_unlock(&call_lock);
1360
1361 smp_wmb();
1362
1363 /* Make sure no interrupts point to this cpu. */
1364 fixup_irqs();
1365
1366 local_irq_enable();
1367 mdelay(1);
1368 local_irq_disable();
1369
1370 return 0;
1371}
1372
1373void __cpu_die(unsigned int cpu)
1374{
1375 int i;
1376
1377 for (i = 0; i < 100; i++) {
1378 smp_rmb();
1379 if (!cpu_isset(cpu, smp_commenced_mask))
1380 break;
1381 msleep(100);
1382 }
1383 if (cpu_isset(cpu, smp_commenced_mask)) {
1384 printk(KERN_ERR "CPU %u didn't die...\n", cpu);
1385 } else {
1386#if defined(CONFIG_SUN_LDOMS)
1387 unsigned long hv_err;
1388 int limit = 100;
1389
1390 do {
1391 hv_err = sun4v_cpu_stop(cpu);
1392 if (hv_err == HV_EOK) {
1393 cpu_clear(cpu, cpu_present_map);
1394 break;
1395 }
1396 } while (--limit > 0);
1397 if (limit <= 0) {
1398 printk(KERN_ERR "sun4v_cpu_stop() fails err=%lu\n",
1399 hv_err);
1400 }
1401#endif
1402 }
1403}
1404#endif
1405
1406void __init smp_cpus_done(unsigned int max_cpus)
1407{
1257} 1408}
1258 1409
1259void smp_send_reschedule(int cpu) 1410void smp_send_reschedule(int cpu)
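
These hooks slot into the generic hotplug flow: __cpu_disable() runs on the dying CPU (clearing it from the online map and redirecting its interrupts via fixup_irqs()), the idle loop then drops into cpu_play_dead(), and __cpu_die() on a surviving CPU waits for the victim to leave smp_commenced_mask before, under LDOMs, stopping it through the hypervisor. The bounded-retry idiom wrapped around sun4v_cpu_stop() generalizes to any sun4v service (a sketch; hv_call is a placeholder for a real hypervisor call):

	#include <linux/errno.h>
	#include <asm/hypervisor.h>

	/* Poll a hypervisor service until it returns HV_EOK or the
	 * attempt budget is exhausted, e.g. hv_retry(sun4v_cpu_stop, cpu).
	 */
	static int hv_retry(unsigned long (*hv_call)(unsigned long),
			    unsigned long arg)
	{
		int limit = 100;

		do {
			if (hv_call(arg) == HV_EOK)
				return 0;
		} while (--limit > 0);

		return -EIO;
	}
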
diff --git a/arch/sparc64/kernel/sparc64_ksyms.c b/arch/sparc64/kernel/sparc64_ksyms.c
index 6fa761612899..719d676c2ddc 100644
--- a/arch/sparc64/kernel/sparc64_ksyms.c
+++ b/arch/sparc64/kernel/sparc64_ksyms.c
@@ -1,7 +1,6 @@
1/* $Id: sparc64_ksyms.c,v 1.121 2002/02/09 19:49:31 davem Exp $ 1/* arch/sparc64/kernel/sparc64_ksyms.c: Sparc64 specific ksyms support.
2 * arch/sparc64/kernel/sparc64_ksyms.c: Sparc64 specific ksyms support.
3 * 2 *
4 * Copyright (C) 1996 David S. Miller (davem@caip.rutgers.edu) 3 * Copyright (C) 1996, 2007 David S. Miller (davem@davemloft.net)
5 * Copyright (C) 1996 Eddie C. Dost (ecd@skynet.be) 4 * Copyright (C) 1996 Eddie C. Dost (ecd@skynet.be)
6 * Copyright (C) 1999 Jakub Jelinek (jj@ultra.linux.cz) 5 * Copyright (C) 1999 Jakub Jelinek (jj@ultra.linux.cz)
7 */ 6 */
@@ -28,7 +27,6 @@
28#include <net/compat.h> 27#include <net/compat.h>
29 28
30#include <asm/oplib.h> 29#include <asm/oplib.h>
31#include <asm/delay.h>
32#include <asm/system.h> 30#include <asm/system.h>
33#include <asm/auxio.h> 31#include <asm/auxio.h>
34#include <asm/pgtable.h> 32#include <asm/pgtable.h>
@@ -124,10 +122,6 @@ EXPORT_SYMBOL(__write_lock);
124EXPORT_SYMBOL(__write_unlock); 122EXPORT_SYMBOL(__write_unlock);
125EXPORT_SYMBOL(__write_trylock); 123EXPORT_SYMBOL(__write_trylock);
126 124
127/* CPU online map and active count. */
128EXPORT_SYMBOL(cpu_online_map);
129EXPORT_SYMBOL(phys_cpu_present_map);
130
131EXPORT_SYMBOL(smp_call_function); 125EXPORT_SYMBOL(smp_call_function);
132#endif /* CONFIG_SMP */ 126#endif /* CONFIG_SMP */
133 127
@@ -330,12 +324,6 @@ EXPORT_SYMBOL(memset);
330EXPORT_SYMBOL(memmove); 324EXPORT_SYMBOL(memmove);
331EXPORT_SYMBOL(strncmp); 325EXPORT_SYMBOL(strncmp);
332 326
333/* Delay routines. */
334EXPORT_SYMBOL(__udelay);
335EXPORT_SYMBOL(__ndelay);
336EXPORT_SYMBOL(__const_udelay);
337EXPORT_SYMBOL(__delay);
338
339void VISenter(void); 327void VISenter(void);
340/* RAID code needs this */ 328/* RAID code needs this */
341EXPORT_SYMBOL(VISenter); 329EXPORT_SYMBOL(VISenter);
diff --git a/arch/sparc64/kernel/sysfs.c b/arch/sparc64/kernel/sysfs.c
index cdb1477af89f..52816c7be0b9 100644
--- a/arch/sparc64/kernel/sysfs.c
+++ b/arch/sparc64/kernel/sysfs.c
@@ -193,7 +193,6 @@ static ssize_t show_##NAME(struct sys_device *dev, char *buf) \
193} 193}
194 194
195SHOW_CPUDATA_ULONG_NAME(clock_tick, clock_tick); 195SHOW_CPUDATA_ULONG_NAME(clock_tick, clock_tick);
196SHOW_CPUDATA_ULONG_NAME(udelay_val, udelay_val);
197SHOW_CPUDATA_UINT_NAME(l1_dcache_size, dcache_size); 196SHOW_CPUDATA_UINT_NAME(l1_dcache_size, dcache_size);
198SHOW_CPUDATA_UINT_NAME(l1_dcache_line_size, dcache_line_size); 197SHOW_CPUDATA_UINT_NAME(l1_dcache_line_size, dcache_line_size);
199SHOW_CPUDATA_UINT_NAME(l1_icache_size, icache_size); 198SHOW_CPUDATA_UINT_NAME(l1_icache_size, icache_size);
@@ -203,7 +202,6 @@ SHOW_CPUDATA_UINT_NAME(l2_cache_line_size, ecache_line_size);
203 202
204static struct sysdev_attribute cpu_core_attrs[] = { 203static struct sysdev_attribute cpu_core_attrs[] = {
205 _SYSDEV_ATTR(clock_tick, 0444, show_clock_tick, NULL), 204 _SYSDEV_ATTR(clock_tick, 0444, show_clock_tick, NULL),
206 _SYSDEV_ATTR(udelay_val, 0444, show_udelay_val, NULL),
207 _SYSDEV_ATTR(l1_dcache_size, 0444, show_l1_dcache_size, NULL), 205 _SYSDEV_ATTR(l1_dcache_size, 0444, show_l1_dcache_size, NULL),
208 _SYSDEV_ATTR(l1_dcache_line_size, 0444, show_l1_dcache_line_size, NULL), 206 _SYSDEV_ATTR(l1_dcache_line_size, 0444, show_l1_dcache_line_size, NULL),
209 _SYSDEV_ATTR(l1_icache_size, 0444, show_l1_icache_size, NULL), 207 _SYSDEV_ATTR(l1_icache_size, 0444, show_l1_icache_size, NULL),
diff --git a/arch/sparc64/kernel/time.c b/arch/sparc64/kernel/time.c
index a31a0439244f..62e316ab1339 100644
--- a/arch/sparc64/kernel/time.c
+++ b/arch/sparc64/kernel/time.c
@@ -849,9 +849,6 @@ static unsigned long sparc64_init_timers(void)
849{ 849{
850 struct device_node *dp; 850 struct device_node *dp;
851 unsigned long clock; 851 unsigned long clock;
852#ifdef CONFIG_SMP
853 extern void smp_tick_init(void);
854#endif
855 852
856 dp = of_find_node_by_path("/"); 853 dp = of_find_node_by_path("/");
857 if (tlb_type == spitfire) { 854 if (tlb_type == spitfire) {
@@ -874,10 +871,6 @@ static unsigned long sparc64_init_timers(void)
874 clock = of_getintprop_default(dp, "stick-frequency", 0); 871 clock = of_getintprop_default(dp, "stick-frequency", 0);
875 } 872 }
876 873
877#ifdef CONFIG_SMP
878 smp_tick_init();
879#endif
880
881 return clock; 874 return clock;
882} 875}
883 876
@@ -1038,10 +1031,31 @@ static void __init setup_clockevent_multiplier(unsigned long hz)
1038 sparc64_clockevent.mult = mult; 1031 sparc64_clockevent.mult = mult;
1039} 1032}
1040 1033
1034static unsigned long tb_ticks_per_usec __read_mostly;
1035
1036void __delay(unsigned long loops)
1037{
1038 unsigned long bclock, now;
1039
1040 bclock = tick_ops->get_tick();
1041 do {
1042 now = tick_ops->get_tick();
1043 } while ((now-bclock) < loops);
1044}
1045EXPORT_SYMBOL(__delay);
1046
1047void udelay(unsigned long usecs)
1048{
1049 __delay(tb_ticks_per_usec * usecs);
1050}
1051EXPORT_SYMBOL(udelay);
1052
1041void __init time_init(void) 1053void __init time_init(void)
1042{ 1054{
1043 unsigned long clock = sparc64_init_timers(); 1055 unsigned long clock = sparc64_init_timers();
1044 1056
1057 tb_ticks_per_usec = clock / USEC_PER_SEC;
1058
1045 timer_ticks_per_nsec_quotient = 1059 timer_ticks_per_nsec_quotient =
1046 clocksource_hz2mult(clock, SPARC64_NSEC_PER_CYC_SHIFT); 1060 clocksource_hz2mult(clock, SPARC64_NSEC_PER_CYC_SHIFT);
1047 1061
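
With __delay()/udelay() now driven by the tick counter, there is nothing to calibrate at boot: tb_ticks_per_usec is simply the stick frequency divided by USEC_PER_SEC (a 1 GHz clock gives 1000 ticks per microsecond), which is why the BogoMIPS plumbing could be deleted from smp.c, setup.c and sysfs.c above. Callers are unaffected (illustrative):

	#include <linux/delay.h>

	/* A fixed settle time after poking a device register; with the
	 * implementation above this spins until the tick counter has
	 * advanced by 100 * tb_ticks_per_usec.
	 */
	static void settle_100us(void)
	{
		udelay(100);
	}
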
diff --git a/arch/sparc64/kernel/vio.c b/arch/sparc64/kernel/vio.c
new file mode 100644
index 000000000000..49569b44ea1f
--- /dev/null
+++ b/arch/sparc64/kernel/vio.c
@@ -0,0 +1,395 @@
1/* vio.c: Virtual I/O channel devices probing infrastructure.
2 *
3 * Copyright (c) 2003-2005 IBM Corp.
4 * Dave Engebretsen engebret@us.ibm.com
5 * Santiago Leon santil@us.ibm.com
6 * Hollis Blanchard <hollisb@us.ibm.com>
7 * Stephen Rothwell
8 *
9 * Adapted to sparc64 by David S. Miller davem@davemloft.net
10 */
11
12#include <linux/kernel.h>
13#include <linux/irq.h>
14#include <linux/init.h>
15
16#include <asm/mdesc.h>
17#include <asm/vio.h>
18
19static inline int find_in_proplist(const char *list, const char *match,
20 int len)
21{
22 while (len > 0) {
23 int l;
24
25 if (!strcmp(list, match))
26 return 1;
27 l = strlen(list) + 1;
28 list += l;
29 len -= l;
30 }
31 return 0;
32}
33
34static const struct vio_device_id *vio_match_device(
35 const struct vio_device_id *matches,
36 const struct vio_dev *dev)
37{
38 const char *type, *compat;
39 int len;
40
41 type = dev->type;
42 compat = dev->compat;
43 len = dev->compat_len;
44
45 while (matches->type[0] || matches->compat[0]) {
46 int match = 1;
47 if (matches->type[0])
48 match &= !strcmp(matches->type, type);
49
50 if (matches->compat[0]) {
51 match &= len &&
52 find_in_proplist(compat, matches->compat, len);
53 }
54 if (match)
55 return matches;
56 matches++;
57 }
58 return NULL;
59}
60
61static int vio_bus_match(struct device *dev, struct device_driver *drv)
62{
63 struct vio_dev *vio_dev = to_vio_dev(dev);
64 struct vio_driver *vio_drv = to_vio_driver(drv);
65 const struct vio_device_id *matches = vio_drv->id_table;
66
67 if (!matches)
68 return 0;
69
70 return vio_match_device(matches, vio_dev) != NULL;
71}
72
73static int vio_device_probe(struct device *dev)
74{
75 struct vio_dev *vdev = to_vio_dev(dev);
76 struct vio_driver *drv = to_vio_driver(dev->driver);
77 const struct vio_device_id *id;
78 int error = -ENODEV;
79
80 if (drv->probe) {
81 id = vio_match_device(drv->id_table, vdev);
82 if (id)
83 error = drv->probe(vdev, id);
84 }
85
86 return error;
87}
88
89static int vio_device_remove(struct device *dev)
90{
91 struct vio_dev *vdev = to_vio_dev(dev);
92 struct vio_driver *drv = to_vio_driver(dev->driver);
93
94 if (drv->remove)
95 return drv->remove(vdev);
96
97 return 1;
98}
99
100static ssize_t devspec_show(struct device *dev,
101 struct device_attribute *attr, char *buf)
102{
103 struct vio_dev *vdev = to_vio_dev(dev);
104 const char *str = "none";
105
106 if (!strcmp(vdev->type, "network"))
107 str = "vnet";
108 else if (!strcmp(vdev->type, "block"))
109 str = "vdisk";
110
111 return sprintf(buf, "%s\n", str);
112}
113
114static ssize_t type_show(struct device *dev,
115 struct device_attribute *attr, char *buf)
116{
117 struct vio_dev *vdev = to_vio_dev(dev);
118 return sprintf(buf, "%s\n", vdev->type);
119}
120
121static struct device_attribute vio_dev_attrs[] = {
122 __ATTR_RO(devspec),
123 __ATTR_RO(type),
124 __ATTR_NULL
125};
126
127static struct bus_type vio_bus_type = {
128 .name = "vio",
129 .dev_attrs = vio_dev_attrs,
130 .match = vio_bus_match,
131 .probe = vio_device_probe,
132 .remove = vio_device_remove,
133};
134
135int vio_register_driver(struct vio_driver *viodrv)
136{
137 viodrv->driver.bus = &vio_bus_type;
138
139 return driver_register(&viodrv->driver);
140}
141EXPORT_SYMBOL(vio_register_driver);
142
143void vio_unregister_driver(struct vio_driver *viodrv)
144{
145 driver_unregister(&viodrv->driver);
146}
147EXPORT_SYMBOL(vio_unregister_driver);
148
149static void __devinit vio_dev_release(struct device *dev)
150{
151 kfree(to_vio_dev(dev));
152}
153
154static ssize_t
155show_pciobppath_attr(struct device *dev, struct device_attribute *attr,
156 char *buf)
157{
158 struct vio_dev *vdev;
159 struct device_node *dp;
160
161 vdev = to_vio_dev(dev);
162 dp = vdev->dp;
163
164 return snprintf (buf, PAGE_SIZE, "%s\n", dp->full_name);
165}
166
167static DEVICE_ATTR(obppath, S_IRUSR | S_IRGRP | S_IROTH,
168 show_pciobppath_attr, NULL);
169
170struct device_node *cdev_node;
171
172static struct vio_dev *root_vdev;
173static u64 cdev_cfg_handle;
174
175static void vio_fill_channel_info(struct mdesc_handle *hp, u64 mp,
176 struct vio_dev *vdev)
177{
178 u64 a;
179
180 mdesc_for_each_arc(a, hp, mp, MDESC_ARC_TYPE_FWD) {
181 const u64 *chan_id;
182 const u64 *irq;
183 u64 target;
184
185 target = mdesc_arc_target(hp, a);
186
187 irq = mdesc_get_property(hp, target, "tx-ino", NULL);
188 if (irq)
189 vdev->tx_irq = sun4v_build_virq(cdev_cfg_handle, *irq);
190
191 irq = mdesc_get_property(hp, target, "rx-ino", NULL);
192 if (irq)
193 vdev->rx_irq = sun4v_build_virq(cdev_cfg_handle, *irq);
194
195 chan_id = mdesc_get_property(hp, target, "id", NULL);
196 if (chan_id)
197 vdev->channel_id = *chan_id;
198 }
199}
200
201static struct vio_dev *vio_create_one(struct mdesc_handle *hp, u64 mp,
202 struct device *parent)
203{
204 const char *type, *compat;
205 struct device_node *dp;
206 struct vio_dev *vdev;
207 int err, tlen, clen;
208
209 type = mdesc_get_property(hp, mp, "device-type", &tlen);
210 if (!type) {
211 type = mdesc_get_property(hp, mp, "name", &tlen);
212 if (!type) {
213 type = mdesc_node_name(hp, mp);
214 tlen = strlen(type) + 1;
215 }
216 }
217 if (tlen > VIO_MAX_TYPE_LEN) {
218 printk(KERN_ERR "VIO: Type string [%s] is too long.\n",
219 type);
220 return NULL;
221 }
222
223 compat = mdesc_get_property(hp, mp, "device-type", &clen);
224 if (!compat) {
225 clen = 0;
226 } else if (clen > VIO_MAX_COMPAT_LEN) {
227 printk(KERN_ERR "VIO: Compat len %d for [%s] is too long.\n",
228 clen, type);
229 return NULL;
230 }
231
232 vdev = kzalloc(sizeof(*vdev), GFP_KERNEL);
233 if (!vdev) {
234 printk(KERN_ERR "VIO: Could not allocate vio_dev\n");
235 return NULL;
236 }
237
238 vdev->mp = mp;
239 memcpy(vdev->type, type, tlen);
240 if (compat)
241 memcpy(vdev->compat, compat, clen);
242 else
243 memset(vdev->compat, 0, sizeof(vdev->compat));
244 vdev->compat_len = clen;
245
246 vdev->channel_id = ~0UL;
247 vdev->tx_irq = ~0;
248 vdev->rx_irq = ~0;
249
250 vio_fill_channel_info(hp, mp, vdev);
251
252 snprintf(vdev->dev.bus_id, BUS_ID_SIZE, "%lx", mp);
253 vdev->dev.parent = parent;
254 vdev->dev.bus = &vio_bus_type;
255 vdev->dev.release = vio_dev_release;
256
257 if (parent == NULL) {
258 dp = cdev_node;
259 } else if (to_vio_dev(parent) == root_vdev) {
260 dp = of_get_next_child(cdev_node, NULL);
261 while (dp) {
262 if (!strcmp(dp->type, type))
263 break;
264
265 dp = of_get_next_child(cdev_node, dp);
266 }
267 } else {
268 dp = to_vio_dev(parent)->dp;
269 }
270 vdev->dp = dp;
271
272 err = device_register(&vdev->dev);
273 if (err) {
274 printk(KERN_ERR "VIO: Could not register device %s, err=%d\n",
275 vdev->dev.bus_id, err);
276 kfree(vdev);
277 return NULL;
278 }
279 if (vdev->dp)
280 err = sysfs_create_file(&vdev->dev.kobj,
281 &dev_attr_obppath.attr);
282
283 return vdev;
284}
285
286static void walk_tree(struct mdesc_handle *hp, u64 n, struct vio_dev *parent)
287{
288 u64 a;
289
290 mdesc_for_each_arc(a, hp, n, MDESC_ARC_TYPE_FWD) {
291 struct vio_dev *vdev;
292 u64 target;
293
294 target = mdesc_arc_target(hp, a);
295 vdev = vio_create_one(hp, target, &parent->dev);
296 if (vdev)
297 walk_tree(hp, target, vdev);
298 }
299}
300
301static void create_devices(struct mdesc_handle *hp, u64 root)
302{
303 u64 mp;
304
305 root_vdev = vio_create_one(hp, root, NULL);
306 if (!root_vdev) {
307 printk(KERN_ERR "VIO: Coult not create root device.\n");
308 return;
309 }
310
311 walk_tree(hp, root, root_vdev);
312
313 /* Domain services is odd as it doesn't sit underneath the
314 * channel-devices node, so we plug it in manually.
315 */
316 mp = mdesc_node_by_name(hp, MDESC_NODE_NULL, "domain-services");
317 if (mp != MDESC_NODE_NULL) {
318 struct vio_dev *parent = vio_create_one(hp, mp,
319 &root_vdev->dev);
320
321 if (parent)
322 walk_tree(hp, mp, parent);
323 }
324}
325
326const char *channel_devices_node = "channel-devices";
327const char *channel_devices_compat = "SUNW,sun4v-channel-devices";
328const char *cfg_handle_prop = "cfg-handle";
329
330static int __init vio_init(void)
331{
332 struct mdesc_handle *hp;
333 const char *compat;
334 const u64 *cfg_handle;
335 int err, len;
336 u64 root;
337
338 err = bus_register(&vio_bus_type);
339 if (err) {
340 printk(KERN_ERR "VIO: Could not register bus type err=%d\n",
341 err);
342 return err;
343 }
344
345 hp = mdesc_grab();
346 if (!hp)
347 return 0;
348
349 root = mdesc_node_by_name(hp, MDESC_NODE_NULL, channel_devices_node);
350 if (root == MDESC_NODE_NULL) {
351 printk(KERN_INFO "VIO: No channel-devices MDESC node.\n");
352 mdesc_release(hp);
353 return 0;
354 }
355
356 cdev_node = of_find_node_by_name(NULL, "channel-devices");
357 err = -ENODEV;
358 if (!cdev_node) {
359 printk(KERN_INFO "VIO: No channel-devices OBP node.\n");
360 goto out_release;
361 }
362
363 compat = mdesc_get_property(hp, root, "compatible", &len);
364 if (!compat) {
365 printk(KERN_ERR "VIO: Channel devices lacks compatible "
366 "property\n");
367 goto out_release;
368 }
369 if (!find_in_proplist(compat, channel_devices_compat, len)) {
370 printk(KERN_ERR "VIO: Channel devices node lacks (%s) "
371 "compat entry.\n", channel_devices_compat);
372 goto out_release;
373 }
374
375 cfg_handle = mdesc_get_property(hp, root, cfg_handle_prop, NULL);
376 if (!cfg_handle) {
377 printk(KERN_ERR "VIO: Channel devices lacks %s property\n",
378 cfg_handle_prop);
379 goto out_release;
380 }
381
382 cdev_cfg_handle = *cfg_handle;
383
384 create_devices(hp, root);
385
386 mdesc_release(hp);
387
388 return 0;
389
390out_release:
391 mdesc_release(hp);
392 return err;
393}
394
395postcore_initcall(vio_init);
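
A client of the new bus registers a struct vio_driver whose id_table is matched by vio_bus_match() on the device's type and compat strings. A hypothetical driver skeleton, following the field and callback usage visible in vio_device_probe()/vio_device_remove() above (a sketch, not a reference implementation):

	#include <linux/module.h>
	#include <linux/init.h>
	#include <asm/vio.h>

	static const struct vio_device_id example_vio_ids[] = {
		{ .type = "network" },
		{ /* terminator: empty type and compat */ },
	};

	static int example_probe(struct vio_dev *vdev,
				 const struct vio_device_id *id)
	{
		printk(KERN_INFO "example: channel ID[%lu]\n",
		       vdev->channel_id);
		return 0;	/* a real driver would set up its LDC here */
	}

	static int example_remove(struct vio_dev *vdev)
	{
		return 0;
	}

	static struct vio_driver example_driver = {
		.id_table	= example_vio_ids,
		.probe		= example_probe,
		.remove		= example_remove,
		.driver		= {
			.name	= "example_vio",
		},
	};

	static int __init example_init(void)
	{
		return vio_register_driver(&example_driver);
	}

	static void __exit example_exit(void)
	{
		vio_unregister_driver(&example_driver);
	}

	module_init(example_init);
	module_exit(example_exit);
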
diff --git a/arch/sparc64/kernel/viohs.c b/arch/sparc64/kernel/viohs.c
new file mode 100644
index 000000000000..15613add45d1
--- /dev/null
+++ b/arch/sparc64/kernel/viohs.c
@@ -0,0 +1,792 @@
1/* viohs.c: LDOM Virtual I/O handshake helper layer.
2 *
3 * Copyright (C) 2007 David S. Miller <davem@davemloft.net>
4 */
5
6#include <linux/kernel.h>
7#include <linux/module.h>
8#include <linux/string.h>
9#include <linux/delay.h>
10#include <linux/sched.h>
11#include <linux/slab.h>
12
13#include <asm/ldc.h>
14#include <asm/vio.h>
15
16int vio_ldc_send(struct vio_driver_state *vio, void *data, int len)
17{
18 int err, limit = 1000;
19
20 err = -EINVAL;
21 while (limit-- > 0) {
22 err = ldc_write(vio->lp, data, len);
23 if (!err || (err != -EAGAIN))
24 break;
25 udelay(1);
26 }
27
28 return err;
29}
30EXPORT_SYMBOL(vio_ldc_send);
31
32static int send_ctrl(struct vio_driver_state *vio,
33 struct vio_msg_tag *tag, int len)
34{
35 tag->sid = vio_send_sid(vio);
36 return vio_ldc_send(vio, tag, len);
37}
38
39static void init_tag(struct vio_msg_tag *tag, u8 type, u8 stype, u16 stype_env)
40{
41 tag->type = type;
42 tag->stype = stype;
43 tag->stype_env = stype_env;
44}
45
46static int send_version(struct vio_driver_state *vio, u16 major, u16 minor)
47{
48 struct vio_ver_info pkt;
49
50 vio->_local_sid = (u32) sched_clock();
51
52 memset(&pkt, 0, sizeof(pkt));
53 init_tag(&pkt.tag, VIO_TYPE_CTRL, VIO_SUBTYPE_INFO, VIO_VER_INFO);
54 pkt.major = major;
55 pkt.minor = minor;
56 pkt.dev_class = vio->dev_class;
57
58 viodbg(HS, "SEND VERSION INFO maj[%u] min[%u] devclass[%u]\n",
59 major, minor, vio->dev_class);
60
61 return send_ctrl(vio, &pkt.tag, sizeof(pkt));
62}
63
64static int start_handshake(struct vio_driver_state *vio)
65{
66 int err;
67
68 viodbg(HS, "START HANDSHAKE\n");
69
70 vio->hs_state = VIO_HS_INVALID;
71
72 err = send_version(vio,
73 vio->ver_table[0].major,
74 vio->ver_table[0].minor);
75 if (err < 0)
76 return err;
77
78 return 0;
79}
80
81void vio_link_state_change(struct vio_driver_state *vio, int event)
82{
83 if (event == LDC_EVENT_UP) {
84 vio->hs_state = VIO_HS_INVALID;
85
86 switch (vio->dev_class) {
87 case VDEV_NETWORK:
88 case VDEV_NETWORK_SWITCH:
89 vio->dr_state = (VIO_DR_STATE_TXREQ |
90 VIO_DR_STATE_RXREQ);
91 break;
92
93 case VDEV_DISK:
94 vio->dr_state = VIO_DR_STATE_TXREQ;
95 break;
96 case VDEV_DISK_SERVER:
97 vio->dr_state = VIO_DR_STATE_RXREQ;
98 break;
99 }
100 start_handshake(vio);
101 }
102}
103EXPORT_SYMBOL(vio_link_state_change);
104
105static int handshake_failure(struct vio_driver_state *vio)
106{
107 struct vio_dring_state *dr;
108
109 /* XXX Put policy here... Perhaps start a timer to fire
110 * XXX in 100 ms, which will bring the link up and retry
111 * XXX the handshake.
112 */
113
114 viodbg(HS, "HANDSHAKE FAILURE\n");
115
116 vio->dr_state &= ~(VIO_DR_STATE_TXREG |
117 VIO_DR_STATE_RXREG);
118
119 dr = &vio->drings[VIO_DRIVER_RX_RING];
120 memset(dr, 0, sizeof(*dr));
121
122 kfree(vio->desc_buf);
123 vio->desc_buf = NULL;
124 vio->desc_buf_len = 0;
125
126 vio->hs_state = VIO_HS_INVALID;
127
128 return -ECONNRESET;
129}
130
131static int process_unknown(struct vio_driver_state *vio, void *arg)
132{
133 struct vio_msg_tag *pkt = arg;
134
135 viodbg(HS, "UNKNOWN CONTROL [%02x:%02x:%04x:%08x]\n",
136 pkt->type, pkt->stype, pkt->stype_env, pkt->sid);
137
138 printk(KERN_ERR "vio: ID[%lu] Resetting connection.\n",
139 vio->vdev->channel_id);
140
141 ldc_disconnect(vio->lp);
142
143 return -ECONNRESET;
144}
145
146static int send_dreg(struct vio_driver_state *vio)
147{
148 struct vio_dring_state *dr = &vio->drings[VIO_DRIVER_TX_RING];
149 union {
150 struct vio_dring_register pkt;
151 char all[sizeof(struct vio_dring_register) +
152 (sizeof(struct ldc_trans_cookie) *
153 dr->ncookies)];
154 } u;
155 int i;
156
157 memset(&u, 0, sizeof(u));
158 init_tag(&u.pkt.tag, VIO_TYPE_CTRL, VIO_SUBTYPE_INFO, VIO_DRING_REG);
159 u.pkt.dring_ident = 0;
160 u.pkt.num_descr = dr->num_entries;
161 u.pkt.descr_size = dr->entry_size;
162 u.pkt.options = VIO_TX_DRING;
163 u.pkt.num_cookies = dr->ncookies;
164
165 viodbg(HS, "SEND DRING_REG INFO ndesc[%u] dsz[%u] opt[0x%x] "
166 "ncookies[%u]\n",
167 u.pkt.num_descr, u.pkt.descr_size, u.pkt.options,
168 u.pkt.num_cookies);
169
170 for (i = 0; i < dr->ncookies; i++) {
171 u.pkt.cookies[i] = dr->cookies[i];
172
173 viodbg(HS, "DRING COOKIE(%d) [%016llx:%016llx]\n",
174 i,
175 (unsigned long long) u.pkt.cookies[i].cookie_addr,
176 (unsigned long long) u.pkt.cookies[i].cookie_size);
177 }
178
179 return send_ctrl(vio, &u.pkt.tag, sizeof(u));
180}
181
182static int send_rdx(struct vio_driver_state *vio)
183{
184 struct vio_rdx pkt;
185
186 memset(&pkt, 0, sizeof(pkt));
187
188 init_tag(&pkt.tag, VIO_TYPE_CTRL, VIO_SUBTYPE_INFO, VIO_RDX);
189
190 viodbg(HS, "SEND RDX INFO\n");
191
192 return send_ctrl(vio, &pkt.tag, sizeof(pkt));
193}
194
195static int send_attr(struct vio_driver_state *vio)
196{
197 return vio->ops->send_attr(vio);
198}
199
200static struct vio_version *find_by_major(struct vio_driver_state *vio,
201 u16 major)
202{
203 struct vio_version *ret = NULL;
204 int i;
205
206 for (i = 0; i < vio->ver_table_entries; i++) {
207 struct vio_version *v = &vio->ver_table[i];
208 if (v->major <= major) {
209 ret = v;
210 break;
211 }
212 }
213 return ret;
214}
215
216static int process_ver_info(struct vio_driver_state *vio,
217 struct vio_ver_info *pkt)
218{
219 struct vio_version *vap;
220 int err;
221
222 viodbg(HS, "GOT VERSION INFO maj[%u] min[%u] devclass[%u]\n",
223 pkt->major, pkt->minor, pkt->dev_class);
224
225 if (vio->hs_state != VIO_HS_INVALID) {
226 /* XXX Perhaps invoke start_handshake? XXX */
227 memset(&vio->ver, 0, sizeof(vio->ver));
228 vio->hs_state = VIO_HS_INVALID;
229 }
230
231 vap = find_by_major(vio, pkt->major);
232
233 vio->_peer_sid = pkt->tag.sid;
234
235 if (!vap) {
236 pkt->tag.stype = VIO_SUBTYPE_NACK;
237 pkt->major = 0;
238 pkt->minor = 0;
239 viodbg(HS, "SEND VERSION NACK maj[0] min[0]\n");
240 err = send_ctrl(vio, &pkt->tag, sizeof(*pkt));
241 } else if (vap->major != pkt->major) {
242 pkt->tag.stype = VIO_SUBTYPE_NACK;
243 pkt->major = vap->major;
244 pkt->minor = vap->minor;
245 viodbg(HS, "SEND VERSION NACK maj[%u] min[%u]\n",
246 pkt->major, pkt->minor);
247 err = send_ctrl(vio, &pkt->tag, sizeof(*pkt));
248 } else {
249 struct vio_version ver = {
250 .major = pkt->major,
251 .minor = pkt->minor,
252 };
253 if (ver.minor > vap->minor)
254 ver.minor = vap->minor;
255 pkt->minor = ver.minor;
256 pkt->tag.stype = VIO_SUBTYPE_ACK;
257 viodbg(HS, "SEND VERSION ACK maj[%u] min[%u]\n",
258 pkt->major, pkt->minor);
259 err = send_ctrl(vio, &pkt->tag, sizeof(*pkt));
260 if (err > 0) {
261 vio->ver = ver;
262 vio->hs_state = VIO_HS_GOTVERS;
263 }
264 }
265 if (err < 0)
266 return handshake_failure(vio);
267
268 return 0;
269}
270
271static int process_ver_ack(struct vio_driver_state *vio,
272 struct vio_ver_info *pkt)
273{
274 viodbg(HS, "GOT VERSION ACK maj[%u] min[%u] devclass[%u]\n",
275 pkt->major, pkt->minor, pkt->dev_class);
276
277 if (vio->hs_state & VIO_HS_GOTVERS) {
278 if (vio->ver.major != pkt->major ||
279 vio->ver.minor != pkt->minor) {
280 pkt->tag.stype = VIO_SUBTYPE_NACK;
281 (void) send_ctrl(vio, &pkt->tag, sizeof(*pkt));
282 return handshake_failure(vio);
283 }
284 } else {
285 vio->ver.major = pkt->major;
286 vio->ver.minor = pkt->minor;
287 vio->hs_state = VIO_HS_GOTVERS;
288 }
289
290 switch (vio->dev_class) {
291 case VDEV_NETWORK:
292 case VDEV_DISK:
293 if (send_attr(vio) < 0)
294 return handshake_failure(vio);
295 break;
296
297 default:
298 break;
299 }
300
301 return 0;
302}
303
304static int process_ver_nack(struct vio_driver_state *vio,
305 struct vio_ver_info *pkt)
306{
307 struct vio_version *nver;
308
309 viodbg(HS, "GOT VERSION NACK maj[%u] min[%u] devclass[%u]\n",
310 pkt->major, pkt->minor, pkt->dev_class);
311
312 if ((pkt->major == 0 && pkt->minor == 0) ||
313 !(nver = find_by_major(vio, pkt->major)))
314 return handshake_failure(vio);
315
316 if (send_version(vio, nver->major, nver->minor) < 0)
317 return handshake_failure(vio);
318
319 return 0;
320}
321
322static int process_ver(struct vio_driver_state *vio, struct vio_ver_info *pkt)
323{
324 switch (pkt->tag.stype) {
325 case VIO_SUBTYPE_INFO:
326 return process_ver_info(vio, pkt);
327
328 case VIO_SUBTYPE_ACK:
329 return process_ver_ack(vio, pkt);
330
331 case VIO_SUBTYPE_NACK:
332 return process_ver_nack(vio, pkt);
333
334 default:
335 return handshake_failure(vio);
336 }
337}
338
339static int process_attr(struct vio_driver_state *vio, void *pkt)
340{
341 int err;
342
343 if (!(vio->hs_state & VIO_HS_GOTVERS))
344 return handshake_failure(vio);
345
346 err = vio->ops->handle_attr(vio, pkt);
347 if (err < 0) {
348 return handshake_failure(vio);
349 } else {
350 vio->hs_state |= VIO_HS_GOT_ATTR;
351
352 if ((vio->dr_state & VIO_DR_STATE_TXREQ) &&
353 !(vio->hs_state & VIO_HS_SENT_DREG)) {
354 if (send_dreg(vio) < 0)
355 return handshake_failure(vio);
356
357 vio->hs_state |= VIO_HS_SENT_DREG;
358 }
359 }
360 return 0;
361}
362
363static int all_drings_registered(struct vio_driver_state *vio)
364{
365 int need_rx, need_tx;
366
367 need_rx = (vio->dr_state & VIO_DR_STATE_RXREQ);
368 need_tx = (vio->dr_state & VIO_DR_STATE_TXREQ);
369
370 if (need_rx &&
371 !(vio->dr_state & VIO_DR_STATE_RXREG))
372 return 0;
373
374 if (need_tx &&
375 !(vio->dr_state & VIO_DR_STATE_TXREG))
376 return 0;
377
378 return 1;
379}
380
381static int process_dreg_info(struct vio_driver_state *vio,
382 struct vio_dring_register *pkt)
383{
384 struct vio_dring_state *dr;
385 int i, len;
386
387 viodbg(HS, "GOT DRING_REG INFO ident[%llx] "
388 "ndesc[%u] dsz[%u] opt[0x%x] ncookies[%u]\n",
389 (unsigned long long) pkt->dring_ident,
390 pkt->num_descr, pkt->descr_size, pkt->options,
391 pkt->num_cookies);
392
393 if (!(vio->dr_state & VIO_DR_STATE_RXREQ))
394 goto send_nack;
395
396 if (vio->dr_state & VIO_DR_STATE_RXREG)
397 goto send_nack;
398
399 vio->desc_buf = kzalloc(pkt->descr_size, GFP_ATOMIC);
400 if (!vio->desc_buf)
401 goto send_nack;
402
403 vio->desc_buf_len = pkt->descr_size;
404
405 dr = &vio->drings[VIO_DRIVER_RX_RING];
406
407 dr->num_entries = pkt->num_descr;
408 dr->entry_size = pkt->descr_size;
409 dr->ncookies = pkt->num_cookies;
410 for (i = 0; i < dr->ncookies; i++) {
411 dr->cookies[i] = pkt->cookies[i];
412
413 viodbg(HS, "DRING COOKIE(%d) [%016llx:%016llx]\n",
414 i,
415 (unsigned long long)
416 pkt->cookies[i].cookie_addr,
417 (unsigned long long)
418 pkt->cookies[i].cookie_size);
419 }
420
421 pkt->tag.stype = VIO_SUBTYPE_ACK;
422 pkt->dring_ident = ++dr->ident;
423
424 viodbg(HS, "SEND DRING_REG ACK ident[%llx]\n",
425 (unsigned long long) pkt->dring_ident);
426
427 len = (sizeof(*pkt) +
428 (dr->ncookies * sizeof(struct ldc_trans_cookie)));
429 if (send_ctrl(vio, &pkt->tag, len) < 0)
430 goto send_nack;
431
432 vio->dr_state |= VIO_DR_STATE_RXREG;
433
434 return 0;
435
436send_nack:
437 pkt->tag.stype = VIO_SUBTYPE_NACK;
438 viodbg(HS, "SEND DRING_REG NACK\n");
439 (void) send_ctrl(vio, &pkt->tag, sizeof(*pkt));
440
441 return handshake_failure(vio);
442}
443
444static int process_dreg_ack(struct vio_driver_state *vio,
445 struct vio_dring_register *pkt)
446{
447 struct vio_dring_state *dr;
448
449 viodbg(HS, "GOT DRING_REG ACK ident[%llx] "
450 "ndesc[%u] dsz[%u] opt[0x%x] ncookies[%u]\n",
451 (unsigned long long) pkt->dring_ident,
452 pkt->num_descr, pkt->descr_size, pkt->options,
453 pkt->num_cookies);
454
455 dr = &vio->drings[VIO_DRIVER_TX_RING];
456
457 if (!(vio->dr_state & VIO_DR_STATE_TXREQ))
458 return handshake_failure(vio);
459
460 dr->ident = pkt->dring_ident;
461 vio->dr_state |= VIO_DR_STATE_TXREG;
462
463 if (all_drings_registered(vio)) {
464 if (send_rdx(vio) < 0)
465 return handshake_failure(vio);
466 vio->hs_state = VIO_HS_SENT_RDX;
467 }
468 return 0;
469}
470
471static int process_dreg_nack(struct vio_driver_state *vio,
472 struct vio_dring_register *pkt)
473{
474 viodbg(HS, "GOT DRING_REG NACK ident[%llx] "
475 "ndesc[%u] dsz[%u] opt[0x%x] ncookies[%u]\n",
476 (unsigned long long) pkt->dring_ident,
477 pkt->num_descr, pkt->descr_size, pkt->options,
478 pkt->num_cookies);
479
480 return handshake_failure(vio);
481}
482
483static int process_dreg(struct vio_driver_state *vio,
484 struct vio_dring_register *pkt)
485{
486 if (!(vio->hs_state & VIO_HS_GOTVERS))
487 return handshake_failure(vio);
488
489 switch (pkt->tag.stype) {
490 case VIO_SUBTYPE_INFO:
491 return process_dreg_info(vio, pkt);
492
493 case VIO_SUBTYPE_ACK:
494 return process_dreg_ack(vio, pkt);
495
496 case VIO_SUBTYPE_NACK:
497 return process_dreg_nack(vio, pkt);
498
499 default:
500 return handshake_failure(vio);
501 }
502}
503
504static int process_dunreg(struct vio_driver_state *vio,
505 struct vio_dring_unregister *pkt)
506{
507 struct vio_dring_state *dr = &vio->drings[VIO_DRIVER_RX_RING];
508
509 viodbg(HS, "GOT DRING_UNREG\n");
510
511 if (pkt->dring_ident != dr->ident)
512 return 0;
513
514 vio->dr_state &= ~VIO_DR_STATE_RXREG;
515
516 memset(dr, 0, sizeof(*dr));
517
518 kfree(vio->desc_buf);
519 vio->desc_buf = NULL;
520 vio->desc_buf_len = 0;
521
522 return 0;
523}
524
525static int process_rdx_info(struct vio_driver_state *vio, struct vio_rdx *pkt)
526{
527 viodbg(HS, "GOT RDX INFO\n");
528
529 pkt->tag.stype = VIO_SUBTYPE_ACK;
530 viodbg(HS, "SEND RDX ACK\n");
531 if (send_ctrl(vio, &pkt->tag, sizeof(*pkt)) < 0)
532 return handshake_failure(vio);
533
534 vio->hs_state |= VIO_HS_SENT_RDX_ACK;
535 return 0;
536}
537
538static int process_rdx_ack(struct vio_driver_state *vio, struct vio_rdx *pkt)
539{
540 viodbg(HS, "GOT RDX ACK\n");
541
542 if (!(vio->hs_state & VIO_HS_SENT_RDX))
543 return handshake_failure(vio);
544
545 vio->hs_state |= VIO_HS_GOT_RDX_ACK;
546 return 0;
547}
548
549static int process_rdx_nack(struct vio_driver_state *vio, struct vio_rdx *pkt)
550{
551 viodbg(HS, "GOT RDX NACK\n");
552
553 return handshake_failure(vio);
554}
555
556static int process_rdx(struct vio_driver_state *vio, struct vio_rdx *pkt)
557{
558 if (!all_drings_registered(vio))
559 return handshake_failure(vio);
560
561 switch (pkt->tag.stype) {
562 case VIO_SUBTYPE_INFO:
563 return process_rdx_info(vio, pkt);
564
565 case VIO_SUBTYPE_ACK:
566 return process_rdx_ack(vio, pkt);
567
568 case VIO_SUBTYPE_NACK:
569 return process_rdx_nack(vio, pkt);
570
571 default:
572 return handshake_failure(vio);
573 }
574}
575
576int vio_control_pkt_engine(struct vio_driver_state *vio, void *pkt)
577{
578 struct vio_msg_tag *tag = pkt;
579 u8 prev_state = vio->hs_state;
580 int err;
581
582 switch (tag->stype_env) {
583 case VIO_VER_INFO:
584 err = process_ver(vio, pkt);
585 break;
586
587 case VIO_ATTR_INFO:
588 err = process_attr(vio, pkt);
589 break;
590
591 case VIO_DRING_REG:
592 err = process_dreg(vio, pkt);
593 break;
594
595 case VIO_DRING_UNREG:
596 err = process_dunreg(vio, pkt);
597 break;
598
599 case VIO_RDX:
600 err = process_rdx(vio, pkt);
601 break;
602
603 default:
604 err = process_unknown(vio, pkt);
605 break;
606 }
607 if (!err &&
608 vio->hs_state != prev_state &&
609 (vio->hs_state & VIO_HS_COMPLETE))
610 vio->ops->handshake_complete(vio);
611
612 return err;
613}
614EXPORT_SYMBOL(vio_control_pkt_engine);
615
616void vio_conn_reset(struct vio_driver_state *vio)
617{
618}
619EXPORT_SYMBOL(vio_conn_reset);
620
621/* The issue is that the Solaris virtual disk server just mirrors the
622 * SID values it gets from the client peer. So we work around that
623 * here in vio_{validate,send}_sid() so that the drivers don't need
624 * to be aware of this crap.
625 */
626int vio_validate_sid(struct vio_driver_state *vio, struct vio_msg_tag *tp)
627{
628 u32 sid;
629
630 /* Always let VERSION+INFO packets through unchecked, they
631 * define the new SID.
632 */
633 if (tp->type == VIO_TYPE_CTRL &&
634 tp->stype == VIO_SUBTYPE_INFO &&
635 tp->stype_env == VIO_VER_INFO)
636 return 0;
637
638 /* Ok, now figure out which SID to use. */
639 switch (vio->dev_class) {
640 case VDEV_NETWORK:
641 case VDEV_NETWORK_SWITCH:
642 case VDEV_DISK_SERVER:
643 default:
644 sid = vio->_peer_sid;
645 break;
646
647 case VDEV_DISK:
648 sid = vio->_local_sid;
649 break;
650 }
651
652 if (sid == tp->sid)
653 return 0;
654 viodbg(DATA, "BAD SID tag->sid[%08x] peer_sid[%08x] local_sid[%08x]\n",
655 tp->sid, vio->_peer_sid, vio->_local_sid);
656 return -EINVAL;
657}
658EXPORT_SYMBOL(vio_validate_sid);
659
660u32 vio_send_sid(struct vio_driver_state *vio)
661{
662 switch (vio->dev_class) {
663 case VDEV_NETWORK:
664 case VDEV_NETWORK_SWITCH:
665 case VDEV_DISK:
666 default:
667 return vio->_local_sid;
668
669 case VDEV_DISK_SERVER:
670 return vio->_peer_sid;
671 }
672}
673EXPORT_SYMBOL(vio_send_sid);
674
675int vio_ldc_alloc(struct vio_driver_state *vio,
676 struct ldc_channel_config *base_cfg,
677 void *event_arg)
678{
679 struct ldc_channel_config cfg = *base_cfg;
680 struct ldc_channel *lp;
681
682 cfg.tx_irq = vio->vdev->tx_irq;
683 cfg.rx_irq = vio->vdev->rx_irq;
684
685 lp = ldc_alloc(vio->vdev->channel_id, &cfg, event_arg);
686 if (IS_ERR(lp))
687 return PTR_ERR(lp);
688
689 vio->lp = lp;
690
691 return 0;
692}
693EXPORT_SYMBOL(vio_ldc_alloc);
694
695void vio_ldc_free(struct vio_driver_state *vio)
696{
697 ldc_free(vio->lp);
698 vio->lp = NULL;
699
700 kfree(vio->desc_buf);
701 vio->desc_buf = NULL;
702 vio->desc_buf_len = 0;
703}
704EXPORT_SYMBOL(vio_ldc_free);
705
706void vio_port_up(struct vio_driver_state *vio)
707{
708 unsigned long flags;
709 int err, state;
710
711 spin_lock_irqsave(&vio->lock, flags);
712
713 state = ldc_state(vio->lp);
714
715 err = 0;
716 if (state == LDC_STATE_INIT) {
717 err = ldc_bind(vio->lp, vio->name);
718 if (err)
719 printk(KERN_WARNING "%s: Port %lu bind failed, "
720 "err=%d\n",
721 vio->name, vio->vdev->channel_id, err);
722 }
723
724 if (!err) {
725 err = ldc_connect(vio->lp);
726 if (err)
727 printk(KERN_WARNING "%s: Port %lu connect failed, "
728 "err=%d\n",
729 vio->name, vio->vdev->channel_id, err);
730 }
731 if (err) {
732 unsigned long expires = jiffies + HZ;
733
734 expires = round_jiffies(expires);
735 mod_timer(&vio->timer, expires);
736 }
737
738 spin_unlock_irqrestore(&vio->lock, flags);
739}
740EXPORT_SYMBOL(vio_port_up);
741
742static void vio_port_timer(unsigned long _arg)
743{
744 struct vio_driver_state *vio = (struct vio_driver_state *) _arg;
745
746 vio_port_up(vio);
747}
748
749int vio_driver_init(struct vio_driver_state *vio, struct vio_dev *vdev,
750 u8 dev_class, struct vio_version *ver_table,
751 int ver_table_size, struct vio_driver_ops *ops,
752 char *name)
753{
754 switch (dev_class) {
755 case VDEV_NETWORK:
756 case VDEV_NETWORK_SWITCH:
757 case VDEV_DISK:
758 case VDEV_DISK_SERVER:
759 break;
760
761 default:
762 return -EINVAL;
763 }
764
765 if (!ops->send_attr ||
766 !ops->handle_attr ||
767 !ops->handshake_complete)
768 return -EINVAL;
769
770 if (!ver_table || ver_table_size < 0)
771 return -EINVAL;
772
773 if (!name)
774 return -EINVAL;
775
776 spin_lock_init(&vio->lock);
777
778 vio->name = name;
779
780 vio->dev_class = dev_class;
781 vio->vdev = vdev;
782
783 vio->ver_table = ver_table;
784 vio->ver_table_entries = ver_table_size;
785
786 vio->ops = ops;
787
788 setup_timer(&vio->timer, vio_port_timer, (unsigned long) vio);
789
790 return 0;
791}
792EXPORT_SYMBOL(vio_driver_init);
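
A driver built on this handshake layer supplies the three ops that vio_driver_init() insists on, then brings its LDC channel up. The sketch below is a hypothetical client, not code from this merge: the example_* names, the VDEV_DISK class, and the single-entry 1.0 version table are assumptions. vio_ldc_alloc() copies the caller's base config and fills in the device's tx/rx IRQs; vio_port_up() binds and connects the channel, rearming vio->timer for a retry if either step fails.

	static int example_send_attr(struct vio_driver_state *vio)
	{
		/* Build and send the device-class specific ATTR INFO packet. */
		return 0;
	}

	static int example_handle_attr(struct vio_driver_state *vio, void *pkt)
	{
		/* Validate the peer's attributes; a negative return aborts
		 * the handshake via handshake_failure(). */
		return 0;
	}

	static void example_handshake_complete(struct vio_driver_state *vio)
	{
		/* Invoked once hs_state reaches VIO_HS_COMPLETE. */
	}

	static struct vio_driver_ops example_ops = {
		.send_attr		= example_send_attr,
		.handle_attr		= example_handle_attr,
		.handshake_complete	= example_handshake_complete,
	};

	static struct vio_version example_versions[] = {
		{ .major = 1, .minor = 0 },
	};

	static int example_port_init(struct vio_driver_state *vio,
				     struct vio_dev *vdev,
				     struct ldc_channel_config *base_cfg)
	{
		int err;

		err = vio_driver_init(vio, vdev, VDEV_DISK, example_versions,
				      ARRAY_SIZE(example_versions),
				      &example_ops, "example");
		if (err)
			return err;

		err = vio_ldc_alloc(vio, base_cfg, vio);
		if (err)
			return err;

		vio_port_up(vio);
		return 0;
	}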
diff --git a/arch/sparc64/lib/Makefile b/arch/sparc64/lib/Makefile
index 4a725d8985f1..c4a6d6e7d03c 100644
--- a/arch/sparc64/lib/Makefile
+++ b/arch/sparc64/lib/Makefile
@@ -14,6 +14,6 @@ lib-y := PeeCeeI.o copy_page.o clear_page.o strlen.o strncmp.o \
14 NGmemcpy.o NGcopy_from_user.o NGcopy_to_user.o NGpatch.o \ 14 NGmemcpy.o NGcopy_from_user.o NGcopy_to_user.o NGpatch.o \
15 NGpage.o NGbzero.o \ 15 NGpage.o NGbzero.o \
16 copy_in_user.o user_fixup.o memmove.o \ 16 copy_in_user.o user_fixup.o memmove.o \
17 mcount.o ipcsum.o rwsem.o xor.o delay.o 17 mcount.o ipcsum.o rwsem.o xor.o
18 18
19obj-y += iomap.o 19obj-y += iomap.o
diff --git a/arch/sparc64/lib/delay.c b/arch/sparc64/lib/delay.c
deleted file mode 100644
index fb27e54a03ee..000000000000
--- a/arch/sparc64/lib/delay.c
+++ /dev/null
@@ -1,46 +0,0 @@
1/* delay.c: Delay loops for sparc64
2 *
3 * Copyright (C) 2004, 2006 David S. Miller <davem@davemloft.net>
4 *
5 * Based heavily upon x86 variant which is:
6 * Copyright (C) 1993 Linus Torvalds
7 * Copyright (C) 1997 Martin Mares <mj@atrey.karlin.mff.cuni.cz>
8 */
9
10#include <linux/delay.h>
11#include <asm/timer.h>
12
13void __delay(unsigned long loops)
14{
15 unsigned long bclock, now;
16
17 bclock = tick_ops->get_tick();
18 do {
19 now = tick_ops->get_tick();
20 } while ((now-bclock) < loops);
21}
22
23/* We used to multiply by HZ after shifting down by 32 bits
24 * but that runs into problems for higher values of HZ and
25 * slow cpus.
26 */
27void __const_udelay(unsigned long n)
28{
29 n *= 4;
30
31 n *= (cpu_data(raw_smp_processor_id()).udelay_val * (HZ/4));
32 n >>= 32;
33
34 __delay(n + 1);
35}
36
37void __udelay(unsigned long n)
38{
39 __const_udelay(n * 0x10c7UL);
40}
41
42
43void __ndelay(unsigned long n)
44{
45 __const_udelay(n * 0x5UL);
46}
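
For reference, the magic constants in the deleted file pair against the final >> 32 in __const_udelay(): 0x10c7 = 4295, approximately 2^32 / 10^6, rescales microseconds into a 2^32-scaled fraction of a second, and 0x5 approximates 2^32 / 10^9 for nanoseconds. Since 4 * (HZ/4) * udelay_val is the loop (tick) count per second, the shift cancels the 2^32 scale:

	loops = n_usec * (2^32 / 10^6) * loops_per_second / 2^32
	      = n_usec * loops_per_microsecond

The tick-based __delay() then spins until that many tick increments have elapsed. This merge drops the pre-scaled scheme entirely in favor of what the commit list calls a "more sensible udelay implementation".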
diff --git a/arch/sparc64/prom/misc.c b/arch/sparc64/prom/misc.c
index f3e0c14e9eef..33c5b7da31e5 100644
--- a/arch/sparc64/prom/misc.c
+++ b/arch/sparc64/prom/misc.c
@@ -14,6 +14,7 @@
14#include <asm/openprom.h> 14#include <asm/openprom.h>
15#include <asm/oplib.h> 15#include <asm/oplib.h>
16#include <asm/system.h> 16#include <asm/system.h>
17#include <asm/ldc.h>
17 18
18int prom_service_exists(const char *service_name) 19int prom_service_exists(const char *service_name)
19{ 20{
@@ -37,6 +38,10 @@ void prom_sun4v_guest_soft_state(void)
37/* Reset and reboot the machine with the command 'bcommand'. */ 38/* Reset and reboot the machine with the command 'bcommand'. */
38void prom_reboot(const char *bcommand) 39void prom_reboot(const char *bcommand)
39{ 40{
41#ifdef CONFIG_SUN_LDOMS
42 if (ldom_domaining_enabled)
43 ldom_reboot(bcommand);
44#endif
40 p1275_cmd("boot", P1275_ARG(0, P1275_ARG_IN_STRING) | 45 p1275_cmd("boot", P1275_ARG(0, P1275_ARG_IN_STRING) |
41 P1275_INOUT(1, 0), bcommand); 46 P1275_INOUT(1, 0), bcommand);
42} 47}
@@ -91,6 +96,10 @@ void prom_cmdline(void)
91 */ 96 */
92void prom_halt(void) 97void prom_halt(void)
93{ 98{
99#ifdef CONFIG_SUN_LDOMS
100 if (ldom_domaining_enabled)
101 ldom_power_off();
102#endif
94again: 103again:
95 p1275_cmd("exit", P1275_INOUT(0, 0)); 104 p1275_cmd("exit", P1275_INOUT(0, 0));
96 goto again; /* PROM is out to get me -DaveM */ 105 goto again; /* PROM is out to get me -DaveM */
@@ -98,6 +107,10 @@ again:
98 107
99void prom_halt_power_off(void) 108void prom_halt_power_off(void)
100{ 109{
110#ifdef CONFIG_SUN_LDOMS
111 if (ldom_domaining_enabled)
112 ldom_power_off();
113#endif
101 p1275_cmd("SUNW,power-off", P1275_INOUT(0, 0)); 114 p1275_cmd("SUNW,power-off", P1275_INOUT(0, 0));
102 115
103 /* if nothing else helps, we just halt */ 116 /* if nothing else helps, we just halt */
diff --git a/arch/sparc64/prom/p1275.c b/arch/sparc64/prom/p1275.c
index 2b32c489860c..7fcccc0e19cf 100644
--- a/arch/sparc64/prom/p1275.c
+++ b/arch/sparc64/prom/p1275.c
@@ -16,6 +16,7 @@
16#include <asm/system.h> 16#include <asm/system.h>
17#include <asm/spitfire.h> 17#include <asm/spitfire.h>
18#include <asm/pstate.h> 18#include <asm/pstate.h>
19#include <asm/ldc.h>
19 20
20struct { 21struct {
21 long prom_callback; /* 0x00 */ 22 long prom_callback; /* 0x00 */
diff --git a/arch/sparc64/prom/tree.c b/arch/sparc64/prom/tree.c
index 500f05e2cfcb..17b7ecfe7ca9 100644
--- a/arch/sparc64/prom/tree.c
+++ b/arch/sparc64/prom/tree.c
@@ -13,6 +13,7 @@
13 13
14#include <asm/openprom.h> 14#include <asm/openprom.h>
15#include <asm/oplib.h> 15#include <asm/oplib.h>
16#include <asm/ldc.h>
16 17
17/* Return the child of node 'node' or zero if this node has no 18/* Return the child of node 'node' or zero if this node has no
18 * direct descendent. 19 * direct descendent.
@@ -261,9 +262,17 @@ int prom_node_has_property(int node, const char *prop)
261int 262int
262prom_setprop(int node, const char *pname, char *value, int size) 263prom_setprop(int node, const char *pname, char *value, int size)
263{ 264{
264 if(size == 0) return 0; 265 if (size == 0)
265 if((pname == 0) || (value == 0)) return 0; 266 return 0;
267 if ((pname == 0) || (value == 0))
268 return 0;
266 269
270#ifdef CONFIG_SUN_LDOMS
271 if (ldom_domaining_enabled) {
272 ldom_set_var(pname, value);
273 return 0;
274 }
275#endif
267 return p1275_cmd ("setprop", P1275_ARG(1,P1275_ARG_IN_STRING)| 276 return p1275_cmd ("setprop", P1275_ARG(1,P1275_ARG_IN_STRING)|
268 P1275_ARG(2,P1275_ARG_IN_BUF)| 277 P1275_ARG(2,P1275_ARG_IN_BUF)|
269 P1275_INOUT(4, 1), 278 P1275_INOUT(4, 1),
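
The prom_setprop() hunk above is the guest-visible half of "[SPARC64]: Fix setting of variables in LDOM guest": when domaining is enabled, variable writes are routed to ldom_set_var(), provided by the domain-services driver added elsewhere in this merge, rather than to the PROM "setprop" service. Call sites stay unchanged; a hypothetical one follows (prom_finddevice() is the existing OBP lookup helper, while the "/options" path and the example_ name are assumptions for illustration):

	static void example_disable_autoboot(void)
	{
		char val[] = "false";
		int node = prom_finddevice("/options");

		/* With ldom_domaining_enabled set, this now goes through
		 * ldom_set_var() instead of p1275_cmd("setprop", ...). */
		prom_setprop(node, "auto-boot?", val, sizeof(val));
	}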