author	Linus Torvalds <torvalds@woody.linux-foundation.org>	2007-07-16 13:45:23 -0400
committer	Linus Torvalds <torvalds@woody.linux-foundation.org>	2007-07-16 13:45:23 -0400
commit	02b2318e07f98a7cdf7089a4457a8d62424aa824 (patch)
tree	b40353a9ee6b034e21192ceb5df445fbc5fbdd32
parent	b91cba52e9b7b3f1c0037908a192d93a869ca9e5 (diff)
parent	d54bc2793ec3405c6b8f217568a82b87bd8a591b (diff)
Merge branch 'master' of master.kernel.org:/pub/scm/linux/kernel/git/davem/sparc-2.6
* 'master' of master.kernel.org:/pub/scm/linux/kernel/git/davem/sparc-2.6: (26 commits)
  [SPARC64]: Fix UP build.
  [SPARC64]: dr-cpu unconfigure support.
  [SERIAL]: Fix console write locking in sparc drivers.
  [SPARC64]: Give more accurate errors in dr_cpu_configure().
  [SPARC64]: Clear cpu_{core,sibling}_map[] in smp_fill_in_sib_core_maps()
  [SPARC64]: Fix leak when DR added cpu does not bootup.
  [SPARC64]: Add ->set_affinity IRQ handlers.
  [SPARC64]: Process dr-cpu events in a kthread instead of workqueue.
  [SPARC64]: More sensible udelay implementation.
  [SPARC64]: SMP build fixes.
  [SPARC64]: mdesc.c needs linux/mm.h
  [SPARC64]: Fix build regressions added by dr-cpu changes.
  [SPARC64]: Unconditionally register vio_bus_type.
  [SPARC64]: Initial LDOM cpu hotplug support.
  [SPARC64]: Fix setting of variables in LDOM guest.
  [SPARC64]: Fix MD property lifetime bugs.
  [SPARC64]: Abstract out mdesc accesses for better MD update handling.
  [SPARC64]: Use more meaningful names for IRQ registry.
  [SPARC64]: Initial domain-services driver.
  [SPARC64]: Export powerd facilities for external entities.
  ...
-rw-r--r--	arch/sparc64/Kconfig	15
-rw-r--r--	arch/sparc64/kernel/Makefile	3
-rw-r--r--	arch/sparc64/kernel/ds.c	1158
-rw-r--r--	arch/sparc64/kernel/hvtramp.S	139
-rw-r--r--	arch/sparc64/kernel/irq.c	84
-rw-r--r--	arch/sparc64/kernel/ldc.c	2373
-rw-r--r--	arch/sparc64/kernel/mdesc.c	698
-rw-r--r--	arch/sparc64/kernel/power.c	54
-rw-r--r--	arch/sparc64/kernel/process.c	21
-rw-r--r--	arch/sparc64/kernel/prom.c	2
-rw-r--r--	arch/sparc64/kernel/setup.c	5
-rw-r--r--	arch/sparc64/kernel/smp.c	251
-rw-r--r--	arch/sparc64/kernel/sparc64_ksyms.c	16
-rw-r--r--	arch/sparc64/kernel/sysfs.c	2
-rw-r--r--	arch/sparc64/kernel/time.c	28
-rw-r--r--	arch/sparc64/kernel/vio.c	395
-rw-r--r--	arch/sparc64/kernel/viohs.c	792
-rw-r--r--	arch/sparc64/lib/Makefile	2
-rw-r--r--	arch/sparc64/lib/delay.c	46
-rw-r--r--	arch/sparc64/prom/misc.c	13
-rw-r--r--	arch/sparc64/prom/p1275.c	1
-rw-r--r--	arch/sparc64/prom/tree.c	13
-rw-r--r--	drivers/block/Kconfig	7
-rw-r--r--	drivers/block/Makefile	1
-rw-r--r--	drivers/block/sunvdc.c	972
-rw-r--r--	drivers/net/Kconfig	6
-rw-r--r--	drivers/net/Makefile	1
-rw-r--r--	drivers/net/sunvnet.c	1164
-rw-r--r--	drivers/net/sunvnet.h	70
-rw-r--r--	drivers/serial/sunhv.c	30
-rw-r--r--	drivers/serial/sunsab.c	19
-rw-r--r--	drivers/serial/sunsu.c	14
-rw-r--r--	drivers/serial/sunzilog.c	17
-rw-r--r--	include/asm-sparc64/bugs.h	5
-rw-r--r--	include/asm-sparc64/cpudata.h	5
-rw-r--r--	include/asm-sparc64/delay.h	32
-rw-r--r--	include/asm-sparc64/hvtramp.h	37
-rw-r--r--	include/asm-sparc64/hypervisor.h	2
-rw-r--r--	include/asm-sparc64/irq.h	2
-rw-r--r--	include/asm-sparc64/ldc.h	138
-rw-r--r--	include/asm-sparc64/mdesc.h	88
-rw-r--r--	include/asm-sparc64/mmu_context.h	3
-rw-r--r--	include/asm-sparc64/power.h	7
-rw-r--r--	include/asm-sparc64/smp.h	11
-rw-r--r--	include/asm-sparc64/vio.h	404
45 files changed, 8609 insertions, 537 deletions
diff --git a/arch/sparc64/Kconfig b/arch/sparc64/Kconfig
index 6566d13db04f..b84b6af1241e 100644
--- a/arch/sparc64/Kconfig
+++ b/arch/sparc64/Kconfig
@@ -108,6 +108,15 @@ config SECCOMP
 
 source kernel/Kconfig.hz
 
+config HOTPLUG_CPU
+	bool "Support for hot-pluggable CPUs"
+	depends on SMP
+	select HOTPLUG
+	---help---
+	  Say Y here to experiment with turning CPUs off and on.  CPUs
+	  can be controlled through /sys/devices/system/cpu/cpu#.
+	  Say N if you want to disable CPU hotplug.
+
 source "init/Kconfig"
 
 config SYSVIPC_COMPAT
@@ -305,6 +314,12 @@ config SUN_IO
 	bool
 	default y
 
+config SUN_LDOMS
+	bool "Sun Logical Domains support"
+	help
+	  Say Y here if you want to support virtual devices via
+	  Logical Domains.
+
 config PCI
 	bool "PCI support"
 	select ARCH_SUPPORTS_MSI
diff --git a/arch/sparc64/kernel/Makefile b/arch/sparc64/kernel/Makefile
index f964bf28d21a..b66876bf410c 100644
--- a/arch/sparc64/kernel/Makefile
+++ b/arch/sparc64/kernel/Makefile
@@ -18,7 +18,7 @@ obj-$(CONFIG_STACKTRACE) += stacktrace.o
 obj-$(CONFIG_PCI)	 += ebus.o isa.o pci_common.o pci_iommu.o \
 			pci_psycho.o pci_sabre.o pci_schizo.o \
 			pci_sun4v.o pci_sun4v_asm.o pci_fire.o
-obj-$(CONFIG_SMP)	 += smp.o trampoline.o
+obj-$(CONFIG_SMP)	 += smp.o trampoline.o hvtramp.o
 obj-$(CONFIG_SPARC32_COMPAT) += sys32.o sys_sparc32.o signal32.o
 obj-$(CONFIG_BINFMT_ELF32) += binfmt_elf32.o
 obj-$(CONFIG_BINFMT_AOUT32) += binfmt_aout32.o
@@ -26,6 +26,7 @@ obj-$(CONFIG_MODULES) += module.o
 obj-$(CONFIG_US3_FREQ) += us3_cpufreq.o
 obj-$(CONFIG_US2E_FREQ) += us2e_cpufreq.o
 obj-$(CONFIG_KPROBES) += kprobes.o
+obj-$(CONFIG_SUN_LDOMS) += ldc.o vio.o viohs.o ds.o
 obj-$(CONFIG_AUDIT) += audit.o
 obj-$(CONFIG_AUDIT)$(CONFIG_SPARC32_COMPAT) += compat_audit.o
 obj-y += $(obj-yy)
diff --git a/arch/sparc64/kernel/ds.c b/arch/sparc64/kernel/ds.c
new file mode 100644
index 000000000000..1c587107cef0
--- /dev/null
+++ b/arch/sparc64/kernel/ds.c
@@ -0,0 +1,1158 @@
1/* ds.c: Domain Services driver for Logical Domains
2 *
3 * Copyright (C) 2007 David S. Miller <davem@davemloft.net>
4 */
5
6#include <linux/kernel.h>
7#include <linux/module.h>
8#include <linux/types.h>
10#include <linux/string.h>
11#include <linux/slab.h>
12#include <linux/sched.h>
13#include <linux/delay.h>
14#include <linux/mutex.h>
15#include <linux/kthread.h>
16#include <linux/cpu.h>
17
18#include <asm/ldc.h>
19#include <asm/vio.h>
20#include <asm/power.h>
21#include <asm/mdesc.h>
22#include <asm/head.h>
23#include <asm/irq.h>
24
25#define DRV_MODULE_NAME "ds"
26#define PFX DRV_MODULE_NAME ": "
27#define DRV_MODULE_VERSION "1.0"
28#define DRV_MODULE_RELDATE "Jul 11, 2007"
29
30static char version[] __devinitdata =
31 DRV_MODULE_NAME ".c:v" DRV_MODULE_VERSION " (" DRV_MODULE_RELDATE ")\n";
32MODULE_AUTHOR("David S. Miller (davem@davemloft.net)");
33MODULE_DESCRIPTION("Sun LDOM domain services driver");
34MODULE_LICENSE("GPL");
35MODULE_VERSION(DRV_MODULE_VERSION);
36
37struct ds_msg_tag {
38 __u32 type;
39#define DS_INIT_REQ 0x00
40#define DS_INIT_ACK 0x01
41#define DS_INIT_NACK 0x02
42#define DS_REG_REQ 0x03
43#define DS_REG_ACK 0x04
44#define DS_REG_NACK 0x05
45#define DS_UNREG_REQ 0x06
46#define DS_UNREG_ACK 0x07
47#define DS_UNREG_NACK 0x08
48#define DS_DATA 0x09
49#define DS_NACK 0x0a
50
51 __u32 len;
52};
53
54/* Result codes */
55#define DS_OK 0x00
56#define DS_REG_VER_NACK 0x01
57#define DS_REG_DUP 0x02
58#define DS_INV_HDL 0x03
59#define DS_TYPE_UNKNOWN 0x04
60
61struct ds_version {
62 __u16 major;
63 __u16 minor;
64};
65
66struct ds_ver_req {
67 struct ds_msg_tag tag;
68 struct ds_version ver;
69};
70
71struct ds_ver_ack {
72 struct ds_msg_tag tag;
73 __u16 minor;
74};
75
76struct ds_ver_nack {
77 struct ds_msg_tag tag;
78 __u16 major;
79};
80
81struct ds_reg_req {
82 struct ds_msg_tag tag;
83 __u64 handle;
84 __u16 major;
85 __u16 minor;
86 char svc_id[0];
87};
88
89struct ds_reg_ack {
90 struct ds_msg_tag tag;
91 __u64 handle;
92 __u16 minor;
93};
94
95struct ds_reg_nack {
96 struct ds_msg_tag tag;
97 __u64 handle;
98 __u16 major;
99};
100
101struct ds_unreg_req {
102 struct ds_msg_tag tag;
103 __u64 handle;
104};
105
106struct ds_unreg_ack {
107 struct ds_msg_tag tag;
108 __u64 handle;
109};
110
111struct ds_unreg_nack {
112 struct ds_msg_tag tag;
113 __u64 handle;
114};
115
116struct ds_data {
117 struct ds_msg_tag tag;
118 __u64 handle;
119};
120
121struct ds_data_nack {
122 struct ds_msg_tag tag;
123 __u64 handle;
124 __u64 result;
125};
126
127struct ds_cap_state {
128 __u64 handle;
129
130 void (*data)(struct ldc_channel *lp,
131 struct ds_cap_state *cp,
132 void *buf, int len);
133
134 const char *service_id;
135
136 u8 state;
137#define CAP_STATE_UNKNOWN 0x00
138#define CAP_STATE_REG_SENT 0x01
139#define CAP_STATE_REGISTERED 0x02
140};
141
142static void md_update_data(struct ldc_channel *lp, struct ds_cap_state *cp,
143 void *buf, int len);
144static void domain_shutdown_data(struct ldc_channel *lp,
145 struct ds_cap_state *cp,
146 void *buf, int len);
147static void domain_panic_data(struct ldc_channel *lp,
148 struct ds_cap_state *cp,
149 void *buf, int len);
150#ifdef CONFIG_HOTPLUG_CPU
151static void dr_cpu_data(struct ldc_channel *lp,
152 struct ds_cap_state *cp,
153 void *buf, int len);
154#endif
155static void ds_pri_data(struct ldc_channel *lp,
156 struct ds_cap_state *cp,
157 void *buf, int len);
158static void ds_var_data(struct ldc_channel *lp,
159 struct ds_cap_state *cp,
160 void *buf, int len);
161
162struct ds_cap_state ds_states[] = {
163 {
164 .service_id = "md-update",
165 .data = md_update_data,
166 },
167 {
168 .service_id = "domain-shutdown",
169 .data = domain_shutdown_data,
170 },
171 {
172 .service_id = "domain-panic",
173 .data = domain_panic_data,
174 },
175#ifdef CONFIG_HOTPLUG_CPU
176 {
177 .service_id = "dr-cpu",
178 .data = dr_cpu_data,
179 },
180#endif
181 {
182 .service_id = "pri",
183 .data = ds_pri_data,
184 },
185 {
186 .service_id = "var-config",
187 .data = ds_var_data,
188 },
189 {
190 .service_id = "var-config-backup",
191 .data = ds_var_data,
192 },
193};
194
195static DEFINE_SPINLOCK(ds_lock);
196
197struct ds_info {
198 struct ldc_channel *lp;
199 u8 hs_state;
200#define DS_HS_START 0x01
201#define DS_HS_DONE 0x02
202
203 void *rcv_buf;
204 int rcv_buf_len;
205};
206
207static struct ds_info *ds_info;
208
209static struct ds_cap_state *find_cap(u64 handle)
210{
211 unsigned int index = handle >> 32;
212
213 if (index >= ARRAY_SIZE(ds_states))
214 return NULL;
215 return &ds_states[index];
216}
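/* Editorial sketch (not part of the commit): service handles are built
 * in register_services() below as ((u64)index << 32) | counter, so the
 * upper 32 bits always recover the ds_states[] slot, e.g.:
 *
 *	u64 handle = ((u64)2 << 32) | 0x1234;	// slot 2, "domain-panic"
 *	find_cap(handle);			// returns &ds_states[2]
 */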
217
218static struct ds_cap_state *find_cap_by_string(const char *name)
219{
220 int i;
221
222 for (i = 0; i < ARRAY_SIZE(ds_states); i++) {
223 if (strcmp(ds_states[i].service_id, name))
224 continue;
225
226 return &ds_states[i];
227 }
228 return NULL;
229}
230
231static int ds_send(struct ldc_channel *lp, void *data, int len)
232{
233 int err, limit = 1000;
234
235 err = -EINVAL;
236 while (limit-- > 0) {
237 err = ldc_write(lp, data, len);
238 if (!err || (err != -EAGAIN))
239 break;
240 udelay(1);
241 }
242
243 return err;
244}
245
246struct ds_md_update_req {
247 __u64 req_num;
248};
249
250struct ds_md_update_res {
251 __u64 req_num;
252 __u32 result;
253};
254
255static void md_update_data(struct ldc_channel *lp,
256 struct ds_cap_state *dp,
257 void *buf, int len)
258{
259 struct ds_data *dpkt = buf;
260 struct ds_md_update_req *rp;
261 struct {
262 struct ds_data data;
263 struct ds_md_update_res res;
264 } pkt;
265
266 rp = (struct ds_md_update_req *) (dpkt + 1);
267
268 printk(KERN_INFO PFX "Machine description update.\n");
269
270 memset(&pkt, 0, sizeof(pkt));
271 pkt.data.tag.type = DS_DATA;
272 pkt.data.tag.len = sizeof(pkt) - sizeof(struct ds_msg_tag);
273 pkt.data.handle = dp->handle;
274 pkt.res.req_num = rp->req_num;
275 pkt.res.result = DS_OK;
276
277 ds_send(lp, &pkt, sizeof(pkt));
278
279 mdesc_update();
280}
281
282struct ds_shutdown_req {
283 __u64 req_num;
284 __u32 ms_delay;
285};
286
287struct ds_shutdown_res {
288 __u64 req_num;
289 __u32 result;
290 char reason[1];
291};
292
293static void domain_shutdown_data(struct ldc_channel *lp,
294 struct ds_cap_state *dp,
295 void *buf, int len)
296{
297 struct ds_data *dpkt = buf;
298 struct ds_shutdown_req *rp;
299 struct {
300 struct ds_data data;
301 struct ds_shutdown_res res;
302 } pkt;
303
304 rp = (struct ds_shutdown_req *) (dpkt + 1);
305
306 printk(KERN_ALERT PFX "Shutdown request from "
307 "LDOM manager received.\n");
308
309 memset(&pkt, 0, sizeof(pkt));
310 pkt.data.tag.type = DS_DATA;
311 pkt.data.tag.len = sizeof(pkt) - sizeof(struct ds_msg_tag);
312 pkt.data.handle = dp->handle;
313 pkt.res.req_num = rp->req_num;
314 pkt.res.result = DS_OK;
315 pkt.res.reason[0] = 0;
316
317 ds_send(lp, &pkt, sizeof(pkt));
318
319 wake_up_powerd();
320}
321
322struct ds_panic_req {
323 __u64 req_num;
324};
325
326struct ds_panic_res {
327 __u64 req_num;
328 __u32 result;
329 char reason[1];
330};
331
332static void domain_panic_data(struct ldc_channel *lp,
333 struct ds_cap_state *dp,
334 void *buf, int len)
335{
336 struct ds_data *dpkt = buf;
337 struct ds_panic_req *rp;
338 struct {
339 struct ds_data data;
340 struct ds_panic_res res;
341 } pkt;
342
343 rp = (struct ds_panic_req *) (dpkt + 1);
344
345 printk(KERN_ALERT PFX "Panic request from "
346 "LDOM manager received.\n");
347
348 memset(&pkt, 0, sizeof(pkt));
349 pkt.data.tag.type = DS_DATA;
350 pkt.data.tag.len = sizeof(pkt) - sizeof(struct ds_msg_tag);
351 pkt.data.handle = dp->handle;
352 pkt.res.req_num = rp->req_num;
353 pkt.res.result = DS_OK;
354 pkt.res.reason[0] = 0;
355
356 ds_send(lp, &pkt, sizeof(pkt));
357
358 panic("PANIC requested by LDOM manager.");
359}
360
361#ifdef CONFIG_HOTPLUG_CPU
362struct dr_cpu_tag {
363 __u64 req_num;
364 __u32 type;
365#define DR_CPU_CONFIGURE 0x43
366#define DR_CPU_UNCONFIGURE 0x55
367#define DR_CPU_FORCE_UNCONFIGURE 0x46
368#define DR_CPU_STATUS 0x53
369
370/* Responses */
371#define DR_CPU_OK 0x6f
372#define DR_CPU_ERROR 0x65
373
374 __u32 num_records;
375};
376
377struct dr_cpu_resp_entry {
378 __u32 cpu;
379 __u32 result;
380#define DR_CPU_RES_OK 0x00
381#define DR_CPU_RES_FAILURE 0x01
382#define DR_CPU_RES_BLOCKED 0x02
383#define DR_CPU_RES_CPU_NOT_RESPONDING 0x03
384#define DR_CPU_RES_NOT_IN_MD 0x04
385
386 __u32 stat;
387#define DR_CPU_STAT_NOT_PRESENT 0x00
388#define DR_CPU_STAT_UNCONFIGURED 0x01
389#define DR_CPU_STAT_CONFIGURED 0x02
390
391 __u32 str_off;
392};
393
394/* DR cpu requests get queued onto the work list by the
395 * dr_cpu_data() callback.  The list is protected by
396 * ds_lock, and processed in order by process_dr_cpu_list().
397 */
398static LIST_HEAD(dr_cpu_work_list);
399static DECLARE_WAIT_QUEUE_HEAD(dr_cpu_wait);
400
401struct dr_cpu_queue_entry {
402 struct list_head list;
403 char req[0];
404};
405
406static void __dr_cpu_send_error(struct ds_cap_state *cp, struct ds_data *data)
407{
408 struct dr_cpu_tag *tag = (struct dr_cpu_tag *) (data + 1);
409 struct ds_info *dp = ds_info;
410 struct {
411 struct ds_data data;
412 struct dr_cpu_tag tag;
413 } pkt;
414 int msg_len;
415
416 memset(&pkt, 0, sizeof(pkt));
417 pkt.data.tag.type = DS_DATA;
418 pkt.data.handle = cp->handle;
419 pkt.tag.req_num = tag->req_num;
420 pkt.tag.type = DR_CPU_ERROR;
421 pkt.tag.num_records = 0;
422
423 msg_len = (sizeof(struct ds_data) +
424 sizeof(struct dr_cpu_tag));
425
426 pkt.data.tag.len = msg_len - sizeof(struct ds_msg_tag);
427
428 ds_send(dp->lp, &pkt, msg_len);
429}
430
431static void dr_cpu_send_error(struct ds_cap_state *cp, struct ds_data *data)
432{
433 unsigned long flags;
434
435 spin_lock_irqsave(&ds_lock, flags);
436 __dr_cpu_send_error(cp, data);
437 spin_unlock_irqrestore(&ds_lock, flags);
438}
439
440#define CPU_SENTINEL 0xffffffff
441
442static void purge_dups(u32 *list, u32 num_ents)
443{
444 unsigned int i;
445
446 for (i = 0; i < num_ents; i++) {
447 u32 cpu = list[i];
448 unsigned int j;
449
450 if (cpu == CPU_SENTINEL)
451 continue;
452
453 for (j = i + 1; j < num_ents; j++) {
454 if (list[j] == cpu)
455 list[j] = CPU_SENTINEL;
456 }
457 }
458}
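/* Editorial example (not part of the commit): purge_dups() rewrites
 * later duplicates in place, e.g.:
 *
 *	u32 list[] = { 3, 5, 3, 7, 5 };
 *	purge_dups(list, 5);
 *	// list is now { 3, 5, CPU_SENTINEL, 7, CPU_SENTINEL }
 */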
459
460static int dr_cpu_size_response(int ncpus)
461{
462 return (sizeof(struct ds_data) +
463 sizeof(struct dr_cpu_tag) +
464 (sizeof(struct dr_cpu_resp_entry) * ncpus));
465}
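/* Editorial note (not part of the commit): the size computed above
 * matches the wire layout that dr_cpu_init_response() fills in:
 *
 *	+----------------+-------------------+--------------------------+
 *	| struct ds_data | struct dr_cpu_tag | dr_cpu_resp_entry[ncpus] |
 *	+----------------+-------------------+--------------------------+
 */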
466
467static void dr_cpu_init_response(struct ds_data *resp, u64 req_num,
468 u64 handle, int resp_len, int ncpus,
469 cpumask_t *mask, u32 default_stat)
470{
471 struct dr_cpu_resp_entry *ent;
472 struct dr_cpu_tag *tag;
473 int i, cpu;
474
475 tag = (struct dr_cpu_tag *) (resp + 1);
476 ent = (struct dr_cpu_resp_entry *) (tag + 1);
477
478 resp->tag.type = DS_DATA;
479 resp->tag.len = resp_len - sizeof(struct ds_msg_tag);
480 resp->handle = handle;
481 tag->req_num = req_num;
482 tag->type = DR_CPU_OK;
483 tag->num_records = ncpus;
484
485 i = 0;
486 for_each_cpu_mask(cpu, *mask) {
487 ent[i].cpu = cpu;
488 ent[i].result = DR_CPU_RES_OK;
489 ent[i].stat = default_stat;
490 i++;
491 }
492 BUG_ON(i != ncpus);
493}
494
495static void dr_cpu_mark(struct ds_data *resp, int cpu, int ncpus,
496 u32 res, u32 stat)
497{
498 struct dr_cpu_resp_entry *ent;
499 struct dr_cpu_tag *tag;
500 int i;
501
502 tag = (struct dr_cpu_tag *) (resp + 1);
503 ent = (struct dr_cpu_resp_entry *) (tag + 1);
504
505 for (i = 0; i < ncpus; i++) {
506 if (ent[i].cpu != cpu)
507 continue;
508 ent[i].result = res;
509 ent[i].stat = stat;
510 break;
511 }
512}
513
514static int dr_cpu_configure(struct ds_cap_state *cp, u64 req_num,
515 cpumask_t *mask)
516{
517 struct ds_data *resp;
518 int resp_len, ncpus, cpu;
519 unsigned long flags;
520
521 ncpus = cpus_weight(*mask);
522 resp_len = dr_cpu_size_response(ncpus);
523 resp = kzalloc(resp_len, GFP_KERNEL);
524 if (!resp)
525 return -ENOMEM;
526
527 dr_cpu_init_response(resp, req_num, cp->handle,
528 resp_len, ncpus, mask,
529 DR_CPU_STAT_CONFIGURED);
530
531 mdesc_fill_in_cpu_data(*mask);
532
533 for_each_cpu_mask(cpu, *mask) {
534 int err;
535
536 printk(KERN_INFO PFX "Starting cpu %d...\n", cpu);
537 err = cpu_up(cpu);
538 if (err) {
539 __u32 res = DR_CPU_RES_FAILURE;
540 __u32 stat = DR_CPU_STAT_UNCONFIGURED;
541
542 if (!cpu_present(cpu)) {
543 /* CPU not present in MD */
544 res = DR_CPU_RES_NOT_IN_MD;
545 stat = DR_CPU_STAT_NOT_PRESENT;
546 } else if (err == -ENODEV) {
547 /* CPU did not call in successfully */
548 res = DR_CPU_RES_CPU_NOT_RESPONDING;
549 }
550
551 printk(KERN_INFO PFX "CPU startup failed err=%d\n",
552 err);
553 dr_cpu_mark(resp, cpu, ncpus, res, stat);
554 }
555 }
556
557 spin_lock_irqsave(&ds_lock, flags);
558 ds_send(ds_info->lp, resp, resp_len);
559 spin_unlock_irqrestore(&ds_lock, flags);
560
561 kfree(resp);
562
563 /* Redistribute IRQs, taking into account the new cpus. */
564 fixup_irqs();
565
566 return 0;
567}
568
569static int dr_cpu_unconfigure(struct ds_cap_state *cp, u64 req_num,
570 cpumask_t *mask)
571{
572 struct ds_data *resp;
573 int resp_len, ncpus, cpu;
574 unsigned long flags;
575
576 ncpus = cpus_weight(*mask);
577 resp_len = dr_cpu_size_response(ncpus);
578 resp = kzalloc(resp_len, GFP_KERNEL);
579 if (!resp)
580 return -ENOMEM;
581
582 dr_cpu_init_response(resp, req_num, cp->handle,
583 resp_len, ncpus, mask,
584 DR_CPU_STAT_UNCONFIGURED);
585
586 for_each_cpu_mask(cpu, *mask) {
587 int err;
588
589 printk(KERN_INFO PFX "CPU[%d]: Shutting down cpu %d...\n",
590 smp_processor_id(), cpu);
591 err = cpu_down(cpu);
592 if (err)
593 dr_cpu_mark(resp, cpu, ncpus,
594 DR_CPU_RES_FAILURE,
595 DR_CPU_STAT_CONFIGURED);
596 }
597
598 spin_lock_irqsave(&ds_lock, flags);
599 ds_send(ds_info->lp, resp, resp_len);
600 spin_unlock_irqrestore(&ds_lock, flags);
601
602 kfree(resp);
603
604 return 0;
605}
606
607static void process_dr_cpu_list(struct ds_cap_state *cp)
608{
609 struct dr_cpu_queue_entry *qp, *tmp;
610 unsigned long flags;
611 LIST_HEAD(todo);
612 cpumask_t mask;
613
614 spin_lock_irqsave(&ds_lock, flags);
615 list_splice(&dr_cpu_work_list, &todo);
616 INIT_LIST_HEAD(&dr_cpu_work_list);
617 spin_unlock_irqrestore(&ds_lock, flags);
618
619 list_for_each_entry_safe(qp, tmp, &todo, list) {
620 struct ds_data *data = (struct ds_data *) qp->req;
621 struct dr_cpu_tag *tag = (struct dr_cpu_tag *) (data + 1);
622 u32 *cpu_list = (u32 *) (tag + 1);
623 u64 req_num = tag->req_num;
624 unsigned int i;
625 int err;
626
627 switch (tag->type) {
628 case DR_CPU_CONFIGURE:
629 case DR_CPU_UNCONFIGURE:
630 case DR_CPU_FORCE_UNCONFIGURE:
631 break;
632
633 default:
634 dr_cpu_send_error(cp, data);
635 goto next;
636 }
637
638 purge_dups(cpu_list, tag->num_records);
639
640 cpus_clear(mask);
641 for (i = 0; i < tag->num_records; i++) {
642 if (cpu_list[i] == CPU_SENTINEL)
643 continue;
644
645 if (cpu_list[i] < NR_CPUS)
646 cpu_set(cpu_list[i], mask);
647 }
648
649 if (tag->type == DR_CPU_CONFIGURE)
650 err = dr_cpu_configure(cp, req_num, &mask);
651 else
652 err = dr_cpu_unconfigure(cp, req_num, &mask);
653
654 if (err)
655 dr_cpu_send_error(cp, data);
656
657next:
658 list_del(&qp->list);
659 kfree(qp);
660 }
661}
662
663static int dr_cpu_thread(void *__unused)
664{
665 struct ds_cap_state *cp;
666 DEFINE_WAIT(wait);
667
668 cp = find_cap_by_string("dr-cpu");
669
670 while (1) {
671 prepare_to_wait(&dr_cpu_wait, &wait, TASK_INTERRUPTIBLE);
672 if (list_empty(&dr_cpu_work_list))
673 schedule();
674 finish_wait(&dr_cpu_wait, &wait);
675
676 if (kthread_should_stop())
677 break;
678
679 process_dr_cpu_list(cp);
680 }
681
682 return 0;
683}
684
685static void dr_cpu_data(struct ldc_channel *lp,
686 struct ds_cap_state *dp,
687 void *buf, int len)
688{
689 struct dr_cpu_queue_entry *qp;
690 struct ds_data *dpkt = buf;
691 struct dr_cpu_tag *rp;
692
693 rp = (struct dr_cpu_tag *) (dpkt + 1);
694
695 qp = kmalloc(sizeof(struct dr_cpu_queue_entry) + len, GFP_ATOMIC);
696 if (!qp) {
697 struct ds_cap_state *cp;
698
699 cp = find_cap_by_string("dr-cpu");
700 __dr_cpu_send_error(cp, dpkt);
701 } else {
702 memcpy(&qp->req, buf, len);
703 list_add_tail(&qp->list, &dr_cpu_work_list);
704 wake_up(&dr_cpu_wait);
705 }
706}
707#endif
708
709struct ds_pri_msg {
710 __u64 req_num;
711 __u64 type;
712#define DS_PRI_REQUEST 0x00
713#define DS_PRI_DATA 0x01
714#define DS_PRI_UPDATE 0x02
715};
716
717static void ds_pri_data(struct ldc_channel *lp,
718 struct ds_cap_state *dp,
719 void *buf, int len)
720{
721 struct ds_data *dpkt = buf;
722 struct ds_pri_msg *rp;
723
724 rp = (struct ds_pri_msg *) (dpkt + 1);
725
726 printk(KERN_INFO PFX "PRI REQ [%lx:%lx], len=%d\n",
727 rp->req_num, rp->type, len);
728}
729
730struct ds_var_hdr {
731 __u32 type;
732#define DS_VAR_SET_REQ 0x00
733#define DS_VAR_DELETE_REQ 0x01
734#define DS_VAR_SET_RESP 0x02
735#define DS_VAR_DELETE_RESP 0x03
736};
737
738struct ds_var_set_msg {
739 struct ds_var_hdr hdr;
740 char name_and_value[0];
741};
742
743struct ds_var_delete_msg {
744 struct ds_var_hdr hdr;
745 char name[0];
746};
747
748struct ds_var_resp {
749 struct ds_var_hdr hdr;
750 __u32 result;
751#define DS_VAR_SUCCESS 0x00
752#define DS_VAR_NO_SPACE 0x01
753#define DS_VAR_INVALID_VAR 0x02
754#define DS_VAR_INVALID_VAL 0x03
755#define DS_VAR_NOT_PRESENT 0x04
756};
757
758static DEFINE_MUTEX(ds_var_mutex);
759static int ds_var_doorbell;
760static int ds_var_response;
761
762static void ds_var_data(struct ldc_channel *lp,
763 struct ds_cap_state *dp,
764 void *buf, int len)
765{
766 struct ds_data *dpkt = buf;
767 struct ds_var_resp *rp;
768
769 rp = (struct ds_var_resp *) (dpkt + 1);
770
771 if (rp->hdr.type != DS_VAR_SET_RESP &&
772 rp->hdr.type != DS_VAR_DELETE_RESP)
773 return;
774
775 ds_var_response = rp->result;
776 wmb();
777 ds_var_doorbell = 1;
778}
779
780void ldom_set_var(const char *var, const char *value)
781{
782 struct ds_info *dp = ds_info;
783 struct ds_cap_state *cp;
784
785 cp = find_cap_by_string("var-config");
786 if (cp->state != CAP_STATE_REGISTERED)
787 cp = find_cap_by_string("var-config-backup");
788
789 if (cp->state == CAP_STATE_REGISTERED) {
790 union {
791 struct {
792 struct ds_data data;
793 struct ds_var_set_msg msg;
794 } header;
795 char all[512];
796 } pkt;
797 unsigned long flags;
798 char *base, *p;
799 int msg_len, loops;
800
801 memset(&pkt, 0, sizeof(pkt));
802 pkt.header.data.tag.type = DS_DATA;
803 pkt.header.data.handle = cp->handle;
804 pkt.header.msg.hdr.type = DS_VAR_SET_REQ;
805 base = p = &pkt.header.msg.name_and_value[0];
806 strcpy(p, var);
807 p += strlen(var) + 1;
808 strcpy(p, value);
809 p += strlen(value) + 1;
810
811 msg_len = (sizeof(struct ds_data) +
812 sizeof(struct ds_var_set_msg) +
813 (p - base));
814 msg_len = (msg_len + 3) & ~3;
815 pkt.header.data.tag.len = msg_len - sizeof(struct ds_msg_tag);
816
817 mutex_lock(&ds_var_mutex);
818
819 spin_lock_irqsave(&ds_lock, flags);
820 ds_var_doorbell = 0;
821 ds_var_response = -1;
822
823 ds_send(dp->lp, &pkt, msg_len);
824 spin_unlock_irqrestore(&ds_lock, flags);
825
826 loops = 1000;
827 while (ds_var_doorbell == 0) {
828 if (loops-- < 0)
829 break;
830 barrier();
831 udelay(100);
832 }
833
834 mutex_unlock(&ds_var_mutex);
835
836 if (ds_var_doorbell == 0 ||
837 ds_var_response != DS_VAR_SUCCESS)
838 printk(KERN_ERR PFX "var-config [%s:%s] "
839 "failed, response(%d).\n",
840 var, value,
841 ds_var_response);
842 } else {
843 printk(KERN_ERR PFX "var-config not registered so "
844 "could not set (%s) variable to (%s).\n",
845 var, value);
846 }
847}
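/* Editorial usage example (not part of the commit; "auto-boot?" is just
 * an illustrative OBP-style variable name):
 *
 *	ldom_set_var("auto-boot?", "false");
 *
 * blocks for up to roughly 100ms (1000 polls * udelay(100)) waiting for
 * the var-config service to reply via ds_var_data(), which stores the
 * result in ds_var_response and then raises ds_var_doorbell.
 */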
848
849void ldom_reboot(const char *boot_command)
850{
851 /* Don't bother with any of this if the boot_command
852 * is empty.
853 */
854 if (boot_command && strlen(boot_command)) {
855 char full_boot_str[256];
856
857 strcpy(full_boot_str, "boot ");
858 strcpy(full_boot_str + strlen("boot "), boot_command);
859
860 ldom_set_var("reboot-command", full_boot_str);
861 }
862 sun4v_mach_sir();
863}
864
865void ldom_power_off(void)
866{
867 sun4v_mach_exit(0);
868}
869
870static void ds_conn_reset(struct ds_info *dp)
871{
872 printk(KERN_ERR PFX "ds_conn_reset() from %p\n",
873 __builtin_return_address(0));
874}
875
876static int register_services(struct ds_info *dp)
877{
878 struct ldc_channel *lp = dp->lp;
879 int i;
880
881 for (i = 0; i < ARRAY_SIZE(ds_states); i++) {
882 struct {
883 struct ds_reg_req req;
884 u8 id_buf[256];
885 } pbuf;
886 struct ds_cap_state *cp = &ds_states[i];
887 int err, msg_len;
888 u64 new_count;
889
890 if (cp->state == CAP_STATE_REGISTERED)
891 continue;
892
893 new_count = sched_clock() & 0xffffffff;
894 cp->handle = ((u64) i << 32) | new_count;
895
896 msg_len = (sizeof(struct ds_reg_req) +
897 strlen(cp->service_id));
898
899 memset(&pbuf, 0, sizeof(pbuf));
900 pbuf.req.tag.type = DS_REG_REQ;
901 pbuf.req.tag.len = (msg_len - sizeof(struct ds_msg_tag));
902 pbuf.req.handle = cp->handle;
903 pbuf.req.major = 1;
904 pbuf.req.minor = 0;
905 strcpy(pbuf.req.svc_id, cp->service_id);
906
907 err = ds_send(lp, &pbuf, msg_len);
908 if (err > 0)
909 cp->state = CAP_STATE_REG_SENT;
910 }
911 return 0;
912}
913
914static int ds_handshake(struct ds_info *dp, struct ds_msg_tag *pkt)
915{
916
917 if (dp->hs_state == DS_HS_START) {
918 if (pkt->type != DS_INIT_ACK)
919 goto conn_reset;
920
921 dp->hs_state = DS_HS_DONE;
922
923 return register_services(dp);
924 }
925
926 if (dp->hs_state != DS_HS_DONE)
927 goto conn_reset;
928
929 if (pkt->type == DS_REG_ACK) {
930 struct ds_reg_ack *ap = (struct ds_reg_ack *) pkt;
931 struct ds_cap_state *cp = find_cap(ap->handle);
932
933 if (!cp) {
934 printk(KERN_ERR PFX "REG ACK for unknown handle %lx\n",
935 ap->handle);
936 return 0;
937 }
938 printk(KERN_INFO PFX "Registered %s service.\n",
939 cp->service_id);
940 cp->state = CAP_STATE_REGISTERED;
941 } else if (pkt->type == DS_REG_NACK) {
942 struct ds_reg_nack *np = (struct ds_reg_nack *) pkt;
943 struct ds_cap_state *cp = find_cap(np->handle);
944
945 if (!cp) {
946 printk(KERN_ERR PFX "REG NACK for "
947 "unknown handle %lx\n",
948 np->handle);
949 return 0;
950 }
951 printk(KERN_INFO PFX "Could not register %s service\n",
952 cp->service_id);
953 cp->state = CAP_STATE_UNKNOWN;
954 }
955
956 return 0;
957
958conn_reset:
959 ds_conn_reset(dp);
960 return -ECONNRESET;
961}
962
963static int ds_data(struct ds_info *dp, struct ds_msg_tag *pkt, int len)
964{
965 struct ds_data *dpkt = (struct ds_data *) pkt;
966 struct ds_cap_state *cp = find_cap(dpkt->handle);
967
968 if (!cp) {
969 struct ds_data_nack nack = {
970 .tag = {
971 .type = DS_NACK,
972 .len = (sizeof(struct ds_data_nack) -
973 sizeof(struct ds_msg_tag)),
974 },
975 .handle = dpkt->handle,
976 .result = DS_INV_HDL,
977 };
978
979 printk(KERN_ERR PFX "Data for unknown handle %lu\n",
980 dpkt->handle);
981 ds_send(dp->lp, &nack, sizeof(nack));
982 } else {
983 cp->data(dp->lp, cp, dpkt, len);
984 }
985 return 0;
986}
987
988static void ds_up(struct ds_info *dp)
989{
990 struct ldc_channel *lp = dp->lp;
991 struct ds_ver_req req;
992 int err;
993
994 req.tag.type = DS_INIT_REQ;
995 req.tag.len = sizeof(req) - sizeof(struct ds_msg_tag);
996 req.ver.major = 1;
997 req.ver.minor = 0;
998
999 err = ds_send(lp, &req, sizeof(req));
1000 if (err > 0)
1001 dp->hs_state = DS_HS_START;
1002}
1003
1004static void ds_event(void *arg, int event)
1005{
1006 struct ds_info *dp = arg;
1007 struct ldc_channel *lp = dp->lp;
1008 unsigned long flags;
1009 int err;
1010
1011 spin_lock_irqsave(&ds_lock, flags);
1012
1013 if (event == LDC_EVENT_UP) {
1014 ds_up(dp);
1015 spin_unlock_irqrestore(&ds_lock, flags);
1016 return;
1017 }
1018
1019 if (event != LDC_EVENT_DATA_READY) {
1020 printk(KERN_WARNING PFX "Unexpected LDC event %d\n", event);
1021 spin_unlock_irqrestore(&ds_lock, flags);
1022 return;
1023 }
1024
1025 err = 0;
1026 while (1) {
1027 struct ds_msg_tag *tag;
1028
1029 err = ldc_read(lp, dp->rcv_buf, sizeof(*tag));
1030
1031 if (unlikely(err < 0)) {
1032 if (err == -ECONNRESET)
1033 ds_conn_reset(dp);
1034 break;
1035 }
1036 if (err == 0)
1037 break;
1038
1039 tag = dp->rcv_buf;
1040 err = ldc_read(lp, tag + 1, tag->len);
1041
1042 if (unlikely(err < 0)) {
1043 if (err == -ECONNRESET)
1044 ds_conn_reset(dp);
1045 break;
1046 }
1047 if (err < tag->len)
1048 break;
1049
1050 if (tag->type < DS_DATA)
1051 err = ds_handshake(dp, dp->rcv_buf);
1052 else
1053 err = ds_data(dp, dp->rcv_buf,
1054 sizeof(*tag) + err);
1055 if (err == -ECONNRESET)
1056 break;
1057 }
1058
1059 spin_unlock_irqrestore(&ds_lock, flags);
1060}
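/* Editorial note (not part of the commit): the read loop above relies
 * on the framing convention that every message begins with a struct
 * ds_msg_tag whose len field counts only the bytes following the tag.
 * So ds_event() first reads sizeof(*tag) bytes, then tag->len more,
 * and dispatches on tag->type: values below DS_DATA are handshake
 * packets, everything else is service data.
 */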
1061
1062static int __devinit ds_probe(struct vio_dev *vdev,
1063 const struct vio_device_id *id)
1064{
1065 static int ds_version_printed;
1066 struct ldc_channel_config ds_cfg = {
1067 .event = ds_event,
1068 .mtu = 4096,
1069 .mode = LDC_MODE_STREAM,
1070 };
1071 struct ldc_channel *lp;
1072 struct ds_info *dp;
1073 int err;
1074
1075 if (ds_version_printed++ == 0)
1076 printk(KERN_INFO "%s", version);
1077
1078 dp = kzalloc(sizeof(*dp), GFP_KERNEL);
1079 err = -ENOMEM;
1080 if (!dp)
1081 goto out_err;
1082
1083 dp->rcv_buf = kzalloc(4096, GFP_KERNEL);
1084 if (!dp->rcv_buf)
1085 goto out_free_dp;
1086
1087 dp->rcv_buf_len = 4096;
1088
1089 ds_cfg.tx_irq = vdev->tx_irq;
1090 ds_cfg.rx_irq = vdev->rx_irq;
1091
1092 lp = ldc_alloc(vdev->channel_id, &ds_cfg, dp);
1093 if (IS_ERR(lp)) {
1094 err = PTR_ERR(lp);
1095 goto out_free_rcv_buf;
1096 }
1097 dp->lp = lp;
1098
1099 err = ldc_bind(lp, "DS");
1100 if (err)
1101 goto out_free_ldc;
1102
1103 ds_info = dp;
1104
1105 start_powerd();
1106
1107 return err;
1108
1109out_free_ldc:
1110 ldc_free(dp->lp);
1111
1112out_free_rcv_buf:
1113 kfree(dp->rcv_buf);
1114
1115out_free_dp:
1116 kfree(dp);
1117
1118out_err:
1119 return err;
1120}
1121
1122static int ds_remove(struct vio_dev *vdev)
1123{
1124 return 0;
1125}
1126
1127static struct vio_device_id ds_match[] = {
1128 {
1129 .type = "domain-services-port",
1130 },
1131 {},
1132};
1133
1134static struct vio_driver ds_driver = {
1135 .id_table = ds_match,
1136 .probe = ds_probe,
1137 .remove = ds_remove,
1138 .driver = {
1139 .name = "ds",
1140 .owner = THIS_MODULE,
1141 }
1142};
1143
1144static int __init ds_init(void)
1145{
1146 int i;
1147
1148 for (i = 0; i < ARRAY_SIZE(ds_states); i++)
1149 ds_states[i].handle = ((u64)i << 32);
1150
1151#ifdef CONFIG_HOTPLUG_CPU
1152 kthread_run(dr_cpu_thread, NULL, "kdrcpud");
1153#endif
1154
1155 return vio_register_driver(&ds_driver);
1156}
1157
1158subsys_initcall(ds_init);
diff --git a/arch/sparc64/kernel/hvtramp.S b/arch/sparc64/kernel/hvtramp.S
new file mode 100644
index 000000000000..76a090e2c2a8
--- /dev/null
+++ b/arch/sparc64/kernel/hvtramp.S
@@ -0,0 +1,139 @@
1/* hvtramp.S: Hypervisor start-cpu trampoline code.
2 *
3 * Copyright (C) 2007 David S. Miller <davem@davemloft.net>
4 */
5
6#include <asm/thread_info.h>
7#include <asm/hypervisor.h>
8#include <asm/scratchpad.h>
9#include <asm/spitfire.h>
10#include <asm/hvtramp.h>
11#include <asm/pstate.h>
12#include <asm/ptrace.h>
13#include <asm/asi.h>
14
15 .text
16 .align 8
17 .globl hv_cpu_startup, hv_cpu_startup_end
18
19 /* This code executes directly out of the hypervisor
20 * with physical addressing (va==pa). %o0 contains
21 * our client argument which for Linux points to
22 * a descriptor data structure which defines the
23 * MMU entries we need to load up.
24 *
25 * After we set things up we enable the MMU and call
26 * into the kernel.
27 *
28 * First, set up basic privileged cpu state.
29 */
30hv_cpu_startup:
31 wrpr %g0, 0, %gl
32 wrpr %g0, 15, %pil
33 wrpr %g0, 0, %canrestore
34 wrpr %g0, 0, %otherwin
35 wrpr %g0, 6, %cansave
36 wrpr %g0, 6, %cleanwin
37 wrpr %g0, 0, %cwp
38 wrpr %g0, 0, %wstate
39 wrpr %g0, 0, %tl
40
41 sethi %hi(sparc64_ttable_tl0), %g1
42 wrpr %g1, %tba
43
44 mov %o0, %l0
45
46 lduw [%l0 + HVTRAMP_DESCR_CPU], %g1
47 mov SCRATCHPAD_CPUID, %g2
48 stxa %g1, [%g2] ASI_SCRATCHPAD
49
50 ldx [%l0 + HVTRAMP_DESCR_FAULT_INFO_VA], %g2
51 stxa %g2, [%g0] ASI_SCRATCHPAD
52
53 mov 0, %l1
54 lduw [%l0 + HVTRAMP_DESCR_NUM_MAPPINGS], %l2
55 add %l0, HVTRAMP_DESCR_MAPS, %l3
56
571: ldx [%l3 + HVTRAMP_MAPPING_VADDR], %o0
58 clr %o1
59 ldx [%l3 + HVTRAMP_MAPPING_TTE], %o2
60 mov HV_MMU_IMMU | HV_MMU_DMMU, %o3
61 mov HV_FAST_MMU_MAP_PERM_ADDR, %o5
62 ta HV_FAST_TRAP
63
64 brnz,pn %o0, 80f
65 nop
66
67 add %l1, 1, %l1
68 cmp %l1, %l2
69 blt,a,pt %xcc, 1b
70 add %l3, HVTRAMP_MAPPING_SIZE, %l3
71
72 ldx [%l0 + HVTRAMP_DESCR_FAULT_INFO_PA], %o0
73 mov HV_FAST_MMU_FAULT_AREA_CONF, %o5
74 ta HV_FAST_TRAP
75
76 brnz,pn %o0, 80f
77 nop
78
79 wrpr %g0, (PSTATE_PRIV | PSTATE_PEF), %pstate
80
81 ldx [%l0 + HVTRAMP_DESCR_THREAD_REG], %l6
82
83 mov 1, %o0
84 set 1f, %o1
85 mov HV_FAST_MMU_ENABLE, %o5
86 ta HV_FAST_TRAP
87
88 ba,pt %xcc, 80f
89 nop
90
911:
92 wr %g0, 0, %fprs
93 wr %g0, ASI_P, %asi
94
95 mov PRIMARY_CONTEXT, %g7
96 stxa %g0, [%g7] ASI_MMU
97 membar #Sync
98
99 mov SECONDARY_CONTEXT, %g7
100 stxa %g0, [%g7] ASI_MMU
101 membar #Sync
102
103 mov %l6, %g6
104 ldx [%g6 + TI_TASK], %g4
105
106 mov 1, %g5
107 sllx %g5, THREAD_SHIFT, %g5
108 sub %g5, (STACKFRAME_SZ + STACK_BIAS), %g5
109 add %g6, %g5, %sp
110 mov 0, %fp
111
112 call init_irqwork_curcpu
113 nop
114 call hard_smp_processor_id
115 nop
116
117 mov %o0, %o1
118 mov 0, %o0
119 mov 0, %o2
120 call sun4v_init_mondo_queues
121 mov 1, %o3
122
123 call init_cur_cpu_trap
124 mov %g6, %o0
125
126 wrpr %g0, (PSTATE_PRIV | PSTATE_PEF | PSTATE_IE), %pstate
127
128 call smp_callin
129 nop
130 call cpu_idle
131 mov 0, %o0
132 call cpu_panic
133 nop
134
13580: ba,pt %xcc, 80b
136 nop
137
138 .align 8
139hv_cpu_startup_end:
diff --git a/arch/sparc64/kernel/irq.c b/arch/sparc64/kernel/irq.c
index 6b6165d36fd8..8cb3358674f5 100644
--- a/arch/sparc64/kernel/irq.c
+++ b/arch/sparc64/kernel/irq.c
@@ -293,6 +293,11 @@ static void sun4u_irq_enable(unsigned int virt_irq)
 	}
 }
 
+static void sun4u_set_affinity(unsigned int virt_irq, cpumask_t mask)
+{
+	sun4u_irq_enable(virt_irq);
+}
+
 static void sun4u_irq_disable(unsigned int virt_irq)
 {
 	struct irq_handler_data *data = get_irq_chip_data(virt_irq);
@@ -309,6 +314,10 @@ static void sun4u_irq_disable(unsigned int virt_irq)
 static void sun4u_irq_end(unsigned int virt_irq)
 {
 	struct irq_handler_data *data = get_irq_chip_data(virt_irq);
+	struct irq_desc *desc = irq_desc + virt_irq;
+
+	if (unlikely(desc->status & (IRQ_DISABLED|IRQ_INPROGRESS)))
+		return;
 
 	if (likely(data))
 		upa_writeq(ICLR_IDLE, data->iclr);
@@ -340,6 +349,24 @@ static void sun4v_irq_enable(unsigned int virt_irq)
 	}
 }
 
+static void sun4v_set_affinity(unsigned int virt_irq, cpumask_t mask)
+{
+	struct ino_bucket *bucket = virt_irq_to_bucket(virt_irq);
+	unsigned int ino = bucket - &ivector_table[0];
+
+	if (likely(bucket)) {
+		unsigned long cpuid;
+		int err;
+
+		cpuid = irq_choose_cpu(virt_irq);
+
+		err = sun4v_intr_settarget(ino, cpuid);
+		if (err != HV_EOK)
+			printk("sun4v_intr_settarget(%x,%lu): err(%d)\n",
+			       ino, cpuid, err);
+	}
+}
+
 static void sun4v_irq_disable(unsigned int virt_irq)
 {
 	struct ino_bucket *bucket = virt_irq_to_bucket(virt_irq);
@@ -373,6 +400,10 @@ static void sun4v_irq_end(unsigned int virt_irq)
 {
 	struct ino_bucket *bucket = virt_irq_to_bucket(virt_irq);
 	unsigned int ino = bucket - &ivector_table[0];
+	struct irq_desc *desc = irq_desc + virt_irq;
+
+	if (unlikely(desc->status & (IRQ_DISABLED|IRQ_INPROGRESS)))
+		return;
 
 	if (likely(bucket)) {
 		int err;
@@ -418,6 +449,28 @@ static void sun4v_virq_enable(unsigned int virt_irq)
 	}
 }
 
+static void sun4v_virt_set_affinity(unsigned int virt_irq, cpumask_t mask)
+{
+	struct ino_bucket *bucket = virt_irq_to_bucket(virt_irq);
+	unsigned int ino = bucket - &ivector_table[0];
+
+	if (likely(bucket)) {
+		unsigned long cpuid, dev_handle, dev_ino;
+		int err;
+
+		cpuid = irq_choose_cpu(virt_irq);
+
+		dev_handle = ino & IMAP_IGN;
+		dev_ino = ino & IMAP_INO;
+
+		err = sun4v_vintr_set_target(dev_handle, dev_ino, cpuid);
+		if (err != HV_EOK)
+			printk("sun4v_vintr_set_target(%lx,%lx,%lu): "
+			       "err(%d)\n",
+			       dev_handle, dev_ino, cpuid, err);
+	}
+}
+
 static void sun4v_virq_disable(unsigned int virt_irq)
 {
 	struct ino_bucket *bucket = virt_irq_to_bucket(virt_irq);
@@ -443,6 +496,10 @@ static void sun4v_virq_end(unsigned int virt_irq)
 {
 	struct ino_bucket *bucket = virt_irq_to_bucket(virt_irq);
 	unsigned int ino = bucket - &ivector_table[0];
+	struct irq_desc *desc = irq_desc + virt_irq;
+
+	if (unlikely(desc->status & (IRQ_DISABLED|IRQ_INPROGRESS)))
+		return;
 
 	if (likely(bucket)) {
 		unsigned long dev_handle, dev_ino;
@@ -477,6 +534,7 @@ static struct irq_chip sun4u_irq = {
 	.enable		= sun4u_irq_enable,
 	.disable	= sun4u_irq_disable,
 	.end		= sun4u_irq_end,
+	.set_affinity	= sun4u_set_affinity,
 };
 
 static struct irq_chip sun4u_irq_ack = {
@@ -485,6 +543,7 @@ static struct irq_chip sun4u_irq_ack = {
 	.disable	= sun4u_irq_disable,
 	.ack		= run_pre_handler,
 	.end		= sun4u_irq_end,
+	.set_affinity	= sun4u_set_affinity,
 };
 
 static struct irq_chip sun4v_irq = {
@@ -492,6 +551,7 @@ static struct irq_chip sun4v_irq = {
 	.enable		= sun4v_irq_enable,
 	.disable	= sun4v_irq_disable,
 	.end		= sun4v_irq_end,
+	.set_affinity	= sun4v_set_affinity,
 };
 
 static struct irq_chip sun4v_irq_ack = {
@@ -500,6 +560,7 @@ static struct irq_chip sun4v_irq_ack = {
 	.disable	= sun4v_irq_disable,
 	.ack		= run_pre_handler,
 	.end		= sun4v_irq_end,
+	.set_affinity	= sun4v_set_affinity,
 };
 
 #ifdef CONFIG_PCI_MSI
@@ -511,6 +572,7 @@ static struct irq_chip sun4v_msi = {
 	.disable	= sun4v_msi_disable,
 	.ack		= run_pre_handler,
 	.end		= sun4v_irq_end,
+	.set_affinity	= sun4v_set_affinity,
 };
 #endif
 
@@ -519,6 +581,7 @@ static struct irq_chip sun4v_virq = {
 	.enable		= sun4v_virq_enable,
 	.disable	= sun4v_virq_disable,
 	.end		= sun4v_virq_end,
+	.set_affinity	= sun4v_virt_set_affinity,
 };
 
 static struct irq_chip sun4v_virq_ack = {
@@ -527,6 +590,7 @@ static struct irq_chip sun4v_virq_ack = {
 	.disable	= sun4v_virq_disable,
 	.ack		= run_pre_handler,
 	.end		= sun4v_virq_end,
+	.set_affinity	= sun4v_virt_set_affinity,
 };
 
 void irq_install_pre_handler(int virt_irq,
@@ -739,6 +803,26 @@ void handler_irq(int irq, struct pt_regs *regs)
 	set_irq_regs(old_regs);
 }
 
+#ifdef CONFIG_HOTPLUG_CPU
+void fixup_irqs(void)
+{
+	unsigned int irq;
+
+	for (irq = 0; irq < NR_IRQS; irq++) {
+		unsigned long flags;
+
+		spin_lock_irqsave(&irq_desc[irq].lock, flags);
+		if (irq_desc[irq].action &&
+		    !(irq_desc[irq].status & IRQ_PER_CPU)) {
+			if (irq_desc[irq].chip->set_affinity)
+				irq_desc[irq].chip->set_affinity(irq,
+					irq_desc[irq].affinity);
+		}
+		spin_unlock_irqrestore(&irq_desc[irq].lock, flags);
+	}
+}
+#endif
+
 struct sun5_timer {
 	u64	count0;
 	u64	limit0;
diff --git a/arch/sparc64/kernel/ldc.c b/arch/sparc64/kernel/ldc.c
new file mode 100644
index 000000000000..85a2be0b0962
--- /dev/null
+++ b/arch/sparc64/kernel/ldc.c
@@ -0,0 +1,2373 @@
1/* ldc.c: Logical Domain Channel link-layer protocol driver.
2 *
3 * Copyright (C) 2007 David S. Miller <davem@davemloft.net>
4 */
5
6#include <linux/kernel.h>
7#include <linux/module.h>
8#include <linux/slab.h>
9#include <linux/spinlock.h>
10#include <linux/delay.h>
11#include <linux/errno.h>
12#include <linux/string.h>
13#include <linux/scatterlist.h>
14#include <linux/interrupt.h>
15#include <linux/list.h>
16#include <linux/init.h>
17
18#include <asm/hypervisor.h>
19#include <asm/iommu.h>
20#include <asm/page.h>
21#include <asm/ldc.h>
22#include <asm/mdesc.h>
23
24#define DRV_MODULE_NAME "ldc"
25#define PFX DRV_MODULE_NAME ": "
26#define DRV_MODULE_VERSION "1.0"
27#define DRV_MODULE_RELDATE "June 25, 2007"
28
29static char version[] __devinitdata =
30 DRV_MODULE_NAME ".c:v" DRV_MODULE_VERSION " (" DRV_MODULE_RELDATE ")\n";
31#define LDC_PACKET_SIZE 64
32
33/* Packet header layout for unreliable and reliable mode frames.
34 * When in RAW mode, packets are simply straight 64-byte payloads
35 * with no headers.
36 */
37struct ldc_packet {
38 u8 type;
39#define LDC_CTRL 0x01
40#define LDC_DATA 0x02
41#define LDC_ERR 0x10
42
43 u8 stype;
44#define LDC_INFO 0x01
45#define LDC_ACK 0x02
46#define LDC_NACK 0x04
47
48 u8 ctrl;
49#define LDC_VERS 0x01 /* Link Version */
50#define LDC_RTS 0x02 /* Request To Send */
51#define LDC_RTR 0x03 /* Ready To Receive */
52#define LDC_RDX 0x04 /* Ready for Data eXchange */
53#define LDC_CTRL_MSK 0x0f
54
55 u8 env;
56#define LDC_LEN 0x3f
57#define LDC_FRAG_MASK 0xc0
58#define LDC_START 0x40
59#define LDC_STOP 0x80
60
61 u32 seqid;
62
63 union {
64 u8 u_data[LDC_PACKET_SIZE - 8];
65 struct {
66 u32 pad;
67 u32 ackid;
68 u8 r_data[LDC_PACKET_SIZE - 8 - 8];
69 } r;
70 } u;
71};
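/* Editorial note (not part of the commit): with LDC_PACKET_SIZE == 64
 * and an 8-byte header (type/stype/ctrl/env/seqid), an unreliable frame
 * carries 56 payload bytes (u.u_data) while a reliable or stream frame
 * carries 48 (u.r.r_data), since it also holds the pad and ackid words.
 */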
72
73struct ldc_version {
74 u16 major;
75 u16 minor;
76};
77
78/* Ordered from highest major version to lowest. */
79static struct ldc_version ver_arr[] = {
80 { .major = 1, .minor = 0 },
81};
82
83#define LDC_DEFAULT_MTU (4 * LDC_PACKET_SIZE)
84#define LDC_DEFAULT_NUM_ENTRIES (PAGE_SIZE / LDC_PACKET_SIZE)
85
86struct ldc_channel;
87
88struct ldc_mode_ops {
89 int (*write)(struct ldc_channel *, const void *, unsigned int);
90 int (*read)(struct ldc_channel *, void *, unsigned int);
91};
92
93static const struct ldc_mode_ops raw_ops;
94static const struct ldc_mode_ops nonraw_ops;
95static const struct ldc_mode_ops stream_ops;
96
97int ldom_domaining_enabled;
98
99struct ldc_iommu {
100 /* Protects arena alloc/free. */
101 spinlock_t lock;
102 struct iommu_arena arena;
103 struct ldc_mtable_entry *page_table;
104};
105
106struct ldc_channel {
107 /* Protects all operations that depend upon channel state. */
108 spinlock_t lock;
109
110 unsigned long id;
111
112 u8 *mssbuf;
113 u32 mssbuf_len;
114 u32 mssbuf_off;
115
116 struct ldc_packet *tx_base;
117 unsigned long tx_head;
118 unsigned long tx_tail;
119 unsigned long tx_num_entries;
120 unsigned long tx_ra;
121
122 unsigned long tx_acked;
123
124 struct ldc_packet *rx_base;
125 unsigned long rx_head;
126 unsigned long rx_tail;
127 unsigned long rx_num_entries;
128 unsigned long rx_ra;
129
130 u32 rcv_nxt;
131 u32 snd_nxt;
132
133 unsigned long chan_state;
134
135 struct ldc_channel_config cfg;
136 void *event_arg;
137
138 const struct ldc_mode_ops *mops;
139
140 struct ldc_iommu iommu;
141
142 struct ldc_version ver;
143
144 u8 hs_state;
145#define LDC_HS_CLOSED 0x00
146#define LDC_HS_OPEN 0x01
147#define LDC_HS_GOTVERS 0x02
148#define LDC_HS_SENTRTR 0x03
149#define LDC_HS_GOTRTR 0x04
150#define LDC_HS_COMPLETE 0x10
151
152 u8 flags;
153#define LDC_FLAG_ALLOCED_QUEUES 0x01
154#define LDC_FLAG_REGISTERED_QUEUES 0x02
155#define LDC_FLAG_REGISTERED_IRQS 0x04
156#define LDC_FLAG_RESET 0x10
157
158 u8 mss;
159 u8 state;
160
161#define LDC_IRQ_NAME_MAX 32
162 char rx_irq_name[LDC_IRQ_NAME_MAX];
163 char tx_irq_name[LDC_IRQ_NAME_MAX];
164
165 struct hlist_head mh_list;
166
167 struct hlist_node list;
168};
169
170#define ldcdbg(TYPE, f, a...) \
171do { if (lp->cfg.debug & LDC_DEBUG_##TYPE) \
172 printk(KERN_INFO PFX "ID[%lu] " f, lp->id, ## a); \
173} while (0)
174
175static const char *state_to_str(u8 state)
176{
177 switch (state) {
178 case LDC_STATE_INVALID:
179 return "INVALID";
180 case LDC_STATE_INIT:
181 return "INIT";
182 case LDC_STATE_BOUND:
183 return "BOUND";
184 case LDC_STATE_READY:
185 return "READY";
186 case LDC_STATE_CONNECTED:
187 return "CONNECTED";
188 default:
189 return "<UNKNOWN>";
190 }
191}
192
193static void ldc_set_state(struct ldc_channel *lp, u8 state)
194{
195 ldcdbg(STATE, "STATE (%s) --> (%s)\n",
196 state_to_str(lp->state),
197 state_to_str(state));
198
199 lp->state = state;
200}
201
202static unsigned long __advance(unsigned long off, unsigned long num_entries)
203{
204 off += LDC_PACKET_SIZE;
205 if (off == (num_entries * LDC_PACKET_SIZE))
206 off = 0;
207
208 return off;
209}
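/* Editorial example (not part of the commit): queue offsets advance in
 * LDC_PACKET_SIZE steps and wrap at the end of the ring, e.g. with
 * num_entries == 4:
 *
 *	__advance(0, 4);	// returns 64
 *	__advance(192, 4);	// returns 0, since 192 + 64 == 4 * 64
 */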
210
211static unsigned long rx_advance(struct ldc_channel *lp, unsigned long off)
212{
213 return __advance(off, lp->rx_num_entries);
214}
215
216static unsigned long tx_advance(struct ldc_channel *lp, unsigned long off)
217{
218 return __advance(off, lp->tx_num_entries);
219}
220
221static struct ldc_packet *handshake_get_tx_packet(struct ldc_channel *lp,
222 unsigned long *new_tail)
223{
224 struct ldc_packet *p;
225 unsigned long t;
226
227 t = tx_advance(lp, lp->tx_tail);
228 if (t == lp->tx_head)
229 return NULL;
230
231 *new_tail = t;
232
233 p = lp->tx_base;
234 return p + (lp->tx_tail / LDC_PACKET_SIZE);
235}
236
237/* When we are in reliable or stream mode, we have to track the next packet
238 * we haven't gotten an ACK for in the TX queue using tx_acked. We have
239 * to be careful not to stomp over the queue past that point. During
240 * the handshake, we don't have TX data packets pending in the queue
241 * and that's why handshake_get_tx_packet() need not be mindful of
242 * lp->tx_acked.
243 */
244static unsigned long head_for_data(struct ldc_channel *lp)
245{
246 if (lp->cfg.mode == LDC_MODE_STREAM)
247 return lp->tx_acked;
248 return lp->tx_head;
249}
250
251static int tx_has_space_for(struct ldc_channel *lp, unsigned int size)
252{
253 unsigned long limit, tail, new_tail, diff;
254 unsigned int mss;
255
256 limit = head_for_data(lp);
257 tail = lp->tx_tail;
258 new_tail = tx_advance(lp, tail);
259 if (new_tail == limit)
260 return 0;
261
262 if (limit > new_tail)
263 diff = limit - new_tail;
264 else
265 diff = (limit +
266 ((lp->tx_num_entries * LDC_PACKET_SIZE) - new_tail));
267 diff /= LDC_PACKET_SIZE;
268 mss = lp->mss;
269
270 if (diff * mss < size)
271 return 0;
272
273 return 1;
274}
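/* Editorial example (not part of the commit): with tx_num_entries == 8
 * (a 512-byte ring), head/limit == 0 and tail == 192, new_tail is 256
 * and diff works out to (0 + 512 - 256) / 64 == 4 free packets, so any
 * write of up to 4 * lp->mss bytes fits.
 */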
275
276static struct ldc_packet *data_get_tx_packet(struct ldc_channel *lp,
277 unsigned long *new_tail)
278{
279 struct ldc_packet *p;
280 unsigned long h, t;
281
282 h = head_for_data(lp);
283 t = tx_advance(lp, lp->tx_tail);
284 if (t == h)
285 return NULL;
286
287 *new_tail = t;
288
289 p = lp->tx_base;
290 return p + (lp->tx_tail / LDC_PACKET_SIZE);
291}
292
293static int set_tx_tail(struct ldc_channel *lp, unsigned long tail)
294{
295 unsigned long orig_tail = lp->tx_tail;
296 int limit = 1000;
297
298 lp->tx_tail = tail;
299 while (limit-- > 0) {
300 unsigned long err;
301
302 err = sun4v_ldc_tx_set_qtail(lp->id, tail);
303 if (!err)
304 return 0;
305
306 if (err != HV_EWOULDBLOCK) {
307 lp->tx_tail = orig_tail;
308 return -EINVAL;
309 }
310 udelay(1);
311 }
312
313 lp->tx_tail = orig_tail;
314 return -EBUSY;
315}
316
317/* This just updates the head value in the hypervisor using
318 * a polling loop with a timeout. The caller takes care of
319 * updating software state representing the head change, if any.
320 */
321static int __set_rx_head(struct ldc_channel *lp, unsigned long head)
322{
323 int limit = 1000;
324
325 while (limit-- > 0) {
326 unsigned long err;
327
328 err = sun4v_ldc_rx_set_qhead(lp->id, head);
329 if (!err)
330 return 0;
331
332 if (err != HV_EWOULDBLOCK)
333 return -EINVAL;
334
335 udelay(1);
336 }
337
338 return -EBUSY;
339}
340
341static int send_tx_packet(struct ldc_channel *lp,
342 struct ldc_packet *p,
343 unsigned long new_tail)
344{
345 BUG_ON(p != (lp->tx_base + (lp->tx_tail / LDC_PACKET_SIZE)));
346
347 return set_tx_tail(lp, new_tail);
348}
349
350static struct ldc_packet *handshake_compose_ctrl(struct ldc_channel *lp,
351 u8 stype, u8 ctrl,
352 void *data, int dlen,
353 unsigned long *new_tail)
354{
355 struct ldc_packet *p = handshake_get_tx_packet(lp, new_tail);
356
357 if (p) {
358 memset(p, 0, sizeof(*p));
359 p->type = LDC_CTRL;
360 p->stype = stype;
361 p->ctrl = ctrl;
362 if (data)
363 memcpy(p->u.u_data, data, dlen);
364 }
365 return p;
366}
367
368static int start_handshake(struct ldc_channel *lp)
369{
370 struct ldc_packet *p;
371 struct ldc_version *ver;
372 unsigned long new_tail;
373
374 ver = &ver_arr[0];
375
376 ldcdbg(HS, "SEND VER INFO maj[%u] min[%u]\n",
377 ver->major, ver->minor);
378
379 p = handshake_compose_ctrl(lp, LDC_INFO, LDC_VERS,
380 ver, sizeof(*ver), &new_tail);
381 if (p) {
382 int err = send_tx_packet(lp, p, new_tail);
383 if (!err)
384 lp->flags &= ~LDC_FLAG_RESET;
385 return err;
386 }
387 return -EBUSY;
388}
389
390static int send_version_nack(struct ldc_channel *lp,
391 u16 major, u16 minor)
392{
393 struct ldc_packet *p;
394 struct ldc_version ver;
395 unsigned long new_tail;
396
397 ver.major = major;
398 ver.minor = minor;
399
400 p = handshake_compose_ctrl(lp, LDC_NACK, LDC_VERS,
401 &ver, sizeof(ver), &new_tail);
402 if (p) {
403 ldcdbg(HS, "SEND VER NACK maj[%u] min[%u]\n",
404 ver.major, ver.minor);
405
406 return send_tx_packet(lp, p, new_tail);
407 }
408 return -EBUSY;
409}
410
411static int send_version_ack(struct ldc_channel *lp,
412 struct ldc_version *vp)
413{
414 struct ldc_packet *p;
415 unsigned long new_tail;
416
417 p = handshake_compose_ctrl(lp, LDC_ACK, LDC_VERS,
418 vp, sizeof(*vp), &new_tail);
419 if (p) {
420 ldcdbg(HS, "SEND VER ACK maj[%u] min[%u]\n",
421 vp->major, vp->minor);
422
423 return send_tx_packet(lp, p, new_tail);
424 }
425 return -EBUSY;
426}
427
428static int send_rts(struct ldc_channel *lp)
429{
430 struct ldc_packet *p;
431 unsigned long new_tail;
432
433 p = handshake_compose_ctrl(lp, LDC_INFO, LDC_RTS, NULL, 0,
434 &new_tail);
435 if (p) {
436 p->env = lp->cfg.mode;
437 p->seqid = 0;
438 lp->rcv_nxt = 0;
439
440 ldcdbg(HS, "SEND RTS env[0x%x] seqid[0x%x]\n",
441 p->env, p->seqid);
442
443 return send_tx_packet(lp, p, new_tail);
444 }
445 return -EBUSY;
446}
447
448static int send_rtr(struct ldc_channel *lp)
449{
450 struct ldc_packet *p;
451 unsigned long new_tail;
452
453 p = handshake_compose_ctrl(lp, LDC_INFO, LDC_RTR, NULL, 0,
454 &new_tail);
455 if (p) {
456 p->env = lp->cfg.mode;
457 p->seqid = 0;
458
459 ldcdbg(HS, "SEND RTR env[0x%x] seqid[0x%x]\n",
460 p->env, p->seqid);
461
462 return send_tx_packet(lp, p, new_tail);
463 }
464 return -EBUSY;
465}
466
467static int send_rdx(struct ldc_channel *lp)
468{
469 struct ldc_packet *p;
470 unsigned long new_tail;
471
472 p = handshake_compose_ctrl(lp, LDC_INFO, LDC_RDX, NULL, 0,
473 &new_tail);
474 if (p) {
475 p->env = 0;
476 p->seqid = ++lp->snd_nxt;
477 p->u.r.ackid = lp->rcv_nxt;
478
479 ldcdbg(HS, "SEND RDX env[0x%x] seqid[0x%x] ackid[0x%x]\n",
480 p->env, p->seqid, p->u.r.ackid);
481
482 return send_tx_packet(lp, p, new_tail);
483 }
484 return -EBUSY;
485}
486
487static int send_data_nack(struct ldc_channel *lp, struct ldc_packet *data_pkt)
488{
489 struct ldc_packet *p;
490 unsigned long new_tail;
491 int err;
492
493 p = data_get_tx_packet(lp, &new_tail);
494 if (!p)
495 return -EBUSY;
496 memset(p, 0, sizeof(*p));
497 p->type = data_pkt->type;
498 p->stype = LDC_NACK;
499 p->ctrl = data_pkt->ctrl & LDC_CTRL_MSK;
500 p->seqid = lp->snd_nxt + 1;
501 p->u.r.ackid = lp->rcv_nxt;
502
503 ldcdbg(HS, "SEND DATA NACK type[0x%x] ctl[0x%x] seq[0x%x] ack[0x%x]\n",
504 p->type, p->ctrl, p->seqid, p->u.r.ackid);
505
506 err = send_tx_packet(lp, p, new_tail);
507 if (!err)
508 lp->snd_nxt++;
509
510 return err;
511}
512
513static int ldc_abort(struct ldc_channel *lp)
514{
515 unsigned long hv_err;
516
517 ldcdbg(STATE, "ABORT\n");
518
519 /* We report but do not act upon the hypervisor errors because
520 * there really isn't much we can do if they fail at this point.
521 */
522 hv_err = sun4v_ldc_tx_qconf(lp->id, lp->tx_ra, lp->tx_num_entries);
523 if (hv_err)
524 printk(KERN_ERR PFX "ldc_abort: "
525 "sun4v_ldc_tx_qconf(%lx,%lx,%lx) failed, err=%lu\n",
526 lp->id, lp->tx_ra, lp->tx_num_entries, hv_err);
527
528 hv_err = sun4v_ldc_tx_get_state(lp->id,
529 &lp->tx_head,
530 &lp->tx_tail,
531 &lp->chan_state);
532 if (hv_err)
533 printk(KERN_ERR PFX "ldc_abort: "
534 "sun4v_ldc_tx_get_state(%lx,...) failed, err=%lu\n",
535 lp->id, hv_err);
536
537 hv_err = sun4v_ldc_rx_qconf(lp->id, lp->rx_ra, lp->rx_num_entries);
538 if (hv_err)
539 printk(KERN_ERR PFX "ldc_abort: "
540 "sun4v_ldc_rx_qconf(%lx,%lx,%lx) failed, err=%lu\n",
541 lp->id, lp->rx_ra, lp->rx_num_entries, hv_err);
542
543 /* Refetch the RX queue state as well, because we could be invoked
544 * here in the queue processing context.
545 */
546 hv_err = sun4v_ldc_rx_get_state(lp->id,
547 &lp->rx_head,
548 &lp->rx_tail,
549 &lp->chan_state);
550 if (hv_err)
551 printk(KERN_ERR PFX "ldc_abort: "
552 "sun4v_ldc_rx_get_state(%lx,...) failed, err=%lu\n",
553 lp->id, hv_err);
554
555 return -ECONNRESET;
556}
557
558static struct ldc_version *find_by_major(u16 major)
559{
560 struct ldc_version *ret = NULL;
561 int i;
562
563 for (i = 0; i < ARRAY_SIZE(ver_arr); i++) {
564 struct ldc_version *v = &ver_arr[i];
565 if (v->major <= major) {
566 ret = v;
567 break;
568 }
569 }
570 return ret;
571}
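/* Editorial example (not part of the commit): ver_arr[] is sorted from
 * highest major version down, so find_by_major() returns the newest
 * version we support that is not newer than the peer's request.  With
 * only { 1, 0 } in the table:
 *
 *	find_by_major(1);	// returns { 1, 0 }, exact match
 *	find_by_major(2);	// returns { 1, 0 }, counter-proposal
 *	find_by_major(0);	// returns NULL, nothing old enough
 */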
572
573static int process_ver_info(struct ldc_channel *lp, struct ldc_version *vp)
574{
575 struct ldc_version *vap;
576 int err;
577
578 ldcdbg(HS, "GOT VERSION INFO major[%x] minor[%x]\n",
579 vp->major, vp->minor);
580
581 if (lp->hs_state == LDC_HS_GOTVERS) {
582 lp->hs_state = LDC_HS_OPEN;
583 memset(&lp->ver, 0, sizeof(lp->ver));
584 }
585
586 vap = find_by_major(vp->major);
587 if (!vap) {
588 err = send_version_nack(lp, 0, 0);
589 } else if (vap->major != vp->major) {
590 err = send_version_nack(lp, vap->major, vap->minor);
591 } else {
592 struct ldc_version ver = *vp;
593 if (ver.minor > vap->minor)
594 ver.minor = vap->minor;
595 err = send_version_ack(lp, &ver);
596 if (!err) {
597 lp->ver = ver;
598 lp->hs_state = LDC_HS_GOTVERS;
599 }
600 }
601 if (err)
602 return ldc_abort(lp);
603
604 return 0;
605}
606
607static int process_ver_ack(struct ldc_channel *lp, struct ldc_version *vp)
608{
609 ldcdbg(HS, "GOT VERSION ACK major[%x] minor[%x]\n",
610 vp->major, vp->minor);
611
612 if (lp->hs_state == LDC_HS_GOTVERS) {
613 if (lp->ver.major != vp->major ||
614 lp->ver.minor != vp->minor)
615 return ldc_abort(lp);
616 } else {
617 lp->ver = *vp;
618 lp->hs_state = LDC_HS_GOTVERS;
619 }
620 if (send_rts(lp))
621 return ldc_abort(lp);
622 return 0;
623}
624
625static int process_ver_nack(struct ldc_channel *lp, struct ldc_version *vp)
626{
627 struct ldc_version *vap;
628
629 if ((vp->major == 0 && vp->minor == 0) ||
630 !(vap = find_by_major(vp->major))) {
631 return ldc_abort(lp);
632 } else {
633 struct ldc_packet *p;
634 unsigned long new_tail;
635
636 p = handshake_compose_ctrl(lp, LDC_INFO, LDC_VERS,
637 vap, sizeof(*vap),
638 &new_tail);
639 if (p)
640 return send_tx_packet(lp, p, new_tail);
641 else
642 return ldc_abort(lp);
643 }
644}
645
646static int process_version(struct ldc_channel *lp,
647 struct ldc_packet *p)
648{
649 struct ldc_version *vp;
650
651 vp = (struct ldc_version *) p->u.u_data;
652
653 switch (p->stype) {
654 case LDC_INFO:
655 return process_ver_info(lp, vp);
656
657 case LDC_ACK:
658 return process_ver_ack(lp, vp);
659
660 case LDC_NACK:
661 return process_ver_nack(lp, vp);
662
663 default:
664 return ldc_abort(lp);
665 }
666}
667
668static int process_rts(struct ldc_channel *lp,
669 struct ldc_packet *p)
670{
671 ldcdbg(HS, "GOT RTS stype[%x] seqid[%x] env[%x]\n",
672 p->stype, p->seqid, p->env);
673
674 if (p->stype != LDC_INFO ||
675 lp->hs_state != LDC_HS_GOTVERS ||
676 p->env != lp->cfg.mode)
677 return ldc_abort(lp);
678
679 lp->snd_nxt = p->seqid;
680 lp->rcv_nxt = p->seqid;
681 lp->hs_state = LDC_HS_SENTRTR;
682 if (send_rtr(lp))
683 return ldc_abort(lp);
684
685 return 0;
686}
687
688static int process_rtr(struct ldc_channel *lp,
689 struct ldc_packet *p)
690{
691 ldcdbg(HS, "GOT RTR stype[%x] seqid[%x] env[%x]\n",
692 p->stype, p->seqid, p->env);
693
694 if (p->stype != LDC_INFO ||
695 p->env != lp->cfg.mode)
696 return ldc_abort(lp);
697
698 lp->snd_nxt = p->seqid;
699 lp->hs_state = LDC_HS_COMPLETE;
700 ldc_set_state(lp, LDC_STATE_CONNECTED);
701 send_rdx(lp);
702
703 return LDC_EVENT_UP;
704}
705
706static int rx_seq_ok(struct ldc_channel *lp, u32 seqid)
707{
708 return lp->rcv_nxt + 1 == seqid;
709}
710
711static int process_rdx(struct ldc_channel *lp,
712 struct ldc_packet *p)
713{
714 ldcdbg(HS, "GOT RDX stype[%x] seqid[%x] env[%x] ackid[%x]\n",
715 p->stype, p->seqid, p->env, p->u.r.ackid);
716
717 if (p->stype != LDC_INFO ||
718 !(rx_seq_ok(lp, p->seqid)))
719 return ldc_abort(lp);
720
721 lp->rcv_nxt = p->seqid;
722
723 lp->hs_state = LDC_HS_COMPLETE;
724 ldc_set_state(lp, LDC_STATE_CONNECTED);
725
726 return LDC_EVENT_UP;
727}
728
729static int process_control_frame(struct ldc_channel *lp,
730 struct ldc_packet *p)
731{
732 switch (p->ctrl) {
733 case LDC_VERS:
734 return process_version(lp, p);
735
736 case LDC_RTS:
737 return process_rts(lp, p);
738
739 case LDC_RTR:
740 return process_rtr(lp, p);
741
742 case LDC_RDX:
743 return process_rdx(lp, p);
744
745 default:
746 return ldc_abort(lp);
747 }
748}
749
750static int process_error_frame(struct ldc_channel *lp,
751 struct ldc_packet *p)
752{
753 return ldc_abort(lp);
754}
755
756static int process_data_ack(struct ldc_channel *lp,
757 struct ldc_packet *ack)
758{
759 unsigned long head = lp->tx_acked;
760 u32 ackid = ack->u.r.ackid;
761
762 while (1) {
763 struct ldc_packet *p = lp->tx_base + (head / LDC_PACKET_SIZE);
764
765 head = tx_advance(lp, head);
766
767 if (p->seqid == ackid) {
768 lp->tx_acked = head;
769 return 0;
770 }
771 if (head == lp->tx_tail)
772 return ldc_abort(lp);
773 }
774
775 return 0;
776}
777
778static void send_events(struct ldc_channel *lp, unsigned int event_mask)
779{
780 if (event_mask & LDC_EVENT_RESET)
781 lp->cfg.event(lp->event_arg, LDC_EVENT_RESET);
782 if (event_mask & LDC_EVENT_UP)
783 lp->cfg.event(lp->event_arg, LDC_EVENT_UP);
784 if (event_mask & LDC_EVENT_DATA_READY)
785 lp->cfg.event(lp->event_arg, LDC_EVENT_DATA_READY);
786}
787
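/* Illustrative sketch (not part of this file): the cfg.event callback
 * fired above runs outside lp->lock but in IRQ context, so it must
 * not sleep.  A typical client, here a hypothetical "my_dev", just
 * kicks its own deferred work from it:
 *
 *	static void my_event(void *arg, int event)
 *	{
 *		struct my_dev *dev = arg;
 *
 *		if (event == LDC_EVENT_RESET)
 *			schedule_work(&dev->reset_work);
 *		if (event == LDC_EVENT_UP)
 *			schedule_work(&dev->up_work);
 *		if (event == LDC_EVENT_DATA_READY)
 *			schedule_work(&dev->rx_work);
 *	}
 */
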
788static irqreturn_t ldc_rx(int irq, void *dev_id)
789{
790 struct ldc_channel *lp = dev_id;
791 unsigned long orig_state, hv_err, flags;
792 unsigned int event_mask;
793
794 spin_lock_irqsave(&lp->lock, flags);
795
796 orig_state = lp->chan_state;
797 hv_err = sun4v_ldc_rx_get_state(lp->id,
798 &lp->rx_head,
799 &lp->rx_tail,
800 &lp->chan_state);
801
802 ldcdbg(RX, "RX state[0x%02lx:0x%02lx] head[0x%04lx] tail[0x%04lx]\n",
803 orig_state, lp->chan_state, lp->rx_head, lp->rx_tail);
804
805 event_mask = 0;
806
807 if (lp->cfg.mode == LDC_MODE_RAW &&
808 lp->chan_state == LDC_CHANNEL_UP) {
809 lp->hs_state = LDC_HS_COMPLETE;
810 ldc_set_state(lp, LDC_STATE_CONNECTED);
811
812 event_mask |= LDC_EVENT_UP;
813
814 orig_state = lp->chan_state;
815 }
816
817 /* If we are in reset state, flush the RX queue and ignore
818 * everything.
819 */
820 if (lp->flags & LDC_FLAG_RESET) {
821 (void) __set_rx_head(lp, lp->rx_tail);
822 goto out;
823 }
824
825 /* Once we finish the handshake, we let the ldc_read()
826 * paths do all of the control frame and state management.
827 * Just trigger the callback.
828 */
829 if (lp->hs_state == LDC_HS_COMPLETE) {
830handshake_complete:
831 if (lp->chan_state != orig_state) {
832 unsigned int event = LDC_EVENT_RESET;
833
834 if (lp->chan_state == LDC_CHANNEL_UP)
835 event = LDC_EVENT_UP;
836
837 event_mask |= event;
838 }
839 if (lp->rx_head != lp->rx_tail)
840 event_mask |= LDC_EVENT_DATA_READY;
841
842 goto out;
843 }
844
845 if (lp->chan_state != orig_state)
846 goto out;
847
848 while (lp->rx_head != lp->rx_tail) {
849 struct ldc_packet *p;
850 unsigned long new;
851 int err;
852
853 p = lp->rx_base + (lp->rx_head / LDC_PACKET_SIZE);
854
855 switch (p->type) {
856 case LDC_CTRL:
857 err = process_control_frame(lp, p);
858 if (err > 0)
859 event_mask |= err;
860 break;
861
862 case LDC_DATA:
863 event_mask |= LDC_EVENT_DATA_READY;
864 err = 0;
865 break;
866
867 case LDC_ERR:
868 err = process_error_frame(lp, p);
869 break;
870
871 default:
872 err = ldc_abort(lp);
873 break;
874 }
875
876 if (err < 0)
877 break;
878
879 new = lp->rx_head;
880 new += LDC_PACKET_SIZE;
881 if (new == (lp->rx_num_entries * LDC_PACKET_SIZE))
882 new = 0;
883 lp->rx_head = new;
884
885 err = __set_rx_head(lp, new);
886 if (err < 0) {
887 (void) ldc_abort(lp);
888 break;
889 }
890 if (lp->hs_state == LDC_HS_COMPLETE)
891 goto handshake_complete;
892 }
893
894out:
895 spin_unlock_irqrestore(&lp->lock, flags);
896
897 send_events(lp, event_mask);
898
899 return IRQ_HANDLED;
900}
901
902static irqreturn_t ldc_tx(int irq, void *dev_id)
903{
904 struct ldc_channel *lp = dev_id;
905 unsigned long flags, hv_err, orig_state;
906 unsigned int event_mask = 0;
907
908 spin_lock_irqsave(&lp->lock, flags);
909
910 orig_state = lp->chan_state;
911 hv_err = sun4v_ldc_tx_get_state(lp->id,
912 &lp->tx_head,
913 &lp->tx_tail,
914 &lp->chan_state);
915
916 ldcdbg(TX, " TX state[0x%02lx:0x%02lx] head[0x%04lx] tail[0x%04lx]\n",
917 orig_state, lp->chan_state, lp->tx_head, lp->tx_tail);
918
919 if (lp->cfg.mode == LDC_MODE_RAW &&
920 lp->chan_state == LDC_CHANNEL_UP) {
921 lp->hs_state = LDC_HS_COMPLETE;
922 ldc_set_state(lp, LDC_STATE_CONNECTED);
923
924 event_mask |= LDC_EVENT_UP;
925 }
926
927 spin_unlock_irqrestore(&lp->lock, flags);
928
929 send_events(lp, event_mask);
930
931 return IRQ_HANDLED;
932}
933
934/* XXX ldc_alloc() and ldc_free() need to run under a mutex so
935 * XXX that addition to and removal from the ldc_channel_list have
936 * XXX atomicity, otherwise the __ldc_channel_exists() check is
937 * XXX totally pointless as another thread can slip into ldc_alloc()
938 * XXX and add a channel with the same ID.  There also needs to be
939 * XXX a spinlock for ldc_channel_list; a sketch follows below.
940 */
941static HLIST_HEAD(ldc_channel_list);
942
943static int __ldc_channel_exists(unsigned long id)
944{
945 struct ldc_channel *lp;
946 struct hlist_node *n;
947
948 hlist_for_each_entry(lp, n, &ldc_channel_list, list) {
949 if (lp->id == id)
950 return 1;
951 }
952 return 0;
953}
954
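/* A minimal sketch of the serialization the XXX comment above asks
 * for (assumed, not implemented by this driver): a single mutex taken
 * around the existence check and the list insertion would close the
 * race, e.g.
 *
 *	static DEFINE_MUTEX(ldc_channel_mutex);
 *
 *	mutex_lock(&ldc_channel_mutex);
 *	if (__ldc_channel_exists(id))
 *		err = -EEXIST;
 *	else
 *		hlist_add_head(&lp->list, &ldc_channel_list);
 *	mutex_unlock(&ldc_channel_mutex);
 */
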
955static int alloc_queue(const char *name, unsigned long num_entries,
956 struct ldc_packet **base, unsigned long *ra)
957{
958 unsigned long size, order;
959 void *q;
960
961 size = num_entries * LDC_PACKET_SIZE;
962 order = get_order(size);
963
964 q = (void *) __get_free_pages(GFP_KERNEL, order);
965 if (!q) {
966 printk(KERN_ERR PFX "Alloc of %s queue failed with "
967 "size=%lu order=%lu\n", name, size, order);
968 return -ENOMEM;
969 }
970
971 memset(q, 0, PAGE_SIZE << order);
972
973 *base = q;
974 *ra = __pa(q);
975
976 return 0;
977}
978
979static void free_queue(unsigned long num_entries, struct ldc_packet *q)
980{
981 unsigned long size, order;
982
983 if (!q)
984 return;
985
986 size = num_entries * LDC_PACKET_SIZE;
987 order = get_order(size);
988
989 free_pages((unsigned long)q, order);
990}
991
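/* Sizing example for the allocator above (illustrative, assuming
 * LDC_PACKET_SIZE is 64 bytes and an 8K PAGE_SIZE): a 512-entry
 * queue needs 512 * 64 = 32768 bytes, get_order(32768) = 2, so
 * __get_free_pages() returns four contiguous 8K pages and *ra is the
 * real (physical) address later handed to the hypervisor at qconf
 * time.
 */
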
992/* XXX Make this configurable... XXX */
993#define LDC_IOTABLE_SIZE (8 * 1024)
994
995static int ldc_iommu_init(struct ldc_channel *lp)
996{
997 unsigned long sz, num_tsb_entries, tsbsize, order;
998 struct ldc_iommu *iommu = &lp->iommu;
999 struct ldc_mtable_entry *table;
1000 unsigned long hv_err;
1001 int err;
1002
1003 num_tsb_entries = LDC_IOTABLE_SIZE;
1004 tsbsize = num_tsb_entries * sizeof(struct ldc_mtable_entry);
1005
1006 spin_lock_init(&iommu->lock);
1007
1008 sz = num_tsb_entries / 8;
1009 sz = (sz + 7UL) & ~7UL;
1010 iommu->arena.map = kzalloc(sz, GFP_KERNEL);
1011 if (!iommu->arena.map) {
1012 printk(KERN_ERR PFX "Alloc of arena map failed, sz=%lu\n", sz);
1013 return -ENOMEM;
1014 }
1015
1016 iommu->arena.limit = num_tsb_entries;
1017
1018 order = get_order(tsbsize);
1019
1020 table = (struct ldc_mtable_entry *)
1021 __get_free_pages(GFP_KERNEL, order);
1022 err = -ENOMEM;
1023 if (!table) {
1024 printk(KERN_ERR PFX "Alloc of MTE table failed, "
1025 "size=%lu order=%lu\n", tsbsize, order);
1026 goto out_free_map;
1027 }
1028
1029 memset(table, 0, PAGE_SIZE << order);
1030
1031 iommu->page_table = table;
1032
1033 hv_err = sun4v_ldc_set_map_table(lp->id, __pa(table),
1034 num_tsb_entries);
1035 err = -EINVAL;
1036 if (hv_err)
1037 goto out_free_table;
1038
1039 return 0;
1040
1041out_free_table:
1042 free_pages((unsigned long) table, order);
1043 iommu->page_table = NULL;
1044
1045out_free_map:
1046 kfree(iommu->arena.map);
1047 iommu->arena.map = NULL;
1048
1049 return err;
1050}
1051
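/* Worked numbers for the allocation above (illustrative, assuming a
 * 16-byte struct ldc_mtable_entry and 8K pages): LDC_IOTABLE_SIZE of
 * 8192 entries gives an arena bitmap of 8192 / 8 = 1024 bytes
 * (already a multiple of 8, so the round-up is a no-op) and an MTE
 * table of 8192 * 16 = 128K bytes, i.e. get_order(128K) = 4.
 */
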
1052static void ldc_iommu_release(struct ldc_channel *lp)
1053{
1054 struct ldc_iommu *iommu = &lp->iommu;
1055 unsigned long num_tsb_entries, tsbsize, order;
1056
1057 (void) sun4v_ldc_set_map_table(lp->id, 0, 0);
1058
1059 num_tsb_entries = iommu->arena.limit;
1060 tsbsize = num_tsb_entries * sizeof(struct ldc_mtable_entry);
1061 order = get_order(tsbsize);
1062
1063 free_pages((unsigned long) iommu->page_table, order);
1064 iommu->page_table = NULL;
1065
1066 kfree(iommu->arena.map);
1067 iommu->arena.map = NULL;
1068}
1069
1070struct ldc_channel *ldc_alloc(unsigned long id,
1071 const struct ldc_channel_config *cfgp,
1072 void *event_arg)
1073{
1074 struct ldc_channel *lp;
1075 const struct ldc_mode_ops *mops;
1076 unsigned long dummy1, dummy2, hv_err;
1077 u8 mss, *mssbuf;
1078 int err;
1079
1080 err = -ENODEV;
1081 if (!ldom_domaining_enabled)
1082 goto out_err;
1083
1084 err = -EINVAL;
1085 if (!cfgp)
1086 goto out_err;
1087
1088 switch (cfgp->mode) {
1089 case LDC_MODE_RAW:
1090 mops = &raw_ops;
1091 mss = LDC_PACKET_SIZE;
1092 break;
1093
1094 case LDC_MODE_UNRELIABLE:
1095 mops = &nonraw_ops;
1096 mss = LDC_PACKET_SIZE - 8;
1097 break;
1098
1099 case LDC_MODE_STREAM:
1100 mops = &stream_ops;
1101 mss = LDC_PACKET_SIZE - 8 - 8;
1102 break;
1103
1104 default:
1105 goto out_err;
1106 }
1107
1108 if (!cfgp->event || !event_arg || !cfgp->rx_irq || !cfgp->tx_irq)
1109 goto out_err;
1110
1111 hv_err = sun4v_ldc_tx_qinfo(id, &dummy1, &dummy2);
1112 err = -ENODEV;
1113 if (hv_err == HV_ECHANNEL)
1114 goto out_err;
1115
1116 err = -EEXIST;
1117 if (__ldc_channel_exists(id))
1118 goto out_err;
1119
1120 mssbuf = NULL;
1121
1122 lp = kzalloc(sizeof(*lp), GFP_KERNEL);
1123 err = -ENOMEM;
1124 if (!lp)
1125 goto out_err;
1126
1127 spin_lock_init(&lp->lock);
1128
1129 lp->id = id;
1130
1131 err = ldc_iommu_init(lp);
1132 if (err)
1133 goto out_free_ldc;
1134
1135 lp->mops = mops;
1136 lp->mss = mss;
1137
1138 lp->cfg = *cfgp;
1139 if (!lp->cfg.mtu)
1140 lp->cfg.mtu = LDC_DEFAULT_MTU;
1141
1142 if (lp->cfg.mode == LDC_MODE_STREAM) {
1143 mssbuf = kzalloc(lp->cfg.mtu, GFP_KERNEL);
1144 if (!mssbuf) {
1145 err = -ENOMEM;
1146 goto out_free_iommu;
1147 }
1148 lp->mssbuf = mssbuf;
1149 }
1150
1151 lp->event_arg = event_arg;
1152
1153 /* XXX allow setting via ldc_channel_config to override defaults
1154 * XXX or use some formula based upon mtu
1155 */
1156 lp->tx_num_entries = LDC_DEFAULT_NUM_ENTRIES;
1157 lp->rx_num_entries = LDC_DEFAULT_NUM_ENTRIES;
1158
1159 err = alloc_queue("TX", lp->tx_num_entries,
1160 &lp->tx_base, &lp->tx_ra);
1161 if (err)
1162 goto out_free_mssbuf;
1163
1164 err = alloc_queue("RX", lp->rx_num_entries,
1165 &lp->rx_base, &lp->rx_ra);
1166 if (err)
1167 goto out_free_txq;
1168
1169 lp->flags |= LDC_FLAG_ALLOCED_QUEUES;
1170
1171 lp->hs_state = LDC_HS_CLOSED;
1172 ldc_set_state(lp, LDC_STATE_INIT);
1173
1174 INIT_HLIST_NODE(&lp->list);
1175 hlist_add_head(&lp->list, &ldc_channel_list);
1176
1177 INIT_HLIST_HEAD(&lp->mh_list);
1178
1179 return lp;
1180
1181out_free_txq:
1182 free_queue(lp->tx_num_entries, lp->tx_base);
1183
1184out_free_mssbuf:
1185 if (mssbuf)
1186 kfree(mssbuf);
1187
1188out_free_iommu:
1189 ldc_iommu_release(lp);
1190
1191out_free_ldc:
1192 kfree(lp);
1193
1194out_err:
1195 return ERR_PTR(err);
1196}
1197EXPORT_SYMBOL(ldc_alloc);
1198
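/* Illustrative usage sketch (hypothetical device; the channel ID,
 * IRQs and callback are placeholders supplied by the caller's bus
 * code):
 *
 *	struct ldc_channel_config cfg = {
 *		.event	= my_event,
 *		.mode	= LDC_MODE_UNRELIABLE,
 *		.rx_irq	= my_rx_irq,
 *		.tx_irq	= my_tx_irq,
 *	};
 *	struct ldc_channel *lp = ldc_alloc(my_channel_id, &cfg, my_dev);
 *
 *	if (IS_ERR(lp))
 *		return PTR_ERR(lp);
 *	err = ldc_bind(lp, "MYDEV");
 *	if (!err)
 *		err = ldc_connect(lp);
 */
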
1199void ldc_free(struct ldc_channel *lp)
1200{
1201 if (lp->flags & LDC_FLAG_REGISTERED_IRQS) {
1202 free_irq(lp->cfg.rx_irq, lp);
1203 free_irq(lp->cfg.tx_irq, lp);
1204 }
1205
1206 if (lp->flags & LDC_FLAG_REGISTERED_QUEUES) {
1207 sun4v_ldc_tx_qconf(lp->id, 0, 0);
1208 sun4v_ldc_rx_qconf(lp->id, 0, 0);
1209 lp->flags &= ~LDC_FLAG_REGISTERED_QUEUES;
1210 }
1211 if (lp->flags & LDC_FLAG_ALLOCED_QUEUES) {
1212 free_queue(lp->tx_num_entries, lp->tx_base);
1213 free_queue(lp->rx_num_entries, lp->rx_base);
1214 lp->flags &= ~LDC_FLAG_ALLOCED_QUEUES;
1215 }
1216
1217 hlist_del(&lp->list);
1218
1219 if (lp->mssbuf)
1220 kfree(lp->mssbuf);
1221
1222 ldc_iommu_release(lp);
1223
1224 kfree(lp);
1225}
1226EXPORT_SYMBOL(ldc_free);
1227
1228/* Bind the channel. This registers the LDC queues with
1229 * the hypervisor and puts the channel into a pseudo-listening
1230 * state.  This does not initiate a handshake; ldc_connect() does
1231 * that.
1232 */
1233int ldc_bind(struct ldc_channel *lp, const char *name)
1234{
1235 unsigned long hv_err, flags;
1236 int err = -EINVAL;
1237
1238 spin_lock_irqsave(&lp->lock, flags);
1239
1240 if (!name)
1241 goto out_err;
1242
1243 if (lp->state != LDC_STATE_INIT)
1244 goto out_err;
1245
1246 snprintf(lp->rx_irq_name, LDC_IRQ_NAME_MAX, "%s RX", name);
1247 snprintf(lp->tx_irq_name, LDC_IRQ_NAME_MAX, "%s TX", name);
1248
1249 err = request_irq(lp->cfg.rx_irq, ldc_rx,
1250 IRQF_SAMPLE_RANDOM | IRQF_SHARED,
1251 lp->rx_irq_name, lp);
1252 if (err)
1253 goto out_err;
1254
1255 err = request_irq(lp->cfg.tx_irq, ldc_tx,
1256 IRQF_SAMPLE_RANDOM | IRQF_SHARED,
1257 lp->tx_irq_name, lp);
1258 if (err)
1259 goto out_free_rx_irq;
1260
1261
1262 lp->flags |= LDC_FLAG_REGISTERED_IRQS;
1263
1264 err = -ENODEV;
1265 hv_err = sun4v_ldc_tx_qconf(lp->id, 0, 0);
1266 if (hv_err)
1267 goto out_free_tx_irq;
1268
1269 hv_err = sun4v_ldc_tx_qconf(lp->id, lp->tx_ra, lp->tx_num_entries);
1270 if (hv_err)
1271 goto out_free_tx_irq;
1272
1273 hv_err = sun4v_ldc_rx_qconf(lp->id, 0, 0);
1274 if (hv_err)
1275 goto out_unmap_tx;
1276
1277 hv_err = sun4v_ldc_rx_qconf(lp->id, lp->rx_ra, lp->rx_num_entries);
1278 if (hv_err)
1279 goto out_unmap_tx;
1280
1281 lp->flags |= LDC_FLAG_REGISTERED_QUEUES;
1282
1283 hv_err = sun4v_ldc_tx_get_state(lp->id,
1284 &lp->tx_head,
1285 &lp->tx_tail,
1286 &lp->chan_state);
1287 err = -EBUSY;
1288 if (hv_err)
1289 goto out_unmap_rx;
1290
1291 lp->tx_acked = lp->tx_head;
1292
1293 lp->hs_state = LDC_HS_OPEN;
1294 ldc_set_state(lp, LDC_STATE_BOUND);
1295
1296 spin_unlock_irqrestore(&lp->lock, flags);
1297
1298 return 0;
1299
1300out_unmap_rx:
1301 lp->flags &= ~LDC_FLAG_REGISTERED_QUEUES;
1302 sun4v_ldc_rx_qconf(lp->id, 0, 0);
1303
1304out_unmap_tx:
1305 sun4v_ldc_tx_qconf(lp->id, 0, 0);
1306
1307out_free_tx_irq:
1308 lp->flags &= ~LDC_FLAG_REGISTERED_IRQS;
1309 free_irq(lp->cfg.tx_irq, lp);
1310
1311out_free_rx_irq:
1312 free_irq(lp->cfg.rx_irq, lp);
1313
1314out_err:
1315 spin_unlock_irqrestore(&lp->lock, flags);
1316
1317 return err;
1318}
1319EXPORT_SYMBOL(ldc_bind);
1320
1321int ldc_connect(struct ldc_channel *lp)
1322{
1323 unsigned long flags;
1324 int err;
1325
1326 if (lp->cfg.mode == LDC_MODE_RAW)
1327 return -EINVAL;
1328
1329 spin_lock_irqsave(&lp->lock, flags);
1330
1331 if (!(lp->flags & LDC_FLAG_ALLOCED_QUEUES) ||
1332 !(lp->flags & LDC_FLAG_REGISTERED_QUEUES) ||
1333 lp->hs_state != LDC_HS_OPEN)
1334 err = -EINVAL;
1335 else
1336 err = start_handshake(lp);
1337
1338 spin_unlock_irqrestore(&lp->lock, flags);
1339
1340 return err;
1341}
1342EXPORT_SYMBOL(ldc_connect);
1343
1344int ldc_disconnect(struct ldc_channel *lp)
1345{
1346 unsigned long hv_err, flags;
1347 int err;
1348
1349 if (lp->cfg.mode == LDC_MODE_RAW)
1350 return -EINVAL;
1351
1352 if (!(lp->flags & LDC_FLAG_ALLOCED_QUEUES) ||
1353 !(lp->flags & LDC_FLAG_REGISTERED_QUEUES))
1354 return -EINVAL;
1355
1356 spin_lock_irqsave(&lp->lock, flags);
1357
1358 err = -ENODEV;
1359 hv_err = sun4v_ldc_tx_qconf(lp->id, 0, 0);
1360 if (hv_err)
1361 goto out_err;
1362
1363 hv_err = sun4v_ldc_tx_qconf(lp->id, lp->tx_ra, lp->tx_num_entries);
1364 if (hv_err)
1365 goto out_err;
1366
1367 hv_err = sun4v_ldc_rx_qconf(lp->id, 0, 0);
1368 if (hv_err)
1369 goto out_err;
1370
1371 hv_err = sun4v_ldc_rx_qconf(lp->id, lp->rx_ra, lp->rx_num_entries);
1372 if (hv_err)
1373 goto out_err;
1374
1375 ldc_set_state(lp, LDC_STATE_BOUND);
1376 lp->hs_state = LDC_HS_OPEN;
1377 lp->flags |= LDC_FLAG_RESET;
1378
1379 spin_unlock_irqrestore(&lp->lock, flags);
1380
1381 return 0;
1382
1383out_err:
1384 sun4v_ldc_tx_qconf(lp->id, 0, 0);
1385 sun4v_ldc_rx_qconf(lp->id, 0, 0);
1386 free_irq(lp->cfg.tx_irq, lp);
1387 free_irq(lp->cfg.rx_irq, lp);
1388 lp->flags &= ~(LDC_FLAG_REGISTERED_IRQS |
1389 LDC_FLAG_REGISTERED_QUEUES);
1390 ldc_set_state(lp, LDC_STATE_INIT);
1391
1392 spin_unlock_irqrestore(&lp->lock, flags);
1393
1394 return err;
1395}
1396EXPORT_SYMBOL(ldc_disconnect);
1397
1398int ldc_state(struct ldc_channel *lp)
1399{
1400 return lp->state;
1401}
1402EXPORT_SYMBOL(ldc_state);
1403
1404static int write_raw(struct ldc_channel *lp, const void *buf, unsigned int size)
1405{
1406 struct ldc_packet *p;
1407 unsigned long new_tail;
1408 int err;
1409
1410 if (size > LDC_PACKET_SIZE)
1411 return -EMSGSIZE;
1412
1413 p = data_get_tx_packet(lp, &new_tail);
1414 if (!p)
1415 return -EAGAIN;
1416
1417 memcpy(p, buf, size);
1418
1419 err = send_tx_packet(lp, p, new_tail);
1420 if (!err)
1421 err = size;
1422
1423 return err;
1424}
1425
1426static int read_raw(struct ldc_channel *lp, void *buf, unsigned int size)
1427{
1428 struct ldc_packet *p;
1429 unsigned long hv_err, new;
1430 int err;
1431
1432 if (size < LDC_PACKET_SIZE)
1433 return -EINVAL;
1434
1435 hv_err = sun4v_ldc_rx_get_state(lp->id,
1436 &lp->rx_head,
1437 &lp->rx_tail,
1438 &lp->chan_state);
1439 if (hv_err)
1440 return ldc_abort(lp);
1441
1442 if (lp->chan_state == LDC_CHANNEL_DOWN ||
1443 lp->chan_state == LDC_CHANNEL_RESETTING)
1444 return -ECONNRESET;
1445
1446 if (lp->rx_head == lp->rx_tail)
1447 return 0;
1448
1449 p = lp->rx_base + (lp->rx_head / LDC_PACKET_SIZE);
1450 memcpy(buf, p, LDC_PACKET_SIZE);
1451
1452 new = rx_advance(lp, lp->rx_head);
1453 lp->rx_head = new;
1454
1455 err = __set_rx_head(lp, new);
1456 if (err < 0)
1457 err = -ECONNRESET;
1458 else
1459 err = LDC_PACKET_SIZE;
1460
1461 return err;
1462}
1463
1464static const struct ldc_mode_ops raw_ops = {
1465 .write = write_raw,
1466 .read = read_raw,
1467};
1468
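/* In RAW mode there is no protocol state machine on top of the
 * hypervisor channel: write_raw() above emits exactly one bare
 * LDC_PACKET_SIZE packet per call and read_raw() consumes exactly
 * one, with no sequence numbers or ACKs, which is why ldc_connect()
 * and ldc_disconnect() reject LDC_MODE_RAW channels.
 */
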
1469static int write_nonraw(struct ldc_channel *lp, const void *buf,
1470 unsigned int size)
1471{
1472 unsigned long hv_err, tail;
1473 unsigned int copied;
1474 u32 seq;
1475 int err;
1476
1477 hv_err = sun4v_ldc_tx_get_state(lp->id, &lp->tx_head, &lp->tx_tail,
1478 &lp->chan_state);
1479 if (unlikely(hv_err))
1480 return -EBUSY;
1481
1482 if (unlikely(lp->chan_state != LDC_CHANNEL_UP))
1483 return ldc_abort(lp);
1484
1485 if (!tx_has_space_for(lp, size))
1486 return -EAGAIN;
1487
1488 seq = lp->snd_nxt;
1489 copied = 0;
1490 tail = lp->tx_tail;
1491 while (copied < size) {
1492 struct ldc_packet *p = lp->tx_base + (tail / LDC_PACKET_SIZE);
1493 u8 *data = ((lp->cfg.mode == LDC_MODE_UNRELIABLE) ?
1494 p->u.u_data :
1495 p->u.r.r_data);
1496 int data_len;
1497
1498 p->type = LDC_DATA;
1499 p->stype = LDC_INFO;
1500 p->ctrl = 0;
1501
1502 data_len = size - copied;
1503 if (data_len > lp->mss)
1504 data_len = lp->mss;
1505
1506 BUG_ON(data_len > LDC_LEN);
1507
1508 p->env = (data_len |
1509 (copied == 0 ? LDC_START : 0) |
1510 (data_len == size - copied ? LDC_STOP : 0));
1511
1512 p->seqid = ++seq;
1513
1514 ldcdbg(DATA, "SENT DATA [%02x:%02x:%02x:%02x:%08x]\n",
1515 p->type,
1516 p->stype,
1517 p->ctrl,
1518 p->env,
1519 p->seqid);
1520
1521 memcpy(data, buf, data_len);
1522 buf += data_len;
1523 copied += data_len;
1524
1525 tail = tx_advance(lp, tail);
1526 }
1527
1528 err = set_tx_tail(lp, tail);
1529 if (!err) {
1530 lp->snd_nxt = seq;
1531 err = size;
1532 }
1533
1534 return err;
1535}
1536
1537static int rx_bad_seq(struct ldc_channel *lp, struct ldc_packet *p,
1538 struct ldc_packet *first_frag)
1539{
1540 int err;
1541
1542 if (first_frag)
1543 lp->rcv_nxt = first_frag->seqid - 1;
1544
1545 err = send_data_nack(lp, p);
1546 if (err)
1547 return err;
1548
1549 err = __set_rx_head(lp, lp->rx_tail);
1550 if (err < 0)
1551 return ldc_abort(lp);
1552
1553 return 0;
1554}
1555
1556static int data_ack_nack(struct ldc_channel *lp, struct ldc_packet *p)
1557{
1558 if (p->stype & LDC_ACK) {
1559 int err = process_data_ack(lp, p);
1560 if (err)
1561 return err;
1562 }
1563 if (p->stype & LDC_NACK)
1564 return ldc_abort(lp);
1565
1566 return 0;
1567}
1568
1569static int rx_data_wait(struct ldc_channel *lp, unsigned long cur_head)
1570{
1571 unsigned long dummy;
1572 int limit = 1000;
1573
1574 ldcdbg(DATA, "DATA WAIT cur_head[%lx] rx_head[%lx] rx_tail[%lx]\n",
1575 cur_head, lp->rx_head, lp->rx_tail);
1576 while (limit-- > 0) {
1577 unsigned long hv_err;
1578
1579 hv_err = sun4v_ldc_rx_get_state(lp->id,
1580 &dummy,
1581 &lp->rx_tail,
1582 &lp->chan_state);
1583 if (hv_err)
1584 return ldc_abort(lp);
1585
1586 if (lp->chan_state == LDC_CHANNEL_DOWN ||
1587 lp->chan_state == LDC_CHANNEL_RESETTING)
1588 return -ECONNRESET;
1589
1590 if (cur_head != lp->rx_tail) {
1591 ldcdbg(DATA, "DATA WAIT DONE "
1592 "head[%lx] tail[%lx] chan_state[%lx]\n",
1593 dummy, lp->rx_tail, lp->chan_state);
1594 return 0;
1595 }
1596
1597 udelay(1);
1598 }
1599 return -EAGAIN;
1600}
1601
1602static int rx_set_head(struct ldc_channel *lp, unsigned long head)
1603{
1604 int err = __set_rx_head(lp, head);
1605
1606 if (err < 0)
1607 return ldc_abort(lp);
1608
1609 lp->rx_head = head;
1610 return 0;
1611}
1612
1613static void send_data_ack(struct ldc_channel *lp)
1614{
1615 unsigned long new_tail;
1616 struct ldc_packet *p;
1617
1618 p = data_get_tx_packet(lp, &new_tail);
1619 if (likely(p)) {
1620 int err;
1621
1622 memset(p, 0, sizeof(*p));
1623 p->type = LDC_DATA;
1624 p->stype = LDC_ACK;
1625 p->ctrl = 0;
1626 p->seqid = lp->snd_nxt + 1;
1627 p->u.r.ackid = lp->rcv_nxt;
1628
1629 err = send_tx_packet(lp, p, new_tail);
1630 if (!err)
1631 lp->snd_nxt++;
1632 }
1633}
1634
1635static int read_nonraw(struct ldc_channel *lp, void *buf, unsigned int size)
1636{
1637 struct ldc_packet *first_frag;
1638 unsigned long hv_err, new;
1639 int err, copied;
1640
1641 hv_err = sun4v_ldc_rx_get_state(lp->id,
1642 &lp->rx_head,
1643 &lp->rx_tail,
1644 &lp->chan_state);
1645 if (hv_err)
1646 return ldc_abort(lp);
1647
1648 if (lp->chan_state == LDC_CHANNEL_DOWN ||
1649 lp->chan_state == LDC_CHANNEL_RESETTING)
1650 return -ECONNRESET;
1651
1652 if (lp->rx_head == lp->rx_tail)
1653 return 0;
1654
1655 first_frag = NULL;
1656 copied = err = 0;
1657 new = lp->rx_head;
1658 while (1) {
1659 struct ldc_packet *p;
1660 int pkt_len;
1661
1662 BUG_ON(new == lp->rx_tail);
1663 p = lp->rx_base + (new / LDC_PACKET_SIZE);
1664
1665 ldcdbg(RX, "RX read pkt[%02x:%02x:%02x:%02x:%08x:%08x] "
1666 "rcv_nxt[%08x]\n",
1667 p->type,
1668 p->stype,
1669 p->ctrl,
1670 p->env,
1671 p->seqid,
1672 p->u.r.ackid,
1673 lp->rcv_nxt);
1674
1675 if (unlikely(!rx_seq_ok(lp, p->seqid))) {
1676 err = rx_bad_seq(lp, p, first_frag);
1677 copied = 0;
1678 break;
1679 }
1680
1681 if (p->type & LDC_CTRL) {
1682 err = process_control_frame(lp, p);
1683 if (err < 0)
1684 break;
1685 err = 0;
1686 }
1687
1688 lp->rcv_nxt = p->seqid;
1689
1690 if (!(p->type & LDC_DATA)) {
1691 new = rx_advance(lp, new);
1692 goto no_data;
1693 }
1694 if (p->stype & (LDC_ACK | LDC_NACK)) {
1695 err = data_ack_nack(lp, p);
1696 if (err)
1697 break;
1698 }
1699 if (!(p->stype & LDC_INFO)) {
1700 new = rx_advance(lp, new);
1701 err = rx_set_head(lp, new);
1702 if (err)
1703 break;
1704 goto no_data;
1705 }
1706
1707 pkt_len = p->env & LDC_LEN;
1708
1709 /* Every initial packet starts with the START bit set.
1710 *
1711 * Singleton packets will have both START+STOP set.
1712 *
1713 * Fragments will have START set in the first frame, STOP
1714 * set in the last frame, and neither bit set in middle
1715 * frames of the packet.
1716 *
1717 * Therefore if we are at the beginning of a packet and
1718 * we don't see START, or we are in the middle of a fragmented
1719 * packet and do see START, we are unsynchronized and should
1720 * flush the RX queue.
1721 */
1722 if ((first_frag == NULL && !(p->env & LDC_START)) ||
1723 (first_frag != NULL && (p->env & LDC_START))) {
1724 if (!first_frag)
1725 new = rx_advance(lp, new);
1726
1727 err = rx_set_head(lp, new);
1728 if (err)
1729 break;
1730
1731 if (!first_frag)
1732 goto no_data;
1733 }
1734 if (!first_frag)
1735 first_frag = p;
1736
1737 if (pkt_len > size - copied) {
1738 /* User didn't give us a big enough buffer,
1739 * what to do? This is a pretty serious error.
1740 *
1741 * Since we haven't updated the RX ring head to
1742 * consume any of the packets, signal the error
1743 * to the user and just leave the RX ring alone.
1744 *
1745 * This seems the best behavior because this allows
1746 * a user of the LDC layer to start with a small
1747 * RX buffer for ldc_read() calls and use -EMSGSIZE
1748 * as a cue to enlarge its read buffer.
1749 */
1750 err = -EMSGSIZE;
1751 break;
1752 }
1753
1754 /* Ok, we are gonna eat this one. */
1755 new = rx_advance(lp, new);
1756
1757 memcpy(buf,
1758 (lp->cfg.mode == LDC_MODE_UNRELIABLE ?
1759 p->u.u_data : p->u.r.r_data), pkt_len);
1760 buf += pkt_len;
1761 copied += pkt_len;
1762
1763 if (p->env & LDC_STOP)
1764 break;
1765
1766no_data:
1767 if (new == lp->rx_tail) {
1768 err = rx_data_wait(lp, new);
1769 if (err)
1770 break;
1771 }
1772 }
1773
1774 if (!err)
1775 err = rx_set_head(lp, new);
1776
1777 if (err && first_frag)
1778 lp->rcv_nxt = first_frag->seqid - 1;
1779
1780 if (!err) {
1781 err = copied;
1782 if (err > 0 && lp->cfg.mode != LDC_MODE_UNRELIABLE)
1783 send_data_ack(lp);
1784 }
1785
1786 return err;
1787}
1788
1789static const struct ldc_mode_ops nonraw_ops = {
1790 .write = write_nonraw,
1791 .read = read_nonraw,
1792};
1793
1794static int write_stream(struct ldc_channel *lp, const void *buf,
1795 unsigned int size)
1796{
1797 if (size > lp->cfg.mtu)
1798 size = lp->cfg.mtu;
1799 return write_nonraw(lp, buf, size);
1800}
1801
1802static int read_stream(struct ldc_channel *lp, void *buf, unsigned int size)
1803{
1804 if (!lp->mssbuf_len) {
1805 int err = read_nonraw(lp, lp->mssbuf, lp->cfg.mtu);
1806 if (err < 0)
1807 return err;
1808
1809 lp->mssbuf_len = err;
1810 lp->mssbuf_off = 0;
1811 }
1812
1813 if (size > lp->mssbuf_len)
1814 size = lp->mssbuf_len;
1815 memcpy(buf, lp->mssbuf + lp->mssbuf_off, size);
1816
1817 lp->mssbuf_off += size;
1818 lp->mssbuf_len -= size;
1819
1820 return size;
1821}
1822
1823static const struct ldc_mode_ops stream_ops = {
1824 .write = write_stream,
1825 .read = read_stream,
1826};
1827
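/* Stream mode rides on the nonraw engine but relaxes the message
 * framing: writes are clipped to cfg.mtu, and reads drain one
 * mtu-sized reassembled message at a time through lp->mssbuf, so a
 * caller may pull a large message out in small pieces.  Illustrative
 * sketch (buffer size and consume() are hypothetical):
 *
 *	char chunk[128];
 *	int n;
 *
 *	while ((n = ldc_read(lp, chunk, sizeof(chunk))) > 0)
 *		consume(chunk, n);
 */
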
1828int ldc_write(struct ldc_channel *lp, const void *buf, unsigned int size)
1829{
1830 unsigned long flags;
1831 int err;
1832
1833 if (!buf)
1834 return -EINVAL;
1835
1836 if (!size)
1837 return 0;
1838
1839 spin_lock_irqsave(&lp->lock, flags);
1840
1841 if (lp->hs_state != LDC_HS_COMPLETE)
1842 err = -ENOTCONN;
1843 else
1844 err = lp->mops->write(lp, buf, size);
1845
1846 spin_unlock_irqrestore(&lp->lock, flags);
1847
1848 return err;
1849}
1850EXPORT_SYMBOL(ldc_write);
1851
1852int ldc_read(struct ldc_channel *lp, void *buf, unsigned int size)
1853{
1854 unsigned long flags;
1855 int err;
1856
1857 if (!buf)
1858 return -EINVAL;
1859
1860 if (!size)
1861 return 0;
1862
1863 spin_lock_irqsave(&lp->lock, flags);
1864
1865 if (lp->hs_state != LDC_HS_COMPLETE)
1866 err = -ENOTCONN;
1867 else
1868 err = lp->mops->read(lp, buf, size);
1869
1870 spin_unlock_irqrestore(&lp->lock, flags);
1871
1872 return err;
1873}
1874EXPORT_SYMBOL(ldc_read);
1875
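/* Both entry points take lp->lock and fail with -ENOTCONN until the
 * handshake is complete, so a non-blocking caller typically retries
 * from its LDC_EVENT_DATA_READY handler.  Sketch (assuming a
 * hypothetical process_one_packet() helper):
 *
 *	int n = ldc_read(lp, buf, buflen);
 *
 *	if (n > 0)
 *		process_one_packet(buf, n);
 *	else if (n == -ECONNRESET)
 *		... tear the link down and rebind ...
 */
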
1876static long arena_alloc(struct ldc_iommu *iommu, unsigned long npages)
1877{
1878 struct iommu_arena *arena = &iommu->arena;
1879 unsigned long n, i, start, end, limit;
1880 int pass;
1881
1882 limit = arena->limit;
1883 start = arena->hint;
1884 pass = 0;
1885
1886again:
1887 n = find_next_zero_bit(arena->map, limit, start);
1888 end = n + npages;
1889 if (unlikely(end >= limit)) {
1890 if (likely(pass < 1)) {
1891 limit = start;
1892 start = 0;
1893 pass++;
1894 goto again;
1895 } else {
1896 /* Scanned the whole thing, give up. */
1897 return -1;
1898 }
1899 }
1900
1901 for (i = n; i < end; i++) {
1902 if (test_bit(i, arena->map)) {
1903 start = i + 1;
1904 goto again;
1905 }
1906 }
1907
1908 for (i = n; i < end; i++)
1909 __set_bit(i, arena->map);
1910
1911 arena->hint = end;
1912
1913 return n;
1914}
1915
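/* The allocator above is a simple first-fit bitmap scan with a
 * rotating hint: pass 0 searches from arena->hint up to the limit,
 * and on failure pass 1 wraps around and searches from 0 up to the
 * old hint before giving up.  A successful search claims npages
 * *consecutive* map bits, one per mapped page.
 */
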
1916#define COOKIE_PGSZ_CODE 0xf000000000000000ULL
1917#define COOKIE_PGSZ_CODE_SHIFT 60ULL
1918
1919static u64 pagesize_code(void)
1920{
1921 switch (PAGE_SIZE) {
1922 default:
1923 case (8ULL * 1024ULL):
1924 return 0;
1925 case (64ULL * 1024ULL):
1926 return 1;
1927 case (512ULL * 1024ULL):
1928 return 2;
1929 case (4ULL * 1024ULL * 1024ULL):
1930 return 3;
1931 case (32ULL * 1024ULL * 1024ULL):
1932 return 4;
1933 case (256ULL * 1024ULL * 1024ULL):
1934 return 5;
1935 }
1936}
1937
1938static u64 make_cookie(u64 index, u64 pgsz_code, u64 page_offset)
1939{
1940 return ((pgsz_code << COOKIE_PGSZ_CODE_SHIFT) |
1941 (index << PAGE_SHIFT) |
1942 page_offset);
1943}
1944
1945static u64 cookie_to_index(u64 cookie, unsigned long *shift)
1946{
1947 u64 szcode = cookie >> COOKIE_PGSZ_CODE_SHIFT;
1948
1949 cookie &= ~COOKIE_PGSZ_CODE;
1950
1951 *shift = szcode * 3;
1952
1953 return (cookie >> (13ULL + (szcode * 3ULL)));
1954}
1955
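/* Worked example for the helpers above (assuming an 8K PAGE_SIZE,
 * i.e. pagesize_code() == 0 and PAGE_SHIFT == 13): mapping table
 * index 5 at page offset 0x100 encodes as
 *
 *	cookie = (0ULL << 60) | (5 << 13) | 0x100 == 0xa100
 *
 * and cookie_to_index(0xa100, &shift) strips the size code, sets
 * shift = 0 * 3 and recovers 0xa100 >> 13 == 5, the offset bits
 * falling away in the shift.
 */
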
1956static struct ldc_mtable_entry *alloc_npages(struct ldc_iommu *iommu,
1957 unsigned long npages)
1958{
1959 long entry;
1960
1961 entry = arena_alloc(iommu, npages);
1962 if (unlikely(entry < 0))
1963 return NULL;
1964
1965 return iommu->page_table + entry;
1966}
1967
1968static u64 perm_to_mte(unsigned int map_perm)
1969{
1970 u64 mte_base;
1971
1972 mte_base = pagesize_code();
1973
1974 if (map_perm & LDC_MAP_SHADOW) {
1975 if (map_perm & LDC_MAP_R)
1976 mte_base |= LDC_MTE_COPY_R;
1977 if (map_perm & LDC_MAP_W)
1978 mte_base |= LDC_MTE_COPY_W;
1979 }
1980 if (map_perm & LDC_MAP_DIRECT) {
1981 if (map_perm & LDC_MAP_R)
1982 mte_base |= LDC_MTE_READ;
1983 if (map_perm & LDC_MAP_W)
1984 mte_base |= LDC_MTE_WRITE;
1985 if (map_perm & LDC_MAP_X)
1986 mte_base |= LDC_MTE_EXEC;
1987 }
1988 if (map_perm & LDC_MAP_IO) {
1989 if (map_perm & LDC_MAP_R)
1990 mte_base |= LDC_MTE_IOMMU_R;
1991 if (map_perm & LDC_MAP_W)
1992 mte_base |= LDC_MTE_IOMMU_W;
1993 }
1994
1995 return mte_base;
1996}
1997
1998static int pages_in_region(unsigned long base, long len)
1999{
2000 int count = 0;
2001
2002 do {
2003 unsigned long new = (base + PAGE_SIZE) & PAGE_MASK;
2004
2005 len -= (new - base);
2006 base = new;
2007 count++;
2008 } while (len > 0);
2009
2010 return count;
2011}
2012
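/* Worked example (8K pages): for base = 0x11f00 and len = 0x300 the
 * first iteration rounds up to new = 0x12000 and consumes 0x100
 * bytes, the second consumes the remaining 0x200, so the region
 * touches 2 pages even though it is only 0x300 bytes long.
 */
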
2013struct cookie_state {
2014 struct ldc_mtable_entry *page_table;
2015 struct ldc_trans_cookie *cookies;
2016 u64 mte_base;
2017 u64 prev_cookie;
2018 u32 pte_idx;
2019 u32 nc;
2020};
2021
2022static void fill_cookies(struct cookie_state *sp, unsigned long pa,
2023 unsigned long off, unsigned long len)
2024{
2025 do {
2026 unsigned long tlen, new = pa + PAGE_SIZE;
2027 u64 this_cookie;
2028
2029 sp->page_table[sp->pte_idx].mte = sp->mte_base | pa;
2030
2031 tlen = PAGE_SIZE;
2032 if (off)
2033 tlen = PAGE_SIZE - off;
2034 if (tlen > len)
2035 tlen = len;
2036
2037 this_cookie = make_cookie(sp->pte_idx,
2038 pagesize_code(), off);
2039
2040 off = 0;
2041
2042 if (this_cookie == sp->prev_cookie) {
2043 sp->cookies[sp->nc - 1].cookie_size += tlen;
2044 } else {
2045 sp->cookies[sp->nc].cookie_addr = this_cookie;
2046 sp->cookies[sp->nc].cookie_size = tlen;
2047 sp->nc++;
2048 }
2049 sp->prev_cookie = this_cookie + tlen;
2050
2051 sp->pte_idx++;
2052
2053 len -= tlen;
2054 pa = new;
2055 } while (len > 0);
2056}
2057
2058static int sg_count_one(struct scatterlist *sg)
2059{
2060 unsigned long base = page_to_pfn(sg->page) << PAGE_SHIFT;
2061 long len = sg->length;
2062
2063 if ((sg->offset | len) & (8UL - 1))
2064 return -EFAULT;
2065
2066 return pages_in_region(base + sg->offset, len);
2067}
2068
2069static int sg_count_pages(struct scatterlist *sg, int num_sg)
2070{
2071 int count;
2072 int i;
2073
2074 count = 0;
2075 for (i = 0; i < num_sg; i++) {
2076 int err = sg_count_one(sg + i);
2077 if (err < 0)
2078 return err;
2079 count += err;
2080 }
2081
2082 return count;
2083}
2084
2085int ldc_map_sg(struct ldc_channel *lp,
2086 struct scatterlist *sg, int num_sg,
2087 struct ldc_trans_cookie *cookies, int ncookies,
2088 unsigned int map_perm)
2089{
2090 unsigned long i, npages, flags;
2091 struct ldc_mtable_entry *base;
2092 struct cookie_state state;
2093 struct ldc_iommu *iommu;
2094 int err;
2095
2096 if (map_perm & ~LDC_MAP_ALL)
2097 return -EINVAL;
2098
2099 err = sg_count_pages(sg, num_sg);
2100 if (err < 0)
2101 return err;
2102
2103 npages = err;
2104 if (err > ncookies)
2105 return -EMSGSIZE;
2106
2107 iommu = &lp->iommu;
2108
2109 spin_lock_irqsave(&iommu->lock, flags);
2110 base = alloc_npages(iommu, npages);
2111 spin_unlock_irqrestore(&iommu->lock, flags);
2112
2113 if (!base)
2114 return -ENOMEM;
2115
2116 state.page_table = iommu->page_table;
2117 state.cookies = cookies;
2118 state.mte_base = perm_to_mte(map_perm);
2119 state.prev_cookie = ~(u64)0;
2120 state.pte_idx = (base - iommu->page_table);
2121 state.nc = 0;
2122
2123 for (i = 0; i < num_sg; i++)
2124 fill_cookies(&state, page_to_pfn(sg[i].page) << PAGE_SHIFT,
2125 sg[i].offset, sg[i].length);
2126
2127 return state.nc;
2128}
2129EXPORT_SYMBOL(ldc_map_sg);
2130
2131int ldc_map_single(struct ldc_channel *lp,
2132 void *buf, unsigned int len,
2133 struct ldc_trans_cookie *cookies, int ncookies,
2134 unsigned int map_perm)
2135{
2136 unsigned long npages, pa, flags;
2137 struct ldc_mtable_entry *base;
2138 struct cookie_state state;
2139 struct ldc_iommu *iommu;
2140
2141 if ((map_perm & ~LDC_MAP_ALL) || (ncookies < 1))
2142 return -EINVAL;
2143
2144 pa = __pa(buf);
2145 if ((pa | len) & (8UL - 1))
2146 return -EFAULT;
2147
2148 npages = pages_in_region(pa, len);
2149
2150 iommu = &lp->iommu;
2151
2152 spin_lock_irqsave(&iommu->lock, flags);
2153 base = alloc_npages(iommu, npages);
2154 spin_unlock_irqrestore(&iommu->lock, flags);
2155
2156 if (!base)
2157 return -ENOMEM;
2158
2159 state.page_table = iommu->page_table;
2160 state.cookies = cookies;
2161 state.mte_base = perm_to_mte(map_perm);
2162 state.prev_cookie = ~(u64)0;
2163 state.pte_idx = (base - iommu->page_table);
2164 state.nc = 0;
2165 fill_cookies(&state, (pa & PAGE_MASK), (pa & ~PAGE_MASK), len);
2166 BUG_ON(state.nc != 1);
2167
2168 return state.nc;
2169}
2170EXPORT_SYMBOL(ldc_map_single);
2171
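/* Illustrative sketch (assumes an 8-byte aligned, kmalloc'ed request
 * block "rb" being exported to the peer via shadow copying):
 *
 *	struct ldc_trans_cookie cookie;
 *	int ncookies;
 *
 *	ncookies = ldc_map_single(lp, rb, sizeof(*rb),
 *				  &cookie, 1, LDC_MAP_SHADOW | LDC_MAP_R);
 *	if (ncookies < 0)
 *		return ncookies;
 *	... hand the cookie to the peer in a descriptor ...
 */
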
2172static void free_npages(unsigned long id, struct ldc_iommu *iommu,
2173 u64 cookie, u64 size)
2174{
2175 struct iommu_arena *arena = &iommu->arena;
2176 unsigned long i, shift, index, npages;
2177 struct ldc_mtable_entry *base;
2178
2179 npages = PAGE_ALIGN(((cookie & ~PAGE_MASK) + size)) >> PAGE_SHIFT;
2180 index = cookie_to_index(cookie, &shift);
2181 base = iommu->page_table + index;
2182
2183 BUG_ON(index > arena->limit ||
2184 (index + npages) > arena->limit);
2185
2186 for (i = 0; i < npages; i++) {
2187 if (base->cookie)
2188 sun4v_ldc_revoke(id, cookie + (i << shift),
2189 base->cookie);
2190 base->mte = 0;
2191 __clear_bit(index + i, arena->map);
2192 }
2193}
2194
2195void ldc_unmap(struct ldc_channel *lp, struct ldc_trans_cookie *cookies,
2196 int ncookies)
2197{
2198 struct ldc_iommu *iommu = &lp->iommu;
2199 unsigned long flags;
2200 int i;
2201
2202 spin_lock_irqsave(&iommu->lock, flags);
2203 for (i = 0; i < ncookies; i++) {
2204 u64 addr = cookies[i].cookie_addr;
2205 u64 size = cookies[i].cookie_size;
2206
2207 free_npages(lp->id, iommu, addr, size);
2208 }
2209 spin_unlock_irqrestore(&iommu->lock, flags);
2210}
2211EXPORT_SYMBOL(ldc_unmap);
2212
2213int ldc_copy(struct ldc_channel *lp, int copy_dir,
2214 void *buf, unsigned int len, unsigned long offset,
2215 struct ldc_trans_cookie *cookies, int ncookies)
2216{
2217 unsigned int orig_len;
2218 unsigned long ra;
2219 int i;
2220
2221 if (copy_dir != LDC_COPY_IN && copy_dir != LDC_COPY_OUT) {
2222 printk(KERN_ERR PFX "ldc_copy: ID[%lu] Bad copy_dir[%d]\n",
2223 lp->id, copy_dir);
2224 return -EINVAL;
2225 }
2226
2227 ra = __pa(buf);
2228 if ((ra | len | offset) & (8UL - 1)) {
2229 printk(KERN_ERR PFX "ldc_copy: ID[%lu] Unaligned buffer "
2230 "ra[%lx] len[%x] offset[%lx]\n",
2231 lp->id, ra, len, offset);
2232 return -EFAULT;
2233 }
2234
2235 if (lp->hs_state != LDC_HS_COMPLETE ||
2236 (lp->flags & LDC_FLAG_RESET)) {
2237 printk(KERN_ERR PFX "ldc_copy: ID[%lu] Link down hs_state[%x] "
2238 "flags[%x]\n", lp->id, lp->hs_state, lp->flags);
2239 return -ECONNRESET;
2240 }
2241
2242 orig_len = len;
2243 for (i = 0; i < ncookies; i++) {
2244 unsigned long cookie_raddr = cookies[i].cookie_addr;
2245 unsigned long this_len = cookies[i].cookie_size;
2246 unsigned long actual_len;
2247
2248 if (unlikely(offset)) {
2249 unsigned long this_off = offset;
2250
2251 if (this_off > this_len)
2252 this_off = this_len;
2253
2254 offset -= this_off;
2255 this_len -= this_off;
2256 if (!this_len)
2257 continue;
2258 cookie_raddr += this_off;
2259 }
2260
2261 if (this_len > len)
2262 this_len = len;
2263
2264 while (1) {
2265 unsigned long hv_err;
2266
2267 hv_err = sun4v_ldc_copy(lp->id, copy_dir,
2268 cookie_raddr, ra,
2269 this_len, &actual_len);
2270 if (unlikely(hv_err)) {
2271 printk(KERN_ERR PFX "ldc_copy: ID[%lu] "
2272 "HV error %lu\n",
2273 lp->id, hv_err);
2274 if (lp->hs_state != LDC_HS_COMPLETE ||
2275 (lp->flags & LDC_FLAG_RESET))
2276 return -ECONNRESET;
2277 else
2278 return -EFAULT;
2279 }
2280
2281 cookie_raddr += actual_len;
2282 ra += actual_len;
2283 len -= actual_len;
2284 if (actual_len == this_len)
2285 break;
2286
2287 this_len -= actual_len;
2288 }
2289
2290 if (!len)
2291 break;
2292 }
2293
2294 /* It is the caller's policy what to do about short copies.
2295 * For example, a networking driver can declare the
2296 * packet a runt and drop it.
2297 */
2298
2299 return orig_len - len;
2300}
2301EXPORT_SYMBOL(ldc_copy);
2302
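/* Illustrative sketch: pulling a peer's exported buffer into a local,
 * 8-byte aligned "buf" using cookies received in a descriptor (all
 * names hypothetical):
 *
 *	int n = ldc_copy(lp, LDC_COPY_IN, buf, desc_len, 0,
 *			 desc->cookies, desc->ncookies);
 *
 *	if (n < 0)
 *		return n;
 *	if (n < desc_len)
 *		... short copy; caller policy (e.g. drop the runt) ...
 */
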
2303void *ldc_alloc_exp_dring(struct ldc_channel *lp, unsigned int len,
2304 struct ldc_trans_cookie *cookies, int *ncookies,
2305 unsigned int map_perm)
2306{
2307 void *buf;
2308 int err;
2309
2310 if (len & (8UL - 1))
2311 return ERR_PTR(-EINVAL);
2312
2313 buf = kzalloc(len, GFP_KERNEL);
2314 if (!buf)
2315 return ERR_PTR(-ENOMEM);
2316
2317 err = ldc_map_single(lp, buf, len, cookies, *ncookies, map_perm);
2318 if (err < 0) {
2319 kfree(buf);
2320 return ERR_PTR(err);
2321 }
2322 *ncookies = err;
2323
2324 return buf;
2325}
2326EXPORT_SYMBOL(ldc_alloc_exp_dring);
2327
2328void ldc_free_exp_dring(struct ldc_channel *lp, void *buf, unsigned int len,
2329 struct ldc_trans_cookie *cookies, int ncookies)
2330{
2331 ldc_unmap(lp, cookies, ncookies);
2332 kfree(buf);
2333}
2334EXPORT_SYMBOL(ldc_free_exp_dring);
2335
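/* Typical export lifecycle (illustrative): a driver allocates its
 * descriptor ring with ldc_alloc_exp_dring(), advertises the returned
 * cookies to the peer during its own handshake, and tears the mapping
 * down with ldc_free_exp_dring() using the very same cookie array and
 * count, since that is what undoes the ldc_map_single() done here.
 */
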
2336static int __init ldc_init(void)
2337{
2338 unsigned long major, minor;
2339 struct mdesc_handle *hp;
2340 const u64 *v;
2341 u64 mp;
2342
2343 hp = mdesc_grab();
2344 if (!hp)
2345 return -ENODEV;
2346
2347 mp = mdesc_node_by_name(hp, MDESC_NODE_NULL, "platform");
2348 if (mp == MDESC_NODE_NULL)
2349 return -ENODEV;
2350
2351 v = mdesc_get_property(hp, mp, "domaining-enabled", NULL);
2352 if (!v)
2353 return -ENODEV;
2354
2355 major = 1;
2356 minor = 0;
2357 if (sun4v_hvapi_register(HV_GRP_LDOM, major, &minor)) {
2358 printk(KERN_INFO PFX "Could not register LDOM hvapi.\n");
2359 return -ENODEV;
2360 }
2361
2362 printk(KERN_INFO "%s", version);
2363
2364 if (!*v) {
2365 printk(KERN_INFO PFX "Domaining disabled.\n");
2366 return -ENODEV;
2367 }
2368 ldom_domaining_enabled = 1;
2369
2370 return 0;
2371}
2372
2373core_initcall(ldc_init);
diff --git a/arch/sparc64/kernel/mdesc.c b/arch/sparc64/kernel/mdesc.c
index f0e16045fb16..62a389793949 100644
--- a/arch/sparc64/kernel/mdesc.c
+++ b/arch/sparc64/kernel/mdesc.c
@@ -6,6 +6,9 @@
 #include <linux/types.h>
 #include <linux/bootmem.h>
 #include <linux/log2.h>
+#include <linux/list.h>
+#include <linux/slab.h>
+#include <linux/mm.h>
 
 #include <asm/hypervisor.h>
 #include <asm/mdesc.h>
@@ -29,7 +32,7 @@ struct mdesc_hdr {
 	u32	node_sz;	/* node block size */
 	u32	name_sz;	/* name block size */
 	u32	data_sz;	/* data block size */
-};
+} __attribute__((aligned(16)));
 
 struct mdesc_elem {
 	u8	tag;
@@ -53,306 +56,402 @@ struct mdesc_elem {
 	} d;
 };
 
-static struct mdesc_hdr *main_mdesc;
-static struct mdesc_node *allnodes;
-
-static struct mdesc_node *allnodes_tail;
-static unsigned int unique_id;
+struct mdesc_mem_ops {
+	struct mdesc_handle *(*alloc)(unsigned int mdesc_size);
+	void (*free)(struct mdesc_handle *handle);
+};
 
-static struct mdesc_node **mdesc_hash;
-static unsigned int mdesc_hash_size;
+struct mdesc_handle {
+	struct list_head	list;
+	struct mdesc_mem_ops	*mops;
+	void			*self_base;
+	atomic_t		refcnt;
+	unsigned int		handle_size;
+	struct mdesc_hdr	mdesc;
+};
 
-static inline unsigned int node_hashfn(u64 node)
+static void mdesc_handle_init(struct mdesc_handle *hp,
+			      unsigned int handle_size,
+			      void *base)
 {
-	return ((unsigned int) (node ^ (node >> 8) ^ (node >> 16)))
-		& (mdesc_hash_size - 1);
+	BUG_ON(((unsigned long)&hp->mdesc) & (16UL - 1));
+
+	memset(hp, 0, handle_size);
+	INIT_LIST_HEAD(&hp->list);
+	hp->self_base = base;
+	atomic_set(&hp->refcnt, 1);
+	hp->handle_size = handle_size;
 }
 
-static inline void hash_node(struct mdesc_node *mp)
+static struct mdesc_handle *mdesc_bootmem_alloc(unsigned int mdesc_size)
 {
-	struct mdesc_node **head = &mdesc_hash[node_hashfn(mp->node)];
+	struct mdesc_handle *hp;
+	unsigned int handle_size, alloc_size;
 
-	mp->hash_next = *head;
-	*head = mp;
+	handle_size = (sizeof(struct mdesc_handle) -
+		       sizeof(struct mdesc_hdr) +
+		       mdesc_size);
+	alloc_size = PAGE_ALIGN(handle_size);
 
-	if (allnodes_tail) {
-		allnodes_tail->allnodes_next = mp;
-		allnodes_tail = mp;
-	} else {
-		allnodes = allnodes_tail = mp;
-	}
+	hp = __alloc_bootmem(alloc_size, PAGE_SIZE, 0UL);
+	if (hp)
+		mdesc_handle_init(hp, handle_size, hp);
+
+	return hp;
 }
 
-static struct mdesc_node *find_node(u64 node)
+static void mdesc_bootmem_free(struct mdesc_handle *hp)
 {
-	struct mdesc_node *mp = mdesc_hash[node_hashfn(node)];
+	unsigned int alloc_size, handle_size = hp->handle_size;
+	unsigned long start, end;
+
+	BUG_ON(atomic_read(&hp->refcnt) != 0);
+	BUG_ON(!list_empty(&hp->list));
 
-	while (mp) {
-		if (mp->node == node)
-			return mp;
+	alloc_size = PAGE_ALIGN(handle_size);
 
-		mp = mp->hash_next;
+	start = (unsigned long) hp;
+	end = start + alloc_size;
+
+	while (start < end) {
+		struct page *p;
+
+		p = virt_to_page(start);
+		ClearPageReserved(p);
+		__free_page(p);
+		start += PAGE_SIZE;
 	}
-	return NULL;
 }
 
-struct property *md_find_property(const struct mdesc_node *mp,
-				  const char *name,
-				  int *lenp)
+static struct mdesc_mem_ops bootmem_mdesc_memops = {
+	.alloc = mdesc_bootmem_alloc,
+	.free = mdesc_bootmem_free,
+};
+
+static struct mdesc_handle *mdesc_kmalloc(unsigned int mdesc_size)
 {
-	struct property *pp;
+	unsigned int handle_size;
+	void *base;
 
-	for (pp = mp->properties; pp != 0; pp = pp->next) {
-		if (strcasecmp(pp->name, name) == 0) {
-			if (lenp)
-				*lenp = pp->length;
-			break;
-		}
+	handle_size = (sizeof(struct mdesc_handle) -
+		       sizeof(struct mdesc_hdr) +
+		       mdesc_size);
+
+	base = kmalloc(handle_size + 15, GFP_KERNEL);
+	if (base) {
+		struct mdesc_handle *hp;
+		unsigned long addr;
+
+		addr = (unsigned long)base;
+		addr = (addr + 15UL) & ~15UL;
+		hp = (struct mdesc_handle *) addr;
+
+		mdesc_handle_init(hp, handle_size, base);
+		return hp;
 	}
-	return pp;
+
+	return NULL;
 }
-EXPORT_SYMBOL(md_find_property);
 
-/*
- * Find a property with a given name for a given node
- * and return the value.
- */
-const void *md_get_property(const struct mdesc_node *mp, const char *name,
-			    int *lenp)
+static void mdesc_kfree(struct mdesc_handle *hp)
 {
-	struct property *pp = md_find_property(mp, name, lenp);
-	return pp ? pp->value : NULL;
+	BUG_ON(atomic_read(&hp->refcnt) != 0);
+	BUG_ON(!list_empty(&hp->list));
+
+	kfree(hp->self_base);
 }
-EXPORT_SYMBOL(md_get_property);
 
-struct mdesc_node *md_find_node_by_name(struct mdesc_node *from,
-					const char *name)
+static struct mdesc_mem_ops kmalloc_mdesc_memops = {
+	.alloc = mdesc_kmalloc,
+	.free = mdesc_kfree,
+};
+
+static struct mdesc_handle *mdesc_alloc(unsigned int mdesc_size,
+					struct mdesc_mem_ops *mops)
 {
-	struct mdesc_node *mp;
+	struct mdesc_handle *hp = mops->alloc(mdesc_size);
 
-	mp = from ? from->allnodes_next : allnodes;
-	for (; mp != NULL; mp = mp->allnodes_next) {
-		if (strcmp(mp->name, name) == 0)
-			break;
-	}
-	return mp;
-}
-EXPORT_SYMBOL(md_find_node_by_name);
+	if (hp)
+		hp->mops = mops;
 
-static unsigned int mdesc_early_allocated;
+	return hp;
+}
 
-static void * __init mdesc_early_alloc(unsigned long size)
+static void mdesc_free(struct mdesc_handle *hp)
 {
-	void *ret;
+	hp->mops->free(hp);
+}
 
-	ret = __alloc_bootmem(size, SMP_CACHE_BYTES, 0UL);
-	if (ret == NULL) {
-		prom_printf("MDESC: alloc of %lu bytes failed.\n", size);
-		prom_halt();
-	}
+static struct mdesc_handle *cur_mdesc;
+static LIST_HEAD(mdesc_zombie_list);
+static DEFINE_SPINLOCK(mdesc_lock);
 
-	memset(ret, 0, size);
+struct mdesc_handle *mdesc_grab(void)
+{
+	struct mdesc_handle *hp;
+	unsigned long flags;
 
-	mdesc_early_allocated += size;
+	spin_lock_irqsave(&mdesc_lock, flags);
+	hp = cur_mdesc;
+	if (hp)
+		atomic_inc(&hp->refcnt);
+	spin_unlock_irqrestore(&mdesc_lock, flags);
 
-	return ret;
+	return hp;
 }
+EXPORT_SYMBOL(mdesc_grab);
 
-static unsigned int __init count_arcs(struct mdesc_elem *ep)
+void mdesc_release(struct mdesc_handle *hp)
 {
-	unsigned int ret = 0;
+	unsigned long flags;
 
-	ep++;
-	while (ep->tag != MD_NODE_END) {
-		if (ep->tag == MD_PROP_ARC)
-			ret++;
-		ep++;
+	spin_lock_irqsave(&mdesc_lock, flags);
+	if (atomic_dec_and_test(&hp->refcnt)) {
+		list_del_init(&hp->list);
+		hp->mops->free(hp);
 	}
-	return ret;
+	spin_unlock_irqrestore(&mdesc_lock, flags);
 }
+EXPORT_SYMBOL(mdesc_release);
 
-static void __init mdesc_node_alloc(u64 node, struct mdesc_elem *ep, const char *names)
+static void do_mdesc_update(struct work_struct *work)
 {
-	unsigned int num_arcs = count_arcs(ep);
-	struct mdesc_node *mp;
+	unsigned long len, real_len, status;
+	struct mdesc_handle *hp, *orig_hp;
+	unsigned long flags;
+
+	(void) sun4v_mach_desc(0UL, 0UL, &len);
+
+	hp = mdesc_alloc(len, &kmalloc_mdesc_memops);
+	if (!hp) {
+		printk(KERN_ERR "MD: mdesc alloc fails\n");
+		return;
+	}
+
+	status = sun4v_mach_desc(__pa(&hp->mdesc), len, &real_len);
+	if (status != HV_EOK || real_len > len) {
+		printk(KERN_ERR "MD: mdesc reread fails with %lu\n",
+		       status);
+		atomic_dec(&hp->refcnt);
+		mdesc_free(hp);
+		return;
+	}
 
-	mp = mdesc_early_alloc(sizeof(*mp) +
-			       (num_arcs * sizeof(struct mdesc_arc)));
-	mp->name = names + ep->name_offset;
-	mp->node = node;
-	mp->unique_id = unique_id++;
-	mp->num_arcs = num_arcs;
+	spin_lock_irqsave(&mdesc_lock, flags);
+	orig_hp = cur_mdesc;
+	cur_mdesc = hp;
 
-	hash_node(mp);
+	if (atomic_dec_and_test(&orig_hp->refcnt))
+		mdesc_free(orig_hp);
+	else
+		list_add(&orig_hp->list, &mdesc_zombie_list);
+	spin_unlock_irqrestore(&mdesc_lock, flags);
 }
 
-static inline struct mdesc_elem *node_block(struct mdesc_hdr *mdesc)
+static DECLARE_WORK(mdesc_update_work, do_mdesc_update);
+
+void mdesc_update(void)
+{
+	schedule_work(&mdesc_update_work);
+}
+
+static struct mdesc_elem *node_block(struct mdesc_hdr *mdesc)
 {
 	return (struct mdesc_elem *) (mdesc + 1);
 }
 
-static inline void *name_block(struct mdesc_hdr *mdesc)
+static void *name_block(struct mdesc_hdr *mdesc)
 {
 	return ((void *) node_block(mdesc)) + mdesc->node_sz;
 }
 
-static inline void *data_block(struct mdesc_hdr *mdesc)
+static void *data_block(struct mdesc_hdr *mdesc)
 {
 	return ((void *) name_block(mdesc)) + mdesc->name_sz;
 }
 
-/* In order to avoid recursion (the graph can be very deep) we use a
- * two pass algorithm.  First we allocate all the nodes and hash them.
- * Then we iterate over each node, filling in the arcs and properties.
- */
-static void __init build_all_nodes(struct mdesc_hdr *mdesc)
+u64 mdesc_node_by_name(struct mdesc_handle *hp,
+		       u64 from_node, const char *name)
 {
-	struct mdesc_elem *start, *ep;
-	struct mdesc_node *mp;
-	const char *names;
-	void *data;
-	u64 last_node;
-
-	start = ep = node_block(mdesc);
-	last_node = mdesc->node_sz / 16;
+	struct mdesc_elem *ep = node_block(&hp->mdesc);
+	const char *names = name_block(&hp->mdesc);
+	u64 last_node = hp->mdesc.node_sz / 16;
+	u64 ret;
+
+	if (from_node == MDESC_NODE_NULL)
+		from_node = 0;
+
+	if (from_node >= last_node)
+		return MDESC_NODE_NULL;
+
+	ret = ep[from_node].d.val;
+	while (ret < last_node) {
+		if (ep[ret].tag != MD_NODE)
+			return MDESC_NODE_NULL;
+		if (!strcmp(names + ep[ret].name_offset, name))
+			break;
+		ret = ep[ret].d.val;
+	}
+	if (ret >= last_node)
+		ret = MDESC_NODE_NULL;
+	return ret;
+}
+EXPORT_SYMBOL(mdesc_node_by_name);
 
-	names = name_block(mdesc);
+const void *mdesc_get_property(struct mdesc_handle *hp, u64 node,
+			       const char *name, int *lenp)
+{
+	const char *names = name_block(&hp->mdesc);
+	u64 last_node = hp->mdesc.node_sz / 16;
+	void *data = data_block(&hp->mdesc);
+	struct mdesc_elem *ep;
 
-	while (1) {
-		u64 node = ep - start;
+	if (node == MDESC_NODE_NULL || node >= last_node)
+		return NULL;
 
-		if (ep->tag == MD_LIST_END)
+	ep = node_block(&hp->mdesc) + node;
+	ep++;
+	for (; ep->tag != MD_NODE_END; ep++) {
+		void *val = NULL;
+		int len = 0;
+
+		switch (ep->tag) {
+		case MD_PROP_VAL:
+			val = &ep->d.val;
+			len = 8;
 			break;
 
-		if (ep->tag != MD_NODE) {
-			prom_printf("MDESC: Inconsistent element list.\n");
-			prom_halt();
-		}
-
-		mdesc_node_alloc(node, ep, names);
+		case MD_PROP_STR:
+		case MD_PROP_DATA:
+			val = data + ep->d.data.data_offset;
+			len = ep->d.data.data_len;
+			break;
 
-		if (ep->d.val >= last_node) {
-			printk("MDESC: Warning, early break out of node scan.\n");
-			printk("MDESC: Next node [%lu] last_node [%lu].\n",
-			       node, last_node);
+		default:
 			break;
 		}
+		if (!val)
+			continue;
 
-		ep = start + ep->d.val;
+		if (!strcmp(names + ep->name_offset, name)) {
+			if (lenp)
+				*lenp = len;
+			return val;
+		}
 	}
 
-	data = data_block(mdesc);
-	for (mp = allnodes; mp; mp = mp->allnodes_next) {
-		struct mdesc_elem *ep = start + mp->node;
-		struct property **link = &mp->properties;
-		unsigned int this_arc = 0;
-
-		ep++;
-		while (ep->tag != MD_NODE_END) {
-			switch (ep->tag) {
-			case MD_PROP_ARC: {
-				struct mdesc_node *target;
-
-				if (this_arc >= mp->num_arcs) {
-					prom_printf("MDESC: ARC overrun [%u:%u]\n",
-						    this_arc, mp->num_arcs);
-					prom_halt();
-				}
-				target = find_node(ep->d.val);
-				if (!target) {
-					printk("MDESC: Warning, arc points to "
-					       "missing node, ignoring.\n");
-					break;
-				}
-				mp->arcs[this_arc].name =
-					(names + ep->name_offset);
-				mp->arcs[this_arc].arc = target;
-				this_arc++;
-				break;
-			}
+	return NULL;
+}
+EXPORT_SYMBOL(mdesc_get_property);
 
-			case MD_PROP_VAL:
-			case MD_PROP_STR:
-			case MD_PROP_DATA: {
-				struct property *p = mdesc_early_alloc(sizeof(*p));
-
-				p->unique_id = unique_id++;
-				p->name = (char *) names + ep->name_offset;
-				if (ep->tag == MD_PROP_VAL) {
-					p->value = &ep->d.val;
-					p->length = 8;
-				} else {
-					p->value = data + ep->d.data.data_offset;
-					p->length = ep->d.data.data_len;
-				}
-				*link = p;
-				link = &p->next;
-				break;
-			}
+u64 mdesc_next_arc(struct mdesc_handle *hp, u64 from, const char *arc_type)
+{
+	struct mdesc_elem *ep, *base = node_block(&hp->mdesc);
+	const char *names = name_block(&hp->mdesc);
+	u64 last_node = hp->mdesc.node_sz / 16;
 
-			case MD_NOOP:
-				break;
+	if (from == MDESC_NODE_NULL || from >= last_node)
+		return MDESC_NODE_NULL;
 
-			default:
-				printk("MDESC: Warning, ignoring unknown tag type %02x\n",
-				       ep->tag);
-			}
-			ep++;
-		}
+	ep = base + from;
+
+	ep++;
+	for (; ep->tag != MD_NODE_END; ep++) {
+		if (ep->tag != MD_PROP_ARC)
+			continue;
+
+		if (strcmp(names + ep->name_offset, arc_type))
+			continue;
+
+		return ep - base;
 	}
+
+	return MDESC_NODE_NULL;
 }
+EXPORT_SYMBOL(mdesc_next_arc);
 
-static unsigned int __init count_nodes(struct mdesc_hdr *mdesc)
+u64 mdesc_arc_target(struct mdesc_handle *hp, u64 arc)
 {
-	struct mdesc_elem *ep = node_block(mdesc);
-	struct mdesc_elem *end;
-	unsigned int cnt = 0;
-
-	end = ((void *)ep) + mdesc->node_sz;
-	while (ep < end) {
-		if (ep->tag == MD_NODE)
-			cnt++;
-		ep++;
-	}
-	return cnt;
+	struct mdesc_elem *ep, *base = node_block(&hp->mdesc);
+
+	ep = base + arc;
+
+	return ep->d.val;
+}
+EXPORT_SYMBOL(mdesc_arc_target);
+
+const char *mdesc_node_name(struct mdesc_handle *hp, u64 node)
+{
+	struct mdesc_elem *ep, *base = node_block(&hp->mdesc);
+	const char *names = name_block(&hp->mdesc);
+	u64 last_node = hp->mdesc.node_sz / 16;
+
+	if (node == MDESC_NODE_NULL || node >= last_node)
+		return NULL;
+
+	ep = base + node;
+	if (ep->tag != MD_NODE)
+		return NULL;
+
+	return names + ep->name_offset;
 }
+EXPORT_SYMBOL(mdesc_node_name);
 
 static void __init report_platform_properties(void)
 {
-	struct mdesc_node *pn = md_find_node_by_name(NULL, "platform");
+	struct mdesc_handle *hp = mdesc_grab();
+	u64 pn = mdesc_node_by_name(hp, MDESC_NODE_NULL, "platform");
 	const char *s;
 	const u64 *v;
 
-	if (!pn) {
+	if (pn == MDESC_NODE_NULL) {
 		prom_printf("No platform node in machine-description.\n");
 		prom_halt();
 	}
 
-	s = md_get_property(pn, "banner-name", NULL);
+	s = mdesc_get_property(hp, pn, "banner-name", NULL);
 	printk("PLATFORM: banner-name [%s]\n", s);
-	s = md_get_property(pn, "name", NULL);
+	s = mdesc_get_property(hp, pn, "name", NULL);
 	printk("PLATFORM: name [%s]\n", s);
 
-	v = md_get_property(pn, "hostid", NULL);
+	v = mdesc_get_property(hp, pn, "hostid", NULL);
 	if (v)
 		printk("PLATFORM: hostid [%08lx]\n", *v);
-	v = md_get_property(pn, "serial#", NULL);
+	v = mdesc_get_property(hp, pn, "serial#", NULL);
 	if (v)
 		printk("PLATFORM: serial# [%08lx]\n", *v);
-	v = md_get_property(pn, "stick-frequency", NULL);
+	v = mdesc_get_property(hp, pn, "stick-frequency", NULL);
 	printk("PLATFORM: stick-frequency [%08lx]\n", *v);
-	v = md_get_property(pn, "mac-address", NULL);
+	v = mdesc_get_property(hp, pn, "mac-address", NULL);
 	if (v)
 		printk("PLATFORM: mac-address [%lx]\n", *v);
-	v = md_get_property(pn, "watchdog-resolution", NULL);
+	v = mdesc_get_property(hp, pn, "watchdog-resolution", NULL);
 	if (v)
 		printk("PLATFORM: watchdog-resolution [%lu ms]\n", *v);
-	v = md_get_property(pn, "watchdog-max-timeout", NULL);
+	v = mdesc_get_property(hp, pn, "watchdog-max-timeout", NULL);
 	if (v)
 		printk("PLATFORM: watchdog-max-timeout [%lu ms]\n", *v);
-	v = md_get_property(pn, "max-cpus", NULL);
+	v = mdesc_get_property(hp, pn, "max-cpus", NULL);
 	if (v)
 		printk("PLATFORM: max-cpus [%lu]\n", *v);
+
+#ifdef CONFIG_SMP
+	{
+		int max_cpu, i;
+
+		if (v) {
+			max_cpu = *v;
+			if (max_cpu > NR_CPUS)
+				max_cpu = NR_CPUS;
+		} else {
+			max_cpu = NR_CPUS;
+		}
+		for (i = 0; i < max_cpu; i++)
+			cpu_set(i, cpu_possible_map);
+	}
+#endif
+
+	mdesc_release(hp);
356} 455}
357 456
358static int inline find_in_proplist(const char *list, const char *match, int len) 457static int inline find_in_proplist(const char *list, const char *match, int len)
@@ -369,15 +468,17 @@ static int inline find_in_proplist(const char *list, const char *match, int len)
369 return 0; 468 return 0;
370} 469}
371 470
372static void __init fill_in_one_cache(cpuinfo_sparc *c, struct mdesc_node *mp) 471static void __devinit fill_in_one_cache(cpuinfo_sparc *c,
472 struct mdesc_handle *hp,
473 u64 mp)
373{ 474{
374 const u64 *level = md_get_property(mp, "level", NULL); 475 const u64 *level = mdesc_get_property(hp, mp, "level", NULL);
375 const u64 *size = md_get_property(mp, "size", NULL); 476 const u64 *size = mdesc_get_property(hp, mp, "size", NULL);
376 const u64 *line_size = md_get_property(mp, "line-size", NULL); 477 const u64 *line_size = mdesc_get_property(hp, mp, "line-size", NULL);
377 const char *type; 478 const char *type;
378 int type_len; 479 int type_len;
379 480
380 type = md_get_property(mp, "type", &type_len); 481 type = mdesc_get_property(hp, mp, "type", &type_len);
381 482
382 switch (*level) { 483 switch (*level) {
383 case 1: 484 case 1:
@@ -400,48 +501,45 @@ static void __init fill_in_one_cache(cpuinfo_sparc *c, struct mdesc_node *mp)
400 } 501 }
401 502
402 if (*level == 1) { 503 if (*level == 1) {
403 unsigned int i; 504 u64 a;
404
405 for (i = 0; i < mp->num_arcs; i++) {
406 struct mdesc_node *t = mp->arcs[i].arc;
407 505
408 if (strcmp(mp->arcs[i].name, "fwd")) 506 mdesc_for_each_arc(a, hp, mp, MDESC_ARC_TYPE_FWD) {
409 continue; 507 u64 target = mdesc_arc_target(hp, a);
508 const char *name = mdesc_node_name(hp, target);
410 509
411 if (!strcmp(t->name, "cache")) 510 if (!strcmp(name, "cache"))
412 fill_in_one_cache(c, t); 511 fill_in_one_cache(c, hp, target);
413 } 512 }
414 } 513 }
415} 514}
416 515
417static void __init mark_core_ids(struct mdesc_node *mp, int core_id) 516static void __devinit mark_core_ids(struct mdesc_handle *hp, u64 mp,
517 int core_id)
418{ 518{
419 unsigned int i; 519 u64 a;
420 520
421 for (i = 0; i < mp->num_arcs; i++) { 521 mdesc_for_each_arc(a, hp, mp, MDESC_ARC_TYPE_BACK) {
422 struct mdesc_node *t = mp->arcs[i].arc; 522 u64 t = mdesc_arc_target(hp, a);
523 const char *name;
423 const u64 *id; 524 const u64 *id;
424 525
425 if (strcmp(mp->arcs[i].name, "back")) 526 name = mdesc_node_name(hp, t);
426 continue; 527 if (!strcmp(name, "cpu")) {
427 528 id = mdesc_get_property(hp, t, "id", NULL);
428 if (!strcmp(t->name, "cpu")) {
429 id = md_get_property(t, "id", NULL);
430 if (*id < NR_CPUS) 529 if (*id < NR_CPUS)
431 cpu_data(*id).core_id = core_id; 530 cpu_data(*id).core_id = core_id;
432 } else { 531 } else {
433 unsigned int j; 532 u64 j;
434 533
435 for (j = 0; j < t->num_arcs; j++) { 534 mdesc_for_each_arc(j, hp, t, MDESC_ARC_TYPE_BACK) {
436 struct mdesc_node *n = t->arcs[j].arc; 535 u64 n = mdesc_arc_target(hp, j);
536 const char *n_name;
437 537
438 if (strcmp(t->arcs[j].name, "back")) 538 n_name = mdesc_node_name(hp, n);
539 if (strcmp(n_name, "cpu"))
439 continue; 540 continue;
440 541
441 if (strcmp(n->name, "cpu")) 542 id = mdesc_get_property(hp, n, "id", NULL);
442 continue;
443
444 id = md_get_property(n, "id", NULL);
445 if (*id < NR_CPUS) 543 if (*id < NR_CPUS)
446 cpu_data(*id).core_id = core_id; 544 cpu_data(*id).core_id = core_id;
447 } 545 }
@@ -449,78 +547,81 @@ static void __init mark_core_ids(struct mdesc_node *mp, int core_id)
449 } 547 }
450} 548}
451 549
452static void __init set_core_ids(void) 550static void __devinit set_core_ids(struct mdesc_handle *hp)
453{ 551{
454 struct mdesc_node *mp;
455 int idx; 552 int idx;
553 u64 mp;
456 554
457 idx = 1; 555 idx = 1;
458 md_for_each_node_by_name(mp, "cache") { 556 mdesc_for_each_node_by_name(hp, mp, "cache") {
459 const u64 *level = md_get_property(mp, "level", NULL); 557 const u64 *level;
460 const char *type; 558 const char *type;
461 int len; 559 int len;
462 560
561 level = mdesc_get_property(hp, mp, "level", NULL);
463 if (*level != 1) 562 if (*level != 1)
464 continue; 563 continue;
465 564
466 type = md_get_property(mp, "type", &len); 565 type = mdesc_get_property(hp, mp, "type", &len);
467 if (!find_in_proplist(type, "instn", len)) 566 if (!find_in_proplist(type, "instn", len))
468 continue; 567 continue;
469 568
470 mark_core_ids(mp, idx); 569 mark_core_ids(hp, mp, idx);
471 570
472 idx++; 571 idx++;
473 } 572 }
474} 573}
475 574
476static void __init mark_proc_ids(struct mdesc_node *mp, int proc_id) 575static void __devinit mark_proc_ids(struct mdesc_handle *hp, u64 mp,
576 int proc_id)
477{ 577{
478 int i; 578 u64 a;
479 579
480 for (i = 0; i < mp->num_arcs; i++) { 580 mdesc_for_each_arc(a, hp, mp, MDESC_ARC_TYPE_BACK) {
481 struct mdesc_node *t = mp->arcs[i].arc; 581 u64 t = mdesc_arc_target(hp, a);
582 const char *name;
482 const u64 *id; 583 const u64 *id;
483 584
484 if (strcmp(mp->arcs[i].name, "back")) 585 name = mdesc_node_name(hp, t);
485 continue; 586 if (strcmp(name, "cpu"))
486
487 if (strcmp(t->name, "cpu"))
488 continue; 587 continue;
489 588
490 id = md_get_property(t, "id", NULL); 589 id = mdesc_get_property(hp, t, "id", NULL);
491 if (*id < NR_CPUS) 590 if (*id < NR_CPUS)
492 cpu_data(*id).proc_id = proc_id; 591 cpu_data(*id).proc_id = proc_id;
493 } 592 }
494} 593}
495 594
496static void __init __set_proc_ids(const char *exec_unit_name) 595static void __devinit __set_proc_ids(struct mdesc_handle *hp,
596 const char *exec_unit_name)
497{ 597{
498 struct mdesc_node *mp;
499 int idx; 598 int idx;
599 u64 mp;
500 600
501 idx = 0; 601 idx = 0;
502 md_for_each_node_by_name(mp, exec_unit_name) { 602 mdesc_for_each_node_by_name(hp, mp, exec_unit_name) {
503 const char *type; 603 const char *type;
504 int len; 604 int len;
505 605
506 type = md_get_property(mp, "type", &len); 606 type = mdesc_get_property(hp, mp, "type", &len);
507 if (!find_in_proplist(type, "int", len) && 607 if (!find_in_proplist(type, "int", len) &&
508 !find_in_proplist(type, "integer", len)) 608 !find_in_proplist(type, "integer", len))
509 continue; 609 continue;
510 610
511 mark_proc_ids(mp, idx); 611 mark_proc_ids(hp, mp, idx);
512 612
513 idx++; 613 idx++;
514 } 614 }
515} 615}
516 616
517static void __init set_proc_ids(void) 617static void __devinit set_proc_ids(struct mdesc_handle *hp)
518{ 618{
519 __set_proc_ids("exec_unit"); 619 __set_proc_ids(hp, "exec_unit");
520 __set_proc_ids("exec-unit"); 620 __set_proc_ids(hp, "exec-unit");
521} 621}
522 622
523static void __init get_one_mondo_bits(const u64 *p, unsigned int *mask, unsigned char def) 623static void __devinit get_one_mondo_bits(const u64 *p, unsigned int *mask,
624 unsigned char def)
524{ 625{
525 u64 val; 626 u64 val;
526 627
@@ -538,35 +639,37 @@ use_default:
538 *mask = ((1U << def) * 64U) - 1U; 639 *mask = ((1U << def) * 64U) - 1U;
539} 640}
540 641
541static void __init get_mondo_data(struct mdesc_node *mp, struct trap_per_cpu *tb) 642static void __devinit get_mondo_data(struct mdesc_handle *hp, u64 mp,
643 struct trap_per_cpu *tb)
542{ 644{
543 const u64 *val; 645 const u64 *val;
544 646
545 val = md_get_property(mp, "q-cpu-mondo-#bits", NULL); 647 val = mdesc_get_property(hp, mp, "q-cpu-mondo-#bits", NULL);
546 get_one_mondo_bits(val, &tb->cpu_mondo_qmask, 7); 648 get_one_mondo_bits(val, &tb->cpu_mondo_qmask, 7);
547 649
548 val = md_get_property(mp, "q-dev-mondo-#bits", NULL); 650 val = mdesc_get_property(hp, mp, "q-dev-mondo-#bits", NULL);
549 get_one_mondo_bits(val, &tb->dev_mondo_qmask, 7); 651 get_one_mondo_bits(val, &tb->dev_mondo_qmask, 7);
550 652
551 val = md_get_property(mp, "q-resumable-#bits", NULL); 653 val = mdesc_get_property(hp, mp, "q-resumable-#bits", NULL);
552 get_one_mondo_bits(val, &tb->resum_qmask, 6); 654 get_one_mondo_bits(val, &tb->resum_qmask, 6);
553 655
554 val = md_get_property(mp, "q-nonresumable-#bits", NULL); 656 val = mdesc_get_property(hp, mp, "q-nonresumable-#bits", NULL);
555 get_one_mondo_bits(val, &tb->nonresum_qmask, 2); 657 get_one_mondo_bits(val, &tb->nonresum_qmask, 2);
556} 658}
557 659
558static void __init mdesc_fill_in_cpu_data(void) 660void __devinit mdesc_fill_in_cpu_data(cpumask_t mask)
559{ 661{
560 struct mdesc_node *mp; 662 struct mdesc_handle *hp = mdesc_grab();
663 u64 mp;
561 664
562 ncpus_probed = 0; 665 ncpus_probed = 0;
563 md_for_each_node_by_name(mp, "cpu") { 666 mdesc_for_each_node_by_name(hp, mp, "cpu") {
564 const u64 *id = md_get_property(mp, "id", NULL); 667 const u64 *id = mdesc_get_property(hp, mp, "id", NULL);
565 const u64 *cfreq = md_get_property(mp, "clock-frequency", NULL); 668 const u64 *cfreq = mdesc_get_property(hp, mp, "clock-frequency", NULL);
566 struct trap_per_cpu *tb; 669 struct trap_per_cpu *tb;
567 cpuinfo_sparc *c; 670 cpuinfo_sparc *c;
568 unsigned int i;
569 int cpuid; 671 int cpuid;
672 u64 a;
570 673
571 ncpus_probed++; 674 ncpus_probed++;
572 675
@@ -575,6 +678,8 @@ static void __init mdesc_fill_in_cpu_data(void)
575#ifdef CONFIG_SMP 678#ifdef CONFIG_SMP
576 if (cpuid >= NR_CPUS) 679 if (cpuid >= NR_CPUS)
577 continue; 680 continue;
681 if (!cpu_isset(cpuid, mask))
682 continue;
578#else 683#else
579 /* On uniprocessor we only want the values for the 684 /* On uniprocessor we only want the values for the
580 * real physical cpu the kernel booted onto, however 685 * real physical cpu the kernel booted onto, however
@@ -589,35 +694,30 @@ static void __init mdesc_fill_in_cpu_data(void)
589 c->clock_tick = *cfreq; 694 c->clock_tick = *cfreq;
590 695
591 tb = &trap_block[cpuid]; 696 tb = &trap_block[cpuid];
592 get_mondo_data(mp, tb); 697 get_mondo_data(hp, mp, tb);
593
594 for (i = 0; i < mp->num_arcs; i++) {
595 struct mdesc_node *t = mp->arcs[i].arc;
596 unsigned int j;
597 698
598 if (strcmp(mp->arcs[i].name, "fwd")) 699 mdesc_for_each_arc(a, hp, mp, MDESC_ARC_TYPE_FWD) {
599 continue; 700 u64 j, t = mdesc_arc_target(hp, a);
701 const char *t_name;
600 702
601 if (!strcmp(t->name, "cache")) { 703 t_name = mdesc_node_name(hp, t);
602 fill_in_one_cache(c, t); 704 if (!strcmp(t_name, "cache")) {
705 fill_in_one_cache(c, hp, t);
603 continue; 706 continue;
604 } 707 }
605 708
606 for (j = 0; j < t->num_arcs; j++) { 709 mdesc_for_each_arc(j, hp, t, MDESC_ARC_TYPE_FWD) {
607 struct mdesc_node *n; 710 u64 n = mdesc_arc_target(hp, j);
711 const char *n_name;
608 712
609 n = t->arcs[j].arc; 713 n_name = mdesc_node_name(hp, n);
610 if (strcmp(t->arcs[j].name, "fwd")) 714 if (!strcmp(n_name, "cache"))
611 continue; 715 fill_in_one_cache(c, hp, n);
612
613 if (!strcmp(n->name, "cache"))
614 fill_in_one_cache(c, n);
615 } 716 }
616 } 717 }
617 718
618#ifdef CONFIG_SMP 719#ifdef CONFIG_SMP
619 cpu_set(cpuid, cpu_present_map); 720 cpu_set(cpuid, cpu_present_map);
620 cpu_set(cpuid, phys_cpu_present_map);
621#endif 721#endif
622 722
623 c->core_id = 0; 723 c->core_id = 0;
@@ -628,45 +728,43 @@ static void __init mdesc_fill_in_cpu_data(void)
628 sparc64_multi_core = 1; 728 sparc64_multi_core = 1;
629#endif 729#endif
630 730
631 set_core_ids(); 731 set_core_ids(hp);
632 set_proc_ids(); 732 set_proc_ids(hp);
633 733
634 smp_fill_in_sib_core_maps(); 734 smp_fill_in_sib_core_maps();
735
736 mdesc_release(hp);
635} 737}
636 738
637void __init sun4v_mdesc_init(void) 739void __init sun4v_mdesc_init(void)
638{ 740{
741 struct mdesc_handle *hp;
639 unsigned long len, real_len, status; 742 unsigned long len, real_len, status;
743 cpumask_t mask;
640 744
641 (void) sun4v_mach_desc(0UL, 0UL, &len); 745 (void) sun4v_mach_desc(0UL, 0UL, &len);
642 746
643 printk("MDESC: Size is %lu bytes.\n", len); 747 printk("MDESC: Size is %lu bytes.\n", len);
644 748
645 main_mdesc = mdesc_early_alloc(len); 749 hp = mdesc_alloc(len, &bootmem_mdesc_memops);
750 if (hp == NULL) {
751 prom_printf("MDESC: alloc of %lu bytes failed.\n", len);
752 prom_halt();
753 }
646 754
647 status = sun4v_mach_desc(__pa(main_mdesc), len, &real_len); 755 status = sun4v_mach_desc(__pa(&hp->mdesc), len, &real_len);
648 if (status != HV_EOK || real_len > len) { 756 if (status != HV_EOK || real_len > len) {
649 prom_printf("sun4v_mach_desc fails, err(%lu), " 757 prom_printf("sun4v_mach_desc fails, err(%lu), "
650 "len(%lu), real_len(%lu)\n", 758 "len(%lu), real_len(%lu)\n",
651 status, len, real_len); 759 status, len, real_len);
760 mdesc_free(hp);
652 prom_halt(); 761 prom_halt();
653 } 762 }
654 763
655 len = count_nodes(main_mdesc); 764 cur_mdesc = hp;
656 printk("MDESC: %lu nodes.\n", len);
657
658 len = roundup_pow_of_two(len);
659
660 mdesc_hash = mdesc_early_alloc(len * sizeof(struct mdesc_node *));
661 mdesc_hash_size = len;
662
663 printk("MDESC: Hash size %lu entries.\n", len);
664
665 build_all_nodes(main_mdesc);
666
667 printk("MDESC: Built graph with %u bytes of memory.\n",
668 mdesc_early_allocated);
669 765
670 report_platform_properties(); 766 report_platform_properties();
671 mdesc_fill_in_cpu_data(); 767
768 cpus_setall(mask);
769 mdesc_fill_in_cpu_data(mask);
672} 770}
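
The rewritten mdesc.c replaces the pointer-linked struct mdesc_node graph with opaque u64 node indices and a reference-counted handle, so the machine description can be swapped out underneath readers during domain reconfiguration. A minimal consumer, sketched from the accessors added above (the surrounding function and its printout are hypothetical; the "cpu", "id" and "clock-frequency" names are the ones used in this diff):

        static void example_print_cpu_freqs(void)
        {
                struct mdesc_handle *hp = mdesc_grab();  /* pin the current MD */
                u64 node;

                if (!hp)
                        return;

                mdesc_for_each_node_by_name(hp, node, "cpu") {
                        const u64 *id = mdesc_get_property(hp, node, "id", NULL);
                        const u64 *freq = mdesc_get_property(hp, node,
                                                             "clock-frequency", NULL);

                        if (id && freq)
                                printk(KERN_INFO "cpu%lu: %lu Hz\n", *id, *freq);
                }

                mdesc_release(hp);  /* allow a superseded MD to be freed */
        }
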
diff --git a/arch/sparc64/kernel/power.c b/arch/sparc64/kernel/power.c
index 5d6adea3967f..8dd4294ad21e 100644
--- a/arch/sparc64/kernel/power.c
+++ b/arch/sparc64/kernel/power.c
@@ -1,7 +1,6 @@
1/* $Id: power.c,v 1.10 2001/12/11 01:57:16 davem Exp $ 1/* power.c: Power management driver.
2 * power.c: Power management driver.
3 * 2 *
4 * Copyright (C) 1999 David S. Miller (davem@redhat.com) 3 * Copyright (C) 1999, 2007 David S. Miller (davem@davemloft.net)
5 */ 4 */
6 5
7#include <linux/kernel.h> 6#include <linux/kernel.h>
@@ -19,6 +18,7 @@
19#include <asm/prom.h> 18#include <asm/prom.h>
20#include <asm/of_device.h> 19#include <asm/of_device.h>
21#include <asm/io.h> 20#include <asm/io.h>
21#include <asm/power.h>
22#include <asm/sstate.h> 22#include <asm/sstate.h>
23 23
24#include <linux/unistd.h> 24#include <linux/unistd.h>
@@ -29,24 +29,26 @@
29 */ 29 */
30int scons_pwroff = 1; 30int scons_pwroff = 1;
31 31
32#ifdef CONFIG_PCI
33#include <linux/pci.h>
34static void __iomem *power_reg; 32static void __iomem *power_reg;
35 33
36static DECLARE_WAIT_QUEUE_HEAD(powerd_wait); 34static DECLARE_WAIT_QUEUE_HEAD(powerd_wait);
37static int button_pressed; 35static int button_pressed;
38 36
39static irqreturn_t power_handler(int irq, void *dev_id) 37void wake_up_powerd(void)
40{ 38{
41 if (button_pressed == 0) { 39 if (button_pressed == 0) {
42 button_pressed = 1; 40 button_pressed = 1;
43 wake_up(&powerd_wait); 41 wake_up(&powerd_wait);
44 } 42 }
43}
44
45static irqreturn_t power_handler(int irq, void *dev_id)
46{
47 wake_up_powerd();
45 48
46 /* FIXME: Check registers for status... */ 49 /* FIXME: Check registers for status... */
47 return IRQ_HANDLED; 50 return IRQ_HANDLED;
48} 51}
49#endif /* CONFIG_PCI */
50 52
51extern void machine_halt(void); 53extern void machine_halt(void);
52extern void machine_alt_power_off(void); 54extern void machine_alt_power_off(void);
@@ -56,19 +58,18 @@ void machine_power_off(void)
56{ 58{
57 sstate_poweroff(); 59 sstate_poweroff();
58 if (!serial_console || scons_pwroff) { 60 if (!serial_console || scons_pwroff) {
59#ifdef CONFIG_PCI
60 if (power_reg) { 61 if (power_reg) {
61 /* Both register bits seem to have the 62 /* Both register bits seem to have the
62 * same effect, so until I figure out 63 * same effect, so until I figure out
63 * what the difference is... 64 * what the difference is...
64 */ 65 */
65 writel(AUXIO_PCIO_CPWR_OFF | AUXIO_PCIO_SPWR_OFF, power_reg); 66 writel(AUXIO_PCIO_CPWR_OFF | AUXIO_PCIO_SPWR_OFF, power_reg);
66 } else 67 } else {
67#endif /* CONFIG_PCI */
68 if (poweroff_method != NULL) { 68 if (poweroff_method != NULL) {
69 poweroff_method(); 69 poweroff_method();
70 /* not reached */ 70 /* not reached */
71 } 71 }
72 }
72 } 73 }
73 machine_halt(); 74 machine_halt();
74} 75}
@@ -76,7 +77,6 @@ void machine_power_off(void)
76void (*pm_power_off)(void) = machine_power_off; 77void (*pm_power_off)(void) = machine_power_off;
77EXPORT_SYMBOL(pm_power_off); 78EXPORT_SYMBOL(pm_power_off);
78 79
79#ifdef CONFIG_PCI
80static int powerd(void *__unused) 80static int powerd(void *__unused)
81{ 81{
82 static char *envp[] = { "HOME=/", "TERM=linux", "PATH=/sbin:/usr/sbin:/bin:/usr/bin", NULL }; 82 static char *envp[] = { "HOME=/", "TERM=linux", "PATH=/sbin:/usr/sbin:/bin:/usr/bin", NULL };
@@ -86,7 +86,7 @@ static int powerd(void *__unused)
86 daemonize("powerd"); 86 daemonize("powerd");
87 87
88 add_wait_queue(&powerd_wait, &wait); 88 add_wait_queue(&powerd_wait, &wait);
89again: 89
90 for (;;) { 90 for (;;) {
91 set_task_state(current, TASK_INTERRUPTIBLE); 91 set_task_state(current, TASK_INTERRUPTIBLE);
92 if (button_pressed) 92 if (button_pressed)
@@ -100,16 +100,28 @@ again:
100 /* Ok, down we go... */ 100 /* Ok, down we go... */
101 button_pressed = 0; 101 button_pressed = 0;
102 if (kernel_execve("/sbin/shutdown", argv, envp) < 0) { 102 if (kernel_execve("/sbin/shutdown", argv, envp) < 0) {
103 printk("powerd: shutdown execution failed\n"); 103 printk(KERN_ERR "powerd: shutdown execution failed\n");
104 add_wait_queue(&powerd_wait, &wait); 104 machine_power_off();
105 goto again;
106 } 105 }
107 return 0; 106 return 0;
108} 107}
109 108
109int start_powerd(void)
110{
111 int err;
112
113 err = kernel_thread(powerd, NULL, CLONE_FS);
114 if (err < 0)
115 printk(KERN_ERR "power: Failed to start power daemon.\n");
116 else
117 printk(KERN_INFO "power: powerd running.\n");
118
119 return err;
120}
121
110static int __init has_button_interrupt(unsigned int irq, struct device_node *dp) 122static int __init has_button_interrupt(unsigned int irq, struct device_node *dp)
111{ 123{
112 if (irq == PCI_IRQ_NONE) 124 if (irq == 0xffffffff)
113 return 0; 125 return 0;
114 if (!of_find_property(dp, "button", NULL)) 126 if (!of_find_property(dp, "button", NULL))
115 return 0; 127 return 0;
@@ -130,17 +142,14 @@ static int __devinit power_probe(struct of_device *op, const struct of_device_id
130 poweroff_method = machine_halt; /* able to use the standard halt */ 142 poweroff_method = machine_halt; /* able to use the standard halt */
131 143
132 if (has_button_interrupt(irq, op->node)) { 144 if (has_button_interrupt(irq, op->node)) {
133 if (kernel_thread(powerd, NULL, CLONE_FS) < 0) { 145 if (start_powerd() < 0)
134 printk("Failed to start power daemon.\n");
135 return 0; 146 return 0;
136 }
137 printk("powerd running.\n");
138 147
139 if (request_irq(irq, 148 if (request_irq(irq,
140 power_handler, 0, "power", NULL) < 0) 149 power_handler, 0, "power", NULL) < 0)
141 printk("power: Error, cannot register IRQ handler.\n"); 150 printk(KERN_ERR "power: Cannot setup IRQ handler.\n");
142 } else { 151 } else {
143 printk("not using powerd.\n"); 152 printk(KERN_INFO "power: Not using powerd.\n");
144 } 153 }
145 154
146 return 0; 155 return 0;
@@ -164,4 +173,3 @@ void __init power_init(void)
164 of_register_driver(&power_driver, &of_bus_type); 173 of_register_driver(&power_driver, &of_bus_type);
165 return; 174 return;
166} 175}
167#endif /* CONFIG_PCI */
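
With the CONFIG_PCI conditionals gone, power.c always builds and exposes wake_up_powerd() and start_powerd() to other code (the merge log mentions exporting powerd facilities for external entities). A hypothetical consumer, assuming <asm/power.h> declares both helpers as shown in the hunks above:

        #include <asm/power.h>

        /* Hypothetical: spawn powerd once at init time... */
        static int __init example_service_init(void)
        {
                int err = start_powerd();  /* negative errno on failure */

                return err < 0 ? err : 0;
        }

        /* ...then kick it when a shutdown request arrives, e.g. over a
         * domain-services channel; powerd wakes and execs /sbin/shutdown.
         */
        static void example_poweroff_request(void)
        {
                wake_up_powerd();
        }
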
diff --git a/arch/sparc64/kernel/process.c b/arch/sparc64/kernel/process.c
index f5f97e2c669c..93557507ec9f 100644
--- a/arch/sparc64/kernel/process.c
+++ b/arch/sparc64/kernel/process.c
@@ -29,6 +29,7 @@
29#include <linux/compat.h> 29#include <linux/compat.h>
30#include <linux/tick.h> 30#include <linux/tick.h>
31#include <linux/init.h> 31#include <linux/init.h>
32#include <linux/cpu.h>
32 33
33#include <asm/oplib.h> 34#include <asm/oplib.h>
34#include <asm/uaccess.h> 35#include <asm/uaccess.h>
@@ -49,7 +50,7 @@
49 50
50/* #define VERBOSE_SHOWREGS */ 51/* #define VERBOSE_SHOWREGS */
51 52
52static void sparc64_yield(void) 53static void sparc64_yield(int cpu)
53{ 54{
54 if (tlb_type != hypervisor) 55 if (tlb_type != hypervisor)
55 return; 56 return;
@@ -57,7 +58,7 @@ static void sparc64_yield(void)
57 clear_thread_flag(TIF_POLLING_NRFLAG); 58 clear_thread_flag(TIF_POLLING_NRFLAG);
58 smp_mb__after_clear_bit(); 59 smp_mb__after_clear_bit();
59 60
60 while (!need_resched()) { 61 while (!need_resched() && !cpu_is_offline(cpu)) {
61 unsigned long pstate; 62 unsigned long pstate;
62 63
63 /* Disable interrupts. */ 64 /* Disable interrupts. */
@@ -68,7 +69,7 @@ static void sparc64_yield(void)
68 : "=&r" (pstate) 69 : "=&r" (pstate)
69 : "i" (PSTATE_IE)); 70 : "i" (PSTATE_IE));
70 71
71 if (!need_resched()) 72 if (!need_resched() && !cpu_is_offline(cpu))
72 sun4v_cpu_yield(); 73 sun4v_cpu_yield();
73 74
74 /* Re-enable interrupts. */ 75 /* Re-enable interrupts. */
@@ -86,15 +87,25 @@ static void sparc64_yield(void)
86/* The idle loop on sparc64. */ 87/* The idle loop on sparc64. */
87void cpu_idle(void) 88void cpu_idle(void)
88{ 89{
90 int cpu = smp_processor_id();
91
89 set_thread_flag(TIF_POLLING_NRFLAG); 92 set_thread_flag(TIF_POLLING_NRFLAG);
90 93
91 while(1) { 94 while(1) {
92 tick_nohz_stop_sched_tick(); 95 tick_nohz_stop_sched_tick();
93 while (!need_resched()) 96
94 sparc64_yield(); 97 while (!need_resched() && !cpu_is_offline(cpu))
98 sparc64_yield(cpu);
99
95 tick_nohz_restart_sched_tick(); 100 tick_nohz_restart_sched_tick();
96 101
97 preempt_enable_no_resched(); 102 preempt_enable_no_resched();
103
104#ifdef CONFIG_HOTPLUG_CPU
105 if (cpu_is_offline(cpu))
106 cpu_play_dead();
107#endif
108
98 schedule(); 109 schedule();
99 preempt_disable(); 110 preempt_disable();
100 } 111 }
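
The idle loop now re-checks cpu_is_offline() with interrupts disabled before sun4v_cpu_yield(), closing the window where a CPU that was just marked offline could yield to the hypervisor and never notice; once the inner loops exit, cpu_play_dead() is reached from cpu_idle() itself. A condensed, hypothetical restatement of the exit condition used by both loops:

        /* Sketch only: stay in idle while neither a reschedule nor an
         * offline request is pending for this cpu.
         */
        static inline int example_may_idle(int cpu)
        {
                return !need_resched() && !cpu_is_offline(cpu);
        }
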
diff --git a/arch/sparc64/kernel/prom.c b/arch/sparc64/kernel/prom.c
index 61036b346664..5d220302cd50 100644
--- a/arch/sparc64/kernel/prom.c
+++ b/arch/sparc64/kernel/prom.c
@@ -1808,7 +1808,7 @@ static void __init of_fill_in_cpu_data(void)
1808 1808
1809#ifdef CONFIG_SMP 1809#ifdef CONFIG_SMP
1810 cpu_set(cpuid, cpu_present_map); 1810 cpu_set(cpuid, cpu_present_map);
1811 cpu_set(cpuid, phys_cpu_present_map); 1811 cpu_set(cpuid, cpu_possible_map);
1812#endif 1812#endif
1813 } 1813 }
1814 1814
diff --git a/arch/sparc64/kernel/setup.c b/arch/sparc64/kernel/setup.c
index 7490cc670a53..dc928e49e341 100644
--- a/arch/sparc64/kernel/setup.c
+++ b/arch/sparc64/kernel/setup.c
@@ -442,7 +442,6 @@ static int show_cpuinfo(struct seq_file *m, void *__unused)
442 "D$ parity tl1\t: %u\n" 442 "D$ parity tl1\t: %u\n"
443 "I$ parity tl1\t: %u\n" 443 "I$ parity tl1\t: %u\n"
444#ifndef CONFIG_SMP 444#ifndef CONFIG_SMP
445 "Cpu0Bogo\t: %lu.%02lu\n"
446 "Cpu0ClkTck\t: %016lx\n" 445 "Cpu0ClkTck\t: %016lx\n"
447#endif 446#endif
448 , 447 ,
@@ -455,10 +454,8 @@ static int show_cpuinfo(struct seq_file *m, void *__unused)
455 ncpus_probed, 454 ncpus_probed,
456 num_online_cpus(), 455 num_online_cpus(),
457 dcache_parity_tl1_occurred, 456 dcache_parity_tl1_occurred,
458 icache_parity_tl1_occurred 457 icache_parity_tl1_occurred,
459#ifndef CONFIG_SMP 458#ifndef CONFIG_SMP
460 , cpu_data(0).udelay_val/(500000/HZ),
461 (cpu_data(0).udelay_val/(5000/HZ)) % 100,
462 cpu_data(0).clock_tick 459 cpu_data(0).clock_tick
463#endif 460#endif
464 ); 461 );
diff --git a/arch/sparc64/kernel/smp.c b/arch/sparc64/kernel/smp.c
index 40e40f968d61..b448d33321c6 100644
--- a/arch/sparc64/kernel/smp.c
+++ b/arch/sparc64/kernel/smp.c
@@ -1,6 +1,6 @@
1/* smp.c: Sparc64 SMP support. 1/* smp.c: Sparc64 SMP support.
2 * 2 *
3 * Copyright (C) 1997 David S. Miller (davem@caip.rutgers.edu) 3 * Copyright (C) 1997, 2007 David S. Miller (davem@davemloft.net)
4 */ 4 */
5 5
6#include <linux/module.h> 6#include <linux/module.h>
@@ -28,6 +28,8 @@
28#include <asm/tlbflush.h> 28#include <asm/tlbflush.h>
29#include <asm/mmu_context.h> 29#include <asm/mmu_context.h>
30#include <asm/cpudata.h> 30#include <asm/cpudata.h>
31#include <asm/hvtramp.h>
32#include <asm/io.h>
31 33
32#include <asm/irq.h> 34#include <asm/irq.h>
33#include <asm/irq_regs.h> 35#include <asm/irq_regs.h>
@@ -41,22 +43,26 @@
41#include <asm/sections.h> 43#include <asm/sections.h>
42#include <asm/prom.h> 44#include <asm/prom.h>
43#include <asm/mdesc.h> 45#include <asm/mdesc.h>
46#include <asm/ldc.h>
47#include <asm/hypervisor.h>
44 48
45extern void calibrate_delay(void); 49extern void calibrate_delay(void);
46 50
47int sparc64_multi_core __read_mostly; 51int sparc64_multi_core __read_mostly;
48 52
49/* Please don't make this stuff initdata!!! --DaveM */ 53cpumask_t cpu_possible_map __read_mostly = CPU_MASK_NONE;
50unsigned char boot_cpu_id;
51
52cpumask_t cpu_online_map __read_mostly = CPU_MASK_NONE; 54cpumask_t cpu_online_map __read_mostly = CPU_MASK_NONE;
53cpumask_t phys_cpu_present_map __read_mostly = CPU_MASK_NONE;
54cpumask_t cpu_sibling_map[NR_CPUS] __read_mostly = 55cpumask_t cpu_sibling_map[NR_CPUS] __read_mostly =
55 { [0 ... NR_CPUS-1] = CPU_MASK_NONE }; 56 { [0 ... NR_CPUS-1] = CPU_MASK_NONE };
56cpumask_t cpu_core_map[NR_CPUS] __read_mostly = 57cpumask_t cpu_core_map[NR_CPUS] __read_mostly =
57 { [0 ... NR_CPUS-1] = CPU_MASK_NONE }; 58 { [0 ... NR_CPUS-1] = CPU_MASK_NONE };
59
60EXPORT_SYMBOL(cpu_possible_map);
61EXPORT_SYMBOL(cpu_online_map);
62EXPORT_SYMBOL(cpu_sibling_map);
63EXPORT_SYMBOL(cpu_core_map);
64
58static cpumask_t smp_commenced_mask; 65static cpumask_t smp_commenced_mask;
59static cpumask_t cpu_callout_map;
60 66
61void smp_info(struct seq_file *m) 67void smp_info(struct seq_file *m)
62{ 68{
@@ -73,18 +79,17 @@ void smp_bogo(struct seq_file *m)
73 79
74 for_each_online_cpu(i) 80 for_each_online_cpu(i)
75 seq_printf(m, 81 seq_printf(m,
76 "Cpu%dBogo\t: %lu.%02lu\n"
77 "Cpu%dClkTck\t: %016lx\n", 82 "Cpu%dClkTck\t: %016lx\n",
78 i, cpu_data(i).udelay_val / (500000/HZ),
79 (cpu_data(i).udelay_val / (5000/HZ)) % 100,
80 i, cpu_data(i).clock_tick); 83 i, cpu_data(i).clock_tick);
81} 84}
82 85
86static __cacheline_aligned_in_smp DEFINE_SPINLOCK(call_lock);
87
83extern void setup_sparc64_timer(void); 88extern void setup_sparc64_timer(void);
84 89
85static volatile unsigned long callin_flag = 0; 90static volatile unsigned long callin_flag = 0;
86 91
87void __init smp_callin(void) 92void __devinit smp_callin(void)
88{ 93{
89 int cpuid = hard_smp_processor_id(); 94 int cpuid = hard_smp_processor_id();
90 95
@@ -102,8 +107,6 @@ void __init smp_callin(void)
102 107
103 local_irq_enable(); 108 local_irq_enable();
104 109
105 calibrate_delay();
106 cpu_data(cpuid).udelay_val = loops_per_jiffy;
107 callin_flag = 1; 110 callin_flag = 1;
108 __asm__ __volatile__("membar #Sync\n\t" 111 __asm__ __volatile__("membar #Sync\n\t"
109 "flush %%g6" : : : "memory"); 112 "flush %%g6" : : : "memory");
@@ -120,7 +123,9 @@ void __init smp_callin(void)
120 while (!cpu_isset(cpuid, smp_commenced_mask)) 123 while (!cpu_isset(cpuid, smp_commenced_mask))
121 rmb(); 124 rmb();
122 125
126 spin_lock(&call_lock);
123 cpu_set(cpuid, cpu_online_map); 127 cpu_set(cpuid, cpu_online_map);
128 spin_unlock(&call_lock);
124 129
125 /* idle thread is expected to have preempt disabled */ 130 /* idle thread is expected to have preempt disabled */
126 preempt_disable(); 131 preempt_disable();
@@ -268,6 +273,67 @@ static void smp_synchronize_one_tick(int cpu)
268 spin_unlock_irqrestore(&itc_sync_lock, flags); 273 spin_unlock_irqrestore(&itc_sync_lock, flags);
269} 274}
270 275
276#if defined(CONFIG_SUN_LDOMS) && defined(CONFIG_HOTPLUG_CPU)
277/* XXX Put this in some common place. XXX */
278static unsigned long kimage_addr_to_ra(void *p)
279{
280 unsigned long val = (unsigned long) p;
281
282 return kern_base + (val - KERNBASE);
283}
284
285static void ldom_startcpu_cpuid(unsigned int cpu, unsigned long thread_reg)
286{
287 extern unsigned long sparc64_ttable_tl0;
288 extern unsigned long kern_locked_tte_data;
289 extern int bigkernel;
290 struct hvtramp_descr *hdesc;
291 unsigned long trampoline_ra;
292 struct trap_per_cpu *tb;
293 u64 tte_vaddr, tte_data;
294 unsigned long hv_err;
295
296 hdesc = kzalloc(sizeof(*hdesc), GFP_KERNEL);
297 if (!hdesc) {
298 printk(KERN_ERR "ldom_startcpu_cpuid: Cannot allocate "
299 "hvtramp_descr.\n");
300 return;
301 }
302
303 hdesc->cpu = cpu;
304 hdesc->num_mappings = (bigkernel ? 2 : 1);
305
306 tb = &trap_block[cpu];
307 tb->hdesc = hdesc;
308
309 hdesc->fault_info_va = (unsigned long) &tb->fault_info;
310 hdesc->fault_info_pa = kimage_addr_to_ra(&tb->fault_info);
311
312 hdesc->thread_reg = thread_reg;
313
314 tte_vaddr = (unsigned long) KERNBASE;
315 tte_data = kern_locked_tte_data;
316
317 hdesc->maps[0].vaddr = tte_vaddr;
318 hdesc->maps[0].tte = tte_data;
319 if (bigkernel) {
320 tte_vaddr += 0x400000;
321 tte_data += 0x400000;
322 hdesc->maps[1].vaddr = tte_vaddr;
323 hdesc->maps[1].tte = tte_data;
324 }
325
326 trampoline_ra = kimage_addr_to_ra(hv_cpu_startup);
327
328 hv_err = sun4v_cpu_start(cpu, trampoline_ra,
329 kimage_addr_to_ra(&sparc64_ttable_tl0),
330 __pa(hdesc));
331 if (hv_err)
332 printk(KERN_ERR "ldom_startcpu_cpuid: sun4v_cpu_start() "
333 "gives error %lu\n", hv_err);
334}
335#endif
336
271extern void sun4v_init_mondo_queues(int use_bootmem, int cpu, int alloc, int load); 337extern void sun4v_init_mondo_queues(int use_bootmem, int cpu, int alloc, int load);
272 338
273extern unsigned long sparc64_cpu_startup; 339extern unsigned long sparc64_cpu_startup;
@@ -280,6 +346,7 @@ static struct thread_info *cpu_new_thread = NULL;
280 346
281static int __devinit smp_boot_one_cpu(unsigned int cpu) 347static int __devinit smp_boot_one_cpu(unsigned int cpu)
282{ 348{
349 struct trap_per_cpu *tb = &trap_block[cpu];
283 unsigned long entry = 350 unsigned long entry =
284 (unsigned long)(&sparc64_cpu_startup); 351 (unsigned long)(&sparc64_cpu_startup);
285 unsigned long cookie = 352 unsigned long cookie =
@@ -290,20 +357,25 @@ static int __devinit smp_boot_one_cpu(unsigned int cpu)
290 p = fork_idle(cpu); 357 p = fork_idle(cpu);
291 callin_flag = 0; 358 callin_flag = 0;
292 cpu_new_thread = task_thread_info(p); 359 cpu_new_thread = task_thread_info(p);
293 cpu_set(cpu, cpu_callout_map);
294 360
295 if (tlb_type == hypervisor) { 361 if (tlb_type == hypervisor) {
296 /* Alloc the mondo queues, cpu will load them. */ 362 /* Alloc the mondo queues, cpu will load them. */
297 sun4v_init_mondo_queues(0, cpu, 1, 0); 363 sun4v_init_mondo_queues(0, cpu, 1, 0);
298 364
299 prom_startcpu_cpuid(cpu, entry, cookie); 365#if defined(CONFIG_SUN_LDOMS) && defined(CONFIG_HOTPLUG_CPU)
366 if (ldom_domaining_enabled)
367 ldom_startcpu_cpuid(cpu,
368 (unsigned long) cpu_new_thread);
369 else
370#endif
371 prom_startcpu_cpuid(cpu, entry, cookie);
300 } else { 372 } else {
301 struct device_node *dp = of_find_node_by_cpuid(cpu); 373 struct device_node *dp = of_find_node_by_cpuid(cpu);
302 374
303 prom_startcpu(dp->node, entry, cookie); 375 prom_startcpu(dp->node, entry, cookie);
304 } 376 }
305 377
306 for (timeout = 0; timeout < 5000000; timeout++) { 378 for (timeout = 0; timeout < 50000; timeout++) {
307 if (callin_flag) 379 if (callin_flag)
308 break; 380 break;
309 udelay(100); 381 udelay(100);
@@ -313,11 +385,15 @@ static int __devinit smp_boot_one_cpu(unsigned int cpu)
313 ret = 0; 385 ret = 0;
314 } else { 386 } else {
315 printk("Processor %d is stuck.\n", cpu); 387 printk("Processor %d is stuck.\n", cpu);
316 cpu_clear(cpu, cpu_callout_map);
317 ret = -ENODEV; 388 ret = -ENODEV;
318 } 389 }
319 cpu_new_thread = NULL; 390 cpu_new_thread = NULL;
320 391
392 if (tb->hdesc) {
393 kfree(tb->hdesc);
394 tb->hdesc = NULL;
395 }
396
321 return ret; 397 return ret;
322} 398}
323 399
@@ -720,7 +796,6 @@ struct call_data_struct {
720 int wait; 796 int wait;
721}; 797};
722 798
723static __cacheline_aligned_in_smp DEFINE_SPINLOCK(call_lock);
724static struct call_data_struct *call_data; 799static struct call_data_struct *call_data;
725 800
726extern unsigned long xcall_call_function; 801extern unsigned long xcall_call_function;
@@ -1152,34 +1227,14 @@ void smp_penguin_jailcell(int irq, struct pt_regs *regs)
1152 preempt_enable(); 1227 preempt_enable();
1153} 1228}
1154 1229
1155void __init smp_tick_init(void)
1156{
1157 boot_cpu_id = hard_smp_processor_id();
1158}
1159
1160/* /proc/profile writes can call this, don't __init it please. */ 1230/* /proc/profile writes can call this, don't __init it please. */
1161int setup_profiling_timer(unsigned int multiplier) 1231int setup_profiling_timer(unsigned int multiplier)
1162{ 1232{
1163 return -EINVAL; 1233 return -EINVAL;
1164} 1234}
1165 1235
1166/* Constrain the number of cpus to max_cpus. */
1167void __init smp_prepare_cpus(unsigned int max_cpus) 1236void __init smp_prepare_cpus(unsigned int max_cpus)
1168{ 1237{
1169 int i;
1170
1171 if (num_possible_cpus() > max_cpus) {
1172 for_each_possible_cpu(i) {
1173 if (i != boot_cpu_id) {
1174 cpu_clear(i, phys_cpu_present_map);
1175 cpu_clear(i, cpu_present_map);
1176 if (num_possible_cpus() <= max_cpus)
1177 break;
1178 }
1179 }
1180 }
1181
1182 cpu_data(boot_cpu_id).udelay_val = loops_per_jiffy;
1183} 1238}
1184 1239
1185void __devinit smp_prepare_boot_cpu(void) 1240void __devinit smp_prepare_boot_cpu(void)
@@ -1190,30 +1245,32 @@ void __devinit smp_fill_in_sib_core_maps(void)
1190{ 1245{
1191 unsigned int i; 1246 unsigned int i;
1192 1247
1193 for_each_possible_cpu(i) { 1248 for_each_present_cpu(i) {
1194 unsigned int j; 1249 unsigned int j;
1195 1250
1251 cpus_clear(cpu_core_map[i]);
1196 if (cpu_data(i).core_id == 0) { 1252 if (cpu_data(i).core_id == 0) {
1197 cpu_set(i, cpu_core_map[i]); 1253 cpu_set(i, cpu_core_map[i]);
1198 continue; 1254 continue;
1199 } 1255 }
1200 1256
1201 for_each_possible_cpu(j) { 1257 for_each_present_cpu(j) {
1202 if (cpu_data(i).core_id == 1258 if (cpu_data(i).core_id ==
1203 cpu_data(j).core_id) 1259 cpu_data(j).core_id)
1204 cpu_set(j, cpu_core_map[i]); 1260 cpu_set(j, cpu_core_map[i]);
1205 } 1261 }
1206 } 1262 }
1207 1263
1208 for_each_possible_cpu(i) { 1264 for_each_present_cpu(i) {
1209 unsigned int j; 1265 unsigned int j;
1210 1266
1267 cpus_clear(cpu_sibling_map[i]);
1211 if (cpu_data(i).proc_id == -1) { 1268 if (cpu_data(i).proc_id == -1) {
1212 cpu_set(i, cpu_sibling_map[i]); 1269 cpu_set(i, cpu_sibling_map[i]);
1213 continue; 1270 continue;
1214 } 1271 }
1215 1272
1216 for_each_possible_cpu(j) { 1273 for_each_present_cpu(j) {
1217 if (cpu_data(i).proc_id == 1274 if (cpu_data(i).proc_id ==
1218 cpu_data(j).proc_id) 1275 cpu_data(j).proc_id)
1219 cpu_set(j, cpu_sibling_map[i]); 1276 cpu_set(j, cpu_sibling_map[i]);
@@ -1242,18 +1299,112 @@ int __cpuinit __cpu_up(unsigned int cpu)
1242 return ret; 1299 return ret;
1243} 1300}
1244 1301
1245void __init smp_cpus_done(unsigned int max_cpus) 1302#ifdef CONFIG_HOTPLUG_CPU
1303void cpu_play_dead(void)
1304{
1305 int cpu = smp_processor_id();
1306 unsigned long pstate;
1307
1308 idle_task_exit();
1309
1310 if (tlb_type == hypervisor) {
1311 struct trap_per_cpu *tb = &trap_block[cpu];
1312
1313 sun4v_cpu_qconf(HV_CPU_QUEUE_CPU_MONDO,
1314 tb->cpu_mondo_pa, 0);
1315 sun4v_cpu_qconf(HV_CPU_QUEUE_DEVICE_MONDO,
1316 tb->dev_mondo_pa, 0);
1317 sun4v_cpu_qconf(HV_CPU_QUEUE_RES_ERROR,
1318 tb->resum_mondo_pa, 0);
1319 sun4v_cpu_qconf(HV_CPU_QUEUE_NONRES_ERROR,
1320 tb->nonresum_mondo_pa, 0);
1321 }
1322
1323 cpu_clear(cpu, smp_commenced_mask);
1324 membar_safe("#Sync");
1325
1326 local_irq_disable();
1327
1328 __asm__ __volatile__(
1329 "rdpr %%pstate, %0\n\t"
1330 "wrpr %0, %1, %%pstate"
1331 : "=r" (pstate)
1332 : "i" (PSTATE_IE));
1333
1334 while (1)
1335 barrier();
1336}
1337
1338int __cpu_disable(void)
1246{ 1339{
1247 unsigned long bogosum = 0; 1340 int cpu = smp_processor_id();
1341 cpuinfo_sparc *c;
1248 int i; 1342 int i;
1249 1343
1250 for_each_online_cpu(i) 1344 for_each_cpu_mask(i, cpu_core_map[cpu])
1251 bogosum += cpu_data(i).udelay_val; 1345 cpu_clear(cpu, cpu_core_map[i]);
1252 printk("Total of %ld processors activated " 1346 cpus_clear(cpu_core_map[cpu]);
1253 "(%lu.%02lu BogoMIPS).\n", 1347
1254 (long) num_online_cpus(), 1348 for_each_cpu_mask(i, cpu_sibling_map[cpu])
1255 bogosum/(500000/HZ), 1349 cpu_clear(cpu, cpu_sibling_map[i]);
1256 (bogosum/(5000/HZ))%100); 1350 cpus_clear(cpu_sibling_map[cpu]);
1351
1352 c = &cpu_data(cpu);
1353
1354 c->core_id = 0;
1355 c->proc_id = -1;
1356
1357 spin_lock(&call_lock);
1358 cpu_clear(cpu, cpu_online_map);
1359 spin_unlock(&call_lock);
1360
1361 smp_wmb();
1362
1363 /* Make sure no interrupts point to this cpu. */
1364 fixup_irqs();
1365
1366 local_irq_enable();
1367 mdelay(1);
1368 local_irq_disable();
1369
1370 return 0;
1371}
1372
1373void __cpu_die(unsigned int cpu)
1374{
1375 int i;
1376
1377 for (i = 0; i < 100; i++) {
1378 smp_rmb();
1379 if (!cpu_isset(cpu, smp_commenced_mask))
1380 break;
1381 msleep(100);
1382 }
1383 if (cpu_isset(cpu, smp_commenced_mask)) {
1384 printk(KERN_ERR "CPU %u didn't die...\n", cpu);
1385 } else {
1386#if defined(CONFIG_SUN_LDOMS)
1387 unsigned long hv_err;
1388 int limit = 100;
1389
1390 do {
1391 hv_err = sun4v_cpu_stop(cpu);
1392 if (hv_err == HV_EOK) {
1393 cpu_clear(cpu, cpu_present_map);
1394 break;
1395 }
1396 } while (--limit > 0);
1397 if (limit <= 0) {
1398 printk(KERN_ERR "sun4v_cpu_stop() fails err=%lu\n",
1399 hv_err);
1400 }
1401#endif
1402 }
1403}
1404#endif
1405
1406void __init smp_cpus_done(unsigned int max_cpus)
1407{
1257} 1408}
1258 1409
1259void smp_send_reschedule(int cpu) 1410void smp_send_reschedule(int cpu)
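
The new hotplug entry points follow the generic contract from kernel/cpu.c: __cpu_disable() runs on the dying CPU and unhooks it from the core/sibling maps, the online map (under call_lock, so it cannot race an in-flight smp_call_function()) and the IRQ affinity tables, while __cpu_die() runs on a survivor and polls smp_commenced_mask, which the victim clears in cpu_play_dead() before parking with interrupts off. A hypothetical, condensed view of the caller side (the real driver is _cpu_down(); notifiers, stop_machine and error paths are omitted):

        static int example_offline_cpu(unsigned int cpu)
        {
                int err = __cpu_disable();  /* on 'cpu': drop from maps, fixup_irqs() */

                if (err)
                        return err;

                __cpu_die(cpu);  /* elsewhere: wait up to ~10s for the victim,
                                  * then sun4v_cpu_stop() on LDOM guests */
                return 0;
        }
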
diff --git a/arch/sparc64/kernel/sparc64_ksyms.c b/arch/sparc64/kernel/sparc64_ksyms.c
index 6fa761612899..719d676c2ddc 100644
--- a/arch/sparc64/kernel/sparc64_ksyms.c
+++ b/arch/sparc64/kernel/sparc64_ksyms.c
@@ -1,7 +1,6 @@
1/* $Id: sparc64_ksyms.c,v 1.121 2002/02/09 19:49:31 davem Exp $ 1/* arch/sparc64/kernel/sparc64_ksyms.c: Sparc64 specific ksyms support.
2 * arch/sparc64/kernel/sparc64_ksyms.c: Sparc64 specific ksyms support.
3 * 2 *
4 * Copyright (C) 1996 David S. Miller (davem@caip.rutgers.edu) 3 * Copyright (C) 1996, 2007 David S. Miller (davem@davemloft.net)
5 * Copyright (C) 1996 Eddie C. Dost (ecd@skynet.be) 4 * Copyright (C) 1996 Eddie C. Dost (ecd@skynet.be)
6 * Copyright (C) 1999 Jakub Jelinek (jj@ultra.linux.cz) 5 * Copyright (C) 1999 Jakub Jelinek (jj@ultra.linux.cz)
7 */ 6 */
@@ -28,7 +27,6 @@
28#include <net/compat.h> 27#include <net/compat.h>
29 28
30#include <asm/oplib.h> 29#include <asm/oplib.h>
31#include <asm/delay.h>
32#include <asm/system.h> 30#include <asm/system.h>
33#include <asm/auxio.h> 31#include <asm/auxio.h>
34#include <asm/pgtable.h> 32#include <asm/pgtable.h>
@@ -124,10 +122,6 @@ EXPORT_SYMBOL(__write_lock);
124EXPORT_SYMBOL(__write_unlock); 122EXPORT_SYMBOL(__write_unlock);
125EXPORT_SYMBOL(__write_trylock); 123EXPORT_SYMBOL(__write_trylock);
126 124
127/* CPU online map and active count. */
128EXPORT_SYMBOL(cpu_online_map);
129EXPORT_SYMBOL(phys_cpu_present_map);
130
131EXPORT_SYMBOL(smp_call_function); 125EXPORT_SYMBOL(smp_call_function);
132#endif /* CONFIG_SMP */ 126#endif /* CONFIG_SMP */
133 127
@@ -330,12 +324,6 @@ EXPORT_SYMBOL(memset);
330EXPORT_SYMBOL(memmove); 324EXPORT_SYMBOL(memmove);
331EXPORT_SYMBOL(strncmp); 325EXPORT_SYMBOL(strncmp);
332 326
333/* Delay routines. */
334EXPORT_SYMBOL(__udelay);
335EXPORT_SYMBOL(__ndelay);
336EXPORT_SYMBOL(__const_udelay);
337EXPORT_SYMBOL(__delay);
338
339void VISenter(void); 327void VISenter(void);
340/* RAID code needs this */ 328/* RAID code needs this */
341EXPORT_SYMBOL(VISenter); 329EXPORT_SYMBOL(VISenter);
diff --git a/arch/sparc64/kernel/sysfs.c b/arch/sparc64/kernel/sysfs.c
index cdb1477af89f..52816c7be0b9 100644
--- a/arch/sparc64/kernel/sysfs.c
+++ b/arch/sparc64/kernel/sysfs.c
@@ -193,7 +193,6 @@ static ssize_t show_##NAME(struct sys_device *dev, char *buf) \
193} 193}
194 194
195SHOW_CPUDATA_ULONG_NAME(clock_tick, clock_tick); 195SHOW_CPUDATA_ULONG_NAME(clock_tick, clock_tick);
196SHOW_CPUDATA_ULONG_NAME(udelay_val, udelay_val);
197SHOW_CPUDATA_UINT_NAME(l1_dcache_size, dcache_size); 196SHOW_CPUDATA_UINT_NAME(l1_dcache_size, dcache_size);
198SHOW_CPUDATA_UINT_NAME(l1_dcache_line_size, dcache_line_size); 197SHOW_CPUDATA_UINT_NAME(l1_dcache_line_size, dcache_line_size);
199SHOW_CPUDATA_UINT_NAME(l1_icache_size, icache_size); 198SHOW_CPUDATA_UINT_NAME(l1_icache_size, icache_size);
@@ -203,7 +202,6 @@ SHOW_CPUDATA_UINT_NAME(l2_cache_line_size, ecache_line_size);
203 202
204static struct sysdev_attribute cpu_core_attrs[] = { 203static struct sysdev_attribute cpu_core_attrs[] = {
205 _SYSDEV_ATTR(clock_tick, 0444, show_clock_tick, NULL), 204 _SYSDEV_ATTR(clock_tick, 0444, show_clock_tick, NULL),
206 _SYSDEV_ATTR(udelay_val, 0444, show_udelay_val, NULL),
207 _SYSDEV_ATTR(l1_dcache_size, 0444, show_l1_dcache_size, NULL), 205 _SYSDEV_ATTR(l1_dcache_size, 0444, show_l1_dcache_size, NULL),
208 _SYSDEV_ATTR(l1_dcache_line_size, 0444, show_l1_dcache_line_size, NULL), 206 _SYSDEV_ATTR(l1_dcache_line_size, 0444, show_l1_dcache_line_size, NULL),
209 _SYSDEV_ATTR(l1_icache_size, 0444, show_l1_icache_size, NULL), 207 _SYSDEV_ATTR(l1_icache_size, 0444, show_l1_icache_size, NULL),
diff --git a/arch/sparc64/kernel/time.c b/arch/sparc64/kernel/time.c
index a31a0439244f..62e316ab1339 100644
--- a/arch/sparc64/kernel/time.c
+++ b/arch/sparc64/kernel/time.c
@@ -849,9 +849,6 @@ static unsigned long sparc64_init_timers(void)
849{ 849{
850 struct device_node *dp; 850 struct device_node *dp;
851 unsigned long clock; 851 unsigned long clock;
852#ifdef CONFIG_SMP
853 extern void smp_tick_init(void);
854#endif
855 852
856 dp = of_find_node_by_path("/"); 853 dp = of_find_node_by_path("/");
857 if (tlb_type == spitfire) { 854 if (tlb_type == spitfire) {
@@ -874,10 +871,6 @@ static unsigned long sparc64_init_timers(void)
874 clock = of_getintprop_default(dp, "stick-frequency", 0); 871 clock = of_getintprop_default(dp, "stick-frequency", 0);
875 } 872 }
876 873
877#ifdef CONFIG_SMP
878 smp_tick_init();
879#endif
880
881 return clock; 874 return clock;
882} 875}
883 876
@@ -1038,10 +1031,31 @@ static void __init setup_clockevent_multiplier(unsigned long hz)
1038 sparc64_clockevent.mult = mult; 1031 sparc64_clockevent.mult = mult;
1039} 1032}
1040 1033
1034static unsigned long tb_ticks_per_usec __read_mostly;
1035
1036void __delay(unsigned long loops)
1037{
1038 unsigned long bclock, now;
1039
1040 bclock = tick_ops->get_tick();
1041 do {
1042 now = tick_ops->get_tick();
1043 } while ((now-bclock) < loops);
1044}
1045EXPORT_SYMBOL(__delay);
1046
1047void udelay(unsigned long usecs)
1048{
1049 __delay(tb_ticks_per_usec * usecs);
1050}
1051EXPORT_SYMBOL(udelay);
1052
1041void __init time_init(void) 1053void __init time_init(void)
1042{ 1054{
1043 unsigned long clock = sparc64_init_timers(); 1055 unsigned long clock = sparc64_init_timers();
1044 1056
1057 tb_ticks_per_usec = clock / USEC_PER_SEC;
1058
1045 timer_ticks_per_nsec_quotient = 1059 timer_ticks_per_nsec_quotient =
1046 clocksource_hz2mult(clock, SPARC64_NSEC_PER_CYC_SHIFT); 1060 clocksource_hz2mult(clock, SPARC64_NSEC_PER_CYC_SHIFT);
1047 1061
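
The tick-based udelay() removes BogoMIPS calibration entirely: tb_ticks_per_usec is derived once from the (s)tick clock rate, and __delay() simply spins on tick_ops->get_tick(). As a worked example (the frequency is illustrative), with a 1.2 GHz stick clock tb_ticks_per_usec = 1200000000 / 1000000 = 1200, so:

        udelay(100);  /* spins until 100 * 1200 = 120000 ticks elapse,
                       * no loops_per_jiffy involved */
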
diff --git a/arch/sparc64/kernel/vio.c b/arch/sparc64/kernel/vio.c
new file mode 100644
index 000000000000..49569b44ea1f
--- /dev/null
+++ b/arch/sparc64/kernel/vio.c
@@ -0,0 +1,395 @@
1/* vio.c: Virtual I/O channel devices probing infrastructure.
2 *
3 * Copyright (c) 2003-2005 IBM Corp.
4 * Dave Engebretsen engebret@us.ibm.com
5 * Santiago Leon santil@us.ibm.com
6 * Hollis Blanchard <hollisb@us.ibm.com>
7 * Stephen Rothwell
8 *
9 * Adapted to sparc64 by David S. Miller davem@davemloft.net
10 */
11
12#include <linux/kernel.h>
13#include <linux/irq.h>
14#include <linux/init.h>
15
16#include <asm/mdesc.h>
17#include <asm/vio.h>
18
19static inline int find_in_proplist(const char *list, const char *match,
20 int len)
21{
22 while (len > 0) {
23 int l;
24
25 if (!strcmp(list, match))
26 return 1;
27 l = strlen(list) + 1;
28 list += l;
29 len -= l;
30 }
31 return 0;
32}
33
34static const struct vio_device_id *vio_match_device(
35 const struct vio_device_id *matches,
36 const struct vio_dev *dev)
37{
38 const char *type, *compat;
39 int len;
40
41 type = dev->type;
42 compat = dev->compat;
43 len = dev->compat_len;
44
45 while (matches->type[0] || matches->compat[0]) {
46 int match = 1;
47 if (matches->type[0])
48 match &= !strcmp(matches->type, type);
49
50 if (matches->compat[0]) {
51 match &= len &&
52 find_in_proplist(compat, matches->compat, len);
53 }
54 if (match)
55 return matches;
56 matches++;
57 }
58 return NULL;
59}
60
61static int vio_bus_match(struct device *dev, struct device_driver *drv)
62{
63 struct vio_dev *vio_dev = to_vio_dev(dev);
64 struct vio_driver *vio_drv = to_vio_driver(drv);
65 const struct vio_device_id *matches = vio_drv->id_table;
66
67 if (!matches)
68 return 0;
69
70 return vio_match_device(matches, vio_dev) != NULL;
71}
72
73static int vio_device_probe(struct device *dev)
74{
75 struct vio_dev *vdev = to_vio_dev(dev);
76 struct vio_driver *drv = to_vio_driver(dev->driver);
77 const struct vio_device_id *id;
78 int error = -ENODEV;
79
80 if (drv->probe) {
81 id = vio_match_device(drv->id_table, vdev);
82 if (id)
83 error = drv->probe(vdev, id);
84 }
85
86 return error;
87}
88
89static int vio_device_remove(struct device *dev)
90{
91 struct vio_dev *vdev = to_vio_dev(dev);
92 struct vio_driver *drv = to_vio_driver(dev->driver);
93
94 if (drv->remove)
95 return drv->remove(vdev);
96
97 return 1;
98}
99
100static ssize_t devspec_show(struct device *dev,
101 struct device_attribute *attr, char *buf)
102{
103 struct vio_dev *vdev = to_vio_dev(dev);
104 const char *str = "none";
105
106 if (!strcmp(vdev->type, "network"))
107 str = "vnet";
108 else if (!strcmp(vdev->type, "block"))
109 str = "vdisk";
110
111 return sprintf(buf, "%s\n", str);
112}
113
114static ssize_t type_show(struct device *dev,
115 struct device_attribute *attr, char *buf)
116{
117 struct vio_dev *vdev = to_vio_dev(dev);
118 return sprintf(buf, "%s\n", vdev->type);
119}
120
121static struct device_attribute vio_dev_attrs[] = {
122 __ATTR_RO(devspec),
123 __ATTR_RO(type),
124 __ATTR_NULL
125};
126
127static struct bus_type vio_bus_type = {
128 .name = "vio",
129 .dev_attrs = vio_dev_attrs,
130 .match = vio_bus_match,
131 .probe = vio_device_probe,
132 .remove = vio_device_remove,
133};
134
135int vio_register_driver(struct vio_driver *viodrv)
136{
137 viodrv->driver.bus = &vio_bus_type;
138
139 return driver_register(&viodrv->driver);
140}
141EXPORT_SYMBOL(vio_register_driver);
142
143void vio_unregister_driver(struct vio_driver *viodrv)
144{
145 driver_unregister(&viodrv->driver);
146}
147EXPORT_SYMBOL(vio_unregister_driver);
148
149static void __devinit vio_dev_release(struct device *dev)
150{
151 kfree(to_vio_dev(dev));
152}
153
154static ssize_t
155show_pciobppath_attr(struct device *dev, struct device_attribute *attr,
156 char *buf)
157{
158 struct vio_dev *vdev;
159 struct device_node *dp;
160
161 vdev = to_vio_dev(dev);
162 dp = vdev->dp;
163
 164	return snprintf(buf, PAGE_SIZE, "%s\n", dp->full_name);
165}
166
167static DEVICE_ATTR(obppath, S_IRUSR | S_IRGRP | S_IROTH,
168 show_pciobppath_attr, NULL);
169
170struct device_node *cdev_node;
171
172static struct vio_dev *root_vdev;
173static u64 cdev_cfg_handle;
174
175static void vio_fill_channel_info(struct mdesc_handle *hp, u64 mp,
176 struct vio_dev *vdev)
177{
178 u64 a;
179
180 mdesc_for_each_arc(a, hp, mp, MDESC_ARC_TYPE_FWD) {
181 const u64 *chan_id;
182 const u64 *irq;
183 u64 target;
184
185 target = mdesc_arc_target(hp, a);
186
187 irq = mdesc_get_property(hp, target, "tx-ino", NULL);
188 if (irq)
189 vdev->tx_irq = sun4v_build_virq(cdev_cfg_handle, *irq);
190
191 irq = mdesc_get_property(hp, target, "rx-ino", NULL);
192 if (irq)
193 vdev->rx_irq = sun4v_build_virq(cdev_cfg_handle, *irq);
194
195 chan_id = mdesc_get_property(hp, target, "id", NULL);
196 if (chan_id)
197 vdev->channel_id = *chan_id;
198 }
199}
200
201static struct vio_dev *vio_create_one(struct mdesc_handle *hp, u64 mp,
202 struct device *parent)
203{
204 const char *type, *compat;
205 struct device_node *dp;
206 struct vio_dev *vdev;
207 int err, tlen, clen;
208
209 type = mdesc_get_property(hp, mp, "device-type", &tlen);
210 if (!type) {
211 type = mdesc_get_property(hp, mp, "name", &tlen);
212 if (!type) {
213 type = mdesc_node_name(hp, mp);
214 tlen = strlen(type) + 1;
215 }
216 }
217 if (tlen > VIO_MAX_TYPE_LEN) {
218 printk(KERN_ERR "VIO: Type string [%s] is too long.\n",
219 type);
220 return NULL;
221 }
222
223 compat = mdesc_get_property(hp, mp, "device-type", &clen);
224 if (!compat) {
225 clen = 0;
226 } else if (clen > VIO_MAX_COMPAT_LEN) {
227 printk(KERN_ERR "VIO: Compat len %d for [%s] is too long.\n",
228 clen, type);
229 return NULL;
230 }
231
232 vdev = kzalloc(sizeof(*vdev), GFP_KERNEL);
233 if (!vdev) {
234 printk(KERN_ERR "VIO: Could not allocate vio_dev\n");
235 return NULL;
236 }
237
238 vdev->mp = mp;
239 memcpy(vdev->type, type, tlen);
240 if (compat)
241 memcpy(vdev->compat, compat, clen);
242 else
243 memset(vdev->compat, 0, sizeof(vdev->compat));
244 vdev->compat_len = clen;
245
246 vdev->channel_id = ~0UL;
247 vdev->tx_irq = ~0;
248 vdev->rx_irq = ~0;
249
250 vio_fill_channel_info(hp, mp, vdev);
251
252 snprintf(vdev->dev.bus_id, BUS_ID_SIZE, "%lx", mp);
253 vdev->dev.parent = parent;
254 vdev->dev.bus = &vio_bus_type;
255 vdev->dev.release = vio_dev_release;
256
257 if (parent == NULL) {
258 dp = cdev_node;
259 } else if (to_vio_dev(parent) == root_vdev) {
260 dp = of_get_next_child(cdev_node, NULL);
261 while (dp) {
262 if (!strcmp(dp->type, type))
263 break;
264
265 dp = of_get_next_child(cdev_node, dp);
266 }
267 } else {
268 dp = to_vio_dev(parent)->dp;
269 }
270 vdev->dp = dp;
271
272 err = device_register(&vdev->dev);
273 if (err) {
274 printk(KERN_ERR "VIO: Could not register device %s, err=%d\n",
275 vdev->dev.bus_id, err);
276 kfree(vdev);
277 return NULL;
278 }
279 if (vdev->dp)
280 err = sysfs_create_file(&vdev->dev.kobj,
281 &dev_attr_obppath.attr);
282
283 return vdev;
284}
285
286static void walk_tree(struct mdesc_handle *hp, u64 n, struct vio_dev *parent)
287{
288 u64 a;
289
290 mdesc_for_each_arc(a, hp, n, MDESC_ARC_TYPE_FWD) {
291 struct vio_dev *vdev;
292 u64 target;
293
294 target = mdesc_arc_target(hp, a);
295 vdev = vio_create_one(hp, target, &parent->dev);
296 if (vdev)
297 walk_tree(hp, target, vdev);
298 }
299}
300
301static void create_devices(struct mdesc_handle *hp, u64 root)
302{
303 u64 mp;
304
305 root_vdev = vio_create_one(hp, root, NULL);
306 if (!root_vdev) {
 307		printk(KERN_ERR "VIO: Could not create root device.\n");
308 return;
309 }
310
311 walk_tree(hp, root, root_vdev);
312
313 /* Domain services is odd as it doesn't sit underneath the
314 * channel-devices node, so we plug it in manually.
315 */
316 mp = mdesc_node_by_name(hp, MDESC_NODE_NULL, "domain-services");
317 if (mp != MDESC_NODE_NULL) {
318 struct vio_dev *parent = vio_create_one(hp, mp,
319 &root_vdev->dev);
320
321 if (parent)
322 walk_tree(hp, mp, parent);
323 }
324}
325
326const char *channel_devices_node = "channel-devices";
327const char *channel_devices_compat = "SUNW,sun4v-channel-devices";
328const char *cfg_handle_prop = "cfg-handle";
329
330static int __init vio_init(void)
331{
332 struct mdesc_handle *hp;
333 const char *compat;
334 const u64 *cfg_handle;
335 int err, len;
336 u64 root;
337
338 err = bus_register(&vio_bus_type);
339 if (err) {
340 printk(KERN_ERR "VIO: Could not register bus type err=%d\n",
341 err);
342 return err;
343 }
344
345 hp = mdesc_grab();
346 if (!hp)
347 return 0;
348
349 root = mdesc_node_by_name(hp, MDESC_NODE_NULL, channel_devices_node);
350 if (root == MDESC_NODE_NULL) {
351 printk(KERN_INFO "VIO: No channel-devices MDESC node.\n");
352 mdesc_release(hp);
353 return 0;
354 }
355
356 cdev_node = of_find_node_by_name(NULL, "channel-devices");
357 err = -ENODEV;
358 if (!cdev_node) {
359 printk(KERN_INFO "VIO: No channel-devices OBP node.\n");
360 goto out_release;
361 }
362
363 compat = mdesc_get_property(hp, root, "compatible", &len);
364 if (!compat) {
 365		printk(KERN_ERR "VIO: Channel devices node lacks compatible "
366 "property\n");
367 goto out_release;
368 }
369 if (!find_in_proplist(compat, channel_devices_compat, len)) {
370 printk(KERN_ERR "VIO: Channel devices node lacks (%s) "
371 "compat entry.\n", channel_devices_compat);
372 goto out_release;
373 }
374
375 cfg_handle = mdesc_get_property(hp, root, cfg_handle_prop, NULL);
376 if (!cfg_handle) {
 377		printk(KERN_ERR "VIO: Channel devices node lacks %s property\n",
378 cfg_handle_prop);
379 goto out_release;
380 }
381
382 cdev_cfg_handle = *cfg_handle;
383
384 create_devices(hp, root);
385
386 mdesc_release(hp);
387
388 return 0;
389
390out_release:
391 mdesc_release(hp);
392 return err;
393}
394
395postcore_initcall(vio_init);
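
Drivers attach to the new bus with a vio_device_id table that vio_bus_match() compares against the device's type and compat strings. A minimal, hypothetical client built only from the signatures visible above ("network" is one of the type strings devspec_show() recognizes; the embedded struct device_driver and its .name field are assumptions inferred from the viodrv->driver.bus assignment in vio_register_driver()):

        static const struct vio_device_id example_ids[] = {
                { .type = "network" },  /* match any network channel device */
                { },                    /* terminator: empty type and compat */
        };

        static int example_probe(struct vio_dev *vdev,
                                 const struct vio_device_id *id)
        {
                printk(KERN_INFO "example: channel %lu tx_irq %u rx_irq %u\n",
                       vdev->channel_id, vdev->tx_irq, vdev->rx_irq);
                return 0;
        }

        static int example_remove(struct vio_dev *vdev)
        {
                return 0;
        }

        static struct vio_driver example_driver = {
                .id_table = example_ids,
                .probe    = example_probe,
                .remove   = example_remove,
                .driver   = {
                        .name = "example",  /* assumed field, see note above */
                },
        };

        static int __init example_init(void)
        {
                return vio_register_driver(&example_driver);
        }
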
diff --git a/arch/sparc64/kernel/viohs.c b/arch/sparc64/kernel/viohs.c
new file mode 100644
index 000000000000..15613add45d1
--- /dev/null
+++ b/arch/sparc64/kernel/viohs.c
@@ -0,0 +1,792 @@
1/* viohs.c: LDOM Virtual I/O handshake helper layer.
2 *
3 * Copyright (C) 2007 David S. Miller <davem@davemloft.net>
4 */
5
6#include <linux/kernel.h>
7#include <linux/module.h>
8#include <linux/string.h>
9#include <linux/delay.h>
10#include <linux/sched.h>
11#include <linux/slab.h>
12
13#include <asm/ldc.h>
14#include <asm/vio.h>
15
16int vio_ldc_send(struct vio_driver_state *vio, void *data, int len)
17{
18 int err, limit = 1000;
19
20 err = -EINVAL;
21 while (limit-- > 0) {
22 err = ldc_write(vio->lp, data, len);
23 if (!err || (err != -EAGAIN))
24 break;
25 udelay(1);
26 }
27
28 return err;
29}
30EXPORT_SYMBOL(vio_ldc_send);
31
32static int send_ctrl(struct vio_driver_state *vio,
33 struct vio_msg_tag *tag, int len)
34{
35 tag->sid = vio_send_sid(vio);
36 return vio_ldc_send(vio, tag, len);
37}
38
39static void init_tag(struct vio_msg_tag *tag, u8 type, u8 stype, u16 stype_env)
40{
41 tag->type = type;
42 tag->stype = stype;
43 tag->stype_env = stype_env;
44}
45
46static int send_version(struct vio_driver_state *vio, u16 major, u16 minor)
47{
48 struct vio_ver_info pkt;
49
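	/* Generate a fresh local session ID for this handshake attempt;
	 * sched_clock() is just a convenient source of a new 32-bit value.
	 */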
50 vio->_local_sid = (u32) sched_clock();
51
52 memset(&pkt, 0, sizeof(pkt));
53 init_tag(&pkt.tag, VIO_TYPE_CTRL, VIO_SUBTYPE_INFO, VIO_VER_INFO);
54 pkt.major = major;
55 pkt.minor = minor;
56 pkt.dev_class = vio->dev_class;
57
58 viodbg(HS, "SEND VERSION INFO maj[%u] min[%u] devclass[%u]\n",
59 major, minor, vio->dev_class);
60
61 return send_ctrl(vio, &pkt.tag, sizeof(pkt));
62}
63
64static int start_handshake(struct vio_driver_state *vio)
65{
66 int err;
67
68 viodbg(HS, "START HANDSHAKE\n");
69
70 vio->hs_state = VIO_HS_INVALID;
71
72 err = send_version(vio,
73 vio->ver_table[0].major,
74 vio->ver_table[0].minor);
75 if (err < 0)
76 return err;
77
78 return 0;
79}
80
81void vio_link_state_change(struct vio_driver_state *vio, int event)
82{
83 if (event == LDC_EVENT_UP) {
84 vio->hs_state = VIO_HS_INVALID;
85
86 switch (vio->dev_class) {
87 case VDEV_NETWORK:
88 case VDEV_NETWORK_SWITCH:
89 vio->dr_state = (VIO_DR_STATE_TXREQ |
90 VIO_DR_STATE_RXREQ);
91 break;
92
93 case VDEV_DISK:
94 vio->dr_state = VIO_DR_STATE_TXREQ;
95 break;
96 case VDEV_DISK_SERVER:
97 vio->dr_state = VIO_DR_STATE_RXREQ;
98 break;
99 }
100 start_handshake(vio);
101 }
102}
103EXPORT_SYMBOL(vio_link_state_change);
104
105static int handshake_failure(struct vio_driver_state *vio)
106{
107 struct vio_dring_state *dr;
108
109 /* XXX Put policy here... Perhaps start a timer to fire
110 * XXX in 100 ms, which will bring the link up and retry
111 * XXX the handshake.
112 */
113
114 viodbg(HS, "HANDSHAKE FAILURE\n");
115
116 vio->dr_state &= ~(VIO_DR_STATE_TXREG |
117 VIO_DR_STATE_RXREG);
118
119 dr = &vio->drings[VIO_DRIVER_RX_RING];
120 memset(dr, 0, sizeof(*dr));
121
122 kfree(vio->desc_buf);
123 vio->desc_buf = NULL;
124 vio->desc_buf_len = 0;
125
126 vio->hs_state = VIO_HS_INVALID;
127
128 return -ECONNRESET;
129}
130
131static int process_unknown(struct vio_driver_state *vio, void *arg)
132{
133 struct vio_msg_tag *pkt = arg;
134
135 viodbg(HS, "UNKNOWN CONTROL [%02x:%02x:%04x:%08x]\n",
136 pkt->type, pkt->stype, pkt->stype_env, pkt->sid);
137
138 printk(KERN_ERR "vio: ID[%lu] Resetting connection.\n",
139 vio->vdev->channel_id);
140
141 ldc_disconnect(vio->lp);
142
143 return -ECONNRESET;
144}
145
146static int send_dreg(struct vio_driver_state *vio)
147{
148 struct vio_dring_state *dr = &vio->drings[VIO_DRIVER_TX_RING];
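	/* A DRING_REG packet is followed by a variable number of transfer
	 * cookies; the union below sizes a stack buffer large enough for
	 * the header plus all of this ring's cookies.
	 */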
149 union {
150 struct vio_dring_register pkt;
151 char all[sizeof(struct vio_dring_register) +
152 (sizeof(struct ldc_trans_cookie) *
153 dr->ncookies)];
154 } u;
155 int i;
156
157 memset(&u, 0, sizeof(u));
158 init_tag(&u.pkt.tag, VIO_TYPE_CTRL, VIO_SUBTYPE_INFO, VIO_DRING_REG);
159 u.pkt.dring_ident = 0;
160 u.pkt.num_descr = dr->num_entries;
161 u.pkt.descr_size = dr->entry_size;
162 u.pkt.options = VIO_TX_DRING;
163 u.pkt.num_cookies = dr->ncookies;
164
165 viodbg(HS, "SEND DRING_REG INFO ndesc[%u] dsz[%u] opt[0x%x] "
166 "ncookies[%u]\n",
167 u.pkt.num_descr, u.pkt.descr_size, u.pkt.options,
168 u.pkt.num_cookies);
169
170 for (i = 0; i < dr->ncookies; i++) {
171 u.pkt.cookies[i] = dr->cookies[i];
172
173 viodbg(HS, "DRING COOKIE(%d) [%016llx:%016llx]\n",
174 i,
175 (unsigned long long) u.pkt.cookies[i].cookie_addr,
176 (unsigned long long) u.pkt.cookies[i].cookie_size);
177 }
178
179 return send_ctrl(vio, &u.pkt.tag, sizeof(u));
180}
181
182static int send_rdx(struct vio_driver_state *vio)
183{
184 struct vio_rdx pkt;
185
186 memset(&pkt, 0, sizeof(pkt));
187
188 init_tag(&pkt.tag, VIO_TYPE_CTRL, VIO_SUBTYPE_INFO, VIO_RDX);
189
190 viodbg(HS, "SEND RDX INFO\n");
191
192 return send_ctrl(vio, &pkt.tag, sizeof(pkt));
193}
194
195static int send_attr(struct vio_driver_state *vio)
196{
197 return vio->ops->send_attr(vio);
198}
199
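/* The driver's ver_table is ordered from largest major version to
 * lowest, so this returns the best entry whose major is <= the
 * requested major, or NULL if there is none.
 */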
200static struct vio_version *find_by_major(struct vio_driver_state *vio,
201 u16 major)
202{
203 struct vio_version *ret = NULL;
204 int i;
205
206 for (i = 0; i < vio->ver_table_entries; i++) {
207 struct vio_version *v = &vio->ver_table[i];
208 if (v->major <= major) {
209 ret = v;
210 break;
211 }
212 }
213 return ret;
214}
215
216static int process_ver_info(struct vio_driver_state *vio,
217 struct vio_ver_info *pkt)
218{
219 struct vio_version *vap;
220 int err;
221
222 viodbg(HS, "GOT VERSION INFO maj[%u] min[%u] devclass[%u]\n",
223 pkt->major, pkt->minor, pkt->dev_class);
224
225 if (vio->hs_state != VIO_HS_INVALID) {
226 /* XXX Perhaps invoke start_handshake? XXX */
227 memset(&vio->ver, 0, sizeof(vio->ver));
228 vio->hs_state = VIO_HS_INVALID;
229 }
230
231 vap = find_by_major(vio, pkt->major);
232
233 vio->_peer_sid = pkt->tag.sid;
234
235 if (!vap) {
236 pkt->tag.stype = VIO_SUBTYPE_NACK;
237 pkt->major = 0;
238 pkt->minor = 0;
239 viodbg(HS, "SEND VERSION NACK maj[0] min[0]\n");
240 err = send_ctrl(vio, &pkt->tag, sizeof(*pkt));
241 } else if (vap->major != pkt->major) {
242 pkt->tag.stype = VIO_SUBTYPE_NACK;
243 pkt->major = vap->major;
244 pkt->minor = vap->minor;
245 viodbg(HS, "SEND VERSION NACK maj[%u] min[%u]\n",
246 pkt->major, pkt->minor);
247 err = send_ctrl(vio, &pkt->tag, sizeof(*pkt));
248 } else {
249 struct vio_version ver = {
250 .major = pkt->major,
251 .minor = pkt->minor,
252 };
253 if (ver.minor > vap->minor)
254 ver.minor = vap->minor;
255 pkt->minor = ver.minor;
256 pkt->tag.stype = VIO_SUBTYPE_ACK;
257 viodbg(HS, "SEND VERSION ACK maj[%u] min[%u]\n",
258 pkt->major, pkt->minor);
259 err = send_ctrl(vio, &pkt->tag, sizeof(*pkt));
260 if (err > 0) {
261 vio->ver = ver;
262 vio->hs_state = VIO_HS_GOTVERS;
263 }
264 }
265 if (err < 0)
266 return handshake_failure(vio);
267
268 return 0;
269}
270
271static int process_ver_ack(struct vio_driver_state *vio,
272 struct vio_ver_info *pkt)
273{
274 viodbg(HS, "GOT VERSION ACK maj[%u] min[%u] devclass[%u]\n",
275 pkt->major, pkt->minor, pkt->dev_class);
276
277 if (vio->hs_state & VIO_HS_GOTVERS) {
278 if (vio->ver.major != pkt->major ||
279 vio->ver.minor != pkt->minor) {
280 pkt->tag.stype = VIO_SUBTYPE_NACK;
281 (void) send_ctrl(vio, &pkt->tag, sizeof(*pkt));
282 return handshake_failure(vio);
283 }
284 } else {
285 vio->ver.major = pkt->major;
286 vio->ver.minor = pkt->minor;
287 vio->hs_state = VIO_HS_GOTVERS;
288 }
289
290 switch (vio->dev_class) {
291 case VDEV_NETWORK:
292 case VDEV_DISK:
293 if (send_attr(vio) < 0)
294 return handshake_failure(vio);
295 break;
296
297 default:
298 break;
299 }
300
301 return 0;
302}
303
304static int process_ver_nack(struct vio_driver_state *vio,
305 struct vio_ver_info *pkt)
306{
307 struct vio_version *nver;
308
309 viodbg(HS, "GOT VERSION NACK maj[%u] min[%u] devclass[%u]\n",
310 pkt->major, pkt->minor, pkt->dev_class);
311
312 if ((pkt->major == 0 && pkt->minor == 0) ||
313 !(nver = find_by_major(vio, pkt->major)))
314 return handshake_failure(vio);
315
316 if (send_version(vio, nver->major, nver->minor) < 0)
317 return handshake_failure(vio);
318
319 return 0;
320}
321
322static int process_ver(struct vio_driver_state *vio, struct vio_ver_info *pkt)
323{
324 switch (pkt->tag.stype) {
325 case VIO_SUBTYPE_INFO:
326 return process_ver_info(vio, pkt);
327
328 case VIO_SUBTYPE_ACK:
329 return process_ver_ack(vio, pkt);
330
331 case VIO_SUBTYPE_NACK:
332 return process_ver_nack(vio, pkt);
333
334 default:
335 return handshake_failure(vio);
336	}
337}
338
339static int process_attr(struct vio_driver_state *vio, void *pkt)
340{
341 int err;
342
343 if (!(vio->hs_state & VIO_HS_GOTVERS))
344 return handshake_failure(vio);
345
346 err = vio->ops->handle_attr(vio, pkt);
347 if (err < 0) {
348 return handshake_failure(vio);
349 } else {
350 vio->hs_state |= VIO_HS_GOT_ATTR;
351
352 if ((vio->dr_state & VIO_DR_STATE_TXREQ) &&
353 !(vio->hs_state & VIO_HS_SENT_DREG)) {
354 if (send_dreg(vio) < 0)
355 return handshake_failure(vio);
356
357 vio->hs_state |= VIO_HS_SENT_DREG;
358 }
359 }
360 return 0;
361}
362
363static int all_drings_registered(struct vio_driver_state *vio)
364{
365 int need_rx, need_tx;
366
367 need_rx = (vio->dr_state & VIO_DR_STATE_RXREQ);
368 need_tx = (vio->dr_state & VIO_DR_STATE_TXREQ);
369
370 if (need_rx &&
371 !(vio->dr_state & VIO_DR_STATE_RXREG))
372 return 0;
373
374 if (need_tx &&
375 !(vio->dr_state & VIO_DR_STATE_TXREG))
376 return 0;
377
378 return 1;
379}
380
381static int process_dreg_info(struct vio_driver_state *vio,
382 struct vio_dring_register *pkt)
383{
384 struct vio_dring_state *dr;
385 int i, len;
386
387 viodbg(HS, "GOT DRING_REG INFO ident[%llx] "
388 "ndesc[%u] dsz[%u] opt[0x%x] ncookies[%u]\n",
389 (unsigned long long) pkt->dring_ident,
390 pkt->num_descr, pkt->descr_size, pkt->options,
391 pkt->num_cookies);
392
393 if (!(vio->dr_state & VIO_DR_STATE_RXREQ))
394 goto send_nack;
395
396 if (vio->dr_state & VIO_DR_STATE_RXREG)
397 goto send_nack;
398
399 vio->desc_buf = kzalloc(pkt->descr_size, GFP_ATOMIC);
400 if (!vio->desc_buf)
401 goto send_nack;
402
403 vio->desc_buf_len = pkt->descr_size;
404
405 dr = &vio->drings[VIO_DRIVER_RX_RING];
406
407 dr->num_entries = pkt->num_descr;
408 dr->entry_size = pkt->descr_size;
409 dr->ncookies = pkt->num_cookies;
410 for (i = 0; i < dr->ncookies; i++) {
411 dr->cookies[i] = pkt->cookies[i];
412
413 viodbg(HS, "DRING COOKIE(%d) [%016llx:%016llx]\n",
414 i,
415 (unsigned long long)
416 pkt->cookies[i].cookie_addr,
417 (unsigned long long)
418 pkt->cookies[i].cookie_size);
419 }
420
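	/* Accept the registration: ACK it back to the peer, handing out
	 * a newly assigned ring identifier.
	 */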
421 pkt->tag.stype = VIO_SUBTYPE_ACK;
422 pkt->dring_ident = ++dr->ident;
423
424 viodbg(HS, "SEND DRING_REG ACK ident[%llx]\n",
425 (unsigned long long) pkt->dring_ident);
426
427 len = (sizeof(*pkt) +
428 (dr->ncookies * sizeof(struct ldc_trans_cookie)));
429 if (send_ctrl(vio, &pkt->tag, len) < 0)
430 goto send_nack;
431
432 vio->dr_state |= VIO_DR_STATE_RXREG;
433
434 return 0;
435
436send_nack:
437 pkt->tag.stype = VIO_SUBTYPE_NACK;
438 viodbg(HS, "SEND DRING_REG NACK\n");
439 (void) send_ctrl(vio, &pkt->tag, sizeof(*pkt));
440
441 return handshake_failure(vio);
442}
443
444static int process_dreg_ack(struct vio_driver_state *vio,
445 struct vio_dring_register *pkt)
446{
447 struct vio_dring_state *dr;
448
449 viodbg(HS, "GOT DRING_REG ACK ident[%llx] "
450 "ndesc[%u] dsz[%u] opt[0x%x] ncookies[%u]\n",
451 (unsigned long long) pkt->dring_ident,
452 pkt->num_descr, pkt->descr_size, pkt->options,
453 pkt->num_cookies);
454
455 dr = &vio->drings[VIO_DRIVER_TX_RING];
456
457 if (!(vio->dr_state & VIO_DR_STATE_TXREQ))
458 return handshake_failure(vio);
459
460 dr->ident = pkt->dring_ident;
461 vio->dr_state |= VIO_DR_STATE_TXREG;
462
463 if (all_drings_registered(vio)) {
464 if (send_rdx(vio) < 0)
465 return handshake_failure(vio);
466 vio->hs_state = VIO_HS_SENT_RDX;
467 }
468 return 0;
469}
470
471static int process_dreg_nack(struct vio_driver_state *vio,
472 struct vio_dring_register *pkt)
473{
474 viodbg(HS, "GOT DRING_REG NACK ident[%llx] "
475 "ndesc[%u] dsz[%u] opt[0x%x] ncookies[%u]\n",
476 (unsigned long long) pkt->dring_ident,
477 pkt->num_descr, pkt->descr_size, pkt->options,
478 pkt->num_cookies);
479
480 return handshake_failure(vio);
481}
482
483static int process_dreg(struct vio_driver_state *vio,
484 struct vio_dring_register *pkt)
485{
486 if (!(vio->hs_state & VIO_HS_GOTVERS))
487 return handshake_failure(vio);
488
489 switch (pkt->tag.stype) {
490 case VIO_SUBTYPE_INFO:
491 return process_dreg_info(vio, pkt);
492
493 case VIO_SUBTYPE_ACK:
494 return process_dreg_ack(vio, pkt);
495
496 case VIO_SUBTYPE_NACK:
497 return process_dreg_nack(vio, pkt);
498
499 default:
500 return handshake_failure(vio);
501 }
502}
503
504static int process_dunreg(struct vio_driver_state *vio,
505 struct vio_dring_unregister *pkt)
506{
507 struct vio_dring_state *dr = &vio->drings[VIO_DRIVER_RX_RING];
508
509 viodbg(HS, "GOT DRING_UNREG\n");
510
511 if (pkt->dring_ident != dr->ident)
512 return 0;
513
514 vio->dr_state &= ~VIO_DR_STATE_RXREG;
515
516 memset(dr, 0, sizeof(*dr));
517
518 kfree(vio->desc_buf);
519 vio->desc_buf = NULL;
520 vio->desc_buf_len = 0;
521
522 return 0;
523}
524
525static int process_rdx_info(struct vio_driver_state *vio, struct vio_rdx *pkt)
526{
527 viodbg(HS, "GOT RDX INFO\n");
528
529 pkt->tag.stype = VIO_SUBTYPE_ACK;
530 viodbg(HS, "SEND RDX ACK\n");
531 if (send_ctrl(vio, &pkt->tag, sizeof(*pkt)) < 0)
532 return handshake_failure(vio);
533
534 vio->hs_state |= VIO_HS_SENT_RDX_ACK;
535 return 0;
536}
537
538static int process_rdx_ack(struct vio_driver_state *vio, struct vio_rdx *pkt)
539{
540 viodbg(HS, "GOT RDX ACK\n");
541
542 if (!(vio->hs_state & VIO_HS_SENT_RDX))
543 return handshake_failure(vio);
544
545 vio->hs_state |= VIO_HS_GOT_RDX_ACK;
546 return 0;
547}
548
549static int process_rdx_nack(struct vio_driver_state *vio, struct vio_rdx *pkt)
550{
551 viodbg(HS, "GOT RDX NACK\n");
552
553 return handshake_failure(vio);
554}
555
556static int process_rdx(struct vio_driver_state *vio, struct vio_rdx *pkt)
557{
558 if (!all_drings_registered(vio))
559		return handshake_failure(vio);
560
561 switch (pkt->tag.stype) {
562 case VIO_SUBTYPE_INFO:
563 return process_rdx_info(vio, pkt);
564
565 case VIO_SUBTYPE_ACK:
566 return process_rdx_ack(vio, pkt);
567
568 case VIO_SUBTYPE_NACK:
569 return process_rdx_nack(vio, pkt);
570
571 default:
572 return handshake_failure(vio);
573 }
574}
575
576int vio_control_pkt_engine(struct vio_driver_state *vio, void *pkt)
577{
578 struct vio_msg_tag *tag = pkt;
579 u8 prev_state = vio->hs_state;
580 int err;
581
582 switch (tag->stype_env) {
583 case VIO_VER_INFO:
584 err = process_ver(vio, pkt);
585 break;
586
587 case VIO_ATTR_INFO:
588 err = process_attr(vio, pkt);
589 break;
590
591 case VIO_DRING_REG:
592 err = process_dreg(vio, pkt);
593 break;
594
595 case VIO_DRING_UNREG:
596 err = process_dunreg(vio, pkt);
597 break;
598
599 case VIO_RDX:
600 err = process_rdx(vio, pkt);
601 break;
602
603 default:
604 err = process_unknown(vio, pkt);
605 break;
606 }
607 if (!err &&
608 vio->hs_state != prev_state &&
609 (vio->hs_state & VIO_HS_COMPLETE))
610 vio->ops->handshake_complete(vio);
611
612 return err;
613}
614EXPORT_SYMBOL(vio_control_pkt_engine);
615
616void vio_conn_reset(struct vio_driver_state *vio)
617{
618}
619EXPORT_SYMBOL(vio_conn_reset);
620
621/* The issue is that the Solaris virtual disk server just mirrors the
622 * SID values it gets from the client peer. So we work around that
623 * here in vio_{validate,send}_sid() so that the drivers don't need
624 * to be aware of this crap.
625 */
626int vio_validate_sid(struct vio_driver_state *vio, struct vio_msg_tag *tp)
627{
628 u32 sid;
629
630 /* Always let VERSION+INFO packets through unchecked, they
631 * define the new SID.
632 */
633 if (tp->type == VIO_TYPE_CTRL &&
634 tp->stype == VIO_SUBTYPE_INFO &&
635 tp->stype_env == VIO_VER_INFO)
636 return 0;
637
638 /* Ok, now figure out which SID to use. */
639 switch (vio->dev_class) {
640 case VDEV_NETWORK:
641 case VDEV_NETWORK_SWITCH:
642 case VDEV_DISK_SERVER:
643 default:
644 sid = vio->_peer_sid;
645 break;
646
647 case VDEV_DISK:
648 sid = vio->_local_sid;
649 break;
650 }
651
652 if (sid == tp->sid)
653 return 0;
654 viodbg(DATA, "BAD SID tag->sid[%08x] peer_sid[%08x] local_sid[%08x]\n",
655 tp->sid, vio->_peer_sid, vio->_local_sid);
656 return -EINVAL;
657}
658EXPORT_SYMBOL(vio_validate_sid);
659
660u32 vio_send_sid(struct vio_driver_state *vio)
661{
662 switch (vio->dev_class) {
663 case VDEV_NETWORK:
664 case VDEV_NETWORK_SWITCH:
665 case VDEV_DISK:
666 default:
667 return vio->_local_sid;
668
669 case VDEV_DISK_SERVER:
670 return vio->_peer_sid;
671 }
672}
673EXPORT_SYMBOL(vio_send_sid);
674
675int vio_ldc_alloc(struct vio_driver_state *vio,
676		  struct ldc_channel_config *base_cfg,
677		  void *event_arg)
678{
679 struct ldc_channel_config cfg = *base_cfg;
680 struct ldc_channel *lp;
681
682 cfg.tx_irq = vio->vdev->tx_irq;
683 cfg.rx_irq = vio->vdev->rx_irq;
684
685 lp = ldc_alloc(vio->vdev->channel_id, &cfg, event_arg);
686 if (IS_ERR(lp))
687 return PTR_ERR(lp);
688
689 vio->lp = lp;
690
691 return 0;
692}
693EXPORT_SYMBOL(vio_ldc_alloc);
694
695void vio_ldc_free(struct vio_driver_state *vio)
696{
697 ldc_free(vio->lp);
698 vio->lp = NULL;
699
700 kfree(vio->desc_buf);
701 vio->desc_buf = NULL;
702 vio->desc_buf_len = 0;
703}
704EXPORT_SYMBOL(vio_ldc_free);
705
706void vio_port_up(struct vio_driver_state *vio)
707{
708 unsigned long flags;
709 int err, state;
710
711 spin_lock_irqsave(&vio->lock, flags);
712
713 state = ldc_state(vio->lp);
714
715 err = 0;
716 if (state == LDC_STATE_INIT) {
717 err = ldc_bind(vio->lp, vio->name);
718 if (err)
719 printk(KERN_WARNING "%s: Port %lu bind failed, "
720 "err=%d\n",
721 vio->name, vio->vdev->channel_id, err);
722 }
723
724 if (!err) {
725 err = ldc_connect(vio->lp);
726 if (err)
727 printk(KERN_WARNING "%s: Port %lu connect failed, "
728 "err=%d\n",
729 vio->name, vio->vdev->channel_id, err);
730 }
731 if (err) {
732 unsigned long expires = jiffies + HZ;
733
734 expires = round_jiffies(expires);
735 mod_timer(&vio->timer, expires);
736 }
737
738 spin_unlock_irqrestore(&vio->lock, flags);
739}
740EXPORT_SYMBOL(vio_port_up);
741
742static void vio_port_timer(unsigned long _arg)
743{
744 struct vio_driver_state *vio = (struct vio_driver_state *) _arg;
745
746 vio_port_up(vio);
747}
748
749int vio_driver_init(struct vio_driver_state *vio, struct vio_dev *vdev,
750 u8 dev_class, struct vio_version *ver_table,
751 int ver_table_size, struct vio_driver_ops *ops,
752 char *name)
753{
754 switch (dev_class) {
755 case VDEV_NETWORK:
756 case VDEV_NETWORK_SWITCH:
757 case VDEV_DISK:
758 case VDEV_DISK_SERVER:
759 break;
760
761 default:
762 return -EINVAL;
763 }
764
765 if (!ops->send_attr ||
766 !ops->handle_attr ||
767 !ops->handshake_complete)
768 return -EINVAL;
769
770 if (!ver_table || ver_table_size < 0)
771 return -EINVAL;
772
773 if (!name)
774 return -EINVAL;
775
776 spin_lock_init(&vio->lock);
777
778 vio->name = name;
779
780 vio->dev_class = dev_class;
781 vio->vdev = vdev;
782
783 vio->ver_table = ver_table;
784 vio->ver_table_entries = ver_table_size;
785
786 vio->ops = ops;
787
788 setup_timer(&vio->timer, vio_port_timer, (unsigned long) vio);
789
790 return 0;
791}
792EXPORT_SYMBOL(vio_driver_init);
diff --git a/arch/sparc64/lib/Makefile b/arch/sparc64/lib/Makefile
index 4a725d8985f1..c4a6d6e7d03c 100644
--- a/arch/sparc64/lib/Makefile
+++ b/arch/sparc64/lib/Makefile
@@ -14,6 +14,6 @@ lib-y := PeeCeeI.o copy_page.o clear_page.o strlen.o strncmp.o \
 	 NGmemcpy.o NGcopy_from_user.o NGcopy_to_user.o NGpatch.o \
 	 NGpage.o NGbzero.o \
 	 copy_in_user.o user_fixup.o memmove.o \
-	 mcount.o ipcsum.o rwsem.o xor.o delay.o
+	 mcount.o ipcsum.o rwsem.o xor.o
 
 obj-y += iomap.o
diff --git a/arch/sparc64/lib/delay.c b/arch/sparc64/lib/delay.c
deleted file mode 100644
index fb27e54a03ee..000000000000
--- a/arch/sparc64/lib/delay.c
+++ /dev/null
@@ -1,46 +0,0 @@
1/* delay.c: Delay loops for sparc64
2 *
3 * Copyright (C) 2004, 2006 David S. Miller <davem@davemloft.net>
4 *
5 * Based heavily upon x86 variant which is:
6 * Copyright (C) 1993 Linus Torvalds
7 * Copyright (C) 1997 Martin Mares <mj@atrey.karlin.mff.cuni.cz>
8 */
9
10#include <linux/delay.h>
11#include <asm/timer.h>
12
13void __delay(unsigned long loops)
14{
15 unsigned long bclock, now;
16
17 bclock = tick_ops->get_tick();
18 do {
19 now = tick_ops->get_tick();
20 } while ((now-bclock) < loops);
21}
22
23/* We used to multiply by HZ after shifting down by 32 bits
24 * but that runs into problems for higher values of HZ and
25 * slow cpus.
26 */
27void __const_udelay(unsigned long n)
28{
29 n *= 4;
30
31 n *= (cpu_data(raw_smp_processor_id()).udelay_val * (HZ/4));
32 n >>= 32;
33
34 __delay(n + 1);
35}
36
37void __udelay(unsigned long n)
38{
39 __const_udelay(n * 0x10c7UL);
40}
41
42
43void __ndelay(unsigned long n)
44{
45 __const_udelay(n * 0x5UL);
46}
diff --git a/arch/sparc64/prom/misc.c b/arch/sparc64/prom/misc.c
index f3e0c14e9eef..33c5b7da31e5 100644
--- a/arch/sparc64/prom/misc.c
+++ b/arch/sparc64/prom/misc.c
@@ -14,6 +14,7 @@
 #include <asm/openprom.h>
 #include <asm/oplib.h>
 #include <asm/system.h>
+#include <asm/ldc.h>
 
 int prom_service_exists(const char *service_name)
 {
@@ -37,6 +38,10 @@ void prom_sun4v_guest_soft_state(void)
 /* Reset and reboot the machine with the command 'bcommand'. */
 void prom_reboot(const char *bcommand)
 {
+#ifdef CONFIG_SUN_LDOMS
+	if (ldom_domaining_enabled)
+		ldom_reboot(bcommand);
+#endif
 	p1275_cmd("boot", P1275_ARG(0, P1275_ARG_IN_STRING) |
 		  P1275_INOUT(1, 0), bcommand);
 }
@@ -91,6 +96,10 @@ void prom_cmdline(void)
  */
 void prom_halt(void)
 {
+#ifdef CONFIG_SUN_LDOMS
+	if (ldom_domaining_enabled)
+		ldom_power_off();
+#endif
 again:
 	p1275_cmd("exit", P1275_INOUT(0, 0));
 	goto again; /* PROM is out to get me -DaveM */
@@ -98,6 +107,10 @@ again:
 
 void prom_halt_power_off(void)
 {
+#ifdef CONFIG_SUN_LDOMS
+	if (ldom_domaining_enabled)
+		ldom_power_off();
+#endif
 	p1275_cmd("SUNW,power-off", P1275_INOUT(0, 0));
 
 	/* if nothing else helps, we just halt */
diff --git a/arch/sparc64/prom/p1275.c b/arch/sparc64/prom/p1275.c
index 2b32c489860c..7fcccc0e19cf 100644
--- a/arch/sparc64/prom/p1275.c
+++ b/arch/sparc64/prom/p1275.c
@@ -16,6 +16,7 @@
 #include <asm/system.h>
 #include <asm/spitfire.h>
 #include <asm/pstate.h>
+#include <asm/ldc.h>
 
 struct {
 	long prom_callback;	/* 0x00 */
diff --git a/arch/sparc64/prom/tree.c b/arch/sparc64/prom/tree.c
index 500f05e2cfcb..17b7ecfe7ca9 100644
--- a/arch/sparc64/prom/tree.c
+++ b/arch/sparc64/prom/tree.c
@@ -13,6 +13,7 @@
 
 #include <asm/openprom.h>
 #include <asm/oplib.h>
+#include <asm/ldc.h>
 
 /* Return the child of node 'node' or zero if this node has no
  * direct descendant.
@@ -261,9 +262,17 @@ int prom_node_has_property(int node, const char *prop)
 int
 prom_setprop(int node, const char *pname, char *value, int size)
 {
-	if(size == 0) return 0;
-	if((pname == 0) || (value == 0)) return 0;
+	if (size == 0)
+		return 0;
+	if ((pname == 0) || (value == 0))
+		return 0;
 
+#ifdef CONFIG_SUN_LDOMS
+	if (ldom_domaining_enabled) {
+		ldom_set_var(pname, value);
+		return 0;
+	}
+#endif
 	return p1275_cmd ("setprop", P1275_ARG(1,P1275_ARG_IN_STRING)|
 			  P1275_ARG(2,P1275_ARG_IN_BUF)|
 			  P1275_INOUT(4, 1),
diff --git a/drivers/block/Kconfig b/drivers/block/Kconfig
index fd7a53bdcb63..e49162b15578 100644
--- a/drivers/block/Kconfig
+++ b/drivers/block/Kconfig
@@ -423,6 +423,13 @@ config ATA_OVER_ETH
 	  This driver provides support for ATA over Ethernet block
 	  devices like the Coraid EtherDrive (R) Storage Blade.
 
+config SUNVDC
+	tristate "Sun Virtual Disk Client support"
+	depends on SUN_LDOMS
+	help
+	  Support for virtual disk devices as a client under Sun
+	  Logical Domains.
+
 source "drivers/s390/block/Kconfig"
 
 endif # BLK_DEV
diff --git a/drivers/block/Makefile b/drivers/block/Makefile
index e5f98acc5d52..43371c59623e 100644
--- a/drivers/block/Makefile
+++ b/drivers/block/Makefile
@@ -19,6 +19,7 @@ obj-$(CONFIG_BLK_CPQ_DA) += cpqarray.o
 obj-$(CONFIG_BLK_CPQ_CISS_DA)	+= cciss.o
 obj-$(CONFIG_BLK_DEV_DAC960)	+= DAC960.o
 obj-$(CONFIG_CDROM_PKTCDVD)	+= pktcdvd.o
+obj-$(CONFIG_SUNVDC)		+= sunvdc.o
 
 obj-$(CONFIG_BLK_DEV_UMEM)	+= umem.o
 obj-$(CONFIG_BLK_DEV_NBD)	+= nbd.o
diff --git a/drivers/block/sunvdc.c b/drivers/block/sunvdc.c
new file mode 100644
index 000000000000..0f5e3caf85d7
--- /dev/null
+++ b/drivers/block/sunvdc.c
@@ -0,0 +1,972 @@
1/* sunvdc.c: Sun LDOM Virtual Disk Client.
2 *
3 * Copyright (C) 2007 David S. Miller <davem@davemloft.net>
4 */
5
6#include <linux/module.h>
7#include <linux/kernel.h>
8#include <linux/types.h>
9#include <linux/blkdev.h>
10#include <linux/hdreg.h>
11#include <linux/genhd.h>
12#include <linux/slab.h>
13#include <linux/spinlock.h>
14#include <linux/completion.h>
15#include <linux/delay.h>
16#include <linux/init.h>
17#include <linux/list.h>
18
19#include <asm/vio.h>
20#include <asm/ldc.h>
21
22#define DRV_MODULE_NAME "sunvdc"
23#define PFX DRV_MODULE_NAME ": "
24#define DRV_MODULE_VERSION "1.0"
25#define DRV_MODULE_RELDATE "June 25, 2007"
26
27static char version[] __devinitdata =
28 DRV_MODULE_NAME ".c:v" DRV_MODULE_VERSION " (" DRV_MODULE_RELDATE ")\n";
29MODULE_AUTHOR("David S. Miller (davem@davemloft.net)");
30MODULE_DESCRIPTION("Sun LDOM virtual disk client driver");
31MODULE_LICENSE("GPL");
32MODULE_VERSION(DRV_MODULE_VERSION);
33
34#define VDC_TX_RING_SIZE 256
35
36#define WAITING_FOR_LINK_UP 0x01
37#define WAITING_FOR_TX_SPACE 0x02
38#define WAITING_FOR_GEN_CMD 0x04
39#define WAITING_FOR_ANY -1
40
41struct vdc_req_entry {
42 struct request *req;
43};
44
45struct vdc_port {
46 struct vio_driver_state vio;
47
48 struct vdc *vp;
49
50 struct gendisk *disk;
51
52 struct vdc_completion *cmp;
53
54 u64 req_id;
55 u64 seq;
56 struct vdc_req_entry rq_arr[VDC_TX_RING_SIZE];
57
58 unsigned long ring_cookies;
59
60 u64 max_xfer_size;
61 u32 vdisk_block_size;
62
63 /* The server fills these in for us in the disk attribute
64 * ACK packet.
65 */
66 u64 operations;
67 u32 vdisk_size;
68 u8 vdisk_type;
69 u8 dev_no;
70
71 char disk_name[32];
72
73 struct vio_disk_geom geom;
74 struct vio_disk_vtoc label;
75
76 struct list_head list;
77};
78
79static inline struct vdc_port *to_vdc_port(struct vio_driver_state *vio)
80{
81 return container_of(vio, struct vdc_port, vio);
82}
83
84struct vdc {
85 /* Protects prot_list. */
86 spinlock_t lock;
87
88 struct vio_dev *dev;
89
90 struct list_head port_list;
91};
92
93/* Ordered from largest major version to lowest. */
94static struct vio_version vdc_versions[] = {
95 { .major = 1, .minor = 0 },
96};
97
98#define VDCBLK_NAME "vdisk"
99static int vdc_major;
100#define PARTITION_SHIFT 3
101
102static inline u32 vdc_tx_dring_avail(struct vio_dring_state *dr)
103{
104 return vio_dring_avail(dr, VDC_TX_RING_SIZE);
105}
106
107static int vdc_getgeo(struct block_device *bdev, struct hd_geometry *geo)
108{
109 struct gendisk *disk = bdev->bd_disk;
110 struct vdc_port *port = disk->private_data;
111
112 geo->heads = (u8) port->geom.num_hd;
113 geo->sectors = (u8) port->geom.num_sec;
114 geo->cylinders = port->geom.num_cyl;
115
116 return 0;
117}
118
119static struct block_device_operations vdc_fops = {
120 .owner = THIS_MODULE,
121 .getgeo = vdc_getgeo,
122};
123
124static void vdc_finish(struct vio_driver_state *vio, int err, int waiting_for)
125{
126 if (vio->cmp &&
127 (waiting_for == -1 ||
128 vio->cmp->waiting_for == waiting_for)) {
129 vio->cmp->err = err;
130 complete(&vio->cmp->com);
131 vio->cmp = NULL;
132 }
133}
134
135static void vdc_handshake_complete(struct vio_driver_state *vio)
136{
137 vdc_finish(vio, 0, WAITING_FOR_LINK_UP);
138}
139
140static int vdc_handle_unknown(struct vdc_port *port, void *arg)
141{
142 struct vio_msg_tag *pkt = arg;
143
144 printk(KERN_ERR PFX "Received unknown msg [%02x:%02x:%04x:%08x]\n",
145 pkt->type, pkt->stype, pkt->stype_env, pkt->sid);
146 printk(KERN_ERR PFX "Resetting connection.\n");
147
148 ldc_disconnect(port->vio.lp);
149
150 return -ECONNRESET;
151}
152
153static int vdc_send_attr(struct vio_driver_state *vio)
154{
155 struct vdc_port *port = to_vdc_port(vio);
156 struct vio_disk_attr_info pkt;
157
158 memset(&pkt, 0, sizeof(pkt));
159
160 pkt.tag.type = VIO_TYPE_CTRL;
161 pkt.tag.stype = VIO_SUBTYPE_INFO;
162 pkt.tag.stype_env = VIO_ATTR_INFO;
163 pkt.tag.sid = vio_send_sid(vio);
164
165 pkt.xfer_mode = VIO_DRING_MODE;
166 pkt.vdisk_block_size = port->vdisk_block_size;
167 pkt.max_xfer_size = port->max_xfer_size;
168
169 viodbg(HS, "SEND ATTR xfer_mode[0x%x] blksz[%u] max_xfer[%lu]\n",
170 pkt.xfer_mode, pkt.vdisk_block_size, pkt.max_xfer_size);
171
172 return vio_ldc_send(&port->vio, &pkt, sizeof(pkt));
173}
174
175static int vdc_handle_attr(struct vio_driver_state *vio, void *arg)
176{
177 struct vdc_port *port = to_vdc_port(vio);
178 struct vio_disk_attr_info *pkt = arg;
179
180 viodbg(HS, "GOT ATTR stype[0x%x] ops[%lx] disk_size[%lu] disk_type[%x] "
181 "xfer_mode[0x%x] blksz[%u] max_xfer[%lu]\n",
182 pkt->tag.stype, pkt->operations,
183 pkt->vdisk_size, pkt->vdisk_type,
184 pkt->xfer_mode, pkt->vdisk_block_size,
185 pkt->max_xfer_size);
186
187 if (pkt->tag.stype == VIO_SUBTYPE_ACK) {
188 switch (pkt->vdisk_type) {
189 case VD_DISK_TYPE_DISK:
190 case VD_DISK_TYPE_SLICE:
191 break;
192
193 default:
194 printk(KERN_ERR PFX "%s: Bogus vdisk_type 0x%x\n",
195 vio->name, pkt->vdisk_type);
196 return -ECONNRESET;
197 }
198
199 if (pkt->vdisk_block_size > port->vdisk_block_size) {
200 printk(KERN_ERR PFX "%s: BLOCK size increased "
201 "%u --> %u\n",
202 vio->name,
203 port->vdisk_block_size, pkt->vdisk_block_size);
204 return -ECONNRESET;
205 }
206
207 port->operations = pkt->operations;
208 port->vdisk_size = pkt->vdisk_size;
209 port->vdisk_type = pkt->vdisk_type;
210 if (pkt->max_xfer_size < port->max_xfer_size)
211 port->max_xfer_size = pkt->max_xfer_size;
212 port->vdisk_block_size = pkt->vdisk_block_size;
213 return 0;
214 } else {
215 printk(KERN_ERR PFX "%s: Attribute NACK\n", vio->name);
216
217 return -ECONNRESET;
218 }
219}
220
221static void vdc_end_special(struct vdc_port *port, struct vio_disk_desc *desc)
222{
223 int err = desc->status;
224
225 vdc_finish(&port->vio, -err, WAITING_FOR_GEN_CMD);
226}
227
228static void vdc_end_request(struct request *req, int uptodate, int num_sectors)
229{
230 if (end_that_request_first(req, uptodate, num_sectors))
231 return;
232 add_disk_randomness(req->rq_disk);
233 end_that_request_last(req, uptodate);
234}
235
236static void vdc_end_one(struct vdc_port *port, struct vio_dring_state *dr,
237 unsigned int index)
238{
239 struct vio_disk_desc *desc = vio_dring_entry(dr, index);
240 struct vdc_req_entry *rqe = &port->rq_arr[index];
241 struct request *req;
242
243 if (unlikely(desc->hdr.state != VIO_DESC_DONE))
244 return;
245
246 ldc_unmap(port->vio.lp, desc->cookies, desc->ncookies);
247 desc->hdr.state = VIO_DESC_FREE;
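	/* VDC_TX_RING_SIZE is a power of two, so masking wraps the
	 * consumer index around the ring.
	 */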
248 dr->cons = (index + 1) & (VDC_TX_RING_SIZE - 1);
249
250 req = rqe->req;
251 if (req == NULL) {
252 vdc_end_special(port, desc);
253 return;
254 }
255
256 rqe->req = NULL;
257
258 vdc_end_request(req, !desc->status, desc->size >> 9);
259
260 if (blk_queue_stopped(port->disk->queue))
261 blk_start_queue(port->disk->queue);
262}
263
264static int vdc_ack(struct vdc_port *port, void *msgbuf)
265{
266 struct vio_dring_state *dr = &port->vio.drings[VIO_DRIVER_TX_RING];
267 struct vio_dring_data *pkt = msgbuf;
268
269 if (unlikely(pkt->dring_ident != dr->ident ||
270 pkt->start_idx != pkt->end_idx ||
271 pkt->start_idx >= VDC_TX_RING_SIZE))
272 return 0;
273
274 vdc_end_one(port, dr, pkt->start_idx);
275
276 return 0;
277}
278
279static int vdc_nack(struct vdc_port *port, void *msgbuf)
280{
281 /* XXX Implement me XXX */
282 return 0;
283}
284
285static void vdc_event(void *arg, int event)
286{
287 struct vdc_port *port = arg;
288 struct vio_driver_state *vio = &port->vio;
289 unsigned long flags;
290 int err;
291
292 spin_lock_irqsave(&vio->lock, flags);
293
294 if (unlikely(event == LDC_EVENT_RESET ||
295 event == LDC_EVENT_UP)) {
296 vio_link_state_change(vio, event);
297 spin_unlock_irqrestore(&vio->lock, flags);
298 return;
299 }
300
301 if (unlikely(event != LDC_EVENT_DATA_READY)) {
302 printk(KERN_WARNING PFX "Unexpected LDC event %d\n", event);
303 spin_unlock_irqrestore(&vio->lock, flags);
304 return;
305 }
306
307 err = 0;
308 while (1) {
309 union {
310 struct vio_msg_tag tag;
311 u64 raw[8];
312 } msgbuf;
313
314 err = ldc_read(vio->lp, &msgbuf, sizeof(msgbuf));
315 if (unlikely(err < 0)) {
316 if (err == -ECONNRESET)
317 vio_conn_reset(vio);
318 break;
319 }
320 if (err == 0)
321 break;
322 viodbg(DATA, "TAG [%02x:%02x:%04x:%08x]\n",
323 msgbuf.tag.type,
324 msgbuf.tag.stype,
325 msgbuf.tag.stype_env,
326 msgbuf.tag.sid);
327 err = vio_validate_sid(vio, &msgbuf.tag);
328 if (err < 0)
329 break;
330
331 if (likely(msgbuf.tag.type == VIO_TYPE_DATA)) {
332 if (msgbuf.tag.stype == VIO_SUBTYPE_ACK)
333 err = vdc_ack(port, &msgbuf);
334 else if (msgbuf.tag.stype == VIO_SUBTYPE_NACK)
335 err = vdc_nack(port, &msgbuf);
336 else
337 err = vdc_handle_unknown(port, &msgbuf);
338 } else if (msgbuf.tag.type == VIO_TYPE_CTRL) {
339 err = vio_control_pkt_engine(vio, &msgbuf);
340 } else {
341 err = vdc_handle_unknown(port, &msgbuf);
342 }
343 if (err < 0)
344 break;
345 }
346 if (err < 0)
347 vdc_finish(&port->vio, err, WAITING_FOR_ANY);
348 spin_unlock_irqrestore(&vio->lock, flags);
349}
350
351static int __vdc_tx_trigger(struct vdc_port *port)
352{
353 struct vio_dring_state *dr = &port->vio.drings[VIO_DRIVER_TX_RING];
354 struct vio_dring_data hdr = {
355 .tag = {
356 .type = VIO_TYPE_DATA,
357 .stype = VIO_SUBTYPE_INFO,
358 .stype_env = VIO_DRING_DATA,
359 .sid = vio_send_sid(&port->vio),
360 },
361 .dring_ident = dr->ident,
362 .start_idx = dr->prod,
363 .end_idx = dr->prod,
364 };
365 int err, delay;
366
367 hdr.seq = dr->snd_nxt;
368 delay = 1;
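	/* Retry with exponential backoff, capped at 128us, for as long
	 * as the LDC channel keeps returning -EAGAIN.
	 */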
369 do {
370 err = vio_ldc_send(&port->vio, &hdr, sizeof(hdr));
371 if (err > 0) {
372 dr->snd_nxt++;
373 break;
374 }
375 udelay(delay);
376 if ((delay <<= 1) > 128)
377 delay = 128;
378 } while (err == -EAGAIN);
379
380 return err;
381}
382
383static int __send_request(struct request *req)
384{
385 struct vdc_port *port = req->rq_disk->private_data;
386 struct vio_dring_state *dr = &port->vio.drings[VIO_DRIVER_TX_RING];
387 struct scatterlist sg[port->ring_cookies];
388 struct vdc_req_entry *rqe;
389 struct vio_disk_desc *desc;
390 unsigned int map_perm;
391 int nsg, err, i;
392 u64 len;
393 u8 op;
394
395 map_perm = LDC_MAP_SHADOW | LDC_MAP_DIRECT | LDC_MAP_IO;
396
397 if (rq_data_dir(req) == READ) {
398 map_perm |= LDC_MAP_W;
399 op = VD_OP_BREAD;
400 } else {
401 map_perm |= LDC_MAP_R;
402 op = VD_OP_BWRITE;
403 }
404
405 nsg = blk_rq_map_sg(req->q, req, sg);
406
407 len = 0;
408 for (i = 0; i < nsg; i++)
409 len += sg[i].length;
410
411 if (unlikely(vdc_tx_dring_avail(dr) < 1)) {
412 blk_stop_queue(port->disk->queue);
413 err = -ENOMEM;
414 goto out;
415 }
416
417 desc = vio_dring_cur(dr);
418
419 err = ldc_map_sg(port->vio.lp, sg, nsg,
420 desc->cookies, port->ring_cookies,
421 map_perm);
422 if (err < 0) {
423 printk(KERN_ERR PFX "ldc_map_sg() failure, err=%d.\n", err);
424 return err;
425 }
426
427 rqe = &port->rq_arr[dr->prod];
428 rqe->req = req;
429
430 desc->hdr.ack = VIO_ACK_ENABLE;
431 desc->req_id = port->req_id;
432 desc->operation = op;
433 if (port->vdisk_type == VD_DISK_TYPE_DISK) {
434 desc->slice = 2;
435 } else {
436 desc->slice = 0;
437 }
438 desc->status = ~0;
439 desc->offset = (req->sector << 9) / port->vdisk_block_size;
440 desc->size = len;
441 desc->ncookies = err;
442
443 /* This has to be a non-SMP write barrier because we are writing
444 * to memory which is shared with the peer LDOM.
445 */
446 wmb();
447 desc->hdr.state = VIO_DESC_READY;
448
449 err = __vdc_tx_trigger(port);
450 if (err < 0) {
451 printk(KERN_ERR PFX "vdc_tx_trigger() failure, err=%d\n", err);
452 } else {
453 port->req_id++;
454 dr->prod = (dr->prod + 1) & (VDC_TX_RING_SIZE - 1);
455 }
456out:
457
458 return err;
459}
460
461static void do_vdc_request(request_queue_t *q)
462{
463 while (1) {
464 struct request *req = elv_next_request(q);
465
466 if (!req)
467 break;
468
469 blkdev_dequeue_request(req);
470 if (__send_request(req) < 0)
471 vdc_end_request(req, 0, req->hard_nr_sectors);
472 }
473}
474
475static int generic_request(struct vdc_port *port, u8 op, void *buf, int len)
476{
477 struct vio_dring_state *dr;
478 struct vio_completion comp;
479 struct vio_disk_desc *desc;
480 unsigned int map_perm;
481 unsigned long flags;
482 int op_len, err;
483 void *req_buf;
484
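	/* port->operations is a bitmask the server supplied in its
	 * attribute ACK; bit (op - 1) set means this op is supported.
	 */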
485 if (!(((u64)1 << ((u64)op - 1)) & port->operations))
486 return -EOPNOTSUPP;
487
488 switch (op) {
489 case VD_OP_BREAD:
490 case VD_OP_BWRITE:
491 default:
492 return -EINVAL;
493
494 case VD_OP_FLUSH:
495 op_len = 0;
496 map_perm = 0;
497 break;
498
499 case VD_OP_GET_WCE:
500 op_len = sizeof(u32);
501 map_perm = LDC_MAP_W;
502 break;
503
504 case VD_OP_SET_WCE:
505 op_len = sizeof(u32);
506 map_perm = LDC_MAP_R;
507 break;
508
509 case VD_OP_GET_VTOC:
510 op_len = sizeof(struct vio_disk_vtoc);
511 map_perm = LDC_MAP_W;
512 break;
513
514 case VD_OP_SET_VTOC:
515 op_len = sizeof(struct vio_disk_vtoc);
516 map_perm = LDC_MAP_R;
517 break;
518
519 case VD_OP_GET_DISKGEOM:
520 op_len = sizeof(struct vio_disk_geom);
521 map_perm = LDC_MAP_W;
522 break;
523
524 case VD_OP_SET_DISKGEOM:
525 op_len = sizeof(struct vio_disk_geom);
526 map_perm = LDC_MAP_R;
527 break;
528
529 case VD_OP_SCSICMD:
530 op_len = 16;
531 map_perm = LDC_MAP_RW;
532 break;
533
534 case VD_OP_GET_DEVID:
535 op_len = sizeof(struct vio_disk_devid);
536 map_perm = LDC_MAP_W;
537 break;
538
539 case VD_OP_GET_EFI:
540 case VD_OP_SET_EFI:
541 return -EOPNOTSUPP;
542 break;
543	}
544
545 map_perm |= LDC_MAP_SHADOW | LDC_MAP_DIRECT | LDC_MAP_IO;
546
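	/* Copies through imported memory must use 8-byte aligned
	 * lengths, so pad the request buffer to an 8-byte multiple.
	 */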
547 op_len = (op_len + 7) & ~7;
548 req_buf = kzalloc(op_len, GFP_KERNEL);
549 if (!req_buf)
550 return -ENOMEM;
551
552 if (len > op_len)
553 len = op_len;
554
555 if (map_perm & LDC_MAP_R)
556 memcpy(req_buf, buf, len);
557
558 spin_lock_irqsave(&port->vio.lock, flags);
559
560 dr = &port->vio.drings[VIO_DRIVER_TX_RING];
561
562 /* XXX If we want to use this code generically we have to
563 * XXX handle TX ring exhaustion etc.
564 */
565 desc = vio_dring_cur(dr);
566
567 err = ldc_map_single(port->vio.lp, req_buf, op_len,
568 desc->cookies, port->ring_cookies,
569 map_perm);
570 if (err < 0) {
571 spin_unlock_irqrestore(&port->vio.lock, flags);
572 kfree(req_buf);
573 return err;
574 }
575
576 init_completion(&comp.com);
577 comp.waiting_for = WAITING_FOR_GEN_CMD;
578 port->vio.cmp = &comp;
579
580 desc->hdr.ack = VIO_ACK_ENABLE;
581 desc->req_id = port->req_id;
582 desc->operation = op;
583 desc->slice = 0;
584 desc->status = ~0;
585 desc->offset = 0;
586 desc->size = op_len;
587 desc->ncookies = err;
588
589 /* This has to be a non-SMP write barrier because we are writing
590 * to memory which is shared with the peer LDOM.
591 */
592 wmb();
593 desc->hdr.state = VIO_DESC_READY;
594
595 err = __vdc_tx_trigger(port);
596 if (err >= 0) {
597 port->req_id++;
598 dr->prod = (dr->prod + 1) & (VDC_TX_RING_SIZE - 1);
599 spin_unlock_irqrestore(&port->vio.lock, flags);
600
601 wait_for_completion(&comp.com);
602 err = comp.err;
603 } else {
604 port->vio.cmp = NULL;
605 spin_unlock_irqrestore(&port->vio.lock, flags);
606 }
607
608 if (map_perm & LDC_MAP_W)
609 memcpy(buf, req_buf, len);
610
611 kfree(req_buf);
612
613 return err;
614}
615
616static int __devinit vdc_alloc_tx_ring(struct vdc_port *port)
617{
618 struct vio_dring_state *dr = &port->vio.drings[VIO_DRIVER_TX_RING];
619 unsigned long len, entry_size;
620 int ncookies;
621 void *dring;
622
623 entry_size = sizeof(struct vio_disk_desc) +
624 (sizeof(struct ldc_trans_cookie) * port->ring_cookies);
625 len = (VDC_TX_RING_SIZE * entry_size);
626
627 ncookies = VIO_MAX_RING_COOKIES;
628 dring = ldc_alloc_exp_dring(port->vio.lp, len,
629 dr->cookies, &ncookies,
630 (LDC_MAP_SHADOW |
631 LDC_MAP_DIRECT |
632 LDC_MAP_RW));
633 if (IS_ERR(dring))
634 return PTR_ERR(dring);
635
636 dr->base = dring;
637 dr->entry_size = entry_size;
638 dr->num_entries = VDC_TX_RING_SIZE;
639 dr->prod = dr->cons = 0;
640 dr->pending = VDC_TX_RING_SIZE;
641 dr->ncookies = ncookies;
642
643 return 0;
644}
645
646static void vdc_free_tx_ring(struct vdc_port *port)
647{
648 struct vio_dring_state *dr = &port->vio.drings[VIO_DRIVER_TX_RING];
649
650 if (dr->base) {
651 ldc_free_exp_dring(port->vio.lp, dr->base,
652 (dr->entry_size * dr->num_entries),
653 dr->cookies, dr->ncookies);
654 dr->base = NULL;
655 dr->entry_size = 0;
656 dr->num_entries = 0;
657 dr->pending = 0;
658 dr->ncookies = 0;
659 }
660}
661
662static int probe_disk(struct vdc_port *port)
663{
664 struct vio_completion comp;
665 struct request_queue *q;
666 struct gendisk *g;
667 int err;
668
669 init_completion(&comp.com);
670 comp.err = 0;
671 comp.waiting_for = WAITING_FOR_LINK_UP;
672 port->vio.cmp = &comp;
673
674 vio_port_up(&port->vio);
675
676 wait_for_completion(&comp.com);
677 if (comp.err)
678 return comp.err;
679
680 err = generic_request(port, VD_OP_GET_VTOC,
681 &port->label, sizeof(port->label));
682 if (err < 0) {
683 printk(KERN_ERR PFX "VD_OP_GET_VTOC returns error %d\n", err);
684 return err;
685 }
686
687 err = generic_request(port, VD_OP_GET_DISKGEOM,
688 &port->geom, sizeof(port->geom));
689 if (err < 0) {
690 printk(KERN_ERR PFX "VD_OP_GET_DISKGEOM returns "
691 "error %d\n", err);
692 return err;
693 }
694
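	/* Compute the disk capacity from the CHS geometry the server
	 * reported above.
	 */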
695 port->vdisk_size = ((u64)port->geom.num_cyl *
696 (u64)port->geom.num_hd *
697 (u64)port->geom.num_sec);
698
699 q = blk_init_queue(do_vdc_request, &port->vio.lock);
700 if (!q) {
701 printk(KERN_ERR PFX "%s: Could not allocate queue.\n",
702 port->vio.name);
703 return -ENOMEM;
704 }
705 g = alloc_disk(1 << PARTITION_SHIFT);
706 if (!g) {
707 printk(KERN_ERR PFX "%s: Could not allocate gendisk.\n",
708 port->vio.name);
709 blk_cleanup_queue(q);
710 return -ENOMEM;
711 }
712
713 port->disk = g;
714
715 blk_queue_max_hw_segments(q, port->ring_cookies);
716 blk_queue_max_phys_segments(q, port->ring_cookies);
717 blk_queue_max_sectors(q, port->max_xfer_size);
718 g->major = vdc_major;
719 g->first_minor = port->dev_no << PARTITION_SHIFT;
720 strcpy(g->disk_name, port->disk_name);
721
722 g->fops = &vdc_fops;
723 g->queue = q;
724 g->private_data = port;
725 g->driverfs_dev = &port->vio.vdev->dev;
726
727 set_capacity(g, port->vdisk_size);
728
729 printk(KERN_INFO PFX "%s: %u sectors (%u MB)\n",
730 g->disk_name,
731 port->vdisk_size, (port->vdisk_size >> (20 - 9)));
732
733 add_disk(g);
734
735 return 0;
736}
737
738static struct ldc_channel_config vdc_ldc_cfg = {
739 .event = vdc_event,
740 .mtu = 64,
741 .mode = LDC_MODE_UNRELIABLE,
742};
743
744static struct vio_driver_ops vdc_vio_ops = {
745 .send_attr = vdc_send_attr,
746 .handle_attr = vdc_handle_attr,
747 .handshake_complete = vdc_handshake_complete,
748};
749
750static int __devinit vdc_port_probe(struct vio_dev *vdev,
751 const struct vio_device_id *id)
752{
753 struct mdesc_handle *hp;
754 struct vdc_port *port;
755 unsigned long flags;
756 struct vdc *vp;
757 const u64 *port_id;
758 int err;
759
760 vp = dev_get_drvdata(vdev->dev.parent);
761 if (!vp) {
762 printk(KERN_ERR PFX "Cannot find port parent vdc.\n");
763 return -ENODEV;
764 }
765
766 hp = mdesc_grab();
767
768 port_id = mdesc_get_property(hp, vdev->mp, "id", NULL);
769 err = -ENODEV;
770 if (!port_id) {
771 printk(KERN_ERR PFX "Port lacks id property.\n");
772 goto err_out_release_mdesc;
773 }
774 if ((*port_id << PARTITION_SHIFT) & ~(u64)MINORMASK) {
775 printk(KERN_ERR PFX "Port id [%lu] too large.\n", *port_id);
776 goto err_out_release_mdesc;
777 }
778
779 port = kzalloc(sizeof(*port), GFP_KERNEL);
780 err = -ENOMEM;
781 if (!port) {
782 printk(KERN_ERR PFX "Cannot allocate vdc_port.\n");
783 goto err_out_release_mdesc;
784 }
785
786 port->vp = vp;
787 port->dev_no = *port_id;
788
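	/* Name devices vdiska..vdiskz, then vdiskaa onward, based on
	 * the port id from the machine description.
	 */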
789 if (port->dev_no >= 26)
790 snprintf(port->disk_name, sizeof(port->disk_name),
791 VDCBLK_NAME "%c%c",
792 'a' + (port->dev_no / 26) - 1,
793 'a' + (port->dev_no % 26));
794 else
795 snprintf(port->disk_name, sizeof(port->disk_name),
796 VDCBLK_NAME "%c", 'a' + (port->dev_no % 26));
797
798 err = vio_driver_init(&port->vio, vdev, VDEV_DISK,
799 vdc_versions, ARRAY_SIZE(vdc_versions),
800 &vdc_vio_ops, port->disk_name);
801 if (err)
802 goto err_out_free_port;
803
804 port->vdisk_block_size = 512;
805 port->max_xfer_size = ((128 * 1024) / port->vdisk_block_size);
806 port->ring_cookies = ((port->max_xfer_size *
807 port->vdisk_block_size) / PAGE_SIZE) + 2;
808
809 err = vio_ldc_alloc(&port->vio, &vdc_ldc_cfg, port);
810 if (err)
811 goto err_out_free_port;
812
813 err = vdc_alloc_tx_ring(port);
814 if (err)
815 goto err_out_free_ldc;
816
817 err = probe_disk(port);
818 if (err)
819 goto err_out_free_tx_ring;
820
821 INIT_LIST_HEAD(&port->list);
822
823 spin_lock_irqsave(&vp->lock, flags);
824 list_add(&port->list, &vp->port_list);
825 spin_unlock_irqrestore(&vp->lock, flags);
826
827 dev_set_drvdata(&vdev->dev, port);
828
829 mdesc_release(hp);
830
831 return 0;
832
833err_out_free_tx_ring:
834 vdc_free_tx_ring(port);
835
836err_out_free_ldc:
837 vio_ldc_free(&port->vio);
838
839err_out_free_port:
840 kfree(port);
841
842err_out_release_mdesc:
843 mdesc_release(hp);
844 return err;
845}
846
847static int vdc_port_remove(struct vio_dev *vdev)
848{
849 struct vdc_port *port = dev_get_drvdata(&vdev->dev);
850
851 if (port) {
852 del_timer_sync(&port->vio.timer);
853
854 vdc_free_tx_ring(port);
855 vio_ldc_free(&port->vio);
856
857 dev_set_drvdata(&vdev->dev, NULL);
858
859 kfree(port);
860 }
861 return 0;
862}
863
864static struct vio_device_id vdc_port_match[] = {
865 {
866 .type = "vdc-port",
867 },
868 {},
869};
870MODULE_DEVICE_TABLE(vio, vdc_port_match);
871
872static struct vio_driver vdc_port_driver = {
873 .id_table = vdc_port_match,
874 .probe = vdc_port_probe,
875 .remove = vdc_port_remove,
876 .driver = {
877 .name = "vdc_port",
878 .owner = THIS_MODULE,
879 }
880};
881
882static int __devinit vdc_probe(struct vio_dev *vdev,
883 const struct vio_device_id *id)
884{
885 static int vdc_version_printed;
886 struct vdc *vp;
887
888 if (vdc_version_printed++ == 0)
889 printk(KERN_INFO "%s", version);
890
891 vp = kzalloc(sizeof(struct vdc), GFP_KERNEL);
892 if (!vp)
893 return -ENOMEM;
894
895 spin_lock_init(&vp->lock);
896 vp->dev = vdev;
897 INIT_LIST_HEAD(&vp->port_list);
898
899 dev_set_drvdata(&vdev->dev, vp);
900
901 return 0;
902}
903
904static int vdc_remove(struct vio_dev *vdev)
905{
906
907 struct vdc *vp = dev_get_drvdata(&vdev->dev);
908
909 if (vp) {
910 kfree(vp);
911 dev_set_drvdata(&vdev->dev, NULL);
912 }
913 return 0;
914}
915
916static struct vio_device_id vdc_match[] = {
917 {
918 .type = "block",
919 },
920 {},
921};
922MODULE_DEVICE_TABLE(vio, vdc_match);
923
924static struct vio_driver vdc_driver = {
925 .id_table = vdc_match,
926 .probe = vdc_probe,
927 .remove = vdc_remove,
928 .driver = {
929 .name = "vdc",
930 .owner = THIS_MODULE,
931 }
932};
933
934static int __init vdc_init(void)
935{
936 int err;
937
938 err = register_blkdev(0, VDCBLK_NAME);
939 if (err < 0)
940 goto out_err;
941
942 vdc_major = err;
943 err = vio_register_driver(&vdc_driver);
944 if (err)
945 goto out_unregister_blkdev;
946
947 err = vio_register_driver(&vdc_port_driver);
948 if (err)
949 goto out_unregister_vdc;
950
951 return 0;
952
953out_unregister_vdc:
954 vio_unregister_driver(&vdc_driver);
955
956out_unregister_blkdev:
957 unregister_blkdev(vdc_major, VDCBLK_NAME);
958 vdc_major = 0;
959
960out_err:
961 return err;
962}
963
964static void __exit vdc_exit(void)
965{
966 vio_unregister_driver(&vdc_port_driver);
967 vio_unregister_driver(&vdc_driver);
968 unregister_blkdev(vdc_major, VDCBLK_NAME);
969}
970
971module_init(vdc_init);
972module_exit(vdc_exit);
diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig
index d17d64eb7065..7903f9c7839e 100644
--- a/drivers/net/Kconfig
+++ b/drivers/net/Kconfig
@@ -604,6 +604,12 @@ config CASSINI
 	  Support for the Sun Cassini chip, aka Sun GigaSwift Ethernet. See also
 	  <http://www.sun.com/products-n-solutions/hardware/docs/pdf/817-4341-10.pdf>
 
+config SUNVNET
+	tristate "Sun Virtual Network support"
+	depends on SUN_LDOMS
+	help
+	  Support for virtual network devices under Sun Logical Domains.
+
 config NET_VENDOR_3COM
 	bool "3COM cards"
 	depends on ISA || EISA || MCA || PCI
diff --git a/drivers/net/Makefile b/drivers/net/Makefile
index c26b8674213c..b95b1b237a26 100644
--- a/drivers/net/Makefile
+++ b/drivers/net/Makefile
@@ -34,6 +34,7 @@ obj-$(CONFIG_SUNBMAC) += sunbmac.o
 obj-$(CONFIG_MYRI_SBUS) += myri_sbus.o
 obj-$(CONFIG_SUNGEM) += sungem.o sungem_phy.o
 obj-$(CONFIG_CASSINI) += cassini.o
+obj-$(CONFIG_SUNVNET) += sunvnet.o
 
 obj-$(CONFIG_MACE) += mace.o
 obj-$(CONFIG_BMAC) += bmac.o
diff --git a/drivers/net/sunvnet.c b/drivers/net/sunvnet.c
new file mode 100644
index 000000000000..8a667c13faef
--- /dev/null
+++ b/drivers/net/sunvnet.c
@@ -0,0 +1,1164 @@
1/* sunvnet.c: Sun LDOM Virtual Network Driver.
2 *
3 * Copyright (C) 2007 David S. Miller <davem@davemloft.net>
4 */
5
6#include <linux/module.h>
7#include <linux/kernel.h>
8#include <linux/types.h>
9#include <linux/slab.h>
10#include <linux/delay.h>
11#include <linux/init.h>
12#include <linux/netdevice.h>
13#include <linux/ethtool.h>
14#include <linux/etherdevice.h>
15
16#include <asm/vio.h>
17#include <asm/ldc.h>
18
19#include "sunvnet.h"
20
21#define DRV_MODULE_NAME "sunvnet"
22#define PFX DRV_MODULE_NAME ": "
23#define DRV_MODULE_VERSION "1.0"
24#define DRV_MODULE_RELDATE "June 25, 2007"
25
26static char version[] __devinitdata =
27 DRV_MODULE_NAME ".c:v" DRV_MODULE_VERSION " (" DRV_MODULE_RELDATE ")\n";
28MODULE_AUTHOR("David S. Miller (davem@davemloft.net)");
29MODULE_DESCRIPTION("Sun LDOM virtual network driver");
30MODULE_LICENSE("GPL");
31MODULE_VERSION(DRV_MODULE_VERSION);
32
33/* Ordered from largest major version to lowest. */
34static struct vio_version vnet_versions[] = {
35 { .major = 1, .minor = 0 },
36};
37
38static inline u32 vnet_tx_dring_avail(struct vio_dring_state *dr)
39{
40 return vio_dring_avail(dr, VNET_TX_RING_SIZE);
41}
42
43static int vnet_handle_unknown(struct vnet_port *port, void *arg)
44{
45 struct vio_msg_tag *pkt = arg;
46
47 printk(KERN_ERR PFX "Received unknown msg [%02x:%02x:%04x:%08x]\n",
48 pkt->type, pkt->stype, pkt->stype_env, pkt->sid);
49 printk(KERN_ERR PFX "Resetting connection.\n");
50
51 ldc_disconnect(port->vio.lp);
52
53 return -ECONNRESET;
54}
55
56static int vnet_send_attr(struct vio_driver_state *vio)
57{
58 struct vnet_port *port = to_vnet_port(vio);
59 struct net_device *dev = port->vp->dev;
60 struct vio_net_attr_info pkt;
61 int i;
62
63 memset(&pkt, 0, sizeof(pkt));
64 pkt.tag.type = VIO_TYPE_CTRL;
65 pkt.tag.stype = VIO_SUBTYPE_INFO;
66 pkt.tag.stype_env = VIO_ATTR_INFO;
67 pkt.tag.sid = vio_send_sid(vio);
68 pkt.xfer_mode = VIO_DRING_MODE;
69 pkt.addr_type = VNET_ADDR_ETHERMAC;
70 pkt.ack_freq = 0;
71 for (i = 0; i < 6; i++)
72 pkt.addr |= (u64)dev->dev_addr[i] << ((5 - i) * 8);
73 pkt.mtu = ETH_FRAME_LEN;
74
75 viodbg(HS, "SEND NET ATTR xmode[0x%x] atype[0x%x] addr[%llx] "
76 "ackfreq[%u] mtu[%llu]\n",
77 pkt.xfer_mode, pkt.addr_type,
78 (unsigned long long) pkt.addr,
79 pkt.ack_freq,
80 (unsigned long long) pkt.mtu);
81
82 return vio_ldc_send(vio, &pkt, sizeof(pkt));
83}
84
85static int handle_attr_info(struct vio_driver_state *vio,
86 struct vio_net_attr_info *pkt)
87{
88 viodbg(HS, "GOT NET ATTR INFO xmode[0x%x] atype[0x%x] addr[%llx] "
89 "ackfreq[%u] mtu[%llu]\n",
90 pkt->xfer_mode, pkt->addr_type,
91 (unsigned long long) pkt->addr,
92 pkt->ack_freq,
93 (unsigned long long) pkt->mtu);
94
95 pkt->tag.sid = vio_send_sid(vio);
96
97 if (pkt->xfer_mode != VIO_DRING_MODE ||
98 pkt->addr_type != VNET_ADDR_ETHERMAC ||
99 pkt->mtu != ETH_FRAME_LEN) {
100 viodbg(HS, "SEND NET ATTR NACK\n");
101
102 pkt->tag.stype = VIO_SUBTYPE_NACK;
103
104 (void) vio_ldc_send(vio, pkt, sizeof(*pkt));
105
106 return -ECONNRESET;
107 } else {
108 viodbg(HS, "SEND NET ATTR ACK\n");
109
110 pkt->tag.stype = VIO_SUBTYPE_ACK;
111
112 return vio_ldc_send(vio, pkt, sizeof(*pkt));
113 }
114
115}
116
117static int handle_attr_ack(struct vio_driver_state *vio,
118 struct vio_net_attr_info *pkt)
119{
120 viodbg(HS, "GOT NET ATTR ACK\n");
121
122 return 0;
123}
124
125static int handle_attr_nack(struct vio_driver_state *vio,
126 struct vio_net_attr_info *pkt)
127{
128 viodbg(HS, "GOT NET ATTR NACK\n");
129
130 return -ECONNRESET;
131}
132
133static int vnet_handle_attr(struct vio_driver_state *vio, void *arg)
134{
135 struct vio_net_attr_info *pkt = arg;
136
137 switch (pkt->tag.stype) {
138 case VIO_SUBTYPE_INFO:
139 return handle_attr_info(vio, pkt);
140
141 case VIO_SUBTYPE_ACK:
142 return handle_attr_ack(vio, pkt);
143
144 case VIO_SUBTYPE_NACK:
145 return handle_attr_nack(vio, pkt);
146
147 default:
148 return -ECONNRESET;
149 }
150}
151
152static void vnet_handshake_complete(struct vio_driver_state *vio)
153{
154 struct vio_dring_state *dr;
155
156 dr = &vio->drings[VIO_DRIVER_RX_RING];
157 dr->snd_nxt = dr->rcv_nxt = 1;
158
159 dr = &vio->drings[VIO_DRIVER_TX_RING];
160 dr->snd_nxt = dr->rcv_nxt = 1;
161}
162
163/* The hypervisor interface that implements copying to/from imported
164 * memory from another domain requires that copies are done to 8-byte
165 * aligned buffers, and that the lengths of such copies are also 8-byte
166 * multiples.
167 *
168 * So we align skb->data to an 8-byte multiple and pad-out the data
169 * area so we can round the copy length up to the next multiple of
170 * 8 for the copy.
171 *
172 * The transmitter puts the actual start of the packet 6 bytes into
173 * the buffer it sends over, so that the IP headers after the ethernet
174 * header are aligned properly. These 6 bytes are not in the descriptor
175 * length, they are simply implied. This offset is represented using
176 * the VNET_PACKET_SKIP macro.
177 */
178static struct sk_buff *alloc_and_align_skb(struct net_device *dev,
179 unsigned int len)
180{
181 struct sk_buff *skb = netdev_alloc_skb(dev, len+VNET_PACKET_SKIP+8+8);
182 unsigned long addr, off;
183
184 if (unlikely(!skb))
185 return NULL;
186
187 addr = (unsigned long) skb->data;
188 off = ((addr + 7UL) & ~7UL) - addr;
189 if (off)
190 skb_reserve(skb, off);
191
192 return skb;
193}
194
195static int vnet_rx_one(struct vnet_port *port, unsigned int len,
196 struct ldc_trans_cookie *cookies, int ncookies)
197{
198 struct net_device *dev = port->vp->dev;
199 unsigned int copy_len;
200 struct sk_buff *skb;
201 int err;
202
203 err = -EMSGSIZE;
204 if (unlikely(len < ETH_ZLEN || len > ETH_FRAME_LEN)) {
205 dev->stats.rx_length_errors++;
206 goto out_dropped;
207 }
208
209 skb = alloc_and_align_skb(dev, len);
210 err = -ENOMEM;
211 if (unlikely(!skb)) {
212 dev->stats.rx_missed_errors++;
213 goto out_dropped;
214 }
215
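	/* Round the copy length up to a multiple of 8, as required by
	 * the hypervisor copy interface described above.
	 */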
216 copy_len = (len + VNET_PACKET_SKIP + 7U) & ~7U;
217 skb_put(skb, copy_len);
218 err = ldc_copy(port->vio.lp, LDC_COPY_IN,
219 skb->data, copy_len, 0,
220 cookies, ncookies);
221 if (unlikely(err < 0)) {
222 dev->stats.rx_frame_errors++;
223 goto out_free_skb;
224 }
225
226 skb_pull(skb, VNET_PACKET_SKIP);
227 skb_trim(skb, len);
228 skb->protocol = eth_type_trans(skb, dev);
229
230 dev->stats.rx_packets++;
231 dev->stats.rx_bytes += len;
232
233 netif_rx(skb);
234
235 return 0;
236
237out_free_skb:
238 kfree_skb(skb);
239
240out_dropped:
241 dev->stats.rx_dropped++;
242 return err;
243}
244
245static int vnet_send_ack(struct vnet_port *port, struct vio_dring_state *dr,
246 u32 start, u32 end, u8 vio_dring_state)
247{
248 struct vio_dring_data hdr = {
249 .tag = {
250 .type = VIO_TYPE_DATA,
251 .stype = VIO_SUBTYPE_ACK,
252 .stype_env = VIO_DRING_DATA,
253 .sid = vio_send_sid(&port->vio),
254 },
255 .dring_ident = dr->ident,
256 .start_idx = start,
257 .end_idx = end,
258 .state = vio_dring_state,
259 };
260 int err, delay;
261
262 hdr.seq = dr->snd_nxt;
263 delay = 1;
264 do {
265 err = vio_ldc_send(&port->vio, &hdr, sizeof(hdr));
266 if (err > 0) {
267 dr->snd_nxt++;
268 break;
269 }
270 udelay(delay);
271 if ((delay <<= 1) > 128)
272 delay = 128;
273 } while (err == -EAGAIN);
274
275 return err;
276}
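
vio_ldc_send() can return -EAGAIN while the LDC transmit queue is full, so the sender backs off with a doubling udelay() capped at 128 microseconds. A stand-alone sketch of just the backoff progression:

	#include <stdio.h>

	int main(void)
	{
		int delay = 1;

		/* Same shape as the retry loop above: 1, 2, 4, ...
		 * then pinned at 128 for every later attempt.
		 */
		for (int attempt = 0; attempt < 10; attempt++) {
			printf("attempt %d: udelay(%d)\n", attempt, delay);
			if ((delay <<= 1) > 128)
				delay = 128;
		}
		return 0;
	}
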
277
278static u32 next_idx(u32 idx, struct vio_dring_state *dr)
279{
280 if (++idx == dr->num_entries)
281 idx = 0;
282 return idx;
283}
284
285static u32 prev_idx(u32 idx, struct vio_dring_state *dr)
286{
287 if (idx == 0)
288 idx = dr->num_entries - 1;
289 else
290 idx--;
291
292 return idx;
293}
294
295static struct vio_net_desc *get_rx_desc(struct vnet_port *port,
296 struct vio_dring_state *dr,
297 u32 index)
298{
299 struct vio_net_desc *desc = port->vio.desc_buf;
300 int err;
301
302 err = ldc_get_dring_entry(port->vio.lp, desc, dr->entry_size,
303 (index * dr->entry_size),
304 dr->cookies, dr->ncookies);
305 if (err < 0)
306 return ERR_PTR(err);
307
308 return desc;
309}
310
311static int put_rx_desc(struct vnet_port *port,
312 struct vio_dring_state *dr,
313 struct vio_net_desc *desc,
314 u32 index)
315{
316 int err;
317
318 err = ldc_put_dring_entry(port->vio.lp, desc, dr->entry_size,
319 (index * dr->entry_size),
320 dr->cookies, dr->ncookies);
321 if (err < 0)
322 return err;
323
324 return 0;
325}
326
327static int vnet_walk_rx_one(struct vnet_port *port,
328 struct vio_dring_state *dr,
329 u32 index, int *needs_ack)
330{
331 struct vio_net_desc *desc = get_rx_desc(port, dr, index);
332 struct vio_driver_state *vio = &port->vio;
333 int err;
334
335 if (IS_ERR(desc))
336 return PTR_ERR(desc);
337
338 viodbg(DATA, "vio_walk_rx_one desc[%02x:%02x:%08x:%08x:%lx:%lx]\n",
339 desc->hdr.state, desc->hdr.ack,
340 desc->size, desc->ncookies,
341 desc->cookies[0].cookie_addr,
342 desc->cookies[0].cookie_size);
343
344 if (desc->hdr.state != VIO_DESC_READY)
345 return 1;
346 err = vnet_rx_one(port, desc->size, desc->cookies, desc->ncookies);
347 if (err == -ECONNRESET)
348 return err;
349 desc->hdr.state = VIO_DESC_DONE;
350 err = put_rx_desc(port, dr, desc, index);
351 if (err < 0)
352 return err;
353 *needs_ack = desc->hdr.ack;
354 return 0;
355}
356
357static int vnet_walk_rx(struct vnet_port *port, struct vio_dring_state *dr,
358 u32 start, u32 end)
359{
360 struct vio_driver_state *vio = &port->vio;
361 int ack_start = -1, ack_end = -1;
362
363 end = (end == (u32) -1) ? prev_idx(start, dr) : next_idx(end, dr);
364
365 viodbg(DATA, "vnet_walk_rx start[%08x] end[%08x]\n", start, end);
366
367 while (start != end) {
368 int ack = 0, err = vnet_walk_rx_one(port, dr, start, &ack);
369 if (err == -ECONNRESET)
370 return err;
371 if (err != 0)
372 break;
373 if (ack_start == -1)
374 ack_start = start;
375 ack_end = start;
376 start = next_idx(start, dr);
377 if (ack && start != end) {
378 err = vnet_send_ack(port, dr, ack_start, ack_end,
379 VIO_DRING_ACTIVE);
380 if (err == -ECONNRESET)
381 return err;
382 ack_start = -1;
383 }
384 }
385 if (unlikely(ack_start == -1))
386 ack_start = ack_end = prev_idx(start, dr);
387 return vnet_send_ack(port, dr, ack_start, ack_end, VIO_DRING_STOPPED);
388}
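
Two details of this walk deserve a worked example: the peer's inclusive end index is converted to an exclusive bound with next_idx(), and end == (u32)-1 means "no explicit end", in which case prev_idx(start) becomes the bound. A small sketch with a hypothetical 8-entry ring:

	#include <assert.h>

	#define RING 8u	/* hypothetical ring size, for illustration */

	static unsigned int next_idx(unsigned int i)
	{
		return (i + 1 == RING) ? 0 : i + 1;
	}

	static unsigned int prev_idx(unsigned int i)
	{
		return (i == 0) ? RING - 1 : i - 1;
	}

	int main(void)
	{
		/* Inclusive end 1 becomes exclusive bound next_idx(1) == 2,
		 * so a walk starting at 6 visits slots 6, 7, 0, 1.
		 */
		unsigned int idx = 6, stop = next_idx(1), visited = 0;

		while (idx != stop) {
			visited++;
			idx = next_idx(idx);
		}
		assert(visited == 4);

		/* end == (u32)-1 uses prev_idx(start) as the bound instead,
		 * covering every slot but one (the real loop normally stops
		 * sooner, at the first descriptor not in VIO_DESC_READY).
		 */
		assert(prev_idx(6) == 5);
		return 0;
	}
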
389
390static int vnet_rx(struct vnet_port *port, void *msgbuf)
391{
392 struct vio_dring_data *pkt = msgbuf;
393 struct vio_dring_state *dr = &port->vio.drings[VIO_DRIVER_RX_RING];
394 struct vio_driver_state *vio = &port->vio;
395
396 viodbg(DATA, "vnet_rx stype_env[%04x] seq[%016lx] rcv_nxt[%016lx]\n",
397 pkt->tag.stype_env, pkt->seq, dr->rcv_nxt);
398
399 if (unlikely(pkt->tag.stype_env != VIO_DRING_DATA))
400 return 0;
401 if (unlikely(pkt->seq != dr->rcv_nxt)) {
402 printk(KERN_ERR PFX "RX out of sequence seq[0x%lx] "
403 "rcv_nxt[0x%lx]\n", pkt->seq, dr->rcv_nxt);
404 return 0;
405 }
406
407 dr->rcv_nxt++;
408
409 /* XXX Validate pkt->start_idx and pkt->end_idx XXX */
410
411 return vnet_walk_rx(port, dr, pkt->start_idx, pkt->end_idx);
412}
413
414static int idx_is_pending(struct vio_dring_state *dr, u32 end)
415{
416 u32 idx = dr->cons;
417 int found = 0;
418
419 while (idx != dr->prod) {
420 if (idx == end) {
421 found = 1;
422 break;
423 }
424 idx = next_idx(idx, dr);
425 }
426 return found;
427}
428
429static int vnet_ack(struct vnet_port *port, void *msgbuf)
430{
431 struct vio_dring_state *dr = &port->vio.drings[VIO_DRIVER_TX_RING];
432 struct vio_dring_data *pkt = msgbuf;
433 struct net_device *dev;
434 struct vnet *vp;
435 u32 end;
436
437 if (unlikely(pkt->tag.stype_env != VIO_DRING_DATA))
438 return 0;
439
440 end = pkt->end_idx;
441 if (unlikely(!idx_is_pending(dr, end)))
442 return 0;
443
444 dr->cons = next_idx(end, dr);
445
446 vp = port->vp;
447 dev = vp->dev;
448 if (unlikely(netif_queue_stopped(dev) &&
449 vnet_tx_dring_avail(dr) >= VNET_TX_WAKEUP_THRESH(dr)))
450 return 1;
451
452 return 0;
453}
454
455static int vnet_nack(struct vnet_port *port, void *msgbuf)
456{
457 /* XXX just reset or similar XXX */
458 return 0;
459}
460
461static void maybe_tx_wakeup(struct vnet *vp)
462{
463 struct net_device *dev = vp->dev;
464
465 netif_tx_lock(dev);
466 if (likely(netif_queue_stopped(dev))) {
467 struct vnet_port *port;
468 int wake = 1;
469
470 list_for_each_entry(port, &vp->port_list, list) {
471 struct vio_dring_state *dr;
472
473 dr = &port->vio.drings[VIO_DRIVER_TX_RING];
474 if (vnet_tx_dring_avail(dr) <
475 VNET_TX_WAKEUP_THRESH(dr)) {
476 wake = 0;
477 break;
478 }
479 }
480 if (wake)
481 netif_wake_queue(dev);
482 }
483 netif_tx_unlock(dev);
484}
485
486static void vnet_event(void *arg, int event)
487{
488 struct vnet_port *port = arg;
489 struct vio_driver_state *vio = &port->vio;
490 unsigned long flags;
491 int tx_wakeup, err;
492
493 spin_lock_irqsave(&vio->lock, flags);
494
495 if (unlikely(event == LDC_EVENT_RESET ||
496 event == LDC_EVENT_UP)) {
497 vio_link_state_change(vio, event);
498 spin_unlock_irqrestore(&vio->lock, flags);
499
500 return;
501 }
502
503 if (unlikely(event != LDC_EVENT_DATA_READY)) {
504 printk(KERN_WARNING PFX "Unexpected LDC event %d\n", event);
505 spin_unlock_irqrestore(&vio->lock, flags);
506 return;
507 }
508
509 tx_wakeup = err = 0;
510 while (1) {
511 union {
512 struct vio_msg_tag tag;
513 u64 raw[8];
514 } msgbuf;
515
516 err = ldc_read(vio->lp, &msgbuf, sizeof(msgbuf));
517 if (unlikely(err < 0)) {
518 if (err == -ECONNRESET)
519 vio_conn_reset(vio);
520 break;
521 }
522 if (err == 0)
523 break;
524 viodbg(DATA, "TAG [%02x:%02x:%04x:%08x]\n",
525 msgbuf.tag.type,
526 msgbuf.tag.stype,
527 msgbuf.tag.stype_env,
528 msgbuf.tag.sid);
529 err = vio_validate_sid(vio, &msgbuf.tag);
530 if (err < 0)
531 break;
532
533 if (likely(msgbuf.tag.type == VIO_TYPE_DATA)) {
534 if (msgbuf.tag.stype == VIO_SUBTYPE_INFO) {
535 err = vnet_rx(port, &msgbuf);
536 } else if (msgbuf.tag.stype == VIO_SUBTYPE_ACK) {
537 err = vnet_ack(port, &msgbuf);
538 if (err > 0)
539 tx_wakeup |= err;
540 } else if (msgbuf.tag.stype == VIO_SUBTYPE_NACK) {
541 err = vnet_nack(port, &msgbuf);
542 }
543 } else if (msgbuf.tag.type == VIO_TYPE_CTRL) {
544 err = vio_control_pkt_engine(vio, &msgbuf);
545 if (err)
546 break;
547 } else {
548 err = vnet_handle_unknown(port, &msgbuf);
549 }
550 if (err == -ECONNRESET)
551 break;
552 }
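	/* Drop vio->lock but leave interrupts disabled across the
	 * wakeup: maybe_tx_wakeup() takes netif_tx_lock(), and
	 * vnet_start_xmit() acquires vio->lock while the core holds
	 * netif_tx_lock(), so the ordering must stay tx_lock first.
	 */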
553 spin_unlock(&vio->lock);
554 if (unlikely(tx_wakeup && err != -ECONNRESET))
555 maybe_tx_wakeup(port->vp);
556 local_irq_restore(flags);
557}
558
559static int __vnet_tx_trigger(struct vnet_port *port)
560{
561 struct vio_dring_state *dr = &port->vio.drings[VIO_DRIVER_TX_RING];
562 struct vio_dring_data hdr = {
563 .tag = {
564 .type = VIO_TYPE_DATA,
565 .stype = VIO_SUBTYPE_INFO,
566 .stype_env = VIO_DRING_DATA,
567 .sid = vio_send_sid(&port->vio),
568 },
569 .dring_ident = dr->ident,
570 .start_idx = dr->prod,
571 .end_idx = (u32) -1,
572 };
573 int err, delay;
574
575 hdr.seq = dr->snd_nxt;
576 delay = 1;
577 do {
578 err = vio_ldc_send(&port->vio, &hdr, sizeof(hdr));
579 if (err > 0) {
580 dr->snd_nxt++;
581 break;
582 }
583 udelay(delay);
584 if ((delay <<= 1) > 128)
585 delay = 128;
586 } while (err == -EAGAIN);
587
588 return err;
589}
590
591struct vnet_port *__tx_port_find(struct vnet *vp, struct sk_buff *skb)
592{
593 unsigned int hash = vnet_hashfn(skb->data);
594 struct hlist_head *hp = &vp->port_hash[hash];
595 struct hlist_node *n;
596 struct vnet_port *port;
597
598 hlist_for_each_entry(port, n, hp, hash) {
599 if (!compare_ether_addr(port->raddr, skb->data))
600 return port;
601 }
602 port = NULL;
603 if (!list_empty(&vp->port_list))
604 port = list_entry(vp->port_list.next, struct vnet_port, list);
605
606 return port;
607}
608
609struct vnet_port *tx_port_find(struct vnet *vp, struct sk_buff *skb)
610{
611 struct vnet_port *ret;
612 unsigned long flags;
613
614 spin_lock_irqsave(&vp->lock, flags);
615 ret = __tx_port_find(vp, skb);
616 spin_unlock_irqrestore(&vp->lock, flags);
617
618 return ret;
619}
620
621static int vnet_start_xmit(struct sk_buff *skb, struct net_device *dev)
622{
623 struct vnet *vp = netdev_priv(dev);
624 struct vnet_port *port = tx_port_find(vp, skb);
625 struct vio_dring_state *dr;
626 struct vio_net_desc *d;
627 unsigned long flags;
628 unsigned int len;
629 void *tx_buf;
630 int i, err;
631
632 if (unlikely(!port))
633 goto out_dropped;
634
635 spin_lock_irqsave(&port->vio.lock, flags);
636
637 dr = &port->vio.drings[VIO_DRIVER_TX_RING];
638 if (unlikely(vnet_tx_dring_avail(dr) < 2)) {
639 if (!netif_queue_stopped(dev)) {
640 netif_stop_queue(dev);
641
642 /* This is a hard error, log it. */
643 printk(KERN_ERR PFX "%s: BUG! Tx Ring full when "
644 "queue awake!\n", dev->name);
645 dev->stats.tx_errors++;
646 }
647 spin_unlock_irqrestore(&port->vio.lock, flags);
648 return NETDEV_TX_BUSY;
649 }
650
651 d = vio_dring_cur(dr);
652
653 tx_buf = port->tx_bufs[dr->prod].buf;
654 skb_copy_from_linear_data(skb, tx_buf + VNET_PACKET_SKIP, skb->len);
655
656 len = skb->len;
657 if (len < ETH_ZLEN) {
658 len = ETH_ZLEN;
659 memset(tx_buf+VNET_PACKET_SKIP+skb->len, 0, len - skb->len);
660 }
661
662 d->hdr.ack = VIO_ACK_ENABLE;
663 d->size = len;
664 d->ncookies = port->tx_bufs[dr->prod].ncookies;
665 for (i = 0; i < d->ncookies; i++)
666 d->cookies[i] = port->tx_bufs[dr->prod].cookies[i];
667
668 /* This has to be a non-SMP write barrier because we are writing
669 * to memory which is shared with the peer LDOM.
670 */
671 wmb();
672
673 d->hdr.state = VIO_DESC_READY;
674
675 err = __vnet_tx_trigger(port);
676 if (unlikely(err < 0)) {
677 printk(KERN_INFO PFX "%s: TX trigger error %d\n",
678 dev->name, err);
679 d->hdr.state = VIO_DESC_FREE;
680 dev->stats.tx_carrier_errors++;
681 goto out_dropped_unlock;
682 }
683
684 dev->stats.tx_packets++;
685 dev->stats.tx_bytes += skb->len;
686
687 dr->prod = (dr->prod + 1) & (VNET_TX_RING_SIZE - 1);
688 if (unlikely(vnet_tx_dring_avail(dr) < 2)) {
689 netif_stop_queue(dev);
690 if (vnet_tx_dring_avail(dr) > VNET_TX_WAKEUP_THRESH(dr))
691 netif_wake_queue(dev);
692 }
693
694 spin_unlock_irqrestore(&port->vio.lock, flags);
695
696 dev_kfree_skb(skb);
697
698 dev->trans_start = jiffies;
699 return NETDEV_TX_OK;
700
701out_dropped_unlock:
702 spin_unlock_irqrestore(&port->vio.lock, flags);
703
704out_dropped:
705 dev_kfree_skb(skb);
706 dev->stats.tx_dropped++;
707 return NETDEV_TX_OK;
708}
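
The wmb() above is the producer half of a publish pattern: every descriptor field must be globally visible before the state field flips to VIO_DESC_READY, because the peer domain keys off state alone. A user-space analogue of the same ordering, sketched with C11 release/acquire atomics and a hypothetical descriptor type:

	#include <stdatomic.h>
	#include <string.h>

	struct demo_desc {		/* hypothetical, for illustration */
		char payload[64];
		_Atomic int state;	/* 0 = free, 1 = ready */
	};

	/* Producer: fill the payload, then release-store the flag so a
	 * consumer that acquire-loads state == 1 also sees the payload.
	 */
	static void publish(struct demo_desc *d, const char *data, size_t len)
	{
		memcpy(d->payload, data, len);
		atomic_store_explicit(&d->state, 1, memory_order_release);
	}

	static int consume(struct demo_desc *d, char *out, size_t len)
	{
		if (atomic_load_explicit(&d->state, memory_order_acquire) != 1)
			return 0;
		memcpy(out, d->payload, len);
		return 1;
	}

	int main(void)
	{
		struct demo_desc d = { .state = 0 };
		char buf[64];

		publish(&d, "hello", 6);
		return consume(&d, buf, sizeof(buf)) ? 0 : 1;
	}
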
709
710static void vnet_tx_timeout(struct net_device *dev)
711{
712 /* XXX Implement me XXX */
713}
714
715static int vnet_open(struct net_device *dev)
716{
717 netif_carrier_on(dev);
718 netif_start_queue(dev);
719
720 return 0;
721}
722
723static int vnet_close(struct net_device *dev)
724{
725 netif_stop_queue(dev);
726 netif_carrier_off(dev);
727
728 return 0;
729}
730
731static void vnet_set_rx_mode(struct net_device *dev)
732{
733 /* XXX Implement multicast support XXX */
734}
735
736static int vnet_change_mtu(struct net_device *dev, int new_mtu)
737{
738 if (new_mtu != ETH_DATA_LEN)
739 return -EINVAL;
740
741 dev->mtu = new_mtu;
742 return 0;
743}
744
745static int vnet_set_mac_addr(struct net_device *dev, void *p)
746{
747 return -EINVAL;
748}
749
750static void vnet_get_drvinfo(struct net_device *dev,
751 struct ethtool_drvinfo *info)
752{
753 strcpy(info->driver, DRV_MODULE_NAME);
754 strcpy(info->version, DRV_MODULE_VERSION);
755}
756
757static u32 vnet_get_msglevel(struct net_device *dev)
758{
759 struct vnet *vp = netdev_priv(dev);
760 return vp->msg_enable;
761}
762
763static void vnet_set_msglevel(struct net_device *dev, u32 value)
764{
765 struct vnet *vp = netdev_priv(dev);
766 vp->msg_enable = value;
767}
768
769static const struct ethtool_ops vnet_ethtool_ops = {
770 .get_drvinfo = vnet_get_drvinfo,
771 .get_msglevel = vnet_get_msglevel,
772 .set_msglevel = vnet_set_msglevel,
773 .get_link = ethtool_op_get_link,
774 .get_perm_addr = ethtool_op_get_perm_addr,
775};
776
777static void vnet_port_free_tx_bufs(struct vnet_port *port)
778{
779 struct vio_dring_state *dr;
780 int i;
781
782 dr = &port->vio.drings[VIO_DRIVER_TX_RING];
783 if (dr->base) {
784 ldc_free_exp_dring(port->vio.lp, dr->base,
785 (dr->entry_size * dr->num_entries),
786 dr->cookies, dr->ncookies);
787 dr->base = NULL;
788 dr->entry_size = 0;
789 dr->num_entries = 0;
790 dr->pending = 0;
791 dr->ncookies = 0;
792 }
793
794 for (i = 0; i < VNET_TX_RING_SIZE; i++) {
795 void *buf = port->tx_bufs[i].buf;
796
797 if (!buf)
798 continue;
799
800 ldc_unmap(port->vio.lp,
801 port->tx_bufs[i].cookies,
802 port->tx_bufs[i].ncookies);
803
804 kfree(buf);
805 port->tx_bufs[i].buf = NULL;
806 }
807}
808
809static int __devinit vnet_port_alloc_tx_bufs(struct vnet_port *port)
810{
811 struct vio_dring_state *dr;
812 unsigned long len;
813 int i, err, ncookies;
814 void *dring;
815
816 for (i = 0; i < VNET_TX_RING_SIZE; i++) {
817 void *buf = kzalloc(ETH_FRAME_LEN + 8, GFP_KERNEL);
818 int map_len = (ETH_FRAME_LEN + 7) & ~7;
819
820 err = -ENOMEM;
821 if (!buf) {
822 printk(KERN_ERR "TX buffer allocation failure\n");
823 goto err_out;
824 }
825 err = -EFAULT;
826 if ((unsigned long)buf & (8UL - 1)) {
827 printk(KERN_ERR "TX buffer misaligned\n");
828 kfree(buf);
829 goto err_out;
830 }
831
832 err = ldc_map_single(port->vio.lp, buf, map_len,
833 port->tx_bufs[i].cookies, 2,
834 (LDC_MAP_SHADOW |
835 LDC_MAP_DIRECT |
836 LDC_MAP_RW));
837 if (err < 0) {
838 kfree(buf);
839 goto err_out;
840 }
841 port->tx_bufs[i].buf = buf;
842 port->tx_bufs[i].ncookies = err;
843 }
844
845 dr = &port->vio.drings[VIO_DRIVER_TX_RING];
846
847 len = (VNET_TX_RING_SIZE *
848 (sizeof(struct vio_net_desc) +
849 (sizeof(struct ldc_trans_cookie) * 2)));
850
851 ncookies = VIO_MAX_RING_COOKIES;
852 dring = ldc_alloc_exp_dring(port->vio.lp, len,
853 dr->cookies, &ncookies,
854 (LDC_MAP_SHADOW |
855 LDC_MAP_DIRECT |
856 LDC_MAP_RW));
857 if (IS_ERR(dring)) {
858 err = PTR_ERR(dring);
859 goto err_out;
860 }
861
862 dr->base = dring;
863 dr->entry_size = (sizeof(struct vio_net_desc) +
864 (sizeof(struct ldc_trans_cookie) * 2));
865 dr->num_entries = VNET_TX_RING_SIZE;
866 dr->prod = dr->cons = 0;
867 dr->pending = VNET_TX_RING_SIZE;
868 dr->ncookies = ncookies;
869
870 return 0;
871
872err_out:
873 vnet_port_free_tx_bufs(port);
874
875 return err;
876}
877
878static struct ldc_channel_config vnet_ldc_cfg = {
879 .event = vnet_event,
880 .mtu = 64,
881 .mode = LDC_MODE_UNRELIABLE,
882};
883
884static struct vio_driver_ops vnet_vio_ops = {
885 .send_attr = vnet_send_attr,
886 .handle_attr = vnet_handle_attr,
887 .handshake_complete = vnet_handshake_complete,
888};
889
890const char *remote_macaddr_prop = "remote-mac-address";
891
892static int __devinit vnet_port_probe(struct vio_dev *vdev,
893 const struct vio_device_id *id)
894{
895 struct mdesc_handle *hp;
896 struct vnet_port *port;
897 unsigned long flags;
898 struct vnet *vp;
899 const u64 *rmac;
900 int len, i, err, switch_port;
901
902 vp = dev_get_drvdata(vdev->dev.parent);
903 if (!vp) {
904 printk(KERN_ERR PFX "Cannot find port parent vnet.\n");
905 return -ENODEV;
906 }
907
908 hp = mdesc_grab();
909
910 rmac = mdesc_get_property(hp, vdev->mp, remote_macaddr_prop, &len);
911 err = -ENODEV;
912 if (!rmac) {
913 printk(KERN_ERR PFX "Port lacks %s property.\n",
914 remote_macaddr_prop);
915 goto err_out_put_mdesc;
916 }
917
918 port = kzalloc(sizeof(*port), GFP_KERNEL);
919 err = -ENOMEM;
920 if (!port) {
921 printk(KERN_ERR PFX "Cannot allocate vnet_port.\n");
922 goto err_out_put_mdesc;
923 }
924
925 for (i = 0; i < ETH_ALEN; i++)
926 port->raddr[i] = (*rmac >> (5 - i) * 8) & 0xff;
927
928 port->vp = vp;
929
930 err = vio_driver_init(&port->vio, vdev, VDEV_NETWORK,
931 vnet_versions, ARRAY_SIZE(vnet_versions),
932 &vnet_vio_ops, vp->dev->name);
933 if (err)
934 goto err_out_free_port;
935
936 err = vio_ldc_alloc(&port->vio, &vnet_ldc_cfg, port);
937 if (err)
938 goto err_out_free_port;
939
940 err = vnet_port_alloc_tx_bufs(port);
941 if (err)
942 goto err_out_free_ldc;
943
944 INIT_HLIST_NODE(&port->hash);
945 INIT_LIST_HEAD(&port->list);
946
947 switch_port = 0;
948 if (mdesc_get_property(hp, vdev->mp, "switch-port", NULL) != NULL)
949 switch_port = 1;
950
951 spin_lock_irqsave(&vp->lock, flags);
952 if (switch_port)
953 list_add(&port->list, &vp->port_list);
954 else
955 list_add_tail(&port->list, &vp->port_list);
956 hlist_add_head(&port->hash, &vp->port_hash[vnet_hashfn(port->raddr)]);
957 spin_unlock_irqrestore(&vp->lock, flags);
958
959 dev_set_drvdata(&vdev->dev, port);
960
961 printk(KERN_INFO "%s: PORT ( remote-mac ", vp->dev->name);
962 for (i = 0; i < 6; i++)
963 printk("%2.2x%c", port->raddr[i], i == 5 ? ' ' : ':');
964 if (switch_port)
965 printk("switch-port ");
966 printk(")\n");
967
968 vio_port_up(&port->vio);
969
970 mdesc_release(hp);
971
972 return 0;
973
974err_out_free_ldc:
975 vio_ldc_free(&port->vio);
976
977err_out_free_port:
978 kfree(port);
979
980err_out_put_mdesc:
981 mdesc_release(hp);
982 return err;
983}
984
985static int vnet_port_remove(struct vio_dev *vdev)
986{
987 struct vnet_port *port = dev_get_drvdata(&vdev->dev);
988
989 if (port) {
990 struct vnet *vp = port->vp;
991 unsigned long flags;
992
993 del_timer_sync(&port->vio.timer);
994
995 spin_lock_irqsave(&vp->lock, flags);
996 list_del(&port->list);
997 hlist_del(&port->hash);
998 spin_unlock_irqrestore(&vp->lock, flags);
999
1000 vnet_port_free_tx_bufs(port);
1001 vio_ldc_free(&port->vio);
1002
1003 dev_set_drvdata(&vdev->dev, NULL);
1004
1005 kfree(port);
1006 }
1007 return 0;
1008}
1009
1010static struct vio_device_id vnet_port_match[] = {
1011 {
1012 .type = "vnet-port",
1013 },
1014 {},
1015};
1016MODULE_DEVICE_TABLE(vio, vnet_port_match);
1017
1018static struct vio_driver vnet_port_driver = {
1019 .id_table = vnet_port_match,
1020 .probe = vnet_port_probe,
1021 .remove = vnet_port_remove,
1022 .driver = {
1023 .name = "vnet_port",
1024 .owner = THIS_MODULE,
1025 }
1026};
1027
1028const char *local_mac_prop = "local-mac-address";
1029
1030static int __devinit vnet_probe(struct vio_dev *vdev,
1031 const struct vio_device_id *id)
1032{
1033 static int vnet_version_printed;
1034 struct mdesc_handle *hp;
1035 struct net_device *dev;
1036 struct vnet *vp;
1037 const u64 *mac;
1038 int err, i, len;
1039
1040 if (vnet_version_printed++ == 0)
1041 printk(KERN_INFO "%s", version);
1042
1043 hp = mdesc_grab();
1044
1045 mac = mdesc_get_property(hp, vdev->mp, local_mac_prop, &len);
1046 if (!mac) {
1047 printk(KERN_ERR PFX "vnet lacks %s property.\n",
1048 local_mac_prop);
1049 err = -ENODEV;
1050 goto err_out;
1051 }
1052
1053 dev = alloc_etherdev(sizeof(*vp));
1054 if (!dev) {
1055 printk(KERN_ERR PFX "Etherdev alloc failed, aborting.\n");
1056 err = -ENOMEM;
1057 goto err_out;
1058 }
1059
1060 for (i = 0; i < ETH_ALEN; i++)
1061 dev->dev_addr[i] = (*mac >> (5 - i) * 8) & 0xff;
1062
1063 memcpy(dev->perm_addr, dev->dev_addr, dev->addr_len);
1064
1065 SET_NETDEV_DEV(dev, &vdev->dev);
1066
1067 vp = netdev_priv(dev);
1068
1069 spin_lock_init(&vp->lock);
1070 vp->dev = dev;
1071 vp->vdev = vdev;
1072
1073 INIT_LIST_HEAD(&vp->port_list);
1074 for (i = 0; i < VNET_PORT_HASH_SIZE; i++)
1075 INIT_HLIST_HEAD(&vp->port_hash[i]);
1076
1077 dev->open = vnet_open;
1078 dev->stop = vnet_close;
1079 dev->set_multicast_list = vnet_set_rx_mode;
1080 dev->set_mac_address = vnet_set_mac_addr;
1081 dev->tx_timeout = vnet_tx_timeout;
1082 dev->ethtool_ops = &vnet_ethtool_ops;
1083 dev->watchdog_timeo = VNET_TX_TIMEOUT;
1084 dev->change_mtu = vnet_change_mtu;
1085 dev->hard_start_xmit = vnet_start_xmit;
1086
1087 err = register_netdev(dev);
1088 if (err) {
1089 printk(KERN_ERR PFX "Cannot register net device, "
1090 "aborting.\n");
1091 goto err_out_free_dev;
1092 }
1093
1094 printk(KERN_INFO "%s: Sun LDOM vnet ", dev->name);
1095
1096 for (i = 0; i < 6; i++)
1097 printk("%2.2x%c", dev->dev_addr[i], i == 5 ? '\n' : ':');
1098
1099 dev_set_drvdata(&vdev->dev, vp);
1100
1101 mdesc_release(hp);
1102
1103 return 0;
1104
1105err_out_free_dev:
1106 free_netdev(dev);
1107
1108err_out:
1109 mdesc_release(hp);
1110 return err;
1111}
1112
1113static int vnet_remove(struct vio_dev *vdev)
1114{
1115
1116 struct vnet *vp = dev_get_drvdata(&vdev->dev);
1117
1118 if (vp) {
1119 /* XXX unregister port, or at least check XXX */
1120 unregister_netdevice(vp->dev);
1121 dev_set_drvdata(&vdev->dev, NULL);
1122 }
1123 return 0;
1124}
1125
1126static struct vio_device_id vnet_match[] = {
1127 {
1128 .type = "network",
1129 },
1130 {},
1131};
1132MODULE_DEVICE_TABLE(vio, vnet_match);
1133
1134static struct vio_driver vnet_driver = {
1135 .id_table = vnet_match,
1136 .probe = vnet_probe,
1137 .remove = vnet_remove,
1138 .driver = {
1139 .name = "vnet",
1140 .owner = THIS_MODULE,
1141 }
1142};
1143
1144static int __init vnet_init(void)
1145{
1146 int err = vio_register_driver(&vnet_driver);
1147
1148 if (!err) {
1149 err = vio_register_driver(&vnet_port_driver);
1150 if (err)
1151 vio_unregister_driver(&vnet_driver);
1152 }
1153
1154 return err;
1155}
1156
1157static void __exit vnet_exit(void)
1158{
1159 vio_unregister_driver(&vnet_port_driver);
1160 vio_unregister_driver(&vnet_driver);
1161}
1162
1163module_init(vnet_init);
1164module_exit(vnet_exit);
diff --git a/drivers/net/sunvnet.h b/drivers/net/sunvnet.h
new file mode 100644
index 000000000000..1c887302d46d
--- /dev/null
+++ b/drivers/net/sunvnet.h
@@ -0,0 +1,70 @@
1#ifndef _SUNVNET_H
2#define _SUNVNET_H
3
4#define DESC_NCOOKIES(entry_size) \
5 ((entry_size) - sizeof(struct vio_net_desc))
6
7/* length of time before we decide the hardware is borked,
8 * and dev->tx_timeout() should be called to fix the problem
9 */
10#define VNET_TX_TIMEOUT (5 * HZ)
11
12#define VNET_TX_RING_SIZE 512
13#define VNET_TX_WAKEUP_THRESH(dr) ((dr)->pending / 4)
14
15/* VNET packets are sent in buffers with the first 6 bytes skipped
16 * so that after the ethernet header the IPv4/IPv6 headers are aligned
17 * properly.
18 */
19#define VNET_PACKET_SKIP 6
20
21struct vnet_tx_entry {
22 void *buf;
23 unsigned int ncookies;
24 struct ldc_trans_cookie cookies[2];
25};
26
27struct vnet;
28struct vnet_port {
29 struct vio_driver_state vio;
30
31 struct hlist_node hash;
32 u8 raddr[ETH_ALEN];
33
34 struct vnet *vp;
35
36 struct vnet_tx_entry tx_bufs[VNET_TX_RING_SIZE];
37
38 struct list_head list;
39};
40
41static inline struct vnet_port *to_vnet_port(struct vio_driver_state *vio)
42{
43 return container_of(vio, struct vnet_port, vio);
44}
45
46#define VNET_PORT_HASH_SIZE 16
47#define VNET_PORT_HASH_MASK (VNET_PORT_HASH_SIZE - 1)
48
49static inline unsigned int vnet_hashfn(u8 *mac)
50{
51 unsigned int val = mac[4] ^ mac[5];
52
53 return val & (VNET_PORT_HASH_MASK);
54}
55
56struct vnet {
57 /* Protects port_list and port_hash. */
58 spinlock_t lock;
59
60 struct net_device *dev;
61
62 u32 msg_enable;
63 struct vio_dev *vdev;
64
65 struct list_head port_list;
66
67 struct hlist_head port_hash[VNET_PORT_HASH_SIZE];
68};
69
70#endif /* _SUNVNET_H */
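
vnet_hashfn() above keys the port hash on just the two low-order MAC bytes, masked down to VNET_PORT_HASH_SIZE buckets. Worked through once in plain C:

	#include <assert.h>

	int main(void)
	{
		/* Hypothetical remote MAC 00:14:4f:fa:12:34. */
		unsigned char mac[6] = { 0x00, 0x14, 0x4f, 0xfa, 0x12, 0x34 };
		unsigned int bucket = (mac[4] ^ mac[5]) & 15u;

		assert(bucket == 6);	/* 0x12 ^ 0x34 = 0x26, masked to 6 */
		return 0;
	}
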
diff --git a/drivers/serial/sunhv.c b/drivers/serial/sunhv.c
index 96557e6dba60..17bcca53d6a1 100644
--- a/drivers/serial/sunhv.c
+++ b/drivers/serial/sunhv.c
@@ -440,8 +440,16 @@ static void sunhv_console_write_paged(struct console *con, const char *s, unsign
 {
 	struct uart_port *port = sunhv_port;
 	unsigned long flags;
+	int locked = 1;
+
+	local_irq_save(flags);
+	if (port->sysrq) {
+		locked = 0;
+	} else if (oops_in_progress) {
+		locked = spin_trylock(&port->lock);
+	} else
+		spin_lock(&port->lock);
 
-	spin_lock_irqsave(&port->lock, flags);
 	while (n > 0) {
 		unsigned long ra = __pa(con_write_page);
 		unsigned long page_bytes;
@@ -469,7 +477,10 @@ static void sunhv_console_write_paged(struct console *con, const char *s, unsign
 			ra += written;
 		}
 	}
-	spin_unlock_irqrestore(&port->lock, flags);
+
+	if (locked)
+		spin_unlock(&port->lock);
+	local_irq_restore(flags);
 }
 
 static inline void sunhv_console_putchar(struct uart_port *port, char c)
@@ -488,7 +499,14 @@ static void sunhv_console_write_bychar(struct console *con, const char *s, unsig
 {
 	struct uart_port *port = sunhv_port;
 	unsigned long flags;
-	int i;
+	int i, locked = 1;
+
+	local_irq_save(flags);
+	if (port->sysrq) {
+		locked = 0;
+	} else if (oops_in_progress) {
+		locked = spin_trylock(&port->lock);
+	} else
+		spin_lock(&port->lock);
 
-	spin_lock_irqsave(&port->lock, flags);
 	for (i = 0; i < n; i++) {
@@ -496,7 +514,10 @@ static void sunhv_console_write_bychar(struct console *con, const char *s, unsig
 			sunhv_console_putchar(port, '\r');
 		sunhv_console_putchar(port, *s++);
 	}
-	spin_unlock_irqrestore(&port->lock, flags);
+
+	if (locked)
+		spin_unlock(&port->lock);
+	local_irq_restore(flags);
 }
 
 static struct console sunhv_console = {
diff --git a/drivers/serial/sunsab.c b/drivers/serial/sunsab.c
index deb9ab4b5a0b..8a0f9e4408d4 100644
--- a/drivers/serial/sunsab.c
+++ b/drivers/serial/sunsab.c
@@ -860,22 +860,31 @@ static int num_channels;
 static void sunsab_console_putchar(struct uart_port *port, int c)
 {
 	struct uart_sunsab_port *up = (struct uart_sunsab_port *)port;
-	unsigned long flags;
-
-	spin_lock_irqsave(&up->port.lock, flags);
 
 	sunsab_tec_wait(up);
 	writeb(c, &up->regs->w.tic);
-
-	spin_unlock_irqrestore(&up->port.lock, flags);
 }
 
 static void sunsab_console_write(struct console *con, const char *s, unsigned n)
 {
 	struct uart_sunsab_port *up = &sunsab_ports[con->index];
+	unsigned long flags;
+	int locked = 1;
+
+	local_irq_save(flags);
+	if (up->port.sysrq) {
+		locked = 0;
+	} else if (oops_in_progress) {
+		locked = spin_trylock(&up->port.lock);
+	} else
+		spin_lock(&up->port.lock);
 
 	uart_console_write(&up->port, s, n, sunsab_console_putchar);
 	sunsab_tec_wait(up);
+
+	if (locked)
+		spin_unlock(&up->port.lock);
+	local_irq_restore(flags);
 }
 
 static int sunsab_console_setup(struct console *con, char *options)
diff --git a/drivers/serial/sunsu.c b/drivers/serial/sunsu.c
index 2a63cdba3208..26d720baf88c 100644
--- a/drivers/serial/sunsu.c
+++ b/drivers/serial/sunsu.c
@@ -1288,7 +1288,17 @@ static void sunsu_console_write(struct console *co, const char *s,
 			      unsigned int count)
 {
 	struct uart_sunsu_port *up = &sunsu_ports[co->index];
+	unsigned long flags;
 	unsigned int ier;
+	int locked = 1;
+
+	local_irq_save(flags);
+	if (up->port.sysrq) {
+		locked = 0;
+	} else if (oops_in_progress) {
+		locked = spin_trylock(&up->port.lock);
+	} else
+		spin_lock(&up->port.lock);
 
 	/*
 	 *	First save the UER then disable the interrupts
@@ -1304,6 +1314,10 @@ static void sunsu_console_write(struct console *co, const char *s,
 	 */
 	wait_for_xmitr(up);
 	serial_out(up, UART_IER, ier);
+
+	if (locked)
+		spin_unlock(&up->port.lock);
+	local_irq_restore(flags);
 }
 
 /*
diff --git a/drivers/serial/sunzilog.c b/drivers/serial/sunzilog.c
index 15b6e1cb040b..0a3e10a4a35d 100644
--- a/drivers/serial/sunzilog.c
+++ b/drivers/serial/sunzilog.c
@@ -9,7 +9,7 @@
  * C. Dost, Pete Zaitcev, Ted Ts'o and Alex Buell for their
  * work there.
  *
- * Copyright (C) 2002, 2006 David S. Miller (davem@davemloft.net)
+ * Copyright (C) 2002, 2006, 2007 David S. Miller (davem@davemloft.net)
  */
 
 #include <linux/module.h>
@@ -1151,11 +1151,22 @@ sunzilog_console_write(struct console *con, const char *s, unsigned int count)
 {
 	struct uart_sunzilog_port *up = &sunzilog_port_table[con->index];
 	unsigned long flags;
+	int locked = 1;
+
+	local_irq_save(flags);
+	if (up->port.sysrq) {
+		locked = 0;
+	} else if (oops_in_progress) {
+		locked = spin_trylock(&up->port.lock);
+	} else
+		spin_lock(&up->port.lock);
 
-	spin_lock_irqsave(&up->port.lock, flags);
 	uart_console_write(&up->port, s, count, sunzilog_putchar);
 	udelay(2);
-	spin_unlock_irqrestore(&up->port.lock, flags);
+
+	if (locked)
+		spin_unlock(&up->port.lock);
+	local_irq_restore(flags);
 }
 
 static int __init sunzilog_console_setup(struct console *con, char *options)
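
All four serial patches above apply one pattern: never take the port lock the normal way in a console write path that may run during sysrq handling (the lock is already held by the caller) or during an oops (spinning would hang the dying CPU). Condensed into a sketch with stand-ins for the kernel's spinlock primitives:

	struct demo_lock { int held; };

	static int demo_trylock(struct demo_lock *l)
	{
		if (l->held)
			return 0;
		l->held = 1;
		return 1;
	}

	static int oops_in_progress;	/* stand-in for the kernel flag */

	/* Returns 1 when the caller owns the lock and must unlock later. */
	static int console_write_lock(struct demo_lock *l, int sysrq)
	{
		if (sysrq)
			return 0;		/* caller already holds it */
		if (oops_in_progress)
			return demo_trylock(l);	/* best effort, never spin */
		while (!demo_trylock(l))
			;			/* normal path: plain lock */
		return 1;
	}
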
diff --git a/include/asm-sparc64/bugs.h b/include/asm-sparc64/bugs.h
index bf39d86c0c9e..11ade6841971 100644
--- a/include/asm-sparc64/bugs.h
+++ b/include/asm-sparc64/bugs.h
@@ -4,12 +4,7 @@
  */
 #include <asm/sstate.h>
 
-extern unsigned long loops_per_jiffy;
-
 static void __init check_bugs(void)
 {
-#ifndef CONFIG_SMP
-	cpu_data(0).udelay_val = loops_per_jiffy;
-#endif
 	sstate_running();
 }
diff --git a/include/asm-sparc64/cpudata.h b/include/asm-sparc64/cpudata.h
index 445026fbec35..98a6e609163e 100644
--- a/include/asm-sparc64/cpudata.h
+++ b/include/asm-sparc64/cpudata.h
@@ -19,7 +19,7 @@ typedef struct {
 	unsigned int	__softirq_pending; /* must be 1st, see rtrap.S */
 	unsigned int	__pad0;
 	unsigned long	clock_tick;	/* %tick's per second */
-	unsigned long	udelay_val;
+	unsigned long	__pad;
 	unsigned int	__pad1;
 	unsigned int	__pad2;
 
@@ -80,7 +80,8 @@ struct trap_per_cpu {
 	unsigned int	dev_mondo_qmask;
 	unsigned int	resum_qmask;
 	unsigned int	nonresum_qmask;
-	unsigned int	__pad2[3];
+	unsigned int	__pad2[1];
+	void		*hdesc;
 } __attribute__((aligned(64)));
 extern struct trap_per_cpu trap_block[NR_CPUS];
 extern void init_cur_cpu_trap(struct thread_info *);
diff --git a/include/asm-sparc64/delay.h b/include/asm-sparc64/delay.h
index a4aae6f80627..a77aa622d762 100644
--- a/include/asm-sparc64/delay.h
+++ b/include/asm-sparc64/delay.h
@@ -1,37 +1,17 @@
 /* delay.h: Linux delay routines on sparc64.
  *
- * Copyright (C) 1996, 2004 David S. Miller (davem@davemloft.net).
- *
- * Based heavily upon x86 variant which is:
- *	Copyright (C) 1993 Linus Torvalds
- *
- * Delay routines calling functions in arch/sparc64/lib/delay.c
+ * Copyright (C) 1996, 2004, 2007 David S. Miller (davem@davemloft.net).
  */
 
-#ifndef __SPARC64_DELAY_H
-#define __SPARC64_DELAY_H
-
-#include <linux/param.h>
-#include <asm/cpudata.h>
+#ifndef _SPARC64_DELAY_H
+#define _SPARC64_DELAY_H
 
 #ifndef __ASSEMBLY__
 
-extern void __bad_udelay(void);
-extern void __bad_ndelay(void);
-
-extern void __udelay(unsigned long usecs);
-extern void __ndelay(unsigned long nsecs);
-extern void __const_udelay(unsigned long usecs);
 extern void __delay(unsigned long loops);
-
-#define udelay(n) (__builtin_constant_p(n) ? \
-	((n) > 20000 ? __bad_udelay() : __const_udelay((n) * 0x10c7ul)) : \
-	__udelay(n))
-
-#define ndelay(n) (__builtin_constant_p(n) ? \
-	((n) > 20000 ? __bad_ndelay() : __const_udelay((n) * 5ul)) : \
-	__ndelay(n))
+extern void udelay(unsigned long usecs);
+#define mdelay(n) udelay((n) * 1000)
 
 #endif /* !__ASSEMBLY__ */
 
-#endif /* defined(__SPARC64_DELAY_H) */
+#endif /* _SPARC64_DELAY_H */
diff --git a/include/asm-sparc64/hvtramp.h b/include/asm-sparc64/hvtramp.h
new file mode 100644
index 000000000000..c7dd6ad056df
--- /dev/null
+++ b/include/asm-sparc64/hvtramp.h
@@ -0,0 +1,37 @@
1#ifndef _SPARC64_HVTRAP_H
2#define _SPARC64_HVTRAP_H
3
4#ifndef __ASSEMBLY__
5
6#include <linux/types.h>
7
8struct hvtramp_mapping {
9 __u64 vaddr;
10 __u64 tte;
11};
12
13struct hvtramp_descr {
14 __u32 cpu;
15 __u32 num_mappings;
16 __u64 fault_info_va;
17 __u64 fault_info_pa;
18 __u64 thread_reg;
19 struct hvtramp_mapping maps[2];
20};
21
22extern void hv_cpu_startup(unsigned long hvdescr_pa);
23
24#endif
25
26#define HVTRAMP_DESCR_CPU 0x00
27#define HVTRAMP_DESCR_NUM_MAPPINGS 0x04
28#define HVTRAMP_DESCR_FAULT_INFO_VA 0x08
29#define HVTRAMP_DESCR_FAULT_INFO_PA 0x10
30#define HVTRAMP_DESCR_THREAD_REG 0x18
31#define HVTRAMP_DESCR_MAPS 0x20
32
33#define HVTRAMP_MAPPING_VADDR 0x00
34#define HVTRAMP_MAPPING_TTE 0x08
35#define HVTRAMP_MAPPING_SIZE 0x10
36
37#endif /* _SPARC64_HVTRAP_H */
diff --git a/include/asm-sparc64/hypervisor.h b/include/asm-sparc64/hypervisor.h
index db2130a95d68..524d49835dfd 100644
--- a/include/asm-sparc64/hypervisor.h
+++ b/include/asm-sparc64/hypervisor.h
@@ -98,7 +98,7 @@
 #define HV_FAST_MACH_EXIT		0x00
 
 #ifndef __ASSEMBLY__
-extern void sun4v_mach_exit(unsigned long exit_core);
+extern void sun4v_mach_exit(unsigned long exit_code);
 #endif
 
 /* Domain services. */
diff --git a/include/asm-sparc64/irq.h b/include/asm-sparc64/irq.h
index 90781e34a95c..e6c436ef9356 100644
--- a/include/asm-sparc64/irq.h
+++ b/include/asm-sparc64/irq.h
@@ -53,6 +53,8 @@ extern unsigned int sun4v_build_msi(u32 devhandle, unsigned int *virt_irq_p,
 extern void sun4v_destroy_msi(unsigned int virt_irq);
 extern unsigned int sbus_build_irq(void *sbus, unsigned int ino);
 
+extern void fixup_irqs(void);
+
 static __inline__ void set_softint(unsigned long bits)
 {
 	__asm__ __volatile__("wr	%0, 0x0, %%set_softint"
diff --git a/include/asm-sparc64/ldc.h b/include/asm-sparc64/ldc.h
new file mode 100644
index 000000000000..bdb524a7b814
--- /dev/null
+++ b/include/asm-sparc64/ldc.h
@@ -0,0 +1,138 @@
1#ifndef _SPARC64_LDC_H
2#define _SPARC64_LDC_H
3
4#include <asm/hypervisor.h>
5
6extern int ldom_domaining_enabled;
7extern void ldom_set_var(const char *var, const char *value);
8extern void ldom_reboot(const char *boot_command);
9extern void ldom_power_off(void);
10
11/* The event handler will be invoked when link state changes
12 * or data becomes available on the receive side.
13 *
14 * For non-RAW links, if the LDC_EVENT_RESET event arrives the
15 * driver should reset all of its internal state and reinvoke
16 * ldc_connect() to try and bring the link up again.
17 *
18 * For RAW links, ldc_connect() is not used. Instead the driver
19 * just waits for the LDC_EVENT_UP event.
20 */
21struct ldc_channel_config {
22 void (*event)(void *arg, int event);
23
24 u32 mtu;
25 unsigned int rx_irq;
26 unsigned int tx_irq;
27 u8 mode;
28#define LDC_MODE_RAW 0x00
29#define LDC_MODE_UNRELIABLE 0x01
30#define LDC_MODE_RESERVED 0x02
31#define LDC_MODE_STREAM 0x03
32
33 u8 debug;
34#define LDC_DEBUG_HS 0x01
35#define LDC_DEBUG_STATE 0x02
36#define LDC_DEBUG_RX 0x04
37#define LDC_DEBUG_TX 0x08
38#define LDC_DEBUG_DATA 0x10
39};
40
41#define LDC_EVENT_RESET 0x01
42#define LDC_EVENT_UP 0x02
43#define LDC_EVENT_DATA_READY 0x04
44
45#define LDC_STATE_INVALID 0x00
46#define LDC_STATE_INIT 0x01
47#define LDC_STATE_BOUND 0x02
48#define LDC_STATE_READY 0x03
49#define LDC_STATE_CONNECTED 0x04
50
51struct ldc_channel;
52
53/* Allocate state for a channel. */
54extern struct ldc_channel *ldc_alloc(unsigned long id,
55 const struct ldc_channel_config *cfgp,
56 void *event_arg);
57
58/* Shut down and free state for a channel. */
59extern void ldc_free(struct ldc_channel *lp);
60
61/* Register TX and RX queues of the link with the hypervisor. */
62extern int ldc_bind(struct ldc_channel *lp, const char *name);
63
64/* For non-RAW protocols we need to complete a handshake before
65 * communication can proceed. ldc_connect() does that; if the
66 * handshake completes successfully, an LDC_EVENT_UP event will
67 * be sent up to the driver.
68 */
69extern int ldc_connect(struct ldc_channel *lp);
70extern int ldc_disconnect(struct ldc_channel *lp);
71
72extern int ldc_state(struct ldc_channel *lp);
73
74/* Read and write operations. Only valid when the link is up. */
75extern int ldc_write(struct ldc_channel *lp, const void *buf,
76 unsigned int size);
77extern int ldc_read(struct ldc_channel *lp, void *buf, unsigned int size);
78
79#define LDC_MAP_SHADOW 0x01
80#define LDC_MAP_DIRECT 0x02
81#define LDC_MAP_IO 0x04
82#define LDC_MAP_R 0x08
83#define LDC_MAP_W 0x10
84#define LDC_MAP_X 0x20
85#define LDC_MAP_RW (LDC_MAP_R | LDC_MAP_W)
86#define LDC_MAP_RWX (LDC_MAP_R | LDC_MAP_W | LDC_MAP_X)
87#define LDC_MAP_ALL 0x03f
88
89struct ldc_trans_cookie {
90 u64 cookie_addr;
91 u64 cookie_size;
92};
93
94struct scatterlist;
95extern int ldc_map_sg(struct ldc_channel *lp,
96 struct scatterlist *sg, int num_sg,
97 struct ldc_trans_cookie *cookies, int ncookies,
98 unsigned int map_perm);
99
100extern int ldc_map_single(struct ldc_channel *lp,
101 void *buf, unsigned int len,
102 struct ldc_trans_cookie *cookies, int ncookies,
103 unsigned int map_perm);
104
105extern void ldc_unmap(struct ldc_channel *lp, struct ldc_trans_cookie *cookies,
106 int ncookies);
107
108extern int ldc_copy(struct ldc_channel *lp, int copy_dir,
109 void *buf, unsigned int len, unsigned long offset,
110 struct ldc_trans_cookie *cookies, int ncookies);
111
112static inline int ldc_get_dring_entry(struct ldc_channel *lp,
113 void *buf, unsigned int len,
114 unsigned long offset,
115 struct ldc_trans_cookie *cookies,
116 int ncookies)
117{
118 return ldc_copy(lp, LDC_COPY_IN, buf, len, offset, cookies, ncookies);
119}
120
121static inline int ldc_put_dring_entry(struct ldc_channel *lp,
122 void *buf, unsigned int len,
123 unsigned long offset,
124 struct ldc_trans_cookie *cookies,
125 int ncookies)
126{
127 return ldc_copy(lp, LDC_COPY_OUT, buf, len, offset, cookies, ncookies);
128}
129
130extern void *ldc_alloc_exp_dring(struct ldc_channel *lp, unsigned int len,
131 struct ldc_trans_cookie *cookies,
132 int *ncookies, unsigned int map_perm);
133
134extern void ldc_free_exp_dring(struct ldc_channel *lp, void *buf,
135 unsigned int len,
136 struct ldc_trans_cookie *cookies, int ncookies);
137
138#endif /* _SPARC64_LDC_H */
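
The comment at the top of this header fixes the event-handler contract: on LDC_EVENT_RESET a non-RAW driver tears down its session state and calls ldc_connect() again, and it drains data with ldc_read() on LDC_EVENT_DATA_READY. A schematic callback under those rules; the demo_driver type and demo_* helpers are hypothetical:

	#include <asm/ldc.h>

	struct demo_driver {
		struct ldc_channel *lp;
	};

	static void demo_reset_state(struct demo_driver *dp)
	{
		/* driver specific: forget handshake and in-flight state */
	}

	static void demo_drain_rx(struct demo_driver *dp)
	{
		/* driver specific: loop over ldc_read() until it returns 0 */
	}

	static void demo_event(void *arg, int event)
	{
		struct demo_driver *dp = arg;

		switch (event) {
		case LDC_EVENT_RESET:
			demo_reset_state(dp);
			ldc_connect(dp->lp);	/* restart the handshake */
			break;
		case LDC_EVENT_UP:
			/* RAW links start transferring here; non-RAW
			 * links treat this as handshake completion.
			 */
			break;
		case LDC_EVENT_DATA_READY:
			demo_drain_rx(dp);
			break;
		}
	}
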
diff --git a/include/asm-sparc64/mdesc.h b/include/asm-sparc64/mdesc.h
index c6383982b53d..e97c43133752 100644
--- a/include/asm-sparc64/mdesc.h
+++ b/include/asm-sparc64/mdesc.h
@@ -2,38 +2,66 @@
 #define _SPARC64_MDESC_H
 
 #include <linux/types.h>
+#include <linux/cpumask.h>
 #include <asm/prom.h>
 
-struct mdesc_node;
-struct mdesc_arc {
-	const char	*name;
-	struct mdesc_node *arc;
-};
-
-struct mdesc_node {
-	const char	*name;
-	u64		node;
-	unsigned int	unique_id;
-	unsigned int	num_arcs;
-	unsigned int	irqs[2];
-	struct property	*properties;
-	struct mdesc_node *hash_next;
-	struct mdesc_node *allnodes_next;
-	struct mdesc_arc arcs[0];
-};
-
-extern struct mdesc_node *md_find_node_by_name(struct mdesc_node *from,
-					       const char *name);
-#define md_for_each_node_by_name(__mn, __name) \
-	for (__mn = md_find_node_by_name(NULL, __name); __mn; \
-	     __mn = md_find_node_by_name(__mn, __name))
-
-extern struct property *md_find_property(const struct mdesc_node *mp,
-					 const char *name,
-					 int *lenp);
-extern const void *md_get_property(const struct mdesc_node *mp,
-				   const char *name,
-				   int *lenp);
+struct mdesc_handle;
+
+/* Machine description operations are to be surrounded by grab and
+ * release calls. The mdesc_handle returned from the grab is
+ * the first argument to all of the operational calls that work
+ * on mdescs.
+ */
+extern struct mdesc_handle *mdesc_grab(void);
+extern void mdesc_release(struct mdesc_handle *);
+
+#define MDESC_NODE_NULL		(~(u64)0)
+
+extern u64 mdesc_node_by_name(struct mdesc_handle *handle,
+			      u64 from_node, const char *name);
+#define mdesc_for_each_node_by_name(__hdl, __node, __name) \
+	for (__node = mdesc_node_by_name(__hdl, MDESC_NODE_NULL, __name); \
+	     (__node) != MDESC_NODE_NULL; \
+	     __node = mdesc_node_by_name(__hdl, __node, __name))
+
+/* Access to property values returned from mdesc_get_property() are
+ * only valid inside of a mdesc_grab()/mdesc_release() sequence.
+ * Once mdesc_release() is called, the memory backed up by these
+ * pointers may reference freed up memory.
+ *
+ * Therefore callers must make copies of any property values
+ * they need.
+ *
+ * These same rules apply to mdesc_node_name().
+ */
+extern const void *mdesc_get_property(struct mdesc_handle *handle,
+				      u64 node, const char *name, int *lenp);
+extern const char *mdesc_node_name(struct mdesc_handle *hp, u64 node);
+
+/* MD arc iteration, the standard sequence is:
+ *
+ *	unsigned long arc;
+ *	mdesc_for_each_arc(arc, handle, node, MDESC_ARC_TYPE_{FWD,BACK}) {
+ *		unsigned long target = mdesc_arc_target(handle, arc);
+ *		...
+ *	}
+ */
+
+#define MDESC_ARC_TYPE_FWD	"fwd"
+#define MDESC_ARC_TYPE_BACK	"back"
+
+extern u64 mdesc_next_arc(struct mdesc_handle *handle, u64 from,
+			  const char *arc_type);
+#define mdesc_for_each_arc(__arc, __hdl, __node, __type) \
+	for (__arc = mdesc_next_arc(__hdl, __node, __type); \
+	     (__arc) != MDESC_NODE_NULL; \
+	     __arc = mdesc_next_arc(__hdl, __arc, __type))
+
+extern u64 mdesc_arc_target(struct mdesc_handle *hp, u64 arc);
+
+extern void mdesc_update(void);
+
+extern void mdesc_fill_in_cpu_data(cpumask_t mask);
 
 extern void sun4v_mdesc_init(void);
 
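
The lifetime rules spelled out in the new comments translate into a simple call shape: grab a handle, copy out anything you need, release. A sketch against the new API; the node name ("platform") and property name ("hostid") are hypothetical:

	#include <linux/errno.h>
	#include <asm/mdesc.h>

	static int demo_copy_hostid(u64 *out)
	{
		struct mdesc_handle *hp = mdesc_grab();
		int err = -ENODEV;
		u64 node;

		mdesc_for_each_node_by_name(hp, node, "platform") {
			const u64 *val = mdesc_get_property(hp, node,
							    "hostid", NULL);
			if (val) {
				*out = *val;	/* copy before releasing */
				err = 0;
				break;
			}
		}
		mdesc_release(hp);
		return err;
	}
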
diff --git a/include/asm-sparc64/mmu_context.h b/include/asm-sparc64/mmu_context.h
index 8d129032013e..9fc225ed5500 100644
--- a/include/asm-sparc64/mmu_context.h
+++ b/include/asm-sparc64/mmu_context.h
@@ -76,6 +76,9 @@ static inline void switch_mm(struct mm_struct *old_mm, struct mm_struct *mm, str
 	unsigned long ctx_valid, flags;
 	int cpu;
 
+	if (unlikely(mm == &init_mm))
+		return;
+
 	spin_lock_irqsave(&mm->context.lock, flags);
 	ctx_valid = CTX_VALID(mm->context);
 	if (!ctx_valid)
diff --git a/include/asm-sparc64/power.h b/include/asm-sparc64/power.h
new file mode 100644
index 000000000000..94495c1ac4f6
--- /dev/null
+++ b/include/asm-sparc64/power.h
@@ -0,0 +1,7 @@
1#ifndef _SPARC64_POWER_H
2#define _SPARC64_POWER_H
3
4extern void wake_up_powerd(void);
5extern int start_powerd(void);
6
7#endif /* !(_SPARC64_POWER_H) */
diff --git a/include/asm-sparc64/smp.h b/include/asm-sparc64/smp.h
index 4fb8c4bfb848..e8a96a31761b 100644
--- a/include/asm-sparc64/smp.h
+++ b/include/asm-sparc64/smp.h
@@ -29,9 +29,6 @@
 #include <asm/bitops.h>
 #include <asm/atomic.h>
 
-extern cpumask_t phys_cpu_present_map;
-#define cpu_possible_map phys_cpu_present_map
-
 extern cpumask_t cpu_sibling_map[NR_CPUS];
 extern cpumask_t cpu_core_map[NR_CPUS];
 extern int sparc64_multi_core;
@@ -44,7 +41,12 @@ extern int hard_smp_processor_id(void);
 #define raw_smp_processor_id() (current_thread_info()->cpu)
 
 extern void smp_fill_in_sib_core_maps(void);
-extern unsigned char boot_cpu_id;
+extern void cpu_play_dead(void);
+
+#ifdef CONFIG_HOTPLUG_CPU
+extern int __cpu_disable(void);
+extern void __cpu_die(unsigned int cpu);
+#endif
 
 #endif /* !(__ASSEMBLY__) */
@@ -52,7 +54,6 @@ extern unsigned char boot_cpu_id;
 
 #define hard_smp_processor_id()		0
 #define smp_fill_in_sib_core_maps()	do { } while (0)
-#define boot_cpu_id	(0)
 
 #endif /* !(CONFIG_SMP) */
 
diff --git a/include/asm-sparc64/vio.h b/include/asm-sparc64/vio.h
new file mode 100644
index 000000000000..83c96422e9d6
--- /dev/null
+++ b/include/asm-sparc64/vio.h
@@ -0,0 +1,404 @@
1#ifndef _SPARC64_VIO_H
2#define _SPARC64_VIO_H
3
4#include <linux/kernel.h>
5#include <linux/device.h>
6#include <linux/mod_devicetable.h>
7#include <linux/timer.h>
8#include <linux/spinlock.h>
9#include <linux/completion.h>
10#include <linux/list.h>
11
12#include <asm/ldc.h>
13#include <asm/mdesc.h>
14
15struct vio_msg_tag {
16 u8 type;
17#define VIO_TYPE_CTRL 0x01
18#define VIO_TYPE_DATA 0x02
19#define VIO_TYPE_ERR 0x04
20
21 u8 stype;
22#define VIO_SUBTYPE_INFO 0x01
23#define VIO_SUBTYPE_ACK 0x02
24#define VIO_SUBTYPE_NACK 0x04
25
26 u16 stype_env;
27#define VIO_VER_INFO 0x0001
28#define VIO_ATTR_INFO 0x0002
29#define VIO_DRING_REG 0x0003
30#define VIO_DRING_UNREG 0x0004
31#define VIO_RDX 0x0005
32#define VIO_PKT_DATA 0x0040
33#define VIO_DESC_DATA 0x0041
34#define VIO_DRING_DATA 0x0042
35#define VNET_MCAST_INFO 0x0101
36
37 u32 sid;
38};
39
40struct vio_rdx {
41 struct vio_msg_tag tag;
42 u64 resv[6];
43};
44
45struct vio_ver_info {
46 struct vio_msg_tag tag;
47 u16 major;
48 u16 minor;
49 u8 dev_class;
50#define VDEV_NETWORK 0x01
51#define VDEV_NETWORK_SWITCH 0x02
52#define VDEV_DISK 0x03
53#define VDEV_DISK_SERVER 0x04
54
55 u8 resv1[3];
56 u64 resv2[5];
57};
58
59struct vio_dring_register {
60 struct vio_msg_tag tag;
61 u64 dring_ident;
62 u32 num_descr;
63 u32 descr_size;
64 u16 options;
65#define VIO_TX_DRING 0x0001
66#define VIO_RX_DRING 0x0002
67 u16 resv;
68 u32 num_cookies;
69 struct ldc_trans_cookie cookies[0];
70};
71
72struct vio_dring_unregister {
73 struct vio_msg_tag tag;
74 u64 dring_ident;
75 u64 resv[5];
76};
77
78/* Data transfer modes */
79#define VIO_PKT_MODE 0x01 /* Packet based transfer */
80#define VIO_DESC_MODE 0x02 /* In-band descriptors */
81#define VIO_DRING_MODE 0x03 /* Descriptor rings */
82
83struct vio_dring_data {
84 struct vio_msg_tag tag;
85 u64 seq;
86 u64 dring_ident;
87 u32 start_idx;
88 u32 end_idx;
89 u8 state;
90#define VIO_DRING_ACTIVE 0x01
91#define VIO_DRING_STOPPED 0x02
92
93 u8 __pad1;
94 u16 __pad2;
95 u32 __pad3;
96 u64 __par4[2];
97};
98
99struct vio_dring_hdr {
100 u8 state;
101#define VIO_DESC_FREE 0x01
102#define VIO_DESC_READY 0x02
103#define VIO_DESC_ACCEPTED 0x03
104#define VIO_DESC_DONE 0x04
105 u8 ack;
106#define VIO_ACK_ENABLE 0x01
107#define VIO_ACK_DISABLE 0x00
108
109 u16 __pad1;
110 u32 __pad2;
111};
112
113/* VIO disk specific structures and defines */
114struct vio_disk_attr_info {
115 struct vio_msg_tag tag;
116 u8 xfer_mode;
117 u8 vdisk_type;
118#define VD_DISK_TYPE_SLICE 0x01 /* Slice in block device */
119#define VD_DISK_TYPE_DISK 0x02 /* Entire block device */
120 u16 resv1;
121 u32 vdisk_block_size;
122 u64 operations;
123 u64 vdisk_size;
124 u64 max_xfer_size;
125 u64 resv2[2];
126};
127
128struct vio_disk_desc {
129 struct vio_dring_hdr hdr;
130 u64 req_id;
131 u8 operation;
132#define VD_OP_BREAD 0x01 /* Block read */
133#define VD_OP_BWRITE 0x02 /* Block write */
134#define VD_OP_FLUSH 0x03 /* Flush disk contents */
135#define VD_OP_GET_WCE 0x04 /* Get write-cache status */
136#define VD_OP_SET_WCE 0x05 /* Enable/disable write-cache */
137#define VD_OP_GET_VTOC 0x06 /* Get VTOC */
138#define VD_OP_SET_VTOC 0x07 /* Set VTOC */
139#define VD_OP_GET_DISKGEOM 0x08 /* Get disk geometry */
140#define VD_OP_SET_DISKGEOM 0x09 /* Set disk geometry */
141#define VD_OP_SCSICMD 0x0a /* SCSI control command */
142#define VD_OP_GET_DEVID 0x0b /* Get device ID */
143#define VD_OP_GET_EFI 0x0c /* Get EFI */
144#define VD_OP_SET_EFI 0x0d /* Set EFI */
145 u8 slice;
146 u16 resv1;
147 u32 status;
148 u64 offset;
149 u64 size;
150 u32 ncookies;
151 u32 resv2;
152 struct ldc_trans_cookie cookies[0];
153};
154
155#define VIO_DISK_VNAME_LEN 8
156#define VIO_DISK_ALABEL_LEN 128
157#define VIO_DISK_NUM_PART 8
158
159struct vio_disk_vtoc {
160 u8 volume_name[VIO_DISK_VNAME_LEN];
161 u16 sector_size;
162 u16 num_partitions;
163 u8 ascii_label[VIO_DISK_ALABEL_LEN];
164 struct {
165 u16 id;
166 u16 perm_flags;
167 u32 resv;
168 u64 start_block;
169 u64 num_blocks;
170 } partitions[VIO_DISK_NUM_PART];
171};
172
173struct vio_disk_geom {
174 u16 num_cyl; /* Num data cylinders */
175 u16 alt_cyl; /* Num alternate cylinders */
176 u16 beg_cyl; /* Cyl off of fixed head area */
177 u16 num_hd; /* Num heads */
178 u16 num_sec; /* Num sectors */
179 u16 ifact; /* Interleave factor */
180 u16 apc; /* Alts per cylinder (SCSI) */
181 u16 rpm; /* Revolutions per minute */
182 u16 phy_cyl; /* Num physical cylinders */
183 u16 wr_skip; /* Num sects to skip, writes */
184 u16 rd_skip; /* Num sects to skip, reads */
185};
186
187struct vio_disk_devid {
188 u16 resv;
189 u16 type;
190 u32 len;
191 char id[0];
192};
193
194struct vio_disk_efi {
195 u64 lba;
196 u64 len;
197 char data[0];
198};
199
200/* VIO net specific structures and defines */
201struct vio_net_attr_info {
202 struct vio_msg_tag tag;
203 u8 xfer_mode;
204 u8 addr_type;
205#define VNET_ADDR_ETHERMAC 0x01
206 u16 ack_freq;
207 u32 resv1;
208 u64 addr;
209 u64 mtu;
210 u64 resv2[3];
211};
212
213#define VNET_NUM_MCAST 7
214
215struct vio_net_mcast_info {
216 struct vio_msg_tag tag;
217 u8 set;
218 u8 count;
219 u8 mcast_addr[VNET_NUM_MCAST * 6];
220 u32 resv;
221};
222
223struct vio_net_desc {
224 struct vio_dring_hdr hdr;
225 u32 size;
226 u32 ncookies;
227 struct ldc_trans_cookie cookies[0];
228};
229
230#define VIO_MAX_RING_COOKIES 24
231
232struct vio_dring_state {
233 u64 ident;
234 void *base;
235 u64 snd_nxt;
236 u64 rcv_nxt;
237 u32 entry_size;
238 u32 num_entries;
239 u32 prod;
240 u32 cons;
241 u32 pending;
242 int ncookies;
243 struct ldc_trans_cookie cookies[VIO_MAX_RING_COOKIES];
244};
245
246static inline void *vio_dring_cur(struct vio_dring_state *dr)
247{
248 return dr->base + (dr->entry_size * dr->prod);
249}
250
251static inline void *vio_dring_entry(struct vio_dring_state *dr,
252 unsigned int index)
253{
254 return dr->base + (dr->entry_size * index);
255}
256
257static inline u32 vio_dring_avail(struct vio_dring_state *dr,
258 unsigned int ring_size)
259{
260 /* Ensure build-time power-of-2. */
261 BUILD_BUG_ON(ring_size & (ring_size - 1));
262
263 return (dr->pending -
264 ((dr->prod - dr->cons) & (ring_size - 1)));
265}
266
267#define VIO_MAX_TYPE_LEN 64
268#define VIO_MAX_COMPAT_LEN 64
269
270struct vio_dev {
271 u64 mp;
272 struct device_node *dp;
273
274 char type[VIO_MAX_TYPE_LEN];
275 char compat[VIO_MAX_COMPAT_LEN];
276 int compat_len;
277
278 unsigned long channel_id;
279
280 unsigned int tx_irq;
281 unsigned int rx_irq;
282
283 struct device dev;
284};
285
286struct vio_driver {
287 struct list_head node;
288 const struct vio_device_id *id_table;
289 int (*probe)(struct vio_dev *dev, const struct vio_device_id *id);
290 int (*remove)(struct vio_dev *dev);
291 void (*shutdown)(struct vio_dev *dev);
292 unsigned long driver_data;
293 struct device_driver driver;
294};
295
296struct vio_version {
297 u16 major;
298 u16 minor;
299};
300
301struct vio_driver_state;
302struct vio_driver_ops {
303 int (*send_attr)(struct vio_driver_state *vio);
304 int (*handle_attr)(struct vio_driver_state *vio, void *pkt);
305 void (*handshake_complete)(struct vio_driver_state *vio);
306};
307
308struct vio_completion {
309 struct completion com;
310 int err;
311 int waiting_for;
312};
313
314struct vio_driver_state {
315 /* Protects VIO handshake and, optionally, driver private state. */
316 spinlock_t lock;
317
318 struct ldc_channel *lp;
319
320 u32 _peer_sid;
321 u32 _local_sid;
322 struct vio_dring_state drings[2];
323#define VIO_DRIVER_TX_RING 0
324#define VIO_DRIVER_RX_RING 1
325
326 u8 hs_state;
327#define VIO_HS_INVALID 0x00
328#define VIO_HS_GOTVERS 0x01
329#define VIO_HS_GOT_ATTR 0x04
330#define VIO_HS_SENT_DREG 0x08
331#define VIO_HS_SENT_RDX 0x10
332#define VIO_HS_GOT_RDX_ACK 0x20
333#define VIO_HS_GOT_RDX 0x40
334#define VIO_HS_SENT_RDX_ACK 0x80
335#define VIO_HS_COMPLETE (VIO_HS_GOT_RDX_ACK | VIO_HS_SENT_RDX_ACK)
336
337 u8 dev_class;
338
339 u8 dr_state;
340#define VIO_DR_STATE_TXREG 0x01
341#define VIO_DR_STATE_RXREG 0x02
342#define VIO_DR_STATE_TXREQ 0x10
343#define VIO_DR_STATE_RXREQ 0x20
344
345 u8 debug;
346#define VIO_DEBUG_HS 0x01
347#define VIO_DEBUG_DATA 0x02
348
349 void *desc_buf;
350 unsigned int desc_buf_len;
351
352 struct vio_completion *cmp;
353
354 struct vio_dev *vdev;
355
356 struct timer_list timer;
357
358 struct vio_version ver;
359
360 struct vio_version *ver_table;
361 int ver_table_entries;
362
363 char *name;
364
365 struct vio_driver_ops *ops;
366};
367
368#define viodbg(TYPE, f, a...) \
369do { if (vio->debug & VIO_DEBUG_##TYPE) \
370 printk(KERN_INFO "vio: ID[%lu] " f, \
371 vio->vdev->channel_id, ## a); \
372} while (0)
373
374extern int vio_register_driver(struct vio_driver *drv);
375extern void vio_unregister_driver(struct vio_driver *drv);
376
377static inline struct vio_driver *to_vio_driver(struct device_driver *drv)
378{
379 return container_of(drv, struct vio_driver, driver);
380}
381
382static inline struct vio_dev *to_vio_dev(struct device *dev)
383{
384 return container_of(dev, struct vio_dev, dev);
385}
386
387extern int vio_ldc_send(struct vio_driver_state *vio, void *data, int len);
388extern void vio_link_state_change(struct vio_driver_state *vio, int event);
389extern void vio_conn_reset(struct vio_driver_state *vio);
390extern int vio_control_pkt_engine(struct vio_driver_state *vio, void *pkt);
391extern int vio_validate_sid(struct vio_driver_state *vio,
392 struct vio_msg_tag *tp);
393extern u32 vio_send_sid(struct vio_driver_state *vio);
394extern int vio_ldc_alloc(struct vio_driver_state *vio,
395 struct ldc_channel_config *base_cfg, void *event_arg);
396extern void vio_ldc_free(struct vio_driver_state *vio);
397extern int vio_driver_init(struct vio_driver_state *vio, struct vio_dev *vdev,
398 u8 dev_class, struct vio_version *ver_table,
399 int ver_table_size, struct vio_driver_ops *ops,
400 char *name);
401
402extern void vio_port_up(struct vio_driver_state *vio);
403
404#endif /* _SPARC64_VIO_H */
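
vio_dring_avail() relies on prod and cons being masked by a power-of-2 ring size, so the subtraction stays correct across wraparound. The arithmetic, checked once in isolation:

	#include <assert.h>

	int main(void)
	{
		unsigned int ring_size = 512, pending = 512;

		/* prod has wrapped past the end of the ring while cons
		 * has not caught up yet: 22 descriptors are outstanding.
		 */
		unsigned int prod = 10, cons = 500;
		unsigned int in_flight = (prod - cons) & (ring_size - 1);

		assert(in_flight == 22);
		assert(pending - in_flight == 490);
		return 0;
	}
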