aboutsummaryrefslogtreecommitdiffstats
path: root/arch
diff options
context:
space:
mode:
authorDavid S. Miller <davem@sunset.davemloft.net>2007-07-10 01:22:44 -0400
committerDavid S. Miller <davem@sunset.davemloft.net>2007-07-16 07:03:18 -0400
commite53e97ce3c7119199d2788d8fd1618efa9c2d1eb (patch)
tree799f1b7960fcaf9a02800419b038d42eb031f776 /arch
parent8f41958bdd577731f7411c9605cfaa9db6766809 (diff)
[SPARC64]: Add LDOM virtual channel driver and VIO device layer.
Virtual devices on Sun Logical Domains are built on top of a virtual channel framework. This, with help of hypervisor interfaces, provides a link layer protocol with basic handshaking over which virtual device clients and servers communicate. Built on top of this is a VIO device protocol which has its own handshaking and message types. At this layer attributes are exchanged (disk size, network device addresses, etc.), descriptor rings are registered, and data transfers are triggered and replied to. Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'arch')
-rw-r--r--arch/sparc64/Kconfig6
-rw-r--r--arch/sparc64/kernel/Makefile1
-rw-r--r--arch/sparc64/kernel/ldc.c2338
-rw-r--r--arch/sparc64/kernel/vio.c347
-rw-r--r--arch/sparc64/kernel/viohs.c809
5 files changed, 3501 insertions, 0 deletions
diff --git a/arch/sparc64/Kconfig b/arch/sparc64/Kconfig
index 6566d13db04f..af59daa81058 100644
--- a/arch/sparc64/Kconfig
+++ b/arch/sparc64/Kconfig
@@ -305,6 +305,12 @@ config SUN_IO
305 bool 305 bool
306 default y 306 default y
307 307
308config SUN_LDOMS
309 bool "Sun Logical Domains support"
310 help
311 Say Y here if you want to support virtual devices via
312 Logical Domains.
313
308config PCI 314config PCI
309 bool "PCI support" 315 bool "PCI support"
310 select ARCH_SUPPORTS_MSI 316 select ARCH_SUPPORTS_MSI
diff --git a/arch/sparc64/kernel/Makefile b/arch/sparc64/kernel/Makefile
index f964bf28d21a..719ab23b1938 100644
--- a/arch/sparc64/kernel/Makefile
+++ b/arch/sparc64/kernel/Makefile
@@ -26,6 +26,7 @@ obj-$(CONFIG_MODULES) += module.o
26obj-$(CONFIG_US3_FREQ) += us3_cpufreq.o 26obj-$(CONFIG_US3_FREQ) += us3_cpufreq.o
27obj-$(CONFIG_US2E_FREQ) += us2e_cpufreq.o 27obj-$(CONFIG_US2E_FREQ) += us2e_cpufreq.o
28obj-$(CONFIG_KPROBES) += kprobes.o 28obj-$(CONFIG_KPROBES) += kprobes.o
29obj-$(CONFIG_SUN_LDOMS) += ldc.o vio.o viohs.o
29obj-$(CONFIG_AUDIT) += audit.o 30obj-$(CONFIG_AUDIT) += audit.o
30obj-$(CONFIG_AUDIT)$(CONFIG_SPARC32_COMPAT) += compat_audit.o 31obj-$(CONFIG_AUDIT)$(CONFIG_SPARC32_COMPAT) += compat_audit.o
31obj-y += $(obj-yy) 32obj-y += $(obj-yy)
diff --git a/arch/sparc64/kernel/ldc.c b/arch/sparc64/kernel/ldc.c
new file mode 100644
index 000000000000..0fa04d6f978d
--- /dev/null
+++ b/arch/sparc64/kernel/ldc.c
@@ -0,0 +1,2338 @@
1/* ldc.c: Logical Domain Channel link-layer protocol driver.
2 *
3 * Copyright (C) 2007 David S. Miller <davem@davemloft.net>
4 */
5
6#include <linux/kernel.h>
7#include <linux/module.h>
8#include <linux/slab.h>
9#include <linux/spinlock.h>
10#include <linux/delay.h>
11#include <linux/errno.h>
12#include <linux/string.h>
13#include <linux/scatterlist.h>
14#include <linux/interrupt.h>
15#include <linux/list.h>
16#include <linux/init.h>
17
18#include <asm/hypervisor.h>
19#include <asm/iommu.h>
20#include <asm/page.h>
21#include <asm/ldc.h>
22#include <asm/mdesc.h>
23
/* Driver identification strings.  PFX is the prefix used by the printk
 * messages in this file.
 */
24#define DRV_MODULE_NAME "ldc"
25#define PFX DRV_MODULE_NAME ": "
26#define DRV_MODULE_VERSION "1.0"
27#define DRV_MODULE_RELDATE "June 25, 2007"
28
/* Version banner assembled from the macros above. */
29static char version[] __devinitdata =
30 DRV_MODULE_NAME ".c:v" DRV_MODULE_VERSION " (" DRV_MODULE_RELDATE ")\n";
/* Size in bytes of one queue entry; queue offsets advance in steps of
 * this size.
 */
31#define LDC_PACKET_SIZE 64
32
33/* Packet header layout for unreliable and reliable mode frames.
34 * When in RAW mode, packets are simply straight 64-byte payloads
35 * with no headers.
36 *
 * The first 8 bytes (type, stype, ctrl, env, seqid) form the common
 * header; the union overlays the remaining LDC_PACKET_SIZE - 8 bytes.
 */
37struct ldc_packet {
	/* Frame class; ldc_rx() dispatches on this value. */
38 u8 type;
39#define LDC_CTRL 0x01
40#define LDC_DATA 0x02
41#define LDC_ERR 0x10
42
	/* Frame sub-type: informational, positive or negative acknowledge. */
43 u8 stype;
44#define LDC_INFO 0x01
45#define LDC_ACK 0x02
46#define LDC_NACK 0x04
47
	/* Control message code, meaningful for LDC_CTRL frames. */
48 u8 ctrl;
49#define LDC_VERS 0x01 /* Link Version */
50#define LDC_RTS 0x02 /* Request To Send */
51#define LDC_RTR 0x03 /* Ready To Receive */
52#define LDC_RDX 0x04 /* Ready for Data eXchange */
53#define LDC_CTRL_MSK 0x0f
54
	/* Envelope byte.  RTS/RTR handshake frames carry the channel mode
	 * here.  NOTE(review): the LDC_LEN and fragment bits are presumably
	 * used by the data paths, which are outside this part of the file.
	 */
55 u8 env;
56#define LDC_LEN 0x3f
57#define LDC_FRAG_MASK 0xc0
58#define LDC_START 0x40
59#define LDC_STOP 0x80
60
	/* Sequence number of this frame. */
61 u32 seqid;
62
63 union {
		/* Payload when no ackid is carried (also used for the
		 * version payload of LDC_VERS control frames).
		 */
64 u8 u_data[LDC_PACKET_SIZE - 8];
		/* Layout with an ackid: 8 more header bytes, then payload. */
65 struct {
66 u32 pad;
67 u32 ackid;
68 u8 r_data[LDC_PACKET_SIZE - 8 - 8];
69 } r;
70 } u;
71};
72
/* Protocol version pair.  process_version() reads one of these from the
 * start of the payload of an LDC_VERS control frame.
 */
73struct ldc_version {
74 u16 major;
75 u16 minor;
76};
77
78/* Ordered from largest major to lowest.
 * find_by_major() depends on this ordering, and start_handshake() offers
 * the first entry to the peer.
 */
79static struct ldc_version ver_arr[] = {
80 { .major = 1, .minor = 0 },
81};
82
/* MTU applied by ldc_alloc() when the caller's config leaves mtu at zero. */
83#define LDC_DEFAULT_MTU (4 * LDC_PACKET_SIZE)
/* Entries per TX/RX queue: as many packets as fit in one page. */
84#define LDC_DEFAULT_NUM_ENTRIES (PAGE_SIZE / LDC_PACKET_SIZE)
85
86struct ldc_channel;
87
/* Per-mode read/write entry points.  ldc_alloc() stores a pointer to one
 * of the tables declared below in the channel, chosen by the configured
 * mode.
 */
88struct ldc_mode_ops {
89 int (*write)(struct ldc_channel *, const void *, unsigned int);
90 int (*read)(struct ldc_channel *, void *, unsigned int);
91};
92
/* Tentative definitions; the initialised tables are not in this part of
 * the file.
 */
93static const struct ldc_mode_ops raw_ops;
94static const struct ldc_mode_ops nonraw_ops;
95static const struct ldc_mode_ops stream_ops;
96
/* ldc_alloc() fails with -ENODEV while this is zero. */
97int ldom_domaining_enabled;
98
/* Per-channel map table state, set up by ldc_iommu_init() and torn down
 * by ldc_iommu_release().
 */
99struct ldc_iommu {
100 /* Protects arena alloc/free. */
101 spinlock_t lock;
	/* Allocation bitmap (arena.map) and entry count (arena.limit). */
102 struct iommu_arena arena;
	/* Table registered with the hypervisor via sun4v_ldc_set_map_table(). */
103 struct ldc_mtable_entry *page_table;
104};
105
/* State of one logical domain channel. */
106struct ldc_channel {
107 /* Protects all operations that depend upon channel state. */
108 spinlock_t lock;
109
	/* Channel ID passed to every sun4v_ldc_*() hypervisor call. */
110 unsigned long id;
111
	/* Buffer of cfg.mtu bytes, allocated by ldc_alloc() only for
	 * LDC_MODE_STREAM channels; NULL otherwise.
	 */
112 u8 *mssbuf;
113 u32 mssbuf_len;
114 u32 mssbuf_off;
115
	/* TX queue: kernel virtual base, head/tail as byte offsets into the
	 * queue (always multiples of LDC_PACKET_SIZE), entry count, and the
	 * physical address (__pa) handed to the hypervisor.
	 */
116 struct ldc_packet *tx_base;
117 unsigned long tx_head;
118 unsigned long tx_tail;
119 unsigned long tx_num_entries;
120 unsigned long tx_ra;
121
	/* Offset of the oldest TX packet not yet acknowledged; used as the
	 * effective head in reliable and stream modes.
	 */
122 unsigned long tx_acked;
123
	/* RX queue, same conventions as the TX queue fields above. */
124 struct ldc_packet *rx_base;
125 unsigned long rx_head;
126 unsigned long rx_tail;
127 unsigned long rx_num_entries;
128 unsigned long rx_ra;
129
	/* Sequence tracking: last sequence id accepted from the peer and
	 * last sequence id used for sending.
	 */
130 u32 rcv_nxt;
131 u32 snd_nxt;
132
	/* Channel state as last reported by the hypervisor get_state calls. */
133 unsigned long chan_state;
134
	/* Copy of the caller's configuration and the opaque argument handed
	 * back through cfg.event().
	 */
135 struct ldc_channel_config cfg;
136 void *event_arg;
137
138 const struct ldc_mode_ops *mops;
139
140 struct ldc_iommu iommu;
141
	/* Version agreed on during the handshake. */
142 struct ldc_version ver;
143
	/* Handshake progress. */
144 u8 hs_state;
145#define LDC_HS_CLOSED 0x00
146#define LDC_HS_OPEN 0x01
147#define LDC_HS_GOTVERS 0x02
148#define LDC_HS_SENTRTR 0x03
149#define LDC_HS_GOTRTR 0x04
150#define LDC_HS_COMPLETE 0x10
151
	/* Resource bookkeeping bits consulted by ldc_free()/ldc_bind(), plus
	 * LDC_FLAG_RESET which makes ldc_rx() discard incoming packets.
	 */
152 u8 flags;
153#define LDC_FLAG_ALLOCED_QUEUES 0x01
154#define LDC_FLAG_REGISTERED_QUEUES 0x02
155#define LDC_FLAG_REGISTERED_IRQS 0x04
156#define LDC_FLAG_RESET 0x10
157
	/* Payload bytes available per packet for the configured mode. */
158 u8 mss;
	/* Driver-level state, one of LDC_STATE_*. */
159 u8 state;
160
	/* Initialised empty by ldc_alloc(); its users are outside this
	 * part of the file.
	 */
161 struct hlist_head mh_list;
162
	/* Linkage on the global ldc_channel_list. */
163 struct hlist_node list;
164};
165
/* Conditional debug printk.  Requires a 'struct ldc_channel *lp' to be in
 * scope at the call site; the message is emitted only when the
 * LDC_DEBUG_<TYPE> bit is set in lp->cfg.debug, and is prefixed with PFX
 * and the channel ID.
 */
166#define ldcdbg(TYPE, f, a...) \
167do { if (lp->cfg.debug & LDC_DEBUG_##TYPE) \
168 printk(KERN_INFO PFX "ID[%lu] " f, lp->id, ## a); \
169} while (0)
170
171static const char *state_to_str(u8 state)
172{
173 switch (state) {
174 case LDC_STATE_INVALID:
175 return "INVALID";
176 case LDC_STATE_INIT:
177 return "INIT";
178 case LDC_STATE_BOUND:
179 return "BOUND";
180 case LDC_STATE_READY:
181 return "READY";
182 case LDC_STATE_CONNECTED:
183 return "CONNECTED";
184 default:
185 return "<UNKNOWN>";
186 }
187}
188
189static void ldc_set_state(struct ldc_channel *lp, u8 state)
190{
191 ldcdbg(STATE, "STATE (%s) --> (%s)\n",
192 state_to_str(lp->state),
193 state_to_str(state));
194
195 lp->state = state;
196}
197
198static unsigned long __advance(unsigned long off, unsigned long num_entries)
199{
200 off += LDC_PACKET_SIZE;
201 if (off == (num_entries * LDC_PACKET_SIZE))
202 off = 0;
203
204 return off;
205}
206
207static unsigned long rx_advance(struct ldc_channel *lp, unsigned long off)
208{
209 return __advance(off, lp->rx_num_entries);
210}
211
212static unsigned long tx_advance(struct ldc_channel *lp, unsigned long off)
213{
214 return __advance(off, lp->tx_num_entries);
215}
216
217static struct ldc_packet *handshake_get_tx_packet(struct ldc_channel *lp,
218 unsigned long *new_tail)
219{
220 struct ldc_packet *p;
221 unsigned long t;
222
223 t = tx_advance(lp, lp->tx_tail);
224 if (t == lp->tx_head)
225 return NULL;
226
227 *new_tail = t;
228
229 p = lp->tx_base;
230 return p + (lp->tx_tail / LDC_PACKET_SIZE);
231}
232
233/* When we are in reliable or stream mode, have to track the next packet
234 * we haven't gotten an ACK for in the TX queue using tx_acked. We have
235 * to be careful not to stomp over the queue past that point. During
236 * the handshake, we don't have TX data packets pending in the queue
237 * and that's why handshake_get_tx_packet() need not be mindful of
238 * lp->tx_acked.
239 */
240static unsigned long head_for_data(struct ldc_channel *lp)
241{
242 if (lp->cfg.mode == LDC_MODE_RELIABLE ||
243 lp->cfg.mode == LDC_MODE_STREAM)
244 return lp->tx_acked;
245 return lp->tx_head;
246}
247
248static int tx_has_space_for(struct ldc_channel *lp, unsigned int size)
249{
250 unsigned long limit, tail, new_tail, diff;
251 unsigned int mss;
252
253 limit = head_for_data(lp);
254 tail = lp->tx_tail;
255 new_tail = tx_advance(lp, tail);
256 if (new_tail == limit)
257 return 0;
258
259 if (limit > new_tail)
260 diff = limit - new_tail;
261 else
262 diff = (limit +
263 ((lp->tx_num_entries * LDC_PACKET_SIZE) - new_tail));
264 diff /= LDC_PACKET_SIZE;
265 mss = lp->mss;
266
267 if (diff * mss < size)
268 return 0;
269
270 return 1;
271}
272
273static struct ldc_packet *data_get_tx_packet(struct ldc_channel *lp,
274 unsigned long *new_tail)
275{
276 struct ldc_packet *p;
277 unsigned long h, t;
278
279 h = head_for_data(lp);
280 t = tx_advance(lp, lp->tx_tail);
281 if (t == h)
282 return NULL;
283
284 *new_tail = t;
285
286 p = lp->tx_base;
287 return p + (lp->tx_tail / LDC_PACKET_SIZE);
288}
289
290static int set_tx_tail(struct ldc_channel *lp, unsigned long tail)
291{
292 unsigned long orig_tail = lp->tx_tail;
293 int limit = 1000;
294
295 lp->tx_tail = tail;
296 while (limit-- > 0) {
297 unsigned long err;
298
299 err = sun4v_ldc_tx_set_qtail(lp->id, tail);
300 if (!err)
301 return 0;
302
303 if (err != HV_EWOULDBLOCK) {
304 lp->tx_tail = orig_tail;
305 return -EINVAL;
306 }
307 udelay(1);
308 }
309
310 lp->tx_tail = orig_tail;
311 return -EBUSY;
312}
313
314/* This just updates the head value in the hypervisor using
315 * a polling loop with a timeout. The caller takes care of
316 * upating software state representing the head change, if any.
317 */
318static int __set_rx_head(struct ldc_channel *lp, unsigned long head)
319{
320 int limit = 1000;
321
322 while (limit-- > 0) {
323 unsigned long err;
324
325 err = sun4v_ldc_rx_set_qhead(lp->id, head);
326 if (!err)
327 return 0;
328
329 if (err != HV_EWOULDBLOCK)
330 return -EINVAL;
331
332 udelay(1);
333 }
334
335 return -EBUSY;
336}
337
338static int send_tx_packet(struct ldc_channel *lp,
339 struct ldc_packet *p,
340 unsigned long new_tail)
341{
342 BUG_ON(p != (lp->tx_base + (lp->tx_tail / LDC_PACKET_SIZE)));
343
344 return set_tx_tail(lp, new_tail);
345}
346
347static struct ldc_packet *handshake_compose_ctrl(struct ldc_channel *lp,
348 u8 stype, u8 ctrl,
349 void *data, int dlen,
350 unsigned long *new_tail)
351{
352 struct ldc_packet *p = handshake_get_tx_packet(lp, new_tail);
353
354 if (p) {
355 memset(p, 0, sizeof(*p));
356 p->type = LDC_CTRL;
357 p->stype = stype;
358 p->ctrl = ctrl;
359 if (data)
360 memcpy(p->u.u_data, data, dlen);
361 }
362 return p;
363}
364
365static int start_handshake(struct ldc_channel *lp)
366{
367 struct ldc_packet *p;
368 struct ldc_version *ver;
369 unsigned long new_tail;
370
371 ver = &ver_arr[0];
372
373 ldcdbg(HS, "SEND VER INFO maj[%u] min[%u]\n",
374 ver->major, ver->minor);
375
376 p = handshake_compose_ctrl(lp, LDC_INFO, LDC_VERS,
377 ver, sizeof(*ver), &new_tail);
378 if (p) {
379 int err = send_tx_packet(lp, p, new_tail);
380 if (!err)
381 lp->flags &= ~LDC_FLAG_RESET;
382 return err;
383 }
384 return -EBUSY;
385}
386
387static int send_version_nack(struct ldc_channel *lp,
388 u16 major, u16 minor)
389{
390 struct ldc_packet *p;
391 struct ldc_version ver;
392 unsigned long new_tail;
393
394 ver.major = major;
395 ver.minor = minor;
396
397 p = handshake_compose_ctrl(lp, LDC_NACK, LDC_VERS,
398 &ver, sizeof(ver), &new_tail);
399 if (p) {
400 ldcdbg(HS, "SEND VER NACK maj[%u] min[%u]\n",
401 ver.major, ver.minor);
402
403 return send_tx_packet(lp, p, new_tail);
404 }
405 return -EBUSY;
406}
407
408static int send_version_ack(struct ldc_channel *lp,
409 struct ldc_version *vp)
410{
411 struct ldc_packet *p;
412 unsigned long new_tail;
413
414 p = handshake_compose_ctrl(lp, LDC_ACK, LDC_VERS,
415 vp, sizeof(*vp), &new_tail);
416 if (p) {
417 ldcdbg(HS, "SEND VER ACK maj[%u] min[%u]\n",
418 vp->major, vp->minor);
419
420 return send_tx_packet(lp, p, new_tail);
421 }
422 return -EBUSY;
423}
424
425static int send_rts(struct ldc_channel *lp)
426{
427 struct ldc_packet *p;
428 unsigned long new_tail;
429
430 p = handshake_compose_ctrl(lp, LDC_INFO, LDC_RTS, NULL, 0,
431 &new_tail);
432 if (p) {
433 p->env = lp->cfg.mode;
434 p->seqid = 0;
435 lp->rcv_nxt = 0;
436
437 ldcdbg(HS, "SEND RTS env[0x%x] seqid[0x%x]\n",
438 p->env, p->seqid);
439
440 return send_tx_packet(lp, p, new_tail);
441 }
442 return -EBUSY;
443}
444
445static int send_rtr(struct ldc_channel *lp)
446{
447 struct ldc_packet *p;
448 unsigned long new_tail;
449
450 p = handshake_compose_ctrl(lp, LDC_INFO, LDC_RTR, NULL, 0,
451 &new_tail);
452 if (p) {
453 p->env = lp->cfg.mode;
454 p->seqid = 0;
455
456 ldcdbg(HS, "SEND RTR env[0x%x] seqid[0x%x]\n",
457 p->env, p->seqid);
458
459 return send_tx_packet(lp, p, new_tail);
460 }
461 return -EBUSY;
462}
463
464static int send_rdx(struct ldc_channel *lp)
465{
466 struct ldc_packet *p;
467 unsigned long new_tail;
468
469 p = handshake_compose_ctrl(lp, LDC_INFO, LDC_RDX, NULL, 0,
470 &new_tail);
471 if (p) {
472 p->env = 0;
473 p->seqid = ++lp->snd_nxt;
474 p->u.r.ackid = lp->rcv_nxt;
475
476 ldcdbg(HS, "SEND RDX env[0x%x] seqid[0x%x] ackid[0x%x]\n",
477 p->env, p->seqid, p->u.r.ackid);
478
479 return send_tx_packet(lp, p, new_tail);
480 }
481 return -EBUSY;
482}
483
484static int send_data_nack(struct ldc_channel *lp, struct ldc_packet *data_pkt)
485{
486 struct ldc_packet *p;
487 unsigned long new_tail;
488 int err;
489
490 p = data_get_tx_packet(lp, &new_tail);
491 if (!p)
492 return -EBUSY;
493 memset(p, 0, sizeof(*p));
494 p->type = data_pkt->type;
495 p->stype = LDC_NACK;
496 p->ctrl = data_pkt->ctrl & LDC_CTRL_MSK;
497 p->seqid = lp->snd_nxt;
498 p->u.r.ackid = lp->rcv_nxt;
499
500 ldcdbg(HS, "SEND DATA NACK type[0x%x] ctl[0x%x] seq[0x%x] ack[0x%x]\n",
501 p->type, p->ctrl, p->seqid, p->u.r.ackid);
502
503 err = send_tx_packet(lp, p, new_tail);
504 if (!err)
505 lp->snd_nxt++;
506
507 return err;
508}
509
/* Abort the current connection attempt or session.
 *
 * Re-registers the TX and RX queues with the hypervisor and re-reads the
 * head/tail/state of both queues into the channel.  Hypervisor failures
 * are logged but otherwise ignored.  Always returns -ECONNRESET so
 * callers can 'return ldc_abort(lp);' directly.
 */
510static int ldc_abort(struct ldc_channel *lp)
511{
512 unsigned long hv_err;
513
514 ldcdbg(STATE, "ABORT\n");
515
516 /* We report but do not act upon the hypervisor errors because
517 * there really isn't much we can do if they fail at this point.
518 */
519 hv_err = sun4v_ldc_tx_qconf(lp->id, lp->tx_ra, lp->tx_num_entries);
520 if (hv_err)
521 printk(KERN_ERR PFX "ldc_abort: "
522 "sun4v_ldc_tx_qconf(%lx,%lx,%lx) failed, err=%lu\n",
523 lp->id, lp->tx_ra, lp->tx_num_entries, hv_err);
524
	/* Refresh the software copy of the TX queue pointers. */
525 hv_err = sun4v_ldc_tx_get_state(lp->id,
526 &lp->tx_head,
527 &lp->tx_tail,
528 &lp->chan_state);
529 if (hv_err)
530 printk(KERN_ERR PFX "ldc_abort: "
531 "sun4v_ldc_tx_get_state(%lx,...) failed, err=%lu\n",
532 lp->id, hv_err);
533
534 hv_err = sun4v_ldc_rx_qconf(lp->id, lp->rx_ra, lp->rx_num_entries);
535 if (hv_err)
536 printk(KERN_ERR PFX "ldc_abort: "
537 "sun4v_ldc_rx_qconf(%lx,%lx,%lx) failed, err=%lu\n",
538 lp->id, lp->rx_ra, lp->rx_num_entries, hv_err);
539
540 /* Refetch the RX queue state as well, because we could be invoked
541 * here in the queue processing context.
542 */
543 hv_err = sun4v_ldc_rx_get_state(lp->id,
544 &lp->rx_head,
545 &lp->rx_tail,
546 &lp->chan_state);
547 if (hv_err)
548 printk(KERN_ERR PFX "ldc_abort: "
549 "sun4v_ldc_rx_get_state(%lx,...) failed, err=%lu\n",
550 lp->id, hv_err);
551
552 return -ECONNRESET;
553}
554
555static struct ldc_version *find_by_major(u16 major)
556{
557 struct ldc_version *ret = NULL;
558 int i;
559
560 for (i = 0; i < ARRAY_SIZE(ver_arr); i++) {
561 struct ldc_version *v = &ver_arr[i];
562 if (v->major <= major) {
563 ret = v;
564 break;
565 }
566 }
567 return ret;
568}
569
570static int process_ver_info(struct ldc_channel *lp, struct ldc_version *vp)
571{
572 struct ldc_version *vap;
573 int err;
574
575 ldcdbg(HS, "GOT VERSION INFO major[%x] minor[%x]\n",
576 vp->major, vp->minor);
577
578 if (lp->hs_state == LDC_HS_GOTVERS) {
579 lp->hs_state = LDC_HS_OPEN;
580 memset(&lp->ver, 0, sizeof(lp->ver));
581 }
582
583 vap = find_by_major(vp->major);
584 if (!vap) {
585 err = send_version_nack(lp, 0, 0);
586 } else if (vap->major != vp->major) {
587 err = send_version_nack(lp, vap->major, vap->minor);
588 } else {
589 struct ldc_version ver = *vp;
590 if (ver.minor > vap->minor)
591 ver.minor = vap->minor;
592 err = send_version_ack(lp, &ver);
593 if (!err) {
594 lp->ver = ver;
595 lp->hs_state = LDC_HS_GOTVERS;
596 }
597 }
598 if (err)
599 return ldc_abort(lp);
600
601 return 0;
602}
603
604static int process_ver_ack(struct ldc_channel *lp, struct ldc_version *vp)
605{
606 ldcdbg(HS, "GOT VERSION ACK major[%x] minor[%x]\n",
607 vp->major, vp->minor);
608
609 if (lp->hs_state == LDC_HS_GOTVERS) {
610 if (lp->ver.major != vp->major ||
611 lp->ver.minor != vp->minor)
612 return ldc_abort(lp);
613 } else {
614 lp->ver = *vp;
615 lp->hs_state = LDC_HS_GOTVERS;
616 }
617 if (send_rts(lp))
618 return ldc_abort(lp);
619 return 0;
620}
621
622static int process_ver_nack(struct ldc_channel *lp, struct ldc_version *vp)
623{
624 struct ldc_version *vap;
625
626 if ((vp->major == 0 && vp->minor == 0) ||
627 !(vap = find_by_major(vp->major))) {
628 return ldc_abort(lp);
629 } else {
630 struct ldc_packet *p;
631 unsigned long new_tail;
632
633 p = handshake_compose_ctrl(lp, LDC_INFO, LDC_VERS,
634 vap, sizeof(*vap),
635 &new_tail);
636 if (p)
637 return send_tx_packet(lp, p, new_tail);
638 else
639 return ldc_abort(lp);
640 }
641}
642
643static int process_version(struct ldc_channel *lp,
644 struct ldc_packet *p)
645{
646 struct ldc_version *vp;
647
648 vp = (struct ldc_version *) p->u.u_data;
649
650 switch (p->stype) {
651 case LDC_INFO:
652 return process_ver_info(lp, vp);
653
654 case LDC_ACK:
655 return process_ver_ack(lp, vp);
656
657 case LDC_NACK:
658 return process_ver_nack(lp, vp);
659
660 default:
661 return ldc_abort(lp);
662 }
663}
664
665static int process_rts(struct ldc_channel *lp,
666 struct ldc_packet *p)
667{
668 ldcdbg(HS, "GOT RTS stype[%x] seqid[%x] env[%x]\n",
669 p->stype, p->seqid, p->env);
670
671 if (p->stype != LDC_INFO ||
672 lp->hs_state != LDC_HS_GOTVERS ||
673 p->env != lp->cfg.mode)
674 return ldc_abort(lp);
675
676 lp->snd_nxt = p->seqid;
677 lp->rcv_nxt = p->seqid;
678 lp->hs_state = LDC_HS_SENTRTR;
679 if (send_rtr(lp))
680 return ldc_abort(lp);
681
682 return 0;
683}
684
685static int process_rtr(struct ldc_channel *lp,
686 struct ldc_packet *p)
687{
688 ldcdbg(HS, "GOT RTR stype[%x] seqid[%x] env[%x]\n",
689 p->stype, p->seqid, p->env);
690
691 if (p->stype != LDC_INFO ||
692 p->env != lp->cfg.mode)
693 return ldc_abort(lp);
694
695 lp->snd_nxt = p->seqid;
696 lp->hs_state = LDC_HS_COMPLETE;
697 ldc_set_state(lp, LDC_STATE_CONNECTED);
698 send_rdx(lp);
699
700 return LDC_EVENT_UP;
701}
702
703static int rx_seq_ok(struct ldc_channel *lp, u32 seqid)
704{
705 return lp->rcv_nxt + 1 == seqid;
706}
707
708static int process_rdx(struct ldc_channel *lp,
709 struct ldc_packet *p)
710{
711 ldcdbg(HS, "GOT RDX stype[%x] seqid[%x] env[%x] ackid[%x]\n",
712 p->stype, p->seqid, p->env, p->u.r.ackid);
713
714 if (p->stype != LDC_INFO ||
715 !(rx_seq_ok(lp, p->seqid)))
716 return ldc_abort(lp);
717
718 lp->rcv_nxt = p->seqid;
719
720 lp->hs_state = LDC_HS_COMPLETE;
721 ldc_set_state(lp, LDC_STATE_CONNECTED);
722
723 return LDC_EVENT_UP;
724}
725
726static int process_control_frame(struct ldc_channel *lp,
727 struct ldc_packet *p)
728{
729 switch (p->ctrl) {
730 case LDC_VERS:
731 return process_version(lp, p);
732
733 case LDC_RTS:
734 return process_rts(lp, p);
735
736 case LDC_RTR:
737 return process_rtr(lp, p);
738
739 case LDC_RDX:
740 return process_rdx(lp, p);
741
742 default:
743 return ldc_abort(lp);
744 }
745}
746
/* Handle a received LDC_ERR frame: the packet contents are not inspected,
 * the channel is simply aborted and the ldc_abort() result returned.
 */
747static int process_error_frame(struct ldc_channel *lp,
748 struct ldc_packet *p)
749{
750 return ldc_abort(lp);
751}
752
753static int process_data_ack(struct ldc_channel *lp,
754 struct ldc_packet *ack)
755{
756 unsigned long head = lp->tx_acked;
757 u32 ackid = ack->u.r.ackid;
758
759 while (1) {
760 struct ldc_packet *p = lp->tx_base + (head / LDC_PACKET_SIZE);
761
762 head = tx_advance(lp, head);
763
764 if (p->seqid == ackid) {
765 lp->tx_acked = head;
766 return 0;
767 }
768 if (head == lp->tx_head)
769 return ldc_abort(lp);
770 }
771
772 return 0;
773}
774
775static void send_events(struct ldc_channel *lp, unsigned int event_mask)
776{
777 if (event_mask & LDC_EVENT_RESET)
778 lp->cfg.event(lp->event_arg, LDC_EVENT_RESET);
779 if (event_mask & LDC_EVENT_UP)
780 lp->cfg.event(lp->event_arg, LDC_EVENT_UP);
781 if (event_mask & LDC_EVENT_DATA_READY)
782 lp->cfg.event(lp->event_arg, LDC_EVENT_DATA_READY);
783}
784
/* RX interrupt handler for a channel (@dev_id is the struct ldc_channel).
 *
 * Under lp->lock it refreshes the RX queue state from the hypervisor and
 * then either (a) flushes the queue while the channel is in reset,
 * (b) just reports events once the handshake is complete, or (c) runs
 * the handshake state machine over the queued packets.  The collected
 * events are delivered through send_events() after the lock is dropped.
 * Always returns IRQ_HANDLED.
 */
785static irqreturn_t ldc_rx(int irq, void *dev_id)
786{
787 struct ldc_channel *lp = dev_id;
788 unsigned long orig_state, hv_err, flags;
789 unsigned int event_mask;
790
791 spin_lock_irqsave(&lp->lock, flags);
792
	/* Remember the previous channel state so a transition can be
	 * detected below.  Note that hv_err is stored but never checked.
	 */
793 orig_state = lp->chan_state;
794 hv_err = sun4v_ldc_rx_get_state(lp->id,
795 &lp->rx_head,
796 &lp->rx_tail,
797 &lp->chan_state);
798
799 ldcdbg(RX, "RX state[0x%02lx:0x%02lx] head[0x%04lx] tail[0x%04lx]\n",
800 orig_state, lp->chan_state, lp->rx_head, lp->rx_tail);
801
802 event_mask = 0;
803
	/* RAW mode has no handshake: the link being up means connected.
	 * orig_state is overwritten so the state-change test below does not
	 * fire a second time for the same transition.
	 */
804 if (lp->cfg.mode == LDC_MODE_RAW &&
805 lp->chan_state == LDC_CHANNEL_UP) {
806 lp->hs_state = LDC_HS_COMPLETE;
807 ldc_set_state(lp, LDC_STATE_CONNECTED);
808
809 event_mask |= LDC_EVENT_UP;
810
811 orig_state = lp->chan_state;
812 }
813
814 /* If we are in reset state, flush the RX queue and ignore
815 * everything.
816 */
817 if (lp->flags & LDC_FLAG_RESET) {
818 (void) __set_rx_head(lp, lp->rx_tail);
819 goto out;
820 }
821
822 /* Once we finish the handshake, we let the ldc_read()
823 * paths do all of the control frame and state management.
824 * Just trigger the callback.
825 */
826 if (lp->hs_state == LDC_HS_COMPLETE) {
	/* Also entered by goto from the packet loop below, as soon as a
	 * control frame completes the handshake.
	 */
827handshake_complete:
828 if (lp->chan_state != orig_state) {
829 unsigned int event = LDC_EVENT_RESET;
830
831 if (lp->chan_state == LDC_CHANNEL_UP)
832 event = LDC_EVENT_UP;
833
834 event_mask |= event;
835 }
836 if (lp->rx_head != lp->rx_tail)
837 event_mask |= LDC_EVENT_DATA_READY;
838
839 goto out;
840 }
841
	/* Handshake still in progress: a channel state change means the
	 * queued packets are not processed on this pass.
	 */
842 if (lp->chan_state != orig_state)
843 goto out;
844
	/* Consume handshake packets one at a time. */
845 while (lp->rx_head != lp->rx_tail) {
846 struct ldc_packet *p;
847 unsigned long new;
848 int err;
849
850 p = lp->rx_base + (lp->rx_head / LDC_PACKET_SIZE);
851
852 switch (p->type) {
853 case LDC_CTRL:
			/* A positive return is an LDC_EVENT_* mask. */
854 err = process_control_frame(lp, p);
855 if (err > 0)
856 event_mask |= err;
857 break;
858
859 case LDC_DATA:
860 event_mask |= LDC_EVENT_DATA_READY;
861 err = 0;
862 break;
863
864 case LDC_ERR:
865 err = process_error_frame(lp, p);
866 break;
867
868 default:
869 err = ldc_abort(lp);
870 break;
871 }
872
		/* The channel was aborted; stop processing.  No event is
		 * added to event_mask on this path.
		 */
873 if (err < 0)
874 break;
875
		/* Advance the head past the consumed packet (same arithmetic
		 * as rx_advance()) and tell the hypervisor.
		 */
876 new = lp->rx_head;
877 new += LDC_PACKET_SIZE;
878 if (new == (lp->rx_num_entries * LDC_PACKET_SIZE))
879 new = 0;
880 lp->rx_head = new;
881
882 err = __set_rx_head(lp, new);
883 if (err < 0) {
884 (void) ldc_abort(lp);
885 break;
886 }
887 if (lp->hs_state == LDC_HS_COMPLETE)
888 goto handshake_complete;
889 }
890
891out:
892 spin_unlock_irqrestore(&lp->lock, flags);
893
	/* Callbacks run without lp->lock held. */
894 send_events(lp, event_mask);
895
896 return IRQ_HANDLED;
897}
898
/* TX interrupt handler for a channel (@dev_id is the struct ldc_channel).
 *
 * Refreshes the TX queue state from the hypervisor under lp->lock.  In
 * RAW mode a channel that is up is marked connected and LDC_EVENT_UP is
 * delivered after the lock is released.  Always returns IRQ_HANDLED.
 */
899static irqreturn_t ldc_tx(int irq, void *dev_id)
900{
901 struct ldc_channel *lp = dev_id;
902 unsigned long flags, hv_err, orig_state;
903 unsigned int event_mask = 0;
904
905 spin_lock_irqsave(&lp->lock, flags);
906
	/* orig_state is only used for the debug message below; hv_err is
	 * stored but never checked.
	 */
907 orig_state = lp->chan_state;
908 hv_err = sun4v_ldc_tx_get_state(lp->id,
909 &lp->tx_head,
910 &lp->tx_tail,
911 &lp->chan_state);
912
913 ldcdbg(TX, " TX state[0x%02lx:0x%02lx] head[0x%04lx] tail[0x%04lx]\n",
914 orig_state, lp->chan_state, lp->tx_head, lp->tx_tail);
915
	/* RAW mode has no handshake: the link being up means connected. */
916 if (lp->cfg.mode == LDC_MODE_RAW &&
917 lp->chan_state == LDC_CHANNEL_UP) {
918 lp->hs_state = LDC_HS_COMPLETE;
919 ldc_set_state(lp, LDC_STATE_CONNECTED);
920
921 event_mask |= LDC_EVENT_UP;
922 }
923
924 spin_unlock_irqrestore(&lp->lock, flags);
925
926 send_events(lp, event_mask);
927
928 return IRQ_HANDLED;
929}
930
931/* XXX ldc_alloc() and ldc_free() need to run under a mutex so
932 * XXX that addition and removal from the ldc_channel_list has
933 * XXX atomicity, otherwise the __ldc_channel_exists() check is
934 * XXX totally pointless as another thread can slip into ldc_alloc()
935 * XXX and add a channel with the same ID. There also needs to be
936 * XXX a spinlock for ldc_channel_list.
937 */
/* Global list of allocated channels, linked through ldc_channel.list. */
938static HLIST_HEAD(ldc_channel_list);
939
940static int __ldc_channel_exists(unsigned long id)
941{
942 struct ldc_channel *lp;
943 struct hlist_node *n;
944
945 hlist_for_each_entry(lp, n, &ldc_channel_list, list) {
946 if (lp->id == id)
947 return 1;
948 }
949 return 0;
950}
951
952static int alloc_queue(const char *name, unsigned long num_entries,
953 struct ldc_packet **base, unsigned long *ra)
954{
955 unsigned long size, order;
956 void *q;
957
958 size = num_entries * LDC_PACKET_SIZE;
959 order = get_order(size);
960
961 q = (void *) __get_free_pages(GFP_KERNEL, order);
962 if (!q) {
963 printk(KERN_ERR PFX "Alloc of %s queue failed with "
964 "size=%lu order=%lu\n", name, size, order);
965 return -ENOMEM;
966 }
967
968 memset(q, 0, PAGE_SIZE << order);
969
970 *base = q;
971 *ra = __pa(q);
972
973 return 0;
974}
975
976static void free_queue(unsigned long num_entries, struct ldc_packet *q)
977{
978 unsigned long size, order;
979
980 if (!q)
981 return;
982
983 size = num_entries * LDC_PACKET_SIZE;
984 order = get_order(size);
985
986 free_pages((unsigned long)q, order);
987}
988
989/* XXX Make this configurable... XXX */
990#define LDC_IOTABLE_SIZE (8 * 1024)
991
992static int ldc_iommu_init(struct ldc_channel *lp)
993{
994 unsigned long sz, num_tsb_entries, tsbsize, order;
995 struct ldc_iommu *iommu = &lp->iommu;
996 struct ldc_mtable_entry *table;
997 unsigned long hv_err;
998 int err;
999
1000 num_tsb_entries = LDC_IOTABLE_SIZE;
1001 tsbsize = num_tsb_entries * sizeof(struct ldc_mtable_entry);
1002
1003 spin_lock_init(&iommu->lock);
1004
1005 sz = num_tsb_entries / 8;
1006 sz = (sz + 7UL) & ~7UL;
1007 iommu->arena.map = kzalloc(sz, GFP_KERNEL);
1008 if (!iommu->arena.map) {
1009 printk(KERN_ERR PFX "Alloc of arena map failed, sz=%lu\n", sz);
1010 return -ENOMEM;
1011 }
1012
1013 iommu->arena.limit = num_tsb_entries;
1014
1015 order = get_order(tsbsize);
1016
1017 table = (struct ldc_mtable_entry *)
1018 __get_free_pages(GFP_KERNEL, order);
1019 err = -ENOMEM;
1020 if (!table) {
1021 printk(KERN_ERR PFX "Alloc of MTE table failed, "
1022 "size=%lu order=%lu\n", tsbsize, order);
1023 goto out_free_map;
1024 }
1025
1026 memset(table, 0, PAGE_SIZE << order);
1027
1028 iommu->page_table = table;
1029
1030 hv_err = sun4v_ldc_set_map_table(lp->id, __pa(table),
1031 num_tsb_entries);
1032 err = -EINVAL;
1033 if (hv_err)
1034 goto out_free_table;
1035
1036 return 0;
1037
1038out_free_table:
1039 free_pages((unsigned long) table, order);
1040 iommu->page_table = NULL;
1041
1042out_free_map:
1043 kfree(iommu->arena.map);
1044 iommu->arena.map = NULL;
1045
1046 return err;
1047}
1048
1049static void ldc_iommu_release(struct ldc_channel *lp)
1050{
1051 struct ldc_iommu *iommu = &lp->iommu;
1052 unsigned long num_tsb_entries, tsbsize, order;
1053
1054 (void) sun4v_ldc_set_map_table(lp->id, 0, 0);
1055
1056 num_tsb_entries = iommu->arena.limit;
1057 tsbsize = num_tsb_entries * sizeof(struct ldc_mtable_entry);
1058 order = get_order(tsbsize);
1059
1060 free_pages((unsigned long) iommu->page_table, order);
1061 iommu->page_table = NULL;
1062
1063 kfree(iommu->arena.map);
1064 iommu->arena.map = NULL;
1065}
1066
1067struct ldc_channel *ldc_alloc(unsigned long id,
1068 const struct ldc_channel_config *cfgp,
1069 void *event_arg)
1070{
1071 struct ldc_channel *lp;
1072 const struct ldc_mode_ops *mops;
1073 unsigned long dummy1, dummy2, hv_err;
1074 u8 mss, *mssbuf;
1075 int err;
1076
1077 err = -ENODEV;
1078 if (!ldom_domaining_enabled)
1079 goto out_err;
1080
1081 err = -EINVAL;
1082 if (!cfgp)
1083 goto out_err;
1084
1085 switch (cfgp->mode) {
1086 case LDC_MODE_RAW:
1087 mops = &raw_ops;
1088 mss = LDC_PACKET_SIZE;
1089 break;
1090
1091 case LDC_MODE_UNRELIABLE:
1092 mops = &nonraw_ops;
1093 mss = LDC_PACKET_SIZE - 8;
1094 break;
1095
1096 case LDC_MODE_RELIABLE:
1097 mops = &nonraw_ops;
1098 mss = LDC_PACKET_SIZE - 8 - 8;
1099 break;
1100
1101 case LDC_MODE_STREAM:
1102 mops = &stream_ops;
1103 mss = LDC_PACKET_SIZE - 8 - 8;
1104 break;
1105
1106 default:
1107 goto out_err;
1108 }
1109
1110 if (!cfgp->event || !event_arg || !cfgp->rx_irq || !cfgp->tx_irq)
1111 goto out_err;
1112
1113 hv_err = sun4v_ldc_tx_qinfo(id, &dummy1, &dummy2);
1114 err = -ENODEV;
1115 if (hv_err == HV_ECHANNEL)
1116 goto out_err;
1117
1118 err = -EEXIST;
1119 if (__ldc_channel_exists(id))
1120 goto out_err;
1121
1122 mssbuf = NULL;
1123
1124 lp = kzalloc(sizeof(*lp), GFP_KERNEL);
1125 err = -ENOMEM;
1126 if (!lp)
1127 goto out_err;
1128
1129 spin_lock_init(&lp->lock);
1130
1131 lp->id = id;
1132
1133 err = ldc_iommu_init(lp);
1134 if (err)
1135 goto out_free_ldc;
1136
1137 lp->mops = mops;
1138 lp->mss = mss;
1139
1140 lp->cfg = *cfgp;
1141 if (!lp->cfg.mtu)
1142 lp->cfg.mtu = LDC_DEFAULT_MTU;
1143
1144 if (lp->cfg.mode == LDC_MODE_STREAM) {
1145 mssbuf = kzalloc(lp->cfg.mtu, GFP_KERNEL);
1146 if (!mssbuf) {
1147 err = -ENOMEM;
1148 goto out_free_iommu;
1149 }
1150 lp->mssbuf = mssbuf;
1151 }
1152
1153 lp->event_arg = event_arg;
1154
1155 /* XXX allow setting via ldc_channel_config to override defaults
1156 * XXX or use some formula based upon mtu
1157 */
1158 lp->tx_num_entries = LDC_DEFAULT_NUM_ENTRIES;
1159 lp->rx_num_entries = LDC_DEFAULT_NUM_ENTRIES;
1160
1161 err = alloc_queue("TX", lp->tx_num_entries,
1162 &lp->tx_base, &lp->tx_ra);
1163 if (err)
1164 goto out_free_mssbuf;
1165
1166 err = alloc_queue("RX", lp->rx_num_entries,
1167 &lp->rx_base, &lp->rx_ra);
1168 if (err)
1169 goto out_free_txq;
1170
1171 lp->flags |= LDC_FLAG_ALLOCED_QUEUES;
1172
1173 lp->hs_state = LDC_HS_CLOSED;
1174 ldc_set_state(lp, LDC_STATE_INIT);
1175
1176 INIT_HLIST_NODE(&lp->list);
1177 hlist_add_head(&lp->list, &ldc_channel_list);
1178
1179 INIT_HLIST_HEAD(&lp->mh_list);
1180
1181 return lp;
1182
1183out_free_txq:
1184 free_queue(lp->tx_num_entries, lp->tx_base);
1185
1186out_free_mssbuf:
1187 if (mssbuf)
1188 kfree(mssbuf);
1189
1190out_free_iommu:
1191 ldc_iommu_release(lp);
1192
1193out_free_ldc:
1194 kfree(lp);
1195
1196out_err:
1197 return ERR_PTR(err);
1198}
1199EXPORT_SYMBOL(ldc_alloc);
1200
1201void ldc_free(struct ldc_channel *lp)
1202{
1203 if (lp->flags & LDC_FLAG_REGISTERED_IRQS) {
1204 free_irq(lp->cfg.rx_irq, lp);
1205 free_irq(lp->cfg.tx_irq, lp);
1206 }
1207
1208 if (lp->flags & LDC_FLAG_REGISTERED_QUEUES) {
1209 sun4v_ldc_tx_qconf(lp->id, 0, 0);
1210 sun4v_ldc_rx_qconf(lp->id, 0, 0);
1211 lp->flags &= ~LDC_FLAG_REGISTERED_QUEUES;
1212 }
1213 if (lp->flags & LDC_FLAG_ALLOCED_QUEUES) {
1214 free_queue(lp->tx_num_entries, lp->tx_base);
1215 free_queue(lp->rx_num_entries, lp->rx_base);
1216 lp->flags &= ~LDC_FLAG_ALLOCED_QUEUES;
1217 }
1218
1219 hlist_del(&lp->list);
1220
1221 if (lp->mssbuf)
1222 kfree(lp->mssbuf);
1223
1224 ldc_iommu_release(lp);
1225
1226 kfree(lp);
1227}
1228EXPORT_SYMBOL(ldc_free);
1229
1230/* Bind the channel. This registers the LDC queues with
1231 * the hypervisor and puts the channel into a pseudo-listening
1232 * state. This does not initiate a handshake, ldc_connect() does
1233 * that.
 *
 * The channel must be in LDC_STATE_INIT.  On success it is left in
 * LDC_STATE_BOUND with hs_state LDC_HS_OPEN and 0 is returned.  On
 * failure everything registered so far is undone and one of -EINVAL
 * (wrong state), the request_irq() error, -ENODEV (queue registration
 * failed) or -EBUSY (TX state could not be read) is returned.
 *
 * NOTE(review): request_irq() and free_irq() are called here with
 * lp->lock held and interrupts disabled.  Both may sleep, so this looks
 * unsafe in atomic context - confirm, and consider moving the IRQ
 * setup/teardown outside the locked region.
1234 */
1235int ldc_bind(struct ldc_channel *lp)
1236{
1237 unsigned long hv_err, flags;
1238 int err = -EINVAL;
1239
1240 spin_lock_irqsave(&lp->lock, flags);
1241
1242 if (lp->state != LDC_STATE_INIT)
1243 goto out_err;
1244
1245 err = request_irq(lp->cfg.rx_irq, ldc_rx,
1246 IRQF_SAMPLE_RANDOM | IRQF_SHARED,
1247 "LDC RX", lp);
1248 if (err)
1249 goto out_err;
1250
1251 err = request_irq(lp->cfg.tx_irq, ldc_tx,
1252 IRQF_SAMPLE_RANDOM | IRQF_SHARED,
1253 "LDC TX", lp);
1254 if (err)
1255 goto out_free_rx_irq;
1256
1257
1258 lp->flags |= LDC_FLAG_REGISTERED_IRQS;
1259
	/* Each queue is first unconfigured (0, 0) and then registered with
	 * its real address and size.
	 */
1260 err = -ENODEV;
1261 hv_err = sun4v_ldc_tx_qconf(lp->id, 0, 0);
1262 if (hv_err)
1263 goto out_free_tx_irq;
1264
1265 hv_err = sun4v_ldc_tx_qconf(lp->id, lp->tx_ra, lp->tx_num_entries);
1266 if (hv_err)
1267 goto out_free_tx_irq;
1268
1269 hv_err = sun4v_ldc_rx_qconf(lp->id, 0, 0);
1270 if (hv_err)
1271 goto out_unmap_tx;
1272
1273 hv_err = sun4v_ldc_rx_qconf(lp->id, lp->rx_ra, lp->rx_num_entries);
1274 if (hv_err)
1275 goto out_unmap_tx;
1276
1277 lp->flags |= LDC_FLAG_REGISTERED_QUEUES;
1278
	/* Pick up the initial TX head/tail and channel state. */
1279 hv_err = sun4v_ldc_tx_get_state(lp->id,
1280 &lp->tx_head,
1281 &lp->tx_tail,
1282 &lp->chan_state);
1283 err = -EBUSY;
1284 if (hv_err)
1285 goto out_unmap_rx;
1286
	/* Nothing has been sent yet, so nothing is awaiting an ACK. */
1287 lp->tx_acked = lp->tx_head;
1288
1289 lp->hs_state = LDC_HS_OPEN;
1290 ldc_set_state(lp, LDC_STATE_BOUND);
1291
1292 spin_unlock_irqrestore(&lp->lock, flags);
1293
1294 return 0;
1295
	/* Error unwinding: each label undoes one step and falls through
	 * to the labels for the earlier steps.
	 */
1296out_unmap_rx:
1297 lp->flags &= ~LDC_FLAG_REGISTERED_QUEUES;
1298 sun4v_ldc_rx_qconf(lp->id, 0, 0);
1299
1300out_unmap_tx:
1301 sun4v_ldc_tx_qconf(lp->id, 0, 0);
1302
1303out_free_tx_irq:
1304 lp->flags &= ~LDC_FLAG_REGISTERED_IRQS;
1305 free_irq(lp->cfg.tx_irq, lp);
1306
1307out_free_rx_irq:
1308 free_irq(lp->cfg.rx_irq, lp);
1309
1310out_err:
1311 spin_unlock_irqrestore(&lp->lock, flags);
1312
1313 return err;
1314}
1315EXPORT_SYMBOL(ldc_bind);
1316
1317int ldc_connect(struct ldc_channel *lp)
1318{
1319 unsigned long flags;
1320 int err;
1321
1322 if (lp->cfg.mode == LDC_MODE_RAW)
1323 return -EINVAL;
1324
1325 spin_lock_irqsave(&lp->lock, flags);
1326
1327 if (!(lp->flags & LDC_FLAG_ALLOCED_QUEUES) ||
1328 !(lp->flags & LDC_FLAG_REGISTERED_QUEUES) ||
1329 lp->hs_state != LDC_HS_OPEN)
1330 err = -EINVAL;
1331 else
1332 err = start_handshake(lp);
1333
1334 spin_unlock_irqrestore(&lp->lock, flags);
1335
1336 return err;
1337}
1338EXPORT_SYMBOL(ldc_connect);
1339
1340int ldc_disconnect(struct ldc_channel *lp)
1341{
1342 unsigned long hv_err, flags;
1343 int err;
1344
1345 if (lp->cfg.mode == LDC_MODE_RAW)
1346 return -EINVAL;
1347
1348 if (!(lp->flags & LDC_FLAG_ALLOCED_QUEUES) ||
1349 !(lp->flags & LDC_FLAG_REGISTERED_QUEUES))
1350 return -EINVAL;
1351
1352 spin_lock_irqsave(&lp->lock, flags);
1353
1354 err = -ENODEV;
1355 hv_err = sun4v_ldc_tx_qconf(lp->id, 0, 0);
1356 if (hv_err)
1357 goto out_err;
1358
1359 hv_err = sun4v_ldc_tx_qconf(lp->id, lp->tx_ra, lp->tx_num_entries);
1360 if (hv_err)
1361 goto out_err;
1362
1363 hv_err = sun4v_ldc_rx_qconf(lp->id, 0, 0);
1364 if (hv_err)
1365 goto out_err;
1366
1367 hv_err = sun4v_ldc_rx_qconf(lp->id, lp->rx_ra, lp->rx_num_entries);
1368 if (hv_err)
1369 goto out_err;
1370
1371 ldc_set_state(lp, LDC_STATE_BOUND);
1372 lp->hs_state = LDC_HS_OPEN;
1373 lp->flags |= LDC_FLAG_RESET;
1374
1375 spin_unlock_irqrestore(&lp->lock, flags);
1376
1377 return 0;
1378
1379out_err:
1380 sun4v_ldc_tx_qconf(lp->id, 0, 0);
1381 sun4v_ldc_rx_qconf(lp->id, 0, 0);
1382 free_irq(lp->cfg.tx_irq, lp);
1383 free_irq(lp->cfg.rx_irq, lp);
1384 lp->flags &= ~(LDC_FLAG_REGISTERED_IRQS |
1385 LDC_FLAG_REGISTERED_QUEUES);
1386 ldc_set_state(lp, LDC_STATE_INIT);
1387
1388 spin_unlock_irqrestore(&lp->lock, flags);
1389
1390 return err;
1391}
1392EXPORT_SYMBOL(ldc_disconnect);
1393
1394int ldc_state(struct ldc_channel *lp)
1395{
1396 return lp->state;
1397}
1398EXPORT_SYMBOL(ldc_state);
1399
1400static int write_raw(struct ldc_channel *lp, const void *buf, unsigned int size)
1401{
1402 struct ldc_packet *p;
1403 unsigned long new_tail;
1404 int err;
1405
1406 if (size > LDC_PACKET_SIZE)
1407 return -EMSGSIZE;
1408
1409 p = data_get_tx_packet(lp, &new_tail);
1410 if (!p)
1411 return -EAGAIN;
1412
1413 memcpy(p, buf, size);
1414
1415 err = send_tx_packet(lp, p, new_tail);
1416 if (!err)
1417 err = size;
1418
1419 return err;
1420}
1421
1422static int read_raw(struct ldc_channel *lp, void *buf, unsigned int size)
1423{
1424 struct ldc_packet *p;
1425 unsigned long hv_err, new;
1426 int err;
1427
1428 if (size < LDC_PACKET_SIZE)
1429 return -EINVAL;
1430
1431 hv_err = sun4v_ldc_rx_get_state(lp->id,
1432 &lp->rx_head,
1433 &lp->rx_tail,
1434 &lp->chan_state);
1435 if (hv_err)
1436 return ldc_abort(lp);
1437
1438 if (lp->chan_state == LDC_CHANNEL_DOWN ||
1439 lp->chan_state == LDC_CHANNEL_RESETTING)
1440 return -ECONNRESET;
1441
1442 if (lp->rx_head == lp->rx_tail)
1443 return 0;
1444
1445 p = lp->rx_base + (lp->rx_head / LDC_PACKET_SIZE);
1446 memcpy(buf, p, LDC_PACKET_SIZE);
1447
1448 new = rx_advance(lp, lp->rx_head);
1449 lp->rx_head = new;
1450
1451 err = __set_rx_head(lp, new);
1452 if (err < 0)
1453 err = -ECONNRESET;
1454 else
1455 err = LDC_PACKET_SIZE;
1456
1457 return err;
1458}
1459
/* Method table pairing write_raw() with read_raw().  ldc_write() and
 * ldc_read() dispatch through lp->mops; where mops is pointed at this
 * table is outside this view.
 */
1460 static const struct ldc_mode_ops raw_ops = {
1461 .write = write_raw,
1462 .read = read_raw,
1463};
1464
1465static int write_nonraw(struct ldc_channel *lp, const void *buf,
1466 unsigned int size)
1467{
1468 unsigned long hv_err, tail;
1469 unsigned int copied;
1470 u32 seq;
1471 int err;
1472
1473 hv_err = sun4v_ldc_tx_get_state(lp->id, &lp->tx_head, &lp->tx_tail,
1474 &lp->chan_state);
1475 if (unlikely(hv_err))
1476 return -EBUSY;
1477
1478 if (unlikely(lp->chan_state != LDC_CHANNEL_UP))
1479 return ldc_abort(lp);
1480
1481 if (!tx_has_space_for(lp, size))
1482 return -EAGAIN;
1483
1484 seq = lp->snd_nxt;
1485 copied = 0;
1486 tail = lp->tx_tail;
1487 while (copied < size) {
1488 struct ldc_packet *p = lp->tx_base + (tail / LDC_PACKET_SIZE);
1489 u8 *data = ((lp->cfg.mode == LDC_MODE_UNRELIABLE) ?
1490 p->u.u_data :
1491 p->u.r.r_data);
1492 int data_len;
1493
1494 p->type = LDC_DATA;
1495 p->stype = LDC_INFO;
1496 p->ctrl = 0;
1497
1498 data_len = size - copied;
1499 if (data_len > lp->mss)
1500 data_len = lp->mss;
1501
1502 BUG_ON(data_len > LDC_LEN);
1503
1504 p->env = (data_len |
1505 (copied == 0 ? LDC_START : 0) |
1506 (data_len == size - copied ? LDC_STOP : 0));
1507
1508 p->seqid = ++seq;
1509
1510 ldcdbg(DATA, "SENT DATA [%02x:%02x:%02x:%02x:%08x]\n",
1511 p->type,
1512 p->stype,
1513 p->ctrl,
1514 p->env,
1515 p->seqid);
1516
1517 memcpy(data, buf, data_len);
1518 buf += data_len;
1519 copied += data_len;
1520
1521 tail = tx_advance(lp, tail);
1522 }
1523
1524 err = set_tx_tail(lp, tail);
1525 if (!err) {
1526 lp->snd_nxt = seq;
1527 err = size;
1528 }
1529
1530 return err;
1531}
1532
1533static int rx_bad_seq(struct ldc_channel *lp, struct ldc_packet *p,
1534 struct ldc_packet *first_frag)
1535{
1536 int err;
1537
1538 if (first_frag)
1539 lp->rcv_nxt = first_frag->seqid - 1;
1540
1541 err = send_data_nack(lp, p);
1542 if (err)
1543 return err;
1544
1545 err = __set_rx_head(lp, lp->rx_tail);
1546 if (err < 0)
1547 return ldc_abort(lp);
1548
1549 return 0;
1550}
1551
1552static int data_ack_nack(struct ldc_channel *lp, struct ldc_packet *p)
1553{
1554 if (p->stype & LDC_ACK) {
1555 int err = process_data_ack(lp, p);
1556 if (err)
1557 return err;
1558 }
1559 if (p->stype & LDC_NACK)
1560 return ldc_abort(lp);
1561
1562 return 0;
1563}
1564
1565static int rx_data_wait(struct ldc_channel *lp, unsigned long cur_head)
1566{
1567 unsigned long dummy;
1568 int limit = 1000;
1569
1570 ldcdbg(DATA, "DATA WAIT cur_head[%lx] rx_head[%lx] rx_tail[%lx]\n",
1571 cur_head, lp->rx_head, lp->rx_tail);
1572 while (limit-- > 0) {
1573 unsigned long hv_err;
1574
1575 hv_err = sun4v_ldc_rx_get_state(lp->id,
1576 &dummy,
1577 &lp->rx_tail,
1578 &lp->chan_state);
1579 if (hv_err)
1580 return ldc_abort(lp);
1581
1582 ldcdbg(DATA, "REREAD head[%lx] tail[%lx] chan_state[%lx]\n",
1583 dummy, lp->rx_tail, lp->chan_state);
1584
1585 if (lp->chan_state == LDC_CHANNEL_DOWN ||
1586 lp->chan_state == LDC_CHANNEL_RESETTING)
1587 return -ECONNRESET;
1588
1589 if (cur_head != lp->rx_tail) {
1590 ldcdbg(DATA, "DATA WAIT DONE\n");
1591 return 0;
1592 }
1593
1594 udelay(1);
1595 }
1596 return -EAGAIN;
1597}
1598
1599static int rx_set_head(struct ldc_channel *lp, unsigned long head)
1600{
1601 int err = __set_rx_head(lp, head);
1602
1603 if (err < 0)
1604 return ldc_abort(lp);
1605
1606 lp->rx_head = head;
1607 return 0;
1608}
1609
1610static int read_nonraw(struct ldc_channel *lp, void *buf, unsigned int size)
1611{
1612 struct ldc_packet *first_frag;
1613 unsigned long hv_err, new;
1614 int err, copied;
1615
1616 hv_err = sun4v_ldc_rx_get_state(lp->id,
1617 &lp->rx_head,
1618 &lp->rx_tail,
1619 &lp->chan_state);
1620 if (hv_err)
1621 return ldc_abort(lp);
1622
1623 if (lp->chan_state == LDC_CHANNEL_DOWN ||
1624 lp->chan_state == LDC_CHANNEL_RESETTING)
1625 return -ECONNRESET;
1626
1627 if (lp->rx_head == lp->rx_tail)
1628 return 0;
1629
1630 first_frag = NULL;
1631 copied = err = 0;
1632 new = lp->rx_head;
1633 while (1) {
1634 struct ldc_packet *p;
1635 int pkt_len;
1636
1637 BUG_ON(new == lp->rx_tail);
1638 p = lp->rx_base + (new / LDC_PACKET_SIZE);
1639
1640 ldcdbg(RX, "RX read pkt[%02x:%02x:%02x:%02x:%08x] "
1641 "rcv_nxt[%08x]\n",
1642 p->type,
1643 p->stype,
1644 p->ctrl,
1645 p->env,
1646 p->seqid,
1647 lp->rcv_nxt);
1648
1649 if (unlikely(!rx_seq_ok(lp, p->seqid))) {
1650 err = rx_bad_seq(lp, p, first_frag);
1651 copied = 0;
1652 break;
1653 }
1654
1655 if (p->type & LDC_CTRL) {
1656 err = process_control_frame(lp, p);
1657 if (err < 0)
1658 break;
1659 err = 0;
1660 }
1661
1662 lp->rcv_nxt = p->seqid;
1663
1664 if (!(p->type & LDC_DATA)) {
1665 new = rx_advance(lp, new);
1666 goto no_data;
1667 }
1668 if (p->stype & (LDC_ACK | LDC_NACK)) {
1669 err = data_ack_nack(lp, p);
1670 if (err)
1671 break;
1672 }
1673 if (!(p->stype & LDC_INFO)) {
1674 new = rx_advance(lp, new);
1675 goto no_data;
1676 }
1677
1678 pkt_len = p->env & LDC_LEN;
1679
1680 /* Every initial packet starts with the START bit set.
1681 *
1682 * Singleton packets will have both START+STOP set.
1683 *
1684 * Fragments will have START set in the first frame, STOP
1685 * set in the last frame, and neither bit set in middle
1686 * frames of the packet.
1687 *
1688 * Therefore if we are at the beginning of a packet and
1689 * we don't see START, or we are in the middle of a fragmented
1690 * packet and do see START, we are unsynchronized and should
1691 * flush the RX queue.
1692 */
1693 if ((first_frag == NULL && !(p->env & LDC_START)) ||
1694 (first_frag != NULL && (p->env & LDC_START))) {
1695 if (!first_frag)
1696 new = rx_advance(lp, new);
1697
1698 err = rx_set_head(lp, new);
1699 if (err)
1700 break;
1701
1702 if (!first_frag)
1703 goto no_data;
1704 }
1705 if (!first_frag)
1706 first_frag = p;
1707
1708 if (pkt_len > size - copied) {
1709 /* User didn't give us a big enough buffer,
1710 * what to do? This is a pretty serious error.
1711 *
1712 * Since we haven't updated the RX ring head to
1713 * consume any of the packets, signal the error
1714 * to the user and just leave the RX ring alone.
1715 *
1716 * This seems the best behavior because this allows
1717 * a user of the LDC layer to start with a small
1718 * RX buffer for ldc_read() calls and use -EMSGSIZE
1719 * as a cue to enlarge it's read buffer.
1720 */
1721 err = -EMSGSIZE;
1722 break;
1723 }
1724
1725 /* Ok, we are gonna eat this one. */
1726 new = rx_advance(lp, new);
1727
1728 memcpy(buf,
1729 (lp->cfg.mode == LDC_MODE_UNRELIABLE ?
1730 p->u.u_data : p->u.r.r_data), pkt_len);
1731 buf += pkt_len;
1732 copied += pkt_len;
1733
1734 if (p->env & LDC_STOP)
1735 break;
1736
1737no_data:
1738 if (new == lp->rx_tail) {
1739 err = rx_data_wait(lp, new);
1740 if (err)
1741 break;
1742 }
1743 }
1744
1745 if (!err)
1746 err = rx_set_head(lp, new);
1747
1748 if (err && first_frag)
1749 lp->rcv_nxt = first_frag->seqid - 1;
1750
1751 if (!err)
1752 err = copied;
1753
1754 return err;
1755}
1756
/* Method table pairing write_nonraw() with read_nonraw(), the
 * packetised (START/STOP framed) read and write paths.
 */
1757 static const struct ldc_mode_ops nonraw_ops = {
1758 .write = write_nonraw,
1759 .read = read_nonraw,
1760};
1761
1762static int write_stream(struct ldc_channel *lp, const void *buf,
1763 unsigned int size)
1764{
1765 if (size > lp->cfg.mtu)
1766 size = lp->cfg.mtu;
1767 return write_nonraw(lp, buf, size);
1768}
1769
1770static int read_stream(struct ldc_channel *lp, void *buf, unsigned int size)
1771{
1772 if (!lp->mssbuf_len) {
1773 int err = read_nonraw(lp, lp->mssbuf,
1774 (size > lp->cfg.mtu ?
1775 lp->cfg.mtu : size));
1776 if (err < 0)
1777 return err;
1778
1779 lp->mssbuf_len = err;
1780 lp->mssbuf_off = 0;
1781 }
1782
1783 if (size > lp->mssbuf_len)
1784 size = lp->mssbuf_len;
1785 memcpy(buf, lp->mssbuf + lp->mssbuf_off, size);
1786
1787 lp->mssbuf_off += size;
1788 lp->mssbuf_len -= size;
1789
1790 return size;
1791}
1792
/* Method table pairing write_stream() with read_stream(), which wrap
 * the non-raw paths with an MTU cap and a staging buffer.
 */
1793 static const struct ldc_mode_ops stream_ops = {
1794 .write = write_stream,
1795 .read = read_stream,
1796};
1797
1798int ldc_write(struct ldc_channel *lp, const void *buf, unsigned int size)
1799{
1800 unsigned long flags;
1801 int err;
1802
1803 if (!buf)
1804 return -EINVAL;
1805
1806 if (!size)
1807 return 0;
1808
1809 spin_lock_irqsave(&lp->lock, flags);
1810
1811 if (lp->hs_state != LDC_HS_COMPLETE)
1812 err = -ENOTCONN;
1813 else
1814 err = lp->mops->write(lp, buf, size);
1815
1816 spin_unlock_irqrestore(&lp->lock, flags);
1817
1818 return err;
1819}
1820EXPORT_SYMBOL(ldc_write);
1821
1822int ldc_read(struct ldc_channel *lp, void *buf, unsigned int size)
1823{
1824 unsigned long flags;
1825 int err;
1826
1827 if (!buf)
1828 return -EINVAL;
1829
1830 if (!size)
1831 return 0;
1832
1833 spin_lock_irqsave(&lp->lock, flags);
1834
1835 if (lp->hs_state != LDC_HS_COMPLETE)
1836 err = -ENOTCONN;
1837 else
1838 err = lp->mops->read(lp, buf, size);
1839
1840 spin_unlock_irqrestore(&lp->lock, flags);
1841
1842 return err;
1843}
1844EXPORT_SYMBOL(ldc_read);
1845
1846static long arena_alloc(struct ldc_iommu *iommu, unsigned long npages)
1847{
1848 struct iommu_arena *arena = &iommu->arena;
1849 unsigned long n, i, start, end, limit;
1850 int pass;
1851
1852 limit = arena->limit;
1853 start = arena->hint;
1854 pass = 0;
1855
1856again:
1857 n = find_next_zero_bit(arena->map, limit, start);
1858 end = n + npages;
1859 if (unlikely(end >= limit)) {
1860 if (likely(pass < 1)) {
1861 limit = start;
1862 start = 0;
1863 pass++;
1864 goto again;
1865 } else {
1866 /* Scanned the whole thing, give up. */
1867 return -1;
1868 }
1869 }
1870
1871 for (i = n; i < end; i++) {
1872 if (test_bit(i, arena->map)) {
1873 start = i + 1;
1874 goto again;
1875 }
1876 }
1877
1878 for (i = n; i < end; i++)
1879 __set_bit(i, arena->map);
1880
1881 arena->hint = end;
1882
1883 return n;
1884}
1885
/* The top four bits of a cookie carry the page-size code: the mask
 * selects bits 63..60 and the shift moves the code into/out of them.
 */
1886#define COOKIE_PGSZ_CODE 0xf000000000000000ULL
1887#define COOKIE_PGSZ_CODE_SHIFT 60ULL
1888
1889static u64 pagesize_code(void)
1890{
1891 switch (PAGE_SIZE) {
1892 default:
1893 case (8ULL * 1024ULL):
1894 return 0;
1895 case (64ULL * 1024ULL):
1896 return 1;
1897 case (512ULL * 1024ULL):
1898 return 2;
1899 case (4ULL * 1024ULL * 1024ULL):
1900 return 3;
1901 case (32ULL * 1024ULL * 1024ULL):
1902 return 4;
1903 case (256ULL * 1024ULL * 1024ULL):
1904 return 5;
1905 }
1906}
1907
1908static u64 make_cookie(u64 index, u64 pgsz_code, u64 page_offset)
1909{
1910 return ((pgsz_code << COOKIE_PGSZ_CODE_SHIFT) |
1911 (index << PAGE_SHIFT) |
1912 page_offset);
1913}
1914
1915static u64 cookie_to_index(u64 cookie, unsigned long *shift)
1916{
1917 u64 szcode = cookie >> COOKIE_PGSZ_CODE_SHIFT;
1918
1919 cookie &= ~COOKIE_PGSZ_CODE;
1920
1921 *shift = szcode * 3;
1922
1923 return (cookie >> (13ULL + (szcode * 3ULL)));
1924}
1925
1926static struct ldc_mtable_entry *alloc_npages(struct ldc_iommu *iommu,
1927 unsigned long npages)
1928{
1929 long entry;
1930
1931 entry = arena_alloc(iommu, npages);
1932 if (unlikely(entry < 0))
1933 return NULL;
1934
1935 return iommu->page_table + entry;
1936}
1937
1938static u64 perm_to_mte(unsigned int map_perm)
1939{
1940 u64 mte_base;
1941
1942 mte_base = pagesize_code();
1943
1944 if (map_perm & LDC_MAP_SHADOW) {
1945 if (map_perm & LDC_MAP_R)
1946 mte_base |= LDC_MTE_COPY_R;
1947 if (map_perm & LDC_MAP_W)
1948 mte_base |= LDC_MTE_COPY_W;
1949 }
1950 if (map_perm & LDC_MAP_DIRECT) {
1951 if (map_perm & LDC_MAP_R)
1952 mte_base |= LDC_MTE_READ;
1953 if (map_perm & LDC_MAP_W)
1954 mte_base |= LDC_MTE_WRITE;
1955 if (map_perm & LDC_MAP_X)
1956 mte_base |= LDC_MTE_EXEC;
1957 }
1958 if (map_perm & LDC_MAP_IO) {
1959 if (map_perm & LDC_MAP_R)
1960 mte_base |= LDC_MTE_IOMMU_R;
1961 if (map_perm & LDC_MAP_W)
1962 mte_base |= LDC_MTE_IOMMU_W;
1963 }
1964
1965 return mte_base;
1966}
1967
1968static int pages_in_region(unsigned long base, long len)
1969{
1970 int count = 0;
1971
1972 do {
1973 unsigned long new = (base + PAGE_SIZE) & PAGE_MASK;
1974
1975 len -= (new - base);
1976 base = new;
1977 count++;
1978 } while (len > 0);
1979
1980 return count;
1981}
1982
/* Running state shared by successive fill_cookies() calls while one
 * mapping is being built:
 *   page_table  - map table whose entries are being written
 *   cookies     - caller's output array
 *   mte_base    - bits OR-ed with the physical address in each entry
 *   prev_cookie - cookie value that would directly follow the last
 *                 range emitted; used to merge adjacent ranges
 *   pte_idx     - next map-table index to fill
 *   nc          - number of cookies emitted so far
 */
1983 struct cookie_state {
1984 struct ldc_mtable_entry *page_table;
1985 struct ldc_trans_cookie *cookies;
1986 u64 mte_base;
1987 u64 prev_cookie;
1988 u32 pte_idx;
1989 u32 nc;
1990};
1991
1992static void fill_cookies(struct cookie_state *sp, unsigned long pa,
1993 unsigned long off, unsigned long len)
1994{
1995 do {
1996 unsigned long tlen, new = pa + PAGE_SIZE;
1997 u64 this_cookie;
1998
1999 sp->page_table[sp->pte_idx].mte = sp->mte_base | pa;
2000
2001 tlen = PAGE_SIZE;
2002 if (off)
2003 tlen = PAGE_SIZE - off;
2004 if (tlen > len)
2005 tlen = len;
2006
2007 this_cookie = make_cookie(sp->pte_idx,
2008 pagesize_code(), off);
2009
2010 off = 0;
2011
2012 if (this_cookie == sp->prev_cookie) {
2013 sp->cookies[sp->nc - 1].cookie_size += tlen;
2014 } else {
2015 sp->cookies[sp->nc].cookie_addr = this_cookie;
2016 sp->cookies[sp->nc].cookie_size = tlen;
2017 sp->nc++;
2018 }
2019 sp->prev_cookie = this_cookie + tlen;
2020
2021 sp->pte_idx++;
2022
2023 len -= tlen;
2024 pa = new;
2025 } while (len > 0);
2026}
2027
2028static int sg_count_one(struct scatterlist *sg)
2029{
2030 unsigned long base = page_to_pfn(sg->page) << PAGE_SHIFT;
2031 long len = sg->length;
2032
2033 if ((sg->offset | len) & (8UL - 1))
2034 return -EFAULT;
2035
2036 return pages_in_region(base + sg->offset, len);
2037}
2038
2039static int sg_count_pages(struct scatterlist *sg, int num_sg)
2040{
2041 int count;
2042 int i;
2043
2044 count = 0;
2045 for (i = 0; i < num_sg; i++) {
2046 int err = sg_count_one(sg + i);
2047 if (err < 0)
2048 return err;
2049 count += err;
2050 }
2051
2052 return count;
2053}
2054
2055int ldc_map_sg(struct ldc_channel *lp,
2056 struct scatterlist *sg, int num_sg,
2057 struct ldc_trans_cookie *cookies, int ncookies,
2058 unsigned int map_perm)
2059{
2060 unsigned long i, npages, flags;
2061 struct ldc_mtable_entry *base;
2062 struct cookie_state state;
2063 struct ldc_iommu *iommu;
2064 int err;
2065
2066 if (map_perm & ~LDC_MAP_ALL)
2067 return -EINVAL;
2068
2069 err = sg_count_pages(sg, num_sg);
2070 if (err < 0)
2071 return err;
2072
2073 npages = err;
2074 if (err > ncookies)
2075 return -EMSGSIZE;
2076
2077 iommu = &lp->iommu;
2078
2079 spin_lock_irqsave(&iommu->lock, flags);
2080 base = alloc_npages(iommu, npages);
2081 spin_unlock_irqrestore(&iommu->lock, flags);
2082
2083 if (!base)
2084 return -ENOMEM;
2085
2086 state.page_table = iommu->page_table;
2087 state.cookies = cookies;
2088 state.mte_base = perm_to_mte(map_perm);
2089 state.prev_cookie = ~(u64)0;
2090 state.pte_idx = (base - iommu->page_table);
2091 state.nc = 0;
2092
2093 for (i = 0; i < num_sg; i++)
2094 fill_cookies(&state, page_to_pfn(sg[i].page) << PAGE_SHIFT,
2095 sg[i].offset, sg[i].length);
2096
2097 return state.nc;
2098}
2099EXPORT_SYMBOL(ldc_map_sg);
2100
2101int ldc_map_single(struct ldc_channel *lp,
2102 void *buf, unsigned int len,
2103 struct ldc_trans_cookie *cookies, int ncookies,
2104 unsigned int map_perm)
2105{
2106 unsigned long npages, pa, flags;
2107 struct ldc_mtable_entry *base;
2108 struct cookie_state state;
2109 struct ldc_iommu *iommu;
2110
2111 if ((map_perm & ~LDC_MAP_ALL) || (ncookies < 1))
2112 return -EINVAL;
2113
2114 pa = __pa(buf);
2115 if ((pa | len) & (8UL - 1))
2116 return -EFAULT;
2117
2118 npages = pages_in_region(pa, len);
2119
2120 iommu = &lp->iommu;
2121
2122 spin_lock_irqsave(&iommu->lock, flags);
2123 base = alloc_npages(iommu, npages);
2124 spin_unlock_irqrestore(&iommu->lock, flags);
2125
2126 if (!base)
2127 return -ENOMEM;
2128
2129 state.page_table = iommu->page_table;
2130 state.cookies = cookies;
2131 state.mte_base = perm_to_mte(map_perm);
2132 state.prev_cookie = ~(u64)0;
2133 state.pte_idx = (base - iommu->page_table);
2134 state.nc = 0;
2135 fill_cookies(&state, (pa & PAGE_MASK), (pa & ~PAGE_MASK), len);
2136 BUG_ON(state.nc != 1);
2137
2138 return state.nc;
2139}
2140EXPORT_SYMBOL(ldc_map_single);
2141
2142static void free_npages(unsigned long id, struct ldc_iommu *iommu,
2143 u64 cookie, u64 size)
2144{
2145 struct iommu_arena *arena = &iommu->arena;
2146 unsigned long i, shift, index, npages;
2147 struct ldc_mtable_entry *base;
2148
2149 npages = PAGE_ALIGN(((cookie & ~PAGE_MASK) + size)) >> PAGE_SHIFT;
2150 index = cookie_to_index(cookie, &shift);
2151 base = iommu->page_table + index;
2152
2153 BUG_ON(index > arena->limit ||
2154 (index + npages) > arena->limit);
2155
2156 for (i = 0; i < npages; i++) {
2157 if (base->cookie)
2158 sun4v_ldc_revoke(id, cookie + (i << shift),
2159 base->cookie);
2160 base->mte = 0;
2161 __clear_bit(index + i, arena->map);
2162 }
2163}
2164
2165void ldc_unmap(struct ldc_channel *lp, struct ldc_trans_cookie *cookies,
2166 int ncookies)
2167{
2168 struct ldc_iommu *iommu = &lp->iommu;
2169 unsigned long flags;
2170 int i;
2171
2172 spin_lock_irqsave(&iommu->lock, flags);
2173 for (i = 0; i < ncookies; i++) {
2174 u64 addr = cookies[i].cookie_addr;
2175 u64 size = cookies[i].cookie_size;
2176
2177 free_npages(lp->id, iommu, addr, size);
2178 }
2179 spin_unlock_irqrestore(&iommu->lock, flags);
2180}
2181EXPORT_SYMBOL(ldc_unmap);
2182
2183int ldc_copy(struct ldc_channel *lp, int copy_dir,
2184 void *buf, unsigned int len, unsigned long offset,
2185 struct ldc_trans_cookie *cookies, int ncookies)
2186{
2187 unsigned int orig_len;
2188 unsigned long ra;
2189 int i;
2190
2191 if (copy_dir != LDC_COPY_IN && copy_dir != LDC_COPY_OUT) {
2192 printk(KERN_ERR PFX "ldc_copy: ID[%lu] Bad copy_dir[%d]\n",
2193 lp->id, copy_dir);
2194 return -EINVAL;
2195 }
2196
2197 ra = __pa(buf);
2198 if ((ra | len | offset) & (8UL - 1)) {
2199 printk(KERN_ERR PFX "ldc_copy: ID[%lu] Unaligned buffer "
2200 "ra[%lx] len[%x] offset[%lx]\n",
2201 lp->id, ra, len, offset);
2202 return -EFAULT;
2203 }
2204
2205 if (lp->hs_state != LDC_HS_COMPLETE ||
2206 (lp->flags & LDC_FLAG_RESET)) {
2207 printk(KERN_ERR PFX "ldc_copy: ID[%lu] Link down hs_state[%x] "
2208 "flags[%x]\n", lp->id, lp->hs_state, lp->flags);
2209 return -ECONNRESET;
2210 }
2211
2212 orig_len = len;
2213 for (i = 0; i < ncookies; i++) {
2214 unsigned long cookie_raddr = cookies[i].cookie_addr;
2215 unsigned long this_len = cookies[i].cookie_size;
2216 unsigned long actual_len;
2217
2218 if (unlikely(offset)) {
2219 unsigned long this_off = offset;
2220
2221 if (this_off > this_len)
2222 this_off = this_len;
2223
2224 offset -= this_off;
2225 this_len -= this_off;
2226 if (!this_len)
2227 continue;
2228 cookie_raddr += this_off;
2229 }
2230
2231 if (this_len > len)
2232 this_len = len;
2233
2234 while (1) {
2235 unsigned long hv_err;
2236
2237 hv_err = sun4v_ldc_copy(lp->id, copy_dir,
2238 cookie_raddr, ra,
2239 this_len, &actual_len);
2240 if (unlikely(hv_err)) {
2241 printk(KERN_ERR PFX "ldc_copy: ID[%lu] "
2242 "HV error %lu\n",
2243 lp->id, hv_err);
2244 if (lp->hs_state != LDC_HS_COMPLETE ||
2245 (lp->flags & LDC_FLAG_RESET))
2246 return -ECONNRESET;
2247 else
2248 return -EFAULT;
2249 }
2250
2251 cookie_raddr += actual_len;
2252 ra += actual_len;
2253 len -= actual_len;
2254 if (actual_len == this_len)
2255 break;
2256
2257 this_len -= actual_len;
2258 }
2259
2260 if (!len)
2261 break;
2262 }
2263
2264 /* It is caller policy what to do about short copies.
2265 * For example, a networking driver can declare the
2266 * packet a runt and drop it.
2267 */
2268
2269 return orig_len - len;
2270}
2271EXPORT_SYMBOL(ldc_copy);
2272
2273void *ldc_alloc_exp_dring(struct ldc_channel *lp, unsigned int len,
2274 struct ldc_trans_cookie *cookies, int *ncookies,
2275 unsigned int map_perm)
2276{
2277 void *buf;
2278 int err;
2279
2280 if (len & (8UL - 1))
2281 return ERR_PTR(-EINVAL);
2282
2283 buf = kzalloc(len, GFP_KERNEL);
2284 if (!buf)
2285 return ERR_PTR(-ENOMEM);
2286
2287 err = ldc_map_single(lp, buf, len, cookies, *ncookies, map_perm);
2288 if (err < 0) {
2289 kfree(buf);
2290 return ERR_PTR(err);
2291 }
2292 *ncookies = err;
2293
2294 return buf;
2295}
2296EXPORT_SYMBOL(ldc_alloc_exp_dring);
2297
/* Unmap the cookies of an exported buffer and free the buffer.
 * @len is accepted for symmetry with the allocator and is not used.
 */
void ldc_free_exp_dring(struct ldc_channel *lp, void *buf, unsigned int len,
			struct ldc_trans_cookie *cookies, int ncookies)
{
	/* Unmap first, then release the memory. */
	ldc_unmap(lp, cookies, ncookies);

	kfree(buf);
}
2304EXPORT_SYMBOL(ldc_free_exp_dring);
2305
2306static int __init ldc_init(void)
2307{
2308 struct mdesc_node *mp;
2309 unsigned long major, minor;
2310 const u64 *v;
2311
2312 mp = md_find_node_by_name(NULL, "platform");
2313 if (!mp)
2314 return -ENODEV;
2315
2316 v = md_get_property(mp, "domaining-enabled", NULL);
2317 if (!v)
2318 return -ENODEV;
2319
2320 major = 1;
2321 minor = 0;
2322 if (sun4v_hvapi_register(HV_GRP_LDOM, major, &minor)) {
2323 printk(KERN_INFO PFX "Could not register LDOM hvapi.\n");
2324 return -ENODEV;
2325 }
2326
2327 printk(KERN_INFO "%s", version);
2328
2329 if (!*v) {
2330 printk(KERN_INFO PFX "Domaining disabled.\n");
2331 return -ENODEV;
2332 }
2333 ldom_domaining_enabled = 1;
2334
2335 return 0;
2336}
2337
2338core_initcall(ldc_init);
diff --git a/arch/sparc64/kernel/vio.c b/arch/sparc64/kernel/vio.c
new file mode 100644
index 000000000000..21c015e8365b
--- /dev/null
+++ b/arch/sparc64/kernel/vio.c
@@ -0,0 +1,347 @@
1/* vio.c: Virtual I/O channel devices probing infrastructure.
2 *
3 * Copyright (c) 2003-2005 IBM Corp.
4 * Dave Engebretsen engebret@us.ibm.com
5 * Santiago Leon santil@us.ibm.com
6 * Hollis Blanchard <hollisb@us.ibm.com>
7 * Stephen Rothwell
8 *
9 * Adapted to sparc64 by David S. Miller davem@davemloft.net
10 */
11
12#include <linux/kernel.h>
13#include <linux/irq.h>
14#include <linux/init.h>
15
16#include <asm/mdesc.h>
17#include <asm/vio.h>
18
/* Search a property list for @match.
 *
 * @list holds NUL-terminated strings laid end to end and @len is the
 * total byte count including the terminators.  Returns 1 if one of
 * the strings equals @match, 0 otherwise.
 */
static inline int find_in_proplist(const char *list, const char *match,
				   int len)
{
	const char *entry = list;
	int remaining;

	for (remaining = len; remaining > 0; ) {
		int step = strlen(entry) + 1;

		if (strcmp(entry, match) == 0)
			return 1;

		entry += step;
		remaining -= step;
	}

	return 0;
}
33
34static const struct vio_device_id *vio_match_device(
35 const struct vio_device_id *matches,
36 const struct vio_dev *dev)
37{
38 const char *type, *compat;
39 int len;
40
41 type = dev->type;
42 compat = dev->compat;
43 len = dev->compat_len;
44
45 while (matches->type[0] || matches->compat[0]) {
46 int match = 1;
47 if (matches->type[0]) {
48 match &= type
49 && !strcmp(matches->type, type);
50 }
51 if (matches->compat[0]) {
52 match &= compat &&
53 find_in_proplist(compat, matches->compat, len);
54 }
55 if (match)
56 return matches;
57 matches++;
58 }
59 return NULL;
60}
61
62static int vio_bus_match(struct device *dev, struct device_driver *drv)
63{
64 struct vio_dev *vio_dev = to_vio_dev(dev);
65 struct vio_driver *vio_drv = to_vio_driver(drv);
66 const struct vio_device_id *matches = vio_drv->id_table;
67
68 if (!matches)
69 return 0;
70
71 return vio_match_device(matches, vio_dev) != NULL;
72}
73
74static int vio_device_probe(struct device *dev)
75{
76 struct vio_dev *vdev = to_vio_dev(dev);
77 struct vio_driver *drv = to_vio_driver(dev->driver);
78 const struct vio_device_id *id;
79 int error = -ENODEV;
80
81 if (drv->probe) {
82 id = vio_match_device(drv->id_table, vdev);
83 if (id)
84 error = drv->probe(vdev, id);
85 }
86
87 return error;
88}
89
90static int vio_device_remove(struct device *dev)
91{
92 struct vio_dev *vdev = to_vio_dev(dev);
93 struct vio_driver *drv = to_vio_driver(dev->driver);
94
95 if (drv->remove)
96 return drv->remove(vdev);
97
98 return 1;
99}
100
101static ssize_t devspec_show(struct device *dev,
102 struct device_attribute *attr, char *buf)
103{
104 struct vio_dev *vdev = to_vio_dev(dev);
105 const char *str = "none";
106
107 if (vdev->type) {
108 if (!strcmp(vdev->type, "network"))
109 str = "vnet";
110 else if (!strcmp(vdev->type, "block"))
111 str = "vdisk";
112 }
113
114 return sprintf(buf, "%s\n", str);
115}
116
/* Default attributes for devices on the vio bus: the read-only
 * "devspec" file backed by devspec_show(), then the terminator.
 */
117static struct device_attribute vio_dev_attrs[] = {
118 __ATTR_RO(devspec),
119 __ATTR_NULL
120};
121
/* The "vio" bus: wires the match/probe/remove callbacks and the
 * default device attributes defined above into the driver core.
 */
122static struct bus_type vio_bus_type = {
123 .name = "vio",
124 .dev_attrs = vio_dev_attrs,
125 .match = vio_bus_match,
126 .probe = vio_device_probe,
127 .remove = vio_device_remove,
128};
129
130int vio_register_driver(struct vio_driver *viodrv)
131{
132 viodrv->driver.bus = &vio_bus_type;
133
134 return driver_register(&viodrv->driver);
135}
136EXPORT_SYMBOL(vio_register_driver);
137
138void vio_unregister_driver(struct vio_driver *viodrv)
139{
140 driver_unregister(&viodrv->driver);
141}
142EXPORT_SYMBOL(vio_unregister_driver);
143
144struct mdesc_node *vio_find_endpoint(struct vio_dev *vdev)
145{
146 struct mdesc_node *endp, *mp = vdev->mp;
147 int i;
148
149 endp = NULL;
150 for (i = 0; i < mp->num_arcs; i++) {
151 struct mdesc_node *t;
152
153 if (strcmp(mp->arcs[i].name, "fwd"))
154 continue;
155
156 t = mp->arcs[i].arc;
157 if (strcmp(t->name, "channel-endpoint"))
158 continue;
159
160 endp = t;
161 break;
162 }
163
164 return endp;
165}
166EXPORT_SYMBOL(vio_find_endpoint);
167
/* Device release callback: frees the vio_dev once the driver core
 * drops its last reference.
 *
 * Not marked __devinit: the callback can run at any time after
 * boot, so it must not live in a section that may be discarded once
 * initialisation is over.
 */
static void vio_dev_release(struct device *dev)
{
	kfree(to_vio_dev(dev));
}
172
173static ssize_t
174show_pciobppath_attr(struct device *dev, struct device_attribute *attr,
175 char *buf)
176{
177 struct vio_dev *vdev;
178 struct device_node *dp;
179
180 vdev = to_vio_dev(dev);
181 dp = vdev->dp;
182
183 return snprintf (buf, PAGE_SIZE, "%s\n", dp->full_name);
184}
185
/* World-readable "obppath" attribute; vio_create_one() adds it to
 * devices that have a device-tree node.
 */
186static DEVICE_ATTR(obppath, S_IRUSR | S_IRGRP | S_IROTH,
187 show_pciobppath_attr, NULL);
188
/* Device-tree "channel-devices" node, set by vio_init(). */
189struct device_node *cdev_node;
190
/* root_vdev: device created for the root MDESC node by
 * create_devices().  cdev_cfg_handle: "cfg-handle" value read in
 * vio_init() and used when building virtual IRQs.
 */
191static struct vio_dev *root_vdev;
192static u64 cdev_cfg_handle;
193
194static struct vio_dev *vio_create_one(struct mdesc_node *mp,
195 struct device *parent)
196{
197 const char *type, *compat;
198 struct device_node *dp;
199 struct vio_dev *vdev;
200 const u64 *irq;
201 int err, clen;
202
203 type = md_get_property(mp, "device-type", NULL);
204 if (!type)
205 type = md_get_property(mp, "name", NULL);
206 compat = md_get_property(mp, "device-type", &clen);
207
208 vdev = kzalloc(sizeof(*vdev), GFP_KERNEL);
209 if (!vdev) {
210 printk(KERN_ERR "VIO: Could not allocate vio_dev\n");
211 return NULL;
212 }
213
214 vdev->mp = mp;
215 vdev->type = type;
216 vdev->compat = compat;
217 vdev->compat_len = clen;
218
219 irq = md_get_property(mp, "tx-ino", NULL);
220 if (irq)
221 mp->irqs[0] = sun4v_build_virq(cdev_cfg_handle, *irq);
222
223 irq = md_get_property(mp, "rx-ino", NULL);
224 if (irq)
225 mp->irqs[1] = sun4v_build_virq(cdev_cfg_handle, *irq);
226
227 snprintf(vdev->dev.bus_id, BUS_ID_SIZE, "%lx", mp->node);
228 vdev->dev.parent = parent;
229 vdev->dev.bus = &vio_bus_type;
230 vdev->dev.release = vio_dev_release;
231
232 if (parent == NULL) {
233 dp = cdev_node;
234 } else if (to_vio_dev(parent) == root_vdev) {
235 dp = of_get_next_child(cdev_node, NULL);
236 while (dp) {
237 if (!strcmp(dp->type, type))
238 break;
239
240 dp = of_get_next_child(cdev_node, dp);
241 }
242 } else {
243 dp = to_vio_dev(parent)->dp;
244 }
245 vdev->dp = dp;
246
247 err = device_register(&vdev->dev);
248 if (err) {
249 printk(KERN_ERR "VIO: Could not register device %s, err=%d\n",
250 vdev->dev.bus_id, err);
251 kfree(vdev);
252 return NULL;
253 }
254 if (vdev->dp)
255 err = sysfs_create_file(&vdev->dev.kobj,
256 &dev_attr_obppath.attr);
257
258 return vdev;
259}
260
261static void walk_tree(struct mdesc_node *n, struct vio_dev *parent)
262{
263 int i;
264
265 for (i = 0; i < n->num_arcs; i++) {
266 struct mdesc_node *mp;
267 struct vio_dev *vdev;
268
269 if (strcmp(n->arcs[i].name, "fwd"))
270 continue;
271
272 mp = n->arcs[i].arc;
273
274 vdev = vio_create_one(mp, &parent->dev);
275 if (vdev && mp->num_arcs)
276 walk_tree(mp, vdev);
277 }
278}
279
280static void create_devices(struct mdesc_node *root)
281{
282 root_vdev = vio_create_one(root, NULL);
283 if (!root_vdev) {
284 printk(KERN_ERR "VIO: Coult not create root device.\n");
285 return;
286 }
287
288 walk_tree(root, root_vdev);
289}
290
/* Names looked up by vio_init(): the MDESC node holding the virtual
 * devices, the compatible string it must advertise, and the property
 * carrying its configuration handle.
 */
291const char *channel_devices_node = "channel-devices";
292const char *channel_devices_compat = "SUNW,sun4v-channel-devices";
293const char *cfg_handle_prop = "cfg-handle";
294
295static int __init vio_init(void)
296{
297 struct mdesc_node *root;
298 const char *compat;
299 const u64 *cfg_handle;
300 int err, len;
301
302 root = md_find_node_by_name(NULL, channel_devices_node);
303 if (!root) {
304 printk(KERN_INFO "VIO: No channel-devices MDESC node.\n");
305 return 0;
306 }
307
308 cdev_node = of_find_node_by_name(NULL, "channel-devices");
309 if (!cdev_node) {
310 printk(KERN_INFO "VIO: No channel-devices OBP node.\n");
311 return -ENODEV;
312 }
313
314 compat = md_get_property(root, "compatible", &len);
315 if (!compat) {
316 printk(KERN_ERR "VIO: Channel devices lacks compatible "
317 "property\n");
318 return -ENODEV;
319 }
320 if (!find_in_proplist(compat, channel_devices_compat, len)) {
321 printk(KERN_ERR "VIO: Channel devices node lacks (%s) "
322 "compat entry.\n", channel_devices_compat);
323 return -ENODEV;
324 }
325
326 cfg_handle = md_get_property(root, cfg_handle_prop, NULL);
327 if (!cfg_handle) {
328 printk(KERN_ERR "VIO: Channel devices lacks %s property\n",
329 cfg_handle_prop);
330 return -ENODEV;
331 }
332
333 cdev_cfg_handle = *cfg_handle;
334
335 err = bus_register(&vio_bus_type);
336 if (err) {
337 printk(KERN_ERR "VIO: Could not register bus type err=%d\n",
338 err);
339 return err;
340 }
341
342 create_devices(root);
343
344 return 0;
345}
346
347postcore_initcall(vio_init);
diff --git a/arch/sparc64/kernel/viohs.c b/arch/sparc64/kernel/viohs.c
new file mode 100644
index 000000000000..3eb42e3624f3
--- /dev/null
+++ b/arch/sparc64/kernel/viohs.c
@@ -0,0 +1,809 @@
1/* viohs.c: LDOM Virtual I/O handshake helper layer.
2 *
3 * Copyright (C) 2007 David S. Miller <davem@davemloft.net>
4 */
5
6#include <linux/kernel.h>
7#include <linux/module.h>
8#include <linux/string.h>
9#include <linux/delay.h>
10#include <linux/sched.h>
11#include <linux/slab.h>
12
13#include <asm/ldc.h>
14#include <asm/vio.h>
15
16int vio_ldc_send(struct vio_driver_state *vio, void *data, int len)
17{
18 int err, limit = 1000;
19
20 err = -EINVAL;
21 while (limit-- > 0) {
22 err = ldc_write(vio->lp, data, len);
23 if (!err || (err != -EAGAIN))
24 break;
25 udelay(1);
26 }
27
28 return err;
29}
30EXPORT_SYMBOL(vio_ldc_send);
31
32static int send_ctrl(struct vio_driver_state *vio,
33 struct vio_msg_tag *tag, int len)
34{
35 tag->sid = vio_send_sid(vio);
36 return vio_ldc_send(vio, tag, len);
37}
38
39static void init_tag(struct vio_msg_tag *tag, u8 type, u8 stype, u16 stype_env)
40{
41 tag->type = type;
42 tag->stype = stype;
43 tag->stype_env = stype_env;
44}
45
46static int send_version(struct vio_driver_state *vio, u16 major, u16 minor)
47{
48 struct vio_ver_info pkt;
49
50 vio->_local_sid = (u32) sched_clock();
51
52 memset(&pkt, 0, sizeof(pkt));
53 init_tag(&pkt.tag, VIO_TYPE_CTRL, VIO_SUBTYPE_INFO, VIO_VER_INFO);
54 pkt.major = major;
55 pkt.minor = minor;
56 pkt.dev_class = vio->dev_class;
57
58 viodbg(HS, "SEND VERSION INFO maj[%u] min[%u] devclass[%u]\n",
59 major, minor, vio->dev_class);
60
61 return send_ctrl(vio, &pkt.tag, sizeof(pkt));
62}
63
64static int start_handshake(struct vio_driver_state *vio)
65{
66 int err;
67
68 viodbg(HS, "START HANDSHAKE\n");
69
70 vio->hs_state = VIO_HS_INVALID;
71
72 err = send_version(vio,
73 vio->ver_table[0].major,
74 vio->ver_table[0].minor);
75 if (err < 0)
76 return err;
77
78 return 0;
79}
80
81void vio_link_state_change(struct vio_driver_state *vio, int event)
82{
83 if (event == LDC_EVENT_UP) {
84 vio->hs_state = VIO_HS_INVALID;
85
86 switch (vio->dev_class) {
87 case VDEV_NETWORK:
88 case VDEV_NETWORK_SWITCH:
89 vio->dr_state = (VIO_DR_STATE_TXREQ |
90 VIO_DR_STATE_RXREQ);
91 break;
92
93 case VDEV_DISK:
94 vio->dr_state = VIO_DR_STATE_TXREQ;
95 break;
96 case VDEV_DISK_SERVER:
97 vio->dr_state = VIO_DR_STATE_RXREQ;
98 break;
99 }
100 start_handshake(vio);
101 }
102}
103EXPORT_SYMBOL(vio_link_state_change);
104
105static int handshake_failure(struct vio_driver_state *vio)
106{
107 struct vio_dring_state *dr;
108
109 /* XXX Put policy here... Perhaps start a timer to fire
110 * XXX in 100 ms, which will bring the link up and retry
111 * XXX the handshake.
112 */
113
114 viodbg(HS, "HANDSHAKE FAILURE\n");
115
116 vio->dr_state &= ~(VIO_DR_STATE_TXREG |
117 VIO_DR_STATE_RXREG);
118
119 dr = &vio->drings[VIO_DRIVER_RX_RING];
120 memset(dr, 0, sizeof(*dr));
121
122 kfree(vio->desc_buf);
123 vio->desc_buf = NULL;
124 vio->desc_buf_len = 0;
125
126 vio->hs_state = VIO_HS_INVALID;
127
128 return -ECONNRESET;
129}
130
131static int process_unknown(struct vio_driver_state *vio, void *arg)
132{
133 struct vio_msg_tag *pkt = arg;
134
135 viodbg(HS, "UNKNOWN CONTROL [%02x:%02x:%04x:%08x]\n",
136 pkt->type, pkt->stype, pkt->stype_env, pkt->sid);
137
138 printk(KERN_ERR "vio: ID[%lu] Resetting connection.\n",
139 vio->channel_id);
140
141 ldc_disconnect(vio->lp);
142
143 return -ECONNRESET;
144}
145
146static int send_dreg(struct vio_driver_state *vio)
147{
148 struct vio_dring_state *dr = &vio->drings[VIO_DRIVER_TX_RING];
149 union {
150 struct vio_dring_register pkt;
151 char all[sizeof(struct vio_dring_register) +
152 (sizeof(struct ldc_trans_cookie) *
153 dr->ncookies)];
154 } u;
155 int i;
156
157 memset(&u, 0, sizeof(u));
158 init_tag(&u.pkt.tag, VIO_TYPE_CTRL, VIO_SUBTYPE_INFO, VIO_DRING_REG);
159 u.pkt.dring_ident = 0;
160 u.pkt.num_descr = dr->num_entries;
161 u.pkt.descr_size = dr->entry_size;
162 u.pkt.options = VIO_TX_DRING;
163 u.pkt.num_cookies = dr->ncookies;
164
165 viodbg(HS, "SEND DRING_REG INFO ndesc[%u] dsz[%u] opt[0x%x] "
166 "ncookies[%u]\n",
167 u.pkt.num_descr, u.pkt.descr_size, u.pkt.options,
168 u.pkt.num_cookies);
169
170 for (i = 0; i < dr->ncookies; i++) {
171 u.pkt.cookies[i] = dr->cookies[i];
172
173 viodbg(HS, "DRING COOKIE(%d) [%016llx:%016llx]\n",
174 i,
175 (unsigned long long) u.pkt.cookies[i].cookie_addr,
176 (unsigned long long) u.pkt.cookies[i].cookie_size);
177 }
178
179 return send_ctrl(vio, &u.pkt.tag, sizeof(u));
180}
181
182static int send_rdx(struct vio_driver_state *vio)
183{
184 struct vio_rdx pkt;
185
186 memset(&pkt, 0, sizeof(pkt));
187
188 init_tag(&pkt.tag, VIO_TYPE_CTRL, VIO_SUBTYPE_INFO, VIO_RDX);
189
190 viodbg(HS, "SEND RDX INFO\n");
191
192 return send_ctrl(vio, &pkt.tag, sizeof(pkt));
193}
194
195static int send_attr(struct vio_driver_state *vio)
196{
197 return vio->ops->send_attr(vio);
198}
199
200static struct vio_version *find_by_major(struct vio_driver_state *vio,
201 u16 major)
202{
203 struct vio_version *ret = NULL;
204 int i;
205
206 for (i = 0; i < vio->ver_table_entries; i++) {
207 struct vio_version *v = &vio->ver_table[i];
208 if (v->major <= major) {
209 ret = v;
210 break;
211 }
212 }
213 return ret;
214}
215
216static int process_ver_info(struct vio_driver_state *vio,
217 struct vio_ver_info *pkt)
218{
219 struct vio_version *vap;
220 int err;
221
222 viodbg(HS, "GOT VERSION INFO maj[%u] min[%u] devclass[%u]\n",
223 pkt->major, pkt->minor, pkt->dev_class);
224
225 if (vio->hs_state != VIO_HS_INVALID) {
226 /* XXX Perhaps invoke start_handshake? XXX */
227 memset(&vio->ver, 0, sizeof(vio->ver));
228 vio->hs_state = VIO_HS_INVALID;
229 }
230
231 vap = find_by_major(vio, pkt->major);
232
233 vio->_peer_sid = pkt->tag.sid;
234
235 if (!vap) {
236 pkt->tag.stype = VIO_SUBTYPE_NACK;
237 pkt->major = 0;
238 pkt->minor = 0;
239 viodbg(HS, "SEND VERSION NACK maj[0] min[0]\n");
240 err = send_ctrl(vio, &pkt->tag, sizeof(*pkt));
241 } else if (vap->major != pkt->major) {
242 pkt->tag.stype = VIO_SUBTYPE_NACK;
243 pkt->major = vap->major;
244 pkt->minor = vap->minor;
245 viodbg(HS, "SEND VERSION NACK maj[%u] min[%u]\n",
246 pkt->major, pkt->minor);
247 err = send_ctrl(vio, &pkt->tag, sizeof(*pkt));
248 } else {
249 struct vio_version ver = {
250 .major = pkt->major,
251 .minor = pkt->minor,
252 };
253 if (ver.minor > vap->minor)
254 ver.minor = vap->minor;
255 pkt->minor = ver.minor;
256 pkt->tag.stype = VIO_SUBTYPE_ACK;
257 viodbg(HS, "SEND VERSION ACK maj[%u] min[%u]\n",
258 pkt->major, pkt->minor);
259 err = send_ctrl(vio, &pkt->tag, sizeof(*pkt));
260 if (err > 0) {
261 vio->ver = ver;
262 vio->hs_state = VIO_HS_GOTVERS;
263 }
264 }
265 if (err < 0)
266 return handshake_failure(vio);
267
268 return 0;
269}
270
271static int process_ver_ack(struct vio_driver_state *vio,
272 struct vio_ver_info *pkt)
273{
274 viodbg(HS, "GOT VERSION ACK maj[%u] min[%u] devclass[%u]\n",
275 pkt->major, pkt->minor, pkt->dev_class);
276
277 if (vio->hs_state & VIO_HS_GOTVERS) {
278 if (vio->ver.major != pkt->major ||
279 vio->ver.minor != pkt->minor) {
280 pkt->tag.stype = VIO_SUBTYPE_NACK;
281 (void) send_ctrl(vio, &pkt->tag, sizeof(*pkt));
282 return handshake_failure(vio);
283 }
284 } else {
285 vio->ver.major = pkt->major;
286 vio->ver.minor = pkt->minor;
287 vio->hs_state = VIO_HS_GOTVERS;
288 }
289
290 switch (vio->dev_class) {
291 case VDEV_NETWORK:
292 case VDEV_DISK:
293 if (send_attr(vio) < 0)
294 return handshake_failure(vio);
295 break;
296
297 default:
298 break;
299 }
300
301 return 0;
302}
303
304static int process_ver_nack(struct vio_driver_state *vio,
305 struct vio_ver_info *pkt)
306{
307 struct vio_version *nver;
308
309 viodbg(HS, "GOT VERSION NACK maj[%u] min[%u] devclass[%u]\n",
310 pkt->major, pkt->minor, pkt->dev_class);
311
312 if ((pkt->major == 0 && pkt->minor == 0) ||
313 !(nver = find_by_major(vio, pkt->major)))
314 return handshake_failure(vio);
315
316 if (send_version(vio, nver->major, nver->minor) < 0)
317 return handshake_failure(vio);
318
319 return 0;
320}
321
322static int process_ver(struct vio_driver_state *vio, struct vio_ver_info *pkt)
323{
324 switch (pkt->tag.stype) {
325 case VIO_SUBTYPE_INFO:
326 return process_ver_info(vio, pkt);
327
328 case VIO_SUBTYPE_ACK:
329 return process_ver_ack(vio, pkt);
330
331 case VIO_SUBTYPE_NACK:
332 return process_ver_nack(vio, pkt);
333
334 default:
335 return handshake_failure(vio);
336 };
337}
338
339static int process_attr(struct vio_driver_state *vio, void *pkt)
340{
341 int err;
342
343 if (!(vio->hs_state & VIO_HS_GOTVERS))
344 return handshake_failure(vio);
345
346 err = vio->ops->handle_attr(vio, pkt);
347 if (err < 0) {
348 return handshake_failure(vio);
349 } else {
350 vio->hs_state |= VIO_HS_GOT_ATTR;
351
352 if ((vio->dr_state & VIO_DR_STATE_TXREQ) &&
353 !(vio->hs_state & VIO_HS_SENT_DREG)) {
354 if (send_dreg(vio) < 0)
355 return handshake_failure(vio);
356
357 vio->hs_state |= VIO_HS_SENT_DREG;
358 }
359 }
360 return 0;
361}
362
363static int all_drings_registered(struct vio_driver_state *vio)
364{
365 int need_rx, need_tx;
366
367 need_rx = (vio->dr_state & VIO_DR_STATE_RXREQ);
368 need_tx = (vio->dr_state & VIO_DR_STATE_TXREQ);
369
370 if (need_rx &&
371 !(vio->dr_state & VIO_DR_STATE_RXREG))
372 return 0;
373
374 if (need_tx &&
375 !(vio->dr_state & VIO_DR_STATE_TXREG))
376 return 0;
377
378 return 1;
379}
380
381static int process_dreg_info(struct vio_driver_state *vio,
382 struct vio_dring_register *pkt)
383{
384 struct vio_dring_state *dr;
385 int i, len;
386
387 viodbg(HS, "GOT DRING_REG INFO ident[%llx] "
388 "ndesc[%u] dsz[%u] opt[0x%x] ncookies[%u]\n",
389 (unsigned long long) pkt->dring_ident,
390 pkt->num_descr, pkt->descr_size, pkt->options,
391 pkt->num_cookies);
392
393 if (!(vio->dr_state & VIO_DR_STATE_RXREQ))
394 goto send_nack;
395
396 if (vio->dr_state & VIO_DR_STATE_RXREG)
397 goto send_nack;
398
399 vio->desc_buf = kzalloc(pkt->descr_size, GFP_ATOMIC);
400 if (!vio->desc_buf)
401 goto send_nack;
402
403 vio->desc_buf_len = pkt->descr_size;
404
405 dr = &vio->drings[VIO_DRIVER_RX_RING];
406
407 dr->num_entries = pkt->num_descr;
408 dr->entry_size = pkt->descr_size;
409 dr->ncookies = pkt->num_cookies;
410 for (i = 0; i < dr->ncookies; i++) {
411 dr->cookies[i] = pkt->cookies[i];
412
413 viodbg(HS, "DRING COOKIE(%d) [%016llx:%016llx]\n",
414 i,
415 (unsigned long long)
416 pkt->cookies[i].cookie_addr,
417 (unsigned long long)
418 pkt->cookies[i].cookie_size);
419 }
420
421 pkt->tag.stype = VIO_SUBTYPE_ACK;
422 pkt->dring_ident = ++dr->ident;
423
424 viodbg(HS, "SEND DRING_REG ACK ident[%llx]\n",
425 (unsigned long long) pkt->dring_ident);
426
427 len = (sizeof(*pkt) +
428 (dr->ncookies * sizeof(struct ldc_trans_cookie)));
429 if (send_ctrl(vio, &pkt->tag, len) < 0)
430 goto send_nack;
431
432 vio->dr_state |= VIO_DR_STATE_RXREG;
433
434 return 0;
435
436send_nack:
437 pkt->tag.stype = VIO_SUBTYPE_NACK;
438 viodbg(HS, "SEND DRING_REG NACK\n");
439 (void) send_ctrl(vio, &pkt->tag, sizeof(*pkt));
440
441 return handshake_failure(vio);
442}
443
444static int process_dreg_ack(struct vio_driver_state *vio,
445 struct vio_dring_register *pkt)
446{
447 struct vio_dring_state *dr;
448
449 viodbg(HS, "GOT DRING_REG ACK ident[%llx] "
450 "ndesc[%u] dsz[%u] opt[0x%x] ncookies[%u]\n",
451 (unsigned long long) pkt->dring_ident,
452 pkt->num_descr, pkt->descr_size, pkt->options,
453 pkt->num_cookies);
454
455 dr = &vio->drings[VIO_DRIVER_TX_RING];
456
457 if (!(vio->dr_state & VIO_DR_STATE_TXREQ))
458 return handshake_failure(vio);
459
460 dr->ident = pkt->dring_ident;
461 vio->dr_state |= VIO_DR_STATE_TXREG;
462
463 if (all_drings_registered(vio)) {
464 if (send_rdx(vio) < 0)
465 return handshake_failure(vio);
466 vio->hs_state = VIO_HS_SENT_RDX;
467 }
468 return 0;
469}
470
471static int process_dreg_nack(struct vio_driver_state *vio,
472 struct vio_dring_register *pkt)
473{
474 viodbg(HS, "GOT DRING_REG NACK ident[%llx] "
475 "ndesc[%u] dsz[%u] opt[0x%x] ncookies[%u]\n",
476 (unsigned long long) pkt->dring_ident,
477 pkt->num_descr, pkt->descr_size, pkt->options,
478 pkt->num_cookies);
479
480 return handshake_failure(vio);
481}
482
483static int process_dreg(struct vio_driver_state *vio,
484 struct vio_dring_register *pkt)
485{
486 if (!(vio->hs_state & VIO_HS_GOTVERS))
487 return handshake_failure(vio);
488
489 switch (pkt->tag.stype) {
490 case VIO_SUBTYPE_INFO:
491 return process_dreg_info(vio, pkt);
492
493 case VIO_SUBTYPE_ACK:
494 return process_dreg_ack(vio, pkt);
495
496 case VIO_SUBTYPE_NACK:
497 return process_dreg_nack(vio, pkt);
498
499 default:
500 return handshake_failure(vio);
501 }
502}
503
504static int process_dunreg(struct vio_driver_state *vio,
505 struct vio_dring_unregister *pkt)
506{
507 struct vio_dring_state *dr = &vio->drings[VIO_DRIVER_RX_RING];
508
509 viodbg(HS, "GOT DRING_UNREG\n");
510
511 if (pkt->dring_ident != dr->ident)
512 return 0;
513
514 vio->dr_state &= ~VIO_DR_STATE_RXREG;
515
516 memset(dr, 0, sizeof(*dr));
517
518 kfree(vio->desc_buf);
519 vio->desc_buf = NULL;
520 vio->desc_buf_len = 0;
521
522 return 0;
523}
524
525static int process_rdx_info(struct vio_driver_state *vio, struct vio_rdx *pkt)
526{
527 viodbg(HS, "GOT RDX INFO\n");
528
529 pkt->tag.stype = VIO_SUBTYPE_ACK;
530 viodbg(HS, "SEND RDX ACK\n");
531 if (send_ctrl(vio, &pkt->tag, sizeof(*pkt)) < 0)
532 return handshake_failure(vio);
533
534 vio->hs_state |= VIO_HS_SENT_RDX_ACK;
535 return 0;
536}
537
538static int process_rdx_ack(struct vio_driver_state *vio, struct vio_rdx *pkt)
539{
540 viodbg(HS, "GOT RDX ACK\n");
541
542 if (!(vio->hs_state & VIO_HS_SENT_RDX))
543 return handshake_failure(vio);
544
545 vio->hs_state |= VIO_HS_GOT_RDX_ACK;
546 return 0;
547}
548
549static int process_rdx_nack(struct vio_driver_state *vio, struct vio_rdx *pkt)
550{
551 viodbg(HS, "GOT RDX NACK\n");
552
553 return handshake_failure(vio);
554}
555
556static int process_rdx(struct vio_driver_state *vio, struct vio_rdx *pkt)
557{
558 if (!all_drings_registered(vio))
559 handshake_failure(vio);
560
561 switch (pkt->tag.stype) {
562 case VIO_SUBTYPE_INFO:
563 return process_rdx_info(vio, pkt);
564
565 case VIO_SUBTYPE_ACK:
566 return process_rdx_ack(vio, pkt);
567
568 case VIO_SUBTYPE_NACK:
569 return process_rdx_nack(vio, pkt);
570
571 default:
572 return handshake_failure(vio);
573 }
574}
575
576int vio_control_pkt_engine(struct vio_driver_state *vio, void *pkt)
577{
578 struct vio_msg_tag *tag = pkt;
579 u8 prev_state = vio->hs_state;
580 int err;
581
582 switch (tag->stype_env) {
583 case VIO_VER_INFO:
584 err = process_ver(vio, pkt);
585 break;
586
587 case VIO_ATTR_INFO:
588 err = process_attr(vio, pkt);
589 break;
590
591 case VIO_DRING_REG:
592 err = process_dreg(vio, pkt);
593 break;
594
595 case VIO_DRING_UNREG:
596 err = process_dunreg(vio, pkt);
597 break;
598
599 case VIO_RDX:
600 err = process_rdx(vio, pkt);
601 break;
602
603 default:
604 err = process_unknown(vio, pkt);
605 break;
606 }
607 if (!err &&
608 vio->hs_state != prev_state &&
609 (vio->hs_state & VIO_HS_COMPLETE))
610 vio->ops->handshake_complete(vio);
611
612 return err;
613}
614EXPORT_SYMBOL(vio_control_pkt_engine);
615
/* Connection-reset hook.  Currently an empty stub: it does nothing. */
616void vio_conn_reset(struct vio_driver_state *vio)
617{
618}
619EXPORT_SYMBOL(vio_conn_reset);
620
621/* The issue is that the Solaris virtual disk server just mirrors the
622 * SID values it gets from the client peer. So we work around that
623 * here in vio_{validate,send}_sid() so that the drivers don't need
624 * to be aware of this crap.
625 */
626int vio_validate_sid(struct vio_driver_state *vio, struct vio_msg_tag *tp)
627{
628 u32 sid;
629
630 /* Always let VERSION+INFO packets through unchecked, they
631 * define the new SID.
632 */
633 if (tp->type == VIO_TYPE_CTRL &&
634 tp->stype == VIO_SUBTYPE_INFO &&
635 tp->stype_env == VIO_VER_INFO)
636 return 0;
637
638 /* Ok, now figure out which SID to use. */
639 switch (vio->dev_class) {
640 case VDEV_NETWORK:
641 case VDEV_NETWORK_SWITCH:
642 case VDEV_DISK_SERVER:
643 default:
644 sid = vio->_peer_sid;
645 break;
646
647 case VDEV_DISK:
648 sid = vio->_local_sid;
649 break;
650 }
651
652 if (sid == tp->sid)
653 return 0;
654 viodbg(DATA, "BAD SID tag->sid[%08x] peer_sid[%08x] local_sid[%08x]\n",
655 tp->sid, vio->_peer_sid, vio->_local_sid);
656 return -EINVAL;
657}
658EXPORT_SYMBOL(vio_validate_sid);
659
660u32 vio_send_sid(struct vio_driver_state *vio)
661{
662 switch (vio->dev_class) {
663 case VDEV_NETWORK:
664 case VDEV_NETWORK_SWITCH:
665 case VDEV_DISK:
666 default:
667 return vio->_local_sid;
668
669 case VDEV_DISK_SERVER:
670 return vio->_peer_sid;
671 }
672}
673EXPORT_SYMBOL(vio_send_sid);
674
675extern int vio_ldc_alloc(struct vio_driver_state *vio,
676 struct ldc_channel_config *base_cfg,
677 void *event_arg)
678{
679 struct ldc_channel_config cfg = *base_cfg;
680 struct ldc_channel *lp;
681 const u64 *id;
682
683 id = md_get_property(vio->endpoint, "id", NULL);
684 if (!id) {
685 printk(KERN_ERR "%s: Channel lacks id property.\n",
686 vio->name);
687 return -ENODEV;
688 }
689
690 vio->channel_id = *id;
691
692 cfg.rx_irq = vio->rx_irq;
693 cfg.tx_irq = vio->tx_irq;
694
695 lp = ldc_alloc(vio->channel_id, &cfg, event_arg);
696 if (IS_ERR(lp))
697 return PTR_ERR(lp);
698
699 vio->lp = lp;
700
701 return 0;
702}
703EXPORT_SYMBOL(vio_ldc_alloc);
704
705void vio_ldc_free(struct vio_driver_state *vio)
706{
707 ldc_free(vio->lp);
708 vio->lp = NULL;
709
710 kfree(vio->desc_buf);
711 vio->desc_buf = NULL;
712 vio->desc_buf_len = 0;
713}
714EXPORT_SYMBOL(vio_ldc_free);
715
716void vio_port_up(struct vio_driver_state *vio)
717{
718 unsigned long flags;
719 int err, state;
720
721 spin_lock_irqsave(&vio->lock, flags);
722
723 state = ldc_state(vio->lp);
724
725 err = 0;
726 if (state == LDC_STATE_INIT) {
727 err = ldc_bind(vio->lp);
728 if (err)
729 printk(KERN_WARNING "%s: Port %lu bind failed, "
730 "err=%d\n",
731 vio->name, vio->channel_id, err);
732 }
733
734 if (!err) {
735 err = ldc_connect(vio->lp);
736 if (err)
737 printk(KERN_WARNING "%s: Port %lu connect failed, "
738 "err=%d\n",
739 vio->name, vio->channel_id, err);
740 }
741 if (err) {
742 unsigned long expires = jiffies + HZ;
743
744 expires = round_jiffies(expires);
745 mod_timer(&vio->timer, expires);
746 }
747
748 spin_unlock_irqrestore(&vio->lock, flags);
749}
750EXPORT_SYMBOL(vio_port_up);
751
/* Timer callback: the argument is the vio_driver_state pointer stored
 * at setup time; simply retry bringing the port up.
 */
static void vio_port_timer(unsigned long _arg)
{
	vio_port_up((struct vio_driver_state *) _arg);
}
758
759int vio_driver_init(struct vio_driver_state *vio, struct vio_dev *vdev,
760 u8 dev_class, struct mdesc_node *channel_endpoint,
761 struct vio_version *ver_table, int ver_table_size,
762 struct vio_driver_ops *ops, char *name)
763{
764 switch (dev_class) {
765 case VDEV_NETWORK:
766 case VDEV_NETWORK_SWITCH:
767 case VDEV_DISK:
768 case VDEV_DISK_SERVER:
769 break;
770
771 default:
772 return -EINVAL;
773 }
774
775 if (!ops->send_attr ||
776 !ops->handle_attr ||
777 !ops->handshake_complete)
778 return -EINVAL;
779
780 if (!channel_endpoint)
781 return -EINVAL;
782
783 if (!ver_table || ver_table_size < 0)
784 return -EINVAL;
785
786 if (!name)
787 return -EINVAL;
788
789 spin_lock_init(&vio->lock);
790
791 vio->name = name;
792
793 vio->dev_class = dev_class;
794 vio->vdev = vdev;
795
796 vio->endpoint = channel_endpoint;
797 vio->tx_irq = channel_endpoint->irqs[0];
798 vio->rx_irq = channel_endpoint->irqs[1];
799
800 vio->ver_table = ver_table;
801 vio->ver_table_entries = ver_table_size;
802
803 vio->ops = ops;
804
805 setup_timer(&vio->timer, vio_port_timer, (unsigned long) vio);
806
807 return 0;
808}
809EXPORT_SYMBOL(vio_driver_init);