Diffstat (limited to 'arch')
-rw-r--r--arch/sparc64/Kconfig6
-rw-r--r--arch/sparc64/kernel/Makefile1
-rw-r--r--arch/sparc64/kernel/ldc.c2338
-rw-r--r--arch/sparc64/kernel/vio.c347
-rw-r--r--arch/sparc64/kernel/viohs.c809
5 files changed, 3501 insertions, 0 deletions
diff --git a/arch/sparc64/Kconfig b/arch/sparc64/Kconfig
index 6566d13db04f..af59daa81058 100644
--- a/arch/sparc64/Kconfig
+++ b/arch/sparc64/Kconfig
@@ -305,6 +305,12 @@ config SUN_IO
 	bool
 	default y
 
+config SUN_LDOMS
+	bool "Sun Logical Domains support"
+	help
+	  Say Y here if you want to support virtual devices via
+	  Logical Domains.
+
 config PCI
 	bool "PCI support"
 	select ARCH_SUPPORTS_MSI
diff --git a/arch/sparc64/kernel/Makefile b/arch/sparc64/kernel/Makefile
index f964bf28d21a..719ab23b1938 100644
--- a/arch/sparc64/kernel/Makefile
+++ b/arch/sparc64/kernel/Makefile
@@ -26,6 +26,7 @@ obj-$(CONFIG_MODULES) += module.o
 obj-$(CONFIG_US3_FREQ) += us3_cpufreq.o
 obj-$(CONFIG_US2E_FREQ) += us2e_cpufreq.o
 obj-$(CONFIG_KPROBES) += kprobes.o
+obj-$(CONFIG_SUN_LDOMS) += ldc.o vio.o viohs.o
 obj-$(CONFIG_AUDIT) += audit.o
 obj-$(CONFIG_AUDIT)$(CONFIG_SPARC32_COMPAT) += compat_audit.o
 obj-y += $(obj-yy)
diff --git a/arch/sparc64/kernel/ldc.c b/arch/sparc64/kernel/ldc.c
new file mode 100644
index 000000000000..0fa04d6f978d
--- /dev/null
+++ b/arch/sparc64/kernel/ldc.c
@@ -0,0 +1,2338 @@
1/* ldc.c: Logical Domain Channel link-layer protocol driver.
2 *
3 * Copyright (C) 2007 David S. Miller <davem@davemloft.net>
4 */
5
6#include <linux/kernel.h>
7#include <linux/module.h>
8#include <linux/slab.h>
9#include <linux/spinlock.h>
10#include <linux/delay.h>
11#include <linux/errno.h>
12#include <linux/string.h>
13#include <linux/scatterlist.h>
14#include <linux/interrupt.h>
15#include <linux/list.h>
16#include <linux/init.h>
17
18#include <asm/hypervisor.h>
19#include <asm/iommu.h>
20#include <asm/page.h>
21#include <asm/ldc.h>
22#include <asm/mdesc.h>
23
24#define DRV_MODULE_NAME "ldc"
25#define PFX DRV_MODULE_NAME ": "
26#define DRV_MODULE_VERSION "1.0"
27#define DRV_MODULE_RELDATE "June 25, 2007"
28
29static char version[] __devinitdata =
30 DRV_MODULE_NAME ".c:v" DRV_MODULE_VERSION " (" DRV_MODULE_RELDATE ")\n";
31#define LDC_PACKET_SIZE 64
32
33/* Packet header layout for unreliable and reliable mode frames.
34 * When in RAW mode, packets are simply straight 64-byte payloads
35 * with no headers.
36 */
37struct ldc_packet {
38 u8 type;
39#define LDC_CTRL 0x01
40#define LDC_DATA 0x02
41#define LDC_ERR 0x10
42
43 u8 stype;
44#define LDC_INFO 0x01
45#define LDC_ACK 0x02
46#define LDC_NACK 0x04
47
48 u8 ctrl;
49#define LDC_VERS 0x01 /* Link Version */
50#define LDC_RTS 0x02 /* Request To Send */
51#define LDC_RTR 0x03 /* Ready To Receive */
52#define LDC_RDX 0x04 /* Ready for Data eXchange */
53#define LDC_CTRL_MSK 0x0f
54
55 u8 env;
56#define LDC_LEN 0x3f
57#define LDC_FRAG_MASK 0xc0
58#define LDC_START 0x40
59#define LDC_STOP 0x80
60
61 u32 seqid;
62
63 union {
64 u8 u_data[LDC_PACKET_SIZE - 8];
65 struct {
66 u32 pad;
67 u32 ackid;
68 u8 r_data[LDC_PACKET_SIZE - 8 - 8];
69 } r;
70 } u;
71};
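/* Illustrative sketch (not part of this patch): how the env byte above
 * packs the fragment length together with the START/STOP framing bits.
 * The helper names ldc_env_pack/ldc_env_len are hypothetical.
 */
static inline u8 ldc_env_pack(unsigned int len, int start, int stop)
{
	return (len & LDC_LEN) |
	       (start ? LDC_START : 0) |
	       (stop ? LDC_STOP : 0);
}

static inline unsigned int ldc_env_len(u8 env)
{
	return env & LDC_LEN;	/* payload bytes carried in this frame */
}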
72
73struct ldc_version {
74 u16 major;
75 u16 minor;
76};
77
78/* Ordered from largest major to lowest. */
79static struct ldc_version ver_arr[] = {
80 { .major = 1, .minor = 0 },
81};
82
83#define LDC_DEFAULT_MTU (4 * LDC_PACKET_SIZE)
84#define LDC_DEFAULT_NUM_ENTRIES (PAGE_SIZE / LDC_PACKET_SIZE)
85
86struct ldc_channel;
87
88struct ldc_mode_ops {
89 int (*write)(struct ldc_channel *, const void *, unsigned int);
90 int (*read)(struct ldc_channel *, void *, unsigned int);
91};
92
93static const struct ldc_mode_ops raw_ops;
94static const struct ldc_mode_ops nonraw_ops;
95static const struct ldc_mode_ops stream_ops;
96
97int ldom_domaining_enabled;
98
99struct ldc_iommu {
100 /* Protects arena alloc/free. */
101 spinlock_t lock;
102 struct iommu_arena arena;
103 struct ldc_mtable_entry *page_table;
104};
105
106struct ldc_channel {
107 /* Protects all operations that depend upon channel state. */
108 spinlock_t lock;
109
110 unsigned long id;
111
112 u8 *mssbuf;
113 u32 mssbuf_len;
114 u32 mssbuf_off;
115
116 struct ldc_packet *tx_base;
117 unsigned long tx_head;
118 unsigned long tx_tail;
119 unsigned long tx_num_entries;
120 unsigned long tx_ra;
121
122 unsigned long tx_acked;
123
124 struct ldc_packet *rx_base;
125 unsigned long rx_head;
126 unsigned long rx_tail;
127 unsigned long rx_num_entries;
128 unsigned long rx_ra;
129
130 u32 rcv_nxt;
131 u32 snd_nxt;
132
133 unsigned long chan_state;
134
135 struct ldc_channel_config cfg;
136 void *event_arg;
137
138 const struct ldc_mode_ops *mops;
139
140 struct ldc_iommu iommu;
141
142 struct ldc_version ver;
143
144 u8 hs_state;
145#define LDC_HS_CLOSED 0x00
146#define LDC_HS_OPEN 0x01
147#define LDC_HS_GOTVERS 0x02
148#define LDC_HS_SENTRTR 0x03
149#define LDC_HS_GOTRTR 0x04
150#define LDC_HS_COMPLETE 0x10
151
152 u8 flags;
153#define LDC_FLAG_ALLOCED_QUEUES 0x01
154#define LDC_FLAG_REGISTERED_QUEUES 0x02
155#define LDC_FLAG_REGISTERED_IRQS 0x04
156#define LDC_FLAG_RESET 0x10
157
158 u8 mss;
159 u8 state;
160
161 struct hlist_head mh_list;
162
163 struct hlist_node list;
164};
165
166#define ldcdbg(TYPE, f, a...) \
167do { if (lp->cfg.debug & LDC_DEBUG_##TYPE) \
168 printk(KERN_INFO PFX "ID[%lu] " f, lp->id, ## a); \
169} while (0)
170
171static const char *state_to_str(u8 state)
172{
173 switch (state) {
174 case LDC_STATE_INVALID:
175 return "INVALID";
176 case LDC_STATE_INIT:
177 return "INIT";
178 case LDC_STATE_BOUND:
179 return "BOUND";
180 case LDC_STATE_READY:
181 return "READY";
182 case LDC_STATE_CONNECTED:
183 return "CONNECTED";
184 default:
185 return "<UNKNOWN>";
186 }
187}
188
189static void ldc_set_state(struct ldc_channel *lp, u8 state)
190{
191 ldcdbg(STATE, "STATE (%s) --> (%s)\n",
192 state_to_str(lp->state),
193 state_to_str(state));
194
195 lp->state = state;
196}
197
198static unsigned long __advance(unsigned long off, unsigned long num_entries)
199{
200 off += LDC_PACKET_SIZE;
201 if (off == (num_entries * LDC_PACKET_SIZE))
202 off = 0;
203
204 return off;
205}
206
207static unsigned long rx_advance(struct ldc_channel *lp, unsigned long off)
208{
209 return __advance(off, lp->rx_num_entries);
210}
211
212static unsigned long tx_advance(struct ldc_channel *lp, unsigned long off)
213{
214 return __advance(off, lp->tx_num_entries);
215}
216
217static struct ldc_packet *handshake_get_tx_packet(struct ldc_channel *lp,
218 unsigned long *new_tail)
219{
220 struct ldc_packet *p;
221 unsigned long t;
222
223 t = tx_advance(lp, lp->tx_tail);
224 if (t == lp->tx_head)
225 return NULL;
226
227 *new_tail = t;
228
229 p = lp->tx_base;
230 return p + (lp->tx_tail / LDC_PACKET_SIZE);
231}
232
 233/* When we are in reliable or stream mode, we have to track the next packet
234 * we haven't gotten an ACK for in the TX queue using tx_acked. We have
235 * to be careful not to stomp over the queue past that point. During
236 * the handshake, we don't have TX data packets pending in the queue
237 * and that's why handshake_get_tx_packet() need not be mindful of
238 * lp->tx_acked.
239 */
240static unsigned long head_for_data(struct ldc_channel *lp)
241{
242 if (lp->cfg.mode == LDC_MODE_RELIABLE ||
243 lp->cfg.mode == LDC_MODE_STREAM)
244 return lp->tx_acked;
245 return lp->tx_head;
246}
247
248static int tx_has_space_for(struct ldc_channel *lp, unsigned int size)
249{
250 unsigned long limit, tail, new_tail, diff;
251 unsigned int mss;
252
253 limit = head_for_data(lp);
254 tail = lp->tx_tail;
255 new_tail = tx_advance(lp, tail);
256 if (new_tail == limit)
257 return 0;
258
259 if (limit > new_tail)
260 diff = limit - new_tail;
261 else
262 diff = (limit +
263 ((lp->tx_num_entries * LDC_PACKET_SIZE) - new_tail));
264 diff /= LDC_PACKET_SIZE;
265 mss = lp->mss;
266
267 if (diff * mss < size)
268 return 0;
269
270 return 1;
271}
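/* Worked example for the space check above (values are illustrative):
 * with head_for_data() returning offset 0x0c0 and tx_tail at 0x040 and
 * no wraparound, new_tail advances to 0x080 and diff becomes
 * (0x0c0 - 0x080) / LDC_PACKET_SIZE = 1 free slot, so in unreliable
 * mode (mss = 56) a write of more than 56 bytes is refused with 0 here.
 */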
272
273static struct ldc_packet *data_get_tx_packet(struct ldc_channel *lp,
274 unsigned long *new_tail)
275{
276 struct ldc_packet *p;
277 unsigned long h, t;
278
279 h = head_for_data(lp);
280 t = tx_advance(lp, lp->tx_tail);
281 if (t == h)
282 return NULL;
283
284 *new_tail = t;
285
286 p = lp->tx_base;
287 return p + (lp->tx_tail / LDC_PACKET_SIZE);
288}
289
290static int set_tx_tail(struct ldc_channel *lp, unsigned long tail)
291{
292 unsigned long orig_tail = lp->tx_tail;
293 int limit = 1000;
294
295 lp->tx_tail = tail;
296 while (limit-- > 0) {
297 unsigned long err;
298
299 err = sun4v_ldc_tx_set_qtail(lp->id, tail);
300 if (!err)
301 return 0;
302
303 if (err != HV_EWOULDBLOCK) {
304 lp->tx_tail = orig_tail;
305 return -EINVAL;
306 }
307 udelay(1);
308 }
309
310 lp->tx_tail = orig_tail;
311 return -EBUSY;
312}
313
314/* This just updates the head value in the hypervisor using
315 * a polling loop with a timeout. The caller takes care of
 316 * updating software state representing the head change, if any.
317 */
318static int __set_rx_head(struct ldc_channel *lp, unsigned long head)
319{
320 int limit = 1000;
321
322 while (limit-- > 0) {
323 unsigned long err;
324
325 err = sun4v_ldc_rx_set_qhead(lp->id, head);
326 if (!err)
327 return 0;
328
329 if (err != HV_EWOULDBLOCK)
330 return -EINVAL;
331
332 udelay(1);
333 }
334
335 return -EBUSY;
336}
337
338static int send_tx_packet(struct ldc_channel *lp,
339 struct ldc_packet *p,
340 unsigned long new_tail)
341{
342 BUG_ON(p != (lp->tx_base + (lp->tx_tail / LDC_PACKET_SIZE)));
343
344 return set_tx_tail(lp, new_tail);
345}
346
347static struct ldc_packet *handshake_compose_ctrl(struct ldc_channel *lp,
348 u8 stype, u8 ctrl,
349 void *data, int dlen,
350 unsigned long *new_tail)
351{
352 struct ldc_packet *p = handshake_get_tx_packet(lp, new_tail);
353
354 if (p) {
355 memset(p, 0, sizeof(*p));
356 p->type = LDC_CTRL;
357 p->stype = stype;
358 p->ctrl = ctrl;
359 if (data)
360 memcpy(p->u.u_data, data, dlen);
361 }
362 return p;
363}
364
365static int start_handshake(struct ldc_channel *lp)
366{
367 struct ldc_packet *p;
368 struct ldc_version *ver;
369 unsigned long new_tail;
370
371 ver = &ver_arr[0];
372
373 ldcdbg(HS, "SEND VER INFO maj[%u] min[%u]\n",
374 ver->major, ver->minor);
375
376 p = handshake_compose_ctrl(lp, LDC_INFO, LDC_VERS,
377 ver, sizeof(*ver), &new_tail);
378 if (p) {
379 int err = send_tx_packet(lp, p, new_tail);
380 if (!err)
381 lp->flags &= ~LDC_FLAG_RESET;
382 return err;
383 }
384 return -EBUSY;
385}
386
387static int send_version_nack(struct ldc_channel *lp,
388 u16 major, u16 minor)
389{
390 struct ldc_packet *p;
391 struct ldc_version ver;
392 unsigned long new_tail;
393
394 ver.major = major;
395 ver.minor = minor;
396
397 p = handshake_compose_ctrl(lp, LDC_NACK, LDC_VERS,
398 &ver, sizeof(ver), &new_tail);
399 if (p) {
400 ldcdbg(HS, "SEND VER NACK maj[%u] min[%u]\n",
401 ver.major, ver.minor);
402
403 return send_tx_packet(lp, p, new_tail);
404 }
405 return -EBUSY;
406}
407
408static int send_version_ack(struct ldc_channel *lp,
409 struct ldc_version *vp)
410{
411 struct ldc_packet *p;
412 unsigned long new_tail;
413
414 p = handshake_compose_ctrl(lp, LDC_ACK, LDC_VERS,
415 vp, sizeof(*vp), &new_tail);
416 if (p) {
417 ldcdbg(HS, "SEND VER ACK maj[%u] min[%u]\n",
418 vp->major, vp->minor);
419
420 return send_tx_packet(lp, p, new_tail);
421 }
422 return -EBUSY;
423}
424
425static int send_rts(struct ldc_channel *lp)
426{
427 struct ldc_packet *p;
428 unsigned long new_tail;
429
430 p = handshake_compose_ctrl(lp, LDC_INFO, LDC_RTS, NULL, 0,
431 &new_tail);
432 if (p) {
433 p->env = lp->cfg.mode;
434 p->seqid = 0;
435 lp->rcv_nxt = 0;
436
437 ldcdbg(HS, "SEND RTS env[0x%x] seqid[0x%x]\n",
438 p->env, p->seqid);
439
440 return send_tx_packet(lp, p, new_tail);
441 }
442 return -EBUSY;
443}
444
445static int send_rtr(struct ldc_channel *lp)
446{
447 struct ldc_packet *p;
448 unsigned long new_tail;
449
450 p = handshake_compose_ctrl(lp, LDC_INFO, LDC_RTR, NULL, 0,
451 &new_tail);
452 if (p) {
453 p->env = lp->cfg.mode;
454 p->seqid = 0;
455
456 ldcdbg(HS, "SEND RTR env[0x%x] seqid[0x%x]\n",
457 p->env, p->seqid);
458
459 return send_tx_packet(lp, p, new_tail);
460 }
461 return -EBUSY;
462}
463
464static int send_rdx(struct ldc_channel *lp)
465{
466 struct ldc_packet *p;
467 unsigned long new_tail;
468
469 p = handshake_compose_ctrl(lp, LDC_INFO, LDC_RDX, NULL, 0,
470 &new_tail);
471 if (p) {
472 p->env = 0;
473 p->seqid = ++lp->snd_nxt;
474 p->u.r.ackid = lp->rcv_nxt;
475
476 ldcdbg(HS, "SEND RDX env[0x%x] seqid[0x%x] ackid[0x%x]\n",
477 p->env, p->seqid, p->u.r.ackid);
478
479 return send_tx_packet(lp, p, new_tail);
480 }
481 return -EBUSY;
482}
483
484static int send_data_nack(struct ldc_channel *lp, struct ldc_packet *data_pkt)
485{
486 struct ldc_packet *p;
487 unsigned long new_tail;
488 int err;
489
490 p = data_get_tx_packet(lp, &new_tail);
491 if (!p)
492 return -EBUSY;
493 memset(p, 0, sizeof(*p));
494 p->type = data_pkt->type;
495 p->stype = LDC_NACK;
496 p->ctrl = data_pkt->ctrl & LDC_CTRL_MSK;
497 p->seqid = lp->snd_nxt;
498 p->u.r.ackid = lp->rcv_nxt;
499
500 ldcdbg(HS, "SEND DATA NACK type[0x%x] ctl[0x%x] seq[0x%x] ack[0x%x]\n",
501 p->type, p->ctrl, p->seqid, p->u.r.ackid);
502
503 err = send_tx_packet(lp, p, new_tail);
504 if (!err)
505 lp->snd_nxt++;
506
507 return err;
508}
509
510static int ldc_abort(struct ldc_channel *lp)
511{
512 unsigned long hv_err;
513
514 ldcdbg(STATE, "ABORT\n");
515
516 /* We report but do not act upon the hypervisor errors because
517 * there really isn't much we can do if they fail at this point.
518 */
519 hv_err = sun4v_ldc_tx_qconf(lp->id, lp->tx_ra, lp->tx_num_entries);
520 if (hv_err)
521 printk(KERN_ERR PFX "ldc_abort: "
522 "sun4v_ldc_tx_qconf(%lx,%lx,%lx) failed, err=%lu\n",
523 lp->id, lp->tx_ra, lp->tx_num_entries, hv_err);
524
525 hv_err = sun4v_ldc_tx_get_state(lp->id,
526 &lp->tx_head,
527 &lp->tx_tail,
528 &lp->chan_state);
529 if (hv_err)
530 printk(KERN_ERR PFX "ldc_abort: "
531 "sun4v_ldc_tx_get_state(%lx,...) failed, err=%lu\n",
532 lp->id, hv_err);
533
534 hv_err = sun4v_ldc_rx_qconf(lp->id, lp->rx_ra, lp->rx_num_entries);
535 if (hv_err)
536 printk(KERN_ERR PFX "ldc_abort: "
537 "sun4v_ldc_rx_qconf(%lx,%lx,%lx) failed, err=%lu\n",
538 lp->id, lp->rx_ra, lp->rx_num_entries, hv_err);
539
540 /* Refetch the RX queue state as well, because we could be invoked
541 * here in the queue processing context.
542 */
543 hv_err = sun4v_ldc_rx_get_state(lp->id,
544 &lp->rx_head,
545 &lp->rx_tail,
546 &lp->chan_state);
547 if (hv_err)
548 printk(KERN_ERR PFX "ldc_abort: "
549 "sun4v_ldc_rx_get_state(%lx,...) failed, err=%lu\n",
550 lp->id, hv_err);
551
552 return -ECONNRESET;
553}
554
555static struct ldc_version *find_by_major(u16 major)
556{
557 struct ldc_version *ret = NULL;
558 int i;
559
560 for (i = 0; i < ARRAY_SIZE(ver_arr); i++) {
561 struct ldc_version *v = &ver_arr[i];
562 if (v->major <= major) {
563 ret = v;
564 break;
565 }
566 }
567 return ret;
568}
569
570static int process_ver_info(struct ldc_channel *lp, struct ldc_version *vp)
571{
572 struct ldc_version *vap;
573 int err;
574
575 ldcdbg(HS, "GOT VERSION INFO major[%x] minor[%x]\n",
576 vp->major, vp->minor);
577
578 if (lp->hs_state == LDC_HS_GOTVERS) {
579 lp->hs_state = LDC_HS_OPEN;
580 memset(&lp->ver, 0, sizeof(lp->ver));
581 }
582
583 vap = find_by_major(vp->major);
584 if (!vap) {
585 err = send_version_nack(lp, 0, 0);
586 } else if (vap->major != vp->major) {
587 err = send_version_nack(lp, vap->major, vap->minor);
588 } else {
589 struct ldc_version ver = *vp;
590 if (ver.minor > vap->minor)
591 ver.minor = vap->minor;
592 err = send_version_ack(lp, &ver);
593 if (!err) {
594 lp->ver = ver;
595 lp->hs_state = LDC_HS_GOTVERS;
596 }
597 }
598 if (err)
599 return ldc_abort(lp);
600
601 return 0;
602}
603
604static int process_ver_ack(struct ldc_channel *lp, struct ldc_version *vp)
605{
606 ldcdbg(HS, "GOT VERSION ACK major[%x] minor[%x]\n",
607 vp->major, vp->minor);
608
609 if (lp->hs_state == LDC_HS_GOTVERS) {
610 if (lp->ver.major != vp->major ||
611 lp->ver.minor != vp->minor)
612 return ldc_abort(lp);
613 } else {
614 lp->ver = *vp;
615 lp->hs_state = LDC_HS_GOTVERS;
616 }
617 if (send_rts(lp))
618 return ldc_abort(lp);
619 return 0;
620}
621
622static int process_ver_nack(struct ldc_channel *lp, struct ldc_version *vp)
623{
624 struct ldc_version *vap;
625
626 if ((vp->major == 0 && vp->minor == 0) ||
627 !(vap = find_by_major(vp->major))) {
628 return ldc_abort(lp);
629 } else {
630 struct ldc_packet *p;
631 unsigned long new_tail;
632
633 p = handshake_compose_ctrl(lp, LDC_INFO, LDC_VERS,
634 vap, sizeof(*vap),
635 &new_tail);
636 if (p)
637 return send_tx_packet(lp, p, new_tail);
638 else
639 return ldc_abort(lp);
640 }
641}
642
643static int process_version(struct ldc_channel *lp,
644 struct ldc_packet *p)
645{
646 struct ldc_version *vp;
647
648 vp = (struct ldc_version *) p->u.u_data;
649
650 switch (p->stype) {
651 case LDC_INFO:
652 return process_ver_info(lp, vp);
653
654 case LDC_ACK:
655 return process_ver_ack(lp, vp);
656
657 case LDC_NACK:
658 return process_ver_nack(lp, vp);
659
660 default:
661 return ldc_abort(lp);
662 }
663}
664
665static int process_rts(struct ldc_channel *lp,
666 struct ldc_packet *p)
667{
668 ldcdbg(HS, "GOT RTS stype[%x] seqid[%x] env[%x]\n",
669 p->stype, p->seqid, p->env);
670
671 if (p->stype != LDC_INFO ||
672 lp->hs_state != LDC_HS_GOTVERS ||
673 p->env != lp->cfg.mode)
674 return ldc_abort(lp);
675
676 lp->snd_nxt = p->seqid;
677 lp->rcv_nxt = p->seqid;
678 lp->hs_state = LDC_HS_SENTRTR;
679 if (send_rtr(lp))
680 return ldc_abort(lp);
681
682 return 0;
683}
684
685static int process_rtr(struct ldc_channel *lp,
686 struct ldc_packet *p)
687{
688 ldcdbg(HS, "GOT RTR stype[%x] seqid[%x] env[%x]\n",
689 p->stype, p->seqid, p->env);
690
691 if (p->stype != LDC_INFO ||
692 p->env != lp->cfg.mode)
693 return ldc_abort(lp);
694
695 lp->snd_nxt = p->seqid;
696 lp->hs_state = LDC_HS_COMPLETE;
697 ldc_set_state(lp, LDC_STATE_CONNECTED);
698 send_rdx(lp);
699
700 return LDC_EVENT_UP;
701}
702
703static int rx_seq_ok(struct ldc_channel *lp, u32 seqid)
704{
705 return lp->rcv_nxt + 1 == seqid;
706}
707
708static int process_rdx(struct ldc_channel *lp,
709 struct ldc_packet *p)
710{
711 ldcdbg(HS, "GOT RDX stype[%x] seqid[%x] env[%x] ackid[%x]\n",
712 p->stype, p->seqid, p->env, p->u.r.ackid);
713
714 if (p->stype != LDC_INFO ||
715 !(rx_seq_ok(lp, p->seqid)))
716 return ldc_abort(lp);
717
718 lp->rcv_nxt = p->seqid;
719
720 lp->hs_state = LDC_HS_COMPLETE;
721 ldc_set_state(lp, LDC_STATE_CONNECTED);
722
723 return LDC_EVENT_UP;
724}
725
726static int process_control_frame(struct ldc_channel *lp,
727 struct ldc_packet *p)
728{
729 switch (p->ctrl) {
730 case LDC_VERS:
731 return process_version(lp, p);
732
733 case LDC_RTS:
734 return process_rts(lp, p);
735
736 case LDC_RTR:
737 return process_rtr(lp, p);
738
739 case LDC_RDX:
740 return process_rdx(lp, p);
741
742 default:
743 return ldc_abort(lp);
744 }
745}
746
747static int process_error_frame(struct ldc_channel *lp,
748 struct ldc_packet *p)
749{
750 return ldc_abort(lp);
751}
752
753static int process_data_ack(struct ldc_channel *lp,
754 struct ldc_packet *ack)
755{
756 unsigned long head = lp->tx_acked;
757 u32 ackid = ack->u.r.ackid;
758
759 while (1) {
760 struct ldc_packet *p = lp->tx_base + (head / LDC_PACKET_SIZE);
761
762 head = tx_advance(lp, head);
763
764 if (p->seqid == ackid) {
765 lp->tx_acked = head;
766 return 0;
767 }
768 if (head == lp->tx_head)
769 return ldc_abort(lp);
770 }
771
772 return 0;
773}
774
775static void send_events(struct ldc_channel *lp, unsigned int event_mask)
776{
777 if (event_mask & LDC_EVENT_RESET)
778 lp->cfg.event(lp->event_arg, LDC_EVENT_RESET);
779 if (event_mask & LDC_EVENT_UP)
780 lp->cfg.event(lp->event_arg, LDC_EVENT_UP);
781 if (event_mask & LDC_EVENT_DATA_READY)
782 lp->cfg.event(lp->event_arg, LDC_EVENT_DATA_READY);
783}
784
785static irqreturn_t ldc_rx(int irq, void *dev_id)
786{
787 struct ldc_channel *lp = dev_id;
788 unsigned long orig_state, hv_err, flags;
789 unsigned int event_mask;
790
791 spin_lock_irqsave(&lp->lock, flags);
792
793 orig_state = lp->chan_state;
794 hv_err = sun4v_ldc_rx_get_state(lp->id,
795 &lp->rx_head,
796 &lp->rx_tail,
797 &lp->chan_state);
798
799 ldcdbg(RX, "RX state[0x%02lx:0x%02lx] head[0x%04lx] tail[0x%04lx]\n",
800 orig_state, lp->chan_state, lp->rx_head, lp->rx_tail);
801
802 event_mask = 0;
803
804 if (lp->cfg.mode == LDC_MODE_RAW &&
805 lp->chan_state == LDC_CHANNEL_UP) {
806 lp->hs_state = LDC_HS_COMPLETE;
807 ldc_set_state(lp, LDC_STATE_CONNECTED);
808
809 event_mask |= LDC_EVENT_UP;
810
811 orig_state = lp->chan_state;
812 }
813
814 /* If we are in reset state, flush the RX queue and ignore
815 * everything.
816 */
817 if (lp->flags & LDC_FLAG_RESET) {
818 (void) __set_rx_head(lp, lp->rx_tail);
819 goto out;
820 }
821
822 /* Once we finish the handshake, we let the ldc_read()
823 * paths do all of the control frame and state management.
824 * Just trigger the callback.
825 */
826 if (lp->hs_state == LDC_HS_COMPLETE) {
827handshake_complete:
828 if (lp->chan_state != orig_state) {
829 unsigned int event = LDC_EVENT_RESET;
830
831 if (lp->chan_state == LDC_CHANNEL_UP)
832 event = LDC_EVENT_UP;
833
834 event_mask |= event;
835 }
836 if (lp->rx_head != lp->rx_tail)
837 event_mask |= LDC_EVENT_DATA_READY;
838
839 goto out;
840 }
841
842 if (lp->chan_state != orig_state)
843 goto out;
844
845 while (lp->rx_head != lp->rx_tail) {
846 struct ldc_packet *p;
847 unsigned long new;
848 int err;
849
850 p = lp->rx_base + (lp->rx_head / LDC_PACKET_SIZE);
851
852 switch (p->type) {
853 case LDC_CTRL:
854 err = process_control_frame(lp, p);
855 if (err > 0)
856 event_mask |= err;
857 break;
858
859 case LDC_DATA:
860 event_mask |= LDC_EVENT_DATA_READY;
861 err = 0;
862 break;
863
864 case LDC_ERR:
865 err = process_error_frame(lp, p);
866 break;
867
868 default:
869 err = ldc_abort(lp);
870 break;
871 }
872
873 if (err < 0)
874 break;
875
876 new = lp->rx_head;
877 new += LDC_PACKET_SIZE;
878 if (new == (lp->rx_num_entries * LDC_PACKET_SIZE))
879 new = 0;
880 lp->rx_head = new;
881
882 err = __set_rx_head(lp, new);
883 if (err < 0) {
884 (void) ldc_abort(lp);
885 break;
886 }
887 if (lp->hs_state == LDC_HS_COMPLETE)
888 goto handshake_complete;
889 }
890
891out:
892 spin_unlock_irqrestore(&lp->lock, flags);
893
894 send_events(lp, event_mask);
895
896 return IRQ_HANDLED;
897}
898
899static irqreturn_t ldc_tx(int irq, void *dev_id)
900{
901 struct ldc_channel *lp = dev_id;
902 unsigned long flags, hv_err, orig_state;
903 unsigned int event_mask = 0;
904
905 spin_lock_irqsave(&lp->lock, flags);
906
907 orig_state = lp->chan_state;
908 hv_err = sun4v_ldc_tx_get_state(lp->id,
909 &lp->tx_head,
910 &lp->tx_tail,
911 &lp->chan_state);
912
913 ldcdbg(TX, " TX state[0x%02lx:0x%02lx] head[0x%04lx] tail[0x%04lx]\n",
914 orig_state, lp->chan_state, lp->tx_head, lp->tx_tail);
915
916 if (lp->cfg.mode == LDC_MODE_RAW &&
917 lp->chan_state == LDC_CHANNEL_UP) {
918 lp->hs_state = LDC_HS_COMPLETE;
919 ldc_set_state(lp, LDC_STATE_CONNECTED);
920
921 event_mask |= LDC_EVENT_UP;
922 }
923
924 spin_unlock_irqrestore(&lp->lock, flags);
925
926 send_events(lp, event_mask);
927
928 return IRQ_HANDLED;
929}
930
931/* XXX ldc_alloc() and ldc_free() needs to run under a mutex so
932 * XXX that addition and removal from the ldc_channel_list has
933 * XXX atomicity, otherwise the __ldc_channel_exists() check is
934 * XXX totally pointless as another thread can slip into ldc_alloc()
935 * XXX and add a channel with the same ID. There also needs to be
936 * XXX a spinlock for ldc_channel_list.
937 */
938static HLIST_HEAD(ldc_channel_list);
939
940static int __ldc_channel_exists(unsigned long id)
941{
942 struct ldc_channel *lp;
943 struct hlist_node *n;
944
945 hlist_for_each_entry(lp, n, &ldc_channel_list, list) {
946 if (lp->id == id)
947 return 1;
948 }
949 return 0;
950}
951
952static int alloc_queue(const char *name, unsigned long num_entries,
953 struct ldc_packet **base, unsigned long *ra)
954{
955 unsigned long size, order;
956 void *q;
957
958 size = num_entries * LDC_PACKET_SIZE;
959 order = get_order(size);
960
961 q = (void *) __get_free_pages(GFP_KERNEL, order);
962 if (!q) {
963 printk(KERN_ERR PFX "Alloc of %s queue failed with "
964 "size=%lu order=%lu\n", name, size, order);
965 return -ENOMEM;
966 }
967
968 memset(q, 0, PAGE_SIZE << order);
969
970 *base = q;
971 *ra = __pa(q);
972
973 return 0;
974}
975
976static void free_queue(unsigned long num_entries, struct ldc_packet *q)
977{
978 unsigned long size, order;
979
980 if (!q)
981 return;
982
983 size = num_entries * LDC_PACKET_SIZE;
984 order = get_order(size);
985
986 free_pages((unsigned long)q, order);
987}
988
989/* XXX Make this configurable... XXX */
990#define LDC_IOTABLE_SIZE (8 * 1024)
991
992static int ldc_iommu_init(struct ldc_channel *lp)
993{
994 unsigned long sz, num_tsb_entries, tsbsize, order;
995 struct ldc_iommu *iommu = &lp->iommu;
996 struct ldc_mtable_entry *table;
997 unsigned long hv_err;
998 int err;
999
1000 num_tsb_entries = LDC_IOTABLE_SIZE;
1001 tsbsize = num_tsb_entries * sizeof(struct ldc_mtable_entry);
1002
1003 spin_lock_init(&iommu->lock);
1004
1005 sz = num_tsb_entries / 8;
1006 sz = (sz + 7UL) & ~7UL;
1007 iommu->arena.map = kzalloc(sz, GFP_KERNEL);
1008 if (!iommu->arena.map) {
1009 printk(KERN_ERR PFX "Alloc of arena map failed, sz=%lu\n", sz);
1010 return -ENOMEM;
1011 }
1012
1013 iommu->arena.limit = num_tsb_entries;
1014
1015 order = get_order(tsbsize);
1016
1017 table = (struct ldc_mtable_entry *)
1018 __get_free_pages(GFP_KERNEL, order);
1019 err = -ENOMEM;
1020 if (!table) {
1021 printk(KERN_ERR PFX "Alloc of MTE table failed, "
1022 "size=%lu order=%lu\n", tsbsize, order);
1023 goto out_free_map;
1024 }
1025
1026 memset(table, 0, PAGE_SIZE << order);
1027
1028 iommu->page_table = table;
1029
1030 hv_err = sun4v_ldc_set_map_table(lp->id, __pa(table),
1031 num_tsb_entries);
1032 err = -EINVAL;
1033 if (hv_err)
1034 goto out_free_table;
1035
1036 return 0;
1037
1038out_free_table:
1039 free_pages((unsigned long) table, order);
1040 iommu->page_table = NULL;
1041
1042out_free_map:
1043 kfree(iommu->arena.map);
1044 iommu->arena.map = NULL;
1045
1046 return err;
1047}
1048
1049static void ldc_iommu_release(struct ldc_channel *lp)
1050{
1051 struct ldc_iommu *iommu = &lp->iommu;
1052 unsigned long num_tsb_entries, tsbsize, order;
1053
1054 (void) sun4v_ldc_set_map_table(lp->id, 0, 0);
1055
1056 num_tsb_entries = iommu->arena.limit;
1057 tsbsize = num_tsb_entries * sizeof(struct ldc_mtable_entry);
1058 order = get_order(tsbsize);
1059
1060 free_pages((unsigned long) iommu->page_table, order);
1061 iommu->page_table = NULL;
1062
1063 kfree(iommu->arena.map);
1064 iommu->arena.map = NULL;
1065}
1066
1067struct ldc_channel *ldc_alloc(unsigned long id,
1068 const struct ldc_channel_config *cfgp,
1069 void *event_arg)
1070{
1071 struct ldc_channel *lp;
1072 const struct ldc_mode_ops *mops;
1073 unsigned long dummy1, dummy2, hv_err;
1074 u8 mss, *mssbuf;
1075 int err;
1076
1077 err = -ENODEV;
1078 if (!ldom_domaining_enabled)
1079 goto out_err;
1080
1081 err = -EINVAL;
1082 if (!cfgp)
1083 goto out_err;
1084
1085 switch (cfgp->mode) {
1086 case LDC_MODE_RAW:
1087 mops = &raw_ops;
1088 mss = LDC_PACKET_SIZE;
1089 break;
1090
1091 case LDC_MODE_UNRELIABLE:
1092 mops = &nonraw_ops;
1093 mss = LDC_PACKET_SIZE - 8;
1094 break;
1095
1096 case LDC_MODE_RELIABLE:
1097 mops = &nonraw_ops;
1098 mss = LDC_PACKET_SIZE - 8 - 8;
1099 break;
1100
1101 case LDC_MODE_STREAM:
1102 mops = &stream_ops;
1103 mss = LDC_PACKET_SIZE - 8 - 8;
1104 break;
1105
1106 default:
1107 goto out_err;
1108 }
1109
1110 if (!cfgp->event || !event_arg || !cfgp->rx_irq || !cfgp->tx_irq)
1111 goto out_err;
1112
1113 hv_err = sun4v_ldc_tx_qinfo(id, &dummy1, &dummy2);
1114 err = -ENODEV;
1115 if (hv_err == HV_ECHANNEL)
1116 goto out_err;
1117
1118 err = -EEXIST;
1119 if (__ldc_channel_exists(id))
1120 goto out_err;
1121
1122 mssbuf = NULL;
1123
1124 lp = kzalloc(sizeof(*lp), GFP_KERNEL);
1125 err = -ENOMEM;
1126 if (!lp)
1127 goto out_err;
1128
1129 spin_lock_init(&lp->lock);
1130
1131 lp->id = id;
1132
1133 err = ldc_iommu_init(lp);
1134 if (err)
1135 goto out_free_ldc;
1136
1137 lp->mops = mops;
1138 lp->mss = mss;
1139
1140 lp->cfg = *cfgp;
1141 if (!lp->cfg.mtu)
1142 lp->cfg.mtu = LDC_DEFAULT_MTU;
1143
1144 if (lp->cfg.mode == LDC_MODE_STREAM) {
1145 mssbuf = kzalloc(lp->cfg.mtu, GFP_KERNEL);
1146 if (!mssbuf) {
1147 err = -ENOMEM;
1148 goto out_free_iommu;
1149 }
1150 lp->mssbuf = mssbuf;
1151 }
1152
1153 lp->event_arg = event_arg;
1154
1155 /* XXX allow setting via ldc_channel_config to override defaults
1156 * XXX or use some formula based upon mtu
1157 */
1158 lp->tx_num_entries = LDC_DEFAULT_NUM_ENTRIES;
1159 lp->rx_num_entries = LDC_DEFAULT_NUM_ENTRIES;
1160
1161 err = alloc_queue("TX", lp->tx_num_entries,
1162 &lp->tx_base, &lp->tx_ra);
1163 if (err)
1164 goto out_free_mssbuf;
1165
1166 err = alloc_queue("RX", lp->rx_num_entries,
1167 &lp->rx_base, &lp->rx_ra);
1168 if (err)
1169 goto out_free_txq;
1170
1171 lp->flags |= LDC_FLAG_ALLOCED_QUEUES;
1172
1173 lp->hs_state = LDC_HS_CLOSED;
1174 ldc_set_state(lp, LDC_STATE_INIT);
1175
1176 INIT_HLIST_NODE(&lp->list);
1177 hlist_add_head(&lp->list, &ldc_channel_list);
1178
1179 INIT_HLIST_HEAD(&lp->mh_list);
1180
1181 return lp;
1182
1183out_free_txq:
1184 free_queue(lp->tx_num_entries, lp->tx_base);
1185
1186out_free_mssbuf:
1187 if (mssbuf)
1188 kfree(mssbuf);
1189
1190out_free_iommu:
1191 ldc_iommu_release(lp);
1192
1193out_free_ldc:
1194 kfree(lp);
1195
1196out_err:
1197 return ERR_PTR(err);
1198}
1199EXPORT_SYMBOL(ldc_alloc);
1200
1201void ldc_free(struct ldc_channel *lp)
1202{
1203 if (lp->flags & LDC_FLAG_REGISTERED_IRQS) {
1204 free_irq(lp->cfg.rx_irq, lp);
1205 free_irq(lp->cfg.tx_irq, lp);
1206 }
1207
1208 if (lp->flags & LDC_FLAG_REGISTERED_QUEUES) {
1209 sun4v_ldc_tx_qconf(lp->id, 0, 0);
1210 sun4v_ldc_rx_qconf(lp->id, 0, 0);
1211 lp->flags &= ~LDC_FLAG_REGISTERED_QUEUES;
1212 }
1213 if (lp->flags & LDC_FLAG_ALLOCED_QUEUES) {
1214 free_queue(lp->tx_num_entries, lp->tx_base);
1215 free_queue(lp->rx_num_entries, lp->rx_base);
1216 lp->flags &= ~LDC_FLAG_ALLOCED_QUEUES;
1217 }
1218
1219 hlist_del(&lp->list);
1220
1221 if (lp->mssbuf)
1222 kfree(lp->mssbuf);
1223
1224 ldc_iommu_release(lp);
1225
1226 kfree(lp);
1227}
1228EXPORT_SYMBOL(ldc_free);
1229
1230/* Bind the channel. This registers the LDC queues with
1231 * the hypervisor and puts the channel into a pseudo-listening
1232 * state. This does not initiate a handshake, ldc_connect() does
1233 * that.
1234 */
1235int ldc_bind(struct ldc_channel *lp)
1236{
1237 unsigned long hv_err, flags;
1238 int err = -EINVAL;
1239
1240 spin_lock_irqsave(&lp->lock, flags);
1241
1242 if (lp->state != LDC_STATE_INIT)
1243 goto out_err;
1244
1245 err = request_irq(lp->cfg.rx_irq, ldc_rx,
1246 IRQF_SAMPLE_RANDOM | IRQF_SHARED,
1247 "LDC RX", lp);
1248 if (err)
1249 goto out_err;
1250
1251 err = request_irq(lp->cfg.tx_irq, ldc_tx,
1252 IRQF_SAMPLE_RANDOM | IRQF_SHARED,
1253 "LDC TX", lp);
1254 if (err)
1255 goto out_free_rx_irq;
1256
1257
1258 lp->flags |= LDC_FLAG_REGISTERED_IRQS;
1259
1260 err = -ENODEV;
1261 hv_err = sun4v_ldc_tx_qconf(lp->id, 0, 0);
1262 if (hv_err)
1263 goto out_free_tx_irq;
1264
1265 hv_err = sun4v_ldc_tx_qconf(lp->id, lp->tx_ra, lp->tx_num_entries);
1266 if (hv_err)
1267 goto out_free_tx_irq;
1268
1269 hv_err = sun4v_ldc_rx_qconf(lp->id, 0, 0);
1270 if (hv_err)
1271 goto out_unmap_tx;
1272
1273 hv_err = sun4v_ldc_rx_qconf(lp->id, lp->rx_ra, lp->rx_num_entries);
1274 if (hv_err)
1275 goto out_unmap_tx;
1276
1277 lp->flags |= LDC_FLAG_REGISTERED_QUEUES;
1278
1279 hv_err = sun4v_ldc_tx_get_state(lp->id,
1280 &lp->tx_head,
1281 &lp->tx_tail,
1282 &lp->chan_state);
1283 err = -EBUSY;
1284 if (hv_err)
1285 goto out_unmap_rx;
1286
1287 lp->tx_acked = lp->tx_head;
1288
1289 lp->hs_state = LDC_HS_OPEN;
1290 ldc_set_state(lp, LDC_STATE_BOUND);
1291
1292 spin_unlock_irqrestore(&lp->lock, flags);
1293
1294 return 0;
1295
1296out_unmap_rx:
1297 lp->flags &= ~LDC_FLAG_REGISTERED_QUEUES;
1298 sun4v_ldc_rx_qconf(lp->id, 0, 0);
1299
1300out_unmap_tx:
1301 sun4v_ldc_tx_qconf(lp->id, 0, 0);
1302
1303out_free_tx_irq:
1304 lp->flags &= ~LDC_FLAG_REGISTERED_IRQS;
1305 free_irq(lp->cfg.tx_irq, lp);
1306
1307out_free_rx_irq:
1308 free_irq(lp->cfg.rx_irq, lp);
1309
1310out_err:
1311 spin_unlock_irqrestore(&lp->lock, flags);
1312
1313 return err;
1314}
1315EXPORT_SYMBOL(ldc_bind);
1316
1317int ldc_connect(struct ldc_channel *lp)
1318{
1319 unsigned long flags;
1320 int err;
1321
1322 if (lp->cfg.mode == LDC_MODE_RAW)
1323 return -EINVAL;
1324
1325 spin_lock_irqsave(&lp->lock, flags);
1326
1327 if (!(lp->flags & LDC_FLAG_ALLOCED_QUEUES) ||
1328 !(lp->flags & LDC_FLAG_REGISTERED_QUEUES) ||
1329 lp->hs_state != LDC_HS_OPEN)
1330 err = -EINVAL;
1331 else
1332 err = start_handshake(lp);
1333
1334 spin_unlock_irqrestore(&lp->lock, flags);
1335
1336 return err;
1337}
1338EXPORT_SYMBOL(ldc_connect);
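/* Consumer sketch (hypothetical driver code, not part of this patch):
 * how ldc_alloc(), ldc_bind() and ldc_connect() fit together.  The
 * config field names follow their use in this file; the callback
 * signature is assumed from the lp->cfg.event() calls in send_events().
 */
static void my_event(void *arg, int event)
{
	if (event == LDC_EVENT_DATA_READY)
		;	/* schedule a read of the channel */
}

static int my_attach(unsigned long channel_id, unsigned int rx_irq,
		     unsigned int tx_irq, void *priv)
{
	struct ldc_channel_config cfg = {
		.event	= my_event,
		.mode	= LDC_MODE_UNRELIABLE,
		.rx_irq	= rx_irq,
		.tx_irq	= tx_irq,
	};
	struct ldc_channel *lp;
	int err;

	lp = ldc_alloc(channel_id, &cfg, priv);	/* priv must be non-NULL */
	if (IS_ERR(lp))
		return PTR_ERR(lp);

	err = ldc_bind(lp);		/* registers queues and IRQs */
	if (!err)
		err = ldc_connect(lp);	/* starts the version handshake */
	if (err)
		ldc_free(lp);
	return err;
}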
1339
1340int ldc_disconnect(struct ldc_channel *lp)
1341{
1342 unsigned long hv_err, flags;
1343 int err;
1344
1345 if (lp->cfg.mode == LDC_MODE_RAW)
1346 return -EINVAL;
1347
1348 if (!(lp->flags & LDC_FLAG_ALLOCED_QUEUES) ||
1349 !(lp->flags & LDC_FLAG_REGISTERED_QUEUES))
1350 return -EINVAL;
1351
1352 spin_lock_irqsave(&lp->lock, flags);
1353
1354 err = -ENODEV;
1355 hv_err = sun4v_ldc_tx_qconf(lp->id, 0, 0);
1356 if (hv_err)
1357 goto out_err;
1358
1359 hv_err = sun4v_ldc_tx_qconf(lp->id, lp->tx_ra, lp->tx_num_entries);
1360 if (hv_err)
1361 goto out_err;
1362
1363 hv_err = sun4v_ldc_rx_qconf(lp->id, 0, 0);
1364 if (hv_err)
1365 goto out_err;
1366
1367 hv_err = sun4v_ldc_rx_qconf(lp->id, lp->rx_ra, lp->rx_num_entries);
1368 if (hv_err)
1369 goto out_err;
1370
1371 ldc_set_state(lp, LDC_STATE_BOUND);
1372 lp->hs_state = LDC_HS_OPEN;
1373 lp->flags |= LDC_FLAG_RESET;
1374
1375 spin_unlock_irqrestore(&lp->lock, flags);
1376
1377 return 0;
1378
1379out_err:
1380 sun4v_ldc_tx_qconf(lp->id, 0, 0);
1381 sun4v_ldc_rx_qconf(lp->id, 0, 0);
1382 free_irq(lp->cfg.tx_irq, lp);
1383 free_irq(lp->cfg.rx_irq, lp);
1384 lp->flags &= ~(LDC_FLAG_REGISTERED_IRQS |
1385 LDC_FLAG_REGISTERED_QUEUES);
1386 ldc_set_state(lp, LDC_STATE_INIT);
1387
1388 spin_unlock_irqrestore(&lp->lock, flags);
1389
1390 return err;
1391}
1392EXPORT_SYMBOL(ldc_disconnect);
1393
1394int ldc_state(struct ldc_channel *lp)
1395{
1396 return lp->state;
1397}
1398EXPORT_SYMBOL(ldc_state);
1399
1400static int write_raw(struct ldc_channel *lp, const void *buf, unsigned int size)
1401{
1402 struct ldc_packet *p;
1403 unsigned long new_tail;
1404 int err;
1405
1406 if (size > LDC_PACKET_SIZE)
1407 return -EMSGSIZE;
1408
1409 p = data_get_tx_packet(lp, &new_tail);
1410 if (!p)
1411 return -EAGAIN;
1412
1413 memcpy(p, buf, size);
1414
1415 err = send_tx_packet(lp, p, new_tail);
1416 if (!err)
1417 err = size;
1418
1419 return err;
1420}
1421
1422static int read_raw(struct ldc_channel *lp, void *buf, unsigned int size)
1423{
1424 struct ldc_packet *p;
1425 unsigned long hv_err, new;
1426 int err;
1427
1428 if (size < LDC_PACKET_SIZE)
1429 return -EINVAL;
1430
1431 hv_err = sun4v_ldc_rx_get_state(lp->id,
1432 &lp->rx_head,
1433 &lp->rx_tail,
1434 &lp->chan_state);
1435 if (hv_err)
1436 return ldc_abort(lp);
1437
1438 if (lp->chan_state == LDC_CHANNEL_DOWN ||
1439 lp->chan_state == LDC_CHANNEL_RESETTING)
1440 return -ECONNRESET;
1441
1442 if (lp->rx_head == lp->rx_tail)
1443 return 0;
1444
1445 p = lp->rx_base + (lp->rx_head / LDC_PACKET_SIZE);
1446 memcpy(buf, p, LDC_PACKET_SIZE);
1447
1448 new = rx_advance(lp, lp->rx_head);
1449 lp->rx_head = new;
1450
1451 err = __set_rx_head(lp, new);
1452 if (err < 0)
1453 err = -ECONNRESET;
1454 else
1455 err = LDC_PACKET_SIZE;
1456
1457 return err;
1458}
1459
1460static const struct ldc_mode_ops raw_ops = {
1461 .write = write_raw,
1462 .read = read_raw,
1463};
1464
1465static int write_nonraw(struct ldc_channel *lp, const void *buf,
1466 unsigned int size)
1467{
1468 unsigned long hv_err, tail;
1469 unsigned int copied;
1470 u32 seq;
1471 int err;
1472
1473 hv_err = sun4v_ldc_tx_get_state(lp->id, &lp->tx_head, &lp->tx_tail,
1474 &lp->chan_state);
1475 if (unlikely(hv_err))
1476 return -EBUSY;
1477
1478 if (unlikely(lp->chan_state != LDC_CHANNEL_UP))
1479 return ldc_abort(lp);
1480
1481 if (!tx_has_space_for(lp, size))
1482 return -EAGAIN;
1483
1484 seq = lp->snd_nxt;
1485 copied = 0;
1486 tail = lp->tx_tail;
1487 while (copied < size) {
1488 struct ldc_packet *p = lp->tx_base + (tail / LDC_PACKET_SIZE);
1489 u8 *data = ((lp->cfg.mode == LDC_MODE_UNRELIABLE) ?
1490 p->u.u_data :
1491 p->u.r.r_data);
1492 int data_len;
1493
1494 p->type = LDC_DATA;
1495 p->stype = LDC_INFO;
1496 p->ctrl = 0;
1497
1498 data_len = size - copied;
1499 if (data_len > lp->mss)
1500 data_len = lp->mss;
1501
1502 BUG_ON(data_len > LDC_LEN);
1503
1504 p->env = (data_len |
1505 (copied == 0 ? LDC_START : 0) |
1506 (data_len == size - copied ? LDC_STOP : 0));
1507
1508 p->seqid = ++seq;
1509
1510 ldcdbg(DATA, "SENT DATA [%02x:%02x:%02x:%02x:%08x]\n",
1511 p->type,
1512 p->stype,
1513 p->ctrl,
1514 p->env,
1515 p->seqid);
1516
1517 memcpy(data, buf, data_len);
1518 buf += data_len;
1519 copied += data_len;
1520
1521 tail = tx_advance(lp, tail);
1522 }
1523
1524 err = set_tx_tail(lp, tail);
1525 if (!err) {
1526 lp->snd_nxt = seq;
1527 err = size;
1528 }
1529
1530 return err;
1531}
1532
1533static int rx_bad_seq(struct ldc_channel *lp, struct ldc_packet *p,
1534 struct ldc_packet *first_frag)
1535{
1536 int err;
1537
1538 if (first_frag)
1539 lp->rcv_nxt = first_frag->seqid - 1;
1540
1541 err = send_data_nack(lp, p);
1542 if (err)
1543 return err;
1544
1545 err = __set_rx_head(lp, lp->rx_tail);
1546 if (err < 0)
1547 return ldc_abort(lp);
1548
1549 return 0;
1550}
1551
1552static int data_ack_nack(struct ldc_channel *lp, struct ldc_packet *p)
1553{
1554 if (p->stype & LDC_ACK) {
1555 int err = process_data_ack(lp, p);
1556 if (err)
1557 return err;
1558 }
1559 if (p->stype & LDC_NACK)
1560 return ldc_abort(lp);
1561
1562 return 0;
1563}
1564
1565static int rx_data_wait(struct ldc_channel *lp, unsigned long cur_head)
1566{
1567 unsigned long dummy;
1568 int limit = 1000;
1569
1570 ldcdbg(DATA, "DATA WAIT cur_head[%lx] rx_head[%lx] rx_tail[%lx]\n",
1571 cur_head, lp->rx_head, lp->rx_tail);
1572 while (limit-- > 0) {
1573 unsigned long hv_err;
1574
1575 hv_err = sun4v_ldc_rx_get_state(lp->id,
1576 &dummy,
1577 &lp->rx_tail,
1578 &lp->chan_state);
1579 if (hv_err)
1580 return ldc_abort(lp);
1581
1582 ldcdbg(DATA, "REREAD head[%lx] tail[%lx] chan_state[%lx]\n",
1583 dummy, lp->rx_tail, lp->chan_state);
1584
1585 if (lp->chan_state == LDC_CHANNEL_DOWN ||
1586 lp->chan_state == LDC_CHANNEL_RESETTING)
1587 return -ECONNRESET;
1588
1589 if (cur_head != lp->rx_tail) {
1590 ldcdbg(DATA, "DATA WAIT DONE\n");
1591 return 0;
1592 }
1593
1594 udelay(1);
1595 }
1596 return -EAGAIN;
1597}
1598
1599static int rx_set_head(struct ldc_channel *lp, unsigned long head)
1600{
1601 int err = __set_rx_head(lp, head);
1602
1603 if (err < 0)
1604 return ldc_abort(lp);
1605
1606 lp->rx_head = head;
1607 return 0;
1608}
1609
1610static int read_nonraw(struct ldc_channel *lp, void *buf, unsigned int size)
1611{
1612 struct ldc_packet *first_frag;
1613 unsigned long hv_err, new;
1614 int err, copied;
1615
1616 hv_err = sun4v_ldc_rx_get_state(lp->id,
1617 &lp->rx_head,
1618 &lp->rx_tail,
1619 &lp->chan_state);
1620 if (hv_err)
1621 return ldc_abort(lp);
1622
1623 if (lp->chan_state == LDC_CHANNEL_DOWN ||
1624 lp->chan_state == LDC_CHANNEL_RESETTING)
1625 return -ECONNRESET;
1626
1627 if (lp->rx_head == lp->rx_tail)
1628 return 0;
1629
1630 first_frag = NULL;
1631 copied = err = 0;
1632 new = lp->rx_head;
1633 while (1) {
1634 struct ldc_packet *p;
1635 int pkt_len;
1636
1637 BUG_ON(new == lp->rx_tail);
1638 p = lp->rx_base + (new / LDC_PACKET_SIZE);
1639
1640 ldcdbg(RX, "RX read pkt[%02x:%02x:%02x:%02x:%08x] "
1641 "rcv_nxt[%08x]\n",
1642 p->type,
1643 p->stype,
1644 p->ctrl,
1645 p->env,
1646 p->seqid,
1647 lp->rcv_nxt);
1648
1649 if (unlikely(!rx_seq_ok(lp, p->seqid))) {
1650 err = rx_bad_seq(lp, p, first_frag);
1651 copied = 0;
1652 break;
1653 }
1654
1655 if (p->type & LDC_CTRL) {
1656 err = process_control_frame(lp, p);
1657 if (err < 0)
1658 break;
1659 err = 0;
1660 }
1661
1662 lp->rcv_nxt = p->seqid;
1663
1664 if (!(p->type & LDC_DATA)) {
1665 new = rx_advance(lp, new);
1666 goto no_data;
1667 }
1668 if (p->stype & (LDC_ACK | LDC_NACK)) {
1669 err = data_ack_nack(lp, p);
1670 if (err)
1671 break;
1672 }
1673 if (!(p->stype & LDC_INFO)) {
1674 new = rx_advance(lp, new);
1675 goto no_data;
1676 }
1677
1678 pkt_len = p->env & LDC_LEN;
1679
1680 /* Every initial packet starts with the START bit set.
1681 *
1682 * Singleton packets will have both START+STOP set.
1683 *
1684 * Fragments will have START set in the first frame, STOP
1685 * set in the last frame, and neither bit set in middle
1686 * frames of the packet.
1687 *
1688 * Therefore if we are at the beginning of a packet and
1689 * we don't see START, or we are in the middle of a fragmented
1690 * packet and do see START, we are unsynchronized and should
1691 * flush the RX queue.
1692 */
1693 if ((first_frag == NULL && !(p->env & LDC_START)) ||
1694 (first_frag != NULL && (p->env & LDC_START))) {
1695 if (!first_frag)
1696 new = rx_advance(lp, new);
1697
1698 err = rx_set_head(lp, new);
1699 if (err)
1700 break;
1701
1702 if (!first_frag)
1703 goto no_data;
1704 }
1705 if (!first_frag)
1706 first_frag = p;
1707
1708 if (pkt_len > size - copied) {
1709 /* User didn't give us a big enough buffer,
1710 * what to do? This is a pretty serious error.
1711 *
1712 * Since we haven't updated the RX ring head to
1713 * consume any of the packets, signal the error
1714 * to the user and just leave the RX ring alone.
1715 *
1716 * This seems the best behavior because this allows
1717 * a user of the LDC layer to start with a small
1718 * RX buffer for ldc_read() calls and use -EMSGSIZE
 1719 * as a cue to enlarge its read buffer.
1720 */
1721 err = -EMSGSIZE;
1722 break;
1723 }
1724
1725 /* Ok, we are gonna eat this one. */
1726 new = rx_advance(lp, new);
1727
1728 memcpy(buf,
1729 (lp->cfg.mode == LDC_MODE_UNRELIABLE ?
1730 p->u.u_data : p->u.r.r_data), pkt_len);
1731 buf += pkt_len;
1732 copied += pkt_len;
1733
1734 if (p->env & LDC_STOP)
1735 break;
1736
1737no_data:
1738 if (new == lp->rx_tail) {
1739 err = rx_data_wait(lp, new);
1740 if (err)
1741 break;
1742 }
1743 }
1744
1745 if (!err)
1746 err = rx_set_head(lp, new);
1747
1748 if (err && first_frag)
1749 lp->rcv_nxt = first_frag->seqid - 1;
1750
1751 if (!err)
1752 err = copied;
1753
1754 return err;
1755}
1756
1757static const struct ldc_mode_ops nonraw_ops = {
1758 .write = write_nonraw,
1759 .read = read_nonraw,
1760};
1761
1762static int write_stream(struct ldc_channel *lp, const void *buf,
1763 unsigned int size)
1764{
1765 if (size > lp->cfg.mtu)
1766 size = lp->cfg.mtu;
1767 return write_nonraw(lp, buf, size);
1768}
1769
1770static int read_stream(struct ldc_channel *lp, void *buf, unsigned int size)
1771{
1772 if (!lp->mssbuf_len) {
1773 int err = read_nonraw(lp, lp->mssbuf,
1774 (size > lp->cfg.mtu ?
1775 lp->cfg.mtu : size));
1776 if (err < 0)
1777 return err;
1778
1779 lp->mssbuf_len = err;
1780 lp->mssbuf_off = 0;
1781 }
1782
1783 if (size > lp->mssbuf_len)
1784 size = lp->mssbuf_len;
1785 memcpy(buf, lp->mssbuf + lp->mssbuf_off, size);
1786
1787 lp->mssbuf_off += size;
1788 lp->mssbuf_len -= size;
1789
1790 return size;
1791}
1792
1793static const struct ldc_mode_ops stream_ops = {
1794 .write = write_stream,
1795 .read = read_stream,
1796};
1797
1798int ldc_write(struct ldc_channel *lp, const void *buf, unsigned int size)
1799{
1800 unsigned long flags;
1801 int err;
1802
1803 if (!buf)
1804 return -EINVAL;
1805
1806 if (!size)
1807 return 0;
1808
1809 spin_lock_irqsave(&lp->lock, flags);
1810
1811 if (lp->hs_state != LDC_HS_COMPLETE)
1812 err = -ENOTCONN;
1813 else
1814 err = lp->mops->write(lp, buf, size);
1815
1816 spin_unlock_irqrestore(&lp->lock, flags);
1817
1818 return err;
1819}
1820EXPORT_SYMBOL(ldc_write);
1821
1822int ldc_read(struct ldc_channel *lp, void *buf, unsigned int size)
1823{
1824 unsigned long flags;
1825 int err;
1826
1827 if (!buf)
1828 return -EINVAL;
1829
1830 if (!size)
1831 return 0;
1832
1833 spin_lock_irqsave(&lp->lock, flags);
1834
1835 if (lp->hs_state != LDC_HS_COMPLETE)
1836 err = -ENOTCONN;
1837 else
1838 err = lp->mops->read(lp, buf, size);
1839
1840 spin_unlock_irqrestore(&lp->lock, flags);
1841
1842 return err;
1843}
1844EXPORT_SYMBOL(ldc_read);
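/* Sketch of draining a channel from an LDC_EVENT_DATA_READY handler
 * (hypothetical caller code; buffer sizing and error policy are up to
 * the driver).  ldc_read() returns 0 when the RX queue is empty, a
 * positive byte count on success, and a negative errno such as
 * -ENOTCONN, -ECONNRESET or -EMSGSIZE on failure.
 */
static void my_drain(struct ldc_channel *lp)
{
	char buf[256];
	int len;

	while ((len = ldc_read(lp, buf, sizeof(buf))) > 0)
		;	/* hand len bytes in buf to the upper layer */

	if (len == -ECONNRESET)
		;	/* link reset; wait for LDC_EVENT_RESET handling */
}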
1845
1846static long arena_alloc(struct ldc_iommu *iommu, unsigned long npages)
1847{
1848 struct iommu_arena *arena = &iommu->arena;
1849 unsigned long n, i, start, end, limit;
1850 int pass;
1851
1852 limit = arena->limit;
1853 start = arena->hint;
1854 pass = 0;
1855
1856again:
1857 n = find_next_zero_bit(arena->map, limit, start);
1858 end = n + npages;
1859 if (unlikely(end >= limit)) {
1860 if (likely(pass < 1)) {
1861 limit = start;
1862 start = 0;
1863 pass++;
1864 goto again;
1865 } else {
1866 /* Scanned the whole thing, give up. */
1867 return -1;
1868 }
1869 }
1870
1871 for (i = n; i < end; i++) {
1872 if (test_bit(i, arena->map)) {
1873 start = i + 1;
1874 goto again;
1875 }
1876 }
1877
1878 for (i = n; i < end; i++)
1879 __set_bit(i, arena->map);
1880
1881 arena->hint = end;
1882
1883 return n;
1884}
1885
1886#define COOKIE_PGSZ_CODE 0xf000000000000000ULL
1887#define COOKIE_PGSZ_CODE_SHIFT 60ULL
1888
1889static u64 pagesize_code(void)
1890{
1891 switch (PAGE_SIZE) {
1892 default:
1893 case (8ULL * 1024ULL):
1894 return 0;
1895 case (64ULL * 1024ULL):
1896 return 1;
1897 case (512ULL * 1024ULL):
1898 return 2;
1899 case (4ULL * 1024ULL * 1024ULL):
1900 return 3;
1901 case (32ULL * 1024ULL * 1024ULL):
1902 return 4;
1903 case (256ULL * 1024ULL * 1024ULL):
1904 return 5;
1905 }
1906}
1907
1908static u64 make_cookie(u64 index, u64 pgsz_code, u64 page_offset)
1909{
1910 return ((pgsz_code << COOKIE_PGSZ_CODE_SHIFT) |
1911 (index << PAGE_SHIFT) |
1912 page_offset);
1913}
1914
1915static u64 cookie_to_index(u64 cookie, unsigned long *shift)
1916{
1917 u64 szcode = cookie >> COOKIE_PGSZ_CODE_SHIFT;
1918
1919 cookie &= ~COOKIE_PGSZ_CODE;
1920
1921 *shift = szcode * 3;
1922
1923 return (cookie >> (13ULL + (szcode * 3ULL)));
1924}
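/* Worked example of the cookie layout (assuming the default 8K sparc64
 * page size, so pagesize_code() == 0 and PAGE_SHIFT == 13):
 * make_cookie(5, 0, 0x100) yields (0 << 60) | (5 << 13) | 0x100 = 0xa100,
 * and cookie_to_index(0xa100, &shift) strips the page-size code, sets
 * shift to 0 and returns index 5.
 */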
1925
1926static struct ldc_mtable_entry *alloc_npages(struct ldc_iommu *iommu,
1927 unsigned long npages)
1928{
1929 long entry;
1930
1931 entry = arena_alloc(iommu, npages);
1932 if (unlikely(entry < 0))
1933 return NULL;
1934
1935 return iommu->page_table + entry;
1936}
1937
1938static u64 perm_to_mte(unsigned int map_perm)
1939{
1940 u64 mte_base;
1941
1942 mte_base = pagesize_code();
1943
1944 if (map_perm & LDC_MAP_SHADOW) {
1945 if (map_perm & LDC_MAP_R)
1946 mte_base |= LDC_MTE_COPY_R;
1947 if (map_perm & LDC_MAP_W)
1948 mte_base |= LDC_MTE_COPY_W;
1949 }
1950 if (map_perm & LDC_MAP_DIRECT) {
1951 if (map_perm & LDC_MAP_R)
1952 mte_base |= LDC_MTE_READ;
1953 if (map_perm & LDC_MAP_W)
1954 mte_base |= LDC_MTE_WRITE;
1955 if (map_perm & LDC_MAP_X)
1956 mte_base |= LDC_MTE_EXEC;
1957 }
1958 if (map_perm & LDC_MAP_IO) {
1959 if (map_perm & LDC_MAP_R)
1960 mte_base |= LDC_MTE_IOMMU_R;
1961 if (map_perm & LDC_MAP_W)
1962 mte_base |= LDC_MTE_IOMMU_W;
1963 }
1964
1965 return mte_base;
1966}
1967
1968static int pages_in_region(unsigned long base, long len)
1969{
1970 int count = 0;
1971
1972 do {
1973 unsigned long new = (base + PAGE_SIZE) & PAGE_MASK;
1974
1975 len -= (new - base);
1976 base = new;
1977 count++;
1978 } while (len > 0);
1979
1980 return count;
1981}
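/* For instance (illustrative numbers, 8K pages): a 16-byte buffer that
 * starts 8 bytes before a page boundary spills 8 bytes into the next
 * page, so pages_in_region() returns 2; the same buffer placed fully
 * inside one page returns 1.
 */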
1982
1983struct cookie_state {
1984 struct ldc_mtable_entry *page_table;
1985 struct ldc_trans_cookie *cookies;
1986 u64 mte_base;
1987 u64 prev_cookie;
1988 u32 pte_idx;
1989 u32 nc;
1990};
1991
1992static void fill_cookies(struct cookie_state *sp, unsigned long pa,
1993 unsigned long off, unsigned long len)
1994{
1995 do {
1996 unsigned long tlen, new = pa + PAGE_SIZE;
1997 u64 this_cookie;
1998
1999 sp->page_table[sp->pte_idx].mte = sp->mte_base | pa;
2000
2001 tlen = PAGE_SIZE;
2002 if (off)
2003 tlen = PAGE_SIZE - off;
2004 if (tlen > len)
2005 tlen = len;
2006
2007 this_cookie = make_cookie(sp->pte_idx,
2008 pagesize_code(), off);
2009
2010 off = 0;
2011
2012 if (this_cookie == sp->prev_cookie) {
2013 sp->cookies[sp->nc - 1].cookie_size += tlen;
2014 } else {
2015 sp->cookies[sp->nc].cookie_addr = this_cookie;
2016 sp->cookies[sp->nc].cookie_size = tlen;
2017 sp->nc++;
2018 }
2019 sp->prev_cookie = this_cookie + tlen;
2020
2021 sp->pte_idx++;
2022
2023 len -= tlen;
2024 pa = new;
2025 } while (len > 0);
2026}
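/* Worked example of the cookie coalescing above (illustrative values,
 * 8K pages): mapping a 16K page-aligned buffer into consecutive table
 * slots 4 and 5, the first pass emits cookie_addr 4 << 13 = 0x8000 with
 * size 8192 and sets prev_cookie to 0xa000; the second pass computes
 * this_cookie = 5 << 13 = 0xa000, which matches, so the existing cookie
 * simply grows to 16384 and nc stays at 1.
 */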
2027
2028static int sg_count_one(struct scatterlist *sg)
2029{
2030 unsigned long base = page_to_pfn(sg->page) << PAGE_SHIFT;
2031 long len = sg->length;
2032
2033 if ((sg->offset | len) & (8UL - 1))
2034 return -EFAULT;
2035
2036 return pages_in_region(base + sg->offset, len);
2037}
2038
2039static int sg_count_pages(struct scatterlist *sg, int num_sg)
2040{
2041 int count;
2042 int i;
2043
2044 count = 0;
2045 for (i = 0; i < num_sg; i++) {
2046 int err = sg_count_one(sg + i);
2047 if (err < 0)
2048 return err;
2049 count += err;
2050 }
2051
2052 return count;
2053}
2054
2055int ldc_map_sg(struct ldc_channel *lp,
2056 struct scatterlist *sg, int num_sg,
2057 struct ldc_trans_cookie *cookies, int ncookies,
2058 unsigned int map_perm)
2059{
2060 unsigned long i, npages, flags;
2061 struct ldc_mtable_entry *base;
2062 struct cookie_state state;
2063 struct ldc_iommu *iommu;
2064 int err;
2065
2066 if (map_perm & ~LDC_MAP_ALL)
2067 return -EINVAL;
2068
2069 err = sg_count_pages(sg, num_sg);
2070 if (err < 0)
2071 return err;
2072
2073 npages = err;
2074 if (err > ncookies)
2075 return -EMSGSIZE;
2076
2077 iommu = &lp->iommu;
2078
2079 spin_lock_irqsave(&iommu->lock, flags);
2080 base = alloc_npages(iommu, npages);
2081 spin_unlock_irqrestore(&iommu->lock, flags);
2082
2083 if (!base)
2084 return -ENOMEM;
2085
2086 state.page_table = iommu->page_table;
2087 state.cookies = cookies;
2088 state.mte_base = perm_to_mte(map_perm);
2089 state.prev_cookie = ~(u64)0;
2090 state.pte_idx = (base - iommu->page_table);
2091 state.nc = 0;
2092
2093 for (i = 0; i < num_sg; i++)
2094 fill_cookies(&state, page_to_pfn(sg[i].page) << PAGE_SHIFT,
2095 sg[i].offset, sg[i].length);
2096
2097 return state.nc;
2098}
2099EXPORT_SYMBOL(ldc_map_sg);
2100
2101int ldc_map_single(struct ldc_channel *lp,
2102 void *buf, unsigned int len,
2103 struct ldc_trans_cookie *cookies, int ncookies,
2104 unsigned int map_perm)
2105{
2106 unsigned long npages, pa, flags;
2107 struct ldc_mtable_entry *base;
2108 struct cookie_state state;
2109 struct ldc_iommu *iommu;
2110
2111 if ((map_perm & ~LDC_MAP_ALL) || (ncookies < 1))
2112 return -EINVAL;
2113
2114 pa = __pa(buf);
2115 if ((pa | len) & (8UL - 1))
2116 return -EFAULT;
2117
2118 npages = pages_in_region(pa, len);
2119
2120 iommu = &lp->iommu;
2121
2122 spin_lock_irqsave(&iommu->lock, flags);
2123 base = alloc_npages(iommu, npages);
2124 spin_unlock_irqrestore(&iommu->lock, flags);
2125
2126 if (!base)
2127 return -ENOMEM;
2128
2129 state.page_table = iommu->page_table;
2130 state.cookies = cookies;
2131 state.mte_base = perm_to_mte(map_perm);
2132 state.prev_cookie = ~(u64)0;
2133 state.pte_idx = (base - iommu->page_table);
2134 state.nc = 0;
2135 fill_cookies(&state, (pa & PAGE_MASK), (pa & ~PAGE_MASK), len);
2136 BUG_ON(state.nc != 1);
2137
2138 return state.nc;
2139}
2140EXPORT_SYMBOL(ldc_map_single);
2141
2142static void free_npages(unsigned long id, struct ldc_iommu *iommu,
2143 u64 cookie, u64 size)
2144{
2145 struct iommu_arena *arena = &iommu->arena;
2146 unsigned long i, shift, index, npages;
2147 struct ldc_mtable_entry *base;
2148
2149 npages = PAGE_ALIGN(((cookie & ~PAGE_MASK) + size)) >> PAGE_SHIFT;
2150 index = cookie_to_index(cookie, &shift);
2151 base = iommu->page_table + index;
2152
2153 BUG_ON(index > arena->limit ||
2154 (index + npages) > arena->limit);
2155
2156 for (i = 0; i < npages; i++) {
2157 if (base->cookie)
2158 sun4v_ldc_revoke(id, cookie + (i << shift),
2159 base->cookie);
2160 base->mte = 0;
2161 __clear_bit(index + i, arena->map);
2162 }
2163}
2164
2165void ldc_unmap(struct ldc_channel *lp, struct ldc_trans_cookie *cookies,
2166 int ncookies)
2167{
2168 struct ldc_iommu *iommu = &lp->iommu;
2169 unsigned long flags;
2170 int i;
2171
2172 spin_lock_irqsave(&iommu->lock, flags);
2173 for (i = 0; i < ncookies; i++) {
2174 u64 addr = cookies[i].cookie_addr;
2175 u64 size = cookies[i].cookie_size;
2176
2177 free_npages(lp->id, iommu, addr, size);
2178 }
2179 spin_unlock_irqrestore(&iommu->lock, flags);
2180}
2181EXPORT_SYMBOL(ldc_unmap);
2182
2183int ldc_copy(struct ldc_channel *lp, int copy_dir,
2184 void *buf, unsigned int len, unsigned long offset,
2185 struct ldc_trans_cookie *cookies, int ncookies)
2186{
2187 unsigned int orig_len;
2188 unsigned long ra;
2189 int i;
2190
2191 if (copy_dir != LDC_COPY_IN && copy_dir != LDC_COPY_OUT) {
2192 printk(KERN_ERR PFX "ldc_copy: ID[%lu] Bad copy_dir[%d]\n",
2193 lp->id, copy_dir);
2194 return -EINVAL;
2195 }
2196
2197 ra = __pa(buf);
2198 if ((ra | len | offset) & (8UL - 1)) {
2199 printk(KERN_ERR PFX "ldc_copy: ID[%lu] Unaligned buffer "
2200 "ra[%lx] len[%x] offset[%lx]\n",
2201 lp->id, ra, len, offset);
2202 return -EFAULT;
2203 }
2204
2205 if (lp->hs_state != LDC_HS_COMPLETE ||
2206 (lp->flags & LDC_FLAG_RESET)) {
2207 printk(KERN_ERR PFX "ldc_copy: ID[%lu] Link down hs_state[%x] "
2208 "flags[%x]\n", lp->id, lp->hs_state, lp->flags);
2209 return -ECONNRESET;
2210 }
2211
2212 orig_len = len;
2213 for (i = 0; i < ncookies; i++) {
2214 unsigned long cookie_raddr = cookies[i].cookie_addr;
2215 unsigned long this_len = cookies[i].cookie_size;
2216 unsigned long actual_len;
2217
2218 if (unlikely(offset)) {
2219 unsigned long this_off = offset;
2220
2221 if (this_off > this_len)
2222 this_off = this_len;
2223
2224 offset -= this_off;
2225 this_len -= this_off;
2226 if (!this_len)
2227 continue;
2228 cookie_raddr += this_off;
2229 }
2230
2231 if (this_len > len)
2232 this_len = len;
2233
2234 while (1) {
2235 unsigned long hv_err;
2236
2237 hv_err = sun4v_ldc_copy(lp->id, copy_dir,
2238 cookie_raddr, ra,
2239 this_len, &actual_len);
2240 if (unlikely(hv_err)) {
2241 printk(KERN_ERR PFX "ldc_copy: ID[%lu] "
2242 "HV error %lu\n",
2243 lp->id, hv_err);
2244 if (lp->hs_state != LDC_HS_COMPLETE ||
2245 (lp->flags & LDC_FLAG_RESET))
2246 return -ECONNRESET;
2247 else
2248 return -EFAULT;
2249 }
2250
2251 cookie_raddr += actual_len;
2252 ra += actual_len;
2253 len -= actual_len;
2254 if (actual_len == this_len)
2255 break;
2256
2257 this_len -= actual_len;
2258 }
2259
2260 if (!len)
2261 break;
2262 }
2263
 2264 /* It is the caller's policy what to do about short copies.
2265 * For example, a networking driver can declare the
2266 * packet a runt and drop it.
2267 */
2268
2269 return orig_len - len;
2270}
2271EXPORT_SYMBOL(ldc_copy);
2272
2273void *ldc_alloc_exp_dring(struct ldc_channel *lp, unsigned int len,
2274 struct ldc_trans_cookie *cookies, int *ncookies,
2275 unsigned int map_perm)
2276{
2277 void *buf;
2278 int err;
2279
2280 if (len & (8UL - 1))
2281 return ERR_PTR(-EINVAL);
2282
2283 buf = kzalloc(len, GFP_KERNEL);
2284 if (!buf)
2285 return ERR_PTR(-ENOMEM);
2286
2287 err = ldc_map_single(lp, buf, len, cookies, *ncookies, map_perm);
2288 if (err < 0) {
2289 kfree(buf);
2290 return ERR_PTR(err);
2291 }
2292 *ncookies = err;
2293
2294 return buf;
2295}
2296EXPORT_SYMBOL(ldc_alloc_exp_dring);
2297
2298void ldc_free_exp_dring(struct ldc_channel *lp, void *buf, unsigned int len,
2299 struct ldc_trans_cookie *cookies, int ncookies)
2300{
2301 ldc_unmap(lp, cookies, ncookies);
2302 kfree(buf);
2303}
2304EXPORT_SYMBOL(ldc_free_exp_dring);
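/* Sketch of the exported-descriptor-ring helpers in use (hypothetical
 * sizes, cookie count and permissions; not part of this patch).
 */
static void *my_export_ring(struct ldc_channel *lp,
			    struct ldc_trans_cookie *cookies, int *ncookies)
{
	void *dring;

	*ncookies = 2;	/* capacity of the caller's cookie array */
	dring = ldc_alloc_exp_dring(lp, 512, cookies, ncookies,
				    LDC_MAP_SHADOW | LDC_MAP_R | LDC_MAP_W);
	if (IS_ERR(dring))
		return dring;

	/* Hand the cookies to the peer over the channel, and later
	 * release everything with
	 * ldc_free_exp_dring(lp, dring, 512, cookies, *ncookies).
	 */
	return dring;
}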
2305
2306static int __init ldc_init(void)
2307{
2308 struct mdesc_node *mp;
2309 unsigned long major, minor;
2310 const u64 *v;
2311
2312 mp = md_find_node_by_name(NULL, "platform");
2313 if (!mp)
2314 return -ENODEV;
2315
2316 v = md_get_property(mp, "domaining-enabled", NULL);
2317 if (!v)
2318 return -ENODEV;
2319
2320 major = 1;
2321 minor = 0;
2322 if (sun4v_hvapi_register(HV_GRP_LDOM, major, &minor)) {
2323 printk(KERN_INFO PFX "Could not register LDOM hvapi.\n");
2324 return -ENODEV;
2325 }
2326
2327 printk(KERN_INFO "%s", version);
2328
2329 if (!*v) {
2330 printk(KERN_INFO PFX "Domaining disabled.\n");
2331 return -ENODEV;
2332 }
2333 ldom_domaining_enabled = 1;
2334
2335 return 0;
2336}
2337
2338core_initcall(ldc_init);
diff --git a/arch/sparc64/kernel/vio.c b/arch/sparc64/kernel/vio.c
new file mode 100644
index 000000000000..21c015e8365b
--- /dev/null
+++ b/arch/sparc64/kernel/vio.c
@@ -0,0 +1,347 @@
1/* vio.c: Virtual I/O channel devices probing infrastructure.
2 *
3 * Copyright (c) 2003-2005 IBM Corp.
4 * Dave Engebretsen engebret@us.ibm.com
5 * Santiago Leon santil@us.ibm.com
6 * Hollis Blanchard <hollisb@us.ibm.com>
7 * Stephen Rothwell
8 *
9 * Adapted to sparc64 by David S. Miller davem@davemloft.net
10 */
11
12#include <linux/kernel.h>
13#include <linux/irq.h>
14#include <linux/init.h>
15
16#include <asm/mdesc.h>
17#include <asm/vio.h>
18
19static inline int find_in_proplist(const char *list, const char *match,
20 int len)
21{
22 while (len > 0) {
23 int l;
24
25 if (!strcmp(list, match))
26 return 1;
27 l = strlen(list) + 1;
28 list += l;
29 len -= l;
30 }
31 return 0;
32}
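
find_in_proplist() treats the property as a sequence of NUL-separated strings whose total byte length (terminators included) is passed in; for example (values invented):

	static const char compat[] = "SUNW,example-a\0SUNW,example-b";

	find_in_proplist(compat, "SUNW,example-b", sizeof(compat));	/* 1 */
	find_in_proplist(compat, "SUNW,example-x", sizeof(compat));	/* 0 */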
33
34static const struct vio_device_id *vio_match_device(
35 const struct vio_device_id *matches,
36 const struct vio_dev *dev)
37{
38 const char *type, *compat;
39 int len;
40
41 type = dev->type;
42 compat = dev->compat;
43 len = dev->compat_len;
44
45 while (matches->type[0] || matches->compat[0]) {
46 int match = 1;
47 if (matches->type[0]) {
 48			match &= type &&
 49				!strcmp(matches->type, type);
50 }
51 if (matches->compat[0]) {
52 match &= compat &&
53 find_in_proplist(compat, matches->compat, len);
54 }
55 if (match)
56 return matches;
57 matches++;
58 }
59 return NULL;
60}
61
62static int vio_bus_match(struct device *dev, struct device_driver *drv)
63{
64 struct vio_dev *vio_dev = to_vio_dev(dev);
65 struct vio_driver *vio_drv = to_vio_driver(drv);
66 const struct vio_device_id *matches = vio_drv->id_table;
67
68 if (!matches)
69 return 0;
70
71 return vio_match_device(matches, vio_dev) != NULL;
72}
73
74static int vio_device_probe(struct device *dev)
75{
76 struct vio_dev *vdev = to_vio_dev(dev);
77 struct vio_driver *drv = to_vio_driver(dev->driver);
78 const struct vio_device_id *id;
79 int error = -ENODEV;
80
81 if (drv->probe) {
82 id = vio_match_device(drv->id_table, vdev);
83 if (id)
84 error = drv->probe(vdev, id);
85 }
86
87 return error;
88}
89
90static int vio_device_remove(struct device *dev)
91{
92 struct vio_dev *vdev = to_vio_dev(dev);
93 struct vio_driver *drv = to_vio_driver(dev->driver);
94
95 if (drv->remove)
96 return drv->remove(vdev);
97
98 return 1;
99}
100
101static ssize_t devspec_show(struct device *dev,
102 struct device_attribute *attr, char *buf)
103{
104 struct vio_dev *vdev = to_vio_dev(dev);
105 const char *str = "none";
106
107 if (vdev->type) {
108 if (!strcmp(vdev->type, "network"))
109 str = "vnet";
110 else if (!strcmp(vdev->type, "block"))
111 str = "vdisk";
112 }
113
114 return sprintf(buf, "%s\n", str);
115}
116
117static struct device_attribute vio_dev_attrs[] = {
118 __ATTR_RO(devspec),
119 __ATTR_NULL
120};
121
122static struct bus_type vio_bus_type = {
123 .name = "vio",
124 .dev_attrs = vio_dev_attrs,
125 .match = vio_bus_match,
126 .probe = vio_device_probe,
127 .remove = vio_device_remove,
128};
129
130int vio_register_driver(struct vio_driver *viodrv)
131{
132 viodrv->driver.bus = &vio_bus_type;
133
134 return driver_register(&viodrv->driver);
135}
136EXPORT_SYMBOL(vio_register_driver);
137
138void vio_unregister_driver(struct vio_driver *viodrv)
139{
140 driver_unregister(&viodrv->driver);
141}
142EXPORT_SYMBOL(vio_unregister_driver);
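
A minimal, hypothetical client of this bus, to show how the match/probe path above gets exercised; every dummy_* name is invented, and the struct vio_driver/vio_device_id layout is the one implied by the code above (an id_table, probe/remove callbacks and an embedded device_driver), not a documented API.

static const struct vio_device_id dummy_vio_ids[] = {
	{ .type = "network" },
	{ /* terminating all-zero entry */ },
};

static int dummy_vio_probe(struct vio_dev *vdev, const struct vio_device_id *id)
{
	/* channel setup would go here */
	return 0;
}

static int dummy_vio_remove(struct vio_dev *vdev)
{
	return 0;
}

static struct vio_driver dummy_vio_driver = {
	.id_table	= dummy_vio_ids,
	.probe		= dummy_vio_probe,
	.remove		= dummy_vio_remove,
	.driver		= {
		.name	= "dummy_vio",
		.owner	= THIS_MODULE,
	},
};

/* module_init would call vio_register_driver(&dummy_vio_driver),
 * module_exit the matching vio_unregister_driver().
 */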
143
144struct mdesc_node *vio_find_endpoint(struct vio_dev *vdev)
145{
146 struct mdesc_node *endp, *mp = vdev->mp;
147 int i;
148
149 endp = NULL;
150 for (i = 0; i < mp->num_arcs; i++) {
151 struct mdesc_node *t;
152
153 if (strcmp(mp->arcs[i].name, "fwd"))
154 continue;
155
156 t = mp->arcs[i].arc;
157 if (strcmp(t->name, "channel-endpoint"))
158 continue;
159
160 endp = t;
161 break;
162 }
163
164 return endp;
165}
166EXPORT_SYMBOL(vio_find_endpoint);
167
168static void __devinit vio_dev_release(struct device *dev)
169{
170 kfree(to_vio_dev(dev));
171}
172
173static ssize_t
174show_pciobppath_attr(struct device *dev, struct device_attribute *attr,
175 char *buf)
176{
177 struct vio_dev *vdev;
178 struct device_node *dp;
179
180 vdev = to_vio_dev(dev);
181 dp = vdev->dp;
182
 183	return snprintf(buf, PAGE_SIZE, "%s\n", dp->full_name);
184}
185
186static DEVICE_ATTR(obppath, S_IRUSR | S_IRGRP | S_IROTH,
187 show_pciobppath_attr, NULL);
188
189struct device_node *cdev_node;
190
191static struct vio_dev *root_vdev;
192static u64 cdev_cfg_handle;
193
194static struct vio_dev *vio_create_one(struct mdesc_node *mp,
195 struct device *parent)
196{
197 const char *type, *compat;
198 struct device_node *dp;
199 struct vio_dev *vdev;
200 const u64 *irq;
201 int err, clen;
202
203 type = md_get_property(mp, "device-type", NULL);
204 if (!type)
205 type = md_get_property(mp, "name", NULL);
206 compat = md_get_property(mp, "device-type", &clen);
207
208 vdev = kzalloc(sizeof(*vdev), GFP_KERNEL);
209 if (!vdev) {
210 printk(KERN_ERR "VIO: Could not allocate vio_dev\n");
211 return NULL;
212 }
213
214 vdev->mp = mp;
215 vdev->type = type;
216 vdev->compat = compat;
217 vdev->compat_len = clen;
218
219 irq = md_get_property(mp, "tx-ino", NULL);
220 if (irq)
221 mp->irqs[0] = sun4v_build_virq(cdev_cfg_handle, *irq);
222
223 irq = md_get_property(mp, "rx-ino", NULL);
224 if (irq)
225 mp->irqs[1] = sun4v_build_virq(cdev_cfg_handle, *irq);
226
227 snprintf(vdev->dev.bus_id, BUS_ID_SIZE, "%lx", mp->node);
228 vdev->dev.parent = parent;
229 vdev->dev.bus = &vio_bus_type;
230 vdev->dev.release = vio_dev_release;
231
232 if (parent == NULL) {
233 dp = cdev_node;
234 } else if (to_vio_dev(parent) == root_vdev) {
235 dp = of_get_next_child(cdev_node, NULL);
236 while (dp) {
237 if (!strcmp(dp->type, type))
238 break;
239
240 dp = of_get_next_child(cdev_node, dp);
241 }
242 } else {
243 dp = to_vio_dev(parent)->dp;
244 }
245 vdev->dp = dp;
246
247 err = device_register(&vdev->dev);
248 if (err) {
249 printk(KERN_ERR "VIO: Could not register device %s, err=%d\n",
250 vdev->dev.bus_id, err);
251 kfree(vdev);
252 return NULL;
253 }
254 if (vdev->dp)
255 err = sysfs_create_file(&vdev->dev.kobj,
256 &dev_attr_obppath.attr);
257
258 return vdev;
259}
260
261static void walk_tree(struct mdesc_node *n, struct vio_dev *parent)
262{
263 int i;
264
265 for (i = 0; i < n->num_arcs; i++) {
266 struct mdesc_node *mp;
267 struct vio_dev *vdev;
268
269 if (strcmp(n->arcs[i].name, "fwd"))
270 continue;
271
272 mp = n->arcs[i].arc;
273
274 vdev = vio_create_one(mp, &parent->dev);
275 if (vdev && mp->num_arcs)
276 walk_tree(mp, vdev);
277 }
278}
279
280static void create_devices(struct mdesc_node *root)
281{
282 root_vdev = vio_create_one(root, NULL);
283 if (!root_vdev) {
 284		printk(KERN_ERR "VIO: Could not create root device.\n");
285 return;
286 }
287
288 walk_tree(root, root_vdev);
289}
290
291const char *channel_devices_node = "channel-devices";
292const char *channel_devices_compat = "SUNW,sun4v-channel-devices";
293const char *cfg_handle_prop = "cfg-handle";
294
295static int __init vio_init(void)
296{
297 struct mdesc_node *root;
298 const char *compat;
299 const u64 *cfg_handle;
300 int err, len;
301
302 root = md_find_node_by_name(NULL, channel_devices_node);
303 if (!root) {
304 printk(KERN_INFO "VIO: No channel-devices MDESC node.\n");
305 return 0;
306 }
307
308 cdev_node = of_find_node_by_name(NULL, "channel-devices");
309 if (!cdev_node) {
310 printk(KERN_INFO "VIO: No channel-devices OBP node.\n");
311 return -ENODEV;
312 }
313
314 compat = md_get_property(root, "compatible", &len);
315 if (!compat) {
 316		printk(KERN_ERR "VIO: Channel devices node lacks compatible "
 317		       "property\n");
318 return -ENODEV;
319 }
320 if (!find_in_proplist(compat, channel_devices_compat, len)) {
321 printk(KERN_ERR "VIO: Channel devices node lacks (%s) "
322 "compat entry.\n", channel_devices_compat);
323 return -ENODEV;
324 }
325
326 cfg_handle = md_get_property(root, cfg_handle_prop, NULL);
327 if (!cfg_handle) {
 328		printk(KERN_ERR "VIO: Channel devices node lacks %s property\n",
329 cfg_handle_prop);
330 return -ENODEV;
331 }
332
333 cdev_cfg_handle = *cfg_handle;
334
335 err = bus_register(&vio_bus_type);
336 if (err) {
337 printk(KERN_ERR "VIO: Could not register bus type err=%d\n",
338 err);
339 return err;
340 }
341
342 create_devices(root);
343
344 return 0;
345}
346
347postcore_initcall(vio_init);
diff --git a/arch/sparc64/kernel/viohs.c b/arch/sparc64/kernel/viohs.c
new file mode 100644
index 000000000000..3eb42e3624f3
--- /dev/null
+++ b/arch/sparc64/kernel/viohs.c
@@ -0,0 +1,809 @@
1/* viohs.c: LDOM Virtual I/O handshake helper layer.
2 *
3 * Copyright (C) 2007 David S. Miller <davem@davemloft.net>
4 */
5
6#include <linux/kernel.h>
7#include <linux/module.h>
8#include <linux/string.h>
9#include <linux/delay.h>
10#include <linux/sched.h>
11#include <linux/slab.h>
12
13#include <asm/ldc.h>
14#include <asm/vio.h>
15
16int vio_ldc_send(struct vio_driver_state *vio, void *data, int len)
17{
18 int err, limit = 1000;
19
20 err = -EINVAL;
21 while (limit-- > 0) {
22 err = ldc_write(vio->lp, data, len);
23 if (!err || (err != -EAGAIN))
24 break;
25 udelay(1);
26 }
27
28 return err;
29}
30EXPORT_SYMBOL(vio_ldc_send);
31
32static int send_ctrl(struct vio_driver_state *vio,
33 struct vio_msg_tag *tag, int len)
34{
35 tag->sid = vio_send_sid(vio);
36 return vio_ldc_send(vio, tag, len);
37}
38
39static void init_tag(struct vio_msg_tag *tag, u8 type, u8 stype, u16 stype_env)
40{
41 tag->type = type;
42 tag->stype = stype;
43 tag->stype_env = stype_env;
44}
45
46static int send_version(struct vio_driver_state *vio, u16 major, u16 minor)
47{
48 struct vio_ver_info pkt;
49
50 vio->_local_sid = (u32) sched_clock();
51
52 memset(&pkt, 0, sizeof(pkt));
53 init_tag(&pkt.tag, VIO_TYPE_CTRL, VIO_SUBTYPE_INFO, VIO_VER_INFO);
54 pkt.major = major;
55 pkt.minor = minor;
56 pkt.dev_class = vio->dev_class;
57
58 viodbg(HS, "SEND VERSION INFO maj[%u] min[%u] devclass[%u]\n",
59 major, minor, vio->dev_class);
60
61 return send_ctrl(vio, &pkt.tag, sizeof(pkt));
62}
63
64static int start_handshake(struct vio_driver_state *vio)
65{
66 int err;
67
68 viodbg(HS, "START HANDSHAKE\n");
69
70 vio->hs_state = VIO_HS_INVALID;
71
72 err = send_version(vio,
73 vio->ver_table[0].major,
74 vio->ver_table[0].minor);
75 if (err < 0)
76 return err;
77
78 return 0;
79}
80
81void vio_link_state_change(struct vio_driver_state *vio, int event)
82{
83 if (event == LDC_EVENT_UP) {
84 vio->hs_state = VIO_HS_INVALID;
85
86 switch (vio->dev_class) {
87 case VDEV_NETWORK:
88 case VDEV_NETWORK_SWITCH:
89 vio->dr_state = (VIO_DR_STATE_TXREQ |
90 VIO_DR_STATE_RXREQ);
91 break;
92
93 case VDEV_DISK:
94 vio->dr_state = VIO_DR_STATE_TXREQ;
95 break;
96 case VDEV_DISK_SERVER:
97 vio->dr_state = VIO_DR_STATE_RXREQ;
98 break;
99 }
100 start_handshake(vio);
101 }
102}
103EXPORT_SYMBOL(vio_link_state_change);
104
105static int handshake_failure(struct vio_driver_state *vio)
106{
107 struct vio_dring_state *dr;
108
109 /* XXX Put policy here... Perhaps start a timer to fire
110 * XXX in 100 ms, which will bring the link up and retry
111 * XXX the handshake.
112 */
113
114 viodbg(HS, "HANDSHAKE FAILURE\n");
115
116 vio->dr_state &= ~(VIO_DR_STATE_TXREG |
117 VIO_DR_STATE_RXREG);
118
119 dr = &vio->drings[VIO_DRIVER_RX_RING];
120 memset(dr, 0, sizeof(*dr));
121
122 kfree(vio->desc_buf);
123 vio->desc_buf = NULL;
124 vio->desc_buf_len = 0;
125
126 vio->hs_state = VIO_HS_INVALID;
127
128 return -ECONNRESET;
129}
130
131static int process_unknown(struct vio_driver_state *vio, void *arg)
132{
133 struct vio_msg_tag *pkt = arg;
134
135 viodbg(HS, "UNKNOWN CONTROL [%02x:%02x:%04x:%08x]\n",
136 pkt->type, pkt->stype, pkt->stype_env, pkt->sid);
137
138 printk(KERN_ERR "vio: ID[%lu] Resetting connection.\n",
139 vio->channel_id);
140
141 ldc_disconnect(vio->lp);
142
143 return -ECONNRESET;
144}
145
146static int send_dreg(struct vio_driver_state *vio)
147{
148 struct vio_dring_state *dr = &vio->drings[VIO_DRIVER_TX_RING];
149 union {
150 struct vio_dring_register pkt;
151 char all[sizeof(struct vio_dring_register) +
152 (sizeof(struct ldc_trans_cookie) *
153 dr->ncookies)];
154 } u;
155 int i;
156
157 memset(&u, 0, sizeof(u));
158 init_tag(&u.pkt.tag, VIO_TYPE_CTRL, VIO_SUBTYPE_INFO, VIO_DRING_REG);
159 u.pkt.dring_ident = 0;
160 u.pkt.num_descr = dr->num_entries;
161 u.pkt.descr_size = dr->entry_size;
162 u.pkt.options = VIO_TX_DRING;
163 u.pkt.num_cookies = dr->ncookies;
164
165 viodbg(HS, "SEND DRING_REG INFO ndesc[%u] dsz[%u] opt[0x%x] "
166 "ncookies[%u]\n",
167 u.pkt.num_descr, u.pkt.descr_size, u.pkt.options,
168 u.pkt.num_cookies);
169
170 for (i = 0; i < dr->ncookies; i++) {
171 u.pkt.cookies[i] = dr->cookies[i];
172
173 viodbg(HS, "DRING COOKIE(%d) [%016llx:%016llx]\n",
174 i,
175 (unsigned long long) u.pkt.cookies[i].cookie_addr,
176 (unsigned long long) u.pkt.cookies[i].cookie_size);
177 }
178
179 return send_ctrl(vio, &u.pkt.tag, sizeof(u));
180}
181
182static int send_rdx(struct vio_driver_state *vio)
183{
184 struct vio_rdx pkt;
185
186 memset(&pkt, 0, sizeof(pkt));
187
188 init_tag(&pkt.tag, VIO_TYPE_CTRL, VIO_SUBTYPE_INFO, VIO_RDX);
189
190 viodbg(HS, "SEND RDX INFO\n");
191
192 return send_ctrl(vio, &pkt.tag, sizeof(pkt));
193}
194
195static int send_attr(struct vio_driver_state *vio)
196{
197 return vio->ops->send_attr(vio);
198}
199
200static struct vio_version *find_by_major(struct vio_driver_state *vio,
201 u16 major)
202{
203 struct vio_version *ret = NULL;
204 int i;
205
206 for (i = 0; i < vio->ver_table_entries; i++) {
207 struct vio_version *v = &vio->ver_table[i];
208 if (v->major <= major) {
209 ret = v;
210 break;
211 }
212 }
213 return ret;
214}
215
216static int process_ver_info(struct vio_driver_state *vio,
217 struct vio_ver_info *pkt)
218{
219 struct vio_version *vap;
220 int err;
221
222 viodbg(HS, "GOT VERSION INFO maj[%u] min[%u] devclass[%u]\n",
223 pkt->major, pkt->minor, pkt->dev_class);
224
225 if (vio->hs_state != VIO_HS_INVALID) {
226 /* XXX Perhaps invoke start_handshake? XXX */
227 memset(&vio->ver, 0, sizeof(vio->ver));
228 vio->hs_state = VIO_HS_INVALID;
229 }
230
231 vap = find_by_major(vio, pkt->major);
232
233 vio->_peer_sid = pkt->tag.sid;
234
235 if (!vap) {
236 pkt->tag.stype = VIO_SUBTYPE_NACK;
237 pkt->major = 0;
238 pkt->minor = 0;
239 viodbg(HS, "SEND VERSION NACK maj[0] min[0]\n");
240 err = send_ctrl(vio, &pkt->tag, sizeof(*pkt));
241 } else if (vap->major != pkt->major) {
242 pkt->tag.stype = VIO_SUBTYPE_NACK;
243 pkt->major = vap->major;
244 pkt->minor = vap->minor;
245 viodbg(HS, "SEND VERSION NACK maj[%u] min[%u]\n",
246 pkt->major, pkt->minor);
247 err = send_ctrl(vio, &pkt->tag, sizeof(*pkt));
248 } else {
249 struct vio_version ver = {
250 .major = pkt->major,
251 .minor = pkt->minor,
252 };
253 if (ver.minor > vap->minor)
254 ver.minor = vap->minor;
255 pkt->minor = ver.minor;
256 pkt->tag.stype = VIO_SUBTYPE_ACK;
257 viodbg(HS, "SEND VERSION ACK maj[%u] min[%u]\n",
258 pkt->major, pkt->minor);
259 err = send_ctrl(vio, &pkt->tag, sizeof(*pkt));
260 if (err > 0) {
261 vio->ver = ver;
262 vio->hs_state = VIO_HS_GOTVERS;
263 }
264 }
265 if (err < 0)
266 return handshake_failure(vio);
267
268 return 0;
269}
270
271static int process_ver_ack(struct vio_driver_state *vio,
272 struct vio_ver_info *pkt)
273{
274 viodbg(HS, "GOT VERSION ACK maj[%u] min[%u] devclass[%u]\n",
275 pkt->major, pkt->minor, pkt->dev_class);
276
277 if (vio->hs_state & VIO_HS_GOTVERS) {
278 if (vio->ver.major != pkt->major ||
279 vio->ver.minor != pkt->minor) {
280 pkt->tag.stype = VIO_SUBTYPE_NACK;
281 (void) send_ctrl(vio, &pkt->tag, sizeof(*pkt));
282 return handshake_failure(vio);
283 }
284 } else {
285 vio->ver.major = pkt->major;
286 vio->ver.minor = pkt->minor;
287 vio->hs_state = VIO_HS_GOTVERS;
288 }
289
290 switch (vio->dev_class) {
291 case VDEV_NETWORK:
292 case VDEV_DISK:
293 if (send_attr(vio) < 0)
294 return handshake_failure(vio);
295 break;
296
297 default:
298 break;
299 }
300
301 return 0;
302}
303
304static int process_ver_nack(struct vio_driver_state *vio,
305 struct vio_ver_info *pkt)
306{
307 struct vio_version *nver;
308
309 viodbg(HS, "GOT VERSION NACK maj[%u] min[%u] devclass[%u]\n",
310 pkt->major, pkt->minor, pkt->dev_class);
311
312 if ((pkt->major == 0 && pkt->minor == 0) ||
313 !(nver = find_by_major(vio, pkt->major)))
314 return handshake_failure(vio);
315
316 if (send_version(vio, nver->major, nver->minor) < 0)
317 return handshake_failure(vio);
318
319 return 0;
320}
321
322static int process_ver(struct vio_driver_state *vio, struct vio_ver_info *pkt)
323{
324 switch (pkt->tag.stype) {
325 case VIO_SUBTYPE_INFO:
326 return process_ver_info(vio, pkt);
327
328 case VIO_SUBTYPE_ACK:
329 return process_ver_ack(vio, pkt);
330
331 case VIO_SUBTYPE_NACK:
332 return process_ver_nack(vio, pkt);
333
334 default:
335 return handshake_failure(vio);
 336	}
337}
338
339static int process_attr(struct vio_driver_state *vio, void *pkt)
340{
341 int err;
342
343 if (!(vio->hs_state & VIO_HS_GOTVERS))
344 return handshake_failure(vio);
345
346 err = vio->ops->handle_attr(vio, pkt);
347 if (err < 0) {
348 return handshake_failure(vio);
349 } else {
350 vio->hs_state |= VIO_HS_GOT_ATTR;
351
352 if ((vio->dr_state & VIO_DR_STATE_TXREQ) &&
353 !(vio->hs_state & VIO_HS_SENT_DREG)) {
354 if (send_dreg(vio) < 0)
355 return handshake_failure(vio);
356
357 vio->hs_state |= VIO_HS_SENT_DREG;
358 }
359 }
360 return 0;
361}
362
363static int all_drings_registered(struct vio_driver_state *vio)
364{
365 int need_rx, need_tx;
366
367 need_rx = (vio->dr_state & VIO_DR_STATE_RXREQ);
368 need_tx = (vio->dr_state & VIO_DR_STATE_TXREQ);
369
370 if (need_rx &&
371 !(vio->dr_state & VIO_DR_STATE_RXREG))
372 return 0;
373
374 if (need_tx &&
375 !(vio->dr_state & VIO_DR_STATE_TXREG))
376 return 0;
377
378 return 1;
379}
380
381static int process_dreg_info(struct vio_driver_state *vio,
382 struct vio_dring_register *pkt)
383{
384 struct vio_dring_state *dr;
385 int i, len;
386
387 viodbg(HS, "GOT DRING_REG INFO ident[%llx] "
388 "ndesc[%u] dsz[%u] opt[0x%x] ncookies[%u]\n",
389 (unsigned long long) pkt->dring_ident,
390 pkt->num_descr, pkt->descr_size, pkt->options,
391 pkt->num_cookies);
392
393 if (!(vio->dr_state & VIO_DR_STATE_RXREQ))
394 goto send_nack;
395
396 if (vio->dr_state & VIO_DR_STATE_RXREG)
397 goto send_nack;
398
399 vio->desc_buf = kzalloc(pkt->descr_size, GFP_ATOMIC);
400 if (!vio->desc_buf)
401 goto send_nack;
402
403 vio->desc_buf_len = pkt->descr_size;
404
405 dr = &vio->drings[VIO_DRIVER_RX_RING];
406
407 dr->num_entries = pkt->num_descr;
408 dr->entry_size = pkt->descr_size;
409 dr->ncookies = pkt->num_cookies;
410 for (i = 0; i < dr->ncookies; i++) {
411 dr->cookies[i] = pkt->cookies[i];
412
413 viodbg(HS, "DRING COOKIE(%d) [%016llx:%016llx]\n",
414 i,
415 (unsigned long long)
416 pkt->cookies[i].cookie_addr,
417 (unsigned long long)
418 pkt->cookies[i].cookie_size);
419 }
420
421 pkt->tag.stype = VIO_SUBTYPE_ACK;
422 pkt->dring_ident = ++dr->ident;
423
424 viodbg(HS, "SEND DRING_REG ACK ident[%llx]\n",
425 (unsigned long long) pkt->dring_ident);
426
427 len = (sizeof(*pkt) +
428 (dr->ncookies * sizeof(struct ldc_trans_cookie)));
429 if (send_ctrl(vio, &pkt->tag, len) < 0)
430 goto send_nack;
431
432 vio->dr_state |= VIO_DR_STATE_RXREG;
433
434 return 0;
435
436send_nack:
437 pkt->tag.stype = VIO_SUBTYPE_NACK;
438 viodbg(HS, "SEND DRING_REG NACK\n");
439 (void) send_ctrl(vio, &pkt->tag, sizeof(*pkt));
440
441 return handshake_failure(vio);
442}
443
444static int process_dreg_ack(struct vio_driver_state *vio,
445 struct vio_dring_register *pkt)
446{
447 struct vio_dring_state *dr;
448
449 viodbg(HS, "GOT DRING_REG ACK ident[%llx] "
450 "ndesc[%u] dsz[%u] opt[0x%x] ncookies[%u]\n",
451 (unsigned long long) pkt->dring_ident,
452 pkt->num_descr, pkt->descr_size, pkt->options,
453 pkt->num_cookies);
454
455 dr = &vio->drings[VIO_DRIVER_TX_RING];
456
457 if (!(vio->dr_state & VIO_DR_STATE_TXREQ))
458 return handshake_failure(vio);
459
460 dr->ident = pkt->dring_ident;
461 vio->dr_state |= VIO_DR_STATE_TXREG;
462
463 if (all_drings_registered(vio)) {
464 if (send_rdx(vio) < 0)
465 return handshake_failure(vio);
466 vio->hs_state = VIO_HS_SENT_RDX;
467 }
468 return 0;
469}
470
471static int process_dreg_nack(struct vio_driver_state *vio,
472 struct vio_dring_register *pkt)
473{
474 viodbg(HS, "GOT DRING_REG NACK ident[%llx] "
475 "ndesc[%u] dsz[%u] opt[0x%x] ncookies[%u]\n",
476 (unsigned long long) pkt->dring_ident,
477 pkt->num_descr, pkt->descr_size, pkt->options,
478 pkt->num_cookies);
479
480 return handshake_failure(vio);
481}
482
483static int process_dreg(struct vio_driver_state *vio,
484 struct vio_dring_register *pkt)
485{
486 if (!(vio->hs_state & VIO_HS_GOTVERS))
487 return handshake_failure(vio);
488
489 switch (pkt->tag.stype) {
490 case VIO_SUBTYPE_INFO:
491 return process_dreg_info(vio, pkt);
492
493 case VIO_SUBTYPE_ACK:
494 return process_dreg_ack(vio, pkt);
495
496 case VIO_SUBTYPE_NACK:
497 return process_dreg_nack(vio, pkt);
498
499 default:
500 return handshake_failure(vio);
501 }
502}
503
504static int process_dunreg(struct vio_driver_state *vio,
505 struct vio_dring_unregister *pkt)
506{
507 struct vio_dring_state *dr = &vio->drings[VIO_DRIVER_RX_RING];
508
509 viodbg(HS, "GOT DRING_UNREG\n");
510
511 if (pkt->dring_ident != dr->ident)
512 return 0;
513
514 vio->dr_state &= ~VIO_DR_STATE_RXREG;
515
516 memset(dr, 0, sizeof(*dr));
517
518 kfree(vio->desc_buf);
519 vio->desc_buf = NULL;
520 vio->desc_buf_len = 0;
521
522 return 0;
523}
524
525static int process_rdx_info(struct vio_driver_state *vio, struct vio_rdx *pkt)
526{
527 viodbg(HS, "GOT RDX INFO\n");
528
529 pkt->tag.stype = VIO_SUBTYPE_ACK;
530 viodbg(HS, "SEND RDX ACK\n");
531 if (send_ctrl(vio, &pkt->tag, sizeof(*pkt)) < 0)
532 return handshake_failure(vio);
533
534 vio->hs_state |= VIO_HS_SENT_RDX_ACK;
535 return 0;
536}
537
538static int process_rdx_ack(struct vio_driver_state *vio, struct vio_rdx *pkt)
539{
540 viodbg(HS, "GOT RDX ACK\n");
541
542 if (!(vio->hs_state & VIO_HS_SENT_RDX))
543 return handshake_failure(vio);
544
545 vio->hs_state |= VIO_HS_GOT_RDX_ACK;
546 return 0;
547}
548
549static int process_rdx_nack(struct vio_driver_state *vio, struct vio_rdx *pkt)
550{
551 viodbg(HS, "GOT RDX NACK\n");
552
553 return handshake_failure(vio);
554}
555
556static int process_rdx(struct vio_driver_state *vio, struct vio_rdx *pkt)
557{
558 if (!all_drings_registered(vio))
 559		return handshake_failure(vio);
560
561 switch (pkt->tag.stype) {
562 case VIO_SUBTYPE_INFO:
563 return process_rdx_info(vio, pkt);
564
565 case VIO_SUBTYPE_ACK:
566 return process_rdx_ack(vio, pkt);
567
568 case VIO_SUBTYPE_NACK:
569 return process_rdx_nack(vio, pkt);
570
571 default:
572 return handshake_failure(vio);
573 }
574}
575
576int vio_control_pkt_engine(struct vio_driver_state *vio, void *pkt)
577{
578 struct vio_msg_tag *tag = pkt;
579 u8 prev_state = vio->hs_state;
580 int err;
581
582 switch (tag->stype_env) {
583 case VIO_VER_INFO:
584 err = process_ver(vio, pkt);
585 break;
586
587 case VIO_ATTR_INFO:
588 err = process_attr(vio, pkt);
589 break;
590
591 case VIO_DRING_REG:
592 err = process_dreg(vio, pkt);
593 break;
594
595 case VIO_DRING_UNREG:
596 err = process_dunreg(vio, pkt);
597 break;
598
599 case VIO_RDX:
600 err = process_rdx(vio, pkt);
601 break;
602
603 default:
604 err = process_unknown(vio, pkt);
605 break;
606 }
607 if (!err &&
608 vio->hs_state != prev_state &&
609 (vio->hs_state & VIO_HS_COMPLETE))
610 vio->ops->handshake_complete(vio);
611
612 return err;
613}
614EXPORT_SYMBOL(vio_control_pkt_engine);
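
For context, a hedged sketch of how a driver's LDC event callback might drain the channel and hand control frames to vio_control_pkt_engine(). ldc_read() is assumed to be the read-side counterpart of the ldc_write() used in vio_ldc_send(), and the remaining names are invented for illustration.

static void example_vio_event(void *arg, int event)
{
	struct vio_driver_state *vio = arg;
	u64 pktbuf[8];	/* one 64-byte LDC packet */
	int err;

	if (event == LDC_EVENT_UP) {
		vio_link_state_change(vio, event);
		return;
	}

	/* Data-ready event: drain whatever the peer queued. */
	while ((err = ldc_read(vio->lp, pktbuf, sizeof(pktbuf))) > 0) {
		struct vio_msg_tag *tag = (struct vio_msg_tag *) pktbuf;

		if (vio_validate_sid(vio, tag) < 0)
			continue;

		if (tag->type == VIO_TYPE_CTRL)
			vio_control_pkt_engine(vio, pktbuf);
		/* data frames would be dispatched to the driver itself */
	}
}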
615
616void vio_conn_reset(struct vio_driver_state *vio)
617{
618}
619EXPORT_SYMBOL(vio_conn_reset);
620
621/* The issue is that the Solaris virtual disk server just mirrors the
622 * SID values it gets from the client peer. So we work around that
623 * here in vio_{validate,send}_sid() so that the drivers don't need
624 * to be aware of this crap.
625 */
626int vio_validate_sid(struct vio_driver_state *vio, struct vio_msg_tag *tp)
627{
628 u32 sid;
629
630 /* Always let VERSION+INFO packets through unchecked, they
631 * define the new SID.
632 */
633 if (tp->type == VIO_TYPE_CTRL &&
634 tp->stype == VIO_SUBTYPE_INFO &&
635 tp->stype_env == VIO_VER_INFO)
636 return 0;
637
638 /* Ok, now figure out which SID to use. */
639 switch (vio->dev_class) {
640 case VDEV_NETWORK:
641 case VDEV_NETWORK_SWITCH:
642 case VDEV_DISK_SERVER:
643 default:
644 sid = vio->_peer_sid;
645 break;
646
647 case VDEV_DISK:
648 sid = vio->_local_sid;
649 break;
650 }
651
652 if (sid == tp->sid)
653 return 0;
654 viodbg(DATA, "BAD SID tag->sid[%08x] peer_sid[%08x] local_sid[%08x]\n",
655 tp->sid, vio->_peer_sid, vio->_local_sid);
656 return -EINVAL;
657}
658EXPORT_SYMBOL(vio_validate_sid);
659
660u32 vio_send_sid(struct vio_driver_state *vio)
661{
662 switch (vio->dev_class) {
663 case VDEV_NETWORK:
664 case VDEV_NETWORK_SWITCH:
665 case VDEV_DISK:
666 default:
667 return vio->_local_sid;
668
669 case VDEV_DISK_SERVER:
670 return vio->_peer_sid;
671 }
672}
673EXPORT_SYMBOL(vio_send_sid);
674
 675int vio_ldc_alloc(struct vio_driver_state *vio,
676 struct ldc_channel_config *base_cfg,
677 void *event_arg)
678{
679 struct ldc_channel_config cfg = *base_cfg;
680 struct ldc_channel *lp;
681 const u64 *id;
682
683 id = md_get_property(vio->endpoint, "id", NULL);
684 if (!id) {
685 printk(KERN_ERR "%s: Channel lacks id property.\n",
686 vio->name);
687 return -ENODEV;
688 }
689
690 vio->channel_id = *id;
691
692 cfg.rx_irq = vio->rx_irq;
693 cfg.tx_irq = vio->tx_irq;
694
695 lp = ldc_alloc(vio->channel_id, &cfg, event_arg);
696 if (IS_ERR(lp))
697 return PTR_ERR(lp);
698
699 vio->lp = lp;
700
701 return 0;
702}
703EXPORT_SYMBOL(vio_ldc_alloc);
704
705void vio_ldc_free(struct vio_driver_state *vio)
706{
707 ldc_free(vio->lp);
708 vio->lp = NULL;
709
710 kfree(vio->desc_buf);
711 vio->desc_buf = NULL;
712 vio->desc_buf_len = 0;
713}
714EXPORT_SYMBOL(vio_ldc_free);
715
716void vio_port_up(struct vio_driver_state *vio)
717{
718 unsigned long flags;
719 int err, state;
720
721 spin_lock_irqsave(&vio->lock, flags);
722
723 state = ldc_state(vio->lp);
724
725 err = 0;
726 if (state == LDC_STATE_INIT) {
727 err = ldc_bind(vio->lp);
728 if (err)
729 printk(KERN_WARNING "%s: Port %lu bind failed, "
730 "err=%d\n",
731 vio->name, vio->channel_id, err);
732 }
733
734 if (!err) {
735 err = ldc_connect(vio->lp);
736 if (err)
737 printk(KERN_WARNING "%s: Port %lu connect failed, "
738 "err=%d\n",
739 vio->name, vio->channel_id, err);
740 }
741 if (err) {
742 unsigned long expires = jiffies + HZ;
743
744 expires = round_jiffies(expires);
745 mod_timer(&vio->timer, expires);
746 }
747
748 spin_unlock_irqrestore(&vio->lock, flags);
749}
750EXPORT_SYMBOL(vio_port_up);
751
752static void vio_port_timer(unsigned long _arg)
753{
754 struct vio_driver_state *vio = (struct vio_driver_state *) _arg;
755
756 vio_port_up(vio);
757}
758
759int vio_driver_init(struct vio_driver_state *vio, struct vio_dev *vdev,
760 u8 dev_class, struct mdesc_node *channel_endpoint,
761 struct vio_version *ver_table, int ver_table_size,
762 struct vio_driver_ops *ops, char *name)
763{
764 switch (dev_class) {
765 case VDEV_NETWORK:
766 case VDEV_NETWORK_SWITCH:
767 case VDEV_DISK:
768 case VDEV_DISK_SERVER:
769 break;
770
771 default:
772 return -EINVAL;
773 }
774
775 if (!ops->send_attr ||
776 !ops->handle_attr ||
777 !ops->handshake_complete)
778 return -EINVAL;
779
780 if (!channel_endpoint)
781 return -EINVAL;
782
783 if (!ver_table || ver_table_size < 0)
784 return -EINVAL;
785
786 if (!name)
787 return -EINVAL;
788
789 spin_lock_init(&vio->lock);
790
791 vio->name = name;
792
793 vio->dev_class = dev_class;
794 vio->vdev = vdev;
795
796 vio->endpoint = channel_endpoint;
797 vio->tx_irq = channel_endpoint->irqs[0];
798 vio->rx_irq = channel_endpoint->irqs[1];
799
800 vio->ver_table = ver_table;
801 vio->ver_table_entries = ver_table_size;
802
803 vio->ops = ops;
804
805 setup_timer(&vio->timer, vio_port_timer, (unsigned long) vio);
806
807 return 0;
808}
809EXPORT_SYMBOL(vio_driver_init);
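
Finally, a hypothetical probe path tying the handshake plumbing together (vio_driver_init(), vio_ldc_alloc(), vio_port_up()). The dummy_* callbacks (bodies elided), the ldc_channel_config field names, and LDC_MODE_UNRELIABLE are assumptions about asm/ldc.h rather than anything this patch defines.

static struct vio_version dummy_versions[] = {
	{ .major = 1, .minor = 0 },
};

static struct vio_driver_ops dummy_ops = {
	.send_attr		= dummy_send_attr,
	.handle_attr		= dummy_handle_attr,
	.handshake_complete	= dummy_handshake_complete,
};

static struct vio_driver_state dummy_state;

static int dummy_port_probe(struct vio_dev *vdev, const struct vio_device_id *id)
{
	struct mdesc_node *endp = vio_find_endpoint(vdev);
	struct ldc_channel_config cfg = {
		.event	= dummy_event,		/* LDC event callback */
		.mtu	= 64,
		.mode	= LDC_MODE_UNRELIABLE,
	};
	int err;

	if (!endp)
		return -ENODEV;

	err = vio_driver_init(&dummy_state, vdev, VDEV_NETWORK, endp,
			      dummy_versions, ARRAY_SIZE(dummy_versions),
			      &dummy_ops, "dummy");
	if (err)
		return err;

	err = vio_ldc_alloc(&dummy_state, &cfg, &dummy_state);
	if (err)
		return err;

	vio_port_up(&dummy_state);
	return 0;
}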