Diffstat (limited to 'drivers/misc/genwqe')
-rw-r--r--  drivers/misc/genwqe/card_ddcb.c  | 1373
-rw-r--r--  drivers/misc/genwqe/card_ddcb.h  |  188
-rw-r--r--  drivers/misc/genwqe/card_dev.c   | 1486
3 files changed, 3047 insertions, 0 deletions
diff --git a/drivers/misc/genwqe/card_ddcb.c b/drivers/misc/genwqe/card_ddcb.c
new file mode 100644
index 000000000000..cc6fca7a4851
--- /dev/null
+++ b/drivers/misc/genwqe/card_ddcb.c
@@ -0,0 +1,1373 @@
1/**
2 * IBM Accelerator Family 'GenWQE'
3 *
4 * (C) Copyright IBM Corp. 2013
5 *
6 * Author: Frank Haverkamp <haver@linux.vnet.ibm.com>
7 * Author: Joerg-Stephan Vogt <jsvogt@de.ibm.com>
8 * Author: Michael Jung <mijung@de.ibm.com>
9 * Author: Michael Ruettger <michael@ibmra.de>
10 *
11 * This program is free software; you can redistribute it and/or modify
12 * it under the terms of the GNU General Public License (version 2 only)
13 * as published by the Free Software Foundation.
14 *
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
19 */
20
21/*
22 * Device Driver Control Block (DDCB) queue support. Definition of
23 * interrupt handlers for queue support as well as triggering the
24 * health monitor code in case of problems. The current hardware uses
25 * an MSI interrupt which is shared between error handling and
26 * functional code.
27 */
28
29#include <linux/types.h>
30#include <linux/module.h>
31#include <linux/sched.h>
32#include <linux/wait.h>
33#include <linux/pci.h>
34#include <linux/string.h>
35#include <linux/dma-mapping.h>
36#include <linux/delay.h>
37#include <linux/kthread.h>
38#include <linux/interrupt.h>
39#include <linux/crc-itu-t.h>
40
41#include "card_ddcb.h"
42
43/*
44 * N: next DDCB, this is where the next DDCB will be put.
45 * A: active DDCB, this is where the code will look for the next completion.
46 * x: DDCB is enqueued, we are waiting for its completion.
47 *
48 * Situation (1): Empty queue
49 * +---+---+---+---+---+---+---+---+
50 * | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 |
51 * | | | | | | | | |
52 * +---+---+---+---+---+---+---+---+
53 * A/N
54 * enqueued_ddcbs = A - N = 2 - 2 = 0
55 *
56 * Situation (2): Wrapped, N > A
57 * +---+---+---+---+---+---+---+---+
58 * | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 |
59 * | | | x | x | | | | |
60 * +---+---+---+---+---+---+---+---+
61 * A N
62 * enqueued_ddcbs = N - A = 4 - 2 = 2
63 *
64 * Situation (3): Queue wrapped, A > N
65 * +---+---+---+---+---+---+---+---+
66 * | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 |
67 * | x | x | | | x | x | x | x |
68 * +---+---+---+---+---+---+---+---+
69 * N A
70 * enqueued_ddcbs = queue_max - (A - N) = 8 - (4 - 2) = 6
71 *
72 * Situation (4a): Queue full N > A
73 * +---+---+---+---+---+---+---+---+
74 * | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 |
75 * | x | x | x | x | x | x | x | |
76 * +---+---+---+---+---+---+---+---+
77 * A N
78 *
79 * enqueued_ddcbs = N - A = 7 - 0 = 7
80 *
81 * Situation (4b): Queue full A > N
82 * +---+---+---+---+---+---+---+---+
83 * | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 |
84 * | x | x | x | | x | x | x | x |
85 * +---+---+---+---+---+---+---+---+
86 * N A
87 * enqueued_ddcbs = queue_max - (A - N) = 8 - (4 - 3) = 7
88 */
89
90static int queue_empty(struct ddcb_queue *queue)
91{
92 return queue->ddcb_next == queue->ddcb_act;
93}
94
95static int queue_enqueued_ddcbs(struct ddcb_queue *queue)
96{
97 if (queue->ddcb_next >= queue->ddcb_act)
98 return queue->ddcb_next - queue->ddcb_act;
99
100 return queue->ddcb_max - (queue->ddcb_act - queue->ddcb_next);
101}
102
103static int queue_free_ddcbs(struct ddcb_queue *queue)
104{
105 int free_ddcbs = queue->ddcb_max - queue_enqueued_ddcbs(queue) - 1;
106
107 if (WARN_ON_ONCE(free_ddcbs < 0)) { /* must never ever happen! */
108 return 0;
109 }
110 return free_ddcbs;
111}
112
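/*
 * Illustration only (not part of the original driver code): a minimal
 * sketch exercising the queue arithmetic from the diagrams above, e.g.
 * situation (3) with ddcb_max = 8, ddcb_act = 4 and ddcb_next = 2.
 * GENWQE_QUEUE_MATH_EXAMPLE is a hypothetical guard, so this is
 * normally compiled out.
 */
#ifdef GENWQE_QUEUE_MATH_EXAMPLE
static void queue_math_example(void)
{
	struct ddcb_queue q = { .ddcb_max = 8, .ddcb_act = 4, .ddcb_next = 2 };

	WARN_ON(queue_enqueued_ddcbs(&q) != 6);	/* 8 - (4 - 2) = 6 */
	WARN_ON(queue_free_ddcbs(&q) != 1);	/* 8 - 6 - 1 = 1 */
}
#endif
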
113/*
114 * Use of the PRIV field in the DDCB for queue debugging:
115 *
116 * (1) Trying to get rid of a DDCB which saw a timeout:
117 * pddcb->priv[6] = 0xcc; # cleared
118 *
119 * (2) Append a DDCB via NEXT bit:
120 * pddcb->priv[7] = 0xaa; # appended
121 *
122 * (3) DDCB needed tapping:
123 * pddcb->priv[7] = 0xbb; # tapped
124 *
125 * (4) DDCB marked as correctly finished:
126 * pddcb->priv[6] = 0xff; # finished
127 */
128
129static inline void ddcb_mark_tapped(struct ddcb *pddcb)
130{
131 pddcb->priv[7] = 0xbb; /* tapped */
132}
133
134static inline void ddcb_mark_appended(struct ddcb *pddcb)
135{
136 pddcb->priv[7] = 0xaa; /* appended */
137}
138
139static inline void ddcb_mark_cleared(struct ddcb *pddcb)
140{
141 pddcb->priv[6] = 0xcc; /* cleared */
142}
143
144static inline void ddcb_mark_finished(struct ddcb *pddcb)
145{
146 pddcb->priv[6] = 0xff; /* finished */
147}
148
149static inline void ddcb_mark_unused(struct ddcb *pddcb)
150{
151 pddcb->priv_64 = cpu_to_be64(0); /* not tapped */
152}
153
154/**
155 * genwqe_crc16() - Generate 16-bit crc as required for DDCBs
156 * @buff: pointer to data buffer
157 * @len: length of data for calculation
158 * @init: initial crc (0xffff at start)
159 *
160 * Polynomial = x^16 + x^12 + x^5 + 1 (0x1021)
161 * Example: 4 bytes 0x01 0x02 0x03 0x04 with init = 0xffff
162 * should result in a crc16 of 0x89c3
163 *
164 * Return: crc16 checksum in big endian format!
165 */
166static inline u16 genwqe_crc16(const u8 *buff, size_t len, u16 init)
167{
168 return crc_itu_t(init, buff, len);
169}
170
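/*
 * Illustration only (not part of the original driver code): a minimal
 * check of the example documented above, 4 bytes 0x01 0x02 0x03 0x04
 * with init 0xffff giving a crc16 of 0x89c3. GENWQE_CRC16_EXAMPLE is a
 * hypothetical guard, so this is normally compiled out.
 */
#ifdef GENWQE_CRC16_EXAMPLE
static void genwqe_crc16_example(void)
{
	static const u8 data[] = { 0x01, 0x02, 0x03, 0x04 };

	WARN_ON(genwqe_crc16(data, sizeof(data), 0xffff) != 0x89c3);
}
#endif
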
171static void print_ddcb_info(struct genwqe_dev *cd, struct ddcb_queue *queue)
172{
173 int i;
174 struct ddcb *pddcb;
175 unsigned long flags;
176 struct pci_dev *pci_dev = cd->pci_dev;
177
178 spin_lock_irqsave(&cd->print_lock, flags);
179
180 dev_info(&pci_dev->dev,
181 "DDCB list for card #%d (ddcb_act=%d / ddcb_next=%d):\n",
182 cd->card_idx, queue->ddcb_act, queue->ddcb_next);
183
184 pddcb = queue->ddcb_vaddr;
185 for (i = 0; i < queue->ddcb_max; i++) {
186 dev_err(&pci_dev->dev,
187 " %c %-3d: RETC=%03x SEQ=%04x "
188 "HSI=%02X SHI=%02x PRIV=%06llx CMD=%03x\n",
189 i == queue->ddcb_act ? '>' : ' ',
190 i,
191 be16_to_cpu(pddcb->retc_16),
192 be16_to_cpu(pddcb->seqnum_16),
193 pddcb->hsi,
194 pddcb->shi,
195 be64_to_cpu(pddcb->priv_64),
196 pddcb->cmd);
197 pddcb++;
198 }
199 spin_unlock_irqrestore(&cd->print_lock, flags);
200}
201
202struct genwqe_ddcb_cmd *ddcb_requ_alloc(void)
203{
204 struct ddcb_requ *req;
205
206 req = kzalloc(sizeof(*req), GFP_ATOMIC);
207 if (!req)
208 return NULL;
209
210 return &req->cmd;
211}
212
213void ddcb_requ_free(struct genwqe_ddcb_cmd *cmd)
214{
215 struct ddcb_requ *req = container_of(cmd, struct ddcb_requ, cmd);
216 kfree(req);
217}
218
219static inline enum genwqe_requ_state ddcb_requ_get_state(struct ddcb_requ *req)
220{
221 return req->req_state;
222}
223
224static inline void ddcb_requ_set_state(struct ddcb_requ *req,
225 enum genwqe_requ_state new_state)
226{
227 req->req_state = new_state;
228}
229
230static inline int ddcb_requ_collect_debug_data(struct ddcb_requ *req)
231{
232 return req->cmd.ddata_addr != 0x0;
233}
234
235/**
236 * ddcb_requ_finished() - Returns the hardware state of the associated DDCB
237 * @cd: pointer to genwqe device descriptor
238 * @req: DDCB work request
239 *
240 * The status of the ddcb_requ mirrors this hardware state, but is
241 * copied into the ddcb_requ by the interrupt/polling code. The
242 * low-level code should check the hardware state directly; the
243 * higher-level code should check the copy.
244 *
245 * This function will also return true if the state of the queue is
246 * not GENWQE_CARD_USED. This enables us to purge all DDCBs in the
247 * shutdown case.
248 */
249static int ddcb_requ_finished(struct genwqe_dev *cd, struct ddcb_requ *req)
250{
251 return (ddcb_requ_get_state(req) == GENWQE_REQU_FINISHED) ||
252 (cd->card_state != GENWQE_CARD_USED);
253}
254
255/**
256 * enqueue_ddcb() - Enqueue a DDCB
257 * @cd: pointer to genwqe device descriptor
258 * @queue: queue this operation should be done on
259 * @ddcb_no: pointer to ddcb number being tapped
260 *
261 * Start execution of DDCB by tapping or append to queue via NEXT
262 * bit. This is done by an atomic 'compare and swap' instruction and
263 * checking SHI and HSI of the previous DDCB.
264 *
265 * This function must only be called with ddcb_lock held.
266 *
267 * Return: 1 if new DDCB is appended to previous
268 * 2 if DDCB queue is tapped via register/simulation
269 */
270#define RET_DDCB_APPENDED 1
271#define RET_DDCB_TAPPED 2
272
273static int enqueue_ddcb(struct genwqe_dev *cd, struct ddcb_queue *queue,
274 struct ddcb *pddcb, int ddcb_no)
275{
276 unsigned int try;
277 int prev_no;
278 struct ddcb *prev_ddcb;
279 u32 old, new, icrc_hsi_shi;
280 u64 num;
281
282 /*
283 * For performance checks a Dispatch Timestamp can be put into the
284 * DDCB. It is supposed to use the SLU's free running counter,
285 * but this requires PCIe cycles.
286 */
287 ddcb_mark_unused(pddcb);
288
289 /* check previous DDCB if already fetched */
290 prev_no = (ddcb_no == 0) ? queue->ddcb_max - 1 : ddcb_no - 1;
291 prev_ddcb = &queue->ddcb_vaddr[prev_no];
292
293 /*
294 * It might have happened that the HSI.FETCHED bit is
295 * already set. Retry in this case; therefore at most two
296 * attempts are expected.
297 */
298 ddcb_mark_appended(pddcb);
299 for (try = 0; try < 2; try++) {
300 old = prev_ddcb->icrc_hsi_shi_32; /* read SHI/HSI in BE32 */
301
302 /* try to append via NEXT bit if prev DDCB is not completed */
303 if ((old & DDCB_COMPLETED_BE32) != 0x00000000)
304 break;
305
306 new = (old | DDCB_NEXT_BE32);
307 icrc_hsi_shi = cmpxchg(&prev_ddcb->icrc_hsi_shi_32, old, new);
308
309 if (icrc_hsi_shi == old)
310 return RET_DDCB_APPENDED; /* appended to queue */
311 }
312
313 /* Queue must be re-started by updating QUEUE_OFFSET */
314 ddcb_mark_tapped(pddcb);
315 num = (u64)ddcb_no << 8;
316 __genwqe_writeq(cd, queue->IO_QUEUE_OFFSET, num); /* start queue */
317
318 return RET_DDCB_TAPPED;
319}
320
321/**
322 * copy_ddcb_results() - Copy output state from real DDCB to request
323 *
324 * Copy the DDCB ASV to the request struct. No endian conversion
325 * is done, since the data structure in the ASV is still unknown
326 * here.
327 *
328 * This is needed by:
329 * - genwqe_purge_ddcb()
330 * - genwqe_check_ddcb_queue()
331 */
332static void copy_ddcb_results(struct ddcb_requ *req, int ddcb_no)
333{
334 struct ddcb_queue *queue = req->queue;
335 struct ddcb *pddcb = &queue->ddcb_vaddr[req->num];
336
337 memcpy(&req->cmd.asv[0], &pddcb->asv[0], DDCB_ASV_LENGTH);
338
339 /* copy status flags of the variant part */
340 req->cmd.vcrc = be16_to_cpu(pddcb->vcrc_16);
341 req->cmd.deque_ts = be64_to_cpu(pddcb->deque_ts_64);
342 req->cmd.cmplt_ts = be64_to_cpu(pddcb->cmplt_ts_64);
343
344 req->cmd.attn = be16_to_cpu(pddcb->attn_16);
345 req->cmd.progress = be32_to_cpu(pddcb->progress_32);
346 req->cmd.retc = be16_to_cpu(pddcb->retc_16);
347
348 if (ddcb_requ_collect_debug_data(req)) {
349 int prev_no = (ddcb_no == 0) ?
350 queue->ddcb_max - 1 : ddcb_no - 1;
351 struct ddcb *prev_pddcb = &queue->ddcb_vaddr[prev_no];
352
353 memcpy(&req->debug_data.ddcb_finished, pddcb,
354 sizeof(req->debug_data.ddcb_finished));
355 memcpy(&req->debug_data.ddcb_prev, prev_pddcb,
356 sizeof(req->debug_data.ddcb_prev));
357 }
358}
359
360/**
361 * genwqe_check_ddcb_queue() - Checks DDCB queue for completed work requests.
362 * @cd: pointer to genwqe device descriptor
363 *
364 * Return: Number of DDCBs which were finished
365 */
366static int genwqe_check_ddcb_queue(struct genwqe_dev *cd,
367 struct ddcb_queue *queue)
368{
369 unsigned long flags;
370 int ddcbs_finished = 0;
371 struct pci_dev *pci_dev = cd->pci_dev;
372
373 spin_lock_irqsave(&queue->ddcb_lock, flags);
374
375 /* FIXME avoid soft locking CPU */
376 while (!queue_empty(queue) && (ddcbs_finished < queue->ddcb_max)) {
377
378 struct ddcb *pddcb;
379 struct ddcb_requ *req;
380 u16 vcrc, vcrc_16, retc_16;
381
382 pddcb = &queue->ddcb_vaddr[queue->ddcb_act];
383
384 if ((pddcb->icrc_hsi_shi_32 & DDCB_COMPLETED_BE32) ==
385 0x00000000)
386 goto go_home; /* not completed, continue waiting */
387
388 /* Note: DDCB could be purged */
389
390 req = queue->ddcb_req[queue->ddcb_act];
391 if (req == NULL) {
392 /* this occurs if DDCB is purged, not an error */
393 /* Move active DDCB further; Nothing to do anymore. */
394 goto pick_next_one;
395 }
396
397 /*
398 * HSI=0x44 (fetched and completed), but RETC is
399 * 0x101, or even worse 0x000.
400 *
401 * In case of seeing the queue in inconsistent state
402 * we read the errcnts and the queue status to provide
403 * a trigger for our PCIe analyzer stop capturing.
404 */
405 retc_16 = be16_to_cpu(pddcb->retc_16);
406 if ((pddcb->hsi == 0x44) && (retc_16 <= 0x101)) {
407 u64 errcnts, status;
408 u64 ddcb_offs = (u64)pddcb - (u64)queue->ddcb_vaddr;
409
410 errcnts = __genwqe_readq(cd, queue->IO_QUEUE_ERRCNTS);
411 status = __genwqe_readq(cd, queue->IO_QUEUE_STATUS);
412
413 dev_err(&pci_dev->dev,
414 "[%s] SEQN=%04x HSI=%02x RETC=%03x "
415 " Q_ERRCNTS=%016llx Q_STATUS=%016llx\n"
416 " DDCB_DMA_ADDR=%016llx\n",
417 __func__, be16_to_cpu(pddcb->seqnum_16),
418 pddcb->hsi, retc_16, errcnts, status,
419 queue->ddcb_daddr + ddcb_offs);
420 }
421
422 copy_ddcb_results(req, queue->ddcb_act);
423 queue->ddcb_req[queue->ddcb_act] = NULL; /* take from queue */
424
425 dev_dbg(&pci_dev->dev, "FINISHED DDCB#%d\n", req->num);
426 genwqe_hexdump(pci_dev, pddcb, sizeof(*pddcb));
427
428 ddcb_mark_finished(pddcb);
429
430 /* calculate CRC_16 to see if VCRC is correct */
431 vcrc = genwqe_crc16(pddcb->asv,
432 VCRC_LENGTH(req->cmd.asv_length),
433 0xffff);
434 vcrc_16 = be16_to_cpu(pddcb->vcrc_16);
435 if (vcrc != vcrc_16) {
436 printk_ratelimited(KERN_ERR
437 "%s %s: err: wrong VCRC pre=%02x vcrc_len=%d "
438 "bytes vcrc_data=%04x is not vcrc_card=%04x\n",
439 GENWQE_DEVNAME, dev_name(&pci_dev->dev),
440 pddcb->pre, VCRC_LENGTH(req->cmd.asv_length),
441 vcrc, vcrc_16);
442 }
443
444 ddcb_requ_set_state(req, GENWQE_REQU_FINISHED);
445 queue->ddcbs_completed++;
446 queue->ddcbs_in_flight--;
447
448 /* wake up process waiting for this DDCB */
449 wake_up_interruptible(&queue->ddcb_waitqs[queue->ddcb_act]);
450
451pick_next_one:
452 queue->ddcb_act = (queue->ddcb_act + 1) % queue->ddcb_max;
453 ddcbs_finished++;
454 }
455
456 go_home:
457 spin_unlock_irqrestore(&queue->ddcb_lock, flags);
458 return ddcbs_finished;
459}
460
461/**
462 * __genwqe_wait_ddcb(): Waits until DDCB is completed
463 * @cd: pointer to genwqe device descriptor
464 * @req: pointer to requested DDCB parameters
465 *
466 * The Service Layer will update the RETC in DDCB when processing is
467 * pending or done.
468 *
469 * Return: > 0 remaining jiffies, DDCB completed
470 * -ETIMEDOUT when timeout
471 * -ERESTARTSYS when ^C
472 * -EINVAL when unknown error condition
473 *
474 * When an error is returned, the caller needs to ensure that
475 * purge_ddcb() is called to get the &req removed from the
476 * queue.
477 */
478int __genwqe_wait_ddcb(struct genwqe_dev *cd, struct ddcb_requ *req)
479{
480 int rc;
481 unsigned int ddcb_no;
482 struct ddcb_queue *queue;
483 struct pci_dev *pci_dev = cd->pci_dev;
484
485 if (req == NULL)
486 return -EINVAL;
487
488 queue = req->queue;
489 if (queue == NULL)
490 return -EINVAL;
491
492 ddcb_no = req->num;
493 if (ddcb_no >= queue->ddcb_max)
494 return -EINVAL;
495
496 rc = wait_event_interruptible_timeout(queue->ddcb_waitqs[ddcb_no],
497 ddcb_requ_finished(cd, req),
498 genwqe_ddcb_software_timeout * HZ);
499
500 /*
501 * We need to distinguish 3 cases here:
502 * 1. rc == 0 timeout occurred
503 * 2. rc == -ERESTARTSYS signal received
504 * 3. rc > 0 remaining jiffies condition is true
505 */
506 if (rc == 0) {
507 struct ddcb_queue *queue = req->queue;
508 struct ddcb *pddcb;
509
510 /*
511 * Timeout may be caused by long task switching time.
512 * When timeout happens, check if the request has
513 * meanwhile completed.
514 */
515 genwqe_check_ddcb_queue(cd, req->queue);
516 if (ddcb_requ_finished(cd, req))
517 return rc;
518
519 dev_err(&pci_dev->dev,
520 "[%s] err: DDCB#%d timeout rc=%d state=%d req @ %p\n",
521 __func__, req->num, rc, ddcb_requ_get_state(req),
522 req);
523 dev_err(&pci_dev->dev,
524 "[%s] IO_QUEUE_STATUS=0x%016llx\n", __func__,
525 __genwqe_readq(cd, queue->IO_QUEUE_STATUS));
526
527 pddcb = &queue->ddcb_vaddr[req->num];
528 genwqe_hexdump(pci_dev, pddcb, sizeof(*pddcb));
529
530 print_ddcb_info(cd, req->queue);
531 return -ETIMEDOUT;
532
533 } else if (rc == -ERESTARTSYS) {
534 return rc;
535 /*
536 * EINTR: Stops the application
537 * ERESTARTSYS: Restartable system call; will be called again
538 */
539
540 } else if (rc < 0) {
541 dev_err(&pci_dev->dev,
542 "[%s] err: DDCB#%d unknown result (rc=%d) %d!\n",
543 __func__, req->num, rc, ddcb_requ_get_state(req));
544 return -EINVAL;
545 }
546
547 /* Severe error occurred. Driver is forced to stop operation */
548 if (cd->card_state != GENWQE_CARD_USED) {
549 dev_err(&pci_dev->dev,
550 "[%s] err: DDCB#%d forced to stop (rc=%d)\n",
551 __func__, req->num, rc);
552 return -EIO;
553 }
554 return rc;
555}
556
557/**
558 * get_next_ddcb() - Get next available DDCB
559 * @cd: pointer to genwqe device descriptor
560 *
561 * The DDCB's content is completely cleared except for presets for PRE
562 * and SEQNUM. This function must only be called when ddcb_lock is held.
563 *
564 * Return: NULL if no empty DDCB is available, otherwise a pointer to the next DDCB.
565 */
566static struct ddcb *get_next_ddcb(struct genwqe_dev *cd,
567 struct ddcb_queue *queue,
568 int *num)
569{
570 u64 *pu64;
571 struct ddcb *pddcb;
572
573 if (queue_free_ddcbs(queue) == 0) /* queue is full */
574 return NULL;
575
576 /* find new ddcb */
577 pddcb = &queue->ddcb_vaddr[queue->ddcb_next];
578
579 /* if it is not completed, we are not allowed to use it */
580 /* barrier(); */
581 if ((pddcb->icrc_hsi_shi_32 & DDCB_COMPLETED_BE32) == 0x00000000)
582 return NULL;
583
584 *num = queue->ddcb_next; /* internal DDCB number */
585 queue->ddcb_next = (queue->ddcb_next + 1) % queue->ddcb_max;
586
587 /* clear important DDCB fields */
588 pu64 = (u64 *)pddcb;
589 pu64[0] = 0ULL; /* offs 0x00 (ICRC,HSI,SHI,...) */
590 pu64[1] = 0ULL; /* offs 0x08 (ACFUNC,CMD...) */
591
592 /* destroy previous results in ASV */
593 pu64[0x80/8] = 0ULL; /* offs 0x80 (ASV + 0) */
594 pu64[0x88/8] = 0ULL; /* offs 0x88 (ASV + 0x08) */
595 pu64[0x90/8] = 0ULL; /* offs 0x90 (ASV + 0x10) */
596 pu64[0x98/8] = 0ULL; /* offs 0x98 (ASV + 0x18) */
597 pu64[0xd0/8] = 0ULL; /* offs 0xd0 (RETC,ATTN...) */
598
599 pddcb->pre = DDCB_PRESET_PRE; /* 128 */
600 pddcb->seqnum_16 = cpu_to_be16(queue->ddcb_seq++);
601 return pddcb;
602}
603
604/**
605 * __genwqe_purge_ddcb() - Remove a DDCB from the workqueue
606 * @cd: genwqe device descriptor
607 * @req: DDCB request
608 *
609 * This will fail when the request was already FETCHED. In this case
610 * we need to wait until it is finished. Else the DDCB can be
611 * reused. This function also ensures that the request data structure
612 * is removed from ddcb_req[].
613 *
614 * Do not forget to call this function when genwqe_wait_ddcb() fails,
615 * such that the request gets really removed from ddcb_req[].
616 *
617 * Return: 0 success
618 */
619int __genwqe_purge_ddcb(struct genwqe_dev *cd, struct ddcb_requ *req)
620{
621 struct ddcb *pddcb = NULL;
622 unsigned int t;
623 unsigned long flags;
624 struct ddcb_queue *queue = req->queue;
625 struct pci_dev *pci_dev = cd->pci_dev;
626 u32 icrc_hsi_shi = 0x0000;
627 u64 queue_status;
628 u32 old, new;
629
631 if (genwqe_ddcb_software_timeout <= 0) {
632 dev_err(&pci_dev->dev,
633 "[%s] err: software timeout is not set!\n", __func__);
634 return -EFAULT;
635 }
636
637 pddcb = &queue->ddcb_vaddr[req->num];
638
639 for (t = 0; t < genwqe_ddcb_software_timeout * 10; t++) {
640
641 spin_lock_irqsave(&queue->ddcb_lock, flags);
642
643 /* Check if req was meanwhile finished */
644 if (ddcb_requ_get_state(req) == GENWQE_REQU_FINISHED)
645 goto go_home;
646
647 /* try to set PURGE bit if FETCHED/COMPLETED are not set */
648 old = pddcb->icrc_hsi_shi_32; /* read SHI/HSI in BE32 */
649 if ((old & DDCB_FETCHED_BE32) == 0x00000000) {
650
651 new = (old | DDCB_PURGE_BE32);
652 icrc_hsi_shi = cmpxchg(&pddcb->icrc_hsi_shi_32,
653 old, new);
654 if (icrc_hsi_shi == old)
655 goto finish_ddcb;
656 }
657
658 /* normal finish with HSI bit */
659 barrier();
660 icrc_hsi_shi = pddcb->icrc_hsi_shi_32;
661 if (icrc_hsi_shi & DDCB_COMPLETED_BE32)
662 goto finish_ddcb;
663
664 spin_unlock_irqrestore(&queue->ddcb_lock, flags);
665
666 /*
667 * Here the check_ddcb() function will most likely
668 * discover this DDCB to be finished some point in
669 * time. It will mark the req finished and free it up
670 * in the list.
671 */
672
673 copy_ddcb_results(req, req->num); /* for the failing case */
674 msleep(100); /* sleep for 1/10 second and try again */
675 continue;
676
677finish_ddcb:
678 copy_ddcb_results(req, req->num);
679 ddcb_requ_set_state(req, GENWQE_REQU_FINISHED);
680 queue->ddcbs_in_flight--;
681 queue->ddcb_req[req->num] = NULL; /* delete from array */
682 ddcb_mark_cleared(pddcb);
683
684 /* Move active DDCB further; Nothing to do here anymore. */
685
686 /*
687 * We need to ensure that there is at least one free
688 * DDCB in the queue. To do that, we must update
689 * ddcb_act only if the COMPLETED bit is set for the
690 * DDCB we are working on; otherwise we treat that DDCB,
691 * even if we PURGED it, as still occupied (the hardware
692 * has not set the COMPLETED bit yet!).
693 */
694 icrc_hsi_shi = pddcb->icrc_hsi_shi_32;
695 if ((icrc_hsi_shi & DDCB_COMPLETED_BE32) &&
696 (queue->ddcb_act == req->num)) {
697 queue->ddcb_act = ((queue->ddcb_act + 1) %
698 queue->ddcb_max);
699 }
700go_home:
701 spin_unlock_irqrestore(&queue->ddcb_lock, flags);
702 return 0;
703 }
704
705 /*
706 * If the card is dead and the queue is forced to stop, we
707 * might see this in the queue status register.
708 */
709 queue_status = __genwqe_readq(cd, queue->IO_QUEUE_STATUS);
710
711 dev_dbg(&pci_dev->dev, "UN/FINISHED DDCB#%d\n", req->num);
712 genwqe_hexdump(pci_dev, pddcb, sizeof(*pddcb));
713
714 dev_err(&pci_dev->dev,
715 "[%s] err: DDCB#%d not purged and not completed "
716 "after %d seconds QSTAT=%016llx!!\n",
717 __func__, req->num, genwqe_ddcb_software_timeout,
718 queue_status);
719
720 print_ddcb_info(cd, req->queue);
721
722 return -EFAULT;
723}
724
725int genwqe_init_debug_data(struct genwqe_dev *cd, struct genwqe_debug_data *d)
726{
727 int len;
728 struct pci_dev *pci_dev = cd->pci_dev;
729
730 if (d == NULL) {
731 dev_err(&pci_dev->dev,
732 "[%s] err: invalid memory for debug data!\n",
733 __func__);
734 return -EFAULT;
735 }
736
737 len = sizeof(d->driver_version);
738 snprintf(d->driver_version, len, "%s", DRV_VERS_STRING);
739 d->slu_unitcfg = cd->slu_unitcfg;
740 d->app_unitcfg = cd->app_unitcfg;
741 return 0;
742}
743
744/**
745 * __genwqe_enqueue_ddcb() - Enqueue a DDCB
746 * @cd: pointer to genwqe device descriptor
747 * @req: pointer to DDCB execution request
748 *
749 * Return: 0 if enqueuing succeeded
750 * -EIO if card is unusable/PCIe problems
751 * -EBUSY if enqueuing failed
752 */
753int __genwqe_enqueue_ddcb(struct genwqe_dev *cd, struct ddcb_requ *req)
754{
755 struct ddcb *pddcb;
756 unsigned long flags;
757 struct ddcb_queue *queue;
758 struct pci_dev *pci_dev = cd->pci_dev;
759 u16 icrc;
760
761 if (cd->card_state != GENWQE_CARD_USED) {
762 printk_ratelimited(KERN_ERR
763 "%s %s: [%s] Card is unusable/PCIe problem Req#%d\n",
764 GENWQE_DEVNAME, dev_name(&pci_dev->dev),
765 __func__, req->num);
766 return -EIO;
767 }
768
769 queue = req->queue = &cd->queue;
770
771 /* FIXME Circumvention to improve performance when no interrupt
772 * is available.
773 */
774 if (genwqe_polling_enabled)
775 genwqe_check_ddcb_queue(cd, queue);
776
777 /*
778 * We must ensure that all DDCBs are processed in successive
779 * order. Use a lock here in order to prevent nested DDCB
780 * enqueuing.
781 */
782 spin_lock_irqsave(&queue->ddcb_lock, flags);
783
784 pddcb = get_next_ddcb(cd, queue, &req->num); /* get ptr and num */
785 if (pddcb == NULL) {
786 spin_unlock_irqrestore(&queue->ddcb_lock, flags);
787 queue->busy++;
788 return -EBUSY;
789 }
790
791 if (queue->ddcb_req[req->num] != NULL) {
792 spin_unlock_irqrestore(&queue->ddcb_lock, flags);
793
794 dev_err(&pci_dev->dev,
795 "[%s] picked DDCB %d with req=%p still in use!!\n",
796 __func__, req->num, req);
797 return -EFAULT;
798 }
799 ddcb_requ_set_state(req, GENWQE_REQU_ENQUEUED);
800 queue->ddcb_req[req->num] = req;
801
802 pddcb->cmdopts_16 = cpu_to_be16(req->cmd.cmdopts);
803 pddcb->cmd = req->cmd.cmd;
804 pddcb->acfunc = req->cmd.acfunc; /* functional unit */
805
806 /*
807 * We know that we can get retc 0x104 with CRC error, do not
808 * stop the queue in those cases for this command. XDIR = 1
809 * does not work for old SLU versions.
810 *
811 * Last bitstream with the old XDIR behavior had SLU_ID
812 * 0x34199.
813 */
814 if ((cd->slu_unitcfg & 0xFFFF0ull) > 0x34199ull)
815 pddcb->xdir = 0x1;
816 else
817 pddcb->xdir = 0x0;
818
819
820 pddcb->psp = (((req->cmd.asiv_length / 8) << 4) |
821 ((req->cmd.asv_length / 8)));
822 pddcb->disp_ts_64 = cpu_to_be64(req->cmd.disp_ts);
823
824 /*
825 * If copying the whole DDCB_ASIV_LENGTH is impacting
826 * performance we need to change it to
827 * req->cmd.asiv_length. But simulation benefits from some
828 * non-architectured bits behind the architectured content.
829 *
830 * How much data is copied depends on the availability of the
831 * ATS field, which was introduced late. If the ATS field is
832 * supported ASIV is 8 bytes shorter than it used to be. Since
833 * the ATS field is copied too, the code should do exactly
834 * what it did before, but I wanted to make copying of the ATS
835 * field very explicit.
836 */
837 if (genwqe_get_slu_id(cd) <= 0x2) {
838 memcpy(&pddcb->__asiv[0], /* destination */
839 &req->cmd.__asiv[0], /* source */
840 DDCB_ASIV_LENGTH); /* req->cmd.asiv_length */
841 } else {
842 pddcb->n.ats_64 = cpu_to_be64(req->cmd.ats);
843 memcpy(&pddcb->n.asiv[0], /* destination */
844 &req->cmd.asiv[0], /* source */
845 DDCB_ASIV_LENGTH_ATS); /* req->cmd.asiv_length */
846 }
847
848 pddcb->icrc_hsi_shi_32 = cpu_to_be32(0x00000000); /* for crc */
849
850 /*
851 * Calculate CRC_16 for corresponding range PSP(7:4). Include
852 * empty 4 bytes prior to the data.
853 */
854 icrc = genwqe_crc16((const u8 *)pddcb,
855 ICRC_LENGTH(req->cmd.asiv_length), 0xffff);
856 pddcb->icrc_hsi_shi_32 = cpu_to_be32((u32)icrc << 16);
857
858 /* enable DDCB completion irq */
859 if (!genwqe_polling_enabled)
860 pddcb->icrc_hsi_shi_32 |= DDCB_INTR_BE32;
861
862 dev_dbg(&pci_dev->dev, "INPUT DDCB#%d\n", req->num);
863 genwqe_hexdump(pci_dev, pddcb, sizeof(*pddcb));
864
865 if (ddcb_requ_collect_debug_data(req)) {
866 /* use the kernel copy of debug data. copying back to
867 user buffer happens later */
868
869 genwqe_init_debug_data(cd, &req->debug_data);
870 memcpy(&req->debug_data.ddcb_before, pddcb,
871 sizeof(req->debug_data.ddcb_before));
872 }
873
874 enqueue_ddcb(cd, queue, pddcb, req->num);
875 queue->ddcbs_in_flight++;
876
877 if (queue->ddcbs_in_flight > queue->ddcbs_max_in_flight)
878 queue->ddcbs_max_in_flight = queue->ddcbs_in_flight;
879
880 ddcb_requ_set_state(req, GENWQE_REQU_TAPPED);
881 spin_unlock_irqrestore(&queue->ddcb_lock, flags);
882 wake_up_interruptible(&cd->queue_waitq);
883
884 return 0;
885}
886
887/**
888 * __genwqe_execute_raw_ddcb() - Setup and execute DDCB
889 * @cd: pointer to genwqe device descriptor
890 * @cmd: user provided DDCB command
891 */
892int __genwqe_execute_raw_ddcb(struct genwqe_dev *cd,
893 struct genwqe_ddcb_cmd *cmd)
894{
895 int rc = 0;
896 struct pci_dev *pci_dev = cd->pci_dev;
897 struct ddcb_requ *req = container_of(cmd, struct ddcb_requ, cmd);
898
899 if (cmd->asiv_length > DDCB_ASIV_LENGTH) {
900 dev_err(&pci_dev->dev, "[%s] err: wrong asiv_length of %d\n",
901 __func__, cmd->asiv_length);
902 return -EINVAL;
903 }
904 if (cmd->asv_length > DDCB_ASV_LENGTH) {
905 dev_err(&pci_dev->dev, "[%s] err: wrong asv_length of %d\n",
906 __func__, cmd->asv_length);
907 return -EINVAL;
908 }
909 rc = __genwqe_enqueue_ddcb(cd, req);
910 if (rc != 0)
911 return rc;
912
913 rc = __genwqe_wait_ddcb(cd, req);
914 if (rc < 0) /* error or signal interrupt */
915 goto err_exit;
916
917 if (ddcb_requ_collect_debug_data(req)) {
918 if (copy_to_user((void __user *)cmd->ddata_addr,
919 &req->debug_data,
920 sizeof(struct genwqe_debug_data)))
921 return -EFAULT;
922 }
923
924 /*
925 * Higher values than 0x102 indicate completion with faults,
926 * lower values than 0x102 indicate processing faults. Note
927 * that the DDCB might have been purged, e.g. by Ctrl+C.
928 */
929 if (cmd->retc != DDCB_RETC_COMPLETE) {
930 /* This might happen e.g. for a flash read, and needs to
931 be handled by the upper layer code. */
932 rc = -EBADMSG; /* not processed/error retc */
933 }
934
935 return rc;
936
937 err_exit:
938 __genwqe_purge_ddcb(cd, req);
939
940 if (ddcb_requ_collect_debug_data(req)) {
941 if (copy_to_user((void __user *)cmd->ddata_addr,
942 &req->debug_data,
943 sizeof(struct genwqe_debug_data)))
944 return -EFAULT;
945 }
946 return rc;
947}
948
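/*
 * Illustration only (not part of the original driver code): a sketch of
 * the typical in-kernel calling sequence for the raw DDCB interface
 * above. Command, unit and length values are placeholders, and
 * GENWQE_RAW_DDCB_EXAMPLE is a hypothetical guard.
 */
#ifdef GENWQE_RAW_DDCB_EXAMPLE
static int genwqe_raw_ddcb_example(struct genwqe_dev *cd)
{
	int rc;
	struct genwqe_ddcb_cmd *cmd;

	cmd = ddcb_requ_alloc();
	if (cmd == NULL)
		return -ENOMEM;

	cmd->acfunc = 0;			/* placeholder: unit to address */
	cmd->cmd = 0;				/* placeholder: command code */
	cmd->asiv_length = 0;			/* placeholder: no input data */
	cmd->asv_length = DDCB_ASV_LENGTH;	/* room for the full ASV */

	rc = __genwqe_execute_raw_ddcb(cd, cmd); /* -EBADMSG if RETC != 0x102 */

	ddcb_requ_free(cmd);
	return rc;
}
#endif
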
949/**
950 * genwqe_next_ddcb_ready() - Figure out if the next DDCB is already finished
951 *
952 * We use this as condition for our wait-queue code.
953 */
954static int genwqe_next_ddcb_ready(struct genwqe_dev *cd)
955{
956 unsigned long flags;
957 struct ddcb *pddcb;
958 struct ddcb_queue *queue = &cd->queue;
959
960 spin_lock_irqsave(&queue->ddcb_lock, flags);
961
962 if (queue_empty(queue)) { /* empty queue */
963 spin_unlock_irqrestore(&queue->ddcb_lock, flags);
964 return 0;
965 }
966
967 pddcb = &queue->ddcb_vaddr[queue->ddcb_act];
968 if (pddcb->icrc_hsi_shi_32 & DDCB_COMPLETED_BE32) { /* ddcb ready */
969 spin_unlock_irqrestore(&queue->ddcb_lock, flags);
970 return 1;
971 }
972
973 spin_unlock_irqrestore(&queue->ddcb_lock, flags);
974 return 0;
975}
976
977/**
978 * genwqe_ddcbs_in_flight() - Check how many DDCBs are in flight
979 *
980 * Keep track of the number of DDCBs which are currently in the
981 * queue. This is needed for statistics as well as for the condition
982 * that decides whether to wait or rather poll when no interrupts are available.
983 */
984int genwqe_ddcbs_in_flight(struct genwqe_dev *cd)
985{
986 unsigned long flags;
987 int ddcbs_in_flight = 0;
988 struct ddcb_queue *queue = &cd->queue;
989
990 spin_lock_irqsave(&queue->ddcb_lock, flags);
991 ddcbs_in_flight += queue->ddcbs_in_flight;
992 spin_unlock_irqrestore(&queue->ddcb_lock, flags);
993
994 return ddcbs_in_flight;
995}
996
997static int setup_ddcb_queue(struct genwqe_dev *cd, struct ddcb_queue *queue)
998{
999 int rc, i;
1000 struct ddcb *pddcb;
1001 u64 val64;
1002 unsigned int queue_size;
1003 struct pci_dev *pci_dev = cd->pci_dev;
1004
1005 if (genwqe_ddcb_max < 2)
1006 return -EINVAL;
1007
1008 queue_size = roundup(genwqe_ddcb_max * sizeof(struct ddcb), PAGE_SIZE);
1009
1010 queue->ddcbs_in_flight = 0; /* statistics */
1011 queue->ddcbs_max_in_flight = 0;
1012 queue->ddcbs_completed = 0;
1013 queue->busy = 0;
1014
1015 queue->ddcb_seq = 0x100; /* start sequence number */
1016 queue->ddcb_max = genwqe_ddcb_max; /* module parameter */
1017 queue->ddcb_vaddr = __genwqe_alloc_consistent(cd, queue_size,
1018 &queue->ddcb_daddr);
1019 if (queue->ddcb_vaddr == NULL) {
1020 dev_err(&pci_dev->dev,
1021 "[%s] **err: could not allocate DDCB **\n", __func__);
1022 return -ENOMEM;
1023 }
1024 memset(queue->ddcb_vaddr, 0, queue_size);
1025
1026 queue->ddcb_req = kzalloc(sizeof(struct ddcb_requ *) *
1027 queue->ddcb_max, GFP_KERNEL);
1028 if (!queue->ddcb_req) {
1029 rc = -ENOMEM;
1030 goto free_ddcbs;
1031 }
1032
1033 queue->ddcb_waitqs = kzalloc(sizeof(wait_queue_head_t) *
1034 queue->ddcb_max, GFP_KERNEL);
1035 if (!queue->ddcb_waitqs) {
1036 rc = -ENOMEM;
1037 goto free_requs;
1038 }
1039
1040 for (i = 0; i < queue->ddcb_max; i++) {
1041 pddcb = &queue->ddcb_vaddr[i]; /* DDCBs */
1042 pddcb->icrc_hsi_shi_32 = DDCB_COMPLETED_BE32;
1043 pddcb->retc_16 = cpu_to_be16(0xfff);
1044
1045 queue->ddcb_req[i] = NULL; /* requests */
1046 init_waitqueue_head(&queue->ddcb_waitqs[i]); /* waitqueues */
1047 }
1048
1049 queue->ddcb_act = 0;
1050 queue->ddcb_next = 0; /* queue is empty */
1051
1052 spin_lock_init(&queue->ddcb_lock);
1053 init_waitqueue_head(&queue->ddcb_waitq);
1054
1055 val64 = ((u64)(queue->ddcb_max - 1) << 8); /* lastptr */
1056 __genwqe_writeq(cd, queue->IO_QUEUE_CONFIG, 0x07); /* iCRC/vCRC */
1057 __genwqe_writeq(cd, queue->IO_QUEUE_SEGMENT, queue->ddcb_daddr);
1058 __genwqe_writeq(cd, queue->IO_QUEUE_INITSQN, queue->ddcb_seq);
1059 __genwqe_writeq(cd, queue->IO_QUEUE_WRAP, val64);
1060 return 0;
1061
1062 free_requs:
1063 kfree(queue->ddcb_req);
1064 queue->ddcb_req = NULL;
1065 free_ddcbs:
1066 __genwqe_free_consistent(cd, queue_size, queue->ddcb_vaddr,
1067 queue->ddcb_daddr);
1068 queue->ddcb_vaddr = NULL;
1069 queue->ddcb_daddr = 0ull;
1070 return -ENODEV;
1071
1072}
1073
1074static int ddcb_queue_initialized(struct ddcb_queue *queue)
1075{
1076 return queue->ddcb_vaddr != NULL;
1077}
1078
1079static void free_ddcb_queue(struct genwqe_dev *cd, struct ddcb_queue *queue)
1080{
1081 unsigned int queue_size;
1082
1083 queue_size = roundup(queue->ddcb_max * sizeof(struct ddcb), PAGE_SIZE);
1084
1085 kfree(queue->ddcb_req);
1086 queue->ddcb_req = NULL;
1087
1088 if (queue->ddcb_vaddr) {
1089 __genwqe_free_consistent(cd, queue_size, queue->ddcb_vaddr,
1090 queue->ddcb_daddr);
1091 queue->ddcb_vaddr = NULL;
1092 queue->ddcb_daddr = 0ull;
1093 }
1094}
1095
1096static irqreturn_t genwqe_pf_isr(int irq, void *dev_id)
1097{
1098 u64 gfir;
1099 struct genwqe_dev *cd = (struct genwqe_dev *)dev_id;
1100 struct pci_dev *pci_dev = cd->pci_dev;
1101
1102 /*
1103 * In case of fatal FIR error the queue is stopped, such that
1104 * we can safely check it without risking anything.
1105 */
1106 cd->irqs_processed++;
1107 wake_up_interruptible(&cd->queue_waitq);
1108
1109 /*
1110 * Checking for errors before kicking the queue might be
1111 * safer, but slower for the good-case ... See above.
1112 */
1113 gfir = __genwqe_readq(cd, IO_SLC_CFGREG_GFIR);
1114 if ((gfir & GFIR_ERR_TRIGGER) != 0x0) {
1115
1116 wake_up_interruptible(&cd->health_waitq);
1117
1118 /*
1119 * By default GFIRs cause recovery actions. This
1120 * count is just for debug when recovery is masked.
1121 */
1122 printk_ratelimited(KERN_ERR
1123 "%s %s: [%s] GFIR=%016llx\n",
1124 GENWQE_DEVNAME, dev_name(&pci_dev->dev),
1125 __func__, gfir);
1126 }
1127
1128 return IRQ_HANDLED;
1129}
1130
1131static irqreturn_t genwqe_vf_isr(int irq, void *dev_id)
1132{
1133 struct genwqe_dev *cd = (struct genwqe_dev *)dev_id;
1134
1135 cd->irqs_processed++;
1136 wake_up_interruptible(&cd->queue_waitq);
1137
1138 return IRQ_HANDLED;
1139}
1140
1141/**
1142 * genwqe_card_thread() - Work thread for the DDCB queue
1143 *
1144 * The idea is to check if there are DDCBs in processing. If some
1145 * DDCBs have finished, we process them and wake up the
1146 * requestors. Otherwise we give other processes time using
1147 * cond_resched().
1148 */
1149static int genwqe_card_thread(void *data)
1150{
1151 int should_stop = 0, rc = 0;
1152 struct genwqe_dev *cd = (struct genwqe_dev *)data;
1153
1154 while (!kthread_should_stop()) {
1155
1156 genwqe_check_ddcb_queue(cd, &cd->queue);
1157
1158 if (genwqe_polling_enabled) {
1159 rc = wait_event_interruptible_timeout(
1160 cd->queue_waitq,
1161 genwqe_ddcbs_in_flight(cd) ||
1162 (should_stop = kthread_should_stop()), 1);
1163 } else {
1164 rc = wait_event_interruptible_timeout(
1165 cd->queue_waitq,
1166 genwqe_next_ddcb_ready(cd) ||
1167 (should_stop = kthread_should_stop()), HZ);
1168 }
1169 if (should_stop)
1170 break;
1171
1172 /*
1173 * Avoid soft lockups on heavy loads; we do not want
1174 * to disable our interrupts.
1175 */
1176 cond_resched();
1177 }
1178 return 0;
1179}
1180
1181/**
1182 * genwqe_setup_service_layer() - Setup DDCB queue
1183 * @cd: pointer to genwqe device descriptor
1184 *
1185 * Allocate DDCBs. Configure Service Layer Controller (SLC).
1186 *
1187 * Return: 0 success
1188 */
1189int genwqe_setup_service_layer(struct genwqe_dev *cd)
1190{
1191 int rc;
1192 struct ddcb_queue *queue;
1193 struct pci_dev *pci_dev = cd->pci_dev;
1194
1195 if (genwqe_is_privileged(cd)) {
1196 rc = genwqe_card_reset(cd);
1197 if (rc < 0) {
1198 dev_err(&pci_dev->dev,
1199 "[%s] err: reset failed.\n", __func__);
1200 return rc;
1201 }
1202 genwqe_read_softreset(cd);
1203 }
1204
1205 queue = &cd->queue;
1206 queue->IO_QUEUE_CONFIG = IO_SLC_QUEUE_CONFIG;
1207 queue->IO_QUEUE_STATUS = IO_SLC_QUEUE_STATUS;
1208 queue->IO_QUEUE_SEGMENT = IO_SLC_QUEUE_SEGMENT;
1209 queue->IO_QUEUE_INITSQN = IO_SLC_QUEUE_INITSQN;
1210 queue->IO_QUEUE_OFFSET = IO_SLC_QUEUE_OFFSET;
1211 queue->IO_QUEUE_WRAP = IO_SLC_QUEUE_WRAP;
1212 queue->IO_QUEUE_WTIME = IO_SLC_QUEUE_WTIME;
1213 queue->IO_QUEUE_ERRCNTS = IO_SLC_QUEUE_ERRCNTS;
1214 queue->IO_QUEUE_LRW = IO_SLC_QUEUE_LRW;
1215
1216 rc = setup_ddcb_queue(cd, queue);
1217 if (rc != 0) {
1218 rc = -ENODEV;
1219 goto err_out;
1220 }
1221
1222 init_waitqueue_head(&cd->queue_waitq);
1223 cd->card_thread = kthread_run(genwqe_card_thread, cd,
1224 GENWQE_DEVNAME "%d_thread",
1225 cd->card_idx);
1226 if (IS_ERR(cd->card_thread)) {
1227 rc = PTR_ERR(cd->card_thread);
1228 cd->card_thread = NULL;
1229 goto stop_free_queue;
1230 }
1231
1232 rc = genwqe_set_interrupt_capability(cd, GENWQE_MSI_IRQS);
1233 if (rc > 0)
1234 rc = genwqe_set_interrupt_capability(cd, rc);
1235 if (rc != 0) {
1236 rc = -ENODEV;
1237 goto stop_kthread;
1238 }
1239
1240 /*
1241 * We must have all wait-queues initialized when we enable the
1242 * interrupts. Otherwise we might crash if we get an early
1243 * irq.
1244 */
1245 init_waitqueue_head(&cd->health_waitq);
1246
1247 if (genwqe_is_privileged(cd)) {
1248 rc = request_irq(pci_dev->irq, genwqe_pf_isr, IRQF_SHARED,
1249 GENWQE_DEVNAME, cd);
1250 } else {
1251 rc = request_irq(pci_dev->irq, genwqe_vf_isr, IRQF_SHARED,
1252 GENWQE_DEVNAME, cd);
1253 }
1254 if (rc < 0) {
1255 dev_err(&pci_dev->dev, "irq %d not free.\n", pci_dev->irq);
1256 goto stop_irq_cap;
1257 }
1258
1259 cd->card_state = GENWQE_CARD_USED;
1260 return 0;
1261
1262 stop_irq_cap:
1263 genwqe_reset_interrupt_capability(cd);
1264 stop_kthread:
1265 kthread_stop(cd->card_thread);
1266 cd->card_thread = NULL;
1267 stop_free_queue:
1268 free_ddcb_queue(cd, queue);
1269 err_out:
1270 return rc;
1271}
1272
1273/**
1274 * queue_wake_up_all() - Handles fatal error case
1275 *
1276 * The PCI device became unusable and we have to stop all pending
1277 * requests as fast as we can. The code after this must purge the
1278 * DDCBs in question and ensure that all mappings are freed.
1279 */
1280static int queue_wake_up_all(struct genwqe_dev *cd)
1281{
1282 unsigned int i;
1283 unsigned long flags;
1284 struct ddcb_queue *queue = &cd->queue;
1285
1286 spin_lock_irqsave(&queue->ddcb_lock, flags);
1287
1288 for (i = 0; i < queue->ddcb_max; i++)
1289 wake_up_interruptible(&queue->ddcb_waitqs[i]);
1290
1291 spin_unlock_irqrestore(&queue->ddcb_lock, flags);
1292
1293 return 0;
1294}
1295
1296/**
1297 * genwqe_finish_queue() - Stop the queue and wait for all DDCBs in flight to finish
1298 *
1299 * Relies on the pre-condition that there are no users of the card
1300 * device anymore e.g. with open file-descriptors.
1301 *
1302 * This function must be robust enough to be called twice.
1303 */
1304int genwqe_finish_queue(struct genwqe_dev *cd)
1305{
1306 int i, rc = 0, in_flight;
1307 int waitmax = genwqe_ddcb_software_timeout;
1308 struct pci_dev *pci_dev = cd->pci_dev;
1309 struct ddcb_queue *queue = &cd->queue;
1310
1311 if (!ddcb_queue_initialized(queue))
1312 return 0;
1313
1314 /* Do not wipe out the error state. */
1315 if (cd->card_state == GENWQE_CARD_USED)
1316 cd->card_state = GENWQE_CARD_UNUSED;
1317
1318 /* Wake up all requests in the DDCB queue such that they
1319 can be removed cleanly. */
1320 queue_wake_up_all(cd);
1321
1322 /* We must wait to get rid of the DDCBs in flight */
1323 for (i = 0; i < waitmax; i++) {
1324 in_flight = genwqe_ddcbs_in_flight(cd);
1325
1326 if (in_flight == 0)
1327 break;
1328
1329 dev_dbg(&pci_dev->dev,
1330 " DEBUG [%d/%d] waiting for queue to get empty: "
1331 "%d requests!\n", i, waitmax, in_flight);
1332
1333 /*
1334 * Severe error situation: The card itself has
1335 * 16 DDCB queues, each queue has e.g. 32 entries,
1336 * each DDCB has a hardware timeout of currently 250
1337 * msec, but the PFs have a hardware timeout of 8 sec
1338 * ... so we take something large.
1339 */
1340 msleep(1000);
1341 }
1342 if (i == waitmax) {
1343 dev_err(&pci_dev->dev, " [%s] err: queue is not empty!!\n",
1344 __func__);
1345 rc = -EIO;
1346 }
1347 return rc;
1348}
1349
1350/**
1351 * genwqe_release_service_layer() - Shutdown DDCB queue
1352 * @cd: genwqe device descriptor
1353 *
1354 * This function must be robust enough to be called twice.
1355 */
1356int genwqe_release_service_layer(struct genwqe_dev *cd)
1357{
1358 struct pci_dev *pci_dev = cd->pci_dev;
1359
1360 if (!ddcb_queue_initialized(&cd->queue))
1361 return 1;
1362
1363 free_irq(pci_dev->irq, cd);
1364 genwqe_reset_interrupt_capability(cd);
1365
1366 if (cd->card_thread != NULL) {
1367 kthread_stop(cd->card_thread);
1368 cd->card_thread = NULL;
1369 }
1370
1371 free_ddcb_queue(cd, &cd->queue);
1372 return 0;
1373}
diff --git a/drivers/misc/genwqe/card_ddcb.h b/drivers/misc/genwqe/card_ddcb.h
new file mode 100644
index 000000000000..c4f26720753e
--- /dev/null
+++ b/drivers/misc/genwqe/card_ddcb.h
@@ -0,0 +1,188 @@
1#ifndef __CARD_DDCB_H__
2#define __CARD_DDCB_H__
3
4/**
5 * IBM Accelerator Family 'GenWQE'
6 *
7 * (C) Copyright IBM Corp. 2013
8 *
9 * Author: Frank Haverkamp <haver@linux.vnet.ibm.com>
10 * Author: Joerg-Stephan Vogt <jsvogt@de.ibm.com>
11 * Author: Michael Jung <mijung@de.ibm.com>
12 * Author: Michael Ruettger <michael@ibmra.de>
13 *
14 * This program is free software; you can redistribute it and/or modify
15 * it under the terms of the GNU General Public License as published by
16 * the Free Software Foundation; either version 2, or (at your option)
17 * any later version.
18 *
19 * This program is distributed in the hope that it will be useful,
20 * but WITHOUT ANY WARRANTY; without even the implied warranty of
21 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
22 * GNU General Public License for more details.
23 */
24
25#include <linux/types.h>
26#include <asm/byteorder.h>
27
28#include "genwqe_driver.h"
29#include "card_base.h"
30
31/**
32 * struct ddcb - Device Driver Control Block DDCB
33 * @hsi: Hardware software interlock
34 * @shi: Software hardware interlock. Hsi and shi are used to interlock
35 * software and hardware activities. We are using a compare and
36 * swap operation to ensure that there are no races when
37 * activating new DDCBs on the queue, or when we need to
38 * purge a DDCB from a running queue.
39 * @acfunc: Accelerator function addresses a unit within the chip
40 * @cmd: Command to work on
41 * @cmdopts_16: Options for the command
42 * @asiv: Input data
43 * @asv: Output data
44 *
45 * The DDCB data format is big endian. Multiple consecutive DDCBs form
46 * a DDCB queue.
47 */
48#define ASIV_LENGTH 104 /* Old specification without ATS field */
49#define ASIV_LENGTH_ATS 96 /* New specification with ATS field */
50#define ASV_LENGTH 64
51
52struct ddcb {
53 union {
54 __be32 icrc_hsi_shi_32; /* iCRC, Hardware/SW interlock */
55 struct {
56 __be16 icrc_16;
57 u8 hsi;
58 u8 shi;
59 };
60 };
61 u8 pre; /* Preamble */
62 u8 xdir; /* Execution Directives */
63 __be16 seqnum_16; /* Sequence Number */
64
65 u8 acfunc; /* Accelerator Function.. */
66 u8 cmd; /* Command. */
67 __be16 cmdopts_16; /* Command Options */
68 u8 sur; /* Status Update Rate */
69 u8 psp; /* Protection Section Pointer */
70 __be16 rsvd_0e_16; /* Reserved invariant */
71
72 __be64 fwiv_64; /* Firmware Invariant. */
73
74 union {
75 struct {
76 __be64 ats_64; /* Address Translation Spec */
77 u8 asiv[ASIV_LENGTH_ATS]; /* New ASIV */
78 } n;
79 u8 __asiv[ASIV_LENGTH]; /* obsolete */
80 };
81 u8 asv[ASV_LENGTH]; /* Appl Spec Variant */
82
83 __be16 rsvd_c0_16; /* Reserved Variant */
84 __be16 vcrc_16; /* Variant CRC */
85 __be32 rsvd_32; /* Reserved unprotected */
86
87 __be64 deque_ts_64; /* Deque Time Stamp. */
88
89 __be16 retc_16; /* Return Code */
90 __be16 attn_16; /* Attention/Extended Error Codes */
91 __be32 progress_32; /* Progress indicator. */
92
93 __be64 cmplt_ts_64; /* Completion Time Stamp. */
94
95 /* The following layout matches the new service layer format */
96 __be32 ibdc_32; /* Inbound Data Count (* 256) */
97 __be32 obdc_32; /* Outbound Data Count (* 256) */
98
99 __be64 rsvd_SLH_64; /* Reserved for hardware */
100 union { /* private data for driver */
101 u8 priv[8];
102 __be64 priv_64;
103 };
104 __be64 disp_ts_64; /* Dispatch TimeStamp */
105} __attribute__((__packed__));
106
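/*
 * Illustration only (not part of the original header): with the field
 * sizes above the DDCB is 256 bytes, the ASV starts at offset 0x80 and
 * RETC at offset 0xd0, matching the offsets used in get_next_ddcb().
 * A build-time check along these lines could be added inside a function:
 *
 *	BUILD_BUG_ON(sizeof(struct ddcb) != 256);
 *	BUILD_BUG_ON(offsetof(struct ddcb, asv) != 0x80);
 *	BUILD_BUG_ON(offsetof(struct ddcb, retc_16) != 0xd0);
 */
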
107/* CRC polynomials for DDCB */
108#define CRC16_POLYNOMIAL 0x1021
109
110/*
111 * SHI: Software to Hardware Interlock
112 * This 1 byte field is written by software to interlock the
113 * movement of one queue entry to another with the hardware in the
114 * chip.
115 */
116#define DDCB_SHI_INTR 0x04 /* Bit 2 */
117#define DDCB_SHI_PURGE 0x02 /* Bit 1 */
118#define DDCB_SHI_NEXT 0x01 /* Bit 0 */
119
120/*
121 * HSI: Hardware to Software interlock
122 * This 1 byte field is written by hardware to interlock the movement
123 * of one queue entry to another with the software in the chip.
124 */
125#define DDCB_HSI_COMPLETED 0x40 /* Bit 6 */
126#define DDCB_HSI_FETCHED 0x04 /* Bit 2 */
127
128/*
129 * Accessing HSI/SHI is done 32-bit wide
130 * Normally 16-bit access would work too, but on some platforms the
131 * 16-bit compare and swap operation is not supported. Therefore
132 * we switch to 32-bit such that those platforms will work too.
133 *
134 * iCRC HSI/SHI
135 */
136#define DDCB_INTR_BE32 cpu_to_be32(0x00000004)
137#define DDCB_PURGE_BE32 cpu_to_be32(0x00000002)
138#define DDCB_NEXT_BE32 cpu_to_be32(0x00000001)
139#define DDCB_COMPLETED_BE32 cpu_to_be32(0x00004000)
140#define DDCB_FETCHED_BE32 cpu_to_be32(0x00000400)
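
/*
 * Illustration only (not part of the original header): in the 32-bit
 * icrc_hsi_shi word the iCRC occupies bytes 0-1, HSI byte 2 and SHI
 * byte 3. That is why DDCB_HSI_COMPLETED (0x40) shows up above as
 * cpu_to_be32(0x00004000) and DDCB_SHI_NEXT (0x01) as
 * cpu_to_be32(0x00000001).
 */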
141
142/* Definitions of DDCB presets */
143#define DDCB_PRESET_PRE 0x80
144#define ICRC_LENGTH(n) ((n) + 8 + 8 + 8) /* used ASIV + hdr fields */
145#define VCRC_LENGTH(n) ((n)) /* used ASV */
146
147/*
148 * Genwqe Scatter Gather list
149 * Each element has up to 8 entries.
150 * The chaining element is element 0 because of prefetching needs.
151 */
152
153/*
154 * 0b0110 Chained descriptor. The descriptor is describing the next
155 * descriptor list.
156 */
157#define SG_CHAINED (0x6)
158
159/*
160 * 0b0010 First entry of a descriptor list. Start from a Buffer-Empty
161 * condition.
162 */
163#define SG_DATA (0x2)
164
165/*
166 * 0b0000 Early terminator. This is the last entry on the list
167 * regardless of the length indicated.
168 */
169#define SG_END_LIST (0x0)
170
171/**
172 * struct sg_entry - Scatter gather list entry
173 * @target_addr: Either a dma addr of memory to work on or a
174 * dma addr of a subsequent sglist block.
175 * @len: Length of the data block.
176 * @flags: See above.
177 *
178 * Depending on the command the GenWQE card can use a scatter gather
179 * list to describe the memory it works on. Always 8 sg_entry's form
180 * a block.
181 */
182struct sg_entry {
183 __be64 target_addr;
184 __be32 len;
185 __be32 flags;
186};
187
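/*
 * Illustration only (not part of the original header): sg_entry_set() is
 * a hypothetical helper showing how a single entry would be filled; all
 * fields are stored big endian.
 */
static inline void sg_entry_set(struct sg_entry *e, dma_addr_t addr,
				u32 len, u32 flags)
{
	e->target_addr = cpu_to_be64(addr);
	e->len = cpu_to_be32(len);
	e->flags = cpu_to_be32(flags);
}
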
188#endif /* __CARD_DDCB_H__ */
diff --git a/drivers/misc/genwqe/card_dev.c b/drivers/misc/genwqe/card_dev.c
new file mode 100644
index 000000000000..9b231bb1005c
--- /dev/null
+++ b/drivers/misc/genwqe/card_dev.c
@@ -0,0 +1,1486 @@
1/**
2 * IBM Accelerator Family 'GenWQE'
3 *
4 * (C) Copyright IBM Corp. 2013
5 *
6 * Author: Frank Haverkamp <haver@linux.vnet.ibm.com>
7 * Author: Joerg-Stephan Vogt <jsvogt@de.ibm.com>
8 * Author: Michael Jung <mijung@de.ibm.com>
9 * Author: Michael Ruettger <michael@ibmra.de>
10 *
11 * This program is free software; you can redistribute it and/or modify
12 * it under the terms of the GNU General Public License (version 2 only)
13 * as published by the Free Software Foundation.
14 *
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
19 */
20
21/*
22 * Character device representation of the GenWQE device. This allows
23 * user-space applications to communicate with the card.
24 */
25
26#include <linux/kernel.h>
27#include <linux/types.h>
28#include <linux/module.h>
29#include <linux/pci.h>
30#include <linux/string.h>
31#include <linux/fs.h>
32#include <linux/sched.h>
33#include <linux/wait.h>
34#include <linux/delay.h>
35#include <linux/atomic.h>
36
37#include "card_base.h"
38#include "card_ddcb.h"
39
40static int genwqe_open_files(struct genwqe_dev *cd)
41{
42 int rc;
43 unsigned long flags;
44
45 spin_lock_irqsave(&cd->file_lock, flags);
46 rc = list_empty(&cd->file_list);
47 spin_unlock_irqrestore(&cd->file_lock, flags);
48 return !rc;
49}
50
51static void genwqe_add_file(struct genwqe_dev *cd, struct genwqe_file *cfile)
52{
53 unsigned long flags;
54
55 cfile->owner = current;
56 spin_lock_irqsave(&cd->file_lock, flags);
57 list_add(&cfile->list, &cd->file_list);
58 spin_unlock_irqrestore(&cd->file_lock, flags);
59}
60
61static int genwqe_del_file(struct genwqe_dev *cd, struct genwqe_file *cfile)
62{
63 unsigned long flags;
64
65 spin_lock_irqsave(&cd->file_lock, flags);
66 list_del(&cfile->list);
67 spin_unlock_irqrestore(&cd->file_lock, flags);
68
69 return 0;
70}
71
72static void genwqe_add_pin(struct genwqe_file *cfile, struct dma_mapping *m)
73{
74 unsigned long flags;
75
76 spin_lock_irqsave(&cfile->pin_lock, flags);
77 list_add(&m->pin_list, &cfile->pin_list);
78 spin_unlock_irqrestore(&cfile->pin_lock, flags);
79}
80
81static int genwqe_del_pin(struct genwqe_file *cfile, struct dma_mapping *m)
82{
83 unsigned long flags;
84
85 spin_lock_irqsave(&cfile->pin_lock, flags);
86 list_del(&m->pin_list);
87 spin_unlock_irqrestore(&cfile->pin_lock, flags);
88
89 return 0;
90}
91
92/**
93 * genwqe_search_pin() - Search for the mapping for a userspace address
94 * @cfile: Descriptor of opened file
95 * @u_addr: User virtual address
96 * @size: Size of buffer
97 * @virt_addr: kernel virtual address to be updated
98 *
99 * Return: Pointer to the corresponding mapping, or NULL if not found
100 */
101static struct dma_mapping *genwqe_search_pin(struct genwqe_file *cfile,
102 unsigned long u_addr,
103 unsigned int size,
104 void **virt_addr)
105{
106 unsigned long flags;
107 struct dma_mapping *m;
108
109 spin_lock_irqsave(&cfile->pin_lock, flags);
110
111 list_for_each_entry(m, &cfile->pin_list, pin_list) {
112 if ((((u64)m->u_vaddr) <= (u_addr)) &&
113 (((u64)m->u_vaddr + m->size) >= (u_addr + size))) {
114
115 if (virt_addr)
116 *virt_addr = m->k_vaddr +
117 (u_addr - (u64)m->u_vaddr);
118
119 spin_unlock_irqrestore(&cfile->pin_lock, flags);
120 return m;
121 }
122 }
123 spin_unlock_irqrestore(&cfile->pin_lock, flags);
124 return NULL;
125}
126
127static void __genwqe_add_mapping(struct genwqe_file *cfile,
128 struct dma_mapping *dma_map)
129{
130 unsigned long flags;
131
132 spin_lock_irqsave(&cfile->map_lock, flags);
133 list_add(&dma_map->card_list, &cfile->map_list);
134 spin_unlock_irqrestore(&cfile->map_lock, flags);
135}
136
137static void __genwqe_del_mapping(struct genwqe_file *cfile,
138 struct dma_mapping *dma_map)
139{
140 unsigned long flags;
141
142 spin_lock_irqsave(&cfile->map_lock, flags);
143 list_del(&dma_map->card_list);
144 spin_unlock_irqrestore(&cfile->map_lock, flags);
145}
146
147
148/**
149 * __genwqe_search_mapping() - Search for the mapping for a userspace address
150 * @cfile: descriptor of opened file
151 * @u_addr: user virtual address
152 * @size: size of buffer
153 * @dma_addr: DMA address to be updated
154 * Return: Pointer to the corresponding mapping, or NULL if not found
155 */
156static struct dma_mapping *__genwqe_search_mapping(struct genwqe_file *cfile,
157 unsigned long u_addr,
158 unsigned int size,
159 dma_addr_t *dma_addr,
160 void **virt_addr)
161{
162 unsigned long flags;
163 struct dma_mapping *m;
164 struct pci_dev *pci_dev = cfile->cd->pci_dev;
165
166 spin_lock_irqsave(&cfile->map_lock, flags);
167 list_for_each_entry(m, &cfile->map_list, card_list) {
168
169 if ((((u64)m->u_vaddr) <= (u_addr)) &&
170 (((u64)m->u_vaddr + m->size) >= (u_addr + size))) {
171
172 /* match found: current is as expected and
173 addr is in range */
174 if (dma_addr)
175 *dma_addr = m->dma_addr +
176 (u_addr - (u64)m->u_vaddr);
177
178 if (virt_addr)
179 *virt_addr = m->k_vaddr +
180 (u_addr - (u64)m->u_vaddr);
181
182 spin_unlock_irqrestore(&cfile->map_lock, flags);
183 return m;
184 }
185 }
186 spin_unlock_irqrestore(&cfile->map_lock, flags);
187
188 dev_err(&pci_dev->dev,
189 "[%s] Entry not found: u_addr=%lx, size=%x\n",
190 __func__, u_addr, size);
191
192 return NULL;
193}
194
195static void genwqe_remove_mappings(struct genwqe_file *cfile)
196{
197 int i = 0;
198 struct list_head *node, *next;
199 struct dma_mapping *dma_map;
200 struct genwqe_dev *cd = cfile->cd;
201 struct pci_dev *pci_dev = cfile->cd->pci_dev;
202
203 list_for_each_safe(node, next, &cfile->map_list) {
204 dma_map = list_entry(node, struct dma_mapping, card_list);
205
206 list_del_init(&dma_map->card_list);
207
208 /*
209 * This is really a bug, because those things should
210 * have been already tidied up.
211 *
212 * GENWQE_MAPPING_RAW should have been removed via munmap().
213 * GENWQE_MAPPING_SGL_TEMP should be removed by tidy up code.
214 */
215 dev_err(&pci_dev->dev,
216 "[%s] %d. cleanup mapping: u_vaddr=%p "
217 "u_kaddr=%016lx dma_addr=%llx\n", __func__, i++,
218 dma_map->u_vaddr, (unsigned long)dma_map->k_vaddr,
219 dma_map->dma_addr);
220
221 if (dma_map->type == GENWQE_MAPPING_RAW) {
222 /* we allocated this dynamically */
223 __genwqe_free_consistent(cd, dma_map->size,
224 dma_map->k_vaddr,
225 dma_map->dma_addr);
226 kfree(dma_map);
227 } else if (dma_map->type == GENWQE_MAPPING_SGL_TEMP) {
228 /* we use dma_map statically from the request */
229 genwqe_user_vunmap(cd, dma_map, NULL);
230 }
231 }
232}
233
234static void genwqe_remove_pinnings(struct genwqe_file *cfile)
235{
236 struct list_head *node, *next;
237 struct dma_mapping *dma_map;
238 struct genwqe_dev *cd = cfile->cd;
239
240 list_for_each_safe(node, next, &cfile->pin_list) {
241 dma_map = list_entry(node, struct dma_mapping, pin_list);
242
243 /*
244 * This is not a bug, because a killed process might
245 * not call the unpin ioctl, which is supposed to free
246 * the resources.
247 *
248 * Pinnings are dynamically allocated and need to be
249 * deleted.
250 */
251 list_del_init(&dma_map->pin_list);
252 genwqe_user_vunmap(cd, dma_map, NULL);
253 kfree(dma_map);
254 }
255}
256
257/**
258 * genwqe_kill_fasync() - Send signal to all processes with open GenWQE files
259 *
260 * E.g. genwqe_kill_fasync(cd, SIGIO);
261 */
262static int genwqe_kill_fasync(struct genwqe_dev *cd, int sig)
263{
264 unsigned int files = 0;
265 unsigned long flags;
266 struct genwqe_file *cfile;
267
268 spin_lock_irqsave(&cd->file_lock, flags);
269 list_for_each_entry(cfile, &cd->file_list, list) {
270 if (cfile->async_queue)
271 kill_fasync(&cfile->async_queue, sig, POLL_HUP);
272 files++;
273 }
274 spin_unlock_irqrestore(&cd->file_lock, flags);
275 return files;
276}
277
278static int genwqe_force_sig(struct genwqe_dev *cd, int sig)
279{
280 unsigned int files = 0;
281 unsigned long flags;
282 struct genwqe_file *cfile;
283
284 spin_lock_irqsave(&cd->file_lock, flags);
285 list_for_each_entry(cfile, &cd->file_list, list) {
286 force_sig(sig, cfile->owner);
287 files++;
288 }
289 spin_unlock_irqrestore(&cd->file_lock, flags);
290 return files;
291}
292
293/**
294 * genwqe_open() - file open
295 * @inode: file system information
296 * @filp: file handle
297 *
298 * This function is executed whenever an application calls
299 * open("/dev/genwqe",..).
300 *
301 * Return: 0 if successful or <0 if errors
302 */
303static int genwqe_open(struct inode *inode, struct file *filp)
304{
305 struct genwqe_dev *cd;
306 struct genwqe_file *cfile;
307 struct pci_dev *pci_dev;
308
309 cfile = kzalloc(sizeof(*cfile), GFP_KERNEL);
310 if (cfile == NULL)
311 return -ENOMEM;
312
313 cd = container_of(inode->i_cdev, struct genwqe_dev, cdev_genwqe);
314 pci_dev = cd->pci_dev;
315 cfile->cd = cd;
316 cfile->filp = filp;
317 cfile->client = NULL;
318
319 spin_lock_init(&cfile->map_lock); /* list of raw memory allocations */
320 INIT_LIST_HEAD(&cfile->map_list);
321
322 spin_lock_init(&cfile->pin_lock); /* list of user pinned memory */
323 INIT_LIST_HEAD(&cfile->pin_list);
324
325 filp->private_data = cfile;
326
327 genwqe_add_file(cd, cfile);
328 return 0;
329}
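
/*
 * A minimal userspace sketch of opening the card and querying its
 * state via GENWQE_GET_CARD_STATE. The device node name
 * "/dev/genwqe0_card" and the uapi header path are assumptions; shown
 * for illustration only, not compiled as part of this driver.
 */
#include <fcntl.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include <linux/genwqe/genwqe_card.h>	/* header path is an assumption */

int main(void)
{
	enum genwqe_card_state state;
	int fd = open("/dev/genwqe0_card", O_RDWR);

	if (fd < 0) {
		perror("open genwqe card");
		return 1;
	}
	if (ioctl(fd, GENWQE_GET_CARD_STATE, &state) == 0)
		printf("card state: %d\n", (int)state);
	close(fd);
	return 0;
}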
330
331/**
332 * genwqe_fasync() - Setup process to receive SIGIO.
333 * @fd: file descriptor
334 * @filp: file handle
335 * @mode: file mode
336 *
337 * Sending a signal works as follows:
338 *
339 * if (cdev->async_queue)
340 * kill_fasync(&cdev->async_queue, SIGIO, POLL_IN);
341 *
342 * Some devices also implement asynchronous notification to indicate
343 * when the device can be written; in this case, of course,
344 * kill_fasync must be called with a mode of POLL_OUT.
345 */
346static int genwqe_fasync(int fd, struct file *filp, int mode)
347{
348 struct genwqe_file *cdev = (struct genwqe_file *)filp->private_data;
349 return fasync_helper(fd, filp, mode, &cdev->async_queue);
350}
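
/*
 * A minimal userspace sketch of registering for the SIGIO that
 * genwqe_kill_fasync() delivers, e.g. before the card goes away. It
 * assumes "fd" is an already opened GenWQE file descriptor; shown for
 * illustration only, not compiled as part of this driver.
 */
#include <fcntl.h>
#include <signal.h>
#include <unistd.h>

static void genwqe_sigio_handler(int sig)
{
	(void)sig;	/* the card signalled an event; close fd soon */
}

static int genwqe_setup_sigio(int fd)
{
	int flags;

	if (signal(SIGIO, genwqe_sigio_handler) == SIG_ERR)
		return -1;
	if (fcntl(fd, F_SETOWN, getpid()) == -1)
		return -1;
	flags = fcntl(fd, F_GETFL);
	if (flags == -1)
		return -1;
	/* O_ASYNC (a.k.a. FASYNC) makes the kernel call genwqe_fasync() */
	return fcntl(fd, F_SETFL, flags | O_ASYNC);
}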
351
352
353/**
354 * genwqe_release() - file close
355 * @inode: file system information
356 * @filp: file handle
357 *
358 * This function is executed whenever an application calls 'close(fd_genwqe)'
359 *
360 * Return: always 0
361 */
362static int genwqe_release(struct inode *inode, struct file *filp)
363{
364 struct genwqe_file *cfile = (struct genwqe_file *)filp->private_data;
365 struct genwqe_dev *cd = cfile->cd;
366
367 /* there must be no entries in these lists! */
368 genwqe_remove_mappings(cfile);
369 genwqe_remove_pinnings(cfile);
370
371 /* remove this filp from the asynchronously notified filp's */
372 genwqe_fasync(-1, filp, 0);
373
374 /*
375 * For this to work we must not release cd when this cfile is
376 * not yet released, otherwise the list entry is invalid,
377 * because the list itself gets reinstantiated!
378 */
379 genwqe_del_file(cd, cfile);
380 kfree(cfile);
381 return 0;
382}
383
384static void genwqe_vma_open(struct vm_area_struct *vma)
385{
386 /* nothing ... */
387}
388
389/**
390 * genwqe_vma_close() - Called each time the vma is unmapped
391 *
392 * Free memory which got allocated by GenWQE mmap().
393 */
394static void genwqe_vma_close(struct vm_area_struct *vma)
395{
396 unsigned long vsize = vma->vm_end - vma->vm_start;
397 struct inode *inode = vma->vm_file->f_dentry->d_inode;
398 struct dma_mapping *dma_map;
399 struct genwqe_dev *cd = container_of(inode->i_cdev, struct genwqe_dev,
400 cdev_genwqe);
401 struct pci_dev *pci_dev = cd->pci_dev;
402 dma_addr_t d_addr = 0;
403 struct genwqe_file *cfile = vma->vm_private_data;
404
405 dma_map = __genwqe_search_mapping(cfile, vma->vm_start, vsize,
406 &d_addr, NULL);
407 if (dma_map == NULL) {
408 dev_err(&pci_dev->dev,
409 " [%s] err: mapping not found: v=%lx, p=%lx s=%lx\n",
410 __func__, vma->vm_start, vma->vm_pgoff << PAGE_SHIFT,
411 vsize);
412 return;
413 }
414 __genwqe_del_mapping(cfile, dma_map);
415 __genwqe_free_consistent(cd, dma_map->size, dma_map->k_vaddr,
416 dma_map->dma_addr);
417 kfree(dma_map);
418}
419
420static struct vm_operations_struct genwqe_vma_ops = {
421 .open = genwqe_vma_open,
422 .close = genwqe_vma_close,
423};
424
425/**
426 * genwqe_mmap() - Provide contiguous buffers to userspace
427 *
428 * We use mmap() to allocate contiguous buffers used for DMA
429 * transfers. After the buffer is allocated we remap it to user-space
430 * and remember a reference to our dma_mapping data structure, where
431 * we store the associated DMA address and allocated size.
432 *
433 * When we receive a DDCB execution request with the ATS bits set to
434 * plain buffer, we look up our dma_mapping list to find the
435 * corresponding DMA address for the associated user-space address.
436 */
437static int genwqe_mmap(struct file *filp, struct vm_area_struct *vma)
438{
439 int rc;
440 unsigned long pfn, vsize = vma->vm_end - vma->vm_start;
441 struct genwqe_file *cfile = (struct genwqe_file *)filp->private_data;
442 struct genwqe_dev *cd = cfile->cd;
443 struct dma_mapping *dma_map;
444
445 if (vsize == 0)
446 return -EINVAL;
447
448 if (get_order(vsize) > MAX_ORDER)
449 return -ENOMEM;
450
451 dma_map = kzalloc(sizeof(struct dma_mapping), GFP_ATOMIC);
452 if (dma_map == NULL)
453 return -ENOMEM;
454
455 genwqe_mapping_init(dma_map, GENWQE_MAPPING_RAW);
456 dma_map->u_vaddr = (void *)vma->vm_start;
457 dma_map->size = vsize;
458 dma_map->nr_pages = DIV_ROUND_UP(vsize, PAGE_SIZE);
459 dma_map->k_vaddr = __genwqe_alloc_consistent(cd, vsize,
460 &dma_map->dma_addr);
461 if (dma_map->k_vaddr == NULL) {
462 rc = -ENOMEM;
463 goto free_dma_map;
464 }
465
466 if (capable(CAP_SYS_ADMIN) && (vsize > sizeof(dma_addr_t)))
467 *(dma_addr_t *)dma_map->k_vaddr = dma_map->dma_addr;
468
469 pfn = virt_to_phys(dma_map->k_vaddr) >> PAGE_SHIFT;
470 rc = remap_pfn_range(vma,
471 vma->vm_start,
472 pfn,
473 vsize,
474 vma->vm_page_prot);
475 if (rc != 0) {
476 rc = -EFAULT;
477 goto free_dma_mem;
478 }
479
480 vma->vm_private_data = cfile;
481 vma->vm_ops = &genwqe_vma_ops;
482 __genwqe_add_mapping(cfile, dma_map);
483
484 return 0;
485
486 free_dma_mem:
487 __genwqe_free_consistent(cd, dma_map->size,
488 dma_map->k_vaddr,
489 dma_map->dma_addr);
490 free_dma_map:
491 kfree(dma_map);
492 return rc;
493}
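
/*
 * A minimal userspace sketch of obtaining such a DMA-able buffer,
 * assuming "fd" is an opened GenWQE file descriptor. The mapping is
 * rounded up to full pages by the kernel; munmap() later ends up in
 * genwqe_vma_close(), which frees the DMA memory again. Shown for
 * illustration only, not compiled as part of this driver.
 */
#include <stdio.h>
#include <string.h>
#include <sys/mman.h>

static void *genwqe_alloc_dma_buf(int fd, size_t size)
{
	void *buf = mmap(NULL, size, PROT_READ | PROT_WRITE,
			 MAP_SHARED, fd, 0);

	if (buf == MAP_FAILED) {
		perror("mmap genwqe buffer");
		return NULL;
	}
	memset(buf, 0, size);	/* buffer is already backed by DMA memory */
	return buf;
}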
494
495/**
496 * do_flash_update() - Execute flash update (write image or CVPD)
497 * @cfile: descriptor of an opened GenWQE file
498 * @load: details about image load
499 *
500 * Return: 0 if successful
501 */
502
503#define FLASH_BLOCK 0x40000 /* we use 256k blocks */
504
505static int do_flash_update(struct genwqe_file *cfile,
506 struct genwqe_bitstream *load)
507{
508 int rc = 0;
509 int blocks_to_flash;
510 u64 dma_addr, flash = 0;
511 size_t tocopy = 0;
512 u8 __user *buf;
513 u8 *xbuf;
514 u32 crc;
515 u8 cmdopts;
516 struct genwqe_dev *cd = cfile->cd;
517 struct pci_dev *pci_dev = cd->pci_dev;
518
519 if ((load->size & 0x3) != 0) {
520 dev_err(&pci_dev->dev,
521			"err: buf size %d bytes not 4-byte aligned!\n",
522 load->size);
523 return -EINVAL;
524 }
525 if (((unsigned long)(load->data_addr) & ~PAGE_MASK) != 0) {
526 dev_err(&pci_dev->dev,
527 "err: buf is not page aligned!\n");
528 return -EINVAL;
529 }
530
531 /* FIXME Bits have changed for new service layer! */
532 switch ((char)load->partition) {
533 case '0':
534 cmdopts = 0x14;
535 break; /* download/erase_first/part_0 */
536 case '1':
537 cmdopts = 0x1C;
538 break; /* download/erase_first/part_1 */
539 case 'v': /* cmdopts = 0x0c (VPD) */
540 default:
541 dev_err(&pci_dev->dev,
542 "err: invalid partition %02x!\n", load->partition);
543 return -EINVAL;
544 }
545 dev_info(&pci_dev->dev,
546 "[%s] start flash update UID: 0x%x size: %u bytes part: %c\n",
547 __func__, load->uid, load->size, (char)load->partition);
548
549 buf = (u8 __user *)load->data_addr;
550 xbuf = __genwqe_alloc_consistent(cd, FLASH_BLOCK, &dma_addr);
551 if (xbuf == NULL) {
552 dev_err(&pci_dev->dev, "err: no memory\n");
553 return -ENOMEM;
554 }
555
556 blocks_to_flash = load->size / FLASH_BLOCK;
557 while (load->size) {
558 struct genwqe_ddcb_cmd *req;
559
560 /*
561		 * We must be 4-byte aligned. The buffer must be zero-padded
562		 * to have defined values when calculating the CRC.
563 */
564 tocopy = min_t(size_t, load->size, FLASH_BLOCK);
565
566 rc = copy_from_user(xbuf, buf, tocopy);
567 if (rc) {
568 dev_err(&pci_dev->dev,
569 "err: could not copy all data rc=%d\n", rc);
570 goto free_buffer;
571 }
572 crc = genwqe_crc32(xbuf, tocopy, 0xffffffff);
573
574 dev_info(&pci_dev->dev,
575 "[%s] DMA: 0x%llx CRC: %08x SZ: %ld %d\n",
576 __func__, dma_addr, crc, tocopy, blocks_to_flash);
577
578 /* prepare DDCB for SLU process */
579 req = ddcb_requ_alloc();
580 if (req == NULL) {
581 rc = -ENOMEM;
582 goto free_buffer;
583 }
584
585 req->cmd = SLCMD_MOVE_FLASH;
586 req->cmdopts = cmdopts;
587
588 /* prepare invariant values */
589 if (genwqe_get_slu_id(cd) <= 0x2) {
590 *(u64 *)&req->__asiv[0] = cpu_to_be64(dma_addr);
591 *(u64 *)&req->__asiv[8] = cpu_to_be64(tocopy);
592 *(u64 *)&req->__asiv[16] = cpu_to_be64(flash);
593 *(u32 *)&req->__asiv[24] = cpu_to_be32(0);
594 req->__asiv[24] = load->uid;
595 *(u32 *)&req->__asiv[28] = cpu_to_be32(crc);
596
597 /* for simulation only */
598 *(u64 *)&req->__asiv[88] = cpu_to_be64(load->slu_id);
599 *(u64 *)&req->__asiv[96] = cpu_to_be64(load->app_id);
600 req->asiv_length = 32; /* bytes included in crc calc */
601 } else { /* setup DDCB for ATS architecture */
602 *(u64 *)&req->asiv[0] = cpu_to_be64(dma_addr);
603 *(u32 *)&req->asiv[8] = cpu_to_be32(tocopy);
604 *(u32 *)&req->asiv[12] = cpu_to_be32(0); /* resvd */
605 *(u64 *)&req->asiv[16] = cpu_to_be64(flash);
606 *(u32 *)&req->asiv[24] = cpu_to_be32(load->uid<<24);
607 *(u32 *)&req->asiv[28] = cpu_to_be32(crc);
608
609 /* for simulation only */
610 *(u64 *)&req->asiv[80] = cpu_to_be64(load->slu_id);
611 *(u64 *)&req->asiv[88] = cpu_to_be64(load->app_id);
612
613 req->ats = cpu_to_be64(0x4ULL << 44); /* Rd only */
614 req->asiv_length = 40; /* bytes included in crc calc */
615 }
616 req->asv_length = 8;
617
618 /* For Genwqe5 we get back the calculated CRC */
619 *(u64 *)&req->asv[0] = 0ULL; /* 0x80 */
620
621 rc = __genwqe_execute_raw_ddcb(cd, req);
622
623 load->retc = req->retc;
624 load->attn = req->attn;
625 load->progress = req->progress;
626
627 if (rc < 0) {
628 dev_err(&pci_dev->dev,
629 " [%s] DDCB returned (RETC=%x ATTN=%x "
630 "PROG=%x rc=%d)\n", __func__, req->retc,
631 req->attn, req->progress, rc);
632
633 ddcb_requ_free(req);
634 goto free_buffer;
635 }
636
637 if (req->retc != DDCB_RETC_COMPLETE) {
638 dev_info(&pci_dev->dev,
639 " [%s] DDCB returned (RETC=%x ATTN=%x "
640 "PROG=%x)\n", __func__, req->retc,
641 req->attn, req->progress);
642
643 rc = -EIO;
644 ddcb_requ_free(req);
645 goto free_buffer;
646 }
647
648 load->size -= tocopy;
649 flash += tocopy;
650 buf += tocopy;
651 blocks_to_flash--;
652 ddcb_requ_free(req);
653 }
654
655 free_buffer:
656 __genwqe_free_consistent(cd, FLASH_BLOCK, xbuf, dma_addr);
657 return rc;
658}
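
/*
 * A minimal userspace sketch of driving GENWQE_SLU_UPDATE, assuming a
 * privileged process, an opened descriptor "fd", a page-aligned image
 * buffer and an image size that is a multiple of 4 bytes (both checked
 * above). The uid value is setup specific and left at 0 here. Shown
 * for illustration only, not compiled as part of this driver.
 */
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <sys/ioctl.h>
#include <linux/genwqe/genwqe_card.h>	/* header path is an assumption */

static int genwqe_flash_part(int fd, char part, void *image, uint32_t size)
{
	struct genwqe_bitstream load;

	memset(&load, 0, sizeof(load));
	load.data_addr = (uint64_t)(unsigned long)image;
	load.size = size;
	load.partition = part;		/* '0' or '1', see switch above */

	if (ioctl(fd, GENWQE_SLU_UPDATE, &load) < 0) {
		perror("GENWQE_SLU_UPDATE");
		return -1;
	}
	printf("flash update: retc=%x attn=%x progress=%x\n",
	       load.retc, load.attn, load.progress);
	return 0;
}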
659
660static int do_flash_read(struct genwqe_file *cfile,
661 struct genwqe_bitstream *load)
662{
663 int rc, blocks_to_flash;
664 u64 dma_addr, flash = 0;
665 size_t tocopy = 0;
666 u8 __user *buf;
667 u8 *xbuf;
668 u8 cmdopts;
669 struct genwqe_dev *cd = cfile->cd;
670 struct pci_dev *pci_dev = cd->pci_dev;
671 struct genwqe_ddcb_cmd *cmd;
672
673 if ((load->size & 0x3) != 0) {
674 dev_err(&pci_dev->dev,
675			"err: buf size %d bytes not 4-byte aligned!\n",
676 load->size);
677 return -EINVAL;
678 }
679 if (((unsigned long)(load->data_addr) & ~PAGE_MASK) != 0) {
680 dev_err(&pci_dev->dev, "err: buf is not page aligned!\n");
681 return -EINVAL;
682 }
683
684 /* FIXME Bits have changed for new service layer! */
685 switch ((char)load->partition) {
686 case '0':
687 cmdopts = 0x12;
688 break; /* upload/part_0 */
689 case '1':
690 cmdopts = 0x1A;
691 break; /* upload/part_1 */
692 case 'v':
693 default:
694 dev_err(&pci_dev->dev,
695 "err: invalid partition %02x!\n", load->partition);
696 return -EINVAL;
697 }
698 dev_info(&pci_dev->dev,
699 "[%s] start flash read UID: 0x%x size: %u bytes part: %c\n",
700 __func__, load->uid, load->size, (char)load->partition);
701
702 buf = (u8 __user *)load->data_addr;
703 xbuf = __genwqe_alloc_consistent(cd, FLASH_BLOCK, &dma_addr);
704 if (xbuf == NULL) {
705 dev_err(&pci_dev->dev, "err: no memory\n");
706 return -ENOMEM;
707 }
708
709 blocks_to_flash = load->size / FLASH_BLOCK;
710 while (load->size) {
711 /*
712		 * We must be 4-byte aligned. The buffer must be zero-padded
713		 * to have defined values when calculating the CRC.
714 */
715 tocopy = min_t(size_t, load->size, FLASH_BLOCK);
716
717 dev_info(&pci_dev->dev,
718 "[%s] DMA: 0x%llx SZ: %ld %d\n",
719 __func__, dma_addr, tocopy, blocks_to_flash);
720
721 /* prepare DDCB for SLU process */
722 cmd = ddcb_requ_alloc();
723 if (cmd == NULL) {
724 rc = -ENOMEM;
725 goto free_buffer;
726 }
727 cmd->cmd = SLCMD_MOVE_FLASH;
728 cmd->cmdopts = cmdopts;
729
730 /* prepare invariant values */
731 if (genwqe_get_slu_id(cd) <= 0x2) {
732 *(u64 *)&cmd->__asiv[0] = cpu_to_be64(dma_addr);
733 *(u64 *)&cmd->__asiv[8] = cpu_to_be64(tocopy);
734 *(u64 *)&cmd->__asiv[16] = cpu_to_be64(flash);
735 *(u32 *)&cmd->__asiv[24] = cpu_to_be32(0);
736 cmd->__asiv[24] = load->uid;
737 *(u32 *)&cmd->__asiv[28] = cpu_to_be32(0) /* CRC */;
738 cmd->asiv_length = 32; /* bytes included in crc calc */
739 } else { /* setup DDCB for ATS architecture */
740 *(u64 *)&cmd->asiv[0] = cpu_to_be64(dma_addr);
741 *(u32 *)&cmd->asiv[8] = cpu_to_be32(tocopy);
742 *(u32 *)&cmd->asiv[12] = cpu_to_be32(0); /* resvd */
743 *(u64 *)&cmd->asiv[16] = cpu_to_be64(flash);
744 *(u32 *)&cmd->asiv[24] = cpu_to_be32(load->uid<<24);
745 *(u32 *)&cmd->asiv[28] = cpu_to_be32(0); /* CRC */
746 cmd->ats = cpu_to_be64(0x5ULL << 44); /* rd/wr */
747 cmd->asiv_length = 40; /* bytes included in crc calc */
748 }
749 cmd->asv_length = 8;
750
751 /* we only get back the calculated CRC */
752 *(u64 *)&cmd->asv[0] = 0ULL; /* 0x80 */
753
754 rc = __genwqe_execute_raw_ddcb(cd, cmd);
755
756 load->retc = cmd->retc;
757 load->attn = cmd->attn;
758 load->progress = cmd->progress;
759
760 if ((rc < 0) && (rc != -EBADMSG)) {
761 dev_err(&pci_dev->dev,
762 " [%s] DDCB returned (RETC=%x ATTN=%x "
763 "PROG=%x rc=%d)\n", __func__, cmd->retc,
764 cmd->attn, cmd->progress, rc);
765 ddcb_requ_free(cmd);
766 goto free_buffer;
767 }
768
769 rc = copy_to_user(buf, xbuf, tocopy);
770 if (rc) {
771 dev_err(&pci_dev->dev,
772 " [%s] copy data to user failed rc=%d\n",
773 __func__, rc);
774 rc = -EIO;
775 ddcb_requ_free(cmd);
776 goto free_buffer;
777 }
778
779 /* We know that we can get retc 0x104 with CRC err */
780 if (((cmd->retc == DDCB_RETC_FAULT) &&
781 (cmd->attn != 0x02)) || /* Normally ignore CRC error */
782 ((cmd->retc == DDCB_RETC_COMPLETE) &&
783 (cmd->attn != 0x00))) { /* Everything was fine */
784 dev_err(&pci_dev->dev,
785 " [%s] DDCB returned (RETC=%x ATTN=%x "
786 "PROG=%x rc=%d)\n", __func__, cmd->retc,
787 cmd->attn, cmd->progress, rc);
788 rc = -EIO;
789 ddcb_requ_free(cmd);
790 goto free_buffer;
791 }
792
793 load->size -= tocopy;
794 flash += tocopy;
795 buf += tocopy;
796 blocks_to_flash--;
797 ddcb_requ_free(cmd);
798 }
799 rc = 0;
800
801 free_buffer:
802 __genwqe_free_consistent(cd, FLASH_BLOCK, xbuf, dma_addr);
803 return rc;
804}
805
806static int genwqe_pin_mem(struct genwqe_file *cfile, struct genwqe_mem *m)
807{
808 int rc;
809 struct genwqe_dev *cd = cfile->cd;
810 struct pci_dev *pci_dev = cfile->cd->pci_dev;
811 struct dma_mapping *dma_map;
812 unsigned long map_addr;
813 unsigned long map_size;
814
815 if ((m->addr == 0x0) || (m->size == 0))
816 return -EINVAL;
817
818 map_addr = (m->addr & PAGE_MASK);
819 map_size = round_up(m->size + (m->addr & ~PAGE_MASK), PAGE_SIZE);
820
821 dma_map = kzalloc(sizeof(struct dma_mapping), GFP_ATOMIC);
822 if (dma_map == NULL)
823 return -ENOMEM;
824
825 genwqe_mapping_init(dma_map, GENWQE_MAPPING_SGL_PINNED);
826 rc = genwqe_user_vmap(cd, dma_map, (void *)map_addr, map_size, NULL);
827	if (rc != 0) {
828		dev_err(&pci_dev->dev,
829			"[%s] genwqe_user_vmap rc=%d\n", __func__, rc);
830		kfree(dma_map);	/* do not leak dma_map if the vmap failed */
831		return rc;
832	}
833 genwqe_add_pin(cfile, dma_map);
834 return 0;
835}
836
837static int genwqe_unpin_mem(struct genwqe_file *cfile, struct genwqe_mem *m)
838{
839 struct genwqe_dev *cd = cfile->cd;
840 struct dma_mapping *dma_map;
841 unsigned long map_addr;
842 unsigned long map_size;
843
844 if (m->addr == 0x0)
845 return -EINVAL;
846
847 map_addr = (m->addr & PAGE_MASK);
848 map_size = round_up(m->size + (m->addr & ~PAGE_MASK), PAGE_SIZE);
849
850 dma_map = genwqe_search_pin(cfile, map_addr, map_size, NULL);
851 if (dma_map == NULL)
852 return -ENOENT;
853
854 genwqe_del_pin(cfile, dma_map);
855 genwqe_user_vunmap(cd, dma_map, NULL);
856 kfree(dma_map);
857 return 0;
858}
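
/*
 * A minimal userspace sketch of pinning and unpinning a buffer for
 * repeated DDCB use, assuming "fd" is an opened GenWQE descriptor.
 * The same addr/size pair passed to GENWQE_PIN_MEM must be used for
 * GENWQE_UNPIN_MEM. Shown for illustration only, not compiled as part
 * of this driver.
 */
#include <stdint.h>
#include <string.h>
#include <sys/ioctl.h>
#include <linux/genwqe/genwqe_card.h>	/* header path is an assumption */

static int genwqe_pin(int fd, void *buf, size_t len, int do_pin)
{
	struct genwqe_mem m;

	memset(&m, 0, sizeof(m));
	m.addr = (uint64_t)(unsigned long)buf;
	m.size = len;
	return ioctl(fd, do_pin ? GENWQE_PIN_MEM : GENWQE_UNPIN_MEM, &m);
}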
859
860/**
861 * ddcb_cmd_cleanup() - Remove dynamically created fixup entries
862 *
863 * Only if there are any. Pinnings are not removed.
864 */
865static int ddcb_cmd_cleanup(struct genwqe_file *cfile, struct ddcb_requ *req)
866{
867 unsigned int i;
868 struct dma_mapping *dma_map;
869 struct genwqe_dev *cd = cfile->cd;
870
871 for (i = 0; i < DDCB_FIXUPS; i++) {
872 dma_map = &req->dma_mappings[i];
873
874 if (dma_mapping_used(dma_map)) {
875 __genwqe_del_mapping(cfile, dma_map);
876 genwqe_user_vunmap(cd, dma_map, req);
877 }
878 if (req->sgl[i] != NULL) {
879 genwqe_free_sgl(cd, req->sgl[i],
880 req->sgl_dma_addr[i],
881 req->sgl_size[i]);
882 req->sgl[i] = NULL;
883 req->sgl_dma_addr[i] = 0x0;
884 req->sgl_size[i] = 0;
885 }
886
887 }
888 return 0;
889}
890
891/**
892 * ddcb_cmd_fixups() - Establish DMA fixups/sglists for user memory references
893 *
894 * Before the DDCB gets executed we need to handle the fixups. We
895 * replace the user-space addresses with DMA addresses or do
896 * additional setup work e.g. generating a scatter-gather list which
897 * is used to describe the memory referred to in the fixup.
898 */
899static int ddcb_cmd_fixups(struct genwqe_file *cfile, struct ddcb_requ *req)
900{
901 int rc;
902 unsigned int asiv_offs, i;
903 struct genwqe_dev *cd = cfile->cd;
904 struct genwqe_ddcb_cmd *cmd = &req->cmd;
905 struct dma_mapping *m;
906 struct pci_dev *pci_dev = cd->pci_dev;
907 const char *type = "UNKNOWN";
908
909 for (i = 0, asiv_offs = 0x00; asiv_offs <= 0x58;
910 i++, asiv_offs += 0x08) {
911
912 u64 u_addr, d_addr;
913 u32 u_size = 0;
914 unsigned long ats_flags;
915
916 ats_flags = ATS_GET_FLAGS(be64_to_cpu(cmd->ats), asiv_offs);
917
918 switch (ats_flags) {
919
920 case ATS_TYPE_DATA:
921 break; /* nothing to do here */
922
923 case ATS_TYPE_FLAT_RDWR:
924 case ATS_TYPE_FLAT_RD: {
925 u_addr = be64_to_cpu(*((u64 *)&cmd->
926 asiv[asiv_offs]));
927 u_size = be32_to_cpu(*((u32 *)&cmd->
928 asiv[asiv_offs + 0x08]));
929
930 /*
931 * No data available. Ignore u_addr in this
932 * case and set addr to 0. Hardware must not
933 * fetch the buffer.
934 */
935 if (u_size == 0x0) {
936 *((u64 *)&cmd->asiv[asiv_offs]) =
937 cpu_to_be64(0x0);
938 break;
939 }
940
941 m = __genwqe_search_mapping(cfile, u_addr, u_size,
942 &d_addr, NULL);
943 if (m == NULL) {
944 rc = -EFAULT;
945 goto err_out;
946 }
947
948 *((u64 *)&cmd->asiv[asiv_offs]) = cpu_to_be64(d_addr);
949 break;
950 }
951
952 case ATS_TYPE_SGL_RDWR:
953 case ATS_TYPE_SGL_RD: {
954 int page_offs, nr_pages, offs;
955
956 u_addr = be64_to_cpu(*((u64 *)&cmd->asiv[asiv_offs]));
957 u_size = be32_to_cpu(*((u32 *)&cmd->asiv[asiv_offs +
958 0x08]));
959
960 /*
961 * No data available. Ignore u_addr in this
962 * case and set addr to 0. Hardware must not
963 * fetch the empty sgl.
964 */
965 if (u_size == 0x0) {
966 *((u64 *)&cmd->asiv[asiv_offs]) =
967 cpu_to_be64(0x0);
968 break;
969 }
970
971 m = genwqe_search_pin(cfile, u_addr, u_size, NULL);
972 if (m != NULL) {
973 type = "PINNING";
974 page_offs = (u_addr -
975 (u64)m->u_vaddr)/PAGE_SIZE;
976 } else {
977 type = "MAPPING";
978 m = &req->dma_mappings[i];
979
980 genwqe_mapping_init(m,
981 GENWQE_MAPPING_SGL_TEMP);
982 rc = genwqe_user_vmap(cd, m, (void *)u_addr,
983 u_size, req);
984 if (rc != 0)
985 goto err_out;
986
987 __genwqe_add_mapping(cfile, m);
988 page_offs = 0;
989 }
990
991 offs = offset_in_page(u_addr);
992 nr_pages = DIV_ROUND_UP(offs + u_size, PAGE_SIZE);
993
994 /* create genwqe style scatter gather list */
995 req->sgl[i] = genwqe_alloc_sgl(cd, m->nr_pages,
996 &req->sgl_dma_addr[i],
997 &req->sgl_size[i]);
998 if (req->sgl[i] == NULL) {
999 rc = -ENOMEM;
1000 goto err_out;
1001 }
1002 genwqe_setup_sgl(cd, offs, u_size,
1003 req->sgl[i],
1004 req->sgl_dma_addr[i],
1005 req->sgl_size[i],
1006 m->dma_list,
1007 page_offs,
1008 nr_pages);
1009
1010 *((u64 *)&cmd->asiv[asiv_offs]) =
1011 cpu_to_be64(req->sgl_dma_addr[i]);
1012
1013 break;
1014 }
1015 default:
1016 dev_err(&pci_dev->dev,
1017 "[%s] err: invalid ATS flags %01lx\n",
1018 __func__, ats_flags);
1019 rc = -EINVAL;
1020 goto err_out;
1021 }
1022 }
1023 return 0;
1024
1025 err_out:
1026 dev_err(&pci_dev->dev, "[%s] err: rc=%d\n", __func__, rc);
1027 ddcb_cmd_cleanup(cfile, req);
1028 return rc;
1029}
1030
1031/**
1032 * genwqe_execute_ddcb() - Execute DDCB using userspace address fixups
1033 *
1034 * The code will build up the translation tables or look up the
1035 * contiguous memory allocation table to find the right translations
1036 * and DMA addresses.
1037 */
1038static int genwqe_execute_ddcb(struct genwqe_file *cfile,
1039 struct genwqe_ddcb_cmd *cmd)
1040{
1041 int rc;
1042 struct genwqe_dev *cd = cfile->cd;
1043 struct ddcb_requ *req = container_of(cmd, struct ddcb_requ, cmd);
1044
1045 rc = ddcb_cmd_fixups(cfile, req);
1046 if (rc != 0)
1047 return rc;
1048
1049 rc = __genwqe_execute_raw_ddcb(cd, cmd);
1050 ddcb_cmd_cleanup(cfile, req);
1051 return rc;
1052}
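
/*
 * A minimal userspace sketch of submitting a DDCB without any buffer
 * references (ats == 0, so ddcb_cmd_fixups() has nothing to translate
 * and all input is passed inline in asiv[]). The opcode MY_APP_CMD is
 * a hypothetical placeholder; real opcodes and ASIV/ASV layouts are
 * defined by the accelerator application. Shown for illustration
 * only, not compiled as part of this driver.
 */
#include <stdio.h>
#include <string.h>
#include <sys/ioctl.h>
#include <linux/genwqe/genwqe_card.h>	/* header path is an assumption */

#define MY_APP_CMD	0x00		/* placeholder opcode */

static int genwqe_run_simple_ddcb(int fd)
{
	struct genwqe_ddcb_cmd cmd;

	memset(&cmd, 0, sizeof(cmd));
	cmd.cmd = MY_APP_CMD;
	cmd.ats = 0;			/* no address translation needed */
	cmd.asiv_length = 8;		/* bytes of input used in asiv[] */
	cmd.asv_length = 8;		/* bytes of output expected in asv[] */

	if (ioctl(fd, GENWQE_EXECUTE_DDCB, &cmd) < 0) {
		perror("GENWQE_EXECUTE_DDCB");
		return -1;
	}
	/* DDCB_RETC_COMPLETE means success; results are in cmd.asv[] */
	printf("retc=%x attn=%x progress=%x\n",
	       cmd.retc, cmd.attn, cmd.progress);
	return 0;
}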
1053
1054static int do_execute_ddcb(struct genwqe_file *cfile,
1055 unsigned long arg, int raw)
1056{
1057 int rc;
1058 struct genwqe_ddcb_cmd *cmd;
1059 struct ddcb_requ *req;
1060 struct genwqe_dev *cd = cfile->cd;
1061 struct pci_dev *pci_dev = cd->pci_dev;
1062
1063 cmd = ddcb_requ_alloc();
1064 if (cmd == NULL)
1065 return -ENOMEM;
1066
1067 req = container_of(cmd, struct ddcb_requ, cmd);
1068
1069 if (copy_from_user(cmd, (void __user *)arg, sizeof(*cmd))) {
1070 dev_err(&pci_dev->dev,
1071 "err: could not copy params from user\n");
1072 ddcb_requ_free(cmd);
1073 return -EFAULT;
1074 }
1075
1076 if (!raw)
1077 rc = genwqe_execute_ddcb(cfile, cmd);
1078 else
1079 rc = __genwqe_execute_raw_ddcb(cd, cmd);
1080
1081	/* Copy back only the modified fields. Do not copy ASIV
1082	   back since the copy got modified by the driver. */
1083 if (copy_to_user((void __user *)arg, cmd,
1084 sizeof(*cmd) - DDCB_ASIV_LENGTH)) {
1085 dev_err(&pci_dev->dev,
1086 "err: could not copy params to user\n");
1087 ddcb_requ_free(cmd);
1088 return -EFAULT;
1089 }
1090
1091 ddcb_requ_free(cmd);
1092 return rc;
1093}
1094
1095/**
1096 * genwqe_ioctl() - IO control
1097 * @filp: file handle
1098 * @cmd: command identifier (passed from user)
1099 * @arg: argument (passed from user)
1100 *
1101 * Return: 0 success
1102 */
1103static long genwqe_ioctl(struct file *filp, unsigned int cmd,
1104 unsigned long arg)
1105{
1106 int rc = 0;
1107 struct genwqe_file *cfile = (struct genwqe_file *)filp->private_data;
1108 struct genwqe_dev *cd = cfile->cd;
1109 struct genwqe_reg_io __user *io;
1110 u64 val;
1111 u32 reg_offs;
1112 struct pci_dev *pci_dev = cd->pci_dev;
1113
1114 if (_IOC_TYPE(cmd) != GENWQE_IOC_CODE) {
1115 dev_err(&pci_dev->dev, "err: ioctl code does not match!\n");
1116 return -EINVAL;
1117 }
1118
1119 switch (cmd) {
1120
1121 case GENWQE_GET_CARD_STATE:
1122 put_user(cd->card_state, (enum genwqe_card_state __user *)arg);
1123 return 0;
1124
1125 /* Register access */
1126 case GENWQE_READ_REG64: {
1127 io = (struct genwqe_reg_io __user *)arg;
1128
1129 if (get_user(reg_offs, &io->num)) {
1130 dev_err(&pci_dev->dev, "err: reg read64\n");
1131 return -EFAULT;
1132 }
1133 if ((reg_offs >= cd->mmio_len) || (reg_offs & 0x7))
1134 return -EINVAL;
1135
1136 val = __genwqe_readq(cd, reg_offs);
1137 put_user(val, &io->val64);
1138 return 0;
1139 }
1140
1141 case GENWQE_WRITE_REG64: {
1142 io = (struct genwqe_reg_io __user *)arg;
1143
1144 if (!capable(CAP_SYS_ADMIN))
1145 return -EPERM;
1146
1147 if ((filp->f_flags & O_ACCMODE) == O_RDONLY)
1148 return -EPERM;
1149
1150 if (get_user(reg_offs, &io->num)) {
1151 dev_err(&pci_dev->dev, "err: reg write64\n");
1152 return -EFAULT;
1153 }
1154 if ((reg_offs >= cd->mmio_len) || (reg_offs & 0x7))
1155 return -EINVAL;
1156
1157 if (get_user(val, &io->val64)) {
1158 dev_err(&pci_dev->dev, "err: reg write64\n");
1159 return -EFAULT;
1160 }
1161 __genwqe_writeq(cd, reg_offs, val);
1162 return 0;
1163 }
1164
1165 case GENWQE_READ_REG32: {
1166 io = (struct genwqe_reg_io __user *)arg;
1167
1168 if (get_user(reg_offs, &io->num)) {
1169 dev_err(&pci_dev->dev, "err: reg read32\n");
1170 return -EFAULT;
1171 }
1172 if ((reg_offs >= cd->mmio_len) || (reg_offs & 0x3))
1173 return -EINVAL;
1174
1175 val = __genwqe_readl(cd, reg_offs);
1176 put_user(val, &io->val64);
1177 return 0;
1178 }
1179
1180 case GENWQE_WRITE_REG32: {
1181 io = (struct genwqe_reg_io __user *)arg;
1182
1183 if (!capable(CAP_SYS_ADMIN))
1184 return -EPERM;
1185
1186 if ((filp->f_flags & O_ACCMODE) == O_RDONLY)
1187 return -EPERM;
1188
1189 if (get_user(reg_offs, &io->num)) {
1190 dev_err(&pci_dev->dev, "err: reg write32\n");
1191 return -EFAULT;
1192 }
1193 if ((reg_offs >= cd->mmio_len) || (reg_offs & 0x3))
1194 return -EINVAL;
1195
1196 if (get_user(val, &io->val64)) {
1197 dev_err(&pci_dev->dev, "err: reg write32\n");
1198 return -EFAULT;
1199 }
1200 __genwqe_writel(cd, reg_offs, val);
1201 return 0;
1202 }
1203
1204 /* Flash update/reading */
1205 case GENWQE_SLU_UPDATE: {
1206 struct genwqe_bitstream load;
1207
1208 if (!genwqe_is_privileged(cd))
1209 return -EPERM;
1210
1211 if ((filp->f_flags & O_ACCMODE) == O_RDONLY)
1212 return -EPERM;
1213
1214 if (copy_from_user(&load, (void __user *)arg, sizeof(load))) {
1215 dev_err(&pci_dev->dev,
1216 "err: could not copy params from user\n");
1217 return -EFAULT;
1218 }
1219 rc = do_flash_update(cfile, &load);
1220
1221 if (copy_to_user((void __user *)arg, &load, sizeof(load))) {
1222 dev_err(&pci_dev->dev,
1223 "err: could not copy params to user\n");
1224 return -EFAULT;
1225 }
1226 dev_info(&pci_dev->dev, "[%s] rc=%d\n", __func__, rc);
1227 return rc;
1228 }
1229
1230 case GENWQE_SLU_READ: {
1231 struct genwqe_bitstream load;
1232
1233 if (!genwqe_is_privileged(cd))
1234 return -EPERM;
1235
1236 if (genwqe_flash_readback_fails(cd))
1237 return -ENOSPC; /* known to fail for old versions */
1238
1239 if (copy_from_user(&load, (void __user *)arg, sizeof(load))) {
1240 dev_err(&pci_dev->dev,
1241 "err: could not copy params from user\n");
1242 return -EFAULT;
1243 }
1244 rc = do_flash_read(cfile, &load);
1245
1246 if (copy_to_user((void __user *)arg, &load, sizeof(load))) {
1247 dev_err(&pci_dev->dev,
1248 "err: could not copy params to user\n");
1249 return -EFAULT;
1250 }
1251 dev_info(&pci_dev->dev, "[%s] rc=%d\n", __func__, rc);
1252 return rc;
1253 }
1254
1255 /* memory pinning and unpinning */
1256 case GENWQE_PIN_MEM: {
1257 struct genwqe_mem m;
1258
1259 if (copy_from_user(&m, (void __user *)arg, sizeof(m))) {
1260 dev_err(&pci_dev->dev,
1261 "err: could not copy params from user\n");
1262 return -EFAULT;
1263 }
1264 return genwqe_pin_mem(cfile, &m);
1265 }
1266
1267 case GENWQE_UNPIN_MEM: {
1268 struct genwqe_mem m;
1269
1270 if (copy_from_user(&m, (void __user *)arg, sizeof(m))) {
1271 dev_err(&pci_dev->dev,
1272 "err: could not copy params from user\n");
1273 return -EFAULT;
1274 }
1275 return genwqe_unpin_mem(cfile, &m);
1276 }
1277
1278	/* launch a DDCB and wait for completion */
1279 case GENWQE_EXECUTE_DDCB:
1280 return do_execute_ddcb(cfile, arg, 0);
1281
1282 case GENWQE_EXECUTE_RAW_DDCB: {
1283
1284 if (!capable(CAP_SYS_ADMIN)) {
1285 dev_err(&pci_dev->dev,
1286				"err: must be superuser to execute raw DDCB!\n");
1287 return -EPERM;
1288 }
1289 return do_execute_ddcb(cfile, arg, 1);
1290 }
1291
1292 default:
1293 pr_err("unknown ioctl %x/%lx**\n", cmd, arg);
1294 return -EINVAL;
1295 }
1296
1297 return rc;
1298}
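
/*
 * A minimal userspace sketch of the register-access ioctls shown
 * above, assuming "fd" is an opened GenWQE descriptor. The offset must
 * be 8-byte aligned and within the MMIO window; 0x0 is used below
 * purely as a placeholder. Shown for illustration only, not compiled
 * as part of this driver.
 */
#include <stdint.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <linux/genwqe/genwqe_card.h>	/* header path is an assumption */

static int genwqe_read_reg64(int fd, uint32_t offs, uint64_t *val)
{
	struct genwqe_reg_io io;

	io.num = offs;
	io.val64 = 0;
	if (ioctl(fd, GENWQE_READ_REG64, &io) < 0) {
		perror("GENWQE_READ_REG64");
		return -1;
	}
	*val = io.val64;
	return 0;
}

/* e.g.: uint64_t v; genwqe_read_reg64(fd, 0x0, &v); */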
1299
1300#if defined(CONFIG_COMPAT)
1301/**
1302 * genwqe_compat_ioctl() - Compatibility ioctl
1303 *
1304 * Called whenever a 32-bit process running under a 64-bit kernel
1305 * performs an ioctl on /dev/genwqe<n>_card.
1306 *
1307 * @filp: file pointer.
1308 * @cmd: command.
1309 * @arg: user argument.
1310 * Return: zero on success or negative number on failure.
1311 */
1312static long genwqe_compat_ioctl(struct file *filp, unsigned int cmd,
1313 unsigned long arg)
1314{
1315 return genwqe_ioctl(filp, cmd, arg);
1316}
1317#endif /* defined(CONFIG_COMPAT) */
1318
1319static const struct file_operations genwqe_fops = {
1320 .owner = THIS_MODULE,
1321 .open = genwqe_open,
1322 .fasync = genwqe_fasync,
1323 .mmap = genwqe_mmap,
1324 .unlocked_ioctl = genwqe_ioctl,
1325#if defined(CONFIG_COMPAT)
1326 .compat_ioctl = genwqe_compat_ioctl,
1327#endif
1328 .release = genwqe_release,
1329};
1330
1331static int genwqe_device_initialized(struct genwqe_dev *cd)
1332{
1333 return cd->dev != NULL;
1334}
1335
1336/**
1337 * genwqe_device_create() - Create and configure genwqe char device
1338 * @cd: genwqe device descriptor
1339 *
1340 * This function must be called before we create any more genwqe
1341 * character devices, because it allocates the major and minor
1342 * numbers which are supposed to be used by the client drivers.
1343 */
1344int genwqe_device_create(struct genwqe_dev *cd)
1345{
1346 int rc;
1347 struct pci_dev *pci_dev = cd->pci_dev;
1348
1349 /*
1350 * Here starts the individual setup per client. It must
1351 * initialize its own cdev data structure with its own fops.
1352 * The appropriate devnum needs to be created. The ranges must
1353 * not overlap.
1354 */
1355 rc = alloc_chrdev_region(&cd->devnum_genwqe, 0,
1356 GENWQE_MAX_MINOR, GENWQE_DEVNAME);
1357 if (rc < 0) {
1358 dev_err(&pci_dev->dev, "err: alloc_chrdev_region failed\n");
1359 goto err_dev;
1360 }
1361
1362 cdev_init(&cd->cdev_genwqe, &genwqe_fops);
1363 cd->cdev_genwqe.owner = THIS_MODULE;
1364
1365 rc = cdev_add(&cd->cdev_genwqe, cd->devnum_genwqe, 1);
1366 if (rc < 0) {
1367 dev_err(&pci_dev->dev, "err: cdev_add failed\n");
1368 goto err_add;
1369 }
1370
1371 /*
1372 * Finally the device in /dev/... must be created. The rule is
1373 * to use card%d_clientname for each created device.
1374 */
1375 cd->dev = device_create_with_groups(cd->class_genwqe,
1376 &cd->pci_dev->dev,
1377 cd->devnum_genwqe, cd,
1378 genwqe_attribute_groups,
1379 GENWQE_DEVNAME "%u_card",
1380 cd->card_idx);
1381 if (cd->dev == NULL) {
1382 rc = -ENODEV;
1383 goto err_cdev;
1384 }
1385
1386 rc = genwqe_init_debugfs(cd);
1387 if (rc != 0)
1388 goto err_debugfs;
1389
1390 return 0;
1391
1392 err_debugfs:
1393 device_destroy(cd->class_genwqe, cd->devnum_genwqe);
1394 err_cdev:
1395 cdev_del(&cd->cdev_genwqe);
1396 err_add:
1397 unregister_chrdev_region(cd->devnum_genwqe, GENWQE_MAX_MINOR);
1398 err_dev:
1399 cd->dev = NULL;
1400 return rc;
1401}
1402
1403static int genwqe_inform_and_stop_processes(struct genwqe_dev *cd)
1404{
1405 int rc;
1406 unsigned int i;
1407 struct pci_dev *pci_dev = cd->pci_dev;
1408
1409 if (!genwqe_open_files(cd))
1410 return 0;
1411
1412 dev_warn(&pci_dev->dev, "[%s] send SIGIO and wait ...\n", __func__);
1413
1414 rc = genwqe_kill_fasync(cd, SIGIO);
1415 if (rc > 0) {
1416 /* give kill_timeout seconds to close file descriptors ... */
1417 for (i = 0; (i < genwqe_kill_timeout) &&
1418 genwqe_open_files(cd); i++) {
1419 dev_info(&pci_dev->dev, " %d sec ...", i);
1420
1421 cond_resched();
1422 msleep(1000);
1423 }
1424
1425 /* if no open files we can safely continue, else ... */
1426 if (!genwqe_open_files(cd))
1427 return 0;
1428
1429 dev_warn(&pci_dev->dev,
1430 "[%s] send SIGKILL and wait ...\n", __func__);
1431
1432 rc = genwqe_force_sig(cd, SIGKILL); /* force terminate */
1433 if (rc) {
1434			/* Give kill_timeout more seconds to end processes */
1435 for (i = 0; (i < genwqe_kill_timeout) &&
1436 genwqe_open_files(cd); i++) {
1437 dev_warn(&pci_dev->dev, " %d sec ...", i);
1438
1439 cond_resched();
1440 msleep(1000);
1441 }
1442 }
1443 }
1444 return 0;
1445}
1446
1447/**
1448 * genwqe_device_remove() - Remove genwqe's char device
1449 *
1450 * This function must be called after the client devices are removed
1451 * because it will free the major/minor number range for the genwqe
1452 * drivers.
1453 *
1454 * This function must be robust enough to be called twice.
1455 */
1456int genwqe_device_remove(struct genwqe_dev *cd)
1457{
1458 int rc;
1459 struct pci_dev *pci_dev = cd->pci_dev;
1460
1461 if (!genwqe_device_initialized(cd))
1462 return 1;
1463
1464 genwqe_inform_and_stop_processes(cd);
1465
1466 /*
1467	 * We currently do wait until all file descriptors are
1468	 * closed. This leads to a problem when we abort an
1469	 * application, which will decrease this reference from
1470	 * 1/unused to 0/illegal and not from 2/used to 1/empty.
1471 */
1472 rc = atomic_read(&cd->cdev_genwqe.kobj.kref.refcount);
1473 if (rc != 1) {
1474 dev_err(&pci_dev->dev,
1475 "[%s] err: cdev_genwqe...refcount=%d\n", __func__, rc);
1476 panic("Fatal err: cannot free resources with pending references!");
1477 }
1478
1479 genqwe_exit_debugfs(cd);
1480 device_destroy(cd->class_genwqe, cd->devnum_genwqe);
1481 cdev_del(&cd->cdev_genwqe);
1482 unregister_chrdev_region(cd->devnum_genwqe, GENWQE_MAX_MINOR);
1483 cd->dev = NULL;
1484
1485 return 0;
1486}