path: root/drivers/misc/genwqe
author     Frank Haverkamp <haver@linux.vnet.ibm.com>          2013-12-09 07:30:40 -0500
committer  Greg Kroah-Hartman <gregkh@linuxfoundation.org>     2013-12-18 19:51:15 -0500
commit     eaf4722d4645c6b5a0cacd1f7bbe03ab1af14f6b (patch)
tree       f25a6c74fbbaa9f590b1bbb705982fc5f1de98f8 /drivers/misc/genwqe
parent     12eb4683251ebfb12afba9c56556fade7a3d7372 (diff)
GenWQE Character device and DDCB queue
The GenWQE card provides access to a generic work queue into which the work to be executed can be put, e.g. a compression or decompression request, or whatever the card was configured to do. Each request comes with a set of input data (ASIV) and will produce some output data (ASV). The request also contains a sequence number, some timestamps and a command code/subcode plus some fields for hardware-/software-interaction.

A request can contain references to blocks of memory. Since the card requires DMA addresses of that memory, the driver provides two ways to solve that task:
1) The driver's mmap() will allocate DMA-able memory for the user. The driver keeps a lookup table such that the virtual userspace address can properly be replaced and checked.
2) The user allocates memory and the driver will pin/unpin that memory and set up a scatter-gather list with matching DMA addresses.

Currently work requests are synchronous.

Signed-off-by: Frank Haverkamp <haver@linux.vnet.ibm.com>
Co-authors: Joerg-Stephan Vogt <jsvogt@de.ibm.com>,
            Michael Jung <MIJUNG@de.ibm.com>,
            Michael Ruettger <michael@ibmra.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
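As an illustration of approach (1), a minimal user-space sketch: the driver's mmap() hands the application DMA-able memory it allocated. The device node path and buffer size are assumptions made for this sketch, not part of the patch.

#include <fcntl.h>
#include <stdlib.h>
#include <sys/mman.h>
#include <unistd.h>

int main(void)
{
	size_t len = 2 * 1024 * 1024;
	void *buf;

	int fd = open("/dev/genwqe0_card", O_RDWR);	/* node name is an assumption */
	if (fd < 0)
		return EXIT_FAILURE;

	/* the driver's mmap() allocates DMA-able memory for the user */
	buf = mmap(NULL, len, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
	if (buf == MAP_FAILED) {
		close(fd);
		return EXIT_FAILURE;
	}

	/* ... fill buf and reference it from a work request ... */

	munmap(buf, len);
	close(fd);
	return EXIT_SUCCESS;
}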
Diffstat (limited to 'drivers/misc/genwqe')
-rw-r--r--  drivers/misc/genwqe/card_ddcb.c  1373
-rw-r--r--  drivers/misc/genwqe/card_ddcb.h   188
-rw-r--r--  drivers/misc/genwqe/card_dev.c   1486
3 files changed, 3047 insertions, 0 deletions
diff --git a/drivers/misc/genwqe/card_ddcb.c b/drivers/misc/genwqe/card_ddcb.c
new file mode 100644
index 000000000000..cc6fca7a4851
--- /dev/null
+++ b/drivers/misc/genwqe/card_ddcb.c
@@ -0,0 +1,1373 @@
1/**
2 * IBM Accelerator Family 'GenWQE'
3 *
4 * (C) Copyright IBM Corp. 2013
5 *
6 * Author: Frank Haverkamp <haver@linux.vnet.ibm.com>
7 * Author: Joerg-Stephan Vogt <jsvogt@de.ibm.com>
8 * Author: Michael Jung <mijung@de.ibm.com>
9 * Author: Michael Ruettger <michael@ibmra.de>
10 *
11 * This program is free software; you can redistribute it and/or modify
12 * it under the terms of the GNU General Public License (version 2 only)
13 * as published by the Free Software Foundation.
14 *
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
19 */
20
21/*
22 * Device Driver Control Block (DDCB) queue support. Definition of
23 * interrupt handlers for queue support as well as triggering the
24 * health monitor code in case of problems. The current hardware uses
25 * an MSI interrupt which is shared between error handling and
26 * functional code.
27 */
28
29#include <linux/types.h>
30#include <linux/module.h>
31#include <linux/sched.h>
32#include <linux/wait.h>
33#include <linux/pci.h>
34#include <linux/string.h>
35#include <linux/dma-mapping.h>
36#include <linux/delay.h>
37#include <linux/module.h>
38#include <linux/interrupt.h>
39#include <linux/crc-itu-t.h>
40
41#include "card_ddcb.h"
42
43/*
44 * N: next DDCB, this is where the next DDCB will be put.
45 * A: active DDCB, this is where the code will look for the next completion.
46 * x: DDCB is enqueued, we are waiting for its completion.
47
48 * Situation (1): Empty queue
49 * +---+---+---+---+---+---+---+---+
50 * | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 |
51 * | | | | | | | | |
52 * +---+---+---+---+---+---+---+---+
53 * A/N
54 * enqueued_ddcbs = A - N = 2 - 2 = 0
55 *
56 * Situation (2): Queue not wrapped, N > A
57 * +---+---+---+---+---+---+---+---+
58 * | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 |
59 * | | | x | x | | | | |
60 * +---+---+---+---+---+---+---+---+
61 * A N
62 * enqueued_ddcbs = N - A = 4 - 2 = 2
63 *
64 * Situation (3): Queue wrapped, A > N
65 * +---+---+---+---+---+---+---+---+
66 * | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 |
67 * | x | x | | | x | x | x | x |
68 * +---+---+---+---+---+---+---+---+
69 * N A
70 * enqueued_ddcbs = queue_max - (A - N) = 8 - (4 - 2) = 6
71 *
72 * Situation (4a): Queue full N > A
73 * +---+---+---+---+---+---+---+---+
74 * | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 |
75 * | x | x | x | x | x | x | x | |
76 * +---+---+---+---+---+---+---+---+
77 * A N
78 *
79 * enqueued_ddcbs = N - A = 7 - 0 = 7
80 *
81 * Situation (4b): Queue full A > N
82 * +---+---+---+---+---+---+---+---+
83 * | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 |
84 * | x | x | x | | x | x | x | x |
85 * +---+---+---+---+---+---+---+---+
86 * N A
87 * enqueued_ddcbs = queue_max - (A - N) = 8 - (4 - 3) = 7
88 */
89
90static int queue_empty(struct ddcb_queue *queue)
91{
92 return queue->ddcb_next == queue->ddcb_act;
93}
94
95static int queue_enqueued_ddcbs(struct ddcb_queue *queue)
96{
97 if (queue->ddcb_next >= queue->ddcb_act)
98 return queue->ddcb_next - queue->ddcb_act;
99
100 return queue->ddcb_max - (queue->ddcb_act - queue->ddcb_next);
101}
102
103static int queue_free_ddcbs(struct ddcb_queue *queue)
104{
105 int free_ddcbs = queue->ddcb_max - queue_enqueued_ddcbs(queue) - 1;
106
107 if (WARN_ON_ONCE(free_ddcbs < 0)) { /* must never ever happen! */
108 return 0;
109 }
110 return free_ddcbs;
111}
112
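A standalone sketch of the occupancy arithmetic from the diagram above (illustrative only, not driver code); it mirrors queue_enqueued_ddcbs() and queue_free_ddcbs():

static int sketch_enqueued_ddcbs(int ddcb_next, int ddcb_act, int ddcb_max)
{
	/* same arithmetic as queue_enqueued_ddcbs() above */
	if (ddcb_next >= ddcb_act)
		return ddcb_next - ddcb_act;
	return ddcb_max - (ddcb_act - ddcb_next);
}

/*
 * Situation (3):  act=4, next=2, max=8  ->  8 - (4 - 2) = 6 enqueued
 * Situation (4b): act=4, next=3, max=8  ->  8 - (4 - 3) = 7 enqueued
 * One slot is always kept free, so free = max - enqueued - 1.
 */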
113/*
114 * Use of the PRIV field in the DDCB for queue debugging:
115 *
116 * (1) Trying to get rid of a DDCB which saw a timeout:
117 * pddcb->priv[6] = 0xcc; # cleared
118 *
119 * (2) Append a DDCB via NEXT bit:
120 * pddcb->priv[7] = 0xaa; # appended
121 *
122 * (3) DDCB needed tapping:
123 * pddcb->priv[7] = 0xbb; # tapped
124 *
125 * (4) DDCB marked as correctly finished:
126 * pddcb->priv[6] = 0xff; # finished
127 */
128
129static inline void ddcb_mark_tapped(struct ddcb *pddcb)
130{
131 pddcb->priv[7] = 0xbb; /* tapped */
132}
133
134static inline void ddcb_mark_appended(struct ddcb *pddcb)
135{
136 pddcb->priv[7] = 0xaa; /* appended */
137}
138
139static inline void ddcb_mark_cleared(struct ddcb *pddcb)
140{
141 pddcb->priv[6] = 0xcc; /* cleared */
142}
143
144static inline void ddcb_mark_finished(struct ddcb *pddcb)
145{
146 pddcb->priv[6] = 0xff; /* finished */
147}
148
149static inline void ddcb_mark_unused(struct ddcb *pddcb)
150{
151 pddcb->priv_64 = cpu_to_be64(0); /* not tapped */
152}
153
154/**
155 * genwqe_crc16() - Generate 16-bit crc as required for DDCBs
156 * @buff: pointer to data buffer
157 * @len: length of data for calculation
158 * @init: initial crc (0xffff at start)
159 *
160 * Polynomial = x^16 + x^12 + x^5 + 1 (0x1021)
161 * Example: 4 bytes 0x01 0x02 0x03 0x04 with init = 0xffff
162 * should result in a crc16 of 0x89c3
163 *
164 * Return: crc16 checksum in big endian format !
165 */
166static inline u16 genwqe_crc16(const u8 *buff, size_t len, u16 init)
167{
168 return crc_itu_t(init, buff, len);
169}
170
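For reference, a user-space sketch of the same CRC (plain C99, not driver code): a bit-wise loop with polynomial 0x1021 and init 0xffff reproduces the documented example, bytes 0x01 0x02 0x03 0x04 -> 0x89c3, and matches what crc_itu_t() computes.

#include <stdint.h>
#include <stddef.h>

static uint16_t crc16_ccitt(uint16_t crc, const uint8_t *buf, size_t len)
{
	size_t i;
	int bit;

	for (i = 0; i < len; i++) {
		crc ^= (uint16_t)buf[i] << 8;
		for (bit = 0; bit < 8; bit++)
			crc = (crc & 0x8000) ? (crc << 1) ^ 0x1021 : crc << 1;
	}
	return crc;
}

/* crc16_ccitt(0xffff, (const uint8_t[]){ 0x01, 0x02, 0x03, 0x04 }, 4) == 0x89c3 */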
171static void print_ddcb_info(struct genwqe_dev *cd, struct ddcb_queue *queue)
172{
173 int i;
174 struct ddcb *pddcb;
175 unsigned long flags;
176 struct pci_dev *pci_dev = cd->pci_dev;
177
178 spin_lock_irqsave(&cd->print_lock, flags);
179
180 dev_info(&pci_dev->dev,
181 "DDCB list for card #%d (ddcb_act=%d / ddcb_next=%d):\n",
182 cd->card_idx, queue->ddcb_act, queue->ddcb_next);
183
184 pddcb = queue->ddcb_vaddr;
185 for (i = 0; i < queue->ddcb_max; i++) {
186 dev_err(&pci_dev->dev,
187 " %c %-3d: RETC=%03x SEQ=%04x "
188 "HSI=%02X SHI=%02x PRIV=%06llx CMD=%03x\n",
189 i == queue->ddcb_act ? '>' : ' ',
190 i,
191 be16_to_cpu(pddcb->retc_16),
192 be16_to_cpu(pddcb->seqnum_16),
193 pddcb->hsi,
194 pddcb->shi,
195 be64_to_cpu(pddcb->priv_64),
196 pddcb->cmd);
197 pddcb++;
198 }
199 spin_unlock_irqrestore(&cd->print_lock, flags);
200}
201
202struct genwqe_ddcb_cmd *ddcb_requ_alloc(void)
203{
204 struct ddcb_requ *req;
205
206 req = kzalloc(sizeof(*req), GFP_ATOMIC);
207 if (!req)
208 return NULL;
209
210 return &req->cmd;
211}
212
213void ddcb_requ_free(struct genwqe_ddcb_cmd *cmd)
214{
215 struct ddcb_requ *req = container_of(cmd, struct ddcb_requ, cmd);
216 kfree(req);
217}
218
219static inline enum genwqe_requ_state ddcb_requ_get_state(struct ddcb_requ *req)
220{
221 return req->req_state;
222}
223
224static inline void ddcb_requ_set_state(struct ddcb_requ *req,
225 enum genwqe_requ_state new_state)
226{
227 req->req_state = new_state;
228}
229
230static inline int ddcb_requ_collect_debug_data(struct ddcb_requ *req)
231{
232 return req->cmd.ddata_addr != 0x0;
233}
234
235/**
236 * ddcb_requ_finished() - Returns the hardware state of the associated DDCB
237 * @cd: pointer to genwqe device descriptor
238 * @req: DDCB work request
239 *
240 * Status of ddcb_requ mirrors this hardware state, but is copied into
241 * the ddcb_requ by the interrupt/polling function. The lowlevel code
242 * should check the hardware state directly, the higher level code
243 * should check the copy.
244 *
245 * This function will also return true if the state of the queue is
246 * not GENWQE_CARD_USED. This enables us to purge all DDCBs in the
247 * shutdown case.
248 */
249static int ddcb_requ_finished(struct genwqe_dev *cd, struct ddcb_requ *req)
250{
251 return (ddcb_requ_get_state(req) == GENWQE_REQU_FINISHED) ||
252 (cd->card_state != GENWQE_CARD_USED);
253}
254
255/**
256 * enqueue_ddcb() - Enqueue a DDCB
257 * @cd: pointer to genwqe device descriptor
258 * @queue: queue this operation should be done on
259 * @ddcb_no: number of the DDCB being tapped
260 *
261 * Start execution of DDCB by tapping or append to queue via NEXT
262 * bit. This is done by an atomic 'compare and swap' instruction and
263 * checking SHI and HSI of the previous DDCB.
264 *
265 * This function must only be called with ddcb_lock held.
266 *
267 * Return: 1 if new DDCB is appended to previous
268 * 2 if DDCB queue is tapped via register/simulation
269 */
270#define RET_DDCB_APPENDED 1
271#define RET_DDCB_TAPPED 2
272
273static int enqueue_ddcb(struct genwqe_dev *cd, struct ddcb_queue *queue,
274 struct ddcb *pddcb, int ddcb_no)
275{
276 unsigned int try;
277 int prev_no;
278 struct ddcb *prev_ddcb;
279 u32 old, new, icrc_hsi_shi;
280 u64 num;
281
282 /*
283 * For performance checks a Dispatch Timestamp can be put into
284 * the DDCB. It is supposed to use the SLU's free running counter,
285 * but this requires PCIe cycles.
286 */
287 ddcb_mark_unused(pddcb);
288
289 /* check previous DDCB if already fetched */
290 prev_no = (ddcb_no == 0) ? queue->ddcb_max - 1 : ddcb_no - 1;
291 prev_ddcb = &queue->ddcb_vaddr[prev_no];
292
293 /*
294 * It might have happened that the HSI.FETCHED bit is
295 * set. Retry in this case; therefore at most two attempts
296 * are expected.
297 */
298 ddcb_mark_appended(pddcb);
299 for (try = 0; try < 2; try++) {
300 old = prev_ddcb->icrc_hsi_shi_32; /* read SHI/HSI in BE32 */
301
302 /* try to append via NEXT bit if prev DDCB is not completed */
303 if ((old & DDCB_COMPLETED_BE32) != 0x00000000)
304 break;
305
306 new = (old | DDCB_NEXT_BE32);
307 icrc_hsi_shi = cmpxchg(&prev_ddcb->icrc_hsi_shi_32, old, new);
308
309 if (icrc_hsi_shi == old)
310 return RET_DDCB_APPENDED; /* appended to queue */
311 }
312
313 /* Queue must be re-started by updating QUEUE_OFFSET */
314 ddcb_mark_tapped(pddcb);
315 num = (u64)ddcb_no << 8;
316 __genwqe_writeq(cd, queue->IO_QUEUE_OFFSET, num); /* start queue */
317
318 return RET_DDCB_TAPPED;
319}
320
321/**
322 * copy_ddcb_results() - Copy output state from real DDCB to request
323 *
324 * Copy DDCB ASV to request struct. There is no endian
325 * conversion made, since data structure in ASV is still
326 * unknown here.
327 *
328 * This is needed by:
329 * - genwqe_purge_ddcb()
330 * - genwqe_check_ddcb_queue()
331 */
332static void copy_ddcb_results(struct ddcb_requ *req, int ddcb_no)
333{
334 struct ddcb_queue *queue = req->queue;
335 struct ddcb *pddcb = &queue->ddcb_vaddr[req->num];
336
337 memcpy(&req->cmd.asv[0], &pddcb->asv[0], DDCB_ASV_LENGTH);
338
339 /* copy status flags of the variant part */
340 req->cmd.vcrc = be16_to_cpu(pddcb->vcrc_16);
341 req->cmd.deque_ts = be64_to_cpu(pddcb->deque_ts_64);
342 req->cmd.cmplt_ts = be64_to_cpu(pddcb->cmplt_ts_64);
343
344 req->cmd.attn = be16_to_cpu(pddcb->attn_16);
345 req->cmd.progress = be32_to_cpu(pddcb->progress_32);
346 req->cmd.retc = be16_to_cpu(pddcb->retc_16);
347
348 if (ddcb_requ_collect_debug_data(req)) {
349 int prev_no = (ddcb_no == 0) ?
350 queue->ddcb_max - 1 : ddcb_no - 1;
351 struct ddcb *prev_pddcb = &queue->ddcb_vaddr[prev_no];
352
353 memcpy(&req->debug_data.ddcb_finished, pddcb,
354 sizeof(req->debug_data.ddcb_finished));
355 memcpy(&req->debug_data.ddcb_prev, prev_pddcb,
356 sizeof(req->debug_data.ddcb_prev));
357 }
358}
359
360/**
361 * genwqe_check_ddcb_queue() - Checks DDCB queue for completed work requests.
362 * @cd: pointer to genwqe device descriptor
363 *
364 * Return: Number of DDCBs which were finished
365 */
366static int genwqe_check_ddcb_queue(struct genwqe_dev *cd,
367 struct ddcb_queue *queue)
368{
369 unsigned long flags;
370 int ddcbs_finished = 0;
371 struct pci_dev *pci_dev = cd->pci_dev;
372
373 spin_lock_irqsave(&queue->ddcb_lock, flags);
374
375 /* FIXME avoid soft locking CPU */
376 while (!queue_empty(queue) && (ddcbs_finished < queue->ddcb_max)) {
377
378 struct ddcb *pddcb;
379 struct ddcb_requ *req;
380 u16 vcrc, vcrc_16, retc_16;
381
382 pddcb = &queue->ddcb_vaddr[queue->ddcb_act];
383
384 if ((pddcb->icrc_hsi_shi_32 & DDCB_COMPLETED_BE32) ==
385 0x00000000)
386 goto go_home; /* not completed, continue waiting */
387
388 /* Note: DDCB could be purged */
389
390 req = queue->ddcb_req[queue->ddcb_act];
391 if (req == NULL) {
392 /* this occurs if DDCB is purged, not an error */
393 /* Move active DDCB further; Nothing to do anymore. */
394 goto pick_next_one;
395 }
396
397 /*
398 * HSI=0x44 (fetched and completed), but RETC is
399 * 0x101, or even worse 0x000.
400 *
401 * In case of seeing the queue in inconsistent state
402 * we read the errcnts and the queue status to provide
403 * a trigger for our PCIe analyzer to stop capturing.
404 */
405 retc_16 = be16_to_cpu(pddcb->retc_16);
406 if ((pddcb->hsi == 0x44) && (retc_16 <= 0x101)) {
407 u64 errcnts, status;
408 u64 ddcb_offs = (u64)pddcb - (u64)queue->ddcb_vaddr;
409
410 errcnts = __genwqe_readq(cd, queue->IO_QUEUE_ERRCNTS);
411 status = __genwqe_readq(cd, queue->IO_QUEUE_STATUS);
412
413 dev_err(&pci_dev->dev,
414 "[%s] SEQN=%04x HSI=%02x RETC=%03x "
415 " Q_ERRCNTS=%016llx Q_STATUS=%016llx\n"
416 " DDCB_DMA_ADDR=%016llx\n",
417 __func__, be16_to_cpu(pddcb->seqnum_16),
418 pddcb->hsi, retc_16, errcnts, status,
419 queue->ddcb_daddr + ddcb_offs);
420 }
421
422 copy_ddcb_results(req, queue->ddcb_act);
423 queue->ddcb_req[queue->ddcb_act] = NULL; /* take from queue */
424
425 dev_dbg(&pci_dev->dev, "FINISHED DDCB#%d\n", req->num);
426 genwqe_hexdump(pci_dev, pddcb, sizeof(*pddcb));
427
428 ddcb_mark_finished(pddcb);
429
430 /* calculate CRC_16 to see if VCRC is correct */
431 vcrc = genwqe_crc16(pddcb->asv,
432 VCRC_LENGTH(req->cmd.asv_length),
433 0xffff);
434 vcrc_16 = be16_to_cpu(pddcb->vcrc_16);
435 if (vcrc != vcrc_16) {
436 printk_ratelimited(KERN_ERR
437 "%s %s: err: wrong VCRC pre=%02x vcrc_len=%d "
438 "bytes vcrc_data=%04x is not vcrc_card=%04x\n",
439 GENWQE_DEVNAME, dev_name(&pci_dev->dev),
440 pddcb->pre, VCRC_LENGTH(req->cmd.asv_length),
441 vcrc, vcrc_16);
442 }
443
444 ddcb_requ_set_state(req, GENWQE_REQU_FINISHED);
445 queue->ddcbs_completed++;
446 queue->ddcbs_in_flight--;
447
448 /* wake up process waiting for this DDCB */
449 wake_up_interruptible(&queue->ddcb_waitqs[queue->ddcb_act]);
450
451pick_next_one:
452 queue->ddcb_act = (queue->ddcb_act + 1) % queue->ddcb_max;
453 ddcbs_finished++;
454 }
455
456 go_home:
457 spin_unlock_irqrestore(&queue->ddcb_lock, flags);
458 return ddcbs_finished;
459}
460
461/**
462 * __genwqe_wait_ddcb(): Waits until DDCB is completed
463 * @cd: pointer to genwqe device descriptor
464 * @req: pointer to requested DDCB parameters
465 *
466 * The Service Layer will update the RETC in DDCB when processing is
467 * pending or done.
468 *
469 * Return: > 0 remaining jiffies, DDCB completed
470 * -ETIMEDOUT when timeout
471 * -ERESTARTSYS when ^C
472 * -EINVAL when unknown error condition
473 *
474 * When an error is returned the caller needs to ensure that
475 * purge_ddcb() is being called to get the &req removed from the
476 * queue.
477 */
478int __genwqe_wait_ddcb(struct genwqe_dev *cd, struct ddcb_requ *req)
479{
480 int rc;
481 unsigned int ddcb_no;
482 struct ddcb_queue *queue;
483 struct pci_dev *pci_dev = cd->pci_dev;
484
485 if (req == NULL)
486 return -EINVAL;
487
488 queue = req->queue;
489 if (queue == NULL)
490 return -EINVAL;
491
492 ddcb_no = req->num;
493 if (ddcb_no >= queue->ddcb_max)
494 return -EINVAL;
495
496 rc = wait_event_interruptible_timeout(queue->ddcb_waitqs[ddcb_no],
497 ddcb_requ_finished(cd, req),
498 genwqe_ddcb_software_timeout * HZ);
499
500 /*
501 * We need to distinguish 3 cases here:
502 * 1. rc == 0 timeout occurred
503 * 2. rc == -ERESTARTSYS signal received
504 * 3. rc > 0 remaining jiffies condition is true
505 */
506 if (rc == 0) {
507 struct ddcb_queue *queue = req->queue;
508 struct ddcb *pddcb;
509
510 /*
511 * Timeout may be caused by long task switching time.
512 * When timeout happens, check if the request has
513 * meanwhile completed.
514 */
515 genwqe_check_ddcb_queue(cd, req->queue);
516 if (ddcb_requ_finished(cd, req))
517 return rc;
518
519 dev_err(&pci_dev->dev,
520 "[%s] err: DDCB#%d timeout rc=%d state=%d req @ %p\n",
521 __func__, req->num, rc, ddcb_requ_get_state(req),
522 req);
523 dev_err(&pci_dev->dev,
524 "[%s] IO_QUEUE_STATUS=0x%016llx\n", __func__,
525 __genwqe_readq(cd, queue->IO_QUEUE_STATUS));
526
527 pddcb = &queue->ddcb_vaddr[req->num];
528 genwqe_hexdump(pci_dev, pddcb, sizeof(*pddcb));
529
530 print_ddcb_info(cd, req->queue);
531 return -ETIMEDOUT;
532
533 } else if (rc == -ERESTARTSYS) {
534 return rc;
535 /*
536 * EINTR: Stops the application
537 * ERESTARTSYS: Restartable system call; called again
538 */
539
540 } else if (rc < 0) {
541 dev_err(&pci_dev->dev,
542 "[%s] err: DDCB#%d unknown result (rc=%d) %d!\n",
543 __func__, req->num, rc, ddcb_requ_get_state(req));
544 return -EINVAL;
545 }
546
547 /* Severe error occurred. Driver is forced to stop operation */
548 if (cd->card_state != GENWQE_CARD_USED) {
549 dev_err(&pci_dev->dev,
550 "[%s] err: DDCB#%d forced to stop (rc=%d)\n",
551 __func__, req->num, rc);
552 return -EIO;
553 }
554 return rc;
555}
556
557/**
558 * get_next_ddcb() - Get next available DDCB
559 * @cd: pointer to genwqe device descriptor
560 *
561 * DDCB's content is completely cleared but presets for PRE and
562 * SEQNUM. This function must only be called when ddcb_lock is held.
563 *
564 * Return: NULL if no empty DDCB available otherwise ptr to next DDCB.
565 */
566static struct ddcb *get_next_ddcb(struct genwqe_dev *cd,
567 struct ddcb_queue *queue,
568 int *num)
569{
570 u64 *pu64;
571 struct ddcb *pddcb;
572
573 if (queue_free_ddcbs(queue) == 0) /* queue is full */
574 return NULL;
575
576 /* find new ddcb */
577 pddcb = &queue->ddcb_vaddr[queue->ddcb_next];
578
579 /* if it is not completed, we are not allowed to use it */
580 /* barrier(); */
581 if ((pddcb->icrc_hsi_shi_32 & DDCB_COMPLETED_BE32) == 0x00000000)
582 return NULL;
583
584 *num = queue->ddcb_next; /* internal DDCB number */
585 queue->ddcb_next = (queue->ddcb_next + 1) % queue->ddcb_max;
586
587 /* clear important DDCB fields */
588 pu64 = (u64 *)pddcb;
589 pu64[0] = 0ULL; /* offs 0x00 (ICRC,HSI,SHI,...) */
590 pu64[1] = 0ULL; /* offs 0x01 (ACFUNC,CMD...) */
591
592 /* destroy previous results in ASV */
593 pu64[0x80/8] = 0ULL; /* offs 0x80 (ASV + 0) */
594 pu64[0x88/8] = 0ULL; /* offs 0x88 (ASV + 0x08) */
595 pu64[0x90/8] = 0ULL; /* offs 0x90 (ASV + 0x10) */
596 pu64[0x98/8] = 0ULL; /* offs 0x98 (ASV + 0x18) */
597 pu64[0xd0/8] = 0ULL; /* offs 0xd0 (RETC,ATTN...) */
598
599 pddcb->pre = DDCB_PRESET_PRE; /* 128 */
600 pddcb->seqnum_16 = cpu_to_be16(queue->ddcb_seq++);
601 return pddcb;
602}
603
604/**
605 * __genwqe_purge_ddcb() - Remove a DDCB from the workqueue
606 * @cd: genwqe device descriptor
607 * @req: DDCB request
608 *
609 * This will fail when the request was already FETCHED. In this case
610 * we need to wait until it is finished. Else the DDCB can be
611 * reused. This function also ensures that the request data structure
612 * is removed from ddcb_req[].
613 *
614 * Do not forget to call this function when genwqe_wait_ddcb() fails,
615 * such that the request gets really removed from ddcb_req[].
616 *
617 * Return: 0 success
618 */
619int __genwqe_purge_ddcb(struct genwqe_dev *cd, struct ddcb_requ *req)
620{
621 struct ddcb *pddcb = NULL;
622 unsigned int t;
623 unsigned long flags;
624 struct ddcb_queue *queue = req->queue;
625 struct pci_dev *pci_dev = cd->pci_dev;
626 u32 icrc_hsi_shi = 0x0000;
627 u64 queue_status;
628 u32 old, new;
629
630 /* unsigned long flags; */
631 if (genwqe_ddcb_software_timeout <= 0) {
632 dev_err(&pci_dev->dev,
633 "[%s] err: software timeout is not set!\n", __func__);
634 return -EFAULT;
635 }
636
637 pddcb = &queue->ddcb_vaddr[req->num];
638
639 for (t = 0; t < genwqe_ddcb_software_timeout * 10; t++) {
640
641 spin_lock_irqsave(&queue->ddcb_lock, flags);
642
643 /* Check if req was meanwhile finished */
644 if (ddcb_requ_get_state(req) == GENWQE_REQU_FINISHED)
645 goto go_home;
646
647 /* try to set PURGE bit if FETCHED/COMPLETED are not set */
648 old = pddcb->icrc_hsi_shi_32; /* read SHI/HSI in BE32 */
649 if ((old & DDCB_FETCHED_BE32) == 0x00000000) {
650
651 new = (old | DDCB_PURGE_BE32);
652 icrc_hsi_shi = cmpxchg(&pddcb->icrc_hsi_shi_32,
653 old, new);
654 if (icrc_hsi_shi == old)
655 goto finish_ddcb;
656 }
657
658 /* normal finish with HSI bit */
659 barrier();
660 icrc_hsi_shi = pddcb->icrc_hsi_shi_32;
661 if (icrc_hsi_shi & DDCB_COMPLETED_BE32)
662 goto finish_ddcb;
663
664 spin_unlock_irqrestore(&queue->ddcb_lock, flags);
665
666 /*
667 * Here the check_ddcb() function will most likely
668 * discover this DDCB to be finished at some point in
669 * time. It will mark the req finished and free it up
670 * in the list.
671 */
672
673 copy_ddcb_results(req, req->num); /* for the failing case */
674 msleep(100); /* sleep for 1/10 second and try again */
675 continue;
676
677finish_ddcb:
678 copy_ddcb_results(req, req->num);
679 ddcb_requ_set_state(req, GENWQE_REQU_FINISHED);
680 queue->ddcbs_in_flight--;
681 queue->ddcb_req[req->num] = NULL; /* delete from array */
682 ddcb_mark_cleared(pddcb);
683
684 /* Move active DDCB further; Nothing to do here anymore. */
685
686 /*
687 * We need to ensure that there is at least one free
688 * DDCB in the queue. To do that, we must update
689 * ddcb_act only if the COMPLETED bit is set for the
690 * DDCB we are working on; otherwise we treat that DDCB
691 * as occupied even if we PURGED it (hardware has not
692 * set the COMPLETED bit yet!).
693 */
694 icrc_hsi_shi = pddcb->icrc_hsi_shi_32;
695 if ((icrc_hsi_shi & DDCB_COMPLETED_BE32) &&
696 (queue->ddcb_act == req->num)) {
697 queue->ddcb_act = ((queue->ddcb_act + 1) %
698 queue->ddcb_max);
699 }
700go_home:
701 spin_unlock_irqrestore(&queue->ddcb_lock, flags);
702 return 0;
703 }
704
705 /*
706 * If the card is dead and the queue is forced to stop, we
707 * might see this in the queue status register.
708 */
709 queue_status = __genwqe_readq(cd, queue->IO_QUEUE_STATUS);
710
711 dev_dbg(&pci_dev->dev, "UN/FINISHED DDCB#%d\n", req->num);
712 genwqe_hexdump(pci_dev, pddcb, sizeof(*pddcb));
713
714 dev_err(&pci_dev->dev,
715 "[%s] err: DDCB#%d not purged and not completed "
716 "after %d seconds QSTAT=%016llx!!\n",
717 __func__, req->num, genwqe_ddcb_software_timeout,
718 queue_status);
719
720 print_ddcb_info(cd, req->queue);
721
722 return -EFAULT;
723}
724
725int genwqe_init_debug_data(struct genwqe_dev *cd, struct genwqe_debug_data *d)
726{
727 int len;
728 struct pci_dev *pci_dev = cd->pci_dev;
729
730 if (d == NULL) {
731 dev_err(&pci_dev->dev,
732 "[%s] err: invalid memory for debug data!\n",
733 __func__);
734 return -EFAULT;
735 }
736
737 len = sizeof(d->driver_version);
738 snprintf(d->driver_version, len, "%s", DRV_VERS_STRING);
739 d->slu_unitcfg = cd->slu_unitcfg;
740 d->app_unitcfg = cd->app_unitcfg;
741 return 0;
742}
743
744/**
745 * __genwqe_enqueue_ddcb() - Enqueue a DDCB
746 * @cd: pointer to genwqe device descriptor
747 * @req: pointer to DDCB execution request
748 *
749 * Return: 0 if enqueuing succeeded
750 * -EIO if card is unusable/PCIe problems
751 * -EBUSY if enqueuing failed
752 */
753int __genwqe_enqueue_ddcb(struct genwqe_dev *cd, struct ddcb_requ *req)
754{
755 struct ddcb *pddcb;
756 unsigned long flags;
757 struct ddcb_queue *queue;
758 struct pci_dev *pci_dev = cd->pci_dev;
759 u16 icrc;
760
761 if (cd->card_state != GENWQE_CARD_USED) {
762 printk_ratelimited(KERN_ERR
763 "%s %s: [%s] Card is unusable/PCIe problem Req#%d\n",
764 GENWQE_DEVNAME, dev_name(&pci_dev->dev),
765 __func__, req->num);
766 return -EIO;
767 }
768
769 queue = req->queue = &cd->queue;
770
771 /* FIXME circumvention to improve performance when no irq is
772 * there.
773 */
774 if (genwqe_polling_enabled)
775 genwqe_check_ddcb_queue(cd, queue);
776
777 /*
778 * It must be ensured to process all DDCBs in successive
779 * order. Use a lock here in order to prevent nested DDCB
780 * enqueuing.
781 */
782 spin_lock_irqsave(&queue->ddcb_lock, flags);
783
784 pddcb = get_next_ddcb(cd, queue, &req->num); /* get ptr and num */
785 if (pddcb == NULL) {
786 spin_unlock_irqrestore(&queue->ddcb_lock, flags);
787 queue->busy++;
788 return -EBUSY;
789 }
790
791 if (queue->ddcb_req[req->num] != NULL) {
792 spin_unlock_irqrestore(&queue->ddcb_lock, flags);
793
794 dev_err(&pci_dev->dev,
795 "[%s] picked DDCB %d with req=%p still in use!!\n",
796 __func__, req->num, req);
797 return -EFAULT;
798 }
799 ddcb_requ_set_state(req, GENWQE_REQU_ENQUEUED);
800 queue->ddcb_req[req->num] = req;
801
802 pddcb->cmdopts_16 = cpu_to_be16(req->cmd.cmdopts);
803 pddcb->cmd = req->cmd.cmd;
804 pddcb->acfunc = req->cmd.acfunc; /* functional unit */
805
806 /*
807 * We know that we can get retc 0x104 with CRC error, do not
808 * stop the queue in those cases for this command. XDIR = 1
809 * does not work for old SLU versions.
810 *
811 * Last bitstream with the old XDIR behavior had SLU_ID
812 * 0x34199.
813 */
814 if ((cd->slu_unitcfg & 0xFFFF0ull) > 0x34199ull)
815 pddcb->xdir = 0x1;
816 else
817 pddcb->xdir = 0x0;
818
819
820 pddcb->psp = (((req->cmd.asiv_length / 8) << 4) |
821 ((req->cmd.asv_length / 8)));
822 pddcb->disp_ts_64 = cpu_to_be64(req->cmd.disp_ts);
823
824 /*
825 * If copying the whole DDCB_ASIV_LENGTH is impacting
826 * performance we need to change it to
827 * req->cmd.asiv_length. But simulation benefits from some
828 * non-architectured bits behind the architectured content.
829 *
830 * How much data is copied depends on the availability of the
831 * ATS field, which was introduced late. If the ATS field is
832 * supported ASIV is 8 bytes shorter than it used to be. Since
833 * the ATS field is copied too, the code should do exactly
834 * what it did before, but I wanted to make copying of the ATS
835 * field very explicit.
836 */
837 if (genwqe_get_slu_id(cd) <= 0x2) {
838 memcpy(&pddcb->__asiv[0], /* destination */
839 &req->cmd.__asiv[0], /* source */
840 DDCB_ASIV_LENGTH); /* req->cmd.asiv_length */
841 } else {
842 pddcb->n.ats_64 = req->cmd.ats;
843 memcpy(&pddcb->n.asiv[0], /* destination */
844 &req->cmd.asiv[0], /* source */
845 DDCB_ASIV_LENGTH_ATS); /* req->cmd.asiv_length */
846 }
847
848 pddcb->icrc_hsi_shi_32 = cpu_to_be32(0x00000000); /* for crc */
849
850 /*
851 * Calculate CRC_16 for corresponding range PSP(7:4). Include
852 * empty 4 bytes prior to the data.
853 */
854 icrc = genwqe_crc16((const u8 *)pddcb,
855 ICRC_LENGTH(req->cmd.asiv_length), 0xffff);
856 pddcb->icrc_hsi_shi_32 = cpu_to_be32((u32)icrc << 16);
857
858 /* enable DDCB completion irq */
859 if (!genwqe_polling_enabled)
860 pddcb->icrc_hsi_shi_32 |= DDCB_INTR_BE32;
861
862 dev_dbg(&pci_dev->dev, "INPUT DDCB#%d\n", req->num);
863 genwqe_hexdump(pci_dev, pddcb, sizeof(*pddcb));
864
865 if (ddcb_requ_collect_debug_data(req)) {
866 /* use the kernel copy of debug data. copying back to
867 user buffer happens later */
868
869 genwqe_init_debug_data(cd, &req->debug_data);
870 memcpy(&req->debug_data.ddcb_before, pddcb,
871 sizeof(req->debug_data.ddcb_before));
872 }
873
874 enqueue_ddcb(cd, queue, pddcb, req->num);
875 queue->ddcbs_in_flight++;
876
877 if (queue->ddcbs_in_flight > queue->ddcbs_max_in_flight)
878 queue->ddcbs_max_in_flight = queue->ddcbs_in_flight;
879
880 ddcb_requ_set_state(req, GENWQE_REQU_TAPPED);
881 spin_unlock_irqrestore(&queue->ddcb_lock, flags);
882 wake_up_interruptible(&cd->queue_waitq);
883
884 return 0;
885}
886
887/**
888 * __genwqe_execute_raw_ddcb() - Setup and execute DDCB
889 * @cd: pointer to genwqe device descriptor
890 * @req: user provided DDCB request
891 */
892int __genwqe_execute_raw_ddcb(struct genwqe_dev *cd,
893 struct genwqe_ddcb_cmd *cmd)
894{
895 int rc = 0;
896 struct pci_dev *pci_dev = cd->pci_dev;
897 struct ddcb_requ *req = container_of(cmd, struct ddcb_requ, cmd);
898
899 if (cmd->asiv_length > DDCB_ASIV_LENGTH) {
900 dev_err(&pci_dev->dev, "[%s] err: wrong asiv_length of %d\n",
901 __func__, cmd->asiv_length);
902 return -EINVAL;
903 }
904 if (cmd->asv_length > DDCB_ASV_LENGTH) {
905 dev_err(&pci_dev->dev, "[%s] err: wrong asv_length of %d\n",
906 __func__, cmd->asv_length);
907 return -EINVAL;
908 }
909 rc = __genwqe_enqueue_ddcb(cd, req);
910 if (rc != 0)
911 return rc;
912
913 rc = __genwqe_wait_ddcb(cd, req);
914 if (rc < 0) /* error or signal interrupt */
915 goto err_exit;
916
917 if (ddcb_requ_collect_debug_data(req)) {
918 if (copy_to_user((void __user *)cmd->ddata_addr,
919 &req->debug_data,
920 sizeof(struct genwqe_debug_data)))
921 return -EFAULT;
922 }
923
924 /*
925 * Higher values than 0x102 indicate completion with faults,
926 * lower values than 0x102 indicate processing faults. Note
927 * that DDCB might have been purged. E.g. Cntl+C.
928 */
929 if (cmd->retc != DDCB_RETC_COMPLETE) {
930 /* This might happen, e.g. for flash reads, and needs to be
931 handled by the upper layer code. */
932 rc = -EBADMSG; /* not processed/error retc */
933 }
934
935 return rc;
936
937 err_exit:
938 __genwqe_purge_ddcb(cd, req);
939
940 if (ddcb_requ_collect_debug_data(req)) {
941 if (copy_to_user((void __user *)cmd->ddata_addr,
942 &req->debug_data,
943 sizeof(struct genwqe_debug_data)))
944 return -EFAULT;
945 }
946 return rc;
947}
948
949/**
950 * genwqe_next_ddcb_ready() - Figure out if the next DDCB is already finished
951 *
952 * We use this as condition for our wait-queue code.
953 */
954static int genwqe_next_ddcb_ready(struct genwqe_dev *cd)
955{
956 unsigned long flags;
957 struct ddcb *pddcb;
958 struct ddcb_queue *queue = &cd->queue;
959
960 spin_lock_irqsave(&queue->ddcb_lock, flags);
961
962 if (queue_empty(queue)) { /* empty queue */
963 spin_unlock_irqrestore(&queue->ddcb_lock, flags);
964 return 0;
965 }
966
967 pddcb = &queue->ddcb_vaddr[queue->ddcb_act];
968 if (pddcb->icrc_hsi_shi_32 & DDCB_COMPLETED_BE32) { /* ddcb ready */
969 spin_unlock_irqrestore(&queue->ddcb_lock, flags);
970 return 1;
971 }
972
973 spin_unlock_irqrestore(&queue->ddcb_lock, flags);
974 return 0;
975}
976
977/**
978 * genwqe_ddcbs_in_flight() - Check how many DDCBs are in flight
979 *
980 * Keep track of the number of DDCBs which are currently in the
981 * queue. This is needed for statistics as well as for the condition
982 * whether to wait or better poll in case no interrupts are available.
983 */
984int genwqe_ddcbs_in_flight(struct genwqe_dev *cd)
985{
986 unsigned long flags;
987 int ddcbs_in_flight = 0;
988 struct ddcb_queue *queue = &cd->queue;
989
990 spin_lock_irqsave(&queue->ddcb_lock, flags);
991 ddcbs_in_flight += queue->ddcbs_in_flight;
992 spin_unlock_irqrestore(&queue->ddcb_lock, flags);
993
994 return ddcbs_in_flight;
995}
996
997static int setup_ddcb_queue(struct genwqe_dev *cd, struct ddcb_queue *queue)
998{
999 int rc, i;
1000 struct ddcb *pddcb;
1001 u64 val64;
1002 unsigned int queue_size;
1003 struct pci_dev *pci_dev = cd->pci_dev;
1004
1005 if (genwqe_ddcb_max < 2)
1006 return -EINVAL;
1007
1008 queue_size = roundup(genwqe_ddcb_max * sizeof(struct ddcb), PAGE_SIZE);
1009
1010 queue->ddcbs_in_flight = 0; /* statistics */
1011 queue->ddcbs_max_in_flight = 0;
1012 queue->ddcbs_completed = 0;
1013 queue->busy = 0;
1014
1015 queue->ddcb_seq = 0x100; /* start sequence number */
1016 queue->ddcb_max = genwqe_ddcb_max; /* module parameter */
1017 queue->ddcb_vaddr = __genwqe_alloc_consistent(cd, queue_size,
1018 &queue->ddcb_daddr);
1019 if (queue->ddcb_vaddr == NULL) {
1020 dev_err(&pci_dev->dev,
1021 "[%s] **err: could not allocate DDCB **\n", __func__);
1022 return -ENOMEM;
1023 }
1024 memset(queue->ddcb_vaddr, 0, queue_size);
1025
1026 queue->ddcb_req = kzalloc(sizeof(struct ddcb_requ *) *
1027 queue->ddcb_max, GFP_KERNEL);
1028 if (!queue->ddcb_req) {
1029 rc = -ENOMEM;
1030 goto free_ddcbs;
1031 }
1032
1033 queue->ddcb_waitqs = kzalloc(sizeof(wait_queue_head_t) *
1034 queue->ddcb_max, GFP_KERNEL);
1035 if (!queue->ddcb_waitqs) {
1036 rc = -ENOMEM;
1037 goto free_requs;
1038 }
1039
1040 for (i = 0; i < queue->ddcb_max; i++) {
1041 pddcb = &queue->ddcb_vaddr[i]; /* DDCBs */
1042 pddcb->icrc_hsi_shi_32 = DDCB_COMPLETED_BE32;
1043 pddcb->retc_16 = cpu_to_be16(0xfff);
1044
1045 queue->ddcb_req[i] = NULL; /* requests */
1046 init_waitqueue_head(&queue->ddcb_waitqs[i]); /* waitqueues */
1047 }
1048
1049 queue->ddcb_act = 0;
1050 queue->ddcb_next = 0; /* queue is empty */
1051
1052 spin_lock_init(&queue->ddcb_lock);
1053 init_waitqueue_head(&queue->ddcb_waitq);
1054
1055 val64 = ((u64)(queue->ddcb_max - 1) << 8); /* lastptr */
1056 __genwqe_writeq(cd, queue->IO_QUEUE_CONFIG, 0x07); /* iCRC/vCRC */
1057 __genwqe_writeq(cd, queue->IO_QUEUE_SEGMENT, queue->ddcb_daddr);
1058 __genwqe_writeq(cd, queue->IO_QUEUE_INITSQN, queue->ddcb_seq);
1059 __genwqe_writeq(cd, queue->IO_QUEUE_WRAP, val64);
1060 return 0;
1061
1062 free_requs:
1063 kfree(queue->ddcb_req);
1064 queue->ddcb_req = NULL;
1065 free_ddcbs:
1066 __genwqe_free_consistent(cd, queue_size, queue->ddcb_vaddr,
1067 queue->ddcb_daddr);
1068 queue->ddcb_vaddr = NULL;
1069 queue->ddcb_daddr = 0ull;
1070 return -ENODEV;
1071
1072}
1073
1074static int ddcb_queue_initialized(struct ddcb_queue *queue)
1075{
1076 return queue->ddcb_vaddr != NULL;
1077}
1078
1079static void free_ddcb_queue(struct genwqe_dev *cd, struct ddcb_queue *queue)
1080{
1081 unsigned int queue_size;
1082
1083 queue_size = roundup(queue->ddcb_max * sizeof(struct ddcb), PAGE_SIZE);
1084
1085 kfree(queue->ddcb_req);
1086 queue->ddcb_req = NULL;
1087
1088 if (queue->ddcb_vaddr) {
1089 __genwqe_free_consistent(cd, queue_size, queue->ddcb_vaddr,
1090 queue->ddcb_daddr);
1091 queue->ddcb_vaddr = NULL;
1092 queue->ddcb_daddr = 0ull;
1093 }
1094}
1095
1096static irqreturn_t genwqe_pf_isr(int irq, void *dev_id)
1097{
1098 u64 gfir;
1099 struct genwqe_dev *cd = (struct genwqe_dev *)dev_id;
1100 struct pci_dev *pci_dev = cd->pci_dev;
1101
1102 /*
1103 * In case of fatal FIR error the queue is stopped, such that
1104 * we can safely check it without risking anything.
1105 */
1106 cd->irqs_processed++;
1107 wake_up_interruptible(&cd->queue_waitq);
1108
1109 /*
1110 * Checking for errors before kicking the queue might be
1111 * safer, but slower for the good-case ... See above.
1112 */
1113 gfir = __genwqe_readq(cd, IO_SLC_CFGREG_GFIR);
1114 if ((gfir & GFIR_ERR_TRIGGER) != 0x0) {
1115
1116 wake_up_interruptible(&cd->health_waitq);
1117
1118 /*
1119 * By default GFIRs cause recovery actions. This
1120 * count is just for debug when recovery is masked.
1121 */
1122 printk_ratelimited(KERN_ERR
1123 "%s %s: [%s] GFIR=%016llx\n",
1124 GENWQE_DEVNAME, dev_name(&pci_dev->dev),
1125 __func__, gfir);
1126 }
1127
1128 return IRQ_HANDLED;
1129}
1130
1131static irqreturn_t genwqe_vf_isr(int irq, void *dev_id)
1132{
1133 struct genwqe_dev *cd = (struct genwqe_dev *)dev_id;
1134
1135 cd->irqs_processed++;
1136 wake_up_interruptible(&cd->queue_waitq);
1137
1138 return IRQ_HANDLED;
1139}
1140
1141/**
1142 * genwqe_card_thread() - Work thread for the DDCB queue
1143 *
1144 * The idea is to check if there are DDCBs in processing. If there are
1145 * some finished DDCBs, we process them and wakeup the
1146 * requestors. Otherwise we give other processes time using
1147 * cond_resched().
1148 */
1149static int genwqe_card_thread(void *data)
1150{
1151 int should_stop = 0, rc = 0;
1152 struct genwqe_dev *cd = (struct genwqe_dev *)data;
1153
1154 while (!kthread_should_stop()) {
1155
1156 genwqe_check_ddcb_queue(cd, &cd->queue);
1157
1158 if (genwqe_polling_enabled) {
1159 rc = wait_event_interruptible_timeout(
1160 cd->queue_waitq,
1161 genwqe_ddcbs_in_flight(cd) ||
1162 (should_stop = kthread_should_stop()), 1);
1163 } else {
1164 rc = wait_event_interruptible_timeout(
1165 cd->queue_waitq,
1166 genwqe_next_ddcb_ready(cd) ||
1167 (should_stop = kthread_should_stop()), HZ);
1168 }
1169 if (should_stop)
1170 break;
1171
1172 /*
1173 * Avoid soft lockups on heavy loads; we do not want
1174 * to disable our interrupts.
1175 */
1176 cond_resched();
1177 }
1178 return 0;
1179}
1180
1181/**
1182 * genwqe_setup_service_layer() - Setup DDCB queue
1183 * @cd: pointer to genwqe device descriptor
1184 *
1185 * Allocate DDCBs. Configure Service Layer Controller (SLC).
1186 *
1187 * Return: 0 success
1188 */
1189int genwqe_setup_service_layer(struct genwqe_dev *cd)
1190{
1191 int rc;
1192 struct ddcb_queue *queue;
1193 struct pci_dev *pci_dev = cd->pci_dev;
1194
1195 if (genwqe_is_privileged(cd)) {
1196 rc = genwqe_card_reset(cd);
1197 if (rc < 0) {
1198 dev_err(&pci_dev->dev,
1199 "[%s] err: reset failed.\n", __func__);
1200 return rc;
1201 }
1202 genwqe_read_softreset(cd);
1203 }
1204
1205 queue = &cd->queue;
1206 queue->IO_QUEUE_CONFIG = IO_SLC_QUEUE_CONFIG;
1207 queue->IO_QUEUE_STATUS = IO_SLC_QUEUE_STATUS;
1208 queue->IO_QUEUE_SEGMENT = IO_SLC_QUEUE_SEGMENT;
1209 queue->IO_QUEUE_INITSQN = IO_SLC_QUEUE_INITSQN;
1210 queue->IO_QUEUE_OFFSET = IO_SLC_QUEUE_OFFSET;
1211 queue->IO_QUEUE_WRAP = IO_SLC_QUEUE_WRAP;
1212 queue->IO_QUEUE_WTIME = IO_SLC_QUEUE_WTIME;
1213 queue->IO_QUEUE_ERRCNTS = IO_SLC_QUEUE_ERRCNTS;
1214 queue->IO_QUEUE_LRW = IO_SLC_QUEUE_LRW;
1215
1216 rc = setup_ddcb_queue(cd, queue);
1217 if (rc != 0) {
1218 rc = -ENODEV;
1219 goto err_out;
1220 }
1221
1222 init_waitqueue_head(&cd->queue_waitq);
1223 cd->card_thread = kthread_run(genwqe_card_thread, cd,
1224 GENWQE_DEVNAME "%d_thread",
1225 cd->card_idx);
1226 if (IS_ERR(cd->card_thread)) {
1227 rc = PTR_ERR(cd->card_thread);
1228 cd->card_thread = NULL;
1229 goto stop_free_queue;
1230 }
1231
1232 rc = genwqe_set_interrupt_capability(cd, GENWQE_MSI_IRQS);
1233 if (rc > 0)
1234 rc = genwqe_set_interrupt_capability(cd, rc);
1235 if (rc != 0) {
1236 rc = -ENODEV;
1237 goto stop_kthread;
1238 }
1239
1240 /*
1241 * We must have all wait-queues initialized when we enable the
1242 * interrupts. Otherwise we might crash if we get an early
1243 * irq.
1244 */
1245 init_waitqueue_head(&cd->health_waitq);
1246
1247 if (genwqe_is_privileged(cd)) {
1248 rc = request_irq(pci_dev->irq, genwqe_pf_isr, IRQF_SHARED,
1249 GENWQE_DEVNAME, cd);
1250 } else {
1251 rc = request_irq(pci_dev->irq, genwqe_vf_isr, IRQF_SHARED,
1252 GENWQE_DEVNAME, cd);
1253 }
1254 if (rc < 0) {
1255 dev_err(&pci_dev->dev, "irq %d not free.\n", pci_dev->irq);
1256 goto stop_irq_cap;
1257 }
1258
1259 cd->card_state = GENWQE_CARD_USED;
1260 return 0;
1261
1262 stop_irq_cap:
1263 genwqe_reset_interrupt_capability(cd);
1264 stop_kthread:
1265 kthread_stop(cd->card_thread);
1266 cd->card_thread = NULL;
1267 stop_free_queue:
1268 free_ddcb_queue(cd, queue);
1269 err_out:
1270 return rc;
1271}
1272
1273/**
1274 * queue_wake_up_all() - Handles fatal error case
1275 *
1276 * The PCI device got unusable and we have to stop all pending
1277 * requests as fast as we can. The code after this must purge the
1278 * DDCBs in question and ensure that all mappings are freed.
1279 */
1280static int queue_wake_up_all(struct genwqe_dev *cd)
1281{
1282 unsigned int i;
1283 unsigned long flags;
1284 struct ddcb_queue *queue = &cd->queue;
1285
1286 spin_lock_irqsave(&queue->ddcb_lock, flags);
1287
1288 for (i = 0; i < queue->ddcb_max; i++)
1289 wake_up_interruptible(&queue->ddcb_waitqs[i]);
1290
1291 spin_unlock_irqrestore(&queue->ddcb_lock, flags);
1292
1293 return 0;
1294}
1295
1296/**
1297 * genwqe_finish_queue() - Remove any genwqe devices and user-interfaces
1298 *
1299 * Relies on the pre-condition that there are no users of the card
1300 * device anymore e.g. with open file-descriptors.
1301 *
1302 * This function must be robust enough to be called twice.
1303 */
1304int genwqe_finish_queue(struct genwqe_dev *cd)
1305{
1306 int i, rc = 0, in_flight;
1307 int waitmax = genwqe_ddcb_software_timeout;
1308 struct pci_dev *pci_dev = cd->pci_dev;
1309 struct ddcb_queue *queue = &cd->queue;
1310
1311 if (!ddcb_queue_initialized(queue))
1312 return 0;
1313
1314 /* Do not wipe out the error state. */
1315 if (cd->card_state == GENWQE_CARD_USED)
1316 cd->card_state = GENWQE_CARD_UNUSED;
1317
1318 /* Wake up all requests in the DDCB queue such that they
1319 should be removed nicely. */
1320 queue_wake_up_all(cd);
1321
1322 /* We must wait to get rid of the DDCBs in flight */
1323 for (i = 0; i < waitmax; i++) {
1324 in_flight = genwqe_ddcbs_in_flight(cd);
1325
1326 if (in_flight == 0)
1327 break;
1328
1329 dev_dbg(&pci_dev->dev,
1330 " DEBUG [%d/%d] waiting for queue to get empty: "
1331 "%d requests!\n", i, waitmax, in_flight);
1332
1333 /*
1334 * Severe error situation: The card itself has
1335 * 16 DDCB queues, each queue has e.g. 32 entries,
1336 * each DDCB has a hardware timeout of currently 250
1337 * msec but the PFs have a hardware timeout of 8 sec
1338 * ... so I take something large.
1339 */
1340 msleep(1000);
1341 }
1342 if (i == waitmax) {
1343 dev_err(&pci_dev->dev, " [%s] err: queue is not empty!!\n",
1344 __func__);
1345 rc = -EIO;
1346 }
1347 return rc;
1348}
1349
1350/**
1351 * genwqe_release_service_layer() - Shutdown DDCB queue
1352 * @cd: genwqe device descriptor
1353 *
1354 * This function must be robust enough to be called twice.
1355 */
1356int genwqe_release_service_layer(struct genwqe_dev *cd)
1357{
1358 struct pci_dev *pci_dev = cd->pci_dev;
1359
1360 if (!ddcb_queue_initialized(&cd->queue))
1361 return 1;
1362
1363 free_irq(pci_dev->irq, cd);
1364 genwqe_reset_interrupt_capability(cd);
1365
1366 if (cd->card_thread != NULL) {
1367 kthread_stop(cd->card_thread);
1368 cd->card_thread = NULL;
1369 }
1370
1371 free_ddcb_queue(cd, &cd->queue);
1372 return 0;
1373}
diff --git a/drivers/misc/genwqe/card_ddcb.h b/drivers/misc/genwqe/card_ddcb.h
new file mode 100644
index 000000000000..c4f26720753e
--- /dev/null
+++ b/drivers/misc/genwqe/card_ddcb.h
@@ -0,0 +1,188 @@
1#ifndef __CARD_DDCB_H__
2#define __CARD_DDCB_H__
3
4/**
5 * IBM Accelerator Family 'GenWQE'
6 *
7 * (C) Copyright IBM Corp. 2013
8 *
9 * Author: Frank Haverkamp <haver@linux.vnet.ibm.com>
10 * Author: Joerg-Stephan Vogt <jsvogt@de.ibm.com>
11 * Author: Michael Jung <mijung@de.ibm.com>
12 * Author: Michael Ruettger <michael@ibmra.de>
13 *
14 * This program is free software; you can redistribute it and/or modify
15 * it under the terms of the GNU General Public License as published by
16 * the Free Software Foundation; either version 2, or (at your option)
17 * any later version.
18 *
19 * This program is distributed in the hope that it will be useful,
20 * but WITHOUT ANY WARRANTY; without even the implied warranty of
21 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
22 * GNU General Public License for more details.
23 */
24
25#include <linux/types.h>
26#include <asm/byteorder.h>
27
28#include "genwqe_driver.h"
29#include "card_base.h"
30
31/**
32 * struct ddcb - Device Driver Control Block DDCB
33 * @hsi: Hardware software interlock
34 * @shi: Software hardware interlock. Hsi and shi are used to interlock
35 * software and hardware activities. We are using a compare and
36 * swap operation to ensure that there are no races when
37 * activating new DDCBs on the queue, or when we need to
38 * purge a DDCB from a running queue.
39 * @acfunc: Accelerator function addresses a unit within the chip
40 * @cmd: Command to work on
41 * @cmdopts_16: Options for the command
42 * @asiv: Input data
43 * @asv: Output data
44 *
45 * The DDCB data format is big endian. Multiple consecutive DDCBs form
46 * a DDCB queue.
47 */
48#define ASIV_LENGTH 104 /* Old specification without ATS field */
49#define ASIV_LENGTH_ATS 96 /* New specification with ATS field */
50#define ASV_LENGTH 64
51
52struct ddcb {
53 union {
54 __be32 icrc_hsi_shi_32; /* iCRC, Hardware/SW interlock */
55 struct {
56 __be16 icrc_16;
57 u8 hsi;
58 u8 shi;
59 };
60 };
61 u8 pre; /* Preamble */
62 u8 xdir; /* Execution Directives */
63 __be16 seqnum_16; /* Sequence Number */
64
65 u8 acfunc; /* Accelerator Function.. */
66 u8 cmd; /* Command. */
67 __be16 cmdopts_16; /* Command Options */
68 u8 sur; /* Status Update Rate */
69 u8 psp; /* Protection Section Pointer */
70 __be16 rsvd_0e_16; /* Reserved invariant */
71
72 __be64 fwiv_64; /* Firmware Invariant. */
73
74 union {
75 struct {
76 __be64 ats_64; /* Address Translation Spec */
77 u8 asiv[ASIV_LENGTH_ATS]; /* New ASIV */
78 } n;
79 u8 __asiv[ASIV_LENGTH]; /* obsolete */
80 };
81 u8 asv[ASV_LENGTH]; /* Appl Spec Variant */
82
83 __be16 rsvd_c0_16; /* Reserved Variant */
84 __be16 vcrc_16; /* Variant CRC */
85 __be32 rsvd_32; /* Reserved unprotected */
86
87 __be64 deque_ts_64; /* Deque Time Stamp. */
88
89 __be16 retc_16; /* Return Code */
90 __be16 attn_16; /* Attention/Extended Error Codes */
91 __be32 progress_32; /* Progress indicator. */
92
93 __be64 cmplt_ts_64; /* Completion Time Stamp. */
94
95 /* The following layout matches the new service layer format */
96 __be32 ibdc_32; /* Inbound Data Count (* 256) */
97 __be32 obdc_32; /* Outbound Data Count (* 256) */
98
99 __be64 rsvd_SLH_64; /* Reserved for hardware */
100 union { /* private data for driver */
101 u8 priv[8];
102 __be64 priv_64;
103 };
104 __be64 disp_ts_64; /* Dispatch TimeStamp */
105} __attribute__((__packed__));
106
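The packed layout above is 256 bytes, and the field offsets match the clearing code in get_next_ddcb() (ASV at offset 0x80, the RETC/ATTN block at 0xd0). A hypothetical compile-time check, shown only as a sketch:

#include <linux/bug.h>
#include <linux/stddef.h>

static inline void ddcb_layout_check(void)
{
	BUILD_BUG_ON(sizeof(struct ddcb) != 256);
	BUILD_BUG_ON(offsetof(struct ddcb, asv) != 0x80);
	BUILD_BUG_ON(offsetof(struct ddcb, retc_16) != 0xd0);
	BUILD_BUG_ON(offsetof(struct ddcb, disp_ts_64) != 0xf8);
}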
107/* CRC polynomials for DDCB */
108#define CRC16_POLYNOMIAL 0x1021
109
110/*
111 * SHI: Software to Hardware Interlock
112 * This 1 byte field is written by software to interlock the
113 * movement of one queue entry to another with the hardware in the
114 * chip.
115 */
116#define DDCB_SHI_INTR 0x04 /* Bit 2 */
117#define DDCB_SHI_PURGE 0x02 /* Bit 1 */
118#define DDCB_SHI_NEXT 0x01 /* Bit 0 */
119
120/*
121 * HSI: Hardware to Software interlock
122 * This 1 byte field is written by hardware to interlock the movement
123 * of one queue entry to another with the software in the chip.
124 */
125#define DDCB_HSI_COMPLETED 0x40 /* Bit 6 */
126#define DDCB_HSI_FETCHED 0x04 /* Bit 2 */
127
128/*
129 * Accessing HSI/SHI is done 32-bit wide
130 * Normally 16-bit access would work too, but on some platforms the
131 * 16-bit compare-and-swap operation is not supported. Therefore
132 * switching to 32-bit such that those platforms will work too.
133 *
134 * iCRC HSI/SHI
135 */
136#define DDCB_INTR_BE32 cpu_to_be32(0x00000004)
137#define DDCB_PURGE_BE32 cpu_to_be32(0x00000002)
138#define DDCB_NEXT_BE32 cpu_to_be32(0x00000001)
139#define DDCB_COMPLETED_BE32 cpu_to_be32(0x00004000)
140#define DDCB_FETCHED_BE32 cpu_to_be32(0x00000400)
141
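The 32-bit masks line up with the byte-wide HSI/SHI values because the field is stored big endian: 0x00004000 occupies memory as the bytes 00 00 40 00, so the third byte (the hsi field of struct ddcb) carries DDCB_HSI_COMPLETED, and the SHI masks land in the fourth byte. A small user-space check, for illustration only:

#include <stdint.h>
#include <string.h>
#include <assert.h>
#include <arpa/inet.h>	/* htonl() stands in for cpu_to_be32() */

static void check_hsi_shi_masks(void)
{
	uint32_t completed = htonl(0x00004000);	/* DDCB_COMPLETED_BE32 */
	uint32_t next = htonl(0x00000001);	/* DDCB_NEXT_BE32 */
	uint8_t b[4];

	memcpy(b, &completed, sizeof(b));
	assert(b[2] == 0x40);			/* hsi byte: DDCB_HSI_COMPLETED */

	memcpy(b, &next, sizeof(b));
	assert(b[3] == 0x01);			/* shi byte: DDCB_SHI_NEXT */
}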
142/* Definitions of DDCB presets */
143#define DDCB_PRESET_PRE 0x80
144#define ICRC_LENGTH(n) ((n) + 8 + 8 + 8) /* used ASIV + hdr fields */
145#define VCRC_LENGTH(n) ((n)) /* used ASV */
146
147/*
148 * Genwqe Scatter Gather list
149 * Each element has up to 8 entries.
150 * The chaining element is element 0 because of prefetching needs.
151 */
152
153/*
154 * 0b0110 Chained descriptor. The descriptor is describing the next
155 * descriptor list.
156 */
157#define SG_CHAINED (0x6)
158
159/*
160 * 0b0010 First entry of a descriptor list. Start from a Buffer-Empty
161 * condition.
162 */
163#define SG_DATA (0x2)
164
165/*
166 * 0b0000 Early terminator. This is the last entry on the list
167 * regardless of the length indicated.
168 */
169#define SG_END_LIST (0x0)
170
171/**
172 * struct sglist - Scatter gather list
173 * @target_addr: Either a dma addr of memory to work on or a
174 * dma addr of a subsequent sglist block.
175 * @len: Length of the data block.
176 * @flags: See above.
177 *
178 * Depending on the command the GenWQE card can use a scatter gather
179 * list to describe the memory it works on. Always 8 sg_entry's form
180 * a block.
181 */
182struct sg_entry {
183 __be64 target_addr;
184 __be32 len;
185 __be32 flags;
186};
187
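Since the entries are read by the hardware in big endian, filling one sg_entry needs the usual conversions. A sketch (the helper name is illustrative, not part of the patch):

static inline void sg_entry_set(struct sg_entry *e, dma_addr_t daddr,
				u32 len, u32 flags)
{
	e->target_addr = cpu_to_be64(daddr);	/* data block or next sg block */
	e->len = cpu_to_be32(len);
	e->flags = cpu_to_be32(flags);		/* SG_DATA, SG_CHAINED, SG_END_LIST */
}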
188#endif /* __CARD_DDCB_H__ */
diff --git a/drivers/misc/genwqe/card_dev.c b/drivers/misc/genwqe/card_dev.c
new file mode 100644
index 000000000000..9b231bb1005c
--- /dev/null
+++ b/drivers/misc/genwqe/card_dev.c
@@ -0,0 +1,1486 @@
1/**
2 * IBM Accelerator Family 'GenWQE'
3 *
4 * (C) Copyright IBM Corp. 2013
5 *
6 * Author: Frank Haverkamp <haver@linux.vnet.ibm.com>
7 * Author: Joerg-Stephan Vogt <jsvogt@de.ibm.com>
8 * Author: Michael Jung <mijung@de.ibm.com>
9 * Author: Michael Ruettger <michael@ibmra.de>
10 *
11 * This program is free software; you can redistribute it and/or modify
12 * it under the terms of the GNU General Public License (version 2 only)
13 * as published by the Free Software Foundation.
14 *
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
19 */
20
21/*
22 * Character device representation of the GenWQE device. This allows
23 * user-space applications to communicate with the card.
24 */
25
26#include <linux/kernel.h>
27#include <linux/types.h>
28#include <linux/module.h>
29#include <linux/pci.h>
30#include <linux/string.h>
31#include <linux/fs.h>
32#include <linux/sched.h>
33#include <linux/wait.h>
34#include <linux/delay.h>
35#include <linux/atomic.h>
36
37#include "card_base.h"
38#include "card_ddcb.h"
39
40static int genwqe_open_files(struct genwqe_dev *cd)
41{
42 int rc;
43 unsigned long flags;
44
45 spin_lock_irqsave(&cd->file_lock, flags);
46 rc = list_empty(&cd->file_list);
47 spin_unlock_irqrestore(&cd->file_lock, flags);
48 return !rc;
49}
50
51static void genwqe_add_file(struct genwqe_dev *cd, struct genwqe_file *cfile)
52{
53 unsigned long flags;
54
55 cfile->owner = current;
56 spin_lock_irqsave(&cd->file_lock, flags);
57 list_add(&cfile->list, &cd->file_list);
58 spin_unlock_irqrestore(&cd->file_lock, flags);
59}
60
61static int genwqe_del_file(struct genwqe_dev *cd, struct genwqe_file *cfile)
62{
63 unsigned long flags;
64
65 spin_lock_irqsave(&cd->file_lock, flags);
66 list_del(&cfile->list);
67 spin_unlock_irqrestore(&cd->file_lock, flags);
68
69 return 0;
70}
71
72static void genwqe_add_pin(struct genwqe_file *cfile, struct dma_mapping *m)
73{
74 unsigned long flags;
75
76 spin_lock_irqsave(&cfile->pin_lock, flags);
77 list_add(&m->pin_list, &cfile->pin_list);
78 spin_unlock_irqrestore(&cfile->pin_lock, flags);
79}
80
81static int genwqe_del_pin(struct genwqe_file *cfile, struct dma_mapping *m)
82{
83 unsigned long flags;
84
85 spin_lock_irqsave(&cfile->pin_lock, flags);
86 list_del(&m->pin_list);
87 spin_unlock_irqrestore(&cfile->pin_lock, flags);
88
89 return 0;
90}
91
92/**
93 * genwqe_search_pin() - Search for the mapping for a userspace address
94 * @cfile: Descriptor of opened file
95 * @u_addr: User virtual address
96 * @size: Size of buffer
97 * @dma_addr: DMA address to be updated
98 *
99 * Return: Pointer to the corresponding mapping, or NULL if not found
100 */
101static struct dma_mapping *genwqe_search_pin(struct genwqe_file *cfile,
102 unsigned long u_addr,
103 unsigned int size,
104 void **virt_addr)
105{
106 unsigned long flags;
107 struct dma_mapping *m;
108
109 spin_lock_irqsave(&cfile->pin_lock, flags);
110
111 list_for_each_entry(m, &cfile->pin_list, pin_list) {
112 if ((((u64)m->u_vaddr) <= (u_addr)) &&
113 (((u64)m->u_vaddr + m->size) >= (u_addr + size))) {
114
115 if (virt_addr)
116 *virt_addr = m->k_vaddr +
117 (u_addr - (u64)m->u_vaddr);
118
119 spin_unlock_irqrestore(&cfile->pin_lock, flags);
120 return m;
121 }
122 }
123 spin_unlock_irqrestore(&cfile->pin_lock, flags);
124 return NULL;
125}
126
127static void __genwqe_add_mapping(struct genwqe_file *cfile,
128 struct dma_mapping *dma_map)
129{
130 unsigned long flags;
131
132 spin_lock_irqsave(&cfile->map_lock, flags);
133 list_add(&dma_map->card_list, &cfile->map_list);
134 spin_unlock_irqrestore(&cfile->map_lock, flags);
135}
136
137static void __genwqe_del_mapping(struct genwqe_file *cfile,
138 struct dma_mapping *dma_map)
139{
140 unsigned long flags;
141
142 spin_lock_irqsave(&cfile->map_lock, flags);
143 list_del(&dma_map->card_list);
144 spin_unlock_irqrestore(&cfile->map_lock, flags);
145}
146
147
148/**
149 * __genwqe_search_mapping() - Search for the mapping for a userspace address
150 * @cfile: descriptor of opened file
151 * @u_addr: user virtual address
152 * @size: size of buffer
153 * @dma_addr: DMA address to be updated
154 * Return: Pointer to the corresponding mapping, or NULL if not found
155 */
156static struct dma_mapping *__genwqe_search_mapping(struct genwqe_file *cfile,
157 unsigned long u_addr,
158 unsigned int size,
159 dma_addr_t *dma_addr,
160 void **virt_addr)
161{
162 unsigned long flags;
163 struct dma_mapping *m;
164 struct pci_dev *pci_dev = cfile->cd->pci_dev;
165
166 spin_lock_irqsave(&cfile->map_lock, flags);
167 list_for_each_entry(m, &cfile->map_list, card_list) {
168
169 if ((((u64)m->u_vaddr) <= (u_addr)) &&
170 (((u64)m->u_vaddr + m->size) >= (u_addr + size))) {
171
172 /* match found: current is as expected and
173 addr is in range */
174 if (dma_addr)
175 *dma_addr = m->dma_addr +
176 (u_addr - (u64)m->u_vaddr);
177
178 if (virt_addr)
179 *virt_addr = m->k_vaddr +
180 (u_addr - (u64)m->u_vaddr);
181
182 spin_unlock_irqrestore(&cfile->map_lock, flags);
183 return m;
184 }
185 }
186 spin_unlock_irqrestore(&cfile->map_lock, flags);
187
188 dev_err(&pci_dev->dev,
189 "[%s] Entry not found: u_addr=%lx, size=%x\n",
190 __func__, u_addr, size);
191
192 return NULL;
193}
194
195static void genwqe_remove_mappings(struct genwqe_file *cfile)
196{
197 int i = 0;
198 struct list_head *node, *next;
199 struct dma_mapping *dma_map;
200 struct genwqe_dev *cd = cfile->cd;
201 struct pci_dev *pci_dev = cfile->cd->pci_dev;
202
203 list_for_each_safe(node, next, &cfile->map_list) {
204 dma_map = list_entry(node, struct dma_mapping, card_list);
205
206 list_del_init(&dma_map->card_list);
207
208 /*
209 * This is really a bug, because those things should
210 * have been already tidied up.
211 *
212 * GENWQE_MAPPING_RAW should have been removed via munmap().
213 * GENWQE_MAPPING_SGL_TEMP should be removed by tidy up code.
214 */
215 dev_err(&pci_dev->dev,
216 "[%s] %d. cleanup mapping: u_vaddr=%p "
217 "u_kaddr=%016lx dma_addr=%llx\n", __func__, i++,
218 dma_map->u_vaddr, (unsigned long)dma_map->k_vaddr,
219 dma_map->dma_addr);
220
221 if (dma_map->type == GENWQE_MAPPING_RAW) {
222 /* we allocated this dynamically */
223 __genwqe_free_consistent(cd, dma_map->size,
224 dma_map->k_vaddr,
225 dma_map->dma_addr);
226 kfree(dma_map);
227 } else if (dma_map->type == GENWQE_MAPPING_SGL_TEMP) {
228 /* we use dma_map statically from the request */
229 genwqe_user_vunmap(cd, dma_map, NULL);
230 }
231 }
232}
233
234static void genwqe_remove_pinnings(struct genwqe_file *cfile)
235{
236 struct list_head *node, *next;
237 struct dma_mapping *dma_map;
238 struct genwqe_dev *cd = cfile->cd;
239
240 list_for_each_safe(node, next, &cfile->pin_list) {
241 dma_map = list_entry(node, struct dma_mapping, pin_list);
242
243 /*
244 * This is not a bug, because a killed process might
245 * not call the unpin ioctl, which is supposed to free
246 * the resources.
247 *
248 * Pinnings are dynamically allocated and need to be
249 * deleted.
250 */
251 list_del_init(&dma_map->pin_list);
252 genwqe_user_vunmap(cd, dma_map, NULL);
253 kfree(dma_map);
254 }
255}
256
257/**
258 * genwqe_kill_fasync() - Send signal to all processes with open GenWQE files
259 *
260 * E.g. genwqe_send_signal(cd, SIGIO);
261 */
262static int genwqe_kill_fasync(struct genwqe_dev *cd, int sig)
263{
264 unsigned int files = 0;
265 unsigned long flags;
266 struct genwqe_file *cfile;
267
268 spin_lock_irqsave(&cd->file_lock, flags);
269 list_for_each_entry(cfile, &cd->file_list, list) {
270 if (cfile->async_queue)
271 kill_fasync(&cfile->async_queue, sig, POLL_HUP);
272 files++;
273 }
274 spin_unlock_irqrestore(&cd->file_lock, flags);
275 return files;
276}
277
278static int genwqe_force_sig(struct genwqe_dev *cd, int sig)
279{
280 unsigned int files = 0;
281 unsigned long flags;
282 struct genwqe_file *cfile;
283
284 spin_lock_irqsave(&cd->file_lock, flags);
285 list_for_each_entry(cfile, &cd->file_list, list) {
286 force_sig(sig, cfile->owner);
287 files++;
288 }
289 spin_unlock_irqrestore(&cd->file_lock, flags);
290 return files;
291}
292
293/**
294 * genwqe_open() - file open
295 * @inode: file system information
296 * @filp: file handle
297 *
298 * This function is executed whenever an application calls
299 * open("/dev/genwqe",..).
300 *
301 * Return: 0 if successful or a negative error code otherwise
302 */
303static int genwqe_open(struct inode *inode, struct file *filp)
304{
305 struct genwqe_dev *cd;
306 struct genwqe_file *cfile;
307 struct pci_dev *pci_dev;
308
309 cfile = kzalloc(sizeof(*cfile), GFP_KERNEL);
310 if (cfile == NULL)
311 return -ENOMEM;
312
313 cd = container_of(inode->i_cdev, struct genwqe_dev, cdev_genwqe);
314 pci_dev = cd->pci_dev;
315 cfile->cd = cd;
316 cfile->filp = filp;
317 cfile->client = NULL;
318
319 spin_lock_init(&cfile->map_lock); /* list of raw memory allocations */
320 INIT_LIST_HEAD(&cfile->map_list);
321
322 spin_lock_init(&cfile->pin_lock); /* list of user pinned memory */
323 INIT_LIST_HEAD(&cfile->pin_list);
324
325 filp->private_data = cfile;
326
327 genwqe_add_file(cd, cfile);
328 return 0;
329}
330
331/**
332 * genwqe_fasync() - Setup process to receive SIGIO.
333 * @fd: file descriptor
334 * @filp: file handle
335 * @mode: file mode
336 *
337 * Sending a signal works as follows:
338 *
339 * if (cdev->async_queue)
340 * kill_fasync(&cdev->async_queue, SIGIO, POLL_IN);
341 *
342 * Some devices also implement asynchronous notification to indicate
343 * when the device can be written; in this case, of course,
344 * kill_fasync must be called with a mode of POLL_OUT.
345 */
346static int genwqe_fasync(int fd, struct file *filp, int mode)
347{
348 struct genwqe_file *cdev = (struct genwqe_file *)filp->private_data;
349 return fasync_helper(fd, filp, mode, &cdev->async_queue);
350}
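
/*
 * Illustrative userspace sketch (not part of this patch): how an
 * application might register for the SIGIO notification which
 * genwqe_kill_fasync() above delivers. The /dev/genwqe<n>_card device
 * name follows the convention used further below; everything else is
 * plain fcntl()/signal() usage, shown only as a hedged example.
 */
#include <fcntl.h>
#include <signal.h>
#include <unistd.h>

static void card_notify(int sig)
{
	(void)sig;		/* card signalled a problem (POLL_HUP band) */
}

static int setup_sigio(const char *devname)
{
	int fd, flags;

	fd = open(devname, O_RDWR);
	if (fd < 0)
		return -1;

	signal(SIGIO, card_notify);

	/* direct SIGIO to this process, then enable async notification */
	fcntl(fd, F_SETOWN, getpid());
	flags = fcntl(fd, F_GETFL);
	fcntl(fd, F_SETFL, flags | FASYNC);

	return fd;		/* e.g. setup_sigio("/dev/genwqe0_card") */
}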
351
352
353/**
354 * genwqe_release() - file close
355 * @inode: file system information
356 * @filp: file handle
357 *
358 * This function is executed whenever an application calls 'close(fd_genwqe)'
359 *
360 * Return: always 0
361 */
362static int genwqe_release(struct inode *inode, struct file *filp)
363{
364 struct genwqe_file *cfile = (struct genwqe_file *)filp->private_data;
365 struct genwqe_dev *cd = cfile->cd;
366
367 /* there must be no entries in these lists! */
368 genwqe_remove_mappings(cfile);
369 genwqe_remove_pinnings(cfile);
370
371 /* remove this filp from the asynchronously notified filp's */
372 genwqe_fasync(-1, filp, 0);
373
374	/*
375	 * cd must not be released while this cfile is still open;
376	 * otherwise the list entry becomes invalid, because the
377	 * list itself gets reinstantiated!
378	 */
379 genwqe_del_file(cd, cfile);
380 kfree(cfile);
381 return 0;
382}
383
384static void genwqe_vma_open(struct vm_area_struct *vma)
385{
386 /* nothing ... */
387}
388
389/**
390 * genwqe_vma_close() - Called each time when vma is unmapped
391 *
392 * Free memory which got allocated by GenWQE mmap().
393 */
394static void genwqe_vma_close(struct vm_area_struct *vma)
395{
396 unsigned long vsize = vma->vm_end - vma->vm_start;
397 struct inode *inode = vma->vm_file->f_dentry->d_inode;
398 struct dma_mapping *dma_map;
399 struct genwqe_dev *cd = container_of(inode->i_cdev, struct genwqe_dev,
400 cdev_genwqe);
401 struct pci_dev *pci_dev = cd->pci_dev;
402 dma_addr_t d_addr = 0;
403 struct genwqe_file *cfile = vma->vm_private_data;
404
405 dma_map = __genwqe_search_mapping(cfile, vma->vm_start, vsize,
406 &d_addr, NULL);
407 if (dma_map == NULL) {
408 dev_err(&pci_dev->dev,
409 " [%s] err: mapping not found: v=%lx, p=%lx s=%lx\n",
410 __func__, vma->vm_start, vma->vm_pgoff << PAGE_SHIFT,
411 vsize);
412 return;
413 }
414 __genwqe_del_mapping(cfile, dma_map);
415 __genwqe_free_consistent(cd, dma_map->size, dma_map->k_vaddr,
416 dma_map->dma_addr);
417 kfree(dma_map);
418}
419
420static struct vm_operations_struct genwqe_vma_ops = {
421 .open = genwqe_vma_open,
422 .close = genwqe_vma_close,
423};
424
425/**
426 * genwqe_mmap() - Provide contiguous buffers to userspace
427 *
428 * We use mmap() to allocate contiguous buffers used for DMA
429 * transfers. After the buffer is allocated we remap it to user-space
430 * and remember a reference to our dma_mapping data structure, where
431 * we store the associated DMA address and allocated size.
432 *
433 * When we receive a DDCB execution request with the ATS bits set to
434 * plain buffer, we look up our dma_mapping list to find the
435 * corresponding DMA address for the associated user-space address.
436 */
437static int genwqe_mmap(struct file *filp, struct vm_area_struct *vma)
438{
439 int rc;
440 unsigned long pfn, vsize = vma->vm_end - vma->vm_start;
441 struct genwqe_file *cfile = (struct genwqe_file *)filp->private_data;
442 struct genwqe_dev *cd = cfile->cd;
443 struct dma_mapping *dma_map;
444
445 if (vsize == 0)
446 return -EINVAL;
447
448 if (get_order(vsize) > MAX_ORDER)
449 return -ENOMEM;
450
451 dma_map = kzalloc(sizeof(struct dma_mapping), GFP_ATOMIC);
452 if (dma_map == NULL)
453 return -ENOMEM;
454
455 genwqe_mapping_init(dma_map, GENWQE_MAPPING_RAW);
456 dma_map->u_vaddr = (void *)vma->vm_start;
457 dma_map->size = vsize;
458 dma_map->nr_pages = DIV_ROUND_UP(vsize, PAGE_SIZE);
459 dma_map->k_vaddr = __genwqe_alloc_consistent(cd, vsize,
460 &dma_map->dma_addr);
461 if (dma_map->k_vaddr == NULL) {
462 rc = -ENOMEM;
463 goto free_dma_map;
464 }
465
466 if (capable(CAP_SYS_ADMIN) && (vsize > sizeof(dma_addr_t)))
467 *(dma_addr_t *)dma_map->k_vaddr = dma_map->dma_addr;
468
469 pfn = virt_to_phys(dma_map->k_vaddr) >> PAGE_SHIFT;
470 rc = remap_pfn_range(vma,
471 vma->vm_start,
472 pfn,
473 vsize,
474 vma->vm_page_prot);
475 if (rc != 0) {
476 rc = -EFAULT;
477 goto free_dma_mem;
478 }
479
480 vma->vm_private_data = cfile;
481 vma->vm_ops = &genwqe_vma_ops;
482 __genwqe_add_mapping(cfile, dma_map);
483
484 return 0;
485
486 free_dma_mem:
487 __genwqe_free_consistent(cd, dma_map->size,
488 dma_map->k_vaddr,
489 dma_map->dma_addr);
490 free_dma_map:
491 kfree(dma_map);
492 return rc;
493}
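
/*
 * Illustrative userspace sketch (not part of this patch): allocating a
 * DMA-able buffer through the mmap() path implemented above. The
 * /dev/genwqe0_card name and the buffer size are assumptions made only
 * for this example; once mapped, the kernel side finds the buffer again
 * via __genwqe_search_mapping() when a DDCB references it, and
 * munmap() ends up in genwqe_vma_close().
 */
#include <stddef.h>
#include <string.h>
#include <sys/mman.h>

static void *alloc_card_buffer(int card_fd, size_t size)
{
	void *buf;

	/* triggers genwqe_mmap(): contiguous kernel memory, remapped */
	buf = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED,
		   card_fd, 0);
	if (buf == MAP_FAILED)
		return NULL;

	memset(buf, 0, size);	/* buffer is now usable as DDCB data area */
	return buf;
}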
494
495/**
496 * do_flash_update() - Execute flash update (write image or CVPD)
497 * @cfile: descriptor of opened file
498 * @load: details about image load
499 *
500 * Return: 0 if successful
501 */
502
503#define FLASH_BLOCK 0x40000 /* we use 256k blocks */
504
505static int do_flash_update(struct genwqe_file *cfile,
506 struct genwqe_bitstream *load)
507{
508 int rc = 0;
509 int blocks_to_flash;
510 u64 dma_addr, flash = 0;
511 size_t tocopy = 0;
512 u8 __user *buf;
513 u8 *xbuf;
514 u32 crc;
515 u8 cmdopts;
516 struct genwqe_dev *cd = cfile->cd;
517 struct pci_dev *pci_dev = cd->pci_dev;
518
519 if ((load->size & 0x3) != 0) {
520 dev_err(&pci_dev->dev,
521 "err: buf %d bytes not 4 bytes aligned!\n",
522 load->size);
523 return -EINVAL;
524 }
525 if (((unsigned long)(load->data_addr) & ~PAGE_MASK) != 0) {
526 dev_err(&pci_dev->dev,
527 "err: buf is not page aligned!\n");
528 return -EINVAL;
529 }
530
531 /* FIXME Bits have changed for new service layer! */
532 switch ((char)load->partition) {
533 case '0':
534 cmdopts = 0x14;
535 break; /* download/erase_first/part_0 */
536 case '1':
537 cmdopts = 0x1C;
538 break; /* download/erase_first/part_1 */
539 case 'v': /* cmdopts = 0x0c (VPD) */
540 default:
541 dev_err(&pci_dev->dev,
542 "err: invalid partition %02x!\n", load->partition);
543 return -EINVAL;
544 }
545 dev_info(&pci_dev->dev,
546 "[%s] start flash update UID: 0x%x size: %u bytes part: %c\n",
547 __func__, load->uid, load->size, (char)load->partition);
548
549 buf = (u8 __user *)load->data_addr;
550 xbuf = __genwqe_alloc_consistent(cd, FLASH_BLOCK, &dma_addr);
551 if (xbuf == NULL) {
552 dev_err(&pci_dev->dev, "err: no memory\n");
553 return -ENOMEM;
554 }
555
556 blocks_to_flash = load->size / FLASH_BLOCK;
557 while (load->size) {
558 struct genwqe_ddcb_cmd *req;
559
560 /*
561		 * We must be 4 byte aligned. The buffer must be zero-padded
562		 * so that the CRC is calculated over defined values.
563 */
564 tocopy = min_t(size_t, load->size, FLASH_BLOCK);
565
566 rc = copy_from_user(xbuf, buf, tocopy);
567 if (rc) {
568 dev_err(&pci_dev->dev,
569 "err: could not copy all data rc=%d\n", rc);
570 goto free_buffer;
571 }
572 crc = genwqe_crc32(xbuf, tocopy, 0xffffffff);
573
574 dev_info(&pci_dev->dev,
575 "[%s] DMA: 0x%llx CRC: %08x SZ: %ld %d\n",
576 __func__, dma_addr, crc, tocopy, blocks_to_flash);
577
578 /* prepare DDCB for SLU process */
579 req = ddcb_requ_alloc();
580 if (req == NULL) {
581 rc = -ENOMEM;
582 goto free_buffer;
583 }
584
585 req->cmd = SLCMD_MOVE_FLASH;
586 req->cmdopts = cmdopts;
587
588 /* prepare invariant values */
589 if (genwqe_get_slu_id(cd) <= 0x2) {
590 *(u64 *)&req->__asiv[0] = cpu_to_be64(dma_addr);
591 *(u64 *)&req->__asiv[8] = cpu_to_be64(tocopy);
592 *(u64 *)&req->__asiv[16] = cpu_to_be64(flash);
593 *(u32 *)&req->__asiv[24] = cpu_to_be32(0);
594 req->__asiv[24] = load->uid;
595 *(u32 *)&req->__asiv[28] = cpu_to_be32(crc);
596
597 /* for simulation only */
598 *(u64 *)&req->__asiv[88] = cpu_to_be64(load->slu_id);
599 *(u64 *)&req->__asiv[96] = cpu_to_be64(load->app_id);
600 req->asiv_length = 32; /* bytes included in crc calc */
601 } else { /* setup DDCB for ATS architecture */
602 *(u64 *)&req->asiv[0] = cpu_to_be64(dma_addr);
603 *(u32 *)&req->asiv[8] = cpu_to_be32(tocopy);
604 *(u32 *)&req->asiv[12] = cpu_to_be32(0); /* resvd */
605 *(u64 *)&req->asiv[16] = cpu_to_be64(flash);
606 *(u32 *)&req->asiv[24] = cpu_to_be32(load->uid<<24);
607 *(u32 *)&req->asiv[28] = cpu_to_be32(crc);
608
609 /* for simulation only */
610 *(u64 *)&req->asiv[80] = cpu_to_be64(load->slu_id);
611 *(u64 *)&req->asiv[88] = cpu_to_be64(load->app_id);
612
613 req->ats = cpu_to_be64(0x4ULL << 44); /* Rd only */
614 req->asiv_length = 40; /* bytes included in crc calc */
615 }
616 req->asv_length = 8;
617
618 /* For Genwqe5 we get back the calculated CRC */
619 *(u64 *)&req->asv[0] = 0ULL; /* 0x80 */
620
621 rc = __genwqe_execute_raw_ddcb(cd, req);
622
623 load->retc = req->retc;
624 load->attn = req->attn;
625 load->progress = req->progress;
626
627 if (rc < 0) {
628 dev_err(&pci_dev->dev,
629 " [%s] DDCB returned (RETC=%x ATTN=%x "
630 "PROG=%x rc=%d)\n", __func__, req->retc,
631 req->attn, req->progress, rc);
632
633 ddcb_requ_free(req);
634 goto free_buffer;
635 }
636
637 if (req->retc != DDCB_RETC_COMPLETE) {
638 dev_info(&pci_dev->dev,
639 " [%s] DDCB returned (RETC=%x ATTN=%x "
640 "PROG=%x)\n", __func__, req->retc,
641 req->attn, req->progress);
642
643 rc = -EIO;
644 ddcb_requ_free(req);
645 goto free_buffer;
646 }
647
648 load->size -= tocopy;
649 flash += tocopy;
650 buf += tocopy;
651 blocks_to_flash--;
652 ddcb_requ_free(req);
653 }
654
655 free_buffer:
656 __genwqe_free_consistent(cd, FLASH_BLOCK, xbuf, dma_addr);
657 return rc;
658}
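
/*
 * Illustrative userspace sketch (not part of this patch): driving the
 * GENWQE_SLU_UPDATE ioctl served further below. The ioctl number and
 * struct genwqe_bitstream come from the GenWQE UAPI header, which is
 * not part of this hunk; only the fields referenced in
 * do_flash_update() above are used. The constraints are the ones
 * checked above: a page-aligned image buffer and a size that is a
 * multiple of 4 bytes. Flashing additionally requires a privileged
 * caller (see the ioctl handler).
 */
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <sys/ioctl.h>

static int flash_partition_0(int card_fd, const void *image, size_t size)
{
	struct genwqe_bitstream load;
	void *aligned;

	if (size & 0x3)
		return -1;			/* must be 4-byte multiple */

	if (posix_memalign(&aligned, sysconf(_SC_PAGESIZE), size))
		return -1;			/* driver wants page alignment */
	memcpy(aligned, image, size);

	memset(&load, 0, sizeof(load));
	load.data_addr = (unsigned long)aligned;
	load.size      = size;
	load.partition = '0';			/* download/erase_first/part_0 */

	if (ioctl(card_fd, GENWQE_SLU_UPDATE, &load)) {
		free(aligned);
		return -1;
	}
	free(aligned);
	return load.retc;		/* DDCB_RETC_COMPLETE means success */
}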
659
660static int do_flash_read(struct genwqe_file *cfile,
661 struct genwqe_bitstream *load)
662{
663 int rc, blocks_to_flash;
664 u64 dma_addr, flash = 0;
665 size_t tocopy = 0;
666 u8 __user *buf;
667 u8 *xbuf;
668 u8 cmdopts;
669 struct genwqe_dev *cd = cfile->cd;
670 struct pci_dev *pci_dev = cd->pci_dev;
671 struct genwqe_ddcb_cmd *cmd;
672
673 if ((load->size & 0x3) != 0) {
674 dev_err(&pci_dev->dev,
675 "err: buf size %d bytes not 4 bytes aligned!\n",
676 load->size);
677 return -EINVAL;
678 }
679 if (((unsigned long)(load->data_addr) & ~PAGE_MASK) != 0) {
680 dev_err(&pci_dev->dev, "err: buf is not page aligned!\n");
681 return -EINVAL;
682 }
683
684 /* FIXME Bits have changed for new service layer! */
685 switch ((char)load->partition) {
686 case '0':
687 cmdopts = 0x12;
688 break; /* upload/part_0 */
689 case '1':
690 cmdopts = 0x1A;
691 break; /* upload/part_1 */
692 case 'v':
693 default:
694 dev_err(&pci_dev->dev,
695 "err: invalid partition %02x!\n", load->partition);
696 return -EINVAL;
697 }
698 dev_info(&pci_dev->dev,
699 "[%s] start flash read UID: 0x%x size: %u bytes part: %c\n",
700 __func__, load->uid, load->size, (char)load->partition);
701
702 buf = (u8 __user *)load->data_addr;
703 xbuf = __genwqe_alloc_consistent(cd, FLASH_BLOCK, &dma_addr);
704 if (xbuf == NULL) {
705 dev_err(&pci_dev->dev, "err: no memory\n");
706 return -ENOMEM;
707 }
708
709 blocks_to_flash = load->size / FLASH_BLOCK;
710 while (load->size) {
711 /*
712		 * We must be 4 byte aligned. The buffer must be zero-padded
713		 * so that the CRC is calculated over defined values.
714 */
715 tocopy = min_t(size_t, load->size, FLASH_BLOCK);
716
717 dev_info(&pci_dev->dev,
718 "[%s] DMA: 0x%llx SZ: %ld %d\n",
719 __func__, dma_addr, tocopy, blocks_to_flash);
720
721 /* prepare DDCB for SLU process */
722 cmd = ddcb_requ_alloc();
723 if (cmd == NULL) {
724 rc = -ENOMEM;
725 goto free_buffer;
726 }
727 cmd->cmd = SLCMD_MOVE_FLASH;
728 cmd->cmdopts = cmdopts;
729
730 /* prepare invariant values */
731 if (genwqe_get_slu_id(cd) <= 0x2) {
732 *(u64 *)&cmd->__asiv[0] = cpu_to_be64(dma_addr);
733 *(u64 *)&cmd->__asiv[8] = cpu_to_be64(tocopy);
734 *(u64 *)&cmd->__asiv[16] = cpu_to_be64(flash);
735 *(u32 *)&cmd->__asiv[24] = cpu_to_be32(0);
736 cmd->__asiv[24] = load->uid;
737 *(u32 *)&cmd->__asiv[28] = cpu_to_be32(0) /* CRC */;
738 cmd->asiv_length = 32; /* bytes included in crc calc */
739 } else { /* setup DDCB for ATS architecture */
740 *(u64 *)&cmd->asiv[0] = cpu_to_be64(dma_addr);
741 *(u32 *)&cmd->asiv[8] = cpu_to_be32(tocopy);
742 *(u32 *)&cmd->asiv[12] = cpu_to_be32(0); /* resvd */
743 *(u64 *)&cmd->asiv[16] = cpu_to_be64(flash);
744 *(u32 *)&cmd->asiv[24] = cpu_to_be32(load->uid<<24);
745 *(u32 *)&cmd->asiv[28] = cpu_to_be32(0); /* CRC */
746 cmd->ats = cpu_to_be64(0x5ULL << 44); /* rd/wr */
747 cmd->asiv_length = 40; /* bytes included in crc calc */
748 }
749 cmd->asv_length = 8;
750
751 /* we only get back the calculated CRC */
752 *(u64 *)&cmd->asv[0] = 0ULL; /* 0x80 */
753
754 rc = __genwqe_execute_raw_ddcb(cd, cmd);
755
756 load->retc = cmd->retc;
757 load->attn = cmd->attn;
758 load->progress = cmd->progress;
759
760 if ((rc < 0) && (rc != -EBADMSG)) {
761 dev_err(&pci_dev->dev,
762 " [%s] DDCB returned (RETC=%x ATTN=%x "
763 "PROG=%x rc=%d)\n", __func__, cmd->retc,
764 cmd->attn, cmd->progress, rc);
765 ddcb_requ_free(cmd);
766 goto free_buffer;
767 }
768
769 rc = copy_to_user(buf, xbuf, tocopy);
770 if (rc) {
771 dev_err(&pci_dev->dev,
772 " [%s] copy data to user failed rc=%d\n",
773 __func__, rc);
774 rc = -EIO;
775 ddcb_requ_free(cmd);
776 goto free_buffer;
777 }
778
779 /* We know that we can get retc 0x104 with CRC err */
780 if (((cmd->retc == DDCB_RETC_FAULT) &&
781 (cmd->attn != 0x02)) || /* Normally ignore CRC error */
782 ((cmd->retc == DDCB_RETC_COMPLETE) &&
783 (cmd->attn != 0x00))) { /* Everything was fine */
784 dev_err(&pci_dev->dev,
785 " [%s] DDCB returned (RETC=%x ATTN=%x "
786 "PROG=%x rc=%d)\n", __func__, cmd->retc,
787 cmd->attn, cmd->progress, rc);
788 rc = -EIO;
789 ddcb_requ_free(cmd);
790 goto free_buffer;
791 }
792
793 load->size -= tocopy;
794 flash += tocopy;
795 buf += tocopy;
796 blocks_to_flash--;
797 ddcb_requ_free(cmd);
798 }
799 rc = 0;
800
801 free_buffer:
802 __genwqe_free_consistent(cd, FLASH_BLOCK, xbuf, dma_addr);
803 return rc;
804}
805
806static int genwqe_pin_mem(struct genwqe_file *cfile, struct genwqe_mem *m)
807{
808 int rc;
809 struct genwqe_dev *cd = cfile->cd;
810 struct pci_dev *pci_dev = cfile->cd->pci_dev;
811 struct dma_mapping *dma_map;
812 unsigned long map_addr;
813 unsigned long map_size;
814
815 if ((m->addr == 0x0) || (m->size == 0))
816 return -EINVAL;
817
818 map_addr = (m->addr & PAGE_MASK);
819 map_size = round_up(m->size + (m->addr & ~PAGE_MASK), PAGE_SIZE);
820
821 dma_map = kzalloc(sizeof(struct dma_mapping), GFP_ATOMIC);
822 if (dma_map == NULL)
823 return -ENOMEM;
824
825 genwqe_mapping_init(dma_map, GENWQE_MAPPING_SGL_PINNED);
826 rc = genwqe_user_vmap(cd, dma_map, (void *)map_addr, map_size, NULL);
827 if (rc != 0) {
828 dev_err(&pci_dev->dev,
829 "[%s] genwqe_user_vmap rc=%d\n", __func__, rc);
830 return rc;
831 }
832
833 genwqe_add_pin(cfile, dma_map);
834 return 0;
835}
836
837static int genwqe_unpin_mem(struct genwqe_file *cfile, struct genwqe_mem *m)
838{
839 struct genwqe_dev *cd = cfile->cd;
840 struct dma_mapping *dma_map;
841 unsigned long map_addr;
842 unsigned long map_size;
843
844 if (m->addr == 0x0)
845 return -EINVAL;
846
847 map_addr = (m->addr & PAGE_MASK);
848 map_size = round_up(m->size + (m->addr & ~PAGE_MASK), PAGE_SIZE);
849
850 dma_map = genwqe_search_pin(cfile, map_addr, map_size, NULL);
851 if (dma_map == NULL)
852 return -ENOENT;
853
854 genwqe_del_pin(cfile, dma_map);
855 genwqe_user_vunmap(cd, dma_map, NULL);
856 kfree(dma_map);
857 return 0;
858}
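
/*
 * Illustrative userspace sketch (not part of this patch): pinning a
 * user buffer once so that repeated DDCB executions can reuse the
 * mapping, as handled by genwqe_pin_mem()/genwqe_unpin_mem() above.
 * The GENWQE_PIN_MEM/GENWQE_UNPIN_MEM ioctl numbers and struct
 * genwqe_mem come from the GenWQE UAPI header (not part of this hunk);
 * only the addr/size fields used above are filled in here.
 */
#include <string.h>
#include <sys/ioctl.h>

static int pin_buffer(int card_fd, void *buf, size_t size)
{
	struct genwqe_mem m;

	memset(&m, 0, sizeof(m));
	m.addr = (unsigned long)buf;
	m.size = size;
	return ioctl(card_fd, GENWQE_PIN_MEM, &m);
}

static int unpin_buffer(int card_fd, void *buf, size_t size)
{
	struct genwqe_mem m;

	memset(&m, 0, sizeof(m));
	m.addr = (unsigned long)buf;
	m.size = size;
	return ioctl(card_fd, GENWQE_UNPIN_MEM, &m);
}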
859
860/**
861 * ddcb_cmd_cleanup() - Remove dynamically created fixup entries
862 *
863 * Only if there are any. Pinnings are not removed.
864 */
865static int ddcb_cmd_cleanup(struct genwqe_file *cfile, struct ddcb_requ *req)
866{
867 unsigned int i;
868 struct dma_mapping *dma_map;
869 struct genwqe_dev *cd = cfile->cd;
870
871 for (i = 0; i < DDCB_FIXUPS; i++) {
872 dma_map = &req->dma_mappings[i];
873
874 if (dma_mapping_used(dma_map)) {
875 __genwqe_del_mapping(cfile, dma_map);
876 genwqe_user_vunmap(cd, dma_map, req);
877 }
878 if (req->sgl[i] != NULL) {
879 genwqe_free_sgl(cd, req->sgl[i],
880 req->sgl_dma_addr[i],
881 req->sgl_size[i]);
882 req->sgl[i] = NULL;
883 req->sgl_dma_addr[i] = 0x0;
884 req->sgl_size[i] = 0;
885 }
886
887 }
888 return 0;
889}
890
891/**
892 * ddcb_cmd_fixups() - Establish DMA fixups/sglists for user memory references
893 *
894 * Before the DDCB gets executed we need to handle the fixups. We
895 * replace the user-space addresses with DMA addresses or do
896 * additional setup work e.g. generating a scatter-gather list which
897 * is used to describe the memory referred to in the fixup.
898 */
899static int ddcb_cmd_fixups(struct genwqe_file *cfile, struct ddcb_requ *req)
900{
901 int rc;
902 unsigned int asiv_offs, i;
903 struct genwqe_dev *cd = cfile->cd;
904 struct genwqe_ddcb_cmd *cmd = &req->cmd;
905 struct dma_mapping *m;
906 struct pci_dev *pci_dev = cd->pci_dev;
907 const char *type = "UNKNOWN";
908
909 for (i = 0, asiv_offs = 0x00; asiv_offs <= 0x58;
910 i++, asiv_offs += 0x08) {
911
912 u64 u_addr, d_addr;
913 u32 u_size = 0;
914 unsigned long ats_flags;
915
916 ats_flags = ATS_GET_FLAGS(be64_to_cpu(cmd->ats), asiv_offs);
917
918 switch (ats_flags) {
919
920 case ATS_TYPE_DATA:
921 break; /* nothing to do here */
922
923 case ATS_TYPE_FLAT_RDWR:
924 case ATS_TYPE_FLAT_RD: {
925 u_addr = be64_to_cpu(*((u64 *)&cmd->
926 asiv[asiv_offs]));
927 u_size = be32_to_cpu(*((u32 *)&cmd->
928 asiv[asiv_offs + 0x08]));
929
930 /*
931 * No data available. Ignore u_addr in this
932 * case and set addr to 0. Hardware must not
933 * fetch the buffer.
934 */
935 if (u_size == 0x0) {
936 *((u64 *)&cmd->asiv[asiv_offs]) =
937 cpu_to_be64(0x0);
938 break;
939 }
940
941 m = __genwqe_search_mapping(cfile, u_addr, u_size,
942 &d_addr, NULL);
943 if (m == NULL) {
944 rc = -EFAULT;
945 goto err_out;
946 }
947
948 *((u64 *)&cmd->asiv[asiv_offs]) = cpu_to_be64(d_addr);
949 break;
950 }
951
952 case ATS_TYPE_SGL_RDWR:
953 case ATS_TYPE_SGL_RD: {
954 int page_offs, nr_pages, offs;
955
956 u_addr = be64_to_cpu(*((u64 *)&cmd->asiv[asiv_offs]));
957 u_size = be32_to_cpu(*((u32 *)&cmd->asiv[asiv_offs +
958 0x08]));
959
960 /*
961 * No data available. Ignore u_addr in this
962 * case and set addr to 0. Hardware must not
963 * fetch the empty sgl.
964 */
965 if (u_size == 0x0) {
966 *((u64 *)&cmd->asiv[asiv_offs]) =
967 cpu_to_be64(0x0);
968 break;
969 }
970
971 m = genwqe_search_pin(cfile, u_addr, u_size, NULL);
972 if (m != NULL) {
973 type = "PINNING";
974 page_offs = (u_addr -
975 (u64)m->u_vaddr)/PAGE_SIZE;
976 } else {
977 type = "MAPPING";
978 m = &req->dma_mappings[i];
979
980 genwqe_mapping_init(m,
981 GENWQE_MAPPING_SGL_TEMP);
982 rc = genwqe_user_vmap(cd, m, (void *)u_addr,
983 u_size, req);
984 if (rc != 0)
985 goto err_out;
986
987 __genwqe_add_mapping(cfile, m);
988 page_offs = 0;
989 }
990
991 offs = offset_in_page(u_addr);
992 nr_pages = DIV_ROUND_UP(offs + u_size, PAGE_SIZE);
993
994 /* create genwqe style scatter gather list */
995 req->sgl[i] = genwqe_alloc_sgl(cd, m->nr_pages,
996 &req->sgl_dma_addr[i],
997 &req->sgl_size[i]);
998 if (req->sgl[i] == NULL) {
999 rc = -ENOMEM;
1000 goto err_out;
1001 }
1002 genwqe_setup_sgl(cd, offs, u_size,
1003 req->sgl[i],
1004 req->sgl_dma_addr[i],
1005 req->sgl_size[i],
1006 m->dma_list,
1007 page_offs,
1008 nr_pages);
1009
1010 *((u64 *)&cmd->asiv[asiv_offs]) =
1011 cpu_to_be64(req->sgl_dma_addr[i]);
1012
1013 break;
1014 }
1015 default:
1016 dev_err(&pci_dev->dev,
1017 "[%s] err: invalid ATS flags %01lx\n",
1018 __func__, ats_flags);
1019 rc = -EINVAL;
1020 goto err_out;
1021 }
1022 }
1023 return 0;
1024
1025 err_out:
1026 dev_err(&pci_dev->dev, "[%s] err: rc=%d\n", __func__, rc);
1027 ddcb_cmd_cleanup(cfile, req);
1028 return rc;
1029}
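
/*
 * Illustrative sketch (not part of this patch): composing the per-entry
 * ATS type bits that ddcb_cmd_fixups() decodes via ATS_GET_FLAGS(). The
 * layout assumed below (one 4-bit type nibble per 8-byte ASIV entry,
 * with the entry at offset 0x00 in bits 47..44) is an inference from
 * the "0x4ULL << 44" / "0x5ULL << 44" usage in the flash code above;
 * the authoritative macros live in the GenWQE headers.
 */
static inline u64 example_ats_set(u64 ats, unsigned int asiv_offs, u64 type)
{
	unsigned int shift = 44 - 4 * (asiv_offs / 8);	/* assumed layout */

	return ats | (type << shift);
}

/*
 * E.g. a read-only flat buffer referenced at asiv[0x00]:
 *	cmd->ats = cpu_to_be64(example_ats_set(0, 0x00, ATS_TYPE_FLAT_RD));
 */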
1030
1031/**
1032 * genwqe_execute_ddcb() - Execute DDCB using userspace address fixups
1033 *
1034 * The code will build up the translation tables or look up the
1035 * contiguous memory allocation table to find the right translations
1036 * and DMA addresses.
1037 */
1038static int genwqe_execute_ddcb(struct genwqe_file *cfile,
1039 struct genwqe_ddcb_cmd *cmd)
1040{
1041 int rc;
1042 struct genwqe_dev *cd = cfile->cd;
1043 struct ddcb_requ *req = container_of(cmd, struct ddcb_requ, cmd);
1044
1045 rc = ddcb_cmd_fixups(cfile, req);
1046 if (rc != 0)
1047 return rc;
1048
1049 rc = __genwqe_execute_raw_ddcb(cd, cmd);
1050 ddcb_cmd_cleanup(cfile, req);
1051 return rc;
1052}
1053
1054static int do_execute_ddcb(struct genwqe_file *cfile,
1055 unsigned long arg, int raw)
1056{
1057 int rc;
1058 struct genwqe_ddcb_cmd *cmd;
1059 struct ddcb_requ *req;
1060 struct genwqe_dev *cd = cfile->cd;
1061 struct pci_dev *pci_dev = cd->pci_dev;
1062
1063 cmd = ddcb_requ_alloc();
1064 if (cmd == NULL)
1065 return -ENOMEM;
1066
1067 req = container_of(cmd, struct ddcb_requ, cmd);
1068
1069 if (copy_from_user(cmd, (void __user *)arg, sizeof(*cmd))) {
1070 dev_err(&pci_dev->dev,
1071 "err: could not copy params from user\n");
1072 ddcb_requ_free(cmd);
1073 return -EFAULT;
1074 }
1075
1076 if (!raw)
1077 rc = genwqe_execute_ddcb(cfile, cmd);
1078 else
1079 rc = __genwqe_execute_raw_ddcb(cd, cmd);
1080
1081	/* Copy back only the modified fields. Do not copy ASIV
1082	   back since the copy got modified by the driver. */
1083 if (copy_to_user((void __user *)arg, cmd,
1084 sizeof(*cmd) - DDCB_ASIV_LENGTH)) {
1085 dev_err(&pci_dev->dev,
1086 "err: could not copy params to user\n");
1087 ddcb_requ_free(cmd);
1088 return -EFAULT;
1089 }
1090
1091 ddcb_requ_free(cmd);
1092 return rc;
1093}
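
/*
 * Illustrative userspace sketch (not part of this patch): submitting a
 * DDCB through the GENWQE_EXECUTE_DDCB ioctl served by
 * do_execute_ddcb() above. The ioctl number and struct genwqe_ddcb_cmd
 * come from the GenWQE UAPI header (not part of this hunk); MY_APP_CMD
 * and its cmdopts are purely hypothetical placeholders for whatever
 * command set the card's application layer defines.
 */
#include <string.h>
#include <sys/ioctl.h>

static int run_one_ddcb(int card_fd)
{
	struct genwqe_ddcb_cmd cmd;

	memset(&cmd, 0, sizeof(cmd));
	cmd.cmd     = MY_APP_CMD;	/* hypothetical application command */
	cmd.cmdopts = 0;
	/*
	 * cmd.asiv[], cmd.ats and cmd.asiv_length describe the inputs;
	 * ddcb_cmd_fixups() rewrites user addresses in asiv[] before
	 * the DDCB is enqueued.
	 */

	if (ioctl(card_fd, GENWQE_EXECUTE_DDCB, &cmd))
		return -1;

	/* retc/attn/progress are copied back by do_execute_ddcb() */
	return cmd.retc;
}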
1094
1095/**
1096 * genwqe_ioctl() - IO control
1097 * @filp: file handle
1098 * @cmd: command identifier (passed from user)
1099 * @arg: argument (passed from user)
1100 *
1101 * Return: 0 on success or a negative error code on failure
1102 */
1103static long genwqe_ioctl(struct file *filp, unsigned int cmd,
1104 unsigned long arg)
1105{
1106 int rc = 0;
1107 struct genwqe_file *cfile = (struct genwqe_file *)filp->private_data;
1108 struct genwqe_dev *cd = cfile->cd;
1109 struct genwqe_reg_io __user *io;
1110 u64 val;
1111 u32 reg_offs;
1112 struct pci_dev *pci_dev = cd->pci_dev;
1113
1114 if (_IOC_TYPE(cmd) != GENWQE_IOC_CODE) {
1115 dev_err(&pci_dev->dev, "err: ioctl code does not match!\n");
1116 return -EINVAL;
1117 }
1118
1119 switch (cmd) {
1120
1121 case GENWQE_GET_CARD_STATE:
1122 put_user(cd->card_state, (enum genwqe_card_state __user *)arg);
1123 return 0;
1124
1125 /* Register access */
1126 case GENWQE_READ_REG64: {
1127 io = (struct genwqe_reg_io __user *)arg;
1128
1129 if (get_user(reg_offs, &io->num)) {
1130 dev_err(&pci_dev->dev, "err: reg read64\n");
1131 return -EFAULT;
1132 }
1133 if ((reg_offs >= cd->mmio_len) || (reg_offs & 0x7))
1134 return -EINVAL;
1135
1136 val = __genwqe_readq(cd, reg_offs);
1137 put_user(val, &io->val64);
1138 return 0;
1139 }
1140
1141 case GENWQE_WRITE_REG64: {
1142 io = (struct genwqe_reg_io __user *)arg;
1143
1144 if (!capable(CAP_SYS_ADMIN))
1145 return -EPERM;
1146
1147 if ((filp->f_flags & O_ACCMODE) == O_RDONLY)
1148 return -EPERM;
1149
1150 if (get_user(reg_offs, &io->num)) {
1151 dev_err(&pci_dev->dev, "err: reg write64\n");
1152 return -EFAULT;
1153 }
1154 if ((reg_offs >= cd->mmio_len) || (reg_offs & 0x7))
1155 return -EINVAL;
1156
1157 if (get_user(val, &io->val64)) {
1158 dev_err(&pci_dev->dev, "err: reg write64\n");
1159 return -EFAULT;
1160 }
1161 __genwqe_writeq(cd, reg_offs, val);
1162 return 0;
1163 }
1164
1165 case GENWQE_READ_REG32: {
1166 io = (struct genwqe_reg_io __user *)arg;
1167
1168 if (get_user(reg_offs, &io->num)) {
1169 dev_err(&pci_dev->dev, "err: reg read32\n");
1170 return -EFAULT;
1171 }
1172 if ((reg_offs >= cd->mmio_len) || (reg_offs & 0x3))
1173 return -EINVAL;
1174
1175 val = __genwqe_readl(cd, reg_offs);
1176 put_user(val, &io->val64);
1177 return 0;
1178 }
1179
1180 case GENWQE_WRITE_REG32: {
1181 io = (struct genwqe_reg_io __user *)arg;
1182
1183 if (!capable(CAP_SYS_ADMIN))
1184 return -EPERM;
1185
1186 if ((filp->f_flags & O_ACCMODE) == O_RDONLY)
1187 return -EPERM;
1188
1189 if (get_user(reg_offs, &io->num)) {
1190 dev_err(&pci_dev->dev, "err: reg write32\n");
1191 return -EFAULT;
1192 }
1193 if ((reg_offs >= cd->mmio_len) || (reg_offs & 0x3))
1194 return -EINVAL;
1195
1196 if (get_user(val, &io->val64)) {
1197 dev_err(&pci_dev->dev, "err: reg write32\n");
1198 return -EFAULT;
1199 }
1200 __genwqe_writel(cd, reg_offs, val);
1201 return 0;
1202 }
1203
1204 /* Flash update/reading */
1205 case GENWQE_SLU_UPDATE: {
1206 struct genwqe_bitstream load;
1207
1208 if (!genwqe_is_privileged(cd))
1209 return -EPERM;
1210
1211 if ((filp->f_flags & O_ACCMODE) == O_RDONLY)
1212 return -EPERM;
1213
1214 if (copy_from_user(&load, (void __user *)arg, sizeof(load))) {
1215 dev_err(&pci_dev->dev,
1216 "err: could not copy params from user\n");
1217 return -EFAULT;
1218 }
1219 rc = do_flash_update(cfile, &load);
1220
1221 if (copy_to_user((void __user *)arg, &load, sizeof(load))) {
1222 dev_err(&pci_dev->dev,
1223 "err: could not copy params to user\n");
1224 return -EFAULT;
1225 }
1226 dev_info(&pci_dev->dev, "[%s] rc=%d\n", __func__, rc);
1227 return rc;
1228 }
1229
1230 case GENWQE_SLU_READ: {
1231 struct genwqe_bitstream load;
1232
1233 if (!genwqe_is_privileged(cd))
1234 return -EPERM;
1235
1236 if (genwqe_flash_readback_fails(cd))
1237 return -ENOSPC; /* known to fail for old versions */
1238
1239 if (copy_from_user(&load, (void __user *)arg, sizeof(load))) {
1240 dev_err(&pci_dev->dev,
1241 "err: could not copy params from user\n");
1242 return -EFAULT;
1243 }
1244 rc = do_flash_read(cfile, &load);
1245
1246 if (copy_to_user((void __user *)arg, &load, sizeof(load))) {
1247 dev_err(&pci_dev->dev,
1248 "err: could not copy params to user\n");
1249 return -EFAULT;
1250 }
1251 dev_info(&pci_dev->dev, "[%s] rc=%d\n", __func__, rc);
1252 return rc;
1253 }
1254
1255 /* memory pinning and unpinning */
1256 case GENWQE_PIN_MEM: {
1257 struct genwqe_mem m;
1258
1259 if (copy_from_user(&m, (void __user *)arg, sizeof(m))) {
1260 dev_err(&pci_dev->dev,
1261 "err: could not copy params from user\n");
1262 return -EFAULT;
1263 }
1264 return genwqe_pin_mem(cfile, &m);
1265 }
1266
1267 case GENWQE_UNPIN_MEM: {
1268 struct genwqe_mem m;
1269
1270 if (copy_from_user(&m, (void __user *)arg, sizeof(m))) {
1271 dev_err(&pci_dev->dev,
1272 "err: could not copy params from user\n");
1273 return -EFAULT;
1274 }
1275 return genwqe_unpin_mem(cfile, &m);
1276 }
1277
1278	/* launch a DDCB and wait for completion */
1279 case GENWQE_EXECUTE_DDCB:
1280 return do_execute_ddcb(cfile, arg, 0);
1281
1282 case GENWQE_EXECUTE_RAW_DDCB: {
1283
1284 if (!capable(CAP_SYS_ADMIN)) {
1285 dev_err(&pci_dev->dev,
1286			"err: must be superuser to execute raw DDCB!\n");
1287 return -EPERM;
1288 }
1289 return do_execute_ddcb(cfile, arg, 1);
1290 }
1291
1292 default:
1293 pr_err("unknown ioctl %x/%lx**\n", cmd, arg);
1294 return -EINVAL;
1295 }
1296
1297 return rc;
1298}
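
/*
 * Illustrative userspace sketch (not part of this patch): reading a
 * 64-bit card register through the GENWQE_READ_REG64 case above. The
 * ioctl number and struct genwqe_reg_io come from the GenWQE UAPI
 * header (not part of this hunk); num/val64 are the fields the handler
 * above reads and writes. The offset must be 8-byte aligned and within
 * the MMIO range, as checked above.
 */
#include <stdint.h>
#include <string.h>
#include <sys/ioctl.h>

static int read_reg64(int card_fd, uint32_t offs, uint64_t *val)
{
	struct genwqe_reg_io io;

	memset(&io, 0, sizeof(io));
	io.num = offs;

	if (ioctl(card_fd, GENWQE_READ_REG64, &io))
		return -1;

	*val = io.val64;
	return 0;
}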
1299
1300#if defined(CONFIG_COMPAT)
1301/**
1302 * genwqe_compat_ioctl() - Compatibility ioctl
1303 *
1304 * Called whenever a 32-bit process running under a 64-bit kernel
1305 * performs an ioctl on /dev/genwqe<n>_card.
1306 *
1307 * @filp: file pointer.
1308 * @cmd: command.
1309 * @arg: user argument.
1310 * Return: zero on success or negative number on failure.
1311 */
1312static long genwqe_compat_ioctl(struct file *filp, unsigned int cmd,
1313 unsigned long arg)
1314{
1315 return genwqe_ioctl(filp, cmd, arg);
1316}
1317#endif /* defined(CONFIG_COMPAT) */
1318
1319static const struct file_operations genwqe_fops = {
1320 .owner = THIS_MODULE,
1321 .open = genwqe_open,
1322 .fasync = genwqe_fasync,
1323 .mmap = genwqe_mmap,
1324 .unlocked_ioctl = genwqe_ioctl,
1325#if defined(CONFIG_COMPAT)
1326 .compat_ioctl = genwqe_compat_ioctl,
1327#endif
1328 .release = genwqe_release,
1329};
1330
1331static int genwqe_device_initialized(struct genwqe_dev *cd)
1332{
1333 return cd->dev != NULL;
1334}
1335
1336/**
1337 * genwqe_device_create() - Create and configure genwqe char device
1338 * @cd: genwqe device descriptor
1339 *
1340 * This function must be called before we create any more genwqe
1341 * character devices, because it allocates the major and minor
1342 * numbers which are supposed to be used by the client drivers.
1343 */
1344int genwqe_device_create(struct genwqe_dev *cd)
1345{
1346 int rc;
1347 struct pci_dev *pci_dev = cd->pci_dev;
1348
1349 /*
1350 * Here starts the individual setup per client. It must
1351 * initialize its own cdev data structure with its own fops.
1352 * The appropriate devnum needs to be created. The ranges must
1353 * not overlap.
1354 */
1355 rc = alloc_chrdev_region(&cd->devnum_genwqe, 0,
1356 GENWQE_MAX_MINOR, GENWQE_DEVNAME);
1357 if (rc < 0) {
1358 dev_err(&pci_dev->dev, "err: alloc_chrdev_region failed\n");
1359 goto err_dev;
1360 }
1361
1362 cdev_init(&cd->cdev_genwqe, &genwqe_fops);
1363 cd->cdev_genwqe.owner = THIS_MODULE;
1364
1365 rc = cdev_add(&cd->cdev_genwqe, cd->devnum_genwqe, 1);
1366 if (rc < 0) {
1367 dev_err(&pci_dev->dev, "err: cdev_add failed\n");
1368 goto err_add;
1369 }
1370
1371 /*
1372 * Finally the device node in /dev/... must be created. The rule is
1373 * to use <clientname>%u_card for each created device.
1374 */
1375 cd->dev = device_create_with_groups(cd->class_genwqe,
1376 &cd->pci_dev->dev,
1377 cd->devnum_genwqe, cd,
1378 genwqe_attribute_groups,
1379 GENWQE_DEVNAME "%u_card",
1380 cd->card_idx);
1381	if (IS_ERR(cd->dev)) {
1382		rc = PTR_ERR(cd->dev);
1383		goto err_cdev;
1384	}
1385
1386 rc = genwqe_init_debugfs(cd);
1387 if (rc != 0)
1388 goto err_debugfs;
1389
1390 return 0;
1391
1392 err_debugfs:
1393 device_destroy(cd->class_genwqe, cd->devnum_genwqe);
1394 err_cdev:
1395 cdev_del(&cd->cdev_genwqe);
1396 err_add:
1397 unregister_chrdev_region(cd->devnum_genwqe, GENWQE_MAX_MINOR);
1398 err_dev:
1399 cd->dev = NULL;
1400 return rc;
1401}
1402
1403static int genwqe_inform_and_stop_processes(struct genwqe_dev *cd)
1404{
1405 int rc;
1406 unsigned int i;
1407 struct pci_dev *pci_dev = cd->pci_dev;
1408
1409 if (!genwqe_open_files(cd))
1410 return 0;
1411
1412 dev_warn(&pci_dev->dev, "[%s] send SIGIO and wait ...\n", __func__);
1413
1414 rc = genwqe_kill_fasync(cd, SIGIO);
1415 if (rc > 0) {
1416 /* give kill_timeout seconds to close file descriptors ... */
1417 for (i = 0; (i < genwqe_kill_timeout) &&
1418 genwqe_open_files(cd); i++) {
1419 dev_info(&pci_dev->dev, " %d sec ...", i);
1420
1421 cond_resched();
1422 msleep(1000);
1423 }
1424
1425 /* if no open files we can safely continue, else ... */
1426 if (!genwqe_open_files(cd))
1427 return 0;
1428
1429 dev_warn(&pci_dev->dev,
1430 "[%s] send SIGKILL and wait ...\n", __func__);
1431
1432 rc = genwqe_force_sig(cd, SIGKILL); /* force terminate */
1433 if (rc) {
1434			/* Give kill_timeout more seconds to end processes */
1435 for (i = 0; (i < genwqe_kill_timeout) &&
1436 genwqe_open_files(cd); i++) {
1437 dev_warn(&pci_dev->dev, " %d sec ...", i);
1438
1439 cond_resched();
1440 msleep(1000);
1441 }
1442 }
1443 }
1444 return 0;
1445}
1446
1447/**
1448 * genwqe_device_remove() - Remove genwqe's char device
1449 *
1450 * This function must be called after the client devices are removed
1451 * because it will free the major/minor number range for the genwqe
1452 * drivers.
1453 *
1454 * This function must be robust enough to be called twice.
1455 */
1456int genwqe_device_remove(struct genwqe_dev *cd)
1457{
1458 int rc;
1459 struct pci_dev *pci_dev = cd->pci_dev;
1460
1461 if (!genwqe_device_initialized(cd))
1462 return 1;
1463
1464 genwqe_inform_and_stop_processes(cd);
1465
1466 /*
1467	 * We currently wait until all file descriptors are
1468	 * closed. This leads to a problem when we abort the
1469	 * application, which will then decrease this reference
1470	 * from 1/unused to 0/illegal and not from 2/used to 1/empty.
1471 */
1472 rc = atomic_read(&cd->cdev_genwqe.kobj.kref.refcount);
1473 if (rc != 1) {
1474 dev_err(&pci_dev->dev,
1475 "[%s] err: cdev_genwqe...refcount=%d\n", __func__, rc);
1476 panic("Fatal err: cannot free resources with pending references!");
1477 }
1478
1479 genqwe_exit_debugfs(cd);
1480 device_destroy(cd->class_genwqe, cd->devnum_genwqe);
1481 cdev_del(&cd->cdev_genwqe);
1482 unregister_chrdev_region(cd->devnum_genwqe, GENWQE_MAX_MINOR);
1483 cd->dev = NULL;
1484
1485 return 0;
1486}