Diffstat (limited to 'drivers/misc/genwqe')
-rw-r--r-- | drivers/misc/genwqe/card_ddcb.c | 1373 | ||||
-rw-r--r-- | drivers/misc/genwqe/card_ddcb.h | 188 | ||||
-rw-r--r-- | drivers/misc/genwqe/card_dev.c | 1486 |
3 files changed, 3047 insertions, 0 deletions
diff --git a/drivers/misc/genwqe/card_ddcb.c b/drivers/misc/genwqe/card_ddcb.c new file mode 100644 index 000000000000..cc6fca7a4851 --- /dev/null +++ b/drivers/misc/genwqe/card_ddcb.c | |||
@@ -0,0 +1,1373 @@ | |||
1 | /** | ||
2 | * IBM Accelerator Family 'GenWQE' | ||
3 | * | ||
4 | * (C) Copyright IBM Corp. 2013 | ||
5 | * | ||
6 | * Author: Frank Haverkamp <haver@linux.vnet.ibm.com> | ||
7 | * Author: Joerg-Stephan Vogt <jsvogt@de.ibm.com> | ||
8 | * Author: Michael Jung <mijung@de.ibm.com> | ||
9 | * Author: Michael Ruettger <michael@ibmra.de> | ||
10 | * | ||
11 | * This program is free software; you can redistribute it and/or modify | ||
12 | * it under the terms of the GNU General Public License (version 2 only) | ||
13 | * as published by the Free Software Foundation. | ||
14 | * | ||
15 | * This program is distributed in the hope that it will be useful, | ||
16 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
17 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
18 | * GNU General Public License for more details. | ||
19 | */ | ||
20 | |||
21 | /* | ||
22 | * Device Driver Control Block (DDCB) queue support. Definition of | ||
23 | * interrupt handlers for queue support as well as triggering the | ||
24 | * health monitor code in case of problems. The current hardware uses | ||
25 | * an MSI interrupt which is shared between error handling and | ||
26 | * functional code. | ||
27 | */ | ||
28 | |||
29 | #include <linux/types.h> | ||
30 | #include <linux/module.h> | ||
31 | #include <linux/sched.h> | ||
32 | #include <linux/wait.h> | ||
33 | #include <linux/pci.h> | ||
34 | #include <linux/string.h> | ||
35 | #include <linux/dma-mapping.h> | ||
36 | #include <linux/delay.h> | ||
38 | #include <linux/interrupt.h> | ||
39 | #include <linux/crc-itu-t.h> | ||
40 | |||
41 | #include "card_ddcb.h" | ||
42 | |||
43 | /* | ||
44 | * N: next DDCB, this is where the next DDCB will be put. | ||
45 | * A: active DDCB, this is where the code will look for the next completion. | ||
46 | * x: DDCB is enqueued, we are waiting for its completion. | ||
47 | * | ||
48 | * Situation (1): Empty queue | ||
49 | * +---+---+---+---+---+---+---+---+ | ||
50 | * | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | | ||
51 | * | | | | | | | | | | ||
52 | * +---+---+---+---+---+---+---+---+ | ||
53 | * A/N | ||
54 | * enqueued_ddcbs = N - A = 2 - 2 = 0 | ||
55 | * | ||
56 | * Situation (2): Queue not wrapped, N > A | ||
57 | * +---+---+---+---+---+---+---+---+ | ||
58 | * | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | | ||
59 | * | | | x | x | | | | | | ||
60 | * +---+---+---+---+---+---+---+---+ | ||
61 | * A N | ||
62 | * enqueued_ddcbs = N - A = 4 - 2 = 2 | ||
63 | * | ||
64 | * Situation (3): Queue wrapped, A > N | ||
65 | * +---+---+---+---+---+---+---+---+ | ||
66 | * | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | | ||
67 | * | x | x | | | x | x | x | x | | ||
68 | * +---+---+---+---+---+---+---+---+ | ||
69 | * N A | ||
70 | * enqueued_ddcbs = queue_max - (A - N) = 8 - (4 - 2) = 6 | ||
71 | * | ||
72 | * Situation (4a): Queue full, N > A | ||
73 | * +---+---+---+---+---+---+---+---+ | ||
74 | * | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | | ||
75 | * | x | x | x | x | x | x | x | | | ||
76 | * +---+---+---+---+---+---+---+---+ | ||
77 | * A N | ||
78 | * | ||
79 | * enqueued_ddcbs = N - A = 7 - 0 = 7 | ||
80 | * | ||
81 | * Situation (4b): Queue full, A > N | ||
82 | * +---+---+---+---+---+---+---+---+ | ||
83 | * | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | | ||
84 | * | x | x | x | | x | x | x | x | | ||
85 | * +---+---+---+---+---+---+---+---+ | ||
86 | * N A | ||
87 | * enqueued_ddcbs = queue_max - (A - N) = 8 - (4 - 3) = 7 | ||
88 | */ | ||
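/*
 * Worked example for the helpers below (illustrative only): in
 * situation (3) above, with ddcb_max = 8, ddcb_act = 4 and
 * ddcb_next = 2, queue_enqueued_ddcbs() returns 8 - (4 - 2) = 6 and
 * queue_free_ddcbs() returns 8 - 6 - 1 = 1. One slot is always kept
 * free so that a full queue can be told apart from an empty one.
 */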
89 | |||
90 | static int queue_empty(struct ddcb_queue *queue) | ||
91 | { | ||
92 | return queue->ddcb_next == queue->ddcb_act; | ||
93 | } | ||
94 | |||
95 | static int queue_enqueued_ddcbs(struct ddcb_queue *queue) | ||
96 | { | ||
97 | if (queue->ddcb_next >= queue->ddcb_act) | ||
98 | return queue->ddcb_next - queue->ddcb_act; | ||
99 | |||
100 | return queue->ddcb_max - (queue->ddcb_act - queue->ddcb_next); | ||
101 | } | ||
102 | |||
103 | static int queue_free_ddcbs(struct ddcb_queue *queue) | ||
104 | { | ||
105 | int free_ddcbs = queue->ddcb_max - queue_enqueued_ddcbs(queue) - 1; | ||
106 | |||
107 | if (WARN_ON_ONCE(free_ddcbs < 0)) { /* must never ever happen! */ | ||
108 | return 0; | ||
109 | } | ||
110 | return free_ddcbs; | ||
111 | } | ||
112 | |||
113 | /* | ||
114 | * Use of the PRIV field in the DDCB for queue debugging: | ||
115 | * | ||
116 | * (1) Trying to get rid of a DDCB which saw a timeout: | ||
117 | * pddcb->priv[6] = 0xcc; # cleared | ||
118 | * | ||
119 | * (2) Append a DDCB via NEXT bit: | ||
120 | * pddcb->priv[7] = 0xaa; # appended | ||
121 | * | ||
122 | * (3) DDCB needed tapping: | ||
123 | * pddcb->priv[7] = 0xbb; # tapped | ||
124 | * | ||
125 | * (4) DDCB marked as correctly finished: | ||
126 | * pddcb->priv[6] = 0xff; # finished | ||
127 | */ | ||
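/*
 * Example (illustrative): a DDCB that was appended via the NEXT bit
 * and later completed normally ends up with priv[7] = 0xaa and
 * priv[6] = 0xff, which shows up in the PRIV column printed by
 * print_ddcb_info() below.
 */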
128 | |||
129 | static inline void ddcb_mark_tapped(struct ddcb *pddcb) | ||
130 | { | ||
131 | pddcb->priv[7] = 0xbb; /* tapped */ | ||
132 | } | ||
133 | |||
134 | static inline void ddcb_mark_appended(struct ddcb *pddcb) | ||
135 | { | ||
136 | pddcb->priv[7] = 0xaa; /* appended */ | ||
137 | } | ||
138 | |||
139 | static inline void ddcb_mark_cleared(struct ddcb *pddcb) | ||
140 | { | ||
141 | pddcb->priv[6] = 0xcc; /* cleared */ | ||
142 | } | ||
143 | |||
144 | static inline void ddcb_mark_finished(struct ddcb *pddcb) | ||
145 | { | ||
146 | pddcb->priv[6] = 0xff; /* finished */ | ||
147 | } | ||
148 | |||
149 | static inline void ddcb_mark_unused(struct ddcb *pddcb) | ||
150 | { | ||
151 | pddcb->priv_64 = cpu_to_be64(0); /* not tapped */ | ||
152 | } | ||
153 | |||
154 | /** | ||
155 | * genwqe_crc16() - Generate 16-bit crc as required for DDCBs | ||
156 | * @buff: pointer to data buffer | ||
157 | * @len: length of data for calculation | ||
158 | * @init: initial crc (0xffff at start) | ||
159 | * | ||
160 | * Polynomial = x^16 + x^12 + x^5 + 1 (0x1021) | ||
161 | * Example: 4 bytes 0x01 0x02 0x03 0x04 with init = 0xffff | ||
162 | * should result in a crc16 of 0x89c3 | ||
163 | * | ||
164 | * Return: crc16 checksum; the caller converts it to big endian where needed | ||
165 | */ | ||
166 | static inline u16 genwqe_crc16(const u8 *buff, size_t len, u16 init) | ||
167 | { | ||
168 | return crc_itu_t(init, buff, len); | ||
169 | } | ||
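/*
 * Quick self-check for the example given above (a sketch, not part
 * of the driver; relies on crc_itu_t() from <linux/crc-itu-t.h>):
 *
 *   static const u8 tst[] = { 0x01, 0x02, 0x03, 0x04 };
 *   WARN_ON(genwqe_crc16(tst, sizeof(tst), 0xffff) != 0x89c3);
 */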
170 | |||
171 | static void print_ddcb_info(struct genwqe_dev *cd, struct ddcb_queue *queue) | ||
172 | { | ||
173 | int i; | ||
174 | struct ddcb *pddcb; | ||
175 | unsigned long flags; | ||
176 | struct pci_dev *pci_dev = cd->pci_dev; | ||
177 | |||
178 | spin_lock_irqsave(&cd->print_lock, flags); | ||
179 | |||
180 | dev_info(&pci_dev->dev, | ||
181 | "DDCB list for card #%d (ddcb_act=%d / ddcb_next=%d):\n", | ||
182 | cd->card_idx, queue->ddcb_act, queue->ddcb_next); | ||
183 | |||
184 | pddcb = queue->ddcb_vaddr; | ||
185 | for (i = 0; i < queue->ddcb_max; i++) { | ||
186 | dev_err(&pci_dev->dev, | ||
187 | " %c %-3d: RETC=%03x SEQ=%04x " | ||
188 | "HSI=%02X SHI=%02x PRIV=%06llx CMD=%03x\n", | ||
189 | i == queue->ddcb_act ? '>' : ' ', | ||
190 | i, | ||
191 | be16_to_cpu(pddcb->retc_16), | ||
192 | be16_to_cpu(pddcb->seqnum_16), | ||
193 | pddcb->hsi, | ||
194 | pddcb->shi, | ||
195 | be64_to_cpu(pddcb->priv_64), | ||
196 | pddcb->cmd); | ||
197 | pddcb++; | ||
198 | } | ||
199 | spin_unlock_irqrestore(&cd->print_lock, flags); | ||
200 | } | ||
201 | |||
202 | struct genwqe_ddcb_cmd *ddcb_requ_alloc(void) | ||
203 | { | ||
204 | struct ddcb_requ *req; | ||
205 | |||
206 | req = kzalloc(sizeof(*req), GFP_ATOMIC); | ||
207 | if (!req) | ||
208 | return NULL; | ||
209 | |||
210 | return &req->cmd; | ||
211 | } | ||
212 | |||
213 | void ddcb_requ_free(struct genwqe_ddcb_cmd *cmd) | ||
214 | { | ||
215 | struct ddcb_requ *req = container_of(cmd, struct ddcb_requ, cmd); | ||
216 | kfree(req); | ||
217 | } | ||
218 | |||
219 | static inline enum genwqe_requ_state ddcb_requ_get_state(struct ddcb_requ *req) | ||
220 | { | ||
221 | return req->req_state; | ||
222 | } | ||
223 | |||
224 | static inline void ddcb_requ_set_state(struct ddcb_requ *req, | ||
225 | enum genwqe_requ_state new_state) | ||
226 | { | ||
227 | req->req_state = new_state; | ||
228 | } | ||
229 | |||
230 | static inline int ddcb_requ_collect_debug_data(struct ddcb_requ *req) | ||
231 | { | ||
232 | return req->cmd.ddata_addr != 0x0; | ||
233 | } | ||
234 | |||
235 | /** | ||
236 | * ddcb_requ_finished() - Returns the hardware state of the associated DDCB | ||
237 | * @cd: pointer to genwqe device descriptor | ||
238 | * @req: DDCB work request | ||
239 | * | ||
240 | * The status of the ddcb_requ mirrors this hardware state; it is | ||
241 | * copied into the ddcb_requ by the interrupt/polling code. The lowlevel code | ||
242 | * should check the hardware state directly, the higher level code | ||
243 | * should check the copy. | ||
244 | * | ||
245 | * This function will also return true if the state of the queue is | ||
246 | * not GENWQE_CARD_USED. This enables us to purge all DDCBs in the | ||
247 | * shutdown case. | ||
248 | */ | ||
249 | static int ddcb_requ_finished(struct genwqe_dev *cd, struct ddcb_requ *req) | ||
250 | { | ||
251 | return (ddcb_requ_get_state(req) == GENWQE_REQU_FINISHED) || | ||
252 | (cd->card_state != GENWQE_CARD_USED); | ||
253 | } | ||
254 | |||
255 | /** | ||
256 | * enqueue_ddcb() - Enqueue a DDCB | ||
257 | * @cd: pointer to genwqe device descriptor | ||
258 | * @queue: queue this operation should be done on | ||
259 | * @ddcb_no: number of the DDCB to be enqueued | ||
260 | * | ||
261 | * Start execution of DDCB by tapping or append to queue via NEXT | ||
262 | * bit. This is done by an atomic 'compare and swap' instruction and | ||
263 | * checking SHI and HSI of the previous DDCB. | ||
264 | * | ||
265 | * This function must only be called with ddcb_lock held. | ||
266 | * | ||
267 | * Return: 1 if new DDCB is appended to previous | ||
268 | * 2 if DDCB queue is tapped via register/simulation | ||
269 | */ | ||
270 | #define RET_DDCB_APPENDED 1 | ||
271 | #define RET_DDCB_TAPPED 2 | ||
272 | |||
273 | static int enqueue_ddcb(struct genwqe_dev *cd, struct ddcb_queue *queue, | ||
274 | struct ddcb *pddcb, int ddcb_no) | ||
275 | { | ||
276 | unsigned int try; | ||
277 | int prev_no; | ||
278 | struct ddcb *prev_ddcb; | ||
279 | u32 old, new, icrc_hsi_shi; | ||
280 | u64 num; | ||
281 | |||
282 | /* | ||
283 | * For performance checks a Dispatch Timestamp can be put into | ||
284 | * the DDCB. It is supposed to use the SLU's free running counter, | ||
285 | * but this requires PCIe cycles. | ||
286 | */ | ||
287 | ddcb_mark_unused(pddcb); | ||
288 | |||
289 | /* check previous DDCB if already fetched */ | ||
290 | prev_no = (ddcb_no == 0) ? queue->ddcb_max - 1 : ddcb_no - 1; | ||
291 | prev_ddcb = &queue->ddcb_vaddr[prev_no]; | ||
292 | |||
293 | /* | ||
294 | * It might have happened that the HSI.FETCHED bit is | ||
295 | * set. Retry in this case; we therefore expect at most two | ||
296 | * attempts. | ||
297 | */ | ||
298 | ddcb_mark_appended(pddcb); | ||
299 | for (try = 0; try < 2; try++) { | ||
300 | old = prev_ddcb->icrc_hsi_shi_32; /* read SHI/HSI in BE32 */ | ||
301 | |||
302 | /* try to append via NEXT bit if prev DDCB is not completed */ | ||
303 | if ((old & DDCB_COMPLETED_BE32) != 0x00000000) | ||
304 | break; | ||
305 | |||
306 | new = (old | DDCB_NEXT_BE32); | ||
307 | icrc_hsi_shi = cmpxchg(&prev_ddcb->icrc_hsi_shi_32, old, new); | ||
308 | |||
309 | if (icrc_hsi_shi == old) | ||
310 | return RET_DDCB_APPENDED; /* appended to queue */ | ||
311 | } | ||
312 | |||
313 | /* Queue must be re-started by updating QUEUE_OFFSET */ | ||
314 | ddcb_mark_tapped(pddcb); | ||
315 | num = (u64)ddcb_no << 8; | ||
316 | __genwqe_writeq(cd, queue->IO_QUEUE_OFFSET, num); /* start queue */ | ||
317 | |||
318 | return RET_DDCB_TAPPED; | ||
319 | } | ||
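/*
 * Design note (illustrative): the cmpxchg() above either wins the
 * race and sets the NEXT bit before the hardware marks the previous
 * DDCB COMPLETED, or we fall back to restarting the queue by writing
 * the DDCB number, shifted by 8, into IO_QUEUE_OFFSET; e.g.
 * ddcb_no = 3 yields the register value (3 << 8) = 0x300.
 */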
320 | |||
321 | /** | ||
322 | * copy_ddcb_results() - Copy output state from real DDCB to request | ||
323 | * | ||
324 | * Copy DDCB ASV to request struct. There is no endian | ||
325 | * conversion made, since data structure in ASV is still | ||
326 | * unknown here. | ||
327 | * | ||
328 | * This is needed by: | ||
329 | * - genwqe_purge_ddcb() | ||
330 | * - genwqe_check_ddcb_queue() | ||
331 | */ | ||
332 | static void copy_ddcb_results(struct ddcb_requ *req, int ddcb_no) | ||
333 | { | ||
334 | struct ddcb_queue *queue = req->queue; | ||
335 | struct ddcb *pddcb = &queue->ddcb_vaddr[req->num]; | ||
336 | |||
337 | memcpy(&req->cmd.asv[0], &pddcb->asv[0], DDCB_ASV_LENGTH); | ||
338 | |||
339 | /* copy status flags of the variant part */ | ||
340 | req->cmd.vcrc = be16_to_cpu(pddcb->vcrc_16); | ||
341 | req->cmd.deque_ts = be64_to_cpu(pddcb->deque_ts_64); | ||
342 | req->cmd.cmplt_ts = be64_to_cpu(pddcb->cmplt_ts_64); | ||
343 | |||
344 | req->cmd.attn = be16_to_cpu(pddcb->attn_16); | ||
345 | req->cmd.progress = be32_to_cpu(pddcb->progress_32); | ||
346 | req->cmd.retc = be16_to_cpu(pddcb->retc_16); | ||
347 | |||
348 | if (ddcb_requ_collect_debug_data(req)) { | ||
349 | int prev_no = (ddcb_no == 0) ? | ||
350 | queue->ddcb_max - 1 : ddcb_no - 1; | ||
351 | struct ddcb *prev_pddcb = &queue->ddcb_vaddr[prev_no]; | ||
352 | |||
353 | memcpy(&req->debug_data.ddcb_finished, pddcb, | ||
354 | sizeof(req->debug_data.ddcb_finished)); | ||
355 | memcpy(&req->debug_data.ddcb_prev, prev_pddcb, | ||
356 | sizeof(req->debug_data.ddcb_prev)); | ||
357 | } | ||
358 | } | ||
359 | |||
360 | /** | ||
361 | * genwqe_check_ddcb_queue() - Checks DDCB queue for completed work requests. | ||
362 | * @cd: pointer to genwqe device descriptor | ||
363 | * | ||
364 | * Return: Number of DDCBs which were finished | ||
365 | */ | ||
366 | static int genwqe_check_ddcb_queue(struct genwqe_dev *cd, | ||
367 | struct ddcb_queue *queue) | ||
368 | { | ||
369 | unsigned long flags; | ||
370 | int ddcbs_finished = 0; | ||
371 | struct pci_dev *pci_dev = cd->pci_dev; | ||
372 | |||
373 | spin_lock_irqsave(&queue->ddcb_lock, flags); | ||
374 | |||
375 | /* FIXME avoid soft locking CPU */ | ||
376 | while (!queue_empty(queue) && (ddcbs_finished < queue->ddcb_max)) { | ||
377 | |||
378 | struct ddcb *pddcb; | ||
379 | struct ddcb_requ *req; | ||
380 | u16 vcrc, vcrc_16, retc_16; | ||
381 | |||
382 | pddcb = &queue->ddcb_vaddr[queue->ddcb_act]; | ||
383 | |||
384 | if ((pddcb->icrc_hsi_shi_32 & DDCB_COMPLETED_BE32) == | ||
385 | 0x00000000) | ||
386 | goto go_home; /* not completed, continue waiting */ | ||
387 | |||
388 | /* Note: DDCB could be purged */ | ||
389 | |||
390 | req = queue->ddcb_req[queue->ddcb_act]; | ||
391 | if (req == NULL) { | ||
392 | /* this occurs if DDCB is purged, not an error */ | ||
393 | /* Move active DDCB further; Nothing to do anymore. */ | ||
394 | goto pick_next_one; | ||
395 | } | ||
396 | |||
397 | /* | ||
398 | * HSI=0x44 (fetched and completed), but RETC is | ||
399 | * 0x101, or even worse 0x000. | ||
400 | * | ||
401 | * In case of seeing the queue in inconsistent state | ||
402 | * we read the errcnts and the queue status to provide | ||
403 | * a trigger for our PCIe analyzer stop capturing. | ||
404 | */ | ||
405 | retc_16 = be16_to_cpu(pddcb->retc_16); | ||
406 | if ((pddcb->hsi == 0x44) && (retc_16 <= 0x101)) { | ||
407 | u64 errcnts, status; | ||
408 | u64 ddcb_offs = (u64)pddcb - (u64)queue->ddcb_vaddr; | ||
409 | |||
410 | errcnts = __genwqe_readq(cd, queue->IO_QUEUE_ERRCNTS); | ||
411 | status = __genwqe_readq(cd, queue->IO_QUEUE_STATUS); | ||
412 | |||
413 | dev_err(&pci_dev->dev, | ||
414 | "[%s] SEQN=%04x HSI=%02x RETC=%03x " | ||
415 | " Q_ERRCNTS=%016llx Q_STATUS=%016llx\n" | ||
416 | " DDCB_DMA_ADDR=%016llx\n", | ||
417 | __func__, be16_to_cpu(pddcb->seqnum_16), | ||
418 | pddcb->hsi, retc_16, errcnts, status, | ||
419 | queue->ddcb_daddr + ddcb_offs); | ||
420 | } | ||
421 | |||
422 | copy_ddcb_results(req, queue->ddcb_act); | ||
423 | queue->ddcb_req[queue->ddcb_act] = NULL; /* take from queue */ | ||
424 | |||
425 | dev_dbg(&pci_dev->dev, "FINISHED DDCB#%d\n", req->num); | ||
426 | genwqe_hexdump(pci_dev, pddcb, sizeof(*pddcb)); | ||
427 | |||
428 | ddcb_mark_finished(pddcb); | ||
429 | |||
430 | /* calculate CRC_16 to see if VCRC is correct */ | ||
431 | vcrc = genwqe_crc16(pddcb->asv, | ||
432 | VCRC_LENGTH(req->cmd.asv_length), | ||
433 | 0xffff); | ||
434 | vcrc_16 = be16_to_cpu(pddcb->vcrc_16); | ||
435 | if (vcrc != vcrc_16) { | ||
436 | printk_ratelimited(KERN_ERR | ||
437 | "%s %s: err: wrong VCRC pre=%02x vcrc_len=%d " | ||
438 | "bytes vcrc_data=%04x is not vcrc_card=%04x\n", | ||
439 | GENWQE_DEVNAME, dev_name(&pci_dev->dev), | ||
440 | pddcb->pre, VCRC_LENGTH(req->cmd.asv_length), | ||
441 | vcrc, vcrc_16); | ||
442 | } | ||
443 | |||
444 | ddcb_requ_set_state(req, GENWQE_REQU_FINISHED); | ||
445 | queue->ddcbs_completed++; | ||
446 | queue->ddcbs_in_flight--; | ||
447 | |||
448 | /* wake up process waiting for this DDCB */ | ||
449 | wake_up_interruptible(&queue->ddcb_waitqs[queue->ddcb_act]); | ||
450 | |||
451 | pick_next_one: | ||
452 | queue->ddcb_act = (queue->ddcb_act + 1) % queue->ddcb_max; | ||
453 | ddcbs_finished++; | ||
454 | } | ||
455 | |||
456 | go_home: | ||
457 | spin_unlock_irqrestore(&queue->ddcb_lock, flags); | ||
458 | return ddcbs_finished; | ||
459 | } | ||
460 | |||
461 | /** | ||
462 | * __genwqe_wait_ddcb() - Waits until DDCB is completed | ||
463 | * @cd: pointer to genwqe device descriptor | ||
464 | * @req: pointer to requested DDCB parameters | ||
465 | * | ||
466 | * The Service Layer will update the RETC in DDCB when processing is | ||
467 | * pending or done. | ||
468 | * | ||
469 | * Return: > 0 remaining jiffies, DDCB completed | ||
470 | * -ETIMEDOUT when timeout | ||
471 | * -ERESTARTSYS when ^C | ||
472 | * -EINVAL when unknown error condition | ||
473 | * | ||
474 | * When an error is returned the caller needs to ensure that | ||
475 | * purge_ddcb() is being called to get the &req removed from the | ||
476 | * queue. | ||
477 | */ | ||
478 | int __genwqe_wait_ddcb(struct genwqe_dev *cd, struct ddcb_requ *req) | ||
479 | { | ||
480 | int rc; | ||
481 | unsigned int ddcb_no; | ||
482 | struct ddcb_queue *queue; | ||
483 | struct pci_dev *pci_dev = cd->pci_dev; | ||
484 | |||
485 | if (req == NULL) | ||
486 | return -EINVAL; | ||
487 | |||
488 | queue = req->queue; | ||
489 | if (queue == NULL) | ||
490 | return -EINVAL; | ||
491 | |||
492 | ddcb_no = req->num; | ||
493 | if (ddcb_no >= queue->ddcb_max) | ||
494 | return -EINVAL; | ||
495 | |||
496 | rc = wait_event_interruptible_timeout(queue->ddcb_waitqs[ddcb_no], | ||
497 | ddcb_requ_finished(cd, req), | ||
498 | genwqe_ddcb_software_timeout * HZ); | ||
499 | |||
500 | /* | ||
501 | * We need to distinguish 3 cases here: | ||
502 | * 1. rc == 0 timeout occurred | ||
503 | * 2. rc == -ERESTARTSYS signal received | ||
504 | * 3. rc > 0 remaining jiffies condition is true | ||
505 | */ | ||
506 | if (rc == 0) { | ||
507 | struct ddcb_queue *queue = req->queue; | ||
508 | struct ddcb *pddcb; | ||
509 | |||
510 | /* | ||
511 | * Timeout may be caused by long task switching time. | ||
512 | * When timeout happens, check if the request has | ||
513 | * meanwhile completed. | ||
514 | */ | ||
515 | genwqe_check_ddcb_queue(cd, req->queue); | ||
516 | if (ddcb_requ_finished(cd, req)) | ||
517 | return rc; | ||
518 | |||
519 | dev_err(&pci_dev->dev, | ||
520 | "[%s] err: DDCB#%d timeout rc=%d state=%d req @ %p\n", | ||
521 | __func__, req->num, rc, ddcb_requ_get_state(req), | ||
522 | req); | ||
523 | dev_err(&pci_dev->dev, | ||
524 | "[%s] IO_QUEUE_STATUS=0x%016llx\n", __func__, | ||
525 | __genwqe_readq(cd, queue->IO_QUEUE_STATUS)); | ||
526 | |||
527 | pddcb = &queue->ddcb_vaddr[req->num]; | ||
528 | genwqe_hexdump(pci_dev, pddcb, sizeof(*pddcb)); | ||
529 | |||
530 | print_ddcb_info(cd, req->queue); | ||
531 | return -ETIMEDOUT; | ||
532 | |||
533 | } else if (rc == -ERESTARTSYS) { | ||
534 | return rc; | ||
535 | /* | ||
536 | * EINTR: Stops the application | ||
537 | * ERESTARTSYS: Restartable system call; will be called again | ||
538 | */ | ||
539 | |||
540 | } else if (rc < 0) { | ||
541 | dev_err(&pci_dev->dev, | ||
542 | "[%s] err: DDCB#%d unknown result (rc=%d) %d!\n", | ||
543 | __func__, req->num, rc, ddcb_requ_get_state(req)); | ||
544 | return -EINVAL; | ||
545 | } | ||
546 | |||
547 | /* Severe error occurred. Driver is forced to stop operation */ | ||
548 | if (cd->card_state != GENWQE_CARD_USED) { | ||
549 | dev_err(&pci_dev->dev, | ||
550 | "[%s] err: DDCB#%d forced to stop (rc=%d)\n", | ||
551 | __func__, req->num, rc); | ||
552 | return -EIO; | ||
553 | } | ||
554 | return rc; | ||
555 | } | ||
556 | |||
557 | /** | ||
558 | * get_next_ddcb() - Get next available DDCB | ||
559 | * @cd: pointer to genwqe device descriptor | ||
560 | * | ||
561 | * The DDCB's content is completely cleared, then PRE and SEQNUM | ||
562 | * are preset. This function must only be called when ddcb_lock is held. | ||
563 | * | ||
564 | * Return: NULL if no empty DDCB available otherwise ptr to next DDCB. | ||
565 | */ | ||
566 | static struct ddcb *get_next_ddcb(struct genwqe_dev *cd, | ||
567 | struct ddcb_queue *queue, | ||
568 | int *num) | ||
569 | { | ||
570 | u64 *pu64; | ||
571 | struct ddcb *pddcb; | ||
572 | |||
573 | if (queue_free_ddcbs(queue) == 0) /* queue is full */ | ||
574 | return NULL; | ||
575 | |||
576 | /* find new ddcb */ | ||
577 | pddcb = &queue->ddcb_vaddr[queue->ddcb_next]; | ||
578 | |||
579 | /* if it is not completed, we are not allowed to use it */ | ||
580 | /* barrier(); */ | ||
581 | if ((pddcb->icrc_hsi_shi_32 & DDCB_COMPLETED_BE32) == 0x00000000) | ||
582 | return NULL; | ||
583 | |||
584 | *num = queue->ddcb_next; /* internal DDCB number */ | ||
585 | queue->ddcb_next = (queue->ddcb_next + 1) % queue->ddcb_max; | ||
586 | |||
587 | /* clear important DDCB fields */ | ||
588 | pu64 = (u64 *)pddcb; | ||
589 | pu64[0] = 0ULL; /* offs 0x00 (ICRC,HSI,SHI,...) */ | ||
590 | pu64[1] = 0ULL; /* offs 0x08 (ACFUNC,CMD...) */ | ||
591 | |||
592 | /* destroy previous results in ASV */ | ||
593 | pu64[0x80/8] = 0ULL; /* offs 0x80 (ASV + 0) */ | ||
594 | pu64[0x88/8] = 0ULL; /* offs 0x88 (ASV + 0x08) */ | ||
595 | pu64[0x90/8] = 0ULL; /* offs 0x90 (ASV + 0x10) */ | ||
596 | pu64[0x98/8] = 0ULL; /* offs 0x98 (ASV + 0x18) */ | ||
597 | pu64[0xd0/8] = 0ULL; /* offs 0xd0 (RETC,ATTN...) */ | ||
598 | |||
599 | pddcb->pre = DDCB_PRESET_PRE; /* 128 */ | ||
600 | pddcb->seqnum_16 = cpu_to_be16(queue->ddcb_seq++); | ||
601 | return pddcb; | ||
602 | } | ||
603 | |||
604 | /** | ||
605 | * __genwqe_purge_ddcb() - Remove a DDCB from the workqueue | ||
606 | * @cd: genwqe device descriptor | ||
607 | * @req: DDCB request | ||
608 | * | ||
609 | * This will fail when the request was already FETCHED. In this case | ||
610 | * we need to wait until it is finished. Else the DDCB can be | ||
611 | * reused. This function also ensures that the request data structure | ||
612 | * is removed from ddcb_req[]. | ||
613 | * | ||
614 | * Do not forget to call this function when genwqe_wait_ddcb() fails, | ||
615 | * such that the request gets really removed from ddcb_req[]. | ||
616 | * | ||
617 | * Return: 0 success | ||
618 | */ | ||
619 | int __genwqe_purge_ddcb(struct genwqe_dev *cd, struct ddcb_requ *req) | ||
620 | { | ||
621 | struct ddcb *pddcb = NULL; | ||
622 | unsigned int t; | ||
623 | unsigned long flags; | ||
624 | struct ddcb_queue *queue = req->queue; | ||
625 | struct pci_dev *pci_dev = cd->pci_dev; | ||
626 | u32 icrc_hsi_shi = 0x0000; | ||
627 | u64 queue_status; | ||
628 | u32 old, new; | ||
629 | |||
630 | /* unsigned long flags; */ | ||
631 | if (genwqe_ddcb_software_timeout <= 0) { | ||
632 | dev_err(&pci_dev->dev, | ||
633 | "[%s] err: software timeout is not set!\n", __func__); | ||
634 | return -EFAULT; | ||
635 | } | ||
636 | |||
637 | pddcb = &queue->ddcb_vaddr[req->num]; | ||
638 | |||
639 | for (t = 0; t < genwqe_ddcb_software_timeout * 10; t++) { | ||
640 | |||
641 | spin_lock_irqsave(&queue->ddcb_lock, flags); | ||
642 | |||
643 | /* Check if req was meanwhile finished */ | ||
644 | if (ddcb_requ_get_state(req) == GENWQE_REQU_FINISHED) | ||
645 | goto go_home; | ||
646 | |||
647 | /* try to set PURGE bit if FETCHED/COMPLETED are not set */ | ||
648 | old = pddcb->icrc_hsi_shi_32; /* read SHI/HSI in BE32 */ | ||
649 | if ((old & DDCB_FETCHED_BE32) == 0x00000000) { | ||
650 | |||
651 | new = (old | DDCB_PURGE_BE32); | ||
652 | icrc_hsi_shi = cmpxchg(&pddcb->icrc_hsi_shi_32, | ||
653 | old, new); | ||
654 | if (icrc_hsi_shi == old) | ||
655 | goto finish_ddcb; | ||
656 | } | ||
657 | |||
658 | /* normal finish with HSI bit */ | ||
659 | barrier(); | ||
660 | icrc_hsi_shi = pddcb->icrc_hsi_shi_32; | ||
661 | if (icrc_hsi_shi & DDCB_COMPLETED_BE32) | ||
662 | goto finish_ddcb; | ||
663 | |||
664 | spin_unlock_irqrestore(&queue->ddcb_lock, flags); | ||
665 | |||
666 | /* | ||
667 | * Here the check_ddcb() function will most likely | ||
668 | * discover this DDCB to be finished some point in | ||
669 | * time. It will mark the req finished and free it up | ||
670 | * in the list. | ||
671 | */ | ||
672 | |||
673 | copy_ddcb_results(req, req->num); /* for the failing case */ | ||
674 | msleep(100); /* sleep for 1/10 second and try again */ | ||
675 | continue; | ||
676 | |||
677 | finish_ddcb: | ||
678 | copy_ddcb_results(req, req->num); | ||
679 | ddcb_requ_set_state(req, GENWQE_REQU_FINISHED); | ||
680 | queue->ddcbs_in_flight--; | ||
681 | queue->ddcb_req[req->num] = NULL; /* delete from array */ | ||
682 | ddcb_mark_cleared(pddcb); | ||
683 | |||
684 | /* Move active DDCB further; Nothing to do here anymore. */ | ||
685 | |||
686 | /* | ||
687 | * We need to ensure that there is at least one free | ||
688 | * DDCB in the queue. To do that, we must update | ||
689 | * ddcb_act only if the COMPLETED bit is set for the | ||
690 | * DDCB we are working on; otherwise we treat the DDCB as | ||
691 | * occupied even if we PURGED it (the hardware has not | ||
692 | * set the COMPLETED bit yet!). | ||
693 | */ | ||
694 | icrc_hsi_shi = pddcb->icrc_hsi_shi_32; | ||
695 | if ((icrc_hsi_shi & DDCB_COMPLETED_BE32) && | ||
696 | (queue->ddcb_act == req->num)) { | ||
697 | queue->ddcb_act = ((queue->ddcb_act + 1) % | ||
698 | queue->ddcb_max); | ||
699 | } | ||
700 | go_home: | ||
701 | spin_unlock_irqrestore(&queue->ddcb_lock, flags); | ||
702 | return 0; | ||
703 | } | ||
704 | |||
705 | /* | ||
706 | * If the card is dead and the queue is forced to stop, we | ||
707 | * might see this in the queue status register. | ||
708 | */ | ||
709 | queue_status = __genwqe_readq(cd, queue->IO_QUEUE_STATUS); | ||
710 | |||
711 | dev_dbg(&pci_dev->dev, "UN/FINISHED DDCB#%d\n", req->num); | ||
712 | genwqe_hexdump(pci_dev, pddcb, sizeof(*pddcb)); | ||
713 | |||
714 | dev_err(&pci_dev->dev, | ||
715 | "[%s] err: DDCB#%d not purged and not completed " | ||
716 | "after %d seconds QSTAT=%016llx!!\n", | ||
717 | __func__, req->num, genwqe_ddcb_software_timeout, | ||
718 | queue_status); | ||
719 | |||
720 | print_ddcb_info(cd, req->queue); | ||
721 | |||
722 | return -EFAULT; | ||
723 | } | ||
724 | |||
725 | int genwqe_init_debug_data(struct genwqe_dev *cd, struct genwqe_debug_data *d) | ||
726 | { | ||
727 | int len; | ||
728 | struct pci_dev *pci_dev = cd->pci_dev; | ||
729 | |||
730 | if (d == NULL) { | ||
731 | dev_err(&pci_dev->dev, | ||
732 | "[%s] err: invalid memory for debug data!\n", | ||
733 | __func__); | ||
734 | return -EFAULT; | ||
735 | } | ||
736 | |||
737 | len = sizeof(d->driver_version); | ||
738 | snprintf(d->driver_version, len, "%s", DRV_VERS_STRING); | ||
739 | d->slu_unitcfg = cd->slu_unitcfg; | ||
740 | d->app_unitcfg = cd->app_unitcfg; | ||
741 | return 0; | ||
742 | } | ||
743 | |||
744 | /** | ||
745 | * __genwqe_enqueue_ddcb() - Enqueue a DDCB | ||
746 | * @cd: pointer to genwqe device descriptor | ||
747 | * @req: pointer to DDCB execution request | ||
748 | * | ||
749 | * Return: 0 if enqueuing succeeded | ||
750 | * -EIO if card is unusable/PCIe problems | ||
751 | * -EBUSY if enqueuing failed | ||
752 | */ | ||
753 | int __genwqe_enqueue_ddcb(struct genwqe_dev *cd, struct ddcb_requ *req) | ||
754 | { | ||
755 | struct ddcb *pddcb; | ||
756 | unsigned long flags; | ||
757 | struct ddcb_queue *queue; | ||
758 | struct pci_dev *pci_dev = cd->pci_dev; | ||
759 | u16 icrc; | ||
760 | |||
761 | if (cd->card_state != GENWQE_CARD_USED) { | ||
762 | printk_ratelimited(KERN_ERR | ||
763 | "%s %s: [%s] Card is unusable/PCIe problem Req#%d\n", | ||
764 | GENWQE_DEVNAME, dev_name(&pci_dev->dev), | ||
765 | __func__, req->num); | ||
766 | return -EIO; | ||
767 | } | ||
768 | |||
769 | queue = req->queue = &cd->queue; | ||
770 | |||
771 | /* FIXME: workaround to improve performance when no irq is | ||
772 | * available. | ||
773 | */ | ||
774 | if (genwqe_polling_enabled) | ||
775 | genwqe_check_ddcb_queue(cd, queue); | ||
776 | |||
777 | /* | ||
778 | * It must be ensured to process all DDCBs in successive | ||
779 | * order. Use a lock here in order to prevent nested DDCB | ||
780 | * enqueuing. | ||
781 | */ | ||
782 | spin_lock_irqsave(&queue->ddcb_lock, flags); | ||
783 | |||
784 | pddcb = get_next_ddcb(cd, queue, &req->num); /* get ptr and num */ | ||
785 | if (pddcb == NULL) { | ||
786 | spin_unlock_irqrestore(&queue->ddcb_lock, flags); | ||
787 | queue->busy++; | ||
788 | return -EBUSY; | ||
789 | } | ||
790 | |||
791 | if (queue->ddcb_req[req->num] != NULL) { | ||
792 | spin_unlock_irqrestore(&queue->ddcb_lock, flags); | ||
793 | |||
794 | dev_err(&pci_dev->dev, | ||
795 | "[%s] picked DDCB %d with req=%p still in use!!\n", | ||
796 | __func__, req->num, req); | ||
797 | return -EFAULT; | ||
798 | } | ||
799 | ddcb_requ_set_state(req, GENWQE_REQU_ENQUEUED); | ||
800 | queue->ddcb_req[req->num] = req; | ||
801 | |||
802 | pddcb->cmdopts_16 = cpu_to_be16(req->cmd.cmdopts); | ||
803 | pddcb->cmd = req->cmd.cmd; | ||
804 | pddcb->acfunc = req->cmd.acfunc; /* functional unit */ | ||
805 | |||
806 | /* | ||
807 | * We know that we can get retc 0x104 with CRC error, do not | ||
808 | * stop the queue in those cases for this command. XDIR = 1 | ||
809 | * does not work for old SLU versions. | ||
810 | * | ||
811 | * Last bitstream with the old XDIR behavior had SLU_ID | ||
812 | * 0x34199. | ||
813 | */ | ||
814 | if ((cd->slu_unitcfg & 0xFFFF0ull) > 0x34199ull) | ||
815 | pddcb->xdir = 0x1; | ||
816 | else | ||
817 | pddcb->xdir = 0x0; | ||
818 | |||
819 | |||
820 | pddcb->psp = (((req->cmd.asiv_length / 8) << 4) | | ||
821 | ((req->cmd.asv_length / 8))); | ||
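/* Illustration: asiv_length = 96 and asv_length = 64 give psp = 0xc8 */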
822 | pddcb->disp_ts_64 = cpu_to_be64(req->cmd.disp_ts); | ||
823 | |||
824 | /* | ||
825 | * If copying the whole DDCB_ASIV_LENGTH is impacting | ||
826 | * performance we need to change it to | ||
827 | * req->cmd.asiv_length. But simulation benefits from some | ||
828 | * non-architectured bits behind the architectured content. | ||
829 | * | ||
830 | * How much data is copied depends on the availability of the | ||
831 | * ATS field, which was introduced late. If the ATS field is | ||
832 | * supported ASIV is 8 bytes shorter than it used to be. Since | ||
833 | * the ATS field is copied too, the code should do exactly | ||
834 | * what it did before, but I wanted to make copying of the ATS | ||
835 | * field very explicit. | ||
836 | */ | ||
837 | if (genwqe_get_slu_id(cd) <= 0x2) { | ||
838 | memcpy(&pddcb->__asiv[0], /* destination */ | ||
839 | &req->cmd.__asiv[0], /* source */ | ||
840 | DDCB_ASIV_LENGTH); /* req->cmd.asiv_length */ | ||
841 | } else { | ||
842 | pddcb->n.ats_64 = req->cmd.ats; | ||
843 | memcpy(&pddcb->n.asiv[0], /* destination */ | ||
844 | &req->cmd.asiv[0], /* source */ | ||
845 | DDCB_ASIV_LENGTH_ATS); /* req->cmd.asiv_length */ | ||
846 | } | ||
847 | |||
848 | pddcb->icrc_hsi_shi_32 = cpu_to_be32(0x00000000); /* for crc */ | ||
849 | |||
850 | /* | ||
851 | * Calculate CRC_16 for corresponding range PSP(7:4). Include | ||
852 | * empty 4 bytes prior to the data. | ||
853 | */ | ||
854 | icrc = genwqe_crc16((const u8 *)pddcb, | ||
855 | ICRC_LENGTH(req->cmd.asiv_length), 0xffff); | ||
856 | pddcb->icrc_hsi_shi_32 = cpu_to_be32((u32)icrc << 16); | ||
857 | |||
858 | /* enable DDCB completion irq */ | ||
859 | if (!genwqe_polling_enabled) | ||
860 | pddcb->icrc_hsi_shi_32 |= DDCB_INTR_BE32; | ||
861 | |||
862 | dev_dbg(&pci_dev->dev, "INPUT DDCB#%d\n", req->num); | ||
863 | genwqe_hexdump(pci_dev, pddcb, sizeof(*pddcb)); | ||
864 | |||
865 | if (ddcb_requ_collect_debug_data(req)) { | ||
866 | /* Use the kernel copy of the debug data; copying back to | ||
867 | the user buffer happens later. */ | ||
868 | |||
869 | genwqe_init_debug_data(cd, &req->debug_data); | ||
870 | memcpy(&req->debug_data.ddcb_before, pddcb, | ||
871 | sizeof(req->debug_data.ddcb_before)); | ||
872 | } | ||
873 | |||
874 | enqueue_ddcb(cd, queue, pddcb, req->num); | ||
875 | queue->ddcbs_in_flight++; | ||
876 | |||
877 | if (queue->ddcbs_in_flight > queue->ddcbs_max_in_flight) | ||
878 | queue->ddcbs_max_in_flight = queue->ddcbs_in_flight; | ||
879 | |||
880 | ddcb_requ_set_state(req, GENWQE_REQU_TAPPED); | ||
881 | spin_unlock_irqrestore(&queue->ddcb_lock, flags); | ||
882 | wake_up_interruptible(&cd->queue_waitq); | ||
883 | |||
884 | return 0; | ||
885 | } | ||
886 | |||
887 | /** | ||
888 | * __genwqe_execute_raw_ddcb() - Setup and execute DDCB | ||
889 | * @cd: pointer to genwqe device descriptor | ||
890 | * @cmd: user provided DDCB command | ||
891 | */ | ||
892 | int __genwqe_execute_raw_ddcb(struct genwqe_dev *cd, | ||
893 | struct genwqe_ddcb_cmd *cmd) | ||
894 | { | ||
895 | int rc = 0; | ||
896 | struct pci_dev *pci_dev = cd->pci_dev; | ||
897 | struct ddcb_requ *req = container_of(cmd, struct ddcb_requ, cmd); | ||
898 | |||
899 | if (cmd->asiv_length > DDCB_ASIV_LENGTH) { | ||
900 | dev_err(&pci_dev->dev, "[%s] err: wrong asiv_length of %d\n", | ||
901 | __func__, cmd->asiv_length); | ||
902 | return -EINVAL; | ||
903 | } | ||
904 | if (cmd->asv_length > DDCB_ASV_LENGTH) { | ||
905 | dev_err(&pci_dev->dev, "[%s] err: wrong asv_length of %d\n", | ||
906 | __func__, cmd->asv_length); | ||
907 | return -EINVAL; | ||
908 | } | ||
909 | rc = __genwqe_enqueue_ddcb(cd, req); | ||
910 | if (rc != 0) | ||
911 | return rc; | ||
912 | |||
913 | rc = __genwqe_wait_ddcb(cd, req); | ||
914 | if (rc < 0) /* error or signal interrupt */ | ||
915 | goto err_exit; | ||
916 | |||
917 | if (ddcb_requ_collect_debug_data(req)) { | ||
918 | if (copy_to_user((void __user *)cmd->ddata_addr, | ||
919 | &req->debug_data, | ||
920 | sizeof(struct genwqe_debug_data))) | ||
921 | return -EFAULT; | ||
922 | } | ||
923 | |||
924 | /* | ||
925 | * Higher values than 0x102 indicate completion with faults, | ||
926 | * lower values than 0x102 indicate processing faults. Note | ||
927 | * that the DDCB might have been purged, e.g. on Ctrl+C. | ||
928 | */ | ||
929 | if (cmd->retc != DDCB_RETC_COMPLETE) { | ||
930 | /* This might happen, e.g. on a flash read, and needs to | ||
931 | be handled by the upper layer code. */ | ||
932 | rc = -EBADMSG; /* not processed/error retc */ | ||
933 | } | ||
934 | |||
935 | return rc; | ||
936 | |||
937 | err_exit: | ||
938 | __genwqe_purge_ddcb(cd, req); | ||
939 | |||
940 | if (ddcb_requ_collect_debug_data(req)) { | ||
941 | if (copy_to_user((void __user *)cmd->ddata_addr, | ||
942 | &req->debug_data, | ||
943 | sizeof(struct genwqe_debug_data))) | ||
944 | return -EFAULT; | ||
945 | } | ||
946 | return rc; | ||
947 | } | ||
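/*
 * Typical calling sequence (an illustrative sketch only; error
 * handling is trimmed and the command/ASIV values are made up):
 *
 *   struct genwqe_ddcb_cmd *cmd = ddcb_requ_alloc();
 *
 *   cmd->cmd = 0xa5;              // hypothetical unit command
 *   cmd->acfunc = 0;              // addressed accelerator function
 *   cmd->asiv_length = 8;
 *   cmd->asv_length = 8;
 *   rc = __genwqe_execute_raw_ddcb(cd, cmd);
 *   ...
 *   ddcb_requ_free(cmd);
 */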
948 | |||
949 | /** | ||
950 | * genwqe_next_ddcb_ready() - Figure out if the next DDCB is already finished | ||
951 | * | ||
952 | * We use this as condition for our wait-queue code. | ||
953 | */ | ||
954 | static int genwqe_next_ddcb_ready(struct genwqe_dev *cd) | ||
955 | { | ||
956 | unsigned long flags; | ||
957 | struct ddcb *pddcb; | ||
958 | struct ddcb_queue *queue = &cd->queue; | ||
959 | |||
960 | spin_lock_irqsave(&queue->ddcb_lock, flags); | ||
961 | |||
962 | if (queue_empty(queue)) { /* empty queue */ | ||
963 | spin_unlock_irqrestore(&queue->ddcb_lock, flags); | ||
964 | return 0; | ||
965 | } | ||
966 | |||
967 | pddcb = &queue->ddcb_vaddr[queue->ddcb_act]; | ||
968 | if (pddcb->icrc_hsi_shi_32 & DDCB_COMPLETED_BE32) { /* ddcb ready */ | ||
969 | spin_unlock_irqrestore(&queue->ddcb_lock, flags); | ||
970 | return 1; | ||
971 | } | ||
972 | |||
973 | spin_unlock_irqrestore(&queue->ddcb_lock, flags); | ||
974 | return 0; | ||
975 | } | ||
976 | |||
977 | /** | ||
978 | * genwqe_ddcbs_in_flight() - Check how many DDCBs are in flight | ||
979 | * | ||
980 | * Keep track of the number of DDCBs which are currently in the | ||
981 | * queue. This is needed for statistics as well as for the condition | ||
982 | * deciding whether to wait or rather poll when no interrupts are available. | ||
983 | */ | ||
984 | int genwqe_ddcbs_in_flight(struct genwqe_dev *cd) | ||
985 | { | ||
986 | unsigned long flags; | ||
987 | int ddcbs_in_flight = 0; | ||
988 | struct ddcb_queue *queue = &cd->queue; | ||
989 | |||
990 | spin_lock_irqsave(&queue->ddcb_lock, flags); | ||
991 | ddcbs_in_flight += queue->ddcbs_in_flight; | ||
992 | spin_unlock_irqrestore(&queue->ddcb_lock, flags); | ||
993 | |||
994 | return ddcbs_in_flight; | ||
995 | } | ||
996 | |||
997 | static int setup_ddcb_queue(struct genwqe_dev *cd, struct ddcb_queue *queue) | ||
998 | { | ||
999 | int rc, i; | ||
1000 | struct ddcb *pddcb; | ||
1001 | u64 val64; | ||
1002 | unsigned int queue_size; | ||
1003 | struct pci_dev *pci_dev = cd->pci_dev; | ||
1004 | |||
1005 | if (genwqe_ddcb_max < 2) | ||
1006 | return -EINVAL; | ||
1007 | |||
1008 | queue_size = roundup(genwqe_ddcb_max * sizeof(struct ddcb), PAGE_SIZE); | ||
1009 | |||
1010 | queue->ddcbs_in_flight = 0; /* statistics */ | ||
1011 | queue->ddcbs_max_in_flight = 0; | ||
1012 | queue->ddcbs_completed = 0; | ||
1013 | queue->busy = 0; | ||
1014 | |||
1015 | queue->ddcb_seq = 0x100; /* start sequence number */ | ||
1016 | queue->ddcb_max = genwqe_ddcb_max; /* module parameter */ | ||
1017 | queue->ddcb_vaddr = __genwqe_alloc_consistent(cd, queue_size, | ||
1018 | &queue->ddcb_daddr); | ||
1019 | if (queue->ddcb_vaddr == NULL) { | ||
1020 | dev_err(&pci_dev->dev, | ||
1021 | "[%s] **err: could not allocate DDCB **\n", __func__); | ||
1022 | return -ENOMEM; | ||
1023 | } | ||
1024 | memset(queue->ddcb_vaddr, 0, queue_size); | ||
1025 | |||
1026 | queue->ddcb_req = kcalloc(queue->ddcb_max, sizeof(struct ddcb_requ *), | ||
1027 | GFP_KERNEL); | ||
1028 | if (!queue->ddcb_req) { | ||
1029 | rc = -ENOMEM; | ||
1030 | goto free_ddcbs; | ||
1031 | } | ||
1032 | |||
1033 | queue->ddcb_waitqs = kcalloc(queue->ddcb_max, sizeof(wait_queue_head_t), | ||
1034 | GFP_KERNEL); | ||
1035 | if (!queue->ddcb_waitqs) { | ||
1036 | rc = -ENOMEM; | ||
1037 | goto free_requs; | ||
1038 | } | ||
1039 | |||
1040 | for (i = 0; i < queue->ddcb_max; i++) { | ||
1041 | pddcb = &queue->ddcb_vaddr[i]; /* DDCBs */ | ||
1042 | pddcb->icrc_hsi_shi_32 = DDCB_COMPLETED_BE32; | ||
1043 | pddcb->retc_16 = cpu_to_be16(0xfff); | ||
1044 | |||
1045 | queue->ddcb_req[i] = NULL; /* requests */ | ||
1046 | init_waitqueue_head(&queue->ddcb_waitqs[i]); /* waitqueues */ | ||
1047 | } | ||
1048 | |||
1049 | queue->ddcb_act = 0; | ||
1050 | queue->ddcb_next = 0; /* queue is empty */ | ||
1051 | |||
1052 | spin_lock_init(&queue->ddcb_lock); | ||
1053 | init_waitqueue_head(&queue->ddcb_waitq); | ||
1054 | |||
1055 | val64 = ((u64)(queue->ddcb_max - 1) << 8); /* lastptr */ | ||
1056 | __genwqe_writeq(cd, queue->IO_QUEUE_CONFIG, 0x07); /* iCRC/vCRC */ | ||
1057 | __genwqe_writeq(cd, queue->IO_QUEUE_SEGMENT, queue->ddcb_daddr); | ||
1058 | __genwqe_writeq(cd, queue->IO_QUEUE_INITSQN, queue->ddcb_seq); | ||
1059 | __genwqe_writeq(cd, queue->IO_QUEUE_WRAP, val64); | ||
1060 | return 0; | ||
1061 | |||
1062 | free_requs: | ||
1063 | kfree(queue->ddcb_req); | ||
1064 | queue->ddcb_req = NULL; | ||
1065 | free_ddcbs: | ||
1066 | __genwqe_free_consistent(cd, queue_size, queue->ddcb_vaddr, | ||
1067 | queue->ddcb_daddr); | ||
1068 | queue->ddcb_vaddr = NULL; | ||
1069 | queue->ddcb_daddr = 0ull; | ||
1070 | return rc; | ||
1071 | |||
1072 | } | ||
1073 | |||
1074 | static int ddcb_queue_initialized(struct ddcb_queue *queue) | ||
1075 | { | ||
1076 | return queue->ddcb_vaddr != NULL; | ||
1077 | } | ||
1078 | |||
1079 | static void free_ddcb_queue(struct genwqe_dev *cd, struct ddcb_queue *queue) | ||
1080 | { | ||
1081 | unsigned int queue_size; | ||
1082 | |||
1083 | queue_size = roundup(queue->ddcb_max * sizeof(struct ddcb), PAGE_SIZE); | ||
1084 | |||
1085 | kfree(queue->ddcb_req); | ||
1086 | queue->ddcb_req = NULL; | ||
1087 | |||
1088 | if (queue->ddcb_vaddr) { | ||
1089 | __genwqe_free_consistent(cd, queue_size, queue->ddcb_vaddr, | ||
1090 | queue->ddcb_daddr); | ||
1091 | queue->ddcb_vaddr = NULL; | ||
1092 | queue->ddcb_daddr = 0ull; | ||
1093 | } | ||
1094 | } | ||
1095 | |||
1096 | static irqreturn_t genwqe_pf_isr(int irq, void *dev_id) | ||
1097 | { | ||
1098 | u64 gfir; | ||
1099 | struct genwqe_dev *cd = (struct genwqe_dev *)dev_id; | ||
1100 | struct pci_dev *pci_dev = cd->pci_dev; | ||
1101 | |||
1102 | /* | ||
1103 | * In case of fatal FIR error the queue is stopped, such that | ||
1104 | * we can safely check it without risking anything. | ||
1105 | */ | ||
1106 | cd->irqs_processed++; | ||
1107 | wake_up_interruptible(&cd->queue_waitq); | ||
1108 | |||
1109 | /* | ||
1110 | * Checking for errors before kicking the queue might be | ||
1111 | * safer, but slower for the good-case ... See above. | ||
1112 | */ | ||
1113 | gfir = __genwqe_readq(cd, IO_SLC_CFGREG_GFIR); | ||
1114 | if ((gfir & GFIR_ERR_TRIGGER) != 0x0) { | ||
1115 | |||
1116 | wake_up_interruptible(&cd->health_waitq); | ||
1117 | |||
1118 | /* | ||
1119 | * By default GFIRs cause recovery actions. This | ||
1120 | * count is just for debug when recovery is masked. | ||
1121 | */ | ||
1122 | printk_ratelimited(KERN_ERR | ||
1123 | "%s %s: [%s] GFIR=%016llx\n", | ||
1124 | GENWQE_DEVNAME, dev_name(&pci_dev->dev), | ||
1125 | __func__, gfir); | ||
1126 | } | ||
1127 | |||
1128 | return IRQ_HANDLED; | ||
1129 | } | ||
1130 | |||
1131 | static irqreturn_t genwqe_vf_isr(int irq, void *dev_id) | ||
1132 | { | ||
1133 | struct genwqe_dev *cd = (struct genwqe_dev *)dev_id; | ||
1134 | |||
1135 | cd->irqs_processed++; | ||
1136 | wake_up_interruptible(&cd->queue_waitq); | ||
1137 | |||
1138 | return IRQ_HANDLED; | ||
1139 | } | ||
1140 | |||
1141 | /** | ||
1142 | * genwqe_card_thread() - Work thread for the DDCB queue | ||
1143 | * | ||
1144 | * The idea is to check if there are DDCBs in processing. If there are | ||
1145 | * some finished DDCBs, we process them and wakeup the | ||
1146 | * requestors. Otherwise we give other processes time using | ||
1147 | * cond_resched(). | ||
1148 | */ | ||
1149 | static int genwqe_card_thread(void *data) | ||
1150 | { | ||
1151 | int should_stop = 0, rc = 0; | ||
1152 | struct genwqe_dev *cd = (struct genwqe_dev *)data; | ||
1153 | |||
1154 | while (!kthread_should_stop()) { | ||
1155 | |||
1156 | genwqe_check_ddcb_queue(cd, &cd->queue); | ||
1157 | |||
1158 | if (genwqe_polling_enabled) { | ||
1159 | rc = wait_event_interruptible_timeout( | ||
1160 | cd->queue_waitq, | ||
1161 | genwqe_ddcbs_in_flight(cd) || | ||
1162 | (should_stop = kthread_should_stop()), 1); | ||
1163 | } else { | ||
1164 | rc = wait_event_interruptible_timeout( | ||
1165 | cd->queue_waitq, | ||
1166 | genwqe_next_ddcb_ready(cd) || | ||
1167 | (should_stop = kthread_should_stop()), HZ); | ||
1168 | } | ||
1169 | if (should_stop) | ||
1170 | break; | ||
1171 | |||
1172 | /* | ||
1173 | * Avoid soft lockups on heavy loads; we do not want | ||
1174 | * to disable our interrupts. | ||
1175 | */ | ||
1176 | cond_resched(); | ||
1177 | } | ||
1178 | return 0; | ||
1179 | } | ||
1180 | |||
1181 | /** | ||
1182 | * genwqe_setup_service_layer() - Setup DDCB queue | ||
1183 | * @cd: pointer to genwqe device descriptor | ||
1184 | * | ||
1185 | * Allocate DDCBs. Configure Service Layer Controller (SLC). | ||
1186 | * | ||
1187 | * Return: 0 success | ||
1188 | */ | ||
1189 | int genwqe_setup_service_layer(struct genwqe_dev *cd) | ||
1190 | { | ||
1191 | int rc; | ||
1192 | struct ddcb_queue *queue; | ||
1193 | struct pci_dev *pci_dev = cd->pci_dev; | ||
1194 | |||
1195 | if (genwqe_is_privileged(cd)) { | ||
1196 | rc = genwqe_card_reset(cd); | ||
1197 | if (rc < 0) { | ||
1198 | dev_err(&pci_dev->dev, | ||
1199 | "[%s] err: reset failed.\n", __func__); | ||
1200 | return rc; | ||
1201 | } | ||
1202 | genwqe_read_softreset(cd); | ||
1203 | } | ||
1204 | |||
1205 | queue = &cd->queue; | ||
1206 | queue->IO_QUEUE_CONFIG = IO_SLC_QUEUE_CONFIG; | ||
1207 | queue->IO_QUEUE_STATUS = IO_SLC_QUEUE_STATUS; | ||
1208 | queue->IO_QUEUE_SEGMENT = IO_SLC_QUEUE_SEGMENT; | ||
1209 | queue->IO_QUEUE_INITSQN = IO_SLC_QUEUE_INITSQN; | ||
1210 | queue->IO_QUEUE_OFFSET = IO_SLC_QUEUE_OFFSET; | ||
1211 | queue->IO_QUEUE_WRAP = IO_SLC_QUEUE_WRAP; | ||
1212 | queue->IO_QUEUE_WTIME = IO_SLC_QUEUE_WTIME; | ||
1213 | queue->IO_QUEUE_ERRCNTS = IO_SLC_QUEUE_ERRCNTS; | ||
1214 | queue->IO_QUEUE_LRW = IO_SLC_QUEUE_LRW; | ||
1215 | |||
1216 | rc = setup_ddcb_queue(cd, queue); | ||
1217 | if (rc != 0) { | ||
1218 | rc = -ENODEV; | ||
1219 | goto err_out; | ||
1220 | } | ||
1221 | |||
1222 | init_waitqueue_head(&cd->queue_waitq); | ||
1223 | cd->card_thread = kthread_run(genwqe_card_thread, cd, | ||
1224 | GENWQE_DEVNAME "%d_thread", | ||
1225 | cd->card_idx); | ||
1226 | if (IS_ERR(cd->card_thread)) { | ||
1227 | rc = PTR_ERR(cd->card_thread); | ||
1228 | cd->card_thread = NULL; | ||
1229 | goto stop_free_queue; | ||
1230 | } | ||
1231 | |||
1232 | rc = genwqe_set_interrupt_capability(cd, GENWQE_MSI_IRQS); | ||
1233 | if (rc > 0) | ||
1234 | rc = genwqe_set_interrupt_capability(cd, rc); | ||
1235 | if (rc != 0) { | ||
1236 | rc = -ENODEV; | ||
1237 | goto stop_kthread; | ||
1238 | } | ||
1239 | |||
1240 | /* | ||
1241 | * We must have all wait-queues initialized when we enable the | ||
1242 | * interrupts. Otherwise we might crash if we get an early | ||
1243 | * irq. | ||
1244 | */ | ||
1245 | init_waitqueue_head(&cd->health_waitq); | ||
1246 | |||
1247 | if (genwqe_is_privileged(cd)) { | ||
1248 | rc = request_irq(pci_dev->irq, genwqe_pf_isr, IRQF_SHARED, | ||
1249 | GENWQE_DEVNAME, cd); | ||
1250 | } else { | ||
1251 | rc = request_irq(pci_dev->irq, genwqe_vf_isr, IRQF_SHARED, | ||
1252 | GENWQE_DEVNAME, cd); | ||
1253 | } | ||
1254 | if (rc < 0) { | ||
1255 | dev_err(&pci_dev->dev, "irq %d not free.\n", pci_dev->irq); | ||
1256 | goto stop_irq_cap; | ||
1257 | } | ||
1258 | |||
1259 | cd->card_state = GENWQE_CARD_USED; | ||
1260 | return 0; | ||
1261 | |||
1262 | stop_irq_cap: | ||
1263 | genwqe_reset_interrupt_capability(cd); | ||
1264 | stop_kthread: | ||
1265 | kthread_stop(cd->card_thread); | ||
1266 | cd->card_thread = NULL; | ||
1267 | stop_free_queue: | ||
1268 | free_ddcb_queue(cd, queue); | ||
1269 | err_out: | ||
1270 | return rc; | ||
1271 | } | ||
1272 | |||
1273 | /** | ||
1274 | * queue_wake_up_all() - Handles fatal error case | ||
1275 | * | ||
1276 | * The PCI device became unusable and we have to stop all pending | ||
1277 | * requests as fast as we can. The code after this must purge the | ||
1278 | * DDCBs in question and ensure that all mappings are freed. | ||
1279 | */ | ||
1280 | static int queue_wake_up_all(struct genwqe_dev *cd) | ||
1281 | { | ||
1282 | unsigned int i; | ||
1283 | unsigned long flags; | ||
1284 | struct ddcb_queue *queue = &cd->queue; | ||
1285 | |||
1286 | spin_lock_irqsave(&queue->ddcb_lock, flags); | ||
1287 | |||
1288 | for (i = 0; i < queue->ddcb_max; i++) | ||
1289 | wake_up_interruptible(&queue->ddcb_waitqs[i]); | ||
1290 | |||
1291 | spin_unlock_irqrestore(&queue->ddcb_lock, flags); | ||
1292 | |||
1293 | return 0; | ||
1294 | } | ||
1295 | |||
1296 | /** | ||
1297 | * genwqe_finish_queue() - Stop the queue and drain pending work requests | ||
1298 | * | ||
1299 | * Relies on the pre-condition that there are no users of the card | ||
1300 | * device anymore e.g. with open file-descriptors. | ||
1301 | * | ||
1302 | * This function must be robust enough to be called twice. | ||
1303 | */ | ||
1304 | int genwqe_finish_queue(struct genwqe_dev *cd) | ||
1305 | { | ||
1306 | int i, rc = 0, in_flight; | ||
1307 | int waitmax = genwqe_ddcb_software_timeout; | ||
1308 | struct pci_dev *pci_dev = cd->pci_dev; | ||
1309 | struct ddcb_queue *queue = &cd->queue; | ||
1310 | |||
1311 | if (!ddcb_queue_initialized(queue)) | ||
1312 | return 0; | ||
1313 | |||
1314 | /* Do not wipe out the error state. */ | ||
1315 | if (cd->card_state == GENWQE_CARD_USED) | ||
1316 | cd->card_state = GENWQE_CARD_UNUSED; | ||
1317 | |||
1318 | /* Wake up all requests in the DDCB queue such that they | ||
1319 | can be removed nicely. */ | ||
1320 | queue_wake_up_all(cd); | ||
1321 | |||
1322 | /* We must wait to get rid of the DDCBs in flight */ | ||
1323 | for (i = 0; i < waitmax; i++) { | ||
1324 | in_flight = genwqe_ddcbs_in_flight(cd); | ||
1325 | |||
1326 | if (in_flight == 0) | ||
1327 | break; | ||
1328 | |||
1329 | dev_dbg(&pci_dev->dev, | ||
1330 | " DEBUG [%d/%d] waiting for queue to get empty: " | ||
1331 | "%d requests!\n", i, waitmax, in_flight); | ||
1332 | |||
1333 | /* | ||
1334 | * Severe error situation: The card itself has | ||
1335 | * 16 DDCB queues, each queue has e.g. 32 entries, | ||
1336 | * each DDCB has a hardware timeout of currently 250 | ||
1337 | * msec but the PFs have a hardware timeout of 8 sec | ||
1338 | * ... so we wait for something large. | ||
1339 | */ | ||
1340 | msleep(1000); | ||
1341 | } | ||
1342 | if (i == waitmax) { | ||
1343 | dev_err(&pci_dev->dev, " [%s] err: queue is not empty!!\n", | ||
1344 | __func__); | ||
1345 | rc = -EIO; | ||
1346 | } | ||
1347 | return rc; | ||
1348 | } | ||
1349 | |||
1350 | /** | ||
1351 | * genwqe_release_service_layer() - Shutdown DDCB queue | ||
1352 | * @cd: genwqe device descriptor | ||
1353 | * | ||
1354 | * This function must be robust enough to be called twice. | ||
1355 | */ | ||
1356 | int genwqe_release_service_layer(struct genwqe_dev *cd) | ||
1357 | { | ||
1358 | struct pci_dev *pci_dev = cd->pci_dev; | ||
1359 | |||
1360 | if (!ddcb_queue_initialized(&cd->queue)) | ||
1361 | return 1; | ||
1362 | |||
1363 | free_irq(pci_dev->irq, cd); | ||
1364 | genwqe_reset_interrupt_capability(cd); | ||
1365 | |||
1366 | if (cd->card_thread != NULL) { | ||
1367 | kthread_stop(cd->card_thread); | ||
1368 | cd->card_thread = NULL; | ||
1369 | } | ||
1370 | |||
1371 | free_ddcb_queue(cd, &cd->queue); | ||
1372 | return 0; | ||
1373 | } | ||
diff --git a/drivers/misc/genwqe/card_ddcb.h b/drivers/misc/genwqe/card_ddcb.h new file mode 100644 index 000000000000..c4f26720753e --- /dev/null +++ b/drivers/misc/genwqe/card_ddcb.h | |||
@@ -0,0 +1,188 @@ | |||
1 | #ifndef __CARD_DDCB_H__ | ||
2 | #define __CARD_DDCB_H__ | ||
3 | |||
4 | /** | ||
5 | * IBM Accelerator Family 'GenWQE' | ||
6 | * | ||
7 | * (C) Copyright IBM Corp. 2013 | ||
8 | * | ||
9 | * Author: Frank Haverkamp <haver@linux.vnet.ibm.com> | ||
10 | * Author: Joerg-Stephan Vogt <jsvogt@de.ibm.com> | ||
11 | * Author: Michael Jung <mijung@de.ibm.com> | ||
12 | * Author: Michael Ruettger <michael@ibmra.de> | ||
13 | * | ||
14 | * This program is free software; you can redistribute it and/or modify | ||
15 | * it under the terms of the GNU General Public License as published by | ||
16 | * the Free Software Foundation; either version 2, or (at your option) | ||
17 | * any later version. | ||
18 | * | ||
19 | * This program is distributed in the hope that it will be useful, | ||
20 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
21 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
22 | * GNU General Public License for more details. | ||
23 | */ | ||
24 | |||
25 | #include <linux/types.h> | ||
26 | #include <asm/byteorder.h> | ||
27 | |||
28 | #include "genwqe_driver.h" | ||
29 | #include "card_base.h" | ||
30 | |||
31 | /** | ||
32 | * struct ddcb - Device Driver Control Block DDCB | ||
33 | * @hsi: Hardware software interlock | ||
34 | * @shi: Software hardware interlock. Hsi and shi are used to interlock | ||
35 | * software and hardware activities. We are using a compare and | ||
36 | * swap operation to ensure that there are no races when | ||
37 | * activating new DDCBs on the queue, or when we need to | ||
38 | * purge a DDCB from a running queue. | ||
39 | * @acfunc: Accelerator function addresses a unit within the chip | ||
40 | * @cmd: Command to work on | ||
41 | * @cmdopts_16: Options for the command | ||
42 | * @asiv: Input data | ||
43 | * @asv: Output data | ||
44 | * | ||
45 | * The DDCB data format is big endian. Multiple consecutive DDCBs form | ||
46 | * a DDCB queue. | ||
47 | */ | ||
48 | #define ASIV_LENGTH 104 /* Old specification without ATS field */ | ||
49 | #define ASIV_LENGTH_ATS 96 /* New specification with ATS field */ | ||
50 | #define ASV_LENGTH 64 | ||
51 | |||
52 | struct ddcb { | ||
53 | union { | ||
54 | __be32 icrc_hsi_shi_32; /* iCRC, Hardware/SW interlock */ | ||
55 | struct { | ||
56 | __be16 icrc_16; | ||
57 | u8 hsi; | ||
58 | u8 shi; | ||
59 | }; | ||
60 | }; | ||
61 | u8 pre; /* Preamble */ | ||
62 | u8 xdir; /* Execution Directives */ | ||
63 | __be16 seqnum_16; /* Sequence Number */ | ||
64 | |||
65 | u8 acfunc; /* Accelerator Function.. */ | ||
66 | u8 cmd; /* Command. */ | ||
67 | __be16 cmdopts_16; /* Command Options */ | ||
68 | u8 sur; /* Status Update Rate */ | ||
69 | u8 psp; /* Protection Section Pointer */ | ||
70 | __be16 rsvd_0e_16; /* Reserved invariant */ | ||
71 | |||
72 | __be64 fwiv_64; /* Firmware Invariant. */ | ||
73 | |||
74 | union { | ||
75 | struct { | ||
76 | __be64 ats_64; /* Address Translation Spec */ | ||
77 | u8 asiv[ASIV_LENGTH_ATS]; /* New ASIV */ | ||
78 | } n; | ||
79 | u8 __asiv[ASIV_LENGTH]; /* obsolete */ | ||
80 | }; | ||
81 | u8 asv[ASV_LENGTH]; /* Appl Spec Variant */ | ||
82 | |||
83 | __be16 rsvd_c0_16; /* Reserved Variant */ | ||
84 | __be16 vcrc_16; /* Variant CRC */ | ||
85 | __be32 rsvd_32; /* Reserved unprotected */ | ||
86 | |||
87 | __be64 deque_ts_64; /* Deque Time Stamp. */ | ||
88 | |||
89 | __be16 retc_16; /* Return Code */ | ||
90 | __be16 attn_16; /* Attention/Extended Error Codes */ | ||
91 | __be32 progress_32; /* Progress indicator. */ | ||
92 | |||
93 | __be64 cmplt_ts_64; /* Completion Time Stamp. */ | ||
94 | |||
95 | /* The following layout matches the new service layer format */ | ||
96 | __be32 ibdc_32; /* Inbound Data Count (* 256) */ | ||
97 | __be32 obdc_32; /* Outbound Data Count (* 256) */ | ||
98 | |||
99 | __be64 rsvd_SLH_64; /* Reserved for hardware */ | ||
100 | union { /* private data for driver */ | ||
101 | u8 priv[8]; | ||
102 | __be64 priv_64; | ||
103 | }; | ||
104 | __be64 disp_ts_64; /* Dispatch TimeStamp */ | ||
105 | } __attribute__((__packed__)); | ||
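/*
 * Layout sanity check (a sketch; the driver does not carry these
 * assertions): struct ddcb must be exactly 256 bytes, with the ASV
 * at offset 0x80 and RETC at offset 0xd0, matching the offsets that
 * get_next_ddcb() in card_ddcb.c clears.
 *
 *   BUILD_BUG_ON(sizeof(struct ddcb) != 256);
 *   BUILD_BUG_ON(offsetof(struct ddcb, asv) != 0x80);
 *   BUILD_BUG_ON(offsetof(struct ddcb, retc_16) != 0xd0);
 */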
106 | |||
107 | /* CRC polynomials for DDCB */ | ||
108 | #define CRC16_POLYNOMIAL 0x1021 | ||
109 | |||
110 | /* | ||
111 | * SHI: Software to Hardware Interlock | ||
112 | * This 1 byte field is written by software to interlock the | ||
113 | * movement of one queue entry to another with the hardware in the | ||
114 | * chip. | ||
115 | */ | ||
116 | #define DDCB_SHI_INTR 0x04 /* Bit 2 */ | ||
117 | #define DDCB_SHI_PURGE 0x02 /* Bit 1 */ | ||
118 | #define DDCB_SHI_NEXT 0x01 /* Bit 0 */ | ||
119 | |||
120 | /* | ||
121 | * HSI: Hardware to Software interlock | ||
122 | * This 1 byte field is written by hardware to interlock the movement | ||
123 | * of one queue entry to another with the software in the chip. | ||
124 | */ | ||
125 | #define DDCB_HSI_COMPLETED 0x40 /* Bit 6 */ | ||
126 | #define DDCB_HSI_FETCHED 0x04 /* Bit 2 */ | ||
127 | |||
128 | /* | ||
129 | * Accessing HSI/SHI is done 32-bit wide | ||
130 | * Normally 16-bit access would work too, but on some platforms the | ||
131 | * 16-bit compare and swap operation is not supported. Therefore | ||
132 | * we switch to 32-bit such that those platforms work too. | ||
133 | * | ||
134 | * iCRC HSI/SHI | ||
135 | */ | ||
136 | #define DDCB_INTR_BE32 cpu_to_be32(0x00000004) | ||
137 | #define DDCB_PURGE_BE32 cpu_to_be32(0x00000002) | ||
138 | #define DDCB_NEXT_BE32 cpu_to_be32(0x00000001) | ||
139 | #define DDCB_COMPLETED_BE32 cpu_to_be32(0x00004000) | ||
140 | #define DDCB_FETCHED_BE32 cpu_to_be32(0x00000400) | ||
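A minimal sketch (the helper name is illustrative, not part of this patch) of why the 32-bit width matters: with cmpxchg() on the combined iCRC/HSI/SHI word, the whole __be32 is exchanged atomically, so no 16-bit compare-and-swap is needed:

	/* Atomically set the SHI 'next' bit in the interlock word.
	 * Returns true if we won the race, false if the word changed. */
	static inline bool ddcb_mark_next(struct ddcb *pddcb)
	{
		__be32 old = pddcb->icrc_hsi_shi_32;
		__be32 new = old | DDCB_NEXT_BE32;

		return cmpxchg(&pddcb->icrc_hsi_shi_32, old, new) == old;
	}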
141 | |||
142 | /* Definitions of DDCB presets */ | ||
143 | #define DDCB_PRESET_PRE 0x80 | ||
144 | #define ICRC_LENGTH(n) ((n) + 8 + 8 + 8) /* used ASIV + hdr fields */ | ||
145 | #define VCRC_LENGTH(n) ((n)) /* used ASV */ | ||
146 | |||
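A sketch of how these macros could be used to seal a DDCB, assuming crc_itu_t() from <linux/crc-itu-t.h> (which implements the 0x1021 polynomial above); the 0xffff seed and the zeroing of the interlock word first are assumptions of this sketch, not taken from the patch:

	/* iCRC covers the three 8-byte header words plus the used ASIV */
	static inline u16 ddcb_calc_icrc(struct ddcb *pddcb, u8 asiv_length)
	{
		pddcb->icrc_hsi_shi_32 = 0;	/* must not enter the sum */
		return crc_itu_t(0xffff, (const u8 *)pddcb,
				 ICRC_LENGTH(asiv_length));
	}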
147 | /* | ||
148 | * Genwqe Scatter Gather list | ||
149 | * Each element has up to 8 entries. | ||
150 | * The chaining element is element 0 because of prefetching needs. | ||
151 | */ | ||
152 | |||
153 | /* | ||
154 | * 0b0110 Chained descriptor. The descriptor is describing the next | ||
155 | * descriptor list. | ||
156 | */ | ||
157 | #define SG_CHAINED (0x6) | ||
158 | |||
159 | /* | ||
160 | * 0b0010 First entry of a descriptor list. Start from a Buffer-Empty | ||
161 | * condition. | ||
162 | */ | ||
163 | #define SG_DATA (0x2) | ||
164 | |||
165 | /* | ||
166 | * 0b0000 Early terminator. This is the last entry on the list | ||
167 | * regardless of the length indicated. | ||
168 | */ | ||
169 | #define SG_END_LIST (0x0) | ||
170 | |||
171 | /** | ||
172 | * struct sg_entry - Scatter gather list entry | ||
173 | * @target_addr: Either a dma addr of memory to work on or a | ||
174 | * dma addr of a subsequent sg_entry block. | ||
175 | * @len: Length of the data block. | ||
176 | * @flags: See above. | ||
177 | * | ||
178 | * Depending on the command the GenWQE card can use a scatter gather | ||
179 | * list to describe the memory it works on. Eight sg_entry structs | ||
180 | * always form one block. | ||
181 | */ | ||
182 | struct sg_entry { | ||
183 | __be64 target_addr; | ||
184 | __be32 len; | ||
185 | __be32 flags; | ||
186 | }; | ||
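For illustration, chaining one 8-entry block to the next could look as follows; the helper is a sketch, only the element-0 placement and the flag values follow the comments above:

	/* Entry 0 chains to the next block (prefetching, see above);
	 * an unused data entry would instead carry SG_END_LIST. */
	static inline void sg_chain_block(struct sg_entry *blk,
					  dma_addr_t next_block_dma)
	{
		blk[0].target_addr = cpu_to_be64(next_block_dma);
		blk[0].len = cpu_to_be32(8 * sizeof(struct sg_entry));
		blk[0].flags = cpu_to_be32(SG_CHAINED);
	}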
187 | |||
188 | #endif /* __CARD_DDCB_H__ */ | ||
diff --git a/drivers/misc/genwqe/card_dev.c b/drivers/misc/genwqe/card_dev.c new file mode 100644 index 000000000000..9b231bb1005c --- /dev/null +++ b/drivers/misc/genwqe/card_dev.c | |||
@@ -0,0 +1,1486 @@ | |||
1 | /** | ||
2 | * IBM Accelerator Family 'GenWQE' | ||
3 | * | ||
4 | * (C) Copyright IBM Corp. 2013 | ||
5 | * | ||
6 | * Author: Frank Haverkamp <haver@linux.vnet.ibm.com> | ||
7 | * Author: Joerg-Stephan Vogt <jsvogt@de.ibm.com> | ||
8 | * Author: Michael Jung <mijung@de.ibm.com> | ||
9 | * Author: Michael Ruettger <michael@ibmra.de> | ||
10 | * | ||
11 | * This program is free software; you can redistribute it and/or modify | ||
12 | * it under the terms of the GNU General Public License (version 2 only) | ||
13 | * as published by the Free Software Foundation. | ||
14 | * | ||
15 | * This program is distributed in the hope that it will be useful, | ||
16 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
17 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
18 | * GNU General Public License for more details. | ||
19 | */ | ||
20 | |||
21 | /* | ||
22 | * Character device representation of the GenWQE device. This allows | ||
23 | * user-space applications to communicate with the card. | ||
24 | */ | ||
25 | |||
26 | #include <linux/kernel.h> | ||
27 | #include <linux/types.h> | ||
28 | #include <linux/module.h> | ||
29 | #include <linux/pci.h> | ||
30 | #include <linux/string.h> | ||
31 | #include <linux/fs.h> | ||
32 | #include <linux/sched.h> | ||
33 | #include <linux/wait.h> | ||
34 | #include <linux/delay.h> | ||
35 | #include <linux/atomic.h> | ||
36 | |||
37 | #include "card_base.h" | ||
38 | #include "card_ddcb.h" | ||
39 | |||
40 | static int genwqe_open_files(struct genwqe_dev *cd) | ||
41 | { | ||
42 | int rc; | ||
43 | unsigned long flags; | ||
44 | |||
45 | spin_lock_irqsave(&cd->file_lock, flags); | ||
46 | rc = list_empty(&cd->file_list); | ||
47 | spin_unlock_irqrestore(&cd->file_lock, flags); | ||
48 | return !rc; | ||
49 | } | ||
50 | |||
51 | static void genwqe_add_file(struct genwqe_dev *cd, struct genwqe_file *cfile) | ||
52 | { | ||
53 | unsigned long flags; | ||
54 | |||
55 | cfile->owner = current; | ||
56 | spin_lock_irqsave(&cd->file_lock, flags); | ||
57 | list_add(&cfile->list, &cd->file_list); | ||
58 | spin_unlock_irqrestore(&cd->file_lock, flags); | ||
59 | } | ||
60 | |||
61 | static int genwqe_del_file(struct genwqe_dev *cd, struct genwqe_file *cfile) | ||
62 | { | ||
63 | unsigned long flags; | ||
64 | |||
65 | spin_lock_irqsave(&cd->file_lock, flags); | ||
66 | list_del(&cfile->list); | ||
67 | spin_unlock_irqrestore(&cd->file_lock, flags); | ||
68 | |||
69 | return 0; | ||
70 | } | ||
71 | |||
72 | static void genwqe_add_pin(struct genwqe_file *cfile, struct dma_mapping *m) | ||
73 | { | ||
74 | unsigned long flags; | ||
75 | |||
76 | spin_lock_irqsave(&cfile->pin_lock, flags); | ||
77 | list_add(&m->pin_list, &cfile->pin_list); | ||
78 | spin_unlock_irqrestore(&cfile->pin_lock, flags); | ||
79 | } | ||
80 | |||
81 | static int genwqe_del_pin(struct genwqe_file *cfile, struct dma_mapping *m) | ||
82 | { | ||
83 | unsigned long flags; | ||
84 | |||
85 | spin_lock_irqsave(&cfile->pin_lock, flags); | ||
86 | list_del(&m->pin_list); | ||
87 | spin_unlock_irqrestore(&cfile->pin_lock, flags); | ||
88 | |||
89 | return 0; | ||
90 | } | ||
91 | |||
92 | /** | ||
93 | * genwqe_search_pin() - Search for the mapping for a userspace address | ||
94 | * @cfile: Descriptor of opened file | ||
95 | * @u_addr: User virtual address | ||
96 | * @size: Size of buffer | ||
97 | * @virt_addr: Kernel virtual address to be updated, if not NULL | ||
98 | * | ||
99 | * Return: Pointer to the corresponding mapping, or NULL if not found | ||
100 | */ | ||
101 | static struct dma_mapping *genwqe_search_pin(struct genwqe_file *cfile, | ||
102 | unsigned long u_addr, | ||
103 | unsigned int size, | ||
104 | void **virt_addr) | ||
105 | { | ||
106 | unsigned long flags; | ||
107 | struct dma_mapping *m; | ||
108 | |||
109 | spin_lock_irqsave(&cfile->pin_lock, flags); | ||
110 | |||
111 | list_for_each_entry(m, &cfile->pin_list, pin_list) { | ||
112 | if ((((u64)m->u_vaddr) <= (u_addr)) && | ||
113 | (((u64)m->u_vaddr + m->size) >= (u_addr + size))) { | ||
114 | |||
115 | if (virt_addr) | ||
116 | *virt_addr = m->k_vaddr + | ||
117 | (u_addr - (u64)m->u_vaddr); | ||
118 | |||
119 | spin_unlock_irqrestore(&cfile->pin_lock, flags); | ||
120 | return m; | ||
121 | } | ||
122 | } | ||
123 | spin_unlock_irqrestore(&cfile->pin_lock, flags); | ||
124 | return NULL; | ||
125 | } | ||
126 | |||
127 | static void __genwqe_add_mapping(struct genwqe_file *cfile, | ||
128 | struct dma_mapping *dma_map) | ||
129 | { | ||
130 | unsigned long flags; | ||
131 | |||
132 | spin_lock_irqsave(&cfile->map_lock, flags); | ||
133 | list_add(&dma_map->card_list, &cfile->map_list); | ||
134 | spin_unlock_irqrestore(&cfile->map_lock, flags); | ||
135 | } | ||
136 | |||
137 | static void __genwqe_del_mapping(struct genwqe_file *cfile, | ||
138 | struct dma_mapping *dma_map) | ||
139 | { | ||
140 | unsigned long flags; | ||
141 | |||
142 | spin_lock_irqsave(&cfile->map_lock, flags); | ||
143 | list_del(&dma_map->card_list); | ||
144 | spin_unlock_irqrestore(&cfile->map_lock, flags); | ||
145 | } | ||
146 | |||
147 | |||
148 | /** | ||
149 | * __genwqe_search_mapping() - Search for the mapping for a userspace address | ||
150 | * @cfile: descriptor of opened file | ||
151 | * @u_addr: user virtual address | ||
152 | * @size: size of buffer | ||
153 | * @dma_addr: DMA address to be updated | ||
 | * @virt_addr: Kernel virtual address to be updated, if not NULL | ||
154 | * Return: Pointer to the corresponding mapping, or NULL if not found | ||
155 | */ | ||
156 | static struct dma_mapping *__genwqe_search_mapping(struct genwqe_file *cfile, | ||
157 | unsigned long u_addr, | ||
158 | unsigned int size, | ||
159 | dma_addr_t *dma_addr, | ||
160 | void **virt_addr) | ||
161 | { | ||
162 | unsigned long flags; | ||
163 | struct dma_mapping *m; | ||
164 | struct pci_dev *pci_dev = cfile->cd->pci_dev; | ||
165 | |||
166 | spin_lock_irqsave(&cfile->map_lock, flags); | ||
167 | list_for_each_entry(m, &cfile->map_list, card_list) { | ||
168 | |||
169 | if ((((u64)m->u_vaddr) <= (u_addr)) && | ||
170 | (((u64)m->u_vaddr + m->size) >= (u_addr + size))) { | ||
171 | |||
172 | /* match found: current is as expected and | ||
173 | addr is in range */ | ||
174 | if (dma_addr) | ||
175 | *dma_addr = m->dma_addr + | ||
176 | (u_addr - (u64)m->u_vaddr); | ||
177 | |||
178 | if (virt_addr) | ||
179 | *virt_addr = m->k_vaddr + | ||
180 | (u_addr - (u64)m->u_vaddr); | ||
181 | |||
182 | spin_unlock_irqrestore(&cfile->map_lock, flags); | ||
183 | return m; | ||
184 | } | ||
185 | } | ||
186 | spin_unlock_irqrestore(&cfile->map_lock, flags); | ||
187 | |||
188 | dev_err(&pci_dev->dev, | ||
189 | "[%s] Entry not found: u_addr=%lx, size=%x\n", | ||
190 | __func__, u_addr, size); | ||
191 | |||
192 | return NULL; | ||
193 | } | ||
194 | |||
195 | static void genwqe_remove_mappings(struct genwqe_file *cfile) | ||
196 | { | ||
197 | int i = 0; | ||
198 | struct list_head *node, *next; | ||
199 | struct dma_mapping *dma_map; | ||
200 | struct genwqe_dev *cd = cfile->cd; | ||
201 | struct pci_dev *pci_dev = cfile->cd->pci_dev; | ||
202 | |||
203 | list_for_each_safe(node, next, &cfile->map_list) { | ||
204 | dma_map = list_entry(node, struct dma_mapping, card_list); | ||
205 | |||
206 | list_del_init(&dma_map->card_list); | ||
207 | |||
208 | /* | ||
209 | * This is really a bug, because those things should | ||
210 | * have been already tidied up. | ||
211 | * | ||
212 | * GENWQE_MAPPING_RAW should have been removed via munmap(). | ||
213 | * GENWQE_MAPPING_SGL_TEMP should be removed by tidy up code. | ||
214 | */ | ||
215 | dev_err(&pci_dev->dev, | ||
216 | "[%s] %d. cleanup mapping: u_vaddr=%p " | ||
217 | "u_kaddr=%016lx dma_addr=%llx\n", __func__, i++, | ||
218 | dma_map->u_vaddr, (unsigned long)dma_map->k_vaddr, | ||
219 | dma_map->dma_addr); | ||
220 | |||
221 | if (dma_map->type == GENWQE_MAPPING_RAW) { | ||
222 | /* we allocated this dynamically */ | ||
223 | __genwqe_free_consistent(cd, dma_map->size, | ||
224 | dma_map->k_vaddr, | ||
225 | dma_map->dma_addr); | ||
226 | kfree(dma_map); | ||
227 | } else if (dma_map->type == GENWQE_MAPPING_SGL_TEMP) { | ||
228 | /* we use dma_map statically from the request */ | ||
229 | genwqe_user_vunmap(cd, dma_map, NULL); | ||
230 | } | ||
231 | } | ||
232 | } | ||
233 | |||
234 | static void genwqe_remove_pinnings(struct genwqe_file *cfile) | ||
235 | { | ||
236 | struct list_head *node, *next; | ||
237 | struct dma_mapping *dma_map; | ||
238 | struct genwqe_dev *cd = cfile->cd; | ||
239 | |||
240 | list_for_each_safe(node, next, &cfile->pin_list) { | ||
241 | dma_map = list_entry(node, struct dma_mapping, pin_list); | ||
242 | |||
243 | /* | ||
244 | * This is not a bug, because a killed process might | ||
245 | * not call the unpin ioctl, which is supposed to free | ||
246 | * the resources. | ||
247 | * | ||
248 | * Pinnings are dynamically allocated and need to be | ||
249 | * deleted. | ||
250 | */ | ||
251 | list_del_init(&dma_map->pin_list); | ||
252 | genwqe_user_vunmap(cd, dma_map, NULL); | ||
253 | kfree(dma_map); | ||
254 | } | ||
255 | } | ||
256 | |||
257 | /** | ||
258 | * genwqe_kill_fasync() - Send signal to all processes with open GenWQE files | ||
259 | * | ||
260 | * E.g. genwqe_kill_fasync(cd, SIGIO); | ||
261 | */ | ||
262 | static int genwqe_kill_fasync(struct genwqe_dev *cd, int sig) | ||
263 | { | ||
264 | unsigned int files = 0; | ||
265 | unsigned long flags; | ||
266 | struct genwqe_file *cfile; | ||
267 | |||
268 | spin_lock_irqsave(&cd->file_lock, flags); | ||
269 | list_for_each_entry(cfile, &cd->file_list, list) { | ||
270 | if (cfile->async_queue) | ||
271 | kill_fasync(&cfile->async_queue, sig, POLL_HUP); | ||
272 | files++; | ||
273 | } | ||
274 | spin_unlock_irqrestore(&cd->file_lock, flags); | ||
275 | return files; | ||
276 | } | ||
277 | |||
278 | static int genwqe_force_sig(struct genwqe_dev *cd, int sig) | ||
279 | { | ||
280 | unsigned int files = 0; | ||
281 | unsigned long flags; | ||
282 | struct genwqe_file *cfile; | ||
283 | |||
284 | spin_lock_irqsave(&cd->file_lock, flags); | ||
285 | list_for_each_entry(cfile, &cd->file_list, list) { | ||
286 | force_sig(sig, cfile->owner); | ||
287 | files++; | ||
288 | } | ||
289 | spin_unlock_irqrestore(&cd->file_lock, flags); | ||
290 | return files; | ||
291 | } | ||
292 | |||
293 | /** | ||
294 | * genwqe_open() - file open | ||
295 | * @inode: file system information | ||
296 | * @filp: file handle | ||
297 | * | ||
298 | * This function is executed whenever an application calls | ||
299 | * open("/dev/genwqe",..). | ||
300 | * | ||
301 | * Return: 0 if successful or <0 if errors | ||
302 | */ | ||
303 | static int genwqe_open(struct inode *inode, struct file *filp) | ||
304 | { | ||
305 | struct genwqe_dev *cd; | ||
306 | struct genwqe_file *cfile; | ||
307 | struct pci_dev *pci_dev; | ||
308 | |||
309 | cfile = kzalloc(sizeof(*cfile), GFP_KERNEL); | ||
310 | if (cfile == NULL) | ||
311 | return -ENOMEM; | ||
312 | |||
313 | cd = container_of(inode->i_cdev, struct genwqe_dev, cdev_genwqe); | ||
314 | pci_dev = cd->pci_dev; | ||
315 | cfile->cd = cd; | ||
316 | cfile->filp = filp; | ||
317 | cfile->client = NULL; | ||
318 | |||
319 | spin_lock_init(&cfile->map_lock); /* list of raw memory allocations */ | ||
320 | INIT_LIST_HEAD(&cfile->map_list); | ||
321 | |||
322 | spin_lock_init(&cfile->pin_lock); /* list of user pinned memory */ | ||
323 | INIT_LIST_HEAD(&cfile->pin_list); | ||
324 | |||
325 | filp->private_data = cfile; | ||
326 | |||
327 | genwqe_add_file(cd, cfile); | ||
328 | return 0; | ||
329 | } | ||
330 | |||
331 | /** | ||
332 | * genwqe_fasync() - Setup process to receive SIGIO. | ||
333 | * @fd: file descriptor | ||
334 | * @filp: file handle | ||
335 | * @mode: file mode | ||
336 | * | ||
337 | * Sending a signal works as follows: | ||
338 | * | ||
339 | * if (cdev->async_queue) | ||
340 | * kill_fasync(&cdev->async_queue, SIGIO, POLL_IN); | ||
341 | * | ||
342 | * Some devices also implement asynchronous notification to indicate | ||
343 | * when the device can be written; in this case, of course, | ||
344 | * kill_fasync must be called with a mode of POLL_OUT. | ||
345 | */ | ||
346 | static int genwqe_fasync(int fd, struct file *filp, int mode) | ||
347 | { | ||
348 | struct genwqe_file *cdev = (struct genwqe_file *)filp->private_data; | ||
349 | return fasync_helper(fd, filp, mode, &cdev->async_queue); | ||
350 | } | ||
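The userspace counterpart, for illustration (the device node name is an example): a process opts into the SIGIO sent by genwqe_kill_fasync() by owning the descriptor and enabling FASYNC, which lands in genwqe_fasync() above:

	int fd = open("/dev/genwqe0_card", O_RDWR);

	fcntl(fd, F_SETOWN, getpid());
	fcntl(fd, F_SETFL, fcntl(fd, F_GETFL) | FASYNC);
	/* the process now receives SIGIO, e.g. ahead of a card reset */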
351 | |||
352 | |||
353 | /** | ||
354 | * genwqe_release() - file close | ||
355 | * @inode: file system information | ||
356 | * @filp: file handle | ||
357 | * | ||
358 | * This function is executed whenever an application calls 'close(fd_genwqe)' | ||
359 | * | ||
360 | * Return: always 0 | ||
361 | */ | ||
362 | static int genwqe_release(struct inode *inode, struct file *filp) | ||
363 | { | ||
364 | struct genwqe_file *cfile = (struct genwqe_file *)filp->private_data; | ||
365 | struct genwqe_dev *cd = cfile->cd; | ||
366 | |||
367 | /* there must be no entries in these lists! */ | ||
368 | genwqe_remove_mappings(cfile); | ||
369 | genwqe_remove_pinnings(cfile); | ||
370 | |||
371 | /* remove this filp from the asynchronously notified filp's */ | ||
372 | genwqe_fasync(-1, filp, 0); | ||
373 | |||
374 | /* | ||
375 | * For this to work we must not release cd while this cfile is | ||
376 | * not yet released; otherwise the list entry is invalid, | ||
377 | * because the list itself gets reinstantiated! | ||
378 | */ | ||
379 | genwqe_del_file(cd, cfile); | ||
380 | kfree(cfile); | ||
381 | return 0; | ||
382 | } | ||
383 | |||
384 | static void genwqe_vma_open(struct vm_area_struct *vma) | ||
385 | { | ||
386 | /* nothing ... */ | ||
387 | } | ||
388 | |||
389 | /** | ||
390 | * genwqe_vma_close() - Called each time a vma is unmapped | ||
391 | * | ||
392 | * Free memory which got allocated by GenWQE mmap(). | ||
393 | */ | ||
394 | static void genwqe_vma_close(struct vm_area_struct *vma) | ||
395 | { | ||
396 | unsigned long vsize = vma->vm_end - vma->vm_start; | ||
397 | struct inode *inode = vma->vm_file->f_dentry->d_inode; | ||
398 | struct dma_mapping *dma_map; | ||
399 | struct genwqe_dev *cd = container_of(inode->i_cdev, struct genwqe_dev, | ||
400 | cdev_genwqe); | ||
401 | struct pci_dev *pci_dev = cd->pci_dev; | ||
402 | dma_addr_t d_addr = 0; | ||
403 | struct genwqe_file *cfile = vma->vm_private_data; | ||
404 | |||
405 | dma_map = __genwqe_search_mapping(cfile, vma->vm_start, vsize, | ||
406 | &d_addr, NULL); | ||
407 | if (dma_map == NULL) { | ||
408 | dev_err(&pci_dev->dev, | ||
409 | " [%s] err: mapping not found: v=%lx, p=%lx s=%lx\n", | ||
410 | __func__, vma->vm_start, vma->vm_pgoff << PAGE_SHIFT, | ||
411 | vsize); | ||
412 | return; | ||
413 | } | ||
414 | __genwqe_del_mapping(cfile, dma_map); | ||
415 | __genwqe_free_consistent(cd, dma_map->size, dma_map->k_vaddr, | ||
416 | dma_map->dma_addr); | ||
417 | kfree(dma_map); | ||
418 | } | ||
419 | |||
420 | static struct vm_operations_struct genwqe_vma_ops = { | ||
421 | .open = genwqe_vma_open, | ||
422 | .close = genwqe_vma_close, | ||
423 | }; | ||
424 | |||
425 | /** | ||
426 | * genwqe_mmap() - Provide contiguous buffers to userspace | ||
427 | * | ||
428 | * We use mmap() to allocate contiguous buffers used for DMA | ||
429 | * transfers. After the buffer is allocated we remap it to user-space | ||
430 | * and remember a reference to our dma_mapping data structure, where | ||
431 | * we store the associated DMA address and allocated size. | ||
432 | * | ||
433 | * When we receive a DDCB execution request with the ATS bits set to | ||
434 | * plain buffer, we lookup our dma_mapping list to find the | ||
435 | * corresponding DMA address for the associated user-space address. | ||
436 | */ | ||
437 | static int genwqe_mmap(struct file *filp, struct vm_area_struct *vma) | ||
438 | { | ||
439 | int rc; | ||
440 | unsigned long pfn, vsize = vma->vm_end - vma->vm_start; | ||
441 | struct genwqe_file *cfile = (struct genwqe_file *)filp->private_data; | ||
442 | struct genwqe_dev *cd = cfile->cd; | ||
443 | struct dma_mapping *dma_map; | ||
444 | |||
445 | if (vsize == 0) | ||
446 | return -EINVAL; | ||
447 | |||
448 | if (get_order(vsize) > MAX_ORDER) | ||
449 | return -ENOMEM; | ||
450 | |||
451 | dma_map = kzalloc(sizeof(struct dma_mapping), GFP_ATOMIC); | ||
452 | if (dma_map == NULL) | ||
453 | return -ENOMEM; | ||
454 | |||
455 | genwqe_mapping_init(dma_map, GENWQE_MAPPING_RAW); | ||
456 | dma_map->u_vaddr = (void *)vma->vm_start; | ||
457 | dma_map->size = vsize; | ||
458 | dma_map->nr_pages = DIV_ROUND_UP(vsize, PAGE_SIZE); | ||
459 | dma_map->k_vaddr = __genwqe_alloc_consistent(cd, vsize, | ||
460 | &dma_map->dma_addr); | ||
461 | if (dma_map->k_vaddr == NULL) { | ||
462 | rc = -ENOMEM; | ||
463 | goto free_dma_map; | ||
464 | } | ||
465 | |||
466 | if (capable(CAP_SYS_ADMIN) && (vsize > sizeof(dma_addr_t))) | ||
467 | *(dma_addr_t *)dma_map->k_vaddr = dma_map->dma_addr; | ||
468 | |||
469 | pfn = virt_to_phys(dma_map->k_vaddr) >> PAGE_SHIFT; | ||
470 | rc = remap_pfn_range(vma, | ||
471 | vma->vm_start, | ||
472 | pfn, | ||
473 | vsize, | ||
474 | vma->vm_page_prot); | ||
475 | if (rc != 0) { | ||
476 | rc = -EFAULT; | ||
477 | goto free_dma_mem; | ||
478 | } | ||
479 | |||
480 | vma->vm_private_data = cfile; | ||
481 | vma->vm_ops = &genwqe_vma_ops; | ||
482 | __genwqe_add_mapping(cfile, dma_map); | ||
483 | |||
484 | return 0; | ||
485 | |||
486 | free_dma_mem: | ||
487 | __genwqe_free_consistent(cd, dma_map->size, | ||
488 | dma_map->k_vaddr, | ||
489 | dma_map->dma_addr); | ||
490 | free_dma_map: | ||
491 | kfree(dma_map); | ||
492 | return rc; | ||
493 | } | ||
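A minimal userspace sketch of this allocation path (device name and size are examples): mmap() on the character device hands out a buffer backed by __genwqe_alloc_consistent(), and munmap() later enters genwqe_vma_close() above to free it:

	size_t len = 1024 * 1024;
	void *buf = mmap(NULL, len, PROT_READ | PROT_WRITE,
			 MAP_SHARED, fd, 0);

	if (buf != MAP_FAILED) {
		/* reference buf in DDCB 'plain buffer' ATS fixups ... */
		munmap(buf, len);	/* frees the DMA memory again */
	}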
494 | |||
495 | /** | ||
496 | * do_flash_update() - Execute flash update (write image or CVPD) | ||
497 | * @cfile: Descriptor of opened file | ||
498 | * @load: details about image load | ||
499 | * | ||
500 | * Return: 0 if successful | ||
501 | */ | ||
502 | |||
503 | #define FLASH_BLOCK 0x40000 /* we use 256k blocks */ | ||
504 | |||
505 | static int do_flash_update(struct genwqe_file *cfile, | ||
506 | struct genwqe_bitstream *load) | ||
507 | { | ||
508 | int rc = 0; | ||
509 | int blocks_to_flash; | ||
510 | u64 dma_addr, flash = 0; | ||
511 | size_t tocopy = 0; | ||
512 | u8 __user *buf; | ||
513 | u8 *xbuf; | ||
514 | u32 crc; | ||
515 | u8 cmdopts; | ||
516 | struct genwqe_dev *cd = cfile->cd; | ||
517 | struct pci_dev *pci_dev = cd->pci_dev; | ||
518 | |||
519 | if ((load->size & 0x3) != 0) { | ||
520 | dev_err(&pci_dev->dev, | ||
521 | "err: buf %d bytes not 4 bytes aligned!\n", | ||
522 | load->size); | ||
523 | return -EINVAL; | ||
524 | } | ||
525 | if (((unsigned long)(load->data_addr) & ~PAGE_MASK) != 0) { | ||
526 | dev_err(&pci_dev->dev, | ||
527 | "err: buf is not page aligned!\n"); | ||
528 | return -EINVAL; | ||
529 | } | ||
530 | |||
531 | /* FIXME Bits have changed for new service layer! */ | ||
532 | switch ((char)load->partition) { | ||
533 | case '0': | ||
534 | cmdopts = 0x14; | ||
535 | break; /* download/erase_first/part_0 */ | ||
536 | case '1': | ||
537 | cmdopts = 0x1C; | ||
538 | break; /* download/erase_first/part_1 */ | ||
539 | case 'v': /* cmdopts = 0x0c (VPD) */ | ||
540 | default: | ||
541 | dev_err(&pci_dev->dev, | ||
542 | "err: invalid partition %02x!\n", load->partition); | ||
543 | return -EINVAL; | ||
544 | } | ||
545 | dev_info(&pci_dev->dev, | ||
546 | "[%s] start flash update UID: 0x%x size: %u bytes part: %c\n", | ||
547 | __func__, load->uid, load->size, (char)load->partition); | ||
548 | |||
549 | buf = (u8 __user *)load->data_addr; | ||
550 | xbuf = __genwqe_alloc_consistent(cd, FLASH_BLOCK, &dma_addr); | ||
551 | if (xbuf == NULL) { | ||
552 | dev_err(&pci_dev->dev, "err: no memory\n"); | ||
553 | return -ENOMEM; | ||
554 | } | ||
555 | |||
556 | blocks_to_flash = load->size / FLASH_BLOCK; | ||
557 | while (load->size) { | ||
558 | struct genwqe_ddcb_cmd *req; | ||
559 | |||
560 | /* | ||
561 | * We must be 4 byte aligned. The buffer must be zero-padded | ||
562 | * to have defined values when calculating the CRC. | ||
563 | */ | ||
564 | tocopy = min_t(size_t, load->size, FLASH_BLOCK); | ||
565 | |||
566 | rc = copy_from_user(xbuf, buf, tocopy); | ||
567 | if (rc) { | ||
568 | dev_err(&pci_dev->dev, | ||
569 | "err: could not copy all data rc=%d\n", rc); | ||
570 | goto free_buffer; | ||
571 | } | ||
572 | crc = genwqe_crc32(xbuf, tocopy, 0xffffffff); | ||
573 | |||
574 | dev_info(&pci_dev->dev, | ||
575 | "[%s] DMA: 0x%llx CRC: %08x SZ: %ld %d\n", | ||
576 | __func__, dma_addr, crc, tocopy, blocks_to_flash); | ||
577 | |||
578 | /* prepare DDCB for SLU process */ | ||
579 | req = ddcb_requ_alloc(); | ||
580 | if (req == NULL) { | ||
581 | rc = -ENOMEM; | ||
582 | goto free_buffer; | ||
583 | } | ||
584 | |||
585 | req->cmd = SLCMD_MOVE_FLASH; | ||
586 | req->cmdopts = cmdopts; | ||
587 | |||
588 | /* prepare invariant values */ | ||
589 | if (genwqe_get_slu_id(cd) <= 0x2) { | ||
590 | *(u64 *)&req->__asiv[0] = cpu_to_be64(dma_addr); | ||
591 | *(u64 *)&req->__asiv[8] = cpu_to_be64(tocopy); | ||
592 | *(u64 *)&req->__asiv[16] = cpu_to_be64(flash); | ||
593 | *(u32 *)&req->__asiv[24] = cpu_to_be32(0); | ||
594 | req->__asiv[24] = load->uid; | ||
595 | *(u32 *)&req->__asiv[28] = cpu_to_be32(crc); | ||
596 | |||
597 | /* for simulation only */ | ||
598 | *(u64 *)&req->__asiv[88] = cpu_to_be64(load->slu_id); | ||
599 | *(u64 *)&req->__asiv[96] = cpu_to_be64(load->app_id); | ||
600 | req->asiv_length = 32; /* bytes included in crc calc */ | ||
601 | } else { /* setup DDCB for ATS architecture */ | ||
602 | *(u64 *)&req->asiv[0] = cpu_to_be64(dma_addr); | ||
603 | *(u32 *)&req->asiv[8] = cpu_to_be32(tocopy); | ||
604 | *(u32 *)&req->asiv[12] = cpu_to_be32(0); /* resvd */ | ||
605 | *(u64 *)&req->asiv[16] = cpu_to_be64(flash); | ||
606 | *(u32 *)&req->asiv[24] = cpu_to_be32(load->uid<<24); | ||
607 | *(u32 *)&req->asiv[28] = cpu_to_be32(crc); | ||
608 | |||
609 | /* for simulation only */ | ||
610 | *(u64 *)&req->asiv[80] = cpu_to_be64(load->slu_id); | ||
611 | *(u64 *)&req->asiv[88] = cpu_to_be64(load->app_id); | ||
612 | |||
613 | req->ats = cpu_to_be64(0x4ULL << 44); /* Rd only */ | ||
614 | req->asiv_length = 40; /* bytes included in crc calc */ | ||
615 | } | ||
616 | req->asv_length = 8; | ||
617 | |||
618 | /* For Genwqe5 we get back the calculated CRC */ | ||
619 | *(u64 *)&req->asv[0] = 0ULL; /* 0x80 */ | ||
620 | |||
621 | rc = __genwqe_execute_raw_ddcb(cd, req); | ||
622 | |||
623 | load->retc = req->retc; | ||
624 | load->attn = req->attn; | ||
625 | load->progress = req->progress; | ||
626 | |||
627 | if (rc < 0) { | ||
628 | dev_err(&pci_dev->dev, | ||
629 | " [%s] DDCB returned (RETC=%x ATTN=%x " | ||
630 | "PROG=%x rc=%d)\n", __func__, req->retc, | ||
631 | req->attn, req->progress, rc); | ||
632 | |||
633 | ddcb_requ_free(req); | ||
634 | goto free_buffer; | ||
635 | } | ||
636 | |||
637 | if (req->retc != DDCB_RETC_COMPLETE) { | ||
638 | dev_info(&pci_dev->dev, | ||
639 | " [%s] DDCB returned (RETC=%x ATTN=%x " | ||
640 | "PROG=%x)\n", __func__, req->retc, | ||
641 | req->attn, req->progress); | ||
642 | |||
643 | rc = -EIO; | ||
644 | ddcb_requ_free(req); | ||
645 | goto free_buffer; | ||
646 | } | ||
647 | |||
648 | load->size -= tocopy; | ||
649 | flash += tocopy; | ||
650 | buf += tocopy; | ||
651 | blocks_to_flash--; | ||
652 | ddcb_requ_free(req); | ||
653 | } | ||
654 | |||
655 | free_buffer: | ||
656 | __genwqe_free_consistent(cd, FLASH_BLOCK, xbuf, dma_addr); | ||
657 | return rc; | ||
658 | } | ||
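From userspace this function is driven via the GENWQE_SLU_UPDATE ioctl; the field setup below is a sketch and must obey the checks above (size a multiple of 4, buffer page aligned):

	struct genwqe_bitstream load = { 0 };

	load.data_addr = (unsigned long)image;	/* page-aligned buffer */
	load.size = image_size;			/* multiple of 4 bytes */
	load.partition = '0';			/* or '1' */

	rc = ioctl(fd, GENWQE_SLU_UPDATE, &load);
	/* load.retc/attn/progress carry the DDCB outcome back */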
659 | |||
660 | static int do_flash_read(struct genwqe_file *cfile, | ||
661 | struct genwqe_bitstream *load) | ||
662 | { | ||
663 | int rc, blocks_to_flash; | ||
664 | u64 dma_addr, flash = 0; | ||
665 | size_t tocopy = 0; | ||
666 | u8 __user *buf; | ||
667 | u8 *xbuf; | ||
668 | u8 cmdopts; | ||
669 | struct genwqe_dev *cd = cfile->cd; | ||
670 | struct pci_dev *pci_dev = cd->pci_dev; | ||
671 | struct genwqe_ddcb_cmd *cmd; | ||
672 | |||
673 | if ((load->size & 0x3) != 0) { | ||
674 | dev_err(&pci_dev->dev, | ||
675 | "err: buf size %d bytes not 4 bytes aligned!\n", | ||
676 | load->size); | ||
677 | return -EINVAL; | ||
678 | } | ||
679 | if (((unsigned long)(load->data_addr) & ~PAGE_MASK) != 0) { | ||
680 | dev_err(&pci_dev->dev, "err: buf is not page aligned!\n"); | ||
681 | return -EINVAL; | ||
682 | } | ||
683 | |||
684 | /* FIXME Bits have changed for new service layer! */ | ||
685 | switch ((char)load->partition) { | ||
686 | case '0': | ||
687 | cmdopts = 0x12; | ||
688 | break; /* upload/part_0 */ | ||
689 | case '1': | ||
690 | cmdopts = 0x1A; | ||
691 | break; /* upload/part_1 */ | ||
692 | case 'v': | ||
693 | default: | ||
694 | dev_err(&pci_dev->dev, | ||
695 | "err: invalid partition %02x!\n", load->partition); | ||
696 | return -EINVAL; | ||
697 | } | ||
698 | dev_info(&pci_dev->dev, | ||
699 | "[%s] start flash read UID: 0x%x size: %u bytes part: %c\n", | ||
700 | __func__, load->uid, load->size, (char)load->partition); | ||
701 | |||
702 | buf = (u8 __user *)load->data_addr; | ||
703 | xbuf = __genwqe_alloc_consistent(cd, FLASH_BLOCK, &dma_addr); | ||
704 | if (xbuf == NULL) { | ||
705 | dev_err(&pci_dev->dev, "err: no memory\n"); | ||
706 | return -ENOMEM; | ||
707 | } | ||
708 | |||
709 | blocks_to_flash = load->size / FLASH_BLOCK; | ||
710 | while (load->size) { | ||
711 | /* | ||
712 | * We must be 4 byte aligned. The buffer must be zero-padded | ||
713 | * to have defined values when calculating the CRC. | ||
714 | */ | ||
715 | tocopy = min_t(size_t, load->size, FLASH_BLOCK); | ||
716 | |||
717 | dev_info(&pci_dev->dev, | ||
718 | "[%s] DMA: 0x%llx SZ: %ld %d\n", | ||
719 | __func__, dma_addr, tocopy, blocks_to_flash); | ||
720 | |||
721 | /* prepare DDCB for SLU process */ | ||
722 | cmd = ddcb_requ_alloc(); | ||
723 | if (cmd == NULL) { | ||
724 | rc = -ENOMEM; | ||
725 | goto free_buffer; | ||
726 | } | ||
727 | cmd->cmd = SLCMD_MOVE_FLASH; | ||
728 | cmd->cmdopts = cmdopts; | ||
729 | |||
730 | /* prepare invariant values */ | ||
731 | if (genwqe_get_slu_id(cd) <= 0x2) { | ||
732 | *(u64 *)&cmd->__asiv[0] = cpu_to_be64(dma_addr); | ||
733 | *(u64 *)&cmd->__asiv[8] = cpu_to_be64(tocopy); | ||
734 | *(u64 *)&cmd->__asiv[16] = cpu_to_be64(flash); | ||
735 | *(u32 *)&cmd->__asiv[24] = cpu_to_be32(0); | ||
736 | cmd->__asiv[24] = load->uid; | ||
737 | *(u32 *)&cmd->__asiv[28] = cpu_to_be32(0) /* CRC */; | ||
738 | cmd->asiv_length = 32; /* bytes included in crc calc */ | ||
739 | } else { /* setup DDCB for ATS architecture */ | ||
740 | *(u64 *)&cmd->asiv[0] = cpu_to_be64(dma_addr); | ||
741 | *(u32 *)&cmd->asiv[8] = cpu_to_be32(tocopy); | ||
742 | *(u32 *)&cmd->asiv[12] = cpu_to_be32(0); /* resvd */ | ||
743 | *(u64 *)&cmd->asiv[16] = cpu_to_be64(flash); | ||
744 | *(u32 *)&cmd->asiv[24] = cpu_to_be32(load->uid<<24); | ||
745 | *(u32 *)&cmd->asiv[28] = cpu_to_be32(0); /* CRC */ | ||
746 | cmd->ats = cpu_to_be64(0x5ULL << 44); /* rd/wr */ | ||
747 | cmd->asiv_length = 40; /* bytes included in crc calc */ | ||
748 | } | ||
749 | cmd->asv_length = 8; | ||
750 | |||
751 | /* we only get back the calculated CRC */ | ||
752 | *(u64 *)&cmd->asv[0] = 0ULL; /* 0x80 */ | ||
753 | |||
754 | rc = __genwqe_execute_raw_ddcb(cd, cmd); | ||
755 | |||
756 | load->retc = cmd->retc; | ||
757 | load->attn = cmd->attn; | ||
758 | load->progress = cmd->progress; | ||
759 | |||
760 | if ((rc < 0) && (rc != -EBADMSG)) { | ||
761 | dev_err(&pci_dev->dev, | ||
762 | " [%s] DDCB returned (RETC=%x ATTN=%x " | ||
763 | "PROG=%x rc=%d)\n", __func__, cmd->retc, | ||
764 | cmd->attn, cmd->progress, rc); | ||
765 | ddcb_requ_free(cmd); | ||
766 | goto free_buffer; | ||
767 | } | ||
768 | |||
769 | rc = copy_to_user(buf, xbuf, tocopy); | ||
770 | if (rc) { | ||
771 | dev_err(&pci_dev->dev, | ||
772 | " [%s] copy data to user failed rc=%d\n", | ||
773 | __func__, rc); | ||
774 | rc = -EIO; | ||
775 | ddcb_requ_free(cmd); | ||
776 | goto free_buffer; | ||
777 | } | ||
778 | |||
779 | /* We know that we can get retc 0x104 with CRC err */ | ||
780 | if (((cmd->retc == DDCB_RETC_FAULT) && | ||
781 | (cmd->attn != 0x02)) || /* Normally ignore CRC error */ | ||
782 | ((cmd->retc == DDCB_RETC_COMPLETE) && | ||
783 | (cmd->attn != 0x00))) { /* Everything was fine */ | ||
784 | dev_err(&pci_dev->dev, | ||
785 | " [%s] DDCB returned (RETC=%x ATTN=%x " | ||
786 | "PROG=%x rc=%d)\n", __func__, cmd->retc, | ||
787 | cmd->attn, cmd->progress, rc); | ||
788 | rc = -EIO; | ||
789 | ddcb_requ_free(cmd); | ||
790 | goto free_buffer; | ||
791 | } | ||
792 | |||
793 | load->size -= tocopy; | ||
794 | flash += tocopy; | ||
795 | buf += tocopy; | ||
796 | blocks_to_flash--; | ||
797 | ddcb_requ_free(cmd); | ||
798 | } | ||
799 | rc = 0; | ||
800 | |||
801 | free_buffer: | ||
802 | __genwqe_free_consistent(cd, FLASH_BLOCK, xbuf, dma_addr); | ||
803 | return rc; | ||
804 | } | ||
805 | |||
806 | static int genwqe_pin_mem(struct genwqe_file *cfile, struct genwqe_mem *m) | ||
807 | { | ||
808 | int rc; | ||
809 | struct genwqe_dev *cd = cfile->cd; | ||
810 | struct pci_dev *pci_dev = cfile->cd->pci_dev; | ||
811 | struct dma_mapping *dma_map; | ||
812 | unsigned long map_addr; | ||
813 | unsigned long map_size; | ||
814 | |||
815 | if ((m->addr == 0x0) || (m->size == 0)) | ||
816 | return -EINVAL; | ||
817 | |||
818 | map_addr = (m->addr & PAGE_MASK); | ||
819 | map_size = round_up(m->size + (m->addr & ~PAGE_MASK), PAGE_SIZE); | ||
820 | |||
821 | dma_map = kzalloc(sizeof(struct dma_mapping), GFP_ATOMIC); | ||
822 | if (dma_map == NULL) | ||
823 | return -ENOMEM; | ||
824 | |||
825 | genwqe_mapping_init(dma_map, GENWQE_MAPPING_SGL_PINNED); | ||
826 | rc = genwqe_user_vmap(cd, dma_map, (void *)map_addr, map_size, NULL); | ||
827 | if (rc != 0) { | ||
828 | dev_err(&pci_dev->dev, | ||
829 | "[%s] genwqe_user_vmap rc=%d\n", __func__, rc); | ||
 | kfree(dma_map); /* do not leak the mapping on error */ | ||
830 | return rc; | ||
831 | } | ||
832 | |||
833 | genwqe_add_pin(cfile, dma_map); | ||
834 | return 0; | ||
835 | } | ||
836 | |||
837 | static int genwqe_unpin_mem(struct genwqe_file *cfile, struct genwqe_mem *m) | ||
838 | { | ||
839 | struct genwqe_dev *cd = cfile->cd; | ||
840 | struct dma_mapping *dma_map; | ||
841 | unsigned long map_addr; | ||
842 | unsigned long map_size; | ||
843 | |||
844 | if (m->addr == 0x0) | ||
845 | return -EINVAL; | ||
846 | |||
847 | map_addr = (m->addr & PAGE_MASK); | ||
848 | map_size = round_up(m->size + (m->addr & ~PAGE_MASK), PAGE_SIZE); | ||
849 | |||
850 | dma_map = genwqe_search_pin(cfile, map_addr, map_size, NULL); | ||
851 | if (dma_map == NULL) | ||
852 | return -ENOENT; | ||
853 | |||
854 | genwqe_del_pin(cfile, dma_map); | ||
855 | genwqe_user_vunmap(cd, dma_map, NULL); | ||
856 | kfree(dma_map); | ||
857 | return 0; | ||
858 | } | ||
859 | |||
860 | /** | ||
861 | * ddcb_cmd_cleanup() - Remove dynamically created fixup entries | ||
862 | * | ||
863 | * Only if there are any. Pinnings are not removed. | ||
864 | */ | ||
865 | static int ddcb_cmd_cleanup(struct genwqe_file *cfile, struct ddcb_requ *req) | ||
866 | { | ||
867 | unsigned int i; | ||
868 | struct dma_mapping *dma_map; | ||
869 | struct genwqe_dev *cd = cfile->cd; | ||
870 | |||
871 | for (i = 0; i < DDCB_FIXUPS; i++) { | ||
872 | dma_map = &req->dma_mappings[i]; | ||
873 | |||
874 | if (dma_mapping_used(dma_map)) { | ||
875 | __genwqe_del_mapping(cfile, dma_map); | ||
876 | genwqe_user_vunmap(cd, dma_map, req); | ||
877 | } | ||
878 | if (req->sgl[i] != NULL) { | ||
879 | genwqe_free_sgl(cd, req->sgl[i], | ||
880 | req->sgl_dma_addr[i], | ||
881 | req->sgl_size[i]); | ||
882 | req->sgl[i] = NULL; | ||
883 | req->sgl_dma_addr[i] = 0x0; | ||
884 | req->sgl_size[i] = 0; | ||
885 | } | ||
886 | |||
887 | } | ||
888 | return 0; | ||
889 | } | ||
890 | |||
891 | /** | ||
892 | * ddcb_cmd_fixups() - Establish DMA fixups/sglists for user memory references | ||
893 | * | ||
894 | * Before the DDCB gets executed we need to handle the fixups. We | ||
895 | * replace the user-space addresses with DMA addresses or do | ||
896 | * additional setup work e.g. generating a scatter-gather list which | ||
897 | * is used to describe the memory referred to in the fixup. | ||
898 | */ | ||
899 | static int ddcb_cmd_fixups(struct genwqe_file *cfile, struct ddcb_requ *req) | ||
900 | { | ||
901 | int rc; | ||
902 | unsigned int asiv_offs, i; | ||
903 | struct genwqe_dev *cd = cfile->cd; | ||
904 | struct genwqe_ddcb_cmd *cmd = &req->cmd; | ||
905 | struct dma_mapping *m; | ||
906 | struct pci_dev *pci_dev = cd->pci_dev; | ||
907 | const char *type = "UNKNOWN"; | ||
908 | |||
909 | for (i = 0, asiv_offs = 0x00; asiv_offs <= 0x58; | ||
910 | i++, asiv_offs += 0x08) { | ||
911 | |||
912 | u64 u_addr, d_addr; | ||
913 | u32 u_size = 0; | ||
914 | unsigned long ats_flags; | ||
915 | |||
916 | ats_flags = ATS_GET_FLAGS(be64_to_cpu(cmd->ats), asiv_offs); | ||
917 | |||
918 | switch (ats_flags) { | ||
919 | |||
920 | case ATS_TYPE_DATA: | ||
921 | break; /* nothing to do here */ | ||
922 | |||
923 | case ATS_TYPE_FLAT_RDWR: | ||
924 | case ATS_TYPE_FLAT_RD: { | ||
925 | u_addr = be64_to_cpu(*((u64 *)&cmd-> | ||
926 | asiv[asiv_offs])); | ||
927 | u_size = be32_to_cpu(*((u32 *)&cmd-> | ||
928 | asiv[asiv_offs + 0x08])); | ||
929 | |||
930 | /* | ||
931 | * No data available. Ignore u_addr in this | ||
932 | * case and set addr to 0. Hardware must not | ||
933 | * fetch the buffer. | ||
934 | */ | ||
935 | if (u_size == 0x0) { | ||
936 | *((u64 *)&cmd->asiv[asiv_offs]) = | ||
937 | cpu_to_be64(0x0); | ||
938 | break; | ||
939 | } | ||
940 | |||
941 | m = __genwqe_search_mapping(cfile, u_addr, u_size, | ||
942 | &d_addr, NULL); | ||
943 | if (m == NULL) { | ||
944 | rc = -EFAULT; | ||
945 | goto err_out; | ||
946 | } | ||
947 | |||
948 | *((u64 *)&cmd->asiv[asiv_offs]) = cpu_to_be64(d_addr); | ||
949 | break; | ||
950 | } | ||
951 | |||
952 | case ATS_TYPE_SGL_RDWR: | ||
953 | case ATS_TYPE_SGL_RD: { | ||
954 | int page_offs, nr_pages, offs; | ||
955 | |||
956 | u_addr = be64_to_cpu(*((u64 *)&cmd->asiv[asiv_offs])); | ||
957 | u_size = be32_to_cpu(*((u32 *)&cmd->asiv[asiv_offs + | ||
958 | 0x08])); | ||
959 | |||
960 | /* | ||
961 | * No data available. Ignore u_addr in this | ||
962 | * case and set addr to 0. Hardware must not | ||
963 | * fetch the empty sgl. | ||
964 | */ | ||
965 | if (u_size == 0x0) { | ||
966 | *((u64 *)&cmd->asiv[asiv_offs]) = | ||
967 | cpu_to_be64(0x0); | ||
968 | break; | ||
969 | } | ||
970 | |||
971 | m = genwqe_search_pin(cfile, u_addr, u_size, NULL); | ||
972 | if (m != NULL) { | ||
973 | type = "PINNING"; | ||
974 | page_offs = (u_addr - | ||
975 | (u64)m->u_vaddr)/PAGE_SIZE; | ||
976 | } else { | ||
977 | type = "MAPPING"; | ||
978 | m = &req->dma_mappings[i]; | ||
979 | |||
980 | genwqe_mapping_init(m, | ||
981 | GENWQE_MAPPING_SGL_TEMP); | ||
982 | rc = genwqe_user_vmap(cd, m, (void *)u_addr, | ||
983 | u_size, req); | ||
984 | if (rc != 0) | ||
985 | goto err_out; | ||
986 | |||
987 | __genwqe_add_mapping(cfile, m); | ||
988 | page_offs = 0; | ||
989 | } | ||
990 | |||
991 | offs = offset_in_page(u_addr); | ||
992 | nr_pages = DIV_ROUND_UP(offs + u_size, PAGE_SIZE); | ||
993 | |||
994 | /* create genwqe style scatter gather list */ | ||
995 | req->sgl[i] = genwqe_alloc_sgl(cd, m->nr_pages, | ||
996 | &req->sgl_dma_addr[i], | ||
997 | &req->sgl_size[i]); | ||
998 | if (req->sgl[i] == NULL) { | ||
999 | rc = -ENOMEM; | ||
1000 | goto err_out; | ||
1001 | } | ||
1002 | genwqe_setup_sgl(cd, offs, u_size, | ||
1003 | req->sgl[i], | ||
1004 | req->sgl_dma_addr[i], | ||
1005 | req->sgl_size[i], | ||
1006 | m->dma_list, | ||
1007 | page_offs, | ||
1008 | nr_pages); | ||
1009 | |||
1010 | *((u64 *)&cmd->asiv[asiv_offs]) = | ||
1011 | cpu_to_be64(req->sgl_dma_addr[i]); | ||
1012 | |||
1013 | break; | ||
1014 | } | ||
1015 | default: | ||
1016 | dev_err(&pci_dev->dev, | ||
1017 | "[%s] err: invalid ATS flags %01lx\n", | ||
1018 | __func__, ats_flags); | ||
1019 | rc = -EINVAL; | ||
1020 | goto err_out; | ||
1021 | } | ||
1022 | } | ||
1023 | return 0; | ||
1024 | |||
1025 | err_out: | ||
1026 | dev_err(&pci_dev->dev, "[%s] err: rc=%d\n", __func__, rc); | ||
1027 | ddcb_cmd_cleanup(cfile, req); | ||
1028 | return rc; | ||
1029 | } | ||
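Each 8-byte ASIV word is classified by a 4-bit ATS nibble; the nibble for offset 0x00 sits in bits 47:44, which matches the 0x4ULL << 44 used in do_flash_update() above. A sketch of composing such a word (the ATS_TYPE_* constants come from card_base.h; the shift arithmetic is this sketch's reading of ATS_GET_FLAGS()):

	/* asiv[0x00] is a read-only flat buffer, asiv[0x10] an sgl */
	u64 ats = ((u64)ATS_TYPE_FLAT_RD << 44) |
		  ((u64)ATS_TYPE_SGL_RDWR << (44 - 4 * (0x10 / 8)));

	cmd->ats = cpu_to_be64(ats);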
1030 | |||
1031 | /** | ||
1032 | * genwqe_execute_ddcb() - Execute DDCB using userspace address fixups | ||
1033 | * | ||
1034 | * The code will build up the translation tables or lookup the | ||
1035 | * contignous memory allocation table to find the right translations | ||
1036 | * and DMA addresses. | ||
1037 | */ | ||
1038 | static int genwqe_execute_ddcb(struct genwqe_file *cfile, | ||
1039 | struct genwqe_ddcb_cmd *cmd) | ||
1040 | { | ||
1041 | int rc; | ||
1042 | struct genwqe_dev *cd = cfile->cd; | ||
1043 | struct ddcb_requ *req = container_of(cmd, struct ddcb_requ, cmd); | ||
1044 | |||
1045 | rc = ddcb_cmd_fixups(cfile, req); | ||
1046 | if (rc != 0) | ||
1047 | return rc; | ||
1048 | |||
1049 | rc = __genwqe_execute_raw_ddcb(cd, cmd); | ||
1050 | ddcb_cmd_cleanup(cfile, req); | ||
1051 | return rc; | ||
1052 | } | ||
1053 | |||
1054 | static int do_execute_ddcb(struct genwqe_file *cfile, | ||
1055 | unsigned long arg, int raw) | ||
1056 | { | ||
1057 | int rc; | ||
1058 | struct genwqe_ddcb_cmd *cmd; | ||
1059 | struct ddcb_requ *req; | ||
1060 | struct genwqe_dev *cd = cfile->cd; | ||
1061 | struct pci_dev *pci_dev = cd->pci_dev; | ||
1062 | |||
1063 | cmd = ddcb_requ_alloc(); | ||
1064 | if (cmd == NULL) | ||
1065 | return -ENOMEM; | ||
1066 | |||
1067 | req = container_of(cmd, struct ddcb_requ, cmd); | ||
1068 | |||
1069 | if (copy_from_user(cmd, (void __user *)arg, sizeof(*cmd))) { | ||
1070 | dev_err(&pci_dev->dev, | ||
1071 | "err: could not copy params from user\n"); | ||
1072 | ddcb_requ_free(cmd); | ||
1073 | return -EFAULT; | ||
1074 | } | ||
1075 | |||
1076 | if (!raw) | ||
1077 | rc = genwqe_execute_ddcb(cfile, cmd); | ||
1078 | else | ||
1079 | rc = __genwqe_execute_raw_ddcb(cd, cmd); | ||
1080 | |||
1081 | /* Copy back only the modified fields. Do not copy ASIV | ||
1082 | back since the copy got modified by the driver. */ | ||
1083 | if (copy_to_user((void __user *)arg, cmd, | ||
1084 | sizeof(*cmd) - DDCB_ASIV_LENGTH)) { | ||
1085 | dev_err(&pci_dev->dev, | ||
1086 | "err: could not copy params to user\n"); | ||
1087 | ddcb_requ_free(cmd); | ||
1088 | return -EFAULT; | ||
1089 | } | ||
1090 | |||
1091 | ddcb_requ_free(cmd); | ||
1092 | return rc; | ||
1093 | } | ||
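From userspace this path is reached via the GENWQE_EXECUTE_DDCB ioctl; the command values below are purely illustrative, the real ASIV layout is accelerator specific:

	struct genwqe_ddcb_cmd cmd = { 0 };

	cmd.acfunc = 0;		/* accelerator function to address */
	cmd.cmd = 0xa5;		/* application defined command */
	cmd.asiv_length = 8;	/* bytes of ASIV actually used */
	cmd.asv_length = 8;	/* bytes of ASV expected back */
	/* user addresses in cmd.asiv[] are fixed up per cmd.ats */

	rc = ioctl(fd, GENWQE_EXECUTE_DDCB, &cmd);
	/* cmd.retc/attn/progress are copied back by the driver */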
1094 | |||
1095 | /** | ||
1096 | * genwqe_ioctl() - IO control | ||
1097 | * @filp: file handle | ||
1098 | * @cmd: command identifier (passed from user) | ||
1099 | * @arg: argument (passed from user) | ||
1100 | * | ||
1101 | * Return: 0 success | ||
1102 | */ | ||
1103 | static long genwqe_ioctl(struct file *filp, unsigned int cmd, | ||
1104 | unsigned long arg) | ||
1105 | { | ||
1106 | int rc = 0; | ||
1107 | struct genwqe_file *cfile = (struct genwqe_file *)filp->private_data; | ||
1108 | struct genwqe_dev *cd = cfile->cd; | ||
1109 | struct genwqe_reg_io __user *io; | ||
1110 | u64 val; | ||
1111 | u32 reg_offs; | ||
1112 | struct pci_dev *pci_dev = cd->pci_dev; | ||
1113 | |||
1114 | if (_IOC_TYPE(cmd) != GENWQE_IOC_CODE) { | ||
1115 | dev_err(&pci_dev->dev, "err: ioctl code does not match!\n"); | ||
1116 | return -EINVAL; | ||
1117 | } | ||
1118 | |||
1119 | switch (cmd) { | ||
1120 | |||
1121 | case GENWQE_GET_CARD_STATE: | ||
1122 | put_user(cd->card_state, (enum genwqe_card_state __user *)arg); | ||
1123 | return 0; | ||
1124 | |||
1125 | /* Register access */ | ||
1126 | case GENWQE_READ_REG64: { | ||
1127 | io = (struct genwqe_reg_io __user *)arg; | ||
1128 | |||
1129 | if (get_user(reg_offs, &io->num)) { | ||
1130 | dev_err(&pci_dev->dev, "err: reg read64\n"); | ||
1131 | return -EFAULT; | ||
1132 | } | ||
1133 | if ((reg_offs >= cd->mmio_len) || (reg_offs & 0x7)) | ||
1134 | return -EINVAL; | ||
1135 | |||
1136 | val = __genwqe_readq(cd, reg_offs); | ||
1137 | put_user(val, &io->val64); | ||
1138 | return 0; | ||
1139 | } | ||
1140 | |||
1141 | case GENWQE_WRITE_REG64: { | ||
1142 | io = (struct genwqe_reg_io __user *)arg; | ||
1143 | |||
1144 | if (!capable(CAP_SYS_ADMIN)) | ||
1145 | return -EPERM; | ||
1146 | |||
1147 | if ((filp->f_flags & O_ACCMODE) == O_RDONLY) | ||
1148 | return -EPERM; | ||
1149 | |||
1150 | if (get_user(reg_offs, &io->num)) { | ||
1151 | dev_err(&pci_dev->dev, "err: reg write64\n"); | ||
1152 | return -EFAULT; | ||
1153 | } | ||
1154 | if ((reg_offs >= cd->mmio_len) || (reg_offs & 0x7)) | ||
1155 | return -EINVAL; | ||
1156 | |||
1157 | if (get_user(val, &io->val64)) { | ||
1158 | dev_err(&pci_dev->dev, "err: reg write64\n"); | ||
1159 | return -EFAULT; | ||
1160 | } | ||
1161 | __genwqe_writeq(cd, reg_offs, val); | ||
1162 | return 0; | ||
1163 | } | ||
1164 | |||
1165 | case GENWQE_READ_REG32: { | ||
1166 | io = (struct genwqe_reg_io __user *)arg; | ||
1167 | |||
1168 | if (get_user(reg_offs, &io->num)) { | ||
1169 | dev_err(&pci_dev->dev, "err: reg read32\n"); | ||
1170 | return -EFAULT; | ||
1171 | } | ||
1172 | if ((reg_offs >= cd->mmio_len) || (reg_offs & 0x3)) | ||
1173 | return -EINVAL; | ||
1174 | |||
1175 | val = __genwqe_readl(cd, reg_offs); | ||
1176 | put_user(val, &io->val64); | ||
1177 | return 0; | ||
1178 | } | ||
1179 | |||
1180 | case GENWQE_WRITE_REG32: { | ||
1181 | io = (struct genwqe_reg_io __user *)arg; | ||
1182 | |||
1183 | if (!capable(CAP_SYS_ADMIN)) | ||
1184 | return -EPERM; | ||
1185 | |||
1186 | if ((filp->f_flags & O_ACCMODE) == O_RDONLY) | ||
1187 | return -EPERM; | ||
1188 | |||
1189 | if (get_user(reg_offs, &io->num)) { | ||
1190 | dev_err(&pci_dev->dev, "err: reg write32\n"); | ||
1191 | return -EFAULT; | ||
1192 | } | ||
1193 | if ((reg_offs >= cd->mmio_len) || (reg_offs & 0x3)) | ||
1194 | return -EINVAL; | ||
1195 | |||
1196 | if (get_user(val, &io->val64)) { | ||
1197 | dev_err(&pci_dev->dev, "err: reg write32\n"); | ||
1198 | return -EFAULT; | ||
1199 | } | ||
1200 | __genwqe_writel(cd, reg_offs, val); | ||
1201 | return 0; | ||
1202 | } | ||
1203 | |||
1204 | /* Flash update/reading */ | ||
1205 | case GENWQE_SLU_UPDATE: { | ||
1206 | struct genwqe_bitstream load; | ||
1207 | |||
1208 | if (!genwqe_is_privileged(cd)) | ||
1209 | return -EPERM; | ||
1210 | |||
1211 | if ((filp->f_flags & O_ACCMODE) == O_RDONLY) | ||
1212 | return -EPERM; | ||
1213 | |||
1214 | if (copy_from_user(&load, (void __user *)arg, sizeof(load))) { | ||
1215 | dev_err(&pci_dev->dev, | ||
1216 | "err: could not copy params from user\n"); | ||
1217 | return -EFAULT; | ||
1218 | } | ||
1219 | rc = do_flash_update(cfile, &load); | ||
1220 | |||
1221 | if (copy_to_user((void __user *)arg, &load, sizeof(load))) { | ||
1222 | dev_err(&pci_dev->dev, | ||
1223 | "err: could not copy params to user\n"); | ||
1224 | return -EFAULT; | ||
1225 | } | ||
1226 | dev_info(&pci_dev->dev, "[%s] rc=%d\n", __func__, rc); | ||
1227 | return rc; | ||
1228 | } | ||
1229 | |||
1230 | case GENWQE_SLU_READ: { | ||
1231 | struct genwqe_bitstream load; | ||
1232 | |||
1233 | if (!genwqe_is_privileged(cd)) | ||
1234 | return -EPERM; | ||
1235 | |||
1236 | if (genwqe_flash_readback_fails(cd)) | ||
1237 | return -ENOSPC; /* known to fail for old versions */ | ||
1238 | |||
1239 | if (copy_from_user(&load, (void __user *)arg, sizeof(load))) { | ||
1240 | dev_err(&pci_dev->dev, | ||
1241 | "err: could not copy params from user\n"); | ||
1242 | return -EFAULT; | ||
1243 | } | ||
1244 | rc = do_flash_read(cfile, &load); | ||
1245 | |||
1246 | if (copy_to_user((void __user *)arg, &load, sizeof(load))) { | ||
1247 | dev_err(&pci_dev->dev, | ||
1248 | "err: could not copy params to user\n"); | ||
1249 | return -EFAULT; | ||
1250 | } | ||
1251 | dev_info(&pci_dev->dev, "[%s] rc=%d\n", __func__, rc); | ||
1252 | return rc; | ||
1253 | } | ||
1254 | |||
1255 | /* memory pinning and unpinning */ | ||
1256 | case GENWQE_PIN_MEM: { | ||
1257 | struct genwqe_mem m; | ||
1258 | |||
1259 | if (copy_from_user(&m, (void __user *)arg, sizeof(m))) { | ||
1260 | dev_err(&pci_dev->dev, | ||
1261 | "err: could not copy params from user\n"); | ||
1262 | return -EFAULT; | ||
1263 | } | ||
1264 | return genwqe_pin_mem(cfile, &m); | ||
1265 | } | ||
1266 | |||
1267 | case GENWQE_UNPIN_MEM: { | ||
1268 | struct genwqe_mem m; | ||
1269 | |||
1270 | if (copy_from_user(&m, (void __user *)arg, sizeof(m))) { | ||
1271 | dev_err(&pci_dev->dev, | ||
1272 | "err: could not copy params from user\n"); | ||
1273 | return -EFAULT; | ||
1274 | } | ||
1275 | return genwqe_unpin_mem(cfile, &m); | ||
1276 | } | ||
1277 | |||
1278 | /* launch a DDCB and wait for completion */ | ||
1279 | case GENWQE_EXECUTE_DDCB: | ||
1280 | return do_execute_ddcb(cfile, arg, 0); | ||
1281 | |||
1282 | case GENWQE_EXECUTE_RAW_DDCB: { | ||
1283 | |||
1284 | if (!capable(CAP_SYS_ADMIN)) { | ||
1285 | dev_err(&pci_dev->dev, | ||
1286 | "err: must be superuser execute raw DDCB!\n"); | ||
1287 | return -EPERM; | ||
1288 | } | ||
1289 | return do_execute_ddcb(cfile, arg, 1); | ||
1290 | } | ||
1291 | |||
1292 | default: | ||
1293 | pr_err("unknown ioctl %x/%lx**\n", cmd, arg); | ||
1294 | return -EINVAL; | ||
1295 | } | ||
1296 | |||
1297 | return rc; | ||
1298 | } | ||
1299 | |||
1300 | #if defined(CONFIG_COMPAT) | ||
1301 | /** | ||
1302 | * genwqe_compat_ioctl() - Compatibility ioctl | ||
1303 | * | ||
1304 | * Called whenever a 32-bit process running under a 64-bit kernel | ||
1305 | * performs an ioctl on /dev/genwqe<n>_card. | ||
1306 | * | ||
1307 | * @filp: file pointer. | ||
1308 | * @cmd: command. | ||
1309 | * @arg: user argument. | ||
1310 | * Return: zero on success or negative number on failure. | ||
1311 | */ | ||
1312 | static long genwqe_compat_ioctl(struct file *filp, unsigned int cmd, | ||
1313 | unsigned long arg) | ||
1314 | { | ||
1315 | return genwqe_ioctl(filp, cmd, arg); | ||
1316 | } | ||
1317 | #endif /* defined(CONFIG_COMPAT) */ | ||
1318 | |||
1319 | static const struct file_operations genwqe_fops = { | ||
1320 | .owner = THIS_MODULE, | ||
1321 | .open = genwqe_open, | ||
1322 | .fasync = genwqe_fasync, | ||
1323 | .mmap = genwqe_mmap, | ||
1324 | .unlocked_ioctl = genwqe_ioctl, | ||
1325 | #if defined(CONFIG_COMPAT) | ||
1326 | .compat_ioctl = genwqe_compat_ioctl, | ||
1327 | #endif | ||
1328 | .release = genwqe_release, | ||
1329 | }; | ||
1330 | |||
1331 | static int genwqe_device_initialized(struct genwqe_dev *cd) | ||
1332 | { | ||
1333 | return cd->dev != NULL; | ||
1334 | } | ||
1335 | |||
1336 | /** | ||
1337 | * genwqe_device_create() - Create and configure genwqe char device | ||
1338 | * @cd: genwqe device descriptor | ||
1339 | * | ||
1340 | * This function must be called before we create any more genwqe | ||
1341 | * character devices, because it is allocating the major and minor | ||
1342 | * numbers which are supposed to be used by the client drivers. | ||
1343 | */ | ||
1344 | int genwqe_device_create(struct genwqe_dev *cd) | ||
1345 | { | ||
1346 | int rc; | ||
1347 | struct pci_dev *pci_dev = cd->pci_dev; | ||
1348 | |||
1349 | /* | ||
1350 | * Here starts the individual setup per client. It must | ||
1351 | * initialize its own cdev data structure with its own fops. | ||
1352 | * The appropriate devnum needs to be created. The ranges must | ||
1353 | * not overlap. | ||
1354 | */ | ||
1355 | rc = alloc_chrdev_region(&cd->devnum_genwqe, 0, | ||
1356 | GENWQE_MAX_MINOR, GENWQE_DEVNAME); | ||
1357 | if (rc < 0) { | ||
1358 | dev_err(&pci_dev->dev, "err: alloc_chrdev_region failed\n"); | ||
1359 | goto err_dev; | ||
1360 | } | ||
1361 | |||
1362 | cdev_init(&cd->cdev_genwqe, &genwqe_fops); | ||
1363 | cd->cdev_genwqe.owner = THIS_MODULE; | ||
1364 | |||
1365 | rc = cdev_add(&cd->cdev_genwqe, cd->devnum_genwqe, 1); | ||
1366 | if (rc < 0) { | ||
1367 | dev_err(&pci_dev->dev, "err: cdev_add failed\n"); | ||
1368 | goto err_add; | ||
1369 | } | ||
1370 | |||
1371 | /* | ||
1372 | * Finally the device in /dev/... must be created. The rule is | ||
1373 | * to use card%d_clientname for each created device. | ||
1374 | */ | ||
1375 | cd->dev = device_create_with_groups(cd->class_genwqe, | ||
1376 | &cd->pci_dev->dev, | ||
1377 | cd->devnum_genwqe, cd, | ||
1378 | genwqe_attribute_groups, | ||
1379 | GENWQE_DEVNAME "%u_card", | ||
1380 | cd->card_idx); | ||
1381 | if (cd->dev == NULL) { | ||
1382 | rc = -ENODEV; | ||
1383 | goto err_cdev; | ||
1384 | } | ||
1385 | |||
1386 | rc = genwqe_init_debugfs(cd); | ||
1387 | if (rc != 0) | ||
1388 | goto err_debugfs; | ||
1389 | |||
1390 | return 0; | ||
1391 | |||
1392 | err_debugfs: | ||
1393 | device_destroy(cd->class_genwqe, cd->devnum_genwqe); | ||
1394 | err_cdev: | ||
1395 | cdev_del(&cd->cdev_genwqe); | ||
1396 | err_add: | ||
1397 | unregister_chrdev_region(cd->devnum_genwqe, GENWQE_MAX_MINOR); | ||
1398 | err_dev: | ||
1399 | cd->dev = NULL; | ||
1400 | return rc; | ||
1401 | } | ||
1402 | |||
1403 | static int genwqe_inform_and_stop_processes(struct genwqe_dev *cd) | ||
1404 | { | ||
1405 | int rc; | ||
1406 | unsigned int i; | ||
1407 | struct pci_dev *pci_dev = cd->pci_dev; | ||
1408 | |||
1409 | if (!genwqe_open_files(cd)) | ||
1410 | return 0; | ||
1411 | |||
1412 | dev_warn(&pci_dev->dev, "[%s] send SIGIO and wait ...\n", __func__); | ||
1413 | |||
1414 | rc = genwqe_kill_fasync(cd, SIGIO); | ||
1415 | if (rc > 0) { | ||
1416 | /* give kill_timeout seconds to close file descriptors ... */ | ||
1417 | for (i = 0; (i < genwqe_kill_timeout) && | ||
1418 | genwqe_open_files(cd); i++) { | ||
1419 | dev_info(&pci_dev->dev, " %d sec ...", i); | ||
1420 | |||
1421 | cond_resched(); | ||
1422 | msleep(1000); | ||
1423 | } | ||
1424 | |||
1425 | /* if no open files we can safely continue, else ... */ | ||
1426 | if (!genwqe_open_files(cd)) | ||
1427 | return 0; | ||
1428 | |||
1429 | dev_warn(&pci_dev->dev, | ||
1430 | "[%s] send SIGKILL and wait ...\n", __func__); | ||
1431 | |||
1432 | rc = genwqe_force_sig(cd, SIGKILL); /* force terminate */ | ||
1433 | if (rc) { | ||
1434 | /* Give kill_timeout more seconds to end processes */ | ||
1435 | for (i = 0; (i < genwqe_kill_timeout) && | ||
1436 | genwqe_open_files(cd); i++) { | ||
1437 | dev_warn(&pci_dev->dev, " %d sec ...", i); | ||
1438 | |||
1439 | cond_resched(); | ||
1440 | msleep(1000); | ||
1441 | } | ||
1442 | } | ||
1443 | } | ||
1444 | return 0; | ||
1445 | } | ||
1446 | |||
1447 | /** | ||
1448 | * genwqe_device_remove() - Remove genwqe's char device | ||
1449 | * | ||
1450 | * This function must be called after the client devices are removed | ||
1451 | * because it will free the major/minor number range for the genwqe | ||
1452 | * drivers. | ||
1453 | * | ||
1454 | * This function must be robust enough to be called twice. | ||
1455 | */ | ||
1456 | int genwqe_device_remove(struct genwqe_dev *cd) | ||
1457 | { | ||
1458 | int rc; | ||
1459 | struct pci_dev *pci_dev = cd->pci_dev; | ||
1460 | |||
1461 | if (!genwqe_device_initialized(cd)) | ||
1462 | return 1; | ||
1463 | |||
1464 | genwqe_inform_and_stop_processes(cd); | ||
1465 | |||
1466 | /* | ||
1467 | * We currently do wait until all file descriptors are | ||
1468 | * closed. This leads to a problem when we abort an | ||
1469 | * application, which will decrease this reference from | ||
1470 | * 1/unused to 0/illegal and not from 2/used to 1/empty. | ||
1471 | */ | ||
1472 | rc = atomic_read(&cd->cdev_genwqe.kobj.kref.refcount); | ||
1473 | if (rc != 1) { | ||
1474 | dev_err(&pci_dev->dev, | ||
1475 | "[%s] err: cdev_genwqe...refcount=%d\n", __func__, rc); | ||
1476 | panic("Fatal err: cannot free resources with pending references!"); | ||
1477 | } | ||
1478 | |||
1479 | genqwe_exit_debugfs(cd); | ||
1480 | device_destroy(cd->class_genwqe, cd->devnum_genwqe); | ||
1481 | cdev_del(&cd->cdev_genwqe); | ||
1482 | unregister_chrdev_region(cd->devnum_genwqe, GENWQE_MAX_MINOR); | ||
1483 | cd->dev = NULL; | ||
1484 | |||
1485 | return 0; | ||
1486 | } | ||