Diffstat (limited to 'drivers/dma/ioat/dma_v3.c')
-rw-r--r-- | drivers/dma/ioat/dma_v3.c | 1223
1 files changed, 1223 insertions, 0 deletions
diff --git a/drivers/dma/ioat/dma_v3.c b/drivers/dma/ioat/dma_v3.c
new file mode 100644
index 000000000000..35d1e33afd5b
--- /dev/null
+++ b/drivers/dma/ioat/dma_v3.c
@@ -0,0 +1,1223 @@
1 | /* | ||
2 | * This file is provided under a dual BSD/GPLv2 license. When using or | ||
3 | * redistributing this file, you may do so under either license. | ||
4 | * | ||
5 | * GPL LICENSE SUMMARY | ||
6 | * | ||
7 | * Copyright(c) 2004 - 2009 Intel Corporation. All rights reserved. | ||
8 | * | ||
9 | * This program is free software; you can redistribute it and/or modify it | ||
10 | * under the terms and conditions of the GNU General Public License, | ||
11 | * version 2, as published by the Free Software Foundation. | ||
12 | * | ||
13 | * This program is distributed in the hope that it will be useful, but WITHOUT | ||
14 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
15 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
16 | * more details. | ||
17 | * | ||
18 | * You should have received a copy of the GNU General Public License along with | ||
19 | * this program; if not, write to the Free Software Foundation, Inc., | ||
20 | * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. | ||
21 | * | ||
22 | * The full GNU General Public License is included in this distribution in | ||
23 | * the file called "COPYING". | ||
24 | * | ||
25 | * BSD LICENSE | ||
26 | * | ||
27 | * Copyright(c) 2004-2009 Intel Corporation. All rights reserved. | ||
28 | * | ||
29 | * Redistribution and use in source and binary forms, with or without | ||
30 | * modification, are permitted provided that the following conditions are met: | ||
31 | * | ||
32 | * * Redistributions of source code must retain the above copyright | ||
33 | * notice, this list of conditions and the following disclaimer. | ||
34 | * * Redistributions in binary form must reproduce the above copyright | ||
35 | * notice, this list of conditions and the following disclaimer in | ||
36 | * the documentation and/or other materials provided with the | ||
37 | * distribution. | ||
38 | * * Neither the name of Intel Corporation nor the names of its | ||
39 | * contributors may be used to endorse or promote products derived | ||
40 | * from this software without specific prior written permission. | ||
41 | * | ||
42 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | ||
43 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | ||
44 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | ||
45 | * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE | ||
46 | * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR | ||
47 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF | ||
48 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS | ||
49 | * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN | ||
50 | * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) | ||
51 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE | ||
52 | * POSSIBILITY OF SUCH DAMAGE. | ||
53 | */ | ||
54 | |||
55 | /* | ||
56 | * Support routines for v3+ hardware | ||
57 | */ | ||
58 | |||
59 | #include <linux/pci.h> | ||
60 | #include <linux/dmaengine.h> | ||
61 | #include <linux/dma-mapping.h> | ||
62 | #include "registers.h" | ||
63 | #include "hw.h" | ||
64 | #include "dma.h" | ||
65 | #include "dma_v2.h" | ||
66 | |||
67 | /* ioat hardware assumes at least two sources for raid operations */ | ||
68 | #define src_cnt_to_sw(x) ((x) + 2) | ||
69 | #define src_cnt_to_hw(x) ((x) - 2) | ||
70 | |||
71 | /* provide a lookup table for setting the source address in the base or | ||
72 | * extended descriptor of an xor or pq descriptor | ||
73 | */ | ||
74 | static const u8 xor_idx_to_desc __read_mostly = 0xe0; | ||
75 | static const u8 xor_idx_to_field[] __read_mostly = { 1, 4, 5, 6, 7, 0, 1, 2 }; | ||
76 | static const u8 pq_idx_to_desc __read_mostly = 0xf8; | ||
77 | static const u8 pq_idx_to_field[] __read_mostly = { 1, 4, 5, 0, 1, 2, 4, 5 }; | ||
78 | |||
79 | static dma_addr_t xor_get_src(struct ioat_raw_descriptor *descs[2], int idx) | ||
80 | { | ||
81 | struct ioat_raw_descriptor *raw = descs[xor_idx_to_desc >> idx & 1]; | ||
82 | |||
83 | return raw->field[xor_idx_to_field[idx]]; | ||
84 | } | ||
85 | |||
86 | static void xor_set_src(struct ioat_raw_descriptor *descs[2], | ||
87 | dma_addr_t addr, u32 offset, int idx) | ||
88 | { | ||
89 | struct ioat_raw_descriptor *raw = descs[xor_idx_to_desc >> idx & 1]; | ||
90 | |||
91 | raw->field[xor_idx_to_field[idx]] = addr + offset; | ||
92 | } | ||
93 | |||
94 | static dma_addr_t pq_get_src(struct ioat_raw_descriptor *descs[2], int idx) | ||
95 | { | ||
96 | struct ioat_raw_descriptor *raw = descs[pq_idx_to_desc >> idx & 1]; | ||
97 | |||
98 | return raw->field[pq_idx_to_field[idx]]; | ||
99 | } | ||
100 | |||
101 | static void pq_set_src(struct ioat_raw_descriptor *descs[2], | ||
102 | dma_addr_t addr, u32 offset, u8 coef, int idx) | ||
103 | { | ||
104 | struct ioat_pq_descriptor *pq = (struct ioat_pq_descriptor *) descs[0]; | ||
105 | struct ioat_raw_descriptor *raw = descs[pq_idx_to_desc >> idx & 1]; | ||
106 | |||
107 | raw->field[pq_idx_to_field[idx]] = addr + offset; | ||
108 | pq->coef[idx] = coef; | ||
109 | } | ||
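
The routing above is compact but easy to misread: bit idx of xor_idx_to_desc/pq_idx_to_desc selects the base (0) or extended (1) descriptor, and the matching *_idx_to_field[] entry names the 64-bit slot inside it, so xor sources 0-4 live in the base descriptor and sources 5-7 spill into the extension. A minimal standalone sketch of the same indexing, with invented names and plain structs rather than the driver's real descriptor types:

#include <stdint.h>
#include <stdio.h>

struct raw_desc { uint64_t field[8]; };	/* stand-in for ioat_raw_descriptor */

static const uint8_t idx_to_desc = 0xe0;	/* sources 5-7 select the extended descriptor */
static const uint8_t idx_to_field[] = { 1, 4, 5, 6, 7, 0, 1, 2 };

static void set_src(struct raw_desc *descs[2], uint64_t addr, int idx)
{
	/* bit 'idx' of the mask picks base (0) or extended (1) descriptor,
	 * the table picks the 64-bit slot inside it */
	struct raw_desc *d = descs[(idx_to_desc >> idx) & 1];

	d->field[idx_to_field[idx]] = addr;
}

int main(void)
{
	struct raw_desc base = { { 0 } }, ext = { { 0 } };
	struct raw_desc *descs[2] = { &base, &ext };
	int i;

	for (i = 0; i < 8; i++)
		set_src(descs, 0x1000u + 0x100u * i, i);

	/* sources 0-4 land in base.field[1,4,5,6,7], sources 5-7 in ext.field[0,1,2] */
	printf("src 5 -> ext.field[0] = %#llx\n", (unsigned long long)ext.field[0]);
	return 0;
}

Built as ordinary userspace C this prints the address stored for source 5, which ends up in field 0 of the extended descriptor.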
110 | |||
111 | static void ioat3_dma_unmap(struct ioat2_dma_chan *ioat, | ||
112 | struct ioat_ring_ent *desc, int idx) | ||
113 | { | ||
114 | struct ioat_chan_common *chan = &ioat->base; | ||
115 | struct pci_dev *pdev = chan->device->pdev; | ||
116 | size_t len = desc->len; | ||
117 | size_t offset = len - desc->hw->size; | ||
118 | struct dma_async_tx_descriptor *tx = &desc->txd; | ||
119 | enum dma_ctrl_flags flags = tx->flags; | ||
120 | |||
121 | switch (desc->hw->ctl_f.op) { | ||
122 | case IOAT_OP_COPY: | ||
123 | if (!desc->hw->ctl_f.null) /* skip 'interrupt' ops */ | ||
124 | ioat_dma_unmap(chan, flags, len, desc->hw); | ||
125 | break; | ||
126 | case IOAT_OP_FILL: { | ||
127 | struct ioat_fill_descriptor *hw = desc->fill; | ||
128 | |||
129 | if (!(flags & DMA_COMPL_SKIP_DEST_UNMAP)) | ||
130 | ioat_unmap(pdev, hw->dst_addr - offset, len, | ||
131 | PCI_DMA_FROMDEVICE, flags, 1); | ||
132 | break; | ||
133 | } | ||
134 | case IOAT_OP_XOR_VAL: | ||
135 | case IOAT_OP_XOR: { | ||
136 | struct ioat_xor_descriptor *xor = desc->xor; | ||
137 | struct ioat_ring_ent *ext; | ||
138 | struct ioat_xor_ext_descriptor *xor_ex = NULL; | ||
139 | int src_cnt = src_cnt_to_sw(xor->ctl_f.src_cnt); | ||
140 | struct ioat_raw_descriptor *descs[2]; | ||
141 | int i; | ||
142 | |||
143 | if (src_cnt > 5) { | ||
144 | ext = ioat2_get_ring_ent(ioat, idx + 1); | ||
145 | xor_ex = ext->xor_ex; | ||
146 | } | ||
147 | |||
148 | if (!(flags & DMA_COMPL_SKIP_SRC_UNMAP)) { | ||
149 | descs[0] = (struct ioat_raw_descriptor *) xor; | ||
150 | descs[1] = (struct ioat_raw_descriptor *) xor_ex; | ||
151 | for (i = 0; i < src_cnt; i++) { | ||
152 | dma_addr_t src = xor_get_src(descs, i); | ||
153 | |||
154 | ioat_unmap(pdev, src - offset, len, | ||
155 | PCI_DMA_TODEVICE, flags, 0); | ||
156 | } | ||
157 | |||
158 | /* dest is a source in xor validate operations */ | ||
159 | if (xor->ctl_f.op == IOAT_OP_XOR_VAL) { | ||
160 | ioat_unmap(pdev, xor->dst_addr - offset, len, | ||
161 | PCI_DMA_TODEVICE, flags, 1); | ||
162 | break; | ||
163 | } | ||
164 | } | ||
165 | |||
166 | if (!(flags & DMA_COMPL_SKIP_DEST_UNMAP)) | ||
167 | ioat_unmap(pdev, xor->dst_addr - offset, len, | ||
168 | PCI_DMA_FROMDEVICE, flags, 1); | ||
169 | break; | ||
170 | } | ||
171 | case IOAT_OP_PQ_VAL: | ||
172 | case IOAT_OP_PQ: { | ||
173 | struct ioat_pq_descriptor *pq = desc->pq; | ||
174 | struct ioat_ring_ent *ext; | ||
175 | struct ioat_pq_ext_descriptor *pq_ex = NULL; | ||
176 | int src_cnt = src_cnt_to_sw(pq->ctl_f.src_cnt); | ||
177 | struct ioat_raw_descriptor *descs[2]; | ||
178 | int i; | ||
179 | |||
180 | if (src_cnt > 3) { | ||
181 | ext = ioat2_get_ring_ent(ioat, idx + 1); | ||
182 | pq_ex = ext->pq_ex; | ||
183 | } | ||
184 | |||
185 | /* in the 'continue' case don't unmap the dests as sources */ | ||
186 | if (dmaf_p_disabled_continue(flags)) | ||
187 | src_cnt--; | ||
188 | else if (dmaf_continue(flags)) | ||
189 | src_cnt -= 3; | ||
190 | |||
191 | if (!(flags & DMA_COMPL_SKIP_SRC_UNMAP)) { | ||
192 | descs[0] = (struct ioat_raw_descriptor *) pq; | ||
193 | descs[1] = (struct ioat_raw_descriptor *) pq_ex; | ||
194 | for (i = 0; i < src_cnt; i++) { | ||
195 | dma_addr_t src = pq_get_src(descs, i); | ||
196 | |||
197 | ioat_unmap(pdev, src - offset, len, | ||
198 | PCI_DMA_TODEVICE, flags, 0); | ||
199 | } | ||
200 | |||
201 | /* the dests are sources in pq validate operations */ | ||
202 | if (pq->ctl_f.op == IOAT_OP_PQ_VAL) { | ||
203 | if (!(flags & DMA_PREP_PQ_DISABLE_P)) | ||
204 | ioat_unmap(pdev, pq->p_addr - offset, | ||
205 | len, PCI_DMA_TODEVICE, flags, 0); | ||
206 | if (!(flags & DMA_PREP_PQ_DISABLE_Q)) | ||
207 | ioat_unmap(pdev, pq->q_addr - offset, | ||
208 | len, PCI_DMA_TODEVICE, flags, 0); | ||
209 | break; | ||
210 | } | ||
211 | } | ||
212 | |||
213 | if (!(flags & DMA_COMPL_SKIP_DEST_UNMAP)) { | ||
214 | if (!(flags & DMA_PREP_PQ_DISABLE_P)) | ||
215 | ioat_unmap(pdev, pq->p_addr - offset, len, | ||
216 | PCI_DMA_BIDIRECTIONAL, flags, 1); | ||
217 | if (!(flags & DMA_PREP_PQ_DISABLE_Q)) | ||
218 | ioat_unmap(pdev, pq->q_addr - offset, len, | ||
219 | PCI_DMA_BIDIRECTIONAL, flags, 1); | ||
220 | } | ||
221 | break; | ||
222 | } | ||
223 | default: | ||
224 | dev_err(&pdev->dev, "%s: unknown op type: %#x\n", | ||
225 | __func__, desc->hw->ctl_f.op); | ||
226 | } | ||
227 | } | ||
228 | |||
229 | static bool desc_has_ext(struct ioat_ring_ent *desc) | ||
230 | { | ||
231 | struct ioat_dma_descriptor *hw = desc->hw; | ||
232 | |||
233 | if (hw->ctl_f.op == IOAT_OP_XOR || | ||
234 | hw->ctl_f.op == IOAT_OP_XOR_VAL) { | ||
235 | struct ioat_xor_descriptor *xor = desc->xor; | ||
236 | |||
237 | if (src_cnt_to_sw(xor->ctl_f.src_cnt) > 5) | ||
238 | return true; | ||
239 | } else if (hw->ctl_f.op == IOAT_OP_PQ || | ||
240 | hw->ctl_f.op == IOAT_OP_PQ_VAL) { | ||
241 | struct ioat_pq_descriptor *pq = desc->pq; | ||
242 | |||
243 | if (src_cnt_to_sw(pq->ctl_f.src_cnt) > 3) | ||
244 | return true; | ||
245 | } | ||
246 | |||
247 | return false; | ||
248 | } | ||
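
desc_has_ext() encodes the same spill rule the unmap path relies on: a base xor descriptor carries at most 5 sources and a base pq descriptor at most 3, so anything larger occupies a second ring slot for the extended descriptor. A tiny illustrative helper (invented name, not part of the driver) restating that rule:

#include <stdbool.h>

/* illustrative only: ring slots consumed by one operation, per the
 * thresholds desc_has_ext() checks above */
static inline int ring_slots_for_op(bool is_pq, int src_cnt)
{
	int base_max = is_pq ? 3 : 5;	/* sources that fit in the base descriptor */

	return src_cnt > base_max ? 2 : 1;
}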
249 | |||
250 | /** | ||
251 | * __cleanup - reclaim used descriptors | ||
252 | * @ioat: channel (ring) to clean | ||
253 | * | ||
254 | * The difference from the dma_v2.c __cleanup() is that this routine | ||
255 | * handles extended descriptors and dma-unmapping raid operations. | ||
256 | */ | ||
257 | static void __cleanup(struct ioat2_dma_chan *ioat, unsigned long phys_complete) | ||
258 | { | ||
259 | struct ioat_chan_common *chan = &ioat->base; | ||
260 | struct ioat_ring_ent *desc; | ||
261 | bool seen_current = false; | ||
262 | u16 active; | ||
263 | int i; | ||
264 | |||
265 | dev_dbg(to_dev(chan), "%s: head: %#x tail: %#x issued: %#x\n", | ||
266 | __func__, ioat->head, ioat->tail, ioat->issued); | ||
267 | |||
268 | active = ioat2_ring_active(ioat); | ||
269 | for (i = 0; i < active && !seen_current; i++) { | ||
270 | struct dma_async_tx_descriptor *tx; | ||
271 | |||
272 | prefetch(ioat2_get_ring_ent(ioat, ioat->tail + i + 1)); | ||
273 | desc = ioat2_get_ring_ent(ioat, ioat->tail + i); | ||
274 | dump_desc_dbg(ioat, desc); | ||
275 | tx = &desc->txd; | ||
276 | if (tx->cookie) { | ||
277 | chan->completed_cookie = tx->cookie; | ||
278 | ioat3_dma_unmap(ioat, desc, ioat->tail + i); | ||
279 | tx->cookie = 0; | ||
280 | if (tx->callback) { | ||
281 | tx->callback(tx->callback_param); | ||
282 | tx->callback = NULL; | ||
283 | } | ||
284 | } | ||
285 | |||
286 | if (tx->phys == phys_complete) | ||
287 | seen_current = true; | ||
288 | |||
289 | /* skip extended descriptors */ | ||
290 | if (desc_has_ext(desc)) { | ||
291 | BUG_ON(i + 1 >= active); | ||
292 | i++; | ||
293 | } | ||
294 | } | ||
295 | ioat->tail += i; | ||
296 | BUG_ON(!seen_current); /* no active descs have written a completion? */ | ||
297 | chan->last_completion = phys_complete; | ||
298 | if (ioat->head == ioat->tail) { | ||
299 | dev_dbg(to_dev(chan), "%s: cancel completion timeout\n", | ||
300 | __func__); | ||
301 | clear_bit(IOAT_COMPLETION_PENDING, &chan->state); | ||
302 | mod_timer(&chan->timer, jiffies + IDLE_TIMEOUT); | ||
303 | } | ||
304 | } | ||
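
The loop above is the usual power-of-two ring walk: entries are consumed from tail toward head, and an operation that needed an extended descriptor burns the following slot as well, so the index is bumped twice. A standalone sketch of that pattern with simplified types (the real helpers, ioat2_ring_active() and ioat2_get_ring_ent(), live in dma_v2.[ch]):

#include <stdbool.h>
#include <stdint.h>

struct demo_ent { bool has_ext; /* ...payload... */ };

struct demo_ring {
	struct demo_ent *ents;
	uint16_t head, tail;
	uint16_t mask;		/* ring size is a power of two, mask = size - 1 */
};

/* walk [tail, tail + active) the way __cleanup() does, skipping the
 * second slot of any two-slot (extended) operation */
static void demo_cleanup(struct demo_ring *r)
{
	uint16_t active = r->head - r->tail;	/* u16 arithmetic wraps correctly */
	int i;

	for (i = 0; i < active; i++) {
		struct demo_ent *ent = &r->ents[(r->tail + i) & r->mask];

		/* ... complete ent ... */
		if (ent->has_ext)
			i++;		/* extended descriptor rides in the next slot */
	}
	r->tail += i;
}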
305 | |||
306 | static void ioat3_cleanup(struct ioat2_dma_chan *ioat) | ||
307 | { | ||
308 | struct ioat_chan_common *chan = &ioat->base; | ||
309 | unsigned long phys_complete; | ||
310 | |||
311 | prefetch(chan->completion); | ||
312 | |||
313 | if (!spin_trylock_bh(&chan->cleanup_lock)) | ||
314 | return; | ||
315 | |||
316 | if (!ioat_cleanup_preamble(chan, &phys_complete)) { | ||
317 | spin_unlock_bh(&chan->cleanup_lock); | ||
318 | return; | ||
319 | } | ||
320 | |||
321 | if (!spin_trylock_bh(&ioat->ring_lock)) { | ||
322 | spin_unlock_bh(&chan->cleanup_lock); | ||
323 | return; | ||
324 | } | ||
325 | |||
326 | __cleanup(ioat, phys_complete); | ||
327 | |||
328 | spin_unlock_bh(&ioat->ring_lock); | ||
329 | spin_unlock_bh(&chan->cleanup_lock); | ||
330 | } | ||
331 | |||
332 | static void ioat3_cleanup_tasklet(unsigned long data) | ||
333 | { | ||
334 | struct ioat2_dma_chan *ioat = (void *) data; | ||
335 | |||
336 | ioat3_cleanup(ioat); | ||
337 | writew(IOAT_CHANCTRL_RUN | IOAT3_CHANCTRL_COMPL_DCA_EN, | ||
338 | ioat->base.reg_base + IOAT_CHANCTRL_OFFSET); | ||
339 | } | ||
340 | |||
341 | static void ioat3_restart_channel(struct ioat2_dma_chan *ioat) | ||
342 | { | ||
343 | struct ioat_chan_common *chan = &ioat->base; | ||
344 | unsigned long phys_complete; | ||
345 | u32 status; | ||
346 | |||
347 | status = ioat_chansts(chan); | ||
348 | if (is_ioat_active(status) || is_ioat_idle(status)) | ||
349 | ioat_suspend(chan); | ||
350 | while (is_ioat_active(status) || is_ioat_idle(status)) { | ||
351 | status = ioat_chansts(chan); | ||
352 | cpu_relax(); | ||
353 | } | ||
354 | |||
355 | if (ioat_cleanup_preamble(chan, &phys_complete)) | ||
356 | __cleanup(ioat, phys_complete); | ||
357 | |||
358 | __ioat2_restart_chan(ioat); | ||
359 | } | ||
360 | |||
361 | static void ioat3_timer_event(unsigned long data) | ||
362 | { | ||
363 | struct ioat2_dma_chan *ioat = (void *) data; | ||
364 | struct ioat_chan_common *chan = &ioat->base; | ||
365 | |||
366 | spin_lock_bh(&chan->cleanup_lock); | ||
367 | if (test_bit(IOAT_COMPLETION_PENDING, &chan->state)) { | ||
368 | unsigned long phys_complete; | ||
369 | u64 status; | ||
370 | |||
371 | spin_lock_bh(&ioat->ring_lock); | ||
372 | status = ioat_chansts(chan); | ||
373 | |||
374 | /* when halted due to errors check for channel | ||
375 | * programming errors before advancing the completion state | ||
376 | */ | ||
377 | if (is_ioat_halted(status)) { | ||
378 | u32 chanerr; | ||
379 | |||
380 | chanerr = readl(chan->reg_base + IOAT_CHANERR_OFFSET); | ||
381 | BUG_ON(is_ioat_bug(chanerr)); | ||
382 | } | ||
383 | |||
384 | /* if we haven't made progress and we have already | ||
385 | * acknowledged a pending completion once, then be more | ||
386 | * forceful with a restart | ||
387 | */ | ||
388 | if (ioat_cleanup_preamble(chan, &phys_complete)) | ||
389 | __cleanup(ioat, phys_complete); | ||
390 | else if (test_bit(IOAT_COMPLETION_ACK, &chan->state)) | ||
391 | ioat3_restart_channel(ioat); | ||
392 | else { | ||
393 | set_bit(IOAT_COMPLETION_ACK, &chan->state); | ||
394 | mod_timer(&chan->timer, jiffies + COMPLETION_TIMEOUT); | ||
395 | } | ||
396 | spin_unlock_bh(&ioat->ring_lock); | ||
397 | } else { | ||
398 | u16 active; | ||
399 | |||
400 | /* if the ring is idle, empty, and oversized try to step | ||
401 | * down the size | ||
402 | */ | ||
403 | spin_lock_bh(&ioat->ring_lock); | ||
404 | active = ioat2_ring_active(ioat); | ||
405 | if (active == 0 && ioat->alloc_order > ioat_get_alloc_order()) | ||
406 | reshape_ring(ioat, ioat->alloc_order-1); | ||
407 | spin_unlock_bh(&ioat->ring_lock); | ||
408 | |||
409 | /* keep shrinking until we get back to our minimum | ||
410 | * default size | ||
411 | */ | ||
412 | if (ioat->alloc_order > ioat_get_alloc_order()) | ||
413 | mod_timer(&chan->timer, jiffies + IDLE_TIMEOUT); | ||
414 | } | ||
415 | spin_unlock_bh(&chan->cleanup_lock); | ||
416 | } | ||
417 | |||
418 | static enum dma_status | ||
419 | ioat3_is_complete(struct dma_chan *c, dma_cookie_t cookie, | ||
420 | dma_cookie_t *done, dma_cookie_t *used) | ||
421 | { | ||
422 | struct ioat2_dma_chan *ioat = to_ioat2_chan(c); | ||
423 | |||
424 | if (ioat_is_complete(c, cookie, done, used) == DMA_SUCCESS) | ||
425 | return DMA_SUCCESS; | ||
426 | |||
427 | ioat3_cleanup(ioat); | ||
428 | |||
429 | return ioat_is_complete(c, cookie, done, used); | ||
430 | } | ||
431 | |||
432 | static struct dma_async_tx_descriptor * | ||
433 | ioat3_prep_memset_lock(struct dma_chan *c, dma_addr_t dest, int value, | ||
434 | size_t len, unsigned long flags) | ||
435 | { | ||
436 | struct ioat2_dma_chan *ioat = to_ioat2_chan(c); | ||
437 | struct ioat_ring_ent *desc; | ||
438 | size_t total_len = len; | ||
439 | struct ioat_fill_descriptor *fill; | ||
440 | int num_descs; | ||
441 | u64 src_data = (0x0101010101010101ULL) * (value & 0xff); | ||
442 | u16 idx; | ||
443 | int i; | ||
444 | |||
445 | num_descs = ioat2_xferlen_to_descs(ioat, len); | ||
446 | if (likely(num_descs) && | ||
447 | ioat2_alloc_and_lock(&idx, ioat, num_descs) == 0) | ||
448 | /* pass */; | ||
449 | else | ||
450 | return NULL; | ||
451 | i = 0; | ||
452 | do { | ||
453 | size_t xfer_size = min_t(size_t, len, 1 << ioat->xfercap_log); | ||
454 | |||
455 | desc = ioat2_get_ring_ent(ioat, idx + i); | ||
456 | fill = desc->fill; | ||
457 | |||
458 | fill->size = xfer_size; | ||
459 | fill->src_data = src_data; | ||
460 | fill->dst_addr = dest; | ||
461 | fill->ctl = 0; | ||
462 | fill->ctl_f.op = IOAT_OP_FILL; | ||
463 | |||
464 | len -= xfer_size; | ||
465 | dest += xfer_size; | ||
466 | dump_desc_dbg(ioat, desc); | ||
467 | } while (++i < num_descs); | ||
468 | |||
469 | desc->txd.flags = flags; | ||
470 | desc->len = total_len; | ||
471 | fill->ctl_f.int_en = !!(flags & DMA_PREP_INTERRUPT); | ||
472 | fill->ctl_f.fence = !!(flags & DMA_PREP_FENCE); | ||
473 | fill->ctl_f.compl_write = 1; | ||
474 | dump_desc_dbg(ioat, desc); | ||
475 | |||
476 | /* we leave the channel locked to ensure in order submission */ | ||
477 | return &desc->txd; | ||
478 | } | ||
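
The fill value is widened to a 64-bit pattern by multiplying the low byte by 0x0101010101010101, which replicates it into every byte lane (0xab becomes 0xabababababababab). A standalone check of that identity:

#include <assert.h>
#include <stdint.h>

int main(void)
{
	int value = 0xab;
	uint64_t src_data = 0x0101010101010101ULL * (value & 0xff);

	assert(src_data == 0xababababababababULL);
	return 0;
}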
479 | |||
480 | static struct dma_async_tx_descriptor * | ||
481 | __ioat3_prep_xor_lock(struct dma_chan *c, enum sum_check_flags *result, | ||
482 | dma_addr_t dest, dma_addr_t *src, unsigned int src_cnt, | ||
483 | size_t len, unsigned long flags) | ||
484 | { | ||
485 | struct ioat2_dma_chan *ioat = to_ioat2_chan(c); | ||
486 | struct ioat_ring_ent *compl_desc; | ||
487 | struct ioat_ring_ent *desc; | ||
488 | struct ioat_ring_ent *ext; | ||
489 | size_t total_len = len; | ||
490 | struct ioat_xor_descriptor *xor; | ||
491 | struct ioat_xor_ext_descriptor *xor_ex = NULL; | ||
492 | struct ioat_dma_descriptor *hw; | ||
493 | u32 offset = 0; | ||
494 | int num_descs; | ||
495 | int with_ext; | ||
496 | int i; | ||
497 | u16 idx; | ||
498 | u8 op = result ? IOAT_OP_XOR_VAL : IOAT_OP_XOR; | ||
499 | |||
500 | BUG_ON(src_cnt < 2); | ||
501 | |||
502 | num_descs = ioat2_xferlen_to_descs(ioat, len); | ||
503 | /* we need 2x the number of descriptors to cover greater than 5 | ||
504 | * sources | ||
505 | */ | ||
506 | if (src_cnt > 5) { | ||
507 | with_ext = 1; | ||
508 | num_descs *= 2; | ||
509 | } else | ||
510 | with_ext = 0; | ||
511 | |||
512 | /* completion writes from the raid engine may pass completion | ||
513 | * writes from the legacy engine, so we need one extra null | ||
514 | * (legacy) descriptor to ensure all completion writes arrive in | ||
515 | * order. | ||
516 | */ | ||
517 | if (likely(num_descs) && | ||
518 | ioat2_alloc_and_lock(&idx, ioat, num_descs+1) == 0) | ||
519 | /* pass */; | ||
520 | else | ||
521 | return NULL; | ||
522 | i = 0; | ||
523 | do { | ||
524 | struct ioat_raw_descriptor *descs[2]; | ||
525 | size_t xfer_size = min_t(size_t, len, 1 << ioat->xfercap_log); | ||
526 | int s; | ||
527 | |||
528 | desc = ioat2_get_ring_ent(ioat, idx + i); | ||
529 | xor = desc->xor; | ||
530 | |||
531 | /* save a branch by unconditionally retrieving the | ||
532 | * extended descriptor; xor_set_src() knows not to write | ||
533 | * to it in the single-descriptor case | ||
534 | */ | ||
535 | ext = ioat2_get_ring_ent(ioat, idx + i + 1); | ||
536 | xor_ex = ext->xor_ex; | ||
537 | |||
538 | descs[0] = (struct ioat_raw_descriptor *) xor; | ||
539 | descs[1] = (struct ioat_raw_descriptor *) xor_ex; | ||
540 | for (s = 0; s < src_cnt; s++) | ||
541 | xor_set_src(descs, src[s], offset, s); | ||
542 | xor->size = xfer_size; | ||
543 | xor->dst_addr = dest + offset; | ||
544 | xor->ctl = 0; | ||
545 | xor->ctl_f.op = op; | ||
546 | xor->ctl_f.src_cnt = src_cnt_to_hw(src_cnt); | ||
547 | |||
548 | len -= xfer_size; | ||
549 | offset += xfer_size; | ||
550 | dump_desc_dbg(ioat, desc); | ||
551 | } while ((i += 1 + with_ext) < num_descs); | ||
552 | |||
553 | /* last xor descriptor carries the unmap parameters and fence bit */ | ||
554 | desc->txd.flags = flags; | ||
555 | desc->len = total_len; | ||
556 | if (result) | ||
557 | desc->result = result; | ||
558 | xor->ctl_f.fence = !!(flags & DMA_PREP_FENCE); | ||
559 | |||
560 | /* completion descriptor carries interrupt bit */ | ||
561 | compl_desc = ioat2_get_ring_ent(ioat, idx + i); | ||
562 | compl_desc->txd.flags = flags & DMA_PREP_INTERRUPT; | ||
563 | hw = compl_desc->hw; | ||
564 | hw->ctl = 0; | ||
565 | hw->ctl_f.null = 1; | ||
566 | hw->ctl_f.int_en = !!(flags & DMA_PREP_INTERRUPT); | ||
567 | hw->ctl_f.compl_write = 1; | ||
568 | hw->size = NULL_DESC_BUFFER_SIZE; | ||
569 | dump_desc_dbg(ioat, compl_desc); | ||
570 | |||
571 | /* we leave the channel locked to ensure in order submission */ | ||
572 | return &desc->txd; | ||
573 | } | ||
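
Putting the descriptor accounting in one place: each (1 << xfercap_log)-byte segment needs one xor descriptor, doubled when more than 5 sources force an extended descriptor, plus the single null descriptor that orders the completion write. A hedged standalone sketch of that arithmetic (invented helper name; the real per-channel cap comes from the xfercap register via ioat2_xferlen_to_descs()):

#include <stddef.h>

/* illustrative only: ring slots requested by __ioat3_prep_xor_lock() for one
 * operation, assuming the channel transfer cap is (1 << xfercap_log) bytes */
static int xor_slots_needed(size_t len, int src_cnt, int xfercap_log)
{
	size_t max_xfer = (size_t)1 << xfercap_log;
	size_t num_descs = (len + max_xfer - 1) / max_xfer;	/* one per segment */

	if (src_cnt > 5)	/* each segment also carries an extended descriptor */
		num_descs *= 2;

	return num_descs + 1;	/* plus the trailing null completion descriptor */
}

For example, an 8-source xor of 256 KiB on a channel with a 1 MiB cap requests 1 * 2 + 1 = 3 ring slots.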
574 | |||
575 | static struct dma_async_tx_descriptor * | ||
576 | ioat3_prep_xor(struct dma_chan *chan, dma_addr_t dest, dma_addr_t *src, | ||
577 | unsigned int src_cnt, size_t len, unsigned long flags) | ||
578 | { | ||
579 | return __ioat3_prep_xor_lock(chan, NULL, dest, src, src_cnt, len, flags); | ||
580 | } | ||
581 | |||
582 | struct dma_async_tx_descriptor * | ||
583 | ioat3_prep_xor_val(struct dma_chan *chan, dma_addr_t *src, | ||
584 | unsigned int src_cnt, size_t len, | ||
585 | enum sum_check_flags *result, unsigned long flags) | ||
586 | { | ||
587 | /* the cleanup routine only sets bits on validate failure, it | ||
588 | * does not clear bits on validate success... so clear it here | ||
589 | */ | ||
590 | *result = 0; | ||
591 | |||
592 | return __ioat3_prep_xor_lock(chan, result, src[0], &src[1], | ||
593 | src_cnt - 1, len, flags); | ||
594 | } | ||
595 | |||
596 | static void | ||
597 | dump_pq_desc_dbg(struct ioat2_dma_chan *ioat, struct ioat_ring_ent *desc, struct ioat_ring_ent *ext) | ||
598 | { | ||
599 | struct device *dev = to_dev(&ioat->base); | ||
600 | struct ioat_pq_descriptor *pq = desc->pq; | ||
601 | struct ioat_pq_ext_descriptor *pq_ex = ext ? ext->pq_ex : NULL; | ||
602 | struct ioat_raw_descriptor *descs[] = { (void *) pq, (void *) pq_ex }; | ||
603 | int src_cnt = src_cnt_to_sw(pq->ctl_f.src_cnt); | ||
604 | int i; | ||
605 | |||
606 | dev_dbg(dev, "desc[%d]: (%#llx->%#llx) flags: %#x" | ||
607 | " sz: %#x ctl: %#x (op: %d int: %d compl: %d pq: '%s%s' src_cnt: %d)\n", | ||
608 | desc_id(desc), (unsigned long long) desc->txd.phys, | ||
609 | (unsigned long long) (pq_ex ? pq_ex->next : pq->next), | ||
610 | desc->txd.flags, pq->size, pq->ctl, pq->ctl_f.op, pq->ctl_f.int_en, | ||
611 | pq->ctl_f.compl_write, | ||
612 | pq->ctl_f.p_disable ? "" : "p", pq->ctl_f.q_disable ? "" : "q", | ||
613 | pq->ctl_f.src_cnt); | ||
614 | for (i = 0; i < src_cnt; i++) | ||
615 | dev_dbg(dev, "\tsrc[%d]: %#llx coef: %#x\n", i, | ||
616 | (unsigned long long) pq_get_src(descs, i), pq->coef[i]); | ||
617 | dev_dbg(dev, "\tP: %#llx\n", pq->p_addr); | ||
618 | dev_dbg(dev, "\tQ: %#llx\n", pq->q_addr); | ||
619 | } | ||
620 | |||
621 | static struct dma_async_tx_descriptor * | ||
622 | __ioat3_prep_pq_lock(struct dma_chan *c, enum sum_check_flags *result, | ||
623 | const dma_addr_t *dst, const dma_addr_t *src, | ||
624 | unsigned int src_cnt, const unsigned char *scf, | ||
625 | size_t len, unsigned long flags) | ||
626 | { | ||
627 | struct ioat2_dma_chan *ioat = to_ioat2_chan(c); | ||
628 | struct ioat_chan_common *chan = &ioat->base; | ||
629 | struct ioat_ring_ent *compl_desc; | ||
630 | struct ioat_ring_ent *desc; | ||
631 | struct ioat_ring_ent *ext; | ||
632 | size_t total_len = len; | ||
633 | struct ioat_pq_descriptor *pq; | ||
634 | struct ioat_pq_ext_descriptor *pq_ex = NULL; | ||
635 | struct ioat_dma_descriptor *hw; | ||
636 | u32 offset = 0; | ||
637 | int num_descs; | ||
638 | int with_ext; | ||
639 | int i, s; | ||
640 | u16 idx; | ||
641 | u8 op = result ? IOAT_OP_PQ_VAL : IOAT_OP_PQ; | ||
642 | |||
643 | dev_dbg(to_dev(chan), "%s\n", __func__); | ||
644 | /* the engine requires at least two sources (we provide | ||
645 | * at least 1 implied source in the DMA_PREP_CONTINUE case) | ||
646 | */ | ||
647 | BUG_ON(src_cnt + dmaf_continue(flags) < 2); | ||
648 | |||
649 | num_descs = ioat2_xferlen_to_descs(ioat, len); | ||
650 | /* we need 2x the number of descriptors to cover greater than 3 | ||
651 | * sources | ||
652 | */ | ||
653 | if (src_cnt > 3 || flags & DMA_PREP_CONTINUE) { | ||
654 | with_ext = 1; | ||
655 | num_descs *= 2; | ||
656 | } else | ||
657 | with_ext = 0; | ||
658 | |||
659 | /* completion writes from the raid engine may pass completion | ||
660 | * writes from the legacy engine, so we need one extra null | ||
661 | * (legacy) descriptor to ensure all completion writes arrive in | ||
662 | * order. | ||
663 | */ | ||
664 | if (likely(num_descs) && | ||
665 | ioat2_alloc_and_lock(&idx, ioat, num_descs+1) == 0) | ||
666 | /* pass */; | ||
667 | else | ||
668 | return NULL; | ||
669 | i = 0; | ||
670 | do { | ||
671 | struct ioat_raw_descriptor *descs[2]; | ||
672 | size_t xfer_size = min_t(size_t, len, 1 << ioat->xfercap_log); | ||
673 | |||
674 | desc = ioat2_get_ring_ent(ioat, idx + i); | ||
675 | pq = desc->pq; | ||
676 | |||
677 | /* save a branch by unconditionally retrieving the | ||
678 | * extended descriptor; pq_set_src() knows not to write | ||
679 | * to it in the single-descriptor case | ||
680 | */ | ||
681 | ext = ioat2_get_ring_ent(ioat, idx + i + with_ext); | ||
682 | pq_ex = ext->pq_ex; | ||
683 | |||
684 | descs[0] = (struct ioat_raw_descriptor *) pq; | ||
685 | descs[1] = (struct ioat_raw_descriptor *) pq_ex; | ||
686 | |||
687 | for (s = 0; s < src_cnt; s++) | ||
688 | pq_set_src(descs, src[s], offset, scf[s], s); | ||
689 | |||
690 | /* see the comment for dma_maxpq in include/linux/dmaengine.h */ | ||
691 | if (dmaf_p_disabled_continue(flags)) | ||
692 | pq_set_src(descs, dst[1], offset, 1, s++); | ||
693 | else if (dmaf_continue(flags)) { | ||
694 | pq_set_src(descs, dst[0], offset, 0, s++); | ||
695 | pq_set_src(descs, dst[1], offset, 1, s++); | ||
696 | pq_set_src(descs, dst[1], offset, 0, s++); | ||
697 | } | ||
698 | pq->size = xfer_size; | ||
699 | pq->p_addr = dst[0] + offset; | ||
700 | pq->q_addr = dst[1] + offset; | ||
701 | pq->ctl = 0; | ||
702 | pq->ctl_f.op = op; | ||
703 | pq->ctl_f.src_cnt = src_cnt_to_hw(s); | ||
704 | pq->ctl_f.p_disable = !!(flags & DMA_PREP_PQ_DISABLE_P); | ||
705 | pq->ctl_f.q_disable = !!(flags & DMA_PREP_PQ_DISABLE_Q); | ||
706 | |||
707 | len -= xfer_size; | ||
708 | offset += xfer_size; | ||
709 | } while ((i += 1 + with_ext) < num_descs); | ||
710 | |||
711 | /* last pq descriptor carries the unmap parameters and fence bit */ | ||
712 | desc->txd.flags = flags; | ||
713 | desc->len = total_len; | ||
714 | if (result) | ||
715 | desc->result = result; | ||
716 | pq->ctl_f.fence = !!(flags & DMA_PREP_FENCE); | ||
717 | dump_pq_desc_dbg(ioat, desc, ext); | ||
718 | |||
719 | /* completion descriptor carries interrupt bit */ | ||
720 | compl_desc = ioat2_get_ring_ent(ioat, idx + i); | ||
721 | compl_desc->txd.flags = flags & DMA_PREP_INTERRUPT; | ||
722 | hw = compl_desc->hw; | ||
723 | hw->ctl = 0; | ||
724 | hw->ctl_f.null = 1; | ||
725 | hw->ctl_f.int_en = !!(flags & DMA_PREP_INTERRUPT); | ||
726 | hw->ctl_f.compl_write = 1; | ||
727 | hw->size = NULL_DESC_BUFFER_SIZE; | ||
728 | dump_desc_dbg(ioat, compl_desc); | ||
729 | |||
730 | /* we leave the channel locked to ensure in order submission */ | ||
731 | return &desc->txd; | ||
732 | } | ||
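
The continuation handling mirrors the dma_maxpq() rules referenced above: the implied sources appended here (dst[0], dst[1], and a zero-coefficient re-read of dst[1]) reduce how many caller-supplied sources fit in one operation. An illustrative restatement for this device, which advertises maxpq = 8 in ioat3_dma_probe() below (invented function name; dmaf_continue() and dmaf_p_disabled_continue() are the dmaengine.h helpers already used in this file):

/* illustrative only: caller-visible source limit for one ioat3 pq
 * operation when hardware maxpq is 8, per the implied-source handling
 * coded in __ioat3_prep_pq_lock() above */
static int ioat3_effective_maxpq(unsigned long flags)
{
	if (dmaf_p_disabled_continue(flags))
		return 8 - 1;	/* dst[1] is re-read as one implied source */
	else if (dmaf_continue(flags))
		return 8 - 3;	/* dst[0], dst[1] and a zero-coefficient dst[1] are implied */
	else
		return 8;
}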
733 | |||
734 | static struct dma_async_tx_descriptor * | ||
735 | ioat3_prep_pq(struct dma_chan *chan, dma_addr_t *dst, dma_addr_t *src, | ||
736 | unsigned int src_cnt, const unsigned char *scf, size_t len, | ||
737 | unsigned long flags) | ||
738 | { | ||
739 | /* handle the single source multiply case from the raid6 | ||
740 | * recovery path | ||
741 | */ | ||
742 | if (unlikely((flags & DMA_PREP_PQ_DISABLE_P) && src_cnt == 1)) { | ||
743 | dma_addr_t single_source[2]; | ||
744 | unsigned char single_source_coef[2]; | ||
745 | |||
746 | BUG_ON(flags & DMA_PREP_PQ_DISABLE_Q); | ||
747 | single_source[0] = src[0]; | ||
748 | single_source[1] = src[0]; | ||
749 | single_source_coef[0] = scf[0]; | ||
750 | single_source_coef[1] = 0; | ||
751 | |||
752 | return __ioat3_prep_pq_lock(chan, NULL, dst, single_source, 2, | ||
753 | single_source_coef, len, flags); | ||
754 | } else | ||
755 | return __ioat3_prep_pq_lock(chan, NULL, dst, src, src_cnt, scf, | ||
756 | len, flags); | ||
757 | } | ||
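
The duplicated-source trick works because Q is a GF(256) sum of coef * src terms: with sources {src[0], src[0]} and coefficients {scf[0], 0} the engine computes Q = scf[0]*src[0] xor 0*src[0] = scf[0]*src[0], exactly the single multiply the raid6 recovery path asked for, while still meeting the hardware's two-source minimum.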
758 | |||
759 | struct dma_async_tx_descriptor * | ||
760 | ioat3_prep_pq_val(struct dma_chan *chan, dma_addr_t *pq, dma_addr_t *src, | ||
761 | unsigned int src_cnt, const unsigned char *scf, size_t len, | ||
762 | enum sum_check_flags *pqres, unsigned long flags) | ||
763 | { | ||
764 | /* the cleanup routine only sets bits on validate failure, it | ||
765 | * does not clear bits on validate success... so clear it here | ||
766 | */ | ||
767 | *pqres = 0; | ||
768 | |||
769 | return __ioat3_prep_pq_lock(chan, pqres, pq, src, src_cnt, scf, len, | ||
770 | flags); | ||
771 | } | ||
772 | |||
773 | static struct dma_async_tx_descriptor * | ||
774 | ioat3_prep_pqxor(struct dma_chan *chan, dma_addr_t dst, dma_addr_t *src, | ||
775 | unsigned int src_cnt, size_t len, unsigned long flags) | ||
776 | { | ||
777 | unsigned char scf[src_cnt]; | ||
778 | dma_addr_t pq[2]; | ||
779 | |||
780 | memset(scf, 0, src_cnt); | ||
781 | flags |= DMA_PREP_PQ_DISABLE_Q; | ||
782 | pq[0] = dst; | ||
783 | pq[1] = ~0; | ||
784 | |||
785 | return __ioat3_prep_pq_lock(chan, NULL, pq, src, src_cnt, scf, len, | ||
786 | flags); | ||
787 | } | ||
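
This emulation works because the two raid6 outputs are computed independently: P is the plain xor of the sources while Q is the coefficient-weighted GF(256) sum, so disabling Q and zeroing scf[] turns a pq descriptor into an xor into pq[0] (pq[1] is set to ~0 only to keep the unused field initialized). A rough software model of that relationship, assuming a hypothetical gf256_mul() helper:

#include <stddef.h>
#include <stdint.h>

/* illustrative software model of the pq operation: P ignores the
 * coefficients, so a P-only pq (Q disabled, scf[] all zero) is an xor */
static void pq_model(uint8_t *p, uint8_t *q, uint8_t **src,
		     const uint8_t *scf, int src_cnt, size_t len)
{
	size_t i;
	int s;

	for (i = 0; i < len; i++) {
		uint8_t px = 0, qx = 0;

		for (s = 0; s < src_cnt; s++) {
			px ^= src[s][i];
			qx ^= gf256_mul(scf[s], src[s][i]);	/* hypothetical GF(2^8) multiply */
		}
		p[i] = px;
		q[i] = qx;
	}
}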
788 | |||
789 | struct dma_async_tx_descriptor * | ||
790 | ioat3_prep_pqxor_val(struct dma_chan *chan, dma_addr_t *src, | ||
791 | unsigned int src_cnt, size_t len, | ||
792 | enum sum_check_flags *result, unsigned long flags) | ||
793 | { | ||
794 | unsigned char scf[src_cnt]; | ||
795 | dma_addr_t pq[2]; | ||
796 | |||
797 | /* the cleanup routine only sets bits on validate failure, it | ||
798 | * does not clear bits on validate success... so clear it here | ||
799 | */ | ||
800 | *result = 0; | ||
801 | |||
802 | memset(scf, 0, src_cnt); | ||
803 | flags |= DMA_PREP_PQ_DISABLE_Q; | ||
804 | pq[0] = src[0]; | ||
805 | pq[1] = ~0; | ||
806 | |||
807 | return __ioat3_prep_pq_lock(chan, result, pq, &src[1], src_cnt - 1, scf, | ||
808 | len, flags); | ||
809 | } | ||
810 | |||
811 | static struct dma_async_tx_descriptor * | ||
812 | ioat3_prep_interrupt_lock(struct dma_chan *c, unsigned long flags) | ||
813 | { | ||
814 | struct ioat2_dma_chan *ioat = to_ioat2_chan(c); | ||
815 | struct ioat_ring_ent *desc; | ||
816 | struct ioat_dma_descriptor *hw; | ||
817 | u16 idx; | ||
818 | |||
819 | if (ioat2_alloc_and_lock(&idx, ioat, 1) == 0) | ||
820 | desc = ioat2_get_ring_ent(ioat, idx); | ||
821 | else | ||
822 | return NULL; | ||
823 | |||
824 | hw = desc->hw; | ||
825 | hw->ctl = 0; | ||
826 | hw->ctl_f.null = 1; | ||
827 | hw->ctl_f.int_en = 1; | ||
828 | hw->ctl_f.fence = !!(flags & DMA_PREP_FENCE); | ||
829 | hw->ctl_f.compl_write = 1; | ||
830 | hw->size = NULL_DESC_BUFFER_SIZE; | ||
831 | hw->src_addr = 0; | ||
832 | hw->dst_addr = 0; | ||
833 | |||
834 | desc->txd.flags = flags; | ||
835 | desc->len = 1; | ||
836 | |||
837 | dump_desc_dbg(ioat, desc); | ||
838 | |||
839 | /* we leave the channel locked to ensure in order submission */ | ||
840 | return &desc->txd; | ||
841 | } | ||
842 | |||
843 | static void __devinit ioat3_dma_test_callback(void *dma_async_param) | ||
844 | { | ||
845 | struct completion *cmp = dma_async_param; | ||
846 | |||
847 | complete(cmp); | ||
848 | } | ||
849 | |||
850 | #define IOAT_NUM_SRC_TEST 6 /* must be <= 8 */ | ||
851 | static int __devinit ioat_xor_val_self_test(struct ioatdma_device *device) | ||
852 | { | ||
853 | int i, src_idx; | ||
854 | struct page *dest; | ||
855 | struct page *xor_srcs[IOAT_NUM_SRC_TEST]; | ||
856 | struct page *xor_val_srcs[IOAT_NUM_SRC_TEST + 1]; | ||
857 | dma_addr_t dma_srcs[IOAT_NUM_SRC_TEST + 1]; | ||
858 | dma_addr_t dma_addr, dest_dma; | ||
859 | struct dma_async_tx_descriptor *tx; | ||
860 | struct dma_chan *dma_chan; | ||
861 | dma_cookie_t cookie; | ||
862 | u8 cmp_byte = 0; | ||
863 | u32 cmp_word; | ||
864 | u32 xor_val_result; | ||
865 | int err = 0; | ||
866 | struct completion cmp; | ||
867 | unsigned long tmo; | ||
868 | struct device *dev = &device->pdev->dev; | ||
869 | struct dma_device *dma = &device->common; | ||
870 | |||
871 | dev_dbg(dev, "%s\n", __func__); | ||
872 | |||
873 | if (!dma_has_cap(DMA_XOR, dma->cap_mask)) | ||
874 | return 0; | ||
875 | |||
876 | for (src_idx = 0; src_idx < IOAT_NUM_SRC_TEST; src_idx++) { | ||
877 | xor_srcs[src_idx] = alloc_page(GFP_KERNEL); | ||
878 | if (!xor_srcs[src_idx]) { | ||
879 | while (src_idx--) | ||
880 | __free_page(xor_srcs[src_idx]); | ||
881 | return -ENOMEM; | ||
882 | } | ||
883 | } | ||
884 | |||
885 | dest = alloc_page(GFP_KERNEL); | ||
886 | if (!dest) { | ||
887 | while (src_idx--) | ||
888 | __free_page(xor_srcs[src_idx]); | ||
889 | return -ENOMEM; | ||
890 | } | ||
891 | |||
892 | /* Fill in src buffers */ | ||
893 | for (src_idx = 0; src_idx < IOAT_NUM_SRC_TEST; src_idx++) { | ||
894 | u8 *ptr = page_address(xor_srcs[src_idx]); | ||
895 | for (i = 0; i < PAGE_SIZE; i++) | ||
896 | ptr[i] = (1 << src_idx); | ||
897 | } | ||
898 | |||
899 | for (src_idx = 0; src_idx < IOAT_NUM_SRC_TEST; src_idx++) | ||
900 | cmp_byte ^= (u8) (1 << src_idx); | ||
901 | |||
902 | cmp_word = (cmp_byte << 24) | (cmp_byte << 16) | | ||
903 | (cmp_byte << 8) | cmp_byte; | ||
904 | |||
905 | memset(page_address(dest), 0, PAGE_SIZE); | ||
906 | |||
907 | dma_chan = container_of(dma->channels.next, struct dma_chan, | ||
908 | device_node); | ||
909 | if (dma->device_alloc_chan_resources(dma_chan) < 1) { | ||
910 | err = -ENODEV; | ||
911 | goto out; | ||
912 | } | ||
913 | |||
914 | /* test xor */ | ||
915 | dest_dma = dma_map_page(dev, dest, 0, PAGE_SIZE, DMA_FROM_DEVICE); | ||
916 | for (i = 0; i < IOAT_NUM_SRC_TEST; i++) | ||
917 | dma_srcs[i] = dma_map_page(dev, xor_srcs[i], 0, PAGE_SIZE, | ||
918 | DMA_TO_DEVICE); | ||
919 | tx = dma->device_prep_dma_xor(dma_chan, dest_dma, dma_srcs, | ||
920 | IOAT_NUM_SRC_TEST, PAGE_SIZE, | ||
921 | DMA_PREP_INTERRUPT); | ||
922 | |||
923 | if (!tx) { | ||
924 | dev_err(dev, "Self-test xor prep failed\n"); | ||
925 | err = -ENODEV; | ||
926 | goto free_resources; | ||
927 | } | ||
928 | |||
929 | async_tx_ack(tx); | ||
930 | init_completion(&cmp); | ||
931 | tx->callback = ioat3_dma_test_callback; | ||
932 | tx->callback_param = &cmp; | ||
933 | cookie = tx->tx_submit(tx); | ||
934 | if (cookie < 0) { | ||
935 | dev_err(dev, "Self-test xor setup failed\n"); | ||
936 | err = -ENODEV; | ||
937 | goto free_resources; | ||
938 | } | ||
939 | dma->device_issue_pending(dma_chan); | ||
940 | |||
941 | tmo = wait_for_completion_timeout(&cmp, msecs_to_jiffies(3000)); | ||
942 | |||
943 | if (dma->device_is_tx_complete(dma_chan, cookie, NULL, NULL) != DMA_SUCCESS) { | ||
944 | dev_err(dev, "Self-test xor timed out\n"); | ||
945 | err = -ENODEV; | ||
946 | goto free_resources; | ||
947 | } | ||
948 | |||
949 | dma_sync_single_for_cpu(dev, dest_dma, PAGE_SIZE, DMA_FROM_DEVICE); | ||
950 | for (i = 0; i < (PAGE_SIZE / sizeof(u32)); i++) { | ||
951 | u32 *ptr = page_address(dest); | ||
952 | if (ptr[i] != cmp_word) { | ||
953 | dev_err(dev, "Self-test xor failed compare\n"); | ||
954 | err = -ENODEV; | ||
955 | goto free_resources; | ||
956 | } | ||
957 | } | ||
958 | dma_sync_single_for_device(dev, dest_dma, PAGE_SIZE, DMA_TO_DEVICE); | ||
959 | |||
960 | /* skip validate if the capability is not present */ | ||
961 | if (!dma_has_cap(DMA_XOR_VAL, dma_chan->device->cap_mask)) | ||
962 | goto free_resources; | ||
963 | |||
964 | /* validate the sources with the destination page */ | ||
965 | for (i = 0; i < IOAT_NUM_SRC_TEST; i++) | ||
966 | xor_val_srcs[i] = xor_srcs[i]; | ||
967 | xor_val_srcs[i] = dest; | ||
968 | |||
969 | xor_val_result = 1; | ||
970 | |||
971 | for (i = 0; i < IOAT_NUM_SRC_TEST + 1; i++) | ||
972 | dma_srcs[i] = dma_map_page(dev, xor_val_srcs[i], 0, PAGE_SIZE, | ||
973 | DMA_TO_DEVICE); | ||
974 | tx = dma->device_prep_dma_xor_val(dma_chan, dma_srcs, | ||
975 | IOAT_NUM_SRC_TEST + 1, PAGE_SIZE, | ||
976 | &xor_val_result, DMA_PREP_INTERRUPT); | ||
977 | if (!tx) { | ||
978 | dev_err(dev, "Self-test zero prep failed\n"); | ||
979 | err = -ENODEV; | ||
980 | goto free_resources; | ||
981 | } | ||
982 | |||
983 | async_tx_ack(tx); | ||
984 | init_completion(&cmp); | ||
985 | tx->callback = ioat3_dma_test_callback; | ||
986 | tx->callback_param = &cmp; | ||
987 | cookie = tx->tx_submit(tx); | ||
988 | if (cookie < 0) { | ||
989 | dev_err(dev, "Self-test zero setup failed\n"); | ||
990 | err = -ENODEV; | ||
991 | goto free_resources; | ||
992 | } | ||
993 | dma->device_issue_pending(dma_chan); | ||
994 | |||
995 | tmo = wait_for_completion_timeout(&cmp, msecs_to_jiffies(3000)); | ||
996 | |||
997 | if (dma->device_is_tx_complete(dma_chan, cookie, NULL, NULL) != DMA_SUCCESS) { | ||
998 | dev_err(dev, "Self-test validate timed out\n"); | ||
999 | err = -ENODEV; | ||
1000 | goto free_resources; | ||
1001 | } | ||
1002 | |||
1003 | if (xor_val_result != 0) { | ||
1004 | dev_err(dev, "Self-test validate failed compare\n"); | ||
1005 | err = -ENODEV; | ||
1006 | goto free_resources; | ||
1007 | } | ||
1008 | |||
1009 | /* skip memset if the capability is not present */ | ||
1010 | if (!dma_has_cap(DMA_MEMSET, dma_chan->device->cap_mask)) | ||
1011 | goto free_resources; | ||
1012 | |||
1013 | /* test memset */ | ||
1014 | dma_addr = dma_map_page(dev, dest, 0, | ||
1015 | PAGE_SIZE, DMA_FROM_DEVICE); | ||
1016 | tx = dma->device_prep_dma_memset(dma_chan, dma_addr, 0, PAGE_SIZE, | ||
1017 | DMA_PREP_INTERRUPT); | ||
1018 | if (!tx) { | ||
1019 | dev_err(dev, "Self-test memset prep failed\n"); | ||
1020 | err = -ENODEV; | ||
1021 | goto free_resources; | ||
1022 | } | ||
1023 | |||
1024 | async_tx_ack(tx); | ||
1025 | init_completion(&cmp); | ||
1026 | tx->callback = ioat3_dma_test_callback; | ||
1027 | tx->callback_param = &cmp; | ||
1028 | cookie = tx->tx_submit(tx); | ||
1029 | if (cookie < 0) { | ||
1030 | dev_err(dev, "Self-test memset setup failed\n"); | ||
1031 | err = -ENODEV; | ||
1032 | goto free_resources; | ||
1033 | } | ||
1034 | dma->device_issue_pending(dma_chan); | ||
1035 | |||
1036 | tmo = wait_for_completion_timeout(&cmp, msecs_to_jiffies(3000)); | ||
1037 | |||
1038 | if (dma->device_is_tx_complete(dma_chan, cookie, NULL, NULL) != DMA_SUCCESS) { | ||
1039 | dev_err(dev, "Self-test memset timed out\n"); | ||
1040 | err = -ENODEV; | ||
1041 | goto free_resources; | ||
1042 | } | ||
1043 | |||
1044 | for (i = 0; i < PAGE_SIZE/sizeof(u32); i++) { | ||
1045 | u32 *ptr = page_address(dest); | ||
1046 | if (ptr[i]) { | ||
1047 | dev_err(dev, "Self-test memset failed compare\n"); | ||
1048 | err = -ENODEV; | ||
1049 | goto free_resources; | ||
1050 | } | ||
1051 | } | ||
1052 | |||
1053 | /* test for non-zero parity sum */ | ||
1054 | xor_val_result = 0; | ||
1055 | for (i = 0; i < IOAT_NUM_SRC_TEST + 1; i++) | ||
1056 | dma_srcs[i] = dma_map_page(dev, xor_val_srcs[i], 0, PAGE_SIZE, | ||
1057 | DMA_TO_DEVICE); | ||
1058 | tx = dma->device_prep_dma_xor_val(dma_chan, dma_srcs, | ||
1059 | IOAT_NUM_SRC_TEST + 1, PAGE_SIZE, | ||
1060 | &xor_val_result, DMA_PREP_INTERRUPT); | ||
1061 | if (!tx) { | ||
1062 | dev_err(dev, "Self-test 2nd zero prep failed\n"); | ||
1063 | err = -ENODEV; | ||
1064 | goto free_resources; | ||
1065 | } | ||
1066 | |||
1067 | async_tx_ack(tx); | ||
1068 | init_completion(&cmp); | ||
1069 | tx->callback = ioat3_dma_test_callback; | ||
1070 | tx->callback_param = &cmp; | ||
1071 | cookie = tx->tx_submit(tx); | ||
1072 | if (cookie < 0) { | ||
1073 | dev_err(dev, "Self-test 2nd zero setup failed\n"); | ||
1074 | err = -ENODEV; | ||
1075 | goto free_resources; | ||
1076 | } | ||
1077 | dma->device_issue_pending(dma_chan); | ||
1078 | |||
1079 | tmo = wait_for_completion_timeout(&cmp, msecs_to_jiffies(3000)); | ||
1080 | |||
1081 | if (dma->device_is_tx_complete(dma_chan, cookie, NULL, NULL) != DMA_SUCCESS) { | ||
1082 | dev_err(dev, "Self-test 2nd validate timed out\n"); | ||
1083 | err = -ENODEV; | ||
1084 | goto free_resources; | ||
1085 | } | ||
1086 | |||
1087 | if (xor_val_result != SUM_CHECK_P_RESULT) { | ||
1088 | dev_err(dev, "Self-test validate failed compare\n"); | ||
1089 | err = -ENODEV; | ||
1090 | goto free_resources; | ||
1091 | } | ||
1092 | |||
1093 | free_resources: | ||
1094 | dma->device_free_chan_resources(dma_chan); | ||
1095 | out: | ||
1096 | src_idx = IOAT_NUM_SRC_TEST; | ||
1097 | while (src_idx--) | ||
1098 | __free_page(xor_srcs[src_idx]); | ||
1099 | __free_page(dest); | ||
1100 | return err; | ||
1101 | } | ||
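
For reference, the expected values in the test above follow directly from the page fills: source page src_idx holds the byte (1 << src_idx), so with IOAT_NUM_SRC_TEST = 6 the xor result is cmp_byte = 0x01 ^ 0x02 ^ 0x04 ^ 0x08 ^ 0x10 ^ 0x20 = 0x3f and cmp_word = 0x3f3f3f3f. The first validate pass adds the freshly written destination as a seventh source, so its parity must come out clean (xor_val_result == 0); after the memset test zeroes the destination, the second validate pass must instead report SUM_CHECK_P_RESULT.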
1102 | |||
1103 | static int __devinit ioat3_dma_self_test(struct ioatdma_device *device) | ||
1104 | { | ||
1105 | int rc = ioat_dma_self_test(device); | ||
1106 | |||
1107 | if (rc) | ||
1108 | return rc; | ||
1109 | |||
1110 | rc = ioat_xor_val_self_test(device); | ||
1111 | if (rc) | ||
1112 | return rc; | ||
1113 | |||
1114 | return 0; | ||
1115 | } | ||
1116 | |||
1117 | int __devinit ioat3_dma_probe(struct ioatdma_device *device, int dca) | ||
1118 | { | ||
1119 | struct pci_dev *pdev = device->pdev; | ||
1120 | struct dma_device *dma; | ||
1121 | struct dma_chan *c; | ||
1122 | struct ioat_chan_common *chan; | ||
1123 | bool is_raid_device = false; | ||
1124 | int err; | ||
1125 | u16 dev_id; | ||
1126 | u32 cap; | ||
1127 | |||
1128 | device->enumerate_channels = ioat2_enumerate_channels; | ||
1129 | device->self_test = ioat3_dma_self_test; | ||
1130 | dma = &device->common; | ||
1131 | dma->device_prep_dma_memcpy = ioat2_dma_prep_memcpy_lock; | ||
1132 | dma->device_issue_pending = ioat2_issue_pending; | ||
1133 | dma->device_alloc_chan_resources = ioat2_alloc_chan_resources; | ||
1134 | dma->device_free_chan_resources = ioat2_free_chan_resources; | ||
1135 | |||
1136 | dma_cap_set(DMA_INTERRUPT, dma->cap_mask); | ||
1137 | dma->device_prep_dma_interrupt = ioat3_prep_interrupt_lock; | ||
1138 | |||
1139 | cap = readl(device->reg_base + IOAT_DMA_CAP_OFFSET); | ||
1140 | if (cap & IOAT_CAP_XOR) { | ||
1141 | is_raid_device = true; | ||
1142 | dma->max_xor = 8; | ||
1143 | dma->xor_align = 2; | ||
1144 | |||
1145 | dma_cap_set(DMA_XOR, dma->cap_mask); | ||
1146 | dma->device_prep_dma_xor = ioat3_prep_xor; | ||
1147 | |||
1148 | dma_cap_set(DMA_XOR_VAL, dma->cap_mask); | ||
1149 | dma->device_prep_dma_xor_val = ioat3_prep_xor_val; | ||
1150 | } | ||
1151 | if (cap & IOAT_CAP_PQ) { | ||
1152 | is_raid_device = true; | ||
1153 | dma_set_maxpq(dma, 8, 0); | ||
1154 | dma->pq_align = 2; | ||
1155 | |||
1156 | dma_cap_set(DMA_PQ, dma->cap_mask); | ||
1157 | dma->device_prep_dma_pq = ioat3_prep_pq; | ||
1158 | |||
1159 | dma_cap_set(DMA_PQ_VAL, dma->cap_mask); | ||
1160 | dma->device_prep_dma_pq_val = ioat3_prep_pq_val; | ||
1161 | |||
1162 | if (!(cap & IOAT_CAP_XOR)) { | ||
1163 | dma->max_xor = 8; | ||
1164 | dma->xor_align = 2; | ||
1165 | |||
1166 | dma_cap_set(DMA_XOR, dma->cap_mask); | ||
1167 | dma->device_prep_dma_xor = ioat3_prep_pqxor; | ||
1168 | |||
1169 | dma_cap_set(DMA_XOR_VAL, dma->cap_mask); | ||
1170 | dma->device_prep_dma_xor_val = ioat3_prep_pqxor_val; | ||
1171 | } | ||
1172 | } | ||
1173 | if (is_raid_device && (cap & IOAT_CAP_FILL_BLOCK)) { | ||
1174 | dma_cap_set(DMA_MEMSET, dma->cap_mask); | ||
1175 | dma->device_prep_dma_memset = ioat3_prep_memset_lock; | ||
1176 | } | ||
1177 | |||
1178 | |||
1179 | if (is_raid_device) { | ||
1180 | dma->device_is_tx_complete = ioat3_is_complete; | ||
1181 | device->cleanup_tasklet = ioat3_cleanup_tasklet; | ||
1182 | device->timer_fn = ioat3_timer_event; | ||
1183 | } else { | ||
1184 | dma->device_is_tx_complete = ioat2_is_complete; | ||
1185 | device->cleanup_tasklet = ioat2_cleanup_tasklet; | ||
1186 | device->timer_fn = ioat2_timer_event; | ||
1187 | } | ||
1188 | |||
1189 | /* -= IOAT ver.3 workarounds =- */ | ||
1190 | /* Write CHANERRMSK_INT with 3E07h to mask out the errors | ||
1191 | * that can cause stability issues for IOAT ver.3 | ||
1192 | */ | ||
1193 | pci_write_config_dword(pdev, IOAT_PCI_CHANERRMASK_INT_OFFSET, 0x3e07); | ||
1194 | |||
1195 | /* Clear DMAUNCERRSTS Cfg-Reg Parity Error status bit | ||
1196 | * (workaround for spurious config parity error after restart) | ||
1197 | */ | ||
1198 | pci_read_config_word(pdev, IOAT_PCI_DEVICE_ID_OFFSET, &dev_id); | ||
1199 | if (dev_id == PCI_DEVICE_ID_INTEL_IOAT_TBG0) | ||
1200 | pci_write_config_dword(pdev, IOAT_PCI_DMAUNCERRSTS_OFFSET, 0x10); | ||
1201 | |||
1202 | err = ioat_probe(device); | ||
1203 | if (err) | ||
1204 | return err; | ||
1205 | ioat_set_tcp_copy_break(262144); | ||
1206 | |||
1207 | list_for_each_entry(c, &dma->channels, device_node) { | ||
1208 | chan = to_chan_common(c); | ||
1209 | writel(IOAT_DMA_DCA_ANY_CPU, | ||
1210 | chan->reg_base + IOAT_DCACTRL_OFFSET); | ||
1211 | } | ||
1212 | |||
1213 | err = ioat_register(device); | ||
1214 | if (err) | ||
1215 | return err; | ||
1216 | |||
1217 | ioat_kobject_add(device, &ioat2_ktype); | ||
1218 | |||
1219 | if (dca) | ||
1220 | device->dca = ioat3_dca_init(pdev, device->reg_base); | ||
1221 | |||
1222 | return 0; | ||
1223 | } | ||