aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/dma/ioat/dma_v3.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/dma/ioat/dma_v3.c')
-rw-r--r--drivers/dma/ioat/dma_v3.c143
1 files changed, 48 insertions, 95 deletions
diff --git a/drivers/dma/ioat/dma_v3.c b/drivers/dma/ioat/dma_v3.c
index 6740e319c9cf..1cdd22e1051b 100644
--- a/drivers/dma/ioat/dma_v3.c
+++ b/drivers/dma/ioat/dma_v3.c
@@ -260,8 +260,8 @@ static void __cleanup(struct ioat2_dma_chan *ioat, unsigned long phys_complete)
260 struct ioat_chan_common *chan = &ioat->base; 260 struct ioat_chan_common *chan = &ioat->base;
261 struct ioat_ring_ent *desc; 261 struct ioat_ring_ent *desc;
262 bool seen_current = false; 262 bool seen_current = false;
263 int idx = ioat->tail, i;
263 u16 active; 264 u16 active;
264 int i;
265 265
266 dev_dbg(to_dev(chan), "%s: head: %#x tail: %#x issued: %#x\n", 266 dev_dbg(to_dev(chan), "%s: head: %#x tail: %#x issued: %#x\n",
267 __func__, ioat->head, ioat->tail, ioat->issued); 267 __func__, ioat->head, ioat->tail, ioat->issued);
@@ -270,13 +270,14 @@ static void __cleanup(struct ioat2_dma_chan *ioat, unsigned long phys_complete)
270 for (i = 0; i < active && !seen_current; i++) { 270 for (i = 0; i < active && !seen_current; i++) {
271 struct dma_async_tx_descriptor *tx; 271 struct dma_async_tx_descriptor *tx;
272 272
273 prefetch(ioat2_get_ring_ent(ioat, ioat->tail + i + 1)); 273 smp_read_barrier_depends();
274 desc = ioat2_get_ring_ent(ioat, ioat->tail + i); 274 prefetch(ioat2_get_ring_ent(ioat, idx + i + 1));
275 desc = ioat2_get_ring_ent(ioat, idx + i);
275 dump_desc_dbg(ioat, desc); 276 dump_desc_dbg(ioat, desc);
276 tx = &desc->txd; 277 tx = &desc->txd;
277 if (tx->cookie) { 278 if (tx->cookie) {
278 chan->completed_cookie = tx->cookie; 279 chan->completed_cookie = tx->cookie;
279 ioat3_dma_unmap(ioat, desc, ioat->tail + i); 280 ioat3_dma_unmap(ioat, desc, idx + i);
280 tx->cookie = 0; 281 tx->cookie = 0;
281 if (tx->callback) { 282 if (tx->callback) {
282 tx->callback(tx->callback_param); 283 tx->callback(tx->callback_param);
@@ -293,69 +294,30 @@ static void __cleanup(struct ioat2_dma_chan *ioat, unsigned long phys_complete)
293 i++; 294 i++;
294 } 295 }
295 } 296 }
296 ioat->tail += i; 297 smp_mb(); /* finish all descriptor reads before incrementing tail */
298 ioat->tail = idx + i;
297 BUG_ON(active && !seen_current); /* no active descs have written a completion? */ 299 BUG_ON(active && !seen_current); /* no active descs have written a completion? */
298 chan->last_completion = phys_complete; 300 chan->last_completion = phys_complete;
299 301
300 active = ioat2_ring_active(ioat); 302 if (active - i == 0) {
301 if (active == 0) {
302 dev_dbg(to_dev(chan), "%s: cancel completion timeout\n", 303 dev_dbg(to_dev(chan), "%s: cancel completion timeout\n",
303 __func__); 304 __func__);
304 clear_bit(IOAT_COMPLETION_PENDING, &chan->state); 305 clear_bit(IOAT_COMPLETION_PENDING, &chan->state);
305 mod_timer(&chan->timer, jiffies + IDLE_TIMEOUT); 306 mod_timer(&chan->timer, jiffies + IDLE_TIMEOUT);
306 } 307 }
307 /* 5 microsecond delay per pending descriptor */ 308 /* 5 microsecond delay per pending descriptor */
308 writew(min((5 * active), IOAT_INTRDELAY_MASK), 309 writew(min((5 * (active - i)), IOAT_INTRDELAY_MASK),
309 chan->device->reg_base + IOAT_INTRDELAY_OFFSET); 310 chan->device->reg_base + IOAT_INTRDELAY_OFFSET);
310} 311}
311 312
312/* try to cleanup, but yield (via spin_trylock) to incoming submissions 313static void ioat3_cleanup(struct ioat2_dma_chan *ioat)
313 * with the expectation that we will immediately poll again shortly
314 */
315static void ioat3_cleanup_poll(struct ioat2_dma_chan *ioat)
316{ 314{
317 struct ioat_chan_common *chan = &ioat->base; 315 struct ioat_chan_common *chan = &ioat->base;
318 unsigned long phys_complete; 316 unsigned long phys_complete;
319 317
320 prefetch(chan->completion);
321
322 if (!spin_trylock_bh(&chan->cleanup_lock))
323 return;
324
325 if (!ioat_cleanup_preamble(chan, &phys_complete)) {
326 spin_unlock_bh(&chan->cleanup_lock);
327 return;
328 }
329
330 if (!spin_trylock_bh(&ioat->ring_lock)) {
331 spin_unlock_bh(&chan->cleanup_lock);
332 return;
333 }
334
335 __cleanup(ioat, phys_complete);
336
337 spin_unlock_bh(&ioat->ring_lock);
338 spin_unlock_bh(&chan->cleanup_lock);
339}
340
341/* run cleanup now because we already delayed the interrupt via INTRDELAY */
342static void ioat3_cleanup_sync(struct ioat2_dma_chan *ioat)
343{
344 struct ioat_chan_common *chan = &ioat->base;
345 unsigned long phys_complete;
346
347 prefetch(chan->completion);
348
349 spin_lock_bh(&chan->cleanup_lock); 318 spin_lock_bh(&chan->cleanup_lock);
350 if (!ioat_cleanup_preamble(chan, &phys_complete)) { 319 if (ioat_cleanup_preamble(chan, &phys_complete))
351 spin_unlock_bh(&chan->cleanup_lock); 320 __cleanup(ioat, phys_complete);
352 return;
353 }
354 spin_lock_bh(&ioat->ring_lock);
355
356 __cleanup(ioat, phys_complete);
357
358 spin_unlock_bh(&ioat->ring_lock);
359 spin_unlock_bh(&chan->cleanup_lock); 321 spin_unlock_bh(&chan->cleanup_lock);
360} 322}
361 323
@@ -363,7 +325,7 @@ static void ioat3_cleanup_event(unsigned long data)
363{ 325{
364 struct ioat2_dma_chan *ioat = to_ioat2_chan((void *) data); 326 struct ioat2_dma_chan *ioat = to_ioat2_chan((void *) data);
365 327
366 ioat3_cleanup_sync(ioat); 328 ioat3_cleanup(ioat);
367 writew(IOAT_CHANCTRL_RUN, ioat->base.reg_base + IOAT_CHANCTRL_OFFSET); 329 writew(IOAT_CHANCTRL_RUN, ioat->base.reg_base + IOAT_CHANCTRL_OFFSET);
368} 330}
369 331
@@ -384,12 +346,10 @@ static void ioat3_timer_event(unsigned long data)
384 struct ioat2_dma_chan *ioat = to_ioat2_chan((void *) data); 346 struct ioat2_dma_chan *ioat = to_ioat2_chan((void *) data);
385 struct ioat_chan_common *chan = &ioat->base; 347 struct ioat_chan_common *chan = &ioat->base;
386 348
387 spin_lock_bh(&chan->cleanup_lock);
388 if (test_bit(IOAT_COMPLETION_PENDING, &chan->state)) { 349 if (test_bit(IOAT_COMPLETION_PENDING, &chan->state)) {
389 unsigned long phys_complete; 350 unsigned long phys_complete;
390 u64 status; 351 u64 status;
391 352
392 spin_lock_bh(&ioat->ring_lock);
393 status = ioat_chansts(chan); 353 status = ioat_chansts(chan);
394 354
395 /* when halted due to errors check for channel 355 /* when halted due to errors check for channel
@@ -408,26 +368,31 @@ static void ioat3_timer_event(unsigned long data)
408 * acknowledged a pending completion once, then be more 368 * acknowledged a pending completion once, then be more
409 * forceful with a restart 369 * forceful with a restart
410 */ 370 */
371 spin_lock_bh(&chan->cleanup_lock);
411 if (ioat_cleanup_preamble(chan, &phys_complete)) 372 if (ioat_cleanup_preamble(chan, &phys_complete))
412 __cleanup(ioat, phys_complete); 373 __cleanup(ioat, phys_complete);
413 else if (test_bit(IOAT_COMPLETION_ACK, &chan->state)) 374 else if (test_bit(IOAT_COMPLETION_ACK, &chan->state)) {
375 spin_lock_bh(&ioat->prep_lock);
414 ioat3_restart_channel(ioat); 376 ioat3_restart_channel(ioat);
415 else { 377 spin_unlock_bh(&ioat->prep_lock);
378 } else {
416 set_bit(IOAT_COMPLETION_ACK, &chan->state); 379 set_bit(IOAT_COMPLETION_ACK, &chan->state);
417 mod_timer(&chan->timer, jiffies + COMPLETION_TIMEOUT); 380 mod_timer(&chan->timer, jiffies + COMPLETION_TIMEOUT);
418 } 381 }
419 spin_unlock_bh(&ioat->ring_lock); 382 spin_unlock_bh(&chan->cleanup_lock);
420 } else { 383 } else {
421 u16 active; 384 u16 active;
422 385
423 /* if the ring is idle, empty, and oversized try to step 386 /* if the ring is idle, empty, and oversized try to step
424 * down the size 387 * down the size
425 */ 388 */
426 spin_lock_bh(&ioat->ring_lock); 389 spin_lock_bh(&chan->cleanup_lock);
390 spin_lock_bh(&ioat->prep_lock);
427 active = ioat2_ring_active(ioat); 391 active = ioat2_ring_active(ioat);
428 if (active == 0 && ioat->alloc_order > ioat_get_alloc_order()) 392 if (active == 0 && ioat->alloc_order > ioat_get_alloc_order())
429 reshape_ring(ioat, ioat->alloc_order-1); 393 reshape_ring(ioat, ioat->alloc_order-1);
430 spin_unlock_bh(&ioat->ring_lock); 394 spin_unlock_bh(&ioat->prep_lock);
395 spin_unlock_bh(&chan->cleanup_lock);
431 396
432 /* keep shrinking until we get back to our minimum 397 /* keep shrinking until we get back to our minimum
433 * default size 398 * default size
@@ -435,21 +400,20 @@ static void ioat3_timer_event(unsigned long data)
435 if (ioat->alloc_order > ioat_get_alloc_order()) 400 if (ioat->alloc_order > ioat_get_alloc_order())
436 mod_timer(&chan->timer, jiffies + IDLE_TIMEOUT); 401 mod_timer(&chan->timer, jiffies + IDLE_TIMEOUT);
437 } 402 }
438 spin_unlock_bh(&chan->cleanup_lock);
439} 403}
440 404
441static enum dma_status 405static enum dma_status
442ioat3_is_complete(struct dma_chan *c, dma_cookie_t cookie, 406ioat3_tx_status(struct dma_chan *c, dma_cookie_t cookie,
443 dma_cookie_t *done, dma_cookie_t *used) 407 struct dma_tx_state *txstate)
444{ 408{
445 struct ioat2_dma_chan *ioat = to_ioat2_chan(c); 409 struct ioat2_dma_chan *ioat = to_ioat2_chan(c);
446 410
447 if (ioat_is_complete(c, cookie, done, used) == DMA_SUCCESS) 411 if (ioat_tx_status(c, cookie, txstate) == DMA_SUCCESS)
448 return DMA_SUCCESS; 412 return DMA_SUCCESS;
449 413
450 ioat3_cleanup_poll(ioat); 414 ioat3_cleanup(ioat);
451 415
452 return ioat_is_complete(c, cookie, done, used); 416 return ioat_tx_status(c, cookie, txstate);
453} 417}
454 418
455static struct dma_async_tx_descriptor * 419static struct dma_async_tx_descriptor *
@@ -460,15 +424,12 @@ ioat3_prep_memset_lock(struct dma_chan *c, dma_addr_t dest, int value,
460 struct ioat_ring_ent *desc; 424 struct ioat_ring_ent *desc;
461 size_t total_len = len; 425 size_t total_len = len;
462 struct ioat_fill_descriptor *fill; 426 struct ioat_fill_descriptor *fill;
463 int num_descs;
464 u64 src_data = (0x0101010101010101ULL) * (value & 0xff); 427 u64 src_data = (0x0101010101010101ULL) * (value & 0xff);
465 u16 idx; 428 int num_descs, idx, i;
466 int i;
467 429
468 num_descs = ioat2_xferlen_to_descs(ioat, len); 430 num_descs = ioat2_xferlen_to_descs(ioat, len);
469 if (likely(num_descs) && 431 if (likely(num_descs) && ioat2_check_space_lock(ioat, num_descs) == 0)
470 ioat2_alloc_and_lock(&idx, ioat, num_descs) == 0) 432 idx = ioat->head;
471 /* pass */;
472 else 433 else
473 return NULL; 434 return NULL;
474 i = 0; 435 i = 0;
@@ -513,11 +474,8 @@ __ioat3_prep_xor_lock(struct dma_chan *c, enum sum_check_flags *result,
513 struct ioat_xor_descriptor *xor; 474 struct ioat_xor_descriptor *xor;
514 struct ioat_xor_ext_descriptor *xor_ex = NULL; 475 struct ioat_xor_ext_descriptor *xor_ex = NULL;
515 struct ioat_dma_descriptor *hw; 476 struct ioat_dma_descriptor *hw;
477 int num_descs, with_ext, idx, i;
516 u32 offset = 0; 478 u32 offset = 0;
517 int num_descs;
518 int with_ext;
519 int i;
520 u16 idx;
521 u8 op = result ? IOAT_OP_XOR_VAL : IOAT_OP_XOR; 479 u8 op = result ? IOAT_OP_XOR_VAL : IOAT_OP_XOR;
522 480
523 BUG_ON(src_cnt < 2); 481 BUG_ON(src_cnt < 2);
@@ -537,9 +495,8 @@ __ioat3_prep_xor_lock(struct dma_chan *c, enum sum_check_flags *result,
537 * (legacy) descriptor to ensure all completion writes arrive in 495 * (legacy) descriptor to ensure all completion writes arrive in
538 * order. 496 * order.
539 */ 497 */
540 if (likely(num_descs) && 498 if (likely(num_descs) && ioat2_check_space_lock(ioat, num_descs+1) == 0)
541 ioat2_alloc_and_lock(&idx, ioat, num_descs+1) == 0) 499 idx = ioat->head;
542 /* pass */;
543 else 500 else
544 return NULL; 501 return NULL;
545 i = 0; 502 i = 0;
@@ -657,11 +614,8 @@ __ioat3_prep_pq_lock(struct dma_chan *c, enum sum_check_flags *result,
657 struct ioat_pq_ext_descriptor *pq_ex = NULL; 614 struct ioat_pq_ext_descriptor *pq_ex = NULL;
658 struct ioat_dma_descriptor *hw; 615 struct ioat_dma_descriptor *hw;
659 u32 offset = 0; 616 u32 offset = 0;
660 int num_descs;
661 int with_ext;
662 int i, s;
663 u16 idx;
664 u8 op = result ? IOAT_OP_PQ_VAL : IOAT_OP_PQ; 617 u8 op = result ? IOAT_OP_PQ_VAL : IOAT_OP_PQ;
618 int i, s, idx, with_ext, num_descs;
665 619
666 dev_dbg(to_dev(chan), "%s\n", __func__); 620 dev_dbg(to_dev(chan), "%s\n", __func__);
667 /* the engine requires at least two sources (we provide 621 /* the engine requires at least two sources (we provide
@@ -687,8 +641,8 @@ __ioat3_prep_pq_lock(struct dma_chan *c, enum sum_check_flags *result,
687 * order. 641 * order.
688 */ 642 */
689 if (likely(num_descs) && 643 if (likely(num_descs) &&
690 ioat2_alloc_and_lock(&idx, ioat, num_descs+1) == 0) 644 ioat2_check_space_lock(ioat, num_descs+1) == 0)
691 /* pass */; 645 idx = ioat->head;
692 else 646 else
693 return NULL; 647 return NULL;
694 i = 0; 648 i = 0;
@@ -851,10 +805,9 @@ ioat3_prep_interrupt_lock(struct dma_chan *c, unsigned long flags)
851 struct ioat2_dma_chan *ioat = to_ioat2_chan(c); 805 struct ioat2_dma_chan *ioat = to_ioat2_chan(c);
852 struct ioat_ring_ent *desc; 806 struct ioat_ring_ent *desc;
853 struct ioat_dma_descriptor *hw; 807 struct ioat_dma_descriptor *hw;
854 u16 idx;
855 808
856 if (ioat2_alloc_and_lock(&idx, ioat, 1) == 0) 809 if (ioat2_check_space_lock(ioat, 1) == 0)
857 desc = ioat2_get_ring_ent(ioat, idx); 810 desc = ioat2_get_ring_ent(ioat, ioat->head);
858 else 811 else
859 return NULL; 812 return NULL;
860 813
@@ -977,7 +930,7 @@ static int __devinit ioat_xor_val_self_test(struct ioatdma_device *device)
977 930
978 tmo = wait_for_completion_timeout(&cmp, msecs_to_jiffies(3000)); 931 tmo = wait_for_completion_timeout(&cmp, msecs_to_jiffies(3000));
979 932
980 if (dma->device_is_tx_complete(dma_chan, cookie, NULL, NULL) != DMA_SUCCESS) { 933 if (dma->device_tx_status(dma_chan, cookie, NULL) != DMA_SUCCESS) {
981 dev_err(dev, "Self-test xor timed out\n"); 934 dev_err(dev, "Self-test xor timed out\n");
982 err = -ENODEV; 935 err = -ENODEV;
983 goto free_resources; 936 goto free_resources;
@@ -1031,7 +984,7 @@ static int __devinit ioat_xor_val_self_test(struct ioatdma_device *device)
1031 984
1032 tmo = wait_for_completion_timeout(&cmp, msecs_to_jiffies(3000)); 985 tmo = wait_for_completion_timeout(&cmp, msecs_to_jiffies(3000));
1033 986
1034 if (dma->device_is_tx_complete(dma_chan, cookie, NULL, NULL) != DMA_SUCCESS) { 987 if (dma->device_tx_status(dma_chan, cookie, NULL) != DMA_SUCCESS) {
1035 dev_err(dev, "Self-test validate timed out\n"); 988 dev_err(dev, "Self-test validate timed out\n");
1036 err = -ENODEV; 989 err = -ENODEV;
1037 goto free_resources; 990 goto free_resources;
@@ -1072,7 +1025,7 @@ static int __devinit ioat_xor_val_self_test(struct ioatdma_device *device)
1072 1025
1073 tmo = wait_for_completion_timeout(&cmp, msecs_to_jiffies(3000)); 1026 tmo = wait_for_completion_timeout(&cmp, msecs_to_jiffies(3000));
1074 1027
1075 if (dma->device_is_tx_complete(dma_chan, cookie, NULL, NULL) != DMA_SUCCESS) { 1028 if (dma->device_tx_status(dma_chan, cookie, NULL) != DMA_SUCCESS) {
1076 dev_err(dev, "Self-test memset timed out\n"); 1029 dev_err(dev, "Self-test memset timed out\n");
1077 err = -ENODEV; 1030 err = -ENODEV;
1078 goto free_resources; 1031 goto free_resources;
@@ -1115,7 +1068,7 @@ static int __devinit ioat_xor_val_self_test(struct ioatdma_device *device)
1115 1068
1116 tmo = wait_for_completion_timeout(&cmp, msecs_to_jiffies(3000)); 1069 tmo = wait_for_completion_timeout(&cmp, msecs_to_jiffies(3000));
1117 1070
1118 if (dma->device_is_tx_complete(dma_chan, cookie, NULL, NULL) != DMA_SUCCESS) { 1071 if (dma->device_tx_status(dma_chan, cookie, NULL) != DMA_SUCCESS) {
1119 dev_err(dev, "Self-test 2nd validate timed out\n"); 1072 dev_err(dev, "Self-test 2nd validate timed out\n");
1120 err = -ENODEV; 1073 err = -ENODEV;
1121 goto free_resources; 1074 goto free_resources;
@@ -1222,7 +1175,7 @@ int __devinit ioat3_dma_probe(struct ioatdma_device *device, int dca)
1222 if (cap & IOAT_CAP_XOR) { 1175 if (cap & IOAT_CAP_XOR) {
1223 is_raid_device = true; 1176 is_raid_device = true;
1224 dma->max_xor = 8; 1177 dma->max_xor = 8;
1225 dma->xor_align = 2; 1178 dma->xor_align = 6;
1226 1179
1227 dma_cap_set(DMA_XOR, dma->cap_mask); 1180 dma_cap_set(DMA_XOR, dma->cap_mask);
1228 dma->device_prep_dma_xor = ioat3_prep_xor; 1181 dma->device_prep_dma_xor = ioat3_prep_xor;
@@ -1233,7 +1186,7 @@ int __devinit ioat3_dma_probe(struct ioatdma_device *device, int dca)
1233 if (cap & IOAT_CAP_PQ) { 1186 if (cap & IOAT_CAP_PQ) {
1234 is_raid_device = true; 1187 is_raid_device = true;
1235 dma_set_maxpq(dma, 8, 0); 1188 dma_set_maxpq(dma, 8, 0);
1236 dma->pq_align = 2; 1189 dma->pq_align = 6;
1237 1190
1238 dma_cap_set(DMA_PQ, dma->cap_mask); 1191 dma_cap_set(DMA_PQ, dma->cap_mask);
1239 dma->device_prep_dma_pq = ioat3_prep_pq; 1192 dma->device_prep_dma_pq = ioat3_prep_pq;
@@ -1243,7 +1196,7 @@ int __devinit ioat3_dma_probe(struct ioatdma_device *device, int dca)
1243 1196
1244 if (!(cap & IOAT_CAP_XOR)) { 1197 if (!(cap & IOAT_CAP_XOR)) {
1245 dma->max_xor = 8; 1198 dma->max_xor = 8;
1246 dma->xor_align = 2; 1199 dma->xor_align = 6;
1247 1200
1248 dma_cap_set(DMA_XOR, dma->cap_mask); 1201 dma_cap_set(DMA_XOR, dma->cap_mask);
1249 dma->device_prep_dma_xor = ioat3_prep_pqxor; 1202 dma->device_prep_dma_xor = ioat3_prep_pqxor;
@@ -1259,11 +1212,11 @@ int __devinit ioat3_dma_probe(struct ioatdma_device *device, int dca)
1259 1212
1260 1213
1261 if (is_raid_device) { 1214 if (is_raid_device) {
1262 dma->device_is_tx_complete = ioat3_is_complete; 1215 dma->device_tx_status = ioat3_tx_status;
1263 device->cleanup_fn = ioat3_cleanup_event; 1216 device->cleanup_fn = ioat3_cleanup_event;
1264 device->timer_fn = ioat3_timer_event; 1217 device->timer_fn = ioat3_timer_event;
1265 } else { 1218 } else {
1266 dma->device_is_tx_complete = ioat_is_dma_complete; 1219 dma->device_tx_status = ioat_dma_tx_status;
1267 device->cleanup_fn = ioat2_cleanup_event; 1220 device->cleanup_fn = ioat2_cleanup_event;
1268 device->timer_fn = ioat2_timer_event; 1221 device->timer_fn = ioat2_timer_event;
1269 } 1222 }