aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/dma/ioat/dma_v3.c
diff options
context:
space:
mode:
author Dan Williams <dan.j.williams@intel.com> 2010-05-01 18:22:55 -0400
committer Dan Williams <dan.j.williams@intel.com> 2010-05-01 18:22:55 -0400
commit 074cc47679f8b0931d7d5384e95822d82768f149 (patch)
tree bd4699a1c769553f2bbfa931913352d26c81dc4d /drivers/dma/ioat/dma_v3.c
parent abb12dfd50c7580d7dcbd581cf6265ba4d01ea7e (diff)
ioat2,3: convert to producer/consumer locking
Use separate locks for the descriptor prep (producer) and descriptor cleanup (consumer) paths. This allows the producer path to run concurrently with the cleanup path. Inspired by Documentation/circular-buffer.txt.

Cc: David Howells <dhowells@redhat.com>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Maciej Sosnowski <maciej.sosnowski@intel.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
Diffstat (limited to 'drivers/dma/ioat/dma_v3.c')
-rw-r--r-- drivers/dma/ioat/dma_v3.c 117
1 files changed, 35 insertions, 82 deletions
diff --git a/drivers/dma/ioat/dma_v3.c b/drivers/dma/ioat/dma_v3.c
index 6740e319c9cf..8b573fac2a25 100644
--- a/drivers/dma/ioat/dma_v3.c
+++ b/drivers/dma/ioat/dma_v3.c
@@ -260,8 +260,8 @@ static void __cleanup(struct ioat2_dma_chan *ioat, unsigned long phys_complete)
260 struct ioat_chan_common *chan = &ioat->base; 260 struct ioat_chan_common *chan = &ioat->base;
261 struct ioat_ring_ent *desc; 261 struct ioat_ring_ent *desc;
262 bool seen_current = false; 262 bool seen_current = false;
263 int idx = ioat->tail, i;
263 u16 active; 264 u16 active;
264 int i;
265 265
266 dev_dbg(to_dev(chan), "%s: head: %#x tail: %#x issued: %#x\n", 266 dev_dbg(to_dev(chan), "%s: head: %#x tail: %#x issued: %#x\n",
267 __func__, ioat->head, ioat->tail, ioat->issued); 267 __func__, ioat->head, ioat->tail, ioat->issued);
@@ -270,13 +270,14 @@ static void __cleanup(struct ioat2_dma_chan *ioat, unsigned long phys_complete)
270 for (i = 0; i < active && !seen_current; i++) { 270 for (i = 0; i < active && !seen_current; i++) {
271 struct dma_async_tx_descriptor *tx; 271 struct dma_async_tx_descriptor *tx;
272 272
273 prefetch(ioat2_get_ring_ent(ioat, ioat->tail + i + 1)); 273 smp_read_barrier_depends();
274 desc = ioat2_get_ring_ent(ioat, ioat->tail + i); 274 prefetch(ioat2_get_ring_ent(ioat, idx + i + 1));
275 desc = ioat2_get_ring_ent(ioat, idx + i);
275 dump_desc_dbg(ioat, desc); 276 dump_desc_dbg(ioat, desc);
276 tx = &desc->txd; 277 tx = &desc->txd;
277 if (tx->cookie) { 278 if (tx->cookie) {
278 chan->completed_cookie = tx->cookie; 279 chan->completed_cookie = tx->cookie;
279 ioat3_dma_unmap(ioat, desc, ioat->tail + i); 280 ioat3_dma_unmap(ioat, desc, idx + i);
280 tx->cookie = 0; 281 tx->cookie = 0;
281 if (tx->callback) { 282 if (tx->callback) {
282 tx->callback(tx->callback_param); 283 tx->callback(tx->callback_param);
@@ -293,69 +294,30 @@ static void __cleanup(struct ioat2_dma_chan *ioat, unsigned long phys_complete)
293 i++; 294 i++;
294 } 295 }
295 } 296 }
296 ioat->tail += i; 297 smp_mb(); /* finish all descriptor reads before incrementing tail */
298 ioat->tail = idx + i;
297 BUG_ON(active && !seen_current); /* no active descs have written a completion? */ 299 BUG_ON(active && !seen_current); /* no active descs have written a completion? */
298 chan->last_completion = phys_complete; 300 chan->last_completion = phys_complete;
299 301
300 active = ioat2_ring_active(ioat); 302 if (active - i == 0) {
301 if (active == 0) {
302 dev_dbg(to_dev(chan), "%s: cancel completion timeout\n", 303 dev_dbg(to_dev(chan), "%s: cancel completion timeout\n",
303 __func__); 304 __func__);
304 clear_bit(IOAT_COMPLETION_PENDING, &chan->state); 305 clear_bit(IOAT_COMPLETION_PENDING, &chan->state);
305 mod_timer(&chan->timer, jiffies + IDLE_TIMEOUT); 306 mod_timer(&chan->timer, jiffies + IDLE_TIMEOUT);
306 } 307 }
307 /* 5 microsecond delay per pending descriptor */ 308 /* 5 microsecond delay per pending descriptor */
308 writew(min((5 * active), IOAT_INTRDELAY_MASK), 309 writew(min((5 * (active - i)), IOAT_INTRDELAY_MASK),
309 chan->device->reg_base + IOAT_INTRDELAY_OFFSET); 310 chan->device->reg_base + IOAT_INTRDELAY_OFFSET);
310} 311}
311 312
312/* try to cleanup, but yield (via spin_trylock) to incoming submissions 313static void ioat3_cleanup(struct ioat2_dma_chan *ioat)
313 * with the expectation that we will immediately poll again shortly
314 */
315static void ioat3_cleanup_poll(struct ioat2_dma_chan *ioat)
316{ 314{
317 struct ioat_chan_common *chan = &ioat->base; 315 struct ioat_chan_common *chan = &ioat->base;
318 unsigned long phys_complete; 316 unsigned long phys_complete;
319 317
320 prefetch(chan->completion);
321
322 if (!spin_trylock_bh(&chan->cleanup_lock))
323 return;
324
325 if (!ioat_cleanup_preamble(chan, &phys_complete)) {
326 spin_unlock_bh(&chan->cleanup_lock);
327 return;
328 }
329
330 if (!spin_trylock_bh(&ioat->ring_lock)) {
331 spin_unlock_bh(&chan->cleanup_lock);
332 return;
333 }
334
335 __cleanup(ioat, phys_complete);
336
337 spin_unlock_bh(&ioat->ring_lock);
338 spin_unlock_bh(&chan->cleanup_lock);
339}
340
341/* run cleanup now because we already delayed the interrupt via INTRDELAY */
342static void ioat3_cleanup_sync(struct ioat2_dma_chan *ioat)
343{
344 struct ioat_chan_common *chan = &ioat->base;
345 unsigned long phys_complete;
346
347 prefetch(chan->completion);
348
349 spin_lock_bh(&chan->cleanup_lock); 318 spin_lock_bh(&chan->cleanup_lock);
350 if (!ioat_cleanup_preamble(chan, &phys_complete)) { 319 if (ioat_cleanup_preamble(chan, &phys_complete))
351 spin_unlock_bh(&chan->cleanup_lock); 320 __cleanup(ioat, phys_complete);
352 return;
353 }
354 spin_lock_bh(&ioat->ring_lock);
355
356 __cleanup(ioat, phys_complete);
357
358 spin_unlock_bh(&ioat->ring_lock);
359 spin_unlock_bh(&chan->cleanup_lock); 321 spin_unlock_bh(&chan->cleanup_lock);
360} 322}
361 323
@@ -363,7 +325,7 @@ static void ioat3_cleanup_event(unsigned long data)
363{ 325{
364 struct ioat2_dma_chan *ioat = to_ioat2_chan((void *) data); 326 struct ioat2_dma_chan *ioat = to_ioat2_chan((void *) data);
365 327
366 ioat3_cleanup_sync(ioat); 328 ioat3_cleanup(ioat);
367 writew(IOAT_CHANCTRL_RUN, ioat->base.reg_base + IOAT_CHANCTRL_OFFSET); 329 writew(IOAT_CHANCTRL_RUN, ioat->base.reg_base + IOAT_CHANCTRL_OFFSET);
368} 330}
369 331
@@ -384,12 +346,10 @@ static void ioat3_timer_event(unsigned long data)
384 struct ioat2_dma_chan *ioat = to_ioat2_chan((void *) data); 346 struct ioat2_dma_chan *ioat = to_ioat2_chan((void *) data);
385 struct ioat_chan_common *chan = &ioat->base; 347 struct ioat_chan_common *chan = &ioat->base;
386 348
387 spin_lock_bh(&chan->cleanup_lock);
388 if (test_bit(IOAT_COMPLETION_PENDING, &chan->state)) { 349 if (test_bit(IOAT_COMPLETION_PENDING, &chan->state)) {
389 unsigned long phys_complete; 350 unsigned long phys_complete;
390 u64 status; 351 u64 status;
391 352
392 spin_lock_bh(&ioat->ring_lock);
393 status = ioat_chansts(chan); 353 status = ioat_chansts(chan);
394 354
395 /* when halted due to errors check for channel 355 /* when halted due to errors check for channel
@@ -408,26 +368,31 @@ static void ioat3_timer_event(unsigned long data)
408 * acknowledged a pending completion once, then be more 368 * acknowledged a pending completion once, then be more
409 * forceful with a restart 369 * forceful with a restart
410 */ 370 */
371 spin_lock_bh(&chan->cleanup_lock);
411 if (ioat_cleanup_preamble(chan, &phys_complete)) 372 if (ioat_cleanup_preamble(chan, &phys_complete))
412 __cleanup(ioat, phys_complete); 373 __cleanup(ioat, phys_complete);
413 else if (test_bit(IOAT_COMPLETION_ACK, &chan->state)) 374 else if (test_bit(IOAT_COMPLETION_ACK, &chan->state)) {
375 spin_lock_bh(&ioat->prep_lock);
414 ioat3_restart_channel(ioat); 376 ioat3_restart_channel(ioat);
415 else { 377 spin_unlock_bh(&ioat->prep_lock);
378 } else {
416 set_bit(IOAT_COMPLETION_ACK, &chan->state); 379 set_bit(IOAT_COMPLETION_ACK, &chan->state);
417 mod_timer(&chan->timer, jiffies + COMPLETION_TIMEOUT); 380 mod_timer(&chan->timer, jiffies + COMPLETION_TIMEOUT);
418 } 381 }
419 spin_unlock_bh(&ioat->ring_lock); 382 spin_unlock_bh(&chan->cleanup_lock);
420 } else { 383 } else {
421 u16 active; 384 u16 active;
422 385
423 /* if the ring is idle, empty, and oversized try to step 386 /* if the ring is idle, empty, and oversized try to step
424 * down the size 387 * down the size
425 */ 388 */
426 spin_lock_bh(&ioat->ring_lock); 389 spin_lock_bh(&chan->cleanup_lock);
390 spin_lock_bh(&ioat->prep_lock);
427 active = ioat2_ring_active(ioat); 391 active = ioat2_ring_active(ioat);
428 if (active == 0 && ioat->alloc_order > ioat_get_alloc_order()) 392 if (active == 0 && ioat->alloc_order > ioat_get_alloc_order())
429 reshape_ring(ioat, ioat->alloc_order-1); 393 reshape_ring(ioat, ioat->alloc_order-1);
430 spin_unlock_bh(&ioat->ring_lock); 394 spin_unlock_bh(&ioat->prep_lock);
395 spin_unlock_bh(&chan->cleanup_lock);
431 396
432 /* keep shrinking until we get back to our minimum 397 /* keep shrinking until we get back to our minimum
433 * default size 398 * default size
@@ -435,7 +400,6 @@ static void ioat3_timer_event(unsigned long data)
435 if (ioat->alloc_order > ioat_get_alloc_order()) 400 if (ioat->alloc_order > ioat_get_alloc_order())
436 mod_timer(&chan->timer, jiffies + IDLE_TIMEOUT); 401 mod_timer(&chan->timer, jiffies + IDLE_TIMEOUT);
437 } 402 }
438 spin_unlock_bh(&chan->cleanup_lock);
439} 403}
440 404
441static enum dma_status 405static enum dma_status
@@ -447,7 +411,7 @@ ioat3_is_complete(struct dma_chan *c, dma_cookie_t cookie,
447 if (ioat_is_complete(c, cookie, done, used) == DMA_SUCCESS) 411 if (ioat_is_complete(c, cookie, done, used) == DMA_SUCCESS)
448 return DMA_SUCCESS; 412 return DMA_SUCCESS;
449 413
450 ioat3_cleanup_poll(ioat); 414 ioat3_cleanup(ioat);
451 415
452 return ioat_is_complete(c, cookie, done, used); 416 return ioat_is_complete(c, cookie, done, used);
453} 417}
@@ -460,15 +424,12 @@ ioat3_prep_memset_lock(struct dma_chan *c, dma_addr_t dest, int value,
460 struct ioat_ring_ent *desc; 424 struct ioat_ring_ent *desc;
461 size_t total_len = len; 425 size_t total_len = len;
462 struct ioat_fill_descriptor *fill; 426 struct ioat_fill_descriptor *fill;
463 int num_descs;
464 u64 src_data = (0x0101010101010101ULL) * (value & 0xff); 427 u64 src_data = (0x0101010101010101ULL) * (value & 0xff);
465 u16 idx; 428 int num_descs, idx, i;
466 int i;
467 429
468 num_descs = ioat2_xferlen_to_descs(ioat, len); 430 num_descs = ioat2_xferlen_to_descs(ioat, len);
469 if (likely(num_descs) && 431 if (likely(num_descs) && ioat2_check_space_lock(ioat, num_descs) == 0)
470 ioat2_alloc_and_lock(&idx, ioat, num_descs) == 0) 432 idx = ioat->head;
471 /* pass */;
472 else 433 else
473 return NULL; 434 return NULL;
474 i = 0; 435 i = 0;
@@ -513,11 +474,8 @@ __ioat3_prep_xor_lock(struct dma_chan *c, enum sum_check_flags *result,
513 struct ioat_xor_descriptor *xor; 474 struct ioat_xor_descriptor *xor;
514 struct ioat_xor_ext_descriptor *xor_ex = NULL; 475 struct ioat_xor_ext_descriptor *xor_ex = NULL;
515 struct ioat_dma_descriptor *hw; 476 struct ioat_dma_descriptor *hw;
477 int num_descs, with_ext, idx, i;
516 u32 offset = 0; 478 u32 offset = 0;
517 int num_descs;
518 int with_ext;
519 int i;
520 u16 idx;
521 u8 op = result ? IOAT_OP_XOR_VAL : IOAT_OP_XOR; 479 u8 op = result ? IOAT_OP_XOR_VAL : IOAT_OP_XOR;
522 480
523 BUG_ON(src_cnt < 2); 481 BUG_ON(src_cnt < 2);
@@ -537,9 +495,8 @@ __ioat3_prep_xor_lock(struct dma_chan *c, enum sum_check_flags *result,
537 * (legacy) descriptor to ensure all completion writes arrive in 495 * (legacy) descriptor to ensure all completion writes arrive in
538 * order. 496 * order.
539 */ 497 */
540 if (likely(num_descs) && 498 if (likely(num_descs) && ioat2_check_space_lock(ioat, num_descs+1) == 0)
541 ioat2_alloc_and_lock(&idx, ioat, num_descs+1) == 0) 499 idx = ioat->head;
542 /* pass */;
543 else 500 else
544 return NULL; 501 return NULL;
545 i = 0; 502 i = 0;
@@ -657,11 +614,8 @@ __ioat3_prep_pq_lock(struct dma_chan *c, enum sum_check_flags *result,
657 struct ioat_pq_ext_descriptor *pq_ex = NULL; 614 struct ioat_pq_ext_descriptor *pq_ex = NULL;
658 struct ioat_dma_descriptor *hw; 615 struct ioat_dma_descriptor *hw;
659 u32 offset = 0; 616 u32 offset = 0;
660 int num_descs;
661 int with_ext;
662 int i, s;
663 u16 idx;
664 u8 op = result ? IOAT_OP_PQ_VAL : IOAT_OP_PQ; 617 u8 op = result ? IOAT_OP_PQ_VAL : IOAT_OP_PQ;
618 int i, s, idx, with_ext, num_descs;
665 619
666 dev_dbg(to_dev(chan), "%s\n", __func__); 620 dev_dbg(to_dev(chan), "%s\n", __func__);
667 /* the engine requires at least two sources (we provide 621 /* the engine requires at least two sources (we provide
@@ -687,8 +641,8 @@ __ioat3_prep_pq_lock(struct dma_chan *c, enum sum_check_flags *result,
687 * order. 641 * order.
688 */ 642 */
689 if (likely(num_descs) && 643 if (likely(num_descs) &&
690 ioat2_alloc_and_lock(&idx, ioat, num_descs+1) == 0) 644 ioat2_check_space_lock(ioat, num_descs+1) == 0)
691 /* pass */; 645 idx = ioat->head;
692 else 646 else
693 return NULL; 647 return NULL;
694 i = 0; 648 i = 0;
@@ -851,10 +805,9 @@ ioat3_prep_interrupt_lock(struct dma_chan *c, unsigned long flags)
851 struct ioat2_dma_chan *ioat = to_ioat2_chan(c); 805 struct ioat2_dma_chan *ioat = to_ioat2_chan(c);
852 struct ioat_ring_ent *desc; 806 struct ioat_ring_ent *desc;
853 struct ioat_dma_descriptor *hw; 807 struct ioat_dma_descriptor *hw;
854 u16 idx;
855 808
856 if (ioat2_alloc_and_lock(&idx, ioat, 1) == 0) 809 if (ioat2_check_space_lock(ioat, 1) == 0)
857 desc = ioat2_get_ring_ent(ioat, idx); 810 desc = ioat2_get_ring_ent(ioat, ioat->head);
858 else 811 else
859 return NULL; 812 return NULL;
860 813