diff options
author | Dan Williams <dan.j.williams@intel.com> | 2010-05-01 18:22:55 -0400 |
---|---|---|
committer | Dan Williams <dan.j.williams@intel.com> | 2010-05-01 18:22:55 -0400 |
commit | 074cc47679f8b0931d7d5384e95822d82768f149 (patch) | |
tree | bd4699a1c769553f2bbfa931913352d26c81dc4d /drivers/dma/ioat/dma_v3.c | |
parent | abb12dfd50c7580d7dcbd581cf6265ba4d01ea7e (diff) |
ioat2,3: convert to producer/consumer locking
Use separate locks for the descriptor prep (producer) and descriptor
cleanup (consumer) paths. This allows the producer path to run concurrently
with the cleanup path. The scheme is inspired by Documentation/circular-buffer.txt.
Cc: David Howells <dhowells@redhat.com>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Maciej Sosnowski <maciej.sosnowski@intel.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
Diffstat (limited to 'drivers/dma/ioat/dma_v3.c')
-rw-r--r-- | drivers/dma/ioat/dma_v3.c | 117 |
1 files changed, 35 insertions, 82 deletions
diff --git a/drivers/dma/ioat/dma_v3.c b/drivers/dma/ioat/dma_v3.c
index 6740e319c9cf..8b573fac2a25 100644
--- a/drivers/dma/ioat/dma_v3.c
+++ b/drivers/dma/ioat/dma_v3.c
@@ -260,8 +260,8 @@ static void __cleanup(struct ioat2_dma_chan *ioat, unsigned long phys_complete) | |||
260 | struct ioat_chan_common *chan = &ioat->base; | 260 | struct ioat_chan_common *chan = &ioat->base; |
261 | struct ioat_ring_ent *desc; | 261 | struct ioat_ring_ent *desc; |
262 | bool seen_current = false; | 262 | bool seen_current = false; |
263 | int idx = ioat->tail, i; | ||
263 | u16 active; | 264 | u16 active; |
264 | int i; | ||
265 | 265 | ||
266 | dev_dbg(to_dev(chan), "%s: head: %#x tail: %#x issued: %#x\n", | 266 | dev_dbg(to_dev(chan), "%s: head: %#x tail: %#x issued: %#x\n", |
267 | __func__, ioat->head, ioat->tail, ioat->issued); | 267 | __func__, ioat->head, ioat->tail, ioat->issued); |
@@ -270,13 +270,14 @@ static void __cleanup(struct ioat2_dma_chan *ioat, unsigned long phys_complete) | |||
270 | for (i = 0; i < active && !seen_current; i++) { | 270 | for (i = 0; i < active && !seen_current; i++) { |
271 | struct dma_async_tx_descriptor *tx; | 271 | struct dma_async_tx_descriptor *tx; |
272 | 272 | ||
273 | prefetch(ioat2_get_ring_ent(ioat, ioat->tail + i + 1)); | 273 | smp_read_barrier_depends(); |
274 | desc = ioat2_get_ring_ent(ioat, ioat->tail + i); | 274 | prefetch(ioat2_get_ring_ent(ioat, idx + i + 1)); |
275 | desc = ioat2_get_ring_ent(ioat, idx + i); | ||
275 | dump_desc_dbg(ioat, desc); | 276 | dump_desc_dbg(ioat, desc); |
276 | tx = &desc->txd; | 277 | tx = &desc->txd; |
277 | if (tx->cookie) { | 278 | if (tx->cookie) { |
278 | chan->completed_cookie = tx->cookie; | 279 | chan->completed_cookie = tx->cookie; |
279 | ioat3_dma_unmap(ioat, desc, ioat->tail + i); | 280 | ioat3_dma_unmap(ioat, desc, idx + i); |
280 | tx->cookie = 0; | 281 | tx->cookie = 0; |
281 | if (tx->callback) { | 282 | if (tx->callback) { |
282 | tx->callback(tx->callback_param); | 283 | tx->callback(tx->callback_param); |
@@ -293,69 +294,30 @@ static void __cleanup(struct ioat2_dma_chan *ioat, unsigned long phys_complete) | |||
293 | i++; | 294 | i++; |
294 | } | 295 | } |
295 | } | 296 | } |
296 | ioat->tail += i; | 297 | smp_mb(); /* finish all descriptor reads before incrementing tail */ |
298 | ioat->tail = idx + i; | ||
297 | BUG_ON(active && !seen_current); /* no active descs have written a completion? */ | 299 | BUG_ON(active && !seen_current); /* no active descs have written a completion? */ |
298 | chan->last_completion = phys_complete; | 300 | chan->last_completion = phys_complete; |
299 | 301 | ||
300 | active = ioat2_ring_active(ioat); | 302 | if (active - i == 0) { |
301 | if (active == 0) { | ||
302 | dev_dbg(to_dev(chan), "%s: cancel completion timeout\n", | 303 | dev_dbg(to_dev(chan), "%s: cancel completion timeout\n", |
303 | __func__); | 304 | __func__); |
304 | clear_bit(IOAT_COMPLETION_PENDING, &chan->state); | 305 | clear_bit(IOAT_COMPLETION_PENDING, &chan->state); |
305 | mod_timer(&chan->timer, jiffies + IDLE_TIMEOUT); | 306 | mod_timer(&chan->timer, jiffies + IDLE_TIMEOUT); |
306 | } | 307 | } |
307 | /* 5 microsecond delay per pending descriptor */ | 308 | /* 5 microsecond delay per pending descriptor */ |
308 | writew(min((5 * active), IOAT_INTRDELAY_MASK), | 309 | writew(min((5 * (active - i)), IOAT_INTRDELAY_MASK), |
309 | chan->device->reg_base + IOAT_INTRDELAY_OFFSET); | 310 | chan->device->reg_base + IOAT_INTRDELAY_OFFSET); |
310 | } | 311 | } |
311 | 312 | ||
312 | /* try to cleanup, but yield (via spin_trylock) to incoming submissions | 313 | static void ioat3_cleanup(struct ioat2_dma_chan *ioat) |
313 | * with the expectation that we will immediately poll again shortly | ||
314 | */ | ||
315 | static void ioat3_cleanup_poll(struct ioat2_dma_chan *ioat) | ||
316 | { | 314 | { |
317 | struct ioat_chan_common *chan = &ioat->base; | 315 | struct ioat_chan_common *chan = &ioat->base; |
318 | unsigned long phys_complete; | 316 | unsigned long phys_complete; |
319 | 317 | ||
320 | prefetch(chan->completion); | ||
321 | |||
322 | if (!spin_trylock_bh(&chan->cleanup_lock)) | ||
323 | return; | ||
324 | |||
325 | if (!ioat_cleanup_preamble(chan, &phys_complete)) { | ||
326 | spin_unlock_bh(&chan->cleanup_lock); | ||
327 | return; | ||
328 | } | ||
329 | |||
330 | if (!spin_trylock_bh(&ioat->ring_lock)) { | ||
331 | spin_unlock_bh(&chan->cleanup_lock); | ||
332 | return; | ||
333 | } | ||
334 | |||
335 | __cleanup(ioat, phys_complete); | ||
336 | |||
337 | spin_unlock_bh(&ioat->ring_lock); | ||
338 | spin_unlock_bh(&chan->cleanup_lock); | ||
339 | } | ||
340 | |||
341 | /* run cleanup now because we already delayed the interrupt via INTRDELAY */ | ||
342 | static void ioat3_cleanup_sync(struct ioat2_dma_chan *ioat) | ||
343 | { | ||
344 | struct ioat_chan_common *chan = &ioat->base; | ||
345 | unsigned long phys_complete; | ||
346 | |||
347 | prefetch(chan->completion); | ||
348 | |||
349 | spin_lock_bh(&chan->cleanup_lock); | 318 | spin_lock_bh(&chan->cleanup_lock); |
350 | if (!ioat_cleanup_preamble(chan, &phys_complete)) { | 319 | if (ioat_cleanup_preamble(chan, &phys_complete)) |
351 | spin_unlock_bh(&chan->cleanup_lock); | 320 | __cleanup(ioat, phys_complete); |
352 | return; | ||
353 | } | ||
354 | spin_lock_bh(&ioat->ring_lock); | ||
355 | |||
356 | __cleanup(ioat, phys_complete); | ||
357 | |||
358 | spin_unlock_bh(&ioat->ring_lock); | ||
359 | spin_unlock_bh(&chan->cleanup_lock); | 321 | spin_unlock_bh(&chan->cleanup_lock); |
360 | } | 322 | } |
361 | 323 | ||
@@ -363,7 +325,7 @@ static void ioat3_cleanup_event(unsigned long data) | |||
363 | { | 325 | { |
364 | struct ioat2_dma_chan *ioat = to_ioat2_chan((void *) data); | 326 | struct ioat2_dma_chan *ioat = to_ioat2_chan((void *) data); |
365 | 327 | ||
366 | ioat3_cleanup_sync(ioat); | 328 | ioat3_cleanup(ioat); |
367 | writew(IOAT_CHANCTRL_RUN, ioat->base.reg_base + IOAT_CHANCTRL_OFFSET); | 329 | writew(IOAT_CHANCTRL_RUN, ioat->base.reg_base + IOAT_CHANCTRL_OFFSET); |
368 | } | 330 | } |
369 | 331 | ||
@@ -384,12 +346,10 @@ static void ioat3_timer_event(unsigned long data) | |||
384 | struct ioat2_dma_chan *ioat = to_ioat2_chan((void *) data); | 346 | struct ioat2_dma_chan *ioat = to_ioat2_chan((void *) data); |
385 | struct ioat_chan_common *chan = &ioat->base; | 347 | struct ioat_chan_common *chan = &ioat->base; |
386 | 348 | ||
387 | spin_lock_bh(&chan->cleanup_lock); | ||
388 | if (test_bit(IOAT_COMPLETION_PENDING, &chan->state)) { | 349 | if (test_bit(IOAT_COMPLETION_PENDING, &chan->state)) { |
389 | unsigned long phys_complete; | 350 | unsigned long phys_complete; |
390 | u64 status; | 351 | u64 status; |
391 | 352 | ||
392 | spin_lock_bh(&ioat->ring_lock); | ||
393 | status = ioat_chansts(chan); | 353 | status = ioat_chansts(chan); |
394 | 354 | ||
395 | /* when halted due to errors check for channel | 355 | /* when halted due to errors check for channel |
@@ -408,26 +368,31 @@ static void ioat3_timer_event(unsigned long data) | |||
408 | * acknowledged a pending completion once, then be more | 368 | * acknowledged a pending completion once, then be more |
409 | * forceful with a restart | 369 | * forceful with a restart |
410 | */ | 370 | */ |
371 | spin_lock_bh(&chan->cleanup_lock); | ||
411 | if (ioat_cleanup_preamble(chan, &phys_complete)) | 372 | if (ioat_cleanup_preamble(chan, &phys_complete)) |
412 | __cleanup(ioat, phys_complete); | 373 | __cleanup(ioat, phys_complete); |
413 | else if (test_bit(IOAT_COMPLETION_ACK, &chan->state)) | 374 | else if (test_bit(IOAT_COMPLETION_ACK, &chan->state)) { |
375 | spin_lock_bh(&ioat->prep_lock); | ||
414 | ioat3_restart_channel(ioat); | 376 | ioat3_restart_channel(ioat); |
415 | else { | 377 | spin_unlock_bh(&ioat->prep_lock); |
378 | } else { | ||
416 | set_bit(IOAT_COMPLETION_ACK, &chan->state); | 379 | set_bit(IOAT_COMPLETION_ACK, &chan->state); |
417 | mod_timer(&chan->timer, jiffies + COMPLETION_TIMEOUT); | 380 | mod_timer(&chan->timer, jiffies + COMPLETION_TIMEOUT); |
418 | } | 381 | } |
419 | spin_unlock_bh(&ioat->ring_lock); | 382 | spin_unlock_bh(&chan->cleanup_lock); |
420 | } else { | 383 | } else { |
421 | u16 active; | 384 | u16 active; |
422 | 385 | ||
423 | /* if the ring is idle, empty, and oversized try to step | 386 | /* if the ring is idle, empty, and oversized try to step |
424 | * down the size | 387 | * down the size |
425 | */ | 388 | */ |
426 | spin_lock_bh(&ioat->ring_lock); | 389 | spin_lock_bh(&chan->cleanup_lock); |
390 | spin_lock_bh(&ioat->prep_lock); | ||
427 | active = ioat2_ring_active(ioat); | 391 | active = ioat2_ring_active(ioat); |
428 | if (active == 0 && ioat->alloc_order > ioat_get_alloc_order()) | 392 | if (active == 0 && ioat->alloc_order > ioat_get_alloc_order()) |
429 | reshape_ring(ioat, ioat->alloc_order-1); | 393 | reshape_ring(ioat, ioat->alloc_order-1); |
430 | spin_unlock_bh(&ioat->ring_lock); | 394 | spin_unlock_bh(&ioat->prep_lock); |
395 | spin_unlock_bh(&chan->cleanup_lock); | ||
431 | 396 | ||
432 | /* keep shrinking until we get back to our minimum | 397 | /* keep shrinking until we get back to our minimum |
433 | * default size | 398 | * default size |
@@ -435,7 +400,6 @@ static void ioat3_timer_event(unsigned long data) | |||
435 | if (ioat->alloc_order > ioat_get_alloc_order()) | 400 | if (ioat->alloc_order > ioat_get_alloc_order()) |
436 | mod_timer(&chan->timer, jiffies + IDLE_TIMEOUT); | 401 | mod_timer(&chan->timer, jiffies + IDLE_TIMEOUT); |
437 | } | 402 | } |
438 | spin_unlock_bh(&chan->cleanup_lock); | ||
439 | } | 403 | } |
440 | 404 | ||
441 | static enum dma_status | 405 | static enum dma_status |
@@ -447,7 +411,7 @@ ioat3_is_complete(struct dma_chan *c, dma_cookie_t cookie, | |||
447 | if (ioat_is_complete(c, cookie, done, used) == DMA_SUCCESS) | 411 | if (ioat_is_complete(c, cookie, done, used) == DMA_SUCCESS) |
448 | return DMA_SUCCESS; | 412 | return DMA_SUCCESS; |
449 | 413 | ||
450 | ioat3_cleanup_poll(ioat); | 414 | ioat3_cleanup(ioat); |
451 | 415 | ||
452 | return ioat_is_complete(c, cookie, done, used); | 416 | return ioat_is_complete(c, cookie, done, used); |
453 | } | 417 | } |
@@ -460,15 +424,12 @@ ioat3_prep_memset_lock(struct dma_chan *c, dma_addr_t dest, int value, | |||
460 | struct ioat_ring_ent *desc; | 424 | struct ioat_ring_ent *desc; |
461 | size_t total_len = len; | 425 | size_t total_len = len; |
462 | struct ioat_fill_descriptor *fill; | 426 | struct ioat_fill_descriptor *fill; |
463 | int num_descs; | ||
464 | u64 src_data = (0x0101010101010101ULL) * (value & 0xff); | 427 | u64 src_data = (0x0101010101010101ULL) * (value & 0xff); |
465 | u16 idx; | 428 | int num_descs, idx, i; |
466 | int i; | ||
467 | 429 | ||
468 | num_descs = ioat2_xferlen_to_descs(ioat, len); | 430 | num_descs = ioat2_xferlen_to_descs(ioat, len); |
469 | if (likely(num_descs) && | 431 | if (likely(num_descs) && ioat2_check_space_lock(ioat, num_descs) == 0) |
470 | ioat2_alloc_and_lock(&idx, ioat, num_descs) == 0) | 432 | idx = ioat->head; |
471 | /* pass */; | ||
472 | else | 433 | else |
473 | return NULL; | 434 | return NULL; |
474 | i = 0; | 435 | i = 0; |
@@ -513,11 +474,8 @@ __ioat3_prep_xor_lock(struct dma_chan *c, enum sum_check_flags *result, | |||
513 | struct ioat_xor_descriptor *xor; | 474 | struct ioat_xor_descriptor *xor; |
514 | struct ioat_xor_ext_descriptor *xor_ex = NULL; | 475 | struct ioat_xor_ext_descriptor *xor_ex = NULL; |
515 | struct ioat_dma_descriptor *hw; | 476 | struct ioat_dma_descriptor *hw; |
477 | int num_descs, with_ext, idx, i; | ||
516 | u32 offset = 0; | 478 | u32 offset = 0; |
517 | int num_descs; | ||
518 | int with_ext; | ||
519 | int i; | ||
520 | u16 idx; | ||
521 | u8 op = result ? IOAT_OP_XOR_VAL : IOAT_OP_XOR; | 479 | u8 op = result ? IOAT_OP_XOR_VAL : IOAT_OP_XOR; |
522 | 480 | ||
523 | BUG_ON(src_cnt < 2); | 481 | BUG_ON(src_cnt < 2); |
@@ -537,9 +495,8 @@ __ioat3_prep_xor_lock(struct dma_chan *c, enum sum_check_flags *result, | |||
537 | * (legacy) descriptor to ensure all completion writes arrive in | 495 | * (legacy) descriptor to ensure all completion writes arrive in |
538 | * order. | 496 | * order. |
539 | */ | 497 | */ |
540 | if (likely(num_descs) && | 498 | if (likely(num_descs) && ioat2_check_space_lock(ioat, num_descs+1) == 0) |
541 | ioat2_alloc_and_lock(&idx, ioat, num_descs+1) == 0) | 499 | idx = ioat->head; |
542 | /* pass */; | ||
543 | else | 500 | else |
544 | return NULL; | 501 | return NULL; |
545 | i = 0; | 502 | i = 0; |
@@ -657,11 +614,8 @@ __ioat3_prep_pq_lock(struct dma_chan *c, enum sum_check_flags *result, | |||
657 | struct ioat_pq_ext_descriptor *pq_ex = NULL; | 614 | struct ioat_pq_ext_descriptor *pq_ex = NULL; |
658 | struct ioat_dma_descriptor *hw; | 615 | struct ioat_dma_descriptor *hw; |
659 | u32 offset = 0; | 616 | u32 offset = 0; |
660 | int num_descs; | ||
661 | int with_ext; | ||
662 | int i, s; | ||
663 | u16 idx; | ||
664 | u8 op = result ? IOAT_OP_PQ_VAL : IOAT_OP_PQ; | 617 | u8 op = result ? IOAT_OP_PQ_VAL : IOAT_OP_PQ; |
618 | int i, s, idx, with_ext, num_descs; | ||
665 | 619 | ||
666 | dev_dbg(to_dev(chan), "%s\n", __func__); | 620 | dev_dbg(to_dev(chan), "%s\n", __func__); |
667 | /* the engine requires at least two sources (we provide | 621 | /* the engine requires at least two sources (we provide |
@@ -687,8 +641,8 @@ __ioat3_prep_pq_lock(struct dma_chan *c, enum sum_check_flags *result, | |||
687 | * order. | 641 | * order. |
688 | */ | 642 | */ |
689 | if (likely(num_descs) && | 643 | if (likely(num_descs) && |
690 | ioat2_alloc_and_lock(&idx, ioat, num_descs+1) == 0) | 644 | ioat2_check_space_lock(ioat, num_descs+1) == 0) |
691 | /* pass */; | 645 | idx = ioat->head; |
692 | else | 646 | else |
693 | return NULL; | 647 | return NULL; |
694 | i = 0; | 648 | i = 0; |
@@ -851,10 +805,9 @@ ioat3_prep_interrupt_lock(struct dma_chan *c, unsigned long flags) | |||
851 | struct ioat2_dma_chan *ioat = to_ioat2_chan(c); | 805 | struct ioat2_dma_chan *ioat = to_ioat2_chan(c); |
852 | struct ioat_ring_ent *desc; | 806 | struct ioat_ring_ent *desc; |
853 | struct ioat_dma_descriptor *hw; | 807 | struct ioat_dma_descriptor *hw; |
854 | u16 idx; | ||
855 | 808 | ||
856 | if (ioat2_alloc_and_lock(&idx, ioat, 1) == 0) | 809 | if (ioat2_check_space_lock(ioat, 1) == 0) |
857 | desc = ioat2_get_ring_ent(ioat, idx); | 810 | desc = ioat2_get_ring_ent(ioat, ioat->head); |
858 | else | 811 | else |
859 | return NULL; | 812 | return NULL; |
860 | 813 | ||