diff options
author | Dan Williams <dan.j.williams@intel.com> | 2009-09-08 15:02:01 -0400 |
---|---|---|
committer | Dan Williams <dan.j.williams@intel.com> | 2009-09-08 20:38:54 -0400 |
commit | a309218acee8606f7e235da20cc826eb06d9b0f6 (patch) | |
tree | abf2cc9830b6a5a52a165e6a736e85cd5d7b36c0 /drivers/dma/ioat | |
parent | 09c8a5b85e5f1e74a19bdd7c85547429d51df1cd (diff) |
ioat2,3: dynamically resize descriptor ring
Increment the allocation order of the descriptor ring every time we run
out of descriptors, up to a maximum allocation order specified by the
module parameter 'ioat_ring_max_alloc_order'. After each idle period,
decrement the allocation order down to a minimum order of
'ioat_ring_alloc_order' (i.e. the default ring size, tunable as a module
parameter).
Signed-off-by: Maciej Sosnowski <maciej.sosnowski@intel.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
Diffstat (limited to 'drivers/dma/ioat')
-rw-r--r-- | drivers/dma/ioat/dma.h | 1 | ||||
-rw-r--r-- | drivers/dma/ioat/dma_v2.c | 215 | ||||
-rw-r--r-- | drivers/dma/ioat/dma_v2.h | 2 |
3 files changed, 187 insertions, 31 deletions
diff --git a/drivers/dma/ioat/dma.h b/drivers/dma/ioat/dma.h index dbfccac3e80..d9d6a7e3cd7 100644 --- a/drivers/dma/ioat/dma.h +++ b/drivers/dma/ioat/dma.h | |||
@@ -88,6 +88,7 @@ struct ioat_chan_common { | |||
88 | #define IOAT_RESET_PENDING 2 | 88 | #define IOAT_RESET_PENDING 2 |
89 | struct timer_list timer; | 89 | struct timer_list timer; |
90 | #define COMPLETION_TIMEOUT msecs_to_jiffies(100) | 90 | #define COMPLETION_TIMEOUT msecs_to_jiffies(100) |
91 | #define IDLE_TIMEOUT msecs_to_jiffies(2000) | ||
91 | #define RESET_DELAY msecs_to_jiffies(100) | 92 | #define RESET_DELAY msecs_to_jiffies(100) |
92 | struct ioatdma_device *device; | 93 | struct ioatdma_device *device; |
93 | dma_addr_t completion_dma; | 94 | dma_addr_t completion_dma; |
diff --git a/drivers/dma/ioat/dma_v2.c b/drivers/dma/ioat/dma_v2.c index 72e59a0d0f2..460b7730133 100644 --- a/drivers/dma/ioat/dma_v2.c +++ b/drivers/dma/ioat/dma_v2.c | |||
@@ -43,6 +43,10 @@ static int ioat_ring_alloc_order = 8; | |||
43 | module_param(ioat_ring_alloc_order, int, 0644); | 43 | module_param(ioat_ring_alloc_order, int, 0644); |
44 | MODULE_PARM_DESC(ioat_ring_alloc_order, | 44 | MODULE_PARM_DESC(ioat_ring_alloc_order, |
45 | "ioat2+: allocate 2^n descriptors per channel (default: n=8)"); | 45 | "ioat2+: allocate 2^n descriptors per channel (default: n=8)"); |
46 | static int ioat_ring_max_alloc_order = IOAT_MAX_ORDER; | ||
47 | module_param(ioat_ring_max_alloc_order, int, 0644); | ||
48 | MODULE_PARM_DESC(ioat_ring_max_alloc_order, | ||
49 | "ioat2+: upper limit for dynamic ring resizing (default: n=16)"); | ||
46 | 50 | ||
47 | static void __ioat2_issue_pending(struct ioat2_dma_chan *ioat) | 51 | static void __ioat2_issue_pending(struct ioat2_dma_chan *ioat) |
48 | { | 52 | { |
@@ -168,6 +172,7 @@ static void __cleanup(struct ioat2_dma_chan *ioat, unsigned long phys_complete) | |||
168 | dev_dbg(to_dev(chan), "%s: cancel completion timeout\n", | 172 | dev_dbg(to_dev(chan), "%s: cancel completion timeout\n", |
169 | __func__); | 173 | __func__); |
170 | clear_bit(IOAT_COMPLETION_PENDING, &chan->state); | 174 | clear_bit(IOAT_COMPLETION_PENDING, &chan->state); |
175 | mod_timer(&chan->timer, jiffies + IDLE_TIMEOUT); | ||
171 | } | 176 | } |
172 | } | 177 | } |
173 | 178 | ||
@@ -253,6 +258,8 @@ static void ioat2_restart_channel(struct ioat2_dma_chan *ioat) | |||
253 | __restart_chan(ioat); | 258 | __restart_chan(ioat); |
254 | } | 259 | } |
255 | 260 | ||
261 | static bool reshape_ring(struct ioat2_dma_chan *ioat, int order); | ||
262 | |||
256 | static void ioat2_timer_event(unsigned long data) | 263 | static void ioat2_timer_event(unsigned long data) |
257 | { | 264 | { |
258 | struct ioat2_dma_chan *ioat = (void *) data; | 265 | struct ioat2_dma_chan *ioat = (void *) data; |
@@ -289,6 +296,23 @@ static void ioat2_timer_event(unsigned long data) | |||
289 | mod_timer(&chan->timer, jiffies + COMPLETION_TIMEOUT); | 296 | mod_timer(&chan->timer, jiffies + COMPLETION_TIMEOUT); |
290 | } | 297 | } |
291 | spin_unlock_bh(&ioat->ring_lock); | 298 | spin_unlock_bh(&ioat->ring_lock); |
299 | } else { | ||
300 | u16 active; | ||
301 | |||
302 | /* if the ring is idle, empty, and oversized try to step | ||
303 | * down the size | ||
304 | */ | ||
305 | spin_lock_bh(&ioat->ring_lock); | ||
306 | active = ioat2_ring_active(ioat); | ||
307 | if (active == 0 && ioat->alloc_order > ioat_get_alloc_order()) | ||
308 | reshape_ring(ioat, ioat->alloc_order-1); | ||
309 | spin_unlock_bh(&ioat->ring_lock); | ||
310 | |||
311 | /* keep shrinking until we get back to our minimum | ||
312 | * default size | ||
313 | */ | ||
314 | if (ioat->alloc_order > ioat_get_alloc_order()) | ||
315 | mod_timer(&chan->timer, jiffies + IDLE_TIMEOUT); | ||
292 | } | 316 | } |
293 | spin_unlock_bh(&chan->cleanup_lock); | 317 | spin_unlock_bh(&chan->cleanup_lock); |
294 | } | 318 | } |
@@ -362,7 +386,7 @@ static dma_cookie_t ioat2_tx_submit_unlock(struct dma_async_tx_descriptor *tx) | |||
362 | return cookie; | 386 | return cookie; |
363 | } | 387 | } |
364 | 388 | ||
365 | static struct ioat_ring_ent *ioat2_alloc_ring_ent(struct dma_chan *chan) | 389 | static struct ioat_ring_ent *ioat2_alloc_ring_ent(struct dma_chan *chan, gfp_t flags) |
366 | { | 390 | { |
367 | struct ioat_dma_descriptor *hw; | 391 | struct ioat_dma_descriptor *hw; |
368 | struct ioat_ring_ent *desc; | 392 | struct ioat_ring_ent *desc; |
@@ -370,12 +394,12 @@ static struct ioat_ring_ent *ioat2_alloc_ring_ent(struct dma_chan *chan) | |||
370 | dma_addr_t phys; | 394 | dma_addr_t phys; |
371 | 395 | ||
372 | dma = to_ioatdma_device(chan->device); | 396 | dma = to_ioatdma_device(chan->device); |
373 | hw = pci_pool_alloc(dma->dma_pool, GFP_KERNEL, &phys); | 397 | hw = pci_pool_alloc(dma->dma_pool, flags, &phys); |
374 | if (!hw) | 398 | if (!hw) |
375 | return NULL; | 399 | return NULL; |
376 | memset(hw, 0, sizeof(*hw)); | 400 | memset(hw, 0, sizeof(*hw)); |
377 | 401 | ||
378 | desc = kzalloc(sizeof(*desc), GFP_KERNEL); | 402 | desc = kzalloc(sizeof(*desc), flags); |
379 | if (!desc) { | 403 | if (!desc) { |
380 | pci_pool_free(dma->dma_pool, hw, phys); | 404 | pci_pool_free(dma->dma_pool, hw, phys); |
381 | return NULL; | 405 | return NULL; |
@@ -397,6 +421,42 @@ static void ioat2_free_ring_ent(struct ioat_ring_ent *desc, struct dma_chan *cha | |||
397 | kfree(desc); | 421 | kfree(desc); |
398 | } | 422 | } |
399 | 423 | ||
424 | static struct ioat_ring_ent **ioat2_alloc_ring(struct dma_chan *c, int order, gfp_t flags) | ||
425 | { | ||
426 | struct ioat_ring_ent **ring; | ||
427 | int descs = 1 << order; | ||
428 | int i; | ||
429 | |||
430 | if (order > ioat_get_max_alloc_order()) | ||
431 | return NULL; | ||
432 | |||
433 | /* allocate the array to hold the software ring */ | ||
434 | ring = kcalloc(descs, sizeof(*ring), flags); | ||
435 | if (!ring) | ||
436 | return NULL; | ||
437 | for (i = 0; i < descs; i++) { | ||
438 | ring[i] = ioat2_alloc_ring_ent(c, flags); | ||
439 | if (!ring[i]) { | ||
440 | while (i--) | ||
441 | ioat2_free_ring_ent(ring[i], c); | ||
442 | kfree(ring); | ||
443 | return NULL; | ||
444 | } | ||
445 | set_desc_id(ring[i], i); | ||
446 | } | ||
447 | |||
448 | /* link descs */ | ||
449 | for (i = 0; i < descs-1; i++) { | ||
450 | struct ioat_ring_ent *next = ring[i+1]; | ||
451 | struct ioat_dma_descriptor *hw = ring[i]->hw; | ||
452 | |||
453 | hw->next = next->txd.phys; | ||
454 | } | ||
455 | ring[i]->hw->next = ring[0]->txd.phys; | ||
456 | |||
457 | return ring; | ||
458 | } | ||
459 | |||
400 | /* ioat2_alloc_chan_resources - allocate/initialize ioat2 descriptor ring | 460 | /* ioat2_alloc_chan_resources - allocate/initialize ioat2 descriptor ring |
401 | * @chan: channel to be initialized | 461 | * @chan: channel to be initialized |
402 | */ | 462 | */ |
@@ -406,8 +466,7 @@ static int ioat2_alloc_chan_resources(struct dma_chan *c) | |||
406 | struct ioat_chan_common *chan = &ioat->base; | 466 | struct ioat_chan_common *chan = &ioat->base; |
407 | struct ioat_ring_ent **ring; | 467 | struct ioat_ring_ent **ring; |
408 | u32 chanerr; | 468 | u32 chanerr; |
409 | int descs; | 469 | int order; |
410 | int i; | ||
411 | 470 | ||
412 | /* have we already been set up? */ | 471 | /* have we already been set up? */ |
413 | if (ioat->ring) | 472 | if (ioat->ring) |
@@ -435,32 +494,10 @@ static int ioat2_alloc_chan_resources(struct dma_chan *c) | |||
435 | writel(((u64) chan->completion_dma) >> 32, | 494 | writel(((u64) chan->completion_dma) >> 32, |
436 | chan->reg_base + IOAT_CHANCMP_OFFSET_HIGH); | 495 | chan->reg_base + IOAT_CHANCMP_OFFSET_HIGH); |
437 | 496 | ||
438 | ioat->alloc_order = ioat_get_alloc_order(); | 497 | order = ioat_get_alloc_order(); |
439 | descs = 1 << ioat->alloc_order; | 498 | ring = ioat2_alloc_ring(c, order, GFP_KERNEL); |
440 | |||
441 | /* allocate the array to hold the software ring */ | ||
442 | ring = kcalloc(descs, sizeof(*ring), GFP_KERNEL); | ||
443 | if (!ring) | 499 | if (!ring) |
444 | return -ENOMEM; | 500 | return -ENOMEM; |
445 | for (i = 0; i < descs; i++) { | ||
446 | ring[i] = ioat2_alloc_ring_ent(c); | ||
447 | if (!ring[i]) { | ||
448 | while (i--) | ||
449 | ioat2_free_ring_ent(ring[i], c); | ||
450 | kfree(ring); | ||
451 | return -ENOMEM; | ||
452 | } | ||
453 | set_desc_id(ring[i], i); | ||
454 | } | ||
455 | |||
456 | /* link descs */ | ||
457 | for (i = 0; i < descs-1; i++) { | ||
458 | struct ioat_ring_ent *next = ring[i+1]; | ||
459 | struct ioat_dma_descriptor *hw = ring[i]->hw; | ||
460 | |||
461 | hw->next = next->txd.phys; | ||
462 | } | ||
463 | ring[i]->hw->next = ring[0]->txd.phys; | ||
464 | 501 | ||
465 | spin_lock_bh(&ioat->ring_lock); | 502 | spin_lock_bh(&ioat->ring_lock); |
466 | ioat->ring = ring; | 503 | ioat->ring = ring; |
@@ -468,12 +505,120 @@ static int ioat2_alloc_chan_resources(struct dma_chan *c) | |||
468 | ioat->issued = 0; | 505 | ioat->issued = 0; |
469 | ioat->tail = 0; | 506 | ioat->tail = 0; |
470 | ioat->pending = 0; | 507 | ioat->pending = 0; |
508 | ioat->alloc_order = order; | ||
471 | spin_unlock_bh(&ioat->ring_lock); | 509 | spin_unlock_bh(&ioat->ring_lock); |
472 | 510 | ||
473 | tasklet_enable(&chan->cleanup_task); | 511 | tasklet_enable(&chan->cleanup_task); |
474 | ioat2_start_null_desc(ioat); | 512 | ioat2_start_null_desc(ioat); |
475 | 513 | ||
476 | return descs; | 514 | return 1 << ioat->alloc_order; |
515 | } | ||
516 | |||
517 | static bool reshape_ring(struct ioat2_dma_chan *ioat, int order) | ||
518 | { | ||
519 | /* reshape differs from normal ring allocation in that we want | ||
520 | * to allocate a new software ring while only | ||
521 | * extending/truncating the hardware ring | ||
522 | */ | ||
523 | struct ioat_chan_common *chan = &ioat->base; | ||
524 | struct dma_chan *c = &chan->common; | ||
525 | const u16 curr_size = ioat2_ring_mask(ioat) + 1; | ||
526 | const u16 active = ioat2_ring_active(ioat); | ||
527 | const u16 new_size = 1 << order; | ||
528 | struct ioat_ring_ent **ring; | ||
529 | u16 i; | ||
530 | |||
531 | if (order > ioat_get_max_alloc_order()) | ||
532 | return false; | ||
533 | |||
534 | /* double check that we have at least 1 free descriptor */ | ||
535 | if (active == curr_size) | ||
536 | return false; | ||
537 | |||
538 | /* when shrinking, verify that we can hold the current active | ||
539 | * set in the new ring | ||
540 | */ | ||
541 | if (active >= new_size) | ||
542 | return false; | ||
543 | |||
544 | /* allocate the array to hold the software ring */ | ||
545 | ring = kcalloc(new_size, sizeof(*ring), GFP_NOWAIT); | ||
546 | if (!ring) | ||
547 | return false; | ||
548 | |||
549 | /* allocate/trim descriptors as needed */ | ||
550 | if (new_size > curr_size) { | ||
551 | /* copy current descriptors to the new ring */ | ||
552 | for (i = 0; i < curr_size; i++) { | ||
553 | u16 curr_idx = (ioat->tail+i) & (curr_size-1); | ||
554 | u16 new_idx = (ioat->tail+i) & (new_size-1); | ||
555 | |||
556 | ring[new_idx] = ioat->ring[curr_idx]; | ||
557 | set_desc_id(ring[new_idx], new_idx); | ||
558 | } | ||
559 | |||
560 | /* add new descriptors to the ring */ | ||
561 | for (i = curr_size; i < new_size; i++) { | ||
562 | u16 new_idx = (ioat->tail+i) & (new_size-1); | ||
563 | |||
564 | ring[new_idx] = ioat2_alloc_ring_ent(c, GFP_NOWAIT); | ||
565 | if (!ring[new_idx]) { | ||
566 | while (i--) { | ||
567 | u16 new_idx = (ioat->tail+i) & (new_size-1); | ||
568 | |||
569 | ioat2_free_ring_ent(ring[new_idx], c); | ||
570 | } | ||
571 | kfree(ring); | ||
572 | return false; | ||
573 | } | ||
574 | set_desc_id(ring[new_idx], new_idx); | ||
575 | } | ||
576 | |||
577 | /* hw link new descriptors */ | ||
578 | for (i = curr_size-1; i < new_size; i++) { | ||
579 | u16 new_idx = (ioat->tail+i) & (new_size-1); | ||
580 | struct ioat_ring_ent *next = ring[(new_idx+1) & (new_size-1)]; | ||
581 | struct ioat_dma_descriptor *hw = ring[new_idx]->hw; | ||
582 | |||
583 | hw->next = next->txd.phys; | ||
584 | } | ||
585 | } else { | ||
586 | struct ioat_dma_descriptor *hw; | ||
587 | struct ioat_ring_ent *next; | ||
588 | |||
589 | /* copy current descriptors to the new ring, dropping the | ||
590 | * removed descriptors | ||
591 | */ | ||
592 | for (i = 0; i < new_size; i++) { | ||
593 | u16 curr_idx = (ioat->tail+i) & (curr_size-1); | ||
594 | u16 new_idx = (ioat->tail+i) & (new_size-1); | ||
595 | |||
596 | ring[new_idx] = ioat->ring[curr_idx]; | ||
597 | set_desc_id(ring[new_idx], new_idx); | ||
598 | } | ||
599 | |||
600 | /* free deleted descriptors */ | ||
601 | for (i = new_size; i < curr_size; i++) { | ||
602 | struct ioat_ring_ent *ent; | ||
603 | |||
604 | ent = ioat2_get_ring_ent(ioat, ioat->tail+i); | ||
605 | ioat2_free_ring_ent(ent, c); | ||
606 | } | ||
607 | |||
608 | /* fix up hardware ring */ | ||
609 | hw = ring[(ioat->tail+new_size-1) & (new_size-1)]->hw; | ||
610 | next = ring[(ioat->tail+new_size) & (new_size-1)]; | ||
611 | hw->next = next->txd.phys; | ||
612 | } | ||
613 | |||
614 | dev_dbg(to_dev(chan), "%s: allocated %d descriptors\n", | ||
615 | __func__, new_size); | ||
616 | |||
617 | kfree(ioat->ring); | ||
618 | ioat->ring = ring; | ||
619 | ioat->alloc_order = order; | ||
620 | |||
621 | return true; | ||
477 | } | 622 | } |
478 | 623 | ||
479 | /** | 624 | /** |
@@ -487,7 +632,15 @@ static int ioat2_alloc_and_lock(u16 *idx, struct ioat2_dma_chan *ioat, int num_d | |||
487 | struct ioat_chan_common *chan = &ioat->base; | 632 | struct ioat_chan_common *chan = &ioat->base; |
488 | 633 | ||
489 | spin_lock_bh(&ioat->ring_lock); | 634 | spin_lock_bh(&ioat->ring_lock); |
490 | if (unlikely(ioat2_ring_space(ioat) < num_descs)) { | 635 | /* never allow the last descriptor to be consumed, we need at |
636 | * least one free at all times to allow for on-the-fly ring | ||
637 | * resizing. | ||
638 | */ | ||
639 | while (unlikely(ioat2_ring_space(ioat) <= num_descs)) { | ||
640 | if (reshape_ring(ioat, ioat->alloc_order + 1) && | ||
641 | ioat2_ring_space(ioat) > num_descs) | ||
642 | break; | ||
643 | |||
491 | if (printk_ratelimit()) | 644 | if (printk_ratelimit()) |
492 | dev_dbg(to_dev(chan), | 645 | dev_dbg(to_dev(chan), |
493 | "%s: ring full! num_descs: %d (%x:%x:%x)\n", | 646 | "%s: ring full! num_descs: %d (%x:%x:%x)\n", |
diff --git a/drivers/dma/ioat/dma_v2.h b/drivers/dma/ioat/dma_v2.h index 73b04a2eb4b..9baa3d6065f 100644 --- a/drivers/dma/ioat/dma_v2.h +++ b/drivers/dma/ioat/dma_v2.h | |||
@@ -37,6 +37,8 @@ extern int ioat_pending_level; | |||
37 | #define IOAT_MAX_ORDER 16 | 37 | #define IOAT_MAX_ORDER 16 |
38 | #define ioat_get_alloc_order() \ | 38 | #define ioat_get_alloc_order() \ |
39 | (min(ioat_ring_alloc_order, IOAT_MAX_ORDER)) | 39 | (min(ioat_ring_alloc_order, IOAT_MAX_ORDER)) |
40 | #define ioat_get_max_alloc_order() \ | ||
41 | (min(ioat_ring_max_alloc_order, IOAT_MAX_ORDER)) | ||
40 | 42 | ||
41 | /* struct ioat2_dma_chan - ioat v2 / v3 channel attributes | 43 | /* struct ioat2_dma_chan - ioat v2 / v3 channel attributes |
42 | * @base: common ioat channel parameters | 44 | * @base: common ioat channel parameters |