aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/dma/ioat
diff options
context:
space:
mode:
authorDan Williams <dan.j.williams@intel.com>2009-09-08 15:02:01 -0400
committerDan Williams <dan.j.williams@intel.com>2009-09-08 20:38:54 -0400
commita309218acee8606f7e235da20cc826eb06d9b0f6 (patch)
treeabf2cc9830b6a5a52a165e6a736e85cd5d7b36c0 /drivers/dma/ioat
parent09c8a5b85e5f1e74a19bdd7c85547429d51df1cd (diff)
ioat2,3: dynamically resize descriptor ring
Increment the allocation order of the descriptor ring every time we run out of descriptors, up to a maximum allocation order specified by the module parameter 'ioat_ring_max_alloc_order'. After each idle period, decrement the allocation order, down to a minimum order of 'ioat_ring_alloc_order' (i.e. the default ring size, tunable as a module parameter). Signed-off-by: Maciej Sosnowski <maciej.sosnowski@intel.com> Signed-off-by: Dan Williams <dan.j.williams@intel.com>
Diffstat (limited to 'drivers/dma/ioat')
-rw-r--r--drivers/dma/ioat/dma.h1
-rw-r--r--drivers/dma/ioat/dma_v2.c215
-rw-r--r--drivers/dma/ioat/dma_v2.h2
3 files changed, 187 insertions, 31 deletions
diff --git a/drivers/dma/ioat/dma.h b/drivers/dma/ioat/dma.h
index dbfccac3e80..d9d6a7e3cd7 100644
--- a/drivers/dma/ioat/dma.h
+++ b/drivers/dma/ioat/dma.h
@@ -88,6 +88,7 @@ struct ioat_chan_common {
88 #define IOAT_RESET_PENDING 2 88 #define IOAT_RESET_PENDING 2
89 struct timer_list timer; 89 struct timer_list timer;
90 #define COMPLETION_TIMEOUT msecs_to_jiffies(100) 90 #define COMPLETION_TIMEOUT msecs_to_jiffies(100)
91 #define IDLE_TIMEOUT msecs_to_jiffies(2000)
91 #define RESET_DELAY msecs_to_jiffies(100) 92 #define RESET_DELAY msecs_to_jiffies(100)
92 struct ioatdma_device *device; 93 struct ioatdma_device *device;
93 dma_addr_t completion_dma; 94 dma_addr_t completion_dma;
diff --git a/drivers/dma/ioat/dma_v2.c b/drivers/dma/ioat/dma_v2.c
index 72e59a0d0f2..460b7730133 100644
--- a/drivers/dma/ioat/dma_v2.c
+++ b/drivers/dma/ioat/dma_v2.c
@@ -43,6 +43,10 @@ static int ioat_ring_alloc_order = 8;
43module_param(ioat_ring_alloc_order, int, 0644); 43module_param(ioat_ring_alloc_order, int, 0644);
44MODULE_PARM_DESC(ioat_ring_alloc_order, 44MODULE_PARM_DESC(ioat_ring_alloc_order,
45 "ioat2+: allocate 2^n descriptors per channel (default: n=8)"); 45 "ioat2+: allocate 2^n descriptors per channel (default: n=8)");
46static int ioat_ring_max_alloc_order = IOAT_MAX_ORDER;
47module_param(ioat_ring_max_alloc_order, int, 0644);
48MODULE_PARM_DESC(ioat_ring_max_alloc_order,
49 "ioat2+: upper limit for dynamic ring resizing (default: n=16)");
46 50
47static void __ioat2_issue_pending(struct ioat2_dma_chan *ioat) 51static void __ioat2_issue_pending(struct ioat2_dma_chan *ioat)
48{ 52{
@@ -168,6 +172,7 @@ static void __cleanup(struct ioat2_dma_chan *ioat, unsigned long phys_complete)
168 dev_dbg(to_dev(chan), "%s: cancel completion timeout\n", 172 dev_dbg(to_dev(chan), "%s: cancel completion timeout\n",
169 __func__); 173 __func__);
170 clear_bit(IOAT_COMPLETION_PENDING, &chan->state); 174 clear_bit(IOAT_COMPLETION_PENDING, &chan->state);
175 mod_timer(&chan->timer, jiffies + IDLE_TIMEOUT);
171 } 176 }
172} 177}
173 178
@@ -253,6 +258,8 @@ static void ioat2_restart_channel(struct ioat2_dma_chan *ioat)
253 __restart_chan(ioat); 258 __restart_chan(ioat);
254} 259}
255 260
261static bool reshape_ring(struct ioat2_dma_chan *ioat, int order);
262
256static void ioat2_timer_event(unsigned long data) 263static void ioat2_timer_event(unsigned long data)
257{ 264{
258 struct ioat2_dma_chan *ioat = (void *) data; 265 struct ioat2_dma_chan *ioat = (void *) data;
@@ -289,6 +296,23 @@ static void ioat2_timer_event(unsigned long data)
289 mod_timer(&chan->timer, jiffies + COMPLETION_TIMEOUT); 296 mod_timer(&chan->timer, jiffies + COMPLETION_TIMEOUT);
290 } 297 }
291 spin_unlock_bh(&ioat->ring_lock); 298 spin_unlock_bh(&ioat->ring_lock);
299 } else {
300 u16 active;
301
302 /* if the ring is idle, empty, and oversized try to step
303 * down the size
304 */
305 spin_lock_bh(&ioat->ring_lock);
306 active = ioat2_ring_active(ioat);
307 if (active == 0 && ioat->alloc_order > ioat_get_alloc_order())
308 reshape_ring(ioat, ioat->alloc_order-1);
309 spin_unlock_bh(&ioat->ring_lock);
310
311 /* keep shrinking until we get back to our minimum
312 * default size
313 */
314 if (ioat->alloc_order > ioat_get_alloc_order())
315 mod_timer(&chan->timer, jiffies + IDLE_TIMEOUT);
292 } 316 }
293 spin_unlock_bh(&chan->cleanup_lock); 317 spin_unlock_bh(&chan->cleanup_lock);
294} 318}
@@ -362,7 +386,7 @@ static dma_cookie_t ioat2_tx_submit_unlock(struct dma_async_tx_descriptor *tx)
362 return cookie; 386 return cookie;
363} 387}
364 388
365static struct ioat_ring_ent *ioat2_alloc_ring_ent(struct dma_chan *chan) 389static struct ioat_ring_ent *ioat2_alloc_ring_ent(struct dma_chan *chan, gfp_t flags)
366{ 390{
367 struct ioat_dma_descriptor *hw; 391 struct ioat_dma_descriptor *hw;
368 struct ioat_ring_ent *desc; 392 struct ioat_ring_ent *desc;
@@ -370,12 +394,12 @@ static struct ioat_ring_ent *ioat2_alloc_ring_ent(struct dma_chan *chan)
370 dma_addr_t phys; 394 dma_addr_t phys;
371 395
372 dma = to_ioatdma_device(chan->device); 396 dma = to_ioatdma_device(chan->device);
373 hw = pci_pool_alloc(dma->dma_pool, GFP_KERNEL, &phys); 397 hw = pci_pool_alloc(dma->dma_pool, flags, &phys);
374 if (!hw) 398 if (!hw)
375 return NULL; 399 return NULL;
376 memset(hw, 0, sizeof(*hw)); 400 memset(hw, 0, sizeof(*hw));
377 401
378 desc = kzalloc(sizeof(*desc), GFP_KERNEL); 402 desc = kzalloc(sizeof(*desc), flags);
379 if (!desc) { 403 if (!desc) {
380 pci_pool_free(dma->dma_pool, hw, phys); 404 pci_pool_free(dma->dma_pool, hw, phys);
381 return NULL; 405 return NULL;
@@ -397,6 +421,42 @@ static void ioat2_free_ring_ent(struct ioat_ring_ent *desc, struct dma_chan *cha
397 kfree(desc); 421 kfree(desc);
398} 422}
399 423
424static struct ioat_ring_ent **ioat2_alloc_ring(struct dma_chan *c, int order, gfp_t flags)
425{
426 struct ioat_ring_ent **ring;
427 int descs = 1 << order;
428 int i;
429
430 if (order > ioat_get_max_alloc_order())
431 return NULL;
432
433 /* allocate the array to hold the software ring */
434 ring = kcalloc(descs, sizeof(*ring), flags);
435 if (!ring)
436 return NULL;
437 for (i = 0; i < descs; i++) {
438 ring[i] = ioat2_alloc_ring_ent(c, flags);
439 if (!ring[i]) {
440 while (i--)
441 ioat2_free_ring_ent(ring[i], c);
442 kfree(ring);
443 return NULL;
444 }
445 set_desc_id(ring[i], i);
446 }
447
448 /* link descs */
449 for (i = 0; i < descs-1; i++) {
450 struct ioat_ring_ent *next = ring[i+1];
451 struct ioat_dma_descriptor *hw = ring[i]->hw;
452
453 hw->next = next->txd.phys;
454 }
455 ring[i]->hw->next = ring[0]->txd.phys;
456
457 return ring;
458}
459
400/* ioat2_alloc_chan_resources - allocate/initialize ioat2 descriptor ring 460/* ioat2_alloc_chan_resources - allocate/initialize ioat2 descriptor ring
401 * @chan: channel to be initialized 461 * @chan: channel to be initialized
402 */ 462 */
@@ -406,8 +466,7 @@ static int ioat2_alloc_chan_resources(struct dma_chan *c)
406 struct ioat_chan_common *chan = &ioat->base; 466 struct ioat_chan_common *chan = &ioat->base;
407 struct ioat_ring_ent **ring; 467 struct ioat_ring_ent **ring;
408 u32 chanerr; 468 u32 chanerr;
409 int descs; 469 int order;
410 int i;
411 470
412 /* have we already been set up? */ 471 /* have we already been set up? */
413 if (ioat->ring) 472 if (ioat->ring)
@@ -435,32 +494,10 @@ static int ioat2_alloc_chan_resources(struct dma_chan *c)
435 writel(((u64) chan->completion_dma) >> 32, 494 writel(((u64) chan->completion_dma) >> 32,
436 chan->reg_base + IOAT_CHANCMP_OFFSET_HIGH); 495 chan->reg_base + IOAT_CHANCMP_OFFSET_HIGH);
437 496
438 ioat->alloc_order = ioat_get_alloc_order(); 497 order = ioat_get_alloc_order();
439 descs = 1 << ioat->alloc_order; 498 ring = ioat2_alloc_ring(c, order, GFP_KERNEL);
440
441 /* allocate the array to hold the software ring */
442 ring = kcalloc(descs, sizeof(*ring), GFP_KERNEL);
443 if (!ring) 499 if (!ring)
444 return -ENOMEM; 500 return -ENOMEM;
445 for (i = 0; i < descs; i++) {
446 ring[i] = ioat2_alloc_ring_ent(c);
447 if (!ring[i]) {
448 while (i--)
449 ioat2_free_ring_ent(ring[i], c);
450 kfree(ring);
451 return -ENOMEM;
452 }
453 set_desc_id(ring[i], i);
454 }
455
456 /* link descs */
457 for (i = 0; i < descs-1; i++) {
458 struct ioat_ring_ent *next = ring[i+1];
459 struct ioat_dma_descriptor *hw = ring[i]->hw;
460
461 hw->next = next->txd.phys;
462 }
463 ring[i]->hw->next = ring[0]->txd.phys;
464 501
465 spin_lock_bh(&ioat->ring_lock); 502 spin_lock_bh(&ioat->ring_lock);
466 ioat->ring = ring; 503 ioat->ring = ring;
@@ -468,12 +505,120 @@ static int ioat2_alloc_chan_resources(struct dma_chan *c)
468 ioat->issued = 0; 505 ioat->issued = 0;
469 ioat->tail = 0; 506 ioat->tail = 0;
470 ioat->pending = 0; 507 ioat->pending = 0;
508 ioat->alloc_order = order;
471 spin_unlock_bh(&ioat->ring_lock); 509 spin_unlock_bh(&ioat->ring_lock);
472 510
473 tasklet_enable(&chan->cleanup_task); 511 tasklet_enable(&chan->cleanup_task);
474 ioat2_start_null_desc(ioat); 512 ioat2_start_null_desc(ioat);
475 513
476 return descs; 514 return 1 << ioat->alloc_order;
515}
516
517static bool reshape_ring(struct ioat2_dma_chan *ioat, int order)
518{
519 /* reshape differs from normal ring allocation in that we want
520 * to allocate a new software ring while only
521 * extending/truncating the hardware ring
522 */
523 struct ioat_chan_common *chan = &ioat->base;
524 struct dma_chan *c = &chan->common;
525 const u16 curr_size = ioat2_ring_mask(ioat) + 1;
526 const u16 active = ioat2_ring_active(ioat);
527 const u16 new_size = 1 << order;
528 struct ioat_ring_ent **ring;
529 u16 i;
530
531 if (order > ioat_get_max_alloc_order())
532 return false;
533
534 /* double check that we have at least 1 free descriptor */
535 if (active == curr_size)
536 return false;
537
538 /* when shrinking, verify that we can hold the current active
539 * set in the new ring
540 */
541 if (active >= new_size)
542 return false;
543
544 /* allocate the array to hold the software ring */
545 ring = kcalloc(new_size, sizeof(*ring), GFP_NOWAIT);
546 if (!ring)
547 return false;
548
549 /* allocate/trim descriptors as needed */
550 if (new_size > curr_size) {
551 /* copy current descriptors to the new ring */
552 for (i = 0; i < curr_size; i++) {
553 u16 curr_idx = (ioat->tail+i) & (curr_size-1);
554 u16 new_idx = (ioat->tail+i) & (new_size-1);
555
556 ring[new_idx] = ioat->ring[curr_idx];
557 set_desc_id(ring[new_idx], new_idx);
558 }
559
560 /* add new descriptors to the ring */
561 for (i = curr_size; i < new_size; i++) {
562 u16 new_idx = (ioat->tail+i) & (new_size-1);
563
564 ring[new_idx] = ioat2_alloc_ring_ent(c, GFP_NOWAIT);
565 if (!ring[new_idx]) {
566 while (i--) {
567 u16 new_idx = (ioat->tail+i) & (new_size-1);
568
569 ioat2_free_ring_ent(ring[new_idx], c);
570 }
571 kfree(ring);
572 return false;
573 }
574 set_desc_id(ring[new_idx], new_idx);
575 }
576
577 /* hw link new descriptors */
578 for (i = curr_size-1; i < new_size; i++) {
579 u16 new_idx = (ioat->tail+i) & (new_size-1);
580 struct ioat_ring_ent *next = ring[(new_idx+1) & (new_size-1)];
581 struct ioat_dma_descriptor *hw = ring[new_idx]->hw;
582
583 hw->next = next->txd.phys;
584 }
585 } else {
586 struct ioat_dma_descriptor *hw;
587 struct ioat_ring_ent *next;
588
589 /* copy current descriptors to the new ring, dropping the
590 * removed descriptors
591 */
592 for (i = 0; i < new_size; i++) {
593 u16 curr_idx = (ioat->tail+i) & (curr_size-1);
594 u16 new_idx = (ioat->tail+i) & (new_size-1);
595
596 ring[new_idx] = ioat->ring[curr_idx];
597 set_desc_id(ring[new_idx], new_idx);
598 }
599
600 /* free deleted descriptors */
601 for (i = new_size; i < curr_size; i++) {
602 struct ioat_ring_ent *ent;
603
604 ent = ioat2_get_ring_ent(ioat, ioat->tail+i);
605 ioat2_free_ring_ent(ent, c);
606 }
607
608 /* fix up hardware ring */
609 hw = ring[(ioat->tail+new_size-1) & (new_size-1)]->hw;
610 next = ring[(ioat->tail+new_size) & (new_size-1)];
611 hw->next = next->txd.phys;
612 }
613
614 dev_dbg(to_dev(chan), "%s: allocated %d descriptors\n",
615 __func__, new_size);
616
617 kfree(ioat->ring);
618 ioat->ring = ring;
619 ioat->alloc_order = order;
620
621 return true;
477} 622}
478 623
479/** 624/**
@@ -487,7 +632,15 @@ static int ioat2_alloc_and_lock(u16 *idx, struct ioat2_dma_chan *ioat, int num_d
487 struct ioat_chan_common *chan = &ioat->base; 632 struct ioat_chan_common *chan = &ioat->base;
488 633
489 spin_lock_bh(&ioat->ring_lock); 634 spin_lock_bh(&ioat->ring_lock);
490 if (unlikely(ioat2_ring_space(ioat) < num_descs)) { 635 /* never allow the last descriptor to be consumed, we need at
636 * least one free at all times to allow for on-the-fly ring
637 * resizing.
638 */
639 while (unlikely(ioat2_ring_space(ioat) <= num_descs)) {
640 if (reshape_ring(ioat, ioat->alloc_order + 1) &&
641 ioat2_ring_space(ioat) > num_descs)
642 break;
643
491 if (printk_ratelimit()) 644 if (printk_ratelimit())
492 dev_dbg(to_dev(chan), 645 dev_dbg(to_dev(chan),
493 "%s: ring full! num_descs: %d (%x:%x:%x)\n", 646 "%s: ring full! num_descs: %d (%x:%x:%x)\n",
diff --git a/drivers/dma/ioat/dma_v2.h b/drivers/dma/ioat/dma_v2.h
index 73b04a2eb4b..9baa3d6065f 100644
--- a/drivers/dma/ioat/dma_v2.h
+++ b/drivers/dma/ioat/dma_v2.h
@@ -37,6 +37,8 @@ extern int ioat_pending_level;
37#define IOAT_MAX_ORDER 16 37#define IOAT_MAX_ORDER 16
38#define ioat_get_alloc_order() \ 38#define ioat_get_alloc_order() \
39 (min(ioat_ring_alloc_order, IOAT_MAX_ORDER)) 39 (min(ioat_ring_alloc_order, IOAT_MAX_ORDER))
40#define ioat_get_max_alloc_order() \
41 (min(ioat_ring_max_alloc_order, IOAT_MAX_ORDER))
40 42
41/* struct ioat2_dma_chan - ioat v2 / v3 channel attributes 43/* struct ioat2_dma_chan - ioat v2 / v3 channel attributes
42 * @base: common ioat channel parameters 44 * @base: common ioat channel parameters