aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDan Williams <dan.j.williams@intel.com>2010-03-03 23:21:13 -0500
committerDan Williams <dan.j.williams@intel.com>2010-03-03 23:21:13 -0500
commitb9cc98697d1ca35a86bbb708acc6d93993c28f0f (patch)
tree22bace58c86068483209b8b2d9a2b6238eb0c179
parentaa75db0080603bae27961c0502812dfd0f522bb3 (diff)
ioat3: interrupt coalescing
The hardware automatically disables further interrupts after each event until rearmed. This allows a delay to be injected between the occurrence of the interrupt and the running of the cleanup routine. The delay is scaled by the descriptor backlog and then written to the INTRDELAY register, which specifies the number of microseconds to hold off interrupt delivery after an interrupt event occurs. According to powertop this reduces the interrupt rate from ~5000 intr/s to ~150 intr/s without affecting throughput (simple dd to a raid6 array). Signed-off-by: Dan Williams <dan.j.williams@intel.com>
-rw-r--r--drivers/dma/ioat/dma_v3.c37
-rw-r--r--drivers/dma/ioat/registers.h2
2 files changed, 34 insertions, 5 deletions
diff --git a/drivers/dma/ioat/dma_v3.c b/drivers/dma/ioat/dma_v3.c
index 39520f2f7da9..9988f1340186 100644
--- a/drivers/dma/ioat/dma_v3.c
+++ b/drivers/dma/ioat/dma_v3.c
@@ -295,15 +295,23 @@ static void __cleanup(struct ioat2_dma_chan *ioat, unsigned long phys_complete)
295 ioat->tail += i; 295 ioat->tail += i;
296 BUG_ON(active && !seen_current); /* no active descs have written a completion? */ 296 BUG_ON(active && !seen_current); /* no active descs have written a completion? */
297 chan->last_completion = phys_complete; 297 chan->last_completion = phys_complete;
298 if (ioat->head == ioat->tail) { 298
299 active = ioat2_ring_active(ioat);
300 if (active == 0) {
299 dev_dbg(to_dev(chan), "%s: cancel completion timeout\n", 301 dev_dbg(to_dev(chan), "%s: cancel completion timeout\n",
300 __func__); 302 __func__);
301 clear_bit(IOAT_COMPLETION_PENDING, &chan->state); 303 clear_bit(IOAT_COMPLETION_PENDING, &chan->state);
302 mod_timer(&chan->timer, jiffies + IDLE_TIMEOUT); 304 mod_timer(&chan->timer, jiffies + IDLE_TIMEOUT);
303 } 305 }
306 /* 5 microsecond delay per pending descriptor */
307 writew(min((5 * active), IOAT_INTRDELAY_MASK),
308 chan->device->reg_base + IOAT_INTRDELAY_OFFSET);
304} 309}
305 310
306static void ioat3_cleanup(struct ioat2_dma_chan *ioat) 311/* try to cleanup, but yield (via spin_trylock) to incoming submissions
312 * with the expectation that we will immediately poll again shortly
313 */
314static void ioat3_cleanup_poll(struct ioat2_dma_chan *ioat)
307{ 315{
308 struct ioat_chan_common *chan = &ioat->base; 316 struct ioat_chan_common *chan = &ioat->base;
309 unsigned long phys_complete; 317 unsigned long phys_complete;
@@ -329,11 +337,32 @@ static void ioat3_cleanup(struct ioat2_dma_chan *ioat)
329 spin_unlock_bh(&chan->cleanup_lock); 337 spin_unlock_bh(&chan->cleanup_lock);
330} 338}
331 339
340/* run cleanup now because we already delayed the interrupt via INTRDELAY */
341static void ioat3_cleanup_sync(struct ioat2_dma_chan *ioat)
342{
343 struct ioat_chan_common *chan = &ioat->base;
344 unsigned long phys_complete;
345
346 prefetch(chan->completion);
347
348 spin_lock_bh(&chan->cleanup_lock);
349 if (!ioat_cleanup_preamble(chan, &phys_complete)) {
350 spin_unlock_bh(&chan->cleanup_lock);
351 return;
352 }
353 spin_lock_bh(&ioat->ring_lock);
354
355 __cleanup(ioat, phys_complete);
356
357 spin_unlock_bh(&ioat->ring_lock);
358 spin_unlock_bh(&chan->cleanup_lock);
359}
360
332static void ioat3_cleanup_tasklet(unsigned long data) 361static void ioat3_cleanup_tasklet(unsigned long data)
333{ 362{
334 struct ioat2_dma_chan *ioat = (void *) data; 363 struct ioat2_dma_chan *ioat = (void *) data;
335 364
336 ioat3_cleanup(ioat); 365 ioat3_cleanup_sync(ioat);
337 writew(IOAT_CHANCTRL_RUN, ioat->base.reg_base + IOAT_CHANCTRL_OFFSET); 366 writew(IOAT_CHANCTRL_RUN, ioat->base.reg_base + IOAT_CHANCTRL_OFFSET);
338} 367}
339 368
@@ -417,7 +446,7 @@ ioat3_is_complete(struct dma_chan *c, dma_cookie_t cookie,
417 if (ioat_is_complete(c, cookie, done, used) == DMA_SUCCESS) 446 if (ioat_is_complete(c, cookie, done, used) == DMA_SUCCESS)
418 return DMA_SUCCESS; 447 return DMA_SUCCESS;
419 448
420 ioat3_cleanup(ioat); 449 ioat3_cleanup_poll(ioat);
421 450
422 return ioat_is_complete(c, cookie, done, used); 451 return ioat_is_complete(c, cookie, done, used);
423} 452}
diff --git a/drivers/dma/ioat/registers.h b/drivers/dma/ioat/registers.h
index e8ae63baf588..1391798542b6 100644
--- a/drivers/dma/ioat/registers.h
+++ b/drivers/dma/ioat/registers.h
@@ -60,7 +60,7 @@
60#define IOAT_PERPORTOFFSET_OFFSET 0x0A /* 16-bit */ 60#define IOAT_PERPORTOFFSET_OFFSET 0x0A /* 16-bit */
61 61
62#define IOAT_INTRDELAY_OFFSET 0x0C /* 16-bit */ 62#define IOAT_INTRDELAY_OFFSET 0x0C /* 16-bit */
63#define IOAT_INTRDELAY_INT_DELAY_MASK 0x3FFF /* Interrupt Delay Time */ 63#define IOAT_INTRDELAY_MASK 0x3FFF /* Interrupt Delay Time */
64#define IOAT_INTRDELAY_COALESE_SUPPORT 0x8000 /* Interrupt Coalescing Supported */ 64#define IOAT_INTRDELAY_COALESE_SUPPORT 0x8000 /* Interrupt Coalescing Supported */
65 65
66#define IOAT_DEVICE_STATUS_OFFSET 0x0E /* 16-bit */ 66#define IOAT_DEVICE_STATUS_OFFSET 0x0E /* 16-bit */