diff options
author | Dan Williams <dan.j.williams@intel.com> | 2010-03-03 23:21:13 -0500 |
---|---|---|
committer | Dan Williams <dan.j.williams@intel.com> | 2010-03-03 23:21:13 -0500 |
commit | b9cc98697d1ca35a86bbb708acc6d93993c28f0f (patch) | |
tree | 22bace58c86068483209b8b2d9a2b6238eb0c179 /drivers | |
parent | aa75db0080603bae27961c0502812dfd0f522bb3 (diff) |
ioat3: interrupt coalescing
The hardware automatically disables further interrupts after each event
until rearmed. This allows a delay to be injected between the occurrence
of the interrupt and the running of the cleanup routine. The delay is
scaled by the descriptor backlog and then written to the INTRDELAY
register which specifies the number of microseconds to hold off
interrupt delivery after an interrupt event occurs. According to
powertop this reduces the interrupt rate from ~5000 intr/s to ~150
intr/s without affecting throughput (simple dd to a raid6 array).
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
Diffstat (limited to 'drivers')
-rw-r--r-- | drivers/dma/ioat/dma_v3.c | 37 | ||||
-rw-r--r-- | drivers/dma/ioat/registers.h | 2 |
2 files changed, 34 insertions, 5 deletions
diff --git a/drivers/dma/ioat/dma_v3.c b/drivers/dma/ioat/dma_v3.c index 39520f2f7da9..9988f1340186 100644 --- a/drivers/dma/ioat/dma_v3.c +++ b/drivers/dma/ioat/dma_v3.c | |||
@@ -295,15 +295,23 @@ static void __cleanup(struct ioat2_dma_chan *ioat, unsigned long phys_complete) | |||
295 | ioat->tail += i; | 295 | ioat->tail += i; |
296 | BUG_ON(active && !seen_current); /* no active descs have written a completion? */ | 296 | BUG_ON(active && !seen_current); /* no active descs have written a completion? */ |
297 | chan->last_completion = phys_complete; | 297 | chan->last_completion = phys_complete; |
298 | if (ioat->head == ioat->tail) { | 298 | |
299 | active = ioat2_ring_active(ioat); | ||
300 | if (active == 0) { | ||
299 | dev_dbg(to_dev(chan), "%s: cancel completion timeout\n", | 301 | dev_dbg(to_dev(chan), "%s: cancel completion timeout\n", |
300 | __func__); | 302 | __func__); |
301 | clear_bit(IOAT_COMPLETION_PENDING, &chan->state); | 303 | clear_bit(IOAT_COMPLETION_PENDING, &chan->state); |
302 | mod_timer(&chan->timer, jiffies + IDLE_TIMEOUT); | 304 | mod_timer(&chan->timer, jiffies + IDLE_TIMEOUT); |
303 | } | 305 | } |
306 | /* 5 microsecond delay per pending descriptor */ | ||
307 | writew(min((5 * active), IOAT_INTRDELAY_MASK), | ||
308 | chan->device->reg_base + IOAT_INTRDELAY_OFFSET); | ||
304 | } | 309 | } |
305 | 310 | ||
306 | static void ioat3_cleanup(struct ioat2_dma_chan *ioat) | 311 | /* try to cleanup, but yield (via spin_trylock) to incoming submissions |
312 | * with the expectation that we will immediately poll again shortly | ||
313 | */ | ||
314 | static void ioat3_cleanup_poll(struct ioat2_dma_chan *ioat) | ||
307 | { | 315 | { |
308 | struct ioat_chan_common *chan = &ioat->base; | 316 | struct ioat_chan_common *chan = &ioat->base; |
309 | unsigned long phys_complete; | 317 | unsigned long phys_complete; |
@@ -329,11 +337,32 @@ static void ioat3_cleanup(struct ioat2_dma_chan *ioat) | |||
329 | spin_unlock_bh(&chan->cleanup_lock); | 337 | spin_unlock_bh(&chan->cleanup_lock); |
330 | } | 338 | } |
331 | 339 | ||
340 | /* run cleanup now because we already delayed the interrupt via INTRDELAY */ | ||
341 | static void ioat3_cleanup_sync(struct ioat2_dma_chan *ioat) | ||
342 | { | ||
343 | struct ioat_chan_common *chan = &ioat->base; | ||
344 | unsigned long phys_complete; | ||
345 | |||
346 | prefetch(chan->completion); | ||
347 | |||
348 | spin_lock_bh(&chan->cleanup_lock); | ||
349 | if (!ioat_cleanup_preamble(chan, &phys_complete)) { | ||
350 | spin_unlock_bh(&chan->cleanup_lock); | ||
351 | return; | ||
352 | } | ||
353 | spin_lock_bh(&ioat->ring_lock); | ||
354 | |||
355 | __cleanup(ioat, phys_complete); | ||
356 | |||
357 | spin_unlock_bh(&ioat->ring_lock); | ||
358 | spin_unlock_bh(&chan->cleanup_lock); | ||
359 | } | ||
360 | |||
332 | static void ioat3_cleanup_tasklet(unsigned long data) | 361 | static void ioat3_cleanup_tasklet(unsigned long data) |
333 | { | 362 | { |
334 | struct ioat2_dma_chan *ioat = (void *) data; | 363 | struct ioat2_dma_chan *ioat = (void *) data; |
335 | 364 | ||
336 | ioat3_cleanup(ioat); | 365 | ioat3_cleanup_sync(ioat); |
337 | writew(IOAT_CHANCTRL_RUN, ioat->base.reg_base + IOAT_CHANCTRL_OFFSET); | 366 | writew(IOAT_CHANCTRL_RUN, ioat->base.reg_base + IOAT_CHANCTRL_OFFSET); |
338 | } | 367 | } |
339 | 368 | ||
@@ -417,7 +446,7 @@ ioat3_is_complete(struct dma_chan *c, dma_cookie_t cookie, | |||
417 | if (ioat_is_complete(c, cookie, done, used) == DMA_SUCCESS) | 446 | if (ioat_is_complete(c, cookie, done, used) == DMA_SUCCESS) |
418 | return DMA_SUCCESS; | 447 | return DMA_SUCCESS; |
419 | 448 | ||
420 | ioat3_cleanup(ioat); | 449 | ioat3_cleanup_poll(ioat); |
421 | 450 | ||
422 | return ioat_is_complete(c, cookie, done, used); | 451 | return ioat_is_complete(c, cookie, done, used); |
423 | } | 452 | } |
diff --git a/drivers/dma/ioat/registers.h b/drivers/dma/ioat/registers.h index e8ae63baf588..1391798542b6 100644 --- a/drivers/dma/ioat/registers.h +++ b/drivers/dma/ioat/registers.h | |||
@@ -60,7 +60,7 @@ | |||
60 | #define IOAT_PERPORTOFFSET_OFFSET 0x0A /* 16-bit */ | 60 | #define IOAT_PERPORTOFFSET_OFFSET 0x0A /* 16-bit */ |
61 | 61 | ||
62 | #define IOAT_INTRDELAY_OFFSET 0x0C /* 16-bit */ | 62 | #define IOAT_INTRDELAY_OFFSET 0x0C /* 16-bit */ |
63 | #define IOAT_INTRDELAY_INT_DELAY_MASK 0x3FFF /* Interrupt Delay Time */ | 63 | #define IOAT_INTRDELAY_MASK 0x3FFF /* Interrupt Delay Time */ |
64 | #define IOAT_INTRDELAY_COALESE_SUPPORT 0x8000 /* Interrupt Coalescing Supported */ | 64 | #define IOAT_INTRDELAY_COALESE_SUPPORT 0x8000 /* Interrupt Coalescing Supported */ |
65 | 65 | ||
66 | #define IOAT_DEVICE_STATUS_OFFSET 0x0E /* 16-bit */ | 66 | #define IOAT_DEVICE_STATUS_OFFSET 0x0E /* 16-bit */ |