diff options
author | Hugh Dickins <hugh@veritas.com> | 2008-04-06 18:56:57 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2008-04-06 19:10:08 -0400 |
commit | 164fc5dcd6a1026fc713f5c63fad899aa484888c (patch) | |
tree | 16c906e4420a6501b86fc0eeacf9cdae5cb1cf79 | |
parent | 797de7bdb253624c16144f40b72ec65d63cdcca2 (diff) |
scsi: fix sense_slab/bio swapping livelock
Since 2.6.25-rc7, I've been seeing an occasional livelock on one x86_64
machine, copying kernel trees to tmpfs, paging out to swap.
Signature: 6000 pages under writeback but never getting written; most
tasks of interest trying to reclaim, but each get_swap_bio waiting for a
bio in mempool_alloc's io_schedule_timeout(5*HZ); every five seconds an
atomic page allocation failure report from kblockd failing to allocate a
sense_buffer in __scsi_get_command.
__scsi_get_command has a (one item) free_list to protect against this,
but rc1's commit de25deb18016f66dcdede165d07654559bb332bc ("[SCSI] use
dynamically allocated sense buffer") upset that slightly. When it
fails to allocate from the separate sense_slab, instead of giving up, it
must fall back to the command free_list, which is sure to have a
sense_buffer attached.
Either my earlier -rc testing missed this, or there's some recent
contributory factor. One very significant factor is SLUB, which merges
slab caches when it can, and on 64-bit happens to merge both bio cache
and sense_slab cache into kmalloc's 128-byte cache: so that under this
swapping load, bios above are liable to gobble up all the slots needed
for scsi_cmnd sense_buffers below.
That's disturbing behaviour, and I tried a few things to fix it. Adding
a no-op constructor to the sense_slab inhibits SLUB from merging it, and
stops all the allocation failures I was seeing; but it's rather a hack,
and perhaps in different configurations we have other caches on the
swapout path which are ill-merged.
Another alternative is to revert the separate sense_slab, using
cache-line-aligned sense_buffer allocated beyond scsi_cmnd from the one
kmem_cache; but that might waste more memory, and is only a way of
diverting around the known problem.
While I don't like seeing the allocation failures, and hate the idea of
all those bios piled up above a scsi host working one by one, it does
seem to emerge fairly soon with the livelock fix. So lacking better
ideas, stick with that one clear fix for now.
Signed-off-by: Hugh Dickins <hugh@veritas.com>
Cc: James Bottomley <James.Bottomley@HansenPartnership.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>
Cc: Jens Axboe <jens.axboe@oracle.com>
Cc: Christoph Lameter <clameter@sgi.com>
Cc: Pekka Enberg <penberg@cs.helsinki.fi>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Rafael J. Wysocki <rjw@sisk.pl>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r-- | drivers/scsi/scsi.c | 22 |
1 files changed, 12 insertions, 10 deletions
diff --git a/drivers/scsi/scsi.c b/drivers/scsi/scsi.c index e5c6f6af8765..c78b836f59dd 100644 --- a/drivers/scsi/scsi.c +++ b/drivers/scsi/scsi.c | |||
@@ -181,6 +181,18 @@ struct scsi_cmnd *__scsi_get_command(struct Scsi_Host *shost, gfp_t gfp_mask) | |||
181 | cmd = kmem_cache_alloc(shost->cmd_pool->cmd_slab, | 181 | cmd = kmem_cache_alloc(shost->cmd_pool->cmd_slab, |
182 | gfp_mask | shost->cmd_pool->gfp_mask); | 182 | gfp_mask | shost->cmd_pool->gfp_mask); |
183 | 183 | ||
184 | if (likely(cmd)) { | ||
185 | buf = kmem_cache_alloc(shost->cmd_pool->sense_slab, | ||
186 | gfp_mask | shost->cmd_pool->gfp_mask); | ||
187 | if (likely(buf)) { | ||
188 | memset(cmd, 0, sizeof(*cmd)); | ||
189 | cmd->sense_buffer = buf; | ||
190 | } else { | ||
191 | kmem_cache_free(shost->cmd_pool->cmd_slab, cmd); | ||
192 | cmd = NULL; | ||
193 | } | ||
194 | } | ||
195 | |||
184 | if (unlikely(!cmd)) { | 196 | if (unlikely(!cmd)) { |
185 | unsigned long flags; | 197 | unsigned long flags; |
186 | 198 | ||
@@ -197,16 +209,6 @@ struct scsi_cmnd *__scsi_get_command(struct Scsi_Host *shost, gfp_t gfp_mask) | |||
197 | memset(cmd, 0, sizeof(*cmd)); | 209 | memset(cmd, 0, sizeof(*cmd)); |
198 | cmd->sense_buffer = buf; | 210 | cmd->sense_buffer = buf; |
199 | } | 211 | } |
200 | } else { | ||
201 | buf = kmem_cache_alloc(shost->cmd_pool->sense_slab, | ||
202 | gfp_mask | shost->cmd_pool->gfp_mask); | ||
203 | if (likely(buf)) { | ||
204 | memset(cmd, 0, sizeof(*cmd)); | ||
205 | cmd->sense_buffer = buf; | ||
206 | } else { | ||
207 | kmem_cache_free(shost->cmd_pool->cmd_slab, cmd); | ||
208 | cmd = NULL; | ||
209 | } | ||
210 | } | 212 | } |
211 | 213 | ||
212 | return cmd; | 214 | return cmd; |