aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorHugh Dickins <hugh@veritas.com>2008-04-06 18:56:57 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2008-04-06 19:10:08 -0400
commit164fc5dcd6a1026fc713f5c63fad899aa484888c (patch)
tree16c906e4420a6501b86fc0eeacf9cdae5cb1cf79
parent797de7bdb253624c16144f40b72ec65d63cdcca2 (diff)
scsi: fix sense_slab/bio swapping livelock
Since 2.6.25-rc7, I've been seeing an occasional livelock on one x86_64 machine, copying kernel trees to tmpfs, paging out to swap. Signature: 6000 pages under writeback but never getting written; most tasks of interest trying to reclaim, but each get_swap_bio waiting for a bio in mempool_alloc's io_schedule_timeout(5*HZ); every five seconds an atomic page allocation failure report from kblockd failing to allocate a sense_buffer in __scsi_get_command. __scsi_get_command has a (one item) free_list to protect against this, but rc1's [SCSI] use dynamically allocated sense buffer de25deb18016f66dcdede165d07654559bb332bc upset that slightly. When it fails to allocate from the separate sense_slab, instead of giving up, it must fall back to the command free_list, which is sure to have a sense_buffer attached. Either my earlier -rc testing missed this, or there's some recent contributory factor. One very significant factor is SLUB, which merges slab caches when it can, and on 64-bit happens to merge both bio cache and sense_slab cache into kmalloc's 128-byte cache: so that under this swapping load, bios above are liable to gobble up all the slots needed for scsi_cmnd sense_buffers below. That's disturbing behaviour, and I tried a few things to fix it. Adding a no-op constructor to the sense_slab inhibits SLUB from merging it, and stops all the allocation failures I was seeing; but it's rather a hack, and perhaps in different configurations we have other caches on the swapout path which are ill-merged. Another alternative is to revert the separate sense_slab, using cache-line-aligned sense_buffer allocated beyond scsi_cmnd from the one kmem_cache; but that might waste more memory, and is only a way of diverting around the known problem. While I don't like seeing the allocation failures, and hate the idea of all those bios piled up above a scsi host working one by one, it does seem to emerge fairly soon with the livelock fix. So lacking better ideas, stick with that one clear fix for now. Signed-off-by: Hugh Dickins <hugh@veritas.com> Cc: James Bottomley <James.Bottomley@HansenPartnership.com> Cc: Andrew Morton <akpm@linux-foundation.org> Cc: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp> Cc: Jens Axboe <jens.axboe@oracle.com> Cc: Christoph Lameter <clameter@sgi.com> Cc: Pekka Enberg <penberg@cs.helsinki.fi> Cc: Peter Zijlstra <a.p.ziljstra@chello.nl> Cc: Rafael J. Wysocki <rjw@sisk.pl> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r--drivers/scsi/scsi.c22
1 files changed, 12 insertions, 10 deletions
diff --git a/drivers/scsi/scsi.c b/drivers/scsi/scsi.c
index e5c6f6af8765..c78b836f59dd 100644
--- a/drivers/scsi/scsi.c
+++ b/drivers/scsi/scsi.c
@@ -181,6 +181,18 @@ struct scsi_cmnd *__scsi_get_command(struct Scsi_Host *shost, gfp_t gfp_mask)
181 cmd = kmem_cache_alloc(shost->cmd_pool->cmd_slab, 181 cmd = kmem_cache_alloc(shost->cmd_pool->cmd_slab,
182 gfp_mask | shost->cmd_pool->gfp_mask); 182 gfp_mask | shost->cmd_pool->gfp_mask);
183 183
184 if (likely(cmd)) {
185 buf = kmem_cache_alloc(shost->cmd_pool->sense_slab,
186 gfp_mask | shost->cmd_pool->gfp_mask);
187 if (likely(buf)) {
188 memset(cmd, 0, sizeof(*cmd));
189 cmd->sense_buffer = buf;
190 } else {
191 kmem_cache_free(shost->cmd_pool->cmd_slab, cmd);
192 cmd = NULL;
193 }
194 }
195
184 if (unlikely(!cmd)) { 196 if (unlikely(!cmd)) {
185 unsigned long flags; 197 unsigned long flags;
186 198
@@ -197,16 +209,6 @@ struct scsi_cmnd *__scsi_get_command(struct Scsi_Host *shost, gfp_t gfp_mask)
197 memset(cmd, 0, sizeof(*cmd)); 209 memset(cmd, 0, sizeof(*cmd));
198 cmd->sense_buffer = buf; 210 cmd->sense_buffer = buf;
199 } 211 }
200 } else {
201 buf = kmem_cache_alloc(shost->cmd_pool->sense_slab,
202 gfp_mask | shost->cmd_pool->gfp_mask);
203 if (likely(buf)) {
204 memset(cmd, 0, sizeof(*cmd));
205 cmd->sense_buffer = buf;
206 } else {
207 kmem_cache_free(shost->cmd_pool->cmd_slab, cmd);
208 cmd = NULL;
209 }
210 } 212 }
211 213
212 return cmd; 214 return cmd;