aboutsummaryrefslogtreecommitdiffstats
path: root/drivers
diff options
context:
space:
mode:
author: Jens Axboe <axboe@kernel.dk> 2018-12-12 08:46:55 -0500
committer: Martin K. Petersen <martin.petersen@oracle.com> 2018-12-12 20:11:54 -0500
commit: 61cce6f6eeced5ddd9cac55e807fe28b4f18c1ba (patch)
tree: 972d163b33f885328b14c9a84e53d8895fcbddb4 /drivers
parent: 9e6371d3c6913ff1707fb2c0274c9925f7aaef80 (diff)
scsi: sd: use mempool for discard special page
When boxes are run near (or to) OOM, we have a problem with the discard
page allocation in sd. If we fail allocating the special page, we return
busy, and it'll get retried. But since ordering is honored for dispatch
requests, we can keep retrying this same IO and failing. Behind that IO
could be requests that want to free memory, but they never get the
chance. This means you get repeated spews of traces like this:

[1201401.625972] Call Trace:
[1201401.631748]  dump_stack+0x4d/0x65
[1201401.639445]  warn_alloc+0xec/0x190
[1201401.647335]  __alloc_pages_slowpath+0xe84/0xf30
[1201401.657722]  ? get_page_from_freelist+0x11b/0xb10
[1201401.668475]  ? __alloc_pages_slowpath+0x2e/0xf30
[1201401.679054]  __alloc_pages_nodemask+0x1f9/0x210
[1201401.689424]  alloc_pages_current+0x8c/0x110
[1201401.699025]  sd_setup_write_same16_cmnd+0x51/0x150
[1201401.709987]  sd_init_command+0x49c/0xb70
[1201401.719029]  scsi_setup_cmnd+0x9c/0x160
[1201401.727877]  scsi_queue_rq+0x4d9/0x610
[1201401.736535]  blk_mq_dispatch_rq_list+0x19a/0x360
[1201401.747113]  blk_mq_sched_dispatch_requests+0xff/0x190
[1201401.758844]  __blk_mq_run_hw_queue+0x95/0xa0
[1201401.768653]  blk_mq_run_work_fn+0x2c/0x30
[1201401.777886]  process_one_work+0x14b/0x400
[1201401.787119]  worker_thread+0x4b/0x470
[1201401.795586]  kthread+0x110/0x150
[1201401.803089]  ? rescuer_thread+0x320/0x320
[1201401.812322]  ? kthread_park+0x90/0x90
[1201401.820787]  ? do_syscall_64+0x53/0x150
[1201401.829635]  ret_from_fork+0x29/0x40

Ensure that the discard page allocation has a mempool backing, so we
know we can make progress.

Cc: stable@vger.kernel.org
Signed-off-by: Jens Axboe <axboe@kernel.dk>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
Diffstat (limited to 'drivers')
-rw-r--r-- drivers/scsi/sd.c 23
1 files changed, 19 insertions, 4 deletions
diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
index 3bb2b3351e35..bd0a5c694a97 100644
--- a/drivers/scsi/sd.c
+++ b/drivers/scsi/sd.c
@@ -133,6 +133,7 @@ static DEFINE_MUTEX(sd_ref_mutex);
133 133
134static struct kmem_cache *sd_cdb_cache; 134static struct kmem_cache *sd_cdb_cache;
135static mempool_t *sd_cdb_pool; 135static mempool_t *sd_cdb_pool;
136static mempool_t *sd_page_pool;
136 137
137static const char *sd_cache_types[] = { 138static const char *sd_cache_types[] = {
138 "write through", "none", "write back", 139 "write through", "none", "write back",
@@ -759,9 +760,10 @@ static int sd_setup_unmap_cmnd(struct scsi_cmnd *cmd)
759 unsigned int data_len = 24; 760 unsigned int data_len = 24;
760 char *buf; 761 char *buf;
761 762
762 rq->special_vec.bv_page = alloc_page(GFP_ATOMIC | __GFP_ZERO); 763 rq->special_vec.bv_page = mempool_alloc(sd_page_pool, GFP_ATOMIC);
763 if (!rq->special_vec.bv_page) 764 if (!rq->special_vec.bv_page)
764 return BLKPREP_DEFER; 765 return BLKPREP_DEFER;
766 clear_highpage(rq->special_vec.bv_page);
765 rq->special_vec.bv_offset = 0; 767 rq->special_vec.bv_offset = 0;
766 rq->special_vec.bv_len = data_len; 768 rq->special_vec.bv_len = data_len;
767 rq->rq_flags |= RQF_SPECIAL_PAYLOAD; 769 rq->rq_flags |= RQF_SPECIAL_PAYLOAD;
@@ -792,9 +794,10 @@ static int sd_setup_write_same16_cmnd(struct scsi_cmnd *cmd, bool unmap)
792 u32 nr_sectors = blk_rq_sectors(rq) >> (ilog2(sdp->sector_size) - 9); 794 u32 nr_sectors = blk_rq_sectors(rq) >> (ilog2(sdp->sector_size) - 9);
793 u32 data_len = sdp->sector_size; 795 u32 data_len = sdp->sector_size;
794 796
795 rq->special_vec.bv_page = alloc_page(GFP_ATOMIC | __GFP_ZERO); 797 rq->special_vec.bv_page = mempool_alloc(sd_page_pool, GFP_ATOMIC);
796 if (!rq->special_vec.bv_page) 798 if (!rq->special_vec.bv_page)
797 return BLKPREP_DEFER; 799 return BLKPREP_DEFER;
800 clear_highpage(rq->special_vec.bv_page);
798 rq->special_vec.bv_offset = 0; 801 rq->special_vec.bv_offset = 0;
799 rq->special_vec.bv_len = data_len; 802 rq->special_vec.bv_len = data_len;
800 rq->rq_flags |= RQF_SPECIAL_PAYLOAD; 803 rq->rq_flags |= RQF_SPECIAL_PAYLOAD;
@@ -822,9 +825,10 @@ static int sd_setup_write_same10_cmnd(struct scsi_cmnd *cmd, bool unmap)
822 u32 nr_sectors = blk_rq_sectors(rq) >> (ilog2(sdp->sector_size) - 9); 825 u32 nr_sectors = blk_rq_sectors(rq) >> (ilog2(sdp->sector_size) - 9);
823 u32 data_len = sdp->sector_size; 826 u32 data_len = sdp->sector_size;
824 827
825 rq->special_vec.bv_page = alloc_page(GFP_ATOMIC | __GFP_ZERO); 828 rq->special_vec.bv_page = mempool_alloc(sd_page_pool, GFP_ATOMIC);
826 if (!rq->special_vec.bv_page) 829 if (!rq->special_vec.bv_page)
827 return BLKPREP_DEFER; 830 return BLKPREP_DEFER;
831 clear_highpage(rq->special_vec.bv_page);
828 rq->special_vec.bv_offset = 0; 832 rq->special_vec.bv_offset = 0;
829 rq->special_vec.bv_len = data_len; 833 rq->special_vec.bv_len = data_len;
830 rq->rq_flags |= RQF_SPECIAL_PAYLOAD; 834 rq->rq_flags |= RQF_SPECIAL_PAYLOAD;
@@ -1286,7 +1290,7 @@ static void sd_uninit_command(struct scsi_cmnd *SCpnt)
1286 u8 *cmnd; 1290 u8 *cmnd;
1287 1291
1288 if (rq->rq_flags & RQF_SPECIAL_PAYLOAD) 1292 if (rq->rq_flags & RQF_SPECIAL_PAYLOAD)
1289 __free_page(rq->special_vec.bv_page); 1293 mempool_free(rq->special_vec.bv_page, sd_page_pool);
1290 1294
1291 if (SCpnt->cmnd != scsi_req(rq)->cmd) { 1295 if (SCpnt->cmnd != scsi_req(rq)->cmd) {
1292 cmnd = SCpnt->cmnd; 1296 cmnd = SCpnt->cmnd;
@@ -3623,6 +3627,13 @@ static int __init init_sd(void)
3623 goto err_out_cache; 3627 goto err_out_cache;
3624 } 3628 }
3625 3629
3630 sd_page_pool = mempool_create_page_pool(SD_MEMPOOL_SIZE, 0);
3631 if (!sd_page_pool) {
3632 printk(KERN_ERR "sd: can't init discard page pool\n");
3633 err = -ENOMEM;
3634 goto err_out_ppool;
3635 }
3636
3626 err = scsi_register_driver(&sd_template.gendrv); 3637 err = scsi_register_driver(&sd_template.gendrv);
3627 if (err) 3638 if (err)
3628 goto err_out_driver; 3639 goto err_out_driver;
@@ -3630,6 +3641,9 @@ static int __init init_sd(void)
3630 return 0; 3641 return 0;
3631 3642
3632err_out_driver: 3643err_out_driver:
3644 mempool_destroy(sd_page_pool);
3645
3646err_out_ppool:
3633 mempool_destroy(sd_cdb_pool); 3647 mempool_destroy(sd_cdb_pool);
3634 3648
3635err_out_cache: 3649err_out_cache:
@@ -3656,6 +3670,7 @@ static void __exit exit_sd(void)
3656 3670
3657 scsi_unregister_driver(&sd_template.gendrv); 3671 scsi_unregister_driver(&sd_template.gendrv);
3658 mempool_destroy(sd_cdb_pool); 3672 mempool_destroy(sd_cdb_pool);
3673 mempool_destroy(sd_page_pool);
3659 kmem_cache_destroy(sd_cdb_cache); 3674 kmem_cache_destroy(sd_cdb_cache);
3660 3675
3661 class_unregister(&sd_disk_class); 3676 class_unregister(&sd_disk_class);