aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDamien Le Moal <damien.lemoal@wdc.com>2019-07-01 01:09:17 -0400
committerJens Axboe <axboe@kernel.dk>2019-07-11 22:04:39 -0400
commitb091ac616846a1da75b1f2566b41255ce7f0e0a6 (patch)
tree7a0f9b432f6692dfe2b6b07b494bfe7ad887f46e
parentbd976e52725965ddcceb9abecbcc7ca46863665c (diff)
sd_zbc: Fix report zones buffer allocation
During disk scan and revalidation done with sd_revalidate(), the zones of a zoned disk are checked using the helper function blk_revalidate_disk_zones() if a configuration change is detected (change in the number of zones or zone size). The function blk_revalidate_disk_zones() issues report_zones calls that are very large, that is, to obtain zone information for all zones of the disk with a single command. The size of the report zones command buffer necessary for such large request generally is lower than the disk max_hw_sectors and KMALLOC_MAX_SIZE (4MB) and succeeds on boot (no memory fragmentation), but often fail at run time (e.g. hot-plug event). This causes the disk revalidation to fail and the disk capacity to be changed to 0. This problem can be avoided by using vmalloc() instead of kmalloc() for the buffer allocation. To limit the amount of memory to be allocated, this patch also introduces the arbitrary SD_ZBC_REPORT_MAX_ZONES maximum number of zones to report with a single report zones command. This limit may be lowered further to satisfy the disk max_hw_sectors limit. Finally, to ensure that the vmalloc-ed buffer can always be mapped in a request, the buffer size is further limited to at most queue_max_segments() pages, allowing successful mapping of the buffer even in the worst case scenario where none of the buffer pages are contiguous. Fixes: 515ce6061312 ("scsi: sd_zbc: Fix sd_zbc_report_zones() buffer allocation") Fixes: e76239a3748c ("block: add a report_zones method") Cc: stable@vger.kernel.org Reviewed-by: Christoph Hellwig <hch@lst.de> Reviewed-by: Martin K. Petersen <martin.petersen@oracle.com> Signed-off-by: Damien Le Moal <damien.lemoal@wdc.com> Signed-off-by: Jens Axboe <axboe@kernel.dk>
-rw-r--r--drivers/scsi/sd_zbc.c104
1 files changed, 75 insertions, 29 deletions
diff --git a/drivers/scsi/sd_zbc.c b/drivers/scsi/sd_zbc.c
index ec3764c8f3f1..db16c19e05c4 100644
--- a/drivers/scsi/sd_zbc.c
+++ b/drivers/scsi/sd_zbc.c
@@ -9,6 +9,8 @@
9 */ 9 */
10 10
11#include <linux/blkdev.h> 11#include <linux/blkdev.h>
12#include <linux/vmalloc.h>
13#include <linux/sched/mm.h>
12 14
13#include <asm/unaligned.h> 15#include <asm/unaligned.h>
14 16
@@ -50,7 +52,7 @@ static void sd_zbc_parse_report(struct scsi_disk *sdkp, u8 *buf,
50/** 52/**
51 * sd_zbc_do_report_zones - Issue a REPORT ZONES scsi command. 53 * sd_zbc_do_report_zones - Issue a REPORT ZONES scsi command.
52 * @sdkp: The target disk 54 * @sdkp: The target disk
53 * @buf: Buffer to use for the reply 55 * @buf: vmalloc-ed buffer to use for the reply
54 * @buflen: the buffer size 56 * @buflen: the buffer size
55 * @lba: Start LBA of the report 57 * @lba: Start LBA of the report
56 * @partial: Do partial report 58 * @partial: Do partial report
@@ -79,7 +81,6 @@ static int sd_zbc_do_report_zones(struct scsi_disk *sdkp, unsigned char *buf,
79 put_unaligned_be32(buflen, &cmd[10]); 81 put_unaligned_be32(buflen, &cmd[10]);
80 if (partial) 82 if (partial)
81 cmd[14] = ZBC_REPORT_ZONE_PARTIAL; 83 cmd[14] = ZBC_REPORT_ZONE_PARTIAL;
82 memset(buf, 0, buflen);
83 84
84 result = scsi_execute_req(sdp, cmd, DMA_FROM_DEVICE, 85 result = scsi_execute_req(sdp, cmd, DMA_FROM_DEVICE,
85 buf, buflen, &sshdr, 86 buf, buflen, &sshdr,
@@ -103,6 +104,53 @@ static int sd_zbc_do_report_zones(struct scsi_disk *sdkp, unsigned char *buf,
103 return 0; 104 return 0;
104} 105}
105 106
107/*
108 * Maximum number of zones to get with one report zones command.
109 */
110#define SD_ZBC_REPORT_MAX_ZONES 8192U
111
112/**
113 * Allocate a buffer for report zones reply.
114 * @sdkp: The target disk
115 * @nr_zones: Maximum number of zones to report
116 * @buflen: Size of the buffer allocated
117 *
118 * Try to allocate a reply buffer for the number of requested zones.
119 * The size of the buffer allocated may be smaller than requested to
120 * satify the device constraint (max_hw_sectors, max_segments, etc).
121 *
122 * Return the address of the allocated buffer and update @buflen with
123 * the size of the allocated buffer.
124 */
125static void *sd_zbc_alloc_report_buffer(struct scsi_disk *sdkp,
126 unsigned int nr_zones, size_t *buflen)
127{
128 struct request_queue *q = sdkp->disk->queue;
129 size_t bufsize;
130 void *buf;
131
132 /*
133 * Report zone buffer size should be at most 64B times the number of
134 * zones requested plus the 64B reply header, but should be at least
135 * SECTOR_SIZE for ATA devices.
136 * Make sure that this size does not exceed the hardware capabilities.
137 * Furthermore, since the report zone command cannot be split, make
138 * sure that the allocated buffer can always be mapped by limiting the
139 * number of pages allocated to the HBA max segments limit.
140 */
141 nr_zones = min(nr_zones, SD_ZBC_REPORT_MAX_ZONES);
142 bufsize = roundup((nr_zones + 1) * 64, 512);
143 bufsize = min_t(size_t, bufsize,
144 queue_max_hw_sectors(q) << SECTOR_SHIFT);
145 bufsize = min_t(size_t, bufsize, queue_max_segments(q) << PAGE_SHIFT);
146
147 buf = vzalloc(bufsize);
148 if (buf)
149 *buflen = bufsize;
150
151 return buf;
152}
153
106/** 154/**
107 * sd_zbc_report_zones - Disk report zones operation. 155 * sd_zbc_report_zones - Disk report zones operation.
108 * @disk: The target disk 156 * @disk: The target disk
@@ -116,30 +164,23 @@ int sd_zbc_report_zones(struct gendisk *disk, sector_t sector,
116 struct blk_zone *zones, unsigned int *nr_zones) 164 struct blk_zone *zones, unsigned int *nr_zones)
117{ 165{
118 struct scsi_disk *sdkp = scsi_disk(disk); 166 struct scsi_disk *sdkp = scsi_disk(disk);
119 unsigned int i, buflen, nrz = *nr_zones; 167 unsigned int i, nrz = *nr_zones;
120 unsigned char *buf; 168 unsigned char *buf;
121 size_t offset = 0; 169 size_t buflen = 0, offset = 0;
122 int ret = 0; 170 int ret = 0;
123 171
124 if (!sd_is_zoned(sdkp)) 172 if (!sd_is_zoned(sdkp))
125 /* Not a zoned device */ 173 /* Not a zoned device */
126 return -EOPNOTSUPP; 174 return -EOPNOTSUPP;
127 175
128 /* 176 buf = sd_zbc_alloc_report_buffer(sdkp, nrz, &buflen);
129 * Get a reply buffer for the number of requested zones plus a header,
130 * without exceeding the device maximum command size. For ATA disks,
131 * buffers must be aligned to 512B.
132 */
133 buflen = min(queue_max_hw_sectors(disk->queue) << 9,
134 roundup((nrz + 1) * 64, 512));
135 buf = kmalloc(buflen, GFP_KERNEL);
136 if (!buf) 177 if (!buf)
137 return -ENOMEM; 178 return -ENOMEM;
138 179
139 ret = sd_zbc_do_report_zones(sdkp, buf, buflen, 180 ret = sd_zbc_do_report_zones(sdkp, buf, buflen,
140 sectors_to_logical(sdkp->device, sector), true); 181 sectors_to_logical(sdkp->device, sector), true);
141 if (ret) 182 if (ret)
142 goto out_free_buf; 183 goto out;
143 184
144 nrz = min(nrz, get_unaligned_be32(&buf[0]) / 64); 185 nrz = min(nrz, get_unaligned_be32(&buf[0]) / 64);
145 for (i = 0; i < nrz; i++) { 186 for (i = 0; i < nrz; i++) {
@@ -150,8 +191,8 @@ int sd_zbc_report_zones(struct gendisk *disk, sector_t sector,
150 191
151 *nr_zones = nrz; 192 *nr_zones = nrz;
152 193
153out_free_buf: 194out:
154 kfree(buf); 195 kvfree(buf);
155 196
156 return ret; 197 return ret;
157} 198}
@@ -285,8 +326,6 @@ static int sd_zbc_check_zoned_characteristics(struct scsi_disk *sdkp,
285 return 0; 326 return 0;
286} 327}
287 328
288#define SD_ZBC_BUF_SIZE 131072U
289
290/** 329/**
291 * sd_zbc_check_zones - Check the device capacity and zone sizes 330 * sd_zbc_check_zones - Check the device capacity and zone sizes
292 * @sdkp: Target disk 331 * @sdkp: Target disk
@@ -302,22 +341,28 @@ static int sd_zbc_check_zoned_characteristics(struct scsi_disk *sdkp,
302 */ 341 */
303static int sd_zbc_check_zones(struct scsi_disk *sdkp, u32 *zblocks) 342static int sd_zbc_check_zones(struct scsi_disk *sdkp, u32 *zblocks)
304{ 343{
344 size_t bufsize, buflen;
345 unsigned int noio_flag;
305 u64 zone_blocks = 0; 346 u64 zone_blocks = 0;
306 sector_t max_lba, block = 0; 347 sector_t max_lba, block = 0;
307 unsigned char *buf; 348 unsigned char *buf;
308 unsigned char *rec; 349 unsigned char *rec;
309 unsigned int buf_len;
310 unsigned int list_length;
311 int ret; 350 int ret;
312 u8 same; 351 u8 same;
313 352
353 /* Do all memory allocations as if GFP_NOIO was specified */
354 noio_flag = memalloc_noio_save();
355
314 /* Get a buffer */ 356 /* Get a buffer */
315 buf = kmalloc(SD_ZBC_BUF_SIZE, GFP_KERNEL); 357 buf = sd_zbc_alloc_report_buffer(sdkp, SD_ZBC_REPORT_MAX_ZONES,
316 if (!buf) 358 &bufsize);
317 return -ENOMEM; 359 if (!buf) {
360 ret = -ENOMEM;
361 goto out;
362 }
318 363
319 /* Do a report zone to get max_lba and the same field */ 364 /* Do a report zone to get max_lba and the same field */
320 ret = sd_zbc_do_report_zones(sdkp, buf, SD_ZBC_BUF_SIZE, 0, false); 365 ret = sd_zbc_do_report_zones(sdkp, buf, bufsize, 0, false);
321 if (ret) 366 if (ret)
322 goto out_free; 367 goto out_free;
323 368
@@ -353,12 +398,12 @@ static int sd_zbc_check_zones(struct scsi_disk *sdkp, u32 *zblocks)
353 do { 398 do {
354 399
355 /* Parse REPORT ZONES header */ 400 /* Parse REPORT ZONES header */
356 list_length = get_unaligned_be32(&buf[0]) + 64; 401 buflen = min_t(size_t, get_unaligned_be32(&buf[0]) + 64,
402 bufsize);
357 rec = buf + 64; 403 rec = buf + 64;
358 buf_len = min(list_length, SD_ZBC_BUF_SIZE);
359 404
360 /* Parse zone descriptors */ 405 /* Parse zone descriptors */
361 while (rec < buf + buf_len) { 406 while (rec < buf + buflen) {
362 u64 this_zone_blocks = get_unaligned_be64(&rec[8]); 407 u64 this_zone_blocks = get_unaligned_be64(&rec[8]);
363 408
364 if (zone_blocks == 0) { 409 if (zone_blocks == 0) {
@@ -374,8 +419,8 @@ static int sd_zbc_check_zones(struct scsi_disk *sdkp, u32 *zblocks)
374 } 419 }
375 420
376 if (block < sdkp->capacity) { 421 if (block < sdkp->capacity) {
377 ret = sd_zbc_do_report_zones(sdkp, buf, SD_ZBC_BUF_SIZE, 422 ret = sd_zbc_do_report_zones(sdkp, buf, bufsize, block,
378 block, true); 423 true);
379 if (ret) 424 if (ret)
380 goto out_free; 425 goto out_free;
381 } 426 }
@@ -406,7 +451,8 @@ out:
406 } 451 }
407 452
408out_free: 453out_free:
409 kfree(buf); 454 memalloc_noio_restore(noio_flag);
455 kvfree(buf);
410 456
411 return ret; 457 return ret;
412} 458}