author     Matthew Wilcox <matthew.r.wilcox@intel.com>  2011-02-10 10:30:34 -0500
committer  Matthew Wilcox <matthew.r.wilcox@intel.com>  2011-11-04 15:52:57 -0400
commit     99802a7aee2b3dd720e382c52b892cc6a8122b11 (patch)
tree       50cfb8658b7458aa088914e562cc760b91849d6b /drivers/block/nvme.c
parent     091b609258b8e01cc45b01a41ca5e496f674d989 (diff)
NVMe: Optimise memory usage for I/Os between 4k and 128k
Add a second memory pool for smaller I/Os. We can pack 16 of these on a
single page instead of using an entire page for each one.
Signed-off-by: Matthew Wilcox <matthew.r.wilcox@intel.com>
Diffstat (limited to 'drivers/block/nvme.c')
-rw-r--r--  drivers/block/nvme.c  31
1 file changed, 23 insertions(+), 8 deletions(-)
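The saving claimed in the commit message comes from the block size of the new pool: it hands out 256-byte, 256-byte-aligned blocks, so dma_pool can carve PAGE_SIZE / 256 = 16 PRP lists out of each page it allocates, where the existing page-sized pool spends a whole page on every list. A minimal sketch of how the new pool is used, pulling the dma_pool calls out of the hunks below (small_pool, prp_list and prp_dma are illustrative names, not driver symbols; dmadev is the device pointer used in the setup hunk at the bottom of the diff):

        /* Create the small pool: 256-byte blocks, 256-byte aligned, no boundary. */
        struct dma_pool *small_pool = dma_pool_create("prp list 256", dmadev, 256, 256, 0);

        /*
         * One allocation returns a 256-byte PRP list plus its bus address;
         * sixteen of these fit in each page the pool takes from the allocator.
         */
        dma_addr_t prp_dma;
        __le64 *prp_list = dma_pool_alloc(small_pool, GFP_ATOMIC, &prp_dma);

        /* Return the block to the same pool when the I/O completes. */
        dma_pool_free(small_pool, prp_list, prp_dma);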
diff --git a/drivers/block/nvme.c b/drivers/block/nvme.c
index 80fe6a7a8163..cd7aeba8310b 100644
--- a/drivers/block/nvme.c
+++ b/drivers/block/nvme.c
@@ -58,6 +58,7 @@ struct nvme_dev {
 	u32 __iomem *dbs;
 	struct pci_dev *pci_dev;
 	struct dma_pool *prp_page_pool;
+	struct dma_pool *prp_small_pool;
 	int instance;
 	int queue_count;
 	u32 ctrl_config;
@@ -249,11 +250,6 @@ static int nvme_submit_cmd(struct nvme_queue *nvmeq, struct nvme_command *cmd)
 	return 0;
 }
 
-static __le64 *alloc_prp_list(struct nvme_dev *dev, dma_addr_t *addr)
-{
-	return dma_pool_alloc(dev->prp_page_pool, GFP_ATOMIC, addr);
-}
-
 struct nvme_prps {
 	int npages;
 	dma_addr_t first_dma;
@@ -271,6 +267,9 @@ static void nvme_free_prps(struct nvme_queue *nvmeq, struct nvme_prps *prps)
 		return;
 
 	prp_dma = prps->first_dma;
+
+	if (prps->npages == 0)
+		dma_pool_free(dev->prp_small_pool, prps->list[0], prp_dma);
 	for (i = 0; i < prps->npages; i++) {
 		__le64 *prp_list = prps->list[i];
 		dma_addr_t next_prp_dma = le64_to_cpu(prp_list[last_prp]);
@@ -322,6 +321,7 @@ static struct nvme_prps *nvme_setup_prps(struct nvme_queue *nvmeq,
 					struct scatterlist *sg, int length)
 {
 	struct nvme_dev *dev = nvmeq->dev;
+	struct dma_pool *pool;
 	int dma_len = sg_dma_len(sg);
 	u64 dma_addr = sg_dma_address(sg);
 	int offset = offset_in_page(dma_addr);
@@ -352,9 +352,16 @@ static struct nvme_prps *nvme_setup_prps(struct nvme_queue *nvmeq,
 	nprps = DIV_ROUND_UP(length, PAGE_SIZE);
 	npages = DIV_ROUND_UP(8 * nprps, PAGE_SIZE);
 	prps = kmalloc(sizeof(*prps) + sizeof(__le64 *) * npages, GFP_ATOMIC);
-	prps->npages = npages;
 	prp_page = 0;
-	prp_list = alloc_prp_list(dev, &prp_dma);
+	if (nprps <= (256 / 8)) {
+		pool = dev->prp_small_pool;
+		prps->npages = 0;
+	} else {
+		pool = dev->prp_page_pool;
+		prps->npages = npages;
+	}
+
+	prp_list = dma_pool_alloc(pool, GFP_ATOMIC, &prp_dma);
 	prps->list[prp_page++] = prp_list;
 	prps->first_dma = prp_dma;
 	cmd->prp2 = cpu_to_le64(prp_dma);
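As a worked check on the branch above (assuming 4k pages): 256 / 8 allows at most 32 PRP entries, 32 entries fill exactly one 256-byte small-pool block, and 32 entries describe 32 * 4k = 128k of data, which is the upper bound in the commit title. Recording prps->npages = 0 for the small case is what lets nvme_free_prps() in the earlier hunk return the block to prp_small_pool instead of walking the chain of page-sized lists.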
@@ -362,7 +369,7 @@ static struct nvme_prps *nvme_setup_prps(struct nvme_queue *nvmeq,
 	for (;;) {
 		if (i == PAGE_SIZE / 8 - 1) {
 			__le64 *old_prp_list = prp_list;
-			prp_list = alloc_prp_list(dev, &prp_dma);
+			prp_list = dma_pool_alloc(pool, GFP_ATOMIC, &prp_dma);
 			prps->list[prp_page++] = prp_list;
 			old_prp_list[i] = cpu_to_le64(prp_dma);
 			i = 0;
@@ -1313,12 +1320,20 @@ static int nvme_setup_prp_pools(struct nvme_dev *dev)
 	if (!dev->prp_page_pool)
 		return -ENOMEM;
 
+	/* Optimisation for I/Os between 4k and 128k */
+	dev->prp_small_pool = dma_pool_create("prp list 256", dmadev,
+						256, 256, 0);
+	if (!dev->prp_small_pool) {
+		dma_pool_destroy(dev->prp_page_pool);
+		return -ENOMEM;
+	}
 	return 0;
 }
 
 static void nvme_release_prp_pools(struct nvme_dev *dev)
 {
 	dma_pool_destroy(dev->prp_page_pool);
+	dma_pool_destroy(dev->prp_small_pool);
 }
 
 /* XXX: Use an ida or something to let remove / add work correctly */