diff options
author | Dan Williams <dan.j.williams@intel.com> | 2016-01-28 23:25:31 -0500 |
---|---|---|
committer | Dan Williams <dan.j.williams@intel.com> | 2016-01-30 16:35:32 -0500 |
commit | d1a5f2b4d8a125943dcb6b032fc7eaefc2c78296 (patch) | |
tree | afdd4251d10cbcf34b00bd4f33adb27996881cc6 | |
parent | 9f4736fe7ca804aa79b5916221bb13dfc6221a0f (diff) |
block: use DAX for partition table reads

Avoid populating pagecache when the block device is in DAX mode.
Otherwise these page cache entries collide with the fsync/msync
implementation and break data durability guarantees.

Cc: Jan Kara <jack@suse.com>
Cc: Jeff Moyer <jmoyer@redhat.com>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Dave Chinner <david@fromorbit.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Reported-by: Ross Zwisler <ross.zwisler@linux.intel.com>
Tested-by: Ross Zwisler <ross.zwisler@linux.intel.com>
Reviewed-by: Matthew Wilcox <willy@linux.intel.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
-rw-r--r-- | block/partition-generic.c | 18 | ||||
-rw-r--r-- | fs/dax.c | 20 | ||||
-rw-r--r-- | include/linux/dax.h | 11 |
3 files changed, 46 insertions, 3 deletions
diff --git a/block/partition-generic.c b/block/partition-generic.c index 746935a5973c..fefd01b496a0 100644 --- a/block/partition-generic.c +++ b/block/partition-generic.c | |||
@@ -16,6 +16,7 @@ | |||
16 | #include <linux/kmod.h> | 16 | #include <linux/kmod.h> |
17 | #include <linux/ctype.h> | 17 | #include <linux/ctype.h> |
18 | #include <linux/genhd.h> | 18 | #include <linux/genhd.h> |
19 | #include <linux/dax.h> | ||
19 | #include <linux/blktrace_api.h> | 20 | #include <linux/blktrace_api.h> |
20 | 21 | ||
21 | #include "partitions/check.h" | 22 | #include "partitions/check.h" |
@@ -550,13 +551,24 @@ int invalidate_partitions(struct gendisk *disk, struct block_device *bdev) | |||
550 | return 0; | 551 | return 0; |
551 | } | 552 | } |
552 | 553 | ||
553 | unsigned char *read_dev_sector(struct block_device *bdev, sector_t n, Sector *p) | 554 | static struct page *read_pagecache_sector(struct block_device *bdev, sector_t n) |
554 | { | 555 | { |
555 | struct address_space *mapping = bdev->bd_inode->i_mapping; | 556 | struct address_space *mapping = bdev->bd_inode->i_mapping; |
557 | |||
558 | return read_mapping_page(mapping, (pgoff_t)(n >> (PAGE_CACHE_SHIFT-9)), | ||
559 | NULL); | ||
560 | } | ||
561 | |||
562 | unsigned char *read_dev_sector(struct block_device *bdev, sector_t n, Sector *p) | ||
563 | { | ||
556 | struct page *page; | 564 | struct page *page; |
557 | 565 | ||
558 | page = read_mapping_page(mapping, (pgoff_t)(n >> (PAGE_CACHE_SHIFT-9)), | 566 | /* don't populate page cache for dax capable devices */ |
559 | NULL); | 567 | if (IS_DAX(bdev->bd_inode)) |
568 | page = read_dax_sector(bdev, n); | ||
569 | else | ||
570 | page = read_pagecache_sector(bdev, n); | ||
571 | |||
560 | if (!IS_ERR(page)) { | 572 | if (!IS_ERR(page)) { |
561 | if (PageError(page)) | 573 | if (PageError(page)) |
562 | goto fail; | 574 | goto fail; |
diff --git a/fs/dax.c b/fs/dax.c --- a/fs/dax.c +++ b/fs/dax.c | |||
@@ -58,6 +58,26 @@ static void dax_unmap_atomic(struct block_device *bdev, | |||
58 | blk_queue_exit(bdev->bd_queue); | 58 | blk_queue_exit(bdev->bd_queue); |
59 | } | 59 | } |
60 | 60 | ||
61 | struct page *read_dax_sector(struct block_device *bdev, sector_t n) | ||
62 | { | ||
63 | struct page *page = alloc_pages(GFP_KERNEL, 0); | ||
64 | struct blk_dax_ctl dax = { | ||
65 | .size = PAGE_SIZE, | ||
66 | .sector = n & ~((((int) PAGE_SIZE) / 512) - 1), | ||
67 | }; | ||
68 | long rc; | ||
69 | |||
70 | if (!page) | ||
71 | return ERR_PTR(-ENOMEM); | ||
72 | |||
73 | rc = dax_map_atomic(bdev, &dax); | ||
74 | if (rc < 0) | ||
75 | return ERR_PTR(rc); | ||
76 | memcpy_from_pmem(page_address(page), dax.addr, PAGE_SIZE); | ||
77 | dax_unmap_atomic(bdev, &dax); | ||
78 | return page; | ||
79 | } | ||
80 | |||
61 | /* | 81 | /* |
62 | * dax_clear_blocks() is called from within transaction context from XFS, | 82 | * dax_clear_blocks() is called from within transaction context from XFS, |
63 | * and hence this means the stack from this point must follow GFP_NOFS | 83 | * and hence this means the stack from this point must follow GFP_NOFS |
diff --git a/include/linux/dax.h b/include/linux/dax.h index 8204c3dc3800..818e45078929 100644 --- a/include/linux/dax.h +++ b/include/linux/dax.h | |||
@@ -14,6 +14,17 @@ int dax_fault(struct vm_area_struct *, struct vm_fault *, get_block_t, | |||
14 | dax_iodone_t); | 14 | dax_iodone_t); |
15 | int __dax_fault(struct vm_area_struct *, struct vm_fault *, get_block_t, | 15 | int __dax_fault(struct vm_area_struct *, struct vm_fault *, get_block_t, |
16 | dax_iodone_t); | 16 | dax_iodone_t); |
17 | |||
18 | #ifdef CONFIG_FS_DAX | ||
19 | struct page *read_dax_sector(struct block_device *bdev, sector_t n); | ||
20 | #else | ||
21 | static inline struct page *read_dax_sector(struct block_device *bdev, | ||
22 | sector_t n) | ||
23 | { | ||
24 | return ERR_PTR(-ENXIO); | ||
25 | } | ||
26 | #endif | ||
27 | |||
17 | #ifdef CONFIG_TRANSPARENT_HUGEPAGE | 28 | #ifdef CONFIG_TRANSPARENT_HUGEPAGE |
18 | int dax_pmd_fault(struct vm_area_struct *, unsigned long addr, pmd_t *, | 29 | int dax_pmd_fault(struct vm_area_struct *, unsigned long addr, pmd_t *, |
19 | unsigned int flags, get_block_t, dax_iodone_t); | 30 | unsigned int flags, get_block_t, dax_iodone_t); |