diff options
author | Huang Ying <ying.huang@intel.com> | 2017-09-06 19:22:27 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2017-09-06 20:27:27 -0400 |
commit | 98cc093cba1e925eb34963dedb5f1684f1bdb2f4 (patch) | |
tree | 1651b9aed181959b1f5bae885a81ca130de09162 /drivers/nvdimm | |
parent | f0eea189e8e969b66e03bac8a7d92888ba267854 (diff) |
block, THP: make block_device_operations.rw_page support THP
The .rw_page in struct block_device_operations is used by the swap
subsystem to read/write the page contents from/into the corresponding
swap slot in the swap device. To support the THP (Transparent Huge
Page) swap optimization, .rw_page is enhanced to support reading/writing
a THP when possible.
Link: http://lkml.kernel.org/r/20170724051840.2309-6-ying.huang@intel.com
Signed-off-by: "Huang, Ying" <ying.huang@intel.com>
Reviewed-by: Ross Zwisler <ross.zwisler@intel.com> [for brd.c, zram_drv.c, pmem.c]
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Minchan Kim <minchan@kernel.org>
Cc: Dan Williams <dan.j.williams@intel.com>
Cc: Vishal L Verma <vishal.l.verma@intel.com>
Cc: Jens Axboe <axboe@kernel.dk>
Cc: "Kirill A . Shutemov" <kirill.shutemov@linux.intel.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Rik van Riel <riel@redhat.com>
Cc: Shaohua Li <shli@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'drivers/nvdimm')
-rw-r--r-- | drivers/nvdimm/btt.c | 4 | ||||
-rw-r--r-- | drivers/nvdimm/pmem.c | 41 |
2 files changed, 33 insertions, 12 deletions
diff --git a/drivers/nvdimm/btt.c b/drivers/nvdimm/btt.c index 14323faf8bd9..60491641a8d6 100644 --- a/drivers/nvdimm/btt.c +++ b/drivers/nvdimm/btt.c | |||
@@ -1241,8 +1241,10 @@ static int btt_rw_page(struct block_device *bdev, sector_t sector, | |||
1241 | { | 1241 | { |
1242 | struct btt *btt = bdev->bd_disk->private_data; | 1242 | struct btt *btt = bdev->bd_disk->private_data; |
1243 | int rc; | 1243 | int rc; |
1244 | unsigned int len; | ||
1244 | 1245 | ||
1245 | rc = btt_do_bvec(btt, NULL, page, PAGE_SIZE, 0, is_write, sector); | 1246 | len = hpage_nr_pages(page) * PAGE_SIZE; |
1247 | rc = btt_do_bvec(btt, NULL, page, len, 0, is_write, sector); | ||
1246 | if (rc == 0) | 1248 | if (rc == 0) |
1247 | page_endio(page, is_write, 0); | 1249 | page_endio(page, is_write, 0); |
1248 | 1250 | ||
diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c index f7099adaabc0..e9aa453da50c 100644 --- a/drivers/nvdimm/pmem.c +++ b/drivers/nvdimm/pmem.c | |||
@@ -80,22 +80,40 @@ static blk_status_t pmem_clear_poison(struct pmem_device *pmem, | |||
80 | static void write_pmem(void *pmem_addr, struct page *page, | 80 | static void write_pmem(void *pmem_addr, struct page *page, |
81 | unsigned int off, unsigned int len) | 81 | unsigned int off, unsigned int len) |
82 | { | 82 | { |
83 | void *mem = kmap_atomic(page); | 83 | unsigned int chunk; |
84 | 84 | void *mem; | |
85 | memcpy_flushcache(pmem_addr, mem + off, len); | 85 | |
86 | kunmap_atomic(mem); | 86 | while (len) { |
87 | mem = kmap_atomic(page); | ||
88 | chunk = min_t(unsigned int, len, PAGE_SIZE); | ||
89 | memcpy_flushcache(pmem_addr, mem + off, chunk); | ||
90 | kunmap_atomic(mem); | ||
91 | len -= chunk; | ||
92 | off = 0; | ||
93 | page++; | ||
94 | pmem_addr += PAGE_SIZE; | ||
95 | } | ||
87 | } | 96 | } |
88 | 97 | ||
89 | static blk_status_t read_pmem(struct page *page, unsigned int off, | 98 | static blk_status_t read_pmem(struct page *page, unsigned int off, |
90 | void *pmem_addr, unsigned int len) | 99 | void *pmem_addr, unsigned int len) |
91 | { | 100 | { |
101 | unsigned int chunk; | ||
92 | int rc; | 102 | int rc; |
93 | void *mem = kmap_atomic(page); | 103 | void *mem; |
94 | 104 | ||
95 | rc = memcpy_mcsafe(mem + off, pmem_addr, len); | 105 | while (len) { |
96 | kunmap_atomic(mem); | 106 | mem = kmap_atomic(page); |
97 | if (rc) | 107 | chunk = min_t(unsigned int, len, PAGE_SIZE); |
98 | return BLK_STS_IOERR; | 108 | rc = memcpy_mcsafe(mem + off, pmem_addr, chunk); |
109 | kunmap_atomic(mem); | ||
110 | if (rc) | ||
111 | return BLK_STS_IOERR; | ||
112 | len -= chunk; | ||
113 | off = 0; | ||
114 | page++; | ||
115 | pmem_addr += PAGE_SIZE; | ||
116 | } | ||
99 | return BLK_STS_OK; | 117 | return BLK_STS_OK; |
100 | } | 118 | } |
101 | 119 | ||
@@ -188,7 +206,8 @@ static int pmem_rw_page(struct block_device *bdev, sector_t sector, | |||
188 | struct pmem_device *pmem = bdev->bd_queue->queuedata; | 206 | struct pmem_device *pmem = bdev->bd_queue->queuedata; |
189 | blk_status_t rc; | 207 | blk_status_t rc; |
190 | 208 | ||
191 | rc = pmem_do_bvec(pmem, page, PAGE_SIZE, 0, is_write, sector); | 209 | rc = pmem_do_bvec(pmem, page, hpage_nr_pages(page) * PAGE_SIZE, |
210 | 0, is_write, sector); | ||
192 | 211 | ||
193 | /* | 212 | /* |
194 | * The ->rw_page interface is subtle and tricky. The core | 213 | * The ->rw_page interface is subtle and tricky. The core |