author		Dan Williams <dan.j.williams@intel.com>	2016-01-06 15:03:41 -0500
committer	Dan Williams <dan.j.williams@intel.com>	2016-01-09 11:39:04 -0500
commit		e10624f8c09710b3b0740ea3847627ea02f55c39 (patch)
tree		ab59f8d9dbb5ca7e7d2771e0b5f8bf0cbf935553
parent		b95f5f4391fad65f1819c2404080b05ca95bdd92 (diff)
pmem: fail io-requests to known bad blocks
Check the sectors specified in a read bio to see if they hit a known bad
block, and return an error code from pmem_do_bvec().
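
The check reduces to the following sketch (a simplified mirror of the
is_bad_pmem() helper in the diff below; the function name here is
illustrative, not the patch itself). Note the unit conversion: len is in
bytes, while badblocks_check() counts 512-byte sectors, so a 4096-byte
page read checks 8 sectors:

	/*
	 * Illustrative only: same logic as the is_bad_pmem() helper
	 * added by this patch.  @len is in bytes; the badblocks core
	 * tracks 512-byte sectors, hence len / 512.
	 */
	static bool sector_range_is_bad(struct badblocks *bb, sector_t sector,
			unsigned int len)
	{
		sector_t first_bad;	/* first bad sector found in the range */
		int num_bad;		/* how many bad sectors follow it */

		if (!bb->count)		/* no known bad blocks recorded */
			return false;

		/* non-zero return means the range overlaps a bad block */
		return badblocks_check(bb, sector, len / 512, &first_bad,
				&num_bad) != 0;
	}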
Note that the ->rw_page() interface is not in a position to return errors.
For now, copy the same layering violation present in zram_rw_page() to avoid
crashes of the form:
kernel BUG at mm/filemap.c:822!
[..]
Call Trace:
[<ffffffff811c540e>] page_endio+0x1e/0x60
[<ffffffff81290d29>] mpage_end_io+0x39/0x60
[<ffffffff8141c4ef>] bio_endio+0x3f/0x60
[<ffffffffa005c491>] pmem_make_request+0x111/0x230 [nd_pmem]
...i.e. unlock a page that was already unlocked via pmem_rw_page() =>
page_endio().
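
The underlying contract (sketched below, simplified from the mm core's
bdev_read_page() fallback; submit_fallback_read() is a hypothetical
stand-in for the real mpage bio path) is that a failed ->rw_page() call
is retried with an ordinary bio, whose completion calls page_endio()
itself:

	/*
	 * Simplified caller-side sketch: on a non-zero return from
	 * ->rw_page(), the core falls back to a normal bio whose
	 * completion (bio_endio -> mpage_end_io -> page_endio) unlocks
	 * the page.  A driver that also called page_endio() on its
	 * error path unlocks the page twice and trips the BUG in
	 * unlock_page().
	 */
	static void read_one_page(struct block_device *bdev, sector_t sector,
			struct page *page)
	{
		if (bdev_read_page(bdev, sector, page) == 0)
			return;	/* driver completed the page itself */

		submit_fallback_read(bdev, sector, page);	/* hypothetical */
	}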
Reported-by: Vishal Verma <vishal.l.verma@intel.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
 drivers/nvdimm/pmem.c | 46 +++++++++++++++++++++++++++++++++++++++-------
 1 file changed, 39 insertions(+), 7 deletions(-)
diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c
index 2b1f3009f827..d00c659d1304 100644
--- a/drivers/nvdimm/pmem.c
+++ b/drivers/nvdimm/pmem.c
@@ -47,7 +47,20 @@ struct pmem_device {
 
 static int pmem_major;
 
-static void pmem_do_bvec(struct pmem_device *pmem, struct page *page,
+static bool is_bad_pmem(struct badblocks *bb, sector_t sector, unsigned int len)
+{
+	if (bb->count) {
+		sector_t first_bad;
+		int num_bad;
+
+		return !!badblocks_check(bb, sector, len / 512, &first_bad,
+				&num_bad);
+	}
+
+	return false;
+}
+
+static int pmem_do_bvec(struct pmem_device *pmem, struct page *page,
 		unsigned int len, unsigned int off, int rw,
 		sector_t sector)
 {
@@ -56,6 +69,8 @@ static void pmem_do_bvec(struct pmem_device *pmem, struct page *page,
 	void __pmem *pmem_addr = pmem->virt_addr + pmem_off;
 
 	if (rw == READ) {
+		if (unlikely(is_bad_pmem(&pmem->bb, sector, len)))
+			return -EIO;
 		memcpy_from_pmem(mem + off, pmem_addr, len);
 		flush_dcache_page(page);
 	} else {
@@ -64,10 +79,12 @@ static void pmem_do_bvec(struct pmem_device *pmem, struct page *page,
 	}
 
 	kunmap_atomic(mem);
+	return 0;
 }
 
 static blk_qc_t pmem_make_request(struct request_queue *q, struct bio *bio)
 {
+	int rc = 0;
 	bool do_acct;
 	unsigned long start;
 	struct bio_vec bvec;
@@ -76,9 +93,15 @@ static blk_qc_t pmem_make_request(struct request_queue *q, struct bio *bio)
 	struct pmem_device *pmem = bdev->bd_disk->private_data;
 
 	do_acct = nd_iostat_start(bio, &start);
-	bio_for_each_segment(bvec, bio, iter)
-		pmem_do_bvec(pmem, bvec.bv_page, bvec.bv_len, bvec.bv_offset,
-				bio_data_dir(bio), iter.bi_sector);
+	bio_for_each_segment(bvec, bio, iter) {
+		rc = pmem_do_bvec(pmem, bvec.bv_page, bvec.bv_len,
+				bvec.bv_offset, bio_data_dir(bio),
+				iter.bi_sector);
+		if (rc) {
+			bio->bi_error = rc;
+			break;
+		}
+	}
 	if (do_acct)
 		nd_iostat_end(bio, start);
 
@@ -93,13 +116,22 @@ static int pmem_rw_page(struct block_device *bdev, sector_t sector,
 		struct page *page, int rw)
 {
 	struct pmem_device *pmem = bdev->bd_disk->private_data;
+	int rc;
 
-	pmem_do_bvec(pmem, page, PAGE_CACHE_SIZE, 0, rw, sector);
+	rc = pmem_do_bvec(pmem, page, PAGE_CACHE_SIZE, 0, rw, sector);
 	if (rw & WRITE)
 		wmb_pmem();
-	page_endio(page, rw & WRITE, 0);
 
-	return 0;
+	/*
+	 * The ->rw_page interface is subtle and tricky.  The core
+	 * retries on any error, so we can only invoke page_endio() in
+	 * the successful completion case.  Otherwise, we'll see crashes
+	 * caused by double completion.
+	 */
+	if (rc == 0)
+		page_endio(page, rw & WRITE, 0);
+
+	return rc;
 }
 
 static long pmem_direct_access(struct block_device *bdev, sector_t sector,