author	Vishal Verma <vishal.l.verma@intel.com>	2017-08-30 21:36:03 -0400
committer	Dan Williams <dan.j.williams@intel.com>	2017-08-31 18:05:10 -0400
commit	d9b83c7569536e3255992491737d9f895640ea18 (patch)
tree	050059ad7332979b9c2be55702b4df1310503fbb
parent	0930a750c35be3c2f5aacebc0d20ddeaf727c208 (diff)
libnvdimm, btt: rework error clearing
Clearing errors or badblocks during a BTT write requires sending an ACPI DSM, which means potentially sleeping. Since a BTT IO happens in atomic context (preemption disabled, spinlocks may be held), we cannot perform error clearing in the course of an IO. Due to this, error clearing for BTT IOs has hitherto been disabled.

In this patch we move error clearing out of the atomic section, and thus re-enable error clearing with BTTs. When we are about to add a block to the free list, we check if it was previously marked as an error, and if it was, we add it to the freelist but also set a flag that says error clearing will be required. We then drop the lane (ending the atomic context), and send a zero buffer so that the error can be cleared.

The error flag in the free list is protected by the nd 'lane', and is set only by a thread while it holds that lane. When the error is cleared, the flag is cleared, but while holding a mutex for that freelist index.

When writing, we check for two things:
1/ If the freelist mutex is held or if the error flag is set. If so, this is an error block that is being (or about to be) cleared.
2/ If the block is a known badblock based on nsio->bb

The second check is required because the BTT map error flag for a map entry only gets set when an error LBA is read. If we write to a new location, it may not have the map error flag set, but it still might be in the region's badblock list, and we could trigger an EIO on the write, which is undesirable and completely avoidable.

Cc: Jeff Moyer <jmoyer@redhat.com>
Cc: Toshi Kani <toshi.kani@hpe.com>
Cc: Dan Williams <dan.j.williams@intel.com>
Signed-off-by: Vishal Verma <vishal.l.verma@intel.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
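A minimal userspace sketch of that deferred-clear protocol, for illustration only (this is not the driver code: the pthread spinlock stands in for the nd 'lane', err_lock stands in for the per-arena mutex, and every name here is invented):

#include <pthread.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Illustrative stand-ins for the BTT structures; not the kernel definitions. */
struct free_entry {
	uint32_t block;
	bool has_err;			/* set only while holding the lane */
};

static pthread_spinlock_t lane_lock;	/* models acquiring an nd 'lane' */
static pthread_mutex_t err_lock = PTHREAD_MUTEX_INITIALIZER;
static struct free_entry freelist[1];
static uint8_t media[4096];		/* stand-in for the namespace */

/* Clear the flagged block outside any atomic context, under err_lock. */
static void clear_freelist_error(struct free_entry *fe)
{
	if (!fe->has_err)
		return;

	pthread_mutex_lock(&err_lock);
	/* "send a zero buffer" so the poison is cleared */
	memset(media + fe->block * 512, 0, 512);
	fe->has_err = false;
	pthread_mutex_unlock(&err_lock);
}

static void write_block(const void *buf, bool block_is_bad)
{
	pthread_spin_lock(&lane_lock);		/* atomic section begins */

	if (block_is_bad)
		freelist[0].has_err = true;	/* only mark it here */

	if (freelist[0].has_err) {
		pthread_spin_unlock(&lane_lock);    /* drop the lane ... */
		clear_freelist_error(&freelist[0]); /* ... then clear; sleeping is fine */
		pthread_spin_lock(&lane_lock);	    /* retry with a clean block */
	}

	memcpy(media + freelist[0].block * 512, buf, 512);
	pthread_spin_unlock(&lane_lock);
}

int main(void)
{
	uint8_t buf[512] = { 0xab };

	pthread_spin_init(&lane_lock, PTHREAD_PROCESS_PRIVATE);
	write_block(buf, true);
	printf("write completed after deferred error clearing\n");
	return 0;
}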
-rw-r--r--	drivers/nvdimm/btt.c	117
-rw-r--r--	drivers/nvdimm/btt.h	5
-rw-r--r--	drivers/nvdimm/claim.c	8
3 files changed, 111 insertions, 19 deletions
diff --git a/drivers/nvdimm/btt.c b/drivers/nvdimm/btt.c
index 9c96530ea6d5..dabb84f7ab8a 100644
--- a/drivers/nvdimm/btt.c
+++ b/drivers/nvdimm/btt.c
@@ -31,6 +31,11 @@ enum log_ent_request {
 	LOG_OLD_ENT
 };
 
+static u64 adjust_initial_offset(struct nd_btt *nd_btt, u64 offset)
+{
+	return offset + nd_btt->initial_offset;
+}
+
 static int arena_read_bytes(struct arena_info *arena, resource_size_t offset,
 		void *buf, size_t n, unsigned long flags)
 {
@@ -38,7 +43,7 @@ static int arena_read_bytes(struct arena_info *arena, resource_size_t offset,
 	struct nd_namespace_common *ndns = nd_btt->ndns;
 
 	/* arena offsets may be shifted from the base of the device */
-	offset += arena->nd_btt->initial_offset;
+	offset = adjust_initial_offset(nd_btt, offset);
 	return nvdimm_read_bytes(ndns, offset, buf, n, flags);
 }
 
@@ -49,7 +54,7 @@ static int arena_write_bytes(struct arena_info *arena, resource_size_t offset,
 	struct nd_namespace_common *ndns = nd_btt->ndns;
 
 	/* arena offsets may be shifted from the base of the device */
-	offset += arena->nd_btt->initial_offset;
+	offset = adjust_initial_offset(nd_btt, offset);
 	return nvdimm_write_bytes(ndns, offset, buf, n, flags);
 }
 
@@ -381,7 +386,9 @@ static int btt_flog_write(struct arena_info *arena, u32 lane, u32 sub,
 	arena->freelist[lane].sub = 1 - arena->freelist[lane].sub;
 	if (++(arena->freelist[lane].seq) == 4)
 		arena->freelist[lane].seq = 1;
-	arena->freelist[lane].block = le32_to_cpu(ent->old_map);
+	if (ent_e_flag(ent->old_map))
+		arena->freelist[lane].has_err = 1;
+	arena->freelist[lane].block = le32_to_cpu(ent_lba(ent->old_map));
 
 	return ret;
 }
@@ -480,6 +487,40 @@ static int btt_log_init(struct arena_info *arena)
 	return ret;
 }
 
+static u64 to_namespace_offset(struct arena_info *arena, u64 lba)
+{
+	return arena->dataoff + ((u64)lba * arena->internal_lbasize);
+}
+
+static int arena_clear_freelist_error(struct arena_info *arena, u32 lane)
+{
+	int ret = 0;
+
+	if (arena->freelist[lane].has_err) {
+		void *zero_page = page_address(ZERO_PAGE(0));
+		u32 lba = arena->freelist[lane].block;
+		u64 nsoff = to_namespace_offset(arena, lba);
+		unsigned long len = arena->sector_size;
+
+		mutex_lock(&arena->err_lock);
+
+		while (len) {
+			unsigned long chunk = min(len, PAGE_SIZE);
+
+			ret = arena_write_bytes(arena, nsoff, zero_page,
+				chunk, 0);
+			if (ret)
+				break;
+			len -= chunk;
+			nsoff += chunk;
+			if (len == 0)
+				arena->freelist[lane].has_err = 0;
+		}
+		mutex_unlock(&arena->err_lock);
+	}
+	return ret;
+}
+
 static int btt_freelist_init(struct arena_info *arena)
 {
 	int old, new, ret;
@@ -505,6 +546,16 @@ static int btt_freelist_init(struct arena_info *arena)
 		arena->freelist[i].seq = nd_inc_seq(le32_to_cpu(log_new.seq));
 		arena->freelist[i].block = le32_to_cpu(log_new.old_map);
 
+		/*
+		 * FIXME: if error clearing fails during init, we want to make
+		 * the BTT read-only
+		 */
+		if (ent_e_flag(log_new.old_map)) {
+			ret = arena_clear_freelist_error(arena, i);
+			if (ret)
+				WARN_ONCE(1, "Unable to clear known errors\n");
+		}
+
 		/* This implies a newly created or untouched flog entry */
 		if (log_new.old_map == log_new.new_map)
 			continue;
@@ -525,7 +576,6 @@ static int btt_freelist_init(struct arena_info *arena)
 			if (ret)
 				return ret;
 		}
-
 	}
 
 	return 0;
@@ -695,6 +745,7 @@ static int discover_arenas(struct btt *btt)
 		arena->external_lba_start = cur_nlba;
 		parse_arena_meta(arena, super, cur_off);
 
+		mutex_init(&arena->err_lock);
 		ret = btt_freelist_init(arena);
 		if (ret)
 			goto out;
@@ -905,11 +956,6 @@ static void unlock_map(struct arena_info *arena, u32 premap)
 	spin_unlock(&arena->map_locks[idx].lock);
 }
 
-static u64 to_namespace_offset(struct arena_info *arena, u64 lba)
-{
-	return arena->dataoff + ((u64)lba * arena->internal_lbasize);
-}
-
 static int btt_data_read(struct arena_info *arena, struct page *page,
 			unsigned int off, u32 lba, u32 len)
 {
@@ -1067,8 +1113,14 @@ static int btt_read_pg(struct btt *btt, struct bio_integrity_payload *bip,
 		}
 
 		ret = btt_data_read(arena, page, off, postmap, cur_len);
-		if (ret)
+		if (ret) {
+			int rc;
+
+			/* Media error - set the e_flag */
+			rc = btt_map_write(arena, premap, postmap, 0, 1,
+				NVDIMM_IO_ATOMIC);
 			goto out_rtt;
+		}
 
 		if (bip) {
 			ret = btt_rw_integrity(btt, bip, arena, postmap, READ);
@@ -1093,6 +1145,21 @@ static int btt_read_pg(struct btt *btt, struct bio_integrity_payload *bip,
 	return ret;
 }
 
+/*
+ * Normally, arena_{read,write}_bytes will take care of the initial offset
+ * adjustment, but in the case of btt_is_badblock, where we query is_bad_pmem,
+ * we need the final, raw namespace offset here
+ */
+static bool btt_is_badblock(struct btt *btt, struct arena_info *arena,
+		u32 postmap)
+{
+	u64 nsoff = adjust_initial_offset(arena->nd_btt,
+			to_namespace_offset(arena, postmap));
+	sector_t phys_sector = nsoff >> 9;
+
+	return is_bad_pmem(btt->phys_bb, phys_sector, arena->internal_lbasize);
+}
+
 static int btt_write_pg(struct btt *btt, struct bio_integrity_payload *bip,
 			sector_t sector, struct page *page, unsigned int off,
 			unsigned int len)
@@ -1105,7 +1172,9 @@ static int btt_write_pg(struct btt *btt, struct bio_integrity_payload *bip,
 
 	while (len) {
 		u32 cur_len;
+		int e_flag;
 
+ retry:
 		lane = nd_region_acquire_lane(btt->nd_region);
 
 		ret = lba_to_arena(btt, sector, &premap, &arena);
@@ -1118,6 +1187,21 @@ static int btt_write_pg(struct btt *btt, struct bio_integrity_payload *bip,
 			goto out_lane;
 		}
 
+		if (btt_is_badblock(btt, arena, arena->freelist[lane].block))
+			arena->freelist[lane].has_err = 1;
+
+		if (mutex_is_locked(&arena->err_lock)
+				|| arena->freelist[lane].has_err) {
+			nd_region_release_lane(btt->nd_region, lane);
+
+			ret = arena_clear_freelist_error(arena, lane);
+			if (ret)
+				return ret;
+
+			/* OK to acquire a different lane/free block */
+			goto retry;
+		}
+
 		new_postmap = arena->freelist[lane].block;
 
 		/* Wait if the new block is being read from */
@@ -1143,7 +1227,7 @@ static int btt_write_pg(struct btt *btt, struct bio_integrity_payload *bip,
 		}
 
 		lock_map(arena, premap);
-		ret = btt_map_read(arena, premap, &old_postmap, NULL, NULL,
+		ret = btt_map_read(arena, premap, &old_postmap, NULL, &e_flag,
 				NVDIMM_IO_ATOMIC);
 		if (ret)
 			goto out_map;
@@ -1151,6 +1235,8 @@ static int btt_write_pg(struct btt *btt, struct bio_integrity_payload *bip,
 			ret = -EIO;
 			goto out_map;
 		}
+		if (e_flag)
+			set_e_flag(old_postmap);
 
 		log.lba = cpu_to_le32(premap);
 		log.old_map = cpu_to_le32(old_postmap);
@@ -1169,6 +1255,12 @@ static int btt_write_pg(struct btt *btt, struct bio_integrity_payload *bip,
 		unlock_map(arena, premap);
 		nd_region_release_lane(btt->nd_region, lane);
 
+		if (e_flag) {
+			ret = arena_clear_freelist_error(arena, lane);
+			if (ret)
+				return ret;
+		}
+
 		len -= cur_len;
 		off += cur_len;
 		sector += btt->sector_size >> SECTOR_SHIFT;
@@ -1349,6 +1441,7 @@ static struct btt *btt_init(struct nd_btt *nd_btt, unsigned long long rawsize,
 {
 	int ret;
 	struct btt *btt;
+	struct nd_namespace_io *nsio;
 	struct device *dev = &nd_btt->dev;
 
 	btt = devm_kzalloc(dev, sizeof(struct btt), GFP_KERNEL);
@@ -1362,6 +1455,8 @@ static struct btt *btt_init(struct nd_btt *nd_btt, unsigned long long rawsize,
 	INIT_LIST_HEAD(&btt->arena_list);
 	mutex_init(&btt->init_lock);
 	btt->nd_region = nd_region;
+	nsio = to_nd_namespace_io(&nd_btt->ndns->dev);
+	btt->phys_bb = &nsio->bb;
 
 	ret = discover_arenas(btt);
 	if (ret) {
diff --git a/drivers/nvdimm/btt.h b/drivers/nvdimm/btt.h
index 2bc0d10b8438..578c2057524d 100644
--- a/drivers/nvdimm/btt.h
+++ b/drivers/nvdimm/btt.h
@@ -15,6 +15,7 @@
 #ifndef _LINUX_BTT_H
 #define _LINUX_BTT_H
 
+#include <linux/badblocks.h>
 #include <linux/types.h>
 
 #define BTT_SIG_LEN 16
@@ -41,6 +42,7 @@
 #define ent_lba(ent) (ent & MAP_LBA_MASK)
 #define ent_e_flag(ent) (!!(ent & MAP_ERR_MASK))
 #define ent_z_flag(ent) (!!(ent & MAP_TRIM_MASK))
+#define set_e_flag(ent) (ent |= MAP_ERR_MASK)
 
 enum btt_init_state {
 	INIT_UNCHECKED = 0,
@@ -82,6 +84,7 @@ struct free_entry {
 	u32 block;
 	u8 sub;
 	u8 seq;
+	u8 has_err;
 };
 
 struct aligned_lock {
@@ -153,6 +156,7 @@ struct arena_info {
 	struct dentry *debugfs_dir;
 	/* Arena flags */
 	u32 flags;
+	struct mutex err_lock;
 };
 
 /**
@@ -187,6 +191,7 @@ struct btt {
 	struct mutex init_lock;
 	int init_state;
 	int num_arenas;
+	struct badblocks *phys_bb;
 };
 
 bool nd_btt_arena_is_valid(struct nd_btt *nd_btt, struct btt_sb *super);
diff --git a/drivers/nvdimm/claim.c b/drivers/nvdimm/claim.c
index 3e6404f1ba5a..b2fc29b8279b 100644
--- a/drivers/nvdimm/claim.c
+++ b/drivers/nvdimm/claim.c
@@ -280,14 +280,6 @@ static int nsio_rw_bytes(struct nd_namespace_common *ndns,
 	}
 
 	if (unlikely(is_bad_pmem(&nsio->bb, sector, sz_align))) {
-		/*
-		 * FIXME: nsio_rw_bytes() may be called from atomic
-		 * context in the btt case and the ACPI DSM path for
-		 * clearing the error takes sleeping locks and allocates
-		 * memory. An explicit error clearing path, and support
-		 * for tracking badblocks in BTT metadata is needed to
-		 * work around this collision.
-		 */
 		if (IS_ALIGNED(offset, 512) && IS_ALIGNED(size, 512)
 				&& !(flags & NVDIMM_IO_ATOMIC)) {
 			long cleared;