diff options
-rw-r--r-- | drivers/nvdimm/btt.c | 201 | ||||
-rw-r--r-- | drivers/nvdimm/btt.h | 45 |
2 files changed, 211 insertions, 35 deletions
diff --git a/drivers/nvdimm/btt.c b/drivers/nvdimm/btt.c index e949e3302af4..c586bcdb5190 100644 --- a/drivers/nvdimm/btt.c +++ b/drivers/nvdimm/btt.c | |||
@@ -211,12 +211,12 @@ static int btt_map_read(struct arena_info *arena, u32 lba, u32 *mapping, | |||
211 | return ret; | 211 | return ret; |
212 | } | 212 | } |
213 | 213 | ||
214 | static int btt_log_read_pair(struct arena_info *arena, u32 lane, | 214 | static int btt_log_group_read(struct arena_info *arena, u32 lane, |
215 | struct log_entry *ent) | 215 | struct log_group *log) |
216 | { | 216 | { |
217 | return arena_read_bytes(arena, | 217 | return arena_read_bytes(arena, |
218 | arena->logoff + (2 * lane * LOG_ENT_SIZE), ent, | 218 | arena->logoff + (lane * LOG_GRP_SIZE), log, |
219 | 2 * LOG_ENT_SIZE, 0); | 219 | LOG_GRP_SIZE, 0); |
220 | } | 220 | } |
221 | 221 | ||
222 | static struct dentry *debugfs_root; | 222 | static struct dentry *debugfs_root; |
@@ -256,6 +256,8 @@ static void arena_debugfs_init(struct arena_info *a, struct dentry *parent, | |||
256 | debugfs_create_x64("logoff", S_IRUGO, d, &a->logoff); | 256 | debugfs_create_x64("logoff", S_IRUGO, d, &a->logoff); |
257 | debugfs_create_x64("info2off", S_IRUGO, d, &a->info2off); | 257 | debugfs_create_x64("info2off", S_IRUGO, d, &a->info2off); |
258 | debugfs_create_x32("flags", S_IRUGO, d, &a->flags); | 258 | debugfs_create_x32("flags", S_IRUGO, d, &a->flags); |
259 | debugfs_create_u32("log_index_0", S_IRUGO, d, &a->log_index[0]); | ||
260 | debugfs_create_u32("log_index_1", S_IRUGO, d, &a->log_index[1]); | ||
259 | } | 261 | } |
260 | 262 | ||
261 | static void btt_debugfs_init(struct btt *btt) | 263 | static void btt_debugfs_init(struct btt *btt) |
@@ -274,6 +276,11 @@ static void btt_debugfs_init(struct btt *btt) | |||
274 | } | 276 | } |
275 | } | 277 | } |
276 | 278 | ||
279 | static u32 log_seq(struct log_group *log, int log_idx) | ||
280 | { | ||
281 | return le32_to_cpu(log->ent[log_idx].seq); | ||
282 | } | ||
283 | |||
277 | /* | 284 | /* |
278 | * This function accepts two log entries, and uses the | 285 | * This function accepts two log entries, and uses the |
279 | * sequence number to find the 'older' entry. | 286 | * sequence number to find the 'older' entry. |
@@ -283,8 +290,10 @@ static void btt_debugfs_init(struct btt *btt) | |||
283 | * | 290 | * |
284 | * TODO The logic feels a bit kludge-y. make it better.. | 291 | * TODO The logic feels a bit kludge-y. make it better.. |
285 | */ | 292 | */ |
286 | static int btt_log_get_old(struct log_entry *ent) | 293 | static int btt_log_get_old(struct arena_info *a, struct log_group *log) |
287 | { | 294 | { |
295 | int idx0 = a->log_index[0]; | ||
296 | int idx1 = a->log_index[1]; | ||
288 | int old; | 297 | int old; |
289 | 298 | ||
290 | /* | 299 | /* |
@@ -292,23 +301,23 @@ static int btt_log_get_old(struct log_entry *ent) | |||
292 | * the next time, the following logic works out to put this | 301 | * the next time, the following logic works out to put this |
293 | * (next) entry into [1] | 302 | * (next) entry into [1] |
294 | */ | 303 | */ |
295 | if (ent[0].seq == 0) { | 304 | if (log_seq(log, idx0) == 0) { |
296 | ent[0].seq = cpu_to_le32(1); | 305 | log->ent[idx0].seq = cpu_to_le32(1); |
297 | return 0; | 306 | return 0; |
298 | } | 307 | } |
299 | 308 | ||
300 | if (ent[0].seq == ent[1].seq) | 309 | if (log_seq(log, idx0) == log_seq(log, idx1)) |
301 | return -EINVAL; | 310 | return -EINVAL; |
302 | if (le32_to_cpu(ent[0].seq) + le32_to_cpu(ent[1].seq) > 5) | 311 | if (log_seq(log, idx0) + log_seq(log, idx1) > 5) |
303 | return -EINVAL; | 312 | return -EINVAL; |
304 | 313 | ||
305 | if (le32_to_cpu(ent[0].seq) < le32_to_cpu(ent[1].seq)) { | 314 | if (log_seq(log, idx0) < log_seq(log, idx1)) { |
306 | if (le32_to_cpu(ent[1].seq) - le32_to_cpu(ent[0].seq) == 1) | 315 | if ((log_seq(log, idx1) - log_seq(log, idx0)) == 1) |
307 | old = 0; | 316 | old = 0; |
308 | else | 317 | else |
309 | old = 1; | 318 | old = 1; |
310 | } else { | 319 | } else { |
311 | if (le32_to_cpu(ent[0].seq) - le32_to_cpu(ent[1].seq) == 1) | 320 | if ((log_seq(log, idx0) - log_seq(log, idx1)) == 1) |
312 | old = 1; | 321 | old = 1; |
313 | else | 322 | else |
314 | old = 0; | 323 | old = 0; |
@@ -328,17 +337,18 @@ static int btt_log_read(struct arena_info *arena, u32 lane, | |||
328 | { | 337 | { |
329 | int ret; | 338 | int ret; |
330 | int old_ent, ret_ent; | 339 | int old_ent, ret_ent; |
331 | struct log_entry log[2]; | 340 | struct log_group log; |
332 | 341 | ||
333 | ret = btt_log_read_pair(arena, lane, log); | 342 | ret = btt_log_group_read(arena, lane, &log); |
334 | if (ret) | 343 | if (ret) |
335 | return -EIO; | 344 | return -EIO; |
336 | 345 | ||
337 | old_ent = btt_log_get_old(log); | 346 | old_ent = btt_log_get_old(arena, &log); |
338 | if (old_ent < 0 || old_ent > 1) { | 347 | if (old_ent < 0 || old_ent > 1) { |
339 | dev_err(to_dev(arena), | 348 | dev_err(to_dev(arena), |
340 | "log corruption (%d): lane %d seq [%d, %d]\n", | 349 | "log corruption (%d): lane %d seq [%d, %d]\n", |
341 | old_ent, lane, log[0].seq, log[1].seq); | 350 | old_ent, lane, log.ent[arena->log_index[0]].seq, |
351 | log.ent[arena->log_index[1]].seq); | ||
342 | /* TODO set error state? */ | 352 | /* TODO set error state? */ |
343 | return -EIO; | 353 | return -EIO; |
344 | } | 354 | } |
@@ -346,7 +356,7 @@ static int btt_log_read(struct arena_info *arena, u32 lane, | |||
346 | ret_ent = (old_flag ? old_ent : (1 - old_ent)); | 356 | ret_ent = (old_flag ? old_ent : (1 - old_ent)); |
347 | 357 | ||
348 | if (ent != NULL) | 358 | if (ent != NULL) |
349 | memcpy(ent, &log[ret_ent], LOG_ENT_SIZE); | 359 | memcpy(ent, &log.ent[arena->log_index[ret_ent]], LOG_ENT_SIZE); |
350 | 360 | ||
351 | return ret_ent; | 361 | return ret_ent; |
352 | } | 362 | } |
@@ -360,17 +370,13 @@ static int __btt_log_write(struct arena_info *arena, u32 lane, | |||
360 | u32 sub, struct log_entry *ent, unsigned long flags) | 370 | u32 sub, struct log_entry *ent, unsigned long flags) |
361 | { | 371 | { |
362 | int ret; | 372 | int ret; |
363 | /* | 373 | u32 group_slot = arena->log_index[sub]; |
364 | * Ignore the padding in log_entry for calculating log_half. | 374 | unsigned int log_half = LOG_ENT_SIZE / 2; |
365 | * The entry is 'committed' when we write the sequence number, | ||
366 | * and we want to ensure that that is the last thing written. | ||
367 | * We don't bother writing the padding as that would be extra | ||
368 | * media wear and write amplification | ||
369 | */ | ||
370 | unsigned int log_half = (LOG_ENT_SIZE - 2 * sizeof(u64)) / 2; | ||
371 | u64 ns_off = arena->logoff + (((2 * lane) + sub) * LOG_ENT_SIZE); | ||
372 | void *src = ent; | 375 | void *src = ent; |
376 | u64 ns_off; | ||
373 | 377 | ||
378 | ns_off = arena->logoff + (lane * LOG_GRP_SIZE) + | ||
379 | (group_slot * LOG_ENT_SIZE); | ||
374 | /* split the 16B write into atomic, durable halves */ | 380 | /* split the 16B write into atomic, durable halves */ |
375 | ret = arena_write_bytes(arena, ns_off, src, log_half, flags); | 381 | ret = arena_write_bytes(arena, ns_off, src, log_half, flags); |
376 | if (ret) | 382 | if (ret) |
@@ -453,7 +459,7 @@ static int btt_log_init(struct arena_info *arena) | |||
453 | { | 459 | { |
454 | size_t logsize = arena->info2off - arena->logoff; | 460 | size_t logsize = arena->info2off - arena->logoff; |
455 | size_t chunk_size = SZ_4K, offset = 0; | 461 | size_t chunk_size = SZ_4K, offset = 0; |
456 | struct log_entry log; | 462 | struct log_entry ent; |
457 | void *zerobuf; | 463 | void *zerobuf; |
458 | int ret; | 464 | int ret; |
459 | u32 i; | 465 | u32 i; |
@@ -485,11 +491,11 @@ static int btt_log_init(struct arena_info *arena) | |||
485 | } | 491 | } |
486 | 492 | ||
487 | for (i = 0; i < arena->nfree; i++) { | 493 | for (i = 0; i < arena->nfree; i++) { |
488 | log.lba = cpu_to_le32(i); | 494 | ent.lba = cpu_to_le32(i); |
489 | log.old_map = cpu_to_le32(arena->external_nlba + i); | 495 | ent.old_map = cpu_to_le32(arena->external_nlba + i); |
490 | log.new_map = cpu_to_le32(arena->external_nlba + i); | 496 | ent.new_map = cpu_to_le32(arena->external_nlba + i); |
491 | log.seq = cpu_to_le32(LOG_SEQ_INIT); | 497 | ent.seq = cpu_to_le32(LOG_SEQ_INIT); |
492 | ret = __btt_log_write(arena, i, 0, &log, 0); | 498 | ret = __btt_log_write(arena, i, 0, &ent, 0); |
493 | if (ret) | 499 | if (ret) |
494 | goto free; | 500 | goto free; |
495 | } | 501 | } |
@@ -594,6 +600,123 @@ static int btt_freelist_init(struct arena_info *arena) | |||
594 | return 0; | 600 | return 0; |
595 | } | 601 | } |
596 | 602 | ||
603 | static bool ent_is_padding(struct log_entry *ent) | ||
604 | { | ||
605 | return (ent->lba == 0) && (ent->old_map == 0) && (ent->new_map == 0) | ||
606 | && (ent->seq == 0); | ||
607 | } | ||
608 | |||
609 | /* | ||
610 | * Detecting valid log indices: We read a log group (see the comments in btt.h | ||
611 | * for a description of a 'log_group' and its 'slots'), and iterate over its | ||
612 | * four slots. We expect that a padding slot will be all-zeroes, and use this | ||
613 | * to detect a padding slot vs. an actual entry. | ||
614 | * | ||
615 | * If a log_group is in the initial state, i.e. hasn't been used since the | ||
616 | * creation of this BTT layout, it will have three of the four slots with | ||
617 | * zeroes. We skip over these log_groups for the detection of log_index. If | ||
618 | * all log_groups are in the initial state (i.e. the BTT has never been | ||
619 | * written to), it is safe to assume the 'new format' of log entries in slots | ||
620 | * (0, 1). | ||
621 | */ | ||
622 | static int log_set_indices(struct arena_info *arena) | ||
623 | { | ||
624 | bool idx_set = false, initial_state = true; | ||
625 | int ret, log_index[2] = {-1, -1}; | ||
626 | u32 i, j, next_idx = 0; | ||
627 | struct log_group log; | ||
628 | u32 pad_count = 0; | ||
629 | |||
630 | for (i = 0; i < arena->nfree; i++) { | ||
631 | ret = btt_log_group_read(arena, i, &log); | ||
632 | if (ret < 0) | ||
633 | return ret; | ||
634 | |||
635 | for (j = 0; j < 4; j++) { | ||
636 | if (!idx_set) { | ||
637 | if (ent_is_padding(&log.ent[j])) { | ||
638 | pad_count++; | ||
639 | continue; | ||
640 | } else { | ||
641 | /* Skip if index has been recorded */ | ||
642 | if ((next_idx == 1) && | ||
643 | (j == log_index[0])) | ||
644 | continue; | ||
645 | /* valid entry, record index */ | ||
646 | log_index[next_idx] = j; | ||
647 | next_idx++; | ||
648 | } | ||
649 | if (next_idx == 2) { | ||
650 | /* two valid entries found */ | ||
651 | idx_set = true; | ||
652 | } else if (next_idx > 2) { | ||
653 | /* too many valid indices */ | ||
654 | return -ENXIO; | ||
655 | } | ||
656 | } else { | ||
657 | /* | ||
658 | * once the indices have been set, just verify | ||
659 | * that all subsequent log groups are either in | ||
660 | * their initial state or follow the same | ||
661 | * indices. | ||
662 | */ | ||
663 | if (j == log_index[0]) { | ||
664 | /* entry must be 'valid' */ | ||
665 | if (ent_is_padding(&log.ent[j])) | ||
666 | return -ENXIO; | ||
667 | } else if (j == log_index[1]) { | ||
668 | ; | ||
669 | /* | ||
670 | * log_index[1] can be padding if the | ||
671 | * lane never got used and it is still | ||
672 | * in the initial state (three 'padding' | ||
673 | * entries) | ||
674 | */ | ||
675 | } else { | ||
676 | /* entry must be invalid (padding) */ | ||
677 | if (!ent_is_padding(&log.ent[j])) | ||
678 | return -ENXIO; | ||
679 | } | ||
680 | } | ||
681 | } | ||
682 | /* | ||
683 | * If any of the log_groups have more than one valid, | ||
684 | * non-padding entry, then we are no longer in the | ||
685 | * initial_state | ||
686 | */ | ||
687 | if (pad_count < 3) | ||
688 | initial_state = false; | ||
689 | pad_count = 0; | ||
690 | } | ||
691 | |||
692 | if (!initial_state && !idx_set) | ||
693 | return -ENXIO; | ||
694 | |||
695 | /* | ||
696 | * If all the entries in the log were in the initial state, | ||
697 | * assume new padding scheme | ||
698 | */ | ||
699 | if (initial_state) | ||
700 | log_index[1] = 1; | ||
701 | |||
702 | /* | ||
703 | * Only allow the known permutations of log/padding indices, | ||
704 | * i.e. (0, 1), and (0, 2) | ||
705 | */ | ||
706 | if ((log_index[0] == 0) && ((log_index[1] == 1) || (log_index[1] == 2))) | ||
707 | ; /* known index possibilities */ | ||
708 | else { | ||
709 | dev_err(to_dev(arena), "Found an unknown padding scheme\n"); | ||
710 | return -ENXIO; | ||
711 | } | ||
712 | |||
713 | arena->log_index[0] = log_index[0]; | ||
714 | arena->log_index[1] = log_index[1]; | ||
715 | dev_dbg(to_dev(arena), "log_index_0 = %d\n", log_index[0]); | ||
716 | dev_dbg(to_dev(arena), "log_index_1 = %d\n", log_index[1]); | ||
717 | return 0; | ||
718 | } | ||
719 | |||
597 | static int btt_rtt_init(struct arena_info *arena) | 720 | static int btt_rtt_init(struct arena_info *arena) |
598 | { | 721 | { |
599 | arena->rtt = kcalloc(arena->nfree, sizeof(u32), GFP_KERNEL); | 722 | arena->rtt = kcalloc(arena->nfree, sizeof(u32), GFP_KERNEL); |
@@ -650,8 +773,7 @@ static struct arena_info *alloc_arena(struct btt *btt, size_t size, | |||
650 | available -= 2 * BTT_PG_SIZE; | 773 | available -= 2 * BTT_PG_SIZE; |
651 | 774 | ||
652 | /* The log takes a fixed amount of space based on nfree */ | 775 | /* The log takes a fixed amount of space based on nfree */ |
653 | logsize = roundup(2 * arena->nfree * sizeof(struct log_entry), | 776 | logsize = roundup(arena->nfree * LOG_GRP_SIZE, BTT_PG_SIZE); |
654 | BTT_PG_SIZE); | ||
655 | available -= logsize; | 777 | available -= logsize; |
656 | 778 | ||
657 | /* Calculate optimal split between map and data area */ | 779 | /* Calculate optimal split between map and data area */ |
@@ -668,6 +790,10 @@ static struct arena_info *alloc_arena(struct btt *btt, size_t size, | |||
668 | arena->mapoff = arena->dataoff + datasize; | 790 | arena->mapoff = arena->dataoff + datasize; |
669 | arena->logoff = arena->mapoff + mapsize; | 791 | arena->logoff = arena->mapoff + mapsize; |
670 | arena->info2off = arena->logoff + logsize; | 792 | arena->info2off = arena->logoff + logsize; |
793 | |||
794 | /* Default log indices are (0,1) */ | ||
795 | arena->log_index[0] = 0; | ||
796 | arena->log_index[1] = 1; | ||
671 | return arena; | 797 | return arena; |
672 | } | 798 | } |
673 | 799 | ||
@@ -758,6 +884,13 @@ static int discover_arenas(struct btt *btt) | |||
758 | arena->external_lba_start = cur_nlba; | 884 | arena->external_lba_start = cur_nlba; |
759 | parse_arena_meta(arena, super, cur_off); | 885 | parse_arena_meta(arena, super, cur_off); |
760 | 886 | ||
887 | ret = log_set_indices(arena); | ||
888 | if (ret) { | ||
889 | dev_err(to_dev(arena), | ||
890 | "Unable to deduce log/padding indices\n"); | ||
891 | goto out; | ||
892 | } | ||
893 | |||
761 | mutex_init(&arena->err_lock); | 894 | mutex_init(&arena->err_lock); |
762 | ret = btt_freelist_init(arena); | 895 | ret = btt_freelist_init(arena); |
763 | if (ret) | 896 | if (ret) |
diff --git a/drivers/nvdimm/btt.h b/drivers/nvdimm/btt.h index 884fbbbdd18a..db3cb6d4d0d4 100644 --- a/drivers/nvdimm/btt.h +++ b/drivers/nvdimm/btt.h | |||
@@ -27,6 +27,7 @@ | |||
27 | #define MAP_ERR_MASK (1 << MAP_ERR_SHIFT) | 27 | #define MAP_ERR_MASK (1 << MAP_ERR_SHIFT) |
28 | #define MAP_LBA_MASK (~((1 << MAP_TRIM_SHIFT) | (1 << MAP_ERR_SHIFT))) | 28 | #define MAP_LBA_MASK (~((1 << MAP_TRIM_SHIFT) | (1 << MAP_ERR_SHIFT))) |
29 | #define MAP_ENT_NORMAL 0xC0000000 | 29 | #define MAP_ENT_NORMAL 0xC0000000 |
30 | #define LOG_GRP_SIZE sizeof(struct log_group) | ||
30 | #define LOG_ENT_SIZE sizeof(struct log_entry) | 31 | #define LOG_ENT_SIZE sizeof(struct log_entry) |
31 | #define ARENA_MIN_SIZE (1UL << 24) /* 16 MB */ | 32 | #define ARENA_MIN_SIZE (1UL << 24) /* 16 MB */ |
32 | #define ARENA_MAX_SIZE (1ULL << 39) /* 512 GB */ | 33 | #define ARENA_MAX_SIZE (1ULL << 39) /* 512 GB */ |
@@ -50,12 +51,52 @@ enum btt_init_state { | |||
50 | INIT_READY | 51 | INIT_READY |
51 | }; | 52 | }; |
52 | 53 | ||
54 | /* | ||
55 | * A log group represents one log 'lane', and consists of four log entries. | ||
56 | * Two of the four entries are valid entries, and the remaining two are | ||
57 | * padding. Due to an old bug in the padding location, we need to perform a | ||
58 | * test to determine the padding scheme being used, and use that scheme | ||
59 | * thereafter. | ||
60 | * | ||
61 | * In kernels prior to 4.15, 'log group' would have actual log entries at | ||
62 | * indices (0, 2) and padding at indices (1, 3), whereas the correct/updated | ||
63 | * format has log entries at indices (0, 1) and padding at indices (2, 3). | ||
64 | * | ||
65 | * Old (pre 4.15) format: | ||
66 | * +-----------------+-----------------+ | ||
67 | * | ent[0] | ent[1] | | ||
68 | * | 16B | 16B | | ||
69 | * | lba/old/new/seq | pad | | ||
70 | * +-----------------------------------+ | ||
71 | * | ent[2] | ent[3] | | ||
72 | * | 16B | 16B | | ||
73 | * | lba/old/new/seq | pad | | ||
74 | * +-----------------+-----------------+ | ||
75 | * | ||
76 | * New format: | ||
77 | * +-----------------+-----------------+ | ||
78 | * | ent[0] | ent[1] | | ||
79 | * | 16B | 16B | | ||
80 | * | lba/old/new/seq | lba/old/new/seq | | ||
81 | * +-----------------------------------+ | ||
82 | * | ent[2] | ent[3] | | ||
83 | * | 16B | 16B | | ||
84 | * | pad | pad | | ||
85 | * +-----------------+-----------------+ | ||
86 | * | ||
87 | * We detect during start-up which format is in use, and set | ||
88 | * arena->log_index[(0, 1)] with the detected format. | ||
89 | */ | ||
90 | |||
53 | struct log_entry { | 91 | struct log_entry { |
54 | __le32 lba; | 92 | __le32 lba; |
55 | __le32 old_map; | 93 | __le32 old_map; |
56 | __le32 new_map; | 94 | __le32 new_map; |
57 | __le32 seq; | 95 | __le32 seq; |
58 | __le64 padding[2]; | 96 | }; |
97 | |||
98 | struct log_group { | ||
99 | struct log_entry ent[4]; | ||
59 | }; | 100 | }; |
60 | 101 | ||
61 | struct btt_sb { | 102 | struct btt_sb { |
@@ -126,6 +167,7 @@ struct aligned_lock { | |||
126 | * @debugfs_dir: Debugfs dentry | 167 | * @debugfs_dir: Debugfs dentry |
127 | * @flags: Arena flags - may signify error states. | 168 | * @flags: Arena flags - may signify error states. |
128 | * @err_lock: Mutex for synchronizing error clearing. | 169 | * @err_lock: Mutex for synchronizing error clearing. |
170 | * @log_index: Indices of the valid log entries in a log_group | ||
129 | * | 171 | * |
130 | * arena_info is a per-arena handle. Once an arena is narrowed down for an | 172 | * arena_info is a per-arena handle. Once an arena is narrowed down for an |
131 | * IO, this struct is passed around for the duration of the IO. | 173 | * IO, this struct is passed around for the duration of the IO. |
@@ -158,6 +200,7 @@ struct arena_info { | |||
158 | /* Arena flags */ | 200 | /* Arena flags */ |
159 | u32 flags; | 201 | u32 flags; |
160 | struct mutex err_lock; | 202 | struct mutex err_lock; |
203 | int log_index[2]; | ||
161 | }; | 204 | }; |
162 | 205 | ||
163 | /** | 206 | /** |