aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/mtd/ubi/wl.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/mtd/ubi/wl.c')
-rw-r--r--drivers/mtd/ubi/wl.c208
1 files changed, 102 insertions, 106 deletions
diff --git a/drivers/mtd/ubi/wl.c b/drivers/mtd/ubi/wl.c
index a471a491f0ab..05d70937b543 100644
--- a/drivers/mtd/ubi/wl.c
+++ b/drivers/mtd/ubi/wl.c
@@ -19,22 +19,22 @@
19 */ 19 */
20 20
21/* 21/*
22 * UBI wear-leveling unit. 22 * UBI wear-leveling sub-system.
23 * 23 *
24 * This unit is responsible for wear-leveling. It works in terms of physical 24 * This sub-system is responsible for wear-leveling. It works in terms of
25 * eraseblocks and erase counters and knows nothing about logical eraseblocks, 25 * physical eraseblocks and erase counters and knows nothing about logical
26 * volumes, etc. From this unit's perspective all physical eraseblocks are of 26 * eraseblocks, volumes, etc. From this sub-system's perspective all physical
27 * two types - used and free. Used physical eraseblocks are those that were 27 * eraseblocks are of two types - used and free. Used physical eraseblocks are
28 * "get" by the 'ubi_wl_get_peb()' function, and free physical eraseblocks are 28 * those that were "get" by the 'ubi_wl_get_peb()' function, and free physical
29 * those that were put by the 'ubi_wl_put_peb()' function. 29 * eraseblocks are those that were put by the 'ubi_wl_put_peb()' function.
30 * 30 *
31 * Physical eraseblocks returned by 'ubi_wl_get_peb()' have only erase counter 31 * Physical eraseblocks returned by 'ubi_wl_get_peb()' have only erase counter
32 * header. The rest of the physical eraseblock contains only 0xFF bytes. 32 * header. The rest of the physical eraseblock contains only %0xFF bytes.
33 * 33 *
34 * When physical eraseblocks are returned to the WL unit by means of the 34 * When physical eraseblocks are returned to the WL sub-system by means of the
35 * 'ubi_wl_put_peb()' function, they are scheduled for erasure. The erasure is 35 * 'ubi_wl_put_peb()' function, they are scheduled for erasure. The erasure is
36 * done asynchronously in context of the per-UBI device background thread, 36 * done asynchronously in context of the per-UBI device background thread,
37 * which is also managed by the WL unit. 37 * which is also managed by the WL sub-system.
38 * 38 *
39 * The wear-leveling is ensured by means of moving the contents of used 39 * The wear-leveling is ensured by means of moving the contents of used
40 * physical eraseblocks with low erase counter to free physical eraseblocks 40 * physical eraseblocks with low erase counter to free physical eraseblocks
@@ -43,34 +43,36 @@
43 * The 'ubi_wl_get_peb()' function accepts data type hints which help to pick 43 * The 'ubi_wl_get_peb()' function accepts data type hints which help to pick
44 * an "optimal" physical eraseblock. For example, when it is known that the 44 * an "optimal" physical eraseblock. For example, when it is known that the
45 * physical eraseblock will be "put" soon because it contains short-term data, 45 * physical eraseblock will be "put" soon because it contains short-term data,
46 * the WL unit may pick a free physical eraseblock with low erase counter, and 46 * the WL sub-system may pick a free physical eraseblock with low erase
47 * so forth. 47 * counter, and so forth.
48 * 48 *
49 * If the WL unit fails to erase a physical eraseblock, it marks it as bad. 49 * If the WL sub-system fails to erase a physical eraseblock, it marks it as
50 * bad.
50 * 51 *
51 * This unit is also responsible for scrubbing. If a bit-flip is detected in a 52 * This sub-system is also responsible for scrubbing. If a bit-flip is detected
52 * physical eraseblock, it has to be moved. Technically this is the same as 53 * in a physical eraseblock, it has to be moved. Technically this is the same
53 * moving it for wear-leveling reasons. 54 * as moving it for wear-leveling reasons.
54 * 55 *
55 * As it was said, for the UBI unit all physical eraseblocks are either "free" 56 * As it was said, for the UBI sub-system all physical eraseblocks are either
56 * or "used". Free eraseblocks are kept in the @wl->free RB-tree, while used 57 * "free" or "used". Free eraseblocks are kept in the @wl->free RB-tree, while
57 * eraseblocks are kept in a set of different RB-trees: @wl->used, 58 * used eraseblocks are kept in a set of different RB-trees: @wl->used,
58 * @wl->prot.pnum, @wl->prot.aec, and @wl->scrub. 59 * @wl->prot.pnum, @wl->prot.aec, and @wl->scrub.
59 * 60 *
60 * Note, in this implementation, we keep a small in-RAM object for each physical 61 * Note, in this implementation, we keep a small in-RAM object for each physical
61 * eraseblock. This is surely not a scalable solution. But it appears to be good 62 * eraseblock. This is surely not a scalable solution. But it appears to be good
62 * enough for moderately large flashes and it is simple. In future, one may 63 * enough for moderately large flashes and it is simple. In future, one may
63 * re-work this unit and make it more scalable. 64 * re-work this sub-system and make it more scalable.
64 * 65 *
65 * At the moment this unit does not utilize the sequence number, which was 66 * At the moment this sub-system does not utilize the sequence number, which
66 * introduced relatively recently. But it would be wise to do this because the 67 * was introduced relatively recently. But it would be wise to do this because
67 * sequence number of a logical eraseblock characterizes how old it is. For 68 * the sequence number of a logical eraseblock characterizes how old it is. For
68 * example, when we move a PEB with low erase counter, and we need to pick the 69 * example, when we move a PEB with low erase counter, and we need to pick the
69 * target PEB, we pick a PEB with the highest EC if our PEB is "old" and we 70 * target PEB, we pick a PEB with the highest EC if our PEB is "old" and we
70 * pick target PEB with an average EC if our PEB is not very "old". This is a 71 * pick target PEB with an average EC if our PEB is not very "old". This is a
71 * room for future re-works of the WL unit. 72 * room for future re-works of the WL sub-system.
72 * 73 *
73 * FIXME: looks too complex, should be simplified (later). 74 * Note: the stuff with protection trees looks too complex and is difficult to
75 * understand. Should be fixed.
74 */ 76 */
75 77
76#include <linux/slab.h> 78#include <linux/slab.h>
@@ -92,20 +94,21 @@
92 94
93/* 95/*
94 * Maximum difference between two erase counters. If this threshold is 96 * Maximum difference between two erase counters. If this threshold is
95 * exceeded, the WL unit starts moving data from used physical eraseblocks with 97 * exceeded, the WL sub-system starts moving data from used physical
96 * low erase counter to free physical eraseblocks with high erase counter. 98 * eraseblocks with low erase counter to free physical eraseblocks with high
99 * erase counter.
97 */ 100 */
98#define UBI_WL_THRESHOLD CONFIG_MTD_UBI_WL_THRESHOLD 101#define UBI_WL_THRESHOLD CONFIG_MTD_UBI_WL_THRESHOLD
99 102
100/* 103/*
101 * When a physical eraseblock is moved, the WL unit has to pick the target 104 * When a physical eraseblock is moved, the WL sub-system has to pick the target
102 * physical eraseblock to move to. The simplest way would be just to pick the 105 * physical eraseblock to move to. The simplest way would be just to pick the
103 * one with the highest erase counter. But in certain workloads this could lead 106 * one with the highest erase counter. But in certain workloads this could lead
104 * to an unlimited wear of one or a few physical eraseblocks. Indeed, imagine a 107 * to an unlimited wear of one or a few physical eraseblocks. Indeed, imagine a
105 * situation when the picked physical eraseblock is constantly erased after the 108 * situation when the picked physical eraseblock is constantly erased after the
106 * data is written to it. So, we have a constant which limits the highest erase 109 * data is written to it. So, we have a constant which limits the highest erase
107 * counter of the free physical eraseblock to pick. Namely, the WL unit does 110 * counter of the free physical eraseblock to pick. Namely, the WL sub-system
108 * not pick eraseblocks with erase counter greater than the lowest erase 111 * does not pick eraseblocks with erase counter greater than the lowest erase
109 * counter plus %WL_FREE_MAX_DIFF. 112 * counter plus %WL_FREE_MAX_DIFF.
110 */ 113 */
111#define WL_FREE_MAX_DIFF (2*UBI_WL_THRESHOLD) 114#define WL_FREE_MAX_DIFF (2*UBI_WL_THRESHOLD)
@@ -123,11 +126,11 @@
123 * @abs_ec: the absolute erase counter value when the protection ends 126 * @abs_ec: the absolute erase counter value when the protection ends
124 * @e: the wear-leveling entry of the physical eraseblock under protection 127 * @e: the wear-leveling entry of the physical eraseblock under protection
125 * 128 *
126 * When the WL unit returns a physical eraseblock, the physical eraseblock is 129 * When the WL sub-system returns a physical eraseblock, the physical
127 * protected from being moved for some "time". For this reason, the physical 130 * eraseblock is protected from being moved for some "time". For this reason,
128 * eraseblock is not directly moved from the @wl->free tree to the @wl->used 131 * the physical eraseblock is not directly moved from the @wl->free tree to the
129 * tree. There is one more tree in between where this physical eraseblock is 132 * @wl->used tree. There is one more tree in between where this physical
130 * temporarily stored (@wl->prot). 133 * eraseblock is temporarily stored (@wl->prot).
131 * 134 *
132 * All this protection stuff is needed because: 135 * All this protection stuff is needed because:
133 * o we don't want to move physical eraseblocks just after we have given them 136 * o we don't want to move physical eraseblocks just after we have given them
@@ -175,7 +178,6 @@ struct ubi_wl_prot_entry {
175 * @list: a link in the list of pending works 178 * @list: a link in the list of pending works
176 * @func: worker function 179 * @func: worker function
177 * @priv: private data of the worker function 180 * @priv: private data of the worker function
178 *
179 * @e: physical eraseblock to erase 181 * @e: physical eraseblock to erase
180 * @torture: if the physical eraseblock has to be tortured 182 * @torture: if the physical eraseblock has to be tortured
181 * 183 *
@@ -473,52 +475,47 @@ retry:
473 } 475 }
474 476
475 switch (dtype) { 477 switch (dtype) {
476 case UBI_LONGTERM: 478 case UBI_LONGTERM:
477 /* 479 /*
478 * For long term data we pick a physical eraseblock 480 * For long term data we pick a physical eraseblock with high
479 * with high erase counter. But the highest erase 481 * erase counter. But the highest erase counter we can pick is
480 * counter we can pick is bounded by the lowest 482 * bounded by the lowest erase counter plus
481 * erase counter plus %WL_FREE_MAX_DIFF. 483 * %WL_FREE_MAX_DIFF.
482 */ 484 */
483 e = find_wl_entry(&ubi->free, WL_FREE_MAX_DIFF); 485 e = find_wl_entry(&ubi->free, WL_FREE_MAX_DIFF);
484 protect = LT_PROTECTION; 486 protect = LT_PROTECTION;
485 break; 487 break;
486 case UBI_UNKNOWN: 488 case UBI_UNKNOWN:
487 /* 489 /*
488 * For unknown data we pick a physical eraseblock with 490 * For unknown data we pick a physical eraseblock with medium
489 * medium erase counter. But we by no means can pick a 491 * erase counter. But we by no means can pick a physical
490 * physical eraseblock with erase counter greater than or 492 * eraseblock with erase counter greater than or equal to the
491 * equal to the lowest erase counter plus 493 * lowest erase counter plus %WL_FREE_MAX_DIFF.
492 * %WL_FREE_MAX_DIFF. 494 */
493 */ 495 first = rb_entry(rb_first(&ubi->free), struct ubi_wl_entry, rb);
494 first = rb_entry(rb_first(&ubi->free), 496 last = rb_entry(rb_last(&ubi->free), struct ubi_wl_entry, rb);
495 struct ubi_wl_entry, rb);
496 last = rb_entry(rb_last(&ubi->free),
497 struct ubi_wl_entry, rb);
498 497
499 if (last->ec - first->ec < WL_FREE_MAX_DIFF) 498 if (last->ec - first->ec < WL_FREE_MAX_DIFF)
500 e = rb_entry(ubi->free.rb_node, 499 e = rb_entry(ubi->free.rb_node,
501 struct ubi_wl_entry, rb); 500 struct ubi_wl_entry, rb);
502 else { 501 else {
503 medium_ec = (first->ec + WL_FREE_MAX_DIFF)/2; 502 medium_ec = (first->ec + WL_FREE_MAX_DIFF)/2;
504 e = find_wl_entry(&ubi->free, medium_ec); 503 e = find_wl_entry(&ubi->free, medium_ec);
505 } 504 }
506 protect = U_PROTECTION; 505 protect = U_PROTECTION;
507 break; 506 break;
508 case UBI_SHORTTERM: 507 case UBI_SHORTTERM:
509 /* 508 /*
510 * For short term data we pick a physical eraseblock 509 * For short term data we pick a physical eraseblock with the
511 * with the lowest erase counter as we expect it will 510 * lowest erase counter as we expect it will be erased soon.
512 * be erased soon. 511 */
513 */ 512 e = rb_entry(rb_first(&ubi->free), struct ubi_wl_entry, rb);
514 e = rb_entry(rb_first(&ubi->free), 513 protect = ST_PROTECTION;
515 struct ubi_wl_entry, rb); 514 break;
516 protect = ST_PROTECTION; 515 default:
517 break; 516 protect = 0;
518 default: 517 e = NULL;
519 protect = 0; 518 BUG();
520 e = NULL;
521 BUG();
522 } 519 }
523 520
524 /* 521 /*
@@ -582,7 +579,8 @@ found:
582 * This function returns zero in case of success and a negative error code in 579 * This function returns zero in case of success and a negative error code in
583 * case of failure. 580 * case of failure.
584 */ 581 */
585static int sync_erase(struct ubi_device *ubi, struct ubi_wl_entry *e, int torture) 582static int sync_erase(struct ubi_device *ubi, struct ubi_wl_entry *e,
583 int torture)
586{ 584{
587 int err; 585 int err;
588 struct ubi_ec_hdr *ec_hdr; 586 struct ubi_ec_hdr *ec_hdr;
@@ -634,8 +632,7 @@ out_free:
634} 632}
635 633
636/** 634/**
637 * check_protection_over - check if it is time to stop protecting some 635 * check_protection_over - check if it is time to stop protecting some PEBs.
638 * physical eraseblocks.
639 * @ubi: UBI device description object 636 * @ubi: UBI device description object
640 * 637 *
641 * This function is called after each erase operation, when the absolute erase 638 * This function is called after each erase operation, when the absolute erase
@@ -871,6 +868,10 @@ static int wear_leveling_worker(struct ubi_device *ubi, struct ubi_work *wrk,
871 } 868 }
872 869
873 ubi_free_vid_hdr(ubi, vid_hdr); 870 ubi_free_vid_hdr(ubi, vid_hdr);
871 if (scrubbing && !protect)
872 ubi_msg("scrubbed PEB %d, data moved to PEB %d",
873 e1->pnum, e2->pnum);
874
874 spin_lock(&ubi->wl_lock); 875 spin_lock(&ubi->wl_lock);
875 if (protect) 876 if (protect)
876 prot_tree_add(ubi, e1, pe, protect); 877 prot_tree_add(ubi, e1, pe, protect);
@@ -1054,8 +1055,8 @@ static int erase_worker(struct ubi_device *ubi, struct ubi_work *wl_wrk,
1054 spin_unlock(&ubi->wl_lock); 1055 spin_unlock(&ubi->wl_lock);
1055 1056
1056 /* 1057 /*
1057 * One more erase operation has happened, take care of protected 1058 * One more erase operation has happened, take care of
1058 * physical eraseblocks. 1059 * protected physical eraseblocks.
1059 */ 1060 */
1060 check_protection_over(ubi); 1061 check_protection_over(ubi);
1061 1062
@@ -1136,7 +1137,7 @@ out_ro:
1136} 1137}
1137 1138
1138/** 1139/**
1139 * ubi_wl_put_peb - return a physical eraseblock to the wear-leveling unit. 1140 * ubi_wl_put_peb - return a PEB to the wear-leveling sub-system.
1140 * @ubi: UBI device description object 1141 * @ubi: UBI device description object
1141 * @pnum: physical eraseblock to return 1142 * @pnum: physical eraseblock to return
1142 * @torture: if this physical eraseblock has to be tortured 1143 * @torture: if this physical eraseblock has to be tortured
@@ -1175,11 +1176,11 @@ retry:
1175 /* 1176 /*
1176 * User is putting the physical eraseblock which was selected 1177 * User is putting the physical eraseblock which was selected
1177 * as the target the data is moved to. It may happen if the EBA 1178 * as the target the data is moved to. It may happen if the EBA
1178 * unit already re-mapped the LEB in 'ubi_eba_copy_leb()' but 1179 * sub-system already re-mapped the LEB in 'ubi_eba_copy_leb()'
1179 * the WL unit has not put the PEB to the "used" tree yet, but 1180 * but the WL sub-system has not put the PEB to the "used" tree
1180 * it is about to do this. So we just set a flag which will 1181 * yet, but it is about to do this. So we just set a flag which
1181 * tell the WL worker that the PEB is not needed anymore and 1182 * will tell the WL worker that the PEB is not needed anymore
1182 * should be scheduled for erasure. 1183 * and should be scheduled for erasure.
1183 */ 1184 */
1184 dbg_wl("PEB %d is the target of data moving", pnum); 1185 dbg_wl("PEB %d is the target of data moving", pnum);
1185 ubi_assert(!ubi->move_to_put); 1186 ubi_assert(!ubi->move_to_put);
@@ -1229,7 +1230,7 @@ int ubi_wl_scrub_peb(struct ubi_device *ubi, int pnum)
1229{ 1230{
1230 struct ubi_wl_entry *e; 1231 struct ubi_wl_entry *e;
1231 1232
1232 ubi_msg("schedule PEB %d for scrubbing", pnum); 1233 dbg_msg("schedule PEB %d for scrubbing", pnum);
1233 1234
1234retry: 1235retry:
1235 spin_lock(&ubi->wl_lock); 1236 spin_lock(&ubi->wl_lock);
@@ -1368,7 +1369,7 @@ int ubi_thread(void *u)
1368 int err; 1369 int err;
1369 1370
1370 if (kthread_should_stop()) 1371 if (kthread_should_stop())
1371 goto out; 1372 break;
1372 1373
1373 if (try_to_freeze()) 1374 if (try_to_freeze())
1374 continue; 1375 continue;
@@ -1403,7 +1404,6 @@ int ubi_thread(void *u)
1403 cond_resched(); 1404 cond_resched();
1404 } 1405 }
1405 1406
1406out:
1407 dbg_wl("background thread \"%s\" is killed", ubi->bgt_name); 1407 dbg_wl("background thread \"%s\" is killed", ubi->bgt_name);
1408 return 0; 1408 return 0;
1409} 1409}
@@ -1426,8 +1426,7 @@ static void cancel_pending(struct ubi_device *ubi)
1426} 1426}
1427 1427
1428/** 1428/**
1429 * ubi_wl_init_scan - initialize the wear-leveling unit using scanning 1429 * ubi_wl_init_scan - initialize the WL sub-system using scanning information.
1430 * information.
1431 * @ubi: UBI device description object 1430 * @ubi: UBI device description object
1432 * @si: scanning information 1431 * @si: scanning information
1433 * 1432 *
@@ -1584,13 +1583,12 @@ static void protection_trees_destroy(struct ubi_device *ubi)
1584} 1583}
1585 1584
1586/** 1585/**
1587 * ubi_wl_close - close the wear-leveling unit. 1586 * ubi_wl_close - close the wear-leveling sub-system.
1588 * @ubi: UBI device description object 1587 * @ubi: UBI device description object
1589 */ 1588 */
1590void ubi_wl_close(struct ubi_device *ubi) 1589void ubi_wl_close(struct ubi_device *ubi)
1591{ 1590{
1592 dbg_wl("close the UBI wear-leveling unit"); 1591 dbg_wl("close the WL sub-system");
1593
1594 cancel_pending(ubi); 1592 cancel_pending(ubi);
1595 protection_trees_destroy(ubi); 1593 protection_trees_destroy(ubi);
1596 tree_destroy(&ubi->used); 1594 tree_destroy(&ubi->used);
@@ -1602,8 +1600,7 @@ void ubi_wl_close(struct ubi_device *ubi)
1602#ifdef CONFIG_MTD_UBI_DEBUG_PARANOID 1600#ifdef CONFIG_MTD_UBI_DEBUG_PARANOID
1603 1601
1604/** 1602/**
1605 * paranoid_check_ec - make sure that the erase counter of a physical eraseblock 1603 * paranoid_check_ec - make sure that the erase counter of a PEB is correct.
1606 * is correct.
1607 * @ubi: UBI device description object 1604 * @ubi: UBI device description object
1608 * @pnum: the physical eraseblock number to check 1605 * @pnum: the physical eraseblock number to check
1609 * @ec: the erase counter to check 1606 * @ec: the erase counter to check
@@ -1644,13 +1641,12 @@ out_free:
1644} 1641}
1645 1642
1646/** 1643/**
1647 * paranoid_check_in_wl_tree - make sure that a wear-leveling entry is present 1644 * paranoid_check_in_wl_tree - check that wear-leveling entry is in WL RB-tree.
1648 * in a WL RB-tree.
1649 * @e: the wear-leveling entry to check 1645 * @e: the wear-leveling entry to check
1650 * @root: the root of the tree 1646 * @root: the root of the tree
1651 * 1647 *
1652 * This function returns zero if @e is in the @root RB-tree and %1 if it 1648 * This function returns zero if @e is in the @root RB-tree and %1 if it is
1653 * is not. 1649 * not.
1654 */ 1650 */
1655static int paranoid_check_in_wl_tree(struct ubi_wl_entry *e, 1651static int paranoid_check_in_wl_tree(struct ubi_wl_entry *e,
1656 struct rb_root *root) 1652 struct rb_root *root)