aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/mtd/ubi/wl.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/mtd/ubi/wl.c')
-rw-r--r--drivers/mtd/ubi/wl.c489
1 files changed, 215 insertions, 274 deletions
diff --git a/drivers/mtd/ubi/wl.c b/drivers/mtd/ubi/wl.c
index dcb6dac1dc54..14901cb82c18 100644
--- a/drivers/mtd/ubi/wl.c
+++ b/drivers/mtd/ubi/wl.c
@@ -22,7 +22,7 @@
22 * UBI wear-leveling sub-system. 22 * UBI wear-leveling sub-system.
23 * 23 *
24 * This sub-system is responsible for wear-leveling. It works in terms of 24 * This sub-system is responsible for wear-leveling. It works in terms of
25 * physical* eraseblocks and erase counters and knows nothing about logical 25 * physical eraseblocks and erase counters and knows nothing about logical
26 * eraseblocks, volumes, etc. From this sub-system's perspective all physical 26 * eraseblocks, volumes, etc. From this sub-system's perspective all physical
27 * eraseblocks are of two types - used and free. Used physical eraseblocks are 27 * eraseblocks are of two types - used and free. Used physical eraseblocks are
28 * those that were "get" by the 'ubi_wl_get_peb()' function, and free physical 28 * those that were "get" by the 'ubi_wl_get_peb()' function, and free physical
@@ -55,8 +55,39 @@
55 * 55 *
56 * As it was said, for the UBI sub-system all physical eraseblocks are either 56 * As it was said, for the UBI sub-system all physical eraseblocks are either
57 * "free" or "used". Free eraseblock are kept in the @wl->free RB-tree, while 57 * "free" or "used". Free eraseblock are kept in the @wl->free RB-tree, while
58 * used eraseblocks are kept in a set of different RB-trees: @wl->used, 58 * used eraseblocks are kept in @wl->used or @wl->scrub RB-trees, or
59 * @wl->prot.pnum, @wl->prot.aec, and @wl->scrub. 59 * (temporarily) in the @wl->pq queue.
60 *
61 * When the WL sub-system returns a physical eraseblock, the physical
62 * eraseblock is protected from being moved for some "time". For this reason,
63 * the physical eraseblock is not directly moved from the @wl->free tree to the
64 * @wl->used tree. There is a protection queue in between where this
65 * physical eraseblock is temporarily stored (@wl->pq).
66 *
67 * All this protection stuff is needed because:
68 * o we don't want to move physical eraseblocks just after we have given them
69 * to the user; instead, we first want to let users fill them up with data;
70 *
71 * o there is a chance that the user will put the physical eraseblock very
72 * soon, so it makes sense not to move it for some time, but wait; this is
73 * especially important in case of "short term" physical eraseblocks.
74 *
75 * Physical eraseblocks stay protected only for limited time. But the "time" is
76 * measured in erase cycles in this case. This is implemented with help of the
77 * protection queue. Eraseblocks are put to the tail of this queue when they
78 * are returned by the 'ubi_wl_get_peb()', and eraseblocks are removed from the
79 * head of the queue on each erase operation (for any eraseblock). So the
80 * length of the queue defines how may (global) erase cycles PEBs are protected.
81 *
82 * To put it differently, each physical eraseblock has 2 main states: free and
83 * used. The former state corresponds to the @wl->free tree. The latter state
84 * is split up on several sub-states:
85 * o the WL movement is allowed (@wl->used tree);
86 * o the WL movement is temporarily prohibited (@wl->pq queue);
87 * o scrubbing is needed (@wl->scrub tree).
88 *
89 * Depending on the sub-state, wear-leveling entries of the used physical
90 * eraseblocks may be kept in one of those structures.
60 * 91 *
61 * Note, in this implementation, we keep a small in-RAM object for each physical 92 * Note, in this implementation, we keep a small in-RAM object for each physical
62 * eraseblock. This is surely not a scalable solution. But it appears to be good 93 * eraseblock. This is surely not a scalable solution. But it appears to be good
@@ -70,9 +101,6 @@
70 * target PEB, we pick a PEB with the highest EC if our PEB is "old" and we 101 * target PEB, we pick a PEB with the highest EC if our PEB is "old" and we
71 * pick target PEB with an average EC if our PEB is not very "old". This is a 102 * pick target PEB with an average EC if our PEB is not very "old". This is a
72 * room for future re-works of the WL sub-system. 103 * room for future re-works of the WL sub-system.
73 *
74 * Note: the stuff with protection trees looks too complex and is difficult to
75 * understand. Should be fixed.
76 */ 104 */
77 105
78#include <linux/slab.h> 106#include <linux/slab.h>
@@ -85,14 +113,6 @@
85#define WL_RESERVED_PEBS 1 113#define WL_RESERVED_PEBS 1
86 114
87/* 115/*
88 * How many erase cycles are short term, unknown, and long term physical
89 * eraseblocks protected.
90 */
91#define ST_PROTECTION 16
92#define U_PROTECTION 10
93#define LT_PROTECTION 4
94
95/*
96 * Maximum difference between two erase counters. If this threshold is 116 * Maximum difference between two erase counters. If this threshold is
97 * exceeded, the WL sub-system starts moving data from used physical 117 * exceeded, the WL sub-system starts moving data from used physical
98 * eraseblocks with low erase counter to free physical eraseblocks with high 118 * eraseblocks with low erase counter to free physical eraseblocks with high
@@ -120,64 +140,9 @@
120#define WL_MAX_FAILURES 32 140#define WL_MAX_FAILURES 32
121 141
122/** 142/**
123 * struct ubi_wl_prot_entry - PEB protection entry.
124 * @rb_pnum: link in the @wl->prot.pnum RB-tree
125 * @rb_aec: link in the @wl->prot.aec RB-tree
126 * @abs_ec: the absolute erase counter value when the protection ends
127 * @e: the wear-leveling entry of the physical eraseblock under protection
128 *
129 * When the WL sub-system returns a physical eraseblock, the physical
130 * eraseblock is protected from being moved for some "time". For this reason,
131 * the physical eraseblock is not directly moved from the @wl->free tree to the
132 * @wl->used tree. There is one more tree in between where this physical
133 * eraseblock is temporarily stored (@wl->prot).
134 *
135 * All this protection stuff is needed because:
136 * o we don't want to move physical eraseblocks just after we have given them
137 * to the user; instead, we first want to let users fill them up with data;
138 *
139 * o there is a chance that the user will put the physical eraseblock very
140 * soon, so it makes sense not to move it for some time, but wait; this is
141 * especially important in case of "short term" physical eraseblocks.
142 *
143 * Physical eraseblocks stay protected only for limited time. But the "time" is
144 * measured in erase cycles in this case. This is implemented with help of the
145 * absolute erase counter (@wl->abs_ec). When it reaches certain value, the
146 * physical eraseblocks are moved from the protection trees (@wl->prot.*) to
147 * the @wl->used tree.
148 *
149 * Protected physical eraseblocks are searched by physical eraseblock number
150 * (when they are put) and by the absolute erase counter (to check if it is
151 * time to move them to the @wl->used tree). So there are actually 2 RB-trees
152 * storing the protected physical eraseblocks: @wl->prot.pnum and
153 * @wl->prot.aec. They are referred to as the "protection" trees. The
154 * first one is indexed by the physical eraseblock number. The second one is
155 * indexed by the absolute erase counter. Both trees store
156 * &struct ubi_wl_prot_entry objects.
157 *
158 * Each physical eraseblock has 2 main states: free and used. The former state
159 * corresponds to the @wl->free tree. The latter state is split up on several
160 * sub-states:
161 * o the WL movement is allowed (@wl->used tree);
162 * o the WL movement is temporarily prohibited (@wl->prot.pnum and
163 * @wl->prot.aec trees);
164 * o scrubbing is needed (@wl->scrub tree).
165 *
166 * Depending on the sub-state, wear-leveling entries of the used physical
167 * eraseblocks may be kept in one of those trees.
168 */
169struct ubi_wl_prot_entry {
170 struct rb_node rb_pnum;
171 struct rb_node rb_aec;
172 unsigned long long abs_ec;
173 struct ubi_wl_entry *e;
174};
175
176/**
177 * struct ubi_work - UBI work description data structure. 143 * struct ubi_work - UBI work description data structure.
178 * @list: a link in the list of pending works 144 * @list: a link in the list of pending works
179 * @func: worker function 145 * @func: worker function
180 * @priv: private data of the worker function
181 * @e: physical eraseblock to erase 146 * @e: physical eraseblock to erase
182 * @torture: if the physical eraseblock has to be tortured 147 * @torture: if the physical eraseblock has to be tortured
183 * 148 *
@@ -198,9 +163,11 @@ struct ubi_work {
198static int paranoid_check_ec(struct ubi_device *ubi, int pnum, int ec); 163static int paranoid_check_ec(struct ubi_device *ubi, int pnum, int ec);
199static int paranoid_check_in_wl_tree(struct ubi_wl_entry *e, 164static int paranoid_check_in_wl_tree(struct ubi_wl_entry *e,
200 struct rb_root *root); 165 struct rb_root *root);
166static int paranoid_check_in_pq(struct ubi_device *ubi, struct ubi_wl_entry *e);
201#else 167#else
202#define paranoid_check_ec(ubi, pnum, ec) 0 168#define paranoid_check_ec(ubi, pnum, ec) 0
203#define paranoid_check_in_wl_tree(e, root) 169#define paranoid_check_in_wl_tree(e, root)
170#define paranoid_check_in_pq(ubi, e) 0
204#endif 171#endif
205 172
206/** 173/**
@@ -220,7 +187,7 @@ static void wl_tree_add(struct ubi_wl_entry *e, struct rb_root *root)
220 struct ubi_wl_entry *e1; 187 struct ubi_wl_entry *e1;
221 188
222 parent = *p; 189 parent = *p;
223 e1 = rb_entry(parent, struct ubi_wl_entry, rb); 190 e1 = rb_entry(parent, struct ubi_wl_entry, u.rb);
224 191
225 if (e->ec < e1->ec) 192 if (e->ec < e1->ec)
226 p = &(*p)->rb_left; 193 p = &(*p)->rb_left;
@@ -235,8 +202,8 @@ static void wl_tree_add(struct ubi_wl_entry *e, struct rb_root *root)
235 } 202 }
236 } 203 }
237 204
238 rb_link_node(&e->rb, parent, p); 205 rb_link_node(&e->u.rb, parent, p);
239 rb_insert_color(&e->rb, root); 206 rb_insert_color(&e->u.rb, root);
240} 207}
241 208
242/** 209/**
@@ -331,7 +298,7 @@ static int in_wl_tree(struct ubi_wl_entry *e, struct rb_root *root)
331 while (p) { 298 while (p) {
332 struct ubi_wl_entry *e1; 299 struct ubi_wl_entry *e1;
333 300
334 e1 = rb_entry(p, struct ubi_wl_entry, rb); 301 e1 = rb_entry(p, struct ubi_wl_entry, u.rb);
335 302
336 if (e->pnum == e1->pnum) { 303 if (e->pnum == e1->pnum) {
337 ubi_assert(e == e1); 304 ubi_assert(e == e1);
@@ -355,50 +322,24 @@ static int in_wl_tree(struct ubi_wl_entry *e, struct rb_root *root)
355} 322}
356 323
357/** 324/**
358 * prot_tree_add - add physical eraseblock to protection trees. 325 * prot_queue_add - add physical eraseblock to the protection queue.
359 * @ubi: UBI device description object 326 * @ubi: UBI device description object
360 * @e: the physical eraseblock to add 327 * @e: the physical eraseblock to add
361 * @pe: protection entry object to use
362 * @abs_ec: absolute erase counter value when this physical eraseblock has
363 * to be removed from the protection trees.
364 * 328 *
365 * @wl->lock has to be locked. 329 * This function adds @e to the tail of the protection queue @ubi->pq, where
330 * @e will stay for %UBI_PROT_QUEUE_LEN erase operations and will be
331 * temporarily protected from the wear-leveling worker. Note, @wl->lock has to
332 * be locked.
366 */ 333 */
367static void prot_tree_add(struct ubi_device *ubi, struct ubi_wl_entry *e, 334static void prot_queue_add(struct ubi_device *ubi, struct ubi_wl_entry *e)
368 struct ubi_wl_prot_entry *pe, int abs_ec)
369{ 335{
370 struct rb_node **p, *parent = NULL; 336 int pq_tail = ubi->pq_head - 1;
371 struct ubi_wl_prot_entry *pe1;
372
373 pe->e = e;
374 pe->abs_ec = ubi->abs_ec + abs_ec;
375
376 p = &ubi->prot.pnum.rb_node;
377 while (*p) {
378 parent = *p;
379 pe1 = rb_entry(parent, struct ubi_wl_prot_entry, rb_pnum);
380
381 if (e->pnum < pe1->e->pnum)
382 p = &(*p)->rb_left;
383 else
384 p = &(*p)->rb_right;
385 }
386 rb_link_node(&pe->rb_pnum, parent, p);
387 rb_insert_color(&pe->rb_pnum, &ubi->prot.pnum);
388
389 p = &ubi->prot.aec.rb_node;
390 parent = NULL;
391 while (*p) {
392 parent = *p;
393 pe1 = rb_entry(parent, struct ubi_wl_prot_entry, rb_aec);
394 337
395 if (pe->abs_ec < pe1->abs_ec) 338 if (pq_tail < 0)
396 p = &(*p)->rb_left; 339 pq_tail = UBI_PROT_QUEUE_LEN - 1;
397 else 340 ubi_assert(pq_tail >= 0 && pq_tail < UBI_PROT_QUEUE_LEN);
398 p = &(*p)->rb_right; 341 list_add_tail(&e->u.list, &ubi->pq[pq_tail]);
399 } 342 dbg_wl("added PEB %d EC %d to the protection queue", e->pnum, e->ec);
400 rb_link_node(&pe->rb_aec, parent, p);
401 rb_insert_color(&pe->rb_aec, &ubi->prot.aec);
402} 343}
403 344
404/** 345/**
@@ -414,14 +355,14 @@ static struct ubi_wl_entry *find_wl_entry(struct rb_root *root, int max)
414 struct rb_node *p; 355 struct rb_node *p;
415 struct ubi_wl_entry *e; 356 struct ubi_wl_entry *e;
416 357
417 e = rb_entry(rb_first(root), struct ubi_wl_entry, rb); 358 e = rb_entry(rb_first(root), struct ubi_wl_entry, u.rb);
418 max += e->ec; 359 max += e->ec;
419 360
420 p = root->rb_node; 361 p = root->rb_node;
421 while (p) { 362 while (p) {
422 struct ubi_wl_entry *e1; 363 struct ubi_wl_entry *e1;
423 364
424 e1 = rb_entry(p, struct ubi_wl_entry, rb); 365 e1 = rb_entry(p, struct ubi_wl_entry, u.rb);
425 if (e1->ec >= max) 366 if (e1->ec >= max)
426 p = p->rb_left; 367 p = p->rb_left;
427 else { 368 else {
@@ -443,17 +384,12 @@ static struct ubi_wl_entry *find_wl_entry(struct rb_root *root, int max)
443 */ 384 */
444int ubi_wl_get_peb(struct ubi_device *ubi, int dtype) 385int ubi_wl_get_peb(struct ubi_device *ubi, int dtype)
445{ 386{
446 int err, protect, medium_ec; 387 int err, medium_ec;
447 struct ubi_wl_entry *e, *first, *last; 388 struct ubi_wl_entry *e, *first, *last;
448 struct ubi_wl_prot_entry *pe;
449 389
450 ubi_assert(dtype == UBI_LONGTERM || dtype == UBI_SHORTTERM || 390 ubi_assert(dtype == UBI_LONGTERM || dtype == UBI_SHORTTERM ||
451 dtype == UBI_UNKNOWN); 391 dtype == UBI_UNKNOWN);
452 392
453 pe = kmalloc(sizeof(struct ubi_wl_prot_entry), GFP_NOFS);
454 if (!pe)
455 return -ENOMEM;
456
457retry: 393retry:
458 spin_lock(&ubi->wl_lock); 394 spin_lock(&ubi->wl_lock);
459 if (!ubi->free.rb_node) { 395 if (!ubi->free.rb_node) {
@@ -461,16 +397,13 @@ retry:
461 ubi_assert(list_empty(&ubi->works)); 397 ubi_assert(list_empty(&ubi->works));
462 ubi_err("no free eraseblocks"); 398 ubi_err("no free eraseblocks");
463 spin_unlock(&ubi->wl_lock); 399 spin_unlock(&ubi->wl_lock);
464 kfree(pe);
465 return -ENOSPC; 400 return -ENOSPC;
466 } 401 }
467 spin_unlock(&ubi->wl_lock); 402 spin_unlock(&ubi->wl_lock);
468 403
469 err = produce_free_peb(ubi); 404 err = produce_free_peb(ubi);
470 if (err < 0) { 405 if (err < 0)
471 kfree(pe);
472 return err; 406 return err;
473 }
474 goto retry; 407 goto retry;
475 } 408 }
476 409
@@ -483,7 +416,6 @@ retry:
483 * %WL_FREE_MAX_DIFF. 416 * %WL_FREE_MAX_DIFF.
484 */ 417 */
485 e = find_wl_entry(&ubi->free, WL_FREE_MAX_DIFF); 418 e = find_wl_entry(&ubi->free, WL_FREE_MAX_DIFF);
486 protect = LT_PROTECTION;
487 break; 419 break;
488 case UBI_UNKNOWN: 420 case UBI_UNKNOWN:
489 /* 421 /*
@@ -492,81 +424,63 @@ retry:
492 * eraseblock with erase counter greater or equivalent than the 424 * eraseblock with erase counter greater or equivalent than the
493 * lowest erase counter plus %WL_FREE_MAX_DIFF. 425 * lowest erase counter plus %WL_FREE_MAX_DIFF.
494 */ 426 */
495 first = rb_entry(rb_first(&ubi->free), struct ubi_wl_entry, rb); 427 first = rb_entry(rb_first(&ubi->free), struct ubi_wl_entry,
496 last = rb_entry(rb_last(&ubi->free), struct ubi_wl_entry, rb); 428 u.rb);
429 last = rb_entry(rb_last(&ubi->free), struct ubi_wl_entry, u.rb);
497 430
498 if (last->ec - first->ec < WL_FREE_MAX_DIFF) 431 if (last->ec - first->ec < WL_FREE_MAX_DIFF)
499 e = rb_entry(ubi->free.rb_node, 432 e = rb_entry(ubi->free.rb_node,
500 struct ubi_wl_entry, rb); 433 struct ubi_wl_entry, u.rb);
501 else { 434 else {
502 medium_ec = (first->ec + WL_FREE_MAX_DIFF)/2; 435 medium_ec = (first->ec + WL_FREE_MAX_DIFF)/2;
503 e = find_wl_entry(&ubi->free, medium_ec); 436 e = find_wl_entry(&ubi->free, medium_ec);
504 } 437 }
505 protect = U_PROTECTION;
506 break; 438 break;
507 case UBI_SHORTTERM: 439 case UBI_SHORTTERM:
508 /* 440 /*
509 * For short term data we pick a physical eraseblock with the 441 * For short term data we pick a physical eraseblock with the
510 * lowest erase counter as we expect it will be erased soon. 442 * lowest erase counter as we expect it will be erased soon.
511 */ 443 */
512 e = rb_entry(rb_first(&ubi->free), struct ubi_wl_entry, rb); 444 e = rb_entry(rb_first(&ubi->free), struct ubi_wl_entry, u.rb);
513 protect = ST_PROTECTION;
514 break; 445 break;
515 default: 446 default:
516 protect = 0;
517 e = NULL;
518 BUG(); 447 BUG();
519 } 448 }
520 449
450 paranoid_check_in_wl_tree(e, &ubi->free);
451
521 /* 452 /*
522 * Move the physical eraseblock to the protection trees where it will 453 * Move the physical eraseblock to the protection queue where it will
523 * be protected from being moved for some time. 454 * be protected from being moved for some time.
524 */ 455 */
525 paranoid_check_in_wl_tree(e, &ubi->free); 456 rb_erase(&e->u.rb, &ubi->free);
526 rb_erase(&e->rb, &ubi->free); 457 dbg_wl("PEB %d EC %d", e->pnum, e->ec);
527 prot_tree_add(ubi, e, pe, protect); 458 prot_queue_add(ubi, e);
528
529 dbg_wl("PEB %d EC %d, protection %d", e->pnum, e->ec, protect);
530 spin_unlock(&ubi->wl_lock); 459 spin_unlock(&ubi->wl_lock);
531
532 return e->pnum; 460 return e->pnum;
533} 461}
534 462
535/** 463/**
536 * prot_tree_del - remove a physical eraseblock from the protection trees 464 * prot_queue_del - remove a physical eraseblock from the protection queue.
537 * @ubi: UBI device description object 465 * @ubi: UBI device description object
538 * @pnum: the physical eraseblock to remove 466 * @pnum: the physical eraseblock to remove
539 * 467 *
540 * This function returns PEB @pnum from the protection trees and returns zero 468 * This function deletes PEB @pnum from the protection queue and returns zero
541 * in case of success and %-ENODEV if the PEB was not found in the protection 469 * in case of success and %-ENODEV if the PEB was not found.
542 * trees.
543 */ 470 */
544static int prot_tree_del(struct ubi_device *ubi, int pnum) 471static int prot_queue_del(struct ubi_device *ubi, int pnum)
545{ 472{
546 struct rb_node *p; 473 struct ubi_wl_entry *e;
547 struct ubi_wl_prot_entry *pe = NULL;
548
549 p = ubi->prot.pnum.rb_node;
550 while (p) {
551
552 pe = rb_entry(p, struct ubi_wl_prot_entry, rb_pnum);
553
554 if (pnum == pe->e->pnum)
555 goto found;
556 474
557 if (pnum < pe->e->pnum) 475 e = ubi->lookuptbl[pnum];
558 p = p->rb_left; 476 if (!e)
559 else 477 return -ENODEV;
560 p = p->rb_right;
561 }
562 478
563 return -ENODEV; 479 if (paranoid_check_in_pq(ubi, e))
480 return -ENODEV;
564 481
565found: 482 list_del(&e->u.list);
566 ubi_assert(pe->e->pnum == pnum); 483 dbg_wl("deleted PEB %d from the protection queue", e->pnum);
567 rb_erase(&pe->rb_aec, &ubi->prot.aec);
568 rb_erase(&pe->rb_pnum, &ubi->prot.pnum);
569 kfree(pe);
570 return 0; 484 return 0;
571} 485}
572 486
@@ -632,47 +546,47 @@ out_free:
632} 546}
633 547
634/** 548/**
635 * check_protection_over - check if it is time to stop protecting some PEBs. 549 * serve_prot_queue - check if it is time to stop protecting PEBs.
636 * @ubi: UBI device description object 550 * @ubi: UBI device description object
637 * 551 *
638 * This function is called after each erase operation, when the absolute erase 552 * This function is called after each erase operation and removes PEBs from the
639 * counter is incremented, to check if some physical eraseblock have not to be 553 * tail of the protection queue. These PEBs have been protected for long enough
640 * protected any longer. These physical eraseblocks are moved from the 554 * and should be moved to the used tree.
641 * protection trees to the used tree.
642 */ 555 */
643static void check_protection_over(struct ubi_device *ubi) 556static void serve_prot_queue(struct ubi_device *ubi)
644{ 557{
645 struct ubi_wl_prot_entry *pe; 558 struct ubi_wl_entry *e, *tmp;
559 int count;
646 560
647 /* 561 /*
648 * There may be several protected physical eraseblock to remove, 562 * There may be several protected physical eraseblock to remove,
649 * process them all. 563 * process them all.
650 */ 564 */
651 while (1) { 565repeat:
652 spin_lock(&ubi->wl_lock); 566 count = 0;
653 if (!ubi->prot.aec.rb_node) { 567 spin_lock(&ubi->wl_lock);
654 spin_unlock(&ubi->wl_lock); 568 list_for_each_entry_safe(e, tmp, &ubi->pq[ubi->pq_head], u.list) {
655 break; 569 dbg_wl("PEB %d EC %d protection over, move to used tree",
656 } 570 e->pnum, e->ec);
657
658 pe = rb_entry(rb_first(&ubi->prot.aec),
659 struct ubi_wl_prot_entry, rb_aec);
660 571
661 if (pe->abs_ec > ubi->abs_ec) { 572 list_del(&e->u.list);
573 wl_tree_add(e, &ubi->used);
574 if (count++ > 32) {
575 /*
576 * Let's be nice and avoid holding the spinlock for
577 * too long.
578 */
662 spin_unlock(&ubi->wl_lock); 579 spin_unlock(&ubi->wl_lock);
663 break; 580 cond_resched();
581 goto repeat;
664 } 582 }
665
666 dbg_wl("PEB %d protection over, abs_ec %llu, PEB abs_ec %llu",
667 pe->e->pnum, ubi->abs_ec, pe->abs_ec);
668 rb_erase(&pe->rb_aec, &ubi->prot.aec);
669 rb_erase(&pe->rb_pnum, &ubi->prot.pnum);
670 wl_tree_add(pe->e, &ubi->used);
671 spin_unlock(&ubi->wl_lock);
672
673 kfree(pe);
674 cond_resched();
675 } 583 }
584
585 ubi->pq_head += 1;
586 if (ubi->pq_head == UBI_PROT_QUEUE_LEN)
587 ubi->pq_head = 0;
588 ubi_assert(ubi->pq_head >= 0 && ubi->pq_head < UBI_PROT_QUEUE_LEN);
589 spin_unlock(&ubi->wl_lock);
676} 590}
677 591
678/** 592/**
@@ -680,8 +594,8 @@ static void check_protection_over(struct ubi_device *ubi)
680 * @ubi: UBI device description object 594 * @ubi: UBI device description object
681 * @wrk: the work to schedule 595 * @wrk: the work to schedule
682 * 596 *
683 * This function enqueues a work defined by @wrk to the tail of the pending 597 * This function adds a work defined by @wrk to the tail of the pending works
684 * works list. 598 * list.
685 */ 599 */
686static void schedule_ubi_work(struct ubi_device *ubi, struct ubi_work *wrk) 600static void schedule_ubi_work(struct ubi_device *ubi, struct ubi_work *wrk)
687{ 601{
@@ -739,13 +653,11 @@ static int schedule_erase(struct ubi_device *ubi, struct ubi_wl_entry *e,
739static int wear_leveling_worker(struct ubi_device *ubi, struct ubi_work *wrk, 653static int wear_leveling_worker(struct ubi_device *ubi, struct ubi_work *wrk,
740 int cancel) 654 int cancel)
741{ 655{
742 int err, put = 0, scrubbing = 0, protect = 0; 656 int err, scrubbing = 0, torture = 0;
743 struct ubi_wl_prot_entry *uninitialized_var(pe);
744 struct ubi_wl_entry *e1, *e2; 657 struct ubi_wl_entry *e1, *e2;
745 struct ubi_vid_hdr *vid_hdr; 658 struct ubi_vid_hdr *vid_hdr;
746 659
747 kfree(wrk); 660 kfree(wrk);
748
749 if (cancel) 661 if (cancel)
750 return 0; 662 return 0;
751 663
@@ -781,7 +693,7 @@ static int wear_leveling_worker(struct ubi_device *ubi, struct ubi_work *wrk,
781 * highly worn-out free physical eraseblock. If the erase 693 * highly worn-out free physical eraseblock. If the erase
782 * counters differ much enough, start wear-leveling. 694 * counters differ much enough, start wear-leveling.
783 */ 695 */
784 e1 = rb_entry(rb_first(&ubi->used), struct ubi_wl_entry, rb); 696 e1 = rb_entry(rb_first(&ubi->used), struct ubi_wl_entry, u.rb);
785 e2 = find_wl_entry(&ubi->free, WL_FREE_MAX_DIFF); 697 e2 = find_wl_entry(&ubi->free, WL_FREE_MAX_DIFF);
786 698
787 if (!(e2->ec - e1->ec >= UBI_WL_THRESHOLD)) { 699 if (!(e2->ec - e1->ec >= UBI_WL_THRESHOLD)) {
@@ -790,21 +702,21 @@ static int wear_leveling_worker(struct ubi_device *ubi, struct ubi_work *wrk,
790 goto out_cancel; 702 goto out_cancel;
791 } 703 }
792 paranoid_check_in_wl_tree(e1, &ubi->used); 704 paranoid_check_in_wl_tree(e1, &ubi->used);
793 rb_erase(&e1->rb, &ubi->used); 705 rb_erase(&e1->u.rb, &ubi->used);
794 dbg_wl("move PEB %d EC %d to PEB %d EC %d", 706 dbg_wl("move PEB %d EC %d to PEB %d EC %d",
795 e1->pnum, e1->ec, e2->pnum, e2->ec); 707 e1->pnum, e1->ec, e2->pnum, e2->ec);
796 } else { 708 } else {
797 /* Perform scrubbing */ 709 /* Perform scrubbing */
798 scrubbing = 1; 710 scrubbing = 1;
799 e1 = rb_entry(rb_first(&ubi->scrub), struct ubi_wl_entry, rb); 711 e1 = rb_entry(rb_first(&ubi->scrub), struct ubi_wl_entry, u.rb);
800 e2 = find_wl_entry(&ubi->free, WL_FREE_MAX_DIFF); 712 e2 = find_wl_entry(&ubi->free, WL_FREE_MAX_DIFF);
801 paranoid_check_in_wl_tree(e1, &ubi->scrub); 713 paranoid_check_in_wl_tree(e1, &ubi->scrub);
802 rb_erase(&e1->rb, &ubi->scrub); 714 rb_erase(&e1->u.rb, &ubi->scrub);
803 dbg_wl("scrub PEB %d to PEB %d", e1->pnum, e2->pnum); 715 dbg_wl("scrub PEB %d to PEB %d", e1->pnum, e2->pnum);
804 } 716 }
805 717
806 paranoid_check_in_wl_tree(e2, &ubi->free); 718 paranoid_check_in_wl_tree(e2, &ubi->free);
807 rb_erase(&e2->rb, &ubi->free); 719 rb_erase(&e2->u.rb, &ubi->free);
808 ubi->move_from = e1; 720 ubi->move_from = e1;
809 ubi->move_to = e2; 721 ubi->move_to = e2;
810 spin_unlock(&ubi->wl_lock); 722 spin_unlock(&ubi->wl_lock);
@@ -844,46 +756,67 @@ static int wear_leveling_worker(struct ubi_device *ubi, struct ubi_work *wrk,
844 756
845 err = ubi_eba_copy_leb(ubi, e1->pnum, e2->pnum, vid_hdr); 757 err = ubi_eba_copy_leb(ubi, e1->pnum, e2->pnum, vid_hdr);
846 if (err) { 758 if (err) {
847 759 if (err == -EAGAIN)
760 goto out_not_moved;
848 if (err < 0) 761 if (err < 0)
849 goto out_error; 762 goto out_error;
850 if (err == 1) 763 if (err == 2) {
764 /* Target PEB write error, torture it */
765 torture = 1;
851 goto out_not_moved; 766 goto out_not_moved;
767 }
852 768
853 /* 769 /*
854 * For some reason the LEB was not moved - it might be because 770 * The LEB has not been moved because the volume is being
855 * the volume is being deleted. We should prevent this PEB from 771 * deleted or the PEB has been put meanwhile. We should prevent
856 * being selected for wear-levelling movement for some "time", 772 * this PEB from being selected for wear-leveling movement
857 * so put it to the protection tree. 773 * again, so put it to the protection queue.
858 */ 774 */
859 775
860 dbg_wl("cancelled moving PEB %d", e1->pnum); 776 dbg_wl("canceled moving PEB %d", e1->pnum);
861 pe = kmalloc(sizeof(struct ubi_wl_prot_entry), GFP_NOFS); 777 ubi_assert(err == 1);
862 if (!pe) { 778
863 err = -ENOMEM; 779 ubi_free_vid_hdr(ubi, vid_hdr);
864 goto out_error; 780 vid_hdr = NULL;
865 } 781
782 spin_lock(&ubi->wl_lock);
783 prot_queue_add(ubi, e1);
784 ubi_assert(!ubi->move_to_put);
785 ubi->move_from = ubi->move_to = NULL;
786 ubi->wl_scheduled = 0;
787 spin_unlock(&ubi->wl_lock);
866 788
867 protect = 1; 789 e1 = NULL;
790 err = schedule_erase(ubi, e2, 0);
791 if (err)
792 goto out_error;
793 mutex_unlock(&ubi->move_mutex);
794 return 0;
868 } 795 }
869 796
797 /* The PEB has been successfully moved */
870 ubi_free_vid_hdr(ubi, vid_hdr); 798 ubi_free_vid_hdr(ubi, vid_hdr);
871 if (scrubbing && !protect) 799 vid_hdr = NULL;
800 if (scrubbing)
872 ubi_msg("scrubbed PEB %d, data moved to PEB %d", 801 ubi_msg("scrubbed PEB %d, data moved to PEB %d",
873 e1->pnum, e2->pnum); 802 e1->pnum, e2->pnum);
874 803
875 spin_lock(&ubi->wl_lock); 804 spin_lock(&ubi->wl_lock);
876 if (protect) 805 if (!ubi->move_to_put) {
877 prot_tree_add(ubi, e1, pe, protect);
878 if (!ubi->move_to_put)
879 wl_tree_add(e2, &ubi->used); 806 wl_tree_add(e2, &ubi->used);
880 else 807 e2 = NULL;
881 put = 1; 808 }
882 ubi->move_from = ubi->move_to = NULL; 809 ubi->move_from = ubi->move_to = NULL;
883 ubi->move_to_put = ubi->wl_scheduled = 0; 810 ubi->move_to_put = ubi->wl_scheduled = 0;
884 spin_unlock(&ubi->wl_lock); 811 spin_unlock(&ubi->wl_lock);
885 812
886 if (put) { 813 err = schedule_erase(ubi, e1, 0);
814 if (err) {
815 e1 = NULL;
816 goto out_error;
817 }
818
819 if (e2) {
887 /* 820 /*
888 * Well, the target PEB was put meanwhile, schedule it for 821 * Well, the target PEB was put meanwhile, schedule it for
889 * erasure. 822 * erasure.
@@ -894,13 +827,6 @@ static int wear_leveling_worker(struct ubi_device *ubi, struct ubi_work *wrk,
894 goto out_error; 827 goto out_error;
895 } 828 }
896 829
897 if (!protect) {
898 err = schedule_erase(ubi, e1, 0);
899 if (err)
900 goto out_error;
901 }
902
903
904 dbg_wl("done"); 830 dbg_wl("done");
905 mutex_unlock(&ubi->move_mutex); 831 mutex_unlock(&ubi->move_mutex);
906 return 0; 832 return 0;
@@ -908,20 +834,24 @@ static int wear_leveling_worker(struct ubi_device *ubi, struct ubi_work *wrk,
908 /* 834 /*
909 * For some reasons the LEB was not moved, might be an error, might be 835 * For some reasons the LEB was not moved, might be an error, might be
910 * something else. @e1 was not changed, so return it back. @e2 might 836 * something else. @e1 was not changed, so return it back. @e2 might
911 * be changed, schedule it for erasure. 837 * have been changed, schedule it for erasure.
912 */ 838 */
913out_not_moved: 839out_not_moved:
840 dbg_wl("canceled moving PEB %d", e1->pnum);
914 ubi_free_vid_hdr(ubi, vid_hdr); 841 ubi_free_vid_hdr(ubi, vid_hdr);
842 vid_hdr = NULL;
915 spin_lock(&ubi->wl_lock); 843 spin_lock(&ubi->wl_lock);
916 if (scrubbing) 844 if (scrubbing)
917 wl_tree_add(e1, &ubi->scrub); 845 wl_tree_add(e1, &ubi->scrub);
918 else 846 else
919 wl_tree_add(e1, &ubi->used); 847 wl_tree_add(e1, &ubi->used);
848 ubi_assert(!ubi->move_to_put);
920 ubi->move_from = ubi->move_to = NULL; 849 ubi->move_from = ubi->move_to = NULL;
921 ubi->move_to_put = ubi->wl_scheduled = 0; 850 ubi->wl_scheduled = 0;
922 spin_unlock(&ubi->wl_lock); 851 spin_unlock(&ubi->wl_lock);
923 852
924 err = schedule_erase(ubi, e2, 0); 853 e1 = NULL;
854 err = schedule_erase(ubi, e2, torture);
925 if (err) 855 if (err)
926 goto out_error; 856 goto out_error;
927 857
@@ -938,8 +868,10 @@ out_error:
938 ubi->move_to_put = ubi->wl_scheduled = 0; 868 ubi->move_to_put = ubi->wl_scheduled = 0;
939 spin_unlock(&ubi->wl_lock); 869 spin_unlock(&ubi->wl_lock);
940 870
941 kmem_cache_free(ubi_wl_entry_slab, e1); 871 if (e1)
942 kmem_cache_free(ubi_wl_entry_slab, e2); 872 kmem_cache_free(ubi_wl_entry_slab, e1);
873 if (e2)
874 kmem_cache_free(ubi_wl_entry_slab, e2);
943 ubi_ro_mode(ubi); 875 ubi_ro_mode(ubi);
944 876
945 mutex_unlock(&ubi->move_mutex); 877 mutex_unlock(&ubi->move_mutex);
@@ -988,7 +920,7 @@ static int ensure_wear_leveling(struct ubi_device *ubi)
988 * erase counter of free physical eraseblocks is greater then 920 * erase counter of free physical eraseblocks is greater then
989 * %UBI_WL_THRESHOLD. 921 * %UBI_WL_THRESHOLD.
990 */ 922 */
991 e1 = rb_entry(rb_first(&ubi->used), struct ubi_wl_entry, rb); 923 e1 = rb_entry(rb_first(&ubi->used), struct ubi_wl_entry, u.rb);
992 e2 = find_wl_entry(&ubi->free, WL_FREE_MAX_DIFF); 924 e2 = find_wl_entry(&ubi->free, WL_FREE_MAX_DIFF);
993 925
994 if (!(e2->ec - e1->ec >= UBI_WL_THRESHOLD)) 926 if (!(e2->ec - e1->ec >= UBI_WL_THRESHOLD))
@@ -1050,7 +982,6 @@ static int erase_worker(struct ubi_device *ubi, struct ubi_work *wl_wrk,
1050 kfree(wl_wrk); 982 kfree(wl_wrk);
1051 983
1052 spin_lock(&ubi->wl_lock); 984 spin_lock(&ubi->wl_lock);
1053 ubi->abs_ec += 1;
1054 wl_tree_add(e, &ubi->free); 985 wl_tree_add(e, &ubi->free);
1055 spin_unlock(&ubi->wl_lock); 986 spin_unlock(&ubi->wl_lock);
1056 987
@@ -1058,7 +989,7 @@ static int erase_worker(struct ubi_device *ubi, struct ubi_work *wl_wrk,
1058 * One more erase operation has happened, take care about 989 * One more erase operation has happened, take care about
1059 * protected physical eraseblocks. 990 * protected physical eraseblocks.
1060 */ 991 */
1061 check_protection_over(ubi); 992 serve_prot_queue(ubi);
1062 993
1063 /* And take care about wear-leveling */ 994 /* And take care about wear-leveling */
1064 err = ensure_wear_leveling(ubi); 995 err = ensure_wear_leveling(ubi);
@@ -1190,12 +1121,12 @@ retry:
1190 } else { 1121 } else {
1191 if (in_wl_tree(e, &ubi->used)) { 1122 if (in_wl_tree(e, &ubi->used)) {
1192 paranoid_check_in_wl_tree(e, &ubi->used); 1123 paranoid_check_in_wl_tree(e, &ubi->used);
1193 rb_erase(&e->rb, &ubi->used); 1124 rb_erase(&e->u.rb, &ubi->used);
1194 } else if (in_wl_tree(e, &ubi->scrub)) { 1125 } else if (in_wl_tree(e, &ubi->scrub)) {
1195 paranoid_check_in_wl_tree(e, &ubi->scrub); 1126 paranoid_check_in_wl_tree(e, &ubi->scrub);
1196 rb_erase(&e->rb, &ubi->scrub); 1127 rb_erase(&e->u.rb, &ubi->scrub);
1197 } else { 1128 } else {
1198 err = prot_tree_del(ubi, e->pnum); 1129 err = prot_queue_del(ubi, e->pnum);
1199 if (err) { 1130 if (err) {
1200 ubi_err("PEB %d not found", pnum); 1131 ubi_err("PEB %d not found", pnum);
1201 ubi_ro_mode(ubi); 1132 ubi_ro_mode(ubi);
@@ -1255,11 +1186,11 @@ retry:
1255 1186
1256 if (in_wl_tree(e, &ubi->used)) { 1187 if (in_wl_tree(e, &ubi->used)) {
1257 paranoid_check_in_wl_tree(e, &ubi->used); 1188 paranoid_check_in_wl_tree(e, &ubi->used);
1258 rb_erase(&e->rb, &ubi->used); 1189 rb_erase(&e->u.rb, &ubi->used);
1259 } else { 1190 } else {
1260 int err; 1191 int err;
1261 1192
1262 err = prot_tree_del(ubi, e->pnum); 1193 err = prot_queue_del(ubi, e->pnum);
1263 if (err) { 1194 if (err) {
1264 ubi_err("PEB %d not found", pnum); 1195 ubi_err("PEB %d not found", pnum);
1265 ubi_ro_mode(ubi); 1196 ubi_ro_mode(ubi);
@@ -1290,7 +1221,7 @@ int ubi_wl_flush(struct ubi_device *ubi)
1290 int err; 1221 int err;
1291 1222
1292 /* 1223 /*
1293 * Erase while the pending works queue is not empty, but not more then 1224 * Erase while the pending works queue is not empty, but not more than
1294 * the number of currently pending works. 1225 * the number of currently pending works.
1295 */ 1226 */
1296 dbg_wl("flush (%d pending works)", ubi->works_count); 1227 dbg_wl("flush (%d pending works)", ubi->works_count);
@@ -1308,7 +1239,7 @@ int ubi_wl_flush(struct ubi_device *ubi)
1308 up_write(&ubi->work_sem); 1239 up_write(&ubi->work_sem);
1309 1240
1310 /* 1241 /*
1311 * And in case last was the WL worker and it cancelled the LEB 1242 * And in case last was the WL worker and it canceled the LEB
1312 * movement, flush again. 1243 * movement, flush again.
1313 */ 1244 */
1314 while (ubi->works_count) { 1245 while (ubi->works_count) {
@@ -1337,11 +1268,11 @@ static void tree_destroy(struct rb_root *root)
1337 else if (rb->rb_right) 1268 else if (rb->rb_right)
1338 rb = rb->rb_right; 1269 rb = rb->rb_right;
1339 else { 1270 else {
1340 e = rb_entry(rb, struct ubi_wl_entry, rb); 1271 e = rb_entry(rb, struct ubi_wl_entry, u.rb);
1341 1272
1342 rb = rb_parent(rb); 1273 rb = rb_parent(rb);
1343 if (rb) { 1274 if (rb) {
1344 if (rb->rb_left == &e->rb) 1275 if (rb->rb_left == &e->u.rb)
1345 rb->rb_left = NULL; 1276 rb->rb_left = NULL;
1346 else 1277 else
1347 rb->rb_right = NULL; 1278 rb->rb_right = NULL;
@@ -1436,15 +1367,13 @@ static void cancel_pending(struct ubi_device *ubi)
1436 */ 1367 */
1437int ubi_wl_init_scan(struct ubi_device *ubi, struct ubi_scan_info *si) 1368int ubi_wl_init_scan(struct ubi_device *ubi, struct ubi_scan_info *si)
1438{ 1369{
1439 int err; 1370 int err, i;
1440 struct rb_node *rb1, *rb2; 1371 struct rb_node *rb1, *rb2;
1441 struct ubi_scan_volume *sv; 1372 struct ubi_scan_volume *sv;
1442 struct ubi_scan_leb *seb, *tmp; 1373 struct ubi_scan_leb *seb, *tmp;
1443 struct ubi_wl_entry *e; 1374 struct ubi_wl_entry *e;
1444 1375
1445
1446 ubi->used = ubi->free = ubi->scrub = RB_ROOT; 1376 ubi->used = ubi->free = ubi->scrub = RB_ROOT;
1447 ubi->prot.pnum = ubi->prot.aec = RB_ROOT;
1448 spin_lock_init(&ubi->wl_lock); 1377 spin_lock_init(&ubi->wl_lock);
1449 mutex_init(&ubi->move_mutex); 1378 mutex_init(&ubi->move_mutex);
1450 init_rwsem(&ubi->work_sem); 1379 init_rwsem(&ubi->work_sem);
@@ -1458,6 +1387,10 @@ int ubi_wl_init_scan(struct ubi_device *ubi, struct ubi_scan_info *si)
1458 if (!ubi->lookuptbl) 1387 if (!ubi->lookuptbl)
1459 return err; 1388 return err;
1460 1389
1390 for (i = 0; i < UBI_PROT_QUEUE_LEN; i++)
1391 INIT_LIST_HEAD(&ubi->pq[i]);
1392 ubi->pq_head = 0;
1393
1461 list_for_each_entry_safe(seb, tmp, &si->erase, u.list) { 1394 list_for_each_entry_safe(seb, tmp, &si->erase, u.list) {
1462 cond_resched(); 1395 cond_resched();
1463 1396
@@ -1552,33 +1485,18 @@ out_free:
1552} 1485}
1553 1486
1554/** 1487/**
1555 * protection_trees_destroy - destroy the protection RB-trees. 1488 * protection_queue_destroy - destroy the protection queue.
1556 * @ubi: UBI device description object 1489 * @ubi: UBI device description object
1557 */ 1490 */
1558static void protection_trees_destroy(struct ubi_device *ubi) 1491static void protection_queue_destroy(struct ubi_device *ubi)
1559{ 1492{
1560 struct rb_node *rb; 1493 int i;
1561 struct ubi_wl_prot_entry *pe; 1494 struct ubi_wl_entry *e, *tmp;
1562
1563 rb = ubi->prot.aec.rb_node;
1564 while (rb) {
1565 if (rb->rb_left)
1566 rb = rb->rb_left;
1567 else if (rb->rb_right)
1568 rb = rb->rb_right;
1569 else {
1570 pe = rb_entry(rb, struct ubi_wl_prot_entry, rb_aec);
1571
1572 rb = rb_parent(rb);
1573 if (rb) {
1574 if (rb->rb_left == &pe->rb_aec)
1575 rb->rb_left = NULL;
1576 else
1577 rb->rb_right = NULL;
1578 }
1579 1495
1580 kmem_cache_free(ubi_wl_entry_slab, pe->e); 1496 for (i = 0; i < UBI_PROT_QUEUE_LEN; ++i) {
1581 kfree(pe); 1497 list_for_each_entry_safe(e, tmp, &ubi->pq[i], u.list) {
1498 list_del(&e->u.list);
1499 kmem_cache_free(ubi_wl_entry_slab, e);
1582 } 1500 }
1583 } 1501 }
1584} 1502}
@@ -1591,7 +1509,7 @@ void ubi_wl_close(struct ubi_device *ubi)
1591{ 1509{
1592 dbg_wl("close the WL sub-system"); 1510 dbg_wl("close the WL sub-system");
1593 cancel_pending(ubi); 1511 cancel_pending(ubi);
1594 protection_trees_destroy(ubi); 1512 protection_queue_destroy(ubi);
1595 tree_destroy(&ubi->used); 1513 tree_destroy(&ubi->used);
1596 tree_destroy(&ubi->free); 1514 tree_destroy(&ubi->free);
1597 tree_destroy(&ubi->scrub); 1515 tree_destroy(&ubi->scrub);
@@ -1661,4 +1579,27 @@ static int paranoid_check_in_wl_tree(struct ubi_wl_entry *e,
1661 return 1; 1579 return 1;
1662} 1580}
1663 1581
1582/**
1583 * paranoid_check_in_pq - check if wear-leveling entry is in the protection
1584 * queue.
1585 * @ubi: UBI device description object
1586 * @e: the wear-leveling entry to check
1587 *
1588 * This function returns zero if @e is in @ubi->pq and %1 if it is not.
1589 */
1590static int paranoid_check_in_pq(struct ubi_device *ubi, struct ubi_wl_entry *e)
1591{
1592 struct ubi_wl_entry *p;
1593 int i;
1594
1595 for (i = 0; i < UBI_PROT_QUEUE_LEN; ++i)
1596 list_for_each_entry(p, &ubi->pq[i], u.list)
1597 if (p == e)
1598 return 0;
1599
1600 ubi_err("paranoid check failed for PEB %d, EC %d, Protect queue",
1601 e->pnum, e->ec);
1602 ubi_dbg_dump_stack();
1603 return 1;
1604}
1664#endif /* CONFIG_MTD_UBI_DEBUG_PARANOID */ 1605#endif /* CONFIG_MTD_UBI_DEBUG_PARANOID */