From 06b68ba15671f32a3aa3bbddf04b0d2dd7fbf902 Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Sun, 16 Dec 2007 12:49:01 +0200 Subject: UBI: create ubi_wl_entry slab on initialization Similarly to ltree_entry_slab, it makes more sense to create and destroy ubi_wl_entry slab on module initialization/exit. Signed-off-by: Artem Bityutskiy --- drivers/mtd/ubi/wl.c | 58 +++++++++++++--------------------------------------- 1 file changed, 14 insertions(+), 44 deletions(-) (limited to 'drivers/mtd/ubi/wl.c') diff --git a/drivers/mtd/ubi/wl.c b/drivers/mtd/ubi/wl.c index 6330c8cc72b5..a405d40faa23 100644 --- a/drivers/mtd/ubi/wl.c +++ b/drivers/mtd/ubi/wl.c @@ -116,21 +116,6 @@ */ #define WL_MAX_FAILURES 32 -/** - * struct ubi_wl_entry - wear-leveling entry. - * @rb: link in the corresponding RB-tree - * @ec: erase counter - * @pnum: physical eraseblock number - * - * Each physical eraseblock has a corresponding &struct wl_entry object which - * may be kept in different RB-trees. - */ -struct ubi_wl_entry { - struct rb_node rb; - int ec; - int pnum; -}; - /** * struct ubi_wl_prot_entry - PEB protection entry. * @rb_pnum: link in the @wl->prot.pnum RB-tree @@ -216,9 +201,6 @@ static int paranoid_check_in_wl_tree(struct ubi_wl_entry *e, #define paranoid_check_in_wl_tree(e, root) #endif -/* Slab cache for wear-leveling entries */ -static struct kmem_cache *wl_entries_slab; - /** * wl_tree_add - add a wear-leveling entry to a WL RB-tree. * @e: the wear-leveling entry to add @@ -878,14 +860,14 @@ static int wear_leveling_worker(struct ubi_device *ubi, struct ubi_work *wrk, dbg_wl("PEB %d was put meanwhile, erase", e2->pnum); err = schedule_erase(ubi, e2, 0); if (err) { - kmem_cache_free(wl_entries_slab, e2); + kmem_cache_free(ubi_wl_entry_slab, e2); ubi_ro_mode(ubi); } } err = schedule_erase(ubi, e1, 0); if (err) { - kmem_cache_free(wl_entries_slab, e1); + kmem_cache_free(ubi_wl_entry_slab, e1); ubi_ro_mode(ubi); } @@ -920,14 +902,14 @@ error: dbg_wl("PEB %d was put meanwhile, erase", e1->pnum); err = schedule_erase(ubi, e1, 0); if (err) { - kmem_cache_free(wl_entries_slab, e1); + kmem_cache_free(ubi_wl_entry_slab, e1); ubi_ro_mode(ubi); } } err = schedule_erase(ubi, e2, 0); if (err) { - kmem_cache_free(wl_entries_slab, e2); + kmem_cache_free(ubi_wl_entry_slab, e2); ubi_ro_mode(ubi); } @@ -1020,7 +1002,7 @@ static int erase_worker(struct ubi_device *ubi, struct ubi_work *wl_wrk, if (cancel) { dbg_wl("cancel erasure of PEB %d EC %d", pnum, e->ec); kfree(wl_wrk); - kmem_cache_free(wl_entries_slab, e); + kmem_cache_free(ubi_wl_entry_slab, e); return 0; } @@ -1049,7 +1031,7 @@ static int erase_worker(struct ubi_device *ubi, struct ubi_work *wl_wrk, ubi_err("failed to erase PEB %d, error %d", pnum, err); kfree(wl_wrk); - kmem_cache_free(wl_entries_slab, e); + kmem_cache_free(ubi_wl_entry_slab, e); if (err == -EINTR || err == -ENOMEM || err == -EAGAIN || err == -EBUSY) { @@ -1294,7 +1276,7 @@ static void tree_destroy(struct rb_root *root) rb->rb_right = NULL; } - kmem_cache_free(wl_entries_slab, e); + kmem_cache_free(ubi_wl_entry_slab, e); } } } @@ -1407,14 +1389,6 @@ int ubi_wl_init_scan(struct ubi_device *ubi, struct ubi_scan_info *si) return err; } - if (ubi_devices_cnt == 0) { - wl_entries_slab = kmem_cache_create("ubi_wl_entry_slab", - sizeof(struct ubi_wl_entry), - 0, 0, NULL); - if (!wl_entries_slab) - return -ENOMEM; - } - err = -ENOMEM; ubi->lookuptbl = kzalloc(ubi->peb_count * sizeof(void *), GFP_KERNEL); if (!ubi->lookuptbl) @@ -1423,7 +1397,7 @@ int ubi_wl_init_scan(struct ubi_device *ubi, 
struct ubi_scan_info *si) list_for_each_entry_safe(seb, tmp, &si->erase, u.list) { cond_resched(); - e = kmem_cache_alloc(wl_entries_slab, GFP_KERNEL); + e = kmem_cache_alloc(ubi_wl_entry_slab, GFP_KERNEL); if (!e) goto out_free; @@ -1431,7 +1405,7 @@ int ubi_wl_init_scan(struct ubi_device *ubi, struct ubi_scan_info *si) e->ec = seb->ec; ubi->lookuptbl[e->pnum] = e; if (schedule_erase(ubi, e, 0)) { - kmem_cache_free(wl_entries_slab, e); + kmem_cache_free(ubi_wl_entry_slab, e); goto out_free; } } @@ -1439,7 +1413,7 @@ int ubi_wl_init_scan(struct ubi_device *ubi, struct ubi_scan_info *si) list_for_each_entry(seb, &si->free, u.list) { cond_resched(); - e = kmem_cache_alloc(wl_entries_slab, GFP_KERNEL); + e = kmem_cache_alloc(ubi_wl_entry_slab, GFP_KERNEL); if (!e) goto out_free; @@ -1453,7 +1427,7 @@ int ubi_wl_init_scan(struct ubi_device *ubi, struct ubi_scan_info *si) list_for_each_entry(seb, &si->corr, u.list) { cond_resched(); - e = kmem_cache_alloc(wl_entries_slab, GFP_KERNEL); + e = kmem_cache_alloc(ubi_wl_entry_slab, GFP_KERNEL); if (!e) goto out_free; @@ -1461,7 +1435,7 @@ int ubi_wl_init_scan(struct ubi_device *ubi, struct ubi_scan_info *si) e->ec = seb->ec; ubi->lookuptbl[e->pnum] = e; if (schedule_erase(ubi, e, 0)) { - kmem_cache_free(wl_entries_slab, e); + kmem_cache_free(ubi_wl_entry_slab, e); goto out_free; } } @@ -1470,7 +1444,7 @@ int ubi_wl_init_scan(struct ubi_device *ubi, struct ubi_scan_info *si) ubi_rb_for_each_entry(rb2, seb, &sv->root, u.rb) { cond_resched(); - e = kmem_cache_alloc(wl_entries_slab, GFP_KERNEL); + e = kmem_cache_alloc(ubi_wl_entry_slab, GFP_KERNEL); if (!e) goto out_free; @@ -1510,8 +1484,6 @@ out_free: tree_destroy(&ubi->free); tree_destroy(&ubi->scrub); kfree(ubi->lookuptbl); - if (ubi_devices_cnt == 0) - kmem_cache_destroy(wl_entries_slab); return err; } @@ -1541,7 +1513,7 @@ static void protection_trees_destroy(struct ubi_device *ubi) rb->rb_right = NULL; } - kmem_cache_free(wl_entries_slab, pe->e); + kmem_cache_free(ubi_wl_entry_slab, pe->e); kfree(pe); } } @@ -1565,8 +1537,6 @@ void ubi_wl_close(struct ubi_device *ubi) tree_destroy(&ubi->free); tree_destroy(&ubi->scrub); kfree(ubi->lookuptbl); - if (ubi_devices_cnt == 1) - kmem_cache_destroy(wl_entries_slab); } #ifdef CONFIG_MTD_UBI_DEBUG_PARANOID -- cgit v1.2.2 From d2c468550915ab2f16149e274a6f0da0b925a748 Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Tue, 18 Dec 2007 13:17:24 +0200 Subject: UBI: improve comment Explain better the purpose of the 'move_to_put' stuff. Signed-off-by: Artem Bityutskiy --- drivers/mtd/ubi/wl.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'drivers/mtd/ubi/wl.c') diff --git a/drivers/mtd/ubi/wl.c b/drivers/mtd/ubi/wl.c index a405d40faa23..36aa097203f9 100644 --- a/drivers/mtd/ubi/wl.c +++ b/drivers/mtd/ubi/wl.c @@ -793,7 +793,7 @@ static int wear_leveling_worker(struct ubi_device *ubi, struct ubi_work *wrk, e1 = rb_entry(rb_first(&ubi->scrub), struct ubi_wl_entry, rb); e2 = find_wl_entry(&ubi->free, WL_FREE_MAX_DIFF); paranoid_check_in_wl_tree(e1, &ubi->scrub); - rb_erase(&e1->rb, &ubi->scrub); + rb_erase(&e1->rb, &ubi->scrub); dbg_wl("scrub PEB %d to PEB %d", e1->pnum, e2->pnum); } @@ -1139,8 +1139,11 @@ int ubi_wl_put_peb(struct ubi_device *ubi, int pnum, int torture) /* * User is putting the physical eraseblock which was selected * as the target the data is moved to. It may happen if the EBA - * unit already re-mapped the LEB but the WL unit did has not - * put the PEB to the "used" tree.
+ * unit already re-mapped the LEB in 'ubi_eba_copy_leb()' but + * the WL unit has not put the PEB to the "used" tree yet, but + * it is about to do this. So we just set a flag which will + * tell the WL worker that the PEB is not needed anymore and + * should be sheduled for erasure. */ dbg_wl("PEB %d is the target of data moving", pnum); ubi_assert(!ubi->move_to_put); -- cgit v1.2.2 From 43f9b25a9cdd7b177f77f026b1461abd1abbd174 Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Tue, 18 Dec 2007 15:06:55 +0200 Subject: UBI: bugfix: protect from volume removal When the WL worker is moving an LEB, the volume might occasionally go away. UBI does not handle these situations correctly. This patch introduces a new mutex which serializes the wear-levelling worker and the 'ubi_wl_put_peb()' function. Now, if one puts an LEB, and its PEB is being moved, it will wait on the mutex. And because we unmap all LEBs when removing volumes, this will make the volume removal function wait while the LEB movement finishes. Below is an example of an oops which should be fixed by this patch: Pid: 9167, comm: io_paral Not tainted (2.6.24-rc5-ubi-2.6.git #2) EIP: 0060:[] EFLAGS: 00010246 CPU: 0 EIP is at prot_tree_del+0x2a/0x63 [ubi] EAX: f39a90e0 EBX: 00000000 ECX: 00000000 EDX: 00000134 ESI: f39a90e0 EDI: f39a90e0 EBP: f2d55ddc ESP: f2d55dd4 DS: 007b ES: 007b FS: 00d8 GS: 0033 SS: 0068 Process io_paral (pid: 9167, ti=f2d54000 task=f72a8030 task.ti=f2d54000) Stack: f39a95f8 ef6aae50 f2d55e08 f884a511 f88538e1 f884ecea 00000134 00000000 f39a9604 f39a95f0 efea8280 00000000 f39a90e0 f2d55e40 f8847261 f8850c3c f884eaad 00000001 000000b9 00000134 00000172 000000b9 00000134 00000001 Call Trace: [] show_trace_log_lvl+0x1a/0x30 [] show_stack_log_lvl+0xa5/0xca [] show_registers+0xcf/0x21b [] die+0x126/0x224 [] do_page_fault+0x27f/0x60d [] error_code+0x72/0x78 [] ubi_wl_put_peb+0xf0/0x191 [ubi] [] ubi_eba_unmap_leb+0xaf/0xcc [ubi] [] ubi_remove_volume+0x102/0x1e8 [ubi] [] ubi_cdev_ioctl+0x22a/0x383 [ubi] [] do_ioctl+0x68/0x71 [] vfs_ioctl+0x55/0x271 [] sys_ioctl+0x33/0x52 [] sysenter_past_esp+0x5f/0xa5 ======================= Signed-off-by: Artem Bityutskiy --- drivers/mtd/ubi/wl.c | 219 +++++++++++++++++++++++++++++++-------------------- 1 file changed, 133 insertions(+), 86 deletions(-) (limited to 'drivers/mtd/ubi/wl.c') diff --git a/drivers/mtd/ubi/wl.c b/drivers/mtd/ubi/wl.c index 36aa097203f9..a60f9425ab13 100644 --- a/drivers/mtd/ubi/wl.c +++ b/drivers/mtd/ubi/wl.c @@ -249,6 +249,8 @@ static int do_work(struct ubi_device *ubi) int err; struct ubi_work *wrk; + cond_resched(); + spin_lock(&ubi->wl_lock); if (list_empty(&ubi->works)) { @@ -531,8 +533,12 @@ retry: * prot_tree_del - remove a physical eraseblock from the protection trees * @ubi: UBI device description object * @pnum: the physical eraseblock to remove + * + * This function returns PEB @pnum from the protection trees and returns zero + * in case of success and %-ENODEV if the PEB was not found in the protection + * trees.
*/ -static void prot_tree_del(struct ubi_device *ubi, int pnum) +static int prot_tree_del(struct ubi_device *ubi, int pnum) { struct rb_node *p; struct ubi_wl_prot_entry *pe = NULL; @@ -543,7 +549,7 @@ static void prot_tree_del(struct ubi_device *ubi, int pnum) pe = rb_entry(p, struct ubi_wl_prot_entry, rb_pnum); if (pnum == pe->e->pnum) - break; + goto found; if (pnum < pe->e->pnum) p = p->rb_left; @@ -551,10 +557,14 @@ static void prot_tree_del(struct ubi_device *ubi, int pnum) p = p->rb_right; } + return -ENODEV; + +found: ubi_assert(pe->e->pnum == pnum); rb_erase(&pe->rb_aec, &ubi->prot.aec); rb_erase(&pe->rb_pnum, &ubi->prot.pnum); kfree(pe); + return 0; } /** @@ -726,7 +736,8 @@ static int schedule_erase(struct ubi_device *ubi, struct ubi_wl_entry *e, static int wear_leveling_worker(struct ubi_device *ubi, struct ubi_work *wrk, int cancel) { - int err, put = 0; + int err, put = 0, scrubbing = 0, protect = 0; + struct ubi_wl_prot_entry *pe; struct ubi_wl_entry *e1, *e2; struct ubi_vid_hdr *vid_hdr; @@ -739,21 +750,17 @@ static int wear_leveling_worker(struct ubi_device *ubi, struct ubi_work *wrk, if (!vid_hdr) return -ENOMEM; + mutex_lock(&ubi->move_mutex); spin_lock(&ubi->wl_lock); + ubi_assert(!ubi->move_from && !ubi->move_to); + ubi_assert(!ubi->move_to_put); - /* - * Only one WL worker at a time is supported at this implementation, so - * make sure a PEB is not being moved already. - */ - if (ubi->move_to || !ubi->free.rb_node || + if (!ubi->free.rb_node || (!ubi->used.rb_node && !ubi->scrub.rb_node)) { /* - * Only one WL worker at a time is supported at this - * implementation, so if a LEB is already being moved, cancel. - * - * No free physical eraseblocks? Well, we cancel wear-leveling - * then. It will be triggered again when a free physical - * eraseblock appears. + * No free physical eraseblocks? Well, they must be waiting in + * the queue to be erased. Cancel movement - it will be + * triggered again when a free physical eraseblock appears. * * No used physical eraseblocks? They must be temporarily * protected from being moved. 
They will be moved to the @@ -762,10 +769,7 @@ static int wear_leveling_worker(struct ubi_device *ubi, struct ubi_work *wrk, */ dbg_wl("cancel WL, a list is empty: free %d, used %d", !ubi->free.rb_node, !ubi->used.rb_node); - ubi->wl_scheduled = 0; - spin_unlock(&ubi->wl_lock); - ubi_free_vid_hdr(ubi, vid_hdr); - return 0; + goto out_cancel; } if (!ubi->scrub.rb_node) { @@ -780,16 +784,15 @@ static int wear_leveling_worker(struct ubi_device *ubi, struct ubi_work *wrk, if (!(e2->ec - e1->ec >= UBI_WL_THRESHOLD)) { dbg_wl("no WL needed: min used EC %d, max free EC %d", e1->ec, e2->ec); - ubi->wl_scheduled = 0; - spin_unlock(&ubi->wl_lock); - ubi_free_vid_hdr(ubi, vid_hdr); - return 0; + goto out_cancel; } paranoid_check_in_wl_tree(e1, &ubi->used); rb_erase(&e1->rb, &ubi->used); dbg_wl("move PEB %d EC %d to PEB %d EC %d", e1->pnum, e1->ec, e2->pnum, e2->ec); } else { + /* Perform scrubbing */ + scrubbing = 1; e1 = rb_entry(rb_first(&ubi->scrub), struct ubi_wl_entry, rb); e2 = find_wl_entry(&ubi->free, WL_FREE_MAX_DIFF); paranoid_check_in_wl_tree(e1, &ubi->scrub); @@ -799,8 +802,6 @@ static int wear_leveling_worker(struct ubi_device *ubi, struct ubi_work *wrk, paranoid_check_in_wl_tree(e2, &ubi->free); rb_erase(&e2->rb, &ubi->free); - ubi_assert(!ubi->move_from && !ubi->move_to); - ubi_assert(!ubi->move_to_put && !ubi->move_from_put); ubi->move_from = e1; ubi->move_to = e2; spin_unlock(&ubi->wl_lock); @@ -810,6 +811,10 @@ static int wear_leveling_worker(struct ubi_device *ubi, struct ubi_work *wrk, * We so far do not know which logical eraseblock our physical * eraseblock (@e1) belongs to. We have to read the volume identifier * header first. + * + * Note, we are protected from this PEB being unmapped and erased. The + * 'ubi_wl_put_peb()' would wait for moving to be finished if the PEB + * which is being moved was unmapped. */ err = ubi_io_read_vid_hdr(ubi, e1->pnum, vid_hdr, 0); @@ -824,32 +829,51 @@ static int wear_leveling_worker(struct ubi_device *ubi, struct ubi_work *wrk, * likely have the VID header in place. */ dbg_wl("PEB %d has no VID header", e1->pnum); - err = 0; - } else { - ubi_err("error %d while reading VID header from PEB %d", - err, e1->pnum); - if (err > 0) - err = -EIO; + goto out_not_moved; } - goto error; + + ubi_err("error %d while reading VID header from PEB %d", + err, e1->pnum); + if (err > 0) + err = -EIO; + goto out_error; } err = ubi_eba_copy_leb(ubi, e1->pnum, e2->pnum, vid_hdr); if (err) { - if (err == UBI_IO_BITFLIPS) - err = 0; - goto error; + + if (err < 0) + goto out_error; + if (err == 1) + goto out_not_moved; + + /* + * For some reason the LEB was not moved - it might be because + * the volume is being deleted. We should prevent this PEB from + * being selected for wear-levelling movement for some "time", + * so put it to the protection tree. 
+ */ + + dbg_wl("cancelled moving PEB %d", e1->pnum); + pe = kmalloc(sizeof(struct ubi_wl_prot_entry), GFP_NOFS); + if (!pe) { + err = -ENOMEM; + goto out_error; + } + + protect = 1; } ubi_free_vid_hdr(ubi, vid_hdr); spin_lock(&ubi->wl_lock); + if (protect) + prot_tree_add(ubi, e1, pe, protect); if (!ubi->move_to_put) wl_tree_add(e2, &ubi->used); else put = 1; ubi->move_from = ubi->move_to = NULL; - ubi->move_from_put = ubi->move_to_put = 0; - ubi->wl_scheduled = 0; + ubi->move_to_put = ubi->wl_scheduled = 0; spin_unlock(&ubi->wl_lock); if (put) { @@ -859,62 +883,67 @@ static int wear_leveling_worker(struct ubi_device *ubi, struct ubi_work *wrk, */ dbg_wl("PEB %d was put meanwhile, erase", e2->pnum); err = schedule_erase(ubi, e2, 0); - if (err) { - kmem_cache_free(ubi_wl_entry_slab, e2); - ubi_ro_mode(ubi); - } + if (err) + goto out_error; } - err = schedule_erase(ubi, e1, 0); - if (err) { - kmem_cache_free(ubi_wl_entry_slab, e1); - ubi_ro_mode(ubi); + if (!protect) { + err = schedule_erase(ubi, e1, 0); + if (err) + goto out_error; } + dbg_wl("done"); - return err; + mutex_unlock(&ubi->move_mutex); + return 0; /* - * Some error occurred. @e1 was not changed, so return it back. @e2 - * might be changed, schedule it for erasure. + * For some reasons the LEB was not moved, might be an error, might be + * something else. @e1 was not changed, so return it back. @e2 might + * be changed, schedule it for erasure. */ -error: - if (err) - dbg_wl("error %d occurred, cancel operation", err); - ubi_assert(err <= 0); - +out_not_moved: ubi_free_vid_hdr(ubi, vid_hdr); spin_lock(&ubi->wl_lock); - ubi->wl_scheduled = 0; - if (ubi->move_from_put) - put = 1; + if (scrubbing) + wl_tree_add(e1, &ubi->scrub); else wl_tree_add(e1, &ubi->used); ubi->move_from = ubi->move_to = NULL; - ubi->move_from_put = ubi->move_to_put = 0; + ubi->move_to_put = ubi->wl_scheduled = 0; spin_unlock(&ubi->wl_lock); - if (put) { - /* - * Well, the target PEB was put meanwhile, schedule it for - * erasure. - */ - dbg_wl("PEB %d was put meanwhile, erase", e1->pnum); - err = schedule_erase(ubi, e1, 0); - if (err) { - kmem_cache_free(ubi_wl_entry_slab, e1); - ubi_ro_mode(ubi); - } - } - err = schedule_erase(ubi, e2, 0); - if (err) { - kmem_cache_free(ubi_wl_entry_slab, e2); - ubi_ro_mode(ubi); - } + if (err) + goto out_error; + + mutex_unlock(&ubi->move_mutex); + return 0; + +out_error: + ubi_err("error %d while moving PEB %d to PEB %d", + err, e1->pnum, e2->pnum); - yield(); + ubi_free_vid_hdr(ubi, vid_hdr); + spin_lock(&ubi->wl_lock); + ubi->move_from = ubi->move_to = NULL; + ubi->move_to_put = ubi->wl_scheduled = 0; + spin_unlock(&ubi->wl_lock); + + kmem_cache_free(ubi_wl_entry_slab, e1); + kmem_cache_free(ubi_wl_entry_slab, e2); + ubi_ro_mode(ubi); + + mutex_unlock(&ubi->move_mutex); return err; + +out_cancel: + ubi->wl_scheduled = 0; + spin_unlock(&ubi->wl_lock); + mutex_unlock(&ubi->move_mutex); + ubi_free_vid_hdr(ubi, vid_hdr); + return 0; } /** @@ -1101,8 +1130,7 @@ out_ro: } /** - * ubi_wl_put_peb - return a physical eraseblock to the wear-leveling - * unit. + * ubi_wl_put_peb - return a physical eraseblock to the wear-leveling unit. * @ubi: UBI device description object * @pnum: physical eraseblock to return * @torture: if this physical eraseblock has to be tortured @@ -1110,7 +1138,7 @@ out_ro: * This function is called to return physical eraseblock @pnum to the pool of * free physical eraseblocks. The @torture flag has to be set if an I/O error * occurred to this @pnum and it has to be tested. 
This function returns zero - * in case of success and a negative error code in case of failure. + * in case of success, and a negative error code in case of failure. */ int ubi_wl_put_peb(struct ubi_device *ubi, int pnum, int torture) { @@ -1121,8 +1149,8 @@ int ubi_wl_put_peb(struct ubi_device *ubi, int pnum, int torture) ubi_assert(pnum >= 0); ubi_assert(pnum < ubi->peb_count); +retry: spin_lock(&ubi->wl_lock); - e = ubi->lookuptbl[pnum]; if (e == ubi->move_from) { /* @@ -1130,11 +1158,13 @@ int ubi_wl_put_peb(struct ubi_device *ubi, int pnum, int torture) * be moved. It will be scheduled for erasure in the * wear-leveling worker. */ - dbg_wl("PEB %d is being moved", pnum); - ubi_assert(!ubi->move_from_put); - ubi->move_from_put = 1; + dbg_wl("PEB %d is being moved, wait", pnum); spin_unlock(&ubi->wl_lock); - return 0; + + /* Wait for the WL worker by taking the @ubi->move_mutex */ + mutex_lock(&ubi->move_mutex); + mutex_unlock(&ubi->move_mutex); + goto retry; } else if (e == ubi->move_to) { /* * User is putting the physical eraseblock which was selected @@ -1157,8 +1187,15 @@ int ubi_wl_put_peb(struct ubi_device *ubi, int pnum, int torture) } else if (in_wl_tree(e, &ubi->scrub)) { paranoid_check_in_wl_tree(e, &ubi->scrub); rb_erase(&e->rb, &ubi->scrub); - } else - prot_tree_del(ubi, e->pnum); + } else { + err = prot_tree_del(ubi, e->pnum); + if (err) { + ubi_err("PEB %d not found", pnum); + ubi_ro_mode(ubi); + spin_unlock(&ubi->wl_lock); + return err; + } + } } spin_unlock(&ubi->wl_lock); @@ -1212,8 +1249,17 @@ retry: if (in_wl_tree(e, &ubi->used)) { paranoid_check_in_wl_tree(e, &ubi->used); rb_erase(&e->rb, &ubi->used); - } else - prot_tree_del(ubi, pnum); + } else { + int err; + + err = prot_tree_del(ubi, e->pnum); + if (err) { + ubi_err("PEB %d not found", pnum); + ubi_ro_mode(ubi); + spin_unlock(&ubi->wl_lock); + return err; + } + } wl_tree_add(e, &ubi->scrub); spin_unlock(&ubi->wl_lock); @@ -1379,6 +1425,7 @@ int ubi_wl_init_scan(struct ubi_device *ubi, struct ubi_scan_info *si) ubi->used = ubi->free = ubi->scrub = RB_ROOT; ubi->prot.pnum = ubi->prot.aec = RB_ROOT; spin_lock_init(&ubi->wl_lock); + mutex_init(&ubi->move_mutex); ubi->max_ec = si->max_ec; INIT_LIST_HEAD(&ubi->works); -- cgit v1.2.2 From 593dd33c92c6529443d5df1350dc5cc76511232d Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Tue, 18 Dec 2007 15:54:35 +0200 Subject: UBI: fix ubi_wl_flush The flush function should finish all the pending jobs. But if somebody else is doing work, this function should wait and let it finish. This patch uses an rw semaphore for synchronization purposes - it is simply convenient here. Signed-off-by: Artem Bityutskiy --- drivers/mtd/ubi/wl.c | 39 ++++++++++++++++++++++++++++++++------- 1 file changed, 32 insertions(+), 7 deletions(-) (limited to 'drivers/mtd/ubi/wl.c') diff --git a/drivers/mtd/ubi/wl.c b/drivers/mtd/ubi/wl.c index a60f9425ab13..8421c7a9a835 100644 --- a/drivers/mtd/ubi/wl.c +++ b/drivers/mtd/ubi/wl.c @@ -251,10 +251,18 @@ static int do_work(struct ubi_device *ubi) cond_resched(); + /* + * @ubi->work_sem is used to synchronize with the workers. Workers take + * it in read mode, so many of them may be doing works at a time. But + * the queue flush code has to be sure the whole queue of works is + * done, and it takes the mutex in write mode.
+ */ + down_read(&ubi->work_sem); spin_lock(&ubi->wl_lock); if (list_empty(&ubi->works)) { spin_unlock(&ubi->wl_lock); + up_read(&ubi->work_sem); return 0; } @@ -275,6 +283,7 @@ static int do_work(struct ubi_device *ubi) ubi->works_count -= 1; ubi_assert(ubi->works_count >= 0); spin_unlock(&ubi->wl_lock); + up_read(&ubi->work_sem); return err; } @@ -1173,7 +1182,7 @@ retry: * the WL unit has not put the PEB to the "used" tree yet, but * it is about to do this. So we just set a flag which will * tell the WL worker that the PEB is not needed anymore and - * should be sheduled for erasure. + * should be scheduled for erasure. */ dbg_wl("PEB %d is the target of data moving", pnum); ubi_assert(!ubi->move_to_put); @@ -1280,17 +1289,32 @@ retry: */ int ubi_wl_flush(struct ubi_device *ubi) { - int err, pending_count; - - pending_count = ubi->works_count; - - dbg_wl("flush (%d pending works)", pending_count); + int err; /* * Erase while the pending works queue is not empty, but not more then * the number of currently pending works. */ - while (pending_count-- > 0) { + dbg_wl("flush (%d pending works)", ubi->works_count); + while (ubi->works_count) { + err = do_work(ubi); + if (err) + return err; + } + + /* + * Make sure all the works which have been done in parallel are + * finished. + */ + down_write(&ubi->work_sem); + up_write(&ubi->work_sem); + + /* + * And in case last was the WL worker and it cancelled the LEB + * movement, flush again. + */ + while (ubi->works_count) { + dbg_wl("flush more (%d pending works)", ubi->works_count); err = do_work(ubi); if (err) return err; @@ -1426,6 +1450,7 @@ int ubi_wl_init_scan(struct ubi_device *ubi, struct ubi_scan_info *si) ubi->prot.pnum = ubi->prot.aec = RB_ROOT; spin_lock_init(&ubi->wl_lock); mutex_init(&ubi->move_mutex); + init_rwsem(&ubi->work_sem); ubi->max_ec = si->max_ec; INIT_LIST_HEAD(&ubi->works); -- cgit v1.2.2 From 16f557ecbf96dd13d13788a6f62d4d97ae73b1f9 Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Wed, 19 Dec 2007 16:03:17 +0200 Subject: UBI: fix comment Signed-off-by: Artem Bityutskiy --- drivers/mtd/ubi/wl.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) (limited to 'drivers/mtd/ubi/wl.c') diff --git a/drivers/mtd/ubi/wl.c b/drivers/mtd/ubi/wl.c index 8421c7a9a835..7d32f71d6f1e 100644 --- a/drivers/mtd/ubi/wl.c +++ b/drivers/mtd/ubi/wl.c @@ -259,7 +259,6 @@ static int do_work(struct ubi_device *ubi) */ down_read(&ubi->work_sem); spin_lock(&ubi->wl_lock); - if (list_empty(&ubi->works)) { spin_unlock(&ubi->wl_lock); up_read(&ubi->work_sem); @@ -268,6 +267,8 @@ static int do_work(struct ubi_device *ubi) wrk = list_entry(ubi->works.next, struct ubi_work, list); list_del(&wrk->list); + ubi->works_count -= 1; + ubi_assert(ubi->works_count >= 0); spin_unlock(&ubi->wl_lock); /* @@ -278,12 +279,8 @@ static int do_work(struct ubi_device *ubi) err = wrk->func(ubi, wrk, 0); if (err) ubi_err("work failed with error code %d", err); - - spin_lock(&ubi->wl_lock); - ubi->works_count -= 1; - ubi_assert(ubi->works_count >= 0); - spin_unlock(&ubi->wl_lock); up_read(&ubi->work_sem); + return err; } -- cgit v1.2.2 From e73f4459d969bb266f03dd4cbe21bdba8cb2732c Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Mon, 17 Dec 2007 17:37:26 +0200 Subject: UBI: add UBI devices reference counting This is one more step on the way to "removable" UBI devices. It adds reference counting for UBI devices. Every time a volume on this device is opened, the device's refcount is increased.
It is also increased if someone is reading any sysfs file of this UBI device or of one of its volumes. Signed-off-by: Artem Bityutskiy --- drivers/mtd/ubi/wl.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers/mtd/ubi/wl.c') diff --git a/drivers/mtd/ubi/wl.c b/drivers/mtd/ubi/wl.c index 7d32f71d6f1e..bfc64c824165 100644 --- a/drivers/mtd/ubi/wl.c +++ b/drivers/mtd/ubi/wl.c @@ -1303,6 +1303,7 @@ int ubi_wl_flush(struct ubi_device *ubi) * Make sure all the works which have been done in parallel are * finished. */ + ubi_assert(ubi->ref_count > 0); down_write(&ubi->work_sem); up_write(&ubi->work_sem); -- cgit v1.2.2 From cdfa788acd134a35d3e5b73d1a76fca4033d8aa9 Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Mon, 17 Dec 2007 20:33:20 +0200 Subject: UBI: prepare attach and detach functions Prepare the attach and detach functions to be used outside of module initialization: * detach function checks reference count before detaching * it kills the background thread as well Signed-off-by: Artem Bityutskiy --- drivers/mtd/ubi/wl.c | 16 ++-------------- 1 file changed, 2 insertions(+), 14 deletions(-) (limited to 'drivers/mtd/ubi/wl.c') diff --git a/drivers/mtd/ubi/wl.c b/drivers/mtd/ubi/wl.c index bfc64c824165..1142aabcfc8c 100644 --- a/drivers/mtd/ubi/wl.c +++ b/drivers/mtd/ubi/wl.c @@ -1356,7 +1356,7 @@ static void tree_destroy(struct rb_root *root) * ubi_thread - UBI background thread. * @u: the UBI device description object pointer */ -static int ubi_thread(void *u) +int ubi_thread(void *u) { int failures = 0; struct ubi_device *ubi = u; @@ -1454,18 +1454,10 @@ int ubi_wl_init_scan(struct ubi_device *ubi, struct ubi_scan_info *si) sprintf(ubi->bgt_name, UBI_BGT_NAME_PATTERN, ubi->ubi_num); - ubi->bgt_thread = kthread_create(ubi_thread, ubi, ubi->bgt_name); - if (IS_ERR(ubi->bgt_thread)) { - err = PTR_ERR(ubi->bgt_thread); - ubi_err("cannot spawn \"%s\", error %d", ubi->bgt_name, - err); - return err; - } - err = -ENOMEM; ubi->lookuptbl = kzalloc(ubi->peb_count * sizeof(void *), GFP_KERNEL); if (!ubi->lookuptbl) - goto out_free; + return err; list_for_each_entry_safe(seb, tmp, &si->erase, u.list) { cond_resched(); @@ -1598,10 +1590,6 @@ static void protection_trees_destroy(struct ubi_device *ubi) */ void ubi_wl_close(struct ubi_device *ubi) { - dbg_wl("disable \"%s\"", ubi->bgt_name); - if (ubi->bgt_thread) - kthread_stop(ubi->bgt_thread); - dbg_wl("close the UBI wear-leveling unit"); cancel_pending(ubi); -- cgit v1.2.2 From 4ccf8cffa963c7b5bdc6d455ea9417084ee49aa8 Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Wed, 16 Jan 2008 15:44:24 +0200 Subject: UBI: add auto-resize feature The problem: NAND flashes have different amounts of initial bad physical eraseblocks (marked as bad by the manufacturer). For example, for a 256MiB Samsung OneNAND flash there might be from 0 to 40 bad initial eraseblocks, which is about 2%. When UBI is used as the base system, one needs to know the exact number of good physical eraseblocks, because this number is needed to create the UBI image which is put to the devices during production. But this number is not known, which forces us to use the minimum number of good physical eraseblocks. And UBI additionally reserves some percentage of physical eraseblocks for bad block handling (default is 1%), so we have 1-3% of PEBs reserved at the end, depending on the amount of initial bad PEBs. But it is desired to always have 1% (or more, depending on the configuration). Solution: this patch adds an "auto-resize" flag to the volume table.
The volume which has the "auto-resize" flag will automatically be re-sized (enlarged) on the first UBI initialization. UBI clears the flag when the volume is re-sized. Only one volume may have the "auto-resize" flag. So, the production UBI image may have one volume with "auto-resize" flag set, and its size is automatically adjusted on the first boot of the device. Signed-off-by: Artem Bityutskiy --- drivers/mtd/ubi/wl.c | 1 - 1 file changed, 1 deletion(-) (limited to 'drivers/mtd/ubi/wl.c') diff --git a/drivers/mtd/ubi/wl.c b/drivers/mtd/ubi/wl.c index 1142aabcfc8c..8bfb7434c993 100644 --- a/drivers/mtd/ubi/wl.c +++ b/drivers/mtd/ubi/wl.c @@ -1303,7 +1303,6 @@ int ubi_wl_flush(struct ubi_device *ubi) * Make sure all the works which have been done in parallel are * finished. */ - ubi_assert(ubi->ref_count > 0); down_write(&ubi->work_sem); up_write(&ubi->work_sem); -- cgit v1.2.2 From c18a84186cc05bee19d55823f1a35f4ea91a92d6 Mon Sep 17 00:00:00 2001 From: Artem Bityutskiy Date: Thu, 24 Jan 2008 11:19:14 +0200 Subject: UBI: fix warnings Old gcc complains: CC drivers/mtd/ubi/wl.o drivers/mtd/ubi/wl.c: In function 'wear_leveling_worker': drivers/mtd/ubi/wl.c:746: warning: 'pe' may be used uninitialized in this function CC drivers/mtd/ubi/scan.o drivers/mtd/ubi/scan.c: In function 'ubi_scan': drivers/mtd/ubi/scan.c:772: warning: 'ec' may be used uninitialized in this function drivers/mtd/ubi/scan.c:772: note: 'ec' was declared here Signed-off-by: Artem Bityutskiy --- drivers/mtd/ubi/wl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/mtd/ubi/wl.c') diff --git a/drivers/mtd/ubi/wl.c b/drivers/mtd/ubi/wl.c index 8bfb7434c993..a471a491f0ab 100644 --- a/drivers/mtd/ubi/wl.c +++ b/drivers/mtd/ubi/wl.c @@ -743,7 +743,7 @@ static int wear_leveling_worker(struct ubi_device *ubi, struct ubi_work *wrk, int cancel) { int err, put = 0, scrubbing = 0, protect = 0; - struct ubi_wl_prot_entry *pe; + struct ubi_wl_prot_entry *uninitialized_var(pe); struct ubi_wl_entry *e1, *e2; struct ubi_vid_hdr *vid_hdr; -- cgit v1.2.2
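Editor's note: the "UBI: fix ubi_wl_flush" patch in this series hinges on one rw-semaphore trick: workers hold @ubi->work_sem in read mode while they execute a work item (so several may run in parallel), and the flush path takes it in write mode, which is granted only after every in-flight reader has released the lock. Below is a minimal, self-contained userspace sketch of that pattern, assuming a POSIX rwlock as a stand-in for the kernel rw_semaphore; the names in it (work_sem, worker, flush_works) are illustrative, not UBI code.

/* Build with: cc -pthread flush_sketch.c -o flush_sketch */
#include <pthread.h>
#include <stdio.h>
#include <unistd.h>

/* Userspace stand-in for the kernel rw_semaphore @ubi->work_sem. */
static pthread_rwlock_t work_sem = PTHREAD_RWLOCK_INITIALIZER;

static void *worker(void *arg)
{
	long id = (long)arg;

	/* Like down_read(&ubi->work_sem): many workers may run at once. */
	pthread_rwlock_rdlock(&work_sem);
	printf("worker %ld: executing a work item\n", id);
	usleep(100 * 1000);			/* pretend the work takes a while */
	pthread_rwlock_unlock(&work_sem);	/* like up_read() */
	return NULL;
}

static void flush_works(void)
{
	/*
	 * Like the down_write()/up_write() pair added to ubi_wl_flush():
	 * the write lock is granted only after every reader (every
	 * in-flight work item) has dropped its read lock, so returning
	 * from here guarantees the parallel works have finished.
	 */
	pthread_rwlock_wrlock(&work_sem);
	pthread_rwlock_unlock(&work_sem);
	printf("flush: all in-flight works are done\n");
}

int main(void)
{
	pthread_t threads[4];
	long i;

	for (i = 0; i < 4; i++)
		pthread_create(&threads[i], NULL, worker, (void *)i);
	usleep(10 * 1000);	/* let the workers grab their read locks */
	flush_works();
	for (i = 0; i < 4; i++)
		pthread_join(threads[i], NULL);
	return 0;
}

Note the second while-loop in the patched ubi_wl_flush(): after the write/release pair, a cancelled wear-levelling move may have queued new erase work, so UBI drains the queue once more; the sketch omits that re-check for brevity.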