aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorArtem Bityutskiy <Artem.Bityutskiy@nokia.com>2007-12-18 08:06:55 -0500
committerArtem Bityutskiy <Artem.Bityutskiy@nokia.com>2007-12-26 12:15:16 -0500
commit43f9b25a9cdd7b177f77f026b1461abd1abbd174 (patch)
tree0c58fa96a8b6050fd2166d67552809cebd435549
parentd2c468550915ab2f16149e274a6f0da0b925a748 (diff)
UBI: bugfix: protect from volume removal
When the WL worker is moving an LEB, the volume might go away occasionally. UBI does not handle these situations correctly. This patch introduces a new mutex which serializes the wear-levelling worker and the 'ubi_wl_put_peb()' function. Now, if one puts an LEB, and its PEB is being moved, it will wait on the mutex. And because we unmap all LEBs when removing volumes, this will make the volume removal function wait until the LEB movement finishes. Below is an example of an oops which should be fixed by this patch: Pid: 9167, comm: io_paral Not tainted (2.6.24-rc5-ubi-2.6.git #2) EIP: 0060:[<f884a379>] EFLAGS: 00010246 CPU: 0 EIP is at prot_tree_del+0x2a/0x63 [ubi] EAX: f39a90e0 EBX: 00000000 ECX: 00000000 EDX: 00000134 ESI: f39a90e0 EDI: f39a90e0 EBP: f2d55ddc ESP: f2d55dd4 DS: 007b ES: 007b FS: 00d8 GS: 0033 SS: 0068 Process io_paral (pid: 9167, ti=f2d54000 task=f72a8030 task.ti=f2d54000) Stack: f39a95f8 ef6aae50 f2d55e08 f884a511 f88538e1 f884ecea 00000134 00000000 f39a9604 f39a95f0 efea8280 00000000 f39a90e0 f2d55e40 f8847261 f8850c3c f884eaad 00000001 000000b9 00000134 00000172 000000b9 00000134 00000001 Call Trace: [<c0105227>] show_trace_log_lvl+0x1a/0x30 [<c01052e2>] show_stack_log_lvl+0xa5/0xca [<c01053d6>] show_registers+0xcf/0x21b [<c0105648>] die+0x126/0x224 [<c0119a62>] do_page_fault+0x27f/0x60d [<c037dd62>] error_code+0x72/0x78 [<f884a511>] ubi_wl_put_peb+0xf0/0x191 [ubi] [<f8847261>] ubi_eba_unmap_leb+0xaf/0xcc [ubi] [<f8843c21>] ubi_remove_volume+0x102/0x1e8 [ubi] [<f8846077>] ubi_cdev_ioctl+0x22a/0x383 [ubi] [<c017d768>] do_ioctl+0x68/0x71 [<c017d7c6>] vfs_ioctl+0x55/0x271 [<c017da15>] sys_ioctl+0x33/0x52 [<c0104152>] sysenter_past_esp+0x5f/0xa5 ======================= Signed-off-by: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>
-rw-r--r--drivers/mtd/ubi/eba.c145
-rw-r--r--drivers/mtd/ubi/ubi.h10
-rw-r--r--drivers/mtd/ubi/wl.c219
3 files changed, 239 insertions, 135 deletions
diff --git a/drivers/mtd/ubi/eba.c b/drivers/mtd/ubi/eba.c
index 84f7dc9fd3ac..c94f475758de 100644
--- a/drivers/mtd/ubi/eba.c
+++ b/drivers/mtd/ubi/eba.c
@@ -260,6 +260,44 @@ static int leb_write_lock(struct ubi_device *ubi, int vol_id, int lnum)
260} 260}
261 261
262/** 262/**
263 * leb_write_trylock - try to lock logical eraseblock for writing.
264 * @ubi: UBI device description object
265 * @vol_id: volume ID
266 * @lnum: logical eraseblock number
267 *
268 * This function locks a logical eraseblock for writing if there is no
269 * contention and does nothing if there is contention. Returns %0 in case of
270 * success, %1 in case of contention, and a negative error code in case of
271 * failure.
272 */
273static int leb_write_trylock(struct ubi_device *ubi, int vol_id, int lnum)
274{
275 int free;
276 struct ubi_ltree_entry *le;
277
278 le = ltree_add_entry(ubi, vol_id, lnum);
279 if (IS_ERR(le))
280 return PTR_ERR(le);
281 if (down_write_trylock(&le->mutex))
282 return 0;
283
284 /* Contention, cancel */
285 spin_lock(&ubi->ltree_lock);
286 le->users -= 1;
287 ubi_assert(le->users >= 0);
288 if (le->users == 0) {
289 rb_erase(&le->rb, &ubi->ltree);
290 free = 1;
291 } else
292 free = 0;
293 spin_unlock(&ubi->ltree_lock);
294 if (free)
295 kmem_cache_free(ubi_ltree_slab, le);
296
297 return 1;
298}
299
300/**
263 * leb_write_unlock - unlock logical eraseblock. 301 * leb_write_unlock - unlock logical eraseblock.
264 * @ubi: UBI device description object 302 * @ubi: UBI device description object
265 * @vol_id: volume ID 303 * @vol_id: volume ID
@@ -923,14 +961,16 @@ write_error:
923 * 961 *
924 * This function copies logical eraseblock from physical eraseblock @from to 962 * This function copies logical eraseblock from physical eraseblock @from to
925 * physical eraseblock @to. The @vid_hdr buffer may be changed by this 963 * physical eraseblock @to. The @vid_hdr buffer may be changed by this
926 * function. Returns zero in case of success, %UBI_IO_BITFLIPS if the operation 964 * function. Returns:
927 * was canceled because bit-flips were detected at the target PEB, and a 965 * o %0 in case of success;
928 * negative error code in case of failure. 966 * o %1 if the operation was canceled and should be tried later (e.g.,
967 * because a bit-flip was detected at the target PEB);
968 * o %2 if the volume is being deleted and this LEB should not be moved.
929 */ 969 */
930int ubi_eba_copy_leb(struct ubi_device *ubi, int from, int to, 970int ubi_eba_copy_leb(struct ubi_device *ubi, int from, int to,
931 struct ubi_vid_hdr *vid_hdr) 971 struct ubi_vid_hdr *vid_hdr)
932{ 972{
933 int err, vol_id, lnum, data_size, aldata_size, pnum, idx; 973 int err, vol_id, lnum, data_size, aldata_size, idx;
934 struct ubi_volume *vol; 974 struct ubi_volume *vol;
935 uint32_t crc; 975 uint32_t crc;
936 976
@@ -946,57 +986,67 @@ int ubi_eba_copy_leb(struct ubi_device *ubi, int from, int to,
946 data_size = aldata_size = 986 data_size = aldata_size =
947 ubi->leb_size - be32_to_cpu(vid_hdr->data_pad); 987 ubi->leb_size - be32_to_cpu(vid_hdr->data_pad);
948 988
949 /*
950 * We do not want anybody to write to this logical eraseblock while we
951 * are moving it, so we lock it.
952 */
953 err = leb_write_lock(ubi, vol_id, lnum);
954 if (err)
955 return err;
956
957 mutex_lock(&ubi->buf_mutex);
958
959 /*
960 * But the logical eraseblock might have been put by this time.
961 * Cancel if it is true.
962 */
963 idx = vol_id2idx(ubi, vol_id); 989 idx = vol_id2idx(ubi, vol_id);
964 990 spin_lock(&ubi->volumes_lock);
965 /* 991 /*
966 * We may race with volume deletion/re-size, so we have to hold 992 * Note, we may race with volume deletion, which means that the volume
967 * @ubi->volumes_lock. 993 * this logical eraseblock belongs to might be being deleted. Since the
968 * 994 * volume deletion unmaps all the volume's logical eraseblocks, it will
969 * Note, it is not a problem if we race with volume deletion or re-size 995 * be locked in 'ubi_wl_put_peb()' and wait for the WL worker to finish.
970 * here. If the volume is deleted or re-sized while we are moving an
971 * eraseblock which belongs to this volume, we'll end up with finding
972 * out that this LEB was unmapped at the end (see WL), and drop this
973 * PEB.
974 */ 996 */
975 spin_lock(&ubi->volumes_lock);
976 vol = ubi->volumes[idx]; 997 vol = ubi->volumes[idx];
977 if (!vol) { 998 if (!vol) {
978 dbg_eba("volume %d was removed meanwhile", vol_id); 999 /* No need to do further work, cancel */
1000 dbg_eba("volume %d is being removed, cancel", vol_id);
979 spin_unlock(&ubi->volumes_lock); 1001 spin_unlock(&ubi->volumes_lock);
980 goto out_unlock; 1002 return 2;
981 } 1003 }
1004 spin_unlock(&ubi->volumes_lock);
982 1005
983 pnum = vol->eba_tbl[lnum]; 1006 /*
984 if (pnum != from) { 1007 * We do not want anybody to write to this logical eraseblock while we
985 dbg_eba("LEB %d:%d is no longer mapped to PEB %d, mapped to " 1008 * are moving it, so lock it.
986 "PEB %d, cancel", vol_id, lnum, from, pnum); 1009 *
987 spin_unlock(&ubi->volumes_lock); 1010 * Note, we are using non-waiting locking here, because we cannot sleep
988 goto out_unlock; 1011 * on the LEB, since it may cause deadlocks. Indeed, imagine a task is
1012 * unmapping the LEB which is mapped to the PEB we are going to move
1013 * (@from). This task locks the LEB and goes to sleep in the
1014 * 'ubi_wl_put_peb()' function on the @ubi->move_mutex. In turn, we are
1015 * holding @ubi->move_mutex and go to sleep on the LEB lock. So, if the
1016 * LEB is already locked, we just do not move it and return %1.
1017 */
1018 err = leb_write_trylock(ubi, vol_id, lnum);
1019 if (err) {
1020 dbg_eba("contention on LEB %d:%d, cancel", vol_id, lnum);
1021 return err;
989 } 1022 }
990 spin_unlock(&ubi->volumes_lock);
991 1023
992 /* OK, now the LEB is locked and we can safely start moving it */ 1024 /*
1025 * The LEB might have been put meanwhile, and the task which put it is
1026 * probably waiting on @ubi->move_mutex. No need to continue the work,
1027 * cancel it.
1028 */
1029 if (vol->eba_tbl[lnum] != from) {
1030 dbg_eba("LEB %d:%d is no longer mapped to PEB %d, mapped to "
1031 "PEB %d, cancel", vol_id, lnum, from,
1032 vol->eba_tbl[lnum]);
1033 err = 1;
1034 goto out_unlock_leb;
1035 }
993 1036
1037 /*
1038 * OK, now the LEB is locked and we can safely start moving it. Since
1039 * this function utilizes the @ubi->peb_buf1 buffer which is shared
1040 * with some other functions, lock the buffer by taking the
1041 * @ubi->buf_mutex.
1042 */
1043 mutex_lock(&ubi->buf_mutex);
994 dbg_eba("read %d bytes of data", aldata_size); 1044 dbg_eba("read %d bytes of data", aldata_size);
995 err = ubi_io_read_data(ubi, ubi->peb_buf1, from, 0, aldata_size); 1045 err = ubi_io_read_data(ubi, ubi->peb_buf1, from, 0, aldata_size);
996 if (err && err != UBI_IO_BITFLIPS) { 1046 if (err && err != UBI_IO_BITFLIPS) {
997 ubi_warn("error %d while reading data from PEB %d", 1047 ubi_warn("error %d while reading data from PEB %d",
998 err, from); 1048 err, from);
999 goto out_unlock; 1049 goto out_unlock_buf;
1000 } 1050 }
1001 1051
1002 /* 1052 /*
@@ -1032,7 +1082,7 @@ int ubi_eba_copy_leb(struct ubi_device *ubi, int from, int to,
1032 1082
1033 err = ubi_io_write_vid_hdr(ubi, to, vid_hdr); 1083 err = ubi_io_write_vid_hdr(ubi, to, vid_hdr);
1034 if (err) 1084 if (err)
1035 goto out_unlock; 1085 goto out_unlock_buf;
1036 1086
1037 cond_resched(); 1087 cond_resched();
1038 1088
@@ -1041,13 +1091,15 @@ int ubi_eba_copy_leb(struct ubi_device *ubi, int from, int to,
1041 if (err) { 1091 if (err) {
1042 if (err != UBI_IO_BITFLIPS) 1092 if (err != UBI_IO_BITFLIPS)
1043 ubi_warn("cannot read VID header back from PEB %d", to); 1093 ubi_warn("cannot read VID header back from PEB %d", to);
1044 goto out_unlock; 1094 else
1095 err = 1;
1096 goto out_unlock_buf;
1045 } 1097 }
1046 1098
1047 if (data_size > 0) { 1099 if (data_size > 0) {
1048 err = ubi_io_write_data(ubi, ubi->peb_buf1, to, 0, aldata_size); 1100 err = ubi_io_write_data(ubi, ubi->peb_buf1, to, 0, aldata_size);
1049 if (err) 1101 if (err)
1050 goto out_unlock; 1102 goto out_unlock_buf;
1051 1103
1052 cond_resched(); 1104 cond_resched();
1053 1105
@@ -1061,7 +1113,9 @@ int ubi_eba_copy_leb(struct ubi_device *ubi, int from, int to,
1061 if (err != UBI_IO_BITFLIPS) 1113 if (err != UBI_IO_BITFLIPS)
1062 ubi_warn("cannot read data back from PEB %d", 1114 ubi_warn("cannot read data back from PEB %d",
1063 to); 1115 to);
1064 goto out_unlock; 1116 else
1117 err = 1;
1118 goto out_unlock_buf;
1065 } 1119 }
1066 1120
1067 cond_resched(); 1121 cond_resched();
@@ -1069,15 +1123,16 @@ int ubi_eba_copy_leb(struct ubi_device *ubi, int from, int to,
1069 if (memcmp(ubi->peb_buf1, ubi->peb_buf2, aldata_size)) { 1123 if (memcmp(ubi->peb_buf1, ubi->peb_buf2, aldata_size)) {
1070 ubi_warn("read data back from PEB %d - it is different", 1124 ubi_warn("read data back from PEB %d - it is different",
1071 to); 1125 to);
1072 goto out_unlock; 1126 goto out_unlock_buf;
1073 } 1127 }
1074 } 1128 }
1075 1129
1076 ubi_assert(vol->eba_tbl[lnum] == from); 1130 ubi_assert(vol->eba_tbl[lnum] == from);
1077 vol->eba_tbl[lnum] = to; 1131 vol->eba_tbl[lnum] = to;
1078 1132
1079out_unlock: 1133out_unlock_buf:
1080 mutex_unlock(&ubi->buf_mutex); 1134 mutex_unlock(&ubi->buf_mutex);
1135out_unlock_leb:
1081 leb_write_unlock(ubi, vol_id, lnum); 1136 leb_write_unlock(ubi, vol_id, lnum);
1082 return err; 1137 return err;
1083} 1138}
diff --git a/drivers/mtd/ubi/ubi.h b/drivers/mtd/ubi/ubi.h
index f782d5aa849a..ea9a6990a4dc 100644
--- a/drivers/mtd/ubi/ubi.h
+++ b/drivers/mtd/ubi/ubi.h
@@ -275,13 +275,13 @@ struct ubi_wl_entry;
275 * @wl_lock: protects the @used, @free, @prot, @lookuptbl, @abs_ec, @move_from, 275 * @wl_lock: protects the @used, @free, @prot, @lookuptbl, @abs_ec, @move_from,
276 * @move_to, @move_to_put @erase_pending, @wl_scheduled, and @works 276 * @move_to, @move_to_put @erase_pending, @wl_scheduled, and @works
277 * fields 277 * fields
278 * @move_mutex: serializes eraseblock moves
278 * @wl_scheduled: non-zero if the wear-leveling was scheduled 279 * @wl_scheduled: non-zero if the wear-leveling was scheduled
279 * @lookuptbl: a table to quickly find a &struct ubi_wl_entry object for any 280 * @lookuptbl: a table to quickly find a &struct ubi_wl_entry object for any
280 * physical eraseblock 281 * physical eraseblock
281 * @abs_ec: absolute erase counter 282 * @abs_ec: absolute erase counter
282 * @move_from: physical eraseblock from where the data is being moved 283 * @move_from: physical eraseblock from where the data is being moved
283 * @move_to: physical eraseblock where the data is being moved to 284 * @move_to: physical eraseblock where the data is being moved to
284 * @move_from_put: if the "from" PEB was put
285 * @move_to_put: if the "to" PEB was put 285 * @move_to_put: if the "to" PEB was put
286 * @works: list of pending works 286 * @works: list of pending works
287 * @works_count: count of pending works 287 * @works_count: count of pending works
@@ -354,12 +354,12 @@ struct ubi_device {
354 struct rb_root aec; 354 struct rb_root aec;
355 } prot; 355 } prot;
356 spinlock_t wl_lock; 356 spinlock_t wl_lock;
357 struct mutex move_mutex;
357 int wl_scheduled; 358 int wl_scheduled;
358 struct ubi_wl_entry **lookuptbl; 359 struct ubi_wl_entry **lookuptbl;
359 unsigned long long abs_ec; 360 unsigned long long abs_ec;
360 struct ubi_wl_entry *move_from; 361 struct ubi_wl_entry *move_from;
361 struct ubi_wl_entry *move_to; 362 struct ubi_wl_entry *move_to;
362 int move_from_put;
363 int move_to_put; 363 int move_to_put;
364 struct list_head works; 364 struct list_head works;
365 int works_count; 365 int works_count;
@@ -561,8 +561,10 @@ static inline int ubi_io_write_data(struct ubi_device *ubi, const void *buf,
561 */ 561 */
562static inline void ubi_ro_mode(struct ubi_device *ubi) 562static inline void ubi_ro_mode(struct ubi_device *ubi)
563{ 563{
564 ubi->ro_mode = 1; 564 if (!ubi->ro_mode) {
565 ubi_warn("switch to read-only mode"); 565 ubi->ro_mode = 1;
566 ubi_warn("switch to read-only mode");
567 }
566} 568}
567 569
568/** 570/**
diff --git a/drivers/mtd/ubi/wl.c b/drivers/mtd/ubi/wl.c
index 36aa097203f9..a60f9425ab13 100644
--- a/drivers/mtd/ubi/wl.c
+++ b/drivers/mtd/ubi/wl.c
@@ -249,6 +249,8 @@ static int do_work(struct ubi_device *ubi)
249 int err; 249 int err;
250 struct ubi_work *wrk; 250 struct ubi_work *wrk;
251 251
252 cond_resched();
253
252 spin_lock(&ubi->wl_lock); 254 spin_lock(&ubi->wl_lock);
253 255
254 if (list_empty(&ubi->works)) { 256 if (list_empty(&ubi->works)) {
@@ -531,8 +533,12 @@ retry:
531 * prot_tree_del - remove a physical eraseblock from the protection trees 533 * prot_tree_del - remove a physical eraseblock from the protection trees
532 * @ubi: UBI device description object 534 * @ubi: UBI device description object
533 * @pnum: the physical eraseblock to remove 535 * @pnum: the physical eraseblock to remove
536 *
537 * This function removes PEB @pnum from the protection trees and returns zero
538 * in case of success and %-ENODEV if the PEB was not found in the protection
539 * trees.
534 */ 540 */
535static void prot_tree_del(struct ubi_device *ubi, int pnum) 541static int prot_tree_del(struct ubi_device *ubi, int pnum)
536{ 542{
537 struct rb_node *p; 543 struct rb_node *p;
538 struct ubi_wl_prot_entry *pe = NULL; 544 struct ubi_wl_prot_entry *pe = NULL;
@@ -543,7 +549,7 @@ static void prot_tree_del(struct ubi_device *ubi, int pnum)
543 pe = rb_entry(p, struct ubi_wl_prot_entry, rb_pnum); 549 pe = rb_entry(p, struct ubi_wl_prot_entry, rb_pnum);
544 550
545 if (pnum == pe->e->pnum) 551 if (pnum == pe->e->pnum)
546 break; 552 goto found;
547 553
548 if (pnum < pe->e->pnum) 554 if (pnum < pe->e->pnum)
549 p = p->rb_left; 555 p = p->rb_left;
@@ -551,10 +557,14 @@ static void prot_tree_del(struct ubi_device *ubi, int pnum)
551 p = p->rb_right; 557 p = p->rb_right;
552 } 558 }
553 559
560 return -ENODEV;
561
562found:
554 ubi_assert(pe->e->pnum == pnum); 563 ubi_assert(pe->e->pnum == pnum);
555 rb_erase(&pe->rb_aec, &ubi->prot.aec); 564 rb_erase(&pe->rb_aec, &ubi->prot.aec);
556 rb_erase(&pe->rb_pnum, &ubi->prot.pnum); 565 rb_erase(&pe->rb_pnum, &ubi->prot.pnum);
557 kfree(pe); 566 kfree(pe);
567 return 0;
558} 568}
559 569
560/** 570/**
@@ -726,7 +736,8 @@ static int schedule_erase(struct ubi_device *ubi, struct ubi_wl_entry *e,
726static int wear_leveling_worker(struct ubi_device *ubi, struct ubi_work *wrk, 736static int wear_leveling_worker(struct ubi_device *ubi, struct ubi_work *wrk,
727 int cancel) 737 int cancel)
728{ 738{
729 int err, put = 0; 739 int err, put = 0, scrubbing = 0, protect = 0;
740 struct ubi_wl_prot_entry *pe;
730 struct ubi_wl_entry *e1, *e2; 741 struct ubi_wl_entry *e1, *e2;
731 struct ubi_vid_hdr *vid_hdr; 742 struct ubi_vid_hdr *vid_hdr;
732 743
@@ -739,21 +750,17 @@ static int wear_leveling_worker(struct ubi_device *ubi, struct ubi_work *wrk,
739 if (!vid_hdr) 750 if (!vid_hdr)
740 return -ENOMEM; 751 return -ENOMEM;
741 752
753 mutex_lock(&ubi->move_mutex);
742 spin_lock(&ubi->wl_lock); 754 spin_lock(&ubi->wl_lock);
755 ubi_assert(!ubi->move_from && !ubi->move_to);
756 ubi_assert(!ubi->move_to_put);
743 757
744 /* 758 if (!ubi->free.rb_node ||
745 * Only one WL worker at a time is supported at this implementation, so
746 * make sure a PEB is not being moved already.
747 */
748 if (ubi->move_to || !ubi->free.rb_node ||
749 (!ubi->used.rb_node && !ubi->scrub.rb_node)) { 759 (!ubi->used.rb_node && !ubi->scrub.rb_node)) {
750 /* 760 /*
751 * Only one WL worker at a time is supported at this 761 * No free physical eraseblocks? Well, they must be waiting in
752 * implementation, so if a LEB is already being moved, cancel. 762 * the queue to be erased. Cancel movement - it will be
753 * 763 * triggered again when a free physical eraseblock appears.
754 * No free physical eraseblocks? Well, we cancel wear-leveling
755 * then. It will be triggered again when a free physical
756 * eraseblock appears.
757 * 764 *
758 * No used physical eraseblocks? They must be temporarily 765 * No used physical eraseblocks? They must be temporarily
759 * protected from being moved. They will be moved to the 766 * protected from being moved. They will be moved to the
@@ -762,10 +769,7 @@ static int wear_leveling_worker(struct ubi_device *ubi, struct ubi_work *wrk,
762 */ 769 */
763 dbg_wl("cancel WL, a list is empty: free %d, used %d", 770 dbg_wl("cancel WL, a list is empty: free %d, used %d",
764 !ubi->free.rb_node, !ubi->used.rb_node); 771 !ubi->free.rb_node, !ubi->used.rb_node);
765 ubi->wl_scheduled = 0; 772 goto out_cancel;
766 spin_unlock(&ubi->wl_lock);
767 ubi_free_vid_hdr(ubi, vid_hdr);
768 return 0;
769 } 773 }
770 774
771 if (!ubi->scrub.rb_node) { 775 if (!ubi->scrub.rb_node) {
@@ -780,16 +784,15 @@ static int wear_leveling_worker(struct ubi_device *ubi, struct ubi_work *wrk,
780 if (!(e2->ec - e1->ec >= UBI_WL_THRESHOLD)) { 784 if (!(e2->ec - e1->ec >= UBI_WL_THRESHOLD)) {
781 dbg_wl("no WL needed: min used EC %d, max free EC %d", 785 dbg_wl("no WL needed: min used EC %d, max free EC %d",
782 e1->ec, e2->ec); 786 e1->ec, e2->ec);
783 ubi->wl_scheduled = 0; 787 goto out_cancel;
784 spin_unlock(&ubi->wl_lock);
785 ubi_free_vid_hdr(ubi, vid_hdr);
786 return 0;
787 } 788 }
788 paranoid_check_in_wl_tree(e1, &ubi->used); 789 paranoid_check_in_wl_tree(e1, &ubi->used);
789 rb_erase(&e1->rb, &ubi->used); 790 rb_erase(&e1->rb, &ubi->used);
790 dbg_wl("move PEB %d EC %d to PEB %d EC %d", 791 dbg_wl("move PEB %d EC %d to PEB %d EC %d",
791 e1->pnum, e1->ec, e2->pnum, e2->ec); 792 e1->pnum, e1->ec, e2->pnum, e2->ec);
792 } else { 793 } else {
794 /* Perform scrubbing */
795 scrubbing = 1;
793 e1 = rb_entry(rb_first(&ubi->scrub), struct ubi_wl_entry, rb); 796 e1 = rb_entry(rb_first(&ubi->scrub), struct ubi_wl_entry, rb);
794 e2 = find_wl_entry(&ubi->free, WL_FREE_MAX_DIFF); 797 e2 = find_wl_entry(&ubi->free, WL_FREE_MAX_DIFF);
795 paranoid_check_in_wl_tree(e1, &ubi->scrub); 798 paranoid_check_in_wl_tree(e1, &ubi->scrub);
@@ -799,8 +802,6 @@ static int wear_leveling_worker(struct ubi_device *ubi, struct ubi_work *wrk,
799 802
800 paranoid_check_in_wl_tree(e2, &ubi->free); 803 paranoid_check_in_wl_tree(e2, &ubi->free);
801 rb_erase(&e2->rb, &ubi->free); 804 rb_erase(&e2->rb, &ubi->free);
802 ubi_assert(!ubi->move_from && !ubi->move_to);
803 ubi_assert(!ubi->move_to_put && !ubi->move_from_put);
804 ubi->move_from = e1; 805 ubi->move_from = e1;
805 ubi->move_to = e2; 806 ubi->move_to = e2;
806 spin_unlock(&ubi->wl_lock); 807 spin_unlock(&ubi->wl_lock);
@@ -810,6 +811,10 @@ static int wear_leveling_worker(struct ubi_device *ubi, struct ubi_work *wrk,
810 * We so far do not know which logical eraseblock our physical 811 * We so far do not know which logical eraseblock our physical
811 * eraseblock (@e1) belongs to. We have to read the volume identifier 812 * eraseblock (@e1) belongs to. We have to read the volume identifier
812 * header first. 813 * header first.
814 *
815 * Note, we are protected from this PEB being unmapped and erased. The
816 * 'ubi_wl_put_peb()' would wait for moving to be finished if the PEB
817 * which is being moved was unmapped.
813 */ 818 */
814 819
815 err = ubi_io_read_vid_hdr(ubi, e1->pnum, vid_hdr, 0); 820 err = ubi_io_read_vid_hdr(ubi, e1->pnum, vid_hdr, 0);
@@ -824,32 +829,51 @@ static int wear_leveling_worker(struct ubi_device *ubi, struct ubi_work *wrk,
824 * likely have the VID header in place. 829 * likely have the VID header in place.
825 */ 830 */
826 dbg_wl("PEB %d has no VID header", e1->pnum); 831 dbg_wl("PEB %d has no VID header", e1->pnum);
827 err = 0; 832 goto out_not_moved;
828 } else {
829 ubi_err("error %d while reading VID header from PEB %d",
830 err, e1->pnum);
831 if (err > 0)
832 err = -EIO;
833 } 833 }
834 goto error; 834
835 ubi_err("error %d while reading VID header from PEB %d",
836 err, e1->pnum);
837 if (err > 0)
838 err = -EIO;
839 goto out_error;
835 } 840 }
836 841
837 err = ubi_eba_copy_leb(ubi, e1->pnum, e2->pnum, vid_hdr); 842 err = ubi_eba_copy_leb(ubi, e1->pnum, e2->pnum, vid_hdr);
838 if (err) { 843 if (err) {
839 if (err == UBI_IO_BITFLIPS) 844
840 err = 0; 845 if (err < 0)
841 goto error; 846 goto out_error;
847 if (err == 1)
848 goto out_not_moved;
849
850 /*
851 * For some reason the LEB was not moved - it might be because
852 * the volume is being deleted. We should prevent this PEB from
853 * being selected for wear-levelling movement for some "time",
854 * so put it to the protection tree.
855 */
856
857 dbg_wl("cancelled moving PEB %d", e1->pnum);
858 pe = kmalloc(sizeof(struct ubi_wl_prot_entry), GFP_NOFS);
859 if (!pe) {
860 err = -ENOMEM;
861 goto out_error;
862 }
863
864 protect = 1;
842 } 865 }
843 866
844 ubi_free_vid_hdr(ubi, vid_hdr); 867 ubi_free_vid_hdr(ubi, vid_hdr);
845 spin_lock(&ubi->wl_lock); 868 spin_lock(&ubi->wl_lock);
869 if (protect)
870 prot_tree_add(ubi, e1, pe, protect);
846 if (!ubi->move_to_put) 871 if (!ubi->move_to_put)
847 wl_tree_add(e2, &ubi->used); 872 wl_tree_add(e2, &ubi->used);
848 else 873 else
849 put = 1; 874 put = 1;
850 ubi->move_from = ubi->move_to = NULL; 875 ubi->move_from = ubi->move_to = NULL;
851 ubi->move_from_put = ubi->move_to_put = 0; 876 ubi->move_to_put = ubi->wl_scheduled = 0;
852 ubi->wl_scheduled = 0;
853 spin_unlock(&ubi->wl_lock); 877 spin_unlock(&ubi->wl_lock);
854 878
855 if (put) { 879 if (put) {
@@ -859,62 +883,67 @@ static int wear_leveling_worker(struct ubi_device *ubi, struct ubi_work *wrk,
859 */ 883 */
860 dbg_wl("PEB %d was put meanwhile, erase", e2->pnum); 884 dbg_wl("PEB %d was put meanwhile, erase", e2->pnum);
861 err = schedule_erase(ubi, e2, 0); 885 err = schedule_erase(ubi, e2, 0);
862 if (err) { 886 if (err)
863 kmem_cache_free(ubi_wl_entry_slab, e2); 887 goto out_error;
864 ubi_ro_mode(ubi);
865 }
866 } 888 }
867 889
868 err = schedule_erase(ubi, e1, 0); 890 if (!protect) {
869 if (err) { 891 err = schedule_erase(ubi, e1, 0);
870 kmem_cache_free(ubi_wl_entry_slab, e1); 892 if (err)
871 ubi_ro_mode(ubi); 893 goto out_error;
872 } 894 }
873 895
896
874 dbg_wl("done"); 897 dbg_wl("done");
875 return err; 898 mutex_unlock(&ubi->move_mutex);
899 return 0;
876 900
877 /* 901 /*
878 * Some error occurred. @e1 was not changed, so return it back. @e2 902 * For some reason the LEB was not moved, might be an error, might be
879 * might be changed, schedule it for erasure. 903 * something else. @e1 was not changed, so return it back. @e2 might
904 * be changed, schedule it for erasure.
880 */ 905 */
881error: 906out_not_moved:
882 if (err)
883 dbg_wl("error %d occurred, cancel operation", err);
884 ubi_assert(err <= 0);
885
886 ubi_free_vid_hdr(ubi, vid_hdr); 907 ubi_free_vid_hdr(ubi, vid_hdr);
887 spin_lock(&ubi->wl_lock); 908 spin_lock(&ubi->wl_lock);
888 ubi->wl_scheduled = 0; 909 if (scrubbing)
889 if (ubi->move_from_put) 910 wl_tree_add(e1, &ubi->scrub);
890 put = 1;
891 else 911 else
892 wl_tree_add(e1, &ubi->used); 912 wl_tree_add(e1, &ubi->used);
893 ubi->move_from = ubi->move_to = NULL; 913 ubi->move_from = ubi->move_to = NULL;
894 ubi->move_from_put = ubi->move_to_put = 0; 914 ubi->move_to_put = ubi->wl_scheduled = 0;
895 spin_unlock(&ubi->wl_lock); 915 spin_unlock(&ubi->wl_lock);
896 916
897 if (put) {
898 /*
899 * Well, the target PEB was put meanwhile, schedule it for
900 * erasure.
901 */
902 dbg_wl("PEB %d was put meanwhile, erase", e1->pnum);
903 err = schedule_erase(ubi, e1, 0);
904 if (err) {
905 kmem_cache_free(ubi_wl_entry_slab, e1);
906 ubi_ro_mode(ubi);
907 }
908 }
909
910 err = schedule_erase(ubi, e2, 0); 917 err = schedule_erase(ubi, e2, 0);
911 if (err) { 918 if (err)
912 kmem_cache_free(ubi_wl_entry_slab, e2); 919 goto out_error;
913 ubi_ro_mode(ubi); 920
914 } 921 mutex_unlock(&ubi->move_mutex);
922 return 0;
923
924out_error:
925 ubi_err("error %d while moving PEB %d to PEB %d",
926 err, e1->pnum, e2->pnum);
915 927
916 yield(); 928 ubi_free_vid_hdr(ubi, vid_hdr);
929 spin_lock(&ubi->wl_lock);
930 ubi->move_from = ubi->move_to = NULL;
931 ubi->move_to_put = ubi->wl_scheduled = 0;
932 spin_unlock(&ubi->wl_lock);
933
934 kmem_cache_free(ubi_wl_entry_slab, e1);
935 kmem_cache_free(ubi_wl_entry_slab, e2);
936 ubi_ro_mode(ubi);
937
938 mutex_unlock(&ubi->move_mutex);
917 return err; 939 return err;
940
941out_cancel:
942 ubi->wl_scheduled = 0;
943 spin_unlock(&ubi->wl_lock);
944 mutex_unlock(&ubi->move_mutex);
945 ubi_free_vid_hdr(ubi, vid_hdr);
946 return 0;
918} 947}
919 948
920/** 949/**
@@ -1101,8 +1130,7 @@ out_ro:
1101} 1130}
1102 1131
1103/** 1132/**
1104 * ubi_wl_put_peb - return a physical eraseblock to the wear-leveling 1133 * ubi_wl_put_peb - return a physical eraseblock to the wear-leveling unit.
1105 * unit.
1106 * @ubi: UBI device description object 1134 * @ubi: UBI device description object
1107 * @pnum: physical eraseblock to return 1135 * @pnum: physical eraseblock to return
1108 * @torture: if this physical eraseblock has to be tortured 1136 * @torture: if this physical eraseblock has to be tortured
@@ -1110,7 +1138,7 @@ out_ro:
1110 * This function is called to return physical eraseblock @pnum to the pool of 1138 * This function is called to return physical eraseblock @pnum to the pool of
1111 * free physical eraseblocks. The @torture flag has to be set if an I/O error 1139 * free physical eraseblocks. The @torture flag has to be set if an I/O error
1112 * occurred to this @pnum and it has to be tested. This function returns zero 1140 * occurred to this @pnum and it has to be tested. This function returns zero
1113 * in case of success and a negative error code in case of failure. 1141 * in case of success, and a negative error code in case of failure.
1114 */ 1142 */
1115int ubi_wl_put_peb(struct ubi_device *ubi, int pnum, int torture) 1143int ubi_wl_put_peb(struct ubi_device *ubi, int pnum, int torture)
1116{ 1144{
@@ -1121,8 +1149,8 @@ int ubi_wl_put_peb(struct ubi_device *ubi, int pnum, int torture)
1121 ubi_assert(pnum >= 0); 1149 ubi_assert(pnum >= 0);
1122 ubi_assert(pnum < ubi->peb_count); 1150 ubi_assert(pnum < ubi->peb_count);
1123 1151
1152retry:
1124 spin_lock(&ubi->wl_lock); 1153 spin_lock(&ubi->wl_lock);
1125
1126 e = ubi->lookuptbl[pnum]; 1154 e = ubi->lookuptbl[pnum];
1127 if (e == ubi->move_from) { 1155 if (e == ubi->move_from) {
1128 /* 1156 /*
@@ -1130,11 +1158,13 @@ int ubi_wl_put_peb(struct ubi_device *ubi, int pnum, int torture)
1130 * be moved. It will be scheduled for erasure in the 1158 * be moved. It will be scheduled for erasure in the
1131 * wear-leveling worker. 1159 * wear-leveling worker.
1132 */ 1160 */
1133 dbg_wl("PEB %d is being moved", pnum); 1161 dbg_wl("PEB %d is being moved, wait", pnum);
1134 ubi_assert(!ubi->move_from_put);
1135 ubi->move_from_put = 1;
1136 spin_unlock(&ubi->wl_lock); 1162 spin_unlock(&ubi->wl_lock);
1137 return 0; 1163
1164 /* Wait for the WL worker by taking the @ubi->move_mutex */
1165 mutex_lock(&ubi->move_mutex);
1166 mutex_unlock(&ubi->move_mutex);
1167 goto retry;
1138 } else if (e == ubi->move_to) { 1168 } else if (e == ubi->move_to) {
1139 /* 1169 /*
1140 * User is putting the physical eraseblock which was selected 1170 * User is putting the physical eraseblock which was selected
@@ -1157,8 +1187,15 @@ int ubi_wl_put_peb(struct ubi_device *ubi, int pnum, int torture)
1157 } else if (in_wl_tree(e, &ubi->scrub)) { 1187 } else if (in_wl_tree(e, &ubi->scrub)) {
1158 paranoid_check_in_wl_tree(e, &ubi->scrub); 1188 paranoid_check_in_wl_tree(e, &ubi->scrub);
1159 rb_erase(&e->rb, &ubi->scrub); 1189 rb_erase(&e->rb, &ubi->scrub);
1160 } else 1190 } else {
1161 prot_tree_del(ubi, e->pnum); 1191 err = prot_tree_del(ubi, e->pnum);
1192 if (err) {
1193 ubi_err("PEB %d not found", pnum);
1194 ubi_ro_mode(ubi);
1195 spin_unlock(&ubi->wl_lock);
1196 return err;
1197 }
1198 }
1162 } 1199 }
1163 spin_unlock(&ubi->wl_lock); 1200 spin_unlock(&ubi->wl_lock);
1164 1201
@@ -1212,8 +1249,17 @@ retry:
1212 if (in_wl_tree(e, &ubi->used)) { 1249 if (in_wl_tree(e, &ubi->used)) {
1213 paranoid_check_in_wl_tree(e, &ubi->used); 1250 paranoid_check_in_wl_tree(e, &ubi->used);
1214 rb_erase(&e->rb, &ubi->used); 1251 rb_erase(&e->rb, &ubi->used);
1215 } else 1252 } else {
1216 prot_tree_del(ubi, pnum); 1253 int err;
1254
1255 err = prot_tree_del(ubi, e->pnum);
1256 if (err) {
1257 ubi_err("PEB %d not found", pnum);
1258 ubi_ro_mode(ubi);
1259 spin_unlock(&ubi->wl_lock);
1260 return err;
1261 }
1262 }
1217 1263
1218 wl_tree_add(e, &ubi->scrub); 1264 wl_tree_add(e, &ubi->scrub);
1219 spin_unlock(&ubi->wl_lock); 1265 spin_unlock(&ubi->wl_lock);
@@ -1379,6 +1425,7 @@ int ubi_wl_init_scan(struct ubi_device *ubi, struct ubi_scan_info *si)
1379 ubi->used = ubi->free = ubi->scrub = RB_ROOT; 1425 ubi->used = ubi->free = ubi->scrub = RB_ROOT;
1380 ubi->prot.pnum = ubi->prot.aec = RB_ROOT; 1426 ubi->prot.pnum = ubi->prot.aec = RB_ROOT;
1381 spin_lock_init(&ubi->wl_lock); 1427 spin_lock_init(&ubi->wl_lock);
1428 mutex_init(&ubi->move_mutex);
1382 ubi->max_ec = si->max_ec; 1429 ubi->max_ec = si->max_ec;
1383 INIT_LIST_HEAD(&ubi->works); 1430 INIT_LIST_HEAD(&ubi->works);
1384 1431