author    Glenn Elliott <gelliott@cs.unc.edu>  2012-03-04 19:47:13 -0500
committer Glenn Elliott <gelliott@cs.unc.edu>  2012-03-04 19:47:13 -0500
commit    c71c03bda1e86c9d5198c5d83f712e695c4f2a1e
tree      ecb166cb3e2b7e2adb3b5e292245fefd23381ac8
parent    ea53c912f8a86a8567697115b6a0d8152beee5c8
parent    6a00f206debf8a5c8899055726ad127dbeeed098
Merge branch 'mpi-master' into wip-k-fmlp
Conflicts:
	litmus/sched_cedf.c
Diffstat (limited to 'fs/ubifs')
-rw-r--r--  fs/ubifs/Kconfig         36
-rw-r--r--  fs/ubifs/budget.c       106
-rw-r--r--  fs/ubifs/commit.c        66
-rw-r--r--  fs/ubifs/debug.c        413
-rw-r--r--  fs/ubifs/debug.h        352
-rw-r--r--  fs/ubifs/dir.c           24
-rw-r--r--  fs/ubifs/file.c          47
-rw-r--r--  fs/ubifs/find.c          10
-rw-r--r--  fs/ubifs/gc.c           153
-rw-r--r--  fs/ubifs/io.c           248
-rw-r--r--  fs/ubifs/ioctl.c          2
-rw-r--r--  fs/ubifs/journal.c       61
-rw-r--r--  fs/ubifs/key.h           14
-rw-r--r--  fs/ubifs/log.c           54
-rw-r--r--  fs/ubifs/lprops.c       115
-rw-r--r--  fs/ubifs/lpt.c           14
-rw-r--r--  fs/ubifs/lpt_commit.c   114
-rw-r--r--  fs/ubifs/master.c        11
-rw-r--r--  fs/ubifs/misc.h          26
-rw-r--r--  fs/ubifs/orphan.c        13
-rw-r--r--  fs/ubifs/recovery.c     475
-rw-r--r--  fs/ubifs/replay.c       485
-rw-r--r--  fs/ubifs/sb.c           162
-rw-r--r--  fs/ubifs/scan.c           8
-rw-r--r--  fs/ubifs/shrinker.c      11
-rw-r--r--  fs/ubifs/super.c        393
-rw-r--r--  fs/ubifs/tnc.c           34
-rw-r--r--  fs/ubifs/tnc_commit.c    18
-rw-r--r--  fs/ubifs/ubifs-media.h   30
-rw-r--r--  fs/ubifs/ubifs.h        154
-rw-r--r--  fs/ubifs/xattr.c         12
31 files changed, 2257 insertions(+), 1404 deletions(-)
diff --git a/fs/ubifs/Kconfig b/fs/ubifs/Kconfig
index 830e3f76f442..f8b0160da2da 100644
--- a/fs/ubifs/Kconfig
+++ b/fs/ubifs/Kconfig
@@ -44,29 +44,17 @@ config UBIFS_FS_ZLIB
 
 # Debugging-related stuff
 config UBIFS_FS_DEBUG
-	bool "Enable debugging"
+	bool "Enable debugging support"
 	depends on UBIFS_FS
 	select DEBUG_FS
-	select KALLSYMS_ALL
+	select KALLSYMS
 	help
-	  This option enables UBIFS debugging.
-
-config UBIFS_FS_DEBUG_MSG_LVL
-	int "Default message level (0 = no extra messages, 3 = lots)"
-	depends on UBIFS_FS_DEBUG
-	default "0"
-	help
-	  This controls the amount of debugging messages produced by UBIFS.
-	  If reporting bugs, please try to have available a full dump of the
-	  messages at level 1 while the misbehaviour was occurring. Level 2
-	  may become necessary if level 1 messages were not enough to find the
-	  bug. Generally Level 3 should be avoided.
-
-config UBIFS_FS_DEBUG_CHKS
-	bool "Enable extra checks"
-	depends on UBIFS_FS_DEBUG
-	help
-	  If extra checks are enabled UBIFS will check the consistency of its
-	  internal data structures during operation. However, UBIFS performance
-	  is dramatically slower when this option is selected especially if the
-	  file system is large.
+	  This option enables UBIFS debugging support. It makes sure various
+	  assertions, self-checks, debugging messages and test modes are compiled
+	  in (this all is compiled out otherwise). Assertions are light-weight
+	  and this option also enables them. Self-checks, debugging messages and
+	  test modes are switched off by default. Thus, it is safe and actually
+	  recommended to have debugging support enabled, and it should not slow
+	  down UBIFS. You can then further enable / disable individual debugging
+	  features using UBIFS module parameters and the corresponding sysfs
+	  interfaces.
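The new help text is accurate: with the message-level and extra-checks Kconfig symbols gone, everything is toggled at run time. The debug.c hunk further down wires this up with module_param_named() for debug_chks and debug_tsts; the general pattern looks like the following sketch (the flag variable and check function here are illustrative stand-ins, not part of the patch):

	/* Sketch of the run-time knob pattern this series switches to. */
	static unsigned int example_chk_flags;	/* bitmask of enabled self-checks */
	module_param_named(debug_chks, example_chk_flags, uint, S_IRUGO | S_IWUSR);
	MODULE_PARM_DESC(debug_chks, "Debug check flags");

	static int dbg_check_example(void)
	{
		if (!(example_chk_flags & 0x1))
			return 0;	/* check disabled: near-zero cost path */
		/* ... expensive consistency check would go here ... */
		return 0;
	}

Writing to /sys/module/ubifs/parameters/debug_chks then enables or disables individual checks without rebuilding the kernel.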
diff --git a/fs/ubifs/budget.c b/fs/ubifs/budget.c
index c8ff0d1ae5d3..315de66e52b2 100644
--- a/fs/ubifs/budget.c
+++ b/fs/ubifs/budget.c
@@ -106,7 +106,7 @@ static long long get_liability(struct ubifs_info *c)
 	long long liab;
 
 	spin_lock(&c->space_lock);
-	liab = c->budg_idx_growth + c->budg_data_growth + c->budg_dd_growth;
+	liab = c->bi.idx_growth + c->bi.data_growth + c->bi.dd_growth;
 	spin_unlock(&c->space_lock);
 	return liab;
 }
@@ -147,7 +147,7 @@ static int make_free_space(struct ubifs_info *c)
 	if (liab2 < liab1)
 		return -EAGAIN;
 
-	dbg_budg("new liability %lld (not shrinked)", liab2);
+	dbg_budg("new liability %lld (not shrunk)", liab2);
 
 	/* Liability did not shrink again, try GC */
 	dbg_budg("Run GC");
@@ -180,7 +180,7 @@ int ubifs_calc_min_idx_lebs(struct ubifs_info *c)
 	int idx_lebs;
 	long long idx_size;
 
-	idx_size = c->old_idx_sz + c->budg_idx_growth + c->budg_uncommitted_idx;
+	idx_size = c->bi.old_idx_sz + c->bi.idx_growth + c->bi.uncommitted_idx;
 	/* And make sure we have thrice the index size of space reserved */
 	idx_size += idx_size << 1;
 	/*
@@ -292,13 +292,13 @@ static int can_use_rp(struct ubifs_info *c)
  * budgeted index space to the size of the current index, multiplies this by 3,
  * and makes sure this does not exceed the amount of free LEBs.
  *
- * Notes about @c->min_idx_lebs and @c->lst.idx_lebs variables:
+ * Notes about @c->bi.min_idx_lebs and @c->lst.idx_lebs variables:
  * o @c->lst.idx_lebs is the number of LEBs the index currently uses. It might
  *   be large, because UBIFS does not do any index consolidation as long as
  *   there is free space. IOW, the index may take a lot of LEBs, but the LEBs
  *   will contain a lot of dirt.
- * o @c->min_idx_lebs is the number of LEBS the index presumably takes. IOW,
- *   the index may be consolidated to take up to @c->min_idx_lebs LEBs.
+ * o @c->bi.min_idx_lebs is the number of LEBS the index presumably takes. IOW,
+ *   the index may be consolidated to take up to @c->bi.min_idx_lebs LEBs.
  *
  * This function returns zero in case of success, and %-ENOSPC in case of
  * failure.
@@ -343,13 +343,13 @@ static int do_budget_space(struct ubifs_info *c)
 	       c->lst.taken_empty_lebs;
 	if (unlikely(rsvd_idx_lebs > lebs)) {
 		dbg_budg("out of indexing space: min_idx_lebs %d (old %d), "
-			 "rsvd_idx_lebs %d", min_idx_lebs, c->min_idx_lebs,
+			 "rsvd_idx_lebs %d", min_idx_lebs, c->bi.min_idx_lebs,
 			 rsvd_idx_lebs);
 		return -ENOSPC;
 	}
 
 	available = ubifs_calc_available(c, min_idx_lebs);
-	outstanding = c->budg_data_growth + c->budg_dd_growth;
+	outstanding = c->bi.data_growth + c->bi.dd_growth;
 
 	if (unlikely(available < outstanding)) {
 		dbg_budg("out of data space: available %lld, outstanding %lld",
@@ -360,7 +360,7 @@ static int do_budget_space(struct ubifs_info *c)
 	if (available - outstanding <= c->rp_size && !can_use_rp(c))
 		return -ENOSPC;
 
-	c->min_idx_lebs = min_idx_lebs;
+	c->bi.min_idx_lebs = min_idx_lebs;
 	return 0;
 }
 
@@ -393,11 +393,11 @@ static int calc_data_growth(const struct ubifs_info *c,
 {
 	int data_growth;
 
-	data_growth = req->new_ino ? c->inode_budget : 0;
+	data_growth = req->new_ino ? c->bi.inode_budget : 0;
 	if (req->new_page)
-		data_growth += c->page_budget;
+		data_growth += c->bi.page_budget;
 	if (req->new_dent)
-		data_growth += c->dent_budget;
+		data_growth += c->bi.dent_budget;
 	data_growth += req->new_ino_d;
 	return data_growth;
 }
@@ -413,12 +413,12 @@ static int calc_dd_growth(const struct ubifs_info *c,
 {
 	int dd_growth;
 
-	dd_growth = req->dirtied_page ? c->page_budget : 0;
+	dd_growth = req->dirtied_page ? c->bi.page_budget : 0;
 
 	if (req->dirtied_ino)
-		dd_growth += c->inode_budget << (req->dirtied_ino - 1);
+		dd_growth += c->bi.inode_budget << (req->dirtied_ino - 1);
 	if (req->mod_dent)
-		dd_growth += c->dent_budget;
+		dd_growth += c->bi.dent_budget;
 	dd_growth += req->dirtied_ino_d;
 	return dd_growth;
 }
@@ -460,19 +460,19 @@ int ubifs_budget_space(struct ubifs_info *c, struct ubifs_budget_req *req)
 
 again:
 	spin_lock(&c->space_lock);
-	ubifs_assert(c->budg_idx_growth >= 0);
-	ubifs_assert(c->budg_data_growth >= 0);
-	ubifs_assert(c->budg_dd_growth >= 0);
+	ubifs_assert(c->bi.idx_growth >= 0);
+	ubifs_assert(c->bi.data_growth >= 0);
+	ubifs_assert(c->bi.dd_growth >= 0);
 
-	if (unlikely(c->nospace) && (c->nospace_rp || !can_use_rp(c))) {
+	if (unlikely(c->bi.nospace) && (c->bi.nospace_rp || !can_use_rp(c))) {
 		dbg_budg("no space");
 		spin_unlock(&c->space_lock);
 		return -ENOSPC;
 	}
 
-	c->budg_idx_growth += idx_growth;
-	c->budg_data_growth += data_growth;
-	c->budg_dd_growth += dd_growth;
+	c->bi.idx_growth += idx_growth;
+	c->bi.data_growth += data_growth;
+	c->bi.dd_growth += dd_growth;
 
 	err = do_budget_space(c);
 	if (likely(!err)) {
@@ -484,9 +484,9 @@ again:
 	}
 
 	/* Restore the old values */
-	c->budg_idx_growth -= idx_growth;
-	c->budg_data_growth -= data_growth;
-	c->budg_dd_growth -= dd_growth;
+	c->bi.idx_growth -= idx_growth;
+	c->bi.data_growth -= data_growth;
+	c->bi.dd_growth -= dd_growth;
 	spin_unlock(&c->space_lock);
 
 	if (req->fast) {
@@ -506,9 +506,9 @@ again:
 		goto again;
 	}
 	dbg_budg("FS is full, -ENOSPC");
-	c->nospace = 1;
+	c->bi.nospace = 1;
 	if (can_use_rp(c) || c->rp_size == 0)
-		c->nospace_rp = 1;
+		c->bi.nospace_rp = 1;
 	smp_wmb();
 	} else
 		ubifs_err("cannot budget space, error %d", err);
@@ -523,8 +523,8 @@ again:
  * This function releases the space budgeted by 'ubifs_budget_space()'. Note,
  * since the index changes (which were budgeted for in @req->idx_growth) will
  * only be written to the media on commit, this function moves the index budget
- * from @c->budg_idx_growth to @c->budg_uncommitted_idx. The latter will be
- * zeroed by the commit operation.
+ * from @c->bi.idx_growth to @c->bi.uncommitted_idx. The latter will be zeroed
+ * by the commit operation.
  */
 void ubifs_release_budget(struct ubifs_info *c, struct ubifs_budget_req *req)
 {
@@ -553,23 +553,23 @@ void ubifs_release_budget(struct ubifs_info *c, struct ubifs_budget_req *req)
 	if (!req->data_growth && !req->dd_growth)
 		return;
 
-	c->nospace = c->nospace_rp = 0;
+	c->bi.nospace = c->bi.nospace_rp = 0;
 	smp_wmb();
 
 	spin_lock(&c->space_lock);
-	c->budg_idx_growth -= req->idx_growth;
-	c->budg_uncommitted_idx += req->idx_growth;
-	c->budg_data_growth -= req->data_growth;
-	c->budg_dd_growth -= req->dd_growth;
-	c->min_idx_lebs = ubifs_calc_min_idx_lebs(c);
+	c->bi.idx_growth -= req->idx_growth;
+	c->bi.uncommitted_idx += req->idx_growth;
+	c->bi.data_growth -= req->data_growth;
+	c->bi.dd_growth -= req->dd_growth;
+	c->bi.min_idx_lebs = ubifs_calc_min_idx_lebs(c);
 
-	ubifs_assert(c->budg_idx_growth >= 0);
-	ubifs_assert(c->budg_data_growth >= 0);
-	ubifs_assert(c->budg_dd_growth >= 0);
-	ubifs_assert(c->min_idx_lebs < c->main_lebs);
-	ubifs_assert(!(c->budg_idx_growth & 7));
-	ubifs_assert(!(c->budg_data_growth & 7));
-	ubifs_assert(!(c->budg_dd_growth & 7));
+	ubifs_assert(c->bi.idx_growth >= 0);
+	ubifs_assert(c->bi.data_growth >= 0);
+	ubifs_assert(c->bi.dd_growth >= 0);
+	ubifs_assert(c->bi.min_idx_lebs < c->main_lebs);
+	ubifs_assert(!(c->bi.idx_growth & 7));
+	ubifs_assert(!(c->bi.data_growth & 7));
+	ubifs_assert(!(c->bi.dd_growth & 7));
 	spin_unlock(&c->space_lock);
 }
 
@@ -586,13 +586,13 @@ void ubifs_convert_page_budget(struct ubifs_info *c)
 {
 	spin_lock(&c->space_lock);
 	/* Release the index growth reservation */
-	c->budg_idx_growth -= c->max_idx_node_sz << UBIFS_BLOCKS_PER_PAGE_SHIFT;
+	c->bi.idx_growth -= c->max_idx_node_sz << UBIFS_BLOCKS_PER_PAGE_SHIFT;
 	/* Release the data growth reservation */
-	c->budg_data_growth -= c->page_budget;
+	c->bi.data_growth -= c->bi.page_budget;
 	/* Increase the dirty data growth reservation instead */
-	c->budg_dd_growth += c->page_budget;
+	c->bi.dd_growth += c->bi.page_budget;
 	/* And re-calculate the indexing space reservation */
-	c->min_idx_lebs = ubifs_calc_min_idx_lebs(c);
+	c->bi.min_idx_lebs = ubifs_calc_min_idx_lebs(c);
 	spin_unlock(&c->space_lock);
 }
 
@@ -612,7 +612,7 @@ void ubifs_release_dirty_inode_budget(struct ubifs_info *c,
 
 	memset(&req, 0, sizeof(struct ubifs_budget_req));
 	/* The "no space" flags will be cleared because dd_growth is > 0 */
-	req.dd_growth = c->inode_budget + ALIGN(ui->data_len, 8);
+	req.dd_growth = c->bi.inode_budget + ALIGN(ui->data_len, 8);
 	ubifs_release_budget(c, &req);
 }
 
@@ -682,9 +682,9 @@ long long ubifs_get_free_space_nolock(struct ubifs_info *c)
 	int rsvd_idx_lebs, lebs;
 	long long available, outstanding, free;
 
-	ubifs_assert(c->min_idx_lebs == ubifs_calc_min_idx_lebs(c));
-	outstanding = c->budg_data_growth + c->budg_dd_growth;
-	available = ubifs_calc_available(c, c->min_idx_lebs);
+	ubifs_assert(c->bi.min_idx_lebs == ubifs_calc_min_idx_lebs(c));
+	outstanding = c->bi.data_growth + c->bi.dd_growth;
+	available = ubifs_calc_available(c, c->bi.min_idx_lebs);
 
 	/*
 	 * When reporting free space to user-space, UBIFS guarantees that it is
@@ -697,8 +697,8 @@ long long ubifs_get_free_space_nolock(struct ubifs_info *c)
 	 * Note, the calculations below are similar to what we have in
 	 * 'do_budget_space()', so refer there for comments.
 	 */
-	if (c->min_idx_lebs > c->lst.idx_lebs)
-		rsvd_idx_lebs = c->min_idx_lebs - c->lst.idx_lebs;
+	if (c->bi.min_idx_lebs > c->lst.idx_lebs)
+		rsvd_idx_lebs = c->bi.min_idx_lebs - c->lst.idx_lebs;
 	else
 		rsvd_idx_lebs = 0;
 	lebs = c->lst.empty_lebs + c->freeable_cnt + c->idx_gc_cnt -
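All of the budget.c changes above are one mechanical rename: the budgeting fields move out of struct ubifs_info into a budgeting sub-structure reachable as c->bi. The ubifs.h hunk is not shown on this page, but the shape of the new structure can be inferred from the accesses above; roughly (field list and layout inferred, see fs/ubifs/ubifs.h for the authoritative definition):

	struct ubifs_budg_info {
		long long idx_growth;		/* was c->budg_idx_growth */
		long long data_growth;		/* was c->budg_data_growth */
		long long dd_growth;		/* was c->budg_dd_growth */
		long long uncommitted_idx;	/* was c->budg_uncommitted_idx */
		unsigned long long old_idx_sz;	/* was c->old_idx_sz */
		int min_idx_lebs;		/* was c->min_idx_lebs */
		int page_budget;		/* was c->page_budget */
		int inode_budget;		/* was c->inode_budget */
		int dent_budget;		/* was c->dent_budget */
		unsigned int nospace:1;		/* was c->nospace */
		unsigned int nospace_rp:1;	/* was c->nospace_rp */
	};

Grouping the fields this way is what lets dbg_save_space_info() snapshot the whole budgeting state with a single memcpy(), as the debug.c hunks below show.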
diff --git a/fs/ubifs/commit.c b/fs/ubifs/commit.c
index 37fa7ed062d8..87cd0ead8633 100644
--- a/fs/ubifs/commit.c
+++ b/fs/ubifs/commit.c
@@ -48,6 +48,56 @@
 #include <linux/slab.h>
 #include "ubifs.h"
 
+/*
+ * nothing_to_commit - check if there is nothing to commit.
+ * @c: UBIFS file-system description object
+ *
+ * This is a helper function which checks if there is anything to commit. It is
+ * used as an optimization to avoid starting the commit if it is not really
+ * necessary. Indeed, the commit operation always assumes flash I/O (e.g.,
+ * writing the commit start node to the log), and it is better to avoid doing
+ * this unnecessarily. E.g., 'ubifs_sync_fs()' runs the commit, but if there is
+ * nothing to commit, it is more optimal to avoid any flash I/O.
+ *
+ * This function has to be called with @c->commit_sem locked for writing -
+ * this function does not take LPT/TNC locks because the @c->commit_sem
+ * guarantees that we have exclusive access to the TNC and LPT data structures.
+ *
+ * This function returns %1 if there is nothing to commit and %0 otherwise.
+ */
+static int nothing_to_commit(struct ubifs_info *c)
+{
+	/*
+	 * During mounting or remounting from R/O mode to R/W mode we may
+	 * commit for various recovery-related reasons.
+	 */
+	if (c->mounting || c->remounting_rw)
+		return 0;
+
+	/*
+	 * If the root TNC node is dirty, we definitely have something to
+	 * commit.
+	 */
+	if (c->zroot.znode && test_bit(DIRTY_ZNODE, &c->zroot.znode->flags))
+		return 0;
+
+	/*
+	 * Even though the TNC is clean, the LPT tree may have dirty nodes. For
+	 * example, this may happen if the budgeting subsystem invoked GC to
+	 * make some free space, and the GC found an LEB with only dirty and
+	 * free space. In this case GC would just change the lprops of this
+	 * LEB (by turning all space into free space) and unmap it.
+	 */
+	if (c->nroot && test_bit(DIRTY_CNODE, &c->nroot->flags))
+		return 0;
+
+	ubifs_assert(atomic_long_read(&c->dirty_zn_cnt) == 0);
+	ubifs_assert(c->dirty_pn_cnt == 0);
+	ubifs_assert(c->dirty_nn_cnt == 0);
+
+	return 1;
+}
+
 /**
  * do_commit - commit the journal.
  * @c: UBIFS file-system description object
@@ -63,11 +113,19 @@ static int do_commit(struct ubifs_info *c)
 	struct ubifs_lp_stats lst;
 
 	dbg_cmt("start");
-	if (c->ro_media) {
+	ubifs_assert(!c->ro_media && !c->ro_mount);
+
+	if (c->ro_error) {
 		err = -EROFS;
 		goto out_up;
 	}
 
+	if (nothing_to_commit(c)) {
+		up_write(&c->commit_sem);
+		err = 0;
+		goto out_cancel;
+	}
+
 	/* Sync all write buffers (necessary for recovery) */
 	for (i = 0; i < c->jhead_cnt; i++) {
 		err = ubifs_wbuf_sync(&c->jheads[i].wbuf);
@@ -124,7 +182,7 @@ static int do_commit(struct ubifs_info *c)
 	c->mst_node->root_len = cpu_to_le32(zroot.len);
 	c->mst_node->ihead_lnum = cpu_to_le32(c->ihead_lnum);
 	c->mst_node->ihead_offs = cpu_to_le32(c->ihead_offs);
-	c->mst_node->index_size = cpu_to_le64(c->old_idx_sz);
+	c->mst_node->index_size = cpu_to_le64(c->bi.old_idx_sz);
 	c->mst_node->lpt_lnum = cpu_to_le32(c->lpt_lnum);
 	c->mst_node->lpt_offs = cpu_to_le32(c->lpt_offs);
 	c->mst_node->nhead_lnum = cpu_to_le32(c->nhead_lnum);
@@ -160,12 +218,12 @@ static int do_commit(struct ubifs_info *c)
 	if (err)
 		goto out;
 
+out_cancel:
 	spin_lock(&c->cs_lock);
 	c->cmt_state = COMMIT_RESTING;
 	wake_up(&c->cmt_wq);
 	dbg_cmt("commit end");
 	spin_unlock(&c->cs_lock);
-
 	return 0;
 
 out_up:
@@ -519,7 +577,7 @@ int dbg_check_old_index(struct ubifs_info *c, struct ubifs_zbranch *zroot)
 	size_t sz;
 
 	if (!(ubifs_chk_flags & UBIFS_CHK_OLD_IDX))
-		goto out;
+		return 0;
 
 	INIT_LIST_HEAD(&list);
 
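The new out_cancel path gives do_commit() roughly this shape (a condensed skeleton of the function after this patch, not verbatim code):

	static int do_commit(struct ubifs_info *c)	/* skeleton */
	{
		int err;

		ubifs_assert(!c->ro_media && !c->ro_mount);
		if (c->ro_error) {
			err = -EROFS;
			goto out_up;		/* drop commit_sem and fail */
		}
		if (nothing_to_commit(c)) {
			up_write(&c->commit_sem);
			err = 0;
			goto out_cancel;	/* skip all flash I/O */
		}
		/* ... sync wbufs, commit TNC/LPT, write the master node ... */
	out_cancel:
		spin_lock(&c->cs_lock);
		c->cmt_state = COMMIT_RESTING;
		wake_up(&c->cmt_wq);
		spin_unlock(&c->cs_lock);
		return 0;
	out_up:
		up_write(&c->commit_sem);
		return err;
	}

Either way the waiters on @c->cmt_wq are woken, so callers cannot tell a skipped commit from a completed one.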
diff --git a/fs/ubifs/debug.c b/fs/ubifs/debug.c
index c2a68baa782f..0bb2bcef0de9 100644
--- a/fs/ubifs/debug.c
+++ b/fs/ubifs/debug.c
@@ -34,7 +34,6 @@
 #include <linux/moduleparam.h>
 #include <linux/debugfs.h>
 #include <linux/math64.h>
-#include <linux/slab.h>
 
 #ifdef CONFIG_UBIFS_FS_DEBUG
 
@@ -43,15 +42,12 @@ DEFINE_SPINLOCK(dbg_lock);
 static char dbg_key_buf0[128];
 static char dbg_key_buf1[128];
 
-unsigned int ubifs_msg_flags = UBIFS_MSG_FLAGS_DEFAULT;
-unsigned int ubifs_chk_flags = UBIFS_CHK_FLAGS_DEFAULT;
+unsigned int ubifs_chk_flags;
 unsigned int ubifs_tst_flags;
 
-module_param_named(debug_msgs, ubifs_msg_flags, uint, S_IRUGO | S_IWUSR);
 module_param_named(debug_chks, ubifs_chk_flags, uint, S_IRUGO | S_IWUSR);
 module_param_named(debug_tsts, ubifs_tst_flags, uint, S_IRUGO | S_IWUSR);
 
-MODULE_PARM_DESC(debug_msgs, "Debug message type flags");
 MODULE_PARM_DESC(debug_chks, "Debug check flags");
 MODULE_PARM_DESC(debug_tsts, "Debug special test flags");
 
@@ -317,6 +313,8 @@ void dbg_dump_node(const struct ubifs_info *c, const void *node)
 		printk(KERN_DEBUG "\tflags          %#x\n", sup_flags);
 		printk(KERN_DEBUG "\t  big_lpt      %u\n",
 		       !!(sup_flags & UBIFS_FLG_BIGLPT));
+		printk(KERN_DEBUG "\t  space_fixup  %u\n",
+		       !!(sup_flags & UBIFS_FLG_SPACE_FIXUP));
 		printk(KERN_DEBUG "\tmin_io_size    %u\n",
 		       le32_to_cpu(sup->min_io_size));
 		printk(KERN_DEBUG "\tleb_size       %u\n",
@@ -602,7 +600,7 @@ void dbg_dump_lstats(const struct ubifs_lp_stats *lst)
 	spin_unlock(&dbg_lock);
 }
 
-void dbg_dump_budg(struct ubifs_info *c)
+void dbg_dump_budg(struct ubifs_info *c, const struct ubifs_budg_info *bi)
 {
 	int i;
 	struct rb_node *rb;
@@ -610,26 +608,42 @@
 	struct ubifs_gced_idx_leb *idx_gc;
 	long long available, outstanding, free;
 
-	ubifs_assert(spin_is_locked(&c->space_lock));
+	spin_lock(&c->space_lock);
 	spin_lock(&dbg_lock);
-	printk(KERN_DEBUG "(pid %d) Budgeting info: budg_data_growth %lld, "
-	       "budg_dd_growth %lld, budg_idx_growth %lld\n", current->pid,
-	       c->budg_data_growth, c->budg_dd_growth, c->budg_idx_growth);
-	printk(KERN_DEBUG "\tdata budget sum %lld, total budget sum %lld, "
-	       "freeable_cnt %d\n", c->budg_data_growth + c->budg_dd_growth,
-	       c->budg_data_growth + c->budg_dd_growth + c->budg_idx_growth,
-	       c->freeable_cnt);
-	printk(KERN_DEBUG "\tmin_idx_lebs %d, old_idx_sz %lld, "
-	       "calc_idx_sz %lld, idx_gc_cnt %d\n", c->min_idx_lebs,
-	       c->old_idx_sz, c->calc_idx_sz, c->idx_gc_cnt);
+	printk(KERN_DEBUG "(pid %d) Budgeting info: data budget sum %lld, "
+	       "total budget sum %lld\n", current->pid,
+	       bi->data_growth + bi->dd_growth,
+	       bi->data_growth + bi->dd_growth + bi->idx_growth);
+	printk(KERN_DEBUG "\tbudg_data_growth %lld, budg_dd_growth %lld, "
+	       "budg_idx_growth %lld\n", bi->data_growth, bi->dd_growth,
+	       bi->idx_growth);
+	printk(KERN_DEBUG "\tmin_idx_lebs %d, old_idx_sz %llu, "
+	       "uncommitted_idx %lld\n", bi->min_idx_lebs, bi->old_idx_sz,
+	       bi->uncommitted_idx);
+	printk(KERN_DEBUG "\tpage_budget %d, inode_budget %d, dent_budget %d\n",
+	       bi->page_budget, bi->inode_budget, bi->dent_budget);
+	printk(KERN_DEBUG "\tnospace %u, nospace_rp %u\n",
+	       bi->nospace, bi->nospace_rp);
+	printk(KERN_DEBUG "\tdark_wm %d, dead_wm %d, max_idx_node_sz %d\n",
+	       c->dark_wm, c->dead_wm, c->max_idx_node_sz);
+
+	if (bi != &c->bi)
+		/*
+		 * If we are dumping saved budgeting data, do not print
+		 * additional information which is about the current state, not
+		 * the old one which corresponded to the saved budgeting data.
+		 */
+		goto out_unlock;
+
+	printk(KERN_DEBUG "\tfreeable_cnt %d, calc_idx_sz %lld, idx_gc_cnt %d\n",
+	       c->freeable_cnt, c->calc_idx_sz, c->idx_gc_cnt);
 	printk(KERN_DEBUG "\tdirty_pg_cnt %ld, dirty_zn_cnt %ld, "
 	       "clean_zn_cnt %ld\n", atomic_long_read(&c->dirty_pg_cnt),
 	       atomic_long_read(&c->dirty_zn_cnt),
 	       atomic_long_read(&c->clean_zn_cnt));
-	printk(KERN_DEBUG "\tdark_wm %d, dead_wm %d, max_idx_node_sz %d\n",
-	       c->dark_wm, c->dead_wm, c->max_idx_node_sz);
 	printk(KERN_DEBUG "\tgc_lnum %d, ihead_lnum %d\n",
 	       c->gc_lnum, c->ihead_lnum);
+
 	/* If we are in R/O mode, journal heads do not exist */
 	if (c->jheads)
 		for (i = 0; i < c->jhead_cnt; i++)
@@ -648,13 +662,15 @@ void dbg_dump_budg(struct ubifs_info *c)
 	printk(KERN_DEBUG "\tcommit state %d\n", c->cmt_state);
 
 	/* Print budgeting predictions */
-	available = ubifs_calc_available(c, c->min_idx_lebs);
-	outstanding = c->budg_data_growth + c->budg_dd_growth;
+	available = ubifs_calc_available(c, c->bi.min_idx_lebs);
+	outstanding = c->bi.data_growth + c->bi.dd_growth;
 	free = ubifs_get_free_space_nolock(c);
 	printk(KERN_DEBUG "Budgeting predictions:\n");
 	printk(KERN_DEBUG "\tavailable: %lld, outstanding %lld, free %lld\n",
 	       available, outstanding, free);
+out_unlock:
 	spin_unlock(&dbg_lock);
+	spin_unlock(&c->space_lock);
 }
 
 void dbg_dump_lprop(const struct ubifs_info *c, const struct ubifs_lprops *lp)
@@ -729,7 +745,13 @@ void dbg_dump_lprop(const struct ubifs_info *c, const struct ubifs_lprops *lp)
 		if (bud->lnum == lp->lnum) {
 			int head = 0;
 			for (i = 0; i < c->jhead_cnt; i++) {
-				if (lp->lnum == c->jheads[i].wbuf.lnum) {
+				/*
+				 * Note, if we are in R/O mode or in the middle
+				 * of mounting/re-mounting, the write-buffers do
+				 * not exist.
+				 */
+				if (c->jheads &&
+				    lp->lnum == c->jheads[i].wbuf.lnum) {
 					printk(KERN_CONT ", jhead %s",
 					       dbg_jhead(i));
 					head = 1;
@@ -810,16 +832,24 @@ void dbg_dump_leb(const struct ubifs_info *c, int lnum)
 {
 	struct ubifs_scan_leb *sleb;
 	struct ubifs_scan_node *snod;
+	void *buf;
 
 	if (dbg_failure_mode)
 		return;
 
 	printk(KERN_DEBUG "(pid %d) start dumping LEB %d\n",
 	       current->pid, lnum);
-	sleb = ubifs_scan(c, lnum, 0, c->dbg->buf, 0);
+
+	buf = __vmalloc(c->leb_size, GFP_NOFS, PAGE_KERNEL);
+	if (!buf) {
+		ubifs_err("cannot allocate memory for dumping LEB %d", lnum);
+		return;
+	}
+
+	sleb = ubifs_scan(c, lnum, 0, buf, 0);
 	if (IS_ERR(sleb)) {
 		ubifs_err("scan error %d", (int)PTR_ERR(sleb));
-		return;
+		goto out;
 	}
 
 	printk(KERN_DEBUG "LEB %d has %d nodes ending at %d\n", lnum,
@@ -835,6 +865,9 @@ void dbg_dump_leb(const struct ubifs_info *c, int lnum)
 	printk(KERN_DEBUG "(pid %d) finish dumping LEB %d\n",
 	       current->pid, lnum);
 	ubifs_scan_destroy(sleb);
+
+out:
+	vfree(buf);
 	return;
 }
 
@@ -961,11 +994,41 @@ void dbg_dump_index(struct ubifs_info *c)
 void dbg_save_space_info(struct ubifs_info *c)
 {
 	struct ubifs_debug_info *d = c->dbg;
-
-	ubifs_get_lp_stats(c, &d->saved_lst);
+	int freeable_cnt;
 
 	spin_lock(&c->space_lock);
+	memcpy(&d->saved_lst, &c->lst, sizeof(struct ubifs_lp_stats));
+	memcpy(&d->saved_bi, &c->bi, sizeof(struct ubifs_budg_info));
+	d->saved_idx_gc_cnt = c->idx_gc_cnt;
+
+	/*
+	 * We use a dirty hack here and zero out @c->freeable_cnt, because it
+	 * affects the free space calculations, and UBIFS might not know about
+	 * all freeable eraseblocks. Indeed, we know about freeable eraseblocks
+	 * only when we read their lprops, and we do this only lazily, upon the
+	 * need. So at any given point of time @c->freeable_cnt might be not
+	 * exactly accurate.
+	 *
+	 * Just one example about the issue we hit when we did not zero
+	 * @c->freeable_cnt.
+	 * 1. The file-system is mounted R/O, c->freeable_cnt is %0. We save
+	 *    the amount of free space in @d->saved_free.
+	 * 2. We re-mount R/W, which makes UBIFS read the "lsave"
+	 *    information from flash, where we cache LEBs from various
+	 *    categories ('ubifs_remount_fs()' -> 'ubifs_lpt_init()'
+	 *    -> 'lpt_init_wr()' -> 'read_lsave()' -> 'ubifs_lpt_lookup()'
+	 *    -> 'ubifs_get_pnode()' -> 'update_cats()'
+	 *    -> 'ubifs_add_to_cat()').
+	 * 3. Lsave contains a freeable eraseblock, and @c->freeable_cnt
+	 *    becomes %1.
+	 * 4. We calculate the amount of free space when the re-mount is
+	 *    finished in 'dbg_check_space_info()' and it does not match
+	 *    @d->saved_free.
+	 */
+	freeable_cnt = c->freeable_cnt;
+	c->freeable_cnt = 0;
 	d->saved_free = ubifs_get_free_space_nolock(c);
+	c->freeable_cnt = freeable_cnt;
 	spin_unlock(&c->space_lock);
 }
 
@@ -982,12 +1045,15 @@ int dbg_check_space_info(struct ubifs_info *c)
 {
 	struct ubifs_debug_info *d = c->dbg;
 	struct ubifs_lp_stats lst;
-	long long avail, free;
+	long long free;
+	int freeable_cnt;
 
 	spin_lock(&c->space_lock);
-	avail = ubifs_calc_available(c, c->min_idx_lebs);
+	freeable_cnt = c->freeable_cnt;
+	c->freeable_cnt = 0;
+	free = ubifs_get_free_space_nolock(c);
+	c->freeable_cnt = freeable_cnt;
 	spin_unlock(&c->space_lock);
-	free = ubifs_get_free_space(c);
 
 	if (free != d->saved_free) {
 		ubifs_err("free space changed from %lld to %lld",
@@ -1000,14 +1066,14 @@
 out:
 	ubifs_msg("saved lprops statistics dump");
 	dbg_dump_lstats(&d->saved_lst);
-	ubifs_get_lp_stats(c, &lst);
-
+	ubifs_msg("saved budgeting info dump");
+	dbg_dump_budg(c, &d->saved_bi);
+	ubifs_msg("saved idx_gc_cnt %d", d->saved_idx_gc_cnt);
 	ubifs_msg("current lprops statistics dump");
+	ubifs_get_lp_stats(c, &lst);
 	dbg_dump_lstats(&lst);
-
-	spin_lock(&c->space_lock);
-	dbg_dump_budg(c);
-	spin_unlock(&c->space_lock);
+	ubifs_msg("current budgeting info dump");
+	dbg_dump_budg(c, &c->bi);
 	dump_stack();
 	return -EINVAL;
 }
@@ -1751,6 +1817,8 @@ static struct fsck_inode *add_inode(struct ubifs_info *c,
 	struct rb_node **p, *parent = NULL;
 	struct fsck_inode *fscki;
 	ino_t inum = key_inum_flash(c, &ino->key);
+	struct inode *inode;
+	struct ubifs_inode *ui;
 
 	p = &fsckd->inodes.rb_node;
 	while (*p) {
@@ -1774,19 +1842,46 @@
 	if (!fscki)
 		return ERR_PTR(-ENOMEM);
 
+	inode = ilookup(c->vfs_sb, inum);
+
 	fscki->inum = inum;
-	fscki->nlink = le32_to_cpu(ino->nlink);
-	fscki->size = le64_to_cpu(ino->size);
-	fscki->xattr_cnt = le32_to_cpu(ino->xattr_cnt);
-	fscki->xattr_sz = le32_to_cpu(ino->xattr_size);
-	fscki->xattr_nms = le32_to_cpu(ino->xattr_names);
-	fscki->mode = le32_to_cpu(ino->mode);
+	/*
+	 * If the inode is present in the VFS inode cache, use it instead of
+	 * the on-flash inode which might be out-of-date. E.g., the size might
+	 * be out-of-date. If we do not do this, the following may happen, for
+	 * example:
+	 *   1. A power cut happens
+	 *   2. We mount the file-system R/O, the replay process fixes up the
+	 *      inode size in the VFS cache, but not on-flash.
+	 *   3. 'check_leaf()' fails because it hits a data node beyond inode
+	 *      size.
+	 */
+	if (!inode) {
+		fscki->nlink = le32_to_cpu(ino->nlink);
+		fscki->size = le64_to_cpu(ino->size);
+		fscki->xattr_cnt = le32_to_cpu(ino->xattr_cnt);
+		fscki->xattr_sz = le32_to_cpu(ino->xattr_size);
+		fscki->xattr_nms = le32_to_cpu(ino->xattr_names);
+		fscki->mode = le32_to_cpu(ino->mode);
+	} else {
+		ui = ubifs_inode(inode);
+		fscki->nlink = inode->i_nlink;
+		fscki->size = inode->i_size;
+		fscki->xattr_cnt = ui->xattr_cnt;
+		fscki->xattr_sz = ui->xattr_size;
+		fscki->xattr_nms = ui->xattr_names;
+		fscki->mode = inode->i_mode;
+		iput(inode);
+	}
+
 	if (S_ISDIR(fscki->mode)) {
 		fscki->calc_sz = UBIFS_INO_NODE_SZ;
 		fscki->calc_cnt = 2;
 	}
+
 	rb_link_node(&fscki->rb, parent, p);
 	rb_insert_color(&fscki->rb, &fsckd->inodes);
+
 	return fscki;
 }
 
@@ -2239,14 +2334,169 @@ out_free:
 	return err;
 }
 
-static int invocation_cnt;
+/**
+ * dbg_check_data_nodes_order - check that list of data nodes is sorted.
+ * @c: UBIFS file-system description object
+ * @head: the list of nodes ('struct ubifs_scan_node' objects)
+ *
+ * This function returns zero if the list of data nodes is sorted correctly,
+ * and %-EINVAL if not.
+ */
+int dbg_check_data_nodes_order(struct ubifs_info *c, struct list_head *head)
+{
+	struct list_head *cur;
+	struct ubifs_scan_node *sa, *sb;
+
+	if (!(ubifs_chk_flags & UBIFS_CHK_GEN))
+		return 0;
+
+	for (cur = head->next; cur->next != head; cur = cur->next) {
+		ino_t inuma, inumb;
+		uint32_t blka, blkb;
+
+		cond_resched();
+		sa = container_of(cur, struct ubifs_scan_node, list);
+		sb = container_of(cur->next, struct ubifs_scan_node, list);
+
+		if (sa->type != UBIFS_DATA_NODE) {
+			ubifs_err("bad node type %d", sa->type);
+			dbg_dump_node(c, sa->node);
+			return -EINVAL;
+		}
+		if (sb->type != UBIFS_DATA_NODE) {
+			ubifs_err("bad node type %d", sb->type);
+			dbg_dump_node(c, sb->node);
+			return -EINVAL;
+		}
+
+		inuma = key_inum(c, &sa->key);
+		inumb = key_inum(c, &sb->key);
+
+		if (inuma < inumb)
+			continue;
+		if (inuma > inumb) {
+			ubifs_err("larger inum %lu goes before inum %lu",
+				  (unsigned long)inuma, (unsigned long)inumb);
+			goto error_dump;
+		}
+
+		blka = key_block(c, &sa->key);
+		blkb = key_block(c, &sb->key);
+
+		if (blka > blkb) {
+			ubifs_err("larger block %u goes before %u", blka, blkb);
+			goto error_dump;
+		}
+		if (blka == blkb) {
+			ubifs_err("two data nodes for the same block");
+			goto error_dump;
+		}
+	}
+
+	return 0;
+
+error_dump:
+	dbg_dump_node(c, sa->node);
+	dbg_dump_node(c, sb->node);
+	return -EINVAL;
+}
+
+/**
+ * dbg_check_nondata_nodes_order - check that list of non-data nodes is sorted.
+ * @c: UBIFS file-system description object
+ * @head: the list of nodes ('struct ubifs_scan_node' objects)
+ *
+ * This function returns zero if the list of non-data nodes is sorted correctly,
+ * and %-EINVAL if not.
+ */
+int dbg_check_nondata_nodes_order(struct ubifs_info *c, struct list_head *head)
+{
+	struct list_head *cur;
+	struct ubifs_scan_node *sa, *sb;
+
+	if (!(ubifs_chk_flags & UBIFS_CHK_GEN))
+		return 0;
+
+	for (cur = head->next; cur->next != head; cur = cur->next) {
+		ino_t inuma, inumb;
+		uint32_t hasha, hashb;
+
+		cond_resched();
+		sa = container_of(cur, struct ubifs_scan_node, list);
+		sb = container_of(cur->next, struct ubifs_scan_node, list);
+
+		if (sa->type != UBIFS_INO_NODE && sa->type != UBIFS_DENT_NODE &&
+		    sa->type != UBIFS_XENT_NODE) {
+			ubifs_err("bad node type %d", sa->type);
+			dbg_dump_node(c, sa->node);
+			return -EINVAL;
+		}
+		if (sb->type != UBIFS_INO_NODE && sb->type != UBIFS_DENT_NODE &&
+		    sb->type != UBIFS_XENT_NODE) {
+			ubifs_err("bad node type %d", sb->type);
+			dbg_dump_node(c, sb->node);
+			return -EINVAL;
+		}
+
+		if (sa->type != UBIFS_INO_NODE && sb->type == UBIFS_INO_NODE) {
+			ubifs_err("non-inode node goes before inode node");
+			goto error_dump;
+		}
+
+		if (sa->type == UBIFS_INO_NODE && sb->type != UBIFS_INO_NODE)
+			continue;
+
+		if (sa->type == UBIFS_INO_NODE && sb->type == UBIFS_INO_NODE) {
+			/* Inode nodes are sorted in descending size order */
+			if (sa->len < sb->len) {
+				ubifs_err("smaller inode node goes first");
+				goto error_dump;
+			}
+			continue;
+		}
+
+		/*
+		 * This is either a dentry or xentry, which should be sorted in
+		 * ascending (parent ino, hash) order.
+		 */
+		inuma = key_inum(c, &sa->key);
+		inumb = key_inum(c, &sb->key);
+
+		if (inuma < inumb)
+			continue;
+		if (inuma > inumb) {
+			ubifs_err("larger inum %lu goes before inum %lu",
+				  (unsigned long)inuma, (unsigned long)inumb);
+			goto error_dump;
+		}
+
+		hasha = key_block(c, &sa->key);
+		hashb = key_block(c, &sb->key);
+
+		if (hasha > hashb) {
+			ubifs_err("larger hash %u goes before %u",
+				  hasha, hashb);
+			goto error_dump;
+		}
+	}
+
+	return 0;
+
+error_dump:
+	ubifs_msg("dumping first node");
+	dbg_dump_node(c, sa->node);
+	ubifs_msg("dumping second node");
+	dbg_dump_node(c, sb->node);
+	return -EINVAL;
+	return 0;
+}
 
 int dbg_force_in_the_gaps(void)
 {
-	if (!dbg_force_in_the_gaps_enabled)
+	if (!(ubifs_chk_flags & UBIFS_CHK_GEN))
 		return 0;
-	/* Force in-the-gaps every 8th commit */
-	return !((invocation_cnt++) & 0x7);
+
+	return !(random32() & 7);
 }
 
 /* Failure mode for recovery testing */
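Both checkers walk adjacent pairs of a list and compare keys lexicographically. Abstracted away from the UBIFS scan-node types, the invariant they enforce is just this (illustrative stand-alone code, not part of the patch):

	struct knode {
		struct knode *next;
		unsigned long inum;	/* primary key: inode number */
		unsigned int block;	/* secondary key: block (or name hash) */
	};

	/* Return 0 if the list is sorted by (inum, block) with no duplicates */
	static int check_pairs_sorted(const struct knode *n)
	{
		for (; n && n->next; n = n->next) {
			if (n->inum != n->next->inum) {
				if (n->inum > n->next->inum)
					return -1;	/* larger inum first */
				continue;
			}
			if (n->block >= n->next->block)
				return -1;	/* out of order or duplicate */
		}
		return 0;
	}

The non-data variant layers extra rules on top: inode nodes sort before dentry/xattr nodes and, among themselves, in descending length order; and because the secondary key for dentries is a name hash, equal hashes are allowed there.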
@@ -2434,7 +2684,7 @@ int dbg_leb_read(struct ubi_volume_desc *desc, int lnum, char *buf, int offset,
 		 int len, int check)
 {
 	if (in_failure_mode(desc))
-		return -EIO;
+		return -EROFS;
 	return ubi_leb_read(desc, lnum, buf, offset, len, check);
 }
 
@@ -2444,7 +2694,7 @@ int dbg_leb_write(struct ubi_volume_desc *desc, int lnum, const void *buf,
 	int err, failing;
 
 	if (in_failure_mode(desc))
-		return -EIO;
+		return -EROFS;
 	failing = do_fail(desc, lnum, 1);
 	if (failing)
 		cut_data(buf, len);
@@ -2452,7 +2702,7 @@ int dbg_leb_write(struct ubi_volume_desc *desc, int lnum, const void *buf,
 	if (err)
 		return err;
 	if (failing)
-		return -EIO;
+		return -EROFS;
 	return 0;
 }
 
@@ -2462,12 +2712,12 @@ int dbg_leb_change(struct ubi_volume_desc *desc, int lnum, const void *buf,
 	int err;
 
 	if (do_fail(desc, lnum, 1))
-		return -EIO;
+		return -EROFS;
 	err = ubi_leb_change(desc, lnum, buf, len, dtype);
 	if (err)
 		return err;
 	if (do_fail(desc, lnum, 1))
-		return -EIO;
+		return -EROFS;
 	return 0;
 }
 
@@ -2476,12 +2726,12 @@ int dbg_leb_erase(struct ubi_volume_desc *desc, int lnum)
 	int err;
 
 	if (do_fail(desc, lnum, 0))
-		return -EIO;
+		return -EROFS;
 	err = ubi_leb_erase(desc, lnum);
 	if (err)
 		return err;
 	if (do_fail(desc, lnum, 0))
-		return -EIO;
+		return -EROFS;
 	return 0;
 }
 
@@ -2490,19 +2740,19 @@ int dbg_leb_unmap(struct ubi_volume_desc *desc, int lnum)
 	int err;
 
 	if (do_fail(desc, lnum, 0))
-		return -EIO;
+		return -EROFS;
 	err = ubi_leb_unmap(desc, lnum);
 	if (err)
 		return err;
 	if (do_fail(desc, lnum, 0))
-		return -EIO;
+		return -EROFS;
 	return 0;
 }
 
 int dbg_is_mapped(struct ubi_volume_desc *desc, int lnum)
 {
 	if (in_failure_mode(desc))
-		return -EIO;
+		return -EROFS;
 	return ubi_is_mapped(desc, lnum);
 }
 
@@ -2511,12 +2761,12 @@ int dbg_leb_map(struct ubi_volume_desc *desc, int lnum, int dtype)
 	int err;
 
 	if (do_fail(desc, lnum, 0))
-		return -EIO;
+		return -EROFS;
 	err = ubi_leb_map(desc, lnum, dtype);
 	if (err)
 		return err;
 	if (do_fail(desc, lnum, 0))
-		return -EIO;
+		return -EROFS;
 	return 0;
 }
 
@@ -2534,16 +2784,8 @@ int ubifs_debugging_init(struct ubifs_info *c)
 	if (!c->dbg)
 		return -ENOMEM;
 
-	c->dbg->buf = vmalloc(c->leb_size);
-	if (!c->dbg->buf)
-		goto out;
-
 	failure_mode_init(c);
 	return 0;
-
-out:
-	kfree(c->dbg);
-	return -ENOMEM;
 }
 
 /**
@@ -2553,7 +2795,6 @@ out:
 void ubifs_debugging_exit(struct ubifs_info *c)
 {
 	failure_mode_exit(c);
-	vfree(c->dbg->buf);
 	kfree(c->dbg);
 }
 
@@ -2595,7 +2836,7 @@ void dbg_debugfs_exit(void)
 static int open_debugfs_file(struct inode *inode, struct file *file)
 {
 	file->private_data = inode->i_private;
-	return 0;
+	return nonseekable_open(inode, file);
 }
 
 static ssize_t write_debugfs_file(struct file *file, const char __user *buf,
@@ -2606,18 +2847,15 @@ static ssize_t write_debugfs_file(struct file *file, const char __user *buf,
 
 	if (file->f_path.dentry == d->dfs_dump_lprops)
 		dbg_dump_lprops(c);
-	else if (file->f_path.dentry == d->dfs_dump_budg) {
-		spin_lock(&c->space_lock);
-		dbg_dump_budg(c);
-		spin_unlock(&c->space_lock);
-	} else if (file->f_path.dentry == d->dfs_dump_tnc) {
+	else if (file->f_path.dentry == d->dfs_dump_budg)
+		dbg_dump_budg(c, &c->bi);
+	else if (file->f_path.dentry == d->dfs_dump_tnc) {
 		mutex_lock(&c->tnc_mutex);
 		dbg_dump_tnc(c);
 		mutex_unlock(&c->tnc_mutex);
 	} else
 		return -EINVAL;
 
-	*ppos += count;
 	return count;
 }
 
@@ -2625,6 +2863,7 @@ static const struct file_operations dfs_fops = {
 	.open = open_debugfs_file,
 	.write = write_debugfs_file,
 	.owner = THIS_MODULE,
+	.llseek = no_llseek,
 };
 
 /**
@@ -2647,40 +2886,38 @@ int dbg_debugfs_init_fs(struct ubifs_info *c)
 	struct ubifs_debug_info *d = c->dbg;
 
 	sprintf(d->dfs_dir_name, "ubi%d_%d", c->vi.ubi_num, c->vi.vol_id);
-	d->dfs_dir = debugfs_create_dir(d->dfs_dir_name, dfs_rootdir);
-	if (IS_ERR(d->dfs_dir)) {
-		err = PTR_ERR(d->dfs_dir);
-		ubifs_err("cannot create \"%s\" debugfs directory, error %d\n",
-			  d->dfs_dir_name, err);
+	fname = d->dfs_dir_name;
+	dent = debugfs_create_dir(fname, dfs_rootdir);
+	if (IS_ERR_OR_NULL(dent))
 		goto out;
-	}
+	d->dfs_dir = dent;
 
 	fname = "dump_lprops";
-	dent = debugfs_create_file(fname, S_IWUGO, d->dfs_dir, c, &dfs_fops);
-	if (IS_ERR(dent))
+	dent = debugfs_create_file(fname, S_IWUSR, d->dfs_dir, c, &dfs_fops);
+	if (IS_ERR_OR_NULL(dent))
 		goto out_remove;
 	d->dfs_dump_lprops = dent;
 
 	fname = "dump_budg";
-	dent = debugfs_create_file(fname, S_IWUGO, d->dfs_dir, c, &dfs_fops);
-	if (IS_ERR(dent))
+	dent = debugfs_create_file(fname, S_IWUSR, d->dfs_dir, c, &dfs_fops);
+	if (IS_ERR_OR_NULL(dent))
 		goto out_remove;
 	d->dfs_dump_budg = dent;
 
 	fname = "dump_tnc";
-	dent = debugfs_create_file(fname, S_IWUGO, d->dfs_dir, c, &dfs_fops);
-	if (IS_ERR(dent))
+	dent = debugfs_create_file(fname, S_IWUSR, d->dfs_dir, c, &dfs_fops);
+	if (IS_ERR_OR_NULL(dent))
 		goto out_remove;
 	d->dfs_dump_tnc = dent;
 
 	return 0;
 
 out_remove:
-	err = PTR_ERR(dent);
-	ubifs_err("cannot create \"%s\" debugfs directory, error %d\n",
-		  fname, err);
 	debugfs_remove_recursive(d->dfs_dir);
 out:
+	err = dent ? PTR_ERR(dent) : -ENODEV;
+	ubifs_err("cannot create \"%s\" debugfs directory, error %d\n",
+		  fname, err);
 	return err;
 }
 
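One subtlety in the hunk above: debugfs_create_dir() and debugfs_create_file() return ERR_PTR(-ENODEV) when debugfs is disabled in the kernel configuration, but NULL on an ordinary creation failure, so a bare IS_ERR() check misses the NULL case. That is why the checks become IS_ERR_OR_NULL() and the error path computes the code from dent, in the style of (condensed from the code above):

	dent = debugfs_create_file(fname, S_IWUSR, d->dfs_dir, c, &dfs_fops);
	if (IS_ERR_OR_NULL(dent)) {
		err = dent ? PTR_ERR(dent) : -ENODEV;	/* NULL: create failed */
		goto out_remove;
	}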
diff --git a/fs/ubifs/debug.h b/fs/ubifs/debug.h
index 29d960101ea6..a811ac4a26bb 100644
--- a/fs/ubifs/debug.h
+++ b/fs/ubifs/debug.h
@@ -23,11 +23,18 @@
 #ifndef __UBIFS_DEBUG_H__
 #define __UBIFS_DEBUG_H__
 
+/* Checking helper functions */
+typedef int (*dbg_leaf_callback)(struct ubifs_info *c,
+				 struct ubifs_zbranch *zbr, void *priv);
+typedef int (*dbg_znode_callback)(struct ubifs_info *c,
+				  struct ubifs_znode *znode, void *priv);
+
 #ifdef CONFIG_UBIFS_FS_DEBUG
 
+#include <linux/random.h>
+
 /**
  * ubifs_debug_info - per-FS debugging information.
- * @buf: a buffer of LEB size, used for various purposes
  * @old_zroot: old index root - used by 'dbg_check_old_index()'
  * @old_zroot_level: old index root level - used by 'dbg_check_old_index()'
  * @old_zroot_sqnum: old index root sqnum - used by 'dbg_check_old_index()'
@@ -45,16 +52,17 @@
  * @new_ihead_offs: used by debugging to check @c->ihead_offs
  *
  * @saved_lst: saved lprops statistics (used by 'dbg_save_space_info()')
- * @saved_free: saved free space (used by 'dbg_save_space_info()')
+ * @saved_bi: saved budgeting information
+ * @saved_free: saved amount of free space
+ * @saved_idx_gc_cnt: saved value of @c->idx_gc_cnt
  *
- * dfs_dir_name: name of debugfs directory containing this file-system's files
- * dfs_dir: direntry object of the file-system debugfs directory
- * dfs_dump_lprops: "dump lprops" debugfs knob
- * dfs_dump_budg: "dump budgeting information" debugfs knob
- * dfs_dump_tnc: "dump TNC" debugfs knob
+ * @dfs_dir_name: name of debugfs directory containing this file-system's files
+ * @dfs_dir: direntry object of the file-system debugfs directory
+ * @dfs_dump_lprops: "dump lprops" debugfs knob
+ * @dfs_dump_budg: "dump budgeting information" debugfs knob
+ * @dfs_dump_tnc: "dump TNC" debugfs knob
  */
 struct ubifs_debug_info {
-	void *buf;
 	struct ubifs_zbranch old_zroot;
 	int old_zroot_level;
 	unsigned long long old_zroot_sqnum;
@@ -72,7 +80,9 @@ struct ubifs_debug_info {
 	int new_ihead_offs;
 
 	struct ubifs_lp_stats saved_lst;
+	struct ubifs_budg_info saved_bi;
 	long long saved_free;
+	int saved_idx_gc_cnt;
 
 	char dfs_dir_name[100];
 	struct dentry *dfs_dir;
@@ -97,23 +107,7 @@ struct ubifs_debug_info {
 	} \
 } while (0)
 
-#define dbg_dump_stack() do { \
-	if (!dbg_failure_mode) \
-		dump_stack(); \
-} while (0)
-
-/* Generic debugging messages */
-#define dbg_msg(fmt, ...) do { \
-	spin_lock(&dbg_lock); \
-	printk(KERN_DEBUG "UBIFS DBG (pid %d): %s: " fmt "\n", current->pid, \
-	       __func__, ##__VA_ARGS__); \
-	spin_unlock(&dbg_lock); \
-} while (0)
-
-#define dbg_do_msg(typ, fmt, ...) do { \
-	if (ubifs_msg_flags & typ) \
-		dbg_msg(fmt, ##__VA_ARGS__); \
-} while (0)
+#define dbg_dump_stack() dump_stack()
 
 #define dbg_err(fmt, ...) do { \
 	spin_lock(&dbg_lock); \
@@ -133,86 +127,43 @@ const char *dbg_key_str1(const struct ubifs_info *c,
 #define DBGKEY(key) dbg_key_str0(c, (key))
 #define DBGKEY1(key) dbg_key_str1(c, (key))
 
-/* General messages */
-#define dbg_gen(fmt, ...) dbg_do_msg(UBIFS_MSG_GEN, fmt, ##__VA_ARGS__)
+#define ubifs_dbg_msg(type, fmt, ...) do { \
+	spin_lock(&dbg_lock); \
+	pr_debug("UBIFS DBG " type ": " fmt "\n", ##__VA_ARGS__); \
+	spin_unlock(&dbg_lock); \
+} while (0)
 
+/* Just a debugging messages not related to any specific UBIFS subsystem */
+#define dbg_msg(fmt, ...)   ubifs_dbg_msg("msg", fmt, ##__VA_ARGS__)
+/* General messages */
+#define dbg_gen(fmt, ...)   ubifs_dbg_msg("gen", fmt, ##__VA_ARGS__)
 /* Additional journal messages */
-#define dbg_jnl(fmt, ...) dbg_do_msg(UBIFS_MSG_JNL, fmt, ##__VA_ARGS__)
-
+#define dbg_jnl(fmt, ...)   ubifs_dbg_msg("jnl", fmt, ##__VA_ARGS__)
 /* Additional TNC messages */
-#define dbg_tnc(fmt, ...) dbg_do_msg(UBIFS_MSG_TNC, fmt, ##__VA_ARGS__)
-
+#define dbg_tnc(fmt, ...)   ubifs_dbg_msg("tnc", fmt, ##__VA_ARGS__)
 /* Additional lprops messages */
-#define dbg_lp(fmt, ...) dbg_do_msg(UBIFS_MSG_LP, fmt, ##__VA_ARGS__)
-
+#define dbg_lp(fmt, ...)    ubifs_dbg_msg("lp", fmt, ##__VA_ARGS__)
 /* Additional LEB find messages */
-#define dbg_find(fmt, ...) dbg_do_msg(UBIFS_MSG_FIND, fmt, ##__VA_ARGS__)
-
+#define dbg_find(fmt, ...)  ubifs_dbg_msg("find", fmt, ##__VA_ARGS__)
 /* Additional mount messages */
-#define dbg_mnt(fmt, ...) dbg_do_msg(UBIFS_MSG_MNT, fmt, ##__VA_ARGS__)
-
+#define dbg_mnt(fmt, ...)   ubifs_dbg_msg("mnt", fmt, ##__VA_ARGS__)
 /* Additional I/O messages */
-#define dbg_io(fmt, ...) dbg_do_msg(UBIFS_MSG_IO, fmt, ##__VA_ARGS__)
-
+#define dbg_io(fmt, ...)    ubifs_dbg_msg("io", fmt, ##__VA_ARGS__)
 /* Additional commit messages */
-#define dbg_cmt(fmt, ...) dbg_do_msg(UBIFS_MSG_CMT, fmt, ##__VA_ARGS__)
-
+#define dbg_cmt(fmt, ...)   ubifs_dbg_msg("cmt", fmt, ##__VA_ARGS__)
 /* Additional budgeting messages */
-#define dbg_budg(fmt, ...) dbg_do_msg(UBIFS_MSG_BUDG, fmt, ##__VA_ARGS__)
-
+#define dbg_budg(fmt, ...)  ubifs_dbg_msg("budg", fmt, ##__VA_ARGS__)
 /* Additional log messages */
-#define dbg_log(fmt, ...) dbg_do_msg(UBIFS_MSG_LOG, fmt, ##__VA_ARGS__)
-
+#define dbg_log(fmt, ...)   ubifs_dbg_msg("log", fmt, ##__VA_ARGS__)
 /* Additional gc messages */
-#define dbg_gc(fmt, ...) dbg_do_msg(UBIFS_MSG_GC, fmt, ##__VA_ARGS__)
-
+#define dbg_gc(fmt, ...)    ubifs_dbg_msg("gc", fmt, ##__VA_ARGS__)
 /* Additional scan messages */
-#define dbg_scan(fmt, ...) dbg_do_msg(UBIFS_MSG_SCAN, fmt, ##__VA_ARGS__)
-
+#define dbg_scan(fmt, ...)  ubifs_dbg_msg("scan", fmt, ##__VA_ARGS__)
 /* Additional recovery messages */
-#define dbg_rcvry(fmt, ...) dbg_do_msg(UBIFS_MSG_RCVRY, fmt, ##__VA_ARGS__)
+#define dbg_rcvry(fmt, ...) ubifs_dbg_msg("rcvry", fmt, ##__VA_ARGS__)
 
 /*
- * Debugging message type flags (must match msg_type_names in debug.c).
- *
- * UBIFS_MSG_GEN: general messages
- * UBIFS_MSG_JNL: journal messages
- * UBIFS_MSG_MNT: mount messages
- * UBIFS_MSG_CMT: commit messages
- * UBIFS_MSG_FIND: LEB find messages
- * UBIFS_MSG_BUDG: budgeting messages
- * UBIFS_MSG_GC: garbage collection messages
- * UBIFS_MSG_TNC: TNC messages
- * UBIFS_MSG_LP: lprops messages
- * UBIFS_MSG_IO: I/O messages
- * UBIFS_MSG_LOG: log messages
- * UBIFS_MSG_SCAN: scan messages
- * UBIFS_MSG_RCVRY: recovery messages
- */
-enum {
-	UBIFS_MSG_GEN = 0x1,
-	UBIFS_MSG_JNL = 0x2,
-	UBIFS_MSG_MNT = 0x4,
-	UBIFS_MSG_CMT = 0x8,
-	UBIFS_MSG_FIND = 0x10,
-	UBIFS_MSG_BUDG = 0x20,
-	UBIFS_MSG_GC = 0x40,
-	UBIFS_MSG_TNC = 0x80,
-	UBIFS_MSG_LP = 0x100,
-	UBIFS_MSG_IO = 0x200,
-	UBIFS_MSG_LOG = 0x400,
-	UBIFS_MSG_SCAN = 0x800,
-	UBIFS_MSG_RCVRY = 0x1000,
-};
-
-/* Debugging message type flags for each default debug message level */
-#define UBIFS_MSG_LVL_0 0
-#define UBIFS_MSG_LVL_1 0x1
-#define UBIFS_MSG_LVL_2 0x7f
-#define UBIFS_MSG_LVL_3 0xffff
-
-/*
- * Debugging check flags (must match chk_names in debug.c).
+ * Debugging check flags.
  *
  * UBIFS_CHK_GEN: general checks
  * UBIFS_CHK_TNC: check TNC
@@ -233,32 +184,14 @@ enum {
233}; 184};
234 185
235/* 186/*
236 * Special testing flags (must match tst_names in debug.c). 187 * Special testing flags.
237 * 188 *
238 * UBIFS_TST_FORCE_IN_THE_GAPS: force the use of in-the-gaps method
239 * UBIFS_TST_RCVRY: failure mode for recovery testing 189 * UBIFS_TST_RCVRY: failure mode for recovery testing
240 */ 190 */
241enum { 191enum {
242 UBIFS_TST_FORCE_IN_THE_GAPS = 0x2,
243 UBIFS_TST_RCVRY = 0x4, 192 UBIFS_TST_RCVRY = 0x4,
244}; 193};
245 194
246#if CONFIG_UBIFS_FS_DEBUG_MSG_LVL == 1
247#define UBIFS_MSG_FLAGS_DEFAULT UBIFS_MSG_LVL_1
248#elif CONFIG_UBIFS_FS_DEBUG_MSG_LVL == 2
249#define UBIFS_MSG_FLAGS_DEFAULT UBIFS_MSG_LVL_2
250#elif CONFIG_UBIFS_FS_DEBUG_MSG_LVL == 3
251#define UBIFS_MSG_FLAGS_DEFAULT UBIFS_MSG_LVL_3
252#else
253#define UBIFS_MSG_FLAGS_DEFAULT UBIFS_MSG_LVL_0
254#endif
255
256#ifdef CONFIG_UBIFS_FS_DEBUG_CHKS
257#define UBIFS_CHK_FLAGS_DEFAULT 0xffffffff
258#else
259#define UBIFS_CHK_FLAGS_DEFAULT 0
260#endif
261
262extern spinlock_t dbg_lock; 195extern spinlock_t dbg_lock;
263 196
264extern unsigned int ubifs_msg_flags; 197extern unsigned int ubifs_msg_flags;
@@ -280,7 +213,7 @@ void dbg_dump_lpt_node(const struct ubifs_info *c, void *node, int lnum,
280 int offs); 213 int offs);
281void dbg_dump_budget_req(const struct ubifs_budget_req *req); 214void dbg_dump_budget_req(const struct ubifs_budget_req *req);
282void dbg_dump_lstats(const struct ubifs_lp_stats *lst); 215void dbg_dump_lstats(const struct ubifs_lp_stats *lst);
283void dbg_dump_budg(struct ubifs_info *c); 216void dbg_dump_budg(struct ubifs_info *c, const struct ubifs_budg_info *bi);
284void dbg_dump_lprop(const struct ubifs_info *c, const struct ubifs_lprops *lp); 217void dbg_dump_lprop(const struct ubifs_info *c, const struct ubifs_lprops *lp);
285void dbg_dump_lprops(struct ubifs_info *c); 218void dbg_dump_lprops(struct ubifs_info *c);
286void dbg_dump_lpt_info(struct ubifs_info *c); 219void dbg_dump_lpt_info(struct ubifs_info *c);
@@ -294,11 +227,6 @@ void dbg_dump_tnc(struct ubifs_info *c);
294void dbg_dump_index(struct ubifs_info *c); 227void dbg_dump_index(struct ubifs_info *c);
295void dbg_dump_lpt_lebs(const struct ubifs_info *c); 228void dbg_dump_lpt_lebs(const struct ubifs_info *c);
296 229
297/* Checking helper functions */
298typedef int (*dbg_leaf_callback)(struct ubifs_info *c,
299 struct ubifs_zbranch *zbr, void *priv);
300typedef int (*dbg_znode_callback)(struct ubifs_info *c,
301 struct ubifs_znode *znode, void *priv);
302int dbg_walk_index(struct ubifs_info *c, dbg_leaf_callback leaf_cb, 230int dbg_walk_index(struct ubifs_info *c, dbg_leaf_callback leaf_cb,
303 dbg_znode_callback znode_cb, void *priv); 231 dbg_znode_callback znode_cb, void *priv);
304 232
@@ -319,25 +247,24 @@ int dbg_check_idx_size(struct ubifs_info *c, long long idx_size);
319int dbg_check_filesystem(struct ubifs_info *c); 247int dbg_check_filesystem(struct ubifs_info *c);
320void dbg_check_heap(struct ubifs_info *c, struct ubifs_lpt_heap *heap, int cat, 248void dbg_check_heap(struct ubifs_info *c, struct ubifs_lpt_heap *heap, int cat,
321 int add_pos); 249 int add_pos);
322int dbg_check_lprops(struct ubifs_info *c);
323int dbg_check_lpt_nodes(struct ubifs_info *c, struct ubifs_cnode *cnode, 250int dbg_check_lpt_nodes(struct ubifs_info *c, struct ubifs_cnode *cnode,
324 int row, int col); 251 int row, int col);
325int dbg_check_inode_size(struct ubifs_info *c, const struct inode *inode, 252int dbg_check_inode_size(struct ubifs_info *c, const struct inode *inode,
326 loff_t size); 253 loff_t size);
254int dbg_check_data_nodes_order(struct ubifs_info *c, struct list_head *head);
255int dbg_check_nondata_nodes_order(struct ubifs_info *c, struct list_head *head);
327 256
328/* Force the use of in-the-gaps method for testing */ 257/* Force the use of in-the-gaps method for testing */
329 258static inline int dbg_force_in_the_gaps_enabled(void)
330#define dbg_force_in_the_gaps_enabled \ 259{
331 (ubifs_tst_flags & UBIFS_TST_FORCE_IN_THE_GAPS) 260 return ubifs_chk_flags & UBIFS_CHK_GEN;
332 261}
333int dbg_force_in_the_gaps(void); 262int dbg_force_in_the_gaps(void);
334 263
335/* Failure mode for recovery testing */ 264/* Failure mode for recovery testing */
336
337#define dbg_failure_mode (ubifs_tst_flags & UBIFS_TST_RCVRY) 265#define dbg_failure_mode (ubifs_tst_flags & UBIFS_TST_RCVRY)
338 266
339#ifndef UBIFS_DBG_PRESERVE_UBI 267#ifndef UBIFS_DBG_PRESERVE_UBI
340
341#define ubi_leb_read dbg_leb_read 268#define ubi_leb_read dbg_leb_read
342#define ubi_leb_write dbg_leb_write 269#define ubi_leb_write dbg_leb_write
343#define ubi_leb_change dbg_leb_change 270#define ubi_leb_change dbg_leb_change
@@ -345,7 +272,6 @@ int dbg_force_in_the_gaps(void);
345#define ubi_leb_unmap dbg_leb_unmap 272#define ubi_leb_unmap dbg_leb_unmap
346#define ubi_is_mapped dbg_is_mapped 273#define ubi_is_mapped dbg_is_mapped
347#define ubi_leb_map dbg_leb_map 274#define ubi_leb_map dbg_leb_map
348
349#endif 275#endif
350 276
351int dbg_leb_read(struct ubi_volume_desc *desc, int lnum, char *buf, int offset, 277int dbg_leb_read(struct ubi_volume_desc *desc, int lnum, char *buf, int offset,
@@ -392,87 +318,127 @@ void dbg_debugfs_exit_fs(struct ubifs_info *c);
392 __func__, __LINE__, current->pid); \ 318 __func__, __LINE__, current->pid); \
393} while (0) 319} while (0)
394 320
395#define dbg_err(fmt, ...) do { \ 321#define dbg_err(fmt, ...) do { \
396 if (0) \ 322 if (0) \
397 ubifs_err(fmt, ##__VA_ARGS__); \ 323 ubifs_err(fmt, ##__VA_ARGS__); \
398} while (0) 324} while (0)
399 325
400#define dbg_msg(fmt, ...) do { \ 326#define ubifs_dbg_msg(fmt, ...) do { \
401 if (0) \ 327 if (0) \
402 printk(KERN_DEBUG "UBIFS DBG (pid %d): %s: " fmt "\n", \ 328 pr_debug(fmt "\n", ##__VA_ARGS__); \
403 current->pid, __func__, ##__VA_ARGS__); \
404} while (0) 329} while (0)
405 330
406#define dbg_dump_stack() 331#define dbg_dump_stack()
407#define ubifs_assert_cmt_locked(c) 332#define ubifs_assert_cmt_locked(c)
408 333
409#define dbg_gen(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__) 334#define dbg_msg(fmt, ...) ubifs_dbg_msg(fmt, ##__VA_ARGS__)
410#define dbg_jnl(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__) 335#define dbg_gen(fmt, ...) ubifs_dbg_msg(fmt, ##__VA_ARGS__)
411#define dbg_tnc(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__) 336#define dbg_jnl(fmt, ...) ubifs_dbg_msg(fmt, ##__VA_ARGS__)
412#define dbg_lp(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__) 337#define dbg_tnc(fmt, ...) ubifs_dbg_msg(fmt, ##__VA_ARGS__)
413#define dbg_find(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__) 338#define dbg_lp(fmt, ...) ubifs_dbg_msg(fmt, ##__VA_ARGS__)
414#define dbg_mnt(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__) 339#define dbg_find(fmt, ...) ubifs_dbg_msg(fmt, ##__VA_ARGS__)
415#define dbg_io(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__) 340#define dbg_mnt(fmt, ...) ubifs_dbg_msg(fmt, ##__VA_ARGS__)
416#define dbg_cmt(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__) 341#define dbg_io(fmt, ...) ubifs_dbg_msg(fmt, ##__VA_ARGS__)
417#define dbg_budg(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__) 342#define dbg_cmt(fmt, ...) ubifs_dbg_msg(fmt, ##__VA_ARGS__)
418#define dbg_log(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__) 343#define dbg_budg(fmt, ...) ubifs_dbg_msg(fmt, ##__VA_ARGS__)
419#define dbg_gc(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__) 344#define dbg_log(fmt, ...) ubifs_dbg_msg(fmt, ##__VA_ARGS__)
420#define dbg_scan(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__) 345#define dbg_gc(fmt, ...) ubifs_dbg_msg(fmt, ##__VA_ARGS__)
421#define dbg_rcvry(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__) 346#define dbg_scan(fmt, ...) ubifs_dbg_msg(fmt, ##__VA_ARGS__)
347#define dbg_rcvry(fmt, ...) ubifs_dbg_msg(fmt, ##__VA_ARGS__)
422 348
423#define DBGKEY(key) ((char *)(key)) 349#define DBGKEY(key) ((char *)(key))
424#define DBGKEY1(key) ((char *)(key)) 350#define DBGKEY1(key) ((char *)(key))
425 351
426#define ubifs_debugging_init(c) 0 352static inline int ubifs_debugging_init(struct ubifs_info *c) { return 0; }
427#define ubifs_debugging_exit(c) ({}) 353static inline void ubifs_debugging_exit(struct ubifs_info *c) { return; }
428 354static inline const char *dbg_ntype(int type) { return ""; }
429#define dbg_ntype(type) "" 355static inline const char *dbg_cstate(int cmt_state) { return ""; }
430#define dbg_cstate(cmt_state) "" 356static inline const char *dbg_jhead(int jhead) { return ""; }
431#define dbg_jhead(jhead) "" 357static inline const char *
432#define dbg_get_key_dump(c, key) ({}) 358dbg_get_key_dump(const struct ubifs_info *c,
433#define dbg_dump_inode(c, inode) ({}) 359 const union ubifs_key *key) { return ""; }
434#define dbg_dump_node(c, node) ({}) 360static inline void dbg_dump_inode(const struct ubifs_info *c,
435#define dbg_dump_lpt_node(c, node, lnum, offs) ({}) 361 const struct inode *inode) { return; }
436#define dbg_dump_budget_req(req) ({}) 362static inline void dbg_dump_node(const struct ubifs_info *c,
437#define dbg_dump_lstats(lst) ({}) 363 const void *node) { return; }
438#define dbg_dump_budg(c) ({}) 364static inline void dbg_dump_lpt_node(const struct ubifs_info *c,
439#define dbg_dump_lprop(c, lp) ({}) 365 void *node, int lnum,
440#define dbg_dump_lprops(c) ({}) 366 int offs) { return; }
441#define dbg_dump_lpt_info(c) ({}) 367static inline void
442#define dbg_dump_leb(c, lnum) ({}) 368dbg_dump_budget_req(const struct ubifs_budget_req *req) { return; }
443#define dbg_dump_znode(c, znode) ({}) 369static inline void
444#define dbg_dump_heap(c, heap, cat) ({}) 370dbg_dump_lstats(const struct ubifs_lp_stats *lst) { return; }
445#define dbg_dump_pnode(c, pnode, parent, iip) ({}) 371static inline void
446#define dbg_dump_tnc(c) ({}) 372dbg_dump_budg(struct ubifs_info *c,
447#define dbg_dump_index(c) ({}) 373 const struct ubifs_budg_info *bi) { return; }
448#define dbg_dump_lpt_lebs(c) ({}) 374static inline void dbg_dump_lprop(const struct ubifs_info *c,
449 375 const struct ubifs_lprops *lp) { return; }
450#define dbg_walk_index(c, leaf_cb, znode_cb, priv) 0 376static inline void dbg_dump_lprops(struct ubifs_info *c) { return; }
451#define dbg_old_index_check_init(c, zroot) 0 377static inline void dbg_dump_lpt_info(struct ubifs_info *c) { return; }
452#define dbg_save_space_info(c) ({}) 378static inline void dbg_dump_leb(const struct ubifs_info *c,
453#define dbg_check_space_info(c) 0 379 int lnum) { return; }
454#define dbg_check_old_index(c, zroot) 0 380static inline void
455#define dbg_check_cats(c) 0 381dbg_dump_znode(const struct ubifs_info *c,
456#define dbg_check_ltab(c) 0 382 const struct ubifs_znode *znode) { return; }
457#define dbg_chk_lpt_free_spc(c) 0 383static inline void dbg_dump_heap(struct ubifs_info *c,
458#define dbg_chk_lpt_sz(c, action, len) 0 384 struct ubifs_lpt_heap *heap,
459#define dbg_check_synced_i_size(inode) 0 385 int cat) { return; }
460#define dbg_check_dir_size(c, dir) 0 386static inline void dbg_dump_pnode(struct ubifs_info *c,
461#define dbg_check_tnc(c, x) 0 387 struct ubifs_pnode *pnode,
462#define dbg_check_idx_size(c, idx_size) 0 388 struct ubifs_nnode *parent,
463#define dbg_check_filesystem(c) 0 389 int iip) { return; }
464#define dbg_check_heap(c, heap, cat, add_pos) ({}) 390static inline void dbg_dump_tnc(struct ubifs_info *c) { return; }
465#define dbg_check_lprops(c) 0 391static inline void dbg_dump_index(struct ubifs_info *c) { return; }
466#define dbg_check_lpt_nodes(c, cnode, row, col) 0 392static inline void dbg_dump_lpt_lebs(const struct ubifs_info *c) { return; }
467#define dbg_check_inode_size(c, inode, size) 0 393
468#define dbg_force_in_the_gaps_enabled 0 394static inline int dbg_walk_index(struct ubifs_info *c,
469#define dbg_force_in_the_gaps() 0 395 dbg_leaf_callback leaf_cb,
470#define dbg_failure_mode 0 396 dbg_znode_callback znode_cb,
471 397 void *priv) { return 0; }
472#define dbg_debugfs_init() 0 398static inline void dbg_save_space_info(struct ubifs_info *c) { return; }
473#define dbg_debugfs_exit() 399static inline int dbg_check_space_info(struct ubifs_info *c) { return 0; }
474#define dbg_debugfs_init_fs(c) 0 400static inline int dbg_check_lprops(struct ubifs_info *c) { return 0; }
475#define dbg_debugfs_exit_fs(c) 0 401static inline int
402dbg_old_index_check_init(struct ubifs_info *c,
403 struct ubifs_zbranch *zroot) { return 0; }
404static inline int
405dbg_check_old_index(struct ubifs_info *c,
406 struct ubifs_zbranch *zroot) { return 0; }
407static inline int dbg_check_cats(struct ubifs_info *c) { return 0; }
408static inline int dbg_check_ltab(struct ubifs_info *c) { return 0; }
409static inline int dbg_chk_lpt_free_spc(struct ubifs_info *c) { return 0; }
410static inline int dbg_chk_lpt_sz(struct ubifs_info *c,
411 int action, int len) { return 0; }
412static inline int dbg_check_synced_i_size(struct inode *inode) { return 0; }
413static inline int dbg_check_dir_size(struct ubifs_info *c,
414 const struct inode *dir) { return 0; }
415static inline int dbg_check_tnc(struct ubifs_info *c, int extra) { return 0; }
416static inline int dbg_check_idx_size(struct ubifs_info *c,
417 long long idx_size) { return 0; }
418static inline int dbg_check_filesystem(struct ubifs_info *c) { return 0; }
419static inline void dbg_check_heap(struct ubifs_info *c,
420 struct ubifs_lpt_heap *heap,
421 int cat, int add_pos) { return; }
422static inline int dbg_check_lpt_nodes(struct ubifs_info *c,
423 struct ubifs_cnode *cnode, int row, int col) { return 0; }
424static inline int dbg_check_inode_size(struct ubifs_info *c,
425 const struct inode *inode,
426 loff_t size) { return 0; }
427static inline int
428dbg_check_data_nodes_order(struct ubifs_info *c,
429 struct list_head *head) { return 0; }
430static inline int
431dbg_check_nondata_nodes_order(struct ubifs_info *c,
432 struct list_head *head) { return 0; }
433
434static inline int dbg_force_in_the_gaps(void) { return 0; }
435#define dbg_force_in_the_gaps_enabled() 0
436#define dbg_failure_mode 0
437
438static inline int dbg_debugfs_init(void) { return 0; }
439static inline void dbg_debugfs_exit(void) { return; }
440static inline int dbg_debugfs_init_fs(struct ubifs_info *c) { return 0; }
441static inline int dbg_debugfs_exit_fs(struct ubifs_info *c) { return 0; }
476 442
477#endif /* !CONFIG_UBIFS_FS_DEBUG */ 443#endif /* !CONFIG_UBIFS_FS_DEBUG */
478#endif /* !__UBIFS_DEBUG_H__ */ 444#endif /* !__UBIFS_DEBUG_H__ */
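
A note on the debug.h rework above: the !CONFIG_UBIFS_FS_DEBUG branch replaces no-op macros with empty static inline functions, so arguments keep being type-checked even when debugging is compiled out, while the compiler still eliminates the calls. A minimal standalone sketch of the pattern (names are invented for illustration; this is not UBIFS code):

#include <stdio.h>

struct ctx { int id; };

#ifdef SKETCH_DEBUG
static void dbg_dump(const struct ctx *c)
{
        printf("ctx %d\n", c->id);
}
#else
/* Emits no code, but the argument type is still checked. */
static inline void dbg_dump(const struct ctx *c) { (void)c; }
#endif

int main(void)
{
        struct ctx c = { .id = 1 };

        dbg_dump(&c);   /* fine in both builds */
        /* dbg_dump(1); would now fail in both builds, unlike a no-op macro */
        return 0;
}

Build with or without -DSKETCH_DEBUG; the call site compiles identically either way.
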
diff --git a/fs/ubifs/dir.c b/fs/ubifs/dir.c
index 87ebcce72213..ef5abd38f0bf 100644
--- a/fs/ubifs/dir.c
+++ b/fs/ubifs/dir.c
@@ -522,24 +522,6 @@ static int ubifs_link(struct dentry *old_dentry, struct inode *dir,
522 ubifs_assert(mutex_is_locked(&dir->i_mutex)); 522 ubifs_assert(mutex_is_locked(&dir->i_mutex));
523 ubifs_assert(mutex_is_locked(&inode->i_mutex)); 523 ubifs_assert(mutex_is_locked(&inode->i_mutex));
524 524
525 /*
526 * Return -ENOENT if we've raced with unlink and i_nlink is 0. Doing
527 * otherwise has the potential to corrupt the orphan inode list.
528 *
529 * Indeed, consider a scenario when 'vfs_link(dirA/fileA)' and
530 * 'vfs_unlink(dirA/fileA, dirB/fileB)' race. 'vfs_link()' does not
531 * lock 'dirA->i_mutex', so this is possible. Both of the functions
532 * lock 'fileA->i_mutex' though. Suppose 'vfs_unlink()' wins, and takes
533 * 'fileA->i_mutex' mutex first. Suppose 'fileA->i_nlink' is 1. In this
534 * case 'ubifs_unlink()' will drop the last reference, and put 'inodeA'
535 * to the list of orphans. After this, 'vfs_link()' will link
536 * 'dirB/fileB' to 'inodeA'. This is a problem because, for example,
537 * the subsequent 'vfs_unlink(dirB/fileB)' will add the same inode
538 * to the list of orphans.
539 */
540 if (inode->i_nlink == 0)
541 return -ENOENT;
542
543 err = dbg_check_synced_i_size(inode); 525 err = dbg_check_synced_i_size(inode);
544 if (err) 526 if (err)
545 return err; 527 return err;
@@ -550,7 +532,7 @@ static int ubifs_link(struct dentry *old_dentry, struct inode *dir,
550 532
551 lock_2_inodes(dir, inode); 533 lock_2_inodes(dir, inode);
552 inc_nlink(inode); 534 inc_nlink(inode);
553 atomic_inc(&inode->i_count); 535 ihold(inode);
554 inode->i_ctime = ubifs_current_time(inode); 536 inode->i_ctime = ubifs_current_time(inode);
555 dir->i_size += sz_change; 537 dir->i_size += sz_change;
556 dir_ui->ui_size = dir->i_size; 538 dir_ui->ui_size = dir->i_size;
@@ -621,7 +603,7 @@ static int ubifs_unlink(struct inode *dir, struct dentry *dentry)
621 ubifs_release_budget(c, &req); 603 ubifs_release_budget(c, &req);
622 else { 604 else {
623 /* We've deleted something - clean the "no space" flags */ 605 /* We've deleted something - clean the "no space" flags */
624 c->nospace = c->nospace_rp = 0; 606 c->bi.nospace = c->bi.nospace_rp = 0;
625 smp_wmb(); 607 smp_wmb();
626 } 608 }
627 return 0; 609 return 0;
@@ -711,7 +693,7 @@ static int ubifs_rmdir(struct inode *dir, struct dentry *dentry)
711 ubifs_release_budget(c, &req); 693 ubifs_release_budget(c, &req);
712 else { 694 else {
713 /* We've deleted something - clean the "no space" flags */ 695 /* We've deleted something - clean the "no space" flags */
714 c->nospace = c->nospace_rp = 0; 696 c->bi.nospace = c->bi.nospace_rp = 0;
715 smp_wmb(); 697 smp_wmb();
716 } 698 }
717 return 0; 699 return 0;
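
The dir.c hunks above are mostly mechanical (the budgeting fields moved into c->bi), but the pattern at the deletion sites is worth noting: once something was deleted, the cached "no space" hints are cleared before the write barrier so later budget requests retry instead of failing fast on stale state. A standalone model of that idea (illustrative only; C11 atomics stand in for the kernel's flags and smp_wmb()):

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

/* Hypothetical stand-ins for c->bi.nospace and c->bi.nospace_rp. */
static atomic_bool nospace, nospace_rp;

static int budget_space(void)
{
        if (atomic_load(&nospace))
                return -1;      /* fail fast on the cached hint */
        /* real budgeting would happen here */
        return 0;
}

static void freed_some_space(void)
{
        /* A deletion freed space: clear the hints so budgeting retries. */
        atomic_store(&nospace, false);
        atomic_store(&nospace_rp, false);
}

int main(void)
{
        atomic_store(&nospace, true);
        printf("budget: %d\n", budget_space());  /* -1, hint still set */
        freed_some_space();
        printf("budget: %d\n", budget_space());  /* 0 */
        return 0;
}
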
diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c
index 03ae894c45de..5e7fccfc4b29 100644
--- a/fs/ubifs/file.c
+++ b/fs/ubifs/file.c
@@ -212,7 +212,7 @@ static void release_new_page_budget(struct ubifs_info *c)
212 */ 212 */
213static void release_existing_page_budget(struct ubifs_info *c) 213static void release_existing_page_budget(struct ubifs_info *c)
214{ 214{
215 struct ubifs_budget_req req = { .dd_growth = c->page_budget}; 215 struct ubifs_budget_req req = { .dd_growth = c->bi.page_budget};
216 216
217 ubifs_release_budget(c, &req); 217 ubifs_release_budget(c, &req);
218} 218}
@@ -433,8 +433,9 @@ static int ubifs_write_begin(struct file *file, struct address_space *mapping,
433 struct page *page; 433 struct page *page;
434 434
435 ubifs_assert(ubifs_inode(inode)->ui_size == inode->i_size); 435 ubifs_assert(ubifs_inode(inode)->ui_size == inode->i_size);
436 ubifs_assert(!c->ro_media && !c->ro_mount);
436 437
437 if (unlikely(c->ro_media)) 438 if (unlikely(c->ro_error))
438 return -EROFS; 439 return -EROFS;
439 440
440 /* Try out the fast-path part first */ 441 /* Try out the fast-path part first */
@@ -447,10 +448,12 @@ static int ubifs_write_begin(struct file *file, struct address_space *mapping,
447 if (!(pos & ~PAGE_CACHE_MASK) && len == PAGE_CACHE_SIZE) { 448 if (!(pos & ~PAGE_CACHE_MASK) && len == PAGE_CACHE_SIZE) {
448 /* 449 /*
449 * We change whole page so no need to load it. But we 450 * We change whole page so no need to load it. But we
450 * have to set the @PG_checked flag to make the further 451 * do not know whether this page exists on the media or
451 * code know that the page is new. This might be not 452 * a larger budget. The assumption is that it is better
452 * true, but it is better to budget more than to read 453 * larger budget. The assumption is that it is better
453 * the page from the media. 454 * to budget a bit more than to read the page from the
455 * media. Thus, we are setting the @PG_checked flag
456 * here.
454 */ 457 */
455 SetPageChecked(page); 458 SetPageChecked(page);
456 skipped_read = 1; 459 skipped_read = 1;
@@ -558,6 +561,7 @@ static int ubifs_write_end(struct file *file, struct address_space *mapping,
558 dbg_gen("copied %d instead of %d, read page and repeat", 561 dbg_gen("copied %d instead of %d, read page and repeat",
559 copied, len); 562 copied, len);
560 cancel_budget(c, page, ui, appending); 563 cancel_budget(c, page, ui, appending);
564 ClearPageChecked(page);
561 565
562 /* 566 /*
563 * Return 0 to force VFS to repeat the whole operation, or the 567 * Return 0 to force VFS to repeat the whole operation, or the
@@ -967,11 +971,11 @@ static int do_writepage(struct page *page, int len)
967 * the page locked, and it locks @ui_mutex. However, write-back does take inode 971 * the page locked, and it locks @ui_mutex. However, write-back does take inode
968 * @i_mutex, which means other VFS operations may be run on this inode at the 972 * @i_mutex, which means other VFS operations may be run on this inode at the
969 * same time. And the problematic one is truncation to smaller size, from where 973 * same time. And the problematic one is truncation to smaller size, from where
970 * we have to call 'truncate_setsize()', which first changes @inode->i_size, then 974 * we have to call 'truncate_setsize()', which first changes @inode->i_size,
971 * drops the truncated pages. And while dropping the pages, it takes the page 975 * then drops the truncated pages. And while dropping the pages, it takes the
972 * lock. This means that 'do_truncation()' cannot call 'truncate_setsize()' with 976 * page lock. This means that 'do_truncation()' cannot call 'truncate_setsize()'
973 * @ui_mutex locked, because it would deadlock with 'ubifs_writepage()'. This 977 * with @ui_mutex locked, because it would deadlock with 'ubifs_writepage()'.
974 * means that @inode->i_size is changed while @ui_mutex is unlocked. 978 * This means that @inode->i_size is changed while @ui_mutex is unlocked.
975 * 979 *
976 * XXX(truncate): with the new truncate sequence this is not true anymore, 980 * XXX(truncate): with the new truncate sequence this is not true anymore,
977 * and the calls to truncate_setsize can be move around freely. They should 981 * and the calls to truncate_setsize can be move around freely. They should
@@ -1185,7 +1189,7 @@ out_budg:
1185 if (budgeted) 1189 if (budgeted)
1186 ubifs_release_budget(c, &req); 1190 ubifs_release_budget(c, &req);
1187 else { 1191 else {
1188 c->nospace = c->nospace_rp = 0; 1192 c->bi.nospace = c->bi.nospace_rp = 0;
1189 smp_wmb(); 1193 smp_wmb();
1190 } 1194 }
1191 return err; 1195 return err;
@@ -1308,6 +1312,13 @@ int ubifs_fsync(struct file *file, int datasync)
1308 1312
1309 dbg_gen("syncing inode %lu", inode->i_ino); 1313 dbg_gen("syncing inode %lu", inode->i_ino);
1310 1314
1315 if (c->ro_mount)
1316 /*
1317 * For some really strange reason, VFS does not filter out
1318 * 'fsync()' for R/O mounted file-systems as of 2.6.39.
1319 */
1320 return 0;
1321
1311 /* 1322 /*
1312 * VFS has already synchronized dirty pages for this inode. Synchronize 1323 * VFS has already synchronized dirty pages for this inode. Synchronize
1313 * the inode unless this is a 'datasync()' call. 1324 * the inode unless this is a 'datasync()' call.
@@ -1425,10 +1436,11 @@ static int ubifs_releasepage(struct page *page, gfp_t unused_gfp_flags)
1425} 1436}
1426 1437
1427/* 1438/*
1428 * mmap()d file has taken write protection fault and is being made 1439 * mmap()d file has taken write protection fault and is being made writable.
1429 * writable. UBIFS must ensure page is budgeted for. 1440 * UBIFS must ensure page is budgeted for.
1430 */ 1441 */
1431static int ubifs_vm_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) 1442static int ubifs_vm_page_mkwrite(struct vm_area_struct *vma,
1443 struct vm_fault *vmf)
1432{ 1444{
1433 struct page *page = vmf->page; 1445 struct page *page = vmf->page;
1434 struct inode *inode = vma->vm_file->f_path.dentry->d_inode; 1446 struct inode *inode = vma->vm_file->f_path.dentry->d_inode;
@@ -1439,9 +1451,9 @@ static int ubifs_vm_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vm
1439 1451
1440 dbg_gen("ino %lu, pg %lu, i_size %lld", inode->i_ino, page->index, 1452 dbg_gen("ino %lu, pg %lu, i_size %lld", inode->i_ino, page->index,
1441 i_size_read(inode)); 1453 i_size_read(inode));
1442 ubifs_assert(!(inode->i_sb->s_flags & MS_RDONLY)); 1454 ubifs_assert(!c->ro_media && !c->ro_mount);
1443 1455
1444 if (unlikely(c->ro_media)) 1456 if (unlikely(c->ro_error))
1445 return VM_FAULT_SIGBUS; /* -EROFS */ 1457 return VM_FAULT_SIGBUS; /* -EROFS */
1446 1458
1447 /* 1459 /*
@@ -1529,7 +1541,6 @@ static int ubifs_file_mmap(struct file *file, struct vm_area_struct *vma)
1529{ 1541{
1530 int err; 1542 int err;
1531 1543
1532 /* 'generic_file_mmap()' takes care of NOMMU case */
1533 err = generic_file_mmap(file, vma); 1544 err = generic_file_mmap(file, vma);
1534 if (err) 1545 if (err)
1535 return err; 1546 return err;
diff --git a/fs/ubifs/find.c b/fs/ubifs/find.c
index 1d54383d1269..2559d174e004 100644
--- a/fs/ubifs/find.c
+++ b/fs/ubifs/find.c
@@ -252,8 +252,8 @@ int ubifs_find_dirty_leb(struct ubifs_info *c, struct ubifs_lprops *ret_lp,
252 * But if the index takes fewer LEBs than it is reserved for it, 252 * But if the index takes fewer LEBs than it is reserved for it,
253 * this function must avoid picking those reserved LEBs. 253 * this function must avoid picking those reserved LEBs.
254 */ 254 */
255 if (c->min_idx_lebs >= c->lst.idx_lebs) { 255 if (c->bi.min_idx_lebs >= c->lst.idx_lebs) {
256 rsvd_idx_lebs = c->min_idx_lebs - c->lst.idx_lebs; 256 rsvd_idx_lebs = c->bi.min_idx_lebs - c->lst.idx_lebs;
257 exclude_index = 1; 257 exclude_index = 1;
258 } 258 }
259 spin_unlock(&c->space_lock); 259 spin_unlock(&c->space_lock);
@@ -276,7 +276,7 @@ int ubifs_find_dirty_leb(struct ubifs_info *c, struct ubifs_lprops *ret_lp,
276 pick_free = 0; 276 pick_free = 0;
277 } else { 277 } else {
278 spin_lock(&c->space_lock); 278 spin_lock(&c->space_lock);
279 exclude_index = (c->min_idx_lebs >= c->lst.idx_lebs); 279 exclude_index = (c->bi.min_idx_lebs >= c->lst.idx_lebs);
280 spin_unlock(&c->space_lock); 280 spin_unlock(&c->space_lock);
281 } 281 }
282 282
@@ -501,8 +501,8 @@ int ubifs_find_free_space(struct ubifs_info *c, int min_space, int *offs,
501 501
502 /* Check if there are enough empty LEBs for commit */ 502 /* Check if there are enough empty LEBs for commit */
503 spin_lock(&c->space_lock); 503 spin_lock(&c->space_lock);
504 if (c->min_idx_lebs > c->lst.idx_lebs) 504 if (c->bi.min_idx_lebs > c->lst.idx_lebs)
505 rsvd_idx_lebs = c->min_idx_lebs - c->lst.idx_lebs; 505 rsvd_idx_lebs = c->bi.min_idx_lebs - c->lst.idx_lebs;
506 else 506 else
507 rsvd_idx_lebs = 0; 507 rsvd_idx_lebs = 0;
508 lebs = c->lst.empty_lebs + c->freeable_cnt + c->idx_gc_cnt - 508 lebs = c->lst.empty_lebs + c->freeable_cnt + c->idx_gc_cnt -
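
The find.c changes only re-home the budgeting fields into c->bi, but the arithmetic they touch is the interesting part: when the index currently occupies fewer LEBs than its budgeted minimum, the shortfall is reserved and must not be handed out for non-index data. A tiny worked sketch with invented numbers:

#include <stdio.h>

int main(void)
{
        /* Illustrative values only. */
        int min_idx_lebs = 12;  /* LEBs budgeted for the index */
        int idx_lebs = 9;       /* LEBs the index occupies right now */
        int empty_lebs = 20;
        int rsvd_idx_lebs = 0;

        if (min_idx_lebs > idx_lebs)
                rsvd_idx_lebs = min_idx_lebs - idx_lebs;

        /* Only the remainder may be given out for non-index data. */
        printf("reserved for index: %d, usable: %d\n",
               rsvd_idx_lebs, empty_lebs - rsvd_idx_lebs);
        return 0;
}
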
diff --git a/fs/ubifs/gc.c b/fs/ubifs/gc.c
index 918d1582ca05..ded29f6224c2 100644
--- a/fs/ubifs/gc.c
+++ b/fs/ubifs/gc.c
@@ -100,6 +100,10 @@ static int switch_gc_head(struct ubifs_info *c)
100 if (err) 100 if (err)
101 return err; 101 return err;
102 102
103 err = ubifs_wbuf_sync_nolock(wbuf);
104 if (err)
105 return err;
106
103 err = ubifs_add_bud_to_log(c, GCHD, gc_lnum, 0); 107 err = ubifs_add_bud_to_log(c, GCHD, gc_lnum, 0);
104 if (err) 108 if (err)
105 return err; 109 return err;
@@ -118,17 +122,23 @@ static int switch_gc_head(struct ubifs_info *c)
118 * This function compares data nodes @a and @b. Returns %1 if @a has greater 122 * This function compares data nodes @a and @b. Returns %1 if @a has greater
119 * inode or block number, and %-1 otherwise. 123 * inode or block number, and %-1 otherwise.
120 */ 124 */
121int data_nodes_cmp(void *priv, struct list_head *a, struct list_head *b) 125static int data_nodes_cmp(void *priv, struct list_head *a, struct list_head *b)
122{ 126{
123 ino_t inuma, inumb; 127 ino_t inuma, inumb;
124 struct ubifs_info *c = priv; 128 struct ubifs_info *c = priv;
125 struct ubifs_scan_node *sa, *sb; 129 struct ubifs_scan_node *sa, *sb;
126 130
127 cond_resched(); 131 cond_resched();
132 if (a == b)
133 return 0;
134
128 sa = list_entry(a, struct ubifs_scan_node, list); 135 sa = list_entry(a, struct ubifs_scan_node, list);
129 sb = list_entry(b, struct ubifs_scan_node, list); 136 sb = list_entry(b, struct ubifs_scan_node, list);
137
130 ubifs_assert(key_type(c, &sa->key) == UBIFS_DATA_KEY); 138 ubifs_assert(key_type(c, &sa->key) == UBIFS_DATA_KEY);
131 ubifs_assert(key_type(c, &sb->key) == UBIFS_DATA_KEY); 139 ubifs_assert(key_type(c, &sb->key) == UBIFS_DATA_KEY);
140 ubifs_assert(sa->type == UBIFS_DATA_NODE);
141 ubifs_assert(sb->type == UBIFS_DATA_NODE);
132 142
133 inuma = key_inum(c, &sa->key); 143 inuma = key_inum(c, &sa->key);
134 inumb = key_inum(c, &sb->key); 144 inumb = key_inum(c, &sb->key);
@@ -155,30 +165,43 @@ int data_nodes_cmp(void *priv, struct list_head *a, struct list_head *b)
155 * first and sorted by length in descending order. Directory entry nodes go 165 * first and sorted by length in descending order. Directory entry nodes go
156 * after inode nodes and are sorted in ascending hash value order. 166 * after inode nodes and are sorted in ascending hash value order.
157 */ 167 */
158int nondata_nodes_cmp(void *priv, struct list_head *a, struct list_head *b) 168static int nondata_nodes_cmp(void *priv, struct list_head *a,
169 struct list_head *b)
159{ 170{
160 int typea, typeb;
161 ino_t inuma, inumb; 171 ino_t inuma, inumb;
162 struct ubifs_info *c = priv; 172 struct ubifs_info *c = priv;
163 struct ubifs_scan_node *sa, *sb; 173 struct ubifs_scan_node *sa, *sb;
164 174
165 cond_resched(); 175 cond_resched();
176 if (a == b)
177 return 0;
178
166 sa = list_entry(a, struct ubifs_scan_node, list); 179 sa = list_entry(a, struct ubifs_scan_node, list);
167 sb = list_entry(b, struct ubifs_scan_node, list); 180 sb = list_entry(b, struct ubifs_scan_node, list);
168 typea = key_type(c, &sa->key); 181
169 typeb = key_type(c, &sb->key); 182 ubifs_assert(key_type(c, &sa->key) != UBIFS_DATA_KEY &&
170 ubifs_assert(typea != UBIFS_DATA_KEY && typeb != UBIFS_DATA_KEY); 183 key_type(c, &sb->key) != UBIFS_DATA_KEY);
184 ubifs_assert(sa->type != UBIFS_DATA_NODE &&
185 sb->type != UBIFS_DATA_NODE);
171 186
172 /* Inodes go before directory entries */ 187 /* Inodes go before directory entries */
173 if (typea == UBIFS_INO_KEY) { 188 if (sa->type == UBIFS_INO_NODE) {
174 if (typeb == UBIFS_INO_KEY) 189 if (sb->type == UBIFS_INO_NODE)
175 return sb->len - sa->len; 190 return sb->len - sa->len;
176 return -1; 191 return -1;
177 } 192 }
178 if (typeb == UBIFS_INO_KEY) 193 if (sb->type == UBIFS_INO_NODE)
179 return 1; 194 return 1;
180 195
181 ubifs_assert(typea == UBIFS_DENT_KEY && typeb == UBIFS_DENT_KEY); 196 ubifs_assert(key_type(c, &sa->key) == UBIFS_DENT_KEY ||
197 key_type(c, &sa->key) == UBIFS_XENT_KEY);
198 ubifs_assert(key_type(c, &sb->key) == UBIFS_DENT_KEY ||
199 key_type(c, &sb->key) == UBIFS_XENT_KEY);
200 ubifs_assert(sa->type == UBIFS_DENT_NODE ||
201 sa->type == UBIFS_XENT_NODE);
202 ubifs_assert(sb->type == UBIFS_DENT_NODE ||
203 sb->type == UBIFS_XENT_NODE);
204
182 inuma = key_inum(c, &sa->key); 205 inuma = key_inum(c, &sa->key);
183 inumb = key_inum(c, &sb->key); 206 inumb = key_inum(c, &sb->key);
184 207
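
The comparator rework above is largely defensive: return 0 when both list heads name the same element (list_sort() may compare an element with itself), and assert the node types before trusting the keys. The ordering itself, inode nodes first with larger ones earlier, then directory entries grouped by ascending inode number (the real code refines further, by hash), can be modelled with an ordinary qsort() comparator. A simplified, self-contained sketch that shares no code with UBIFS:

#include <stdio.h>
#include <stdlib.h>

enum node_type { INO_NODE, DENT_NODE };

struct node {
        enum node_type type;
        unsigned long inum;
        int len;
};

static int nondata_cmp(const void *pa, const void *pb)
{
        const struct node *a = pa, *b = pb;

        if (a == b)
                return 0;
        /* Inode nodes go first, sorted by length in descending order. */
        if (a->type == INO_NODE) {
                if (b->type == INO_NODE)
                        return b->len - a->len;
                return -1;
        }
        if (b->type == INO_NODE)
                return 1;
        /* Directory entries: ascending inode number. */
        if (a->inum != b->inum)
                return a->inum < b->inum ? -1 : 1;
        return 0;
}

int main(void)
{
        struct node v[] = {
                { DENT_NODE, 7, 32 },
                { INO_NODE,  3, 160 },
                { DENT_NODE, 2, 32 },
                { INO_NODE,  5, 4096 },
        };
        size_t i;

        qsort(v, sizeof(v) / sizeof(v[0]), sizeof(v[0]), nondata_cmp);
        for (i = 0; i < sizeof(v) / sizeof(v[0]); i++)
                printf("%s inum=%lu len=%d\n",
                       v[i].type == INO_NODE ? "ino " : "dent",
                       v[i].inum, v[i].len);
        return 0;
}
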
@@ -224,17 +247,33 @@ int nondata_nodes_cmp(void *priv, struct list_head *a, struct list_head *b)
224static int sort_nodes(struct ubifs_info *c, struct ubifs_scan_leb *sleb, 247static int sort_nodes(struct ubifs_info *c, struct ubifs_scan_leb *sleb,
225 struct list_head *nondata, int *min) 248 struct list_head *nondata, int *min)
226{ 249{
250 int err;
227 struct ubifs_scan_node *snod, *tmp; 251 struct ubifs_scan_node *snod, *tmp;
228 252
229 *min = INT_MAX; 253 *min = INT_MAX;
230 254
231 /* Separate data nodes and non-data nodes */ 255 /* Separate data nodes and non-data nodes */
232 list_for_each_entry_safe(snod, tmp, &sleb->nodes, list) { 256 list_for_each_entry_safe(snod, tmp, &sleb->nodes, list) {
233 int err; 257 ubifs_assert(snod->type == UBIFS_INO_NODE ||
258 snod->type == UBIFS_DATA_NODE ||
259 snod->type == UBIFS_DENT_NODE ||
260 snod->type == UBIFS_XENT_NODE ||
261 snod->type == UBIFS_TRUN_NODE);
262
263 if (snod->type != UBIFS_INO_NODE &&
264 snod->type != UBIFS_DATA_NODE &&
265 snod->type != UBIFS_DENT_NODE &&
266 snod->type != UBIFS_XENT_NODE) {
267 /* Probably truncation node, zap it */
268 list_del(&snod->list);
269 kfree(snod);
270 continue;
271 }
234 272
235 ubifs_assert(snod->type != UBIFS_IDX_NODE); 273 ubifs_assert(key_type(c, &snod->key) == UBIFS_DATA_KEY ||
236 ubifs_assert(snod->type != UBIFS_REF_NODE); 274 key_type(c, &snod->key) == UBIFS_INO_KEY ||
237 ubifs_assert(snod->type != UBIFS_CS_NODE); 275 key_type(c, &snod->key) == UBIFS_DENT_KEY ||
276 key_type(c, &snod->key) == UBIFS_XENT_KEY);
238 277
239 err = ubifs_tnc_has_node(c, &snod->key, 0, sleb->lnum, 278 err = ubifs_tnc_has_node(c, &snod->key, 0, sleb->lnum,
240 snod->offs, 0); 279 snod->offs, 0);
@@ -258,6 +297,13 @@ static int sort_nodes(struct ubifs_info *c, struct ubifs_scan_leb *sleb,
258 /* Sort data and non-data nodes */ 297 /* Sort data and non-data nodes */
259 list_sort(c, &sleb->nodes, &data_nodes_cmp); 298 list_sort(c, &sleb->nodes, &data_nodes_cmp);
260 list_sort(c, nondata, &nondata_nodes_cmp); 299 list_sort(c, nondata, &nondata_nodes_cmp);
300
301 err = dbg_check_data_nodes_order(c, &sleb->nodes);
302 if (err)
303 return err;
304 err = dbg_check_nondata_nodes_order(c, nondata);
305 if (err)
306 return err;
261 return 0; 307 return 0;
262} 308}
263 309
@@ -432,6 +478,37 @@ int ubifs_garbage_collect_leb(struct ubifs_info *c, struct ubifs_lprops *lp)
432 ubifs_assert(c->gc_lnum != lnum); 478 ubifs_assert(c->gc_lnum != lnum);
433 ubifs_assert(wbuf->lnum != lnum); 479 ubifs_assert(wbuf->lnum != lnum);
434 480
481 if (lp->free + lp->dirty == c->leb_size) {
482 /* Special case - a free LEB */
483 dbg_gc("LEB %d is free, return it", lp->lnum);
484 ubifs_assert(!(lp->flags & LPROPS_INDEX));
485
486 if (lp->free != c->leb_size) {
487 /*
488 * Write buffers must be sync'd before unmapping
489 * freeable LEBs, because one of them may contain data
490 * which obsoletes something in 'lp->pnum'.
491 */
492 err = gc_sync_wbufs(c);
493 if (err)
494 return err;
495 err = ubifs_change_one_lp(c, lp->lnum, c->leb_size,
496 0, 0, 0, 0);
497 if (err)
498 return err;
499 }
500 err = ubifs_leb_unmap(c, lp->lnum);
501 if (err)
502 return err;
503
504 if (c->gc_lnum == -1) {
505 c->gc_lnum = lnum;
506 return LEB_RETAINED;
507 }
508
509 return LEB_FREED;
510 }
511
435 /* 512 /*
436 * We scan the entire LEB even though we only really need to scan up to 513 * We scan the entire LEB even though we only really need to scan up to
437 * (c->leb_size - lp->free). 514 * (c->leb_size - lp->free).
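
The fast path hoisted into ubifs_garbage_collect_leb() above can be summarized as a small decision function: a LEB whose free plus dirty bytes cover the whole LEB holds no live data, so it can be reclaimed without scanning; if it was ever written, write-buffers are synced first, and the LEB is either kept as the missing GC head or reported freed. A simplified model (invented constants, no I/O or locking):

#include <stdio.h>

#define LEB_RETAINED 1
#define LEB_FREED    2

static int reclaim_free_leb(int leb_size, int free, int dirty,
                            int *gc_lnum, int lnum)
{
        if (free + dirty != leb_size)
                return 0;       /* live data present: caller must scan */
        /*
         * In the real code, free != leb_size means the LEB was written
         * to, so write-buffers are synchronized before it is unmapped.
         */
        if (*gc_lnum == -1) {
                *gc_lnum = lnum;        /* keep it as the new GC head */
                return LEB_RETAINED;
        }
        return LEB_FREED;
}

int main(void)
{
        int gc_lnum = -1;

        printf("%d\n", reclaim_free_leb(131072, 4096, 126976, &gc_lnum, 17));
        printf("%d\n", reclaim_free_leb(131072, 131072, 0, &gc_lnum, 18));
        return 0;
}
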
@@ -575,13 +652,14 @@ int ubifs_garbage_collect(struct ubifs_info *c, int anyway)
575 struct ubifs_wbuf *wbuf = &c->jheads[GCHD].wbuf; 652 struct ubifs_wbuf *wbuf = &c->jheads[GCHD].wbuf;
576 653
577 ubifs_assert_cmt_locked(c); 654 ubifs_assert_cmt_locked(c);
655 ubifs_assert(!c->ro_media && !c->ro_mount);
578 656
579 if (ubifs_gc_should_commit(c)) 657 if (ubifs_gc_should_commit(c))
580 return -EAGAIN; 658 return -EAGAIN;
581 659
582 mutex_lock_nested(&wbuf->io_mutex, wbuf->jhead); 660 mutex_lock_nested(&wbuf->io_mutex, wbuf->jhead);
583 661
584 if (c->ro_media) { 662 if (c->ro_error) {
585 ret = -EROFS; 663 ret = -EROFS;
586 goto out_unlock; 664 goto out_unlock;
587 } 665 }
@@ -640,51 +718,18 @@ int ubifs_garbage_collect(struct ubifs_info *c, int anyway)
640 "(min. space %d)", lp.lnum, lp.free, lp.dirty, 718 "(min. space %d)", lp.lnum, lp.free, lp.dirty,
641 lp.free + lp.dirty, min_space); 719 lp.free + lp.dirty, min_space);
642 720
643 if (lp.free + lp.dirty == c->leb_size) {
644 /* An empty LEB was returned */
645 dbg_gc("LEB %d is free, return it", lp.lnum);
646 /*
647 * ubifs_find_dirty_leb() doesn't return freeable index
648 * LEBs.
649 */
650 ubifs_assert(!(lp.flags & LPROPS_INDEX));
651 if (lp.free != c->leb_size) {
652 /*
653 * Write buffers must be sync'd before
654 * unmapping freeable LEBs, because one of them
655 * may contain data which obsoletes something
656 * in 'lp.pnum'.
657 */
658 ret = gc_sync_wbufs(c);
659 if (ret)
660 goto out;
661 ret = ubifs_change_one_lp(c, lp.lnum,
662 c->leb_size, 0, 0, 0,
663 0);
664 if (ret)
665 goto out;
666 }
667 ret = ubifs_leb_unmap(c, lp.lnum);
668 if (ret)
669 goto out;
670 ret = lp.lnum;
671 break;
672 }
673
674 space_before = c->leb_size - wbuf->offs - wbuf->used; 721 space_before = c->leb_size - wbuf->offs - wbuf->used;
675 if (wbuf->lnum == -1) 722 if (wbuf->lnum == -1)
676 space_before = 0; 723 space_before = 0;
677 724
678 ret = ubifs_garbage_collect_leb(c, &lp); 725 ret = ubifs_garbage_collect_leb(c, &lp);
679 if (ret < 0) { 726 if (ret < 0) {
680 if (ret == -EAGAIN || ret == -ENOSPC) { 727 if (ret == -EAGAIN) {
681 /* 728 /*
682 * These codes are not errors, so we have to 729 * This is not an error, so we have to return the
683 * return the LEB to lprops. But if the 730 * LEB to lprops. But if 'ubifs_return_leb()'
684 * 'ubifs_return_leb()' function fails, its 731 * fails, its failure code is propagated to the
685 * failure code is propagated to the caller 732 * caller instead of the original '-EAGAIN'.
686 * instead of the original '-EAGAIN' or
687 * '-ENOSPC'.
688 */ 733 */
689 err = ubifs_return_leb(c, lp.lnum); 734 err = ubifs_return_leb(c, lp.lnum);
690 if (err) 735 if (err)
@@ -774,8 +819,8 @@ out_unlock:
774out: 819out:
775 ubifs_assert(ret < 0); 820 ubifs_assert(ret < 0);
776 ubifs_assert(ret != -ENOSPC && ret != -EAGAIN); 821 ubifs_assert(ret != -ENOSPC && ret != -EAGAIN);
777 ubifs_ro_mode(c, ret);
778 ubifs_wbuf_sync_nolock(wbuf); 822 ubifs_wbuf_sync_nolock(wbuf);
823 ubifs_ro_mode(c, ret);
779 mutex_unlock(&wbuf->io_mutex); 824 mutex_unlock(&wbuf->io_mutex);
780 ubifs_return_leb(c, lp.lnum); 825 ubifs_return_leb(c, lp.lnum);
781 return ret; 826 return ret;
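
One subtle reordering in the error path above: the write-buffer is now synchronized before ubifs_ro_mode() is called, presumably because the sync helpers refuse to write once c->ro_error is set (see the io.c changes below), so the old order would have discarded buffered data. A toy illustration of why the order matters:

#include <stdio.h>

static int ro_error;

static int wbuf_sync(void)
{
        if (ro_error)
                return -30;     /* -EROFS: nothing reaches the media */
        printf("write-buffer flushed\n");
        return 0;
}

int main(void)
{
        /* New order: flush what we can, then go read-only. */
        wbuf_sync();
        ro_error = 1;
        /* The old order (read-only first) would have skipped the flush: */
        printf("sync after ro: %d\n", wbuf_sync());
        return 0;
}
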
diff --git a/fs/ubifs/io.c b/fs/ubifs/io.c
index bcf5a16f30bb..3be645e012c9 100644
--- a/fs/ubifs/io.c
+++ b/fs/ubifs/io.c
@@ -31,6 +31,26 @@
31 * buffer is full or when it is not used for some time (by timer). This is 31 * buffer is full or when it is not used for some time (by timer). This is
32 * similar to the mechanism used by JFFS2. 32 * similar to the mechanism used by JFFS2.
33 * 33 *
34 * UBIFS distinguishes between minimum write size (@c->min_io_size) and maximum
35 * write size (@c->max_write_size). The latter is the maximum amount of bytes
36 * the underlying flash is able to program at a time, and writing in
37 * @c->max_write_size units should presumably be faster. Obviously,
38 * @c->min_io_size <= @c->max_write_size. Write-buffers are of
39 * @c->max_write_size bytes in size for maximum performance. However, when a
40 * write-buffer is flushed, only the portion of it (aligned to @c->min_io_size
41 * boundary) which contains data is written, not the whole write-buffer,
42 * because this is more space-efficient.
43 *
44 * This optimization adds a few complications to the code. Indeed, on the one
45 * hand, we want to write in optimal @c->max_write_size byte chunks, which
46 * also means aligning writes at @c->max_write_size byte offsets. On the
47 * other hand, we do not want to waste space when synchronizing the write
48 * buffer, so during synchronization we write in smaller chunks. This makes
49 * the next write offset unaligned to @c->max_write_size bytes. So we
50 * have to make sure that the write-buffer offset (@wbuf->offs) becomes aligned
51 * to @c->max_write_size bytes again. We do this by temporarily shrinking
52 * write-buffer size (@wbuf->size).
53 *
34 * Write-buffers are defined by 'struct ubifs_wbuf' objects and protected by 54 * Write-buffers are defined by 'struct ubifs_wbuf' objects and protected by
35 * mutexes defined inside these objects. Since sometimes upper-level code 55 * mutexes defined inside these objects. Since sometimes upper-level code
36 * has to lock the write-buffer (e.g. journal space reservation code), many 56 * has to lock the write-buffer (e.g. journal space reservation code), many
@@ -46,8 +66,8 @@
46 * UBIFS uses padding when it pads to the next min. I/O unit. In this case it 66 * UBIFS uses padding when it pads to the next min. I/O unit. In this case it
47 * uses padding nodes or padding bytes, if the padding node does not fit. 67 * uses padding nodes or padding bytes, if the padding node does not fit.
48 * 68 *
49 * All UBIFS nodes are protected by CRC checksums and UBIFS checks all nodes 69 * All UBIFS nodes are protected by CRC checksums and UBIFS checks CRC when
50 * every time they are read from the flash media. 70 * they are read from the flash media.
51 */ 71 */
52 72
53#include <linux/crc32.h> 73#include <linux/crc32.h>
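
To make the new min-versus-max write size commentary concrete, here is a tiny worked example with invented geometry: with min_io_size = 2048 and max_write_size = 8192, a write-buffer holding 5000 used bytes is flushed as ALIGN(5000, 2048) = 6144 bytes, 1144 of them padding, rather than as the full 8192-byte buffer:

#include <stdio.h>

#define ALIGN_UP(x, a) (((x) + (a) - 1) & ~((a) - 1))

int main(void)
{
        int min_io_size = 2048, max_write_size = 8192;
        int used = 5000;
        int sync_len = ALIGN_UP(used, min_io_size);

        printf("flush %d of %d buffered bytes (%d bytes padding)\n",
               sync_len, max_write_size, sync_len - used);
        return 0;
}
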
@@ -61,8 +81,8 @@
61 */ 81 */
62void ubifs_ro_mode(struct ubifs_info *c, int err) 82void ubifs_ro_mode(struct ubifs_info *c, int err)
63{ 83{
64 if (!c->ro_media) { 84 if (!c->ro_error) {
65 c->ro_media = 1; 85 c->ro_error = 1;
66 c->no_chk_data_crc = 0; 86 c->no_chk_data_crc = 0;
67 c->vfs_sb->s_flags |= MS_RDONLY; 87 c->vfs_sb->s_flags |= MS_RDONLY;
68 ubifs_warn("switched to read-only mode, error %d", err); 88 ubifs_warn("switched to read-only mode, error %d", err);
@@ -88,8 +108,12 @@ void ubifs_ro_mode(struct ubifs_info *c, int err)
88 * This function may skip data nodes CRC checking if @c->no_chk_data_crc is 108 * This function may skip data nodes CRC checking if @c->no_chk_data_crc is
89 * true, which is controlled by corresponding UBIFS mount option. However, if 109 * true, which is controlled by corresponding UBIFS mount option. However, if
90 * @must_chk_crc is true, then @c->no_chk_data_crc is ignored and CRC is 110 * @must_chk_crc is true, then @c->no_chk_data_crc is ignored and CRC is
91 * checked. Similarly, if @c->always_chk_crc is true, @c->no_chk_data_crc is 111 * checked. Similarly, if @c->mounting or @c->remounting_rw is true (we are
92 * ignored and CRC is checked. 112 * mounting or re-mounting to R/W mode), @c->no_chk_data_crc is ignored and CRC
113 * is checked. This is because during mounting or re-mounting from R/O mode to
114 * R/W mode we may read journal nodes (when replying the journal or doing the
115 * recovery) and the journal nodes may potentially be corrupted, so checking is
116 * required.
93 * 117 *
94 * This function returns zero in case of success and %-EUCLEAN in case of bad 118 * This function returns zero in case of success and %-EUCLEAN in case of bad
95 * CRC or magic. 119 * CRC or magic.
@@ -131,8 +155,8 @@ int ubifs_check_node(const struct ubifs_info *c, const void *buf, int lnum,
131 node_len > c->ranges[type].max_len) 155 node_len > c->ranges[type].max_len)
132 goto out_len; 156 goto out_len;
133 157
134 if (!must_chk_crc && type == UBIFS_DATA_NODE && !c->always_chk_crc && 158 if (!must_chk_crc && type == UBIFS_DATA_NODE && !c->mounting &&
135 c->no_chk_data_crc) 159 !c->remounting_rw && c->no_chk_data_crc)
136 return 0; 160 return 0;
137 161
138 crc = crc32(UBIFS_CRC32_INIT, buf + 8, node_len - 8); 162 crc = crc32(UBIFS_CRC32_INIT, buf + 8, node_len - 8);
@@ -343,11 +367,17 @@ static void cancel_wbuf_timer_nolock(struct ubifs_wbuf *wbuf)
343 * 367 *
344 * This function synchronizes write-buffer @buf and returns zero in case of 368 * This function synchronizes write-buffer @buf and returns zero in case of
345 * success or a negative error code in case of failure. 369 * success or a negative error code in case of failure.
370 *
371 * Note, although write-buffers are of @c->max_write_size, this function does
372 * not necessarily write all @c->max_write_size bytes to the flash. Instead,
373 * if the write-buffer is only partially filled with data, only the used part
374 * of the write-buffer (aligned on @c->min_io_size boundary) is synchronized.
375 * This way we waste less space.
346 */ 376 */
347int ubifs_wbuf_sync_nolock(struct ubifs_wbuf *wbuf) 377int ubifs_wbuf_sync_nolock(struct ubifs_wbuf *wbuf)
348{ 378{
349 struct ubifs_info *c = wbuf->c; 379 struct ubifs_info *c = wbuf->c;
350 int err, dirt; 380 int err, dirt, sync_len;
351 381
352 cancel_wbuf_timer_nolock(wbuf); 382 cancel_wbuf_timer_nolock(wbuf);
353 if (!wbuf->used || wbuf->lnum == -1) 383 if (!wbuf->used || wbuf->lnum == -1)
@@ -356,28 +386,54 @@ int ubifs_wbuf_sync_nolock(struct ubifs_wbuf *wbuf)
356 386
357 dbg_io("LEB %d:%d, %d bytes, jhead %s", 387 dbg_io("LEB %d:%d, %d bytes, jhead %s",
358 wbuf->lnum, wbuf->offs, wbuf->used, dbg_jhead(wbuf->jhead)); 388 wbuf->lnum, wbuf->offs, wbuf->used, dbg_jhead(wbuf->jhead));
359 ubifs_assert(!(c->vfs_sb->s_flags & MS_RDONLY));
360 ubifs_assert(!(wbuf->avail & 7)); 389 ubifs_assert(!(wbuf->avail & 7));
361 ubifs_assert(wbuf->offs + c->min_io_size <= c->leb_size); 390 ubifs_assert(wbuf->offs + wbuf->size <= c->leb_size);
362 391 ubifs_assert(wbuf->size >= c->min_io_size);
363 if (c->ro_media) 392 ubifs_assert(wbuf->size <= c->max_write_size);
393 ubifs_assert(wbuf->size % c->min_io_size == 0);
394 ubifs_assert(!c->ro_media && !c->ro_mount);
395 if (c->leb_size - wbuf->offs >= c->max_write_size)
396 ubifs_assert(!((wbuf->offs + wbuf->size) % c->max_write_size));
397
398 if (c->ro_error)
364 return -EROFS; 399 return -EROFS;
365 400
366 ubifs_pad(c, wbuf->buf + wbuf->used, wbuf->avail); 401 /*
402 * Do not write the whole write-buffer; write only the minimum necessary
403 * amount of min. I/O units.
404 */
405 sync_len = ALIGN(wbuf->used, c->min_io_size);
406 dirt = sync_len - wbuf->used;
407 if (dirt)
408 ubifs_pad(c, wbuf->buf + wbuf->used, dirt);
367 err = ubi_leb_write(c->ubi, wbuf->lnum, wbuf->buf, wbuf->offs, 409 err = ubi_leb_write(c->ubi, wbuf->lnum, wbuf->buf, wbuf->offs,
368 c->min_io_size, wbuf->dtype); 410 sync_len, wbuf->dtype);
369 if (err) { 411 if (err) {
370 ubifs_err("cannot write %d bytes to LEB %d:%d", 412 ubifs_err("cannot write %d bytes to LEB %d:%d",
371 c->min_io_size, wbuf->lnum, wbuf->offs); 413 sync_len, wbuf->lnum, wbuf->offs);
372 dbg_dump_stack(); 414 dbg_dump_stack();
373 return err; 415 return err;
374 } 416 }
375 417
376 dirt = wbuf->avail;
377
378 spin_lock(&wbuf->lock); 418 spin_lock(&wbuf->lock);
379 wbuf->offs += c->min_io_size; 419 wbuf->offs += sync_len;
380 wbuf->avail = c->min_io_size; 420 /*
421 * Now @wbuf->offs is not necessarily aligned to @c->max_write_size.
422 * But our goal is to optimize writes and make sure we write in
423 * @c->max_write_size chunks and to @c->max_write_size-aligned offset.
424 * Thus, if @wbuf->offs is not aligned to @c->max_write_size now, make
425 * sure that @wbuf->offs + @wbuf->size is aligned to
426 * @c->max_write_size. This way we make sure that after next
427 * write-buffer flush we are again at the optimal offset (aligned to
428 * @c->max_write_size).
429 */
430 if (c->leb_size - wbuf->offs < c->max_write_size)
431 wbuf->size = c->leb_size - wbuf->offs;
432 else if (wbuf->offs & (c->max_write_size - 1))
433 wbuf->size = ALIGN(wbuf->offs, c->max_write_size) - wbuf->offs;
434 else
435 wbuf->size = c->max_write_size;
436 wbuf->avail = wbuf->size;
381 wbuf->used = 0; 437 wbuf->used = 0;
382 wbuf->next_ino = 0; 438 wbuf->next_ino = 0;
383 spin_unlock(&wbuf->lock); 439 spin_unlock(&wbuf->lock);
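
The realignment logic in the hunk above reads naturally as a pure function: after a partial flush moves @wbuf->offs off a @c->max_write_size boundary, the next buffer size is chosen so that offs plus size lands back on one (or on the end of the LEB). A simplified standalone sketch of that calculation, without the locking:

#include <stdio.h>

#define ALIGN_UP(x, a) (((x) + (a) - 1) & ~((a) - 1))

static int next_wbuf_size(int offs, int leb_size, int max_write_size)
{
        if (leb_size - offs < max_write_size)
                return leb_size - offs;         /* clamp at LEB end */
        if (offs & (max_write_size - 1))        /* realign next flush */
                return ALIGN_UP(offs, max_write_size) - offs;
        return max_write_size;                  /* already aligned */
}

int main(void)
{
        /* After a 6144-byte partial flush with an 8192-byte write unit: */
        printf("%d\n", next_wbuf_size(6144, 131072, 8192));  /* 2048 */
        printf("%d\n", next_wbuf_size(8192, 131072, 8192));  /* 8192 */
        return 0;
}
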
@@ -396,8 +452,8 @@ int ubifs_wbuf_sync_nolock(struct ubifs_wbuf *wbuf)
396 * @dtype: data type 452 * @dtype: data type
397 * 453 *
398 * This function targets the write-buffer to logical eraseblock @lnum:@offs. 454 * This function targets the write-buffer to logical eraseblock @lnum:@offs.
399 * The write-buffer is synchronized if it is not empty. Returns zero in case of 455 * The write-buffer has to be empty. Returns zero in case of success and a
400 * success and a negative error code in case of failure. 456 * negative error code in case of failure.
401 */ 457 */
402int ubifs_wbuf_seek_nolock(struct ubifs_wbuf *wbuf, int lnum, int offs, 458int ubifs_wbuf_seek_nolock(struct ubifs_wbuf *wbuf, int lnum, int offs,
403 int dtype) 459 int dtype)
@@ -409,18 +465,18 @@ int ubifs_wbuf_seek_nolock(struct ubifs_wbuf *wbuf, int lnum, int offs,
409 ubifs_assert(offs >= 0 && offs <= c->leb_size); 465 ubifs_assert(offs >= 0 && offs <= c->leb_size);
410 ubifs_assert(offs % c->min_io_size == 0 && !(offs & 7)); 466 ubifs_assert(offs % c->min_io_size == 0 && !(offs & 7));
411 ubifs_assert(lnum != wbuf->lnum); 467 ubifs_assert(lnum != wbuf->lnum);
412 468 ubifs_assert(wbuf->used == 0);
413 if (wbuf->used > 0) {
414 int err = ubifs_wbuf_sync_nolock(wbuf);
415
416 if (err)
417 return err;
418 }
419 469
420 spin_lock(&wbuf->lock); 470 spin_lock(&wbuf->lock);
421 wbuf->lnum = lnum; 471 wbuf->lnum = lnum;
422 wbuf->offs = offs; 472 wbuf->offs = offs;
423 wbuf->avail = c->min_io_size; 473 if (c->leb_size - wbuf->offs < c->max_write_size)
474 wbuf->size = c->leb_size - wbuf->offs;
475 else if (wbuf->offs & (c->max_write_size - 1))
476 wbuf->size = ALIGN(wbuf->offs, c->max_write_size) - wbuf->offs;
477 else
478 wbuf->size = c->max_write_size;
479 wbuf->avail = wbuf->size;
424 wbuf->used = 0; 480 wbuf->used = 0;
425 spin_unlock(&wbuf->lock); 481 spin_unlock(&wbuf->lock);
426 wbuf->dtype = dtype; 482 wbuf->dtype = dtype;
@@ -440,11 +496,12 @@ int ubifs_bg_wbufs_sync(struct ubifs_info *c)
440{ 496{
441 int err, i; 497 int err, i;
442 498
499 ubifs_assert(!c->ro_media && !c->ro_mount);
443 if (!c->need_wbuf_sync) 500 if (!c->need_wbuf_sync)
444 return 0; 501 return 0;
445 c->need_wbuf_sync = 0; 502 c->need_wbuf_sync = 0;
446 503
447 if (c->ro_media) { 504 if (c->ro_error) {
448 err = -EROFS; 505 err = -EROFS;
449 goto out_timers; 506 goto out_timers;
450 } 507 }
@@ -499,8 +556,9 @@ out_timers:
499 * 556 *
500 * This function writes data to flash via write-buffer @wbuf. This means that 557 * This function writes data to flash via write-buffer @wbuf. This means that
501 * the last piece of the node won't reach the flash media immediately if it 558 * the last piece of the node won't reach the flash media immediately if it
502 * does not take whole minimal I/O unit. Instead, the node will sit in RAM 559 * does not take whole max. write unit (@c->max_write_size). Instead, the node
503 * until the write-buffer is synchronized (e.g., by timer). 560 * will sit in RAM until the write-buffer is synchronized (e.g., by timer, or
561 * because more data are appended to the write-buffer).
504 * 562 *
505 * This function returns zero in case of success and a negative error code in 563 * This function returns zero in case of success and a negative error code in
506 * case of failure. If the node cannot be written because there is no more 564 * case of failure. If the node cannot be written because there is no more
@@ -509,7 +567,7 @@ out_timers:
509int ubifs_wbuf_write_nolock(struct ubifs_wbuf *wbuf, void *buf, int len) 567int ubifs_wbuf_write_nolock(struct ubifs_wbuf *wbuf, void *buf, int len)
510{ 568{
511 struct ubifs_info *c = wbuf->c; 569 struct ubifs_info *c = wbuf->c;
512 int err, written, n, aligned_len = ALIGN(len, 8), offs; 570 int err, written, n, aligned_len = ALIGN(len, 8);
513 571
514 dbg_io("%d bytes (%s) to jhead %s wbuf at LEB %d:%d", len, 572 dbg_io("%d bytes (%s) to jhead %s wbuf at LEB %d:%d", len,
515 dbg_ntype(((struct ubifs_ch *)buf)->node_type), 573 dbg_ntype(((struct ubifs_ch *)buf)->node_type),
@@ -517,8 +575,15 @@ int ubifs_wbuf_write_nolock(struct ubifs_wbuf *wbuf, void *buf, int len)
517 ubifs_assert(len > 0 && wbuf->lnum >= 0 && wbuf->lnum < c->leb_cnt); 575 ubifs_assert(len > 0 && wbuf->lnum >= 0 && wbuf->lnum < c->leb_cnt);
518 ubifs_assert(wbuf->offs >= 0 && wbuf->offs % c->min_io_size == 0); 576 ubifs_assert(wbuf->offs >= 0 && wbuf->offs % c->min_io_size == 0);
519 ubifs_assert(!(wbuf->offs & 7) && wbuf->offs <= c->leb_size); 577 ubifs_assert(!(wbuf->offs & 7) && wbuf->offs <= c->leb_size);
520 ubifs_assert(wbuf->avail > 0 && wbuf->avail <= c->min_io_size); 578 ubifs_assert(wbuf->avail > 0 && wbuf->avail <= wbuf->size);
579 ubifs_assert(wbuf->size >= c->min_io_size);
580 ubifs_assert(wbuf->size <= c->max_write_size);
581 ubifs_assert(wbuf->size % c->min_io_size == 0);
521 ubifs_assert(mutex_is_locked(&wbuf->io_mutex)); 582 ubifs_assert(mutex_is_locked(&wbuf->io_mutex));
583 ubifs_assert(!c->ro_media && !c->ro_mount);
584 ubifs_assert(!c->space_fixup);
585 if (c->leb_size - wbuf->offs >= c->max_write_size)
586 ubifs_assert(!((wbuf->offs + wbuf->size) % c->max_write_size));
522 587
523 if (c->leb_size - wbuf->offs - wbuf->used < aligned_len) { 588 if (c->leb_size - wbuf->offs - wbuf->used < aligned_len) {
524 err = -ENOSPC; 589 err = -ENOSPC;
@@ -527,7 +592,7 @@ int ubifs_wbuf_write_nolock(struct ubifs_wbuf *wbuf, void *buf, int len)
527 592
528 cancel_wbuf_timer_nolock(wbuf); 593 cancel_wbuf_timer_nolock(wbuf);
529 594
530 if (c->ro_media) 595 if (c->ro_error)
531 return -EROFS; 596 return -EROFS;
532 597
533 if (aligned_len <= wbuf->avail) { 598 if (aligned_len <= wbuf->avail) {
@@ -541,14 +606,18 @@ int ubifs_wbuf_write_nolock(struct ubifs_wbuf *wbuf, void *buf, int len)
541 dbg_io("flush jhead %s wbuf to LEB %d:%d", 606 dbg_io("flush jhead %s wbuf to LEB %d:%d",
542 dbg_jhead(wbuf->jhead), wbuf->lnum, wbuf->offs); 607 dbg_jhead(wbuf->jhead), wbuf->lnum, wbuf->offs);
543 err = ubi_leb_write(c->ubi, wbuf->lnum, wbuf->buf, 608 err = ubi_leb_write(c->ubi, wbuf->lnum, wbuf->buf,
544 wbuf->offs, c->min_io_size, 609 wbuf->offs, wbuf->size,
545 wbuf->dtype); 610 wbuf->dtype);
546 if (err) 611 if (err)
547 goto out; 612 goto out;
548 613
549 spin_lock(&wbuf->lock); 614 spin_lock(&wbuf->lock);
550 wbuf->offs += c->min_io_size; 615 wbuf->offs += wbuf->size;
551 wbuf->avail = c->min_io_size; 616 if (c->leb_size - wbuf->offs >= c->max_write_size)
617 wbuf->size = c->max_write_size;
618 else
619 wbuf->size = c->leb_size - wbuf->offs;
620 wbuf->avail = wbuf->size;
552 wbuf->used = 0; 621 wbuf->used = 0;
553 wbuf->next_ino = 0; 622 wbuf->next_ino = 0;
554 spin_unlock(&wbuf->lock); 623 spin_unlock(&wbuf->lock);
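
For orientation in the rewritten write path that follows, a node is split into at most three parts: whatever tops off the current buffer (flushed with it), whole max-write-size chunks written directly, and a tail that stays buffered. With invented numbers, and assuming the buffer is currently max_write_size bytes, a 20 KiB node arriving at a buffer with 3 KiB used flushes 8 KiB, writes one 8 KiB chunk directly, and buffers the remaining 7 KiB:

#include <stdio.h>

int main(void)
{
        /* Invented sizes: 8 KiB write-buffer, 3 KiB already used. */
        int wbuf_size = 8192, used = 3072, len = 20480;
        int avail = wbuf_size - used;
        int remaining = len - avail;            /* after topping off */
        int direct = (remaining / wbuf_size) * wbuf_size;
        int tail = remaining - direct;

        printf("flush %d bytes, write %d directly, buffer %d\n",
               wbuf_size, direct, tail);        /* 8192, 8192, 7168 */
        return 0;
}
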
@@ -562,39 +631,63 @@ int ubifs_wbuf_write_nolock(struct ubifs_wbuf *wbuf, void *buf, int len)
562 goto exit; 631 goto exit;
563 } 632 }
564 633
565 /* 634 written = 0;
566 * The node is large enough and does not fit entirely within current 635
567 * minimal I/O unit. We have to fill and flush write-buffer and switch 636 if (wbuf->used) {
568 * to the next min. I/O unit. 637 /*
569 */ 638 * The node is large enough and does not fit entirely within
570 dbg_io("flush jhead %s wbuf to LEB %d:%d", 639 * current available space. We have to fill and flush
571 dbg_jhead(wbuf->jhead), wbuf->lnum, wbuf->offs); 640 * write-buffer and switch to the next max. write unit.
572 memcpy(wbuf->buf + wbuf->used, buf, wbuf->avail); 641 */
573 err = ubi_leb_write(c->ubi, wbuf->lnum, wbuf->buf, wbuf->offs, 642 dbg_io("flush jhead %s wbuf to LEB %d:%d",
574 c->min_io_size, wbuf->dtype); 643 dbg_jhead(wbuf->jhead), wbuf->lnum, wbuf->offs);
575 if (err) 644 memcpy(wbuf->buf + wbuf->used, buf, wbuf->avail);
576 goto out; 645 err = ubi_leb_write(c->ubi, wbuf->lnum, wbuf->buf, wbuf->offs,
646 wbuf->size, wbuf->dtype);
647 if (err)
648 goto out;
649
650 wbuf->offs += wbuf->size;
651 len -= wbuf->avail;
652 aligned_len -= wbuf->avail;
653 written += wbuf->avail;
654 } else if (wbuf->offs & (c->max_write_size - 1)) {
655 /*
656 * The write-buffer offset is not aligned to
657 * @c->max_write_size and @wbuf->size is less than
658 * @c->max_write_size. Write @wbuf->size bytes to make sure the
659 * following writes are done in optimal @c->max_write_size
660 * chunks.
661 */
662 dbg_io("write %d bytes to LEB %d:%d",
663 wbuf->size, wbuf->lnum, wbuf->offs);
664 err = ubi_leb_write(c->ubi, wbuf->lnum, buf, wbuf->offs,
665 wbuf->size, wbuf->dtype);
666 if (err)
667 goto out;
577 668
578 offs = wbuf->offs + c->min_io_size; 669 wbuf->offs += wbuf->size;
579 len -= wbuf->avail; 670 len -= wbuf->size;
580 aligned_len -= wbuf->avail; 671 aligned_len -= wbuf->size;
581 written = wbuf->avail; 672 written += wbuf->size;
673 }
582 674
583 /* 675 /*
584 * The remaining data may span more whole min. I/O units, so write the 676 * The remaining data may span more whole max. write units, so write the
585 * remainder, a multiple of the min. I/O unit size, directly to the flash 677 * remainder, a multiple of the max. write unit size, directly to the flash
586 * media. We align the node length to an 8-byte boundary because we flush 678 * media. We align the node length to an 8-byte boundary because we flush
587 * the wbuf anyway if the remaining space is less than 8 bytes. 679 * the wbuf anyway if the remaining space is less than 8 bytes.
588 */ 680 */
589 n = aligned_len >> c->min_io_shift; 681 n = aligned_len >> c->max_write_shift;
590 if (n) { 682 if (n) {
591 n <<= c->min_io_shift; 683 n <<= c->max_write_shift;
592 dbg_io("write %d bytes to LEB %d:%d", n, wbuf->lnum, offs); 684 dbg_io("write %d bytes to LEB %d:%d", n, wbuf->lnum,
593 err = ubi_leb_write(c->ubi, wbuf->lnum, buf + written, offs, n, 685 wbuf->offs);
594 wbuf->dtype); 686 err = ubi_leb_write(c->ubi, wbuf->lnum, buf + written,
687 wbuf->offs, n, wbuf->dtype);
595 if (err) 688 if (err)
596 goto out; 689 goto out;
597 offs += n; 690 wbuf->offs += n;
598 aligned_len -= n; 691 aligned_len -= n;
599 len -= n; 692 len -= n;
600 written += n; 693 written += n;
@@ -604,14 +697,17 @@ int ubifs_wbuf_write_nolock(struct ubifs_wbuf *wbuf, void *buf, int len)
604 if (aligned_len) 697 if (aligned_len)
605 /* 698 /*
606 * And now we are left with what does not fill a whole 699 * And now we are left with what does not fill a whole
607 * min. I/O unit, so write it to the write-buffer and we are 700 * max. write unit, so write it to the write-buffer and we are
608 * done. 701 * done.
609 */ 702 */
610 memcpy(wbuf->buf, buf + written, len); 703 memcpy(wbuf->buf, buf + written, len);
611 704
612 wbuf->offs = offs; 705 if (c->leb_size - wbuf->offs >= c->max_write_size)
706 wbuf->size = c->max_write_size;
707 else
708 wbuf->size = c->leb_size - wbuf->offs;
709 wbuf->avail = wbuf->size - aligned_len;
613 wbuf->used = aligned_len; 710 wbuf->used = aligned_len;
614 wbuf->avail = c->min_io_size - aligned_len;
615 wbuf->next_ino = 0; 711 wbuf->next_ino = 0;
616 spin_unlock(&wbuf->lock); 712 spin_unlock(&wbuf->lock);
617 713
@@ -663,8 +759,10 @@ int ubifs_write_node(struct ubifs_info *c, void *buf, int len, int lnum,
663 buf_len); 759 buf_len);
664 ubifs_assert(lnum >= 0 && lnum < c->leb_cnt && offs >= 0); 760 ubifs_assert(lnum >= 0 && lnum < c->leb_cnt && offs >= 0);
665 ubifs_assert(offs % c->min_io_size == 0 && offs < c->leb_size); 761 ubifs_assert(offs % c->min_io_size == 0 && offs < c->leb_size);
762 ubifs_assert(!c->ro_media && !c->ro_mount);
763 ubifs_assert(!c->space_fixup);
666 764
667 if (c->ro_media) 765 if (c->ro_error)
668 return -EROFS; 766 return -EROFS;
669 767
670 ubifs_prepare_node(c, buf, len, 1); 768 ubifs_prepare_node(c, buf, len, 1);
@@ -815,7 +913,8 @@ int ubifs_read_node(const struct ubifs_info *c, void *buf, int type, int len,
815 return 0; 913 return 0;
816 914
817out: 915out:
818 ubifs_err("bad node at LEB %d:%d", lnum, offs); 916 ubifs_err("bad node at LEB %d:%d, LEB mapping status %d", lnum, offs,
917 ubi_is_mapped(c->ubi, lnum));
819 dbg_dump_node(c, buf); 918 dbg_dump_node(c, buf);
820 dbg_dump_stack(); 919 dbg_dump_stack();
821 return -EINVAL; 920 return -EINVAL;
@@ -833,11 +932,11 @@ int ubifs_wbuf_init(struct ubifs_info *c, struct ubifs_wbuf *wbuf)
833{ 932{
834 size_t size; 933 size_t size;
835 934
836 wbuf->buf = kmalloc(c->min_io_size, GFP_KERNEL); 935 wbuf->buf = kmalloc(c->max_write_size, GFP_KERNEL);
837 if (!wbuf->buf) 936 if (!wbuf->buf)
838 return -ENOMEM; 937 return -ENOMEM;
839 938
840 size = (c->min_io_size / UBIFS_CH_SZ + 1) * sizeof(ino_t); 939 size = (c->max_write_size / UBIFS_CH_SZ + 1) * sizeof(ino_t);
841 wbuf->inodes = kmalloc(size, GFP_KERNEL); 940 wbuf->inodes = kmalloc(size, GFP_KERNEL);
842 if (!wbuf->inodes) { 941 if (!wbuf->inodes) {
843 kfree(wbuf->buf); 942 kfree(wbuf->buf);
@@ -847,7 +946,14 @@ int ubifs_wbuf_init(struct ubifs_info *c, struct ubifs_wbuf *wbuf)
847 946
848 wbuf->used = 0; 947 wbuf->used = 0;
849 wbuf->lnum = wbuf->offs = -1; 948 wbuf->lnum = wbuf->offs = -1;
850 wbuf->avail = c->min_io_size; 949 /*
950 * If the LEB starts at the max. write size aligned address, then
951 * write-buffer size has to be set to @c->max_write_size. Otherwise,
952 * set it to something smaller so that it ends at the closest max.
953 * write size boundary.
954 */
955 size = c->max_write_size - (c->leb_start % c->max_write_size);
956 wbuf->avail = wbuf->size = size;
851 wbuf->dtype = UBI_UNKNOWN; 957 wbuf->dtype = UBI_UNKNOWN;
852 wbuf->sync_callback = NULL; 958 wbuf->sync_callback = NULL;
853 mutex_init(&wbuf->io_mutex); 959 mutex_init(&wbuf->io_mutex);
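
The io.c hunks above switch the write-buffer from a single min. I/O unit to @c->max_write_size and recompute @wbuf->size after every flush, so the buffer always ends on a max. write size boundary (or at the end of the LEB). A stand-alone sketch of just that sizing rule follows; the toy_* names and flash geometry are invented for illustration, this is not the kernel code.

#include <assert.h>
#include <stdio.h>

struct toy_wbuf { int offs, size, avail; };

/*
 * After a flush at 'offs', the buffer spans up to the next max. write
 * size boundary, but never past the end of the LEB.
 */
static void toy_refresh_size(struct toy_wbuf *w, int leb_size, int max_write_size)
{
        if (leb_size - w->offs >= max_write_size)
                w->size = max_write_size;
        else
                w->size = leb_size - w->offs;
        w->avail = w->size;
}

int main(void)
{
        struct toy_wbuf w = { 0, 0, 0 };
        const int leb_size = 126976, max_write_size = 2048; /* made up */

        while (w.offs < leb_size) {
                toy_refresh_size(&w, leb_size, max_write_size);
                assert(w.size > 0 && w.size <= max_write_size);
                w.offs += w.size; /* pretend the buffer was filled and flushed */
        }
        printf("covered the whole LEB, final offs = %d\n", w.offs);
        return 0;
}
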
diff --git a/fs/ubifs/ioctl.c b/fs/ubifs/ioctl.c
index 8aacd64957a2..548acf494afd 100644
--- a/fs/ubifs/ioctl.c
+++ b/fs/ubifs/ioctl.c
@@ -160,7 +160,7 @@ long ubifs_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
160 if (IS_RDONLY(inode)) 160 if (IS_RDONLY(inode))
161 return -EROFS; 161 return -EROFS;
162 162
163 if (!is_owner_or_cap(inode)) 163 if (!inode_owner_or_capable(inode))
164 return -EACCES; 164 return -EACCES;
165 165
166 if (get_user(flags, (int __user *) arg)) 166 if (get_user(flags, (int __user *) arg))
diff --git a/fs/ubifs/journal.c b/fs/ubifs/journal.c
index d321baeca68d..cef0460f4c54 100644
--- a/fs/ubifs/journal.c
+++ b/fs/ubifs/journal.c
@@ -122,11 +122,12 @@ static int reserve_space(struct ubifs_info *c, int jhead, int len)
122 * better to try to allocate space at the ends of eraseblocks. This is 122 * better to try to allocate space at the ends of eraseblocks. This is
123 * what the squeeze parameter does. 123 * what the squeeze parameter does.
124 */ 124 */
125 ubifs_assert(!c->ro_media && !c->ro_mount);
125 squeeze = (jhead == BASEHD); 126 squeeze = (jhead == BASEHD);
126again: 127again:
127 mutex_lock_nested(&wbuf->io_mutex, wbuf->jhead); 128 mutex_lock_nested(&wbuf->io_mutex, wbuf->jhead);
128 129
129 if (c->ro_media) { 130 if (c->ro_error) {
130 err = -EROFS; 131 err = -EROFS;
131 goto out_unlock; 132 goto out_unlock;
132 } 133 }
@@ -140,14 +141,8 @@ again:
140 * LEB with some empty space. 141 * LEB with some empty space.
141 */ 142 */
142 lnum = ubifs_find_free_space(c, len, &offs, squeeze); 143 lnum = ubifs_find_free_space(c, len, &offs, squeeze);
143 if (lnum >= 0) { 144 if (lnum >= 0)
144 /* Found an LEB, add it to the journal head */
145 err = ubifs_add_bud_to_log(c, jhead, lnum, offs);
146 if (err)
147 goto out_return;
148 /* A new bud was successfully allocated and added to the log */
149 goto out; 145 goto out;
150 }
151 146
152 err = lnum; 147 err = lnum;
153 if (err != -ENOSPC) 148 if (err != -ENOSPC)
@@ -202,12 +197,23 @@ again:
202 return 0; 197 return 0;
203 } 198 }
204 199
205 err = ubifs_add_bud_to_log(c, jhead, lnum, 0);
206 if (err)
207 goto out_return;
208 offs = 0; 200 offs = 0;
209 201
210out: 202out:
203 /*
204 * Make sure we synchronize the write-buffer before we add the new bud
205 * to the log. Otherwise we may have a power cut after the log
206 * reference node for the last bud (@lnum) is written but before the
207 * write-buffer data are written to the next-to-last bud
208 * (@wbuf->lnum). The effect would be that recovery would see
209 * corruption in the next-to-last bud.
210 */
211 err = ubifs_wbuf_sync_nolock(wbuf);
212 if (err)
213 goto out_return;
214 err = ubifs_add_bud_to_log(c, jhead, lnum, offs);
215 if (err)
216 goto out_return;
211 err = ubifs_wbuf_seek_nolock(wbuf, lnum, offs, wbuf->dtype); 217 err = ubifs_wbuf_seek_nolock(wbuf, lnum, offs, wbuf->dtype);
212 if (err) 218 if (err)
213 goto out_unlock; 219 goto out_unlock;
@@ -379,10 +385,8 @@ out:
379 if (err == -ENOSPC) { 385 if (err == -ENOSPC) {
380 /* These are budgeting problems, so print useful information */ 386 /* These are budgeting problems, so print useful information */
381 down_write(&c->commit_sem); 387 down_write(&c->commit_sem);
382 spin_lock(&c->space_lock);
383 dbg_dump_stack(); 388 dbg_dump_stack();
384 dbg_dump_budg(c); 389 dbg_dump_budg(c, &c->bi);
385 spin_unlock(&c->space_lock);
386 dbg_dump_lprops(c); 390 dbg_dump_lprops(c);
387 cmt_retries = dbg_check_lprops(c); 391 cmt_retries = dbg_check_lprops(c);
388 up_write(&c->commit_sem); 392 up_write(&c->commit_sem);
@@ -665,6 +669,7 @@ out_free:
665 669
666out_release: 670out_release:
667 release_head(c, BASEHD); 671 release_head(c, BASEHD);
672 kfree(dent);
668out_ro: 673out_ro:
669 ubifs_ro_mode(c, err); 674 ubifs_ro_mode(c, err);
670 if (last_reference) 675 if (last_reference)
@@ -689,7 +694,7 @@ int ubifs_jnl_write_data(struct ubifs_info *c, const struct inode *inode,
689{ 694{
690 struct ubifs_data_node *data; 695 struct ubifs_data_node *data;
691 int err, lnum, offs, compr_type, out_len; 696 int err, lnum, offs, compr_type, out_len;
692 int dlen = UBIFS_DATA_NODE_SZ + UBIFS_BLOCK_SIZE * WORST_COMPR_FACTOR; 697 int dlen = COMPRESSED_DATA_NODE_BUF_SZ, allocated = 1;
693 struct ubifs_inode *ui = ubifs_inode(inode); 698 struct ubifs_inode *ui = ubifs_inode(inode);
694 699
695 dbg_jnl("ino %lu, blk %u, len %d, key %s", 700 dbg_jnl("ino %lu, blk %u, len %d, key %s",
@@ -697,9 +702,19 @@ int ubifs_jnl_write_data(struct ubifs_info *c, const struct inode *inode,
697 DBGKEY(key)); 702 DBGKEY(key));
698 ubifs_assert(len <= UBIFS_BLOCK_SIZE); 703 ubifs_assert(len <= UBIFS_BLOCK_SIZE);
699 704
700 data = kmalloc(dlen, GFP_NOFS); 705 data = kmalloc(dlen, GFP_NOFS | __GFP_NOWARN);
701 if (!data) 706 if (!data) {
702 return -ENOMEM; 707 /*
708 * Fall-back to the write reserve buffer. Note, we might be
709 * currently on the memory reclaim path, when the kernel is
710 * trying to free some memory by writing out dirty pages. The
711 * write reserve buffer helps us to guarantee that we are
712 * always able to write the data.
713 */
714 allocated = 0;
715 mutex_lock(&c->write_reserve_mutex);
716 data = c->write_reserve_buf;
717 }
703 718
704 data->ch.node_type = UBIFS_DATA_NODE; 719 data->ch.node_type = UBIFS_DATA_NODE;
705 key_write(c, key, &data->key); 720 key_write(c, key, &data->key);
@@ -735,7 +750,10 @@ int ubifs_jnl_write_data(struct ubifs_info *c, const struct inode *inode,
735 goto out_ro; 750 goto out_ro;
736 751
737 finish_reservation(c); 752 finish_reservation(c);
738 kfree(data); 753 if (!allocated)
754 mutex_unlock(&c->write_reserve_mutex);
755 else
756 kfree(data);
739 return 0; 757 return 0;
740 758
741out_release: 759out_release:
@@ -744,7 +762,10 @@ out_ro:
744 ubifs_ro_mode(c, err); 762 ubifs_ro_mode(c, err);
745 finish_reservation(c); 763 finish_reservation(c);
746out_free: 764out_free:
747 kfree(data); 765 if (!allocated)
766 mutex_unlock(&c->write_reserve_mutex);
767 else
768 kfree(data);
748 return err; 769 return err;
749} 770}
750 771
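
The ubifs_jnl_write_data() hunks above add a fall-back to a preallocated write reserve buffer when kmalloc() fails, which can happen while the kernel is reclaiming memory by writing out dirty pages. Below is a user-space sketch of the same acquire/release pattern, with pthreads standing in for the kernel mutex and hypothetical toy_* names.

#include <assert.h>
#include <pthread.h>
#include <stdlib.h>

#define TOY_RESERVE_SZ 4096 /* invented size of the reserve buffer */

static pthread_mutex_t toy_reserve_mutex = PTHREAD_MUTEX_INITIALIZER;
static char toy_reserve_buf[TOY_RESERVE_SZ];

/* Try a normal allocation; on failure fall back to the shared reserve */
static void *toy_get_buf(size_t len, int *allocated)
{
        void *buf;

        assert(len <= TOY_RESERVE_SZ); /* requests are assumed bounded */
        buf = malloc(len);
        if (buf) {
                *allocated = 1;
                return buf;
        }
        *allocated = 0;
        pthread_mutex_lock(&toy_reserve_mutex); /* serialize reserve users */
        return toy_reserve_buf;
}

/* Release symmetrically: free or unlock, depending on how we got it */
static void toy_put_buf(void *buf, int allocated)
{
        if (allocated)
                free(buf);
        else
                pthread_mutex_unlock(&toy_reserve_mutex);
}
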
diff --git a/fs/ubifs/key.h b/fs/ubifs/key.h
index 0f530c684f0b..92a8491a8f8c 100644
--- a/fs/ubifs/key.h
+++ b/fs/ubifs/key.h
@@ -306,6 +306,20 @@ static inline void trun_key_init(const struct ubifs_info *c,
306} 306}
307 307
308/** 308/**
309 * invalid_key_init - initialize invalid node key.
310 * @c: UBIFS file-system description object
311 * @key: key to initialize
312 *
313 * This is a helper function which marks a @key object as invalid.
314 */
315static inline void invalid_key_init(const struct ubifs_info *c,
316 union ubifs_key *key)
317{
318 key->u32[0] = 0xDEADBEAF;
319 key->u32[1] = UBIFS_INVALID_KEY;
320}
321
322/**
309 * key_type - get key type. 323 * key_type - get key type.
310 * @c: UBIFS file-system description object 324 * @c: UBIFS file-system description object
311 * @key: key to get type of 325 * @key: key to get type of
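
The new invalid_key_init() helper above poisons a key with a recognizable bit pattern so that accidental use of an uninitialized key stands out in node dumps. A minimal sketch of the idea; TOY_INVALID_KEY is a made-up stand-in, since the value of UBIFS_INVALID_KEY is not shown in this diff.

#include <stdbool.h>
#include <stdint.h>

#define TOY_INVALID_KEY 0xFFFFFFFFu /* invented stand-in for UBIFS_INVALID_KEY */

union toy_key { uint32_t u32[2]; };

/* Poison the key with a recognizable pattern, as invalid_key_init() does */
static void toy_invalid_key_init(union toy_key *key)
{
        key->u32[0] = 0xDEADBEAF; /* sic, same spelling as the patch */
        key->u32[1] = TOY_INVALID_KEY;
}

/* An accidentally used key is then easy to spot in node dumps */
static bool toy_key_is_poisoned(const union toy_key *key)
{
        return key->u32[0] == 0xDEADBEAF && key->u32[1] == TOY_INVALID_KEY;
}
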
diff --git a/fs/ubifs/log.c b/fs/ubifs/log.c
index c345e125f42c..affea9494ae2 100644
--- a/fs/ubifs/log.c
+++ b/fs/ubifs/log.c
@@ -100,20 +100,6 @@ struct ubifs_wbuf *ubifs_get_wbuf(struct ubifs_info *c, int lnum)
100} 100}
101 101
102/** 102/**
103 * next_log_lnum - switch to the next log LEB.
104 * @c: UBIFS file-system description object
105 * @lnum: current log LEB
106 */
107static inline int next_log_lnum(const struct ubifs_info *c, int lnum)
108{
109 lnum += 1;
110 if (lnum > c->log_last)
111 lnum = UBIFS_LOG_LNUM;
112
113 return lnum;
114}
115
116/**
117 * empty_log_bytes - calculate amount of empty space in the log. 103 * empty_log_bytes - calculate amount of empty space in the log.
118 * @c: UBIFS file-system description object 104 * @c: UBIFS file-system description object
119 */ 105 */
@@ -159,7 +145,7 @@ void ubifs_add_bud(struct ubifs_info *c, struct ubifs_bud *bud)
159 jhead = &c->jheads[bud->jhead]; 145 jhead = &c->jheads[bud->jhead];
160 list_add_tail(&bud->list, &jhead->buds_list); 146 list_add_tail(&bud->list, &jhead->buds_list);
161 } else 147 } else
162 ubifs_assert(c->replaying && (c->vfs_sb->s_flags & MS_RDONLY)); 148 ubifs_assert(c->replaying && c->ro_mount);
163 149
164 /* 150 /*
165 * Note, although this is a new bud, we anyway account this space now, 151 * Note, although this is a new bud, we anyway account this space now,
@@ -175,26 +161,6 @@ void ubifs_add_bud(struct ubifs_info *c, struct ubifs_bud *bud)
175} 161}
176 162
177/** 163/**
178 * ubifs_create_buds_lists - create journal head buds lists for remount rw.
179 * @c: UBIFS file-system description object
180 */
181void ubifs_create_buds_lists(struct ubifs_info *c)
182{
183 struct rb_node *p;
184
185 spin_lock(&c->buds_lock);
186 p = rb_first(&c->buds);
187 while (p) {
188 struct ubifs_bud *bud = rb_entry(p, struct ubifs_bud, rb);
189 struct ubifs_jhead *jhead = &c->jheads[bud->jhead];
190
191 list_add_tail(&bud->list, &jhead->buds_list);
192 p = rb_next(p);
193 }
194 spin_unlock(&c->buds_lock);
195}
196
197/**
198 * ubifs_add_bud_to_log - add a new bud to the log. 164 * ubifs_add_bud_to_log - add a new bud to the log.
199 * @c: UBIFS file-system description object 165 * @c: UBIFS file-system description object
200 * @jhead: journal head the bud belongs to 166 * @jhead: journal head the bud belongs to
@@ -223,8 +189,8 @@ int ubifs_add_bud_to_log(struct ubifs_info *c, int jhead, int lnum, int offs)
223 } 189 }
224 190
225 mutex_lock(&c->log_mutex); 191 mutex_lock(&c->log_mutex);
226 192 ubifs_assert(!c->ro_media && !c->ro_mount);
227 if (c->ro_media) { 193 if (c->ro_error) {
228 err = -EROFS; 194 err = -EROFS;
229 goto out_unlock; 195 goto out_unlock;
230 } 196 }
@@ -277,7 +243,7 @@ int ubifs_add_bud_to_log(struct ubifs_info *c, int jhead, int lnum, int offs)
277 ref->jhead = cpu_to_le32(jhead); 243 ref->jhead = cpu_to_le32(jhead);
278 244
279 if (c->lhead_offs > c->leb_size - c->ref_node_alsz) { 245 if (c->lhead_offs > c->leb_size - c->ref_node_alsz) {
280 c->lhead_lnum = next_log_lnum(c, c->lhead_lnum); 246 c->lhead_lnum = ubifs_next_log_lnum(c, c->lhead_lnum);
281 c->lhead_offs = 0; 247 c->lhead_offs = 0;
282 } 248 }
283 249
@@ -445,7 +411,7 @@ int ubifs_log_start_commit(struct ubifs_info *c, int *ltail_lnum)
445 411
446 /* Switch to the next log LEB */ 412 /* Switch to the next log LEB */
447 if (c->lhead_offs) { 413 if (c->lhead_offs) {
448 c->lhead_lnum = next_log_lnum(c, c->lhead_lnum); 414 c->lhead_lnum = ubifs_next_log_lnum(c, c->lhead_lnum);
449 c->lhead_offs = 0; 415 c->lhead_offs = 0;
450 } 416 }
451 417
@@ -466,7 +432,7 @@ int ubifs_log_start_commit(struct ubifs_info *c, int *ltail_lnum)
466 432
467 c->lhead_offs += len; 433 c->lhead_offs += len;
468 if (c->lhead_offs == c->leb_size) { 434 if (c->lhead_offs == c->leb_size) {
469 c->lhead_lnum = next_log_lnum(c, c->lhead_lnum); 435 c->lhead_lnum = ubifs_next_log_lnum(c, c->lhead_lnum);
470 c->lhead_offs = 0; 436 c->lhead_offs = 0;
471 } 437 }
472 438
@@ -553,7 +519,7 @@ int ubifs_log_post_commit(struct ubifs_info *c, int old_ltail_lnum)
553 } 519 }
554 mutex_lock(&c->log_mutex); 520 mutex_lock(&c->log_mutex);
555 for (lnum = old_ltail_lnum; lnum != c->ltail_lnum; 521 for (lnum = old_ltail_lnum; lnum != c->ltail_lnum;
556 lnum = next_log_lnum(c, lnum)) { 522 lnum = ubifs_next_log_lnum(c, lnum)) {
557 dbg_log("unmap log LEB %d", lnum); 523 dbg_log("unmap log LEB %d", lnum);
558 err = ubifs_leb_unmap(c, lnum); 524 err = ubifs_leb_unmap(c, lnum);
559 if (err) 525 if (err)
@@ -662,7 +628,7 @@ static int add_node(struct ubifs_info *c, void *buf, int *lnum, int *offs,
662 err = ubifs_leb_change(c, *lnum, buf, sz, UBI_SHORTTERM); 628 err = ubifs_leb_change(c, *lnum, buf, sz, UBI_SHORTTERM);
663 if (err) 629 if (err)
664 return err; 630 return err;
665 *lnum = next_log_lnum(c, *lnum); 631 *lnum = ubifs_next_log_lnum(c, *lnum);
666 *offs = 0; 632 *offs = 0;
667 } 633 }
668 memcpy(buf + *offs, node, len); 634 memcpy(buf + *offs, node, len);
@@ -732,7 +698,7 @@ int ubifs_consolidate_log(struct ubifs_info *c)
732 ubifs_scan_destroy(sleb); 698 ubifs_scan_destroy(sleb);
733 if (lnum == c->lhead_lnum) 699 if (lnum == c->lhead_lnum)
734 break; 700 break;
735 lnum = next_log_lnum(c, lnum); 701 lnum = ubifs_next_log_lnum(c, lnum);
736 } 702 }
737 if (offs) { 703 if (offs) {
738 int sz = ALIGN(offs, c->min_io_size); 704 int sz = ALIGN(offs, c->min_io_size);
@@ -752,7 +718,7 @@ int ubifs_consolidate_log(struct ubifs_info *c)
752 /* Unmap remaining LEBs */ 718 /* Unmap remaining LEBs */
753 lnum = write_lnum; 719 lnum = write_lnum;
754 do { 720 do {
755 lnum = next_log_lnum(c, lnum); 721 lnum = ubifs_next_log_lnum(c, lnum);
756 err = ubifs_leb_unmap(c, lnum); 722 err = ubifs_leb_unmap(c, lnum);
757 if (err) 723 if (err)
758 return err; 724 return err;
diff --git a/fs/ubifs/lprops.c b/fs/ubifs/lprops.c
index 4d4ca388889b..667884f4a615 100644
--- a/fs/ubifs/lprops.c
+++ b/fs/ubifs/lprops.c
@@ -1007,21 +1007,11 @@ out:
1007} 1007}
1008 1008
1009/** 1009/**
1010 * struct scan_check_data - data provided to scan callback function.
1011 * @lst: LEB properties statistics
1012 * @err: error code
1013 */
1014struct scan_check_data {
1015 struct ubifs_lp_stats lst;
1016 int err;
1017};
1018
1019/**
1020 * scan_check_cb - scan callback. 1010 * scan_check_cb - scan callback.
1021 * @c: the UBIFS file-system description object 1011 * @c: the UBIFS file-system description object
1022 * @lp: LEB properties to scan 1012 * @lp: LEB properties to scan
1023 * @in_tree: whether the LEB properties are in main memory 1013 * @in_tree: whether the LEB properties are in main memory
1024 * @data: information passed to and from the caller of the scan 1014 * @lst: lprops statistics to update
1025 * 1015 *
1026 * This function returns a code that indicates whether the scan should continue 1016 * This function returns a code that indicates whether the scan should continue
1027 * (%LPT_SCAN_CONTINUE), whether the LEB properties should be added to the tree 1017 * (%LPT_SCAN_CONTINUE), whether the LEB properties should be added to the tree
@@ -1030,12 +1020,12 @@ struct scan_check_data {
1030 */ 1020 */
1031static int scan_check_cb(struct ubifs_info *c, 1021static int scan_check_cb(struct ubifs_info *c,
1032 const struct ubifs_lprops *lp, int in_tree, 1022 const struct ubifs_lprops *lp, int in_tree,
1033 struct scan_check_data *data) 1023 struct ubifs_lp_stats *lst)
1034{ 1024{
1035 struct ubifs_scan_leb *sleb; 1025 struct ubifs_scan_leb *sleb;
1036 struct ubifs_scan_node *snod; 1026 struct ubifs_scan_node *snod;
1037 struct ubifs_lp_stats *lst = &data->lst; 1027 int cat, lnum = lp->lnum, is_idx = 0, used = 0, free, dirty, ret;
1038 int cat, lnum = lp->lnum, is_idx = 0, used = 0, free, dirty; 1028 void *buf = NULL;
1039 1029
1040 cat = lp->flags & LPROPS_CAT_MASK; 1030 cat = lp->flags & LPROPS_CAT_MASK;
1041 if (cat != LPROPS_UNCAT) { 1031 if (cat != LPROPS_UNCAT) {
@@ -1043,7 +1033,7 @@ static int scan_check_cb(struct ubifs_info *c,
1043 if (cat != (lp->flags & LPROPS_CAT_MASK)) { 1033 if (cat != (lp->flags & LPROPS_CAT_MASK)) {
1044 ubifs_err("bad LEB category %d expected %d", 1034 ubifs_err("bad LEB category %d expected %d",
1045 (lp->flags & LPROPS_CAT_MASK), cat); 1035 (lp->flags & LPROPS_CAT_MASK), cat);
1046 goto out; 1036 return -EINVAL;
1047 } 1037 }
1048 } 1038 }
1049 1039
@@ -1077,7 +1067,7 @@ static int scan_check_cb(struct ubifs_info *c,
1077 } 1067 }
1078 if (!found) { 1068 if (!found) {
1079 ubifs_err("bad LPT list (category %d)", cat); 1069 ubifs_err("bad LPT list (category %d)", cat);
1080 goto out; 1070 return -EINVAL;
1081 } 1071 }
1082 } 1072 }
1083 } 1073 }
@@ -1089,36 +1079,40 @@ static int scan_check_cb(struct ubifs_info *c,
1089 if ((lp->hpos != -1 && heap->arr[lp->hpos]->lnum != lnum) || 1079 if ((lp->hpos != -1 && heap->arr[lp->hpos]->lnum != lnum) ||
1090 lp != heap->arr[lp->hpos]) { 1080 lp != heap->arr[lp->hpos]) {
1091 ubifs_err("bad LPT heap (category %d)", cat); 1081 ubifs_err("bad LPT heap (category %d)", cat);
1092 goto out; 1082 return -EINVAL;
1093 } 1083 }
1094 } 1084 }
1095 1085
1096 sleb = ubifs_scan(c, lnum, 0, c->dbg->buf, 0); 1086 buf = __vmalloc(c->leb_size, GFP_NOFS, PAGE_KERNEL);
1097 if (IS_ERR(sleb)) { 1087 if (!buf)
1098 /* 1088 return -ENOMEM;
1099 * After an unclean unmount, empty and freeable LEBs
1100 * may contain garbage.
1101 */
1102 if (lp->free == c->leb_size) {
1103 ubifs_err("scan errors were in empty LEB "
1104 "- continuing checking");
1105 lst->empty_lebs += 1;
1106 lst->total_free += c->leb_size;
1107 lst->total_dark += ubifs_calc_dark(c, c->leb_size);
1108 return LPT_SCAN_CONTINUE;
1109 }
1110 1089
1111 if (lp->free + lp->dirty == c->leb_size && 1090 /*
1112 !(lp->flags & LPROPS_INDEX)) { 1091 * After an unclean unmount, empty and freeable LEBs
1113 ubifs_err("scan errors were in freeable LEB " 1092 * may contain garbage - do not scan them.
1114 "- continuing checking"); 1093 */
1115 lst->total_free += lp->free; 1094 if (lp->free == c->leb_size) {
1116 lst->total_dirty += lp->dirty; 1095 lst->empty_lebs += 1;
1117 lst->total_dark += ubifs_calc_dark(c, c->leb_size); 1096 lst->total_free += c->leb_size;
1118 return LPT_SCAN_CONTINUE; 1097 lst->total_dark += ubifs_calc_dark(c, c->leb_size);
1098 return LPT_SCAN_CONTINUE;
1099 }
1100 if (lp->free + lp->dirty == c->leb_size &&
1101 !(lp->flags & LPROPS_INDEX)) {
1102 lst->total_free += lp->free;
1103 lst->total_dirty += lp->dirty;
1104 lst->total_dark += ubifs_calc_dark(c, c->leb_size);
1105 return LPT_SCAN_CONTINUE;
1106 }
1107
1108 sleb = ubifs_scan(c, lnum, 0, buf, 0);
1109 if (IS_ERR(sleb)) {
1110 ret = PTR_ERR(sleb);
1111 if (ret == -EUCLEAN) {
1112 dbg_dump_lprops(c);
1113 dbg_dump_budg(c, &c->bi);
1119 } 1114 }
1120 data->err = PTR_ERR(sleb); 1115 goto out;
1121 return LPT_SCAN_STOP;
1122 } 1116 }
1123 1117
1124 is_idx = -1; 1118 is_idx = -1;
@@ -1236,6 +1230,7 @@ static int scan_check_cb(struct ubifs_info *c,
1236 } 1230 }
1237 1231
1238 ubifs_scan_destroy(sleb); 1232 ubifs_scan_destroy(sleb);
1233 vfree(buf);
1239 return LPT_SCAN_CONTINUE; 1234 return LPT_SCAN_CONTINUE;
1240 1235
1241out_print: 1236out_print:
@@ -1245,9 +1240,10 @@ out_print:
1245 dbg_dump_leb(c, lnum); 1240 dbg_dump_leb(c, lnum);
1246out_destroy: 1241out_destroy:
1247 ubifs_scan_destroy(sleb); 1242 ubifs_scan_destroy(sleb);
1243 ret = -EINVAL;
1248out: 1244out:
1249 data->err = -EINVAL; 1245 vfree(buf);
1250 return LPT_SCAN_STOP; 1246 return ret;
1251} 1247}
1252 1248
1253/** 1249/**
@@ -1264,8 +1260,7 @@ out:
1264int dbg_check_lprops(struct ubifs_info *c) 1260int dbg_check_lprops(struct ubifs_info *c)
1265{ 1261{
1266 int i, err; 1262 int i, err;
1267 struct scan_check_data data; 1263 struct ubifs_lp_stats lst;
1268 struct ubifs_lp_stats *lst = &data.lst;
1269 1264
1270 if (!(ubifs_chk_flags & UBIFS_CHK_LPROPS)) 1265 if (!(ubifs_chk_flags & UBIFS_CHK_LPROPS))
1271 return 0; 1266 return 0;
@@ -1280,29 +1275,23 @@ int dbg_check_lprops(struct ubifs_info *c)
1280 return err; 1275 return err;
1281 } 1276 }
1282 1277
1283 memset(lst, 0, sizeof(struct ubifs_lp_stats)); 1278 memset(&lst, 0, sizeof(struct ubifs_lp_stats));
1284
1285 data.err = 0;
1286 err = ubifs_lpt_scan_nolock(c, c->main_first, c->leb_cnt - 1, 1279 err = ubifs_lpt_scan_nolock(c, c->main_first, c->leb_cnt - 1,
1287 (ubifs_lpt_scan_callback)scan_check_cb, 1280 (ubifs_lpt_scan_callback)scan_check_cb,
1288 &data); 1281 &lst);
1289 if (err && err != -ENOSPC) 1282 if (err && err != -ENOSPC)
1290 goto out; 1283 goto out;
1291 if (data.err) {
1292 err = data.err;
1293 goto out;
1294 }
1295 1284
1296 if (lst->empty_lebs != c->lst.empty_lebs || 1285 if (lst.empty_lebs != c->lst.empty_lebs ||
1297 lst->idx_lebs != c->lst.idx_lebs || 1286 lst.idx_lebs != c->lst.idx_lebs ||
1298 lst->total_free != c->lst.total_free || 1287 lst.total_free != c->lst.total_free ||
1299 lst->total_dirty != c->lst.total_dirty || 1288 lst.total_dirty != c->lst.total_dirty ||
1300 lst->total_used != c->lst.total_used) { 1289 lst.total_used != c->lst.total_used) {
1301 ubifs_err("bad overall accounting"); 1290 ubifs_err("bad overall accounting");
1302 ubifs_err("calculated: empty_lebs %d, idx_lebs %d, " 1291 ubifs_err("calculated: empty_lebs %d, idx_lebs %d, "
1303 "total_free %lld, total_dirty %lld, total_used %lld", 1292 "total_free %lld, total_dirty %lld, total_used %lld",
1304 lst->empty_lebs, lst->idx_lebs, lst->total_free, 1293 lst.empty_lebs, lst.idx_lebs, lst.total_free,
1305 lst->total_dirty, lst->total_used); 1294 lst.total_dirty, lst.total_used);
1306 ubifs_err("read from lprops: empty_lebs %d, idx_lebs %d, " 1295 ubifs_err("read from lprops: empty_lebs %d, idx_lebs %d, "
1307 "total_free %lld, total_dirty %lld, total_used %lld", 1296 "total_free %lld, total_dirty %lld, total_used %lld",
1308 c->lst.empty_lebs, c->lst.idx_lebs, c->lst.total_free, 1297 c->lst.empty_lebs, c->lst.idx_lebs, c->lst.total_free,
@@ -1311,11 +1300,11 @@ int dbg_check_lprops(struct ubifs_info *c)
1311 goto out; 1300 goto out;
1312 } 1301 }
1313 1302
1314 if (lst->total_dead != c->lst.total_dead || 1303 if (lst.total_dead != c->lst.total_dead ||
1315 lst->total_dark != c->lst.total_dark) { 1304 lst.total_dark != c->lst.total_dark) {
1316 ubifs_err("bad dead/dark space accounting"); 1305 ubifs_err("bad dead/dark space accounting");
1317 ubifs_err("calculated: total_dead %lld, total_dark %lld", 1306 ubifs_err("calculated: total_dead %lld, total_dark %lld",
1318 lst->total_dead, lst->total_dark); 1307 lst.total_dead, lst.total_dark);
1319 ubifs_err("read from lprops: total_dead %lld, total_dark %lld", 1308 ubifs_err("read from lprops: total_dead %lld, total_dark %lld",
1320 c->lst.total_dead, c->lst.total_dark); 1309 c->lst.total_dead, c->lst.total_dark);
1321 err = -EINVAL; 1310 err = -EINVAL;
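
The lprops.c hunks above drop the 'struct scan_check_data' wrapper: the callback now updates the statistics object directly and reports failures as negative return codes rather than through a side-channel 'err' field. A compact sketch of the resulting callback shape, with hypothetical toy_* names:

#include <errno.h>

enum { TOY_SCAN_CONTINUE = 1, TOY_SCAN_STOP = 2 };

struct toy_lp_stats { int empty_lebs; long long total_free; };

/*
 * Returns TOY_SCAN_CONTINUE, TOY_SCAN_STOP or a negative error code;
 * there is no separate 'err' field to smuggle failures through.
 */
static int toy_scan_check_cb(int lnum, int free_bytes, int leb_size,
                             struct toy_lp_stats *lst)
{
        if (lnum < 0)
                return -EINVAL; /* errors propagate as return values */
        if (free_bytes == leb_size) {
                /* Empty LEB: account it, do not scan it */
                lst->empty_lebs += 1;
                lst->total_free += leb_size;
        }
        return TOY_SCAN_CONTINUE;
}
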
diff --git a/fs/ubifs/lpt.c b/fs/ubifs/lpt.c
index 0084a33c4c69..ef5155e109a2 100644
--- a/fs/ubifs/lpt.c
+++ b/fs/ubifs/lpt.c
@@ -1270,10 +1270,9 @@ static int read_pnode(struct ubifs_info *c, struct ubifs_nnode *parent, int iip)
1270 lnum = branch->lnum; 1270 lnum = branch->lnum;
1271 offs = branch->offs; 1271 offs = branch->offs;
1272 pnode = kzalloc(sizeof(struct ubifs_pnode), GFP_NOFS); 1272 pnode = kzalloc(sizeof(struct ubifs_pnode), GFP_NOFS);
1273 if (!pnode) { 1273 if (!pnode)
1274 err = -ENOMEM; 1274 return -ENOMEM;
1275 goto out; 1275
1276 }
1277 if (lnum == 0) { 1276 if (lnum == 0) {
1278 /* 1277 /*
1279 * This pnode was not written which just means that the LEB 1278 * This pnode was not written which just means that the LEB
@@ -1363,6 +1362,7 @@ static int read_lsave(struct ubifs_info *c)
1363 goto out; 1362 goto out;
1364 for (i = 0; i < c->lsave_cnt; i++) { 1363 for (i = 0; i < c->lsave_cnt; i++) {
1365 int lnum = c->lsave[i]; 1364 int lnum = c->lsave[i];
1365 struct ubifs_lprops *lprops;
1366 1366
1367 /* 1367 /*
1368 * Due to automatic resizing, the values in the lsave table 1368 * Due to automatic resizing, the values in the lsave table
@@ -1370,7 +1370,11 @@ static int read_lsave(struct ubifs_info *c)
1370 */ 1370 */
1371 if (lnum >= c->leb_cnt) 1371 if (lnum >= c->leb_cnt)
1372 continue; 1372 continue;
1373 ubifs_lpt_lookup(c, lnum); 1373 lprops = ubifs_lpt_lookup(c, lnum);
1374 if (IS_ERR(lprops)) {
1375 err = PTR_ERR(lprops);
1376 goto out;
1377 }
1374 } 1378 }
1375out: 1379out:
1376 vfree(buf); 1380 vfree(buf);
diff --git a/fs/ubifs/lpt_commit.c b/fs/ubifs/lpt_commit.c
index d12535b7fc78..dfcb5748a7dc 100644
--- a/fs/ubifs/lpt_commit.c
+++ b/fs/ubifs/lpt_commit.c
@@ -29,6 +29,12 @@
29#include <linux/slab.h> 29#include <linux/slab.h>
30#include "ubifs.h" 30#include "ubifs.h"
31 31
32#ifdef CONFIG_UBIFS_FS_DEBUG
33static int dbg_populate_lsave(struct ubifs_info *c);
34#else
35#define dbg_populate_lsave(c) 0
36#endif
37
32/** 38/**
33 * first_dirty_cnode - find first dirty cnode. 39 * first_dirty_cnode - find first dirty cnode.
34 * @c: UBIFS file-system description object 40 * @c: UBIFS file-system description object
@@ -586,7 +592,7 @@ static struct ubifs_pnode *next_pnode_to_dirty(struct ubifs_info *c,
586 if (nnode->nbranch[iip].lnum) 592 if (nnode->nbranch[iip].lnum)
587 break; 593 break;
588 } 594 }
589 } while (iip >= UBIFS_LPT_FANOUT); 595 } while (iip >= UBIFS_LPT_FANOUT);
590 596
591 /* Go right */ 597 /* Go right */
592 nnode = ubifs_get_nnode(c, nnode, iip); 598 nnode = ubifs_get_nnode(c, nnode, iip);
@@ -705,6 +711,9 @@ static int make_tree_dirty(struct ubifs_info *c)
705 struct ubifs_pnode *pnode; 711 struct ubifs_pnode *pnode;
706 712
707 pnode = pnode_lookup(c, 0); 713 pnode = pnode_lookup(c, 0);
714 if (IS_ERR(pnode))
715 return PTR_ERR(pnode);
716
708 while (pnode) { 717 while (pnode) {
709 do_make_pnode_dirty(c, pnode); 718 do_make_pnode_dirty(c, pnode);
710 pnode = next_pnode_to_dirty(c, pnode); 719 pnode = next_pnode_to_dirty(c, pnode);
@@ -812,6 +821,10 @@ static void populate_lsave(struct ubifs_info *c)
812 c->lpt_drty_flgs |= LSAVE_DIRTY; 821 c->lpt_drty_flgs |= LSAVE_DIRTY;
813 ubifs_add_lpt_dirt(c, c->lsave_lnum, c->lsave_sz); 822 ubifs_add_lpt_dirt(c, c->lsave_lnum, c->lsave_sz);
814 } 823 }
824
825 if (dbg_populate_lsave(c))
826 return;
827
815 list_for_each_entry(lprops, &c->empty_list, list) { 828 list_for_each_entry(lprops, &c->empty_list, list) {
816 c->lsave[cnt++] = lprops->lnum; 829 c->lsave[cnt++] = lprops->lnum;
817 if (cnt >= c->lsave_cnt) 830 if (cnt >= c->lsave_cnt)
@@ -1625,29 +1638,35 @@ static int dbg_check_ltab_lnum(struct ubifs_info *c, int lnum)
1625{ 1638{
1626 int err, len = c->leb_size, dirty = 0, node_type, node_num, node_len; 1639 int err, len = c->leb_size, dirty = 0, node_type, node_num, node_len;
1627 int ret; 1640 int ret;
1628 void *buf = c->dbg->buf; 1641 void *buf, *p;
1629 1642
1630 if (!(ubifs_chk_flags & UBIFS_CHK_LPROPS)) 1643 if (!(ubifs_chk_flags & UBIFS_CHK_LPROPS))
1631 return 0; 1644 return 0;
1632 1645
1646 buf = p = __vmalloc(c->leb_size, GFP_NOFS, PAGE_KERNEL);
1647 if (!buf) {
1648 ubifs_err("cannot allocate memory for ltab checking");
1649 return 0;
1650 }
1651
1633 dbg_lp("LEB %d", lnum); 1652 dbg_lp("LEB %d", lnum);
1634 err = ubi_read(c->ubi, lnum, buf, 0, c->leb_size); 1653 err = ubi_read(c->ubi, lnum, buf, 0, c->leb_size);
1635 if (err) { 1654 if (err) {
1636 dbg_msg("ubi_read failed, LEB %d, error %d", lnum, err); 1655 dbg_msg("ubi_read failed, LEB %d, error %d", lnum, err);
1637 return err; 1656 goto out;
1638 } 1657 }
1639 while (1) { 1658 while (1) {
1640 if (!is_a_node(c, buf, len)) { 1659 if (!is_a_node(c, p, len)) {
1641 int i, pad_len; 1660 int i, pad_len;
1642 1661
1643 pad_len = get_pad_len(c, buf, len); 1662 pad_len = get_pad_len(c, p, len);
1644 if (pad_len) { 1663 if (pad_len) {
1645 buf += pad_len; 1664 p += pad_len;
1646 len -= pad_len; 1665 len -= pad_len;
1647 dirty += pad_len; 1666 dirty += pad_len;
1648 continue; 1667 continue;
1649 } 1668 }
1650 if (!dbg_is_all_ff(buf, len)) { 1669 if (!dbg_is_all_ff(p, len)) {
1651 dbg_msg("invalid empty space in LEB %d at %d", 1670 dbg_msg("invalid empty space in LEB %d at %d",
1652 lnum, c->leb_size - len); 1671 lnum, c->leb_size - len);
1653 err = -EINVAL; 1672 err = -EINVAL;
@@ -1665,16 +1684,21 @@ static int dbg_check_ltab_lnum(struct ubifs_info *c, int lnum)
1665 lnum, dirty, c->ltab[i].dirty); 1684 lnum, dirty, c->ltab[i].dirty);
1666 err = -EINVAL; 1685 err = -EINVAL;
1667 } 1686 }
1668 return err; 1687 goto out;
1669 } 1688 }
1670 node_type = get_lpt_node_type(c, buf, &node_num); 1689 node_type = get_lpt_node_type(c, p, &node_num);
1671 node_len = get_lpt_node_len(c, node_type); 1690 node_len = get_lpt_node_len(c, node_type);
1672 ret = dbg_is_node_dirty(c, node_type, lnum, c->leb_size - len); 1691 ret = dbg_is_node_dirty(c, node_type, lnum, c->leb_size - len);
1673 if (ret == 1) 1692 if (ret == 1)
1674 dirty += node_len; 1693 dirty += node_len;
1675 buf += node_len; 1694 p += node_len;
1676 len -= node_len; 1695 len -= node_len;
1677 } 1696 }
1697
1698 err = 0;
1699out:
1700 vfree(buf);
1701 return err;
1678} 1702}
1679 1703
1680/** 1704/**
@@ -1867,25 +1891,31 @@ int dbg_chk_lpt_sz(struct ubifs_info *c, int action, int len)
1867static void dump_lpt_leb(const struct ubifs_info *c, int lnum) 1891static void dump_lpt_leb(const struct ubifs_info *c, int lnum)
1868{ 1892{
1869 int err, len = c->leb_size, node_type, node_num, node_len, offs; 1893 int err, len = c->leb_size, node_type, node_num, node_len, offs;
1870 void *buf = c->dbg->buf; 1894 void *buf, *p;
1871 1895
1872 printk(KERN_DEBUG "(pid %d) start dumping LEB %d\n", 1896 printk(KERN_DEBUG "(pid %d) start dumping LEB %d\n",
1873 current->pid, lnum); 1897 current->pid, lnum);
1898 buf = p = __vmalloc(c->leb_size, GFP_NOFS, PAGE_KERNEL);
1899 if (!buf) {
1900 ubifs_err("cannot allocate memory to dump LPT");
1901 return;
1902 }
1903
1874 err = ubi_read(c->ubi, lnum, buf, 0, c->leb_size); 1904 err = ubi_read(c->ubi, lnum, buf, 0, c->leb_size);
1875 if (err) { 1905 if (err) {
1876 ubifs_err("cannot read LEB %d, error %d", lnum, err); 1906 ubifs_err("cannot read LEB %d, error %d", lnum, err);
1877 return; 1907 goto out;
1878 } 1908 }
1879 while (1) { 1909 while (1) {
1880 offs = c->leb_size - len; 1910 offs = c->leb_size - len;
1881 if (!is_a_node(c, buf, len)) { 1911 if (!is_a_node(c, p, len)) {
1882 int pad_len; 1912 int pad_len;
1883 1913
1884 pad_len = get_pad_len(c, buf, len); 1914 pad_len = get_pad_len(c, p, len);
1885 if (pad_len) { 1915 if (pad_len) {
1886 printk(KERN_DEBUG "LEB %d:%d, pad %d bytes\n", 1916 printk(KERN_DEBUG "LEB %d:%d, pad %d bytes\n",
1887 lnum, offs, pad_len); 1917 lnum, offs, pad_len);
1888 buf += pad_len; 1918 p += pad_len;
1889 len -= pad_len; 1919 len -= pad_len;
1890 continue; 1920 continue;
1891 } 1921 }
@@ -1895,7 +1925,7 @@ static void dump_lpt_leb(const struct ubifs_info *c, int lnum)
1895 break; 1925 break;
1896 } 1926 }
1897 1927
1898 node_type = get_lpt_node_type(c, buf, &node_num); 1928 node_type = get_lpt_node_type(c, p, &node_num);
1899 switch (node_type) { 1929 switch (node_type) {
1900 case UBIFS_LPT_PNODE: 1930 case UBIFS_LPT_PNODE:
1901 { 1931 {
@@ -1920,7 +1950,7 @@ static void dump_lpt_leb(const struct ubifs_info *c, int lnum)
1920 else 1950 else
1921 printk(KERN_DEBUG "LEB %d:%d, nnode, ", 1951 printk(KERN_DEBUG "LEB %d:%d, nnode, ",
1922 lnum, offs); 1952 lnum, offs);
1923 err = ubifs_unpack_nnode(c, buf, &nnode); 1953 err = ubifs_unpack_nnode(c, p, &nnode);
1924 for (i = 0; i < UBIFS_LPT_FANOUT; i++) { 1954 for (i = 0; i < UBIFS_LPT_FANOUT; i++) {
1925 printk(KERN_CONT "%d:%d", nnode.nbranch[i].lnum, 1955 printk(KERN_CONT "%d:%d", nnode.nbranch[i].lnum,
1926 nnode.nbranch[i].offs); 1956 nnode.nbranch[i].offs);
@@ -1941,15 +1971,18 @@ static void dump_lpt_leb(const struct ubifs_info *c, int lnum)
1941 break; 1971 break;
1942 default: 1972 default:
1943 ubifs_err("LPT node type %d not recognized", node_type); 1973 ubifs_err("LPT node type %d not recognized", node_type);
1944 return; 1974 goto out;
1945 } 1975 }
1946 1976
1947 buf += node_len; 1977 p += node_len;
1948 len -= node_len; 1978 len -= node_len;
1949 } 1979 }
1950 1980
1951 printk(KERN_DEBUG "(pid %d) finish dumping LEB %d\n", 1981 printk(KERN_DEBUG "(pid %d) finish dumping LEB %d\n",
1952 current->pid, lnum); 1982 current->pid, lnum);
1983out:
1984 vfree(buf);
1985 return;
1953} 1986}
1954 1987
1955/** 1988/**
@@ -1971,4 +2004,47 @@ void dbg_dump_lpt_lebs(const struct ubifs_info *c)
1971 current->pid); 2004 current->pid);
1972} 2005}
1973 2006
2007/**
2008 * dbg_populate_lsave - debugging version of 'populate_lsave()'
2009 * @c: UBIFS file-system description object
2010 *
2011 * This is a debugging version of 'populate_lsave()' which populates lsave
2012 * with random LEBs instead of useful LEBs, which is good for test coverage.
2013 * Returns zero if lsave has not been populated (this debugging feature is
2014 * disabled) and non-zero if lsave has been populated.
2015 */
2016static int dbg_populate_lsave(struct ubifs_info *c)
2017{
2018 struct ubifs_lprops *lprops;
2019 struct ubifs_lpt_heap *heap;
2020 int i;
2021
2022 if (!(ubifs_chk_flags & UBIFS_CHK_GEN))
2023 return 0;
2024 if (random32() & 3)
2025 return 0;
2026
2027 for (i = 0; i < c->lsave_cnt; i++)
2028 c->lsave[i] = c->main_first;
2029
2030 list_for_each_entry(lprops, &c->empty_list, list)
2031 c->lsave[random32() % c->lsave_cnt] = lprops->lnum;
2032 list_for_each_entry(lprops, &c->freeable_list, list)
2033 c->lsave[random32() % c->lsave_cnt] = lprops->lnum;
2034 list_for_each_entry(lprops, &c->frdi_idx_list, list)
2035 c->lsave[random32() % c->lsave_cnt] = lprops->lnum;
2036
2037 heap = &c->lpt_heap[LPROPS_DIRTY_IDX - 1];
2038 for (i = 0; i < heap->cnt; i++)
2039 c->lsave[random32() % c->lsave_cnt] = heap->arr[i]->lnum;
2040 heap = &c->lpt_heap[LPROPS_DIRTY - 1];
2041 for (i = 0; i < heap->cnt; i++)
2042 c->lsave[random32() % c->lsave_cnt] = heap->arr[i]->lnum;
2043 heap = &c->lpt_heap[LPROPS_FREE - 1];
2044 for (i = 0; i < heap->cnt; i++)
2045 c->lsave[random32() % c->lsave_cnt] = heap->arr[i]->lnum;
2046
2047 return 1;
2048}
2049
1974#endif /* CONFIG_UBIFS_FS_DEBUG */ 2050#endif /* CONFIG_UBIFS_FS_DEBUG */
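
The dbg_populate_lsave() plumbing above uses a common kernel idiom for optional debug hooks: forward-declare the function when debugging is compiled in and define it away to 0 otherwise, so the caller needs no #ifdef. A minimal sketch, with TOY_DEBUG standing in for CONFIG_UBIFS_FS_DEBUG:

#ifdef TOY_DEBUG
static int toy_populate_lsave(void);
#else
#define toy_populate_lsave() 0
#endif

static void toy_populate(void)
{
        if (toy_populate_lsave())
                return; /* the debug hook took over (e.g. random LEBs) */
        /* ... normal, deterministic population path ... */
}

#ifdef TOY_DEBUG
/* Compiled only in debug builds; may randomly decide to take over */
static int toy_populate_lsave(void)
{
        return 0;
}
#endif
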
diff --git a/fs/ubifs/master.c b/fs/ubifs/master.c
index 28beaeedadc0..278c2382e8c2 100644
--- a/fs/ubifs/master.c
+++ b/fs/ubifs/master.c
@@ -148,7 +148,7 @@ static int validate_master(const struct ubifs_info *c)
148 } 148 }
149 149
150 main_sz = (long long)c->main_lebs * c->leb_size; 150 main_sz = (long long)c->main_lebs * c->leb_size;
151 if (c->old_idx_sz & 7 || c->old_idx_sz >= main_sz) { 151 if (c->bi.old_idx_sz & 7 || c->bi.old_idx_sz >= main_sz) {
152 err = 9; 152 err = 9;
153 goto out; 153 goto out;
154 } 154 }
@@ -218,7 +218,7 @@ static int validate_master(const struct ubifs_info *c)
218 } 218 }
219 219
220 if (c->lst.total_dead + c->lst.total_dark + 220 if (c->lst.total_dead + c->lst.total_dark +
221 c->lst.total_used + c->old_idx_sz > main_sz) { 221 c->lst.total_used + c->bi.old_idx_sz > main_sz) {
222 err = 21; 222 err = 21;
223 goto out; 223 goto out;
224 } 224 }
@@ -286,7 +286,7 @@ int ubifs_read_master(struct ubifs_info *c)
286 c->gc_lnum = le32_to_cpu(c->mst_node->gc_lnum); 286 c->gc_lnum = le32_to_cpu(c->mst_node->gc_lnum);
287 c->ihead_lnum = le32_to_cpu(c->mst_node->ihead_lnum); 287 c->ihead_lnum = le32_to_cpu(c->mst_node->ihead_lnum);
288 c->ihead_offs = le32_to_cpu(c->mst_node->ihead_offs); 288 c->ihead_offs = le32_to_cpu(c->mst_node->ihead_offs);
289 c->old_idx_sz = le64_to_cpu(c->mst_node->index_size); 289 c->bi.old_idx_sz = le64_to_cpu(c->mst_node->index_size);
290 c->lpt_lnum = le32_to_cpu(c->mst_node->lpt_lnum); 290 c->lpt_lnum = le32_to_cpu(c->mst_node->lpt_lnum);
291 c->lpt_offs = le32_to_cpu(c->mst_node->lpt_offs); 291 c->lpt_offs = le32_to_cpu(c->mst_node->lpt_offs);
292 c->nhead_lnum = le32_to_cpu(c->mst_node->nhead_lnum); 292 c->nhead_lnum = le32_to_cpu(c->mst_node->nhead_lnum);
@@ -305,7 +305,7 @@ int ubifs_read_master(struct ubifs_info *c)
305 c->lst.total_dead = le64_to_cpu(c->mst_node->total_dead); 305 c->lst.total_dead = le64_to_cpu(c->mst_node->total_dead);
306 c->lst.total_dark = le64_to_cpu(c->mst_node->total_dark); 306 c->lst.total_dark = le64_to_cpu(c->mst_node->total_dark);
307 307
308 c->calc_idx_sz = c->old_idx_sz; 308 c->calc_idx_sz = c->bi.old_idx_sz;
309 309
310 if (c->mst_node->flags & cpu_to_le32(UBIFS_MST_NO_ORPHS)) 310 if (c->mst_node->flags & cpu_to_le32(UBIFS_MST_NO_ORPHS))
311 c->no_orphs = 1; 311 c->no_orphs = 1;
@@ -361,7 +361,8 @@ int ubifs_write_master(struct ubifs_info *c)
361{ 361{
362 int err, lnum, offs, len; 362 int err, lnum, offs, len;
363 363
364 if (c->ro_media) 364 ubifs_assert(!c->ro_media && !c->ro_mount);
365 if (c->ro_error)
365 return -EROFS; 366 return -EROFS;
366 367
367 lnum = UBIFS_MST_LNUM; 368 lnum = UBIFS_MST_LNUM;
diff --git a/fs/ubifs/misc.h b/fs/ubifs/misc.h
index 4fa81d867e41..0b5296a9a4c5 100644
--- a/fs/ubifs/misc.h
+++ b/fs/ubifs/misc.h
@@ -132,7 +132,8 @@ static inline int ubifs_leb_unmap(const struct ubifs_info *c, int lnum)
132{ 132{
133 int err; 133 int err;
134 134
135 if (c->ro_media) 135 ubifs_assert(!c->ro_media && !c->ro_mount);
136 if (c->ro_error)
136 return -EROFS; 137 return -EROFS;
137 err = ubi_leb_unmap(c->ubi, lnum); 138 err = ubi_leb_unmap(c->ubi, lnum);
138 if (err) { 139 if (err) {
@@ -159,7 +160,8 @@ static inline int ubifs_leb_write(const struct ubifs_info *c, int lnum,
159{ 160{
160 int err; 161 int err;
161 162
162 if (c->ro_media) 163 ubifs_assert(!c->ro_media && !c->ro_mount);
164 if (c->ro_error)
163 return -EROFS; 165 return -EROFS;
164 err = ubi_leb_write(c->ubi, lnum, buf, offs, len, dtype); 166 err = ubi_leb_write(c->ubi, lnum, buf, offs, len, dtype);
165 if (err) { 167 if (err) {
@@ -186,7 +188,8 @@ static inline int ubifs_leb_change(const struct ubifs_info *c, int lnum,
186{ 188{
187 int err; 189 int err;
188 190
189 if (c->ro_media) 191 ubifs_assert(!c->ro_media && !c->ro_mount);
192 if (c->ro_error)
190 return -EROFS; 193 return -EROFS;
191 err = ubi_leb_change(c->ubi, lnum, buf, len, dtype); 194 err = ubi_leb_change(c->ubi, lnum, buf, len, dtype);
192 if (err) { 195 if (err) {
@@ -337,4 +340,21 @@ static inline void ubifs_release_lprops(struct ubifs_info *c)
337 mutex_unlock(&c->lp_mutex); 340 mutex_unlock(&c->lp_mutex);
338} 341}
339 342
343/**
344 * ubifs_next_log_lnum - switch to the next log LEB.
345 * @c: UBIFS file-system description object
346 * @lnum: current log LEB
347 *
348 * This helper function returns the number of the log LEB which follows LEB
349 * 'lnum', wrapping around to the first log LEB after the last one.
350 */
351static inline int ubifs_next_log_lnum(const struct ubifs_info *c, int lnum)
352{
353 lnum += 1;
354 if (lnum > c->log_last)
355 lnum = UBIFS_LOG_LNUM;
356
357 return lnum;
358}
359
340#endif /* __UBIFS_MISC_H__ */ 360#endif /* __UBIFS_MISC_H__ */
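
ubifs_next_log_lnum(), moved into misc.h above, treats the log LEBs as a ring. A toy illustration of walking that ring from the old log tail up to (but excluding) the new tail, the way ubifs_log_post_commit() does; the LEB numbers are invented for the example.

#include <stdio.h>

#define TOY_LOG_FIRST 3 /* invented first log LEB */
#define TOY_LOG_LAST  7 /* invented last log LEB */

/* The log is a ring: after the last log LEB comes the first one again */
static int toy_next_log_lnum(int lnum)
{
        lnum += 1;
        if (lnum > TOY_LOG_LAST)
                lnum = TOY_LOG_FIRST;
        return lnum;
}

int main(void)
{
        int old_tail = 6, new_tail = 4, lnum;

        /* Walk from the old tail to the new one, wrapping at the end */
        for (lnum = old_tail; lnum != new_tail; lnum = toy_next_log_lnum(lnum))
                printf("unmap log LEB %d\n", lnum); /* prints 6, 7, 3 */
        return 0;
}
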
diff --git a/fs/ubifs/orphan.c b/fs/ubifs/orphan.c
index 82009c74b6a3..a5422fffbd69 100644
--- a/fs/ubifs/orphan.c
+++ b/fs/ubifs/orphan.c
@@ -673,7 +673,8 @@ static int kill_orphans(struct ubifs_info *c)
673 sleb = ubifs_scan(c, lnum, 0, c->sbuf, 1); 673 sleb = ubifs_scan(c, lnum, 0, c->sbuf, 1);
674 if (IS_ERR(sleb)) { 674 if (IS_ERR(sleb)) {
675 if (PTR_ERR(sleb) == -EUCLEAN) 675 if (PTR_ERR(sleb) == -EUCLEAN)
676 sleb = ubifs_recover_leb(c, lnum, 0, c->sbuf, 0); 676 sleb = ubifs_recover_leb(c, lnum, 0,
677 c->sbuf, -1);
677 if (IS_ERR(sleb)) { 678 if (IS_ERR(sleb)) {
678 err = PTR_ERR(sleb); 679 err = PTR_ERR(sleb);
679 break; 680 break;
@@ -892,15 +893,22 @@ static int dbg_read_orphans(struct check_info *ci, struct ubifs_scan_leb *sleb)
892static int dbg_scan_orphans(struct ubifs_info *c, struct check_info *ci) 893static int dbg_scan_orphans(struct ubifs_info *c, struct check_info *ci)
893{ 894{
894 int lnum, err = 0; 895 int lnum, err = 0;
896 void *buf;
895 897
896 /* Check no-orphans flag and skip this if no orphans */ 898 /* Check no-orphans flag and skip this if no orphans */
897 if (c->no_orphs) 899 if (c->no_orphs)
898 return 0; 900 return 0;
899 901
902 buf = __vmalloc(c->leb_size, GFP_NOFS, PAGE_KERNEL);
903 if (!buf) {
904 ubifs_err("cannot allocate memory to check orphans");
905 return 0;
906 }
907
900 for (lnum = c->orph_first; lnum <= c->orph_last; lnum++) { 908 for (lnum = c->orph_first; lnum <= c->orph_last; lnum++) {
901 struct ubifs_scan_leb *sleb; 909 struct ubifs_scan_leb *sleb;
902 910
903 sleb = ubifs_scan(c, lnum, 0, c->dbg->buf, 0); 911 sleb = ubifs_scan(c, lnum, 0, buf, 0);
904 if (IS_ERR(sleb)) { 912 if (IS_ERR(sleb)) {
905 err = PTR_ERR(sleb); 913 err = PTR_ERR(sleb);
906 break; 914 break;
@@ -912,6 +920,7 @@ static int dbg_scan_orphans(struct ubifs_info *c, struct check_info *ci)
912 break; 920 break;
913 } 921 }
914 922
923 vfree(buf);
915 return err; 924 return err;
916} 925}
917 926
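
Several hunks in this merge (lprops.c, lpt_commit.c and the orphan.c hunk above) replace the shared debug buffer c->dbg->buf with a temporary per-call allocation, so concurrent checkers no longer compete for one buffer. A user-space sketch of the pattern, with malloc() standing in for __vmalloc(..., GFP_NOFS, PAGE_KERNEL):

#include <stdlib.h>

static int toy_scan_leb(int lnum, void *buf, size_t len)
{
        (void)lnum; (void)buf; (void)len;
        return 0; /* pretend the scan succeeded */
}

static int toy_check_lebs(int first, int last, size_t leb_size)
{
        void *buf = malloc(leb_size); /* private scratch buffer */
        int lnum, err = 0;

        if (!buf)
                return -1; /* the kernel variants return 0 or an error here */
        for (lnum = first; lnum <= last; lnum++) {
                err = toy_scan_leb(lnum, buf, leb_size);
                if (err)
                        break;
        }
        free(buf); /* released on every path, as in the patched code */
        return err;
}
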
diff --git a/fs/ubifs/recovery.c b/fs/ubifs/recovery.c
index daae9e1f5382..783d8e0beb76 100644
--- a/fs/ubifs/recovery.c
+++ b/fs/ubifs/recovery.c
@@ -28,6 +28,23 @@
28 * UBIFS always cleans away all remnants of an unclean un-mount, so that 28 * UBIFS always cleans away all remnants of an unclean un-mount, so that
29 * errors do not accumulate. However UBIFS defers recovery if it is mounted 29 * errors do not accumulate. However UBIFS defers recovery if it is mounted
30 * read-only, and the flash is not modified in that case. 30 * read-only, and the flash is not modified in that case.
31 *
32 * The general UBIFS approach to recovery is that it recovers from
33 * corruptions which could be caused by power cuts, but it refuses to recover
34 * from corruption caused by anything else. UBIFS tries to distinguish
35 * between these two causes of corruption, silently recovering in the former
36 * case and loudly complaining in the latter.
37 *
38 * UBIFS writes only to erased LEBs, so it writes only to the flash space
39 * containing only 0xFFs. UBIFS also always writes strictly from the beginning
40 * of the LEB to the end. And UBIFS assumes that the underlying flash media
41 * writes in @c->max_write_size bytes at a time.
42 *
43 * Hence, if UBIFS finds a corrupted node at offset X, it expects only the min.
44 * I/O unit corresponding to offset X to contain corrupted data, and all the
45 * following min. I/O units to contain empty space (all 0xFFs). If this is
46 * not true, the corruption cannot be the result of a power cut, and UBIFS
47 * refuses to mount.
31 */ 48 */
32 49
33#include <linux/crc32.h> 50#include <linux/crc32.h>
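
A toy version of the power-cut test described in the header comment above: after a corrupted node at offset X, everything from the next max. write size boundary to the end of the LEB must still be 0xFF, otherwise the damage cannot have come from a power cut. The geometry constants are invented, the LEB size is assumed to be a multiple of the max. write size, and the real is_last_write() receives a buffer starting at the scan offset rather than the whole LEB.

#include <stdbool.h>
#include <stdint.h>

#define TOY_MAX_WRITE_SIZE 2048   /* made up */
#define TOY_LEB_SIZE       126976 /* made up, multiple of the above */

static bool toy_is_empty(const uint8_t *p, int len)
{
        int i;

        for (i = 0; i < len; i++)
                if (p[i] != 0xFF)
                        return false;
        return true;
}

/* 'buf' holds the whole LEB; 'offs' is where the corrupted node starts */
static bool toy_is_last_write(const uint8_t *buf, int offs)
{
        int empty_offs, check_len;

        /* Round up to the next max. write size boundary */
        empty_offs = (offs + TOY_MAX_WRITE_SIZE) & ~(TOY_MAX_WRITE_SIZE - 1);
        check_len = TOY_LEB_SIZE - empty_offs;
        return toy_is_empty(buf + empty_offs, check_len);
}
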
@@ -292,7 +309,7 @@ int ubifs_recover_master_node(struct ubifs_info *c)
292 309
293 memcpy(c->mst_node, mst, UBIFS_MST_NODE_SZ); 310 memcpy(c->mst_node, mst, UBIFS_MST_NODE_SZ);
294 311
295 if ((c->vfs_sb->s_flags & MS_RDONLY)) { 312 if (c->ro_mount) {
296 /* Read-only mode. Keep a copy for switching to rw mode */ 313 /* Read-only mode. Keep a copy for switching to rw mode */
297 c->rcvrd_mst_node = kmalloc(sz, GFP_KERNEL); 314 c->rcvrd_mst_node = kmalloc(sz, GFP_KERNEL);
298 if (!c->rcvrd_mst_node) { 315 if (!c->rcvrd_mst_node) {
@@ -300,6 +317,32 @@ int ubifs_recover_master_node(struct ubifs_info *c)
300 goto out_free; 317 goto out_free;
301 } 318 }
302 memcpy(c->rcvrd_mst_node, c->mst_node, UBIFS_MST_NODE_SZ); 319 memcpy(c->rcvrd_mst_node, c->mst_node, UBIFS_MST_NODE_SZ);
320
321 /*
322 * We had to recover the master node, which means there was an
323 * unclean reboot. However, it is possible that the master node
324 * is clean at this point, i.e., %UBIFS_MST_DIRTY is not set.
325 * E.g., consider the following chain of events:
326 *
327 * 1. UBIFS was cleanly unmounted, so the master node is clean
328 * 2. UBIFS is being mounted R/W and starts changing the master
329 * node in the first (%UBIFS_MST_LNUM). A power cut happens,
330 * so this LEB ends up with some amount of garbage at the
331 * end.
332 * 3. UBIFS is being mounted R/O. We reach this place and
333 * recover the master node from the second LEB
334 * (%UBIFS_MST_LNUM + 1). But we cannot update the media
335 * because we are being mounted R/O. We have to defer the
336 * operation.
337 * 4. However, this master node (@c->mst_node) is marked as
338 * clean (since step 1). And if we just return, the
339 * mount code will be confused and won't recover the master
340 * node when it is re-mounted R/W later.
341 *
342 * Thus, we force the recovery by marking the master node as
343 * dirty.
344 */
345 c->mst_node->flags |= cpu_to_le32(UBIFS_MST_DIRTY);
303 } else { 346 } else {
304 /* Write the recovered master node */ 347 /* Write the recovered master node */
305 c->max_sqnum = le64_to_cpu(mst->ch.sqnum) - 1; 348 c->max_sqnum = le64_to_cpu(mst->ch.sqnum) - 1;
@@ -362,8 +405,9 @@ int ubifs_write_rcvrd_mst_node(struct ubifs_info *c)
362 * @offs: offset to check 405 * @offs: offset to check
363 * 406 *
364 * This function returns %1 if @offs was in the last write to the LEB whose data 407 * This function returns %1 if @offs was in the last write to the LEB whose data
365 * is in @buf, otherwise %0 is returned. The determination is made by checking 408 * is in @buf, otherwise %0 is returned. The determination is made by checking
366 * for subsequent empty space starting from the next @c->min_io_size boundary. 409 * for subsequent empty space starting from the next @c->max_write_size
410 * boundary.
367 */ 411 */
368static int is_last_write(const struct ubifs_info *c, void *buf, int offs) 412static int is_last_write(const struct ubifs_info *c, void *buf, int offs)
369{ 413{
@@ -371,10 +415,10 @@ static int is_last_write(const struct ubifs_info *c, void *buf, int offs)
371 uint8_t *p; 415 uint8_t *p;
372 416
373 /* 417 /*
374 * Round up to the next @c->min_io_size boundary i.e. @offs is in the 418 * Round up to the next @c->max_write_size boundary i.e. @offs is in
375 * last wbuf written. After that should be empty space. 419 * the last wbuf written. After that should be empty space.
376 */ 420 */
377 empty_offs = ALIGN(offs + 1, c->min_io_size); 421 empty_offs = ALIGN(offs + 1, c->max_write_size);
378 check_len = c->leb_size - empty_offs; 422 check_len = c->leb_size - empty_offs;
379 p = buf + empty_offs - offs; 423 p = buf + empty_offs - offs;
380 return is_empty(p, check_len); 424 return is_empty(p, check_len);
@@ -429,7 +473,7 @@ static int no_more_nodes(const struct ubifs_info *c, void *buf, int len,
429 int skip, dlen = le32_to_cpu(ch->len); 473 int skip, dlen = le32_to_cpu(ch->len);
430 474
431 /* Check for empty space after the corrupt node's common header */ 475 /* Check for empty space after the corrupt node's common header */
432 skip = ALIGN(offs + UBIFS_CH_SZ, c->min_io_size) - offs; 476 skip = ALIGN(offs + UBIFS_CH_SZ, c->max_write_size) - offs;
433 if (is_empty(buf + skip, len - skip)) 477 if (is_empty(buf + skip, len - skip))
434 return 1; 478 return 1;
435 /* 479 /*
@@ -441,7 +485,7 @@ static int no_more_nodes(const struct ubifs_info *c, void *buf, int len,
441 return 0; 485 return 0;
442 } 486 }
443 /* Now we know the corrupt node's length we can skip over it */ 487 /* Now we know the corrupt node's length we can skip over it */
444 skip = ALIGN(offs + dlen, c->min_io_size) - offs; 488 skip = ALIGN(offs + dlen, c->max_write_size) - offs;
445 /* After which there should be empty space */ 489 /* After which there should be empty space */
446 if (is_empty(buf + skip, len - skip)) 490 if (is_empty(buf + skip, len - skip))
447 return 1; 491 return 1;
@@ -469,7 +513,7 @@ static int fix_unclean_leb(struct ubifs_info *c, struct ubifs_scan_leb *sleb,
469 endpt = snod->offs + snod->len; 513 endpt = snod->offs + snod->len;
470 } 514 }
471 515
472 if ((c->vfs_sb->s_flags & MS_RDONLY) && !c->remounting_rw) { 516 if (c->ro_mount && !c->remounting_rw) {
473 /* Add to recovery list */ 517 /* Add to recovery list */
474 struct ubifs_unclean_leb *ucleb; 518 struct ubifs_unclean_leb *ucleb;
475 519
@@ -520,16 +564,15 @@ static int fix_unclean_leb(struct ubifs_info *c, struct ubifs_scan_leb *sleb,
520} 564}
521 565
522/** 566/**
523 * drop_incomplete_group - drop nodes from an incomplete group. 567 * drop_last_group - drop the last group of nodes.
524 * @sleb: scanned LEB information 568 * @sleb: scanned LEB information
525 * @offs: offset of dropped nodes is returned here 569 * @offs: offset of dropped nodes is returned here
526 * 570 *
527 * This function returns %1 if nodes are dropped and %0 otherwise. 571 * This is a helper function for 'ubifs_recover_leb()' which drops the last
572 * group of nodes of the scanned LEB.
528 */ 573 */
529static int drop_incomplete_group(struct ubifs_scan_leb *sleb, int *offs) 574static void drop_last_group(struct ubifs_scan_leb *sleb, int *offs)
530{ 575{
531 int dropped = 0;
532
533 while (!list_empty(&sleb->nodes)) { 576 while (!list_empty(&sleb->nodes)) {
534 struct ubifs_scan_node *snod; 577 struct ubifs_scan_node *snod;
535 struct ubifs_ch *ch; 578 struct ubifs_ch *ch;
@@ -538,15 +581,40 @@ static int drop_incomplete_group(struct ubifs_scan_leb *sleb, int *offs)
538 list); 581 list);
539 ch = snod->node; 582 ch = snod->node;
540 if (ch->group_type != UBIFS_IN_NODE_GROUP) 583 if (ch->group_type != UBIFS_IN_NODE_GROUP)
541 return dropped; 584 break;
542 dbg_rcvry("dropping node at %d:%d", sleb->lnum, snod->offs); 585
586 dbg_rcvry("dropping grouped node at %d:%d",
587 sleb->lnum, snod->offs);
588 *offs = snod->offs;
589 list_del(&snod->list);
590 kfree(snod);
591 sleb->nodes_cnt -= 1;
592 }
593}
594
595/**
596 * drop_last_node - drop the last node.
597 * @sleb: scanned LEB information
598 * @offs: offset of the dropped node is returned here
600 *
601 * This is a helper function for 'ubifs_recover_leb()' which drops the last
602 * node of the scanned LEB.
603 */
604static void drop_last_node(struct ubifs_scan_leb *sleb, int *offs)
605{
606 struct ubifs_scan_node *snod;
607
608 if (!list_empty(&sleb->nodes)) {
609 snod = list_entry(sleb->nodes.prev, struct ubifs_scan_node,
610 list);
611
612 dbg_rcvry("dropping last node at %d:%d", sleb->lnum, snod->offs);
543 *offs = snod->offs; 613 *offs = snod->offs;
544 list_del(&snod->list); 614 list_del(&snod->list);
545 kfree(snod); 615 kfree(snod);
546 sleb->nodes_cnt -= 1; 616 sleb->nodes_cnt -= 1;
547 dropped = 1;
548 } 617 }
549 return dropped;
550} 618}
551 619
552/** 620/**
@@ -555,7 +623,8 @@ static int drop_incomplete_group(struct ubifs_scan_leb *sleb, int *offs)
555 * @lnum: LEB number 623 * @lnum: LEB number
556 * @offs: offset 624 * @offs: offset
557 * @sbuf: LEB-sized buffer to use 625 * @sbuf: LEB-sized buffer to use
558 * @grouped: nodes may be grouped for recovery 626 * @jhead: journal head number this LEB belongs to (%-1 if the LEB does not
627 * belong to any journal head)
559 * 628 *
560 * This function does a scan of a LEB, but caters for errors that might have 629 * This function does a scan of a LEB, but caters for errors that might have
561 * been caused by the unclean unmount from which we are attempting to recover. 630 * been caused by the unclean unmount from which we are attempting to recover.
@@ -563,25 +632,21 @@ static int drop_incomplete_group(struct ubifs_scan_leb *sleb, int *offs)
563 * found, and a negative error code in case of failure. 632 * found, and a negative error code in case of failure.
564 */ 633 */
565struct ubifs_scan_leb *ubifs_recover_leb(struct ubifs_info *c, int lnum, 634struct ubifs_scan_leb *ubifs_recover_leb(struct ubifs_info *c, int lnum,
566 int offs, void *sbuf, int grouped) 635 int offs, void *sbuf, int jhead)
567{ 636{
568 int err, len = c->leb_size - offs, need_clean = 0, quiet = 1; 637 int ret = 0, err, len = c->leb_size - offs, start = offs, min_io_unit;
569 int empty_chkd = 0, start = offs; 638 int grouped = jhead == -1 ? 0 : c->jheads[jhead].grouped;
570 struct ubifs_scan_leb *sleb; 639 struct ubifs_scan_leb *sleb;
571 void *buf = sbuf + offs; 640 void *buf = sbuf + offs;
572 641
573 dbg_rcvry("%d:%d", lnum, offs); 642 dbg_rcvry("%d:%d, jhead %d, grouped %d", lnum, offs, jhead, grouped);
574 643
575 sleb = ubifs_start_scan(c, lnum, offs, sbuf); 644 sleb = ubifs_start_scan(c, lnum, offs, sbuf);
576 if (IS_ERR(sleb)) 645 if (IS_ERR(sleb))
577 return sleb; 646 return sleb;
578 647
579 if (sleb->ecc) 648 ubifs_assert(len >= 8);
580 need_clean = 1;
581
582 while (len >= 8) { 649 while (len >= 8) {
583 int ret;
584
585 dbg_scan("look at LEB %d:%d (%d bytes left)", 650 dbg_scan("look at LEB %d:%d (%d bytes left)",
586 lnum, offs, len); 651 lnum, offs, len);
587 652
@@ -591,8 +656,7 @@ struct ubifs_scan_leb *ubifs_recover_leb(struct ubifs_info *c, int lnum,
591 * Scan quietly until there is an error from which we cannot 656 * Scan quietly until there is an error from which we cannot
592 * recover 657 * recover
593 */ 658 */
594 ret = ubifs_scan_a_node(c, buf, len, lnum, offs, quiet); 659 ret = ubifs_scan_a_node(c, buf, len, lnum, offs, 1);
595
596 if (ret == SCANNED_A_NODE) { 660 if (ret == SCANNED_A_NODE) {
597 /* A valid node, and not a padding node */ 661 /* A valid node, and not a padding node */
598 struct ubifs_ch *ch = buf; 662 struct ubifs_ch *ch = buf;
@@ -605,104 +669,126 @@ struct ubifs_scan_leb *ubifs_recover_leb(struct ubifs_info *c, int lnum,
605 offs += node_len; 669 offs += node_len;
606 buf += node_len; 670 buf += node_len;
607 len -= node_len; 671 len -= node_len;
608 continue; 672 } else if (ret > 0) {
609 }
610
611 if (ret > 0) {
612 /* Padding bytes or a valid padding node */ 673 /* Padding bytes or a valid padding node */
613 offs += ret; 674 offs += ret;
614 buf += ret; 675 buf += ret;
615 len -= ret; 676 len -= ret;
616 continue; 677 } else if (ret == SCANNED_EMPTY_SPACE ||
617 } 678 ret == SCANNED_GARBAGE ||
618 679 ret == SCANNED_A_BAD_PAD_NODE ||
619 if (ret == SCANNED_EMPTY_SPACE) { 680 ret == SCANNED_A_CORRUPT_NODE) {
620 if (!is_empty(buf, len)) { 681 dbg_rcvry("found corruption - %d", ret);
621 if (!is_last_write(c, buf, offs))
622 break;
623 clean_buf(c, &buf, lnum, &offs, &len);
624 need_clean = 1;
625 }
626 empty_chkd = 1;
627 break; 682 break;
628 } 683 } else {
629 684 dbg_err("unexpected return value %d", ret);
630 if (ret == SCANNED_GARBAGE || ret == SCANNED_A_BAD_PAD_NODE)
631 if (is_last_write(c, buf, offs)) {
632 clean_buf(c, &buf, lnum, &offs, &len);
633 need_clean = 1;
634 empty_chkd = 1;
635 break;
636 }
637
638 if (ret == SCANNED_A_CORRUPT_NODE)
639 if (no_more_nodes(c, buf, len, lnum, offs)) {
640 clean_buf(c, &buf, lnum, &offs, &len);
641 need_clean = 1;
642 empty_chkd = 1;
643 break;
644 }
645
646 if (quiet) {
647 /* Redo the last scan but noisily */
648 quiet = 0;
649 continue;
650 }
651
652 switch (ret) {
653 case SCANNED_GARBAGE:
654 dbg_err("garbage");
655 goto corrupted;
656 case SCANNED_A_CORRUPT_NODE:
657 case SCANNED_A_BAD_PAD_NODE:
658 dbg_err("bad node");
659 goto corrupted;
660 default:
661 dbg_err("unknown");
662 err = -EINVAL; 685 err = -EINVAL;
663 goto error; 686 goto error;
664 } 687 }
665 } 688 }
666 689
667 if (!empty_chkd && !is_empty(buf, len)) { 690 if (ret == SCANNED_GARBAGE || ret == SCANNED_A_BAD_PAD_NODE) {
668 if (is_last_write(c, buf, offs)) { 691 if (!is_last_write(c, buf, offs))
669 clean_buf(c, &buf, lnum, &offs, &len); 692 goto corrupted_rescan;
670 need_clean = 1; 693 } else if (ret == SCANNED_A_CORRUPT_NODE) {
671 } else { 694 if (!no_more_nodes(c, buf, len, lnum, offs))
695 goto corrupted_rescan;
696 } else if (!is_empty(buf, len)) {
697 if (!is_last_write(c, buf, offs)) {
672 int corruption = first_non_ff(buf, len); 698 int corruption = first_non_ff(buf, len);
673 699
700 /*
701 * See header comment for this file for more
702 * explanations about the reasons we have this check.
703 */
674 ubifs_err("corrupt empty space LEB %d:%d, corruption " 704 ubifs_err("corrupt empty space LEB %d:%d, corruption "
675 "starts at %d", lnum, offs, corruption); 705 "starts at %d", lnum, offs, corruption);
676 /* Make sure we dump interesting non-0xFF data */ 706 /* Make sure we dump interesting non-0xFF data */
677 offs = corruption; 707 offs += corruption;
678 buf += corruption; 708 buf += corruption;
679 goto corrupted; 709 goto corrupted;
680 } 710 }
681 } 711 }
682 712
683 /* Drop nodes from incomplete group */ 713 min_io_unit = round_down(offs, c->min_io_size);
684 if (grouped && drop_incomplete_group(sleb, &offs)) { 714 if (grouped)
685 buf = sbuf + offs; 715 /*
686 len = c->leb_size - offs; 716 * If nodes are grouped, always drop the incomplete group at
687 clean_buf(c, &buf, lnum, &offs, &len); 717 * the end.
688 need_clean = 1; 718 */
689 } 719 drop_last_group(sleb, &offs);
690 720
691 if (offs % c->min_io_size) { 721 if (jhead == GCHD) {
692 clean_buf(c, &buf, lnum, &offs, &len); 722 /*
693 need_clean = 1; 723 * If this LEB belongs to the GC head then while we are in the
724 * middle of the same min. I/O unit keep dropping nodes. So
725 * basically, what we want is to make sure that the last min.
726 * I/O unit where we saw the corruption is dropped completely
727 * with all the uncorrupted nodes which may possibly sit there.
728 *
729 * In other words, let's name the min. I/O unit where the
730 * corruption starts B, and the previous min. I/O unit A. The
731 * below code tries to deal with a situation when half of B
732 * contains valid nodes or the end of a valid node, and the
733 * second half of B contains corrupted data or garbage. This
734 * means that UBIFS had been writing to B just before the power
 735 * cut happened. I do not know how realistic this scenario is -
736 * that half of the min. I/O unit had been written successfully
737 * and the other half not, but this is possible in our 'failure
738 * mode emulation' infrastructure at least.
739 *
 740 * So what is the problem, and why do we need to drop those
 741 * nodes? Why can't we just clean up the second half of B by putting a
742 * padding node there? We can, and this works fine with one
743 * exception which was reproduced with power cut emulation
744 * testing and happens extremely rarely.
745 *
746 * Imagine the file-system is full, we run GC which starts
747 * moving valid nodes from LEB X to LEB Y (obviously, LEB Y is
748 * the current GC head LEB). The @c->gc_lnum is -1, which means
749 * that GC will retain LEB X and will try to continue. Imagine
750 * that LEB X is currently the dirtiest LEB, and the amount of
751 * used space in LEB Y is exactly the same as amount of free
752 * space in LEB X.
753 *
754 * And a power cut happens when nodes are moved from LEB X to
755 * LEB Y. We are here trying to recover LEB Y which is the GC
756 * head LEB. We find the min. I/O unit B as described above.
 757 * Then we clean up LEB Y by padding the min. I/O unit. And later
758 * 'ubifs_rcvry_gc_commit()' function fails, because it cannot
759 * find a dirty LEB which could be GC'd into LEB Y! Even LEB X
760 * does not match because the amount of valid nodes there does
761 * not fit the free space in LEB Y any more! And this is
762 * because of the padding node which we added to LEB Y. The
 763 * user-visible effect of this, which I once observed and
 764 * analysed, is that we cannot mount the file-system: mount
 765 * fails with the -ENOSPC error.
766 *
767 * So obviously, to make sure that situation does not happen we
768 * should free min. I/O unit B in LEB Y completely and the last
769 * used min. I/O unit in LEB Y should be A. This is basically
770 * what the below code tries to do.
771 */
772 while (offs > min_io_unit)
773 drop_last_node(sleb, &offs);
694 } 774 }
695 775
776 buf = sbuf + offs;
777 len = c->leb_size - offs;
778
779 clean_buf(c, &buf, lnum, &offs, &len);
696 ubifs_end_scan(c, sleb, lnum, offs); 780 ubifs_end_scan(c, sleb, lnum, offs);
697 781
698 if (need_clean) { 782 err = fix_unclean_leb(c, sleb, start);
699 err = fix_unclean_leb(c, sleb, start); 783 if (err)
700 if (err) 784 goto error;
701 goto error;
702 }
703 785
704 return sleb; 786 return sleb;
705 787
788corrupted_rescan:
789 /* Re-scan the corrupted data with verbose messages */
790 dbg_err("corruptio %d", ret);
791 ubifs_scan_a_node(c, buf, len, lnum, offs, 1);
706corrupted: 792corrupted:
707 ubifs_scanned_corruption(c, lnum, offs, buf); 793 ubifs_scanned_corruption(c, lnum, offs, buf);
708 err = -EUCLEAN; 794 err = -EUCLEAN;
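A worked example may help with the round_down() arithmetic used above; the numbers below are invented for illustration. With a 2048-byte min. I/O unit and scanning stopped at offset 5000, the last fully-programmed unit boundary is 4096, so 'drop_last_node()' runs until the LEB ends at or below 4096:

#include <stdio.h>

/* Same semantics as the kernel's round_down() when 'y' is a power of two. */
#define round_down(x, y) ((x) & ~((y) - 1))

int main(void)
{
	int min_io_size = 2048;	/* assumed flash page size */
	int offs = 5000;	/* where the scan stopped */

	/* drop_last_node() is called until offs <= this boundary */
	printf("keep data up to offset %d\n", round_down(offs, min_io_size));
	return 0;
}

This prints "keep data up to offset 4096": min. I/O unit B (4096..6143) is freed completely and the last used unit is A.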
@@ -772,7 +858,8 @@ out_free:
772 * @sbuf: LEB-sized buffer to use 858 * @sbuf: LEB-sized buffer to use
773 * 859 *
774 * This function does a scan of a LEB, but caters for errors that might have 860 * This function does a scan of a LEB, but caters for errors that might have
775 * been caused by the unclean unmount from which we are attempting to recover. 861 * been caused by unclean reboots from which we are attempting to recover
862 * (assume that only the last log LEB can be corrupted by an unclean reboot).
776 * 863 *
777 * This function returns %0 on success and a negative error code on failure. 864 * This function returns %0 on success and a negative error code on failure.
778 */ 865 */
@@ -818,7 +905,7 @@ struct ubifs_scan_leb *ubifs_recover_log_leb(struct ubifs_info *c, int lnum,
818 } 905 }
819 ubifs_scan_destroy(sleb); 906 ubifs_scan_destroy(sleb);
820 } 907 }
821 return ubifs_recover_leb(c, lnum, offs, sbuf, 0); 908 return ubifs_recover_leb(c, lnum, offs, sbuf, -1);
822} 909}
823 910
824/** 911/**
@@ -835,12 +922,8 @@ struct ubifs_scan_leb *ubifs_recover_log_leb(struct ubifs_info *c, int lnum,
835static int recover_head(const struct ubifs_info *c, int lnum, int offs, 922static int recover_head(const struct ubifs_info *c, int lnum, int offs,
836 void *sbuf) 923 void *sbuf)
837{ 924{
838 int len, err; 925 int len = c->max_write_size, err;
839 926
840 if (c->min_io_size > 1)
841 len = c->min_io_size;
842 else
843 len = 512;
844 if (offs + len > c->leb_size) 927 if (offs + len > c->leb_size)
845 len = c->leb_size - offs; 928 len = c->leb_size - offs;
846 929
@@ -883,7 +966,7 @@ int ubifs_recover_inl_heads(const struct ubifs_info *c, void *sbuf)
883{ 966{
884 int err; 967 int err;
885 968
886 ubifs_assert(!(c->vfs_sb->s_flags & MS_RDONLY) || c->remounting_rw); 969 ubifs_assert(!c->ro_mount || c->remounting_rw);
887 970
888 dbg_rcvry("checking index head at %d:%d", c->ihead_lnum, c->ihead_offs); 971 dbg_rcvry("checking index head at %d:%d", c->ihead_lnum, c->ihead_offs);
889 err = recover_head(c, c->ihead_lnum, c->ihead_offs, sbuf); 972 err = recover_head(c, c->ihead_lnum, c->ihead_offs, sbuf);
@@ -1025,6 +1108,53 @@ int ubifs_clean_lebs(const struct ubifs_info *c, void *sbuf)
1025} 1108}
1026 1109
1027/** 1110/**
1111 * grab_empty_leb - grab an empty LEB to use as GC LEB and run commit.
1112 * @c: UBIFS file-system description object
1113 *
1114 * This is a helper function for 'ubifs_rcvry_gc_commit()' which grabs an empty
1115 * LEB to be used as GC LEB (@c->gc_lnum), and then runs the commit. Returns
1116 * zero in case of success and a negative error code in case of failure.
1117 */
1118static int grab_empty_leb(struct ubifs_info *c)
1119{
1120 int lnum, err;
1121
1122 /*
1123 * Note, it is very important to first search for an empty LEB and then
1124 * run the commit, not vice-versa. The reason is that there might be
 1125 * only one empty LEB at the moment, the one which had been the
1126 * @c->gc_lnum just before the power cut happened. During the regular
1127 * UBIFS operation (not now) @c->gc_lnum is marked as "taken", so no
1128 * one but GC can grab it. But at this moment this single empty LEB is
1129 * not marked as taken, so if we run commit - what happens? Right, the
1130 * commit will grab it and write the index there. Remember that the
1131 * index always expands as long as there is free space, and it only
1132 * starts consolidating when we run out of space.
1133 *
1134 * IOW, if we run commit now, we might not be able to find a free LEB
1135 * after this.
1136 */
1137 lnum = ubifs_find_free_leb_for_idx(c);
1138 if (lnum < 0) {
1139 dbg_err("could not find an empty LEB");
1140 dbg_dump_lprops(c);
1141 dbg_dump_budg(c, &c->bi);
1142 return lnum;
1143 }
1144
1145 /* Reset the index flag */
1146 err = ubifs_change_one_lp(c, lnum, LPROPS_NC, LPROPS_NC, 0,
1147 LPROPS_INDEX, 0);
1148 if (err)
1149 return err;
1150
1151 c->gc_lnum = lnum;
1152 dbg_rcvry("found empty LEB %d, run commit", lnum);
1153
1154 return ubifs_run_commit(c);
1155}
1156
1157/**
1028 * ubifs_rcvry_gc_commit - recover the GC LEB number and run the commit. 1158 * ubifs_rcvry_gc_commit - recover the GC LEB number and run the commit.
1029 * @c: UBIFS file-system description object 1159 * @c: UBIFS file-system description object
1030 * 1160 *
@@ -1046,71 +1176,26 @@ int ubifs_rcvry_gc_commit(struct ubifs_info *c)
1046{ 1176{
1047 struct ubifs_wbuf *wbuf = &c->jheads[GCHD].wbuf; 1177 struct ubifs_wbuf *wbuf = &c->jheads[GCHD].wbuf;
1048 struct ubifs_lprops lp; 1178 struct ubifs_lprops lp;
1049 int lnum, err; 1179 int err;
1180
1181 dbg_rcvry("GC head LEB %d, offs %d", wbuf->lnum, wbuf->offs);
1050 1182
1051 c->gc_lnum = -1; 1183 c->gc_lnum = -1;
1052 if (wbuf->lnum == -1) { 1184 if (wbuf->lnum == -1 || wbuf->offs == c->leb_size)
1053 dbg_rcvry("no GC head LEB"); 1185 return grab_empty_leb(c);
1054 goto find_free; 1186
1055 }
1056 /*
1057 * See whether the used space in the dirtiest LEB fits in the GC head
1058 * LEB.
1059 */
1060 if (wbuf->offs == c->leb_size) {
1061 dbg_rcvry("no room in GC head LEB");
1062 goto find_free;
1063 }
1064 err = ubifs_find_dirty_leb(c, &lp, wbuf->offs, 2); 1187 err = ubifs_find_dirty_leb(c, &lp, wbuf->offs, 2);
1065 if (err) { 1188 if (err) {
1066 /* 1189 if (err != -ENOSPC)
1067 * There are no dirty or empty LEBs subject to here being
1068 * enough for the index. Try to use
1069 * 'ubifs_find_free_leb_for_idx()', which will return any empty
1070 * LEBs (ignoring index requirements). If the index then
1071 * doesn't have enough LEBs the recovery commit will fail -
1072 * which is the same result anyway i.e. recovery fails. So
1073 * there is no problem ignoring index requirements and just
1074 * grabbing a free LEB since we have already established there
1075 * is not a dirty LEB we could have used instead.
1076 */
1077 if (err == -ENOSPC) {
1078 dbg_rcvry("could not find a dirty LEB");
1079 goto find_free;
1080 }
1081 return err;
1082 }
1083 ubifs_assert(!(lp.flags & LPROPS_INDEX));
1084 lnum = lp.lnum;
1085 if (lp.free + lp.dirty == c->leb_size) {
1086 /* An empty LEB was returned */
1087 if (lp.free != c->leb_size) {
1088 err = ubifs_change_one_lp(c, lnum, c->leb_size,
1089 0, 0, 0, 0);
1090 if (err)
1091 return err;
1092 }
1093 err = ubifs_leb_unmap(c, lnum);
1094 if (err)
1095 return err;
1096 c->gc_lnum = lnum;
1097 dbg_rcvry("allocated LEB %d for GC", lnum);
1098 /* Run the commit */
1099 dbg_rcvry("committing");
1100 return ubifs_run_commit(c);
1101 }
1102 /*
1103 * There was no empty LEB so the used space in the dirtiest LEB must fit
1104 * in the GC head LEB.
1105 */
1106 if (lp.free + lp.dirty < wbuf->offs) {
1107 dbg_rcvry("LEB %d doesn't fit in GC head LEB %d:%d",
1108 lnum, wbuf->lnum, wbuf->offs);
1109 err = ubifs_return_leb(c, lnum);
1110 if (err)
1111 return err; 1190 return err;
1112 goto find_free; 1191
1192 dbg_rcvry("could not find a dirty LEB");
1193 return grab_empty_leb(c);
1113 } 1194 }
1195
1196 ubifs_assert(!(lp.flags & LPROPS_INDEX));
1197 ubifs_assert(lp.free + lp.dirty >= wbuf->offs);
1198
1114 /* 1199 /*
1115 * We run the commit before garbage collection otherwise subsequent 1200 * We run the commit before garbage collection otherwise subsequent
1116 * mounts will see the GC and orphan deletion in a different order. 1201 * mounts will see the GC and orphan deletion in a different order.
@@ -1119,11 +1204,8 @@ int ubifs_rcvry_gc_commit(struct ubifs_info *c)
1119 err = ubifs_run_commit(c); 1204 err = ubifs_run_commit(c);
1120 if (err) 1205 if (err)
1121 return err; 1206 return err;
1122 /* 1207
1123 * The data in the dirtiest LEB fits in the GC head LEB, so do the GC 1208 dbg_rcvry("GC'ing LEB %d", lp.lnum);
1124 * - use locking to keep 'ubifs_assert()' happy.
1125 */
1126 dbg_rcvry("GC'ing LEB %d", lnum);
1127 mutex_lock_nested(&wbuf->io_mutex, wbuf->jhead); 1209 mutex_lock_nested(&wbuf->io_mutex, wbuf->jhead);
1128 err = ubifs_garbage_collect_leb(c, &lp); 1210 err = ubifs_garbage_collect_leb(c, &lp);
1129 if (err >= 0) { 1211 if (err >= 0) {
@@ -1139,37 +1221,17 @@ int ubifs_rcvry_gc_commit(struct ubifs_info *c)
1139 err = -EINVAL; 1221 err = -EINVAL;
1140 return err; 1222 return err;
1141 } 1223 }
1142 if (err != LEB_RETAINED) { 1224
1143 dbg_err("GC returned %d", err); 1225 ubifs_assert(err == LEB_RETAINED);
1226 if (err != LEB_RETAINED)
1144 return -EINVAL; 1227 return -EINVAL;
1145 } 1228
1146 err = ubifs_leb_unmap(c, c->gc_lnum); 1229 err = ubifs_leb_unmap(c, c->gc_lnum);
1147 if (err) 1230 if (err)
1148 return err; 1231 return err;
1149 dbg_rcvry("allocated LEB %d for GC", lnum);
1150 return 0;
1151 1232
1152find_free: 1233 dbg_rcvry("allocated LEB %d for GC", lp.lnum);
1153 /* 1234 return 0;
1154 * There is no GC head LEB or the free space in the GC head LEB is too
1155 * small, or there are not dirty LEBs. Allocate gc_lnum by calling
1156 * 'ubifs_find_free_leb_for_idx()' so GC is not run.
1157 */
1158 lnum = ubifs_find_free_leb_for_idx(c);
1159 if (lnum < 0) {
1160 dbg_err("could not find an empty LEB");
1161 return lnum;
1162 }
1163 /* And reset the index flag */
1164 err = ubifs_change_one_lp(c, lnum, LPROPS_NC, LPROPS_NC, 0,
1165 LPROPS_INDEX, 0);
1166 if (err)
1167 return err;
1168 c->gc_lnum = lnum;
1169 dbg_rcvry("allocated LEB %d for GC", lnum);
1170 /* Run the commit */
1171 dbg_rcvry("committing");
1172 return ubifs_run_commit(c);
1173} 1235}
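If the control flow of the rewritten 'ubifs_rcvry_gc_commit()' is hard to follow in diff form, the stubbed outline below summarizes it; the helpers and flags are placeholders, not the real UBIFS calls:

#include <stdio.h>

static int have_gc_head = 1, gc_head_full, found_dirty_leb = 1;

static int grab_empty_leb(void) { puts("grab empty LEB, then commit"); return 0; }
static int commit_then_gc(void) { puts("commit, then GC the dirty LEB"); return 0; }

int main(void)
{
	/* No GC head, or it is full: fall back to an empty LEB. */
	if (!have_gc_head || gc_head_full)
		return grab_empty_leb();

	/* No dirty LEB fits into the GC head either: same fallback. */
	if (!found_dirty_leb)
		return grab_empty_leb();

	/* Otherwise commit first, then GC the dirty LEB into the head. */
	return commit_then_gc();
}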
1174 1236
1175/** 1237/**
@@ -1411,7 +1473,7 @@ static int fix_size_in_place(struct ubifs_info *c, struct size_entry *e)
1411 err = ubi_leb_change(c->ubi, lnum, c->sbuf, len, UBI_UNKNOWN); 1473 err = ubi_leb_change(c->ubi, lnum, c->sbuf, len, UBI_UNKNOWN);
1412 if (err) 1474 if (err)
1413 goto out; 1475 goto out;
1414 dbg_rcvry("inode %lu at %d:%d size %lld -> %lld ", 1476 dbg_rcvry("inode %lu at %d:%d size %lld -> %lld",
1415 (unsigned long)e->inum, lnum, offs, i_size, e->d_size); 1477 (unsigned long)e->inum, lnum, offs, i_size, e->d_size);
1416 return 0; 1478 return 0;
1417 1479
@@ -1460,20 +1522,27 @@ int ubifs_recover_size(struct ubifs_info *c)
1460 e->i_size = le64_to_cpu(ino->size); 1522 e->i_size = le64_to_cpu(ino->size);
1461 } 1523 }
1462 } 1524 }
1525
1463 if (e->exists && e->i_size < e->d_size) { 1526 if (e->exists && e->i_size < e->d_size) {
1464 if (!e->inode && (c->vfs_sb->s_flags & MS_RDONLY)) { 1527 if (c->ro_mount) {
1465 /* Fix the inode size and pin it in memory */ 1528 /* Fix the inode size and pin it in memory */
1466 struct inode *inode; 1529 struct inode *inode;
1530 struct ubifs_inode *ui;
1531
1532 ubifs_assert(!e->inode);
1467 1533
1468 inode = ubifs_iget(c->vfs_sb, e->inum); 1534 inode = ubifs_iget(c->vfs_sb, e->inum);
1469 if (IS_ERR(inode)) 1535 if (IS_ERR(inode))
1470 return PTR_ERR(inode); 1536 return PTR_ERR(inode);
1537
1538 ui = ubifs_inode(inode);
1471 if (inode->i_size < e->d_size) { 1539 if (inode->i_size < e->d_size) {
1472 dbg_rcvry("ino %lu size %lld -> %lld", 1540 dbg_rcvry("ino %lu size %lld -> %lld",
1473 (unsigned long)e->inum, 1541 (unsigned long)e->inum,
1474 e->d_size, inode->i_size); 1542 inode->i_size, e->d_size);
1475 inode->i_size = e->d_size; 1543 inode->i_size = e->d_size;
1476 ubifs_inode(inode)->ui_size = e->d_size; 1544 ui->ui_size = e->d_size;
1545 ui->synced_i_size = e->d_size;
1477 e->inode = inode; 1546 e->inode = inode;
1478 this = rb_next(this); 1547 this = rb_next(this);
1479 continue; 1548 continue;
@@ -1488,9 +1557,11 @@ int ubifs_recover_size(struct ubifs_info *c)
1488 iput(e->inode); 1557 iput(e->inode);
1489 } 1558 }
1490 } 1559 }
1560
1491 this = rb_next(this); 1561 this = rb_next(this);
1492 rb_erase(&e->rb, &c->size_tree); 1562 rb_erase(&e->rb, &c->size_tree);
1493 kfree(e); 1563 kfree(e);
1494 } 1564 }
1565
1495 return 0; 1566 return 0;
1496} 1567}
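To summarize the 'ubifs_recover_size()' hunk above: on a read-write mount the stale size is corrected on the flash ('fix_size_in_place()'), while on a read-only mount the inode is read in and the recovered size is pinned on the in-memory copy only. A compressed sketch of that decision, with hypothetical types standing in for the real structures:

#include <stdio.h>

/* Hypothetical stand-ins; not the real UBIFS structures. */
struct fake_entry { long long i_size, d_size; int exists; };

int main(void)
{
	struct fake_entry e = { .i_size = 4096, .d_size = 8192, .exists = 1 };
	int ro_mount = 1;

	/* Data nodes were found beyond the recorded inode size. */
	if (e.exists && e.i_size < e.d_size) {
		if (ro_mount)
			puts("pin recovered size on in-memory inode");
		else
			puts("fix size in place on the flash");
	}
	return 0;
}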
diff --git a/fs/ubifs/replay.c b/fs/ubifs/replay.c
index 5c2d6d759a3e..5e97161ce4d3 100644
--- a/fs/ubifs/replay.c
+++ b/fs/ubifs/replay.c
@@ -33,43 +33,32 @@
33 */ 33 */
34 34
35#include "ubifs.h" 35#include "ubifs.h"
36 36#include <linux/list_sort.h>
37/*
38 * Replay flags.
39 *
40 * REPLAY_DELETION: node was deleted
41 * REPLAY_REF: node is a reference node
42 */
43enum {
44 REPLAY_DELETION = 1,
45 REPLAY_REF = 2,
46};
47 37
48/** 38/**
49 * struct replay_entry - replay tree entry. 39 * struct replay_entry - replay list entry.
50 * @lnum: logical eraseblock number of the node 40 * @lnum: logical eraseblock number of the node
51 * @offs: node offset 41 * @offs: node offset
52 * @len: node length 42 * @len: node length
43 * @deletion: non-zero if this entry corresponds to a node deletion
53 * @sqnum: node sequence number 44 * @sqnum: node sequence number
54 * @flags: replay flags 45 * @list: links the replay list
55 * @rb: links the replay tree
56 * @key: node key 46 * @key: node key
57 * @nm: directory entry name 47 * @nm: directory entry name
58 * @old_size: truncation old size 48 * @old_size: truncation old size
59 * @new_size: truncation new size 49 * @new_size: truncation new size
60 * @free: amount of free space in a bud
61 * @dirty: amount of dirty space in a bud from padding and deletion nodes
62 * 50 *
63 * UBIFS journal replay must compare node sequence numbers, which means it must 51 * The replay process first scans all buds and builds the replay list, then
 64 * build a tree of node information to insert into the TNC. 52 * sorts the replay list in node sequence number order, and then inserts all
 53 * the replay entries into the TNC.
65 */ 54 */
66struct replay_entry { 55struct replay_entry {
67 int lnum; 56 int lnum;
68 int offs; 57 int offs;
69 int len; 58 int len;
59 unsigned int deletion:1;
70 unsigned long long sqnum; 60 unsigned long long sqnum;
71 int flags; 61 struct list_head list;
72 struct rb_node rb;
73 union ubifs_key key; 62 union ubifs_key key;
74 union { 63 union {
75 struct qstr nm; 64 struct qstr nm;
@@ -77,10 +66,6 @@ struct replay_entry {
77 loff_t old_size; 66 loff_t old_size;
78 loff_t new_size; 67 loff_t new_size;
79 }; 68 };
80 struct {
81 int free;
82 int dirty;
83 };
84 }; 69 };
85}; 70};
86 71
@@ -88,57 +73,64 @@ struct replay_entry {
88 * struct bud_entry - entry in the list of buds to replay. 73 * struct bud_entry - entry in the list of buds to replay.
89 * @list: next bud in the list 74 * @list: next bud in the list
90 * @bud: bud description object 75 * @bud: bud description object
91 * @free: free bytes in the bud
92 * @sqnum: reference node sequence number 76 * @sqnum: reference node sequence number
77 * @free: free bytes in the bud
78 * @dirty: dirty bytes in the bud
93 */ 79 */
94struct bud_entry { 80struct bud_entry {
95 struct list_head list; 81 struct list_head list;
96 struct ubifs_bud *bud; 82 struct ubifs_bud *bud;
97 int free;
98 unsigned long long sqnum; 83 unsigned long long sqnum;
84 int free;
85 int dirty;
99}; 86};
100 87
101/** 88/**
102 * set_bud_lprops - set free and dirty space used by a bud. 89 * set_bud_lprops - set free and dirty space used by a bud.
103 * @c: UBIFS file-system description object 90 * @c: UBIFS file-system description object
104 * @r: replay entry of bud 91 * @b: bud entry which describes the bud
92 *
93 * This function makes sure the LEB properties of bud @b are set correctly
94 * after the replay. Returns zero in case of success and a negative error code
95 * in case of failure.
105 */ 96 */
106static int set_bud_lprops(struct ubifs_info *c, struct replay_entry *r) 97static int set_bud_lprops(struct ubifs_info *c, struct bud_entry *b)
107{ 98{
108 const struct ubifs_lprops *lp; 99 const struct ubifs_lprops *lp;
109 int err = 0, dirty; 100 int err = 0, dirty;
110 101
111 ubifs_get_lprops(c); 102 ubifs_get_lprops(c);
112 103
113 lp = ubifs_lpt_lookup_dirty(c, r->lnum); 104 lp = ubifs_lpt_lookup_dirty(c, b->bud->lnum);
114 if (IS_ERR(lp)) { 105 if (IS_ERR(lp)) {
115 err = PTR_ERR(lp); 106 err = PTR_ERR(lp);
116 goto out; 107 goto out;
117 } 108 }
118 109
119 dirty = lp->dirty; 110 dirty = lp->dirty;
120 if (r->offs == 0 && (lp->free != c->leb_size || lp->dirty != 0)) { 111 if (b->bud->start == 0 && (lp->free != c->leb_size || lp->dirty != 0)) {
121 /* 112 /*
122 * The LEB was added to the journal with a starting offset of 113 * The LEB was added to the journal with a starting offset of
123 * zero which means the LEB must have been empty. The LEB 114 * zero which means the LEB must have been empty. The LEB
124 * property values should be lp->free == c->leb_size and 115 * property values should be @lp->free == @c->leb_size and
125 * lp->dirty == 0, but that is not the case. The reason is that 116 * @lp->dirty == 0, but that is not the case. The reason is that
126 * the LEB was garbage collected. The garbage collector resets 117 * the LEB had been garbage collected before it became the bud,
 127 * the free and dirty space without recording it anywhere except 118 * and there was no commit in between. The garbage collector
128 * lprops, so if there is not a commit then lprops does not have 119 * resets the free and dirty space without recording it
129 * that information next time the file system is mounted. 120 * anywhere except lprops, so if there was no commit then
121 * lprops does not have that information.
130 * 122 *
131 * We do not need to adjust free space because the scan has told 123 * We do not need to adjust free space because the scan has told
132 * us the exact value which is recorded in the replay entry as 124 * us the exact value which is recorded in the replay entry as
133 * r->free. 125 * @b->free.
134 * 126 *
135 * However we do need to subtract from the dirty space the 127 * However we do need to subtract from the dirty space the
136 * amount of space that the garbage collector reclaimed, which 128 * amount of space that the garbage collector reclaimed, which
137 * is the whole LEB minus the amount of space that was free. 129 * is the whole LEB minus the amount of space that was free.
138 */ 130 */
139 dbg_mnt("bud LEB %d was GC'd (%d free, %d dirty)", r->lnum, 131 dbg_mnt("bud LEB %d was GC'd (%d free, %d dirty)", b->bud->lnum,
140 lp->free, lp->dirty); 132 lp->free, lp->dirty);
141 dbg_gc("bud LEB %d was GC'd (%d free, %d dirty)", r->lnum, 133 dbg_gc("bud LEB %d was GC'd (%d free, %d dirty)", b->bud->lnum,
142 lp->free, lp->dirty); 134 lp->free, lp->dirty);
143 dirty -= c->leb_size - lp->free; 135 dirty -= c->leb_size - lp->free;
144 /* 136 /*
@@ -150,21 +142,48 @@ static int set_bud_lprops(struct ubifs_info *c, struct replay_entry *r)
150 */ 142 */
151 if (dirty != 0) 143 if (dirty != 0)
152 dbg_msg("LEB %d lp: %d free %d dirty " 144 dbg_msg("LEB %d lp: %d free %d dirty "
153 "replay: %d free %d dirty", r->lnum, lp->free, 145 "replay: %d free %d dirty", b->bud->lnum,
154 lp->dirty, r->free, r->dirty); 146 lp->free, lp->dirty, b->free, b->dirty);
155 } 147 }
156 lp = ubifs_change_lp(c, lp, r->free, dirty + r->dirty, 148 lp = ubifs_change_lp(c, lp, b->free, dirty + b->dirty,
157 lp->flags | LPROPS_TAKEN, 0); 149 lp->flags | LPROPS_TAKEN, 0);
158 if (IS_ERR(lp)) { 150 if (IS_ERR(lp)) {
159 err = PTR_ERR(lp); 151 err = PTR_ERR(lp);
160 goto out; 152 goto out;
161 } 153 }
154
155 /* Make sure the journal head points to the latest bud */
156 err = ubifs_wbuf_seek_nolock(&c->jheads[b->bud->jhead].wbuf,
157 b->bud->lnum, c->leb_size - b->free,
158 UBI_SHORTTERM);
159
162out: 160out:
163 ubifs_release_lprops(c); 161 ubifs_release_lprops(c);
164 return err; 162 return err;
165} 163}
166 164
167/** 165/**
166 * set_buds_lprops - set free and dirty space for all replayed buds.
167 * @c: UBIFS file-system description object
168 *
169 * This function sets LEB properties for all replayed buds. Returns zero in
170 * case of success and a negative error code in case of failure.
171 */
172static int set_buds_lprops(struct ubifs_info *c)
173{
174 struct bud_entry *b;
175 int err;
176
177 list_for_each_entry(b, &c->replay_buds, list) {
178 err = set_bud_lprops(c, b);
179 if (err)
180 return err;
181 }
182
183 return 0;
184}
185
186/**
168 * trun_remove_range - apply a replay entry for a truncation to the TNC. 187 * trun_remove_range - apply a replay entry for a truncation to the TNC.
169 * @c: UBIFS file-system description object 188 * @c: UBIFS file-system description object
170 * @r: replay entry of truncation 189 * @r: replay entry of truncation
@@ -200,24 +219,22 @@ static int trun_remove_range(struct ubifs_info *c, struct replay_entry *r)
200 */ 219 */
201static int apply_replay_entry(struct ubifs_info *c, struct replay_entry *r) 220static int apply_replay_entry(struct ubifs_info *c, struct replay_entry *r)
202{ 221{
203 int err, deletion = ((r->flags & REPLAY_DELETION) != 0); 222 int err;
204 223
205 dbg_mnt("LEB %d:%d len %d flgs %d sqnum %llu %s", r->lnum, 224 dbg_mnt("LEB %d:%d len %d deletion %d sqnum %llu %s", r->lnum,
206 r->offs, r->len, r->flags, r->sqnum, DBGKEY(&r->key)); 225 r->offs, r->len, r->deletion, r->sqnum, DBGKEY(&r->key));
207 226
208 /* Set c->replay_sqnum to help deal with dangling branches. */ 227 /* Set c->replay_sqnum to help deal with dangling branches. */
209 c->replay_sqnum = r->sqnum; 228 c->replay_sqnum = r->sqnum;
210 229
211 if (r->flags & REPLAY_REF) 230 if (is_hash_key(c, &r->key)) {
212 err = set_bud_lprops(c, r); 231 if (r->deletion)
213 else if (is_hash_key(c, &r->key)) {
214 if (deletion)
215 err = ubifs_tnc_remove_nm(c, &r->key, &r->nm); 232 err = ubifs_tnc_remove_nm(c, &r->key, &r->nm);
216 else 233 else
217 err = ubifs_tnc_add_nm(c, &r->key, r->lnum, r->offs, 234 err = ubifs_tnc_add_nm(c, &r->key, r->lnum, r->offs,
218 r->len, &r->nm); 235 r->len, &r->nm);
219 } else { 236 } else {
220 if (deletion) 237 if (r->deletion)
221 switch (key_type(c, &r->key)) { 238 switch (key_type(c, &r->key)) {
222 case UBIFS_INO_KEY: 239 case UBIFS_INO_KEY:
223 { 240 {
@@ -240,7 +257,7 @@ static int apply_replay_entry(struct ubifs_info *c, struct replay_entry *r)
240 return err; 257 return err;
241 258
242 if (c->need_recovery) 259 if (c->need_recovery)
243 err = ubifs_recover_size_accum(c, &r->key, deletion, 260 err = ubifs_recover_size_accum(c, &r->key, r->deletion,
244 r->new_size); 261 r->new_size);
245 } 262 }
246 263
@@ -248,68 +265,77 @@ static int apply_replay_entry(struct ubifs_info *c, struct replay_entry *r)
248} 265}
249 266
250/** 267/**
251 * destroy_replay_tree - destroy the replay. 268 * replay_entries_cmp - compare 2 replay entries.
252 * @c: UBIFS file-system description object 269 * @priv: UBIFS file-system description object
270 * @a: first replay entry
 271 * @b: second replay entry
253 * 272 *
 254 * Destroy the replay tree. 273 * This is a comparison function for 'list_sort()' which compares 2 replay
 274 * entries @a and @b by comparing their sequence numbers. Returns %1 if @a has
 275 * a greater sequence number and %-1 otherwise.
255 */ 276 */
256static void destroy_replay_tree(struct ubifs_info *c) 277static int replay_entries_cmp(void *priv, struct list_head *a,
278 struct list_head *b)
257{ 279{
258 struct rb_node *this = c->replay_tree.rb_node; 280 struct replay_entry *ra, *rb;
259 struct replay_entry *r; 281
260 282 cond_resched();
261 while (this) { 283 if (a == b)
262 if (this->rb_left) { 284 return 0;
263 this = this->rb_left; 285
264 continue; 286 ra = list_entry(a, struct replay_entry, list);
265 } else if (this->rb_right) { 287 rb = list_entry(b, struct replay_entry, list);
266 this = this->rb_right; 288 ubifs_assert(ra->sqnum != rb->sqnum);
267 continue; 289 if (ra->sqnum > rb->sqnum)
268 } 290 return 1;
269 r = rb_entry(this, struct replay_entry, rb); 291 return -1;
270 this = rb_parent(this);
271 if (this) {
272 if (this->rb_left == &r->rb)
273 this->rb_left = NULL;
274 else
275 this->rb_right = NULL;
276 }
277 if (is_hash_key(c, &r->key))
278 kfree(r->nm.name);
279 kfree(r);
280 }
281 c->replay_tree = RB_ROOT;
282} 292}
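'list_sort()' expects a qsort()-style comparator returning negative, zero, or positive. Since list_sort() itself is kernel-only, a user-space analogue of the same sequence-number ordering, applied with qsort() over an array, might look like this:

#include <stdio.h>
#include <stdlib.h>

struct entry { unsigned long long sqnum; };

/* Same ordering rule as replay_entries_cmp(), adapted for qsort(). */
static int cmp(const void *a, const void *b)
{
	const struct entry *ra = a, *rb = b;

	return (ra->sqnum > rb->sqnum) - (ra->sqnum < rb->sqnum);
}

int main(void)
{
	struct entry e[] = { { 42 }, { 7 }, { 19 } };

	qsort(e, 3, sizeof(e[0]), cmp);
	printf("%llu %llu %llu\n", e[0].sqnum, e[1].sqnum, e[2].sqnum);
	return 0;
}

This prints "7 19 42", i.e. oldest modification first, which is exactly the order in which 'apply_replay_list()' wants to replay entries.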
283 293
284/** 294/**
285 * apply_replay_tree - apply the replay tree to the TNC. 295 * apply_replay_list - apply the replay list to the TNC.
286 * @c: UBIFS file-system description object 296 * @c: UBIFS file-system description object
287 * 297 *
288 * Apply the replay tree. 298 * Apply all entries in the replay list to the TNC. Returns zero in case of
289 * Returns zero in case of success and a negative error code in case of 299 * success and a negative error code in case of failure.
290 * failure.
291 */ 300 */
292static int apply_replay_tree(struct ubifs_info *c) 301static int apply_replay_list(struct ubifs_info *c)
293{ 302{
294 struct rb_node *this = rb_first(&c->replay_tree); 303 struct replay_entry *r;
304 int err;
295 305
296 while (this) { 306 list_sort(c, &c->replay_list, &replay_entries_cmp);
297 struct replay_entry *r;
298 int err;
299 307
308 list_for_each_entry(r, &c->replay_list, list) {
300 cond_resched(); 309 cond_resched();
301 310
302 r = rb_entry(this, struct replay_entry, rb);
303 err = apply_replay_entry(c, r); 311 err = apply_replay_entry(c, r);
304 if (err) 312 if (err)
305 return err; 313 return err;
306 this = rb_next(this);
307 } 314 }
315
308 return 0; 316 return 0;
309} 317}
310 318
311/** 319/**
312 * insert_node - insert a node to the replay tree. 320 * destroy_replay_list - destroy the replay.
321 * @c: UBIFS file-system description object
322 *
323 * Destroy the replay list.
324 */
325static void destroy_replay_list(struct ubifs_info *c)
326{
327 struct replay_entry *r, *tmp;
328
329 list_for_each_entry_safe(r, tmp, &c->replay_list, list) {
330 if (is_hash_key(c, &r->key))
331 kfree(r->nm.name);
332 list_del(&r->list);
333 kfree(r);
334 }
335}
336
337/**
338 * insert_node - insert a node to the replay list
313 * @c: UBIFS file-system description object 339 * @c: UBIFS file-system description object
314 * @lnum: node logical eraseblock number 340 * @lnum: node logical eraseblock number
315 * @offs: node offset 341 * @offs: node offset
@@ -321,39 +347,25 @@ static int apply_replay_tree(struct ubifs_info *c)
321 * @old_size: truncation old size 347 * @old_size: truncation old size
322 * @new_size: truncation new size 348 * @new_size: truncation new size
323 * 349 *
324 * This function inserts a scanned non-direntry node to the replay tree. The 350 * This function inserts a scanned non-direntry node to the replay list. The
325 * replay tree is an RB-tree containing @struct replay_entry elements which are 351 * replay list contains @struct replay_entry elements, and we sort this list in
326 * indexed by the sequence number. The replay tree is applied at the very end 352 * sequence number order before applying it. The replay list is applied at the
327 * of the replay process. Since the tree is sorted in sequence number order, 353 * very end of the replay process. Since the list is sorted in sequence number
328 * the older modifications are applied first. This function returns zero in 354 * order, the older modifications are applied first. This function returns zero
329 * case of success and a negative error code in case of failure. 355 * in case of success and a negative error code in case of failure.
330 */ 356 */
331static int insert_node(struct ubifs_info *c, int lnum, int offs, int len, 357static int insert_node(struct ubifs_info *c, int lnum, int offs, int len,
332 union ubifs_key *key, unsigned long long sqnum, 358 union ubifs_key *key, unsigned long long sqnum,
333 int deletion, int *used, loff_t old_size, 359 int deletion, int *used, loff_t old_size,
334 loff_t new_size) 360 loff_t new_size)
335{ 361{
336 struct rb_node **p = &c->replay_tree.rb_node, *parent = NULL;
337 struct replay_entry *r; 362 struct replay_entry *r;
338 363
364 dbg_mnt("add LEB %d:%d, key %s", lnum, offs, DBGKEY(key));
365
339 if (key_inum(c, key) >= c->highest_inum) 366 if (key_inum(c, key) >= c->highest_inum)
340 c->highest_inum = key_inum(c, key); 367 c->highest_inum = key_inum(c, key);
341 368
342 dbg_mnt("add LEB %d:%d, key %s", lnum, offs, DBGKEY(key));
343 while (*p) {
344 parent = *p;
345 r = rb_entry(parent, struct replay_entry, rb);
346 if (sqnum < r->sqnum) {
347 p = &(*p)->rb_left;
348 continue;
349 } else if (sqnum > r->sqnum) {
350 p = &(*p)->rb_right;
351 continue;
352 }
353 ubifs_err("duplicate sqnum in replay");
354 return -EINVAL;
355 }
356
357 r = kzalloc(sizeof(struct replay_entry), GFP_KERNEL); 369 r = kzalloc(sizeof(struct replay_entry), GFP_KERNEL);
358 if (!r) 370 if (!r)
359 return -ENOMEM; 371 return -ENOMEM;
@@ -363,19 +375,18 @@ static int insert_node(struct ubifs_info *c, int lnum, int offs, int len,
363 r->lnum = lnum; 375 r->lnum = lnum;
364 r->offs = offs; 376 r->offs = offs;
365 r->len = len; 377 r->len = len;
378 r->deletion = !!deletion;
366 r->sqnum = sqnum; 379 r->sqnum = sqnum;
367 r->flags = (deletion ? REPLAY_DELETION : 0); 380 key_copy(c, key, &r->key);
368 r->old_size = old_size; 381 r->old_size = old_size;
369 r->new_size = new_size; 382 r->new_size = new_size;
370 key_copy(c, key, &r->key);
371 383
372 rb_link_node(&r->rb, parent, p); 384 list_add_tail(&r->list, &c->replay_list);
373 rb_insert_color(&r->rb, &c->replay_tree);
374 return 0; 385 return 0;
375} 386}
376 387
377/** 388/**
378 * insert_dent - insert a directory entry node into the replay tree. 389 * insert_dent - insert a directory entry node into the replay list.
379 * @c: UBIFS file-system description object 390 * @c: UBIFS file-system description object
380 * @lnum: node logical eraseblock number 391 * @lnum: node logical eraseblock number
381 * @offs: node offset 392 * @offs: node offset
@@ -387,43 +398,25 @@ static int insert_node(struct ubifs_info *c, int lnum, int offs, int len,
387 * @deletion: non-zero if this is a deletion 398 * @deletion: non-zero if this is a deletion
388 * @used: number of bytes in use in a LEB 399 * @used: number of bytes in use in a LEB
389 * 400 *
390 * This function inserts a scanned directory entry node to the replay tree. 401 * This function inserts a scanned directory entry node or an extended
391 * Returns zero in case of success and a negative error code in case of 402 * attribute entry to the replay list. Returns zero in case of success and a
392 * failure. 403 * negative error code in case of failure.
393 *
394 * This function is also used for extended attribute entries because they are
395 * implemented as directory entry nodes.
396 */ 404 */
397static int insert_dent(struct ubifs_info *c, int lnum, int offs, int len, 405static int insert_dent(struct ubifs_info *c, int lnum, int offs, int len,
398 union ubifs_key *key, const char *name, int nlen, 406 union ubifs_key *key, const char *name, int nlen,
399 unsigned long long sqnum, int deletion, int *used) 407 unsigned long long sqnum, int deletion, int *used)
400{ 408{
401 struct rb_node **p = &c->replay_tree.rb_node, *parent = NULL;
402 struct replay_entry *r; 409 struct replay_entry *r;
403 char *nbuf; 410 char *nbuf;
404 411
412 dbg_mnt("add LEB %d:%d, key %s", lnum, offs, DBGKEY(key));
405 if (key_inum(c, key) >= c->highest_inum) 413 if (key_inum(c, key) >= c->highest_inum)
406 c->highest_inum = key_inum(c, key); 414 c->highest_inum = key_inum(c, key);
407 415
408 dbg_mnt("add LEB %d:%d, key %s", lnum, offs, DBGKEY(key));
409 while (*p) {
410 parent = *p;
411 r = rb_entry(parent, struct replay_entry, rb);
412 if (sqnum < r->sqnum) {
413 p = &(*p)->rb_left;
414 continue;
415 }
416 if (sqnum > r->sqnum) {
417 p = &(*p)->rb_right;
418 continue;
419 }
420 ubifs_err("duplicate sqnum in replay");
421 return -EINVAL;
422 }
423
424 r = kzalloc(sizeof(struct replay_entry), GFP_KERNEL); 416 r = kzalloc(sizeof(struct replay_entry), GFP_KERNEL);
425 if (!r) 417 if (!r)
426 return -ENOMEM; 418 return -ENOMEM;
419
427 nbuf = kmalloc(nlen + 1, GFP_KERNEL); 420 nbuf = kmalloc(nlen + 1, GFP_KERNEL);
428 if (!nbuf) { 421 if (!nbuf) {
429 kfree(r); 422 kfree(r);
@@ -435,17 +428,15 @@ static int insert_dent(struct ubifs_info *c, int lnum, int offs, int len,
435 r->lnum = lnum; 428 r->lnum = lnum;
436 r->offs = offs; 429 r->offs = offs;
437 r->len = len; 430 r->len = len;
431 r->deletion = !!deletion;
438 r->sqnum = sqnum; 432 r->sqnum = sqnum;
433 key_copy(c, key, &r->key);
439 r->nm.len = nlen; 434 r->nm.len = nlen;
440 memcpy(nbuf, name, nlen); 435 memcpy(nbuf, name, nlen);
441 nbuf[nlen] = '\0'; 436 nbuf[nlen] = '\0';
442 r->nm.name = nbuf; 437 r->nm.name = nbuf;
443 r->flags = (deletion ? REPLAY_DELETION : 0);
444 key_copy(c, key, &r->key);
445 438
446 ubifs_assert(!*p); 439 list_add_tail(&r->list, &c->replay_list);
447 rb_link_node(&r->rb, parent, p);
448 rb_insert_color(&r->rb, &c->replay_tree);
449 return 0; 440 return 0;
450} 441}
451 442
@@ -482,29 +473,91 @@ int ubifs_validate_entry(struct ubifs_info *c,
482} 473}
483 474
484/** 475/**
476 * is_last_bud - check if the bud is the last in the journal head.
477 * @c: UBIFS file-system description object
478 * @bud: bud description object
479 *
480 * This function checks if bud @bud is the last bud in its journal head. This
481 * information is then used by 'replay_bud()' to decide whether the bud can
482 * have corruptions or not. Indeed, only last buds can be corrupted by power
483 * cuts. Returns %1 if this is the last bud, and %0 if not.
484 */
485static int is_last_bud(struct ubifs_info *c, struct ubifs_bud *bud)
486{
487 struct ubifs_jhead *jh = &c->jheads[bud->jhead];
488 struct ubifs_bud *next;
489 uint32_t data;
490 int err;
491
492 if (list_is_last(&bud->list, &jh->buds_list))
493 return 1;
494
495 /*
496 * The following is a quirk to make sure we work correctly with UBIFS
497 * images used with older UBIFS.
498 *
499 * Normally, the last bud will be the last in the journal head's list
 500 * of buds. However, there is one exception if the UBIFS image belongs
501 * to older UBIFS. This is fairly unlikely: one would need to use old
502 * UBIFS, then have a power cut exactly at the right point, and then
503 * try to mount this image with new UBIFS.
504 *
505 * The exception is: it is possible to have 2 buds A and B, A goes
 506 * before B, and B is the last, bud B contains no data, and bud A is
507 * corrupted at the end. The reason is that in older versions when the
 508 * journal code switched to the next bud (from A to B), it first added a
509 * log reference node for the new bud (B), and only after this it
510 * synchronized the write-buffer of current bud (A). But later this was
511 * changed and UBIFS started to always synchronize the write-buffer of
512 * the bud (A) before writing the log reference for the new bud (B).
513 *
514 * But because older UBIFS always synchronized A's write-buffer before
 515 * writing to B, we can recognize this exceptional situation by
516 * checking the contents of bud B - if it is empty, then A can be
517 * treated as the last and we can recover it.
518 *
519 * TODO: remove this piece of code in a couple of years (today it is
520 * 16.05.2011).
521 */
522 next = list_entry(bud->list.next, struct ubifs_bud, list);
523 if (!list_is_last(&next->list, &jh->buds_list))
524 return 0;
525
526 err = ubi_read(c->ubi, next->lnum, (char *)&data,
527 next->start, 4);
528 if (err)
529 return 0;
530
531 return data == 0xFFFFFFFF;
532}
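The empty-bud probe works because erased flash reads back as all 0xFF bytes, so the first four bytes of a never-written bud form the value 0xFFFFFFFF. A stand-alone sketch of the same check on a plain buffer (hypothetical helper, not the real ubi_read() path):

#include <stdint.h>
#include <string.h>

/* Returns 1 if the buffer starts with four erased (0xFF) bytes. */
static int looks_erased(const unsigned char *buf)
{
	uint32_t data;

	memcpy(&data, buf, sizeof(data));	/* avoid unaligned access */
	return data == 0xFFFFFFFFu;
}

int main(void)
{
	unsigned char erased[4] = { 0xff, 0xff, 0xff, 0xff };

	return looks_erased(erased) ? 0 : 1;
}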
533
534/**
485 * replay_bud - replay a bud logical eraseblock. 535 * replay_bud - replay a bud logical eraseblock.
486 * @c: UBIFS file-system description object 536 * @c: UBIFS file-system description object
487 * @lnum: bud logical eraseblock number to replay 537 * @b: bud entry which describes the bud
488 * @offs: bud start offset
489 * @jhead: journal head to which this bud belongs
490 * @free: amount of free space in the bud is returned here
491 * @dirty: amount of dirty space from padding and deletion nodes is returned
492 * here
493 * 538 *
 494 * This function returns zero in case of success and a negative error code in 539 * This function replays bud @b->bud, recovers it if needed, and adds all nodes
495 * case of failure. 540 * from this bud to the replay list. Returns zero in case of success and a
541 * negative error code in case of failure.
496 */ 542 */
497static int replay_bud(struct ubifs_info *c, int lnum, int offs, int jhead, 543static int replay_bud(struct ubifs_info *c, struct bud_entry *b)
498 int *free, int *dirty)
499{ 544{
500 int err = 0, used = 0; 545 int is_last = is_last_bud(c, b->bud);
546 int err = 0, used = 0, lnum = b->bud->lnum, offs = b->bud->start;
501 struct ubifs_scan_leb *sleb; 547 struct ubifs_scan_leb *sleb;
502 struct ubifs_scan_node *snod; 548 struct ubifs_scan_node *snod;
503 struct ubifs_bud *bud;
504 549
505 dbg_mnt("replay bud LEB %d, head %d", lnum, jhead); 550 dbg_mnt("replay bud LEB %d, head %d, offs %d, is_last %d",
506 if (c->need_recovery) 551 lnum, b->bud->jhead, offs, is_last);
507 sleb = ubifs_recover_leb(c, lnum, offs, c->sbuf, jhead != GCHD); 552
553 if (c->need_recovery && is_last)
554 /*
555 * Recover only last LEBs in the journal heads, because power
556 * cuts may cause corruptions only in these LEBs, because only
557 * these LEBs could possibly be written to at the power cut
558 * time.
559 */
560 sleb = ubifs_recover_leb(c, lnum, offs, c->sbuf, b->bud->jhead);
508 else 561 else
509 sleb = ubifs_scan(c, lnum, offs, c->sbuf, 0); 562 sleb = ubifs_scan(c, lnum, offs, c->sbuf, 0);
510 if (IS_ERR(sleb)) 563 if (IS_ERR(sleb))
@@ -620,20 +673,13 @@ static int replay_bud(struct ubifs_info *c, int lnum, int offs, int jhead,
620 goto out; 673 goto out;
621 } 674 }
622 675
623 bud = ubifs_search_bud(c, lnum); 676 ubifs_assert(ubifs_search_bud(c, lnum));
624 if (!bud)
625 BUG();
626
627 ubifs_assert(sleb->endpt - offs >= used); 677 ubifs_assert(sleb->endpt - offs >= used);
628 ubifs_assert(sleb->endpt % c->min_io_size == 0); 678 ubifs_assert(sleb->endpt % c->min_io_size == 0);
629 679
630 if (sleb->endpt + c->min_io_size <= c->leb_size && 680 b->dirty = sleb->endpt - offs - used;
631 !(c->vfs_sb->s_flags & MS_RDONLY)) 681 b->free = c->leb_size - sleb->endpt;
 632 err = ubifs_wbuf_seek_nolock(&c->jheads[jhead].wbuf, lnum, 682 dbg_mnt("bud LEB %d replayed: dirty %d, free %d", lnum, b->dirty, b->free);
633 sleb->endpt, UBI_SHORTTERM);
634
635 *dirty = sleb->endpt - offs - used;
636 *free = c->leb_size - sleb->endpt;
637 683
638out: 684out:
639 ubifs_scan_destroy(sleb); 685 ubifs_scan_destroy(sleb);
@@ -647,55 +693,6 @@ out_dump:
647} 693}
648 694
649/** 695/**
650 * insert_ref_node - insert a reference node to the replay tree.
651 * @c: UBIFS file-system description object
652 * @lnum: node logical eraseblock number
653 * @offs: node offset
654 * @sqnum: sequence number
655 * @free: amount of free space in bud
656 * @dirty: amount of dirty space from padding and deletion nodes
657 *
658 * This function inserts a reference node to the replay tree and returns zero
659 * in case of success or a negative error code in case of failure.
660 */
661static int insert_ref_node(struct ubifs_info *c, int lnum, int offs,
662 unsigned long long sqnum, int free, int dirty)
663{
664 struct rb_node **p = &c->replay_tree.rb_node, *parent = NULL;
665 struct replay_entry *r;
666
667 dbg_mnt("add ref LEB %d:%d", lnum, offs);
668 while (*p) {
669 parent = *p;
670 r = rb_entry(parent, struct replay_entry, rb);
671 if (sqnum < r->sqnum) {
672 p = &(*p)->rb_left;
673 continue;
674 } else if (sqnum > r->sqnum) {
675 p = &(*p)->rb_right;
676 continue;
677 }
678 ubifs_err("duplicate sqnum in replay tree");
679 return -EINVAL;
680 }
681
682 r = kzalloc(sizeof(struct replay_entry), GFP_KERNEL);
683 if (!r)
684 return -ENOMEM;
685
686 r->lnum = lnum;
687 r->offs = offs;
688 r->sqnum = sqnum;
689 r->flags = REPLAY_REF;
690 r->free = free;
691 r->dirty = dirty;
692
693 rb_link_node(&r->rb, parent, p);
694 rb_insert_color(&r->rb, &c->replay_tree);
695 return 0;
696}
697
698/**
699 * replay_buds - replay all buds. 696 * replay_buds - replay all buds.
700 * @c: UBIFS file-system description object 697 * @c: UBIFS file-system description object
701 * 698 *
@@ -705,17 +702,16 @@ static int insert_ref_node(struct ubifs_info *c, int lnum, int offs,
705static int replay_buds(struct ubifs_info *c) 702static int replay_buds(struct ubifs_info *c)
706{ 703{
707 struct bud_entry *b; 704 struct bud_entry *b;
708 int err, uninitialized_var(free), uninitialized_var(dirty); 705 int err;
706 unsigned long long prev_sqnum = 0;
709 707
710 list_for_each_entry(b, &c->replay_buds, list) { 708 list_for_each_entry(b, &c->replay_buds, list) {
711 err = replay_bud(c, b->bud->lnum, b->bud->start, b->bud->jhead, 709 err = replay_bud(c, b);
712 &free, &dirty);
713 if (err)
714 return err;
715 err = insert_ref_node(c, b->bud->lnum, b->bud->start, b->sqnum,
716 free, dirty);
717 if (err) 710 if (err)
718 return err; 711 return err;
712
713 ubifs_assert(b->sqnum > prev_sqnum);
714 prev_sqnum = b->sqnum;
719 } 715 }
720 716
721 return 0; 717 return 0;
@@ -840,6 +836,11 @@ static int replay_log_leb(struct ubifs_info *c, int lnum, int offs, void *sbuf)
840 if (IS_ERR(sleb)) { 836 if (IS_ERR(sleb)) {
841 if (PTR_ERR(sleb) != -EUCLEAN || !c->need_recovery) 837 if (PTR_ERR(sleb) != -EUCLEAN || !c->need_recovery)
842 return PTR_ERR(sleb); 838 return PTR_ERR(sleb);
839 /*
840 * Note, the below function will recover this log LEB only if
841 * it is the last, because unclean reboots can possibly corrupt
842 * only the tail of the log.
843 */
843 sleb = ubifs_recover_log_leb(c, lnum, offs, sbuf); 844 sleb = ubifs_recover_log_leb(c, lnum, offs, sbuf);
844 if (IS_ERR(sleb)) 845 if (IS_ERR(sleb))
845 return PTR_ERR(sleb); 846 return PTR_ERR(sleb);
@@ -851,7 +852,6 @@ static int replay_log_leb(struct ubifs_info *c, int lnum, int offs, void *sbuf)
851 } 852 }
852 853
853 node = sleb->buf; 854 node = sleb->buf;
854
855 snod = list_entry(sleb->nodes.next, struct ubifs_scan_node, list); 855 snod = list_entry(sleb->nodes.next, struct ubifs_scan_node, list);
856 if (c->cs_sqnum == 0) { 856 if (c->cs_sqnum == 0) {
857 /* 857 /*
@@ -898,7 +898,6 @@ static int replay_log_leb(struct ubifs_info *c, int lnum, int offs, void *sbuf)
898 } 898 }
899 899
900 list_for_each_entry(snod, &sleb->nodes, list) { 900 list_for_each_entry(snod, &sleb->nodes, list) {
901
902 cond_resched(); 901 cond_resched();
903 902
904 if (snod->sqnum >= SQNUM_WATERMARK) { 903 if (snod->sqnum >= SQNUM_WATERMARK) {
@@ -1011,7 +1010,6 @@ out:
1011int ubifs_replay_journal(struct ubifs_info *c) 1010int ubifs_replay_journal(struct ubifs_info *c)
1012{ 1011{
1013 int err, i, lnum, offs, free; 1012 int err, i, lnum, offs, free;
1014 void *sbuf = NULL;
1015 1013
1016 BUILD_BUG_ON(UBIFS_TRUN_KEY > 5); 1014 BUILD_BUG_ON(UBIFS_TRUN_KEY > 5);
1017 1015
@@ -1026,14 +1024,8 @@ int ubifs_replay_journal(struct ubifs_info *c)
1026 return -EINVAL; 1024 return -EINVAL;
1027 } 1025 }
1028 1026
1029 sbuf = vmalloc(c->leb_size);
1030 if (!sbuf)
1031 return -ENOMEM;
1032
1033 dbg_mnt("start replaying the journal"); 1027 dbg_mnt("start replaying the journal");
1034
1035 c->replaying = 1; 1028 c->replaying = 1;
1036
1037 lnum = c->ltail_lnum = c->lhead_lnum; 1029 lnum = c->ltail_lnum = c->lhead_lnum;
1038 offs = c->lhead_offs; 1030 offs = c->lhead_offs;
1039 1031
@@ -1046,7 +1038,7 @@ int ubifs_replay_journal(struct ubifs_info *c)
1046 lnum = UBIFS_LOG_LNUM; 1038 lnum = UBIFS_LOG_LNUM;
1047 offs = 0; 1039 offs = 0;
1048 } 1040 }
1049 err = replay_log_leb(c, lnum, offs, sbuf); 1041 err = replay_log_leb(c, lnum, offs, c->sbuf);
1050 if (err == 1) 1042 if (err == 1)
1051 /* We hit the end of the log */ 1043 /* We hit the end of the log */
1052 break; 1044 break;
@@ -1059,27 +1051,30 @@ int ubifs_replay_journal(struct ubifs_info *c)
1059 if (err) 1051 if (err)
1060 goto out; 1052 goto out;
1061 1053
1062 err = apply_replay_tree(c); 1054 err = apply_replay_list(c);
1055 if (err)
1056 goto out;
1057
1058 err = set_buds_lprops(c);
1063 if (err) 1059 if (err)
1064 goto out; 1060 goto out;
1065 1061
1066 /* 1062 /*
1067 * UBIFS budgeting calculations use @c->budg_uncommitted_idx variable 1063 * UBIFS budgeting calculations use @c->bi.uncommitted_idx variable
1068 * to roughly estimate index growth. Things like @c->min_idx_lebs 1064 * to roughly estimate index growth. Things like @c->bi.min_idx_lebs
1069 * depend on it. This means we have to initialize it to make sure 1065 * depend on it. This means we have to initialize it to make sure
1070 * budgeting works properly. 1066 * budgeting works properly.
1071 */ 1067 */
1072 c->budg_uncommitted_idx = atomic_long_read(&c->dirty_zn_cnt); 1068 c->bi.uncommitted_idx = atomic_long_read(&c->dirty_zn_cnt);
1073 c->budg_uncommitted_idx *= c->max_idx_node_sz; 1069 c->bi.uncommitted_idx *= c->max_idx_node_sz;
1074 1070
1075 ubifs_assert(c->bud_bytes <= c->max_bud_bytes || c->need_recovery); 1071 ubifs_assert(c->bud_bytes <= c->max_bud_bytes || c->need_recovery);
1076 dbg_mnt("finished, log head LEB %d:%d, max_sqnum %llu, " 1072 dbg_mnt("finished, log head LEB %d:%d, max_sqnum %llu, "
1077 "highest_inum %lu", c->lhead_lnum, c->lhead_offs, c->max_sqnum, 1073 "highest_inum %lu", c->lhead_lnum, c->lhead_offs, c->max_sqnum,
1078 (unsigned long)c->highest_inum); 1074 (unsigned long)c->highest_inum);
1079out: 1075out:
1080 destroy_replay_tree(c); 1076 destroy_replay_list(c);
1081 destroy_bud_list(c); 1077 destroy_bud_list(c);
1082 vfree(sbuf);
1083 c->replaying = 0; 1078 c->replaying = 0;
1084 return err; 1079 return err;
1085} 1080}
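Taken together, the replay.c changes turn mount-time replay into a simple pipeline: build the bud list from the log, replay every bud into one flat list, sort that list once by sequence number, apply it to the TNC, and finally fix up the buds' LEB properties. The stubbed outline below mirrors the order of calls in 'ubifs_replay_journal()'; the bodies are placeholders, not the real functions:

#include <stdio.h>

static int replay_buds(void)       { puts("scan buds -> replay_list"); return 0; }
static int apply_replay_list(void) { puts("list_sort by sqnum, apply to TNC"); return 0; }
static int set_buds_lprops(void)   { puts("update lprops, seek write-buffers"); return 0; }

int main(void)
{
	int err;

	err = replay_buds();
	if (!err)
		err = apply_replay_list();
	if (!err)
		err = set_buds_lprops();
	return err;
}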
diff --git a/fs/ubifs/sb.c b/fs/ubifs/sb.c
index 96cb62c8a9dd..c606f010e8df 100644
--- a/fs/ubifs/sb.c
+++ b/fs/ubifs/sb.c
@@ -475,7 +475,8 @@ failed:
475 * @c: UBIFS file-system description object 475 * @c: UBIFS file-system description object
476 * 476 *
477 * This function returns a pointer to the superblock node or a negative error 477 * This function returns a pointer to the superblock node or a negative error
478 * code. 478 * code. Note, the user of this function is responsible of kfree()'ing the
479 * returned superblock buffer.
479 */ 480 */
480struct ubifs_sb_node *ubifs_read_sb_node(struct ubifs_info *c) 481struct ubifs_sb_node *ubifs_read_sb_node(struct ubifs_info *c)
481{ 482{
@@ -542,11 +543,8 @@ int ubifs_read_superblock(struct ubifs_info *c)
542 * due to the unavailability of time-travelling equipment. 543 * due to the unavailability of time-travelling equipment.
543 */ 544 */
544 if (c->fmt_version > UBIFS_FORMAT_VERSION) { 545 if (c->fmt_version > UBIFS_FORMAT_VERSION) {
545 struct super_block *sb = c->vfs_sb; 546 ubifs_assert(!c->ro_media || c->ro_mount);
546 int mounting_ro = sb->s_flags & MS_RDONLY; 547 if (!c->ro_mount ||
547
548 ubifs_assert(!c->ro_media || mounting_ro);
549 if (!mounting_ro ||
550 c->ro_compat_version > UBIFS_RO_COMPAT_VERSION) { 548 c->ro_compat_version > UBIFS_RO_COMPAT_VERSION) {
551 ubifs_err("on-flash format version is w%d/r%d, but " 549 ubifs_err("on-flash format version is w%d/r%d, but "
552 "software only supports up to version " 550 "software only supports up to version "
@@ -619,12 +617,13 @@ int ubifs_read_superblock(struct ubifs_info *c)
619 c->vfs_sb->s_time_gran = le32_to_cpu(sup->time_gran); 617 c->vfs_sb->s_time_gran = le32_to_cpu(sup->time_gran);
620 memcpy(&c->uuid, &sup->uuid, 16); 618 memcpy(&c->uuid, &sup->uuid, 16);
621 c->big_lpt = !!(sup_flags & UBIFS_FLG_BIGLPT); 619 c->big_lpt = !!(sup_flags & UBIFS_FLG_BIGLPT);
620 c->space_fixup = !!(sup_flags & UBIFS_FLG_SPACE_FIXUP);
622 621
623 /* Automatically increase file system size to the maximum size */ 622 /* Automatically increase file system size to the maximum size */
624 c->old_leb_cnt = c->leb_cnt; 623 c->old_leb_cnt = c->leb_cnt;
625 if (c->leb_cnt < c->vi.size && c->leb_cnt < c->max_leb_cnt) { 624 if (c->leb_cnt < c->vi.size && c->leb_cnt < c->max_leb_cnt) {
626 c->leb_cnt = min_t(int, c->max_leb_cnt, c->vi.size); 625 c->leb_cnt = min_t(int, c->max_leb_cnt, c->vi.size);
627 if (c->vfs_sb->s_flags & MS_RDONLY) 626 if (c->ro_mount)
628 dbg_mnt("Auto resizing (ro) from %d LEBs to %d LEBs", 627 dbg_mnt("Auto resizing (ro) from %d LEBs to %d LEBs",
629 c->old_leb_cnt, c->leb_cnt); 628 c->old_leb_cnt, c->leb_cnt);
630 else { 629 else {
@@ -653,3 +652,152 @@ out:
653 kfree(sup); 652 kfree(sup);
654 return err; 653 return err;
655} 654}
655
656/**
657 * fixup_leb - fixup/unmap an LEB containing free space.
658 * @c: UBIFS file-system description object
659 * @lnum: the LEB number to fix up
660 * @len: number of used bytes in LEB (starting at offset 0)
661 *
662 * This function reads the contents of the given LEB number @lnum, then fixes
 663 * it up so that empty min. I/O units at the end of the LEB are actually erased on
664 * flash (rather than being just all-0xff real data). If the LEB is completely
665 * empty, it is simply unmapped.
666 */
667static int fixup_leb(struct ubifs_info *c, int lnum, int len)
668{
669 int err;
670
671 ubifs_assert(len >= 0);
672 ubifs_assert(len % c->min_io_size == 0);
673 ubifs_assert(len < c->leb_size);
674
675 if (len == 0) {
676 dbg_mnt("unmap empty LEB %d", lnum);
677 return ubi_leb_unmap(c->ubi, lnum);
678 }
679
680 dbg_mnt("fixup LEB %d, data len %d", lnum, len);
681 err = ubi_read(c->ubi, lnum, c->sbuf, 0, len);
682 if (err)
683 return err;
684
685 return ubi_leb_change(c->ubi, lnum, c->sbuf, len, UBI_UNKNOWN);
686}
687
688/**
689 * fixup_free_space - find & remap all LEBs containing free space.
690 * @c: UBIFS file-system description object
691 *
 692 * This function walks through all LEBs in the filesystem and fixes up those
693 * containing free/empty space.
694 */
695static int fixup_free_space(struct ubifs_info *c)
696{
697 int lnum, err = 0;
698 struct ubifs_lprops *lprops;
699
700 ubifs_get_lprops(c);
701
702 /* Fixup LEBs in the master area */
703 for (lnum = UBIFS_MST_LNUM; lnum < UBIFS_LOG_LNUM; lnum++) {
704 err = fixup_leb(c, lnum, c->mst_offs + c->mst_node_alsz);
705 if (err)
706 goto out;
707 }
708
709 /* Unmap unused log LEBs */
710 lnum = ubifs_next_log_lnum(c, c->lhead_lnum);
711 while (lnum != c->ltail_lnum) {
712 err = fixup_leb(c, lnum, 0);
713 if (err)
714 goto out;
715 lnum = ubifs_next_log_lnum(c, lnum);
716 }
717
718 /* Fixup the current log head */
719 err = fixup_leb(c, c->lhead_lnum, c->lhead_offs);
720 if (err)
721 goto out;
722
723 /* Fixup LEBs in the LPT area */
724 for (lnum = c->lpt_first; lnum <= c->lpt_last; lnum++) {
725 int free = c->ltab[lnum - c->lpt_first].free;
726
727 if (free > 0) {
728 err = fixup_leb(c, lnum, c->leb_size - free);
729 if (err)
730 goto out;
731 }
732 }
733
734 /* Unmap LEBs in the orphans area */
735 for (lnum = c->orph_first; lnum <= c->orph_last; lnum++) {
736 err = fixup_leb(c, lnum, 0);
737 if (err)
738 goto out;
739 }
740
741 /* Fixup LEBs in the main area */
742 for (lnum = c->main_first; lnum < c->leb_cnt; lnum++) {
743 lprops = ubifs_lpt_lookup(c, lnum);
744 if (IS_ERR(lprops)) {
745 err = PTR_ERR(lprops);
746 goto out;
747 }
748
749 if (lprops->free > 0) {
750 err = fixup_leb(c, lnum, c->leb_size - lprops->free);
751 if (err)
752 goto out;
753 }
754 }
755
756out:
757 ubifs_release_lprops(c);
758 return err;
759}
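The log walk in fixup_free_space() above iterates a circular range of LEBs between the head and the tail. A minimal user-space sketch of that wrap-around iteration, assuming an illustrative log area spanning LEBs 3..7 (the constants and next_log_lnum() here are stand-ins, not the kernel's):

#include <stdio.h>

#define LOG_FIRST 3
#define LOG_LAST  7

/* Advance to the next log LEB, wrapping at the end of the log area;
 * this mirrors what ubifs_next_log_lnum() is used for above. */
static int next_log_lnum(int lnum)
{
	return lnum == LOG_LAST ? LOG_FIRST : lnum + 1;
}

int main(void)
{
	int head = 6, tail = 4;	/* sample positions inside the ring */

	for (int lnum = next_log_lnum(head); lnum != tail;
	     lnum = next_log_lnum(lnum))
		printf("would unmap unused log LEB %d\n", lnum); /* 7, 3 */
	return 0;
}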
760
761/**
762 * ubifs_fixup_free_space - find & fix all LEBs with free space.
763 * @c: UBIFS file-system description object
764 *
765 * This function fixes up LEBs containing free space on first mount, if the
766 * appropriate flag was set when the FS was created. Each LEB with one or more
767 * empty min. I/O unit (i.e. free-space-count > 0) is re-written, to make sure
768 * the free space is actually erased. E.g., this is necessary for some NAND
769 * chips, since the free space may have been programmed like real "0xff" data
770 * (generating a non-0xff ECC), causing future writes to the not-really-erased
771 * NAND pages to behave badly. After the space is fixed up, the superblock flag
772 * is cleared, so that this is skipped for all future mounts.
773 */
774int ubifs_fixup_free_space(struct ubifs_info *c)
775{
776 int err;
777 struct ubifs_sb_node *sup;
778
779 ubifs_assert(c->space_fixup);
780 ubifs_assert(!c->ro_mount);
781
782 ubifs_msg("start fixing up free space");
783
784 err = fixup_free_space(c);
785 if (err)
786 return err;
787
788 sup = ubifs_read_sb_node(c);
789 if (IS_ERR(sup))
790 return PTR_ERR(sup);
791
792 /* Free-space fixup is no longer required */
793 c->space_fixup = 0;
794 sup->flags &= cpu_to_le32(~UBIFS_FLG_SPACE_FIXUP);
795
796 err = ubifs_write_sb_node(c, sup);
797 kfree(sup);
798 if (err)
799 return err;
800
801 ubifs_msg("free space fixup complete");
802 return err;
803}
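Clearing UBIFS_FLG_SPACE_FIXUP above is done entirely in the on-flash little-endian representation. A minimal user-space sketch of the same idiom, assuming glibc's <endian.h> and the 0x04 flag value defined in the ubifs-media.h hunk further below:

#include <endian.h>	/* htole32()/le32toh(), glibc-specific */
#include <stdint.h>
#include <stdio.h>

#define UBIFS_FLG_BIGLPT      0x02
#define UBIFS_FLG_SPACE_FIXUP 0x04

int main(void)
{
	/* Sample on-flash flags word: BIGLPT | SPACE_FIXUP */
	uint32_t flags_le = htole32(UBIFS_FLG_BIGLPT | UBIFS_FLG_SPACE_FIXUP);

	/* Clear one flag without leaving the little-endian domain,
	 * mirroring "sup->flags &= cpu_to_le32(~UBIFS_FLG_SPACE_FIXUP)" */
	flags_le &= htole32(~(uint32_t)UBIFS_FLG_SPACE_FIXUP);

	printf("flags now 0x%02x\n", le32toh(flags_le)); /* prints 0x02 */
	return 0;
}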
diff --git a/fs/ubifs/scan.c b/fs/ubifs/scan.c
index 96c525384191..36216b46f772 100644
--- a/fs/ubifs/scan.c
+++ b/fs/ubifs/scan.c
@@ -197,7 +197,7 @@ int ubifs_add_snod(const struct ubifs_info *c, struct ubifs_scan_leb *sleb,
197 struct ubifs_ino_node *ino = buf; 197 struct ubifs_ino_node *ino = buf;
198 struct ubifs_scan_node *snod; 198 struct ubifs_scan_node *snod;
199 199
200 snod = kzalloc(sizeof(struct ubifs_scan_node), GFP_NOFS); 200 snod = kmalloc(sizeof(struct ubifs_scan_node), GFP_NOFS);
201 if (!snod) 201 if (!snod)
202 return -ENOMEM; 202 return -ENOMEM;
203 203
@@ -212,13 +212,15 @@ int ubifs_add_snod(const struct ubifs_info *c, struct ubifs_scan_leb *sleb,
212 case UBIFS_DENT_NODE: 212 case UBIFS_DENT_NODE:
213 case UBIFS_XENT_NODE: 213 case UBIFS_XENT_NODE:
214 case UBIFS_DATA_NODE: 214 case UBIFS_DATA_NODE:
215 case UBIFS_TRUN_NODE:
216 /* 215 /*
217 * The key is in the same place in all keyed 216 * The key is in the same place in all keyed
218 * nodes. 217 * nodes.
219 */ 218 */
220 key_read(c, &ino->key, &snod->key); 219 key_read(c, &ino->key, &snod->key);
221 break; 220 break;
221 default:
222 invalid_key_init(c, &snod->key);
223 break;
222 } 224 }
223 list_add_tail(&snod->list, &sleb->nodes); 225 list_add_tail(&snod->list, &sleb->nodes);
224 sleb->nodes_cnt += 1; 226 sleb->nodes_cnt += 1;
@@ -326,7 +328,7 @@ struct ubifs_scan_leb *ubifs_scan(const struct ubifs_info *c, int lnum,
326 if (!quiet) 328 if (!quiet)
327 ubifs_err("empty space starts at non-aligned offset %d", 329 ubifs_err("empty space starts at non-aligned offset %d",
328 offs); 330 offs);
329 goto corrupted;; 331 goto corrupted;
330 } 332 }
331 333
332 ubifs_end_scan(c, sleb, lnum, offs); 334 ubifs_end_scan(c, sleb, lnum, offs);
diff --git a/fs/ubifs/shrinker.c b/fs/ubifs/shrinker.c
index 0b201114a5ad..9e1d05666fed 100644
--- a/fs/ubifs/shrinker.c
+++ b/fs/ubifs/shrinker.c
@@ -250,7 +250,7 @@ static int kick_a_thread(void)
250 dirty_zn_cnt = atomic_long_read(&c->dirty_zn_cnt); 250 dirty_zn_cnt = atomic_long_read(&c->dirty_zn_cnt);
251 251
252 if (!dirty_zn_cnt || c->cmt_state == COMMIT_BROKEN || 252 if (!dirty_zn_cnt || c->cmt_state == COMMIT_BROKEN ||
253 c->ro_media) { 253 c->ro_mount || c->ro_error) {
254 mutex_unlock(&c->umount_mutex); 254 mutex_unlock(&c->umount_mutex);
255 continue; 255 continue;
256 } 256 }
@@ -277,13 +277,18 @@ static int kick_a_thread(void)
277 return 0; 277 return 0;
278} 278}
279 279
280int ubifs_shrinker(struct shrinker *shrink, int nr, gfp_t gfp_mask) 280int ubifs_shrinker(struct shrinker *shrink, struct shrink_control *sc)
281{ 281{
282 int nr = sc->nr_to_scan;
282 int freed, contention = 0; 283 int freed, contention = 0;
283 long clean_zn_cnt = atomic_long_read(&ubifs_clean_zn_cnt); 284 long clean_zn_cnt = atomic_long_read(&ubifs_clean_zn_cnt);
284 285
285 if (nr == 0) 286 if (nr == 0)
286 return clean_zn_cnt; 287 /*
288 * Due to the way UBIFS updates the clean znode counter it may
289 * temporarily be negative.
290 */
291 return clean_zn_cnt >= 0 ? clean_zn_cnt : 1;
287 292
288 if (!clean_zn_cnt) { 293 if (!clean_zn_cnt) {
289 /* 294 /*
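The hunk above moves ubifs_shrinker() to the shrink_control interface and clamps the transiently negative clean-znode counter in the nr == 0 query pass. A hedged kernel-side sketch of that callback contract for this kernel generation (the my_cache names are hypothetical, not code from this patch):

#include <linux/mm.h>	/* struct shrink_control, DEFAULT_SEEKS in this era */

static atomic_long_t my_cache_objects;	/* hypothetical object counter */

static int my_cache_shrink(struct shrinker *shrink, struct shrink_control *sc)
{
	long count = atomic_long_read(&my_cache_objects);

	if (sc->nr_to_scan == 0)
		/* Query pass: only report a non-negative size estimate */
		return count > 0 ? count : 0;

	/* Reclaim pass: free up to sc->nr_to_scan objects here ... */

	count = atomic_long_read(&my_cache_objects);
	return count > 0 ? count : 0;
}

static struct shrinker my_cache_shrinker = {
	.shrink = my_cache_shrink,
	.seeks  = DEFAULT_SEEKS,
};
/* register_shrinker(&my_cache_shrinker) at init,
 * unregister_shrinker(&my_cache_shrinker) at exit. */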
diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c
index cd5900b85d38..529be0582029 100644
--- a/fs/ubifs/super.c
+++ b/fs/ubifs/super.c
@@ -272,12 +272,20 @@ static struct inode *ubifs_alloc_inode(struct super_block *sb)
272 return &ui->vfs_inode; 272 return &ui->vfs_inode;
273}; 273};
274 274
275static void ubifs_i_callback(struct rcu_head *head)
276{
277 struct inode *inode = container_of(head, struct inode, i_rcu);
278 struct ubifs_inode *ui = ubifs_inode(inode);
279 INIT_LIST_HEAD(&inode->i_dentry);
280 kmem_cache_free(ubifs_inode_slab, ui);
281}
282
275static void ubifs_destroy_inode(struct inode *inode) 283static void ubifs_destroy_inode(struct inode *inode)
276{ 284{
277 struct ubifs_inode *ui = ubifs_inode(inode); 285 struct ubifs_inode *ui = ubifs_inode(inode);
278 286
279 kfree(ui->data); 287 kfree(ui->data);
280 kmem_cache_free(ubifs_inode_slab, inode); 288 call_rcu(&inode->i_rcu, ubifs_i_callback);
281} 289}
282 290
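ubifs_destroy_inode() now defers the actual kmem_cache_free() past an RCU grace period via ubifs_i_callback(). The underlying call_rcu() idiom, sketched with a hypothetical object type (kernel context; not part of this patch):

#include <linux/rcupdate.h>
#include <linux/slab.h>

struct my_obj {
	int data;
	struct rcu_head rcu;	/* embedded so the callback can recover us */
};

static void my_obj_free_rcu(struct rcu_head *head)
{
	struct my_obj *obj = container_of(head, struct my_obj, rcu);

	/* Runs after a grace period: no rcu_read_lock() reader can still
	 * hold a pointer to the object, so freeing is safe now. */
	kfree(obj);
}

static void my_obj_release(struct my_obj *obj)
{
	call_rcu(&obj->rcu, my_obj_free_rcu);
}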
283/* 291/*
@@ -367,14 +375,14 @@ out:
367 ubifs_release_dirty_inode_budget(c, ui); 375 ubifs_release_dirty_inode_budget(c, ui);
368 else { 376 else {
369 /* We've deleted something - clean the "no space" flags */ 377 /* We've deleted something - clean the "no space" flags */
370 c->nospace = c->nospace_rp = 0; 378 c->bi.nospace = c->bi.nospace_rp = 0;
371 smp_wmb(); 379 smp_wmb();
372 } 380 }
373done: 381done:
374 end_writeback(inode); 382 end_writeback(inode);
375} 383}
376 384
377static void ubifs_dirty_inode(struct inode *inode) 385static void ubifs_dirty_inode(struct inode *inode, int flags)
378{ 386{
379 struct ubifs_inode *ui = ubifs_inode(inode); 387 struct ubifs_inode *ui = ubifs_inode(inode);
380 388
@@ -504,9 +512,12 @@ static int init_constants_early(struct ubifs_info *c)
504 512
505 c->leb_cnt = c->vi.size; 513 c->leb_cnt = c->vi.size;
506 c->leb_size = c->vi.usable_leb_size; 514 c->leb_size = c->vi.usable_leb_size;
515 c->leb_start = c->di.leb_start;
507 c->half_leb_size = c->leb_size / 2; 516 c->half_leb_size = c->leb_size / 2;
508 c->min_io_size = c->di.min_io_size; 517 c->min_io_size = c->di.min_io_size;
509 c->min_io_shift = fls(c->min_io_size) - 1; 518 c->min_io_shift = fls(c->min_io_size) - 1;
519 c->max_write_size = c->di.max_write_size;
520 c->max_write_shift = fls(c->max_write_size) - 1;
510 521
511 if (c->leb_size < UBIFS_MIN_LEB_SZ) { 522 if (c->leb_size < UBIFS_MIN_LEB_SZ) {
512 ubifs_err("too small LEBs (%d bytes), min. is %d bytes", 523 ubifs_err("too small LEBs (%d bytes), min. is %d bytes",
@@ -526,6 +537,18 @@ static int init_constants_early(struct ubifs_info *c)
526 } 537 }
527 538
528 /* 539 /*
540 * Maximum write size has to be greater than or equal to the min. I/O
541 * size, a multiple of it, and a power of 2.
542 */
543 if (c->max_write_size < c->min_io_size ||
544 c->max_write_size % c->min_io_size ||
545 !is_power_of_2(c->max_write_size)) {
546 ubifs_err("bad write buffer size %d for %d min. I/O unit",
547 c->max_write_size, c->min_io_size);
548 return -EINVAL;
549 }
550
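The three-part constraint above (at least min_io_size, a multiple of it, and a power of 2) is easy to exercise in isolation. A runnable user-space sketch with sample geometries, not real device parameters:

#include <stdbool.h>
#include <stdio.h>

static bool is_power_of_2(unsigned int x)
{
	return x && !(x & (x - 1));
}

/* Mirror of the max_write_size sanity check above */
static bool write_size_ok(int max_write_size, int min_io_size)
{
	return max_write_size >= min_io_size &&
	       max_write_size % min_io_size == 0 &&
	       is_power_of_2(max_write_size);
}

int main(void)
{
	printf("%d\n", write_size_ok(2048, 512)); /* 1: NAND-like geometry */
	printf("%d\n", write_size_ok(1536, 512)); /* 0: not a power of 2 */
	return 0;
}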
551 /*
529 * UBIFS aligns all node to 8-byte boundary, so to make function in 552 * UBIFS aligns all node to 8-byte boundary, so to make function in
530 * io.c simpler, assume minimum I/O unit size to be 8 bytes if it is 553 * io.c simpler, assume minimum I/O unit size to be 8 bytes if it is
531 * less than 8. 554 * less than 8.
@@ -533,6 +556,10 @@ static int init_constants_early(struct ubifs_info *c)
533 if (c->min_io_size < 8) { 556 if (c->min_io_size < 8) {
534 c->min_io_size = 8; 557 c->min_io_size = 8;
535 c->min_io_shift = 3; 558 c->min_io_shift = 3;
559 if (c->max_write_size < c->min_io_size) {
560 c->max_write_size = c->min_io_size;
561 c->max_write_shift = c->min_io_shift;
562 }
536 } 563 }
537 564
538 c->ref_node_alsz = ALIGN(UBIFS_REF_NODE_SZ, c->min_io_size); 565 c->ref_node_alsz = ALIGN(UBIFS_REF_NODE_SZ, c->min_io_size);
@@ -667,11 +694,11 @@ static int init_constants_sb(struct ubifs_info *c)
667 * be compressed and direntries are of the maximum size. 694 * be compressed and direntries are of the maximum size.
668 * 695 *
669 * Note, data, which may be stored in inodes is budgeted separately, so 696 * Note, data, which may be stored in inodes is budgeted separately, so
670 * it is not included into 'c->inode_budget'. 697 * it is not included into 'c->bi.inode_budget'.
671 */ 698 */
672 c->page_budget = UBIFS_MAX_DATA_NODE_SZ * UBIFS_BLOCKS_PER_PAGE; 699 c->bi.page_budget = UBIFS_MAX_DATA_NODE_SZ * UBIFS_BLOCKS_PER_PAGE;
673 c->inode_budget = UBIFS_INO_NODE_SZ; 700 c->bi.inode_budget = UBIFS_INO_NODE_SZ;
674 c->dent_budget = UBIFS_MAX_DENT_NODE_SZ; 701 c->bi.dent_budget = UBIFS_MAX_DENT_NODE_SZ;
675 702
676 /* 703 /*
677 * When the amount of flash space used by buds becomes 704 * When the amount of flash space used by buds becomes
@@ -715,7 +742,7 @@ static void init_constants_master(struct ubifs_info *c)
715{ 742{
716 long long tmp64; 743 long long tmp64;
717 744
718 c->min_idx_lebs = ubifs_calc_min_idx_lebs(c); 745 c->bi.min_idx_lebs = ubifs_calc_min_idx_lebs(c);
719 c->report_rp_size = ubifs_reported_space(c, c->rp_size); 746 c->report_rp_size = ubifs_reported_space(c, c->rp_size);
720 747
721 /* 748 /*
@@ -784,15 +811,18 @@ static int alloc_wbufs(struct ubifs_info *c)
784 811
785 c->jheads[i].wbuf.sync_callback = &bud_wbuf_callback; 812 c->jheads[i].wbuf.sync_callback = &bud_wbuf_callback;
786 c->jheads[i].wbuf.jhead = i; 813 c->jheads[i].wbuf.jhead = i;
814 c->jheads[i].grouped = 1;
787 } 815 }
788 816
789 c->jheads[BASEHD].wbuf.dtype = UBI_SHORTTERM; 817 c->jheads[BASEHD].wbuf.dtype = UBI_SHORTTERM;
790 /* 818 /*
791 * Garbage Collector head likely contains long-term data and 819 * Garbage Collector head likely contains long-term data and
792 * does not need to be synchronized by timer. 820 * does not need to be synchronized by timer. Also GC head nodes are
821 * not grouped.
793 */ 822 */
794 c->jheads[GCHD].wbuf.dtype = UBI_LONGTERM; 823 c->jheads[GCHD].wbuf.dtype = UBI_LONGTERM;
795 c->jheads[GCHD].wbuf.no_timer = 1; 824 c->jheads[GCHD].wbuf.no_timer = 1;
825 c->jheads[GCHD].grouped = 0;
796 826
797 return 0; 827 return 0;
798} 828}
@@ -1117,8 +1147,8 @@ static int check_free_space(struct ubifs_info *c)
1117{ 1147{
1118 ubifs_assert(c->dark_wm > 0); 1148 ubifs_assert(c->dark_wm > 0);
1119 if (c->lst.total_free + c->lst.total_dirty < c->dark_wm) { 1149 if (c->lst.total_free + c->lst.total_dirty < c->dark_wm) {
1120 ubifs_err("insufficient free space to mount in read/write mode"); 1150 ubifs_err("insufficient free space to mount in R/W mode");
1121 dbg_dump_budg(c); 1151 dbg_dump_budg(c, &c->bi);
1122 dbg_dump_lprops(c); 1152 dbg_dump_lprops(c);
1123 return -ENOSPC; 1153 return -ENOSPC;
1124 } 1154 }
@@ -1137,11 +1167,11 @@ static int check_free_space(struct ubifs_info *c)
1137 */ 1167 */
1138static int mount_ubifs(struct ubifs_info *c) 1168static int mount_ubifs(struct ubifs_info *c)
1139{ 1169{
1140 struct super_block *sb = c->vfs_sb; 1170 int err;
1141 int err, mounted_read_only = (sb->s_flags & MS_RDONLY);
1142 long long x; 1171 long long x;
1143 size_t sz; 1172 size_t sz;
1144 1173
1174 c->ro_mount = !!(c->vfs_sb->s_flags & MS_RDONLY);
1145 err = init_constants_early(c); 1175 err = init_constants_early(c);
1146 if (err) 1176 if (err)
1147 return err; 1177 return err;
@@ -1154,7 +1184,7 @@ static int mount_ubifs(struct ubifs_info *c)
1154 if (err) 1184 if (err)
1155 goto out_free; 1185 goto out_free;
1156 1186
1157 if (c->empty && (mounted_read_only || c->ro_media)) { 1187 if (c->empty && (c->ro_mount || c->ro_media)) {
1158 /* 1188 /*
1159 * This UBI volume is empty, and read-only, or the file system 1189 * This UBI volume is empty, and read-only, or the file system
1160 * is mounted read-only - we cannot format it. 1190 * is mounted read-only - we cannot format it.
@@ -1165,7 +1195,7 @@ static int mount_ubifs(struct ubifs_info *c)
1165 goto out_free; 1195 goto out_free;
1166 } 1196 }
1167 1197
1168 if (c->ro_media && !mounted_read_only) { 1198 if (c->ro_media && !c->ro_mount) {
1169 ubifs_err("cannot mount read-write - read-only media"); 1199 ubifs_err("cannot mount read-write - read-only media");
1170 err = -EROFS; 1200 err = -EROFS;
1171 goto out_free; 1201 goto out_free;
@@ -1185,7 +1215,7 @@ static int mount_ubifs(struct ubifs_info *c)
1185 if (!c->sbuf) 1215 if (!c->sbuf)
1186 goto out_free; 1216 goto out_free;
1187 1217
1188 if (!mounted_read_only) { 1218 if (!c->ro_mount) {
1189 c->ileb_buf = vmalloc(c->leb_size); 1219 c->ileb_buf = vmalloc(c->leb_size);
1190 if (!c->ileb_buf) 1220 if (!c->ileb_buf)
1191 goto out_free; 1221 goto out_free;
@@ -1194,11 +1224,14 @@ static int mount_ubifs(struct ubifs_info *c)
1194 if (c->bulk_read == 1) 1224 if (c->bulk_read == 1)
1195 bu_init(c); 1225 bu_init(c);
1196 1226
1197 /* 1227 if (!c->ro_mount) {
1198 * We have to check all CRCs, even for data nodes, when we mount the FS 1228 c->write_reserve_buf = kmalloc(COMPRESSED_DATA_NODE_BUF_SZ,
1199 * (specifically, when we are replaying). 1229 GFP_KERNEL);
1200 */ 1230 if (!c->write_reserve_buf)
1201 c->always_chk_crc = 1; 1231 goto out_free;
1232 }
1233
1234 c->mounting = 1;
1202 1235
1203 err = ubifs_read_superblock(c); 1236 err = ubifs_read_superblock(c);
1204 if (err) 1237 if (err)
@@ -1227,12 +1260,12 @@ static int mount_ubifs(struct ubifs_info *c)
1227 goto out_free; 1260 goto out_free;
1228 } 1261 }
1229 1262
1230 sprintf(c->bgt_name, BGT_NAME_PATTERN, c->vi.ubi_num, c->vi.vol_id); 1263 err = alloc_wbufs(c);
1231 if (!mounted_read_only) { 1264 if (err)
1232 err = alloc_wbufs(c); 1265 goto out_cbuf;
1233 if (err)
1234 goto out_cbuf;
1235 1266
1267 sprintf(c->bgt_name, BGT_NAME_PATTERN, c->vi.ubi_num, c->vi.vol_id);
1268 if (!c->ro_mount) {
1236 /* Create background thread */ 1269 /* Create background thread */
1237 c->bgt = kthread_create(ubifs_bg_thread, c, "%s", c->bgt_name); 1270 c->bgt = kthread_create(ubifs_bg_thread, c, "%s", c->bgt_name);
1238 if (IS_ERR(c->bgt)) { 1271 if (IS_ERR(c->bgt)) {
@@ -1254,12 +1287,25 @@ static int mount_ubifs(struct ubifs_info *c)
1254 if ((c->mst_node->flags & cpu_to_le32(UBIFS_MST_DIRTY)) != 0) { 1287 if ((c->mst_node->flags & cpu_to_le32(UBIFS_MST_DIRTY)) != 0) {
1255 ubifs_msg("recovery needed"); 1288 ubifs_msg("recovery needed");
1256 c->need_recovery = 1; 1289 c->need_recovery = 1;
1257 if (!mounted_read_only) { 1290 }
1258 err = ubifs_recover_inl_heads(c, c->sbuf); 1291
1259 if (err) 1292 if (c->need_recovery && !c->ro_mount) {
1260 goto out_master; 1293 err = ubifs_recover_inl_heads(c, c->sbuf);
1261 } 1294 if (err)
1262 } else if (!mounted_read_only) { 1295 goto out_master;
1296 }
1297
1298 err = ubifs_lpt_init(c, 1, !c->ro_mount);
1299 if (err)
1300 goto out_master;
1301
1302 if (!c->ro_mount && c->space_fixup) {
1303 err = ubifs_fixup_free_space(c);
1304 if (err)
1305 goto out_master;
1306 }
1307
1308 if (!c->ro_mount) {
1263 /* 1309 /*
1264 * Set the "dirty" flag so that if we reboot uncleanly we 1310 * Set the "dirty" flag so that if we reboot uncleanly we
1265 * will notice this immediately on the next mount. 1311 * will notice this immediately on the next mount.
@@ -1267,14 +1313,10 @@ static int mount_ubifs(struct ubifs_info *c)
1267 c->mst_node->flags |= cpu_to_le32(UBIFS_MST_DIRTY); 1313 c->mst_node->flags |= cpu_to_le32(UBIFS_MST_DIRTY);
1268 err = ubifs_write_master(c); 1314 err = ubifs_write_master(c);
1269 if (err) 1315 if (err)
1270 goto out_master; 1316 goto out_lpt;
1271 } 1317 }
1272 1318
1273 err = ubifs_lpt_init(c, 1, !mounted_read_only); 1319 err = dbg_check_idx_size(c, c->bi.old_idx_sz);
1274 if (err)
1275 goto out_lpt;
1276
1277 err = dbg_check_idx_size(c, c->old_idx_sz);
1278 if (err) 1320 if (err)
1279 goto out_lpt; 1321 goto out_lpt;
1280 1322
@@ -1283,13 +1325,13 @@ static int mount_ubifs(struct ubifs_info *c)
1283 goto out_journal; 1325 goto out_journal;
1284 1326
1285 /* Calculate 'min_idx_lebs' after journal replay */ 1327 /* Calculate 'min_idx_lebs' after journal replay */
1286 c->min_idx_lebs = ubifs_calc_min_idx_lebs(c); 1328 c->bi.min_idx_lebs = ubifs_calc_min_idx_lebs(c);
1287 1329
1288 err = ubifs_mount_orphans(c, c->need_recovery, mounted_read_only); 1330 err = ubifs_mount_orphans(c, c->need_recovery, c->ro_mount);
1289 if (err) 1331 if (err)
1290 goto out_orphans; 1332 goto out_orphans;
1291 1333
1292 if (!mounted_read_only) { 1334 if (!c->ro_mount) {
1293 int lnum; 1335 int lnum;
1294 1336
1295 err = check_free_space(c); 1337 err = check_free_space(c);
@@ -1351,7 +1393,7 @@ static int mount_ubifs(struct ubifs_info *c)
1351 spin_unlock(&ubifs_infos_lock); 1393 spin_unlock(&ubifs_infos_lock);
1352 1394
1353 if (c->need_recovery) { 1395 if (c->need_recovery) {
1354 if (mounted_read_only) 1396 if (c->ro_mount)
1355 ubifs_msg("recovery deferred"); 1397 ubifs_msg("recovery deferred");
1356 else { 1398 else {
1357 c->need_recovery = 0; 1399 c->need_recovery = 0;
@@ -1374,11 +1416,11 @@ static int mount_ubifs(struct ubifs_info *c)
1374 if (err) 1416 if (err)
1375 goto out_infos; 1417 goto out_infos;
1376 1418
1377 c->always_chk_crc = 0; 1419 c->mounting = 0;
1378 1420
1379 ubifs_msg("mounted UBI device %d, volume %d, name \"%s\"", 1421 ubifs_msg("mounted UBI device %d, volume %d, name \"%s\"",
1380 c->vi.ubi_num, c->vi.vol_id, c->vi.name); 1422 c->vi.ubi_num, c->vi.vol_id, c->vi.name);
1381 if (mounted_read_only) 1423 if (c->ro_mount)
1382 ubifs_msg("mounted read-only"); 1424 ubifs_msg("mounted read-only");
1383 x = (long long)c->main_lebs * c->leb_size; 1425 x = (long long)c->main_lebs * c->leb_size;
1384 ubifs_msg("file system size: %lld bytes (%lld KiB, %lld MiB, %d " 1426 ubifs_msg("file system size: %lld bytes (%lld KiB, %lld MiB, %d "
@@ -1395,6 +1437,7 @@ static int mount_ubifs(struct ubifs_info *c)
1395 1437
1396 dbg_msg("compiled on: " __DATE__ " at " __TIME__); 1438 dbg_msg("compiled on: " __DATE__ " at " __TIME__);
1397 dbg_msg("min. I/O unit size: %d bytes", c->min_io_size); 1439 dbg_msg("min. I/O unit size: %d bytes", c->min_io_size);
1440 dbg_msg("max. write size: %d bytes", c->max_write_size);
1398 dbg_msg("LEB size: %d bytes (%d KiB)", 1441 dbg_msg("LEB size: %d bytes (%d KiB)",
1399 c->leb_size, c->leb_size >> 10); 1442 c->leb_size, c->leb_size >> 10);
1400 dbg_msg("data journal heads: %d", 1443 dbg_msg("data journal heads: %d",
@@ -1411,7 +1454,8 @@ static int mount_ubifs(struct ubifs_info *c)
1411 c->main_lebs, c->main_first, c->leb_cnt - 1); 1454 c->main_lebs, c->main_first, c->leb_cnt - 1);
1412 dbg_msg("index LEBs: %d", c->lst.idx_lebs); 1455 dbg_msg("index LEBs: %d", c->lst.idx_lebs);
1413 dbg_msg("total index bytes: %lld (%lld KiB, %lld MiB)", 1456 dbg_msg("total index bytes: %lld (%lld KiB, %lld MiB)",
1414 c->old_idx_sz, c->old_idx_sz >> 10, c->old_idx_sz >> 20); 1457 c->bi.old_idx_sz, c->bi.old_idx_sz >> 10,
1458 c->bi.old_idx_sz >> 20);
1415 dbg_msg("key hash type: %d", c->key_hash_type); 1459 dbg_msg("key hash type: %d", c->key_hash_type);
1416 dbg_msg("tree fanout: %d", c->fanout); 1460 dbg_msg("tree fanout: %d", c->fanout);
1417 dbg_msg("reserved GC LEB: %d", c->gc_lnum); 1461 dbg_msg("reserved GC LEB: %d", c->gc_lnum);
@@ -1424,9 +1468,9 @@ static int mount_ubifs(struct ubifs_info *c)
1424 UBIFS_TRUN_NODE_SZ, UBIFS_SB_NODE_SZ, UBIFS_MST_NODE_SZ); 1468 UBIFS_TRUN_NODE_SZ, UBIFS_SB_NODE_SZ, UBIFS_MST_NODE_SZ);
1425 dbg_msg("node sizes: ref %zu, cmt. start %zu, orph %zu", 1469 dbg_msg("node sizes: ref %zu, cmt. start %zu, orph %zu",
1426 UBIFS_REF_NODE_SZ, UBIFS_CS_NODE_SZ, UBIFS_ORPH_NODE_SZ); 1470 UBIFS_REF_NODE_SZ, UBIFS_CS_NODE_SZ, UBIFS_ORPH_NODE_SZ);
1427 dbg_msg("max. node sizes: data %zu, inode %zu dentry %zu", 1471 dbg_msg("max. node sizes: data %zu, inode %zu dentry %zu, idx %d",
1428 UBIFS_MAX_DATA_NODE_SZ, UBIFS_MAX_INO_NODE_SZ, 1472 UBIFS_MAX_DATA_NODE_SZ, UBIFS_MAX_INO_NODE_SZ,
1429 UBIFS_MAX_DENT_NODE_SZ); 1473 UBIFS_MAX_DENT_NODE_SZ, ubifs_idx_node_sz(c, c->fanout));
1430 dbg_msg("dead watermark: %d", c->dead_wm); 1474 dbg_msg("dead watermark: %d", c->dead_wm);
1431 dbg_msg("dark watermark: %d", c->dark_wm); 1475 dbg_msg("dark watermark: %d", c->dark_wm);
1432 dbg_msg("LEB overhead: %d", c->leb_overhead); 1476 dbg_msg("LEB overhead: %d", c->leb_overhead);
@@ -1466,6 +1510,7 @@ out_wbufs:
1466out_cbuf: 1510out_cbuf:
1467 kfree(c->cbuf); 1511 kfree(c->cbuf);
1468out_free: 1512out_free:
1513 kfree(c->write_reserve_buf);
1469 kfree(c->bu.buf); 1514 kfree(c->bu.buf);
1470 vfree(c->ileb_buf); 1515 vfree(c->ileb_buf);
1471 vfree(c->sbuf); 1516 vfree(c->sbuf);
@@ -1504,6 +1549,7 @@ static void ubifs_umount(struct ubifs_info *c)
1504 kfree(c->cbuf); 1549 kfree(c->cbuf);
1505 kfree(c->rcvrd_mst_node); 1550 kfree(c->rcvrd_mst_node);
1506 kfree(c->mst_node); 1551 kfree(c->mst_node);
1552 kfree(c->write_reserve_buf);
1507 kfree(c->bu.buf); 1553 kfree(c->bu.buf);
1508 vfree(c->ileb_buf); 1554 vfree(c->ileb_buf);
1509 vfree(c->sbuf); 1555 vfree(c->sbuf);
@@ -1535,7 +1581,7 @@ static int ubifs_remount_rw(struct ubifs_info *c)
1535 mutex_lock(&c->umount_mutex); 1581 mutex_lock(&c->umount_mutex);
1536 dbg_save_space_info(c); 1582 dbg_save_space_info(c);
1537 c->remounting_rw = 1; 1583 c->remounting_rw = 1;
1538 c->always_chk_crc = 1; 1584 c->ro_mount = 0;
1539 1585
1540 err = check_free_space(c); 1586 err = check_free_space(c);
1541 if (err) 1587 if (err)
@@ -1551,6 +1597,7 @@ static int ubifs_remount_rw(struct ubifs_info *c)
1551 } 1597 }
1552 sup->leb_cnt = cpu_to_le32(c->leb_cnt); 1598 sup->leb_cnt = cpu_to_le32(c->leb_cnt);
1553 err = ubifs_write_sb_node(c, sup); 1599 err = ubifs_write_sb_node(c, sup);
1600 kfree(sup);
1554 if (err) 1601 if (err)
1555 goto out; 1602 goto out;
1556 } 1603 }
@@ -1590,16 +1637,14 @@ static int ubifs_remount_rw(struct ubifs_info *c)
1590 goto out; 1637 goto out;
1591 } 1638 }
1592 1639
1593 err = ubifs_lpt_init(c, 0, 1); 1640 c->write_reserve_buf = kmalloc(COMPRESSED_DATA_NODE_BUF_SZ, GFP_KERNEL);
1594 if (err) 1641 if (!c->write_reserve_buf)
1595 goto out; 1642 goto out;
1596 1643
1597 err = alloc_wbufs(c); 1644 err = ubifs_lpt_init(c, 0, 1);
1598 if (err) 1645 if (err)
1599 goto out; 1646 goto out;
1600 1647
1601 ubifs_create_buds_lists(c);
1602
1603 /* Create background thread */ 1648 /* Create background thread */
1604 c->bgt = kthread_create(ubifs_bg_thread, c, "%s", c->bgt_name); 1649 c->bgt = kthread_create(ubifs_bg_thread, c, "%s", c->bgt_name);
1605 if (IS_ERR(c->bgt)) { 1650 if (IS_ERR(c->bgt)) {
@@ -1634,20 +1679,37 @@ static int ubifs_remount_rw(struct ubifs_info *c)
1634 if (err) 1679 if (err)
1635 goto out; 1680 goto out;
1636 1681
1682 dbg_gen("re-mounted read-write");
1683 c->remounting_rw = 0;
1684
1637 if (c->need_recovery) { 1685 if (c->need_recovery) {
1638 c->need_recovery = 0; 1686 c->need_recovery = 0;
1639 ubifs_msg("deferred recovery completed"); 1687 ubifs_msg("deferred recovery completed");
1688 } else {
1689 /*
1690 * Do not run the debugging space check if we were doing
1691 * recovery, because when we saved the information we had the
1692 * file-system in a state where the TNC and lprops had been
1693 * modified in memory, but all the I/O operations (including a
1694 * commit) were deferred. So the file-system was in
1695 * "non-committed" state. Now the file-system is in committed
1696 * state, and of course the amount of free space will change
1697 * because, for example, the old index size was imprecise.
1698 */
1699 err = dbg_check_space_info(c);
1700 }
1701
1702 if (c->space_fixup) {
1703 err = ubifs_fixup_free_space(c);
1704 if (err)
1705 goto out;
1640 } 1706 }
1641 1707
1642 dbg_gen("re-mounted read-write");
1643 c->vfs_sb->s_flags &= ~MS_RDONLY;
1644 c->remounting_rw = 0;
1645 c->always_chk_crc = 0;
1646 err = dbg_check_space_info(c);
1647 mutex_unlock(&c->umount_mutex); 1708 mutex_unlock(&c->umount_mutex);
1648 return err; 1709 return err;
1649 1710
1650out: 1711out:
1712 c->ro_mount = 1;
1651 vfree(c->orph_buf); 1713 vfree(c->orph_buf);
1652 c->orph_buf = NULL; 1714 c->orph_buf = NULL;
1653 if (c->bgt) { 1715 if (c->bgt) {
@@ -1655,11 +1717,12 @@ out:
1655 c->bgt = NULL; 1717 c->bgt = NULL;
1656 } 1718 }
1657 free_wbufs(c); 1719 free_wbufs(c);
1720 kfree(c->write_reserve_buf);
1721 c->write_reserve_buf = NULL;
1658 vfree(c->ileb_buf); 1722 vfree(c->ileb_buf);
1659 c->ileb_buf = NULL; 1723 c->ileb_buf = NULL;
1660 ubifs_lpt_free(c, 1); 1724 ubifs_lpt_free(c, 1);
1661 c->remounting_rw = 0; 1725 c->remounting_rw = 0;
1662 c->always_chk_crc = 0;
1663 mutex_unlock(&c->umount_mutex); 1726 mutex_unlock(&c->umount_mutex);
1664 return err; 1727 return err;
1665} 1728}
@@ -1676,7 +1739,7 @@ static void ubifs_remount_ro(struct ubifs_info *c)
1676 int i, err; 1739 int i, err;
1677 1740
1678 ubifs_assert(!c->need_recovery); 1741 ubifs_assert(!c->need_recovery);
1679 ubifs_assert(!(c->vfs_sb->s_flags & MS_RDONLY)); 1742 ubifs_assert(!c->ro_mount);
1680 1743
1681 mutex_lock(&c->umount_mutex); 1744 mutex_lock(&c->umount_mutex);
1682 if (c->bgt) { 1745 if (c->bgt) {
@@ -1686,10 +1749,8 @@ static void ubifs_remount_ro(struct ubifs_info *c)
1686 1749
1687 dbg_save_space_info(c); 1750 dbg_save_space_info(c);
1688 1751
1689 for (i = 0; i < c->jhead_cnt; i++) { 1752 for (i = 0; i < c->jhead_cnt; i++)
1690 ubifs_wbuf_sync(&c->jheads[i].wbuf); 1753 ubifs_wbuf_sync(&c->jheads[i].wbuf);
1691 hrtimer_cancel(&c->jheads[i].wbuf.timer);
1692 }
1693 1754
1694 c->mst_node->flags &= ~cpu_to_le32(UBIFS_MST_DIRTY); 1755 c->mst_node->flags &= ~cpu_to_le32(UBIFS_MST_DIRTY);
1695 c->mst_node->flags |= cpu_to_le32(UBIFS_MST_NO_ORPHS); 1756 c->mst_node->flags |= cpu_to_le32(UBIFS_MST_NO_ORPHS);
@@ -1698,12 +1759,14 @@ static void ubifs_remount_ro(struct ubifs_info *c)
1698 if (err) 1759 if (err)
1699 ubifs_ro_mode(c, err); 1760 ubifs_ro_mode(c, err);
1700 1761
1701 free_wbufs(c);
1702 vfree(c->orph_buf); 1762 vfree(c->orph_buf);
1703 c->orph_buf = NULL; 1763 c->orph_buf = NULL;
1764 kfree(c->write_reserve_buf);
1765 c->write_reserve_buf = NULL;
1704 vfree(c->ileb_buf); 1766 vfree(c->ileb_buf);
1705 c->ileb_buf = NULL; 1767 c->ileb_buf = NULL;
1706 ubifs_lpt_free(c, 1); 1768 ubifs_lpt_free(c, 1);
1769 c->ro_mount = 1;
1707 err = dbg_check_space_info(c); 1770 err = dbg_check_space_info(c);
1708 if (err) 1771 if (err)
1709 ubifs_ro_mode(c, err); 1772 ubifs_ro_mode(c, err);
@@ -1723,10 +1786,11 @@ static void ubifs_put_super(struct super_block *sb)
1723 * of the media. For example, there will be dirty inodes if we failed 1786 * of the media. For example, there will be dirty inodes if we failed
1724 * to write them back because of I/O errors. 1787 * to write them back because of I/O errors.
1725 */ 1788 */
1726 ubifs_assert(atomic_long_read(&c->dirty_pg_cnt) == 0); 1789 if (!c->ro_error) {
1727 ubifs_assert(c->budg_idx_growth == 0); 1790 ubifs_assert(c->bi.idx_growth == 0);
1728 ubifs_assert(c->budg_dd_growth == 0); 1791 ubifs_assert(c->bi.dd_growth == 0);
1729 ubifs_assert(c->budg_data_growth == 0); 1792 ubifs_assert(c->bi.data_growth == 0);
1793 }
1730 1794
1731 /* 1795 /*
1732 * The 'c->umount_lock' prevents races between UBIFS memory shrinker 1796 * The 'c->umount_lock' prevents races between UBIFS memory shrinker
@@ -1735,7 +1799,7 @@ static void ubifs_put_super(struct super_block *sb)
1735 * the mutex is locked. 1799 * the mutex is locked.
1736 */ 1800 */
1737 mutex_lock(&c->umount_mutex); 1801 mutex_lock(&c->umount_mutex);
1738 if (!(c->vfs_sb->s_flags & MS_RDONLY)) { 1802 if (!c->ro_mount) {
1739 /* 1803 /*
1740 * First of all kill the background thread to make sure it does 1804 * First of all kill the background thread to make sure it does
1741 * not interfere with un-mounting and freeing resources. 1805 * not interfere with un-mounting and freeing resources.
@@ -1745,23 +1809,22 @@ static void ubifs_put_super(struct super_block *sb)
1745 c->bgt = NULL; 1809 c->bgt = NULL;
1746 } 1810 }
1747 1811
1748 /* Synchronize write-buffers */
1749 if (c->jheads)
1750 for (i = 0; i < c->jhead_cnt; i++)
1751 ubifs_wbuf_sync(&c->jheads[i].wbuf);
1752
1753 /* 1812 /*
1754 * On fatal errors c->ro_media is set to 1, in which case we do 1813 * On fatal errors c->ro_error is set to 1, in which case we do
1755 * not write the master node. 1814 * not write the master node.
1756 */ 1815 */
1757 if (!c->ro_media) { 1816 if (!c->ro_error) {
1817 int err;
1818
1819 /* Synchronize write-buffers */
1820 for (i = 0; i < c->jhead_cnt; i++)
1821 ubifs_wbuf_sync(&c->jheads[i].wbuf);
1822
1758 /* 1823 /*
1759 * We are being cleanly unmounted which means the 1824 * We are being cleanly unmounted which means the
1760 * orphans were killed - indicate this in the master 1825 * orphans were killed - indicate this in the master
1761 * node. Also save the reserved GC LEB number. 1826 * node. Also save the reserved GC LEB number.
1762 */ 1827 */
1763 int err;
1764
1765 c->mst_node->flags &= ~cpu_to_le32(UBIFS_MST_DIRTY); 1828 c->mst_node->flags &= ~cpu_to_le32(UBIFS_MST_DIRTY);
1766 c->mst_node->flags |= cpu_to_le32(UBIFS_MST_NO_ORPHS); 1829 c->mst_node->flags |= cpu_to_le32(UBIFS_MST_NO_ORPHS);
1767 c->mst_node->gc_lnum = cpu_to_le32(c->gc_lnum); 1830 c->mst_node->gc_lnum = cpu_to_le32(c->gc_lnum);
@@ -1774,6 +1837,10 @@ static void ubifs_put_super(struct super_block *sb)
1774 */ 1837 */
1775 ubifs_err("failed to write master node, " 1838 ubifs_err("failed to write master node, "
1776 "error %d", err); 1839 "error %d", err);
1840 } else {
1841 for (i = 0; i < c->jhead_cnt; i++)
1842 /* Make sure write-buffer timers are canceled */
1843 hrtimer_cancel(&c->jheads[i].wbuf.timer);
1777 } 1844 }
1778 } 1845 }
1779 1846
@@ -1781,7 +1848,6 @@ static void ubifs_put_super(struct super_block *sb)
1781 bdi_destroy(&c->bdi); 1848 bdi_destroy(&c->bdi);
1782 ubi_close_volume(c->ubi); 1849 ubi_close_volume(c->ubi);
1783 mutex_unlock(&c->umount_mutex); 1850 mutex_unlock(&c->umount_mutex);
1784 kfree(c);
1785} 1851}
1786 1852
1787static int ubifs_remount_fs(struct super_block *sb, int *flags, char *data) 1853static int ubifs_remount_fs(struct super_block *sb, int *flags, char *data)
@@ -1797,17 +1863,21 @@ static int ubifs_remount_fs(struct super_block *sb, int *flags, char *data)
1797 return err; 1863 return err;
1798 } 1864 }
1799 1865
1800 if ((sb->s_flags & MS_RDONLY) && !(*flags & MS_RDONLY)) { 1866 if (c->ro_mount && !(*flags & MS_RDONLY)) {
1867 if (c->ro_error) {
1868 ubifs_msg("cannot re-mount R/W due to prior errors");
1869 return -EROFS;
1870 }
1801 if (c->ro_media) { 1871 if (c->ro_media) {
1802 ubifs_msg("cannot re-mount due to prior errors"); 1872 ubifs_msg("cannot re-mount R/W - UBI volume is R/O");
1803 return -EROFS; 1873 return -EROFS;
1804 } 1874 }
1805 err = ubifs_remount_rw(c); 1875 err = ubifs_remount_rw(c);
1806 if (err) 1876 if (err)
1807 return err; 1877 return err;
1808 } else if (!(sb->s_flags & MS_RDONLY) && (*flags & MS_RDONLY)) { 1878 } else if (!c->ro_mount && (*flags & MS_RDONLY)) {
1809 if (c->ro_media) { 1879 if (c->ro_error) {
1810 ubifs_msg("cannot re-mount due to prior errors"); 1880 ubifs_msg("cannot re-mount R/O due to prior errors");
1811 return -EROFS; 1881 return -EROFS;
1812 } 1882 }
1813 ubifs_remount_ro(c); 1883 ubifs_remount_ro(c);
@@ -1900,59 +1970,65 @@ static struct ubi_volume_desc *open_ubi(const char *name, int mode)
1900 return ERR_PTR(-EINVAL); 1970 return ERR_PTR(-EINVAL);
1901} 1971}
1902 1972
1903static int ubifs_fill_super(struct super_block *sb, void *data, int silent) 1973static struct ubifs_info *alloc_ubifs_info(struct ubi_volume_desc *ubi)
1904{ 1974{
1905 struct ubi_volume_desc *ubi = sb->s_fs_info;
1906 struct ubifs_info *c; 1975 struct ubifs_info *c;
1907 struct inode *root;
1908 int err;
1909 1976
1910 c = kzalloc(sizeof(struct ubifs_info), GFP_KERNEL); 1977 c = kzalloc(sizeof(struct ubifs_info), GFP_KERNEL);
1911 if (!c) 1978 if (c) {
1912 return -ENOMEM; 1979 spin_lock_init(&c->cnt_lock);
1980 spin_lock_init(&c->cs_lock);
1981 spin_lock_init(&c->buds_lock);
1982 spin_lock_init(&c->space_lock);
1983 spin_lock_init(&c->orphan_lock);
1984 init_rwsem(&c->commit_sem);
1985 mutex_init(&c->lp_mutex);
1986 mutex_init(&c->tnc_mutex);
1987 mutex_init(&c->log_mutex);
1988 mutex_init(&c->mst_mutex);
1989 mutex_init(&c->umount_mutex);
1990 mutex_init(&c->bu_mutex);
1991 mutex_init(&c->write_reserve_mutex);
1992 init_waitqueue_head(&c->cmt_wq);
1993 c->buds = RB_ROOT;
1994 c->old_idx = RB_ROOT;
1995 c->size_tree = RB_ROOT;
1996 c->orph_tree = RB_ROOT;
1997 INIT_LIST_HEAD(&c->infos_list);
1998 INIT_LIST_HEAD(&c->idx_gc);
1999 INIT_LIST_HEAD(&c->replay_list);
2000 INIT_LIST_HEAD(&c->replay_buds);
2001 INIT_LIST_HEAD(&c->uncat_list);
2002 INIT_LIST_HEAD(&c->empty_list);
2003 INIT_LIST_HEAD(&c->freeable_list);
2004 INIT_LIST_HEAD(&c->frdi_idx_list);
2005 INIT_LIST_HEAD(&c->unclean_leb_list);
2006 INIT_LIST_HEAD(&c->old_buds);
2007 INIT_LIST_HEAD(&c->orph_list);
2008 INIT_LIST_HEAD(&c->orph_new);
2009 c->no_chk_data_crc = 1;
2010
2011 c->highest_inum = UBIFS_FIRST_INO;
2012 c->lhead_lnum = c->ltail_lnum = UBIFS_LOG_LNUM;
2013
2014 ubi_get_volume_info(ubi, &c->vi);
2015 ubi_get_device_info(c->vi.ubi_num, &c->di);
2016 }
2017 return c;
2018}
1913 2019
1914 spin_lock_init(&c->cnt_lock); 2020static int ubifs_fill_super(struct super_block *sb, void *data, int silent)
1915 spin_lock_init(&c->cs_lock); 2021{
1916 spin_lock_init(&c->buds_lock); 2022 struct ubifs_info *c = sb->s_fs_info;
1917 spin_lock_init(&c->space_lock); 2023 struct inode *root;
1918 spin_lock_init(&c->orphan_lock); 2024 int err;
1919 init_rwsem(&c->commit_sem);
1920 mutex_init(&c->lp_mutex);
1921 mutex_init(&c->tnc_mutex);
1922 mutex_init(&c->log_mutex);
1923 mutex_init(&c->mst_mutex);
1924 mutex_init(&c->umount_mutex);
1925 mutex_init(&c->bu_mutex);
1926 init_waitqueue_head(&c->cmt_wq);
1927 c->buds = RB_ROOT;
1928 c->old_idx = RB_ROOT;
1929 c->size_tree = RB_ROOT;
1930 c->orph_tree = RB_ROOT;
1931 INIT_LIST_HEAD(&c->infos_list);
1932 INIT_LIST_HEAD(&c->idx_gc);
1933 INIT_LIST_HEAD(&c->replay_list);
1934 INIT_LIST_HEAD(&c->replay_buds);
1935 INIT_LIST_HEAD(&c->uncat_list);
1936 INIT_LIST_HEAD(&c->empty_list);
1937 INIT_LIST_HEAD(&c->freeable_list);
1938 INIT_LIST_HEAD(&c->frdi_idx_list);
1939 INIT_LIST_HEAD(&c->unclean_leb_list);
1940 INIT_LIST_HEAD(&c->old_buds);
1941 INIT_LIST_HEAD(&c->orph_list);
1942 INIT_LIST_HEAD(&c->orph_new);
1943 2025
1944 c->vfs_sb = sb; 2026 c->vfs_sb = sb;
1945 c->highest_inum = UBIFS_FIRST_INO;
1946 c->lhead_lnum = c->ltail_lnum = UBIFS_LOG_LNUM;
1947
1948 ubi_get_volume_info(ubi, &c->vi);
1949 ubi_get_device_info(c->vi.ubi_num, &c->di);
1950
1951 /* Re-open the UBI device in read-write mode */ 2027 /* Re-open the UBI device in read-write mode */
1952 c->ubi = ubi_open_volume(c->vi.ubi_num, c->vi.vol_id, UBI_READWRITE); 2028 c->ubi = ubi_open_volume(c->vi.ubi_num, c->vi.vol_id, UBI_READWRITE);
1953 if (IS_ERR(c->ubi)) { 2029 if (IS_ERR(c->ubi)) {
1954 err = PTR_ERR(c->ubi); 2030 err = PTR_ERR(c->ubi);
1955 goto out_free; 2031 goto out;
1956 } 2032 }
1957 2033
1958 /* 2034 /*
@@ -1965,7 +2041,6 @@ static int ubifs_fill_super(struct super_block *sb, void *data, int silent)
1965 */ 2041 */
1966 c->bdi.name = "ubifs", 2042 c->bdi.name = "ubifs",
1967 c->bdi.capabilities = BDI_CAP_MAP_COPY; 2043 c->bdi.capabilities = BDI_CAP_MAP_COPY;
1968 c->bdi.unplug_io_fn = default_unplug_io_fn;
1969 err = bdi_init(&c->bdi); 2044 err = bdi_init(&c->bdi);
1970 if (err) 2045 if (err)
1971 goto out_close; 2046 goto out_close;
@@ -2019,24 +2094,29 @@ out_bdi:
2019 bdi_destroy(&c->bdi); 2094 bdi_destroy(&c->bdi);
2020out_close: 2095out_close:
2021 ubi_close_volume(c->ubi); 2096 ubi_close_volume(c->ubi);
2022out_free: 2097out:
2023 kfree(c);
2024 return err; 2098 return err;
2025} 2099}
2026 2100
2027static int sb_test(struct super_block *sb, void *data) 2101static int sb_test(struct super_block *sb, void *data)
2028{ 2102{
2029 dev_t *dev = data; 2103 struct ubifs_info *c1 = data;
2030 struct ubifs_info *c = sb->s_fs_info; 2104 struct ubifs_info *c = sb->s_fs_info;
2031 2105
2032 return c->vi.cdev == *dev; 2106 return c->vi.cdev == c1->vi.cdev;
2107}
2108
2109static int sb_set(struct super_block *sb, void *data)
2110{
2111 sb->s_fs_info = data;
2112 return set_anon_super(sb, NULL);
2033} 2113}
2034 2114
2035static int ubifs_get_sb(struct file_system_type *fs_type, int flags, 2115static struct dentry *ubifs_mount(struct file_system_type *fs_type, int flags,
2036 const char *name, void *data, struct vfsmount *mnt) 2116 const char *name, void *data)
2037{ 2117{
2038 struct ubi_volume_desc *ubi; 2118 struct ubi_volume_desc *ubi;
2039 struct ubi_volume_info vi; 2119 struct ubifs_info *c;
2040 struct super_block *sb; 2120 struct super_block *sb;
2041 int err; 2121 int err;
2042 2122
@@ -2049,34 +2129,37 @@ static int ubifs_get_sb(struct file_system_type *fs_type, int flags,
2049 */ 2129 */
2050 ubi = open_ubi(name, UBI_READONLY); 2130 ubi = open_ubi(name, UBI_READONLY);
2051 if (IS_ERR(ubi)) { 2131 if (IS_ERR(ubi)) {
2052 ubifs_err("cannot open \"%s\", error %d", 2132 dbg_err("cannot open \"%s\", error %d",
2053 name, (int)PTR_ERR(ubi)); 2133 name, (int)PTR_ERR(ubi));
2054 return PTR_ERR(ubi); 2134 return ERR_CAST(ubi);
2055 } 2135 }
2056 ubi_get_volume_info(ubi, &vi);
2057 2136
2058 dbg_gen("opened ubi%d_%d", vi.ubi_num, vi.vol_id); 2137 c = alloc_ubifs_info(ubi);
2138 if (!c) {
2139 err = -ENOMEM;
2140 goto out_close;
2141 }
2059 2142
2060 sb = sget(fs_type, &sb_test, &set_anon_super, &vi.cdev); 2143 dbg_gen("opened ubi%d_%d", c->vi.ubi_num, c->vi.vol_id);
2144
2145 sb = sget(fs_type, sb_test, sb_set, c);
2061 if (IS_ERR(sb)) { 2146 if (IS_ERR(sb)) {
2062 err = PTR_ERR(sb); 2147 err = PTR_ERR(sb);
2148 kfree(c);
2063 goto out_close; 2149 goto out_close;
2064 } 2150 }
2065 2151
2066 if (sb->s_root) { 2152 if (sb->s_root) {
2153 struct ubifs_info *c1 = sb->s_fs_info;
2154 kfree(c);
2067 /* A new mount point for already mounted UBIFS */ 2155 /* A new mount point for already mounted UBIFS */
2068 dbg_gen("this ubi volume is already mounted"); 2156 dbg_gen("this ubi volume is already mounted");
2069 if ((flags ^ sb->s_flags) & MS_RDONLY) { 2157 if (!!(flags & MS_RDONLY) != c1->ro_mount) {
2070 err = -EBUSY; 2158 err = -EBUSY;
2071 goto out_deact; 2159 goto out_deact;
2072 } 2160 }
2073 } else { 2161 } else {
2074 sb->s_flags = flags; 2162 sb->s_flags = flags;
2075 /*
2076 * Pass 'ubi' to 'fill_super()' in sb->s_fs_info where it is
2077 * replaced by 'c'.
2078 */
2079 sb->s_fs_info = ubi;
2080 err = ubifs_fill_super(sb, data, flags & MS_SILENT ? 1 : 0); 2163 err = ubifs_fill_super(sb, data, flags & MS_SILENT ? 1 : 0);
2081 if (err) 2164 if (err)
2082 goto out_deact; 2165 goto out_deact;
@@ -2087,21 +2170,27 @@ static int ubifs_get_sb(struct file_system_type *fs_type, int flags,
2087 /* 'fill_super()' opens ubi again so we must close it here */ 2170 /* 'fill_super()' opens ubi again so we must close it here */
2088 ubi_close_volume(ubi); 2171 ubi_close_volume(ubi);
2089 2172
2090 simple_set_mnt(mnt, sb); 2173 return dget(sb->s_root);
2091 return 0;
2092 2174
2093out_deact: 2175out_deact:
2094 deactivate_locked_super(sb); 2176 deactivate_locked_super(sb);
2095out_close: 2177out_close:
2096 ubi_close_volume(ubi); 2178 ubi_close_volume(ubi);
2097 return err; 2179 return ERR_PTR(err);
2180}
2181
2182static void kill_ubifs_super(struct super_block *s)
2183{
2184 struct ubifs_info *c = s->s_fs_info;
2185 kill_anon_super(s);
2186 kfree(c);
2098} 2187}
2099 2188
2100static struct file_system_type ubifs_fs_type = { 2189static struct file_system_type ubifs_fs_type = {
2101 .name = "ubifs", 2190 .name = "ubifs",
2102 .owner = THIS_MODULE, 2191 .owner = THIS_MODULE,
2103 .get_sb = ubifs_get_sb, 2192 .mount = ubifs_mount,
2104 .kill_sb = kill_anon_super, 2193 .kill_sb = kill_ubifs_super,
2105}; 2194};
2106 2195
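Taken together, alloc_ubifs_info(), sb_test()/sb_set(), ubifs_mount() and kill_ubifs_super() form one ownership pattern: the info object is allocated before sget(), adopted by a new superblock or discarded, and freed only in .kill_sb. A hedged kernel-side sketch of that shape for this VFS generation (the myfs names are hypothetical, error handling abbreviated):

#include <linux/err.h>
#include <linux/fs.h>
#include <linux/module.h>
#include <linux/slab.h>

struct myfs_info { int id; };

static int myfs_test(struct super_block *sb, void *data)
{
	struct myfs_info *new = data, *old = sb->s_fs_info;

	return old->id == new->id;	/* same backing store: reuse sb */
}

static int myfs_set(struct super_block *sb, void *data)
{
	sb->s_fs_info = data;		/* adopt the pre-allocated info */
	return set_anon_super(sb, NULL);
}

static struct dentry *myfs_mount(struct file_system_type *fs_type, int flags,
				 const char *name, void *data)
{
	struct myfs_info *info = kzalloc(sizeof(*info), GFP_KERNEL);
	struct super_block *sb;

	if (!info)
		return ERR_PTR(-ENOMEM);
	sb = sget(fs_type, myfs_test, myfs_set, info);
	if (IS_ERR(sb)) {
		kfree(info);
		return ERR_CAST(sb);
	}
	if (sb->s_root)
		kfree(info);	/* existing mount: our copy is unneeded */
	/* ... fill the superblock on first mount ... */
	return dget(sb->s_root);
}

static void myfs_kill_sb(struct super_block *sb)
{
	struct myfs_info *info = sb->s_fs_info;

	kill_anon_super(sb);
	kfree(info);		/* freed here, not in ->put_super() */
}

static struct file_system_type myfs_type = {
	.owner   = THIS_MODULE,
	.name    = "myfs",
	.mount   = myfs_mount,
	.kill_sb = myfs_kill_sb,
};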
2107/* 2196/*
diff --git a/fs/ubifs/tnc.c b/fs/ubifs/tnc.c
index 2194915220e5..91b4213dde84 100644
--- a/fs/ubifs/tnc.c
+++ b/fs/ubifs/tnc.c
@@ -447,8 +447,11 @@ static int tnc_read_node_nm(struct ubifs_info *c, struct ubifs_zbranch *zbr,
447 * 447 *
448 * Note, this function does not check CRC of data nodes if @c->no_chk_data_crc 448 * Note, this function does not check CRC of data nodes if @c->no_chk_data_crc
449 * is true (it is controlled by corresponding mount option). However, if 449 * is true (it is controlled by corresponding mount option). However, if
450 * @c->always_chk_crc is true, @c->no_chk_data_crc is ignored and CRC is always 450 * @c->mounting or @c->remounting_rw is true (we are mounting or re-mounting to
451 * checked. 451 * R/W mode), @c->no_chk_data_crc is ignored and CRC is checked. This is
452 * because during mounting or re-mounting from R/O mode to R/W mode we may read
453 * journal nodes (when replaying the journal or doing the recovery) and the
454 * journal nodes may potentially be corrupted, so checking is required.
452 */ 455 */
453static int try_read_node(const struct ubifs_info *c, void *buf, int type, 456static int try_read_node(const struct ubifs_info *c, void *buf, int type,
454 int len, int lnum, int offs) 457 int len, int lnum, int offs)
@@ -476,7 +479,8 @@ static int try_read_node(const struct ubifs_info *c, void *buf, int type,
476 if (node_len != len) 479 if (node_len != len)
477 return 0; 480 return 0;
478 481
479 if (type == UBIFS_DATA_NODE && !c->always_chk_crc && c->no_chk_data_crc) 482 if (type == UBIFS_DATA_NODE && c->no_chk_data_crc && !c->mounting &&
483 !c->remounting_rw)
480 return 1; 484 return 1;
481 485
482 crc = crc32(UBIFS_CRC32_INIT, buf + 8, node_len - 8); 486 crc = crc32(UBIFS_CRC32_INIT, buf + 8, node_len - 8);
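The check on the last line above is the kernel's crc32() seeded with UBIFS_CRC32_INIT, taken over everything past the 8-byte magic-plus-CRC prefix. A self-contained sketch of what is believed to be the equivalent computation (bit-at-a-time reflected CRC-32, polynomial 0xEDB88320, no final XOR), over a hypothetical node image:

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

#define UBIFS_CRC32_INIT 0xFFFFFFFFu

/* Reflected CRC-32 without final XOR; assumed equivalent to the kernel
 * crc32() call used above (table-driven in the kernel, bitwise here) */
static uint32_t crc32_le(uint32_t crc, const uint8_t *p, size_t len)
{
	while (len--) {
		crc ^= *p++;
		for (int i = 0; i < 8; i++)
			crc = (crc >> 1) ^ ((crc & 1) ? 0xEDB88320u : 0);
	}
	return crc;
}

int main(void)
{
	/* Hypothetical 24-byte node image; bytes 0..7 (magic + stored CRC)
	 * are excluded from the checksum, as in try_read_node() */
	uint8_t node[24] = { [8] = 0xAB, [9] = 0xCD };
	uint32_t crc = crc32_le(UBIFS_CRC32_INIT, node + 8, sizeof(node) - 8);

	printf("computed node CRC: 0x%08x\n", crc);
	return 0;
}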
@@ -1177,6 +1181,7 @@ int ubifs_lookup_level0(struct ubifs_info *c, const union ubifs_key *key,
1177 unsigned long time = get_seconds(); 1181 unsigned long time = get_seconds();
1178 1182
1179 dbg_tnc("search key %s", DBGKEY(key)); 1183 dbg_tnc("search key %s", DBGKEY(key));
1184 ubifs_assert(key_type(c, key) < UBIFS_INVALID_KEY);
1180 1185
1181 znode = c->zroot.znode; 1186 znode = c->zroot.znode;
1182 if (unlikely(!znode)) { 1187 if (unlikely(!znode)) {
@@ -2552,11 +2557,11 @@ int ubifs_tnc_remove_nm(struct ubifs_info *c, const union ubifs_key *key,
2552 if (err) { 2557 if (err) {
2553 /* Ensure the znode is dirtied */ 2558 /* Ensure the znode is dirtied */
2554 if (znode->cnext || !ubifs_zn_dirty(znode)) { 2559 if (znode->cnext || !ubifs_zn_dirty(znode)) {
2555 znode = dirty_cow_bottom_up(c, znode); 2560 znode = dirty_cow_bottom_up(c, znode);
2556 if (IS_ERR(znode)) { 2561 if (IS_ERR(znode)) {
2557 err = PTR_ERR(znode); 2562 err = PTR_ERR(znode);
2558 goto out_unlock; 2563 goto out_unlock;
2559 } 2564 }
2560 } 2565 }
2561 err = tnc_delete(c, znode, n); 2566 err = tnc_delete(c, znode, n);
2562 } 2567 }
@@ -2871,12 +2876,13 @@ static void tnc_destroy_cnext(struct ubifs_info *c)
2871 */ 2876 */
2872void ubifs_tnc_close(struct ubifs_info *c) 2877void ubifs_tnc_close(struct ubifs_info *c)
2873{ 2878{
2874 long clean_freed;
2875
2876 tnc_destroy_cnext(c); 2879 tnc_destroy_cnext(c);
2877 if (c->zroot.znode) { 2880 if (c->zroot.znode) {
2878 clean_freed = ubifs_destroy_tnc_subtree(c->zroot.znode); 2881 long n;
2879 atomic_long_sub(clean_freed, &ubifs_clean_zn_cnt); 2882
2883 ubifs_destroy_tnc_subtree(c->zroot.znode);
2884 n = atomic_long_read(&c->clean_zn_cnt);
2885 atomic_long_sub(n, &ubifs_clean_zn_cnt);
2880 } 2886 }
2881 kfree(c->gap_lebs); 2887 kfree(c->gap_lebs);
2882 kfree(c->ilebs); 2888 kfree(c->ilebs);
@@ -2966,7 +2972,7 @@ static struct ubifs_znode *right_znode(struct ubifs_info *c,
2966 * 2972 *
2967 * This function searches an indexing node by its first key @key and its 2973 * This function searches an indexing node by its first key @key and its
2968 * address @lnum:@offs. It looks up the indexing tree by pulling all indexing 2974 * address @lnum:@offs. It looks up the indexing tree by pulling all indexing
2969 * nodes it traverses to TNC. This function is called fro indexing nodes which 2975 * nodes it traverses to TNC. This function is called for indexing nodes which
2970 * were found on the media by scanning, for example when garbage-collecting or 2976 * were found on the media by scanning, for example when garbage-collecting or
2971 * when doing in-the-gaps commit. This means that the indexing node which is 2977 * when doing in-the-gaps commit. This means that the indexing node which is
2972 * looked for does not have to have exactly the same leftmost key @key, because 2978 * looked for does not have to have exactly the same leftmost key @key, because
@@ -2988,6 +2994,8 @@ static struct ubifs_znode *lookup_znode(struct ubifs_info *c,
2988 struct ubifs_znode *znode, *zn; 2994 struct ubifs_znode *znode, *zn;
2989 int n, nn; 2995 int n, nn;
2990 2996
2997 ubifs_assert(key_type(c, key) < UBIFS_INVALID_KEY);
2998
2991 /* 2999 /*
2992 * The arguments have probably been read off flash, so don't assume 3000 * The arguments have probably been read off flash, so don't assume
2993 * they are valid. 3001 * they are valid.
diff --git a/fs/ubifs/tnc_commit.c b/fs/ubifs/tnc_commit.c
index 53288e5d604e..41920f357bbf 100644
--- a/fs/ubifs/tnc_commit.c
+++ b/fs/ubifs/tnc_commit.c
@@ -377,15 +377,13 @@ static int layout_in_gaps(struct ubifs_info *c, int cnt)
377 c->gap_lebs = NULL; 377 c->gap_lebs = NULL;
378 return err; 378 return err;
379 } 379 }
380 if (!dbg_force_in_the_gaps_enabled) { 380 if (!dbg_force_in_the_gaps_enabled()) {
381 /* 381 /*
382 * Do not print scary warnings if the debugging 382 * Do not print scary warnings if the debugging
383 * option which forces in-the-gaps is enabled. 383 * option which forces in-the-gaps is enabled.
384 */ 384 */
385 ubifs_err("out of space"); 385 ubifs_warn("out of space");
386 spin_lock(&c->space_lock); 386 dbg_dump_budg(c, &c->bi);
387 dbg_dump_budg(c);
388 spin_unlock(&c->space_lock);
389 dbg_dump_lprops(c); 387 dbg_dump_lprops(c);
390 } 388 }
391 /* Try to commit anyway */ 389 /* Try to commit anyway */
@@ -796,16 +794,16 @@ int ubifs_tnc_start_commit(struct ubifs_info *c, struct ubifs_zbranch *zroot)
796 spin_lock(&c->space_lock); 794 spin_lock(&c->space_lock);
797 /* 795 /*
798 * Although we have not finished committing yet, update size of the 796 * Although we have not finished committing yet, update size of the
799 * committed index ('c->old_idx_sz') and zero out the index growth 797 * committed index ('c->bi.old_idx_sz') and zero out the index growth
800 * budget. It is OK to do this now, because we've reserved all the 798 * budget. It is OK to do this now, because we've reserved all the
801 * space which is needed to commit the index, and it is safe for the 799 * space which is needed to commit the index, and it is safe for the
802 * budgeting subsystem to assume the index is already committed, 800 * budgeting subsystem to assume the index is already committed,
803 * even though it is not. 801 * even though it is not.
804 */ 802 */
805 ubifs_assert(c->min_idx_lebs == ubifs_calc_min_idx_lebs(c)); 803 ubifs_assert(c->bi.min_idx_lebs == ubifs_calc_min_idx_lebs(c));
806 c->old_idx_sz = c->calc_idx_sz; 804 c->bi.old_idx_sz = c->calc_idx_sz;
807 c->budg_uncommitted_idx = 0; 805 c->bi.uncommitted_idx = 0;
808 c->min_idx_lebs = ubifs_calc_min_idx_lebs(c); 806 c->bi.min_idx_lebs = ubifs_calc_min_idx_lebs(c);
809 spin_unlock(&c->space_lock); 807 spin_unlock(&c->space_lock);
810 mutex_unlock(&c->tnc_mutex); 808 mutex_unlock(&c->tnc_mutex);
811 809
diff --git a/fs/ubifs/ubifs-media.h b/fs/ubifs/ubifs-media.h
index 191ca7863fe7..e24380cf46ed 100644
--- a/fs/ubifs/ubifs-media.h
+++ b/fs/ubifs/ubifs-media.h
@@ -408,9 +408,11 @@ enum {
408 * Superblock flags. 408 * Superblock flags.
409 * 409 *
410 * UBIFS_FLG_BIGLPT: if "big" LPT model is used if set 410 * UBIFS_FLG_BIGLPT: if "big" LPT model is used if set
411 * UBIFS_FLG_SPACE_FIXUP: first-mount "fixup" of free space within LEBs needed
411 */ 412 */
412enum { 413enum {
413 UBIFS_FLG_BIGLPT = 0x02, 414 UBIFS_FLG_BIGLPT = 0x02,
415 UBIFS_FLG_SPACE_FIXUP = 0x04,
414}; 416};
415 417
416/** 418/**
@@ -434,7 +436,7 @@ struct ubifs_ch {
434 __u8 node_type; 436 __u8 node_type;
435 __u8 group_type; 437 __u8 group_type;
436 __u8 padding[2]; 438 __u8 padding[2];
437} __attribute__ ((packed)); 439} __packed;
438 440
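__packed is simply the kernel's shorthand macro for __attribute__((packed)), so this run of hunks is purely cosmetic. A runnable user-space sketch of why on-media structures carry the attribute at all (demo_rec is a hypothetical record, not a UBIFS node):

#include <stdint.h>
#include <stdio.h>

struct demo_rec {
	uint8_t  node_type;
	uint64_t sqnum;
} __attribute__((packed));

struct demo_rec_padded {
	uint8_t  node_type;
	uint64_t sqnum;	/* compiler may insert 7 padding bytes before this */
};

int main(void)
{
	printf("packed:   %zu bytes\n", sizeof(struct demo_rec));        /* 9 */
	printf("unpacked: %zu bytes\n", sizeof(struct demo_rec_padded)); /* typically 16 */
	return 0;
}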
439/** 441/**
440 * union ubifs_dev_desc - device node descriptor. 442 * union ubifs_dev_desc - device node descriptor.
@@ -448,7 +450,7 @@ struct ubifs_ch {
448union ubifs_dev_desc { 450union ubifs_dev_desc {
449 __le32 new; 451 __le32 new;
450 __le64 huge; 452 __le64 huge;
451} __attribute__ ((packed)); 453} __packed;
452 454
453/** 455/**
454 * struct ubifs_ino_node - inode node. 456 * struct ubifs_ino_node - inode node.
@@ -509,7 +511,7 @@ struct ubifs_ino_node {
509 __le16 compr_type; 511 __le16 compr_type;
510 __u8 padding2[26]; /* Watch 'zero_ino_node_unused()' if changing! */ 512 __u8 padding2[26]; /* Watch 'zero_ino_node_unused()' if changing! */
511 __u8 data[]; 513 __u8 data[];
512} __attribute__ ((packed)); 514} __packed;
513 515
514/** 516/**
515 * struct ubifs_dent_node - directory entry node. 517 * struct ubifs_dent_node - directory entry node.
@@ -534,7 +536,7 @@ struct ubifs_dent_node {
534 __le16 nlen; 536 __le16 nlen;
535 __u8 padding2[4]; /* Watch 'zero_dent_node_unused()' if changing! */ 537 __u8 padding2[4]; /* Watch 'zero_dent_node_unused()' if changing! */
536 __u8 name[]; 538 __u8 name[];
537} __attribute__ ((packed)); 539} __packed;
538 540
539/** 541/**
540 * struct ubifs_data_node - data node. 542 * struct ubifs_data_node - data node.
@@ -555,7 +557,7 @@ struct ubifs_data_node {
555 __le16 compr_type; 557 __le16 compr_type;
556 __u8 padding[2]; /* Watch 'zero_data_node_unused()' if changing! */ 558 __u8 padding[2]; /* Watch 'zero_data_node_unused()' if changing! */
557 __u8 data[]; 559 __u8 data[];
558} __attribute__ ((packed)); 560} __packed;
559 561
560/** 562/**
561 * struct ubifs_trun_node - truncation node. 563 * struct ubifs_trun_node - truncation node.
@@ -575,7 +577,7 @@ struct ubifs_trun_node {
575 __u8 padding[12]; /* Watch 'zero_trun_node_unused()' if changing! */ 577 __u8 padding[12]; /* Watch 'zero_trun_node_unused()' if changing! */
576 __le64 old_size; 578 __le64 old_size;
577 __le64 new_size; 579 __le64 new_size;
578} __attribute__ ((packed)); 580} __packed;
579 581
580/** 582/**
581 * struct ubifs_pad_node - padding node. 583 * struct ubifs_pad_node - padding node.
@@ -586,7 +588,7 @@ struct ubifs_trun_node {
586struct ubifs_pad_node { 588struct ubifs_pad_node {
587 struct ubifs_ch ch; 589 struct ubifs_ch ch;
588 __le32 pad_len; 590 __le32 pad_len;
589} __attribute__ ((packed)); 591} __packed;
590 592
591/** 593/**
592 * struct ubifs_sb_node - superblock node. 594 * struct ubifs_sb_node - superblock node.
@@ -644,7 +646,7 @@ struct ubifs_sb_node {
644 __u8 uuid[16]; 646 __u8 uuid[16];
645 __le32 ro_compat_version; 647 __le32 ro_compat_version;
646 __u8 padding2[3968]; 648 __u8 padding2[3968];
647} __attribute__ ((packed)); 649} __packed;
648 650
649/** 651/**
650 * struct ubifs_mst_node - master node. 652 * struct ubifs_mst_node - master node.
@@ -711,7 +713,7 @@ struct ubifs_mst_node {
711 __le32 idx_lebs; 713 __le32 idx_lebs;
712 __le32 leb_cnt; 714 __le32 leb_cnt;
713 __u8 padding[344]; 715 __u8 padding[344];
714} __attribute__ ((packed)); 716} __packed;
715 717
716/** 718/**
717 * struct ubifs_ref_node - logical eraseblock reference node. 719 * struct ubifs_ref_node - logical eraseblock reference node.
@@ -727,7 +729,7 @@ struct ubifs_ref_node {
727 __le32 offs; 729 __le32 offs;
728 __le32 jhead; 730 __le32 jhead;
729 __u8 padding[28]; 731 __u8 padding[28];
730} __attribute__ ((packed)); 732} __packed;
731 733
732/** 734/**
733 * struct ubifs_branch - key/reference/length branch 735 * struct ubifs_branch - key/reference/length branch
@@ -741,7 +743,7 @@ struct ubifs_branch {
741 __le32 offs; 743 __le32 offs;
742 __le32 len; 744 __le32 len;
743 __u8 key[]; 745 __u8 key[];
744} __attribute__ ((packed)); 746} __packed;
745 747
746/** 748/**
747 * struct ubifs_idx_node - indexing node. 749 * struct ubifs_idx_node - indexing node.
@@ -755,7 +757,7 @@ struct ubifs_idx_node {
755 __le16 child_cnt; 757 __le16 child_cnt;
756 __le16 level; 758 __le16 level;
757 __u8 branches[]; 759 __u8 branches[];
758} __attribute__ ((packed)); 760} __packed;
759 761
760/** 762/**
761 * struct ubifs_cs_node - commit start node. 763 * struct ubifs_cs_node - commit start node.
@@ -765,7 +767,7 @@ struct ubifs_idx_node {
765struct ubifs_cs_node { 767struct ubifs_cs_node {
766 struct ubifs_ch ch; 768 struct ubifs_ch ch;
767 __le64 cmt_no; 769 __le64 cmt_no;
768} __attribute__ ((packed)); 770} __packed;
769 771
770/** 772/**
771 * struct ubifs_orph_node - orphan node. 773 * struct ubifs_orph_node - orphan node.
@@ -777,6 +779,6 @@ struct ubifs_orph_node {
777 struct ubifs_ch ch; 779 struct ubifs_ch ch;
778 __le64 cmt_no; 780 __le64 cmt_no;
779 __le64 inos[]; 781 __le64 inos[];
780} __attribute__ ((packed)); 782} __packed;
781 783
782#endif /* __UBIFS_MEDIA_H__ */ 784#endif /* __UBIFS_MEDIA_H__ */
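Several of these nodes end in a C99 flexible array member (data[], name[], inos[]): the header has a fixed size and the payload length is implied by the node length. A runnable sketch of sizing and filling such a record (constants illustrative only):

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

/* Shaped like ubifs_orph_node: a fixed prefix plus an open-ended tail */
struct demo_orph {
	uint64_t cmt_no;
	uint64_t inos[];	/* flexible array member */
} __attribute__((packed));

int main(void)
{
	size_t n = 3;		/* orphan inode numbers to store */
	struct demo_orph *orph = malloc(sizeof(*orph) + n * sizeof(orph->inos[0]));

	if (!orph)
		return 1;
	orph->cmt_no = 42;
	for (size_t i = 0; i < n; i++)
		orph->inos[i] = 100 + i;
	/* 8-byte header + 3 * 8-byte entries = 32 bytes */
	printf("record size: %zu bytes\n", sizeof(*orph) + n * sizeof(orph->inos[0]));
	free(orph);
	return 0;
}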
diff --git a/fs/ubifs/ubifs.h b/fs/ubifs/ubifs.h
index 0c9876b396dd..f79983d6f860 100644
--- a/fs/ubifs/ubifs.h
+++ b/fs/ubifs/ubifs.h
@@ -119,8 +119,12 @@
119 * in TNC. However, when replaying, it is handy to introduce fake "truncation" 119 * in TNC. However, when replaying, it is handy to introduce fake "truncation"
120 * keys for truncation nodes because the code becomes simpler. So we define 120 * keys for truncation nodes because the code becomes simpler. So we define
121 * %UBIFS_TRUN_KEY type. 121 * %UBIFS_TRUN_KEY type.
122 *
123 * But otherwise, outside the scope of journal replay, the truncation keys are
124 * invalid.
122 */ 125 */
123#define UBIFS_TRUN_KEY UBIFS_KEY_TYPES_CNT 126#define UBIFS_TRUN_KEY UBIFS_KEY_TYPES_CNT
127#define UBIFS_INVALID_KEY UBIFS_KEY_TYPES_CNT
124 128
125/* 129/*
126 * How much a directory entry/extended attribute entry adds to the parent/host 130 * How much a directory entry/extended attribute entry adds to the parent/host
@@ -147,6 +151,12 @@
147 */ 151 */
148#define WORST_COMPR_FACTOR 2 152#define WORST_COMPR_FACTOR 2
149 153
154/*
155 * How much memory is needed for a buffer where we compress a data node.
156 */
157#define COMPRESSED_DATA_NODE_BUF_SZ \
158 (UBIFS_DATA_NODE_SZ + UBIFS_BLOCK_SIZE * WORST_COMPR_FACTOR)
159
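The macro's arithmetic is worth seeing with numbers plugged in. A runnable sketch assuming UBIFS_BLOCK_SIZE is 4096 and WORST_COMPR_FACTOR is 2, per their definitions; the data-node header size used here is a stand-in for illustration:

#include <stdio.h>

#define UBIFS_BLOCK_SIZE    4096	/* per the UBIFS definitions */
#define WORST_COMPR_FACTOR  2		/* see the hunk above */
#define UBIFS_DATA_NODE_SZ  48		/* stand-in header size */

#define COMPRESSED_DATA_NODE_BUF_SZ \
	(UBIFS_DATA_NODE_SZ + UBIFS_BLOCK_SIZE * WORST_COMPR_FACTOR)

int main(void)
{
	/* Worst case: a compressor may expand a 4 KiB block up to 2x, so
	 * the scratch buffer must hold header + 8 KiB = 8240 bytes */
	printf("%d\n", COMPRESSED_DATA_NODE_BUF_SZ);
	return 0;
}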
150/* Maximum expected tree height for use by bottom_up_buf */ 160/* Maximum expected tree height for use by bottom_up_buf */
151#define BOTTOM_UP_HEIGHT 64 161#define BOTTOM_UP_HEIGHT 64
152 162
@@ -379,9 +389,9 @@ struct ubifs_gced_idx_leb {
379 * The @ui_size is a "shadow" variable for @inode->i_size and UBIFS uses 389 * The @ui_size is a "shadow" variable for @inode->i_size and UBIFS uses
380 * @ui_size instead of @inode->i_size. The reason for this is that UBIFS cannot 390 * @ui_size instead of @inode->i_size. The reason for this is that UBIFS cannot
381 * make sure @inode->i_size is always changed under @ui_mutex, because it 391 * make sure @inode->i_size is always changed under @ui_mutex, because it
382 * cannot call 'truncate_setsize()' with @ui_mutex locked, because it would deadlock 392 * cannot call 'truncate_setsize()' with @ui_mutex locked, because it would
383 * with 'ubifs_writepage()' (see file.c). All the other inode fields are 393 * deadlock with 'ubifs_writepage()' (see file.c). All the other inode fields
384 * changed under @ui_mutex, so they do not need "shadow" fields. Note, one 394 * are changed under @ui_mutex, so they do not need "shadow" fields. Note, one
385 * could consider to rework locking and base it on "shadow" fields. 395 * could consider to rework locking and base it on "shadow" fields.
386 */ 396 */
387struct ubifs_inode { 397struct ubifs_inode {
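The reworded comment above describes the "shadow size" pattern: @ui_size duplicates @inode->i_size because the VFS size cannot always be updated with @ui_mutex held. A hypothetical user-space rendering of that pattern (names are illustrative and do not match real UBIFS helpers):

#include <pthread.h>

struct shadow_inode {
	pthread_mutex_t ui_mutex;	/* stands in for @ui_mutex */
	long long ui_size;	/* shadow copy, changed only under ui_mutex */
	long long i_size;	/* VFS size, may change outside ui_mutex */
};

/* Truncation cannot take ui_mutex around the VFS size update (it would
 * deadlock with the writeback path), so i_size is set lock-free ... */
static void set_vfs_size(struct shadow_inode *ui, long long size)
{
	ui->i_size = size;
}

/* ... while UBIFS-internal code trusts the shadow copy instead. */
static void set_shadow_size(struct shadow_inode *ui, long long size)
{
	pthread_mutex_lock(&ui->ui_mutex);
	ui->ui_size = size;
	pthread_mutex_unlock(&ui->ui_mutex);
}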
@@ -642,6 +652,7 @@ typedef int (*ubifs_lpt_scan_callback)(struct ubifs_info *c,
642 * @offs: write-buffer offset in this logical eraseblock 652 * @offs: write-buffer offset in this logical eraseblock
643 * @avail: number of bytes available in the write-buffer 653 * @avail: number of bytes available in the write-buffer
644 * @used: number of used bytes in the write-buffer 654 * @used: number of used bytes in the write-buffer
655 * @size: write-buffer size (in [@c->min_io_size, @c->max_write_size] range)
645 * @dtype: type of data stored in this LEB (%UBI_LONGTERM, %UBI_SHORTTERM, 656 * @dtype: type of data stored in this LEB (%UBI_LONGTERM, %UBI_SHORTTERM,
646 * %UBI_UNKNOWN) 657 * %UBI_UNKNOWN)
647 * @jhead: journal head the mutex belongs to (note, needed only to shut lockdep 658 * @jhead: journal head the mutex belongs to (note, needed only to shut lockdep
@@ -676,6 +687,7 @@ struct ubifs_wbuf {
676 int offs; 687 int offs;
677 int avail; 688 int avail;
678 int used; 689 int used;
690 int size;
679 int dtype; 691 int dtype;
680 int jhead; 692 int jhead;
681 int (*sync_callback)(struct ubifs_info *c, int lnum, int free, int pad); 693 int (*sync_callback)(struct ubifs_info *c, int lnum, int free, int pad);
@@ -710,12 +722,14 @@ struct ubifs_bud {
710 * struct ubifs_jhead - journal head. 722 * struct ubifs_jhead - journal head.
711 * @wbuf: head's write-buffer 723 * @wbuf: head's write-buffer
712 * @buds_list: list of bud LEBs belonging to this journal head 724 * @buds_list: list of bud LEBs belonging to this journal head
725 * @grouped: non-zero if UBIFS groups nodes when writing to this journal head
713 * 726 *
714 * Note, the @buds list is protected by the @c->buds_lock. 727 * Note, the @buds list is protected by the @c->buds_lock.
715 */ 728 */
716struct ubifs_jhead { 729struct ubifs_jhead {
717 struct ubifs_wbuf wbuf; 730 struct ubifs_wbuf wbuf;
718 struct list_head buds_list; 731 struct list_head buds_list;
732 unsigned int grouped:1;
719}; 733};
720 734
721/** 735/**
@@ -925,6 +939,40 @@ struct ubifs_mount_opts {
925 unsigned int compr_type:2; 939 unsigned int compr_type:2;
926}; 940};
927 941
942/**
943 * struct ubifs_budg_info - UBIFS budgeting information.
944 * @idx_growth: amount of bytes budgeted for index growth
945 * @data_growth: amount of bytes budgeted for cached data
946 * @dd_growth: amount of bytes budgeted for cached data that will make
947 * other data dirty
 948 * @uncommitted_idx: amount of bytes budgeted for index growth which still
 949 *                   have to be taken into account because the index
 950 *                   has not been committed so far
951 * @old_idx_sz: size of index on flash
952 * @min_idx_lebs: minimum number of LEBs required for the index
953 * @nospace: non-zero if the file-system does not have flash space (used as
 954 *           an optimization)
955 * @nospace_rp: the same as @nospace, but additionally means that even reserved
956 * pool is full
 957 * @page_budget: budget for a page (constant, never changed after mount)
 958 * @inode_budget: budget for an inode (constant, never changed after mount)
 959 * @dent_budget: budget for a directory entry (constant, never changed after
 960 *               mount)
961 */
962struct ubifs_budg_info {
963 long long idx_growth;
964 long long data_growth;
965 long long dd_growth;
966 long long uncommitted_idx;
967 unsigned long long old_idx_sz;
968 int min_idx_lebs;
969 unsigned int nospace:1;
970 unsigned int nospace_rp:1;
971 int page_budget;
972 int inode_budget;
973 int dent_budget;
974};
975
928struct ubifs_debug_info; 976struct ubifs_debug_info;
929 977
930/** 978/**
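struct ubifs_budg_info gathers the budgeting fields that previously lived directly in struct ubifs_info; all of them are serialized by @space_lock (see the hunks below). A hedged user-space sketch of taking a consistent snapshot in a single critical section (a pthread mutex stands in for the kernel spinlock; the helper is hypothetical):

#include <pthread.h>

struct budg_info {	/* condensed stand-in for ubifs_budg_info */
	long long idx_growth;
	long long data_growth;
	long long dd_growth;
};

struct fs_info {
	pthread_mutex_t space_lock;	/* a spinlock in the kernel */
	struct budg_info bi;
};

/* The fields move together, so one critical section yields a
 * consistent total. */
static long long total_budgeted(struct fs_info *c)
{
	long long total;

	pthread_mutex_lock(&c->space_lock);
	total = c->bi.idx_growth + c->bi.data_growth + c->bi.dd_growth;
	pthread_mutex_unlock(&c->space_lock);
	return total;
}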
@@ -968,6 +1016,7 @@ struct ubifs_debug_info;
968 * @cmt_wq: wait queue to sleep on if the log is full and a commit is running 1016 * @cmt_wq: wait queue to sleep on if the log is full and a commit is running
969 * 1017 *
970 * @big_lpt: flag that LPT is too big to write whole during commit 1018 * @big_lpt: flag that LPT is too big to write whole during commit
1019 * @space_fixup: flag indicating that free space in LEBs needs to be cleaned up
971 * @no_chk_data_crc: do not check CRCs when reading data nodes (except during 1020 * @no_chk_data_crc: do not check CRCs when reading data nodes (except during
972 * recovery) 1021 * recovery)
973 * @bulk_read: enable bulk-reads 1022 * @bulk_read: enable bulk-reads
@@ -999,6 +1048,11 @@ struct ubifs_debug_info;
999 * @bu_mutex: protects the pre-allocated bulk-read buffer and @c->bu 1048 * @bu_mutex: protects the pre-allocated bulk-read buffer and @c->bu
1000 * @bu: pre-allocated bulk-read information 1049 * @bu: pre-allocated bulk-read information
1001 * 1050 *
1051 * @write_reserve_mutex: protects @write_reserve_buf
1052 * @write_reserve_buf: on the write path we allocate memory, which might
1053 * sometimes be unavailable, in which case we use this
1054 * write reserve buffer
1055 *
1002 * @log_lebs: number of logical eraseblocks in the log 1056 * @log_lebs: number of logical eraseblocks in the log
1003 * @log_bytes: log size in bytes 1057 * @log_bytes: log size in bytes
1004 * @log_last: last LEB of the log 1058 * @log_last: last LEB of the log
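The @write_reserve_buf comment describes a classic fallback pattern: a buffer pre-allocated at mount time keeps the write path moving when an on-demand allocation fails. A hedged user-space sketch of that pattern (illustrative only, not the actual UBIFS code):

#include <pthread.h>
#include <stdlib.h>

struct writer {
	pthread_mutex_t write_reserve_mutex;
	void *write_reserve_buf;	/* pre-allocated at mount time */
	size_t reserve_len;
};

static void *get_write_buf(struct writer *w, size_t len, int *from_reserve)
{
	void *buf = malloc(len);	/* kmalloc() in the kernel */

	if (buf) {
		*from_reserve = 0;
		return buf;
	}
	/* Allocation failed: fall back to the reserve buffer; the mutex
	 * serializes the single reserve between concurrent writers. */
	pthread_mutex_lock(&w->write_reserve_mutex);
	*from_reserve = 1;
	return w->write_reserve_buf;	/* caller ensures len <= reserve_len */
}

static void put_write_buf(struct writer *w, void *buf, int from_reserve)
{
	if (from_reserve)
		pthread_mutex_unlock(&w->write_reserve_mutex);
	else
		free(buf);
}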
@@ -1020,7 +1074,12 @@ struct ubifs_debug_info;
1020 * 1074 *
1021 * @min_io_size: minimal input/output unit size 1075 * @min_io_size: minimal input/output unit size
1022 * @min_io_shift: number of bits in @min_io_size minus one 1076 * @min_io_shift: number of bits in @min_io_size minus one
1077 * @max_write_size: maximum amount of bytes the underlying flash can write at a
1078 * time (MTD write buffer size)
1079 * @max_write_shift: number of bits in @max_write_size minus one
1023 * @leb_size: logical eraseblock size in bytes 1080 * @leb_size: logical eraseblock size in bytes
1081 * @leb_start: starting offset of logical eraseblocks within physical
1082 * eraseblocks
1024 * @half_leb_size: half LEB size 1083 * @half_leb_size: half LEB size
1025 * @idx_leb_size: how many bytes of an LEB are effectively available when it is 1084 * @idx_leb_size: how many bytes of an LEB are effectively available when it is
1026 * used to store indexing nodes (@leb_size - @max_idx_node_sz) 1085 * used to store indexing nodes (@leb_size - @max_idx_node_sz)
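@min_io_shift and the new @max_write_shift cache the log2 of their power-of-two counterparts, which is what the "number of bits minus one" wording amounts to; the kernel derives them with fls(). A user-space stand-in:

#include <stdio.h>

static int shift_of(unsigned int size)	/* log2 for power-of-two sizes */
{
	int bits = 0;

	while (size >>= 1)
		bits++;
	return bits;
}

int main(void)
{
	/* Illustrative NAND geometry: 2 KiB pages, 4 KiB write buffer. */
	int min_io_size = 2048, max_write_size = 4096;

	printf("min_io_shift=%d max_write_shift=%d\n",
	       shift_of(min_io_size), shift_of(max_write_size));
	return 0;	/* prints min_io_shift=11 max_write_shift=12 */
}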
@@ -1028,37 +1087,21 @@ struct ubifs_debug_info;
1028 * @max_leb_cnt: maximum count of logical eraseblocks 1087 * @max_leb_cnt: maximum count of logical eraseblocks
1029 * @old_leb_cnt: count of logical eraseblocks before re-size 1088 * @old_leb_cnt: count of logical eraseblocks before re-size
1030 * @ro_media: the underlying UBI volume is read-only 1089 * @ro_media: the underlying UBI volume is read-only
1090 * @ro_mount: the file-system was mounted as read-only
1091 * @ro_error: UBIFS switched to R/O mode because an error happened
1031 * 1092 *
1032 * @dirty_pg_cnt: number of dirty pages (not used) 1093 * @dirty_pg_cnt: number of dirty pages (not used)
1033 * @dirty_zn_cnt: number of dirty znodes 1094 * @dirty_zn_cnt: number of dirty znodes
1034 * @clean_zn_cnt: number of clean znodes 1095 * @clean_zn_cnt: number of clean znodes
1035 * 1096 *
1036 * @budg_idx_growth: amount of bytes budgeted for index growth 1097 * @space_lock: protects @bi and @lst
1037 * @budg_data_growth: amount of bytes budgeted for cached data 1098 * @lst: lprops statistics
1038 * @budg_dd_growth: amount of bytes budgeted for cached data that will make 1099 * @bi: budgeting information
1039 * other data dirty
1040 * @budg_uncommitted_idx: amount of bytes were budgeted for growth of the index,
1041 * but which still have to be taken into account because
1042 * the index has not been committed so far
1043 * @space_lock: protects @budg_idx_growth, @budg_data_growth, @budg_dd_growth,
1044 * @budg_uncommited_idx, @min_idx_lebs, @old_idx_sz, @lst,
1045 * @nospace, and @nospace_rp;
1046 * @min_idx_lebs: minimum number of LEBs required for the index
1047 * @old_idx_sz: size of index on flash
1048 * @calc_idx_sz: temporary variable which is used to calculate new index size 1100 * @calc_idx_sz: temporary variable which is used to calculate new index size
1049 * (contains accurate new index size at end of TNC commit start) 1101 * (contains accurate new index size at end of TNC commit start)
1050 * @lst: lprops statistics
1051 * @nospace: non-zero if the file-system does not have flash space (used as
1052 * optimization)
1053 * @nospace_rp: the same as @nospace, but additionally means that even reserved
1054 * pool is full
1055 *
1056 * @page_budget: budget for a page
1057 * @inode_budget: budget for an inode
1058 * @dent_budget: budget for a directory entry
1059 * 1102 *
1060 * @ref_node_alsz: size of the LEB reference node aligned to the min. flash 1103 * @ref_node_alsz: size of the LEB reference node aligned to the min. flash
1061 * I/O unit 1104 * I/O unit
1062 * @mst_node_alsz: master node aligned size 1105 * @mst_node_alsz: master node aligned size
1063 * @min_idx_node_sz: minimum indexing node aligned on 8-bytes boundary 1106 * @min_idx_node_sz: minimum indexing node aligned on 8-bytes boundary
1064 * @max_idx_node_sz: maximum indexing node aligned on 8-bytes boundary 1107 * @max_idx_node_sz: maximum indexing node aligned on 8-bytes boundary
@@ -1160,19 +1203,20 @@ struct ubifs_debug_info;
1160 * @rp_uid: reserved pool user ID 1203 * @rp_uid: reserved pool user ID
1161 * @rp_gid: reserved pool group ID 1204 * @rp_gid: reserved pool group ID
1162 * 1205 *
1163 * @empty: if the UBI device is empty 1206 * @empty: %1 if the UBI device is empty
1164 * @replay_tree: temporary tree used during journal replay 1207 * @need_recovery: %1 if the file-system needs recovery
1208 * @replaying: %1 during journal replay
1209 * @mounting: %1 while mounting
1210 * @remounting_rw: %1 while re-mounting from R/O mode to R/W mode
1165 * @replay_list: temporary list used during journal replay 1211 * @replay_list: temporary list used during journal replay
1166 * @replay_buds: list of buds to replay 1212 * @replay_buds: list of buds to replay
1167 * @cs_sqnum: sequence number of first node in the log (commit start node) 1213 * @cs_sqnum: sequence number of first node in the log (commit start node)
1168 * @replay_sqnum: sequence number of node currently being replayed 1214 * @replay_sqnum: sequence number of node currently being replayed
1169 * @need_recovery: file-system needs recovery 1215 * @unclean_leb_list: LEBs to recover when re-mounting R/O mounted FS to R/W
1170 * @replaying: set to %1 during journal replay 1216 * mode
1171 * @unclean_leb_list: LEBs to recover when mounting ro to rw 1217 * @rcvrd_mst_node: recovered master node to write when re-mounting R/O mounted
1172 * @rcvrd_mst_node: recovered master node to write when mounting ro to rw 1218 * FS to R/W mode
1173 * @size_tree: inode size information for recovery 1219 * @size_tree: inode size information for recovery
1174 * @remounting_rw: set while remounting from ro to rw (sb flags have MS_RDONLY)
1175 * @always_chk_crc: always check CRCs (while mounting and remounting rw)
1176 * @mount_opts: UBIFS-specific mount options 1220 * @mount_opts: UBIFS-specific mount options
1177 * 1221 *
1178 * @dbg: debugging-related information 1222 * @dbg: debugging-related information
@@ -1212,6 +1256,7 @@ struct ubifs_info {
1212 wait_queue_head_t cmt_wq; 1256 wait_queue_head_t cmt_wq;
1213 1257
1214 unsigned int big_lpt:1; 1258 unsigned int big_lpt:1;
1259 unsigned int space_fixup:1;
1215 unsigned int no_chk_data_crc:1; 1260 unsigned int no_chk_data_crc:1;
1216 unsigned int bulk_read:1; 1261 unsigned int bulk_read:1;
1217 unsigned int default_compr:2; 1262 unsigned int default_compr:2;
@@ -1241,6 +1286,9 @@ struct ubifs_info {
1241 struct mutex bu_mutex; 1286 struct mutex bu_mutex;
1242 struct bu_info bu; 1287 struct bu_info bu;
1243 1288
1289 struct mutex write_reserve_mutex;
1290 void *write_reserve_buf;
1291
1244 int log_lebs; 1292 int log_lebs;
1245 long long log_bytes; 1293 long long log_bytes;
1246 int log_last; 1294 int log_last;
@@ -1262,33 +1310,27 @@ struct ubifs_info {
1262 1310
1263 int min_io_size; 1311 int min_io_size;
1264 int min_io_shift; 1312 int min_io_shift;
1313 int max_write_size;
1314 int max_write_shift;
1265 int leb_size; 1315 int leb_size;
1316 int leb_start;
1266 int half_leb_size; 1317 int half_leb_size;
1267 int idx_leb_size; 1318 int idx_leb_size;
1268 int leb_cnt; 1319 int leb_cnt;
1269 int max_leb_cnt; 1320 int max_leb_cnt;
1270 int old_leb_cnt; 1321 int old_leb_cnt;
1271 int ro_media; 1322 unsigned int ro_media:1;
1323 unsigned int ro_mount:1;
1324 unsigned int ro_error:1;
1272 1325
1273 atomic_long_t dirty_pg_cnt; 1326 atomic_long_t dirty_pg_cnt;
1274 atomic_long_t dirty_zn_cnt; 1327 atomic_long_t dirty_zn_cnt;
1275 atomic_long_t clean_zn_cnt; 1328 atomic_long_t clean_zn_cnt;
1276 1329
1277 long long budg_idx_growth;
1278 long long budg_data_growth;
1279 long long budg_dd_growth;
1280 long long budg_uncommitted_idx;
1281 spinlock_t space_lock; 1330 spinlock_t space_lock;
1282 int min_idx_lebs;
1283 unsigned long long old_idx_sz;
1284 unsigned long long calc_idx_sz;
1285 struct ubifs_lp_stats lst; 1331 struct ubifs_lp_stats lst;
1286 unsigned int nospace:1; 1332 struct ubifs_budg_info bi;
1287 unsigned int nospace_rp:1; 1333 unsigned long long calc_idx_sz;
1288
1289 int page_budget;
1290 int inode_budget;
1291 int dent_budget;
1292 1334
1293 int ref_node_alsz; 1335 int ref_node_alsz;
1294 int mst_node_alsz; 1336 int mst_node_alsz;
@@ -1391,19 +1433,18 @@ struct ubifs_info {
1391 gid_t rp_gid; 1433 gid_t rp_gid;
1392 1434
1393 /* The below fields are used only during mounting and re-mounting */ 1435 /* The below fields are used only during mounting and re-mounting */
1394 int empty; 1436 unsigned int empty:1;
1395 struct rb_root replay_tree; 1437 unsigned int need_recovery:1;
1438 unsigned int replaying:1;
1439 unsigned int mounting:1;
1440 unsigned int remounting_rw:1;
1396 struct list_head replay_list; 1441 struct list_head replay_list;
1397 struct list_head replay_buds; 1442 struct list_head replay_buds;
1398 unsigned long long cs_sqnum; 1443 unsigned long long cs_sqnum;
1399 unsigned long long replay_sqnum; 1444 unsigned long long replay_sqnum;
1400 int need_recovery;
1401 int replaying;
1402 struct list_head unclean_leb_list; 1445 struct list_head unclean_leb_list;
1403 struct ubifs_mst_node *rcvrd_mst_node; 1446 struct ubifs_mst_node *rcvrd_mst_node;
1404 struct rb_root size_tree; 1447 struct rb_root size_tree;
1405 int remounting_rw;
1406 int always_chk_crc;
1407 struct ubifs_mount_opts mount_opts; 1448 struct ubifs_mount_opts mount_opts;
1408 1449
1409#ifdef CONFIG_UBIFS_FS_DEBUG 1450#ifdef CONFIG_UBIFS_FS_DEBUG
@@ -1575,7 +1616,7 @@ int ubifs_tnc_start_commit(struct ubifs_info *c, struct ubifs_zbranch *zroot);
1575int ubifs_tnc_end_commit(struct ubifs_info *c); 1616int ubifs_tnc_end_commit(struct ubifs_info *c);
1576 1617
1577/* shrinker.c */ 1618/* shrinker.c */
1578int ubifs_shrinker(struct shrinker *shrink, int nr_to_scan, gfp_t gfp_mask); 1619int ubifs_shrinker(struct shrinker *shrink, struct shrink_control *sc);
1579 1620
1580/* commit.c */ 1621/* commit.c */
1581int ubifs_bg_thread(void *info); 1622int ubifs_bg_thread(void *info);
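The shrinker prototype moves to the shrink_control API of this kernel generation: nr_to_scan and gfp_mask now arrive packed in one struct instead of as separate arguments. A hedged kernel-style sketch of the calling convention only (the real logic lives in fs/ubifs/shrinker.c):

#include <linux/mm.h>	/* struct shrinker, struct shrink_control */

static int example_shrinker(struct shrinker *shrink,
			    struct shrink_control *sc)
{
	long freeable = 0;	/* e.g. atomic_long_read(&clean_zn_cnt) */

	if (sc->nr_to_scan == 0)
		return freeable;	/* query pass: report the count only */

	/* Reclaim pass: free up to sc->nr_to_scan objects, honouring
	 * sc->gfp_mask, then report how many remain. */
	return freeable;
}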
@@ -1594,6 +1635,7 @@ int ubifs_write_master(struct ubifs_info *c);
1594int ubifs_read_superblock(struct ubifs_info *c); 1635int ubifs_read_superblock(struct ubifs_info *c);
1595struct ubifs_sb_node *ubifs_read_sb_node(struct ubifs_info *c); 1636struct ubifs_sb_node *ubifs_read_sb_node(struct ubifs_info *c);
1596int ubifs_write_sb_node(struct ubifs_info *c, struct ubifs_sb_node *sup); 1637int ubifs_write_sb_node(struct ubifs_info *c, struct ubifs_sb_node *sup);
1638int ubifs_fixup_free_space(struct ubifs_info *c);
1597 1639
1598/* replay.c */ 1640/* replay.c */
1599int ubifs_validate_entry(struct ubifs_info *c, 1641int ubifs_validate_entry(struct ubifs_info *c,
@@ -1702,7 +1744,7 @@ struct inode *ubifs_iget(struct super_block *sb, unsigned long inum);
1702int ubifs_recover_master_node(struct ubifs_info *c); 1744int ubifs_recover_master_node(struct ubifs_info *c);
1703int ubifs_write_rcvrd_mst_node(struct ubifs_info *c); 1745int ubifs_write_rcvrd_mst_node(struct ubifs_info *c);
1704struct ubifs_scan_leb *ubifs_recover_leb(struct ubifs_info *c, int lnum, 1746struct ubifs_scan_leb *ubifs_recover_leb(struct ubifs_info *c, int lnum,
1705 int offs, void *sbuf, int grouped); 1747 int offs, void *sbuf, int jhead);
1706struct ubifs_scan_leb *ubifs_recover_log_leb(struct ubifs_info *c, int lnum, 1748struct ubifs_scan_leb *ubifs_recover_log_leb(struct ubifs_info *c, int lnum,
1707 int offs, void *sbuf); 1749 int offs, void *sbuf);
1708int ubifs_recover_inl_heads(const struct ubifs_info *c, void *sbuf); 1750int ubifs_recover_inl_heads(const struct ubifs_info *c, void *sbuf);
diff --git a/fs/ubifs/xattr.c b/fs/ubifs/xattr.c
index c74400f88fe0..16f19f55e63f 100644
--- a/fs/ubifs/xattr.c
+++ b/fs/ubifs/xattr.c
@@ -56,6 +56,7 @@
56 */ 56 */
57 57
58#include "ubifs.h" 58#include "ubifs.h"
59#include <linux/fs.h>
59#include <linux/slab.h> 60#include <linux/slab.h>
60#include <linux/xattr.h> 61#include <linux/xattr.h>
61#include <linux/posix_acl_xattr.h> 62#include <linux/posix_acl_xattr.h>
@@ -79,9 +80,8 @@ enum {
79 SECURITY_XATTR, 80 SECURITY_XATTR,
80}; 81};
81 82
82static const struct inode_operations none_inode_operations; 83static const struct inode_operations empty_iops;
83static const struct address_space_operations none_address_operations; 84static const struct file_operations empty_fops;
84static const struct file_operations none_file_operations;
85 85
86/** 86/**
87 * create_xattr - create an extended attribute. 87 * create_xattr - create an extended attribute.
@@ -130,9 +130,9 @@ static int create_xattr(struct ubifs_info *c, struct inode *host,
130 } 130 }
131 131
132 /* Re-define all operations to be "nothing" */ 132 /* Re-define all operations to be "nothing" */
133 inode->i_mapping->a_ops = &none_address_operations; 133 inode->i_mapping->a_ops = &empty_aops;
134 inode->i_op = &none_inode_operations; 134 inode->i_op = &empty_iops;
135 inode->i_fop = &none_file_operations; 135 inode->i_fop = &empty_fops;
136 136
137 inode->i_flags |= S_SYNC | S_NOATIME | S_NOCMTIME | S_NOQUOTA; 137 inode->i_flags |= S_SYNC | S_NOATIME | S_NOCMTIME | S_NOQUOTA;
138 ui = ubifs_inode(inode); 138 ui = ubifs_inode(inode);
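The new #include <linux/fs.h> pulls in the generic empty_aops that the VFS began exporting around this time, which is why the local none_address_operations table could be dropped; the inode and file tables stay local, renamed to empty_iops/empty_fops. A hedged sketch of the stubbing pattern create_xattr() applies (the helper is hypothetical, and empty_aops is assumed to be declared by <linux/fs.h> as on kernels carrying this patch):

#include <linux/fs.h>	/* declares the VFS-exported empty_aops */

/* All-NULL by static initialization: every operation is "nothing". */
static const struct inode_operations empty_iops;
static const struct file_operations empty_fops;

static void stub_xattr_inode_ops(struct inode *inode)
{
	/* Xattr inodes must never be touched through the normal VFS
	 * paths, so every operations table points at an empty one. */
	inode->i_mapping->a_ops = &empty_aops;
	inode->i_op = &empty_iops;
	inode->i_fop = &empty_fops;
}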