aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2011-03-18 13:50:27 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2011-03-18 13:50:27 -0400
commit8f627a8a881481598c2591c3acc122fb9be7bac4 (patch)
tree06497d25e30824500aeaf8c736c45b070f121234 /fs
parentfd57ed021990157ee5b3997c3f21c734093a9e23 (diff)
parent5d630e43284fdb0613e4e7e7dd906f27bc25b6af (diff)
Merge branch 'linux-next' of git://git.infradead.org/ubifs-2.6
* 'linux-next' of git://git.infradead.org/ubifs-2.6: (25 commits) UBIFS: clean-up commentaries UBIFS: save 128KiB or more RAM UBIFS: allocate orphans scan buffer on demand UBIFS: allocate lpt dump buffer on demand UBIFS: allocate ltab checking buffer on demand UBIFS: allocate scanning buffer on demand UBIFS: allocate dump buffer on demand UBIFS: do not check data crc by default UBIFS: simplify UBIFS Kconfig menu UBIFS: print max. index node size UBIFS: handle allocation failures in UBIFS write path UBIFS: use max_write_size during recovery UBIFS: use max_write_size for write-buffers UBIFS: introduce write-buffer size field UBI: incorporate LEB offset information UBIFS: incorporate maximum write size UBI: provide LEB offset information UBI: incorporate maximum write size UBIFS: fix LEB number in printk UBIFS: restrict world-writable debugfs files ...
Diffstat (limited to 'fs')
-rw-r--r--fs/ubifs/Kconfig23
-rw-r--r--fs/ubifs/commit.c58
-rw-r--r--fs/ubifs/debug.c34
-rw-r--r--fs/ubifs/debug.h30
-rw-r--r--fs/ubifs/io.c201
-rw-r--r--fs/ubifs/journal.c28
-rw-r--r--fs/ubifs/lprops.c26
-rw-r--r--fs/ubifs/lpt_commit.c56
-rw-r--r--fs/ubifs/orphan.c10
-rw-r--r--fs/ubifs/recovery.c44
-rw-r--r--fs/ubifs/scan.c2
-rw-r--r--fs/ubifs/super.c54
-rw-r--r--fs/ubifs/tnc.c10
-rw-r--r--fs/ubifs/ubifs.h45
14 files changed, 447 insertions, 174 deletions
diff --git a/fs/ubifs/Kconfig b/fs/ubifs/Kconfig
index 830e3f76f442..1d1859dc3de5 100644
--- a/fs/ubifs/Kconfig
+++ b/fs/ubifs/Kconfig
@@ -44,23 +44,20 @@ config UBIFS_FS_ZLIB
44 44
45# Debugging-related stuff 45# Debugging-related stuff
46config UBIFS_FS_DEBUG 46config UBIFS_FS_DEBUG
47 bool "Enable debugging" 47 bool "Enable debugging support"
48 depends on UBIFS_FS 48 depends on UBIFS_FS
49 select DEBUG_FS 49 select DEBUG_FS
50 select KALLSYMS_ALL 50 select KALLSYMS_ALL
51 help 51 help
52 This option enables UBIFS debugging. 52 This option enables UBIFS debugging support. It makes sure various
53 53 assertions, self-checks, debugging messages and test modes are compiled
54config UBIFS_FS_DEBUG_MSG_LVL 54 in (this all is compiled out otherwise). Assertions are light-weight
55 int "Default message level (0 = no extra messages, 3 = lots)" 55 and this option also enables them. Self-checks, debugging messages and
56 depends on UBIFS_FS_DEBUG 56 test modes are switched off by default. Thus, it is safe and actually
57 default "0" 57 recommended to have debugging support enabled, and it should not slow
58 help 58 down UBIFS. You can then further enable / disable individual debugging
59 This controls the amount of debugging messages produced by UBIFS. 59 features using UBIFS module parameters and the corresponding sysfs
60 If reporting bugs, please try to have available a full dump of the 60 interfaces.
61 messages at level 1 while the misbehaviour was occurring. Level 2
62 may become necessary if level 1 messages were not enough to find the
63 bug. Generally Level 3 should be avoided.
64 61
65config UBIFS_FS_DEBUG_CHKS 62config UBIFS_FS_DEBUG_CHKS
66 bool "Enable extra checks" 63 bool "Enable extra checks"
diff --git a/fs/ubifs/commit.c b/fs/ubifs/commit.c
index 02429d81ca33..b148fbc80f8d 100644
--- a/fs/ubifs/commit.c
+++ b/fs/ubifs/commit.c
@@ -48,6 +48,56 @@
48#include <linux/slab.h> 48#include <linux/slab.h>
49#include "ubifs.h" 49#include "ubifs.h"
50 50
51/*
52 * nothing_to_commit - check if there is nothing to commit.
53 * @c: UBIFS file-system description object
54 *
55 * This is a helper function which checks if there is anything to commit. It is
56 * used as an optimization to avoid starting the commit if it is not really
57 * necessary. Indeed, the commit operation always assumes flash I/O (e.g.,
58 * writing the commit start node to the log), and it is better to avoid doing
59 * this unnecessarily. E.g., 'ubifs_sync_fs()' runs the commit, but if there is
60 * nothing to commit, it is more optimal to avoid any flash I/O.
61 *
62 * This function has to be called with @c->commit_sem locked for writing -
63 * this function does not take LPT/TNC locks because the @c->commit_sem
64 * guarantees that we have exclusive access to the TNC and LPT data structures.
65 *
66 * This function returns %1 if there is nothing to commit and %0 otherwise.
67 */
68static int nothing_to_commit(struct ubifs_info *c)
69{
70 /*
71 * During mounting or remounting from R/O mode to R/W mode we may
72 * commit for various recovery-related reasons.
73 */
74 if (c->mounting || c->remounting_rw)
75 return 0;
76
77 /*
78 * If the root TNC node is dirty, we definitely have something to
79 * commit.
80 */
81 if (c->zroot.znode && test_bit(DIRTY_ZNODE, &c->zroot.znode->flags))
82 return 0;
83
84 /*
85 * Even though the TNC is clean, the LPT tree may have dirty nodes. For
86 * example, this may happen if the budgeting subsystem invoked GC to
87 * make some free space, and the GC found an LEB with only dirty and
88 * free space. In this case GC would just change the lprops of this
89 * LEB (by turning all space into free space) and unmap it.
90 */
91 if (c->nroot && test_bit(DIRTY_CNODE, &c->nroot->flags))
92 return 0;
93
94 ubifs_assert(atomic_long_read(&c->dirty_zn_cnt) == 0);
95 ubifs_assert(c->dirty_pn_cnt == 0);
96 ubifs_assert(c->dirty_nn_cnt == 0);
97
98 return 1;
99}
100
51/** 101/**
52 * do_commit - commit the journal. 102 * do_commit - commit the journal.
53 * @c: UBIFS file-system description object 103 * @c: UBIFS file-system description object
@@ -70,6 +120,12 @@ static int do_commit(struct ubifs_info *c)
70 goto out_up; 120 goto out_up;
71 } 121 }
72 122
123 if (nothing_to_commit(c)) {
124 up_write(&c->commit_sem);
125 err = 0;
126 goto out_cancel;
127 }
128
73 /* Sync all write buffers (necessary for recovery) */ 129 /* Sync all write buffers (necessary for recovery) */
74 for (i = 0; i < c->jhead_cnt; i++) { 130 for (i = 0; i < c->jhead_cnt; i++) {
75 err = ubifs_wbuf_sync(&c->jheads[i].wbuf); 131 err = ubifs_wbuf_sync(&c->jheads[i].wbuf);
@@ -162,12 +218,12 @@ static int do_commit(struct ubifs_info *c)
162 if (err) 218 if (err)
163 goto out; 219 goto out;
164 220
221out_cancel:
165 spin_lock(&c->cs_lock); 222 spin_lock(&c->cs_lock);
166 c->cmt_state = COMMIT_RESTING; 223 c->cmt_state = COMMIT_RESTING;
167 wake_up(&c->cmt_wq); 224 wake_up(&c->cmt_wq);
168 dbg_cmt("commit end"); 225 dbg_cmt("commit end");
169 spin_unlock(&c->cs_lock); 226 spin_unlock(&c->cs_lock);
170
171 return 0; 227 return 0;
172 228
173out_up: 229out_up:
diff --git a/fs/ubifs/debug.c b/fs/ubifs/debug.c
index 0bee4dbffc31..01c2b028e525 100644
--- a/fs/ubifs/debug.c
+++ b/fs/ubifs/debug.c
@@ -43,8 +43,8 @@ DEFINE_SPINLOCK(dbg_lock);
43static char dbg_key_buf0[128]; 43static char dbg_key_buf0[128];
44static char dbg_key_buf1[128]; 44static char dbg_key_buf1[128];
45 45
46unsigned int ubifs_msg_flags = UBIFS_MSG_FLAGS_DEFAULT; 46unsigned int ubifs_msg_flags;
47unsigned int ubifs_chk_flags = UBIFS_CHK_FLAGS_DEFAULT; 47unsigned int ubifs_chk_flags;
48unsigned int ubifs_tst_flags; 48unsigned int ubifs_tst_flags;
49 49
50module_param_named(debug_msgs, ubifs_msg_flags, uint, S_IRUGO | S_IWUSR); 50module_param_named(debug_msgs, ubifs_msg_flags, uint, S_IRUGO | S_IWUSR);
@@ -810,16 +810,24 @@ void dbg_dump_leb(const struct ubifs_info *c, int lnum)
810{ 810{
811 struct ubifs_scan_leb *sleb; 811 struct ubifs_scan_leb *sleb;
812 struct ubifs_scan_node *snod; 812 struct ubifs_scan_node *snod;
813 void *buf;
813 814
814 if (dbg_failure_mode) 815 if (dbg_failure_mode)
815 return; 816 return;
816 817
817 printk(KERN_DEBUG "(pid %d) start dumping LEB %d\n", 818 printk(KERN_DEBUG "(pid %d) start dumping LEB %d\n",
818 current->pid, lnum); 819 current->pid, lnum);
819 sleb = ubifs_scan(c, lnum, 0, c->dbg->buf, 0); 820
821 buf = __vmalloc(c->leb_size, GFP_KERNEL | GFP_NOFS, PAGE_KERNEL);
822 if (!buf) {
823 ubifs_err("cannot allocate memory for dumping LEB %d", lnum);
824 return;
825 }
826
827 sleb = ubifs_scan(c, lnum, 0, buf, 0);
820 if (IS_ERR(sleb)) { 828 if (IS_ERR(sleb)) {
821 ubifs_err("scan error %d", (int)PTR_ERR(sleb)); 829 ubifs_err("scan error %d", (int)PTR_ERR(sleb));
822 return; 830 goto out;
823 } 831 }
824 832
825 printk(KERN_DEBUG "LEB %d has %d nodes ending at %d\n", lnum, 833 printk(KERN_DEBUG "LEB %d has %d nodes ending at %d\n", lnum,
@@ -835,6 +843,9 @@ void dbg_dump_leb(const struct ubifs_info *c, int lnum)
835 printk(KERN_DEBUG "(pid %d) finish dumping LEB %d\n", 843 printk(KERN_DEBUG "(pid %d) finish dumping LEB %d\n",
836 current->pid, lnum); 844 current->pid, lnum);
837 ubifs_scan_destroy(sleb); 845 ubifs_scan_destroy(sleb);
846
847out:
848 vfree(buf);
838 return; 849 return;
839} 850}
840 851
@@ -2690,16 +2701,8 @@ int ubifs_debugging_init(struct ubifs_info *c)
2690 if (!c->dbg) 2701 if (!c->dbg)
2691 return -ENOMEM; 2702 return -ENOMEM;
2692 2703
2693 c->dbg->buf = vmalloc(c->leb_size);
2694 if (!c->dbg->buf)
2695 goto out;
2696
2697 failure_mode_init(c); 2704 failure_mode_init(c);
2698 return 0; 2705 return 0;
2699
2700out:
2701 kfree(c->dbg);
2702 return -ENOMEM;
2703} 2706}
2704 2707
2705/** 2708/**
@@ -2709,7 +2712,6 @@ out:
2709void ubifs_debugging_exit(struct ubifs_info *c) 2712void ubifs_debugging_exit(struct ubifs_info *c)
2710{ 2713{
2711 failure_mode_exit(c); 2714 failure_mode_exit(c);
2712 vfree(c->dbg->buf);
2713 kfree(c->dbg); 2715 kfree(c->dbg);
2714} 2716}
2715 2717
@@ -2813,19 +2815,19 @@ int dbg_debugfs_init_fs(struct ubifs_info *c)
2813 } 2815 }
2814 2816
2815 fname = "dump_lprops"; 2817 fname = "dump_lprops";
2816 dent = debugfs_create_file(fname, S_IWUGO, d->dfs_dir, c, &dfs_fops); 2818 dent = debugfs_create_file(fname, S_IWUSR, d->dfs_dir, c, &dfs_fops);
2817 if (IS_ERR(dent)) 2819 if (IS_ERR(dent))
2818 goto out_remove; 2820 goto out_remove;
2819 d->dfs_dump_lprops = dent; 2821 d->dfs_dump_lprops = dent;
2820 2822
2821 fname = "dump_budg"; 2823 fname = "dump_budg";
2822 dent = debugfs_create_file(fname, S_IWUGO, d->dfs_dir, c, &dfs_fops); 2824 dent = debugfs_create_file(fname, S_IWUSR, d->dfs_dir, c, &dfs_fops);
2823 if (IS_ERR(dent)) 2825 if (IS_ERR(dent))
2824 goto out_remove; 2826 goto out_remove;
2825 d->dfs_dump_budg = dent; 2827 d->dfs_dump_budg = dent;
2826 2828
2827 fname = "dump_tnc"; 2829 fname = "dump_tnc";
2828 dent = debugfs_create_file(fname, S_IWUGO, d->dfs_dir, c, &dfs_fops); 2830 dent = debugfs_create_file(fname, S_IWUSR, d->dfs_dir, c, &dfs_fops);
2829 if (IS_ERR(dent)) 2831 if (IS_ERR(dent))
2830 goto out_remove; 2832 goto out_remove;
2831 d->dfs_dump_tnc = dent; 2833 d->dfs_dump_tnc = dent;
diff --git a/fs/ubifs/debug.h b/fs/ubifs/debug.h
index 69ebe4729151..919f0de29d8f 100644
--- a/fs/ubifs/debug.h
+++ b/fs/ubifs/debug.h
@@ -27,7 +27,6 @@
27 27
28/** 28/**
29 * ubifs_debug_info - per-FS debugging information. 29 * ubifs_debug_info - per-FS debugging information.
30 * @buf: a buffer of LEB size, used for various purposes
31 * @old_zroot: old index root - used by 'dbg_check_old_index()' 30 * @old_zroot: old index root - used by 'dbg_check_old_index()'
32 * @old_zroot_level: old index root level - used by 'dbg_check_old_index()' 31 * @old_zroot_level: old index root level - used by 'dbg_check_old_index()'
33 * @old_zroot_sqnum: old index root sqnum - used by 'dbg_check_old_index()' 32 * @old_zroot_sqnum: old index root sqnum - used by 'dbg_check_old_index()'
@@ -54,7 +53,6 @@
54 * dfs_dump_tnc: "dump TNC" debugfs knob 53 * dfs_dump_tnc: "dump TNC" debugfs knob
55 */ 54 */
56struct ubifs_debug_info { 55struct ubifs_debug_info {
57 void *buf;
58 struct ubifs_zbranch old_zroot; 56 struct ubifs_zbranch old_zroot;
59 int old_zroot_level; 57 int old_zroot_level;
60 unsigned long long old_zroot_sqnum; 58 unsigned long long old_zroot_sqnum;
@@ -173,7 +171,7 @@ const char *dbg_key_str1(const struct ubifs_info *c,
173#define dbg_rcvry(fmt, ...) dbg_do_msg(UBIFS_MSG_RCVRY, fmt, ##__VA_ARGS__) 171#define dbg_rcvry(fmt, ...) dbg_do_msg(UBIFS_MSG_RCVRY, fmt, ##__VA_ARGS__)
174 172
175/* 173/*
176 * Debugging message type flags (must match msg_type_names in debug.c). 174 * Debugging message type flags.
177 * 175 *
178 * UBIFS_MSG_GEN: general messages 176 * UBIFS_MSG_GEN: general messages
179 * UBIFS_MSG_JNL: journal messages 177 * UBIFS_MSG_JNL: journal messages
@@ -205,14 +203,8 @@ enum {
205 UBIFS_MSG_RCVRY = 0x1000, 203 UBIFS_MSG_RCVRY = 0x1000,
206}; 204};
207 205
208/* Debugging message type flags for each default debug message level */
209#define UBIFS_MSG_LVL_0 0
210#define UBIFS_MSG_LVL_1 0x1
211#define UBIFS_MSG_LVL_2 0x7f
212#define UBIFS_MSG_LVL_3 0xffff
213
214/* 206/*
215 * Debugging check flags (must match chk_names in debug.c). 207 * Debugging check flags.
216 * 208 *
217 * UBIFS_CHK_GEN: general checks 209 * UBIFS_CHK_GEN: general checks
218 * UBIFS_CHK_TNC: check TNC 210 * UBIFS_CHK_TNC: check TNC
@@ -233,7 +225,7 @@ enum {
233}; 225};
234 226
235/* 227/*
236 * Special testing flags (must match tst_names in debug.c). 228 * Special testing flags.
237 * 229 *
238 * UBIFS_TST_FORCE_IN_THE_GAPS: force the use of in-the-gaps method 230 * UBIFS_TST_FORCE_IN_THE_GAPS: force the use of in-the-gaps method
239 * UBIFS_TST_RCVRY: failure mode for recovery testing 231 * UBIFS_TST_RCVRY: failure mode for recovery testing
@@ -243,22 +235,6 @@ enum {
243 UBIFS_TST_RCVRY = 0x4, 235 UBIFS_TST_RCVRY = 0x4,
244}; 236};
245 237
246#if CONFIG_UBIFS_FS_DEBUG_MSG_LVL == 1
247#define UBIFS_MSG_FLAGS_DEFAULT UBIFS_MSG_LVL_1
248#elif CONFIG_UBIFS_FS_DEBUG_MSG_LVL == 2
249#define UBIFS_MSG_FLAGS_DEFAULT UBIFS_MSG_LVL_2
250#elif CONFIG_UBIFS_FS_DEBUG_MSG_LVL == 3
251#define UBIFS_MSG_FLAGS_DEFAULT UBIFS_MSG_LVL_3
252#else
253#define UBIFS_MSG_FLAGS_DEFAULT UBIFS_MSG_LVL_0
254#endif
255
256#ifdef CONFIG_UBIFS_FS_DEBUG_CHKS
257#define UBIFS_CHK_FLAGS_DEFAULT 0xffffffff
258#else
259#define UBIFS_CHK_FLAGS_DEFAULT 0
260#endif
261
262extern spinlock_t dbg_lock; 238extern spinlock_t dbg_lock;
263 239
264extern unsigned int ubifs_msg_flags; 240extern unsigned int ubifs_msg_flags;
diff --git a/fs/ubifs/io.c b/fs/ubifs/io.c
index d82173182eeb..dfd168b7807e 100644
--- a/fs/ubifs/io.c
+++ b/fs/ubifs/io.c
@@ -31,6 +31,26 @@
31 * buffer is full or when it is not used for some time (by timer). This is 31 * buffer is full or when it is not used for some time (by timer). This is
32 * similar to the mechanism is used by JFFS2. 32 * similar to the mechanism is used by JFFS2.
33 * 33 *
34 * UBIFS distinguishes between minimum write size (@c->min_io_size) and maximum
35 * write size (@c->max_write_size). The latter is the maximum amount of bytes
36 * the underlying flash is able to program at a time, and writing in
37 * @c->max_write_size units should presumably be faster. Obviously,
38 * @c->min_io_size <= @c->max_write_size. Write-buffers are of
39 * @c->max_write_size bytes in size for maximum performance. However, when a
40 * write-buffer is flushed, only the portion of it (aligned to @c->min_io_size
41 * boundary) which contains data is written, not the whole write-buffer,
42 * because this is more space-efficient.
43 *
44 * This optimization adds a few complications to the code. Indeed, on the one
45 * hand, we want to write in optimal @c->max_write_size bytes chunks, which
46 * also means aligning writes at the @c->max_write_size bytes offsets. On the
47 * other hand, we do not want to waste space when synchronizing the write
48 * buffer, so during synchronization we write in smaller chunks. And this makes
49 * the next write offset to be not aligned to @c->max_write_size bytes. So we
50 * have to make sure that the write-buffer offset (@wbuf->offs) becomes aligned
51 * to @c->max_write_size bytes again. We do this by temporarily shrinking
52 * write-buffer size (@wbuf->size).
53 *
34 * Write-buffers are defined by 'struct ubifs_wbuf' objects and protected by 54 * Write-buffers are defined by 'struct ubifs_wbuf' objects and protected by
35 * mutexes defined inside these objects. Since sometimes upper-level code 55 * mutexes defined inside these objects. Since sometimes upper-level code
36 * has to lock the write-buffer (e.g. journal space reservation code), many 56 * has to lock the write-buffer (e.g. journal space reservation code), many
@@ -46,8 +66,8 @@
46 * UBIFS uses padding when it pads to the next min. I/O unit. In this case it 66 * UBIFS uses padding when it pads to the next min. I/O unit. In this case it
47 * uses padding nodes or padding bytes, if the padding node does not fit. 67 * uses padding nodes or padding bytes, if the padding node does not fit.
48 * 68 *
49 * All UBIFS nodes are protected by CRC checksums and UBIFS checks all nodes 69 * All UBIFS nodes are protected by CRC checksums and UBIFS checks CRC when
50 * every time they are read from the flash media. 70 * they are read from the flash media.
51 */ 71 */
52 72
53#include <linux/crc32.h> 73#include <linux/crc32.h>
@@ -88,8 +108,12 @@ void ubifs_ro_mode(struct ubifs_info *c, int err)
88 * This function may skip data nodes CRC checking if @c->no_chk_data_crc is 108 * This function may skip data nodes CRC checking if @c->no_chk_data_crc is
89 * true, which is controlled by corresponding UBIFS mount option. However, if 109 * true, which is controlled by corresponding UBIFS mount option. However, if
90 * @must_chk_crc is true, then @c->no_chk_data_crc is ignored and CRC is 110 * @must_chk_crc is true, then @c->no_chk_data_crc is ignored and CRC is
91 * checked. Similarly, if @c->always_chk_crc is true, @c->no_chk_data_crc is 111 * checked. Similarly, if @c->mounting or @c->remounting_rw is true (we are
92 * ignored and CRC is checked. 112 * mounting or re-mounting to R/W mode), @c->no_chk_data_crc is ignored and CRC
113 * is checked. This is because during mounting or re-mounting from R/O mode to
114 * R/W mode we may read journal nodes (when replaying the journal or doing the
115 * recovery) and the journal nodes may potentially be corrupted, so checking is
116 * required.
93 * 117 *
94 * This function returns zero in case of success and %-EUCLEAN in case of bad 118 * This function returns zero in case of success and %-EUCLEAN in case of bad
95 * CRC or magic. 119 * CRC or magic.
@@ -131,8 +155,8 @@ int ubifs_check_node(const struct ubifs_info *c, const void *buf, int lnum,
131 node_len > c->ranges[type].max_len) 155 node_len > c->ranges[type].max_len)
132 goto out_len; 156 goto out_len;
133 157
134 if (!must_chk_crc && type == UBIFS_DATA_NODE && !c->always_chk_crc && 158 if (!must_chk_crc && type == UBIFS_DATA_NODE && !c->mounting &&
135 c->no_chk_data_crc) 159 !c->remounting_rw && c->no_chk_data_crc)
136 return 0; 160 return 0;
137 161
138 crc = crc32(UBIFS_CRC32_INIT, buf + 8, node_len - 8); 162 crc = crc32(UBIFS_CRC32_INIT, buf + 8, node_len - 8);
@@ -343,11 +367,17 @@ static void cancel_wbuf_timer_nolock(struct ubifs_wbuf *wbuf)
343 * 367 *
344 * This function synchronizes write-buffer @buf and returns zero in case of 368 * This function synchronizes write-buffer @buf and returns zero in case of
345 * success or a negative error code in case of failure. 369 * success or a negative error code in case of failure.
370 *
371 * Note, although write-buffers are of @c->max_write_size, this function does
372 * not necessarily write all @c->max_write_size bytes to the flash. Instead,
373 * if the write-buffer is only partially filled with data, only the used part
374 * of the write-buffer (aligned on @c->min_io_size boundary) is synchronized.
375 * This way we waste less space.
346 */ 376 */
347int ubifs_wbuf_sync_nolock(struct ubifs_wbuf *wbuf) 377int ubifs_wbuf_sync_nolock(struct ubifs_wbuf *wbuf)
348{ 378{
349 struct ubifs_info *c = wbuf->c; 379 struct ubifs_info *c = wbuf->c;
350 int err, dirt; 380 int err, dirt, sync_len;
351 381
352 cancel_wbuf_timer_nolock(wbuf); 382 cancel_wbuf_timer_nolock(wbuf);
353 if (!wbuf->used || wbuf->lnum == -1) 383 if (!wbuf->used || wbuf->lnum == -1)
@@ -357,27 +387,53 @@ int ubifs_wbuf_sync_nolock(struct ubifs_wbuf *wbuf)
357 dbg_io("LEB %d:%d, %d bytes, jhead %s", 387 dbg_io("LEB %d:%d, %d bytes, jhead %s",
358 wbuf->lnum, wbuf->offs, wbuf->used, dbg_jhead(wbuf->jhead)); 388 wbuf->lnum, wbuf->offs, wbuf->used, dbg_jhead(wbuf->jhead));
359 ubifs_assert(!(wbuf->avail & 7)); 389 ubifs_assert(!(wbuf->avail & 7));
360 ubifs_assert(wbuf->offs + c->min_io_size <= c->leb_size); 390 ubifs_assert(wbuf->offs + wbuf->size <= c->leb_size);
391 ubifs_assert(wbuf->size >= c->min_io_size);
392 ubifs_assert(wbuf->size <= c->max_write_size);
393 ubifs_assert(wbuf->size % c->min_io_size == 0);
361 ubifs_assert(!c->ro_media && !c->ro_mount); 394 ubifs_assert(!c->ro_media && !c->ro_mount);
395 if (c->leb_size - wbuf->offs >= c->max_write_size)
396 ubifs_assert(!((wbuf->offs + wbuf->size) % c->max_write_size ));
362 397
363 if (c->ro_error) 398 if (c->ro_error)
364 return -EROFS; 399 return -EROFS;
365 400
366 ubifs_pad(c, wbuf->buf + wbuf->used, wbuf->avail); 401 /*
402 * Do not write whole write buffer but write only the minimum necessary
403 * amount of min. I/O units.
404 */
405 sync_len = ALIGN(wbuf->used, c->min_io_size);
406 dirt = sync_len - wbuf->used;
407 if (dirt)
408 ubifs_pad(c, wbuf->buf + wbuf->used, dirt);
367 err = ubi_leb_write(c->ubi, wbuf->lnum, wbuf->buf, wbuf->offs, 409 err = ubi_leb_write(c->ubi, wbuf->lnum, wbuf->buf, wbuf->offs,
368 c->min_io_size, wbuf->dtype); 410 sync_len, wbuf->dtype);
369 if (err) { 411 if (err) {
370 ubifs_err("cannot write %d bytes to LEB %d:%d", 412 ubifs_err("cannot write %d bytes to LEB %d:%d",
371 c->min_io_size, wbuf->lnum, wbuf->offs); 413 sync_len, wbuf->lnum, wbuf->offs);
372 dbg_dump_stack(); 414 dbg_dump_stack();
373 return err; 415 return err;
374 } 416 }
375 417
376 dirt = wbuf->avail;
377
378 spin_lock(&wbuf->lock); 418 spin_lock(&wbuf->lock);
379 wbuf->offs += c->min_io_size; 419 wbuf->offs += sync_len;
380 wbuf->avail = c->min_io_size; 420 /*
421 * Now @wbuf->offs is not necessarily aligned to @c->max_write_size.
422 * But our goal is to optimize writes and make sure we write in
423 * @c->max_write_size chunks and to @c->max_write_size-aligned offset.
424 * Thus, if @wbuf->offs is not aligned to @c->max_write_size now, make
425 * sure that @wbuf->offs + @wbuf->size is aligned to
426 * @c->max_write_size. This way we make sure that after next
427 * write-buffer flush we are again at the optimal offset (aligned to
428 * @c->max_write_size).
429 */
430 if (c->leb_size - wbuf->offs < c->max_write_size)
431 wbuf->size = c->leb_size - wbuf->offs;
432 else if (wbuf->offs & (c->max_write_size - 1))
433 wbuf->size = ALIGN(wbuf->offs, c->max_write_size) - wbuf->offs;
434 else
435 wbuf->size = c->max_write_size;
436 wbuf->avail = wbuf->size;
381 wbuf->used = 0; 437 wbuf->used = 0;
382 wbuf->next_ino = 0; 438 wbuf->next_ino = 0;
383 spin_unlock(&wbuf->lock); 439 spin_unlock(&wbuf->lock);
@@ -420,7 +476,13 @@ int ubifs_wbuf_seek_nolock(struct ubifs_wbuf *wbuf, int lnum, int offs,
420 spin_lock(&wbuf->lock); 476 spin_lock(&wbuf->lock);
421 wbuf->lnum = lnum; 477 wbuf->lnum = lnum;
422 wbuf->offs = offs; 478 wbuf->offs = offs;
423 wbuf->avail = c->min_io_size; 479 if (c->leb_size - wbuf->offs < c->max_write_size)
480 wbuf->size = c->leb_size - wbuf->offs;
481 else if (wbuf->offs & (c->max_write_size - 1))
482 wbuf->size = ALIGN(wbuf->offs, c->max_write_size) - wbuf->offs;
483 else
484 wbuf->size = c->max_write_size;
485 wbuf->avail = wbuf->size;
424 wbuf->used = 0; 486 wbuf->used = 0;
425 spin_unlock(&wbuf->lock); 487 spin_unlock(&wbuf->lock);
426 wbuf->dtype = dtype; 488 wbuf->dtype = dtype;
@@ -500,8 +562,9 @@ out_timers:
500 * 562 *
501 * This function writes data to flash via write-buffer @wbuf. This means that 563 * This function writes data to flash via write-buffer @wbuf. This means that
502 * the last piece of the node won't reach the flash media immediately if it 564 * the last piece of the node won't reach the flash media immediately if it
503 * does not take whole minimal I/O unit. Instead, the node will sit in RAM 565 * does not take whole max. write unit (@c->max_write_size). Instead, the node
504 * until the write-buffer is synchronized (e.g., by timer). 566 * will sit in RAM until the write-buffer is synchronized (e.g., by timer, or
567 * because more data are appended to the write-buffer).
505 * 568 *
506 * This function returns zero in case of success and a negative error code in 569 * This function returns zero in case of success and a negative error code in
507 * case of failure. If the node cannot be written because there is no more 570 * case of failure. If the node cannot be written because there is no more
@@ -518,9 +581,14 @@ int ubifs_wbuf_write_nolock(struct ubifs_wbuf *wbuf, void *buf, int len)
518 ubifs_assert(len > 0 && wbuf->lnum >= 0 && wbuf->lnum < c->leb_cnt); 581 ubifs_assert(len > 0 && wbuf->lnum >= 0 && wbuf->lnum < c->leb_cnt);
519 ubifs_assert(wbuf->offs >= 0 && wbuf->offs % c->min_io_size == 0); 582 ubifs_assert(wbuf->offs >= 0 && wbuf->offs % c->min_io_size == 0);
520 ubifs_assert(!(wbuf->offs & 7) && wbuf->offs <= c->leb_size); 583 ubifs_assert(!(wbuf->offs & 7) && wbuf->offs <= c->leb_size);
521 ubifs_assert(wbuf->avail > 0 && wbuf->avail <= c->min_io_size); 584 ubifs_assert(wbuf->avail > 0 && wbuf->avail <= wbuf->size);
585 ubifs_assert(wbuf->size >= c->min_io_size);
586 ubifs_assert(wbuf->size <= c->max_write_size);
587 ubifs_assert(wbuf->size % c->min_io_size == 0);
522 ubifs_assert(mutex_is_locked(&wbuf->io_mutex)); 588 ubifs_assert(mutex_is_locked(&wbuf->io_mutex));
523 ubifs_assert(!c->ro_media && !c->ro_mount); 589 ubifs_assert(!c->ro_media && !c->ro_mount);
590 if (c->leb_size - wbuf->offs >= c->max_write_size)
591 ubifs_assert(!((wbuf->offs + wbuf->size) % c->max_write_size ));
524 592
525 if (c->leb_size - wbuf->offs - wbuf->used < aligned_len) { 593 if (c->leb_size - wbuf->offs - wbuf->used < aligned_len) {
526 err = -ENOSPC; 594 err = -ENOSPC;
@@ -543,14 +611,18 @@ int ubifs_wbuf_write_nolock(struct ubifs_wbuf *wbuf, void *buf, int len)
543 dbg_io("flush jhead %s wbuf to LEB %d:%d", 611 dbg_io("flush jhead %s wbuf to LEB %d:%d",
544 dbg_jhead(wbuf->jhead), wbuf->lnum, wbuf->offs); 612 dbg_jhead(wbuf->jhead), wbuf->lnum, wbuf->offs);
545 err = ubi_leb_write(c->ubi, wbuf->lnum, wbuf->buf, 613 err = ubi_leb_write(c->ubi, wbuf->lnum, wbuf->buf,
546 wbuf->offs, c->min_io_size, 614 wbuf->offs, wbuf->size,
547 wbuf->dtype); 615 wbuf->dtype);
548 if (err) 616 if (err)
549 goto out; 617 goto out;
550 618
551 spin_lock(&wbuf->lock); 619 spin_lock(&wbuf->lock);
552 wbuf->offs += c->min_io_size; 620 wbuf->offs += wbuf->size;
553 wbuf->avail = c->min_io_size; 621 if (c->leb_size - wbuf->offs >= c->max_write_size)
622 wbuf->size = c->max_write_size;
623 else
624 wbuf->size = c->leb_size - wbuf->offs;
625 wbuf->avail = wbuf->size;
554 wbuf->used = 0; 626 wbuf->used = 0;
555 wbuf->next_ino = 0; 627 wbuf->next_ino = 0;
556 spin_unlock(&wbuf->lock); 628 spin_unlock(&wbuf->lock);
@@ -564,33 +636,57 @@ int ubifs_wbuf_write_nolock(struct ubifs_wbuf *wbuf, void *buf, int len)
564 goto exit; 636 goto exit;
565 } 637 }
566 638
567 /* 639 offs = wbuf->offs;
568 * The node is large enough and does not fit entirely within current 640 written = 0;
569 * minimal I/O unit. We have to fill and flush write-buffer and switch
570 * to the next min. I/O unit.
571 */
572 dbg_io("flush jhead %s wbuf to LEB %d:%d",
573 dbg_jhead(wbuf->jhead), wbuf->lnum, wbuf->offs);
574 memcpy(wbuf->buf + wbuf->used, buf, wbuf->avail);
575 err = ubi_leb_write(c->ubi, wbuf->lnum, wbuf->buf, wbuf->offs,
576 c->min_io_size, wbuf->dtype);
577 if (err)
578 goto out;
579 641
580 offs = wbuf->offs + c->min_io_size; 642 if (wbuf->used) {
581 len -= wbuf->avail; 643 /*
582 aligned_len -= wbuf->avail; 644 * The node is large enough and does not fit entirely within
583 written = wbuf->avail; 645 * current available space. We have to fill and flush
646 * write-buffer and switch to the next max. write unit.
647 */
648 dbg_io("flush jhead %s wbuf to LEB %d:%d",
649 dbg_jhead(wbuf->jhead), wbuf->lnum, wbuf->offs);
650 memcpy(wbuf->buf + wbuf->used, buf, wbuf->avail);
651 err = ubi_leb_write(c->ubi, wbuf->lnum, wbuf->buf, wbuf->offs,
652 wbuf->size, wbuf->dtype);
653 if (err)
654 goto out;
655
656 offs += wbuf->size;
657 len -= wbuf->avail;
658 aligned_len -= wbuf->avail;
659 written += wbuf->avail;
660 } else if (wbuf->offs & (c->max_write_size - 1)) {
661 /*
662 * The write-buffer offset is not aligned to
663 * @c->max_write_size and @wbuf->size is less than
664 * @c->max_write_size. Write @wbuf->size bytes to make sure the
665 * following writes are done in optimal @c->max_write_size
666 * chunks.
667 */
668 dbg_io("write %d bytes to LEB %d:%d",
669 wbuf->size, wbuf->lnum, wbuf->offs);
670 err = ubi_leb_write(c->ubi, wbuf->lnum, buf, wbuf->offs,
671 wbuf->size, wbuf->dtype);
672 if (err)
673 goto out;
674
675 offs += wbuf->size;
676 len -= wbuf->size;
677 aligned_len -= wbuf->size;
678 written += wbuf->size;
679 }
584 680
585 /* 681 /*
586 * The remaining data may take more whole min. I/O units, so write the 682 * The remaining data may take more whole max. write units, so write the
587 * remains multiple to min. I/O unit size directly to the flash media. 683 * remains multiple to max. write unit size directly to the flash media.
588 * We align node length to 8-byte boundary because we anyway flash wbuf 684 * We align node length to 8-byte boundary because we anyway flash wbuf
589 * if the remaining space is less than 8 bytes. 685 * if the remaining space is less than 8 bytes.
590 */ 686 */
591 n = aligned_len >> c->min_io_shift; 687 n = aligned_len >> c->max_write_shift;
592 if (n) { 688 if (n) {
593 n <<= c->min_io_shift; 689 n <<= c->max_write_shift;
594 dbg_io("write %d bytes to LEB %d:%d", n, wbuf->lnum, offs); 690 dbg_io("write %d bytes to LEB %d:%d", n, wbuf->lnum, offs);
595 err = ubi_leb_write(c->ubi, wbuf->lnum, buf + written, offs, n, 691 err = ubi_leb_write(c->ubi, wbuf->lnum, buf + written, offs, n,
596 wbuf->dtype); 692 wbuf->dtype);
@@ -606,14 +702,18 @@ int ubifs_wbuf_write_nolock(struct ubifs_wbuf *wbuf, void *buf, int len)
606 if (aligned_len) 702 if (aligned_len)
607 /* 703 /*
608 * And now we have what's left and what does not take whole 704 * And now we have what's left and what does not take whole
609 * min. I/O unit, so write it to the write-buffer and we are 705 * max. write unit, so write it to the write-buffer and we are
610 * done. 706 * done.
611 */ 707 */
612 memcpy(wbuf->buf, buf + written, len); 708 memcpy(wbuf->buf, buf + written, len);
613 709
614 wbuf->offs = offs; 710 wbuf->offs = offs;
711 if (c->leb_size - wbuf->offs >= c->max_write_size)
712 wbuf->size = c->max_write_size;
713 else
714 wbuf->size = c->leb_size - wbuf->offs;
715 wbuf->avail = wbuf->size - aligned_len;
615 wbuf->used = aligned_len; 716 wbuf->used = aligned_len;
616 wbuf->avail = c->min_io_size - aligned_len;
617 wbuf->next_ino = 0; 717 wbuf->next_ino = 0;
618 spin_unlock(&wbuf->lock); 718 spin_unlock(&wbuf->lock);
619 719
@@ -837,11 +937,11 @@ int ubifs_wbuf_init(struct ubifs_info *c, struct ubifs_wbuf *wbuf)
837{ 937{
838 size_t size; 938 size_t size;
839 939
840 wbuf->buf = kmalloc(c->min_io_size, GFP_KERNEL); 940 wbuf->buf = kmalloc(c->max_write_size, GFP_KERNEL);
841 if (!wbuf->buf) 941 if (!wbuf->buf)
842 return -ENOMEM; 942 return -ENOMEM;
843 943
844 size = (c->min_io_size / UBIFS_CH_SZ + 1) * sizeof(ino_t); 944 size = (c->max_write_size / UBIFS_CH_SZ + 1) * sizeof(ino_t);
845 wbuf->inodes = kmalloc(size, GFP_KERNEL); 945 wbuf->inodes = kmalloc(size, GFP_KERNEL);
846 if (!wbuf->inodes) { 946 if (!wbuf->inodes) {
847 kfree(wbuf->buf); 947 kfree(wbuf->buf);
@@ -851,7 +951,14 @@ int ubifs_wbuf_init(struct ubifs_info *c, struct ubifs_wbuf *wbuf)
851 951
852 wbuf->used = 0; 952 wbuf->used = 0;
853 wbuf->lnum = wbuf->offs = -1; 953 wbuf->lnum = wbuf->offs = -1;
854 wbuf->avail = c->min_io_size; 954 /*
955 * If the LEB starts at the max. write size aligned address, then
956 * write-buffer size has to be set to @c->max_write_size. Otherwise,
957 * set it to something smaller so that it ends at the closest max.
958 * write size boundary.
959 */
960 size = c->max_write_size - (c->leb_start % c->max_write_size);
961 wbuf->avail = wbuf->size = size;
855 wbuf->dtype = UBI_UNKNOWN; 962 wbuf->dtype = UBI_UNKNOWN;
856 wbuf->sync_callback = NULL; 963 wbuf->sync_callback = NULL;
857 mutex_init(&wbuf->io_mutex); 964 mutex_init(&wbuf->io_mutex);
diff --git a/fs/ubifs/journal.c b/fs/ubifs/journal.c
index 914f1bd89e57..aed25e864227 100644
--- a/fs/ubifs/journal.c
+++ b/fs/ubifs/journal.c
@@ -690,7 +690,7 @@ int ubifs_jnl_write_data(struct ubifs_info *c, const struct inode *inode,
690{ 690{
691 struct ubifs_data_node *data; 691 struct ubifs_data_node *data;
692 int err, lnum, offs, compr_type, out_len; 692 int err, lnum, offs, compr_type, out_len;
693 int dlen = UBIFS_DATA_NODE_SZ + UBIFS_BLOCK_SIZE * WORST_COMPR_FACTOR; 693 int dlen = COMPRESSED_DATA_NODE_BUF_SZ, allocated = 1;
694 struct ubifs_inode *ui = ubifs_inode(inode); 694 struct ubifs_inode *ui = ubifs_inode(inode);
695 695
696 dbg_jnl("ino %lu, blk %u, len %d, key %s", 696 dbg_jnl("ino %lu, blk %u, len %d, key %s",
@@ -698,9 +698,19 @@ int ubifs_jnl_write_data(struct ubifs_info *c, const struct inode *inode,
698 DBGKEY(key)); 698 DBGKEY(key));
699 ubifs_assert(len <= UBIFS_BLOCK_SIZE); 699 ubifs_assert(len <= UBIFS_BLOCK_SIZE);
700 700
701 data = kmalloc(dlen, GFP_NOFS); 701 data = kmalloc(dlen, GFP_NOFS | __GFP_NOWARN);
702 if (!data) 702 if (!data) {
703 return -ENOMEM; 703 /*
704 * Fall-back to the write reserve buffer. Note, we might be
705 * currently on the memory reclaim path, when the kernel is
706 * trying to free some memory by writing out dirty pages. The
707 * write reserve buffer helps us to guarantee that we are
708 * always able to write the data.
709 */
710 allocated = 0;
711 mutex_lock(&c->write_reserve_mutex);
712 data = c->write_reserve_buf;
713 }
704 714
705 data->ch.node_type = UBIFS_DATA_NODE; 715 data->ch.node_type = UBIFS_DATA_NODE;
706 key_write(c, key, &data->key); 716 key_write(c, key, &data->key);
@@ -736,7 +746,10 @@ int ubifs_jnl_write_data(struct ubifs_info *c, const struct inode *inode,
736 goto out_ro; 746 goto out_ro;
737 747
738 finish_reservation(c); 748 finish_reservation(c);
739 kfree(data); 749 if (!allocated)
750 mutex_unlock(&c->write_reserve_mutex);
751 else
752 kfree(data);
740 return 0; 753 return 0;
741 754
742out_release: 755out_release:
@@ -745,7 +758,10 @@ out_ro:
745 ubifs_ro_mode(c, err); 758 ubifs_ro_mode(c, err);
746 finish_reservation(c); 759 finish_reservation(c);
747out_free: 760out_free:
748 kfree(data); 761 if (!allocated)
762 mutex_unlock(&c->write_reserve_mutex);
763 else
764 kfree(data);
749 return err; 765 return err;
750} 766}
751 767
diff --git a/fs/ubifs/lprops.c b/fs/ubifs/lprops.c
index 4d4ca388889b..c7b25e2f7764 100644
--- a/fs/ubifs/lprops.c
+++ b/fs/ubifs/lprops.c
@@ -1035,7 +1035,8 @@ static int scan_check_cb(struct ubifs_info *c,
1035 struct ubifs_scan_leb *sleb; 1035 struct ubifs_scan_leb *sleb;
1036 struct ubifs_scan_node *snod; 1036 struct ubifs_scan_node *snod;
1037 struct ubifs_lp_stats *lst = &data->lst; 1037 struct ubifs_lp_stats *lst = &data->lst;
1038 int cat, lnum = lp->lnum, is_idx = 0, used = 0, free, dirty; 1038 int cat, lnum = lp->lnum, is_idx = 0, used = 0, free, dirty, ret;
1039 void *buf = NULL;
1039 1040
1040 cat = lp->flags & LPROPS_CAT_MASK; 1041 cat = lp->flags & LPROPS_CAT_MASK;
1041 if (cat != LPROPS_UNCAT) { 1042 if (cat != LPROPS_UNCAT) {
@@ -1093,7 +1094,13 @@ static int scan_check_cb(struct ubifs_info *c,
1093 } 1094 }
1094 } 1095 }
1095 1096
1096 sleb = ubifs_scan(c, lnum, 0, c->dbg->buf, 0); 1097 buf = __vmalloc(c->leb_size, GFP_KERNEL | GFP_NOFS, PAGE_KERNEL);
1098 if (!buf) {
1099 ubifs_err("cannot allocate memory to scan LEB %d", lnum);
1100 goto out;
1101 }
1102
1103 sleb = ubifs_scan(c, lnum, 0, buf, 0);
1097 if (IS_ERR(sleb)) { 1104 if (IS_ERR(sleb)) {
1098 /* 1105 /*
1099 * After an unclean unmount, empty and freeable LEBs 1106 * After an unclean unmount, empty and freeable LEBs
@@ -1105,7 +1112,8 @@ static int scan_check_cb(struct ubifs_info *c,
1105 lst->empty_lebs += 1; 1112 lst->empty_lebs += 1;
1106 lst->total_free += c->leb_size; 1113 lst->total_free += c->leb_size;
1107 lst->total_dark += ubifs_calc_dark(c, c->leb_size); 1114 lst->total_dark += ubifs_calc_dark(c, c->leb_size);
1108 return LPT_SCAN_CONTINUE; 1115 ret = LPT_SCAN_CONTINUE;
1116 goto exit;
1109 } 1117 }
1110 1118
1111 if (lp->free + lp->dirty == c->leb_size && 1119 if (lp->free + lp->dirty == c->leb_size &&
@@ -1115,10 +1123,12 @@ static int scan_check_cb(struct ubifs_info *c,
1115 lst->total_free += lp->free; 1123 lst->total_free += lp->free;
1116 lst->total_dirty += lp->dirty; 1124 lst->total_dirty += lp->dirty;
1117 lst->total_dark += ubifs_calc_dark(c, c->leb_size); 1125 lst->total_dark += ubifs_calc_dark(c, c->leb_size);
1118 return LPT_SCAN_CONTINUE; 1126 ret = LPT_SCAN_CONTINUE;
1127 goto exit;
1119 } 1128 }
1120 data->err = PTR_ERR(sleb); 1129 data->err = PTR_ERR(sleb);
1121 return LPT_SCAN_STOP; 1130 ret = LPT_SCAN_STOP;
1131 goto exit;
1122 } 1132 }
1123 1133
1124 is_idx = -1; 1134 is_idx = -1;
@@ -1236,7 +1246,10 @@ static int scan_check_cb(struct ubifs_info *c,
1236 } 1246 }
1237 1247
1238 ubifs_scan_destroy(sleb); 1248 ubifs_scan_destroy(sleb);
1239 return LPT_SCAN_CONTINUE; 1249 ret = LPT_SCAN_CONTINUE;
1250exit:
1251 vfree(buf);
1252 return ret;
1240 1253
1241out_print: 1254out_print:
1242 ubifs_err("bad accounting of LEB %d: free %d, dirty %d flags %#x, " 1255 ubifs_err("bad accounting of LEB %d: free %d, dirty %d flags %#x, "
@@ -1246,6 +1259,7 @@ out_print:
1246out_destroy: 1259out_destroy:
1247 ubifs_scan_destroy(sleb); 1260 ubifs_scan_destroy(sleb);
1248out: 1261out:
1262 vfree(buf);
1249 data->err = -EINVAL; 1263 data->err = -EINVAL;
1250 return LPT_SCAN_STOP; 1264 return LPT_SCAN_STOP;
1251} 1265}
diff --git a/fs/ubifs/lpt_commit.c b/fs/ubifs/lpt_commit.c
index 5c90dec5db0b..0a3c2c3f5c4a 100644
--- a/fs/ubifs/lpt_commit.c
+++ b/fs/ubifs/lpt_commit.c
@@ -1628,29 +1628,35 @@ static int dbg_check_ltab_lnum(struct ubifs_info *c, int lnum)
1628{ 1628{
1629 int err, len = c->leb_size, dirty = 0, node_type, node_num, node_len; 1629 int err, len = c->leb_size, dirty = 0, node_type, node_num, node_len;
1630 int ret; 1630 int ret;
1631 void *buf = c->dbg->buf; 1631 void *buf, *p;
1632 1632
1633 if (!(ubifs_chk_flags & UBIFS_CHK_LPROPS)) 1633 if (!(ubifs_chk_flags & UBIFS_CHK_LPROPS))
1634 return 0; 1634 return 0;
1635 1635
1636 buf = p = __vmalloc(c->leb_size, GFP_KERNEL | GFP_NOFS, PAGE_KERNEL);
1637 if (!buf) {
1638 ubifs_err("cannot allocate memory for ltab checking");
1639 return 0;
1640 }
1641
1636 dbg_lp("LEB %d", lnum); 1642 dbg_lp("LEB %d", lnum);
1637 err = ubi_read(c->ubi, lnum, buf, 0, c->leb_size); 1643 err = ubi_read(c->ubi, lnum, buf, 0, c->leb_size);
1638 if (err) { 1644 if (err) {
1639 dbg_msg("ubi_read failed, LEB %d, error %d", lnum, err); 1645 dbg_msg("ubi_read failed, LEB %d, error %d", lnum, err);
1640 return err; 1646 goto out;
1641 } 1647 }
1642 while (1) { 1648 while (1) {
1643 if (!is_a_node(c, buf, len)) { 1649 if (!is_a_node(c, p, len)) {
1644 int i, pad_len; 1650 int i, pad_len;
1645 1651
1646 pad_len = get_pad_len(c, buf, len); 1652 pad_len = get_pad_len(c, p, len);
1647 if (pad_len) { 1653 if (pad_len) {
1648 buf += pad_len; 1654 p += pad_len;
1649 len -= pad_len; 1655 len -= pad_len;
1650 dirty += pad_len; 1656 dirty += pad_len;
1651 continue; 1657 continue;
1652 } 1658 }
1653 if (!dbg_is_all_ff(buf, len)) { 1659 if (!dbg_is_all_ff(p, len)) {
1654 dbg_msg("invalid empty space in LEB %d at %d", 1660 dbg_msg("invalid empty space in LEB %d at %d",
1655 lnum, c->leb_size - len); 1661 lnum, c->leb_size - len);
1656 err = -EINVAL; 1662 err = -EINVAL;
@@ -1668,16 +1674,21 @@ static int dbg_check_ltab_lnum(struct ubifs_info *c, int lnum)
1668 lnum, dirty, c->ltab[i].dirty); 1674 lnum, dirty, c->ltab[i].dirty);
1669 err = -EINVAL; 1675 err = -EINVAL;
1670 } 1676 }
1671 return err; 1677 goto out;
1672 } 1678 }
1673 node_type = get_lpt_node_type(c, buf, &node_num); 1679 node_type = get_lpt_node_type(c, p, &node_num);
1674 node_len = get_lpt_node_len(c, node_type); 1680 node_len = get_lpt_node_len(c, node_type);
1675 ret = dbg_is_node_dirty(c, node_type, lnum, c->leb_size - len); 1681 ret = dbg_is_node_dirty(c, node_type, lnum, c->leb_size - len);
1676 if (ret == 1) 1682 if (ret == 1)
1677 dirty += node_len; 1683 dirty += node_len;
1678 buf += node_len; 1684 p += node_len;
1679 len -= node_len; 1685 len -= node_len;
1680 } 1686 }
1687
1688 err = 0;
1689out:
1690 vfree(buf);
1691 return err;
1681} 1692}
1682 1693
1683/** 1694/**
@@ -1870,25 +1881,31 @@ int dbg_chk_lpt_sz(struct ubifs_info *c, int action, int len)
1870static void dump_lpt_leb(const struct ubifs_info *c, int lnum) 1881static void dump_lpt_leb(const struct ubifs_info *c, int lnum)
1871{ 1882{
1872 int err, len = c->leb_size, node_type, node_num, node_len, offs; 1883 int err, len = c->leb_size, node_type, node_num, node_len, offs;
1873 void *buf = c->dbg->buf; 1884 void *buf, *p;
1874 1885
1875 printk(KERN_DEBUG "(pid %d) start dumping LEB %d\n", 1886 printk(KERN_DEBUG "(pid %d) start dumping LEB %d\n",
1876 current->pid, lnum); 1887 current->pid, lnum);
1888 buf = p = __vmalloc(c->leb_size, GFP_KERNEL | GFP_NOFS, PAGE_KERNEL);
1889 if (!buf) {
1890 ubifs_err("cannot allocate memory to dump LPT");
1891 return;
1892 }
1893
1877 err = ubi_read(c->ubi, lnum, buf, 0, c->leb_size); 1894 err = ubi_read(c->ubi, lnum, buf, 0, c->leb_size);
1878 if (err) { 1895 if (err) {
1879 ubifs_err("cannot read LEB %d, error %d", lnum, err); 1896 ubifs_err("cannot read LEB %d, error %d", lnum, err);
1880 return; 1897 goto out;
1881 } 1898 }
1882 while (1) { 1899 while (1) {
1883 offs = c->leb_size - len; 1900 offs = c->leb_size - len;
1884 if (!is_a_node(c, buf, len)) { 1901 if (!is_a_node(c, p, len)) {
1885 int pad_len; 1902 int pad_len;
1886 1903
1887 pad_len = get_pad_len(c, buf, len); 1904 pad_len = get_pad_len(c, p, len);
1888 if (pad_len) { 1905 if (pad_len) {
1889 printk(KERN_DEBUG "LEB %d:%d, pad %d bytes\n", 1906 printk(KERN_DEBUG "LEB %d:%d, pad %d bytes\n",
1890 lnum, offs, pad_len); 1907 lnum, offs, pad_len);
1891 buf += pad_len; 1908 p += pad_len;
1892 len -= pad_len; 1909 len -= pad_len;
1893 continue; 1910 continue;
1894 } 1911 }
@@ -1898,7 +1915,7 @@ static void dump_lpt_leb(const struct ubifs_info *c, int lnum)
1898 break; 1915 break;
1899 } 1916 }
1900 1917
1901 node_type = get_lpt_node_type(c, buf, &node_num); 1918 node_type = get_lpt_node_type(c, p, &node_num);
1902 switch (node_type) { 1919 switch (node_type) {
1903 case UBIFS_LPT_PNODE: 1920 case UBIFS_LPT_PNODE:
1904 { 1921 {
@@ -1923,7 +1940,7 @@ static void dump_lpt_leb(const struct ubifs_info *c, int lnum)
1923 else 1940 else
1924 printk(KERN_DEBUG "LEB %d:%d, nnode, ", 1941 printk(KERN_DEBUG "LEB %d:%d, nnode, ",
1925 lnum, offs); 1942 lnum, offs);
1926 err = ubifs_unpack_nnode(c, buf, &nnode); 1943 err = ubifs_unpack_nnode(c, p, &nnode);
1927 for (i = 0; i < UBIFS_LPT_FANOUT; i++) { 1944 for (i = 0; i < UBIFS_LPT_FANOUT; i++) {
1928 printk(KERN_CONT "%d:%d", nnode.nbranch[i].lnum, 1945 printk(KERN_CONT "%d:%d", nnode.nbranch[i].lnum,
1929 nnode.nbranch[i].offs); 1946 nnode.nbranch[i].offs);
@@ -1944,15 +1961,18 @@ static void dump_lpt_leb(const struct ubifs_info *c, int lnum)
1944 break; 1961 break;
1945 default: 1962 default:
1946 ubifs_err("LPT node type %d not recognized", node_type); 1963 ubifs_err("LPT node type %d not recognized", node_type);
1947 return; 1964 goto out;
1948 } 1965 }
1949 1966
1950 buf += node_len; 1967 p += node_len;
1951 len -= node_len; 1968 len -= node_len;
1952 } 1969 }
1953 1970
1954 printk(KERN_DEBUG "(pid %d) finish dumping LEB %d\n", 1971 printk(KERN_DEBUG "(pid %d) finish dumping LEB %d\n",
1955 current->pid, lnum); 1972 current->pid, lnum);
1973out:
1974 vfree(buf);
1975 return;
1956} 1976}
1957 1977
1958/** 1978/**
diff --git a/fs/ubifs/orphan.c b/fs/ubifs/orphan.c
index 82009c74b6a3..2cdbd31641d7 100644
--- a/fs/ubifs/orphan.c
+++ b/fs/ubifs/orphan.c
@@ -892,15 +892,22 @@ static int dbg_read_orphans(struct check_info *ci, struct ubifs_scan_leb *sleb)
892static int dbg_scan_orphans(struct ubifs_info *c, struct check_info *ci) 892static int dbg_scan_orphans(struct ubifs_info *c, struct check_info *ci)
893{ 893{
894 int lnum, err = 0; 894 int lnum, err = 0;
895 void *buf;
895 896
896 /* Check no-orphans flag and skip this if no orphans */ 897 /* Check no-orphans flag and skip this if no orphans */
897 if (c->no_orphs) 898 if (c->no_orphs)
898 return 0; 899 return 0;
899 900
901 buf = __vmalloc(c->leb_size, GFP_KERNEL | GFP_NOFS, PAGE_KERNEL);
902 if (!buf) {
903 ubifs_err("cannot allocate memory to check orphans");
904 return 0;
905 }
906
900 for (lnum = c->orph_first; lnum <= c->orph_last; lnum++) { 907 for (lnum = c->orph_first; lnum <= c->orph_last; lnum++) {
901 struct ubifs_scan_leb *sleb; 908 struct ubifs_scan_leb *sleb;
902 909
903 sleb = ubifs_scan(c, lnum, 0, c->dbg->buf, 0); 910 sleb = ubifs_scan(c, lnum, 0, buf, 0);
904 if (IS_ERR(sleb)) { 911 if (IS_ERR(sleb)) {
905 err = PTR_ERR(sleb); 912 err = PTR_ERR(sleb);
906 break; 913 break;
@@ -912,6 +919,7 @@ static int dbg_scan_orphans(struct ubifs_info *c, struct check_info *ci)
912 break; 919 break;
913 } 920 }
914 921
922 vfree(buf);
915 return err; 923 return err;
916} 924}
917 925
diff --git a/fs/ubifs/recovery.c b/fs/ubifs/recovery.c
index 77e9b874b6c2..936f2cbfe6b6 100644
--- a/fs/ubifs/recovery.c
+++ b/fs/ubifs/recovery.c
@@ -28,6 +28,23 @@
28 * UBIFS always cleans away all remnants of an unclean un-mount, so that 28 * UBIFS always cleans away all remnants of an unclean un-mount, so that
29 * errors do not accumulate. However UBIFS defers recovery if it is mounted 29 * errors do not accumulate. However UBIFS defers recovery if it is mounted
30 * read-only, and the flash is not modified in that case. 30 * read-only, and the flash is not modified in that case.
31 *
32 * The general UBIFS approach to the recovery is that it recovers from
33 * corruptions which could be caused by power cuts, but it refuses to recover
34 * from corruption caused by other reasons. And UBIFS tries to distinguish
35 * between these 2 reasons of corruptions and silently recover in the former
36 * case and loudly complain in the latter case.
37 *
38 * UBIFS writes only to erased LEBs, so it writes only to the flash space
39 * containing only 0xFFs. UBIFS also always writes strictly from the beginning
40 * of the LEB to the end. And UBIFS assumes that the underlying flash media
41 * writes in @c->max_write_size bytes at a time.
42 *
43 * Hence, if UBIFS finds a corrupted node at offset X, it expects only the min.
44 * I/O unit corresponding to offset X to contain corrupted data, all the
45 * following min. I/O units have to contain empty space (all 0xFFs). If this is
46 * not true, the corruption cannot be the result of a power cut, and UBIFS
47 * refuses to mount.
31 */ 48 */
32 49
33#include <linux/crc32.h> 50#include <linux/crc32.h>
@@ -362,8 +379,9 @@ int ubifs_write_rcvrd_mst_node(struct ubifs_info *c)
362 * @offs: offset to check 379 * @offs: offset to check
363 * 380 *
364 * This function returns %1 if @offs was in the last write to the LEB whose data 381 * This function returns %1 if @offs was in the last write to the LEB whose data
365 * is in @buf, otherwise %0 is returned. The determination is made by checking 382 * is in @buf, otherwise %0 is returned. The determination is made by checking
366 * for subsequent empty space starting from the next @c->min_io_size boundary. 383 * for subsequent empty space starting from the next @c->max_write_size
384 * boundary.
367 */ 385 */
368static int is_last_write(const struct ubifs_info *c, void *buf, int offs) 386static int is_last_write(const struct ubifs_info *c, void *buf, int offs)
369{ 387{
@@ -371,10 +389,10 @@ static int is_last_write(const struct ubifs_info *c, void *buf, int offs)
371 uint8_t *p; 389 uint8_t *p;
372 390
373 /* 391 /*
374 * Round up to the next @c->min_io_size boundary i.e. @offs is in the 392 * Round up to the next @c->max_write_size boundary i.e. @offs is in
375 * last wbuf written. After that should be empty space. 393 * the last wbuf written. After that should be empty space.
376 */ 394 */
377 empty_offs = ALIGN(offs + 1, c->min_io_size); 395 empty_offs = ALIGN(offs + 1, c->max_write_size);
378 check_len = c->leb_size - empty_offs; 396 check_len = c->leb_size - empty_offs;
379 p = buf + empty_offs - offs; 397 p = buf + empty_offs - offs;
380 return is_empty(p, check_len); 398 return is_empty(p, check_len);
@@ -429,7 +447,7 @@ static int no_more_nodes(const struct ubifs_info *c, void *buf, int len,
429 int skip, dlen = le32_to_cpu(ch->len); 447 int skip, dlen = le32_to_cpu(ch->len);
430 448
431 /* Check for empty space after the corrupt node's common header */ 449 /* Check for empty space after the corrupt node's common header */
432 skip = ALIGN(offs + UBIFS_CH_SZ, c->min_io_size) - offs; 450 skip = ALIGN(offs + UBIFS_CH_SZ, c->max_write_size) - offs;
433 if (is_empty(buf + skip, len - skip)) 451 if (is_empty(buf + skip, len - skip))
434 return 1; 452 return 1;
435 /* 453 /*
@@ -441,7 +459,7 @@ static int no_more_nodes(const struct ubifs_info *c, void *buf, int len,
441 return 0; 459 return 0;
442 } 460 }
443 /* Now we know the corrupt node's length we can skip over it */ 461 /* Now we know the corrupt node's length we can skip over it */
444 skip = ALIGN(offs + dlen, c->min_io_size) - offs; 462 skip = ALIGN(offs + dlen, c->max_write_size) - offs;
445 /* After which there should be empty space */ 463 /* After which there should be empty space */
446 if (is_empty(buf + skip, len - skip)) 464 if (is_empty(buf + skip, len - skip))
447 return 1; 465 return 1;
@@ -671,10 +689,14 @@ struct ubifs_scan_leb *ubifs_recover_leb(struct ubifs_info *c, int lnum,
671 } else { 689 } else {
672 int corruption = first_non_ff(buf, len); 690 int corruption = first_non_ff(buf, len);
673 691
692 /*
693 * See header comment for this file for more
694 * explanations about the reasons we have this check.
695 */
674 ubifs_err("corrupt empty space LEB %d:%d, corruption " 696 ubifs_err("corrupt empty space LEB %d:%d, corruption "
675 "starts at %d", lnum, offs, corruption); 697 "starts at %d", lnum, offs, corruption);
676 /* Make sure we dump interesting non-0xFF data */ 698 /* Make sure we dump interesting non-0xFF data */
677 offs = corruption; 699 offs += corruption;
678 buf += corruption; 700 buf += corruption;
679 goto corrupted; 701 goto corrupted;
680 } 702 }
@@ -836,12 +858,8 @@ struct ubifs_scan_leb *ubifs_recover_log_leb(struct ubifs_info *c, int lnum,
836static int recover_head(const struct ubifs_info *c, int lnum, int offs, 858static int recover_head(const struct ubifs_info *c, int lnum, int offs,
837 void *sbuf) 859 void *sbuf)
838{ 860{
839 int len, err; 861 int len = c->max_write_size, err;
840 862
841 if (c->min_io_size > 1)
842 len = c->min_io_size;
843 else
844 len = 512;
845 if (offs + len > c->leb_size) 863 if (offs + len > c->leb_size)
846 len = c->leb_size - offs; 864 len = c->leb_size - offs;
847 865
diff --git a/fs/ubifs/scan.c b/fs/ubifs/scan.c
index 3e1ee57dbeaa..36216b46f772 100644
--- a/fs/ubifs/scan.c
+++ b/fs/ubifs/scan.c
@@ -328,7 +328,7 @@ struct ubifs_scan_leb *ubifs_scan(const struct ubifs_info *c, int lnum,
328 if (!quiet) 328 if (!quiet)
329 ubifs_err("empty space starts at non-aligned offset %d", 329 ubifs_err("empty space starts at non-aligned offset %d",
330 offs); 330 offs);
331 goto corrupted;; 331 goto corrupted;
332 } 332 }
333 333
334 ubifs_end_scan(c, sleb, lnum, offs); 334 ubifs_end_scan(c, sleb, lnum, offs);
diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c
index 6e11c2975dcf..e5dc1e120e8d 100644
--- a/fs/ubifs/super.c
+++ b/fs/ubifs/super.c
@@ -512,9 +512,12 @@ static int init_constants_early(struct ubifs_info *c)
512 512
513 c->leb_cnt = c->vi.size; 513 c->leb_cnt = c->vi.size;
514 c->leb_size = c->vi.usable_leb_size; 514 c->leb_size = c->vi.usable_leb_size;
515 c->leb_start = c->di.leb_start;
515 c->half_leb_size = c->leb_size / 2; 516 c->half_leb_size = c->leb_size / 2;
516 c->min_io_size = c->di.min_io_size; 517 c->min_io_size = c->di.min_io_size;
517 c->min_io_shift = fls(c->min_io_size) - 1; 518 c->min_io_shift = fls(c->min_io_size) - 1;
519 c->max_write_size = c->di.max_write_size;
520 c->max_write_shift = fls(c->max_write_size) - 1;
518 521
519 if (c->leb_size < UBIFS_MIN_LEB_SZ) { 522 if (c->leb_size < UBIFS_MIN_LEB_SZ) {
520 ubifs_err("too small LEBs (%d bytes), min. is %d bytes", 523 ubifs_err("too small LEBs (%d bytes), min. is %d bytes",
@@ -534,6 +537,18 @@ static int init_constants_early(struct ubifs_info *c)
534 } 537 }
535 538
536 /* 539 /*
540 * Maximum write size has to be greater than or equal to min. I/O
541 * size, and be a multiple of min. I/O size.
542 */
543 if (c->max_write_size < c->min_io_size ||
544 c->max_write_size % c->min_io_size ||
545 !is_power_of_2(c->max_write_size)) {
546 ubifs_err("bad write buffer size %d for %d min. I/O unit",
547 c->max_write_size, c->min_io_size);
548 return -EINVAL;
549 }
550
551 /*
537 * UBIFS aligns all node to 8-byte boundary, so to make function in 552 * UBIFS aligns all node to 8-byte boundary, so to make function in
538 * io.c simpler, assume minimum I/O unit size to be 8 bytes if it is 553 * io.c simpler, assume minimum I/O unit size to be 8 bytes if it is
539 * less than 8. 554 * less than 8.
@@ -541,6 +556,10 @@ static int init_constants_early(struct ubifs_info *c)
541 if (c->min_io_size < 8) { 556 if (c->min_io_size < 8) {
542 c->min_io_size = 8; 557 c->min_io_size = 8;
543 c->min_io_shift = 3; 558 c->min_io_shift = 3;
559 if (c->max_write_size < c->min_io_size) {
560 c->max_write_size = c->min_io_size;
561 c->max_write_shift = c->min_io_shift;
562 }
544 } 563 }
545 564
546 c->ref_node_alsz = ALIGN(UBIFS_REF_NODE_SZ, c->min_io_size); 565 c->ref_node_alsz = ALIGN(UBIFS_REF_NODE_SZ, c->min_io_size);
@@ -1202,11 +1221,14 @@ static int mount_ubifs(struct ubifs_info *c)
1202 if (c->bulk_read == 1) 1221 if (c->bulk_read == 1)
1203 bu_init(c); 1222 bu_init(c);
1204 1223
1205 /* 1224 if (!c->ro_mount) {
1206 * We have to check all CRCs, even for data nodes, when we mount the FS 1225 c->write_reserve_buf = kmalloc(COMPRESSED_DATA_NODE_BUF_SZ,
1207 * (specifically, when we are replaying). 1226 GFP_KERNEL);
1208 */ 1227 if (!c->write_reserve_buf)
1209 c->always_chk_crc = 1; 1228 goto out_free;
1229 }
1230
1231 c->mounting = 1;
1210 1232
1211 err = ubifs_read_superblock(c); 1233 err = ubifs_read_superblock(c);
1212 if (err) 1234 if (err)
@@ -1382,7 +1404,7 @@ static int mount_ubifs(struct ubifs_info *c)
1382 if (err) 1404 if (err)
1383 goto out_infos; 1405 goto out_infos;
1384 1406
1385 c->always_chk_crc = 0; 1407 c->mounting = 0;
1386 1408
1387 ubifs_msg("mounted UBI device %d, volume %d, name \"%s\"", 1409 ubifs_msg("mounted UBI device %d, volume %d, name \"%s\"",
1388 c->vi.ubi_num, c->vi.vol_id, c->vi.name); 1410 c->vi.ubi_num, c->vi.vol_id, c->vi.name);
@@ -1403,6 +1425,7 @@ static int mount_ubifs(struct ubifs_info *c)
1403 1425
1404 dbg_msg("compiled on: " __DATE__ " at " __TIME__); 1426 dbg_msg("compiled on: " __DATE__ " at " __TIME__);
1405 dbg_msg("min. I/O unit size: %d bytes", c->min_io_size); 1427 dbg_msg("min. I/O unit size: %d bytes", c->min_io_size);
1428 dbg_msg("max. write size: %d bytes", c->max_write_size);
1406 dbg_msg("LEB size: %d bytes (%d KiB)", 1429 dbg_msg("LEB size: %d bytes (%d KiB)",
1407 c->leb_size, c->leb_size >> 10); 1430 c->leb_size, c->leb_size >> 10);
1408 dbg_msg("data journal heads: %d", 1431 dbg_msg("data journal heads: %d",
@@ -1432,9 +1455,9 @@ static int mount_ubifs(struct ubifs_info *c)
1432 UBIFS_TRUN_NODE_SZ, UBIFS_SB_NODE_SZ, UBIFS_MST_NODE_SZ); 1455 UBIFS_TRUN_NODE_SZ, UBIFS_SB_NODE_SZ, UBIFS_MST_NODE_SZ);
1433 dbg_msg("node sizes: ref %zu, cmt. start %zu, orph %zu", 1456 dbg_msg("node sizes: ref %zu, cmt. start %zu, orph %zu",
1434 UBIFS_REF_NODE_SZ, UBIFS_CS_NODE_SZ, UBIFS_ORPH_NODE_SZ); 1457 UBIFS_REF_NODE_SZ, UBIFS_CS_NODE_SZ, UBIFS_ORPH_NODE_SZ);
1435 dbg_msg("max. node sizes: data %zu, inode %zu dentry %zu", 1458 dbg_msg("max. node sizes: data %zu, inode %zu dentry %zu, idx %d",
1436 UBIFS_MAX_DATA_NODE_SZ, UBIFS_MAX_INO_NODE_SZ, 1459 UBIFS_MAX_DATA_NODE_SZ, UBIFS_MAX_INO_NODE_SZ,
1437 UBIFS_MAX_DENT_NODE_SZ); 1460 UBIFS_MAX_DENT_NODE_SZ, ubifs_idx_node_sz(c, c->fanout));
1438 dbg_msg("dead watermark: %d", c->dead_wm); 1461 dbg_msg("dead watermark: %d", c->dead_wm);
1439 dbg_msg("dark watermark: %d", c->dark_wm); 1462 dbg_msg("dark watermark: %d", c->dark_wm);
1440 dbg_msg("LEB overhead: %d", c->leb_overhead); 1463 dbg_msg("LEB overhead: %d", c->leb_overhead);
@@ -1474,6 +1497,7 @@ out_wbufs:
1474out_cbuf: 1497out_cbuf:
1475 kfree(c->cbuf); 1498 kfree(c->cbuf);
1476out_free: 1499out_free:
1500 kfree(c->write_reserve_buf);
1477 kfree(c->bu.buf); 1501 kfree(c->bu.buf);
1478 vfree(c->ileb_buf); 1502 vfree(c->ileb_buf);
1479 vfree(c->sbuf); 1503 vfree(c->sbuf);
@@ -1512,6 +1536,7 @@ static void ubifs_umount(struct ubifs_info *c)
1512 kfree(c->cbuf); 1536 kfree(c->cbuf);
1513 kfree(c->rcvrd_mst_node); 1537 kfree(c->rcvrd_mst_node);
1514 kfree(c->mst_node); 1538 kfree(c->mst_node);
1539 kfree(c->write_reserve_buf);
1515 kfree(c->bu.buf); 1540 kfree(c->bu.buf);
1516 vfree(c->ileb_buf); 1541 vfree(c->ileb_buf);
1517 vfree(c->sbuf); 1542 vfree(c->sbuf);
@@ -1543,7 +1568,6 @@ static int ubifs_remount_rw(struct ubifs_info *c)
1543 mutex_lock(&c->umount_mutex); 1568 mutex_lock(&c->umount_mutex);
1544 dbg_save_space_info(c); 1569 dbg_save_space_info(c);
1545 c->remounting_rw = 1; 1570 c->remounting_rw = 1;
1546 c->always_chk_crc = 1;
1547 1571
1548 err = check_free_space(c); 1572 err = check_free_space(c);
1549 if (err) 1573 if (err)
@@ -1598,6 +1622,10 @@ static int ubifs_remount_rw(struct ubifs_info *c)
1598 goto out; 1622 goto out;
1599 } 1623 }
1600 1624
1625 c->write_reserve_buf = kmalloc(COMPRESSED_DATA_NODE_BUF_SZ, GFP_KERNEL);
1626 if (!c->write_reserve_buf)
1627 goto out;
1628
1601 err = ubifs_lpt_init(c, 0, 1); 1629 err = ubifs_lpt_init(c, 0, 1);
1602 if (err) 1630 if (err)
1603 goto out; 1631 goto out;
@@ -1650,7 +1678,6 @@ static int ubifs_remount_rw(struct ubifs_info *c)
1650 dbg_gen("re-mounted read-write"); 1678 dbg_gen("re-mounted read-write");
1651 c->ro_mount = 0; 1679 c->ro_mount = 0;
1652 c->remounting_rw = 0; 1680 c->remounting_rw = 0;
1653 c->always_chk_crc = 0;
1654 err = dbg_check_space_info(c); 1681 err = dbg_check_space_info(c);
1655 mutex_unlock(&c->umount_mutex); 1682 mutex_unlock(&c->umount_mutex);
1656 return err; 1683 return err;
@@ -1663,11 +1690,12 @@ out:
1663 c->bgt = NULL; 1690 c->bgt = NULL;
1664 } 1691 }
1665 free_wbufs(c); 1692 free_wbufs(c);
1693 kfree(c->write_reserve_buf);
1694 c->write_reserve_buf = NULL;
1666 vfree(c->ileb_buf); 1695 vfree(c->ileb_buf);
1667 c->ileb_buf = NULL; 1696 c->ileb_buf = NULL;
1668 ubifs_lpt_free(c, 1); 1697 ubifs_lpt_free(c, 1);
1669 c->remounting_rw = 0; 1698 c->remounting_rw = 0;
1670 c->always_chk_crc = 0;
1671 mutex_unlock(&c->umount_mutex); 1699 mutex_unlock(&c->umount_mutex);
1672 return err; 1700 return err;
1673} 1701}
@@ -1707,6 +1735,8 @@ static void ubifs_remount_ro(struct ubifs_info *c)
1707 free_wbufs(c); 1735 free_wbufs(c);
1708 vfree(c->orph_buf); 1736 vfree(c->orph_buf);
1709 c->orph_buf = NULL; 1737 c->orph_buf = NULL;
1738 kfree(c->write_reserve_buf);
1739 c->write_reserve_buf = NULL;
1710 vfree(c->ileb_buf); 1740 vfree(c->ileb_buf);
1711 c->ileb_buf = NULL; 1741 c->ileb_buf = NULL;
1712 ubifs_lpt_free(c, 1); 1742 ubifs_lpt_free(c, 1);
@@ -1937,6 +1967,7 @@ static int ubifs_fill_super(struct super_block *sb, void *data, int silent)
1937 mutex_init(&c->mst_mutex); 1967 mutex_init(&c->mst_mutex);
1938 mutex_init(&c->umount_mutex); 1968 mutex_init(&c->umount_mutex);
1939 mutex_init(&c->bu_mutex); 1969 mutex_init(&c->bu_mutex);
1970 mutex_init(&c->write_reserve_mutex);
1940 init_waitqueue_head(&c->cmt_wq); 1971 init_waitqueue_head(&c->cmt_wq);
1941 c->buds = RB_ROOT; 1972 c->buds = RB_ROOT;
1942 c->old_idx = RB_ROOT; 1973 c->old_idx = RB_ROOT;
@@ -1954,6 +1985,7 @@ static int ubifs_fill_super(struct super_block *sb, void *data, int silent)
1954 INIT_LIST_HEAD(&c->old_buds); 1985 INIT_LIST_HEAD(&c->old_buds);
1955 INIT_LIST_HEAD(&c->orph_list); 1986 INIT_LIST_HEAD(&c->orph_list);
1956 INIT_LIST_HEAD(&c->orph_new); 1987 INIT_LIST_HEAD(&c->orph_new);
1988 c->no_chk_data_crc = 1;
1957 1989
1958 c->vfs_sb = sb; 1990 c->vfs_sb = sb;
1959 c->highest_inum = UBIFS_FIRST_INO; 1991 c->highest_inum = UBIFS_FIRST_INO;
diff --git a/fs/ubifs/tnc.c b/fs/ubifs/tnc.c
index ad9cf0133622..de485979ca39 100644
--- a/fs/ubifs/tnc.c
+++ b/fs/ubifs/tnc.c
@@ -447,8 +447,11 @@ static int tnc_read_node_nm(struct ubifs_info *c, struct ubifs_zbranch *zbr,
447 * 447 *
448 * Note, this function does not check CRC of data nodes if @c->no_chk_data_crc 448 * Note, this function does not check CRC of data nodes if @c->no_chk_data_crc
449 * is true (it is controlled by corresponding mount option). However, if 449 * is true (it is controlled by corresponding mount option). However, if
450 * @c->always_chk_crc is true, @c->no_chk_data_crc is ignored and CRC is always 450 * @c->mounting or @c->remounting_rw is true (we are mounting or re-mounting to
451 * checked. 451 * R/W mode), @c->no_chk_data_crc is ignored and CRC is checked. This is
452 * because during mounting or re-mounting from R/O mode to R/W mode we may read
453 * journal nodes (when replaying the journal or doing the recovery) and the
454 * journal nodes may potentially be corrupted, so checking is required.
452 */ 455 */
453static int try_read_node(const struct ubifs_info *c, void *buf, int type, 456static int try_read_node(const struct ubifs_info *c, void *buf, int type,
454 int len, int lnum, int offs) 457 int len, int lnum, int offs)
@@ -476,7 +479,8 @@ static int try_read_node(const struct ubifs_info *c, void *buf, int type,
476 if (node_len != len) 479 if (node_len != len)
477 return 0; 480 return 0;
478 481
479 if (type == UBIFS_DATA_NODE && !c->always_chk_crc && c->no_chk_data_crc) 482 if (type == UBIFS_DATA_NODE && c->no_chk_data_crc && !c->mounting &&
483 !c->remounting_rw)
480 return 1; 484 return 1;
481 485
482 crc = crc32(UBIFS_CRC32_INIT, buf + 8, node_len - 8); 486 crc = crc32(UBIFS_CRC32_INIT, buf + 8, node_len - 8);
diff --git a/fs/ubifs/ubifs.h b/fs/ubifs/ubifs.h
index 381d6b207a52..8c40ad3c6721 100644
--- a/fs/ubifs/ubifs.h
+++ b/fs/ubifs/ubifs.h
@@ -151,6 +151,12 @@
151 */ 151 */
152#define WORST_COMPR_FACTOR 2 152#define WORST_COMPR_FACTOR 2
153 153
154/*
155 * How much memory is needed for a buffer where we comress a data node.
156 */
157#define COMPRESSED_DATA_NODE_BUF_SZ \
158 (UBIFS_DATA_NODE_SZ + UBIFS_BLOCK_SIZE * WORST_COMPR_FACTOR)
159
154/* Maximum expected tree height for use by bottom_up_buf */ 160/* Maximum expected tree height for use by bottom_up_buf */
155#define BOTTOM_UP_HEIGHT 64 161#define BOTTOM_UP_HEIGHT 64
156 162
@@ -646,6 +652,7 @@ typedef int (*ubifs_lpt_scan_callback)(struct ubifs_info *c,
646 * @offs: write-buffer offset in this logical eraseblock 652 * @offs: write-buffer offset in this logical eraseblock
647 * @avail: number of bytes available in the write-buffer 653 * @avail: number of bytes available in the write-buffer
648 * @used: number of used bytes in the write-buffer 654 * @used: number of used bytes in the write-buffer
655 * @size: write-buffer size (in [@c->min_io_size, @c->max_write_size] range)
649 * @dtype: type of data stored in this LEB (%UBI_LONGTERM, %UBI_SHORTTERM, 656 * @dtype: type of data stored in this LEB (%UBI_LONGTERM, %UBI_SHORTTERM,
650 * %UBI_UNKNOWN) 657 * %UBI_UNKNOWN)
651 * @jhead: journal head the mutex belongs to (note, needed only to shut lockdep 658 * @jhead: journal head the mutex belongs to (note, needed only to shut lockdep
@@ -680,6 +687,7 @@ struct ubifs_wbuf {
680 int offs; 687 int offs;
681 int avail; 688 int avail;
682 int used; 689 int used;
690 int size;
683 int dtype; 691 int dtype;
684 int jhead; 692 int jhead;
685 int (*sync_callback)(struct ubifs_info *c, int lnum, int free, int pad); 693 int (*sync_callback)(struct ubifs_info *c, int lnum, int free, int pad);
@@ -1003,6 +1011,11 @@ struct ubifs_debug_info;
1003 * @bu_mutex: protects the pre-allocated bulk-read buffer and @c->bu 1011 * @bu_mutex: protects the pre-allocated bulk-read buffer and @c->bu
1004 * @bu: pre-allocated bulk-read information 1012 * @bu: pre-allocated bulk-read information
1005 * 1013 *
1014 * @write_reserve_mutex: protects @write_reserve_buf
1015 * @write_reserve_buf: on the write path we allocate memory, which might
1016 * sometimes be unavailable, in which case we use this
1017 * write reserve buffer
1018 *
1006 * @log_lebs: number of logical eraseblocks in the log 1019 * @log_lebs: number of logical eraseblocks in the log
1007 * @log_bytes: log size in bytes 1020 * @log_bytes: log size in bytes
1008 * @log_last: last LEB of the log 1021 * @log_last: last LEB of the log
@@ -1024,7 +1037,12 @@ struct ubifs_debug_info;
1024 * 1037 *
1025 * @min_io_size: minimal input/output unit size 1038 * @min_io_size: minimal input/output unit size
1026 * @min_io_shift: number of bits in @min_io_size minus one 1039 * @min_io_shift: number of bits in @min_io_size minus one
1040 * @max_write_size: maximum amount of bytes the underlying flash can write at a
1041 * time (MTD write buffer size)
1042 * @max_write_shift: number of bits in @max_write_size minus one
1027 * @leb_size: logical eraseblock size in bytes 1043 * @leb_size: logical eraseblock size in bytes
1044 * @leb_start: starting offset of logical eraseblocks within physical
1045 * eraseblocks
1028 * @half_leb_size: half LEB size 1046 * @half_leb_size: half LEB size
1029 * @idx_leb_size: how many bytes of an LEB are effectively available when it is 1047 * @idx_leb_size: how many bytes of an LEB are effectively available when it is
1030 * used to store indexing nodes (@leb_size - @max_idx_node_sz) 1048 * used to store indexing nodes (@leb_size - @max_idx_node_sz)
@@ -1166,22 +1184,21 @@ struct ubifs_debug_info;
1166 * @rp_uid: reserved pool user ID 1184 * @rp_uid: reserved pool user ID
1167 * @rp_gid: reserved pool group ID 1185 * @rp_gid: reserved pool group ID
1168 * 1186 *
1169 * @empty: if the UBI device is empty 1187 * @empty: %1 if the UBI device is empty
1188 * @need_recovery: %1 if the file-system needs recovery
1189 * @replaying: %1 during journal replay
1190 * @mounting: %1 while mounting
1191 * @remounting_rw: %1 while re-mounting from R/O mode to R/W mode
1170 * @replay_tree: temporary tree used during journal replay 1192 * @replay_tree: temporary tree used during journal replay
1171 * @replay_list: temporary list used during journal replay 1193 * @replay_list: temporary list used during journal replay
1172 * @replay_buds: list of buds to replay 1194 * @replay_buds: list of buds to replay
1173 * @cs_sqnum: sequence number of first node in the log (commit start node) 1195 * @cs_sqnum: sequence number of first node in the log (commit start node)
1174 * @replay_sqnum: sequence number of node currently being replayed 1196 * @replay_sqnum: sequence number of node currently being replayed
1175 * @need_recovery: file-system needs recovery
1176 * @replaying: set to %1 during journal replay
1177 * @unclean_leb_list: LEBs to recover when re-mounting R/O mounted FS to R/W 1197 * @unclean_leb_list: LEBs to recover when re-mounting R/O mounted FS to R/W
1178 * mode 1198 * mode
1179 * @rcvrd_mst_node: recovered master node to write when re-mounting R/O mounted 1199 * @rcvrd_mst_node: recovered master node to write when re-mounting R/O mounted
1180 * FS to R/W mode 1200 * FS to R/W mode
1181 * @size_tree: inode size information for recovery 1201 * @size_tree: inode size information for recovery
1182 * @remounting_rw: set while re-mounting from R/O mode to R/W mode
1183 * @always_chk_crc: always check CRCs (while mounting and remounting to R/W
1184 * mode)
1185 * @mount_opts: UBIFS-specific mount options 1202 * @mount_opts: UBIFS-specific mount options
1186 * 1203 *
1187 * @dbg: debugging-related information 1204 * @dbg: debugging-related information
@@ -1250,6 +1267,9 @@ struct ubifs_info {
1250 struct mutex bu_mutex; 1267 struct mutex bu_mutex;
1251 struct bu_info bu; 1268 struct bu_info bu;
1252 1269
1270 struct mutex write_reserve_mutex;
1271 void *write_reserve_buf;
1272
1253 int log_lebs; 1273 int log_lebs;
1254 long long log_bytes; 1274 long long log_bytes;
1255 int log_last; 1275 int log_last;
@@ -1271,7 +1291,10 @@ struct ubifs_info {
1271 1291
1272 int min_io_size; 1292 int min_io_size;
1273 int min_io_shift; 1293 int min_io_shift;
1294 int max_write_size;
1295 int max_write_shift;
1274 int leb_size; 1296 int leb_size;
1297 int leb_start;
1275 int half_leb_size; 1298 int half_leb_size;
1276 int idx_leb_size; 1299 int idx_leb_size;
1277 int leb_cnt; 1300 int leb_cnt;
@@ -1402,19 +1425,19 @@ struct ubifs_info {
1402 gid_t rp_gid; 1425 gid_t rp_gid;
1403 1426
1404 /* The below fields are used only during mounting and re-mounting */ 1427 /* The below fields are used only during mounting and re-mounting */
1405 int empty; 1428 unsigned int empty:1;
1429 unsigned int need_recovery:1;
1430 unsigned int replaying:1;
1431 unsigned int mounting:1;
1432 unsigned int remounting_rw:1;
1406 struct rb_root replay_tree; 1433 struct rb_root replay_tree;
1407 struct list_head replay_list; 1434 struct list_head replay_list;
1408 struct list_head replay_buds; 1435 struct list_head replay_buds;
1409 unsigned long long cs_sqnum; 1436 unsigned long long cs_sqnum;
1410 unsigned long long replay_sqnum; 1437 unsigned long long replay_sqnum;
1411 int need_recovery;
1412 int replaying;
1413 struct list_head unclean_leb_list; 1438 struct list_head unclean_leb_list;
1414 struct ubifs_mst_node *rcvrd_mst_node; 1439 struct ubifs_mst_node *rcvrd_mst_node;
1415 struct rb_root size_tree; 1440 struct rb_root size_tree;
1416 int remounting_rw;
1417 int always_chk_crc;
1418 struct ubifs_mount_opts mount_opts; 1441 struct ubifs_mount_opts mount_opts;
1419 1442
1420#ifdef CONFIG_UBIFS_FS_DEBUG 1443#ifdef CONFIG_UBIFS_FS_DEBUG