aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2008-07-16 18:02:57 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2008-07-16 18:02:57 -0400
commit9c1be0c4712fe760d8969427ef91107e9c062d91 (patch)
tree01210aba49c120116bb99ba031ff86a525ffb63d /fs
parent42fdd144a40f3afaccaa7ea538268bad3596439e (diff)
parent0d7eff873caaeac84de01a1acdca983d2c7ba3fe (diff)
Merge branch 'for_linus' of git://git.infradead.org/~dedekind/ubifs-2.6
* 'for_linus' of git://git.infradead.org/~dedekind/ubifs-2.6: UBIFS: include to compilation UBIFS: add new flash file system UBIFS: add brief documentation MAINTAINERS: add UBIFS section do_mounts: allow UBI root device name VFS: export sync_sb_inodes VFS: move inode_lock into sync_sb_inodes
Diffstat (limited to 'fs')
-rw-r--r--fs/Kconfig3
-rw-r--r--fs/Makefile1
-rw-r--r--fs/fs-writeback.c22
-rw-r--r--fs/ubifs/Kconfig72
-rw-r--r--fs/ubifs/Makefile9
-rw-r--r--fs/ubifs/budget.c731
-rw-r--r--fs/ubifs/commit.c677
-rw-r--r--fs/ubifs/compress.c253
-rw-r--r--fs/ubifs/debug.c2289
-rw-r--r--fs/ubifs/debug.h403
-rw-r--r--fs/ubifs/dir.c1240
-rw-r--r--fs/ubifs/file.c1275
-rw-r--r--fs/ubifs/find.c975
-rw-r--r--fs/ubifs/gc.c773
-rw-r--r--fs/ubifs/io.c914
-rw-r--r--fs/ubifs/ioctl.c204
-rw-r--r--fs/ubifs/journal.c1387
-rw-r--r--fs/ubifs/key.h533
-rw-r--r--fs/ubifs/log.c805
-rw-r--r--fs/ubifs/lprops.c1357
-rw-r--r--fs/ubifs/lpt.c2243
-rw-r--r--fs/ubifs/lpt_commit.c1648
-rw-r--r--fs/ubifs/master.c387
-rw-r--r--fs/ubifs/misc.h342
-rw-r--r--fs/ubifs/orphan.c958
-rw-r--r--fs/ubifs/recovery.c1519
-rw-r--r--fs/ubifs/replay.c1075
-rw-r--r--fs/ubifs/sb.c629
-rw-r--r--fs/ubifs/scan.c362
-rw-r--r--fs/ubifs/shrinker.c322
-rw-r--r--fs/ubifs/super.c1951
-rw-r--r--fs/ubifs/tnc.c2956
-rw-r--r--fs/ubifs/tnc_commit.c1103
-rw-r--r--fs/ubifs/tnc_misc.c494
-rw-r--r--fs/ubifs/ubifs-media.h745
-rw-r--r--fs/ubifs/ubifs.h1649
-rw-r--r--fs/ubifs/xattr.c581
37 files changed, 32877 insertions, 10 deletions
diff --git a/fs/Kconfig b/fs/Kconfig
index 84ab76a206a0..17216ba99c85 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -1375,6 +1375,9 @@ config JFFS2_CMODE_FAVOURLZO
1375 1375
1376endchoice 1376endchoice
1377 1377
1378# UBIFS File system configuration
1379source "fs/ubifs/Kconfig"
1380
1378config CRAMFS 1381config CRAMFS
1379 tristate "Compressed ROM file system support (cramfs)" 1382 tristate "Compressed ROM file system support (cramfs)"
1380 depends on BLOCK 1383 depends on BLOCK
diff --git a/fs/Makefile b/fs/Makefile
index 277b079dec9e..3b2178b4bb66 100644
--- a/fs/Makefile
+++ b/fs/Makefile
@@ -101,6 +101,7 @@ obj-$(CONFIG_NTFS_FS) += ntfs/
101obj-$(CONFIG_UFS_FS) += ufs/ 101obj-$(CONFIG_UFS_FS) += ufs/
102obj-$(CONFIG_EFS_FS) += efs/ 102obj-$(CONFIG_EFS_FS) += efs/
103obj-$(CONFIG_JFFS2_FS) += jffs2/ 103obj-$(CONFIG_JFFS2_FS) += jffs2/
104obj-$(CONFIG_UBIFS_FS) += ubifs/
104obj-$(CONFIG_AFFS_FS) += affs/ 105obj-$(CONFIG_AFFS_FS) += affs/
105obj-$(CONFIG_ROMFS_FS) += romfs/ 106obj-$(CONFIG_ROMFS_FS) += romfs/
106obj-$(CONFIG_QNX4FS_FS) += qnx4/ 107obj-$(CONFIG_QNX4FS_FS) += qnx4/
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index ae45f77765c0..25adfc3c693a 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -424,8 +424,6 @@ __writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
424 * WB_SYNC_HOLD is a hack for sys_sync(): reattach the inode to sb->s_dirty so 424 * WB_SYNC_HOLD is a hack for sys_sync(): reattach the inode to sb->s_dirty so
425 * that it can be located for waiting on in __writeback_single_inode(). 425 * that it can be located for waiting on in __writeback_single_inode().
426 * 426 *
427 * Called under inode_lock.
428 *
429 * If `bdi' is non-zero then we're being asked to writeback a specific queue. 427 * If `bdi' is non-zero then we're being asked to writeback a specific queue.
430 * This function assumes that the blockdev superblock's inodes are backed by 428 * This function assumes that the blockdev superblock's inodes are backed by
431 * a variety of queues, so all inodes are searched. For other superblocks, 429 * a variety of queues, so all inodes are searched. For other superblocks,
@@ -441,11 +439,12 @@ __writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
441 * on the writer throttling path, and we get decent balancing between many 439 * on the writer throttling path, and we get decent balancing between many
442 * throttled threads: we don't want them all piling up on inode_sync_wait. 440 * throttled threads: we don't want them all piling up on inode_sync_wait.
443 */ 441 */
444static void 442void generic_sync_sb_inodes(struct super_block *sb,
445sync_sb_inodes(struct super_block *sb, struct writeback_control *wbc) 443 struct writeback_control *wbc)
446{ 444{
447 const unsigned long start = jiffies; /* livelock avoidance */ 445 const unsigned long start = jiffies; /* livelock avoidance */
448 446
447 spin_lock(&inode_lock);
449 if (!wbc->for_kupdate || list_empty(&sb->s_io)) 448 if (!wbc->for_kupdate || list_empty(&sb->s_io))
450 queue_io(sb, wbc->older_than_this); 449 queue_io(sb, wbc->older_than_this);
451 450
@@ -524,8 +523,16 @@ sync_sb_inodes(struct super_block *sb, struct writeback_control *wbc)
524 if (!list_empty(&sb->s_more_io)) 523 if (!list_empty(&sb->s_more_io))
525 wbc->more_io = 1; 524 wbc->more_io = 1;
526 } 525 }
526 spin_unlock(&inode_lock);
527 return; /* Leave any unwritten inodes on s_io */ 527 return; /* Leave any unwritten inodes on s_io */
528} 528}
529EXPORT_SYMBOL_GPL(generic_sync_sb_inodes);
530
531static void sync_sb_inodes(struct super_block *sb,
532 struct writeback_control *wbc)
533{
534 generic_sync_sb_inodes(sb, wbc);
535}
529 536
530/* 537/*
531 * Start writeback of dirty pagecache data against all unlocked inodes. 538 * Start writeback of dirty pagecache data against all unlocked inodes.
@@ -565,11 +572,8 @@ restart:
565 * be unmounted by the time it is released. 572 * be unmounted by the time it is released.
566 */ 573 */
567 if (down_read_trylock(&sb->s_umount)) { 574 if (down_read_trylock(&sb->s_umount)) {
568 if (sb->s_root) { 575 if (sb->s_root)
569 spin_lock(&inode_lock);
570 sync_sb_inodes(sb, wbc); 576 sync_sb_inodes(sb, wbc);
571 spin_unlock(&inode_lock);
572 }
573 up_read(&sb->s_umount); 577 up_read(&sb->s_umount);
574 } 578 }
575 spin_lock(&sb_lock); 579 spin_lock(&sb_lock);
@@ -607,9 +611,7 @@ void sync_inodes_sb(struct super_block *sb, int wait)
607 (inodes_stat.nr_inodes - inodes_stat.nr_unused) + 611 (inodes_stat.nr_inodes - inodes_stat.nr_unused) +
608 nr_dirty + nr_unstable; 612 nr_dirty + nr_unstable;
609 wbc.nr_to_write += wbc.nr_to_write / 2; /* Bit more for luck */ 613 wbc.nr_to_write += wbc.nr_to_write / 2; /* Bit more for luck */
610 spin_lock(&inode_lock);
611 sync_sb_inodes(sb, &wbc); 614 sync_sb_inodes(sb, &wbc);
612 spin_unlock(&inode_lock);
613} 615}
614 616
615/* 617/*
diff --git a/fs/ubifs/Kconfig b/fs/ubifs/Kconfig
new file mode 100644
index 000000000000..91ceeda7e5bf
--- /dev/null
+++ b/fs/ubifs/Kconfig
@@ -0,0 +1,72 @@
1config UBIFS_FS
2 tristate "UBIFS file system support"
3 select CRC16
4 select CRC32
5 select CRYPTO if UBIFS_FS_ADVANCED_COMPR
6 select CRYPTO if UBIFS_FS_LZO
7 select CRYPTO if UBIFS_FS_ZLIB
8 select CRYPTO_LZO if UBIFS_FS_LZO
9 select CRYPTO_DEFLATE if UBIFS_FS_ZLIB
10 depends on MTD_UBI
11 help
12 UBIFS is a file system for flash devices which works on top of UBI.
13
14config UBIFS_FS_XATTR
15 bool "Extended attributes support"
16 depends on UBIFS_FS
17 help
18 This option enables support of extended attributes.
19
20config UBIFS_FS_ADVANCED_COMPR
21 bool "Advanced compression options"
22 depends on UBIFS_FS
23 help
24 This option allows to explicitly choose which compressions, if any,
25 are enabled in UBIFS. Removing compressors means inbility to read
26 existing file systems.
27
28 If unsure, say 'N'.
29
30config UBIFS_FS_LZO
31 bool "LZO compression support" if UBIFS_FS_ADVANCED_COMPR
32 depends on UBIFS_FS
33 default y
34 help
35 LZO compressor is generally faster then zlib but compresses worse.
36 Say 'Y' if unsure.
37
38config UBIFS_FS_ZLIB
39 bool "ZLIB compression support" if UBIFS_FS_ADVANCED_COMPR
40 depends on UBIFS_FS
41 default y
42 help
43 Zlib copresses better then LZO but it is slower. Say 'Y' if unsure.
44
45# Debugging-related stuff
46config UBIFS_FS_DEBUG
47 bool "Enable debugging"
48 depends on UBIFS_FS
49 select DEBUG_FS
50 select KALLSYMS_ALL
51 help
52 This option enables UBIFS debugging.
53
54config UBIFS_FS_DEBUG_MSG_LVL
55 int "Default message level (0 = no extra messages, 3 = lots)"
56 depends on UBIFS_FS_DEBUG
57 default "0"
58 help
59 This controls the amount of debugging messages produced by UBIFS.
60 If reporting bugs, please try to have available a full dump of the
61 messages at level 1 while the misbehaviour was occurring. Level 2
62 may become necessary if level 1 messages were not enough to find the
63 bug. Generally Level 3 should be avoided.
64
65config UBIFS_FS_DEBUG_CHKS
66 bool "Enable extra checks"
67 depends on UBIFS_FS_DEBUG
68 help
69 If extra checks are enabled UBIFS will check the consistency of its
70 internal data structures during operation. However, UBIFS performance
71 is dramatically slower when this option is selected especially if the
72 file system is large.
diff --git a/fs/ubifs/Makefile b/fs/ubifs/Makefile
new file mode 100644
index 000000000000..80e93c35e496
--- /dev/null
+++ b/fs/ubifs/Makefile
@@ -0,0 +1,9 @@
1obj-$(CONFIG_UBIFS_FS) += ubifs.o
2
3ubifs-y += shrinker.o journal.o file.o dir.o super.o sb.o io.o
4ubifs-y += tnc.o master.o scan.o replay.o log.o commit.o gc.o orphan.o
5ubifs-y += budget.o find.o tnc_commit.o compress.o lpt.o lprops.o
6ubifs-y += recovery.o ioctl.o lpt_commit.o tnc_misc.o
7
8ubifs-$(CONFIG_UBIFS_FS_DEBUG) += debug.o
9ubifs-$(CONFIG_UBIFS_FS_XATTR) += xattr.o
diff --git a/fs/ubifs/budget.c b/fs/ubifs/budget.c
new file mode 100644
index 000000000000..d81fb9ed2b8e
--- /dev/null
+++ b/fs/ubifs/budget.c
@@ -0,0 +1,731 @@
1/*
2 * This file is part of UBIFS.
3 *
4 * Copyright (C) 2006-2008 Nokia Corporation.
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms of the GNU General Public License version 2 as published by
8 * the Free Software Foundation.
9 *
10 * This program is distributed in the hope that it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13 * more details.
14 *
15 * You should have received a copy of the GNU General Public License along with
16 * this program; if not, write to the Free Software Foundation, Inc., 51
17 * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
18 *
19 * Authors: Adrian Hunter
20 * Artem Bityutskiy (Битюцкий Артём)
21 */
22
23/*
24 * This file implements the budgeting sub-system which is responsible for UBIFS
25 * space management.
26 *
27 * Factors such as compression, wasted space at the ends of LEBs, space in other
28 * journal heads, the effect of updates on the index, and so on, make it
29 * impossible to accurately predict the amount of space needed. Consequently
30 * approximations are used.
31 */
32
33#include "ubifs.h"
34#include <linux/writeback.h>
35#include <asm/div64.h>
36
37/*
38 * When pessimistic budget calculations say that there is no enough space,
39 * UBIFS starts writing back dirty inodes and pages, doing garbage collection,
40 * or committing. The below constants define maximum number of times UBIFS
41 * repeats the operations.
42 */
43#define MAX_SHRINK_RETRIES 8
44#define MAX_GC_RETRIES 4
45#define MAX_CMT_RETRIES 2
46#define MAX_NOSPC_RETRIES 1
47
48/*
49 * The below constant defines amount of dirty pages which should be written
50 * back at when trying to shrink the liability.
51 */
52#define NR_TO_WRITE 16
53
54/**
55 * struct retries_info - information about re-tries while making free space.
56 * @prev_liability: previous liability
57 * @shrink_cnt: how many times the liability was shrinked
58 * @shrink_retries: count of liability shrink re-tries (increased when
59 * liability does not shrink)
60 * @try_gc: GC should be tried first
61 * @gc_retries: how many times GC was run
62 * @cmt_retries: how many times commit has been done
63 * @nospc_retries: how many times GC returned %-ENOSPC
64 *
65 * Since we consider budgeting to be the fast-path, and this structure has to
66 * be allocated on stack and zeroed out, we make it smaller using bit-fields.
67 */
68struct retries_info {
69 long long prev_liability;
70 unsigned int shrink_cnt;
71 unsigned int shrink_retries:5;
72 unsigned int try_gc:1;
73 unsigned int gc_retries:4;
74 unsigned int cmt_retries:3;
75 unsigned int nospc_retries:1;
76};
77
78/**
79 * shrink_liability - write-back some dirty pages/inodes.
80 * @c: UBIFS file-system description object
81 * @nr_to_write: how many dirty pages to write-back
82 *
83 * This function shrinks UBIFS liability by means of writing back some amount
84 * of dirty inodes and their pages. Returns the amount of pages which were
85 * written back. The returned value does not include dirty inodes which were
86 * synchronized.
87 *
88 * Note, this function synchronizes even VFS inodes which are locked
89 * (@i_mutex) by the caller of the budgeting function, because write-back does
90 * not touch @i_mutex.
91 */
92static int shrink_liability(struct ubifs_info *c, int nr_to_write)
93{
94 int nr_written;
95 struct writeback_control wbc = {
96 .sync_mode = WB_SYNC_NONE,
97 .range_end = LLONG_MAX,
98 .nr_to_write = nr_to_write,
99 };
100
101 generic_sync_sb_inodes(c->vfs_sb, &wbc);
102 nr_written = nr_to_write - wbc.nr_to_write;
103
104 if (!nr_written) {
105 /*
106 * Re-try again but wait on pages/inodes which are being
107 * written-back concurrently (e.g., by pdflush).
108 */
109 memset(&wbc, 0, sizeof(struct writeback_control));
110 wbc.sync_mode = WB_SYNC_ALL;
111 wbc.range_end = LLONG_MAX;
112 wbc.nr_to_write = nr_to_write;
113 generic_sync_sb_inodes(c->vfs_sb, &wbc);
114 nr_written = nr_to_write - wbc.nr_to_write;
115 }
116
117 dbg_budg("%d pages were written back", nr_written);
118 return nr_written;
119}
120
121
122/**
123 * run_gc - run garbage collector.
124 * @c: UBIFS file-system description object
125 *
126 * This function runs garbage collector to make some more free space. Returns
127 * zero if a free LEB has been produced, %-EAGAIN if commit is required, and a
128 * negative error code in case of failure.
129 */
130static int run_gc(struct ubifs_info *c)
131{
132 int err, lnum;
133
134 /* Make some free space by garbage-collecting dirty space */
135 down_read(&c->commit_sem);
136 lnum = ubifs_garbage_collect(c, 1);
137 up_read(&c->commit_sem);
138 if (lnum < 0)
139 return lnum;
140
141 /* GC freed one LEB, return it to lprops */
142 dbg_budg("GC freed LEB %d", lnum);
143 err = ubifs_return_leb(c, lnum);
144 if (err)
145 return err;
146 return 0;
147}
148
149/**
150 * make_free_space - make more free space on the file-system.
151 * @c: UBIFS file-system description object
152 * @ri: information about previous invocations of this function
153 *
154 * This function is called when an operation cannot be budgeted because there
155 * is supposedly no free space. But in most cases there is some free space:
156 * o budgeting is pessimistic, so it always budgets more then it is actually
157 * needed, so shrinking the liability is one way to make free space - the
158 * cached data will take less space then it was budgeted for;
159 * o GC may turn some dark space into free space (budgeting treats dark space
160 * as not available);
161 * o commit may free some LEB, i.e., turn freeable LEBs into free LEBs.
162 *
163 * So this function tries to do the above. Returns %-EAGAIN if some free space
164 * was presumably made and the caller has to re-try budgeting the operation.
165 * Returns %-ENOSPC if it couldn't do more free space, and other negative error
166 * codes on failures.
167 */
168static int make_free_space(struct ubifs_info *c, struct retries_info *ri)
169{
170 int err;
171
172 /*
173 * If we have some dirty pages and inodes (liability), try to write
174 * them back unless this was tried too many times without effect
175 * already.
176 */
177 if (ri->shrink_retries < MAX_SHRINK_RETRIES && !ri->try_gc) {
178 long long liability;
179
180 spin_lock(&c->space_lock);
181 liability = c->budg_idx_growth + c->budg_data_growth +
182 c->budg_dd_growth;
183 spin_unlock(&c->space_lock);
184
185 if (ri->prev_liability >= liability) {
186 /* Liability does not shrink, next time try GC then */
187 ri->shrink_retries += 1;
188 if (ri->gc_retries < MAX_GC_RETRIES)
189 ri->try_gc = 1;
190 dbg_budg("liability did not shrink: retries %d of %d",
191 ri->shrink_retries, MAX_SHRINK_RETRIES);
192 }
193
194 dbg_budg("force write-back (count %d)", ri->shrink_cnt);
195 shrink_liability(c, NR_TO_WRITE + ri->shrink_cnt);
196
197 ri->prev_liability = liability;
198 ri->shrink_cnt += 1;
199 return -EAGAIN;
200 }
201
202 /*
203 * Try to run garbage collector unless it was already tried too many
204 * times.
205 */
206 if (ri->gc_retries < MAX_GC_RETRIES) {
207 ri->gc_retries += 1;
208 dbg_budg("run GC, retries %d of %d",
209 ri->gc_retries, MAX_GC_RETRIES);
210
211 ri->try_gc = 0;
212 err = run_gc(c);
213 if (!err)
214 return -EAGAIN;
215
216 if (err == -EAGAIN) {
217 dbg_budg("GC asked to commit");
218 err = ubifs_run_commit(c);
219 if (err)
220 return err;
221 return -EAGAIN;
222 }
223
224 if (err != -ENOSPC)
225 return err;
226
227 /*
228 * GC could not make any progress. If this is the first time,
229 * then it makes sense to try to commit, because it might make
230 * some dirty space.
231 */
232 dbg_budg("GC returned -ENOSPC, retries %d",
233 ri->nospc_retries);
234 if (ri->nospc_retries >= MAX_NOSPC_RETRIES)
235 return err;
236 ri->nospc_retries += 1;
237 }
238
239 /* Neither GC nor write-back helped, try to commit */
240 if (ri->cmt_retries < MAX_CMT_RETRIES) {
241 ri->cmt_retries += 1;
242 dbg_budg("run commit, retries %d of %d",
243 ri->cmt_retries, MAX_CMT_RETRIES);
244 err = ubifs_run_commit(c);
245 if (err)
246 return err;
247 return -EAGAIN;
248 }
249 return -ENOSPC;
250}
251
252/**
253 * ubifs_calc_min_idx_lebs - calculate amount of eraseblocks for the index.
254 * @c: UBIFS file-system description object
255 *
256 * This function calculates and returns the number of eraseblocks which should
257 * be kept for index usage.
258 */
259int ubifs_calc_min_idx_lebs(struct ubifs_info *c)
260{
261 int ret;
262 uint64_t idx_size;
263
264 idx_size = c->old_idx_sz + c->budg_idx_growth + c->budg_uncommitted_idx;
265
266 /* And make sure we have twice the index size of space reserved */
267 idx_size <<= 1;
268
269 /*
270 * We do not maintain 'old_idx_size' as 'old_idx_lebs'/'old_idx_bytes'
271 * pair, nor similarly the two variables for the new index size, so we
272 * have to do this costly 64-bit division on fast-path.
273 */
274 if (do_div(idx_size, c->leb_size - c->max_idx_node_sz))
275 ret = idx_size + 1;
276 else
277 ret = idx_size;
278 /*
279 * The index head is not available for the in-the-gaps method, so add an
280 * extra LEB to compensate.
281 */
282 ret += 1;
283 /*
284 * At present the index needs at least 2 LEBs: one for the index head
285 * and one for in-the-gaps method (which currently does not cater for
286 * the index head and so excludes it from consideration).
287 */
288 if (ret < 2)
289 ret = 2;
290 return ret;
291}
292
293/**
294 * ubifs_calc_available - calculate available FS space.
295 * @c: UBIFS file-system description object
296 * @min_idx_lebs: minimum number of LEBs reserved for the index
297 *
298 * This function calculates and returns amount of FS space available for use.
299 */
300long long ubifs_calc_available(const struct ubifs_info *c, int min_idx_lebs)
301{
302 int subtract_lebs;
303 long long available;
304
305 /*
306 * Force the amount available to the total size reported if the used
307 * space is zero.
308 */
309 if (c->lst.total_used <= UBIFS_INO_NODE_SZ &&
310 c->budg_data_growth + c->budg_dd_growth == 0) {
311 /* Do the same calculation as for c->block_cnt */
312 available = c->main_lebs - 2;
313 available *= c->leb_size - c->dark_wm;
314 return available;
315 }
316
317 available = c->main_bytes - c->lst.total_used;
318
319 /*
320 * Now 'available' contains theoretically available flash space
321 * assuming there is no index, so we have to subtract the space which
322 * is reserved for the index.
323 */
324 subtract_lebs = min_idx_lebs;
325
326 /* Take into account that GC reserves one LEB for its own needs */
327 subtract_lebs += 1;
328
329 /*
330 * The GC journal head LEB is not really accessible. And since
331 * different write types go to different heads, we may count only on
332 * one head's space.
333 */
334 subtract_lebs += c->jhead_cnt - 1;
335
336 /* We also reserve one LEB for deletions, which bypass budgeting */
337 subtract_lebs += 1;
338
339 available -= (long long)subtract_lebs * c->leb_size;
340
341 /* Subtract the dead space which is not available for use */
342 available -= c->lst.total_dead;
343
344 /*
345 * Subtract dark space, which might or might not be usable - it depends
346 * on the data which we have on the media and which will be written. If
347 * this is a lot of uncompressed or not-compressible data, the dark
348 * space cannot be used.
349 */
350 available -= c->lst.total_dark;
351
352 /*
353 * However, there is more dark space. The index may be bigger than
354 * @min_idx_lebs. Those extra LEBs are assumed to be available, but
355 * their dark space is not included in total_dark, so it is subtracted
356 * here.
357 */
358 if (c->lst.idx_lebs > min_idx_lebs) {
359 subtract_lebs = c->lst.idx_lebs - min_idx_lebs;
360 available -= subtract_lebs * c->dark_wm;
361 }
362
363 /* The calculations are rough and may end up with a negative number */
364 return available > 0 ? available : 0;
365}
366
367/**
368 * can_use_rp - check whether the user is allowed to use reserved pool.
369 * @c: UBIFS file-system description object
370 *
371 * UBIFS has so-called "reserved pool" which is flash space reserved
372 * for the superuser and for uses whose UID/GID is recorded in UBIFS superblock.
373 * This function checks whether current user is allowed to use reserved pool.
374 * Returns %1 current user is allowed to use reserved pool and %0 otherwise.
375 */
376static int can_use_rp(struct ubifs_info *c)
377{
378 if (current->fsuid == c->rp_uid || capable(CAP_SYS_RESOURCE) ||
379 (c->rp_gid != 0 && in_group_p(c->rp_gid)))
380 return 1;
381 return 0;
382}
383
384/**
385 * do_budget_space - reserve flash space for index and data growth.
386 * @c: UBIFS file-system description object
387 *
388 * This function makes sure UBIFS has enough free eraseblocks for index growth
389 * and data.
390 *
391 * When budgeting index space, UBIFS reserves twice as more LEBs as the index
392 * would take if it was consolidated and written to the flash. This guarantees
393 * that the "in-the-gaps" commit method always succeeds and UBIFS will always
394 * be able to commit dirty index. So this function basically adds amount of
395 * budgeted index space to the size of the current index, multiplies this by 2,
396 * and makes sure this does not exceed the amount of free eraseblocks.
397 *
398 * Notes about @c->min_idx_lebs and @c->lst.idx_lebs variables:
399 * o @c->lst.idx_lebs is the number of LEBs the index currently uses. It might
400 * be large, because UBIFS does not do any index consolidation as long as
401 * there is free space. IOW, the index may take a lot of LEBs, but the LEBs
402 * will contain a lot of dirt.
403 * o @c->min_idx_lebs is the the index presumably takes. IOW, the index may be
404 * consolidated to take up to @c->min_idx_lebs LEBs.
405 *
406 * This function returns zero in case of success, and %-ENOSPC in case of
407 * failure.
408 */
409static int do_budget_space(struct ubifs_info *c)
410{
411 long long outstanding, available;
412 int lebs, rsvd_idx_lebs, min_idx_lebs;
413
414 /* First budget index space */
415 min_idx_lebs = ubifs_calc_min_idx_lebs(c);
416
417 /* Now 'min_idx_lebs' contains number of LEBs to reserve */
418 if (min_idx_lebs > c->lst.idx_lebs)
419 rsvd_idx_lebs = min_idx_lebs - c->lst.idx_lebs;
420 else
421 rsvd_idx_lebs = 0;
422
423 /*
424 * The number of LEBs that are available to be used by the index is:
425 *
426 * @c->lst.empty_lebs + @c->freeable_cnt + @c->idx_gc_cnt -
427 * @c->lst.taken_empty_lebs
428 *
429 * @empty_lebs are available because they are empty. @freeable_cnt are
430 * available because they contain only free and dirty space and the
431 * index allocation always occurs after wbufs are synch'ed.
432 * @idx_gc_cnt are available because they are index LEBs that have been
433 * garbage collected (including trivial GC) and are awaiting the commit
434 * before they can be unmapped - note that the in-the-gaps method will
435 * grab these if it needs them. @taken_empty_lebs are empty_lebs that
436 * have already been allocated for some purpose (also includes those
437 * LEBs on the @idx_gc list).
438 *
439 * Note, @taken_empty_lebs may temporarily be higher by one because of
440 * the way we serialize LEB allocations and budgeting. See a comment in
441 * 'ubifs_find_free_space()'.
442 */
443 lebs = c->lst.empty_lebs + c->freeable_cnt + c->idx_gc_cnt -
444 c->lst.taken_empty_lebs;
445 if (unlikely(rsvd_idx_lebs > lebs)) {
446 dbg_budg("out of indexing space: min_idx_lebs %d (old %d), "
447 "rsvd_idx_lebs %d", min_idx_lebs, c->min_idx_lebs,
448 rsvd_idx_lebs);
449 return -ENOSPC;
450 }
451
452 available = ubifs_calc_available(c, min_idx_lebs);
453 outstanding = c->budg_data_growth + c->budg_dd_growth;
454
455 if (unlikely(available < outstanding)) {
456 dbg_budg("out of data space: available %lld, outstanding %lld",
457 available, outstanding);
458 return -ENOSPC;
459 }
460
461 if (available - outstanding <= c->rp_size && !can_use_rp(c))
462 return -ENOSPC;
463
464 c->min_idx_lebs = min_idx_lebs;
465 return 0;
466}
467
468/**
469 * calc_idx_growth - calculate approximate index growth from budgeting request.
470 * @c: UBIFS file-system description object
471 * @req: budgeting request
472 *
473 * For now we assume each new node adds one znode. But this is rather poor
474 * approximation, though.
475 */
476static int calc_idx_growth(const struct ubifs_info *c,
477 const struct ubifs_budget_req *req)
478{
479 int znodes;
480
481 znodes = req->new_ino + (req->new_page << UBIFS_BLOCKS_PER_PAGE_SHIFT) +
482 req->new_dent;
483 return znodes * c->max_idx_node_sz;
484}
485
486/**
487 * calc_data_growth - calculate approximate amount of new data from budgeting
488 * request.
489 * @c: UBIFS file-system description object
490 * @req: budgeting request
491 */
492static int calc_data_growth(const struct ubifs_info *c,
493 const struct ubifs_budget_req *req)
494{
495 int data_growth;
496
497 data_growth = req->new_ino ? c->inode_budget : 0;
498 if (req->new_page)
499 data_growth += c->page_budget;
500 if (req->new_dent)
501 data_growth += c->dent_budget;
502 data_growth += req->new_ino_d;
503 return data_growth;
504}
505
506/**
507 * calc_dd_growth - calculate approximate amount of data which makes other data
508 * dirty from budgeting request.
509 * @c: UBIFS file-system description object
510 * @req: budgeting request
511 */
512static int calc_dd_growth(const struct ubifs_info *c,
513 const struct ubifs_budget_req *req)
514{
515 int dd_growth;
516
517 dd_growth = req->dirtied_page ? c->page_budget : 0;
518
519 if (req->dirtied_ino)
520 dd_growth += c->inode_budget << (req->dirtied_ino - 1);
521 if (req->mod_dent)
522 dd_growth += c->dent_budget;
523 dd_growth += req->dirtied_ino_d;
524 return dd_growth;
525}
526
527/**
528 * ubifs_budget_space - ensure there is enough space to complete an operation.
529 * @c: UBIFS file-system description object
530 * @req: budget request
531 *
532 * This function allocates budget for an operation. It uses pessimistic
533 * approximation of how much flash space the operation needs. The goal of this
534 * function is to make sure UBIFS always has flash space to flush all dirty
535 * pages, dirty inodes, and dirty znodes (liability). This function may force
536 * commit, garbage-collection or write-back. Returns zero in case of success,
537 * %-ENOSPC if there is no free space and other negative error codes in case of
538 * failures.
539 */
540int ubifs_budget_space(struct ubifs_info *c, struct ubifs_budget_req *req)
541{
542 int uninitialized_var(cmt_retries), uninitialized_var(wb_retries);
543 int err, idx_growth, data_growth, dd_growth;
544 struct retries_info ri;
545
546 ubifs_assert(req->dirtied_ino <= 4);
547 ubifs_assert(req->dirtied_ino_d <= UBIFS_MAX_INO_DATA * 4);
548
549 data_growth = calc_data_growth(c, req);
550 dd_growth = calc_dd_growth(c, req);
551 if (!data_growth && !dd_growth)
552 return 0;
553 idx_growth = calc_idx_growth(c, req);
554 memset(&ri, 0, sizeof(struct retries_info));
555
556again:
557 spin_lock(&c->space_lock);
558 ubifs_assert(c->budg_idx_growth >= 0);
559 ubifs_assert(c->budg_data_growth >= 0);
560 ubifs_assert(c->budg_dd_growth >= 0);
561
562 if (unlikely(c->nospace) && (c->nospace_rp || !can_use_rp(c))) {
563 dbg_budg("no space");
564 spin_unlock(&c->space_lock);
565 return -ENOSPC;
566 }
567
568 c->budg_idx_growth += idx_growth;
569 c->budg_data_growth += data_growth;
570 c->budg_dd_growth += dd_growth;
571
572 err = do_budget_space(c);
573 if (likely(!err)) {
574 req->idx_growth = idx_growth;
575 req->data_growth = data_growth;
576 req->dd_growth = dd_growth;
577 spin_unlock(&c->space_lock);
578 return 0;
579 }
580
581 /* Restore the old values */
582 c->budg_idx_growth -= idx_growth;
583 c->budg_data_growth -= data_growth;
584 c->budg_dd_growth -= dd_growth;
585 spin_unlock(&c->space_lock);
586
587 if (req->fast) {
588 dbg_budg("no space for fast budgeting");
589 return err;
590 }
591
592 err = make_free_space(c, &ri);
593 if (err == -EAGAIN) {
594 dbg_budg("try again");
595 cond_resched();
596 goto again;
597 } else if (err == -ENOSPC) {
598 dbg_budg("FS is full, -ENOSPC");
599 c->nospace = 1;
600 if (can_use_rp(c) || c->rp_size == 0)
601 c->nospace_rp = 1;
602 smp_wmb();
603 } else
604 ubifs_err("cannot budget space, error %d", err);
605 return err;
606}
607
608/**
609 * ubifs_release_budget - release budgeted free space.
610 * @c: UBIFS file-system description object
611 * @req: budget request
612 *
613 * This function releases the space budgeted by 'ubifs_budget_space()'. Note,
614 * since the index changes (which were budgeted for in @req->idx_growth) will
615 * only be written to the media on commit, this function moves the index budget
616 * from @c->budg_idx_growth to @c->budg_uncommitted_idx. The latter will be
617 * zeroed by the commit operation.
618 */
619void ubifs_release_budget(struct ubifs_info *c, struct ubifs_budget_req *req)
620{
621 ubifs_assert(req->dirtied_ino <= 4);
622 ubifs_assert(req->dirtied_ino_d <= UBIFS_MAX_INO_DATA * 4);
623 if (!req->recalculate) {
624 ubifs_assert(req->idx_growth >= 0);
625 ubifs_assert(req->data_growth >= 0);
626 ubifs_assert(req->dd_growth >= 0);
627 }
628
629 if (req->recalculate) {
630 req->data_growth = calc_data_growth(c, req);
631 req->dd_growth = calc_dd_growth(c, req);
632 req->idx_growth = calc_idx_growth(c, req);
633 }
634
635 if (!req->data_growth && !req->dd_growth)
636 return;
637
638 c->nospace = c->nospace_rp = 0;
639 smp_wmb();
640
641 spin_lock(&c->space_lock);
642 c->budg_idx_growth -= req->idx_growth;
643 c->budg_uncommitted_idx += req->idx_growth;
644 c->budg_data_growth -= req->data_growth;
645 c->budg_dd_growth -= req->dd_growth;
646 c->min_idx_lebs = ubifs_calc_min_idx_lebs(c);
647
648 ubifs_assert(c->budg_idx_growth >= 0);
649 ubifs_assert(c->budg_data_growth >= 0);
650 ubifs_assert(c->min_idx_lebs < c->main_lebs);
651 spin_unlock(&c->space_lock);
652}
653
654/**
655 * ubifs_convert_page_budget - convert budget of a new page.
656 * @c: UBIFS file-system description object
657 *
658 * This function converts budget which was allocated for a new page of data to
659 * the budget of changing an existing page of data. The latter is smaller then
660 * the former, so this function only does simple re-calculation and does not
661 * involve any write-back.
662 */
663void ubifs_convert_page_budget(struct ubifs_info *c)
664{
665 spin_lock(&c->space_lock);
666 /* Release the index growth reservation */
667 c->budg_idx_growth -= c->max_idx_node_sz << UBIFS_BLOCKS_PER_PAGE_SHIFT;
668 /* Release the data growth reservation */
669 c->budg_data_growth -= c->page_budget;
670 /* Increase the dirty data growth reservation instead */
671 c->budg_dd_growth += c->page_budget;
672 /* And re-calculate the indexing space reservation */
673 c->min_idx_lebs = ubifs_calc_min_idx_lebs(c);
674 spin_unlock(&c->space_lock);
675}
676
677/**
678 * ubifs_release_dirty_inode_budget - release dirty inode budget.
679 * @c: UBIFS file-system description object
680 * @ui: UBIFS inode to release the budget for
681 *
682 * This function releases budget corresponding to a dirty inode. It is usually
683 * called when after the inode has been written to the media and marked as
684 * clean.
685 */
686void ubifs_release_dirty_inode_budget(struct ubifs_info *c,
687 struct ubifs_inode *ui)
688{
689 struct ubifs_budget_req req = {.dd_growth = c->inode_budget,
690 .dirtied_ino_d = ui->data_len};
691
692 ubifs_release_budget(c, &req);
693}
694
695/**
696 * ubifs_budg_get_free_space - return amount of free space.
697 * @c: UBIFS file-system description object
698 *
699 * This function returns amount of free space on the file-system.
700 */
701long long ubifs_budg_get_free_space(struct ubifs_info *c)
702{
703 int min_idx_lebs, rsvd_idx_lebs;
704 long long available, outstanding, free;
705
706 /* Do exactly the same calculations as in 'do_budget_space()' */
707 spin_lock(&c->space_lock);
708 min_idx_lebs = ubifs_calc_min_idx_lebs(c);
709
710 if (min_idx_lebs > c->lst.idx_lebs)
711 rsvd_idx_lebs = min_idx_lebs - c->lst.idx_lebs;
712 else
713 rsvd_idx_lebs = 0;
714
715 if (rsvd_idx_lebs > c->lst.empty_lebs + c->freeable_cnt + c->idx_gc_cnt
716 - c->lst.taken_empty_lebs) {
717 spin_unlock(&c->space_lock);
718 return 0;
719 }
720
721 available = ubifs_calc_available(c, min_idx_lebs);
722 outstanding = c->budg_data_growth + c->budg_dd_growth;
723 c->min_idx_lebs = min_idx_lebs;
724 spin_unlock(&c->space_lock);
725
726 if (available > outstanding)
727 free = ubifs_reported_space(c, available - outstanding);
728 else
729 free = 0;
730 return free;
731}
diff --git a/fs/ubifs/commit.c b/fs/ubifs/commit.c
new file mode 100644
index 000000000000..3b516316c9b3
--- /dev/null
+++ b/fs/ubifs/commit.c
@@ -0,0 +1,677 @@
1/*
2 * This file is part of UBIFS.
3 *
4 * Copyright (C) 2006-2008 Nokia Corporation.
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms of the GNU General Public License version 2 as published by
8 * the Free Software Foundation.
9 *
10 * This program is distributed in the hope that it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13 * more details.
14 *
15 * You should have received a copy of the GNU General Public License along with
16 * this program; if not, write to the Free Software Foundation, Inc., 51
17 * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
18 *
19 * Authors: Adrian Hunter
20 * Artem Bityutskiy (Битюцкий Артём)
21 */
22
23/*
24 * This file implements functions that manage the running of the commit process.
25 * Each affected module has its own functions to accomplish their part in the
26 * commit and those functions are called here.
27 *
28 * The commit is the process whereby all updates to the index and LEB properties
29 * are written out together and the journal becomes empty. This keeps the
30 * file system consistent - at all times the state can be recreated by reading
31 * the index and LEB properties and then replaying the journal.
32 *
33 * The commit is split into two parts named "commit start" and "commit end".
34 * During commit start, the commit process has exclusive access to the journal
35 * by holding the commit semaphore down for writing. As few I/O operations as
36 * possible are performed during commit start, instead the nodes that are to be
37 * written are merely identified. During commit end, the commit semaphore is no
38 * longer held and the journal is again in operation, allowing users to continue
39 * to use the file system while the bulk of the commit I/O is performed. The
40 * purpose of this two-step approach is to prevent the commit from causing any
41 * latency blips. Note that in any case, the commit does not prevent lookups
42 * (as permitted by the TNC mutex), or access to VFS data structures e.g. page
43 * cache.
44 */
45
46#include <linux/freezer.h>
47#include <linux/kthread.h>
48#include "ubifs.h"
49
50/**
51 * do_commit - commit the journal.
52 * @c: UBIFS file-system description object
53 *
54 * This function implements UBIFS commit. It has to be called with commit lock
55 * locked. Returns zero in case of success and a negative error code in case of
56 * failure.
57 */
58static int do_commit(struct ubifs_info *c)
59{
60 int err, new_ltail_lnum, old_ltail_lnum, i;
61 struct ubifs_zbranch zroot;
62 struct ubifs_lp_stats lst;
63
64 dbg_cmt("start");
65 if (c->ro_media) {
66 err = -EROFS;
67 goto out_up;
68 }
69
70 /* Sync all write buffers (necessary for recovery) */
71 for (i = 0; i < c->jhead_cnt; i++) {
72 err = ubifs_wbuf_sync(&c->jheads[i].wbuf);
73 if (err)
74 goto out_up;
75 }
76
77 err = ubifs_gc_start_commit(c);
78 if (err)
79 goto out_up;
80 err = dbg_check_lprops(c);
81 if (err)
82 goto out_up;
83 err = ubifs_log_start_commit(c, &new_ltail_lnum);
84 if (err)
85 goto out_up;
86 err = ubifs_tnc_start_commit(c, &zroot);
87 if (err)
88 goto out_up;
89 err = ubifs_lpt_start_commit(c);
90 if (err)
91 goto out_up;
92 err = ubifs_orphan_start_commit(c);
93 if (err)
94 goto out_up;
95
96 ubifs_get_lp_stats(c, &lst);
97
98 up_write(&c->commit_sem);
99
100 err = ubifs_tnc_end_commit(c);
101 if (err)
102 goto out;
103 err = ubifs_lpt_end_commit(c);
104 if (err)
105 goto out;
106 err = ubifs_orphan_end_commit(c);
107 if (err)
108 goto out;
109 old_ltail_lnum = c->ltail_lnum;
110 err = ubifs_log_end_commit(c, new_ltail_lnum);
111 if (err)
112 goto out;
113 err = dbg_check_old_index(c, &zroot);
114 if (err)
115 goto out;
116
117 mutex_lock(&c->mst_mutex);
118 c->mst_node->cmt_no = cpu_to_le64(++c->cmt_no);
119 c->mst_node->log_lnum = cpu_to_le32(new_ltail_lnum);
120 c->mst_node->root_lnum = cpu_to_le32(zroot.lnum);
121 c->mst_node->root_offs = cpu_to_le32(zroot.offs);
122 c->mst_node->root_len = cpu_to_le32(zroot.len);
123 c->mst_node->ihead_lnum = cpu_to_le32(c->ihead_lnum);
124 c->mst_node->ihead_offs = cpu_to_le32(c->ihead_offs);
125 c->mst_node->index_size = cpu_to_le64(c->old_idx_sz);
126 c->mst_node->lpt_lnum = cpu_to_le32(c->lpt_lnum);
127 c->mst_node->lpt_offs = cpu_to_le32(c->lpt_offs);
128 c->mst_node->nhead_lnum = cpu_to_le32(c->nhead_lnum);
129 c->mst_node->nhead_offs = cpu_to_le32(c->nhead_offs);
130 c->mst_node->ltab_lnum = cpu_to_le32(c->ltab_lnum);
131 c->mst_node->ltab_offs = cpu_to_le32(c->ltab_offs);
132 c->mst_node->lsave_lnum = cpu_to_le32(c->lsave_lnum);
133 c->mst_node->lsave_offs = cpu_to_le32(c->lsave_offs);
134 c->mst_node->lscan_lnum = cpu_to_le32(c->lscan_lnum);
135 c->mst_node->empty_lebs = cpu_to_le32(lst.empty_lebs);
136 c->mst_node->idx_lebs = cpu_to_le32(lst.idx_lebs);
137 c->mst_node->total_free = cpu_to_le64(lst.total_free);
138 c->mst_node->total_dirty = cpu_to_le64(lst.total_dirty);
139 c->mst_node->total_used = cpu_to_le64(lst.total_used);
140 c->mst_node->total_dead = cpu_to_le64(lst.total_dead);
141 c->mst_node->total_dark = cpu_to_le64(lst.total_dark);
142 if (c->no_orphs)
143 c->mst_node->flags |= cpu_to_le32(UBIFS_MST_NO_ORPHS);
144 else
145 c->mst_node->flags &= ~cpu_to_le32(UBIFS_MST_NO_ORPHS);
146 err = ubifs_write_master(c);
147 mutex_unlock(&c->mst_mutex);
148 if (err)
149 goto out;
150
151 err = ubifs_log_post_commit(c, old_ltail_lnum);
152 if (err)
153 goto out;
154 err = ubifs_gc_end_commit(c);
155 if (err)
156 goto out;
157 err = ubifs_lpt_post_commit(c);
158 if (err)
159 goto out;
160
161 spin_lock(&c->cs_lock);
162 c->cmt_state = COMMIT_RESTING;
163 wake_up(&c->cmt_wq);
164 dbg_cmt("commit end");
165 spin_unlock(&c->cs_lock);
166
167 return 0;
168
169out_up:
170 up_write(&c->commit_sem);
171out:
172 ubifs_err("commit failed, error %d", err);
173 spin_lock(&c->cs_lock);
174 c->cmt_state = COMMIT_BROKEN;
175 wake_up(&c->cmt_wq);
176 spin_unlock(&c->cs_lock);
177 ubifs_ro_mode(c, err);
178 return err;
179}
180
181/**
182 * run_bg_commit - run background commit if it is needed.
183 * @c: UBIFS file-system description object
184 *
185 * This function runs background commit if it is needed. Returns zero in case
186 * of success and a negative error code in case of failure.
187 */
188static int run_bg_commit(struct ubifs_info *c)
189{
190 spin_lock(&c->cs_lock);
191 /*
192 * Run background commit only if background commit was requested or if
193 * commit is required.
194 */
195 if (c->cmt_state != COMMIT_BACKGROUND &&
196 c->cmt_state != COMMIT_REQUIRED)
197 goto out;
198 spin_unlock(&c->cs_lock);
199
200 down_write(&c->commit_sem);
201 spin_lock(&c->cs_lock);
202 if (c->cmt_state == COMMIT_REQUIRED)
203 c->cmt_state = COMMIT_RUNNING_REQUIRED;
204 else if (c->cmt_state == COMMIT_BACKGROUND)
205 c->cmt_state = COMMIT_RUNNING_BACKGROUND;
206 else
207 goto out_cmt_unlock;
208 spin_unlock(&c->cs_lock);
209
210 return do_commit(c);
211
212out_cmt_unlock:
213 up_write(&c->commit_sem);
214out:
215 spin_unlock(&c->cs_lock);
216 return 0;
217}
218
219/**
220 * ubifs_bg_thread - UBIFS background thread function.
221 * @info: points to the file-system description object
222 *
223 * This function implements various file-system background activities:
224 * o when a write-buffer timer expires it synchronizes the appropriate
225 * write-buffer;
226 * o when the journal is about to be full, it starts in-advance commit.
227 *
228 * Note, other stuff like background garbage collection may be added here in
229 * future.
230 */
231int ubifs_bg_thread(void *info)
232{
233 int err;
234 struct ubifs_info *c = info;
235
236 ubifs_msg("background thread \"%s\" started, PID %d",
237 c->bgt_name, current->pid);
238 set_freezable();
239
240 while (1) {
241 if (kthread_should_stop())
242 break;
243
244 if (try_to_freeze())
245 continue;
246
247 set_current_state(TASK_INTERRUPTIBLE);
248 /* Check if there is something to do */
249 if (!c->need_bgt) {
250 /*
251 * Nothing prevents us from going sleep now and
252 * be never woken up and block the task which
253 * could wait in 'kthread_stop()' forever.
254 */
255 if (kthread_should_stop())
256 break;
257 schedule();
258 continue;
259 } else
260 __set_current_state(TASK_RUNNING);
261
262 c->need_bgt = 0;
263 err = ubifs_bg_wbufs_sync(c);
264 if (err)
265 ubifs_ro_mode(c, err);
266
267 run_bg_commit(c);
268 cond_resched();
269 }
270
271 dbg_msg("background thread \"%s\" stops", c->bgt_name);
272 return 0;
273}
274
275/**
276 * ubifs_commit_required - set commit state to "required".
277 * @c: UBIFS file-system description object
278 *
279 * This function is called if a commit is required but cannot be done from the
280 * calling function, so it is just flagged instead.
281 */
282void ubifs_commit_required(struct ubifs_info *c)
283{
284 spin_lock(&c->cs_lock);
285 switch (c->cmt_state) {
286 case COMMIT_RESTING:
287 case COMMIT_BACKGROUND:
288 dbg_cmt("old: %s, new: %s", dbg_cstate(c->cmt_state),
289 dbg_cstate(COMMIT_REQUIRED));
290 c->cmt_state = COMMIT_REQUIRED;
291 break;
292 case COMMIT_RUNNING_BACKGROUND:
293 dbg_cmt("old: %s, new: %s", dbg_cstate(c->cmt_state),
294 dbg_cstate(COMMIT_RUNNING_REQUIRED));
295 c->cmt_state = COMMIT_RUNNING_REQUIRED;
296 break;
297 case COMMIT_REQUIRED:
298 case COMMIT_RUNNING_REQUIRED:
299 case COMMIT_BROKEN:
300 break;
301 }
302 spin_unlock(&c->cs_lock);
303}
304
305/**
306 * ubifs_request_bg_commit - notify the background thread to do a commit.
307 * @c: UBIFS file-system description object
308 *
309 * This function is called if the journal is full enough to make a commit
310 * worthwhile, so background thread is kicked to start it.
311 */
312void ubifs_request_bg_commit(struct ubifs_info *c)
313{
314 spin_lock(&c->cs_lock);
315 if (c->cmt_state == COMMIT_RESTING) {
316 dbg_cmt("old: %s, new: %s", dbg_cstate(c->cmt_state),
317 dbg_cstate(COMMIT_BACKGROUND));
318 c->cmt_state = COMMIT_BACKGROUND;
319 spin_unlock(&c->cs_lock);
320 ubifs_wake_up_bgt(c);
321 } else
322 spin_unlock(&c->cs_lock);
323}
324
325/**
326 * wait_for_commit - wait for commit.
327 * @c: UBIFS file-system description object
328 *
329 * This function sleeps until the commit operation is no longer running.
330 */
331static int wait_for_commit(struct ubifs_info *c)
332{
333 dbg_cmt("pid %d goes sleep", current->pid);
334
335 /*
336 * The following sleeps if the condition is false, and will be woken
337 * when the commit ends. It is possible, although very unlikely, that we
338 * will wake up and see the subsequent commit running, rather than the
339 * one we were waiting for, and go back to sleep. However, we will be
340 * woken again, so there is no danger of sleeping forever.
341 */
342 wait_event(c->cmt_wq, c->cmt_state != COMMIT_RUNNING_BACKGROUND &&
343 c->cmt_state != COMMIT_RUNNING_REQUIRED);
344 dbg_cmt("commit finished, pid %d woke up", current->pid);
345 return 0;
346}
347
348/**
349 * ubifs_run_commit - run or wait for commit.
350 * @c: UBIFS file-system description object
351 *
352 * This function runs commit and returns zero in case of success and a negative
353 * error code in case of failure.
354 */
355int ubifs_run_commit(struct ubifs_info *c)
356{
357 int err = 0;
358
359 spin_lock(&c->cs_lock);
360 if (c->cmt_state == COMMIT_BROKEN) {
361 err = -EINVAL;
362 goto out;
363 }
364
365 if (c->cmt_state == COMMIT_RUNNING_BACKGROUND)
366 /*
367 * We set the commit state to 'running required' to indicate
368 * that we want it to complete as quickly as possible.
369 */
370 c->cmt_state = COMMIT_RUNNING_REQUIRED;
371
372 if (c->cmt_state == COMMIT_RUNNING_REQUIRED) {
373 spin_unlock(&c->cs_lock);
374 return wait_for_commit(c);
375 }
376 spin_unlock(&c->cs_lock);
377
378 /* Ok, the commit is indeed needed */
379
380 down_write(&c->commit_sem);
381 spin_lock(&c->cs_lock);
382 /*
383 * Since we unlocked 'c->cs_lock', the state may have changed, so
384 * re-check it.
385 */
386 if (c->cmt_state == COMMIT_BROKEN) {
387 err = -EINVAL;
388 goto out_cmt_unlock;
389 }
390
391 if (c->cmt_state == COMMIT_RUNNING_BACKGROUND)
392 c->cmt_state = COMMIT_RUNNING_REQUIRED;
393
394 if (c->cmt_state == COMMIT_RUNNING_REQUIRED) {
395 up_write(&c->commit_sem);
396 spin_unlock(&c->cs_lock);
397 return wait_for_commit(c);
398 }
399 c->cmt_state = COMMIT_RUNNING_REQUIRED;
400 spin_unlock(&c->cs_lock);
401
402 err = do_commit(c);
403 return err;
404
405out_cmt_unlock:
406 up_write(&c->commit_sem);
407out:
408 spin_unlock(&c->cs_lock);
409 return err;
410}
411
412/**
413 * ubifs_gc_should_commit - determine if it is time for GC to run commit.
414 * @c: UBIFS file-system description object
415 *
416 * This function is called by garbage collection to determine if commit should
417 * be run. If commit state is @COMMIT_BACKGROUND, which means that the journal
418 * is full enough to start commit, this function returns true. It is not
419 * absolutely necessary to commit yet, but it feels like this should be better
420 * then to keep doing GC. This function returns %1 if GC has to initiate commit
421 * and %0 if not.
422 */
423int ubifs_gc_should_commit(struct ubifs_info *c)
424{
425 int ret = 0;
426
427 spin_lock(&c->cs_lock);
428 if (c->cmt_state == COMMIT_BACKGROUND) {
429 dbg_cmt("commit required now");
430 c->cmt_state = COMMIT_REQUIRED;
431 } else
432 dbg_cmt("commit not requested");
433 if (c->cmt_state == COMMIT_REQUIRED)
434 ret = 1;
435 spin_unlock(&c->cs_lock);
436 return ret;
437}
438
439#ifdef CONFIG_UBIFS_FS_DEBUG
440
441/**
442 * struct idx_node - hold index nodes during index tree traversal.
443 * @list: list
444 * @iip: index in parent (slot number of this indexing node in the parent
445 * indexing node)
446 * @upper_key: all keys in this indexing node have to be less or equivalent to
447 * this key
448 * @idx: index node (8-byte aligned because all node structures must be 8-byte
449 * aligned)
450 */
451struct idx_node {
452 struct list_head list;
453 int iip;
454 union ubifs_key upper_key;
455 struct ubifs_idx_node idx __attribute__((aligned(8)));
456};
457
458/**
459 * dbg_old_index_check_init - get information for the next old index check.
460 * @c: UBIFS file-system description object
461 * @zroot: root of the index
462 *
463 * This function records information about the index that will be needed for the
464 * next old index check i.e. 'dbg_check_old_index()'.
465 *
466 * This function returns %0 on success and a negative error code on failure.
467 */
468int dbg_old_index_check_init(struct ubifs_info *c, struct ubifs_zbranch *zroot)
469{
470 struct ubifs_idx_node *idx;
471 int lnum, offs, len, err = 0;
472
473 c->old_zroot = *zroot;
474
475 lnum = c->old_zroot.lnum;
476 offs = c->old_zroot.offs;
477 len = c->old_zroot.len;
478
479 idx = kmalloc(c->max_idx_node_sz, GFP_NOFS);
480 if (!idx)
481 return -ENOMEM;
482
483 err = ubifs_read_node(c, idx, UBIFS_IDX_NODE, len, lnum, offs);
484 if (err)
485 goto out;
486
487 c->old_zroot_level = le16_to_cpu(idx->level);
488 c->old_zroot_sqnum = le64_to_cpu(idx->ch.sqnum);
489out:
490 kfree(idx);
491 return err;
492}
493
494/**
495 * dbg_check_old_index - check the old copy of the index.
496 * @c: UBIFS file-system description object
497 * @zroot: root of the new index
498 *
499 * In order to be able to recover from an unclean unmount, a complete copy of
500 * the index must exist on flash. This is the "old" index. The commit process
501 * must write the "new" index to flash without overwriting or destroying any
502 * part of the old index. This function is run at commit end in order to check
503 * that the old index does indeed exist completely intact.
504 *
505 * This function returns %0 on success and a negative error code on failure.
506 */
507int dbg_check_old_index(struct ubifs_info *c, struct ubifs_zbranch *zroot)
508{
509 int lnum, offs, len, err = 0, uninitialized_var(last_level), child_cnt;
510 int first = 1, iip;
511 union ubifs_key lower_key, upper_key, l_key, u_key;
512 unsigned long long uninitialized_var(last_sqnum);
513 struct ubifs_idx_node *idx;
514 struct list_head list;
515 struct idx_node *i;
516 size_t sz;
517
518 if (!(ubifs_chk_flags & UBIFS_CHK_OLD_IDX))
519 goto out;
520
521 INIT_LIST_HEAD(&list);
522
523 sz = sizeof(struct idx_node) + ubifs_idx_node_sz(c, c->fanout) -
524 UBIFS_IDX_NODE_SZ;
525
526 /* Start at the old zroot */
527 lnum = c->old_zroot.lnum;
528 offs = c->old_zroot.offs;
529 len = c->old_zroot.len;
530 iip = 0;
531
532 /*
533 * Traverse the index tree preorder depth-first i.e. do a node and then
534 * its subtrees from left to right.
535 */
536 while (1) {
537 struct ubifs_branch *br;
538
539 /* Get the next index node */
540 i = kmalloc(sz, GFP_NOFS);
541 if (!i) {
542 err = -ENOMEM;
543 goto out_free;
544 }
545 i->iip = iip;
546 /* Keep the index nodes on our path in a linked list */
547 list_add_tail(&i->list, &list);
548 /* Read the index node */
549 idx = &i->idx;
550 err = ubifs_read_node(c, idx, UBIFS_IDX_NODE, len, lnum, offs);
551 if (err)
552 goto out_free;
553 /* Validate index node */
554 child_cnt = le16_to_cpu(idx->child_cnt);
555 if (child_cnt < 1 || child_cnt > c->fanout) {
556 err = 1;
557 goto out_dump;
558 }
559 if (first) {
560 first = 0;
561 /* Check root level and sqnum */
562 if (le16_to_cpu(idx->level) != c->old_zroot_level) {
563 err = 2;
564 goto out_dump;
565 }
566 if (le64_to_cpu(idx->ch.sqnum) != c->old_zroot_sqnum) {
567 err = 3;
568 goto out_dump;
569 }
570 /* Set last values as though root had a parent */
571 last_level = le16_to_cpu(idx->level) + 1;
572 last_sqnum = le64_to_cpu(idx->ch.sqnum) + 1;
573 key_read(c, ubifs_idx_key(c, idx), &lower_key);
574 highest_ino_key(c, &upper_key, INUM_WATERMARK);
575 }
576 key_copy(c, &upper_key, &i->upper_key);
577 if (le16_to_cpu(idx->level) != last_level - 1) {
578 err = 3;
579 goto out_dump;
580 }
581 /*
582 * The index is always written bottom up hence a child's sqnum
583 * is always less than the parents.
584 */
585 if (le64_to_cpu(idx->ch.sqnum) >= last_sqnum) {
586 err = 4;
587 goto out_dump;
588 }
589 /* Check key range */
590 key_read(c, ubifs_idx_key(c, idx), &l_key);
591 br = ubifs_idx_branch(c, idx, child_cnt - 1);
592 key_read(c, &br->key, &u_key);
593 if (keys_cmp(c, &lower_key, &l_key) > 0) {
594 err = 5;
595 goto out_dump;
596 }
597 if (keys_cmp(c, &upper_key, &u_key) < 0) {
598 err = 6;
599 goto out_dump;
600 }
601 if (keys_cmp(c, &upper_key, &u_key) == 0)
602 if (!is_hash_key(c, &u_key)) {
603 err = 7;
604 goto out_dump;
605 }
606 /* Go to next index node */
607 if (le16_to_cpu(idx->level) == 0) {
608 /* At the bottom, so go up until can go right */
609 while (1) {
610 /* Drop the bottom of the list */
611 list_del(&i->list);
612 kfree(i);
613 /* No more list means we are done */
614 if (list_empty(&list))
615 goto out;
616 /* Look at the new bottom */
617 i = list_entry(list.prev, struct idx_node,
618 list);
619 idx = &i->idx;
620 /* Can we go right */
621 if (iip + 1 < le16_to_cpu(idx->child_cnt)) {
622 iip = iip + 1;
623 break;
624 } else
625 /* Nope, so go up again */
626 iip = i->iip;
627 }
628 } else
629 /* Go down left */
630 iip = 0;
631 /*
632 * We have the parent in 'idx' and now we set up for reading the
633 * child pointed to by slot 'iip'.
634 */
635 last_level = le16_to_cpu(idx->level);
636 last_sqnum = le64_to_cpu(idx->ch.sqnum);
637 br = ubifs_idx_branch(c, idx, iip);
638 lnum = le32_to_cpu(br->lnum);
639 offs = le32_to_cpu(br->offs);
640 len = le32_to_cpu(br->len);
641 key_read(c, &br->key, &lower_key);
642 if (iip + 1 < le16_to_cpu(idx->child_cnt)) {
643 br = ubifs_idx_branch(c, idx, iip + 1);
644 key_read(c, &br->key, &upper_key);
645 } else
646 key_copy(c, &i->upper_key, &upper_key);
647 }
648out:
649 err = dbg_old_index_check_init(c, zroot);
650 if (err)
651 goto out_free;
652
653 return 0;
654
655out_dump:
656 dbg_err("dumping index node (iip=%d)", i->iip);
657 dbg_dump_node(c, idx);
658 list_del(&i->list);
659 kfree(i);
660 if (!list_empty(&list)) {
661 i = list_entry(list.prev, struct idx_node, list);
662 dbg_err("dumping parent index node");
663 dbg_dump_node(c, &i->idx);
664 }
665out_free:
666 while (!list_empty(&list)) {
667 i = list_entry(list.next, struct idx_node, list);
668 list_del(&i->list);
669 kfree(i);
670 }
671 ubifs_err("failed, error %d", err);
672 if (err > 0)
673 err = -EINVAL;
674 return err;
675}
676
677#endif /* CONFIG_UBIFS_FS_DEBUG */
diff --git a/fs/ubifs/compress.c b/fs/ubifs/compress.c
new file mode 100644
index 000000000000..5bb51dac3c16
--- /dev/null
+++ b/fs/ubifs/compress.c
@@ -0,0 +1,253 @@
1/*
2 * This file is part of UBIFS.
3 *
4 * Copyright (C) 2006-2008 Nokia Corporation.
5 * Copyright (C) 2006, 2007 University of Szeged, Hungary
6 *
7 * This program is free software; you can redistribute it and/or modify it
8 * under the terms of the GNU General Public License version 2 as published by
9 * the Free Software Foundation.
10 *
11 * This program is distributed in the hope that it will be useful, but WITHOUT
12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
14 * more details.
15 *
16 * You should have received a copy of the GNU General Public License along with
17 * this program; if not, write to the Free Software Foundation, Inc., 51
18 * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
19 *
20 * Authors: Adrian Hunter
21 * Artem Bityutskiy (Битюцкий Артём)
22 * Zoltan Sogor
23 */
24
25/*
26 * This file provides a single place to access to compression and
27 * decompression.
28 */
29
30#include <linux/crypto.h>
31#include "ubifs.h"
32
33/* Fake description object for the "none" compressor */
34static struct ubifs_compressor none_compr = {
35 .compr_type = UBIFS_COMPR_NONE,
36 .name = "no compression",
37 .capi_name = "",
38};
39
40#ifdef CONFIG_UBIFS_FS_LZO
41static DEFINE_MUTEX(lzo_mutex);
42
43static struct ubifs_compressor lzo_compr = {
44 .compr_type = UBIFS_COMPR_LZO,
45 .comp_mutex = &lzo_mutex,
46 .name = "LZO",
47 .capi_name = "lzo",
48};
49#else
50static struct ubifs_compressor lzo_compr = {
51 .compr_type = UBIFS_COMPR_LZO,
52 .name = "LZO",
53};
54#endif
55
56#ifdef CONFIG_UBIFS_FS_ZLIB
57static DEFINE_MUTEX(deflate_mutex);
58static DEFINE_MUTEX(inflate_mutex);
59
60static struct ubifs_compressor zlib_compr = {
61 .compr_type = UBIFS_COMPR_ZLIB,
62 .comp_mutex = &deflate_mutex,
63 .decomp_mutex = &inflate_mutex,
64 .name = "zlib",
65 .capi_name = "deflate",
66};
67#else
68static struct ubifs_compressor zlib_compr = {
69 .compr_type = UBIFS_COMPR_ZLIB,
70 .name = "zlib",
71};
72#endif
73
74/* All UBIFS compressors */
75struct ubifs_compressor *ubifs_compressors[UBIFS_COMPR_TYPES_CNT];
76
77/**
78 * ubifs_compress - compress data.
79 * @in_buf: data to compress
80 * @in_len: length of the data to compress
81 * @out_buf: output buffer where compressed data should be stored
82 * @out_len: output buffer length is returned here
83 * @compr_type: type of compression to use on enter, actually used compression
84 * type on exit
85 *
86 * This function compresses input buffer @in_buf of length @in_len and stores
87 * the result in the output buffer @out_buf and the resulting length in
88 * @out_len. If the input buffer does not compress, it is just copied to the
89 * @out_buf. The same happens if @compr_type is %UBIFS_COMPR_NONE or if
90 * compression error occurred.
91 *
92 * Note, if the input buffer was not compressed, it is copied to the output
93 * buffer and %UBIFS_COMPR_NONE is returned in @compr_type.
94 *
95 * This functions returns %0 on success or a negative error code on failure.
96 */
97void ubifs_compress(const void *in_buf, int in_len, void *out_buf, int *out_len,
98 int *compr_type)
99{
100 int err;
101 struct ubifs_compressor *compr = ubifs_compressors[*compr_type];
102
103 if (*compr_type == UBIFS_COMPR_NONE)
104 goto no_compr;
105
106 /* If the input data is small, do not even try to compress it */
107 if (in_len < UBIFS_MIN_COMPR_LEN)
108 goto no_compr;
109
110 if (compr->comp_mutex)
111 mutex_lock(compr->comp_mutex);
112 err = crypto_comp_compress(compr->cc, in_buf, in_len, out_buf,
113 out_len);
114 if (compr->comp_mutex)
115 mutex_unlock(compr->comp_mutex);
116 if (unlikely(err)) {
117 ubifs_warn("cannot compress %d bytes, compressor %s, "
118 "error %d, leave data uncompressed",
119 in_len, compr->name, err);
120 goto no_compr;
121 }
122
123 /*
124 * Presently, we just require that compression results in less data,
125 * rather than any defined minimum compression ratio or amount.
126 */
127 if (ALIGN(*out_len, 8) >= ALIGN(in_len, 8))
128 goto no_compr;
129
130 return;
131
132no_compr:
133 memcpy(out_buf, in_buf, in_len);
134 *out_len = in_len;
135 *compr_type = UBIFS_COMPR_NONE;
136}
137
138/**
139 * ubifs_decompress - decompress data.
140 * @in_buf: data to decompress
141 * @in_len: length of the data to decompress
142 * @out_buf: output buffer where decompressed data should
143 * @out_len: output length is returned here
144 * @compr_type: type of compression
145 *
146 * This function decompresses data from buffer @in_buf into buffer @out_buf.
147 * The length of the uncompressed data is returned in @out_len. This functions
148 * returns %0 on success or a negative error code on failure.
149 */
150int ubifs_decompress(const void *in_buf, int in_len, void *out_buf,
151 int *out_len, int compr_type)
152{
153 int err;
154 struct ubifs_compressor *compr;
155
156 if (unlikely(compr_type < 0 || compr_type >= UBIFS_COMPR_TYPES_CNT)) {
157 ubifs_err("invalid compression type %d", compr_type);
158 return -EINVAL;
159 }
160
161 compr = ubifs_compressors[compr_type];
162
163 if (unlikely(!compr->capi_name)) {
164 ubifs_err("%s compression is not compiled in", compr->name);
165 return -EINVAL;
166 }
167
168 if (compr_type == UBIFS_COMPR_NONE) {
169 memcpy(out_buf, in_buf, in_len);
170 *out_len = in_len;
171 return 0;
172 }
173
174 if (compr->decomp_mutex)
175 mutex_lock(compr->decomp_mutex);
176 err = crypto_comp_decompress(compr->cc, in_buf, in_len, out_buf,
177 out_len);
178 if (compr->decomp_mutex)
179 mutex_unlock(compr->decomp_mutex);
180 if (err)
181 ubifs_err("cannot decompress %d bytes, compressor %s, "
182 "error %d", in_len, compr->name, err);
183
184 return err;
185}
186
187/**
188 * compr_init - initialize a compressor.
189 * @compr: compressor description object
190 *
191 * This function initializes the requested compressor and returns zero in case
192 * of success or a negative error code in case of failure.
193 */
194static int __init compr_init(struct ubifs_compressor *compr)
195{
196 if (compr->capi_name) {
197 compr->cc = crypto_alloc_comp(compr->capi_name, 0, 0);
198 if (IS_ERR(compr->cc)) {
199 ubifs_err("cannot initialize compressor %s, error %ld",
200 compr->name, PTR_ERR(compr->cc));
201 return PTR_ERR(compr->cc);
202 }
203 }
204
205 ubifs_compressors[compr->compr_type] = compr;
206 return 0;
207}
208
209/**
210 * compr_exit - de-initialize a compressor.
211 * @compr: compressor description object
212 */
213static void compr_exit(struct ubifs_compressor *compr)
214{
215 if (compr->capi_name)
216 crypto_free_comp(compr->cc);
217 return;
218}
219
220/**
221 * ubifs_compressors_init - initialize UBIFS compressors.
222 *
223 * This function initializes the compressor which were compiled in. Returns
224 * zero in case of success and a negative error code in case of failure.
225 */
226int __init ubifs_compressors_init(void)
227{
228 int err;
229
230 err = compr_init(&lzo_compr);
231 if (err)
232 return err;
233
234 err = compr_init(&zlib_compr);
235 if (err)
236 goto out_lzo;
237
238 ubifs_compressors[UBIFS_COMPR_NONE] = &none_compr;
239 return 0;
240
241out_lzo:
242 compr_exit(&lzo_compr);
243 return err;
244}
245
246/**
247 * ubifs_compressors_exit - de-initialize UBIFS compressors.
248 */
249void __exit ubifs_compressors_exit(void)
250{
251 compr_exit(&lzo_compr);
252 compr_exit(&zlib_compr);
253}
diff --git a/fs/ubifs/debug.c b/fs/ubifs/debug.c
new file mode 100644
index 000000000000..4e3aaeba4eca
--- /dev/null
+++ b/fs/ubifs/debug.c
@@ -0,0 +1,2289 @@
1/*
2 * This file is part of UBIFS.
3 *
4 * Copyright (C) 2006-2008 Nokia Corporation
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms of the GNU General Public License version 2 as published by
8 * the Free Software Foundation.
9 *
10 * This program is distributed in the hope that it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13 * more details.
14 *
15 * You should have received a copy of the GNU General Public License along with
16 * this program; if not, write to the Free Software Foundation, Inc., 51
17 * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
18 *
19 * Authors: Artem Bityutskiy (Битюцкий Артём)
20 * Adrian Hunter
21 */
22
23/*
24 * This file implements most of the debugging stuff which is compiled in only
25 * when it is enabled. But some debugging check functions are implemented in
26 * corresponding subsystem, just because they are closely related and utilize
27 * various local functions of those subsystems.
28 */
29
30#define UBIFS_DBG_PRESERVE_UBI
31
32#include "ubifs.h"
33#include <linux/module.h>
34#include <linux/moduleparam.h>
35
36#ifdef CONFIG_UBIFS_FS_DEBUG
37
38DEFINE_SPINLOCK(dbg_lock);
39
40static char dbg_key_buf0[128];
41static char dbg_key_buf1[128];
42
43unsigned int ubifs_msg_flags = UBIFS_MSG_FLAGS_DEFAULT;
44unsigned int ubifs_chk_flags = UBIFS_CHK_FLAGS_DEFAULT;
45unsigned int ubifs_tst_flags;
46
47module_param_named(debug_msgs, ubifs_msg_flags, uint, S_IRUGO | S_IWUSR);
48module_param_named(debug_chks, ubifs_chk_flags, uint, S_IRUGO | S_IWUSR);
49module_param_named(debug_tsts, ubifs_tst_flags, uint, S_IRUGO | S_IWUSR);
50
51MODULE_PARM_DESC(debug_msgs, "Debug message type flags");
52MODULE_PARM_DESC(debug_chks, "Debug check flags");
53MODULE_PARM_DESC(debug_tsts, "Debug special test flags");
54
55static const char *get_key_fmt(int fmt)
56{
57 switch (fmt) {
58 case UBIFS_SIMPLE_KEY_FMT:
59 return "simple";
60 default:
61 return "unknown/invalid format";
62 }
63}
64
65static const char *get_key_hash(int hash)
66{
67 switch (hash) {
68 case UBIFS_KEY_HASH_R5:
69 return "R5";
70 case UBIFS_KEY_HASH_TEST:
71 return "test";
72 default:
73 return "unknown/invalid name hash";
74 }
75}
76
77static const char *get_key_type(int type)
78{
79 switch (type) {
80 case UBIFS_INO_KEY:
81 return "inode";
82 case UBIFS_DENT_KEY:
83 return "direntry";
84 case UBIFS_XENT_KEY:
85 return "xentry";
86 case UBIFS_DATA_KEY:
87 return "data";
88 case UBIFS_TRUN_KEY:
89 return "truncate";
90 default:
91 return "unknown/invalid key";
92 }
93}
94
95static void sprintf_key(const struct ubifs_info *c, const union ubifs_key *key,
96 char *buffer)
97{
98 char *p = buffer;
99 int type = key_type(c, key);
100
101 if (c->key_fmt == UBIFS_SIMPLE_KEY_FMT) {
102 switch (type) {
103 case UBIFS_INO_KEY:
104 sprintf(p, "(%lu, %s)", key_inum(c, key),
105 get_key_type(type));
106 break;
107 case UBIFS_DENT_KEY:
108 case UBIFS_XENT_KEY:
109 sprintf(p, "(%lu, %s, %#08x)", key_inum(c, key),
110 get_key_type(type), key_hash(c, key));
111 break;
112 case UBIFS_DATA_KEY:
113 sprintf(p, "(%lu, %s, %u)", key_inum(c, key),
114 get_key_type(type), key_block(c, key));
115 break;
116 case UBIFS_TRUN_KEY:
117 sprintf(p, "(%lu, %s)",
118 key_inum(c, key), get_key_type(type));
119 break;
120 default:
121 sprintf(p, "(bad key type: %#08x, %#08x)",
122 key->u32[0], key->u32[1]);
123 }
124 } else
125 sprintf(p, "bad key format %d", c->key_fmt);
126}
127
128const char *dbg_key_str0(const struct ubifs_info *c, const union ubifs_key *key)
129{
130 /* dbg_lock must be held */
131 sprintf_key(c, key, dbg_key_buf0);
132 return dbg_key_buf0;
133}
134
135const char *dbg_key_str1(const struct ubifs_info *c, const union ubifs_key *key)
136{
137 /* dbg_lock must be held */
138 sprintf_key(c, key, dbg_key_buf1);
139 return dbg_key_buf1;
140}
141
142const char *dbg_ntype(int type)
143{
144 switch (type) {
145 case UBIFS_PAD_NODE:
146 return "padding node";
147 case UBIFS_SB_NODE:
148 return "superblock node";
149 case UBIFS_MST_NODE:
150 return "master node";
151 case UBIFS_REF_NODE:
152 return "reference node";
153 case UBIFS_INO_NODE:
154 return "inode node";
155 case UBIFS_DENT_NODE:
156 return "direntry node";
157 case UBIFS_XENT_NODE:
158 return "xentry node";
159 case UBIFS_DATA_NODE:
160 return "data node";
161 case UBIFS_TRUN_NODE:
162 return "truncate node";
163 case UBIFS_IDX_NODE:
164 return "indexing node";
165 case UBIFS_CS_NODE:
166 return "commit start node";
167 case UBIFS_ORPH_NODE:
168 return "orphan node";
169 default:
170 return "unknown node";
171 }
172}
173
174static const char *dbg_gtype(int type)
175{
176 switch (type) {
177 case UBIFS_NO_NODE_GROUP:
178 return "no node group";
179 case UBIFS_IN_NODE_GROUP:
180 return "in node group";
181 case UBIFS_LAST_OF_NODE_GROUP:
182 return "last of node group";
183 default:
184 return "unknown";
185 }
186}
187
188const char *dbg_cstate(int cmt_state)
189{
190 switch (cmt_state) {
191 case COMMIT_RESTING:
192 return "commit resting";
193 case COMMIT_BACKGROUND:
194 return "background commit requested";
195 case COMMIT_REQUIRED:
196 return "commit required";
197 case COMMIT_RUNNING_BACKGROUND:
198 return "BACKGROUND commit running";
199 case COMMIT_RUNNING_REQUIRED:
200 return "commit running and required";
201 case COMMIT_BROKEN:
202 return "broken commit";
203 default:
204 return "unknown commit state";
205 }
206}
207
208static void dump_ch(const struct ubifs_ch *ch)
209{
210 printk(KERN_DEBUG "\tmagic %#x\n", le32_to_cpu(ch->magic));
211 printk(KERN_DEBUG "\tcrc %#x\n", le32_to_cpu(ch->crc));
212 printk(KERN_DEBUG "\tnode_type %d (%s)\n", ch->node_type,
213 dbg_ntype(ch->node_type));
214 printk(KERN_DEBUG "\tgroup_type %d (%s)\n", ch->group_type,
215 dbg_gtype(ch->group_type));
216 printk(KERN_DEBUG "\tsqnum %llu\n",
217 (unsigned long long)le64_to_cpu(ch->sqnum));
218 printk(KERN_DEBUG "\tlen %u\n", le32_to_cpu(ch->len));
219}
220
221void dbg_dump_inode(const struct ubifs_info *c, const struct inode *inode)
222{
223 const struct ubifs_inode *ui = ubifs_inode(inode);
224
225 printk(KERN_DEBUG "inode %lu\n", inode->i_ino);
226 printk(KERN_DEBUG "size %llu\n",
227 (unsigned long long)i_size_read(inode));
228 printk(KERN_DEBUG "nlink %u\n", inode->i_nlink);
229 printk(KERN_DEBUG "uid %u\n", (unsigned int)inode->i_uid);
230 printk(KERN_DEBUG "gid %u\n", (unsigned int)inode->i_gid);
231 printk(KERN_DEBUG "atime %u.%u\n",
232 (unsigned int)inode->i_atime.tv_sec,
233 (unsigned int)inode->i_atime.tv_nsec);
234 printk(KERN_DEBUG "mtime %u.%u\n",
235 (unsigned int)inode->i_mtime.tv_sec,
236 (unsigned int)inode->i_mtime.tv_nsec);
237 printk(KERN_DEBUG "ctime %u.%u\n",
238 (unsigned int)inode->i_ctime.tv_sec,
239 (unsigned int)inode->i_ctime.tv_nsec);
240 printk(KERN_DEBUG "creat_sqnum %llu\n", ui->creat_sqnum);
241 printk(KERN_DEBUG "xattr_size %u\n", ui->xattr_size);
242 printk(KERN_DEBUG "xattr_cnt %u\n", ui->xattr_cnt);
243 printk(KERN_DEBUG "xattr_names %u\n", ui->xattr_names);
244 printk(KERN_DEBUG "dirty %u\n", ui->dirty);
245 printk(KERN_DEBUG "xattr %u\n", ui->xattr);
246 printk(KERN_DEBUG "flags %d\n", ui->flags);
247 printk(KERN_DEBUG "compr_type %d\n", ui->compr_type);
248 printk(KERN_DEBUG "data_len %d\n", ui->data_len);
249}
250
251void dbg_dump_node(const struct ubifs_info *c, const void *node)
252{
253 int i, n;
254 union ubifs_key key;
255 const struct ubifs_ch *ch = node;
256
257 if (dbg_failure_mode)
258 return;
259
260 /* If the magic is incorrect, just hexdump the first bytes */
261 if (le32_to_cpu(ch->magic) != UBIFS_NODE_MAGIC) {
262 printk(KERN_DEBUG "Not a node, first %zu bytes:", UBIFS_CH_SZ);
263 print_hex_dump(KERN_DEBUG, "", DUMP_PREFIX_OFFSET, 32, 1,
264 (void *)node, UBIFS_CH_SZ, 1);
265 return;
266 }
267
268 spin_lock(&dbg_lock);
269 dump_ch(node);
270
271 switch (ch->node_type) {
272 case UBIFS_PAD_NODE:
273 {
274 const struct ubifs_pad_node *pad = node;
275
276 printk(KERN_DEBUG "\tpad_len %u\n",
277 le32_to_cpu(pad->pad_len));
278 break;
279 }
280 case UBIFS_SB_NODE:
281 {
282 const struct ubifs_sb_node *sup = node;
283 unsigned int sup_flags = le32_to_cpu(sup->flags);
284
285 printk(KERN_DEBUG "\tkey_hash %d (%s)\n",
286 (int)sup->key_hash, get_key_hash(sup->key_hash));
287 printk(KERN_DEBUG "\tkey_fmt %d (%s)\n",
288 (int)sup->key_fmt, get_key_fmt(sup->key_fmt));
289 printk(KERN_DEBUG "\tflags %#x\n", sup_flags);
290 printk(KERN_DEBUG "\t big_lpt %u\n",
291 !!(sup_flags & UBIFS_FLG_BIGLPT));
292 printk(KERN_DEBUG "\tmin_io_size %u\n",
293 le32_to_cpu(sup->min_io_size));
294 printk(KERN_DEBUG "\tleb_size %u\n",
295 le32_to_cpu(sup->leb_size));
296 printk(KERN_DEBUG "\tleb_cnt %u\n",
297 le32_to_cpu(sup->leb_cnt));
298 printk(KERN_DEBUG "\tmax_leb_cnt %u\n",
299 le32_to_cpu(sup->max_leb_cnt));
300 printk(KERN_DEBUG "\tmax_bud_bytes %llu\n",
301 (unsigned long long)le64_to_cpu(sup->max_bud_bytes));
302 printk(KERN_DEBUG "\tlog_lebs %u\n",
303 le32_to_cpu(sup->log_lebs));
304 printk(KERN_DEBUG "\tlpt_lebs %u\n",
305 le32_to_cpu(sup->lpt_lebs));
306 printk(KERN_DEBUG "\torph_lebs %u\n",
307 le32_to_cpu(sup->orph_lebs));
308 printk(KERN_DEBUG "\tjhead_cnt %u\n",
309 le32_to_cpu(sup->jhead_cnt));
310 printk(KERN_DEBUG "\tfanout %u\n",
311 le32_to_cpu(sup->fanout));
312 printk(KERN_DEBUG "\tlsave_cnt %u\n",
313 le32_to_cpu(sup->lsave_cnt));
314 printk(KERN_DEBUG "\tdefault_compr %u\n",
315 (int)le16_to_cpu(sup->default_compr));
316 printk(KERN_DEBUG "\trp_size %llu\n",
317 (unsigned long long)le64_to_cpu(sup->rp_size));
318 printk(KERN_DEBUG "\trp_uid %u\n",
319 le32_to_cpu(sup->rp_uid));
320 printk(KERN_DEBUG "\trp_gid %u\n",
321 le32_to_cpu(sup->rp_gid));
322 printk(KERN_DEBUG "\tfmt_version %u\n",
323 le32_to_cpu(sup->fmt_version));
324 printk(KERN_DEBUG "\ttime_gran %u\n",
325 le32_to_cpu(sup->time_gran));
326 printk(KERN_DEBUG "\tUUID %02X%02X%02X%02X-%02X%02X"
327 "-%02X%02X-%02X%02X-%02X%02X%02X%02X%02X%02X\n",
328 sup->uuid[0], sup->uuid[1], sup->uuid[2], sup->uuid[3],
329 sup->uuid[4], sup->uuid[5], sup->uuid[6], sup->uuid[7],
330 sup->uuid[8], sup->uuid[9], sup->uuid[10], sup->uuid[11],
331 sup->uuid[12], sup->uuid[13], sup->uuid[14],
332 sup->uuid[15]);
333 break;
334 }
335 case UBIFS_MST_NODE:
336 {
337 const struct ubifs_mst_node *mst = node;
338
339 printk(KERN_DEBUG "\thighest_inum %llu\n",
340 (unsigned long long)le64_to_cpu(mst->highest_inum));
341 printk(KERN_DEBUG "\tcommit number %llu\n",
342 (unsigned long long)le64_to_cpu(mst->cmt_no));
343 printk(KERN_DEBUG "\tflags %#x\n",
344 le32_to_cpu(mst->flags));
345 printk(KERN_DEBUG "\tlog_lnum %u\n",
346 le32_to_cpu(mst->log_lnum));
347 printk(KERN_DEBUG "\troot_lnum %u\n",
348 le32_to_cpu(mst->root_lnum));
349 printk(KERN_DEBUG "\troot_offs %u\n",
350 le32_to_cpu(mst->root_offs));
351 printk(KERN_DEBUG "\troot_len %u\n",
352 le32_to_cpu(mst->root_len));
353 printk(KERN_DEBUG "\tgc_lnum %u\n",
354 le32_to_cpu(mst->gc_lnum));
355 printk(KERN_DEBUG "\tihead_lnum %u\n",
356 le32_to_cpu(mst->ihead_lnum));
357 printk(KERN_DEBUG "\tihead_offs %u\n",
358 le32_to_cpu(mst->ihead_offs));
359 printk(KERN_DEBUG "\tindex_size %u\n",
360 le32_to_cpu(mst->index_size));
361 printk(KERN_DEBUG "\tlpt_lnum %u\n",
362 le32_to_cpu(mst->lpt_lnum));
363 printk(KERN_DEBUG "\tlpt_offs %u\n",
364 le32_to_cpu(mst->lpt_offs));
365 printk(KERN_DEBUG "\tnhead_lnum %u\n",
366 le32_to_cpu(mst->nhead_lnum));
367 printk(KERN_DEBUG "\tnhead_offs %u\n",
368 le32_to_cpu(mst->nhead_offs));
369 printk(KERN_DEBUG "\tltab_lnum %u\n",
370 le32_to_cpu(mst->ltab_lnum));
371 printk(KERN_DEBUG "\tltab_offs %u\n",
372 le32_to_cpu(mst->ltab_offs));
373 printk(KERN_DEBUG "\tlsave_lnum %u\n",
374 le32_to_cpu(mst->lsave_lnum));
375 printk(KERN_DEBUG "\tlsave_offs %u\n",
376 le32_to_cpu(mst->lsave_offs));
377 printk(KERN_DEBUG "\tlscan_lnum %u\n",
378 le32_to_cpu(mst->lscan_lnum));
379 printk(KERN_DEBUG "\tleb_cnt %u\n",
380 le32_to_cpu(mst->leb_cnt));
381 printk(KERN_DEBUG "\tempty_lebs %u\n",
382 le32_to_cpu(mst->empty_lebs));
383 printk(KERN_DEBUG "\tidx_lebs %u\n",
384 le32_to_cpu(mst->idx_lebs));
385 printk(KERN_DEBUG "\ttotal_free %llu\n",
386 (unsigned long long)le64_to_cpu(mst->total_free));
387 printk(KERN_DEBUG "\ttotal_dirty %llu\n",
388 (unsigned long long)le64_to_cpu(mst->total_dirty));
389 printk(KERN_DEBUG "\ttotal_used %llu\n",
390 (unsigned long long)le64_to_cpu(mst->total_used));
391 printk(KERN_DEBUG "\ttotal_dead %llu\n",
392 (unsigned long long)le64_to_cpu(mst->total_dead));
393 printk(KERN_DEBUG "\ttotal_dark %llu\n",
394 (unsigned long long)le64_to_cpu(mst->total_dark));
395 break;
396 }
397 case UBIFS_REF_NODE:
398 {
399 const struct ubifs_ref_node *ref = node;
400
401 printk(KERN_DEBUG "\tlnum %u\n",
402 le32_to_cpu(ref->lnum));
403 printk(KERN_DEBUG "\toffs %u\n",
404 le32_to_cpu(ref->offs));
405 printk(KERN_DEBUG "\tjhead %u\n",
406 le32_to_cpu(ref->jhead));
407 break;
408 }
409 case UBIFS_INO_NODE:
410 {
411 const struct ubifs_ino_node *ino = node;
412
413 key_read(c, &ino->key, &key);
414 printk(KERN_DEBUG "\tkey %s\n", DBGKEY(&key));
415 printk(KERN_DEBUG "\tcreat_sqnum %llu\n",
416 (unsigned long long)le64_to_cpu(ino->creat_sqnum));
417 printk(KERN_DEBUG "\tsize %llu\n",
418 (unsigned long long)le64_to_cpu(ino->size));
419 printk(KERN_DEBUG "\tnlink %u\n",
420 le32_to_cpu(ino->nlink));
421 printk(KERN_DEBUG "\tatime %lld.%u\n",
422 (long long)le64_to_cpu(ino->atime_sec),
423 le32_to_cpu(ino->atime_nsec));
424 printk(KERN_DEBUG "\tmtime %lld.%u\n",
425 (long long)le64_to_cpu(ino->mtime_sec),
426 le32_to_cpu(ino->mtime_nsec));
427 printk(KERN_DEBUG "\tctime %lld.%u\n",
428 (long long)le64_to_cpu(ino->ctime_sec),
429 le32_to_cpu(ino->ctime_nsec));
430 printk(KERN_DEBUG "\tuid %u\n",
431 le32_to_cpu(ino->uid));
432 printk(KERN_DEBUG "\tgid %u\n",
433 le32_to_cpu(ino->gid));
434 printk(KERN_DEBUG "\tmode %u\n",
435 le32_to_cpu(ino->mode));
436 printk(KERN_DEBUG "\tflags %#x\n",
437 le32_to_cpu(ino->flags));
438 printk(KERN_DEBUG "\txattr_cnt %u\n",
439 le32_to_cpu(ino->xattr_cnt));
440 printk(KERN_DEBUG "\txattr_size %u\n",
441 le32_to_cpu(ino->xattr_size));
442 printk(KERN_DEBUG "\txattr_names %u\n",
443 le32_to_cpu(ino->xattr_names));
444 printk(KERN_DEBUG "\tcompr_type %#x\n",
445 (int)le16_to_cpu(ino->compr_type));
446 printk(KERN_DEBUG "\tdata len %u\n",
447 le32_to_cpu(ino->data_len));
448 break;
449 }
450 case UBIFS_DENT_NODE:
451 case UBIFS_XENT_NODE:
452 {
453 const struct ubifs_dent_node *dent = node;
454 int nlen = le16_to_cpu(dent->nlen);
455
456 key_read(c, &dent->key, &key);
457 printk(KERN_DEBUG "\tkey %s\n", DBGKEY(&key));
458 printk(KERN_DEBUG "\tinum %llu\n",
459 (unsigned long long)le64_to_cpu(dent->inum));
460 printk(KERN_DEBUG "\ttype %d\n", (int)dent->type);
461 printk(KERN_DEBUG "\tnlen %d\n", nlen);
462 printk(KERN_DEBUG "\tname ");
463
464 if (nlen > UBIFS_MAX_NLEN)
465 printk(KERN_DEBUG "(bad name length, not printing, "
466 "bad or corrupted node)");
467 else {
468 for (i = 0; i < nlen && dent->name[i]; i++)
469 printk("%c", dent->name[i]);
470 }
471 printk("\n");
472
473 break;
474 }
475 case UBIFS_DATA_NODE:
476 {
477 const struct ubifs_data_node *dn = node;
478 int dlen = le32_to_cpu(ch->len) - UBIFS_DATA_NODE_SZ;
479
480 key_read(c, &dn->key, &key);
481 printk(KERN_DEBUG "\tkey %s\n", DBGKEY(&key));
482 printk(KERN_DEBUG "\tsize %u\n",
483 le32_to_cpu(dn->size));
484 printk(KERN_DEBUG "\tcompr_typ %d\n",
485 (int)le16_to_cpu(dn->compr_type));
486 printk(KERN_DEBUG "\tdata size %d\n",
487 dlen);
488 printk(KERN_DEBUG "\tdata:\n");
489 print_hex_dump(KERN_DEBUG, "\t", DUMP_PREFIX_OFFSET, 32, 1,
490 (void *)&dn->data, dlen, 0);
491 break;
492 }
493 case UBIFS_TRUN_NODE:
494 {
495 const struct ubifs_trun_node *trun = node;
496
497 printk(KERN_DEBUG "\tinum %u\n",
498 le32_to_cpu(trun->inum));
499 printk(KERN_DEBUG "\told_size %llu\n",
500 (unsigned long long)le64_to_cpu(trun->old_size));
501 printk(KERN_DEBUG "\tnew_size %llu\n",
502 (unsigned long long)le64_to_cpu(trun->new_size));
503 break;
504 }
505 case UBIFS_IDX_NODE:
506 {
507 const struct ubifs_idx_node *idx = node;
508
509 n = le16_to_cpu(idx->child_cnt);
510 printk(KERN_DEBUG "\tchild_cnt %d\n", n);
511 printk(KERN_DEBUG "\tlevel %d\n",
512 (int)le16_to_cpu(idx->level));
513 printk(KERN_DEBUG "\tBranches:\n");
514
515 for (i = 0; i < n && i < c->fanout - 1; i++) {
516 const struct ubifs_branch *br;
517
518 br = ubifs_idx_branch(c, idx, i);
519 key_read(c, &br->key, &key);
520 printk(KERN_DEBUG "\t%d: LEB %d:%d len %d key %s\n",
521 i, le32_to_cpu(br->lnum), le32_to_cpu(br->offs),
522 le32_to_cpu(br->len), DBGKEY(&key));
523 }
524 break;
525 }
526 case UBIFS_CS_NODE:
527 break;
528 case UBIFS_ORPH_NODE:
529 {
530 const struct ubifs_orph_node *orph = node;
531
532 printk(KERN_DEBUG "\tcommit number %llu\n",
533 (unsigned long long)
534 le64_to_cpu(orph->cmt_no) & LLONG_MAX);
535 printk(KERN_DEBUG "\tlast node flag %llu\n",
536 (unsigned long long)(le64_to_cpu(orph->cmt_no)) >> 63);
537 n = (le32_to_cpu(ch->len) - UBIFS_ORPH_NODE_SZ) >> 3;
538 printk(KERN_DEBUG "\t%d orphan inode numbers:\n", n);
539 for (i = 0; i < n; i++)
540 printk(KERN_DEBUG "\t ino %llu\n",
541 le64_to_cpu(orph->inos[i]));
542 break;
543 }
544 default:
545 printk(KERN_DEBUG "node type %d was not recognized\n",
546 (int)ch->node_type);
547 }
548 spin_unlock(&dbg_lock);
549}
550
551void dbg_dump_budget_req(const struct ubifs_budget_req *req)
552{
553 spin_lock(&dbg_lock);
554 printk(KERN_DEBUG "Budgeting request: new_ino %d, dirtied_ino %d\n",
555 req->new_ino, req->dirtied_ino);
556 printk(KERN_DEBUG "\tnew_ino_d %d, dirtied_ino_d %d\n",
557 req->new_ino_d, req->dirtied_ino_d);
558 printk(KERN_DEBUG "\tnew_page %d, dirtied_page %d\n",
559 req->new_page, req->dirtied_page);
560 printk(KERN_DEBUG "\tnew_dent %d, mod_dent %d\n",
561 req->new_dent, req->mod_dent);
562 printk(KERN_DEBUG "\tidx_growth %d\n", req->idx_growth);
563 printk(KERN_DEBUG "\tdata_growth %d dd_growth %d\n",
564 req->data_growth, req->dd_growth);
565 spin_unlock(&dbg_lock);
566}
567
568void dbg_dump_lstats(const struct ubifs_lp_stats *lst)
569{
570 spin_lock(&dbg_lock);
571 printk(KERN_DEBUG "Lprops statistics: empty_lebs %d, idx_lebs %d\n",
572 lst->empty_lebs, lst->idx_lebs);
573 printk(KERN_DEBUG "\ttaken_empty_lebs %d, total_free %lld, "
574 "total_dirty %lld\n", lst->taken_empty_lebs, lst->total_free,
575 lst->total_dirty);
576 printk(KERN_DEBUG "\ttotal_used %lld, total_dark %lld, "
577 "total_dead %lld\n", lst->total_used, lst->total_dark,
578 lst->total_dead);
579 spin_unlock(&dbg_lock);
580}
581
582void dbg_dump_budg(struct ubifs_info *c)
583{
584 int i;
585 struct rb_node *rb;
586 struct ubifs_bud *bud;
587 struct ubifs_gced_idx_leb *idx_gc;
588
589 spin_lock(&dbg_lock);
590 printk(KERN_DEBUG "Budgeting info: budg_data_growth %lld, "
591 "budg_dd_growth %lld, budg_idx_growth %lld\n",
592 c->budg_data_growth, c->budg_dd_growth, c->budg_idx_growth);
593 printk(KERN_DEBUG "\tdata budget sum %lld, total budget sum %lld, "
594 "freeable_cnt %d\n", c->budg_data_growth + c->budg_dd_growth,
595 c->budg_data_growth + c->budg_dd_growth + c->budg_idx_growth,
596 c->freeable_cnt);
597 printk(KERN_DEBUG "\tmin_idx_lebs %d, old_idx_sz %lld, "
598 "calc_idx_sz %lld, idx_gc_cnt %d\n", c->min_idx_lebs,
599 c->old_idx_sz, c->calc_idx_sz, c->idx_gc_cnt);
600 printk(KERN_DEBUG "\tdirty_pg_cnt %ld, dirty_zn_cnt %ld, "
601 "clean_zn_cnt %ld\n", atomic_long_read(&c->dirty_pg_cnt),
602 atomic_long_read(&c->dirty_zn_cnt),
603 atomic_long_read(&c->clean_zn_cnt));
604 printk(KERN_DEBUG "\tdark_wm %d, dead_wm %d, max_idx_node_sz %d\n",
605 c->dark_wm, c->dead_wm, c->max_idx_node_sz);
606 printk(KERN_DEBUG "\tgc_lnum %d, ihead_lnum %d\n",
607 c->gc_lnum, c->ihead_lnum);
608 for (i = 0; i < c->jhead_cnt; i++)
609 printk(KERN_DEBUG "\tjhead %d\t LEB %d\n",
610 c->jheads[i].wbuf.jhead, c->jheads[i].wbuf.lnum);
611 for (rb = rb_first(&c->buds); rb; rb = rb_next(rb)) {
612 bud = rb_entry(rb, struct ubifs_bud, rb);
613 printk(KERN_DEBUG "\tbud LEB %d\n", bud->lnum);
614 }
615 list_for_each_entry(bud, &c->old_buds, list)
616 printk(KERN_DEBUG "\told bud LEB %d\n", bud->lnum);
617 list_for_each_entry(idx_gc, &c->idx_gc, list)
618 printk(KERN_DEBUG "\tGC'ed idx LEB %d unmap %d\n",
619 idx_gc->lnum, idx_gc->unmap);
620 printk(KERN_DEBUG "\tcommit state %d\n", c->cmt_state);
621 spin_unlock(&dbg_lock);
622}
623
624void dbg_dump_lprop(const struct ubifs_info *c, const struct ubifs_lprops *lp)
625{
626 printk(KERN_DEBUG "LEB %d lprops: free %d, dirty %d (used %d), "
627 "flags %#x\n", lp->lnum, lp->free, lp->dirty,
628 c->leb_size - lp->free - lp->dirty, lp->flags);
629}
630
631void dbg_dump_lprops(struct ubifs_info *c)
632{
633 int lnum, err;
634 struct ubifs_lprops lp;
635 struct ubifs_lp_stats lst;
636
637 printk(KERN_DEBUG "Dumping LEB properties\n");
638 ubifs_get_lp_stats(c, &lst);
639 dbg_dump_lstats(&lst);
640
641 for (lnum = c->main_first; lnum < c->leb_cnt; lnum++) {
642 err = ubifs_read_one_lp(c, lnum, &lp);
643 if (err)
644 ubifs_err("cannot read lprops for LEB %d", lnum);
645
646 dbg_dump_lprop(c, &lp);
647 }
648}
649
650void dbg_dump_leb(const struct ubifs_info *c, int lnum)
651{
652 struct ubifs_scan_leb *sleb;
653 struct ubifs_scan_node *snod;
654
655 if (dbg_failure_mode)
656 return;
657
658 printk(KERN_DEBUG "Dumping LEB %d\n", lnum);
659
660 sleb = ubifs_scan(c, lnum, 0, c->dbg_buf);
661 if (IS_ERR(sleb)) {
662 ubifs_err("scan error %d", (int)PTR_ERR(sleb));
663 return;
664 }
665
666 printk(KERN_DEBUG "LEB %d has %d nodes ending at %d\n", lnum,
667 sleb->nodes_cnt, sleb->endpt);
668
669 list_for_each_entry(snod, &sleb->nodes, list) {
670 cond_resched();
671 printk(KERN_DEBUG "Dumping node at LEB %d:%d len %d\n", lnum,
672 snod->offs, snod->len);
673 dbg_dump_node(c, snod->node);
674 }
675
676 ubifs_scan_destroy(sleb);
677 return;
678}
679
680void dbg_dump_znode(const struct ubifs_info *c,
681 const struct ubifs_znode *znode)
682{
683 int n;
684 const struct ubifs_zbranch *zbr;
685
686 spin_lock(&dbg_lock);
687 if (znode->parent)
688 zbr = &znode->parent->zbranch[znode->iip];
689 else
690 zbr = &c->zroot;
691
692 printk(KERN_DEBUG "znode %p, LEB %d:%d len %d parent %p iip %d level %d"
693 " child_cnt %d flags %lx\n", znode, zbr->lnum, zbr->offs,
694 zbr->len, znode->parent, znode->iip, znode->level,
695 znode->child_cnt, znode->flags);
696
697 if (znode->child_cnt <= 0 || znode->child_cnt > c->fanout) {
698 spin_unlock(&dbg_lock);
699 return;
700 }
701
702 printk(KERN_DEBUG "zbranches:\n");
703 for (n = 0; n < znode->child_cnt; n++) {
704 zbr = &znode->zbranch[n];
705 if (znode->level > 0)
706 printk(KERN_DEBUG "\t%d: znode %p LEB %d:%d len %d key "
707 "%s\n", n, zbr->znode, zbr->lnum,
708 zbr->offs, zbr->len,
709 DBGKEY(&zbr->key));
710 else
711 printk(KERN_DEBUG "\t%d: LNC %p LEB %d:%d len %d key "
712 "%s\n", n, zbr->znode, zbr->lnum,
713 zbr->offs, zbr->len,
714 DBGKEY(&zbr->key));
715 }
716 spin_unlock(&dbg_lock);
717}
718
719void dbg_dump_heap(struct ubifs_info *c, struct ubifs_lpt_heap *heap, int cat)
720{
721 int i;
722
723 printk(KERN_DEBUG "Dumping heap cat %d (%d elements)\n",
724 cat, heap->cnt);
725 for (i = 0; i < heap->cnt; i++) {
726 struct ubifs_lprops *lprops = heap->arr[i];
727
728 printk(KERN_DEBUG "\t%d. LEB %d hpos %d free %d dirty %d "
729 "flags %d\n", i, lprops->lnum, lprops->hpos,
730 lprops->free, lprops->dirty, lprops->flags);
731 }
732}
733
734void dbg_dump_pnode(struct ubifs_info *c, struct ubifs_pnode *pnode,
735 struct ubifs_nnode *parent, int iip)
736{
737 int i;
738
739 printk(KERN_DEBUG "Dumping pnode:\n");
740 printk(KERN_DEBUG "\taddress %zx parent %zx cnext %zx\n",
741 (size_t)pnode, (size_t)parent, (size_t)pnode->cnext);
742 printk(KERN_DEBUG "\tflags %lu iip %d level %d num %d\n",
743 pnode->flags, iip, pnode->level, pnode->num);
744 for (i = 0; i < UBIFS_LPT_FANOUT; i++) {
745 struct ubifs_lprops *lp = &pnode->lprops[i];
746
747 printk(KERN_DEBUG "\t%d: free %d dirty %d flags %d lnum %d\n",
748 i, lp->free, lp->dirty, lp->flags, lp->lnum);
749 }
750}
751
752void dbg_dump_tnc(struct ubifs_info *c)
753{
754 struct ubifs_znode *znode;
755 int level;
756
757 printk(KERN_DEBUG "\n");
758 printk(KERN_DEBUG "Dumping the TNC tree\n");
759 znode = ubifs_tnc_levelorder_next(c->zroot.znode, NULL);
760 level = znode->level;
761 printk(KERN_DEBUG "== Level %d ==\n", level);
762 while (znode) {
763 if (level != znode->level) {
764 level = znode->level;
765 printk(KERN_DEBUG "== Level %d ==\n", level);
766 }
767 dbg_dump_znode(c, znode);
768 znode = ubifs_tnc_levelorder_next(c->zroot.znode, znode);
769 }
770
771 printk(KERN_DEBUG "\n");
772}
773
774static int dump_znode(struct ubifs_info *c, struct ubifs_znode *znode,
775 void *priv)
776{
777 dbg_dump_znode(c, znode);
778 return 0;
779}
780
781/**
782 * dbg_dump_index - dump the on-flash index.
783 * @c: UBIFS file-system description object
784 *
785 * This function dumps whole UBIFS indexing B-tree, unlike 'dbg_dump_tnc()'
786 * which dumps only in-memory znodes and does not read znodes which from flash.
787 */
788void dbg_dump_index(struct ubifs_info *c)
789{
790 dbg_walk_index(c, NULL, dump_znode, NULL);
791}
792
793/**
794 * dbg_check_synced_i_size - check synchronized inode size.
795 * @inode: inode to check
796 *
797 * If inode is clean, synchronized inode size has to be equivalent to current
798 * inode size. This function has to be called only for locked inodes (@i_mutex
799 * has to be locked). Returns %0 if synchronized inode size if correct, and
800 * %-EINVAL if not.
801 */
802int dbg_check_synced_i_size(struct inode *inode)
803{
804 int err = 0;
805 struct ubifs_inode *ui = ubifs_inode(inode);
806
807 if (!(ubifs_chk_flags & UBIFS_CHK_GEN))
808 return 0;
809 if (!S_ISREG(inode->i_mode))
810 return 0;
811
812 mutex_lock(&ui->ui_mutex);
813 spin_lock(&ui->ui_lock);
814 if (ui->ui_size != ui->synced_i_size && !ui->dirty) {
815 ubifs_err("ui_size is %lld, synced_i_size is %lld, but inode "
816 "is clean", ui->ui_size, ui->synced_i_size);
817 ubifs_err("i_ino %lu, i_mode %#x, i_size %lld", inode->i_ino,
818 inode->i_mode, i_size_read(inode));
819 dbg_dump_stack();
820 err = -EINVAL;
821 }
822 spin_unlock(&ui->ui_lock);
823 mutex_unlock(&ui->ui_mutex);
824 return err;
825}
826
827/*
828 * dbg_check_dir - check directory inode size and link count.
829 * @c: UBIFS file-system description object
830 * @dir: the directory to calculate size for
831 * @size: the result is returned here
832 *
833 * This function makes sure that directory size and link count are correct.
834 * Returns zero in case of success and a negative error code in case of
835 * failure.
836 *
837 * Note, it is good idea to make sure the @dir->i_mutex is locked before
838 * calling this function.
839 */
840int dbg_check_dir_size(struct ubifs_info *c, const struct inode *dir)
841{
842 unsigned int nlink = 2;
843 union ubifs_key key;
844 struct ubifs_dent_node *dent, *pdent = NULL;
845 struct qstr nm = { .name = NULL };
846 loff_t size = UBIFS_INO_NODE_SZ;
847
848 if (!(ubifs_chk_flags & UBIFS_CHK_GEN))
849 return 0;
850
851 if (!S_ISDIR(dir->i_mode))
852 return 0;
853
854 lowest_dent_key(c, &key, dir->i_ino);
855 while (1) {
856 int err;
857
858 dent = ubifs_tnc_next_ent(c, &key, &nm);
859 if (IS_ERR(dent)) {
860 err = PTR_ERR(dent);
861 if (err == -ENOENT)
862 break;
863 return err;
864 }
865
866 nm.name = dent->name;
867 nm.len = le16_to_cpu(dent->nlen);
868 size += CALC_DENT_SIZE(nm.len);
869 if (dent->type == UBIFS_ITYPE_DIR)
870 nlink += 1;
871 kfree(pdent);
872 pdent = dent;
873 key_read(c, &dent->key, &key);
874 }
875 kfree(pdent);
876
877 if (i_size_read(dir) != size) {
878 ubifs_err("directory inode %lu has size %llu, "
879 "but calculated size is %llu", dir->i_ino,
880 (unsigned long long)i_size_read(dir),
881 (unsigned long long)size);
882 dump_stack();
883 return -EINVAL;
884 }
885 if (dir->i_nlink != nlink) {
886 ubifs_err("directory inode %lu has nlink %u, but calculated "
887 "nlink is %u", dir->i_ino, dir->i_nlink, nlink);
888 dump_stack();
889 return -EINVAL;
890 }
891
892 return 0;
893}
894
895/**
896 * dbg_check_key_order - make sure that colliding keys are properly ordered.
897 * @c: UBIFS file-system description object
898 * @zbr1: first zbranch
899 * @zbr2: following zbranch
900 *
901 * In UBIFS indexing B-tree colliding keys has to be sorted in binary order of
902 * names of the direntries/xentries which are referred by the keys. This
903 * function reads direntries/xentries referred by @zbr1 and @zbr2 and makes
904 * sure the name of direntry/xentry referred by @zbr1 is less than
905 * direntry/xentry referred by @zbr2. Returns zero if this is true, %1 if not,
906 * and a negative error code in case of failure.
907 */
908static int dbg_check_key_order(struct ubifs_info *c, struct ubifs_zbranch *zbr1,
909 struct ubifs_zbranch *zbr2)
910{
911 int err, nlen1, nlen2, cmp;
912 struct ubifs_dent_node *dent1, *dent2;
913 union ubifs_key key;
914
915 ubifs_assert(!keys_cmp(c, &zbr1->key, &zbr2->key));
916 dent1 = kmalloc(UBIFS_MAX_DENT_NODE_SZ, GFP_NOFS);
917 if (!dent1)
918 return -ENOMEM;
919 dent2 = kmalloc(UBIFS_MAX_DENT_NODE_SZ, GFP_NOFS);
920 if (!dent2) {
921 err = -ENOMEM;
922 goto out_free;
923 }
924
925 err = ubifs_tnc_read_node(c, zbr1, dent1);
926 if (err)
927 goto out_free;
928 err = ubifs_validate_entry(c, dent1);
929 if (err)
930 goto out_free;
931
932 err = ubifs_tnc_read_node(c, zbr2, dent2);
933 if (err)
934 goto out_free;
935 err = ubifs_validate_entry(c, dent2);
936 if (err)
937 goto out_free;
938
939 /* Make sure node keys are the same as in zbranch */
940 err = 1;
941 key_read(c, &dent1->key, &key);
942 if (keys_cmp(c, &zbr1->key, &key)) {
943 dbg_err("1st entry at %d:%d has key %s", zbr1->lnum,
944 zbr1->offs, DBGKEY(&key));
945 dbg_err("but it should have key %s according to tnc",
946 DBGKEY(&zbr1->key));
947 dbg_dump_node(c, dent1);
948 goto out_free;
949 }
950
951 key_read(c, &dent2->key, &key);
952 if (keys_cmp(c, &zbr2->key, &key)) {
953 dbg_err("2nd entry at %d:%d has key %s", zbr1->lnum,
954 zbr1->offs, DBGKEY(&key));
955 dbg_err("but it should have key %s according to tnc",
956 DBGKEY(&zbr2->key));
957 dbg_dump_node(c, dent2);
958 goto out_free;
959 }
960
961 nlen1 = le16_to_cpu(dent1->nlen);
962 nlen2 = le16_to_cpu(dent2->nlen);
963
964 cmp = memcmp(dent1->name, dent2->name, min_t(int, nlen1, nlen2));
965 if (cmp < 0 || (cmp == 0 && nlen1 < nlen2)) {
966 err = 0;
967 goto out_free;
968 }
969 if (cmp == 0 && nlen1 == nlen2)
970 dbg_err("2 xent/dent nodes with the same name");
971 else
972 dbg_err("bad order of colliding key %s",
973 DBGKEY(&key));
974
975 dbg_msg("first node at %d:%d\n", zbr1->lnum, zbr1->offs);
976 dbg_dump_node(c, dent1);
977 dbg_msg("second node at %d:%d\n", zbr2->lnum, zbr2->offs);
978 dbg_dump_node(c, dent2);
979
980out_free:
981 kfree(dent2);
982 kfree(dent1);
983 return err;
984}
985
986/**
987 * dbg_check_znode - check if znode is all right.
988 * @c: UBIFS file-system description object
989 * @zbr: zbranch which points to this znode
990 *
991 * This function makes sure that znode referred to by @zbr is all right.
992 * Returns zero if it is, and %-EINVAL if it is not.
993 */
994static int dbg_check_znode(struct ubifs_info *c, struct ubifs_zbranch *zbr)
995{
996 struct ubifs_znode *znode = zbr->znode;
997 struct ubifs_znode *zp = znode->parent;
998 int n, err, cmp;
999
1000 if (znode->child_cnt <= 0 || znode->child_cnt > c->fanout) {
1001 err = 1;
1002 goto out;
1003 }
1004 if (znode->level < 0) {
1005 err = 2;
1006 goto out;
1007 }
1008 if (znode->iip < 0 || znode->iip >= c->fanout) {
1009 err = 3;
1010 goto out;
1011 }
1012
1013 if (zbr->len == 0)
1014 /* Only dirty zbranch may have no on-flash nodes */
1015 if (!ubifs_zn_dirty(znode)) {
1016 err = 4;
1017 goto out;
1018 }
1019
1020 if (ubifs_zn_dirty(znode)) {
1021 /*
1022 * If znode is dirty, its parent has to be dirty as well. The
1023 * order of the operation is important, so we have to have
1024 * memory barriers.
1025 */
1026 smp_mb();
1027 if (zp && !ubifs_zn_dirty(zp)) {
1028 /*
1029 * The dirty flag is atomic and is cleared outside the
1030 * TNC mutex, so znode's dirty flag may now have
1031 * been cleared. The child is always cleared before the
1032 * parent, so we just need to check again.
1033 */
1034 smp_mb();
1035 if (ubifs_zn_dirty(znode)) {
1036 err = 5;
1037 goto out;
1038 }
1039 }
1040 }
1041
1042 if (zp) {
1043 const union ubifs_key *min, *max;
1044
1045 if (znode->level != zp->level - 1) {
1046 err = 6;
1047 goto out;
1048 }
1049
1050 /* Make sure the 'parent' pointer in our znode is correct */
1051 err = ubifs_search_zbranch(c, zp, &zbr->key, &n);
1052 if (!err) {
1053 /* This zbranch does not exist in the parent */
1054 err = 7;
1055 goto out;
1056 }
1057
1058 if (znode->iip >= zp->child_cnt) {
1059 err = 8;
1060 goto out;
1061 }
1062
1063 if (znode->iip != n) {
1064 /* This may happen only in case of collisions */
1065 if (keys_cmp(c, &zp->zbranch[n].key,
1066 &zp->zbranch[znode->iip].key)) {
1067 err = 9;
1068 goto out;
1069 }
1070 n = znode->iip;
1071 }
1072
1073 /*
1074 * Make sure that the first key in our znode is greater than or
1075 * equal to the key in the pointing zbranch.
1076 */
1077 min = &zbr->key;
1078 cmp = keys_cmp(c, min, &znode->zbranch[0].key);
1079 if (cmp == 1) {
1080 err = 10;
1081 goto out;
1082 }
1083
1084 if (n + 1 < zp->child_cnt) {
1085 max = &zp->zbranch[n + 1].key;
1086
1087 /*
1088 * Make sure the last key in our znode is less or
1089 * equivalent than the the key in zbranch which goes
1090 * after our pointing zbranch.
1091 */
1092 cmp = keys_cmp(c, max,
1093 &znode->zbranch[znode->child_cnt - 1].key);
1094 if (cmp == -1) {
1095 err = 11;
1096 goto out;
1097 }
1098 }
1099 } else {
1100 /* This may only be root znode */
1101 if (zbr != &c->zroot) {
1102 err = 12;
1103 goto out;
1104 }
1105 }
1106
1107 /*
1108 * Make sure that next key is greater or equivalent then the previous
1109 * one.
1110 */
1111 for (n = 1; n < znode->child_cnt; n++) {
1112 cmp = keys_cmp(c, &znode->zbranch[n - 1].key,
1113 &znode->zbranch[n].key);
1114 if (cmp > 0) {
1115 err = 13;
1116 goto out;
1117 }
1118 if (cmp == 0) {
1119 /* This can only be keys with colliding hash */
1120 if (!is_hash_key(c, &znode->zbranch[n].key)) {
1121 err = 14;
1122 goto out;
1123 }
1124
1125 if (znode->level != 0 || c->replaying)
1126 continue;
1127
1128 /*
1129 * Colliding keys should follow binary order of
1130 * corresponding xentry/dentry names.
1131 */
1132 err = dbg_check_key_order(c, &znode->zbranch[n - 1],
1133 &znode->zbranch[n]);
1134 if (err < 0)
1135 return err;
1136 if (err) {
1137 err = 15;
1138 goto out;
1139 }
1140 }
1141 }
1142
1143 for (n = 0; n < znode->child_cnt; n++) {
1144 if (!znode->zbranch[n].znode &&
1145 (znode->zbranch[n].lnum == 0 ||
1146 znode->zbranch[n].len == 0)) {
1147 err = 16;
1148 goto out;
1149 }
1150
1151 if (znode->zbranch[n].lnum != 0 &&
1152 znode->zbranch[n].len == 0) {
1153 err = 17;
1154 goto out;
1155 }
1156
1157 if (znode->zbranch[n].lnum == 0 &&
1158 znode->zbranch[n].len != 0) {
1159 err = 18;
1160 goto out;
1161 }
1162
1163 if (znode->zbranch[n].lnum == 0 &&
1164 znode->zbranch[n].offs != 0) {
1165 err = 19;
1166 goto out;
1167 }
1168
1169 if (znode->level != 0 && znode->zbranch[n].znode)
1170 if (znode->zbranch[n].znode->parent != znode) {
1171 err = 20;
1172 goto out;
1173 }
1174 }
1175
1176 return 0;
1177
1178out:
1179 ubifs_err("failed, error %d", err);
1180 ubifs_msg("dump of the znode");
1181 dbg_dump_znode(c, znode);
1182 if (zp) {
1183 ubifs_msg("dump of the parent znode");
1184 dbg_dump_znode(c, zp);
1185 }
1186 dump_stack();
1187 return -EINVAL;
1188}
1189
1190/**
1191 * dbg_check_tnc - check TNC tree.
1192 * @c: UBIFS file-system description object
1193 * @extra: do extra checks that are possible at start commit
1194 *
1195 * This function traverses whole TNC tree and checks every znode. Returns zero
1196 * if everything is all right and %-EINVAL if something is wrong with TNC.
1197 */
1198int dbg_check_tnc(struct ubifs_info *c, int extra)
1199{
1200 struct ubifs_znode *znode;
1201 long clean_cnt = 0, dirty_cnt = 0;
1202 int err, last;
1203
1204 if (!(ubifs_chk_flags & UBIFS_CHK_TNC))
1205 return 0;
1206
1207 ubifs_assert(mutex_is_locked(&c->tnc_mutex));
1208 if (!c->zroot.znode)
1209 return 0;
1210
1211 znode = ubifs_tnc_postorder_first(c->zroot.znode);
1212 while (1) {
1213 struct ubifs_znode *prev;
1214 struct ubifs_zbranch *zbr;
1215
1216 if (!znode->parent)
1217 zbr = &c->zroot;
1218 else
1219 zbr = &znode->parent->zbranch[znode->iip];
1220
1221 err = dbg_check_znode(c, zbr);
1222 if (err)
1223 return err;
1224
1225 if (extra) {
1226 if (ubifs_zn_dirty(znode))
1227 dirty_cnt += 1;
1228 else
1229 clean_cnt += 1;
1230 }
1231
1232 prev = znode;
1233 znode = ubifs_tnc_postorder_next(znode);
1234 if (!znode)
1235 break;
1236
1237 /*
1238 * If the last key of this znode is equivalent to the first key
1239 * of the next znode (collision), then check order of the keys.
1240 */
1241 last = prev->child_cnt - 1;
1242 if (prev->level == 0 && znode->level == 0 && !c->replaying &&
1243 !keys_cmp(c, &prev->zbranch[last].key,
1244 &znode->zbranch[0].key)) {
1245 err = dbg_check_key_order(c, &prev->zbranch[last],
1246 &znode->zbranch[0]);
1247 if (err < 0)
1248 return err;
1249 if (err) {
1250 ubifs_msg("first znode");
1251 dbg_dump_znode(c, prev);
1252 ubifs_msg("second znode");
1253 dbg_dump_znode(c, znode);
1254 return -EINVAL;
1255 }
1256 }
1257 }
1258
1259 if (extra) {
1260 if (clean_cnt != atomic_long_read(&c->clean_zn_cnt)) {
1261 ubifs_err("incorrect clean_zn_cnt %ld, calculated %ld",
1262 atomic_long_read(&c->clean_zn_cnt),
1263 clean_cnt);
1264 return -EINVAL;
1265 }
1266 if (dirty_cnt != atomic_long_read(&c->dirty_zn_cnt)) {
1267 ubifs_err("incorrect dirty_zn_cnt %ld, calculated %ld",
1268 atomic_long_read(&c->dirty_zn_cnt),
1269 dirty_cnt);
1270 return -EINVAL;
1271 }
1272 }
1273
1274 return 0;
1275}
1276
1277/**
1278 * dbg_walk_index - walk the on-flash index.
1279 * @c: UBIFS file-system description object
1280 * @leaf_cb: called for each leaf node
1281 * @znode_cb: called for each indexing node
1282 * @priv: private date which is passed to callbacks
1283 *
1284 * This function walks the UBIFS index and calls the @leaf_cb for each leaf
1285 * node and @znode_cb for each indexing node. Returns zero in case of success
1286 * and a negative error code in case of failure.
1287 *
1288 * It would be better if this function removed every znode it pulled to into
1289 * the TNC, so that the behavior more closely matched the non-debugging
1290 * behavior.
1291 */
1292int dbg_walk_index(struct ubifs_info *c, dbg_leaf_callback leaf_cb,
1293 dbg_znode_callback znode_cb, void *priv)
1294{
1295 int err;
1296 struct ubifs_zbranch *zbr;
1297 struct ubifs_znode *znode, *child;
1298
1299 mutex_lock(&c->tnc_mutex);
1300 /* If the root indexing node is not in TNC - pull it */
1301 if (!c->zroot.znode) {
1302 c->zroot.znode = ubifs_load_znode(c, &c->zroot, NULL, 0);
1303 if (IS_ERR(c->zroot.znode)) {
1304 err = PTR_ERR(c->zroot.znode);
1305 c->zroot.znode = NULL;
1306 goto out_unlock;
1307 }
1308 }
1309
1310 /*
1311 * We are going to traverse the indexing tree in the postorder manner.
1312 * Go down and find the leftmost indexing node where we are going to
1313 * start from.
1314 */
1315 znode = c->zroot.znode;
1316 while (znode->level > 0) {
1317 zbr = &znode->zbranch[0];
1318 child = zbr->znode;
1319 if (!child) {
1320 child = ubifs_load_znode(c, zbr, znode, 0);
1321 if (IS_ERR(child)) {
1322 err = PTR_ERR(child);
1323 goto out_unlock;
1324 }
1325 zbr->znode = child;
1326 }
1327
1328 znode = child;
1329 }
1330
1331 /* Iterate over all indexing nodes */
1332 while (1) {
1333 int idx;
1334
1335 cond_resched();
1336
1337 if (znode_cb) {
1338 err = znode_cb(c, znode, priv);
1339 if (err) {
1340 ubifs_err("znode checking function returned "
1341 "error %d", err);
1342 dbg_dump_znode(c, znode);
1343 goto out_dump;
1344 }
1345 }
1346 if (leaf_cb && znode->level == 0) {
1347 for (idx = 0; idx < znode->child_cnt; idx++) {
1348 zbr = &znode->zbranch[idx];
1349 err = leaf_cb(c, zbr, priv);
1350 if (err) {
1351 ubifs_err("leaf checking function "
1352 "returned error %d, for leaf "
1353 "at LEB %d:%d",
1354 err, zbr->lnum, zbr->offs);
1355 goto out_dump;
1356 }
1357 }
1358 }
1359
1360 if (!znode->parent)
1361 break;
1362
1363 idx = znode->iip + 1;
1364 znode = znode->parent;
1365 if (idx < znode->child_cnt) {
1366 /* Switch to the next index in the parent */
1367 zbr = &znode->zbranch[idx];
1368 child = zbr->znode;
1369 if (!child) {
1370 child = ubifs_load_znode(c, zbr, znode, idx);
1371 if (IS_ERR(child)) {
1372 err = PTR_ERR(child);
1373 goto out_unlock;
1374 }
1375 zbr->znode = child;
1376 }
1377 znode = child;
1378 } else
1379 /*
1380 * This is the last child, switch to the parent and
1381 * continue.
1382 */
1383 continue;
1384
1385 /* Go to the lowest leftmost znode in the new sub-tree */
1386 while (znode->level > 0) {
1387 zbr = &znode->zbranch[0];
1388 child = zbr->znode;
1389 if (!child) {
1390 child = ubifs_load_znode(c, zbr, znode, 0);
1391 if (IS_ERR(child)) {
1392 err = PTR_ERR(child);
1393 goto out_unlock;
1394 }
1395 zbr->znode = child;
1396 }
1397 znode = child;
1398 }
1399 }
1400
1401 mutex_unlock(&c->tnc_mutex);
1402 return 0;
1403
1404out_dump:
1405 if (znode->parent)
1406 zbr = &znode->parent->zbranch[znode->iip];
1407 else
1408 zbr = &c->zroot;
1409 ubifs_msg("dump of znode at LEB %d:%d", zbr->lnum, zbr->offs);
1410 dbg_dump_znode(c, znode);
1411out_unlock:
1412 mutex_unlock(&c->tnc_mutex);
1413 return err;
1414}
1415
1416/**
1417 * add_size - add znode size to partially calculated index size.
1418 * @c: UBIFS file-system description object
1419 * @znode: znode to add size for
1420 * @priv: partially calculated index size
1421 *
1422 * This is a helper function for 'dbg_check_idx_size()' which is called for
1423 * every indexing node and adds its size to the 'long long' variable pointed to
1424 * by @priv.
1425 */
1426static int add_size(struct ubifs_info *c, struct ubifs_znode *znode, void *priv)
1427{
1428 long long *idx_size = priv;
1429 int add;
1430
1431 add = ubifs_idx_node_sz(c, znode->child_cnt);
1432 add = ALIGN(add, 8);
1433 *idx_size += add;
1434 return 0;
1435}
1436
1437/**
1438 * dbg_check_idx_size - check index size.
1439 * @c: UBIFS file-system description object
1440 * @idx_size: size to check
1441 *
1442 * This function walks the UBIFS index, calculates its size and checks that the
1443 * size is equivalent to @idx_size. Returns zero in case of success and a
1444 * negative error code in case of failure.
1445 */
1446int dbg_check_idx_size(struct ubifs_info *c, long long idx_size)
1447{
1448 int err;
1449 long long calc = 0;
1450
1451 if (!(ubifs_chk_flags & UBIFS_CHK_IDX_SZ))
1452 return 0;
1453
1454 err = dbg_walk_index(c, NULL, add_size, &calc);
1455 if (err) {
1456 ubifs_err("error %d while walking the index", err);
1457 return err;
1458 }
1459
1460 if (calc != idx_size) {
1461 ubifs_err("index size check failed: calculated size is %lld, "
1462 "should be %lld", calc, idx_size);
1463 dump_stack();
1464 return -EINVAL;
1465 }
1466
1467 return 0;
1468}
1469
1470/**
1471 * struct fsck_inode - information about an inode used when checking the file-system.
1472 * @rb: link in the RB-tree of inodes
1473 * @inum: inode number
1474 * @mode: inode type, permissions, etc
1475 * @nlink: inode link count
1476 * @xattr_cnt: count of extended attributes
1477 * @references: how many directory/xattr entries refer this inode (calculated
1478 * while walking the index)
1479 * @calc_cnt: for directory inode count of child directories
1480 * @size: inode size (read from on-flash inode)
1481 * @xattr_sz: summary size of all extended attributes (read from on-flash
1482 * inode)
1483 * @calc_sz: for directories calculated directory size
1484 * @calc_xcnt: count of extended attributes
1485 * @calc_xsz: calculated summary size of all extended attributes
1486 * @xattr_nms: sum of lengths of all extended attribute names belonging to this
1487 * inode (read from on-flash inode)
1488 * @calc_xnms: calculated sum of lengths of all extended attribute names
1489 */
1490struct fsck_inode {
1491 struct rb_node rb;
1492 ino_t inum;
1493 umode_t mode;
1494 unsigned int nlink;
1495 unsigned int xattr_cnt;
1496 int references;
1497 int calc_cnt;
1498 long long size;
1499 unsigned int xattr_sz;
1500 long long calc_sz;
1501 long long calc_xcnt;
1502 long long calc_xsz;
1503 unsigned int xattr_nms;
1504 long long calc_xnms;
1505};
1506
1507/**
1508 * struct fsck_data - private FS checking information.
1509 * @inodes: RB-tree of all inodes (contains @struct fsck_inode objects)
1510 */
1511struct fsck_data {
1512 struct rb_root inodes;
1513};
1514
1515/**
1516 * add_inode - add inode information to RB-tree of inodes.
1517 * @c: UBIFS file-system description object
1518 * @fsckd: FS checking information
1519 * @ino: raw UBIFS inode to add
1520 *
1521 * This is a helper function for 'check_leaf()' which adds information about
1522 * inode @ino to the RB-tree of inodes. Returns inode information pointer in
1523 * case of success and a negative error code in case of failure.
1524 */
1525static struct fsck_inode *add_inode(struct ubifs_info *c,
1526 struct fsck_data *fsckd,
1527 struct ubifs_ino_node *ino)
1528{
1529 struct rb_node **p, *parent = NULL;
1530 struct fsck_inode *fscki;
1531 ino_t inum = key_inum_flash(c, &ino->key);
1532
1533 p = &fsckd->inodes.rb_node;
1534 while (*p) {
1535 parent = *p;
1536 fscki = rb_entry(parent, struct fsck_inode, rb);
1537 if (inum < fscki->inum)
1538 p = &(*p)->rb_left;
1539 else if (inum > fscki->inum)
1540 p = &(*p)->rb_right;
1541 else
1542 return fscki;
1543 }
1544
1545 if (inum > c->highest_inum) {
1546 ubifs_err("too high inode number, max. is %lu",
1547 c->highest_inum);
1548 return ERR_PTR(-EINVAL);
1549 }
1550
1551 fscki = kzalloc(sizeof(struct fsck_inode), GFP_NOFS);
1552 if (!fscki)
1553 return ERR_PTR(-ENOMEM);
1554
1555 fscki->inum = inum;
1556 fscki->nlink = le32_to_cpu(ino->nlink);
1557 fscki->size = le64_to_cpu(ino->size);
1558 fscki->xattr_cnt = le32_to_cpu(ino->xattr_cnt);
1559 fscki->xattr_sz = le32_to_cpu(ino->xattr_size);
1560 fscki->xattr_nms = le32_to_cpu(ino->xattr_names);
1561 fscki->mode = le32_to_cpu(ino->mode);
1562 if (S_ISDIR(fscki->mode)) {
1563 fscki->calc_sz = UBIFS_INO_NODE_SZ;
1564 fscki->calc_cnt = 2;
1565 }
1566 rb_link_node(&fscki->rb, parent, p);
1567 rb_insert_color(&fscki->rb, &fsckd->inodes);
1568 return fscki;
1569}
1570
1571/**
1572 * search_inode - search inode in the RB-tree of inodes.
1573 * @fsckd: FS checking information
1574 * @inum: inode number to search
1575 *
1576 * This is a helper function for 'check_leaf()' which searches inode @inum in
1577 * the RB-tree of inodes and returns an inode information pointer or %NULL if
1578 * the inode was not found.
1579 */
1580static struct fsck_inode *search_inode(struct fsck_data *fsckd, ino_t inum)
1581{
1582 struct rb_node *p;
1583 struct fsck_inode *fscki;
1584
1585 p = fsckd->inodes.rb_node;
1586 while (p) {
1587 fscki = rb_entry(p, struct fsck_inode, rb);
1588 if (inum < fscki->inum)
1589 p = p->rb_left;
1590 else if (inum > fscki->inum)
1591 p = p->rb_right;
1592 else
1593 return fscki;
1594 }
1595 return NULL;
1596}
1597
1598/**
1599 * read_add_inode - read inode node and add it to RB-tree of inodes.
1600 * @c: UBIFS file-system description object
1601 * @fsckd: FS checking information
1602 * @inum: inode number to read
1603 *
1604 * This is a helper function for 'check_leaf()' which finds inode node @inum in
1605 * the index, reads it, and adds it to the RB-tree of inodes. Returns inode
1606 * information pointer in case of success and a negative error code in case of
1607 * failure.
1608 */
1609static struct fsck_inode *read_add_inode(struct ubifs_info *c,
1610 struct fsck_data *fsckd, ino_t inum)
1611{
1612 int n, err;
1613 union ubifs_key key;
1614 struct ubifs_znode *znode;
1615 struct ubifs_zbranch *zbr;
1616 struct ubifs_ino_node *ino;
1617 struct fsck_inode *fscki;
1618
1619 fscki = search_inode(fsckd, inum);
1620 if (fscki)
1621 return fscki;
1622
1623 ino_key_init(c, &key, inum);
1624 err = ubifs_lookup_level0(c, &key, &znode, &n);
1625 if (!err) {
1626 ubifs_err("inode %lu not found in index", inum);
1627 return ERR_PTR(-ENOENT);
1628 } else if (err < 0) {
1629 ubifs_err("error %d while looking up inode %lu", err, inum);
1630 return ERR_PTR(err);
1631 }
1632
1633 zbr = &znode->zbranch[n];
1634 if (zbr->len < UBIFS_INO_NODE_SZ) {
1635 ubifs_err("bad node %lu node length %d", inum, zbr->len);
1636 return ERR_PTR(-EINVAL);
1637 }
1638
1639 ino = kmalloc(zbr->len, GFP_NOFS);
1640 if (!ino)
1641 return ERR_PTR(-ENOMEM);
1642
1643 err = ubifs_tnc_read_node(c, zbr, ino);
1644 if (err) {
1645 ubifs_err("cannot read inode node at LEB %d:%d, error %d",
1646 zbr->lnum, zbr->offs, err);
1647 kfree(ino);
1648 return ERR_PTR(err);
1649 }
1650
1651 fscki = add_inode(c, fsckd, ino);
1652 kfree(ino);
1653 if (IS_ERR(fscki)) {
1654 ubifs_err("error %ld while adding inode %lu node",
1655 PTR_ERR(fscki), inum);
1656 return fscki;
1657 }
1658
1659 return fscki;
1660}
1661
1662/**
1663 * check_leaf - check leaf node.
1664 * @c: UBIFS file-system description object
1665 * @zbr: zbranch of the leaf node to check
1666 * @priv: FS checking information
1667 *
1668 * This is a helper function for 'dbg_check_filesystem()' which is called for
1669 * every single leaf node while walking the indexing tree. It checks that the
1670 * leaf node referred from the indexing tree exists, has correct CRC, and does
1671 * some other basic validation. This function is also responsible for building
1672 * an RB-tree of inodes - it adds all inodes into the RB-tree. It also
1673 * calculates reference count, size, etc for each inode in order to later
1674 * compare them to the information stored inside the inodes and detect possible
1675 * inconsistencies. Returns zero in case of success and a negative error code
1676 * in case of failure.
1677 */
1678static int check_leaf(struct ubifs_info *c, struct ubifs_zbranch *zbr,
1679 void *priv)
1680{
1681 ino_t inum;
1682 void *node;
1683 struct ubifs_ch *ch;
1684 int err, type = key_type(c, &zbr->key);
1685 struct fsck_inode *fscki;
1686
1687 if (zbr->len < UBIFS_CH_SZ) {
1688 ubifs_err("bad leaf length %d (LEB %d:%d)",
1689 zbr->len, zbr->lnum, zbr->offs);
1690 return -EINVAL;
1691 }
1692
1693 node = kmalloc(zbr->len, GFP_NOFS);
1694 if (!node)
1695 return -ENOMEM;
1696
1697 err = ubifs_tnc_read_node(c, zbr, node);
1698 if (err) {
1699 ubifs_err("cannot read leaf node at LEB %d:%d, error %d",
1700 zbr->lnum, zbr->offs, err);
1701 goto out_free;
1702 }
1703
1704 /* If this is an inode node, add it to RB-tree of inodes */
1705 if (type == UBIFS_INO_KEY) {
1706 fscki = add_inode(c, priv, node);
1707 if (IS_ERR(fscki)) {
1708 err = PTR_ERR(fscki);
1709 ubifs_err("error %d while adding inode node", err);
1710 goto out_dump;
1711 }
1712 goto out;
1713 }
1714
1715 if (type != UBIFS_DENT_KEY && type != UBIFS_XENT_KEY &&
1716 type != UBIFS_DATA_KEY) {
1717 ubifs_err("unexpected node type %d at LEB %d:%d",
1718 type, zbr->lnum, zbr->offs);
1719 err = -EINVAL;
1720 goto out_free;
1721 }
1722
1723 ch = node;
1724 if (le64_to_cpu(ch->sqnum) > c->max_sqnum) {
1725 ubifs_err("too high sequence number, max. is %llu",
1726 c->max_sqnum);
1727 err = -EINVAL;
1728 goto out_dump;
1729 }
1730
1731 if (type == UBIFS_DATA_KEY) {
1732 long long blk_offs;
1733 struct ubifs_data_node *dn = node;
1734
1735 /*
1736 * Search the inode node this data node belongs to and insert
1737 * it to the RB-tree of inodes.
1738 */
1739 inum = key_inum_flash(c, &dn->key);
1740 fscki = read_add_inode(c, priv, inum);
1741 if (IS_ERR(fscki)) {
1742 err = PTR_ERR(fscki);
1743 ubifs_err("error %d while processing data node and "
1744 "trying to find inode node %lu", err, inum);
1745 goto out_dump;
1746 }
1747
1748 /* Make sure the data node is within inode size */
1749 blk_offs = key_block_flash(c, &dn->key);
1750 blk_offs <<= UBIFS_BLOCK_SHIFT;
1751 blk_offs += le32_to_cpu(dn->size);
1752 if (blk_offs > fscki->size) {
1753 ubifs_err("data node at LEB %d:%d is not within inode "
1754 "size %lld", zbr->lnum, zbr->offs,
1755 fscki->size);
1756 err = -EINVAL;
1757 goto out_dump;
1758 }
1759 } else {
1760 int nlen;
1761 struct ubifs_dent_node *dent = node;
1762 struct fsck_inode *fscki1;
1763
1764 err = ubifs_validate_entry(c, dent);
1765 if (err)
1766 goto out_dump;
1767
1768 /*
1769 * Search the inode node this entry refers to and the parent
1770 * inode node and insert them to the RB-tree of inodes.
1771 */
1772 inum = le64_to_cpu(dent->inum);
1773 fscki = read_add_inode(c, priv, inum);
1774 if (IS_ERR(fscki)) {
1775 err = PTR_ERR(fscki);
1776 ubifs_err("error %d while processing entry node and "
1777 "trying to find inode node %lu", err, inum);
1778 goto out_dump;
1779 }
1780
1781 /* Count how many direntries or xentries refers this inode */
1782 fscki->references += 1;
1783
1784 inum = key_inum_flash(c, &dent->key);
1785 fscki1 = read_add_inode(c, priv, inum);
1786 if (IS_ERR(fscki1)) {
1787 err = PTR_ERR(fscki);
1788 ubifs_err("error %d while processing entry node and "
1789 "trying to find parent inode node %lu",
1790 err, inum);
1791 goto out_dump;
1792 }
1793
1794 nlen = le16_to_cpu(dent->nlen);
1795 if (type == UBIFS_XENT_KEY) {
1796 fscki1->calc_xcnt += 1;
1797 fscki1->calc_xsz += CALC_DENT_SIZE(nlen);
1798 fscki1->calc_xsz += CALC_XATTR_BYTES(fscki->size);
1799 fscki1->calc_xnms += nlen;
1800 } else {
1801 fscki1->calc_sz += CALC_DENT_SIZE(nlen);
1802 if (dent->type == UBIFS_ITYPE_DIR)
1803 fscki1->calc_cnt += 1;
1804 }
1805 }
1806
1807out:
1808 kfree(node);
1809 return 0;
1810
1811out_dump:
1812 ubifs_msg("dump of node at LEB %d:%d", zbr->lnum, zbr->offs);
1813 dbg_dump_node(c, node);
1814out_free:
1815 kfree(node);
1816 return err;
1817}
1818
1819/**
1820 * free_inodes - free RB-tree of inodes.
1821 * @fsckd: FS checking information
1822 */
1823static void free_inodes(struct fsck_data *fsckd)
1824{
1825 struct rb_node *this = fsckd->inodes.rb_node;
1826 struct fsck_inode *fscki;
1827
1828 while (this) {
1829 if (this->rb_left)
1830 this = this->rb_left;
1831 else if (this->rb_right)
1832 this = this->rb_right;
1833 else {
1834 fscki = rb_entry(this, struct fsck_inode, rb);
1835 this = rb_parent(this);
1836 if (this) {
1837 if (this->rb_left == &fscki->rb)
1838 this->rb_left = NULL;
1839 else
1840 this->rb_right = NULL;
1841 }
1842 kfree(fscki);
1843 }
1844 }
1845}
1846
1847/**
1848 * check_inodes - checks all inodes.
1849 * @c: UBIFS file-system description object
1850 * @fsckd: FS checking information
1851 *
1852 * This is a helper function for 'dbg_check_filesystem()' which walks the
1853 * RB-tree of inodes after the index scan has been finished, and checks that
1854 * inode nlink, size, etc are correct. Returns zero if inodes are fine,
1855 * %-EINVAL if not, and a negative error code in case of failure.
1856 */
1857static int check_inodes(struct ubifs_info *c, struct fsck_data *fsckd)
1858{
1859 int n, err;
1860 union ubifs_key key;
1861 struct ubifs_znode *znode;
1862 struct ubifs_zbranch *zbr;
1863 struct ubifs_ino_node *ino;
1864 struct fsck_inode *fscki;
1865 struct rb_node *this = rb_first(&fsckd->inodes);
1866
1867 while (this) {
1868 fscki = rb_entry(this, struct fsck_inode, rb);
1869 this = rb_next(this);
1870
1871 if (S_ISDIR(fscki->mode)) {
1872 /*
1873 * Directories have to have exactly one reference (they
1874 * cannot have hardlinks), although root inode is an
1875 * exception.
1876 */
1877 if (fscki->inum != UBIFS_ROOT_INO &&
1878 fscki->references != 1) {
1879 ubifs_err("directory inode %lu has %d "
1880 "direntries which refer it, but "
1881 "should be 1", fscki->inum,
1882 fscki->references);
1883 goto out_dump;
1884 }
1885 if (fscki->inum == UBIFS_ROOT_INO &&
1886 fscki->references != 0) {
1887 ubifs_err("root inode %lu has non-zero (%d) "
1888 "direntries which refer it",
1889 fscki->inum, fscki->references);
1890 goto out_dump;
1891 }
1892 if (fscki->calc_sz != fscki->size) {
1893 ubifs_err("directory inode %lu size is %lld, "
1894 "but calculated size is %lld",
1895 fscki->inum, fscki->size,
1896 fscki->calc_sz);
1897 goto out_dump;
1898 }
1899 if (fscki->calc_cnt != fscki->nlink) {
1900 ubifs_err("directory inode %lu nlink is %d, "
1901 "but calculated nlink is %d",
1902 fscki->inum, fscki->nlink,
1903 fscki->calc_cnt);
1904 goto out_dump;
1905 }
1906 } else {
1907 if (fscki->references != fscki->nlink) {
1908 ubifs_err("inode %lu nlink is %d, but "
1909 "calculated nlink is %d", fscki->inum,
1910 fscki->nlink, fscki->references);
1911 goto out_dump;
1912 }
1913 }
1914 if (fscki->xattr_sz != fscki->calc_xsz) {
1915 ubifs_err("inode %lu has xattr size %u, but "
1916 "calculated size is %lld",
1917 fscki->inum, fscki->xattr_sz,
1918 fscki->calc_xsz);
1919 goto out_dump;
1920 }
1921 if (fscki->xattr_cnt != fscki->calc_xcnt) {
1922 ubifs_err("inode %lu has %u xattrs, but "
1923 "calculated count is %lld", fscki->inum,
1924 fscki->xattr_cnt, fscki->calc_xcnt);
1925 goto out_dump;
1926 }
1927 if (fscki->xattr_nms != fscki->calc_xnms) {
1928 ubifs_err("inode %lu has xattr names' size %u, but "
1929 "calculated names' size is %lld",
1930 fscki->inum, fscki->xattr_nms,
1931 fscki->calc_xnms);
1932 goto out_dump;
1933 }
1934 }
1935
1936 return 0;
1937
1938out_dump:
1939 /* Read the bad inode and dump it */
1940 ino_key_init(c, &key, fscki->inum);
1941 err = ubifs_lookup_level0(c, &key, &znode, &n);
1942 if (!err) {
1943 ubifs_err("inode %lu not found in index", fscki->inum);
1944 return -ENOENT;
1945 } else if (err < 0) {
1946 ubifs_err("error %d while looking up inode %lu",
1947 err, fscki->inum);
1948 return err;
1949 }
1950
1951 zbr = &znode->zbranch[n];
1952 ino = kmalloc(zbr->len, GFP_NOFS);
1953 if (!ino)
1954 return -ENOMEM;
1955
1956 err = ubifs_tnc_read_node(c, zbr, ino);
1957 if (err) {
1958 ubifs_err("cannot read inode node at LEB %d:%d, error %d",
1959 zbr->lnum, zbr->offs, err);
1960 kfree(ino);
1961 return err;
1962 }
1963
1964 ubifs_msg("dump of the inode %lu sitting in LEB %d:%d",
1965 fscki->inum, zbr->lnum, zbr->offs);
1966 dbg_dump_node(c, ino);
1967 kfree(ino);
1968 return -EINVAL;
1969}
1970
1971/**
1972 * dbg_check_filesystem - check the file-system.
1973 * @c: UBIFS file-system description object
1974 *
1975 * This function checks the file system, namely:
1976 * o makes sure that all leaf nodes exist and their CRCs are correct;
1977 * o makes sure inode nlink, size, xattr size/count are correct (for all
1978 * inodes).
1979 *
1980 * The function reads whole indexing tree and all nodes, so it is pretty
1981 * heavy-weight. Returns zero if the file-system is consistent, %-EINVAL if
1982 * not, and a negative error code in case of failure.
1983 */
1984int dbg_check_filesystem(struct ubifs_info *c)
1985{
1986 int err;
1987 struct fsck_data fsckd;
1988
1989 if (!(ubifs_chk_flags & UBIFS_CHK_FS))
1990 return 0;
1991
1992 fsckd.inodes = RB_ROOT;
1993 err = dbg_walk_index(c, check_leaf, NULL, &fsckd);
1994 if (err)
1995 goto out_free;
1996
1997 err = check_inodes(c, &fsckd);
1998 if (err)
1999 goto out_free;
2000
2001 free_inodes(&fsckd);
2002 return 0;
2003
2004out_free:
2005 ubifs_err("file-system check failed with error %d", err);
2006 dump_stack();
2007 free_inodes(&fsckd);
2008 return err;
2009}
2010
2011static int invocation_cnt;
2012
2013int dbg_force_in_the_gaps(void)
2014{
2015 if (!dbg_force_in_the_gaps_enabled)
2016 return 0;
2017 /* Force in-the-gaps every 8th commit */
2018 return !((invocation_cnt++) & 0x7);
2019}
2020
2021/* Failure mode for recovery testing */
2022
2023#define chance(n, d) (simple_rand() <= (n) * 32768LL / (d))
2024
2025struct failure_mode_info {
2026 struct list_head list;
2027 struct ubifs_info *c;
2028};
2029
2030static LIST_HEAD(fmi_list);
2031static DEFINE_SPINLOCK(fmi_lock);
2032
2033static unsigned int next;
2034
2035static int simple_rand(void)
2036{
2037 if (next == 0)
2038 next = current->pid;
2039 next = next * 1103515245 + 12345;
2040 return (next >> 16) & 32767;
2041}
2042
2043void dbg_failure_mode_registration(struct ubifs_info *c)
2044{
2045 struct failure_mode_info *fmi;
2046
2047 fmi = kmalloc(sizeof(struct failure_mode_info), GFP_NOFS);
2048 if (!fmi) {
2049 dbg_err("Failed to register failure mode - no memory");
2050 return;
2051 }
2052 fmi->c = c;
2053 spin_lock(&fmi_lock);
2054 list_add_tail(&fmi->list, &fmi_list);
2055 spin_unlock(&fmi_lock);
2056}
2057
2058void dbg_failure_mode_deregistration(struct ubifs_info *c)
2059{
2060 struct failure_mode_info *fmi, *tmp;
2061
2062 spin_lock(&fmi_lock);
2063 list_for_each_entry_safe(fmi, tmp, &fmi_list, list)
2064 if (fmi->c == c) {
2065 list_del(&fmi->list);
2066 kfree(fmi);
2067 }
2068 spin_unlock(&fmi_lock);
2069}
2070
2071static struct ubifs_info *dbg_find_info(struct ubi_volume_desc *desc)
2072{
2073 struct failure_mode_info *fmi;
2074
2075 spin_lock(&fmi_lock);
2076 list_for_each_entry(fmi, &fmi_list, list)
2077 if (fmi->c->ubi == desc) {
2078 struct ubifs_info *c = fmi->c;
2079
2080 spin_unlock(&fmi_lock);
2081 return c;
2082 }
2083 spin_unlock(&fmi_lock);
2084 return NULL;
2085}
2086
2087static int in_failure_mode(struct ubi_volume_desc *desc)
2088{
2089 struct ubifs_info *c = dbg_find_info(desc);
2090
2091 if (c && dbg_failure_mode)
2092 return c->failure_mode;
2093 return 0;
2094}
2095
2096static int do_fail(struct ubi_volume_desc *desc, int lnum, int write)
2097{
2098 struct ubifs_info *c = dbg_find_info(desc);
2099
2100 if (!c || !dbg_failure_mode)
2101 return 0;
2102 if (c->failure_mode)
2103 return 1;
2104 if (!c->fail_cnt) {
2105 /* First call - decide delay to failure */
2106 if (chance(1, 2)) {
2107 unsigned int delay = 1 << (simple_rand() >> 11);
2108
2109 if (chance(1, 2)) {
2110 c->fail_delay = 1;
2111 c->fail_timeout = jiffies +
2112 msecs_to_jiffies(delay);
2113 dbg_rcvry("failing after %ums", delay);
2114 } else {
2115 c->fail_delay = 2;
2116 c->fail_cnt_max = delay;
2117 dbg_rcvry("failing after %u calls", delay);
2118 }
2119 }
2120 c->fail_cnt += 1;
2121 }
2122 /* Determine if failure delay has expired */
2123 if (c->fail_delay == 1) {
2124 if (time_before(jiffies, c->fail_timeout))
2125 return 0;
2126 } else if (c->fail_delay == 2)
2127 if (c->fail_cnt++ < c->fail_cnt_max)
2128 return 0;
2129 if (lnum == UBIFS_SB_LNUM) {
2130 if (write) {
2131 if (chance(1, 2))
2132 return 0;
2133 } else if (chance(19, 20))
2134 return 0;
2135 dbg_rcvry("failing in super block LEB %d", lnum);
2136 } else if (lnum == UBIFS_MST_LNUM || lnum == UBIFS_MST_LNUM + 1) {
2137 if (chance(19, 20))
2138 return 0;
2139 dbg_rcvry("failing in master LEB %d", lnum);
2140 } else if (lnum >= UBIFS_LOG_LNUM && lnum <= c->log_last) {
2141 if (write) {
2142 if (chance(99, 100))
2143 return 0;
2144 } else if (chance(399, 400))
2145 return 0;
2146 dbg_rcvry("failing in log LEB %d", lnum);
2147 } else if (lnum >= c->lpt_first && lnum <= c->lpt_last) {
2148 if (write) {
2149 if (chance(7, 8))
2150 return 0;
2151 } else if (chance(19, 20))
2152 return 0;
2153 dbg_rcvry("failing in LPT LEB %d", lnum);
2154 } else if (lnum >= c->orph_first && lnum <= c->orph_last) {
2155 if (write) {
2156 if (chance(1, 2))
2157 return 0;
2158 } else if (chance(9, 10))
2159 return 0;
2160 dbg_rcvry("failing in orphan LEB %d", lnum);
2161 } else if (lnum == c->ihead_lnum) {
2162 if (chance(99, 100))
2163 return 0;
2164 dbg_rcvry("failing in index head LEB %d", lnum);
2165 } else if (c->jheads && lnum == c->jheads[GCHD].wbuf.lnum) {
2166 if (chance(9, 10))
2167 return 0;
2168 dbg_rcvry("failing in GC head LEB %d", lnum);
2169 } else if (write && !RB_EMPTY_ROOT(&c->buds) &&
2170 !ubifs_search_bud(c, lnum)) {
2171 if (chance(19, 20))
2172 return 0;
2173 dbg_rcvry("failing in non-bud LEB %d", lnum);
2174 } else if (c->cmt_state == COMMIT_RUNNING_BACKGROUND ||
2175 c->cmt_state == COMMIT_RUNNING_REQUIRED) {
2176 if (chance(999, 1000))
2177 return 0;
2178 dbg_rcvry("failing in bud LEB %d commit running", lnum);
2179 } else {
2180 if (chance(9999, 10000))
2181 return 0;
2182 dbg_rcvry("failing in bud LEB %d commit not running", lnum);
2183 }
2184 ubifs_err("*** SETTING FAILURE MODE ON (LEB %d) ***", lnum);
2185 c->failure_mode = 1;
2186 dump_stack();
2187 return 1;
2188}
2189
2190static void cut_data(const void *buf, int len)
2191{
2192 int flen, i;
2193 unsigned char *p = (void *)buf;
2194
2195 flen = (len * (long long)simple_rand()) >> 15;
2196 for (i = flen; i < len; i++)
2197 p[i] = 0xff;
2198}
2199
2200int dbg_leb_read(struct ubi_volume_desc *desc, int lnum, char *buf, int offset,
2201 int len, int check)
2202{
2203 if (in_failure_mode(desc))
2204 return -EIO;
2205 return ubi_leb_read(desc, lnum, buf, offset, len, check);
2206}
2207
2208int dbg_leb_write(struct ubi_volume_desc *desc, int lnum, const void *buf,
2209 int offset, int len, int dtype)
2210{
2211 int err;
2212
2213 if (in_failure_mode(desc))
2214 return -EIO;
2215 if (do_fail(desc, lnum, 1))
2216 cut_data(buf, len);
2217 err = ubi_leb_write(desc, lnum, buf, offset, len, dtype);
2218 if (err)
2219 return err;
2220 if (in_failure_mode(desc))
2221 return -EIO;
2222 return 0;
2223}
2224
2225int dbg_leb_change(struct ubi_volume_desc *desc, int lnum, const void *buf,
2226 int len, int dtype)
2227{
2228 int err;
2229
2230 if (do_fail(desc, lnum, 1))
2231 return -EIO;
2232 err = ubi_leb_change(desc, lnum, buf, len, dtype);
2233 if (err)
2234 return err;
2235 if (do_fail(desc, lnum, 1))
2236 return -EIO;
2237 return 0;
2238}
2239
2240int dbg_leb_erase(struct ubi_volume_desc *desc, int lnum)
2241{
2242 int err;
2243
2244 if (do_fail(desc, lnum, 0))
2245 return -EIO;
2246 err = ubi_leb_erase(desc, lnum);
2247 if (err)
2248 return err;
2249 if (do_fail(desc, lnum, 0))
2250 return -EIO;
2251 return 0;
2252}
2253
2254int dbg_leb_unmap(struct ubi_volume_desc *desc, int lnum)
2255{
2256 int err;
2257
2258 if (do_fail(desc, lnum, 0))
2259 return -EIO;
2260 err = ubi_leb_unmap(desc, lnum);
2261 if (err)
2262 return err;
2263 if (do_fail(desc, lnum, 0))
2264 return -EIO;
2265 return 0;
2266}
2267
2268int dbg_is_mapped(struct ubi_volume_desc *desc, int lnum)
2269{
2270 if (in_failure_mode(desc))
2271 return -EIO;
2272 return ubi_is_mapped(desc, lnum);
2273}
2274
2275int dbg_leb_map(struct ubi_volume_desc *desc, int lnum, int dtype)
2276{
2277 int err;
2278
2279 if (do_fail(desc, lnum, 0))
2280 return -EIO;
2281 err = ubi_leb_map(desc, lnum, dtype);
2282 if (err)
2283 return err;
2284 if (do_fail(desc, lnum, 0))
2285 return -EIO;
2286 return 0;
2287}
2288
2289#endif /* CONFIG_UBIFS_FS_DEBUG */
diff --git a/fs/ubifs/debug.h b/fs/ubifs/debug.h
new file mode 100644
index 000000000000..3c4f1e93c9e0
--- /dev/null
+++ b/fs/ubifs/debug.h
@@ -0,0 +1,403 @@
1/*
2 * This file is part of UBIFS.
3 *
4 * Copyright (C) 2006-2008 Nokia Corporation.
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms of the GNU General Public License version 2 as published by
8 * the Free Software Foundation.
9 *
10 * This program is distributed in the hope that it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13 * more details.
14 *
15 * You should have received a copy of the GNU General Public License along with
16 * this program; if not, write to the Free Software Foundation, Inc., 51
17 * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
18 *
19 * Authors: Artem Bityutskiy (Битюцкий Артём)
20 * Adrian Hunter
21 */
22
23#ifndef __UBIFS_DEBUG_H__
24#define __UBIFS_DEBUG_H__
25
26#ifdef CONFIG_UBIFS_FS_DEBUG
27
28#define UBIFS_DBG(op) op
29
30#define ubifs_assert(expr) do { \
31 if (unlikely(!(expr))) { \
32 printk(KERN_CRIT "UBIFS assert failed in %s at %u (pid %d)\n", \
33 __func__, __LINE__, current->pid); \
34 dbg_dump_stack(); \
35 } \
36} while (0)
37
38#define ubifs_assert_cmt_locked(c) do { \
39 if (unlikely(down_write_trylock(&(c)->commit_sem))) { \
40 up_write(&(c)->commit_sem); \
41 printk(KERN_CRIT "commit lock is not locked!\n"); \
42 ubifs_assert(0); \
43 } \
44} while (0)
45
46#define dbg_dump_stack() do { \
47 if (!dbg_failure_mode) \
48 dump_stack(); \
49} while (0)
50
51/* Generic debugging messages */
52#define dbg_msg(fmt, ...) do { \
53 spin_lock(&dbg_lock); \
54 printk(KERN_DEBUG "UBIFS DBG (pid %d): %s: " fmt "\n", current->pid, \
55 __func__, ##__VA_ARGS__); \
56 spin_unlock(&dbg_lock); \
57} while (0)
58
59#define dbg_do_msg(typ, fmt, ...) do { \
60 if (ubifs_msg_flags & typ) \
61 dbg_msg(fmt, ##__VA_ARGS__); \
62} while (0)
63
64#define dbg_err(fmt, ...) do { \
65 spin_lock(&dbg_lock); \
66 ubifs_err(fmt, ##__VA_ARGS__); \
67 spin_unlock(&dbg_lock); \
68} while (0)
69
70const char *dbg_key_str0(const struct ubifs_info *c,
71 const union ubifs_key *key);
72const char *dbg_key_str1(const struct ubifs_info *c,
73 const union ubifs_key *key);
74
75/*
76 * DBGKEY macros require dbg_lock to be held, which it is in the dbg message
77 * macros.
78 */
79#define DBGKEY(key) dbg_key_str0(c, (key))
80#define DBGKEY1(key) dbg_key_str1(c, (key))
81
82/* General messages */
83#define dbg_gen(fmt, ...) dbg_do_msg(UBIFS_MSG_GEN, fmt, ##__VA_ARGS__)
84
85/* Additional journal messages */
86#define dbg_jnl(fmt, ...) dbg_do_msg(UBIFS_MSG_JNL, fmt, ##__VA_ARGS__)
87
88/* Additional TNC messages */
89#define dbg_tnc(fmt, ...) dbg_do_msg(UBIFS_MSG_TNC, fmt, ##__VA_ARGS__)
90
91/* Additional lprops messages */
92#define dbg_lp(fmt, ...) dbg_do_msg(UBIFS_MSG_LP, fmt, ##__VA_ARGS__)
93
94/* Additional LEB find messages */
95#define dbg_find(fmt, ...) dbg_do_msg(UBIFS_MSG_FIND, fmt, ##__VA_ARGS__)
96
97/* Additional mount messages */
98#define dbg_mnt(fmt, ...) dbg_do_msg(UBIFS_MSG_MNT, fmt, ##__VA_ARGS__)
99
100/* Additional I/O messages */
101#define dbg_io(fmt, ...) dbg_do_msg(UBIFS_MSG_IO, fmt, ##__VA_ARGS__)
102
103/* Additional commit messages */
104#define dbg_cmt(fmt, ...) dbg_do_msg(UBIFS_MSG_CMT, fmt, ##__VA_ARGS__)
105
106/* Additional budgeting messages */
107#define dbg_budg(fmt, ...) dbg_do_msg(UBIFS_MSG_BUDG, fmt, ##__VA_ARGS__)
108
109/* Additional log messages */
110#define dbg_log(fmt, ...) dbg_do_msg(UBIFS_MSG_LOG, fmt, ##__VA_ARGS__)
111
112/* Additional gc messages */
113#define dbg_gc(fmt, ...) dbg_do_msg(UBIFS_MSG_GC, fmt, ##__VA_ARGS__)
114
115/* Additional scan messages */
116#define dbg_scan(fmt, ...) dbg_do_msg(UBIFS_MSG_SCAN, fmt, ##__VA_ARGS__)
117
118/* Additional recovery messages */
119#define dbg_rcvry(fmt, ...) dbg_do_msg(UBIFS_MSG_RCVRY, fmt, ##__VA_ARGS__)
120
121/*
122 * Debugging message type flags (must match msg_type_names in debug.c).
123 *
124 * UBIFS_MSG_GEN: general messages
125 * UBIFS_MSG_JNL: journal messages
126 * UBIFS_MSG_MNT: mount messages
127 * UBIFS_MSG_CMT: commit messages
128 * UBIFS_MSG_FIND: LEB find messages
129 * UBIFS_MSG_BUDG: budgeting messages
130 * UBIFS_MSG_GC: garbage collection messages
131 * UBIFS_MSG_TNC: TNC messages
132 * UBIFS_MSG_LP: lprops messages
133 * UBIFS_MSG_IO: I/O messages
134 * UBIFS_MSG_LOG: log messages
135 * UBIFS_MSG_SCAN: scan messages
136 * UBIFS_MSG_RCVRY: recovery messages
137 */
138enum {
139 UBIFS_MSG_GEN = 0x1,
140 UBIFS_MSG_JNL = 0x2,
141 UBIFS_MSG_MNT = 0x4,
142 UBIFS_MSG_CMT = 0x8,
143 UBIFS_MSG_FIND = 0x10,
144 UBIFS_MSG_BUDG = 0x20,
145 UBIFS_MSG_GC = 0x40,
146 UBIFS_MSG_TNC = 0x80,
147 UBIFS_MSG_LP = 0x100,
148 UBIFS_MSG_IO = 0x200,
149 UBIFS_MSG_LOG = 0x400,
150 UBIFS_MSG_SCAN = 0x800,
151 UBIFS_MSG_RCVRY = 0x1000,
152};
153
154/* Debugging message type flags for each default debug message level */
155#define UBIFS_MSG_LVL_0 0
156#define UBIFS_MSG_LVL_1 0x1
157#define UBIFS_MSG_LVL_2 0x7f
158#define UBIFS_MSG_LVL_3 0xffff
159
160/*
161 * Debugging check flags (must match chk_names in debug.c).
162 *
163 * UBIFS_CHK_GEN: general checks
164 * UBIFS_CHK_TNC: check TNC
165 * UBIFS_CHK_IDX_SZ: check index size
166 * UBIFS_CHK_ORPH: check orphans
167 * UBIFS_CHK_OLD_IDX: check the old index
168 * UBIFS_CHK_LPROPS: check lprops
169 * UBIFS_CHK_FS: check the file-system
170 */
171enum {
172 UBIFS_CHK_GEN = 0x1,
173 UBIFS_CHK_TNC = 0x2,
174 UBIFS_CHK_IDX_SZ = 0x4,
175 UBIFS_CHK_ORPH = 0x8,
176 UBIFS_CHK_OLD_IDX = 0x10,
177 UBIFS_CHK_LPROPS = 0x20,
178 UBIFS_CHK_FS = 0x40,
179};
180
181/*
182 * Special testing flags (must match tst_names in debug.c).
183 *
184 * UBIFS_TST_FORCE_IN_THE_GAPS: force the use of in-the-gaps method
185 * UBIFS_TST_RCVRY: failure mode for recovery testing
186 */
187enum {
188 UBIFS_TST_FORCE_IN_THE_GAPS = 0x2,
189 UBIFS_TST_RCVRY = 0x4,
190};
191
192#if CONFIG_UBIFS_FS_DEBUG_MSG_LVL == 1
193#define UBIFS_MSG_FLAGS_DEFAULT UBIFS_MSG_LVL_1
194#elif CONFIG_UBIFS_FS_DEBUG_MSG_LVL == 2
195#define UBIFS_MSG_FLAGS_DEFAULT UBIFS_MSG_LVL_2
196#elif CONFIG_UBIFS_FS_DEBUG_MSG_LVL == 3
197#define UBIFS_MSG_FLAGS_DEFAULT UBIFS_MSG_LVL_3
198#else
199#define UBIFS_MSG_FLAGS_DEFAULT UBIFS_MSG_LVL_0
200#endif
201
202#ifdef CONFIG_UBIFS_FS_DEBUG_CHKS
203#define UBIFS_CHK_FLAGS_DEFAULT 0xffffffff
204#else
205#define UBIFS_CHK_FLAGS_DEFAULT 0
206#endif
207
208extern spinlock_t dbg_lock;
209
210extern unsigned int ubifs_msg_flags;
211extern unsigned int ubifs_chk_flags;
212extern unsigned int ubifs_tst_flags;
213
214/* Dump functions */
215
216const char *dbg_ntype(int type);
217const char *dbg_cstate(int cmt_state);
218const char *dbg_get_key_dump(const struct ubifs_info *c,
219 const union ubifs_key *key);
220void dbg_dump_inode(const struct ubifs_info *c, const struct inode *inode);
221void dbg_dump_node(const struct ubifs_info *c, const void *node);
222void dbg_dump_budget_req(const struct ubifs_budget_req *req);
223void dbg_dump_lstats(const struct ubifs_lp_stats *lst);
224void dbg_dump_budg(struct ubifs_info *c);
225void dbg_dump_lprop(const struct ubifs_info *c, const struct ubifs_lprops *lp);
226void dbg_dump_lprops(struct ubifs_info *c);
227void dbg_dump_leb(const struct ubifs_info *c, int lnum);
228void dbg_dump_znode(const struct ubifs_info *c,
229 const struct ubifs_znode *znode);
230void dbg_dump_heap(struct ubifs_info *c, struct ubifs_lpt_heap *heap, int cat);
231void dbg_dump_pnode(struct ubifs_info *c, struct ubifs_pnode *pnode,
232 struct ubifs_nnode *parent, int iip);
233void dbg_dump_tnc(struct ubifs_info *c);
234void dbg_dump_index(struct ubifs_info *c);
235
236/* Checking helper functions */
237
238typedef int (*dbg_leaf_callback)(struct ubifs_info *c,
239 struct ubifs_zbranch *zbr, void *priv);
240typedef int (*dbg_znode_callback)(struct ubifs_info *c,
241 struct ubifs_znode *znode, void *priv);
242
243int dbg_walk_index(struct ubifs_info *c, dbg_leaf_callback leaf_cb,
244 dbg_znode_callback znode_cb, void *priv);
245
246/* Checking functions */
247
248int dbg_check_lprops(struct ubifs_info *c);
249
250int dbg_old_index_check_init(struct ubifs_info *c, struct ubifs_zbranch *zroot);
251int dbg_check_old_index(struct ubifs_info *c, struct ubifs_zbranch *zroot);
252
253int dbg_check_cats(struct ubifs_info *c);
254
255int dbg_check_ltab(struct ubifs_info *c);
256
257int dbg_check_synced_i_size(struct inode *inode);
258
259int dbg_check_dir_size(struct ubifs_info *c, const struct inode *dir);
260
261int dbg_check_tnc(struct ubifs_info *c, int extra);
262
263int dbg_check_idx_size(struct ubifs_info *c, long long idx_size);
264
265int dbg_check_filesystem(struct ubifs_info *c);
266
267void dbg_check_heap(struct ubifs_info *c, struct ubifs_lpt_heap *heap, int cat,
268 int add_pos);
269
270int dbg_check_lprops(struct ubifs_info *c);
271int dbg_check_lpt_nodes(struct ubifs_info *c, struct ubifs_cnode *cnode,
272 int row, int col);
273
274/* Force the use of in-the-gaps method for testing */
275
276#define dbg_force_in_the_gaps_enabled \
277 (ubifs_tst_flags & UBIFS_TST_FORCE_IN_THE_GAPS)
278
279int dbg_force_in_the_gaps(void);
280
281/* Failure mode for recovery testing */
282
283#define dbg_failure_mode (ubifs_tst_flags & UBIFS_TST_RCVRY)
284
285void dbg_failure_mode_registration(struct ubifs_info *c);
286void dbg_failure_mode_deregistration(struct ubifs_info *c);
287
288#ifndef UBIFS_DBG_PRESERVE_UBI
289
290#define ubi_leb_read dbg_leb_read
291#define ubi_leb_write dbg_leb_write
292#define ubi_leb_change dbg_leb_change
293#define ubi_leb_erase dbg_leb_erase
294#define ubi_leb_unmap dbg_leb_unmap
295#define ubi_is_mapped dbg_is_mapped
296#define ubi_leb_map dbg_leb_map
297
298#endif
299
300int dbg_leb_read(struct ubi_volume_desc *desc, int lnum, char *buf, int offset,
301 int len, int check);
302int dbg_leb_write(struct ubi_volume_desc *desc, int lnum, const void *buf,
303 int offset, int len, int dtype);
304int dbg_leb_change(struct ubi_volume_desc *desc, int lnum, const void *buf,
305 int len, int dtype);
306int dbg_leb_erase(struct ubi_volume_desc *desc, int lnum);
307int dbg_leb_unmap(struct ubi_volume_desc *desc, int lnum);
308int dbg_is_mapped(struct ubi_volume_desc *desc, int lnum);
309int dbg_leb_map(struct ubi_volume_desc *desc, int lnum, int dtype);
310
311static inline int dbg_read(struct ubi_volume_desc *desc, int lnum, char *buf,
312 int offset, int len)
313{
314 return dbg_leb_read(desc, lnum, buf, offset, len, 0);
315}
316
317static inline int dbg_write(struct ubi_volume_desc *desc, int lnum,
318 const void *buf, int offset, int len)
319{
320 return dbg_leb_write(desc, lnum, buf, offset, len, UBI_UNKNOWN);
321}
322
323static inline int dbg_change(struct ubi_volume_desc *desc, int lnum,
324 const void *buf, int len)
325{
326 return dbg_leb_change(desc, lnum, buf, len, UBI_UNKNOWN);
327}
328
329#else /* !CONFIG_UBIFS_FS_DEBUG */
330
331#define UBIFS_DBG(op)
332#define ubifs_assert(expr) ({})
333#define ubifs_assert_cmt_locked(c)
334#define dbg_dump_stack()
335#define dbg_err(fmt, ...) ({})
336#define dbg_msg(fmt, ...) ({})
337#define dbg_key(c, key, fmt, ...) ({})
338
339#define dbg_gen(fmt, ...) ({})
340#define dbg_jnl(fmt, ...) ({})
341#define dbg_tnc(fmt, ...) ({})
342#define dbg_lp(fmt, ...) ({})
343#define dbg_find(fmt, ...) ({})
344#define dbg_mnt(fmt, ...) ({})
345#define dbg_io(fmt, ...) ({})
346#define dbg_cmt(fmt, ...) ({})
347#define dbg_budg(fmt, ...) ({})
348#define dbg_log(fmt, ...) ({})
349#define dbg_gc(fmt, ...) ({})
350#define dbg_scan(fmt, ...) ({})
351#define dbg_rcvry(fmt, ...) ({})
352
353#define dbg_ntype(type) ""
354#define dbg_cstate(cmt_state) ""
355#define dbg_get_key_dump(c, key) ({})
356#define dbg_dump_inode(c, inode) ({})
357#define dbg_dump_node(c, node) ({})
358#define dbg_dump_budget_req(req) ({})
359#define dbg_dump_lstats(lst) ({})
360#define dbg_dump_budg(c) ({})
361#define dbg_dump_lprop(c, lp) ({})
362#define dbg_dump_lprops(c) ({})
363#define dbg_dump_leb(c, lnum) ({})
364#define dbg_dump_znode(c, znode) ({})
365#define dbg_dump_heap(c, heap, cat) ({})
366#define dbg_dump_pnode(c, pnode, parent, iip) ({})
367#define dbg_dump_tnc(c) ({})
368#define dbg_dump_index(c) ({})
369
370#define dbg_walk_index(c, leaf_cb, znode_cb, priv) 0
371
372#define dbg_old_index_check_init(c, zroot) 0
373#define dbg_check_old_index(c, zroot) 0
374
375#define dbg_check_cats(c) 0
376
377#define dbg_check_ltab(c) 0
378
379#define dbg_check_synced_i_size(inode) 0
380
381#define dbg_check_dir_size(c, dir) 0
382
383#define dbg_check_tnc(c, x) 0
384
385#define dbg_check_idx_size(c, idx_size) 0
386
387#define dbg_check_filesystem(c) 0
388
389#define dbg_check_heap(c, heap, cat, add_pos) ({})
390
391#define dbg_check_lprops(c) 0
392#define dbg_check_lpt_nodes(c, cnode, row, col) 0
393
394#define dbg_force_in_the_gaps_enabled 0
395#define dbg_force_in_the_gaps() 0
396
397#define dbg_failure_mode 0
398#define dbg_failure_mode_registration(c) ({})
399#define dbg_failure_mode_deregistration(c) ({})
400
401#endif /* !CONFIG_UBIFS_FS_DEBUG */
402
403#endif /* !__UBIFS_DEBUG_H__ */
diff --git a/fs/ubifs/dir.c b/fs/ubifs/dir.c
new file mode 100644
index 000000000000..e90374be7d3b
--- /dev/null
+++ b/fs/ubifs/dir.c
@@ -0,0 +1,1240 @@
1/* * This file is part of UBIFS.
2 *
3 * Copyright (C) 2006-2008 Nokia Corporation.
4 * Copyright (C) 2006, 2007 University of Szeged, Hungary
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms of the GNU General Public License version 2 as published by
8 * the Free Software Foundation.
9 *
10 * This program is distributed in the hope that it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13 * more details.
14 *
15 * You should have received a copy of the GNU General Public License along with
16 * this program; if not, write to the Free Software Foundation, Inc., 51
17 * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
18 *
19 * Authors: Artem Bityutskiy (Битюцкий Артём)
20 * Adrian Hunter
21 * Zoltan Sogor
22 */
23
24/*
25 * This file implements directory operations.
26 *
27 * All FS operations in this file allocate budget before writing anything to the
28 * media. If they fail to allocate it, the error is returned. The only
29 * exceptions are 'ubifs_unlink()' and 'ubifs_rmdir()' which keep working even
30 * if they unable to allocate the budget, because deletion %-ENOSPC failure is
31 * not what users are usually ready to get. UBIFS budgeting subsystem has some
32 * space reserved for these purposes.
33 *
34 * All operations in this file write all inodes which they change straight
35 * away, instead of marking them dirty. For example, 'ubifs_link()' changes
36 * @i_size of the parent inode and writes the parent inode together with the
37 * target inode. This was done to simplify file-system recovery which would
38 * otherwise be very difficult to do. The only exception is rename which marks
39 * the re-named inode dirty (because its @i_ctime is updated) but does not
40 * write it, but just marks it as dirty.
41 */
42
43#include "ubifs.h"
44
45/**
46 * inherit_flags - inherit flags of the parent inode.
47 * @dir: parent inode
48 * @mode: new inode mode flags
49 *
50 * This is a helper function for 'ubifs_new_inode()' which inherits flag of the
51 * parent directory inode @dir. UBIFS inodes inherit the following flags:
52 * o %UBIFS_COMPR_FL, which is useful to switch compression on/of on
53 * sub-directory basis;
54 * o %UBIFS_SYNC_FL - useful for the same reasons;
55 * o %UBIFS_DIRSYNC_FL - similar, but relevant only to directories.
56 *
57 * This function returns the inherited flags.
58 */
59static int inherit_flags(const struct inode *dir, int mode)
60{
61 int flags;
62 const struct ubifs_inode *ui = ubifs_inode(dir);
63
64 if (!S_ISDIR(dir->i_mode))
65 /*
66 * The parent is not a directory, which means that an extended
67 * attribute inode is being created. No flags.
68 */
69 return 0;
70
71 flags = ui->flags & (UBIFS_COMPR_FL | UBIFS_SYNC_FL | UBIFS_DIRSYNC_FL);
72 if (!S_ISDIR(mode))
73 /* The "DIRSYNC" flag only applies to directories */
74 flags &= ~UBIFS_DIRSYNC_FL;
75 return flags;
76}
77
78/**
79 * ubifs_new_inode - allocate new UBIFS inode object.
80 * @c: UBIFS file-system description object
81 * @dir: parent directory inode
82 * @mode: inode mode flags
83 *
84 * This function finds an unused inode number, allocates new inode and
85 * initializes it. Returns new inode in case of success and an error code in
86 * case of failure.
87 */
88struct inode *ubifs_new_inode(struct ubifs_info *c, const struct inode *dir,
89 int mode)
90{
91 struct inode *inode;
92 struct ubifs_inode *ui;
93
94 inode = new_inode(c->vfs_sb);
95 ui = ubifs_inode(inode);
96 if (!inode)
97 return ERR_PTR(-ENOMEM);
98
99 /*
100 * Set 'S_NOCMTIME' to prevent VFS form updating [mc]time of inodes and
101 * marking them dirty in file write path (see 'file_update_time()').
102 * UBIFS has to fully control "clean <-> dirty" transitions of inodes
103 * to make budgeting work.
104 */
105 inode->i_flags |= (S_NOCMTIME);
106
107 inode->i_uid = current->fsuid;
108 if (dir->i_mode & S_ISGID) {
109 inode->i_gid = dir->i_gid;
110 if (S_ISDIR(mode))
111 mode |= S_ISGID;
112 } else
113 inode->i_gid = current->fsgid;
114 inode->i_mode = mode;
115 inode->i_mtime = inode->i_atime = inode->i_ctime =
116 ubifs_current_time(inode);
117 inode->i_mapping->nrpages = 0;
118 /* Disable readahead */
119 inode->i_mapping->backing_dev_info = &c->bdi;
120
121 switch (mode & S_IFMT) {
122 case S_IFREG:
123 inode->i_mapping->a_ops = &ubifs_file_address_operations;
124 inode->i_op = &ubifs_file_inode_operations;
125 inode->i_fop = &ubifs_file_operations;
126 break;
127 case S_IFDIR:
128 inode->i_op = &ubifs_dir_inode_operations;
129 inode->i_fop = &ubifs_dir_operations;
130 inode->i_size = ui->ui_size = UBIFS_INO_NODE_SZ;
131 break;
132 case S_IFLNK:
133 inode->i_op = &ubifs_symlink_inode_operations;
134 break;
135 case S_IFSOCK:
136 case S_IFIFO:
137 case S_IFBLK:
138 case S_IFCHR:
139 inode->i_op = &ubifs_file_inode_operations;
140 break;
141 default:
142 BUG();
143 }
144
145 ui->flags = inherit_flags(dir, mode);
146 ubifs_set_inode_flags(inode);
147 if (S_ISREG(mode))
148 ui->compr_type = c->default_compr;
149 else
150 ui->compr_type = UBIFS_COMPR_NONE;
151 ui->synced_i_size = 0;
152
153 spin_lock(&c->cnt_lock);
154 /* Inode number overflow is currently not supported */
155 if (c->highest_inum >= INUM_WARN_WATERMARK) {
156 if (c->highest_inum >= INUM_WATERMARK) {
157 spin_unlock(&c->cnt_lock);
158 ubifs_err("out of inode numbers");
159 make_bad_inode(inode);
160 iput(inode);
161 return ERR_PTR(-EINVAL);
162 }
163 ubifs_warn("running out of inode numbers (current %lu, max %d)",
164 c->highest_inum, INUM_WATERMARK);
165 }
166
167 inode->i_ino = ++c->highest_inum;
168 inode->i_generation = ++c->vfs_gen;
169 /*
170 * The creation sequence number remains with this inode for its
171 * lifetime. All nodes for this inode have a greater sequence number,
172 * and so it is possible to distinguish obsolete nodes belonging to a
173 * previous incarnation of the same inode number - for example, for the
174 * purpose of rebuilding the index.
175 */
176 ui->creat_sqnum = ++c->max_sqnum;
177 spin_unlock(&c->cnt_lock);
178 return inode;
179}
180
181#ifdef CONFIG_UBIFS_FS_DEBUG
182
183static int dbg_check_name(struct ubifs_dent_node *dent, struct qstr *nm)
184{
185 if (!(ubifs_chk_flags & UBIFS_CHK_GEN))
186 return 0;
187 if (le16_to_cpu(dent->nlen) != nm->len)
188 return -EINVAL;
189 if (memcmp(dent->name, nm->name, nm->len))
190 return -EINVAL;
191 return 0;
192}
193
194#else
195
196#define dbg_check_name(dent, nm) 0
197
198#endif
199
200static struct dentry *ubifs_lookup(struct inode *dir, struct dentry *dentry,
201 struct nameidata *nd)
202{
203 int err;
204 union ubifs_key key;
205 struct inode *inode = NULL;
206 struct ubifs_dent_node *dent;
207 struct ubifs_info *c = dir->i_sb->s_fs_info;
208
209 dbg_gen("'%.*s' in dir ino %lu",
210 dentry->d_name.len, dentry->d_name.name, dir->i_ino);
211
212 if (dentry->d_name.len > UBIFS_MAX_NLEN)
213 return ERR_PTR(-ENAMETOOLONG);
214
215 dent = kmalloc(UBIFS_MAX_DENT_NODE_SZ, GFP_NOFS);
216 if (!dent)
217 return ERR_PTR(-ENOMEM);
218
219 dent_key_init(c, &key, dir->i_ino, &dentry->d_name);
220
221 err = ubifs_tnc_lookup_nm(c, &key, dent, &dentry->d_name);
222 if (err) {
223 /*
224 * Do not hash the direntry if parent 'i_nlink' is zero, because
225 * this has side-effects - '->delete_inode()' call will not be
226 * called for the parent orphan inode, because 'd_count' of its
227 * direntry will stay 1 (it'll be negative direntry I guess)
228 * and prevent 'iput_final()' until the dentry is destroyed due
229 * to unmount or memory pressure.
230 */
231 if (err == -ENOENT && dir->i_nlink != 0) {
232 dbg_gen("not found");
233 goto done;
234 }
235 goto out;
236 }
237
238 if (dbg_check_name(dent, &dentry->d_name)) {
239 err = -EINVAL;
240 goto out;
241 }
242
243 inode = ubifs_iget(dir->i_sb, le64_to_cpu(dent->inum));
244 if (IS_ERR(inode)) {
245 /*
246 * This should not happen. Probably the file-system needs
247 * checking.
248 */
249 err = PTR_ERR(inode);
250 ubifs_err("dead directory entry '%.*s', error %d",
251 dentry->d_name.len, dentry->d_name.name, err);
252 ubifs_ro_mode(c, err);
253 goto out;
254 }
255
256done:
257 kfree(dent);
258 /*
259 * Note, d_splice_alias() would be required instead if we supported
260 * NFS.
261 */
262 d_add(dentry, inode);
263 return NULL;
264
265out:
266 kfree(dent);
267 return ERR_PTR(err);
268}
269
270static int ubifs_create(struct inode *dir, struct dentry *dentry, int mode,
271 struct nameidata *nd)
272{
273 struct inode *inode;
274 struct ubifs_info *c = dir->i_sb->s_fs_info;
275 int err, sz_change = CALC_DENT_SIZE(dentry->d_name.len);
276 struct ubifs_budget_req req = { .new_ino = 1, .new_dent = 1,
277 .dirtied_ino = 1 };
278 struct ubifs_inode *dir_ui = ubifs_inode(dir);
279
280 /*
281 * Budget request settings: new inode, new direntry, changing the
282 * parent directory inode.
283 */
284
285 dbg_gen("dent '%.*s', mode %#x in dir ino %lu",
286 dentry->d_name.len, dentry->d_name.name, mode, dir->i_ino);
287
288 err = ubifs_budget_space(c, &req);
289 if (err)
290 return err;
291
292 inode = ubifs_new_inode(c, dir, mode);
293 if (IS_ERR(inode)) {
294 err = PTR_ERR(inode);
295 goto out_budg;
296 }
297
298 mutex_lock(&dir_ui->ui_mutex);
299 dir->i_size += sz_change;
300 dir_ui->ui_size = dir->i_size;
301 dir->i_mtime = dir->i_ctime = inode->i_ctime;
302 err = ubifs_jnl_update(c, dir, &dentry->d_name, inode, 0, 0);
303 if (err)
304 goto out_cancel;
305 mutex_unlock(&dir_ui->ui_mutex);
306
307 ubifs_release_budget(c, &req);
308 insert_inode_hash(inode);
309 d_instantiate(dentry, inode);
310 return 0;
311
312out_cancel:
313 dir->i_size -= sz_change;
314 dir_ui->ui_size = dir->i_size;
315 mutex_unlock(&dir_ui->ui_mutex);
316 make_bad_inode(inode);
317 iput(inode);
318out_budg:
319 ubifs_release_budget(c, &req);
320 ubifs_err("cannot create regular file, error %d", err);
321 return err;
322}
323
324/**
325 * vfs_dent_type - get VFS directory entry type.
326 * @type: UBIFS directory entry type
327 *
328 * This function converts UBIFS directory entry type into VFS directory entry
329 * type.
330 */
331static unsigned int vfs_dent_type(uint8_t type)
332{
333 switch (type) {
334 case UBIFS_ITYPE_REG:
335 return DT_REG;
336 case UBIFS_ITYPE_DIR:
337 return DT_DIR;
338 case UBIFS_ITYPE_LNK:
339 return DT_LNK;
340 case UBIFS_ITYPE_BLK:
341 return DT_BLK;
342 case UBIFS_ITYPE_CHR:
343 return DT_CHR;
344 case UBIFS_ITYPE_FIFO:
345 return DT_FIFO;
346 case UBIFS_ITYPE_SOCK:
347 return DT_SOCK;
348 default:
349 BUG();
350 }
351 return 0;
352}
353
354/*
355 * The classical Unix view for directory is that it is a linear array of
356 * (name, inode number) entries. Linux/VFS assumes this model as well.
357 * Particularly, 'readdir()' call wants us to return a directory entry offset
358 * which later may be used to continue 'readdir()'ing the directory or to
359 * 'seek()' to that specific direntry. Obviously UBIFS does not really fit this
360 * model because directory entries are identified by keys, which may collide.
361 *
362 * UBIFS uses directory entry hash value for directory offsets, so
363 * 'seekdir()'/'telldir()' may not always work because of possible key
364 * collisions. But UBIFS guarantees that consecutive 'readdir()' calls work
365 * properly by means of saving full directory entry name in the private field
366 * of the file description object.
367 *
368 * This means that UBIFS cannot support NFS which requires full
369 * 'seekdir()'/'telldir()' support.
370 */
371static int ubifs_readdir(struct file *file, void *dirent, filldir_t filldir)
372{
373 int err, over = 0;
374 struct qstr nm;
375 union ubifs_key key;
376 struct ubifs_dent_node *dent;
377 struct inode *dir = file->f_path.dentry->d_inode;
378 struct ubifs_info *c = dir->i_sb->s_fs_info;
379
380 dbg_gen("dir ino %lu, f_pos %#llx", dir->i_ino, file->f_pos);
381
382 if (file->f_pos > UBIFS_S_KEY_HASH_MASK || file->f_pos == 2)
383 /*
384 * The directory was seek'ed to a senseless position or there
385 * are no more entries.
386 */
387 return 0;
388
389 /* File positions 0 and 1 correspond to "." and ".." */
390 if (file->f_pos == 0) {
391 ubifs_assert(!file->private_data);
392 over = filldir(dirent, ".", 1, 0, dir->i_ino, DT_DIR);
393 if (over)
394 return 0;
395 file->f_pos = 1;
396 }
397
398 if (file->f_pos == 1) {
399 ubifs_assert(!file->private_data);
400 over = filldir(dirent, "..", 2, 1,
401 parent_ino(file->f_path.dentry), DT_DIR);
402 if (over)
403 return 0;
404
405 /* Find the first entry in TNC and save it */
406 lowest_dent_key(c, &key, dir->i_ino);
407 nm.name = NULL;
408 dent = ubifs_tnc_next_ent(c, &key, &nm);
409 if (IS_ERR(dent)) {
410 err = PTR_ERR(dent);
411 goto out;
412 }
413
414 file->f_pos = key_hash_flash(c, &dent->key);
415 file->private_data = dent;
416 }
417
418 dent = file->private_data;
419 if (!dent) {
420 /*
421 * The directory was seek'ed to and is now readdir'ed.
422 * Find the entry corresponding to @file->f_pos or the
423 * closest one.
424 */
425 dent_key_init_hash(c, &key, dir->i_ino, file->f_pos);
426 nm.name = NULL;
427 dent = ubifs_tnc_next_ent(c, &key, &nm);
428 if (IS_ERR(dent)) {
429 err = PTR_ERR(dent);
430 goto out;
431 }
432 file->f_pos = key_hash_flash(c, &dent->key);
433 file->private_data = dent;
434 }
435
436 while (1) {
437 dbg_gen("feed '%s', ino %llu, new f_pos %#x",
438 dent->name, le64_to_cpu(dent->inum),
439 key_hash_flash(c, &dent->key));
440 ubifs_assert(dent->ch.sqnum > ubifs_inode(dir)->creat_sqnum);
441
442 nm.len = le16_to_cpu(dent->nlen);
443 over = filldir(dirent, dent->name, nm.len, file->f_pos,
444 le64_to_cpu(dent->inum),
445 vfs_dent_type(dent->type));
446 if (over)
447 return 0;
448
449 /* Switch to the next entry */
450 key_read(c, &dent->key, &key);
451 nm.name = dent->name;
452 dent = ubifs_tnc_next_ent(c, &key, &nm);
453 if (IS_ERR(dent)) {
454 err = PTR_ERR(dent);
455 goto out;
456 }
457
458 kfree(file->private_data);
459 file->f_pos = key_hash_flash(c, &dent->key);
460 file->private_data = dent;
461 cond_resched();
462 }
463
464out:
465 if (err != -ENOENT) {
466 ubifs_err("cannot find next direntry, error %d", err);
467 return err;
468 }
469
470 kfree(file->private_data);
471 file->private_data = NULL;
472 file->f_pos = 2;
473 return 0;
474}
475
476/* If a directory is seeked, we have to free saved readdir() state */
477static loff_t ubifs_dir_llseek(struct file *file, loff_t offset, int origin)
478{
479 kfree(file->private_data);
480 file->private_data = NULL;
481 return generic_file_llseek(file, offset, origin);
482}
483
484/* Free saved readdir() state when the directory is closed */
485static int ubifs_dir_release(struct inode *dir, struct file *file)
486{
487 kfree(file->private_data);
488 file->private_data = NULL;
489 return 0;
490}
491
492/**
493 * lock_2_inodes - lock two UBIFS inodes.
494 * @inode1: first inode
495 * @inode2: second inode
496 */
497static void lock_2_inodes(struct inode *inode1, struct inode *inode2)
498{
499 if (inode1->i_ino < inode2->i_ino) {
500 mutex_lock_nested(&ubifs_inode(inode1)->ui_mutex, WB_MUTEX_2);
501 mutex_lock_nested(&ubifs_inode(inode2)->ui_mutex, WB_MUTEX_3);
502 } else {
503 mutex_lock_nested(&ubifs_inode(inode2)->ui_mutex, WB_MUTEX_2);
504 mutex_lock_nested(&ubifs_inode(inode1)->ui_mutex, WB_MUTEX_3);
505 }
506}
507
508/**
509 * unlock_2_inodes - unlock two UBIFS inodes inodes.
510 * @inode1: first inode
511 * @inode2: second inode
512 */
513static void unlock_2_inodes(struct inode *inode1, struct inode *inode2)
514{
515 mutex_unlock(&ubifs_inode(inode1)->ui_mutex);
516 mutex_unlock(&ubifs_inode(inode2)->ui_mutex);
517}
518
519static int ubifs_link(struct dentry *old_dentry, struct inode *dir,
520 struct dentry *dentry)
521{
522 struct ubifs_info *c = dir->i_sb->s_fs_info;
523 struct inode *inode = old_dentry->d_inode;
524 struct ubifs_inode *ui = ubifs_inode(inode);
525 struct ubifs_inode *dir_ui = ubifs_inode(dir);
526 int err, sz_change = CALC_DENT_SIZE(dentry->d_name.len);
527 struct ubifs_budget_req req = { .new_dent = 1, .dirtied_ino = 2,
528 .dirtied_ino_d = ui->data_len };
529
530 /*
531 * Budget request settings: new direntry, changing the target inode,
532 * changing the parent inode.
533 */
534
535 dbg_gen("dent '%.*s' to ino %lu (nlink %d) in dir ino %lu",
536 dentry->d_name.len, dentry->d_name.name, inode->i_ino,
537 inode->i_nlink, dir->i_ino);
538 err = dbg_check_synced_i_size(inode);
539 if (err)
540 return err;
541
542 err = ubifs_budget_space(c, &req);
543 if (err)
544 return err;
545
546 lock_2_inodes(dir, inode);
547 inc_nlink(inode);
548 atomic_inc(&inode->i_count);
549 inode->i_ctime = ubifs_current_time(inode);
550 dir->i_size += sz_change;
551 dir_ui->ui_size = dir->i_size;
552 dir->i_mtime = dir->i_ctime = inode->i_ctime;
553 err = ubifs_jnl_update(c, dir, &dentry->d_name, inode, 0, 0);
554 if (err)
555 goto out_cancel;
556 unlock_2_inodes(dir, inode);
557
558 ubifs_release_budget(c, &req);
559 d_instantiate(dentry, inode);
560 return 0;
561
562out_cancel:
563 dir->i_size -= sz_change;
564 dir_ui->ui_size = dir->i_size;
565 drop_nlink(inode);
566 unlock_2_inodes(dir, inode);
567 ubifs_release_budget(c, &req);
568 iput(inode);
569 return err;
570}
571
572static int ubifs_unlink(struct inode *dir, struct dentry *dentry)
573{
574 struct ubifs_info *c = dir->i_sb->s_fs_info;
575 struct inode *inode = dentry->d_inode;
576 struct ubifs_inode *dir_ui = ubifs_inode(dir);
577 int sz_change = CALC_DENT_SIZE(dentry->d_name.len);
578 int err, budgeted = 1;
579 struct ubifs_budget_req req = { .mod_dent = 1, .dirtied_ino = 2 };
580
581 /*
582 * Budget request settings: deletion direntry, deletion inode (+1 for
583 * @dirtied_ino), changing the parent directory inode. If budgeting
584 * fails, go ahead anyway because we have extra space reserved for
585 * deletions.
586 */
587
588 dbg_gen("dent '%.*s' from ino %lu (nlink %d) in dir ino %lu",
589 dentry->d_name.len, dentry->d_name.name, inode->i_ino,
590 inode->i_nlink, dir->i_ino);
591 err = dbg_check_synced_i_size(inode);
592 if (err)
593 return err;
594
595 err = ubifs_budget_space(c, &req);
596 if (err) {
597 if (err != -ENOSPC)
598 return err;
599 err = 0;
600 budgeted = 0;
601 }
602
603 lock_2_inodes(dir, inode);
604 inode->i_ctime = ubifs_current_time(dir);
605 drop_nlink(inode);
606 dir->i_size -= sz_change;
607 dir_ui->ui_size = dir->i_size;
608 dir->i_mtime = dir->i_ctime = inode->i_ctime;
609 err = ubifs_jnl_update(c, dir, &dentry->d_name, inode, 1, 0);
610 if (err)
611 goto out_cancel;
612 unlock_2_inodes(dir, inode);
613
614 if (budgeted)
615 ubifs_release_budget(c, &req);
616 else {
617 /* We've deleted something - clean the "no space" flags */
618 c->nospace = c->nospace_rp = 0;
619 smp_wmb();
620 }
621 return 0;
622
623out_cancel:
624 dir->i_size += sz_change;
625 dir_ui->ui_size = dir->i_size;
626 inc_nlink(inode);
627 unlock_2_inodes(dir, inode);
628 if (budgeted)
629 ubifs_release_budget(c, &req);
630 return err;
631}
632
633/**
634 * check_dir_empty - check if a directory is empty or not.
635 * @c: UBIFS file-system description object
636 * @dir: VFS inode object of the directory to check
637 *
638 * This function checks if directory @dir is empty. Returns zero if the
639 * directory is empty, %-ENOTEMPTY if it is not, and other negative error codes
640 * in case of of errors.
641 */
642static int check_dir_empty(struct ubifs_info *c, struct inode *dir)
643{
644 struct qstr nm = { .name = NULL };
645 struct ubifs_dent_node *dent;
646 union ubifs_key key;
647 int err;
648
649 lowest_dent_key(c, &key, dir->i_ino);
650 dent = ubifs_tnc_next_ent(c, &key, &nm);
651 if (IS_ERR(dent)) {
652 err = PTR_ERR(dent);
653 if (err == -ENOENT)
654 err = 0;
655 } else {
656 kfree(dent);
657 err = -ENOTEMPTY;
658 }
659 return err;
660}
661
662static int ubifs_rmdir(struct inode *dir, struct dentry *dentry)
663{
664 struct ubifs_info *c = dir->i_sb->s_fs_info;
665 struct inode *inode = dentry->d_inode;
666 int sz_change = CALC_DENT_SIZE(dentry->d_name.len);
667 int err, budgeted = 1;
668 struct ubifs_inode *dir_ui = ubifs_inode(dir);
669 struct ubifs_budget_req req = { .mod_dent = 1, .dirtied_ino = 2 };
670
671 /*
672 * Budget request settings: deletion direntry, deletion inode and
673 * changing the parent inode. If budgeting fails, go ahead anyway
674 * because we have extra space reserved for deletions.
675 */
676
677 dbg_gen("directory '%.*s', ino %lu in dir ino %lu", dentry->d_name.len,
678 dentry->d_name.name, inode->i_ino, dir->i_ino);
679
680 err = check_dir_empty(c, dentry->d_inode);
681 if (err)
682 return err;
683
684 err = ubifs_budget_space(c, &req);
685 if (err) {
686 if (err != -ENOSPC)
687 return err;
688 budgeted = 0;
689 }
690
691 lock_2_inodes(dir, inode);
692 inode->i_ctime = ubifs_current_time(dir);
693 clear_nlink(inode);
694 drop_nlink(dir);
695 dir->i_size -= sz_change;
696 dir_ui->ui_size = dir->i_size;
697 dir->i_mtime = dir->i_ctime = inode->i_ctime;
698 err = ubifs_jnl_update(c, dir, &dentry->d_name, inode, 1, 0);
699 if (err)
700 goto out_cancel;
701 unlock_2_inodes(dir, inode);
702
703 if (budgeted)
704 ubifs_release_budget(c, &req);
705 else {
706 /* We've deleted something - clean the "no space" flags */
707 c->nospace = c->nospace_rp = 0;
708 smp_wmb();
709 }
710 return 0;
711
712out_cancel:
713 dir->i_size += sz_change;
714 dir_ui->ui_size = dir->i_size;
715 inc_nlink(dir);
716 inc_nlink(inode);
717 inc_nlink(inode);
718 unlock_2_inodes(dir, inode);
719 if (budgeted)
720 ubifs_release_budget(c, &req);
721 return err;
722}
723
724static int ubifs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
725{
726 struct inode *inode;
727 struct ubifs_inode *dir_ui = ubifs_inode(dir);
728 struct ubifs_info *c = dir->i_sb->s_fs_info;
729 int err, sz_change = CALC_DENT_SIZE(dentry->d_name.len);
730 struct ubifs_budget_req req = { .new_ino = 1, .new_dent = 1,
731 .dirtied_ino_d = 1 };
732
733 /*
734 * Budget request settings: new inode, new direntry and changing parent
735 * directory inode.
736 */
737
738 dbg_gen("dent '%.*s', mode %#x in dir ino %lu",
739 dentry->d_name.len, dentry->d_name.name, mode, dir->i_ino);
740
741 err = ubifs_budget_space(c, &req);
742 if (err)
743 return err;
744
745 inode = ubifs_new_inode(c, dir, S_IFDIR | mode);
746 if (IS_ERR(inode)) {
747 err = PTR_ERR(inode);
748 goto out_budg;
749 }
750
751 mutex_lock(&dir_ui->ui_mutex);
752 insert_inode_hash(inode);
753 inc_nlink(inode);
754 inc_nlink(dir);
755 dir->i_size += sz_change;
756 dir_ui->ui_size = dir->i_size;
757 dir->i_mtime = dir->i_ctime = inode->i_ctime;
758 err = ubifs_jnl_update(c, dir, &dentry->d_name, inode, 0, 0);
759 if (err) {
760 ubifs_err("cannot create directory, error %d", err);
761 goto out_cancel;
762 }
763 mutex_unlock(&dir_ui->ui_mutex);
764
765 ubifs_release_budget(c, &req);
766 d_instantiate(dentry, inode);
767 return 0;
768
769out_cancel:
770 dir->i_size -= sz_change;
771 dir_ui->ui_size = dir->i_size;
772 drop_nlink(dir);
773 mutex_unlock(&dir_ui->ui_mutex);
774 make_bad_inode(inode);
775 iput(inode);
776out_budg:
777 ubifs_release_budget(c, &req);
778 return err;
779}
780
781static int ubifs_mknod(struct inode *dir, struct dentry *dentry,
782 int mode, dev_t rdev)
783{
784 struct inode *inode;
785 struct ubifs_inode *ui;
786 struct ubifs_inode *dir_ui = ubifs_inode(dir);
787 struct ubifs_info *c = dir->i_sb->s_fs_info;
788 union ubifs_dev_desc *dev = NULL;
789 int sz_change = CALC_DENT_SIZE(dentry->d_name.len);
790 int err, devlen = 0;
791 struct ubifs_budget_req req = { .new_ino = 1, .new_dent = 1,
792 .new_ino_d = devlen, .dirtied_ino = 1 };
793
794 /*
795 * Budget request settings: new inode, new direntry and changing parent
796 * directory inode.
797 */
798
799 dbg_gen("dent '%.*s' in dir ino %lu",
800 dentry->d_name.len, dentry->d_name.name, dir->i_ino);
801
802 if (!new_valid_dev(rdev))
803 return -EINVAL;
804
805 if (S_ISBLK(mode) || S_ISCHR(mode)) {
806 dev = kmalloc(sizeof(union ubifs_dev_desc), GFP_NOFS);
807 if (!dev)
808 return -ENOMEM;
809 devlen = ubifs_encode_dev(dev, rdev);
810 }
811
812 err = ubifs_budget_space(c, &req);
813 if (err) {
814 kfree(dev);
815 return err;
816 }
817
818 inode = ubifs_new_inode(c, dir, mode);
819 if (IS_ERR(inode)) {
820 kfree(dev);
821 err = PTR_ERR(inode);
822 goto out_budg;
823 }
824
825 init_special_inode(inode, inode->i_mode, rdev);
826 inode->i_size = ubifs_inode(inode)->ui_size = devlen;
827 ui = ubifs_inode(inode);
828 ui->data = dev;
829 ui->data_len = devlen;
830
831 mutex_lock(&dir_ui->ui_mutex);
832 dir->i_size += sz_change;
833 dir_ui->ui_size = dir->i_size;
834 dir->i_mtime = dir->i_ctime = inode->i_ctime;
835 err = ubifs_jnl_update(c, dir, &dentry->d_name, inode, 0, 0);
836 if (err)
837 goto out_cancel;
838 mutex_unlock(&dir_ui->ui_mutex);
839
840 ubifs_release_budget(c, &req);
841 insert_inode_hash(inode);
842 d_instantiate(dentry, inode);
843 return 0;
844
845out_cancel:
846 dir->i_size -= sz_change;
847 dir_ui->ui_size = dir->i_size;
848 mutex_unlock(&dir_ui->ui_mutex);
849 make_bad_inode(inode);
850 iput(inode);
851out_budg:
852 ubifs_release_budget(c, &req);
853 return err;
854}
855
856static int ubifs_symlink(struct inode *dir, struct dentry *dentry,
857 const char *symname)
858{
859 struct inode *inode;
860 struct ubifs_inode *ui;
861 struct ubifs_inode *dir_ui = ubifs_inode(dir);
862 struct ubifs_info *c = dir->i_sb->s_fs_info;
863 int err, len = strlen(symname);
864 int sz_change = CALC_DENT_SIZE(dentry->d_name.len);
865 struct ubifs_budget_req req = { .new_ino = 1, .new_dent = 1,
866 .new_ino_d = len, .dirtied_ino = 1 };
867
868 /*
869 * Budget request settings: new inode, new direntry and changing parent
870 * directory inode.
871 */
872
873 dbg_gen("dent '%.*s', target '%s' in dir ino %lu", dentry->d_name.len,
874 dentry->d_name.name, symname, dir->i_ino);
875
876 if (len > UBIFS_MAX_INO_DATA)
877 return -ENAMETOOLONG;
878
879 err = ubifs_budget_space(c, &req);
880 if (err)
881 return err;
882
883 inode = ubifs_new_inode(c, dir, S_IFLNK | S_IRWXUGO);
884 if (IS_ERR(inode)) {
885 err = PTR_ERR(inode);
886 goto out_budg;
887 }
888
889 ui = ubifs_inode(inode);
890 ui->data = kmalloc(len + 1, GFP_NOFS);
891 if (!ui->data) {
892 err = -ENOMEM;
893 goto out_inode;
894 }
895
896 memcpy(ui->data, symname, len);
897 ((char *)ui->data)[len] = '\0';
898 /*
899 * The terminating zero byte is not written to the flash media and it
900 * is put just to make later in-memory string processing simpler. Thus,
901 * data length is @len, not @len + %1.
902 */
903 ui->data_len = len;
904 inode->i_size = ubifs_inode(inode)->ui_size = len;
905
906 mutex_lock(&dir_ui->ui_mutex);
907 dir->i_size += sz_change;
908 dir_ui->ui_size = dir->i_size;
909 dir->i_mtime = dir->i_ctime = inode->i_ctime;
910 err = ubifs_jnl_update(c, dir, &dentry->d_name, inode, 0, 0);
911 if (err)
912 goto out_cancel;
913 mutex_unlock(&dir_ui->ui_mutex);
914
915 ubifs_release_budget(c, &req);
916 insert_inode_hash(inode);
917 d_instantiate(dentry, inode);
918 return 0;
919
920out_cancel:
921 dir->i_size -= sz_change;
922 dir_ui->ui_size = dir->i_size;
923 mutex_unlock(&dir_ui->ui_mutex);
924out_inode:
925 make_bad_inode(inode);
926 iput(inode);
927out_budg:
928 ubifs_release_budget(c, &req);
929 return err;
930}
931
932/**
933 * lock_3_inodes - lock three UBIFS inodes for rename.
934 * @inode1: first inode
935 * @inode2: second inode
936 * @inode3: third inode
937 *
938 * For 'ubifs_rename()', @inode1 may be the same as @inode2 whereas @inode3 may
939 * be null.
940 */
941static void lock_3_inodes(struct inode *inode1, struct inode *inode2,
942 struct inode *inode3)
943{
944 struct inode *i1, *i2, *i3;
945
946 if (!inode3) {
947 if (inode1 != inode2) {
948 lock_2_inodes(inode1, inode2);
949 return;
950 }
951 mutex_lock_nested(&ubifs_inode(inode1)->ui_mutex, WB_MUTEX_1);
952 return;
953 }
954
955 if (inode1 == inode2) {
956 lock_2_inodes(inode1, inode3);
957 return;
958 }
959
960 /* 3 different inodes */
961 if (inode1 < inode2) {
962 i3 = inode2;
963 if (inode1 < inode3) {
964 i1 = inode1;
965 i2 = inode3;
966 } else {
967 i1 = inode3;
968 i2 = inode1;
969 }
970 } else {
971 i3 = inode1;
972 if (inode2 < inode3) {
973 i1 = inode2;
974 i2 = inode3;
975 } else {
976 i1 = inode3;
977 i2 = inode2;
978 }
979 }
980 mutex_lock_nested(&ubifs_inode(i1)->ui_mutex, WB_MUTEX_1);
981 lock_2_inodes(i2, i3);
982}
983
984/**
985 * unlock_3_inodes - unlock three UBIFS inodes for rename.
986 * @inode1: first inode
987 * @inode2: second inode
988 * @inode3: third inode
989 */
990static void unlock_3_inodes(struct inode *inode1, struct inode *inode2,
991 struct inode *inode3)
992{
993 mutex_unlock(&ubifs_inode(inode1)->ui_mutex);
994 if (inode1 != inode2)
995 mutex_unlock(&ubifs_inode(inode2)->ui_mutex);
996 if (inode3)
997 mutex_unlock(&ubifs_inode(inode3)->ui_mutex);
998}
999
1000static int ubifs_rename(struct inode *old_dir, struct dentry *old_dentry,
1001 struct inode *new_dir, struct dentry *new_dentry)
1002{
1003 struct ubifs_info *c = old_dir->i_sb->s_fs_info;
1004 struct inode *old_inode = old_dentry->d_inode;
1005 struct inode *new_inode = new_dentry->d_inode;
1006 struct ubifs_inode *old_inode_ui = ubifs_inode(old_inode);
1007 int err, release, sync = 0, move = (new_dir != old_dir);
1008 int is_dir = S_ISDIR(old_inode->i_mode);
1009 int unlink = !!new_inode;
1010 int new_sz = CALC_DENT_SIZE(new_dentry->d_name.len);
1011 int old_sz = CALC_DENT_SIZE(old_dentry->d_name.len);
1012 struct ubifs_budget_req req = { .new_dent = 1, .mod_dent = 1,
1013 .dirtied_ino = 3 };
1014 struct ubifs_budget_req ino_req = { .dirtied_ino = 1,
1015 .dirtied_ino_d = old_inode_ui->data_len };
1016 struct timespec time;
1017
1018 /*
1019 * Budget request settings: deletion direntry, new direntry, removing
1020 * the old inode, and changing old and new parent directory inodes.
1021 *
1022 * However, this operation also marks the target inode as dirty and
1023 * does not write it, so we allocate budget for the target inode
1024 * separately.
1025 */
1026
1027 dbg_gen("dent '%.*s' ino %lu in dir ino %lu to dent '%.*s' in "
1028 "dir ino %lu", old_dentry->d_name.len, old_dentry->d_name.name,
1029 old_inode->i_ino, old_dir->i_ino, new_dentry->d_name.len,
1030 new_dentry->d_name.name, new_dir->i_ino);
1031
1032 if (unlink && is_dir) {
1033 err = check_dir_empty(c, new_inode);
1034 if (err)
1035 return err;
1036 }
1037
1038 err = ubifs_budget_space(c, &req);
1039 if (err)
1040 return err;
1041 err = ubifs_budget_space(c, &ino_req);
1042 if (err) {
1043 ubifs_release_budget(c, &req);
1044 return err;
1045 }
1046
1047 lock_3_inodes(old_dir, new_dir, new_inode);
1048
1049 /*
1050 * Like most other Unix systems, set the @i_ctime for inodes on a
1051 * rename.
1052 */
1053 time = ubifs_current_time(old_dir);
1054 old_inode->i_ctime = time;
1055
1056 /* We must adjust parent link count when renaming directories */
1057 if (is_dir) {
1058 if (move) {
1059 /*
1060 * @old_dir loses a link because we are moving
1061 * @old_inode to a different directory.
1062 */
1063 drop_nlink(old_dir);
1064 /*
1065 * @new_dir only gains a link if we are not also
1066 * overwriting an existing directory.
1067 */
1068 if (!unlink)
1069 inc_nlink(new_dir);
1070 } else {
1071 /*
1072 * @old_inode is not moving to a different directory,
1073 * but @old_dir still loses a link if we are
1074 * overwriting an existing directory.
1075 */
1076 if (unlink)
1077 drop_nlink(old_dir);
1078 }
1079 }
1080
1081 old_dir->i_size -= old_sz;
1082 ubifs_inode(old_dir)->ui_size = old_dir->i_size;
1083 old_dir->i_mtime = old_dir->i_ctime = time;
1084 new_dir->i_mtime = new_dir->i_ctime = time;
1085
1086 /*
1087 * And finally, if we unlinked a direntry which happened to have the
1088 * same name as the moved direntry, we have to decrement @i_nlink of
1089 * the unlinked inode and change its ctime.
1090 */
1091 if (unlink) {
1092 /*
1093 * Directories cannot have hard-links, so if this is a
1094 * directory, decrement its @i_nlink twice because an empty
1095 * directory has @i_nlink 2.
1096 */
1097 if (is_dir)
1098 drop_nlink(new_inode);
1099 new_inode->i_ctime = time;
1100 drop_nlink(new_inode);
1101 } else {
1102 new_dir->i_size += new_sz;
1103 ubifs_inode(new_dir)->ui_size = new_dir->i_size;
1104 }
1105
1106 /*
1107 * Do not ask 'ubifs_jnl_rename()' to flush write-buffer if @old_inode
1108 * is dirty, because this will be done later on at the end of
1109 * 'ubifs_rename()'.
1110 */
1111 if (IS_SYNC(old_inode)) {
1112 sync = IS_DIRSYNC(old_dir) || IS_DIRSYNC(new_dir);
1113 if (unlink && IS_SYNC(new_inode))
1114 sync = 1;
1115 }
1116 err = ubifs_jnl_rename(c, old_dir, old_dentry, new_dir, new_dentry,
1117 sync);
1118 if (err)
1119 goto out_cancel;
1120
1121 unlock_3_inodes(old_dir, new_dir, new_inode);
1122 ubifs_release_budget(c, &req);
1123
1124 mutex_lock(&old_inode_ui->ui_mutex);
1125 release = old_inode_ui->dirty;
1126 mark_inode_dirty_sync(old_inode);
1127 mutex_unlock(&old_inode_ui->ui_mutex);
1128
1129 if (release)
1130 ubifs_release_budget(c, &ino_req);
1131 if (IS_SYNC(old_inode))
1132 err = old_inode->i_sb->s_op->write_inode(old_inode, 1);
1133 return err;
1134
1135out_cancel:
1136 if (unlink) {
1137 if (is_dir)
1138 inc_nlink(new_inode);
1139 inc_nlink(new_inode);
1140 } else {
1141 new_dir->i_size -= new_sz;
1142 ubifs_inode(new_dir)->ui_size = new_dir->i_size;
1143 }
1144 old_dir->i_size += old_sz;
1145 ubifs_inode(old_dir)->ui_size = old_dir->i_size;
1146 if (is_dir) {
1147 if (move) {
1148 inc_nlink(old_dir);
1149 if (!unlink)
1150 drop_nlink(new_dir);
1151 } else {
1152 if (unlink)
1153 inc_nlink(old_dir);
1154 }
1155 }
1156 unlock_3_inodes(old_dir, new_dir, new_inode);
1157 ubifs_release_budget(c, &ino_req);
1158 ubifs_release_budget(c, &req);
1159 return err;
1160}
1161
1162int ubifs_getattr(struct vfsmount *mnt, struct dentry *dentry,
1163 struct kstat *stat)
1164{
1165 loff_t size;
1166 struct inode *inode = dentry->d_inode;
1167 struct ubifs_inode *ui = ubifs_inode(inode);
1168
1169 mutex_lock(&ui->ui_mutex);
1170 stat->dev = inode->i_sb->s_dev;
1171 stat->ino = inode->i_ino;
1172 stat->mode = inode->i_mode;
1173 stat->nlink = inode->i_nlink;
1174 stat->uid = inode->i_uid;
1175 stat->gid = inode->i_gid;
1176 stat->rdev = inode->i_rdev;
1177 stat->atime = inode->i_atime;
1178 stat->mtime = inode->i_mtime;
1179 stat->ctime = inode->i_ctime;
1180 stat->blksize = UBIFS_BLOCK_SIZE;
1181 stat->size = ui->ui_size;
1182
1183 /*
1184 * Unfortunately, the 'stat()' system call was designed for block
1185 * device based file systems, and it is not appropriate for UBIFS,
1186 * because UBIFS does not have notion of "block". For example, it is
1187 * difficult to tell how many block a directory takes - it actually
1188 * takes less than 300 bytes, but we have to round it to block size,
1189 * which introduces large mistake. This makes utilities like 'du' to
1190 * report completely senseless numbers. This is the reason why UBIFS
1191 * goes the same way as JFFS2 - it reports zero blocks for everything
1192 * but regular files, which makes more sense than reporting completely
1193 * wrong sizes.
1194 */
1195 if (S_ISREG(inode->i_mode)) {
1196 size = ui->xattr_size;
1197 size += stat->size;
1198 size = ALIGN(size, UBIFS_BLOCK_SIZE);
1199 /*
1200 * Note, user-space expects 512-byte blocks count irrespectively
1201 * of what was reported in @stat->size.
1202 */
1203 stat->blocks = size >> 9;
1204 } else
1205 stat->blocks = 0;
1206 mutex_unlock(&ui->ui_mutex);
1207 return 0;
1208}
1209
1210struct inode_operations ubifs_dir_inode_operations = {
1211 .lookup = ubifs_lookup,
1212 .create = ubifs_create,
1213 .link = ubifs_link,
1214 .symlink = ubifs_symlink,
1215 .unlink = ubifs_unlink,
1216 .mkdir = ubifs_mkdir,
1217 .rmdir = ubifs_rmdir,
1218 .mknod = ubifs_mknod,
1219 .rename = ubifs_rename,
1220 .setattr = ubifs_setattr,
1221 .getattr = ubifs_getattr,
1222#ifdef CONFIG_UBIFS_FS_XATTR
1223 .setxattr = ubifs_setxattr,
1224 .getxattr = ubifs_getxattr,
1225 .listxattr = ubifs_listxattr,
1226 .removexattr = ubifs_removexattr,
1227#endif
1228};
1229
1230struct file_operations ubifs_dir_operations = {
1231 .llseek = ubifs_dir_llseek,
1232 .release = ubifs_dir_release,
1233 .read = generic_read_dir,
1234 .readdir = ubifs_readdir,
1235 .fsync = ubifs_fsync,
1236 .unlocked_ioctl = ubifs_ioctl,
1237#ifdef CONFIG_COMPAT
1238 .compat_ioctl = ubifs_compat_ioctl,
1239#endif
1240};
diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c
new file mode 100644
index 000000000000..005a3b854d96
--- /dev/null
+++ b/fs/ubifs/file.c
@@ -0,0 +1,1275 @@
1/*
2 * This file is part of UBIFS.
3 *
4 * Copyright (C) 2006-2008 Nokia Corporation.
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms of the GNU General Public License version 2 as published by
8 * the Free Software Foundation.
9 *
10 * This program is distributed in the hope that it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13 * more details.
14 *
15 * You should have received a copy of the GNU General Public License along with
16 * this program; if not, write to the Free Software Foundation, Inc., 51
17 * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
18 *
19 * Authors: Artem Bityutskiy (Битюцкий Артём)
20 * Adrian Hunter
21 */
22
23/*
24 * This file implements VFS file and inode operations of regular files, device
25 * nodes and symlinks as well as address space operations.
26 *
27 * UBIFS uses 2 page flags: PG_private and PG_checked. PG_private is set if the
28 * page is dirty and is used for budgeting purposes - dirty pages should not be
29 * budgeted. The PG_checked flag is set if full budgeting is required for the
30 * page e.g., when it corresponds to a file hole or it is just beyond the file
31 * size. The budgeting is done in 'ubifs_write_begin()', because it is OK to
32 * fail in this function, and the budget is released in 'ubifs_write_end()'. So
33 * the PG_private and PG_checked flags carry the information about how the page
34 * was budgeted, to make it possible to release the budget properly.
35 *
36 * A thing to keep in mind: inode's 'i_mutex' is locked in most VFS operations
37 * we implement. However, this is not true for '->writepage()', which might be
38 * called with 'i_mutex' unlocked. For example, when pdflush is performing
39 * write-back, it calls 'writepage()' with unlocked 'i_mutex', although the
40 * inode has 'I_LOCK' flag in this case. At "normal" work-paths 'i_mutex' is
41 * locked in '->writepage', e.g. in "sys_write -> alloc_pages -> direct reclaim
42 * path'. So, in '->writepage()' we are only guaranteed that the page is
43 * locked.
44 *
45 * Similarly, 'i_mutex' does not have to be locked in readpage(), e.g.,
46 * readahead path does not have it locked ("sys_read -> generic_file_aio_read
47 * -> ondemand_readahead -> readpage"). In case of readahead, 'I_LOCK' flag is
48 * not set as well. However, UBIFS disables readahead.
49 *
50 * This, for example means that there might be 2 concurrent '->writepage()'
51 * calls for the same inode, but different inode dirty pages.
52 */
53
54#include "ubifs.h"
55#include <linux/mount.h>
56
57static int read_block(struct inode *inode, void *addr, unsigned int block,
58 struct ubifs_data_node *dn)
59{
60 struct ubifs_info *c = inode->i_sb->s_fs_info;
61 int err, len, out_len;
62 union ubifs_key key;
63 unsigned int dlen;
64
65 data_key_init(c, &key, inode->i_ino, block);
66 err = ubifs_tnc_lookup(c, &key, dn);
67 if (err) {
68 if (err == -ENOENT)
69 /* Not found, so it must be a hole */
70 memset(addr, 0, UBIFS_BLOCK_SIZE);
71 return err;
72 }
73
74 ubifs_assert(dn->ch.sqnum > ubifs_inode(inode)->creat_sqnum);
75
76 len = le32_to_cpu(dn->size);
77 if (len <= 0 || len > UBIFS_BLOCK_SIZE)
78 goto dump;
79
80 dlen = le32_to_cpu(dn->ch.len) - UBIFS_DATA_NODE_SZ;
81 out_len = UBIFS_BLOCK_SIZE;
82 err = ubifs_decompress(&dn->data, dlen, addr, &out_len,
83 le16_to_cpu(dn->compr_type));
84 if (err || len != out_len)
85 goto dump;
86
87 /*
88 * Data length can be less than a full block, even for blocks that are
89 * not the last in the file (e.g., as a result of making a hole and
90 * appending data). Ensure that the remainder is zeroed out.
91 */
92 if (len < UBIFS_BLOCK_SIZE)
93 memset(addr + len, 0, UBIFS_BLOCK_SIZE - len);
94
95 return 0;
96
97dump:
98 ubifs_err("bad data node (block %u, inode %lu)",
99 block, inode->i_ino);
100 dbg_dump_node(c, dn);
101 return -EINVAL;
102}
103
104static int do_readpage(struct page *page)
105{
106 void *addr;
107 int err = 0, i;
108 unsigned int block, beyond;
109 struct ubifs_data_node *dn;
110 struct inode *inode = page->mapping->host;
111 loff_t i_size = i_size_read(inode);
112
113 dbg_gen("ino %lu, pg %lu, i_size %lld, flags %#lx",
114 inode->i_ino, page->index, i_size, page->flags);
115 ubifs_assert(!PageChecked(page));
116 ubifs_assert(!PagePrivate(page));
117
118 addr = kmap(page);
119
120 block = page->index << UBIFS_BLOCKS_PER_PAGE_SHIFT;
121 beyond = (i_size + UBIFS_BLOCK_SIZE - 1) >> UBIFS_BLOCK_SHIFT;
122 if (block >= beyond) {
123 /* Reading beyond inode */
124 SetPageChecked(page);
125 memset(addr, 0, PAGE_CACHE_SIZE);
126 goto out;
127 }
128
129 dn = kmalloc(UBIFS_MAX_DATA_NODE_SZ, GFP_NOFS);
130 if (!dn) {
131 err = -ENOMEM;
132 goto error;
133 }
134
135 i = 0;
136 while (1) {
137 int ret;
138
139 if (block >= beyond) {
140 /* Reading beyond inode */
141 err = -ENOENT;
142 memset(addr, 0, UBIFS_BLOCK_SIZE);
143 } else {
144 ret = read_block(inode, addr, block, dn);
145 if (ret) {
146 err = ret;
147 if (err != -ENOENT)
148 break;
149 }
150 }
151 if (++i >= UBIFS_BLOCKS_PER_PAGE)
152 break;
153 block += 1;
154 addr += UBIFS_BLOCK_SIZE;
155 }
156 if (err) {
157 if (err == -ENOENT) {
158 /* Not found, so it must be a hole */
159 SetPageChecked(page);
160 dbg_gen("hole");
161 goto out_free;
162 }
163 ubifs_err("cannot read page %lu of inode %lu, error %d",
164 page->index, inode->i_ino, err);
165 goto error;
166 }
167
168out_free:
169 kfree(dn);
170out:
171 SetPageUptodate(page);
172 ClearPageError(page);
173 flush_dcache_page(page);
174 kunmap(page);
175 return 0;
176
177error:
178 kfree(dn);
179 ClearPageUptodate(page);
180 SetPageError(page);
181 flush_dcache_page(page);
182 kunmap(page);
183 return err;
184}
185
186/**
187 * release_new_page_budget - release budget of a new page.
188 * @c: UBIFS file-system description object
189 *
190 * This is a helper function which releases budget corresponding to the budget
191 * of one new page of data.
192 */
193static void release_new_page_budget(struct ubifs_info *c)
194{
195 struct ubifs_budget_req req = { .recalculate = 1, .new_page = 1 };
196
197 ubifs_release_budget(c, &req);
198}
199
200/**
201 * release_existing_page_budget - release budget of an existing page.
202 * @c: UBIFS file-system description object
203 *
204 * This is a helper function which releases budget corresponding to the budget
205 * of changing one one page of data which already exists on the flash media.
206 */
207static void release_existing_page_budget(struct ubifs_info *c)
208{
209 struct ubifs_budget_req req = { .dd_growth = c->page_budget};
210
211 ubifs_release_budget(c, &req);
212}
213
214static int write_begin_slow(struct address_space *mapping,
215 loff_t pos, unsigned len, struct page **pagep)
216{
217 struct inode *inode = mapping->host;
218 struct ubifs_info *c = inode->i_sb->s_fs_info;
219 pgoff_t index = pos >> PAGE_CACHE_SHIFT;
220 struct ubifs_budget_req req = { .new_page = 1 };
221 int uninitialized_var(err), appending = !!(pos + len > inode->i_size);
222 struct page *page;
223
224 dbg_gen("ino %lu, pos %llu, len %u, i_size %lld",
225 inode->i_ino, pos, len, inode->i_size);
226
227 /*
228 * At the slow path we have to budget before locking the page, because
229 * budgeting may force write-back, which would wait on locked pages and
230 * deadlock if we had the page locked. At this point we do not know
231 * anything about the page, so assume that this is a new page which is
232 * written to a hole. This corresponds to largest budget. Later the
233 * budget will be amended if this is not true.
234 */
235 if (appending)
236 /* We are appending data, budget for inode change */
237 req.dirtied_ino = 1;
238
239 err = ubifs_budget_space(c, &req);
240 if (unlikely(err))
241 return err;
242
243 page = __grab_cache_page(mapping, index);
244 if (unlikely(!page)) {
245 ubifs_release_budget(c, &req);
246 return -ENOMEM;
247 }
248
249 if (!PageUptodate(page)) {
250 if (!(pos & PAGE_CACHE_MASK) && len == PAGE_CACHE_SIZE)
251 SetPageChecked(page);
252 else {
253 err = do_readpage(page);
254 if (err) {
255 unlock_page(page);
256 page_cache_release(page);
257 return err;
258 }
259 }
260
261 SetPageUptodate(page);
262 ClearPageError(page);
263 }
264
265 if (PagePrivate(page))
266 /*
267 * The page is dirty, which means it was budgeted twice:
268 * o first time the budget was allocated by the task which
269 * made the page dirty and set the PG_private flag;
270 * o and then we budgeted for it for the second time at the
271 * very beginning of this function.
272 *
273 * So what we have to do is to release the page budget we
274 * allocated.
275 */
276 release_new_page_budget(c);
277 else if (!PageChecked(page))
278 /*
279 * We are changing a page which already exists on the media.
280 * This means that changing the page does not make the amount
281 * of indexing information larger, and this part of the budget
282 * which we have already acquired may be released.
283 */
284 ubifs_convert_page_budget(c);
285
286 if (appending) {
287 struct ubifs_inode *ui = ubifs_inode(inode);
288
289 /*
290 * 'ubifs_write_end()' is optimized from the fast-path part of
291 * 'ubifs_write_begin()' and expects the @ui_mutex to be locked
292 * if data is appended.
293 */
294 mutex_lock(&ui->ui_mutex);
295 if (ui->dirty)
296 /*
297 * The inode is dirty already, so we may free the
298 * budget we allocated.
299 */
300 ubifs_release_dirty_inode_budget(c, ui);
301 }
302
303 *pagep = page;
304 return 0;
305}
306
307/**
308 * allocate_budget - allocate budget for 'ubifs_write_begin()'.
309 * @c: UBIFS file-system description object
310 * @page: page to allocate budget for
311 * @ui: UBIFS inode object the page belongs to
312 * @appending: non-zero if the page is appended
313 *
314 * This is a helper function for 'ubifs_write_begin()' which allocates budget
315 * for the operation. The budget is allocated differently depending on whether
316 * this is appending, whether the page is dirty or not, and so on. This
317 * function leaves the @ui->ui_mutex locked in case of appending. Returns zero
318 * in case of success and %-ENOSPC in case of failure.
319 */
320static int allocate_budget(struct ubifs_info *c, struct page *page,
321 struct ubifs_inode *ui, int appending)
322{
323 struct ubifs_budget_req req = { .fast = 1 };
324
325 if (PagePrivate(page)) {
326 if (!appending)
327 /*
328 * The page is dirty and we are not appending, which
329 * means no budget is needed at all.
330 */
331 return 0;
332
333 mutex_lock(&ui->ui_mutex);
334 if (ui->dirty)
335 /*
336 * The page is dirty and we are appending, so the inode
337 * has to be marked as dirty. However, it is already
338 * dirty, so we do not need any budget. We may return,
339 * but @ui->ui_mutex hast to be left locked because we
340 * should prevent write-back from flushing the inode
341 * and freeing the budget. The lock will be released in
342 * 'ubifs_write_end()'.
343 */
344 return 0;
345
346 /*
347 * The page is dirty, we are appending, the inode is clean, so
348 * we need to budget the inode change.
349 */
350 req.dirtied_ino = 1;
351 } else {
352 if (PageChecked(page))
353 /*
354 * The page corresponds to a hole and does not
355 * exist on the media. So changing it makes
356 * make the amount of indexing information
357 * larger, and we have to budget for a new
358 * page.
359 */
360 req.new_page = 1;
361 else
362 /*
363 * Not a hole, the change will not add any new
364 * indexing information, budget for page
365 * change.
366 */
367 req.dirtied_page = 1;
368
369 if (appending) {
370 mutex_lock(&ui->ui_mutex);
371 if (!ui->dirty)
372 /*
373 * The inode is clean but we will have to mark
374 * it as dirty because we are appending. This
375 * needs a budget.
376 */
377 req.dirtied_ino = 1;
378 }
379 }
380
381 return ubifs_budget_space(c, &req);
382}
383
384/*
385 * This function is called when a page of data is going to be written. Since
386 * the page of data will not necessarily go to the flash straight away, UBIFS
387 * has to reserve space on the media for it, which is done by means of
388 * budgeting.
389 *
390 * This is the hot-path of the file-system and we are trying to optimize it as
391 * much as possible. For this reasons it is split on 2 parts - slow and fast.
392 *
393 * There many budgeting cases:
394 * o a new page is appended - we have to budget for a new page and for
395 * changing the inode; however, if the inode is already dirty, there is
396 * no need to budget for it;
397 * o an existing clean page is changed - we have budget for it; if the page
398 * does not exist on the media (a hole), we have to budget for a new
399 * page; otherwise, we may budget for changing an existing page; the
400 * difference between these cases is that changing an existing page does
401 * not introduce anything new to the FS indexing information, so it does
402 * not grow, and smaller budget is acquired in this case;
403 * o an existing dirty page is changed - no need to budget at all, because
404 * the page budget has been acquired by earlier, when the page has been
405 * marked dirty.
406 *
407 * UBIFS budgeting sub-system may force write-back if it thinks there is no
408 * space to reserve. This imposes some locking restrictions and makes it
409 * impossible to take into account the above cases, and makes it impossible to
410 * optimize budgeting.
411 *
412 * The solution for this is that the fast path of 'ubifs_write_begin()' assumes
413 * there is a plenty of flash space and the budget will be acquired quickly,
414 * without forcing write-back. The slow path does not make this assumption.
415 */
416static int ubifs_write_begin(struct file *file, struct address_space *mapping,
417 loff_t pos, unsigned len, unsigned flags,
418 struct page **pagep, void **fsdata)
419{
420 struct inode *inode = mapping->host;
421 struct ubifs_info *c = inode->i_sb->s_fs_info;
422 struct ubifs_inode *ui = ubifs_inode(inode);
423 pgoff_t index = pos >> PAGE_CACHE_SHIFT;
424 int uninitialized_var(err), appending = !!(pos + len > inode->i_size);
425 struct page *page;
426
427
428 ubifs_assert(ubifs_inode(inode)->ui_size == inode->i_size);
429
430 if (unlikely(c->ro_media))
431 return -EROFS;
432
433 /* Try out the fast-path part first */
434 page = __grab_cache_page(mapping, index);
435 if (unlikely(!page))
436 return -ENOMEM;
437
438 if (!PageUptodate(page)) {
439 /* The page is not loaded from the flash */
440 if (!(pos & PAGE_CACHE_MASK) && len == PAGE_CACHE_SIZE)
441 /*
442 * We change whole page so no need to load it. But we
443 * have to set the @PG_checked flag to make the further
444 * code the page is new. This might be not true, but it
445 * is better to budget more that to read the page from
446 * the media.
447 */
448 SetPageChecked(page);
449 else {
450 err = do_readpage(page);
451 if (err) {
452 unlock_page(page);
453 page_cache_release(page);
454 return err;
455 }
456 }
457
458 SetPageUptodate(page);
459 ClearPageError(page);
460 }
461
462 err = allocate_budget(c, page, ui, appending);
463 if (unlikely(err)) {
464 ubifs_assert(err == -ENOSPC);
465 /*
466 * Budgeting failed which means it would have to force
467 * write-back but didn't, because we set the @fast flag in the
468 * request. Write-back cannot be done now, while we have the
469 * page locked, because it would deadlock. Unlock and free
470 * everything and fall-back to slow-path.
471 */
472 if (appending) {
473 ubifs_assert(mutex_is_locked(&ui->ui_mutex));
474 mutex_unlock(&ui->ui_mutex);
475 }
476 unlock_page(page);
477 page_cache_release(page);
478
479 return write_begin_slow(mapping, pos, len, pagep);
480 }
481
482 /*
483 * Whee, we aquired budgeting quickly - without involving
484 * garbage-collection, committing or forceing write-back. We return
485 * with @ui->ui_mutex locked if we are appending pages, and unlocked
486 * otherwise. This is an optimization (slightly hacky though).
487 */
488 *pagep = page;
489 return 0;
490
491}
492
493/**
494 * cancel_budget - cancel budget.
495 * @c: UBIFS file-system description object
496 * @page: page to cancel budget for
497 * @ui: UBIFS inode object the page belongs to
498 * @appending: non-zero if the page is appended
499 *
500 * This is a helper function for a page write operation. It unlocks the
501 * @ui->ui_mutex in case of appending.
502 */
503static void cancel_budget(struct ubifs_info *c, struct page *page,
504 struct ubifs_inode *ui, int appending)
505{
506 if (appending) {
507 if (!ui->dirty)
508 ubifs_release_dirty_inode_budget(c, ui);
509 mutex_unlock(&ui->ui_mutex);
510 }
511 if (!PagePrivate(page)) {
512 if (PageChecked(page))
513 release_new_page_budget(c);
514 else
515 release_existing_page_budget(c);
516 }
517}
518
519static int ubifs_write_end(struct file *file, struct address_space *mapping,
520 loff_t pos, unsigned len, unsigned copied,
521 struct page *page, void *fsdata)
522{
523 struct inode *inode = mapping->host;
524 struct ubifs_inode *ui = ubifs_inode(inode);
525 struct ubifs_info *c = inode->i_sb->s_fs_info;
526 loff_t end_pos = pos + len;
527 int appending = !!(end_pos > inode->i_size);
528
529 dbg_gen("ino %lu, pos %llu, pg %lu, len %u, copied %d, i_size %lld",
530 inode->i_ino, pos, page->index, len, copied, inode->i_size);
531
532 if (unlikely(copied < len && len == PAGE_CACHE_SIZE)) {
533 /*
534 * VFS copied less data to the page that it intended and
535 * declared in its '->write_begin()' call via the @len
536 * argument. If the page was not up-to-date, and @len was
537 * @PAGE_CACHE_SIZE, the 'ubifs_write_begin()' function did
538 * not load it from the media (for optimization reasons). This
539 * means that part of the page contains garbage. So read the
540 * page now.
541 */
542 dbg_gen("copied %d instead of %d, read page and repeat",
543 copied, len);
544 cancel_budget(c, page, ui, appending);
545
546 /*
547 * Return 0 to force VFS to repeat the whole operation, or the
548 * error code if 'do_readpage()' failes.
549 */
550 copied = do_readpage(page);
551 goto out;
552 }
553
554 if (!PagePrivate(page)) {
555 SetPagePrivate(page);
556 atomic_long_inc(&c->dirty_pg_cnt);
557 __set_page_dirty_nobuffers(page);
558 }
559
560 if (appending) {
561 i_size_write(inode, end_pos);
562 ui->ui_size = end_pos;
563 /*
564 * Note, we do not set @I_DIRTY_PAGES (which means that the
565 * inode has dirty pages), this has been done in
566 * '__set_page_dirty_nobuffers()'.
567 */
568 __mark_inode_dirty(inode, I_DIRTY_DATASYNC);
569 ubifs_assert(mutex_is_locked(&ui->ui_mutex));
570 mutex_unlock(&ui->ui_mutex);
571 }
572
573out:
574 unlock_page(page);
575 page_cache_release(page);
576 return copied;
577}
578
579static int ubifs_readpage(struct file *file, struct page *page)
580{
581 do_readpage(page);
582 unlock_page(page);
583 return 0;
584}
585
586static int do_writepage(struct page *page, int len)
587{
588 int err = 0, i, blen;
589 unsigned int block;
590 void *addr;
591 union ubifs_key key;
592 struct inode *inode = page->mapping->host;
593 struct ubifs_info *c = inode->i_sb->s_fs_info;
594
595#ifdef UBIFS_DEBUG
596 spin_lock(&ui->ui_lock);
597 ubifs_assert(page->index <= ui->synced_i_size << PAGE_CACHE_SIZE);
598 spin_unlock(&ui->ui_lock);
599#endif
600
601 /* Update radix tree tags */
602 set_page_writeback(page);
603
604 addr = kmap(page);
605 block = page->index << UBIFS_BLOCKS_PER_PAGE_SHIFT;
606 i = 0;
607 while (len) {
608 blen = min_t(int, len, UBIFS_BLOCK_SIZE);
609 data_key_init(c, &key, inode->i_ino, block);
610 err = ubifs_jnl_write_data(c, inode, &key, addr, blen);
611 if (err)
612 break;
613 if (++i >= UBIFS_BLOCKS_PER_PAGE)
614 break;
615 block += 1;
616 addr += blen;
617 len -= blen;
618 }
619 if (err) {
620 SetPageError(page);
621 ubifs_err("cannot write page %lu of inode %lu, error %d",
622 page->index, inode->i_ino, err);
623 ubifs_ro_mode(c, err);
624 }
625
626 ubifs_assert(PagePrivate(page));
627 if (PageChecked(page))
628 release_new_page_budget(c);
629 else
630 release_existing_page_budget(c);
631
632 atomic_long_dec(&c->dirty_pg_cnt);
633 ClearPagePrivate(page);
634 ClearPageChecked(page);
635
636 kunmap(page);
637 unlock_page(page);
638 end_page_writeback(page);
639 return err;
640}
641
642/*
643 * When writing-back dirty inodes, VFS first writes-back pages belonging to the
644 * inode, then the inode itself. For UBIFS this may cause a problem. Consider a
645 * situation when a we have an inode with size 0, then a megabyte of data is
646 * appended to the inode, then write-back starts and flushes some amount of the
647 * dirty pages, the journal becomes full, commit happens and finishes, and then
648 * an unclean reboot happens. When the file system is mounted next time, the
649 * inode size would still be 0, but there would be many pages which are beyond
650 * the inode size, they would be indexed and consume flash space. Because the
651 * journal has been committed, the replay would not be able to detect this
652 * situation and correct the inode size. This means UBIFS would have to scan
653 * whole index and correct all inode sizes, which is long an unacceptable.
654 *
655 * To prevent situations like this, UBIFS writes pages back only if they are
656 * within last synchronized inode size, i.e. the the size which has been
657 * written to the flash media last time. Otherwise, UBIFS forces inode
658 * write-back, thus making sure the on-flash inode contains current inode size,
659 * and then keeps writing pages back.
660 *
661 * Some locking issues explanation. 'ubifs_writepage()' first is called with
662 * the page locked, and it locks @ui_mutex. However, write-back does take inode
663 * @i_mutex, which means other VFS operations may be run on this inode at the
664 * same time. And the problematic one is truncation to smaller size, from where
665 * we have to call 'vmtruncate()', which first changes @inode->i_size, then
666 * drops the truncated pages. And while dropping the pages, it takes the page
667 * lock. This means that 'do_truncation()' cannot call 'vmtruncate()' with
668 * @ui_mutex locked, because it would deadlock with 'ubifs_writepage()'. This
669 * means that @inode->i_size is changed while @ui_mutex is unlocked.
670 *
671 * But in 'ubifs_writepage()' we have to guarantee that we do not write beyond
672 * inode size. How do we do this if @inode->i_size may became smaller while we
673 * are in the middle of 'ubifs_writepage()'? The UBIFS solution is the
674 * @ui->ui_isize "shadow" field which UBIFS uses instead of @inode->i_size
675 * internally and updates it under @ui_mutex.
676 *
677 * Q: why we do not worry that if we race with truncation, we may end up with a
678 * situation when the inode is truncated while we are in the middle of
679 * 'do_writepage()', so we do write beyond inode size?
680 * A: If we are in the middle of 'do_writepage()', truncation would be locked
681 * on the page lock and it would not write the truncated inode node to the
682 * journal before we have finished.
683 */
684static int ubifs_writepage(struct page *page, struct writeback_control *wbc)
685{
686 struct inode *inode = page->mapping->host;
687 struct ubifs_inode *ui = ubifs_inode(inode);
688 loff_t i_size = i_size_read(inode), synced_i_size;
689 pgoff_t end_index = i_size >> PAGE_CACHE_SHIFT;
690 int err, len = i_size & (PAGE_CACHE_SIZE - 1);
691 void *kaddr;
692
693 dbg_gen("ino %lu, pg %lu, pg flags %#lx",
694 inode->i_ino, page->index, page->flags);
695 ubifs_assert(PagePrivate(page));
696
697 /* Is the page fully outside @i_size? (truncate in progress) */
698 if (page->index > end_index || (page->index == end_index && !len)) {
699 err = 0;
700 goto out_unlock;
701 }
702
703 spin_lock(&ui->ui_lock);
704 synced_i_size = ui->synced_i_size;
705 spin_unlock(&ui->ui_lock);
706
707 /* Is the page fully inside @i_size? */
708 if (page->index < end_index) {
709 if (page->index >= synced_i_size >> PAGE_CACHE_SHIFT) {
710 err = inode->i_sb->s_op->write_inode(inode, 1);
711 if (err)
712 goto out_unlock;
713 /*
714 * The inode has been written, but the write-buffer has
715 * not been synchronized, so in case of an unclean
716 * reboot we may end up with some pages beyond inode
717 * size, but they would be in the journal (because
718 * commit flushes write buffers) and recovery would deal
719 * with this.
720 */
721 }
722 return do_writepage(page, PAGE_CACHE_SIZE);
723 }
724
725 /*
726 * The page straddles @i_size. It must be zeroed out on each and every
727 * writepage invocation because it may be mmapped. "A file is mapped
728 * in multiples of the page size. For a file that is not a multiple of
729 * the page size, the remaining memory is zeroed when mapped, and
730 * writes to that region are not written out to the file."
731 */
732 kaddr = kmap_atomic(page, KM_USER0);
733 memset(kaddr + len, 0, PAGE_CACHE_SIZE - len);
734 flush_dcache_page(page);
735 kunmap_atomic(kaddr, KM_USER0);
736
737 if (i_size > synced_i_size) {
738 err = inode->i_sb->s_op->write_inode(inode, 1);
739 if (err)
740 goto out_unlock;
741 }
742
743 return do_writepage(page, len);
744
745out_unlock:
746 unlock_page(page);
747 return err;
748}
749
750/**
751 * do_attr_changes - change inode attributes.
752 * @inode: inode to change attributes for
753 * @attr: describes attributes to change
754 */
755static void do_attr_changes(struct inode *inode, const struct iattr *attr)
756{
757 if (attr->ia_valid & ATTR_UID)
758 inode->i_uid = attr->ia_uid;
759 if (attr->ia_valid & ATTR_GID)
760 inode->i_gid = attr->ia_gid;
761 if (attr->ia_valid & ATTR_ATIME)
762 inode->i_atime = timespec_trunc(attr->ia_atime,
763 inode->i_sb->s_time_gran);
764 if (attr->ia_valid & ATTR_MTIME)
765 inode->i_mtime = timespec_trunc(attr->ia_mtime,
766 inode->i_sb->s_time_gran);
767 if (attr->ia_valid & ATTR_CTIME)
768 inode->i_ctime = timespec_trunc(attr->ia_ctime,
769 inode->i_sb->s_time_gran);
770 if (attr->ia_valid & ATTR_MODE) {
771 umode_t mode = attr->ia_mode;
772
773 if (!in_group_p(inode->i_gid) && !capable(CAP_FSETID))
774 mode &= ~S_ISGID;
775 inode->i_mode = mode;
776 }
777}
778
779/**
780 * do_truncation - truncate an inode.
781 * @c: UBIFS file-system description object
782 * @inode: inode to truncate
783 * @attr: inode attribute changes description
784 *
785 * This function implements VFS '->setattr()' call when the inode is truncated
786 * to a smaller size. Returns zero in case of success and a negative error code
787 * in case of failure.
788 */
789static int do_truncation(struct ubifs_info *c, struct inode *inode,
790 const struct iattr *attr)
791{
792 int err;
793 struct ubifs_budget_req req;
794 loff_t old_size = inode->i_size, new_size = attr->ia_size;
795 int offset = new_size & (UBIFS_BLOCK_SIZE - 1);
796 struct ubifs_inode *ui = ubifs_inode(inode);
797
798 dbg_gen("ino %lu, size %lld -> %lld", inode->i_ino, old_size, new_size);
799 memset(&req, 0, sizeof(struct ubifs_budget_req));
800
801 /*
802 * If this is truncation to a smaller size, and we do not truncate on a
803 * block boundary, budget for changing one data block, because the last
804 * block will be re-written.
805 */
806 if (new_size & (UBIFS_BLOCK_SIZE - 1))
807 req.dirtied_page = 1;
808
809 req.dirtied_ino = 1;
810 /* A funny way to budget for truncation node */
811 req.dirtied_ino_d = UBIFS_TRUN_NODE_SZ;
812 err = ubifs_budget_space(c, &req);
813 if (err)
814 return err;
815
816 err = vmtruncate(inode, new_size);
817 if (err)
818 goto out_budg;
819
820 if (offset) {
821 pgoff_t index = new_size >> PAGE_CACHE_SHIFT;
822 struct page *page;
823
824 page = find_lock_page(inode->i_mapping, index);
825 if (page) {
826 if (PageDirty(page)) {
827 /*
828 * 'ubifs_jnl_truncate()' will try to truncate
829 * the last data node, but it contains
830 * out-of-date data because the page is dirty.
831 * Write the page now, so that
832 * 'ubifs_jnl_truncate()' will see an already
833 * truncated (and up to date) data node.
834 */
835 ubifs_assert(PagePrivate(page));
836
837 clear_page_dirty_for_io(page);
838 if (UBIFS_BLOCKS_PER_PAGE_SHIFT)
839 offset = new_size &
840 (PAGE_CACHE_SIZE - 1);
841 err = do_writepage(page, offset);
842 page_cache_release(page);
843 if (err)
844 goto out_budg;
845 /*
846 * We could now tell 'ubifs_jnl_truncate()' not
847 * to read the last block.
848 */
849 } else {
850 /*
851 * We could 'kmap()' the page and pass the data
852 * to 'ubifs_jnl_truncate()' to save it from
853 * having to read it.
854 */
855 unlock_page(page);
856 page_cache_release(page);
857 }
858 }
859 }
860
861 mutex_lock(&ui->ui_mutex);
862 ui->ui_size = inode->i_size;
863 /* Truncation changes inode [mc]time */
864 inode->i_mtime = inode->i_ctime = ubifs_current_time(inode);
865 /* The other attributes may be changed at the same time as well */
866 do_attr_changes(inode, attr);
867
868 err = ubifs_jnl_truncate(c, inode, old_size, new_size);
869 mutex_unlock(&ui->ui_mutex);
870out_budg:
871 ubifs_release_budget(c, &req);
872 return err;
873}
874
875/**
876 * do_setattr - change inode attributes.
877 * @c: UBIFS file-system description object
878 * @inode: inode to change attributes for
879 * @attr: inode attribute changes description
880 *
881 * This function implements VFS '->setattr()' call for all cases except
882 * truncations to smaller size. Returns zero in case of success and a negative
883 * error code in case of failure.
884 */
885static int do_setattr(struct ubifs_info *c, struct inode *inode,
886 const struct iattr *attr)
887{
888 int err, release;
889 loff_t new_size = attr->ia_size;
890 struct ubifs_inode *ui = ubifs_inode(inode);
891 struct ubifs_budget_req req = { .dirtied_ino = 1,
892 .dirtied_ino_d = ui->data_len };
893
894 err = ubifs_budget_space(c, &req);
895 if (err)
896 return err;
897
898 if (attr->ia_valid & ATTR_SIZE) {
899 dbg_gen("size %lld -> %lld", inode->i_size, new_size);
900 err = vmtruncate(inode, new_size);
901 if (err)
902 goto out;
903 }
904
905 mutex_lock(&ui->ui_mutex);
906 if (attr->ia_valid & ATTR_SIZE) {
907 /* Truncation changes inode [mc]time */
908 inode->i_mtime = inode->i_ctime = ubifs_current_time(inode);
909 /* 'vmtruncate()' changed @i_size, update @ui_size */
910 ui->ui_size = inode->i_size;
911 }
912
913 do_attr_changes(inode, attr);
914
915 release = ui->dirty;
916 if (attr->ia_valid & ATTR_SIZE)
917 /*
918 * Inode length changed, so we have to make sure
919 * @I_DIRTY_DATASYNC is set.
920 */
921 __mark_inode_dirty(inode, I_DIRTY_SYNC | I_DIRTY_DATASYNC);
922 else
923 mark_inode_dirty_sync(inode);
924 mutex_unlock(&ui->ui_mutex);
925
926 if (release)
927 ubifs_release_budget(c, &req);
928 if (IS_SYNC(inode))
929 err = inode->i_sb->s_op->write_inode(inode, 1);
930 return err;
931
932out:
933 ubifs_release_budget(c, &req);
934 return err;
935}
936
937int ubifs_setattr(struct dentry *dentry, struct iattr *attr)
938{
939 int err;
940 struct inode *inode = dentry->d_inode;
941 struct ubifs_info *c = inode->i_sb->s_fs_info;
942
943 dbg_gen("ino %lu, ia_valid %#x", inode->i_ino, attr->ia_valid);
944 err = inode_change_ok(inode, attr);
945 if (err)
946 return err;
947
948 err = dbg_check_synced_i_size(inode);
949 if (err)
950 return err;
951
952 if ((attr->ia_valid & ATTR_SIZE) && attr->ia_size < inode->i_size)
953 /* Truncation to a smaller size */
954 err = do_truncation(c, inode, attr);
955 else
956 err = do_setattr(c, inode, attr);
957
958 return err;
959}
960
961static void ubifs_invalidatepage(struct page *page, unsigned long offset)
962{
963 struct inode *inode = page->mapping->host;
964 struct ubifs_info *c = inode->i_sb->s_fs_info;
965
966 ubifs_assert(PagePrivate(page));
967 if (offset)
968 /* Partial page remains dirty */
969 return;
970
971 if (PageChecked(page))
972 release_new_page_budget(c);
973 else
974 release_existing_page_budget(c);
975
976 atomic_long_dec(&c->dirty_pg_cnt);
977 ClearPagePrivate(page);
978 ClearPageChecked(page);
979}
980
981static void *ubifs_follow_link(struct dentry *dentry, struct nameidata *nd)
982{
983 struct ubifs_inode *ui = ubifs_inode(dentry->d_inode);
984
985 nd_set_link(nd, ui->data);
986 return NULL;
987}
988
989int ubifs_fsync(struct file *file, struct dentry *dentry, int datasync)
990{
991 struct inode *inode = dentry->d_inode;
992 struct ubifs_info *c = inode->i_sb->s_fs_info;
993 int err;
994
995 dbg_gen("syncing inode %lu", inode->i_ino);
996
997 /*
998 * VFS has already synchronized dirty pages for this inode. Synchronize
999 * the inode unless this is a 'datasync()' call.
1000 */
1001 if (!datasync || (inode->i_state & I_DIRTY_DATASYNC)) {
1002 err = inode->i_sb->s_op->write_inode(inode, 1);
1003 if (err)
1004 return err;
1005 }
1006
1007 /*
1008 * Nodes related to this inode may still sit in a write-buffer. Flush
1009 * them.
1010 */
1011 err = ubifs_sync_wbufs_by_inode(c, inode);
1012 if (err)
1013 return err;
1014
1015 return 0;
1016}
1017
1018/**
1019 * mctime_update_needed - check if mtime or ctime update is needed.
1020 * @inode: the inode to do the check for
1021 * @now: current time
1022 *
1023 * This helper function checks if the inode mtime/ctime should be updated or
1024 * not. If current values of the time-stamps are within the UBIFS inode time
1025 * granularity, they are not updated. This is an optimization.
1026 */
1027static inline int mctime_update_needed(const struct inode *inode,
1028 const struct timespec *now)
1029{
1030 if (!timespec_equal(&inode->i_mtime, now) ||
1031 !timespec_equal(&inode->i_ctime, now))
1032 return 1;
1033 return 0;
1034}
1035
1036/**
1037 * update_ctime - update mtime and ctime of an inode.
1038 * @c: UBIFS file-system description object
1039 * @inode: inode to update
1040 *
1041 * This function updates mtime and ctime of the inode if it is not equivalent to
1042 * current time. Returns zero in case of success and a negative error code in
1043 * case of failure.
1044 */
1045static int update_mctime(struct ubifs_info *c, struct inode *inode)
1046{
1047 struct timespec now = ubifs_current_time(inode);
1048 struct ubifs_inode *ui = ubifs_inode(inode);
1049
1050 if (mctime_update_needed(inode, &now)) {
1051 int err, release;
1052 struct ubifs_budget_req req = { .dirtied_ino = 1,
1053 .dirtied_ino_d = ui->data_len };
1054
1055 err = ubifs_budget_space(c, &req);
1056 if (err)
1057 return err;
1058
1059 mutex_lock(&ui->ui_mutex);
1060 inode->i_mtime = inode->i_ctime = ubifs_current_time(inode);
1061 release = ui->dirty;
1062 mark_inode_dirty_sync(inode);
1063 mutex_unlock(&ui->ui_mutex);
1064 if (release)
1065 ubifs_release_budget(c, &req);
1066 }
1067
1068 return 0;
1069}
1070
1071static ssize_t ubifs_aio_write(struct kiocb *iocb, const struct iovec *iov,
1072 unsigned long nr_segs, loff_t pos)
1073{
1074 int err;
1075 ssize_t ret;
1076 struct inode *inode = iocb->ki_filp->f_mapping->host;
1077 struct ubifs_info *c = inode->i_sb->s_fs_info;
1078
1079 err = update_mctime(c, inode);
1080 if (err)
1081 return err;
1082
1083 ret = generic_file_aio_write(iocb, iov, nr_segs, pos);
1084 if (ret < 0)
1085 return ret;
1086
1087 if (ret > 0 && (IS_SYNC(inode) || iocb->ki_filp->f_flags & O_SYNC)) {
1088 err = ubifs_sync_wbufs_by_inode(c, inode);
1089 if (err)
1090 return err;
1091 }
1092
1093 return ret;
1094}
1095
1096static int ubifs_set_page_dirty(struct page *page)
1097{
1098 int ret;
1099
1100 ret = __set_page_dirty_nobuffers(page);
1101 /*
1102 * An attempt to dirty a page without budgeting for it - should not
1103 * happen.
1104 */
1105 ubifs_assert(ret == 0);
1106 return ret;
1107}
1108
1109static int ubifs_releasepage(struct page *page, gfp_t unused_gfp_flags)
1110{
1111 /*
1112 * An attempt to release a dirty page without budgeting for it - should
1113 * not happen.
1114 */
1115 if (PageWriteback(page))
1116 return 0;
1117 ubifs_assert(PagePrivate(page));
1118 ubifs_assert(0);
1119 ClearPagePrivate(page);
1120 ClearPageChecked(page);
1121 return 1;
1122}
1123
1124/*
1125 * mmap()d file has taken write protection fault and is being made
1126 * writable. UBIFS must ensure page is budgeted for.
1127 */
1128static int ubifs_vm_page_mkwrite(struct vm_area_struct *vma, struct page *page)
1129{
1130 struct inode *inode = vma->vm_file->f_path.dentry->d_inode;
1131 struct ubifs_info *c = inode->i_sb->s_fs_info;
1132 struct timespec now = ubifs_current_time(inode);
1133 struct ubifs_budget_req req = { .new_page = 1 };
1134 int err, update_time;
1135
1136 dbg_gen("ino %lu, pg %lu, i_size %lld", inode->i_ino, page->index,
1137 i_size_read(inode));
1138 ubifs_assert(!(inode->i_sb->s_flags & MS_RDONLY));
1139
1140 if (unlikely(c->ro_media))
1141 return -EROFS;
1142
1143 /*
1144 * We have not locked @page so far so we may budget for changing the
1145 * page. Note, we cannot do this after we locked the page, because
1146 * budgeting may cause write-back which would cause deadlock.
1147 *
1148 * At the moment we do not know whether the page is dirty or not, so we
1149 * assume that it is not and budget for a new page. We could look at
1150 * the @PG_private flag and figure this out, but we may race with write
1151 * back and the page state may change by the time we lock it, so this
1152 * would need additional care. We do not bother with this at the
1153 * moment, although it might be good idea to do. Instead, we allocate
1154 * budget for a new page and amend it later on if the page was in fact
1155 * dirty.
1156 *
1157 * The budgeting-related logic of this function is similar to what we
1158 * do in 'ubifs_write_begin()' and 'ubifs_write_end()'. Glance there
1159 * for more comments.
1160 */
1161 update_time = mctime_update_needed(inode, &now);
1162 if (update_time)
1163 /*
1164 * We have to change inode time stamp which requires extra
1165 * budgeting.
1166 */
1167 req.dirtied_ino = 1;
1168
1169 err = ubifs_budget_space(c, &req);
1170 if (unlikely(err)) {
1171 if (err == -ENOSPC)
1172 ubifs_warn("out of space for mmapped file "
1173 "(inode number %lu)", inode->i_ino);
1174 return err;
1175 }
1176
1177 lock_page(page);
1178 if (unlikely(page->mapping != inode->i_mapping ||
1179 page_offset(page) > i_size_read(inode))) {
1180 /* Page got truncated out from underneath us */
1181 err = -EINVAL;
1182 goto out_unlock;
1183 }
1184
1185 if (PagePrivate(page))
1186 release_new_page_budget(c);
1187 else {
1188 if (!PageChecked(page))
1189 ubifs_convert_page_budget(c);
1190 SetPagePrivate(page);
1191 atomic_long_inc(&c->dirty_pg_cnt);
1192 __set_page_dirty_nobuffers(page);
1193 }
1194
1195 if (update_time) {
1196 int release;
1197 struct ubifs_inode *ui = ubifs_inode(inode);
1198
1199 mutex_lock(&ui->ui_mutex);
1200 inode->i_mtime = inode->i_ctime = ubifs_current_time(inode);
1201 release = ui->dirty;
1202 mark_inode_dirty_sync(inode);
1203 mutex_unlock(&ui->ui_mutex);
1204 if (release)
1205 ubifs_release_dirty_inode_budget(c, ui);
1206 }
1207
1208 unlock_page(page);
1209 return 0;
1210
1211out_unlock:
1212 unlock_page(page);
1213 ubifs_release_budget(c, &req);
1214 return err;
1215}
1216
1217static struct vm_operations_struct ubifs_file_vm_ops = {
1218 .fault = filemap_fault,
1219 .page_mkwrite = ubifs_vm_page_mkwrite,
1220};
1221
1222static int ubifs_file_mmap(struct file *file, struct vm_area_struct *vma)
1223{
1224 int err;
1225
1226 /* 'generic_file_mmap()' takes care of NOMMU case */
1227 err = generic_file_mmap(file, vma);
1228 if (err)
1229 return err;
1230 vma->vm_ops = &ubifs_file_vm_ops;
1231 return 0;
1232}
1233
1234struct address_space_operations ubifs_file_address_operations = {
1235 .readpage = ubifs_readpage,
1236 .writepage = ubifs_writepage,
1237 .write_begin = ubifs_write_begin,
1238 .write_end = ubifs_write_end,
1239 .invalidatepage = ubifs_invalidatepage,
1240 .set_page_dirty = ubifs_set_page_dirty,
1241 .releasepage = ubifs_releasepage,
1242};
1243
1244struct inode_operations ubifs_file_inode_operations = {
1245 .setattr = ubifs_setattr,
1246 .getattr = ubifs_getattr,
1247#ifdef CONFIG_UBIFS_FS_XATTR
1248 .setxattr = ubifs_setxattr,
1249 .getxattr = ubifs_getxattr,
1250 .listxattr = ubifs_listxattr,
1251 .removexattr = ubifs_removexattr,
1252#endif
1253};
1254
1255struct inode_operations ubifs_symlink_inode_operations = {
1256 .readlink = generic_readlink,
1257 .follow_link = ubifs_follow_link,
1258 .setattr = ubifs_setattr,
1259 .getattr = ubifs_getattr,
1260};
1261
1262struct file_operations ubifs_file_operations = {
1263 .llseek = generic_file_llseek,
1264 .read = do_sync_read,
1265 .write = do_sync_write,
1266 .aio_read = generic_file_aio_read,
1267 .aio_write = ubifs_aio_write,
1268 .mmap = ubifs_file_mmap,
1269 .fsync = ubifs_fsync,
1270 .unlocked_ioctl = ubifs_ioctl,
1271 .splice_read = generic_file_splice_read,
1272#ifdef CONFIG_COMPAT
1273 .compat_ioctl = ubifs_compat_ioctl,
1274#endif
1275};
diff --git a/fs/ubifs/find.c b/fs/ubifs/find.c
new file mode 100644
index 000000000000..10394c548367
--- /dev/null
+++ b/fs/ubifs/find.c
@@ -0,0 +1,975 @@
1/*
2 * This file is part of UBIFS.
3 *
4 * Copyright (C) 2006-2008 Nokia Corporation.
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms of the GNU General Public License version 2 as published by
8 * the Free Software Foundation.
9 *
10 * This program is distributed in the hope that it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13 * more details.
14 *
15 * You should have received a copy of the GNU General Public License along with
16 * this program; if not, write to the Free Software Foundation, Inc., 51
17 * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
18 *
19 * Authors: Artem Bityutskiy (Битюцкий Артём)
20 * Adrian Hunter
21 */
22
23/*
24 * This file contains functions for finding LEBs for various purposes e.g.
25 * garbage collection. In general, lprops category heaps and lists are used
26 * for fast access, falling back on scanning the LPT as a last resort.
27 */
28
29#include <linux/sort.h>
30#include "ubifs.h"
31
32/**
33 * struct scan_data - data provided to scan callback functions
34 * @min_space: minimum number of bytes for which to scan
35 * @pick_free: whether it is OK to scan for empty LEBs
36 * @lnum: LEB number found is returned here
37 * @exclude_index: whether to exclude index LEBs
38 */
39struct scan_data {
40 int min_space;
41 int pick_free;
42 int lnum;
43 int exclude_index;
44};
45
46/**
47 * valuable - determine whether LEB properties are valuable.
48 * @c: the UBIFS file-system description object
49 * @lprops: LEB properties
50 *
51 * This function return %1 if the LEB properties should be added to the LEB
52 * properties tree in memory. Otherwise %0 is returned.
53 */
54static int valuable(struct ubifs_info *c, const struct ubifs_lprops *lprops)
55{
56 int n, cat = lprops->flags & LPROPS_CAT_MASK;
57 struct ubifs_lpt_heap *heap;
58
59 switch (cat) {
60 case LPROPS_DIRTY:
61 case LPROPS_DIRTY_IDX:
62 case LPROPS_FREE:
63 heap = &c->lpt_heap[cat - 1];
64 if (heap->cnt < heap->max_cnt)
65 return 1;
66 if (lprops->free + lprops->dirty >= c->dark_wm)
67 return 1;
68 return 0;
69 case LPROPS_EMPTY:
70 n = c->lst.empty_lebs + c->freeable_cnt -
71 c->lst.taken_empty_lebs;
72 if (n < c->lsave_cnt)
73 return 1;
74 return 0;
75 case LPROPS_FREEABLE:
76 return 1;
77 case LPROPS_FRDI_IDX:
78 return 1;
79 }
80 return 0;
81}
82
83/**
84 * scan_for_dirty_cb - dirty space scan callback.
85 * @c: the UBIFS file-system description object
86 * @lprops: LEB properties to scan
87 * @in_tree: whether the LEB properties are in main memory
88 * @data: information passed to and from the caller of the scan
89 *
90 * This function returns a code that indicates whether the scan should continue
91 * (%LPT_SCAN_CONTINUE), whether the LEB properties should be added to the tree
92 * in main memory (%LPT_SCAN_ADD), or whether the scan should stop
93 * (%LPT_SCAN_STOP).
94 */
95static int scan_for_dirty_cb(struct ubifs_info *c,
96 const struct ubifs_lprops *lprops, int in_tree,
97 struct scan_data *data)
98{
99 int ret = LPT_SCAN_CONTINUE;
100
101 /* Exclude LEBs that are currently in use */
102 if (lprops->flags & LPROPS_TAKEN)
103 return LPT_SCAN_CONTINUE;
104 /* Determine whether to add these LEB properties to the tree */
105 if (!in_tree && valuable(c, lprops))
106 ret |= LPT_SCAN_ADD;
107 /* Exclude LEBs with too little space */
108 if (lprops->free + lprops->dirty < data->min_space)
109 return ret;
110 /* If specified, exclude index LEBs */
111 if (data->exclude_index && lprops->flags & LPROPS_INDEX)
112 return ret;
113 /* If specified, exclude empty or freeable LEBs */
114 if (lprops->free + lprops->dirty == c->leb_size) {
115 if (!data->pick_free)
116 return ret;
117 /* Exclude LEBs with too little dirty space (unless it is empty) */
118 } else if (lprops->dirty < c->dead_wm)
119 return ret;
120 /* Finally we found space */
121 data->lnum = lprops->lnum;
122 return LPT_SCAN_ADD | LPT_SCAN_STOP;
123}
124
125/**
126 * scan_for_dirty - find a data LEB with free space.
127 * @c: the UBIFS file-system description object
128 * @min_space: minimum amount free plus dirty space the returned LEB has to
129 * have
130 * @pick_free: if it is OK to return a free or freeable LEB
131 * @exclude_index: whether to exclude index LEBs
132 *
133 * This function returns a pointer to the LEB properties found or a negative
134 * error code.
135 */
136static const struct ubifs_lprops *scan_for_dirty(struct ubifs_info *c,
137 int min_space, int pick_free,
138 int exclude_index)
139{
140 const struct ubifs_lprops *lprops;
141 struct ubifs_lpt_heap *heap;
142 struct scan_data data;
143 int err, i;
144
145 /* There may be an LEB with enough dirty space on the free heap */
146 heap = &c->lpt_heap[LPROPS_FREE - 1];
147 for (i = 0; i < heap->cnt; i++) {
148 lprops = heap->arr[i];
149 if (lprops->free + lprops->dirty < min_space)
150 continue;
151 if (lprops->dirty < c->dead_wm)
152 continue;
153 return lprops;
154 }
155 /*
156 * A LEB may have fallen off of the bottom of the dirty heap, and ended
157 * up as uncategorized even though it has enough dirty space for us now,
158 * so check the uncategorized list. N.B. neither empty nor freeable LEBs
159 * can end up as uncategorized because they are kept on lists not
160 * finite-sized heaps.
161 */
162 list_for_each_entry(lprops, &c->uncat_list, list) {
163 if (lprops->flags & LPROPS_TAKEN)
164 continue;
165 if (lprops->free + lprops->dirty < min_space)
166 continue;
167 if (exclude_index && (lprops->flags & LPROPS_INDEX))
168 continue;
169 if (lprops->dirty < c->dead_wm)
170 continue;
171 return lprops;
172 }
173 /* We have looked everywhere in main memory, now scan the flash */
174 if (c->pnodes_have >= c->pnode_cnt)
175 /* All pnodes are in memory, so skip scan */
176 return ERR_PTR(-ENOSPC);
177 data.min_space = min_space;
178 data.pick_free = pick_free;
179 data.lnum = -1;
180 data.exclude_index = exclude_index;
181 err = ubifs_lpt_scan_nolock(c, -1, c->lscan_lnum,
182 (ubifs_lpt_scan_callback)scan_for_dirty_cb,
183 &data);
184 if (err)
185 return ERR_PTR(err);
186 ubifs_assert(data.lnum >= c->main_first && data.lnum < c->leb_cnt);
187 c->lscan_lnum = data.lnum;
188 lprops = ubifs_lpt_lookup_dirty(c, data.lnum);
189 if (IS_ERR(lprops))
190 return lprops;
191 ubifs_assert(lprops->lnum == data.lnum);
192 ubifs_assert(lprops->free + lprops->dirty >= min_space);
193 ubifs_assert(lprops->dirty >= c->dead_wm ||
194 (pick_free &&
195 lprops->free + lprops->dirty == c->leb_size));
196 ubifs_assert(!(lprops->flags & LPROPS_TAKEN));
197 ubifs_assert(!exclude_index || !(lprops->flags & LPROPS_INDEX));
198 return lprops;
199}
200
201/**
202 * ubifs_find_dirty_leb - find a dirty LEB for the Garbage Collector.
203 * @c: the UBIFS file-system description object
204 * @ret_lp: LEB properties are returned here on exit
205 * @min_space: minimum amount free plus dirty space the returned LEB has to
206 * have
207 * @pick_free: controls whether it is OK to pick empty or index LEBs
208 *
209 * This function tries to find a dirty logical eraseblock which has at least
210 * @min_space free and dirty space. It prefers to take an LEB from the dirty or
211 * dirty index heap, and it falls-back to LPT scanning if the heaps are empty
212 * or do not have an LEB which satisfies the @min_space criteria.
213 *
214 * Note:
215 * o LEBs which have less than dead watermark of dirty space are never picked
216 * by this function;
217 *
218 * Returns zero and the LEB properties of
219 * found dirty LEB in case of success, %-ENOSPC if no dirty LEB was found and a
220 * negative error code in case of other failures. The returned LEB is marked as
221 * "taken".
222 *
223 * The additional @pick_free argument controls if this function has to return a
224 * free or freeable LEB if one is present. For example, GC must to set it to %1,
225 * when called from the journal space reservation function, because the
226 * appearance of free space may coincide with the loss of enough dirty space
227 * for GC to succeed anyway.
228 *
229 * In contrast, if the Garbage Collector is called from budgeting, it should
230 * just make free space, not return LEBs which are already free or freeable.
231 *
232 * In addition @pick_free is set to %2 by the recovery process in order to
233 * recover gc_lnum in which case an index LEB must not be returned.
234 */
235int ubifs_find_dirty_leb(struct ubifs_info *c, struct ubifs_lprops *ret_lp,
236 int min_space, int pick_free)
237{
238 int err = 0, sum, exclude_index = pick_free == 2 ? 1 : 0;
239 const struct ubifs_lprops *lp = NULL, *idx_lp = NULL;
240 struct ubifs_lpt_heap *heap, *idx_heap;
241
242 ubifs_get_lprops(c);
243
244 if (pick_free) {
245 int lebs, rsvd_idx_lebs = 0;
246
247 spin_lock(&c->space_lock);
248 lebs = c->lst.empty_lebs;
249 lebs += c->freeable_cnt - c->lst.taken_empty_lebs;
250
251 /*
252 * Note, the index may consume more LEBs than have been reserved
253 * for it. It is OK because it might be consolidated by GC.
254 * But if the index takes fewer LEBs than it is reserved for it,
255 * this function must avoid picking those reserved LEBs.
256 */
257 if (c->min_idx_lebs >= c->lst.idx_lebs) {
258 rsvd_idx_lebs = c->min_idx_lebs - c->lst.idx_lebs;
259 exclude_index = 1;
260 }
261 spin_unlock(&c->space_lock);
262
263 /* Check if there are enough free LEBs for the index */
264 if (rsvd_idx_lebs < lebs) {
265 /* OK, try to find an empty LEB */
266 lp = ubifs_fast_find_empty(c);
267 if (lp)
268 goto found;
269
270 /* Or a freeable LEB */
271 lp = ubifs_fast_find_freeable(c);
272 if (lp)
273 goto found;
274 } else
275 /*
276 * We cannot pick free/freeable LEBs in the below code.
277 */
278 pick_free = 0;
279 } else {
280 spin_lock(&c->space_lock);
281 exclude_index = (c->min_idx_lebs >= c->lst.idx_lebs);
282 spin_unlock(&c->space_lock);
283 }
284
285 /* Look on the dirty and dirty index heaps */
286 heap = &c->lpt_heap[LPROPS_DIRTY - 1];
287 idx_heap = &c->lpt_heap[LPROPS_DIRTY_IDX - 1];
288
289 if (idx_heap->cnt && !exclude_index) {
290 idx_lp = idx_heap->arr[0];
291 sum = idx_lp->free + idx_lp->dirty;
292 /*
293 * Since we reserve twice as more space for the index than it
294 * actually takes, it does not make sense to pick indexing LEBs
295 * with less than half LEB of dirty space.
296 */
297 if (sum < min_space || sum < c->half_leb_size)
298 idx_lp = NULL;
299 }
300
301 if (heap->cnt) {
302 lp = heap->arr[0];
303 if (lp->dirty + lp->free < min_space)
304 lp = NULL;
305 }
306
307 /* Pick the LEB with most space */
308 if (idx_lp && lp) {
309 if (idx_lp->free + idx_lp->dirty >= lp->free + lp->dirty)
310 lp = idx_lp;
311 } else if (idx_lp && !lp)
312 lp = idx_lp;
313
314 if (lp) {
315 ubifs_assert(lp->dirty >= c->dead_wm);
316 goto found;
317 }
318
319 /* Did not find a dirty LEB on the dirty heaps, have to scan */
320 dbg_find("scanning LPT for a dirty LEB");
321 lp = scan_for_dirty(c, min_space, pick_free, exclude_index);
322 if (IS_ERR(lp)) {
323 err = PTR_ERR(lp);
324 goto out;
325 }
326 ubifs_assert(lp->dirty >= c->dead_wm ||
327 (pick_free && lp->free + lp->dirty == c->leb_size));
328
329found:
330 dbg_find("found LEB %d, free %d, dirty %d, flags %#x",
331 lp->lnum, lp->free, lp->dirty, lp->flags);
332
333 lp = ubifs_change_lp(c, lp, LPROPS_NC, LPROPS_NC,
334 lp->flags | LPROPS_TAKEN, 0);
335 if (IS_ERR(lp)) {
336 err = PTR_ERR(lp);
337 goto out;
338 }
339
340 memcpy(ret_lp, lp, sizeof(struct ubifs_lprops));
341
342out:
343 ubifs_release_lprops(c);
344 return err;
345}
346
347/**
348 * scan_for_free_cb - free space scan callback.
349 * @c: the UBIFS file-system description object
350 * @lprops: LEB properties to scan
351 * @in_tree: whether the LEB properties are in main memory
352 * @data: information passed to and from the caller of the scan
353 *
354 * This function returns a code that indicates whether the scan should continue
355 * (%LPT_SCAN_CONTINUE), whether the LEB properties should be added to the tree
356 * in main memory (%LPT_SCAN_ADD), or whether the scan should stop
357 * (%LPT_SCAN_STOP).
358 */
359static int scan_for_free_cb(struct ubifs_info *c,
360 const struct ubifs_lprops *lprops, int in_tree,
361 struct scan_data *data)
362{
363 int ret = LPT_SCAN_CONTINUE;
364
365 /* Exclude LEBs that are currently in use */
366 if (lprops->flags & LPROPS_TAKEN)
367 return LPT_SCAN_CONTINUE;
368 /* Determine whether to add these LEB properties to the tree */
369 if (!in_tree && valuable(c, lprops))
370 ret |= LPT_SCAN_ADD;
371 /* Exclude index LEBs */
372 if (lprops->flags & LPROPS_INDEX)
373 return ret;
374 /* Exclude LEBs with too little space */
375 if (lprops->free < data->min_space)
376 return ret;
377 /* If specified, exclude empty LEBs */
378 if (!data->pick_free && lprops->free == c->leb_size)
379 return ret;
380 /*
381 * LEBs that have only free and dirty space must not be allocated
382 * because they may have been unmapped already or they may have data
383 * that is obsolete only because of nodes that are still sitting in a
384 * wbuf.
385 */
386 if (lprops->free + lprops->dirty == c->leb_size && lprops->dirty > 0)
387 return ret;
388 /* Finally we found space */
389 data->lnum = lprops->lnum;
390 return LPT_SCAN_ADD | LPT_SCAN_STOP;
391}
392
393/**
394 * do_find_free_space - find a data LEB with free space.
395 * @c: the UBIFS file-system description object
396 * @min_space: minimum amount of free space required
397 * @pick_free: whether it is OK to scan for empty LEBs
398 * @squeeze: whether to try to find space in a non-empty LEB first
399 *
400 * This function returns a pointer to the LEB properties found or a negative
401 * error code.
402 */
403static
404const struct ubifs_lprops *do_find_free_space(struct ubifs_info *c,
405 int min_space, int pick_free,
406 int squeeze)
407{
408 const struct ubifs_lprops *lprops;
409 struct ubifs_lpt_heap *heap;
410 struct scan_data data;
411 int err, i;
412
413 if (squeeze) {
414 lprops = ubifs_fast_find_free(c);
415 if (lprops && lprops->free >= min_space)
416 return lprops;
417 }
418 if (pick_free) {
419 lprops = ubifs_fast_find_empty(c);
420 if (lprops)
421 return lprops;
422 }
423 if (!squeeze) {
424 lprops = ubifs_fast_find_free(c);
425 if (lprops && lprops->free >= min_space)
426 return lprops;
427 }
428 /* There may be an LEB with enough free space on the dirty heap */
429 heap = &c->lpt_heap[LPROPS_DIRTY - 1];
430 for (i = 0; i < heap->cnt; i++) {
431 lprops = heap->arr[i];
432 if (lprops->free >= min_space)
433 return lprops;
434 }
435 /*
436 * A LEB may have fallen off of the bottom of the free heap, and ended
437 * up as uncategorized even though it has enough free space for us now,
438 * so check the uncategorized list. N.B. neither empty nor freeable LEBs
439 * can end up as uncategorized because they are kept on lists not
440 * finite-sized heaps.
441 */
442 list_for_each_entry(lprops, &c->uncat_list, list) {
443 if (lprops->flags & LPROPS_TAKEN)
444 continue;
445 if (lprops->flags & LPROPS_INDEX)
446 continue;
447 if (lprops->free >= min_space)
448 return lprops;
449 }
450 /* We have looked everywhere in main memory, now scan the flash */
451 if (c->pnodes_have >= c->pnode_cnt)
452 /* All pnodes are in memory, so skip scan */
453 return ERR_PTR(-ENOSPC);
454 data.min_space = min_space;
455 data.pick_free = pick_free;
456 data.lnum = -1;
457 err = ubifs_lpt_scan_nolock(c, -1, c->lscan_lnum,
458 (ubifs_lpt_scan_callback)scan_for_free_cb,
459 &data);
460 if (err)
461 return ERR_PTR(err);
462 ubifs_assert(data.lnum >= c->main_first && data.lnum < c->leb_cnt);
463 c->lscan_lnum = data.lnum;
464 lprops = ubifs_lpt_lookup_dirty(c, data.lnum);
465 if (IS_ERR(lprops))
466 return lprops;
467 ubifs_assert(lprops->lnum == data.lnum);
468 ubifs_assert(lprops->free >= min_space);
469 ubifs_assert(!(lprops->flags & LPROPS_TAKEN));
470 ubifs_assert(!(lprops->flags & LPROPS_INDEX));
471 return lprops;
472}
473
474/**
475 * ubifs_find_free_space - find a data LEB with free space.
476 * @c: the UBIFS file-system description object
477 * @min_space: minimum amount of required free space
478 * @free: contains amount of free space in the LEB on exit
479 * @squeeze: whether to try to find space in a non-empty LEB first
480 *
481 * This function looks for an LEB with at least @min_space bytes of free space.
482 * It tries to find an empty LEB if possible. If no empty LEBs are available,
483 * this function searches for a non-empty data LEB. The returned LEB is marked
484 * as "taken".
485 *
486 * This function returns found LEB number in case of success, %-ENOSPC if it
487 * failed to find a LEB with @min_space bytes of free space and other a negative
488 * error codes in case of failure.
489 */
490int ubifs_find_free_space(struct ubifs_info *c, int min_space, int *free,
491 int squeeze)
492{
493 const struct ubifs_lprops *lprops;
494 int lebs, rsvd_idx_lebs, pick_free = 0, err, lnum, flags;
495
496 dbg_find("min_space %d", min_space);
497 ubifs_get_lprops(c);
498
499 /* Check if there are enough empty LEBs for commit */
500 spin_lock(&c->space_lock);
501 if (c->min_idx_lebs > c->lst.idx_lebs)
502 rsvd_idx_lebs = c->min_idx_lebs - c->lst.idx_lebs;
503 else
504 rsvd_idx_lebs = 0;
505 lebs = c->lst.empty_lebs + c->freeable_cnt + c->idx_gc_cnt -
506 c->lst.taken_empty_lebs;
507 ubifs_assert(lebs + c->lst.idx_lebs >= c->min_idx_lebs);
508 if (rsvd_idx_lebs < lebs)
509 /*
510 * OK to allocate an empty LEB, but we still don't want to go
511 * looking for one if there aren't any.
512 */
513 if (c->lst.empty_lebs - c->lst.taken_empty_lebs > 0) {
514 pick_free = 1;
515 /*
516 * Because we release the space lock, we must account
517 * for this allocation here. After the LEB properties
518 * flags have been updated, we subtract one. Note, the
519 * result of this is that lprops also decreases
520 * @taken_empty_lebs in 'ubifs_change_lp()', so it is
521 * off by one for a short period of time which may
522 * introduce a small disturbance to budgeting
523 * calculations, but this is harmless because at the
524 * worst case this would make the budgeting subsystem
525 * be more pessimistic than needed.
526 *
527 * Fundamentally, this is about serialization of the
528 * budgeting and lprops subsystems. We could make the
529 * @space_lock a mutex and avoid dropping it before
530 * calling 'ubifs_change_lp()', but mutex is more
531 * heavy-weight, and we want budgeting to be as fast as
532 * possible.
533 */
534 c->lst.taken_empty_lebs += 1;
535 }
536 spin_unlock(&c->space_lock);
537
538 lprops = do_find_free_space(c, min_space, pick_free, squeeze);
539 if (IS_ERR(lprops)) {
540 err = PTR_ERR(lprops);
541 goto out;
542 }
543
544 lnum = lprops->lnum;
545 flags = lprops->flags | LPROPS_TAKEN;
546
547 lprops = ubifs_change_lp(c, lprops, LPROPS_NC, LPROPS_NC, flags, 0);
548 if (IS_ERR(lprops)) {
549 err = PTR_ERR(lprops);
550 goto out;
551 }
552
553 if (pick_free) {
554 spin_lock(&c->space_lock);
555 c->lst.taken_empty_lebs -= 1;
556 spin_unlock(&c->space_lock);
557 }
558
559 *free = lprops->free;
560 ubifs_release_lprops(c);
561
562 if (*free == c->leb_size) {
563 /*
564 * Ensure that empty LEBs have been unmapped. They may not have
565 * been, for example, because of an unclean unmount. Also
566 * LEBs that were freeable LEBs (free + dirty == leb_size) will
567 * not have been unmapped.
568 */
569 err = ubifs_leb_unmap(c, lnum);
570 if (err)
571 return err;
572 }
573
574 dbg_find("found LEB %d, free %d", lnum, *free);
575 ubifs_assert(*free >= min_space);
576 return lnum;
577
578out:
579 if (pick_free) {
580 spin_lock(&c->space_lock);
581 c->lst.taken_empty_lebs -= 1;
582 spin_unlock(&c->space_lock);
583 }
584 ubifs_release_lprops(c);
585 return err;
586}
587
588/**
589 * scan_for_idx_cb - callback used by the scan for a free LEB for the index.
590 * @c: the UBIFS file-system description object
591 * @lprops: LEB properties to scan
592 * @in_tree: whether the LEB properties are in main memory
593 * @data: information passed to and from the caller of the scan
594 *
595 * This function returns a code that indicates whether the scan should continue
596 * (%LPT_SCAN_CONTINUE), whether the LEB properties should be added to the tree
597 * in main memory (%LPT_SCAN_ADD), or whether the scan should stop
598 * (%LPT_SCAN_STOP).
599 */
600static int scan_for_idx_cb(struct ubifs_info *c,
601 const struct ubifs_lprops *lprops, int in_tree,
602 struct scan_data *data)
603{
604 int ret = LPT_SCAN_CONTINUE;
605
606 /* Exclude LEBs that are currently in use */
607 if (lprops->flags & LPROPS_TAKEN)
608 return LPT_SCAN_CONTINUE;
609 /* Determine whether to add these LEB properties to the tree */
610 if (!in_tree && valuable(c, lprops))
611 ret |= LPT_SCAN_ADD;
612 /* Exclude index LEBS */
613 if (lprops->flags & LPROPS_INDEX)
614 return ret;
615 /* Exclude LEBs that cannot be made empty */
616 if (lprops->free + lprops->dirty != c->leb_size)
617 return ret;
618 /*
619 * We are allocating for the index so it is safe to allocate LEBs with
620 * only free and dirty space, because write buffers are sync'd at commit
621 * start.
622 */
623 data->lnum = lprops->lnum;
624 return LPT_SCAN_ADD | LPT_SCAN_STOP;
625}
626
627/**
628 * scan_for_leb_for_idx - scan for a free LEB for the index.
629 * @c: the UBIFS file-system description object
630 */
631static const struct ubifs_lprops *scan_for_leb_for_idx(struct ubifs_info *c)
632{
633 struct ubifs_lprops *lprops;
634 struct scan_data data;
635 int err;
636
637 data.lnum = -1;
638 err = ubifs_lpt_scan_nolock(c, -1, c->lscan_lnum,
639 (ubifs_lpt_scan_callback)scan_for_idx_cb,
640 &data);
641 if (err)
642 return ERR_PTR(err);
643 ubifs_assert(data.lnum >= c->main_first && data.lnum < c->leb_cnt);
644 c->lscan_lnum = data.lnum;
645 lprops = ubifs_lpt_lookup_dirty(c, data.lnum);
646 if (IS_ERR(lprops))
647 return lprops;
648 ubifs_assert(lprops->lnum == data.lnum);
649 ubifs_assert(lprops->free + lprops->dirty == c->leb_size);
650 ubifs_assert(!(lprops->flags & LPROPS_TAKEN));
651 ubifs_assert(!(lprops->flags & LPROPS_INDEX));
652 return lprops;
653}
654
655/**
656 * ubifs_find_free_leb_for_idx - find a free LEB for the index.
657 * @c: the UBIFS file-system description object
658 *
659 * This function looks for a free LEB and returns that LEB number. The returned
660 * LEB is marked as "taken", "index".
661 *
662 * Only empty LEBs are allocated. This is for two reasons. First, the commit
663 * calculates the number of LEBs to allocate based on the assumption that they
664 * will be empty. Secondly, free space at the end of an index LEB is not
665 * guaranteed to be empty because it may have been used by the in-the-gaps
666 * method prior to an unclean unmount.
667 *
668 * If no LEB is found %-ENOSPC is returned. For other failures another negative
669 * error code is returned.
670 */
671int ubifs_find_free_leb_for_idx(struct ubifs_info *c)
672{
673 const struct ubifs_lprops *lprops;
674 int lnum = -1, err, flags;
675
676 ubifs_get_lprops(c);
677
678 lprops = ubifs_fast_find_empty(c);
679 if (!lprops) {
680 lprops = ubifs_fast_find_freeable(c);
681 if (!lprops) {
682 ubifs_assert(c->freeable_cnt == 0);
683 if (c->lst.empty_lebs - c->lst.taken_empty_lebs > 0) {
684 lprops = scan_for_leb_for_idx(c);
685 if (IS_ERR(lprops)) {
686 err = PTR_ERR(lprops);
687 goto out;
688 }
689 }
690 }
691 }
692
693 if (!lprops) {
694 err = -ENOSPC;
695 goto out;
696 }
697
698 lnum = lprops->lnum;
699
700 dbg_find("found LEB %d, free %d, dirty %d, flags %#x",
701 lnum, lprops->free, lprops->dirty, lprops->flags);
702
703 flags = lprops->flags | LPROPS_TAKEN | LPROPS_INDEX;
704 lprops = ubifs_change_lp(c, lprops, c->leb_size, 0, flags, 0);
705 if (IS_ERR(lprops)) {
706 err = PTR_ERR(lprops);
707 goto out;
708 }
709
710 ubifs_release_lprops(c);
711
712 /*
713 * Ensure that empty LEBs have been unmapped. They may not have been,
714 * for example, because of an unclean unmount. Also LEBs that were
715 * freeable LEBs (free + dirty == leb_size) will not have been unmapped.
716 */
717 err = ubifs_leb_unmap(c, lnum);
718 if (err) {
719 ubifs_change_one_lp(c, lnum, LPROPS_NC, LPROPS_NC, 0,
720 LPROPS_TAKEN | LPROPS_INDEX, 0);
721 return err;
722 }
723
724 return lnum;
725
726out:
727 ubifs_release_lprops(c);
728 return err;
729}
730
731static int cmp_dirty_idx(const struct ubifs_lprops **a,
732 const struct ubifs_lprops **b)
733{
734 const struct ubifs_lprops *lpa = *a;
735 const struct ubifs_lprops *lpb = *b;
736
737 return lpa->dirty + lpa->free - lpb->dirty - lpb->free;
738}
739
740static void swap_dirty_idx(struct ubifs_lprops **a, struct ubifs_lprops **b,
741 int size)
742{
743 struct ubifs_lprops *t = *a;
744
745 *a = *b;
746 *b = t;
747}
748
749/**
750 * ubifs_save_dirty_idx_lnums - save an array of the most dirty index LEB nos.
751 * @c: the UBIFS file-system description object
752 *
753 * This function is called each commit to create an array of LEB numbers of
754 * dirty index LEBs sorted in order of dirty and free space. This is used by
755 * the in-the-gaps method of TNC commit.
756 */
757int ubifs_save_dirty_idx_lnums(struct ubifs_info *c)
758{
759 int i;
760
761 ubifs_get_lprops(c);
762 /* Copy the LPROPS_DIRTY_IDX heap */
763 c->dirty_idx.cnt = c->lpt_heap[LPROPS_DIRTY_IDX - 1].cnt;
764 memcpy(c->dirty_idx.arr, c->lpt_heap[LPROPS_DIRTY_IDX - 1].arr,
765 sizeof(void *) * c->dirty_idx.cnt);
766 /* Sort it so that the dirtiest is now at the end */
767 sort(c->dirty_idx.arr, c->dirty_idx.cnt, sizeof(void *),
768 (int (*)(const void *, const void *))cmp_dirty_idx,
769 (void (*)(void *, void *, int))swap_dirty_idx);
770 dbg_find("found %d dirty index LEBs", c->dirty_idx.cnt);
771 if (c->dirty_idx.cnt)
772 dbg_find("dirtiest index LEB is %d with dirty %d and free %d",
773 c->dirty_idx.arr[c->dirty_idx.cnt - 1]->lnum,
774 c->dirty_idx.arr[c->dirty_idx.cnt - 1]->dirty,
775 c->dirty_idx.arr[c->dirty_idx.cnt - 1]->free);
776 /* Replace the lprops pointers with LEB numbers */
777 for (i = 0; i < c->dirty_idx.cnt; i++)
778 c->dirty_idx.arr[i] = (void *)(size_t)c->dirty_idx.arr[i]->lnum;
779 ubifs_release_lprops(c);
780 return 0;
781}
782
783/**
784 * scan_dirty_idx_cb - callback used by the scan for a dirty index LEB.
785 * @c: the UBIFS file-system description object
786 * @lprops: LEB properties to scan
787 * @in_tree: whether the LEB properties are in main memory
788 * @data: information passed to and from the caller of the scan
789 *
790 * This function returns a code that indicates whether the scan should continue
791 * (%LPT_SCAN_CONTINUE), whether the LEB properties should be added to the tree
792 * in main memory (%LPT_SCAN_ADD), or whether the scan should stop
793 * (%LPT_SCAN_STOP).
794 */
795static int scan_dirty_idx_cb(struct ubifs_info *c,
796 const struct ubifs_lprops *lprops, int in_tree,
797 struct scan_data *data)
798{
799 int ret = LPT_SCAN_CONTINUE;
800
801 /* Exclude LEBs that are currently in use */
802 if (lprops->flags & LPROPS_TAKEN)
803 return LPT_SCAN_CONTINUE;
804 /* Determine whether to add these LEB properties to the tree */
805 if (!in_tree && valuable(c, lprops))
806 ret |= LPT_SCAN_ADD;
807 /* Exclude non-index LEBs */
808 if (!(lprops->flags & LPROPS_INDEX))
809 return ret;
810 /* Exclude LEBs with too little space */
811 if (lprops->free + lprops->dirty < c->min_idx_node_sz)
812 return ret;
813 /* Finally we found space */
814 data->lnum = lprops->lnum;
815 return LPT_SCAN_ADD | LPT_SCAN_STOP;
816}
817
818/**
819 * find_dirty_idx_leb - find a dirty index LEB.
820 * @c: the UBIFS file-system description object
821 *
822 * This function returns LEB number upon success and a negative error code upon
823 * failure. In particular, -ENOSPC is returned if a dirty index LEB is not
824 * found.
825 *
826 * Note that this function scans the entire LPT but it is called very rarely.
827 */
828static int find_dirty_idx_leb(struct ubifs_info *c)
829{
830 const struct ubifs_lprops *lprops;
831 struct ubifs_lpt_heap *heap;
832 struct scan_data data;
833 int err, i, ret;
834
835 /* Check all structures in memory first */
836 data.lnum = -1;
837 heap = &c->lpt_heap[LPROPS_DIRTY_IDX - 1];
838 for (i = 0; i < heap->cnt; i++) {
839 lprops = heap->arr[i];
840 ret = scan_dirty_idx_cb(c, lprops, 1, &data);
841 if (ret & LPT_SCAN_STOP)
842 goto found;
843 }
844 list_for_each_entry(lprops, &c->frdi_idx_list, list) {
845 ret = scan_dirty_idx_cb(c, lprops, 1, &data);
846 if (ret & LPT_SCAN_STOP)
847 goto found;
848 }
849 list_for_each_entry(lprops, &c->uncat_list, list) {
850 ret = scan_dirty_idx_cb(c, lprops, 1, &data);
851 if (ret & LPT_SCAN_STOP)
852 goto found;
853 }
854 if (c->pnodes_have >= c->pnode_cnt)
855 /* All pnodes are in memory, so skip scan */
856 return -ENOSPC;
857 err = ubifs_lpt_scan_nolock(c, -1, c->lscan_lnum,
858 (ubifs_lpt_scan_callback)scan_dirty_idx_cb,
859 &data);
860 if (err)
861 return err;
862found:
863 ubifs_assert(data.lnum >= c->main_first && data.lnum < c->leb_cnt);
864 c->lscan_lnum = data.lnum;
865 lprops = ubifs_lpt_lookup_dirty(c, data.lnum);
866 if (IS_ERR(lprops))
867 return PTR_ERR(lprops);
868 ubifs_assert(lprops->lnum == data.lnum);
869 ubifs_assert(lprops->free + lprops->dirty >= c->min_idx_node_sz);
870 ubifs_assert(!(lprops->flags & LPROPS_TAKEN));
871 ubifs_assert((lprops->flags & LPROPS_INDEX));
872
873 dbg_find("found dirty LEB %d, free %d, dirty %d, flags %#x",
874 lprops->lnum, lprops->free, lprops->dirty, lprops->flags);
875
876 lprops = ubifs_change_lp(c, lprops, LPROPS_NC, LPROPS_NC,
877 lprops->flags | LPROPS_TAKEN, 0);
878 if (IS_ERR(lprops))
879 return PTR_ERR(lprops);
880
881 return lprops->lnum;
882}
883
884/**
885 * get_idx_gc_leb - try to get a LEB number from trivial GC.
886 * @c: the UBIFS file-system description object
887 */
888static int get_idx_gc_leb(struct ubifs_info *c)
889{
890 const struct ubifs_lprops *lp;
891 int err, lnum;
892
893 err = ubifs_get_idx_gc_leb(c);
894 if (err < 0)
895 return err;
896 lnum = err;
897 /*
898 * The LEB was due to be unmapped after the commit but
899 * it is needed now for this commit.
900 */
901 lp = ubifs_lpt_lookup_dirty(c, lnum);
902 if (unlikely(IS_ERR(lp)))
903 return PTR_ERR(lp);
904 lp = ubifs_change_lp(c, lp, LPROPS_NC, LPROPS_NC,
905 lp->flags | LPROPS_INDEX, -1);
906 if (unlikely(IS_ERR(lp)))
907 return PTR_ERR(lp);
908 dbg_find("LEB %d, dirty %d and free %d flags %#x",
909 lp->lnum, lp->dirty, lp->free, lp->flags);
910 return lnum;
911}
912
913/**
914 * find_dirtiest_idx_leb - find dirtiest index LEB from dirtiest array.
915 * @c: the UBIFS file-system description object
916 */
917static int find_dirtiest_idx_leb(struct ubifs_info *c)
918{
919 const struct ubifs_lprops *lp;
920 int lnum;
921
922 while (1) {
923 if (!c->dirty_idx.cnt)
924 return -ENOSPC;
925 /* The lprops pointers were replaced by LEB numbers */
926 lnum = (size_t)c->dirty_idx.arr[--c->dirty_idx.cnt];
927 lp = ubifs_lpt_lookup(c, lnum);
928 if (IS_ERR(lp))
929 return PTR_ERR(lp);
930 if ((lp->flags & LPROPS_TAKEN) || !(lp->flags & LPROPS_INDEX))
931 continue;
932 lp = ubifs_change_lp(c, lp, LPROPS_NC, LPROPS_NC,
933 lp->flags | LPROPS_TAKEN, 0);
934 if (IS_ERR(lp))
935 return PTR_ERR(lp);
936 break;
937 }
938 dbg_find("LEB %d, dirty %d and free %d flags %#x", lp->lnum, lp->dirty,
939 lp->free, lp->flags);
940 ubifs_assert(lp->flags | LPROPS_TAKEN);
941 ubifs_assert(lp->flags | LPROPS_INDEX);
942 return lnum;
943}
944
945/**
946 * ubifs_find_dirty_idx_leb - try to find dirtiest index LEB as at last commit.
947 * @c: the UBIFS file-system description object
948 *
949 * This function attempts to find an untaken index LEB with the most free and
950 * dirty space that can be used without overwriting index nodes that were in the
951 * last index committed.
952 */
953int ubifs_find_dirty_idx_leb(struct ubifs_info *c)
954{
955 int err;
956
957 ubifs_get_lprops(c);
958
959 /*
960 * We made an array of the dirtiest index LEB numbers as at the start of
961 * last commit. Try that array first.
962 */
963 err = find_dirtiest_idx_leb(c);
964
965 /* Next try scanning the entire LPT */
966 if (err == -ENOSPC)
967 err = find_dirty_idx_leb(c);
968
969 /* Finally take any index LEBs awaiting trivial GC */
970 if (err == -ENOSPC)
971 err = get_idx_gc_leb(c);
972
973 ubifs_release_lprops(c);
974 return err;
975}
diff --git a/fs/ubifs/gc.c b/fs/ubifs/gc.c
new file mode 100644
index 000000000000..d0f3dac29081
--- /dev/null
+++ b/fs/ubifs/gc.c
@@ -0,0 +1,773 @@
1/*
2 * This file is part of UBIFS.
3 *
4 * Copyright (C) 2006-2008 Nokia Corporation.
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms of the GNU General Public License version 2 as published by
8 * the Free Software Foundation.
9 *
10 * This program is distributed in the hope that it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13 * more details.
14 *
15 * You should have received a copy of the GNU General Public License along with
16 * this program; if not, write to the Free Software Foundation, Inc., 51
17 * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
18 *
19 * Authors: Adrian Hunter
20 * Artem Bityutskiy (Битюцкий Артём)
21 */
22
23/*
24 * This file implements garbage collection. The procedure for garbage collection
25 * is different depending on whether a LEB as an index LEB (contains index
26 * nodes) or not. For non-index LEBs, garbage collection finds a LEB which
27 * contains a lot of dirty space (obsolete nodes), and copies the non-obsolete
28 * nodes to the journal, at which point the garbage-collected LEB is free to be
29 * reused. For index LEBs, garbage collection marks the non-obsolete index nodes
30 * dirty in the TNC, and after the next commit, the garbage-collected LEB is
31 * to be reused. Garbage collection will cause the number of dirty index nodes
32 * to grow, however sufficient space is reserved for the index to ensure the
33 * commit will never run out of space.
34 */
35
36#include <linux/pagemap.h>
37#include "ubifs.h"
38
39/*
40 * GC tries to optimize the way it fit nodes to available space, and it sorts
41 * nodes a little. The below constants are watermarks which define "large",
42 * "medium", and "small" nodes.
43 */
44#define MEDIUM_NODE_WM (UBIFS_BLOCK_SIZE / 4)
45#define SMALL_NODE_WM UBIFS_MAX_DENT_NODE_SZ
46
47/*
48 * GC may need to move more then one LEB to make progress. The below constants
49 * define "soft" and "hard" limits on the number of LEBs the garbage collector
50 * may move.
51 */
52#define SOFT_LEBS_LIMIT 4
53#define HARD_LEBS_LIMIT 32
54
55/**
56 * switch_gc_head - switch the garbage collection journal head.
57 * @c: UBIFS file-system description object
58 * @buf: buffer to write
59 * @len: length of the buffer to write
60 * @lnum: LEB number written is returned here
61 * @offs: offset written is returned here
62 *
63 * This function switch the GC head to the next LEB which is reserved in
64 * @c->gc_lnum. Returns %0 in case of success, %-EAGAIN if commit is required,
65 * and other negative error code in case of failures.
66 */
67static int switch_gc_head(struct ubifs_info *c)
68{
69 int err, gc_lnum = c->gc_lnum;
70 struct ubifs_wbuf *wbuf = &c->jheads[GCHD].wbuf;
71
72 ubifs_assert(gc_lnum != -1);
73 dbg_gc("switch GC head from LEB %d:%d to LEB %d (waste %d bytes)",
74 wbuf->lnum, wbuf->offs + wbuf->used, gc_lnum,
75 c->leb_size - wbuf->offs - wbuf->used);
76
77 err = ubifs_wbuf_sync_nolock(wbuf);
78 if (err)
79 return err;
80
81 /*
82 * The GC write-buffer was synchronized, we may safely unmap
83 * 'c->gc_lnum'.
84 */
85 err = ubifs_leb_unmap(c, gc_lnum);
86 if (err)
87 return err;
88
89 err = ubifs_add_bud_to_log(c, GCHD, gc_lnum, 0);
90 if (err)
91 return err;
92
93 c->gc_lnum = -1;
94 err = ubifs_wbuf_seek_nolock(wbuf, gc_lnum, 0, UBI_LONGTERM);
95 return err;
96}
97
98/**
99 * move_nodes - move nodes.
100 * @c: UBIFS file-system description object
101 * @sleb: describes nodes to move
102 *
103 * This function moves valid nodes from data LEB described by @sleb to the GC
104 * journal head. The obsolete nodes are dropped.
105 *
106 * When moving nodes we have to deal with classical bin-packing problem: the
107 * space in the current GC journal head LEB and in @c->gc_lnum are the "bins",
108 * where the nodes in the @sleb->nodes list are the elements which should be
109 * fit optimally to the bins. This function uses the "first fit decreasing"
110 * strategy, although it does not really sort the nodes but just split them on
111 * 3 classes - large, medium, and small, so they are roughly sorted.
112 *
113 * This function returns zero in case of success, %-EAGAIN if commit is
114 * required, and other negative error codes in case of other failures.
115 */
116static int move_nodes(struct ubifs_info *c, struct ubifs_scan_leb *sleb)
117{
118 struct ubifs_scan_node *snod, *tmp;
119 struct list_head large, medium, small;
120 struct ubifs_wbuf *wbuf = &c->jheads[GCHD].wbuf;
121 int avail, err, min = INT_MAX;
122
123 INIT_LIST_HEAD(&large);
124 INIT_LIST_HEAD(&medium);
125 INIT_LIST_HEAD(&small);
126
127 list_for_each_entry_safe(snod, tmp, &sleb->nodes, list) {
128 struct list_head *lst;
129
130 ubifs_assert(snod->type != UBIFS_IDX_NODE);
131 ubifs_assert(snod->type != UBIFS_REF_NODE);
132 ubifs_assert(snod->type != UBIFS_CS_NODE);
133
134 err = ubifs_tnc_has_node(c, &snod->key, 0, sleb->lnum,
135 snod->offs, 0);
136 if (err < 0)
137 goto out;
138
139 lst = &snod->list;
140 list_del(lst);
141 if (!err) {
142 /* The node is obsolete, remove it from the list */
143 kfree(snod);
144 continue;
145 }
146
147 /*
148 * Sort the list of nodes so that large nodes go first, and
149 * small nodes go last.
150 */
151 if (snod->len > MEDIUM_NODE_WM)
152 list_add(lst, &large);
153 else if (snod->len > SMALL_NODE_WM)
154 list_add(lst, &medium);
155 else
156 list_add(lst, &small);
157
158 /* And find the smallest node */
159 if (snod->len < min)
160 min = snod->len;
161 }
162
163 /*
164 * Join the tree lists so that we'd have one roughly sorted list
165 * ('large' will be the head of the joined list).
166 */
167 list_splice(&medium, large.prev);
168 list_splice(&small, large.prev);
169
170 if (wbuf->lnum == -1) {
171 /*
172 * The GC journal head is not set, because it is the first GC
173 * invocation since mount.
174 */
175 err = switch_gc_head(c);
176 if (err)
177 goto out;
178 }
179
180 /* Write nodes to their new location. Use the first-fit strategy */
181 while (1) {
182 avail = c->leb_size - wbuf->offs - wbuf->used;
183 list_for_each_entry_safe(snod, tmp, &large, list) {
184 int new_lnum, new_offs;
185
186 if (avail < min)
187 break;
188
189 if (snod->len > avail)
190 /* This node does not fit */
191 continue;
192
193 cond_resched();
194
195 new_lnum = wbuf->lnum;
196 new_offs = wbuf->offs + wbuf->used;
197 err = ubifs_wbuf_write_nolock(wbuf, snod->node,
198 snod->len);
199 if (err)
200 goto out;
201 err = ubifs_tnc_replace(c, &snod->key, sleb->lnum,
202 snod->offs, new_lnum, new_offs,
203 snod->len);
204 if (err)
205 goto out;
206
207 avail = c->leb_size - wbuf->offs - wbuf->used;
208 list_del(&snod->list);
209 kfree(snod);
210 }
211
212 if (list_empty(&large))
213 break;
214
215 /*
216 * Waste the rest of the space in the LEB and switch to the
217 * next LEB.
218 */
219 err = switch_gc_head(c);
220 if (err)
221 goto out;
222 }
223
224 return 0;
225
226out:
227 list_for_each_entry_safe(snod, tmp, &large, list) {
228 list_del(&snod->list);
229 kfree(snod);
230 }
231 return err;
232}
233
234/**
235 * gc_sync_wbufs - sync write-buffers for GC.
236 * @c: UBIFS file-system description object
237 *
238 * We must guarantee that obsoleting nodes are on flash. Unfortunately they may
239 * be in a write-buffer instead. That is, a node could be written to a
240 * write-buffer, obsoleting another node in a LEB that is GC'd. If that LEB is
241 * erased before the write-buffer is sync'd and then there is an unclean
242 * unmount, then an existing node is lost. To avoid this, we sync all
243 * write-buffers.
244 *
245 * This function returns %0 on success or a negative error code on failure.
246 */
247static int gc_sync_wbufs(struct ubifs_info *c)
248{
249 int err, i;
250
251 for (i = 0; i < c->jhead_cnt; i++) {
252 if (i == GCHD)
253 continue;
254 err = ubifs_wbuf_sync(&c->jheads[i].wbuf);
255 if (err)
256 return err;
257 }
258 return 0;
259}
260
261/**
262 * ubifs_garbage_collect_leb - garbage-collect a logical eraseblock.
263 * @c: UBIFS file-system description object
264 * @lp: describes the LEB to garbage collect
265 *
266 * This function garbage-collects an LEB and returns one of the @LEB_FREED,
267 * @LEB_RETAINED, etc positive codes in case of success, %-EAGAIN if commit is
268 * required, and other negative error codes in case of failures.
269 */
270int ubifs_garbage_collect_leb(struct ubifs_info *c, struct ubifs_lprops *lp)
271{
272 struct ubifs_scan_leb *sleb;
273 struct ubifs_scan_node *snod;
274 struct ubifs_wbuf *wbuf = &c->jheads[GCHD].wbuf;
275 int err = 0, lnum = lp->lnum;
276
277 ubifs_assert(c->gc_lnum != -1 || wbuf->offs + wbuf->used == 0 ||
278 c->need_recovery);
279 ubifs_assert(c->gc_lnum != lnum);
280 ubifs_assert(wbuf->lnum != lnum);
281
282 /*
283 * We scan the entire LEB even though we only really need to scan up to
284 * (c->leb_size - lp->free).
285 */
286 sleb = ubifs_scan(c, lnum, 0, c->sbuf);
287 if (IS_ERR(sleb))
288 return PTR_ERR(sleb);
289
290 ubifs_assert(!list_empty(&sleb->nodes));
291 snod = list_entry(sleb->nodes.next, struct ubifs_scan_node, list);
292
293 if (snod->type == UBIFS_IDX_NODE) {
294 struct ubifs_gced_idx_leb *idx_gc;
295
296 dbg_gc("indexing LEB %d (free %d, dirty %d)",
297 lnum, lp->free, lp->dirty);
298 list_for_each_entry(snod, &sleb->nodes, list) {
299 struct ubifs_idx_node *idx = snod->node;
300 int level = le16_to_cpu(idx->level);
301
302 ubifs_assert(snod->type == UBIFS_IDX_NODE);
303 key_read(c, ubifs_idx_key(c, idx), &snod->key);
304 err = ubifs_dirty_idx_node(c, &snod->key, level, lnum,
305 snod->offs);
306 if (err)
307 goto out;
308 }
309
310 idx_gc = kmalloc(sizeof(struct ubifs_gced_idx_leb), GFP_NOFS);
311 if (!idx_gc) {
312 err = -ENOMEM;
313 goto out;
314 }
315
316 idx_gc->lnum = lnum;
317 idx_gc->unmap = 0;
318 list_add(&idx_gc->list, &c->idx_gc);
319
320 /*
321 * Don't release the LEB until after the next commit, because
322 * it may contain date which is needed for recovery. So
323 * although we freed this LEB, it will become usable only after
324 * the commit.
325 */
326 err = ubifs_change_one_lp(c, lnum, c->leb_size, 0, 0,
327 LPROPS_INDEX, 1);
328 if (err)
329 goto out;
330 err = LEB_FREED_IDX;
331 } else {
332 dbg_gc("data LEB %d (free %d, dirty %d)",
333 lnum, lp->free, lp->dirty);
334
335 err = move_nodes(c, sleb);
336 if (err)
337 goto out;
338
339 err = gc_sync_wbufs(c);
340 if (err)
341 goto out;
342
343 err = ubifs_change_one_lp(c, lnum, c->leb_size, 0, 0, 0, 0);
344 if (err)
345 goto out;
346
347 if (c->gc_lnum == -1) {
348 c->gc_lnum = lnum;
349 err = LEB_RETAINED;
350 } else {
351 err = ubifs_wbuf_sync_nolock(wbuf);
352 if (err)
353 goto out;
354
355 err = ubifs_leb_unmap(c, lnum);
356 if (err)
357 goto out;
358
359 err = LEB_FREED;
360 }
361 }
362
363out:
364 ubifs_scan_destroy(sleb);
365 return err;
366}
367
368/**
369 * ubifs_garbage_collect - UBIFS garbage collector.
370 * @c: UBIFS file-system description object
371 * @anyway: do GC even if there are free LEBs
372 *
373 * This function does out-of-place garbage collection. The return codes are:
374 * o positive LEB number if the LEB has been freed and may be used;
375 * o %-EAGAIN if the caller has to run commit;
376 * o %-ENOSPC if GC failed to make any progress;
377 * o other negative error codes in case of other errors.
378 *
379 * Garbage collector writes data to the journal when GC'ing data LEBs, and just
380 * marking indexing nodes dirty when GC'ing indexing LEBs. Thus, at some point
381 * commit may be required. But commit cannot be run from inside GC, because the
382 * caller might be holding the commit lock, so %-EAGAIN is returned instead;
383 * And this error code means that the caller has to run commit, and re-run GC
384 * if there is still no free space.
385 *
386 * There are many reasons why this function may return %-EAGAIN:
387 * o the log is full and there is no space to write an LEB reference for
388 * @c->gc_lnum;
389 * o the journal is too large and exceeds size limitations;
390 * o GC moved indexing LEBs, but they can be used only after the commit;
391 * o the shrinker fails to find clean znodes to free and requests the commit;
392 * o etc.
393 *
394 * Note, if the file-system is close to be full, this function may return
395 * %-EAGAIN infinitely, so the caller has to limit amount of re-invocations of
396 * the function. E.g., this happens if the limits on the journal size are too
397 * tough and GC writes too much to the journal before an LEB is freed. This
398 * might also mean that the journal is too large, and the TNC becomes to big,
399 * so that the shrinker is constantly called, finds not clean znodes to free,
400 * and requests commit. Well, this may also happen if the journal is all right,
401 * but another kernel process consumes too much memory. Anyway, infinite
402 * %-EAGAIN may happen, but in some extreme/misconfiguration cases.
403 */
404int ubifs_garbage_collect(struct ubifs_info *c, int anyway)
405{
406 int i, err, ret, min_space = c->dead_wm;
407 struct ubifs_lprops lp;
408 struct ubifs_wbuf *wbuf = &c->jheads[GCHD].wbuf;
409
410 ubifs_assert_cmt_locked(c);
411
412 if (ubifs_gc_should_commit(c))
413 return -EAGAIN;
414
415 mutex_lock_nested(&wbuf->io_mutex, wbuf->jhead);
416
417 if (c->ro_media) {
418 ret = -EROFS;
419 goto out_unlock;
420 }
421
422 /* We expect the write-buffer to be empty on entry */
423 ubifs_assert(!wbuf->used);
424
425 for (i = 0; ; i++) {
426 int space_before = c->leb_size - wbuf->offs - wbuf->used;
427 int space_after;
428
429 cond_resched();
430
431 /* Give the commit an opportunity to run */
432 if (ubifs_gc_should_commit(c)) {
433 ret = -EAGAIN;
434 break;
435 }
436
437 if (i > SOFT_LEBS_LIMIT && !list_empty(&c->idx_gc)) {
438 /*
439 * We've done enough iterations. Indexing LEBs were
440 * moved and will be available after the commit.
441 */
442 dbg_gc("soft limit, some index LEBs GC'ed, -EAGAIN");
443 ubifs_commit_required(c);
444 ret = -EAGAIN;
445 break;
446 }
447
448 if (i > HARD_LEBS_LIMIT) {
449 /*
450 * We've moved too many LEBs and have not made
451 * progress, give up.
452 */
453 dbg_gc("hard limit, -ENOSPC");
454 ret = -ENOSPC;
455 break;
456 }
457
458 /*
459 * Empty and freeable LEBs can turn up while we waited for
460 * the wbuf lock, or while we have been running GC. In that
461 * case, we should just return one of those instead of
462 * continuing to GC dirty LEBs. Hence we request
463 * 'ubifs_find_dirty_leb()' to return an empty LEB if it can.
464 */
465 ret = ubifs_find_dirty_leb(c, &lp, min_space, anyway ? 0 : 1);
466 if (ret) {
467 if (ret == -ENOSPC)
468 dbg_gc("no more dirty LEBs");
469 break;
470 }
471
472 dbg_gc("found LEB %d: free %d, dirty %d, sum %d "
473 "(min. space %d)", lp.lnum, lp.free, lp.dirty,
474 lp.free + lp.dirty, min_space);
475
476 if (lp.free + lp.dirty == c->leb_size) {
477 /* An empty LEB was returned */
478 dbg_gc("LEB %d is free, return it", lp.lnum);
479 /*
480 * ubifs_find_dirty_leb() doesn't return freeable index
481 * LEBs.
482 */
483 ubifs_assert(!(lp.flags & LPROPS_INDEX));
484 if (lp.free != c->leb_size) {
485 /*
486 * Write buffers must be sync'd before
487 * unmapping freeable LEBs, because one of them
488 * may contain data which obsoletes something
489 * in 'lp.pnum'.
490 */
491 ret = gc_sync_wbufs(c);
492 if (ret)
493 goto out;
494 ret = ubifs_change_one_lp(c, lp.lnum,
495 c->leb_size, 0, 0, 0,
496 0);
497 if (ret)
498 goto out;
499 }
500 ret = ubifs_leb_unmap(c, lp.lnum);
501 if (ret)
502 goto out;
503 ret = lp.lnum;
504 break;
505 }
506
507 space_before = c->leb_size - wbuf->offs - wbuf->used;
508 if (wbuf->lnum == -1)
509 space_before = 0;
510
511 ret = ubifs_garbage_collect_leb(c, &lp);
512 if (ret < 0) {
513 if (ret == -EAGAIN || ret == -ENOSPC) {
514 /*
515 * These codes are not errors, so we have to
516 * return the LEB to lprops. But if the
517 * 'ubifs_return_leb()' function fails, its
518 * failure code is propagated to the caller
519 * instead of the original '-EAGAIN' or
520 * '-ENOSPC'.
521 */
522 err = ubifs_return_leb(c, lp.lnum);
523 if (err)
524 ret = err;
525 break;
526 }
527 goto out;
528 }
529
530 if (ret == LEB_FREED) {
531 /* An LEB has been freed and is ready for use */
532 dbg_gc("LEB %d freed, return", lp.lnum);
533 ret = lp.lnum;
534 break;
535 }
536
537 if (ret == LEB_FREED_IDX) {
538 /*
539 * This was an indexing LEB and it cannot be
540 * immediately used. And instead of requesting the
541 * commit straight away, we try to garbage collect some
542 * more.
543 */
544 dbg_gc("indexing LEB %d freed, continue", lp.lnum);
545 continue;
546 }
547
548 ubifs_assert(ret == LEB_RETAINED);
549 space_after = c->leb_size - wbuf->offs - wbuf->used;
550 dbg_gc("LEB %d retained, freed %d bytes", lp.lnum,
551 space_after - space_before);
552
553 if (space_after > space_before) {
554 /* GC makes progress, keep working */
555 min_space >>= 1;
556 if (min_space < c->dead_wm)
557 min_space = c->dead_wm;
558 continue;
559 }
560
561 dbg_gc("did not make progress");
562
563 /*
564 * GC moved an LEB bud have not done any progress. This means
565 * that the previous GC head LEB contained too few free space
566 * and the LEB which was GC'ed contained only large nodes which
567 * did not fit that space.
568 *
569 * We can do 2 things:
570 * 1. pick another LEB in a hope it'll contain a small node
571 * which will fit the space we have at the end of current GC
572 * head LEB, but there is no guarantee, so we try this out
573 * unless we have already been working for too long;
574 * 2. request an LEB with more dirty space, which will force
575 * 'ubifs_find_dirty_leb()' to start scanning the lprops
576 * table, instead of just picking one from the heap
577 * (previously it already picked the dirtiest LEB).
578 */
579 if (i < SOFT_LEBS_LIMIT) {
580 dbg_gc("try again");
581 continue;
582 }
583
584 min_space <<= 1;
585 if (min_space > c->dark_wm)
586 min_space = c->dark_wm;
587 dbg_gc("set min. space to %d", min_space);
588 }
589
590 if (ret == -ENOSPC && !list_empty(&c->idx_gc)) {
591 dbg_gc("no space, some index LEBs GC'ed, -EAGAIN");
592 ubifs_commit_required(c);
593 ret = -EAGAIN;
594 }
595
596 err = ubifs_wbuf_sync_nolock(wbuf);
597 if (!err)
598 err = ubifs_leb_unmap(c, c->gc_lnum);
599 if (err) {
600 ret = err;
601 goto out;
602 }
603out_unlock:
604 mutex_unlock(&wbuf->io_mutex);
605 return ret;
606
607out:
608 ubifs_assert(ret < 0);
609 ubifs_assert(ret != -ENOSPC && ret != -EAGAIN);
610 ubifs_ro_mode(c, ret);
611 ubifs_wbuf_sync_nolock(wbuf);
612 mutex_unlock(&wbuf->io_mutex);
613 ubifs_return_leb(c, lp.lnum);
614 return ret;
615}
616
617/**
618 * ubifs_gc_start_commit - garbage collection at start of commit.
619 * @c: UBIFS file-system description object
620 *
621 * If a LEB has only dirty and free space, then we may safely unmap it and make
622 * it free. Note, we cannot do this with indexing LEBs because dirty space may
623 * correspond index nodes that are required for recovery. In that case, the
624 * LEB cannot be unmapped until after the next commit.
625 *
626 * This function returns %0 upon success and a negative error code upon failure.
627 */
628int ubifs_gc_start_commit(struct ubifs_info *c)
629{
630 struct ubifs_gced_idx_leb *idx_gc;
631 const struct ubifs_lprops *lp;
632 int err = 0, flags;
633
634 ubifs_get_lprops(c);
635
636 /*
637 * Unmap (non-index) freeable LEBs. Note that recovery requires that all
638 * wbufs are sync'd before this, which is done in 'do_commit()'.
639 */
640 while (1) {
641 lp = ubifs_fast_find_freeable(c);
642 if (unlikely(IS_ERR(lp))) {
643 err = PTR_ERR(lp);
644 goto out;
645 }
646 if (!lp)
647 break;
648 ubifs_assert(!(lp->flags & LPROPS_TAKEN));
649 ubifs_assert(!(lp->flags & LPROPS_INDEX));
650 err = ubifs_leb_unmap(c, lp->lnum);
651 if (err)
652 goto out;
653 lp = ubifs_change_lp(c, lp, c->leb_size, 0, lp->flags, 0);
654 if (unlikely(IS_ERR(lp))) {
655 err = PTR_ERR(lp);
656 goto out;
657 }
658 ubifs_assert(!(lp->flags & LPROPS_TAKEN));
659 ubifs_assert(!(lp->flags & LPROPS_INDEX));
660 }
661
662 /* Mark GC'd index LEBs OK to unmap after this commit finishes */
663 list_for_each_entry(idx_gc, &c->idx_gc, list)
664 idx_gc->unmap = 1;
665
666 /* Record index freeable LEBs for unmapping after commit */
667 while (1) {
668 lp = ubifs_fast_find_frdi_idx(c);
669 if (unlikely(IS_ERR(lp))) {
670 err = PTR_ERR(lp);
671 goto out;
672 }
673 if (!lp)
674 break;
675 idx_gc = kmalloc(sizeof(struct ubifs_gced_idx_leb), GFP_NOFS);
676 if (!idx_gc) {
677 err = -ENOMEM;
678 goto out;
679 }
680 ubifs_assert(!(lp->flags & LPROPS_TAKEN));
681 ubifs_assert(lp->flags & LPROPS_INDEX);
682 /* Don't release the LEB until after the next commit */
683 flags = (lp->flags | LPROPS_TAKEN) ^ LPROPS_INDEX;
684 lp = ubifs_change_lp(c, lp, c->leb_size, 0, flags, 1);
685 if (unlikely(IS_ERR(lp))) {
686 err = PTR_ERR(lp);
687 kfree(idx_gc);
688 goto out;
689 }
690 ubifs_assert(lp->flags & LPROPS_TAKEN);
691 ubifs_assert(!(lp->flags & LPROPS_INDEX));
692 idx_gc->lnum = lp->lnum;
693 idx_gc->unmap = 1;
694 list_add(&idx_gc->list, &c->idx_gc);
695 }
696out:
697 ubifs_release_lprops(c);
698 return err;
699}
700
701/**
702 * ubifs_gc_end_commit - garbage collection at end of commit.
703 * @c: UBIFS file-system description object
704 *
705 * This function completes out-of-place garbage collection of index LEBs.
706 */
707int ubifs_gc_end_commit(struct ubifs_info *c)
708{
709 struct ubifs_gced_idx_leb *idx_gc, *tmp;
710 struct ubifs_wbuf *wbuf;
711 int err = 0;
712
713 wbuf = &c->jheads[GCHD].wbuf;
714 mutex_lock_nested(&wbuf->io_mutex, wbuf->jhead);
715 list_for_each_entry_safe(idx_gc, tmp, &c->idx_gc, list)
716 if (idx_gc->unmap) {
717 dbg_gc("LEB %d", idx_gc->lnum);
718 err = ubifs_leb_unmap(c, idx_gc->lnum);
719 if (err)
720 goto out;
721 err = ubifs_change_one_lp(c, idx_gc->lnum, LPROPS_NC,
722 LPROPS_NC, 0, LPROPS_TAKEN, -1);
723 if (err)
724 goto out;
725 list_del(&idx_gc->list);
726 kfree(idx_gc);
727 }
728out:
729 mutex_unlock(&wbuf->io_mutex);
730 return err;
731}
732
733/**
734 * ubifs_destroy_idx_gc - destroy idx_gc list.
735 * @c: UBIFS file-system description object
736 *
737 * This function destroys the idx_gc list. It is called when unmounting or
738 * remounting read-only so locks are not needed.
739 */
740void ubifs_destroy_idx_gc(struct ubifs_info *c)
741{
742 while (!list_empty(&c->idx_gc)) {
743 struct ubifs_gced_idx_leb *idx_gc;
744
745 idx_gc = list_entry(c->idx_gc.next, struct ubifs_gced_idx_leb,
746 list);
747 c->idx_gc_cnt -= 1;
748 list_del(&idx_gc->list);
749 kfree(idx_gc);
750 }
751
752}
753
754/**
755 * ubifs_get_idx_gc_leb - get a LEB from GC'd index LEB list.
756 * @c: UBIFS file-system description object
757 *
758 * Called during start commit so locks are not needed.
759 */
760int ubifs_get_idx_gc_leb(struct ubifs_info *c)
761{
762 struct ubifs_gced_idx_leb *idx_gc;
763 int lnum;
764
765 if (list_empty(&c->idx_gc))
766 return -ENOSPC;
767 idx_gc = list_entry(c->idx_gc.next, struct ubifs_gced_idx_leb, list);
768 lnum = idx_gc->lnum;
769 /* c->idx_gc_cnt is updated by the caller when lprops are updated */
770 list_del(&idx_gc->list);
771 kfree(idx_gc);
772 return lnum;
773}
diff --git a/fs/ubifs/io.c b/fs/ubifs/io.c
new file mode 100644
index 000000000000..3374f91b6709
--- /dev/null
+++ b/fs/ubifs/io.c
@@ -0,0 +1,914 @@
1/*
2 * This file is part of UBIFS.
3 *
4 * Copyright (C) 2006-2008 Nokia Corporation.
5 * Copyright (C) 2006, 2007 University of Szeged, Hungary
6 *
7 * This program is free software; you can redistribute it and/or modify it
8 * under the terms of the GNU General Public License version 2 as published by
9 * the Free Software Foundation.
10 *
11 * This program is distributed in the hope that it will be useful, but WITHOUT
12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
14 * more details.
15 *
16 * You should have received a copy of the GNU General Public License along with
17 * this program; if not, write to the Free Software Foundation, Inc., 51
18 * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
19 *
20 * Authors: Artem Bityutskiy (Битюцкий Артём)
21 * Adrian Hunter
22 * Zoltan Sogor
23 */
24
25/*
26 * This file implements UBIFS I/O subsystem which provides various I/O-related
27 * helper functions (reading/writing/checking/validating nodes) and implements
28 * write-buffering support. Write buffers help to save space which otherwise
29 * would have been wasted for padding to the nearest minimal I/O unit boundary.
30 * Instead, data first goes to the write-buffer and is flushed when the
31 * buffer is full or when it is not used for some time (by timer). This is
32 * similarto the mechanism is used by JFFS2.
33 *
34 * Write-buffers are defined by 'struct ubifs_wbuf' objects and protected by
35 * mutexes defined inside these objects. Since sometimes upper-level code
36 * has to lock the write-buffer (e.g. journal space reservation code), many
37 * functions related to write-buffers have "nolock" suffix which means that the
38 * caller has to lock the write-buffer before calling this function.
39 *
40 * UBIFS stores nodes at 64 bit-aligned addresses. If the node length is not
41 * aligned, UBIFS starts the next node from the aligned address, and the padded
42 * bytes may contain any rubbish. In other words, UBIFS does not put padding
43 * bytes in those small gaps. Common headers of nodes store real node lengths,
44 * not aligned lengths. Indexing nodes also store real lengths in branches.
45 *
46 * UBIFS uses padding when it pads to the next min. I/O unit. In this case it
47 * uses padding nodes or padding bytes, if the padding node does not fit.
48 *
49 * All UBIFS nodes are protected by CRC checksums and UBIFS checks all nodes
50 * every time they are read from the flash media.
51 */
52
53#include <linux/crc32.h>
54#include "ubifs.h"
55
56/**
57 * ubifs_check_node - check node.
58 * @c: UBIFS file-system description object
59 * @buf: node to check
60 * @lnum: logical eraseblock number
61 * @offs: offset within the logical eraseblock
62 * @quiet: print no messages
63 *
64 * This function checks node magic number and CRC checksum. This function also
65 * validates node length to prevent UBIFS from becoming crazy when an attacker
66 * feeds it a file-system image with incorrect nodes. For example, too large
67 * node length in the common header could cause UBIFS to read memory outside of
68 * allocated buffer when checking the CRC checksum.
69 *
70 * This function returns zero in case of success %-EUCLEAN in case of bad CRC
71 * or magic.
72 */
73int ubifs_check_node(const struct ubifs_info *c, const void *buf, int lnum,
74 int offs, int quiet)
75{
76 int err = -EINVAL, type, node_len;
77 uint32_t crc, node_crc, magic;
78 const struct ubifs_ch *ch = buf;
79
80 ubifs_assert(lnum >= 0 && lnum < c->leb_cnt && offs >= 0);
81 ubifs_assert(!(offs & 7) && offs < c->leb_size);
82
83 magic = le32_to_cpu(ch->magic);
84 if (magic != UBIFS_NODE_MAGIC) {
85 if (!quiet)
86 ubifs_err("bad magic %#08x, expected %#08x",
87 magic, UBIFS_NODE_MAGIC);
88 err = -EUCLEAN;
89 goto out;
90 }
91
92 type = ch->node_type;
93 if (type < 0 || type >= UBIFS_NODE_TYPES_CNT) {
94 if (!quiet)
95 ubifs_err("bad node type %d", type);
96 goto out;
97 }
98
99 node_len = le32_to_cpu(ch->len);
100 if (node_len + offs > c->leb_size)
101 goto out_len;
102
103 if (c->ranges[type].max_len == 0) {
104 if (node_len != c->ranges[type].len)
105 goto out_len;
106 } else if (node_len < c->ranges[type].min_len ||
107 node_len > c->ranges[type].max_len)
108 goto out_len;
109
110 crc = crc32(UBIFS_CRC32_INIT, buf + 8, node_len - 8);
111 node_crc = le32_to_cpu(ch->crc);
112 if (crc != node_crc) {
113 if (!quiet)
114 ubifs_err("bad CRC: calculated %#08x, read %#08x",
115 crc, node_crc);
116 err = -EUCLEAN;
117 goto out;
118 }
119
120 return 0;
121
122out_len:
123 if (!quiet)
124 ubifs_err("bad node length %d", node_len);
125out:
126 if (!quiet) {
127 ubifs_err("bad node at LEB %d:%d", lnum, offs);
128 dbg_dump_node(c, buf);
129 dbg_dump_stack();
130 }
131 return err;
132}
133
134/**
135 * ubifs_pad - pad flash space.
136 * @c: UBIFS file-system description object
137 * @buf: buffer to put padding to
138 * @pad: how many bytes to pad
139 *
140 * The flash media obliges us to write only in chunks of %c->min_io_size and
141 * when we have to write less data we add padding node to the write-buffer and
142 * pad it to the next minimal I/O unit's boundary. Padding nodes help when the
143 * media is being scanned. If the amount of wasted space is not enough to fit a
144 * padding node which takes %UBIFS_PAD_NODE_SZ bytes, we write padding bytes
145 * pattern (%UBIFS_PADDING_BYTE).
146 *
147 * Padding nodes are also used to fill gaps when the "commit-in-gaps" method is
148 * used.
149 */
150void ubifs_pad(const struct ubifs_info *c, void *buf, int pad)
151{
152 uint32_t crc;
153
154 ubifs_assert(pad >= 0 && !(pad & 7));
155
156 if (pad >= UBIFS_PAD_NODE_SZ) {
157 struct ubifs_ch *ch = buf;
158 struct ubifs_pad_node *pad_node = buf;
159
160 ch->magic = cpu_to_le32(UBIFS_NODE_MAGIC);
161 ch->node_type = UBIFS_PAD_NODE;
162 ch->group_type = UBIFS_NO_NODE_GROUP;
163 ch->padding[0] = ch->padding[1] = 0;
164 ch->sqnum = 0;
165 ch->len = cpu_to_le32(UBIFS_PAD_NODE_SZ);
166 pad -= UBIFS_PAD_NODE_SZ;
167 pad_node->pad_len = cpu_to_le32(pad);
168 crc = crc32(UBIFS_CRC32_INIT, buf + 8, UBIFS_PAD_NODE_SZ - 8);
169 ch->crc = cpu_to_le32(crc);
170 memset(buf + UBIFS_PAD_NODE_SZ, 0, pad);
171 } else if (pad > 0)
172 /* Too little space, padding node won't fit */
173 memset(buf, UBIFS_PADDING_BYTE, pad);
174}
175
176/**
177 * next_sqnum - get next sequence number.
178 * @c: UBIFS file-system description object
179 */
180static unsigned long long next_sqnum(struct ubifs_info *c)
181{
182 unsigned long long sqnum;
183
184 spin_lock(&c->cnt_lock);
185 sqnum = ++c->max_sqnum;
186 spin_unlock(&c->cnt_lock);
187
188 if (unlikely(sqnum >= SQNUM_WARN_WATERMARK)) {
189 if (sqnum >= SQNUM_WATERMARK) {
190 ubifs_err("sequence number overflow %llu, end of life",
191 sqnum);
192 ubifs_ro_mode(c, -EINVAL);
193 }
194 ubifs_warn("running out of sequence numbers, end of life soon");
195 }
196
197 return sqnum;
198}
199
200/**
201 * ubifs_prepare_node - prepare node to be written to flash.
202 * @c: UBIFS file-system description object
203 * @node: the node to pad
204 * @len: node length
205 * @pad: if the buffer has to be padded
206 *
207 * This function prepares node at @node to be written to the media - it
208 * calculates node CRC, fills the common header, and adds proper padding up to
209 * the next minimum I/O unit if @pad is not zero.
210 */
211void ubifs_prepare_node(struct ubifs_info *c, void *node, int len, int pad)
212{
213 uint32_t crc;
214 struct ubifs_ch *ch = node;
215 unsigned long long sqnum = next_sqnum(c);
216
217 ubifs_assert(len >= UBIFS_CH_SZ);
218
219 ch->magic = cpu_to_le32(UBIFS_NODE_MAGIC);
220 ch->len = cpu_to_le32(len);
221 ch->group_type = UBIFS_NO_NODE_GROUP;
222 ch->sqnum = cpu_to_le64(sqnum);
223 ch->padding[0] = ch->padding[1] = 0;
224 crc = crc32(UBIFS_CRC32_INIT, node + 8, len - 8);
225 ch->crc = cpu_to_le32(crc);
226
227 if (pad) {
228 len = ALIGN(len, 8);
229 pad = ALIGN(len, c->min_io_size) - len;
230 ubifs_pad(c, node + len, pad);
231 }
232}
233
234/**
235 * ubifs_prep_grp_node - prepare node of a group to be written to flash.
236 * @c: UBIFS file-system description object
237 * @node: the node to pad
238 * @len: node length
239 * @last: indicates the last node of the group
240 *
241 * This function prepares node at @node to be written to the media - it
242 * calculates node CRC and fills the common header.
243 */
244void ubifs_prep_grp_node(struct ubifs_info *c, void *node, int len, int last)
245{
246 uint32_t crc;
247 struct ubifs_ch *ch = node;
248 unsigned long long sqnum = next_sqnum(c);
249
250 ubifs_assert(len >= UBIFS_CH_SZ);
251
252 ch->magic = cpu_to_le32(UBIFS_NODE_MAGIC);
253 ch->len = cpu_to_le32(len);
254 if (last)
255 ch->group_type = UBIFS_LAST_OF_NODE_GROUP;
256 else
257 ch->group_type = UBIFS_IN_NODE_GROUP;
258 ch->sqnum = cpu_to_le64(sqnum);
259 ch->padding[0] = ch->padding[1] = 0;
260 crc = crc32(UBIFS_CRC32_INIT, node + 8, len - 8);
261 ch->crc = cpu_to_le32(crc);
262}
263
264/**
265 * wbuf_timer_callback - write-buffer timer callback function.
266 * @data: timer data (write-buffer descriptor)
267 *
268 * This function is called when the write-buffer timer expires.
269 */
270static void wbuf_timer_callback_nolock(unsigned long data)
271{
272 struct ubifs_wbuf *wbuf = (struct ubifs_wbuf *)data;
273
274 wbuf->need_sync = 1;
275 wbuf->c->need_wbuf_sync = 1;
276 ubifs_wake_up_bgt(wbuf->c);
277}
278
279/**
280 * new_wbuf_timer - start new write-buffer timer.
281 * @wbuf: write-buffer descriptor
282 */
283static void new_wbuf_timer_nolock(struct ubifs_wbuf *wbuf)
284{
285 ubifs_assert(!timer_pending(&wbuf->timer));
286
287 if (!wbuf->timeout)
288 return;
289
290 wbuf->timer.expires = jiffies + wbuf->timeout;
291 add_timer(&wbuf->timer);
292}
293
294/**
295 * cancel_wbuf_timer - cancel write-buffer timer.
296 * @wbuf: write-buffer descriptor
297 */
298static void cancel_wbuf_timer_nolock(struct ubifs_wbuf *wbuf)
299{
300 /*
301 * If the syncer is waiting for the lock (from the background thread's
302 * context) and another task is changing write-buffer then the syncing
303 * should be canceled.
304 */
305 wbuf->need_sync = 0;
306 del_timer(&wbuf->timer);
307}
308
309/**
310 * ubifs_wbuf_sync_nolock - synchronize write-buffer.
311 * @wbuf: write-buffer to synchronize
312 *
313 * This function synchronizes write-buffer @buf and returns zero in case of
314 * success or a negative error code in case of failure.
315 */
316int ubifs_wbuf_sync_nolock(struct ubifs_wbuf *wbuf)
317{
318 struct ubifs_info *c = wbuf->c;
319 int err, dirt;
320
321 cancel_wbuf_timer_nolock(wbuf);
322 if (!wbuf->used || wbuf->lnum == -1)
323 /* Write-buffer is empty or not seeked */
324 return 0;
325
326 dbg_io("LEB %d:%d, %d bytes",
327 wbuf->lnum, wbuf->offs, wbuf->used);
328 ubifs_assert(!(c->vfs_sb->s_flags & MS_RDONLY));
329 ubifs_assert(!(wbuf->avail & 7));
330 ubifs_assert(wbuf->offs + c->min_io_size <= c->leb_size);
331
332 if (c->ro_media)
333 return -EROFS;
334
335 ubifs_pad(c, wbuf->buf + wbuf->used, wbuf->avail);
336 err = ubi_leb_write(c->ubi, wbuf->lnum, wbuf->buf, wbuf->offs,
337 c->min_io_size, wbuf->dtype);
338 if (err) {
339 ubifs_err("cannot write %d bytes to LEB %d:%d",
340 c->min_io_size, wbuf->lnum, wbuf->offs);
341 dbg_dump_stack();
342 return err;
343 }
344
345 dirt = wbuf->avail;
346
347 spin_lock(&wbuf->lock);
348 wbuf->offs += c->min_io_size;
349 wbuf->avail = c->min_io_size;
350 wbuf->used = 0;
351 wbuf->next_ino = 0;
352 spin_unlock(&wbuf->lock);
353
354 if (wbuf->sync_callback)
355 err = wbuf->sync_callback(c, wbuf->lnum,
356 c->leb_size - wbuf->offs, dirt);
357 return err;
358}
359
360/**
361 * ubifs_wbuf_seek_nolock - seek write-buffer.
362 * @wbuf: write-buffer
363 * @lnum: logical eraseblock number to seek to
364 * @offs: logical eraseblock offset to seek to
365 * @dtype: data type
366 *
367 * This function targets the write buffer to logical eraseblock @lnum:@offs.
368 * The write-buffer is synchronized if it is not empty. Returns zero in case of
369 * success and a negative error code in case of failure.
370 */
371int ubifs_wbuf_seek_nolock(struct ubifs_wbuf *wbuf, int lnum, int offs,
372 int dtype)
373{
374 const struct ubifs_info *c = wbuf->c;
375
376 dbg_io("LEB %d:%d", lnum, offs);
377 ubifs_assert(lnum >= 0 && lnum < c->leb_cnt);
378 ubifs_assert(offs >= 0 && offs <= c->leb_size);
379 ubifs_assert(offs % c->min_io_size == 0 && !(offs & 7));
380 ubifs_assert(lnum != wbuf->lnum);
381
382 if (wbuf->used > 0) {
383 int err = ubifs_wbuf_sync_nolock(wbuf);
384
385 if (err)
386 return err;
387 }
388
389 spin_lock(&wbuf->lock);
390 wbuf->lnum = lnum;
391 wbuf->offs = offs;
392 wbuf->avail = c->min_io_size;
393 wbuf->used = 0;
394 spin_unlock(&wbuf->lock);
395 wbuf->dtype = dtype;
396
397 return 0;
398}
399
400/**
401 * ubifs_bg_wbufs_sync - synchronize write-buffers.
402 * @c: UBIFS file-system description object
403 *
404 * This function is called by background thread to synchronize write-buffers.
405 * Returns zero in case of success and a negative error code in case of
406 * failure.
407 */
408int ubifs_bg_wbufs_sync(struct ubifs_info *c)
409{
410 int err, i;
411
412 if (!c->need_wbuf_sync)
413 return 0;
414 c->need_wbuf_sync = 0;
415
416 if (c->ro_media) {
417 err = -EROFS;
418 goto out_timers;
419 }
420
421 dbg_io("synchronize");
422 for (i = 0; i < c->jhead_cnt; i++) {
423 struct ubifs_wbuf *wbuf = &c->jheads[i].wbuf;
424
425 cond_resched();
426
427 /*
428 * If the mutex is locked then wbuf is being changed, so
429 * synchronization is not necessary.
430 */
431 if (mutex_is_locked(&wbuf->io_mutex))
432 continue;
433
434 mutex_lock_nested(&wbuf->io_mutex, wbuf->jhead);
435 if (!wbuf->need_sync) {
436 mutex_unlock(&wbuf->io_mutex);
437 continue;
438 }
439
440 err = ubifs_wbuf_sync_nolock(wbuf);
441 mutex_unlock(&wbuf->io_mutex);
442 if (err) {
443 ubifs_err("cannot sync write-buffer, error %d", err);
444 ubifs_ro_mode(c, err);
445 goto out_timers;
446 }
447 }
448
449 return 0;
450
451out_timers:
452 /* Cancel all timers to prevent repeated errors */
453 for (i = 0; i < c->jhead_cnt; i++) {
454 struct ubifs_wbuf *wbuf = &c->jheads[i].wbuf;
455
456 mutex_lock_nested(&wbuf->io_mutex, wbuf->jhead);
457 cancel_wbuf_timer_nolock(wbuf);
458 mutex_unlock(&wbuf->io_mutex);
459 }
460 return err;
461}
462
463/**
464 * ubifs_wbuf_write_nolock - write data to flash via write-buffer.
465 * @wbuf: write-buffer
466 * @buf: node to write
467 * @len: node length
468 *
469 * This function writes data to flash via write-buffer @wbuf. This means that
470 * the last piece of the node won't reach the flash media immediately if it
471 * does not take whole minimal I/O unit. Instead, the node will sit in RAM
472 * until the write-buffer is synchronized (e.g., by timer).
473 *
474 * This function returns zero in case of success and a negative error code in
475 * case of failure. If the node cannot be written because there is no more
476 * space in this logical eraseblock, %-ENOSPC is returned.
477 */
478int ubifs_wbuf_write_nolock(struct ubifs_wbuf *wbuf, void *buf, int len)
479{
480 struct ubifs_info *c = wbuf->c;
481 int err, written, n, aligned_len = ALIGN(len, 8), offs;
482
483 dbg_io("%d bytes (%s) to wbuf at LEB %d:%d", len,
484 dbg_ntype(((struct ubifs_ch *)buf)->node_type), wbuf->lnum,
485 wbuf->offs + wbuf->used);
486 ubifs_assert(len > 0 && wbuf->lnum >= 0 && wbuf->lnum < c->leb_cnt);
487 ubifs_assert(wbuf->offs >= 0 && wbuf->offs % c->min_io_size == 0);
488 ubifs_assert(!(wbuf->offs & 7) && wbuf->offs <= c->leb_size);
489 ubifs_assert(wbuf->avail > 0 && wbuf->avail <= c->min_io_size);
490 ubifs_assert(mutex_is_locked(&wbuf->io_mutex));
491
492 if (c->leb_size - wbuf->offs - wbuf->used < aligned_len) {
493 err = -ENOSPC;
494 goto out;
495 }
496
497 cancel_wbuf_timer_nolock(wbuf);
498
499 if (c->ro_media)
500 return -EROFS;
501
502 if (aligned_len <= wbuf->avail) {
503 /*
504 * The node is not very large and fits entirely within
505 * write-buffer.
506 */
507 memcpy(wbuf->buf + wbuf->used, buf, len);
508
509 if (aligned_len == wbuf->avail) {
510 dbg_io("flush wbuf to LEB %d:%d", wbuf->lnum,
511 wbuf->offs);
512 err = ubi_leb_write(c->ubi, wbuf->lnum, wbuf->buf,
513 wbuf->offs, c->min_io_size,
514 wbuf->dtype);
515 if (err)
516 goto out;
517
518 spin_lock(&wbuf->lock);
519 wbuf->offs += c->min_io_size;
520 wbuf->avail = c->min_io_size;
521 wbuf->used = 0;
522 wbuf->next_ino = 0;
523 spin_unlock(&wbuf->lock);
524 } else {
525 spin_lock(&wbuf->lock);
526 wbuf->avail -= aligned_len;
527 wbuf->used += aligned_len;
528 spin_unlock(&wbuf->lock);
529 }
530
531 goto exit;
532 }
533
534 /*
535 * The node is large enough and does not fit entirely within current
536 * minimal I/O unit. We have to fill and flush write-buffer and switch
537 * to the next min. I/O unit.
538 */
539 dbg_io("flush wbuf to LEB %d:%d", wbuf->lnum, wbuf->offs);
540 memcpy(wbuf->buf + wbuf->used, buf, wbuf->avail);
541 err = ubi_leb_write(c->ubi, wbuf->lnum, wbuf->buf, wbuf->offs,
542 c->min_io_size, wbuf->dtype);
543 if (err)
544 goto out;
545
546 offs = wbuf->offs + c->min_io_size;
547 len -= wbuf->avail;
548 aligned_len -= wbuf->avail;
549 written = wbuf->avail;
550
551 /*
552 * The remaining data may take more whole min. I/O units, so write the
553 * remains multiple to min. I/O unit size directly to the flash media.
554 * We align node length to 8-byte boundary because we anyway flash wbuf
555 * if the remaining space is less than 8 bytes.
556 */
557 n = aligned_len >> c->min_io_shift;
558 if (n) {
559 n <<= c->min_io_shift;
560 dbg_io("write %d bytes to LEB %d:%d", n, wbuf->lnum, offs);
561 err = ubi_leb_write(c->ubi, wbuf->lnum, buf + written, offs, n,
562 wbuf->dtype);
563 if (err)
564 goto out;
565 offs += n;
566 aligned_len -= n;
567 len -= n;
568 written += n;
569 }
570
571 spin_lock(&wbuf->lock);
572 if (aligned_len)
573 /*
574 * And now we have what's left and what does not take whole
575 * min. I/O unit, so write it to the write-buffer and we are
576 * done.
577 */
578 memcpy(wbuf->buf, buf + written, len);
579
580 wbuf->offs = offs;
581 wbuf->used = aligned_len;
582 wbuf->avail = c->min_io_size - aligned_len;
583 wbuf->next_ino = 0;
584 spin_unlock(&wbuf->lock);
585
586exit:
587 if (wbuf->sync_callback) {
588 int free = c->leb_size - wbuf->offs - wbuf->used;
589
590 err = wbuf->sync_callback(c, wbuf->lnum, free, 0);
591 if (err)
592 goto out;
593 }
594
595 if (wbuf->used)
596 new_wbuf_timer_nolock(wbuf);
597
598 return 0;
599
600out:
601 ubifs_err("cannot write %d bytes to LEB %d:%d, error %d",
602 len, wbuf->lnum, wbuf->offs, err);
603 dbg_dump_node(c, buf);
604 dbg_dump_stack();
605 dbg_dump_leb(c, wbuf->lnum);
606 return err;
607}
608
609/**
610 * ubifs_write_node - write node to the media.
611 * @c: UBIFS file-system description object
612 * @buf: the node to write
613 * @len: node length
614 * @lnum: logical eraseblock number
615 * @offs: offset within the logical eraseblock
616 * @dtype: node life-time hint (%UBI_LONGTERM, %UBI_SHORTTERM, %UBI_UNKNOWN)
617 *
618 * This function automatically fills node magic number, assigns sequence
619 * number, and calculates node CRC checksum. The length of the @buf buffer has
620 * to be aligned to the minimal I/O unit size. This function automatically
621 * appends padding node and padding bytes if needed. Returns zero in case of
622 * success and a negative error code in case of failure.
623 */
624int ubifs_write_node(struct ubifs_info *c, void *buf, int len, int lnum,
625 int offs, int dtype)
626{
627 int err, buf_len = ALIGN(len, c->min_io_size);
628
629 dbg_io("LEB %d:%d, %s, length %d (aligned %d)",
630 lnum, offs, dbg_ntype(((struct ubifs_ch *)buf)->node_type), len,
631 buf_len);
632 ubifs_assert(lnum >= 0 && lnum < c->leb_cnt && offs >= 0);
633 ubifs_assert(offs % c->min_io_size == 0 && offs < c->leb_size);
634
635 if (c->ro_media)
636 return -EROFS;
637
638 ubifs_prepare_node(c, buf, len, 1);
639 err = ubi_leb_write(c->ubi, lnum, buf, offs, buf_len, dtype);
640 if (err) {
641 ubifs_err("cannot write %d bytes to LEB %d:%d, error %d",
642 buf_len, lnum, offs, err);
643 dbg_dump_node(c, buf);
644 dbg_dump_stack();
645 }
646
647 return err;
648}
649
650/**
651 * ubifs_read_node_wbuf - read node from the media or write-buffer.
652 * @wbuf: wbuf to check for un-written data
653 * @buf: buffer to read to
654 * @type: node type
655 * @len: node length
656 * @lnum: logical eraseblock number
657 * @offs: offset within the logical eraseblock
658 *
659 * This function reads a node of known type and length, checks it and stores
660 * in @buf. If the node partially or fully sits in the write-buffer, this
661 * function takes data from the buffer, otherwise it reads the flash media.
662 * Returns zero in case of success, %-EUCLEAN if CRC mismatched and a negative
663 * error code in case of failure.
664 */
665int ubifs_read_node_wbuf(struct ubifs_wbuf *wbuf, void *buf, int type, int len,
666 int lnum, int offs)
667{
668 const struct ubifs_info *c = wbuf->c;
669 int err, rlen, overlap;
670 struct ubifs_ch *ch = buf;
671
672 dbg_io("LEB %d:%d, %s, length %d", lnum, offs, dbg_ntype(type), len);
673 ubifs_assert(wbuf && lnum >= 0 && lnum < c->leb_cnt && offs >= 0);
674 ubifs_assert(!(offs & 7) && offs < c->leb_size);
675 ubifs_assert(type >= 0 && type < UBIFS_NODE_TYPES_CNT);
676
677 spin_lock(&wbuf->lock);
678 overlap = (lnum == wbuf->lnum && offs + len > wbuf->offs);
679 if (!overlap) {
680 /* We may safely unlock the write-buffer and read the data */
681 spin_unlock(&wbuf->lock);
682 return ubifs_read_node(c, buf, type, len, lnum, offs);
683 }
684
685 /* Don't read under wbuf */
686 rlen = wbuf->offs - offs;
687 if (rlen < 0)
688 rlen = 0;
689
690 /* Copy the rest from the write-buffer */
691 memcpy(buf + rlen, wbuf->buf + offs + rlen - wbuf->offs, len - rlen);
692 spin_unlock(&wbuf->lock);
693
694 if (rlen > 0) {
695 /* Read everything that goes before write-buffer */
696 err = ubi_read(c->ubi, lnum, buf, offs, rlen);
697 if (err && err != -EBADMSG) {
698 ubifs_err("failed to read node %d from LEB %d:%d, "
699 "error %d", type, lnum, offs, err);
700 dbg_dump_stack();
701 return err;
702 }
703 }
704
705 if (type != ch->node_type) {
706 ubifs_err("bad node type (%d but expected %d)",
707 ch->node_type, type);
708 goto out;
709 }
710
711 err = ubifs_check_node(c, buf, lnum, offs, 0);
712 if (err) {
713 ubifs_err("expected node type %d", type);
714 return err;
715 }
716
717 rlen = le32_to_cpu(ch->len);
718 if (rlen != len) {
719 ubifs_err("bad node length %d, expected %d", rlen, len);
720 goto out;
721 }
722
723 return 0;
724
725out:
726 ubifs_err("bad node at LEB %d:%d", lnum, offs);
727 dbg_dump_node(c, buf);
728 dbg_dump_stack();
729 return -EINVAL;
730}
731
732/**
733 * ubifs_read_node - read node.
734 * @c: UBIFS file-system description object
735 * @buf: buffer to read to
736 * @type: node type
737 * @len: node length (not aligned)
738 * @lnum: logical eraseblock number
739 * @offs: offset within the logical eraseblock
740 *
741 * This function reads a node of known type and and length, checks it and
742 * stores in @buf. Returns zero in case of success, %-EUCLEAN if CRC mismatched
743 * and a negative error code in case of failure.
744 */
745int ubifs_read_node(const struct ubifs_info *c, void *buf, int type, int len,
746 int lnum, int offs)
747{
748 int err, l;
749 struct ubifs_ch *ch = buf;
750
751 dbg_io("LEB %d:%d, %s, length %d", lnum, offs, dbg_ntype(type), len);
752 ubifs_assert(lnum >= 0 && lnum < c->leb_cnt && offs >= 0);
753 ubifs_assert(len >= UBIFS_CH_SZ && offs + len <= c->leb_size);
754 ubifs_assert(!(offs & 7) && offs < c->leb_size);
755 ubifs_assert(type >= 0 && type < UBIFS_NODE_TYPES_CNT);
756
757 err = ubi_read(c->ubi, lnum, buf, offs, len);
758 if (err && err != -EBADMSG) {
759 ubifs_err("cannot read node %d from LEB %d:%d, error %d",
760 type, lnum, offs, err);
761 return err;
762 }
763
764 if (type != ch->node_type) {
765 ubifs_err("bad node type (%d but expected %d)",
766 ch->node_type, type);
767 goto out;
768 }
769
770 err = ubifs_check_node(c, buf, lnum, offs, 0);
771 if (err) {
772 ubifs_err("expected node type %d", type);
773 return err;
774 }
775
776 l = le32_to_cpu(ch->len);
777 if (l != len) {
778 ubifs_err("bad node length %d, expected %d", l, len);
779 goto out;
780 }
781
782 return 0;
783
784out:
785 ubifs_err("bad node at LEB %d:%d", lnum, offs);
786 dbg_dump_node(c, buf);
787 dbg_dump_stack();
788 return -EINVAL;
789}
790
791/**
792 * ubifs_wbuf_init - initialize write-buffer.
793 * @c: UBIFS file-system description object
794 * @wbuf: write-buffer to initialize
795 *
796 * This function initializes write buffer. Returns zero in case of success
797 * %-ENOMEM in case of failure.
798 */
799int ubifs_wbuf_init(struct ubifs_info *c, struct ubifs_wbuf *wbuf)
800{
801 size_t size;
802
803 wbuf->buf = kmalloc(c->min_io_size, GFP_KERNEL);
804 if (!wbuf->buf)
805 return -ENOMEM;
806
807 size = (c->min_io_size / UBIFS_CH_SZ + 1) * sizeof(ino_t);
808 wbuf->inodes = kmalloc(size, GFP_KERNEL);
809 if (!wbuf->inodes) {
810 kfree(wbuf->buf);
811 wbuf->buf = NULL;
812 return -ENOMEM;
813 }
814
815 wbuf->used = 0;
816 wbuf->lnum = wbuf->offs = -1;
817 wbuf->avail = c->min_io_size;
818 wbuf->dtype = UBI_UNKNOWN;
819 wbuf->sync_callback = NULL;
820 mutex_init(&wbuf->io_mutex);
821 spin_lock_init(&wbuf->lock);
822
823 wbuf->c = c;
824 init_timer(&wbuf->timer);
825 wbuf->timer.function = wbuf_timer_callback_nolock;
826 wbuf->timer.data = (unsigned long)wbuf;
827 wbuf->timeout = DEFAULT_WBUF_TIMEOUT;
828 wbuf->next_ino = 0;
829
830 return 0;
831}
832
833/**
834 * ubifs_wbuf_add_ino_nolock - add an inode number into the wbuf inode array.
835 * @wbuf: the write-buffer whereto add
836 * @inum: the inode number
837 *
838 * This function adds an inode number to the inode array of the write-buffer.
839 */
840void ubifs_wbuf_add_ino_nolock(struct ubifs_wbuf *wbuf, ino_t inum)
841{
842 if (!wbuf->buf)
843 /* NOR flash or something similar */
844 return;
845
846 spin_lock(&wbuf->lock);
847 if (wbuf->used)
848 wbuf->inodes[wbuf->next_ino++] = inum;
849 spin_unlock(&wbuf->lock);
850}
851
852/**
853 * wbuf_has_ino - returns if the wbuf contains data from the inode.
854 * @wbuf: the write-buffer
855 * @inum: the inode number
856 *
857 * This function returns with %1 if the write-buffer contains some data from the
858 * given inode otherwise it returns with %0.
859 */
860static int wbuf_has_ino(struct ubifs_wbuf *wbuf, ino_t inum)
861{
862 int i, ret = 0;
863
864 spin_lock(&wbuf->lock);
865 for (i = 0; i < wbuf->next_ino; i++)
866 if (inum == wbuf->inodes[i]) {
867 ret = 1;
868 break;
869 }
870 spin_unlock(&wbuf->lock);
871
872 return ret;
873}
874
875/**
876 * ubifs_sync_wbufs_by_inode - synchronize write-buffers for an inode.
877 * @c: UBIFS file-system description object
878 * @inode: inode to synchronize
879 *
880 * This function synchronizes write-buffers which contain nodes belonging to
881 * @inode. Returns zero in case of success and a negative error code in case of
882 * failure.
883 */
884int ubifs_sync_wbufs_by_inode(struct ubifs_info *c, struct inode *inode)
885{
886 int i, err = 0;
887
888 for (i = 0; i < c->jhead_cnt; i++) {
889 struct ubifs_wbuf *wbuf = &c->jheads[i].wbuf;
890
891 if (i == GCHD)
892 /*
893 * GC head is special, do not look at it. Even if the
894 * head contains something related to this inode, it is
895 * a _copy_ of corresponding on-flash node which sits
896 * somewhere else.
897 */
898 continue;
899
900 if (!wbuf_has_ino(wbuf, inode->i_ino))
901 continue;
902
903 mutex_lock_nested(&wbuf->io_mutex, wbuf->jhead);
904 if (wbuf_has_ino(wbuf, inode->i_ino))
905 err = ubifs_wbuf_sync_nolock(wbuf);
906 mutex_unlock(&wbuf->io_mutex);
907
908 if (err) {
909 ubifs_ro_mode(c, err);
910 return err;
911 }
912 }
913 return 0;
914}
diff --git a/fs/ubifs/ioctl.c b/fs/ubifs/ioctl.c
new file mode 100644
index 000000000000..5e82cffe9695
--- /dev/null
+++ b/fs/ubifs/ioctl.c
@@ -0,0 +1,204 @@
1/*
2 * This file is part of UBIFS.
3 *
4 * Copyright (C) 2006-2008 Nokia Corporation.
5 * Copyright (C) 2006, 2007 University of Szeged, Hungary
6 *
7 * This program is free software; you can redistribute it and/or modify it
8 * under the terms of the GNU General Public License version 2 as published by
9 * the Free Software Foundation.
10 *
11 * This program is distributed in the hope that it will be useful, but WITHOUT
12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
14 * more details.
15 *
16 * You should have received a copy of the GNU General Public License along with
17 * this program; if not, write to the Free Software Foundation, Inc., 51
18 * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
19 *
20 * Authors: Zoltan Sogor
21 * Artem Bityutskiy (Битюцкий Артём)
22 * Adrian Hunter
23 */
24
25/* This file implements EXT2-compatible extended attribute ioctl() calls */
26
27#include <linux/compat.h>
28#include <linux/smp_lock.h>
29#include <linux/mount.h>
30#include "ubifs.h"
31
32/**
33 * ubifs_set_inode_flags - set VFS inode flags.
34 * @inode: VFS inode to set flags for
35 *
36 * This function propagates flags from UBIFS inode object to VFS inode object.
37 */
38void ubifs_set_inode_flags(struct inode *inode)
39{
40 unsigned int flags = ubifs_inode(inode)->flags;
41
42 inode->i_flags &= ~(S_SYNC | S_APPEND | S_IMMUTABLE | S_DIRSYNC);
43 if (flags & UBIFS_SYNC_FL)
44 inode->i_flags |= S_SYNC;
45 if (flags & UBIFS_APPEND_FL)
46 inode->i_flags |= S_APPEND;
47 if (flags & UBIFS_IMMUTABLE_FL)
48 inode->i_flags |= S_IMMUTABLE;
49 if (flags & UBIFS_DIRSYNC_FL)
50 inode->i_flags |= S_DIRSYNC;
51}
52
53/*
54 * ioctl2ubifs - convert ioctl inode flags to UBIFS inode flags.
55 * @ioctl_flags: flags to convert
56 *
57 * This function convert ioctl flags (@FS_COMPR_FL, etc) to UBIFS inode flags
58 * (@UBIFS_COMPR_FL, etc).
59 */
60static int ioctl2ubifs(int ioctl_flags)
61{
62 int ubifs_flags = 0;
63
64 if (ioctl_flags & FS_COMPR_FL)
65 ubifs_flags |= UBIFS_COMPR_FL;
66 if (ioctl_flags & FS_SYNC_FL)
67 ubifs_flags |= UBIFS_SYNC_FL;
68 if (ioctl_flags & FS_APPEND_FL)
69 ubifs_flags |= UBIFS_APPEND_FL;
70 if (ioctl_flags & FS_IMMUTABLE_FL)
71 ubifs_flags |= UBIFS_IMMUTABLE_FL;
72 if (ioctl_flags & FS_DIRSYNC_FL)
73 ubifs_flags |= UBIFS_DIRSYNC_FL;
74
75 return ubifs_flags;
76}
77
78/*
79 * ubifs2ioctl - convert UBIFS inode flags to ioctl inode flags.
80 * @ubifs_flags: flags to convert
81 *
82 * This function convert UBIFS (@UBIFS_COMPR_FL, etc) to ioctl flags
83 * (@FS_COMPR_FL, etc).
84 */
85static int ubifs2ioctl(int ubifs_flags)
86{
87 int ioctl_flags = 0;
88
89 if (ubifs_flags & UBIFS_COMPR_FL)
90 ioctl_flags |= FS_COMPR_FL;
91 if (ubifs_flags & UBIFS_SYNC_FL)
92 ioctl_flags |= FS_SYNC_FL;
93 if (ubifs_flags & UBIFS_APPEND_FL)
94 ioctl_flags |= FS_APPEND_FL;
95 if (ubifs_flags & UBIFS_IMMUTABLE_FL)
96 ioctl_flags |= FS_IMMUTABLE_FL;
97 if (ubifs_flags & UBIFS_DIRSYNC_FL)
98 ioctl_flags |= FS_DIRSYNC_FL;
99
100 return ioctl_flags;
101}
102
103static int setflags(struct inode *inode, int flags)
104{
105 int oldflags, err, release;
106 struct ubifs_inode *ui = ubifs_inode(inode);
107 struct ubifs_info *c = inode->i_sb->s_fs_info;
108 struct ubifs_budget_req req = { .dirtied_ino = 1,
109 .dirtied_ino_d = ui->data_len };
110
111 err = ubifs_budget_space(c, &req);
112 if (err)
113 return err;
114
115 /*
116 * The IMMUTABLE and APPEND_ONLY flags can only be changed by
117 * the relevant capability.
118 */
119 mutex_lock(&ui->ui_mutex);
120 oldflags = ubifs2ioctl(ui->flags);
121 if ((flags ^ oldflags) & (FS_APPEND_FL | FS_IMMUTABLE_FL)) {
122 if (!capable(CAP_LINUX_IMMUTABLE)) {
123 err = -EPERM;
124 goto out_unlock;
125 }
126 }
127
128 ui->flags = ioctl2ubifs(flags);
129 ubifs_set_inode_flags(inode);
130 inode->i_ctime = ubifs_current_time(inode);
131 release = ui->dirty;
132 mark_inode_dirty_sync(inode);
133 mutex_unlock(&ui->ui_mutex);
134
135 if (release)
136 ubifs_release_budget(c, &req);
137 if (IS_SYNC(inode))
138 err = write_inode_now(inode, 1);
139 return err;
140
141out_unlock:
142 ubifs_err("can't modify inode %lu attributes", inode->i_ino);
143 mutex_unlock(&ui->ui_mutex);
144 ubifs_release_budget(c, &req);
145 return err;
146}
147
148long ubifs_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
149{
150 int flags, err;
151 struct inode *inode = file->f_path.dentry->d_inode;
152
153 switch (cmd) {
154 case FS_IOC_GETFLAGS:
155 flags = ubifs2ioctl(ubifs_inode(inode)->flags);
156
157 return put_user(flags, (int __user *) arg);
158
159 case FS_IOC_SETFLAGS: {
160 if (IS_RDONLY(inode))
161 return -EROFS;
162
163 if (!is_owner_or_cap(inode))
164 return -EACCES;
165
166 if (get_user(flags, (int __user *) arg))
167 return -EFAULT;
168
169 if (!S_ISDIR(inode->i_mode))
170 flags &= ~FS_DIRSYNC_FL;
171
172 /*
173 * Make sure the file-system is read-write and make sure it
174 * will not become read-only while we are changing the flags.
175 */
176 err = mnt_want_write(file->f_path.mnt);
177 if (err)
178 return err;
179 err = setflags(inode, flags);
180 mnt_drop_write(file->f_path.mnt);
181 return err;
182 }
183
184 default:
185 return -ENOTTY;
186 }
187}
188
189#ifdef CONFIG_COMPAT
190long ubifs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
191{
192 switch (cmd) {
193 case FS_IOC32_GETFLAGS:
194 cmd = FS_IOC_GETFLAGS;
195 break;
196 case FS_IOC32_SETFLAGS:
197 cmd = FS_IOC_SETFLAGS;
198 break;
199 default:
200 return -ENOIOCTLCMD;
201 }
202 return ubifs_ioctl(file, cmd, (unsigned long)compat_ptr(arg));
203}
204#endif
diff --git a/fs/ubifs/journal.c b/fs/ubifs/journal.c
new file mode 100644
index 000000000000..283155abe5f5
--- /dev/null
+++ b/fs/ubifs/journal.c
@@ -0,0 +1,1387 @@
1/*
2 * This file is part of UBIFS.
3 *
4 * Copyright (C) 2006-2008 Nokia Corporation.
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms of the GNU General Public License version 2 as published by
8 * the Free Software Foundation.
9 *
10 * This program is distributed in the hope that it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13 * more details.
14 *
15 * You should have received a copy of the GNU General Public License along with
16 * this program; if not, write to the Free Software Foundation, Inc., 51
17 * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
18 *
19 * Authors: Artem Bityutskiy (Битюцкий Артём)
20 * Adrian Hunter
21 */
22
23/*
24 * This file implements UBIFS journal.
25 *
26 * The journal consists of 2 parts - the log and bud LEBs. The log has fixed
27 * length and position, while a bud logical eraseblock is any LEB in the main
28 * area. Buds contain file system data - data nodes, inode nodes, etc. The log
29 * contains only references to buds and some other stuff like commit
30 * start node. The idea is that when we commit the journal, we do
31 * not copy the data, the buds just become indexed. Since after the commit the
32 * nodes in bud eraseblocks become leaf nodes of the file system index tree, we
33 * use term "bud". Analogy is obvious, bud eraseblocks contain nodes which will
34 * become leafs in the future.
35 *
36 * The journal is multi-headed because we want to write data to the journal as
37 * optimally as possible. It is nice to have nodes belonging to the same inode
38 * in one LEB, so we may write data owned by different inodes to different
39 * journal heads, although at present only one data head is used.
40 *
41 * For recovery reasons, the base head contains all inode nodes, all directory
42 * entry nodes and all truncate nodes. This means that the other heads contain
43 * only data nodes.
44 *
45 * Bud LEBs may be half-indexed. For example, if the bud was not full at the
46 * time of commit, the bud is retained to continue to be used in the journal,
47 * even though the "front" of the LEB is now indexed. In that case, the log
48 * reference contains the offset where the bud starts for the purposes of the
49 * journal.
50 *
51 * The journal size has to be limited, because the larger is the journal, the
52 * longer it takes to mount UBIFS (scanning the journal) and the more memory it
53 * takes (indexing in the TNC).
54 *
55 * All the journal write operations like 'ubifs_jnl_update()' here, which write
56 * multiple UBIFS nodes to the journal at one go, are atomic with respect to
57 * unclean reboots. Should the unclean reboot happen, the recovery code drops
58 * all the nodes.
59 */
60
61#include "ubifs.h"
62
63/**
64 * zero_ino_node_unused - zero out unused fields of an on-flash inode node.
65 * @ino: the inode to zero out
66 */
67static inline void zero_ino_node_unused(struct ubifs_ino_node *ino)
68{
69 memset(ino->padding1, 0, 4);
70 memset(ino->padding2, 0, 26);
71}
72
73/**
74 * zero_dent_node_unused - zero out unused fields of an on-flash directory
75 * entry node.
76 * @dent: the directory entry to zero out
77 */
78static inline void zero_dent_node_unused(struct ubifs_dent_node *dent)
79{
80 dent->padding1 = 0;
81 memset(dent->padding2, 0, 4);
82}
83
84/**
85 * zero_data_node_unused - zero out unused fields of an on-flash data node.
86 * @data: the data node to zero out
87 */
88static inline void zero_data_node_unused(struct ubifs_data_node *data)
89{
90 memset(data->padding, 0, 2);
91}
92
93/**
94 * zero_trun_node_unused - zero out unused fields of an on-flash truncation
95 * node.
96 * @trun: the truncation node to zero out
97 */
98static inline void zero_trun_node_unused(struct ubifs_trun_node *trun)
99{
100 memset(trun->padding, 0, 12);
101}
102
103/**
104 * reserve_space - reserve space in the journal.
105 * @c: UBIFS file-system description object
106 * @jhead: journal head number
107 * @len: node length
108 *
109 * This function reserves space in journal head @head. If the reservation
110 * succeeded, the journal head stays locked and later has to be unlocked using
111 * 'release_head()'. 'write_node()' and 'write_head()' functions also unlock
112 * it. Returns zero in case of success, %-EAGAIN if commit has to be done, and
113 * other negative error codes in case of other failures.
114 */
115static int reserve_space(struct ubifs_info *c, int jhead, int len)
116{
117 int err = 0, err1, retries = 0, avail, lnum, offs, free, squeeze;
118 struct ubifs_wbuf *wbuf = &c->jheads[jhead].wbuf;
119
120 /*
121 * Typically, the base head has smaller nodes written to it, so it is
122 * better to try to allocate space at the ends of eraseblocks. This is
123 * what the squeeze parameter does.
124 */
125 squeeze = (jhead == BASEHD);
126again:
127 mutex_lock_nested(&wbuf->io_mutex, wbuf->jhead);
128
129 if (c->ro_media) {
130 err = -EROFS;
131 goto out_unlock;
132 }
133
134 avail = c->leb_size - wbuf->offs - wbuf->used;
135 if (wbuf->lnum != -1 && avail >= len)
136 return 0;
137
138 /*
139 * Write buffer wasn't seek'ed or there is no enough space - look for an
140 * LEB with some empty space.
141 */
142 lnum = ubifs_find_free_space(c, len, &free, squeeze);
143 if (lnum >= 0) {
144 /* Found an LEB, add it to the journal head */
145 offs = c->leb_size - free;
146 err = ubifs_add_bud_to_log(c, jhead, lnum, offs);
147 if (err)
148 goto out_return;
149 /* A new bud was successfully allocated and added to the log */
150 goto out;
151 }
152
153 err = lnum;
154 if (err != -ENOSPC)
155 goto out_unlock;
156
157 /*
158 * No free space, we have to run garbage collector to make
159 * some. But the write-buffer mutex has to be unlocked because
160 * GC also takes it.
161 */
162 dbg_jnl("no free space jhead %d, run GC", jhead);
163 mutex_unlock(&wbuf->io_mutex);
164
165 lnum = ubifs_garbage_collect(c, 0);
166 if (lnum < 0) {
167 err = lnum;
168 if (err != -ENOSPC)
169 return err;
170
171 /*
172 * GC could not make a free LEB. But someone else may
173 * have allocated new bud for this journal head,
174 * because we dropped @wbuf->io_mutex, so try once
175 * again.
176 */
177 dbg_jnl("GC couldn't make a free LEB for jhead %d", jhead);
178 if (retries++ < 2) {
179 dbg_jnl("retry (%d)", retries);
180 goto again;
181 }
182
183 dbg_jnl("return -ENOSPC");
184 return err;
185 }
186
187 mutex_lock_nested(&wbuf->io_mutex, wbuf->jhead);
188 dbg_jnl("got LEB %d for jhead %d", lnum, jhead);
189 avail = c->leb_size - wbuf->offs - wbuf->used;
190
191 if (wbuf->lnum != -1 && avail >= len) {
192 /*
193 * Someone else has switched the journal head and we have
194 * enough space now. This happens when more then one process is
195 * trying to write to the same journal head at the same time.
196 */
197 dbg_jnl("return LEB %d back, already have LEB %d:%d",
198 lnum, wbuf->lnum, wbuf->offs + wbuf->used);
199 err = ubifs_return_leb(c, lnum);
200 if (err)
201 goto out_unlock;
202 return 0;
203 }
204
205 err = ubifs_add_bud_to_log(c, jhead, lnum, 0);
206 if (err)
207 goto out_return;
208 offs = 0;
209
210out:
211 err = ubifs_wbuf_seek_nolock(wbuf, lnum, offs, UBI_SHORTTERM);
212 if (err)
213 goto out_unlock;
214
215 return 0;
216
217out_unlock:
218 mutex_unlock(&wbuf->io_mutex);
219 return err;
220
221out_return:
222 /* An error occurred and the LEB has to be returned to lprops */
223 ubifs_assert(err < 0);
224 err1 = ubifs_return_leb(c, lnum);
225 if (err1 && err == -EAGAIN)
226 /*
227 * Return original error code only if it is not %-EAGAIN,
228 * which is not really an error. Otherwise, return the error
229 * code of 'ubifs_return_leb()'.
230 */
231 err = err1;
232 mutex_unlock(&wbuf->io_mutex);
233 return err;
234}
235
236/**
237 * write_node - write node to a journal head.
238 * @c: UBIFS file-system description object
239 * @jhead: journal head
240 * @node: node to write
241 * @len: node length
242 * @lnum: LEB number written is returned here
243 * @offs: offset written is returned here
244 *
245 * This function writes a node to reserved space of journal head @jhead.
246 * Returns zero in case of success and a negative error code in case of
247 * failure.
248 */
249static int write_node(struct ubifs_info *c, int jhead, void *node, int len,
250 int *lnum, int *offs)
251{
252 struct ubifs_wbuf *wbuf = &c->jheads[jhead].wbuf;
253
254 ubifs_assert(jhead != GCHD);
255
256 *lnum = c->jheads[jhead].wbuf.lnum;
257 *offs = c->jheads[jhead].wbuf.offs + c->jheads[jhead].wbuf.used;
258
259 dbg_jnl("jhead %d, LEB %d:%d, len %d", jhead, *lnum, *offs, len);
260 ubifs_prepare_node(c, node, len, 0);
261
262 return ubifs_wbuf_write_nolock(wbuf, node, len);
263}
264
265/**
266 * write_head - write data to a journal head.
267 * @c: UBIFS file-system description object
268 * @jhead: journal head
269 * @buf: buffer to write
270 * @len: length to write
271 * @lnum: LEB number written is returned here
272 * @offs: offset written is returned here
273 * @sync: non-zero if the write-buffer has to by synchronized
274 *
275 * This function is the same as 'write_node()' but it does not assume the
276 * buffer it is writing is a node, so it does not prepare it (which means
277 * initializing common header and calculating CRC).
278 */
279static int write_head(struct ubifs_info *c, int jhead, void *buf, int len,
280 int *lnum, int *offs, int sync)
281{
282 int err;
283 struct ubifs_wbuf *wbuf = &c->jheads[jhead].wbuf;
284
285 ubifs_assert(jhead != GCHD);
286
287 *lnum = c->jheads[jhead].wbuf.lnum;
288 *offs = c->jheads[jhead].wbuf.offs + c->jheads[jhead].wbuf.used;
289 dbg_jnl("jhead %d, LEB %d:%d, len %d", jhead, *lnum, *offs, len);
290
291 err = ubifs_wbuf_write_nolock(wbuf, buf, len);
292 if (err)
293 return err;
294 if (sync)
295 err = ubifs_wbuf_sync_nolock(wbuf);
296 return err;
297}
298
299/**
300 * make_reservation - reserve journal space.
301 * @c: UBIFS file-system description object
302 * @jhead: journal head
303 * @len: how many bytes to reserve
304 *
305 * This function makes space reservation in journal head @jhead. The function
306 * takes the commit lock and locks the journal head, and the caller has to
307 * unlock the head and finish the reservation with 'finish_reservation()'.
308 * Returns zero in case of success and a negative error code in case of
309 * failure.
310 *
311 * Note, the journal head may be unlocked as soon as the data is written, while
312 * the commit lock has to be released after the data has been added to the
313 * TNC.
314 */
315static int make_reservation(struct ubifs_info *c, int jhead, int len)
316{
317 int err, cmt_retries = 0, nospc_retries = 0;
318
319again:
320 down_read(&c->commit_sem);
321 err = reserve_space(c, jhead, len);
322 if (!err)
323 return 0;
324 up_read(&c->commit_sem);
325
326 if (err == -ENOSPC) {
327 /*
328 * GC could not make any progress. We should try to commit
329 * once because it could make some dirty space and GC would
330 * make progress, so make the error -EAGAIN so that the below
331 * will commit and re-try.
332 */
333 if (nospc_retries++ < 2) {
334 dbg_jnl("no space, retry");
335 err = -EAGAIN;
336 }
337
338 /*
339 * This means that the budgeting is incorrect. We always have
340 * to be able to write to the media, because all operations are
341 * budgeted. Deletions are not budgeted, though, but we reserve
342 * an extra LEB for them.
343 */
344 }
345
346 if (err != -EAGAIN)
347 goto out;
348
349 /*
350 * -EAGAIN means that the journal is full or too large, or the above
351 * code wants to do one commit. Do this and re-try.
352 */
353 if (cmt_retries > 128) {
354 /*
355 * This should not happen unless the journal size limitations
356 * are too tough.
357 */
358 ubifs_err("stuck in space allocation");
359 err = -ENOSPC;
360 goto out;
361 } else if (cmt_retries > 32)
362 ubifs_warn("too many space allocation re-tries (%d)",
363 cmt_retries);
364
365 dbg_jnl("-EAGAIN, commit and retry (retried %d times)",
366 cmt_retries);
367 cmt_retries += 1;
368
369 err = ubifs_run_commit(c);
370 if (err)
371 return err;
372 goto again;
373
374out:
375 ubifs_err("cannot reserve %d bytes in jhead %d, error %d",
376 len, jhead, err);
377 if (err == -ENOSPC) {
378 /* This are some budgeting problems, print useful information */
379 down_write(&c->commit_sem);
380 spin_lock(&c->space_lock);
381 dbg_dump_stack();
382 dbg_dump_budg(c);
383 spin_unlock(&c->space_lock);
384 dbg_dump_lprops(c);
385 cmt_retries = dbg_check_lprops(c);
386 up_write(&c->commit_sem);
387 }
388 return err;
389}
390
391/**
392 * release_head - release a journal head.
393 * @c: UBIFS file-system description object
394 * @jhead: journal head
395 *
396 * This function releases journal head @jhead which was locked by
397 * the 'make_reservation()' function. It has to be called after each successful
398 * 'make_reservation()' invocation.
399 */
400static inline void release_head(struct ubifs_info *c, int jhead)
401{
402 mutex_unlock(&c->jheads[jhead].wbuf.io_mutex);
403}
404
405/**
406 * finish_reservation - finish a reservation.
407 * @c: UBIFS file-system description object
408 *
409 * This function finishes journal space reservation. It must be called after
410 * 'make_reservation()'.
411 */
412static void finish_reservation(struct ubifs_info *c)
413{
414 up_read(&c->commit_sem);
415}
416
417/**
418 * get_dent_type - translate VFS inode mode to UBIFS directory entry type.
419 * @mode: inode mode
420 */
421static int get_dent_type(int mode)
422{
423 switch (mode & S_IFMT) {
424 case S_IFREG:
425 return UBIFS_ITYPE_REG;
426 case S_IFDIR:
427 return UBIFS_ITYPE_DIR;
428 case S_IFLNK:
429 return UBIFS_ITYPE_LNK;
430 case S_IFBLK:
431 return UBIFS_ITYPE_BLK;
432 case S_IFCHR:
433 return UBIFS_ITYPE_CHR;
434 case S_IFIFO:
435 return UBIFS_ITYPE_FIFO;
436 case S_IFSOCK:
437 return UBIFS_ITYPE_SOCK;
438 default:
439 BUG();
440 }
441 return 0;
442}
443
444/**
445 * pack_inode - pack an inode node.
446 * @c: UBIFS file-system description object
447 * @ino: buffer in which to pack inode node
448 * @inode: inode to pack
449 * @last: indicates the last node of the group
450 * @last_reference: non-zero if this is a deletion inode
451 */
452static void pack_inode(struct ubifs_info *c, struct ubifs_ino_node *ino,
453 const struct inode *inode, int last,
454 int last_reference)
455{
456 int data_len = 0;
457 struct ubifs_inode *ui = ubifs_inode(inode);
458
459 ino->ch.node_type = UBIFS_INO_NODE;
460 ino_key_init_flash(c, &ino->key, inode->i_ino);
461 ino->creat_sqnum = cpu_to_le64(ui->creat_sqnum);
462 ino->atime_sec = cpu_to_le64(inode->i_atime.tv_sec);
463 ino->atime_nsec = cpu_to_le32(inode->i_atime.tv_nsec);
464 ino->ctime_sec = cpu_to_le64(inode->i_ctime.tv_sec);
465 ino->ctime_nsec = cpu_to_le32(inode->i_ctime.tv_nsec);
466 ino->mtime_sec = cpu_to_le64(inode->i_mtime.tv_sec);
467 ino->mtime_nsec = cpu_to_le32(inode->i_mtime.tv_nsec);
468 ino->uid = cpu_to_le32(inode->i_uid);
469 ino->gid = cpu_to_le32(inode->i_gid);
470 ino->mode = cpu_to_le32(inode->i_mode);
471 ino->flags = cpu_to_le32(ui->flags);
472 ino->size = cpu_to_le64(ui->ui_size);
473 ino->nlink = cpu_to_le32(inode->i_nlink);
474 ino->compr_type = cpu_to_le16(ui->compr_type);
475 ino->data_len = cpu_to_le32(ui->data_len);
476 ino->xattr_cnt = cpu_to_le32(ui->xattr_cnt);
477 ino->xattr_size = cpu_to_le32(ui->xattr_size);
478 ino->xattr_names = cpu_to_le32(ui->xattr_names);
479 zero_ino_node_unused(ino);
480
481 /*
482 * Drop the attached data if this is a deletion inode, the data is not
483 * needed anymore.
484 */
485 if (!last_reference) {
486 memcpy(ino->data, ui->data, ui->data_len);
487 data_len = ui->data_len;
488 }
489
490 ubifs_prep_grp_node(c, ino, UBIFS_INO_NODE_SZ + data_len, last);
491}
492
493/**
494 * mark_inode_clean - mark UBIFS inode as clean.
495 * @c: UBIFS file-system description object
496 * @ui: UBIFS inode to mark as clean
497 *
498 * This helper function marks UBIFS inode @ui as clean by cleaning the
499 * @ui->dirty flag and releasing its budget. Note, VFS may still treat the
500 * inode as dirty and try to write it back, but 'ubifs_write_inode()' would
501 * just do nothing.
502 */
503static void mark_inode_clean(struct ubifs_info *c, struct ubifs_inode *ui)
504{
505 if (ui->dirty)
506 ubifs_release_dirty_inode_budget(c, ui);
507 ui->dirty = 0;
508}
509
510/**
511 * ubifs_jnl_update - update inode.
512 * @c: UBIFS file-system description object
513 * @dir: parent inode or host inode in case of extended attributes
514 * @nm: directory entry name
515 * @inode: inode to update
516 * @deletion: indicates a directory entry deletion i.e unlink or rmdir
517 * @xent: non-zero if the directory entry is an extended attribute entry
518 *
519 * This function updates an inode by writing a directory entry (or extended
520 * attribute entry), the inode itself, and the parent directory inode (or the
521 * host inode) to the journal.
522 *
523 * The function writes the host inode @dir last, which is important in case of
524 * extended attributes. Indeed, then we guarantee that if the host inode gets
525 * synchronized (with 'fsync()'), and the write-buffer it sits in gets flushed,
526 * the extended attribute inode gets flushed too. And this is exactly what the
527 * user expects - synchronizing the host inode synchronizes its extended
528 * attributes. Similarly, this guarantees that if @dir is synchronized, its
529 * directory entry corresponding to @nm gets synchronized too.
530 *
531 * If the inode (@inode) or the parent directory (@dir) are synchronous, this
532 * function synchronizes the write-buffer.
533 *
534 * This function marks the @dir and @inode inodes as clean and returns zero on
535 * success. In case of failure, a negative error code is returned.
536 */
537int ubifs_jnl_update(struct ubifs_info *c, const struct inode *dir,
538 const struct qstr *nm, const struct inode *inode,
539 int deletion, int xent)
540{
541 int err, dlen, ilen, len, lnum, ino_offs, dent_offs;
542 int aligned_dlen, aligned_ilen, sync = IS_DIRSYNC(dir);
543 int last_reference = !!(deletion && inode->i_nlink == 0);
544 struct ubifs_inode *ui = ubifs_inode(inode);
545 struct ubifs_inode *dir_ui = ubifs_inode(dir);
546 struct ubifs_dent_node *dent;
547 struct ubifs_ino_node *ino;
548 union ubifs_key dent_key, ino_key;
549
550 dbg_jnl("ino %lu, dent '%.*s', data len %d in dir ino %lu",
551 inode->i_ino, nm->len, nm->name, ui->data_len, dir->i_ino);
552 ubifs_assert(dir_ui->data_len == 0);
553 ubifs_assert(mutex_is_locked(&dir_ui->ui_mutex));
554
555 dlen = UBIFS_DENT_NODE_SZ + nm->len + 1;
556 ilen = UBIFS_INO_NODE_SZ;
557
558 /*
559 * If the last reference to the inode is being deleted, then there is
560 * no need to attach and write inode data, it is being deleted anyway.
561 * And if the inode is being deleted, no need to synchronize
562 * write-buffer even if the inode is synchronous.
563 */
564 if (!last_reference) {
565 ilen += ui->data_len;
566 sync |= IS_SYNC(inode);
567 }
568
569 aligned_dlen = ALIGN(dlen, 8);
570 aligned_ilen = ALIGN(ilen, 8);
571 len = aligned_dlen + aligned_ilen + UBIFS_INO_NODE_SZ;
572 dent = kmalloc(len, GFP_NOFS);
573 if (!dent)
574 return -ENOMEM;
575
576 /* Make reservation before allocating sequence numbers */
577 err = make_reservation(c, BASEHD, len);
578 if (err)
579 goto out_free;
580
581 if (!xent) {
582 dent->ch.node_type = UBIFS_DENT_NODE;
583 dent_key_init(c, &dent_key, dir->i_ino, nm);
584 } else {
585 dent->ch.node_type = UBIFS_XENT_NODE;
586 xent_key_init(c, &dent_key, dir->i_ino, nm);
587 }
588
589 key_write(c, &dent_key, dent->key);
590 dent->inum = deletion ? 0 : cpu_to_le64(inode->i_ino);
591 dent->type = get_dent_type(inode->i_mode);
592 dent->nlen = cpu_to_le16(nm->len);
593 memcpy(dent->name, nm->name, nm->len);
594 dent->name[nm->len] = '\0';
595 zero_dent_node_unused(dent);
596 ubifs_prep_grp_node(c, dent, dlen, 0);
597
598 ino = (void *)dent + aligned_dlen;
599 pack_inode(c, ino, inode, 0, last_reference);
600 ino = (void *)ino + aligned_ilen;
601 pack_inode(c, ino, dir, 1, 0);
602
603 if (last_reference) {
604 err = ubifs_add_orphan(c, inode->i_ino);
605 if (err) {
606 release_head(c, BASEHD);
607 goto out_finish;
608 }
609 }
610
611 err = write_head(c, BASEHD, dent, len, &lnum, &dent_offs, sync);
612 if (err)
613 goto out_release;
614 if (!sync) {
615 struct ubifs_wbuf *wbuf = &c->jheads[BASEHD].wbuf;
616
617 ubifs_wbuf_add_ino_nolock(wbuf, inode->i_ino);
618 ubifs_wbuf_add_ino_nolock(wbuf, dir->i_ino);
619 }
620 release_head(c, BASEHD);
621 kfree(dent);
622
623 if (deletion) {
624 err = ubifs_tnc_remove_nm(c, &dent_key, nm);
625 if (err)
626 goto out_ro;
627 err = ubifs_add_dirt(c, lnum, dlen);
628 } else
629 err = ubifs_tnc_add_nm(c, &dent_key, lnum, dent_offs, dlen, nm);
630 if (err)
631 goto out_ro;
632
633 /*
634 * Note, we do not remove the inode from TNC even if the last reference
635 * to it has just been deleted, because the inode may still be opened.
636 * Instead, the inode has been added to orphan lists and the orphan
637 * subsystem will take further care about it.
638 */
639 ino_key_init(c, &ino_key, inode->i_ino);
640 ino_offs = dent_offs + aligned_dlen;
641 err = ubifs_tnc_add(c, &ino_key, lnum, ino_offs, ilen);
642 if (err)
643 goto out_ro;
644
645 ino_key_init(c, &ino_key, dir->i_ino);
646 ino_offs += aligned_ilen;
647 err = ubifs_tnc_add(c, &ino_key, lnum, ino_offs, UBIFS_INO_NODE_SZ);
648 if (err)
649 goto out_ro;
650
651 finish_reservation(c);
652 spin_lock(&ui->ui_lock);
653 ui->synced_i_size = ui->ui_size;
654 spin_unlock(&ui->ui_lock);
655 mark_inode_clean(c, ui);
656 mark_inode_clean(c, dir_ui);
657 return 0;
658
659out_finish:
660 finish_reservation(c);
661out_free:
662 kfree(dent);
663 return err;
664
665out_release:
666 release_head(c, BASEHD);
667out_ro:
668 ubifs_ro_mode(c, err);
669 if (last_reference)
670 ubifs_delete_orphan(c, inode->i_ino);
671 finish_reservation(c);
672 return err;
673}
674
675/**
676 * ubifs_jnl_write_data - write a data node to the journal.
677 * @c: UBIFS file-system description object
678 * @inode: inode the data node belongs to
679 * @key: node key
680 * @buf: buffer to write
681 * @len: data length (must not exceed %UBIFS_BLOCK_SIZE)
682 *
683 * This function writes a data node to the journal. Returns %0 if the data node
684 * was successfully written, and a negative error code in case of failure.
685 */
686int ubifs_jnl_write_data(struct ubifs_info *c, const struct inode *inode,
687 const union ubifs_key *key, const void *buf, int len)
688{
689 struct ubifs_data_node *data;
690 int err, lnum, offs, compr_type, out_len;
691 int dlen = UBIFS_DATA_NODE_SZ + UBIFS_BLOCK_SIZE * WORST_COMPR_FACTOR;
692 struct ubifs_inode *ui = ubifs_inode(inode);
693
694 dbg_jnl("ino %lu, blk %u, len %d, key %s", key_inum(c, key),
695 key_block(c, key), len, DBGKEY(key));
696 ubifs_assert(len <= UBIFS_BLOCK_SIZE);
697
698 data = kmalloc(dlen, GFP_NOFS);
699 if (!data)
700 return -ENOMEM;
701
702 data->ch.node_type = UBIFS_DATA_NODE;
703 key_write(c, key, &data->key);
704 data->size = cpu_to_le32(len);
705 zero_data_node_unused(data);
706
707 if (!(ui->flags && UBIFS_COMPR_FL))
708 /* Compression is disabled for this inode */
709 compr_type = UBIFS_COMPR_NONE;
710 else
711 compr_type = ui->compr_type;
712
713 out_len = dlen - UBIFS_DATA_NODE_SZ;
714 ubifs_compress(buf, len, &data->data, &out_len, &compr_type);
715 ubifs_assert(out_len <= UBIFS_BLOCK_SIZE);
716
717 dlen = UBIFS_DATA_NODE_SZ + out_len;
718 data->compr_type = cpu_to_le16(compr_type);
719
720 /* Make reservation before allocating sequence numbers */
721 err = make_reservation(c, DATAHD, dlen);
722 if (err)
723 goto out_free;
724
725 err = write_node(c, DATAHD, data, dlen, &lnum, &offs);
726 if (err)
727 goto out_release;
728 ubifs_wbuf_add_ino_nolock(&c->jheads[DATAHD].wbuf, key_inum(c, key));
729 release_head(c, DATAHD);
730
731 err = ubifs_tnc_add(c, key, lnum, offs, dlen);
732 if (err)
733 goto out_ro;
734
735 finish_reservation(c);
736 kfree(data);
737 return 0;
738
739out_release:
740 release_head(c, DATAHD);
741out_ro:
742 ubifs_ro_mode(c, err);
743 finish_reservation(c);
744out_free:
745 kfree(data);
746 return err;
747}
748
749/**
750 * ubifs_jnl_write_inode - flush inode to the journal.
751 * @c: UBIFS file-system description object
752 * @inode: inode to flush
753 * @deletion: inode has been deleted
754 *
755 * This function writes inode @inode to the journal. If the inode is
756 * synchronous, it also synchronizes the write-buffer. Returns zero in case of
757 * success and a negative error code in case of failure.
758 */
759int ubifs_jnl_write_inode(struct ubifs_info *c, const struct inode *inode,
760 int deletion)
761{
762 int err, len, lnum, offs, sync = 0;
763 struct ubifs_ino_node *ino;
764 struct ubifs_inode *ui = ubifs_inode(inode);
765
766 dbg_jnl("ino %lu%s", inode->i_ino,
767 deletion ? " (last reference)" : "");
768 if (deletion)
769 ubifs_assert(inode->i_nlink == 0);
770
771 len = UBIFS_INO_NODE_SZ;
772 /*
773 * If the inode is being deleted, do not write the attached data. No
774 * need to synchronize the write-buffer either.
775 */
776 if (!deletion) {
777 len += ui->data_len;
778 sync = IS_SYNC(inode);
779 }
780 ino = kmalloc(len, GFP_NOFS);
781 if (!ino)
782 return -ENOMEM;
783
784 /* Make reservation before allocating sequence numbers */
785 err = make_reservation(c, BASEHD, len);
786 if (err)
787 goto out_free;
788
789 pack_inode(c, ino, inode, 1, deletion);
790 err = write_head(c, BASEHD, ino, len, &lnum, &offs, sync);
791 if (err)
792 goto out_release;
793 if (!sync)
794 ubifs_wbuf_add_ino_nolock(&c->jheads[BASEHD].wbuf,
795 inode->i_ino);
796 release_head(c, BASEHD);
797
798 if (deletion) {
799 err = ubifs_tnc_remove_ino(c, inode->i_ino);
800 if (err)
801 goto out_ro;
802 ubifs_delete_orphan(c, inode->i_ino);
803 err = ubifs_add_dirt(c, lnum, len);
804 } else {
805 union ubifs_key key;
806
807 ino_key_init(c, &key, inode->i_ino);
808 err = ubifs_tnc_add(c, &key, lnum, offs, len);
809 }
810 if (err)
811 goto out_ro;
812
813 finish_reservation(c);
814 spin_lock(&ui->ui_lock);
815 ui->synced_i_size = ui->ui_size;
816 spin_unlock(&ui->ui_lock);
817 kfree(ino);
818 return 0;
819
820out_release:
821 release_head(c, BASEHD);
822out_ro:
823 ubifs_ro_mode(c, err);
824 finish_reservation(c);
825out_free:
826 kfree(ino);
827 return err;
828}
829
830/**
831 * ubifs_jnl_rename - rename a directory entry.
832 * @c: UBIFS file-system description object
833 * @old_dir: parent inode of directory entry to rename
834 * @old_dentry: directory entry to rename
835 * @new_dir: parent inode of directory entry to rename
836 * @new_dentry: new directory entry (or directory entry to replace)
837 * @sync: non-zero if the write-buffer has to be synchronized
838 *
839 * This function implements the re-name operation which may involve writing up
840 * to 3 inodes and 2 directory entries. It marks the written inodes as clean
841 * and returns zero on success. In case of failure, a negative error code is
842 * returned.
843 */
844int ubifs_jnl_rename(struct ubifs_info *c, const struct inode *old_dir,
845 const struct dentry *old_dentry,
846 const struct inode *new_dir,
847 const struct dentry *new_dentry, int sync)
848{
849 void *p;
850 union ubifs_key key;
851 struct ubifs_dent_node *dent, *dent2;
852 int err, dlen1, dlen2, ilen, lnum, offs, len;
853 const struct inode *old_inode = old_dentry->d_inode;
854 const struct inode *new_inode = new_dentry->d_inode;
855 int aligned_dlen1, aligned_dlen2, plen = UBIFS_INO_NODE_SZ;
856 int last_reference = !!(new_inode && new_inode->i_nlink == 0);
857 int move = (old_dir != new_dir);
858 struct ubifs_inode *uninitialized_var(new_ui);
859
860 dbg_jnl("dent '%.*s' in dir ino %lu to dent '%.*s' in dir ino %lu",
861 old_dentry->d_name.len, old_dentry->d_name.name,
862 old_dir->i_ino, new_dentry->d_name.len,
863 new_dentry->d_name.name, new_dir->i_ino);
864 ubifs_assert(ubifs_inode(old_dir)->data_len == 0);
865 ubifs_assert(ubifs_inode(new_dir)->data_len == 0);
866 ubifs_assert(mutex_is_locked(&ubifs_inode(old_dir)->ui_mutex));
867 ubifs_assert(mutex_is_locked(&ubifs_inode(new_dir)->ui_mutex));
868
869 dlen1 = UBIFS_DENT_NODE_SZ + new_dentry->d_name.len + 1;
870 dlen2 = UBIFS_DENT_NODE_SZ + old_dentry->d_name.len + 1;
871 if (new_inode) {
872 new_ui = ubifs_inode(new_inode);
873 ubifs_assert(mutex_is_locked(&new_ui->ui_mutex));
874 ilen = UBIFS_INO_NODE_SZ;
875 if (!last_reference)
876 ilen += new_ui->data_len;
877 } else
878 ilen = 0;
879
880 aligned_dlen1 = ALIGN(dlen1, 8);
881 aligned_dlen2 = ALIGN(dlen2, 8);
882 len = aligned_dlen1 + aligned_dlen2 + ALIGN(ilen, 8) + ALIGN(plen, 8);
883 if (old_dir != new_dir)
884 len += plen;
885 dent = kmalloc(len, GFP_NOFS);
886 if (!dent)
887 return -ENOMEM;
888
889 /* Make reservation before allocating sequence numbers */
890 err = make_reservation(c, BASEHD, len);
891 if (err)
892 goto out_free;
893
894 /* Make new dent */
895 dent->ch.node_type = UBIFS_DENT_NODE;
896 dent_key_init_flash(c, &dent->key, new_dir->i_ino, &new_dentry->d_name);
897 dent->inum = cpu_to_le64(old_inode->i_ino);
898 dent->type = get_dent_type(old_inode->i_mode);
899 dent->nlen = cpu_to_le16(new_dentry->d_name.len);
900 memcpy(dent->name, new_dentry->d_name.name, new_dentry->d_name.len);
901 dent->name[new_dentry->d_name.len] = '\0';
902 zero_dent_node_unused(dent);
903 ubifs_prep_grp_node(c, dent, dlen1, 0);
904
905 /* Make deletion dent */
906 dent2 = (void *)dent + aligned_dlen1;
907 dent2->ch.node_type = UBIFS_DENT_NODE;
908 dent_key_init_flash(c, &dent2->key, old_dir->i_ino,
909 &old_dentry->d_name);
910 dent2->inum = 0;
911 dent2->type = DT_UNKNOWN;
912 dent2->nlen = cpu_to_le16(old_dentry->d_name.len);
913 memcpy(dent2->name, old_dentry->d_name.name, old_dentry->d_name.len);
914 dent2->name[old_dentry->d_name.len] = '\0';
915 zero_dent_node_unused(dent2);
916 ubifs_prep_grp_node(c, dent2, dlen2, 0);
917
918 p = (void *)dent2 + aligned_dlen2;
919 if (new_inode) {
920 pack_inode(c, p, new_inode, 0, last_reference);
921 p += ALIGN(ilen, 8);
922 }
923
924 if (!move)
925 pack_inode(c, p, old_dir, 1, 0);
926 else {
927 pack_inode(c, p, old_dir, 0, 0);
928 p += ALIGN(plen, 8);
929 pack_inode(c, p, new_dir, 1, 0);
930 }
931
932 if (last_reference) {
933 err = ubifs_add_orphan(c, new_inode->i_ino);
934 if (err) {
935 release_head(c, BASEHD);
936 goto out_finish;
937 }
938 }
939
940 err = write_head(c, BASEHD, dent, len, &lnum, &offs, sync);
941 if (err)
942 goto out_release;
943 if (!sync) {
944 struct ubifs_wbuf *wbuf = &c->jheads[BASEHD].wbuf;
945
946 ubifs_wbuf_add_ino_nolock(wbuf, new_dir->i_ino);
947 ubifs_wbuf_add_ino_nolock(wbuf, old_dir->i_ino);
948 if (new_inode)
949 ubifs_wbuf_add_ino_nolock(&c->jheads[BASEHD].wbuf,
950 new_inode->i_ino);
951 }
952 release_head(c, BASEHD);
953
954 dent_key_init(c, &key, new_dir->i_ino, &new_dentry->d_name);
955 err = ubifs_tnc_add_nm(c, &key, lnum, offs, dlen1, &new_dentry->d_name);
956 if (err)
957 goto out_ro;
958
959 err = ubifs_add_dirt(c, lnum, dlen2);
960 if (err)
961 goto out_ro;
962
963 dent_key_init(c, &key, old_dir->i_ino, &old_dentry->d_name);
964 err = ubifs_tnc_remove_nm(c, &key, &old_dentry->d_name);
965 if (err)
966 goto out_ro;
967
968 offs += aligned_dlen1 + aligned_dlen2;
969 if (new_inode) {
970 ino_key_init(c, &key, new_inode->i_ino);
971 err = ubifs_tnc_add(c, &key, lnum, offs, ilen);
972 if (err)
973 goto out_ro;
974 offs += ALIGN(ilen, 8);
975 }
976
977 ino_key_init(c, &key, old_dir->i_ino);
978 err = ubifs_tnc_add(c, &key, lnum, offs, plen);
979 if (err)
980 goto out_ro;
981
982 if (old_dir != new_dir) {
983 offs += ALIGN(plen, 8);
984 ino_key_init(c, &key, new_dir->i_ino);
985 err = ubifs_tnc_add(c, &key, lnum, offs, plen);
986 if (err)
987 goto out_ro;
988 }
989
990 finish_reservation(c);
991 if (new_inode) {
992 mark_inode_clean(c, new_ui);
993 spin_lock(&new_ui->ui_lock);
994 new_ui->synced_i_size = new_ui->ui_size;
995 spin_unlock(&new_ui->ui_lock);
996 }
997 mark_inode_clean(c, ubifs_inode(old_dir));
998 if (move)
999 mark_inode_clean(c, ubifs_inode(new_dir));
1000 kfree(dent);
1001 return 0;
1002
1003out_release:
1004 release_head(c, BASEHD);
1005out_ro:
1006 ubifs_ro_mode(c, err);
1007 if (last_reference)
1008 ubifs_delete_orphan(c, new_inode->i_ino);
1009out_finish:
1010 finish_reservation(c);
1011out_free:
1012 kfree(dent);
1013 return err;
1014}
1015
1016/**
1017 * recomp_data_node - re-compress a truncated data node.
1018 * @dn: data node to re-compress
1019 * @new_len: new length
1020 *
1021 * This function is used when an inode is truncated and the last data node of
1022 * the inode has to be re-compressed and re-written.
1023 */
1024static int recomp_data_node(struct ubifs_data_node *dn, int *new_len)
1025{
1026 void *buf;
1027 int err, len, compr_type, out_len;
1028
1029 out_len = le32_to_cpu(dn->size);
1030 buf = kmalloc(out_len * WORST_COMPR_FACTOR, GFP_NOFS);
1031 if (!buf)
1032 return -ENOMEM;
1033
1034 len = le32_to_cpu(dn->ch.len) - UBIFS_DATA_NODE_SZ;
1035 compr_type = le16_to_cpu(dn->compr_type);
1036 err = ubifs_decompress(&dn->data, len, buf, &out_len, compr_type);
1037 if (err)
1038 goto out;
1039
1040 ubifs_compress(buf, *new_len, &dn->data, &out_len, &compr_type);
1041 ubifs_assert(out_len <= UBIFS_BLOCK_SIZE);
1042 dn->compr_type = cpu_to_le16(compr_type);
1043 dn->size = cpu_to_le32(*new_len);
1044 *new_len = UBIFS_DATA_NODE_SZ + out_len;
1045out:
1046 kfree(buf);
1047 return err;
1048}
1049
1050/**
1051 * ubifs_jnl_truncate - update the journal for a truncation.
1052 * @c: UBIFS file-system description object
1053 * @inode: inode to truncate
1054 * @old_size: old size
1055 * @new_size: new size
1056 *
1057 * When the size of a file decreases due to truncation, a truncation node is
1058 * written, the journal tree is updated, and the last data block is re-written
1059 * if it has been affected. The inode is also updated in order to synchronize
1060 * the new inode size.
1061 *
1062 * This function marks the inode as clean and returns zero on success. In case
1063 * of failure, a negative error code is returned.
1064 */
1065int ubifs_jnl_truncate(struct ubifs_info *c, const struct inode *inode,
1066 loff_t old_size, loff_t new_size)
1067{
1068 union ubifs_key key, to_key;
1069 struct ubifs_ino_node *ino;
1070 struct ubifs_trun_node *trun;
1071 struct ubifs_data_node *uninitialized_var(dn);
1072 int err, dlen, len, lnum, offs, bit, sz, sync = IS_SYNC(inode);
1073 struct ubifs_inode *ui = ubifs_inode(inode);
1074 ino_t inum = inode->i_ino;
1075 unsigned int blk;
1076
1077 dbg_jnl("ino %lu, size %lld -> %lld", inum, old_size, new_size);
1078 ubifs_assert(!ui->data_len);
1079 ubifs_assert(S_ISREG(inode->i_mode));
1080 ubifs_assert(mutex_is_locked(&ui->ui_mutex));
1081
1082 sz = UBIFS_TRUN_NODE_SZ + UBIFS_INO_NODE_SZ +
1083 UBIFS_MAX_DATA_NODE_SZ * WORST_COMPR_FACTOR;
1084 ino = kmalloc(sz, GFP_NOFS);
1085 if (!ino)
1086 return -ENOMEM;
1087
1088 trun = (void *)ino + UBIFS_INO_NODE_SZ;
1089 trun->ch.node_type = UBIFS_TRUN_NODE;
1090 trun->inum = cpu_to_le32(inum);
1091 trun->old_size = cpu_to_le64(old_size);
1092 trun->new_size = cpu_to_le64(new_size);
1093 zero_trun_node_unused(trun);
1094
1095 dlen = new_size & (UBIFS_BLOCK_SIZE - 1);
1096 if (dlen) {
1097 /* Get last data block so it can be truncated */
1098 dn = (void *)trun + UBIFS_TRUN_NODE_SZ;
1099 blk = new_size >> UBIFS_BLOCK_SHIFT;
1100 data_key_init(c, &key, inum, blk);
1101 dbg_jnl("last block key %s", DBGKEY(&key));
1102 err = ubifs_tnc_lookup(c, &key, dn);
1103 if (err == -ENOENT)
1104 dlen = 0; /* Not found (so it is a hole) */
1105 else if (err)
1106 goto out_free;
1107 else {
1108 if (le32_to_cpu(dn->size) <= dlen)
1109 dlen = 0; /* Nothing to do */
1110 else {
1111 int compr_type = le16_to_cpu(dn->compr_type);
1112
1113 if (compr_type != UBIFS_COMPR_NONE) {
1114 err = recomp_data_node(dn, &dlen);
1115 if (err)
1116 goto out_free;
1117 } else {
1118 dn->size = cpu_to_le32(dlen);
1119 dlen += UBIFS_DATA_NODE_SZ;
1120 }
1121 zero_data_node_unused(dn);
1122 }
1123 }
1124 }
1125
1126 /* Must make reservation before allocating sequence numbers */
1127 len = UBIFS_TRUN_NODE_SZ + UBIFS_INO_NODE_SZ;
1128 if (dlen)
1129 len += dlen;
1130 err = make_reservation(c, BASEHD, len);
1131 if (err)
1132 goto out_free;
1133
1134 pack_inode(c, ino, inode, 0, 0);
1135 ubifs_prep_grp_node(c, trun, UBIFS_TRUN_NODE_SZ, dlen ? 0 : 1);
1136 if (dlen)
1137 ubifs_prep_grp_node(c, dn, dlen, 1);
1138
1139 err = write_head(c, BASEHD, ino, len, &lnum, &offs, sync);
1140 if (err)
1141 goto out_release;
1142 if (!sync)
1143 ubifs_wbuf_add_ino_nolock(&c->jheads[BASEHD].wbuf, inum);
1144 release_head(c, BASEHD);
1145
1146 if (dlen) {
1147 sz = offs + UBIFS_INO_NODE_SZ + UBIFS_TRUN_NODE_SZ;
1148 err = ubifs_tnc_add(c, &key, lnum, sz, dlen);
1149 if (err)
1150 goto out_ro;
1151 }
1152
1153 ino_key_init(c, &key, inum);
1154 err = ubifs_tnc_add(c, &key, lnum, offs, UBIFS_INO_NODE_SZ);
1155 if (err)
1156 goto out_ro;
1157
1158 err = ubifs_add_dirt(c, lnum, UBIFS_TRUN_NODE_SZ);
1159 if (err)
1160 goto out_ro;
1161
1162 bit = new_size & (UBIFS_BLOCK_SIZE - 1);
1163 blk = (new_size >> UBIFS_BLOCK_SHIFT) + (bit ? 1 : 0);
1164 data_key_init(c, &key, inum, blk);
1165
1166 bit = old_size & (UBIFS_BLOCK_SIZE - 1);
1167 blk = (old_size >> UBIFS_BLOCK_SHIFT) - (bit ? 0: 1);
1168 data_key_init(c, &to_key, inum, blk);
1169
1170 err = ubifs_tnc_remove_range(c, &key, &to_key);
1171 if (err)
1172 goto out_ro;
1173
1174 finish_reservation(c);
1175 spin_lock(&ui->ui_lock);
1176 ui->synced_i_size = ui->ui_size;
1177 spin_unlock(&ui->ui_lock);
1178 mark_inode_clean(c, ui);
1179 kfree(ino);
1180 return 0;
1181
1182out_release:
1183 release_head(c, BASEHD);
1184out_ro:
1185 ubifs_ro_mode(c, err);
1186 finish_reservation(c);
1187out_free:
1188 kfree(ino);
1189 return err;
1190}
1191
1192#ifdef CONFIG_UBIFS_FS_XATTR
1193
1194/**
1195 * ubifs_jnl_delete_xattr - delete an extended attribute.
1196 * @c: UBIFS file-system description object
1197 * @host: host inode
1198 * @inode: extended attribute inode
1199 * @nm: extended attribute entry name
1200 *
1201 * This function delete an extended attribute which is very similar to
1202 * un-linking regular files - it writes a deletion xentry, a deletion inode and
1203 * updates the target inode. Returns zero in case of success and a negative
1204 * error code in case of failure.
1205 */
1206int ubifs_jnl_delete_xattr(struct ubifs_info *c, const struct inode *host,
1207 const struct inode *inode, const struct qstr *nm)
1208{
1209 int err, xlen, hlen, len, lnum, xent_offs, aligned_xlen;
1210 struct ubifs_dent_node *xent;
1211 struct ubifs_ino_node *ino;
1212 union ubifs_key xent_key, key1, key2;
1213 int sync = IS_DIRSYNC(host);
1214 struct ubifs_inode *host_ui = ubifs_inode(host);
1215
1216 dbg_jnl("host %lu, xattr ino %lu, name '%s', data len %d",
1217 host->i_ino, inode->i_ino, nm->name,
1218 ubifs_inode(inode)->data_len);
1219 ubifs_assert(inode->i_nlink == 0);
1220 ubifs_assert(mutex_is_locked(&host_ui->ui_mutex));
1221
1222 /*
1223 * Since we are deleting the inode, we do not bother to attach any data
1224 * to it and assume its length is %UBIFS_INO_NODE_SZ.
1225 */
1226 xlen = UBIFS_DENT_NODE_SZ + nm->len + 1;
1227 aligned_xlen = ALIGN(xlen, 8);
1228 hlen = host_ui->data_len + UBIFS_INO_NODE_SZ;
1229 len = aligned_xlen + UBIFS_INO_NODE_SZ + ALIGN(hlen, 8);
1230
1231 xent = kmalloc(len, GFP_NOFS);
1232 if (!xent)
1233 return -ENOMEM;
1234
1235 /* Make reservation before allocating sequence numbers */
1236 err = make_reservation(c, BASEHD, len);
1237 if (err) {
1238 kfree(xent);
1239 return err;
1240 }
1241
1242 xent->ch.node_type = UBIFS_XENT_NODE;
1243 xent_key_init(c, &xent_key, host->i_ino, nm);
1244 key_write(c, &xent_key, xent->key);
1245 xent->inum = 0;
1246 xent->type = get_dent_type(inode->i_mode);
1247 xent->nlen = cpu_to_le16(nm->len);
1248 memcpy(xent->name, nm->name, nm->len);
1249 xent->name[nm->len] = '\0';
1250 zero_dent_node_unused(xent);
1251 ubifs_prep_grp_node(c, xent, xlen, 0);
1252
1253 ino = (void *)xent + aligned_xlen;
1254 pack_inode(c, ino, inode, 0, 1);
1255 ino = (void *)ino + UBIFS_INO_NODE_SZ;
1256 pack_inode(c, ino, host, 1, 0);
1257
1258 err = write_head(c, BASEHD, xent, len, &lnum, &xent_offs, sync);
1259 if (!sync && !err)
1260 ubifs_wbuf_add_ino_nolock(&c->jheads[BASEHD].wbuf, host->i_ino);
1261 release_head(c, BASEHD);
1262 kfree(xent);
1263 if (err)
1264 goto out_ro;
1265
1266 /* Remove the extended attribute entry from TNC */
1267 err = ubifs_tnc_remove_nm(c, &xent_key, nm);
1268 if (err)
1269 goto out_ro;
1270 err = ubifs_add_dirt(c, lnum, xlen);
1271 if (err)
1272 goto out_ro;
1273
1274 /*
1275 * Remove all nodes belonging to the extended attribute inode from TNC.
1276 * Well, there actually must be only one node - the inode itself.
1277 */
1278 lowest_ino_key(c, &key1, inode->i_ino);
1279 highest_ino_key(c, &key2, inode->i_ino);
1280 err = ubifs_tnc_remove_range(c, &key1, &key2);
1281 if (err)
1282 goto out_ro;
1283 err = ubifs_add_dirt(c, lnum, UBIFS_INO_NODE_SZ);
1284 if (err)
1285 goto out_ro;
1286
1287 /* And update TNC with the new host inode position */
1288 ino_key_init(c, &key1, host->i_ino);
1289 err = ubifs_tnc_add(c, &key1, lnum, xent_offs + len - hlen, hlen);
1290 if (err)
1291 goto out_ro;
1292
1293 finish_reservation(c);
1294 spin_lock(&host_ui->ui_lock);
1295 host_ui->synced_i_size = host_ui->ui_size;
1296 spin_unlock(&host_ui->ui_lock);
1297 mark_inode_clean(c, host_ui);
1298 return 0;
1299
1300out_ro:
1301 ubifs_ro_mode(c, err);
1302 finish_reservation(c);
1303 return err;
1304}
1305
1306/**
1307 * ubifs_jnl_change_xattr - change an extended attribute.
1308 * @c: UBIFS file-system description object
1309 * @inode: extended attribute inode
1310 * @host: host inode
1311 *
1312 * This function writes the updated version of an extended attribute inode and
1313 * the host inode tho the journal (to the base head). The host inode is written
1314 * after the extended attribute inode in order to guarantee that the extended
1315 * attribute will be flushed when the inode is synchronized by 'fsync()' and
1316 * consequently, the write-buffer is synchronized. This function returns zero
1317 * in case of success and a negative error code in case of failure.
1318 */
1319int ubifs_jnl_change_xattr(struct ubifs_info *c, const struct inode *inode,
1320 const struct inode *host)
1321{
1322 int err, len1, len2, aligned_len, aligned_len1, lnum, offs;
1323 struct ubifs_inode *host_ui = ubifs_inode(inode);
1324 struct ubifs_ino_node *ino;
1325 union ubifs_key key;
1326 int sync = IS_DIRSYNC(host);
1327
1328 dbg_jnl("ino %lu, ino %lu", host->i_ino, inode->i_ino);
1329 ubifs_assert(host->i_nlink > 0);
1330 ubifs_assert(inode->i_nlink > 0);
1331 ubifs_assert(mutex_is_locked(&host_ui->ui_mutex));
1332
1333 len1 = UBIFS_INO_NODE_SZ + host_ui->data_len;
1334 len2 = UBIFS_INO_NODE_SZ + ubifs_inode(inode)->data_len;
1335 aligned_len1 = ALIGN(len1, 8);
1336 aligned_len = aligned_len1 + ALIGN(len2, 8);
1337
1338 ino = kmalloc(aligned_len, GFP_NOFS);
1339 if (!ino)
1340 return -ENOMEM;
1341
1342 /* Make reservation before allocating sequence numbers */
1343 err = make_reservation(c, BASEHD, aligned_len);
1344 if (err)
1345 goto out_free;
1346
1347 pack_inode(c, ino, host, 0, 0);
1348 pack_inode(c, (void *)ino + aligned_len1, inode, 1, 0);
1349
1350 err = write_head(c, BASEHD, ino, aligned_len, &lnum, &offs, 0);
1351 if (!sync && !err) {
1352 struct ubifs_wbuf *wbuf = &c->jheads[BASEHD].wbuf;
1353
1354 ubifs_wbuf_add_ino_nolock(wbuf, host->i_ino);
1355 ubifs_wbuf_add_ino_nolock(wbuf, inode->i_ino);
1356 }
1357 release_head(c, BASEHD);
1358 if (err)
1359 goto out_ro;
1360
1361 ino_key_init(c, &key, host->i_ino);
1362 err = ubifs_tnc_add(c, &key, lnum, offs, len1);
1363 if (err)
1364 goto out_ro;
1365
1366 ino_key_init(c, &key, inode->i_ino);
1367 err = ubifs_tnc_add(c, &key, lnum, offs + aligned_len1, len2);
1368 if (err)
1369 goto out_ro;
1370
1371 finish_reservation(c);
1372 spin_lock(&host_ui->ui_lock);
1373 host_ui->synced_i_size = host_ui->ui_size;
1374 spin_unlock(&host_ui->ui_lock);
1375 mark_inode_clean(c, host_ui);
1376 kfree(ino);
1377 return 0;
1378
1379out_ro:
1380 ubifs_ro_mode(c, err);
1381 finish_reservation(c);
1382out_free:
1383 kfree(ino);
1384 return err;
1385}
1386
1387#endif /* CONFIG_UBIFS_FS_XATTR */
diff --git a/fs/ubifs/key.h b/fs/ubifs/key.h
new file mode 100644
index 000000000000..8f7476007549
--- /dev/null
+++ b/fs/ubifs/key.h
@@ -0,0 +1,533 @@
1/*
2 * This file is part of UBIFS.
3 *
4 * Copyright (C) 2006-2008 Nokia Corporation.
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms of the GNU General Public License version 2 as published by
8 * the Free Software Foundation.
9 *
10 * This program is distributed in the hope that it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13 * more details.
14 *
15 * You should have received a copy of the GNU General Public License along with
16 * this program; if not, write to the Free Software Foundation, Inc., 51
17 * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
18 *
19 * Authors: Artem Bityutskiy (Битюцкий Артём)
20 * Adrian Hunter
21 */
22
23/*
24 * This header contains various key-related definitions and helper function.
25 * UBIFS allows several key schemes, so we access key fields only via these
26 * helpers. At the moment only one key scheme is supported.
27 *
28 * Simple key scheme
29 * ~~~~~~~~~~~~~~~~~
30 *
31 * Keys are 64-bits long. First 32-bits are inode number (parent inode number
32 * in case of direntry key). Next 3 bits are node type. The last 29 bits are
33 * 4KiB offset in case of inode node, and direntry hash in case of a direntry
34 * node. We use "r5" hash borrowed from reiserfs.
35 */
36
37#ifndef __UBIFS_KEY_H__
38#define __UBIFS_KEY_H__
39
40/**
41 * key_r5_hash - R5 hash function (borrowed from reiserfs).
42 * @s: direntry name
43 * @len: name length
44 */
45static inline uint32_t key_r5_hash(const char *s, int len)
46{
47 uint32_t a = 0;
48 const signed char *str = (const signed char *)s;
49
50 while (*str) {
51 a += *str << 4;
52 a += *str >> 4;
53 a *= 11;
54 str++;
55 }
56
57 a &= UBIFS_S_KEY_HASH_MASK;
58
59 /*
60 * We use hash values as offset in directories, so values %0 and %1 are
61 * reserved for "." and "..". %2 is reserved for "end of readdir"
62 * marker.
63 */
64 if (unlikely(a >= 0 && a <= 2))
65 a += 3;
66 return a;
67}
68
69/**
70 * key_test_hash - testing hash function.
71 * @str: direntry name
72 * @len: name length
73 */
74static inline uint32_t key_test_hash(const char *str, int len)
75{
76 uint32_t a = 0;
77
78 len = min_t(uint32_t, len, 4);
79 memcpy(&a, str, len);
80 a &= UBIFS_S_KEY_HASH_MASK;
81 if (unlikely(a >= 0 && a <= 2))
82 a += 3;
83 return a;
84}
85
86/**
87 * ino_key_init - initialize inode key.
88 * @c: UBIFS file-system description object
89 * @key: key to initialize
90 * @inum: inode number
91 */
92static inline void ino_key_init(const struct ubifs_info *c,
93 union ubifs_key *key, ino_t inum)
94{
95 key->u32[0] = inum;
96 key->u32[1] = UBIFS_INO_KEY << UBIFS_S_KEY_BLOCK_BITS;
97}
98
99/**
100 * ino_key_init_flash - initialize on-flash inode key.
101 * @c: UBIFS file-system description object
102 * @k: key to initialize
103 * @inum: inode number
104 */
105static inline void ino_key_init_flash(const struct ubifs_info *c, void *k,
106 ino_t inum)
107{
108 union ubifs_key *key = k;
109
110 key->j32[0] = cpu_to_le32(inum);
111 key->j32[1] = cpu_to_le32(UBIFS_INO_KEY << UBIFS_S_KEY_BLOCK_BITS);
112 memset(k + 8, 0, UBIFS_MAX_KEY_LEN - 8);
113}
114
115/**
116 * lowest_ino_key - get the lowest possible inode key.
117 * @c: UBIFS file-system description object
118 * @key: key to initialize
119 * @inum: inode number
120 */
121static inline void lowest_ino_key(const struct ubifs_info *c,
122 union ubifs_key *key, ino_t inum)
123{
124 key->u32[0] = inum;
125 key->u32[1] = 0;
126}
127
128/**
129 * highest_ino_key - get the highest possible inode key.
130 * @c: UBIFS file-system description object
131 * @key: key to initialize
132 * @inum: inode number
133 */
134static inline void highest_ino_key(const struct ubifs_info *c,
135 union ubifs_key *key, ino_t inum)
136{
137 key->u32[0] = inum;
138 key->u32[1] = 0xffffffff;
139}
140
141/**
142 * dent_key_init - initialize directory entry key.
143 * @c: UBIFS file-system description object
144 * @key: key to initialize
145 * @inum: parent inode number
146 * @nm: direntry name and length
147 */
148static inline void dent_key_init(const struct ubifs_info *c,
149 union ubifs_key *key, ino_t inum,
150 const struct qstr *nm)
151{
152 uint32_t hash = c->key_hash(nm->name, nm->len);
153
154 ubifs_assert(!(hash & ~UBIFS_S_KEY_HASH_MASK));
155 key->u32[0] = inum;
156 key->u32[1] = hash | (UBIFS_DENT_KEY << UBIFS_S_KEY_HASH_BITS);
157}
158
159/**
160 * dent_key_init_hash - initialize directory entry key without re-calculating
161 * hash function.
162 * @c: UBIFS file-system description object
163 * @key: key to initialize
164 * @inum: parent inode number
165 * @hash: direntry name hash
166 */
167static inline void dent_key_init_hash(const struct ubifs_info *c,
168 union ubifs_key *key, ino_t inum,
169 uint32_t hash)
170{
171 ubifs_assert(!(hash & ~UBIFS_S_KEY_HASH_MASK));
172 key->u32[0] = inum;
173 key->u32[1] = hash | (UBIFS_DENT_KEY << UBIFS_S_KEY_HASH_BITS);
174}
175
176/**
177 * dent_key_init_flash - initialize on-flash directory entry key.
178 * @c: UBIFS file-system description object
179 * @k: key to initialize
180 * @inum: parent inode number
181 * @nm: direntry name and length
182 */
183static inline void dent_key_init_flash(const struct ubifs_info *c, void *k,
184 ino_t inum, const struct qstr *nm)
185{
186 union ubifs_key *key = k;
187 uint32_t hash = c->key_hash(nm->name, nm->len);
188
189 ubifs_assert(!(hash & ~UBIFS_S_KEY_HASH_MASK));
190 key->j32[0] = cpu_to_le32(inum);
191 key->j32[1] = cpu_to_le32(hash |
192 (UBIFS_DENT_KEY << UBIFS_S_KEY_HASH_BITS));
193 memset(k + 8, 0, UBIFS_MAX_KEY_LEN - 8);
194}
195
196/**
197 * lowest_dent_key - get the lowest possible directory entry key.
198 * @c: UBIFS file-system description object
199 * @key: where to store the lowest key
200 * @inum: parent inode number
201 */
202static inline void lowest_dent_key(const struct ubifs_info *c,
203 union ubifs_key *key, ino_t inum)
204{
205 key->u32[0] = inum;
206 key->u32[1] = UBIFS_DENT_KEY << UBIFS_S_KEY_HASH_BITS;
207}
208
209/**
210 * xent_key_init - initialize extended attribute entry key.
211 * @c: UBIFS file-system description object
212 * @key: key to initialize
213 * @inum: host inode number
214 * @nm: extended attribute entry name and length
215 */
216static inline void xent_key_init(const struct ubifs_info *c,
217 union ubifs_key *key, ino_t inum,
218 const struct qstr *nm)
219{
220 uint32_t hash = c->key_hash(nm->name, nm->len);
221
222 ubifs_assert(!(hash & ~UBIFS_S_KEY_HASH_MASK));
223 key->u32[0] = inum;
224 key->u32[1] = hash | (UBIFS_XENT_KEY << UBIFS_S_KEY_HASH_BITS);
225}
226
227/**
228 * xent_key_init_hash - initialize extended attribute entry key without
229 * re-calculating hash function.
230 * @c: UBIFS file-system description object
231 * @key: key to initialize
232 * @inum: host inode number
233 * @hash: extended attribute entry name hash
234 */
235static inline void xent_key_init_hash(const struct ubifs_info *c,
236 union ubifs_key *key, ino_t inum,
237 uint32_t hash)
238{
239 ubifs_assert(!(hash & ~UBIFS_S_KEY_HASH_MASK));
240 key->u32[0] = inum;
241 key->u32[1] = hash | (UBIFS_XENT_KEY << UBIFS_S_KEY_HASH_BITS);
242}
243
244/**
245 * xent_key_init_flash - initialize on-flash extended attribute entry key.
246 * @c: UBIFS file-system description object
247 * @k: key to initialize
248 * @inum: host inode number
249 * @nm: extended attribute entry name and length
250 */
251static inline void xent_key_init_flash(const struct ubifs_info *c, void *k,
252 ino_t inum, const struct qstr *nm)
253{
254 union ubifs_key *key = k;
255 uint32_t hash = c->key_hash(nm->name, nm->len);
256
257 ubifs_assert(!(hash & ~UBIFS_S_KEY_HASH_MASK));
258 key->j32[0] = cpu_to_le32(inum);
259 key->j32[1] = cpu_to_le32(hash |
260 (UBIFS_XENT_KEY << UBIFS_S_KEY_HASH_BITS));
261 memset(k + 8, 0, UBIFS_MAX_KEY_LEN - 8);
262}
263
264/**
265 * lowest_xent_key - get the lowest possible extended attribute entry key.
266 * @c: UBIFS file-system description object
267 * @key: where to store the lowest key
268 * @inum: host inode number
269 */
270static inline void lowest_xent_key(const struct ubifs_info *c,
271 union ubifs_key *key, ino_t inum)
272{
273 key->u32[0] = inum;
274 key->u32[1] = UBIFS_XENT_KEY << UBIFS_S_KEY_HASH_BITS;
275}
276
277/**
278 * data_key_init - initialize data key.
279 * @c: UBIFS file-system description object
280 * @key: key to initialize
281 * @inum: inode number
282 * @block: block number
283 */
284static inline void data_key_init(const struct ubifs_info *c,
285 union ubifs_key *key, ino_t inum,
286 unsigned int block)
287{
288 ubifs_assert(!(block & ~UBIFS_S_KEY_BLOCK_MASK));
289 key->u32[0] = inum;
290 key->u32[1] = block | (UBIFS_DATA_KEY << UBIFS_S_KEY_BLOCK_BITS);
291}
292
293/**
294 * data_key_init_flash - initialize on-flash data key.
295 * @c: UBIFS file-system description object
296 * @k: key to initialize
297 * @inum: inode number
298 * @block: block number
299 */
300static inline void data_key_init_flash(const struct ubifs_info *c, void *k,
301 ino_t inum, unsigned int block)
302{
303 union ubifs_key *key = k;
304
305 ubifs_assert(!(block & ~UBIFS_S_KEY_BLOCK_MASK));
306 key->j32[0] = cpu_to_le32(inum);
307 key->j32[1] = cpu_to_le32(block |
308 (UBIFS_DATA_KEY << UBIFS_S_KEY_BLOCK_BITS));
309 memset(k + 8, 0, UBIFS_MAX_KEY_LEN - 8);
310}
311
312/**
313 * trun_key_init - initialize truncation node key.
314 * @c: UBIFS file-system description object
315 * @key: key to initialize
316 * @inum: inode number
317 *
318 * Note, UBIFS does not have truncation keys on the media and this function is
319 * only used for purposes of replay.
320 */
321static inline void trun_key_init(const struct ubifs_info *c,
322 union ubifs_key *key, ino_t inum)
323{
324 key->u32[0] = inum;
325 key->u32[1] = UBIFS_TRUN_KEY << UBIFS_S_KEY_BLOCK_BITS;
326}
327
328/**
329 * key_type - get key type.
330 * @c: UBIFS file-system description object
331 * @key: key to get type of
332 */
333static inline int key_type(const struct ubifs_info *c,
334 const union ubifs_key *key)
335{
336 return key->u32[1] >> UBIFS_S_KEY_BLOCK_BITS;
337}
338
339/**
340 * key_type_flash - get type of a on-flash formatted key.
341 * @c: UBIFS file-system description object
342 * @k: key to get type of
343 */
344static inline int key_type_flash(const struct ubifs_info *c, const void *k)
345{
346 const union ubifs_key *key = k;
347
348 return le32_to_cpu(key->u32[1]) >> UBIFS_S_KEY_BLOCK_BITS;
349}
350
351/**
352 * key_inum - fetch inode number from key.
353 * @c: UBIFS file-system description object
354 * @k: key to fetch inode number from
355 */
356static inline ino_t key_inum(const struct ubifs_info *c, const void *k)
357{
358 const union ubifs_key *key = k;
359
360 return key->u32[0];
361}
362
363/**
364 * key_inum_flash - fetch inode number from an on-flash formatted key.
365 * @c: UBIFS file-system description object
366 * @k: key to fetch inode number from
367 */
368static inline ino_t key_inum_flash(const struct ubifs_info *c, const void *k)
369{
370 const union ubifs_key *key = k;
371
372 return le32_to_cpu(key->j32[0]);
373}
374
375/**
376 * key_hash - get directory entry hash.
377 * @c: UBIFS file-system description object
378 * @key: the key to get hash from
379 */
380static inline int key_hash(const struct ubifs_info *c,
381 const union ubifs_key *key)
382{
383 return key->u32[1] & UBIFS_S_KEY_HASH_MASK;
384}
385
386/**
387 * key_hash_flash - get directory entry hash from an on-flash formatted key.
388 * @c: UBIFS file-system description object
389 * @k: the key to get hash from
390 */
391static inline int key_hash_flash(const struct ubifs_info *c, const void *k)
392{
393 const union ubifs_key *key = k;
394
395 return le32_to_cpu(key->j32[1]) & UBIFS_S_KEY_HASH_MASK;
396}
397
398/**
399 * key_block - get data block number.
400 * @c: UBIFS file-system description object
401 * @key: the key to get the block number from
402 */
403static inline unsigned int key_block(const struct ubifs_info *c,
404 const union ubifs_key *key)
405{
406 return key->u32[1] & UBIFS_S_KEY_BLOCK_MASK;
407}
408
409/**
410 * key_block_flash - get data block number from an on-flash formatted key.
411 * @c: UBIFS file-system description object
412 * @k: the key to get the block number from
413 */
414static inline unsigned int key_block_flash(const struct ubifs_info *c,
415 const void *k)
416{
417 const union ubifs_key *key = k;
418
419 return le32_to_cpu(key->u32[1]) & UBIFS_S_KEY_BLOCK_MASK;
420}
421
422/**
423 * key_read - transform a key to in-memory format.
424 * @c: UBIFS file-system description object
425 * @from: the key to transform
426 * @to: the key to store the result
427 */
428static inline void key_read(const struct ubifs_info *c, const void *from,
429 union ubifs_key *to)
430{
431 const union ubifs_key *f = from;
432
433 to->u32[0] = le32_to_cpu(f->j32[0]);
434 to->u32[1] = le32_to_cpu(f->j32[1]);
435}
436
437/**
438 * key_write - transform a key from in-memory format.
439 * @c: UBIFS file-system description object
440 * @from: the key to transform
441 * @to: the key to store the result
442 */
443static inline void key_write(const struct ubifs_info *c,
444 const union ubifs_key *from, void *to)
445{
446 union ubifs_key *t = to;
447
448 t->j32[0] = cpu_to_le32(from->u32[0]);
449 t->j32[1] = cpu_to_le32(from->u32[1]);
450 memset(to + 8, 0, UBIFS_MAX_KEY_LEN - 8);
451}
452
453/**
454 * key_write_idx - transform a key from in-memory format for the index.
455 * @c: UBIFS file-system description object
456 * @from: the key to transform
457 * @to: the key to store the result
458 */
459static inline void key_write_idx(const struct ubifs_info *c,
460 const union ubifs_key *from, void *to)
461{
462 union ubifs_key *t = to;
463
464 t->j32[0] = cpu_to_le32(from->u32[0]);
465 t->j32[1] = cpu_to_le32(from->u32[1]);
466}
467
468/**
469 * key_copy - copy a key.
470 * @c: UBIFS file-system description object
471 * @from: the key to copy from
472 * @to: the key to copy to
473 */
474static inline void key_copy(const struct ubifs_info *c,
475 const union ubifs_key *from, union ubifs_key *to)
476{
477 to->u64[0] = from->u64[0];
478}
479
480/**
481 * keys_cmp - compare keys.
482 * @c: UBIFS file-system description object
483 * @key1: the first key to compare
484 * @key2: the second key to compare
485 *
486 * This function compares 2 keys and returns %-1 if @key1 is less than
487 * @key2, 0 if the keys are equivalent and %1 if @key1 is greater than @key2.
488 */
489static inline int keys_cmp(const struct ubifs_info *c,
490 const union ubifs_key *key1,
491 const union ubifs_key *key2)
492{
493 if (key1->u32[0] < key2->u32[0])
494 return -1;
495 if (key1->u32[0] > key2->u32[0])
496 return 1;
497 if (key1->u32[1] < key2->u32[1])
498 return -1;
499 if (key1->u32[1] > key2->u32[1])
500 return 1;
501
502 return 0;
503}
504
505/**
506 * is_hash_key - is a key vulnerable to hash collisions.
507 * @c: UBIFS file-system description object
508 * @key: key
509 *
510 * This function returns %1 if @key is a hashed key or %0 otherwise.
511 */
512static inline int is_hash_key(const struct ubifs_info *c,
513 const union ubifs_key *key)
514{
515 int type = key_type(c, key);
516
517 return type == UBIFS_DENT_KEY || type == UBIFS_XENT_KEY;
518}
519
520/**
521 * key_max_inode_size - get maximum file size allowed by current key format.
522 * @c: UBIFS file-system description object
523 */
524static inline unsigned long long key_max_inode_size(const struct ubifs_info *c)
525{
526 switch (c->key_fmt) {
527 case UBIFS_SIMPLE_KEY_FMT:
528 return (1ULL << UBIFS_S_KEY_BLOCK_BITS) * UBIFS_BLOCK_SIZE;
529 default:
530 return 0;
531 }
532}
533#endif /* !__UBIFS_KEY_H__ */
diff --git a/fs/ubifs/log.c b/fs/ubifs/log.c
new file mode 100644
index 000000000000..36857b9ed59e
--- /dev/null
+++ b/fs/ubifs/log.c
@@ -0,0 +1,805 @@
1/*
2 * This file is part of UBIFS.
3 *
4 * Copyright (C) 2006-2008 Nokia Corporation.
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms of the GNU General Public License version 2 as published by
8 * the Free Software Foundation.
9 *
10 * This program is distributed in the hope that it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13 * more details.
14 *
15 * You should have received a copy of the GNU General Public License along with
16 * this program; if not, write to the Free Software Foundation, Inc., 51
17 * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
18 *
19 * Authors: Artem Bityutskiy (Битюцкий Артём)
20 * Adrian Hunter
21 */
22
23/*
24 * This file is a part of UBIFS journal implementation and contains various
25 * functions which manipulate the log. The log is a fixed area on the flash
26 * which does not contain any data but refers to buds. The log is a part of the
27 * journal.
28 */
29
30#include "ubifs.h"
31
32#ifdef CONFIG_UBIFS_FS_DEBUG
33static int dbg_check_bud_bytes(struct ubifs_info *c);
34#else
35#define dbg_check_bud_bytes(c) 0
36#endif
37
38/**
39 * ubifs_search_bud - search bud LEB.
40 * @c: UBIFS file-system description object
41 * @lnum: logical eraseblock number to search
42 *
43 * This function searches bud LEB @lnum. Returns bud description object in case
44 * of success and %NULL if there is no bud with this LEB number.
45 */
46struct ubifs_bud *ubifs_search_bud(struct ubifs_info *c, int lnum)
47{
48 struct rb_node *p;
49 struct ubifs_bud *bud;
50
51 spin_lock(&c->buds_lock);
52 p = c->buds.rb_node;
53 while (p) {
54 bud = rb_entry(p, struct ubifs_bud, rb);
55 if (lnum < bud->lnum)
56 p = p->rb_left;
57 else if (lnum > bud->lnum)
58 p = p->rb_right;
59 else {
60 spin_unlock(&c->buds_lock);
61 return bud;
62 }
63 }
64 spin_unlock(&c->buds_lock);
65 return NULL;
66}
67
68/**
69 * ubifs_get_wbuf - get the wbuf associated with a LEB, if there is one.
70 * @c: UBIFS file-system description object
71 * @lnum: logical eraseblock number to search
72 *
73 * This functions returns the wbuf for @lnum or %NULL if there is not one.
74 */
75struct ubifs_wbuf *ubifs_get_wbuf(struct ubifs_info *c, int lnum)
76{
77 struct rb_node *p;
78 struct ubifs_bud *bud;
79 int jhead;
80
81 if (!c->jheads)
82 return NULL;
83
84 spin_lock(&c->buds_lock);
85 p = c->buds.rb_node;
86 while (p) {
87 bud = rb_entry(p, struct ubifs_bud, rb);
88 if (lnum < bud->lnum)
89 p = p->rb_left;
90 else if (lnum > bud->lnum)
91 p = p->rb_right;
92 else {
93 jhead = bud->jhead;
94 spin_unlock(&c->buds_lock);
95 return &c->jheads[jhead].wbuf;
96 }
97 }
98 spin_unlock(&c->buds_lock);
99 return NULL;
100}
101
102/**
103 * next_log_lnum - switch to the next log LEB.
104 * @c: UBIFS file-system description object
105 * @lnum: current log LEB
106 */
107static inline int next_log_lnum(const struct ubifs_info *c, int lnum)
108{
109 lnum += 1;
110 if (lnum > c->log_last)
111 lnum = UBIFS_LOG_LNUM;
112
113 return lnum;
114}
115
116/**
117 * empty_log_bytes - calculate amount of empty space in the log.
118 * @c: UBIFS file-system description object
119 */
120static inline long long empty_log_bytes(const struct ubifs_info *c)
121{
122 long long h, t;
123
124 h = (long long)c->lhead_lnum * c->leb_size + c->lhead_offs;
125 t = (long long)c->ltail_lnum * c->leb_size;
126
127 if (h >= t)
128 return c->log_bytes - h + t;
129 else
130 return t - h;
131}
132
133/**
134 * ubifs_add_bud - add bud LEB to the tree of buds and its journal head list.
135 * @c: UBIFS file-system description object
136 * @bud: the bud to add
137 */
138void ubifs_add_bud(struct ubifs_info *c, struct ubifs_bud *bud)
139{
140 struct rb_node **p, *parent = NULL;
141 struct ubifs_bud *b;
142 struct ubifs_jhead *jhead;
143
144 spin_lock(&c->buds_lock);
145 p = &c->buds.rb_node;
146 while (*p) {
147 parent = *p;
148 b = rb_entry(parent, struct ubifs_bud, rb);
149 ubifs_assert(bud->lnum != b->lnum);
150 if (bud->lnum < b->lnum)
151 p = &(*p)->rb_left;
152 else
153 p = &(*p)->rb_right;
154 }
155
156 rb_link_node(&bud->rb, parent, p);
157 rb_insert_color(&bud->rb, &c->buds);
158 if (c->jheads) {
159 jhead = &c->jheads[bud->jhead];
160 list_add_tail(&bud->list, &jhead->buds_list);
161 } else
162 ubifs_assert(c->replaying && (c->vfs_sb->s_flags & MS_RDONLY));
163
164 /*
165 * Note, although this is a new bud, we anyway account this space now,
166 * before any data has been written to it, because this is about to
167 * guarantee fixed mount time, and this bud will anyway be read and
168 * scanned.
169 */
170 c->bud_bytes += c->leb_size - bud->start;
171
172 dbg_log("LEB %d:%d, jhead %d, bud_bytes %lld", bud->lnum,
173 bud->start, bud->jhead, c->bud_bytes);
174 spin_unlock(&c->buds_lock);
175}
176
177/**
178 * ubifs_create_buds_lists - create journal head buds lists for remount rw.
179 * @c: UBIFS file-system description object
180 */
181void ubifs_create_buds_lists(struct ubifs_info *c)
182{
183 struct rb_node *p;
184
185 spin_lock(&c->buds_lock);
186 p = rb_first(&c->buds);
187 while (p) {
188 struct ubifs_bud *bud = rb_entry(p, struct ubifs_bud, rb);
189 struct ubifs_jhead *jhead = &c->jheads[bud->jhead];
190
191 list_add_tail(&bud->list, &jhead->buds_list);
192 p = rb_next(p);
193 }
194 spin_unlock(&c->buds_lock);
195}
196
197/**
198 * ubifs_add_bud_to_log - add a new bud to the log.
199 * @c: UBIFS file-system description object
200 * @jhead: journal head the bud belongs to
201 * @lnum: LEB number of the bud
202 * @offs: starting offset of the bud
203 *
204 * This function writes reference node for the new bud LEB @lnum it to the log,
205 * and adds it to the buds tress. It also makes sure that log size does not
206 * exceed the 'c->max_bud_bytes' limit. Returns zero in case of success,
207 * %-EAGAIN if commit is required, and a negative error codes in case of
208 * failure.
209 */
210int ubifs_add_bud_to_log(struct ubifs_info *c, int jhead, int lnum, int offs)
211{
212 int err;
213 struct ubifs_bud *bud;
214 struct ubifs_ref_node *ref;
215
216 bud = kmalloc(sizeof(struct ubifs_bud), GFP_NOFS);
217 if (!bud)
218 return -ENOMEM;
219 ref = kzalloc(c->ref_node_alsz, GFP_NOFS);
220 if (!ref) {
221 kfree(bud);
222 return -ENOMEM;
223 }
224
225 mutex_lock(&c->log_mutex);
226
227 if (c->ro_media) {
228 err = -EROFS;
229 goto out_unlock;
230 }
231
232 /* Make sure we have enough space in the log */
233 if (empty_log_bytes(c) - c->ref_node_alsz < c->min_log_bytes) {
234 dbg_log("not enough log space - %lld, required %d",
235 empty_log_bytes(c), c->min_log_bytes);
236 ubifs_commit_required(c);
237 err = -EAGAIN;
238 goto out_unlock;
239 }
240
241 /*
242 * Make sure the the amount of space in buds will not exceed
243 * 'c->max_bud_bytes' limit, because we want to guarantee mount time
244 * limits.
245 *
246 * It is not necessary to hold @c->buds_lock when reading @c->bud_bytes
247 * because we are holding @c->log_mutex. All @c->bud_bytes take place
248 * when both @c->log_mutex and @c->bud_bytes are locked.
249 */
250 if (c->bud_bytes + c->leb_size - offs > c->max_bud_bytes) {
251 dbg_log("bud bytes %lld (%lld max), require commit",
252 c->bud_bytes, c->max_bud_bytes);
253 ubifs_commit_required(c);
254 err = -EAGAIN;
255 goto out_unlock;
256 }
257
258 /*
259 * If the journal is full enough - start background commit. Note, it is
260 * OK to read 'c->cmt_state' without spinlock because integer reads
261 * are atomic in the kernel.
262 */
263 if (c->bud_bytes >= c->bg_bud_bytes &&
264 c->cmt_state == COMMIT_RESTING) {
265 dbg_log("bud bytes %lld (%lld max), initiate BG commit",
266 c->bud_bytes, c->max_bud_bytes);
267 ubifs_request_bg_commit(c);
268 }
269
270 bud->lnum = lnum;
271 bud->start = offs;
272 bud->jhead = jhead;
273
274 ref->ch.node_type = UBIFS_REF_NODE;
275 ref->lnum = cpu_to_le32(bud->lnum);
276 ref->offs = cpu_to_le32(bud->start);
277 ref->jhead = cpu_to_le32(jhead);
278
279 if (c->lhead_offs > c->leb_size - c->ref_node_alsz) {
280 c->lhead_lnum = next_log_lnum(c, c->lhead_lnum);
281 c->lhead_offs = 0;
282 }
283
284 if (c->lhead_offs == 0) {
285 /* Must ensure next log LEB has been unmapped */
286 err = ubifs_leb_unmap(c, c->lhead_lnum);
287 if (err)
288 goto out_unlock;
289 }
290
291 if (bud->start == 0) {
292 /*
293 * Before writing the LEB reference which refers an empty LEB
294 * to the log, we have to make sure it is mapped, because
295 * otherwise we'd risk to refer an LEB with garbage in case of
296 * an unclean reboot, because the target LEB might have been
297 * unmapped, but not yet physically erased.
298 */
299 err = ubi_leb_map(c->ubi, bud->lnum, UBI_SHORTTERM);
300 if (err)
301 goto out_unlock;
302 }
303
304 dbg_log("write ref LEB %d:%d",
305 c->lhead_lnum, c->lhead_offs);
306 err = ubifs_write_node(c, ref, UBIFS_REF_NODE_SZ, c->lhead_lnum,
307 c->lhead_offs, UBI_SHORTTERM);
308 if (err)
309 goto out_unlock;
310
311 c->lhead_offs += c->ref_node_alsz;
312
313 ubifs_add_bud(c, bud);
314
315 mutex_unlock(&c->log_mutex);
316 kfree(ref);
317 return 0;
318
319out_unlock:
320 mutex_unlock(&c->log_mutex);
321 kfree(ref);
322 kfree(bud);
323 return err;
324}
325
326/**
327 * remove_buds - remove used buds.
328 * @c: UBIFS file-system description object
329 *
330 * This function removes use buds from the buds tree. It does not remove the
331 * buds which are pointed to by journal heads.
332 */
333static void remove_buds(struct ubifs_info *c)
334{
335 struct rb_node *p;
336
337 ubifs_assert(list_empty(&c->old_buds));
338 c->cmt_bud_bytes = 0;
339 spin_lock(&c->buds_lock);
340 p = rb_first(&c->buds);
341 while (p) {
342 struct rb_node *p1 = p;
343 struct ubifs_bud *bud;
344 struct ubifs_wbuf *wbuf;
345
346 p = rb_next(p);
347 bud = rb_entry(p1, struct ubifs_bud, rb);
348 wbuf = &c->jheads[bud->jhead].wbuf;
349
350 if (wbuf->lnum == bud->lnum) {
351 /*
352 * Do not remove buds which are pointed to by journal
353 * heads (non-closed buds).
354 */
355 c->cmt_bud_bytes += wbuf->offs - bud->start;
356 dbg_log("preserve %d:%d, jhead %d, bud bytes %d, "
357 "cmt_bud_bytes %lld", bud->lnum, bud->start,
358 bud->jhead, wbuf->offs - bud->start,
359 c->cmt_bud_bytes);
360 bud->start = wbuf->offs;
361 } else {
362 c->cmt_bud_bytes += c->leb_size - bud->start;
363 dbg_log("remove %d:%d, jhead %d, bud bytes %d, "
364 "cmt_bud_bytes %lld", bud->lnum, bud->start,
365 bud->jhead, c->leb_size - bud->start,
366 c->cmt_bud_bytes);
367 rb_erase(p1, &c->buds);
368 list_del(&bud->list);
369 /*
370 * If the commit does not finish, the recovery will need
371 * to replay the journal, in which case the old buds
372 * must be unchanged. Do not release them until post
373 * commit i.e. do not allow them to be garbage
374 * collected.
375 */
376 list_add(&bud->list, &c->old_buds);
377 }
378 }
379 spin_unlock(&c->buds_lock);
380}
381
382/**
383 * ubifs_log_start_commit - start commit.
384 * @c: UBIFS file-system description object
385 * @ltail_lnum: return new log tail LEB number
386 *
387 * The commit operation starts with writing "commit start" node to the log and
388 * reference nodes for all journal heads which will define new journal after
389 * the commit has been finished. The commit start and reference nodes are
390 * written in one go to the nearest empty log LEB (hence, when commit is
391 * finished UBIFS may safely unmap all the previous log LEBs). This function
392 * returns zero in case of success and a negative error code in case of
393 * failure.
394 */
395int ubifs_log_start_commit(struct ubifs_info *c, int *ltail_lnum)
396{
397 void *buf;
398 struct ubifs_cs_node *cs;
399 struct ubifs_ref_node *ref;
400 int err, i, max_len, len;
401
402 err = dbg_check_bud_bytes(c);
403 if (err)
404 return err;
405
406 max_len = UBIFS_CS_NODE_SZ + c->jhead_cnt * UBIFS_REF_NODE_SZ;
407 max_len = ALIGN(max_len, c->min_io_size);
408 buf = cs = kmalloc(max_len, GFP_NOFS);
409 if (!buf)
410 return -ENOMEM;
411
412 cs->ch.node_type = UBIFS_CS_NODE;
413 cs->cmt_no = cpu_to_le64(c->cmt_no + 1);
414 ubifs_prepare_node(c, cs, UBIFS_CS_NODE_SZ, 0);
415
416 /*
417 * Note, we do not lock 'c->log_mutex' because this is the commit start
418 * phase and we are exclusively using the log. And we do not lock
419 * write-buffer because nobody can write to the file-system at this
420 * phase.
421 */
422
423 len = UBIFS_CS_NODE_SZ;
424 for (i = 0; i < c->jhead_cnt; i++) {
425 int lnum = c->jheads[i].wbuf.lnum;
426 int offs = c->jheads[i].wbuf.offs;
427
428 if (lnum == -1 || offs == c->leb_size)
429 continue;
430
431 dbg_log("add ref to LEB %d:%d for jhead %d", lnum, offs, i);
432 ref = buf + len;
433 ref->ch.node_type = UBIFS_REF_NODE;
434 ref->lnum = cpu_to_le32(lnum);
435 ref->offs = cpu_to_le32(offs);
436 ref->jhead = cpu_to_le32(i);
437
438 ubifs_prepare_node(c, ref, UBIFS_REF_NODE_SZ, 0);
439 len += UBIFS_REF_NODE_SZ;
440 }
441
442 ubifs_pad(c, buf + len, ALIGN(len, c->min_io_size) - len);
443
444 /* Switch to the next log LEB */
445 if (c->lhead_offs) {
446 c->lhead_lnum = next_log_lnum(c, c->lhead_lnum);
447 c->lhead_offs = 0;
448 }
449
450 if (c->lhead_offs == 0) {
451 /* Must ensure next LEB has been unmapped */
452 err = ubifs_leb_unmap(c, c->lhead_lnum);
453 if (err)
454 goto out;
455 }
456
457 len = ALIGN(len, c->min_io_size);
458 dbg_log("writing commit start at LEB %d:0, len %d", c->lhead_lnum, len);
459 err = ubifs_leb_write(c, c->lhead_lnum, cs, 0, len, UBI_SHORTTERM);
460 if (err)
461 goto out;
462
463 *ltail_lnum = c->lhead_lnum;
464
465 c->lhead_offs += len;
466 if (c->lhead_offs == c->leb_size) {
467 c->lhead_lnum = next_log_lnum(c, c->lhead_lnum);
468 c->lhead_offs = 0;
469 }
470
471 remove_buds(c);
472
473 /*
474 * We have started the commit and now users may use the rest of the log
475 * for new writes.
476 */
477 c->min_log_bytes = 0;
478
479out:
480 kfree(buf);
481 return err;
482}
483
484/**
485 * ubifs_log_end_commit - end commit.
486 * @c: UBIFS file-system description object
487 * @ltail_lnum: new log tail LEB number
488 *
489 * This function is called on when the commit operation was finished. It
490 * moves log tail to new position and unmaps LEBs which contain obsolete data.
491 * Returns zero in case of success and a negative error code in case of
492 * failure.
493 */
494int ubifs_log_end_commit(struct ubifs_info *c, int ltail_lnum)
495{
496 int err;
497
498 /*
499 * At this phase we have to lock 'c->log_mutex' because UBIFS allows FS
500 * writes during commit. Its only short "commit" start phase when
501 * writers are blocked.
502 */
503 mutex_lock(&c->log_mutex);
504
505 dbg_log("old tail was LEB %d:0, new tail is LEB %d:0",
506 c->ltail_lnum, ltail_lnum);
507
508 c->ltail_lnum = ltail_lnum;
509 /*
510 * The commit is finished and from now on it must be guaranteed that
511 * there is always enough space for the next commit.
512 */
513 c->min_log_bytes = c->leb_size;
514
515 spin_lock(&c->buds_lock);
516 c->bud_bytes -= c->cmt_bud_bytes;
517 spin_unlock(&c->buds_lock);
518
519 err = dbg_check_bud_bytes(c);
520
521 mutex_unlock(&c->log_mutex);
522 return err;
523}
524
525/**
526 * ubifs_log_post_commit - things to do after commit is completed.
527 * @c: UBIFS file-system description object
528 * @old_ltail_lnum: old log tail LEB number
529 *
530 * Release buds only after commit is completed, because they must be unchanged
531 * if recovery is needed.
532 *
533 * Unmap log LEBs only after commit is completed, because they may be needed for
534 * recovery.
535 *
536 * This function returns %0 on success and a negative error code on failure.
537 */
538int ubifs_log_post_commit(struct ubifs_info *c, int old_ltail_lnum)
539{
540 int lnum, err = 0;
541
542 while (!list_empty(&c->old_buds)) {
543 struct ubifs_bud *bud;
544
545 bud = list_entry(c->old_buds.next, struct ubifs_bud, list);
546 err = ubifs_return_leb(c, bud->lnum);
547 if (err)
548 return err;
549 list_del(&bud->list);
550 kfree(bud);
551 }
552 mutex_lock(&c->log_mutex);
553 for (lnum = old_ltail_lnum; lnum != c->ltail_lnum;
554 lnum = next_log_lnum(c, lnum)) {
555 dbg_log("unmap log LEB %d", lnum);
556 err = ubifs_leb_unmap(c, lnum);
557 if (err)
558 goto out;
559 }
560out:
561 mutex_unlock(&c->log_mutex);
562 return err;
563}
564
565/**
566 * struct done_ref - references that have been done.
567 * @rb: rb-tree node
568 * @lnum: LEB number
569 */
570struct done_ref {
571 struct rb_node rb;
572 int lnum;
573};
574
575/**
576 * done_already - determine if a reference has been done already.
577 * @done_tree: rb-tree to store references that have been done
578 * @lnum: LEB number of reference
579 *
580 * This function returns %1 if the reference has been done, %0 if not, otherwise
581 * a negative error code is returned.
582 */
583static int done_already(struct rb_root *done_tree, int lnum)
584{
585 struct rb_node **p = &done_tree->rb_node, *parent = NULL;
586 struct done_ref *dr;
587
588 while (*p) {
589 parent = *p;
590 dr = rb_entry(parent, struct done_ref, rb);
591 if (lnum < dr->lnum)
592 p = &(*p)->rb_left;
593 else if (lnum > dr->lnum)
594 p = &(*p)->rb_right;
595 else
596 return 1;
597 }
598
599 dr = kzalloc(sizeof(struct done_ref), GFP_NOFS);
600 if (!dr)
601 return -ENOMEM;
602
603 dr->lnum = lnum;
604
605 rb_link_node(&dr->rb, parent, p);
606 rb_insert_color(&dr->rb, done_tree);
607
608 return 0;
609}
610
611/**
612 * destroy_done_tree - destroy the done tree.
613 * @done_tree: done tree to destroy
614 */
615static void destroy_done_tree(struct rb_root *done_tree)
616{
617 struct rb_node *this = done_tree->rb_node;
618 struct done_ref *dr;
619
620 while (this) {
621 if (this->rb_left) {
622 this = this->rb_left;
623 continue;
624 } else if (this->rb_right) {
625 this = this->rb_right;
626 continue;
627 }
628 dr = rb_entry(this, struct done_ref, rb);
629 this = rb_parent(this);
630 if (this) {
631 if (this->rb_left == &dr->rb)
632 this->rb_left = NULL;
633 else
634 this->rb_right = NULL;
635 }
636 kfree(dr);
637 }
638}
639
640/**
641 * add_node - add a node to the consolidated log.
642 * @c: UBIFS file-system description object
643 * @buf: buffer to which to add
644 * @lnum: LEB number to which to write is passed and returned here
645 * @offs: offset to where to write is passed and returned here
646 * @node: node to add
647 *
648 * This function returns %0 on success and a negative error code on failure.
649 */
650static int add_node(struct ubifs_info *c, void *buf, int *lnum, int *offs,
651 void *node)
652{
653 struct ubifs_ch *ch = node;
654 int len = le32_to_cpu(ch->len), remains = c->leb_size - *offs;
655
656 if (len > remains) {
657 int sz = ALIGN(*offs, c->min_io_size), err;
658
659 ubifs_pad(c, buf + *offs, sz - *offs);
660 err = ubifs_leb_change(c, *lnum, buf, sz, UBI_SHORTTERM);
661 if (err)
662 return err;
663 *lnum = next_log_lnum(c, *lnum);
664 *offs = 0;
665 }
666 memcpy(buf + *offs, node, len);
667 *offs += ALIGN(len, 8);
668 return 0;
669}
670
671/**
672 * ubifs_consolidate_log - consolidate the log.
673 * @c: UBIFS file-system description object
674 *
675 * Repeated failed commits could cause the log to be full, but at least 1 LEB is
676 * needed for commit. This function rewrites the reference nodes in the log
677 * omitting duplicates, and failed CS nodes, and leaving no gaps.
678 *
679 * This function returns %0 on success and a negative error code on failure.
680 */
681int ubifs_consolidate_log(struct ubifs_info *c)
682{
683 struct ubifs_scan_leb *sleb;
684 struct ubifs_scan_node *snod;
685 struct rb_root done_tree = RB_ROOT;
686 int lnum, err, first = 1, write_lnum, offs = 0;
687 void *buf;
688
689 dbg_rcvry("log tail LEB %d, log head LEB %d", c->ltail_lnum,
690 c->lhead_lnum);
691 buf = vmalloc(c->leb_size);
692 if (!buf)
693 return -ENOMEM;
694 lnum = c->ltail_lnum;
695 write_lnum = lnum;
696 while (1) {
697 sleb = ubifs_scan(c, lnum, 0, c->sbuf);
698 if (IS_ERR(sleb)) {
699 err = PTR_ERR(sleb);
700 goto out_free;
701 }
702 list_for_each_entry(snod, &sleb->nodes, list) {
703 switch (snod->type) {
704 case UBIFS_REF_NODE: {
705 struct ubifs_ref_node *ref = snod->node;
706 int ref_lnum = le32_to_cpu(ref->lnum);
707
708 err = done_already(&done_tree, ref_lnum);
709 if (err < 0)
710 goto out_scan;
711 if (err != 1) {
712 err = add_node(c, buf, &write_lnum,
713 &offs, snod->node);
714 if (err)
715 goto out_scan;
716 }
717 break;
718 }
719 case UBIFS_CS_NODE:
720 if (!first)
721 break;
722 err = add_node(c, buf, &write_lnum, &offs,
723 snod->node);
724 if (err)
725 goto out_scan;
726 first = 0;
727 break;
728 }
729 }
730 ubifs_scan_destroy(sleb);
731 if (lnum == c->lhead_lnum)
732 break;
733 lnum = next_log_lnum(c, lnum);
734 }
735 if (offs) {
736 int sz = ALIGN(offs, c->min_io_size);
737
738 ubifs_pad(c, buf + offs, sz - offs);
739 err = ubifs_leb_change(c, write_lnum, buf, sz, UBI_SHORTTERM);
740 if (err)
741 goto out_free;
742 offs = ALIGN(offs, c->min_io_size);
743 }
744 destroy_done_tree(&done_tree);
745 vfree(buf);
746 if (write_lnum == c->lhead_lnum) {
747 ubifs_err("log is too full");
748 return -EINVAL;
749 }
750 /* Unmap remaining LEBs */
751 lnum = write_lnum;
752 do {
753 lnum = next_log_lnum(c, lnum);
754 err = ubifs_leb_unmap(c, lnum);
755 if (err)
756 return err;
757 } while (lnum != c->lhead_lnum);
758 c->lhead_lnum = write_lnum;
759 c->lhead_offs = offs;
760 dbg_rcvry("new log head at %d:%d", c->lhead_lnum, c->lhead_offs);
761 return 0;
762
763out_scan:
764 ubifs_scan_destroy(sleb);
765out_free:
766 destroy_done_tree(&done_tree);
767 vfree(buf);
768 return err;
769}
770
771#ifdef CONFIG_UBIFS_FS_DEBUG
772
773/**
774 * dbg_check_bud_bytes - make sure bud bytes calculation are all right.
775 * @c: UBIFS file-system description object
776 *
777 * This function makes sure the amount of flash space used by closed buds
778 * ('c->bud_bytes' is correct). Returns zero in case of success and %-EINVAL in
779 * case of failure.
780 */
781static int dbg_check_bud_bytes(struct ubifs_info *c)
782{
783 int i, err = 0;
784 struct ubifs_bud *bud;
785 long long bud_bytes = 0;
786
787 if (!(ubifs_chk_flags & UBIFS_CHK_GEN))
788 return 0;
789
790 spin_lock(&c->buds_lock);
791 for (i = 0; i < c->jhead_cnt; i++)
792 list_for_each_entry(bud, &c->jheads[i].buds_list, list)
793 bud_bytes += c->leb_size - bud->start;
794
795 if (c->bud_bytes != bud_bytes) {
796 ubifs_err("bad bud_bytes %lld, calculated %lld",
797 c->bud_bytes, bud_bytes);
798 err = -EINVAL;
799 }
800 spin_unlock(&c->buds_lock);
801
802 return err;
803}
804
805#endif /* CONFIG_UBIFS_FS_DEBUG */
diff --git a/fs/ubifs/lprops.c b/fs/ubifs/lprops.c
new file mode 100644
index 000000000000..2ba93da71b65
--- /dev/null
+++ b/fs/ubifs/lprops.c
@@ -0,0 +1,1357 @@
1/*
2 * This file is part of UBIFS.
3 *
4 * Copyright (C) 2006-2008 Nokia Corporation.
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms of the GNU General Public License version 2 as published by
8 * the Free Software Foundation.
9 *
10 * This program is distributed in the hope that it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13 * more details.
14 *
15 * You should have received a copy of the GNU General Public License along with
16 * this program; if not, write to the Free Software Foundation, Inc., 51
17 * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
18 *
19 * Authors: Adrian Hunter
20 * Artem Bityutskiy (Битюцкий Артём)
21 */
22
23/*
24 * This file implements the functions that access LEB properties and their
25 * categories. LEBs are categorized based on the needs of UBIFS, and the
26 * categories are stored as either heaps or lists to provide a fast way of
27 * finding a LEB in a particular category. For example, UBIFS may need to find
28 * an empty LEB for the journal, or a very dirty LEB for garbage collection.
29 */
30
31#include "ubifs.h"
32
33/**
34 * get_heap_comp_val - get the LEB properties value for heap comparisons.
35 * @lprops: LEB properties
36 * @cat: LEB category
37 */
38static int get_heap_comp_val(struct ubifs_lprops *lprops, int cat)
39{
40 switch (cat) {
41 case LPROPS_FREE:
42 return lprops->free;
43 case LPROPS_DIRTY_IDX:
44 return lprops->free + lprops->dirty;
45 default:
46 return lprops->dirty;
47 }
48}
49
50/**
51 * move_up_lpt_heap - move a new heap entry up as far as possible.
52 * @c: UBIFS file-system description object
53 * @heap: LEB category heap
54 * @lprops: LEB properties to move
55 * @cat: LEB category
56 *
57 * New entries to a heap are added at the bottom and then moved up until the
58 * parent's value is greater. In the case of LPT's category heaps, the value
59 * is either the amount of free space or the amount of dirty space, depending
60 * on the category.
61 */
62static void move_up_lpt_heap(struct ubifs_info *c, struct ubifs_lpt_heap *heap,
63 struct ubifs_lprops *lprops, int cat)
64{
65 int val1, val2, hpos;
66
67 hpos = lprops->hpos;
68 if (!hpos)
69 return; /* Already top of the heap */
70 val1 = get_heap_comp_val(lprops, cat);
71 /* Compare to parent and, if greater, move up the heap */
72 do {
73 int ppos = (hpos - 1) / 2;
74
75 val2 = get_heap_comp_val(heap->arr[ppos], cat);
76 if (val2 >= val1)
77 return;
78 /* Greater than parent so move up */
79 heap->arr[ppos]->hpos = hpos;
80 heap->arr[hpos] = heap->arr[ppos];
81 heap->arr[ppos] = lprops;
82 lprops->hpos = ppos;
83 hpos = ppos;
84 } while (hpos);
85}
86
87/**
88 * adjust_lpt_heap - move a changed heap entry up or down the heap.
89 * @c: UBIFS file-system description object
90 * @heap: LEB category heap
91 * @lprops: LEB properties to move
92 * @hpos: heap position of @lprops
93 * @cat: LEB category
94 *
95 * Changed entries in a heap are moved up or down until the parent's value is
96 * greater. In the case of LPT's category heaps, the value is either the amount
97 * of free space or the amount of dirty space, depending on the category.
98 */
99static void adjust_lpt_heap(struct ubifs_info *c, struct ubifs_lpt_heap *heap,
100 struct ubifs_lprops *lprops, int hpos, int cat)
101{
102 int val1, val2, val3, cpos;
103
104 val1 = get_heap_comp_val(lprops, cat);
105 /* Compare to parent and, if greater than parent, move up the heap */
106 if (hpos) {
107 int ppos = (hpos - 1) / 2;
108
109 val2 = get_heap_comp_val(heap->arr[ppos], cat);
110 if (val1 > val2) {
111 /* Greater than parent so move up */
112 while (1) {
113 heap->arr[ppos]->hpos = hpos;
114 heap->arr[hpos] = heap->arr[ppos];
115 heap->arr[ppos] = lprops;
116 lprops->hpos = ppos;
117 hpos = ppos;
118 if (!hpos)
119 return;
120 ppos = (hpos - 1) / 2;
121 val2 = get_heap_comp_val(heap->arr[ppos], cat);
122 if (val1 <= val2)
123 return;
124 /* Still greater than parent so keep going */
125 }
126 }
127 }
128 /* Not greater than parent, so compare to children */
129 while (1) {
130 /* Compare to left child */
131 cpos = hpos * 2 + 1;
132 if (cpos >= heap->cnt)
133 return;
134 val2 = get_heap_comp_val(heap->arr[cpos], cat);
135 if (val1 < val2) {
136 /* Less than left child, so promote biggest child */
137 if (cpos + 1 < heap->cnt) {
138 val3 = get_heap_comp_val(heap->arr[cpos + 1],
139 cat);
140 if (val3 > val2)
141 cpos += 1; /* Right child is bigger */
142 }
143 heap->arr[cpos]->hpos = hpos;
144 heap->arr[hpos] = heap->arr[cpos];
145 heap->arr[cpos] = lprops;
146 lprops->hpos = cpos;
147 hpos = cpos;
148 continue;
149 }
150 /* Compare to right child */
151 cpos += 1;
152 if (cpos >= heap->cnt)
153 return;
154 val3 = get_heap_comp_val(heap->arr[cpos], cat);
155 if (val1 < val3) {
156 /* Less than right child, so promote right child */
157 heap->arr[cpos]->hpos = hpos;
158 heap->arr[hpos] = heap->arr[cpos];
159 heap->arr[cpos] = lprops;
160 lprops->hpos = cpos;
161 hpos = cpos;
162 continue;
163 }
164 return;
165 }
166}
167
168/**
169 * add_to_lpt_heap - add LEB properties to a LEB category heap.
170 * @c: UBIFS file-system description object
171 * @lprops: LEB properties to add
172 * @cat: LEB category
173 *
174 * This function returns %1 if @lprops is added to the heap for LEB category
175 * @cat, otherwise %0 is returned because the heap is full.
176 */
177static int add_to_lpt_heap(struct ubifs_info *c, struct ubifs_lprops *lprops,
178 int cat)
179{
180 struct ubifs_lpt_heap *heap = &c->lpt_heap[cat - 1];
181
182 if (heap->cnt >= heap->max_cnt) {
183 const int b = LPT_HEAP_SZ / 2 - 1;
184 int cpos, val1, val2;
185
186 /* Compare to some other LEB on the bottom of heap */
187 /* Pick a position kind of randomly */
188 cpos = (((size_t)lprops >> 4) & b) + b;
189 ubifs_assert(cpos >= b);
190 ubifs_assert(cpos < LPT_HEAP_SZ);
191 ubifs_assert(cpos < heap->cnt);
192
193 val1 = get_heap_comp_val(lprops, cat);
194 val2 = get_heap_comp_val(heap->arr[cpos], cat);
195 if (val1 > val2) {
196 struct ubifs_lprops *lp;
197
198 lp = heap->arr[cpos];
199 lp->flags &= ~LPROPS_CAT_MASK;
200 lp->flags |= LPROPS_UNCAT;
201 list_add(&lp->list, &c->uncat_list);
202 lprops->hpos = cpos;
203 heap->arr[cpos] = lprops;
204 move_up_lpt_heap(c, heap, lprops, cat);
205 dbg_check_heap(c, heap, cat, lprops->hpos);
206 return 1; /* Added to heap */
207 }
208 dbg_check_heap(c, heap, cat, -1);
209 return 0; /* Not added to heap */
210 } else {
211 lprops->hpos = heap->cnt++;
212 heap->arr[lprops->hpos] = lprops;
213 move_up_lpt_heap(c, heap, lprops, cat);
214 dbg_check_heap(c, heap, cat, lprops->hpos);
215 return 1; /* Added to heap */
216 }
217}
218
219/**
220 * remove_from_lpt_heap - remove LEB properties from a LEB category heap.
221 * @c: UBIFS file-system description object
222 * @lprops: LEB properties to remove
223 * @cat: LEB category
224 */
225static void remove_from_lpt_heap(struct ubifs_info *c,
226 struct ubifs_lprops *lprops, int cat)
227{
228 struct ubifs_lpt_heap *heap;
229 int hpos = lprops->hpos;
230
231 heap = &c->lpt_heap[cat - 1];
232 ubifs_assert(hpos >= 0 && hpos < heap->cnt);
233 ubifs_assert(heap->arr[hpos] == lprops);
234 heap->cnt -= 1;
235 if (hpos < heap->cnt) {
236 heap->arr[hpos] = heap->arr[heap->cnt];
237 heap->arr[hpos]->hpos = hpos;
238 adjust_lpt_heap(c, heap, heap->arr[hpos], hpos, cat);
239 }
240 dbg_check_heap(c, heap, cat, -1);
241}
242
243/**
244 * lpt_heap_replace - replace lprops in a category heap.
245 * @c: UBIFS file-system description object
246 * @old_lprops: LEB properties to replace
247 * @new_lprops: LEB properties with which to replace
248 * @cat: LEB category
249 *
250 * During commit it is sometimes necessary to copy a pnode (see dirty_cow_pnode)
251 * and the lprops that the pnode contains. When that happens, references in
252 * the category heaps to those lprops must be updated to point to the new
253 * lprops. This function does that.
254 */
255static void lpt_heap_replace(struct ubifs_info *c,
256 struct ubifs_lprops *old_lprops,
257 struct ubifs_lprops *new_lprops, int cat)
258{
259 struct ubifs_lpt_heap *heap;
260 int hpos = new_lprops->hpos;
261
262 heap = &c->lpt_heap[cat - 1];
263 heap->arr[hpos] = new_lprops;
264}
265
266/**
267 * ubifs_add_to_cat - add LEB properties to a category list or heap.
268 * @c: UBIFS file-system description object
269 * @lprops: LEB properties to add
270 * @cat: LEB category to which to add
271 *
272 * LEB properties are categorized to enable fast find operations.
273 */
274void ubifs_add_to_cat(struct ubifs_info *c, struct ubifs_lprops *lprops,
275 int cat)
276{
277 switch (cat) {
278 case LPROPS_DIRTY:
279 case LPROPS_DIRTY_IDX:
280 case LPROPS_FREE:
281 if (add_to_lpt_heap(c, lprops, cat))
282 break;
283 /* No more room on heap so make it uncategorized */
284 cat = LPROPS_UNCAT;
285 /* Fall through */
286 case LPROPS_UNCAT:
287 list_add(&lprops->list, &c->uncat_list);
288 break;
289 case LPROPS_EMPTY:
290 list_add(&lprops->list, &c->empty_list);
291 break;
292 case LPROPS_FREEABLE:
293 list_add(&lprops->list, &c->freeable_list);
294 c->freeable_cnt += 1;
295 break;
296 case LPROPS_FRDI_IDX:
297 list_add(&lprops->list, &c->frdi_idx_list);
298 break;
299 default:
300 ubifs_assert(0);
301 }
302 lprops->flags &= ~LPROPS_CAT_MASK;
303 lprops->flags |= cat;
304}
305
306/**
307 * ubifs_remove_from_cat - remove LEB properties from a category list or heap.
308 * @c: UBIFS file-system description object
309 * @lprops: LEB properties to remove
310 * @cat: LEB category from which to remove
311 *
312 * LEB properties are categorized to enable fast find operations.
313 */
314static void ubifs_remove_from_cat(struct ubifs_info *c,
315 struct ubifs_lprops *lprops, int cat)
316{
317 switch (cat) {
318 case LPROPS_DIRTY:
319 case LPROPS_DIRTY_IDX:
320 case LPROPS_FREE:
321 remove_from_lpt_heap(c, lprops, cat);
322 break;
323 case LPROPS_FREEABLE:
324 c->freeable_cnt -= 1;
325 ubifs_assert(c->freeable_cnt >= 0);
326 /* Fall through */
327 case LPROPS_UNCAT:
328 case LPROPS_EMPTY:
329 case LPROPS_FRDI_IDX:
330 ubifs_assert(!list_empty(&lprops->list));
331 list_del(&lprops->list);
332 break;
333 default:
334 ubifs_assert(0);
335 }
336}
337
338/**
339 * ubifs_replace_cat - replace lprops in a category list or heap.
340 * @c: UBIFS file-system description object
341 * @old_lprops: LEB properties to replace
342 * @new_lprops: LEB properties with which to replace
343 *
344 * During commit it is sometimes necessary to copy a pnode (see dirty_cow_pnode)
345 * and the lprops that the pnode contains. When that happens, references in
346 * category lists and heaps must be replaced. This function does that.
347 */
348void ubifs_replace_cat(struct ubifs_info *c, struct ubifs_lprops *old_lprops,
349 struct ubifs_lprops *new_lprops)
350{
351 int cat;
352
353 cat = new_lprops->flags & LPROPS_CAT_MASK;
354 switch (cat) {
355 case LPROPS_DIRTY:
356 case LPROPS_DIRTY_IDX:
357 case LPROPS_FREE:
358 lpt_heap_replace(c, old_lprops, new_lprops, cat);
359 break;
360 case LPROPS_UNCAT:
361 case LPROPS_EMPTY:
362 case LPROPS_FREEABLE:
363 case LPROPS_FRDI_IDX:
364 list_replace(&old_lprops->list, &new_lprops->list);
365 break;
366 default:
367 ubifs_assert(0);
368 }
369}
370
371/**
372 * ubifs_ensure_cat - ensure LEB properties are categorized.
373 * @c: UBIFS file-system description object
374 * @lprops: LEB properties
375 *
376 * A LEB may have fallen off of the bottom of a heap, and ended up as
377 * uncategorized even though it has enough space for us now. If that is the case
378 * this function will put the LEB back onto a heap.
379 */
380void ubifs_ensure_cat(struct ubifs_info *c, struct ubifs_lprops *lprops)
381{
382 int cat = lprops->flags & LPROPS_CAT_MASK;
383
384 if (cat != LPROPS_UNCAT)
385 return;
386 cat = ubifs_categorize_lprops(c, lprops);
387 if (cat == LPROPS_UNCAT)
388 return;
389 ubifs_remove_from_cat(c, lprops, LPROPS_UNCAT);
390 ubifs_add_to_cat(c, lprops, cat);
391}
392
393/**
394 * ubifs_categorize_lprops - categorize LEB properties.
395 * @c: UBIFS file-system description object
396 * @lprops: LEB properties to categorize
397 *
398 * LEB properties are categorized to enable fast find operations. This function
399 * returns the LEB category to which the LEB properties belong. Note however
400 * that if the LEB category is stored as a heap and the heap is full, the
401 * LEB properties may have their category changed to %LPROPS_UNCAT.
402 */
403int ubifs_categorize_lprops(const struct ubifs_info *c,
404 const struct ubifs_lprops *lprops)
405{
406 if (lprops->flags & LPROPS_TAKEN)
407 return LPROPS_UNCAT;
408
409 if (lprops->free == c->leb_size) {
410 ubifs_assert(!(lprops->flags & LPROPS_INDEX));
411 return LPROPS_EMPTY;
412 }
413
414 if (lprops->free + lprops->dirty == c->leb_size) {
415 if (lprops->flags & LPROPS_INDEX)
416 return LPROPS_FRDI_IDX;
417 else
418 return LPROPS_FREEABLE;
419 }
420
421 if (lprops->flags & LPROPS_INDEX) {
422 if (lprops->dirty + lprops->free >= c->min_idx_node_sz)
423 return LPROPS_DIRTY_IDX;
424 } else {
425 if (lprops->dirty >= c->dead_wm &&
426 lprops->dirty > lprops->free)
427 return LPROPS_DIRTY;
428 if (lprops->free > 0)
429 return LPROPS_FREE;
430 }
431
432 return LPROPS_UNCAT;
433}
434
435/**
436 * change_category - change LEB properties category.
437 * @c: UBIFS file-system description object
438 * @lprops: LEB properties to recategorize
439 *
440 * LEB properties are categorized to enable fast find operations. When the LEB
441 * properties change they must be recategorized.
442 */
443static void change_category(struct ubifs_info *c, struct ubifs_lprops *lprops)
444{
445 int old_cat = lprops->flags & LPROPS_CAT_MASK;
446 int new_cat = ubifs_categorize_lprops(c, lprops);
447
448 if (old_cat == new_cat) {
449 struct ubifs_lpt_heap *heap = &c->lpt_heap[new_cat - 1];
450
451 /* lprops on a heap now must be moved up or down */
452 if (new_cat < 1 || new_cat > LPROPS_HEAP_CNT)
453 return; /* Not on a heap */
454 heap = &c->lpt_heap[new_cat - 1];
455 adjust_lpt_heap(c, heap, lprops, lprops->hpos, new_cat);
456 } else {
457 ubifs_remove_from_cat(c, lprops, old_cat);
458 ubifs_add_to_cat(c, lprops, new_cat);
459 }
460}
461
462/**
463 * ubifs_get_lprops - get reference to LEB properties.
464 * @c: the UBIFS file-system description object
465 *
466 * This function locks lprops. Lprops have to be unlocked by
467 * 'ubifs_release_lprops()'.
468 */
469void ubifs_get_lprops(struct ubifs_info *c)
470{
471 mutex_lock(&c->lp_mutex);
472}
473
474/**
475 * calc_dark - calculate LEB dark space size.
476 * @c: the UBIFS file-system description object
477 * @spc: amount of free and dirty space in the LEB
478 *
479 * This function calculates amount of dark space in an LEB which has @spc bytes
480 * of free and dirty space. Returns the calculations result.
481 *
482 * Dark space is the space which is not always usable - it depends on which
483 * nodes are written in which order. E.g., if an LEB has only 512 free bytes,
484 * it is dark space, because it cannot fit a large data node. So UBIFS cannot
485 * count on this LEB and treat these 512 bytes as usable because it is not true
486 * if, for example, only big chunks of uncompressible data will be written to
487 * the FS.
488 */
489static int calc_dark(struct ubifs_info *c, int spc)
490{
491 ubifs_assert(!(spc & 7));
492
493 if (spc < c->dark_wm)
494 return spc;
495
496 /*
497 * If we have slightly more space then the dark space watermark, we can
498 * anyway safely assume it we'll be able to write a node of the
499 * smallest size there.
500 */
501 if (spc - c->dark_wm < MIN_WRITE_SZ)
502 return spc - MIN_WRITE_SZ;
503
504 return c->dark_wm;
505}
506
507/**
508 * is_lprops_dirty - determine if LEB properties are dirty.
509 * @c: the UBIFS file-system description object
510 * @lprops: LEB properties to test
511 */
512static int is_lprops_dirty(struct ubifs_info *c, struct ubifs_lprops *lprops)
513{
514 struct ubifs_pnode *pnode;
515 int pos;
516
517 pos = (lprops->lnum - c->main_first) & (UBIFS_LPT_FANOUT - 1);
518 pnode = (struct ubifs_pnode *)container_of(lprops - pos,
519 struct ubifs_pnode,
520 lprops[0]);
521 return !test_bit(COW_ZNODE, &pnode->flags) &&
522 test_bit(DIRTY_CNODE, &pnode->flags);
523}
524
525/**
526 * ubifs_change_lp - change LEB properties.
527 * @c: the UBIFS file-system description object
528 * @lp: LEB properties to change
529 * @free: new free space amount
530 * @dirty: new dirty space amount
531 * @flags: new flags
532 * @idx_gc_cnt: change to the count of idx_gc list
533 *
534 * This function changes LEB properties. This function does not change a LEB
535 * property (@free, @dirty or @flag) if the value passed is %LPROPS_NC.
536 *
537 * This function returns a pointer to the updated LEB properties on success
538 * and a negative error code on failure. N.B. the LEB properties may have had to
539 * be copied (due to COW) and consequently the pointer returned may not be the
540 * same as the pointer passed.
541 */
542const struct ubifs_lprops *ubifs_change_lp(struct ubifs_info *c,
543 const struct ubifs_lprops *lp,
544 int free, int dirty, int flags,
545 int idx_gc_cnt)
546{
547 /*
548 * This is the only function that is allowed to change lprops, so we
549 * discard the const qualifier.
550 */
551 struct ubifs_lprops *lprops = (struct ubifs_lprops *)lp;
552
553 dbg_lp("LEB %d, free %d, dirty %d, flags %d",
554 lprops->lnum, free, dirty, flags);
555
556 ubifs_assert(mutex_is_locked(&c->lp_mutex));
557 ubifs_assert(c->lst.empty_lebs >= 0 &&
558 c->lst.empty_lebs <= c->main_lebs);
559 ubifs_assert(c->freeable_cnt >= 0);
560 ubifs_assert(c->freeable_cnt <= c->main_lebs);
561 ubifs_assert(c->lst.taken_empty_lebs >= 0);
562 ubifs_assert(c->lst.taken_empty_lebs <= c->lst.empty_lebs);
563 ubifs_assert(!(c->lst.total_free & 7) && !(c->lst.total_dirty & 7));
564 ubifs_assert(!(c->lst.total_dead & 7) && !(c->lst.total_dark & 7));
565 ubifs_assert(!(c->lst.total_used & 7));
566 ubifs_assert(free == LPROPS_NC || free >= 0);
567 ubifs_assert(dirty == LPROPS_NC || dirty >= 0);
568
569 if (!is_lprops_dirty(c, lprops)) {
570 lprops = ubifs_lpt_lookup_dirty(c, lprops->lnum);
571 if (IS_ERR(lprops))
572 return lprops;
573 } else
574 ubifs_assert(lprops == ubifs_lpt_lookup_dirty(c, lprops->lnum));
575
576 ubifs_assert(!(lprops->free & 7) && !(lprops->dirty & 7));
577
578 spin_lock(&c->space_lock);
579
580 if ((lprops->flags & LPROPS_TAKEN) && lprops->free == c->leb_size)
581 c->lst.taken_empty_lebs -= 1;
582
583 if (!(lprops->flags & LPROPS_INDEX)) {
584 int old_spc;
585
586 old_spc = lprops->free + lprops->dirty;
587 if (old_spc < c->dead_wm)
588 c->lst.total_dead -= old_spc;
589 else
590 c->lst.total_dark -= calc_dark(c, old_spc);
591
592 c->lst.total_used -= c->leb_size - old_spc;
593 }
594
595 if (free != LPROPS_NC) {
596 free = ALIGN(free, 8);
597 c->lst.total_free += free - lprops->free;
598
599 /* Increase or decrease empty LEBs counter if needed */
600 if (free == c->leb_size) {
601 if (lprops->free != c->leb_size)
602 c->lst.empty_lebs += 1;
603 } else if (lprops->free == c->leb_size)
604 c->lst.empty_lebs -= 1;
605 lprops->free = free;
606 }
607
608 if (dirty != LPROPS_NC) {
609 dirty = ALIGN(dirty, 8);
610 c->lst.total_dirty += dirty - lprops->dirty;
611 lprops->dirty = dirty;
612 }
613
614 if (flags != LPROPS_NC) {
615 /* Take care about indexing LEBs counter if needed */
616 if ((lprops->flags & LPROPS_INDEX)) {
617 if (!(flags & LPROPS_INDEX))
618 c->lst.idx_lebs -= 1;
619 } else if (flags & LPROPS_INDEX)
620 c->lst.idx_lebs += 1;
621 lprops->flags = flags;
622 }
623
624 if (!(lprops->flags & LPROPS_INDEX)) {
625 int new_spc;
626
627 new_spc = lprops->free + lprops->dirty;
628 if (new_spc < c->dead_wm)
629 c->lst.total_dead += new_spc;
630 else
631 c->lst.total_dark += calc_dark(c, new_spc);
632
633 c->lst.total_used += c->leb_size - new_spc;
634 }
635
636 if ((lprops->flags & LPROPS_TAKEN) && lprops->free == c->leb_size)
637 c->lst.taken_empty_lebs += 1;
638
639 change_category(c, lprops);
640
641 c->idx_gc_cnt += idx_gc_cnt;
642
643 spin_unlock(&c->space_lock);
644
645 return lprops;
646}
647
648/**
649 * ubifs_release_lprops - release lprops lock.
650 * @c: the UBIFS file-system description object
651 *
652 * This function has to be called after each 'ubifs_get_lprops()' call to
653 * unlock lprops.
654 */
655void ubifs_release_lprops(struct ubifs_info *c)
656{
657 ubifs_assert(mutex_is_locked(&c->lp_mutex));
658 ubifs_assert(c->lst.empty_lebs >= 0 &&
659 c->lst.empty_lebs <= c->main_lebs);
660
661 mutex_unlock(&c->lp_mutex);
662}
663
664/**
665 * ubifs_get_lp_stats - get lprops statistics.
666 * @c: UBIFS file-system description object
667 * @st: return statistics
668 */
669void ubifs_get_lp_stats(struct ubifs_info *c, struct ubifs_lp_stats *st)
670{
671 spin_lock(&c->space_lock);
672 memcpy(st, &c->lst, sizeof(struct ubifs_lp_stats));
673 spin_unlock(&c->space_lock);
674}
675
676/**
677 * ubifs_change_one_lp - change LEB properties.
678 * @c: the UBIFS file-system description object
679 * @lnum: LEB to change properties for
680 * @free: amount of free space
681 * @dirty: amount of dirty space
682 * @flags_set: flags to set
683 * @flags_clean: flags to clean
684 * @idx_gc_cnt: change to the count of idx_gc list
685 *
686 * This function changes properties of LEB @lnum. It is a helper wrapper over
687 * 'ubifs_change_lp()' which hides lprops get/release. The arguments are the
688 * same as in case of 'ubifs_change_lp()'. Returns zero in case of success and
689 * a negative error code in case of failure.
690 */
691int ubifs_change_one_lp(struct ubifs_info *c, int lnum, int free, int dirty,
692 int flags_set, int flags_clean, int idx_gc_cnt)
693{
694 int err = 0, flags;
695 const struct ubifs_lprops *lp;
696
697 ubifs_get_lprops(c);
698
699 lp = ubifs_lpt_lookup_dirty(c, lnum);
700 if (IS_ERR(lp)) {
701 err = PTR_ERR(lp);
702 goto out;
703 }
704
705 flags = (lp->flags | flags_set) & ~flags_clean;
706 lp = ubifs_change_lp(c, lp, free, dirty, flags, idx_gc_cnt);
707 if (IS_ERR(lp))
708 err = PTR_ERR(lp);
709
710out:
711 ubifs_release_lprops(c);
712 return err;
713}
714
715/**
716 * ubifs_update_one_lp - update LEB properties.
717 * @c: the UBIFS file-system description object
718 * @lnum: LEB to change properties for
719 * @free: amount of free space
720 * @dirty: amount of dirty space to add
721 * @flags_set: flags to set
722 * @flags_clean: flags to clean
723 *
724 * This function is the same as 'ubifs_change_one_lp()' but @dirty is added to
725 * current dirty space, not substitutes it.
726 */
727int ubifs_update_one_lp(struct ubifs_info *c, int lnum, int free, int dirty,
728 int flags_set, int flags_clean)
729{
730 int err = 0, flags;
731 const struct ubifs_lprops *lp;
732
733 ubifs_get_lprops(c);
734
735 lp = ubifs_lpt_lookup_dirty(c, lnum);
736 if (IS_ERR(lp)) {
737 err = PTR_ERR(lp);
738 goto out;
739 }
740
741 flags = (lp->flags | flags_set) & ~flags_clean;
742 lp = ubifs_change_lp(c, lp, free, lp->dirty + dirty, flags, 0);
743 if (IS_ERR(lp))
744 err = PTR_ERR(lp);
745
746out:
747 ubifs_release_lprops(c);
748 return err;
749}
750
751/**
752 * ubifs_read_one_lp - read LEB properties.
753 * @c: the UBIFS file-system description object
754 * @lnum: LEB to read properties for
755 * @lp: where to store read properties
756 *
757 * This helper function reads properties of a LEB @lnum and stores them in @lp.
758 * Returns zero in case of success and a negative error code in case of
759 * failure.
760 */
761int ubifs_read_one_lp(struct ubifs_info *c, int lnum, struct ubifs_lprops *lp)
762{
763 int err = 0;
764 const struct ubifs_lprops *lpp;
765
766 ubifs_get_lprops(c);
767
768 lpp = ubifs_lpt_lookup(c, lnum);
769 if (IS_ERR(lpp)) {
770 err = PTR_ERR(lpp);
771 goto out;
772 }
773
774 memcpy(lp, lpp, sizeof(struct ubifs_lprops));
775
776out:
777 ubifs_release_lprops(c);
778 return err;
779}
780
781/**
782 * ubifs_fast_find_free - try to find a LEB with free space quickly.
783 * @c: the UBIFS file-system description object
784 *
785 * This function returns LEB properties for a LEB with free space or %NULL if
786 * the function is unable to find a LEB quickly.
787 */
788const struct ubifs_lprops *ubifs_fast_find_free(struct ubifs_info *c)
789{
790 struct ubifs_lprops *lprops;
791 struct ubifs_lpt_heap *heap;
792
793 ubifs_assert(mutex_is_locked(&c->lp_mutex));
794
795 heap = &c->lpt_heap[LPROPS_FREE - 1];
796 if (heap->cnt == 0)
797 return NULL;
798
799 lprops = heap->arr[0];
800 ubifs_assert(!(lprops->flags & LPROPS_TAKEN));
801 ubifs_assert(!(lprops->flags & LPROPS_INDEX));
802 return lprops;
803}
804
805/**
806 * ubifs_fast_find_empty - try to find an empty LEB quickly.
807 * @c: the UBIFS file-system description object
808 *
809 * This function returns LEB properties for an empty LEB or %NULL if the
810 * function is unable to find an empty LEB quickly.
811 */
812const struct ubifs_lprops *ubifs_fast_find_empty(struct ubifs_info *c)
813{
814 struct ubifs_lprops *lprops;
815
816 ubifs_assert(mutex_is_locked(&c->lp_mutex));
817
818 if (list_empty(&c->empty_list))
819 return NULL;
820
821 lprops = list_entry(c->empty_list.next, struct ubifs_lprops, list);
822 ubifs_assert(!(lprops->flags & LPROPS_TAKEN));
823 ubifs_assert(!(lprops->flags & LPROPS_INDEX));
824 ubifs_assert(lprops->free == c->leb_size);
825 return lprops;
826}
827
828/**
829 * ubifs_fast_find_freeable - try to find a freeable LEB quickly.
830 * @c: the UBIFS file-system description object
831 *
832 * This function returns LEB properties for a freeable LEB or %NULL if the
833 * function is unable to find a freeable LEB quickly.
834 */
835const struct ubifs_lprops *ubifs_fast_find_freeable(struct ubifs_info *c)
836{
837 struct ubifs_lprops *lprops;
838
839 ubifs_assert(mutex_is_locked(&c->lp_mutex));
840
841 if (list_empty(&c->freeable_list))
842 return NULL;
843
844 lprops = list_entry(c->freeable_list.next, struct ubifs_lprops, list);
845 ubifs_assert(!(lprops->flags & LPROPS_TAKEN));
846 ubifs_assert(!(lprops->flags & LPROPS_INDEX));
847 ubifs_assert(lprops->free + lprops->dirty == c->leb_size);
848 ubifs_assert(c->freeable_cnt > 0);
849 return lprops;
850}
851
852/**
853 * ubifs_fast_find_frdi_idx - try to find a freeable index LEB quickly.
854 * @c: the UBIFS file-system description object
855 *
856 * This function returns LEB properties for a freeable index LEB or %NULL if the
857 * function is unable to find a freeable index LEB quickly.
858 */
859const struct ubifs_lprops *ubifs_fast_find_frdi_idx(struct ubifs_info *c)
860{
861 struct ubifs_lprops *lprops;
862
863 ubifs_assert(mutex_is_locked(&c->lp_mutex));
864
865 if (list_empty(&c->frdi_idx_list))
866 return NULL;
867
868 lprops = list_entry(c->frdi_idx_list.next, struct ubifs_lprops, list);
869 ubifs_assert(!(lprops->flags & LPROPS_TAKEN));
870 ubifs_assert((lprops->flags & LPROPS_INDEX));
871 ubifs_assert(lprops->free + lprops->dirty == c->leb_size);
872 return lprops;
873}
874
875#ifdef CONFIG_UBIFS_FS_DEBUG
876
877/**
878 * dbg_check_cats - check category heaps and lists.
879 * @c: UBIFS file-system description object
880 *
881 * This function returns %0 on success and a negative error code on failure.
882 */
883int dbg_check_cats(struct ubifs_info *c)
884{
885 struct ubifs_lprops *lprops;
886 struct list_head *pos;
887 int i, cat;
888
889 if (!(ubifs_chk_flags & (UBIFS_CHK_GEN | UBIFS_CHK_LPROPS)))
890 return 0;
891
892 list_for_each_entry(lprops, &c->empty_list, list) {
893 if (lprops->free != c->leb_size) {
894 ubifs_err("non-empty LEB %d on empty list "
895 "(free %d dirty %d flags %d)", lprops->lnum,
896 lprops->free, lprops->dirty, lprops->flags);
897 return -EINVAL;
898 }
899 if (lprops->flags & LPROPS_TAKEN) {
900 ubifs_err("taken LEB %d on empty list "
901 "(free %d dirty %d flags %d)", lprops->lnum,
902 lprops->free, lprops->dirty, lprops->flags);
903 return -EINVAL;
904 }
905 }
906
907 i = 0;
908 list_for_each_entry(lprops, &c->freeable_list, list) {
909 if (lprops->free + lprops->dirty != c->leb_size) {
910 ubifs_err("non-freeable LEB %d on freeable list "
911 "(free %d dirty %d flags %d)", lprops->lnum,
912 lprops->free, lprops->dirty, lprops->flags);
913 return -EINVAL;
914 }
915 if (lprops->flags & LPROPS_TAKEN) {
916 ubifs_err("taken LEB %d on freeable list "
917 "(free %d dirty %d flags %d)", lprops->lnum,
918 lprops->free, lprops->dirty, lprops->flags);
919 return -EINVAL;
920 }
921 i += 1;
922 }
923 if (i != c->freeable_cnt) {
924 ubifs_err("freeable list count %d expected %d", i,
925 c->freeable_cnt);
926 return -EINVAL;
927 }
928
929 i = 0;
930 list_for_each(pos, &c->idx_gc)
931 i += 1;
932 if (i != c->idx_gc_cnt) {
933 ubifs_err("idx_gc list count %d expected %d", i,
934 c->idx_gc_cnt);
935 return -EINVAL;
936 }
937
938 list_for_each_entry(lprops, &c->frdi_idx_list, list) {
939 if (lprops->free + lprops->dirty != c->leb_size) {
940 ubifs_err("non-freeable LEB %d on frdi_idx list "
941 "(free %d dirty %d flags %d)", lprops->lnum,
942 lprops->free, lprops->dirty, lprops->flags);
943 return -EINVAL;
944 }
945 if (lprops->flags & LPROPS_TAKEN) {
946 ubifs_err("taken LEB %d on frdi_idx list "
947 "(free %d dirty %d flags %d)", lprops->lnum,
948 lprops->free, lprops->dirty, lprops->flags);
949 return -EINVAL;
950 }
951 if (!(lprops->flags & LPROPS_INDEX)) {
952 ubifs_err("non-index LEB %d on frdi_idx list "
953 "(free %d dirty %d flags %d)", lprops->lnum,
954 lprops->free, lprops->dirty, lprops->flags);
955 return -EINVAL;
956 }
957 }
958
959 for (cat = 1; cat <= LPROPS_HEAP_CNT; cat++) {
960 struct ubifs_lpt_heap *heap = &c->lpt_heap[cat - 1];
961
962 for (i = 0; i < heap->cnt; i++) {
963 lprops = heap->arr[i];
964 if (!lprops) {
965 ubifs_err("null ptr in LPT heap cat %d", cat);
966 return -EINVAL;
967 }
968 if (lprops->hpos != i) {
969 ubifs_err("bad ptr in LPT heap cat %d", cat);
970 return -EINVAL;
971 }
972 if (lprops->flags & LPROPS_TAKEN) {
973 ubifs_err("taken LEB in LPT heap cat %d", cat);
974 return -EINVAL;
975 }
976 }
977 }
978
979 return 0;
980}
981
982void dbg_check_heap(struct ubifs_info *c, struct ubifs_lpt_heap *heap, int cat,
983 int add_pos)
984{
985 int i = 0, j, err = 0;
986
987 if (!(ubifs_chk_flags & (UBIFS_CHK_GEN | UBIFS_CHK_LPROPS)))
988 return;
989
990 for (i = 0; i < heap->cnt; i++) {
991 struct ubifs_lprops *lprops = heap->arr[i];
992 struct ubifs_lprops *lp;
993
994 if (i != add_pos)
995 if ((lprops->flags & LPROPS_CAT_MASK) != cat) {
996 err = 1;
997 goto out;
998 }
999 if (lprops->hpos != i) {
1000 err = 2;
1001 goto out;
1002 }
1003 lp = ubifs_lpt_lookup(c, lprops->lnum);
1004 if (IS_ERR(lp)) {
1005 err = 3;
1006 goto out;
1007 }
1008 if (lprops != lp) {
1009 dbg_msg("lprops %zx lp %zx lprops->lnum %d lp->lnum %d",
1010 (size_t)lprops, (size_t)lp, lprops->lnum,
1011 lp->lnum);
1012 err = 4;
1013 goto out;
1014 }
1015 for (j = 0; j < i; j++) {
1016 lp = heap->arr[j];
1017 if (lp == lprops) {
1018 err = 5;
1019 goto out;
1020 }
1021 if (lp->lnum == lprops->lnum) {
1022 err = 6;
1023 goto out;
1024 }
1025 }
1026 }
1027out:
1028 if (err) {
1029 dbg_msg("failed cat %d hpos %d err %d", cat, i, err);
1030 dbg_dump_stack();
1031 dbg_dump_heap(c, heap, cat);
1032 }
1033}
1034
1035/**
1036 * struct scan_check_data - data provided to scan callback function.
1037 * @lst: LEB properties statistics
1038 * @err: error code
1039 */
1040struct scan_check_data {
1041 struct ubifs_lp_stats lst;
1042 int err;
1043};
1044
1045/**
1046 * scan_check_cb - scan callback.
1047 * @c: the UBIFS file-system description object
1048 * @lp: LEB properties to scan
1049 * @in_tree: whether the LEB properties are in main memory
1050 * @data: information passed to and from the caller of the scan
1051 *
1052 * This function returns a code that indicates whether the scan should continue
1053 * (%LPT_SCAN_CONTINUE), whether the LEB properties should be added to the tree
1054 * in main memory (%LPT_SCAN_ADD), or whether the scan should stop
1055 * (%LPT_SCAN_STOP).
1056 */
1057static int scan_check_cb(struct ubifs_info *c,
1058 const struct ubifs_lprops *lp, int in_tree,
1059 struct scan_check_data *data)
1060{
1061 struct ubifs_scan_leb *sleb;
1062 struct ubifs_scan_node *snod;
1063 struct ubifs_lp_stats *lst = &data->lst;
1064 int cat, lnum = lp->lnum, is_idx = 0, used = 0, free, dirty;
1065
1066 cat = lp->flags & LPROPS_CAT_MASK;
1067 if (cat != LPROPS_UNCAT) {
1068 cat = ubifs_categorize_lprops(c, lp);
1069 if (cat != (lp->flags & LPROPS_CAT_MASK)) {
1070 ubifs_err("bad LEB category %d expected %d",
1071 (lp->flags & LPROPS_CAT_MASK), cat);
1072 goto out;
1073 }
1074 }
1075
1076 /* Check lp is on its category list (if it has one) */
1077 if (in_tree) {
1078 struct list_head *list = NULL;
1079
1080 switch (cat) {
1081 case LPROPS_EMPTY:
1082 list = &c->empty_list;
1083 break;
1084 case LPROPS_FREEABLE:
1085 list = &c->freeable_list;
1086 break;
1087 case LPROPS_FRDI_IDX:
1088 list = &c->frdi_idx_list;
1089 break;
1090 case LPROPS_UNCAT:
1091 list = &c->uncat_list;
1092 break;
1093 }
1094 if (list) {
1095 struct ubifs_lprops *lprops;
1096 int found = 0;
1097
1098 list_for_each_entry(lprops, list, list) {
1099 if (lprops == lp) {
1100 found = 1;
1101 break;
1102 }
1103 }
1104 if (!found) {
1105 ubifs_err("bad LPT list (category %d)", cat);
1106 goto out;
1107 }
1108 }
1109 }
1110
1111 /* Check lp is on its category heap (if it has one) */
1112 if (in_tree && cat > 0 && cat <= LPROPS_HEAP_CNT) {
1113 struct ubifs_lpt_heap *heap = &c->lpt_heap[cat - 1];
1114
1115 if ((lp->hpos != -1 && heap->arr[lp->hpos]->lnum != lnum) ||
1116 lp != heap->arr[lp->hpos]) {
1117 ubifs_err("bad LPT heap (category %d)", cat);
1118 goto out;
1119 }
1120 }
1121
1122 sleb = ubifs_scan(c, lnum, 0, c->dbg_buf);
1123 if (IS_ERR(sleb)) {
1124 /*
1125 * After an unclean unmount, empty and freeable LEBs
1126 * may contain garbage.
1127 */
1128 if (lp->free == c->leb_size) {
1129 ubifs_err("scan errors were in empty LEB "
1130 "- continuing checking");
1131 lst->empty_lebs += 1;
1132 lst->total_free += c->leb_size;
1133 lst->total_dark += calc_dark(c, c->leb_size);
1134 return LPT_SCAN_CONTINUE;
1135 }
1136
1137 if (lp->free + lp->dirty == c->leb_size &&
1138 !(lp->flags & LPROPS_INDEX)) {
1139 ubifs_err("scan errors were in freeable LEB "
1140 "- continuing checking");
1141 lst->total_free += lp->free;
1142 lst->total_dirty += lp->dirty;
1143 lst->total_dark += calc_dark(c, c->leb_size);
1144 return LPT_SCAN_CONTINUE;
1145 }
1146 data->err = PTR_ERR(sleb);
1147 return LPT_SCAN_STOP;
1148 }
1149
1150 is_idx = -1;
1151 list_for_each_entry(snod, &sleb->nodes, list) {
1152 int found, level = 0;
1153
1154 cond_resched();
1155
1156 if (is_idx == -1)
1157 is_idx = (snod->type == UBIFS_IDX_NODE) ? 1 : 0;
1158
1159 if (is_idx && snod->type != UBIFS_IDX_NODE) {
1160 ubifs_err("indexing node in data LEB %d:%d",
1161 lnum, snod->offs);
1162 goto out_destroy;
1163 }
1164
1165 if (snod->type == UBIFS_IDX_NODE) {
1166 struct ubifs_idx_node *idx = snod->node;
1167
1168 key_read(c, ubifs_idx_key(c, idx), &snod->key);
1169 level = le16_to_cpu(idx->level);
1170 }
1171
1172 found = ubifs_tnc_has_node(c, &snod->key, level, lnum,
1173 snod->offs, is_idx);
1174 if (found) {
1175 if (found < 0)
1176 goto out_destroy;
1177 used += ALIGN(snod->len, 8);
1178 }
1179 }
1180
1181 free = c->leb_size - sleb->endpt;
1182 dirty = sleb->endpt - used;
1183
1184 if (free > c->leb_size || free < 0 || dirty > c->leb_size ||
1185 dirty < 0) {
1186 ubifs_err("bad calculated accounting for LEB %d: "
1187 "free %d, dirty %d", lnum, free, dirty);
1188 goto out_destroy;
1189 }
1190
1191 if (lp->free + lp->dirty == c->leb_size &&
1192 free + dirty == c->leb_size)
1193 if ((is_idx && !(lp->flags & LPROPS_INDEX)) ||
1194 (!is_idx && free == c->leb_size) ||
1195 lp->free == c->leb_size) {
1196 /*
1197 * Empty or freeable LEBs could contain index
1198 * nodes from an uncompleted commit due to an
1199 * unclean unmount. Or they could be empty for
1200 * the same reason. Or it may simply not have been
1201 * unmapped.
1202 */
1203 free = lp->free;
1204 dirty = lp->dirty;
1205 is_idx = 0;
1206 }
1207
1208 if (is_idx && lp->free + lp->dirty == free + dirty &&
1209 lnum != c->ihead_lnum) {
1210 /*
1211 * After an unclean unmount, an index LEB could have a different
1212 * amount of free space than the value recorded by lprops. That
1213 * is because the in-the-gaps method may use free space or
1214 * create free space (as a side-effect of using ubi_leb_change
1215 * and not writing the whole LEB). The incorrect free space
1216 * value is not a problem because the index is only ever
1217 * allocated empty LEBs, so there will never be an attempt to
1218 * write to the free space at the end of an index LEB - except
1219 * by the in-the-gaps method for which it is not a problem.
1220 */
1221 free = lp->free;
1222 dirty = lp->dirty;
1223 }
1224
1225 if (lp->free != free || lp->dirty != dirty)
1226 goto out_print;
1227
1228 if (is_idx && !(lp->flags & LPROPS_INDEX)) {
1229 if (free == c->leb_size)
1230 /* Free but not unmapped LEB, it's fine */
1231 is_idx = 0;
1232 else {
1233 ubifs_err("indexing node without indexing "
1234 "flag");
1235 goto out_print;
1236 }
1237 }
1238
1239 if (!is_idx && (lp->flags & LPROPS_INDEX)) {
1240 ubifs_err("data node with indexing flag");
1241 goto out_print;
1242 }
1243
1244 if (free == c->leb_size)
1245 lst->empty_lebs += 1;
1246
1247 if (is_idx)
1248 lst->idx_lebs += 1;
1249
1250 if (!(lp->flags & LPROPS_INDEX))
1251 lst->total_used += c->leb_size - free - dirty;
1252 lst->total_free += free;
1253 lst->total_dirty += dirty;
1254
1255 if (!(lp->flags & LPROPS_INDEX)) {
1256 int spc = free + dirty;
1257
1258 if (spc < c->dead_wm)
1259 lst->total_dead += spc;
1260 else
1261 lst->total_dark += calc_dark(c, spc);
1262 }
1263
1264 ubifs_scan_destroy(sleb);
1265
1266 return LPT_SCAN_CONTINUE;
1267
1268out_print:
1269 ubifs_err("bad accounting of LEB %d: free %d, dirty %d flags %#x, "
1270 "should be free %d, dirty %d",
1271 lnum, lp->free, lp->dirty, lp->flags, free, dirty);
1272 dbg_dump_leb(c, lnum);
1273out_destroy:
1274 ubifs_scan_destroy(sleb);
1275out:
1276 data->err = -EINVAL;
1277 return LPT_SCAN_STOP;
1278}
1279
1280/**
1281 * dbg_check_lprops - check all LEB properties.
1282 * @c: UBIFS file-system description object
1283 *
1284 * This function checks all LEB properties and makes sure they are all correct.
1285 * It returns zero if everything is fine, %-EINVAL if there is an inconsistency
1286 * and other negative error codes in case of other errors. This function is
1287 * called while the file system is locked (because of commit start), so no
1288 * additional locking is required. Note that locking the LPT mutex would cause
1289 * a circular lock dependency with the TNC mutex.
1290 */
1291int dbg_check_lprops(struct ubifs_info *c)
1292{
1293 int i, err;
1294 struct scan_check_data data;
1295 struct ubifs_lp_stats *lst = &data.lst;
1296
1297 if (!(ubifs_chk_flags & UBIFS_CHK_LPROPS))
1298 return 0;
1299
1300 /*
1301 * As we are going to scan the media, the write buffers have to be
1302 * synchronized.
1303 */
1304 for (i = 0; i < c->jhead_cnt; i++) {
1305 err = ubifs_wbuf_sync(&c->jheads[i].wbuf);
1306 if (err)
1307 return err;
1308 }
1309
1310 memset(lst, 0, sizeof(struct ubifs_lp_stats));
1311
1312 data.err = 0;
1313 err = ubifs_lpt_scan_nolock(c, c->main_first, c->leb_cnt - 1,
1314 (ubifs_lpt_scan_callback)scan_check_cb,
1315 &data);
1316 if (err && err != -ENOSPC)
1317 goto out;
1318 if (data.err) {
1319 err = data.err;
1320 goto out;
1321 }
1322
1323 if (lst->empty_lebs != c->lst.empty_lebs ||
1324 lst->idx_lebs != c->lst.idx_lebs ||
1325 lst->total_free != c->lst.total_free ||
1326 lst->total_dirty != c->lst.total_dirty ||
1327 lst->total_used != c->lst.total_used) {
1328 ubifs_err("bad overall accounting");
1329 ubifs_err("calculated: empty_lebs %d, idx_lebs %d, "
1330 "total_free %lld, total_dirty %lld, total_used %lld",
1331 lst->empty_lebs, lst->idx_lebs, lst->total_free,
1332 lst->total_dirty, lst->total_used);
1333 ubifs_err("read from lprops: empty_lebs %d, idx_lebs %d, "
1334 "total_free %lld, total_dirty %lld, total_used %lld",
1335 c->lst.empty_lebs, c->lst.idx_lebs, c->lst.total_free,
1336 c->lst.total_dirty, c->lst.total_used);
1337 err = -EINVAL;
1338 goto out;
1339 }
1340
1341 if (lst->total_dead != c->lst.total_dead ||
1342 lst->total_dark != c->lst.total_dark) {
1343 ubifs_err("bad dead/dark space accounting");
1344 ubifs_err("calculated: total_dead %lld, total_dark %lld",
1345 lst->total_dead, lst->total_dark);
1346 ubifs_err("read from lprops: total_dead %lld, total_dark %lld",
1347 c->lst.total_dead, c->lst.total_dark);
1348 err = -EINVAL;
1349 goto out;
1350 }
1351
1352 err = dbg_check_cats(c);
1353out:
1354 return err;
1355}
1356
1357#endif /* CONFIG_UBIFS_FS_DEBUG */
diff --git a/fs/ubifs/lpt.c b/fs/ubifs/lpt.c
new file mode 100644
index 000000000000..9ff2463177e5
--- /dev/null
+++ b/fs/ubifs/lpt.c
@@ -0,0 +1,2243 @@
1/*
2 * This file is part of UBIFS.
3 *
4 * Copyright (C) 2006-2008 Nokia Corporation.
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms of the GNU General Public License version 2 as published by
8 * the Free Software Foundation.
9 *
10 * This program is distributed in the hope that it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13 * more details.
14 *
15 * You should have received a copy of the GNU General Public License along with
16 * this program; if not, write to the Free Software Foundation, Inc., 51
17 * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
18 *
19 * Authors: Adrian Hunter
20 * Artem Bityutskiy (Битюцкий Артём)
21 */
22
23/*
24 * This file implements the LEB properties tree (LPT) area. The LPT area
25 * contains the LEB properties tree, a table of LPT area eraseblocks (ltab), and
26 * (for the "big" model) a table of saved LEB numbers (lsave). The LPT area sits
27 * between the log and the orphan area.
28 *
29 * The LPT area is like a miniature self-contained file system. It is required
30 * that it never runs out of space, is fast to access and update, and scales
31 * logarithmically. The LEB properties tree is implemented as a wandering tree
32 * much like the TNC, and the LPT area has its own garbage collection.
33 *
34 * The LPT has two slightly different forms called the "small model" and the
35 * "big model". The small model is used when the entire LEB properties table
36 * can be written into a single eraseblock. In that case, garbage collection
37 * consists of just writing the whole table, which therefore makes all other
38 * eraseblocks reusable. In the case of the big model, dirty eraseblocks are
39 * selected for garbage collection, which consists are marking the nodes in
40 * that LEB as dirty, and then only the dirty nodes are written out. Also, in
41 * the case of the big model, a table of LEB numbers is saved so that the entire
42 * LPT does not to be scanned looking for empty eraseblocks when UBIFS is first
43 * mounted.
44 */
45
46#include <linux/crc16.h>
47#include "ubifs.h"
48
49/**
50 * do_calc_lpt_geom - calculate sizes for the LPT area.
51 * @c: the UBIFS file-system description object
52 *
53 * Calculate the sizes of LPT bit fields, nodes, and tree, based on the
54 * properties of the flash and whether LPT is "big" (c->big_lpt).
55 */
56static void do_calc_lpt_geom(struct ubifs_info *c)
57{
58 int i, n, bits, per_leb_wastage, max_pnode_cnt;
59 long long sz, tot_wastage;
60
61 n = c->main_lebs + c->max_leb_cnt - c->leb_cnt;
62 max_pnode_cnt = DIV_ROUND_UP(n, UBIFS_LPT_FANOUT);
63
64 c->lpt_hght = 1;
65 n = UBIFS_LPT_FANOUT;
66 while (n < max_pnode_cnt) {
67 c->lpt_hght += 1;
68 n <<= UBIFS_LPT_FANOUT_SHIFT;
69 }
70
71 c->pnode_cnt = DIV_ROUND_UP(c->main_lebs, UBIFS_LPT_FANOUT);
72
73 n = DIV_ROUND_UP(c->pnode_cnt, UBIFS_LPT_FANOUT);
74 c->nnode_cnt = n;
75 for (i = 1; i < c->lpt_hght; i++) {
76 n = DIV_ROUND_UP(n, UBIFS_LPT_FANOUT);
77 c->nnode_cnt += n;
78 }
79
80 c->space_bits = fls(c->leb_size) - 3;
81 c->lpt_lnum_bits = fls(c->lpt_lebs);
82 c->lpt_offs_bits = fls(c->leb_size - 1);
83 c->lpt_spc_bits = fls(c->leb_size);
84
85 n = DIV_ROUND_UP(c->max_leb_cnt, UBIFS_LPT_FANOUT);
86 c->pcnt_bits = fls(n - 1);
87
88 c->lnum_bits = fls(c->max_leb_cnt - 1);
89
90 bits = UBIFS_LPT_CRC_BITS + UBIFS_LPT_TYPE_BITS +
91 (c->big_lpt ? c->pcnt_bits : 0) +
92 (c->space_bits * 2 + 1) * UBIFS_LPT_FANOUT;
93 c->pnode_sz = (bits + 7) / 8;
94
95 bits = UBIFS_LPT_CRC_BITS + UBIFS_LPT_TYPE_BITS +
96 (c->big_lpt ? c->pcnt_bits : 0) +
97 (c->lpt_lnum_bits + c->lpt_offs_bits) * UBIFS_LPT_FANOUT;
98 c->nnode_sz = (bits + 7) / 8;
99
100 bits = UBIFS_LPT_CRC_BITS + UBIFS_LPT_TYPE_BITS +
101 c->lpt_lebs * c->lpt_spc_bits * 2;
102 c->ltab_sz = (bits + 7) / 8;
103
104 bits = UBIFS_LPT_CRC_BITS + UBIFS_LPT_TYPE_BITS +
105 c->lnum_bits * c->lsave_cnt;
106 c->lsave_sz = (bits + 7) / 8;
107
108 /* Calculate the minimum LPT size */
109 c->lpt_sz = (long long)c->pnode_cnt * c->pnode_sz;
110 c->lpt_sz += (long long)c->nnode_cnt * c->nnode_sz;
111 c->lpt_sz += c->ltab_sz;
112 c->lpt_sz += c->lsave_sz;
113
114 /* Add wastage */
115 sz = c->lpt_sz;
116 per_leb_wastage = max_t(int, c->pnode_sz, c->nnode_sz);
117 sz += per_leb_wastage;
118 tot_wastage = per_leb_wastage;
119 while (sz > c->leb_size) {
120 sz += per_leb_wastage;
121 sz -= c->leb_size;
122 tot_wastage += per_leb_wastage;
123 }
124 tot_wastage += ALIGN(sz, c->min_io_size) - sz;
125 c->lpt_sz += tot_wastage;
126}
127
128/**
129 * ubifs_calc_lpt_geom - calculate and check sizes for the LPT area.
130 * @c: the UBIFS file-system description object
131 *
132 * This function returns %0 on success and a negative error code on failure.
133 */
134int ubifs_calc_lpt_geom(struct ubifs_info *c)
135{
136 int lebs_needed;
137 uint64_t sz;
138
139 do_calc_lpt_geom(c);
140
141 /* Verify that lpt_lebs is big enough */
142 sz = c->lpt_sz * 2; /* Must have at least 2 times the size */
143 sz += c->leb_size - 1;
144 do_div(sz, c->leb_size);
145 lebs_needed = sz;
146 if (lebs_needed > c->lpt_lebs) {
147 ubifs_err("too few LPT LEBs");
148 return -EINVAL;
149 }
150
151 /* Verify that ltab fits in a single LEB (since ltab is a single node */
152 if (c->ltab_sz > c->leb_size) {
153 ubifs_err("LPT ltab too big");
154 return -EINVAL;
155 }
156
157 c->check_lpt_free = c->big_lpt;
158
159 return 0;
160}
161
162/**
163 * calc_dflt_lpt_geom - calculate default LPT geometry.
164 * @c: the UBIFS file-system description object
165 * @main_lebs: number of main area LEBs is passed and returned here
166 * @big_lpt: whether the LPT area is "big" is returned here
167 *
168 * The size of the LPT area depends on parameters that themselves are dependent
169 * on the size of the LPT area. This function, successively recalculates the LPT
170 * area geometry until the parameters and resultant geometry are consistent.
171 *
172 * This function returns %0 on success and a negative error code on failure.
173 */
174static int calc_dflt_lpt_geom(struct ubifs_info *c, int *main_lebs,
175 int *big_lpt)
176{
177 int i, lebs_needed;
178 uint64_t sz;
179
180 /* Start by assuming the minimum number of LPT LEBs */
181 c->lpt_lebs = UBIFS_MIN_LPT_LEBS;
182 c->main_lebs = *main_lebs - c->lpt_lebs;
183 if (c->main_lebs <= 0)
184 return -EINVAL;
185
186 /* And assume we will use the small LPT model */
187 c->big_lpt = 0;
188
189 /*
190 * Calculate the geometry based on assumptions above and then see if it
191 * makes sense
192 */
193 do_calc_lpt_geom(c);
194
195 /* Small LPT model must have lpt_sz < leb_size */
196 if (c->lpt_sz > c->leb_size) {
197 /* Nope, so try again using big LPT model */
198 c->big_lpt = 1;
199 do_calc_lpt_geom(c);
200 }
201
202 /* Now check there are enough LPT LEBs */
203 for (i = 0; i < 64 ; i++) {
204 sz = c->lpt_sz * 4; /* Allow 4 times the size */
205 sz += c->leb_size - 1;
206 do_div(sz, c->leb_size);
207 lebs_needed = sz;
208 if (lebs_needed > c->lpt_lebs) {
209 /* Not enough LPT LEBs so try again with more */
210 c->lpt_lebs = lebs_needed;
211 c->main_lebs = *main_lebs - c->lpt_lebs;
212 if (c->main_lebs <= 0)
213 return -EINVAL;
214 do_calc_lpt_geom(c);
215 continue;
216 }
217 if (c->ltab_sz > c->leb_size) {
218 ubifs_err("LPT ltab too big");
219 return -EINVAL;
220 }
221 *main_lebs = c->main_lebs;
222 *big_lpt = c->big_lpt;
223 return 0;
224 }
225 return -EINVAL;
226}
227
228/**
229 * pack_bits - pack bit fields end-to-end.
230 * @addr: address at which to pack (passed and next address returned)
231 * @pos: bit position at which to pack (passed and next position returned)
232 * @val: value to pack
233 * @nrbits: number of bits of value to pack (1-32)
234 */
235static void pack_bits(uint8_t **addr, int *pos, uint32_t val, int nrbits)
236{
237 uint8_t *p = *addr;
238 int b = *pos;
239
240 ubifs_assert(nrbits > 0);
241 ubifs_assert(nrbits <= 32);
242 ubifs_assert(*pos >= 0);
243 ubifs_assert(*pos < 8);
244 ubifs_assert((val >> nrbits) == 0 || nrbits == 32);
245 if (b) {
246 *p |= ((uint8_t)val) << b;
247 nrbits += b;
248 if (nrbits > 8) {
249 *++p = (uint8_t)(val >>= (8 - b));
250 if (nrbits > 16) {
251 *++p = (uint8_t)(val >>= 8);
252 if (nrbits > 24) {
253 *++p = (uint8_t)(val >>= 8);
254 if (nrbits > 32)
255 *++p = (uint8_t)(val >>= 8);
256 }
257 }
258 }
259 } else {
260 *p = (uint8_t)val;
261 if (nrbits > 8) {
262 *++p = (uint8_t)(val >>= 8);
263 if (nrbits > 16) {
264 *++p = (uint8_t)(val >>= 8);
265 if (nrbits > 24)
266 *++p = (uint8_t)(val >>= 8);
267 }
268 }
269 }
270 b = nrbits & 7;
271 if (b == 0)
272 p++;
273 *addr = p;
274 *pos = b;
275}
276
277/**
278 * ubifs_unpack_bits - unpack bit fields.
279 * @addr: address at which to unpack (passed and next address returned)
280 * @pos: bit position at which to unpack (passed and next position returned)
281 * @nrbits: number of bits of value to unpack (1-32)
282 *
283 * This functions returns the value unpacked.
284 */
285uint32_t ubifs_unpack_bits(uint8_t **addr, int *pos, int nrbits)
286{
287 const int k = 32 - nrbits;
288 uint8_t *p = *addr;
289 int b = *pos;
290 uint32_t val;
291
292 ubifs_assert(nrbits > 0);
293 ubifs_assert(nrbits <= 32);
294 ubifs_assert(*pos >= 0);
295 ubifs_assert(*pos < 8);
296 if (b) {
297 val = p[1] | ((uint32_t)p[2] << 8) | ((uint32_t)p[3] << 16) |
298 ((uint32_t)p[4] << 24);
299 val <<= (8 - b);
300 val |= *p >> b;
301 nrbits += b;
302 } else
303 val = p[0] | ((uint32_t)p[1] << 8) | ((uint32_t)p[2] << 16) |
304 ((uint32_t)p[3] << 24);
305 val <<= k;
306 val >>= k;
307 b = nrbits & 7;
308 p += nrbits / 8;
309 *addr = p;
310 *pos = b;
311 ubifs_assert((val >> nrbits) == 0 || nrbits - b == 32);
312 return val;
313}
314
315/**
316 * ubifs_pack_pnode - pack all the bit fields of a pnode.
317 * @c: UBIFS file-system description object
318 * @buf: buffer into which to pack
319 * @pnode: pnode to pack
320 */
321void ubifs_pack_pnode(struct ubifs_info *c, void *buf,
322 struct ubifs_pnode *pnode)
323{
324 uint8_t *addr = buf + UBIFS_LPT_CRC_BYTES;
325 int i, pos = 0;
326 uint16_t crc;
327
328 pack_bits(&addr, &pos, UBIFS_LPT_PNODE, UBIFS_LPT_TYPE_BITS);
329 if (c->big_lpt)
330 pack_bits(&addr, &pos, pnode->num, c->pcnt_bits);
331 for (i = 0; i < UBIFS_LPT_FANOUT; i++) {
332 pack_bits(&addr, &pos, pnode->lprops[i].free >> 3,
333 c->space_bits);
334 pack_bits(&addr, &pos, pnode->lprops[i].dirty >> 3,
335 c->space_bits);
336 if (pnode->lprops[i].flags & LPROPS_INDEX)
337 pack_bits(&addr, &pos, 1, 1);
338 else
339 pack_bits(&addr, &pos, 0, 1);
340 }
341 crc = crc16(-1, buf + UBIFS_LPT_CRC_BYTES,
342 c->pnode_sz - UBIFS_LPT_CRC_BYTES);
343 addr = buf;
344 pos = 0;
345 pack_bits(&addr, &pos, crc, UBIFS_LPT_CRC_BITS);
346}
347
348/**
349 * ubifs_pack_nnode - pack all the bit fields of a nnode.
350 * @c: UBIFS file-system description object
351 * @buf: buffer into which to pack
352 * @nnode: nnode to pack
353 */
354void ubifs_pack_nnode(struct ubifs_info *c, void *buf,
355 struct ubifs_nnode *nnode)
356{
357 uint8_t *addr = buf + UBIFS_LPT_CRC_BYTES;
358 int i, pos = 0;
359 uint16_t crc;
360
361 pack_bits(&addr, &pos, UBIFS_LPT_NNODE, UBIFS_LPT_TYPE_BITS);
362 if (c->big_lpt)
363 pack_bits(&addr, &pos, nnode->num, c->pcnt_bits);
364 for (i = 0; i < UBIFS_LPT_FANOUT; i++) {
365 int lnum = nnode->nbranch[i].lnum;
366
367 if (lnum == 0)
368 lnum = c->lpt_last + 1;
369 pack_bits(&addr, &pos, lnum - c->lpt_first, c->lpt_lnum_bits);
370 pack_bits(&addr, &pos, nnode->nbranch[i].offs,
371 c->lpt_offs_bits);
372 }
373 crc = crc16(-1, buf + UBIFS_LPT_CRC_BYTES,
374 c->nnode_sz - UBIFS_LPT_CRC_BYTES);
375 addr = buf;
376 pos = 0;
377 pack_bits(&addr, &pos, crc, UBIFS_LPT_CRC_BITS);
378}
379
380/**
381 * ubifs_pack_ltab - pack the LPT's own lprops table.
382 * @c: UBIFS file-system description object
383 * @buf: buffer into which to pack
384 * @ltab: LPT's own lprops table to pack
385 */
386void ubifs_pack_ltab(struct ubifs_info *c, void *buf,
387 struct ubifs_lpt_lprops *ltab)
388{
389 uint8_t *addr = buf + UBIFS_LPT_CRC_BYTES;
390 int i, pos = 0;
391 uint16_t crc;
392
393 pack_bits(&addr, &pos, UBIFS_LPT_LTAB, UBIFS_LPT_TYPE_BITS);
394 for (i = 0; i < c->lpt_lebs; i++) {
395 pack_bits(&addr, &pos, ltab[i].free, c->lpt_spc_bits);
396 pack_bits(&addr, &pos, ltab[i].dirty, c->lpt_spc_bits);
397 }
398 crc = crc16(-1, buf + UBIFS_LPT_CRC_BYTES,
399 c->ltab_sz - UBIFS_LPT_CRC_BYTES);
400 addr = buf;
401 pos = 0;
402 pack_bits(&addr, &pos, crc, UBIFS_LPT_CRC_BITS);
403}
404
405/**
406 * ubifs_pack_lsave - pack the LPT's save table.
407 * @c: UBIFS file-system description object
408 * @buf: buffer into which to pack
409 * @lsave: LPT's save table to pack
410 */
411void ubifs_pack_lsave(struct ubifs_info *c, void *buf, int *lsave)
412{
413 uint8_t *addr = buf + UBIFS_LPT_CRC_BYTES;
414 int i, pos = 0;
415 uint16_t crc;
416
417 pack_bits(&addr, &pos, UBIFS_LPT_LSAVE, UBIFS_LPT_TYPE_BITS);
418 for (i = 0; i < c->lsave_cnt; i++)
419 pack_bits(&addr, &pos, lsave[i], c->lnum_bits);
420 crc = crc16(-1, buf + UBIFS_LPT_CRC_BYTES,
421 c->lsave_sz - UBIFS_LPT_CRC_BYTES);
422 addr = buf;
423 pos = 0;
424 pack_bits(&addr, &pos, crc, UBIFS_LPT_CRC_BITS);
425}
426
427/**
428 * ubifs_add_lpt_dirt - add dirty space to LPT LEB properties.
429 * @c: UBIFS file-system description object
430 * @lnum: LEB number to which to add dirty space
431 * @dirty: amount of dirty space to add
432 */
433void ubifs_add_lpt_dirt(struct ubifs_info *c, int lnum, int dirty)
434{
435 if (!dirty || !lnum)
436 return;
437 dbg_lp("LEB %d add %d to %d",
438 lnum, dirty, c->ltab[lnum - c->lpt_first].dirty);
439 ubifs_assert(lnum >= c->lpt_first && lnum <= c->lpt_last);
440 c->ltab[lnum - c->lpt_first].dirty += dirty;
441}
442
443/**
444 * set_ltab - set LPT LEB properties.
445 * @c: UBIFS file-system description object
446 * @lnum: LEB number
447 * @free: amount of free space
448 * @dirty: amount of dirty space
449 */
450static void set_ltab(struct ubifs_info *c, int lnum, int free, int dirty)
451{
452 dbg_lp("LEB %d free %d dirty %d to %d %d",
453 lnum, c->ltab[lnum - c->lpt_first].free,
454 c->ltab[lnum - c->lpt_first].dirty, free, dirty);
455 ubifs_assert(lnum >= c->lpt_first && lnum <= c->lpt_last);
456 c->ltab[lnum - c->lpt_first].free = free;
457 c->ltab[lnum - c->lpt_first].dirty = dirty;
458}
459
460/**
461 * ubifs_add_nnode_dirt - add dirty space to LPT LEB properties.
462 * @c: UBIFS file-system description object
463 * @nnode: nnode for which to add dirt
464 */
465void ubifs_add_nnode_dirt(struct ubifs_info *c, struct ubifs_nnode *nnode)
466{
467 struct ubifs_nnode *np = nnode->parent;
468
469 if (np)
470 ubifs_add_lpt_dirt(c, np->nbranch[nnode->iip].lnum,
471 c->nnode_sz);
472 else {
473 ubifs_add_lpt_dirt(c, c->lpt_lnum, c->nnode_sz);
474 if (!(c->lpt_drty_flgs & LTAB_DIRTY)) {
475 c->lpt_drty_flgs |= LTAB_DIRTY;
476 ubifs_add_lpt_dirt(c, c->ltab_lnum, c->ltab_sz);
477 }
478 }
479}
480
481/**
482 * add_pnode_dirt - add dirty space to LPT LEB properties.
483 * @c: UBIFS file-system description object
484 * @pnode: pnode for which to add dirt
485 */
486static void add_pnode_dirt(struct ubifs_info *c, struct ubifs_pnode *pnode)
487{
488 ubifs_add_lpt_dirt(c, pnode->parent->nbranch[pnode->iip].lnum,
489 c->pnode_sz);
490}
491
492/**
493 * calc_nnode_num - calculate nnode number.
494 * @row: the row in the tree (root is zero)
495 * @col: the column in the row (leftmost is zero)
496 *
497 * The nnode number is a number that uniquely identifies a nnode and can be used
498 * easily to traverse the tree from the root to that nnode.
499 *
500 * This function calculates and returns the nnode number for the nnode at @row
501 * and @col.
502 */
503static int calc_nnode_num(int row, int col)
504{
505 int num, bits;
506
507 num = 1;
508 while (row--) {
509 bits = (col & (UBIFS_LPT_FANOUT - 1));
510 col >>= UBIFS_LPT_FANOUT_SHIFT;
511 num <<= UBIFS_LPT_FANOUT_SHIFT;
512 num |= bits;
513 }
514 return num;
515}
516
517/**
518 * calc_nnode_num_from_parent - calculate nnode number.
519 * @c: UBIFS file-system description object
520 * @parent: parent nnode
521 * @iip: index in parent
522 *
523 * The nnode number is a number that uniquely identifies a nnode and can be used
524 * easily to traverse the tree from the root to that nnode.
525 *
526 * This function calculates and returns the nnode number based on the parent's
527 * nnode number and the index in parent.
528 */
529static int calc_nnode_num_from_parent(struct ubifs_info *c,
530 struct ubifs_nnode *parent, int iip)
531{
532 int num, shft;
533
534 if (!parent)
535 return 1;
536 shft = (c->lpt_hght - parent->level) * UBIFS_LPT_FANOUT_SHIFT;
537 num = parent->num ^ (1 << shft);
538 num |= (UBIFS_LPT_FANOUT + iip) << shft;
539 return num;
540}
541
542/**
543 * calc_pnode_num_from_parent - calculate pnode number.
544 * @c: UBIFS file-system description object
545 * @parent: parent nnode
546 * @iip: index in parent
547 *
548 * The pnode number is a number that uniquely identifies a pnode and can be used
549 * easily to traverse the tree from the root to that pnode.
550 *
551 * This function calculates and returns the pnode number based on the parent's
552 * nnode number and the index in parent.
553 */
554static int calc_pnode_num_from_parent(struct ubifs_info *c,
555 struct ubifs_nnode *parent, int iip)
556{
557 int i, n = c->lpt_hght - 1, pnum = parent->num, num = 0;
558
559 for (i = 0; i < n; i++) {
560 num <<= UBIFS_LPT_FANOUT_SHIFT;
561 num |= pnum & (UBIFS_LPT_FANOUT - 1);
562 pnum >>= UBIFS_LPT_FANOUT_SHIFT;
563 }
564 num <<= UBIFS_LPT_FANOUT_SHIFT;
565 num |= iip;
566 return num;
567}
568
569/**
570 * ubifs_create_dflt_lpt - create default LPT.
571 * @c: UBIFS file-system description object
572 * @main_lebs: number of main area LEBs is passed and returned here
573 * @lpt_first: LEB number of first LPT LEB
574 * @lpt_lebs: number of LEBs for LPT is passed and returned here
575 * @big_lpt: use big LPT model is passed and returned here
576 *
577 * This function returns %0 on success and a negative error code on failure.
578 */
579int ubifs_create_dflt_lpt(struct ubifs_info *c, int *main_lebs, int lpt_first,
580 int *lpt_lebs, int *big_lpt)
581{
582 int lnum, err = 0, node_sz, iopos, i, j, cnt, len, alen, row;
583 int blnum, boffs, bsz, bcnt;
584 struct ubifs_pnode *pnode = NULL;
585 struct ubifs_nnode *nnode = NULL;
586 void *buf = NULL, *p;
587 struct ubifs_lpt_lprops *ltab = NULL;
588 int *lsave = NULL;
589
590 err = calc_dflt_lpt_geom(c, main_lebs, big_lpt);
591 if (err)
592 return err;
593 *lpt_lebs = c->lpt_lebs;
594
595 /* Needed by 'ubifs_pack_nnode()' and 'set_ltab()' */
596 c->lpt_first = lpt_first;
597 /* Needed by 'set_ltab()' */
598 c->lpt_last = lpt_first + c->lpt_lebs - 1;
599 /* Needed by 'ubifs_pack_lsave()' */
600 c->main_first = c->leb_cnt - *main_lebs;
601
602 lsave = kmalloc(sizeof(int) * c->lsave_cnt, GFP_KERNEL);
603 pnode = kzalloc(sizeof(struct ubifs_pnode), GFP_KERNEL);
604 nnode = kzalloc(sizeof(struct ubifs_nnode), GFP_KERNEL);
605 buf = vmalloc(c->leb_size);
606 ltab = vmalloc(sizeof(struct ubifs_lpt_lprops) * c->lpt_lebs);
607 if (!pnode || !nnode || !buf || !ltab || !lsave) {
608 err = -ENOMEM;
609 goto out;
610 }
611
612 ubifs_assert(!c->ltab);
613 c->ltab = ltab; /* Needed by set_ltab */
614
615 /* Initialize LPT's own lprops */
616 for (i = 0; i < c->lpt_lebs; i++) {
617 ltab[i].free = c->leb_size;
618 ltab[i].dirty = 0;
619 ltab[i].tgc = 0;
620 ltab[i].cmt = 0;
621 }
622
623 lnum = lpt_first;
624 p = buf;
625 /* Number of leaf nodes (pnodes) */
626 cnt = c->pnode_cnt;
627
628 /*
629 * The first pnode contains the LEB properties for the LEBs that contain
630 * the root inode node and the root index node of the index tree.
631 */
632 node_sz = ALIGN(ubifs_idx_node_sz(c, 1), 8);
633 iopos = ALIGN(node_sz, c->min_io_size);
634 pnode->lprops[0].free = c->leb_size - iopos;
635 pnode->lprops[0].dirty = iopos - node_sz;
636 pnode->lprops[0].flags = LPROPS_INDEX;
637
638 node_sz = UBIFS_INO_NODE_SZ;
639 iopos = ALIGN(node_sz, c->min_io_size);
640 pnode->lprops[1].free = c->leb_size - iopos;
641 pnode->lprops[1].dirty = iopos - node_sz;
642
643 for (i = 2; i < UBIFS_LPT_FANOUT; i++)
644 pnode->lprops[i].free = c->leb_size;
645
646 /* Add first pnode */
647 ubifs_pack_pnode(c, p, pnode);
648 p += c->pnode_sz;
649 len = c->pnode_sz;
650 pnode->num += 1;
651
652 /* Reset pnode values for remaining pnodes */
653 pnode->lprops[0].free = c->leb_size;
654 pnode->lprops[0].dirty = 0;
655 pnode->lprops[0].flags = 0;
656
657 pnode->lprops[1].free = c->leb_size;
658 pnode->lprops[1].dirty = 0;
659
660 /*
661 * To calculate the internal node branches, we keep information about
662 * the level below.
663 */
664 blnum = lnum; /* LEB number of level below */
665 boffs = 0; /* Offset of level below */
666 bcnt = cnt; /* Number of nodes in level below */
667 bsz = c->pnode_sz; /* Size of nodes in level below */
668
669 /* Add all remaining pnodes */
670 for (i = 1; i < cnt; i++) {
671 if (len + c->pnode_sz > c->leb_size) {
672 alen = ALIGN(len, c->min_io_size);
673 set_ltab(c, lnum, c->leb_size - alen, alen - len);
674 memset(p, 0xff, alen - len);
675 err = ubi_leb_change(c->ubi, lnum++, buf, alen,
676 UBI_SHORTTERM);
677 if (err)
678 goto out;
679 p = buf;
680 len = 0;
681 }
682 ubifs_pack_pnode(c, p, pnode);
683 p += c->pnode_sz;
684 len += c->pnode_sz;
685 /*
686 * pnodes are simply numbered left to right starting at zero,
687 * which means the pnode number can be used easily to traverse
688 * down the tree to the corresponding pnode.
689 */
690 pnode->num += 1;
691 }
692
693 row = 0;
694 for (i = UBIFS_LPT_FANOUT; cnt > i; i <<= UBIFS_LPT_FANOUT_SHIFT)
695 row += 1;
696 /* Add all nnodes, one level at a time */
697 while (1) {
698 /* Number of internal nodes (nnodes) at next level */
699 cnt = DIV_ROUND_UP(cnt, UBIFS_LPT_FANOUT);
700 for (i = 0; i < cnt; i++) {
701 if (len + c->nnode_sz > c->leb_size) {
702 alen = ALIGN(len, c->min_io_size);
703 set_ltab(c, lnum, c->leb_size - alen,
704 alen - len);
705 memset(p, 0xff, alen - len);
706 err = ubi_leb_change(c->ubi, lnum++, buf, alen,
707 UBI_SHORTTERM);
708 if (err)
709 goto out;
710 p = buf;
711 len = 0;
712 }
713 /* Only 1 nnode at this level, so it is the root */
714 if (cnt == 1) {
715 c->lpt_lnum = lnum;
716 c->lpt_offs = len;
717 }
718 /* Set branches to the level below */
719 for (j = 0; j < UBIFS_LPT_FANOUT; j++) {
720 if (bcnt) {
721 if (boffs + bsz > c->leb_size) {
722 blnum += 1;
723 boffs = 0;
724 }
725 nnode->nbranch[j].lnum = blnum;
726 nnode->nbranch[j].offs = boffs;
727 boffs += bsz;
728 bcnt--;
729 } else {
730 nnode->nbranch[j].lnum = 0;
731 nnode->nbranch[j].offs = 0;
732 }
733 }
734 nnode->num = calc_nnode_num(row, i);
735 ubifs_pack_nnode(c, p, nnode);
736 p += c->nnode_sz;
737 len += c->nnode_sz;
738 }
739 /* Only 1 nnode at this level, so it is the root */
740 if (cnt == 1)
741 break;
742 /* Update the information about the level below */
743 bcnt = cnt;
744 bsz = c->nnode_sz;
745 row -= 1;
746 }
747
748 if (*big_lpt) {
749 /* Need to add LPT's save table */
750 if (len + c->lsave_sz > c->leb_size) {
751 alen = ALIGN(len, c->min_io_size);
752 set_ltab(c, lnum, c->leb_size - alen, alen - len);
753 memset(p, 0xff, alen - len);
754 err = ubi_leb_change(c->ubi, lnum++, buf, alen,
755 UBI_SHORTTERM);
756 if (err)
757 goto out;
758 p = buf;
759 len = 0;
760 }
761
762 c->lsave_lnum = lnum;
763 c->lsave_offs = len;
764
765 for (i = 0; i < c->lsave_cnt && i < *main_lebs; i++)
766 lsave[i] = c->main_first + i;
767 for (; i < c->lsave_cnt; i++)
768 lsave[i] = c->main_first;
769
770 ubifs_pack_lsave(c, p, lsave);
771 p += c->lsave_sz;
772 len += c->lsave_sz;
773 }
774
775 /* Need to add LPT's own LEB properties table */
776 if (len + c->ltab_sz > c->leb_size) {
777 alen = ALIGN(len, c->min_io_size);
778 set_ltab(c, lnum, c->leb_size - alen, alen - len);
779 memset(p, 0xff, alen - len);
780 err = ubi_leb_change(c->ubi, lnum++, buf, alen, UBI_SHORTTERM);
781 if (err)
782 goto out;
783 p = buf;
784 len = 0;
785 }
786
787 c->ltab_lnum = lnum;
788 c->ltab_offs = len;
789
790 /* Update ltab before packing it */
791 len += c->ltab_sz;
792 alen = ALIGN(len, c->min_io_size);
793 set_ltab(c, lnum, c->leb_size - alen, alen - len);
794
795 ubifs_pack_ltab(c, p, ltab);
796 p += c->ltab_sz;
797
798 /* Write remaining buffer */
799 memset(p, 0xff, alen - len);
800 err = ubi_leb_change(c->ubi, lnum, buf, alen, UBI_SHORTTERM);
801 if (err)
802 goto out;
803
804 c->nhead_lnum = lnum;
805 c->nhead_offs = ALIGN(len, c->min_io_size);
806
807 dbg_lp("space_bits %d", c->space_bits);
808 dbg_lp("lpt_lnum_bits %d", c->lpt_lnum_bits);
809 dbg_lp("lpt_offs_bits %d", c->lpt_offs_bits);
810 dbg_lp("lpt_spc_bits %d", c->lpt_spc_bits);
811 dbg_lp("pcnt_bits %d", c->pcnt_bits);
812 dbg_lp("lnum_bits %d", c->lnum_bits);
813 dbg_lp("pnode_sz %d", c->pnode_sz);
814 dbg_lp("nnode_sz %d", c->nnode_sz);
815 dbg_lp("ltab_sz %d", c->ltab_sz);
816 dbg_lp("lsave_sz %d", c->lsave_sz);
817 dbg_lp("lsave_cnt %d", c->lsave_cnt);
818 dbg_lp("lpt_hght %d", c->lpt_hght);
819 dbg_lp("big_lpt %d", c->big_lpt);
820 dbg_lp("LPT root is at %d:%d", c->lpt_lnum, c->lpt_offs);
821 dbg_lp("LPT head is at %d:%d", c->nhead_lnum, c->nhead_offs);
822 dbg_lp("LPT ltab is at %d:%d", c->ltab_lnum, c->ltab_offs);
823 if (c->big_lpt)
824 dbg_lp("LPT lsave is at %d:%d", c->lsave_lnum, c->lsave_offs);
825out:
826 c->ltab = NULL;
827 kfree(lsave);
828 vfree(ltab);
829 vfree(buf);
830 kfree(nnode);
831 kfree(pnode);
832 return err;
833}
834
835/**
836 * update_cats - add LEB properties of a pnode to LEB category lists and heaps.
837 * @c: UBIFS file-system description object
838 * @pnode: pnode
839 *
840 * When a pnode is loaded into memory, the LEB properties it contains are added,
841 * by this function, to the LEB category lists and heaps.
842 */
843static void update_cats(struct ubifs_info *c, struct ubifs_pnode *pnode)
844{
845 int i;
846
847 for (i = 0; i < UBIFS_LPT_FANOUT; i++) {
848 int cat = pnode->lprops[i].flags & LPROPS_CAT_MASK;
849 int lnum = pnode->lprops[i].lnum;
850
851 if (!lnum)
852 return;
853 ubifs_add_to_cat(c, &pnode->lprops[i], cat);
854 }
855}
856
857/**
858 * replace_cats - add LEB properties of a pnode to LEB category lists and heaps.
859 * @c: UBIFS file-system description object
860 * @old_pnode: pnode copied
861 * @new_pnode: pnode copy
862 *
863 * During commit it is sometimes necessary to copy a pnode
864 * (see dirty_cow_pnode). When that happens, references in
865 * category lists and heaps must be replaced. This function does that.
866 */
867static void replace_cats(struct ubifs_info *c, struct ubifs_pnode *old_pnode,
868 struct ubifs_pnode *new_pnode)
869{
870 int i;
871
872 for (i = 0; i < UBIFS_LPT_FANOUT; i++) {
873 if (!new_pnode->lprops[i].lnum)
874 return;
875 ubifs_replace_cat(c, &old_pnode->lprops[i],
876 &new_pnode->lprops[i]);
877 }
878}
879
880/**
881 * check_lpt_crc - check LPT node crc is correct.
882 * @c: UBIFS file-system description object
883 * @buf: buffer containing node
884 * @len: length of node
885 *
886 * This function returns %0 on success and a negative error code on failure.
887 */
888static int check_lpt_crc(void *buf, int len)
889{
890 int pos = 0;
891 uint8_t *addr = buf;
892 uint16_t crc, calc_crc;
893
894 crc = ubifs_unpack_bits(&addr, &pos, UBIFS_LPT_CRC_BITS);
895 calc_crc = crc16(-1, buf + UBIFS_LPT_CRC_BYTES,
896 len - UBIFS_LPT_CRC_BYTES);
897 if (crc != calc_crc) {
898 ubifs_err("invalid crc in LPT node: crc %hx calc %hx", crc,
899 calc_crc);
900 dbg_dump_stack();
901 return -EINVAL;
902 }
903 return 0;
904}
905
906/**
907 * check_lpt_type - check LPT node type is correct.
908 * @c: UBIFS file-system description object
909 * @addr: address of type bit field is passed and returned updated here
910 * @pos: position of type bit field is passed and returned updated here
911 * @type: expected type
912 *
913 * This function returns %0 on success and a negative error code on failure.
914 */
915static int check_lpt_type(uint8_t **addr, int *pos, int type)
916{
917 int node_type;
918
919 node_type = ubifs_unpack_bits(addr, pos, UBIFS_LPT_TYPE_BITS);
920 if (node_type != type) {
921 ubifs_err("invalid type (%d) in LPT node type %d", node_type,
922 type);
923 dbg_dump_stack();
924 return -EINVAL;
925 }
926 return 0;
927}
928
929/**
930 * unpack_pnode - unpack a pnode.
931 * @c: UBIFS file-system description object
932 * @buf: buffer containing packed pnode to unpack
933 * @pnode: pnode structure to fill
934 *
935 * This function returns %0 on success and a negative error code on failure.
936 */
937static int unpack_pnode(struct ubifs_info *c, void *buf,
938 struct ubifs_pnode *pnode)
939{
940 uint8_t *addr = buf + UBIFS_LPT_CRC_BYTES;
941 int i, pos = 0, err;
942
943 err = check_lpt_type(&addr, &pos, UBIFS_LPT_PNODE);
944 if (err)
945 return err;
946 if (c->big_lpt)
947 pnode->num = ubifs_unpack_bits(&addr, &pos, c->pcnt_bits);
948 for (i = 0; i < UBIFS_LPT_FANOUT; i++) {
949 struct ubifs_lprops * const lprops = &pnode->lprops[i];
950
951 lprops->free = ubifs_unpack_bits(&addr, &pos, c->space_bits);
952 lprops->free <<= 3;
953 lprops->dirty = ubifs_unpack_bits(&addr, &pos, c->space_bits);
954 lprops->dirty <<= 3;
955
956 if (ubifs_unpack_bits(&addr, &pos, 1))
957 lprops->flags = LPROPS_INDEX;
958 else
959 lprops->flags = 0;
960 lprops->flags |= ubifs_categorize_lprops(c, lprops);
961 }
962 err = check_lpt_crc(buf, c->pnode_sz);
963 return err;
964}
965
966/**
967 * unpack_nnode - unpack a nnode.
968 * @c: UBIFS file-system description object
969 * @buf: buffer containing packed nnode to unpack
970 * @nnode: nnode structure to fill
971 *
972 * This function returns %0 on success and a negative error code on failure.
973 */
974static int unpack_nnode(struct ubifs_info *c, void *buf,
975 struct ubifs_nnode *nnode)
976{
977 uint8_t *addr = buf + UBIFS_LPT_CRC_BYTES;
978 int i, pos = 0, err;
979
980 err = check_lpt_type(&addr, &pos, UBIFS_LPT_NNODE);
981 if (err)
982 return err;
983 if (c->big_lpt)
984 nnode->num = ubifs_unpack_bits(&addr, &pos, c->pcnt_bits);
985 for (i = 0; i < UBIFS_LPT_FANOUT; i++) {
986 int lnum;
987
988 lnum = ubifs_unpack_bits(&addr, &pos, c->lpt_lnum_bits) +
989 c->lpt_first;
990 if (lnum == c->lpt_last + 1)
991 lnum = 0;
992 nnode->nbranch[i].lnum = lnum;
993 nnode->nbranch[i].offs = ubifs_unpack_bits(&addr, &pos,
994 c->lpt_offs_bits);
995 }
996 err = check_lpt_crc(buf, c->nnode_sz);
997 return err;
998}
999
1000/**
1001 * unpack_ltab - unpack the LPT's own lprops table.
1002 * @c: UBIFS file-system description object
1003 * @buf: buffer from which to unpack
1004 *
1005 * This function returns %0 on success and a negative error code on failure.
1006 */
1007static int unpack_ltab(struct ubifs_info *c, void *buf)
1008{
1009 uint8_t *addr = buf + UBIFS_LPT_CRC_BYTES;
1010 int i, pos = 0, err;
1011
1012 err = check_lpt_type(&addr, &pos, UBIFS_LPT_LTAB);
1013 if (err)
1014 return err;
1015 for (i = 0; i < c->lpt_lebs; i++) {
1016 int free = ubifs_unpack_bits(&addr, &pos, c->lpt_spc_bits);
1017 int dirty = ubifs_unpack_bits(&addr, &pos, c->lpt_spc_bits);
1018
1019 if (free < 0 || free > c->leb_size || dirty < 0 ||
1020 dirty > c->leb_size || free + dirty > c->leb_size)
1021 return -EINVAL;
1022
1023 c->ltab[i].free = free;
1024 c->ltab[i].dirty = dirty;
1025 c->ltab[i].tgc = 0;
1026 c->ltab[i].cmt = 0;
1027 }
1028 err = check_lpt_crc(buf, c->ltab_sz);
1029 return err;
1030}
1031
1032/**
1033 * unpack_lsave - unpack the LPT's save table.
1034 * @c: UBIFS file-system description object
1035 * @buf: buffer from which to unpack
1036 *
1037 * This function returns %0 on success and a negative error code on failure.
1038 */
1039static int unpack_lsave(struct ubifs_info *c, void *buf)
1040{
1041 uint8_t *addr = buf + UBIFS_LPT_CRC_BYTES;
1042 int i, pos = 0, err;
1043
1044 err = check_lpt_type(&addr, &pos, UBIFS_LPT_LSAVE);
1045 if (err)
1046 return err;
1047 for (i = 0; i < c->lsave_cnt; i++) {
1048 int lnum = ubifs_unpack_bits(&addr, &pos, c->lnum_bits);
1049
1050 if (lnum < c->main_first || lnum >= c->leb_cnt)
1051 return -EINVAL;
1052 c->lsave[i] = lnum;
1053 }
1054 err = check_lpt_crc(buf, c->lsave_sz);
1055 return err;
1056}
1057
1058/**
1059 * validate_nnode - validate a nnode.
1060 * @c: UBIFS file-system description object
1061 * @nnode: nnode to validate
1062 * @parent: parent nnode (or NULL for the root nnode)
1063 * @iip: index in parent
1064 *
1065 * This function returns %0 on success and a negative error code on failure.
1066 */
1067static int validate_nnode(struct ubifs_info *c, struct ubifs_nnode *nnode,
1068 struct ubifs_nnode *parent, int iip)
1069{
1070 int i, lvl, max_offs;
1071
1072 if (c->big_lpt) {
1073 int num = calc_nnode_num_from_parent(c, parent, iip);
1074
1075 if (nnode->num != num)
1076 return -EINVAL;
1077 }
1078 lvl = parent ? parent->level - 1 : c->lpt_hght;
1079 if (lvl < 1)
1080 return -EINVAL;
1081 if (lvl == 1)
1082 max_offs = c->leb_size - c->pnode_sz;
1083 else
1084 max_offs = c->leb_size - c->nnode_sz;
1085 for (i = 0; i < UBIFS_LPT_FANOUT; i++) {
1086 int lnum = nnode->nbranch[i].lnum;
1087 int offs = nnode->nbranch[i].offs;
1088
1089 if (lnum == 0) {
1090 if (offs != 0)
1091 return -EINVAL;
1092 continue;
1093 }
1094 if (lnum < c->lpt_first || lnum > c->lpt_last)
1095 return -EINVAL;
1096 if (offs < 0 || offs > max_offs)
1097 return -EINVAL;
1098 }
1099 return 0;
1100}
1101
1102/**
1103 * validate_pnode - validate a pnode.
1104 * @c: UBIFS file-system description object
1105 * @pnode: pnode to validate
1106 * @parent: parent nnode
1107 * @iip: index in parent
1108 *
1109 * This function returns %0 on success and a negative error code on failure.
1110 */
1111static int validate_pnode(struct ubifs_info *c, struct ubifs_pnode *pnode,
1112 struct ubifs_nnode *parent, int iip)
1113{
1114 int i;
1115
1116 if (c->big_lpt) {
1117 int num = calc_pnode_num_from_parent(c, parent, iip);
1118
1119 if (pnode->num != num)
1120 return -EINVAL;
1121 }
1122 for (i = 0; i < UBIFS_LPT_FANOUT; i++) {
1123 int free = pnode->lprops[i].free;
1124 int dirty = pnode->lprops[i].dirty;
1125
1126 if (free < 0 || free > c->leb_size || free % c->min_io_size ||
1127 (free & 7))
1128 return -EINVAL;
1129 if (dirty < 0 || dirty > c->leb_size || (dirty & 7))
1130 return -EINVAL;
1131 if (dirty + free > c->leb_size)
1132 return -EINVAL;
1133 }
1134 return 0;
1135}
1136
1137/**
1138 * set_pnode_lnum - set LEB numbers on a pnode.
1139 * @c: UBIFS file-system description object
1140 * @pnode: pnode to update
1141 *
1142 * This function calculates the LEB numbers for the LEB properties it contains
1143 * based on the pnode number.
1144 */
1145static void set_pnode_lnum(struct ubifs_info *c, struct ubifs_pnode *pnode)
1146{
1147 int i, lnum;
1148
1149 lnum = (pnode->num << UBIFS_LPT_FANOUT_SHIFT) + c->main_first;
1150 for (i = 0; i < UBIFS_LPT_FANOUT; i++) {
1151 if (lnum >= c->leb_cnt)
1152 return;
1153 pnode->lprops[i].lnum = lnum++;
1154 }
1155}
1156
1157/**
1158 * ubifs_read_nnode - read a nnode from flash and link it to the tree in memory.
1159 * @c: UBIFS file-system description object
1160 * @parent: parent nnode (or NULL for the root)
1161 * @iip: index in parent
1162 *
1163 * This function returns %0 on success and a negative error code on failure.
1164 */
1165int ubifs_read_nnode(struct ubifs_info *c, struct ubifs_nnode *parent, int iip)
1166{
1167 struct ubifs_nbranch *branch = NULL;
1168 struct ubifs_nnode *nnode = NULL;
1169 void *buf = c->lpt_nod_buf;
1170 int err, lnum, offs;
1171
1172 if (parent) {
1173 branch = &parent->nbranch[iip];
1174 lnum = branch->lnum;
1175 offs = branch->offs;
1176 } else {
1177 lnum = c->lpt_lnum;
1178 offs = c->lpt_offs;
1179 }
1180 nnode = kzalloc(sizeof(struct ubifs_nnode), GFP_NOFS);
1181 if (!nnode) {
1182 err = -ENOMEM;
1183 goto out;
1184 }
1185 if (lnum == 0) {
1186 /*
1187 * This nnode was not written which just means that the LEB
1188 * properties in the subtree below it describe empty LEBs. We
1189 * make the nnode as though we had read it, which in fact means
1190 * doing almost nothing.
1191 */
1192 if (c->big_lpt)
1193 nnode->num = calc_nnode_num_from_parent(c, parent, iip);
1194 } else {
1195 err = ubi_read(c->ubi, lnum, buf, offs, c->nnode_sz);
1196 if (err)
1197 goto out;
1198 err = unpack_nnode(c, buf, nnode);
1199 if (err)
1200 goto out;
1201 }
1202 err = validate_nnode(c, nnode, parent, iip);
1203 if (err)
1204 goto out;
1205 if (!c->big_lpt)
1206 nnode->num = calc_nnode_num_from_parent(c, parent, iip);
1207 if (parent) {
1208 branch->nnode = nnode;
1209 nnode->level = parent->level - 1;
1210 } else {
1211 c->nroot = nnode;
1212 nnode->level = c->lpt_hght;
1213 }
1214 nnode->parent = parent;
1215 nnode->iip = iip;
1216 return 0;
1217
1218out:
1219 ubifs_err("error %d reading nnode at %d:%d", err, lnum, offs);
1220 kfree(nnode);
1221 return err;
1222}
1223
1224/**
1225 * read_pnode - read a pnode from flash and link it to the tree in memory.
1226 * @c: UBIFS file-system description object
1227 * @parent: parent nnode
1228 * @iip: index in parent
1229 *
1230 * This function returns %0 on success and a negative error code on failure.
1231 */
1232static int read_pnode(struct ubifs_info *c, struct ubifs_nnode *parent, int iip)
1233{
1234 struct ubifs_nbranch *branch;
1235 struct ubifs_pnode *pnode = NULL;
1236 void *buf = c->lpt_nod_buf;
1237 int err, lnum, offs;
1238
1239 branch = &parent->nbranch[iip];
1240 lnum = branch->lnum;
1241 offs = branch->offs;
1242 pnode = kzalloc(sizeof(struct ubifs_pnode), GFP_NOFS);
1243 if (!pnode) {
1244 err = -ENOMEM;
1245 goto out;
1246 }
1247 if (lnum == 0) {
1248 /*
1249 * This pnode was not written which just means that the LEB
1250 * properties in it describe empty LEBs. We make the pnode as
1251 * though we had read it.
1252 */
1253 int i;
1254
1255 if (c->big_lpt)
1256 pnode->num = calc_pnode_num_from_parent(c, parent, iip);
1257 for (i = 0; i < UBIFS_LPT_FANOUT; i++) {
1258 struct ubifs_lprops * const lprops = &pnode->lprops[i];
1259
1260 lprops->free = c->leb_size;
1261 lprops->flags = ubifs_categorize_lprops(c, lprops);
1262 }
1263 } else {
1264 err = ubi_read(c->ubi, lnum, buf, offs, c->pnode_sz);
1265 if (err)
1266 goto out;
1267 err = unpack_pnode(c, buf, pnode);
1268 if (err)
1269 goto out;
1270 }
1271 err = validate_pnode(c, pnode, parent, iip);
1272 if (err)
1273 goto out;
1274 if (!c->big_lpt)
1275 pnode->num = calc_pnode_num_from_parent(c, parent, iip);
1276 branch->pnode = pnode;
1277 pnode->parent = parent;
1278 pnode->iip = iip;
1279 set_pnode_lnum(c, pnode);
1280 c->pnodes_have += 1;
1281 return 0;
1282
1283out:
1284 ubifs_err("error %d reading pnode at %d:%d", err, lnum, offs);
1285 dbg_dump_pnode(c, pnode, parent, iip);
1286 dbg_msg("calc num: %d", calc_pnode_num_from_parent(c, parent, iip));
1287 kfree(pnode);
1288 return err;
1289}
1290
1291/**
1292 * read_ltab - read LPT's own lprops table.
1293 * @c: UBIFS file-system description object
1294 *
1295 * This function returns %0 on success and a negative error code on failure.
1296 */
1297static int read_ltab(struct ubifs_info *c)
1298{
1299 int err;
1300 void *buf;
1301
1302 buf = vmalloc(c->ltab_sz);
1303 if (!buf)
1304 return -ENOMEM;
1305 err = ubi_read(c->ubi, c->ltab_lnum, buf, c->ltab_offs, c->ltab_sz);
1306 if (err)
1307 goto out;
1308 err = unpack_ltab(c, buf);
1309out:
1310 vfree(buf);
1311 return err;
1312}
1313
1314/**
1315 * read_lsave - read LPT's save table.
1316 * @c: UBIFS file-system description object
1317 *
1318 * This function returns %0 on success and a negative error code on failure.
1319 */
1320static int read_lsave(struct ubifs_info *c)
1321{
1322 int err, i;
1323 void *buf;
1324
1325 buf = vmalloc(c->lsave_sz);
1326 if (!buf)
1327 return -ENOMEM;
1328 err = ubi_read(c->ubi, c->lsave_lnum, buf, c->lsave_offs, c->lsave_sz);
1329 if (err)
1330 goto out;
1331 err = unpack_lsave(c, buf);
1332 if (err)
1333 goto out;
1334 for (i = 0; i < c->lsave_cnt; i++) {
1335 int lnum = c->lsave[i];
1336
1337 /*
1338 * Due to automatic resizing, the values in the lsave table
1339 * could be beyond the volume size - just ignore them.
1340 */
1341 if (lnum >= c->leb_cnt)
1342 continue;
1343 ubifs_lpt_lookup(c, lnum);
1344 }
1345out:
1346 vfree(buf);
1347 return err;
1348}
1349
1350/**
1351 * ubifs_get_nnode - get a nnode.
1352 * @c: UBIFS file-system description object
1353 * @parent: parent nnode (or NULL for the root)
1354 * @iip: index in parent
1355 *
1356 * This function returns a pointer to the nnode on success or a negative error
1357 * code on failure.
1358 */
1359struct ubifs_nnode *ubifs_get_nnode(struct ubifs_info *c,
1360 struct ubifs_nnode *parent, int iip)
1361{
1362 struct ubifs_nbranch *branch;
1363 struct ubifs_nnode *nnode;
1364 int err;
1365
1366 branch = &parent->nbranch[iip];
1367 nnode = branch->nnode;
1368 if (nnode)
1369 return nnode;
1370 err = ubifs_read_nnode(c, parent, iip);
1371 if (err)
1372 return ERR_PTR(err);
1373 return branch->nnode;
1374}
1375
1376/**
1377 * ubifs_get_pnode - get a pnode.
1378 * @c: UBIFS file-system description object
1379 * @parent: parent nnode
1380 * @iip: index in parent
1381 *
1382 * This function returns a pointer to the pnode on success or a negative error
1383 * code on failure.
1384 */
1385struct ubifs_pnode *ubifs_get_pnode(struct ubifs_info *c,
1386 struct ubifs_nnode *parent, int iip)
1387{
1388 struct ubifs_nbranch *branch;
1389 struct ubifs_pnode *pnode;
1390 int err;
1391
1392 branch = &parent->nbranch[iip];
1393 pnode = branch->pnode;
1394 if (pnode)
1395 return pnode;
1396 err = read_pnode(c, parent, iip);
1397 if (err)
1398 return ERR_PTR(err);
1399 update_cats(c, branch->pnode);
1400 return branch->pnode;
1401}
1402
1403/**
1404 * ubifs_lpt_lookup - lookup LEB properties in the LPT.
1405 * @c: UBIFS file-system description object
1406 * @lnum: LEB number to lookup
1407 *
1408 * This function returns a pointer to the LEB properties on success or a
1409 * negative error code on failure.
1410 */
1411struct ubifs_lprops *ubifs_lpt_lookup(struct ubifs_info *c, int lnum)
1412{
1413 int err, i, h, iip, shft;
1414 struct ubifs_nnode *nnode;
1415 struct ubifs_pnode *pnode;
1416
1417 if (!c->nroot) {
1418 err = ubifs_read_nnode(c, NULL, 0);
1419 if (err)
1420 return ERR_PTR(err);
1421 }
1422 nnode = c->nroot;
1423 i = lnum - c->main_first;
1424 shft = c->lpt_hght * UBIFS_LPT_FANOUT_SHIFT;
1425 for (h = 1; h < c->lpt_hght; h++) {
1426 iip = ((i >> shft) & (UBIFS_LPT_FANOUT - 1));
1427 shft -= UBIFS_LPT_FANOUT_SHIFT;
1428 nnode = ubifs_get_nnode(c, nnode, iip);
1429 if (IS_ERR(nnode))
1430 return ERR_PTR(PTR_ERR(nnode));
1431 }
1432 iip = ((i >> shft) & (UBIFS_LPT_FANOUT - 1));
1433 shft -= UBIFS_LPT_FANOUT_SHIFT;
1434 pnode = ubifs_get_pnode(c, nnode, iip);
1435 if (IS_ERR(pnode))
1436 return ERR_PTR(PTR_ERR(pnode));
1437 iip = (i & (UBIFS_LPT_FANOUT - 1));
1438 dbg_lp("LEB %d, free %d, dirty %d, flags %d", lnum,
1439 pnode->lprops[iip].free, pnode->lprops[iip].dirty,
1440 pnode->lprops[iip].flags);
1441 return &pnode->lprops[iip];
1442}
1443
1444/**
1445 * dirty_cow_nnode - ensure a nnode is not being committed.
1446 * @c: UBIFS file-system description object
1447 * @nnode: nnode to check
1448 *
1449 * Returns dirtied nnode on success or negative error code on failure.
1450 */
1451static struct ubifs_nnode *dirty_cow_nnode(struct ubifs_info *c,
1452 struct ubifs_nnode *nnode)
1453{
1454 struct ubifs_nnode *n;
1455 int i;
1456
1457 if (!test_bit(COW_CNODE, &nnode->flags)) {
1458 /* nnode is not being committed */
1459 if (!test_and_set_bit(DIRTY_CNODE, &nnode->flags)) {
1460 c->dirty_nn_cnt += 1;
1461 ubifs_add_nnode_dirt(c, nnode);
1462 }
1463 return nnode;
1464 }
1465
1466 /* nnode is being committed, so copy it */
1467 n = kmalloc(sizeof(struct ubifs_nnode), GFP_NOFS);
1468 if (unlikely(!n))
1469 return ERR_PTR(-ENOMEM);
1470
1471 memcpy(n, nnode, sizeof(struct ubifs_nnode));
1472 n->cnext = NULL;
1473 __set_bit(DIRTY_CNODE, &n->flags);
1474 __clear_bit(COW_CNODE, &n->flags);
1475
1476 /* The children now have new parent */
1477 for (i = 0; i < UBIFS_LPT_FANOUT; i++) {
1478 struct ubifs_nbranch *branch = &n->nbranch[i];
1479
1480 if (branch->cnode)
1481 branch->cnode->parent = n;
1482 }
1483
1484 ubifs_assert(!test_bit(OBSOLETE_CNODE, &nnode->flags));
1485 __set_bit(OBSOLETE_CNODE, &nnode->flags);
1486
1487 c->dirty_nn_cnt += 1;
1488 ubifs_add_nnode_dirt(c, nnode);
1489 if (nnode->parent)
1490 nnode->parent->nbranch[n->iip].nnode = n;
1491 else
1492 c->nroot = n;
1493 return n;
1494}
1495
1496/**
1497 * dirty_cow_pnode - ensure a pnode is not being committed.
1498 * @c: UBIFS file-system description object
1499 * @pnode: pnode to check
1500 *
1501 * Returns dirtied pnode on success or negative error code on failure.
1502 */
1503static struct ubifs_pnode *dirty_cow_pnode(struct ubifs_info *c,
1504 struct ubifs_pnode *pnode)
1505{
1506 struct ubifs_pnode *p;
1507
1508 if (!test_bit(COW_CNODE, &pnode->flags)) {
1509 /* pnode is not being committed */
1510 if (!test_and_set_bit(DIRTY_CNODE, &pnode->flags)) {
1511 c->dirty_pn_cnt += 1;
1512 add_pnode_dirt(c, pnode);
1513 }
1514 return pnode;
1515 }
1516
1517 /* pnode is being committed, so copy it */
1518 p = kmalloc(sizeof(struct ubifs_pnode), GFP_NOFS);
1519 if (unlikely(!p))
1520 return ERR_PTR(-ENOMEM);
1521
1522 memcpy(p, pnode, sizeof(struct ubifs_pnode));
1523 p->cnext = NULL;
1524 __set_bit(DIRTY_CNODE, &p->flags);
1525 __clear_bit(COW_CNODE, &p->flags);
1526 replace_cats(c, pnode, p);
1527
1528 ubifs_assert(!test_bit(OBSOLETE_CNODE, &pnode->flags));
1529 __set_bit(OBSOLETE_CNODE, &pnode->flags);
1530
1531 c->dirty_pn_cnt += 1;
1532 add_pnode_dirt(c, pnode);
1533 pnode->parent->nbranch[p->iip].pnode = p;
1534 return p;
1535}
1536
1537/**
1538 * ubifs_lpt_lookup_dirty - lookup LEB properties in the LPT.
1539 * @c: UBIFS file-system description object
1540 * @lnum: LEB number to lookup
1541 *
1542 * This function returns a pointer to the LEB properties on success or a
1543 * negative error code on failure.
1544 */
1545struct ubifs_lprops *ubifs_lpt_lookup_dirty(struct ubifs_info *c, int lnum)
1546{
1547 int err, i, h, iip, shft;
1548 struct ubifs_nnode *nnode;
1549 struct ubifs_pnode *pnode;
1550
1551 if (!c->nroot) {
1552 err = ubifs_read_nnode(c, NULL, 0);
1553 if (err)
1554 return ERR_PTR(err);
1555 }
1556 nnode = c->nroot;
1557 nnode = dirty_cow_nnode(c, nnode);
1558 if (IS_ERR(nnode))
1559 return ERR_PTR(PTR_ERR(nnode));
1560 i = lnum - c->main_first;
1561 shft = c->lpt_hght * UBIFS_LPT_FANOUT_SHIFT;
1562 for (h = 1; h < c->lpt_hght; h++) {
1563 iip = ((i >> shft) & (UBIFS_LPT_FANOUT - 1));
1564 shft -= UBIFS_LPT_FANOUT_SHIFT;
1565 nnode = ubifs_get_nnode(c, nnode, iip);
1566 if (IS_ERR(nnode))
1567 return ERR_PTR(PTR_ERR(nnode));
1568 nnode = dirty_cow_nnode(c, nnode);
1569 if (IS_ERR(nnode))
1570 return ERR_PTR(PTR_ERR(nnode));
1571 }
1572 iip = ((i >> shft) & (UBIFS_LPT_FANOUT - 1));
1573 shft -= UBIFS_LPT_FANOUT_SHIFT;
1574 pnode = ubifs_get_pnode(c, nnode, iip);
1575 if (IS_ERR(pnode))
1576 return ERR_PTR(PTR_ERR(pnode));
1577 pnode = dirty_cow_pnode(c, pnode);
1578 if (IS_ERR(pnode))
1579 return ERR_PTR(PTR_ERR(pnode));
1580 iip = (i & (UBIFS_LPT_FANOUT - 1));
1581 dbg_lp("LEB %d, free %d, dirty %d, flags %d", lnum,
1582 pnode->lprops[iip].free, pnode->lprops[iip].dirty,
1583 pnode->lprops[iip].flags);
1584 ubifs_assert(test_bit(DIRTY_CNODE, &pnode->flags));
1585 return &pnode->lprops[iip];
1586}
1587
1588/**
1589 * lpt_init_rd - initialize the LPT for reading.
1590 * @c: UBIFS file-system description object
1591 *
1592 * This function returns %0 on success and a negative error code on failure.
1593 */
1594static int lpt_init_rd(struct ubifs_info *c)
1595{
1596 int err, i;
1597
1598 c->ltab = vmalloc(sizeof(struct ubifs_lpt_lprops) * c->lpt_lebs);
1599 if (!c->ltab)
1600 return -ENOMEM;
1601
1602 i = max_t(int, c->nnode_sz, c->pnode_sz);
1603 c->lpt_nod_buf = kmalloc(i, GFP_KERNEL);
1604 if (!c->lpt_nod_buf)
1605 return -ENOMEM;
1606
1607 for (i = 0; i < LPROPS_HEAP_CNT; i++) {
1608 c->lpt_heap[i].arr = kmalloc(sizeof(void *) * LPT_HEAP_SZ,
1609 GFP_KERNEL);
1610 if (!c->lpt_heap[i].arr)
1611 return -ENOMEM;
1612 c->lpt_heap[i].cnt = 0;
1613 c->lpt_heap[i].max_cnt = LPT_HEAP_SZ;
1614 }
1615
1616 c->dirty_idx.arr = kmalloc(sizeof(void *) * LPT_HEAP_SZ, GFP_KERNEL);
1617 if (!c->dirty_idx.arr)
1618 return -ENOMEM;
1619 c->dirty_idx.cnt = 0;
1620 c->dirty_idx.max_cnt = LPT_HEAP_SZ;
1621
1622 err = read_ltab(c);
1623 if (err)
1624 return err;
1625
1626 dbg_lp("space_bits %d", c->space_bits);
1627 dbg_lp("lpt_lnum_bits %d", c->lpt_lnum_bits);
1628 dbg_lp("lpt_offs_bits %d", c->lpt_offs_bits);
1629 dbg_lp("lpt_spc_bits %d", c->lpt_spc_bits);
1630 dbg_lp("pcnt_bits %d", c->pcnt_bits);
1631 dbg_lp("lnum_bits %d", c->lnum_bits);
1632 dbg_lp("pnode_sz %d", c->pnode_sz);
1633 dbg_lp("nnode_sz %d", c->nnode_sz);
1634 dbg_lp("ltab_sz %d", c->ltab_sz);
1635 dbg_lp("lsave_sz %d", c->lsave_sz);
1636 dbg_lp("lsave_cnt %d", c->lsave_cnt);
1637 dbg_lp("lpt_hght %d", c->lpt_hght);
1638 dbg_lp("big_lpt %d", c->big_lpt);
1639 dbg_lp("LPT root is at %d:%d", c->lpt_lnum, c->lpt_offs);
1640 dbg_lp("LPT head is at %d:%d", c->nhead_lnum, c->nhead_offs);
1641 dbg_lp("LPT ltab is at %d:%d", c->ltab_lnum, c->ltab_offs);
1642 if (c->big_lpt)
1643 dbg_lp("LPT lsave is at %d:%d", c->lsave_lnum, c->lsave_offs);
1644
1645 return 0;
1646}
1647
1648/**
1649 * lpt_init_wr - initialize the LPT for writing.
1650 * @c: UBIFS file-system description object
1651 *
1652 * 'lpt_init_rd()' must have been called already.
1653 *
1654 * This function returns %0 on success and a negative error code on failure.
1655 */
1656static int lpt_init_wr(struct ubifs_info *c)
1657{
1658 int err, i;
1659
1660 c->ltab_cmt = vmalloc(sizeof(struct ubifs_lpt_lprops) * c->lpt_lebs);
1661 if (!c->ltab_cmt)
1662 return -ENOMEM;
1663
1664 c->lpt_buf = vmalloc(c->leb_size);
1665 if (!c->lpt_buf)
1666 return -ENOMEM;
1667
1668 if (c->big_lpt) {
1669 c->lsave = kmalloc(sizeof(int) * c->lsave_cnt, GFP_NOFS);
1670 if (!c->lsave)
1671 return -ENOMEM;
1672 err = read_lsave(c);
1673 if (err)
1674 return err;
1675 }
1676
1677 for (i = 0; i < c->lpt_lebs; i++)
1678 if (c->ltab[i].free == c->leb_size) {
1679 err = ubifs_leb_unmap(c, i + c->lpt_first);
1680 if (err)
1681 return err;
1682 }
1683
1684 return 0;
1685}
1686
1687/**
1688 * ubifs_lpt_init - initialize the LPT.
1689 * @c: UBIFS file-system description object
1690 * @rd: whether to initialize lpt for reading
1691 * @wr: whether to initialize lpt for writing
1692 *
1693 * For mounting 'rw', @rd and @wr are both true. For mounting 'ro', @rd is true
1694 * and @wr is false. For mounting from 'ro' to 'rw', @rd is false and @wr is
1695 * true.
1696 *
1697 * This function returns %0 on success and a negative error code on failure.
1698 */
1699int ubifs_lpt_init(struct ubifs_info *c, int rd, int wr)
1700{
1701 int err;
1702
1703 if (rd) {
1704 err = lpt_init_rd(c);
1705 if (err)
1706 return err;
1707 }
1708
1709 if (wr) {
1710 err = lpt_init_wr(c);
1711 if (err)
1712 return err;
1713 }
1714
1715 return 0;
1716}
1717
1718/**
1719 * struct lpt_scan_node - somewhere to put nodes while we scan LPT.
1720 * @nnode: where to keep a nnode
1721 * @pnode: where to keep a pnode
1722 * @cnode: where to keep a cnode
1723 * @in_tree: is the node in the tree in memory
1724 * @ptr.nnode: pointer to the nnode (if it is an nnode) which may be here or in
1725 * the tree
1726 * @ptr.pnode: ditto for pnode
1727 * @ptr.cnode: ditto for cnode
1728 */
1729struct lpt_scan_node {
1730 union {
1731 struct ubifs_nnode nnode;
1732 struct ubifs_pnode pnode;
1733 struct ubifs_cnode cnode;
1734 };
1735 int in_tree;
1736 union {
1737 struct ubifs_nnode *nnode;
1738 struct ubifs_pnode *pnode;
1739 struct ubifs_cnode *cnode;
1740 } ptr;
1741};
1742
1743/**
1744 * scan_get_nnode - for the scan, get a nnode from either the tree or flash.
1745 * @c: the UBIFS file-system description object
1746 * @path: where to put the nnode
1747 * @parent: parent of the nnode
1748 * @iip: index in parent of the nnode
1749 *
1750 * This function returns a pointer to the nnode on success or a negative error
1751 * code on failure.
1752 */
1753static struct ubifs_nnode *scan_get_nnode(struct ubifs_info *c,
1754 struct lpt_scan_node *path,
1755 struct ubifs_nnode *parent, int iip)
1756{
1757 struct ubifs_nbranch *branch;
1758 struct ubifs_nnode *nnode;
1759 void *buf = c->lpt_nod_buf;
1760 int err;
1761
1762 branch = &parent->nbranch[iip];
1763 nnode = branch->nnode;
1764 if (nnode) {
1765 path->in_tree = 1;
1766 path->ptr.nnode = nnode;
1767 return nnode;
1768 }
1769 nnode = &path->nnode;
1770 path->in_tree = 0;
1771 path->ptr.nnode = nnode;
1772 memset(nnode, 0, sizeof(struct ubifs_nnode));
1773 if (branch->lnum == 0) {
1774 /*
1775 * This nnode was not written which just means that the LEB
1776 * properties in the subtree below it describe empty LEBs. We
1777 * make the nnode as though we had read it, which in fact means
1778 * doing almost nothing.
1779 */
1780 if (c->big_lpt)
1781 nnode->num = calc_nnode_num_from_parent(c, parent, iip);
1782 } else {
1783 err = ubi_read(c->ubi, branch->lnum, buf, branch->offs,
1784 c->nnode_sz);
1785 if (err)
1786 return ERR_PTR(err);
1787 err = unpack_nnode(c, buf, nnode);
1788 if (err)
1789 return ERR_PTR(err);
1790 }
1791 err = validate_nnode(c, nnode, parent, iip);
1792 if (err)
1793 return ERR_PTR(err);
1794 if (!c->big_lpt)
1795 nnode->num = calc_nnode_num_from_parent(c, parent, iip);
1796 nnode->level = parent->level - 1;
1797 nnode->parent = parent;
1798 nnode->iip = iip;
1799 return nnode;
1800}
1801
1802/**
1803 * scan_get_pnode - for the scan, get a pnode from either the tree or flash.
1804 * @c: the UBIFS file-system description object
1805 * @path: where to put the pnode
1806 * @parent: parent of the pnode
1807 * @iip: index in parent of the pnode
1808 *
1809 * This function returns a pointer to the pnode on success or a negative error
1810 * code on failure.
1811 */
1812static struct ubifs_pnode *scan_get_pnode(struct ubifs_info *c,
1813 struct lpt_scan_node *path,
1814 struct ubifs_nnode *parent, int iip)
1815{
1816 struct ubifs_nbranch *branch;
1817 struct ubifs_pnode *pnode;
1818 void *buf = c->lpt_nod_buf;
1819 int err;
1820
1821 branch = &parent->nbranch[iip];
1822 pnode = branch->pnode;
1823 if (pnode) {
1824 path->in_tree = 1;
1825 path->ptr.pnode = pnode;
1826 return pnode;
1827 }
1828 pnode = &path->pnode;
1829 path->in_tree = 0;
1830 path->ptr.pnode = pnode;
1831 memset(pnode, 0, sizeof(struct ubifs_pnode));
1832 if (branch->lnum == 0) {
1833 /*
1834 * This pnode was not written which just means that the LEB
1835 * properties in it describe empty LEBs. We make the pnode as
1836 * though we had read it.
1837 */
1838 int i;
1839
1840 if (c->big_lpt)
1841 pnode->num = calc_pnode_num_from_parent(c, parent, iip);
1842 for (i = 0; i < UBIFS_LPT_FANOUT; i++) {
1843 struct ubifs_lprops * const lprops = &pnode->lprops[i];
1844
1845 lprops->free = c->leb_size;
1846 lprops->flags = ubifs_categorize_lprops(c, lprops);
1847 }
1848 } else {
1849 ubifs_assert(branch->lnum >= c->lpt_first &&
1850 branch->lnum <= c->lpt_last);
1851 ubifs_assert(branch->offs >= 0 && branch->offs < c->leb_size);
1852 err = ubi_read(c->ubi, branch->lnum, buf, branch->offs,
1853 c->pnode_sz);
1854 if (err)
1855 return ERR_PTR(err);
1856 err = unpack_pnode(c, buf, pnode);
1857 if (err)
1858 return ERR_PTR(err);
1859 }
1860 err = validate_pnode(c, pnode, parent, iip);
1861 if (err)
1862 return ERR_PTR(err);
1863 if (!c->big_lpt)
1864 pnode->num = calc_pnode_num_from_parent(c, parent, iip);
1865 pnode->parent = parent;
1866 pnode->iip = iip;
1867 set_pnode_lnum(c, pnode);
1868 return pnode;
1869}
1870
1871/**
1872 * ubifs_lpt_scan_nolock - scan the LPT.
1873 * @c: the UBIFS file-system description object
1874 * @start_lnum: LEB number from which to start scanning
1875 * @end_lnum: LEB number at which to stop scanning
1876 * @scan_cb: callback function called for each lprops
1877 * @data: data to be passed to the callback function
1878 *
1879 * This function returns %0 on success and a negative error code on failure.
1880 */
1881int ubifs_lpt_scan_nolock(struct ubifs_info *c, int start_lnum, int end_lnum,
1882 ubifs_lpt_scan_callback scan_cb, void *data)
1883{
1884 int err = 0, i, h, iip, shft;
1885 struct ubifs_nnode *nnode;
1886 struct ubifs_pnode *pnode;
1887 struct lpt_scan_node *path;
1888
1889 if (start_lnum == -1) {
1890 start_lnum = end_lnum + 1;
1891 if (start_lnum >= c->leb_cnt)
1892 start_lnum = c->main_first;
1893 }
1894
1895 ubifs_assert(start_lnum >= c->main_first && start_lnum < c->leb_cnt);
1896 ubifs_assert(end_lnum >= c->main_first && end_lnum < c->leb_cnt);
1897
1898 if (!c->nroot) {
1899 err = ubifs_read_nnode(c, NULL, 0);
1900 if (err)
1901 return err;
1902 }
1903
1904 path = kmalloc(sizeof(struct lpt_scan_node) * (c->lpt_hght + 1),
1905 GFP_NOFS);
1906 if (!path)
1907 return -ENOMEM;
1908
1909 path[0].ptr.nnode = c->nroot;
1910 path[0].in_tree = 1;
1911again:
1912 /* Descend to the pnode containing start_lnum */
1913 nnode = c->nroot;
1914 i = start_lnum - c->main_first;
1915 shft = c->lpt_hght * UBIFS_LPT_FANOUT_SHIFT;
1916 for (h = 1; h < c->lpt_hght; h++) {
1917 iip = ((i >> shft) & (UBIFS_LPT_FANOUT - 1));
1918 shft -= UBIFS_LPT_FANOUT_SHIFT;
1919 nnode = scan_get_nnode(c, path + h, nnode, iip);
1920 if (IS_ERR(nnode)) {
1921 err = PTR_ERR(nnode);
1922 goto out;
1923 }
1924 }
1925 iip = ((i >> shft) & (UBIFS_LPT_FANOUT - 1));
1926 shft -= UBIFS_LPT_FANOUT_SHIFT;
1927 pnode = scan_get_pnode(c, path + h, nnode, iip);
1928 if (IS_ERR(pnode)) {
1929 err = PTR_ERR(pnode);
1930 goto out;
1931 }
1932 iip = (i & (UBIFS_LPT_FANOUT - 1));
1933
1934 /* Loop for each lprops */
1935 while (1) {
1936 struct ubifs_lprops *lprops = &pnode->lprops[iip];
1937 int ret, lnum = lprops->lnum;
1938
1939 ret = scan_cb(c, lprops, path[h].in_tree, data);
1940 if (ret < 0) {
1941 err = ret;
1942 goto out;
1943 }
1944 if (ret & LPT_SCAN_ADD) {
1945 /* Add all the nodes in path to the tree in memory */
1946 for (h = 1; h < c->lpt_hght; h++) {
1947 const size_t sz = sizeof(struct ubifs_nnode);
1948 struct ubifs_nnode *parent;
1949
1950 if (path[h].in_tree)
1951 continue;
1952 nnode = kmalloc(sz, GFP_NOFS);
1953 if (!nnode) {
1954 err = -ENOMEM;
1955 goto out;
1956 }
1957 memcpy(nnode, &path[h].nnode, sz);
1958 parent = nnode->parent;
1959 parent->nbranch[nnode->iip].nnode = nnode;
1960 path[h].ptr.nnode = nnode;
1961 path[h].in_tree = 1;
1962 path[h + 1].cnode.parent = nnode;
1963 }
1964 if (path[h].in_tree)
1965 ubifs_ensure_cat(c, lprops);
1966 else {
1967 const size_t sz = sizeof(struct ubifs_pnode);
1968 struct ubifs_nnode *parent;
1969
1970 pnode = kmalloc(sz, GFP_NOFS);
1971 if (!pnode) {
1972 err = -ENOMEM;
1973 goto out;
1974 }
1975 memcpy(pnode, &path[h].pnode, sz);
1976 parent = pnode->parent;
1977 parent->nbranch[pnode->iip].pnode = pnode;
1978 path[h].ptr.pnode = pnode;
1979 path[h].in_tree = 1;
1980 update_cats(c, pnode);
1981 c->pnodes_have += 1;
1982 }
1983 err = dbg_check_lpt_nodes(c, (struct ubifs_cnode *)
1984 c->nroot, 0, 0);
1985 if (err)
1986 goto out;
1987 err = dbg_check_cats(c);
1988 if (err)
1989 goto out;
1990 }
1991 if (ret & LPT_SCAN_STOP) {
1992 err = 0;
1993 break;
1994 }
1995 /* Get the next lprops */
1996 if (lnum == end_lnum) {
1997 /*
1998 * We got to the end without finding what we were
1999 * looking for
2000 */
2001 err = -ENOSPC;
2002 goto out;
2003 }
2004 if (lnum + 1 >= c->leb_cnt) {
2005 /* Wrap-around to the beginning */
2006 start_lnum = c->main_first;
2007 goto again;
2008 }
2009 if (iip + 1 < UBIFS_LPT_FANOUT) {
2010 /* Next lprops is in the same pnode */
2011 iip += 1;
2012 continue;
2013 }
2014 /* We need to get the next pnode. Go up until we can go right */
2015 iip = pnode->iip;
2016 while (1) {
2017 h -= 1;
2018 ubifs_assert(h >= 0);
2019 nnode = path[h].ptr.nnode;
2020 if (iip + 1 < UBIFS_LPT_FANOUT)
2021 break;
2022 iip = nnode->iip;
2023 }
2024 /* Go right */
2025 iip += 1;
2026 /* Descend to the pnode */
2027 h += 1;
2028 for (; h < c->lpt_hght; h++) {
2029 nnode = scan_get_nnode(c, path + h, nnode, iip);
2030 if (IS_ERR(nnode)) {
2031 err = PTR_ERR(nnode);
2032 goto out;
2033 }
2034 iip = 0;
2035 }
2036 pnode = scan_get_pnode(c, path + h, nnode, iip);
2037 if (IS_ERR(pnode)) {
2038 err = PTR_ERR(pnode);
2039 goto out;
2040 }
2041 iip = 0;
2042 }
2043out:
2044 kfree(path);
2045 return err;
2046}
2047
2048#ifdef CONFIG_UBIFS_FS_DEBUG
2049
2050/**
2051 * dbg_chk_pnode - check a pnode.
2052 * @c: the UBIFS file-system description object
2053 * @pnode: pnode to check
2054 * @col: pnode column
2055 *
2056 * This function returns %0 on success and a negative error code on failure.
2057 */
2058static int dbg_chk_pnode(struct ubifs_info *c, struct ubifs_pnode *pnode,
2059 int col)
2060{
2061 int i;
2062
2063 if (pnode->num != col) {
2064 dbg_err("pnode num %d expected %d parent num %d iip %d",
2065 pnode->num, col, pnode->parent->num, pnode->iip);
2066 return -EINVAL;
2067 }
2068 for (i = 0; i < UBIFS_LPT_FANOUT; i++) {
2069 struct ubifs_lprops *lp, *lprops = &pnode->lprops[i];
2070 int lnum = (pnode->num << UBIFS_LPT_FANOUT_SHIFT) + i +
2071 c->main_first;
2072 int found, cat = lprops->flags & LPROPS_CAT_MASK;
2073 struct ubifs_lpt_heap *heap;
2074 struct list_head *list = NULL;
2075
2076 if (lnum >= c->leb_cnt)
2077 continue;
2078 if (lprops->lnum != lnum) {
2079 dbg_err("bad LEB number %d expected %d",
2080 lprops->lnum, lnum);
2081 return -EINVAL;
2082 }
2083 if (lprops->flags & LPROPS_TAKEN) {
2084 if (cat != LPROPS_UNCAT) {
2085 dbg_err("LEB %d taken but not uncat %d",
2086 lprops->lnum, cat);
2087 return -EINVAL;
2088 }
2089 continue;
2090 }
2091 if (lprops->flags & LPROPS_INDEX) {
2092 switch (cat) {
2093 case LPROPS_UNCAT:
2094 case LPROPS_DIRTY_IDX:
2095 case LPROPS_FRDI_IDX:
2096 break;
2097 default:
2098 dbg_err("LEB %d index but cat %d",
2099 lprops->lnum, cat);
2100 return -EINVAL;
2101 }
2102 } else {
2103 switch (cat) {
2104 case LPROPS_UNCAT:
2105 case LPROPS_DIRTY:
2106 case LPROPS_FREE:
2107 case LPROPS_EMPTY:
2108 case LPROPS_FREEABLE:
2109 break;
2110 default:
2111 dbg_err("LEB %d not index but cat %d",
2112 lprops->lnum, cat);
2113 return -EINVAL;
2114 }
2115 }
2116 switch (cat) {
2117 case LPROPS_UNCAT:
2118 list = &c->uncat_list;
2119 break;
2120 case LPROPS_EMPTY:
2121 list = &c->empty_list;
2122 break;
2123 case LPROPS_FREEABLE:
2124 list = &c->freeable_list;
2125 break;
2126 case LPROPS_FRDI_IDX:
2127 list = &c->frdi_idx_list;
2128 break;
2129 }
2130 found = 0;
2131 switch (cat) {
2132 case LPROPS_DIRTY:
2133 case LPROPS_DIRTY_IDX:
2134 case LPROPS_FREE:
2135 heap = &c->lpt_heap[cat - 1];
2136 if (lprops->hpos < heap->cnt &&
2137 heap->arr[lprops->hpos] == lprops)
2138 found = 1;
2139 break;
2140 case LPROPS_UNCAT:
2141 case LPROPS_EMPTY:
2142 case LPROPS_FREEABLE:
2143 case LPROPS_FRDI_IDX:
2144 list_for_each_entry(lp, list, list)
2145 if (lprops == lp) {
2146 found = 1;
2147 break;
2148 }
2149 break;
2150 }
2151 if (!found) {
2152 dbg_err("LEB %d cat %d not found in cat heap/list",
2153 lprops->lnum, cat);
2154 return -EINVAL;
2155 }
2156 switch (cat) {
2157 case LPROPS_EMPTY:
2158 if (lprops->free != c->leb_size) {
2159 dbg_err("LEB %d cat %d free %d dirty %d",
2160 lprops->lnum, cat, lprops->free,
2161 lprops->dirty);
2162 return -EINVAL;
2163 }
2164 case LPROPS_FREEABLE:
2165 case LPROPS_FRDI_IDX:
2166 if (lprops->free + lprops->dirty != c->leb_size) {
2167 dbg_err("LEB %d cat %d free %d dirty %d",
2168 lprops->lnum, cat, lprops->free,
2169 lprops->dirty);
2170 return -EINVAL;
2171 }
2172 }
2173 }
2174 return 0;
2175}
2176
2177/**
2178 * dbg_check_lpt_nodes - check nnodes and pnodes.
2179 * @c: the UBIFS file-system description object
2180 * @cnode: next cnode (nnode or pnode) to check
2181 * @row: row of cnode (root is zero)
2182 * @col: column of cnode (leftmost is zero)
2183 *
2184 * This function returns %0 on success and a negative error code on failure.
2185 */
2186int dbg_check_lpt_nodes(struct ubifs_info *c, struct ubifs_cnode *cnode,
2187 int row, int col)
2188{
2189 struct ubifs_nnode *nnode, *nn;
2190 struct ubifs_cnode *cn;
2191 int num, iip = 0, err;
2192
2193 if (!(ubifs_chk_flags & UBIFS_CHK_LPROPS))
2194 return 0;
2195
2196 while (cnode) {
2197 ubifs_assert(row >= 0);
2198 nnode = cnode->parent;
2199 if (cnode->level) {
2200 /* cnode is a nnode */
2201 num = calc_nnode_num(row, col);
2202 if (cnode->num != num) {
2203 dbg_err("nnode num %d expected %d "
2204 "parent num %d iip %d", cnode->num, num,
2205 (nnode ? nnode->num : 0), cnode->iip);
2206 return -EINVAL;
2207 }
2208 nn = (struct ubifs_nnode *)cnode;
2209 while (iip < UBIFS_LPT_FANOUT) {
2210 cn = nn->nbranch[iip].cnode;
2211 if (cn) {
2212 /* Go down */
2213 row += 1;
2214 col <<= UBIFS_LPT_FANOUT_SHIFT;
2215 col += iip;
2216 iip = 0;
2217 cnode = cn;
2218 break;
2219 }
2220 /* Go right */
2221 iip += 1;
2222 }
2223 if (iip < UBIFS_LPT_FANOUT)
2224 continue;
2225 } else {
2226 struct ubifs_pnode *pnode;
2227
2228 /* cnode is a pnode */
2229 pnode = (struct ubifs_pnode *)cnode;
2230 err = dbg_chk_pnode(c, pnode, col);
2231 if (err)
2232 return err;
2233 }
2234 /* Go up and to the right */
2235 row -= 1;
2236 col >>= UBIFS_LPT_FANOUT_SHIFT;
2237 iip = cnode->iip + 1;
2238 cnode = (struct ubifs_cnode *)nnode;
2239 }
2240 return 0;
2241}
2242
2243#endif /* CONFIG_UBIFS_FS_DEBUG */
diff --git a/fs/ubifs/lpt_commit.c b/fs/ubifs/lpt_commit.c
new file mode 100644
index 000000000000..5f0b83e20af6
--- /dev/null
+++ b/fs/ubifs/lpt_commit.c
@@ -0,0 +1,1648 @@
1/*
2 * This file is part of UBIFS.
3 *
4 * Copyright (C) 2006-2008 Nokia Corporation.
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms of the GNU General Public License version 2 as published by
8 * the Free Software Foundation.
9 *
10 * This program is distributed in the hope that it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13 * more details.
14 *
15 * You should have received a copy of the GNU General Public License along with
16 * this program; if not, write to the Free Software Foundation, Inc., 51
17 * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
18 *
19 * Authors: Adrian Hunter
20 * Artem Bityutskiy (Битюцкий Артём)
21 */
22
23/*
24 * This file implements commit-related functionality of the LEB properties
25 * subsystem.
26 */
27
28#include <linux/crc16.h>
29#include "ubifs.h"
30
31/**
32 * first_dirty_cnode - find first dirty cnode.
33 * @c: UBIFS file-system description object
34 * @nnode: nnode at which to start
35 *
36 * This function returns the first dirty cnode or %NULL if there is not one.
37 */
38static struct ubifs_cnode *first_dirty_cnode(struct ubifs_nnode *nnode)
39{
40 ubifs_assert(nnode);
41 while (1) {
42 int i, cont = 0;
43
44 for (i = 0; i < UBIFS_LPT_FANOUT; i++) {
45 struct ubifs_cnode *cnode;
46
47 cnode = nnode->nbranch[i].cnode;
48 if (cnode &&
49 test_bit(DIRTY_CNODE, &cnode->flags)) {
50 if (cnode->level == 0)
51 return cnode;
52 nnode = (struct ubifs_nnode *)cnode;
53 cont = 1;
54 break;
55 }
56 }
57 if (!cont)
58 return (struct ubifs_cnode *)nnode;
59 }
60}
61
62/**
63 * next_dirty_cnode - find next dirty cnode.
64 * @cnode: cnode from which to begin searching
65 *
66 * This function returns the next dirty cnode or %NULL if there is not one.
67 */
68static struct ubifs_cnode *next_dirty_cnode(struct ubifs_cnode *cnode)
69{
70 struct ubifs_nnode *nnode;
71 int i;
72
73 ubifs_assert(cnode);
74 nnode = cnode->parent;
75 if (!nnode)
76 return NULL;
77 for (i = cnode->iip + 1; i < UBIFS_LPT_FANOUT; i++) {
78 cnode = nnode->nbranch[i].cnode;
79 if (cnode && test_bit(DIRTY_CNODE, &cnode->flags)) {
80 if (cnode->level == 0)
81 return cnode; /* cnode is a pnode */
82 /* cnode is a nnode */
83 return first_dirty_cnode((struct ubifs_nnode *)cnode);
84 }
85 }
86 return (struct ubifs_cnode *)nnode;
87}
88
89/**
90 * get_cnodes_to_commit - create list of dirty cnodes to commit.
91 * @c: UBIFS file-system description object
92 *
93 * This function returns the number of cnodes to commit.
94 */
95static int get_cnodes_to_commit(struct ubifs_info *c)
96{
97 struct ubifs_cnode *cnode, *cnext;
98 int cnt = 0;
99
100 if (!c->nroot)
101 return 0;
102
103 if (!test_bit(DIRTY_CNODE, &c->nroot->flags))
104 return 0;
105
106 c->lpt_cnext = first_dirty_cnode(c->nroot);
107 cnode = c->lpt_cnext;
108 if (!cnode)
109 return 0;
110 cnt += 1;
111 while (1) {
112 ubifs_assert(!test_bit(COW_ZNODE, &cnode->flags));
113 __set_bit(COW_ZNODE, &cnode->flags);
114 cnext = next_dirty_cnode(cnode);
115 if (!cnext) {
116 cnode->cnext = c->lpt_cnext;
117 break;
118 }
119 cnode->cnext = cnext;
120 cnode = cnext;
121 cnt += 1;
122 }
123 dbg_cmt("committing %d cnodes", cnt);
124 dbg_lp("committing %d cnodes", cnt);
125 ubifs_assert(cnt == c->dirty_nn_cnt + c->dirty_pn_cnt);
126 return cnt;
127}
128
129/**
130 * upd_ltab - update LPT LEB properties.
131 * @c: UBIFS file-system description object
132 * @lnum: LEB number
133 * @free: amount of free space
134 * @dirty: amount of dirty space to add
135 */
136static void upd_ltab(struct ubifs_info *c, int lnum, int free, int dirty)
137{
138 dbg_lp("LEB %d free %d dirty %d to %d +%d",
139 lnum, c->ltab[lnum - c->lpt_first].free,
140 c->ltab[lnum - c->lpt_first].dirty, free, dirty);
141 ubifs_assert(lnum >= c->lpt_first && lnum <= c->lpt_last);
142 c->ltab[lnum - c->lpt_first].free = free;
143 c->ltab[lnum - c->lpt_first].dirty += dirty;
144}
145
146/**
147 * alloc_lpt_leb - allocate an LPT LEB that is empty.
148 * @c: UBIFS file-system description object
149 * @lnum: LEB number is passed and returned here
150 *
151 * This function finds the next empty LEB in the ltab starting from @lnum. If a
152 * an empty LEB is found it is returned in @lnum and the function returns %0.
153 * Otherwise the function returns -ENOSPC. Note however, that LPT is designed
154 * never to run out of space.
155 */
156static int alloc_lpt_leb(struct ubifs_info *c, int *lnum)
157{
158 int i, n;
159
160 n = *lnum - c->lpt_first + 1;
161 for (i = n; i < c->lpt_lebs; i++) {
162 if (c->ltab[i].tgc || c->ltab[i].cmt)
163 continue;
164 if (c->ltab[i].free == c->leb_size) {
165 c->ltab[i].cmt = 1;
166 *lnum = i + c->lpt_first;
167 return 0;
168 }
169 }
170
171 for (i = 0; i < n; i++) {
172 if (c->ltab[i].tgc || c->ltab[i].cmt)
173 continue;
174 if (c->ltab[i].free == c->leb_size) {
175 c->ltab[i].cmt = 1;
176 *lnum = i + c->lpt_first;
177 return 0;
178 }
179 }
180 dbg_err("last LEB %d", *lnum);
181 dump_stack();
182 return -ENOSPC;
183}
184
185/**
186 * layout_cnodes - layout cnodes for commit.
187 * @c: UBIFS file-system description object
188 *
189 * This function returns %0 on success and a negative error code on failure.
190 */
191static int layout_cnodes(struct ubifs_info *c)
192{
193 int lnum, offs, len, alen, done_lsave, done_ltab, err;
194 struct ubifs_cnode *cnode;
195
196 cnode = c->lpt_cnext;
197 if (!cnode)
198 return 0;
199 lnum = c->nhead_lnum;
200 offs = c->nhead_offs;
201 /* Try to place lsave and ltab nicely */
202 done_lsave = !c->big_lpt;
203 done_ltab = 0;
204 if (!done_lsave && offs + c->lsave_sz <= c->leb_size) {
205 done_lsave = 1;
206 c->lsave_lnum = lnum;
207 c->lsave_offs = offs;
208 offs += c->lsave_sz;
209 }
210
211 if (offs + c->ltab_sz <= c->leb_size) {
212 done_ltab = 1;
213 c->ltab_lnum = lnum;
214 c->ltab_offs = offs;
215 offs += c->ltab_sz;
216 }
217
218 do {
219 if (cnode->level) {
220 len = c->nnode_sz;
221 c->dirty_nn_cnt -= 1;
222 } else {
223 len = c->pnode_sz;
224 c->dirty_pn_cnt -= 1;
225 }
226 while (offs + len > c->leb_size) {
227 alen = ALIGN(offs, c->min_io_size);
228 upd_ltab(c, lnum, c->leb_size - alen, alen - offs);
229 err = alloc_lpt_leb(c, &lnum);
230 if (err)
231 return err;
232 offs = 0;
233 ubifs_assert(lnum >= c->lpt_first &&
234 lnum <= c->lpt_last);
235 /* Try to place lsave and ltab nicely */
236 if (!done_lsave) {
237 done_lsave = 1;
238 c->lsave_lnum = lnum;
239 c->lsave_offs = offs;
240 offs += c->lsave_sz;
241 continue;
242 }
243 if (!done_ltab) {
244 done_ltab = 1;
245 c->ltab_lnum = lnum;
246 c->ltab_offs = offs;
247 offs += c->ltab_sz;
248 continue;
249 }
250 break;
251 }
252 if (cnode->parent) {
253 cnode->parent->nbranch[cnode->iip].lnum = lnum;
254 cnode->parent->nbranch[cnode->iip].offs = offs;
255 } else {
256 c->lpt_lnum = lnum;
257 c->lpt_offs = offs;
258 }
259 offs += len;
260 cnode = cnode->cnext;
261 } while (cnode && cnode != c->lpt_cnext);
262
263 /* Make sure to place LPT's save table */
264 if (!done_lsave) {
265 if (offs + c->lsave_sz > c->leb_size) {
266 alen = ALIGN(offs, c->min_io_size);
267 upd_ltab(c, lnum, c->leb_size - alen, alen - offs);
268 err = alloc_lpt_leb(c, &lnum);
269 if (err)
270 return err;
271 offs = 0;
272 ubifs_assert(lnum >= c->lpt_first &&
273 lnum <= c->lpt_last);
274 }
275 done_lsave = 1;
276 c->lsave_lnum = lnum;
277 c->lsave_offs = offs;
278 offs += c->lsave_sz;
279 }
280
281 /* Make sure to place LPT's own lprops table */
282 if (!done_ltab) {
283 if (offs + c->ltab_sz > c->leb_size) {
284 alen = ALIGN(offs, c->min_io_size);
285 upd_ltab(c, lnum, c->leb_size - alen, alen - offs);
286 err = alloc_lpt_leb(c, &lnum);
287 if (err)
288 return err;
289 offs = 0;
290 ubifs_assert(lnum >= c->lpt_first &&
291 lnum <= c->lpt_last);
292 }
293 done_ltab = 1;
294 c->ltab_lnum = lnum;
295 c->ltab_offs = offs;
296 offs += c->ltab_sz;
297 }
298
299 alen = ALIGN(offs, c->min_io_size);
300 upd_ltab(c, lnum, c->leb_size - alen, alen - offs);
301 return 0;
302}
303
304/**
305 * realloc_lpt_leb - allocate an LPT LEB that is empty.
306 * @c: UBIFS file-system description object
307 * @lnum: LEB number is passed and returned here
308 *
309 * This function duplicates exactly the results of the function alloc_lpt_leb.
310 * It is used during end commit to reallocate the same LEB numbers that were
311 * allocated by alloc_lpt_leb during start commit.
312 *
313 * This function finds the next LEB that was allocated by the alloc_lpt_leb
314 * function starting from @lnum. If a LEB is found it is returned in @lnum and
315 * the function returns %0. Otherwise the function returns -ENOSPC.
316 * Note however, that LPT is designed never to run out of space.
317 */
318static int realloc_lpt_leb(struct ubifs_info *c, int *lnum)
319{
320 int i, n;
321
322 n = *lnum - c->lpt_first + 1;
323 for (i = n; i < c->lpt_lebs; i++)
324 if (c->ltab[i].cmt) {
325 c->ltab[i].cmt = 0;
326 *lnum = i + c->lpt_first;
327 return 0;
328 }
329
330 for (i = 0; i < n; i++)
331 if (c->ltab[i].cmt) {
332 c->ltab[i].cmt = 0;
333 *lnum = i + c->lpt_first;
334 return 0;
335 }
336 dbg_err("last LEB %d", *lnum);
337 dump_stack();
338 return -ENOSPC;
339}
340
341/**
342 * write_cnodes - write cnodes for commit.
343 * @c: UBIFS file-system description object
344 *
345 * This function returns %0 on success and a negative error code on failure.
346 */
347static int write_cnodes(struct ubifs_info *c)
348{
349 int lnum, offs, len, from, err, wlen, alen, done_ltab, done_lsave;
350 struct ubifs_cnode *cnode;
351 void *buf = c->lpt_buf;
352
353 cnode = c->lpt_cnext;
354 if (!cnode)
355 return 0;
356 lnum = c->nhead_lnum;
357 offs = c->nhead_offs;
358 from = offs;
359 /* Ensure empty LEB is unmapped */
360 if (offs == 0) {
361 err = ubifs_leb_unmap(c, lnum);
362 if (err)
363 return err;
364 }
365 /* Try to place lsave and ltab nicely */
366 done_lsave = !c->big_lpt;
367 done_ltab = 0;
368 if (!done_lsave && offs + c->lsave_sz <= c->leb_size) {
369 done_lsave = 1;
370 ubifs_pack_lsave(c, buf + offs, c->lsave);
371 offs += c->lsave_sz;
372 }
373
374 if (offs + c->ltab_sz <= c->leb_size) {
375 done_ltab = 1;
376 ubifs_pack_ltab(c, buf + offs, c->ltab_cmt);
377 offs += c->ltab_sz;
378 }
379
380 /* Loop for each cnode */
381 do {
382 if (cnode->level)
383 len = c->nnode_sz;
384 else
385 len = c->pnode_sz;
386 while (offs + len > c->leb_size) {
387 wlen = offs - from;
388 if (wlen) {
389 alen = ALIGN(wlen, c->min_io_size);
390 memset(buf + offs, 0xff, alen - wlen);
391 err = ubifs_leb_write(c, lnum, buf + from, from,
392 alen, UBI_SHORTTERM);
393 if (err)
394 return err;
395 }
396 err = realloc_lpt_leb(c, &lnum);
397 if (err)
398 return err;
399 offs = 0;
400 from = 0;
401 ubifs_assert(lnum >= c->lpt_first &&
402 lnum <= c->lpt_last);
403 err = ubifs_leb_unmap(c, lnum);
404 if (err)
405 return err;
406 /* Try to place lsave and ltab nicely */
407 if (!done_lsave) {
408 done_lsave = 1;
409 ubifs_pack_lsave(c, buf + offs, c->lsave);
410 offs += c->lsave_sz;
411 continue;
412 }
413 if (!done_ltab) {
414 done_ltab = 1;
415 ubifs_pack_ltab(c, buf + offs, c->ltab_cmt);
416 offs += c->ltab_sz;
417 continue;
418 }
419 break;
420 }
421 if (cnode->level)
422 ubifs_pack_nnode(c, buf + offs,
423 (struct ubifs_nnode *)cnode);
424 else
425 ubifs_pack_pnode(c, buf + offs,
426 (struct ubifs_pnode *)cnode);
427 /*
428 * The reason for the barriers is the same as in case of TNC.
429 * See comment in 'write_index()'. 'dirty_cow_nnode()' and
430 * 'dirty_cow_pnode()' are the functions for which this is
431 * important.
432 */
433 clear_bit(DIRTY_CNODE, &cnode->flags);
434 smp_mb__before_clear_bit();
435 clear_bit(COW_ZNODE, &cnode->flags);
436 smp_mb__after_clear_bit();
437 offs += len;
438 cnode = cnode->cnext;
439 } while (cnode && cnode != c->lpt_cnext);
440
441 /* Make sure to place LPT's save table */
442 if (!done_lsave) {
443 if (offs + c->lsave_sz > c->leb_size) {
444 wlen = offs - from;
445 alen = ALIGN(wlen, c->min_io_size);
446 memset(buf + offs, 0xff, alen - wlen);
447 err = ubifs_leb_write(c, lnum, buf + from, from, alen,
448 UBI_SHORTTERM);
449 if (err)
450 return err;
451 err = realloc_lpt_leb(c, &lnum);
452 if (err)
453 return err;
454 offs = 0;
455 ubifs_assert(lnum >= c->lpt_first &&
456 lnum <= c->lpt_last);
457 err = ubifs_leb_unmap(c, lnum);
458 if (err)
459 return err;
460 }
461 done_lsave = 1;
462 ubifs_pack_lsave(c, buf + offs, c->lsave);
463 offs += c->lsave_sz;
464 }
465
466 /* Make sure to place LPT's own lprops table */
467 if (!done_ltab) {
468 if (offs + c->ltab_sz > c->leb_size) {
469 wlen = offs - from;
470 alen = ALIGN(wlen, c->min_io_size);
471 memset(buf + offs, 0xff, alen - wlen);
472 err = ubifs_leb_write(c, lnum, buf + from, from, alen,
473 UBI_SHORTTERM);
474 if (err)
475 return err;
476 err = realloc_lpt_leb(c, &lnum);
477 if (err)
478 return err;
479 offs = 0;
480 ubifs_assert(lnum >= c->lpt_first &&
481 lnum <= c->lpt_last);
482 err = ubifs_leb_unmap(c, lnum);
483 if (err)
484 return err;
485 }
486 done_ltab = 1;
487 ubifs_pack_ltab(c, buf + offs, c->ltab_cmt);
488 offs += c->ltab_sz;
489 }
490
491 /* Write remaining data in buffer */
492 wlen = offs - from;
493 alen = ALIGN(wlen, c->min_io_size);
494 memset(buf + offs, 0xff, alen - wlen);
495 err = ubifs_leb_write(c, lnum, buf + from, from, alen, UBI_SHORTTERM);
496 if (err)
497 return err;
498 c->nhead_lnum = lnum;
499 c->nhead_offs = ALIGN(offs, c->min_io_size);
500
501 dbg_lp("LPT root is at %d:%d", c->lpt_lnum, c->lpt_offs);
502 dbg_lp("LPT head is at %d:%d", c->nhead_lnum, c->nhead_offs);
503 dbg_lp("LPT ltab is at %d:%d", c->ltab_lnum, c->ltab_offs);
504 if (c->big_lpt)
505 dbg_lp("LPT lsave is at %d:%d", c->lsave_lnum, c->lsave_offs);
506 return 0;
507}
508
509/**
510 * next_pnode - find next pnode.
511 * @c: UBIFS file-system description object
512 * @pnode: pnode
513 *
514 * This function returns the next pnode or %NULL if there are no more pnodes.
515 */
516static struct ubifs_pnode *next_pnode(struct ubifs_info *c,
517 struct ubifs_pnode *pnode)
518{
519 struct ubifs_nnode *nnode;
520 int iip;
521
522 /* Try to go right */
523 nnode = pnode->parent;
524 iip = pnode->iip + 1;
525 if (iip < UBIFS_LPT_FANOUT) {
526 /* We assume here that LEB zero is never an LPT LEB */
527 if (nnode->nbranch[iip].lnum)
528 return ubifs_get_pnode(c, nnode, iip);
529 else
530 return NULL;
531 }
532
533 /* Go up while can't go right */
534 do {
535 iip = nnode->iip + 1;
536 nnode = nnode->parent;
537 if (!nnode)
538 return NULL;
539 /* We assume here that LEB zero is never an LPT LEB */
540 } while (iip >= UBIFS_LPT_FANOUT || !nnode->nbranch[iip].lnum);
541
542 /* Go right */
543 nnode = ubifs_get_nnode(c, nnode, iip);
544 if (IS_ERR(nnode))
545 return (void *)nnode;
546
547 /* Go down to level 1 */
548 while (nnode->level > 1) {
549 nnode = ubifs_get_nnode(c, nnode, 0);
550 if (IS_ERR(nnode))
551 return (void *)nnode;
552 }
553
554 return ubifs_get_pnode(c, nnode, 0);
555}
556
557/**
558 * pnode_lookup - lookup a pnode in the LPT.
559 * @c: UBIFS file-system description object
560 * @i: pnode number (0 to main_lebs - 1)
561 *
562 * This function returns a pointer to the pnode on success or a negative
563 * error code on failure.
564 */
565static struct ubifs_pnode *pnode_lookup(struct ubifs_info *c, int i)
566{
567 int err, h, iip, shft;
568 struct ubifs_nnode *nnode;
569
570 if (!c->nroot) {
571 err = ubifs_read_nnode(c, NULL, 0);
572 if (err)
573 return ERR_PTR(err);
574 }
575 i <<= UBIFS_LPT_FANOUT_SHIFT;
576 nnode = c->nroot;
577 shft = c->lpt_hght * UBIFS_LPT_FANOUT_SHIFT;
578 for (h = 1; h < c->lpt_hght; h++) {
579 iip = ((i >> shft) & (UBIFS_LPT_FANOUT - 1));
580 shft -= UBIFS_LPT_FANOUT_SHIFT;
581 nnode = ubifs_get_nnode(c, nnode, iip);
582 if (IS_ERR(nnode))
583 return ERR_PTR(PTR_ERR(nnode));
584 }
585 iip = ((i >> shft) & (UBIFS_LPT_FANOUT - 1));
586 return ubifs_get_pnode(c, nnode, iip);
587}
588
589/**
590 * add_pnode_dirt - add dirty space to LPT LEB properties.
591 * @c: UBIFS file-system description object
592 * @pnode: pnode for which to add dirt
593 */
594static void add_pnode_dirt(struct ubifs_info *c, struct ubifs_pnode *pnode)
595{
596 ubifs_add_lpt_dirt(c, pnode->parent->nbranch[pnode->iip].lnum,
597 c->pnode_sz);
598}
599
600/**
601 * do_make_pnode_dirty - mark a pnode dirty.
602 * @c: UBIFS file-system description object
603 * @pnode: pnode to mark dirty
604 */
605static void do_make_pnode_dirty(struct ubifs_info *c, struct ubifs_pnode *pnode)
606{
607 /* Assumes cnext list is empty i.e. not called during commit */
608 if (!test_and_set_bit(DIRTY_CNODE, &pnode->flags)) {
609 struct ubifs_nnode *nnode;
610
611 c->dirty_pn_cnt += 1;
612 add_pnode_dirt(c, pnode);
613 /* Mark parent and ancestors dirty too */
614 nnode = pnode->parent;
615 while (nnode) {
616 if (!test_and_set_bit(DIRTY_CNODE, &nnode->flags)) {
617 c->dirty_nn_cnt += 1;
618 ubifs_add_nnode_dirt(c, nnode);
619 nnode = nnode->parent;
620 } else
621 break;
622 }
623 }
624}
625
626/**
627 * make_tree_dirty - mark the entire LEB properties tree dirty.
628 * @c: UBIFS file-system description object
629 *
630 * This function is used by the "small" LPT model to cause the entire LEB
631 * properties tree to be written. The "small" LPT model does not use LPT
632 * garbage collection because it is more efficient to write the entire tree
633 * (because it is small).
634 *
635 * This function returns %0 on success and a negative error code on failure.
636 */
637static int make_tree_dirty(struct ubifs_info *c)
638{
639 struct ubifs_pnode *pnode;
640
641 pnode = pnode_lookup(c, 0);
642 while (pnode) {
643 do_make_pnode_dirty(c, pnode);
644 pnode = next_pnode(c, pnode);
645 if (IS_ERR(pnode))
646 return PTR_ERR(pnode);
647 }
648 return 0;
649}
650
651/**
652 * need_write_all - determine if the LPT area is running out of free space.
653 * @c: UBIFS file-system description object
654 *
655 * This function returns %1 if the LPT area is running out of free space and %0
656 * if it is not.
657 */
658static int need_write_all(struct ubifs_info *c)
659{
660 long long free = 0;
661 int i;
662
663 for (i = 0; i < c->lpt_lebs; i++) {
664 if (i + c->lpt_first == c->nhead_lnum)
665 free += c->leb_size - c->nhead_offs;
666 else if (c->ltab[i].free == c->leb_size)
667 free += c->leb_size;
668 else if (c->ltab[i].free + c->ltab[i].dirty == c->leb_size)
669 free += c->leb_size;
670 }
671 /* Less than twice the size left */
672 if (free <= c->lpt_sz * 2)
673 return 1;
674 return 0;
675}
676
677/**
678 * lpt_tgc_start - start trivial garbage collection of LPT LEBs.
679 * @c: UBIFS file-system description object
680 *
681 * LPT trivial garbage collection is where a LPT LEB contains only dirty and
682 * free space and so may be reused as soon as the next commit is completed.
683 * This function is called during start commit to mark LPT LEBs for trivial GC.
684 */
685static void lpt_tgc_start(struct ubifs_info *c)
686{
687 int i;
688
689 for (i = 0; i < c->lpt_lebs; i++) {
690 if (i + c->lpt_first == c->nhead_lnum)
691 continue;
692 if (c->ltab[i].dirty > 0 &&
693 c->ltab[i].free + c->ltab[i].dirty == c->leb_size) {
694 c->ltab[i].tgc = 1;
695 c->ltab[i].free = c->leb_size;
696 c->ltab[i].dirty = 0;
697 dbg_lp("LEB %d", i + c->lpt_first);
698 }
699 }
700}
701
702/**
703 * lpt_tgc_end - end trivial garbage collection of LPT LEBs.
704 * @c: UBIFS file-system description object
705 *
706 * LPT trivial garbage collection is where a LPT LEB contains only dirty and
707 * free space and so may be reused as soon as the next commit is completed.
708 * This function is called after the commit is completed (master node has been
709 * written) and unmaps LPT LEBs that were marked for trivial GC.
710 */
711static int lpt_tgc_end(struct ubifs_info *c)
712{
713 int i, err;
714
715 for (i = 0; i < c->lpt_lebs; i++)
716 if (c->ltab[i].tgc) {
717 err = ubifs_leb_unmap(c, i + c->lpt_first);
718 if (err)
719 return err;
720 c->ltab[i].tgc = 0;
721 dbg_lp("LEB %d", i + c->lpt_first);
722 }
723 return 0;
724}
725
726/**
727 * populate_lsave - fill the lsave array with important LEB numbers.
728 * @c: the UBIFS file-system description object
729 *
730 * This function is only called for the "big" model. It records a small number
731 * of LEB numbers of important LEBs. Important LEBs are ones that are (from
732 * most important to least important): empty, freeable, freeable index, dirty
733 * index, dirty or free. Upon mount, we read this list of LEB numbers and bring
734 * their pnodes into memory. That will stop us from having to scan the LPT
735 * straight away. For the "small" model we assume that scanning the LPT is no
736 * big deal.
737 */
738static void populate_lsave(struct ubifs_info *c)
739{
740 struct ubifs_lprops *lprops;
741 struct ubifs_lpt_heap *heap;
742 int i, cnt = 0;
743
744 ubifs_assert(c->big_lpt);
745 if (!(c->lpt_drty_flgs & LSAVE_DIRTY)) {
746 c->lpt_drty_flgs |= LSAVE_DIRTY;
747 ubifs_add_lpt_dirt(c, c->lsave_lnum, c->lsave_sz);
748 }
749 list_for_each_entry(lprops, &c->empty_list, list) {
750 c->lsave[cnt++] = lprops->lnum;
751 if (cnt >= c->lsave_cnt)
752 return;
753 }
754 list_for_each_entry(lprops, &c->freeable_list, list) {
755 c->lsave[cnt++] = lprops->lnum;
756 if (cnt >= c->lsave_cnt)
757 return;
758 }
759 list_for_each_entry(lprops, &c->frdi_idx_list, list) {
760 c->lsave[cnt++] = lprops->lnum;
761 if (cnt >= c->lsave_cnt)
762 return;
763 }
764 heap = &c->lpt_heap[LPROPS_DIRTY_IDX - 1];
765 for (i = 0; i < heap->cnt; i++) {
766 c->lsave[cnt++] = heap->arr[i]->lnum;
767 if (cnt >= c->lsave_cnt)
768 return;
769 }
770 heap = &c->lpt_heap[LPROPS_DIRTY - 1];
771 for (i = 0; i < heap->cnt; i++) {
772 c->lsave[cnt++] = heap->arr[i]->lnum;
773 if (cnt >= c->lsave_cnt)
774 return;
775 }
776 heap = &c->lpt_heap[LPROPS_FREE - 1];
777 for (i = 0; i < heap->cnt; i++) {
778 c->lsave[cnt++] = heap->arr[i]->lnum;
779 if (cnt >= c->lsave_cnt)
780 return;
781 }
782 /* Fill it up completely */
783 while (cnt < c->lsave_cnt)
784 c->lsave[cnt++] = c->main_first;
785}
786
787/**
788 * nnode_lookup - lookup a nnode in the LPT.
789 * @c: UBIFS file-system description object
790 * @i: nnode number
791 *
792 * This function returns a pointer to the nnode on success or a negative
793 * error code on failure.
794 */
795static struct ubifs_nnode *nnode_lookup(struct ubifs_info *c, int i)
796{
797 int err, iip;
798 struct ubifs_nnode *nnode;
799
800 if (!c->nroot) {
801 err = ubifs_read_nnode(c, NULL, 0);
802 if (err)
803 return ERR_PTR(err);
804 }
805 nnode = c->nroot;
806 while (1) {
807 iip = i & (UBIFS_LPT_FANOUT - 1);
808 i >>= UBIFS_LPT_FANOUT_SHIFT;
809 if (!i)
810 break;
811 nnode = ubifs_get_nnode(c, nnode, iip);
812 if (IS_ERR(nnode))
813 return nnode;
814 }
815 return nnode;
816}
817
818/**
819 * make_nnode_dirty - find a nnode and, if found, make it dirty.
820 * @c: UBIFS file-system description object
821 * @node_num: nnode number of nnode to make dirty
822 * @lnum: LEB number where nnode was written
823 * @offs: offset where nnode was written
824 *
825 * This function is used by LPT garbage collection. LPT garbage collection is
826 * used only for the "big" LPT model (c->big_lpt == 1). Garbage collection
827 * simply involves marking all the nodes in the LEB being garbage-collected as
828 * dirty. The dirty nodes are written next commit, after which the LEB is free
829 * to be reused.
830 *
831 * This function returns %0 on success and a negative error code on failure.
832 */
833static int make_nnode_dirty(struct ubifs_info *c, int node_num, int lnum,
834 int offs)
835{
836 struct ubifs_nnode *nnode;
837
838 nnode = nnode_lookup(c, node_num);
839 if (IS_ERR(nnode))
840 return PTR_ERR(nnode);
841 if (nnode->parent) {
842 struct ubifs_nbranch *branch;
843
844 branch = &nnode->parent->nbranch[nnode->iip];
845 if (branch->lnum != lnum || branch->offs != offs)
846 return 0; /* nnode is obsolete */
847 } else if (c->lpt_lnum != lnum || c->lpt_offs != offs)
848 return 0; /* nnode is obsolete */
849 /* Assumes cnext list is empty i.e. not called during commit */
850 if (!test_and_set_bit(DIRTY_CNODE, &nnode->flags)) {
851 c->dirty_nn_cnt += 1;
852 ubifs_add_nnode_dirt(c, nnode);
853 /* Mark parent and ancestors dirty too */
854 nnode = nnode->parent;
855 while (nnode) {
856 if (!test_and_set_bit(DIRTY_CNODE, &nnode->flags)) {
857 c->dirty_nn_cnt += 1;
858 ubifs_add_nnode_dirt(c, nnode);
859 nnode = nnode->parent;
860 } else
861 break;
862 }
863 }
864 return 0;
865}
866
867/**
868 * make_pnode_dirty - find a pnode and, if found, make it dirty.
869 * @c: UBIFS file-system description object
870 * @node_num: pnode number of pnode to make dirty
871 * @lnum: LEB number where pnode was written
872 * @offs: offset where pnode was written
873 *
874 * This function is used by LPT garbage collection. LPT garbage collection is
875 * used only for the "big" LPT model (c->big_lpt == 1). Garbage collection
876 * simply involves marking all the nodes in the LEB being garbage-collected as
877 * dirty. The dirty nodes are written next commit, after which the LEB is free
878 * to be reused.
879 *
880 * This function returns %0 on success and a negative error code on failure.
881 */
882static int make_pnode_dirty(struct ubifs_info *c, int node_num, int lnum,
883 int offs)
884{
885 struct ubifs_pnode *pnode;
886 struct ubifs_nbranch *branch;
887
888 pnode = pnode_lookup(c, node_num);
889 if (IS_ERR(pnode))
890 return PTR_ERR(pnode);
891 branch = &pnode->parent->nbranch[pnode->iip];
892 if (branch->lnum != lnum || branch->offs != offs)
893 return 0;
894 do_make_pnode_dirty(c, pnode);
895 return 0;
896}
897
898/**
899 * make_ltab_dirty - make ltab node dirty.
900 * @c: UBIFS file-system description object
901 * @lnum: LEB number where ltab was written
902 * @offs: offset where ltab was written
903 *
904 * This function is used by LPT garbage collection. LPT garbage collection is
905 * used only for the "big" LPT model (c->big_lpt == 1). Garbage collection
906 * simply involves marking all the nodes in the LEB being garbage-collected as
907 * dirty. The dirty nodes are written next commit, after which the LEB is free
908 * to be reused.
909 *
910 * This function returns %0 on success and a negative error code on failure.
911 */
912static int make_ltab_dirty(struct ubifs_info *c, int lnum, int offs)
913{
914 if (lnum != c->ltab_lnum || offs != c->ltab_offs)
915 return 0; /* This ltab node is obsolete */
916 if (!(c->lpt_drty_flgs & LTAB_DIRTY)) {
917 c->lpt_drty_flgs |= LTAB_DIRTY;
918 ubifs_add_lpt_dirt(c, c->ltab_lnum, c->ltab_sz);
919 }
920 return 0;
921}
922
923/**
924 * make_lsave_dirty - make lsave node dirty.
925 * @c: UBIFS file-system description object
926 * @lnum: LEB number where lsave was written
927 * @offs: offset where lsave was written
928 *
929 * This function is used by LPT garbage collection. LPT garbage collection is
930 * used only for the "big" LPT model (c->big_lpt == 1). Garbage collection
931 * simply involves marking all the nodes in the LEB being garbage-collected as
932 * dirty. The dirty nodes are written next commit, after which the LEB is free
933 * to be reused.
934 *
935 * This function returns %0 on success and a negative error code on failure.
936 */
937static int make_lsave_dirty(struct ubifs_info *c, int lnum, int offs)
938{
939 if (lnum != c->lsave_lnum || offs != c->lsave_offs)
940 return 0; /* This lsave node is obsolete */
941 if (!(c->lpt_drty_flgs & LSAVE_DIRTY)) {
942 c->lpt_drty_flgs |= LSAVE_DIRTY;
943 ubifs_add_lpt_dirt(c, c->lsave_lnum, c->lsave_sz);
944 }
945 return 0;
946}
947
948/**
949 * make_node_dirty - make node dirty.
950 * @c: UBIFS file-system description object
951 * @node_type: LPT node type
952 * @node_num: node number
953 * @lnum: LEB number where node was written
954 * @offs: offset where node was written
955 *
956 * This function is used by LPT garbage collection. LPT garbage collection is
957 * used only for the "big" LPT model (c->big_lpt == 1). Garbage collection
958 * simply involves marking all the nodes in the LEB being garbage-collected as
959 * dirty. The dirty nodes are written next commit, after which the LEB is free
960 * to be reused.
961 *
962 * This function returns %0 on success and a negative error code on failure.
963 */
964static int make_node_dirty(struct ubifs_info *c, int node_type, int node_num,
965 int lnum, int offs)
966{
967 switch (node_type) {
968 case UBIFS_LPT_NNODE:
969 return make_nnode_dirty(c, node_num, lnum, offs);
970 case UBIFS_LPT_PNODE:
971 return make_pnode_dirty(c, node_num, lnum, offs);
972 case UBIFS_LPT_LTAB:
973 return make_ltab_dirty(c, lnum, offs);
974 case UBIFS_LPT_LSAVE:
975 return make_lsave_dirty(c, lnum, offs);
976 }
977 return -EINVAL;
978}
979
980/**
981 * get_lpt_node_len - return the length of a node based on its type.
982 * @c: UBIFS file-system description object
983 * @node_type: LPT node type
984 */
985static int get_lpt_node_len(struct ubifs_info *c, int node_type)
986{
987 switch (node_type) {
988 case UBIFS_LPT_NNODE:
989 return c->nnode_sz;
990 case UBIFS_LPT_PNODE:
991 return c->pnode_sz;
992 case UBIFS_LPT_LTAB:
993 return c->ltab_sz;
994 case UBIFS_LPT_LSAVE:
995 return c->lsave_sz;
996 }
997 return 0;
998}
999
1000/**
1001 * get_pad_len - return the length of padding in a buffer.
1002 * @c: UBIFS file-system description object
1003 * @buf: buffer
1004 * @len: length of buffer
1005 */
1006static int get_pad_len(struct ubifs_info *c, uint8_t *buf, int len)
1007{
1008 int offs, pad_len;
1009
1010 if (c->min_io_size == 1)
1011 return 0;
1012 offs = c->leb_size - len;
1013 pad_len = ALIGN(offs, c->min_io_size) - offs;
1014 return pad_len;
1015}
1016
1017/**
1018 * get_lpt_node_type - return type (and node number) of a node in a buffer.
1019 * @c: UBIFS file-system description object
1020 * @buf: buffer
1021 * @node_num: node number is returned here
1022 */
1023static int get_lpt_node_type(struct ubifs_info *c, uint8_t *buf, int *node_num)
1024{
1025 uint8_t *addr = buf + UBIFS_LPT_CRC_BYTES;
1026 int pos = 0, node_type;
1027
1028 node_type = ubifs_unpack_bits(&addr, &pos, UBIFS_LPT_TYPE_BITS);
1029 *node_num = ubifs_unpack_bits(&addr, &pos, c->pcnt_bits);
1030 return node_type;
1031}
1032
1033/**
1034 * is_a_node - determine if a buffer contains a node.
1035 * @c: UBIFS file-system description object
1036 * @buf: buffer
1037 * @len: length of buffer
1038 *
1039 * This function returns %1 if the buffer contains a node or %0 if it does not.
1040 */
1041static int is_a_node(struct ubifs_info *c, uint8_t *buf, int len)
1042{
1043 uint8_t *addr = buf + UBIFS_LPT_CRC_BYTES;
1044 int pos = 0, node_type, node_len;
1045 uint16_t crc, calc_crc;
1046
1047 node_type = ubifs_unpack_bits(&addr, &pos, UBIFS_LPT_TYPE_BITS);
1048 if (node_type == UBIFS_LPT_NOT_A_NODE)
1049 return 0;
1050 node_len = get_lpt_node_len(c, node_type);
1051 if (!node_len || node_len > len)
1052 return 0;
1053 pos = 0;
1054 addr = buf;
1055 crc = ubifs_unpack_bits(&addr, &pos, UBIFS_LPT_CRC_BITS);
1056 calc_crc = crc16(-1, buf + UBIFS_LPT_CRC_BYTES,
1057 node_len - UBIFS_LPT_CRC_BYTES);
1058 if (crc != calc_crc)
1059 return 0;
1060 return 1;
1061}
1062
1063
1064/**
1065 * lpt_gc_lnum - garbage collect a LPT LEB.
1066 * @c: UBIFS file-system description object
1067 * @lnum: LEB number to garbage collect
1068 *
1069 * LPT garbage collection is used only for the "big" LPT model
1070 * (c->big_lpt == 1). Garbage collection simply involves marking all the nodes
1071 * in the LEB being garbage-collected as dirty. The dirty nodes are written
1072 * next commit, after which the LEB is free to be reused.
1073 *
1074 * This function returns %0 on success and a negative error code on failure.
1075 */
1076static int lpt_gc_lnum(struct ubifs_info *c, int lnum)
1077{
1078 int err, len = c->leb_size, node_type, node_num, node_len, offs;
1079 void *buf = c->lpt_buf;
1080
1081 dbg_lp("LEB %d", lnum);
1082 err = ubi_read(c->ubi, lnum, buf, 0, c->leb_size);
1083 if (err) {
1084 ubifs_err("cannot read LEB %d, error %d", lnum, err);
1085 return err;
1086 }
1087 while (1) {
1088 if (!is_a_node(c, buf, len)) {
1089 int pad_len;
1090
1091 pad_len = get_pad_len(c, buf, len);
1092 if (pad_len) {
1093 buf += pad_len;
1094 len -= pad_len;
1095 continue;
1096 }
1097 return 0;
1098 }
1099 node_type = get_lpt_node_type(c, buf, &node_num);
1100 node_len = get_lpt_node_len(c, node_type);
1101 offs = c->leb_size - len;
1102 ubifs_assert(node_len != 0);
1103 mutex_lock(&c->lp_mutex);
1104 err = make_node_dirty(c, node_type, node_num, lnum, offs);
1105 mutex_unlock(&c->lp_mutex);
1106 if (err)
1107 return err;
1108 buf += node_len;
1109 len -= node_len;
1110 }
1111 return 0;
1112}
1113
1114/**
1115 * lpt_gc - LPT garbage collection.
1116 * @c: UBIFS file-system description object
1117 *
1118 * Select a LPT LEB for LPT garbage collection and call 'lpt_gc_lnum()'.
1119 * Returns %0 on success and a negative error code on failure.
1120 */
1121static int lpt_gc(struct ubifs_info *c)
1122{
1123 int i, lnum = -1, dirty = 0;
1124
1125 mutex_lock(&c->lp_mutex);
1126 for (i = 0; i < c->lpt_lebs; i++) {
1127 ubifs_assert(!c->ltab[i].tgc);
1128 if (i + c->lpt_first == c->nhead_lnum ||
1129 c->ltab[i].free + c->ltab[i].dirty == c->leb_size)
1130 continue;
1131 if (c->ltab[i].dirty > dirty) {
1132 dirty = c->ltab[i].dirty;
1133 lnum = i + c->lpt_first;
1134 }
1135 }
1136 mutex_unlock(&c->lp_mutex);
1137 if (lnum == -1)
1138 return -ENOSPC;
1139 return lpt_gc_lnum(c, lnum);
1140}
1141
1142/**
1143 * ubifs_lpt_start_commit - UBIFS commit starts.
1144 * @c: the UBIFS file-system description object
1145 *
1146 * This function has to be called when UBIFS starts the commit operation.
1147 * This function "freezes" all currently dirty LEB properties and does not
1148 * change them anymore. Further changes are saved and tracked separately
1149 * because they are not part of this commit. This function returns zero in case
1150 * of success and a negative error code in case of failure.
1151 */
1152int ubifs_lpt_start_commit(struct ubifs_info *c)
1153{
1154 int err, cnt;
1155
1156 dbg_lp("");
1157
1158 mutex_lock(&c->lp_mutex);
1159 err = dbg_check_ltab(c);
1160 if (err)
1161 goto out;
1162
1163 if (c->check_lpt_free) {
1164 /*
1165 * We ensure there is enough free space in
1166 * ubifs_lpt_post_commit() by marking nodes dirty. That
1167 * information is lost when we unmount, so we also need
1168 * to check free space once after mounting also.
1169 */
1170 c->check_lpt_free = 0;
1171 while (need_write_all(c)) {
1172 mutex_unlock(&c->lp_mutex);
1173 err = lpt_gc(c);
1174 if (err)
1175 return err;
1176 mutex_lock(&c->lp_mutex);
1177 }
1178 }
1179
1180 lpt_tgc_start(c);
1181
1182 if (!c->dirty_pn_cnt) {
1183 dbg_cmt("no cnodes to commit");
1184 err = 0;
1185 goto out;
1186 }
1187
1188 if (!c->big_lpt && need_write_all(c)) {
1189 /* If needed, write everything */
1190 err = make_tree_dirty(c);
1191 if (err)
1192 goto out;
1193 lpt_tgc_start(c);
1194 }
1195
1196 if (c->big_lpt)
1197 populate_lsave(c);
1198
1199 cnt = get_cnodes_to_commit(c);
1200 ubifs_assert(cnt != 0);
1201
1202 err = layout_cnodes(c);
1203 if (err)
1204 goto out;
1205
1206 /* Copy the LPT's own lprops for end commit to write */
1207 memcpy(c->ltab_cmt, c->ltab,
1208 sizeof(struct ubifs_lpt_lprops) * c->lpt_lebs);
1209 c->lpt_drty_flgs &= ~(LTAB_DIRTY | LSAVE_DIRTY);
1210
1211out:
1212 mutex_unlock(&c->lp_mutex);
1213 return err;
1214}
1215
1216/**
1217 * free_obsolete_cnodes - free obsolete cnodes for commit end.
1218 * @c: UBIFS file-system description object
1219 */
1220static void free_obsolete_cnodes(struct ubifs_info *c)
1221{
1222 struct ubifs_cnode *cnode, *cnext;
1223
1224 cnext = c->lpt_cnext;
1225 if (!cnext)
1226 return;
1227 do {
1228 cnode = cnext;
1229 cnext = cnode->cnext;
1230 if (test_bit(OBSOLETE_CNODE, &cnode->flags))
1231 kfree(cnode);
1232 else
1233 cnode->cnext = NULL;
1234 } while (cnext != c->lpt_cnext);
1235 c->lpt_cnext = NULL;
1236}
1237
1238/**
1239 * ubifs_lpt_end_commit - finish the commit operation.
1240 * @c: the UBIFS file-system description object
1241 *
1242 * This function has to be called when the commit operation finishes. It
1243 * flushes the changes which were "frozen" by 'ubifs_lprops_start_commit()' to
1244 * the media. Returns zero in case of success and a negative error code in case
1245 * of failure.
1246 */
1247int ubifs_lpt_end_commit(struct ubifs_info *c)
1248{
1249 int err;
1250
1251 dbg_lp("");
1252
1253 if (!c->lpt_cnext)
1254 return 0;
1255
1256 err = write_cnodes(c);
1257 if (err)
1258 return err;
1259
1260 mutex_lock(&c->lp_mutex);
1261 free_obsolete_cnodes(c);
1262 mutex_unlock(&c->lp_mutex);
1263
1264 return 0;
1265}
1266
1267/**
1268 * ubifs_lpt_post_commit - post commit LPT trivial GC and LPT GC.
1269 * @c: UBIFS file-system description object
1270 *
1271 * LPT trivial GC is completed after a commit. Also LPT GC is done after a
1272 * commit for the "big" LPT model.
1273 */
1274int ubifs_lpt_post_commit(struct ubifs_info *c)
1275{
1276 int err;
1277
1278 mutex_lock(&c->lp_mutex);
1279 err = lpt_tgc_end(c);
1280 if (err)
1281 goto out;
1282 if (c->big_lpt)
1283 while (need_write_all(c)) {
1284 mutex_unlock(&c->lp_mutex);
1285 err = lpt_gc(c);
1286 if (err)
1287 return err;
1288 mutex_lock(&c->lp_mutex);
1289 }
1290out:
1291 mutex_unlock(&c->lp_mutex);
1292 return err;
1293}
1294
1295/**
1296 * first_nnode - find the first nnode in memory.
1297 * @c: UBIFS file-system description object
1298 * @hght: height of tree where nnode found is returned here
1299 *
1300 * This function returns a pointer to the nnode found or %NULL if no nnode is
1301 * found. This function is a helper to 'ubifs_lpt_free()'.
1302 */
1303static struct ubifs_nnode *first_nnode(struct ubifs_info *c, int *hght)
1304{
1305 struct ubifs_nnode *nnode;
1306 int h, i, found;
1307
1308 nnode = c->nroot;
1309 *hght = 0;
1310 if (!nnode)
1311 return NULL;
1312 for (h = 1; h < c->lpt_hght; h++) {
1313 found = 0;
1314 for (i = 0; i < UBIFS_LPT_FANOUT; i++) {
1315 if (nnode->nbranch[i].nnode) {
1316 found = 1;
1317 nnode = nnode->nbranch[i].nnode;
1318 *hght = h;
1319 break;
1320 }
1321 }
1322 if (!found)
1323 break;
1324 }
1325 return nnode;
1326}
1327
1328/**
1329 * next_nnode - find the next nnode in memory.
1330 * @c: UBIFS file-system description object
1331 * @nnode: nnode from which to start.
1332 * @hght: height of tree where nnode is, is passed and returned here
1333 *
1334 * This function returns a pointer to the nnode found or %NULL if no nnode is
1335 * found. This function is a helper to 'ubifs_lpt_free()'.
1336 */
1337static struct ubifs_nnode *next_nnode(struct ubifs_info *c,
1338 struct ubifs_nnode *nnode, int *hght)
1339{
1340 struct ubifs_nnode *parent;
1341 int iip, h, i, found;
1342
1343 parent = nnode->parent;
1344 if (!parent)
1345 return NULL;
1346 if (nnode->iip == UBIFS_LPT_FANOUT - 1) {
1347 *hght -= 1;
1348 return parent;
1349 }
1350 for (iip = nnode->iip + 1; iip < UBIFS_LPT_FANOUT; iip++) {
1351 nnode = parent->nbranch[iip].nnode;
1352 if (nnode)
1353 break;
1354 }
1355 if (!nnode) {
1356 *hght -= 1;
1357 return parent;
1358 }
1359 for (h = *hght + 1; h < c->lpt_hght; h++) {
1360 found = 0;
1361 for (i = 0; i < UBIFS_LPT_FANOUT; i++) {
1362 if (nnode->nbranch[i].nnode) {
1363 found = 1;
1364 nnode = nnode->nbranch[i].nnode;
1365 *hght = h;
1366 break;
1367 }
1368 }
1369 if (!found)
1370 break;
1371 }
1372 return nnode;
1373}
1374
1375/**
1376 * ubifs_lpt_free - free resources owned by the LPT.
1377 * @c: UBIFS file-system description object
1378 * @wr_only: free only resources used for writing
1379 */
1380void ubifs_lpt_free(struct ubifs_info *c, int wr_only)
1381{
1382 struct ubifs_nnode *nnode;
1383 int i, hght;
1384
1385 /* Free write-only things first */
1386
1387 free_obsolete_cnodes(c); /* Leftover from a failed commit */
1388
1389 vfree(c->ltab_cmt);
1390 c->ltab_cmt = NULL;
1391 vfree(c->lpt_buf);
1392 c->lpt_buf = NULL;
1393 kfree(c->lsave);
1394 c->lsave = NULL;
1395
1396 if (wr_only)
1397 return;
1398
1399 /* Now free the rest */
1400
1401 nnode = first_nnode(c, &hght);
1402 while (nnode) {
1403 for (i = 0; i < UBIFS_LPT_FANOUT; i++)
1404 kfree(nnode->nbranch[i].nnode);
1405 nnode = next_nnode(c, nnode, &hght);
1406 }
1407 for (i = 0; i < LPROPS_HEAP_CNT; i++)
1408 kfree(c->lpt_heap[i].arr);
1409 kfree(c->dirty_idx.arr);
1410 kfree(c->nroot);
1411 vfree(c->ltab);
1412 kfree(c->lpt_nod_buf);
1413}
1414
1415#ifdef CONFIG_UBIFS_FS_DEBUG
1416
1417/**
1418 * dbg_is_all_ff - determine if a buffer contains only 0xff bytes.
1419 * @buf: buffer
1420 * @len: buffer length
1421 */
1422static int dbg_is_all_ff(uint8_t *buf, int len)
1423{
1424 int i;
1425
1426 for (i = 0; i < len; i++)
1427 if (buf[i] != 0xff)
1428 return 0;
1429 return 1;
1430}
1431
1432/**
1433 * dbg_is_nnode_dirty - determine if a nnode is dirty.
1434 * @c: the UBIFS file-system description object
1435 * @lnum: LEB number where nnode was written
1436 * @offs: offset where nnode was written
1437 */
1438static int dbg_is_nnode_dirty(struct ubifs_info *c, int lnum, int offs)
1439{
1440 struct ubifs_nnode *nnode;
1441 int hght;
1442
1443 /* Entire tree is in memory so first_nnode / next_nnode are ok */
1444 nnode = first_nnode(c, &hght);
1445 for (; nnode; nnode = next_nnode(c, nnode, &hght)) {
1446 struct ubifs_nbranch *branch;
1447
1448 cond_resched();
1449 if (nnode->parent) {
1450 branch = &nnode->parent->nbranch[nnode->iip];
1451 if (branch->lnum != lnum || branch->offs != offs)
1452 continue;
1453 if (test_bit(DIRTY_CNODE, &nnode->flags))
1454 return 1;
1455 return 0;
1456 } else {
1457 if (c->lpt_lnum != lnum || c->lpt_offs != offs)
1458 continue;
1459 if (test_bit(DIRTY_CNODE, &nnode->flags))
1460 return 1;
1461 return 0;
1462 }
1463 }
1464 return 1;
1465}
1466
1467/**
1468 * dbg_is_pnode_dirty - determine if a pnode is dirty.
1469 * @c: the UBIFS file-system description object
1470 * @lnum: LEB number where pnode was written
1471 * @offs: offset where pnode was written
1472 */
1473static int dbg_is_pnode_dirty(struct ubifs_info *c, int lnum, int offs)
1474{
1475 int i, cnt;
1476
1477 cnt = DIV_ROUND_UP(c->main_lebs, UBIFS_LPT_FANOUT);
1478 for (i = 0; i < cnt; i++) {
1479 struct ubifs_pnode *pnode;
1480 struct ubifs_nbranch *branch;
1481
1482 cond_resched();
1483 pnode = pnode_lookup(c, i);
1484 if (IS_ERR(pnode))
1485 return PTR_ERR(pnode);
1486 branch = &pnode->parent->nbranch[pnode->iip];
1487 if (branch->lnum != lnum || branch->offs != offs)
1488 continue;
1489 if (test_bit(DIRTY_CNODE, &pnode->flags))
1490 return 1;
1491 return 0;
1492 }
1493 return 1;
1494}
1495
1496/**
1497 * dbg_is_ltab_dirty - determine if a ltab node is dirty.
1498 * @c: the UBIFS file-system description object
1499 * @lnum: LEB number where ltab node was written
1500 * @offs: offset where ltab node was written
1501 */
1502static int dbg_is_ltab_dirty(struct ubifs_info *c, int lnum, int offs)
1503{
1504 if (lnum != c->ltab_lnum || offs != c->ltab_offs)
1505 return 1;
1506 return (c->lpt_drty_flgs & LTAB_DIRTY) != 0;
1507}
1508
1509/**
1510 * dbg_is_lsave_dirty - determine if a lsave node is dirty.
1511 * @c: the UBIFS file-system description object
1512 * @lnum: LEB number where lsave node was written
1513 * @offs: offset where lsave node was written
1514 */
1515static int dbg_is_lsave_dirty(struct ubifs_info *c, int lnum, int offs)
1516{
1517 if (lnum != c->lsave_lnum || offs != c->lsave_offs)
1518 return 1;
1519 return (c->lpt_drty_flgs & LSAVE_DIRTY) != 0;
1520}
1521
1522/**
1523 * dbg_is_node_dirty - determine if a node is dirty.
1524 * @c: the UBIFS file-system description object
1525 * @node_type: node type
1526 * @lnum: LEB number where node was written
1527 * @offs: offset where node was written
1528 */
1529static int dbg_is_node_dirty(struct ubifs_info *c, int node_type, int lnum,
1530 int offs)
1531{
1532 switch (node_type) {
1533 case UBIFS_LPT_NNODE:
1534 return dbg_is_nnode_dirty(c, lnum, offs);
1535 case UBIFS_LPT_PNODE:
1536 return dbg_is_pnode_dirty(c, lnum, offs);
1537 case UBIFS_LPT_LTAB:
1538 return dbg_is_ltab_dirty(c, lnum, offs);
1539 case UBIFS_LPT_LSAVE:
1540 return dbg_is_lsave_dirty(c, lnum, offs);
1541 }
1542 return 1;
1543}
1544
1545/**
1546 * dbg_check_ltab_lnum - check the ltab for a LPT LEB number.
1547 * @c: the UBIFS file-system description object
1548 * @lnum: LEB number where node was written
1549 * @offs: offset where node was written
1550 *
1551 * This function returns %0 on success and a negative error code on failure.
1552 */
1553static int dbg_check_ltab_lnum(struct ubifs_info *c, int lnum)
1554{
1555 int err, len = c->leb_size, dirty = 0, node_type, node_num, node_len;
1556 int ret;
1557 void *buf = c->dbg_buf;
1558
1559 dbg_lp("LEB %d", lnum);
1560 err = ubi_read(c->ubi, lnum, buf, 0, c->leb_size);
1561 if (err) {
1562 dbg_msg("ubi_read failed, LEB %d, error %d", lnum, err);
1563 return err;
1564 }
1565 while (1) {
1566 if (!is_a_node(c, buf, len)) {
1567 int i, pad_len;
1568
1569 pad_len = get_pad_len(c, buf, len);
1570 if (pad_len) {
1571 buf += pad_len;
1572 len -= pad_len;
1573 dirty += pad_len;
1574 continue;
1575 }
1576 if (!dbg_is_all_ff(buf, len)) {
1577 dbg_msg("invalid empty space in LEB %d at %d",
1578 lnum, c->leb_size - len);
1579 err = -EINVAL;
1580 }
1581 i = lnum - c->lpt_first;
1582 if (len != c->ltab[i].free) {
1583 dbg_msg("invalid free space in LEB %d "
1584 "(free %d, expected %d)",
1585 lnum, len, c->ltab[i].free);
1586 err = -EINVAL;
1587 }
1588 if (dirty != c->ltab[i].dirty) {
1589 dbg_msg("invalid dirty space in LEB %d "
1590 "(dirty %d, expected %d)",
1591 lnum, dirty, c->ltab[i].dirty);
1592 err = -EINVAL;
1593 }
1594 return err;
1595 }
1596 node_type = get_lpt_node_type(c, buf, &node_num);
1597 node_len = get_lpt_node_len(c, node_type);
1598 ret = dbg_is_node_dirty(c, node_type, lnum, c->leb_size - len);
1599 if (ret == 1)
1600 dirty += node_len;
1601 buf += node_len;
1602 len -= node_len;
1603 }
1604}
1605
1606/**
1607 * dbg_check_ltab - check the free and dirty space in the ltab.
1608 * @c: the UBIFS file-system description object
1609 *
1610 * This function returns %0 on success and a negative error code on failure.
1611 */
1612int dbg_check_ltab(struct ubifs_info *c)
1613{
1614 int lnum, err, i, cnt;
1615
1616 if (!(ubifs_chk_flags & UBIFS_CHK_LPROPS))
1617 return 0;
1618
1619 /* Bring the entire tree into memory */
1620 cnt = DIV_ROUND_UP(c->main_lebs, UBIFS_LPT_FANOUT);
1621 for (i = 0; i < cnt; i++) {
1622 struct ubifs_pnode *pnode;
1623
1624 pnode = pnode_lookup(c, i);
1625 if (IS_ERR(pnode))
1626 return PTR_ERR(pnode);
1627 cond_resched();
1628 }
1629
1630 /* Check nodes */
1631 err = dbg_check_lpt_nodes(c, (struct ubifs_cnode *)c->nroot, 0, 0);
1632 if (err)
1633 return err;
1634
1635 /* Check each LEB */
1636 for (lnum = c->lpt_first; lnum <= c->lpt_last; lnum++) {
1637 err = dbg_check_ltab_lnum(c, lnum);
1638 if (err) {
1639 dbg_err("failed at LEB %d", lnum);
1640 return err;
1641 }
1642 }
1643
1644 dbg_lp("succeeded");
1645 return 0;
1646}
1647
1648#endif /* CONFIG_UBIFS_FS_DEBUG */
diff --git a/fs/ubifs/master.c b/fs/ubifs/master.c
new file mode 100644
index 000000000000..71d5493bf565
--- /dev/null
+++ b/fs/ubifs/master.c
@@ -0,0 +1,387 @@
1/*
2 * This file is part of UBIFS.
3 *
4 * Copyright (C) 2006-2008 Nokia Corporation.
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms of the GNU General Public License version 2 as published by
8 * the Free Software Foundation.
9 *
10 * This program is distributed in the hope that it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13 * more details.
14 *
15 * You should have received a copy of the GNU General Public License along with
16 * this program; if not, write to the Free Software Foundation, Inc., 51
17 * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
18 *
19 * Authors: Artem Bityutskiy (Битюцкий Артём)
20 * Adrian Hunter
21 */
22
23/* This file implements reading and writing the master node */
24
25#include "ubifs.h"
26
27/**
28 * scan_for_master - search the valid master node.
29 * @c: UBIFS file-system description object
30 *
31 * This function scans the master node LEBs and search for the latest master
32 * node. Returns zero in case of success and a negative error code in case of
33 * failure.
34 */
35static int scan_for_master(struct ubifs_info *c)
36{
37 struct ubifs_scan_leb *sleb;
38 struct ubifs_scan_node *snod;
39 int lnum, offs = 0, nodes_cnt;
40
41 lnum = UBIFS_MST_LNUM;
42
43 sleb = ubifs_scan(c, lnum, 0, c->sbuf);
44 if (IS_ERR(sleb))
45 return PTR_ERR(sleb);
46 nodes_cnt = sleb->nodes_cnt;
47 if (nodes_cnt > 0) {
48 snod = list_entry(sleb->nodes.prev, struct ubifs_scan_node,
49 list);
50 if (snod->type != UBIFS_MST_NODE)
51 goto out;
52 memcpy(c->mst_node, snod->node, snod->len);
53 offs = snod->offs;
54 }
55 ubifs_scan_destroy(sleb);
56
57 lnum += 1;
58
59 sleb = ubifs_scan(c, lnum, 0, c->sbuf);
60 if (IS_ERR(sleb))
61 return PTR_ERR(sleb);
62 if (sleb->nodes_cnt != nodes_cnt)
63 goto out;
64 if (!sleb->nodes_cnt)
65 goto out;
66 snod = list_entry(sleb->nodes.prev, struct ubifs_scan_node, list);
67 if (snod->type != UBIFS_MST_NODE)
68 goto out;
69 if (snod->offs != offs)
70 goto out;
71 if (memcmp((void *)c->mst_node + UBIFS_CH_SZ,
72 (void *)snod->node + UBIFS_CH_SZ,
73 UBIFS_MST_NODE_SZ - UBIFS_CH_SZ))
74 goto out;
75 c->mst_offs = offs;
76 ubifs_scan_destroy(sleb);
77 return 0;
78
79out:
80 ubifs_scan_destroy(sleb);
81 return -EINVAL;
82}
83
84/**
85 * validate_master - validate master node.
86 * @c: UBIFS file-system description object
87 *
88 * This function validates data which was read from master node. Returns zero
89 * if the data is all right and %-EINVAL if not.
90 */
91static int validate_master(const struct ubifs_info *c)
92{
93 long long main_sz;
94 int err;
95
96 if (c->max_sqnum >= SQNUM_WATERMARK) {
97 err = 1;
98 goto out;
99 }
100
101 if (c->cmt_no >= c->max_sqnum) {
102 err = 2;
103 goto out;
104 }
105
106 if (c->highest_inum >= INUM_WATERMARK) {
107 err = 3;
108 goto out;
109 }
110
111 if (c->lhead_lnum < UBIFS_LOG_LNUM ||
112 c->lhead_lnum >= UBIFS_LOG_LNUM + c->log_lebs ||
113 c->lhead_offs < 0 || c->lhead_offs >= c->leb_size ||
114 c->lhead_offs & (c->min_io_size - 1)) {
115 err = 4;
116 goto out;
117 }
118
119 if (c->zroot.lnum >= c->leb_cnt || c->zroot.lnum < c->main_first ||
120 c->zroot.offs >= c->leb_size || c->zroot.offs & 7) {
121 err = 5;
122 goto out;
123 }
124
125 if (c->zroot.len < c->ranges[UBIFS_IDX_NODE].min_len ||
126 c->zroot.len > c->ranges[UBIFS_IDX_NODE].max_len) {
127 err = 6;
128 goto out;
129 }
130
131 if (c->gc_lnum >= c->leb_cnt || c->gc_lnum < c->main_first) {
132 err = 7;
133 goto out;
134 }
135
136 if (c->ihead_lnum >= c->leb_cnt || c->ihead_lnum < c->main_first ||
137 c->ihead_offs % c->min_io_size || c->ihead_offs < 0 ||
138 c->ihead_offs > c->leb_size || c->ihead_offs & 7) {
139 err = 8;
140 goto out;
141 }
142
143 main_sz = (long long)c->main_lebs * c->leb_size;
144 if (c->old_idx_sz & 7 || c->old_idx_sz >= main_sz) {
145 err = 9;
146 goto out;
147 }
148
149 if (c->lpt_lnum < c->lpt_first || c->lpt_lnum > c->lpt_last ||
150 c->lpt_offs < 0 || c->lpt_offs + c->nnode_sz > c->leb_size) {
151 err = 10;
152 goto out;
153 }
154
155 if (c->nhead_lnum < c->lpt_first || c->nhead_lnum > c->lpt_last ||
156 c->nhead_offs < 0 || c->nhead_offs % c->min_io_size ||
157 c->nhead_offs > c->leb_size) {
158 err = 11;
159 goto out;
160 }
161
162 if (c->ltab_lnum < c->lpt_first || c->ltab_lnum > c->lpt_last ||
163 c->ltab_offs < 0 ||
164 c->ltab_offs + c->ltab_sz > c->leb_size) {
165 err = 12;
166 goto out;
167 }
168
169 if (c->big_lpt && (c->lsave_lnum < c->lpt_first ||
170 c->lsave_lnum > c->lpt_last || c->lsave_offs < 0 ||
171 c->lsave_offs + c->lsave_sz > c->leb_size)) {
172 err = 13;
173 goto out;
174 }
175
176 if (c->lscan_lnum < c->main_first || c->lscan_lnum >= c->leb_cnt) {
177 err = 14;
178 goto out;
179 }
180
181 if (c->lst.empty_lebs < 0 || c->lst.empty_lebs > c->main_lebs - 2) {
182 err = 15;
183 goto out;
184 }
185
186 if (c->lst.idx_lebs < 0 || c->lst.idx_lebs > c->main_lebs - 1) {
187 err = 16;
188 goto out;
189 }
190
191 if (c->lst.total_free < 0 || c->lst.total_free > main_sz ||
192 c->lst.total_free & 7) {
193 err = 17;
194 goto out;
195 }
196
197 if (c->lst.total_dirty < 0 || (c->lst.total_dirty & 7)) {
198 err = 18;
199 goto out;
200 }
201
202 if (c->lst.total_used < 0 || (c->lst.total_used & 7)) {
203 err = 19;
204 goto out;
205 }
206
207 if (c->lst.total_free + c->lst.total_dirty +
208 c->lst.total_used > main_sz) {
209 err = 20;
210 goto out;
211 }
212
213 if (c->lst.total_dead + c->lst.total_dark +
214 c->lst.total_used + c->old_idx_sz > main_sz) {
215 err = 21;
216 goto out;
217 }
218
219 if (c->lst.total_dead < 0 ||
220 c->lst.total_dead > c->lst.total_free + c->lst.total_dirty ||
221 c->lst.total_dead & 7) {
222 err = 22;
223 goto out;
224 }
225
226 if (c->lst.total_dark < 0 ||
227 c->lst.total_dark > c->lst.total_free + c->lst.total_dirty ||
228 c->lst.total_dark & 7) {
229 err = 23;
230 goto out;
231 }
232
233 return 0;
234
235out:
236 ubifs_err("bad master node at offset %d error %d", c->mst_offs, err);
237 dbg_dump_node(c, c->mst_node);
238 return -EINVAL;
239}
240
241/**
242 * ubifs_read_master - read master node.
243 * @c: UBIFS file-system description object
244 *
245 * This function finds and reads the master node during file-system mount. If
246 * the flash is empty, it creates default master node as well. Returns zero in
247 * case of success and a negative error code in case of failure.
248 */
249int ubifs_read_master(struct ubifs_info *c)
250{
251 int err, old_leb_cnt;
252
253 c->mst_node = kzalloc(c->mst_node_alsz, GFP_KERNEL);
254 if (!c->mst_node)
255 return -ENOMEM;
256
257 err = scan_for_master(c);
258 if (err) {
259 err = ubifs_recover_master_node(c);
260 if (err)
261 /*
262 * Note, we do not free 'c->mst_node' here because the
263 * unmount routine will take care of this.
264 */
265 return err;
266 }
267
268 /* Make sure that the recovery flag is clear */
269 c->mst_node->flags &= cpu_to_le32(~UBIFS_MST_RCVRY);
270
271 c->max_sqnum = le64_to_cpu(c->mst_node->ch.sqnum);
272 c->highest_inum = le64_to_cpu(c->mst_node->highest_inum);
273 c->cmt_no = le64_to_cpu(c->mst_node->cmt_no);
274 c->zroot.lnum = le32_to_cpu(c->mst_node->root_lnum);
275 c->zroot.offs = le32_to_cpu(c->mst_node->root_offs);
276 c->zroot.len = le32_to_cpu(c->mst_node->root_len);
277 c->lhead_lnum = le32_to_cpu(c->mst_node->log_lnum);
278 c->gc_lnum = le32_to_cpu(c->mst_node->gc_lnum);
279 c->ihead_lnum = le32_to_cpu(c->mst_node->ihead_lnum);
280 c->ihead_offs = le32_to_cpu(c->mst_node->ihead_offs);
281 c->old_idx_sz = le64_to_cpu(c->mst_node->index_size);
282 c->lpt_lnum = le32_to_cpu(c->mst_node->lpt_lnum);
283 c->lpt_offs = le32_to_cpu(c->mst_node->lpt_offs);
284 c->nhead_lnum = le32_to_cpu(c->mst_node->nhead_lnum);
285 c->nhead_offs = le32_to_cpu(c->mst_node->nhead_offs);
286 c->ltab_lnum = le32_to_cpu(c->mst_node->ltab_lnum);
287 c->ltab_offs = le32_to_cpu(c->mst_node->ltab_offs);
288 c->lsave_lnum = le32_to_cpu(c->mst_node->lsave_lnum);
289 c->lsave_offs = le32_to_cpu(c->mst_node->lsave_offs);
290 c->lscan_lnum = le32_to_cpu(c->mst_node->lscan_lnum);
291 c->lst.empty_lebs = le32_to_cpu(c->mst_node->empty_lebs);
292 c->lst.idx_lebs = le32_to_cpu(c->mst_node->idx_lebs);
293 old_leb_cnt = le32_to_cpu(c->mst_node->leb_cnt);
294 c->lst.total_free = le64_to_cpu(c->mst_node->total_free);
295 c->lst.total_dirty = le64_to_cpu(c->mst_node->total_dirty);
296 c->lst.total_used = le64_to_cpu(c->mst_node->total_used);
297 c->lst.total_dead = le64_to_cpu(c->mst_node->total_dead);
298 c->lst.total_dark = le64_to_cpu(c->mst_node->total_dark);
299
300 c->calc_idx_sz = c->old_idx_sz;
301
302 if (c->mst_node->flags & cpu_to_le32(UBIFS_MST_NO_ORPHS))
303 c->no_orphs = 1;
304
305 if (old_leb_cnt != c->leb_cnt) {
306 /* The file system has been resized */
307 int growth = c->leb_cnt - old_leb_cnt;
308
309 if (c->leb_cnt < old_leb_cnt ||
310 c->leb_cnt < UBIFS_MIN_LEB_CNT) {
311 ubifs_err("bad leb_cnt on master node");
312 dbg_dump_node(c, c->mst_node);
313 return -EINVAL;
314 }
315
316 dbg_mnt("Auto resizing (master) from %d LEBs to %d LEBs",
317 old_leb_cnt, c->leb_cnt);
318 c->lst.empty_lebs += growth;
319 c->lst.total_free += growth * (long long)c->leb_size;
320 c->lst.total_dark += growth * (long long)c->dark_wm;
321
322 /*
323 * Reflect changes back onto the master node. N.B. the master
324 * node gets written immediately whenever mounting (or
325 * remounting) in read-write mode, so we do not need to write it
326 * here.
327 */
328 c->mst_node->leb_cnt = cpu_to_le32(c->leb_cnt);
329 c->mst_node->empty_lebs = cpu_to_le32(c->lst.empty_lebs);
330 c->mst_node->total_free = cpu_to_le64(c->lst.total_free);
331 c->mst_node->total_dark = cpu_to_le64(c->lst.total_dark);
332 }
333
334 err = validate_master(c);
335 if (err)
336 return err;
337
338 err = dbg_old_index_check_init(c, &c->zroot);
339
340 return err;
341}
342
343/**
344 * ubifs_write_master - write master node.
345 * @c: UBIFS file-system description object
346 *
347 * This function writes the master node. The caller has to take the
348 * @c->mst_mutex lock before calling this function. Returns zero in case of
349 * success and a negative error code in case of failure. The master node is
350 * written twice to enable recovery.
351 */
352int ubifs_write_master(struct ubifs_info *c)
353{
354 int err, lnum, offs, len;
355
356 if (c->ro_media)
357 return -EINVAL;
358
359 lnum = UBIFS_MST_LNUM;
360 offs = c->mst_offs + c->mst_node_alsz;
361 len = UBIFS_MST_NODE_SZ;
362
363 if (offs + UBIFS_MST_NODE_SZ > c->leb_size) {
364 err = ubifs_leb_unmap(c, lnum);
365 if (err)
366 return err;
367 offs = 0;
368 }
369
370 c->mst_offs = offs;
371 c->mst_node->highest_inum = cpu_to_le64(c->highest_inum);
372
373 err = ubifs_write_node(c, c->mst_node, len, lnum, offs, UBI_SHORTTERM);
374 if (err)
375 return err;
376
377 lnum += 1;
378
379 if (offs == 0) {
380 err = ubifs_leb_unmap(c, lnum);
381 if (err)
382 return err;
383 }
384 err = ubifs_write_node(c, c->mst_node, len, lnum, offs, UBI_SHORTTERM);
385
386 return err;
387}
diff --git a/fs/ubifs/misc.h b/fs/ubifs/misc.h
new file mode 100644
index 000000000000..4beccfc256d2
--- /dev/null
+++ b/fs/ubifs/misc.h
@@ -0,0 +1,342 @@
1/*
2 * This file is part of UBIFS.
3 *
4 * Copyright (C) 2006-2008 Nokia Corporation
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms of the GNU General Public License version 2 as published by
8 * the Free Software Foundation.
9 *
10 * This program is distributed in the hope that it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13 * more details.
14 *
15 * You should have received a copy of the GNU General Public License along with
16 * this program; if not, write to the Free Software Foundation, Inc., 51
17 * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
18 *
19 * Authors: Artem Bityutskiy (Битюцкий Артём)
20 * Adrian Hunter
21 */
22
23/*
24 * This file contains miscellaneous helper functions.
25 */
26
27#ifndef __UBIFS_MISC_H__
28#define __UBIFS_MISC_H__
29
30/**
31 * ubifs_zn_dirty - check if znode is dirty.
32 * @znode: znode to check
33 *
34 * This helper function returns %1 if @znode is dirty and %0 otherwise.
35 */
36static inline int ubifs_zn_dirty(const struct ubifs_znode *znode)
37{
38 return !!test_bit(DIRTY_ZNODE, &znode->flags);
39}
40
41/**
42 * ubifs_wake_up_bgt - wake up background thread.
43 * @c: UBIFS file-system description object
44 */
45static inline void ubifs_wake_up_bgt(struct ubifs_info *c)
46{
47 if (c->bgt && !c->need_bgt) {
48 c->need_bgt = 1;
49 wake_up_process(c->bgt);
50 }
51}
52
53/**
54 * ubifs_tnc_find_child - find next child in znode.
55 * @znode: znode to search at
56 * @start: the zbranch index to start at
57 *
58 * This helper function looks for znode child starting at index @start. Returns
59 * the child or %NULL if no children were found.
60 */
61static inline struct ubifs_znode *
62ubifs_tnc_find_child(struct ubifs_znode *znode, int start)
63{
64 while (start < znode->child_cnt) {
65 if (znode->zbranch[start].znode)
66 return znode->zbranch[start].znode;
67 start += 1;
68 }
69
70 return NULL;
71}
72
73/**
74 * ubifs_inode - get UBIFS inode information by VFS 'struct inode' object.
75 * @inode: the VFS 'struct inode' pointer
76 */
77static inline struct ubifs_inode *ubifs_inode(const struct inode *inode)
78{
79 return container_of(inode, struct ubifs_inode, vfs_inode);
80}
81
82/**
83 * ubifs_ro_mode - switch UBIFS to read read-only mode.
84 * @c: UBIFS file-system description object
85 * @err: error code which is the reason of switching to R/O mode
86 */
87static inline void ubifs_ro_mode(struct ubifs_info *c, int err)
88{
89 if (!c->ro_media) {
90 c->ro_media = 1;
91 ubifs_warn("switched to read-only mode, error %d", err);
92 dbg_dump_stack();
93 }
94}
95
96/**
97 * ubifs_compr_present - check if compressor was compiled in.
98 * @compr_type: compressor type to check
99 *
100 * This function returns %1 of compressor of type @compr_type is present, and
101 * %0 if not.
102 */
103static inline int ubifs_compr_present(int compr_type)
104{
105 ubifs_assert(compr_type >= 0 && compr_type < UBIFS_COMPR_TYPES_CNT);
106 return !!ubifs_compressors[compr_type]->capi_name;
107}
108
109/**
110 * ubifs_compr_name - get compressor name string by its type.
111 * @compr_type: compressor type
112 *
113 * This function returns compressor type string.
114 */
115static inline const char *ubifs_compr_name(int compr_type)
116{
117 ubifs_assert(compr_type >= 0 && compr_type < UBIFS_COMPR_TYPES_CNT);
118 return ubifs_compressors[compr_type]->name;
119}
120
121/**
122 * ubifs_wbuf_sync - synchronize write-buffer.
123 * @wbuf: write-buffer to synchronize
124 *
125 * This is the same as as 'ubifs_wbuf_sync_nolock()' but it does not assume
126 * that the write-buffer is already locked.
127 */
128static inline int ubifs_wbuf_sync(struct ubifs_wbuf *wbuf)
129{
130 int err;
131
132 mutex_lock_nested(&wbuf->io_mutex, wbuf->jhead);
133 err = ubifs_wbuf_sync_nolock(wbuf);
134 mutex_unlock(&wbuf->io_mutex);
135 return err;
136}
137
138/**
139 * ubifs_leb_unmap - unmap an LEB.
140 * @c: UBIFS file-system description object
141 * @lnum: LEB number to unmap
142 *
143 * This function returns %0 on success and a negative error code on failure.
144 */
145static inline int ubifs_leb_unmap(const struct ubifs_info *c, int lnum)
146{
147 int err;
148
149 if (c->ro_media)
150 return -EROFS;
151 err = ubi_leb_unmap(c->ubi, lnum);
152 if (err) {
153 ubifs_err("unmap LEB %d failed, error %d", lnum, err);
154 return err;
155 }
156
157 return 0;
158}
159
160/**
161 * ubifs_leb_write - write to a LEB.
162 * @c: UBIFS file-system description object
163 * @lnum: LEB number to write
164 * @buf: buffer to write from
165 * @offs: offset within LEB to write to
166 * @len: length to write
167 * @dtype: data type
168 *
169 * This function returns %0 on success and a negative error code on failure.
170 */
171static inline int ubifs_leb_write(const struct ubifs_info *c, int lnum,
172 const void *buf, int offs, int len, int dtype)
173{
174 int err;
175
176 if (c->ro_media)
177 return -EROFS;
178 err = ubi_leb_write(c->ubi, lnum, buf, offs, len, dtype);
179 if (err) {
180 ubifs_err("writing %d bytes at %d:%d, error %d",
181 len, lnum, offs, err);
182 return err;
183 }
184
185 return 0;
186}
187
188/**
189 * ubifs_leb_change - atomic LEB change.
190 * @c: UBIFS file-system description object
191 * @lnum: LEB number to write
192 * @buf: buffer to write from
193 * @len: length to write
194 * @dtype: data type
195 *
196 * This function returns %0 on success and a negative error code on failure.
197 */
198static inline int ubifs_leb_change(const struct ubifs_info *c, int lnum,
199 const void *buf, int len, int dtype)
200{
201 int err;
202
203 if (c->ro_media)
204 return -EROFS;
205 err = ubi_leb_change(c->ubi, lnum, buf, len, dtype);
206 if (err) {
207 ubifs_err("changing %d bytes in LEB %d, error %d",
208 len, lnum, err);
209 return err;
210 }
211
212 return 0;
213}
214
215/**
216 * ubifs_encode_dev - encode device node IDs.
217 * @dev: UBIFS device node information
218 * @rdev: device IDs to encode
219 *
220 * This is a helper function which encodes major/minor numbers of a device node
221 * into UBIFS device node description. We use standard Linux "new" and "huge"
222 * encodings.
223 */
224static inline int ubifs_encode_dev(union ubifs_dev_desc *dev, dev_t rdev)
225{
226 if (new_valid_dev(rdev)) {
227 dev->new = cpu_to_le32(new_encode_dev(rdev));
228 return sizeof(dev->new);
229 } else {
230 dev->huge = cpu_to_le64(huge_encode_dev(rdev));
231 return sizeof(dev->huge);
232 }
233}
234
235/**
236 * ubifs_add_dirt - add dirty space to LEB properties.
237 * @c: the UBIFS file-system description object
238 * @lnum: LEB to add dirty space for
239 * @dirty: dirty space to add
240 *
241 * This is a helper function which increased amount of dirty LEB space. Returns
242 * zero in case of success and a negative error code in case of failure.
243 */
244static inline int ubifs_add_dirt(struct ubifs_info *c, int lnum, int dirty)
245{
246 return ubifs_update_one_lp(c, lnum, LPROPS_NC, dirty, 0, 0);
247}
248
249/**
250 * ubifs_return_leb - return LEB to lprops.
251 * @c: the UBIFS file-system description object
252 * @lnum: LEB to return
253 *
254 * This helper function cleans the "taken" flag of a logical eraseblock in the
255 * lprops. Returns zero in case of success and a negative error code in case of
256 * failure.
257 */
258static inline int ubifs_return_leb(struct ubifs_info *c, int lnum)
259{
260 return ubifs_change_one_lp(c, lnum, LPROPS_NC, LPROPS_NC, 0,
261 LPROPS_TAKEN, 0);
262}
263
264/**
265 * ubifs_idx_node_sz - return index node size.
266 * @c: the UBIFS file-system description object
267 * @child_cnt: number of children of this index node
268 */
269static inline int ubifs_idx_node_sz(const struct ubifs_info *c, int child_cnt)
270{
271 return UBIFS_IDX_NODE_SZ + (UBIFS_BRANCH_SZ + c->key_len) * child_cnt;
272}
273
274/**
275 * ubifs_idx_branch - return pointer to an index branch.
276 * @c: the UBIFS file-system description object
277 * @idx: index node
278 * @bnum: branch number
279 */
280static inline
281struct ubifs_branch *ubifs_idx_branch(const struct ubifs_info *c,
282 const struct ubifs_idx_node *idx,
283 int bnum)
284{
285 return (struct ubifs_branch *)((void *)idx->branches +
286 (UBIFS_BRANCH_SZ + c->key_len) * bnum);
287}
288
289/**
290 * ubifs_idx_key - return pointer to an index key.
291 * @c: the UBIFS file-system description object
292 * @idx: index node
293 */
294static inline void *ubifs_idx_key(const struct ubifs_info *c,
295 const struct ubifs_idx_node *idx)
296{
297 return (void *)((struct ubifs_branch *)idx->branches)->key;
298}
299
300/**
301 * ubifs_reported_space - calculate reported free space.
302 * @c: the UBIFS file-system description object
303 * @free: amount of free space
304 *
305 * This function calculates amount of free space which will be reported to
306 * user-space. User-space application tend to expect that if the file-system
307 * (e.g., via the 'statfs()' call) reports that it has N bytes available, they
308 * are able to write a file of size N. UBIFS attaches node headers to each data
309 * node and it has to write indexind nodes as well. This introduces additional
310 * overhead, and UBIFS it has to report sligtly less free space to meet the
311 * above expectetion.
312 *
313 * This function assumes free space is made up of uncompressed data nodes and
314 * full index nodes (one per data node, doubled because we always allow enough
315 * space to write the index twice).
316 *
317 * Note, the calculation is pessimistic, which means that most of the time
318 * UBIFS reports less space than it actually has.
319 */
320static inline long long ubifs_reported_space(const struct ubifs_info *c,
321 uint64_t free)
322{
323 int divisor, factor;
324
325 divisor = UBIFS_MAX_DATA_NODE_SZ + (c->max_idx_node_sz << 1);
326 factor = UBIFS_MAX_DATA_NODE_SZ - UBIFS_DATA_NODE_SZ;
327 do_div(free, divisor);
328
329 return free * factor;
330}
331
332/**
333 * ubifs_current_time - round current time to time granularity.
334 * @inode: inode
335 */
336static inline struct timespec ubifs_current_time(struct inode *inode)
337{
338 return (inode->i_sb->s_time_gran < NSEC_PER_SEC) ?
339 current_fs_time(inode->i_sb) : CURRENT_TIME_SEC;
340}
341
342#endif /* __UBIFS_MISC_H__ */
diff --git a/fs/ubifs/orphan.c b/fs/ubifs/orphan.c
new file mode 100644
index 000000000000..3afeb9242c6a
--- /dev/null
+++ b/fs/ubifs/orphan.c
@@ -0,0 +1,958 @@
1/*
2 * This file is part of UBIFS.
3 *
4 * Copyright (C) 2006-2008 Nokia Corporation.
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms of the GNU General Public License version 2 as published by
8 * the Free Software Foundation.
9 *
10 * This program is distributed in the hope that it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13 * more details.
14 *
15 * You should have received a copy of the GNU General Public License along with
16 * this program; if not, write to the Free Software Foundation, Inc., 51
17 * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
18 *
19 * Author: Adrian Hunter
20 */
21
22#include "ubifs.h"
23
24/*
25 * An orphan is an inode number whose inode node has been committed to the index
26 * with a link count of zero. That happens when an open file is deleted
27 * (unlinked) and then a commit is run. In the normal course of events the inode
28 * would be deleted when the file is closed. However in the case of an unclean
29 * unmount, orphans need to be accounted for. After an unclean unmount, the
30 * orphans' inodes must be deleted which means either scanning the entire index
31 * looking for them, or keeping a list on flash somewhere. This unit implements
32 * the latter approach.
33 *
34 * The orphan area is a fixed number of LEBs situated between the LPT area and
35 * the main area. The number of orphan area LEBs is specified when the file
36 * system is created. The minimum number is 1. The size of the orphan area
37 * should be so that it can hold the maximum number of orphans that are expected
38 * to ever exist at one time.
39 *
40 * The number of orphans that can fit in a LEB is:
41 *
42 * (c->leb_size - UBIFS_ORPH_NODE_SZ) / sizeof(__le64)
43 *
44 * For example: a 15872 byte LEB can fit 1980 orphans so 1 LEB may be enough.
45 *
46 * Orphans are accumulated in a rb-tree. When an inode's link count drops to
47 * zero, the inode number is added to the rb-tree. It is removed from the tree
48 * when the inode is deleted. Any new orphans that are in the orphan tree when
49 * the commit is run, are written to the orphan area in 1 or more orph nodes.
50 * If the orphan area is full, it is consolidated to make space. There is
51 * always enough space because validation prevents the user from creating more
52 * than the maximum number of orphans allowed.
53 */
54
55#ifdef CONFIG_UBIFS_FS_DEBUG
56static int dbg_check_orphans(struct ubifs_info *c);
57#else
58#define dbg_check_orphans(c) 0
59#endif
60
61/**
62 * ubifs_add_orphan - add an orphan.
63 * @c: UBIFS file-system description object
64 * @inum: orphan inode number
65 *
66 * Add an orphan. This function is called when an inodes link count drops to
67 * zero.
68 */
69int ubifs_add_orphan(struct ubifs_info *c, ino_t inum)
70{
71 struct ubifs_orphan *orphan, *o;
72 struct rb_node **p, *parent = NULL;
73
74 orphan = kzalloc(sizeof(struct ubifs_orphan), GFP_NOFS);
75 if (!orphan)
76 return -ENOMEM;
77 orphan->inum = inum;
78 orphan->new = 1;
79
80 spin_lock(&c->orphan_lock);
81 if (c->tot_orphans >= c->max_orphans) {
82 spin_unlock(&c->orphan_lock);
83 kfree(orphan);
84 return -ENFILE;
85 }
86 p = &c->orph_tree.rb_node;
87 while (*p) {
88 parent = *p;
89 o = rb_entry(parent, struct ubifs_orphan, rb);
90 if (inum < o->inum)
91 p = &(*p)->rb_left;
92 else if (inum > o->inum)
93 p = &(*p)->rb_right;
94 else {
95 dbg_err("orphaned twice");
96 spin_unlock(&c->orphan_lock);
97 kfree(orphan);
98 return 0;
99 }
100 }
101 c->tot_orphans += 1;
102 c->new_orphans += 1;
103 rb_link_node(&orphan->rb, parent, p);
104 rb_insert_color(&orphan->rb, &c->orph_tree);
105 list_add_tail(&orphan->list, &c->orph_list);
106 list_add_tail(&orphan->new_list, &c->orph_new);
107 spin_unlock(&c->orphan_lock);
108 dbg_gen("ino %lu", inum);
109 return 0;
110}
111
112/**
113 * ubifs_delete_orphan - delete an orphan.
114 * @c: UBIFS file-system description object
115 * @inum: orphan inode number
116 *
117 * Delete an orphan. This function is called when an inode is deleted.
118 */
119void ubifs_delete_orphan(struct ubifs_info *c, ino_t inum)
120{
121 struct ubifs_orphan *o;
122 struct rb_node *p;
123
124 spin_lock(&c->orphan_lock);
125 p = c->orph_tree.rb_node;
126 while (p) {
127 o = rb_entry(p, struct ubifs_orphan, rb);
128 if (inum < o->inum)
129 p = p->rb_left;
130 else if (inum > o->inum)
131 p = p->rb_right;
132 else {
133 if (o->dnext) {
134 spin_unlock(&c->orphan_lock);
135 dbg_gen("deleted twice ino %lu", inum);
136 return;
137 }
138 if (o->cnext) {
139 o->dnext = c->orph_dnext;
140 c->orph_dnext = o;
141 spin_unlock(&c->orphan_lock);
142 dbg_gen("delete later ino %lu", inum);
143 return;
144 }
145 rb_erase(p, &c->orph_tree);
146 list_del(&o->list);
147 c->tot_orphans -= 1;
148 if (o->new) {
149 list_del(&o->new_list);
150 c->new_orphans -= 1;
151 }
152 spin_unlock(&c->orphan_lock);
153 kfree(o);
154 dbg_gen("inum %lu", inum);
155 return;
156 }
157 }
158 spin_unlock(&c->orphan_lock);
159 dbg_err("missing orphan ino %lu", inum);
160 dbg_dump_stack();
161}
162
163/**
164 * ubifs_orphan_start_commit - start commit of orphans.
165 * @c: UBIFS file-system description object
166 *
167 * Start commit of orphans.
168 */
169int ubifs_orphan_start_commit(struct ubifs_info *c)
170{
171 struct ubifs_orphan *orphan, **last;
172
173 spin_lock(&c->orphan_lock);
174 last = &c->orph_cnext;
175 list_for_each_entry(orphan, &c->orph_new, new_list) {
176 ubifs_assert(orphan->new);
177 orphan->new = 0;
178 *last = orphan;
179 last = &orphan->cnext;
180 }
181 *last = orphan->cnext;
182 c->cmt_orphans = c->new_orphans;
183 c->new_orphans = 0;
184 dbg_cmt("%d orphans to commit", c->cmt_orphans);
185 INIT_LIST_HEAD(&c->orph_new);
186 if (c->tot_orphans == 0)
187 c->no_orphs = 1;
188 else
189 c->no_orphs = 0;
190 spin_unlock(&c->orphan_lock);
191 return 0;
192}
193
194/**
195 * avail_orphs - calculate available space.
196 * @c: UBIFS file-system description object
197 *
198 * This function returns the number of orphans that can be written in the
199 * available space.
200 */
201static int avail_orphs(struct ubifs_info *c)
202{
203 int avail_lebs, avail, gap;
204
205 avail_lebs = c->orph_lebs - (c->ohead_lnum - c->orph_first) - 1;
206 avail = avail_lebs *
207 ((c->leb_size - UBIFS_ORPH_NODE_SZ) / sizeof(__le64));
208 gap = c->leb_size - c->ohead_offs;
209 if (gap >= UBIFS_ORPH_NODE_SZ + sizeof(__le64))
210 avail += (gap - UBIFS_ORPH_NODE_SZ) / sizeof(__le64);
211 return avail;
212}
213
214/**
215 * tot_avail_orphs - calculate total space.
216 * @c: UBIFS file-system description object
217 *
218 * This function returns the number of orphans that can be written in half
219 * the total space. That leaves half the space for adding new orphans.
220 */
221static int tot_avail_orphs(struct ubifs_info *c)
222{
223 int avail_lebs, avail;
224
225 avail_lebs = c->orph_lebs;
226 avail = avail_lebs *
227 ((c->leb_size - UBIFS_ORPH_NODE_SZ) / sizeof(__le64));
228 return avail / 2;
229}
230
231/**
232 * do_write_orph_node - write a node
233 * @c: UBIFS file-system description object
234 * @len: length of node
235 * @atomic: write atomically
236 *
237 * This function writes a node to the orphan head from the orphan buffer. If
238 * %atomic is not zero, then the write is done atomically. On success, %0 is
239 * returned, otherwise a negative error code is returned.
240 */
241static int do_write_orph_node(struct ubifs_info *c, int len, int atomic)
242{
243 int err = 0;
244
245 if (atomic) {
246 ubifs_assert(c->ohead_offs == 0);
247 ubifs_prepare_node(c, c->orph_buf, len, 1);
248 len = ALIGN(len, c->min_io_size);
249 err = ubifs_leb_change(c, c->ohead_lnum, c->orph_buf, len,
250 UBI_SHORTTERM);
251 } else {
252 if (c->ohead_offs == 0) {
253 /* Ensure LEB has been unmapped */
254 err = ubifs_leb_unmap(c, c->ohead_lnum);
255 if (err)
256 return err;
257 }
258 err = ubifs_write_node(c, c->orph_buf, len, c->ohead_lnum,
259 c->ohead_offs, UBI_SHORTTERM);
260 }
261 return err;
262}
263
264/**
265 * write_orph_node - write an orph node
266 * @c: UBIFS file-system description object
267 * @atomic: write atomically
268 *
269 * This function builds an orph node from the cnext list and writes it to the
270 * orphan head. On success, %0 is returned, otherwise a negative error code
271 * is returned.
272 */
273static int write_orph_node(struct ubifs_info *c, int atomic)
274{
275 struct ubifs_orphan *orphan, *cnext;
276 struct ubifs_orph_node *orph;
277 int gap, err, len, cnt, i;
278
279 ubifs_assert(c->cmt_orphans > 0);
280 gap = c->leb_size - c->ohead_offs;
281 if (gap < UBIFS_ORPH_NODE_SZ + sizeof(__le64)) {
282 c->ohead_lnum += 1;
283 c->ohead_offs = 0;
284 gap = c->leb_size;
285 if (c->ohead_lnum > c->orph_last) {
286 /*
287 * We limit the number of orphans so that this should
288 * never happen.
289 */
290 ubifs_err("out of space in orphan area");
291 return -EINVAL;
292 }
293 }
294 cnt = (gap - UBIFS_ORPH_NODE_SZ) / sizeof(__le64);
295 if (cnt > c->cmt_orphans)
296 cnt = c->cmt_orphans;
297 len = UBIFS_ORPH_NODE_SZ + cnt * sizeof(__le64);
298 ubifs_assert(c->orph_buf);
299 orph = c->orph_buf;
300 orph->ch.node_type = UBIFS_ORPH_NODE;
301 spin_lock(&c->orphan_lock);
302 cnext = c->orph_cnext;
303 for (i = 0; i < cnt; i++) {
304 orphan = cnext;
305 orph->inos[i] = cpu_to_le64(orphan->inum);
306 cnext = orphan->cnext;
307 orphan->cnext = NULL;
308 }
309 c->orph_cnext = cnext;
310 c->cmt_orphans -= cnt;
311 spin_unlock(&c->orphan_lock);
312 if (c->cmt_orphans)
313 orph->cmt_no = cpu_to_le64(c->cmt_no + 1);
314 else
315 /* Mark the last node of the commit */
316 orph->cmt_no = cpu_to_le64((c->cmt_no + 1) | (1ULL << 63));
317 ubifs_assert(c->ohead_offs + len <= c->leb_size);
318 ubifs_assert(c->ohead_lnum >= c->orph_first);
319 ubifs_assert(c->ohead_lnum <= c->orph_last);
320 err = do_write_orph_node(c, len, atomic);
321 c->ohead_offs += ALIGN(len, c->min_io_size);
322 c->ohead_offs = ALIGN(c->ohead_offs, 8);
323 return err;
324}
325
326/**
327 * write_orph_nodes - write orph nodes until there are no more to commit
328 * @c: UBIFS file-system description object
329 * @atomic: write atomically
330 *
331 * This function writes orph nodes for all the orphans to commit. On success,
332 * %0 is returned, otherwise a negative error code is returned.
333 */
334static int write_orph_nodes(struct ubifs_info *c, int atomic)
335{
336 int err;
337
338 while (c->cmt_orphans > 0) {
339 err = write_orph_node(c, atomic);
340 if (err)
341 return err;
342 }
343 if (atomic) {
344 int lnum;
345
346 /* Unmap any unused LEBs after consolidation */
347 lnum = c->ohead_lnum + 1;
348 for (lnum = c->ohead_lnum + 1; lnum <= c->orph_last; lnum++) {
349 err = ubifs_leb_unmap(c, lnum);
350 if (err)
351 return err;
352 }
353 }
354 return 0;
355}
356
357/**
358 * consolidate - consolidate the orphan area.
359 * @c: UBIFS file-system description object
360 *
361 * This function enables consolidation by putting all the orphans into the list
362 * to commit. The list is in the order that the orphans were added, and the
363 * LEBs are written atomically in order, so at no time can orphans be lost by
364 * an unclean unmount.
365 *
366 * This function returns %0 on success and a negative error code on failure.
367 */
368static int consolidate(struct ubifs_info *c)
369{
370 int tot_avail = tot_avail_orphs(c), err = 0;
371
372 spin_lock(&c->orphan_lock);
373 dbg_cmt("there is space for %d orphans and there are %d",
374 tot_avail, c->tot_orphans);
375 if (c->tot_orphans - c->new_orphans <= tot_avail) {
376 struct ubifs_orphan *orphan, **last;
377 int cnt = 0;
378
379 /* Change the cnext list to include all non-new orphans */
380 last = &c->orph_cnext;
381 list_for_each_entry(orphan, &c->orph_list, list) {
382 if (orphan->new)
383 continue;
384 *last = orphan;
385 last = &orphan->cnext;
386 cnt += 1;
387 }
388 *last = orphan->cnext;
389 ubifs_assert(cnt == c->tot_orphans - c->new_orphans);
390 c->cmt_orphans = cnt;
391 c->ohead_lnum = c->orph_first;
392 c->ohead_offs = 0;
393 } else {
394 /*
395 * We limit the number of orphans so that this should
396 * never happen.
397 */
398 ubifs_err("out of space in orphan area");
399 err = -EINVAL;
400 }
401 spin_unlock(&c->orphan_lock);
402 return err;
403}
404
405/**
406 * commit_orphans - commit orphans.
407 * @c: UBIFS file-system description object
408 *
409 * This function commits orphans to flash. On success, %0 is returned,
410 * otherwise a negative error code is returned.
411 */
412static int commit_orphans(struct ubifs_info *c)
413{
414 int avail, atomic = 0, err;
415
416 ubifs_assert(c->cmt_orphans > 0);
417 avail = avail_orphs(c);
418 if (avail < c->cmt_orphans) {
419 /* Not enough space to write new orphans, so consolidate */
420 err = consolidate(c);
421 if (err)
422 return err;
423 atomic = 1;
424 }
425 err = write_orph_nodes(c, atomic);
426 return err;
427}
428
429/**
430 * erase_deleted - erase the orphans marked for deletion.
431 * @c: UBIFS file-system description object
432 *
433 * During commit, the orphans being committed cannot be deleted, so they are
434 * marked for deletion and deleted by this function. Also, the recovery
435 * adds killed orphans to the deletion list, and therefore they are deleted
436 * here too.
437 */
438static void erase_deleted(struct ubifs_info *c)
439{
440 struct ubifs_orphan *orphan, *dnext;
441
442 spin_lock(&c->orphan_lock);
443 dnext = c->orph_dnext;
444 while (dnext) {
445 orphan = dnext;
446 dnext = orphan->dnext;
447 ubifs_assert(!orphan->new);
448 rb_erase(&orphan->rb, &c->orph_tree);
449 list_del(&orphan->list);
450 c->tot_orphans -= 1;
451 dbg_gen("deleting orphan ino %lu", orphan->inum);
452 kfree(orphan);
453 }
454 c->orph_dnext = NULL;
455 spin_unlock(&c->orphan_lock);
456}
457
458/**
459 * ubifs_orphan_end_commit - end commit of orphans.
460 * @c: UBIFS file-system description object
461 *
462 * End commit of orphans.
463 */
464int ubifs_orphan_end_commit(struct ubifs_info *c)
465{
466 int err;
467
468 if (c->cmt_orphans != 0) {
469 err = commit_orphans(c);
470 if (err)
471 return err;
472 }
473 erase_deleted(c);
474 err = dbg_check_orphans(c);
475 return err;
476}
477
478/**
479 * clear_orphans - erase all LEBs used for orphans.
480 * @c: UBIFS file-system description object
481 *
482 * If recovery is not required, then the orphans from the previous session
483 * are not needed. This function locates the LEBs used to record
484 * orphans, and un-maps them.
485 */
486static int clear_orphans(struct ubifs_info *c)
487{
488 int lnum, err;
489
490 for (lnum = c->orph_first; lnum <= c->orph_last; lnum++) {
491 err = ubifs_leb_unmap(c, lnum);
492 if (err)
493 return err;
494 }
495 c->ohead_lnum = c->orph_first;
496 c->ohead_offs = 0;
497 return 0;
498}
499
500/**
501 * insert_dead_orphan - insert an orphan.
502 * @c: UBIFS file-system description object
503 * @inum: orphan inode number
504 *
505 * This function is a helper to the 'do_kill_orphans()' function. The orphan
506 * must be kept until the next commit, so it is added to the rb-tree and the
507 * deletion list.
508 */
509static int insert_dead_orphan(struct ubifs_info *c, ino_t inum)
510{
511 struct ubifs_orphan *orphan, *o;
512 struct rb_node **p, *parent = NULL;
513
514 orphan = kzalloc(sizeof(struct ubifs_orphan), GFP_KERNEL);
515 if (!orphan)
516 return -ENOMEM;
517 orphan->inum = inum;
518
519 p = &c->orph_tree.rb_node;
520 while (*p) {
521 parent = *p;
522 o = rb_entry(parent, struct ubifs_orphan, rb);
523 if (inum < o->inum)
524 p = &(*p)->rb_left;
525 else if (inum > o->inum)
526 p = &(*p)->rb_right;
527 else {
528 /* Already added - no problem */
529 kfree(orphan);
530 return 0;
531 }
532 }
533 c->tot_orphans += 1;
534 rb_link_node(&orphan->rb, parent, p);
535 rb_insert_color(&orphan->rb, &c->orph_tree);
536 list_add_tail(&orphan->list, &c->orph_list);
537 orphan->dnext = c->orph_dnext;
538 c->orph_dnext = orphan;
539 dbg_mnt("ino %lu, new %d, tot %d",
540 inum, c->new_orphans, c->tot_orphans);
541 return 0;
542}
543
544/**
545 * do_kill_orphans - remove orphan inodes from the index.
546 * @c: UBIFS file-system description object
547 * @sleb: scanned LEB
548 * @last_cmt_no: cmt_no of last orph node read is passed and returned here
549 * @outofdate: whether the LEB is out of date is returned here
550 * @last_flagged: whether the end orph node is encountered
551 *
552 * This function is a helper to the 'kill_orphans()' function. It goes through
553 * every orphan node in a LEB and for every inode number recorded, removes
554 * all keys for that inode from the TNC.
555 */
556static int do_kill_orphans(struct ubifs_info *c, struct ubifs_scan_leb *sleb,
557 unsigned long long *last_cmt_no, int *outofdate,
558 int *last_flagged)
559{
560 struct ubifs_scan_node *snod;
561 struct ubifs_orph_node *orph;
562 unsigned long long cmt_no;
563 ino_t inum;
564 int i, n, err, first = 1;
565
566 list_for_each_entry(snod, &sleb->nodes, list) {
567 if (snod->type != UBIFS_ORPH_NODE) {
568 ubifs_err("invalid node type %d in orphan area at "
569 "%d:%d", snod->type, sleb->lnum, snod->offs);
570 dbg_dump_node(c, snod->node);
571 return -EINVAL;
572 }
573
574 orph = snod->node;
575
576 /* Check commit number */
577 cmt_no = le64_to_cpu(orph->cmt_no) & LLONG_MAX;
578 /*
579 * The commit number on the master node may be less, because
580 * of a failed commit. If there are several failed commits in a
581 * row, the commit number written on orph nodes will continue to
582 * increase (because the commit number is adjusted here) even
583 * though the commit number on the master node stays the same
584 * because the master node has not been re-written.
585 */
586 if (cmt_no > c->cmt_no)
587 c->cmt_no = cmt_no;
588 if (cmt_no < *last_cmt_no && *last_flagged) {
589 /*
590 * The last orph node had a higher commit number and was
591 * flagged as the last written for that commit number.
592 * That makes this orph node, out of date.
593 */
594 if (!first) {
595 ubifs_err("out of order commit number %llu in "
596 "orphan node at %d:%d",
597 cmt_no, sleb->lnum, snod->offs);
598 dbg_dump_node(c, snod->node);
599 return -EINVAL;
600 }
601 dbg_rcvry("out of date LEB %d", sleb->lnum);
602 *outofdate = 1;
603 return 0;
604 }
605
606 if (first)
607 first = 0;
608
609 n = (le32_to_cpu(orph->ch.len) - UBIFS_ORPH_NODE_SZ) >> 3;
610 for (i = 0; i < n; i++) {
611 inum = le64_to_cpu(orph->inos[i]);
612 dbg_rcvry("deleting orphaned inode %lu", inum);
613 err = ubifs_tnc_remove_ino(c, inum);
614 if (err)
615 return err;
616 err = insert_dead_orphan(c, inum);
617 if (err)
618 return err;
619 }
620
621 *last_cmt_no = cmt_no;
622 if (le64_to_cpu(orph->cmt_no) & (1ULL << 63)) {
623 dbg_rcvry("last orph node for commit %llu at %d:%d",
624 cmt_no, sleb->lnum, snod->offs);
625 *last_flagged = 1;
626 } else
627 *last_flagged = 0;
628 }
629
630 return 0;
631}
632
633/**
634 * kill_orphans - remove all orphan inodes from the index.
635 * @c: UBIFS file-system description object
636 *
637 * If recovery is required, then orphan inodes recorded during the previous
638 * session (which ended with an unclean unmount) must be deleted from the index.
639 * This is done by updating the TNC, but since the index is not updated until
640 * the next commit, the LEBs where the orphan information is recorded are not
641 * erased until the next commit.
642 */
643static int kill_orphans(struct ubifs_info *c)
644{
645 unsigned long long last_cmt_no = 0;
646 int lnum, err = 0, outofdate = 0, last_flagged = 0;
647
648 c->ohead_lnum = c->orph_first;
649 c->ohead_offs = 0;
650 /* Check no-orphans flag and skip this if no orphans */
651 if (c->no_orphs) {
652 dbg_rcvry("no orphans");
653 return 0;
654 }
655 /*
656 * Orph nodes always start at c->orph_first and are written to each
657 * successive LEB in turn. Generally unused LEBs will have been unmapped
658 * but may contain out of date orph nodes if the unmap didn't go
659 * through. In addition, the last orph node written for each commit is
660 * marked (top bit of orph->cmt_no is set to 1). It is possible that
661 * there are orph nodes from the next commit (i.e. the commit did not
662 * complete successfully). In that case, no orphans will have been lost
663 * due to the way that orphans are written, and any orphans added will
664 * be valid orphans anyway and so can be deleted.
665 */
666 for (lnum = c->orph_first; lnum <= c->orph_last; lnum++) {
667 struct ubifs_scan_leb *sleb;
668
669 dbg_rcvry("LEB %d", lnum);
670 sleb = ubifs_scan(c, lnum, 0, c->sbuf);
671 if (IS_ERR(sleb)) {
672 sleb = ubifs_recover_leb(c, lnum, 0, c->sbuf, 0);
673 if (IS_ERR(sleb)) {
674 err = PTR_ERR(sleb);
675 break;
676 }
677 }
678 err = do_kill_orphans(c, sleb, &last_cmt_no, &outofdate,
679 &last_flagged);
680 if (err || outofdate) {
681 ubifs_scan_destroy(sleb);
682 break;
683 }
684 if (sleb->endpt) {
685 c->ohead_lnum = lnum;
686 c->ohead_offs = sleb->endpt;
687 }
688 ubifs_scan_destroy(sleb);
689 }
690 return err;
691}
692
693/**
694 * ubifs_mount_orphans - delete orphan inodes and erase LEBs that recorded them.
695 * @c: UBIFS file-system description object
696 * @unclean: indicates recovery from unclean unmount
697 * @read_only: indicates read only mount
698 *
699 * This function is called when mounting to erase orphans from the previous
700 * session. If UBIFS was not unmounted cleanly, then the inodes recorded as
701 * orphans are deleted.
702 */
703int ubifs_mount_orphans(struct ubifs_info *c, int unclean, int read_only)
704{
705 int err = 0;
706
707 c->max_orphans = tot_avail_orphs(c);
708
709 if (!read_only) {
710 c->orph_buf = vmalloc(c->leb_size);
711 if (!c->orph_buf)
712 return -ENOMEM;
713 }
714
715 if (unclean)
716 err = kill_orphans(c);
717 else if (!read_only)
718 err = clear_orphans(c);
719
720 return err;
721}
722
723#ifdef CONFIG_UBIFS_FS_DEBUG
724
725struct check_orphan {
726 struct rb_node rb;
727 ino_t inum;
728};
729
730struct check_info {
731 unsigned long last_ino;
732 unsigned long tot_inos;
733 unsigned long missing;
734 unsigned long long leaf_cnt;
735 struct ubifs_ino_node *node;
736 struct rb_root root;
737};
738
739static int dbg_find_orphan(struct ubifs_info *c, ino_t inum)
740{
741 struct ubifs_orphan *o;
742 struct rb_node *p;
743
744 spin_lock(&c->orphan_lock);
745 p = c->orph_tree.rb_node;
746 while (p) {
747 o = rb_entry(p, struct ubifs_orphan, rb);
748 if (inum < o->inum)
749 p = p->rb_left;
750 else if (inum > o->inum)
751 p = p->rb_right;
752 else {
753 spin_unlock(&c->orphan_lock);
754 return 1;
755 }
756 }
757 spin_unlock(&c->orphan_lock);
758 return 0;
759}
760
761static int dbg_ins_check_orphan(struct rb_root *root, ino_t inum)
762{
763 struct check_orphan *orphan, *o;
764 struct rb_node **p, *parent = NULL;
765
766 orphan = kzalloc(sizeof(struct check_orphan), GFP_NOFS);
767 if (!orphan)
768 return -ENOMEM;
769 orphan->inum = inum;
770
771 p = &root->rb_node;
772 while (*p) {
773 parent = *p;
774 o = rb_entry(parent, struct check_orphan, rb);
775 if (inum < o->inum)
776 p = &(*p)->rb_left;
777 else if (inum > o->inum)
778 p = &(*p)->rb_right;
779 else {
780 kfree(orphan);
781 return 0;
782 }
783 }
784 rb_link_node(&orphan->rb, parent, p);
785 rb_insert_color(&orphan->rb, root);
786 return 0;
787}
788
789static int dbg_find_check_orphan(struct rb_root *root, ino_t inum)
790{
791 struct check_orphan *o;
792 struct rb_node *p;
793
794 p = root->rb_node;
795 while (p) {
796 o = rb_entry(p, struct check_orphan, rb);
797 if (inum < o->inum)
798 p = p->rb_left;
799 else if (inum > o->inum)
800 p = p->rb_right;
801 else
802 return 1;
803 }
804 return 0;
805}
806
807static void dbg_free_check_tree(struct rb_root *root)
808{
809 struct rb_node *this = root->rb_node;
810 struct check_orphan *o;
811
812 while (this) {
813 if (this->rb_left) {
814 this = this->rb_left;
815 continue;
816 } else if (this->rb_right) {
817 this = this->rb_right;
818 continue;
819 }
820 o = rb_entry(this, struct check_orphan, rb);
821 this = rb_parent(this);
822 if (this) {
823 if (this->rb_left == &o->rb)
824 this->rb_left = NULL;
825 else
826 this->rb_right = NULL;
827 }
828 kfree(o);
829 }
830}
831
832static int dbg_orphan_check(struct ubifs_info *c, struct ubifs_zbranch *zbr,
833 void *priv)
834{
835 struct check_info *ci = priv;
836 ino_t inum;
837 int err;
838
839 inum = key_inum(c, &zbr->key);
840 if (inum != ci->last_ino) {
841 /* Lowest node type is the inode node, so it comes first */
842 if (key_type(c, &zbr->key) != UBIFS_INO_KEY)
843 ubifs_err("found orphan node ino %lu, type %d", inum,
844 key_type(c, &zbr->key));
845 ci->last_ino = inum;
846 ci->tot_inos += 1;
847 err = ubifs_tnc_read_node(c, zbr, ci->node);
848 if (err) {
849 ubifs_err("node read failed, error %d", err);
850 return err;
851 }
852 if (ci->node->nlink == 0)
853 /* Must be recorded as an orphan */
854 if (!dbg_find_check_orphan(&ci->root, inum) &&
855 !dbg_find_orphan(c, inum)) {
856 ubifs_err("missing orphan, ino %lu", inum);
857 ci->missing += 1;
858 }
859 }
860 ci->leaf_cnt += 1;
861 return 0;
862}
863
864static int dbg_read_orphans(struct check_info *ci, struct ubifs_scan_leb *sleb)
865{
866 struct ubifs_scan_node *snod;
867 struct ubifs_orph_node *orph;
868 ino_t inum;
869 int i, n, err;
870
871 list_for_each_entry(snod, &sleb->nodes, list) {
872 cond_resched();
873 if (snod->type != UBIFS_ORPH_NODE)
874 continue;
875 orph = snod->node;
876 n = (le32_to_cpu(orph->ch.len) - UBIFS_ORPH_NODE_SZ) >> 3;
877 for (i = 0; i < n; i++) {
878 inum = le64_to_cpu(orph->inos[i]);
879 err = dbg_ins_check_orphan(&ci->root, inum);
880 if (err)
881 return err;
882 }
883 }
884 return 0;
885}
886
887static int dbg_scan_orphans(struct ubifs_info *c, struct check_info *ci)
888{
889 int lnum, err = 0;
890
891 /* Check no-orphans flag and skip this if no orphans */
892 if (c->no_orphs)
893 return 0;
894
895 for (lnum = c->orph_first; lnum <= c->orph_last; lnum++) {
896 struct ubifs_scan_leb *sleb;
897
898 sleb = ubifs_scan(c, lnum, 0, c->dbg_buf);
899 if (IS_ERR(sleb)) {
900 err = PTR_ERR(sleb);
901 break;
902 }
903
904 err = dbg_read_orphans(ci, sleb);
905 ubifs_scan_destroy(sleb);
906 if (err)
907 break;
908 }
909
910 return err;
911}
912
913static int dbg_check_orphans(struct ubifs_info *c)
914{
915 struct check_info ci;
916 int err;
917
918 if (!(ubifs_chk_flags & UBIFS_CHK_ORPH))
919 return 0;
920
921 ci.last_ino = 0;
922 ci.tot_inos = 0;
923 ci.missing = 0;
924 ci.leaf_cnt = 0;
925 ci.root = RB_ROOT;
926 ci.node = kmalloc(UBIFS_MAX_INO_NODE_SZ, GFP_NOFS);
927 if (!ci.node) {
928 ubifs_err("out of memory");
929 return -ENOMEM;
930 }
931
932 err = dbg_scan_orphans(c, &ci);
933 if (err)
934 goto out;
935
936 err = dbg_walk_index(c, &dbg_orphan_check, NULL, &ci);
937 if (err) {
938 ubifs_err("cannot scan TNC, error %d", err);
939 goto out;
940 }
941
942 if (ci.missing) {
943 ubifs_err("%lu missing orphan(s)", ci.missing);
944 err = -EINVAL;
945 goto out;
946 }
947
948 dbg_cmt("last inode number is %lu", ci.last_ino);
949 dbg_cmt("total number of inodes is %lu", ci.tot_inos);
950 dbg_cmt("total number of leaf nodes is %llu", ci.leaf_cnt);
951
952out:
953 dbg_free_check_tree(&ci.root);
954 kfree(ci.node);
955 return err;
956}
957
958#endif /* CONFIG_UBIFS_FS_DEBUG */
diff --git a/fs/ubifs/recovery.c b/fs/ubifs/recovery.c
new file mode 100644
index 000000000000..77d26c141cf6
--- /dev/null
+++ b/fs/ubifs/recovery.c
@@ -0,0 +1,1519 @@
1/*
2 * This file is part of UBIFS.
3 *
4 * Copyright (C) 2006-2008 Nokia Corporation
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms of the GNU General Public License version 2 as published by
8 * the Free Software Foundation.
9 *
10 * This program is distributed in the hope that it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13 * more details.
14 *
15 * You should have received a copy of the GNU General Public License along with
16 * this program; if not, write to the Free Software Foundation, Inc., 51
17 * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
18 *
19 * Authors: Adrian Hunter
20 * Artem Bityutskiy (Битюцкий Артём)
21 */
22
23/*
24 * This file implements functions needed to recover from unclean un-mounts.
25 * When UBIFS is mounted, it checks a flag on the master node to determine if
26 * an un-mount was completed sucessfully. If not, the process of mounting
27 * incorparates additional checking and fixing of on-flash data structures.
28 * UBIFS always cleans away all remnants of an unclean un-mount, so that
29 * errors do not accumulate. However UBIFS defers recovery if it is mounted
30 * read-only, and the flash is not modified in that case.
31 */
32
33#include <linux/crc32.h>
34#include "ubifs.h"
35
36/**
37 * is_empty - determine whether a buffer is empty (contains all 0xff).
38 * @buf: buffer to clean
39 * @len: length of buffer
40 *
41 * This function returns %1 if the buffer is empty (contains all 0xff) otherwise
42 * %0 is returned.
43 */
44static int is_empty(void *buf, int len)
45{
46 uint8_t *p = buf;
47 int i;
48
49 for (i = 0; i < len; i++)
50 if (*p++ != 0xff)
51 return 0;
52 return 1;
53}
54
55/**
56 * get_master_node - get the last valid master node allowing for corruption.
57 * @c: UBIFS file-system description object
58 * @lnum: LEB number
59 * @pbuf: buffer containing the LEB read, is returned here
60 * @mst: master node, if found, is returned here
61 * @cor: corruption, if found, is returned here
62 *
63 * This function allocates a buffer, reads the LEB into it, and finds and
64 * returns the last valid master node allowing for one area of corruption.
65 * The corrupt area, if there is one, must be consistent with the assumption
66 * that it is the result of an unclean unmount while the master node was being
67 * written. Under those circumstances, it is valid to use the previously written
68 * master node.
69 *
70 * This function returns %0 on success and a negative error code on failure.
71 */
72static int get_master_node(const struct ubifs_info *c, int lnum, void **pbuf,
73 struct ubifs_mst_node **mst, void **cor)
74{
75 const int sz = c->mst_node_alsz;
76 int err, offs, len;
77 void *sbuf, *buf;
78
79 sbuf = vmalloc(c->leb_size);
80 if (!sbuf)
81 return -ENOMEM;
82
83 err = ubi_read(c->ubi, lnum, sbuf, 0, c->leb_size);
84 if (err && err != -EBADMSG)
85 goto out_free;
86
87 /* Find the first position that is definitely not a node */
88 offs = 0;
89 buf = sbuf;
90 len = c->leb_size;
91 while (offs + UBIFS_MST_NODE_SZ <= c->leb_size) {
92 struct ubifs_ch *ch = buf;
93
94 if (le32_to_cpu(ch->magic) != UBIFS_NODE_MAGIC)
95 break;
96 offs += sz;
97 buf += sz;
98 len -= sz;
99 }
100 /* See if there was a valid master node before that */
101 if (offs) {
102 int ret;
103
104 offs -= sz;
105 buf -= sz;
106 len += sz;
107 ret = ubifs_scan_a_node(c, buf, len, lnum, offs, 1);
108 if (ret != SCANNED_A_NODE && offs) {
109 /* Could have been corruption so check one place back */
110 offs -= sz;
111 buf -= sz;
112 len += sz;
113 ret = ubifs_scan_a_node(c, buf, len, lnum, offs, 1);
114 if (ret != SCANNED_A_NODE)
115 /*
116 * We accept only one area of corruption because
117 * we are assuming that it was caused while
118 * trying to write a master node.
119 */
120 goto out_err;
121 }
122 if (ret == SCANNED_A_NODE) {
123 struct ubifs_ch *ch = buf;
124
125 if (ch->node_type != UBIFS_MST_NODE)
126 goto out_err;
127 dbg_rcvry("found a master node at %d:%d", lnum, offs);
128 *mst = buf;
129 offs += sz;
130 buf += sz;
131 len -= sz;
132 }
133 }
134 /* Check for corruption */
135 if (offs < c->leb_size) {
136 if (!is_empty(buf, min_t(int, len, sz))) {
137 *cor = buf;
138 dbg_rcvry("found corruption at %d:%d", lnum, offs);
139 }
140 offs += sz;
141 buf += sz;
142 len -= sz;
143 }
144 /* Check remaining empty space */
145 if (offs < c->leb_size)
146 if (!is_empty(buf, len))
147 goto out_err;
148 *pbuf = sbuf;
149 return 0;
150
151out_err:
152 err = -EINVAL;
153out_free:
154 vfree(sbuf);
155 *mst = NULL;
156 *cor = NULL;
157 return err;
158}
159
160/**
161 * write_rcvrd_mst_node - write recovered master node.
162 * @c: UBIFS file-system description object
163 * @mst: master node
164 *
165 * This function returns %0 on success and a negative error code on failure.
166 */
167static int write_rcvrd_mst_node(struct ubifs_info *c,
168 struct ubifs_mst_node *mst)
169{
170 int err = 0, lnum = UBIFS_MST_LNUM, sz = c->mst_node_alsz;
171 uint32_t save_flags;
172
173 dbg_rcvry("recovery");
174
175 save_flags = mst->flags;
176 mst->flags = cpu_to_le32(le32_to_cpu(mst->flags) | UBIFS_MST_RCVRY);
177
178 ubifs_prepare_node(c, mst, UBIFS_MST_NODE_SZ, 1);
179 err = ubi_leb_change(c->ubi, lnum, mst, sz, UBI_SHORTTERM);
180 if (err)
181 goto out;
182 err = ubi_leb_change(c->ubi, lnum + 1, mst, sz, UBI_SHORTTERM);
183 if (err)
184 goto out;
185out:
186 mst->flags = save_flags;
187 return err;
188}
189
190/**
191 * ubifs_recover_master_node - recover the master node.
192 * @c: UBIFS file-system description object
193 *
194 * This function recovers the master node from corruption that may occur due to
195 * an unclean unmount.
196 *
197 * This function returns %0 on success and a negative error code on failure.
198 */
199int ubifs_recover_master_node(struct ubifs_info *c)
200{
201 void *buf1 = NULL, *buf2 = NULL, *cor1 = NULL, *cor2 = NULL;
202 struct ubifs_mst_node *mst1 = NULL, *mst2 = NULL, *mst;
203 const int sz = c->mst_node_alsz;
204 int err, offs1, offs2;
205
206 dbg_rcvry("recovery");
207
208 err = get_master_node(c, UBIFS_MST_LNUM, &buf1, &mst1, &cor1);
209 if (err)
210 goto out_free;
211
212 err = get_master_node(c, UBIFS_MST_LNUM + 1, &buf2, &mst2, &cor2);
213 if (err)
214 goto out_free;
215
216 if (mst1) {
217 offs1 = (void *)mst1 - buf1;
218 if ((le32_to_cpu(mst1->flags) & UBIFS_MST_RCVRY) &&
219 (offs1 == 0 && !cor1)) {
220 /*
221 * mst1 was written by recovery at offset 0 with no
222 * corruption.
223 */
224 dbg_rcvry("recovery recovery");
225 mst = mst1;
226 } else if (mst2) {
227 offs2 = (void *)mst2 - buf2;
228 if (offs1 == offs2) {
229 /* Same offset, so must be the same */
230 if (memcmp((void *)mst1 + UBIFS_CH_SZ,
231 (void *)mst2 + UBIFS_CH_SZ,
232 UBIFS_MST_NODE_SZ - UBIFS_CH_SZ))
233 goto out_err;
234 mst = mst1;
235 } else if (offs2 + sz == offs1) {
236 /* 1st LEB was written, 2nd was not */
237 if (cor1)
238 goto out_err;
239 mst = mst1;
240 } else if (offs1 == 0 && offs2 + sz >= c->leb_size) {
241 /* 1st LEB was unmapped and written, 2nd not */
242 if (cor1)
243 goto out_err;
244 mst = mst1;
245 } else
246 goto out_err;
247 } else {
248 /*
249 * 2nd LEB was unmapped and about to be written, so
250 * there must be only one master node in the first LEB
251 * and no corruption.
252 */
253 if (offs1 != 0 || cor1)
254 goto out_err;
255 mst = mst1;
256 }
257 } else {
258 if (!mst2)
259 goto out_err;
260 /*
261 * 1st LEB was unmapped and about to be written, so there must
262 * be no room left in 2nd LEB.
263 */
264 offs2 = (void *)mst2 - buf2;
265 if (offs2 + sz + sz <= c->leb_size)
266 goto out_err;
267 mst = mst2;
268 }
269
270 dbg_rcvry("recovered master node from LEB %d",
271 (mst == mst1 ? UBIFS_MST_LNUM : UBIFS_MST_LNUM + 1));
272
273 memcpy(c->mst_node, mst, UBIFS_MST_NODE_SZ);
274
275 if ((c->vfs_sb->s_flags & MS_RDONLY)) {
276 /* Read-only mode. Keep a copy for switching to rw mode */
277 c->rcvrd_mst_node = kmalloc(sz, GFP_KERNEL);
278 if (!c->rcvrd_mst_node) {
279 err = -ENOMEM;
280 goto out_free;
281 }
282 memcpy(c->rcvrd_mst_node, c->mst_node, UBIFS_MST_NODE_SZ);
283 } else {
284 /* Write the recovered master node */
285 c->max_sqnum = le64_to_cpu(mst->ch.sqnum) - 1;
286 err = write_rcvrd_mst_node(c, c->mst_node);
287 if (err)
288 goto out_free;
289 }
290
291 vfree(buf2);
292 vfree(buf1);
293
294 return 0;
295
296out_err:
297 err = -EINVAL;
298out_free:
299 ubifs_err("failed to recover master node");
300 if (mst1) {
301 dbg_err("dumping first master node");
302 dbg_dump_node(c, mst1);
303 }
304 if (mst2) {
305 dbg_err("dumping second master node");
306 dbg_dump_node(c, mst2);
307 }
308 vfree(buf2);
309 vfree(buf1);
310 return err;
311}
312
313/**
314 * ubifs_write_rcvrd_mst_node - write the recovered master node.
315 * @c: UBIFS file-system description object
316 *
317 * This function writes the master node that was recovered during mounting in
318 * read-only mode and must now be written because we are remounting rw.
319 *
320 * This function returns %0 on success and a negative error code on failure.
321 */
322int ubifs_write_rcvrd_mst_node(struct ubifs_info *c)
323{
324 int err;
325
326 if (!c->rcvrd_mst_node)
327 return 0;
328 c->rcvrd_mst_node->flags |= cpu_to_le32(UBIFS_MST_DIRTY);
329 c->mst_node->flags |= cpu_to_le32(UBIFS_MST_DIRTY);
330 err = write_rcvrd_mst_node(c, c->rcvrd_mst_node);
331 if (err)
332 return err;
333 kfree(c->rcvrd_mst_node);
334 c->rcvrd_mst_node = NULL;
335 return 0;
336}
337
338/**
339 * is_last_write - determine if an offset was in the last write to a LEB.
340 * @c: UBIFS file-system description object
341 * @buf: buffer to check
342 * @offs: offset to check
343 *
344 * This function returns %1 if @offs was in the last write to the LEB whose data
345 * is in @buf, otherwise %0 is returned. The determination is made by checking
346 * for subsequent empty space starting from the next min_io_size boundary (or a
347 * bit less than the common header size if min_io_size is one).
348 */
349static int is_last_write(const struct ubifs_info *c, void *buf, int offs)
350{
351 int empty_offs;
352 int check_len;
353 uint8_t *p;
354
355 if (c->min_io_size == 1) {
356 check_len = c->leb_size - offs;
357 p = buf + check_len;
358 for (; check_len > 0; check_len--)
359 if (*--p != 0xff)
360 break;
361 /*
362 * 'check_len' is the size of the corruption which cannot be
363 * more than the size of 1 node if it was caused by an unclean
364 * unmount.
365 */
366 if (check_len > UBIFS_MAX_NODE_SZ)
367 return 0;
368 return 1;
369 }
370
371 /*
372 * Round up to the next c->min_io_size boundary i.e. 'offs' is in the
373 * last wbuf written. After that should be empty space.
374 */
375 empty_offs = ALIGN(offs + 1, c->min_io_size);
376 check_len = c->leb_size - empty_offs;
377 p = buf + empty_offs - offs;
378
379 for (; check_len > 0; check_len--)
380 if (*p++ != 0xff)
381 return 0;
382 return 1;
383}
384
385/**
386 * clean_buf - clean the data from an LEB sitting in a buffer.
387 * @c: UBIFS file-system description object
388 * @buf: buffer to clean
389 * @lnum: LEB number to clean
390 * @offs: offset from which to clean
391 * @len: length of buffer
392 *
393 * This function pads up to the next min_io_size boundary (if there is one) and
394 * sets empty space to all 0xff. @buf, @offs and @len are updated to the next
395 * min_io_size boundary (if there is one).
396 */
397static void clean_buf(const struct ubifs_info *c, void **buf, int lnum,
398 int *offs, int *len)
399{
400 int empty_offs, pad_len;
401
402 lnum = lnum;
403 dbg_rcvry("cleaning corruption at %d:%d", lnum, *offs);
404
405 if (c->min_io_size == 1) {
406 memset(*buf, 0xff, c->leb_size - *offs);
407 return;
408 }
409
410 ubifs_assert(!(*offs & 7));
411 empty_offs = ALIGN(*offs, c->min_io_size);
412 pad_len = empty_offs - *offs;
413 ubifs_pad(c, *buf, pad_len);
414 *offs += pad_len;
415 *buf += pad_len;
416 *len -= pad_len;
417 memset(*buf, 0xff, c->leb_size - empty_offs);
418}
419
420/**
421 * no_more_nodes - determine if there are no more nodes in a buffer.
422 * @c: UBIFS file-system description object
423 * @buf: buffer to check
424 * @len: length of buffer
425 * @lnum: LEB number of the LEB from which @buf was read
426 * @offs: offset from which @buf was read
427 *
428 * This function scans @buf for more nodes and returns %0 is a node is found and
429 * %1 if no more nodes are found.
430 */
431static int no_more_nodes(const struct ubifs_info *c, void *buf, int len,
432 int lnum, int offs)
433{
434 int skip, next_offs = 0;
435
436 if (len > UBIFS_DATA_NODE_SZ) {
437 struct ubifs_ch *ch = buf;
438 int dlen = le32_to_cpu(ch->len);
439
440 if (ch->node_type == UBIFS_DATA_NODE && dlen >= UBIFS_CH_SZ &&
441 dlen <= UBIFS_MAX_DATA_NODE_SZ)
442 /* The corrupt node looks like a data node */
443 next_offs = ALIGN(offs + dlen, 8);
444 }
445
446 if (c->min_io_size == 1)
447 skip = 8;
448 else
449 skip = ALIGN(offs + 1, c->min_io_size) - offs;
450
451 offs += skip;
452 buf += skip;
453 len -= skip;
454 while (len > 8) {
455 struct ubifs_ch *ch = buf;
456 uint32_t magic = le32_to_cpu(ch->magic);
457 int ret;
458
459 if (magic == UBIFS_NODE_MAGIC) {
460 ret = ubifs_scan_a_node(c, buf, len, lnum, offs, 1);
461 if (ret == SCANNED_A_NODE || ret > 0) {
462 /*
463 * There is a small chance this is just data in
464 * a data node, so check that possibility. e.g.
465 * this is part of a file that itself contains
466 * a UBIFS image.
467 */
468 if (next_offs && offs + le32_to_cpu(ch->len) <=
469 next_offs)
470 continue;
471 dbg_rcvry("unexpected node at %d:%d", lnum,
472 offs);
473 return 0;
474 }
475 }
476 offs += 8;
477 buf += 8;
478 len -= 8;
479 }
480 return 1;
481}
482
483/**
484 * fix_unclean_leb - fix an unclean LEB.
485 * @c: UBIFS file-system description object
486 * @sleb: scanned LEB information
487 * @start: offset where scan started
488 */
489static int fix_unclean_leb(struct ubifs_info *c, struct ubifs_scan_leb *sleb,
490 int start)
491{
492 int lnum = sleb->lnum, endpt = start;
493
494 /* Get the end offset of the last node we are keeping */
495 if (!list_empty(&sleb->nodes)) {
496 struct ubifs_scan_node *snod;
497
498 snod = list_entry(sleb->nodes.prev,
499 struct ubifs_scan_node, list);
500 endpt = snod->offs + snod->len;
501 }
502
503 if ((c->vfs_sb->s_flags & MS_RDONLY) && !c->remounting_rw) {
504 /* Add to recovery list */
505 struct ubifs_unclean_leb *ucleb;
506
507 dbg_rcvry("need to fix LEB %d start %d endpt %d",
508 lnum, start, sleb->endpt);
509 ucleb = kzalloc(sizeof(struct ubifs_unclean_leb), GFP_NOFS);
510 if (!ucleb)
511 return -ENOMEM;
512 ucleb->lnum = lnum;
513 ucleb->endpt = endpt;
514 list_add_tail(&ucleb->list, &c->unclean_leb_list);
515 } else {
516 /* Write the fixed LEB back to flash */
517 int err;
518
519 dbg_rcvry("fixing LEB %d start %d endpt %d",
520 lnum, start, sleb->endpt);
521 if (endpt == 0) {
522 err = ubifs_leb_unmap(c, lnum);
523 if (err)
524 return err;
525 } else {
526 int len = ALIGN(endpt, c->min_io_size);
527
528 if (start) {
529 err = ubi_read(c->ubi, lnum, sleb->buf, 0,
530 start);
531 if (err)
532 return err;
533 }
534 /* Pad to min_io_size */
535 if (len > endpt) {
536 int pad_len = len - ALIGN(endpt, 8);
537
538 if (pad_len > 0) {
539 void *buf = sleb->buf + len - pad_len;
540
541 ubifs_pad(c, buf, pad_len);
542 }
543 }
544 err = ubi_leb_change(c->ubi, lnum, sleb->buf, len,
545 UBI_UNKNOWN);
546 if (err)
547 return err;
548 }
549 }
550 return 0;
551}
552
553/**
554 * drop_incomplete_group - drop nodes from an incomplete group.
555 * @sleb: scanned LEB information
556 * @offs: offset of dropped nodes is returned here
557 *
558 * This function returns %1 if nodes are dropped and %0 otherwise.
559 */
560static int drop_incomplete_group(struct ubifs_scan_leb *sleb, int *offs)
561{
562 int dropped = 0;
563
564 while (!list_empty(&sleb->nodes)) {
565 struct ubifs_scan_node *snod;
566 struct ubifs_ch *ch;
567
568 snod = list_entry(sleb->nodes.prev, struct ubifs_scan_node,
569 list);
570 ch = snod->node;
571 if (ch->group_type != UBIFS_IN_NODE_GROUP)
572 return dropped;
573 dbg_rcvry("dropping node at %d:%d", sleb->lnum, snod->offs);
574 *offs = snod->offs;
575 list_del(&snod->list);
576 kfree(snod);
577 sleb->nodes_cnt -= 1;
578 dropped = 1;
579 }
580 return dropped;
581}
582
583/**
584 * ubifs_recover_leb - scan and recover a LEB.
585 * @c: UBIFS file-system description object
586 * @lnum: LEB number
587 * @offs: offset
588 * @sbuf: LEB-sized buffer to use
589 * @grouped: nodes may be grouped for recovery
590 *
591 * This function does a scan of a LEB, but caters for errors that might have
592 * been caused by the unclean unmount from which we are attempting to recover.
593 *
594 * This function returns %0 on success and a negative error code on failure.
595 */
596struct ubifs_scan_leb *ubifs_recover_leb(struct ubifs_info *c, int lnum,
597 int offs, void *sbuf, int grouped)
598{
599 int err, len = c->leb_size - offs, need_clean = 0, quiet = 1;
600 int empty_chkd = 0, start = offs;
601 struct ubifs_scan_leb *sleb;
602 void *buf = sbuf + offs;
603
604 dbg_rcvry("%d:%d", lnum, offs);
605
606 sleb = ubifs_start_scan(c, lnum, offs, sbuf);
607 if (IS_ERR(sleb))
608 return sleb;
609
610 if (sleb->ecc)
611 need_clean = 1;
612
613 while (len >= 8) {
614 int ret;
615
616 dbg_scan("look at LEB %d:%d (%d bytes left)",
617 lnum, offs, len);
618
619 cond_resched();
620
621 /*
622 * Scan quietly until there is an error from which we cannot
623 * recover
624 */
625 ret = ubifs_scan_a_node(c, buf, len, lnum, offs, quiet);
626
627 if (ret == SCANNED_A_NODE) {
628 /* A valid node, and not a padding node */
629 struct ubifs_ch *ch = buf;
630 int node_len;
631
632 err = ubifs_add_snod(c, sleb, buf, offs);
633 if (err)
634 goto error;
635 node_len = ALIGN(le32_to_cpu(ch->len), 8);
636 offs += node_len;
637 buf += node_len;
638 len -= node_len;
639 continue;
640 }
641
642 if (ret > 0) {
643 /* Padding bytes or a valid padding node */
644 offs += ret;
645 buf += ret;
646 len -= ret;
647 continue;
648 }
649
650 if (ret == SCANNED_EMPTY_SPACE) {
651 if (!is_empty(buf, len)) {
652 if (!is_last_write(c, buf, offs))
653 break;
654 clean_buf(c, &buf, lnum, &offs, &len);
655 need_clean = 1;
656 }
657 empty_chkd = 1;
658 break;
659 }
660
661 if (ret == SCANNED_GARBAGE || ret == SCANNED_A_BAD_PAD_NODE)
662 if (is_last_write(c, buf, offs)) {
663 clean_buf(c, &buf, lnum, &offs, &len);
664 need_clean = 1;
665 empty_chkd = 1;
666 break;
667 }
668
669 if (ret == SCANNED_A_CORRUPT_NODE)
670 if (no_more_nodes(c, buf, len, lnum, offs)) {
671 clean_buf(c, &buf, lnum, &offs, &len);
672 need_clean = 1;
673 empty_chkd = 1;
674 break;
675 }
676
677 if (quiet) {
678 /* Redo the last scan but noisily */
679 quiet = 0;
680 continue;
681 }
682
683 switch (ret) {
684 case SCANNED_GARBAGE:
685 dbg_err("garbage");
686 goto corrupted;
687 case SCANNED_A_CORRUPT_NODE:
688 case SCANNED_A_BAD_PAD_NODE:
689 dbg_err("bad node");
690 goto corrupted;
691 default:
692 dbg_err("unknown");
693 goto corrupted;
694 }
695 }
696
697 if (!empty_chkd && !is_empty(buf, len)) {
698 if (is_last_write(c, buf, offs)) {
699 clean_buf(c, &buf, lnum, &offs, &len);
700 need_clean = 1;
701 } else {
702 ubifs_err("corrupt empty space at LEB %d:%d",
703 lnum, offs);
704 goto corrupted;
705 }
706 }
707
708 /* Drop nodes from incomplete group */
709 if (grouped && drop_incomplete_group(sleb, &offs)) {
710 buf = sbuf + offs;
711 len = c->leb_size - offs;
712 clean_buf(c, &buf, lnum, &offs, &len);
713 need_clean = 1;
714 }
715
716 if (offs % c->min_io_size) {
717 clean_buf(c, &buf, lnum, &offs, &len);
718 need_clean = 1;
719 }
720
721 ubifs_end_scan(c, sleb, lnum, offs);
722
723 if (need_clean) {
724 err = fix_unclean_leb(c, sleb, start);
725 if (err)
726 goto error;
727 }
728
729 return sleb;
730
731corrupted:
732 ubifs_scanned_corruption(c, lnum, offs, buf);
733 err = -EUCLEAN;
734error:
735 ubifs_err("LEB %d scanning failed", lnum);
736 ubifs_scan_destroy(sleb);
737 return ERR_PTR(err);
738}
739
740/**
741 * get_cs_sqnum - get commit start sequence number.
742 * @c: UBIFS file-system description object
743 * @lnum: LEB number of commit start node
744 * @offs: offset of commit start node
745 * @cs_sqnum: commit start sequence number is returned here
746 *
747 * This function returns %0 on success and a negative error code on failure.
748 */
749static int get_cs_sqnum(struct ubifs_info *c, int lnum, int offs,
750 unsigned long long *cs_sqnum)
751{
752 struct ubifs_cs_node *cs_node = NULL;
753 int err, ret;
754
755 dbg_rcvry("at %d:%d", lnum, offs);
756 cs_node = kmalloc(UBIFS_CS_NODE_SZ, GFP_KERNEL);
757 if (!cs_node)
758 return -ENOMEM;
759 if (c->leb_size - offs < UBIFS_CS_NODE_SZ)
760 goto out_err;
761 err = ubi_read(c->ubi, lnum, (void *)cs_node, offs, UBIFS_CS_NODE_SZ);
762 if (err && err != -EBADMSG)
763 goto out_free;
764 ret = ubifs_scan_a_node(c, cs_node, UBIFS_CS_NODE_SZ, lnum, offs, 0);
765 if (ret != SCANNED_A_NODE) {
766 dbg_err("Not a valid node");
767 goto out_err;
768 }
769 if (cs_node->ch.node_type != UBIFS_CS_NODE) {
770 dbg_err("Node a CS node, type is %d", cs_node->ch.node_type);
771 goto out_err;
772 }
773 if (le64_to_cpu(cs_node->cmt_no) != c->cmt_no) {
774 dbg_err("CS node cmt_no %llu != current cmt_no %llu",
775 (unsigned long long)le64_to_cpu(cs_node->cmt_no),
776 c->cmt_no);
777 goto out_err;
778 }
779 *cs_sqnum = le64_to_cpu(cs_node->ch.sqnum);
780 dbg_rcvry("commit start sqnum %llu", *cs_sqnum);
781 kfree(cs_node);
782 return 0;
783
784out_err:
785 err = -EINVAL;
786out_free:
787 ubifs_err("failed to get CS sqnum");
788 kfree(cs_node);
789 return err;
790}
791
792/**
793 * ubifs_recover_log_leb - scan and recover a log LEB.
794 * @c: UBIFS file-system description object
795 * @lnum: LEB number
796 * @offs: offset
797 * @sbuf: LEB-sized buffer to use
798 *
799 * This function does a scan of a LEB, but caters for errors that might have
800 * been caused by the unclean unmount from which we are attempting to recover.
801 *
802 * This function returns %0 on success and a negative error code on failure.
803 */
804struct ubifs_scan_leb *ubifs_recover_log_leb(struct ubifs_info *c, int lnum,
805 int offs, void *sbuf)
806{
807 struct ubifs_scan_leb *sleb;
808 int next_lnum;
809
810 dbg_rcvry("LEB %d", lnum);
811 next_lnum = lnum + 1;
812 if (next_lnum >= UBIFS_LOG_LNUM + c->log_lebs)
813 next_lnum = UBIFS_LOG_LNUM;
814 if (next_lnum != c->ltail_lnum) {
815 /*
816 * We can only recover at the end of the log, so check that the
817 * next log LEB is empty or out of date.
818 */
819 sleb = ubifs_scan(c, next_lnum, 0, sbuf);
820 if (IS_ERR(sleb))
821 return sleb;
822 if (sleb->nodes_cnt) {
823 struct ubifs_scan_node *snod;
824 unsigned long long cs_sqnum = c->cs_sqnum;
825
826 snod = list_entry(sleb->nodes.next,
827 struct ubifs_scan_node, list);
828 if (cs_sqnum == 0) {
829 int err;
830
831 err = get_cs_sqnum(c, lnum, offs, &cs_sqnum);
832 if (err) {
833 ubifs_scan_destroy(sleb);
834 return ERR_PTR(err);
835 }
836 }
837 if (snod->sqnum > cs_sqnum) {
838 ubifs_err("unrecoverable log corruption "
839 "in LEB %d", lnum);
840 ubifs_scan_destroy(sleb);
841 return ERR_PTR(-EUCLEAN);
842 }
843 }
844 ubifs_scan_destroy(sleb);
845 }
846 return ubifs_recover_leb(c, lnum, offs, sbuf, 0);
847}
848
849/**
850 * recover_head - recover a head.
851 * @c: UBIFS file-system description object
852 * @lnum: LEB number of head to recover
853 * @offs: offset of head to recover
854 * @sbuf: LEB-sized buffer to use
855 *
856 * This function ensures that there is no data on the flash at a head location.
857 *
858 * This function returns %0 on success and a negative error code on failure.
859 */
860static int recover_head(const struct ubifs_info *c, int lnum, int offs,
861 void *sbuf)
862{
863 int len, err, need_clean = 0;
864
865 if (c->min_io_size > 1)
866 len = c->min_io_size;
867 else
868 len = 512;
869 if (offs + len > c->leb_size)
870 len = c->leb_size - offs;
871
872 if (!len)
873 return 0;
874
875 /* Read at the head location and check it is empty flash */
876 err = ubi_read(c->ubi, lnum, sbuf, offs, len);
877 if (err)
878 need_clean = 1;
879 else {
880 uint8_t *p = sbuf;
881
882 while (len--)
883 if (*p++ != 0xff) {
884 need_clean = 1;
885 break;
886 }
887 }
888
889 if (need_clean) {
890 dbg_rcvry("cleaning head at %d:%d", lnum, offs);
891 if (offs == 0)
892 return ubifs_leb_unmap(c, lnum);
893 err = ubi_read(c->ubi, lnum, sbuf, 0, offs);
894 if (err)
895 return err;
896 return ubi_leb_change(c->ubi, lnum, sbuf, offs, UBI_UNKNOWN);
897 }
898
899 return 0;
900}
901
902/**
903 * ubifs_recover_inl_heads - recover index and LPT heads.
904 * @c: UBIFS file-system description object
905 * @sbuf: LEB-sized buffer to use
906 *
907 * This function ensures that there is no data on the flash at the index and
908 * LPT head locations.
909 *
910 * This deals with the recovery of a half-completed journal commit. UBIFS is
911 * careful never to overwrite the last version of the index or the LPT. Because
912 * the index and LPT are wandering trees, data from a half-completed commit will
913 * not be referenced anywhere in UBIFS. The data will be either in LEBs that are
914 * assumed to be empty and will be unmapped anyway before use, or in the index
915 * and LPT heads.
916 *
917 * This function returns %0 on success and a negative error code on failure.
918 */
919int ubifs_recover_inl_heads(const struct ubifs_info *c, void *sbuf)
920{
921 int err;
922
923 ubifs_assert(!(c->vfs_sb->s_flags & MS_RDONLY) || c->remounting_rw);
924
925 dbg_rcvry("checking index head at %d:%d", c->ihead_lnum, c->ihead_offs);
926 err = recover_head(c, c->ihead_lnum, c->ihead_offs, sbuf);
927 if (err)
928 return err;
929
930 dbg_rcvry("checking LPT head at %d:%d", c->nhead_lnum, c->nhead_offs);
931 err = recover_head(c, c->nhead_lnum, c->nhead_offs, sbuf);
932 if (err)
933 return err;
934
935 return 0;
936}
937
938/**
939 * clean_an_unclean_leb - read and write a LEB to remove corruption.
940 * @c: UBIFS file-system description object
941 * @ucleb: unclean LEB information
942 * @sbuf: LEB-sized buffer to use
943 *
944 * This function reads a LEB up to a point pre-determined by the mount recovery,
945 * checks the nodes, and writes the result back to the flash, thereby cleaning
946 * off any following corruption, or non-fatal ECC errors.
947 *
948 * This function returns %0 on success and a negative error code on failure.
949 */
950static int clean_an_unclean_leb(const struct ubifs_info *c,
951 struct ubifs_unclean_leb *ucleb, void *sbuf)
952{
953 int err, lnum = ucleb->lnum, offs = 0, len = ucleb->endpt, quiet = 1;
954 void *buf = sbuf;
955
956 dbg_rcvry("LEB %d len %d", lnum, len);
957
958 if (len == 0) {
959 /* Nothing to read, just unmap it */
960 err = ubifs_leb_unmap(c, lnum);
961 if (err)
962 return err;
963 return 0;
964 }
965
966 err = ubi_read(c->ubi, lnum, buf, offs, len);
967 if (err && err != -EBADMSG)
968 return err;
969
970 while (len >= 8) {
971 int ret;
972
973 cond_resched();
974
975 /* Scan quietly until there is an error */
976 ret = ubifs_scan_a_node(c, buf, len, lnum, offs, quiet);
977
978 if (ret == SCANNED_A_NODE) {
979 /* A valid node, and not a padding node */
980 struct ubifs_ch *ch = buf;
981 int node_len;
982
983 node_len = ALIGN(le32_to_cpu(ch->len), 8);
984 offs += node_len;
985 buf += node_len;
986 len -= node_len;
987 continue;
988 }
989
990 if (ret > 0) {
991 /* Padding bytes or a valid padding node */
992 offs += ret;
993 buf += ret;
994 len -= ret;
995 continue;
996 }
997
998 if (ret == SCANNED_EMPTY_SPACE) {
999 ubifs_err("unexpected empty space at %d:%d",
1000 lnum, offs);
1001 return -EUCLEAN;
1002 }
1003
1004 if (quiet) {
1005 /* Redo the last scan but noisily */
1006 quiet = 0;
1007 continue;
1008 }
1009
1010 ubifs_scanned_corruption(c, lnum, offs, buf);
1011 return -EUCLEAN;
1012 }
1013
1014 /* Pad to min_io_size */
1015 len = ALIGN(ucleb->endpt, c->min_io_size);
1016 if (len > ucleb->endpt) {
1017 int pad_len = len - ALIGN(ucleb->endpt, 8);
1018
1019 if (pad_len > 0) {
1020 buf = c->sbuf + len - pad_len;
1021 ubifs_pad(c, buf, pad_len);
1022 }
1023 }
1024
1025 /* Write back the LEB atomically */
1026 err = ubi_leb_change(c->ubi, lnum, sbuf, len, UBI_UNKNOWN);
1027 if (err)
1028 return err;
1029
1030 dbg_rcvry("cleaned LEB %d", lnum);
1031
1032 return 0;
1033}
1034
1035/**
1036 * ubifs_clean_lebs - clean LEBs recovered during read-only mount.
1037 * @c: UBIFS file-system description object
1038 * @sbuf: LEB-sized buffer to use
1039 *
1040 * This function cleans a LEB identified during recovery that needs to be
1041 * written but was not because UBIFS was mounted read-only. This happens when
1042 * remounting to read-write mode.
1043 *
1044 * This function returns %0 on success and a negative error code on failure.
1045 */
1046int ubifs_clean_lebs(const struct ubifs_info *c, void *sbuf)
1047{
1048 dbg_rcvry("recovery");
1049 while (!list_empty(&c->unclean_leb_list)) {
1050 struct ubifs_unclean_leb *ucleb;
1051 int err;
1052
1053 ucleb = list_entry(c->unclean_leb_list.next,
1054 struct ubifs_unclean_leb, list);
1055 err = clean_an_unclean_leb(c, ucleb, sbuf);
1056 if (err)
1057 return err;
1058 list_del(&ucleb->list);
1059 kfree(ucleb);
1060 }
1061 return 0;
1062}
1063
1064/**
1065 * ubifs_rcvry_gc_commit - recover the GC LEB number and run the commit.
1066 * @c: UBIFS file-system description object
1067 *
1068 * Out-of-place garbage collection requires always one empty LEB with which to
1069 * start garbage collection. The LEB number is recorded in c->gc_lnum and is
1070 * written to the master node on unmounting. In the case of an unclean unmount
1071 * the value of gc_lnum recorded in the master node is out of date and cannot
1072 * be used. Instead, recovery must allocate an empty LEB for this purpose.
1073 * However, there may not be enough empty space, in which case it must be
1074 * possible to GC the dirtiest LEB into the GC head LEB.
1075 *
1076 * This function also runs the commit which causes the TNC updates from
1077 * size-recovery and orphans to be written to the flash. That is important to
1078 * ensure correct replay order for subsequent mounts.
1079 *
1080 * This function returns %0 on success and a negative error code on failure.
1081 */
1082int ubifs_rcvry_gc_commit(struct ubifs_info *c)
1083{
1084 struct ubifs_wbuf *wbuf = &c->jheads[GCHD].wbuf;
1085 struct ubifs_lprops lp;
1086 int lnum, err;
1087
1088 c->gc_lnum = -1;
1089 if (wbuf->lnum == -1) {
1090 dbg_rcvry("no GC head LEB");
1091 goto find_free;
1092 }
1093 /*
1094 * See whether the used space in the dirtiest LEB fits in the GC head
1095 * LEB.
1096 */
1097 if (wbuf->offs == c->leb_size) {
1098 dbg_rcvry("no room in GC head LEB");
1099 goto find_free;
1100 }
1101 err = ubifs_find_dirty_leb(c, &lp, wbuf->offs, 2);
1102 if (err) {
1103 if (err == -ENOSPC)
1104 dbg_err("could not find a dirty LEB");
1105 return err;
1106 }
1107 ubifs_assert(!(lp.flags & LPROPS_INDEX));
1108 lnum = lp.lnum;
1109 if (lp.free + lp.dirty == c->leb_size) {
1110 /* An empty LEB was returned */
1111 if (lp.free != c->leb_size) {
1112 err = ubifs_change_one_lp(c, lnum, c->leb_size,
1113 0, 0, 0, 0);
1114 if (err)
1115 return err;
1116 }
1117 err = ubifs_leb_unmap(c, lnum);
1118 if (err)
1119 return err;
1120 c->gc_lnum = lnum;
1121 dbg_rcvry("allocated LEB %d for GC", lnum);
1122 /* Run the commit */
1123 dbg_rcvry("committing");
1124 return ubifs_run_commit(c);
1125 }
1126 /*
1127 * There was no empty LEB so the used space in the dirtiest LEB must fit
1128 * in the GC head LEB.
1129 */
1130 if (lp.free + lp.dirty < wbuf->offs) {
1131 dbg_rcvry("LEB %d doesn't fit in GC head LEB %d:%d",
1132 lnum, wbuf->lnum, wbuf->offs);
1133 err = ubifs_return_leb(c, lnum);
1134 if (err)
1135 return err;
1136 goto find_free;
1137 }
1138 /*
1139 * We run the commit before garbage collection otherwise subsequent
1140 * mounts will see the GC and orphan deletion in a different order.
1141 */
1142 dbg_rcvry("committing");
1143 err = ubifs_run_commit(c);
1144 if (err)
1145 return err;
1146 /*
1147 * The data in the dirtiest LEB fits in the GC head LEB, so do the GC
1148 * - use locking to keep 'ubifs_assert()' happy.
1149 */
1150 dbg_rcvry("GC'ing LEB %d", lnum);
1151 mutex_lock_nested(&wbuf->io_mutex, wbuf->jhead);
1152 err = ubifs_garbage_collect_leb(c, &lp);
1153 if (err >= 0) {
1154 int err2 = ubifs_wbuf_sync_nolock(wbuf);
1155
1156 if (err2)
1157 err = err2;
1158 }
1159 mutex_unlock(&wbuf->io_mutex);
1160 if (err < 0) {
1161 dbg_err("GC failed, error %d", err);
1162 if (err == -EAGAIN)
1163 err = -EINVAL;
1164 return err;
1165 }
1166 if (err != LEB_RETAINED) {
1167 dbg_err("GC returned %d", err);
1168 return -EINVAL;
1169 }
1170 err = ubifs_leb_unmap(c, c->gc_lnum);
1171 if (err)
1172 return err;
1173 dbg_rcvry("allocated LEB %d for GC", lnum);
1174 return 0;
1175
1176find_free:
1177 /*
1178 * There is no GC head LEB or the free space in the GC head LEB is too
1179 * small. Allocate gc_lnum by calling 'ubifs_find_free_leb_for_idx()' so
1180 * GC is not run.
1181 */
1182 lnum = ubifs_find_free_leb_for_idx(c);
1183 if (lnum < 0) {
1184 dbg_err("could not find an empty LEB");
1185 return lnum;
1186 }
1187 /* And reset the index flag */
1188 err = ubifs_change_one_lp(c, lnum, LPROPS_NC, LPROPS_NC, 0,
1189 LPROPS_INDEX, 0);
1190 if (err)
1191 return err;
1192 c->gc_lnum = lnum;
1193 dbg_rcvry("allocated LEB %d for GC", lnum);
1194 /* Run the commit */
1195 dbg_rcvry("committing");
1196 return ubifs_run_commit(c);
1197}
1198
1199/**
1200 * struct size_entry - inode size information for recovery.
1201 * @rb: link in the RB-tree of sizes
1202 * @inum: inode number
1203 * @i_size: size on inode
1204 * @d_size: maximum size based on data nodes
1205 * @exists: indicates whether the inode exists
1206 * @inode: inode if pinned in memory awaiting rw mode to fix it
1207 */
1208struct size_entry {
1209 struct rb_node rb;
1210 ino_t inum;
1211 loff_t i_size;
1212 loff_t d_size;
1213 int exists;
1214 struct inode *inode;
1215};
1216
1217/**
1218 * add_ino - add an entry to the size tree.
1219 * @c: UBIFS file-system description object
1220 * @inum: inode number
1221 * @i_size: size on inode
1222 * @d_size: maximum size based on data nodes
1223 * @exists: indicates whether the inode exists
1224 */
1225static int add_ino(struct ubifs_info *c, ino_t inum, loff_t i_size,
1226 loff_t d_size, int exists)
1227{
1228 struct rb_node **p = &c->size_tree.rb_node, *parent = NULL;
1229 struct size_entry *e;
1230
1231 while (*p) {
1232 parent = *p;
1233 e = rb_entry(parent, struct size_entry, rb);
1234 if (inum < e->inum)
1235 p = &(*p)->rb_left;
1236 else
1237 p = &(*p)->rb_right;
1238 }
1239
1240 e = kzalloc(sizeof(struct size_entry), GFP_KERNEL);
1241 if (!e)
1242 return -ENOMEM;
1243
1244 e->inum = inum;
1245 e->i_size = i_size;
1246 e->d_size = d_size;
1247 e->exists = exists;
1248
1249 rb_link_node(&e->rb, parent, p);
1250 rb_insert_color(&e->rb, &c->size_tree);
1251
1252 return 0;
1253}
1254
1255/**
1256 * find_ino - find an entry on the size tree.
1257 * @c: UBIFS file-system description object
1258 * @inum: inode number
1259 */
1260static struct size_entry *find_ino(struct ubifs_info *c, ino_t inum)
1261{
1262 struct rb_node *p = c->size_tree.rb_node;
1263 struct size_entry *e;
1264
1265 while (p) {
1266 e = rb_entry(p, struct size_entry, rb);
1267 if (inum < e->inum)
1268 p = p->rb_left;
1269 else if (inum > e->inum)
1270 p = p->rb_right;
1271 else
1272 return e;
1273 }
1274 return NULL;
1275}
1276
1277/**
1278 * remove_ino - remove an entry from the size tree.
1279 * @c: UBIFS file-system description object
1280 * @inum: inode number
1281 */
1282static void remove_ino(struct ubifs_info *c, ino_t inum)
1283{
1284 struct size_entry *e = find_ino(c, inum);
1285
1286 if (!e)
1287 return;
1288 rb_erase(&e->rb, &c->size_tree);
1289 kfree(e);
1290}
1291
1292/**
1293 * ubifs_destroy_size_tree - free resources related to the size tree.
1294 * @c: UBIFS file-system description object
1295 */
1296void ubifs_destroy_size_tree(struct ubifs_info *c)
1297{
1298 struct rb_node *this = c->size_tree.rb_node;
1299 struct size_entry *e;
1300
1301 while (this) {
1302 if (this->rb_left) {
1303 this = this->rb_left;
1304 continue;
1305 } else if (this->rb_right) {
1306 this = this->rb_right;
1307 continue;
1308 }
1309 e = rb_entry(this, struct size_entry, rb);
1310 if (e->inode)
1311 iput(e->inode);
1312 this = rb_parent(this);
1313 if (this) {
1314 if (this->rb_left == &e->rb)
1315 this->rb_left = NULL;
1316 else
1317 this->rb_right = NULL;
1318 }
1319 kfree(e);
1320 }
1321 c->size_tree = RB_ROOT;
1322}
1323
1324/**
1325 * ubifs_recover_size_accum - accumulate inode sizes for recovery.
1326 * @c: UBIFS file-system description object
1327 * @key: node key
1328 * @deletion: node is for a deletion
1329 * @new_size: inode size
1330 *
1331 * This function has two purposes:
1332 * 1) to ensure there are no data nodes that fall outside the inode size
1333 * 2) to ensure there are no data nodes for inodes that do not exist
1334 * To accomplish those purposes, a rb-tree is constructed containing an entry
1335 * for each inode number in the journal that has not been deleted, and recording
1336 * the size from the inode node, the maximum size of any data node (also altered
1337 * by truncations) and a flag indicating a inode number for which no inode node
1338 * was present in the journal.
1339 *
1340 * Note that there is still the possibility that there are data nodes that have
1341 * been committed that are beyond the inode size, however the only way to find
1342 * them would be to scan the entire index. Alternatively, some provision could
1343 * be made to record the size of inodes at the start of commit, which would seem
1344 * very cumbersome for a scenario that is quite unlikely and the only negative
1345 * consequence of which is wasted space.
1346 *
1347 * This functions returns %0 on success and a negative error code on failure.
1348 */
1349int ubifs_recover_size_accum(struct ubifs_info *c, union ubifs_key *key,
1350 int deletion, loff_t new_size)
1351{
1352 ino_t inum = key_inum(c, key);
1353 struct size_entry *e;
1354 int err;
1355
1356 switch (key_type(c, key)) {
1357 case UBIFS_INO_KEY:
1358 if (deletion)
1359 remove_ino(c, inum);
1360 else {
1361 e = find_ino(c, inum);
1362 if (e) {
1363 e->i_size = new_size;
1364 e->exists = 1;
1365 } else {
1366 err = add_ino(c, inum, new_size, 0, 1);
1367 if (err)
1368 return err;
1369 }
1370 }
1371 break;
1372 case UBIFS_DATA_KEY:
1373 e = find_ino(c, inum);
1374 if (e) {
1375 if (new_size > e->d_size)
1376 e->d_size = new_size;
1377 } else {
1378 err = add_ino(c, inum, 0, new_size, 0);
1379 if (err)
1380 return err;
1381 }
1382 break;
1383 case UBIFS_TRUN_KEY:
1384 e = find_ino(c, inum);
1385 if (e)
1386 e->d_size = new_size;
1387 break;
1388 }
1389 return 0;
1390}
1391
1392/**
1393 * fix_size_in_place - fix inode size in place on flash.
1394 * @c: UBIFS file-system description object
1395 * @e: inode size information for recovery
1396 */
1397static int fix_size_in_place(struct ubifs_info *c, struct size_entry *e)
1398{
1399 struct ubifs_ino_node *ino = c->sbuf;
1400 unsigned char *p;
1401 union ubifs_key key;
1402 int err, lnum, offs, len;
1403 loff_t i_size;
1404 uint32_t crc;
1405
1406 /* Locate the inode node LEB number and offset */
1407 ino_key_init(c, &key, e->inum);
1408 err = ubifs_tnc_locate(c, &key, ino, &lnum, &offs);
1409 if (err)
1410 goto out;
1411 /*
1412 * If the size recorded on the inode node is greater than the size that
1413 * was calculated from nodes in the journal then don't change the inode.
1414 */
1415 i_size = le64_to_cpu(ino->size);
1416 if (i_size >= e->d_size)
1417 return 0;
1418 /* Read the LEB */
1419 err = ubi_read(c->ubi, lnum, c->sbuf, 0, c->leb_size);
1420 if (err)
1421 goto out;
1422 /* Change the size field and recalculate the CRC */
1423 ino = c->sbuf + offs;
1424 ino->size = cpu_to_le64(e->d_size);
1425 len = le32_to_cpu(ino->ch.len);
1426 crc = crc32(UBIFS_CRC32_INIT, (void *)ino + 8, len - 8);
1427 ino->ch.crc = cpu_to_le32(crc);
1428 /* Work out where data in the LEB ends and free space begins */
1429 p = c->sbuf;
1430 len = c->leb_size - 1;
1431 while (p[len] == 0xff)
1432 len -= 1;
1433 len = ALIGN(len + 1, c->min_io_size);
1434 /* Atomically write the fixed LEB back again */
1435 err = ubi_leb_change(c->ubi, lnum, c->sbuf, len, UBI_UNKNOWN);
1436 if (err)
1437 goto out;
1438 dbg_rcvry("inode %lu at %d:%d size %lld -> %lld ", e->inum, lnum, offs,
1439 i_size, e->d_size);
1440 return 0;
1441
1442out:
1443 ubifs_warn("inode %lu failed to fix size %lld -> %lld error %d",
1444 e->inum, e->i_size, e->d_size, err);
1445 return err;
1446}
1447
1448/**
1449 * ubifs_recover_size - recover inode size.
1450 * @c: UBIFS file-system description object
1451 *
1452 * This function attempts to fix inode size discrepancies identified by the
1453 * 'ubifs_recover_size_accum()' function.
1454 *
1455 * This functions returns %0 on success and a negative error code on failure.
1456 */
1457int ubifs_recover_size(struct ubifs_info *c)
1458{
1459 struct rb_node *this = rb_first(&c->size_tree);
1460
1461 while (this) {
1462 struct size_entry *e;
1463 int err;
1464
1465 e = rb_entry(this, struct size_entry, rb);
1466 if (!e->exists) {
1467 union ubifs_key key;
1468
1469 ino_key_init(c, &key, e->inum);
1470 err = ubifs_tnc_lookup(c, &key, c->sbuf);
1471 if (err && err != -ENOENT)
1472 return err;
1473 if (err == -ENOENT) {
1474 /* Remove data nodes that have no inode */
1475 dbg_rcvry("removing ino %lu", e->inum);
1476 err = ubifs_tnc_remove_ino(c, e->inum);
1477 if (err)
1478 return err;
1479 } else {
1480 struct ubifs_ino_node *ino = c->sbuf;
1481
1482 e->exists = 1;
1483 e->i_size = le64_to_cpu(ino->size);
1484 }
1485 }
1486 if (e->exists && e->i_size < e->d_size) {
1487 if (!e->inode && (c->vfs_sb->s_flags & MS_RDONLY)) {
1488 /* Fix the inode size and pin it in memory */
1489 struct inode *inode;
1490
1491 inode = ubifs_iget(c->vfs_sb, e->inum);
1492 if (IS_ERR(inode))
1493 return PTR_ERR(inode);
1494 if (inode->i_size < e->d_size) {
1495 dbg_rcvry("ino %lu size %lld -> %lld",
1496 e->inum, e->d_size,
1497 inode->i_size);
1498 inode->i_size = e->d_size;
1499 ubifs_inode(inode)->ui_size = e->d_size;
1500 e->inode = inode;
1501 this = rb_next(this);
1502 continue;
1503 }
1504 iput(inode);
1505 } else {
1506 /* Fix the size in place */
1507 err = fix_size_in_place(c, e);
1508 if (err)
1509 return err;
1510 if (e->inode)
1511 iput(e->inode);
1512 }
1513 }
1514 this = rb_next(this);
1515 rb_erase(&e->rb, &c->size_tree);
1516 kfree(e);
1517 }
1518 return 0;
1519}
diff --git a/fs/ubifs/replay.c b/fs/ubifs/replay.c
new file mode 100644
index 000000000000..7399692af859
--- /dev/null
+++ b/fs/ubifs/replay.c
@@ -0,0 +1,1075 @@
1/*
2 * This file is part of UBIFS.
3 *
4 * Copyright (C) 2006-2008 Nokia Corporation.
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms of the GNU General Public License version 2 as published by
8 * the Free Software Foundation.
9 *
10 * This program is distributed in the hope that it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13 * more details.
14 *
15 * You should have received a copy of the GNU General Public License along with
16 * this program; if not, write to the Free Software Foundation, Inc., 51
17 * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
18 *
19 * Authors: Adrian Hunter
20 * Artem Bityutskiy (Битюцкий Артём)
21 */
22
23/*
24 * This file contains journal replay code. It runs when the file-system is being
25 * mounted and requires no locking.
26 *
27 * The larger is the journal, the longer it takes to scan it, so the longer it
28 * takes to mount UBIFS. This is why the journal has limited size which may be
29 * changed depending on the system requirements. But a larger journal gives
30 * faster I/O speed because it writes the index less frequently. So this is a
31 * trade-off. Also, the journal is indexed by the in-memory index (TNC), so the
32 * larger is the journal, the more memory its index may consume.
33 */
34
35#include "ubifs.h"
36
37/*
38 * Replay flags.
39 *
40 * REPLAY_DELETION: node was deleted
41 * REPLAY_REF: node is a reference node
42 */
43enum {
44 REPLAY_DELETION = 1,
45 REPLAY_REF = 2,
46};
47
48/**
49 * struct replay_entry - replay tree entry.
50 * @lnum: logical eraseblock number of the node
51 * @offs: node offset
52 * @len: node length
53 * @sqnum: node sequence number
54 * @flags: replay flags
55 * @rb: links the replay tree
56 * @key: node key
57 * @nm: directory entry name
58 * @old_size: truncation old size
59 * @new_size: truncation new size
60 * @free: amount of free space in a bud
61 * @dirty: amount of dirty space in a bud from padding and deletion nodes
62 *
63 * UBIFS journal replay must compare node sequence numbers, which means it must
64 * build a tree of node information to insert into the TNC.
65 */
66struct replay_entry {
67 int lnum;
68 int offs;
69 int len;
70 unsigned long long sqnum;
71 int flags;
72 struct rb_node rb;
73 union ubifs_key key;
74 union {
75 struct qstr nm;
76 struct {
77 loff_t old_size;
78 loff_t new_size;
79 };
80 struct {
81 int free;
82 int dirty;
83 };
84 };
85};
86
87/**
88 * struct bud_entry - entry in the list of buds to replay.
89 * @list: next bud in the list
90 * @bud: bud description object
91 * @free: free bytes in the bud
92 * @sqnum: reference node sequence number
93 */
94struct bud_entry {
95 struct list_head list;
96 struct ubifs_bud *bud;
97 int free;
98 unsigned long long sqnum;
99};
100
101/**
102 * set_bud_lprops - set free and dirty space used by a bud.
103 * @c: UBIFS file-system description object
104 * @r: replay entry of bud
105 */
106static int set_bud_lprops(struct ubifs_info *c, struct replay_entry *r)
107{
108 const struct ubifs_lprops *lp;
109 int err = 0, dirty;
110
111 ubifs_get_lprops(c);
112
113 lp = ubifs_lpt_lookup_dirty(c, r->lnum);
114 if (IS_ERR(lp)) {
115 err = PTR_ERR(lp);
116 goto out;
117 }
118
119 dirty = lp->dirty;
120 if (r->offs == 0 && (lp->free != c->leb_size || lp->dirty != 0)) {
121 /*
122 * The LEB was added to the journal with a starting offset of
123 * zero which means the LEB must have been empty. The LEB
124 * property values should be lp->free == c->leb_size and
125 * lp->dirty == 0, but that is not the case. The reason is that
126 * the LEB was garbage collected. The garbage collector resets
127 * the free and dirty space without recording it anywhere except
128 * lprops, so if there is not a commit then lprops does not have
129 * that information next time the file system is mounted.
130 *
131 * We do not need to adjust free space because the scan has told
132 * us the exact value which is recorded in the replay entry as
133 * r->free.
134 *
135 * However we do need to subtract from the dirty space the
136 * amount of space that the garbage collector reclaimed, which
137 * is the whole LEB minus the amount of space that was free.
138 */
139 dbg_mnt("bud LEB %d was GC'd (%d free, %d dirty)", r->lnum,
140 lp->free, lp->dirty);
141 dbg_gc("bud LEB %d was GC'd (%d free, %d dirty)", r->lnum,
142 lp->free, lp->dirty);
143 dirty -= c->leb_size - lp->free;
144 /*
145 * If the replay order was perfect the dirty space would now be
146 * zero. The order is not perfect because the the journal heads
147 * race with eachother. This is not a problem but is does mean
148 * that the dirty space may temporarily exceed c->leb_size
149 * during the replay.
150 */
151 if (dirty != 0)
152 dbg_msg("LEB %d lp: %d free %d dirty "
153 "replay: %d free %d dirty", r->lnum, lp->free,
154 lp->dirty, r->free, r->dirty);
155 }
156 lp = ubifs_change_lp(c, lp, r->free, dirty + r->dirty,
157 lp->flags | LPROPS_TAKEN, 0);
158 if (IS_ERR(lp)) {
159 err = PTR_ERR(lp);
160 goto out;
161 }
162out:
163 ubifs_release_lprops(c);
164 return err;
165}
166
167/**
168 * trun_remove_range - apply a replay entry for a truncation to the TNC.
169 * @c: UBIFS file-system description object
170 * @r: replay entry of truncation
171 */
172static int trun_remove_range(struct ubifs_info *c, struct replay_entry *r)
173{
174 unsigned min_blk, max_blk;
175 union ubifs_key min_key, max_key;
176 ino_t ino;
177
178 min_blk = r->new_size / UBIFS_BLOCK_SIZE;
179 if (r->new_size & (UBIFS_BLOCK_SIZE - 1))
180 min_blk += 1;
181
182 max_blk = r->old_size / UBIFS_BLOCK_SIZE;
183 if ((r->old_size & (UBIFS_BLOCK_SIZE - 1)) == 0)
184 max_blk -= 1;
185
186 ino = key_inum(c, &r->key);
187
188 data_key_init(c, &min_key, ino, min_blk);
189 data_key_init(c, &max_key, ino, max_blk);
190
191 return ubifs_tnc_remove_range(c, &min_key, &max_key);
192}
193
194/**
195 * apply_replay_entry - apply a replay entry to the TNC.
196 * @c: UBIFS file-system description object
197 * @r: replay entry to apply
198 *
199 * Apply a replay entry to the TNC.
200 */
201static int apply_replay_entry(struct ubifs_info *c, struct replay_entry *r)
202{
203 int err, deletion = ((r->flags & REPLAY_DELETION) != 0);
204
205 dbg_mnt("LEB %d:%d len %d flgs %d sqnum %llu %s", r->lnum,
206 r->offs, r->len, r->flags, r->sqnum, DBGKEY(&r->key));
207
208 /* Set c->replay_sqnum to help deal with dangling branches. */
209 c->replay_sqnum = r->sqnum;
210
211 if (r->flags & REPLAY_REF)
212 err = set_bud_lprops(c, r);
213 else if (is_hash_key(c, &r->key)) {
214 if (deletion)
215 err = ubifs_tnc_remove_nm(c, &r->key, &r->nm);
216 else
217 err = ubifs_tnc_add_nm(c, &r->key, r->lnum, r->offs,
218 r->len, &r->nm);
219 } else {
220 if (deletion)
221 switch (key_type(c, &r->key)) {
222 case UBIFS_INO_KEY:
223 {
224 ino_t inum = key_inum(c, &r->key);
225
226 err = ubifs_tnc_remove_ino(c, inum);
227 break;
228 }
229 case UBIFS_TRUN_KEY:
230 err = trun_remove_range(c, r);
231 break;
232 default:
233 err = ubifs_tnc_remove(c, &r->key);
234 break;
235 }
236 else
237 err = ubifs_tnc_add(c, &r->key, r->lnum, r->offs,
238 r->len);
239 if (err)
240 return err;
241
242 if (c->need_recovery)
243 err = ubifs_recover_size_accum(c, &r->key, deletion,
244 r->new_size);
245 }
246
247 return err;
248}
249
250/**
251 * destroy_replay_tree - destroy the replay.
252 * @c: UBIFS file-system description object
253 *
254 * Destroy the replay tree.
255 */
256static void destroy_replay_tree(struct ubifs_info *c)
257{
258 struct rb_node *this = c->replay_tree.rb_node;
259 struct replay_entry *r;
260
261 while (this) {
262 if (this->rb_left) {
263 this = this->rb_left;
264 continue;
265 } else if (this->rb_right) {
266 this = this->rb_right;
267 continue;
268 }
269 r = rb_entry(this, struct replay_entry, rb);
270 this = rb_parent(this);
271 if (this) {
272 if (this->rb_left == &r->rb)
273 this->rb_left = NULL;
274 else
275 this->rb_right = NULL;
276 }
277 if (is_hash_key(c, &r->key))
278 kfree(r->nm.name);
279 kfree(r);
280 }
281 c->replay_tree = RB_ROOT;
282}
283
284/**
285 * apply_replay_tree - apply the replay tree to the TNC.
286 * @c: UBIFS file-system description object
287 *
288 * Apply the replay tree.
289 * Returns zero in case of success and a negative error code in case of
290 * failure.
291 */
292static int apply_replay_tree(struct ubifs_info *c)
293{
294 struct rb_node *this = rb_first(&c->replay_tree);
295
296 while (this) {
297 struct replay_entry *r;
298 int err;
299
300 cond_resched();
301
302 r = rb_entry(this, struct replay_entry, rb);
303 err = apply_replay_entry(c, r);
304 if (err)
305 return err;
306 this = rb_next(this);
307 }
308 return 0;
309}
310
311/**
312 * insert_node - insert a node to the replay tree.
313 * @c: UBIFS file-system description object
314 * @lnum: node logical eraseblock number
315 * @offs: node offset
316 * @len: node length
317 * @key: node key
318 * @sqnum: sequence number
319 * @deletion: non-zero if this is a deletion
320 * @used: number of bytes in use in a LEB
321 * @old_size: truncation old size
322 * @new_size: truncation new size
323 *
324 * This function inserts a scanned non-direntry node to the replay tree. The
325 * replay tree is an RB-tree containing @struct replay_entry elements which are
326 * indexed by the sequence number. The replay tree is applied at the very end
327 * of the replay process. Since the tree is sorted in sequence number order,
328 * the older modifications are applied first. This function returns zero in
329 * case of success and a negative error code in case of failure.
330 */
331static int insert_node(struct ubifs_info *c, int lnum, int offs, int len,
332 union ubifs_key *key, unsigned long long sqnum,
333 int deletion, int *used, loff_t old_size,
334 loff_t new_size)
335{
336 struct rb_node **p = &c->replay_tree.rb_node, *parent = NULL;
337 struct replay_entry *r;
338
339 if (key_inum(c, key) >= c->highest_inum)
340 c->highest_inum = key_inum(c, key);
341
342 dbg_mnt("add LEB %d:%d, key %s", lnum, offs, DBGKEY(key));
343 while (*p) {
344 parent = *p;
345 r = rb_entry(parent, struct replay_entry, rb);
346 if (sqnum < r->sqnum) {
347 p = &(*p)->rb_left;
348 continue;
349 } else if (sqnum > r->sqnum) {
350 p = &(*p)->rb_right;
351 continue;
352 }
353 ubifs_err("duplicate sqnum in replay");
354 return -EINVAL;
355 }
356
357 r = kzalloc(sizeof(struct replay_entry), GFP_KERNEL);
358 if (!r)
359 return -ENOMEM;
360
361 if (!deletion)
362 *used += ALIGN(len, 8);
363 r->lnum = lnum;
364 r->offs = offs;
365 r->len = len;
366 r->sqnum = sqnum;
367 r->flags = (deletion ? REPLAY_DELETION : 0);
368 r->old_size = old_size;
369 r->new_size = new_size;
370 key_copy(c, key, &r->key);
371
372 rb_link_node(&r->rb, parent, p);
373 rb_insert_color(&r->rb, &c->replay_tree);
374 return 0;
375}
376
377/**
378 * insert_dent - insert a directory entry node into the replay tree.
379 * @c: UBIFS file-system description object
380 * @lnum: node logical eraseblock number
381 * @offs: node offset
382 * @len: node length
383 * @key: node key
384 * @name: directory entry name
385 * @nlen: directory entry name length
386 * @sqnum: sequence number
387 * @deletion: non-zero if this is a deletion
388 * @used: number of bytes in use in a LEB
389 *
390 * This function inserts a scanned directory entry node to the replay tree.
391 * Returns zero in case of success and a negative error code in case of
392 * failure.
393 *
394 * This function is also used for extended attribute entries because they are
395 * implemented as directory entry nodes.
396 */
397static int insert_dent(struct ubifs_info *c, int lnum, int offs, int len,
398 union ubifs_key *key, const char *name, int nlen,
399 unsigned long long sqnum, int deletion, int *used)
400{
401 struct rb_node **p = &c->replay_tree.rb_node, *parent = NULL;
402 struct replay_entry *r;
403 char *nbuf;
404
405 if (key_inum(c, key) >= c->highest_inum)
406 c->highest_inum = key_inum(c, key);
407
408 dbg_mnt("add LEB %d:%d, key %s", lnum, offs, DBGKEY(key));
409 while (*p) {
410 parent = *p;
411 r = rb_entry(parent, struct replay_entry, rb);
412 if (sqnum < r->sqnum) {
413 p = &(*p)->rb_left;
414 continue;
415 }
416 if (sqnum > r->sqnum) {
417 p = &(*p)->rb_right;
418 continue;
419 }
420 ubifs_err("duplicate sqnum in replay");
421 return -EINVAL;
422 }
423
424 r = kzalloc(sizeof(struct replay_entry), GFP_KERNEL);
425 if (!r)
426 return -ENOMEM;
427 nbuf = kmalloc(nlen + 1, GFP_KERNEL);
428 if (!nbuf) {
429 kfree(r);
430 return -ENOMEM;
431 }
432
433 if (!deletion)
434 *used += ALIGN(len, 8);
435 r->lnum = lnum;
436 r->offs = offs;
437 r->len = len;
438 r->sqnum = sqnum;
439 r->nm.len = nlen;
440 memcpy(nbuf, name, nlen);
441 nbuf[nlen] = '\0';
442 r->nm.name = nbuf;
443 r->flags = (deletion ? REPLAY_DELETION : 0);
444 key_copy(c, key, &r->key);
445
446 ubifs_assert(!*p);
447 rb_link_node(&r->rb, parent, p);
448 rb_insert_color(&r->rb, &c->replay_tree);
449 return 0;
450}
451
452/**
453 * ubifs_validate_entry - validate directory or extended attribute entry node.
454 * @c: UBIFS file-system description object
455 * @dent: the node to validate
456 *
457 * This function validates directory or extended attribute entry node @dent.
458 * Returns zero if the node is all right and a %-EINVAL if not.
459 */
460int ubifs_validate_entry(struct ubifs_info *c,
461 const struct ubifs_dent_node *dent)
462{
463 int key_type = key_type_flash(c, dent->key);
464 int nlen = le16_to_cpu(dent->nlen);
465
466 if (le32_to_cpu(dent->ch.len) != nlen + UBIFS_DENT_NODE_SZ + 1 ||
467 dent->type >= UBIFS_ITYPES_CNT ||
468 nlen > UBIFS_MAX_NLEN || dent->name[nlen] != 0 ||
469 strnlen(dent->name, nlen) != nlen ||
470 le64_to_cpu(dent->inum) > MAX_INUM) {
471 ubifs_err("bad %s node", key_type == UBIFS_DENT_KEY ?
472 "directory entry" : "extended attribute entry");
473 return -EINVAL;
474 }
475
476 if (key_type != UBIFS_DENT_KEY && key_type != UBIFS_XENT_KEY) {
477 ubifs_err("bad key type %d", key_type);
478 return -EINVAL;
479 }
480
481 return 0;
482}
483
484/**
485 * replay_bud - replay a bud logical eraseblock.
486 * @c: UBIFS file-system description object
487 * @lnum: bud logical eraseblock number to replay
488 * @offs: bud start offset
489 * @jhead: journal head to which this bud belongs
490 * @free: amount of free space in the bud is returned here
491 * @dirty: amount of dirty space from padding and deletion nodes is returned
492 * here
493 *
494 * This function returns zero in case of success and a negative error code in
495 * case of failure.
496 */
497static int replay_bud(struct ubifs_info *c, int lnum, int offs, int jhead,
498 int *free, int *dirty)
499{
500 int err = 0, used = 0;
501 struct ubifs_scan_leb *sleb;
502 struct ubifs_scan_node *snod;
503 struct ubifs_bud *bud;
504
505 dbg_mnt("replay bud LEB %d, head %d", lnum, jhead);
506 if (c->need_recovery)
507 sleb = ubifs_recover_leb(c, lnum, offs, c->sbuf, jhead != GCHD);
508 else
509 sleb = ubifs_scan(c, lnum, offs, c->sbuf);
510 if (IS_ERR(sleb))
511 return PTR_ERR(sleb);
512
513 /*
514 * The bud does not have to start from offset zero - the beginning of
515 * the 'lnum' LEB may contain previously committed data. One of the
516 * things we have to do in replay is to correctly update lprops with
517 * newer information about this LEB.
518 *
519 * At this point lprops thinks that this LEB has 'c->leb_size - offs'
520 * bytes of free space because it only contain information about
521 * committed data.
522 *
523 * But we know that real amount of free space is 'c->leb_size -
524 * sleb->endpt', and the space in the 'lnum' LEB between 'offs' and
525 * 'sleb->endpt' is used by bud data. We have to correctly calculate
526 * how much of these data are dirty and update lprops with this
527 * information.
528 *
529 * The dirt in that LEB region is comprised of padding nodes, deletion
530 * nodes, truncation nodes and nodes which are obsoleted by subsequent
531 * nodes in this LEB. So instead of calculating clean space, we
532 * calculate used space ('used' variable).
533 */
534
535 list_for_each_entry(snod, &sleb->nodes, list) {
536 int deletion = 0;
537
538 cond_resched();
539
540 if (snod->sqnum >= SQNUM_WATERMARK) {
541 ubifs_err("file system's life ended");
542 goto out_dump;
543 }
544
545 if (snod->sqnum > c->max_sqnum)
546 c->max_sqnum = snod->sqnum;
547
548 switch (snod->type) {
549 case UBIFS_INO_NODE:
550 {
551 struct ubifs_ino_node *ino = snod->node;
552 loff_t new_size = le64_to_cpu(ino->size);
553
554 if (le32_to_cpu(ino->nlink) == 0)
555 deletion = 1;
556 err = insert_node(c, lnum, snod->offs, snod->len,
557 &snod->key, snod->sqnum, deletion,
558 &used, 0, new_size);
559 break;
560 }
561 case UBIFS_DATA_NODE:
562 {
563 struct ubifs_data_node *dn = snod->node;
564 loff_t new_size = le32_to_cpu(dn->size) +
565 key_block(c, &snod->key) *
566 UBIFS_BLOCK_SIZE;
567
568 err = insert_node(c, lnum, snod->offs, snod->len,
569 &snod->key, snod->sqnum, deletion,
570 &used, 0, new_size);
571 break;
572 }
573 case UBIFS_DENT_NODE:
574 case UBIFS_XENT_NODE:
575 {
576 struct ubifs_dent_node *dent = snod->node;
577
578 err = ubifs_validate_entry(c, dent);
579 if (err)
580 goto out_dump;
581
582 err = insert_dent(c, lnum, snod->offs, snod->len,
583 &snod->key, dent->name,
584 le16_to_cpu(dent->nlen), snod->sqnum,
585 !le64_to_cpu(dent->inum), &used);
586 break;
587 }
588 case UBIFS_TRUN_NODE:
589 {
590 struct ubifs_trun_node *trun = snod->node;
591 loff_t old_size = le64_to_cpu(trun->old_size);
592 loff_t new_size = le64_to_cpu(trun->new_size);
593 union ubifs_key key;
594
595 /* Validate truncation node */
596 if (old_size < 0 || old_size > c->max_inode_sz ||
597 new_size < 0 || new_size > c->max_inode_sz ||
598 old_size <= new_size) {
599 ubifs_err("bad truncation node");
600 goto out_dump;
601 }
602
603 /*
604 * Create a fake truncation key just to use the same
605 * functions which expect nodes to have keys.
606 */
607 trun_key_init(c, &key, le32_to_cpu(trun->inum));
608 err = insert_node(c, lnum, snod->offs, snod->len,
609 &key, snod->sqnum, 1, &used,
610 old_size, new_size);
611 break;
612 }
613 default:
614 ubifs_err("unexpected node type %d in bud LEB %d:%d",
615 snod->type, lnum, snod->offs);
616 err = -EINVAL;
617 goto out_dump;
618 }
619 if (err)
620 goto out;
621 }
622
623 bud = ubifs_search_bud(c, lnum);
624 if (!bud)
625 BUG();
626
627 ubifs_assert(sleb->endpt - offs >= used);
628 ubifs_assert(sleb->endpt % c->min_io_size == 0);
629
630 if (sleb->endpt + c->min_io_size <= c->leb_size &&
631 !(c->vfs_sb->s_flags & MS_RDONLY))
632 err = ubifs_wbuf_seek_nolock(&c->jheads[jhead].wbuf, lnum,
633 sleb->endpt, UBI_SHORTTERM);
634
635 *dirty = sleb->endpt - offs - used;
636 *free = c->leb_size - sleb->endpt;
637
638out:
639 ubifs_scan_destroy(sleb);
640 return err;
641
642out_dump:
643 ubifs_err("bad node is at LEB %d:%d", lnum, snod->offs);
644 dbg_dump_node(c, snod->node);
645 ubifs_scan_destroy(sleb);
646 return -EINVAL;
647}
648
649/**
650 * insert_ref_node - insert a reference node to the replay tree.
651 * @c: UBIFS file-system description object
652 * @lnum: node logical eraseblock number
653 * @offs: node offset
654 * @sqnum: sequence number
655 * @free: amount of free space in bud
656 * @dirty: amount of dirty space from padding and deletion nodes
657 *
658 * This function inserts a reference node to the replay tree and returns zero
659 * in case of success ort a negative error code in case of failure.
660 */
661static int insert_ref_node(struct ubifs_info *c, int lnum, int offs,
662 unsigned long long sqnum, int free, int dirty)
663{
664 struct rb_node **p = &c->replay_tree.rb_node, *parent = NULL;
665 struct replay_entry *r;
666
667 dbg_mnt("add ref LEB %d:%d", lnum, offs);
668 while (*p) {
669 parent = *p;
670 r = rb_entry(parent, struct replay_entry, rb);
671 if (sqnum < r->sqnum) {
672 p = &(*p)->rb_left;
673 continue;
674 } else if (sqnum > r->sqnum) {
675 p = &(*p)->rb_right;
676 continue;
677 }
678 ubifs_err("duplicate sqnum in replay tree");
679 return -EINVAL;
680 }
681
682 r = kzalloc(sizeof(struct replay_entry), GFP_KERNEL);
683 if (!r)
684 return -ENOMEM;
685
686 r->lnum = lnum;
687 r->offs = offs;
688 r->sqnum = sqnum;
689 r->flags = REPLAY_REF;
690 r->free = free;
691 r->dirty = dirty;
692
693 rb_link_node(&r->rb, parent, p);
694 rb_insert_color(&r->rb, &c->replay_tree);
695 return 0;
696}
697
698/**
699 * replay_buds - replay all buds.
700 * @c: UBIFS file-system description object
701 *
702 * This function returns zero in case of success and a negative error code in
703 * case of failure.
704 */
705static int replay_buds(struct ubifs_info *c)
706{
707 struct bud_entry *b;
708 int err, uninitialized_var(free), uninitialized_var(dirty);
709
710 list_for_each_entry(b, &c->replay_buds, list) {
711 err = replay_bud(c, b->bud->lnum, b->bud->start, b->bud->jhead,
712 &free, &dirty);
713 if (err)
714 return err;
715 err = insert_ref_node(c, b->bud->lnum, b->bud->start, b->sqnum,
716 free, dirty);
717 if (err)
718 return err;
719 }
720
721 return 0;
722}
723
724/**
725 * destroy_bud_list - destroy the list of buds to replay.
726 * @c: UBIFS file-system description object
727 */
728static void destroy_bud_list(struct ubifs_info *c)
729{
730 struct bud_entry *b;
731
732 while (!list_empty(&c->replay_buds)) {
733 b = list_entry(c->replay_buds.next, struct bud_entry, list);
734 list_del(&b->list);
735 kfree(b);
736 }
737}
738
739/**
740 * add_replay_bud - add a bud to the list of buds to replay.
741 * @c: UBIFS file-system description object
742 * @lnum: bud logical eraseblock number to replay
743 * @offs: bud start offset
744 * @jhead: journal head to which this bud belongs
745 * @sqnum: reference node sequence number
746 *
747 * This function returns zero in case of success and a negative error code in
748 * case of failure.
749 */
750static int add_replay_bud(struct ubifs_info *c, int lnum, int offs, int jhead,
751 unsigned long long sqnum)
752{
753 struct ubifs_bud *bud;
754 struct bud_entry *b;
755
756 dbg_mnt("add replay bud LEB %d:%d, head %d", lnum, offs, jhead);
757
758 bud = kmalloc(sizeof(struct ubifs_bud), GFP_KERNEL);
759 if (!bud)
760 return -ENOMEM;
761
762 b = kmalloc(sizeof(struct bud_entry), GFP_KERNEL);
763 if (!b) {
764 kfree(bud);
765 return -ENOMEM;
766 }
767
768 bud->lnum = lnum;
769 bud->start = offs;
770 bud->jhead = jhead;
771 ubifs_add_bud(c, bud);
772
773 b->bud = bud;
774 b->sqnum = sqnum;
775 list_add_tail(&b->list, &c->replay_buds);
776
777 return 0;
778}
779
780/**
781 * validate_ref - validate a reference node.
782 * @c: UBIFS file-system description object
783 * @ref: the reference node to validate
784 * @ref_lnum: LEB number of the reference node
785 * @ref_offs: reference node offset
786 *
787 * This function returns %1 if a bud reference already exists for the LEB. %0 is
788 * returned if the reference node is new, otherwise %-EINVAL is returned if
789 * validation failed.
790 */
791static int validate_ref(struct ubifs_info *c, const struct ubifs_ref_node *ref)
792{
793 struct ubifs_bud *bud;
794 int lnum = le32_to_cpu(ref->lnum);
795 unsigned int offs = le32_to_cpu(ref->offs);
796 unsigned int jhead = le32_to_cpu(ref->jhead);
797
798 /*
799 * ref->offs may point to the end of LEB when the journal head points
800 * to the end of LEB and we write reference node for it during commit.
801 * So this is why we require 'offs > c->leb_size'.
802 */
803 if (jhead >= c->jhead_cnt || lnum >= c->leb_cnt ||
804 lnum < c->main_first || offs > c->leb_size ||
805 offs & (c->min_io_size - 1))
806 return -EINVAL;
807
808 /* Make sure we have not already looked at this bud */
809 bud = ubifs_search_bud(c, lnum);
810 if (bud) {
811 if (bud->jhead == jhead && bud->start <= offs)
812 return 1;
813 ubifs_err("bud at LEB %d:%d was already referred", lnum, offs);
814 return -EINVAL;
815 }
816
817 return 0;
818}
819
820/**
821 * replay_log_leb - replay a log logical eraseblock.
822 * @c: UBIFS file-system description object
823 * @lnum: log logical eraseblock to replay
824 * @offs: offset to start replaying from
825 * @sbuf: scan buffer
826 *
827 * This function replays a log LEB and returns zero in case of success, %1 if
828 * this is the last LEB in the log, and a negative error code in case of
829 * failure.
830 */
831static int replay_log_leb(struct ubifs_info *c, int lnum, int offs, void *sbuf)
832{
833 int err;
834 struct ubifs_scan_leb *sleb;
835 struct ubifs_scan_node *snod;
836 const struct ubifs_cs_node *node;
837
838 dbg_mnt("replay log LEB %d:%d", lnum, offs);
839 sleb = ubifs_scan(c, lnum, offs, sbuf);
840 if (IS_ERR(sleb)) {
841 if (c->need_recovery)
842 sleb = ubifs_recover_log_leb(c, lnum, offs, sbuf);
843 if (IS_ERR(sleb))
844 return PTR_ERR(sleb);
845 }
846
847 if (sleb->nodes_cnt == 0) {
848 err = 1;
849 goto out;
850 }
851
852 node = sleb->buf;
853
854 snod = list_entry(sleb->nodes.next, struct ubifs_scan_node, list);
855 if (c->cs_sqnum == 0) {
856 /*
857 * This is the first log LEB we are looking at, make sure that
858 * the first node is a commit start node. Also record its
859 * sequence number so that UBIFS can determine where the log
860 * ends, because all nodes which were have higher sequence
861 * numbers.
862 */
863 if (snod->type != UBIFS_CS_NODE) {
864 dbg_err("first log node at LEB %d:%d is not CS node",
865 lnum, offs);
866 goto out_dump;
867 }
868 if (le64_to_cpu(node->cmt_no) != c->cmt_no) {
869 dbg_err("first CS node at LEB %d:%d has wrong "
870 "commit number %llu expected %llu",
871 lnum, offs,
872 (unsigned long long)le64_to_cpu(node->cmt_no),
873 c->cmt_no);
874 goto out_dump;
875 }
876
877 c->cs_sqnum = le64_to_cpu(node->ch.sqnum);
878 dbg_mnt("commit start sqnum %llu", c->cs_sqnum);
879 }
880
881 if (snod->sqnum < c->cs_sqnum) {
882 /*
883 * This means that we reached end of log and now
884 * look to the older log data, which was already
885 * committed but the eraseblock was not erased (UBIFS
886 * only unmaps it). So this basically means we have to
887 * exit with "end of log" code.
888 */
889 err = 1;
890 goto out;
891 }
892
893 /* Make sure the first node sits at offset zero of the LEB */
894 if (snod->offs != 0) {
895 dbg_err("first node is not at zero offset");
896 goto out_dump;
897 }
898
899 list_for_each_entry(snod, &sleb->nodes, list) {
900
901 cond_resched();
902
903 if (snod->sqnum >= SQNUM_WATERMARK) {
904 ubifs_err("file system's life ended");
905 goto out_dump;
906 }
907
908 if (snod->sqnum < c->cs_sqnum) {
909 dbg_err("bad sqnum %llu, commit sqnum %llu",
910 snod->sqnum, c->cs_sqnum);
911 goto out_dump;
912 }
913
914 if (snod->sqnum > c->max_sqnum)
915 c->max_sqnum = snod->sqnum;
916
917 switch (snod->type) {
918 case UBIFS_REF_NODE: {
919 const struct ubifs_ref_node *ref = snod->node;
920
921 err = validate_ref(c, ref);
922 if (err == 1)
923 break; /* Already have this bud */
924 if (err)
925 goto out_dump;
926
927 err = add_replay_bud(c, le32_to_cpu(ref->lnum),
928 le32_to_cpu(ref->offs),
929 le32_to_cpu(ref->jhead),
930 snod->sqnum);
931 if (err)
932 goto out;
933
934 break;
935 }
936 case UBIFS_CS_NODE:
937 /* Make sure it sits at the beginning of LEB */
938 if (snod->offs != 0) {
939 ubifs_err("unexpected node in log");
940 goto out_dump;
941 }
942 break;
943 default:
944 ubifs_err("unexpected node in log");
945 goto out_dump;
946 }
947 }
948
949 if (sleb->endpt || c->lhead_offs >= c->leb_size) {
950 c->lhead_lnum = lnum;
951 c->lhead_offs = sleb->endpt;
952 }
953
954 err = !sleb->endpt;
955out:
956 ubifs_scan_destroy(sleb);
957 return err;
958
959out_dump:
960 ubifs_err("log error detected while replying the log at LEB %d:%d",
961 lnum, offs + snod->offs);
962 dbg_dump_node(c, snod->node);
963 ubifs_scan_destroy(sleb);
964 return -EINVAL;
965}
966
967/**
968 * take_ihead - update the status of the index head in lprops to 'taken'.
969 * @c: UBIFS file-system description object
970 *
971 * This function returns the amount of free space in the index head LEB or a
972 * negative error code.
973 */
974static int take_ihead(struct ubifs_info *c)
975{
976 const struct ubifs_lprops *lp;
977 int err, free;
978
979 ubifs_get_lprops(c);
980
981 lp = ubifs_lpt_lookup_dirty(c, c->ihead_lnum);
982 if (IS_ERR(lp)) {
983 err = PTR_ERR(lp);
984 goto out;
985 }
986
987 free = lp->free;
988
989 lp = ubifs_change_lp(c, lp, LPROPS_NC, LPROPS_NC,
990 lp->flags | LPROPS_TAKEN, 0);
991 if (IS_ERR(lp)) {
992 err = PTR_ERR(lp);
993 goto out;
994 }
995
996 err = free;
997out:
998 ubifs_release_lprops(c);
999 return err;
1000}
1001
1002/**
1003 * ubifs_replay_journal - replay journal.
1004 * @c: UBIFS file-system description object
1005 *
1006 * This function scans the journal, replays and cleans it up. It makes sure all
1007 * memory data structures related to uncommitted journal are built (dirty TNC
1008 * tree, tree of buds, modified lprops, etc).
1009 */
1010int ubifs_replay_journal(struct ubifs_info *c)
1011{
1012 int err, i, lnum, offs, free;
1013 void *sbuf = NULL;
1014
1015 BUILD_BUG_ON(UBIFS_TRUN_KEY > 5);
1016
1017 /* Update the status of the index head in lprops to 'taken' */
1018 free = take_ihead(c);
1019 if (free < 0)
1020 return free; /* Error code */
1021
1022 if (c->ihead_offs != c->leb_size - free) {
1023 ubifs_err("bad index head LEB %d:%d", c->ihead_lnum,
1024 c->ihead_offs);
1025 return -EINVAL;
1026 }
1027
1028 sbuf = vmalloc(c->leb_size);
1029 if (!sbuf)
1030 return -ENOMEM;
1031
1032 dbg_mnt("start replaying the journal");
1033
1034 c->replaying = 1;
1035
1036 lnum = c->ltail_lnum = c->lhead_lnum;
1037 offs = c->lhead_offs;
1038
1039 for (i = 0; i < c->log_lebs; i++, lnum++) {
1040 if (lnum >= UBIFS_LOG_LNUM + c->log_lebs) {
1041 /*
1042 * The log is logically circular, we reached the last
1043 * LEB, switch to the first one.
1044 */
1045 lnum = UBIFS_LOG_LNUM;
1046 offs = 0;
1047 }
1048 err = replay_log_leb(c, lnum, offs, sbuf);
1049 if (err == 1)
1050 /* We hit the end of the log */
1051 break;
1052 if (err)
1053 goto out;
1054 offs = 0;
1055 }
1056
1057 err = replay_buds(c);
1058 if (err)
1059 goto out;
1060
1061 err = apply_replay_tree(c);
1062 if (err)
1063 goto out;
1064
1065 ubifs_assert(c->bud_bytes <= c->max_bud_bytes || c->need_recovery);
1066 dbg_mnt("finished, log head LEB %d:%d, max_sqnum %llu, "
1067 "highest_inum %lu", c->lhead_lnum, c->lhead_offs, c->max_sqnum,
1068 c->highest_inum);
1069out:
1070 destroy_replay_tree(c);
1071 destroy_bud_list(c);
1072 vfree(sbuf);
1073 c->replaying = 0;
1074 return err;
1075}
diff --git a/fs/ubifs/sb.c b/fs/ubifs/sb.c
new file mode 100644
index 000000000000..2bf753b38889
--- /dev/null
+++ b/fs/ubifs/sb.c
@@ -0,0 +1,629 @@
1/*
2 * This file is part of UBIFS.
3 *
4 * Copyright (C) 2006-2008 Nokia Corporation.
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms of the GNU General Public License version 2 as published by
8 * the Free Software Foundation.
9 *
10 * This program is distributed in the hope that it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13 * more details.
14 *
15 * You should have received a copy of the GNU General Public License along with
16 * this program; if not, write to the Free Software Foundation, Inc., 51
17 * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
18 *
19 * Authors: Artem Bityutskiy (Битюцкий Артём)
20 * Adrian Hunter
21 */
22
23/*
24 * This file implements UBIFS superblock. The superblock is stored at the first
25 * LEB of the volume and is never changed by UBIFS. Only user-space tools may
26 * change it. The superblock node mostly contains geometry information.
27 */
28
29#include "ubifs.h"
30#include <linux/random.h>
31
32/*
33 * Default journal size in logical eraseblocks as a percent of total
34 * flash size.
35 */
36#define DEFAULT_JNL_PERCENT 5
37
38/* Default maximum journal size in bytes */
39#define DEFAULT_MAX_JNL (32*1024*1024)
40
41/* Default indexing tree fanout */
42#define DEFAULT_FANOUT 8
43
44/* Default number of data journal heads */
45#define DEFAULT_JHEADS_CNT 1
46
47/* Default positions of different LEBs in the main area */
48#define DEFAULT_IDX_LEB 0
49#define DEFAULT_DATA_LEB 1
50#define DEFAULT_GC_LEB 2
51
52/* Default number of LEB numbers in LPT's save table */
53#define DEFAULT_LSAVE_CNT 256
54
55/* Default reserved pool size as a percent of maximum free space */
56#define DEFAULT_RP_PERCENT 5
57
58/* The default maximum size of reserved pool in bytes */
59#define DEFAULT_MAX_RP_SIZE (5*1024*1024)
60
61/* Default time granularity in nanoseconds */
62#define DEFAULT_TIME_GRAN 1000000000
63
64/**
65 * create_default_filesystem - format empty UBI volume.
66 * @c: UBIFS file-system description object
67 *
68 * This function creates default empty file-system. Returns zero in case of
69 * success and a negative error code in case of failure.
70 */
71static int create_default_filesystem(struct ubifs_info *c)
72{
73 struct ubifs_sb_node *sup;
74 struct ubifs_mst_node *mst;
75 struct ubifs_idx_node *idx;
76 struct ubifs_branch *br;
77 struct ubifs_ino_node *ino;
78 struct ubifs_cs_node *cs;
79 union ubifs_key key;
80 int err, tmp, jnl_lebs, log_lebs, max_buds, main_lebs, main_first;
81 int lpt_lebs, lpt_first, orph_lebs, big_lpt, ino_waste, sup_flags = 0;
82 int min_leb_cnt = UBIFS_MIN_LEB_CNT;
83 uint64_t tmp64, main_bytes;
84
85 /* Some functions called from here depend on the @c->key_len filed */
86 c->key_len = UBIFS_SK_LEN;
87
88 /*
89 * First of all, we have to calculate default file-system geometry -
90 * log size, journal size, etc.
91 */
92 if (c->leb_cnt < 0x7FFFFFFF / DEFAULT_JNL_PERCENT)
93 /* We can first multiply then divide and have no overflow */
94 jnl_lebs = c->leb_cnt * DEFAULT_JNL_PERCENT / 100;
95 else
96 jnl_lebs = (c->leb_cnt / 100) * DEFAULT_JNL_PERCENT;
97
98 if (jnl_lebs < UBIFS_MIN_JNL_LEBS)
99 jnl_lebs = UBIFS_MIN_JNL_LEBS;
100 if (jnl_lebs * c->leb_size > DEFAULT_MAX_JNL)
101 jnl_lebs = DEFAULT_MAX_JNL / c->leb_size;
102
103 /*
104 * The log should be large enough to fit reference nodes for all bud
105 * LEBs. Because buds do not have to start from the beginning of LEBs
106 * (half of the LEB may contain committed data), the log should
107 * generally be larger, make it twice as large.
108 */
109 tmp = 2 * (c->ref_node_alsz * jnl_lebs) + c->leb_size - 1;
110 log_lebs = tmp / c->leb_size;
111 /* Plus one LEB reserved for commit */
112 log_lebs += 1;
113 if (c->leb_cnt - min_leb_cnt > 8) {
114 /* And some extra space to allow writes while committing */
115 log_lebs += 1;
116 min_leb_cnt += 1;
117 }
118
119 max_buds = jnl_lebs - log_lebs;
120 if (max_buds < UBIFS_MIN_BUD_LEBS)
121 max_buds = UBIFS_MIN_BUD_LEBS;
122
123 /*
124 * Orphan nodes are stored in a separate area. One node can store a lot
125 * of orphan inode numbers, but when new orphan comes we just add a new
126 * orphan node. At some point the nodes are consolidated into one
127 * orphan node.
128 */
129 orph_lebs = UBIFS_MIN_ORPH_LEBS;
130#ifdef CONFIG_UBIFS_FS_DEBUG
131 if (c->leb_cnt - min_leb_cnt > 1)
132 /*
133 * For debugging purposes it is better to have at least 2
134 * orphan LEBs, because the orphan subsystem would need to do
135 * consolidations and would be stressed more.
136 */
137 orph_lebs += 1;
138#endif
139
140 main_lebs = c->leb_cnt - UBIFS_SB_LEBS - UBIFS_MST_LEBS - log_lebs;
141 main_lebs -= orph_lebs;
142
143 lpt_first = UBIFS_LOG_LNUM + log_lebs;
144 c->lsave_cnt = DEFAULT_LSAVE_CNT;
145 c->max_leb_cnt = c->leb_cnt;
146 err = ubifs_create_dflt_lpt(c, &main_lebs, lpt_first, &lpt_lebs,
147 &big_lpt);
148 if (err)
149 return err;
150
151 dbg_gen("LEB Properties Tree created (LEBs %d-%d)", lpt_first,
152 lpt_first + lpt_lebs - 1);
153
154 main_first = c->leb_cnt - main_lebs;
155
156 /* Create default superblock */
157 tmp = ALIGN(UBIFS_SB_NODE_SZ, c->min_io_size);
158 sup = kzalloc(tmp, GFP_KERNEL);
159 if (!sup)
160 return -ENOMEM;
161
162 tmp64 = (uint64_t)max_buds * c->leb_size;
163 if (big_lpt)
164 sup_flags |= UBIFS_FLG_BIGLPT;
165
166 sup->ch.node_type = UBIFS_SB_NODE;
167 sup->key_hash = UBIFS_KEY_HASH_R5;
168 sup->flags = cpu_to_le32(sup_flags);
169 sup->min_io_size = cpu_to_le32(c->min_io_size);
170 sup->leb_size = cpu_to_le32(c->leb_size);
171 sup->leb_cnt = cpu_to_le32(c->leb_cnt);
172 sup->max_leb_cnt = cpu_to_le32(c->max_leb_cnt);
173 sup->max_bud_bytes = cpu_to_le64(tmp64);
174 sup->log_lebs = cpu_to_le32(log_lebs);
175 sup->lpt_lebs = cpu_to_le32(lpt_lebs);
176 sup->orph_lebs = cpu_to_le32(orph_lebs);
177 sup->jhead_cnt = cpu_to_le32(DEFAULT_JHEADS_CNT);
178 sup->fanout = cpu_to_le32(DEFAULT_FANOUT);
179 sup->lsave_cnt = cpu_to_le32(c->lsave_cnt);
180 sup->fmt_version = cpu_to_le32(UBIFS_FORMAT_VERSION);
181 sup->default_compr = cpu_to_le16(UBIFS_COMPR_LZO);
182 sup->time_gran = cpu_to_le32(DEFAULT_TIME_GRAN);
183
184 generate_random_uuid(sup->uuid);
185
186 main_bytes = (uint64_t)main_lebs * c->leb_size;
187 tmp64 = main_bytes * DEFAULT_RP_PERCENT;
188 do_div(tmp64, 100);
189 if (tmp64 > DEFAULT_MAX_RP_SIZE)
190 tmp64 = DEFAULT_MAX_RP_SIZE;
191 sup->rp_size = cpu_to_le64(tmp64);
192
193 err = ubifs_write_node(c, sup, UBIFS_SB_NODE_SZ, 0, 0, UBI_LONGTERM);
194 kfree(sup);
195 if (err)
196 return err;
197
198 dbg_gen("default superblock created at LEB 0:0");
199
200 /* Create default master node */
201 mst = kzalloc(c->mst_node_alsz, GFP_KERNEL);
202 if (!mst)
203 return -ENOMEM;
204
205 mst->ch.node_type = UBIFS_MST_NODE;
206 mst->log_lnum = cpu_to_le32(UBIFS_LOG_LNUM);
207 mst->highest_inum = cpu_to_le64(UBIFS_FIRST_INO);
208 mst->cmt_no = 0;
209 mst->root_lnum = cpu_to_le32(main_first + DEFAULT_IDX_LEB);
210 mst->root_offs = 0;
211 tmp = ubifs_idx_node_sz(c, 1);
212 mst->root_len = cpu_to_le32(tmp);
213 mst->gc_lnum = cpu_to_le32(main_first + DEFAULT_GC_LEB);
214 mst->ihead_lnum = cpu_to_le32(main_first + DEFAULT_IDX_LEB);
215 mst->ihead_offs = cpu_to_le32(ALIGN(tmp, c->min_io_size));
216 mst->index_size = cpu_to_le64(ALIGN(tmp, 8));
217 mst->lpt_lnum = cpu_to_le32(c->lpt_lnum);
218 mst->lpt_offs = cpu_to_le32(c->lpt_offs);
219 mst->nhead_lnum = cpu_to_le32(c->nhead_lnum);
220 mst->nhead_offs = cpu_to_le32(c->nhead_offs);
221 mst->ltab_lnum = cpu_to_le32(c->ltab_lnum);
222 mst->ltab_offs = cpu_to_le32(c->ltab_offs);
223 mst->lsave_lnum = cpu_to_le32(c->lsave_lnum);
224 mst->lsave_offs = cpu_to_le32(c->lsave_offs);
225 mst->lscan_lnum = cpu_to_le32(main_first);
226 mst->empty_lebs = cpu_to_le32(main_lebs - 2);
227 mst->idx_lebs = cpu_to_le32(1);
228 mst->leb_cnt = cpu_to_le32(c->leb_cnt);
229
230 /* Calculate lprops statistics */
231 tmp64 = main_bytes;
232 tmp64 -= ALIGN(ubifs_idx_node_sz(c, 1), c->min_io_size);
233 tmp64 -= ALIGN(UBIFS_INO_NODE_SZ, c->min_io_size);
234 mst->total_free = cpu_to_le64(tmp64);
235
236 tmp64 = ALIGN(ubifs_idx_node_sz(c, 1), c->min_io_size);
237 ino_waste = ALIGN(UBIFS_INO_NODE_SZ, c->min_io_size) -
238 UBIFS_INO_NODE_SZ;
239 tmp64 += ino_waste;
240 tmp64 -= ALIGN(ubifs_idx_node_sz(c, 1), 8);
241 mst->total_dirty = cpu_to_le64(tmp64);
242
243 /* The indexing LEB does not contribute to dark space */
244 tmp64 = (c->main_lebs - 1) * c->dark_wm;
245 mst->total_dark = cpu_to_le64(tmp64);
246
247 mst->total_used = cpu_to_le64(UBIFS_INO_NODE_SZ);
248
249 err = ubifs_write_node(c, mst, UBIFS_MST_NODE_SZ, UBIFS_MST_LNUM, 0,
250 UBI_UNKNOWN);
251 if (err) {
252 kfree(mst);
253 return err;
254 }
255 err = ubifs_write_node(c, mst, UBIFS_MST_NODE_SZ, UBIFS_MST_LNUM + 1, 0,
256 UBI_UNKNOWN);
257 kfree(mst);
258 if (err)
259 return err;
260
261 dbg_gen("default master node created at LEB %d:0", UBIFS_MST_LNUM);
262
263 /* Create the root indexing node */
264 tmp = ubifs_idx_node_sz(c, 1);
265 idx = kzalloc(ALIGN(tmp, c->min_io_size), GFP_KERNEL);
266 if (!idx)
267 return -ENOMEM;
268
269 c->key_fmt = UBIFS_SIMPLE_KEY_FMT;
270 c->key_hash = key_r5_hash;
271
272 idx->ch.node_type = UBIFS_IDX_NODE;
273 idx->child_cnt = cpu_to_le16(1);
274 ino_key_init(c, &key, UBIFS_ROOT_INO);
275 br = ubifs_idx_branch(c, idx, 0);
276 key_write_idx(c, &key, &br->key);
277 br->lnum = cpu_to_le32(main_first + DEFAULT_DATA_LEB);
278 br->len = cpu_to_le32(UBIFS_INO_NODE_SZ);
279 err = ubifs_write_node(c, idx, tmp, main_first + DEFAULT_IDX_LEB, 0,
280 UBI_UNKNOWN);
281 kfree(idx);
282 if (err)
283 return err;
284
285 dbg_gen("default root indexing node created LEB %d:0",
286 main_first + DEFAULT_IDX_LEB);
287
288 /* Create default root inode */
289 tmp = ALIGN(UBIFS_INO_NODE_SZ, c->min_io_size);
290 ino = kzalloc(tmp, GFP_KERNEL);
291 if (!ino)
292 return -ENOMEM;
293
294 ino_key_init_flash(c, &ino->key, UBIFS_ROOT_INO);
295 ino->ch.node_type = UBIFS_INO_NODE;
296 ino->creat_sqnum = cpu_to_le64(++c->max_sqnum);
297 ino->nlink = cpu_to_le32(2);
298 tmp = cpu_to_le64(CURRENT_TIME_SEC.tv_sec);
299 ino->atime_sec = tmp;
300 ino->ctime_sec = tmp;
301 ino->mtime_sec = tmp;
302 ino->atime_nsec = 0;
303 ino->ctime_nsec = 0;
304 ino->mtime_nsec = 0;
305 ino->mode = cpu_to_le32(S_IFDIR | S_IRUGO | S_IWUSR | S_IXUGO);
306 ino->size = cpu_to_le64(UBIFS_INO_NODE_SZ);
307
308 /* Set compression enabled by default */
309 ino->flags = cpu_to_le32(UBIFS_COMPR_FL);
310
311 err = ubifs_write_node(c, ino, UBIFS_INO_NODE_SZ,
312 main_first + DEFAULT_DATA_LEB, 0,
313 UBI_UNKNOWN);
314 kfree(ino);
315 if (err)
316 return err;
317
318 dbg_gen("root inode created at LEB %d:0",
319 main_first + DEFAULT_DATA_LEB);
320
321 /*
322 * The first node in the log has to be the commit start node. This is
323 * always the case during normal file-system operation. Write a fake
324 * commit start node to the log.
325 */
326 tmp = ALIGN(UBIFS_CS_NODE_SZ, c->min_io_size);
327 cs = kzalloc(tmp, GFP_KERNEL);
328 if (!cs)
329 return -ENOMEM;
330
331 cs->ch.node_type = UBIFS_CS_NODE;
332 err = ubifs_write_node(c, cs, UBIFS_CS_NODE_SZ, UBIFS_LOG_LNUM,
333 0, UBI_UNKNOWN);
334 kfree(cs);
335
336 ubifs_msg("default file-system created");
337 return 0;
338}
339
340/**
341 * validate_sb - validate superblock node.
342 * @c: UBIFS file-system description object
343 * @sup: superblock node
344 *
345 * This function validates superblock node @sup. Since most of data was read
346 * from the superblock and stored in @c, the function validates fields in @c
347 * instead. Returns zero in case of success and %-EINVAL in case of validation
348 * failure.
349 */
350static int validate_sb(struct ubifs_info *c, struct ubifs_sb_node *sup)
351{
352 long long max_bytes;
353 int err = 1, min_leb_cnt;
354
355 if (!c->key_hash) {
356 err = 2;
357 goto failed;
358 }
359
360 if (sup->key_fmt != UBIFS_SIMPLE_KEY_FMT) {
361 err = 3;
362 goto failed;
363 }
364
365 if (le32_to_cpu(sup->min_io_size) != c->min_io_size) {
366 ubifs_err("min. I/O unit mismatch: %d in superblock, %d real",
367 le32_to_cpu(sup->min_io_size), c->min_io_size);
368 goto failed;
369 }
370
371 if (le32_to_cpu(sup->leb_size) != c->leb_size) {
372 ubifs_err("LEB size mismatch: %d in superblock, %d real",
373 le32_to_cpu(sup->leb_size), c->leb_size);
374 goto failed;
375 }
376
377 if (c->log_lebs < UBIFS_MIN_LOG_LEBS ||
378 c->lpt_lebs < UBIFS_MIN_LPT_LEBS ||
379 c->orph_lebs < UBIFS_MIN_ORPH_LEBS ||
380 c->main_lebs < UBIFS_MIN_MAIN_LEBS) {
381 err = 4;
382 goto failed;
383 }
384
385 /*
386 * Calculate minimum allowed amount of main area LEBs. This is very
387 * similar to %UBIFS_MIN_LEB_CNT, but we take into account real what we
388 * have just read from the superblock.
389 */
390 min_leb_cnt = UBIFS_SB_LEBS + UBIFS_MST_LEBS + c->log_lebs;
391 min_leb_cnt += c->lpt_lebs + c->orph_lebs + c->jhead_cnt + 6;
392
393 if (c->leb_cnt < min_leb_cnt || c->leb_cnt > c->vi.size) {
394 ubifs_err("bad LEB count: %d in superblock, %d on UBI volume, "
395 "%d minimum required", c->leb_cnt, c->vi.size,
396 min_leb_cnt);
397 goto failed;
398 }
399
400 if (c->max_leb_cnt < c->leb_cnt) {
401 ubifs_err("max. LEB count %d less than LEB count %d",
402 c->max_leb_cnt, c->leb_cnt);
403 goto failed;
404 }
405
406 if (c->main_lebs < UBIFS_MIN_MAIN_LEBS) {
407 err = 7;
408 goto failed;
409 }
410
411 if (c->max_bud_bytes < (long long)c->leb_size * UBIFS_MIN_BUD_LEBS ||
412 c->max_bud_bytes > (long long)c->leb_size * c->main_lebs) {
413 err = 8;
414 goto failed;
415 }
416
417 if (c->jhead_cnt < NONDATA_JHEADS_CNT + 1 ||
418 c->jhead_cnt > NONDATA_JHEADS_CNT + UBIFS_MAX_JHEADS) {
419 err = 9;
420 goto failed;
421 }
422
423 if (c->fanout < UBIFS_MIN_FANOUT ||
424 ubifs_idx_node_sz(c, c->fanout) > c->leb_size) {
425 err = 10;
426 goto failed;
427 }
428
429 if (c->lsave_cnt < 0 || (c->lsave_cnt > DEFAULT_LSAVE_CNT &&
430 c->lsave_cnt > c->max_leb_cnt - UBIFS_SB_LEBS - UBIFS_MST_LEBS -
431 c->log_lebs - c->lpt_lebs - c->orph_lebs)) {
432 err = 11;
433 goto failed;
434 }
435
436 if (UBIFS_SB_LEBS + UBIFS_MST_LEBS + c->log_lebs + c->lpt_lebs +
437 c->orph_lebs + c->main_lebs != c->leb_cnt) {
438 err = 12;
439 goto failed;
440 }
441
442 if (c->default_compr < 0 || c->default_compr >= UBIFS_COMPR_TYPES_CNT) {
443 err = 13;
444 goto failed;
445 }
446
447 max_bytes = c->main_lebs * (long long)c->leb_size;
448 if (c->rp_size < 0 || max_bytes < c->rp_size) {
449 err = 14;
450 goto failed;
451 }
452
453 if (le32_to_cpu(sup->time_gran) > 1000000000 ||
454 le32_to_cpu(sup->time_gran) < 1) {
455 err = 15;
456 goto failed;
457 }
458
459 return 0;
460
461failed:
462 ubifs_err("bad superblock, error %d", err);
463 dbg_dump_node(c, sup);
464 return -EINVAL;
465}
466
467/**
468 * ubifs_read_sb_node - read superblock node.
469 * @c: UBIFS file-system description object
470 *
471 * This function returns a pointer to the superblock node or a negative error
472 * code.
473 */
474struct ubifs_sb_node *ubifs_read_sb_node(struct ubifs_info *c)
475{
476 struct ubifs_sb_node *sup;
477 int err;
478
479 sup = kmalloc(ALIGN(UBIFS_SB_NODE_SZ, c->min_io_size), GFP_NOFS);
480 if (!sup)
481 return ERR_PTR(-ENOMEM);
482
483 err = ubifs_read_node(c, sup, UBIFS_SB_NODE, UBIFS_SB_NODE_SZ,
484 UBIFS_SB_LNUM, 0);
485 if (err) {
486 kfree(sup);
487 return ERR_PTR(err);
488 }
489
490 return sup;
491}
492
493/**
494 * ubifs_write_sb_node - write superblock node.
495 * @c: UBIFS file-system description object
496 * @sup: superblock node read with 'ubifs_read_sb_node()'
497 *
498 * This function returns %0 on success and a negative error code on failure.
499 */
500int ubifs_write_sb_node(struct ubifs_info *c, struct ubifs_sb_node *sup)
501{
502 int len = ALIGN(UBIFS_SB_NODE_SZ, c->min_io_size);
503
504 ubifs_prepare_node(c, sup, UBIFS_SB_NODE_SZ, 1);
505 return ubifs_leb_change(c, UBIFS_SB_LNUM, sup, len, UBI_LONGTERM);
506}
507
508/**
509 * ubifs_read_superblock - read superblock.
510 * @c: UBIFS file-system description object
511 *
512 * This function finds, reads and checks the superblock. If an empty UBI volume
513 * is being mounted, this function creates default superblock. Returns zero in
514 * case of success, and a negative error code in case of failure.
515 */
516int ubifs_read_superblock(struct ubifs_info *c)
517{
518 int err, sup_flags;
519 struct ubifs_sb_node *sup;
520
521 if (c->empty) {
522 err = create_default_filesystem(c);
523 if (err)
524 return err;
525 }
526
527 sup = ubifs_read_sb_node(c);
528 if (IS_ERR(sup))
529 return PTR_ERR(sup);
530
531 /*
532 * The software supports all previous versions but not future versions,
533 * due to the unavailability of time-travelling equipment.
534 */
535 c->fmt_version = le32_to_cpu(sup->fmt_version);
536 if (c->fmt_version > UBIFS_FORMAT_VERSION) {
537 ubifs_err("on-flash format version is %d, but software only "
538 "supports up to version %d", c->fmt_version,
539 UBIFS_FORMAT_VERSION);
540 err = -EINVAL;
541 goto out;
542 }
543
544 if (c->fmt_version < 3) {
545 ubifs_err("on-flash format version %d is not supported",
546 c->fmt_version);
547 err = -EINVAL;
548 goto out;
549 }
550
551 switch (sup->key_hash) {
552 case UBIFS_KEY_HASH_R5:
553 c->key_hash = key_r5_hash;
554 c->key_hash_type = UBIFS_KEY_HASH_R5;
555 break;
556
557 case UBIFS_KEY_HASH_TEST:
558 c->key_hash = key_test_hash;
559 c->key_hash_type = UBIFS_KEY_HASH_TEST;
560 break;
561 };
562
563 c->key_fmt = sup->key_fmt;
564
565 switch (c->key_fmt) {
566 case UBIFS_SIMPLE_KEY_FMT:
567 c->key_len = UBIFS_SK_LEN;
568 break;
569 default:
570 ubifs_err("unsupported key format");
571 err = -EINVAL;
572 goto out;
573 }
574
575 c->leb_cnt = le32_to_cpu(sup->leb_cnt);
576 c->max_leb_cnt = le32_to_cpu(sup->max_leb_cnt);
577 c->max_bud_bytes = le64_to_cpu(sup->max_bud_bytes);
578 c->log_lebs = le32_to_cpu(sup->log_lebs);
579 c->lpt_lebs = le32_to_cpu(sup->lpt_lebs);
580 c->orph_lebs = le32_to_cpu(sup->orph_lebs);
581 c->jhead_cnt = le32_to_cpu(sup->jhead_cnt) + NONDATA_JHEADS_CNT;
582 c->fanout = le32_to_cpu(sup->fanout);
583 c->lsave_cnt = le32_to_cpu(sup->lsave_cnt);
584 c->default_compr = le16_to_cpu(sup->default_compr);
585 c->rp_size = le64_to_cpu(sup->rp_size);
586 c->rp_uid = le32_to_cpu(sup->rp_uid);
587 c->rp_gid = le32_to_cpu(sup->rp_gid);
588 sup_flags = le32_to_cpu(sup->flags);
589
590 c->vfs_sb->s_time_gran = le32_to_cpu(sup->time_gran);
591
592 memcpy(&c->uuid, &sup->uuid, 16);
593
594 c->big_lpt = !!(sup_flags & UBIFS_FLG_BIGLPT);
595
596 /* Automatically increase file system size to the maximum size */
597 c->old_leb_cnt = c->leb_cnt;
598 if (c->leb_cnt < c->vi.size && c->leb_cnt < c->max_leb_cnt) {
599 c->leb_cnt = min_t(int, c->max_leb_cnt, c->vi.size);
600 if (c->vfs_sb->s_flags & MS_RDONLY)
601 dbg_mnt("Auto resizing (ro) from %d LEBs to %d LEBs",
602 c->old_leb_cnt, c->leb_cnt);
603 else {
604 dbg_mnt("Auto resizing (sb) from %d LEBs to %d LEBs",
605 c->old_leb_cnt, c->leb_cnt);
606 sup->leb_cnt = cpu_to_le32(c->leb_cnt);
607 err = ubifs_write_sb_node(c, sup);
608 if (err)
609 goto out;
610 c->old_leb_cnt = c->leb_cnt;
611 }
612 }
613
614 c->log_bytes = (long long)c->log_lebs * c->leb_size;
615 c->log_last = UBIFS_LOG_LNUM + c->log_lebs - 1;
616 c->lpt_first = UBIFS_LOG_LNUM + c->log_lebs;
617 c->lpt_last = c->lpt_first + c->lpt_lebs - 1;
618 c->orph_first = c->lpt_last + 1;
619 c->orph_last = c->orph_first + c->orph_lebs - 1;
620 c->main_lebs = c->leb_cnt - UBIFS_SB_LEBS - UBIFS_MST_LEBS;
621 c->main_lebs -= c->log_lebs + c->lpt_lebs + c->orph_lebs;
622 c->main_first = c->leb_cnt - c->main_lebs;
623 c->report_rp_size = ubifs_reported_space(c, c->rp_size);
624
625 err = validate_sb(c, sup);
626out:
627 kfree(sup);
628 return err;
629}
diff --git a/fs/ubifs/scan.c b/fs/ubifs/scan.c
new file mode 100644
index 000000000000..acf5c5fffc60
--- /dev/null
+++ b/fs/ubifs/scan.c
@@ -0,0 +1,362 @@
1/*
2 * This file is part of UBIFS.
3 *
4 * Copyright (C) 2006-2008 Nokia Corporation
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms of the GNU General Public License version 2 as published by
8 * the Free Software Foundation.
9 *
10 * This program is distributed in the hope that it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13 * more details.
14 *
15 * You should have received a copy of the GNU General Public License along with
16 * this program; if not, write to the Free Software Foundation, Inc., 51
17 * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
18 *
19 * Authors: Adrian Hunter
20 * Artem Bityutskiy (Битюцкий Артём)
21 */
22
23/*
24 * This file implements the scan which is a general-purpose function for
25 * determining what nodes are in an eraseblock. The scan is used to replay the
26 * journal, to do garbage collection. for the TNC in-the-gaps method, and by
27 * debugging functions.
28 */
29
30#include "ubifs.h"
31
32/**
33 * scan_padding_bytes - scan for padding bytes.
34 * @buf: buffer to scan
35 * @len: length of buffer
36 *
37 * This function returns the number of padding bytes on success and
38 * %SCANNED_GARBAGE on failure.
39 */
40static int scan_padding_bytes(void *buf, int len)
41{
42 int pad_len = 0, max_pad_len = min_t(int, UBIFS_PAD_NODE_SZ, len);
43 uint8_t *p = buf;
44
45 dbg_scan("not a node");
46
47 while (pad_len < max_pad_len && *p++ == UBIFS_PADDING_BYTE)
48 pad_len += 1;
49
50 if (!pad_len || (pad_len & 7))
51 return SCANNED_GARBAGE;
52
53 dbg_scan("%d padding bytes", pad_len);
54
55 return pad_len;
56}
57
58/**
59 * ubifs_scan_a_node - scan for a node or padding.
60 * @c: UBIFS file-system description object
61 * @buf: buffer to scan
62 * @len: length of buffer
63 * @lnum: logical eraseblock number
64 * @offs: offset within the logical eraseblock
65 * @quiet: print no messages
66 *
67 * This function returns a scanning code to indicate what was scanned.
68 */
69int ubifs_scan_a_node(const struct ubifs_info *c, void *buf, int len, int lnum,
70 int offs, int quiet)
71{
72 struct ubifs_ch *ch = buf;
73 uint32_t magic;
74
75 magic = le32_to_cpu(ch->magic);
76
77 if (magic == 0xFFFFFFFF) {
78 dbg_scan("hit empty space");
79 return SCANNED_EMPTY_SPACE;
80 }
81
82 if (magic != UBIFS_NODE_MAGIC)
83 return scan_padding_bytes(buf, len);
84
85 if (len < UBIFS_CH_SZ)
86 return SCANNED_GARBAGE;
87
88 dbg_scan("scanning %s", dbg_ntype(ch->node_type));
89
90 if (ubifs_check_node(c, buf, lnum, offs, quiet))
91 return SCANNED_A_CORRUPT_NODE;
92
93 if (ch->node_type == UBIFS_PAD_NODE) {
94 struct ubifs_pad_node *pad = buf;
95 int pad_len = le32_to_cpu(pad->pad_len);
96 int node_len = le32_to_cpu(ch->len);
97
98 /* Validate the padding node */
99 if (pad_len < 0 ||
100 offs + node_len + pad_len > c->leb_size) {
101 if (!quiet) {
102 ubifs_err("bad pad node at LEB %d:%d",
103 lnum, offs);
104 dbg_dump_node(c, pad);
105 }
106 return SCANNED_A_BAD_PAD_NODE;
107 }
108
109 /* Make the node pads to 8-byte boundary */
110 if ((node_len + pad_len) & 7) {
111 if (!quiet) {
112 dbg_err("bad padding length %d - %d",
113 offs, offs + node_len + pad_len);
114 }
115 return SCANNED_A_BAD_PAD_NODE;
116 }
117
118 dbg_scan("%d bytes padded, offset now %d",
119 pad_len, ALIGN(offs + node_len + pad_len, 8));
120
121 return node_len + pad_len;
122 }
123
124 return SCANNED_A_NODE;
125}
126
127/**
128 * ubifs_start_scan - create LEB scanning information at start of scan.
129 * @c: UBIFS file-system description object
130 * @lnum: logical eraseblock number
131 * @offs: offset to start at (usually zero)
132 * @sbuf: scan buffer (must be c->leb_size)
133 *
134 * This function returns %0 on success and a negative error code on failure.
135 */
136struct ubifs_scan_leb *ubifs_start_scan(const struct ubifs_info *c, int lnum,
137 int offs, void *sbuf)
138{
139 struct ubifs_scan_leb *sleb;
140 int err;
141
142 dbg_scan("scan LEB %d:%d", lnum, offs);
143
144 sleb = kzalloc(sizeof(struct ubifs_scan_leb), GFP_NOFS);
145 if (!sleb)
146 return ERR_PTR(-ENOMEM);
147
148 sleb->lnum = lnum;
149 INIT_LIST_HEAD(&sleb->nodes);
150 sleb->buf = sbuf;
151
152 err = ubi_read(c->ubi, lnum, sbuf + offs, offs, c->leb_size - offs);
153 if (err && err != -EBADMSG) {
154 ubifs_err("cannot read %d bytes from LEB %d:%d,"
155 " error %d", c->leb_size - offs, lnum, offs, err);
156 kfree(sleb);
157 return ERR_PTR(err);
158 }
159
160 if (err == -EBADMSG)
161 sleb->ecc = 1;
162
163 return sleb;
164}
165
166/**
167 * ubifs_end_scan - update LEB scanning information at end of scan.
168 * @c: UBIFS file-system description object
169 * @sleb: scanning information
170 * @lnum: logical eraseblock number
171 * @offs: offset to start at (usually zero)
172 *
173 * This function returns %0 on success and a negative error code on failure.
174 */
175void ubifs_end_scan(const struct ubifs_info *c, struct ubifs_scan_leb *sleb,
176 int lnum, int offs)
177{
178 lnum = lnum;
179 dbg_scan("stop scanning LEB %d at offset %d", lnum, offs);
180 ubifs_assert(offs % c->min_io_size == 0);
181
182 sleb->endpt = ALIGN(offs, c->min_io_size);
183}
184
185/**
186 * ubifs_add_snod - add a scanned node to LEB scanning information.
187 * @c: UBIFS file-system description object
188 * @sleb: scanning information
189 * @buf: buffer containing node
190 * @offs: offset of node on flash
191 *
192 * This function returns %0 on success and a negative error code on failure.
193 */
194int ubifs_add_snod(const struct ubifs_info *c, struct ubifs_scan_leb *sleb,
195 void *buf, int offs)
196{
197 struct ubifs_ch *ch = buf;
198 struct ubifs_ino_node *ino = buf;
199 struct ubifs_scan_node *snod;
200
201 snod = kzalloc(sizeof(struct ubifs_scan_node), GFP_NOFS);
202 if (!snod)
203 return -ENOMEM;
204
205 snod->sqnum = le64_to_cpu(ch->sqnum);
206 snod->type = ch->node_type;
207 snod->offs = offs;
208 snod->len = le32_to_cpu(ch->len);
209 snod->node = buf;
210
211 switch (ch->node_type) {
212 case UBIFS_INO_NODE:
213 case UBIFS_DENT_NODE:
214 case UBIFS_XENT_NODE:
215 case UBIFS_DATA_NODE:
216 case UBIFS_TRUN_NODE:
217 /*
218 * The key is in the same place in all keyed
219 * nodes.
220 */
221 key_read(c, &ino->key, &snod->key);
222 break;
223 }
224 list_add_tail(&snod->list, &sleb->nodes);
225 sleb->nodes_cnt += 1;
226 return 0;
227}
228
229/**
230 * ubifs_scanned_corruption - print information after UBIFS scanned corruption.
231 * @c: UBIFS file-system description object
232 * @lnum: LEB number of corruption
233 * @offs: offset of corruption
234 * @buf: buffer containing corruption
235 */
236void ubifs_scanned_corruption(const struct ubifs_info *c, int lnum, int offs,
237 void *buf)
238{
239 int len;
240
241 ubifs_err("corrupted data at LEB %d:%d", lnum, offs);
242 if (dbg_failure_mode)
243 return;
244 len = c->leb_size - offs;
245 if (len > 4096)
246 len = 4096;
247 dbg_err("first %d bytes from LEB %d:%d", len, lnum, offs);
248 print_hex_dump(KERN_DEBUG, "", DUMP_PREFIX_OFFSET, 32, 4, buf, len, 1);
249}
250
251/**
252 * ubifs_scan - scan a logical eraseblock.
253 * @c: UBIFS file-system description object
254 * @lnum: logical eraseblock number
255 * @offs: offset to start at (usually zero)
256 * @sbuf: scan buffer (must be c->leb_size)
257 *
258 * This function scans LEB number @lnum and returns complete information about
259 * its contents. Returns an error code in case of failure.
260 */
261struct ubifs_scan_leb *ubifs_scan(const struct ubifs_info *c, int lnum,
262 int offs, void *sbuf)
263{
264 void *buf = sbuf + offs;
265 int err, len = c->leb_size - offs;
266 struct ubifs_scan_leb *sleb;
267
268 sleb = ubifs_start_scan(c, lnum, offs, sbuf);
269 if (IS_ERR(sleb))
270 return sleb;
271
272 while (len >= 8) {
273 struct ubifs_ch *ch = buf;
274 int node_len, ret;
275
276 dbg_scan("look at LEB %d:%d (%d bytes left)",
277 lnum, offs, len);
278
279 cond_resched();
280
281 ret = ubifs_scan_a_node(c, buf, len, lnum, offs, 0);
282
283 if (ret > 0) {
284 /* Padding bytes or a valid padding node */
285 offs += ret;
286 buf += ret;
287 len -= ret;
288 continue;
289 }
290
291 if (ret == SCANNED_EMPTY_SPACE)
292 /* Empty space is checked later */
293 break;
294
295 switch (ret) {
296 case SCANNED_GARBAGE:
297 dbg_err("garbage");
298 goto corrupted;
299 case SCANNED_A_NODE:
300 break;
301 case SCANNED_A_CORRUPT_NODE:
302 case SCANNED_A_BAD_PAD_NODE:
303 dbg_err("bad node");
304 goto corrupted;
305 default:
306 dbg_err("unknown");
307 goto corrupted;
308 }
309
310 err = ubifs_add_snod(c, sleb, buf, offs);
311 if (err)
312 goto error;
313
314 node_len = ALIGN(le32_to_cpu(ch->len), 8);
315 offs += node_len;
316 buf += node_len;
317 len -= node_len;
318 }
319
320 if (offs % c->min_io_size)
321 goto corrupted;
322
323 ubifs_end_scan(c, sleb, lnum, offs);
324
325 for (; len > 4; offs += 4, buf = buf + 4, len -= 4)
326 if (*(uint32_t *)buf != 0xffffffff)
327 break;
328 for (; len; offs++, buf++, len--)
329 if (*(uint8_t *)buf != 0xff) {
330 ubifs_err("corrupt empty space at LEB %d:%d",
331 lnum, offs);
332 goto corrupted;
333 }
334
335 return sleb;
336
337corrupted:
338 ubifs_scanned_corruption(c, lnum, offs, buf);
339 err = -EUCLEAN;
340error:
341 ubifs_err("LEB %d scanning failed", lnum);
342 ubifs_scan_destroy(sleb);
343 return ERR_PTR(err);
344}
345
346/**
347 * ubifs_scan_destroy - destroy LEB scanning information.
348 * @sleb: scanning information to free
349 */
350void ubifs_scan_destroy(struct ubifs_scan_leb *sleb)
351{
352 struct ubifs_scan_node *node;
353 struct list_head *head;
354
355 head = &sleb->nodes;
356 while (!list_empty(head)) {
357 node = list_entry(head->next, struct ubifs_scan_node, list);
358 list_del(&node->list);
359 kfree(node);
360 }
361 kfree(sleb);
362}
diff --git a/fs/ubifs/shrinker.c b/fs/ubifs/shrinker.c
new file mode 100644
index 000000000000..f248533841a2
--- /dev/null
+++ b/fs/ubifs/shrinker.c
@@ -0,0 +1,322 @@
1/*
2 * This file is part of UBIFS.
3 *
4 * Copyright (C) 2006-2008 Nokia Corporation.
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms of the GNU General Public License version 2 as published by
8 * the Free Software Foundation.
9 *
10 * This program is distributed in the hope that it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13 * more details.
14 *
15 * You should have received a copy of the GNU General Public License along with
16 * this program; if not, write to the Free Software Foundation, Inc., 51
17 * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
18 *
19 * Authors: Artem Bityutskiy (Битюцкий Артём)
20 * Adrian Hunter
21 */
22
23/*
24 * This file implements UBIFS shrinker which evicts clean znodes from the TNC
25 * tree when Linux VM needs more RAM.
26 *
27 * We do not implement any LRU lists to find oldest znodes to free because it
28 * would add additional overhead to the file system fast paths. So the shrinker
29 * just walks the TNC tree when searching for znodes to free.
30 *
31 * If the root of a TNC sub-tree is clean and old enough, then the children are
32 * also clean and old enough. So the shrinker walks the TNC in level order and
33 * dumps entire sub-trees.
34 *
35 * The age of znodes is just the time-stamp when they were last looked at.
36 * The current shrinker first tries to evict old znodes, then young ones.
37 *
38 * Since the shrinker is global, it has to protect against races with FS
39 * un-mounts, which is done by the 'ubifs_infos_lock' and 'c->umount_mutex'.
40 */
41
42#include "ubifs.h"
43
44/* List of all UBIFS file-system instances */
45LIST_HEAD(ubifs_infos);
46
47/*
48 * We number each shrinker run and record the number on the ubifs_info structure
49 * so that we can easily work out which ubifs_info structures have already been
50 * done by the current run.
51 */
52static unsigned int shrinker_run_no;
53
54/* Protects 'ubifs_infos' list */
55DEFINE_SPINLOCK(ubifs_infos_lock);
56
57/* Global clean znode counter (for all mounted UBIFS instances) */
58atomic_long_t ubifs_clean_zn_cnt;
59
60/**
61 * shrink_tnc - shrink TNC tree.
62 * @c: UBIFS file-system description object
63 * @nr: number of znodes to free
64 * @age: the age of znodes to free
65 * @contention: if any contention, this is set to %1
66 *
67 * This function traverses TNC tree and frees clean znodes. It does not free
68 * clean znodes which younger then @age. Returns number of freed znodes.
69 */
70static int shrink_tnc(struct ubifs_info *c, int nr, int age, int *contention)
71{
72 int total_freed = 0;
73 struct ubifs_znode *znode, *zprev;
74 int time = get_seconds();
75
76 ubifs_assert(mutex_is_locked(&c->umount_mutex));
77 ubifs_assert(mutex_is_locked(&c->tnc_mutex));
78
79 if (!c->zroot.znode || atomic_long_read(&c->clean_zn_cnt) == 0)
80 return 0;
81
82 /*
83 * Traverse the TNC tree in levelorder manner, so that it is possible
84 * to destroy large sub-trees. Indeed, if a znode is old, then all its
85 * children are older or of the same age.
86 *
87 * Note, we are holding 'c->tnc_mutex', so we do not have to lock the
88 * 'c->space_lock' when _reading_ 'c->clean_zn_cnt', because it is
89 * changed only when the 'c->tnc_mutex' is held.
90 */
91 zprev = NULL;
92 znode = ubifs_tnc_levelorder_next(c->zroot.znode, NULL);
93 while (znode && total_freed < nr &&
94 atomic_long_read(&c->clean_zn_cnt) > 0) {
95 int freed;
96
97 /*
98 * If the znode is clean, but it is in the 'c->cnext' list, this
99 * means that this znode has just been written to flash as a
100 * part of commit and was marked clean. They will be removed
101 * from the list at end commit. We cannot change the list,
102 * because it is not protected by any mutex (design decision to
103 * make commit really independent and parallel to main I/O). So
104 * we just skip these znodes.
105 *
106 * Note, the 'clean_zn_cnt' counters are not updated until
107 * after the commit, so the UBIFS shrinker does not report
108 * the znodes which are in the 'c->cnext' list as freeable.
109 *
110 * Also note, if the root of a sub-tree is not in 'c->cnext',
111 * then the whole sub-tree is not in 'c->cnext' as well, so it
112 * is safe to dump whole sub-tree.
113 */
114
115 if (znode->cnext) {
116 /*
117 * Very soon these znodes will be removed from the list
118 * and become freeable.
119 */
120 *contention = 1;
121 } else if (!ubifs_zn_dirty(znode) &&
122 abs(time - znode->time) >= age) {
123 if (znode->parent)
124 znode->parent->zbranch[znode->iip].znode = NULL;
125 else
126 c->zroot.znode = NULL;
127
128 freed = ubifs_destroy_tnc_subtree(znode);
129 atomic_long_sub(freed, &ubifs_clean_zn_cnt);
130 atomic_long_sub(freed, &c->clean_zn_cnt);
131 ubifs_assert(atomic_long_read(&c->clean_zn_cnt) >= 0);
132 total_freed += freed;
133 znode = zprev;
134 }
135
136 if (unlikely(!c->zroot.znode))
137 break;
138
139 zprev = znode;
140 znode = ubifs_tnc_levelorder_next(c->zroot.znode, znode);
141 cond_resched();
142 }
143
144 return total_freed;
145}
146
147/**
148 * shrink_tnc_trees - shrink UBIFS TNC trees.
149 * @nr: number of znodes to free
150 * @age: the age of znodes to free
151 * @contention: if any contention, this is set to %1
152 *
153 * This function walks the list of mounted UBIFS file-systems and frees clean
154 * znodes which are older then @age, until at least @nr znodes are freed.
155 * Returns the number of freed znodes.
156 */
157static int shrink_tnc_trees(int nr, int age, int *contention)
158{
159 struct ubifs_info *c;
160 struct list_head *p;
161 unsigned int run_no;
162 int freed = 0;
163
164 spin_lock(&ubifs_infos_lock);
165 do {
166 run_no = ++shrinker_run_no;
167 } while (run_no == 0);
168 /* Iterate over all mounted UBIFS file-systems and try to shrink them */
169 p = ubifs_infos.next;
170 while (p != &ubifs_infos) {
171 c = list_entry(p, struct ubifs_info, infos_list);
172 /*
173 * We move the ones we do to the end of the list, so we stop
174 * when we see one we have already done.
175 */
176 if (c->shrinker_run_no == run_no)
177 break;
178 if (!mutex_trylock(&c->umount_mutex)) {
179 /* Some un-mount is in progress, try next FS */
180 *contention = 1;
181 p = p->next;
182 continue;
183 }
184 /*
185 * We're holding 'c->umount_mutex', so the file-system won't go
186 * away.
187 */
188 if (!mutex_trylock(&c->tnc_mutex)) {
189 mutex_unlock(&c->umount_mutex);
190 *contention = 1;
191 p = p->next;
192 continue;
193 }
194 spin_unlock(&ubifs_infos_lock);
195 /*
196 * OK, now we have TNC locked, the file-system cannot go away -
197 * it is safe to reap the cache.
198 */
199 c->shrinker_run_no = run_no;
200 freed += shrink_tnc(c, nr, age, contention);
201 mutex_unlock(&c->tnc_mutex);
202 spin_lock(&ubifs_infos_lock);
203 /* Get the next list element before we move this one */
204 p = p->next;
205 /*
206 * Move this one to the end of the list to provide some
207 * fairness.
208 */
209 list_del(&c->infos_list);
210 list_add_tail(&c->infos_list, &ubifs_infos);
211 mutex_unlock(&c->umount_mutex);
212 if (freed >= nr)
213 break;
214 }
215 spin_unlock(&ubifs_infos_lock);
216 return freed;
217}
218
219/**
220 * kick_a_thread - kick a background thread to start commit.
221 *
222 * This function kicks a background thread to start background commit. Returns
223 * %-1 if a thread was kicked or there is another reason to assume the memory
224 * will soon be freed or become freeable. If there are no dirty znodes, returns
225 * %0.
226 */
227static int kick_a_thread(void)
228{
229 int i;
230 struct ubifs_info *c;
231
232 /*
233 * Iterate over all mounted UBIFS file-systems and find out if there is
234 * already an ongoing commit operation there. If no, then iterate for
235 * the second time and initiate background commit.
236 */
237 spin_lock(&ubifs_infos_lock);
238 for (i = 0; i < 2; i++) {
239 list_for_each_entry(c, &ubifs_infos, infos_list) {
240 long dirty_zn_cnt;
241
242 if (!mutex_trylock(&c->umount_mutex)) {
243 /*
244 * Some un-mount is in progress, it will
245 * certainly free memory, so just return.
246 */
247 spin_unlock(&ubifs_infos_lock);
248 return -1;
249 }
250
251 dirty_zn_cnt = atomic_long_read(&c->dirty_zn_cnt);
252
253 if (!dirty_zn_cnt || c->cmt_state == COMMIT_BROKEN ||
254 c->ro_media) {
255 mutex_unlock(&c->umount_mutex);
256 continue;
257 }
258
259 if (c->cmt_state != COMMIT_RESTING) {
260 spin_unlock(&ubifs_infos_lock);
261 mutex_unlock(&c->umount_mutex);
262 return -1;
263 }
264
265 if (i == 1) {
266 list_del(&c->infos_list);
267 list_add_tail(&c->infos_list, &ubifs_infos);
268 spin_unlock(&ubifs_infos_lock);
269
270 ubifs_request_bg_commit(c);
271 mutex_unlock(&c->umount_mutex);
272 return -1;
273 }
274 mutex_unlock(&c->umount_mutex);
275 }
276 }
277 spin_unlock(&ubifs_infos_lock);
278
279 return 0;
280}
281
282int ubifs_shrinker(int nr, gfp_t gfp_mask)
283{
284 int freed, contention = 0;
285 long clean_zn_cnt = atomic_long_read(&ubifs_clean_zn_cnt);
286
287 if (nr == 0)
288 return clean_zn_cnt;
289
290 if (!clean_zn_cnt) {
291 /*
292 * No clean znodes, nothing to reap. All we can do in this case
293 * is to kick background threads to start commit, which will
294 * probably make clean znodes which, in turn, will be freeable.
295 * And we return -1 which means will make VM call us again
296 * later.
297 */
298 dbg_tnc("no clean znodes, kick a thread");
299 return kick_a_thread();
300 }
301
302 freed = shrink_tnc_trees(nr, OLD_ZNODE_AGE, &contention);
303 if (freed >= nr)
304 goto out;
305
306 dbg_tnc("not enough old znodes, try to free young ones");
307 freed += shrink_tnc_trees(nr - freed, YOUNG_ZNODE_AGE, &contention);
308 if (freed >= nr)
309 goto out;
310
311 dbg_tnc("not enough young znodes, free all");
312 freed += shrink_tnc_trees(nr - freed, 0, &contention);
313
314 if (!freed && contention) {
315 dbg_tnc("freed nothing, but contention");
316 return -1;
317 }
318
319out:
320 dbg_tnc("%d znodes were freed, requested %d", freed, nr);
321 return freed;
322}
diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c
new file mode 100644
index 000000000000..00eb9c68ad03
--- /dev/null
+++ b/fs/ubifs/super.c
@@ -0,0 +1,1951 @@
1/*
2 * This file is part of UBIFS.
3 *
4 * Copyright (C) 2006-2008 Nokia Corporation.
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms of the GNU General Public License version 2 as published by
8 * the Free Software Foundation.
9 *
10 * This program is distributed in the hope that it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13 * more details.
14 *
15 * You should have received a copy of the GNU General Public License along with
16 * this program; if not, write to the Free Software Foundation, Inc., 51
17 * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
18 *
19 * Authors: Artem Bityutskiy (Битюцкий Артём)
20 * Adrian Hunter
21 */
22
23/*
24 * This file implements UBIFS initialization and VFS superblock operations. Some
25 * initialization stuff which is rather large and complex is placed at
26 * corresponding subsystems, but most of it is here.
27 */
28
29#include <linux/init.h>
30#include <linux/slab.h>
31#include <linux/module.h>
32#include <linux/ctype.h>
33#include <linux/random.h>
34#include <linux/kthread.h>
35#include <linux/parser.h>
36#include <linux/seq_file.h>
37#include <linux/mount.h>
38#include "ubifs.h"
39
40/* Slab cache for UBIFS inodes */
41struct kmem_cache *ubifs_inode_slab;
42
43/* UBIFS TNC shrinker description */
44static struct shrinker ubifs_shrinker_info = {
45 .shrink = ubifs_shrinker,
46 .seeks = DEFAULT_SEEKS,
47};
48
49/**
50 * validate_inode - validate inode.
51 * @c: UBIFS file-system description object
52 * @inode: the inode to validate
53 *
54 * This is a helper function for 'ubifs_iget()' which validates various fields
55 * of a newly built inode to make sure they contain sane values and prevent
56 * possible vulnerabilities. Returns zero if the inode is all right and
57 * a non-zero error code if not.
58 */
59static int validate_inode(struct ubifs_info *c, const struct inode *inode)
60{
61 int err;
62 const struct ubifs_inode *ui = ubifs_inode(inode);
63
64 if (inode->i_size > c->max_inode_sz) {
65 ubifs_err("inode is too large (%lld)",
66 (long long)inode->i_size);
67 return 1;
68 }
69
70 if (ui->compr_type < 0 || ui->compr_type >= UBIFS_COMPR_TYPES_CNT) {
71 ubifs_err("unknown compression type %d", ui->compr_type);
72 return 2;
73 }
74
75 if (ui->xattr_names + ui->xattr_cnt > XATTR_LIST_MAX)
76 return 3;
77
78 if (ui->data_len < 0 || ui->data_len > UBIFS_MAX_INO_DATA)
79 return 4;
80
81 if (ui->xattr && (inode->i_mode & S_IFMT) != S_IFREG)
82 return 5;
83
84 if (!ubifs_compr_present(ui->compr_type)) {
85 ubifs_warn("inode %lu uses '%s' compression, but it was not "
86 "compiled in", inode->i_ino,
87 ubifs_compr_name(ui->compr_type));
88 }
89
90 err = dbg_check_dir_size(c, inode);
91 return err;
92}
93
94struct inode *ubifs_iget(struct super_block *sb, unsigned long inum)
95{
96 int err;
97 union ubifs_key key;
98 struct ubifs_ino_node *ino;
99 struct ubifs_info *c = sb->s_fs_info;
100 struct inode *inode;
101 struct ubifs_inode *ui;
102
103 dbg_gen("inode %lu", inum);
104
105 inode = iget_locked(sb, inum);
106 if (!inode)
107 return ERR_PTR(-ENOMEM);
108 if (!(inode->i_state & I_NEW))
109 return inode;
110 ui = ubifs_inode(inode);
111
112 ino = kmalloc(UBIFS_MAX_INO_NODE_SZ, GFP_NOFS);
113 if (!ino) {
114 err = -ENOMEM;
115 goto out;
116 }
117
118 ino_key_init(c, &key, inode->i_ino);
119
120 err = ubifs_tnc_lookup(c, &key, ino);
121 if (err)
122 goto out_ino;
123
124 inode->i_flags |= (S_NOCMTIME | S_NOATIME);
125 inode->i_nlink = le32_to_cpu(ino->nlink);
126 inode->i_uid = le32_to_cpu(ino->uid);
127 inode->i_gid = le32_to_cpu(ino->gid);
128 inode->i_atime.tv_sec = (int64_t)le64_to_cpu(ino->atime_sec);
129 inode->i_atime.tv_nsec = le32_to_cpu(ino->atime_nsec);
130 inode->i_mtime.tv_sec = (int64_t)le64_to_cpu(ino->mtime_sec);
131 inode->i_mtime.tv_nsec = le32_to_cpu(ino->mtime_nsec);
132 inode->i_ctime.tv_sec = (int64_t)le64_to_cpu(ino->ctime_sec);
133 inode->i_ctime.tv_nsec = le32_to_cpu(ino->ctime_nsec);
134 inode->i_mode = le32_to_cpu(ino->mode);
135 inode->i_size = le64_to_cpu(ino->size);
136
137 ui->data_len = le32_to_cpu(ino->data_len);
138 ui->flags = le32_to_cpu(ino->flags);
139 ui->compr_type = le16_to_cpu(ino->compr_type);
140 ui->creat_sqnum = le64_to_cpu(ino->creat_sqnum);
141 ui->xattr_cnt = le32_to_cpu(ino->xattr_cnt);
142 ui->xattr_size = le32_to_cpu(ino->xattr_size);
143 ui->xattr_names = le32_to_cpu(ino->xattr_names);
144 ui->synced_i_size = ui->ui_size = inode->i_size;
145
146 ui->xattr = (ui->flags & UBIFS_XATTR_FL) ? 1 : 0;
147
148 err = validate_inode(c, inode);
149 if (err)
150 goto out_invalid;
151
152 /* Disable readahead */
153 inode->i_mapping->backing_dev_info = &c->bdi;
154
155 switch (inode->i_mode & S_IFMT) {
156 case S_IFREG:
157 inode->i_mapping->a_ops = &ubifs_file_address_operations;
158 inode->i_op = &ubifs_file_inode_operations;
159 inode->i_fop = &ubifs_file_operations;
160 if (ui->xattr) {
161 ui->data = kmalloc(ui->data_len + 1, GFP_NOFS);
162 if (!ui->data) {
163 err = -ENOMEM;
164 goto out_ino;
165 }
166 memcpy(ui->data, ino->data, ui->data_len);
167 ((char *)ui->data)[ui->data_len] = '\0';
168 } else if (ui->data_len != 0) {
169 err = 10;
170 goto out_invalid;
171 }
172 break;
173 case S_IFDIR:
174 inode->i_op = &ubifs_dir_inode_operations;
175 inode->i_fop = &ubifs_dir_operations;
176 if (ui->data_len != 0) {
177 err = 11;
178 goto out_invalid;
179 }
180 break;
181 case S_IFLNK:
182 inode->i_op = &ubifs_symlink_inode_operations;
183 if (ui->data_len <= 0 || ui->data_len > UBIFS_MAX_INO_DATA) {
184 err = 12;
185 goto out_invalid;
186 }
187 ui->data = kmalloc(ui->data_len + 1, GFP_NOFS);
188 if (!ui->data) {
189 err = -ENOMEM;
190 goto out_ino;
191 }
192 memcpy(ui->data, ino->data, ui->data_len);
193 ((char *)ui->data)[ui->data_len] = '\0';
194 break;
195 case S_IFBLK:
196 case S_IFCHR:
197 {
198 dev_t rdev;
199 union ubifs_dev_desc *dev;
200
201 ui->data = kmalloc(sizeof(union ubifs_dev_desc), GFP_NOFS);
202 if (!ui->data) {
203 err = -ENOMEM;
204 goto out_ino;
205 }
206
207 dev = (union ubifs_dev_desc *)ino->data;
208 if (ui->data_len == sizeof(dev->new))
209 rdev = new_decode_dev(le32_to_cpu(dev->new));
210 else if (ui->data_len == sizeof(dev->huge))
211 rdev = huge_decode_dev(le64_to_cpu(dev->huge));
212 else {
213 err = 13;
214 goto out_invalid;
215 }
216 memcpy(ui->data, ino->data, ui->data_len);
217 inode->i_op = &ubifs_file_inode_operations;
218 init_special_inode(inode, inode->i_mode, rdev);
219 break;
220 }
221 case S_IFSOCK:
222 case S_IFIFO:
223 inode->i_op = &ubifs_file_inode_operations;
224 init_special_inode(inode, inode->i_mode, 0);
225 if (ui->data_len != 0) {
226 err = 14;
227 goto out_invalid;
228 }
229 break;
230 default:
231 err = 15;
232 goto out_invalid;
233 }
234
235 kfree(ino);
236 ubifs_set_inode_flags(inode);
237 unlock_new_inode(inode);
238 return inode;
239
240out_invalid:
241 ubifs_err("inode %lu validation failed, error %d", inode->i_ino, err);
242 dbg_dump_node(c, ino);
243 dbg_dump_inode(c, inode);
244 err = -EINVAL;
245out_ino:
246 kfree(ino);
247out:
248 ubifs_err("failed to read inode %lu, error %d", inode->i_ino, err);
249 iget_failed(inode);
250 return ERR_PTR(err);
251}
252
253static struct inode *ubifs_alloc_inode(struct super_block *sb)
254{
255 struct ubifs_inode *ui;
256
257 ui = kmem_cache_alloc(ubifs_inode_slab, GFP_NOFS);
258 if (!ui)
259 return NULL;
260
261 memset((void *)ui + sizeof(struct inode), 0,
262 sizeof(struct ubifs_inode) - sizeof(struct inode));
263 mutex_init(&ui->ui_mutex);
264 spin_lock_init(&ui->ui_lock);
265 return &ui->vfs_inode;
266};
267
268static void ubifs_destroy_inode(struct inode *inode)
269{
270 struct ubifs_inode *ui = ubifs_inode(inode);
271
272 kfree(ui->data);
273 kmem_cache_free(ubifs_inode_slab, inode);
274}
275
276/*
277 * Note, Linux write-back code calls this without 'i_mutex'.
278 */
279static int ubifs_write_inode(struct inode *inode, int wait)
280{
281 int err;
282 struct ubifs_info *c = inode->i_sb->s_fs_info;
283 struct ubifs_inode *ui = ubifs_inode(inode);
284
285 ubifs_assert(!ui->xattr);
286 if (is_bad_inode(inode))
287 return 0;
288
289 mutex_lock(&ui->ui_mutex);
290 /*
291 * Due to races between write-back forced by budgeting
292 * (see 'sync_some_inodes()') and pdflush write-back, the inode may
293 * have already been synchronized, do not do this again. This might
294 * also happen if it was synchronized in an VFS operation, e.g.
295 * 'ubifs_link()'.
296 */
297 if (!ui->dirty) {
298 mutex_unlock(&ui->ui_mutex);
299 return 0;
300 }
301
302 dbg_gen("inode %lu", inode->i_ino);
303 err = ubifs_jnl_write_inode(c, inode, 0);
304 if (err)
305 ubifs_err("can't write inode %lu, error %d", inode->i_ino, err);
306
307 ui->dirty = 0;
308 mutex_unlock(&ui->ui_mutex);
309 ubifs_release_dirty_inode_budget(c, ui);
310 return err;
311}
312
313static void ubifs_delete_inode(struct inode *inode)
314{
315 int err;
316 struct ubifs_info *c = inode->i_sb->s_fs_info;
317
318 if (ubifs_inode(inode)->xattr)
319 /*
320 * Extended attribute inode deletions are fully handled in
321 * 'ubifs_removexattr()'. These inodes are special and have
322 * limited usage, so there is nothing to do here.
323 */
324 goto out;
325
326 dbg_gen("inode %lu", inode->i_ino);
327 ubifs_assert(!atomic_read(&inode->i_count));
328 ubifs_assert(inode->i_nlink == 0);
329
330 truncate_inode_pages(&inode->i_data, 0);
331 if (is_bad_inode(inode))
332 goto out;
333
334 ubifs_inode(inode)->ui_size = inode->i_size = 0;
335 err = ubifs_jnl_write_inode(c, inode, 1);
336 if (err)
337 /*
338 * Worst case we have a lost orphan inode wasting space, so a
339 * simple error message is ok here.
340 */
341 ubifs_err("can't write inode %lu, error %d", inode->i_ino, err);
342out:
343 clear_inode(inode);
344}
345
346static void ubifs_dirty_inode(struct inode *inode)
347{
348 struct ubifs_inode *ui = ubifs_inode(inode);
349
350 ubifs_assert(mutex_is_locked(&ui->ui_mutex));
351 if (!ui->dirty) {
352 ui->dirty = 1;
353 dbg_gen("inode %lu", inode->i_ino);
354 }
355}
356
357static int ubifs_statfs(struct dentry *dentry, struct kstatfs *buf)
358{
359 struct ubifs_info *c = dentry->d_sb->s_fs_info;
360 unsigned long long free;
361
362 free = ubifs_budg_get_free_space(c);
363 dbg_gen("free space %lld bytes (%lld blocks)",
364 free, free >> UBIFS_BLOCK_SHIFT);
365
366 buf->f_type = UBIFS_SUPER_MAGIC;
367 buf->f_bsize = UBIFS_BLOCK_SIZE;
368 buf->f_blocks = c->block_cnt;
369 buf->f_bfree = free >> UBIFS_BLOCK_SHIFT;
370 if (free > c->report_rp_size)
371 buf->f_bavail = (free - c->report_rp_size) >> UBIFS_BLOCK_SHIFT;
372 else
373 buf->f_bavail = 0;
374 buf->f_files = 0;
375 buf->f_ffree = 0;
376 buf->f_namelen = UBIFS_MAX_NLEN;
377
378 return 0;
379}
380
381static int ubifs_show_options(struct seq_file *s, struct vfsmount *mnt)
382{
383 struct ubifs_info *c = mnt->mnt_sb->s_fs_info;
384
385 if (c->mount_opts.unmount_mode == 2)
386 seq_printf(s, ",fast_unmount");
387 else if (c->mount_opts.unmount_mode == 1)
388 seq_printf(s, ",norm_unmount");
389
390 return 0;
391}
392
393static int ubifs_sync_fs(struct super_block *sb, int wait)
394{
395 struct ubifs_info *c = sb->s_fs_info;
396 int i, ret = 0, err;
397
398 if (c->jheads)
399 for (i = 0; i < c->jhead_cnt; i++) {
400 err = ubifs_wbuf_sync(&c->jheads[i].wbuf);
401 if (err && !ret)
402 ret = err;
403 }
404 /*
405 * We ought to call sync for c->ubi but it does not have one. If it had
406 * it would in turn call mtd->sync, however mtd operations are
407 * synchronous anyway, so we don't lose any sleep here.
408 */
409 return ret;
410}
411
412/**
413 * init_constants_early - initialize UBIFS constants.
414 * @c: UBIFS file-system description object
415 *
416 * This function initialize UBIFS constants which do not need the superblock to
417 * be read. It also checks that the UBI volume satisfies basic UBIFS
418 * requirements. Returns zero in case of success and a negative error code in
419 * case of failure.
420 */
421static int init_constants_early(struct ubifs_info *c)
422{
423 if (c->vi.corrupted) {
424 ubifs_warn("UBI volume is corrupted - read-only mode");
425 c->ro_media = 1;
426 }
427
428 if (c->di.ro_mode) {
429 ubifs_msg("read-only UBI device");
430 c->ro_media = 1;
431 }
432
433 if (c->vi.vol_type == UBI_STATIC_VOLUME) {
434 ubifs_msg("static UBI volume - read-only mode");
435 c->ro_media = 1;
436 }
437
438 c->leb_cnt = c->vi.size;
439 c->leb_size = c->vi.usable_leb_size;
440 c->half_leb_size = c->leb_size / 2;
441 c->min_io_size = c->di.min_io_size;
442 c->min_io_shift = fls(c->min_io_size) - 1;
443
444 if (c->leb_size < UBIFS_MIN_LEB_SZ) {
445 ubifs_err("too small LEBs (%d bytes), min. is %d bytes",
446 c->leb_size, UBIFS_MIN_LEB_SZ);
447 return -EINVAL;
448 }
449
450 if (c->leb_cnt < UBIFS_MIN_LEB_CNT) {
451 ubifs_err("too few LEBs (%d), min. is %d",
452 c->leb_cnt, UBIFS_MIN_LEB_CNT);
453 return -EINVAL;
454 }
455
456 if (!is_power_of_2(c->min_io_size)) {
457 ubifs_err("bad min. I/O size %d", c->min_io_size);
458 return -EINVAL;
459 }
460
461 /*
462 * UBIFS aligns all node to 8-byte boundary, so to make function in
463 * io.c simpler, assume minimum I/O unit size to be 8 bytes if it is
464 * less than 8.
465 */
466 if (c->min_io_size < 8) {
467 c->min_io_size = 8;
468 c->min_io_shift = 3;
469 }
470
471 c->ref_node_alsz = ALIGN(UBIFS_REF_NODE_SZ, c->min_io_size);
472 c->mst_node_alsz = ALIGN(UBIFS_MST_NODE_SZ, c->min_io_size);
473
474 /*
475 * Initialize node length ranges which are mostly needed for node
476 * length validation.
477 */
478 c->ranges[UBIFS_PAD_NODE].len = UBIFS_PAD_NODE_SZ;
479 c->ranges[UBIFS_SB_NODE].len = UBIFS_SB_NODE_SZ;
480 c->ranges[UBIFS_MST_NODE].len = UBIFS_MST_NODE_SZ;
481 c->ranges[UBIFS_REF_NODE].len = UBIFS_REF_NODE_SZ;
482 c->ranges[UBIFS_TRUN_NODE].len = UBIFS_TRUN_NODE_SZ;
483 c->ranges[UBIFS_CS_NODE].len = UBIFS_CS_NODE_SZ;
484
485 c->ranges[UBIFS_INO_NODE].min_len = UBIFS_INO_NODE_SZ;
486 c->ranges[UBIFS_INO_NODE].max_len = UBIFS_MAX_INO_NODE_SZ;
487 c->ranges[UBIFS_ORPH_NODE].min_len =
488 UBIFS_ORPH_NODE_SZ + sizeof(__le64);
489 c->ranges[UBIFS_ORPH_NODE].max_len = c->leb_size;
490 c->ranges[UBIFS_DENT_NODE].min_len = UBIFS_DENT_NODE_SZ;
491 c->ranges[UBIFS_DENT_NODE].max_len = UBIFS_MAX_DENT_NODE_SZ;
492 c->ranges[UBIFS_XENT_NODE].min_len = UBIFS_XENT_NODE_SZ;
493 c->ranges[UBIFS_XENT_NODE].max_len = UBIFS_MAX_XENT_NODE_SZ;
494 c->ranges[UBIFS_DATA_NODE].min_len = UBIFS_DATA_NODE_SZ;
495 c->ranges[UBIFS_DATA_NODE].max_len = UBIFS_MAX_DATA_NODE_SZ;
496 /*
497 * Minimum indexing node size is amended later when superblock is
498 * read and the key length is known.
499 */
500 c->ranges[UBIFS_IDX_NODE].min_len = UBIFS_IDX_NODE_SZ + UBIFS_BRANCH_SZ;
501 /*
502 * Maximum indexing node size is amended later when superblock is
503 * read and the fanout is known.
504 */
505 c->ranges[UBIFS_IDX_NODE].max_len = INT_MAX;
506
507 /*
508 * Initialize dead and dark LEB space watermarks.
509 *
510 * Dead space is the space which cannot be used. Its watermark is
511 * equivalent to min. I/O unit or minimum node size if it is greater
512 * then min. I/O unit.
513 *
514 * Dark space is the space which might be used, or might not, depending
515 * on which node should be written to the LEB. Its watermark is
516 * equivalent to maximum UBIFS node size.
517 */
518 c->dead_wm = ALIGN(MIN_WRITE_SZ, c->min_io_size);
519 c->dark_wm = ALIGN(UBIFS_MAX_NODE_SZ, c->min_io_size);
520
521 return 0;
522}
523
524/**
525 * bud_wbuf_callback - bud LEB write-buffer synchronization call-back.
526 * @c: UBIFS file-system description object
527 * @lnum: LEB the write-buffer was synchronized to
528 * @free: how many free bytes left in this LEB
529 * @pad: how many bytes were padded
530 *
531 * This is a callback function which is called by the I/O unit when the
532 * write-buffer is synchronized. We need this to correctly maintain space
533 * accounting in bud logical eraseblocks. This function returns zero in case of
534 * success and a negative error code in case of failure.
535 *
536 * This function actually belongs to the journal, but we keep it here because
537 * we want to keep it static.
538 */
539static int bud_wbuf_callback(struct ubifs_info *c, int lnum, int free, int pad)
540{
541 return ubifs_update_one_lp(c, lnum, free, pad, 0, 0);
542}
543
544/*
545 * init_constants_late - initialize UBIFS constants.
546 * @c: UBIFS file-system description object
547 *
548 * This is a helper function which initializes various UBIFS constants after
549 * the superblock has been read. It also checks various UBIFS parameters and
550 * makes sure they are all right. Returns zero in case of success and a
551 * negative error code in case of failure.
552 */
553static int init_constants_late(struct ubifs_info *c)
554{
555 int tmp, err;
556 uint64_t tmp64;
557
558 c->main_bytes = (long long)c->main_lebs * c->leb_size;
559 c->max_znode_sz = sizeof(struct ubifs_znode) +
560 c->fanout * sizeof(struct ubifs_zbranch);
561
562 tmp = ubifs_idx_node_sz(c, 1);
563 c->ranges[UBIFS_IDX_NODE].min_len = tmp;
564 c->min_idx_node_sz = ALIGN(tmp, 8);
565
566 tmp = ubifs_idx_node_sz(c, c->fanout);
567 c->ranges[UBIFS_IDX_NODE].max_len = tmp;
568 c->max_idx_node_sz = ALIGN(tmp, 8);
569
570 /* Make sure LEB size is large enough to fit full commit */
571 tmp = UBIFS_CS_NODE_SZ + UBIFS_REF_NODE_SZ * c->jhead_cnt;
572 tmp = ALIGN(tmp, c->min_io_size);
573 if (tmp > c->leb_size) {
574 dbg_err("too small LEB size %d, at least %d needed",
575 c->leb_size, tmp);
576 return -EINVAL;
577 }
578
579 /*
580 * Make sure that the log is large enough to fit reference nodes for
581 * all buds plus one reserved LEB.
582 */
583 tmp64 = c->max_bud_bytes;
584 tmp = do_div(tmp64, c->leb_size);
585 c->max_bud_cnt = tmp64 + !!tmp;
586 tmp = (c->ref_node_alsz * c->max_bud_cnt + c->leb_size - 1);
587 tmp /= c->leb_size;
588 tmp += 1;
589 if (c->log_lebs < tmp) {
590 dbg_err("too small log %d LEBs, required min. %d LEBs",
591 c->log_lebs, tmp);
592 return -EINVAL;
593 }
594
595 /*
596 * When budgeting we assume worst-case scenarios when the pages are not
597 * be compressed and direntries are of the maximum size.
598 *
599 * Note, data, which may be stored in inodes is budgeted separately, so
600 * it is not included into 'c->inode_budget'.
601 */
602 c->page_budget = UBIFS_MAX_DATA_NODE_SZ * UBIFS_BLOCKS_PER_PAGE;
603 c->inode_budget = UBIFS_INO_NODE_SZ;
604 c->dent_budget = UBIFS_MAX_DENT_NODE_SZ;
605
606 /*
607 * When the amount of flash space used by buds becomes
608 * 'c->max_bud_bytes', UBIFS just blocks all writers and starts commit.
609 * The writers are unblocked when the commit is finished. To avoid
610 * writers to be blocked UBIFS initiates background commit in advance,
611 * when number of bud bytes becomes above the limit defined below.
612 */
613 c->bg_bud_bytes = (c->max_bud_bytes * 13) >> 4;
614
615 /*
616 * Ensure minimum journal size. All the bytes in the journal heads are
617 * considered to be used, when calculating the current journal usage.
618 * Consequently, if the journal is too small, UBIFS will treat it as
619 * always full.
620 */
621 tmp64 = (uint64_t)(c->jhead_cnt + 1) * c->leb_size + 1;
622 if (c->bg_bud_bytes < tmp64)
623 c->bg_bud_bytes = tmp64;
624 if (c->max_bud_bytes < tmp64 + c->leb_size)
625 c->max_bud_bytes = tmp64 + c->leb_size;
626
627 err = ubifs_calc_lpt_geom(c);
628 if (err)
629 return err;
630
631 c->min_idx_lebs = ubifs_calc_min_idx_lebs(c);
632
633 /*
634 * Calculate total amount of FS blocks. This number is not used
635 * internally because it does not make much sense for UBIFS, but it is
636 * necessary to report something for the 'statfs()' call.
637 *
638 * Subtract the LEB reserved for GC and the LEB which is reserved for
639 * deletions.
640 *
641 * Review 'ubifs_calc_available()' if changing this calculation.
642 */
643 tmp64 = c->main_lebs - 2;
644 tmp64 *= (uint64_t)c->leb_size - c->dark_wm;
645 tmp64 = ubifs_reported_space(c, tmp64);
646 c->block_cnt = tmp64 >> UBIFS_BLOCK_SHIFT;
647
648 return 0;
649}
650
651/**
652 * take_gc_lnum - reserve GC LEB.
653 * @c: UBIFS file-system description object
654 *
655 * This function ensures that the LEB reserved for garbage collection is
656 * unmapped and is marked as "taken" in lprops. We also have to set free space
657 * to LEB size and dirty space to zero, because lprops may contain out-of-date
658 * information if the file-system was un-mounted before it has been committed.
659 * This function returns zero in case of success and a negative error code in
660 * case of failure.
661 */
662static int take_gc_lnum(struct ubifs_info *c)
663{
664 int err;
665
666 if (c->gc_lnum == -1) {
667 ubifs_err("no LEB for GC");
668 return -EINVAL;
669 }
670
671 err = ubifs_leb_unmap(c, c->gc_lnum);
672 if (err)
673 return err;
674
675 /* And we have to tell lprops that this LEB is taken */
676 err = ubifs_change_one_lp(c, c->gc_lnum, c->leb_size, 0,
677 LPROPS_TAKEN, 0, 0);
678 return err;
679}
680
681/**
682 * alloc_wbufs - allocate write-buffers.
683 * @c: UBIFS file-system description object
684 *
685 * This helper function allocates and initializes UBIFS write-buffers. Returns
686 * zero in case of success and %-ENOMEM in case of failure.
687 */
688static int alloc_wbufs(struct ubifs_info *c)
689{
690 int i, err;
691
692 c->jheads = kzalloc(c->jhead_cnt * sizeof(struct ubifs_jhead),
693 GFP_KERNEL);
694 if (!c->jheads)
695 return -ENOMEM;
696
697 /* Initialize journal heads */
698 for (i = 0; i < c->jhead_cnt; i++) {
699 INIT_LIST_HEAD(&c->jheads[i].buds_list);
700 err = ubifs_wbuf_init(c, &c->jheads[i].wbuf);
701 if (err)
702 return err;
703
704 c->jheads[i].wbuf.sync_callback = &bud_wbuf_callback;
705 c->jheads[i].wbuf.jhead = i;
706 }
707
708 c->jheads[BASEHD].wbuf.dtype = UBI_SHORTTERM;
709 /*
710 * Garbage Collector head likely contains long-term data and
711 * does not need to be synchronized by timer.
712 */
713 c->jheads[GCHD].wbuf.dtype = UBI_LONGTERM;
714 c->jheads[GCHD].wbuf.timeout = 0;
715
716 return 0;
717}
718
719/**
720 * free_wbufs - free write-buffers.
721 * @c: UBIFS file-system description object
722 */
723static void free_wbufs(struct ubifs_info *c)
724{
725 int i;
726
727 if (c->jheads) {
728 for (i = 0; i < c->jhead_cnt; i++) {
729 kfree(c->jheads[i].wbuf.buf);
730 kfree(c->jheads[i].wbuf.inodes);
731 }
732 kfree(c->jheads);
733 c->jheads = NULL;
734 }
735}
736
737/**
738 * free_orphans - free orphans.
739 * @c: UBIFS file-system description object
740 */
741static void free_orphans(struct ubifs_info *c)
742{
743 struct ubifs_orphan *orph;
744
745 while (c->orph_dnext) {
746 orph = c->orph_dnext;
747 c->orph_dnext = orph->dnext;
748 list_del(&orph->list);
749 kfree(orph);
750 }
751
752 while (!list_empty(&c->orph_list)) {
753 orph = list_entry(c->orph_list.next, struct ubifs_orphan, list);
754 list_del(&orph->list);
755 kfree(orph);
756 dbg_err("orphan list not empty at unmount");
757 }
758
759 vfree(c->orph_buf);
760 c->orph_buf = NULL;
761}
762
763/**
764 * free_buds - free per-bud objects.
765 * @c: UBIFS file-system description object
766 */
767static void free_buds(struct ubifs_info *c)
768{
769 struct rb_node *this = c->buds.rb_node;
770 struct ubifs_bud *bud;
771
772 while (this) {
773 if (this->rb_left)
774 this = this->rb_left;
775 else if (this->rb_right)
776 this = this->rb_right;
777 else {
778 bud = rb_entry(this, struct ubifs_bud, rb);
779 this = rb_parent(this);
780 if (this) {
781 if (this->rb_left == &bud->rb)
782 this->rb_left = NULL;
783 else
784 this->rb_right = NULL;
785 }
786 kfree(bud);
787 }
788 }
789}
790
791/**
792 * check_volume_empty - check if the UBI volume is empty.
793 * @c: UBIFS file-system description object
794 *
795 * This function checks if the UBIFS volume is empty by looking if its LEBs are
796 * mapped or not. The result of checking is stored in the @c->empty variable.
797 * Returns zero in case of success and a negative error code in case of
798 * failure.
799 */
800static int check_volume_empty(struct ubifs_info *c)
801{
802 int lnum, err;
803
804 c->empty = 1;
805 for (lnum = 0; lnum < c->leb_cnt; lnum++) {
806 err = ubi_is_mapped(c->ubi, lnum);
807 if (unlikely(err < 0))
808 return err;
809 if (err == 1) {
810 c->empty = 0;
811 break;
812 }
813
814 cond_resched();
815 }
816
817 return 0;
818}
819
820/*
821 * UBIFS mount options.
822 *
823 * Opt_fast_unmount: do not run a journal commit before un-mounting
824 * Opt_norm_unmount: run a journal commit before un-mounting
825 * Opt_err: just end of array marker
826 */
827enum {
828 Opt_fast_unmount,
829 Opt_norm_unmount,
830 Opt_err,
831};
832
833static match_table_t tokens = {
834 {Opt_fast_unmount, "fast_unmount"},
835 {Opt_norm_unmount, "norm_unmount"},
836 {Opt_err, NULL},
837};
838
839/**
840 * ubifs_parse_options - parse mount parameters.
841 * @c: UBIFS file-system description object
842 * @options: parameters to parse
843 * @is_remount: non-zero if this is FS re-mount
844 *
845 * This function parses UBIFS mount options and returns zero in case success
846 * and a negative error code in case of failure.
847 */
848static int ubifs_parse_options(struct ubifs_info *c, char *options,
849 int is_remount)
850{
851 char *p;
852 substring_t args[MAX_OPT_ARGS];
853
854 if (!options)
855 return 0;
856
857 while ((p = strsep(&options, ","))) {
858 int token;
859
860 if (!*p)
861 continue;
862
863 token = match_token(p, tokens, args);
864 switch (token) {
865 case Opt_fast_unmount:
866 c->mount_opts.unmount_mode = 2;
867 c->fast_unmount = 1;
868 break;
869 case Opt_norm_unmount:
870 c->mount_opts.unmount_mode = 1;
871 c->fast_unmount = 0;
872 break;
873 default:
874 ubifs_err("unrecognized mount option \"%s\" "
875 "or missing value", p);
876 return -EINVAL;
877 }
878 }
879
880 return 0;
881}
882
883/**
884 * destroy_journal - destroy journal data structures.
885 * @c: UBIFS file-system description object
886 *
887 * This function destroys journal data structures including those that may have
888 * been created by recovery functions.
889 */
890static void destroy_journal(struct ubifs_info *c)
891{
892 while (!list_empty(&c->unclean_leb_list)) {
893 struct ubifs_unclean_leb *ucleb;
894
895 ucleb = list_entry(c->unclean_leb_list.next,
896 struct ubifs_unclean_leb, list);
897 list_del(&ucleb->list);
898 kfree(ucleb);
899 }
900 while (!list_empty(&c->old_buds)) {
901 struct ubifs_bud *bud;
902
903 bud = list_entry(c->old_buds.next, struct ubifs_bud, list);
904 list_del(&bud->list);
905 kfree(bud);
906 }
907 ubifs_destroy_idx_gc(c);
908 ubifs_destroy_size_tree(c);
909 ubifs_tnc_close(c);
910 free_buds(c);
911}
912
913/**
914 * mount_ubifs - mount UBIFS file-system.
915 * @c: UBIFS file-system description object
916 *
917 * This function mounts UBIFS file system. Returns zero in case of success and
918 * a negative error code in case of failure.
919 *
920 * Note, the function does not de-allocate resources it it fails half way
921 * through, and the caller has to do this instead.
922 */
923static int mount_ubifs(struct ubifs_info *c)
924{
925 struct super_block *sb = c->vfs_sb;
926 int err, mounted_read_only = (sb->s_flags & MS_RDONLY);
927 long long x;
928 size_t sz;
929
930 err = init_constants_early(c);
931 if (err)
932 return err;
933
934#ifdef CONFIG_UBIFS_FS_DEBUG
935 c->dbg_buf = vmalloc(c->leb_size);
936 if (!c->dbg_buf)
937 return -ENOMEM;
938#endif
939
940 err = check_volume_empty(c);
941 if (err)
942 goto out_free;
943
944 if (c->empty && (mounted_read_only || c->ro_media)) {
945 /*
946 * This UBI volume is empty, and read-only, or the file system
947 * is mounted read-only - we cannot format it.
948 */
949 ubifs_err("can't format empty UBI volume: read-only %s",
950 c->ro_media ? "UBI volume" : "mount");
951 err = -EROFS;
952 goto out_free;
953 }
954
955 if (c->ro_media && !mounted_read_only) {
956 ubifs_err("cannot mount read-write - read-only media");
957 err = -EROFS;
958 goto out_free;
959 }
960
961 /*
962 * The requirement for the buffer is that it should fit indexing B-tree
963 * height amount of integers. We assume the height if the TNC tree will
964 * never exceed 64.
965 */
966 err = -ENOMEM;
967 c->bottom_up_buf = kmalloc(BOTTOM_UP_HEIGHT * sizeof(int), GFP_KERNEL);
968 if (!c->bottom_up_buf)
969 goto out_free;
970
971 c->sbuf = vmalloc(c->leb_size);
972 if (!c->sbuf)
973 goto out_free;
974
975 if (!mounted_read_only) {
976 c->ileb_buf = vmalloc(c->leb_size);
977 if (!c->ileb_buf)
978 goto out_free;
979 }
980
981 err = ubifs_read_superblock(c);
982 if (err)
983 goto out_free;
984
985 /*
986 * Make sure the compressor which is set as the default on in the
987 * superblock was actually compiled in.
988 */
989 if (!ubifs_compr_present(c->default_compr)) {
990 ubifs_warn("'%s' compressor is set by superblock, but not "
991 "compiled in", ubifs_compr_name(c->default_compr));
992 c->default_compr = UBIFS_COMPR_NONE;
993 }
994
995 dbg_failure_mode_registration(c);
996
997 err = init_constants_late(c);
998 if (err)
999 goto out_dereg;
1000
1001 sz = ALIGN(c->max_idx_node_sz, c->min_io_size);
1002 sz = ALIGN(sz + c->max_idx_node_sz, c->min_io_size);
1003 c->cbuf = kmalloc(sz, GFP_NOFS);
1004 if (!c->cbuf) {
1005 err = -ENOMEM;
1006 goto out_dereg;
1007 }
1008
1009 if (!mounted_read_only) {
1010 err = alloc_wbufs(c);
1011 if (err)
1012 goto out_cbuf;
1013
1014 /* Create background thread */
1015 sprintf(c->bgt_name, BGT_NAME_PATTERN, c->vi.ubi_num,
1016 c->vi.vol_id);
1017 c->bgt = kthread_create(ubifs_bg_thread, c, c->bgt_name);
1018 if (!c->bgt)
1019 c->bgt = ERR_PTR(-EINVAL);
1020 if (IS_ERR(c->bgt)) {
1021 err = PTR_ERR(c->bgt);
1022 c->bgt = NULL;
1023 ubifs_err("cannot spawn \"%s\", error %d",
1024 c->bgt_name, err);
1025 goto out_wbufs;
1026 }
1027 wake_up_process(c->bgt);
1028 }
1029
1030 err = ubifs_read_master(c);
1031 if (err)
1032 goto out_master;
1033
1034 if ((c->mst_node->flags & cpu_to_le32(UBIFS_MST_DIRTY)) != 0) {
1035 ubifs_msg("recovery needed");
1036 c->need_recovery = 1;
1037 if (!mounted_read_only) {
1038 err = ubifs_recover_inl_heads(c, c->sbuf);
1039 if (err)
1040 goto out_master;
1041 }
1042 } else if (!mounted_read_only) {
1043 /*
1044 * Set the "dirty" flag so that if we reboot uncleanly we
1045 * will notice this immediately on the next mount.
1046 */
1047 c->mst_node->flags |= cpu_to_le32(UBIFS_MST_DIRTY);
1048 err = ubifs_write_master(c);
1049 if (err)
1050 goto out_master;
1051 }
1052
1053 err = ubifs_lpt_init(c, 1, !mounted_read_only);
1054 if (err)
1055 goto out_lpt;
1056
1057 err = dbg_check_idx_size(c, c->old_idx_sz);
1058 if (err)
1059 goto out_lpt;
1060
1061 err = ubifs_replay_journal(c);
1062 if (err)
1063 goto out_journal;
1064
1065 err = ubifs_mount_orphans(c, c->need_recovery, mounted_read_only);
1066 if (err)
1067 goto out_orphans;
1068
1069 if (!mounted_read_only) {
1070 int lnum;
1071
1072 /* Check for enough free space */
1073 if (ubifs_calc_available(c, c->min_idx_lebs) <= 0) {
1074 ubifs_err("insufficient available space");
1075 err = -EINVAL;
1076 goto out_orphans;
1077 }
1078
1079 /* Check for enough log space */
1080 lnum = c->lhead_lnum + 1;
1081 if (lnum >= UBIFS_LOG_LNUM + c->log_lebs)
1082 lnum = UBIFS_LOG_LNUM;
1083 if (lnum == c->ltail_lnum) {
1084 err = ubifs_consolidate_log(c);
1085 if (err)
1086 goto out_orphans;
1087 }
1088
1089 if (c->need_recovery) {
1090 err = ubifs_recover_size(c);
1091 if (err)
1092 goto out_orphans;
1093 err = ubifs_rcvry_gc_commit(c);
1094 } else
1095 err = take_gc_lnum(c);
1096 if (err)
1097 goto out_orphans;
1098
1099 err = dbg_check_lprops(c);
1100 if (err)
1101 goto out_orphans;
1102 } else if (c->need_recovery) {
1103 err = ubifs_recover_size(c);
1104 if (err)
1105 goto out_orphans;
1106 }
1107
1108 spin_lock(&ubifs_infos_lock);
1109 list_add_tail(&c->infos_list, &ubifs_infos);
1110 spin_unlock(&ubifs_infos_lock);
1111
1112 if (c->need_recovery) {
1113 if (mounted_read_only)
1114 ubifs_msg("recovery deferred");
1115 else {
1116 c->need_recovery = 0;
1117 ubifs_msg("recovery completed");
1118 }
1119 }
1120
1121 err = dbg_check_filesystem(c);
1122 if (err)
1123 goto out_infos;
1124
1125 ubifs_msg("mounted UBI device %d, volume %d", c->vi.ubi_num,
1126 c->vi.vol_id);
1127 if (mounted_read_only)
1128 ubifs_msg("mounted read-only");
1129 x = (long long)c->main_lebs * c->leb_size;
1130 ubifs_msg("file system size: %lld bytes (%lld KiB, %lld MiB, %d LEBs)",
1131 x, x >> 10, x >> 20, c->main_lebs);
1132 x = (long long)c->log_lebs * c->leb_size + c->max_bud_bytes;
1133 ubifs_msg("journal size: %lld bytes (%lld KiB, %lld MiB, %d LEBs)",
1134 x, x >> 10, x >> 20, c->log_lebs + c->max_bud_cnt);
1135 ubifs_msg("default compressor: %s", ubifs_compr_name(c->default_compr));
1136 ubifs_msg("media format %d, latest format %d",
1137 c->fmt_version, UBIFS_FORMAT_VERSION);
1138
1139 dbg_msg("compiled on: " __DATE__ " at " __TIME__);
1140 dbg_msg("min. I/O unit size: %d bytes", c->min_io_size);
1141 dbg_msg("LEB size: %d bytes (%d KiB)",
1142 c->leb_size, c->leb_size / 1024);
1143 dbg_msg("data journal heads: %d",
1144 c->jhead_cnt - NONDATA_JHEADS_CNT);
1145 dbg_msg("UUID: %02X%02X%02X%02X-%02X%02X"
1146 "-%02X%02X-%02X%02X-%02X%02X%02X%02X%02X%02X",
1147 c->uuid[0], c->uuid[1], c->uuid[2], c->uuid[3],
1148 c->uuid[4], c->uuid[5], c->uuid[6], c->uuid[7],
1149 c->uuid[8], c->uuid[9], c->uuid[10], c->uuid[11],
1150 c->uuid[12], c->uuid[13], c->uuid[14], c->uuid[15]);
1151 dbg_msg("fast unmount: %d", c->fast_unmount);
1152 dbg_msg("big_lpt %d", c->big_lpt);
1153 dbg_msg("log LEBs: %d (%d - %d)",
1154 c->log_lebs, UBIFS_LOG_LNUM, c->log_last);
1155 dbg_msg("LPT area LEBs: %d (%d - %d)",
1156 c->lpt_lebs, c->lpt_first, c->lpt_last);
1157 dbg_msg("orphan area LEBs: %d (%d - %d)",
1158 c->orph_lebs, c->orph_first, c->orph_last);
1159 dbg_msg("main area LEBs: %d (%d - %d)",
1160 c->main_lebs, c->main_first, c->leb_cnt - 1);
1161 dbg_msg("index LEBs: %d", c->lst.idx_lebs);
1162 dbg_msg("total index bytes: %lld (%lld KiB, %lld MiB)",
1163 c->old_idx_sz, c->old_idx_sz >> 10, c->old_idx_sz >> 20);
1164 dbg_msg("key hash type: %d", c->key_hash_type);
1165 dbg_msg("tree fanout: %d", c->fanout);
1166 dbg_msg("reserved GC LEB: %d", c->gc_lnum);
1167 dbg_msg("first main LEB: %d", c->main_first);
1168 dbg_msg("dead watermark: %d", c->dead_wm);
1169 dbg_msg("dark watermark: %d", c->dark_wm);
1170 x = (long long)c->main_lebs * c->dark_wm;
1171 dbg_msg("max. dark space: %lld (%lld KiB, %lld MiB)",
1172 x, x >> 10, x >> 20);
1173 dbg_msg("maximum bud bytes: %lld (%lld KiB, %lld MiB)",
1174 c->max_bud_bytes, c->max_bud_bytes >> 10,
1175 c->max_bud_bytes >> 20);
1176 dbg_msg("BG commit bud bytes: %lld (%lld KiB, %lld MiB)",
1177 c->bg_bud_bytes, c->bg_bud_bytes >> 10,
1178 c->bg_bud_bytes >> 20);
1179 dbg_msg("current bud bytes %lld (%lld KiB, %lld MiB)",
1180 c->bud_bytes, c->bud_bytes >> 10, c->bud_bytes >> 20);
1181 dbg_msg("max. seq. number: %llu", c->max_sqnum);
1182 dbg_msg("commit number: %llu", c->cmt_no);
1183
1184 return 0;
1185
1186out_infos:
1187 spin_lock(&ubifs_infos_lock);
1188 list_del(&c->infos_list);
1189 spin_unlock(&ubifs_infos_lock);
1190out_orphans:
1191 free_orphans(c);
1192out_journal:
1193 destroy_journal(c);
1194out_lpt:
1195 ubifs_lpt_free(c, 0);
1196out_master:
1197 kfree(c->mst_node);
1198 kfree(c->rcvrd_mst_node);
1199 if (c->bgt)
1200 kthread_stop(c->bgt);
1201out_wbufs:
1202 free_wbufs(c);
1203out_cbuf:
1204 kfree(c->cbuf);
1205out_dereg:
1206 dbg_failure_mode_deregistration(c);
1207out_free:
1208 vfree(c->ileb_buf);
1209 vfree(c->sbuf);
1210 kfree(c->bottom_up_buf);
1211 UBIFS_DBG(vfree(c->dbg_buf));
1212 return err;
1213}
1214
1215/**
1216 * ubifs_umount - un-mount UBIFS file-system.
1217 * @c: UBIFS file-system description object
1218 *
1219 * Note, this function is called to free allocated resourced when un-mounting,
1220 * as well as free resources when an error occurred while we were half way
1221 * through mounting (error path cleanup function). So it has to make sure the
1222 * resource was actually allocated before freeing it.
1223 */
1224static void ubifs_umount(struct ubifs_info *c)
1225{
1226 dbg_gen("un-mounting UBI device %d, volume %d", c->vi.ubi_num,
1227 c->vi.vol_id);
1228
1229 spin_lock(&ubifs_infos_lock);
1230 list_del(&c->infos_list);
1231 spin_unlock(&ubifs_infos_lock);
1232
1233 if (c->bgt)
1234 kthread_stop(c->bgt);
1235
1236 destroy_journal(c);
1237 free_wbufs(c);
1238 free_orphans(c);
1239 ubifs_lpt_free(c, 0);
1240
1241 kfree(c->cbuf);
1242 kfree(c->rcvrd_mst_node);
1243 kfree(c->mst_node);
1244 vfree(c->sbuf);
1245 kfree(c->bottom_up_buf);
1246 UBIFS_DBG(vfree(c->dbg_buf));
1247 vfree(c->ileb_buf);
1248 dbg_failure_mode_deregistration(c);
1249}
1250
1251/**
1252 * ubifs_remount_rw - re-mount in read-write mode.
1253 * @c: UBIFS file-system description object
1254 *
1255 * UBIFS avoids allocating many unnecessary resources when mounted in read-only
1256 * mode. This function allocates the needed resources and re-mounts UBIFS in
1257 * read-write mode.
1258 */
1259static int ubifs_remount_rw(struct ubifs_info *c)
1260{
1261 int err, lnum;
1262
1263 if (c->ro_media)
1264 return -EINVAL;
1265
1266 mutex_lock(&c->umount_mutex);
1267 c->remounting_rw = 1;
1268
1269 /* Check for enough free space */
1270 if (ubifs_calc_available(c, c->min_idx_lebs) <= 0) {
1271 ubifs_err("insufficient available space");
1272 err = -EINVAL;
1273 goto out;
1274 }
1275
1276 if (c->old_leb_cnt != c->leb_cnt) {
1277 struct ubifs_sb_node *sup;
1278
1279 sup = ubifs_read_sb_node(c);
1280 if (IS_ERR(sup)) {
1281 err = PTR_ERR(sup);
1282 goto out;
1283 }
1284 sup->leb_cnt = cpu_to_le32(c->leb_cnt);
1285 err = ubifs_write_sb_node(c, sup);
1286 if (err)
1287 goto out;
1288 }
1289
1290 if (c->need_recovery) {
1291 ubifs_msg("completing deferred recovery");
1292 err = ubifs_write_rcvrd_mst_node(c);
1293 if (err)
1294 goto out;
1295 err = ubifs_recover_size(c);
1296 if (err)
1297 goto out;
1298 err = ubifs_clean_lebs(c, c->sbuf);
1299 if (err)
1300 goto out;
1301 err = ubifs_recover_inl_heads(c, c->sbuf);
1302 if (err)
1303 goto out;
1304 }
1305
1306 if (!(c->mst_node->flags & cpu_to_le32(UBIFS_MST_DIRTY))) {
1307 c->mst_node->flags |= cpu_to_le32(UBIFS_MST_DIRTY);
1308 err = ubifs_write_master(c);
1309 if (err)
1310 goto out;
1311 }
1312
1313 c->ileb_buf = vmalloc(c->leb_size);
1314 if (!c->ileb_buf) {
1315 err = -ENOMEM;
1316 goto out;
1317 }
1318
1319 err = ubifs_lpt_init(c, 0, 1);
1320 if (err)
1321 goto out;
1322
1323 err = alloc_wbufs(c);
1324 if (err)
1325 goto out;
1326
1327 ubifs_create_buds_lists(c);
1328
1329 /* Create background thread */
1330 c->bgt = kthread_create(ubifs_bg_thread, c, c->bgt_name);
1331 if (!c->bgt)
1332 c->bgt = ERR_PTR(-EINVAL);
1333 if (IS_ERR(c->bgt)) {
1334 err = PTR_ERR(c->bgt);
1335 c->bgt = NULL;
1336 ubifs_err("cannot spawn \"%s\", error %d",
1337 c->bgt_name, err);
1338 return err;
1339 }
1340 wake_up_process(c->bgt);
1341
1342 c->orph_buf = vmalloc(c->leb_size);
1343 if (!c->orph_buf)
1344 return -ENOMEM;
1345
1346 /* Check for enough log space */
1347 lnum = c->lhead_lnum + 1;
1348 if (lnum >= UBIFS_LOG_LNUM + c->log_lebs)
1349 lnum = UBIFS_LOG_LNUM;
1350 if (lnum == c->ltail_lnum) {
1351 err = ubifs_consolidate_log(c);
1352 if (err)
1353 goto out;
1354 }
1355
1356 if (c->need_recovery)
1357 err = ubifs_rcvry_gc_commit(c);
1358 else
1359 err = take_gc_lnum(c);
1360 if (err)
1361 goto out;
1362
1363 if (c->need_recovery) {
1364 c->need_recovery = 0;
1365 ubifs_msg("deferred recovery completed");
1366 }
1367
1368 dbg_gen("re-mounted read-write");
1369 c->vfs_sb->s_flags &= ~MS_RDONLY;
1370 c->remounting_rw = 0;
1371 mutex_unlock(&c->umount_mutex);
1372 return 0;
1373
1374out:
1375 vfree(c->orph_buf);
1376 c->orph_buf = NULL;
1377 if (c->bgt) {
1378 kthread_stop(c->bgt);
1379 c->bgt = NULL;
1380 }
1381 free_wbufs(c);
1382 vfree(c->ileb_buf);
1383 c->ileb_buf = NULL;
1384 ubifs_lpt_free(c, 1);
1385 c->remounting_rw = 0;
1386 mutex_unlock(&c->umount_mutex);
1387 return err;
1388}
1389
1390/**
1391 * commit_on_unmount - commit the journal when un-mounting.
1392 * @c: UBIFS file-system description object
1393 *
1394 * This function is called during un-mounting and it commits the journal unless
1395 * the "fast unmount" mode is enabled. It also avoids committing the journal if
1396 * it contains too few data.
1397 *
1398 * Sometimes recovery requires the journal to be committed at least once, and
1399 * this function takes care about this.
1400 */
1401static void commit_on_unmount(struct ubifs_info *c)
1402{
1403 if (!c->fast_unmount) {
1404 long long bud_bytes;
1405
1406 spin_lock(&c->buds_lock);
1407 bud_bytes = c->bud_bytes;
1408 spin_unlock(&c->buds_lock);
1409 if (bud_bytes > c->leb_size)
1410 ubifs_run_commit(c);
1411 }
1412}
1413
1414/**
1415 * ubifs_remount_ro - re-mount in read-only mode.
1416 * @c: UBIFS file-system description object
1417 *
1418 * We rely on VFS to have stopped writing. Possibly the background thread could
1419 * be running a commit, however kthread_stop will wait in that case.
1420 */
1421static void ubifs_remount_ro(struct ubifs_info *c)
1422{
1423 int i, err;
1424
1425 ubifs_assert(!c->need_recovery);
1426 commit_on_unmount(c);
1427
1428 mutex_lock(&c->umount_mutex);
1429 if (c->bgt) {
1430 kthread_stop(c->bgt);
1431 c->bgt = NULL;
1432 }
1433
1434 for (i = 0; i < c->jhead_cnt; i++) {
1435 ubifs_wbuf_sync(&c->jheads[i].wbuf);
1436 del_timer_sync(&c->jheads[i].wbuf.timer);
1437 }
1438
1439 if (!c->ro_media) {
1440 c->mst_node->flags &= ~cpu_to_le32(UBIFS_MST_DIRTY);
1441 c->mst_node->flags |= cpu_to_le32(UBIFS_MST_NO_ORPHS);
1442 c->mst_node->gc_lnum = cpu_to_le32(c->gc_lnum);
1443 err = ubifs_write_master(c);
1444 if (err)
1445 ubifs_ro_mode(c, err);
1446 }
1447
1448 ubifs_destroy_idx_gc(c);
1449 free_wbufs(c);
1450 vfree(c->orph_buf);
1451 c->orph_buf = NULL;
1452 vfree(c->ileb_buf);
1453 c->ileb_buf = NULL;
1454 ubifs_lpt_free(c, 1);
1455 mutex_unlock(&c->umount_mutex);
1456}
1457
1458static void ubifs_put_super(struct super_block *sb)
1459{
1460 int i;
1461 struct ubifs_info *c = sb->s_fs_info;
1462
1463 ubifs_msg("un-mount UBI device %d, volume %d", c->vi.ubi_num,
1464 c->vi.vol_id);
1465 /*
1466 * The following asserts are only valid if there has not been a failure
1467 * of the media. For example, there will be dirty inodes if we failed
1468 * to write them back because of I/O errors.
1469 */
1470 ubifs_assert(atomic_long_read(&c->dirty_pg_cnt) == 0);
1471 ubifs_assert(c->budg_idx_growth == 0);
1472 ubifs_assert(c->budg_data_growth == 0);
1473
1474 /*
1475 * The 'c->umount_lock' prevents races between UBIFS memory shrinker
1476 * and file system un-mount. Namely, it prevents the shrinker from
1477 * picking this superblock for shrinking - it will be just skipped if
1478 * the mutex is locked.
1479 */
1480 mutex_lock(&c->umount_mutex);
1481 if (!(c->vfs_sb->s_flags & MS_RDONLY)) {
1482 /*
1483 * First of all kill the background thread to make sure it does
1484 * not interfere with un-mounting and freeing resources.
1485 */
1486 if (c->bgt) {
1487 kthread_stop(c->bgt);
1488 c->bgt = NULL;
1489 }
1490
1491 /* Synchronize write-buffers */
1492 if (c->jheads)
1493 for (i = 0; i < c->jhead_cnt; i++) {
1494 ubifs_wbuf_sync(&c->jheads[i].wbuf);
1495 del_timer_sync(&c->jheads[i].wbuf.timer);
1496 }
1497
1498 /*
1499 * On fatal errors c->ro_media is set to 1, in which case we do
1500 * not write the master node.
1501 */
1502 if (!c->ro_media) {
1503 /*
1504 * We are being cleanly unmounted which means the
1505 * orphans were killed - indicate this in the master
1506 * node. Also save the reserved GC LEB number.
1507 */
1508 int err;
1509
1510 c->mst_node->flags &= ~cpu_to_le32(UBIFS_MST_DIRTY);
1511 c->mst_node->flags |= cpu_to_le32(UBIFS_MST_NO_ORPHS);
1512 c->mst_node->gc_lnum = cpu_to_le32(c->gc_lnum);
1513 err = ubifs_write_master(c);
1514 if (err)
1515 /*
1516 * Recovery will attempt to fix the master area
1517 * next mount, so we just print a message and
1518 * continue to unmount normally.
1519 */
1520 ubifs_err("failed to write master node, "
1521 "error %d", err);
1522 }
1523 }
1524
1525 ubifs_umount(c);
1526 bdi_destroy(&c->bdi);
1527 ubi_close_volume(c->ubi);
1528 mutex_unlock(&c->umount_mutex);
1529 kfree(c);
1530}
1531
1532static int ubifs_remount_fs(struct super_block *sb, int *flags, char *data)
1533{
1534 int err;
1535 struct ubifs_info *c = sb->s_fs_info;
1536
1537 dbg_gen("old flags %#lx, new flags %#x", sb->s_flags, *flags);
1538
1539 err = ubifs_parse_options(c, data, 1);
1540 if (err) {
1541 ubifs_err("invalid or unknown remount parameter");
1542 return err;
1543 }
1544 if ((sb->s_flags & MS_RDONLY) && !(*flags & MS_RDONLY)) {
1545 err = ubifs_remount_rw(c);
1546 if (err)
1547 return err;
1548 } else if (!(sb->s_flags & MS_RDONLY) && (*flags & MS_RDONLY))
1549 ubifs_remount_ro(c);
1550
1551 return 0;
1552}
1553
1554struct super_operations ubifs_super_operations = {
1555 .alloc_inode = ubifs_alloc_inode,
1556 .destroy_inode = ubifs_destroy_inode,
1557 .put_super = ubifs_put_super,
1558 .write_inode = ubifs_write_inode,
1559 .delete_inode = ubifs_delete_inode,
1560 .statfs = ubifs_statfs,
1561 .dirty_inode = ubifs_dirty_inode,
1562 .remount_fs = ubifs_remount_fs,
1563 .show_options = ubifs_show_options,
1564 .sync_fs = ubifs_sync_fs,
1565};
1566
1567/**
1568 * open_ubi - parse UBI device name string and open the UBI device.
1569 * @name: UBI volume name
1570 * @mode: UBI volume open mode
1571 *
1572 * There are several ways to specify UBI volumes when mounting UBIFS:
1573 * o ubiX_Y - UBI device number X, volume Y;
1574 * o ubiY - UBI device number 0, volume Y;
1575 * o ubiX:NAME - mount UBI device X, volume with name NAME;
1576 * o ubi:NAME - mount UBI device 0, volume with name NAME.
1577 *
1578 * Alternative '!' separator may be used instead of ':' (because some shells
1579 * like busybox may interpret ':' as an NFS host name separator). This function
1580 * returns ubi volume object in case of success and a negative error code in
1581 * case of failure.
1582 */
1583static struct ubi_volume_desc *open_ubi(const char *name, int mode)
1584{
1585 int dev, vol;
1586 char *endptr;
1587
1588 if (name[0] != 'u' || name[1] != 'b' || name[2] != 'i')
1589 return ERR_PTR(-EINVAL);
1590
1591 /* ubi:NAME method */
1592 if ((name[3] == ':' || name[3] == '!') && name[4] != '\0')
1593 return ubi_open_volume_nm(0, name + 4, mode);
1594
1595 if (!isdigit(name[3]))
1596 return ERR_PTR(-EINVAL);
1597
1598 dev = simple_strtoul(name + 3, &endptr, 0);
1599
1600 /* ubiY method */
1601 if (*endptr == '\0')
1602 return ubi_open_volume(0, dev, mode);
1603
1604 /* ubiX_Y method */
1605 if (*endptr == '_' && isdigit(endptr[1])) {
1606 vol = simple_strtoul(endptr + 1, &endptr, 0);
1607 if (*endptr != '\0')
1608 return ERR_PTR(-EINVAL);
1609 return ubi_open_volume(dev, vol, mode);
1610 }
1611
1612 /* ubiX:NAME method */
1613 if ((*endptr == ':' || *endptr == '!') && endptr[1] != '\0')
1614 return ubi_open_volume_nm(dev, ++endptr, mode);
1615
1616 return ERR_PTR(-EINVAL);
1617}
1618
1619static int ubifs_fill_super(struct super_block *sb, void *data, int silent)
1620{
1621 struct ubi_volume_desc *ubi = sb->s_fs_info;
1622 struct ubifs_info *c;
1623 struct inode *root;
1624 int err;
1625
1626 c = kzalloc(sizeof(struct ubifs_info), GFP_KERNEL);
1627 if (!c)
1628 return -ENOMEM;
1629
1630 spin_lock_init(&c->cnt_lock);
1631 spin_lock_init(&c->cs_lock);
1632 spin_lock_init(&c->buds_lock);
1633 spin_lock_init(&c->space_lock);
1634 spin_lock_init(&c->orphan_lock);
1635 init_rwsem(&c->commit_sem);
1636 mutex_init(&c->lp_mutex);
1637 mutex_init(&c->tnc_mutex);
1638 mutex_init(&c->log_mutex);
1639 mutex_init(&c->mst_mutex);
1640 mutex_init(&c->umount_mutex);
1641 init_waitqueue_head(&c->cmt_wq);
1642 c->buds = RB_ROOT;
1643 c->old_idx = RB_ROOT;
1644 c->size_tree = RB_ROOT;
1645 c->orph_tree = RB_ROOT;
1646 INIT_LIST_HEAD(&c->infos_list);
1647 INIT_LIST_HEAD(&c->idx_gc);
1648 INIT_LIST_HEAD(&c->replay_list);
1649 INIT_LIST_HEAD(&c->replay_buds);
1650 INIT_LIST_HEAD(&c->uncat_list);
1651 INIT_LIST_HEAD(&c->empty_list);
1652 INIT_LIST_HEAD(&c->freeable_list);
1653 INIT_LIST_HEAD(&c->frdi_idx_list);
1654 INIT_LIST_HEAD(&c->unclean_leb_list);
1655 INIT_LIST_HEAD(&c->old_buds);
1656 INIT_LIST_HEAD(&c->orph_list);
1657 INIT_LIST_HEAD(&c->orph_new);
1658
1659 c->highest_inum = UBIFS_FIRST_INO;
1660 get_random_bytes(&c->vfs_gen, sizeof(int));
1661 c->lhead_lnum = c->ltail_lnum = UBIFS_LOG_LNUM;
1662
1663 ubi_get_volume_info(ubi, &c->vi);
1664 ubi_get_device_info(c->vi.ubi_num, &c->di);
1665
1666 /* Re-open the UBI device in read-write mode */
1667 c->ubi = ubi_open_volume(c->vi.ubi_num, c->vi.vol_id, UBI_READWRITE);
1668 if (IS_ERR(c->ubi)) {
1669 err = PTR_ERR(c->ubi);
1670 goto out_free;
1671 }
1672
1673 /*
1674 * UBIFS provids 'backing_dev_info' in order to disable readahead. For
1675 * UBIFS, I/O is not deferred, it is done immediately in readpage,
1676 * which means the user would have to wait not just for their own I/O
1677 * but the readahead I/O as well i.e. completely pointless.
1678 *
1679 * Read-ahead will be disabled because @c->bdi.ra_pages is 0.
1680 */
1681 c->bdi.capabilities = BDI_CAP_MAP_COPY;
1682 c->bdi.unplug_io_fn = default_unplug_io_fn;
1683 err = bdi_init(&c->bdi);
1684 if (err)
1685 goto out_close;
1686
1687 err = ubifs_parse_options(c, data, 0);
1688 if (err)
1689 goto out_bdi;
1690
1691 c->vfs_sb = sb;
1692
1693 sb->s_fs_info = c;
1694 sb->s_magic = UBIFS_SUPER_MAGIC;
1695 sb->s_blocksize = UBIFS_BLOCK_SIZE;
1696 sb->s_blocksize_bits = UBIFS_BLOCK_SHIFT;
1697 sb->s_dev = c->vi.cdev;
1698 sb->s_maxbytes = c->max_inode_sz = key_max_inode_size(c);
1699 if (c->max_inode_sz > MAX_LFS_FILESIZE)
1700 sb->s_maxbytes = c->max_inode_sz = MAX_LFS_FILESIZE;
1701 sb->s_op = &ubifs_super_operations;
1702
1703 mutex_lock(&c->umount_mutex);
1704 err = mount_ubifs(c);
1705 if (err) {
1706 ubifs_assert(err < 0);
1707 goto out_unlock;
1708 }
1709
1710 /* Read the root inode */
1711 root = ubifs_iget(sb, UBIFS_ROOT_INO);
1712 if (IS_ERR(root)) {
1713 err = PTR_ERR(root);
1714 goto out_umount;
1715 }
1716
1717 sb->s_root = d_alloc_root(root);
1718 if (!sb->s_root)
1719 goto out_iput;
1720
1721 mutex_unlock(&c->umount_mutex);
1722
1723 return 0;
1724
1725out_iput:
1726 iput(root);
1727out_umount:
1728 ubifs_umount(c);
1729out_unlock:
1730 mutex_unlock(&c->umount_mutex);
1731out_bdi:
1732 bdi_destroy(&c->bdi);
1733out_close:
1734 ubi_close_volume(c->ubi);
1735out_free:
1736 kfree(c);
1737 return err;
1738}
1739
1740static int sb_test(struct super_block *sb, void *data)
1741{
1742 dev_t *dev = data;
1743
1744 return sb->s_dev == *dev;
1745}
1746
1747static int sb_set(struct super_block *sb, void *data)
1748{
1749 dev_t *dev = data;
1750
1751 sb->s_dev = *dev;
1752 return 0;
1753}
1754
1755static int ubifs_get_sb(struct file_system_type *fs_type, int flags,
1756 const char *name, void *data, struct vfsmount *mnt)
1757{
1758 struct ubi_volume_desc *ubi;
1759 struct ubi_volume_info vi;
1760 struct super_block *sb;
1761 int err;
1762
1763 dbg_gen("name %s, flags %#x", name, flags);
1764
1765 /*
1766 * Get UBI device number and volume ID. Mount it read-only so far
1767 * because this might be a new mount point, and UBI allows only one
1768 * read-write user at a time.
1769 */
1770 ubi = open_ubi(name, UBI_READONLY);
1771 if (IS_ERR(ubi)) {
1772 ubifs_err("cannot open \"%s\", error %d",
1773 name, (int)PTR_ERR(ubi));
1774 return PTR_ERR(ubi);
1775 }
1776 ubi_get_volume_info(ubi, &vi);
1777
1778 dbg_gen("opened ubi%d_%d", vi.ubi_num, vi.vol_id);
1779
1780 sb = sget(fs_type, &sb_test, &sb_set, &vi.cdev);
1781 if (IS_ERR(sb)) {
1782 err = PTR_ERR(sb);
1783 goto out_close;
1784 }
1785
1786 if (sb->s_root) {
1787 /* A new mount point for already mounted UBIFS */
1788 dbg_gen("this ubi volume is already mounted");
1789 if ((flags ^ sb->s_flags) & MS_RDONLY) {
1790 err = -EBUSY;
1791 goto out_deact;
1792 }
1793 } else {
1794 sb->s_flags = flags;
1795 /*
1796 * Pass 'ubi' to 'fill_super()' in sb->s_fs_info where it is
1797 * replaced by 'c'.
1798 */
1799 sb->s_fs_info = ubi;
1800 err = ubifs_fill_super(sb, data, flags & MS_SILENT ? 1 : 0);
1801 if (err)
1802 goto out_deact;
1803 /* We do not support atime */
1804 sb->s_flags |= MS_ACTIVE | MS_NOATIME;
1805 }
1806
1807 /* 'fill_super()' opens ubi again so we must close it here */
1808 ubi_close_volume(ubi);
1809
1810 return simple_set_mnt(mnt, sb);
1811
1812out_deact:
1813 up_write(&sb->s_umount);
1814 deactivate_super(sb);
1815out_close:
1816 ubi_close_volume(ubi);
1817 return err;
1818}
1819
1820static void ubifs_kill_sb(struct super_block *sb)
1821{
1822 struct ubifs_info *c = sb->s_fs_info;
1823
1824 /*
1825 * We do 'commit_on_unmount()' here instead of 'ubifs_put_super()'
1826 * in order to be outside BKL.
1827 */
1828 if (sb->s_root && !(sb->s_flags & MS_RDONLY))
1829 commit_on_unmount(c);
1830 /* The un-mount routine is actually done in put_super() */
1831 generic_shutdown_super(sb);
1832}
1833
1834static struct file_system_type ubifs_fs_type = {
1835 .name = "ubifs",
1836 .owner = THIS_MODULE,
1837 .get_sb = ubifs_get_sb,
1838 .kill_sb = ubifs_kill_sb
1839};
1840
1841/*
1842 * Inode slab cache constructor.
1843 */
1844static void inode_slab_ctor(struct kmem_cache *cachep, void *obj)
1845{
1846 struct ubifs_inode *ui = obj;
1847 inode_init_once(&ui->vfs_inode);
1848}
1849
1850static int __init ubifs_init(void)
1851{
1852 int err;
1853
1854 BUILD_BUG_ON(sizeof(struct ubifs_ch) != 24);
1855
1856 /* Make sure node sizes are 8-byte aligned */
1857 BUILD_BUG_ON(UBIFS_CH_SZ & 7);
1858 BUILD_BUG_ON(UBIFS_INO_NODE_SZ & 7);
1859 BUILD_BUG_ON(UBIFS_DENT_NODE_SZ & 7);
1860 BUILD_BUG_ON(UBIFS_XENT_NODE_SZ & 7);
1861 BUILD_BUG_ON(UBIFS_DATA_NODE_SZ & 7);
1862 BUILD_BUG_ON(UBIFS_TRUN_NODE_SZ & 7);
1863 BUILD_BUG_ON(UBIFS_SB_NODE_SZ & 7);
1864 BUILD_BUG_ON(UBIFS_MST_NODE_SZ & 7);
1865 BUILD_BUG_ON(UBIFS_REF_NODE_SZ & 7);
1866 BUILD_BUG_ON(UBIFS_CS_NODE_SZ & 7);
1867 BUILD_BUG_ON(UBIFS_ORPH_NODE_SZ & 7);
1868
1869 BUILD_BUG_ON(UBIFS_MAX_DENT_NODE_SZ & 7);
1870 BUILD_BUG_ON(UBIFS_MAX_XENT_NODE_SZ & 7);
1871 BUILD_BUG_ON(UBIFS_MAX_DATA_NODE_SZ & 7);
1872 BUILD_BUG_ON(UBIFS_MAX_INO_NODE_SZ & 7);
1873 BUILD_BUG_ON(UBIFS_MAX_NODE_SZ & 7);
1874 BUILD_BUG_ON(MIN_WRITE_SZ & 7);
1875
1876 /* Check min. node size */
1877 BUILD_BUG_ON(UBIFS_INO_NODE_SZ < MIN_WRITE_SZ);
1878 BUILD_BUG_ON(UBIFS_DENT_NODE_SZ < MIN_WRITE_SZ);
1879 BUILD_BUG_ON(UBIFS_XENT_NODE_SZ < MIN_WRITE_SZ);
1880 BUILD_BUG_ON(UBIFS_TRUN_NODE_SZ < MIN_WRITE_SZ);
1881
1882 BUILD_BUG_ON(UBIFS_MAX_DENT_NODE_SZ > UBIFS_MAX_NODE_SZ);
1883 BUILD_BUG_ON(UBIFS_MAX_XENT_NODE_SZ > UBIFS_MAX_NODE_SZ);
1884 BUILD_BUG_ON(UBIFS_MAX_DATA_NODE_SZ > UBIFS_MAX_NODE_SZ);
1885 BUILD_BUG_ON(UBIFS_MAX_INO_NODE_SZ > UBIFS_MAX_NODE_SZ);
1886
1887 /* Defined node sizes */
1888 BUILD_BUG_ON(UBIFS_SB_NODE_SZ != 4096);
1889 BUILD_BUG_ON(UBIFS_MST_NODE_SZ != 512);
1890 BUILD_BUG_ON(UBIFS_INO_NODE_SZ != 160);
1891 BUILD_BUG_ON(UBIFS_REF_NODE_SZ != 64);
1892
1893 /*
1894 * We require that PAGE_CACHE_SIZE is greater-than-or-equal-to
1895 * UBIFS_BLOCK_SIZE. It is assumed that both are powers of 2.
1896 */
1897 if (PAGE_CACHE_SIZE < UBIFS_BLOCK_SIZE) {
1898 ubifs_err("VFS page cache size is %u bytes, but UBIFS requires"
1899 " at least 4096 bytes",
1900 (unsigned int)PAGE_CACHE_SIZE);
1901 return -EINVAL;
1902 }
1903
1904 err = register_filesystem(&ubifs_fs_type);
1905 if (err) {
1906 ubifs_err("cannot register file system, error %d", err);
1907 return err;
1908 }
1909
1910 err = -ENOMEM;
1911 ubifs_inode_slab = kmem_cache_create("ubifs_inode_slab",
1912 sizeof(struct ubifs_inode), 0,
1913 SLAB_MEM_SPREAD | SLAB_RECLAIM_ACCOUNT,
1914 &inode_slab_ctor);
1915 if (!ubifs_inode_slab)
1916 goto out_reg;
1917
1918 register_shrinker(&ubifs_shrinker_info);
1919
1920 err = ubifs_compressors_init();
1921 if (err)
1922 goto out_compr;
1923
1924 return 0;
1925
1926out_compr:
1927 unregister_shrinker(&ubifs_shrinker_info);
1928 kmem_cache_destroy(ubifs_inode_slab);
1929out_reg:
1930 unregister_filesystem(&ubifs_fs_type);
1931 return err;
1932}
1933/* late_initcall to let compressors initialize first */
1934late_initcall(ubifs_init);
1935
1936static void __exit ubifs_exit(void)
1937{
1938 ubifs_assert(list_empty(&ubifs_infos));
1939 ubifs_assert(atomic_long_read(&ubifs_clean_zn_cnt) == 0);
1940
1941 ubifs_compressors_exit();
1942 unregister_shrinker(&ubifs_shrinker_info);
1943 kmem_cache_destroy(ubifs_inode_slab);
1944 unregister_filesystem(&ubifs_fs_type);
1945}
1946module_exit(ubifs_exit);
1947
1948MODULE_LICENSE("GPL");
1949MODULE_VERSION(__stringify(UBIFS_VERSION));
1950MODULE_AUTHOR("Artem Bityutskiy, Adrian Hunter");
1951MODULE_DESCRIPTION("UBIFS - UBI File System");
diff --git a/fs/ubifs/tnc.c b/fs/ubifs/tnc.c
new file mode 100644
index 000000000000..e909f4a96443
--- /dev/null
+++ b/fs/ubifs/tnc.c
@@ -0,0 +1,2956 @@
1/*
2 * This file is part of UBIFS.
3 *
4 * Copyright (C) 2006-2008 Nokia Corporation.
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms of the GNU General Public License version 2 as published by
8 * the Free Software Foundation.
9 *
10 * This program is distributed in the hope that it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13 * more details.
14 *
15 * You should have received a copy of the GNU General Public License along with
16 * this program; if not, write to the Free Software Foundation, Inc., 51
17 * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
18 *
19 * Authors: Adrian Hunter
20 * Artem Bityutskiy (Битюцкий Артём)
21 */
22
23/*
24 * This file implements TNC (Tree Node Cache) which caches indexing nodes of
25 * the UBIFS B-tree.
26 *
27 * At the moment the locking rules of the TNC tree are quite simple and
28 * straightforward. We just have a mutex and lock it when we traverse the
29 * tree. If a znode is not in memory, we read it from flash while still having
30 * the mutex locked.
31 */
32
33#include <linux/crc32.h>
34#include "ubifs.h"
35
36/*
37 * Returned codes of 'matches_name()' and 'fallible_matches_name()' functions.
38 * @NAME_LESS: name corresponding to the first argument is less than second
39 * @NAME_MATCHES: names match
40 * @NAME_GREATER: name corresponding to the second argument is greater than
41 * first
42 * @NOT_ON_MEDIA: node referred by zbranch does not exist on the media
43 *
44 * These constants were introduce to improve readability.
45 */
46enum {
47 NAME_LESS = 0,
48 NAME_MATCHES = 1,
49 NAME_GREATER = 2,
50 NOT_ON_MEDIA = 3,
51};
52
53/**
54 * insert_old_idx - record an index node obsoleted since the last commit start.
55 * @c: UBIFS file-system description object
56 * @lnum: LEB number of obsoleted index node
57 * @offs: offset of obsoleted index node
58 *
59 * Returns %0 on success, and a negative error code on failure.
60 *
61 * For recovery, there must always be a complete intact version of the index on
62 * flash at all times. That is called the "old index". It is the index as at the
63 * time of the last successful commit. Many of the index nodes in the old index
64 * may be dirty, but they must not be erased until the next successful commit
65 * (at which point that index becomes the old index).
66 *
67 * That means that the garbage collection and the in-the-gaps method of
68 * committing must be able to determine if an index node is in the old index.
69 * Most of the old index nodes can be found by looking up the TNC using the
70 * 'lookup_znode()' function. However, some of the old index nodes may have
71 * been deleted from the current index or may have been changed so much that
72 * they cannot be easily found. In those cases, an entry is added to an RB-tree.
73 * That is what this function does. The RB-tree is ordered by LEB number and
74 * offset because they uniquely identify the old index node.
75 */
76static int insert_old_idx(struct ubifs_info *c, int lnum, int offs)
77{
78 struct ubifs_old_idx *old_idx, *o;
79 struct rb_node **p, *parent = NULL;
80
81 old_idx = kmalloc(sizeof(struct ubifs_old_idx), GFP_NOFS);
82 if (unlikely(!old_idx))
83 return -ENOMEM;
84 old_idx->lnum = lnum;
85 old_idx->offs = offs;
86
87 p = &c->old_idx.rb_node;
88 while (*p) {
89 parent = *p;
90 o = rb_entry(parent, struct ubifs_old_idx, rb);
91 if (lnum < o->lnum)
92 p = &(*p)->rb_left;
93 else if (lnum > o->lnum)
94 p = &(*p)->rb_right;
95 else if (offs < o->offs)
96 p = &(*p)->rb_left;
97 else if (offs > o->offs)
98 p = &(*p)->rb_right;
99 else {
100 ubifs_err("old idx added twice!");
101 kfree(old_idx);
102 return 0;
103 }
104 }
105 rb_link_node(&old_idx->rb, parent, p);
106 rb_insert_color(&old_idx->rb, &c->old_idx);
107 return 0;
108}
109
110/**
111 * insert_old_idx_znode - record a znode obsoleted since last commit start.
112 * @c: UBIFS file-system description object
113 * @znode: znode of obsoleted index node
114 *
115 * Returns %0 on success, and a negative error code on failure.
116 */
117int insert_old_idx_znode(struct ubifs_info *c, struct ubifs_znode *znode)
118{
119 if (znode->parent) {
120 struct ubifs_zbranch *zbr;
121
122 zbr = &znode->parent->zbranch[znode->iip];
123 if (zbr->len)
124 return insert_old_idx(c, zbr->lnum, zbr->offs);
125 } else
126 if (c->zroot.len)
127 return insert_old_idx(c, c->zroot.lnum,
128 c->zroot.offs);
129 return 0;
130}
131
132/**
133 * ins_clr_old_idx_znode - record a znode obsoleted since last commit start.
134 * @c: UBIFS file-system description object
135 * @znode: znode of obsoleted index node
136 *
137 * Returns %0 on success, and a negative error code on failure.
138 */
139static int ins_clr_old_idx_znode(struct ubifs_info *c,
140 struct ubifs_znode *znode)
141{
142 int err;
143
144 if (znode->parent) {
145 struct ubifs_zbranch *zbr;
146
147 zbr = &znode->parent->zbranch[znode->iip];
148 if (zbr->len) {
149 err = insert_old_idx(c, zbr->lnum, zbr->offs);
150 if (err)
151 return err;
152 zbr->lnum = 0;
153 zbr->offs = 0;
154 zbr->len = 0;
155 }
156 } else
157 if (c->zroot.len) {
158 err = insert_old_idx(c, c->zroot.lnum, c->zroot.offs);
159 if (err)
160 return err;
161 c->zroot.lnum = 0;
162 c->zroot.offs = 0;
163 c->zroot.len = 0;
164 }
165 return 0;
166}
167
168/**
169 * destroy_old_idx - destroy the old_idx RB-tree.
170 * @c: UBIFS file-system description object
171 *
172 * During start commit, the old_idx RB-tree is used to avoid overwriting index
173 * nodes that were in the index last commit but have since been deleted. This
174 * is necessary for recovery i.e. the old index must be kept intact until the
175 * new index is successfully written. The old-idx RB-tree is used for the
176 * in-the-gaps method of writing index nodes and is destroyed every commit.
177 */
178void destroy_old_idx(struct ubifs_info *c)
179{
180 struct rb_node *this = c->old_idx.rb_node;
181 struct ubifs_old_idx *old_idx;
182
183 while (this) {
184 if (this->rb_left) {
185 this = this->rb_left;
186 continue;
187 } else if (this->rb_right) {
188 this = this->rb_right;
189 continue;
190 }
191 old_idx = rb_entry(this, struct ubifs_old_idx, rb);
192 this = rb_parent(this);
193 if (this) {
194 if (this->rb_left == &old_idx->rb)
195 this->rb_left = NULL;
196 else
197 this->rb_right = NULL;
198 }
199 kfree(old_idx);
200 }
201 c->old_idx = RB_ROOT;
202}
203
204/**
205 * copy_znode - copy a dirty znode.
206 * @c: UBIFS file-system description object
207 * @znode: znode to copy
208 *
209 * A dirty znode being committed may not be changed, so it is copied.
210 */
211static struct ubifs_znode *copy_znode(struct ubifs_info *c,
212 struct ubifs_znode *znode)
213{
214 struct ubifs_znode *zn;
215
216 zn = kmalloc(c->max_znode_sz, GFP_NOFS);
217 if (unlikely(!zn))
218 return ERR_PTR(-ENOMEM);
219
220 memcpy(zn, znode, c->max_znode_sz);
221 zn->cnext = NULL;
222 __set_bit(DIRTY_ZNODE, &zn->flags);
223 __clear_bit(COW_ZNODE, &zn->flags);
224
225 ubifs_assert(!test_bit(OBSOLETE_ZNODE, &znode->flags));
226 __set_bit(OBSOLETE_ZNODE, &znode->flags);
227
228 if (znode->level != 0) {
229 int i;
230 const int n = zn->child_cnt;
231
232 /* The children now have new parent */
233 for (i = 0; i < n; i++) {
234 struct ubifs_zbranch *zbr = &zn->zbranch[i];
235
236 if (zbr->znode)
237 zbr->znode->parent = zn;
238 }
239 }
240
241 atomic_long_inc(&c->dirty_zn_cnt);
242 return zn;
243}
244
245/**
246 * add_idx_dirt - add dirt due to a dirty znode.
247 * @c: UBIFS file-system description object
248 * @lnum: LEB number of index node
249 * @dirt: size of index node
250 *
251 * This function updates lprops dirty space and the new size of the index.
252 */
253static int add_idx_dirt(struct ubifs_info *c, int lnum, int dirt)
254{
255 c->calc_idx_sz -= ALIGN(dirt, 8);
256 return ubifs_add_dirt(c, lnum, dirt);
257}
258
259/**
260 * dirty_cow_znode - ensure a znode is not being committed.
261 * @c: UBIFS file-system description object
262 * @zbr: branch of znode to check
263 *
264 * Returns dirtied znode on success or negative error code on failure.
265 */
266static struct ubifs_znode *dirty_cow_znode(struct ubifs_info *c,
267 struct ubifs_zbranch *zbr)
268{
269 struct ubifs_znode *znode = zbr->znode;
270 struct ubifs_znode *zn;
271 int err;
272
273 if (!test_bit(COW_ZNODE, &znode->flags)) {
274 /* znode is not being committed */
275 if (!test_and_set_bit(DIRTY_ZNODE, &znode->flags)) {
276 atomic_long_inc(&c->dirty_zn_cnt);
277 atomic_long_dec(&c->clean_zn_cnt);
278 atomic_long_dec(&ubifs_clean_zn_cnt);
279 err = add_idx_dirt(c, zbr->lnum, zbr->len);
280 if (unlikely(err))
281 return ERR_PTR(err);
282 }
283 return znode;
284 }
285
286 zn = copy_znode(c, znode);
287 if (unlikely(IS_ERR(zn)))
288 return zn;
289
290 if (zbr->len) {
291 err = insert_old_idx(c, zbr->lnum, zbr->offs);
292 if (unlikely(err))
293 return ERR_PTR(err);
294 err = add_idx_dirt(c, zbr->lnum, zbr->len);
295 } else
296 err = 0;
297
298 zbr->znode = zn;
299 zbr->lnum = 0;
300 zbr->offs = 0;
301 zbr->len = 0;
302
303 if (unlikely(err))
304 return ERR_PTR(err);
305 return zn;
306}
307
308/**
309 * lnc_add - add a leaf node to the leaf node cache.
310 * @c: UBIFS file-system description object
311 * @zbr: zbranch of leaf node
312 * @node: leaf node
313 *
314 * Leaf nodes are non-index nodes directory entry nodes or data nodes. The
315 * purpose of the leaf node cache is to save re-reading the same leaf node over
316 * and over again. Most things are cached by VFS, however the file system must
317 * cache directory entries for readdir and for resolving hash collisions. The
318 * present implementation of the leaf node cache is extremely simple, and
319 * allows for error returns that are not used but that may be needed if a more
320 * complex implementation is created.
321 *
322 * Note, this function does not add the @node object to LNC directly, but
323 * allocates a copy of the object and adds the copy to LNC. The reason for this
324 * is that @node has been allocated outside of the TNC subsystem and will be
325 * used with @c->tnc_mutex unlock upon return from the TNC subsystem. But LNC
326 * may be changed at any time, e.g. freed by the shrinker.
327 */
328static int lnc_add(struct ubifs_info *c, struct ubifs_zbranch *zbr,
329 const void *node)
330{
331 int err;
332 void *lnc_node;
333 const struct ubifs_dent_node *dent = node;
334
335 ubifs_assert(!zbr->leaf);
336 ubifs_assert(zbr->len != 0);
337 ubifs_assert(is_hash_key(c, &zbr->key));
338
339 err = ubifs_validate_entry(c, dent);
340 if (err) {
341 dbg_dump_stack();
342 dbg_dump_node(c, dent);
343 return err;
344 }
345
346 lnc_node = kmalloc(zbr->len, GFP_NOFS);
347 if (!lnc_node)
348 /* We don't have to have the cache, so no error */
349 return 0;
350
351 memcpy(lnc_node, node, zbr->len);
352 zbr->leaf = lnc_node;
353 return 0;
354}
355
356 /**
357 * lnc_add_directly - add a leaf node to the leaf-node-cache.
358 * @c: UBIFS file-system description object
359 * @zbr: zbranch of leaf node
360 * @node: leaf node
361 *
362 * This function is similar to 'lnc_add()', but it does not create a copy of
363 * @node but inserts @node to TNC directly.
364 */
365static int lnc_add_directly(struct ubifs_info *c, struct ubifs_zbranch *zbr,
366 void *node)
367{
368 int err;
369
370 ubifs_assert(!zbr->leaf);
371 ubifs_assert(zbr->len != 0);
372
373 err = ubifs_validate_entry(c, node);
374 if (err) {
375 dbg_dump_stack();
376 dbg_dump_node(c, node);
377 return err;
378 }
379
380 zbr->leaf = node;
381 return 0;
382}
383
384/**
385 * lnc_free - remove a leaf node from the leaf node cache.
386 * @zbr: zbranch of leaf node
387 * @node: leaf node
388 */
389static void lnc_free(struct ubifs_zbranch *zbr)
390{
391 if (!zbr->leaf)
392 return;
393 kfree(zbr->leaf);
394 zbr->leaf = NULL;
395}
396
397/**
398 * tnc_read_node_nm - read a "hashed" leaf node.
399 * @c: UBIFS file-system description object
400 * @zbr: key and position of the node
401 * @node: node is returned here
402 *
403 * This function reads a "hashed" node defined by @zbr from the leaf node cache
404 * (in it is there) or from the hash media, in which case the node is also
405 * added to LNC. Returns zero in case of success or a negative negative error
406 * code in case of failure.
407 */
408static int tnc_read_node_nm(struct ubifs_info *c, struct ubifs_zbranch *zbr,
409 void *node)
410{
411 int err;
412
413 ubifs_assert(is_hash_key(c, &zbr->key));
414
415 if (zbr->leaf) {
416 /* Read from the leaf node cache */
417 ubifs_assert(zbr->len != 0);
418 memcpy(node, zbr->leaf, zbr->len);
419 return 0;
420 }
421
422 err = ubifs_tnc_read_node(c, zbr, node);
423 if (err)
424 return err;
425
426 /* Add the node to the leaf node cache */
427 err = lnc_add(c, zbr, node);
428 return err;
429}
430
431/**
432 * try_read_node - read a node if it is a node.
433 * @c: UBIFS file-system description object
434 * @buf: buffer to read to
435 * @type: node type
436 * @len: node length (not aligned)
437 * @lnum: LEB number of node to read
438 * @offs: offset of node to read
439 *
440 * This function tries to read a node of known type and length, checks it and
441 * stores it in @buf. This function returns %1 if a node is present and %0 if
442 * a node is not present. A negative error code is returned for I/O errors.
443 * This function performs that same function as ubifs_read_node except that
444 * it does not require that there is actually a node present and instead
445 * the return code indicates if a node was read.
446 */
447static int try_read_node(const struct ubifs_info *c, void *buf, int type,
448 int len, int lnum, int offs)
449{
450 int err, node_len;
451 struct ubifs_ch *ch = buf;
452 uint32_t crc, node_crc;
453
454 dbg_io("LEB %d:%d, %s, length %d", lnum, offs, dbg_ntype(type), len);
455
456 err = ubi_read(c->ubi, lnum, buf, offs, len);
457 if (err) {
458 ubifs_err("cannot read node type %d from LEB %d:%d, error %d",
459 type, lnum, offs, err);
460 return err;
461 }
462
463 if (le32_to_cpu(ch->magic) != UBIFS_NODE_MAGIC)
464 return 0;
465
466 if (ch->node_type != type)
467 return 0;
468
469 node_len = le32_to_cpu(ch->len);
470 if (node_len != len)
471 return 0;
472
473 crc = crc32(UBIFS_CRC32_INIT, buf + 8, node_len - 8);
474 node_crc = le32_to_cpu(ch->crc);
475 if (crc != node_crc)
476 return 0;
477
478 return 1;
479}
480
481/**
482 * fallible_read_node - try to read a leaf node.
483 * @c: UBIFS file-system description object
484 * @key: key of node to read
485 * @zbr: position of node
486 * @node: node returned
487 *
488 * This function tries to read a node and returns %1 if the node is read, %0
489 * if the node is not present, and a negative error code in the case of error.
490 */
491static int fallible_read_node(struct ubifs_info *c, const union ubifs_key *key,
492 struct ubifs_zbranch *zbr, void *node)
493{
494 int ret;
495
496 dbg_tnc("LEB %d:%d, key %s", zbr->lnum, zbr->offs, DBGKEY(key));
497
498 ret = try_read_node(c, node, key_type(c, key), zbr->len, zbr->lnum,
499 zbr->offs);
500 if (ret == 1) {
501 union ubifs_key node_key;
502 struct ubifs_dent_node *dent = node;
503
504 /* All nodes have key in the same place */
505 key_read(c, &dent->key, &node_key);
506 if (keys_cmp(c, key, &node_key) != 0)
507 ret = 0;
508 }
509 if (ret == 0)
510 dbg_mnt("dangling branch LEB %d:%d len %d, key %s",
511 zbr->lnum, zbr->offs, zbr->len, DBGKEY(key));
512 return ret;
513}
514
515/**
516 * matches_name - determine if a direntry or xattr entry matches a given name.
517 * @c: UBIFS file-system description object
518 * @zbr: zbranch of dent
519 * @nm: name to match
520 *
521 * This function checks if xentry/direntry referred by zbranch @zbr matches name
522 * @nm. Returns %NAME_MATCHES if it does, %NAME_LESS if the name referred by
523 * @zbr is less than @nm, and %NAME_GREATER if it is greater than @nm. In case
524 * of failure, a negative error code is returned.
525 */
526static int matches_name(struct ubifs_info *c, struct ubifs_zbranch *zbr,
527 const struct qstr *nm)
528{
529 struct ubifs_dent_node *dent;
530 int nlen, err;
531
532 /* If possible, match against the dent in the leaf node cache */
533 if (!zbr->leaf) {
534 dent = kmalloc(zbr->len, GFP_NOFS);
535 if (!dent)
536 return -ENOMEM;
537
538 err = ubifs_tnc_read_node(c, zbr, dent);
539 if (err)
540 goto out_free;
541
542 /* Add the node to the leaf node cache */
543 err = lnc_add_directly(c, zbr, dent);
544 if (err)
545 goto out_free;
546 } else
547 dent = zbr->leaf;
548
549 nlen = le16_to_cpu(dent->nlen);
550 err = memcmp(dent->name, nm->name, min_t(int, nlen, nm->len));
551 if (err == 0) {
552 if (nlen == nm->len)
553 return NAME_MATCHES;
554 else if (nlen < nm->len)
555 return NAME_LESS;
556 else
557 return NAME_GREATER;
558 } else if (err < 0)
559 return NAME_LESS;
560 else
561 return NAME_GREATER;
562
563out_free:
564 kfree(dent);
565 return err;
566}
567
568/**
569 * get_znode - get a TNC znode that may not be loaded yet.
570 * @c: UBIFS file-system description object
571 * @znode: parent znode
572 * @n: znode branch slot number
573 *
574 * This function returns the znode or a negative error code.
575 */
576static struct ubifs_znode *get_znode(struct ubifs_info *c,
577 struct ubifs_znode *znode, int n)
578{
579 struct ubifs_zbranch *zbr;
580
581 zbr = &znode->zbranch[n];
582 if (zbr->znode)
583 znode = zbr->znode;
584 else
585 znode = ubifs_load_znode(c, zbr, znode, n);
586 return znode;
587}
588
589/**
590 * tnc_next - find next TNC entry.
591 * @c: UBIFS file-system description object
592 * @zn: znode is passed and returned here
593 * @n: znode branch slot number is passed and returned here
594 *
595 * This function returns %0 if the next TNC entry is found, %-ENOENT if there is
596 * no next entry, or a negative error code otherwise.
597 */
598static int tnc_next(struct ubifs_info *c, struct ubifs_znode **zn, int *n)
599{
600 struct ubifs_znode *znode = *zn;
601 int nn = *n;
602
603 nn += 1;
604 if (nn < znode->child_cnt) {
605 *n = nn;
606 return 0;
607 }
608 while (1) {
609 struct ubifs_znode *zp;
610
611 zp = znode->parent;
612 if (!zp)
613 return -ENOENT;
614 nn = znode->iip + 1;
615 znode = zp;
616 if (nn < znode->child_cnt) {
617 znode = get_znode(c, znode, nn);
618 if (IS_ERR(znode))
619 return PTR_ERR(znode);
620 while (znode->level != 0) {
621 znode = get_znode(c, znode, 0);
622 if (IS_ERR(znode))
623 return PTR_ERR(znode);
624 }
625 nn = 0;
626 break;
627 }
628 }
629 *zn = znode;
630 *n = nn;
631 return 0;
632}
633
634/**
635 * tnc_prev - find previous TNC entry.
636 * @c: UBIFS file-system description object
637 * @zn: znode is returned here
638 * @n: znode branch slot number is passed and returned here
639 *
640 * This function returns %0 if the previous TNC entry is found, %-ENOENT if
641 * there is no next entry, or a negative error code otherwise.
642 */
643static int tnc_prev(struct ubifs_info *c, struct ubifs_znode **zn, int *n)
644{
645 struct ubifs_znode *znode = *zn;
646 int nn = *n;
647
648 if (nn > 0) {
649 *n = nn - 1;
650 return 0;
651 }
652 while (1) {
653 struct ubifs_znode *zp;
654
655 zp = znode->parent;
656 if (!zp)
657 return -ENOENT;
658 nn = znode->iip - 1;
659 znode = zp;
660 if (nn >= 0) {
661 znode = get_znode(c, znode, nn);
662 if (IS_ERR(znode))
663 return PTR_ERR(znode);
664 while (znode->level != 0) {
665 nn = znode->child_cnt - 1;
666 znode = get_znode(c, znode, nn);
667 if (IS_ERR(znode))
668 return PTR_ERR(znode);
669 }
670 nn = znode->child_cnt - 1;
671 break;
672 }
673 }
674 *zn = znode;
675 *n = nn;
676 return 0;
677}
678
679/**
680 * resolve_collision - resolve a collision.
681 * @c: UBIFS file-system description object
682 * @key: key of a directory or extended attribute entry
683 * @zn: znode is returned here
684 * @n: zbranch number is passed and returned here
685 * @nm: name of the entry
686 *
687 * This function is called for "hashed" keys to make sure that the found key
688 * really corresponds to the looked up node (directory or extended attribute
689 * entry). It returns %1 and sets @zn and @n if the collision is resolved.
690 * %0 is returned if @nm is not found and @zn and @n are set to the previous
691 * entry, i.e. to the entry after which @nm could follow if it were in TNC.
692 * This means that @n may be set to %-1 if the leftmost key in @zn is the
693 * previous one. A negative error code is returned on failures.
694 */
695static int resolve_collision(struct ubifs_info *c, const union ubifs_key *key,
696 struct ubifs_znode **zn, int *n,
697 const struct qstr *nm)
698{
699 int err;
700
701 err = matches_name(c, &(*zn)->zbranch[*n], nm);
702 if (unlikely(err < 0))
703 return err;
704 if (err == NAME_MATCHES)
705 return 1;
706
707 if (err == NAME_GREATER) {
708 /* Look left */
709 while (1) {
710 err = tnc_prev(c, zn, n);
711 if (err == -ENOENT) {
712 ubifs_assert(*n == 0);
713 *n = -1;
714 return 0;
715 }
716 if (err < 0)
717 return err;
718 if (keys_cmp(c, &(*zn)->zbranch[*n].key, key)) {
719 /*
720 * We have found the branch after which we would
721 * like to insert, but inserting in this znode
722 * may still be wrong. Consider the following 3
723 * znodes, in the case where we are resolving a
724 * collision with Key2.
725 *
726 * znode zp
727 * ----------------------
728 * level 1 | Key0 | Key1 |
729 * -----------------------
730 * | |
731 * znode za | | znode zb
732 * ------------ ------------
733 * level 0 | Key0 | | Key2 |
734 * ------------ ------------
735 *
736 * The lookup finds Key2 in znode zb. Lets say
737 * there is no match and the name is greater so
738 * we look left. When we find Key0, we end up
739 * here. If we return now, we will insert into
740 * znode za at slot n = 1. But that is invalid
741 * according to the parent's keys. Key2 must
742 * be inserted into znode zb.
743 *
744 * Note, this problem is not relevant for the
745 * case when we go right, because
746 * 'tnc_insert()' would correct the parent key.
747 */
748 if (*n == (*zn)->child_cnt - 1) {
749 err = tnc_next(c, zn, n);
750 if (err) {
751 /* Should be impossible */
752 ubifs_assert(0);
753 if (err == -ENOENT)
754 err = -EINVAL;
755 return err;
756 }
757 ubifs_assert(*n == 0);
758 *n = -1;
759 }
760 return 0;
761 }
762 err = matches_name(c, &(*zn)->zbranch[*n], nm);
763 if (err < 0)
764 return err;
765 if (err == NAME_LESS)
766 return 0;
767 if (err == NAME_MATCHES)
768 return 1;
769 ubifs_assert(err == NAME_GREATER);
770 }
771 } else {
772 int nn = *n;
773 struct ubifs_znode *znode = *zn;
774
775 /* Look right */
776 while (1) {
777 err = tnc_next(c, &znode, &nn);
778 if (err == -ENOENT)
779 return 0;
780 if (err < 0)
781 return err;
782 if (keys_cmp(c, &znode->zbranch[nn].key, key))
783 return 0;
784 err = matches_name(c, &znode->zbranch[nn], nm);
785 if (err < 0)
786 return err;
787 if (err == NAME_GREATER)
788 return 0;
789 *zn = znode;
790 *n = nn;
791 if (err == NAME_MATCHES)
792 return 1;
793 ubifs_assert(err == NAME_LESS);
794 }
795 }
796}
797
798/**
799 * fallible_matches_name - determine if a dent matches a given name.
800 * @c: UBIFS file-system description object
801 * @zbr: zbranch of dent
802 * @nm: name to match
803 *
804 * This is a "fallible" version of 'matches_name()' function which does not
805 * panic if the direntry/xentry referred by @zbr does not exist on the media.
806 *
807 * This function checks if xentry/direntry referred by zbranch @zbr matches name
808 * @nm. Returns %NAME_MATCHES it does, %NAME_LESS if the name referred by @zbr
809 * is less than @nm, %NAME_GREATER if it is greater than @nm, and @NOT_ON_MEDIA
810 * if xentry/direntry referred by @zbr does not exist on the media. A negative
811 * error code is returned in case of failure.
812 */
813static int fallible_matches_name(struct ubifs_info *c,
814 struct ubifs_zbranch *zbr,
815 const struct qstr *nm)
816{
817 struct ubifs_dent_node *dent;
818 int nlen, err;
819
820 /* If possible, match against the dent in the leaf node cache */
821 if (!zbr->leaf) {
822 dent = kmalloc(zbr->len, GFP_NOFS);
823 if (!dent)
824 return -ENOMEM;
825
826 err = fallible_read_node(c, &zbr->key, zbr, dent);
827 if (err < 0)
828 goto out_free;
829 if (err == 0) {
830 /* The node was not present */
831 err = NOT_ON_MEDIA;
832 goto out_free;
833 }
834 ubifs_assert(err == 1);
835
836 err = lnc_add_directly(c, zbr, dent);
837 if (err)
838 goto out_free;
839 } else
840 dent = zbr->leaf;
841
842 nlen = le16_to_cpu(dent->nlen);
843 err = memcmp(dent->name, nm->name, min_t(int, nlen, nm->len));
844 if (err == 0) {
845 if (nlen == nm->len)
846 return NAME_MATCHES;
847 else if (nlen < nm->len)
848 return NAME_LESS;
849 else
850 return NAME_GREATER;
851 } else if (err < 0)
852 return NAME_LESS;
853 else
854 return NAME_GREATER;
855
856out_free:
857 kfree(dent);
858 return err;
859}
860
861/**
862 * fallible_resolve_collision - resolve a collision even if nodes are missing.
863 * @c: UBIFS file-system description object
864 * @key: key
865 * @zn: znode is returned here
866 * @n: branch number is passed and returned here
867 * @nm: name of directory entry
868 * @adding: indicates caller is adding a key to the TNC
869 *
870 * This is a "fallible" version of the 'resolve_collision()' function which
871 * does not panic if one of the nodes referred to by TNC does not exist on the
872 * media. This may happen when replaying the journal if a deleted node was
873 * Garbage-collected and the commit was not done. A branch that refers to a node
874 * that is not present is called a dangling branch. The following are the return
875 * codes for this function:
876 * o if @nm was found, %1 is returned and @zn and @n are set to the found
877 * branch;
878 * o if we are @adding and @nm was not found, %0 is returned;
879 * o if we are not @adding and @nm was not found, but a dangling branch was
880 * found, then %1 is returned and @zn and @n are set to the dangling branch;
881 * o a negative error code is returned in case of failure.
882 */
883static int fallible_resolve_collision(struct ubifs_info *c,
884 const union ubifs_key *key,
885 struct ubifs_znode **zn, int *n,
886 const struct qstr *nm, int adding)
887{
888 struct ubifs_znode *o_znode = NULL, *znode = *zn;
889 int uninitialized_var(o_n), err, cmp, unsure = 0, nn = *n;
890
891 cmp = fallible_matches_name(c, &znode->zbranch[nn], nm);
892 if (unlikely(cmp < 0))
893 return cmp;
894 if (cmp == NAME_MATCHES)
895 return 1;
896 if (cmp == NOT_ON_MEDIA) {
897 o_znode = znode;
898 o_n = nn;
899 /*
900 * We are unlucky and hit a dangling branch straight away.
901 * Now we do not really know where to go to find the needed
902 * branch - to the left or to the right. Well, let's try left.
903 */
904 unsure = 1;
905 } else if (!adding)
906 unsure = 1; /* Remove a dangling branch wherever it is */
907
908 if (cmp == NAME_GREATER || unsure) {
909 /* Look left */
910 while (1) {
911 err = tnc_prev(c, zn, n);
912 if (err == -ENOENT) {
913 ubifs_assert(*n == 0);
914 *n = -1;
915 break;
916 }
917 if (err < 0)
918 return err;
919 if (keys_cmp(c, &(*zn)->zbranch[*n].key, key)) {
920 /* See comments in 'resolve_collision()' */
921 if (*n == (*zn)->child_cnt - 1) {
922 err = tnc_next(c, zn, n);
923 if (err) {
924 /* Should be impossible */
925 ubifs_assert(0);
926 if (err == -ENOENT)
927 err = -EINVAL;
928 return err;
929 }
930 ubifs_assert(*n == 0);
931 *n = -1;
932 }
933 break;
934 }
935 err = fallible_matches_name(c, &(*zn)->zbranch[*n], nm);
936 if (err < 0)
937 return err;
938 if (err == NAME_MATCHES)
939 return 1;
940 if (err == NOT_ON_MEDIA) {
941 o_znode = *zn;
942 o_n = *n;
943 continue;
944 }
945 if (!adding)
946 continue;
947 if (err == NAME_LESS)
948 break;
949 else
950 unsure = 0;
951 }
952 }
953
954 if (cmp == NAME_LESS || unsure) {
955 /* Look right */
956 *zn = znode;
957 *n = nn;
958 while (1) {
959 err = tnc_next(c, &znode, &nn);
960 if (err == -ENOENT)
961 break;
962 if (err < 0)
963 return err;
964 if (keys_cmp(c, &znode->zbranch[nn].key, key))
965 break;
966 err = fallible_matches_name(c, &znode->zbranch[nn], nm);
967 if (err < 0)
968 return err;
969 if (err == NAME_GREATER)
970 break;
971 *zn = znode;
972 *n = nn;
973 if (err == NAME_MATCHES)
974 return 1;
975 if (err == NOT_ON_MEDIA) {
976 o_znode = znode;
977 o_n = nn;
978 }
979 }
980 }
981
982 /* Never match a dangling branch when adding */
983 if (adding || !o_znode)
984 return 0;
985
986 dbg_mnt("dangling match LEB %d:%d len %d %s",
987 o_znode->zbranch[o_n].lnum, o_znode->zbranch[o_n].offs,
988 o_znode->zbranch[o_n].len, DBGKEY(key));
989 *zn = o_znode;
990 *n = o_n;
991 return 1;
992}
993
994/**
995 * matches_position - determine if a zbranch matches a given position.
996 * @zbr: zbranch of dent
997 * @lnum: LEB number of dent to match
998 * @offs: offset of dent to match
999 *
1000 * This function returns %1 if @lnum:@offs matches, and %0 otherwise.
1001 */
1002static int matches_position(struct ubifs_zbranch *zbr, int lnum, int offs)
1003{
1004 if (zbr->lnum == lnum && zbr->offs == offs)
1005 return 1;
1006 else
1007 return 0;
1008}
1009
1010/**
1011 * resolve_collision_directly - resolve a collision directly.
1012 * @c: UBIFS file-system description object
1013 * @key: key of directory entry
1014 * @zn: znode is passed and returned here
1015 * @n: zbranch number is passed and returned here
1016 * @lnum: LEB number of dent node to match
1017 * @offs: offset of dent node to match
1018 *
1019 * This function is used for "hashed" keys to make sure the found directory or
1020 * extended attribute entry node is what was looked for. It is used when the
1021 * flash address of the right node is known (@lnum:@offs) which makes it much
1022 * easier to resolve collisions (no need to read entries and match full
1023 * names). This function returns %1 and sets @zn and @n if the collision is
1024 * resolved, %0 if @lnum:@offs is not found and @zn and @n are set to the
1025 * previous directory entry. Otherwise a negative error code is returned.
1026 */
1027static int resolve_collision_directly(struct ubifs_info *c,
1028 const union ubifs_key *key,
1029 struct ubifs_znode **zn, int *n,
1030 int lnum, int offs)
1031{
1032 struct ubifs_znode *znode;
1033 int nn, err;
1034
1035 znode = *zn;
1036 nn = *n;
1037 if (matches_position(&znode->zbranch[nn], lnum, offs))
1038 return 1;
1039
1040 /* Look left */
1041 while (1) {
1042 err = tnc_prev(c, &znode, &nn);
1043 if (err == -ENOENT)
1044 break;
1045 if (err < 0)
1046 return err;
1047 if (keys_cmp(c, &znode->zbranch[nn].key, key))
1048 break;
1049 if (matches_position(&znode->zbranch[nn], lnum, offs)) {
1050 *zn = znode;
1051 *n = nn;
1052 return 1;
1053 }
1054 }
1055
1056 /* Look right */
1057 znode = *zn;
1058 nn = *n;
1059 while (1) {
1060 err = tnc_next(c, &znode, &nn);
1061 if (err == -ENOENT)
1062 return 0;
1063 if (err < 0)
1064 return err;
1065 if (keys_cmp(c, &znode->zbranch[nn].key, key))
1066 return 0;
1067 *zn = znode;
1068 *n = nn;
1069 if (matches_position(&znode->zbranch[nn], lnum, offs))
1070 return 1;
1071 }
1072}
1073
1074/**
1075 * dirty_cow_bottom_up - dirty a znode and its ancestors.
1076 * @c: UBIFS file-system description object
1077 * @znode: znode to dirty
1078 *
1079 * If we do not have a unique key that resides in a znode, then we cannot
1080 * dirty that znode from the top down (i.e. by using lookup_level0_dirty)
1081 * This function records the path back to the last dirty ancestor, and then
1082 * dirties the znodes on that path.
1083 */
1084static struct ubifs_znode *dirty_cow_bottom_up(struct ubifs_info *c,
1085 struct ubifs_znode *znode)
1086{
1087 struct ubifs_znode *zp;
1088 int *path = c->bottom_up_buf, p = 0;
1089
1090 ubifs_assert(c->zroot.znode);
1091 ubifs_assert(znode);
1092 if (c->zroot.znode->level > BOTTOM_UP_HEIGHT) {
1093 kfree(c->bottom_up_buf);
1094 c->bottom_up_buf = kmalloc(c->zroot.znode->level * sizeof(int),
1095 GFP_NOFS);
1096 if (!c->bottom_up_buf)
1097 return ERR_PTR(-ENOMEM);
1098 path = c->bottom_up_buf;
1099 }
1100 if (c->zroot.znode->level) {
1101 /* Go up until parent is dirty */
1102 while (1) {
1103 int n;
1104
1105 zp = znode->parent;
1106 if (!zp)
1107 break;
1108 n = znode->iip;
1109 ubifs_assert(p < c->zroot.znode->level);
1110 path[p++] = n;
1111 if (!zp->cnext && ubifs_zn_dirty(znode))
1112 break;
1113 znode = zp;
1114 }
1115 }
1116
1117 /* Come back down, dirtying as we go */
1118 while (1) {
1119 struct ubifs_zbranch *zbr;
1120
1121 zp = znode->parent;
1122 if (zp) {
1123 ubifs_assert(path[p - 1] >= 0);
1124 ubifs_assert(path[p - 1] < zp->child_cnt);
1125 zbr = &zp->zbranch[path[--p]];
1126 znode = dirty_cow_znode(c, zbr);
1127 } else {
1128 ubifs_assert(znode == c->zroot.znode);
1129 znode = dirty_cow_znode(c, &c->zroot);
1130 }
1131 if (unlikely(IS_ERR(znode)) || !p)
1132 break;
1133 ubifs_assert(path[p - 1] >= 0);
1134 ubifs_assert(path[p - 1] < znode->child_cnt);
1135 znode = znode->zbranch[path[p - 1]].znode;
1136 }
1137
1138 return znode;
1139}
1140
1141/**
1142 * ubifs_lookup_level0 - search for zero-level znode.
1143 * @c: UBIFS file-system description object
1144 * @key: key to lookup
1145 * @zn: znode is returned here
1146 * @n: znode branch slot number is returned here
1147 *
1148 * This function looks up the TNC tree and search for zero-level znode which
1149 * refers key @key. The found zero-level znode is returned in @zn. There are 3
1150 * cases:
1151 * o exact match, i.e. the found zero-level znode contains key @key, then %1
1152 * is returned and slot number of the matched branch is stored in @n;
1153 * o not exact match, which means that zero-level znode does not contain
1154 * @key, then %0 is returned and slot number of the closed branch is stored
1155 * in @n;
1156 * o @key is so small that it is even less than the lowest key of the
1157 * leftmost zero-level node, then %0 is returned and %0 is stored in @n.
1158 *
1159 * Note, when the TNC tree is traversed, some znodes may be absent, then this
1160 * function reads corresponding indexing nodes and inserts them to TNC. In
1161 * case of failure, a negative error code is returned.
1162 */
1163int ubifs_lookup_level0(struct ubifs_info *c, const union ubifs_key *key,
1164 struct ubifs_znode **zn, int *n)
1165{
1166 int err, exact;
1167 struct ubifs_znode *znode;
1168 unsigned long time = get_seconds();
1169
1170 dbg_tnc("search key %s", DBGKEY(key));
1171
1172 znode = c->zroot.znode;
1173 if (unlikely(!znode)) {
1174 znode = ubifs_load_znode(c, &c->zroot, NULL, 0);
1175 if (IS_ERR(znode))
1176 return PTR_ERR(znode);
1177 }
1178
1179 znode->time = time;
1180
1181 while (1) {
1182 struct ubifs_zbranch *zbr;
1183
1184 exact = ubifs_search_zbranch(c, znode, key, n);
1185
1186 if (znode->level == 0)
1187 break;
1188
1189 if (*n < 0)
1190 *n = 0;
1191 zbr = &znode->zbranch[*n];
1192
1193 if (zbr->znode) {
1194 znode->time = time;
1195 znode = zbr->znode;
1196 continue;
1197 }
1198
1199 /* znode is not in TNC cache, load it from the media */
1200 znode = ubifs_load_znode(c, zbr, znode, *n);
1201 if (IS_ERR(znode))
1202 return PTR_ERR(znode);
1203 }
1204
1205 *zn = znode;
1206 if (exact || !is_hash_key(c, key) || *n != -1) {
1207 dbg_tnc("found %d, lvl %d, n %d", exact, znode->level, *n);
1208 return exact;
1209 }
1210
1211 /*
1212 * Here is a tricky place. We have not found the key and this is a
1213 * "hashed" key, which may collide. The rest of the code deals with
1214 * situations like this:
1215 *
1216 * | 3 | 5 |
1217 * / \
1218 * | 3 | 5 | | 6 | 7 | (x)
1219 *
1220 * Or more a complex example:
1221 *
1222 * | 1 | 5 |
1223 * / \
1224 * | 1 | 3 | | 5 | 8 |
1225 * \ /
1226 * | 5 | 5 | | 6 | 7 | (x)
1227 *
1228 * In the examples, if we are looking for key "5", we may reach nodes
1229 * marked with "(x)". In this case what we have do is to look at the
1230 * left and see if there is "5" key there. If there is, we have to
1231 * return it.
1232 *
1233 * Note, this whole situation is possible because we allow to have
1234 * elements which are equivalent to the next key in the parent in the
1235 * children of current znode. For example, this happens if we split a
1236 * znode like this: | 3 | 5 | 5 | 6 | 7 |, which results in something
1237 * like this:
1238 * | 3 | 5 |
1239 * / \
1240 * | 3 | 5 | | 5 | 6 | 7 |
1241 * ^
1242 * And this becomes what is at the first "picture" after key "5" marked
1243 * with "^" is removed. What could be done is we could prohibit
1244 * splitting in the middle of the colliding sequence. Also, when
1245 * removing the leftmost key, we would have to correct the key of the
1246 * parent node, which would introduce additional complications. Namely,
1247 * if we changed the the leftmost key of the parent znode, the garbage
1248 * collector would be unable to find it (GC is doing this when GC'ing
1249 * indexing LEBs). Although we already have an additional RB-tree where
1250 * we save such changed znodes (see 'ins_clr_old_idx_znode()') until
1251 * after the commit. But anyway, this does not look easy to implement
1252 * so we did not try this.
1253 */
1254 err = tnc_prev(c, &znode, n);
1255 if (err == -ENOENT) {
1256 dbg_tnc("found 0, lvl %d, n -1", znode->level);
1257 *n = -1;
1258 return 0;
1259 }
1260 if (unlikely(err < 0))
1261 return err;
1262 if (keys_cmp(c, key, &znode->zbranch[*n].key)) {
1263 dbg_tnc("found 0, lvl %d, n -1", znode->level);
1264 *n = -1;
1265 return 0;
1266 }
1267
1268 dbg_tnc("found 1, lvl %d, n %d", znode->level, *n);
1269 *zn = znode;
1270 return 1;
1271}
1272
1273/**
1274 * lookup_level0_dirty - search for zero-level znode dirtying.
1275 * @c: UBIFS file-system description object
1276 * @key: key to lookup
1277 * @zn: znode is returned here
1278 * @n: znode branch slot number is returned here
1279 *
1280 * This function looks up the TNC tree and search for zero-level znode which
1281 * refers key @key. The found zero-level znode is returned in @zn. There are 3
1282 * cases:
1283 * o exact match, i.e. the found zero-level znode contains key @key, then %1
1284 * is returned and slot number of the matched branch is stored in @n;
1285 * o not exact match, which means that zero-level znode does not contain @key
1286 * then %0 is returned and slot number of the closed branch is stored in
1287 * @n;
1288 * o @key is so small that it is even less than the lowest key of the
1289 * leftmost zero-level node, then %0 is returned and %-1 is stored in @n.
1290 *
1291 * Additionally all znodes in the path from the root to the located zero-level
1292 * znode are marked as dirty.
1293 *
1294 * Note, when the TNC tree is traversed, some znodes may be absent, then this
1295 * function reads corresponding indexing nodes and inserts them to TNC. In
1296 * case of failure, a negative error code is returned.
1297 */
1298static int lookup_level0_dirty(struct ubifs_info *c, const union ubifs_key *key,
1299 struct ubifs_znode **zn, int *n)
1300{
1301 int err, exact;
1302 struct ubifs_znode *znode;
1303 unsigned long time = get_seconds();
1304
1305 dbg_tnc("search and dirty key %s", DBGKEY(key));
1306
1307 znode = c->zroot.znode;
1308 if (unlikely(!znode)) {
1309 znode = ubifs_load_znode(c, &c->zroot, NULL, 0);
1310 if (IS_ERR(znode))
1311 return PTR_ERR(znode);
1312 }
1313
1314 znode = dirty_cow_znode(c, &c->zroot);
1315 if (IS_ERR(znode))
1316 return PTR_ERR(znode);
1317
1318 znode->time = time;
1319
1320 while (1) {
1321 struct ubifs_zbranch *zbr;
1322
1323 exact = ubifs_search_zbranch(c, znode, key, n);
1324
1325 if (znode->level == 0)
1326 break;
1327
1328 if (*n < 0)
1329 *n = 0;
1330 zbr = &znode->zbranch[*n];
1331
1332 if (zbr->znode) {
1333 znode->time = time;
1334 znode = dirty_cow_znode(c, zbr);
1335 if (IS_ERR(znode))
1336 return PTR_ERR(znode);
1337 continue;
1338 }
1339
1340 /* znode is not in TNC cache, load it from the media */
1341 znode = ubifs_load_znode(c, zbr, znode, *n);
1342 if (IS_ERR(znode))
1343 return PTR_ERR(znode);
1344 znode = dirty_cow_znode(c, zbr);
1345 if (IS_ERR(znode))
1346 return PTR_ERR(znode);
1347 }
1348
1349 *zn = znode;
1350 if (exact || !is_hash_key(c, key) || *n != -1) {
1351 dbg_tnc("found %d, lvl %d, n %d", exact, znode->level, *n);
1352 return exact;
1353 }
1354
1355 /*
1356 * See huge comment at 'lookup_level0_dirty()' what is the rest of the
1357 * code.
1358 */
1359 err = tnc_prev(c, &znode, n);
1360 if (err == -ENOENT) {
1361 *n = -1;
1362 dbg_tnc("found 0, lvl %d, n -1", znode->level);
1363 return 0;
1364 }
1365 if (unlikely(err < 0))
1366 return err;
1367 if (keys_cmp(c, key, &znode->zbranch[*n].key)) {
1368 *n = -1;
1369 dbg_tnc("found 0, lvl %d, n -1", znode->level);
1370 return 0;
1371 }
1372
1373 if (znode->cnext || !ubifs_zn_dirty(znode)) {
1374 znode = dirty_cow_bottom_up(c, znode);
1375 if (IS_ERR(znode))
1376 return PTR_ERR(znode);
1377 }
1378
1379 dbg_tnc("found 1, lvl %d, n %d", znode->level, *n);
1380 *zn = znode;
1381 return 1;
1382}
1383
1384/**
1385 * ubifs_tnc_lookup - look up a file-system node.
1386 * @c: UBIFS file-system description object
1387 * @key: node key to lookup
1388 * @node: the node is returned here
1389 *
1390 * This function look up and reads node with key @key. The caller has to make
1391 * sure the @node buffer is large enough to fit the node. Returns zero in case
1392 * of success, %-ENOENT if the node was not found, and a negative error code in
1393 * case of failure.
1394 */
1395int ubifs_tnc_lookup(struct ubifs_info *c, const union ubifs_key *key,
1396 void *node)
1397{
1398 int found, n, err;
1399 struct ubifs_znode *znode;
1400 struct ubifs_zbranch zbr, *zt;
1401
1402 mutex_lock(&c->tnc_mutex);
1403 found = ubifs_lookup_level0(c, key, &znode, &n);
1404 if (!found) {
1405 err = -ENOENT;
1406 goto out;
1407 } else if (found < 0) {
1408 err = found;
1409 goto out;
1410 }
1411 zt = &znode->zbranch[n];
1412 if (is_hash_key(c, key)) {
1413 /*
1414 * In this case the leaf node cache gets used, so we pass the
1415 * address of the zbranch and keep the mutex locked
1416 */
1417 err = tnc_read_node_nm(c, zt, node);
1418 goto out;
1419 }
1420 zbr = znode->zbranch[n];
1421 mutex_unlock(&c->tnc_mutex);
1422
1423 err = ubifs_tnc_read_node(c, &zbr, node);
1424 return err;
1425
1426out:
1427 mutex_unlock(&c->tnc_mutex);
1428 return err;
1429}
1430
1431/**
1432 * ubifs_tnc_locate - look up a file-system node and return it and its location.
1433 * @c: UBIFS file-system description object
1434 * @key: node key to lookup
1435 * @node: the node is returned here
1436 * @lnum: LEB number is returned here
1437 * @offs: offset is returned here
1438 *
1439 * This function is the same as 'ubifs_tnc_lookup()' but it returns the node
1440 * location also. See 'ubifs_tnc_lookup()'.
1441 */
1442int ubifs_tnc_locate(struct ubifs_info *c, const union ubifs_key *key,
1443 void *node, int *lnum, int *offs)
1444{
1445 int found, n, err;
1446 struct ubifs_znode *znode;
1447 struct ubifs_zbranch zbr, *zt;
1448
1449 mutex_lock(&c->tnc_mutex);
1450 found = ubifs_lookup_level0(c, key, &znode, &n);
1451 if (!found) {
1452 err = -ENOENT;
1453 goto out;
1454 } else if (found < 0) {
1455 err = found;
1456 goto out;
1457 }
1458 zt = &znode->zbranch[n];
1459 if (is_hash_key(c, key)) {
1460 /*
1461 * In this case the leaf node cache gets used, so we pass the
1462 * address of the zbranch and keep the mutex locked
1463 */
1464 *lnum = zt->lnum;
1465 *offs = zt->offs;
1466 err = tnc_read_node_nm(c, zt, node);
1467 goto out;
1468 }
1469 zbr = znode->zbranch[n];
1470 mutex_unlock(&c->tnc_mutex);
1471
1472 *lnum = zbr.lnum;
1473 *offs = zbr.offs;
1474
1475 err = ubifs_tnc_read_node(c, &zbr, node);
1476 return err;
1477
1478out:
1479 mutex_unlock(&c->tnc_mutex);
1480 return err;
1481}
1482
1483/**
1484 * do_lookup_nm- look up a "hashed" node.
1485 * @c: UBIFS file-system description object
1486 * @key: node key to lookup
1487 * @node: the node is returned here
1488 * @nm: node name
1489 *
1490 * This function look up and reads a node which contains name hash in the key.
1491 * Since the hash may have collisions, there may be many nodes with the same
1492 * key, so we have to sequentially look to all of them until the needed one is
1493 * found. This function returns zero in case of success, %-ENOENT if the node
1494 * was not found, and a negative error code in case of failure.
1495 */
1496static int do_lookup_nm(struct ubifs_info *c, const union ubifs_key *key,
1497 void *node, const struct qstr *nm)
1498{
1499 int found, n, err;
1500 struct ubifs_znode *znode;
1501 struct ubifs_zbranch zbr;
1502
1503 dbg_tnc("name '%.*s' key %s", nm->len, nm->name, DBGKEY(key));
1504 mutex_lock(&c->tnc_mutex);
1505 found = ubifs_lookup_level0(c, key, &znode, &n);
1506 if (!found) {
1507 err = -ENOENT;
1508 goto out_unlock;
1509 } else if (found < 0) {
1510 err = found;
1511 goto out_unlock;
1512 }
1513
1514 ubifs_assert(n >= 0);
1515
1516 err = resolve_collision(c, key, &znode, &n, nm);
1517 dbg_tnc("rc returned %d, znode %p, n %d", err, znode, n);
1518 if (unlikely(err < 0))
1519 goto out_unlock;
1520 if (err == 0) {
1521 err = -ENOENT;
1522 goto out_unlock;
1523 }
1524
1525 zbr = znode->zbranch[n];
1526 mutex_unlock(&c->tnc_mutex);
1527
1528 err = tnc_read_node_nm(c, &zbr, node);
1529 return err;
1530
1531out_unlock:
1532 mutex_unlock(&c->tnc_mutex);
1533 return err;
1534}
1535
1536/**
1537 * ubifs_tnc_lookup_nm - look up a "hashed" node.
1538 * @c: UBIFS file-system description object
1539 * @key: node key to lookup
1540 * @node: the node is returned here
1541 * @nm: node name
1542 *
1543 * This function look up and reads a node which contains name hash in the key.
1544 * Since the hash may have collisions, there may be many nodes with the same
1545 * key, so we have to sequentially look to all of them until the needed one is
1546 * found. This function returns zero in case of success, %-ENOENT if the node
1547 * was not found, and a negative error code in case of failure.
1548 */
1549int ubifs_tnc_lookup_nm(struct ubifs_info *c, const union ubifs_key *key,
1550 void *node, const struct qstr *nm)
1551{
1552 int err, len;
1553 const struct ubifs_dent_node *dent = node;
1554
1555 /*
1556 * We assume that in most of the cases there are no name collisions and
1557 * 'ubifs_tnc_lookup()' returns us the right direntry.
1558 */
1559 err = ubifs_tnc_lookup(c, key, node);
1560 if (err)
1561 return err;
1562
1563 len = le16_to_cpu(dent->nlen);
1564 if (nm->len == len && !memcmp(dent->name, nm->name, len))
1565 return 0;
1566
1567 /*
1568 * Unluckily, there are hash collisions and we have to iterate over
1569 * them look at each direntry with colliding name hash sequentially.
1570 */
1571 return do_lookup_nm(c, key, node, nm);
1572}
1573
1574/**
1575 * correct_parent_keys - correct parent znodes' keys.
1576 * @c: UBIFS file-system description object
1577 * @znode: znode to correct parent znodes for
1578 *
1579 * This is a helper function for 'tnc_insert()'. When the key of the leftmost
1580 * zbranch changes, keys of parent znodes have to be corrected. This helper
1581 * function is called in such situations and corrects the keys if needed.
1582 */
1583static void correct_parent_keys(const struct ubifs_info *c,
1584 struct ubifs_znode *znode)
1585{
1586 union ubifs_key *key, *key1;
1587
1588 ubifs_assert(znode->parent);
1589 ubifs_assert(znode->iip == 0);
1590
1591 key = &znode->zbranch[0].key;
1592 key1 = &znode->parent->zbranch[0].key;
1593
1594 while (keys_cmp(c, key, key1) < 0) {
1595 key_copy(c, key, key1);
1596 znode = znode->parent;
1597 znode->alt = 1;
1598 if (!znode->parent || znode->iip)
1599 break;
1600 key1 = &znode->parent->zbranch[0].key;
1601 }
1602}
1603
1604/**
1605 * insert_zbranch - insert a zbranch into a znode.
1606 * @znode: znode into which to insert
1607 * @zbr: zbranch to insert
1608 * @n: slot number to insert to
1609 *
1610 * This is a helper function for 'tnc_insert()'. UBIFS does not allow "gaps" in
1611 * znode's array of zbranches and keeps zbranches consolidated, so when a new
1612 * zbranch has to be inserted to the @znode->zbranches[]' array at the @n-th
1613 * slot, zbranches starting from @n have to be moved right.
1614 */
1615static void insert_zbranch(struct ubifs_znode *znode,
1616 const struct ubifs_zbranch *zbr, int n)
1617{
1618 int i;
1619
1620 ubifs_assert(ubifs_zn_dirty(znode));
1621
1622 if (znode->level) {
1623 for (i = znode->child_cnt; i > n; i--) {
1624 znode->zbranch[i] = znode->zbranch[i - 1];
1625 if (znode->zbranch[i].znode)
1626 znode->zbranch[i].znode->iip = i;
1627 }
1628 if (zbr->znode)
1629 zbr->znode->iip = n;
1630 } else
1631 for (i = znode->child_cnt; i > n; i--)
1632 znode->zbranch[i] = znode->zbranch[i - 1];
1633
1634 znode->zbranch[n] = *zbr;
1635 znode->child_cnt += 1;
1636
1637 /*
1638 * After inserting at slot zero, the lower bound of the key range of
1639 * this znode may have changed. If this znode is subsequently split
1640 * then the upper bound of the key range may change, and furthermore
1641 * it could change to be lower than the original lower bound. If that
1642 * happens, then it will no longer be possible to find this znode in the
1643 * TNC using the key from the index node on flash. That is bad because
1644 * if it is not found, we will assume it is obsolete and may overwrite
1645 * it. Then if there is an unclean unmount, we will start using the
1646 * old index which will be broken.
1647 *
1648 * So we first mark znodes that have insertions at slot zero, and then
1649 * if they are split we add their lnum/offs to the old_idx tree.
1650 */
1651 if (n == 0)
1652 znode->alt = 1;
1653}
1654
1655/**
1656 * tnc_insert - insert a node into TNC.
1657 * @c: UBIFS file-system description object
1658 * @znode: znode to insert into
1659 * @zbr: branch to insert
1660 * @n: slot number to insert new zbranch to
1661 *
1662 * This function inserts a new node described by @zbr into znode @znode. If
1663 * znode does not have a free slot for new zbranch, it is split. Parent znodes
1664 * are splat as well if needed. Returns zero in case of success or a negative
1665 * error code in case of failure.
1666 */
1667static int tnc_insert(struct ubifs_info *c, struct ubifs_znode *znode,
1668 struct ubifs_zbranch *zbr, int n)
1669{
1670 struct ubifs_znode *zn, *zi, *zp;
1671 int i, keep, move, appending = 0;
1672 union ubifs_key *key = &zbr->key;
1673
1674 ubifs_assert(n >= 0 && n <= c->fanout);
1675
1676 /* Implement naive insert for now */
1677again:
1678 zp = znode->parent;
1679 if (znode->child_cnt < c->fanout) {
1680 ubifs_assert(n != c->fanout);
1681 dbg_tnc("inserted at %d level %d, key %s", n, znode->level,
1682 DBGKEY(key));
1683
1684 insert_zbranch(znode, zbr, n);
1685
1686 /* Ensure parent's key is correct */
1687 if (n == 0 && zp && znode->iip == 0)
1688 correct_parent_keys(c, znode);
1689
1690 return 0;
1691 }
1692
1693 /*
1694 * Unfortunately, @znode does not have more empty slots and we have to
1695 * split it.
1696 */
1697 dbg_tnc("splitting level %d, key %s", znode->level, DBGKEY(key));
1698
1699 if (znode->alt)
1700 /*
1701 * We can no longer be sure of finding this znode by key, so we
1702 * record it in the old_idx tree.
1703 */
1704 ins_clr_old_idx_znode(c, znode);
1705
1706 zn = kzalloc(c->max_znode_sz, GFP_NOFS);
1707 if (!zn)
1708 return -ENOMEM;
1709 zn->parent = zp;
1710 zn->level = znode->level;
1711
1712 /* Decide where to split */
1713 if (znode->level == 0 && n == c->fanout &&
1714 key_type(c, key) == UBIFS_DATA_KEY) {
1715 union ubifs_key *key1;
1716
1717 /*
1718 * If this is an inode which is being appended - do not split
1719 * it because no other zbranches can be inserted between
1720 * zbranches of consecutive data nodes anyway.
1721 */
1722 key1 = &znode->zbranch[n - 1].key;
1723 if (key_inum(c, key1) == key_inum(c, key) &&
1724 key_type(c, key1) == UBIFS_DATA_KEY &&
1725 key_block(c, key1) == key_block(c, key) - 1)
1726 appending = 1;
1727 }
1728
1729 if (appending) {
1730 keep = c->fanout;
1731 move = 0;
1732 } else {
1733 keep = (c->fanout + 1) / 2;
1734 move = c->fanout - keep;
1735 }
1736
1737 /*
1738 * Although we don't at present, we could look at the neighbors and see
1739 * if we can move some zbranches there.
1740 */
1741
1742 if (n < keep) {
1743 /* Insert into existing znode */
1744 zi = znode;
1745 move += 1;
1746 keep -= 1;
1747 } else {
1748 /* Insert into new znode */
1749 zi = zn;
1750 n -= keep;
1751 /* Re-parent */
1752 if (zn->level != 0)
1753 zbr->znode->parent = zn;
1754 }
1755
1756 __set_bit(DIRTY_ZNODE, &zn->flags);
1757 atomic_long_inc(&c->dirty_zn_cnt);
1758
1759 zn->child_cnt = move;
1760 znode->child_cnt = keep;
1761
1762 dbg_tnc("moving %d, keeping %d", move, keep);
1763
1764 /* Move zbranch */
1765 for (i = 0; i < move; i++) {
1766 zn->zbranch[i] = znode->zbranch[keep + i];
1767 /* Re-parent */
1768 if (zn->level != 0)
1769 if (zn->zbranch[i].znode) {
1770 zn->zbranch[i].znode->parent = zn;
1771 zn->zbranch[i].znode->iip = i;
1772 }
1773 }
1774
1775 /* Insert new key and branch */
1776 dbg_tnc("inserting at %d level %d, key %s", n, zn->level, DBGKEY(key));
1777
1778 insert_zbranch(zi, zbr, n);
1779
1780 /* Insert new znode (produced by spitting) into the parent */
1781 if (zp) {
1782 i = n;
1783 /* Locate insertion point */
1784 n = znode->iip + 1;
1785 if (appending && n != c->fanout)
1786 appending = 0;
1787
1788 if (i == 0 && zi == znode && znode->iip == 0)
1789 correct_parent_keys(c, znode);
1790
1791 /* Tail recursion */
1792 zbr->key = zn->zbranch[0].key;
1793 zbr->znode = zn;
1794 zbr->lnum = 0;
1795 zbr->offs = 0;
1796 zbr->len = 0;
1797 znode = zp;
1798
1799 goto again;
1800 }
1801
1802 /* We have to split root znode */
1803 dbg_tnc("creating new zroot at level %d", znode->level + 1);
1804
1805 zi = kzalloc(c->max_znode_sz, GFP_NOFS);
1806 if (!zi)
1807 return -ENOMEM;
1808
1809 zi->child_cnt = 2;
1810 zi->level = znode->level + 1;
1811
1812 __set_bit(DIRTY_ZNODE, &zi->flags);
1813 atomic_long_inc(&c->dirty_zn_cnt);
1814
1815 zi->zbranch[0].key = znode->zbranch[0].key;
1816 zi->zbranch[0].znode = znode;
1817 zi->zbranch[0].lnum = c->zroot.lnum;
1818 zi->zbranch[0].offs = c->zroot.offs;
1819 zi->zbranch[0].len = c->zroot.len;
1820 zi->zbranch[1].key = zn->zbranch[0].key;
1821 zi->zbranch[1].znode = zn;
1822
1823 c->zroot.lnum = 0;
1824 c->zroot.offs = 0;
1825 c->zroot.len = 0;
1826 c->zroot.znode = zi;
1827
1828 zn->parent = zi;
1829 zn->iip = 1;
1830 znode->parent = zi;
1831 znode->iip = 0;
1832
1833 return 0;
1834}
1835
1836/**
1837 * ubifs_tnc_add - add a node to TNC.
1838 * @c: UBIFS file-system description object
1839 * @key: key to add
1840 * @lnum: LEB number of node
1841 * @offs: node offset
1842 * @len: node length
1843 *
1844 * This function adds a node with key @key to TNC. The node may be new or it may
1845 * obsolete some existing one. Returns %0 on success or negative error code on
1846 * failure.
1847 */
1848int ubifs_tnc_add(struct ubifs_info *c, const union ubifs_key *key, int lnum,
1849 int offs, int len)
1850{
1851 int found, n, err = 0;
1852 struct ubifs_znode *znode;
1853
1854 mutex_lock(&c->tnc_mutex);
1855 dbg_tnc("%d:%d, len %d, key %s", lnum, offs, len, DBGKEY(key));
1856 found = lookup_level0_dirty(c, key, &znode, &n);
1857 if (!found) {
1858 struct ubifs_zbranch zbr;
1859
1860 zbr.znode = NULL;
1861 zbr.lnum = lnum;
1862 zbr.offs = offs;
1863 zbr.len = len;
1864 key_copy(c, key, &zbr.key);
1865 err = tnc_insert(c, znode, &zbr, n + 1);
1866 } else if (found == 1) {
1867 struct ubifs_zbranch *zbr = &znode->zbranch[n];
1868
1869 lnc_free(zbr);
1870 err = ubifs_add_dirt(c, zbr->lnum, zbr->len);
1871 zbr->lnum = lnum;
1872 zbr->offs = offs;
1873 zbr->len = len;
1874 } else
1875 err = found;
1876 if (!err)
1877 err = dbg_check_tnc(c, 0);
1878 mutex_unlock(&c->tnc_mutex);
1879
1880 return err;
1881}
1882
1883/**
1884 * ubifs_tnc_replace - replace a node in the TNC only if the old node is found.
1885 * @c: UBIFS file-system description object
1886 * @key: key to add
1887 * @old_lnum: LEB number of old node
1888 * @old_offs: old node offset
1889 * @lnum: LEB number of node
1890 * @offs: node offset
1891 * @len: node length
1892 *
1893 * This function replaces a node with key @key in the TNC only if the old node
1894 * is found. This function is called by garbage collection when node are moved.
1895 * Returns %0 on success or negative error code on failure.
1896 */
1897int ubifs_tnc_replace(struct ubifs_info *c, const union ubifs_key *key,
1898 int old_lnum, int old_offs, int lnum, int offs, int len)
1899{
1900 int found, n, err = 0;
1901 struct ubifs_znode *znode;
1902
1903 mutex_lock(&c->tnc_mutex);
1904 dbg_tnc("old LEB %d:%d, new LEB %d:%d, len %d, key %s", old_lnum,
1905 old_offs, lnum, offs, len, DBGKEY(key));
1906 found = lookup_level0_dirty(c, key, &znode, &n);
1907 if (found < 0) {
1908 err = found;
1909 goto out_unlock;
1910 }
1911
1912 if (found == 1) {
1913 struct ubifs_zbranch *zbr = &znode->zbranch[n];
1914
1915 found = 0;
1916 if (zbr->lnum == old_lnum && zbr->offs == old_offs) {
1917 lnc_free(zbr);
1918 err = ubifs_add_dirt(c, zbr->lnum, zbr->len);
1919 if (err)
1920 goto out_unlock;
1921 zbr->lnum = lnum;
1922 zbr->offs = offs;
1923 zbr->len = len;
1924 found = 1;
1925 } else if (is_hash_key(c, key)) {
1926 found = resolve_collision_directly(c, key, &znode, &n,
1927 old_lnum, old_offs);
1928 dbg_tnc("rc returned %d, znode %p, n %d, LEB %d:%d",
1929 found, znode, n, old_lnum, old_offs);
1930 if (found < 0) {
1931 err = found;
1932 goto out_unlock;
1933 }
1934
1935 if (found) {
1936 /* Ensure the znode is dirtied */
1937 if (znode->cnext || !ubifs_zn_dirty(znode)) {
1938 znode = dirty_cow_bottom_up(c,
1939 znode);
1940 if (IS_ERR(znode)) {
1941 err = PTR_ERR(znode);
1942 goto out_unlock;
1943 }
1944 }
1945 zbr = &znode->zbranch[n];
1946 lnc_free(zbr);
1947 err = ubifs_add_dirt(c, zbr->lnum,
1948 zbr->len);
1949 if (err)
1950 goto out_unlock;
1951 zbr->lnum = lnum;
1952 zbr->offs = offs;
1953 zbr->len = len;
1954 }
1955 }
1956 }
1957
1958 if (!found)
1959 err = ubifs_add_dirt(c, lnum, len);
1960
1961 if (!err)
1962 err = dbg_check_tnc(c, 0);
1963
1964out_unlock:
1965 mutex_unlock(&c->tnc_mutex);
1966 return err;
1967}
1968
1969/**
1970 * ubifs_tnc_add_nm - add a "hashed" node to TNC.
1971 * @c: UBIFS file-system description object
1972 * @key: key to add
1973 * @lnum: LEB number of node
1974 * @offs: node offset
1975 * @len: node length
1976 * @nm: node name
1977 *
1978 * This is the same as 'ubifs_tnc_add()' but it should be used with keys which
1979 * may have collisions, like directory entry keys.
1980 */
1981int ubifs_tnc_add_nm(struct ubifs_info *c, const union ubifs_key *key,
1982 int lnum, int offs, int len, const struct qstr *nm)
1983{
1984 int found, n, err = 0;
1985 struct ubifs_znode *znode;
1986
1987 mutex_lock(&c->tnc_mutex);
1988 dbg_tnc("LEB %d:%d, name '%.*s', key %s", lnum, offs, nm->len, nm->name,
1989 DBGKEY(key));
1990 found = lookup_level0_dirty(c, key, &znode, &n);
1991 if (found < 0) {
1992 err = found;
1993 goto out_unlock;
1994 }
1995
1996 if (found == 1) {
1997 if (c->replaying)
1998 found = fallible_resolve_collision(c, key, &znode, &n,
1999 nm, 1);
2000 else
2001 found = resolve_collision(c, key, &znode, &n, nm);
2002 dbg_tnc("rc returned %d, znode %p, n %d", found, znode, n);
2003 if (found < 0) {
2004 err = found;
2005 goto out_unlock;
2006 }
2007
2008 /* Ensure the znode is dirtied */
2009 if (znode->cnext || !ubifs_zn_dirty(znode)) {
2010 znode = dirty_cow_bottom_up(c, znode);
2011 if (IS_ERR(znode)) {
2012 err = PTR_ERR(znode);
2013 goto out_unlock;
2014 }
2015 }
2016
2017 if (found == 1) {
2018 struct ubifs_zbranch *zbr = &znode->zbranch[n];
2019
2020 lnc_free(zbr);
2021 err = ubifs_add_dirt(c, zbr->lnum, zbr->len);
2022 zbr->lnum = lnum;
2023 zbr->offs = offs;
2024 zbr->len = len;
2025 goto out_unlock;
2026 }
2027 }
2028
2029 if (!found) {
2030 struct ubifs_zbranch zbr;
2031
2032 zbr.znode = NULL;
2033 zbr.lnum = lnum;
2034 zbr.offs = offs;
2035 zbr.len = len;
2036 key_copy(c, key, &zbr.key);
2037 err = tnc_insert(c, znode, &zbr, n + 1);
2038 if (err)
2039 goto out_unlock;
2040 if (c->replaying) {
2041 /*
2042 * We did not find it in the index so there may be a
2043 * dangling branch still in the index. So we remove it
2044 * by passing 'ubifs_tnc_remove_nm()' the same key but
2045 * an unmatchable name.
2046 */
2047 struct qstr noname = { .len = 0, .name = "" };
2048
2049 err = dbg_check_tnc(c, 0);
2050 mutex_unlock(&c->tnc_mutex);
2051 if (err)
2052 return err;
2053 return ubifs_tnc_remove_nm(c, key, &noname);
2054 }
2055 }
2056
2057out_unlock:
2058 if (!err)
2059 err = dbg_check_tnc(c, 0);
2060 mutex_unlock(&c->tnc_mutex);
2061 return err;
2062}
2063
2064/**
2065 * tnc_delete - delete a znode form TNC.
2066 * @c: UBIFS file-system description object
2067 * @znode: znode to delete from
2068 * @n: zbranch slot number to delete
2069 *
2070 * This function deletes a leaf node from @n-th slot of @znode. Returns zero in
2071 * case of success and a negative error code in case of failure.
2072 */
2073static int tnc_delete(struct ubifs_info *c, struct ubifs_znode *znode, int n)
2074{
2075 struct ubifs_zbranch *zbr;
2076 struct ubifs_znode *zp;
2077 int i, err;
2078
2079 /* Delete without merge for now */
2080 ubifs_assert(znode->level == 0);
2081 ubifs_assert(n >= 0 && n < c->fanout);
2082 dbg_tnc("deleting %s", DBGKEY(&znode->zbranch[n].key));
2083
2084 zbr = &znode->zbranch[n];
2085 lnc_free(zbr);
2086
2087 err = ubifs_add_dirt(c, zbr->lnum, zbr->len);
2088 if (err) {
2089 dbg_dump_znode(c, znode);
2090 return err;
2091 }
2092
2093 /* We do not "gap" zbranch slots */
2094 for (i = n; i < znode->child_cnt - 1; i++)
2095 znode->zbranch[i] = znode->zbranch[i + 1];
2096 znode->child_cnt -= 1;
2097
2098 if (znode->child_cnt > 0)
2099 return 0;
2100
2101 /*
2102 * This was the last zbranch, we have to delete this znode from the
2103 * parent.
2104 */
2105
2106 do {
2107 ubifs_assert(!test_bit(OBSOLETE_ZNODE, &znode->flags));
2108 ubifs_assert(ubifs_zn_dirty(znode));
2109
2110 zp = znode->parent;
2111 n = znode->iip;
2112
2113 atomic_long_dec(&c->dirty_zn_cnt);
2114
2115 err = insert_old_idx_znode(c, znode);
2116 if (err)
2117 return err;
2118
2119 if (znode->cnext) {
2120 __set_bit(OBSOLETE_ZNODE, &znode->flags);
2121 atomic_long_inc(&c->clean_zn_cnt);
2122 atomic_long_inc(&ubifs_clean_zn_cnt);
2123 } else
2124 kfree(znode);
2125 znode = zp;
2126 } while (znode->child_cnt == 1); /* while removing last child */
2127
2128 /* Remove from znode, entry n - 1 */
2129 znode->child_cnt -= 1;
2130 ubifs_assert(znode->level != 0);
2131 for (i = n; i < znode->child_cnt; i++) {
2132 znode->zbranch[i] = znode->zbranch[i + 1];
2133 if (znode->zbranch[i].znode)
2134 znode->zbranch[i].znode->iip = i;
2135 }
2136
2137 /*
2138 * If this is the root and it has only 1 child then
2139 * collapse the tree.
2140 */
2141 if (!znode->parent) {
2142 while (znode->child_cnt == 1 && znode->level != 0) {
2143 zp = znode;
2144 zbr = &znode->zbranch[0];
2145 znode = get_znode(c, znode, 0);
2146 if (IS_ERR(znode))
2147 return PTR_ERR(znode);
2148 znode = dirty_cow_znode(c, zbr);
2149 if (IS_ERR(znode))
2150 return PTR_ERR(znode);
2151 znode->parent = NULL;
2152 znode->iip = 0;
2153 if (c->zroot.len) {
2154 err = insert_old_idx(c, c->zroot.lnum,
2155 c->zroot.offs);
2156 if (err)
2157 return err;
2158 }
2159 c->zroot.lnum = zbr->lnum;
2160 c->zroot.offs = zbr->offs;
2161 c->zroot.len = zbr->len;
2162 c->zroot.znode = znode;
2163 ubifs_assert(!test_bit(OBSOLETE_ZNODE,
2164 &zp->flags));
2165 ubifs_assert(test_bit(DIRTY_ZNODE, &zp->flags));
2166 atomic_long_dec(&c->dirty_zn_cnt);
2167
2168 if (zp->cnext) {
2169 __set_bit(OBSOLETE_ZNODE, &zp->flags);
2170 atomic_long_inc(&c->clean_zn_cnt);
2171 atomic_long_inc(&ubifs_clean_zn_cnt);
2172 } else
2173 kfree(zp);
2174 }
2175 }
2176
2177 return 0;
2178}
2179
2180/**
2181 * ubifs_tnc_remove - remove an index entry of a node.
2182 * @c: UBIFS file-system description object
2183 * @key: key of node
2184 *
2185 * Returns %0 on success or negative error code on failure.
2186 */
2187int ubifs_tnc_remove(struct ubifs_info *c, const union ubifs_key *key)
2188{
2189 int found, n, err = 0;
2190 struct ubifs_znode *znode;
2191
2192 mutex_lock(&c->tnc_mutex);
2193 dbg_tnc("key %s", DBGKEY(key));
2194 found = lookup_level0_dirty(c, key, &znode, &n);
2195 if (found < 0) {
2196 err = found;
2197 goto out_unlock;
2198 }
2199 if (found == 1)
2200 err = tnc_delete(c, znode, n);
2201 if (!err)
2202 err = dbg_check_tnc(c, 0);
2203
2204out_unlock:
2205 mutex_unlock(&c->tnc_mutex);
2206 return err;
2207}
2208
2209/**
2210 * ubifs_tnc_remove_nm - remove an index entry for a "hashed" node.
2211 * @c: UBIFS file-system description object
2212 * @key: key of node
2213 * @nm: directory entry name
2214 *
2215 * Returns %0 on success or negative error code on failure.
2216 */
2217int ubifs_tnc_remove_nm(struct ubifs_info *c, const union ubifs_key *key,
2218 const struct qstr *nm)
2219{
2220 int n, err;
2221 struct ubifs_znode *znode;
2222
2223 mutex_lock(&c->tnc_mutex);
2224 dbg_tnc("%.*s, key %s", nm->len, nm->name, DBGKEY(key));
2225 err = lookup_level0_dirty(c, key, &znode, &n);
2226 if (err < 0)
2227 goto out_unlock;
2228
2229 if (err) {
2230 if (c->replaying)
2231 err = fallible_resolve_collision(c, key, &znode, &n,
2232 nm, 0);
2233 else
2234 err = resolve_collision(c, key, &znode, &n, nm);
2235 dbg_tnc("rc returned %d, znode %p, n %d", err, znode, n);
2236 if (err < 0)
2237 goto out_unlock;
2238 if (err) {
2239 /* Ensure the znode is dirtied */
2240 if (znode->cnext || !ubifs_zn_dirty(znode)) {
2241 znode = dirty_cow_bottom_up(c, znode);
2242 if (IS_ERR(znode)) {
2243 err = PTR_ERR(znode);
2244 goto out_unlock;
2245 }
2246 }
2247 err = tnc_delete(c, znode, n);
2248 }
2249 }
2250
2251out_unlock:
2252 if (!err)
2253 err = dbg_check_tnc(c, 0);
2254 mutex_unlock(&c->tnc_mutex);
2255 return err;
2256}
2257
2258/**
2259 * key_in_range - determine if a key falls within a range of keys.
2260 * @c: UBIFS file-system description object
2261 * @key: key to check
2262 * @from_key: lowest key in range
2263 * @to_key: highest key in range
2264 *
2265 * This function returns %1 if the key is in range and %0 otherwise.
2266 */
2267static int key_in_range(struct ubifs_info *c, union ubifs_key *key,
2268 union ubifs_key *from_key, union ubifs_key *to_key)
2269{
2270 if (keys_cmp(c, key, from_key) < 0)
2271 return 0;
2272 if (keys_cmp(c, key, to_key) > 0)
2273 return 0;
2274 return 1;
2275}
2276
2277/**
2278 * ubifs_tnc_remove_range - remove index entries in range.
2279 * @c: UBIFS file-system description object
2280 * @from_key: lowest key to remove
2281 * @to_key: highest key to remove
2282 *
2283 * This function removes index entries starting at @from_key and ending at
2284 * @to_key. This function returns zero in case of success and a negative error
2285 * code in case of failure.
2286 */
2287int ubifs_tnc_remove_range(struct ubifs_info *c, union ubifs_key *from_key,
2288 union ubifs_key *to_key)
2289{
2290 int i, n, k, err = 0;
2291 struct ubifs_znode *znode;
2292 union ubifs_key *key;
2293
2294 mutex_lock(&c->tnc_mutex);
2295 while (1) {
2296 /* Find first level 0 znode that contains keys to remove */
2297 err = ubifs_lookup_level0(c, from_key, &znode, &n);
2298 if (err < 0)
2299 goto out_unlock;
2300
2301 if (err)
2302 key = from_key;
2303 else {
2304 err = tnc_next(c, &znode, &n);
2305 if (err == -ENOENT) {
2306 err = 0;
2307 goto out_unlock;
2308 }
2309 if (err < 0)
2310 goto out_unlock;
2311 key = &znode->zbranch[n].key;
2312 if (!key_in_range(c, key, from_key, to_key)) {
2313 err = 0;
2314 goto out_unlock;
2315 }
2316 }
2317
2318 /* Ensure the znode is dirtied */
2319 if (znode->cnext || !ubifs_zn_dirty(znode)) {
2320 znode = dirty_cow_bottom_up(c, znode);
2321 if (IS_ERR(znode)) {
2322 err = PTR_ERR(znode);
2323 goto out_unlock;
2324 }
2325 }
2326
2327 /* Remove all keys in range except the first */
2328 for (i = n + 1, k = 0; i < znode->child_cnt; i++, k++) {
2329 key = &znode->zbranch[i].key;
2330 if (!key_in_range(c, key, from_key, to_key))
2331 break;
2332 lnc_free(&znode->zbranch[i]);
2333 err = ubifs_add_dirt(c, znode->zbranch[i].lnum,
2334 znode->zbranch[i].len);
2335 if (err) {
2336 dbg_dump_znode(c, znode);
2337 goto out_unlock;
2338 }
2339 dbg_tnc("removing %s", DBGKEY(key));
2340 }
2341 if (k) {
2342 for (i = n + 1 + k; i < znode->child_cnt; i++)
2343 znode->zbranch[i - k] = znode->zbranch[i];
2344 znode->child_cnt -= k;
2345 }
2346
2347 /* Now delete the first */
2348 err = tnc_delete(c, znode, n);
2349 if (err)
2350 goto out_unlock;
2351 }
2352
2353out_unlock:
2354 if (!err)
2355 err = dbg_check_tnc(c, 0);
2356 mutex_unlock(&c->tnc_mutex);
2357 return err;
2358}
2359
2360/**
2361 * ubifs_tnc_remove_ino - remove an inode from TNC.
2362 * @c: UBIFS file-system description object
2363 * @inum: inode number to remove
2364 *
2365 * This function remove inode @inum and all the extended attributes associated
2366 * with the anode from TNC and returns zero in case of success or a negative
2367 * error code in case of failure.
2368 */
2369int ubifs_tnc_remove_ino(struct ubifs_info *c, ino_t inum)
2370{
2371 union ubifs_key key1, key2;
2372 struct ubifs_dent_node *xent, *pxent = NULL;
2373 struct qstr nm = { .name = NULL };
2374
2375 dbg_tnc("ino %lu", inum);
2376
2377 /*
2378 * Walk all extended attribute entries and remove them together with
2379 * corresponding extended attribute inodes.
2380 */
2381 lowest_xent_key(c, &key1, inum);
2382 while (1) {
2383 ino_t xattr_inum;
2384 int err;
2385
2386 xent = ubifs_tnc_next_ent(c, &key1, &nm);
2387 if (IS_ERR(xent)) {
2388 err = PTR_ERR(xent);
2389 if (err == -ENOENT)
2390 break;
2391 return err;
2392 }
2393
2394 xattr_inum = le64_to_cpu(xent->inum);
2395 dbg_tnc("xent '%s', ino %lu", xent->name, xattr_inum);
2396
2397 nm.name = xent->name;
2398 nm.len = le16_to_cpu(xent->nlen);
2399 err = ubifs_tnc_remove_nm(c, &key1, &nm);
2400 if (err) {
2401 kfree(xent);
2402 return err;
2403 }
2404
2405 lowest_ino_key(c, &key1, xattr_inum);
2406 highest_ino_key(c, &key2, xattr_inum);
2407 err = ubifs_tnc_remove_range(c, &key1, &key2);
2408 if (err) {
2409 kfree(xent);
2410 return err;
2411 }
2412
2413 kfree(pxent);
2414 pxent = xent;
2415 key_read(c, &xent->key, &key1);
2416 }
2417
2418 kfree(pxent);
2419 lowest_ino_key(c, &key1, inum);
2420 highest_ino_key(c, &key2, inum);
2421
2422 return ubifs_tnc_remove_range(c, &key1, &key2);
2423}
2424
2425/**
2426 * ubifs_tnc_next_ent - walk directory or extended attribute entries.
2427 * @c: UBIFS file-system description object
2428 * @key: key of last entry
2429 * @nm: name of last entry found or %NULL
2430 *
2431 * This function finds and reads the next directory or extended attribute entry
2432 * after the given key (@key) if there is one. @nm is used to resolve
2433 * collisions.
2434 *
2435 * If the name of the current entry is not known and only the key is known,
2436 * @nm->name has to be %NULL. In this case the semantics of this function is a
2437 * little bit different and it returns the entry corresponding to this key, not
2438 * the next one. If the key was not found, the closest "right" entry is
2439 * returned.
2440 *
2441 * If the fist entry has to be found, @key has to contain the lowest possible
2442 * key value for this inode and @name has to be %NULL.
2443 *
2444 * This function returns the found directory or extended attribute entry node
2445 * in case of success, %-ENOENT is returned if no entry was found, and a
2446 * negative error code is returned in case of failure.
2447 */
2448struct ubifs_dent_node *ubifs_tnc_next_ent(struct ubifs_info *c,
2449 union ubifs_key *key,
2450 const struct qstr *nm)
2451{
2452 int n, err, type = key_type(c, key);
2453 struct ubifs_znode *znode;
2454 struct ubifs_dent_node *dent;
2455 struct ubifs_zbranch *zbr;
2456 union ubifs_key *dkey;
2457
2458 dbg_tnc("%s %s", nm->name ? (char *)nm->name : "(lowest)", DBGKEY(key));
2459 ubifs_assert(is_hash_key(c, key));
2460
2461 mutex_lock(&c->tnc_mutex);
2462 err = ubifs_lookup_level0(c, key, &znode, &n);
2463 if (unlikely(err < 0))
2464 goto out_unlock;
2465
2466 if (nm->name) {
2467 if (err) {
2468 /* Handle collisions */
2469 err = resolve_collision(c, key, &znode, &n, nm);
2470 dbg_tnc("rc returned %d, znode %p, n %d",
2471 err, znode, n);
2472 if (unlikely(err < 0))
2473 goto out_unlock;
2474 }
2475
2476 /* Now find next entry */
2477 err = tnc_next(c, &znode, &n);
2478 if (unlikely(err))
2479 goto out_unlock;
2480 } else {
2481 /*
2482 * The full name of the entry was not given, in which case the
2483 * behavior of this function is a little different and it
2484 * returns current entry, not the next one.
2485 */
2486 if (!err) {
2487 /*
2488 * However, the given key does not exist in the TNC
2489 * tree and @znode/@n variables contain the closest
2490 * "preceding" element. Switch to the next one.
2491 */
2492 err = tnc_next(c, &znode, &n);
2493 if (err)
2494 goto out_unlock;
2495 }
2496 }
2497
2498 zbr = &znode->zbranch[n];
2499 dent = kmalloc(zbr->len, GFP_NOFS);
2500 if (unlikely(!dent)) {
2501 err = -ENOMEM;
2502 goto out_unlock;
2503 }
2504
2505 /*
2506 * The above 'tnc_next()' call could lead us to the next inode, check
2507 * this.
2508 */
2509 dkey = &zbr->key;
2510 if (key_inum(c, dkey) != key_inum(c, key) ||
2511 key_type(c, dkey) != type) {
2512 err = -ENOENT;
2513 goto out_free;
2514 }
2515
2516 err = tnc_read_node_nm(c, zbr, dent);
2517 if (unlikely(err))
2518 goto out_free;
2519
2520 mutex_unlock(&c->tnc_mutex);
2521 return dent;
2522
2523out_free:
2524 kfree(dent);
2525out_unlock:
2526 mutex_unlock(&c->tnc_mutex);
2527 return ERR_PTR(err);
2528}
2529
2530/**
2531 * tnc_destroy_cnext - destroy left-over obsolete znodes from a failed commit.
2532 * @c: UBIFS file-system description object
2533 *
2534 * Destroy left-over obsolete znodes from a failed commit.
2535 */
2536static void tnc_destroy_cnext(struct ubifs_info *c)
2537{
2538 struct ubifs_znode *cnext;
2539
2540 if (!c->cnext)
2541 return;
2542 ubifs_assert(c->cmt_state == COMMIT_BROKEN);
2543 cnext = c->cnext;
2544 do {
2545 struct ubifs_znode *znode = cnext;
2546
2547 cnext = cnext->cnext;
2548 if (test_bit(OBSOLETE_ZNODE, &znode->flags))
2549 kfree(znode);
2550 } while (cnext && cnext != c->cnext);
2551}
2552
2553/**
2554 * ubifs_tnc_close - close TNC subsystem and free all related resources.
2555 * @c: UBIFS file-system description object
2556 */
2557void ubifs_tnc_close(struct ubifs_info *c)
2558{
2559 long clean_freed;
2560
2561 tnc_destroy_cnext(c);
2562 if (c->zroot.znode) {
2563 clean_freed = ubifs_destroy_tnc_subtree(c->zroot.znode);
2564 atomic_long_sub(clean_freed, &ubifs_clean_zn_cnt);
2565 }
2566 kfree(c->gap_lebs);
2567 kfree(c->ilebs);
2568 destroy_old_idx(c);
2569}
2570
2571/**
2572 * left_znode - get the znode to the left.
2573 * @c: UBIFS file-system description object
2574 * @znode: znode
2575 *
2576 * This function returns a pointer to the znode to the left of @znode or NULL if
2577 * there is not one. A negative error code is returned on failure.
2578 */
2579static struct ubifs_znode *left_znode(struct ubifs_info *c,
2580 struct ubifs_znode *znode)
2581{
2582 int level = znode->level;
2583
2584 while (1) {
2585 int n = znode->iip - 1;
2586
2587 /* Go up until we can go left */
2588 znode = znode->parent;
2589 if (!znode)
2590 return NULL;
2591 if (n >= 0) {
2592 /* Now go down the rightmost branch to 'level' */
2593 znode = get_znode(c, znode, n);
2594 if (IS_ERR(znode))
2595 return znode;
2596 while (znode->level != level) {
2597 n = znode->child_cnt - 1;
2598 znode = get_znode(c, znode, n);
2599 if (IS_ERR(znode))
2600 return znode;
2601 }
2602 break;
2603 }
2604 }
2605 return znode;
2606}
2607
2608/**
2609 * right_znode - get the znode to the right.
2610 * @c: UBIFS file-system description object
2611 * @znode: znode
2612 *
2613 * This function returns a pointer to the znode to the right of @znode or NULL
2614 * if there is not one. A negative error code is returned on failure.
2615 */
2616static struct ubifs_znode *right_znode(struct ubifs_info *c,
2617 struct ubifs_znode *znode)
2618{
2619 int level = znode->level;
2620
2621 while (1) {
2622 int n = znode->iip + 1;
2623
2624 /* Go up until we can go right */
2625 znode = znode->parent;
2626 if (!znode)
2627 return NULL;
2628 if (n < znode->child_cnt) {
2629 /* Now go down the leftmost branch to 'level' */
2630 znode = get_znode(c, znode, n);
2631 if (IS_ERR(znode))
2632 return znode;
2633 while (znode->level != level) {
2634 znode = get_znode(c, znode, 0);
2635 if (IS_ERR(znode))
2636 return znode;
2637 }
2638 break;
2639 }
2640 }
2641 return znode;
2642}
2643
2644/**
2645 * lookup_znode - find a particular indexing node from TNC.
2646 * @c: UBIFS file-system description object
2647 * @key: index node key to lookup
2648 * @level: index node level
2649 * @lnum: index node LEB number
2650 * @offs: index node offset
2651 *
2652 * This function searches an indexing node by its first key @key and its
2653 * address @lnum:@offs. It looks up the indexing tree by pulling all indexing
2654 * nodes it traverses to TNC. This function is called fro indexing nodes which
2655 * were found on the media by scanning, for example when garbage-collecting or
2656 * when doing in-the-gaps commit. This means that the indexing node which is
2657 * looked for does not have to have exactly the same leftmost key @key, because
2658 * the leftmost key may have been changed, in which case TNC will contain a
2659 * dirty znode which still refers the same @lnum:@offs. This function is clever
2660 * enough to recognize such indexing nodes.
2661 *
2662 * Note, if a znode was deleted or changed too much, then this function will
2663 * not find it. For situations like this UBIFS has the old index RB-tree
2664 * (indexed by @lnum:@offs).
2665 *
2666 * This function returns a pointer to the znode found or %NULL if it is not
2667 * found. A negative error code is returned on failure.
2668 */
2669static struct ubifs_znode *lookup_znode(struct ubifs_info *c,
2670 union ubifs_key *key, int level,
2671 int lnum, int offs)
2672{
2673 struct ubifs_znode *znode, *zn;
2674 int n, nn;
2675
2676 /*
2677 * The arguments have probably been read off flash, so don't assume
2678 * they are valid.
2679 */
2680 if (level < 0)
2681 return ERR_PTR(-EINVAL);
2682
2683 /* Get the root znode */
2684 znode = c->zroot.znode;
2685 if (!znode) {
2686 znode = ubifs_load_znode(c, &c->zroot, NULL, 0);
2687 if (IS_ERR(znode))
2688 return znode;
2689 }
2690 /* Check if it is the one we are looking for */
2691 if (c->zroot.lnum == lnum && c->zroot.offs == offs)
2692 return znode;
2693 /* Descend to the parent level i.e. (level + 1) */
2694 if (level >= znode->level)
2695 return NULL;
2696 while (1) {
2697 ubifs_search_zbranch(c, znode, key, &n);
2698 if (n < 0) {
2699 /*
2700 * We reached a znode where the leftmost key is greater
2701 * than the key we are searching for. This is the same
2702 * situation as the one described in a huge comment at
2703 * the end of the 'ubifs_lookup_level0()' function. And
2704 * for exactly the same reasons we have to try to look
2705 * left before giving up.
2706 */
2707 znode = left_znode(c, znode);
2708 if (!znode)
2709 return NULL;
2710 if (IS_ERR(znode))
2711 return znode;
2712 ubifs_search_zbranch(c, znode, key, &n);
2713 ubifs_assert(n >= 0);
2714 }
2715 if (znode->level == level + 1)
2716 break;
2717 znode = get_znode(c, znode, n);
2718 if (IS_ERR(znode))
2719 return znode;
2720 }
2721 /* Check if the child is the one we are looking for */
2722 if (znode->zbranch[n].lnum == lnum && znode->zbranch[n].offs == offs)
2723 return get_znode(c, znode, n);
2724 /* If the key is unique, there is nowhere else to look */
2725 if (!is_hash_key(c, key))
2726 return NULL;
2727 /*
2728 * The key is not unique and so may be also in the znodes to either
2729 * side.
2730 */
2731 zn = znode;
2732 nn = n;
2733 /* Look left */
2734 while (1) {
2735 /* Move one branch to the left */
2736 if (n)
2737 n -= 1;
2738 else {
2739 znode = left_znode(c, znode);
2740 if (!znode)
2741 break;
2742 if (IS_ERR(znode))
2743 return znode;
2744 n = znode->child_cnt - 1;
2745 }
2746 /* Check it */
2747 if (znode->zbranch[n].lnum == lnum &&
2748 znode->zbranch[n].offs == offs)
2749 return get_znode(c, znode, n);
2750 /* Stop if the key is less than the one we are looking for */
2751 if (keys_cmp(c, &znode->zbranch[n].key, key) < 0)
2752 break;
2753 }
2754 /* Back to the middle */
2755 znode = zn;
2756 n = nn;
2757 /* Look right */
2758 while (1) {
2759 /* Move one branch to the right */
2760 if (++n >= znode->child_cnt) {
2761 znode = right_znode(c, znode);
2762 if (!znode)
2763 break;
2764 if (IS_ERR(znode))
2765 return znode;
2766 n = 0;
2767 }
2768 /* Check it */
2769 if (znode->zbranch[n].lnum == lnum &&
2770 znode->zbranch[n].offs == offs)
2771 return get_znode(c, znode, n);
2772 /* Stop if the key is greater than the one we are looking for */
2773 if (keys_cmp(c, &znode->zbranch[n].key, key) > 0)
2774 break;
2775 }
2776 return NULL;
2777}
2778
2779/**
2780 * is_idx_node_in_tnc - determine if an index node is in the TNC.
2781 * @c: UBIFS file-system description object
2782 * @key: key of index node
2783 * @level: index node level
2784 * @lnum: LEB number of index node
2785 * @offs: offset of index node
2786 *
2787 * This function returns %0 if the index node is not referred to in the TNC, %1
2788 * if the index node is referred to in the TNC and the corresponding znode is
2789 * dirty, %2 if an index node is referred to in the TNC and the corresponding
2790 * znode is clean, and a negative error code in case of failure.
2791 *
2792 * Note, the @key argument has to be the key of the first child. Also note,
2793 * this function relies on the fact that 0:0 is never a valid LEB number and
2794 * offset for a main-area node.
2795 */
2796int is_idx_node_in_tnc(struct ubifs_info *c, union ubifs_key *key, int level,
2797 int lnum, int offs)
2798{
2799 struct ubifs_znode *znode;
2800
2801 znode = lookup_znode(c, key, level, lnum, offs);
2802 if (!znode)
2803 return 0;
2804 if (IS_ERR(znode))
2805 return PTR_ERR(znode);
2806
2807 return ubifs_zn_dirty(znode) ? 1 : 2;
2808}
2809
2810/**
2811 * is_leaf_node_in_tnc - determine if a non-indexing not is in the TNC.
2812 * @c: UBIFS file-system description object
2813 * @key: node key
2814 * @lnum: node LEB number
2815 * @offs: node offset
2816 *
2817 * This function returns %1 if the node is referred to in the TNC, %0 if it is
2818 * not, and a negative error code in case of failure.
2819 *
2820 * Note, this function relies on the fact that 0:0 is never a valid LEB number
2821 * and offset for a main-area node.
2822 */
2823static int is_leaf_node_in_tnc(struct ubifs_info *c, union ubifs_key *key,
2824 int lnum, int offs)
2825{
2826 struct ubifs_zbranch *zbr;
2827 struct ubifs_znode *znode, *zn;
2828 int n, found, err, nn;
2829 const int unique = !is_hash_key(c, key);
2830
2831 found = ubifs_lookup_level0(c, key, &znode, &n);
2832 if (found < 0)
2833 return found; /* Error code */
2834 if (!found)
2835 return 0;
2836 zbr = &znode->zbranch[n];
2837 if (lnum == zbr->lnum && offs == zbr->offs)
2838 return 1; /* Found it */
2839 if (unique)
2840 return 0;
2841 /*
2842 * Because the key is not unique, we have to look left
2843 * and right as well
2844 */
2845 zn = znode;
2846 nn = n;
2847 /* Look left */
2848 while (1) {
2849 err = tnc_prev(c, &znode, &n);
2850 if (err == -ENOENT)
2851 break;
2852 if (err)
2853 return err;
2854 if (keys_cmp(c, key, &znode->zbranch[n].key))
2855 break;
2856 zbr = &znode->zbranch[n];
2857 if (lnum == zbr->lnum && offs == zbr->offs)
2858 return 1; /* Found it */
2859 }
2860 /* Look right */
2861 znode = zn;
2862 n = nn;
2863 while (1) {
2864 err = tnc_next(c, &znode, &n);
2865 if (err) {
2866 if (err == -ENOENT)
2867 return 0;
2868 return err;
2869 }
2870 if (keys_cmp(c, key, &znode->zbranch[n].key))
2871 break;
2872 zbr = &znode->zbranch[n];
2873 if (lnum == zbr->lnum && offs == zbr->offs)
2874 return 1; /* Found it */
2875 }
2876 return 0;
2877}
2878
2879/**
2880 * ubifs_tnc_has_node - determine whether a node is in the TNC.
2881 * @c: UBIFS file-system description object
2882 * @key: node key
2883 * @level: index node level (if it is an index node)
2884 * @lnum: node LEB number
2885 * @offs: node offset
2886 * @is_idx: non-zero if the node is an index node
2887 *
2888 * This function returns %1 if the node is in the TNC, %0 if it is not, and a
2889 * negative error code in case of failure. For index nodes, @key has to be the
2890 * key of the first child. An index node is considered to be in the TNC only if
2891 * the corresponding znode is clean or has not been loaded.
2892 */
2893int ubifs_tnc_has_node(struct ubifs_info *c, union ubifs_key *key, int level,
2894 int lnum, int offs, int is_idx)
2895{
2896 int err;
2897
2898 mutex_lock(&c->tnc_mutex);
2899 if (is_idx) {
2900 err = is_idx_node_in_tnc(c, key, level, lnum, offs);
2901 if (err < 0)
2902 goto out_unlock;
2903 if (err == 1)
2904 /* The index node was found but it was dirty */
2905 err = 0;
2906 else if (err == 2)
2907 /* The index node was found and it was clean */
2908 err = 1;
2909 else
2910 BUG_ON(err != 0);
2911 } else
2912 err = is_leaf_node_in_tnc(c, key, lnum, offs);
2913
2914out_unlock:
2915 mutex_unlock(&c->tnc_mutex);
2916 return err;
2917}
2918
2919/**
2920 * ubifs_dirty_idx_node - dirty an index node.
2921 * @c: UBIFS file-system description object
2922 * @key: index node key
2923 * @level: index node level
2924 * @lnum: index node LEB number
2925 * @offs: index node offset
2926 *
2927 * This function loads and dirties an index node so that it can be garbage
2928 * collected. The @key argument has to be the key of the first child. This
2929 * function relies on the fact that 0:0 is never a valid LEB number and offset
2930 * for a main-area node. Returns %0 on success and a negative error code on
2931 * failure.
2932 */
2933int ubifs_dirty_idx_node(struct ubifs_info *c, union ubifs_key *key, int level,
2934 int lnum, int offs)
2935{
2936 struct ubifs_znode *znode;
2937 int err = 0;
2938
2939 mutex_lock(&c->tnc_mutex);
2940 znode = lookup_znode(c, key, level, lnum, offs);
2941 if (!znode)
2942 goto out_unlock;
2943 if (IS_ERR(znode)) {
2944 err = PTR_ERR(znode);
2945 goto out_unlock;
2946 }
2947 znode = dirty_cow_bottom_up(c, znode);
2948 if (IS_ERR(znode)) {
2949 err = PTR_ERR(znode);
2950 goto out_unlock;
2951 }
2952
2953out_unlock:
2954 mutex_unlock(&c->tnc_mutex);
2955 return err;
2956}
diff --git a/fs/ubifs/tnc_commit.c b/fs/ubifs/tnc_commit.c
new file mode 100644
index 000000000000..8117e65ba2e9
--- /dev/null
+++ b/fs/ubifs/tnc_commit.c
@@ -0,0 +1,1103 @@
1/*
2 * This file is part of UBIFS.
3 *
4 * Copyright (C) 2006-2008 Nokia Corporation.
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms of the GNU General Public License version 2 as published by
8 * the Free Software Foundation.
9 *
10 * This program is distributed in the hope that it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13 * more details.
14 *
15 * You should have received a copy of the GNU General Public License along with
16 * this program; if not, write to the Free Software Foundation, Inc., 51
17 * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
18 *
19 * Authors: Adrian Hunter
20 * Artem Bityutskiy (Битюцкий Артём)
21 */
22
23/* This file implements TNC functions for committing */
24
25#include "ubifs.h"
26
27/**
28 * make_idx_node - make an index node for fill-the-gaps method of TNC commit.
29 * @c: UBIFS file-system description object
30 * @idx: buffer in which to place new index node
31 * @znode: znode from which to make new index node
32 * @lnum: LEB number where new index node will be written
33 * @offs: offset where new index node will be written
34 * @len: length of new index node
35 */
36static int make_idx_node(struct ubifs_info *c, struct ubifs_idx_node *idx,
37 struct ubifs_znode *znode, int lnum, int offs, int len)
38{
39 struct ubifs_znode *zp;
40 int i, err;
41
42 /* Make index node */
43 idx->ch.node_type = UBIFS_IDX_NODE;
44 idx->child_cnt = cpu_to_le16(znode->child_cnt);
45 idx->level = cpu_to_le16(znode->level);
46 for (i = 0; i < znode->child_cnt; i++) {
47 struct ubifs_branch *br = ubifs_idx_branch(c, idx, i);
48 struct ubifs_zbranch *zbr = &znode->zbranch[i];
49
50 key_write_idx(c, &zbr->key, &br->key);
51 br->lnum = cpu_to_le32(zbr->lnum);
52 br->offs = cpu_to_le32(zbr->offs);
53 br->len = cpu_to_le32(zbr->len);
54 if (!zbr->lnum || !zbr->len) {
55 ubifs_err("bad ref in znode");
56 dbg_dump_znode(c, znode);
57 if (zbr->znode)
58 dbg_dump_znode(c, zbr->znode);
59 }
60 }
61 ubifs_prepare_node(c, idx, len, 0);
62
63#ifdef CONFIG_UBIFS_FS_DEBUG
64 znode->lnum = lnum;
65 znode->offs = offs;
66 znode->len = len;
67#endif
68
69 err = insert_old_idx_znode(c, znode);
70
71 /* Update the parent */
72 zp = znode->parent;
73 if (zp) {
74 struct ubifs_zbranch *zbr;
75
76 zbr = &zp->zbranch[znode->iip];
77 zbr->lnum = lnum;
78 zbr->offs = offs;
79 zbr->len = len;
80 } else {
81 c->zroot.lnum = lnum;
82 c->zroot.offs = offs;
83 c->zroot.len = len;
84 }
85 c->calc_idx_sz += ALIGN(len, 8);
86
87 atomic_long_dec(&c->dirty_zn_cnt);
88
89 ubifs_assert(ubifs_zn_dirty(znode));
90 ubifs_assert(test_bit(COW_ZNODE, &znode->flags));
91
92 __clear_bit(DIRTY_ZNODE, &znode->flags);
93 __clear_bit(COW_ZNODE, &znode->flags);
94
95 return err;
96}
97
98/**
99 * fill_gap - make index nodes in gaps in dirty index LEBs.
100 * @c: UBIFS file-system description object
101 * @lnum: LEB number that gap appears in
102 * @gap_start: offset of start of gap
103 * @gap_end: offset of end of gap
104 * @dirt: adds dirty space to this
105 *
106 * This function returns the number of index nodes written into the gap.
107 */
108static int fill_gap(struct ubifs_info *c, int lnum, int gap_start, int gap_end,
109 int *dirt)
110{
111 int len, gap_remains, gap_pos, written, pad_len;
112
113 ubifs_assert((gap_start & 7) == 0);
114 ubifs_assert((gap_end & 7) == 0);
115 ubifs_assert(gap_end >= gap_start);
116
117 gap_remains = gap_end - gap_start;
118 if (!gap_remains)
119 return 0;
120 gap_pos = gap_start;
121 written = 0;
122 while (c->enext) {
123 len = ubifs_idx_node_sz(c, c->enext->child_cnt);
124 if (len < gap_remains) {
125 struct ubifs_znode *znode = c->enext;
126 const int alen = ALIGN(len, 8);
127 int err;
128
129 ubifs_assert(alen <= gap_remains);
130 err = make_idx_node(c, c->ileb_buf + gap_pos, znode,
131 lnum, gap_pos, len);
132 if (err)
133 return err;
134 gap_remains -= alen;
135 gap_pos += alen;
136 c->enext = znode->cnext;
137 if (c->enext == c->cnext)
138 c->enext = NULL;
139 written += 1;
140 } else
141 break;
142 }
143 if (gap_end == c->leb_size) {
144 c->ileb_len = ALIGN(gap_pos, c->min_io_size);
145 /* Pad to end of min_io_size */
146 pad_len = c->ileb_len - gap_pos;
147 } else
148 /* Pad to end of gap */
149 pad_len = gap_remains;
150 dbg_gc("LEB %d:%d to %d len %d nodes written %d wasted bytes %d",
151 lnum, gap_start, gap_end, gap_end - gap_start, written, pad_len);
152 ubifs_pad(c, c->ileb_buf + gap_pos, pad_len);
153 *dirt += pad_len;
154 return written;
155}
156
157/**
158 * find_old_idx - find an index node obsoleted since the last commit start.
159 * @c: UBIFS file-system description object
160 * @lnum: LEB number of obsoleted index node
161 * @offs: offset of obsoleted index node
162 *
163 * Returns %1 if found and %0 otherwise.
164 */
165static int find_old_idx(struct ubifs_info *c, int lnum, int offs)
166{
167 struct ubifs_old_idx *o;
168 struct rb_node *p;
169
170 p = c->old_idx.rb_node;
171 while (p) {
172 o = rb_entry(p, struct ubifs_old_idx, rb);
173 if (lnum < o->lnum)
174 p = p->rb_left;
175 else if (lnum > o->lnum)
176 p = p->rb_right;
177 else if (offs < o->offs)
178 p = p->rb_left;
179 else if (offs > o->offs)
180 p = p->rb_right;
181 else
182 return 1;
183 }
184 return 0;
185}
186
187/**
188 * is_idx_node_in_use - determine if an index node can be overwritten.
189 * @c: UBIFS file-system description object
190 * @key: key of index node
191 * @level: index node level
192 * @lnum: LEB number of index node
193 * @offs: offset of index node
194 *
195 * If @key / @lnum / @offs identify an index node that was not part of the old
196 * index, then this function returns %0 (obsolete). Else if the index node was
197 * part of the old index but is now dirty %1 is returned, else if it is clean %2
198 * is returned. A negative error code is returned on failure.
199 */
200static int is_idx_node_in_use(struct ubifs_info *c, union ubifs_key *key,
201 int level, int lnum, int offs)
202{
203 int ret;
204
205 ret = is_idx_node_in_tnc(c, key, level, lnum, offs);
206 if (ret < 0)
207 return ret; /* Error code */
208 if (ret == 0)
209 if (find_old_idx(c, lnum, offs))
210 return 1;
211 return ret;
212}
213
214/**
215 * layout_leb_in_gaps - layout index nodes using in-the-gaps method.
216 * @c: UBIFS file-system description object
217 * @p: return LEB number here
218 *
219 * This function lays out new index nodes for dirty znodes using in-the-gaps
220 * method of TNC commit.
221 * This function merely puts the next znode into the next gap, making no attempt
222 * to try to maximise the number of znodes that fit.
223 * This function returns the number of index nodes written into the gaps, or a
224 * negative error code on failure.
225 */
226static int layout_leb_in_gaps(struct ubifs_info *c, int *p)
227{
228 struct ubifs_scan_leb *sleb;
229 struct ubifs_scan_node *snod;
230 int lnum, dirt = 0, gap_start, gap_end, err, written, tot_written;
231
232 tot_written = 0;
233 /* Get an index LEB with lots of obsolete index nodes */
234 lnum = ubifs_find_dirty_idx_leb(c);
235 if (lnum < 0)
236 /*
237 * There also may be dirt in the index head that could be
238 * filled, however we do not check there at present.
239 */
240 return lnum; /* Error code */
241 *p = lnum;
242 dbg_gc("LEB %d", lnum);
243 /*
244 * Scan the index LEB. We use the generic scan for this even though
245 * it is more comprehensive and less efficient than is needed for this
246 * purpose.
247 */
248 sleb = ubifs_scan(c, lnum, 0, c->ileb_buf);
249 c->ileb_len = 0;
250 if (IS_ERR(sleb))
251 return PTR_ERR(sleb);
252 gap_start = 0;
253 list_for_each_entry(snod, &sleb->nodes, list) {
254 struct ubifs_idx_node *idx;
255 int in_use, level;
256
257 ubifs_assert(snod->type == UBIFS_IDX_NODE);
258 idx = snod->node;
259 key_read(c, ubifs_idx_key(c, idx), &snod->key);
260 level = le16_to_cpu(idx->level);
261 /* Determine if the index node is in use (not obsolete) */
262 in_use = is_idx_node_in_use(c, &snod->key, level, lnum,
263 snod->offs);
264 if (in_use < 0) {
265 ubifs_scan_destroy(sleb);
266 return in_use; /* Error code */
267 }
268 if (in_use) {
269 if (in_use == 1)
270 dirt += ALIGN(snod->len, 8);
271 /*
272 * The obsolete index nodes form gaps that can be
273 * overwritten. This gap has ended because we have
274 * found an index node that is still in use
275 * i.e. not obsolete
276 */
277 gap_end = snod->offs;
278 /* Try to fill gap */
279 written = fill_gap(c, lnum, gap_start, gap_end, &dirt);
280 if (written < 0) {
281 ubifs_scan_destroy(sleb);
282 return written; /* Error code */
283 }
284 tot_written += written;
285 gap_start = ALIGN(snod->offs + snod->len, 8);
286 }
287 }
288 ubifs_scan_destroy(sleb);
289 c->ileb_len = c->leb_size;
290 gap_end = c->leb_size;
291 /* Try to fill gap */
292 written = fill_gap(c, lnum, gap_start, gap_end, &dirt);
293 if (written < 0)
294 return written; /* Error code */
295 tot_written += written;
296 if (tot_written == 0) {
297 struct ubifs_lprops lp;
298
299 dbg_gc("LEB %d wrote %d index nodes", lnum, tot_written);
300 err = ubifs_read_one_lp(c, lnum, &lp);
301 if (err)
302 return err;
303 if (lp.free == c->leb_size) {
304 /*
305 * We must have snatched this LEB from the idx_gc list
306 * so we need to correct the free and dirty space.
307 */
308 err = ubifs_change_one_lp(c, lnum,
309 c->leb_size - c->ileb_len,
310 dirt, 0, 0, 0);
311 if (err)
312 return err;
313 }
314 return 0;
315 }
316 err = ubifs_change_one_lp(c, lnum, c->leb_size - c->ileb_len, dirt,
317 0, 0, 0);
318 if (err)
319 return err;
320 err = ubifs_leb_change(c, lnum, c->ileb_buf, c->ileb_len,
321 UBI_SHORTTERM);
322 if (err)
323 return err;
324 dbg_gc("LEB %d wrote %d index nodes", lnum, tot_written);
325 return tot_written;
326}
327
328/**
329 * get_leb_cnt - calculate the number of empty LEBs needed to commit.
330 * @c: UBIFS file-system description object
331 * @cnt: number of znodes to commit
332 *
333 * This function returns the number of empty LEBs needed to commit @cnt znodes
334 * to the current index head. The number is not exact and may be more than
335 * needed.
336 */
337static int get_leb_cnt(struct ubifs_info *c, int cnt)
338{
339 int d;
340
341 /* Assume maximum index node size (i.e. overestimate space needed) */
342 cnt -= (c->leb_size - c->ihead_offs) / c->max_idx_node_sz;
343 if (cnt < 0)
344 cnt = 0;
345 d = c->leb_size / c->max_idx_node_sz;
346 return DIV_ROUND_UP(cnt, d);
347}
348
349/**
350 * layout_in_gaps - in-the-gaps method of committing TNC.
351 * @c: UBIFS file-system description object
352 * @cnt: number of dirty znodes to commit.
353 *
354 * This function lays out new index nodes for dirty znodes using in-the-gaps
355 * method of TNC commit.
356 *
357 * This function returns %0 on success and a negative error code on failure.
358 */
359static int layout_in_gaps(struct ubifs_info *c, int cnt)
360{
361 int err, leb_needed_cnt, written, *p;
362
363 dbg_gc("%d znodes to write", cnt);
364
365 c->gap_lebs = kmalloc(sizeof(int) * (c->lst.idx_lebs + 1), GFP_NOFS);
366 if (!c->gap_lebs)
367 return -ENOMEM;
368
369 p = c->gap_lebs;
370 do {
371 ubifs_assert(p < c->gap_lebs + sizeof(int) * c->lst.idx_lebs);
372 written = layout_leb_in_gaps(c, p);
373 if (written < 0) {
374 err = written;
375 if (err == -ENOSPC) {
376 if (!dbg_force_in_the_gaps_enabled) {
377 /*
378 * Do not print scary warnings if the
379 * debugging option which forces
380 * in-the-gaps is enabled.
381 */
382 ubifs_err("out of space");
383 spin_lock(&c->space_lock);
384 dbg_dump_budg(c);
385 spin_unlock(&c->space_lock);
386 dbg_dump_lprops(c);
387 }
388 /* Try to commit anyway */
389 err = 0;
390 break;
391 }
392 kfree(c->gap_lebs);
393 c->gap_lebs = NULL;
394 return err;
395 }
396 p++;
397 cnt -= written;
398 leb_needed_cnt = get_leb_cnt(c, cnt);
399 dbg_gc("%d znodes remaining, need %d LEBs, have %d", cnt,
400 leb_needed_cnt, c->ileb_cnt);
401 } while (leb_needed_cnt > c->ileb_cnt);
402
403 *p = -1;
404 return 0;
405}
406
407/**
408 * layout_in_empty_space - layout index nodes in empty space.
409 * @c: UBIFS file-system description object
410 *
411 * This function lays out new index nodes for dirty znodes using empty LEBs.
412 *
413 * This function returns %0 on success and a negative error code on failure.
414 */
415static int layout_in_empty_space(struct ubifs_info *c)
416{
417 struct ubifs_znode *znode, *cnext, *zp;
418 int lnum, offs, len, next_len, buf_len, buf_offs, used, avail;
419 int wlen, blen, err;
420
421 cnext = c->enext;
422 if (!cnext)
423 return 0;
424
425 lnum = c->ihead_lnum;
426 buf_offs = c->ihead_offs;
427
428 buf_len = ubifs_idx_node_sz(c, c->fanout);
429 buf_len = ALIGN(buf_len, c->min_io_size);
430 used = 0;
431 avail = buf_len;
432
433 /* Ensure there is enough room for first write */
434 next_len = ubifs_idx_node_sz(c, cnext->child_cnt);
435 if (buf_offs + next_len > c->leb_size)
436 lnum = -1;
437
438 while (1) {
439 znode = cnext;
440
441 len = ubifs_idx_node_sz(c, znode->child_cnt);
442
443 /* Determine the index node position */
444 if (lnum == -1) {
445 if (c->ileb_nxt >= c->ileb_cnt) {
446 ubifs_err("out of space");
447 return -ENOSPC;
448 }
449 lnum = c->ilebs[c->ileb_nxt++];
450 buf_offs = 0;
451 used = 0;
452 avail = buf_len;
453 }
454
455 offs = buf_offs + used;
456
457#ifdef CONFIG_UBIFS_FS_DEBUG
458 znode->lnum = lnum;
459 znode->offs = offs;
460 znode->len = len;
461#endif
462
463 /* Update the parent */
464 zp = znode->parent;
465 if (zp) {
466 struct ubifs_zbranch *zbr;
467 int i;
468
469 i = znode->iip;
470 zbr = &zp->zbranch[i];
471 zbr->lnum = lnum;
472 zbr->offs = offs;
473 zbr->len = len;
474 } else {
475 c->zroot.lnum = lnum;
476 c->zroot.offs = offs;
477 c->zroot.len = len;
478 }
479 c->calc_idx_sz += ALIGN(len, 8);
480
481 /*
482 * Once lprops is updated, we can decrease the dirty znode count
483 * but it is easier to just do it here.
484 */
485 atomic_long_dec(&c->dirty_zn_cnt);
486
487 /*
488 * Calculate the next index node length to see if there is
489 * enough room for it
490 */
491 cnext = znode->cnext;
492 if (cnext == c->cnext)
493 next_len = 0;
494 else
495 next_len = ubifs_idx_node_sz(c, cnext->child_cnt);
496
497 if (c->min_io_size == 1) {
498 buf_offs += ALIGN(len, 8);
499 if (next_len) {
500 if (buf_offs + next_len <= c->leb_size)
501 continue;
502 err = ubifs_update_one_lp(c, lnum, 0,
503 c->leb_size - buf_offs, 0, 0);
504 if (err)
505 return err;
506 lnum = -1;
507 continue;
508 }
509 err = ubifs_update_one_lp(c, lnum,
510 c->leb_size - buf_offs, 0, 0, 0);
511 if (err)
512 return err;
513 break;
514 }
515
516 /* Update buffer positions */
517 wlen = used + len;
518 used += ALIGN(len, 8);
519 avail -= ALIGN(len, 8);
520
521 if (next_len != 0 &&
522 buf_offs + used + next_len <= c->leb_size &&
523 avail > 0)
524 continue;
525
526 if (avail <= 0 && next_len &&
527 buf_offs + used + next_len <= c->leb_size)
528 blen = buf_len;
529 else
530 blen = ALIGN(wlen, c->min_io_size);
531
532 /* The buffer is full or there are no more znodes to do */
533 buf_offs += blen;
534 if (next_len) {
535 if (buf_offs + next_len > c->leb_size) {
536 err = ubifs_update_one_lp(c, lnum,
537 c->leb_size - buf_offs, blen - used,
538 0, 0);
539 if (err)
540 return err;
541 lnum = -1;
542 }
543 used -= blen;
544 if (used < 0)
545 used = 0;
546 avail = buf_len - used;
547 continue;
548 }
549 err = ubifs_update_one_lp(c, lnum, c->leb_size - buf_offs,
550 blen - used, 0, 0);
551 if (err)
552 return err;
553 break;
554 }
555
556#ifdef CONFIG_UBIFS_FS_DEBUG
557 c->new_ihead_lnum = lnum;
558 c->new_ihead_offs = buf_offs;
559#endif
560
561 return 0;
562}
563
564/**
565 * layout_commit - determine positions of index nodes to commit.
566 * @c: UBIFS file-system description object
567 * @no_space: indicates that insufficient empty LEBs were allocated
568 * @cnt: number of znodes to commit
569 *
570 * Calculate and update the positions of index nodes to commit. If there were
571 * an insufficient number of empty LEBs allocated, then index nodes are placed
572 * into the gaps created by obsolete index nodes in non-empty index LEBs. For
573 * this purpose, an obsolete index node is one that was not in the index as at
574 * the end of the last commit. To write "in-the-gaps" requires that those index
575 * LEBs are updated atomically in-place.
576 */
577static int layout_commit(struct ubifs_info *c, int no_space, int cnt)
578{
579 int err;
580
581 if (no_space) {
582 err = layout_in_gaps(c, cnt);
583 if (err)
584 return err;
585 }
586 err = layout_in_empty_space(c);
587 return err;
588}
589
590/**
591 * find_first_dirty - find first dirty znode.
592 * @znode: znode to begin searching from
593 */
594static struct ubifs_znode *find_first_dirty(struct ubifs_znode *znode)
595{
596 int i, cont;
597
598 if (!znode)
599 return NULL;
600
601 while (1) {
602 if (znode->level == 0) {
603 if (ubifs_zn_dirty(znode))
604 return znode;
605 return NULL;
606 }
607 cont = 0;
608 for (i = 0; i < znode->child_cnt; i++) {
609 struct ubifs_zbranch *zbr = &znode->zbranch[i];
610
611 if (zbr->znode && ubifs_zn_dirty(zbr->znode)) {
612 znode = zbr->znode;
613 cont = 1;
614 break;
615 }
616 }
617 if (!cont) {
618 if (ubifs_zn_dirty(znode))
619 return znode;
620 return NULL;
621 }
622 }
623}
624
625/**
626 * find_next_dirty - find next dirty znode.
627 * @znode: znode to begin searching from
628 */
629static struct ubifs_znode *find_next_dirty(struct ubifs_znode *znode)
630{
631 int n = znode->iip + 1;
632
633 znode = znode->parent;
634 if (!znode)
635 return NULL;
636 for (; n < znode->child_cnt; n++) {
637 struct ubifs_zbranch *zbr = &znode->zbranch[n];
638
639 if (zbr->znode && ubifs_zn_dirty(zbr->znode))
640 return find_first_dirty(zbr->znode);
641 }
642 return znode;
643}
644
645/**
646 * get_znodes_to_commit - create list of dirty znodes to commit.
647 * @c: UBIFS file-system description object
648 *
649 * This function returns the number of znodes to commit.
650 */
651static int get_znodes_to_commit(struct ubifs_info *c)
652{
653 struct ubifs_znode *znode, *cnext;
654 int cnt = 0;
655
656 c->cnext = find_first_dirty(c->zroot.znode);
657 znode = c->enext = c->cnext;
658 if (!znode) {
659 dbg_cmt("no znodes to commit");
660 return 0;
661 }
662 cnt += 1;
663 while (1) {
664 ubifs_assert(!test_bit(COW_ZNODE, &znode->flags));
665 __set_bit(COW_ZNODE, &znode->flags);
666 znode->alt = 0;
667 cnext = find_next_dirty(znode);
668 if (!cnext) {
669 znode->cnext = c->cnext;
670 break;
671 }
672 znode->cnext = cnext;
673 znode = cnext;
674 cnt += 1;
675 }
676 dbg_cmt("committing %d znodes", cnt);
677 ubifs_assert(cnt == atomic_long_read(&c->dirty_zn_cnt));
678 return cnt;
679}
680
681/**
682 * alloc_idx_lebs - allocate empty LEBs to be used to commit.
683 * @c: UBIFS file-system description object
684 * @cnt: number of znodes to commit
685 *
686 * This function returns %-ENOSPC if it cannot allocate a sufficient number of
687 * empty LEBs. %0 is returned on success, otherwise a negative error code
688 * is returned.
689 */
690static int alloc_idx_lebs(struct ubifs_info *c, int cnt)
691{
692 int i, leb_cnt, lnum;
693
694 c->ileb_cnt = 0;
695 c->ileb_nxt = 0;
696 leb_cnt = get_leb_cnt(c, cnt);
697 dbg_cmt("need about %d empty LEBS for TNC commit", leb_cnt);
698 if (!leb_cnt)
699 return 0;
700 c->ilebs = kmalloc(leb_cnt * sizeof(int), GFP_NOFS);
701 if (!c->ilebs)
702 return -ENOMEM;
703 for (i = 0; i < leb_cnt; i++) {
704 lnum = ubifs_find_free_leb_for_idx(c);
705 if (lnum < 0)
706 return lnum;
707 c->ilebs[c->ileb_cnt++] = lnum;
708 dbg_cmt("LEB %d", lnum);
709 }
710 if (dbg_force_in_the_gaps())
711 return -ENOSPC;
712 return 0;
713}
714
715/**
716 * free_unused_idx_lebs - free unused LEBs that were allocated for the commit.
717 * @c: UBIFS file-system description object
718 *
719 * It is possible that we allocate more empty LEBs for the commit than we need.
720 * This functions frees the surplus.
721 *
722 * This function returns %0 on success and a negative error code on failure.
723 */
724static int free_unused_idx_lebs(struct ubifs_info *c)
725{
726 int i, err = 0, lnum, er;
727
728 for (i = c->ileb_nxt; i < c->ileb_cnt; i++) {
729 lnum = c->ilebs[i];
730 dbg_cmt("LEB %d", lnum);
731 er = ubifs_change_one_lp(c, lnum, LPROPS_NC, LPROPS_NC, 0,
732 LPROPS_INDEX | LPROPS_TAKEN, 0);
733 if (!err)
734 err = er;
735 }
736 return err;
737}
738
739/**
740 * free_idx_lebs - free unused LEBs after commit end.
741 * @c: UBIFS file-system description object
742 *
743 * This function returns %0 on success and a negative error code on failure.
744 */
745static int free_idx_lebs(struct ubifs_info *c)
746{
747 int err;
748
749 err = free_unused_idx_lebs(c);
750 kfree(c->ilebs);
751 c->ilebs = NULL;
752 return err;
753}
754
755/**
756 * ubifs_tnc_start_commit - start TNC commit.
757 * @c: UBIFS file-system description object
758 * @zroot: new index root position is returned here
759 *
760 * This function prepares the list of indexing nodes to commit and lays out
761 * their positions on flash. If there is not enough free space it uses the
762 * in-gap commit method. Returns zero in case of success and a negative error
763 * code in case of failure.
764 */
765int ubifs_tnc_start_commit(struct ubifs_info *c, struct ubifs_zbranch *zroot)
766{
767 int err = 0, cnt;
768
769 mutex_lock(&c->tnc_mutex);
770 err = dbg_check_tnc(c, 1);
771 if (err)
772 goto out;
773 cnt = get_znodes_to_commit(c);
774 if (cnt != 0) {
775 int no_space = 0;
776
777 err = alloc_idx_lebs(c, cnt);
778 if (err == -ENOSPC)
779 no_space = 1;
780 else if (err)
781 goto out_free;
782 err = layout_commit(c, no_space, cnt);
783 if (err)
784 goto out_free;
785 ubifs_assert(atomic_long_read(&c->dirty_zn_cnt) == 0);
786 err = free_unused_idx_lebs(c);
787 if (err)
788 goto out;
789 }
790 destroy_old_idx(c);
791 memcpy(zroot, &c->zroot, sizeof(struct ubifs_zbranch));
792
793 err = ubifs_save_dirty_idx_lnums(c);
794 if (err)
795 goto out;
796
797 spin_lock(&c->space_lock);
798 /*
799 * Although we have not finished committing yet, update size of the
800 * committed index ('c->old_idx_sz') and zero out the index growth
801 * budget. It is OK to do this now, because we've reserved all the
802 * space which is needed to commit the index, and it is save for the
803 * budgeting subsystem to assume the index is already committed,
804 * even though it is not.
805 */
806 c->old_idx_sz = c->calc_idx_sz;
807 c->budg_uncommitted_idx = 0;
808 spin_unlock(&c->space_lock);
809 mutex_unlock(&c->tnc_mutex);
810
811 dbg_cmt("number of index LEBs %d", c->lst.idx_lebs);
812 dbg_cmt("size of index %llu", c->calc_idx_sz);
813 return err;
814
815out_free:
816 free_idx_lebs(c);
817out:
818 mutex_unlock(&c->tnc_mutex);
819 return err;
820}
821
822/**
823 * write_index - write index nodes.
824 * @c: UBIFS file-system description object
825 *
826 * This function writes the index nodes whose positions were laid out in the
827 * layout_in_empty_space function.
828 */
829static int write_index(struct ubifs_info *c)
830{
831 struct ubifs_idx_node *idx;
832 struct ubifs_znode *znode, *cnext;
833 int i, lnum, offs, len, next_len, buf_len, buf_offs, used;
834 int avail, wlen, err, lnum_pos = 0;
835
836 cnext = c->enext;
837 if (!cnext)
838 return 0;
839
840 /*
841 * Always write index nodes to the index head so that index nodes and
842 * other types of nodes are never mixed in the same erase block.
843 */
844 lnum = c->ihead_lnum;
845 buf_offs = c->ihead_offs;
846
847 /* Allocate commit buffer */
848 buf_len = ALIGN(c->max_idx_node_sz, c->min_io_size);
849 used = 0;
850 avail = buf_len;
851
852 /* Ensure there is enough room for first write */
853 next_len = ubifs_idx_node_sz(c, cnext->child_cnt);
854 if (buf_offs + next_len > c->leb_size) {
855 err = ubifs_update_one_lp(c, lnum, LPROPS_NC, 0, 0,
856 LPROPS_TAKEN);
857 if (err)
858 return err;
859 lnum = -1;
860 }
861
862 while (1) {
863 cond_resched();
864
865 znode = cnext;
866 idx = c->cbuf + used;
867
868 /* Make index node */
869 idx->ch.node_type = UBIFS_IDX_NODE;
870 idx->child_cnt = cpu_to_le16(znode->child_cnt);
871 idx->level = cpu_to_le16(znode->level);
872 for (i = 0; i < znode->child_cnt; i++) {
873 struct ubifs_branch *br = ubifs_idx_branch(c, idx, i);
874 struct ubifs_zbranch *zbr = &znode->zbranch[i];
875
876 key_write_idx(c, &zbr->key, &br->key);
877 br->lnum = cpu_to_le32(zbr->lnum);
878 br->offs = cpu_to_le32(zbr->offs);
879 br->len = cpu_to_le32(zbr->len);
880 if (!zbr->lnum || !zbr->len) {
881 ubifs_err("bad ref in znode");
882 dbg_dump_znode(c, znode);
883 if (zbr->znode)
884 dbg_dump_znode(c, zbr->znode);
885 }
886 }
887 len = ubifs_idx_node_sz(c, znode->child_cnt);
888 ubifs_prepare_node(c, idx, len, 0);
889
890 /* Determine the index node position */
891 if (lnum == -1) {
892 lnum = c->ilebs[lnum_pos++];
893 buf_offs = 0;
894 used = 0;
895 avail = buf_len;
896 }
897 offs = buf_offs + used;
898
899#ifdef CONFIG_UBIFS_FS_DEBUG
900 if (lnum != znode->lnum || offs != znode->offs ||
901 len != znode->len) {
902 ubifs_err("inconsistent znode posn");
903 return -EINVAL;
904 }
905#endif
906
907 /* Grab some stuff from znode while we still can */
908 cnext = znode->cnext;
909
910 ubifs_assert(ubifs_zn_dirty(znode));
911 ubifs_assert(test_bit(COW_ZNODE, &znode->flags));
912
913 /*
914 * It is important that other threads should see %DIRTY_ZNODE
915 * flag cleared before %COW_ZNODE. Specifically, it matters in
916 * the 'dirty_cow_znode()' function. This is the reason for the
917 * first barrier. Also, we want the bit changes to be seen to
918 * other threads ASAP, to avoid unnecesarry copying, which is
919 * the reason for the second barrier.
920 */
921 clear_bit(DIRTY_ZNODE, &znode->flags);
922 smp_mb__before_clear_bit();
923 clear_bit(COW_ZNODE, &znode->flags);
924 smp_mb__after_clear_bit();
925
926 /* Do not access znode from this point on */
927
928 /* Update buffer positions */
929 wlen = used + len;
930 used += ALIGN(len, 8);
931 avail -= ALIGN(len, 8);
932
933 /*
934 * Calculate the next index node length to see if there is
935 * enough room for it
936 */
937 if (cnext == c->cnext)
938 next_len = 0;
939 else
940 next_len = ubifs_idx_node_sz(c, cnext->child_cnt);
941
942 if (c->min_io_size == 1) {
943 /*
944 * Write the prepared index node immediately if there is
945 * no minimum IO size
946 */
947 err = ubifs_leb_write(c, lnum, c->cbuf, buf_offs,
948 wlen, UBI_SHORTTERM);
949 if (err)
950 return err;
951 buf_offs += ALIGN(wlen, 8);
952 if (next_len) {
953 used = 0;
954 avail = buf_len;
955 if (buf_offs + next_len > c->leb_size) {
956 err = ubifs_update_one_lp(c, lnum,
957 LPROPS_NC, 0, 0, LPROPS_TAKEN);
958 if (err)
959 return err;
960 lnum = -1;
961 }
962 continue;
963 }
964 } else {
965 int blen, nxt_offs = buf_offs + used + next_len;
966
967 if (next_len && nxt_offs <= c->leb_size) {
968 if (avail > 0)
969 continue;
970 else
971 blen = buf_len;
972 } else {
973 wlen = ALIGN(wlen, 8);
974 blen = ALIGN(wlen, c->min_io_size);
975 ubifs_pad(c, c->cbuf + wlen, blen - wlen);
976 }
977 /*
978 * The buffer is full or there are no more znodes
979 * to do
980 */
981 err = ubifs_leb_write(c, lnum, c->cbuf, buf_offs,
982 blen, UBI_SHORTTERM);
983 if (err)
984 return err;
985 buf_offs += blen;
986 if (next_len) {
987 if (nxt_offs > c->leb_size) {
988 err = ubifs_update_one_lp(c, lnum,
989 LPROPS_NC, 0, 0, LPROPS_TAKEN);
990 if (err)
991 return err;
992 lnum = -1;
993 }
994 used -= blen;
995 if (used < 0)
996 used = 0;
997 avail = buf_len - used;
998 memmove(c->cbuf, c->cbuf + blen, used);
999 continue;
1000 }
1001 }
1002 break;
1003 }
1004
1005#ifdef CONFIG_UBIFS_FS_DEBUG
1006 if (lnum != c->new_ihead_lnum || buf_offs != c->new_ihead_offs) {
1007 ubifs_err("inconsistent ihead");
1008 return -EINVAL;
1009 }
1010#endif
1011
1012 c->ihead_lnum = lnum;
1013 c->ihead_offs = buf_offs;
1014
1015 return 0;
1016}
1017
1018/**
1019 * free_obsolete_znodes - free obsolete znodes.
1020 * @c: UBIFS file-system description object
1021 *
1022 * At the end of commit end, obsolete znodes are freed.
1023 */
1024static void free_obsolete_znodes(struct ubifs_info *c)
1025{
1026 struct ubifs_znode *znode, *cnext;
1027
1028 cnext = c->cnext;
1029 do {
1030 znode = cnext;
1031 cnext = znode->cnext;
1032 if (test_bit(OBSOLETE_ZNODE, &znode->flags))
1033 kfree(znode);
1034 else {
1035 znode->cnext = NULL;
1036 atomic_long_inc(&c->clean_zn_cnt);
1037 atomic_long_inc(&ubifs_clean_zn_cnt);
1038 }
1039 } while (cnext != c->cnext);
1040}
1041
1042/**
1043 * return_gap_lebs - return LEBs used by the in-gap commit method.
1044 * @c: UBIFS file-system description object
1045 *
1046 * This function clears the "taken" flag for the LEBs which were used by the
1047 * "commit in-the-gaps" method.
1048 */
1049static int return_gap_lebs(struct ubifs_info *c)
1050{
1051 int *p, err;
1052
1053 if (!c->gap_lebs)
1054 return 0;
1055
1056 dbg_cmt("");
1057 for (p = c->gap_lebs; *p != -1; p++) {
1058 err = ubifs_change_one_lp(c, *p, LPROPS_NC, LPROPS_NC, 0,
1059 LPROPS_TAKEN, 0);
1060 if (err)
1061 return err;
1062 }
1063
1064 kfree(c->gap_lebs);
1065 c->gap_lebs = NULL;
1066 return 0;
1067}
1068
1069/**
1070 * ubifs_tnc_end_commit - update the TNC for commit end.
1071 * @c: UBIFS file-system description object
1072 *
1073 * Write the dirty znodes.
1074 */
1075int ubifs_tnc_end_commit(struct ubifs_info *c)
1076{
1077 int err;
1078
1079 if (!c->cnext)
1080 return 0;
1081
1082 err = return_gap_lebs(c);
1083 if (err)
1084 return err;
1085
1086 err = write_index(c);
1087 if (err)
1088 return err;
1089
1090 mutex_lock(&c->tnc_mutex);
1091
1092 dbg_cmt("TNC height is %d", c->zroot.znode->level + 1);
1093
1094 free_obsolete_znodes(c);
1095
1096 c->cnext = NULL;
1097 kfree(c->ilebs);
1098 c->ilebs = NULL;
1099
1100 mutex_unlock(&c->tnc_mutex);
1101
1102 return 0;
1103}
diff --git a/fs/ubifs/tnc_misc.c b/fs/ubifs/tnc_misc.c
new file mode 100644
index 000000000000..a25c1cc1f8d9
--- /dev/null
+++ b/fs/ubifs/tnc_misc.c
@@ -0,0 +1,494 @@
1/*
2 * This file is part of UBIFS.
3 *
4 * Copyright (C) 2006-2008 Nokia Corporation.
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms of the GNU General Public License version 2 as published by
8 * the Free Software Foundation.
9 *
10 * This program is distributed in the hope that it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13 * more details.
14 *
15 * You should have received a copy of the GNU General Public License along with
16 * this program; if not, write to the Free Software Foundation, Inc., 51
17 * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
18 *
19 * Authors: Adrian Hunter
20 * Artem Bityutskiy (Битюцкий Артём)
21 */
22
23/*
24 * This file contains miscelanious TNC-related functions shared betweend
25 * different files. This file does not form any logically separate TNC
26 * sub-system. The file was created because there is a lot of TNC code and
27 * putting it all in one file would make that file too big and unreadable.
28 */
29
30#include "ubifs.h"
31
32/**
33 * ubifs_tnc_levelorder_next - next TNC tree element in levelorder traversal.
34 * @zr: root of the subtree to traverse
35 * @znode: previous znode
36 *
37 * This function implements levelorder TNC traversal. The LNC is ignored.
38 * Returns the next element or %NULL if @znode is already the last one.
39 */
40struct ubifs_znode *ubifs_tnc_levelorder_next(struct ubifs_znode *zr,
41 struct ubifs_znode *znode)
42{
43 int level, iip, level_search = 0;
44 struct ubifs_znode *zn;
45
46 ubifs_assert(zr);
47
48 if (unlikely(!znode))
49 return zr;
50
51 if (unlikely(znode == zr)) {
52 if (znode->level == 0)
53 return NULL;
54 return ubifs_tnc_find_child(zr, 0);
55 }
56
57 level = znode->level;
58
59 iip = znode->iip;
60 while (1) {
61 ubifs_assert(znode->level <= zr->level);
62
63 /*
64 * First walk up until there is a znode with next branch to
65 * look at.
66 */
67 while (znode->parent != zr && iip >= znode->parent->child_cnt) {
68 znode = znode->parent;
69 iip = znode->iip;
70 }
71
72 if (unlikely(znode->parent == zr &&
73 iip >= znode->parent->child_cnt)) {
74 /* This level is done, switch to the lower one */
75 level -= 1;
76 if (level_search || level < 0)
77 /*
78 * We were already looking for znode at lower
79 * level ('level_search'). As we are here
80 * again, it just does not exist. Or all levels
81 * were finished ('level < 0').
82 */
83 return NULL;
84
85 level_search = 1;
86 iip = -1;
87 znode = ubifs_tnc_find_child(zr, 0);
88 ubifs_assert(znode);
89 }
90
91 /* Switch to the next index */
92 zn = ubifs_tnc_find_child(znode->parent, iip + 1);
93 if (!zn) {
94 /* No more children to look at, we have walk up */
95 iip = znode->parent->child_cnt;
96 continue;
97 }
98
99 /* Walk back down to the level we came from ('level') */
100 while (zn->level != level) {
101 znode = zn;
102 zn = ubifs_tnc_find_child(zn, 0);
103 if (!zn) {
104 /*
105 * This path is not too deep so it does not
106 * reach 'level'. Try next path.
107 */
108 iip = znode->iip;
109 break;
110 }
111 }
112
113 if (zn) {
114 ubifs_assert(zn->level >= 0);
115 return zn;
116 }
117 }
118}
119
120/**
121 * ubifs_search_zbranch - search znode branch.
122 * @c: UBIFS file-system description object
123 * @znode: znode to search in
124 * @key: key to search for
125 * @n: znode branch slot number is returned here
126 *
127 * This is a helper function which search branch with key @key in @znode using
128 * binary search. The result of the search may be:
129 * o exact match, then %1 is returned, and the slot number of the branch is
130 * stored in @n;
131 * o no exact match, then %0 is returned and the slot number of the left
132 * closest branch is returned in @n; the slot if all keys in this znode are
133 * greater than @key, then %-1 is returned in @n.
134 */
135int ubifs_search_zbranch(const struct ubifs_info *c,
136 const struct ubifs_znode *znode,
137 const union ubifs_key *key, int *n)
138{
139 int beg = 0, end = znode->child_cnt, uninitialized_var(mid);
140 int uninitialized_var(cmp);
141 const struct ubifs_zbranch *zbr = &znode->zbranch[0];
142
143 ubifs_assert(end > beg);
144
145 while (end > beg) {
146 mid = (beg + end) >> 1;
147 cmp = keys_cmp(c, key, &zbr[mid].key);
148 if (cmp > 0)
149 beg = mid + 1;
150 else if (cmp < 0)
151 end = mid;
152 else {
153 *n = mid;
154 return 1;
155 }
156 }
157
158 *n = end - 1;
159
160 /* The insert point is after *n */
161 ubifs_assert(*n >= -1 && *n < znode->child_cnt);
162 if (*n == -1)
163 ubifs_assert(keys_cmp(c, key, &zbr[0].key) < 0);
164 else
165 ubifs_assert(keys_cmp(c, key, &zbr[*n].key) > 0);
166 if (*n + 1 < znode->child_cnt)
167 ubifs_assert(keys_cmp(c, key, &zbr[*n + 1].key) < 0);
168
169 return 0;
170}
171
172/**
173 * ubifs_tnc_postorder_first - find first znode to do postorder tree traversal.
174 * @znode: znode to start at (root of the sub-tree to traverse)
175 *
176 * Find the lowest leftmost znode in a subtree of the TNC tree. The LNC is
177 * ignored.
178 */
179struct ubifs_znode *ubifs_tnc_postorder_first(struct ubifs_znode *znode)
180{
181 if (unlikely(!znode))
182 return NULL;
183
184 while (znode->level > 0) {
185 struct ubifs_znode *child;
186
187 child = ubifs_tnc_find_child(znode, 0);
188 if (!child)
189 return znode;
190 znode = child;
191 }
192
193 return znode;
194}
195
196/**
197 * ubifs_tnc_postorder_next - next TNC tree element in postorder traversal.
198 * @znode: previous znode
199 *
200 * This function implements postorder TNC traversal. The LNC is ignored.
201 * Returns the next element or %NULL if @znode is already the last one.
202 */
203struct ubifs_znode *ubifs_tnc_postorder_next(struct ubifs_znode *znode)
204{
205 struct ubifs_znode *zn;
206
207 ubifs_assert(znode);
208 if (unlikely(!znode->parent))
209 return NULL;
210
211 /* Switch to the next index in the parent */
212 zn = ubifs_tnc_find_child(znode->parent, znode->iip + 1);
213 if (!zn)
214 /* This is in fact the last child, return parent */
215 return znode->parent;
216
217 /* Go to the first znode in this new subtree */
218 return ubifs_tnc_postorder_first(zn);
219}
220
221/**
222 * ubifs_destroy_tnc_subtree - destroy all znodes connected to a subtree.
223 * @znode: znode defining subtree to destroy
224 *
225 * This function destroys subtree of the TNC tree. Returns number of clean
226 * znodes in the subtree.
227 */
228long ubifs_destroy_tnc_subtree(struct ubifs_znode *znode)
229{
230 struct ubifs_znode *zn = ubifs_tnc_postorder_first(znode);
231 long clean_freed = 0;
232 int n;
233
234 ubifs_assert(zn);
235 while (1) {
236 for (n = 0; n < zn->child_cnt; n++) {
237 if (!zn->zbranch[n].znode)
238 continue;
239
240 if (zn->level > 0 &&
241 !ubifs_zn_dirty(zn->zbranch[n].znode))
242 clean_freed += 1;
243
244 cond_resched();
245 kfree(zn->zbranch[n].znode);
246 }
247
248 if (zn == znode) {
249 if (!ubifs_zn_dirty(zn))
250 clean_freed += 1;
251 kfree(zn);
252 return clean_freed;
253 }
254
255 zn = ubifs_tnc_postorder_next(zn);
256 }
257}
258
259/**
260 * read_znode - read an indexing node from flash and fill znode.
261 * @c: UBIFS file-system description object
262 * @lnum: LEB of the indexing node to read
263 * @offs: node offset
264 * @len: node length
265 * @znode: znode to read to
266 *
267 * This function reads an indexing node from the flash media and fills znode
268 * with the read data. Returns zero in case of success and a negative error
269 * code in case of failure. The read indexing node is validated and if anything
270 * is wrong with it, this function prints complaint messages and returns
271 * %-EINVAL.
272 */
273static int read_znode(struct ubifs_info *c, int lnum, int offs, int len,
274 struct ubifs_znode *znode)
275{
276 int i, err, type, cmp;
277 struct ubifs_idx_node *idx;
278
279 idx = kmalloc(c->max_idx_node_sz, GFP_NOFS);
280 if (!idx)
281 return -ENOMEM;
282
283 err = ubifs_read_node(c, idx, UBIFS_IDX_NODE, len, lnum, offs);
284 if (err < 0) {
285 kfree(idx);
286 return err;
287 }
288
289 znode->child_cnt = le16_to_cpu(idx->child_cnt);
290 znode->level = le16_to_cpu(idx->level);
291
292 dbg_tnc("LEB %d:%d, level %d, %d branch",
293 lnum, offs, znode->level, znode->child_cnt);
294
295 if (znode->child_cnt > c->fanout || znode->level > UBIFS_MAX_LEVELS) {
296 dbg_err("current fanout %d, branch count %d",
297 c->fanout, znode->child_cnt);
298 dbg_err("max levels %d, znode level %d",
299 UBIFS_MAX_LEVELS, znode->level);
300 err = 1;
301 goto out_dump;
302 }
303
304 for (i = 0; i < znode->child_cnt; i++) {
305 const struct ubifs_branch *br = ubifs_idx_branch(c, idx, i);
306 struct ubifs_zbranch *zbr = &znode->zbranch[i];
307
308 key_read(c, &br->key, &zbr->key);
309 zbr->lnum = le32_to_cpu(br->lnum);
310 zbr->offs = le32_to_cpu(br->offs);
311 zbr->len = le32_to_cpu(br->len);
312 zbr->znode = NULL;
313
314 /* Validate branch */
315
316 if (zbr->lnum < c->main_first ||
317 zbr->lnum >= c->leb_cnt || zbr->offs < 0 ||
318 zbr->offs + zbr->len > c->leb_size || zbr->offs & 7) {
319 dbg_err("bad branch %d", i);
320 err = 2;
321 goto out_dump;
322 }
323
324 switch (key_type(c, &zbr->key)) {
325 case UBIFS_INO_KEY:
326 case UBIFS_DATA_KEY:
327 case UBIFS_DENT_KEY:
328 case UBIFS_XENT_KEY:
329 break;
330 default:
331 dbg_msg("bad key type at slot %d: %s", i,
332 DBGKEY(&zbr->key));
333 err = 3;
334 goto out_dump;
335 }
336
337 if (znode->level)
338 continue;
339
340 type = key_type(c, &zbr->key);
341 if (c->ranges[type].max_len == 0) {
342 if (zbr->len != c->ranges[type].len) {
343 dbg_err("bad target node (type %d) length (%d)",
344 type, zbr->len);
345 dbg_err("have to be %d", c->ranges[type].len);
346 err = 4;
347 goto out_dump;
348 }
349 } else if (zbr->len < c->ranges[type].min_len ||
350 zbr->len > c->ranges[type].max_len) {
351 dbg_err("bad target node (type %d) length (%d)",
352 type, zbr->len);
353 dbg_err("have to be in range of %d-%d",
354 c->ranges[type].min_len,
355 c->ranges[type].max_len);
356 err = 5;
357 goto out_dump;
358 }
359 }
360
361 /*
362 * Ensure that the next key is greater or equivalent to the
363 * previous one.
364 */
365 for (i = 0; i < znode->child_cnt - 1; i++) {
366 const union ubifs_key *key1, *key2;
367
368 key1 = &znode->zbranch[i].key;
369 key2 = &znode->zbranch[i + 1].key;
370
371 cmp = keys_cmp(c, key1, key2);
372 if (cmp > 0) {
373 dbg_err("bad key order (keys %d and %d)", i, i + 1);
374 err = 6;
375 goto out_dump;
376 } else if (cmp == 0 && !is_hash_key(c, key1)) {
377 /* These can only be keys with colliding hash */
378 dbg_err("keys %d and %d are not hashed but equivalent",
379 i, i + 1);
380 err = 7;
381 goto out_dump;
382 }
383 }
384
385 kfree(idx);
386 return 0;
387
388out_dump:
389 ubifs_err("bad indexing node at LEB %d:%d, error %d", lnum, offs, err);
390 dbg_dump_node(c, idx);
391 kfree(idx);
392 return -EINVAL;
393}
394
395/**
396 * ubifs_load_znode - load znode to TNC cache.
397 * @c: UBIFS file-system description object
398 * @zbr: znode branch
399 * @parent: znode's parent
400 * @iip: index in parent
401 *
402 * This function loads znode pointed to by @zbr into the TNC cache and
403 * returns pointer to it in case of success and a negative error code in case
404 * of failure.
405 */
406struct ubifs_znode *ubifs_load_znode(struct ubifs_info *c,
407 struct ubifs_zbranch *zbr,
408 struct ubifs_znode *parent, int iip)
409{
410 int err;
411 struct ubifs_znode *znode;
412
413 ubifs_assert(!zbr->znode);
414 /*
415 * A slab cache is not presently used for znodes because the znode size
416 * depends on the fanout which is stored in the superblock.
417 */
418 znode = kzalloc(c->max_znode_sz, GFP_NOFS);
419 if (!znode)
420 return ERR_PTR(-ENOMEM);
421
422 err = read_znode(c, zbr->lnum, zbr->offs, zbr->len, znode);
423 if (err)
424 goto out;
425
426 atomic_long_inc(&c->clean_zn_cnt);
427
428 /*
429 * Increment the global clean znode counter as well. It is OK that
430 * global and per-FS clean znode counters may be inconsistent for some
431 * short time (because we might be preempted at this point), the global
432 * one is only used in shrinker.
433 */
434 atomic_long_inc(&ubifs_clean_zn_cnt);
435
436 zbr->znode = znode;
437 znode->parent = parent;
438 znode->time = get_seconds();
439 znode->iip = iip;
440
441 return znode;
442
443out:
444 kfree(znode);
445 return ERR_PTR(err);
446}
447
448/**
449 * ubifs_tnc_read_node - read a leaf node from the flash media.
450 * @c: UBIFS file-system description object
451 * @zbr: key and position of the node
452 * @node: node is returned here
453 *
454 * This function reads a node defined by @zbr from the flash media. Returns
455 * zero in case of success or a negative negative error code in case of
456 * failure.
457 */
458int ubifs_tnc_read_node(struct ubifs_info *c, struct ubifs_zbranch *zbr,
459 void *node)
460{
461 union ubifs_key key1, *key = &zbr->key;
462 int err, type = key_type(c, key);
463 struct ubifs_wbuf *wbuf;
464
465 /*
466 * 'zbr' has to point to on-flash node. The node may sit in a bud and
467 * may even be in a write buffer, so we have to take care about this.
468 */
469 wbuf = ubifs_get_wbuf(c, zbr->lnum);
470 if (wbuf)
471 err = ubifs_read_node_wbuf(wbuf, node, type, zbr->len,
472 zbr->lnum, zbr->offs);
473 else
474 err = ubifs_read_node(c, node, type, zbr->len, zbr->lnum,
475 zbr->offs);
476
477 if (err) {
478 dbg_tnc("key %s", DBGKEY(key));
479 return err;
480 }
481
482 /* Make sure the key of the read node is correct */
483 key_read(c, key, &key1);
484 if (memcmp(node + UBIFS_KEY_OFFSET, &key1, c->key_len)) {
485 ubifs_err("bad key in node at LEB %d:%d",
486 zbr->lnum, zbr->offs);
487 dbg_tnc("looked for key %s found node's key %s",
488 DBGKEY(key), DBGKEY1(&key1));
489 dbg_dump_node(c, node);
490 return -EINVAL;
491 }
492
493 return 0;
494}
diff --git a/fs/ubifs/ubifs-media.h b/fs/ubifs/ubifs-media.h
new file mode 100644
index 000000000000..0cc7da9bed47
--- /dev/null
+++ b/fs/ubifs/ubifs-media.h
@@ -0,0 +1,745 @@
1/*
2 * This file is part of UBIFS.
3 *
4 * Copyright (C) 2006-2008 Nokia Corporation.
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms of the GNU General Public License version 2 as published by
8 * the Free Software Foundation.
9 *
10 * This program is distributed in the hope that it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13 * more details.
14 *
15 * You should have received a copy of the GNU General Public License along with
16 * this program; if not, write to the Free Software Foundation, Inc., 51
17 * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
18 *
19 * Authors: Artem Bityutskiy (Битюцкий Артём)
20 * Adrian Hunter
21 */
22
23/*
24 * This file describes UBIFS on-flash format and contains definitions of all the
25 * relevant data structures and constants.
26 *
27 * All UBIFS on-flash objects are stored in the form of nodes. All nodes start
28 * with the UBIFS node magic number and have the same common header. Nodes
29 * always sit at 8-byte aligned positions on the media and node header sizes are
30 * also 8-byte aligned (except for the indexing node and the padding node).
31 */
32
33#ifndef __UBIFS_MEDIA_H__
34#define __UBIFS_MEDIA_H__
35
36/* UBIFS node magic number (must not have the padding byte first or last) */
37#define UBIFS_NODE_MAGIC 0x06101831
38
39/* UBIFS on-flash format version */
40#define UBIFS_FORMAT_VERSION 4
41
42/* Minimum logical eraseblock size in bytes */
43#define UBIFS_MIN_LEB_SZ (15*1024)
44
45/* Initial CRC32 value used when calculating CRC checksums */
46#define UBIFS_CRC32_INIT 0xFFFFFFFFU
47
48/*
49 * UBIFS does not try to compress data if its length is less than the below
50 * constant.
51 */
52#define UBIFS_MIN_COMPR_LEN 128
53
54/* Root inode number */
55#define UBIFS_ROOT_INO 1
56
57/* Lowest inode number used for regular inodes (not UBIFS-only internal ones) */
58#define UBIFS_FIRST_INO 64
59
60/*
61 * Maximum file name and extended attribute length (must be a multiple of 8,
62 * minus 1).
63 */
64#define UBIFS_MAX_NLEN 255
65
66/* Maximum number of data journal heads */
67#define UBIFS_MAX_JHEADS 1
68
69/*
70 * Size of UBIFS data block. Note, UBIFS is not a block oriented file-system,
71 * which means that it does not treat the underlying media as consisting of
72 * blocks like in case of hard drives. Do not be confused. UBIFS block is just
73 * the maximum amount of data which one data node can have or which can be
74 * attached to an inode node.
75 */
76#define UBIFS_BLOCK_SIZE 4096
77#define UBIFS_BLOCK_SHIFT 12
78#define UBIFS_BLOCK_MASK 0x00000FFF
79
80/* UBIFS padding byte pattern (must not be first or last byte of node magic) */
81#define UBIFS_PADDING_BYTE 0xCE
82
83/* Maximum possible key length */
84#define UBIFS_MAX_KEY_LEN 16
85
86/* Key length ("simple" format) */
87#define UBIFS_SK_LEN 8
88
89/* Minimum index tree fanout */
90#define UBIFS_MIN_FANOUT 2
91
92/* Maximum number of levels in UBIFS indexing B-tree */
93#define UBIFS_MAX_LEVELS 512
94
95/* Maximum amount of data attached to an inode in bytes */
96#define UBIFS_MAX_INO_DATA UBIFS_BLOCK_SIZE
97
98/* LEB Properties Tree fanout (must be power of 2) and fanout shift */
99#define UBIFS_LPT_FANOUT 4
100#define UBIFS_LPT_FANOUT_SHIFT 2
101
102/* LEB Properties Tree bit field sizes */
103#define UBIFS_LPT_CRC_BITS 16
104#define UBIFS_LPT_CRC_BYTES 2
105#define UBIFS_LPT_TYPE_BITS 4
106
107/* The key is always at the same position in all keyed nodes */
108#define UBIFS_KEY_OFFSET offsetof(struct ubifs_ino_node, key)
109
110/*
111 * LEB Properties Tree node types.
112 *
113 * UBIFS_LPT_PNODE: LPT leaf node (contains LEB properties)
114 * UBIFS_LPT_NNODE: LPT internal node
115 * UBIFS_LPT_LTAB: LPT's own lprops table
116 * UBIFS_LPT_LSAVE: LPT's save table (big model only)
117 * UBIFS_LPT_NODE_CNT: count of LPT node types
118 * UBIFS_LPT_NOT_A_NODE: all ones (15 for 4 bits) is never a valid node type
119 */
120enum {
121 UBIFS_LPT_PNODE,
122 UBIFS_LPT_NNODE,
123 UBIFS_LPT_LTAB,
124 UBIFS_LPT_LSAVE,
125 UBIFS_LPT_NODE_CNT,
126 UBIFS_LPT_NOT_A_NODE = (1 << UBIFS_LPT_TYPE_BITS) - 1,
127};
128
129/*
130 * UBIFS inode types.
131 *
132 * UBIFS_ITYPE_REG: regular file
133 * UBIFS_ITYPE_DIR: directory
134 * UBIFS_ITYPE_LNK: soft link
135 * UBIFS_ITYPE_BLK: block device node
136 * UBIFS_ITYPE_CHR: character device node
137 * UBIFS_ITYPE_FIFO: fifo
138 * UBIFS_ITYPE_SOCK: socket
139 * UBIFS_ITYPES_CNT: count of supported file types
140 */
141enum {
142 UBIFS_ITYPE_REG,
143 UBIFS_ITYPE_DIR,
144 UBIFS_ITYPE_LNK,
145 UBIFS_ITYPE_BLK,
146 UBIFS_ITYPE_CHR,
147 UBIFS_ITYPE_FIFO,
148 UBIFS_ITYPE_SOCK,
149 UBIFS_ITYPES_CNT,
150};
151
152/*
153 * Supported key hash functions.
154 *
155 * UBIFS_KEY_HASH_R5: R5 hash
156 * UBIFS_KEY_HASH_TEST: test hash which just returns first 4 bytes of the name
157 */
158enum {
159 UBIFS_KEY_HASH_R5,
160 UBIFS_KEY_HASH_TEST,
161};
162
163/*
164 * Supported key formats.
165 *
166 * UBIFS_SIMPLE_KEY_FMT: simple key format
167 */
168enum {
169 UBIFS_SIMPLE_KEY_FMT,
170};
171
172/*
173 * The simple key format uses 29 bits for storing UBIFS block number and hash
174 * value.
175 */
176#define UBIFS_S_KEY_BLOCK_BITS 29
177#define UBIFS_S_KEY_BLOCK_MASK 0x1FFFFFFF
178#define UBIFS_S_KEY_HASH_BITS UBIFS_S_KEY_BLOCK_BITS
179#define UBIFS_S_KEY_HASH_MASK UBIFS_S_KEY_BLOCK_MASK
180
181/*
182 * Key types.
183 *
184 * UBIFS_INO_KEY: inode node key
185 * UBIFS_DATA_KEY: data node key
186 * UBIFS_DENT_KEY: directory entry node key
187 * UBIFS_XENT_KEY: extended attribute entry key
188 * UBIFS_KEY_TYPES_CNT: number of supported key types
189 */
190enum {
191 UBIFS_INO_KEY,
192 UBIFS_DATA_KEY,
193 UBIFS_DENT_KEY,
194 UBIFS_XENT_KEY,
195 UBIFS_KEY_TYPES_CNT,
196};
197
198/* Count of LEBs reserved for the superblock area */
199#define UBIFS_SB_LEBS 1
200/* Count of LEBs reserved for the master area */
201#define UBIFS_MST_LEBS 2
202
203/* First LEB of the superblock area */
204#define UBIFS_SB_LNUM 0
205/* First LEB of the master area */
206#define UBIFS_MST_LNUM (UBIFS_SB_LNUM + UBIFS_SB_LEBS)
207/* First LEB of the log area */
208#define UBIFS_LOG_LNUM (UBIFS_MST_LNUM + UBIFS_MST_LEBS)
209
210/*
211 * The below constants define the absolute minimum values for various UBIFS
212 * media areas. Many of them actually depend of flash geometry and the FS
213 * configuration (number of journal heads, orphan LEBs, etc). This means that
214 * the smallest volume size which can be used for UBIFS cannot be pre-defined
215 * by these constants. The file-system that meets the below limitation will not
216 * necessarily mount. UBIFS does run-time calculations and validates the FS
217 * size.
218 */
219
220/* Minimum number of logical eraseblocks in the log */
221#define UBIFS_MIN_LOG_LEBS 2
222/* Minimum number of bud logical eraseblocks (one for each head) */
223#define UBIFS_MIN_BUD_LEBS 3
224/* Minimum number of journal logical eraseblocks */
225#define UBIFS_MIN_JNL_LEBS (UBIFS_MIN_LOG_LEBS + UBIFS_MIN_BUD_LEBS)
226/* Minimum number of LPT area logical eraseblocks */
227#define UBIFS_MIN_LPT_LEBS 2
228/* Minimum number of orphan area logical eraseblocks */
229#define UBIFS_MIN_ORPH_LEBS 1
230/*
231 * Minimum number of main area logical eraseblocks (buds, 2 for the index, 1
232 * for GC, 1 for deletions, and at least 1 for committed data).
233 */
234#define UBIFS_MIN_MAIN_LEBS (UBIFS_MIN_BUD_LEBS + 5)
235
236/* Minimum number of logical eraseblocks */
237#define UBIFS_MIN_LEB_CNT (UBIFS_SB_LEBS + UBIFS_MST_LEBS + \
238 UBIFS_MIN_LOG_LEBS + UBIFS_MIN_LPT_LEBS + \
239 UBIFS_MIN_ORPH_LEBS + UBIFS_MIN_MAIN_LEBS)
240
241/* Node sizes (N.B. these are guaranteed to be multiples of 8) */
242#define UBIFS_CH_SZ sizeof(struct ubifs_ch)
243#define UBIFS_INO_NODE_SZ sizeof(struct ubifs_ino_node)
244#define UBIFS_DATA_NODE_SZ sizeof(struct ubifs_data_node)
245#define UBIFS_DENT_NODE_SZ sizeof(struct ubifs_dent_node)
246#define UBIFS_TRUN_NODE_SZ sizeof(struct ubifs_trun_node)
247#define UBIFS_PAD_NODE_SZ sizeof(struct ubifs_pad_node)
248#define UBIFS_SB_NODE_SZ sizeof(struct ubifs_sb_node)
249#define UBIFS_MST_NODE_SZ sizeof(struct ubifs_mst_node)
250#define UBIFS_REF_NODE_SZ sizeof(struct ubifs_ref_node)
251#define UBIFS_IDX_NODE_SZ sizeof(struct ubifs_idx_node)
252#define UBIFS_CS_NODE_SZ sizeof(struct ubifs_cs_node)
253#define UBIFS_ORPH_NODE_SZ sizeof(struct ubifs_orph_node)
254/* Extended attribute entry nodes are identical to directory entry nodes */
255#define UBIFS_XENT_NODE_SZ UBIFS_DENT_NODE_SZ
256/* Only this does not have to be multiple of 8 bytes */
257#define UBIFS_BRANCH_SZ sizeof(struct ubifs_branch)
258
259/* Maximum node sizes (N.B. these are guaranteed to be multiples of 8) */
260#define UBIFS_MAX_DATA_NODE_SZ (UBIFS_DATA_NODE_SZ + UBIFS_BLOCK_SIZE)
261#define UBIFS_MAX_INO_NODE_SZ (UBIFS_INO_NODE_SZ + UBIFS_MAX_INO_DATA)
262#define UBIFS_MAX_DENT_NODE_SZ (UBIFS_DENT_NODE_SZ + UBIFS_MAX_NLEN + 1)
263#define UBIFS_MAX_XENT_NODE_SZ UBIFS_MAX_DENT_NODE_SZ
264
265/* The largest UBIFS node */
266#define UBIFS_MAX_NODE_SZ UBIFS_MAX_INO_NODE_SZ
267
268/*
269 * On-flash inode flags.
270 *
271 * UBIFS_COMPR_FL: use compression for this inode
272 * UBIFS_SYNC_FL: I/O on this inode has to be synchronous
273 * UBIFS_IMMUTABLE_FL: inode is immutable
274 * UBIFS_APPEND_FL: writes to the inode may only append data
275 * UBIFS_DIRSYNC_FL: I/O on this directory inode has to be synchronous
276 * UBIFS_XATTR_FL: this inode is the inode for an extended attribute value
277 *
278 * Note, these are on-flash flags which correspond to ioctl flags
279 * (@FS_COMPR_FL, etc). They have the same values now, but generally, do not
280 * have to be the same.
281 */
282enum {
283 UBIFS_COMPR_FL = 0x01,
284 UBIFS_SYNC_FL = 0x02,
285 UBIFS_IMMUTABLE_FL = 0x04,
286 UBIFS_APPEND_FL = 0x08,
287 UBIFS_DIRSYNC_FL = 0x10,
288 UBIFS_XATTR_FL = 0x20,
289};
290
291/* Inode flag bits used by UBIFS */
292#define UBIFS_FL_MASK 0x0000001F
293
294/*
295 * UBIFS compression algorithms.
296 *
297 * UBIFS_COMPR_NONE: no compression
298 * UBIFS_COMPR_LZO: LZO compression
299 * UBIFS_COMPR_ZLIB: ZLIB compression
300 * UBIFS_COMPR_TYPES_CNT: count of supported compression types
301 */
302enum {
303 UBIFS_COMPR_NONE,
304 UBIFS_COMPR_LZO,
305 UBIFS_COMPR_ZLIB,
306 UBIFS_COMPR_TYPES_CNT,
307};
308
309/*
310 * UBIFS node types.
311 *
312 * UBIFS_INO_NODE: inode node
313 * UBIFS_DATA_NODE: data node
314 * UBIFS_DENT_NODE: directory entry node
315 * UBIFS_XENT_NODE: extended attribute node
316 * UBIFS_TRUN_NODE: truncation node
317 * UBIFS_PAD_NODE: padding node
318 * UBIFS_SB_NODE: superblock node
319 * UBIFS_MST_NODE: master node
320 * UBIFS_REF_NODE: LEB reference node
321 * UBIFS_IDX_NODE: index node
322 * UBIFS_CS_NODE: commit start node
323 * UBIFS_ORPH_NODE: orphan node
324 * UBIFS_NODE_TYPES_CNT: count of supported node types
325 *
326 * Note, we index arrays by these numbers, so keep them low and contiguous.
327 * Node type constants for inodes, direntries and so on have to be the same as
328 * corresponding key type constants.
329 */
330enum {
331 UBIFS_INO_NODE,
332 UBIFS_DATA_NODE,
333 UBIFS_DENT_NODE,
334 UBIFS_XENT_NODE,
335 UBIFS_TRUN_NODE,
336 UBIFS_PAD_NODE,
337 UBIFS_SB_NODE,
338 UBIFS_MST_NODE,
339 UBIFS_REF_NODE,
340 UBIFS_IDX_NODE,
341 UBIFS_CS_NODE,
342 UBIFS_ORPH_NODE,
343 UBIFS_NODE_TYPES_CNT,
344};
345
346/*
347 * Master node flags.
348 *
349 * UBIFS_MST_DIRTY: rebooted uncleanly - master node is dirty
350 * UBIFS_MST_NO_ORPHS: no orphan inodes present
351 * UBIFS_MST_RCVRY: written by recovery
352 */
353enum {
354 UBIFS_MST_DIRTY = 1,
355 UBIFS_MST_NO_ORPHS = 2,
356 UBIFS_MST_RCVRY = 4,
357};
358
359/*
360 * Node group type (used by recovery to recover whole group or none).
361 *
362 * UBIFS_NO_NODE_GROUP: this node is not part of a group
363 * UBIFS_IN_NODE_GROUP: this node is a part of a group
364 * UBIFS_LAST_OF_NODE_GROUP: this node is the last in a group
365 */
366enum {
367 UBIFS_NO_NODE_GROUP = 0,
368 UBIFS_IN_NODE_GROUP,
369 UBIFS_LAST_OF_NODE_GROUP,
370};
371
372/*
373 * Superblock flags.
374 *
375 * UBIFS_FLG_BIGLPT: if "big" LPT model is used if set
376 */
377enum {
378 UBIFS_FLG_BIGLPT = 0x02,
379};
380
381/**
382 * struct ubifs_ch - common header node.
383 * @magic: UBIFS node magic number (%UBIFS_NODE_MAGIC)
384 * @crc: CRC-32 checksum of the node header
385 * @sqnum: sequence number
386 * @len: full node length
387 * @node_type: node type
388 * @group_type: node group type
389 * @padding: reserved for future, zeroes
390 *
391 * Every UBIFS node starts with this common part. If the node has a key, the
392 * key always goes next.
393 */
394struct ubifs_ch {
395 __le32 magic;
396 __le32 crc;
397 __le64 sqnum;
398 __le32 len;
399 __u8 node_type;
400 __u8 group_type;
401 __u8 padding[2];
402} __attribute__ ((packed));
403
404/**
405 * union ubifs_dev_desc - device node descriptor.
406 * @new: new type device descriptor
407 * @huge: huge type device descriptor
408 *
409 * This data structure describes major/minor numbers of a device node. In an
410 * inode is a device node then its data contains an object of this type. UBIFS
411 * uses standard Linux "new" and "huge" device node encodings.
412 */
413union ubifs_dev_desc {
414 __le32 new;
415 __le64 huge;
416} __attribute__ ((packed));
417
418/**
419 * struct ubifs_ino_node - inode node.
420 * @ch: common header
421 * @key: node key
422 * @creat_sqnum: sequence number at time of creation
423 * @size: inode size in bytes (amount of uncompressed data)
424 * @atime_sec: access time seconds
425 * @ctime_sec: creation time seconds
426 * @mtime_sec: modification time seconds
427 * @atime_nsec: access time nanoseconds
428 * @ctime_nsec: creation time nanoseconds
429 * @mtime_nsec: modification time nanoseconds
430 * @nlink: number of hard links
431 * @uid: owner ID
432 * @gid: group ID
433 * @mode: access flags
434 * @flags: per-inode flags (%UBIFS_COMPR_FL, %UBIFS_SYNC_FL, etc)
435 * @data_len: inode data length
436 * @xattr_cnt: count of extended attributes this inode has
437 * @xattr_size: summarized size of all extended attributes in bytes
438 * @padding1: reserved for future, zeroes
439 * @xattr_names: sum of lengths of all extended attribute names belonging to
440 * this inode
441 * @compr_type: compression type used for this inode
442 * @padding2: reserved for future, zeroes
443 * @data: data attached to the inode
444 *
445 * Note, even though inode compression type is defined by @compr_type, some
446 * nodes of this inode may be compressed with different compressor - this
447 * happens if compression type is changed while the inode already has data
448 * nodes. But @compr_type will be use for further writes to the inode.
449 *
450 * Note, do not forget to amend 'zero_ino_node_unused()' function when changing
451 * the padding fields.
452 */
453struct ubifs_ino_node {
454 struct ubifs_ch ch;
455 __u8 key[UBIFS_MAX_KEY_LEN];
456 __le64 creat_sqnum;
457 __le64 size;
458 __le64 atime_sec;
459 __le64 ctime_sec;
460 __le64 mtime_sec;
461 __le32 atime_nsec;
462 __le32 ctime_nsec;
463 __le32 mtime_nsec;
464 __le32 nlink;
465 __le32 uid;
466 __le32 gid;
467 __le32 mode;
468 __le32 flags;
469 __le32 data_len;
470 __le32 xattr_cnt;
471 __le32 xattr_size;
472 __u8 padding1[4]; /* Watch 'zero_ino_node_unused()' if changing! */
473 __le32 xattr_names;
474 __le16 compr_type;
475 __u8 padding2[26]; /* Watch 'zero_ino_node_unused()' if changing! */
476 __u8 data[];
477} __attribute__ ((packed));
478
479/**
480 * struct ubifs_dent_node - directory entry node.
481 * @ch: common header
482 * @key: node key
483 * @inum: target inode number
484 * @padding1: reserved for future, zeroes
485 * @type: type of the target inode (%UBIFS_ITYPE_REG, %UBIFS_ITYPE_DIR, etc)
486 * @nlen: name length
487 * @padding2: reserved for future, zeroes
488 * @name: zero-terminated name
489 *
490 * Note, do not forget to amend 'zero_dent_node_unused()' function when
491 * changing the padding fields.
492 */
493struct ubifs_dent_node {
494 struct ubifs_ch ch;
495 __u8 key[UBIFS_MAX_KEY_LEN];
496 __le64 inum;
497 __u8 padding1;
498 __u8 type;
499 __le16 nlen;
500 __u8 padding2[4]; /* Watch 'zero_dent_node_unused()' if changing! */
501 __u8 name[];
502} __attribute__ ((packed));
503
504/**
505 * struct ubifs_data_node - data node.
506 * @ch: common header
507 * @key: node key
508 * @size: uncompressed data size in bytes
509 * @compr_type: compression type (%UBIFS_COMPR_NONE, %UBIFS_COMPR_LZO, etc)
510 * @padding: reserved for future, zeroes
511 * @data: data
512 *
513 * Note, do not forget to amend 'zero_data_node_unused()' function when
514 * changing the padding fields.
515 */
516struct ubifs_data_node {
517 struct ubifs_ch ch;
518 __u8 key[UBIFS_MAX_KEY_LEN];
519 __le32 size;
520 __le16 compr_type;
521 __u8 padding[2]; /* Watch 'zero_data_node_unused()' if changing! */
522 __u8 data[];
523} __attribute__ ((packed));
524
525/**
526 * struct ubifs_trun_node - truncation node.
527 * @ch: common header
528 * @inum: truncated inode number
529 * @padding: reserved for future, zeroes
530 * @old_size: size before truncation
531 * @new_size: size after truncation
532 *
533 * This node exists only in the journal and never goes to the main area. Note,
534 * do not forget to amend 'zero_trun_node_unused()' function when changing the
535 * padding fields.
536 */
537struct ubifs_trun_node {
538 struct ubifs_ch ch;
539 __le32 inum;
540 __u8 padding[12]; /* Watch 'zero_trun_node_unused()' if changing! */
541 __le64 old_size;
542 __le64 new_size;
543} __attribute__ ((packed));
544
545/**
546 * struct ubifs_pad_node - padding node.
547 * @ch: common header
548 * @pad_len: how many bytes after this node are unused (because padded)
549 * @padding: reserved for future, zeroes
550 */
551struct ubifs_pad_node {
552 struct ubifs_ch ch;
553 __le32 pad_len;
554} __attribute__ ((packed));
555
556/**
557 * struct ubifs_sb_node - superblock node.
558 * @ch: common header
559 * @padding: reserved for future, zeroes
560 * @key_hash: type of hash function used in keys
561 * @key_fmt: format of the key
562 * @flags: file-system flags (%UBIFS_FLG_BIGLPT, etc)
563 * @min_io_size: minimal input/output unit size
564 * @leb_size: logical eraseblock size in bytes
565 * @leb_cnt: count of LEBs used by file-system
566 * @max_leb_cnt: maximum count of LEBs used by file-system
567 * @max_bud_bytes: maximum amount of data stored in buds
568 * @log_lebs: log size in logical eraseblocks
569 * @lpt_lebs: number of LEBs used for lprops table
570 * @orph_lebs: number of LEBs used for recording orphans
571 * @jhead_cnt: count of journal heads
572 * @fanout: tree fanout (max. number of links per indexing node)
573 * @lsave_cnt: number of LEB numbers in LPT's save table
574 * @fmt_version: UBIFS on-flash format version
575 * @default_compr: default compression algorithm (%UBIFS_COMPR_LZO, etc)
576 * @padding1: reserved for future, zeroes
577 * @rp_uid: reserve pool UID
578 * @rp_gid: reserve pool GID
579 * @rp_size: size of the reserved pool in bytes
580 * @padding2: reserved for future, zeroes
581 * @time_gran: time granularity in nanoseconds
582 * @uuid: UUID generated when the file system image was created
583 */
584struct ubifs_sb_node {
585 struct ubifs_ch ch;
586 __u8 padding[2];
587 __u8 key_hash;
588 __u8 key_fmt;
589 __le32 flags;
590 __le32 min_io_size;
591 __le32 leb_size;
592 __le32 leb_cnt;
593 __le32 max_leb_cnt;
594 __le64 max_bud_bytes;
595 __le32 log_lebs;
596 __le32 lpt_lebs;
597 __le32 orph_lebs;
598 __le32 jhead_cnt;
599 __le32 fanout;
600 __le32 lsave_cnt;
601 __le32 fmt_version;
602 __le16 default_compr;
603 __u8 padding1[2];
604 __le32 rp_uid;
605 __le32 rp_gid;
606 __le64 rp_size;
607 __le32 time_gran;
608 __u8 uuid[16];
609 __u8 padding2[3972];
610} __attribute__ ((packed));
611
612/**
613 * struct ubifs_mst_node - master node.
614 * @ch: common header
615 * @highest_inum: highest inode number in the committed index
616 * @cmt_no: commit number
617 * @flags: various flags (%UBIFS_MST_DIRTY, etc)
618 * @log_lnum: start of the log
619 * @root_lnum: LEB number of the root indexing node
620 * @root_offs: offset within @root_lnum
621 * @root_len: root indexing node length
622 * @gc_lnum: LEB reserved for garbage collection (%-1 value means the LEB was
623 * not reserved and should be reserved on mount)
624 * @ihead_lnum: LEB number of index head
625 * @ihead_offs: offset of index head
626 * @index_size: size of index on flash
627 * @total_free: total free space in bytes
628 * @total_dirty: total dirty space in bytes
629 * @total_used: total used space in bytes (includes only data LEBs)
630 * @total_dead: total dead space in bytes (includes only data LEBs)
631 * @total_dark: total dark space in bytes (includes only data LEBs)
632 * @lpt_lnum: LEB number of LPT root nnode
633 * @lpt_offs: offset of LPT root nnode
634 * @nhead_lnum: LEB number of LPT head
635 * @nhead_offs: offset of LPT head
636 * @ltab_lnum: LEB number of LPT's own lprops table
637 * @ltab_offs: offset of LPT's own lprops table
638 * @lsave_lnum: LEB number of LPT's save table (big model only)
639 * @lsave_offs: offset of LPT's save table (big model only)
640 * @lscan_lnum: LEB number of last LPT scan
641 * @empty_lebs: number of empty logical eraseblocks
642 * @idx_lebs: number of indexing logical eraseblocks
643 * @leb_cnt: count of LEBs used by file-system
644 * @padding: reserved for future, zeroes
645 */
646struct ubifs_mst_node {
647 struct ubifs_ch ch;
648 __le64 highest_inum;
649 __le64 cmt_no;
650 __le32 flags;
651 __le32 log_lnum;
652 __le32 root_lnum;
653 __le32 root_offs;
654 __le32 root_len;
655 __le32 gc_lnum;
656 __le32 ihead_lnum;
657 __le32 ihead_offs;
658 __le64 index_size;
659 __le64 total_free;
660 __le64 total_dirty;
661 __le64 total_used;
662 __le64 total_dead;
663 __le64 total_dark;
664 __le32 lpt_lnum;
665 __le32 lpt_offs;
666 __le32 nhead_lnum;
667 __le32 nhead_offs;
668 __le32 ltab_lnum;
669 __le32 ltab_offs;
670 __le32 lsave_lnum;
671 __le32 lsave_offs;
672 __le32 lscan_lnum;
673 __le32 empty_lebs;
674 __le32 idx_lebs;
675 __le32 leb_cnt;
676 __u8 padding[344];
677} __attribute__ ((packed));
678
679/**
680 * struct ubifs_ref_node - logical eraseblock reference node.
681 * @ch: common header
682 * @lnum: the referred logical eraseblock number
683 * @offs: start offset in the referred LEB
684 * @jhead: journal head number
685 * @padding: reserved for future, zeroes
686 */
687struct ubifs_ref_node {
688 struct ubifs_ch ch;
689 __le32 lnum;
690 __le32 offs;
691 __le32 jhead;
692 __u8 padding[28];
693} __attribute__ ((packed));
694
695/**
696 * struct ubifs_branch - key/reference/length branch
697 * @lnum: LEB number of the target node
698 * @offs: offset within @lnum
699 * @len: target node length
700 * @key: key
701 */
702struct ubifs_branch {
703 __le32 lnum;
704 __le32 offs;
705 __le32 len;
706 __u8 key[];
707} __attribute__ ((packed));
708
709/**
710 * struct ubifs_idx_node - indexing node.
711 * @ch: common header
712 * @child_cnt: number of child index nodes
713 * @level: tree level
714 * @branches: LEB number / offset / length / key branches
715 */
716struct ubifs_idx_node {
717 struct ubifs_ch ch;
718 __le16 child_cnt;
719 __le16 level;
720 __u8 branches[];
721} __attribute__ ((packed));
722
723/**
724 * struct ubifs_cs_node - commit start node.
725 * @ch: common header
726 * @cmt_no: commit number
727 */
728struct ubifs_cs_node {
729 struct ubifs_ch ch;
730 __le64 cmt_no;
731} __attribute__ ((packed));
732
733/**
734 * struct ubifs_orph_node - orphan node.
735 * @ch: common header
736 * @cmt_no: commit number (also top bit is set on the last node of the commit)
737 * @inos: inode numbers of orphans
738 */
739struct ubifs_orph_node {
740 struct ubifs_ch ch;
741 __le64 cmt_no;
742 __le64 inos[];
743} __attribute__ ((packed));
744
745#endif /* __UBIFS_MEDIA_H__ */
diff --git a/fs/ubifs/ubifs.h b/fs/ubifs/ubifs.h
new file mode 100644
index 000000000000..e4f89f271827
--- /dev/null
+++ b/fs/ubifs/ubifs.h
@@ -0,0 +1,1649 @@
1/*
2 * This file is part of UBIFS.
3 *
4 * Copyright (C) 2006-2008 Nokia Corporation
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms of the GNU General Public License version 2 as published by
8 * the Free Software Foundation.
9 *
10 * This program is distributed in the hope that it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13 * more details.
14 *
15 * You should have received a copy of the GNU General Public License along with
16 * this program; if not, write to the Free Software Foundation, Inc., 51
17 * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
18 *
19 * Authors: Artem Bityutskiy (Битюцкий Артём)
20 * Adrian Hunter
21 */
22
23/* Implementation version 0.7 */
24
25#ifndef __UBIFS_H__
26#define __UBIFS_H__
27
28#include <asm/div64.h>
29#include <linux/statfs.h>
30#include <linux/fs.h>
31#include <linux/err.h>
32#include <linux/sched.h>
33#include <linux/vmalloc.h>
34#include <linux/spinlock.h>
35#include <linux/mutex.h>
36#include <linux/rwsem.h>
37#include <linux/mtd/ubi.h>
38#include <linux/pagemap.h>
39#include <linux/backing-dev.h>
40#include "ubifs-media.h"
41
42/* Version of this UBIFS implementation */
43#define UBIFS_VERSION 1
44
45/* Normal UBIFS messages */
46#define ubifs_msg(fmt, ...) \
47 printk(KERN_NOTICE "UBIFS: " fmt "\n", ##__VA_ARGS__)
48/* UBIFS error messages */
49#define ubifs_err(fmt, ...) \
50 printk(KERN_ERR "UBIFS error (pid %d): %s: " fmt "\n", current->pid, \
51 __func__, ##__VA_ARGS__)
52/* UBIFS warning messages */
53#define ubifs_warn(fmt, ...) \
54 printk(KERN_WARNING "UBIFS warning (pid %d): %s: " fmt "\n", \
55 current->pid, __func__, ##__VA_ARGS__)
56
57/* UBIFS file system VFS magic number */
58#define UBIFS_SUPER_MAGIC 0x24051905
59
60/* Number of UBIFS blocks per VFS page */
61#define UBIFS_BLOCKS_PER_PAGE (PAGE_CACHE_SIZE / UBIFS_BLOCK_SIZE)
62#define UBIFS_BLOCKS_PER_PAGE_SHIFT (PAGE_CACHE_SHIFT - UBIFS_BLOCK_SHIFT)
63
64/* "File system end of life" sequence number watermark */
65#define SQNUM_WARN_WATERMARK 0xFFFFFFFF00000000ULL
66#define SQNUM_WATERMARK 0xFFFFFFFFFF000000ULL
67
68/* Minimum amount of data UBIFS writes to the flash */
69#define MIN_WRITE_SZ (UBIFS_DATA_NODE_SZ + 8)
70
71/*
72 * Currently we do not support inode number overlapping and re-using, so this
73 * watermark defines dangerous inode number level. This should be fixed later,
74 * although it is difficult to exceed current limit. Another option is to use
75 * 64-bit inode numbers, but this means more overhead.
76 */
77#define INUM_WARN_WATERMARK 0xFFF00000
78#define INUM_WATERMARK 0xFFFFFF00
79
80/* Largest key size supported in this implementation */
81#define CUR_MAX_KEY_LEN UBIFS_SK_LEN
82
83/* Maximum number of entries in each LPT (LEB category) heap */
84#define LPT_HEAP_SZ 256
85
86/*
87 * Background thread name pattern. The numbers are UBI device and volume
88 * numbers.
89 */
90#define BGT_NAME_PATTERN "ubifs_bgt%d_%d"
91
92/* Default write-buffer synchronization timeout (5 secs) */
93#define DEFAULT_WBUF_TIMEOUT (5 * HZ)
94
95/* Maximum possible inode number (only 32-bit inodes are supported now) */
96#define MAX_INUM 0xFFFFFFFF
97
98/* Number of non-data journal heads */
99#define NONDATA_JHEADS_CNT 2
100
101/* Garbage collector head */
102#define GCHD 0
103/* Base journal head number */
104#define BASEHD 1
105/* First "general purpose" journal head */
106#define DATAHD 2
107
108/* 'No change' value for 'ubifs_change_lp()' */
109#define LPROPS_NC 0x80000001
110
111/*
112 * There is no notion of truncation key because truncation nodes do not exist
113 * in TNC. However, when replaying, it is handy to introduce fake "truncation"
114 * keys for truncation nodes because the code becomes simpler. So we define
115 * %UBIFS_TRUN_KEY type.
116 */
117#define UBIFS_TRUN_KEY UBIFS_KEY_TYPES_CNT
118
119/*
120 * How much a directory entry/extended attribute entry adds to the parent/host
121 * inode.
122 */
123#define CALC_DENT_SIZE(name_len) ALIGN(UBIFS_DENT_NODE_SZ + (name_len) + 1, 8)
124
125/* How much an extended attribute adds to the host inode */
126#define CALC_XATTR_BYTES(data_len) ALIGN(UBIFS_INO_NODE_SZ + (data_len) + 1, 8)
127
128/*
129 * Znodes which were not touched for 'OLD_ZNODE_AGE' seconds are considered
130 * "old", and znode which were touched last 'YOUNG_ZNODE_AGE' seconds ago are
131 * considered "young". This is used by shrinker when selecting znode to trim
132 * off.
133 */
134#define OLD_ZNODE_AGE 20
135#define YOUNG_ZNODE_AGE 5
136
137/*
138 * Some compressors, like LZO, may end up with more data then the input buffer.
139 * So UBIFS always allocates larger output buffer, to be sure the compressor
140 * will not corrupt memory in case of worst case compression.
141 */
142#define WORST_COMPR_FACTOR 2
143
144/* Maximum expected tree height for use by bottom_up_buf */
145#define BOTTOM_UP_HEIGHT 64
146
147/*
148 * Lockdep classes for UBIFS inode @ui_mutex.
149 */
150enum {
151 WB_MUTEX_1 = 0,
152 WB_MUTEX_2 = 1,
153 WB_MUTEX_3 = 2,
154};
155
156/*
157 * Znode flags (actually, bit numbers which store the flags).
158 *
159 * DIRTY_ZNODE: znode is dirty
160 * COW_ZNODE: znode is being committed and a new instance of this znode has to
161 * be created before changing this znode
162 * OBSOLETE_ZNODE: znode is obsolete, which means it was deleted, but it is
163 * still in the commit list and the ongoing commit operation
164 * will commit it, and delete this znode after it is done
165 */
166enum {
167 DIRTY_ZNODE = 0,
168 COW_ZNODE = 1,
169 OBSOLETE_ZNODE = 2,
170};
171
172/*
173 * Commit states.
174 *
175 * COMMIT_RESTING: commit is not wanted
176 * COMMIT_BACKGROUND: background commit has been requested
177 * COMMIT_REQUIRED: commit is required
178 * COMMIT_RUNNING_BACKGROUND: background commit is running
179 * COMMIT_RUNNING_REQUIRED: commit is running and it is required
180 * COMMIT_BROKEN: commit failed
181 */
182enum {
183 COMMIT_RESTING = 0,
184 COMMIT_BACKGROUND,
185 COMMIT_REQUIRED,
186 COMMIT_RUNNING_BACKGROUND,
187 COMMIT_RUNNING_REQUIRED,
188 COMMIT_BROKEN,
189};
190
191/*
192 * 'ubifs_scan_a_node()' return values.
193 *
194 * SCANNED_GARBAGE: scanned garbage
195 * SCANNED_EMPTY_SPACE: scanned empty space
196 * SCANNED_A_NODE: scanned a valid node
197 * SCANNED_A_CORRUPT_NODE: scanned a corrupted node
198 * SCANNED_A_BAD_PAD_NODE: scanned a padding node with invalid pad length
199 *
200 * Greater than zero means: 'scanned that number of padding bytes'
201 */
202enum {
203 SCANNED_GARBAGE = 0,
204 SCANNED_EMPTY_SPACE = -1,
205 SCANNED_A_NODE = -2,
206 SCANNED_A_CORRUPT_NODE = -3,
207 SCANNED_A_BAD_PAD_NODE = -4,
208};
209
210/*
211 * LPT cnode flag bits.
212 *
213 * DIRTY_CNODE: cnode is dirty
214 * COW_CNODE: cnode is being committed and must be copied before writing
215 * OBSOLETE_CNODE: cnode is being committed and has been copied (or deleted),
216 * so it can (and must) be freed when the commit is finished
217 */
218enum {
219 DIRTY_CNODE = 0,
220 COW_CNODE = 1,
221 OBSOLETE_CNODE = 2,
222};
223
224/*
225 * Dirty flag bits (lpt_drty_flgs) for LPT special nodes.
226 *
227 * LTAB_DIRTY: ltab node is dirty
228 * LSAVE_DIRTY: lsave node is dirty
229 */
230enum {
231 LTAB_DIRTY = 1,
232 LSAVE_DIRTY = 2,
233};
234
235/*
236 * Return codes used by the garbage collector.
237 * @LEB_FREED: the logical eraseblock was freed and is ready to use
238 * @LEB_FREED_IDX: indexing LEB was freed and can be used only after the commit
239 * @LEB_RETAINED: the logical eraseblock was freed and retained for GC purposes
240 */
241enum {
242 LEB_FREED,
243 LEB_FREED_IDX,
244 LEB_RETAINED,
245};
246
247/**
248 * struct ubifs_old_idx - index node obsoleted since last commit start.
249 * @rb: rb-tree node
250 * @lnum: LEB number of obsoleted index node
251 * @offs: offset of obsoleted index node
252 */
253struct ubifs_old_idx {
254 struct rb_node rb;
255 int lnum;
256 int offs;
257};
258
259/* The below union makes it easier to deal with keys */
260union ubifs_key {
261 uint8_t u8[CUR_MAX_KEY_LEN];
262 uint32_t u32[CUR_MAX_KEY_LEN/4];
263 uint64_t u64[CUR_MAX_KEY_LEN/8];
264 __le32 j32[CUR_MAX_KEY_LEN/4];
265};
266
267/**
268 * struct ubifs_scan_node - UBIFS scanned node information.
269 * @list: list of scanned nodes
270 * @key: key of node scanned (if it has one)
271 * @sqnum: sequence number
272 * @type: type of node scanned
273 * @offs: offset with LEB of node scanned
274 * @len: length of node scanned
275 * @node: raw node
276 */
277struct ubifs_scan_node {
278 struct list_head list;
279 union ubifs_key key;
280 unsigned long long sqnum;
281 int type;
282 int offs;
283 int len;
284 void *node;
285};
286
287/**
288 * struct ubifs_scan_leb - UBIFS scanned LEB information.
289 * @lnum: logical eraseblock number
290 * @nodes_cnt: number of nodes scanned
291 * @nodes: list of struct ubifs_scan_node
292 * @endpt: end point (and therefore the start of empty space)
293 * @ecc: read returned -EBADMSG
294 * @buf: buffer containing entire LEB scanned
295 */
296struct ubifs_scan_leb {
297 int lnum;
298 int nodes_cnt;
299 struct list_head nodes;
300 int endpt;
301 int ecc;
302 void *buf;
303};
304
305/**
306 * struct ubifs_gced_idx_leb - garbage-collected indexing LEB.
307 * @list: list
308 * @lnum: LEB number
309 * @unmap: OK to unmap this LEB
310 *
311 * This data structure is used to temporary store garbage-collected indexing
312 * LEBs - they are not released immediately, but only after the next commit.
313 * This is needed to guarantee recoverability.
314 */
315struct ubifs_gced_idx_leb {
316 struct list_head list;
317 int lnum;
318 int unmap;
319};
320
321/**
322 * struct ubifs_inode - UBIFS in-memory inode description.
323 * @vfs_inode: VFS inode description object
324 * @creat_sqnum: sequence number at time of creation
325 * @xattr_size: summarized size of all extended attributes in bytes
326 * @xattr_cnt: count of extended attributes this inode has
327 * @xattr_names: sum of lengths of all extended attribute names belonging to
328 * this inode
329 * @dirty: non-zero if the inode is dirty
330 * @xattr: non-zero if this is an extended attribute inode
331 * @ui_mutex: serializes inode write-back with the rest of VFS operations,
332 * serializes "clean <-> dirty" state changes, protects @dirty,
333 * @ui_size, and @xattr_size
334 * @ui_lock: protects @synced_i_size
335 * @synced_i_size: synchronized size of inode, i.e. the value of inode size
336 * currently stored on the flash; used only for regular file
337 * inodes
338 * @ui_size: inode size used by UBIFS when writing to flash
339 * @flags: inode flags (@UBIFS_COMPR_FL, etc)
340 * @compr_type: default compression type used for this inode
341 * @data_len: length of the data attached to the inode
342 * @data: inode's data
343 *
344 * @ui_mutex exists for two main reasons. At first it prevents inodes from
345 * being written back while UBIFS changing them, being in the middle of an VFS
346 * operation. This way UBIFS makes sure the inode fields are consistent. For
347 * example, in 'ubifs_rename()' we change 3 inodes simultaneously, and
348 * write-back must not write any of them before we have finished.
349 *
350 * The second reason is budgeting - UBIFS has to budget all operations. If an
351 * operation is going to mark an inode dirty, it has to allocate budget for
352 * this. It cannot just mark it dirty because there is no guarantee there will
353 * be enough flash space to write the inode back later. This means UBIFS has
354 * to have full control over inode "clean <-> dirty" transitions (and pages
355 * actually). But unfortunately, VFS marks inodes dirty in many places, and it
356 * does not ask the file-system if it is allowed to do so (there is a notifier,
357 * but it is not enough), i.e., there is no mechanism to synchronize with this.
358 * So UBIFS has its own inode dirty flag and its own mutex to serialize
359 * "clean <-> dirty" transitions.
360 *
361 * The @synced_i_size field is used to make sure we never write pages which are
362 * beyond last synchronized inode size. See 'ubifs_writepage()' for more
363 * information.
364 *
365 * The @ui_size is a "shadow" variable for @inode->i_size and UBIFS uses
366 * @ui_size instead of @inode->i_size. The reason for this is that UBIFS cannot
367 * make sure @inode->i_size is always changed under @ui_mutex, because it
368 * cannot call 'vmtruncate()' with @ui_mutex locked, because it would deadlock
369 * with 'ubifs_writepage()' (see file.c). All the other inode fields are
370 * changed under @ui_mutex, so they do not need "shadow" fields. Note, one
371 * could consider to rework locking and base it on "shadow" fields.
372 */
373struct ubifs_inode {
374 struct inode vfs_inode;
375 unsigned long long creat_sqnum;
376 unsigned int xattr_size;
377 unsigned int xattr_cnt;
378 unsigned int xattr_names;
379 unsigned int dirty:1;
380 unsigned int xattr:1;
381 struct mutex ui_mutex;
382 spinlock_t ui_lock;
383 loff_t synced_i_size;
384 loff_t ui_size;
385 int flags;
386 int compr_type;
387 int data_len;
388 void *data;
389};
390
391/**
392 * struct ubifs_unclean_leb - records a LEB recovered under read-only mode.
393 * @list: list
394 * @lnum: LEB number of recovered LEB
395 * @endpt: offset where recovery ended
396 *
397 * This structure records a LEB identified during recovery that needs to be
398 * cleaned but was not because UBIFS was mounted read-only. The information
399 * is used to clean the LEB when remounting to read-write mode.
400 */
401struct ubifs_unclean_leb {
402 struct list_head list;
403 int lnum;
404 int endpt;
405};
406
407/*
408 * LEB properties flags.
409 *
410 * LPROPS_UNCAT: not categorized
411 * LPROPS_DIRTY: dirty > 0, not index
412 * LPROPS_DIRTY_IDX: dirty + free > UBIFS_CH_SZ and index
413 * LPROPS_FREE: free > 0, not empty, not index
414 * LPROPS_HEAP_CNT: number of heaps used for storing categorized LEBs
415 * LPROPS_EMPTY: LEB is empty, not taken
416 * LPROPS_FREEABLE: free + dirty == leb_size, not index, not taken
417 * LPROPS_FRDI_IDX: free + dirty == leb_size and index, may be taken
418 * LPROPS_CAT_MASK: mask for the LEB categories above
419 * LPROPS_TAKEN: LEB was taken (this flag is not saved on the media)
420 * LPROPS_INDEX: LEB contains indexing nodes (this flag also exists on flash)
421 */
422enum {
423 LPROPS_UNCAT = 0,
424 LPROPS_DIRTY = 1,
425 LPROPS_DIRTY_IDX = 2,
426 LPROPS_FREE = 3,
427 LPROPS_HEAP_CNT = 3,
428 LPROPS_EMPTY = 4,
429 LPROPS_FREEABLE = 5,
430 LPROPS_FRDI_IDX = 6,
431 LPROPS_CAT_MASK = 15,
432 LPROPS_TAKEN = 16,
433 LPROPS_INDEX = 32,
434};
435
436/**
437 * struct ubifs_lprops - logical eraseblock properties.
438 * @free: amount of free space in bytes
439 * @dirty: amount of dirty space in bytes
440 * @flags: LEB properties flags (see above)
441 * @lnum: LEB number
442 * @list: list of same-category lprops (for LPROPS_EMPTY and LPROPS_FREEABLE)
443 * @hpos: heap position in heap of same-category lprops (other categories)
444 */
445struct ubifs_lprops {
446 int free;
447 int dirty;
448 int flags;
449 int lnum;
450 union {
451 struct list_head list;
452 int hpos;
453 };
454};
455
456/**
457 * struct ubifs_lpt_lprops - LPT logical eraseblock properties.
458 * @free: amount of free space in bytes
459 * @dirty: amount of dirty space in bytes
460 * @tgc: trivial GC flag (1 => unmap after commit end)
461 * @cmt: commit flag (1 => reserved for commit)
462 */
463struct ubifs_lpt_lprops {
464 int free;
465 int dirty;
466 unsigned tgc : 1;
467 unsigned cmt : 1;
468};
469
470/**
471 * struct ubifs_lp_stats - statistics of eraseblocks in the main area.
472 * @empty_lebs: number of empty LEBs
473 * @taken_empty_lebs: number of taken LEBs
474 * @idx_lebs: number of indexing LEBs
475 * @total_free: total free space in bytes
476 * @total_dirty: total dirty space in bytes
477 * @total_used: total used space in bytes (includes only data LEBs)
478 * @total_dead: total dead space in bytes (includes only data LEBs)
479 * @total_dark: total dark space in bytes (includes only data LEBs)
480 *
481 * N.B. total_dirty and total_used are different to other total_* fields,
482 * because they account _all_ LEBs, not just data LEBs.
483 *
484 * 'taken_empty_lebs' counts the LEBs that are in the transient state of having
485 * been 'taken' for use but not yet written to. 'taken_empty_lebs' is needed
486 * to account correctly for gc_lnum, otherwise 'empty_lebs' could be used
487 * by itself (in which case 'unused_lebs' would be a better name). In the case
488 * of gc_lnum, it is 'taken' at mount time or whenever a LEB is retained by GC,
489 * but unlike other empty LEBs that are 'taken', it may not be written straight
490 * away (i.e. before the next commit start or unmount), so either gc_lnum must
491 * be specially accounted for, or the current approach followed i.e. count it
492 * under 'taken_empty_lebs'.
493 */
494struct ubifs_lp_stats {
495 int empty_lebs;
496 int taken_empty_lebs;
497 int idx_lebs;
498 long long total_free;
499 long long total_dirty;
500 long long total_used;
501 long long total_dead;
502 long long total_dark;
503};
504
505struct ubifs_nnode;
506
507/**
508 * struct ubifs_cnode - LEB Properties Tree common node.
509 * @parent: parent nnode
510 * @cnext: next cnode to commit
511 * @flags: flags (%DIRTY_LPT_NODE or %OBSOLETE_LPT_NODE)
512 * @iip: index in parent
513 * @level: level in the tree (zero for pnodes, greater than zero for nnodes)
514 * @num: node number
515 */
516struct ubifs_cnode {
517 struct ubifs_nnode *parent;
518 struct ubifs_cnode *cnext;
519 unsigned long flags;
520 int iip;
521 int level;
522 int num;
523};
524
525/**
526 * struct ubifs_pnode - LEB Properties Tree leaf node.
527 * @parent: parent nnode
528 * @cnext: next cnode to commit
529 * @flags: flags (%DIRTY_LPT_NODE or %OBSOLETE_LPT_NODE)
530 * @iip: index in parent
531 * @level: level in the tree (always zero for pnodes)
532 * @num: node number
533 * @lprops: LEB properties array
534 */
535struct ubifs_pnode {
536 struct ubifs_nnode *parent;
537 struct ubifs_cnode *cnext;
538 unsigned long flags;
539 int iip;
540 int level;
541 int num;
542 struct ubifs_lprops lprops[UBIFS_LPT_FANOUT];
543};
544
545/**
546 * struct ubifs_nbranch - LEB Properties Tree internal node branch.
547 * @lnum: LEB number of child
548 * @offs: offset of child
549 * @nnode: nnode child
550 * @pnode: pnode child
551 * @cnode: cnode child
552 */
553struct ubifs_nbranch {
554 int lnum;
555 int offs;
556 union {
557 struct ubifs_nnode *nnode;
558 struct ubifs_pnode *pnode;
559 struct ubifs_cnode *cnode;
560 };
561};
562
563/**
564 * struct ubifs_nnode - LEB Properties Tree internal node.
565 * @parent: parent nnode
566 * @cnext: next cnode to commit
567 * @flags: flags (%DIRTY_LPT_NODE or %OBSOLETE_LPT_NODE)
568 * @iip: index in parent
569 * @level: level in the tree (always greater than zero for nnodes)
570 * @num: node number
571 * @nbranch: branches to child nodes
572 */
573struct ubifs_nnode {
574 struct ubifs_nnode *parent;
575 struct ubifs_cnode *cnext;
576 unsigned long flags;
577 int iip;
578 int level;
579 int num;
580 struct ubifs_nbranch nbranch[UBIFS_LPT_FANOUT];
581};
582
583/**
584 * struct ubifs_lpt_heap - heap of categorized lprops.
585 * @arr: heap array
586 * @cnt: number in heap
587 * @max_cnt: maximum number allowed in heap
588 *
589 * There are %LPROPS_HEAP_CNT heaps.
590 */
591struct ubifs_lpt_heap {
592 struct ubifs_lprops **arr;
593 int cnt;
594 int max_cnt;
595};
596
597/*
598 * Return codes for LPT scan callback function.
599 *
600 * LPT_SCAN_CONTINUE: continue scanning
601 * LPT_SCAN_ADD: add the LEB properties scanned to the tree in memory
602 * LPT_SCAN_STOP: stop scanning
603 */
604enum {
605 LPT_SCAN_CONTINUE = 0,
606 LPT_SCAN_ADD = 1,
607 LPT_SCAN_STOP = 2,
608};
609
610struct ubifs_info;
611
612/* Callback used by the 'ubifs_lpt_scan_nolock()' function */
613typedef int (*ubifs_lpt_scan_callback)(struct ubifs_info *c,
614 const struct ubifs_lprops *lprops,
615 int in_tree, void *data);
616
617/**
618 * struct ubifs_wbuf - UBIFS write-buffer.
619 * @c: UBIFS file-system description object
620 * @buf: write-buffer (of min. flash I/O unit size)
621 * @lnum: logical eraseblock number the write-buffer points to
622 * @offs: write-buffer offset in this logical eraseblock
623 * @avail: number of bytes available in the write-buffer
624 * @used: number of used bytes in the write-buffer
625 * @dtype: type of data stored in this LEB (%UBI_LONGTERM, %UBI_SHORTTERM,
626 * %UBI_UNKNOWN)
627 * @jhead: journal head the mutex belongs to (note, needed only to shut lockdep
628 * up by 'mutex_lock_nested()).
629 * @sync_callback: write-buffer synchronization callback
630 * @io_mutex: serializes write-buffer I/O
631 * @lock: serializes @buf, @lnum, @offs, @avail, @used, @next_ino and @inodes
632 * fields
633 * @timer: write-buffer timer
634 * @timeout: timer expire interval in jiffies
635 * @need_sync: it is set if its timer expired and needs sync
636 * @next_ino: points to the next position of the following inode number
637 * @inodes: stores the inode numbers of the nodes which are in wbuf
638 *
639 * The write-buffer synchronization callback is called when the write-buffer is
640 * synchronized in order to notify how much space was wasted due to
641 * write-buffer padding and how much free space is left in the LEB.
642 *
643 * Note: the fields @buf, @lnum, @offs, @avail and @used can be read under
644 * spin-lock or mutex because they are written under both mutex and spin-lock.
645 * @buf is appended to under mutex but overwritten under both mutex and
646 * spin-lock. Thus the data between @buf and @buf + @used can be read under
647 * spinlock.
648 */
649struct ubifs_wbuf {
650 struct ubifs_info *c;
651 void *buf;
652 int lnum;
653 int offs;
654 int avail;
655 int used;
656 int dtype;
657 int jhead;
658 int (*sync_callback)(struct ubifs_info *c, int lnum, int free, int pad);
659 struct mutex io_mutex;
660 spinlock_t lock;
661 struct timer_list timer;
662 int timeout;
663 int need_sync;
664 int next_ino;
665 ino_t *inodes;
666};
667
668/**
669 * struct ubifs_bud - bud logical eraseblock.
670 * @lnum: logical eraseblock number
671 * @start: where the (uncommitted) bud data starts
672 * @jhead: journal head number this bud belongs to
673 * @list: link in the list buds belonging to the same journal head
674 * @rb: link in the tree of all buds
675 */
676struct ubifs_bud {
677 int lnum;
678 int start;
679 int jhead;
680 struct list_head list;
681 struct rb_node rb;
682};
683
684/**
685 * struct ubifs_jhead - journal head.
686 * @wbuf: head's write-buffer
687 * @buds_list: list of bud LEBs belonging to this journal head
688 *
689 * Note, the @buds list is protected by the @c->buds_lock.
690 */
691struct ubifs_jhead {
692 struct ubifs_wbuf wbuf;
693 struct list_head buds_list;
694};
695
696/**
697 * struct ubifs_zbranch - key/coordinate/length branch stored in znodes.
698 * @key: key
699 * @znode: znode address in memory
700 * @lnum: LEB number of the indexing node
701 * @offs: offset of the indexing node within @lnum
702 * @len: target node length
703 */
704struct ubifs_zbranch {
705 union ubifs_key key;
706 union {
707 struct ubifs_znode *znode;
708 void *leaf;
709 };
710 int lnum;
711 int offs;
712 int len;
713};
714
715/**
716 * struct ubifs_znode - in-memory representation of an indexing node.
717 * @parent: parent znode or NULL if it is the root
718 * @cnext: next znode to commit
719 * @flags: znode flags (%DIRTY_ZNODE, %COW_ZNODE or %OBSOLETE_ZNODE)
720 * @time: last access time (seconds)
721 * @level: level of the entry in the TNC tree
722 * @child_cnt: count of child znodes
723 * @iip: index in parent's zbranch array
724 * @alt: lower bound of key range has altered i.e. child inserted at slot 0
725 * @lnum: LEB number of the corresponding indexing node
726 * @offs: offset of the corresponding indexing node
727 * @len: length of the corresponding indexing node
728 * @zbranch: array of znode branches (@c->fanout elements)
729 */
730struct ubifs_znode {
731 struct ubifs_znode *parent;
732 struct ubifs_znode *cnext;
733 unsigned long flags;
734 unsigned long time;
735 int level;
736 int child_cnt;
737 int iip;
738 int alt;
739#ifdef CONFIG_UBIFS_FS_DEBUG
740 int lnum, offs, len;
741#endif
742 struct ubifs_zbranch zbranch[];
743};
744
745/**
746 * struct ubifs_node_range - node length range description data structure.
747 * @len: fixed node length
748 * @min_len: minimum possible node length
749 * @max_len: maximum possible node length
750 *
751 * If @max_len is %0, the node has fixed length @len.
752 */
753struct ubifs_node_range {
754 union {
755 int len;
756 int min_len;
757 };
758 int max_len;
759};
760
761/**
762 * struct ubifs_compressor - UBIFS compressor description structure.
763 * @compr_type: compressor type (%UBIFS_COMPR_LZO, etc)
764 * @cc: cryptoapi compressor handle
765 * @comp_mutex: mutex used during compression
766 * @decomp_mutex: mutex used during decompression
767 * @name: compressor name
768 * @capi_name: cryptoapi compressor name
769 */
770struct ubifs_compressor {
771 int compr_type;
772 struct crypto_comp *cc;
773 struct mutex *comp_mutex;
774 struct mutex *decomp_mutex;
775 const char *name;
776 const char *capi_name;
777};
778
779/**
780 * struct ubifs_budget_req - budget requirements of an operation.
781 *
782 * @fast: non-zero if the budgeting should try to aquire budget quickly and
783 * should not try to call write-back
784 * @recalculate: non-zero if @idx_growth, @data_growth, and @dd_growth fields
785 * have to be re-calculated
786 * @new_page: non-zero if the operation adds a new page
787 * @dirtied_page: non-zero if the operation makes a page dirty
788 * @new_dent: non-zero if the operation adds a new directory entry
789 * @mod_dent: non-zero if the operation removes or modifies an existing
790 * directory entry
791 * @new_ino: non-zero if the operation adds a new inode
792 * @new_ino_d: now much data newly created inode contains
793 * @dirtied_ino: how many inodes the operation makes dirty
794 * @dirtied_ino_d: now much data dirtied inode contains
795 * @idx_growth: how much the index will supposedly grow
796 * @data_growth: how much new data the operation will supposedly add
797 * @dd_growth: how much data that makes other data dirty the operation will
798 * supposedly add
799 *
800 * @idx_growth, @data_growth and @dd_growth are not used in budget request. The
801 * budgeting subsystem caches index and data growth values there to avoid
802 * re-calculating them when the budget is released. However, if @idx_growth is
803 * %-1, it is calculated by the release function using other fields.
804 *
805 * An inode may contain 4KiB of data at max., thus the widths of @new_ino_d
806 * is 13 bits, and @dirtied_ino_d - 15, because up to 4 inodes may be made
807 * dirty by the re-name operation.
808 */
809struct ubifs_budget_req {
810 unsigned int fast:1;
811 unsigned int recalculate:1;
812 unsigned int new_page:1;
813 unsigned int dirtied_page:1;
814 unsigned int new_dent:1;
815 unsigned int mod_dent:1;
816 unsigned int new_ino:1;
817 unsigned int new_ino_d:13;
818#ifndef UBIFS_DEBUG
819 unsigned int dirtied_ino:4;
820 unsigned int dirtied_ino_d:15;
821#else
822 /* Not bit-fields to check for overflows */
823 unsigned int dirtied_ino;
824 unsigned int dirtied_ino_d;
825#endif
826 int idx_growth;
827 int data_growth;
828 int dd_growth;
829};
830
831/**
832 * struct ubifs_orphan - stores the inode number of an orphan.
833 * @rb: rb-tree node of rb-tree of orphans sorted by inode number
834 * @list: list head of list of orphans in order added
835 * @new_list: list head of list of orphans added since the last commit
836 * @cnext: next orphan to commit
837 * @dnext: next orphan to delete
838 * @inum: inode number
839 * @new: %1 => added since the last commit, otherwise %0
840 */
841struct ubifs_orphan {
842 struct rb_node rb;
843 struct list_head list;
844 struct list_head new_list;
845 struct ubifs_orphan *cnext;
846 struct ubifs_orphan *dnext;
847 ino_t inum;
848 int new;
849};
850
851/**
852 * struct ubifs_mount_opts - UBIFS-specific mount options information.
853 * @unmount_mode: selected unmount mode (%0 default, %1 normal, %2 fast)
854 */
855struct ubifs_mount_opts {
856 unsigned int unmount_mode:2;
857};
858
859/**
860 * struct ubifs_info - UBIFS file-system description data structure
861 * (per-superblock).
862 * @vfs_sb: VFS @struct super_block object
863 * @bdi: backing device info object to make VFS happy and disable readahead
864 *
865 * @highest_inum: highest used inode number
866 * @vfs_gen: VFS inode generation counter
867 * @max_sqnum: current global sequence number
868 * @cmt_no: commit number (last successfully completed commit)
869 * @cnt_lock: protects @highest_inum, @vfs_gen, and @max_sqnum counters
870 * @fmt_version: UBIFS on-flash format version
871 * @uuid: UUID from super block
872 *
873 * @lhead_lnum: log head logical eraseblock number
874 * @lhead_offs: log head offset
875 * @ltail_lnum: log tail logical eraseblock number (offset is always 0)
876 * @log_mutex: protects the log, @lhead_lnum, @lhead_offs, @ltail_lnum, and
877 * @bud_bytes
878 * @min_log_bytes: minimum required number of bytes in the log
879 * @cmt_bud_bytes: used during commit to temporarily amount of bytes in
880 * committed buds
881 *
882 * @buds: tree of all buds indexed by bud LEB number
883 * @bud_bytes: how many bytes of flash is used by buds
884 * @buds_lock: protects the @buds tree, @bud_bytes, and per-journal head bud
885 * lists
886 * @jhead_cnt: count of journal heads
887 * @jheads: journal heads (head zero is base head)
888 * @max_bud_bytes: maximum number of bytes allowed in buds
889 * @bg_bud_bytes: number of bud bytes when background commit is initiated
890 * @old_buds: buds to be released after commit ends
891 * @max_bud_cnt: maximum number of buds
892 *
893 * @commit_sem: synchronizes committer with other processes
894 * @cmt_state: commit state
895 * @cs_lock: commit state lock
896 * @cmt_wq: wait queue to sleep on if the log is full and a commit is running
897 * @fast_unmount: do not run journal commit before un-mounting
898 * @big_lpt: flag that LPT is too big to write whole during commit
899 * @check_lpt_free: flag that indicates LPT GC may be needed
900 * @nospace: non-zero if the file-system does not have flash space (used as
901 * optimization)
902 * @nospace_rp: the same as @nospace, but additionally means that even reserved
903 * pool is full
904 *
905 * @tnc_mutex: protects the Tree Node Cache (TNC), @zroot, @cnext, @enext, and
906 * @calc_idx_sz
907 * @zroot: zbranch which points to the root index node and znode
908 * @cnext: next znode to commit
909 * @enext: next znode to commit to empty space
910 * @gap_lebs: array of LEBs used by the in-gaps commit method
911 * @cbuf: commit buffer
912 * @ileb_buf: buffer for commit in-the-gaps method
913 * @ileb_len: length of data in ileb_buf
914 * @ihead_lnum: LEB number of index head
915 * @ihead_offs: offset of index head
916 * @ilebs: pre-allocated index LEBs
917 * @ileb_cnt: number of pre-allocated index LEBs
918 * @ileb_nxt: next pre-allocated index LEBs
919 * @old_idx: tree of index nodes obsoleted since the last commit start
920 * @bottom_up_buf: a buffer which is used by 'dirty_cow_bottom_up()' in tnc.c
921 * @new_ihead_lnum: used by debugging to check ihead_lnum
922 * @new_ihead_offs: used by debugging to check ihead_offs
923 *
924 * @mst_node: master node
925 * @mst_offs: offset of valid master node
926 * @mst_mutex: protects the master node area, @mst_node, and @mst_offs
927 *
928 * @log_lebs: number of logical eraseblocks in the log
929 * @log_bytes: log size in bytes
930 * @log_last: last LEB of the log
931 * @lpt_lebs: number of LEBs used for lprops table
932 * @lpt_first: first LEB of the lprops table area
933 * @lpt_last: last LEB of the lprops table area
934 * @orph_lebs: number of LEBs used for the orphan area
935 * @orph_first: first LEB of the orphan area
936 * @orph_last: last LEB of the orphan area
937 * @main_lebs: count of LEBs in the main area
938 * @main_first: first LEB of the main area
939 * @main_bytes: main area size in bytes
940 * @default_compr: default compression algorithm (%UBIFS_COMPR_LZO, etc)
941 *
942 * @key_hash_type: type of the key hash
943 * @key_hash: direntry key hash function
944 * @key_fmt: key format
945 * @key_len: key length
946 * @fanout: fanout of the index tree (number of links per indexing node)
947 *
948 * @min_io_size: minimal input/output unit size
949 * @min_io_shift: number of bits in @min_io_size minus one
950 * @leb_size: logical eraseblock size in bytes
951 * @half_leb_size: half LEB size
952 * @leb_cnt: count of logical eraseblocks
953 * @max_leb_cnt: maximum count of logical eraseblocks
954 * @old_leb_cnt: count of logical eraseblocks before re-size
955 * @ro_media: the underlying UBI volume is read-only
956 *
957 * @dirty_pg_cnt: number of dirty pages (not used)
958 * @dirty_zn_cnt: number of dirty znodes
959 * @clean_zn_cnt: number of clean znodes
960 *
961 * @budg_idx_growth: amount of bytes budgeted for index growth
962 * @budg_data_growth: amount of bytes budgeted for cached data
963 * @budg_dd_growth: amount of bytes budgeted for cached data that will make
964 * other data dirty
965 * @budg_uncommitted_idx: amount of bytes were budgeted for growth of the index,
966 * but which still have to be taken into account because
967 * the index has not been committed so far
968 * @space_lock: protects @budg_idx_growth, @budg_data_growth, @budg_dd_growth,
969 * @budg_uncommited_idx, @min_idx_lebs, @old_idx_sz, and @lst;
970 * @min_idx_lebs: minimum number of LEBs required for the index
971 * @old_idx_sz: size of index on flash
972 * @calc_idx_sz: temporary variable which is used to calculate new index size
973 * (contains accurate new index size at end of TNC commit start)
974 * @lst: lprops statistics
975 *
976 * @page_budget: budget for a page
977 * @inode_budget: budget for an inode
978 * @dent_budget: budget for a directory entry
979 *
980 * @ref_node_alsz: size of the LEB reference node aligned to the min. flash
981 * I/O unit
982 * @mst_node_alsz: master node aligned size
983 * @min_idx_node_sz: minimum indexing node aligned on 8-bytes boundary
984 * @max_idx_node_sz: maximum indexing node aligned on 8-bytes boundary
985 * @max_inode_sz: maximum possible inode size in bytes
986 * @max_znode_sz: size of znode in bytes
987 * @dead_wm: LEB dead space watermark
988 * @dark_wm: LEB dark space watermark
989 * @block_cnt: count of 4KiB blocks on the FS
990 *
991 * @ranges: UBIFS node length ranges
992 * @ubi: UBI volume descriptor
993 * @di: UBI device information
994 * @vi: UBI volume information
995 *
996 * @orph_tree: rb-tree of orphan inode numbers
997 * @orph_list: list of orphan inode numbers in order added
998 * @orph_new: list of orphan inode numbers added since last commit
999 * @orph_cnext: next orphan to commit
1000 * @orph_dnext: next orphan to delete
1001 * @orphan_lock: lock for orph_tree and orph_new
1002 * @orph_buf: buffer for orphan nodes
1003 * @new_orphans: number of orphans since last commit
1004 * @cmt_orphans: number of orphans being committed
1005 * @tot_orphans: number of orphans in the rb_tree
1006 * @max_orphans: maximum number of orphans allowed
1007 * @ohead_lnum: orphan head LEB number
1008 * @ohead_offs: orphan head offset
1009 * @no_orphs: non-zero if there are no orphans
1010 *
1011 * @bgt: UBIFS background thread
1012 * @bgt_name: background thread name
1013 * @need_bgt: if background thread should run
1014 * @need_wbuf_sync: if write-buffers have to be synchronized
1015 *
1016 * @gc_lnum: LEB number used for garbage collection
1017 * @sbuf: a buffer of LEB size used by GC and replay for scanning
1018 * @idx_gc: list of index LEBs that have been garbage collected
1019 * @idx_gc_cnt: number of elements on the idx_gc list
1020 *
1021 * @infos_list: links all 'ubifs_info' objects
1022 * @umount_mutex: serializes shrinker and un-mount
1023 * @shrinker_run_no: shrinker run number
1024 *
1025 * @space_bits: number of bits needed to record free or dirty space
1026 * @lpt_lnum_bits: number of bits needed to record a LEB number in the LPT
1027 * @lpt_offs_bits: number of bits needed to record an offset in the LPT
1028 * @lpt_spc_bits: number of bits needed to space in the LPT
1029 * @pcnt_bits: number of bits needed to record pnode or nnode number
1030 * @lnum_bits: number of bits needed to record LEB number
1031 * @nnode_sz: size of on-flash nnode
1032 * @pnode_sz: size of on-flash pnode
1033 * @ltab_sz: size of on-flash LPT lprops table
1034 * @lsave_sz: size of on-flash LPT save table
1035 * @pnode_cnt: number of pnodes
1036 * @nnode_cnt: number of nnodes
1037 * @lpt_hght: height of the LPT
1038 * @pnodes_have: number of pnodes in memory
1039 *
1040 * @lp_mutex: protects lprops table and all the other lprops-related fields
1041 * @lpt_lnum: LEB number of the root nnode of the LPT
1042 * @lpt_offs: offset of the root nnode of the LPT
1043 * @nhead_lnum: LEB number of LPT head
1044 * @nhead_offs: offset of LPT head
1045 * @lpt_drty_flgs: dirty flags for LPT special nodes e.g. ltab
1046 * @dirty_nn_cnt: number of dirty nnodes
1047 * @dirty_pn_cnt: number of dirty pnodes
1048 * @lpt_sz: LPT size
1049 * @lpt_nod_buf: buffer for an on-flash nnode or pnode
1050 * @lpt_buf: buffer of LEB size used by LPT
1051 * @nroot: address in memory of the root nnode of the LPT
1052 * @lpt_cnext: next LPT node to commit
1053 * @lpt_heap: array of heaps of categorized lprops
1054 * @dirty_idx: a (reverse sorted) copy of the LPROPS_DIRTY_IDX heap as at
1055 * previous commit start
1056 * @uncat_list: list of un-categorized LEBs
1057 * @empty_list: list of empty LEBs
1058 * @freeable_list: list of freeable non-index LEBs (free + dirty == leb_size)
1059 * @frdi_idx_list: list of freeable index LEBs (free + dirty == leb_size)
1060 * @freeable_cnt: number of freeable LEBs in @freeable_list
1061 *
1062 * @ltab_lnum: LEB number of LPT's own lprops table
1063 * @ltab_offs: offset of LPT's own lprops table
1064 * @ltab: LPT's own lprops table
1065 * @ltab_cmt: LPT's own lprops table (commit copy)
1066 * @lsave_cnt: number of LEB numbers in LPT's save table
1067 * @lsave_lnum: LEB number of LPT's save table
1068 * @lsave_offs: offset of LPT's save table
1069 * @lsave: LPT's save table
1070 * @lscan_lnum: LEB number of last LPT scan
1071 *
1072 * @rp_size: size of the reserved pool in bytes
1073 * @report_rp_size: size of the reserved pool reported to user-space
1074 * @rp_uid: reserved pool user ID
1075 * @rp_gid: reserved pool group ID
1076 *
1077 * @empty: if the UBI device is empty
1078 * @replay_tree: temporary tree used during journal replay
1079 * @replay_list: temporary list used during journal replay
1080 * @replay_buds: list of buds to replay
1081 * @cs_sqnum: sequence number of first node in the log (commit start node)
1082 * @replay_sqnum: sequence number of node currently being replayed
1083 * @need_recovery: file-system needs recovery
1084 * @replaying: set to %1 during journal replay
1085 * @unclean_leb_list: LEBs to recover when mounting ro to rw
1086 * @rcvrd_mst_node: recovered master node to write when mounting ro to rw
1087 * @size_tree: inode size information for recovery
1088 * @remounting_rw: set while remounting from ro to rw (sb flags have MS_RDONLY)
1089 * @mount_opts: UBIFS-specific mount options
1090 *
1091 * @dbg_buf: a buffer of LEB size used for debugging purposes
1092 * @old_zroot: old index root - used by 'dbg_check_old_index()'
1093 * @old_zroot_level: old index root level - used by 'dbg_check_old_index()'
1094 * @old_zroot_sqnum: old index root sqnum - used by 'dbg_check_old_index()'
1095 * @failure_mode: failure mode for recovery testing
1096 * @fail_delay: 0=>don't delay, 1=>delay a time, 2=>delay a number of calls
1097 * @fail_timeout: time in jiffies when delay of failure mode expires
1098 * @fail_cnt: current number of calls to failure mode I/O functions
1099 * @fail_cnt_max: number of calls by which to delay failure mode
1100 */
1101struct ubifs_info {
1102 struct super_block *vfs_sb;
1103 struct backing_dev_info bdi;
1104
1105 ino_t highest_inum;
1106 unsigned int vfs_gen;
1107 unsigned long long max_sqnum;
1108 unsigned long long cmt_no;
1109 spinlock_t cnt_lock;
1110 int fmt_version;
1111 unsigned char uuid[16];
1112
1113 int lhead_lnum;
1114 int lhead_offs;
1115 int ltail_lnum;
1116 struct mutex log_mutex;
1117 int min_log_bytes;
1118 long long cmt_bud_bytes;
1119
1120 struct rb_root buds;
1121 long long bud_bytes;
1122 spinlock_t buds_lock;
1123 int jhead_cnt;
1124 struct ubifs_jhead *jheads;
1125 long long max_bud_bytes;
1126 long long bg_bud_bytes;
1127 struct list_head old_buds;
1128 int max_bud_cnt;
1129
1130 struct rw_semaphore commit_sem;
1131 int cmt_state;
1132 spinlock_t cs_lock;
1133 wait_queue_head_t cmt_wq;
1134 unsigned int fast_unmount:1;
1135 unsigned int big_lpt:1;
1136 unsigned int check_lpt_free:1;
1137 unsigned int nospace:1;
1138 unsigned int nospace_rp:1;
1139
1140 struct mutex tnc_mutex;
1141 struct ubifs_zbranch zroot;
1142 struct ubifs_znode *cnext;
1143 struct ubifs_znode *enext;
1144 int *gap_lebs;
1145 void *cbuf;
1146 void *ileb_buf;
1147 int ileb_len;
1148 int ihead_lnum;
1149 int ihead_offs;
1150 int *ilebs;
1151 int ileb_cnt;
1152 int ileb_nxt;
1153 struct rb_root old_idx;
1154 int *bottom_up_buf;
1155#ifdef CONFIG_UBIFS_FS_DEBUG
1156 int new_ihead_lnum;
1157 int new_ihead_offs;
1158#endif
1159
1160 struct ubifs_mst_node *mst_node;
1161 int mst_offs;
1162 struct mutex mst_mutex;
1163
1164 int log_lebs;
1165 long long log_bytes;
1166 int log_last;
1167 int lpt_lebs;
1168 int lpt_first;
1169 int lpt_last;
1170 int orph_lebs;
1171 int orph_first;
1172 int orph_last;
1173 int main_lebs;
1174 int main_first;
1175 long long main_bytes;
1176 int default_compr;
1177
1178 uint8_t key_hash_type;
1179 uint32_t (*key_hash)(const char *str, int len);
1180 int key_fmt;
1181 int key_len;
1182 int fanout;
1183
1184 int min_io_size;
1185 int min_io_shift;
1186 int leb_size;
1187 int half_leb_size;
1188 int leb_cnt;
1189 int max_leb_cnt;
1190 int old_leb_cnt;
1191 int ro_media;
1192
1193 atomic_long_t dirty_pg_cnt;
1194 atomic_long_t dirty_zn_cnt;
1195 atomic_long_t clean_zn_cnt;
1196
1197 long long budg_idx_growth;
1198 long long budg_data_growth;
1199 long long budg_dd_growth;
1200 long long budg_uncommitted_idx;
1201 spinlock_t space_lock;
1202 int min_idx_lebs;
1203 unsigned long long old_idx_sz;
1204 unsigned long long calc_idx_sz;
1205 struct ubifs_lp_stats lst;
1206
1207 int page_budget;
1208 int inode_budget;
1209 int dent_budget;
1210
1211 int ref_node_alsz;
1212 int mst_node_alsz;
1213 int min_idx_node_sz;
1214 int max_idx_node_sz;
1215 long long max_inode_sz;
1216 int max_znode_sz;
1217 int dead_wm;
1218 int dark_wm;
1219 int block_cnt;
1220
1221 struct ubifs_node_range ranges[UBIFS_NODE_TYPES_CNT];
1222 struct ubi_volume_desc *ubi;
1223 struct ubi_device_info di;
1224 struct ubi_volume_info vi;
1225
1226 struct rb_root orph_tree;
1227 struct list_head orph_list;
1228 struct list_head orph_new;
1229 struct ubifs_orphan *orph_cnext;
1230 struct ubifs_orphan *orph_dnext;
1231 spinlock_t orphan_lock;
1232 void *orph_buf;
1233 int new_orphans;
1234 int cmt_orphans;
1235 int tot_orphans;
1236 int max_orphans;
1237 int ohead_lnum;
1238 int ohead_offs;
1239 int no_orphs;
1240
1241 struct task_struct *bgt;
1242 char bgt_name[sizeof(BGT_NAME_PATTERN) + 9];
1243 int need_bgt;
1244 int need_wbuf_sync;
1245
1246 int gc_lnum;
1247 void *sbuf;
1248 struct list_head idx_gc;
1249 int idx_gc_cnt;
1250
1251 struct list_head infos_list;
1252 struct mutex umount_mutex;
1253 unsigned int shrinker_run_no;
1254
1255 int space_bits;
1256 int lpt_lnum_bits;
1257 int lpt_offs_bits;
1258 int lpt_spc_bits;
1259 int pcnt_bits;
1260 int lnum_bits;
1261 int nnode_sz;
1262 int pnode_sz;
1263 int ltab_sz;
1264 int lsave_sz;
1265 int pnode_cnt;
1266 int nnode_cnt;
1267 int lpt_hght;
1268 int pnodes_have;
1269
1270 struct mutex lp_mutex;
1271 int lpt_lnum;
1272 int lpt_offs;
1273 int nhead_lnum;
1274 int nhead_offs;
1275 int lpt_drty_flgs;
1276 int dirty_nn_cnt;
1277 int dirty_pn_cnt;
1278 long long lpt_sz;
1279 void *lpt_nod_buf;
1280 void *lpt_buf;
1281 struct ubifs_nnode *nroot;
1282 struct ubifs_cnode *lpt_cnext;
1283 struct ubifs_lpt_heap lpt_heap[LPROPS_HEAP_CNT];
1284 struct ubifs_lpt_heap dirty_idx;
1285 struct list_head uncat_list;
1286 struct list_head empty_list;
1287 struct list_head freeable_list;
1288 struct list_head frdi_idx_list;
1289 int freeable_cnt;
1290
1291 int ltab_lnum;
1292 int ltab_offs;
1293 struct ubifs_lpt_lprops *ltab;
1294 struct ubifs_lpt_lprops *ltab_cmt;
1295 int lsave_cnt;
1296 int lsave_lnum;
1297 int lsave_offs;
1298 int *lsave;
1299 int lscan_lnum;
1300
1301 long long rp_size;
1302 long long report_rp_size;
1303 uid_t rp_uid;
1304 gid_t rp_gid;
1305
1306 /* The below fields are used only during mounting and re-mounting */
1307 int empty;
1308 struct rb_root replay_tree;
1309 struct list_head replay_list;
1310 struct list_head replay_buds;
1311 unsigned long long cs_sqnum;
1312 unsigned long long replay_sqnum;
1313 int need_recovery;
1314 int replaying;
1315 struct list_head unclean_leb_list;
1316 struct ubifs_mst_node *rcvrd_mst_node;
1317 struct rb_root size_tree;
1318 int remounting_rw;
1319 struct ubifs_mount_opts mount_opts;
1320
1321#ifdef CONFIG_UBIFS_FS_DEBUG
1322 void *dbg_buf;
1323 struct ubifs_zbranch old_zroot;
1324 int old_zroot_level;
1325 unsigned long long old_zroot_sqnum;
1326 int failure_mode;
1327 int fail_delay;
1328 unsigned long fail_timeout;
1329 unsigned int fail_cnt;
1330 unsigned int fail_cnt_max;
1331#endif
1332};
1333
1334extern struct list_head ubifs_infos;
1335extern spinlock_t ubifs_infos_lock;
1336extern atomic_long_t ubifs_clean_zn_cnt;
1337extern struct kmem_cache *ubifs_inode_slab;
1338extern struct super_operations ubifs_super_operations;
1339extern struct address_space_operations ubifs_file_address_operations;
1340extern struct file_operations ubifs_file_operations;
1341extern struct inode_operations ubifs_file_inode_operations;
1342extern struct file_operations ubifs_dir_operations;
1343extern struct inode_operations ubifs_dir_inode_operations;
1344extern struct inode_operations ubifs_symlink_inode_operations;
1345extern struct backing_dev_info ubifs_backing_dev_info;
1346extern struct ubifs_compressor *ubifs_compressors[UBIFS_COMPR_TYPES_CNT];
1347
1348/* io.c */
1349int ubifs_wbuf_write_nolock(struct ubifs_wbuf *wbuf, void *buf, int len);
1350int ubifs_wbuf_seek_nolock(struct ubifs_wbuf *wbuf, int lnum, int offs,
1351 int dtype);
1352int ubifs_wbuf_init(struct ubifs_info *c, struct ubifs_wbuf *wbuf);
1353int ubifs_read_node(const struct ubifs_info *c, void *buf, int type, int len,
1354 int lnum, int offs);
1355int ubifs_read_node_wbuf(struct ubifs_wbuf *wbuf, void *buf, int type, int len,
1356 int lnum, int offs);
1357int ubifs_write_node(struct ubifs_info *c, void *node, int len, int lnum,
1358 int offs, int dtype);
1359int ubifs_check_node(const struct ubifs_info *c, const void *buf, int lnum,
1360 int offs, int quiet);
1361void ubifs_prepare_node(struct ubifs_info *c, void *buf, int len, int pad);
1362void ubifs_prep_grp_node(struct ubifs_info *c, void *node, int len, int last);
1363int ubifs_io_init(struct ubifs_info *c);
1364void ubifs_pad(const struct ubifs_info *c, void *buf, int pad);
1365int ubifs_wbuf_sync_nolock(struct ubifs_wbuf *wbuf);
1366int ubifs_bg_wbufs_sync(struct ubifs_info *c);
1367void ubifs_wbuf_add_ino_nolock(struct ubifs_wbuf *wbuf, ino_t inum);
1368int ubifs_sync_wbufs_by_inode(struct ubifs_info *c, struct inode *inode);
1369
1370/* scan.c */
1371struct ubifs_scan_leb *ubifs_scan(const struct ubifs_info *c, int lnum,
1372 int offs, void *sbuf);
1373void ubifs_scan_destroy(struct ubifs_scan_leb *sleb);
1374int ubifs_scan_a_node(const struct ubifs_info *c, void *buf, int len, int lnum,
1375 int offs, int quiet);
1376struct ubifs_scan_leb *ubifs_start_scan(const struct ubifs_info *c, int lnum,
1377 int offs, void *sbuf);
1378void ubifs_end_scan(const struct ubifs_info *c, struct ubifs_scan_leb *sleb,
1379 int lnum, int offs);
1380int ubifs_add_snod(const struct ubifs_info *c, struct ubifs_scan_leb *sleb,
1381 void *buf, int offs);
1382void ubifs_scanned_corruption(const struct ubifs_info *c, int lnum, int offs,
1383 void *buf);
1384
1385/* log.c */
1386void ubifs_add_bud(struct ubifs_info *c, struct ubifs_bud *bud);
1387void ubifs_create_buds_lists(struct ubifs_info *c);
1388int ubifs_add_bud_to_log(struct ubifs_info *c, int jhead, int lnum, int offs);
1389struct ubifs_bud *ubifs_search_bud(struct ubifs_info *c, int lnum);
1390struct ubifs_wbuf *ubifs_get_wbuf(struct ubifs_info *c, int lnum);
1391int ubifs_log_start_commit(struct ubifs_info *c, int *ltail_lnum);
1392int ubifs_log_end_commit(struct ubifs_info *c, int new_ltail_lnum);
1393int ubifs_log_post_commit(struct ubifs_info *c, int old_ltail_lnum);
1394int ubifs_consolidate_log(struct ubifs_info *c);
1395
1396/* journal.c */
1397int ubifs_jnl_update(struct ubifs_info *c, const struct inode *dir,
1398 const struct qstr *nm, const struct inode *inode,
1399 int deletion, int xent);
1400int ubifs_jnl_write_data(struct ubifs_info *c, const struct inode *inode,
1401 const union ubifs_key *key, const void *buf, int len);
1402int ubifs_jnl_write_inode(struct ubifs_info *c, const struct inode *inode,
1403 int last_reference);
1404int ubifs_jnl_rename(struct ubifs_info *c, const struct inode *old_dir,
1405 const struct dentry *old_dentry,
1406 const struct inode *new_dir,
1407 const struct dentry *new_dentry, int sync);
1408int ubifs_jnl_truncate(struct ubifs_info *c, const struct inode *inode,
1409 loff_t old_size, loff_t new_size);
1410int ubifs_jnl_delete_xattr(struct ubifs_info *c, const struct inode *host,
1411 const struct inode *inode, const struct qstr *nm);
1412int ubifs_jnl_change_xattr(struct ubifs_info *c, const struct inode *inode1,
1413 const struct inode *inode2);
1414
1415/* budget.c */
1416int ubifs_budget_space(struct ubifs_info *c, struct ubifs_budget_req *req);
1417void ubifs_release_budget(struct ubifs_info *c, struct ubifs_budget_req *req);
1418void ubifs_release_dirty_inode_budget(struct ubifs_info *c,
1419 struct ubifs_inode *ui);
1420int ubifs_budget_inode_op(struct ubifs_info *c, struct inode *inode,
1421 struct ubifs_budget_req *req);
1422void ubifs_release_ino_dirty(struct ubifs_info *c, struct inode *inode,
1423 struct ubifs_budget_req *req);
1424void ubifs_cancel_ino_op(struct ubifs_info *c, struct inode *inode,
1425 struct ubifs_budget_req *req);
1426long long ubifs_budg_get_free_space(struct ubifs_info *c);
1427int ubifs_calc_min_idx_lebs(struct ubifs_info *c);
1428void ubifs_convert_page_budget(struct ubifs_info *c);
1429long long ubifs_calc_available(const struct ubifs_info *c, int min_idx_lebs);
1430
1431/* find.c */
1432int ubifs_find_free_space(struct ubifs_info *c, int min_space, int *free,
1433 int squeeze);
1434int ubifs_find_free_leb_for_idx(struct ubifs_info *c);
1435int ubifs_find_dirty_leb(struct ubifs_info *c, struct ubifs_lprops *ret_lp,
1436 int min_space, int pick_free);
1437int ubifs_find_dirty_idx_leb(struct ubifs_info *c);
1438int ubifs_save_dirty_idx_lnums(struct ubifs_info *c);
1439
1440/* tnc.c */
1441int ubifs_lookup_level0(struct ubifs_info *c, const union ubifs_key *key,
1442 struct ubifs_znode **zn, int *n);
1443int ubifs_tnc_lookup(struct ubifs_info *c, const union ubifs_key *key,
1444 void *node);
1445int ubifs_tnc_lookup_nm(struct ubifs_info *c, const union ubifs_key *key,
1446 void *node, const struct qstr *nm);
1447int ubifs_tnc_locate(struct ubifs_info *c, const union ubifs_key *key,
1448 void *node, int *lnum, int *offs);
1449int ubifs_tnc_add(struct ubifs_info *c, const union ubifs_key *key, int lnum,
1450 int offs, int len);
1451int ubifs_tnc_replace(struct ubifs_info *c, const union ubifs_key *key,
1452 int old_lnum, int old_offs, int lnum, int offs, int len);
1453int ubifs_tnc_add_nm(struct ubifs_info *c, const union ubifs_key *key,
1454 int lnum, int offs, int len, const struct qstr *nm);
1455int ubifs_tnc_remove(struct ubifs_info *c, const union ubifs_key *key);
1456int ubifs_tnc_remove_nm(struct ubifs_info *c, const union ubifs_key *key,
1457 const struct qstr *nm);
1458int ubifs_tnc_remove_range(struct ubifs_info *c, union ubifs_key *from_key,
1459 union ubifs_key *to_key);
1460int ubifs_tnc_remove_ino(struct ubifs_info *c, ino_t inum);
1461struct ubifs_dent_node *ubifs_tnc_next_ent(struct ubifs_info *c,
1462 union ubifs_key *key,
1463 const struct qstr *nm);
1464void ubifs_tnc_close(struct ubifs_info *c);
1465int ubifs_tnc_has_node(struct ubifs_info *c, union ubifs_key *key, int level,
1466 int lnum, int offs, int is_idx);
1467int ubifs_dirty_idx_node(struct ubifs_info *c, union ubifs_key *key, int level,
1468 int lnum, int offs);
1469/* Shared by tnc.c for tnc_commit.c */
1470void destroy_old_idx(struct ubifs_info *c);
1471int is_idx_node_in_tnc(struct ubifs_info *c, union ubifs_key *key, int level,
1472 int lnum, int offs);
1473int insert_old_idx_znode(struct ubifs_info *c, struct ubifs_znode *znode);
1474
1475/* tnc_misc.c */
1476struct ubifs_znode *ubifs_tnc_levelorder_next(struct ubifs_znode *zr,
1477 struct ubifs_znode *znode);
1478int ubifs_search_zbranch(const struct ubifs_info *c,
1479 const struct ubifs_znode *znode,
1480 const union ubifs_key *key, int *n);
1481struct ubifs_znode *ubifs_tnc_postorder_first(struct ubifs_znode *znode);
1482struct ubifs_znode *ubifs_tnc_postorder_next(struct ubifs_znode *znode);
1483long ubifs_destroy_tnc_subtree(struct ubifs_znode *zr);
1484struct ubifs_znode *ubifs_load_znode(struct ubifs_info *c,
1485 struct ubifs_zbranch *zbr,
1486 struct ubifs_znode *parent, int iip);
1487int ubifs_tnc_read_node(struct ubifs_info *c, struct ubifs_zbranch *zbr,
1488 void *node);
1489
1490/* tnc_commit.c */
1491int ubifs_tnc_start_commit(struct ubifs_info *c, struct ubifs_zbranch *zroot);
1492int ubifs_tnc_end_commit(struct ubifs_info *c);
1493
1494/* shrinker.c */
1495int ubifs_shrinker(int nr_to_scan, gfp_t gfp_mask);
1496
1497/* commit.c */
1498int ubifs_bg_thread(void *info);
1499void ubifs_commit_required(struct ubifs_info *c);
1500void ubifs_request_bg_commit(struct ubifs_info *c);
1501int ubifs_run_commit(struct ubifs_info *c);
1502void ubifs_recovery_commit(struct ubifs_info *c);
1503int ubifs_gc_should_commit(struct ubifs_info *c);
1504void ubifs_wait_for_commit(struct ubifs_info *c);
1505
1506/* master.c */
1507int ubifs_read_master(struct ubifs_info *c);
1508int ubifs_write_master(struct ubifs_info *c);
1509
1510/* sb.c */
1511int ubifs_read_superblock(struct ubifs_info *c);
1512struct ubifs_sb_node *ubifs_read_sb_node(struct ubifs_info *c);
1513int ubifs_write_sb_node(struct ubifs_info *c, struct ubifs_sb_node *sup);
1514
1515/* replay.c */
1516int ubifs_validate_entry(struct ubifs_info *c,
1517 const struct ubifs_dent_node *dent);
1518int ubifs_replay_journal(struct ubifs_info *c);
1519
1520/* gc.c */
1521int ubifs_garbage_collect(struct ubifs_info *c, int anyway);
1522int ubifs_gc_start_commit(struct ubifs_info *c);
1523int ubifs_gc_end_commit(struct ubifs_info *c);
1524void ubifs_destroy_idx_gc(struct ubifs_info *c);
1525int ubifs_get_idx_gc_leb(struct ubifs_info *c);
1526int ubifs_garbage_collect_leb(struct ubifs_info *c, struct ubifs_lprops *lp);
1527
1528/* orphan.c */
1529int ubifs_add_orphan(struct ubifs_info *c, ino_t inum);
1530void ubifs_delete_orphan(struct ubifs_info *c, ino_t inum);
1531int ubifs_orphan_start_commit(struct ubifs_info *c);
1532int ubifs_orphan_end_commit(struct ubifs_info *c);
1533int ubifs_mount_orphans(struct ubifs_info *c, int unclean, int read_only);
1534
1535/* lpt.c */
1536int ubifs_calc_lpt_geom(struct ubifs_info *c);
1537int ubifs_create_dflt_lpt(struct ubifs_info *c, int *main_lebs, int lpt_first,
1538 int *lpt_lebs, int *big_lpt);
1539int ubifs_lpt_init(struct ubifs_info *c, int rd, int wr);
1540struct ubifs_lprops *ubifs_lpt_lookup(struct ubifs_info *c, int lnum);
1541struct ubifs_lprops *ubifs_lpt_lookup_dirty(struct ubifs_info *c, int lnum);
1542int ubifs_lpt_scan_nolock(struct ubifs_info *c, int start_lnum, int end_lnum,
1543 ubifs_lpt_scan_callback scan_cb, void *data);
1544
1545/* Shared by lpt.c for lpt_commit.c */
1546void ubifs_pack_lsave(struct ubifs_info *c, void *buf, int *lsave);
1547void ubifs_pack_ltab(struct ubifs_info *c, void *buf,
1548 struct ubifs_lpt_lprops *ltab);
1549void ubifs_pack_pnode(struct ubifs_info *c, void *buf,
1550 struct ubifs_pnode *pnode);
1551void ubifs_pack_nnode(struct ubifs_info *c, void *buf,
1552 struct ubifs_nnode *nnode);
1553struct ubifs_pnode *ubifs_get_pnode(struct ubifs_info *c,
1554 struct ubifs_nnode *parent, int iip);
1555struct ubifs_nnode *ubifs_get_nnode(struct ubifs_info *c,
1556 struct ubifs_nnode *parent, int iip);
1557int ubifs_read_nnode(struct ubifs_info *c, struct ubifs_nnode *parent, int iip);
1558void ubifs_add_lpt_dirt(struct ubifs_info *c, int lnum, int dirty);
1559void ubifs_add_nnode_dirt(struct ubifs_info *c, struct ubifs_nnode *nnode);
1560uint32_t ubifs_unpack_bits(uint8_t **addr, int *pos, int nrbits);
1561struct ubifs_nnode *ubifs_first_nnode(struct ubifs_info *c, int *hght);
1562
1563/* lpt_commit.c */
1564int ubifs_lpt_start_commit(struct ubifs_info *c);
1565int ubifs_lpt_end_commit(struct ubifs_info *c);
1566int ubifs_lpt_post_commit(struct ubifs_info *c);
1567void ubifs_lpt_free(struct ubifs_info *c, int wr_only);
1568
1569/* lprops.c */
1570void ubifs_get_lprops(struct ubifs_info *c);
1571const struct ubifs_lprops *ubifs_change_lp(struct ubifs_info *c,
1572 const struct ubifs_lprops *lp,
1573 int free, int dirty, int flags,
1574 int idx_gc_cnt);
1575void ubifs_release_lprops(struct ubifs_info *c);
1576void ubifs_get_lp_stats(struct ubifs_info *c, struct ubifs_lp_stats *stats);
1577void ubifs_add_to_cat(struct ubifs_info *c, struct ubifs_lprops *lprops,
1578 int cat);
1579void ubifs_replace_cat(struct ubifs_info *c, struct ubifs_lprops *old_lprops,
1580 struct ubifs_lprops *new_lprops);
1581void ubifs_ensure_cat(struct ubifs_info *c, struct ubifs_lprops *lprops);
1582int ubifs_categorize_lprops(const struct ubifs_info *c,
1583 const struct ubifs_lprops *lprops);
1584int ubifs_change_one_lp(struct ubifs_info *c, int lnum, int free, int dirty,
1585 int flags_set, int flags_clean, int idx_gc_cnt);
1586int ubifs_update_one_lp(struct ubifs_info *c, int lnum, int free, int dirty,
1587 int flags_set, int flags_clean);
1588int ubifs_read_one_lp(struct ubifs_info *c, int lnum, struct ubifs_lprops *lp);
1589const struct ubifs_lprops *ubifs_fast_find_free(struct ubifs_info *c);
1590const struct ubifs_lprops *ubifs_fast_find_empty(struct ubifs_info *c);
1591const struct ubifs_lprops *ubifs_fast_find_freeable(struct ubifs_info *c);
1592const struct ubifs_lprops *ubifs_fast_find_frdi_idx(struct ubifs_info *c);
1593
1594/* file.c */
1595int ubifs_fsync(struct file *file, struct dentry *dentry, int datasync);
1596int ubifs_setattr(struct dentry *dentry, struct iattr *attr);
1597
1598/* dir.c */
1599struct inode *ubifs_new_inode(struct ubifs_info *c, const struct inode *dir,
1600 int mode);
1601int ubifs_getattr(struct vfsmount *mnt, struct dentry *dentry,
1602 struct kstat *stat);
1603
1604/* xattr.c */
1605int ubifs_setxattr(struct dentry *dentry, const char *name,
1606 const void *value, size_t size, int flags);
1607ssize_t ubifs_getxattr(struct dentry *dentry, const char *name, void *buf,
1608 size_t size);
1609ssize_t ubifs_listxattr(struct dentry *dentry, char *buffer, size_t size);
1610int ubifs_removexattr(struct dentry *dentry, const char *name);
1611
1612/* super.c */
1613struct inode *ubifs_iget(struct super_block *sb, unsigned long inum);
1614
1615/* recovery.c */
1616int ubifs_recover_master_node(struct ubifs_info *c);
1617int ubifs_write_rcvrd_mst_node(struct ubifs_info *c);
1618struct ubifs_scan_leb *ubifs_recover_leb(struct ubifs_info *c, int lnum,
1619 int offs, void *sbuf, int grouped);
1620struct ubifs_scan_leb *ubifs_recover_log_leb(struct ubifs_info *c, int lnum,
1621 int offs, void *sbuf);
1622int ubifs_recover_inl_heads(const struct ubifs_info *c, void *sbuf);
1623int ubifs_clean_lebs(const struct ubifs_info *c, void *sbuf);
1624int ubifs_rcvry_gc_commit(struct ubifs_info *c);
1625int ubifs_recover_size_accum(struct ubifs_info *c, union ubifs_key *key,
1626 int deletion, loff_t new_size);
1627int ubifs_recover_size(struct ubifs_info *c);
1628void ubifs_destroy_size_tree(struct ubifs_info *c);
1629
1630/* ioctl.c */
1631long ubifs_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
1632void ubifs_set_inode_flags(struct inode *inode);
1633#ifdef CONFIG_COMPAT
1634long ubifs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
1635#endif
1636
1637/* compressor.c */
1638int __init ubifs_compressors_init(void);
1639void __exit ubifs_compressors_exit(void);
1640void ubifs_compress(const void *in_buf, int in_len, void *out_buf, int *out_len,
1641 int *compr_type);
1642int ubifs_decompress(const void *buf, int len, void *out, int *out_len,
1643 int compr_type);
1644
1645#include "debug.h"
1646#include "misc.h"
1647#include "key.h"
1648
1649#endif /* !__UBIFS_H__ */
diff --git a/fs/ubifs/xattr.c b/fs/ubifs/xattr.c
new file mode 100644
index 000000000000..1388a078e1a9
--- /dev/null
+++ b/fs/ubifs/xattr.c
@@ -0,0 +1,581 @@
1/*
2 * This file is part of UBIFS.
3 *
4 * Copyright (C) 2006-2008 Nokia Corporation.
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms of the GNU General Public License version 2 as published by
8 * the Free Software Foundation.
9 *
10 * This program is distributed in the hope that it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13 * more details.
14 *
15 * You should have received a copy of the GNU General Public License along with
16 * this program; if not, write to the Free Software Foundation, Inc., 51
17 * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
18 *
19 * Authors: Artem Bityutskiy (Битюцкий Артём)
20 * Adrian Hunter
21 */
22
23/*
24 * This file implements UBIFS extended attributes support.
25 *
26 * Extended attributes are implemented as regular inodes with attached data,
27 * which limits extended attribute size to UBIFS block size (4KiB). Names of
28 * extended attributes are described by extended attribute entries (xentries),
29 * which are almost identical to directory entries, but have different key type.
30 *
31 * In other words, the situation with extended attributes is very similar to
32 * directories. Indeed, any inode (but of course not xattr inodes) may have a
33 * number of associated xentries, just like directory inodes have associated
34 * directory entries. Extended attribute entries store the name of the extended
35 * attribute, the host inode number, and the extended attribute inode number.
36 * Similarly, direntries store the name, the parent and the target inode
37 * numbers. Thus, most of the common UBIFS mechanisms may be re-used for
38 * extended attributes.
39 *
40 * The number of extended attributes is not limited, but there is Linux
41 * limitation on the maximum possible size of the list of all extended
42 * attributes associated with an inode (%XATTR_LIST_MAX), so UBIFS makes sure
43 * the sum of all extended attribute names of the inode does not exceed that
44 * limit.
45 *
46 * Extended attributes are synchronous, which means they are written to the
47 * flash media synchronously and there is no write-back for extended attribute
48 * inodes. The extended attribute values are not stored in compressed form on
49 * the media.
50 *
51 * Since extended attributes are represented by regular inodes, they are cached
52 * in the VFS inode cache. The xentries are cached in the LNC cache (see
53 * tnc.c).
54 *
55 * ACL support is not implemented.
56 */
57
58#include <linux/xattr.h>
59#include <linux/posix_acl_xattr.h>
60#include "ubifs.h"
61
62/*
63 * Limit the number of extended attributes per inode so that the total size
64 * (xattr_size) is guaranteeded to fit in an 'unsigned int'.
65 */
66#define MAX_XATTRS_PER_INODE 65535
67
68/*
69 * Extended attribute type constants.
70 *
71 * USER_XATTR: user extended attribute ("user.*")
72 * TRUSTED_XATTR: trusted extended attribute ("trusted.*)
73 * SECURITY_XATTR: security extended attribute ("security.*")
74 */
75enum {
76 USER_XATTR,
77 TRUSTED_XATTR,
78 SECURITY_XATTR,
79};
80
81static struct inode_operations none_inode_operations;
82static struct address_space_operations none_address_operations;
83static struct file_operations none_file_operations;
84
85/**
86 * create_xattr - create an extended attribute.
87 * @c: UBIFS file-system description object
88 * @host: host inode
89 * @nm: extended attribute name
90 * @value: extended attribute value
91 * @size: size of extended attribute value
92 *
93 * This is a helper function which creates an extended attribute of name @nm
94 * and value @value for inode @host. The host inode is also updated on flash
95 * because the ctime and extended attribute accounting data changes. This
96 * function returns zero in case of success and a negative error code in case
97 * of failure.
98 */
99static int create_xattr(struct ubifs_info *c, struct inode *host,
100 const struct qstr *nm, const void *value, int size)
101{
102 int err;
103 struct inode *inode;
104 struct ubifs_inode *ui, *host_ui = ubifs_inode(host);
105 struct ubifs_budget_req req = { .new_ino = 1, .new_dent = 1,
106 .new_ino_d = size, .dirtied_ino = 1,
107 .dirtied_ino_d = host_ui->data_len};
108
109 if (host_ui->xattr_cnt >= MAX_XATTRS_PER_INODE)
110 return -ENOSPC;
111 /*
112 * Linux limits the maximum size of the extended attribute names list
113 * to %XATTR_LIST_MAX. This means we should not allow creating more*
114 * extended attributes if the name list becomes larger. This limitation
115 * is artificial for UBIFS, though.
116 */
117 if (host_ui->xattr_names + host_ui->xattr_cnt +
118 nm->len + 1 > XATTR_LIST_MAX)
119 return -ENOSPC;
120
121 err = ubifs_budget_space(c, &req);
122 if (err)
123 return err;
124
125 inode = ubifs_new_inode(c, host, S_IFREG | S_IRWXUGO);
126 if (IS_ERR(inode)) {
127 err = PTR_ERR(inode);
128 goto out_budg;
129 }
130
131 mutex_lock(&host_ui->ui_mutex);
132 /* Re-define all operations to be "nothing" */
133 inode->i_mapping->a_ops = &none_address_operations;
134 inode->i_op = &none_inode_operations;
135 inode->i_fop = &none_file_operations;
136
137 inode->i_flags |= S_SYNC | S_NOATIME | S_NOCMTIME | S_NOQUOTA;
138 ui = ubifs_inode(inode);
139 ui->xattr = 1;
140 ui->flags |= UBIFS_XATTR_FL;
141 ui->data = kmalloc(size, GFP_NOFS);
142 if (!ui->data) {
143 err = -ENOMEM;
144 goto out_unlock;
145 }
146
147 memcpy(ui->data, value, size);
148 host->i_ctime = ubifs_current_time(host);
149 host_ui->xattr_cnt += 1;
150 host_ui->xattr_size += CALC_DENT_SIZE(nm->len);
151 host_ui->xattr_size += CALC_XATTR_BYTES(size);
152 host_ui->xattr_names += nm->len;
153
154 /*
155 * We do not use i_size_write() because nobody can race with us as we
156 * are holding host @host->i_mutex - every xattr operation for this
157 * inode is serialized by it.
158 */
159 inode->i_size = ui->ui_size = size;
160 ui->data_len = size;
161 err = ubifs_jnl_update(c, host, nm, inode, 0, 1);
162 if (err)
163 goto out_cancel;
164 mutex_unlock(&host_ui->ui_mutex);
165
166 ubifs_release_budget(c, &req);
167 insert_inode_hash(inode);
168 iput(inode);
169 return 0;
170
171out_cancel:
172 host_ui->xattr_cnt -= 1;
173 host_ui->xattr_size -= CALC_DENT_SIZE(nm->len);
174 host_ui->xattr_size -= CALC_XATTR_BYTES(size);
175out_unlock:
176 mutex_unlock(&host_ui->ui_mutex);
177 make_bad_inode(inode);
178 iput(inode);
179out_budg:
180 ubifs_release_budget(c, &req);
181 return err;
182}
183
184/**
185 * change_xattr - change an extended attribute.
186 * @c: UBIFS file-system description object
187 * @host: host inode
188 * @inode: extended attribute inode
189 * @value: extended attribute value
190 * @size: size of extended attribute value
191 *
192 * This helper function changes the value of extended attribute @inode with new
193 * data from @value. Returns zero in case of success and a negative error code
194 * in case of failure.
195 */
196static int change_xattr(struct ubifs_info *c, struct inode *host,
197 struct inode *inode, const void *value, int size)
198{
199 int err;
200 struct ubifs_inode *host_ui = ubifs_inode(host);
201 struct ubifs_inode *ui = ubifs_inode(inode);
202 struct ubifs_budget_req req = { .dirtied_ino = 2,
203 .dirtied_ino_d = size + host_ui->data_len };
204
205 ubifs_assert(ui->data_len == inode->i_size);
206 err = ubifs_budget_space(c, &req);
207 if (err)
208 return err;
209
210 mutex_lock(&host_ui->ui_mutex);
211 host->i_ctime = ubifs_current_time(host);
212 host_ui->xattr_size -= CALC_XATTR_BYTES(ui->data_len);
213 host_ui->xattr_size += CALC_XATTR_BYTES(size);
214
215 kfree(ui->data);
216 ui->data = kmalloc(size, GFP_NOFS);
217 if (!ui->data) {
218 err = -ENOMEM;
219 goto out_unlock;
220 }
221
222 memcpy(ui->data, value, size);
223 inode->i_size = ui->ui_size = size;
224 ui->data_len = size;
225
226 /*
227 * It is important to write the host inode after the xattr inode
228 * because if the host inode gets synchronized (via 'fsync()'), then
229 * the extended attribute inode gets synchronized, because it goes
230 * before the host inode in the write-buffer.
231 */
232 err = ubifs_jnl_change_xattr(c, inode, host);
233 if (err)
234 goto out_cancel;
235 mutex_unlock(&host_ui->ui_mutex);
236
237 ubifs_release_budget(c, &req);
238 return 0;
239
240out_cancel:
241 host_ui->xattr_size -= CALC_XATTR_BYTES(size);
242 host_ui->xattr_size += CALC_XATTR_BYTES(ui->data_len);
243 make_bad_inode(inode);
244out_unlock:
245 mutex_unlock(&host_ui->ui_mutex);
246 ubifs_release_budget(c, &req);
247 return err;
248}
249
250/**
251 * check_namespace - check extended attribute name-space.
252 * @nm: extended attribute name
253 *
254 * This function makes sure the extended attribute name belongs to one of the
255 * supported extended attribute name-spaces. Returns name-space index in case
256 * of success and a negative error code in case of failure.
257 */
258static int check_namespace(const struct qstr *nm)
259{
260 int type;
261
262 if (nm->len > UBIFS_MAX_NLEN)
263 return -ENAMETOOLONG;
264
265 if (!strncmp(nm->name, XATTR_TRUSTED_PREFIX,
266 XATTR_TRUSTED_PREFIX_LEN)) {
267 if (nm->name[sizeof(XATTR_TRUSTED_PREFIX) - 1] == '\0')
268 return -EINVAL;
269 type = TRUSTED_XATTR;
270 } else if (!strncmp(nm->name, XATTR_USER_PREFIX,
271 XATTR_USER_PREFIX_LEN)) {
272 if (nm->name[XATTR_USER_PREFIX_LEN] == '\0')
273 return -EINVAL;
274 type = USER_XATTR;
275 } else if (!strncmp(nm->name, XATTR_SECURITY_PREFIX,
276 XATTR_SECURITY_PREFIX_LEN)) {
277 if (nm->name[sizeof(XATTR_SECURITY_PREFIX) - 1] == '\0')
278 return -EINVAL;
279 type = SECURITY_XATTR;
280 } else
281 return -EOPNOTSUPP;
282
283 return type;
284}
285
286static struct inode *iget_xattr(struct ubifs_info *c, ino_t inum)
287{
288 struct inode *inode;
289
290 inode = ubifs_iget(c->vfs_sb, inum);
291 if (IS_ERR(inode)) {
292 ubifs_err("dead extended attribute entry, error %d",
293 (int)PTR_ERR(inode));
294 return inode;
295 }
296 if (ubifs_inode(inode)->xattr)
297 return inode;
298 ubifs_err("corrupt extended attribute entry");
299 iput(inode);
300 return ERR_PTR(-EINVAL);
301}
302
303int ubifs_setxattr(struct dentry *dentry, const char *name,
304 const void *value, size_t size, int flags)
305{
306 struct inode *inode, *host = dentry->d_inode;
307 struct ubifs_info *c = host->i_sb->s_fs_info;
308 struct qstr nm = { .name = name, .len = strlen(name) };
309 struct ubifs_dent_node *xent;
310 union ubifs_key key;
311 int err, type;
312
313 dbg_gen("xattr '%s', host ino %lu ('%.*s'), size %zd", name,
314 host->i_ino, dentry->d_name.len, dentry->d_name.name, size);
315
316 if (size > UBIFS_MAX_INO_DATA)
317 return -ERANGE;
318
319 type = check_namespace(&nm);
320 if (type < 0)
321 return type;
322
323 xent = kmalloc(UBIFS_MAX_XENT_NODE_SZ, GFP_NOFS);
324 if (!xent)
325 return -ENOMEM;
326
327 /*
328 * The extended attribute entries are stored in LNC, so multiple
329 * look-ups do not involve reading the flash.
330 */
331 xent_key_init(c, &key, host->i_ino, &nm);
332 err = ubifs_tnc_lookup_nm(c, &key, xent, &nm);
333 if (err) {
334 if (err != -ENOENT)
335 goto out_free;
336
337 if (flags & XATTR_REPLACE)
338 /* We are asked not to create the xattr */
339 err = -ENODATA;
340 else
341 err = create_xattr(c, host, &nm, value, size);
342 goto out_free;
343 }
344
345 if (flags & XATTR_CREATE) {
346 /* We are asked not to replace the xattr */
347 err = -EEXIST;
348 goto out_free;
349 }
350
351 inode = iget_xattr(c, le64_to_cpu(xent->inum));
352 if (IS_ERR(inode)) {
353 err = PTR_ERR(inode);
354 goto out_free;
355 }
356
357 err = change_xattr(c, host, inode, value, size);
358 iput(inode);
359
360out_free:
361 kfree(xent);
362 return err;
363}
364
365ssize_t ubifs_getxattr(struct dentry *dentry, const char *name, void *buf,
366 size_t size)
367{
368 struct inode *inode, *host = dentry->d_inode;
369 struct ubifs_info *c = host->i_sb->s_fs_info;
370 struct qstr nm = { .name = name, .len = strlen(name) };
371 struct ubifs_inode *ui;
372 struct ubifs_dent_node *xent;
373 union ubifs_key key;
374 int err;
375
376 dbg_gen("xattr '%s', ino %lu ('%.*s'), buf size %zd", name,
377 host->i_ino, dentry->d_name.len, dentry->d_name.name, size);
378
379 err = check_namespace(&nm);
380 if (err < 0)
381 return err;
382
383 xent = kmalloc(UBIFS_MAX_XENT_NODE_SZ, GFP_NOFS);
384 if (!xent)
385 return -ENOMEM;
386
387 mutex_lock(&host->i_mutex);
388 xent_key_init(c, &key, host->i_ino, &nm);
389 err = ubifs_tnc_lookup_nm(c, &key, xent, &nm);
390 if (err) {
391 if (err == -ENOENT)
392 err = -ENODATA;
393 goto out_unlock;
394 }
395
396 inode = iget_xattr(c, le64_to_cpu(xent->inum));
397 if (IS_ERR(inode)) {
398 err = PTR_ERR(inode);
399 goto out_unlock;
400 }
401
402 ui = ubifs_inode(inode);
403 ubifs_assert(inode->i_size == ui->data_len);
404 ubifs_assert(ubifs_inode(host)->xattr_size > ui->data_len);
405
406 if (buf) {
407 /* If @buf is %NULL we are supposed to return the length */
408 if (ui->data_len > size) {
409 dbg_err("buffer size %zd, xattr len %d",
410 size, ui->data_len);
411 err = -ERANGE;
412 goto out_iput;
413 }
414
415 memcpy(buf, ui->data, ui->data_len);
416 }
417 err = ui->data_len;
418
419out_iput:
420 iput(inode);
421out_unlock:
422 mutex_unlock(&host->i_mutex);
423 kfree(xent);
424 return err;
425}
426
427ssize_t ubifs_listxattr(struct dentry *dentry, char *buffer, size_t size)
428{
429 union ubifs_key key;
430 struct inode *host = dentry->d_inode;
431 struct ubifs_info *c = host->i_sb->s_fs_info;
432 struct ubifs_inode *host_ui = ubifs_inode(host);
433 struct ubifs_dent_node *xent, *pxent = NULL;
434 int err, len, written = 0;
435 struct qstr nm = { .name = NULL };
436
437 dbg_gen("ino %lu ('%.*s'), buffer size %zd", host->i_ino,
438 dentry->d_name.len, dentry->d_name.name, size);
439
440 len = host_ui->xattr_names + host_ui->xattr_cnt;
441 if (!buffer)
442 /*
443 * We should return the minimum buffer size which will fit a
444 * null-terminated list of all the extended attribute names.
445 */
446 return len;
447
448 if (len > size)
449 return -ERANGE;
450
451 lowest_xent_key(c, &key, host->i_ino);
452
453 mutex_lock(&host->i_mutex);
454 while (1) {
455 int type;
456
457 xent = ubifs_tnc_next_ent(c, &key, &nm);
458 if (unlikely(IS_ERR(xent))) {
459 err = PTR_ERR(xent);
460 break;
461 }
462
463 nm.name = xent->name;
464 nm.len = le16_to_cpu(xent->nlen);
465
466 type = check_namespace(&nm);
467 if (unlikely(type < 0)) {
468 err = type;
469 break;
470 }
471
472 /* Show trusted namespace only for "power" users */
473 if (type != TRUSTED_XATTR || capable(CAP_SYS_ADMIN)) {
474 memcpy(buffer + written, nm.name, nm.len + 1);
475 written += nm.len + 1;
476 }
477
478 kfree(pxent);
479 pxent = xent;
480 key_read(c, &xent->key, &key);
481 }
482 mutex_unlock(&host->i_mutex);
483
484 kfree(pxent);
485 if (err != -ENOENT) {
486 ubifs_err("cannot find next direntry, error %d", err);
487 return err;
488 }
489
490 ubifs_assert(written <= size);
491 return written;
492}
493
494static int remove_xattr(struct ubifs_info *c, struct inode *host,
495 struct inode *inode, const struct qstr *nm)
496{
497 int err;
498 struct ubifs_inode *host_ui = ubifs_inode(host);
499 struct ubifs_inode *ui = ubifs_inode(inode);
500 struct ubifs_budget_req req = { .dirtied_ino = 1, .mod_dent = 1,
501 .dirtied_ino_d = host_ui->data_len };
502
503 ubifs_assert(ui->data_len == inode->i_size);
504
505 err = ubifs_budget_space(c, &req);
506 if (err)
507 return err;
508
509 mutex_lock(&host_ui->ui_mutex);
510 host->i_ctime = ubifs_current_time(host);
511 host_ui->xattr_cnt -= 1;
512 host_ui->xattr_size -= CALC_DENT_SIZE(nm->len);
513 host_ui->xattr_size -= CALC_XATTR_BYTES(ui->data_len);
514 host_ui->xattr_names -= nm->len;
515
516 err = ubifs_jnl_delete_xattr(c, host, inode, nm);
517 if (err)
518 goto out_cancel;
519 mutex_unlock(&host_ui->ui_mutex);
520
521 ubifs_release_budget(c, &req);
522 return 0;
523
524out_cancel:
525 host_ui->xattr_cnt += 1;
526 host_ui->xattr_size += CALC_DENT_SIZE(nm->len);
527 host_ui->xattr_size += CALC_XATTR_BYTES(ui->data_len);
528 mutex_unlock(&host_ui->ui_mutex);
529 ubifs_release_budget(c, &req);
530 make_bad_inode(inode);
531 return err;
532}
533
534int ubifs_removexattr(struct dentry *dentry, const char *name)
535{
536 struct inode *inode, *host = dentry->d_inode;
537 struct ubifs_info *c = host->i_sb->s_fs_info;
538 struct qstr nm = { .name = name, .len = strlen(name) };
539 struct ubifs_dent_node *xent;
540 union ubifs_key key;
541 int err;
542
543 dbg_gen("xattr '%s', ino %lu ('%.*s')", name,
544 host->i_ino, dentry->d_name.len, dentry->d_name.name);
545 ubifs_assert(mutex_is_locked(&host->i_mutex));
546
547 err = check_namespace(&nm);
548 if (err < 0)
549 return err;
550
551 xent = kmalloc(UBIFS_MAX_XENT_NODE_SZ, GFP_NOFS);
552 if (!xent)
553 return -ENOMEM;
554
555 xent_key_init(c, &key, host->i_ino, &nm);
556 err = ubifs_tnc_lookup_nm(c, &key, xent, &nm);
557 if (err) {
558 if (err == -ENOENT)
559 err = -ENODATA;
560 goto out_free;
561 }
562
563 inode = iget_xattr(c, le64_to_cpu(xent->inum));
564 if (IS_ERR(inode)) {
565 err = PTR_ERR(inode);
566 goto out_free;
567 }
568
569 ubifs_assert(inode->i_nlink == 1);
570 inode->i_nlink = 0;
571 err = remove_xattr(c, host, inode, &nm);
572 if (err)
573 inode->i_nlink = 1;
574
575 /* If @i_nlink is 0, 'iput()' will delete the inode */
576 iput(inode);
577
578out_free:
579 kfree(xent);
580 return err;
581}