aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorArtem Bityutskiy <Artem.Bityutskiy@nokia.com>2008-07-14 12:08:37 -0400
committerArtem Bityutskiy <Artem.Bityutskiy@nokia.com>2008-07-15 10:35:15 -0400
commit1e51764a3c2ac05a23a22b2a95ddee4d9bffb16d (patch)
tree919debdd48aef9eee9ff0e8f465ef2649325b993
parente56a99d5a42dcb91e622ae7a0289d8fb2ddabffb (diff)
UBIFS: add new flash file system
This is a new flash file system. See http://www.linux-mtd.infradead.org/doc/ubifs.html Signed-off-by: Artem Bityutskiy <Artem.Bityutskiy@nokia.com> Signed-off-by: Adrian Hunter <ext-adrian.hunter@nokia.com>
-rw-r--r--fs/ubifs/budget.c731
-rw-r--r--fs/ubifs/commit.c677
-rw-r--r--fs/ubifs/compress.c253
-rw-r--r--fs/ubifs/debug.c2289
-rw-r--r--fs/ubifs/debug.h403
-rw-r--r--fs/ubifs/dir.c1240
-rw-r--r--fs/ubifs/file.c1275
-rw-r--r--fs/ubifs/find.c975
-rw-r--r--fs/ubifs/gc.c773
-rw-r--r--fs/ubifs/io.c914
-rw-r--r--fs/ubifs/ioctl.c204
-rw-r--r--fs/ubifs/journal.c1387
-rw-r--r--fs/ubifs/key.h533
-rw-r--r--fs/ubifs/log.c805
-rw-r--r--fs/ubifs/lprops.c1357
-rw-r--r--fs/ubifs/lpt.c2243
-rw-r--r--fs/ubifs/lpt_commit.c1648
-rw-r--r--fs/ubifs/master.c387
-rw-r--r--fs/ubifs/misc.h342
-rw-r--r--fs/ubifs/orphan.c958
-rw-r--r--fs/ubifs/recovery.c1519
-rw-r--r--fs/ubifs/replay.c1075
-rw-r--r--fs/ubifs/sb.c629
-rw-r--r--fs/ubifs/scan.c362
-rw-r--r--fs/ubifs/shrinker.c322
-rw-r--r--fs/ubifs/super.c1951
-rw-r--r--fs/ubifs/tnc.c2956
-rw-r--r--fs/ubifs/tnc_commit.c1103
-rw-r--r--fs/ubifs/tnc_misc.c494
-rw-r--r--fs/ubifs/ubifs-media.h745
-rw-r--r--fs/ubifs/ubifs.h1649
-rw-r--r--fs/ubifs/xattr.c581
32 files changed, 32780 insertions, 0 deletions
diff --git a/fs/ubifs/budget.c b/fs/ubifs/budget.c
new file mode 100644
index 00000000000..d81fb9ed2b8
--- /dev/null
+++ b/fs/ubifs/budget.c
@@ -0,0 +1,731 @@
1/*
2 * This file is part of UBIFS.
3 *
4 * Copyright (C) 2006-2008 Nokia Corporation.
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms of the GNU General Public License version 2 as published by
8 * the Free Software Foundation.
9 *
10 * This program is distributed in the hope that it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13 * more details.
14 *
15 * You should have received a copy of the GNU General Public License along with
16 * this program; if not, write to the Free Software Foundation, Inc., 51
17 * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
18 *
19 * Authors: Adrian Hunter
20 * Artem Bityutskiy (Битюцкий Артём)
21 */
22
23/*
24 * This file implements the budgeting sub-system which is responsible for UBIFS
25 * space management.
26 *
27 * Factors such as compression, wasted space at the ends of LEBs, space in other
28 * journal heads, the effect of updates on the index, and so on, make it
29 * impossible to accurately predict the amount of space needed. Consequently
30 * approximations are used.
31 */
32
33#include "ubifs.h"
34#include <linux/writeback.h>
35#include <asm/div64.h>
36
37/*
38 * When pessimistic budget calculations say that there is not enough space,
39 * UBIFS starts writing back dirty inodes and pages, doing garbage collection,
40 * or committing. The below constants define maximum number of times UBIFS
41 * repeats the operations.
42 */
43#define MAX_SHRINK_RETRIES 8
44#define MAX_GC_RETRIES 4
45#define MAX_CMT_RETRIES 2
46#define MAX_NOSPC_RETRIES 1
47
48/*
49 * The constant below defines the number of dirty pages which should be
50 * written back when trying to shrink the liability.
51 */
52#define NR_TO_WRITE 16
53
54/**
55 * struct retries_info - information about re-tries while making free space.
56 * @prev_liability: liability sampled on the previous write-back attempt
57 * @shrink_cnt: how many times write-back was forced to shrink the liability
58 * @shrink_retries: count of liability shrink re-tries (increased when
59 * liability does not shrink)
60 * @try_gc: GC should be tried first (before the next write-back attempt)
61 * @gc_retries: how many times GC was run
62 * @cmt_retries: how many times commit was run as the last resort
63 * @nospc_retries: how many times GC returned %-ENOSPC
64 *
65 * Since we consider budgeting to be the fast-path, and this structure has to
66 * be allocated on stack and zeroed out, we make it smaller using bit-fields.
67 */
68struct retries_info {
69 long long prev_liability;
70 unsigned int shrink_cnt;
71 unsigned int shrink_retries:5;
72 unsigned int try_gc:1;
73 unsigned int gc_retries:4;
74 unsigned int cmt_retries:3;
75 unsigned int nospc_retries:1;
76};
77
78/**
79 * shrink_liability - write-back some dirty pages/inodes.
80 * @c: UBIFS file-system description object
81 * @nr_to_write: how many dirty pages to write-back
82 *
83 * This function shrinks UBIFS liability by means of writing back some amount
84 * of dirty inodes and their pages. Returns the amount of pages which were
85 * written back. The returned value does not include dirty inodes which were
86 * synchronized.
87 *
88 * Note, this function synchronizes even VFS inodes which are locked
89 * (@i_mutex) by the caller of the budgeting function, because write-back does
90 * not touch @i_mutex.
91 */
92static int shrink_liability(struct ubifs_info *c, int nr_to_write)
93{
94 int nr_written;
95 struct writeback_control wbc = {
96 .sync_mode = WB_SYNC_NONE,
97 .range_end = LLONG_MAX,
98 .nr_to_write = nr_to_write,
99 };
100
101 generic_sync_sb_inodes(c->vfs_sb, &wbc);
102 nr_written = nr_to_write - wbc.nr_to_write;
103
104 if (!nr_written) {
105 /*
106 * Re-try again but wait on pages/inodes which are being
107 * written-back concurrently (e.g., by pdflush).
108 */
109 memset(&wbc, 0, sizeof(struct writeback_control));
110 wbc.sync_mode = WB_SYNC_ALL;
111 wbc.range_end = LLONG_MAX;
112 wbc.nr_to_write = nr_to_write;
113 generic_sync_sb_inodes(c->vfs_sb, &wbc);
114 nr_written = nr_to_write - wbc.nr_to_write;
115 }
116
117 dbg_budg("%d pages were written back", nr_written);
118 return nr_written;
119}
120
121
122/**
123 * run_gc - run garbage collector.
124 * @c: UBIFS file-system description object
125 *
126 * This function runs garbage collector to make some more free space. Returns
127 * zero if a free LEB has been produced, %-EAGAIN if commit is required, and a
128 * negative error code in case of failure.
129 */
130static int run_gc(struct ubifs_info *c)
131{
132 int err, lnum;
133
134 /* Make some free space by garbage-collecting dirty space */
135 down_read(&c->commit_sem);
136 lnum = ubifs_garbage_collect(c, 1);
137 up_read(&c->commit_sem);
138 if (lnum < 0)
139 return lnum;
140
141 /* GC freed one LEB, return it to lprops */
142 dbg_budg("GC freed LEB %d", lnum);
143 err = ubifs_return_leb(c, lnum);
144 if (err)
145 return err;
146 return 0;
147}
148
149/**
150 * make_free_space - make more free space on the file-system.
151 * @c: UBIFS file-system description object
152 * @ri: information about previous invocations of this function
153 *
154 * This function is called when an operation cannot be budgeted because there
155 * is supposedly no free space. But in most cases there is some free space:
156 * o budgeting is pessimistic, so it always budgets more then it is actually
157 * needed, so shrinking the liability is one way to make free space - the
158 * cached data will take less space then it was budgeted for;
159 * o GC may turn some dark space into free space (budgeting treats dark space
160 * as not available);
161 * o commit may free some LEB, i.e., turn freeable LEBs into free LEBs.
162 *
163 * So this function tries to do the above. Returns %-EAGAIN if some free space
164 * was presumably made and the caller has to re-try budgeting the operation.
165 * Returns %-ENOSPC if it couldn't do more free space, and other negative error
166 * codes on failures.
167 */
168static int make_free_space(struct ubifs_info *c, struct retries_info *ri)
169{
170 int err;
171
172 /*
173 * If we have some dirty pages and inodes (liability), try to write
174 * them back unless this was tried too many times without effect
175 * already.
176 */
177 if (ri->shrink_retries < MAX_SHRINK_RETRIES && !ri->try_gc) {
178 long long liability;
179
180 spin_lock(&c->space_lock);
181 liability = c->budg_idx_growth + c->budg_data_growth +
182 c->budg_dd_growth;
183 spin_unlock(&c->space_lock);
184
185 if (ri->prev_liability >= liability) {
186 /* Liability does not shrink, next time try GC then */
187 ri->shrink_retries += 1;
188 if (ri->gc_retries < MAX_GC_RETRIES)
189 ri->try_gc = 1;
190 dbg_budg("liability did not shrink: retries %d of %d",
191 ri->shrink_retries, MAX_SHRINK_RETRIES);
192 }
193
194 dbg_budg("force write-back (count %d)", ri->shrink_cnt);
195 shrink_liability(c, NR_TO_WRITE + ri->shrink_cnt);
196
197 ri->prev_liability = liability;
198 ri->shrink_cnt += 1;
199 return -EAGAIN;
200 }
201
202 /*
203 * Try to run garbage collector unless it was already tried too many
204 * times.
205 */
206 if (ri->gc_retries < MAX_GC_RETRIES) {
207 ri->gc_retries += 1;
208 dbg_budg("run GC, retries %d of %d",
209 ri->gc_retries, MAX_GC_RETRIES);
210
211 ri->try_gc = 0;
212 err = run_gc(c);
213 if (!err)
214 return -EAGAIN;
215
216 if (err == -EAGAIN) {
217 dbg_budg("GC asked to commit");
218 err = ubifs_run_commit(c);
219 if (err)
220 return err;
221 return -EAGAIN;
222 }
223
224 if (err != -ENOSPC)
225 return err;
226
227 /*
228 * GC could not make any progress. If this is the first time,
229 * then it makes sense to try to commit, because it might make
230 * some dirty space.
231 */
232 dbg_budg("GC returned -ENOSPC, retries %d",
233 ri->nospc_retries);
234 if (ri->nospc_retries >= MAX_NOSPC_RETRIES)
235 return err;
236 ri->nospc_retries += 1;
237 }
238
239 /* Neither GC nor write-back helped, try to commit */
240 if (ri->cmt_retries < MAX_CMT_RETRIES) {
241 ri->cmt_retries += 1;
242 dbg_budg("run commit, retries %d of %d",
243 ri->cmt_retries, MAX_CMT_RETRIES);
244 err = ubifs_run_commit(c);
245 if (err)
246 return err;
247 return -EAGAIN;
248 }
249 return -ENOSPC;
250}
251
252/**
253 * ubifs_calc_min_idx_lebs - calculate amount of eraseblocks for the index.
254 * @c: UBIFS file-system description object
255 *
256 * This function calculates and returns the number of eraseblocks which should
257 * be kept for index usage.
258 */
259int ubifs_calc_min_idx_lebs(struct ubifs_info *c)
260{
261 int ret;
262 uint64_t idx_size;
263
264 idx_size = c->old_idx_sz + c->budg_idx_growth + c->budg_uncommitted_idx;
265
266 /* And make sure we have twice the index size of space reserved */
267 idx_size <<= 1;
268
269 /*
270 * We do not maintain 'old_idx_size' as 'old_idx_lebs'/'old_idx_bytes'
271 * pair, nor similarly the two variables for the new index size, so we
272 * have to do this costly 64-bit division on fast-path.
273 */
274 if (do_div(idx_size, c->leb_size - c->max_idx_node_sz))
275 ret = idx_size + 1;
276 else
277 ret = idx_size;
278 /*
279 * The index head is not available for the in-the-gaps method, so add an
280 * extra LEB to compensate.
281 */
282 ret += 1;
283 /*
284 * At present the index needs at least 2 LEBs: one for the index head
285 * and one for in-the-gaps method (which currently does not cater for
286 * the index head and so excludes it from consideration).
287 */
288 if (ret < 2)
289 ret = 2;
290 return ret;
291}
292
293/**
294 * ubifs_calc_available - calculate available FS space.
295 * @c: UBIFS file-system description object
296 * @min_idx_lebs: minimum number of LEBs reserved for the index
297 *
298 * This function calculates and returns amount of FS space available for use.
299 */
300long long ubifs_calc_available(const struct ubifs_info *c, int min_idx_lebs)
301{
302 int subtract_lebs;
303 long long available;
304
305 /*
306 * Force the amount available to the total size reported if the used
307 * space is zero.
308 */
309 if (c->lst.total_used <= UBIFS_INO_NODE_SZ &&
310 c->budg_data_growth + c->budg_dd_growth == 0) {
311 /* Do the same calculation as for c->block_cnt */
312 available = c->main_lebs - 2;
313 available *= c->leb_size - c->dark_wm;
314 return available;
315 }
316
317 available = c->main_bytes - c->lst.total_used;
318
319 /*
320 * Now 'available' contains theoretically available flash space
321 * assuming there is no index, so we have to subtract the space which
322 * is reserved for the index.
323 */
324 subtract_lebs = min_idx_lebs;
325
326 /* Take into account that GC reserves one LEB for its own needs */
327 subtract_lebs += 1;
328
329 /*
330 * The GC journal head LEB is not really accessible. And since
331 * different write types go to different heads, we may count only on
332 * one head's space.
333 */
334 subtract_lebs += c->jhead_cnt - 1;
335
336 /* We also reserve one LEB for deletions, which bypass budgeting */
337 subtract_lebs += 1;
338
339 available -= (long long)subtract_lebs * c->leb_size;
340
341 /* Subtract the dead space which is not available for use */
342 available -= c->lst.total_dead;
343
344 /*
345 * Subtract dark space, which might or might not be usable - it depends
346 * on the data which we have on the media and which will be written. If
347 * this is a lot of uncompressed or not-compressible data, the dark
348 * space cannot be used.
349 */
350 available -= c->lst.total_dark;
351
352 /*
353 * However, there is more dark space. The index may be bigger than
354 * @min_idx_lebs. Those extra LEBs are assumed to be available, but
355 * their dark space is not included in total_dark, so it is subtracted
356 * here.
357 */
358 if (c->lst.idx_lebs > min_idx_lebs) {
359 subtract_lebs = c->lst.idx_lebs - min_idx_lebs;
360 available -= subtract_lebs * c->dark_wm;
361 }
362
363 /* The calculations are rough and may end up with a negative number */
364 return available > 0 ? available : 0;
365}
366
367/**
368 * can_use_rp - check whether the user is allowed to use reserved pool.
369 * @c: UBIFS file-system description object
370 *
371 * UBIFS has so-called "reserved pool" which is flash space reserved
372 * for the superuser and for uses whose UID/GID is recorded in UBIFS superblock.
373 * This function checks whether current user is allowed to use reserved pool.
374 * Returns %1 current user is allowed to use reserved pool and %0 otherwise.
375 */
376static int can_use_rp(struct ubifs_info *c)
377{
378 if (current->fsuid == c->rp_uid || capable(CAP_SYS_RESOURCE) ||
379 (c->rp_gid != 0 && in_group_p(c->rp_gid)))
380 return 1;
381 return 0;
382}
383
384/**
385 * do_budget_space - reserve flash space for index and data growth.
386 * @c: UBIFS file-system description object
387 *
388 * This function makes sure UBIFS has enough free eraseblocks for index growth
389 * and data.
390 *
391 * When budgeting index space, UBIFS reserves twice as more LEBs as the index
392 * would take if it was consolidated and written to the flash. This guarantees
393 * that the "in-the-gaps" commit method always succeeds and UBIFS will always
394 * be able to commit dirty index. So this function basically adds amount of
395 * budgeted index space to the size of the current index, multiplies this by 2,
396 * and makes sure this does not exceed the amount of free eraseblocks.
397 *
398 * Notes about @c->min_idx_lebs and @c->lst.idx_lebs variables:
399 * o @c->lst.idx_lebs is the number of LEBs the index currently uses. It might
400 * be large, because UBIFS does not do any index consolidation as long as
401 * there is free space. IOW, the index may take a lot of LEBs, but the LEBs
402 * will contain a lot of dirt.
403 * o @c->min_idx_lebs is the the index presumably takes. IOW, the index may be
404 * consolidated to take up to @c->min_idx_lebs LEBs.
405 *
406 * This function returns zero in case of success, and %-ENOSPC in case of
407 * failure.
408 */
409static int do_budget_space(struct ubifs_info *c)
410{
411 long long outstanding, available;
412 int lebs, rsvd_idx_lebs, min_idx_lebs;
413
414 /* First budget index space */
415 min_idx_lebs = ubifs_calc_min_idx_lebs(c);
416
417 /* Now 'min_idx_lebs' contains number of LEBs to reserve */
418 if (min_idx_lebs > c->lst.idx_lebs)
419 rsvd_idx_lebs = min_idx_lebs - c->lst.idx_lebs;
420 else
421 rsvd_idx_lebs = 0;
422
423 /*
424 * The number of LEBs that are available to be used by the index is:
425 *
426 * @c->lst.empty_lebs + @c->freeable_cnt + @c->idx_gc_cnt -
427 * @c->lst.taken_empty_lebs
428 *
429 * @empty_lebs are available because they are empty. @freeable_cnt are
430 * available because they contain only free and dirty space and the
431 * index allocation always occurs after wbufs are synch'ed.
432 * @idx_gc_cnt are available because they are index LEBs that have been
433 * garbage collected (including trivial GC) and are awaiting the commit
434 * before they can be unmapped - note that the in-the-gaps method will
435 * grab these if it needs them. @taken_empty_lebs are empty_lebs that
436 * have already been allocated for some purpose (also includes those
437 * LEBs on the @idx_gc list).
438 *
439 * Note, @taken_empty_lebs may temporarily be higher by one because of
440 * the way we serialize LEB allocations and budgeting. See a comment in
441 * 'ubifs_find_free_space()'.
442 */
443 lebs = c->lst.empty_lebs + c->freeable_cnt + c->idx_gc_cnt -
444 c->lst.taken_empty_lebs;
445 if (unlikely(rsvd_idx_lebs > lebs)) {
446 dbg_budg("out of indexing space: min_idx_lebs %d (old %d), "
447 "rsvd_idx_lebs %d", min_idx_lebs, c->min_idx_lebs,
448 rsvd_idx_lebs);
449 return -ENOSPC;
450 }
451
452 available = ubifs_calc_available(c, min_idx_lebs);
453 outstanding = c->budg_data_growth + c->budg_dd_growth;
454
455 if (unlikely(available < outstanding)) {
456 dbg_budg("out of data space: available %lld, outstanding %lld",
457 available, outstanding);
458 return -ENOSPC;
459 }
460
461 if (available - outstanding <= c->rp_size && !can_use_rp(c))
462 return -ENOSPC;
463
464 c->min_idx_lebs = min_idx_lebs;
465 return 0;
466}
467
468/**
469 * calc_idx_growth - calculate approximate index growth from budgeting request.
470 * @c: UBIFS file-system description object
471 * @req: budgeting request
472 *
473 * For now we assume each new node adds one znode. But this is rather poor
474 * approximation, though.
475 */
476static int calc_idx_growth(const struct ubifs_info *c,
477 const struct ubifs_budget_req *req)
478{
479 int znodes;
480
481 znodes = req->new_ino + (req->new_page << UBIFS_BLOCKS_PER_PAGE_SHIFT) +
482 req->new_dent;
483 return znodes * c->max_idx_node_sz;
484}
485
486/**
487 * calc_data_growth - calculate approximate amount of new data from budgeting
488 * request.
489 * @c: UBIFS file-system description object
490 * @req: budgeting request
491 */
492static int calc_data_growth(const struct ubifs_info *c,
493 const struct ubifs_budget_req *req)
494{
495 int data_growth;
496
497 data_growth = req->new_ino ? c->inode_budget : 0;
498 if (req->new_page)
499 data_growth += c->page_budget;
500 if (req->new_dent)
501 data_growth += c->dent_budget;
502 data_growth += req->new_ino_d;
503 return data_growth;
504}
505
506/**
507 * calc_dd_growth - calculate approximate amount of data which makes other data
508 * dirty from budgeting request.
509 * @c: UBIFS file-system description object
510 * @req: budgeting request
511 */
512static int calc_dd_growth(const struct ubifs_info *c,
513 const struct ubifs_budget_req *req)
514{
515 int dd_growth;
516
517 dd_growth = req->dirtied_page ? c->page_budget : 0;
518
519 if (req->dirtied_ino)
520 dd_growth += c->inode_budget << (req->dirtied_ino - 1);
521 if (req->mod_dent)
522 dd_growth += c->dent_budget;
523 dd_growth += req->dirtied_ino_d;
524 return dd_growth;
525}
526
527/**
528 * ubifs_budget_space - ensure there is enough space to complete an operation.
529 * @c: UBIFS file-system description object
530 * @req: budget request
531 *
532 * This function allocates budget for an operation. It uses pessimistic
533 * approximation of how much flash space the operation needs. The goal of this
534 * function is to make sure UBIFS always has flash space to flush all dirty
535 * pages, dirty inodes, and dirty znodes (liability). This function may force
536 * commit, garbage-collection or write-back. Returns zero in case of success,
537 * %-ENOSPC if there is no free space and other negative error codes in case of
538 * failures.
539 */
540int ubifs_budget_space(struct ubifs_info *c, struct ubifs_budget_req *req)
541{
542 int uninitialized_var(cmt_retries), uninitialized_var(wb_retries);
543 int err, idx_growth, data_growth, dd_growth;
544 struct retries_info ri;
545
546 ubifs_assert(req->dirtied_ino <= 4);
547 ubifs_assert(req->dirtied_ino_d <= UBIFS_MAX_INO_DATA * 4);
548
549 data_growth = calc_data_growth(c, req);
550 dd_growth = calc_dd_growth(c, req);
551 if (!data_growth && !dd_growth)
552 return 0;
553 idx_growth = calc_idx_growth(c, req);
554 memset(&ri, 0, sizeof(struct retries_info));
555
556again:
557 spin_lock(&c->space_lock);
558 ubifs_assert(c->budg_idx_growth >= 0);
559 ubifs_assert(c->budg_data_growth >= 0);
560 ubifs_assert(c->budg_dd_growth >= 0);
561
562 if (unlikely(c->nospace) && (c->nospace_rp || !can_use_rp(c))) {
563 dbg_budg("no space");
564 spin_unlock(&c->space_lock);
565 return -ENOSPC;
566 }
567
568 c->budg_idx_growth += idx_growth;
569 c->budg_data_growth += data_growth;
570 c->budg_dd_growth += dd_growth;
571
572 err = do_budget_space(c);
573 if (likely(!err)) {
574 req->idx_growth = idx_growth;
575 req->data_growth = data_growth;
576 req->dd_growth = dd_growth;
577 spin_unlock(&c->space_lock);
578 return 0;
579 }
580
581 /* Restore the old values */
582 c->budg_idx_growth -= idx_growth;
583 c->budg_data_growth -= data_growth;
584 c->budg_dd_growth -= dd_growth;
585 spin_unlock(&c->space_lock);
586
587 if (req->fast) {
588 dbg_budg("no space for fast budgeting");
589 return err;
590 }
591
592 err = make_free_space(c, &ri);
593 if (err == -EAGAIN) {
594 dbg_budg("try again");
595 cond_resched();
596 goto again;
597 } else if (err == -ENOSPC) {
598 dbg_budg("FS is full, -ENOSPC");
599 c->nospace = 1;
600 if (can_use_rp(c) || c->rp_size == 0)
601 c->nospace_rp = 1;
602 smp_wmb();
603 } else
604 ubifs_err("cannot budget space, error %d", err);
605 return err;
606}
607
608/**
609 * ubifs_release_budget - release budgeted free space.
610 * @c: UBIFS file-system description object
611 * @req: budget request
612 *
613 * This function releases the space budgeted by 'ubifs_budget_space()'. Note,
614 * since the index changes (which were budgeted for in @req->idx_growth) will
615 * only be written to the media on commit, this function moves the index budget
616 * from @c->budg_idx_growth to @c->budg_uncommitted_idx. The latter will be
617 * zeroed by the commit operation.
618 */
619void ubifs_release_budget(struct ubifs_info *c, struct ubifs_budget_req *req)
620{
621 ubifs_assert(req->dirtied_ino <= 4);
622 ubifs_assert(req->dirtied_ino_d <= UBIFS_MAX_INO_DATA * 4);
623 if (!req->recalculate) {
624 ubifs_assert(req->idx_growth >= 0);
625 ubifs_assert(req->data_growth >= 0);
626 ubifs_assert(req->dd_growth >= 0);
627 }
628
629 if (req->recalculate) {
630 req->data_growth = calc_data_growth(c, req);
631 req->dd_growth = calc_dd_growth(c, req);
632 req->idx_growth = calc_idx_growth(c, req);
633 }
634
635 if (!req->data_growth && !req->dd_growth)
636 return;
637
638 c->nospace = c->nospace_rp = 0;
639 smp_wmb();
640
641 spin_lock(&c->space_lock);
642 c->budg_idx_growth -= req->idx_growth;
643 c->budg_uncommitted_idx += req->idx_growth;
644 c->budg_data_growth -= req->data_growth;
645 c->budg_dd_growth -= req->dd_growth;
646 c->min_idx_lebs = ubifs_calc_min_idx_lebs(c);
647
648 ubifs_assert(c->budg_idx_growth >= 0);
649 ubifs_assert(c->budg_data_growth >= 0);
650 ubifs_assert(c->min_idx_lebs < c->main_lebs);
651 spin_unlock(&c->space_lock);
652}
653
654/**
655 * ubifs_convert_page_budget - convert budget of a new page.
656 * @c: UBIFS file-system description object
657 *
658 * This function converts budget which was allocated for a new page of data to
659 * the budget of changing an existing page of data. The latter is smaller then
660 * the former, so this function only does simple re-calculation and does not
661 * involve any write-back.
662 */
663void ubifs_convert_page_budget(struct ubifs_info *c)
664{
665 spin_lock(&c->space_lock);
666 /* Release the index growth reservation */
667 c->budg_idx_growth -= c->max_idx_node_sz << UBIFS_BLOCKS_PER_PAGE_SHIFT;
668 /* Release the data growth reservation */
669 c->budg_data_growth -= c->page_budget;
670 /* Increase the dirty data growth reservation instead */
671 c->budg_dd_growth += c->page_budget;
672 /* And re-calculate the indexing space reservation */
673 c->min_idx_lebs = ubifs_calc_min_idx_lebs(c);
674 spin_unlock(&c->space_lock);
675}
676
677/**
678 * ubifs_release_dirty_inode_budget - release dirty inode budget.
679 * @c: UBIFS file-system description object
680 * @ui: UBIFS inode to release the budget for
681 *
682 * This function releases budget corresponding to a dirty inode. It is usually
683 * called when after the inode has been written to the media and marked as
684 * clean.
685 */
686void ubifs_release_dirty_inode_budget(struct ubifs_info *c,
687 struct ubifs_inode *ui)
688{
689 struct ubifs_budget_req req = {.dd_growth = c->inode_budget,
690 .dirtied_ino_d = ui->data_len};
691
692 ubifs_release_budget(c, &req);
693}
694
695/**
696 * ubifs_budg_get_free_space - return amount of free space.
697 * @c: UBIFS file-system description object
698 *
699 * This function returns amount of free space on the file-system.
700 */
701long long ubifs_budg_get_free_space(struct ubifs_info *c)
702{
703 int min_idx_lebs, rsvd_idx_lebs;
704 long long available, outstanding, free;
705
706 /* Do exactly the same calculations as in 'do_budget_space()' */
707 spin_lock(&c->space_lock);
708 min_idx_lebs = ubifs_calc_min_idx_lebs(c);
709
710 if (min_idx_lebs > c->lst.idx_lebs)
711 rsvd_idx_lebs = min_idx_lebs - c->lst.idx_lebs;
712 else
713 rsvd_idx_lebs = 0;
714
715 if (rsvd_idx_lebs > c->lst.empty_lebs + c->freeable_cnt + c->idx_gc_cnt
716 - c->lst.taken_empty_lebs) {
717 spin_unlock(&c->space_lock);
718 return 0;
719 }
720
721 available = ubifs_calc_available(c, min_idx_lebs);
722 outstanding = c->budg_data_growth + c->budg_dd_growth;
723 c->min_idx_lebs = min_idx_lebs;
724 spin_unlock(&c->space_lock);
725
726 if (available > outstanding)
727 free = ubifs_reported_space(c, available - outstanding);
728 else
729 free = 0;
730 return free;
731}
diff --git a/fs/ubifs/commit.c b/fs/ubifs/commit.c
new file mode 100644
index 00000000000..3b516316c9b
--- /dev/null
+++ b/fs/ubifs/commit.c
@@ -0,0 +1,677 @@
1/*
2 * This file is part of UBIFS.
3 *
4 * Copyright (C) 2006-2008 Nokia Corporation.
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms of the GNU General Public License version 2 as published by
8 * the Free Software Foundation.
9 *
10 * This program is distributed in the hope that it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13 * more details.
14 *
15 * You should have received a copy of the GNU General Public License along with
16 * this program; if not, write to the Free Software Foundation, Inc., 51
17 * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
18 *
19 * Authors: Adrian Hunter
20 * Artem Bityutskiy (Битюцкий Артём)
21 */
22
23/*
24 * This file implements functions that manage the running of the commit process.
25 * Each affected module has its own functions to accomplish their part in the
26 * commit and those functions are called here.
27 *
28 * The commit is the process whereby all updates to the index and LEB properties
29 * are written out together and the journal becomes empty. This keeps the
30 * file system consistent - at all times the state can be recreated by reading
31 * the index and LEB properties and then replaying the journal.
32 *
33 * The commit is split into two parts named "commit start" and "commit end".
34 * During commit start, the commit process has exclusive access to the journal
35 * by holding the commit semaphore down for writing. As few I/O operations as
36 * possible are performed during commit start, instead the nodes that are to be
37 * written are merely identified. During commit end, the commit semaphore is no
38 * longer held and the journal is again in operation, allowing users to continue
39 * to use the file system while the bulk of the commit I/O is performed. The
40 * purpose of this two-step approach is to prevent the commit from causing any
41 * latency blips. Note that in any case, the commit does not prevent lookups
42 * (as permitted by the TNC mutex), or access to VFS data structures e.g. page
43 * cache.
44 */
45
46#include <linux/freezer.h>
47#include <linux/kthread.h>
48#include "ubifs.h"
49
/**
 * do_commit - commit the journal.
 * @c: UBIFS file-system description object
 *
 * This function implements UBIFS commit. It has to be called with commit lock
 * locked, i.e. with 'c->commit_sem' held for writing; the semaphore is always
 * released by this function - either when "commit start" is finished or on the
 * error path. Returns zero in case of success and a negative error code in
 * case of failure.
 *
 * On success the commit state is set to %COMMIT_RESTING and the tasks waiting
 * on 'c->cmt_wq' are woken up. On failure the error is logged, the commit
 * state is set to %COMMIT_BROKEN, the waiters are woken up and
 * 'ubifs_ro_mode()' is called.
 */
static int do_commit(struct ubifs_info *c)
{
	int err, new_ltail_lnum, old_ltail_lnum, i;
	struct ubifs_zbranch zroot;
	struct ubifs_lp_stats lst;

	dbg_cmt("start");
	if (c->ro_media) {
		err = -EROFS;
		goto out_up;
	}

	/* Sync all write buffers (necessary for recovery) */
	for (i = 0; i < c->jhead_cnt; i++) {
		err = ubifs_wbuf_sync(&c->jheads[i].wbuf);
		if (err)
			goto out_up;
	}

	/*
	 * "Commit start": every sub-system does its part while the commit
	 * semaphore is still held for writing. Any failure here has to go to
	 * 'out_up' so that the semaphore is released.
	 */
	err = ubifs_gc_start_commit(c);
	if (err)
		goto out_up;
	err = dbg_check_lprops(c);
	if (err)
		goto out_up;
	err = ubifs_log_start_commit(c, &new_ltail_lnum);
	if (err)
		goto out_up;
	err = ubifs_tnc_start_commit(c, &zroot);
	if (err)
		goto out_up;
	err = ubifs_lpt_start_commit(c);
	if (err)
		goto out_up;
	err = ubifs_orphan_start_commit(c);
	if (err)
		goto out_up;

	/* Snapshot the LEB properties statistics for the master node */
	ubifs_get_lp_stats(c, &lst);

	/*
	 * "Commit end": the commit semaphore is dropped, so from now on
	 * failures go to 'out', which must not release it again.
	 */
	up_write(&c->commit_sem);

	err = ubifs_tnc_end_commit(c);
	if (err)
		goto out;
	err = ubifs_lpt_end_commit(c);
	if (err)
		goto out;
	err = ubifs_orphan_end_commit(c);
	if (err)
		goto out;
	/* Remember the log tail as it was before 'ubifs_log_end_commit()' */
	old_ltail_lnum = c->ltail_lnum;
	err = ubifs_log_end_commit(c, new_ltail_lnum);
	if (err)
		goto out;
	err = dbg_check_old_index(c, &zroot);
	if (err)
		goto out;

	/* Fill in and write the master node under 'c->mst_mutex' */
	mutex_lock(&c->mst_mutex);
	c->mst_node->cmt_no      = cpu_to_le64(++c->cmt_no);
	c->mst_node->log_lnum    = cpu_to_le32(new_ltail_lnum);
	c->mst_node->root_lnum   = cpu_to_le32(zroot.lnum);
	c->mst_node->root_offs   = cpu_to_le32(zroot.offs);
	c->mst_node->root_len    = cpu_to_le32(zroot.len);
	c->mst_node->ihead_lnum  = cpu_to_le32(c->ihead_lnum);
	c->mst_node->ihead_offs  = cpu_to_le32(c->ihead_offs);
	c->mst_node->index_size  = cpu_to_le64(c->old_idx_sz);
	c->mst_node->lpt_lnum    = cpu_to_le32(c->lpt_lnum);
	c->mst_node->lpt_offs    = cpu_to_le32(c->lpt_offs);
	c->mst_node->nhead_lnum  = cpu_to_le32(c->nhead_lnum);
	c->mst_node->nhead_offs  = cpu_to_le32(c->nhead_offs);
	c->mst_node->ltab_lnum   = cpu_to_le32(c->ltab_lnum);
	c->mst_node->ltab_offs   = cpu_to_le32(c->ltab_offs);
	c->mst_node->lsave_lnum  = cpu_to_le32(c->lsave_lnum);
	c->mst_node->lsave_offs  = cpu_to_le32(c->lsave_offs);
	c->mst_node->lscan_lnum  = cpu_to_le32(c->lscan_lnum);
	c->mst_node->empty_lebs  = cpu_to_le32(lst.empty_lebs);
	c->mst_node->idx_lebs    = cpu_to_le32(lst.idx_lebs);
	c->mst_node->total_free  = cpu_to_le64(lst.total_free);
	c->mst_node->total_dirty = cpu_to_le64(lst.total_dirty);
	c->mst_node->total_used  = cpu_to_le64(lst.total_used);
	c->mst_node->total_dead  = cpu_to_le64(lst.total_dead);
	c->mst_node->total_dark  = cpu_to_le64(lst.total_dark);
	if (c->no_orphs)
		c->mst_node->flags |= cpu_to_le32(UBIFS_MST_NO_ORPHS);
	else
		c->mst_node->flags &= ~cpu_to_le32(UBIFS_MST_NO_ORPHS);
	err = ubifs_write_master(c);
	mutex_unlock(&c->mst_mutex);
	if (err)
		goto out;

	/* Post-commit steps, run only after the master node was written */
	err = ubifs_log_post_commit(c, old_ltail_lnum);
	if (err)
		goto out;
	err = ubifs_gc_end_commit(c);
	if (err)
		goto out;
	err = ubifs_lpt_post_commit(c);
	if (err)
		goto out;

	/* Commit is done - go back to the resting state and wake up waiters */
	spin_lock(&c->cs_lock);
	c->cmt_state = COMMIT_RESTING;
	wake_up(&c->cmt_wq);
	dbg_cmt("commit end");
	spin_unlock(&c->cs_lock);

	return 0;

out_up:
	/* Failure during "commit start": the semaphore is still held */
	up_write(&c->commit_sem);
out:
	ubifs_err("commit failed, error %d", err);
	spin_lock(&c->cs_lock);
	c->cmt_state = COMMIT_BROKEN;
	wake_up(&c->cmt_wq);
	spin_unlock(&c->cs_lock);
	ubifs_ro_mode(c, err);
	return err;
}
180
181/**
182 * run_bg_commit - run background commit if it is needed.
183 * @c: UBIFS file-system description object
184 *
185 * This function runs background commit if it is needed. Returns zero in case
186 * of success and a negative error code in case of failure.
187 */
188static int run_bg_commit(struct ubifs_info *c)
189{
190 spin_lock(&c->cs_lock);
191 /*
192 * Run background commit only if background commit was requested or if
193 * commit is required.
194 */
195 if (c->cmt_state != COMMIT_BACKGROUND &&
196 c->cmt_state != COMMIT_REQUIRED)
197 goto out;
198 spin_unlock(&c->cs_lock);
199
200 down_write(&c->commit_sem);
201 spin_lock(&c->cs_lock);
202 if (c->cmt_state == COMMIT_REQUIRED)
203 c->cmt_state = COMMIT_RUNNING_REQUIRED;
204 else if (c->cmt_state == COMMIT_BACKGROUND)
205 c->cmt_state = COMMIT_RUNNING_BACKGROUND;
206 else
207 goto out_cmt_unlock;
208 spin_unlock(&c->cs_lock);
209
210 return do_commit(c);
211
212out_cmt_unlock:
213 up_write(&c->commit_sem);
214out:
215 spin_unlock(&c->cs_lock);
216 return 0;
217}
218
/**
 * ubifs_bg_thread - UBIFS background thread function.
 * @info: points to the file-system description object
 *
 * This function implements various file-system background activities:
 * o when a write-buffer timer expires it synchronizes the appropriate
 *   write-buffer;
 * o when the journal is about to be full, it starts in-advance commit.
 *
 * The thread sleeps until 'c->need_bgt' is set and exits when
 * 'kthread_should_stop()' becomes true. It always returns zero.
 *
 * Note, other stuff like background garbage collection may be added here in
 * future.
 */
int ubifs_bg_thread(void *info)
{
	int err;
	struct ubifs_info *c = info;

	ubifs_msg("background thread \"%s\" started, PID %d",
		  c->bgt_name, current->pid);
	set_freezable();

	while (1) {
		if (kthread_should_stop())
			break;

		if (try_to_freeze())
			continue;

		/*
		 * Change the task state before looking at 'c->need_bgt', so
		 * that a wake-up which arrives after the check is not lost.
		 */
		set_current_state(TASK_INTERRUPTIBLE);
		/* Check if there is something to do */
		if (!c->need_bgt) {
			/*
			 * Without this check nothing would prevent us from
			 * going to sleep now and never being woken up, which
			 * would block the task waiting in 'kthread_stop()'
			 * forever.
			 */
			if (kthread_should_stop())
				break;
			schedule();
			continue;
		} else
			__set_current_state(TASK_RUNNING);

		c->need_bgt = 0;
		err = ubifs_bg_wbufs_sync(c);
		if (err)
			ubifs_ro_mode(c, err);

		/*
		 * The return code is ignored: on failure 'do_commit()' has
		 * already logged the error and called 'ubifs_ro_mode()'.
		 */
		run_bg_commit(c);
		cond_resched();
	}

	dbg_msg("background thread \"%s\" stops", c->bgt_name);
	return 0;
}
274
275/**
276 * ubifs_commit_required - set commit state to "required".
277 * @c: UBIFS file-system description object
278 *
279 * This function is called if a commit is required but cannot be done from the
280 * calling function, so it is just flagged instead.
281 */
282void ubifs_commit_required(struct ubifs_info *c)
283{
284 spin_lock(&c->cs_lock);
285 switch (c->cmt_state) {
286 case COMMIT_RESTING:
287 case COMMIT_BACKGROUND:
288 dbg_cmt("old: %s, new: %s", dbg_cstate(c->cmt_state),
289 dbg_cstate(COMMIT_REQUIRED));
290 c->cmt_state = COMMIT_REQUIRED;
291 break;
292 case COMMIT_RUNNING_BACKGROUND:
293 dbg_cmt("old: %s, new: %s", dbg_cstate(c->cmt_state),
294 dbg_cstate(COMMIT_RUNNING_REQUIRED));
295 c->cmt_state = COMMIT_RUNNING_REQUIRED;
296 break;
297 case COMMIT_REQUIRED:
298 case COMMIT_RUNNING_REQUIRED:
299 case COMMIT_BROKEN:
300 break;
301 }
302 spin_unlock(&c->cs_lock);
303}
304
305/**
306 * ubifs_request_bg_commit - notify the background thread to do a commit.
307 * @c: UBIFS file-system description object
308 *
309 * This function is called if the journal is full enough to make a commit
310 * worthwhile, so background thread is kicked to start it.
311 */
312void ubifs_request_bg_commit(struct ubifs_info *c)
313{
314 spin_lock(&c->cs_lock);
315 if (c->cmt_state == COMMIT_RESTING) {
316 dbg_cmt("old: %s, new: %s", dbg_cstate(c->cmt_state),
317 dbg_cstate(COMMIT_BACKGROUND));
318 c->cmt_state = COMMIT_BACKGROUND;
319 spin_unlock(&c->cs_lock);
320 ubifs_wake_up_bgt(c);
321 } else
322 spin_unlock(&c->cs_lock);
323}
324
325/**
326 * wait_for_commit - wait for commit.
327 * @c: UBIFS file-system description object
328 *
329 * This function sleeps until the commit operation is no longer running.
330 */
331static int wait_for_commit(struct ubifs_info *c)
332{
333 dbg_cmt("pid %d goes sleep", current->pid);
334
335 /*
336 * The following sleeps if the condition is false, and will be woken
337 * when the commit ends. It is possible, although very unlikely, that we
338 * will wake up and see the subsequent commit running, rather than the
339 * one we were waiting for, and go back to sleep. However, we will be
340 * woken again, so there is no danger of sleeping forever.
341 */
342 wait_event(c->cmt_wq, c->cmt_state != COMMIT_RUNNING_BACKGROUND &&
343 c->cmt_state != COMMIT_RUNNING_REQUIRED);
344 dbg_cmt("commit finished, pid %d woke up", current->pid);
345 return 0;
346}
347
348/**
349 * ubifs_run_commit - run or wait for commit.
350 * @c: UBIFS file-system description object
351 *
352 * This function runs commit and returns zero in case of success and a negative
353 * error code in case of failure.
354 */
355int ubifs_run_commit(struct ubifs_info *c)
356{
357 int err = 0;
358
359 spin_lock(&c->cs_lock);
360 if (c->cmt_state == COMMIT_BROKEN) {
361 err = -EINVAL;
362 goto out;
363 }
364
365 if (c->cmt_state == COMMIT_RUNNING_BACKGROUND)
366 /*
367 * We set the commit state to 'running required' to indicate
368 * that we want it to complete as quickly as possible.
369 */
370 c->cmt_state = COMMIT_RUNNING_REQUIRED;
371
372 if (c->cmt_state == COMMIT_RUNNING_REQUIRED) {
373 spin_unlock(&c->cs_lock);
374 return wait_for_commit(c);
375 }
376 spin_unlock(&c->cs_lock);
377
378 /* Ok, the commit is indeed needed */
379
380 down_write(&c->commit_sem);
381 spin_lock(&c->cs_lock);
382 /*
383 * Since we unlocked 'c->cs_lock', the state may have changed, so
384 * re-check it.
385 */
386 if (c->cmt_state == COMMIT_BROKEN) {
387 err = -EINVAL;
388 goto out_cmt_unlock;
389 }
390
391 if (c->cmt_state == COMMIT_RUNNING_BACKGROUND)
392 c->cmt_state = COMMIT_RUNNING_REQUIRED;
393
394 if (c->cmt_state == COMMIT_RUNNING_REQUIRED) {
395 up_write(&c->commit_sem);
396 spin_unlock(&c->cs_lock);
397 return wait_for_commit(c);
398 }
399 c->cmt_state = COMMIT_RUNNING_REQUIRED;
400 spin_unlock(&c->cs_lock);
401
402 err = do_commit(c);
403 return err;
404
405out_cmt_unlock:
406 up_write(&c->commit_sem);
407out:
408 spin_unlock(&c->cs_lock);
409 return err;
410}
411
412/**
413 * ubifs_gc_should_commit - determine if it is time for GC to run commit.
414 * @c: UBIFS file-system description object
415 *
416 * This function is called by garbage collection to determine if commit should
417 * be run. If commit state is @COMMIT_BACKGROUND, which means that the journal
418 * is full enough to start commit, this function returns true. It is not
419 * absolutely necessary to commit yet, but it feels like this should be better
420 * then to keep doing GC. This function returns %1 if GC has to initiate commit
421 * and %0 if not.
422 */
423int ubifs_gc_should_commit(struct ubifs_info *c)
424{
425 int ret = 0;
426
427 spin_lock(&c->cs_lock);
428 if (c->cmt_state == COMMIT_BACKGROUND) {
429 dbg_cmt("commit required now");
430 c->cmt_state = COMMIT_REQUIRED;
431 } else
432 dbg_cmt("commit not requested");
433 if (c->cmt_state == COMMIT_REQUIRED)
434 ret = 1;
435 spin_unlock(&c->cs_lock);
436 return ret;
437}
438
439#ifdef CONFIG_UBIFS_FS_DEBUG
440
/**
 * struct idx_node - hold index nodes during index tree traversal.
 * @list: links the nodes which are on the current path from the root of the
 *        old index to the node being checked
 * @iip: index in parent (slot number of this indexing node in the parent
 *       indexing node)
 * @upper_key: all keys in this indexing node have to be less or equivalent to
 *             this key
 * @idx: index node (8-byte aligned because all node structures must be 8-byte
 *       aligned)
 *
 * @idx has to stay the last member: 'dbg_check_old_index()' allocates this
 * structure with extra room after it so that an index node with 'c->fanout'
 * branches fits.
 */
struct idx_node {
	struct list_head list;
	int iip;
	union ubifs_key upper_key;
	struct ubifs_idx_node idx __attribute__((aligned(8)));
};
457
458/**
459 * dbg_old_index_check_init - get information for the next old index check.
460 * @c: UBIFS file-system description object
461 * @zroot: root of the index
462 *
463 * This function records information about the index that will be needed for the
464 * next old index check i.e. 'dbg_check_old_index()'.
465 *
466 * This function returns %0 on success and a negative error code on failure.
467 */
468int dbg_old_index_check_init(struct ubifs_info *c, struct ubifs_zbranch *zroot)
469{
470 struct ubifs_idx_node *idx;
471 int lnum, offs, len, err = 0;
472
473 c->old_zroot = *zroot;
474
475 lnum = c->old_zroot.lnum;
476 offs = c->old_zroot.offs;
477 len = c->old_zroot.len;
478
479 idx = kmalloc(c->max_idx_node_sz, GFP_NOFS);
480 if (!idx)
481 return -ENOMEM;
482
483 err = ubifs_read_node(c, idx, UBIFS_IDX_NODE, len, lnum, offs);
484 if (err)
485 goto out;
486
487 c->old_zroot_level = le16_to_cpu(idx->level);
488 c->old_zroot_sqnum = le64_to_cpu(idx->ch.sqnum);
489out:
490 kfree(idx);
491 return err;
492}
493
494/**
495 * dbg_check_old_index - check the old copy of the index.
496 * @c: UBIFS file-system description object
497 * @zroot: root of the new index
498 *
499 * In order to be able to recover from an unclean unmount, a complete copy of
500 * the index must exist on flash. This is the "old" index. The commit process
501 * must write the "new" index to flash without overwriting or destroying any
502 * part of the old index. This function is run at commit end in order to check
503 * that the old index does indeed exist completely intact.
504 *
505 * This function returns %0 on success and a negative error code on failure.
506 */
507int dbg_check_old_index(struct ubifs_info *c, struct ubifs_zbranch *zroot)
508{
509 int lnum, offs, len, err = 0, uninitialized_var(last_level), child_cnt;
510 int first = 1, iip;
511 union ubifs_key lower_key, upper_key, l_key, u_key;
512 unsigned long long uninitialized_var(last_sqnum);
513 struct ubifs_idx_node *idx;
514 struct list_head list;
515 struct idx_node *i;
516 size_t sz;
517
518 if (!(ubifs_chk_flags & UBIFS_CHK_OLD_IDX))
519 goto out;
520
521 INIT_LIST_HEAD(&list);
522
523 sz = sizeof(struct idx_node) + ubifs_idx_node_sz(c, c->fanout) -
524 UBIFS_IDX_NODE_SZ;
525
526 /* Start at the old zroot */
527 lnum = c->old_zroot.lnum;
528 offs = c->old_zroot.offs;
529 len = c->old_zroot.len;
530 iip = 0;
531
532 /*
533 * Traverse the index tree preorder depth-first i.e. do a node and then
534 * its subtrees from left to right.
535 */
536 while (1) {
537 struct ubifs_branch *br;
538
539 /* Get the next index node */
540 i = kmalloc(sz, GFP_NOFS);
541 if (!i) {
542 err = -ENOMEM;
543 goto out_free;
544 }
545 i->iip = iip;
546 /* Keep the index nodes on our path in a linked list */
547 list_add_tail(&i->list, &list);
548 /* Read the index node */
549 idx = &i->idx;
550 err = ubifs_read_node(c, idx, UBIFS_IDX_NODE, len, lnum, offs);
551 if (err)
552 goto out_free;
553 /* Validate index node */
554 child_cnt = le16_to_cpu(idx->child_cnt);
555 if (child_cnt < 1 || child_cnt > c->fanout) {
556 err = 1;
557 goto out_dump;
558 }
559 if (first) {
560 first = 0;
561 /* Check root level and sqnum */
562 if (le16_to_cpu(idx->level) != c->old_zroot_level) {
563 err = 2;
564 goto out_dump;
565 }
566 if (le64_to_cpu(idx->ch.sqnum) != c->old_zroot_sqnum) {
567 err = 3;
568 goto out_dump;
569 }
570 /* Set last values as though root had a parent */
571 last_level = le16_to_cpu(idx->level) + 1;
572 last_sqnum = le64_to_cpu(idx->ch.sqnum) + 1;
573 key_read(c, ubifs_idx_key(c, idx), &lower_key);
574 highest_ino_key(c, &upper_key, INUM_WATERMARK);
575 }
576 key_copy(c, &upper_key, &i->upper_key);
577 if (le16_to_cpu(idx->level) != last_level - 1) {
578 err = 3;
579 goto out_dump;
580 }
581 /*
582 * The index is always written bottom up hence a child's sqnum
583 * is always less than the parents.
584 */
585 if (le64_to_cpu(idx->ch.sqnum) >= last_sqnum) {
586 err = 4;
587 goto out_dump;
588 }
589 /* Check key range */
590 key_read(c, ubifs_idx_key(c, idx), &l_key);
591 br = ubifs_idx_branch(c, idx, child_cnt - 1);
592 key_read(c, &br->key, &u_key);
593 if (keys_cmp(c, &lower_key, &l_key) > 0) {
594 err = 5;
595 goto out_dump;
596 }
597 if (keys_cmp(c, &upper_key, &u_key) < 0) {
598 err = 6;
599 goto out_dump;
600 }
601 if (keys_cmp(c, &upper_key, &u_key) == 0)
602 if (!is_hash_key(c, &u_key)) {
603 err = 7;
604 goto out_dump;
605 }
606 /* Go to next index node */
607 if (le16_to_cpu(idx->level) == 0) {
608 /* At the bottom, so go up until can go right */
609 while (1) {
610 /* Drop the bottom of the list */
611 list_del(&i->list);
612 kfree(i);
613 /* No more list means we are done */
614 if (list_empty(&list))
615 goto out;
616 /* Look at the new bottom */
617 i = list_entry(list.prev, struct idx_node,
618 list);
619 idx = &i->idx;
620 /* Can we go right */
621 if (iip + 1 < le16_to_cpu(idx->child_cnt)) {
622 iip = iip + 1;
623 break;
624 } else
625 /* Nope, so go up again */
626 iip = i->iip;
627 }
628 } else
629 /* Go down left */
630 iip = 0;
631 /*
632 * We have the parent in 'idx' and now we set up for reading the
633 * child pointed to by slot 'iip'.
634 */
635 last_level = le16_to_cpu(idx->level);
636 last_sqnum = le64_to_cpu(idx->ch.sqnum);
637 br = ubifs_idx_branch(c, idx, iip);
638 lnum = le32_to_cpu(br->lnum);
639 offs = le32_to_cpu(br->offs);
640 len = le32_to_cpu(br->len);
641 key_read(c, &br->key, &lower_key);
642 if (iip + 1 < le16_to_cpu(idx->child_cnt)) {
643 br = ubifs_idx_branch(c, idx, iip + 1);
644 key_read(c, &br->key, &upper_key);
645 } else
646 key_copy(c, &i->upper_key, &upper_key);
647 }
648out:
649 err = dbg_old_index_check_init(c, zroot);
650 if (err)
651 goto out_free;
652
653 return 0;
654
655out_dump:
656 dbg_err("dumping index node (iip=%d)", i->iip);
657 dbg_dump_node(c, idx);
658 list_del(&i->list);
659 kfree(i);
660 if (!list_empty(&list)) {
661 i = list_entry(list.prev, struct idx_node, list);
662 dbg_err("dumping parent index node");
663 dbg_dump_node(c, &i->idx);
664 }
665out_free:
666 while (!list_empty(&list)) {
667 i = list_entry(list.next, struct idx_node, list);
668 list_del(&i->list);
669 kfree(i);
670 }
671 ubifs_err("failed, error %d", err);
672 if (err > 0)
673 err = -EINVAL;
674 return err;
675}
676
677#endif /* CONFIG_UBIFS_FS_DEBUG */
diff --git a/fs/ubifs/compress.c b/fs/ubifs/compress.c
new file mode 100644
index 00000000000..5bb51dac3c1
--- /dev/null
+++ b/fs/ubifs/compress.c
@@ -0,0 +1,253 @@
1/*
2 * This file is part of UBIFS.
3 *
4 * Copyright (C) 2006-2008 Nokia Corporation.
5 * Copyright (C) 2006, 2007 University of Szeged, Hungary
6 *
7 * This program is free software; you can redistribute it and/or modify it
8 * under the terms of the GNU General Public License version 2 as published by
9 * the Free Software Foundation.
10 *
11 * This program is distributed in the hope that it will be useful, but WITHOUT
12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
14 * more details.
15 *
16 * You should have received a copy of the GNU General Public License along with
17 * this program; if not, write to the Free Software Foundation, Inc., 51
18 * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
19 *
20 * Authors: Adrian Hunter
21 * Artem Bityutskiy (Битюцкий Артём)
22 * Zoltan Sogor
23 */
24
25/*
26 * This file provides a single place to access to compression and
27 * decompression.
28 */
29
30#include <linux/crypto.h>
31#include "ubifs.h"
32
/*
 * Fake description object for the "none" compressor. It has no crypto API
 * counterpart, but @capi_name is an empty string rather than %NULL, so the
 * "is not compiled in" check in 'ubifs_decompress()', which tests @capi_name
 * for %NULL, does not reject it.
 */
static struct ubifs_compressor none_compr = {
	.compr_type = UBIFS_COMPR_NONE,
	.name = "no compression",
	.capi_name = "",
};
39
/*
 * LZO compressor description object. If LZO support is compiled in, it has a
 * crypto API name and a mutex which 'ubifs_compress()' takes around the
 * compression call; no decompression mutex is set. Otherwise @capi_name is
 * left %NULL, in which case 'compr_init()' does not allocate a crypto
 * transform and 'ubifs_decompress()' refuses to decompress.
 */
#ifdef CONFIG_UBIFS_FS_LZO
static DEFINE_MUTEX(lzo_mutex);

static struct ubifs_compressor lzo_compr = {
	.compr_type = UBIFS_COMPR_LZO,
	.comp_mutex = &lzo_mutex,
	.name = "LZO",
	.capi_name = "lzo",
};
#else
static struct ubifs_compressor lzo_compr = {
	.compr_type = UBIFS_COMPR_LZO,
	.name = "LZO",
};
#endif
55
/*
 * zlib compressor description object. If zlib support is compiled in, it uses
 * the "deflate" crypto API algorithm and has separate mutexes for compression
 * and decompression. Otherwise @capi_name is left %NULL, in which case
 * 'compr_init()' does not allocate a crypto transform and 'ubifs_decompress()'
 * refuses to decompress.
 */
#ifdef CONFIG_UBIFS_FS_ZLIB
static DEFINE_MUTEX(deflate_mutex);
static DEFINE_MUTEX(inflate_mutex);

static struct ubifs_compressor zlib_compr = {
	.compr_type = UBIFS_COMPR_ZLIB,
	.comp_mutex = &deflate_mutex,
	.decomp_mutex = &inflate_mutex,
	.name = "zlib",
	.capi_name = "deflate",
};
#else
static struct ubifs_compressor zlib_compr = {
	.compr_type = UBIFS_COMPR_ZLIB,
	.name = "zlib",
};
#endif
73
/*
 * All UBIFS compressors, indexed by compression type (%UBIFS_COMPR_*). The
 * array is filled in by 'compr_init()' and 'ubifs_compressors_init()'.
 */
struct ubifs_compressor *ubifs_compressors[UBIFS_COMPR_TYPES_CNT];
76
77/**
78 * ubifs_compress - compress data.
79 * @in_buf: data to compress
80 * @in_len: length of the data to compress
81 * @out_buf: output buffer where compressed data should be stored
82 * @out_len: output buffer length is returned here
83 * @compr_type: type of compression to use on enter, actually used compression
84 * type on exit
85 *
86 * This function compresses input buffer @in_buf of length @in_len and stores
87 * the result in the output buffer @out_buf and the resulting length in
88 * @out_len. If the input buffer does not compress, it is just copied to the
89 * @out_buf. The same happens if @compr_type is %UBIFS_COMPR_NONE or if
90 * compression error occurred.
91 *
92 * Note, if the input buffer was not compressed, it is copied to the output
93 * buffer and %UBIFS_COMPR_NONE is returned in @compr_type.
94 *
95 * This functions returns %0 on success or a negative error code on failure.
96 */
97void ubifs_compress(const void *in_buf, int in_len, void *out_buf, int *out_len,
98 int *compr_type)
99{
100 int err;
101 struct ubifs_compressor *compr = ubifs_compressors[*compr_type];
102
103 if (*compr_type == UBIFS_COMPR_NONE)
104 goto no_compr;
105
106 /* If the input data is small, do not even try to compress it */
107 if (in_len < UBIFS_MIN_COMPR_LEN)
108 goto no_compr;
109
110 if (compr->comp_mutex)
111 mutex_lock(compr->comp_mutex);
112 err = crypto_comp_compress(compr->cc, in_buf, in_len, out_buf,
113 out_len);
114 if (compr->comp_mutex)
115 mutex_unlock(compr->comp_mutex);
116 if (unlikely(err)) {
117 ubifs_warn("cannot compress %d bytes, compressor %s, "
118 "error %d, leave data uncompressed",
119 in_len, compr->name, err);
120 goto no_compr;
121 }
122
123 /*
124 * Presently, we just require that compression results in less data,
125 * rather than any defined minimum compression ratio or amount.
126 */
127 if (ALIGN(*out_len, 8) >= ALIGN(in_len, 8))
128 goto no_compr;
129
130 return;
131
132no_compr:
133 memcpy(out_buf, in_buf, in_len);
134 *out_len = in_len;
135 *compr_type = UBIFS_COMPR_NONE;
136}
137
138/**
139 * ubifs_decompress - decompress data.
140 * @in_buf: data to decompress
141 * @in_len: length of the data to decompress
142 * @out_buf: output buffer where decompressed data should
143 * @out_len: output length is returned here
144 * @compr_type: type of compression
145 *
146 * This function decompresses data from buffer @in_buf into buffer @out_buf.
147 * The length of the uncompressed data is returned in @out_len. This functions
148 * returns %0 on success or a negative error code on failure.
149 */
150int ubifs_decompress(const void *in_buf, int in_len, void *out_buf,
151 int *out_len, int compr_type)
152{
153 int err;
154 struct ubifs_compressor *compr;
155
156 if (unlikely(compr_type < 0 || compr_type >= UBIFS_COMPR_TYPES_CNT)) {
157 ubifs_err("invalid compression type %d", compr_type);
158 return -EINVAL;
159 }
160
161 compr = ubifs_compressors[compr_type];
162
163 if (unlikely(!compr->capi_name)) {
164 ubifs_err("%s compression is not compiled in", compr->name);
165 return -EINVAL;
166 }
167
168 if (compr_type == UBIFS_COMPR_NONE) {
169 memcpy(out_buf, in_buf, in_len);
170 *out_len = in_len;
171 return 0;
172 }
173
174 if (compr->decomp_mutex)
175 mutex_lock(compr->decomp_mutex);
176 err = crypto_comp_decompress(compr->cc, in_buf, in_len, out_buf,
177 out_len);
178 if (compr->decomp_mutex)
179 mutex_unlock(compr->decomp_mutex);
180 if (err)
181 ubifs_err("cannot decompress %d bytes, compressor %s, "
182 "error %d", in_len, compr->name, err);
183
184 return err;
185}
186
187/**
188 * compr_init - initialize a compressor.
189 * @compr: compressor description object
190 *
191 * This function initializes the requested compressor and returns zero in case
192 * of success or a negative error code in case of failure.
193 */
194static int __init compr_init(struct ubifs_compressor *compr)
195{
196 if (compr->capi_name) {
197 compr->cc = crypto_alloc_comp(compr->capi_name, 0, 0);
198 if (IS_ERR(compr->cc)) {
199 ubifs_err("cannot initialize compressor %s, error %ld",
200 compr->name, PTR_ERR(compr->cc));
201 return PTR_ERR(compr->cc);
202 }
203 }
204
205 ubifs_compressors[compr->compr_type] = compr;
206 return 0;
207}
208
209/**
210 * compr_exit - de-initialize a compressor.
211 * @compr: compressor description object
212 */
213static void compr_exit(struct ubifs_compressor *compr)
214{
215 if (compr->capi_name)
216 crypto_free_comp(compr->cc);
217 return;
218}
219
220/**
221 * ubifs_compressors_init - initialize UBIFS compressors.
222 *
223 * This function initializes the compressor which were compiled in. Returns
224 * zero in case of success and a negative error code in case of failure.
225 */
226int __init ubifs_compressors_init(void)
227{
228 int err;
229
230 err = compr_init(&lzo_compr);
231 if (err)
232 return err;
233
234 err = compr_init(&zlib_compr);
235 if (err)
236 goto out_lzo;
237
238 ubifs_compressors[UBIFS_COMPR_NONE] = &none_compr;
239 return 0;
240
241out_lzo:
242 compr_exit(&lzo_compr);
243 return err;
244}
245
246/**
247 * ubifs_compressors_exit - de-initialize UBIFS compressors.
248 */
249void __exit ubifs_compressors_exit(void)
250{
251 compr_exit(&lzo_compr);
252 compr_exit(&zlib_compr);
253}
diff --git a/fs/ubifs/debug.c b/fs/ubifs/debug.c
new file mode 100644
index 00000000000..4e3aaeba4ec
--- /dev/null
+++ b/fs/ubifs/debug.c
@@ -0,0 +1,2289 @@
1/*
2 * This file is part of UBIFS.
3 *
4 * Copyright (C) 2006-2008 Nokia Corporation
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms of the GNU General Public License version 2 as published by
8 * the Free Software Foundation.
9 *
10 * This program is distributed in the hope that it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13 * more details.
14 *
15 * You should have received a copy of the GNU General Public License along with
16 * this program; if not, write to the Free Software Foundation, Inc., 51
17 * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
18 *
19 * Authors: Artem Bityutskiy (Битюцкий Артём)
20 * Adrian Hunter
21 */
22
23/*
24 * This file implements most of the debugging stuff which is compiled in only
25 * when it is enabled. But some debugging check functions are implemented in
26 * corresponding subsystem, just because they are closely related and utilize
27 * various local functions of those subsystems.
28 */
29
30#define UBIFS_DBG_PRESERVE_UBI
31
32#include "ubifs.h"
33#include <linux/module.h>
34#include <linux/moduleparam.h>
35
36#ifdef CONFIG_UBIFS_FS_DEBUG
37
/*
 * Taken by the dump functions of this file around their sequences of printk
 * calls, and required to be held by 'dbg_key_str0()' and 'dbg_key_str1()'.
 */
38DEFINE_SPINLOCK(dbg_lock);
39
/* Static buffers which 'dbg_key_str0()' and 'dbg_key_str1()' print keys to */
40static char dbg_key_buf0[128];
41static char dbg_key_buf1[128];
42
/* Debugging flag words, run-time tunable via the module parameters below */
43unsigned int ubifs_msg_flags = UBIFS_MSG_FLAGS_DEFAULT;
44unsigned int ubifs_chk_flags = UBIFS_CHK_FLAGS_DEFAULT;
45unsigned int ubifs_tst_flags;
46
/* The parameters are readable by everybody and writable by the owner */
47module_param_named(debug_msgs, ubifs_msg_flags, uint, S_IRUGO | S_IWUSR);
48module_param_named(debug_chks, ubifs_chk_flags, uint, S_IRUGO | S_IWUSR);
49module_param_named(debug_tsts, ubifs_tst_flags, uint, S_IRUGO | S_IWUSR);
50
51MODULE_PARM_DESC(debug_msgs, "Debug message type flags");
52MODULE_PARM_DESC(debug_chks, "Debug check flags");
53MODULE_PARM_DESC(debug_tsts, "Debug special test flags");
54
55static const char *get_key_fmt(int fmt)
56{
57 switch (fmt) {
58 case UBIFS_SIMPLE_KEY_FMT:
59 return "simple";
60 default:
61 return "unknown/invalid format";
62 }
63}
64
65static const char *get_key_hash(int hash)
66{
67 switch (hash) {
68 case UBIFS_KEY_HASH_R5:
69 return "R5";
70 case UBIFS_KEY_HASH_TEST:
71 return "test";
72 default:
73 return "unknown/invalid name hash";
74 }
75}
76
77static const char *get_key_type(int type)
78{
79 switch (type) {
80 case UBIFS_INO_KEY:
81 return "inode";
82 case UBIFS_DENT_KEY:
83 return "direntry";
84 case UBIFS_XENT_KEY:
85 return "xentry";
86 case UBIFS_DATA_KEY:
87 return "data";
88 case UBIFS_TRUN_KEY:
89 return "truncate";
90 default:
91 return "unknown/invalid key";
92 }
93}
94
95static void sprintf_key(const struct ubifs_info *c, const union ubifs_key *key,
96 char *buffer)
97{
98 char *p = buffer;
99 int type = key_type(c, key);
100
101 if (c->key_fmt == UBIFS_SIMPLE_KEY_FMT) {
102 switch (type) {
103 case UBIFS_INO_KEY:
104 sprintf(p, "(%lu, %s)", key_inum(c, key),
105 get_key_type(type));
106 break;
107 case UBIFS_DENT_KEY:
108 case UBIFS_XENT_KEY:
109 sprintf(p, "(%lu, %s, %#08x)", key_inum(c, key),
110 get_key_type(type), key_hash(c, key));
111 break;
112 case UBIFS_DATA_KEY:
113 sprintf(p, "(%lu, %s, %u)", key_inum(c, key),
114 get_key_type(type), key_block(c, key));
115 break;
116 case UBIFS_TRUN_KEY:
117 sprintf(p, "(%lu, %s)",
118 key_inum(c, key), get_key_type(type));
119 break;
120 default:
121 sprintf(p, "(bad key type: %#08x, %#08x)",
122 key->u32[0], key->u32[1]);
123 }
124 } else
125 sprintf(p, "bad key format %d", c->key_fmt);
126}
127
128const char *dbg_key_str0(const struct ubifs_info *c, const union ubifs_key *key)
129{
130 /* dbg_lock must be held */
131 sprintf_key(c, key, dbg_key_buf0);
132 return dbg_key_buf0;
133}
134
135const char *dbg_key_str1(const struct ubifs_info *c, const union ubifs_key *key)
136{
137 /* dbg_lock must be held */
138 sprintf_key(c, key, dbg_key_buf1);
139 return dbg_key_buf1;
140}
141
142const char *dbg_ntype(int type)
143{
144 switch (type) {
145 case UBIFS_PAD_NODE:
146 return "padding node";
147 case UBIFS_SB_NODE:
148 return "superblock node";
149 case UBIFS_MST_NODE:
150 return "master node";
151 case UBIFS_REF_NODE:
152 return "reference node";
153 case UBIFS_INO_NODE:
154 return "inode node";
155 case UBIFS_DENT_NODE:
156 return "direntry node";
157 case UBIFS_XENT_NODE:
158 return "xentry node";
159 case UBIFS_DATA_NODE:
160 return "data node";
161 case UBIFS_TRUN_NODE:
162 return "truncate node";
163 case UBIFS_IDX_NODE:
164 return "indexing node";
165 case UBIFS_CS_NODE:
166 return "commit start node";
167 case UBIFS_ORPH_NODE:
168 return "orphan node";
169 default:
170 return "unknown node";
171 }
172}
173
174static const char *dbg_gtype(int type)
175{
176 switch (type) {
177 case UBIFS_NO_NODE_GROUP:
178 return "no node group";
179 case UBIFS_IN_NODE_GROUP:
180 return "in node group";
181 case UBIFS_LAST_OF_NODE_GROUP:
182 return "last of node group";
183 default:
184 return "unknown";
185 }
186}
187
188const char *dbg_cstate(int cmt_state)
189{
190 switch (cmt_state) {
191 case COMMIT_RESTING:
192 return "commit resting";
193 case COMMIT_BACKGROUND:
194 return "background commit requested";
195 case COMMIT_REQUIRED:
196 return "commit required";
197 case COMMIT_RUNNING_BACKGROUND:
198 return "BACKGROUND commit running";
199 case COMMIT_RUNNING_REQUIRED:
200 return "commit running and required";
201 case COMMIT_BROKEN:
202 return "broken commit";
203 default:
204 return "unknown commit state";
205 }
206}
207
208static void dump_ch(const struct ubifs_ch *ch)
209{
210 printk(KERN_DEBUG "\tmagic %#x\n", le32_to_cpu(ch->magic));
211 printk(KERN_DEBUG "\tcrc %#x\n", le32_to_cpu(ch->crc));
212 printk(KERN_DEBUG "\tnode_type %d (%s)\n", ch->node_type,
213 dbg_ntype(ch->node_type));
214 printk(KERN_DEBUG "\tgroup_type %d (%s)\n", ch->group_type,
215 dbg_gtype(ch->group_type));
216 printk(KERN_DEBUG "\tsqnum %llu\n",
217 (unsigned long long)le64_to_cpu(ch->sqnum));
218 printk(KERN_DEBUG "\tlen %u\n", le32_to_cpu(ch->len));
219}
220
221void dbg_dump_inode(const struct ubifs_info *c, const struct inode *inode)
222{
223 const struct ubifs_inode *ui = ubifs_inode(inode);
224
225 printk(KERN_DEBUG "inode %lu\n", inode->i_ino);
226 printk(KERN_DEBUG "size %llu\n",
227 (unsigned long long)i_size_read(inode));
228 printk(KERN_DEBUG "nlink %u\n", inode->i_nlink);
229 printk(KERN_DEBUG "uid %u\n", (unsigned int)inode->i_uid);
230 printk(KERN_DEBUG "gid %u\n", (unsigned int)inode->i_gid);
231 printk(KERN_DEBUG "atime %u.%u\n",
232 (unsigned int)inode->i_atime.tv_sec,
233 (unsigned int)inode->i_atime.tv_nsec);
234 printk(KERN_DEBUG "mtime %u.%u\n",
235 (unsigned int)inode->i_mtime.tv_sec,
236 (unsigned int)inode->i_mtime.tv_nsec);
237 printk(KERN_DEBUG "ctime %u.%u\n",
238 (unsigned int)inode->i_ctime.tv_sec,
239 (unsigned int)inode->i_ctime.tv_nsec);
240 printk(KERN_DEBUG "creat_sqnum %llu\n", ui->creat_sqnum);
241 printk(KERN_DEBUG "xattr_size %u\n", ui->xattr_size);
242 printk(KERN_DEBUG "xattr_cnt %u\n", ui->xattr_cnt);
243 printk(KERN_DEBUG "xattr_names %u\n", ui->xattr_names);
244 printk(KERN_DEBUG "dirty %u\n", ui->dirty);
245 printk(KERN_DEBUG "xattr %u\n", ui->xattr);
246 printk(KERN_DEBUG "flags %d\n", ui->flags);
247 printk(KERN_DEBUG "compr_type %d\n", ui->compr_type);
248 printk(KERN_DEBUG "data_len %d\n", ui->data_len);
249}
250
251void dbg_dump_node(const struct ubifs_info *c, const void *node)
252{
253 int i, n;
254 union ubifs_key key;
255 const struct ubifs_ch *ch = node;
256
257 if (dbg_failure_mode)
258 return;
259
260 /* If the magic is incorrect, just hexdump the first bytes */
261 if (le32_to_cpu(ch->magic) != UBIFS_NODE_MAGIC) {
262 printk(KERN_DEBUG "Not a node, first %zu bytes:", UBIFS_CH_SZ);
263 print_hex_dump(KERN_DEBUG, "", DUMP_PREFIX_OFFSET, 32, 1,
264 (void *)node, UBIFS_CH_SZ, 1);
265 return;
266 }
267
268 spin_lock(&dbg_lock);
269 dump_ch(node);
270
271 switch (ch->node_type) {
272 case UBIFS_PAD_NODE:
273 {
274 const struct ubifs_pad_node *pad = node;
275
276 printk(KERN_DEBUG "\tpad_len %u\n",
277 le32_to_cpu(pad->pad_len));
278 break;
279 }
280 case UBIFS_SB_NODE:
281 {
282 const struct ubifs_sb_node *sup = node;
283 unsigned int sup_flags = le32_to_cpu(sup->flags);
284
285 printk(KERN_DEBUG "\tkey_hash %d (%s)\n",
286 (int)sup->key_hash, get_key_hash(sup->key_hash));
287 printk(KERN_DEBUG "\tkey_fmt %d (%s)\n",
288 (int)sup->key_fmt, get_key_fmt(sup->key_fmt));
289 printk(KERN_DEBUG "\tflags %#x\n", sup_flags);
290 printk(KERN_DEBUG "\t big_lpt %u\n",
291 !!(sup_flags & UBIFS_FLG_BIGLPT));
292 printk(KERN_DEBUG "\tmin_io_size %u\n",
293 le32_to_cpu(sup->min_io_size));
294 printk(KERN_DEBUG "\tleb_size %u\n",
295 le32_to_cpu(sup->leb_size));
296 printk(KERN_DEBUG "\tleb_cnt %u\n",
297 le32_to_cpu(sup->leb_cnt));
298 printk(KERN_DEBUG "\tmax_leb_cnt %u\n",
299 le32_to_cpu(sup->max_leb_cnt));
300 printk(KERN_DEBUG "\tmax_bud_bytes %llu\n",
301 (unsigned long long)le64_to_cpu(sup->max_bud_bytes));
302 printk(KERN_DEBUG "\tlog_lebs %u\n",
303 le32_to_cpu(sup->log_lebs));
304 printk(KERN_DEBUG "\tlpt_lebs %u\n",
305 le32_to_cpu(sup->lpt_lebs));
306 printk(KERN_DEBUG "\torph_lebs %u\n",
307 le32_to_cpu(sup->orph_lebs));
308 printk(KERN_DEBUG "\tjhead_cnt %u\n",
309 le32_to_cpu(sup->jhead_cnt));
310 printk(KERN_DEBUG "\tfanout %u\n",
311 le32_to_cpu(sup->fanout));
312 printk(KERN_DEBUG "\tlsave_cnt %u\n",
313 le32_to_cpu(sup->lsave_cnt));
314 printk(KERN_DEBUG "\tdefault_compr %u\n",
315 (int)le16_to_cpu(sup->default_compr));
316 printk(KERN_DEBUG "\trp_size %llu\n",
317 (unsigned long long)le64_to_cpu(sup->rp_size));
318 printk(KERN_DEBUG "\trp_uid %u\n",
319 le32_to_cpu(sup->rp_uid));
320 printk(KERN_DEBUG "\trp_gid %u\n",
321 le32_to_cpu(sup->rp_gid));
322 printk(KERN_DEBUG "\tfmt_version %u\n",
323 le32_to_cpu(sup->fmt_version));
324 printk(KERN_DEBUG "\ttime_gran %u\n",
325 le32_to_cpu(sup->time_gran));
326 printk(KERN_DEBUG "\tUUID %02X%02X%02X%02X-%02X%02X"
327 "-%02X%02X-%02X%02X-%02X%02X%02X%02X%02X%02X\n",
328 sup->uuid[0], sup->uuid[1], sup->uuid[2], sup->uuid[3],
329 sup->uuid[4], sup->uuid[5], sup->uuid[6], sup->uuid[7],
330 sup->uuid[8], sup->uuid[9], sup->uuid[10], sup->uuid[11],
331 sup->uuid[12], sup->uuid[13], sup->uuid[14],
332 sup->uuid[15]);
333 break;
334 }
335 case UBIFS_MST_NODE:
336 {
337 const struct ubifs_mst_node *mst = node;
338
339 printk(KERN_DEBUG "\thighest_inum %llu\n",
340 (unsigned long long)le64_to_cpu(mst->highest_inum));
341 printk(KERN_DEBUG "\tcommit number %llu\n",
342 (unsigned long long)le64_to_cpu(mst->cmt_no));
343 printk(KERN_DEBUG "\tflags %#x\n",
344 le32_to_cpu(mst->flags));
345 printk(KERN_DEBUG "\tlog_lnum %u\n",
346 le32_to_cpu(mst->log_lnum));
347 printk(KERN_DEBUG "\troot_lnum %u\n",
348 le32_to_cpu(mst->root_lnum));
349 printk(KERN_DEBUG "\troot_offs %u\n",
350 le32_to_cpu(mst->root_offs));
351 printk(KERN_DEBUG "\troot_len %u\n",
352 le32_to_cpu(mst->root_len));
353 printk(KERN_DEBUG "\tgc_lnum %u\n",
354 le32_to_cpu(mst->gc_lnum));
355 printk(KERN_DEBUG "\tihead_lnum %u\n",
356 le32_to_cpu(mst->ihead_lnum));
357 printk(KERN_DEBUG "\tihead_offs %u\n",
358 le32_to_cpu(mst->ihead_offs));
359 printk(KERN_DEBUG "\tindex_size %u\n",
360 le32_to_cpu(mst->index_size));
361 printk(KERN_DEBUG "\tlpt_lnum %u\n",
362 le32_to_cpu(mst->lpt_lnum));
363 printk(KERN_DEBUG "\tlpt_offs %u\n",
364 le32_to_cpu(mst->lpt_offs));
365 printk(KERN_DEBUG "\tnhead_lnum %u\n",
366 le32_to_cpu(mst->nhead_lnum));
367 printk(KERN_DEBUG "\tnhead_offs %u\n",
368 le32_to_cpu(mst->nhead_offs));
369 printk(KERN_DEBUG "\tltab_lnum %u\n",
370 le32_to_cpu(mst->ltab_lnum));
371 printk(KERN_DEBUG "\tltab_offs %u\n",
372 le32_to_cpu(mst->ltab_offs));
373 printk(KERN_DEBUG "\tlsave_lnum %u\n",
374 le32_to_cpu(mst->lsave_lnum));
375 printk(KERN_DEBUG "\tlsave_offs %u\n",
376 le32_to_cpu(mst->lsave_offs));
377 printk(KERN_DEBUG "\tlscan_lnum %u\n",
378 le32_to_cpu(mst->lscan_lnum));
379 printk(KERN_DEBUG "\tleb_cnt %u\n",
380 le32_to_cpu(mst->leb_cnt));
381 printk(KERN_DEBUG "\tempty_lebs %u\n",
382 le32_to_cpu(mst->empty_lebs));
383 printk(KERN_DEBUG "\tidx_lebs %u\n",
384 le32_to_cpu(mst->idx_lebs));
385 printk(KERN_DEBUG "\ttotal_free %llu\n",
386 (unsigned long long)le64_to_cpu(mst->total_free));
387 printk(KERN_DEBUG "\ttotal_dirty %llu\n",
388 (unsigned long long)le64_to_cpu(mst->total_dirty));
389 printk(KERN_DEBUG "\ttotal_used %llu\n",
390 (unsigned long long)le64_to_cpu(mst->total_used));
391 printk(KERN_DEBUG "\ttotal_dead %llu\n",
392 (unsigned long long)le64_to_cpu(mst->total_dead));
393 printk(KERN_DEBUG "\ttotal_dark %llu\n",
394 (unsigned long long)le64_to_cpu(mst->total_dark));
395 break;
396 }
397 case UBIFS_REF_NODE:
398 {
399 const struct ubifs_ref_node *ref = node;
400
401 printk(KERN_DEBUG "\tlnum %u\n",
402 le32_to_cpu(ref->lnum));
403 printk(KERN_DEBUG "\toffs %u\n",
404 le32_to_cpu(ref->offs));
405 printk(KERN_DEBUG "\tjhead %u\n",
406 le32_to_cpu(ref->jhead));
407 break;
408 }
409 case UBIFS_INO_NODE:
410 {
411 const struct ubifs_ino_node *ino = node;
412
413 key_read(c, &ino->key, &key);
414 printk(KERN_DEBUG "\tkey %s\n", DBGKEY(&key));
415 printk(KERN_DEBUG "\tcreat_sqnum %llu\n",
416 (unsigned long long)le64_to_cpu(ino->creat_sqnum));
417 printk(KERN_DEBUG "\tsize %llu\n",
418 (unsigned long long)le64_to_cpu(ino->size));
419 printk(KERN_DEBUG "\tnlink %u\n",
420 le32_to_cpu(ino->nlink));
421 printk(KERN_DEBUG "\tatime %lld.%u\n",
422 (long long)le64_to_cpu(ino->atime_sec),
423 le32_to_cpu(ino->atime_nsec));
424 printk(KERN_DEBUG "\tmtime %lld.%u\n",
425 (long long)le64_to_cpu(ino->mtime_sec),
426 le32_to_cpu(ino->mtime_nsec));
427 printk(KERN_DEBUG "\tctime %lld.%u\n",
428 (long long)le64_to_cpu(ino->ctime_sec),
429 le32_to_cpu(ino->ctime_nsec));
430 printk(KERN_DEBUG "\tuid %u\n",
431 le32_to_cpu(ino->uid));
432 printk(KERN_DEBUG "\tgid %u\n",
433 le32_to_cpu(ino->gid));
434 printk(KERN_DEBUG "\tmode %u\n",
435 le32_to_cpu(ino->mode));
436 printk(KERN_DEBUG "\tflags %#x\n",
437 le32_to_cpu(ino->flags));
438 printk(KERN_DEBUG "\txattr_cnt %u\n",
439 le32_to_cpu(ino->xattr_cnt));
440 printk(KERN_DEBUG "\txattr_size %u\n",
441 le32_to_cpu(ino->xattr_size));
442 printk(KERN_DEBUG "\txattr_names %u\n",
443 le32_to_cpu(ino->xattr_names));
444 printk(KERN_DEBUG "\tcompr_type %#x\n",
445 (int)le16_to_cpu(ino->compr_type));
446 printk(KERN_DEBUG "\tdata len %u\n",
447 le32_to_cpu(ino->data_len));
448 break;
449 }
450 case UBIFS_DENT_NODE:
451 case UBIFS_XENT_NODE:
452 {
453 const struct ubifs_dent_node *dent = node;
454 int nlen = le16_to_cpu(dent->nlen);
455
456 key_read(c, &dent->key, &key);
457 printk(KERN_DEBUG "\tkey %s\n", DBGKEY(&key));
458 printk(KERN_DEBUG "\tinum %llu\n",
459 (unsigned long long)le64_to_cpu(dent->inum));
460 printk(KERN_DEBUG "\ttype %d\n", (int)dent->type);
461 printk(KERN_DEBUG "\tnlen %d\n", nlen);
462 printk(KERN_DEBUG "\tname ");
463
464 if (nlen > UBIFS_MAX_NLEN)
465 printk(KERN_DEBUG "(bad name length, not printing, "
466 "bad or corrupted node)");
467 else {
468 for (i = 0; i < nlen && dent->name[i]; i++)
469 printk("%c", dent->name[i]);
470 }
471 printk("\n");
472
473 break;
474 }
475 case UBIFS_DATA_NODE:
476 {
477 const struct ubifs_data_node *dn = node;
478 int dlen = le32_to_cpu(ch->len) - UBIFS_DATA_NODE_SZ;
479
480 key_read(c, &dn->key, &key);
481 printk(KERN_DEBUG "\tkey %s\n", DBGKEY(&key));
482 printk(KERN_DEBUG "\tsize %u\n",
483 le32_to_cpu(dn->size));
484 printk(KERN_DEBUG "\tcompr_typ %d\n",
485 (int)le16_to_cpu(dn->compr_type));
486 printk(KERN_DEBUG "\tdata size %d\n",
487 dlen);
488 printk(KERN_DEBUG "\tdata:\n");
489 print_hex_dump(KERN_DEBUG, "\t", DUMP_PREFIX_OFFSET, 32, 1,
490 (void *)&dn->data, dlen, 0);
491 break;
492 }
493 case UBIFS_TRUN_NODE:
494 {
495 const struct ubifs_trun_node *trun = node;
496
497 printk(KERN_DEBUG "\tinum %u\n",
498 le32_to_cpu(trun->inum));
499 printk(KERN_DEBUG "\told_size %llu\n",
500 (unsigned long long)le64_to_cpu(trun->old_size));
501 printk(KERN_DEBUG "\tnew_size %llu\n",
502 (unsigned long long)le64_to_cpu(trun->new_size));
503 break;
504 }
505 case UBIFS_IDX_NODE:
506 {
507 const struct ubifs_idx_node *idx = node;
508
509 n = le16_to_cpu(idx->child_cnt);
510 printk(KERN_DEBUG "\tchild_cnt %d\n", n);
511 printk(KERN_DEBUG "\tlevel %d\n",
512 (int)le16_to_cpu(idx->level));
513 printk(KERN_DEBUG "\tBranches:\n");
514
515 for (i = 0; i < n && i < c->fanout - 1; i++) {
516 const struct ubifs_branch *br;
517
518 br = ubifs_idx_branch(c, idx, i);
519 key_read(c, &br->key, &key);
520 printk(KERN_DEBUG "\t%d: LEB %d:%d len %d key %s\n",
521 i, le32_to_cpu(br->lnum), le32_to_cpu(br->offs),
522 le32_to_cpu(br->len), DBGKEY(&key));
523 }
524 break;
525 }
526 case UBIFS_CS_NODE:
527 break;
528 case UBIFS_ORPH_NODE:
529 {
530 const struct ubifs_orph_node *orph = node;
531
532 printk(KERN_DEBUG "\tcommit number %llu\n",
533 (unsigned long long)
534 le64_to_cpu(orph->cmt_no) & LLONG_MAX);
535 printk(KERN_DEBUG "\tlast node flag %llu\n",
536 (unsigned long long)(le64_to_cpu(orph->cmt_no)) >> 63);
537 n = (le32_to_cpu(ch->len) - UBIFS_ORPH_NODE_SZ) >> 3;
538 printk(KERN_DEBUG "\t%d orphan inode numbers:\n", n);
539 for (i = 0; i < n; i++)
540 printk(KERN_DEBUG "\t ino %llu\n",
541 le64_to_cpu(orph->inos[i]));
542 break;
543 }
544 default:
545 printk(KERN_DEBUG "node type %d was not recognized\n",
546 (int)ch->node_type);
547 }
548 spin_unlock(&dbg_lock);
549}
550
551void dbg_dump_budget_req(const struct ubifs_budget_req *req)
552{
553 spin_lock(&dbg_lock);
554 printk(KERN_DEBUG "Budgeting request: new_ino %d, dirtied_ino %d\n",
555 req->new_ino, req->dirtied_ino);
556 printk(KERN_DEBUG "\tnew_ino_d %d, dirtied_ino_d %d\n",
557 req->new_ino_d, req->dirtied_ino_d);
558 printk(KERN_DEBUG "\tnew_page %d, dirtied_page %d\n",
559 req->new_page, req->dirtied_page);
560 printk(KERN_DEBUG "\tnew_dent %d, mod_dent %d\n",
561 req->new_dent, req->mod_dent);
562 printk(KERN_DEBUG "\tidx_growth %d\n", req->idx_growth);
563 printk(KERN_DEBUG "\tdata_growth %d dd_growth %d\n",
564 req->data_growth, req->dd_growth);
565 spin_unlock(&dbg_lock);
566}
567
568void dbg_dump_lstats(const struct ubifs_lp_stats *lst)
569{
570 spin_lock(&dbg_lock);
571 printk(KERN_DEBUG "Lprops statistics: empty_lebs %d, idx_lebs %d\n",
572 lst->empty_lebs, lst->idx_lebs);
573 printk(KERN_DEBUG "\ttaken_empty_lebs %d, total_free %lld, "
574 "total_dirty %lld\n", lst->taken_empty_lebs, lst->total_free,
575 lst->total_dirty);
576 printk(KERN_DEBUG "\ttotal_used %lld, total_dark %lld, "
577 "total_dead %lld\n", lst->total_used, lst->total_dark,
578 lst->total_dead);
579 spin_unlock(&dbg_lock);
580}
581
582void dbg_dump_budg(struct ubifs_info *c)
583{
584 int i;
585 struct rb_node *rb;
586 struct ubifs_bud *bud;
587 struct ubifs_gced_idx_leb *idx_gc;
588
589 spin_lock(&dbg_lock);
590 printk(KERN_DEBUG "Budgeting info: budg_data_growth %lld, "
591 "budg_dd_growth %lld, budg_idx_growth %lld\n",
592 c->budg_data_growth, c->budg_dd_growth, c->budg_idx_growth);
593 printk(KERN_DEBUG "\tdata budget sum %lld, total budget sum %lld, "
594 "freeable_cnt %d\n", c->budg_data_growth + c->budg_dd_growth,
595 c->budg_data_growth + c->budg_dd_growth + c->budg_idx_growth,
596 c->freeable_cnt);
597 printk(KERN_DEBUG "\tmin_idx_lebs %d, old_idx_sz %lld, "
598 "calc_idx_sz %lld, idx_gc_cnt %d\n", c->min_idx_lebs,
599 c->old_idx_sz, c->calc_idx_sz, c->idx_gc_cnt);
600 printk(KERN_DEBUG "\tdirty_pg_cnt %ld, dirty_zn_cnt %ld, "
601 "clean_zn_cnt %ld\n", atomic_long_read(&c->dirty_pg_cnt),
602 atomic_long_read(&c->dirty_zn_cnt),
603 atomic_long_read(&c->clean_zn_cnt));
604 printk(KERN_DEBUG "\tdark_wm %d, dead_wm %d, max_idx_node_sz %d\n",
605 c->dark_wm, c->dead_wm, c->max_idx_node_sz);
606 printk(KERN_DEBUG "\tgc_lnum %d, ihead_lnum %d\n",
607 c->gc_lnum, c->ihead_lnum);
608 for (i = 0; i < c->jhead_cnt; i++)
609 printk(KERN_DEBUG "\tjhead %d\t LEB %d\n",
610 c->jheads[i].wbuf.jhead, c->jheads[i].wbuf.lnum);
611 for (rb = rb_first(&c->buds); rb; rb = rb_next(rb)) {
612 bud = rb_entry(rb, struct ubifs_bud, rb);
613 printk(KERN_DEBUG "\tbud LEB %d\n", bud->lnum);
614 }
615 list_for_each_entry(bud, &c->old_buds, list)
616 printk(KERN_DEBUG "\told bud LEB %d\n", bud->lnum);
617 list_for_each_entry(idx_gc, &c->idx_gc, list)
618 printk(KERN_DEBUG "\tGC'ed idx LEB %d unmap %d\n",
619 idx_gc->lnum, idx_gc->unmap);
620 printk(KERN_DEBUG "\tcommit state %d\n", c->cmt_state);
621 spin_unlock(&dbg_lock);
622}
623
624void dbg_dump_lprop(const struct ubifs_info *c, const struct ubifs_lprops *lp)
625{
626 printk(KERN_DEBUG "LEB %d lprops: free %d, dirty %d (used %d), "
627 "flags %#x\n", lp->lnum, lp->free, lp->dirty,
628 c->leb_size - lp->free - lp->dirty, lp->flags);
629}
630
631void dbg_dump_lprops(struct ubifs_info *c)
632{
633 int lnum, err;
634 struct ubifs_lprops lp;
635 struct ubifs_lp_stats lst;
636
637 printk(KERN_DEBUG "Dumping LEB properties\n");
638 ubifs_get_lp_stats(c, &lst);
639 dbg_dump_lstats(&lst);
640
641 for (lnum = c->main_first; lnum < c->leb_cnt; lnum++) {
642 err = ubifs_read_one_lp(c, lnum, &lp);
643 if (err)
644 ubifs_err("cannot read lprops for LEB %d", lnum);
645
646 dbg_dump_lprop(c, &lp);
647 }
648}
649
650void dbg_dump_leb(const struct ubifs_info *c, int lnum)
651{
652 struct ubifs_scan_leb *sleb;
653 struct ubifs_scan_node *snod;
654
655 if (dbg_failure_mode)
656 return;
657
658 printk(KERN_DEBUG "Dumping LEB %d\n", lnum);
659
660 sleb = ubifs_scan(c, lnum, 0, c->dbg_buf);
661 if (IS_ERR(sleb)) {
662 ubifs_err("scan error %d", (int)PTR_ERR(sleb));
663 return;
664 }
665
666 printk(KERN_DEBUG "LEB %d has %d nodes ending at %d\n", lnum,
667 sleb->nodes_cnt, sleb->endpt);
668
669 list_for_each_entry(snod, &sleb->nodes, list) {
670 cond_resched();
671 printk(KERN_DEBUG "Dumping node at LEB %d:%d len %d\n", lnum,
672 snod->offs, snod->len);
673 dbg_dump_node(c, snod->node);
674 }
675
676 ubifs_scan_destroy(sleb);
677 return;
678}
679
680void dbg_dump_znode(const struct ubifs_info *c,
681 const struct ubifs_znode *znode)
682{
683 int n;
684 const struct ubifs_zbranch *zbr;
685
686 spin_lock(&dbg_lock);
687 if (znode->parent)
688 zbr = &znode->parent->zbranch[znode->iip];
689 else
690 zbr = &c->zroot;
691
692 printk(KERN_DEBUG "znode %p, LEB %d:%d len %d parent %p iip %d level %d"
693 " child_cnt %d flags %lx\n", znode, zbr->lnum, zbr->offs,
694 zbr->len, znode->parent, znode->iip, znode->level,
695 znode->child_cnt, znode->flags);
696
697 if (znode->child_cnt <= 0 || znode->child_cnt > c->fanout) {
698 spin_unlock(&dbg_lock);
699 return;
700 }
701
702 printk(KERN_DEBUG "zbranches:\n");
703 for (n = 0; n < znode->child_cnt; n++) {
704 zbr = &znode->zbranch[n];
705 if (znode->level > 0)
706 printk(KERN_DEBUG "\t%d: znode %p LEB %d:%d len %d key "
707 "%s\n", n, zbr->znode, zbr->lnum,
708 zbr->offs, zbr->len,
709 DBGKEY(&zbr->key));
710 else
711 printk(KERN_DEBUG "\t%d: LNC %p LEB %d:%d len %d key "
712 "%s\n", n, zbr->znode, zbr->lnum,
713 zbr->offs, zbr->len,
714 DBGKEY(&zbr->key));
715 }
716 spin_unlock(&dbg_lock);
717}
718
719void dbg_dump_heap(struct ubifs_info *c, struct ubifs_lpt_heap *heap, int cat)
720{
721 int i;
722
723 printk(KERN_DEBUG "Dumping heap cat %d (%d elements)\n",
724 cat, heap->cnt);
725 for (i = 0; i < heap->cnt; i++) {
726 struct ubifs_lprops *lprops = heap->arr[i];
727
728 printk(KERN_DEBUG "\t%d. LEB %d hpos %d free %d dirty %d "
729 "flags %d\n", i, lprops->lnum, lprops->hpos,
730 lprops->free, lprops->dirty, lprops->flags);
731 }
732}
733
734void dbg_dump_pnode(struct ubifs_info *c, struct ubifs_pnode *pnode,
735 struct ubifs_nnode *parent, int iip)
736{
737 int i;
738
739 printk(KERN_DEBUG "Dumping pnode:\n");
740 printk(KERN_DEBUG "\taddress %zx parent %zx cnext %zx\n",
741 (size_t)pnode, (size_t)parent, (size_t)pnode->cnext);
742 printk(KERN_DEBUG "\tflags %lu iip %d level %d num %d\n",
743 pnode->flags, iip, pnode->level, pnode->num);
744 for (i = 0; i < UBIFS_LPT_FANOUT; i++) {
745 struct ubifs_lprops *lp = &pnode->lprops[i];
746
747 printk(KERN_DEBUG "\t%d: free %d dirty %d flags %d lnum %d\n",
748 i, lp->free, lp->dirty, lp->flags, lp->lnum);
749 }
750}
751
752void dbg_dump_tnc(struct ubifs_info *c)
753{
754 struct ubifs_znode *znode;
755 int level;
756
757 printk(KERN_DEBUG "\n");
758 printk(KERN_DEBUG "Dumping the TNC tree\n");
759 znode = ubifs_tnc_levelorder_next(c->zroot.znode, NULL);
760 level = znode->level;
761 printk(KERN_DEBUG "== Level %d ==\n", level);
762 while (znode) {
763 if (level != znode->level) {
764 level = znode->level;
765 printk(KERN_DEBUG "== Level %d ==\n", level);
766 }
767 dbg_dump_znode(c, znode);
768 znode = ubifs_tnc_levelorder_next(c->zroot.znode, znode);
769 }
770
771 printk(KERN_DEBUG "\n");
772}
773
/**
 * dump_znode - index walker call-back which dumps a znode.
 * @c: UBIFS file-system description object
 * @znode: the znode to dump
 * @priv: private data of the walk (not used)
 *
 * This function is passed to 'dbg_walk_index()' by 'dbg_dump_index()'. It
 * always returns zero.
 */
static int dump_znode(struct ubifs_info *c, struct ubifs_znode *znode,
		      void *priv)
{
	/* 'priv' is part of the call-back signature, nothing to do with it */
	dbg_dump_znode(c, znode);

	return 0;
}
780
781/**
782 * dbg_dump_index - dump the on-flash index.
783 * @c: UBIFS file-system description object
784 *
785 * This function dumps whole UBIFS indexing B-tree, unlike 'dbg_dump_tnc()'
786 * which dumps only in-memory znodes and does not read znodes which from flash.
787 */
788void dbg_dump_index(struct ubifs_info *c)
789{
790 dbg_walk_index(c, NULL, dump_znode, NULL);
791}
792
793/**
794 * dbg_check_synced_i_size - check synchronized inode size.
795 * @inode: inode to check
796 *
797 * If inode is clean, synchronized inode size has to be equivalent to current
798 * inode size. This function has to be called only for locked inodes (@i_mutex
799 * has to be locked). Returns %0 if synchronized inode size if correct, and
800 * %-EINVAL if not.
801 */
802int dbg_check_synced_i_size(struct inode *inode)
803{
804 int err = 0;
805 struct ubifs_inode *ui = ubifs_inode(inode);
806
807 if (!(ubifs_chk_flags & UBIFS_CHK_GEN))
808 return 0;
809 if (!S_ISREG(inode->i_mode))
810 return 0;
811
812 mutex_lock(&ui->ui_mutex);
813 spin_lock(&ui->ui_lock);
814 if (ui->ui_size != ui->synced_i_size && !ui->dirty) {
815 ubifs_err("ui_size is %lld, synced_i_size is %lld, but inode "
816 "is clean", ui->ui_size, ui->synced_i_size);
817 ubifs_err("i_ino %lu, i_mode %#x, i_size %lld", inode->i_ino,
818 inode->i_mode, i_size_read(inode));
819 dbg_dump_stack();
820 err = -EINVAL;
821 }
822 spin_unlock(&ui->ui_lock);
823 mutex_unlock(&ui->ui_mutex);
824 return err;
825}
826
827/*
828 * dbg_check_dir - check directory inode size and link count.
829 * @c: UBIFS file-system description object
830 * @dir: the directory to calculate size for
831 * @size: the result is returned here
832 *
833 * This function makes sure that directory size and link count are correct.
834 * Returns zero in case of success and a negative error code in case of
835 * failure.
836 *
837 * Note, it is good idea to make sure the @dir->i_mutex is locked before
838 * calling this function.
839 */
840int dbg_check_dir_size(struct ubifs_info *c, const struct inode *dir)
841{
842 unsigned int nlink = 2;
843 union ubifs_key key;
844 struct ubifs_dent_node *dent, *pdent = NULL;
845 struct qstr nm = { .name = NULL };
846 loff_t size = UBIFS_INO_NODE_SZ;
847
848 if (!(ubifs_chk_flags & UBIFS_CHK_GEN))
849 return 0;
850
851 if (!S_ISDIR(dir->i_mode))
852 return 0;
853
854 lowest_dent_key(c, &key, dir->i_ino);
855 while (1) {
856 int err;
857
858 dent = ubifs_tnc_next_ent(c, &key, &nm);
859 if (IS_ERR(dent)) {
860 err = PTR_ERR(dent);
861 if (err == -ENOENT)
862 break;
863 return err;
864 }
865
866 nm.name = dent->name;
867 nm.len = le16_to_cpu(dent->nlen);
868 size += CALC_DENT_SIZE(nm.len);
869 if (dent->type == UBIFS_ITYPE_DIR)
870 nlink += 1;
871 kfree(pdent);
872 pdent = dent;
873 key_read(c, &dent->key, &key);
874 }
875 kfree(pdent);
876
877 if (i_size_read(dir) != size) {
878 ubifs_err("directory inode %lu has size %llu, "
879 "but calculated size is %llu", dir->i_ino,
880 (unsigned long long)i_size_read(dir),
881 (unsigned long long)size);
882 dump_stack();
883 return -EINVAL;
884 }
885 if (dir->i_nlink != nlink) {
886 ubifs_err("directory inode %lu has nlink %u, but calculated "
887 "nlink is %u", dir->i_ino, dir->i_nlink, nlink);
888 dump_stack();
889 return -EINVAL;
890 }
891
892 return 0;
893}
894
895/**
896 * dbg_check_key_order - make sure that colliding keys are properly ordered.
897 * @c: UBIFS file-system description object
898 * @zbr1: first zbranch
899 * @zbr2: following zbranch
900 *
901 * In UBIFS indexing B-tree colliding keys has to be sorted in binary order of
902 * names of the direntries/xentries which are referred by the keys. This
903 * function reads direntries/xentries referred by @zbr1 and @zbr2 and makes
904 * sure the name of direntry/xentry referred by @zbr1 is less than
905 * direntry/xentry referred by @zbr2. Returns zero if this is true, %1 if not,
906 * and a negative error code in case of failure.
907 */
908static int dbg_check_key_order(struct ubifs_info *c, struct ubifs_zbranch *zbr1,
909 struct ubifs_zbranch *zbr2)
910{
911 int err, nlen1, nlen2, cmp;
912 struct ubifs_dent_node *dent1, *dent2;
913 union ubifs_key key;
914
915 ubifs_assert(!keys_cmp(c, &zbr1->key, &zbr2->key));
916 dent1 = kmalloc(UBIFS_MAX_DENT_NODE_SZ, GFP_NOFS);
917 if (!dent1)
918 return -ENOMEM;
919 dent2 = kmalloc(UBIFS_MAX_DENT_NODE_SZ, GFP_NOFS);
920 if (!dent2) {
921 err = -ENOMEM;
922 goto out_free;
923 }
924
925 err = ubifs_tnc_read_node(c, zbr1, dent1);
926 if (err)
927 goto out_free;
928 err = ubifs_validate_entry(c, dent1);
929 if (err)
930 goto out_free;
931
932 err = ubifs_tnc_read_node(c, zbr2, dent2);
933 if (err)
934 goto out_free;
935 err = ubifs_validate_entry(c, dent2);
936 if (err)
937 goto out_free;
938
939 /* Make sure node keys are the same as in zbranch */
940 err = 1;
941 key_read(c, &dent1->key, &key);
942 if (keys_cmp(c, &zbr1->key, &key)) {
943 dbg_err("1st entry at %d:%d has key %s", zbr1->lnum,
944 zbr1->offs, DBGKEY(&key));
945 dbg_err("but it should have key %s according to tnc",
946 DBGKEY(&zbr1->key));
947 dbg_dump_node(c, dent1);
948 goto out_free;
949 }
950
951 key_read(c, &dent2->key, &key);
952 if (keys_cmp(c, &zbr2->key, &key)) {
953 dbg_err("2nd entry at %d:%d has key %s", zbr1->lnum,
954 zbr1->offs, DBGKEY(&key));
955 dbg_err("but it should have key %s according to tnc",
956 DBGKEY(&zbr2->key));
957 dbg_dump_node(c, dent2);
958 goto out_free;
959 }
960
961 nlen1 = le16_to_cpu(dent1->nlen);
962 nlen2 = le16_to_cpu(dent2->nlen);
963
964 cmp = memcmp(dent1->name, dent2->name, min_t(int, nlen1, nlen2));
965 if (cmp < 0 || (cmp == 0 && nlen1 < nlen2)) {
966 err = 0;
967 goto out_free;
968 }
969 if (cmp == 0 && nlen1 == nlen2)
970 dbg_err("2 xent/dent nodes with the same name");
971 else
972 dbg_err("bad order of colliding key %s",
973 DBGKEY(&key));
974
975 dbg_msg("first node at %d:%d\n", zbr1->lnum, zbr1->offs);
976 dbg_dump_node(c, dent1);
977 dbg_msg("second node at %d:%d\n", zbr2->lnum, zbr2->offs);
978 dbg_dump_node(c, dent2);
979
980out_free:
981 kfree(dent2);
982 kfree(dent1);
983 return err;
984}
985
986/**
987 * dbg_check_znode - check if znode is all right.
988 * @c: UBIFS file-system description object
989 * @zbr: zbranch which points to this znode
990 *
991 * This function makes sure that znode referred to by @zbr is all right.
992 * Returns zero if it is, and %-EINVAL if it is not.
993 */
994static int dbg_check_znode(struct ubifs_info *c, struct ubifs_zbranch *zbr)
995{
996 struct ubifs_znode *znode = zbr->znode;
997 struct ubifs_znode *zp = znode->parent;
998 int n, err, cmp;
999
1000 if (znode->child_cnt <= 0 || znode->child_cnt > c->fanout) {
1001 err = 1;
1002 goto out;
1003 }
1004 if (znode->level < 0) {
1005 err = 2;
1006 goto out;
1007 }
1008 if (znode->iip < 0 || znode->iip >= c->fanout) {
1009 err = 3;
1010 goto out;
1011 }
1012
1013 if (zbr->len == 0)
1014 /* Only dirty zbranch may have no on-flash nodes */
1015 if (!ubifs_zn_dirty(znode)) {
1016 err = 4;
1017 goto out;
1018 }
1019
1020 if (ubifs_zn_dirty(znode)) {
1021 /*
1022 * If znode is dirty, its parent has to be dirty as well. The
1023 * order of the operation is important, so we have to have
1024 * memory barriers.
1025 */
1026 smp_mb();
1027 if (zp && !ubifs_zn_dirty(zp)) {
1028 /*
1029 * The dirty flag is atomic and is cleared outside the
1030 * TNC mutex, so znode's dirty flag may now have
1031 * been cleared. The child is always cleared before the
1032 * parent, so we just need to check again.
1033 */
1034 smp_mb();
1035 if (ubifs_zn_dirty(znode)) {
1036 err = 5;
1037 goto out;
1038 }
1039 }
1040 }
1041
1042 if (zp) {
1043 const union ubifs_key *min, *max;
1044
1045 if (znode->level != zp->level - 1) {
1046 err = 6;
1047 goto out;
1048 }
1049
1050 /* Make sure the 'parent' pointer in our znode is correct */
1051 err = ubifs_search_zbranch(c, zp, &zbr->key, &n);
1052 if (!err) {
1053 /* This zbranch does not exist in the parent */
1054 err = 7;
1055 goto out;
1056 }
1057
1058 if (znode->iip >= zp->child_cnt) {
1059 err = 8;
1060 goto out;
1061 }
1062
1063 if (znode->iip != n) {
1064 /* This may happen only in case of collisions */
1065 if (keys_cmp(c, &zp->zbranch[n].key,
1066 &zp->zbranch[znode->iip].key)) {
1067 err = 9;
1068 goto out;
1069 }
1070 n = znode->iip;
1071 }
1072
1073 /*
1074 * Make sure that the first key in our znode is greater than or
1075 * equal to the key in the pointing zbranch.
1076 */
1077 min = &zbr->key;
1078 cmp = keys_cmp(c, min, &znode->zbranch[0].key);
1079 if (cmp == 1) {
1080 err = 10;
1081 goto out;
1082 }
1083
1084 if (n + 1 < zp->child_cnt) {
1085 max = &zp->zbranch[n + 1].key;
1086
1087 /*
1088 * Make sure the last key in our znode is less or
1089 * equivalent than the the key in zbranch which goes
1090 * after our pointing zbranch.
1091 */
1092 cmp = keys_cmp(c, max,
1093 &znode->zbranch[znode->child_cnt - 1].key);
1094 if (cmp == -1) {
1095 err = 11;
1096 goto out;
1097 }
1098 }
1099 } else {
1100 /* This may only be root znode */
1101 if (zbr != &c->zroot) {
1102 err = 12;
1103 goto out;
1104 }
1105 }
1106
1107 /*
1108 * Make sure that next key is greater or equivalent then the previous
1109 * one.
1110 */
1111 for (n = 1; n < znode->child_cnt; n++) {
1112 cmp = keys_cmp(c, &znode->zbranch[n - 1].key,
1113 &znode->zbranch[n].key);
1114 if (cmp > 0) {
1115 err = 13;
1116 goto out;
1117 }
1118 if (cmp == 0) {
1119 /* This can only be keys with colliding hash */
1120 if (!is_hash_key(c, &znode->zbranch[n].key)) {
1121 err = 14;
1122 goto out;
1123 }
1124
1125 if (znode->level != 0 || c->replaying)
1126 continue;
1127
1128 /*
1129 * Colliding keys should follow binary order of
1130 * corresponding xentry/dentry names.
1131 */
1132 err = dbg_check_key_order(c, &znode->zbranch[n - 1],
1133 &znode->zbranch[n]);
1134 if (err < 0)
1135 return err;
1136 if (err) {
1137 err = 15;
1138 goto out;
1139 }
1140 }
1141 }
1142
1143 for (n = 0; n < znode->child_cnt; n++) {
1144 if (!znode->zbranch[n].znode &&
1145 (znode->zbranch[n].lnum == 0 ||
1146 znode->zbranch[n].len == 0)) {
1147 err = 16;
1148 goto out;
1149 }
1150
1151 if (znode->zbranch[n].lnum != 0 &&
1152 znode->zbranch[n].len == 0) {
1153 err = 17;
1154 goto out;
1155 }
1156
1157 if (znode->zbranch[n].lnum == 0 &&
1158 znode->zbranch[n].len != 0) {
1159 err = 18;
1160 goto out;
1161 }
1162
1163 if (znode->zbranch[n].lnum == 0 &&
1164 znode->zbranch[n].offs != 0) {
1165 err = 19;
1166 goto out;
1167 }
1168
1169 if (znode->level != 0 && znode->zbranch[n].znode)
1170 if (znode->zbranch[n].znode->parent != znode) {
1171 err = 20;
1172 goto out;
1173 }
1174 }
1175
1176 return 0;
1177
1178out:
1179 ubifs_err("failed, error %d", err);
1180 ubifs_msg("dump of the znode");
1181 dbg_dump_znode(c, znode);
1182 if (zp) {
1183 ubifs_msg("dump of the parent znode");
1184 dbg_dump_znode(c, zp);
1185 }
1186 dump_stack();
1187 return -EINVAL;
1188}
1189
1190/**
1191 * dbg_check_tnc - check TNC tree.
1192 * @c: UBIFS file-system description object
1193 * @extra: do extra checks that are possible at start commit
1194 *
1195 * This function traverses whole TNC tree and checks every znode. Returns zero
1196 * if everything is all right and %-EINVAL if something is wrong with TNC.
1197 */
1198int dbg_check_tnc(struct ubifs_info *c, int extra)
1199{
1200 struct ubifs_znode *znode;
1201 long clean_cnt = 0, dirty_cnt = 0;
1202 int err, last;
1203
1204 if (!(ubifs_chk_flags & UBIFS_CHK_TNC))
1205 return 0;
1206
1207 ubifs_assert(mutex_is_locked(&c->tnc_mutex));
1208 if (!c->zroot.znode)
1209 return 0;
1210
1211 znode = ubifs_tnc_postorder_first(c->zroot.znode);
1212 while (1) {
1213 struct ubifs_znode *prev;
1214 struct ubifs_zbranch *zbr;
1215
1216 if (!znode->parent)
1217 zbr = &c->zroot;
1218 else
1219 zbr = &znode->parent->zbranch[znode->iip];
1220
1221 err = dbg_check_znode(c, zbr);
1222 if (err)
1223 return err;
1224
1225 if (extra) {
1226 if (ubifs_zn_dirty(znode))
1227 dirty_cnt += 1;
1228 else
1229 clean_cnt += 1;
1230 }
1231
1232 prev = znode;
1233 znode = ubifs_tnc_postorder_next(znode);
1234 if (!znode)
1235 break;
1236
1237 /*
1238 * If the last key of this znode is equivalent to the first key
1239 * of the next znode (collision), then check order of the keys.
1240 */
1241 last = prev->child_cnt - 1;
1242 if (prev->level == 0 && znode->level == 0 && !c->replaying &&
1243 !keys_cmp(c, &prev->zbranch[last].key,
1244 &znode->zbranch[0].key)) {
1245 err = dbg_check_key_order(c, &prev->zbranch[last],
1246 &znode->zbranch[0]);
1247 if (err < 0)
1248 return err;
1249 if (err) {
1250 ubifs_msg("first znode");
1251 dbg_dump_znode(c, prev);
1252 ubifs_msg("second znode");
1253 dbg_dump_znode(c, znode);
1254 return -EINVAL;
1255 }
1256 }
1257 }
1258
1259 if (extra) {
1260 if (clean_cnt != atomic_long_read(&c->clean_zn_cnt)) {
1261 ubifs_err("incorrect clean_zn_cnt %ld, calculated %ld",
1262 atomic_long_read(&c->clean_zn_cnt),
1263 clean_cnt);
1264 return -EINVAL;
1265 }
1266 if (dirty_cnt != atomic_long_read(&c->dirty_zn_cnt)) {
1267 ubifs_err("incorrect dirty_zn_cnt %ld, calculated %ld",
1268 atomic_long_read(&c->dirty_zn_cnt),
1269 dirty_cnt);
1270 return -EINVAL;
1271 }
1272 }
1273
1274 return 0;
1275}
1276
1277/**
1278 * dbg_walk_index - walk the on-flash index.
1279 * @c: UBIFS file-system description object
1280 * @leaf_cb: called for each leaf node
1281 * @znode_cb: called for each indexing node
1282 * @priv: private date which is passed to callbacks
1283 *
1284 * This function walks the UBIFS index and calls the @leaf_cb for each leaf
1285 * node and @znode_cb for each indexing node. Returns zero in case of success
1286 * and a negative error code in case of failure.
1287 *
1288 * It would be better if this function removed every znode it pulled to into
1289 * the TNC, so that the behavior more closely matched the non-debugging
1290 * behavior.
1291 */
1292int dbg_walk_index(struct ubifs_info *c, dbg_leaf_callback leaf_cb,
1293 dbg_znode_callback znode_cb, void *priv)
1294{
1295 int err;
1296 struct ubifs_zbranch *zbr;
1297 struct ubifs_znode *znode, *child;
1298
1299 mutex_lock(&c->tnc_mutex);
1300 /* If the root indexing node is not in TNC - pull it */
1301 if (!c->zroot.znode) {
1302 c->zroot.znode = ubifs_load_znode(c, &c->zroot, NULL, 0);
1303 if (IS_ERR(c->zroot.znode)) {
1304 err = PTR_ERR(c->zroot.znode);
1305 c->zroot.znode = NULL;
1306 goto out_unlock;
1307 }
1308 }
1309
1310 /*
1311 * We are going to traverse the indexing tree in the postorder manner.
1312 * Go down and find the leftmost indexing node where we are going to
1313 * start from.
1314 */
1315 znode = c->zroot.znode;
1316 while (znode->level > 0) {
1317 zbr = &znode->zbranch[0];
1318 child = zbr->znode;
1319 if (!child) {
1320 child = ubifs_load_znode(c, zbr, znode, 0);
1321 if (IS_ERR(child)) {
1322 err = PTR_ERR(child);
1323 goto out_unlock;
1324 }
1325 zbr->znode = child;
1326 }
1327
1328 znode = child;
1329 }
1330
1331 /* Iterate over all indexing nodes */
1332 while (1) {
1333 int idx;
1334
1335 cond_resched();
1336
1337 if (znode_cb) {
1338 err = znode_cb(c, znode, priv);
1339 if (err) {
1340 ubifs_err("znode checking function returned "
1341 "error %d", err);
1342 dbg_dump_znode(c, znode);
1343 goto out_dump;
1344 }
1345 }
1346 if (leaf_cb && znode->level == 0) {
1347 for (idx = 0; idx < znode->child_cnt; idx++) {
1348 zbr = &znode->zbranch[idx];
1349 err = leaf_cb(c, zbr, priv);
1350 if (err) {
1351 ubifs_err("leaf checking function "
1352 "returned error %d, for leaf "
1353 "at LEB %d:%d",
1354 err, zbr->lnum, zbr->offs);
1355 goto out_dump;
1356 }
1357 }
1358 }
1359
1360 if (!znode->parent)
1361 break;
1362
1363 idx = znode->iip + 1;
1364 znode = znode->parent;
1365 if (idx < znode->child_cnt) {
1366 /* Switch to the next index in the parent */
1367 zbr = &znode->zbranch[idx];
1368 child = zbr->znode;
1369 if (!child) {
1370 child = ubifs_load_znode(c, zbr, znode, idx);
1371 if (IS_ERR(child)) {
1372 err = PTR_ERR(child);
1373 goto out_unlock;
1374 }
1375 zbr->znode = child;
1376 }
1377 znode = child;
1378 } else
1379 /*
1380 * This is the last child, switch to the parent and
1381 * continue.
1382 */
1383 continue;
1384
1385 /* Go to the lowest leftmost znode in the new sub-tree */
1386 while (znode->level > 0) {
1387 zbr = &znode->zbranch[0];
1388 child = zbr->znode;
1389 if (!child) {
1390 child = ubifs_load_znode(c, zbr, znode, 0);
1391 if (IS_ERR(child)) {
1392 err = PTR_ERR(child);
1393 goto out_unlock;
1394 }
1395 zbr->znode = child;
1396 }
1397 znode = child;
1398 }
1399 }
1400
1401 mutex_unlock(&c->tnc_mutex);
1402 return 0;
1403
1404out_dump:
1405 if (znode->parent)
1406 zbr = &znode->parent->zbranch[znode->iip];
1407 else
1408 zbr = &c->zroot;
1409 ubifs_msg("dump of znode at LEB %d:%d", zbr->lnum, zbr->offs);
1410 dbg_dump_znode(c, znode);
1411out_unlock:
1412 mutex_unlock(&c->tnc_mutex);
1413 return err;
1414}
1415
1416/**
1417 * add_size - add znode size to partially calculated index size.
1418 * @c: UBIFS file-system description object
1419 * @znode: znode to add size for
1420 * @priv: partially calculated index size
1421 *
1422 * This is a helper function for 'dbg_check_idx_size()' which is called for
1423 * every indexing node and adds its size to the 'long long' variable pointed to
1424 * by @priv.
1425 */
1426static int add_size(struct ubifs_info *c, struct ubifs_znode *znode, void *priv)
1427{
1428 long long *idx_size = priv;
1429 int add;
1430
1431 add = ubifs_idx_node_sz(c, znode->child_cnt);
1432 add = ALIGN(add, 8);
1433 *idx_size += add;
1434 return 0;
1435}
1436
1437/**
1438 * dbg_check_idx_size - check index size.
1439 * @c: UBIFS file-system description object
1440 * @idx_size: size to check
1441 *
1442 * This function walks the UBIFS index, calculates its size and checks that the
1443 * size is equivalent to @idx_size. Returns zero in case of success and a
1444 * negative error code in case of failure.
1445 */
1446int dbg_check_idx_size(struct ubifs_info *c, long long idx_size)
1447{
1448 int err;
1449 long long calc = 0;
1450
1451 if (!(ubifs_chk_flags & UBIFS_CHK_IDX_SZ))
1452 return 0;
1453
1454 err = dbg_walk_index(c, NULL, add_size, &calc);
1455 if (err) {
1456 ubifs_err("error %d while walking the index", err);
1457 return err;
1458 }
1459
1460 if (calc != idx_size) {
1461 ubifs_err("index size check failed: calculated size is %lld, "
1462 "should be %lld", calc, idx_size);
1463 dump_stack();
1464 return -EINVAL;
1465 }
1466
1467 return 0;
1468}
1469
1470/**
1471 * struct fsck_inode - information about an inode used when checking the file-system.
1472 * @rb: link in the RB-tree of inodes
1473 * @inum: inode number
1474 * @mode: inode type, permissions, etc
1475 * @nlink: inode link count
1476 * @xattr_cnt: count of extended attributes
1477 * @references: how many directory/xattr entries refer this inode (calculated
1478 * while walking the index)
1479 * @calc_cnt: for directory inode count of child directories
1480 * @size: inode size (read from on-flash inode)
1481 * @xattr_sz: summary size of all extended attributes (read from on-flash
1482 * inode)
1483 * @calc_sz: for directories calculated directory size
1484 * @calc_xcnt: count of extended attributes
1485 * @calc_xsz: calculated summary size of all extended attributes
1486 * @xattr_nms: sum of lengths of all extended attribute names belonging to this
1487 * inode (read from on-flash inode)
1488 * @calc_xnms: calculated sum of lengths of all extended attribute names
1489 */
1490struct fsck_inode {
1491 struct rb_node rb;
1492 ino_t inum;
1493 umode_t mode;
1494 unsigned int nlink;
1495 unsigned int xattr_cnt;
1496 int references;
1497 int calc_cnt;
1498 long long size;
1499 unsigned int xattr_sz;
1500 long long calc_sz;
1501 long long calc_xcnt;
1502 long long calc_xsz;
1503 unsigned int xattr_nms;
1504 long long calc_xnms;
1505};
1506
1507/**
1508 * struct fsck_data - private FS checking information.
1509 * @inodes: RB-tree of all inodes (contains @struct fsck_inode objects)
1510 */
1511struct fsck_data {
1512 struct rb_root inodes;
1513};
1514
1515/**
1516 * add_inode - add inode information to RB-tree of inodes.
1517 * @c: UBIFS file-system description object
1518 * @fsckd: FS checking information
1519 * @ino: raw UBIFS inode to add
1520 *
1521 * This is a helper function for 'check_leaf()' which adds information about
1522 * inode @ino to the RB-tree of inodes. Returns inode information pointer in
1523 * case of success and a negative error code in case of failure.
1524 */
1525static struct fsck_inode *add_inode(struct ubifs_info *c,
1526 struct fsck_data *fsckd,
1527 struct ubifs_ino_node *ino)
1528{
1529 struct rb_node **p, *parent = NULL;
1530 struct fsck_inode *fscki;
1531 ino_t inum = key_inum_flash(c, &ino->key);
1532
1533 p = &fsckd->inodes.rb_node;
1534 while (*p) {
1535 parent = *p;
1536 fscki = rb_entry(parent, struct fsck_inode, rb);
1537 if (inum < fscki->inum)
1538 p = &(*p)->rb_left;
1539 else if (inum > fscki->inum)
1540 p = &(*p)->rb_right;
1541 else
1542 return fscki;
1543 }
1544
1545 if (inum > c->highest_inum) {
1546 ubifs_err("too high inode number, max. is %lu",
1547 c->highest_inum);
1548 return ERR_PTR(-EINVAL);
1549 }
1550
1551 fscki = kzalloc(sizeof(struct fsck_inode), GFP_NOFS);
1552 if (!fscki)
1553 return ERR_PTR(-ENOMEM);
1554
1555 fscki->inum = inum;
1556 fscki->nlink = le32_to_cpu(ino->nlink);
1557 fscki->size = le64_to_cpu(ino->size);
1558 fscki->xattr_cnt = le32_to_cpu(ino->xattr_cnt);
1559 fscki->xattr_sz = le32_to_cpu(ino->xattr_size);
1560 fscki->xattr_nms = le32_to_cpu(ino->xattr_names);
1561 fscki->mode = le32_to_cpu(ino->mode);
1562 if (S_ISDIR(fscki->mode)) {
1563 fscki->calc_sz = UBIFS_INO_NODE_SZ;
1564 fscki->calc_cnt = 2;
1565 }
1566 rb_link_node(&fscki->rb, parent, p);
1567 rb_insert_color(&fscki->rb, &fsckd->inodes);
1568 return fscki;
1569}
1570
1571/**
1572 * search_inode - search inode in the RB-tree of inodes.
1573 * @fsckd: FS checking information
1574 * @inum: inode number to search
1575 *
1576 * This is a helper function for 'check_leaf()' which searches inode @inum in
1577 * the RB-tree of inodes and returns an inode information pointer or %NULL if
1578 * the inode was not found.
1579 */
1580static struct fsck_inode *search_inode(struct fsck_data *fsckd, ino_t inum)
1581{
1582 struct rb_node *p;
1583 struct fsck_inode *fscki;
1584
1585 p = fsckd->inodes.rb_node;
1586 while (p) {
1587 fscki = rb_entry(p, struct fsck_inode, rb);
1588 if (inum < fscki->inum)
1589 p = p->rb_left;
1590 else if (inum > fscki->inum)
1591 p = p->rb_right;
1592 else
1593 return fscki;
1594 }
1595 return NULL;
1596}
1597
1598/**
1599 * read_add_inode - read inode node and add it to RB-tree of inodes.
1600 * @c: UBIFS file-system description object
1601 * @fsckd: FS checking information
1602 * @inum: inode number to read
1603 *
1604 * This is a helper function for 'check_leaf()' which finds inode node @inum in
1605 * the index, reads it, and adds it to the RB-tree of inodes. Returns inode
1606 * information pointer in case of success and a negative error code in case of
1607 * failure.
1608 */
1609static struct fsck_inode *read_add_inode(struct ubifs_info *c,
1610 struct fsck_data *fsckd, ino_t inum)
1611{
1612 int n, err;
1613 union ubifs_key key;
1614 struct ubifs_znode *znode;
1615 struct ubifs_zbranch *zbr;
1616 struct ubifs_ino_node *ino;
1617 struct fsck_inode *fscki;
1618
1619 fscki = search_inode(fsckd, inum);
1620 if (fscki)
1621 return fscki;
1622
1623 ino_key_init(c, &key, inum);
1624 err = ubifs_lookup_level0(c, &key, &znode, &n);
1625 if (!err) {
1626 ubifs_err("inode %lu not found in index", inum);
1627 return ERR_PTR(-ENOENT);
1628 } else if (err < 0) {
1629 ubifs_err("error %d while looking up inode %lu", err, inum);
1630 return ERR_PTR(err);
1631 }
1632
1633 zbr = &znode->zbranch[n];
1634 if (zbr->len < UBIFS_INO_NODE_SZ) {
1635 ubifs_err("bad node %lu node length %d", inum, zbr->len);
1636 return ERR_PTR(-EINVAL);
1637 }
1638
1639 ino = kmalloc(zbr->len, GFP_NOFS);
1640 if (!ino)
1641 return ERR_PTR(-ENOMEM);
1642
1643 err = ubifs_tnc_read_node(c, zbr, ino);
1644 if (err) {
1645 ubifs_err("cannot read inode node at LEB %d:%d, error %d",
1646 zbr->lnum, zbr->offs, err);
1647 kfree(ino);
1648 return ERR_PTR(err);
1649 }
1650
1651 fscki = add_inode(c, fsckd, ino);
1652 kfree(ino);
1653 if (IS_ERR(fscki)) {
1654 ubifs_err("error %ld while adding inode %lu node",
1655 PTR_ERR(fscki), inum);
1656 return fscki;
1657 }
1658
1659 return fscki;
1660}
1661
1662/**
1663 * check_leaf - check leaf node.
1664 * @c: UBIFS file-system description object
1665 * @zbr: zbranch of the leaf node to check
1666 * @priv: FS checking information
1667 *
1668 * This is a helper function for 'dbg_check_filesystem()' which is called for
1669 * every single leaf node while walking the indexing tree. It checks that the
1670 * leaf node referred from the indexing tree exists, has correct CRC, and does
1671 * some other basic validation. This function is also responsible for building
1672 * an RB-tree of inodes - it adds all inodes into the RB-tree. It also
1673 * calculates reference count, size, etc for each inode in order to later
1674 * compare them to the information stored inside the inodes and detect possible
1675 * inconsistencies. Returns zero in case of success and a negative error code
1676 * in case of failure.
1677 */
1678static int check_leaf(struct ubifs_info *c, struct ubifs_zbranch *zbr,
1679 void *priv)
1680{
1681 ino_t inum;
1682 void *node;
1683 struct ubifs_ch *ch;
1684 int err, type = key_type(c, &zbr->key);
1685 struct fsck_inode *fscki;
1686
1687 if (zbr->len < UBIFS_CH_SZ) {
1688 ubifs_err("bad leaf length %d (LEB %d:%d)",
1689 zbr->len, zbr->lnum, zbr->offs);
1690 return -EINVAL;
1691 }
1692
1693 node = kmalloc(zbr->len, GFP_NOFS);
1694 if (!node)
1695 return -ENOMEM;
1696
1697 err = ubifs_tnc_read_node(c, zbr, node);
1698 if (err) {
1699 ubifs_err("cannot read leaf node at LEB %d:%d, error %d",
1700 zbr->lnum, zbr->offs, err);
1701 goto out_free;
1702 }
1703
1704 /* If this is an inode node, add it to RB-tree of inodes */
1705 if (type == UBIFS_INO_KEY) {
1706 fscki = add_inode(c, priv, node);
1707 if (IS_ERR(fscki)) {
1708 err = PTR_ERR(fscki);
1709 ubifs_err("error %d while adding inode node", err);
1710 goto out_dump;
1711 }
1712 goto out;
1713 }
1714
1715 if (type != UBIFS_DENT_KEY && type != UBIFS_XENT_KEY &&
1716 type != UBIFS_DATA_KEY) {
1717 ubifs_err("unexpected node type %d at LEB %d:%d",
1718 type, zbr->lnum, zbr->offs);
1719 err = -EINVAL;
1720 goto out_free;
1721 }
1722
1723 ch = node;
1724 if (le64_to_cpu(ch->sqnum) > c->max_sqnum) {
1725 ubifs_err("too high sequence number, max. is %llu",
1726 c->max_sqnum);
1727 err = -EINVAL;
1728 goto out_dump;
1729 }
1730
1731 if (type == UBIFS_DATA_KEY) {
1732 long long blk_offs;
1733 struct ubifs_data_node *dn = node;
1734
1735 /*
1736 * Search the inode node this data node belongs to and insert
1737 * it to the RB-tree of inodes.
1738 */
1739 inum = key_inum_flash(c, &dn->key);
1740 fscki = read_add_inode(c, priv, inum);
1741 if (IS_ERR(fscki)) {
1742 err = PTR_ERR(fscki);
1743 ubifs_err("error %d while processing data node and "
1744 "trying to find inode node %lu", err, inum);
1745 goto out_dump;
1746 }
1747
1748 /* Make sure the data node is within inode size */
1749 blk_offs = key_block_flash(c, &dn->key);
1750 blk_offs <<= UBIFS_BLOCK_SHIFT;
1751 blk_offs += le32_to_cpu(dn->size);
1752 if (blk_offs > fscki->size) {
1753 ubifs_err("data node at LEB %d:%d is not within inode "
1754 "size %lld", zbr->lnum, zbr->offs,
1755 fscki->size);
1756 err = -EINVAL;
1757 goto out_dump;
1758 }
1759 } else {
1760 int nlen;
1761 struct ubifs_dent_node *dent = node;
1762 struct fsck_inode *fscki1;
1763
1764 err = ubifs_validate_entry(c, dent);
1765 if (err)
1766 goto out_dump;
1767
1768 /*
1769 * Search the inode node this entry refers to and the parent
1770 * inode node and insert them to the RB-tree of inodes.
1771 */
1772 inum = le64_to_cpu(dent->inum);
1773 fscki = read_add_inode(c, priv, inum);
1774 if (IS_ERR(fscki)) {
1775 err = PTR_ERR(fscki);
1776 ubifs_err("error %d while processing entry node and "
1777 "trying to find inode node %lu", err, inum);
1778 goto out_dump;
1779 }
1780
1781 /* Count how many direntries or xentries refers this inode */
1782 fscki->references += 1;
1783
1784 inum = key_inum_flash(c, &dent->key);
1785 fscki1 = read_add_inode(c, priv, inum);
1786 if (IS_ERR(fscki1)) {
1787 err = PTR_ERR(fscki);
1788 ubifs_err("error %d while processing entry node and "
1789 "trying to find parent inode node %lu",
1790 err, inum);
1791 goto out_dump;
1792 }
1793
1794 nlen = le16_to_cpu(dent->nlen);
1795 if (type == UBIFS_XENT_KEY) {
1796 fscki1->calc_xcnt += 1;
1797 fscki1->calc_xsz += CALC_DENT_SIZE(nlen);
1798 fscki1->calc_xsz += CALC_XATTR_BYTES(fscki->size);
1799 fscki1->calc_xnms += nlen;
1800 } else {
1801 fscki1->calc_sz += CALC_DENT_SIZE(nlen);
1802 if (dent->type == UBIFS_ITYPE_DIR)
1803 fscki1->calc_cnt += 1;
1804 }
1805 }
1806
1807out:
1808 kfree(node);
1809 return 0;
1810
1811out_dump:
1812 ubifs_msg("dump of node at LEB %d:%d", zbr->lnum, zbr->offs);
1813 dbg_dump_node(c, node);
1814out_free:
1815 kfree(node);
1816 return err;
1817}
1818
1819/**
1820 * free_inodes - free RB-tree of inodes.
1821 * @fsckd: FS checking information
1822 */
1823static void free_inodes(struct fsck_data *fsckd)
1824{
1825 struct rb_node *this = fsckd->inodes.rb_node;
1826 struct fsck_inode *fscki;
1827
1828 while (this) {
1829 if (this->rb_left)
1830 this = this->rb_left;
1831 else if (this->rb_right)
1832 this = this->rb_right;
1833 else {
1834 fscki = rb_entry(this, struct fsck_inode, rb);
1835 this = rb_parent(this);
1836 if (this) {
1837 if (this->rb_left == &fscki->rb)
1838 this->rb_left = NULL;
1839 else
1840 this->rb_right = NULL;
1841 }
1842 kfree(fscki);
1843 }
1844 }
1845}
1846
1847/**
1848 * check_inodes - checks all inodes.
1849 * @c: UBIFS file-system description object
1850 * @fsckd: FS checking information
1851 *
1852 * This is a helper function for 'dbg_check_filesystem()' which walks the
1853 * RB-tree of inodes after the index scan has been finished, and checks that
1854 * inode nlink, size, etc are correct. Returns zero if inodes are fine,
1855 * %-EINVAL if not, and a negative error code in case of failure.
1856 */
1857static int check_inodes(struct ubifs_info *c, struct fsck_data *fsckd)
1858{
1859 int n, err;
1860 union ubifs_key key;
1861 struct ubifs_znode *znode;
1862 struct ubifs_zbranch *zbr;
1863 struct ubifs_ino_node *ino;
1864 struct fsck_inode *fscki;
1865 struct rb_node *this = rb_first(&fsckd->inodes);
1866
1867 while (this) {
1868 fscki = rb_entry(this, struct fsck_inode, rb);
1869 this = rb_next(this);
1870
1871 if (S_ISDIR(fscki->mode)) {
1872 /*
1873 * Directories have to have exactly one reference (they
1874 * cannot have hardlinks), although root inode is an
1875 * exception.
1876 */
1877 if (fscki->inum != UBIFS_ROOT_INO &&
1878 fscki->references != 1) {
1879 ubifs_err("directory inode %lu has %d "
1880 "direntries which refer it, but "
1881 "should be 1", fscki->inum,
1882 fscki->references);
1883 goto out_dump;
1884 }
1885 if (fscki->inum == UBIFS_ROOT_INO &&
1886 fscki->references != 0) {
1887 ubifs_err("root inode %lu has non-zero (%d) "
1888 "direntries which refer it",
1889 fscki->inum, fscki->references);
1890 goto out_dump;
1891 }
1892 if (fscki->calc_sz != fscki->size) {
1893 ubifs_err("directory inode %lu size is %lld, "
1894 "but calculated size is %lld",
1895 fscki->inum, fscki->size,
1896 fscki->calc_sz);
1897 goto out_dump;
1898 }
1899 if (fscki->calc_cnt != fscki->nlink) {
1900 ubifs_err("directory inode %lu nlink is %d, "
1901 "but calculated nlink is %d",
1902 fscki->inum, fscki->nlink,
1903 fscki->calc_cnt);
1904 goto out_dump;
1905 }
1906 } else {
1907 if (fscki->references != fscki->nlink) {
1908 ubifs_err("inode %lu nlink is %d, but "
1909 "calculated nlink is %d", fscki->inum,
1910 fscki->nlink, fscki->references);
1911 goto out_dump;
1912 }
1913 }
1914 if (fscki->xattr_sz != fscki->calc_xsz) {
1915 ubifs_err("inode %lu has xattr size %u, but "
1916 "calculated size is %lld",
1917 fscki->inum, fscki->xattr_sz,
1918 fscki->calc_xsz);
1919 goto out_dump;
1920 }
1921 if (fscki->xattr_cnt != fscki->calc_xcnt) {
1922 ubifs_err("inode %lu has %u xattrs, but "
1923 "calculated count is %lld", fscki->inum,
1924 fscki->xattr_cnt, fscki->calc_xcnt);
1925 goto out_dump;
1926 }
1927 if (fscki->xattr_nms != fscki->calc_xnms) {
1928 ubifs_err("inode %lu has xattr names' size %u, but "
1929 "calculated names' size is %lld",
1930 fscki->inum, fscki->xattr_nms,
1931 fscki->calc_xnms);
1932 goto out_dump;
1933 }
1934 }
1935
1936 return 0;
1937
1938out_dump:
1939 /* Read the bad inode and dump it */
1940 ino_key_init(c, &key, fscki->inum);
1941 err = ubifs_lookup_level0(c, &key, &znode, &n);
1942 if (!err) {
1943 ubifs_err("inode %lu not found in index", fscki->inum);
1944 return -ENOENT;
1945 } else if (err < 0) {
1946 ubifs_err("error %d while looking up inode %lu",
1947 err, fscki->inum);
1948 return err;
1949 }
1950
1951 zbr = &znode->zbranch[n];
1952 ino = kmalloc(zbr->len, GFP_NOFS);
1953 if (!ino)
1954 return -ENOMEM;
1955
1956 err = ubifs_tnc_read_node(c, zbr, ino);
1957 if (err) {
1958 ubifs_err("cannot read inode node at LEB %d:%d, error %d",
1959 zbr->lnum, zbr->offs, err);
1960 kfree(ino);
1961 return err;
1962 }
1963
1964 ubifs_msg("dump of the inode %lu sitting in LEB %d:%d",
1965 fscki->inum, zbr->lnum, zbr->offs);
1966 dbg_dump_node(c, ino);
1967 kfree(ino);
1968 return -EINVAL;
1969}
1970
1971/**
1972 * dbg_check_filesystem - check the file-system.
1973 * @c: UBIFS file-system description object
1974 *
1975 * This function checks the file system, namely:
1976 * o makes sure that all leaf nodes exist and their CRCs are correct;
1977 * o makes sure inode nlink, size, xattr size/count are correct (for all
1978 * inodes).
1979 *
1980 * The function reads whole indexing tree and all nodes, so it is pretty
1981 * heavy-weight. Returns zero if the file-system is consistent, %-EINVAL if
1982 * not, and a negative error code in case of failure.
1983 */
1984int dbg_check_filesystem(struct ubifs_info *c)
1985{
1986 int err;
1987 struct fsck_data fsckd;
1988
1989 if (!(ubifs_chk_flags & UBIFS_CHK_FS))
1990 return 0;
1991
1992 fsckd.inodes = RB_ROOT;
1993 err = dbg_walk_index(c, check_leaf, NULL, &fsckd);
1994 if (err)
1995 goto out_free;
1996
1997 err = check_inodes(c, &fsckd);
1998 if (err)
1999 goto out_free;
2000
2001 free_inodes(&fsckd);
2002 return 0;
2003
2004out_free:
2005 ubifs_err("file-system check failed with error %d", err);
2006 dump_stack();
2007 free_inodes(&fsckd);
2008 return err;
2009}
2010
2011static int invocation_cnt;
2012
2013int dbg_force_in_the_gaps(void)
2014{
2015 if (!dbg_force_in_the_gaps_enabled)
2016 return 0;
2017 /* Force in-the-gaps every 8th commit */
2018 return !((invocation_cnt++) & 0x7);
2019}
2020
2021/* Failure mode for recovery testing */
2022
/*
 * Evaluates to true with a probability of roughly @n / @d: one pseudo-random
 * number from 'simple_rand()' is compared with the corresponding fraction of
 * 32768.
 */
#define chance(n, d) \
	(simple_rand() <= (n) * 32768LL / (d))
2024
2025struct failure_mode_info {
2026 struct list_head list;
2027 struct ubifs_info *c;
2028};
2029
2030static LIST_HEAD(fmi_list);
2031static DEFINE_SPINLOCK(fmi_lock);
2032
2033static unsigned int next;
2034
2035static int simple_rand(void)
2036{
2037 if (next == 0)
2038 next = current->pid;
2039 next = next * 1103515245 + 12345;
2040 return (next >> 16) & 32767;
2041}
2042
2043void dbg_failure_mode_registration(struct ubifs_info *c)
2044{
2045 struct failure_mode_info *fmi;
2046
2047 fmi = kmalloc(sizeof(struct failure_mode_info), GFP_NOFS);
2048 if (!fmi) {
2049 dbg_err("Failed to register failure mode - no memory");
2050 return;
2051 }
2052 fmi->c = c;
2053 spin_lock(&fmi_lock);
2054 list_add_tail(&fmi->list, &fmi_list);
2055 spin_unlock(&fmi_lock);
2056}
2057
2058void dbg_failure_mode_deregistration(struct ubifs_info *c)
2059{
2060 struct failure_mode_info *fmi, *tmp;
2061
2062 spin_lock(&fmi_lock);
2063 list_for_each_entry_safe(fmi, tmp, &fmi_list, list)
2064 if (fmi->c == c) {
2065 list_del(&fmi->list);
2066 kfree(fmi);
2067 }
2068 spin_unlock(&fmi_lock);
2069}
2070
2071static struct ubifs_info *dbg_find_info(struct ubi_volume_desc *desc)
2072{
2073 struct failure_mode_info *fmi;
2074
2075 spin_lock(&fmi_lock);
2076 list_for_each_entry(fmi, &fmi_list, list)
2077 if (fmi->c->ubi == desc) {
2078 struct ubifs_info *c = fmi->c;
2079
2080 spin_unlock(&fmi_lock);
2081 return c;
2082 }
2083 spin_unlock(&fmi_lock);
2084 return NULL;
2085}
2086
2087static int in_failure_mode(struct ubi_volume_desc *desc)
2088{
2089 struct ubifs_info *c = dbg_find_info(desc);
2090
2091 if (c && dbg_failure_mode)
2092 return c->failure_mode;
2093 return 0;
2094}
2095
2096static int do_fail(struct ubi_volume_desc *desc, int lnum, int write)
2097{
2098 struct ubifs_info *c = dbg_find_info(desc);
2099
2100 if (!c || !dbg_failure_mode)
2101 return 0;
2102 if (c->failure_mode)
2103 return 1;
2104 if (!c->fail_cnt) {
2105 /* First call - decide delay to failure */
2106 if (chance(1, 2)) {
2107 unsigned int delay = 1 << (simple_rand() >> 11);
2108
2109 if (chance(1, 2)) {
2110 c->fail_delay = 1;
2111 c->fail_timeout = jiffies +
2112 msecs_to_jiffies(delay);
2113 dbg_rcvry("failing after %ums", delay);
2114 } else {
2115 c->fail_delay = 2;
2116 c->fail_cnt_max = delay;
2117 dbg_rcvry("failing after %u calls", delay);
2118 }
2119 }
2120 c->fail_cnt += 1;
2121 }
2122 /* Determine if failure delay has expired */
2123 if (c->fail_delay == 1) {
2124 if (time_before(jiffies, c->fail_timeout))
2125 return 0;
2126 } else if (c->fail_delay == 2)
2127 if (c->fail_cnt++ < c->fail_cnt_max)
2128 return 0;
2129 if (lnum == UBIFS_SB_LNUM) {
2130 if (write) {
2131 if (chance(1, 2))
2132 return 0;
2133 } else if (chance(19, 20))
2134 return 0;
2135 dbg_rcvry("failing in super block LEB %d", lnum);
2136 } else if (lnum == UBIFS_MST_LNUM || lnum == UBIFS_MST_LNUM + 1) {
2137 if (chance(19, 20))
2138 return 0;
2139 dbg_rcvry("failing in master LEB %d", lnum);
2140 } else if (lnum >= UBIFS_LOG_LNUM && lnum <= c->log_last) {
2141 if (write) {
2142 if (chance(99, 100))
2143 return 0;
2144 } else if (chance(399, 400))
2145 return 0;
2146 dbg_rcvry("failing in log LEB %d", lnum);
2147 } else if (lnum >= c->lpt_first && lnum <= c->lpt_last) {
2148 if (write) {
2149 if (chance(7, 8))
2150 return 0;
2151 } else if (chance(19, 20))
2152 return 0;
2153 dbg_rcvry("failing in LPT LEB %d", lnum);
2154 } else if (lnum >= c->orph_first && lnum <= c->orph_last) {
2155 if (write) {
2156 if (chance(1, 2))
2157 return 0;
2158 } else if (chance(9, 10))
2159 return 0;
2160 dbg_rcvry("failing in orphan LEB %d", lnum);
2161 } else if (lnum == c->ihead_lnum) {
2162 if (chance(99, 100))
2163 return 0;
2164 dbg_rcvry("failing in index head LEB %d", lnum);
2165 } else if (c->jheads && lnum == c->jheads[GCHD].wbuf.lnum) {
2166 if (chance(9, 10))
2167 return 0;
2168 dbg_rcvry("failing in GC head LEB %d", lnum);
2169 } else if (write && !RB_EMPTY_ROOT(&c->buds) &&
2170 !ubifs_search_bud(c, lnum)) {
2171 if (chance(19, 20))
2172 return 0;
2173 dbg_rcvry("failing in non-bud LEB %d", lnum);
2174 } else if (c->cmt_state == COMMIT_RUNNING_BACKGROUND ||
2175 c->cmt_state == COMMIT_RUNNING_REQUIRED) {
2176 if (chance(999, 1000))
2177 return 0;
2178 dbg_rcvry("failing in bud LEB %d commit running", lnum);
2179 } else {
2180 if (chance(9999, 10000))
2181 return 0;
2182 dbg_rcvry("failing in bud LEB %d commit not running", lnum);
2183 }
2184 ubifs_err("*** SETTING FAILURE MODE ON (LEB %d) ***", lnum);
2185 c->failure_mode = 1;
2186 dump_stack();
2187 return 1;
2188}
2189
/*
 * Corrupt the tail of a buffer: a random number of leading bytes of @buf is
 * kept and the rest, up to @len, is overwritten with 0xFF bytes. Note, the
 * buffer is modified in place even though it is passed as a 'const' pointer.
 */
static void cut_data(const void *buf, int len)
{
	unsigned char *bytes = (void *)buf;
	int keep = (len * (long long)simple_rand()) >> 15;

	if (keep < len)
		memset(bytes + keep, 0xff, len - keep);
}
2199
2200int dbg_leb_read(struct ubi_volume_desc *desc, int lnum, char *buf, int offset,
2201 int len, int check)
2202{
2203 if (in_failure_mode(desc))
2204 return -EIO;
2205 return ubi_leb_read(desc, lnum, buf, offset, len, check);
2206}
2207
2208int dbg_leb_write(struct ubi_volume_desc *desc, int lnum, const void *buf,
2209 int offset, int len, int dtype)
2210{
2211 int err;
2212
2213 if (in_failure_mode(desc))
2214 return -EIO;
2215 if (do_fail(desc, lnum, 1))
2216 cut_data(buf, len);
2217 err = ubi_leb_write(desc, lnum, buf, offset, len, dtype);
2218 if (err)
2219 return err;
2220 if (in_failure_mode(desc))
2221 return -EIO;
2222 return 0;
2223}
2224
2225int dbg_leb_change(struct ubi_volume_desc *desc, int lnum, const void *buf,
2226 int len, int dtype)
2227{
2228 int err;
2229
2230 if (do_fail(desc, lnum, 1))
2231 return -EIO;
2232 err = ubi_leb_change(desc, lnum, buf, len, dtype);
2233 if (err)
2234 return err;
2235 if (do_fail(desc, lnum, 1))
2236 return -EIO;
2237 return 0;
2238}
2239
2240int dbg_leb_erase(struct ubi_volume_desc *desc, int lnum)
2241{
2242 int err;
2243
2244 if (do_fail(desc, lnum, 0))
2245 return -EIO;
2246 err = ubi_leb_erase(desc, lnum);
2247 if (err)
2248 return err;
2249 if (do_fail(desc, lnum, 0))
2250 return -EIO;
2251 return 0;
2252}
2253
2254int dbg_leb_unmap(struct ubi_volume_desc *desc, int lnum)
2255{
2256 int err;
2257
2258 if (do_fail(desc, lnum, 0))
2259 return -EIO;
2260 err = ubi_leb_unmap(desc, lnum);
2261 if (err)
2262 return err;
2263 if (do_fail(desc, lnum, 0))
2264 return -EIO;
2265 return 0;
2266}
2267
2268int dbg_is_mapped(struct ubi_volume_desc *desc, int lnum)
2269{
2270 if (in_failure_mode(desc))
2271 return -EIO;
2272 return ubi_is_mapped(desc, lnum);
2273}
2274
2275int dbg_leb_map(struct ubi_volume_desc *desc, int lnum, int dtype)
2276{
2277 int err;
2278
2279 if (do_fail(desc, lnum, 0))
2280 return -EIO;
2281 err = ubi_leb_map(desc, lnum, dtype);
2282 if (err)
2283 return err;
2284 if (do_fail(desc, lnum, 0))
2285 return -EIO;
2286 return 0;
2287}
2288
2289#endif /* CONFIG_UBIFS_FS_DEBUG */
diff --git a/fs/ubifs/debug.h b/fs/ubifs/debug.h
new file mode 100644
index 00000000000..3c4f1e93c9e
--- /dev/null
+++ b/fs/ubifs/debug.h
@@ -0,0 +1,403 @@
1/*
2 * This file is part of UBIFS.
3 *
4 * Copyright (C) 2006-2008 Nokia Corporation.
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms of the GNU General Public License version 2 as published by
8 * the Free Software Foundation.
9 *
10 * This program is distributed in the hope that it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13 * more details.
14 *
15 * You should have received a copy of the GNU General Public License along with
16 * this program; if not, write to the Free Software Foundation, Inc., 51
17 * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
18 *
19 * Authors: Artem Bityutskiy (Битюцкий Артём)
20 * Adrian Hunter
21 */
22
23#ifndef __UBIFS_DEBUG_H__
24#define __UBIFS_DEBUG_H__
25
26#ifdef CONFIG_UBIFS_FS_DEBUG
27
28#define UBIFS_DBG(op) op
29
/*
 * Non-fatal assertion: if @expr evaluates to false, print a critical message
 * naming the function, the line and the current pid, then dump the stack via
 * 'dbg_dump_stack()'. Execution continues afterwards.
 */
#define ubifs_assert(expr) do {                                                \
	if (unlikely(!(expr))) {                                               \
		printk(KERN_CRIT "UBIFS assert failed in %s at %u (pid %d)\n", \
		       __func__, __LINE__, current->pid);                      \
		dbg_dump_stack();                                              \
	}                                                                      \
} while (0)
37
/*
 * Assert that the commit semaphore of @c is held. The check tries to take
 * 'commit_sem' for writing without blocking: if that succeeds, the semaphore
 * was not held, so it is released again and the assertion is reported.
 */
#define ubifs_assert_cmt_locked(c) do {                                        \
	if (unlikely(down_write_trylock(&(c)->commit_sem))) {                  \
		up_write(&(c)->commit_sem);                                    \
		printk(KERN_CRIT "commit lock is not locked!\n");              \
		ubifs_assert(0);                                               \
	}                                                                      \
} while (0)
45
/* Dump the stack, except when the recovery-testing failure mode is enabled */
#define dbg_dump_stack() do {                                                  \
	if (!dbg_failure_mode)                                                 \
		dump_stack();                                                  \
} while (0)
50
/*
 * Generic debugging messages. The message is printed at KERN_DEBUG level,
 * prefixed with the current pid and the calling function name; the whole
 * 'printk()' is done with 'dbg_lock' held.
 */
#define dbg_msg(fmt, ...) do {                                                 \
	spin_lock(&dbg_lock);                                                  \
	printk(KERN_DEBUG "UBIFS DBG (pid %d): %s: " fmt "\n", current->pid,   \
	       __func__, ##__VA_ARGS__);                                       \
	spin_unlock(&dbg_lock);                                                \
} while (0)
58
/*
 * Print a debugging message only if its type is enabled in 'ubifs_msg_flags'.
 * @typ is a mask of UBIFS_MSG_* flags; it is parenthesized so that compound
 * expressions (e.g. 'A | B') are tested as a whole rather than being split by
 * operator precedence.
 */
#define dbg_do_msg(typ, fmt, ...) do {                                         \
	if (ubifs_msg_flags & (typ))                                           \
		dbg_msg(fmt, ##__VA_ARGS__);                                   \
} while (0)
63
/* Report an error through 'ubifs_err()' with 'dbg_lock' held */
#define dbg_err(fmt, ...) do {                                                 \
	spin_lock(&dbg_lock);                                                  \
	ubifs_err(fmt, ##__VA_ARGS__);                                         \
	spin_unlock(&dbg_lock);                                                \
} while (0)
69
70const char *dbg_key_str0(const struct ubifs_info *c,
71 const union ubifs_key *key);
72const char *dbg_key_str1(const struct ubifs_info *c,
73 const union ubifs_key *key);
74
75/*
76 * DBGKEY macros require dbg_lock to be held, which it is in the dbg message
77 * macros.
78 */
79#define DBGKEY(key) dbg_key_str0(c, (key))
80#define DBGKEY1(key) dbg_key_str1(c, (key))
81
82/* General messages */
83#define dbg_gen(fmt, ...) dbg_do_msg(UBIFS_MSG_GEN, fmt, ##__VA_ARGS__)
84
85/* Additional journal messages */
86#define dbg_jnl(fmt, ...) dbg_do_msg(UBIFS_MSG_JNL, fmt, ##__VA_ARGS__)
87
88/* Additional TNC messages */
89#define dbg_tnc(fmt, ...) dbg_do_msg(UBIFS_MSG_TNC, fmt, ##__VA_ARGS__)
90
91/* Additional lprops messages */
92#define dbg_lp(fmt, ...) dbg_do_msg(UBIFS_MSG_LP, fmt, ##__VA_ARGS__)
93
94/* Additional LEB find messages */
95#define dbg_find(fmt, ...) dbg_do_msg(UBIFS_MSG_FIND, fmt, ##__VA_ARGS__)
96
97/* Additional mount messages */
98#define dbg_mnt(fmt, ...) dbg_do_msg(UBIFS_MSG_MNT, fmt, ##__VA_ARGS__)
99
100/* Additional I/O messages */
101#define dbg_io(fmt, ...) dbg_do_msg(UBIFS_MSG_IO, fmt, ##__VA_ARGS__)
102
103/* Additional commit messages */
104#define dbg_cmt(fmt, ...) dbg_do_msg(UBIFS_MSG_CMT, fmt, ##__VA_ARGS__)
105
106/* Additional budgeting messages */
107#define dbg_budg(fmt, ...) dbg_do_msg(UBIFS_MSG_BUDG, fmt, ##__VA_ARGS__)
108
109/* Additional log messages */
110#define dbg_log(fmt, ...) dbg_do_msg(UBIFS_MSG_LOG, fmt, ##__VA_ARGS__)
111
112/* Additional gc messages */
113#define dbg_gc(fmt, ...) dbg_do_msg(UBIFS_MSG_GC, fmt, ##__VA_ARGS__)
114
115/* Additional scan messages */
116#define dbg_scan(fmt, ...) dbg_do_msg(UBIFS_MSG_SCAN, fmt, ##__VA_ARGS__)
117
118/* Additional recovery messages */
119#define dbg_rcvry(fmt, ...) dbg_do_msg(UBIFS_MSG_RCVRY, fmt, ##__VA_ARGS__)
120
/*
 * Debugging message type flags, one bit per message class (must match
 * msg_type_names in debug.c).
 *
 * UBIFS_MSG_GEN: general messages
 * UBIFS_MSG_JNL: journal messages
 * UBIFS_MSG_MNT: mount messages
 * UBIFS_MSG_CMT: commit messages
 * UBIFS_MSG_FIND: LEB find messages
 * UBIFS_MSG_BUDG: budgeting messages
 * UBIFS_MSG_GC: garbage collection messages
 * UBIFS_MSG_TNC: TNC messages
 * UBIFS_MSG_LP: lprops messages
 * UBIFS_MSG_IO: I/O messages
 * UBIFS_MSG_LOG: log messages
 * UBIFS_MSG_SCAN: scan messages
 * UBIFS_MSG_RCVRY: recovery messages
 */
enum {
	UBIFS_MSG_GEN   = 1 << 0,
	UBIFS_MSG_JNL   = 1 << 1,
	UBIFS_MSG_MNT   = 1 << 2,
	UBIFS_MSG_CMT   = 1 << 3,
	UBIFS_MSG_FIND  = 1 << 4,
	UBIFS_MSG_BUDG  = 1 << 5,
	UBIFS_MSG_GC    = 1 << 6,
	UBIFS_MSG_TNC   = 1 << 7,
	UBIFS_MSG_LP    = 1 << 8,
	UBIFS_MSG_IO    = 1 << 9,
	UBIFS_MSG_LOG   = 1 << 10,
	UBIFS_MSG_SCAN  = 1 << 11,
	UBIFS_MSG_RCVRY = 1 << 12,
};
153
154/* Debugging message type flags for each default debug message level */
155#define UBIFS_MSG_LVL_0 0
156#define UBIFS_MSG_LVL_1 0x1
157#define UBIFS_MSG_LVL_2 0x7f
158#define UBIFS_MSG_LVL_3 0xffff
159
/*
 * Debugging check flags, one bit per check (must match chk_names in debug.c).
 *
 * UBIFS_CHK_GEN: general checks
 * UBIFS_CHK_TNC: check TNC
 * UBIFS_CHK_IDX_SZ: check index size
 * UBIFS_CHK_ORPH: check orphans
 * UBIFS_CHK_OLD_IDX: check the old index
 * UBIFS_CHK_LPROPS: check lprops
 * UBIFS_CHK_FS: check the file-system
 */
enum {
	UBIFS_CHK_GEN     = 1 << 0,
	UBIFS_CHK_TNC     = 1 << 1,
	UBIFS_CHK_IDX_SZ  = 1 << 2,
	UBIFS_CHK_ORPH    = 1 << 3,
	UBIFS_CHK_OLD_IDX = 1 << 4,
	UBIFS_CHK_LPROPS  = 1 << 5,
	UBIFS_CHK_FS      = 1 << 6,
};
180
/*
 * Special testing flags (must match tst_names in debug.c).
 *
 * UBIFS_TST_FORCE_IN_THE_GAPS: force the use of in-the-gaps method
 * UBIFS_TST_RCVRY: failure mode for recovery testing
 */
enum {
	UBIFS_TST_FORCE_IN_THE_GAPS = 1 << 1,
	UBIFS_TST_RCVRY             = 1 << 2,
};
191
192#if CONFIG_UBIFS_FS_DEBUG_MSG_LVL == 1
193#define UBIFS_MSG_FLAGS_DEFAULT UBIFS_MSG_LVL_1
194#elif CONFIG_UBIFS_FS_DEBUG_MSG_LVL == 2
195#define UBIFS_MSG_FLAGS_DEFAULT UBIFS_MSG_LVL_2
196#elif CONFIG_UBIFS_FS_DEBUG_MSG_LVL == 3
197#define UBIFS_MSG_FLAGS_DEFAULT UBIFS_MSG_LVL_3
198#else
199#define UBIFS_MSG_FLAGS_DEFAULT UBIFS_MSG_LVL_0
200#endif
201
202#ifdef CONFIG_UBIFS_FS_DEBUG_CHKS
203#define UBIFS_CHK_FLAGS_DEFAULT 0xffffffff
204#else
205#define UBIFS_CHK_FLAGS_DEFAULT 0
206#endif
207
208extern spinlock_t dbg_lock;
209
210extern unsigned int ubifs_msg_flags;
211extern unsigned int ubifs_chk_flags;
212extern unsigned int ubifs_tst_flags;
213
214/* Dump functions */
215
216const char *dbg_ntype(int type);
217const char *dbg_cstate(int cmt_state);
218const char *dbg_get_key_dump(const struct ubifs_info *c,
219 const union ubifs_key *key);
220void dbg_dump_inode(const struct ubifs_info *c, const struct inode *inode);
221void dbg_dump_node(const struct ubifs_info *c, const void *node);
222void dbg_dump_budget_req(const struct ubifs_budget_req *req);
223void dbg_dump_lstats(const struct ubifs_lp_stats *lst);
224void dbg_dump_budg(struct ubifs_info *c);
225void dbg_dump_lprop(const struct ubifs_info *c, const struct ubifs_lprops *lp);
226void dbg_dump_lprops(struct ubifs_info *c);
227void dbg_dump_leb(const struct ubifs_info *c, int lnum);
228void dbg_dump_znode(const struct ubifs_info *c,
229 const struct ubifs_znode *znode);
230void dbg_dump_heap(struct ubifs_info *c, struct ubifs_lpt_heap *heap, int cat);
231void dbg_dump_pnode(struct ubifs_info *c, struct ubifs_pnode *pnode,
232 struct ubifs_nnode *parent, int iip);
233void dbg_dump_tnc(struct ubifs_info *c);
234void dbg_dump_index(struct ubifs_info *c);
235
236/* Checking helper functions */
237
238typedef int (*dbg_leaf_callback)(struct ubifs_info *c,
239 struct ubifs_zbranch *zbr, void *priv);
240typedef int (*dbg_znode_callback)(struct ubifs_info *c,
241 struct ubifs_znode *znode, void *priv);
242
243int dbg_walk_index(struct ubifs_info *c, dbg_leaf_callback leaf_cb,
244 dbg_znode_callback znode_cb, void *priv);
245
246/* Checking functions */
247
248int dbg_check_lprops(struct ubifs_info *c);
249
250int dbg_old_index_check_init(struct ubifs_info *c, struct ubifs_zbranch *zroot);
251int dbg_check_old_index(struct ubifs_info *c, struct ubifs_zbranch *zroot);
252
253int dbg_check_cats(struct ubifs_info *c);
254
255int dbg_check_ltab(struct ubifs_info *c);
256
257int dbg_check_synced_i_size(struct inode *inode);
258
259int dbg_check_dir_size(struct ubifs_info *c, const struct inode *dir);
260
261int dbg_check_tnc(struct ubifs_info *c, int extra);
262
263int dbg_check_idx_size(struct ubifs_info *c, long long idx_size);
264
265int dbg_check_filesystem(struct ubifs_info *c);
266
267void dbg_check_heap(struct ubifs_info *c, struct ubifs_lpt_heap *heap, int cat,
268 int add_pos);
269
270int dbg_check_lprops(struct ubifs_info *c);
271int dbg_check_lpt_nodes(struct ubifs_info *c, struct ubifs_cnode *cnode,
272 int row, int col);
273
274/* Force the use of in-the-gaps method for testing */
275
276#define dbg_force_in_the_gaps_enabled \
277 (ubifs_tst_flags & UBIFS_TST_FORCE_IN_THE_GAPS)
278
279int dbg_force_in_the_gaps(void);
280
281/* Failure mode for recovery testing */
282
283#define dbg_failure_mode (ubifs_tst_flags & UBIFS_TST_RCVRY)
284
285void dbg_failure_mode_registration(struct ubifs_info *c);
286void dbg_failure_mode_deregistration(struct ubifs_info *c);
287
288#ifndef UBIFS_DBG_PRESERVE_UBI
289
290#define ubi_leb_read dbg_leb_read
291#define ubi_leb_write dbg_leb_write
292#define ubi_leb_change dbg_leb_change
293#define ubi_leb_erase dbg_leb_erase
294#define ubi_leb_unmap dbg_leb_unmap
295#define ubi_is_mapped dbg_is_mapped
296#define ubi_leb_map dbg_leb_map
297
298#endif
299
300int dbg_leb_read(struct ubi_volume_desc *desc, int lnum, char *buf, int offset,
301 int len, int check);
302int dbg_leb_write(struct ubi_volume_desc *desc, int lnum, const void *buf,
303 int offset, int len, int dtype);
304int dbg_leb_change(struct ubi_volume_desc *desc, int lnum, const void *buf,
305 int len, int dtype);
306int dbg_leb_erase(struct ubi_volume_desc *desc, int lnum);
307int dbg_leb_unmap(struct ubi_volume_desc *desc, int lnum);
308int dbg_is_mapped(struct ubi_volume_desc *desc, int lnum);
309int dbg_leb_map(struct ubi_volume_desc *desc, int lnum, int dtype);
310
/* Shorthand for 'dbg_leb_read()' with the @check argument set to zero */
static inline int dbg_read(struct ubi_volume_desc *desc, int lnum, char *buf,
			   int offset, int len)
{
	const int check = 0;

	return dbg_leb_read(desc, lnum, buf, offset, len, check);
}
316
317static inline int dbg_write(struct ubi_volume_desc *desc, int lnum,
318 const void *buf, int offset, int len)
319{
320 return dbg_leb_write(desc, lnum, buf, offset, len, UBI_UNKNOWN);
321}
322
323static inline int dbg_change(struct ubi_volume_desc *desc, int lnum,
324 const void *buf, int len)
325{
326 return dbg_leb_change(desc, lnum, buf, len, UBI_UNKNOWN);
327}
328
329#else /* !CONFIG_UBIFS_FS_DEBUG */
330
331#define UBIFS_DBG(op)
332#define ubifs_assert(expr) ({})
333#define ubifs_assert_cmt_locked(c)
334#define dbg_dump_stack()
335#define dbg_err(fmt, ...) ({})
336#define dbg_msg(fmt, ...) ({})
337#define dbg_key(c, key, fmt, ...) ({})
338
339#define dbg_gen(fmt, ...) ({})
340#define dbg_jnl(fmt, ...) ({})
341#define dbg_tnc(fmt, ...) ({})
342#define dbg_lp(fmt, ...) ({})
343#define dbg_find(fmt, ...) ({})
344#define dbg_mnt(fmt, ...) ({})
345#define dbg_io(fmt, ...) ({})
346#define dbg_cmt(fmt, ...) ({})
347#define dbg_budg(fmt, ...) ({})
348#define dbg_log(fmt, ...) ({})
349#define dbg_gc(fmt, ...) ({})
350#define dbg_scan(fmt, ...) ({})
351#define dbg_rcvry(fmt, ...) ({})
352
353#define dbg_ntype(type) ""
354#define dbg_cstate(cmt_state) ""
355#define dbg_get_key_dump(c, key) ({})
356#define dbg_dump_inode(c, inode) ({})
357#define dbg_dump_node(c, node) ({})
358#define dbg_dump_budget_req(req) ({})
359#define dbg_dump_lstats(lst) ({})
360#define dbg_dump_budg(c) ({})
361#define dbg_dump_lprop(c, lp) ({})
362#define dbg_dump_lprops(c) ({})
363#define dbg_dump_leb(c, lnum) ({})
364#define dbg_dump_znode(c, znode) ({})
365#define dbg_dump_heap(c, heap, cat) ({})
366#define dbg_dump_pnode(c, pnode, parent, iip) ({})
367#define dbg_dump_tnc(c) ({})
368#define dbg_dump_index(c) ({})
369
370#define dbg_walk_index(c, leaf_cb, znode_cb, priv) 0
371
372#define dbg_old_index_check_init(c, zroot) 0
373#define dbg_check_old_index(c, zroot) 0
374
375#define dbg_check_cats(c) 0
376
377#define dbg_check_ltab(c) 0
378
379#define dbg_check_synced_i_size(inode) 0
380
381#define dbg_check_dir_size(c, dir) 0
382
383#define dbg_check_tnc(c, x) 0
384
385#define dbg_check_idx_size(c, idx_size) 0
386
387#define dbg_check_filesystem(c) 0
388
389#define dbg_check_heap(c, heap, cat, add_pos) ({})
390
391#define dbg_check_lprops(c) 0
392#define dbg_check_lpt_nodes(c, cnode, row, col) 0
393
394#define dbg_force_in_the_gaps_enabled 0
395#define dbg_force_in_the_gaps() 0
396
397#define dbg_failure_mode 0
398#define dbg_failure_mode_registration(c) ({})
399#define dbg_failure_mode_deregistration(c) ({})
400
401#endif /* !CONFIG_UBIFS_FS_DEBUG */
402
403#endif /* !__UBIFS_DEBUG_H__ */
diff --git a/fs/ubifs/dir.c b/fs/ubifs/dir.c
new file mode 100644
index 00000000000..e90374be7d3
--- /dev/null
+++ b/fs/ubifs/dir.c
@@ -0,0 +1,1240 @@
1/* * This file is part of UBIFS.
2 *
3 * Copyright (C) 2006-2008 Nokia Corporation.
4 * Copyright (C) 2006, 2007 University of Szeged, Hungary
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms of the GNU General Public License version 2 as published by
8 * the Free Software Foundation.
9 *
10 * This program is distributed in the hope that it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13 * more details.
14 *
15 * You should have received a copy of the GNU General Public License along with
16 * this program; if not, write to the Free Software Foundation, Inc., 51
17 * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
18 *
19 * Authors: Artem Bityutskiy (Битюцкий Артём)
20 * Adrian Hunter
21 * Zoltan Sogor
22 */
23
/*
 * This file implements directory operations.
 *
 * All FS operations in this file allocate budget before writing anything to the
 * media. If they fail to allocate it, the error is returned. The only
 * exceptions are 'ubifs_unlink()' and 'ubifs_rmdir()' which keep working even
 * if they are unable to allocate the budget, because a deletion failing with
 * %-ENOSPC is not what users are usually ready to get. The UBIFS budgeting
 * subsystem has some space reserved for these purposes.
 *
 * All operations in this file write all inodes which they change straight
 * away, instead of marking them dirty. For example, 'ubifs_link()' changes
 * @i_size of the parent inode and writes the parent inode together with the
 * target inode. This was done to simplify file-system recovery which would
 * otherwise be very difficult to do. The only exception is rename, which does
 * not write the re-named inode (whose @i_ctime is updated) but just marks it
 * as dirty.
 */
42
43#include "ubifs.h"
44
45/**
46 * inherit_flags - inherit flags of the parent inode.
47 * @dir: parent inode
48 * @mode: new inode mode flags
49 *
50 * This is a helper function for 'ubifs_new_inode()' which inherits flag of the
51 * parent directory inode @dir. UBIFS inodes inherit the following flags:
52 * o %UBIFS_COMPR_FL, which is useful to switch compression on/of on
53 * sub-directory basis;
54 * o %UBIFS_SYNC_FL - useful for the same reasons;
55 * o %UBIFS_DIRSYNC_FL - similar, but relevant only to directories.
56 *
57 * This function returns the inherited flags.
58 */
59static int inherit_flags(const struct inode *dir, int mode)
60{
61 int flags;
62 const struct ubifs_inode *ui = ubifs_inode(dir);
63
64 if (!S_ISDIR(dir->i_mode))
65 /*
66 * The parent is not a directory, which means that an extended
67 * attribute inode is being created. No flags.
68 */
69 return 0;
70
71 flags = ui->flags & (UBIFS_COMPR_FL | UBIFS_SYNC_FL | UBIFS_DIRSYNC_FL);
72 if (!S_ISDIR(mode))
73 /* The "DIRSYNC" flag only applies to directories */
74 flags &= ~UBIFS_DIRSYNC_FL;
75 return flags;
76}
77
78/**
79 * ubifs_new_inode - allocate new UBIFS inode object.
80 * @c: UBIFS file-system description object
81 * @dir: parent directory inode
82 * @mode: inode mode flags
83 *
84 * This function finds an unused inode number, allocates new inode and
85 * initializes it. Returns new inode in case of success and an error code in
86 * case of failure.
87 */
88struct inode *ubifs_new_inode(struct ubifs_info *c, const struct inode *dir,
89 int mode)
90{
91 struct inode *inode;
92 struct ubifs_inode *ui;
93
94 inode = new_inode(c->vfs_sb);
95 ui = ubifs_inode(inode);
96 if (!inode)
97 return ERR_PTR(-ENOMEM);
98
99 /*
100 * Set 'S_NOCMTIME' to prevent VFS form updating [mc]time of inodes and
101 * marking them dirty in file write path (see 'file_update_time()').
102 * UBIFS has to fully control "clean <-> dirty" transitions of inodes
103 * to make budgeting work.
104 */
105 inode->i_flags |= (S_NOCMTIME);
106
107 inode->i_uid = current->fsuid;
108 if (dir->i_mode & S_ISGID) {
109 inode->i_gid = dir->i_gid;
110 if (S_ISDIR(mode))
111 mode |= S_ISGID;
112 } else
113 inode->i_gid = current->fsgid;
114 inode->i_mode = mode;
115 inode->i_mtime = inode->i_atime = inode->i_ctime =
116 ubifs_current_time(inode);
117 inode->i_mapping->nrpages = 0;
118 /* Disable readahead */
119 inode->i_mapping->backing_dev_info = &c->bdi;
120
121 switch (mode & S_IFMT) {
122 case S_IFREG:
123 inode->i_mapping->a_ops = &ubifs_file_address_operations;
124 inode->i_op = &ubifs_file_inode_operations;
125 inode->i_fop = &ubifs_file_operations;
126 break;
127 case S_IFDIR:
128 inode->i_op = &ubifs_dir_inode_operations;
129 inode->i_fop = &ubifs_dir_operations;
130 inode->i_size = ui->ui_size = UBIFS_INO_NODE_SZ;
131 break;
132 case S_IFLNK:
133 inode->i_op = &ubifs_symlink_inode_operations;
134 break;
135 case S_IFSOCK:
136 case S_IFIFO:
137 case S_IFBLK:
138 case S_IFCHR:
139 inode->i_op = &ubifs_file_inode_operations;
140 break;
141 default:
142 BUG();
143 }
144
145 ui->flags = inherit_flags(dir, mode);
146 ubifs_set_inode_flags(inode);
147 if (S_ISREG(mode))
148 ui->compr_type = c->default_compr;
149 else
150 ui->compr_type = UBIFS_COMPR_NONE;
151 ui->synced_i_size = 0;
152
153 spin_lock(&c->cnt_lock);
154 /* Inode number overflow is currently not supported */
155 if (c->highest_inum >= INUM_WARN_WATERMARK) {
156 if (c->highest_inum >= INUM_WATERMARK) {
157 spin_unlock(&c->cnt_lock);
158 ubifs_err("out of inode numbers");
159 make_bad_inode(inode);
160 iput(inode);
161 return ERR_PTR(-EINVAL);
162 }
163 ubifs_warn("running out of inode numbers (current %lu, max %d)",
164 c->highest_inum, INUM_WATERMARK);
165 }
166
167 inode->i_ino = ++c->highest_inum;
168 inode->i_generation = ++c->vfs_gen;
169 /*
170 * The creation sequence number remains with this inode for its
171 * lifetime. All nodes for this inode have a greater sequence number,
172 * and so it is possible to distinguish obsolete nodes belonging to a
173 * previous incarnation of the same inode number - for example, for the
174 * purpose of rebuilding the index.
175 */
176 ui->creat_sqnum = ++c->max_sqnum;
177 spin_unlock(&c->cnt_lock);
178 return inode;
179}
180
181#ifdef CONFIG_UBIFS_FS_DEBUG
182
183static int dbg_check_name(struct ubifs_dent_node *dent, struct qstr *nm)
184{
185 if (!(ubifs_chk_flags & UBIFS_CHK_GEN))
186 return 0;
187 if (le16_to_cpu(dent->nlen) != nm->len)
188 return -EINVAL;
189 if (memcmp(dent->name, nm->name, nm->len))
190 return -EINVAL;
191 return 0;
192}
193
194#else
195
196#define dbg_check_name(dent, nm) 0
197
198#endif
199
200static struct dentry *ubifs_lookup(struct inode *dir, struct dentry *dentry,
201 struct nameidata *nd)
202{
203 int err;
204 union ubifs_key key;
205 struct inode *inode = NULL;
206 struct ubifs_dent_node *dent;
207 struct ubifs_info *c = dir->i_sb->s_fs_info;
208
209 dbg_gen("'%.*s' in dir ino %lu",
210 dentry->d_name.len, dentry->d_name.name, dir->i_ino);
211
212 if (dentry->d_name.len > UBIFS_MAX_NLEN)
213 return ERR_PTR(-ENAMETOOLONG);
214
215 dent = kmalloc(UBIFS_MAX_DENT_NODE_SZ, GFP_NOFS);
216 if (!dent)
217 return ERR_PTR(-ENOMEM);
218
219 dent_key_init(c, &key, dir->i_ino, &dentry->d_name);
220
221 err = ubifs_tnc_lookup_nm(c, &key, dent, &dentry->d_name);
222 if (err) {
223 /*
224 * Do not hash the direntry if parent 'i_nlink' is zero, because
225 * this has side-effects - '->delete_inode()' call will not be
226 * called for the parent orphan inode, because 'd_count' of its
227 * direntry will stay 1 (it'll be negative direntry I guess)
228 * and prevent 'iput_final()' until the dentry is destroyed due
229 * to unmount or memory pressure.
230 */
231 if (err == -ENOENT && dir->i_nlink != 0) {
232 dbg_gen("not found");
233 goto done;
234 }
235 goto out;
236 }
237
238 if (dbg_check_name(dent, &dentry->d_name)) {
239 err = -EINVAL;
240 goto out;
241 }
242
243 inode = ubifs_iget(dir->i_sb, le64_to_cpu(dent->inum));
244 if (IS_ERR(inode)) {
245 /*
246 * This should not happen. Probably the file-system needs
247 * checking.
248 */
249 err = PTR_ERR(inode);
250 ubifs_err("dead directory entry '%.*s', error %d",
251 dentry->d_name.len, dentry->d_name.name, err);
252 ubifs_ro_mode(c, err);
253 goto out;
254 }
255
256done:
257 kfree(dent);
258 /*
259 * Note, d_splice_alias() would be required instead if we supported
260 * NFS.
261 */
262 d_add(dentry, inode);
263 return NULL;
264
265out:
266 kfree(dent);
267 return ERR_PTR(err);
268}
269
270static int ubifs_create(struct inode *dir, struct dentry *dentry, int mode,
271 struct nameidata *nd)
272{
273 struct inode *inode;
274 struct ubifs_info *c = dir->i_sb->s_fs_info;
275 int err, sz_change = CALC_DENT_SIZE(dentry->d_name.len);
276 struct ubifs_budget_req req = { .new_ino = 1, .new_dent = 1,
277 .dirtied_ino = 1 };
278 struct ubifs_inode *dir_ui = ubifs_inode(dir);
279
280 /*
281 * Budget request settings: new inode, new direntry, changing the
282 * parent directory inode.
283 */
284
285 dbg_gen("dent '%.*s', mode %#x in dir ino %lu",
286 dentry->d_name.len, dentry->d_name.name, mode, dir->i_ino);
287
288 err = ubifs_budget_space(c, &req);
289 if (err)
290 return err;
291
292 inode = ubifs_new_inode(c, dir, mode);
293 if (IS_ERR(inode)) {
294 err = PTR_ERR(inode);
295 goto out_budg;
296 }
297
298 mutex_lock(&dir_ui->ui_mutex);
299 dir->i_size += sz_change;
300 dir_ui->ui_size = dir->i_size;
301 dir->i_mtime = dir->i_ctime = inode->i_ctime;
302 err = ubifs_jnl_update(c, dir, &dentry->d_name, inode, 0, 0);
303 if (err)
304 goto out_cancel;
305 mutex_unlock(&dir_ui->ui_mutex);
306
307 ubifs_release_budget(c, &req);
308 insert_inode_hash(inode);
309 d_instantiate(dentry, inode);
310 return 0;
311
312out_cancel:
313 dir->i_size -= sz_change;
314 dir_ui->ui_size = dir->i_size;
315 mutex_unlock(&dir_ui->ui_mutex);
316 make_bad_inode(inode);
317 iput(inode);
318out_budg:
319 ubifs_release_budget(c, &req);
320 ubifs_err("cannot create regular file, error %d", err);
321 return err;
322}
323
324/**
325 * vfs_dent_type - get VFS directory entry type.
326 * @type: UBIFS directory entry type
327 *
328 * This function converts UBIFS directory entry type into VFS directory entry
329 * type.
330 */
331static unsigned int vfs_dent_type(uint8_t type)
332{
333 switch (type) {
334 case UBIFS_ITYPE_REG:
335 return DT_REG;
336 case UBIFS_ITYPE_DIR:
337 return DT_DIR;
338 case UBIFS_ITYPE_LNK:
339 return DT_LNK;
340 case UBIFS_ITYPE_BLK:
341 return DT_BLK;
342 case UBIFS_ITYPE_CHR:
343 return DT_CHR;
344 case UBIFS_ITYPE_FIFO:
345 return DT_FIFO;
346 case UBIFS_ITYPE_SOCK:
347 return DT_SOCK;
348 default:
349 BUG();
350 }
351 return 0;
352}
353
354/*
355 * The classical Unix view for directory is that it is a linear array of
356 * (name, inode number) entries. Linux/VFS assumes this model as well.
357 * Particularly, 'readdir()' call wants us to return a directory entry offset
358 * which later may be used to continue 'readdir()'ing the directory or to
359 * 'seek()' to that specific direntry. Obviously UBIFS does not really fit this
360 * model because directory entries are identified by keys, which may collide.
361 *
362 * UBIFS uses directory entry hash value for directory offsets, so
363 * 'seekdir()'/'telldir()' may not always work because of possible key
364 * collisions. But UBIFS guarantees that consecutive 'readdir()' calls work
365 * properly by means of saving full directory entry name in the private field
366 * of the file description object.
367 *
368 * This means that UBIFS cannot support NFS which requires full
369 * 'seekdir()'/'telldir()' support.
370 */
371static int ubifs_readdir(struct file *file, void *dirent, filldir_t filldir)
372{
373 int err, over = 0;
374 struct qstr nm;
375 union ubifs_key key;
376 struct ubifs_dent_node *dent;
377 struct inode *dir = file->f_path.dentry->d_inode;
378 struct ubifs_info *c = dir->i_sb->s_fs_info;
379
380 dbg_gen("dir ino %lu, f_pos %#llx", dir->i_ino, file->f_pos);
381
382 if (file->f_pos > UBIFS_S_KEY_HASH_MASK || file->f_pos == 2)
383 /*
384 * The directory was seek'ed to a senseless position or there
385 * are no more entries.
386 */
387 return 0;
388
389 /* File positions 0 and 1 correspond to "." and ".." */
390 if (file->f_pos == 0) {
391 ubifs_assert(!file->private_data);
392 over = filldir(dirent, ".", 1, 0, dir->i_ino, DT_DIR);
393 if (over)
394 return 0;
395 file->f_pos = 1;
396 }
397
398 if (file->f_pos == 1) {
399 ubifs_assert(!file->private_data);
400 over = filldir(dirent, "..", 2, 1,
401 parent_ino(file->f_path.dentry), DT_DIR);
402 if (over)
403 return 0;
404
405 /* Find the first entry in TNC and save it */
406 lowest_dent_key(c, &key, dir->i_ino);
407 nm.name = NULL;
408 dent = ubifs_tnc_next_ent(c, &key, &nm);
409 if (IS_ERR(dent)) {
410 err = PTR_ERR(dent);
411 goto out;
412 }
413
414 file->f_pos = key_hash_flash(c, &dent->key);
415 file->private_data = dent;
416 }
417
418 dent = file->private_data;
419 if (!dent) {
420 /*
421 * The directory was seek'ed to and is now readdir'ed.
422 * Find the entry corresponding to @file->f_pos or the
423 * closest one.
424 */
425 dent_key_init_hash(c, &key, dir->i_ino, file->f_pos);
426 nm.name = NULL;
427 dent = ubifs_tnc_next_ent(c, &key, &nm);
428 if (IS_ERR(dent)) {
429 err = PTR_ERR(dent);
430 goto out;
431 }
432 file->f_pos = key_hash_flash(c, &dent->key);
433 file->private_data = dent;
434 }
435
436 while (1) {
437 dbg_gen("feed '%s', ino %llu, new f_pos %#x",
438 dent->name, le64_to_cpu(dent->inum),
439 key_hash_flash(c, &dent->key));
440 ubifs_assert(dent->ch.sqnum > ubifs_inode(dir)->creat_sqnum);
441
442 nm.len = le16_to_cpu(dent->nlen);
443 over = filldir(dirent, dent->name, nm.len, file->f_pos,
444 le64_to_cpu(dent->inum),
445 vfs_dent_type(dent->type));
446 if (over)
447 return 0;
448
449 /* Switch to the next entry */
450 key_read(c, &dent->key, &key);
451 nm.name = dent->name;
452 dent = ubifs_tnc_next_ent(c, &key, &nm);
453 if (IS_ERR(dent)) {
454 err = PTR_ERR(dent);
455 goto out;
456 }
457
458 kfree(file->private_data);
459 file->f_pos = key_hash_flash(c, &dent->key);
460 file->private_data = dent;
461 cond_resched();
462 }
463
464out:
465 if (err != -ENOENT) {
466 ubifs_err("cannot find next direntry, error %d", err);
467 return err;
468 }
469
470 kfree(file->private_data);
471 file->private_data = NULL;
472 file->f_pos = 2;
473 return 0;
474}
475
476/* If a directory is seeked, we have to free saved readdir() state */
477static loff_t ubifs_dir_llseek(struct file *file, loff_t offset, int origin)
478{
479 kfree(file->private_data);
480 file->private_data = NULL;
481 return generic_file_llseek(file, offset, origin);
482}
483
484/* Free saved readdir() state when the directory is closed */
485static int ubifs_dir_release(struct inode *dir, struct file *file)
486{
487 kfree(file->private_data);
488 file->private_data = NULL;
489 return 0;
490}
491
492/**
493 * lock_2_inodes - lock two UBIFS inodes.
494 * @inode1: first inode
495 * @inode2: second inode
496 */
497static void lock_2_inodes(struct inode *inode1, struct inode *inode2)
498{
499 if (inode1->i_ino < inode2->i_ino) {
500 mutex_lock_nested(&ubifs_inode(inode1)->ui_mutex, WB_MUTEX_2);
501 mutex_lock_nested(&ubifs_inode(inode2)->ui_mutex, WB_MUTEX_3);
502 } else {
503 mutex_lock_nested(&ubifs_inode(inode2)->ui_mutex, WB_MUTEX_2);
504 mutex_lock_nested(&ubifs_inode(inode1)->ui_mutex, WB_MUTEX_3);
505 }
506}
507
508/**
509 * unlock_2_inodes - unlock two UBIFS inodes inodes.
510 * @inode1: first inode
511 * @inode2: second inode
512 */
513static void unlock_2_inodes(struct inode *inode1, struct inode *inode2)
514{
515 mutex_unlock(&ubifs_inode(inode1)->ui_mutex);
516 mutex_unlock(&ubifs_inode(inode2)->ui_mutex);
517}
518
519static int ubifs_link(struct dentry *old_dentry, struct inode *dir,
520 struct dentry *dentry)
521{
522 struct ubifs_info *c = dir->i_sb->s_fs_info;
523 struct inode *inode = old_dentry->d_inode;
524 struct ubifs_inode *ui = ubifs_inode(inode);
525 struct ubifs_inode *dir_ui = ubifs_inode(dir);
526 int err, sz_change = CALC_DENT_SIZE(dentry->d_name.len);
527 struct ubifs_budget_req req = { .new_dent = 1, .dirtied_ino = 2,
528 .dirtied_ino_d = ui->data_len };
529
530 /*
531 * Budget request settings: new direntry, changing the target inode,
532 * changing the parent inode.
533 */
534
535 dbg_gen("dent '%.*s' to ino %lu (nlink %d) in dir ino %lu",
536 dentry->d_name.len, dentry->d_name.name, inode->i_ino,
537 inode->i_nlink, dir->i_ino);
538 err = dbg_check_synced_i_size(inode);
539 if (err)
540 return err;
541
542 err = ubifs_budget_space(c, &req);
543 if (err)
544 return err;
545
546 lock_2_inodes(dir, inode);
547 inc_nlink(inode);
548 atomic_inc(&inode->i_count);
549 inode->i_ctime = ubifs_current_time(inode);
550 dir->i_size += sz_change;
551 dir_ui->ui_size = dir->i_size;
552 dir->i_mtime = dir->i_ctime = inode->i_ctime;
553 err = ubifs_jnl_update(c, dir, &dentry->d_name, inode, 0, 0);
554 if (err)
555 goto out_cancel;
556 unlock_2_inodes(dir, inode);
557
558 ubifs_release_budget(c, &req);
559 d_instantiate(dentry, inode);
560 return 0;
561
562out_cancel:
563 dir->i_size -= sz_change;
564 dir_ui->ui_size = dir->i_size;
565 drop_nlink(inode);
566 unlock_2_inodes(dir, inode);
567 ubifs_release_budget(c, &req);
568 iput(inode);
569 return err;
570}
571
572static int ubifs_unlink(struct inode *dir, struct dentry *dentry)
573{
574 struct ubifs_info *c = dir->i_sb->s_fs_info;
575 struct inode *inode = dentry->d_inode;
576 struct ubifs_inode *dir_ui = ubifs_inode(dir);
577 int sz_change = CALC_DENT_SIZE(dentry->d_name.len);
578 int err, budgeted = 1;
579 struct ubifs_budget_req req = { .mod_dent = 1, .dirtied_ino = 2 };
580
581 /*
582 * Budget request settings: deletion direntry, deletion inode (+1 for
583 * @dirtied_ino), changing the parent directory inode. If budgeting
584 * fails, go ahead anyway because we have extra space reserved for
585 * deletions.
586 */
587
588 dbg_gen("dent '%.*s' from ino %lu (nlink %d) in dir ino %lu",
589 dentry->d_name.len, dentry->d_name.name, inode->i_ino,
590 inode->i_nlink, dir->i_ino);
591 err = dbg_check_synced_i_size(inode);
592 if (err)
593 return err;
594
595 err = ubifs_budget_space(c, &req);
596 if (err) {
597 if (err != -ENOSPC)
598 return err;
599 err = 0;
600 budgeted = 0;
601 }
602
603 lock_2_inodes(dir, inode);
604 inode->i_ctime = ubifs_current_time(dir);
605 drop_nlink(inode);
606 dir->i_size -= sz_change;
607 dir_ui->ui_size = dir->i_size;
608 dir->i_mtime = dir->i_ctime = inode->i_ctime;
609 err = ubifs_jnl_update(c, dir, &dentry->d_name, inode, 1, 0);
610 if (err)
611 goto out_cancel;
612 unlock_2_inodes(dir, inode);
613
614 if (budgeted)
615 ubifs_release_budget(c, &req);
616 else {
617 /* We've deleted something - clean the "no space" flags */
618 c->nospace = c->nospace_rp = 0;
619 smp_wmb();
620 }
621 return 0;
622
623out_cancel:
624 dir->i_size += sz_change;
625 dir_ui->ui_size = dir->i_size;
626 inc_nlink(inode);
627 unlock_2_inodes(dir, inode);
628 if (budgeted)
629 ubifs_release_budget(c, &req);
630 return err;
631}
632
633/**
634 * check_dir_empty - check if a directory is empty or not.
635 * @c: UBIFS file-system description object
636 * @dir: VFS inode object of the directory to check
637 *
638 * This function checks if directory @dir is empty. Returns zero if the
639 * directory is empty, %-ENOTEMPTY if it is not, and other negative error codes
640 * in case of of errors.
641 */
642static int check_dir_empty(struct ubifs_info *c, struct inode *dir)
643{
644 struct qstr nm = { .name = NULL };
645 struct ubifs_dent_node *dent;
646 union ubifs_key key;
647 int err;
648
649 lowest_dent_key(c, &key, dir->i_ino);
650 dent = ubifs_tnc_next_ent(c, &key, &nm);
651 if (IS_ERR(dent)) {
652 err = PTR_ERR(dent);
653 if (err == -ENOENT)
654 err = 0;
655 } else {
656 kfree(dent);
657 err = -ENOTEMPTY;
658 }
659 return err;
660}
661
662static int ubifs_rmdir(struct inode *dir, struct dentry *dentry)
663{
664 struct ubifs_info *c = dir->i_sb->s_fs_info;
665 struct inode *inode = dentry->d_inode;
666 int sz_change = CALC_DENT_SIZE(dentry->d_name.len);
667 int err, budgeted = 1;
668 struct ubifs_inode *dir_ui = ubifs_inode(dir);
669 struct ubifs_budget_req req = { .mod_dent = 1, .dirtied_ino = 2 };
670
671 /*
672 * Budget request settings: deletion direntry, deletion inode and
673 * changing the parent inode. If budgeting fails, go ahead anyway
674 * because we have extra space reserved for deletions.
675 */
676
677 dbg_gen("directory '%.*s', ino %lu in dir ino %lu", dentry->d_name.len,
678 dentry->d_name.name, inode->i_ino, dir->i_ino);
679
680 err = check_dir_empty(c, dentry->d_inode);
681 if (err)
682 return err;
683
684 err = ubifs_budget_space(c, &req);
685 if (err) {
686 if (err != -ENOSPC)
687 return err;
688 budgeted = 0;
689 }
690
691 lock_2_inodes(dir, inode);
692 inode->i_ctime = ubifs_current_time(dir);
693 clear_nlink(inode);
694 drop_nlink(dir);
695 dir->i_size -= sz_change;
696 dir_ui->ui_size = dir->i_size;
697 dir->i_mtime = dir->i_ctime = inode->i_ctime;
698 err = ubifs_jnl_update(c, dir, &dentry->d_name, inode, 1, 0);
699 if (err)
700 goto out_cancel;
701 unlock_2_inodes(dir, inode);
702
703 if (budgeted)
704 ubifs_release_budget(c, &req);
705 else {
706 /* We've deleted something - clean the "no space" flags */
707 c->nospace = c->nospace_rp = 0;
708 smp_wmb();
709 }
710 return 0;
711
712out_cancel:
713 dir->i_size += sz_change;
714 dir_ui->ui_size = dir->i_size;
715 inc_nlink(dir);
716 inc_nlink(inode);
717 inc_nlink(inode);
718 unlock_2_inodes(dir, inode);
719 if (budgeted)
720 ubifs_release_budget(c, &req);
721 return err;
722}
723
724static int ubifs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
725{
726 struct inode *inode;
727 struct ubifs_inode *dir_ui = ubifs_inode(dir);
728 struct ubifs_info *c = dir->i_sb->s_fs_info;
729 int err, sz_change = CALC_DENT_SIZE(dentry->d_name.len);
730 struct ubifs_budget_req req = { .new_ino = 1, .new_dent = 1,
731 .dirtied_ino_d = 1 };
732
733 /*
734 * Budget request settings: new inode, new direntry and changing parent
735 * directory inode.
736 */
737
738 dbg_gen("dent '%.*s', mode %#x in dir ino %lu",
739 dentry->d_name.len, dentry->d_name.name, mode, dir->i_ino);
740
741 err = ubifs_budget_space(c, &req);
742 if (err)
743 return err;
744
745 inode = ubifs_new_inode(c, dir, S_IFDIR | mode);
746 if (IS_ERR(inode)) {
747 err = PTR_ERR(inode);
748 goto out_budg;
749 }
750
751 mutex_lock(&dir_ui->ui_mutex);
752 insert_inode_hash(inode);
753 inc_nlink(inode);
754 inc_nlink(dir);
755 dir->i_size += sz_change;
756 dir_ui->ui_size = dir->i_size;
757 dir->i_mtime = dir->i_ctime = inode->i_ctime;
758 err = ubifs_jnl_update(c, dir, &dentry->d_name, inode, 0, 0);
759 if (err) {
760 ubifs_err("cannot create directory, error %d", err);
761 goto out_cancel;
762 }
763 mutex_unlock(&dir_ui->ui_mutex);
764
765 ubifs_release_budget(c, &req);
766 d_instantiate(dentry, inode);
767 return 0;
768
769out_cancel:
770 dir->i_size -= sz_change;
771 dir_ui->ui_size = dir->i_size;
772 drop_nlink(dir);
773 mutex_unlock(&dir_ui->ui_mutex);
774 make_bad_inode(inode);
775 iput(inode);
776out_budg:
777 ubifs_release_budget(c, &req);
778 return err;
779}
780
781static int ubifs_mknod(struct inode *dir, struct dentry *dentry,
782 int mode, dev_t rdev)
783{
784 struct inode *inode;
785 struct ubifs_inode *ui;
786 struct ubifs_inode *dir_ui = ubifs_inode(dir);
787 struct ubifs_info *c = dir->i_sb->s_fs_info;
788 union ubifs_dev_desc *dev = NULL;
789 int sz_change = CALC_DENT_SIZE(dentry->d_name.len);
790 int err, devlen = 0;
791 struct ubifs_budget_req req = { .new_ino = 1, .new_dent = 1,
792 .new_ino_d = devlen, .dirtied_ino = 1 };
793
794 /*
795 * Budget request settings: new inode, new direntry and changing parent
796 * directory inode.
797 */
798
799 dbg_gen("dent '%.*s' in dir ino %lu",
800 dentry->d_name.len, dentry->d_name.name, dir->i_ino);
801
802 if (!new_valid_dev(rdev))
803 return -EINVAL;
804
805 if (S_ISBLK(mode) || S_ISCHR(mode)) {
806 dev = kmalloc(sizeof(union ubifs_dev_desc), GFP_NOFS);
807 if (!dev)
808 return -ENOMEM;
809 devlen = ubifs_encode_dev(dev, rdev);
810 }
811
812 err = ubifs_budget_space(c, &req);
813 if (err) {
814 kfree(dev);
815 return err;
816 }
817
818 inode = ubifs_new_inode(c, dir, mode);
819 if (IS_ERR(inode)) {
820 kfree(dev);
821 err = PTR_ERR(inode);
822 goto out_budg;
823 }
824
825 init_special_inode(inode, inode->i_mode, rdev);
826 inode->i_size = ubifs_inode(inode)->ui_size = devlen;
827 ui = ubifs_inode(inode);
828 ui->data = dev;
829 ui->data_len = devlen;
830
831 mutex_lock(&dir_ui->ui_mutex);
832 dir->i_size += sz_change;
833 dir_ui->ui_size = dir->i_size;
834 dir->i_mtime = dir->i_ctime = inode->i_ctime;
835 err = ubifs_jnl_update(c, dir, &dentry->d_name, inode, 0, 0);
836 if (err)
837 goto out_cancel;
838 mutex_unlock(&dir_ui->ui_mutex);
839
840 ubifs_release_budget(c, &req);
841 insert_inode_hash(inode);
842 d_instantiate(dentry, inode);
843 return 0;
844
845out_cancel:
846 dir->i_size -= sz_change;
847 dir_ui->ui_size = dir->i_size;
848 mutex_unlock(&dir_ui->ui_mutex);
849 make_bad_inode(inode);
850 iput(inode);
851out_budg:
852 ubifs_release_budget(c, &req);
853 return err;
854}
855
856static int ubifs_symlink(struct inode *dir, struct dentry *dentry,
857 const char *symname)
858{
859 struct inode *inode;
860 struct ubifs_inode *ui;
861 struct ubifs_inode *dir_ui = ubifs_inode(dir);
862 struct ubifs_info *c = dir->i_sb->s_fs_info;
863 int err, len = strlen(symname);
864 int sz_change = CALC_DENT_SIZE(dentry->d_name.len);
865 struct ubifs_budget_req req = { .new_ino = 1, .new_dent = 1,
866 .new_ino_d = len, .dirtied_ino = 1 };
867
868 /*
869 * Budget request settings: new inode, new direntry and changing parent
870 * directory inode.
871 */
872
873 dbg_gen("dent '%.*s', target '%s' in dir ino %lu", dentry->d_name.len,
874 dentry->d_name.name, symname, dir->i_ino);
875
876 if (len > UBIFS_MAX_INO_DATA)
877 return -ENAMETOOLONG;
878
879 err = ubifs_budget_space(c, &req);
880 if (err)
881 return err;
882
883 inode = ubifs_new_inode(c, dir, S_IFLNK | S_IRWXUGO);
884 if (IS_ERR(inode)) {
885 err = PTR_ERR(inode);
886 goto out_budg;
887 }
888
889 ui = ubifs_inode(inode);
890 ui->data = kmalloc(len + 1, GFP_NOFS);
891 if (!ui->data) {
892 err = -ENOMEM;
893 goto out_inode;
894 }
895
896 memcpy(ui->data, symname, len);
897 ((char *)ui->data)[len] = '\0';
898 /*
899 * The terminating zero byte is not written to the flash media and it
900 * is put just to make later in-memory string processing simpler. Thus,
901 * data length is @len, not @len + %1.
902 */
903 ui->data_len = len;
904 inode->i_size = ubifs_inode(inode)->ui_size = len;
905
906 mutex_lock(&dir_ui->ui_mutex);
907 dir->i_size += sz_change;
908 dir_ui->ui_size = dir->i_size;
909 dir->i_mtime = dir->i_ctime = inode->i_ctime;
910 err = ubifs_jnl_update(c, dir, &dentry->d_name, inode, 0, 0);
911 if (err)
912 goto out_cancel;
913 mutex_unlock(&dir_ui->ui_mutex);
914
915 ubifs_release_budget(c, &req);
916 insert_inode_hash(inode);
917 d_instantiate(dentry, inode);
918 return 0;
919
920out_cancel:
921 dir->i_size -= sz_change;
922 dir_ui->ui_size = dir->i_size;
923 mutex_unlock(&dir_ui->ui_mutex);
924out_inode:
925 make_bad_inode(inode);
926 iput(inode);
927out_budg:
928 ubifs_release_budget(c, &req);
929 return err;
930}
931
932/**
933 * lock_3_inodes - lock three UBIFS inodes for rename.
934 * @inode1: first inode
935 * @inode2: second inode
936 * @inode3: third inode
937 *
938 * For 'ubifs_rename()', @inode1 may be the same as @inode2 whereas @inode3 may
939 * be null.
 *
 * When all three inodes are different, the one with the lowest address is
 * locked first and the remaining two are passed to 'lock_2_inodes()'.
 * Duplicate and null inodes are filtered out so that no @ui_mutex is taken
 * twice.
940 */
941static void lock_3_inodes(struct inode *inode1, struct inode *inode2,
942 struct inode *inode3)
943{
944 struct inode *i1, *i2, *i3;
945
/* No third inode: lock the two directories, or just one if they coincide */
946 if (!inode3) {
947 if (inode1 != inode2) {
948 lock_2_inodes(inode1, inode2);
949 return;
950 }
951 mutex_lock_nested(&ubifs_inode(inode1)->ui_mutex, WB_MUTEX_1);
952 return;
953 }
954
/* @inode1 and @inode2 are the same inode, so only 2 locks are needed */
955 if (inode1 == inode2) {
956 lock_2_inodes(inode1, inode3);
957 return;
958 }
959
960 /* 3 different inodes */
/*
 * Pick the inode with the lowest address as @i1. The other two become @i2
 * and @i3 in no particular order - presumably 'lock_2_inodes()' orders them
 * itself (it is defined outside of this function, TODO confirm).
 */
961 if (inode1 < inode2) {
962 i3 = inode2;
963 if (inode1 < inode3) {
964 i1 = inode1;
965 i2 = inode3;
966 } else {
967 i1 = inode3;
968 i2 = inode1;
969 }
970 } else {
971 i3 = inode1;
972 if (inode2 < inode3) {
973 i1 = inode2;
974 i2 = inode3;
975 } else {
976 i1 = inode3;
977 i2 = inode2;
978 }
979 }
/*
 * NOTE(review): %WB_MUTEX_1 is used here and may be used again inside
 * 'lock_2_inodes()' - verify that lockdep sub-classes do not clash.
 */
980 mutex_lock_nested(&ubifs_inode(i1)->ui_mutex, WB_MUTEX_1);
981 lock_2_inodes(i2, i3);
982}
983
984/**
985 * unlock_3_inodes - unlock three UBIFS inodes for rename.
986 * @inode1: first inode
987 * @inode2: second inode
988 * @inode3: third inode
989 */
990static void unlock_3_inodes(struct inode *inode1, struct inode *inode2,
991 struct inode *inode3)
992{
993 mutex_unlock(&ubifs_inode(inode1)->ui_mutex);
994 if (inode1 != inode2)
995 mutex_unlock(&ubifs_inode(inode2)->ui_mutex);
996 if (inode3)
997 mutex_unlock(&ubifs_inode(inode3)->ui_mutex);
998}
999
/*
 * ubifs_rename - the '->rename()' method of directory inodes.
 *
 * Moves the entry @old_dentry of @old_dir to @new_dentry of @new_dir. If
 * @new_dentry already refers an inode, that inode is unlinked; when the
 * renamed object is a directory, the overwritten directory has to be empty.
 * All in-memory changes are rolled back if writing to the journal fails.
 * Returns zero in case of success and a negative error code in case of
 * failure.
 */
1000static int ubifs_rename(struct inode *old_dir, struct dentry *old_dentry,
1001 struct inode *new_dir, struct dentry *new_dentry)
1002{
1003 struct ubifs_info *c = old_dir->i_sb->s_fs_info;
1004 struct inode *old_inode = old_dentry->d_inode;
1005 struct inode *new_inode = new_dentry->d_inode;
1006 struct ubifs_inode *old_inode_ui = ubifs_inode(old_inode);
1007 int err, release, sync = 0, move = (new_dir != old_dir);
1008 int is_dir = S_ISDIR(old_inode->i_mode);
1009 int unlink = !!new_inode;
1010 int new_sz = CALC_DENT_SIZE(new_dentry->d_name.len);
1011 int old_sz = CALC_DENT_SIZE(old_dentry->d_name.len);
1012 struct ubifs_budget_req req = { .new_dent = 1, .mod_dent = 1,
1013 .dirtied_ino = 3 };
1014 struct ubifs_budget_req ino_req = { .dirtied_ino = 1,
1015 .dirtied_ino_d = old_inode_ui->data_len };
1016 struct timespec time;
1017
1018 /*
1019 * Budget request settings: deletion direntry, new direntry, removing
1020 * the old inode, and changing old and new parent directory inodes.
1021 *
1022 * However, this operation also marks the target inode as dirty and
1023 * does not write it, so we allocate budget for the target inode
1024 * separately.
1025 */
1026
1027 dbg_gen("dent '%.*s' ino %lu in dir ino %lu to dent '%.*s' in "
1028 "dir ino %lu", old_dentry->d_name.len, old_dentry->d_name.name,
1029 old_inode->i_ino, old_dir->i_ino, new_dentry->d_name.len,
1030 new_dentry->d_name.name, new_dir->i_ino);
1031
/* A directory may only replace an empty directory */
1032 if (unlink && is_dir) {
1033 err = check_dir_empty(c, new_inode);
1034 if (err)
1035 return err;
1036 }
1037
1038 err = ubifs_budget_space(c, &req);
1039 if (err)
1040 return err;
1041 err = ubifs_budget_space(c, &ino_req);
1042 if (err) {
1043 ubifs_release_budget(c, &req);
1044 return err;
1045 }
1046
/* @new_inode is NULL if nothing is overwritten, 'lock_3_inodes()' copes */
1047 lock_3_inodes(old_dir, new_dir, new_inode);
1048
1049 /*
1050 * Like most other Unix systems, set the @i_ctime for inodes on a
1051 * rename.
1052 */
1053 time = ubifs_current_time(old_dir);
1054 old_inode->i_ctime = time;
1055
1056 /* We must adjust parent link count when renaming directories */
1057 if (is_dir) {
1058 if (move) {
1059 /*
1060 * @old_dir loses a link because we are moving
1061 * @old_inode to a different directory.
1062 */
1063 drop_nlink(old_dir);
1064 /*
1065 * @new_dir only gains a link if we are not also
1066 * overwriting an existing directory.
1067 */
1068 if (!unlink)
1069 inc_nlink(new_dir);
1070 } else {
1071 /*
1072 * @old_inode is not moving to a different directory,
1073 * but @old_dir still loses a link if we are
1074 * overwriting an existing directory.
1075 */
1076 if (unlink)
1077 drop_nlink(old_dir);
1078 }
1079 }
1080
1081 old_dir->i_size -= old_sz;
1082 ubifs_inode(old_dir)->ui_size = old_dir->i_size;
1083 old_dir->i_mtime = old_dir->i_ctime = time;
1084 new_dir->i_mtime = new_dir->i_ctime = time;
1085
1086 /*
1087 * And finally, if we unlinked a direntry which happened to have the
1088 * same name as the moved direntry, we have to decrement @i_nlink of
1089 * the unlinked inode and change its ctime.
1090 */
1091 if (unlink) {
1092 /*
1093 * Directories cannot have hard-links, so if this is a
1094 * directory, decrement its @i_nlink twice because an empty
1095 * directory has @i_nlink 2.
1096 */
1097 if (is_dir)
1098 drop_nlink(new_inode);
1099 new_inode->i_ctime = time;
1100 drop_nlink(new_inode);
1101 } else {
/* A new direntry is added to @new_dir only if nothing is overwritten */
1102 new_dir->i_size += new_sz;
1103 ubifs_inode(new_dir)->ui_size = new_dir->i_size;
1104 }
1105
1106 /*
1107 * Do not ask 'ubifs_jnl_rename()' to flush write-buffer if @old_inode
1108 * is dirty, because this will be done later on at the end of
1109 * 'ubifs_rename()'.
1110 */
1111 if (IS_SYNC(old_inode)) {
1112 sync = IS_DIRSYNC(old_dir) || IS_DIRSYNC(new_dir);
1113 if (unlink && IS_SYNC(new_inode))
1114 sync = 1;
1115 }
1116 err = ubifs_jnl_rename(c, old_dir, old_dentry, new_dir, new_dentry,
1117 sync);
1118 if (err)
1119 goto out_cancel;
1120
1121 unlock_3_inodes(old_dir, new_dir, new_inode);
1122 ubifs_release_budget(c, &req);
1123
/*
 * Mark @old_inode dirty. If it was dirty already, the budget for it has
 * been acquired earlier, so @ino_req is superfluous and is released.
 * Otherwise @ino_req is kept for the now dirty inode.
 */
1124 mutex_lock(&old_inode_ui->ui_mutex);
1125 release = old_inode_ui->dirty;
1126 mark_inode_dirty_sync(old_inode);
1127 mutex_unlock(&old_inode_ui->ui_mutex);
1128
1129 if (release)
1130 ubifs_release_budget(c, &ino_req);
1131 if (IS_SYNC(old_inode))
1132 err = old_inode->i_sb->s_op->write_inode(old_inode, 1);
1133 return err;
1134
/* Undo every in-memory change made above, then drop both budgets */
1135out_cancel:
1136 if (unlink) {
1137 if (is_dir)
1138 inc_nlink(new_inode);
1139 inc_nlink(new_inode);
1140 } else {
1141 new_dir->i_size -= new_sz;
1142 ubifs_inode(new_dir)->ui_size = new_dir->i_size;
1143 }
1144 old_dir->i_size += old_sz;
1145 ubifs_inode(old_dir)->ui_size = old_dir->i_size;
1146 if (is_dir) {
1147 if (move) {
1148 inc_nlink(old_dir);
1149 if (!unlink)
1150 drop_nlink(new_dir);
1151 } else {
1152 if (unlink)
1153 inc_nlink(old_dir);
1154 }
1155 }
1156 unlock_3_inodes(old_dir, new_dir, new_inode);
1157 ubifs_release_budget(c, &ino_req);
1158 ubifs_release_budget(c, &req);
1159 return err;
1160}
1161
1162int ubifs_getattr(struct vfsmount *mnt, struct dentry *dentry,
1163 struct kstat *stat)
1164{
1165 loff_t size;
1166 struct inode *inode = dentry->d_inode;
1167 struct ubifs_inode *ui = ubifs_inode(inode);
1168
1169 mutex_lock(&ui->ui_mutex);
1170 stat->dev = inode->i_sb->s_dev;
1171 stat->ino = inode->i_ino;
1172 stat->mode = inode->i_mode;
1173 stat->nlink = inode->i_nlink;
1174 stat->uid = inode->i_uid;
1175 stat->gid = inode->i_gid;
1176 stat->rdev = inode->i_rdev;
1177 stat->atime = inode->i_atime;
1178 stat->mtime = inode->i_mtime;
1179 stat->ctime = inode->i_ctime;
1180 stat->blksize = UBIFS_BLOCK_SIZE;
1181 stat->size = ui->ui_size;
1182
1183 /*
1184 * Unfortunately, the 'stat()' system call was designed for block
1185 * device based file systems, and it is not appropriate for UBIFS,
1186 * because UBIFS does not have notion of "block". For example, it is
1187 * difficult to tell how many block a directory takes - it actually
1188 * takes less than 300 bytes, but we have to round it to block size,
1189 * which introduces large mistake. This makes utilities like 'du' to
1190 * report completely senseless numbers. This is the reason why UBIFS
1191 * goes the same way as JFFS2 - it reports zero blocks for everything
1192 * but regular files, which makes more sense than reporting completely
1193 * wrong sizes.
1194 */
1195 if (S_ISREG(inode->i_mode)) {
1196 size = ui->xattr_size;
1197 size += stat->size;
1198 size = ALIGN(size, UBIFS_BLOCK_SIZE);
1199 /*
1200 * Note, user-space expects 512-byte blocks count irrespectively
1201 * of what was reported in @stat->size.
1202 */
1203 stat->blocks = size >> 9;
1204 } else
1205 stat->blocks = 0;
1206 mutex_unlock(&ui->ui_mutex);
1207 return 0;
1208}
1209
1210struct inode_operations ubifs_dir_inode_operations = {
1211 .lookup = ubifs_lookup,
1212 .create = ubifs_create,
1213 .link = ubifs_link,
1214 .symlink = ubifs_symlink,
1215 .unlink = ubifs_unlink,
1216 .mkdir = ubifs_mkdir,
1217 .rmdir = ubifs_rmdir,
1218 .mknod = ubifs_mknod,
1219 .rename = ubifs_rename,
1220 .setattr = ubifs_setattr,
1221 .getattr = ubifs_getattr,
1222#ifdef CONFIG_UBIFS_FS_XATTR
1223 .setxattr = ubifs_setxattr,
1224 .getxattr = ubifs_getxattr,
1225 .listxattr = ubifs_listxattr,
1226 .removexattr = ubifs_removexattr,
1227#endif
1228};
1229
1230struct file_operations ubifs_dir_operations = {
1231 .llseek = ubifs_dir_llseek,
1232 .release = ubifs_dir_release,
1233 .read = generic_read_dir,
1234 .readdir = ubifs_readdir,
1235 .fsync = ubifs_fsync,
1236 .unlocked_ioctl = ubifs_ioctl,
1237#ifdef CONFIG_COMPAT
1238 .compat_ioctl = ubifs_compat_ioctl,
1239#endif
1240};
diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c
new file mode 100644
index 00000000000..005a3b854d9
--- /dev/null
+++ b/fs/ubifs/file.c
@@ -0,0 +1,1275 @@
1/*
2 * This file is part of UBIFS.
3 *
4 * Copyright (C) 2006-2008 Nokia Corporation.
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms of the GNU General Public License version 2 as published by
8 * the Free Software Foundation.
9 *
10 * This program is distributed in the hope that it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13 * more details.
14 *
15 * You should have received a copy of the GNU General Public License along with
16 * this program; if not, write to the Free Software Foundation, Inc., 51
17 * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
18 *
19 * Authors: Artem Bityutskiy (Битюцкий Артём)
20 * Adrian Hunter
21 */
22
23/*
24 * This file implements VFS file and inode operations of regular files, device
25 * nodes and symlinks as well as address space operations.
26 *
27 * UBIFS uses 2 page flags: PG_private and PG_checked. PG_private is set if the
28 * page is dirty and is used for budgeting purposes - dirty pages should not be
29 * budgeted. The PG_checked flag is set if full budgeting is required for the
30 * page e.g., when it corresponds to a file hole or it is just beyond the file
31 * size. The budgeting is done in 'ubifs_write_begin()', because it is OK to
32 * fail in this function, and the budget is released in 'ubifs_write_end()'. So
33 * the PG_private and PG_checked flags carry the information about how the page
34 * was budgeted, to make it possible to release the budget properly.
35 *
36 * A thing to keep in mind: inode's 'i_mutex' is locked in most VFS operations
37 * we implement. However, this is not true for '->writepage()', which might be
38 * called with 'i_mutex' unlocked. For example, when pdflush is performing
39 * write-back, it calls 'writepage()' with unlocked 'i_mutex', although the
40 * inode has 'I_LOCK' flag in this case. At "normal" work-paths 'i_mutex' is
41 * locked in '->writepage()', e.g. in the "sys_write -> alloc_pages -> direct
42 * reclaim" path. So, in '->writepage()' we are only guaranteed that the page
43 * is locked.
44 *
45 * Similarly, 'i_mutex' does not have to be locked in readpage(), e.g.,
46 * readahead path does not have it locked ("sys_read -> generic_file_aio_read
47 * -> ondemand_readahead -> readpage"). In case of readahead, 'I_LOCK' flag is
48 * not set as well. However, UBIFS disables readahead.
49 *
50 * This, for example means that there might be 2 concurrent '->writepage()'
51 * calls for the same inode, but different inode dirty pages.
52 */
53
54#include "ubifs.h"
55#include <linux/mount.h>
56
57static int read_block(struct inode *inode, void *addr, unsigned int block,
58 struct ubifs_data_node *dn)
59{
60 struct ubifs_info *c = inode->i_sb->s_fs_info;
61 int err, len, out_len;
62 union ubifs_key key;
63 unsigned int dlen;
64
65 data_key_init(c, &key, inode->i_ino, block);
66 err = ubifs_tnc_lookup(c, &key, dn);
67 if (err) {
68 if (err == -ENOENT)
69 /* Not found, so it must be a hole */
70 memset(addr, 0, UBIFS_BLOCK_SIZE);
71 return err;
72 }
73
74 ubifs_assert(dn->ch.sqnum > ubifs_inode(inode)->creat_sqnum);
75
76 len = le32_to_cpu(dn->size);
77 if (len <= 0 || len > UBIFS_BLOCK_SIZE)
78 goto dump;
79
80 dlen = le32_to_cpu(dn->ch.len) - UBIFS_DATA_NODE_SZ;
81 out_len = UBIFS_BLOCK_SIZE;
82 err = ubifs_decompress(&dn->data, dlen, addr, &out_len,
83 le16_to_cpu(dn->compr_type));
84 if (err || len != out_len)
85 goto dump;
86
87 /*
88 * Data length can be less than a full block, even for blocks that are
89 * not the last in the file (e.g., as a result of making a hole and
90 * appending data). Ensure that the remainder is zeroed out.
91 */
92 if (len < UBIFS_BLOCK_SIZE)
93 memset(addr + len, 0, UBIFS_BLOCK_SIZE - len);
94
95 return 0;
96
97dump:
98 ubifs_err("bad data node (block %u, inode %lu)",
99 block, inode->i_ino);
100 dbg_dump_node(c, dn);
101 return -EINVAL;
102}
103
/*
 * do_readpage - fill a page cache page with inode data.
 *
 * The page is read block by block with 'read_block()'. Blocks which are
 * beyond the inode size or which are not found (holes) are zero-filled, and
 * the page gets the PG_checked flag in this case. On success the page is
 * marked up-to-date and zero is returned. On failure the page is marked with
 * the error flag and a negative error code is returned.
 */
104static int do_readpage(struct page *page)
105{
106 void *addr;
107 int err = 0, i;
108 unsigned int block, beyond;
109 struct ubifs_data_node *dn;
110 struct inode *inode = page->mapping->host;
111 loff_t i_size = i_size_read(inode);
112
113 dbg_gen("ino %lu, pg %lu, i_size %lld, flags %#lx",
114 inode->i_ino, page->index, i_size, page->flags);
115 ubifs_assert(!PageChecked(page));
116 ubifs_assert(!PagePrivate(page));
117
118 addr = kmap(page);
119
/* @block is the first block of the page, @beyond is the first block past i_size */
120 block = page->index << UBIFS_BLOCKS_PER_PAGE_SHIFT;
121 beyond = (i_size + UBIFS_BLOCK_SIZE - 1) >> UBIFS_BLOCK_SHIFT;
122 if (block >= beyond) {
123 /* Reading beyond inode */
124 SetPageChecked(page);
125 memset(addr, 0, PAGE_CACHE_SIZE);
/* @dn is not allocated yet, so jump past the 'kfree()' at 'out_free' */
126 goto out;
127 }
128
129 dn = kmalloc(UBIFS_MAX_DATA_NODE_SZ, GFP_NOFS);
130 if (!dn) {
131 err = -ENOMEM;
132 goto error;
133 }
134
/*
 * Read all blocks of the page. Note, once @err becomes %-ENOENT it is not
 * reset by the following successful reads, so after the loop %-ENOENT means
 * that at least one block of the page was a hole or was beyond the inode.
 */
135 i = 0;
136 while (1) {
137 int ret;
138
139 if (block >= beyond) {
140 /* Reading beyond inode */
141 err = -ENOENT;
142 memset(addr, 0, UBIFS_BLOCK_SIZE);
143 } else {
144 ret = read_block(inode, addr, block, dn);
145 if (ret) {
146 err = ret;
/* Holes are fine, any other error stops reading */
147 if (err != -ENOENT)
148 break;
149 }
150 }
151 if (++i >= UBIFS_BLOCKS_PER_PAGE)
152 break;
153 block += 1;
154 addr += UBIFS_BLOCK_SIZE;
155 }
156 if (err) {
157 if (err == -ENOENT) {
158 /* Not found, so it must be a hole */
159 SetPageChecked(page);
160 dbg_gen("hole");
161 goto out_free;
162 }
163 ubifs_err("cannot read page %lu of inode %lu, error %d",
164 page->index, inode->i_ino, err);
165 goto error;
166 }
167
168out_free:
169 kfree(dn);
170out:
171 SetPageUptodate(page);
172 ClearPageError(page);
173 flush_dcache_page(page);
174 kunmap(page);
175 return 0;
176
177error:
/* @dn is NULL here if the allocation failed, 'kfree()' is given NULL then */
178 kfree(dn);
179 ClearPageUptodate(page);
180 SetPageError(page);
181 flush_dcache_page(page);
182 kunmap(page);
183 return err;
184}
185
186/**
187 * release_new_page_budget - release budget of a new page.
188 * @c: UBIFS file-system description object
189 *
190 * This is a helper function which releases budget corresponding to the budget
191 * of one new page of data.
192 */
193static void release_new_page_budget(struct ubifs_info *c)
194{
195 struct ubifs_budget_req req = { .recalculate = 1, .new_page = 1 };
196
197 ubifs_release_budget(c, &req);
198}
199
200/**
201 * release_existing_page_budget - release budget of an existing page.
202 * @c: UBIFS file-system description object
203 *
204 * This is a helper function which releases budget corresponding to the budget
205 * of changing one one page of data which already exists on the flash media.
206 */
207static void release_existing_page_budget(struct ubifs_info *c)
208{
209 struct ubifs_budget_req req = { .dd_growth = c->page_budget};
210
211 ubifs_release_budget(c, &req);
212}
213
214static int write_begin_slow(struct address_space *mapping,
215 loff_t pos, unsigned len, struct page **pagep)
216{
217 struct inode *inode = mapping->host;
218 struct ubifs_info *c = inode->i_sb->s_fs_info;
219 pgoff_t index = pos >> PAGE_CACHE_SHIFT;
220 struct ubifs_budget_req req = { .new_page = 1 };
221 int uninitialized_var(err), appending = !!(pos + len > inode->i_size);
222 struct page *page;
223
224 dbg_gen("ino %lu, pos %llu, len %u, i_size %lld",
225 inode->i_ino, pos, len, inode->i_size);
226
227 /*
228 * At the slow path we have to budget before locking the page, because
229 * budgeting may force write-back, which would wait on locked pages and
230 * deadlock if we had the page locked. At this point we do not know
231 * anything about the page, so assume that this is a new page which is
232 * written to a hole. This corresponds to largest budget. Later the
233 * budget will be amended if this is not true.
234 */
235 if (appending)
236 /* We are appending data, budget for inode change */
237 req.dirtied_ino = 1;
238
239 err = ubifs_budget_space(c, &req);
240 if (unlikely(err))
241 return err;
242
243 page = __grab_cache_page(mapping, index);
244 if (unlikely(!page)) {
245 ubifs_release_budget(c, &req);
246 return -ENOMEM;
247 }
248
249 if (!PageUptodate(page)) {
250 if (!(pos & PAGE_CACHE_MASK) && len == PAGE_CACHE_SIZE)
251 SetPageChecked(page);
252 else {
253 err = do_readpage(page);
254 if (err) {
255 unlock_page(page);
256 page_cache_release(page);
257 return err;
258 }
259 }
260
261 SetPageUptodate(page);
262 ClearPageError(page);
263 }
264
265 if (PagePrivate(page))
266 /*
267 * The page is dirty, which means it was budgeted twice:
268 * o first time the budget was allocated by the task which
269 * made the page dirty and set the PG_private flag;
270 * o and then we budgeted for it for the second time at the
271 * very beginning of this function.
272 *
273 * So what we have to do is to release the page budget we
274 * allocated.
275 */
276 release_new_page_budget(c);
277 else if (!PageChecked(page))
278 /*
279 * We are changing a page which already exists on the media.
280 * This means that changing the page does not make the amount
281 * of indexing information larger, and this part of the budget
282 * which we have already acquired may be released.
283 */
284 ubifs_convert_page_budget(c);
285
286 if (appending) {
287 struct ubifs_inode *ui = ubifs_inode(inode);
288
289 /*
290 * 'ubifs_write_end()' is optimized from the fast-path part of
291 * 'ubifs_write_begin()' and expects the @ui_mutex to be locked
292 * if data is appended.
293 */
294 mutex_lock(&ui->ui_mutex);
295 if (ui->dirty)
296 /*
297 * The inode is dirty already, so we may free the
298 * budget we allocated.
299 */
300 ubifs_release_dirty_inode_budget(c, ui);
301 }
302
303 *pagep = page;
304 return 0;
305}
306
307/**
308 * allocate_budget - allocate budget for 'ubifs_write_begin()'.
309 * @c: UBIFS file-system description object
310 * @page: page to allocate budget for
311 * @ui: UBIFS inode object the page belongs to
312 * @appending: non-zero if the page is appended
313 *
314 * This is a helper function for 'ubifs_write_begin()' which allocates budget
315 * for the operation. The budget is allocated differently depending on whether
316 * this is appending, whether the page is dirty or not, and so on. This
317 * function leaves the @ui->ui_mutex locked in case of appending. Returns zero
318 * in case of success and %-ENOSPC in case of failure.
319 */
320static int allocate_budget(struct ubifs_info *c, struct page *page,
321 struct ubifs_inode *ui, int appending)
322{
323 struct ubifs_budget_req req = { .fast = 1 };
324
325 if (PagePrivate(page)) {
326 if (!appending)
327 /*
328 * The page is dirty and we are not appending, which
329 * means no budget is needed at all.
330 */
331 return 0;
332
333 mutex_lock(&ui->ui_mutex);
334 if (ui->dirty)
335 /*
336 * The page is dirty and we are appending, so the inode
337 * has to be marked as dirty. However, it is already
338 * dirty, so we do not need any budget. We may return,
339 * but @ui->ui_mutex hast to be left locked because we
340 * should prevent write-back from flushing the inode
341 * and freeing the budget. The lock will be released in
342 * 'ubifs_write_end()'.
343 */
344 return 0;
345
346 /*
347 * The page is dirty, we are appending, the inode is clean, so
348 * we need to budget the inode change.
349 */
350 req.dirtied_ino = 1;
351 } else {
352 if (PageChecked(page))
353 /*
354 * The page corresponds to a hole and does not
355 * exist on the media. So changing it makes
356 * make the amount of indexing information
357 * larger, and we have to budget for a new
358 * page.
359 */
360 req.new_page = 1;
361 else
362 /*
363 * Not a hole, the change will not add any new
364 * indexing information, budget for page
365 * change.
366 */
367 req.dirtied_page = 1;
368
369 if (appending) {
370 mutex_lock(&ui->ui_mutex);
371 if (!ui->dirty)
372 /*
373 * The inode is clean but we will have to mark
374 * it as dirty because we are appending. This
375 * needs a budget.
376 */
377 req.dirtied_ino = 1;
378 }
379 }
380
381 return ubifs_budget_space(c, &req);
382}
383
384/*
385 * This function is called when a page of data is going to be written. Since
386 * the page of data will not necessarily go to the flash straight away, UBIFS
387 * has to reserve space on the media for it, which is done by means of
388 * budgeting.
389 *
390 * This is the hot-path of the file-system and we are trying to optimize it as
391 * much as possible. For this reasons it is split on 2 parts - slow and fast.
392 *
393 * There many budgeting cases:
394 * o a new page is appended - we have to budget for a new page and for
395 * changing the inode; however, if the inode is already dirty, there is
396 * no need to budget for it;
397 * o an existing clean page is changed - we have budget for it; if the page
398 * does not exist on the media (a hole), we have to budget for a new
399 * page; otherwise, we may budget for changing an existing page; the
400 * difference between these cases is that changing an existing page does
401 * not introduce anything new to the FS indexing information, so it does
402 * not grow, and smaller budget is acquired in this case;
403 * o an existing dirty page is changed - no need to budget at all, because
404 * the page budget has been acquired by earlier, when the page has been
405 * marked dirty.
406 *
407 * UBIFS budgeting sub-system may force write-back if it thinks there is no
408 * space to reserve. This imposes some locking restrictions and makes it
409 * impossible to take into account the above cases, and makes it impossible to
410 * optimize budgeting.
411 *
412 * The solution for this is that the fast path of 'ubifs_write_begin()' assumes
413 * there is a plenty of flash space and the budget will be acquired quickly,
414 * without forcing write-back. The slow path does not make this assumption.
415 */
416static int ubifs_write_begin(struct file *file, struct address_space *mapping,
417 loff_t pos, unsigned len, unsigned flags,
418 struct page **pagep, void **fsdata)
419{
420 struct inode *inode = mapping->host;
421 struct ubifs_info *c = inode->i_sb->s_fs_info;
422 struct ubifs_inode *ui = ubifs_inode(inode);
423 pgoff_t index = pos >> PAGE_CACHE_SHIFT;
424 int uninitialized_var(err), appending = !!(pos + len > inode->i_size);
425 struct page *page;
426
427
428 ubifs_assert(ubifs_inode(inode)->ui_size == inode->i_size);
429
430 if (unlikely(c->ro_media))
431 return -EROFS;
432
433 /* Try out the fast-path part first */
434 page = __grab_cache_page(mapping, index);
435 if (unlikely(!page))
436 return -ENOMEM;
437
438 if (!PageUptodate(page)) {
439 /* The page is not loaded from the flash */
440 if (!(pos & PAGE_CACHE_MASK) && len == PAGE_CACHE_SIZE)
441 /*
442 * We change whole page so no need to load it. But we
443 * have to set the @PG_checked flag to make the further
444 * code the page is new. This might be not true, but it
445 * is better to budget more that to read the page from
446 * the media.
447 */
448 SetPageChecked(page);
449 else {
450 err = do_readpage(page);
451 if (err) {
452 unlock_page(page);
453 page_cache_release(page);
454 return err;
455 }
456 }
457
458 SetPageUptodate(page);
459 ClearPageError(page);
460 }
461
462 err = allocate_budget(c, page, ui, appending);
463 if (unlikely(err)) {
464 ubifs_assert(err == -ENOSPC);
465 /*
466 * Budgeting failed which means it would have to force
467 * write-back but didn't, because we set the @fast flag in the
468 * request. Write-back cannot be done now, while we have the
469 * page locked, because it would deadlock. Unlock and free
470 * everything and fall-back to slow-path.
471 */
472 if (appending) {
473 ubifs_assert(mutex_is_locked(&ui->ui_mutex));
474 mutex_unlock(&ui->ui_mutex);
475 }
476 unlock_page(page);
477 page_cache_release(page);
478
479 return write_begin_slow(mapping, pos, len, pagep);
480 }
481
482 /*
483 * Whee, we aquired budgeting quickly - without involving
484 * garbage-collection, committing or forceing write-back. We return
485 * with @ui->ui_mutex locked if we are appending pages, and unlocked
486 * otherwise. This is an optimization (slightly hacky though).
487 */
488 *pagep = page;
489 return 0;
490
491}
492
493/**
494 * cancel_budget - cancel budget.
495 * @c: UBIFS file-system description object
496 * @page: page to cancel budget for
497 * @ui: UBIFS inode object the page belongs to
498 * @appending: non-zero if the page is appended
499 *
500 * This is a helper function for a page write operation. It unlocks the
501 * @ui->ui_mutex in case of appending.
502 */
503static void cancel_budget(struct ubifs_info *c, struct page *page,
504 struct ubifs_inode *ui, int appending)
505{
506 if (appending) {
507 if (!ui->dirty)
508 ubifs_release_dirty_inode_budget(c, ui);
509 mutex_unlock(&ui->ui_mutex);
510 }
511 if (!PagePrivate(page)) {
512 if (PageChecked(page))
513 release_new_page_budget(c);
514 else
515 release_existing_page_budget(c);
516 }
517}
518
519static int ubifs_write_end(struct file *file, struct address_space *mapping,
520 loff_t pos, unsigned len, unsigned copied,
521 struct page *page, void *fsdata)
522{
523 struct inode *inode = mapping->host;
524 struct ubifs_inode *ui = ubifs_inode(inode);
525 struct ubifs_info *c = inode->i_sb->s_fs_info;
526 loff_t end_pos = pos + len;
527 int appending = !!(end_pos > inode->i_size);
528
529 dbg_gen("ino %lu, pos %llu, pg %lu, len %u, copied %d, i_size %lld",
530 inode->i_ino, pos, page->index, len, copied, inode->i_size);
531
532 if (unlikely(copied < len && len == PAGE_CACHE_SIZE)) {
533 /*
534 * VFS copied less data to the page that it intended and
535 * declared in its '->write_begin()' call via the @len
536 * argument. If the page was not up-to-date, and @len was
537 * @PAGE_CACHE_SIZE, the 'ubifs_write_begin()' function did
538 * not load it from the media (for optimization reasons). This
539 * means that part of the page contains garbage. So read the
540 * page now.
541 */
542 dbg_gen("copied %d instead of %d, read page and repeat",
543 copied, len);
544 cancel_budget(c, page, ui, appending);
545
546 /*
547 * Return 0 to force VFS to repeat the whole operation, or the
548 * error code if 'do_readpage()' failes.
549 */
550 copied = do_readpage(page);
551 goto out;
552 }
553
554 if (!PagePrivate(page)) {
555 SetPagePrivate(page);
556 atomic_long_inc(&c->dirty_pg_cnt);
557 __set_page_dirty_nobuffers(page);
558 }
559
560 if (appending) {
561 i_size_write(inode, end_pos);
562 ui->ui_size = end_pos;
563 /*
564 * Note, we do not set @I_DIRTY_PAGES (which means that the
565 * inode has dirty pages), this has been done in
566 * '__set_page_dirty_nobuffers()'.
567 */
568 __mark_inode_dirty(inode, I_DIRTY_DATASYNC);
569 ubifs_assert(mutex_is_locked(&ui->ui_mutex));
570 mutex_unlock(&ui->ui_mutex);
571 }
572
573out:
574 unlock_page(page);
575 page_cache_release(page);
576 return copied;
577}
578
/*
 * Read a page via 'do_readpage()' and unlock it. The result of
 * 'do_readpage()' is not propagated: this function always returns zero.
 */
static int ubifs_readpage(struct file *file, struct page *page)
{
	(void)do_readpage(page);
	unlock_page(page);

	return 0;
}
585
586static int do_writepage(struct page *page, int len)
587{
588 int err = 0, i, blen;
589 unsigned int block;
590 void *addr;
591 union ubifs_key key;
592 struct inode *inode = page->mapping->host;
593 struct ubifs_info *c = inode->i_sb->s_fs_info;
594
595#ifdef UBIFS_DEBUG
596 spin_lock(&ui->ui_lock);
597 ubifs_assert(page->index <= ui->synced_i_size << PAGE_CACHE_SIZE);
598 spin_unlock(&ui->ui_lock);
599#endif
600
601 /* Update radix tree tags */
602 set_page_writeback(page);
603
604 addr = kmap(page);
605 block = page->index << UBIFS_BLOCKS_PER_PAGE_SHIFT;
606 i = 0;
607 while (len) {
608 blen = min_t(int, len, UBIFS_BLOCK_SIZE);
609 data_key_init(c, &key, inode->i_ino, block);
610 err = ubifs_jnl_write_data(c, inode, &key, addr, blen);
611 if (err)
612 break;
613 if (++i >= UBIFS_BLOCKS_PER_PAGE)
614 break;
615 block += 1;
616 addr += blen;
617 len -= blen;
618 }
619 if (err) {
620 SetPageError(page);
621 ubifs_err("cannot write page %lu of inode %lu, error %d",
622 page->index, inode->i_ino, err);
623 ubifs_ro_mode(c, err);
624 }
625
626 ubifs_assert(PagePrivate(page));
627 if (PageChecked(page))
628 release_new_page_budget(c);
629 else
630 release_existing_page_budget(c);
631
632 atomic_long_dec(&c->dirty_pg_cnt);
633 ClearPagePrivate(page);
634 ClearPageChecked(page);
635
636 kunmap(page);
637 unlock_page(page);
638 end_page_writeback(page);
639 return err;
640}
641
642/*
643 * When writing-back dirty inodes, VFS first writes-back pages belonging to the
644 * inode, then the inode itself. For UBIFS this may cause a problem. Consider a
645 * situation when we have an inode with size 0, then a megabyte of data is
646 * appended to the inode, then write-back starts and flushes some amount of the
647 * dirty pages, the journal becomes full, commit happens and finishes, and then
648 * an unclean reboot happens. When the file system is mounted next time, the
649 * inode size would still be 0, but there would be many pages which are beyond
650 * the inode size, they would be indexed and consume flash space. Because the
651 * journal has been committed, the replay would not be able to detect this
652 * situation and correct the inode size. This means UBIFS would have to scan
653 * whole index and correct all inode sizes, which is long and unacceptable.
654 *
655 * To prevent situations like this, UBIFS writes pages back only if they are
656 * within last synchronized inode size, i.e. the size which has been
657 * written to the flash media last time. Otherwise, UBIFS forces inode
658 * write-back, thus making sure the on-flash inode contains current inode size,
659 * and then keeps writing pages back.
660 *
661 * Some locking issues explanation. 'ubifs_writepage()' first is called with
662 * the page locked, and it locks @ui_mutex. However, write-back does not take inode
663 * @i_mutex, which means other VFS operations may be run on this inode at the
664 * same time. And the problematic one is truncation to smaller size, from where
665 * we have to call 'vmtruncate()', which first changes @inode->i_size, then
666 * drops the truncated pages. And while dropping the pages, it takes the page
667 * lock. This means that 'do_truncation()' cannot call 'vmtruncate()' with
668 * @ui_mutex locked, because it would deadlock with 'ubifs_writepage()'. This
669 * means that @inode->i_size is changed while @ui_mutex is unlocked.
670 *
671 * But in 'ubifs_writepage()' we have to guarantee that we do not write beyond
672 * inode size. How do we do this if @inode->i_size may become smaller while we
673 * are in the middle of 'ubifs_writepage()'? The UBIFS solution is the
674 * @ui->ui_size "shadow" field which UBIFS uses instead of @inode->i_size
675 * internally and updates it under @ui_mutex.
676 *
677 * Q: why we do not worry that if we race with truncation, we may end up with a
678 * situation when the inode is truncated while we are in the middle of
679 * 'do_writepage()', so we do write beyond inode size?
680 * A: If we are in the middle of 'do_writepage()', truncation would be locked
681 * on the page lock and it would not write the truncated inode node to the
682 * journal before we have finished.
683 */
684static int ubifs_writepage(struct page *page, struct writeback_control *wbc)
685{
686 struct inode *inode = page->mapping->host;
687 struct ubifs_inode *ui = ubifs_inode(inode);
688 loff_t i_size = i_size_read(inode), synced_i_size;
689 pgoff_t end_index = i_size >> PAGE_CACHE_SHIFT;
690 int err, len = i_size & (PAGE_CACHE_SIZE - 1);
691 void *kaddr;
692
693 dbg_gen("ino %lu, pg %lu, pg flags %#lx",
694 inode->i_ino, page->index, page->flags);
695 ubifs_assert(PagePrivate(page));
696
697 /* Is the page fully outside @i_size? (truncate in progress) */
698 if (page->index > end_index || (page->index == end_index && !len)) {
699 err = 0;
700 goto out_unlock;
701 }
702
703 spin_lock(&ui->ui_lock);
704 synced_i_size = ui->synced_i_size;
705 spin_unlock(&ui->ui_lock);
706
707 /* Is the page fully inside @i_size? */
708 if (page->index < end_index) {
709 if (page->index >= synced_i_size >> PAGE_CACHE_SHIFT) {
710 err = inode->i_sb->s_op->write_inode(inode, 1);
711 if (err)
712 goto out_unlock;
713 /*
714 * The inode has been written, but the write-buffer has
715 * not been synchronized, so in case of an unclean
716 * reboot we may end up with some pages beyond inode
717 * size, but they would be in the journal (because
718 * commit flushes write buffers) and recovery would deal
719 * with this.
720 */
721 }
722 return do_writepage(page, PAGE_CACHE_SIZE);
723 }
724
725 /*
726 * The page straddles @i_size. It must be zeroed out on each and every
727 * writepage invocation because it may be mmapped. "A file is mapped
728 * in multiples of the page size. For a file that is not a multiple of
729 * the page size, the remaining memory is zeroed when mapped, and
730 * writes to that region are not written out to the file."
731 */
732 kaddr = kmap_atomic(page, KM_USER0);
733 memset(kaddr + len, 0, PAGE_CACHE_SIZE - len);
734 flush_dcache_page(page);
735 kunmap_atomic(kaddr, KM_USER0);
736
737 if (i_size > synced_i_size) {
738 err = inode->i_sb->s_op->write_inode(inode, 1);
739 if (err)
740 goto out_unlock;
741 }
742
743 return do_writepage(page, len);
744
745out_unlock:
746 unlock_page(page);
747 return err;
748}
749
750/**
751 * do_attr_changes - change inode attributes.
752 * @inode: inode to change attributes for
753 * @attr: describes attributes to change
754 */
755static void do_attr_changes(struct inode *inode, const struct iattr *attr)
756{
757 if (attr->ia_valid & ATTR_UID)
758 inode->i_uid = attr->ia_uid;
759 if (attr->ia_valid & ATTR_GID)
760 inode->i_gid = attr->ia_gid;
761 if (attr->ia_valid & ATTR_ATIME)
762 inode->i_atime = timespec_trunc(attr->ia_atime,
763 inode->i_sb->s_time_gran);
764 if (attr->ia_valid & ATTR_MTIME)
765 inode->i_mtime = timespec_trunc(attr->ia_mtime,
766 inode->i_sb->s_time_gran);
767 if (attr->ia_valid & ATTR_CTIME)
768 inode->i_ctime = timespec_trunc(attr->ia_ctime,
769 inode->i_sb->s_time_gran);
770 if (attr->ia_valid & ATTR_MODE) {
771 umode_t mode = attr->ia_mode;
772
773 if (!in_group_p(inode->i_gid) && !capable(CAP_FSETID))
774 mode &= ~S_ISGID;
775 inode->i_mode = mode;
776 }
777}
778
779/**
780 * do_truncation - truncate an inode.
781 * @c: UBIFS file-system description object
782 * @inode: inode to truncate
783 * @attr: inode attribute changes description
784 *
785 * This function implements VFS '->setattr()' call when the inode is truncated
786 * to a smaller size. Returns zero in case of success and a negative error code
787 * in case of failure.
788 */
789static int do_truncation(struct ubifs_info *c, struct inode *inode,
790 const struct iattr *attr)
791{
792 int err;
793 struct ubifs_budget_req req;
794 loff_t old_size = inode->i_size, new_size = attr->ia_size;
795 int offset = new_size & (UBIFS_BLOCK_SIZE - 1);
796 struct ubifs_inode *ui = ubifs_inode(inode);
797
798 dbg_gen("ino %lu, size %lld -> %lld", inode->i_ino, old_size, new_size);
799 memset(&req, 0, sizeof(struct ubifs_budget_req));
800
801 /*
802 * If this is truncation to a smaller size, and we do not truncate on a
803 * block boundary, budget for changing one data block, because the last
804 * block will be re-written.
805 */
806 if (new_size & (UBIFS_BLOCK_SIZE - 1))
807 req.dirtied_page = 1;
808
809 req.dirtied_ino = 1;
810 /* A funny way to budget for truncation node */
811 req.dirtied_ino_d = UBIFS_TRUN_NODE_SZ;
812 err = ubifs_budget_space(c, &req);
813 if (err)
814 return err;
815
816 err = vmtruncate(inode, new_size);
817 if (err)
818 goto out_budg;
819
820 if (offset) {
821 pgoff_t index = new_size >> PAGE_CACHE_SHIFT;
822 struct page *page;
823
824 page = find_lock_page(inode->i_mapping, index);
825 if (page) {
826 if (PageDirty(page)) {
827 /*
828 * 'ubifs_jnl_truncate()' will try to truncate
829 * the last data node, but it contains
830 * out-of-date data because the page is dirty.
831 * Write the page now, so that
832 * 'ubifs_jnl_truncate()' will see an already
833 * truncated (and up to date) data node.
834 */
835 ubifs_assert(PagePrivate(page));
836
837 clear_page_dirty_for_io(page);
838 if (UBIFS_BLOCKS_PER_PAGE_SHIFT)
839 offset = new_size &
840 (PAGE_CACHE_SIZE - 1);
841 err = do_writepage(page, offset);
842 page_cache_release(page);
843 if (err)
844 goto out_budg;
845 /*
846 * We could now tell 'ubifs_jnl_truncate()' not
847 * to read the last block.
848 */
849 } else {
850 /*
851 * We could 'kmap()' the page and pass the data
852 * to 'ubifs_jnl_truncate()' to save it from
853 * having to read it.
854 */
855 unlock_page(page);
856 page_cache_release(page);
857 }
858 }
859 }
860
861 mutex_lock(&ui->ui_mutex);
862 ui->ui_size = inode->i_size;
863 /* Truncation changes inode [mc]time */
864 inode->i_mtime = inode->i_ctime = ubifs_current_time(inode);
865 /* The other attributes may be changed at the same time as well */
866 do_attr_changes(inode, attr);
867
868 err = ubifs_jnl_truncate(c, inode, old_size, new_size);
869 mutex_unlock(&ui->ui_mutex);
870out_budg:
871 ubifs_release_budget(c, &req);
872 return err;
873}
874
875/**
876 * do_setattr - change inode attributes.
877 * @c: UBIFS file-system description object
878 * @inode: inode to change attributes for
879 * @attr: inode attribute changes description
880 *
881 * This function implements VFS '->setattr()' call for all cases except
882 * truncations to smaller size. Returns zero in case of success and a negative
883 * error code in case of failure.
884 */
885static int do_setattr(struct ubifs_info *c, struct inode *inode,
886 const struct iattr *attr)
887{
888 int err, release;
889 loff_t new_size = attr->ia_size;
890 struct ubifs_inode *ui = ubifs_inode(inode);
891 struct ubifs_budget_req req = { .dirtied_ino = 1,
892 .dirtied_ino_d = ui->data_len };
893
894 err = ubifs_budget_space(c, &req);
895 if (err)
896 return err;
897
898 if (attr->ia_valid & ATTR_SIZE) {
899 dbg_gen("size %lld -> %lld", inode->i_size, new_size);
900 err = vmtruncate(inode, new_size);
901 if (err)
902 goto out;
903 }
904
905 mutex_lock(&ui->ui_mutex);
906 if (attr->ia_valid & ATTR_SIZE) {
907 /* Truncation changes inode [mc]time */
908 inode->i_mtime = inode->i_ctime = ubifs_current_time(inode);
909 /* 'vmtruncate()' changed @i_size, update @ui_size */
910 ui->ui_size = inode->i_size;
911 }
912
913 do_attr_changes(inode, attr);
914
915 release = ui->dirty;
916 if (attr->ia_valid & ATTR_SIZE)
917 /*
918 * Inode length changed, so we have to make sure
919 * @I_DIRTY_DATASYNC is set.
920 */
921 __mark_inode_dirty(inode, I_DIRTY_SYNC | I_DIRTY_DATASYNC);
922 else
923 mark_inode_dirty_sync(inode);
924 mutex_unlock(&ui->ui_mutex);
925
926 if (release)
927 ubifs_release_budget(c, &req);
928 if (IS_SYNC(inode))
929 err = inode->i_sb->s_op->write_inode(inode, 1);
930 return err;
931
932out:
933 ubifs_release_budget(c, &req);
934 return err;
935}
936
937int ubifs_setattr(struct dentry *dentry, struct iattr *attr)
938{
939 int err;
940 struct inode *inode = dentry->d_inode;
941 struct ubifs_info *c = inode->i_sb->s_fs_info;
942
943 dbg_gen("ino %lu, ia_valid %#x", inode->i_ino, attr->ia_valid);
944 err = inode_change_ok(inode, attr);
945 if (err)
946 return err;
947
948 err = dbg_check_synced_i_size(inode);
949 if (err)
950 return err;
951
952 if ((attr->ia_valid & ATTR_SIZE) && attr->ia_size < inode->i_size)
953 /* Truncation to a smaller size */
954 err = do_truncation(c, inode, attr);
955 else
956 err = do_setattr(c, inode, attr);
957
958 return err;
959}
960
961static void ubifs_invalidatepage(struct page *page, unsigned long offset)
962{
963 struct inode *inode = page->mapping->host;
964 struct ubifs_info *c = inode->i_sb->s_fs_info;
965
966 ubifs_assert(PagePrivate(page));
967 if (offset)
968 /* Partial page remains dirty */
969 return;
970
971 if (PageChecked(page))
972 release_new_page_budget(c);
973 else
974 release_existing_page_budget(c);
975
976 atomic_long_dec(&c->dirty_pg_cnt);
977 ClearPagePrivate(page);
978 ClearPageChecked(page);
979}
980
981static void *ubifs_follow_link(struct dentry *dentry, struct nameidata *nd)
982{
983 struct ubifs_inode *ui = ubifs_inode(dentry->d_inode);
984
985 nd_set_link(nd, ui->data);
986 return NULL;
987}
988
989int ubifs_fsync(struct file *file, struct dentry *dentry, int datasync)
990{
991 struct inode *inode = dentry->d_inode;
992 struct ubifs_info *c = inode->i_sb->s_fs_info;
993 int err;
994
995 dbg_gen("syncing inode %lu", inode->i_ino);
996
997 /*
998 * VFS has already synchronized dirty pages for this inode. Synchronize
999 * the inode unless this is a 'datasync()' call.
1000 */
1001 if (!datasync || (inode->i_state & I_DIRTY_DATASYNC)) {
1002 err = inode->i_sb->s_op->write_inode(inode, 1);
1003 if (err)
1004 return err;
1005 }
1006
1007 /*
1008 * Nodes related to this inode may still sit in a write-buffer. Flush
1009 * them.
1010 */
1011 err = ubifs_sync_wbufs_by_inode(c, inode);
1012 if (err)
1013 return err;
1014
1015 return 0;
1016}
1017
1018/**
1019 * mctime_update_needed - check if mtime or ctime update is needed.
1020 * @inode: the inode to do the check for
1021 * @now: current time
1022 *
1023 * This helper function checks if the inode mtime/ctime should be updated or
1024 * not. If current values of the time-stamps are within the UBIFS inode time
1025 * granularity, they are not updated. This is an optimization.
1026 */
1027static inline int mctime_update_needed(const struct inode *inode,
1028 const struct timespec *now)
1029{
1030 if (!timespec_equal(&inode->i_mtime, now) ||
1031 !timespec_equal(&inode->i_ctime, now))
1032 return 1;
1033 return 0;
1034}
1035
1036/**
1037 * update_ctime - update mtime and ctime of an inode.
1038 * @c: UBIFS file-system description object
1039 * @inode: inode to update
1040 *
1041 * This function updates mtime and ctime of the inode if it is not equivalent to
1042 * current time. Returns zero in case of success and a negative error code in
1043 * case of failure.
1044 */
1045static int update_mctime(struct ubifs_info *c, struct inode *inode)
1046{
1047 struct timespec now = ubifs_current_time(inode);
1048 struct ubifs_inode *ui = ubifs_inode(inode);
1049
1050 if (mctime_update_needed(inode, &now)) {
1051 int err, release;
1052 struct ubifs_budget_req req = { .dirtied_ino = 1,
1053 .dirtied_ino_d = ui->data_len };
1054
1055 err = ubifs_budget_space(c, &req);
1056 if (err)
1057 return err;
1058
1059 mutex_lock(&ui->ui_mutex);
1060 inode->i_mtime = inode->i_ctime = ubifs_current_time(inode);
1061 release = ui->dirty;
1062 mark_inode_dirty_sync(inode);
1063 mutex_unlock(&ui->ui_mutex);
1064 if (release)
1065 ubifs_release_budget(c, &req);
1066 }
1067
1068 return 0;
1069}
1070
1071static ssize_t ubifs_aio_write(struct kiocb *iocb, const struct iovec *iov,
1072 unsigned long nr_segs, loff_t pos)
1073{
1074 int err;
1075 ssize_t ret;
1076 struct inode *inode = iocb->ki_filp->f_mapping->host;
1077 struct ubifs_info *c = inode->i_sb->s_fs_info;
1078
1079 err = update_mctime(c, inode);
1080 if (err)
1081 return err;
1082
1083 ret = generic_file_aio_write(iocb, iov, nr_segs, pos);
1084 if (ret < 0)
1085 return ret;
1086
1087 if (ret > 0 && (IS_SYNC(inode) || iocb->ki_filp->f_flags & O_SYNC)) {
1088 err = ubifs_sync_wbufs_by_inode(c, inode);
1089 if (err)
1090 return err;
1091 }
1092
1093 return ret;
1094}
1095
/*
 * Mark a page dirty via '__set_page_dirty_nobuffers()' and return its result.
 * A non-zero result is asserted against.
 */
static int ubifs_set_page_dirty(struct page *page)
{
	int res = __set_page_dirty_nobuffers(page);

	/*
	 * An attempt to dirty a page without budgeting for it - should not
	 * happen.
	 */
	ubifs_assert(res == 0);

	return res;
}
1108
1109static int ubifs_releasepage(struct page *page, gfp_t unused_gfp_flags)
1110{
1111 /*
1112 * An attempt to release a dirty page without budgeting for it - should
1113 * not happen.
1114 */
1115 if (PageWriteback(page))
1116 return 0;
1117 ubifs_assert(PagePrivate(page));
1118 ubifs_assert(0);
1119 ClearPagePrivate(page);
1120 ClearPageChecked(page);
1121 return 1;
1122}
1123
1124/*
1125 * mmap()d file has taken write protection fault and is being made
1126 * writable. UBIFS must ensure page is budgeted for.
1127 */
1128static int ubifs_vm_page_mkwrite(struct vm_area_struct *vma, struct page *page)
1129{
1130 struct inode *inode = vma->vm_file->f_path.dentry->d_inode;
1131 struct ubifs_info *c = inode->i_sb->s_fs_info;
1132 struct timespec now = ubifs_current_time(inode);
1133 struct ubifs_budget_req req = { .new_page = 1 };
1134 int err, update_time;
1135
1136 dbg_gen("ino %lu, pg %lu, i_size %lld", inode->i_ino, page->index,
1137 i_size_read(inode));
1138 ubifs_assert(!(inode->i_sb->s_flags & MS_RDONLY));
1139
1140 if (unlikely(c->ro_media))
1141 return -EROFS;
1142
1143 /*
1144 * We have not locked @page so far so we may budget for changing the
1145 * page. Note, we cannot do this after we locked the page, because
1146 * budgeting may cause write-back which would cause deadlock.
1147 *
1148 * At the moment we do not know whether the page is dirty or not, so we
1149 * assume that it is not and budget for a new page. We could look at
1150 * the @PG_private flag and figure this out, but we may race with write
1151 * back and the page state may change by the time we lock it, so this
1152 * would need additional care. We do not bother with this at the
1153 * moment, although it might be good idea to do. Instead, we allocate
1154 * budget for a new page and amend it later on if the page was in fact
1155 * dirty.
1156 *
1157 * The budgeting-related logic of this function is similar to what we
1158 * do in 'ubifs_write_begin()' and 'ubifs_write_end()'. Glance there
1159 * for more comments.
1160 */
1161 update_time = mctime_update_needed(inode, &now);
1162 if (update_time)
1163 /*
1164 * We have to change inode time stamp which requires extra
1165 * budgeting.
1166 */
1167 req.dirtied_ino = 1;
1168
1169 err = ubifs_budget_space(c, &req);
1170 if (unlikely(err)) {
1171 if (err == -ENOSPC)
1172 ubifs_warn("out of space for mmapped file "
1173 "(inode number %lu)", inode->i_ino);
1174 return err;
1175 }
1176
1177 lock_page(page);
1178 if (unlikely(page->mapping != inode->i_mapping ||
1179 page_offset(page) > i_size_read(inode))) {
1180 /* Page got truncated out from underneath us */
1181 err = -EINVAL;
1182 goto out_unlock;
1183 }
1184
1185 if (PagePrivate(page))
1186 release_new_page_budget(c);
1187 else {
1188 if (!PageChecked(page))
1189 ubifs_convert_page_budget(c);
1190 SetPagePrivate(page);
1191 atomic_long_inc(&c->dirty_pg_cnt);
1192 __set_page_dirty_nobuffers(page);
1193 }
1194
1195 if (update_time) {
1196 int release;
1197 struct ubifs_inode *ui = ubifs_inode(inode);
1198
1199 mutex_lock(&ui->ui_mutex);
1200 inode->i_mtime = inode->i_ctime = ubifs_current_time(inode);
1201 release = ui->dirty;
1202 mark_inode_dirty_sync(inode);
1203 mutex_unlock(&ui->ui_mutex);
1204 if (release)
1205 ubifs_release_dirty_inode_budget(c, ui);
1206 }
1207
1208 unlock_page(page);
1209 return 0;
1210
1211out_unlock:
1212 unlock_page(page);
1213 ubifs_release_budget(c, &req);
1214 return err;
1215}
1216
1217static struct vm_operations_struct ubifs_file_vm_ops = {
1218 .fault = filemap_fault,
1219 .page_mkwrite = ubifs_vm_page_mkwrite,
1220};
1221
1222static int ubifs_file_mmap(struct file *file, struct vm_area_struct *vma)
1223{
1224 int err;
1225
1226 /* 'generic_file_mmap()' takes care of NOMMU case */
1227 err = generic_file_mmap(file, vma);
1228 if (err)
1229 return err;
1230 vma->vm_ops = &ubifs_file_vm_ops;
1231 return 0;
1232}
1233
1234struct address_space_operations ubifs_file_address_operations = {
1235 .readpage = ubifs_readpage,
1236 .writepage = ubifs_writepage,
1237 .write_begin = ubifs_write_begin,
1238 .write_end = ubifs_write_end,
1239 .invalidatepage = ubifs_invalidatepage,
1240 .set_page_dirty = ubifs_set_page_dirty,
1241 .releasepage = ubifs_releasepage,
1242};
1243
1244struct inode_operations ubifs_file_inode_operations = {
1245 .setattr = ubifs_setattr,
1246 .getattr = ubifs_getattr,
1247#ifdef CONFIG_UBIFS_FS_XATTR
1248 .setxattr = ubifs_setxattr,
1249 .getxattr = ubifs_getxattr,
1250 .listxattr = ubifs_listxattr,
1251 .removexattr = ubifs_removexattr,
1252#endif
1253};
1254
1255struct inode_operations ubifs_symlink_inode_operations = {
1256 .readlink = generic_readlink,
1257 .follow_link = ubifs_follow_link,
1258 .setattr = ubifs_setattr,
1259 .getattr = ubifs_getattr,
1260};
1261
1262struct file_operations ubifs_file_operations = {
1263 .llseek = generic_file_llseek,
1264 .read = do_sync_read,
1265 .write = do_sync_write,
1266 .aio_read = generic_file_aio_read,
1267 .aio_write = ubifs_aio_write,
1268 .mmap = ubifs_file_mmap,
1269 .fsync = ubifs_fsync,
1270 .unlocked_ioctl = ubifs_ioctl,
1271 .splice_read = generic_file_splice_read,
1272#ifdef CONFIG_COMPAT
1273 .compat_ioctl = ubifs_compat_ioctl,
1274#endif
1275};
diff --git a/fs/ubifs/find.c b/fs/ubifs/find.c
new file mode 100644
index 00000000000..10394c54836
--- /dev/null
+++ b/fs/ubifs/find.c
@@ -0,0 +1,975 @@
1/*
2 * This file is part of UBIFS.
3 *
4 * Copyright (C) 2006-2008 Nokia Corporation.
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms of the GNU General Public License version 2 as published by
8 * the Free Software Foundation.
9 *
10 * This program is distributed in the hope that it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13 * more details.
14 *
15 * You should have received a copy of the GNU General Public License along with
16 * this program; if not, write to the Free Software Foundation, Inc., 51
17 * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
18 *
19 * Authors: Artem Bityutskiy (Битюцкий Артём)
20 * Adrian Hunter
21 */
22
23/*
24 * This file contains functions for finding LEBs for various purposes e.g.
25 * garbage collection. In general, lprops category heaps and lists are used
26 * for fast access, falling back on scanning the LPT as a last resort.
27 */
28
29#include <linux/sort.h>
30#include "ubifs.h"
31
32/**
33 * struct scan_data - data provided to scan callback functions
34 * @min_space: minimum number of bytes for which to scan
35 * @pick_free: whether it is OK to scan for empty LEBs
36 * @lnum: LEB number found is returned here
37 * @exclude_index: whether to exclude index LEBs
38 */
39struct scan_data {
40 int min_space;
41 int pick_free;
42 int lnum;
43 int exclude_index;
44};
45
46/**
47 * valuable - determine whether LEB properties are valuable.
48 * @c: the UBIFS file-system description object
49 * @lprops: LEB properties
50 *
51 * This function returns %1 if the LEB properties should be added to the LEB
52 * properties tree in memory. Otherwise %0 is returned.
53 */
54static int valuable(struct ubifs_info *c, const struct ubifs_lprops *lprops)
55{
56 int n, cat = lprops->flags & LPROPS_CAT_MASK;
57 struct ubifs_lpt_heap *heap;
58
59 switch (cat) {
60 case LPROPS_DIRTY:
61 case LPROPS_DIRTY_IDX:
62 case LPROPS_FREE:
63 heap = &c->lpt_heap[cat - 1];
64 if (heap->cnt < heap->max_cnt)
65 return 1;
66 if (lprops->free + lprops->dirty >= c->dark_wm)
67 return 1;
68 return 0;
69 case LPROPS_EMPTY:
70 n = c->lst.empty_lebs + c->freeable_cnt -
71 c->lst.taken_empty_lebs;
72 if (n < c->lsave_cnt)
73 return 1;
74 return 0;
75 case LPROPS_FREEABLE:
76 return 1;
77 case LPROPS_FRDI_IDX:
78 return 1;
79 }
80 return 0;
81}
82
83/**
84 * scan_for_dirty_cb - dirty space scan callback.
85 * @c: the UBIFS file-system description object
86 * @lprops: LEB properties to scan
87 * @in_tree: whether the LEB properties are in main memory
88 * @data: information passed to and from the caller of the scan
89 *
90 * This function returns a code that indicates whether the scan should continue
91 * (%LPT_SCAN_CONTINUE), whether the LEB properties should be added to the tree
92 * in main memory (%LPT_SCAN_ADD), or whether the scan should stop
93 * (%LPT_SCAN_STOP).
94 */
95static int scan_for_dirty_cb(struct ubifs_info *c,
96 const struct ubifs_lprops *lprops, int in_tree,
97 struct scan_data *data)
98{
99 int ret = LPT_SCAN_CONTINUE;
100
101 /* Exclude LEBs that are currently in use */
102 if (lprops->flags & LPROPS_TAKEN)
103 return LPT_SCAN_CONTINUE;
104 /* Determine whether to add these LEB properties to the tree */
105 if (!in_tree && valuable(c, lprops))
106 ret |= LPT_SCAN_ADD;
107 /* Exclude LEBs with too little space */
108 if (lprops->free + lprops->dirty < data->min_space)
109 return ret;
110 /* If specified, exclude index LEBs */
111 if (data->exclude_index && lprops->flags & LPROPS_INDEX)
112 return ret;
113 /* If specified, exclude empty or freeable LEBs */
114 if (lprops->free + lprops->dirty == c->leb_size) {
115 if (!data->pick_free)
116 return ret;
117 /* Exclude LEBs with too little dirty space (unless it is empty) */
118 } else if (lprops->dirty < c->dead_wm)
119 return ret;
120 /* Finally we found space */
121 data->lnum = lprops->lnum;
122 return LPT_SCAN_ADD | LPT_SCAN_STOP;
123}
124
125/**
126 * scan_for_dirty - find a LEB with dirty space.
127 * @c: the UBIFS file-system description object
128 * @min_space: minimum amount free plus dirty space the returned LEB has to
129 * have
130 * @pick_free: if it is OK to return a free or freeable LEB
131 * @exclude_index: whether to exclude index LEBs
132 *
133 * This function returns a pointer to the LEB properties found or a negative
134 * error code.
135 */
136static const struct ubifs_lprops *scan_for_dirty(struct ubifs_info *c,
137 int min_space, int pick_free,
138 int exclude_index)
139{
140 const struct ubifs_lprops *lprops;
141 struct ubifs_lpt_heap *heap;
142 struct scan_data data;
143 int err, i;
144
145 /* There may be an LEB with enough dirty space on the free heap */
146 heap = &c->lpt_heap[LPROPS_FREE - 1];
147 for (i = 0; i < heap->cnt; i++) {
148 lprops = heap->arr[i];
149 if (lprops->free + lprops->dirty < min_space)
150 continue;
151 if (lprops->dirty < c->dead_wm)
152 continue;
153 return lprops;
154 }
155 /*
156 * A LEB may have fallen off of the bottom of the dirty heap, and ended
157 * up as uncategorized even though it has enough dirty space for us now,
158 * so check the uncategorized list. N.B. neither empty nor freeable LEBs
159 * can end up as uncategorized because they are kept on lists not
160 * finite-sized heaps.
161 */
162 list_for_each_entry(lprops, &c->uncat_list, list) {
163 if (lprops->flags & LPROPS_TAKEN)
164 continue;
165 if (lprops->free + lprops->dirty < min_space)
166 continue;
167 if (exclude_index && (lprops->flags & LPROPS_INDEX))
168 continue;
169 if (lprops->dirty < c->dead_wm)
170 continue;
171 return lprops;
172 }
173 /* We have looked everywhere in main memory, now scan the flash */
174 if (c->pnodes_have >= c->pnode_cnt)
175 /* All pnodes are in memory, so skip scan */
176 return ERR_PTR(-ENOSPC);
177 data.min_space = min_space;
178 data.pick_free = pick_free;
179 data.lnum = -1;
180 data.exclude_index = exclude_index;
181 err = ubifs_lpt_scan_nolock(c, -1, c->lscan_lnum,
182 (ubifs_lpt_scan_callback)scan_for_dirty_cb,
183 &data);
184 if (err)
185 return ERR_PTR(err);
186 ubifs_assert(data.lnum >= c->main_first && data.lnum < c->leb_cnt);
187 c->lscan_lnum = data.lnum;
188 lprops = ubifs_lpt_lookup_dirty(c, data.lnum);
189 if (IS_ERR(lprops))
190 return lprops;
191 ubifs_assert(lprops->lnum == data.lnum);
192 ubifs_assert(lprops->free + lprops->dirty >= min_space);
193 ubifs_assert(lprops->dirty >= c->dead_wm ||
194 (pick_free &&
195 lprops->free + lprops->dirty == c->leb_size));
196 ubifs_assert(!(lprops->flags & LPROPS_TAKEN));
197 ubifs_assert(!exclude_index || !(lprops->flags & LPROPS_INDEX));
198 return lprops;
199}
200
201/**
202 * ubifs_find_dirty_leb - find a dirty LEB for the Garbage Collector.
203 * @c: the UBIFS file-system description object
204 * @ret_lp: LEB properties are returned here on exit
205 * @min_space: minimum amount free plus dirty space the returned LEB has to
206 * have
207 * @pick_free: controls whether it is OK to pick empty or freeable LEBs
208 *
209 * This function tries to find a dirty logical eraseblock which has at least
210 * @min_space free and dirty space. It prefers to take an LEB from the dirty or
211 * dirty index heap, and it falls-back to LPT scanning if the heaps are empty
212 * or do not have an LEB which satisfies the @min_space criteria.
213 *
214 * Note:
215 * o LEBs which have less than dead watermark of dirty space are never picked
216 * by this function;
217 *
218 * Returns zero and the LEB properties of
219 * found dirty LEB in case of success, %-ENOSPC if no dirty LEB was found and a
220 * negative error code in case of other failures. The returned LEB is marked as
221 * "taken".
222 *
223 * The additional @pick_free argument controls if this function has to return a
224 * free or freeable LEB if one is present. For example, GC must set it to %1,
225 * when called from the journal space reservation function, because the
226 * appearance of free space may coincide with the loss of enough dirty space
227 * for GC to succeed anyway.
228 *
229 * In contrast, if the Garbage Collector is called from budgeting, it should
230 * just make free space, not return LEBs which are already free or freeable.
231 *
232 * In addition @pick_free is set to %2 by the recovery process in order to
233 * recover gc_lnum in which case an index LEB must not be returned.
234 */
235int ubifs_find_dirty_leb(struct ubifs_info *c, struct ubifs_lprops *ret_lp,
236 int min_space, int pick_free)
237{
238 int err = 0, sum, exclude_index = pick_free == 2 ? 1 : 0;
239 const struct ubifs_lprops *lp = NULL, *idx_lp = NULL;
240 struct ubifs_lpt_heap *heap, *idx_heap;
241
242 ubifs_get_lprops(c);
243
244 if (pick_free) {
245 int lebs, rsvd_idx_lebs = 0;
246
247 spin_lock(&c->space_lock);
248 lebs = c->lst.empty_lebs;
249 lebs += c->freeable_cnt - c->lst.taken_empty_lebs;
250
251 /*
252 * Note, the index may consume more LEBs than have been reserved
253 * for it. It is OK because it might be consolidated by GC.
254 * But if the index takes fewer LEBs than it is reserved for it,
255 * this function must avoid picking those reserved LEBs.
256 */
257 if (c->min_idx_lebs >= c->lst.idx_lebs) {
258 rsvd_idx_lebs = c->min_idx_lebs - c->lst.idx_lebs;
259 exclude_index = 1;
260 }
261 spin_unlock(&c->space_lock);
262
263 /* Check if there are enough free LEBs for the index */
264 if (rsvd_idx_lebs < lebs) {
265 /* OK, try to find an empty LEB */
266 lp = ubifs_fast_find_empty(c);
267 if (lp)
268 goto found;
269
270 /* Or a freeable LEB */
271 lp = ubifs_fast_find_freeable(c);
272 if (lp)
273 goto found;
274 } else
275 /*
276 * We cannot pick free/freeable LEBs in the below code.
277 */
278 pick_free = 0;
279 } else {
280 spin_lock(&c->space_lock);
281 exclude_index = (c->min_idx_lebs >= c->lst.idx_lebs);
282 spin_unlock(&c->space_lock);
283 }
284
285 /* Look on the dirty and dirty index heaps */
286 heap = &c->lpt_heap[LPROPS_DIRTY - 1];
287 idx_heap = &c->lpt_heap[LPROPS_DIRTY_IDX - 1];
288
289 if (idx_heap->cnt && !exclude_index) {
290 idx_lp = idx_heap->arr[0];
291 sum = idx_lp->free + idx_lp->dirty;
292 /*
293 * Since we reserve twice as much space for the index than it
294 * actually takes, it does not make sense to pick indexing LEBs
295 * with less than half LEB of dirty space.
296 */
297 if (sum < min_space || sum < c->half_leb_size)
298 idx_lp = NULL;
299 }
300
301 if (heap->cnt) {
302 lp = heap->arr[0];
303 if (lp->dirty + lp->free < min_space)
304 lp = NULL;
305 }
306
307 /* Pick the LEB with most space */
308 if (idx_lp && lp) {
309 if (idx_lp->free + idx_lp->dirty >= lp->free + lp->dirty)
310 lp = idx_lp;
311 } else if (idx_lp && !lp)
312 lp = idx_lp;
313
314 if (lp) {
315 ubifs_assert(lp->dirty >= c->dead_wm);
316 goto found;
317 }
318
319 /* Did not find a dirty LEB on the dirty heaps, have to scan */
320 dbg_find("scanning LPT for a dirty LEB");
321 lp = scan_for_dirty(c, min_space, pick_free, exclude_index);
322 if (IS_ERR(lp)) {
323 err = PTR_ERR(lp);
324 goto out;
325 }
326 ubifs_assert(lp->dirty >= c->dead_wm ||
327 (pick_free && lp->free + lp->dirty == c->leb_size));
328
329found:
330 dbg_find("found LEB %d, free %d, dirty %d, flags %#x",
331 lp->lnum, lp->free, lp->dirty, lp->flags);
332
333 lp = ubifs_change_lp(c, lp, LPROPS_NC, LPROPS_NC,
334 lp->flags | LPROPS_TAKEN, 0);
335 if (IS_ERR(lp)) {
336 err = PTR_ERR(lp);
337 goto out;
338 }
339
340 memcpy(ret_lp, lp, sizeof(struct ubifs_lprops));
341
342out:
343 ubifs_release_lprops(c);
344 return err;
345}
346
347/**
348 * scan_for_free_cb - free space scan callback.
349 * @c: the UBIFS file-system description object
350 * @lprops: LEB properties to scan
351 * @in_tree: whether the LEB properties are in main memory
352 * @data: information passed to and from the caller of the scan
353 *
354 * This function returns a code that indicates whether the scan should continue
355 * (%LPT_SCAN_CONTINUE), whether the LEB properties should be added to the tree
356 * in main memory (%LPT_SCAN_ADD), or whether the scan should stop
357 * (%LPT_SCAN_STOP).
358 */
359static int scan_for_free_cb(struct ubifs_info *c,
360 const struct ubifs_lprops *lprops, int in_tree,
361 struct scan_data *data)
362{
363 int ret = LPT_SCAN_CONTINUE;
364
365 /* Exclude LEBs that are currently in use */
366 if (lprops->flags & LPROPS_TAKEN)
367 return LPT_SCAN_CONTINUE;
368 /* Determine whether to add these LEB properties to the tree */
369 if (!in_tree && valuable(c, lprops))
370 ret |= LPT_SCAN_ADD;
371 /* Exclude index LEBs */
372 if (lprops->flags & LPROPS_INDEX)
373 return ret;
374 /* Exclude LEBs with too little space */
375 if (lprops->free < data->min_space)
376 return ret;
377 /* If specified, exclude empty LEBs */
378 if (!data->pick_free && lprops->free == c->leb_size)
379 return ret;
380 /*
381 * LEBs that have only free and dirty space must not be allocated
382 * because they may have been unmapped already or they may have data
383 * that is obsolete only because of nodes that are still sitting in a
384 * wbuf.
385 */
386 if (lprops->free + lprops->dirty == c->leb_size && lprops->dirty > 0)
387 return ret;
388 /* Finally we found space */
389 data->lnum = lprops->lnum;
390 return LPT_SCAN_ADD | LPT_SCAN_STOP;
391}
392
393/**
394 * do_find_free_space - find a data LEB with free space.
395 * @c: the UBIFS file-system description object
396 * @min_space: minimum amount of free space required
397 * @pick_free: whether it is OK to scan for empty LEBs
398 * @squeeze: whether to try to find space in a non-empty LEB first
399 *
400 * This function returns a pointer to the LEB properties found or a negative
401 * error code.
402 */
403static
404const struct ubifs_lprops *do_find_free_space(struct ubifs_info *c,
405 int min_space, int pick_free,
406 int squeeze)
407{
408 const struct ubifs_lprops *lprops;
409 struct ubifs_lpt_heap *heap;
410 struct scan_data data;
411 int err, i;
412
413 if (squeeze) {
414 lprops = ubifs_fast_find_free(c);
415 if (lprops && lprops->free >= min_space)
416 return lprops;
417 }
418 if (pick_free) {
419 lprops = ubifs_fast_find_empty(c);
420 if (lprops)
421 return lprops;
422 }
423 if (!squeeze) {
424 lprops = ubifs_fast_find_free(c);
425 if (lprops && lprops->free >= min_space)
426 return lprops;
427 }
428 /* There may be an LEB with enough free space on the dirty heap */
429 heap = &c->lpt_heap[LPROPS_DIRTY - 1];
430 for (i = 0; i < heap->cnt; i++) {
431 lprops = heap->arr[i];
432 if (lprops->free >= min_space)
433 return lprops;
434 }
435 /*
436 * A LEB may have fallen off of the bottom of the free heap, and ended
437 * up as uncategorized even though it has enough free space for us now,
438 * so check the uncategorized list. N.B. neither empty nor freeable LEBs
439 * can end up as uncategorized because they are kept on lists not
440 * finite-sized heaps.
441 */
442 list_for_each_entry(lprops, &c->uncat_list, list) {
443 if (lprops->flags & LPROPS_TAKEN)
444 continue;
445 if (lprops->flags & LPROPS_INDEX)
446 continue;
447 if (lprops->free >= min_space)
448 return lprops;
449 }
450 /* We have looked everywhere in main memory, now scan the flash */
451 if (c->pnodes_have >= c->pnode_cnt)
452 /* All pnodes are in memory, so skip scan */
453 return ERR_PTR(-ENOSPC);
454 data.min_space = min_space;
455 data.pick_free = pick_free;
456 data.lnum = -1;
457 err = ubifs_lpt_scan_nolock(c, -1, c->lscan_lnum,
458 (ubifs_lpt_scan_callback)scan_for_free_cb,
459 &data);
460 if (err)
461 return ERR_PTR(err);
462 ubifs_assert(data.lnum >= c->main_first && data.lnum < c->leb_cnt);
463 c->lscan_lnum = data.lnum;
464 lprops = ubifs_lpt_lookup_dirty(c, data.lnum);
465 if (IS_ERR(lprops))
466 return lprops;
467 ubifs_assert(lprops->lnum == data.lnum);
468 ubifs_assert(lprops->free >= min_space);
469 ubifs_assert(!(lprops->flags & LPROPS_TAKEN));
470 ubifs_assert(!(lprops->flags & LPROPS_INDEX));
471 return lprops;
472}
473
474/**
475 * ubifs_find_free_space - find a data LEB with free space.
476 * @c: the UBIFS file-system description object
477 * @min_space: minimum amount of required free space
478 * @free: contains amount of free space in the LEB on exit
479 * @squeeze: whether to try to find space in a non-empty LEB first
480 *
481 * This function looks for an LEB with at least @min_space bytes of free space.
482 * It tries to find an empty LEB if possible. If no empty LEBs are available,
483 * this function searches for a non-empty data LEB. The returned LEB is marked
484 * as "taken".
485 *
486 * This function returns the found LEB number in case of success, %-ENOSPC if
487 * it failed to find a LEB with @min_space bytes of free space, and a negative
488 * error code in case of other failures.
489 */
490int ubifs_find_free_space(struct ubifs_info *c, int min_space, int *free,
491 int squeeze)
492{
493 const struct ubifs_lprops *lprops;
494 int lebs, rsvd_idx_lebs, pick_free = 0, err, lnum, flags;
495
496 dbg_find("min_space %d", min_space);
497 ubifs_get_lprops(c);
498
499 /* Check if there are enough empty LEBs for commit */
500 spin_lock(&c->space_lock);
501 if (c->min_idx_lebs > c->lst.idx_lebs)
502 rsvd_idx_lebs = c->min_idx_lebs - c->lst.idx_lebs;
503 else
504 rsvd_idx_lebs = 0;
505 lebs = c->lst.empty_lebs + c->freeable_cnt + c->idx_gc_cnt -
506 c->lst.taken_empty_lebs;
507 ubifs_assert(lebs + c->lst.idx_lebs >= c->min_idx_lebs);
508 if (rsvd_idx_lebs < lebs)
509 /*
510 * OK to allocate an empty LEB, but we still don't want to go
511 * looking for one if there aren't any.
512 */
513 if (c->lst.empty_lebs - c->lst.taken_empty_lebs > 0) {
514 pick_free = 1;
515 /*
516 * Because we release the space lock, we must account
517 * for this allocation here. After the LEB properties
518 * flags have been updated, we subtract one. Note, the
519 * result of this is that lprops also decreases
520 * @taken_empty_lebs in 'ubifs_change_lp()', so it is
521 * off by one for a short period of time which may
522 * introduce a small disturbance to budgeting
523 * calculations, but this is harmless because at the
524 * worst case this would make the budgeting subsystem
525 * be more pessimistic than needed.
526 *
527 * Fundamentally, this is about serialization of the
528 * budgeting and lprops subsystems. We could make the
529 * @space_lock a mutex and avoid dropping it before
530 * calling 'ubifs_change_lp()', but mutex is more
531 * heavy-weight, and we want budgeting to be as fast as
532 * possible.
533 */
534 c->lst.taken_empty_lebs += 1;
535 }
536 spin_unlock(&c->space_lock);
537
538 lprops = do_find_free_space(c, min_space, pick_free, squeeze);
539 if (IS_ERR(lprops)) {
540 err = PTR_ERR(lprops);
541 goto out;
542 }
543
544 lnum = lprops->lnum;
545 flags = lprops->flags | LPROPS_TAKEN;
546
547 lprops = ubifs_change_lp(c, lprops, LPROPS_NC, LPROPS_NC, flags, 0);
548 if (IS_ERR(lprops)) {
549 err = PTR_ERR(lprops);
550 goto out;
551 }
552
553 if (pick_free) {
554 spin_lock(&c->space_lock);
555 c->lst.taken_empty_lebs -= 1;
556 spin_unlock(&c->space_lock);
557 }
558
559 *free = lprops->free;
560 ubifs_release_lprops(c);
561
562 if (*free == c->leb_size) {
563 /*
564 * Ensure that empty LEBs have been unmapped. They may not have
565 * been, for example, because of an unclean unmount. Also
566 * LEBs that were freeable LEBs (free + dirty == leb_size) will
567 * not have been unmapped.
568 */
569 err = ubifs_leb_unmap(c, lnum);
570 if (err)
571 return err;
572 }
573
574 dbg_find("found LEB %d, free %d", lnum, *free);
575 ubifs_assert(*free >= min_space);
576 return lnum;
577
578out:
579 if (pick_free) {
580 spin_lock(&c->space_lock);
581 c->lst.taken_empty_lebs -= 1;
582 spin_unlock(&c->space_lock);
583 }
584 ubifs_release_lprops(c);
585 return err;
586}
587
588/**
589 * scan_for_idx_cb - callback used by the scan for a free LEB for the index.
590 * @c: the UBIFS file-system description object
591 * @lprops: LEB properties to scan
592 * @in_tree: whether the LEB properties are in main memory
593 * @data: information passed to and from the caller of the scan
594 *
595 * This function returns a code that indicates whether the scan should continue
596 * (%LPT_SCAN_CONTINUE), whether the LEB properties should be added to the tree
597 * in main memory (%LPT_SCAN_ADD), or whether the scan should stop
598 * (%LPT_SCAN_STOP).
599 */
600static int scan_for_idx_cb(struct ubifs_info *c,
601 const struct ubifs_lprops *lprops, int in_tree,
602 struct scan_data *data)
603{
604 int ret = LPT_SCAN_CONTINUE;
605
606 /* Exclude LEBs that are currently in use */
607 if (lprops->flags & LPROPS_TAKEN)
608 return LPT_SCAN_CONTINUE;
609 /* Determine whether to add these LEB properties to the tree */
610 if (!in_tree && valuable(c, lprops))
611 ret |= LPT_SCAN_ADD;
612 /* Exclude index LEBS */
613 if (lprops->flags & LPROPS_INDEX)
614 return ret;
615 /* Exclude LEBs that cannot be made empty */
616 if (lprops->free + lprops->dirty != c->leb_size)
617 return ret;
618 /*
619 * We are allocating for the index so it is safe to allocate LEBs with
620 * only free and dirty space, because write buffers are sync'd at commit
621 * start.
622 */
623 data->lnum = lprops->lnum;
624 return LPT_SCAN_ADD | LPT_SCAN_STOP;
625}
626
627/**
628 * scan_for_leb_for_idx - scan for a free LEB for the index.
629 * @c: the UBIFS file-system description object
630 */
631static const struct ubifs_lprops *scan_for_leb_for_idx(struct ubifs_info *c)
632{
633 struct ubifs_lprops *lprops;
634 struct scan_data data;
635 int err;
636
637 data.lnum = -1;
638 err = ubifs_lpt_scan_nolock(c, -1, c->lscan_lnum,
639 (ubifs_lpt_scan_callback)scan_for_idx_cb,
640 &data);
641 if (err)
642 return ERR_PTR(err);
643 ubifs_assert(data.lnum >= c->main_first && data.lnum < c->leb_cnt);
644 c->lscan_lnum = data.lnum;
645 lprops = ubifs_lpt_lookup_dirty(c, data.lnum);
646 if (IS_ERR(lprops))
647 return lprops;
648 ubifs_assert(lprops->lnum == data.lnum);
649 ubifs_assert(lprops->free + lprops->dirty == c->leb_size);
650 ubifs_assert(!(lprops->flags & LPROPS_TAKEN));
651 ubifs_assert(!(lprops->flags & LPROPS_INDEX));
652 return lprops;
653}
654
655/**
656 * ubifs_find_free_leb_for_idx - find a free LEB for the index.
657 * @c: the UBIFS file-system description object
658 *
659 * This function looks for a free LEB and returns that LEB number. The returned
660 * LEB is marked as "taken", "index".
661 *
662 * Only empty LEBs are allocated. This is for two reasons. First, the commit
663 * calculates the number of LEBs to allocate based on the assumption that they
664 * will be empty. Secondly, free space at the end of an index LEB is not
665 * guaranteed to be empty because it may have been used by the in-the-gaps
666 * method prior to an unclean unmount.
667 *
668 * If no LEB is found %-ENOSPC is returned. For other failures another negative
669 * error code is returned.
670 */
671int ubifs_find_free_leb_for_idx(struct ubifs_info *c)
672{
673 const struct ubifs_lprops *lprops;
674 int lnum = -1, err, flags;
675
676 ubifs_get_lprops(c);
677
678 lprops = ubifs_fast_find_empty(c);
679 if (!lprops) {
680 lprops = ubifs_fast_find_freeable(c);
681 if (!lprops) {
682 ubifs_assert(c->freeable_cnt == 0);
683 if (c->lst.empty_lebs - c->lst.taken_empty_lebs > 0) {
684 lprops = scan_for_leb_for_idx(c);
685 if (IS_ERR(lprops)) {
686 err = PTR_ERR(lprops);
687 goto out;
688 }
689 }
690 }
691 }
692
693 if (!lprops) {
694 err = -ENOSPC;
695 goto out;
696 }
697
698 lnum = lprops->lnum;
699
700 dbg_find("found LEB %d, free %d, dirty %d, flags %#x",
701 lnum, lprops->free, lprops->dirty, lprops->flags);
702
703 flags = lprops->flags | LPROPS_TAKEN | LPROPS_INDEX;
704 lprops = ubifs_change_lp(c, lprops, c->leb_size, 0, flags, 0);
705 if (IS_ERR(lprops)) {
706 err = PTR_ERR(lprops);
707 goto out;
708 }
709
710 ubifs_release_lprops(c);
711
712 /*
713 * Ensure that empty LEBs have been unmapped. They may not have been,
714 * for example, because of an unclean unmount. Also LEBs that were
715 * freeable LEBs (free + dirty == leb_size) will not have been unmapped.
716 */
717 err = ubifs_leb_unmap(c, lnum);
718 if (err) {
719 ubifs_change_one_lp(c, lnum, LPROPS_NC, LPROPS_NC, 0,
720 LPROPS_TAKEN | LPROPS_INDEX, 0);
721 return err;
722 }
723
724 return lnum;
725
726out:
727 ubifs_release_lprops(c);
728 return err;
729}
730
731static int cmp_dirty_idx(const struct ubifs_lprops **a,
732 const struct ubifs_lprops **b)
733{
734 const struct ubifs_lprops *lpa = *a;
735 const struct ubifs_lprops *lpb = *b;
736
737 return lpa->dirty + lpa->free - lpb->dirty - lpb->free;
738}
739
740static void swap_dirty_idx(struct ubifs_lprops **a, struct ubifs_lprops **b,
741 int size)
742{
743 struct ubifs_lprops *t = *a;
744
745 *a = *b;
746 *b = t;
747}
748
749/**
750 * ubifs_save_dirty_idx_lnums - save an array of the most dirty index LEB nos.
751 * @c: the UBIFS file-system description object
752 *
753 * This function is called each commit to create an array of LEB numbers of
754 * dirty index LEBs sorted in order of dirty and free space. This is used by
755 * the in-the-gaps method of TNC commit.
756 */
757int ubifs_save_dirty_idx_lnums(struct ubifs_info *c)
758{
759 int i;
760
761 ubifs_get_lprops(c);
762 /* Copy the LPROPS_DIRTY_IDX heap */
763 c->dirty_idx.cnt = c->lpt_heap[LPROPS_DIRTY_IDX - 1].cnt;
764 memcpy(c->dirty_idx.arr, c->lpt_heap[LPROPS_DIRTY_IDX - 1].arr,
765 sizeof(void *) * c->dirty_idx.cnt);
766 /* Sort it so that the dirtiest is now at the end */
767 sort(c->dirty_idx.arr, c->dirty_idx.cnt, sizeof(void *),
768 (int (*)(const void *, const void *))cmp_dirty_idx,
769 (void (*)(void *, void *, int))swap_dirty_idx);
770 dbg_find("found %d dirty index LEBs", c->dirty_idx.cnt);
771 if (c->dirty_idx.cnt)
772 dbg_find("dirtiest index LEB is %d with dirty %d and free %d",
773 c->dirty_idx.arr[c->dirty_idx.cnt - 1]->lnum,
774 c->dirty_idx.arr[c->dirty_idx.cnt - 1]->dirty,
775 c->dirty_idx.arr[c->dirty_idx.cnt - 1]->free);
776 /* Replace the lprops pointers with LEB numbers */
777 for (i = 0; i < c->dirty_idx.cnt; i++)
778 c->dirty_idx.arr[i] = (void *)(size_t)c->dirty_idx.arr[i]->lnum;
779 ubifs_release_lprops(c);
780 return 0;
781}
782
783/**
784 * scan_dirty_idx_cb - callback used by the scan for a dirty index LEB.
785 * @c: the UBIFS file-system description object
786 * @lprops: LEB properties to scan
787 * @in_tree: whether the LEB properties are in main memory
788 * @data: information passed to and from the caller of the scan
789 *
790 * This function returns a code that indicates whether the scan should continue
791 * (%LPT_SCAN_CONTINUE), whether the LEB properties should be added to the tree
792 * in main memory (%LPT_SCAN_ADD), or whether the scan should stop
793 * (%LPT_SCAN_STOP).
794 */
795static int scan_dirty_idx_cb(struct ubifs_info *c,
796 const struct ubifs_lprops *lprops, int in_tree,
797 struct scan_data *data)
798{
799 int ret = LPT_SCAN_CONTINUE;
800
801 /* Exclude LEBs that are currently in use */
802 if (lprops->flags & LPROPS_TAKEN)
803 return LPT_SCAN_CONTINUE;
804 /* Determine whether to add these LEB properties to the tree */
805 if (!in_tree && valuable(c, lprops))
806 ret |= LPT_SCAN_ADD;
807 /* Exclude non-index LEBs */
808 if (!(lprops->flags & LPROPS_INDEX))
809 return ret;
810 /* Exclude LEBs with too little space */
811 if (lprops->free + lprops->dirty < c->min_idx_node_sz)
812 return ret;
813 /* Finally we found space */
814 data->lnum = lprops->lnum;
815 return LPT_SCAN_ADD | LPT_SCAN_STOP;
816}
817
818/**
819 * find_dirty_idx_leb - find a dirty index LEB.
820 * @c: the UBIFS file-system description object
821 *
822 * This function returns LEB number upon success and a negative error code upon
823 * failure. In particular, -ENOSPC is returned if a dirty index LEB is not
824 * found.
825 *
826 * Note that this function scans the entire LPT but it is called very rarely.
827 */
828static int find_dirty_idx_leb(struct ubifs_info *c)
829{
830 const struct ubifs_lprops *lprops;
831 struct ubifs_lpt_heap *heap;
832 struct scan_data data;
833 int err, i, ret;
834
835 /* Check all structures in memory first */
836 data.lnum = -1;
837 heap = &c->lpt_heap[LPROPS_DIRTY_IDX - 1];
838 for (i = 0; i < heap->cnt; i++) {
839 lprops = heap->arr[i];
840 ret = scan_dirty_idx_cb(c, lprops, 1, &data);
841 if (ret & LPT_SCAN_STOP)
842 goto found;
843 }
844 list_for_each_entry(lprops, &c->frdi_idx_list, list) {
845 ret = scan_dirty_idx_cb(c, lprops, 1, &data);
846 if (ret & LPT_SCAN_STOP)
847 goto found;
848 }
849 list_for_each_entry(lprops, &c->uncat_list, list) {
850 ret = scan_dirty_idx_cb(c, lprops, 1, &data);
851 if (ret & LPT_SCAN_STOP)
852 goto found;
853 }
854 if (c->pnodes_have >= c->pnode_cnt)
855 /* All pnodes are in memory, so skip scan */
856 return -ENOSPC;
857 err = ubifs_lpt_scan_nolock(c, -1, c->lscan_lnum,
858 (ubifs_lpt_scan_callback)scan_dirty_idx_cb,
859 &data);
860 if (err)
861 return err;
862found:
863 ubifs_assert(data.lnum >= c->main_first && data.lnum < c->leb_cnt);
864 c->lscan_lnum = data.lnum;
865 lprops = ubifs_lpt_lookup_dirty(c, data.lnum);
866 if (IS_ERR(lprops))
867 return PTR_ERR(lprops);
868 ubifs_assert(lprops->lnum == data.lnum);
869 ubifs_assert(lprops->free + lprops->dirty >= c->min_idx_node_sz);
870 ubifs_assert(!(lprops->flags & LPROPS_TAKEN));
871 ubifs_assert((lprops->flags & LPROPS_INDEX));
872
873 dbg_find("found dirty LEB %d, free %d, dirty %d, flags %#x",
874 lprops->lnum, lprops->free, lprops->dirty, lprops->flags);
875
876 lprops = ubifs_change_lp(c, lprops, LPROPS_NC, LPROPS_NC,
877 lprops->flags | LPROPS_TAKEN, 0);
878 if (IS_ERR(lprops))
879 return PTR_ERR(lprops);
880
881 return lprops->lnum;
882}
883
884/**
885 * get_idx_gc_leb - try to get a LEB number from trivial GC.
886 * @c: the UBIFS file-system description object
887 */
888static int get_idx_gc_leb(struct ubifs_info *c)
889{
890 const struct ubifs_lprops *lp;
891 int err, lnum;
892
893 err = ubifs_get_idx_gc_leb(c);
894 if (err < 0)
895 return err;
896 lnum = err;
897 /*
898 * The LEB was due to be unmapped after the commit but
899 * it is needed now for this commit.
900 */
901 lp = ubifs_lpt_lookup_dirty(c, lnum);
902 if (unlikely(IS_ERR(lp)))
903 return PTR_ERR(lp);
904 lp = ubifs_change_lp(c, lp, LPROPS_NC, LPROPS_NC,
905 lp->flags | LPROPS_INDEX, -1);
906 if (unlikely(IS_ERR(lp)))
907 return PTR_ERR(lp);
908 dbg_find("LEB %d, dirty %d and free %d flags %#x",
909 lp->lnum, lp->dirty, lp->free, lp->flags);
910 return lnum;
911}
912
913/**
914 * find_dirtiest_idx_leb - take dirtiest untaken index LEB from dirtiest array.
915 * @c: the UBIFS file-system description object
916 */
917static int find_dirtiest_idx_leb(struct ubifs_info *c)
918{
919 const struct ubifs_lprops *lp;
920 int lnum;
921
922 while (1) {
923 if (!c->dirty_idx.cnt)
924 return -ENOSPC;
925 /* The lprops pointers were replaced by LEB numbers */
926 lnum = (size_t)c->dirty_idx.arr[--c->dirty_idx.cnt];
927 lp = ubifs_lpt_lookup(c, lnum);
928 if (IS_ERR(lp))
929 return PTR_ERR(lp);
930 if ((lp->flags & LPROPS_TAKEN) || !(lp->flags & LPROPS_INDEX))
931 continue;
932 lp = ubifs_change_lp(c, lp, LPROPS_NC, LPROPS_NC,
933 lp->flags | LPROPS_TAKEN, 0);
934 if (IS_ERR(lp))
935 return PTR_ERR(lp);
936 break;
937 }
938 dbg_find("LEB %d, dirty %d and free %d flags %#x", lp->lnum, lp->dirty,
939 lp->free, lp->flags);
940 ubifs_assert(lp->flags & LPROPS_TAKEN);
941 ubifs_assert(lp->flags & LPROPS_INDEX);
942 return lnum;
943}
944
945/**
946 * ubifs_find_dirty_idx_leb - try to find dirtiest index LEB as at last commit.
947 * @c: the UBIFS file-system description object
948 *
949 * This function attempts to find an untaken index LEB with the most free and
950 * dirty space that can be used without overwriting index nodes that were in the
951 * last index committed.
952 */
953int ubifs_find_dirty_idx_leb(struct ubifs_info *c)
954{
955 int err;
956
957 ubifs_get_lprops(c);
958
959 /*
960 * We made an array of the dirtiest index LEB numbers as at the start of
961 * last commit. Try that array first.
962 */
963 err = find_dirtiest_idx_leb(c);
964
965 /* Next try scanning the entire LPT */
966 if (err == -ENOSPC)
967 err = find_dirty_idx_leb(c);
968
969 /* Finally take any index LEBs awaiting trivial GC */
970 if (err == -ENOSPC)
971 err = get_idx_gc_leb(c);
972
973 ubifs_release_lprops(c);
974 return err;
975}
diff --git a/fs/ubifs/gc.c b/fs/ubifs/gc.c
new file mode 100644
index 00000000000..d0f3dac2908
--- /dev/null
+++ b/fs/ubifs/gc.c
@@ -0,0 +1,773 @@
1/*
2 * This file is part of UBIFS.
3 *
4 * Copyright (C) 2006-2008 Nokia Corporation.
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms of the GNU General Public License version 2 as published by
8 * the Free Software Foundation.
9 *
10 * This program is distributed in the hope that it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13 * more details.
14 *
15 * You should have received a copy of the GNU General Public License along with
16 * this program; if not, write to the Free Software Foundation, Inc., 51
17 * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
18 *
19 * Authors: Adrian Hunter
20 * Artem Bityutskiy (Битюцкий Артём)
21 */
22
23/*
24 * This file implements garbage collection. The procedure for garbage collection
25 * is different depending on whether a LEB is an index LEB (contains index
26 * nodes) or not. For non-index LEBs, garbage collection finds a LEB which
27 * contains a lot of dirty space (obsolete nodes), and copies the non-obsolete
28 * nodes to the journal, at which point the garbage-collected LEB is free to be
29 * reused. For index LEBs, garbage collection marks the non-obsolete index nodes
30 * dirty in the TNC, and after the next commit, the garbage-collected LEB is
31 * to be reused. Garbage collection will cause the number of dirty index nodes
32 * to grow, however sufficient space is reserved for the index to ensure the
33 * commit will never run out of space.
34 */
35
36#include <linux/pagemap.h>
37#include "ubifs.h"
38
39/*
40 * GC tries to optimize the way it fits nodes to available space, and it sorts
41 * nodes a little. The below constants are watermarks which define "large",
42 * "medium", and "small" nodes.
43 */
44#define MEDIUM_NODE_WM (UBIFS_BLOCK_SIZE / 4)
45#define SMALL_NODE_WM UBIFS_MAX_DENT_NODE_SZ
46
47/*
48 * GC may need to move more than one LEB to make progress. The below constants
49 * define "soft" and "hard" limits on the number of LEBs the garbage collector
50 * may move.
51 */
52#define SOFT_LEBS_LIMIT 4
53#define HARD_LEBS_LIMIT 32
54
55/**
56 * switch_gc_head - switch the garbage collection journal head.
57 * @c: UBIFS file-system description object
58 * @buf: buffer to write
59 * @len: length of the buffer to write
60 * @lnum: LEB number written is returned here
61 * @offs: offset written is returned here
62 *
63 * This function switch the GC head to the next LEB which is reserved in
64 * @c->gc_lnum. Returns %0 in case of success, %-EAGAIN if commit is required,
65 * and other negative error code in case of failures.
66 */
67static int switch_gc_head(struct ubifs_info *c)
68{
69 int err, gc_lnum = c->gc_lnum;
70 struct ubifs_wbuf *wbuf = &c->jheads[GCHD].wbuf;
71
72 ubifs_assert(gc_lnum != -1);
73 dbg_gc("switch GC head from LEB %d:%d to LEB %d (waste %d bytes)",
74 wbuf->lnum, wbuf->offs + wbuf->used, gc_lnum,
75 c->leb_size - wbuf->offs - wbuf->used);
76
77 err = ubifs_wbuf_sync_nolock(wbuf);
78 if (err)
79 return err;
80
81 /*
82 * The GC write-buffer was synchronized, we may safely unmap
83 * 'c->gc_lnum'.
84 */
85 err = ubifs_leb_unmap(c, gc_lnum);
86 if (err)
87 return err;
88
89 err = ubifs_add_bud_to_log(c, GCHD, gc_lnum, 0);
90 if (err)
91 return err;
92
93 c->gc_lnum = -1;
94 err = ubifs_wbuf_seek_nolock(wbuf, gc_lnum, 0, UBI_LONGTERM);
95 return err;
96}
97
98/**
99 * move_nodes - move nodes.
100 * @c: UBIFS file-system description object
101 * @sleb: describes nodes to move
102 *
103 * This function moves valid nodes from data LEB described by @sleb to the GC
104 * journal head. The obsolete nodes are dropped.
105 *
106 * When moving nodes we have to deal with classical bin-packing problem: the
107 * space in the current GC journal head LEB and in @c->gc_lnum are the "bins",
108 * where the nodes in the @sleb->nodes list are the elements which should be
109 * fit optimally to the bins. This function uses the "first fit decreasing"
110 * strategy, although it does not really sort the nodes but just split them on
111 * 3 classes - large, medium, and small, so they are roughly sorted.
112 *
113 * This function returns zero in case of success, %-EAGAIN if commit is
114 * required, and other negative error codes in case of other failures.
115 */
116static int move_nodes(struct ubifs_info *c, struct ubifs_scan_leb *sleb)
117{
118 struct ubifs_scan_node *snod, *tmp;
119 struct list_head large, medium, small;
120 struct ubifs_wbuf *wbuf = &c->jheads[GCHD].wbuf;
121 int avail, err, min = INT_MAX;
122
123 INIT_LIST_HEAD(&large);
124 INIT_LIST_HEAD(&medium);
125 INIT_LIST_HEAD(&small);
126
127 list_for_each_entry_safe(snod, tmp, &sleb->nodes, list) {
128 struct list_head *lst;
129
130 ubifs_assert(snod->type != UBIFS_IDX_NODE);
131 ubifs_assert(snod->type != UBIFS_REF_NODE);
132 ubifs_assert(snod->type != UBIFS_CS_NODE);
133
134 err = ubifs_tnc_has_node(c, &snod->key, 0, sleb->lnum,
135 snod->offs, 0);
136 if (err < 0)
137 goto out;
138
139 lst = &snod->list;
140 list_del(lst);
141 if (!err) {
142 /* The node is obsolete, remove it from the list */
143 kfree(snod);
144 continue;
145 }
146
147 /*
148 * Sort the list of nodes so that large nodes go first, and
149 * small nodes go last.
150 */
151 if (snod->len > MEDIUM_NODE_WM)
152 list_add(lst, &large);
153 else if (snod->len > SMALL_NODE_WM)
154 list_add(lst, &medium);
155 else
156 list_add(lst, &small);
157
158 /* And find the smallest node */
159 if (snod->len < min)
160 min = snod->len;
161 }
162
163 /*
164 * Join the tree lists so that we'd have one roughly sorted list
165 * ('large' will be the head of the joined list).
166 */
167 list_splice(&medium, large.prev);
168 list_splice(&small, large.prev);
169
170 if (wbuf->lnum == -1) {
171 /*
172 * The GC journal head is not set, because it is the first GC
173 * invocation since mount.
174 */
175 err = switch_gc_head(c);
176 if (err)
177 goto out;
178 }
179
180 /* Write nodes to their new location. Use the first-fit strategy */
181 while (1) {
182 avail = c->leb_size - wbuf->offs - wbuf->used;
183 list_for_each_entry_safe(snod, tmp, &large, list) {
184 int new_lnum, new_offs;
185
186 if (avail < min)
187 break;
188
189 if (snod->len > avail)
190 /* This node does not fit */
191 continue;
192
193 cond_resched();
194
195 new_lnum = wbuf->lnum;
196 new_offs = wbuf->offs + wbuf->used;
197 err = ubifs_wbuf_write_nolock(wbuf, snod->node,
198 snod->len);
199 if (err)
200 goto out;
201 err = ubifs_tnc_replace(c, &snod->key, sleb->lnum,
202 snod->offs, new_lnum, new_offs,
203 snod->len);
204 if (err)
205 goto out;
206
207 avail = c->leb_size - wbuf->offs - wbuf->used;
208 list_del(&snod->list);
209 kfree(snod);
210 }
211
212 if (list_empty(&large))
213 break;
214
215 /*
216 * Waste the rest of the space in the LEB and switch to the
217 * next LEB.
218 */
219 err = switch_gc_head(c);
220 if (err)
221 goto out;
222 }
223
224 return 0;
225
226out:
227 list_for_each_entry_safe(snod, tmp, &large, list) {
228 list_del(&snod->list);
229 kfree(snod);
230 }
231 return err;
232}
233
234/**
235 * gc_sync_wbufs - sync write-buffers for GC.
236 * @c: UBIFS file-system description object
237 *
238 * We must guarantee that obsoleting nodes are on flash. Unfortunately they may
239 * be in a write-buffer instead. That is, a node could be written to a
240 * write-buffer, obsoleting another node in a LEB that is GC'd. If that LEB is
241 * erased before the write-buffer is sync'd and then there is an unclean
242 * unmount, then an existing node is lost. To avoid this, we sync all
243 * write-buffers.
244 *
245 * This function returns %0 on success or a negative error code on failure.
246 */
247static int gc_sync_wbufs(struct ubifs_info *c)
248{
249 int err, i;
250
251 for (i = 0; i < c->jhead_cnt; i++) {
252 if (i == GCHD)
253 continue;
254 err = ubifs_wbuf_sync(&c->jheads[i].wbuf);
255 if (err)
256 return err;
257 }
258 return 0;
259}
260
261/**
262 * ubifs_garbage_collect_leb - garbage-collect a logical eraseblock.
263 * @c: UBIFS file-system description object
264 * @lp: describes the LEB to garbage collect
265 *
266 * This function garbage-collects an LEB and returns one of the @LEB_FREED,
267 * @LEB_RETAINED, etc positive codes in case of success, %-EAGAIN if commit is
268 * required, and other negative error codes in case of failures.
269 */
270int ubifs_garbage_collect_leb(struct ubifs_info *c, struct ubifs_lprops *lp)
271{
272 struct ubifs_scan_leb *sleb;
273 struct ubifs_scan_node *snod;
274 struct ubifs_wbuf *wbuf = &c->jheads[GCHD].wbuf;
275 int err = 0, lnum = lp->lnum;
276
277 ubifs_assert(c->gc_lnum != -1 || wbuf->offs + wbuf->used == 0 ||
278 c->need_recovery);
279 ubifs_assert(c->gc_lnum != lnum);
280 ubifs_assert(wbuf->lnum != lnum);
281
282 /*
283 * We scan the entire LEB even though we only really need to scan up to
284 * (c->leb_size - lp->free).
285 */
286 sleb = ubifs_scan(c, lnum, 0, c->sbuf);
287 if (IS_ERR(sleb))
288 return PTR_ERR(sleb);
289
290 ubifs_assert(!list_empty(&sleb->nodes));
291 snod = list_entry(sleb->nodes.next, struct ubifs_scan_node, list);
292
293 if (snod->type == UBIFS_IDX_NODE) {
294 struct ubifs_gced_idx_leb *idx_gc;
295
296 dbg_gc("indexing LEB %d (free %d, dirty %d)",
297 lnum, lp->free, lp->dirty);
298 list_for_each_entry(snod, &sleb->nodes, list) {
299 struct ubifs_idx_node *idx = snod->node;
300 int level = le16_to_cpu(idx->level);
301
302 ubifs_assert(snod->type == UBIFS_IDX_NODE);
303 key_read(c, ubifs_idx_key(c, idx), &snod->key);
304 err = ubifs_dirty_idx_node(c, &snod->key, level, lnum,
305 snod->offs);
306 if (err)
307 goto out;
308 }
309
310 idx_gc = kmalloc(sizeof(struct ubifs_gced_idx_leb), GFP_NOFS);
311 if (!idx_gc) {
312 err = -ENOMEM;
313 goto out;
314 }
315
316 idx_gc->lnum = lnum;
317 idx_gc->unmap = 0;
318 list_add(&idx_gc->list, &c->idx_gc);
319
320 /*
321 * Don't release the LEB until after the next commit, because
322 * it may contain date which is needed for recovery. So
323 * although we freed this LEB, it will become usable only after
324 * the commit.
325 */
326 err = ubifs_change_one_lp(c, lnum, c->leb_size, 0, 0,
327 LPROPS_INDEX, 1);
328 if (err)
329 goto out;
330 err = LEB_FREED_IDX;
331 } else {
332 dbg_gc("data LEB %d (free %d, dirty %d)",
333 lnum, lp->free, lp->dirty);
334
335 err = move_nodes(c, sleb);
336 if (err)
337 goto out;
338
339 err = gc_sync_wbufs(c);
340 if (err)
341 goto out;
342
343 err = ubifs_change_one_lp(c, lnum, c->leb_size, 0, 0, 0, 0);
344 if (err)
345 goto out;
346
347 if (c->gc_lnum == -1) {
348 c->gc_lnum = lnum;
349 err = LEB_RETAINED;
350 } else {
351 err = ubifs_wbuf_sync_nolock(wbuf);
352 if (err)
353 goto out;
354
355 err = ubifs_leb_unmap(c, lnum);
356 if (err)
357 goto out;
358
359 err = LEB_FREED;
360 }
361 }
362
363out:
364 ubifs_scan_destroy(sleb);
365 return err;
366}
367
368/**
369 * ubifs_garbage_collect - UBIFS garbage collector.
370 * @c: UBIFS file-system description object
371 * @anyway: do GC even if there are free LEBs
372 *
373 * This function does out-of-place garbage collection. The return codes are:
374 * o positive LEB number if the LEB has been freed and may be used;
375 * o %-EAGAIN if the caller has to run commit;
376 * o %-ENOSPC if GC failed to make any progress;
377 * o other negative error codes in case of other errors.
378 *
379 * Garbage collector writes data to the journal when GC'ing data LEBs, and just
380 * marking indexing nodes dirty when GC'ing indexing LEBs. Thus, at some point
381 * commit may be required. But commit cannot be run from inside GC, because the
382 * caller might be holding the commit lock, so %-EAGAIN is returned instead;
383 * And this error code means that the caller has to run commit, and re-run GC
384 * if there is still no free space.
385 *
386 * There are many reasons why this function may return %-EAGAIN:
387 * o the log is full and there is no space to write an LEB reference for
388 * @c->gc_lnum;
389 * o the journal is too large and exceeds size limitations;
390 * o GC moved indexing LEBs, but they can be used only after the commit;
391 * o the shrinker fails to find clean znodes to free and requests the commit;
392 * o etc.
393 *
394 * Note, if the file-system is close to be full, this function may return
395 * %-EAGAIN infinitely, so the caller has to limit amount of re-invocations of
396 * the function. E.g., this happens if the limits on the journal size are too
397 * tough and GC writes too much to the journal before an LEB is freed. This
398 * might also mean that the journal is too large, and the TNC becomes to big,
399 * so that the shrinker is constantly called, finds not clean znodes to free,
400 * and requests commit. Well, this may also happen if the journal is all right,
401 * but another kernel process consumes too much memory. Anyway, infinite
402 * %-EAGAIN may happen, but in some extreme/misconfiguration cases.
403 */
404int ubifs_garbage_collect(struct ubifs_info *c, int anyway)
405{
406 int i, err, ret, min_space = c->dead_wm;
407 struct ubifs_lprops lp;
408 struct ubifs_wbuf *wbuf = &c->jheads[GCHD].wbuf;
409
410 ubifs_assert_cmt_locked(c);
411
412 if (ubifs_gc_should_commit(c))
413 return -EAGAIN;
414
415 mutex_lock_nested(&wbuf->io_mutex, wbuf->jhead);
416
417 if (c->ro_media) {
418 ret = -EROFS;
419 goto out_unlock;
420 }
421
422 /* We expect the write-buffer to be empty on entry */
423 ubifs_assert(!wbuf->used);
424
425 for (i = 0; ; i++) {
426 int space_before = c->leb_size - wbuf->offs - wbuf->used;
427 int space_after;
428
429 cond_resched();
430
431 /* Give the commit an opportunity to run */
432 if (ubifs_gc_should_commit(c)) {
433 ret = -EAGAIN;
434 break;
435 }
436
437 if (i > SOFT_LEBS_LIMIT && !list_empty(&c->idx_gc)) {
438 /*
439 * We've done enough iterations. Indexing LEBs were
440 * moved and will be available after the commit.
441 */
442 dbg_gc("soft limit, some index LEBs GC'ed, -EAGAIN");
443 ubifs_commit_required(c);
444 ret = -EAGAIN;
445 break;
446 }
447
448 if (i > HARD_LEBS_LIMIT) {
449 /*
450 * We've moved too many LEBs and have not made
451 * progress, give up.
452 */
453 dbg_gc("hard limit, -ENOSPC");
454 ret = -ENOSPC;
455 break;
456 }
457
458 /*
459 * Empty and freeable LEBs can turn up while we waited for
460 * the wbuf lock, or while we have been running GC. In that
461 * case, we should just return one of those instead of
462 * continuing to GC dirty LEBs. Hence we request
463 * 'ubifs_find_dirty_leb()' to return an empty LEB if it can.
464 */
465 ret = ubifs_find_dirty_leb(c, &lp, min_space, anyway ? 0 : 1);
466 if (ret) {
467 if (ret == -ENOSPC)
468 dbg_gc("no more dirty LEBs");
469 break;
470 }
471
472 dbg_gc("found LEB %d: free %d, dirty %d, sum %d "
473 "(min. space %d)", lp.lnum, lp.free, lp.dirty,
474 lp.free + lp.dirty, min_space);
475
476 if (lp.free + lp.dirty == c->leb_size) {
477 /* An empty LEB was returned */
478 dbg_gc("LEB %d is free, return it", lp.lnum);
479 /*
480 * ubifs_find_dirty_leb() doesn't return freeable index
481 * LEBs.
482 */
483 ubifs_assert(!(lp.flags & LPROPS_INDEX));
484 if (lp.free != c->leb_size) {
485 /*
486 * Write buffers must be sync'd before
487 * unmapping freeable LEBs, because one of them
488 * may contain data which obsoletes something
489 * in 'lp.pnum'.
490 */
491 ret = gc_sync_wbufs(c);
492 if (ret)
493 goto out;
494 ret = ubifs_change_one_lp(c, lp.lnum,
495 c->leb_size, 0, 0, 0,
496 0);
497 if (ret)
498 goto out;
499 }
500 ret = ubifs_leb_unmap(c, lp.lnum);
501 if (ret)
502 goto out;
503 ret = lp.lnum;
504 break;
505 }
506
507 space_before = c->leb_size - wbuf->offs - wbuf->used;
508 if (wbuf->lnum == -1)
509 space_before = 0;
510
511 ret = ubifs_garbage_collect_leb(c, &lp);
512 if (ret < 0) {
513 if (ret == -EAGAIN || ret == -ENOSPC) {
514 /*
515 * These codes are not errors, so we have to
516 * return the LEB to lprops. But if the
517 * 'ubifs_return_leb()' function fails, its
518 * failure code is propagated to the caller
519 * instead of the original '-EAGAIN' or
520 * '-ENOSPC'.
521 */
522 err = ubifs_return_leb(c, lp.lnum);
523 if (err)
524 ret = err;
525 break;
526 }
527 goto out;
528 }
529
530 if (ret == LEB_FREED) {
531 /* An LEB has been freed and is ready for use */
532 dbg_gc("LEB %d freed, return", lp.lnum);
533 ret = lp.lnum;
534 break;
535 }
536
537 if (ret == LEB_FREED_IDX) {
538 /*
539 * This was an indexing LEB and it cannot be
540 * immediately used. And instead of requesting the
541 * commit straight away, we try to garbage collect some
542 * more.
543 */
544 dbg_gc("indexing LEB %d freed, continue", lp.lnum);
545 continue;
546 }
547
548 ubifs_assert(ret == LEB_RETAINED);
549 space_after = c->leb_size - wbuf->offs - wbuf->used;
550 dbg_gc("LEB %d retained, freed %d bytes", lp.lnum,
551 space_after - space_before);
552
553 if (space_after > space_before) {
554 /* GC makes progress, keep working */
555 min_space >>= 1;
556 if (min_space < c->dead_wm)
557 min_space = c->dead_wm;
558 continue;
559 }
560
561 dbg_gc("did not make progress");
562
563 /*
564 * GC moved an LEB bud have not done any progress. This means
565 * that the previous GC head LEB contained too few free space
566 * and the LEB which was GC'ed contained only large nodes which
567 * did not fit that space.
568 *
569 * We can do 2 things:
570 * 1. pick another LEB in a hope it'll contain a small node
571 * which will fit the space we have at the end of current GC
572 * head LEB, but there is no guarantee, so we try this out
573 * unless we have already been working for too long;
574 * 2. request an LEB with more dirty space, which will force
575 * 'ubifs_find_dirty_leb()' to start scanning the lprops
576 * table, instead of just picking one from the heap
577 * (previously it already picked the dirtiest LEB).
578 */
579 if (i < SOFT_LEBS_LIMIT) {
580 dbg_gc("try again");
581 continue;
582 }
583
584 min_space <<= 1;
585 if (min_space > c->dark_wm)
586 min_space = c->dark_wm;
587 dbg_gc("set min. space to %d", min_space);
588 }
589
590 if (ret == -ENOSPC && !list_empty(&c->idx_gc)) {
591 dbg_gc("no space, some index LEBs GC'ed, -EAGAIN");
592 ubifs_commit_required(c);
593 ret = -EAGAIN;
594 }
595
596 err = ubifs_wbuf_sync_nolock(wbuf);
597 if (!err)
598 err = ubifs_leb_unmap(c, c->gc_lnum);
599 if (err) {
600 ret = err;
601 goto out;
602 }
603out_unlock:
604 mutex_unlock(&wbuf->io_mutex);
605 return ret;
606
607out:
608 ubifs_assert(ret < 0);
609 ubifs_assert(ret != -ENOSPC && ret != -EAGAIN);
610 ubifs_ro_mode(c, ret);
611 ubifs_wbuf_sync_nolock(wbuf);
612 mutex_unlock(&wbuf->io_mutex);
613 ubifs_return_leb(c, lp.lnum);
614 return ret;
615}
616
617/**
618 * ubifs_gc_start_commit - garbage collection at start of commit.
619 * @c: UBIFS file-system description object
620 *
621 * If a LEB has only dirty and free space, then we may safely unmap it and make
622 * it free. Note, we cannot do this with indexing LEBs because dirty space may
623 * correspond index nodes that are required for recovery. In that case, the
624 * LEB cannot be unmapped until after the next commit.
625 *
626 * This function returns %0 upon success and a negative error code upon failure.
627 */
628int ubifs_gc_start_commit(struct ubifs_info *c)
629{
630 struct ubifs_gced_idx_leb *idx_gc;
631 const struct ubifs_lprops *lp;
632 int err = 0, flags;
633
634 ubifs_get_lprops(c);
635
636 /*
637 * Unmap (non-index) freeable LEBs. Note that recovery requires that all
638 * wbufs are sync'd before this, which is done in 'do_commit()'.
639 */
640 while (1) {
641 lp = ubifs_fast_find_freeable(c);
642 if (unlikely(IS_ERR(lp))) {
643 err = PTR_ERR(lp);
644 goto out;
645 }
646 if (!lp)
647 break;
648 ubifs_assert(!(lp->flags & LPROPS_TAKEN));
649 ubifs_assert(!(lp->flags & LPROPS_INDEX));
650 err = ubifs_leb_unmap(c, lp->lnum);
651 if (err)
652 goto out;
653 lp = ubifs_change_lp(c, lp, c->leb_size, 0, lp->flags, 0);
654 if (unlikely(IS_ERR(lp))) {
655 err = PTR_ERR(lp);
656 goto out;
657 }
658 ubifs_assert(!(lp->flags & LPROPS_TAKEN));
659 ubifs_assert(!(lp->flags & LPROPS_INDEX));
660 }
661
662 /* Mark GC'd index LEBs OK to unmap after this commit finishes */
663 list_for_each_entry(idx_gc, &c->idx_gc, list)
664 idx_gc->unmap = 1;
665
666 /* Record index freeable LEBs for unmapping after commit */
667 while (1) {
668 lp = ubifs_fast_find_frdi_idx(c);
669 if (unlikely(IS_ERR(lp))) {
670 err = PTR_ERR(lp);
671 goto out;
672 }
673 if (!lp)
674 break;
675 idx_gc = kmalloc(sizeof(struct ubifs_gced_idx_leb), GFP_NOFS);
676 if (!idx_gc) {
677 err = -ENOMEM;
678 goto out;
679 }
680 ubifs_assert(!(lp->flags & LPROPS_TAKEN));
681 ubifs_assert(lp->flags & LPROPS_INDEX);
682 /* Don't release the LEB until after the next commit */
683 flags = (lp->flags | LPROPS_TAKEN) ^ LPROPS_INDEX;
684 lp = ubifs_change_lp(c, lp, c->leb_size, 0, flags, 1);
685 if (unlikely(IS_ERR(lp))) {
686 err = PTR_ERR(lp);
687 kfree(idx_gc);
688 goto out;
689 }
690 ubifs_assert(lp->flags & LPROPS_TAKEN);
691 ubifs_assert(!(lp->flags & LPROPS_INDEX));
692 idx_gc->lnum = lp->lnum;
693 idx_gc->unmap = 1;
694 list_add(&idx_gc->list, &c->idx_gc);
695 }
696out:
697 ubifs_release_lprops(c);
698 return err;
699}
700
701/**
702 * ubifs_gc_end_commit - garbage collection at end of commit.
703 * @c: UBIFS file-system description object
704 *
705 * This function completes out-of-place garbage collection of index LEBs.
706 */
707int ubifs_gc_end_commit(struct ubifs_info *c)
708{
709 struct ubifs_gced_idx_leb *idx_gc, *tmp;
710 struct ubifs_wbuf *wbuf;
711 int err = 0;
712
713 wbuf = &c->jheads[GCHD].wbuf;
714 mutex_lock_nested(&wbuf->io_mutex, wbuf->jhead);
715 list_for_each_entry_safe(idx_gc, tmp, &c->idx_gc, list)
716 if (idx_gc->unmap) {
717 dbg_gc("LEB %d", idx_gc->lnum);
718 err = ubifs_leb_unmap(c, idx_gc->lnum);
719 if (err)
720 goto out;
721 err = ubifs_change_one_lp(c, idx_gc->lnum, LPROPS_NC,
722 LPROPS_NC, 0, LPROPS_TAKEN, -1);
723 if (err)
724 goto out;
725 list_del(&idx_gc->list);
726 kfree(idx_gc);
727 }
728out:
729 mutex_unlock(&wbuf->io_mutex);
730 return err;
731}
732
733/**
734 * ubifs_destroy_idx_gc - destroy idx_gc list.
735 * @c: UBIFS file-system description object
736 *
737 * This function destroys the idx_gc list. It is called when unmounting or
738 * remounting read-only so locks are not needed.
739 */
740void ubifs_destroy_idx_gc(struct ubifs_info *c)
741{
742 while (!list_empty(&c->idx_gc)) {
743 struct ubifs_gced_idx_leb *idx_gc;
744
745 idx_gc = list_entry(c->idx_gc.next, struct ubifs_gced_idx_leb,
746 list);
747 c->idx_gc_cnt -= 1;
748 list_del(&idx_gc->list);
749 kfree(idx_gc);
750 }
751
752}
753
754/**
755 * ubifs_get_idx_gc_leb - get a LEB from GC'd index LEB list.
756 * @c: UBIFS file-system description object
757 *
758 * Called during start commit so locks are not needed.
759 */
760int ubifs_get_idx_gc_leb(struct ubifs_info *c)
761{
762 struct ubifs_gced_idx_leb *idx_gc;
763 int lnum;
764
765 if (list_empty(&c->idx_gc))
766 return -ENOSPC;
767 idx_gc = list_entry(c->idx_gc.next, struct ubifs_gced_idx_leb, list);
768 lnum = idx_gc->lnum;
769 /* c->idx_gc_cnt is updated by the caller when lprops are updated */
770 list_del(&idx_gc->list);
771 kfree(idx_gc);
772 return lnum;
773}
diff --git a/fs/ubifs/io.c b/fs/ubifs/io.c
new file mode 100644
index 00000000000..3374f91b670
--- /dev/null
+++ b/fs/ubifs/io.c
@@ -0,0 +1,914 @@
1/*
2 * This file is part of UBIFS.
3 *
4 * Copyright (C) 2006-2008 Nokia Corporation.
5 * Copyright (C) 2006, 2007 University of Szeged, Hungary
6 *
7 * This program is free software; you can redistribute it and/or modify it
8 * under the terms of the GNU General Public License version 2 as published by
9 * the Free Software Foundation.
10 *
11 * This program is distributed in the hope that it will be useful, but WITHOUT
12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
14 * more details.
15 *
16 * You should have received a copy of the GNU General Public License along with
17 * this program; if not, write to the Free Software Foundation, Inc., 51
18 * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
19 *
20 * Authors: Artem Bityutskiy (Битюцкий Артём)
21 * Adrian Hunter
22 * Zoltan Sogor
23 */
24
25/*
26 * This file implements UBIFS I/O subsystem which provides various I/O-related
27 * helper functions (reading/writing/checking/validating nodes) and implements
28 * write-buffering support. Write buffers help to save space which otherwise
29 * would have been wasted for padding to the nearest minimal I/O unit boundary.
30 * Instead, data first goes to the write-buffer and is flushed when the
31 * buffer is full or when it is not used for some time (by timer). This is
32 * similar to the mechanism used by JFFS2.
33 *
34 * Write-buffers are defined by 'struct ubifs_wbuf' objects and protected by
35 * mutexes defined inside these objects. Since sometimes upper-level code
36 * has to lock the write-buffer (e.g. journal space reservation code), many
37 * functions related to write-buffers have "nolock" suffix which means that the
38 * caller has to lock the write-buffer before calling this function.
39 *
40 * UBIFS stores nodes at 64 bit-aligned addresses. If the node length is not
41 * aligned, UBIFS starts the next node from the aligned address, and the padded
42 * bytes may contain any rubbish. In other words, UBIFS does not put padding
43 * bytes in those small gaps. Common headers of nodes store real node lengths,
44 * not aligned lengths. Indexing nodes also store real lengths in branches.
45 *
46 * UBIFS uses padding when it pads to the next min. I/O unit. In this case it
47 * uses padding nodes or padding bytes, if the padding node does not fit.
48 *
49 * All UBIFS nodes are protected by CRC checksums and UBIFS checks all nodes
50 * every time they are read from the flash media.
51 */
52
53#include <linux/crc32.h>
54#include "ubifs.h"
55
56/**
57 * ubifs_check_node - check node.
58 * @c: UBIFS file-system description object
59 * @buf: node to check
60 * @lnum: logical eraseblock number
61 * @offs: offset within the logical eraseblock
62 * @quiet: print no messages
63 *
64 * This function checks node magic number and CRC checksum. This function also
65 * validates node length to prevent UBIFS from becoming crazy when an attacker
66 * feeds it a file-system image with incorrect nodes. For example, too large
67 * node length in the common header could cause UBIFS to read memory outside of
68 * allocated buffer when checking the CRC checksum.
69 *
70 * This function returns zero in case of success %-EUCLEAN in case of bad CRC
71 * or magic.
72 */
73int ubifs_check_node(const struct ubifs_info *c, const void *buf, int lnum,
74 int offs, int quiet)
75{
76 int err = -EINVAL, type, node_len;
77 uint32_t crc, node_crc, magic;
78 const struct ubifs_ch *ch = buf;
79
80 ubifs_assert(lnum >= 0 && lnum < c->leb_cnt && offs >= 0);
81 ubifs_assert(!(offs & 7) && offs < c->leb_size);
82
83 magic = le32_to_cpu(ch->magic);
84 if (magic != UBIFS_NODE_MAGIC) {
85 if (!quiet)
86 ubifs_err("bad magic %#08x, expected %#08x",
87 magic, UBIFS_NODE_MAGIC);
88 err = -EUCLEAN;
89 goto out;
90 }
91
92 type = ch->node_type;
93 if (type < 0 || type >= UBIFS_NODE_TYPES_CNT) {
94 if (!quiet)
95 ubifs_err("bad node type %d", type);
96 goto out;
97 }
98
99 node_len = le32_to_cpu(ch->len);
100 if (node_len + offs > c->leb_size)
101 goto out_len;
102
103 if (c->ranges[type].max_len == 0) {
104 if (node_len != c->ranges[type].len)
105 goto out_len;
106 } else if (node_len < c->ranges[type].min_len ||
107 node_len > c->ranges[type].max_len)
108 goto out_len;
109
110 crc = crc32(UBIFS_CRC32_INIT, buf + 8, node_len - 8);
111 node_crc = le32_to_cpu(ch->crc);
112 if (crc != node_crc) {
113 if (!quiet)
114 ubifs_err("bad CRC: calculated %#08x, read %#08x",
115 crc, node_crc);
116 err = -EUCLEAN;
117 goto out;
118 }
119
120 return 0;
121
122out_len:
123 if (!quiet)
124 ubifs_err("bad node length %d", node_len);
125out:
126 if (!quiet) {
127 ubifs_err("bad node at LEB %d:%d", lnum, offs);
128 dbg_dump_node(c, buf);
129 dbg_dump_stack();
130 }
131 return err;
132}
133
134/**
135 * ubifs_pad - pad flash space.
136 * @c: UBIFS file-system description object
137 * @buf: buffer to put padding to
138 * @pad: how many bytes to pad
139 *
140 * The flash media obliges us to write only in chunks of %c->min_io_size and
141 * when we have to write less data we add padding node to the write-buffer and
142 * pad it to the next minimal I/O unit's boundary. Padding nodes help when the
143 * media is being scanned. If the amount of wasted space is not enough to fit a
144 * padding node which takes %UBIFS_PAD_NODE_SZ bytes, we write padding bytes
145 * pattern (%UBIFS_PADDING_BYTE).
146 *
147 * Padding nodes are also used to fill gaps when the "commit-in-gaps" method is
148 * used.
149 */
150void ubifs_pad(const struct ubifs_info *c, void *buf, int pad)
151{
152 uint32_t crc;
153
154 ubifs_assert(pad >= 0 && !(pad & 7));
155
156 if (pad >= UBIFS_PAD_NODE_SZ) {
157 struct ubifs_ch *ch = buf;
158 struct ubifs_pad_node *pad_node = buf;
159
160 ch->magic = cpu_to_le32(UBIFS_NODE_MAGIC);
161 ch->node_type = UBIFS_PAD_NODE;
162 ch->group_type = UBIFS_NO_NODE_GROUP;
163 ch->padding[0] = ch->padding[1] = 0;
164 ch->sqnum = 0;
165 ch->len = cpu_to_le32(UBIFS_PAD_NODE_SZ);
166 pad -= UBIFS_PAD_NODE_SZ;
167 pad_node->pad_len = cpu_to_le32(pad);
168 crc = crc32(UBIFS_CRC32_INIT, buf + 8, UBIFS_PAD_NODE_SZ - 8);
169 ch->crc = cpu_to_le32(crc);
170 memset(buf + UBIFS_PAD_NODE_SZ, 0, pad);
171 } else if (pad > 0)
172 /* Too little space, padding node won't fit */
173 memset(buf, UBIFS_PADDING_BYTE, pad);
174}
175
176/**
177 * next_sqnum - get next sequence number.
178 * @c: UBIFS file-system description object
179 */
180static unsigned long long next_sqnum(struct ubifs_info *c)
181{
182 unsigned long long sqnum;
183
184 spin_lock(&c->cnt_lock);
185 sqnum = ++c->max_sqnum;
186 spin_unlock(&c->cnt_lock);
187
188 if (unlikely(sqnum >= SQNUM_WARN_WATERMARK)) {
189 if (sqnum >= SQNUM_WATERMARK) {
190 ubifs_err("sequence number overflow %llu, end of life",
191 sqnum);
192 ubifs_ro_mode(c, -EINVAL);
193 }
194 ubifs_warn("running out of sequence numbers, end of life soon");
195 }
196
197 return sqnum;
198}
199
200/**
201 * ubifs_prepare_node - prepare node to be written to flash.
202 * @c: UBIFS file-system description object
203 * @node: the node to pad
204 * @len: node length
205 * @pad: if the buffer has to be padded
206 *
207 * This function prepares node at @node to be written to the media - it
208 * calculates node CRC, fills the common header, and adds proper padding up to
209 * the next minimum I/O unit if @pad is not zero.
210 */
211void ubifs_prepare_node(struct ubifs_info *c, void *node, int len, int pad)
212{
213 uint32_t crc;
214 struct ubifs_ch *ch = node;
215 unsigned long long sqnum = next_sqnum(c);
216
217 ubifs_assert(len >= UBIFS_CH_SZ);
218
219 ch->magic = cpu_to_le32(UBIFS_NODE_MAGIC);
220 ch->len = cpu_to_le32(len);
221 ch->group_type = UBIFS_NO_NODE_GROUP;
222 ch->sqnum = cpu_to_le64(sqnum);
223 ch->padding[0] = ch->padding[1] = 0;
224 crc = crc32(UBIFS_CRC32_INIT, node + 8, len - 8);
225 ch->crc = cpu_to_le32(crc);
226
227 if (pad) {
228 len = ALIGN(len, 8);
229 pad = ALIGN(len, c->min_io_size) - len;
230 ubifs_pad(c, node + len, pad);
231 }
232}
233
234/**
235 * ubifs_prep_grp_node - prepare node of a group to be written to flash.
236 * @c: UBIFS file-system description object
237 * @node: the node to pad
238 * @len: node length
239 * @last: indicates the last node of the group
240 *
241 * This function prepares node at @node to be written to the media - it
242 * calculates node CRC and fills the common header.
243 */
244void ubifs_prep_grp_node(struct ubifs_info *c, void *node, int len, int last)
245{
246 uint32_t crc;
247 struct ubifs_ch *ch = node;
248 unsigned long long sqnum = next_sqnum(c);
249
250 ubifs_assert(len >= UBIFS_CH_SZ);
251
252 ch->magic = cpu_to_le32(UBIFS_NODE_MAGIC);
253 ch->len = cpu_to_le32(len);
254 if (last)
255 ch->group_type = UBIFS_LAST_OF_NODE_GROUP;
256 else
257 ch->group_type = UBIFS_IN_NODE_GROUP;
258 ch->sqnum = cpu_to_le64(sqnum);
259 ch->padding[0] = ch->padding[1] = 0;
260 crc = crc32(UBIFS_CRC32_INIT, node + 8, len - 8);
261 ch->crc = cpu_to_le32(crc);
262}
263
264/**
265 * wbuf_timer_callback - write-buffer timer callback function.
266 * @data: timer data (write-buffer descriptor)
267 *
268 * This function is called when the write-buffer timer expires.
269 */
270static void wbuf_timer_callback_nolock(unsigned long data)
271{
272 struct ubifs_wbuf *wbuf = (struct ubifs_wbuf *)data;
273
274 wbuf->need_sync = 1;
275 wbuf->c->need_wbuf_sync = 1;
276 ubifs_wake_up_bgt(wbuf->c);
277}
278
279/**
280 * new_wbuf_timer - start new write-buffer timer.
281 * @wbuf: write-buffer descriptor
282 */
283static void new_wbuf_timer_nolock(struct ubifs_wbuf *wbuf)
284{
285 ubifs_assert(!timer_pending(&wbuf->timer));
286
287 if (!wbuf->timeout)
288 return;
289
290 wbuf->timer.expires = jiffies + wbuf->timeout;
291 add_timer(&wbuf->timer);
292}
293
294/**
295 * cancel_wbuf_timer - cancel write-buffer timer.
296 * @wbuf: write-buffer descriptor
297 */
298static void cancel_wbuf_timer_nolock(struct ubifs_wbuf *wbuf)
299{
300 /*
301 * If the syncer is waiting for the lock (from the background thread's
302 * context) and another task is changing write-buffer then the syncing
303 * should be canceled.
304 */
305 wbuf->need_sync = 0;
306 del_timer(&wbuf->timer);
307}
308
309/**
310 * ubifs_wbuf_sync_nolock - synchronize write-buffer.
311 * @wbuf: write-buffer to synchronize
312 *
313 * This function synchronizes write-buffer @buf and returns zero in case of
314 * success or a negative error code in case of failure.
315 */
316int ubifs_wbuf_sync_nolock(struct ubifs_wbuf *wbuf)
317{
318 struct ubifs_info *c = wbuf->c;
319 int err, dirt;
320
321 cancel_wbuf_timer_nolock(wbuf);
322 if (!wbuf->used || wbuf->lnum == -1)
323 /* Write-buffer is empty or not seeked */
324 return 0;
325
326 dbg_io("LEB %d:%d, %d bytes",
327 wbuf->lnum, wbuf->offs, wbuf->used);
328 ubifs_assert(!(c->vfs_sb->s_flags & MS_RDONLY));
329 ubifs_assert(!(wbuf->avail & 7));
330 ubifs_assert(wbuf->offs + c->min_io_size <= c->leb_size);
331
332 if (c->ro_media)
333 return -EROFS;
334
335 ubifs_pad(c, wbuf->buf + wbuf->used, wbuf->avail);
336 err = ubi_leb_write(c->ubi, wbuf->lnum, wbuf->buf, wbuf->offs,
337 c->min_io_size, wbuf->dtype);
338 if (err) {
339 ubifs_err("cannot write %d bytes to LEB %d:%d",
340 c->min_io_size, wbuf->lnum, wbuf->offs);
341 dbg_dump_stack();
342 return err;
343 }
344
345 dirt = wbuf->avail;
346
347 spin_lock(&wbuf->lock);
348 wbuf->offs += c->min_io_size;
349 wbuf->avail = c->min_io_size;
350 wbuf->used = 0;
351 wbuf->next_ino = 0;
352 spin_unlock(&wbuf->lock);
353
354 if (wbuf->sync_callback)
355 err = wbuf->sync_callback(c, wbuf->lnum,
356 c->leb_size - wbuf->offs, dirt);
357 return err;
358}
359
360/**
361 * ubifs_wbuf_seek_nolock - seek write-buffer.
362 * @wbuf: write-buffer
363 * @lnum: logical eraseblock number to seek to
364 * @offs: logical eraseblock offset to seek to
365 * @dtype: data type
366 *
367 * This function targets the write buffer to logical eraseblock @lnum:@offs.
368 * The write-buffer is synchronized if it is not empty. Returns zero in case of
369 * success and a negative error code in case of failure.
370 */
371int ubifs_wbuf_seek_nolock(struct ubifs_wbuf *wbuf, int lnum, int offs,
372 int dtype)
373{
374 const struct ubifs_info *c = wbuf->c;
375
376 dbg_io("LEB %d:%d", lnum, offs);
377 ubifs_assert(lnum >= 0 && lnum < c->leb_cnt);
378 ubifs_assert(offs >= 0 && offs <= c->leb_size);
379 ubifs_assert(offs % c->min_io_size == 0 && !(offs & 7));
380 ubifs_assert(lnum != wbuf->lnum);
381
382 if (wbuf->used > 0) {
383 int err = ubifs_wbuf_sync_nolock(wbuf);
384
385 if (err)
386 return err;
387 }
388
389 spin_lock(&wbuf->lock);
390 wbuf->lnum = lnum;
391 wbuf->offs = offs;
392 wbuf->avail = c->min_io_size;
393 wbuf->used = 0;
394 spin_unlock(&wbuf->lock);
395 wbuf->dtype = dtype;
396
397 return 0;
398}
399
400/**
401 * ubifs_bg_wbufs_sync - synchronize write-buffers.
402 * @c: UBIFS file-system description object
403 *
404 * This function is called by background thread to synchronize write-buffers.
405 * Returns zero in case of success and a negative error code in case of
406 * failure.
407 */
408int ubifs_bg_wbufs_sync(struct ubifs_info *c)
409{
410 int err, i;
411
412 if (!c->need_wbuf_sync)
413 return 0;
414 c->need_wbuf_sync = 0;
415
416 if (c->ro_media) {
417 err = -EROFS;
418 goto out_timers;
419 }
420
421 dbg_io("synchronize");
422 for (i = 0; i < c->jhead_cnt; i++) {
423 struct ubifs_wbuf *wbuf = &c->jheads[i].wbuf;
424
425 cond_resched();
426
427 /*
428 * If the mutex is locked then wbuf is being changed, so
429 * synchronization is not necessary.
430 */
431 if (mutex_is_locked(&wbuf->io_mutex))
432 continue;
433
434 mutex_lock_nested(&wbuf->io_mutex, wbuf->jhead);
435 if (!wbuf->need_sync) {
436 mutex_unlock(&wbuf->io_mutex);
437 continue;
438 }
439
440 err = ubifs_wbuf_sync_nolock(wbuf);
441 mutex_unlock(&wbuf->io_mutex);
442 if (err) {
443 ubifs_err("cannot sync write-buffer, error %d", err);
444 ubifs_ro_mode(c, err);
445 goto out_timers;
446 }
447 }
448
449 return 0;
450
451out_timers:
452 /* Cancel all timers to prevent repeated errors */
453 for (i = 0; i < c->jhead_cnt; i++) {
454 struct ubifs_wbuf *wbuf = &c->jheads[i].wbuf;
455
456 mutex_lock_nested(&wbuf->io_mutex, wbuf->jhead);
457 cancel_wbuf_timer_nolock(wbuf);
458 mutex_unlock(&wbuf->io_mutex);
459 }
460 return err;
461}
462
463/**
464 * ubifs_wbuf_write_nolock - write data to flash via write-buffer.
465 * @wbuf: write-buffer
466 * @buf: node to write
467 * @len: node length
468 *
469 * This function writes data to flash via write-buffer @wbuf. This means that
470 * the last piece of the node won't reach the flash media immediately if it
471 * does not take whole minimal I/O unit. Instead, the node will sit in RAM
472 * until the write-buffer is synchronized (e.g., by timer).
473 *
474 * This function returns zero in case of success and a negative error code in
475 * case of failure. If the node cannot be written because there is no more
476 * space in this logical eraseblock, %-ENOSPC is returned.
477 */
478int ubifs_wbuf_write_nolock(struct ubifs_wbuf *wbuf, void *buf, int len)
479{
480 struct ubifs_info *c = wbuf->c;
481 int err, written, n, aligned_len = ALIGN(len, 8), offs;
482
483 dbg_io("%d bytes (%s) to wbuf at LEB %d:%d", len,
484 dbg_ntype(((struct ubifs_ch *)buf)->node_type), wbuf->lnum,
485 wbuf->offs + wbuf->used);
486 ubifs_assert(len > 0 && wbuf->lnum >= 0 && wbuf->lnum < c->leb_cnt);
487 ubifs_assert(wbuf->offs >= 0 && wbuf->offs % c->min_io_size == 0);
488 ubifs_assert(!(wbuf->offs & 7) && wbuf->offs <= c->leb_size);
489 ubifs_assert(wbuf->avail > 0 && wbuf->avail <= c->min_io_size);
490 ubifs_assert(mutex_is_locked(&wbuf->io_mutex));
491
492 if (c->leb_size - wbuf->offs - wbuf->used < aligned_len) {
493 err = -ENOSPC;
494 goto out;
495 }
496
497 cancel_wbuf_timer_nolock(wbuf);
498
499 if (c->ro_media)
500 return -EROFS;
501
502 if (aligned_len <= wbuf->avail) {
503 /*
504 * The node is not very large and fits entirely within
505 * write-buffer.
506 */
507 memcpy(wbuf->buf + wbuf->used, buf, len);
508
509 if (aligned_len == wbuf->avail) {
510 dbg_io("flush wbuf to LEB %d:%d", wbuf->lnum,
511 wbuf->offs);
512 err = ubi_leb_write(c->ubi, wbuf->lnum, wbuf->buf,
513 wbuf->offs, c->min_io_size,
514 wbuf->dtype);
515 if (err)
516 goto out;
517
518 spin_lock(&wbuf->lock);
519 wbuf->offs += c->min_io_size;
520 wbuf->avail = c->min_io_size;
521 wbuf->used = 0;
522 wbuf->next_ino = 0;
523 spin_unlock(&wbuf->lock);
524 } else {
525 spin_lock(&wbuf->lock);
526 wbuf->avail -= aligned_len;
527 wbuf->used += aligned_len;
528 spin_unlock(&wbuf->lock);
529 }
530
531 goto exit;
532 }
533
534 /*
535 * The node is large enough and does not fit entirely within current
536 * minimal I/O unit. We have to fill and flush write-buffer and switch
537 * to the next min. I/O unit.
538 */
539 dbg_io("flush wbuf to LEB %d:%d", wbuf->lnum, wbuf->offs);
540 memcpy(wbuf->buf + wbuf->used, buf, wbuf->avail);
541 err = ubi_leb_write(c->ubi, wbuf->lnum, wbuf->buf, wbuf->offs,
542 c->min_io_size, wbuf->dtype);
543 if (err)
544 goto out;
545
546 offs = wbuf->offs + c->min_io_size;
547 len -= wbuf->avail;
548 aligned_len -= wbuf->avail;
549 written = wbuf->avail;
550
551 /*
552 * The remaining data may take more whole min. I/O units, so write the
553 * remains multiple to min. I/O unit size directly to the flash media.
554 * We align node length to 8-byte boundary because we anyway flash wbuf
555 * if the remaining space is less than 8 bytes.
556 */
557 n = aligned_len >> c->min_io_shift;
558 if (n) {
559 n <<= c->min_io_shift;
560 dbg_io("write %d bytes to LEB %d:%d", n, wbuf->lnum, offs);
561 err = ubi_leb_write(c->ubi, wbuf->lnum, buf + written, offs, n,
562 wbuf->dtype);
563 if (err)
564 goto out;
565 offs += n;
566 aligned_len -= n;
567 len -= n;
568 written += n;
569 }
570
571 spin_lock(&wbuf->lock);
572 if (aligned_len)
573 /*
574 * And now we have what's left and what does not take whole
575 * min. I/O unit, so write it to the write-buffer and we are
576 * done.
577 */
578 memcpy(wbuf->buf, buf + written, len);
579
580 wbuf->offs = offs;
581 wbuf->used = aligned_len;
582 wbuf->avail = c->min_io_size - aligned_len;
583 wbuf->next_ino = 0;
584 spin_unlock(&wbuf->lock);
585
586exit:
587 if (wbuf->sync_callback) {
588 int free = c->leb_size - wbuf->offs - wbuf->used;
589
590 err = wbuf->sync_callback(c, wbuf->lnum, free, 0);
591 if (err)
592 goto out;
593 }
594
595 if (wbuf->used)
596 new_wbuf_timer_nolock(wbuf);
597
598 return 0;
599
600out:
601 ubifs_err("cannot write %d bytes to LEB %d:%d, error %d",
602 len, wbuf->lnum, wbuf->offs, err);
603 dbg_dump_node(c, buf);
604 dbg_dump_stack();
605 dbg_dump_leb(c, wbuf->lnum);
606 return err;
607}
608
609/**
610 * ubifs_write_node - write node to the media.
611 * @c: UBIFS file-system description object
612 * @buf: the node to write
613 * @len: node length
614 * @lnum: logical eraseblock number
615 * @offs: offset within the logical eraseblock
616 * @dtype: node life-time hint (%UBI_LONGTERM, %UBI_SHORTTERM, %UBI_UNKNOWN)
617 *
618 * This function automatically fills node magic number, assigns sequence
619 * number, and calculates node CRC checksum. The length of the @buf buffer has
620 * to be aligned to the minimal I/O unit size. This function automatically
621 * appends padding node and padding bytes if needed. Returns zero in case of
622 * success and a negative error code in case of failure.
623 */
624int ubifs_write_node(struct ubifs_info *c, void *buf, int len, int lnum,
625 int offs, int dtype)
626{
627 int err, buf_len = ALIGN(len, c->min_io_size);
628
629 dbg_io("LEB %d:%d, %s, length %d (aligned %d)",
630 lnum, offs, dbg_ntype(((struct ubifs_ch *)buf)->node_type), len,
631 buf_len);
632 ubifs_assert(lnum >= 0 && lnum < c->leb_cnt && offs >= 0);
633 ubifs_assert(offs % c->min_io_size == 0 && offs < c->leb_size);
634
635 if (c->ro_media)
636 return -EROFS;
637
638 ubifs_prepare_node(c, buf, len, 1);
639 err = ubi_leb_write(c->ubi, lnum, buf, offs, buf_len, dtype);
640 if (err) {
641 ubifs_err("cannot write %d bytes to LEB %d:%d, error %d",
642 buf_len, lnum, offs, err);
643 dbg_dump_node(c, buf);
644 dbg_dump_stack();
645 }
646
647 return err;
648}
649
650/**
651 * ubifs_read_node_wbuf - read node from the media or write-buffer.
652 * @wbuf: wbuf to check for un-written data
653 * @buf: buffer to read to
654 * @type: node type
655 * @len: node length
656 * @lnum: logical eraseblock number
657 * @offs: offset within the logical eraseblock
658 *
659 * This function reads a node of known type and length, checks it and stores
660 * in @buf. If the node partially or fully sits in the write-buffer, this
661 * function takes data from the buffer, otherwise it reads the flash media.
662 * Returns zero in case of success, %-EUCLEAN if CRC mismatched and a negative
663 * error code in case of failure.
664 */
665int ubifs_read_node_wbuf(struct ubifs_wbuf *wbuf, void *buf, int type, int len,
666 int lnum, int offs)
667{
668 const struct ubifs_info *c = wbuf->c;
669 int err, rlen, overlap;
670 struct ubifs_ch *ch = buf;
671
672 dbg_io("LEB %d:%d, %s, length %d", lnum, offs, dbg_ntype(type), len);
673 ubifs_assert(wbuf && lnum >= 0 && lnum < c->leb_cnt && offs >= 0);
674 ubifs_assert(!(offs & 7) && offs < c->leb_size);
675 ubifs_assert(type >= 0 && type < UBIFS_NODE_TYPES_CNT);
676
677 spin_lock(&wbuf->lock);
678 overlap = (lnum == wbuf->lnum && offs + len > wbuf->offs);
679 if (!overlap) {
680 /* We may safely unlock the write-buffer and read the data */
681 spin_unlock(&wbuf->lock);
682 return ubifs_read_node(c, buf, type, len, lnum, offs);
683 }
684
685 /* Don't read under wbuf */
686 rlen = wbuf->offs - offs;
687 if (rlen < 0)
688 rlen = 0;
689
690 /* Copy the rest from the write-buffer */
691 memcpy(buf + rlen, wbuf->buf + offs + rlen - wbuf->offs, len - rlen);
692 spin_unlock(&wbuf->lock);
693
694 if (rlen > 0) {
695 /* Read everything that goes before write-buffer */
696 err = ubi_read(c->ubi, lnum, buf, offs, rlen);
697 if (err && err != -EBADMSG) {
698 ubifs_err("failed to read node %d from LEB %d:%d, "
699 "error %d", type, lnum, offs, err);
700 dbg_dump_stack();
701 return err;
702 }
703 }
704
705 if (type != ch->node_type) {
706 ubifs_err("bad node type (%d but expected %d)",
707 ch->node_type, type);
708 goto out;
709 }
710
711 err = ubifs_check_node(c, buf, lnum, offs, 0);
712 if (err) {
713 ubifs_err("expected node type %d", type);
714 return err;
715 }
716
717 rlen = le32_to_cpu(ch->len);
718 if (rlen != len) {
719 ubifs_err("bad node length %d, expected %d", rlen, len);
720 goto out;
721 }
722
723 return 0;
724
725out:
726 ubifs_err("bad node at LEB %d:%d", lnum, offs);
727 dbg_dump_node(c, buf);
728 dbg_dump_stack();
729 return -EINVAL;
730}
731
732/**
733 * ubifs_read_node - read node.
734 * @c: UBIFS file-system description object
735 * @buf: buffer to read to
736 * @type: node type
737 * @len: node length (not aligned)
738 * @lnum: logical eraseblock number
739 * @offs: offset within the logical eraseblock
740 *
741 * This function reads a node of known type and and length, checks it and
742 * stores in @buf. Returns zero in case of success, %-EUCLEAN if CRC mismatched
743 * and a negative error code in case of failure.
744 */
745int ubifs_read_node(const struct ubifs_info *c, void *buf, int type, int len,
746 int lnum, int offs)
747{
748 int err, l;
749 struct ubifs_ch *ch = buf;
750
751 dbg_io("LEB %d:%d, %s, length %d", lnum, offs, dbg_ntype(type), len);
752 ubifs_assert(lnum >= 0 && lnum < c->leb_cnt && offs >= 0);
753 ubifs_assert(len >= UBIFS_CH_SZ && offs + len <= c->leb_size);
754 ubifs_assert(!(offs & 7) && offs < c->leb_size);
755 ubifs_assert(type >= 0 && type < UBIFS_NODE_TYPES_CNT);
756
757 err = ubi_read(c->ubi, lnum, buf, offs, len);
758 if (err && err != -EBADMSG) {
759 ubifs_err("cannot read node %d from LEB %d:%d, error %d",
760 type, lnum, offs, err);
761 return err;
762 }
763
764 if (type != ch->node_type) {
765 ubifs_err("bad node type (%d but expected %d)",
766 ch->node_type, type);
767 goto out;
768 }
769
770 err = ubifs_check_node(c, buf, lnum, offs, 0);
771 if (err) {
772 ubifs_err("expected node type %d", type);
773 return err;
774 }
775
776 l = le32_to_cpu(ch->len);
777 if (l != len) {
778 ubifs_err("bad node length %d, expected %d", l, len);
779 goto out;
780 }
781
782 return 0;
783
784out:
785 ubifs_err("bad node at LEB %d:%d", lnum, offs);
786 dbg_dump_node(c, buf);
787 dbg_dump_stack();
788 return -EINVAL;
789}
790
791/**
792 * ubifs_wbuf_init - initialize write-buffer.
793 * @c: UBIFS file-system description object
794 * @wbuf: write-buffer to initialize
795 *
796 * This function initializes write buffer. Returns zero in case of success
797 * %-ENOMEM in case of failure.
798 */
799int ubifs_wbuf_init(struct ubifs_info *c, struct ubifs_wbuf *wbuf)
800{
801 size_t size;
802
803 wbuf->buf = kmalloc(c->min_io_size, GFP_KERNEL);
804 if (!wbuf->buf)
805 return -ENOMEM;
806
807 size = (c->min_io_size / UBIFS_CH_SZ + 1) * sizeof(ino_t);
808 wbuf->inodes = kmalloc(size, GFP_KERNEL);
809 if (!wbuf->inodes) {
810 kfree(wbuf->buf);
811 wbuf->buf = NULL;
812 return -ENOMEM;
813 }
814
815 wbuf->used = 0;
816 wbuf->lnum = wbuf->offs = -1;
817 wbuf->avail = c->min_io_size;
818 wbuf->dtype = UBI_UNKNOWN;
819 wbuf->sync_callback = NULL;
820 mutex_init(&wbuf->io_mutex);
821 spin_lock_init(&wbuf->lock);
822
823 wbuf->c = c;
824 init_timer(&wbuf->timer);
825 wbuf->timer.function = wbuf_timer_callback_nolock;
826 wbuf->timer.data = (unsigned long)wbuf;
827 wbuf->timeout = DEFAULT_WBUF_TIMEOUT;
828 wbuf->next_ino = 0;
829
830 return 0;
831}
832
833/**
834 * ubifs_wbuf_add_ino_nolock - add an inode number into the wbuf inode array.
835 * @wbuf: the write-buffer whereto add
836 * @inum: the inode number
837 *
838 * This function adds an inode number to the inode array of the write-buffer.
839 */
840void ubifs_wbuf_add_ino_nolock(struct ubifs_wbuf *wbuf, ino_t inum)
841{
842 if (!wbuf->buf)
843 /* NOR flash or something similar */
844 return;
845
846 spin_lock(&wbuf->lock);
847 if (wbuf->used)
848 wbuf->inodes[wbuf->next_ino++] = inum;
849 spin_unlock(&wbuf->lock);
850}
851
852/**
853 * wbuf_has_ino - returns if the wbuf contains data from the inode.
854 * @wbuf: the write-buffer
855 * @inum: the inode number
856 *
857 * This function returns with %1 if the write-buffer contains some data from the
858 * given inode otherwise it returns with %0.
859 */
860static int wbuf_has_ino(struct ubifs_wbuf *wbuf, ino_t inum)
861{
862 int i, ret = 0;
863
864 spin_lock(&wbuf->lock);
865 for (i = 0; i < wbuf->next_ino; i++)
866 if (inum == wbuf->inodes[i]) {
867 ret = 1;
868 break;
869 }
870 spin_unlock(&wbuf->lock);
871
872 return ret;
873}
874
875/**
876 * ubifs_sync_wbufs_by_inode - synchronize write-buffers for an inode.
877 * @c: UBIFS file-system description object
878 * @inode: inode to synchronize
879 *
880 * This function synchronizes write-buffers which contain nodes belonging to
881 * @inode. Returns zero in case of success and a negative error code in case of
882 * failure.
883 */
884int ubifs_sync_wbufs_by_inode(struct ubifs_info *c, struct inode *inode)
885{
886 int i, err = 0;
887
888 for (i = 0; i < c->jhead_cnt; i++) {
889 struct ubifs_wbuf *wbuf = &c->jheads[i].wbuf;
890
891 if (i == GCHD)
892 /*
893 * GC head is special, do not look at it. Even if the
894 * head contains something related to this inode, it is
895 * a _copy_ of corresponding on-flash node which sits
896 * somewhere else.
897 */
898 continue;
899
900 if (!wbuf_has_ino(wbuf, inode->i_ino))
901 continue;
902
903 mutex_lock_nested(&wbuf->io_mutex, wbuf->jhead);
904 if (wbuf_has_ino(wbuf, inode->i_ino))
905 err = ubifs_wbuf_sync_nolock(wbuf);
906 mutex_unlock(&wbuf->io_mutex);
907
908 if (err) {
909 ubifs_ro_mode(c, err);
910 return err;
911 }
912 }
913 return 0;
914}
diff --git a/fs/ubifs/ioctl.c b/fs/ubifs/ioctl.c
new file mode 100644
index 00000000000..5e82cffe969
--- /dev/null
+++ b/fs/ubifs/ioctl.c
@@ -0,0 +1,204 @@
1/*
2 * This file is part of UBIFS.
3 *
4 * Copyright (C) 2006-2008 Nokia Corporation.
5 * Copyright (C) 2006, 2007 University of Szeged, Hungary
6 *
7 * This program is free software; you can redistribute it and/or modify it
8 * under the terms of the GNU General Public License version 2 as published by
9 * the Free Software Foundation.
10 *
11 * This program is distributed in the hope that it will be useful, but WITHOUT
12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
14 * more details.
15 *
16 * You should have received a copy of the GNU General Public License along with
17 * this program; if not, write to the Free Software Foundation, Inc., 51
18 * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
19 *
20 * Authors: Zoltan Sogor
21 * Artem Bityutskiy (Битюцкий Артём)
22 * Adrian Hunter
23 */
24
25/* This file implements EXT2-compatible extended attribute ioctl() calls */
26
27#include <linux/compat.h>
28#include <linux/smp_lock.h>
29#include <linux/mount.h>
30#include "ubifs.h"
31
32/**
33 * ubifs_set_inode_flags - set VFS inode flags.
34 * @inode: VFS inode to set flags for
35 *
36 * This function propagates flags from UBIFS inode object to VFS inode object.
37 */
38void ubifs_set_inode_flags(struct inode *inode)
39{
40 unsigned int flags = ubifs_inode(inode)->flags;
41
42 inode->i_flags &= ~(S_SYNC | S_APPEND | S_IMMUTABLE | S_DIRSYNC);
43 if (flags & UBIFS_SYNC_FL)
44 inode->i_flags |= S_SYNC;
45 if (flags & UBIFS_APPEND_FL)
46 inode->i_flags |= S_APPEND;
47 if (flags & UBIFS_IMMUTABLE_FL)
48 inode->i_flags |= S_IMMUTABLE;
49 if (flags & UBIFS_DIRSYNC_FL)
50 inode->i_flags |= S_DIRSYNC;
51}
52
53/*
54 * ioctl2ubifs - convert ioctl inode flags to UBIFS inode flags.
55 * @ioctl_flags: flags to convert
56 *
57 * This function convert ioctl flags (@FS_COMPR_FL, etc) to UBIFS inode flags
58 * (@UBIFS_COMPR_FL, etc).
59 */
60static int ioctl2ubifs(int ioctl_flags)
61{
62 int ubifs_flags = 0;
63
64 if (ioctl_flags & FS_COMPR_FL)
65 ubifs_flags |= UBIFS_COMPR_FL;
66 if (ioctl_flags & FS_SYNC_FL)
67 ubifs_flags |= UBIFS_SYNC_FL;
68 if (ioctl_flags & FS_APPEND_FL)
69 ubifs_flags |= UBIFS_APPEND_FL;
70 if (ioctl_flags & FS_IMMUTABLE_FL)
71 ubifs_flags |= UBIFS_IMMUTABLE_FL;
72 if (ioctl_flags & FS_DIRSYNC_FL)
73 ubifs_flags |= UBIFS_DIRSYNC_FL;
74
75 return ubifs_flags;
76}
77
78/*
79 * ubifs2ioctl - convert UBIFS inode flags to ioctl inode flags.
80 * @ubifs_flags: flags to convert
81 *
82 * This function convert UBIFS (@UBIFS_COMPR_FL, etc) to ioctl flags
83 * (@FS_COMPR_FL, etc).
84 */
85static int ubifs2ioctl(int ubifs_flags)
86{
87 int ioctl_flags = 0;
88
89 if (ubifs_flags & UBIFS_COMPR_FL)
90 ioctl_flags |= FS_COMPR_FL;
91 if (ubifs_flags & UBIFS_SYNC_FL)
92 ioctl_flags |= FS_SYNC_FL;
93 if (ubifs_flags & UBIFS_APPEND_FL)
94 ioctl_flags |= FS_APPEND_FL;
95 if (ubifs_flags & UBIFS_IMMUTABLE_FL)
96 ioctl_flags |= FS_IMMUTABLE_FL;
97 if (ubifs_flags & UBIFS_DIRSYNC_FL)
98 ioctl_flags |= FS_DIRSYNC_FL;
99
100 return ioctl_flags;
101}
102
103static int setflags(struct inode *inode, int flags)
104{
105 int oldflags, err, release;
106 struct ubifs_inode *ui = ubifs_inode(inode);
107 struct ubifs_info *c = inode->i_sb->s_fs_info;
108 struct ubifs_budget_req req = { .dirtied_ino = 1,
109 .dirtied_ino_d = ui->data_len };
110
111 err = ubifs_budget_space(c, &req);
112 if (err)
113 return err;
114
115 /*
116 * The IMMUTABLE and APPEND_ONLY flags can only be changed by
117 * the relevant capability.
118 */
119 mutex_lock(&ui->ui_mutex);
120 oldflags = ubifs2ioctl(ui->flags);
121 if ((flags ^ oldflags) & (FS_APPEND_FL | FS_IMMUTABLE_FL)) {
122 if (!capable(CAP_LINUX_IMMUTABLE)) {
123 err = -EPERM;
124 goto out_unlock;
125 }
126 }
127
128 ui->flags = ioctl2ubifs(flags);
129 ubifs_set_inode_flags(inode);
130 inode->i_ctime = ubifs_current_time(inode);
131 release = ui->dirty;
132 mark_inode_dirty_sync(inode);
133 mutex_unlock(&ui->ui_mutex);
134
135 if (release)
136 ubifs_release_budget(c, &req);
137 if (IS_SYNC(inode))
138 err = write_inode_now(inode, 1);
139 return err;
140
141out_unlock:
142 ubifs_err("can't modify inode %lu attributes", inode->i_ino);
143 mutex_unlock(&ui->ui_mutex);
144 ubifs_release_budget(c, &req);
145 return err;
146}
147
148long ubifs_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
149{
150 int flags, err;
151 struct inode *inode = file->f_path.dentry->d_inode;
152
153 switch (cmd) {
154 case FS_IOC_GETFLAGS:
155 flags = ubifs2ioctl(ubifs_inode(inode)->flags);
156
157 return put_user(flags, (int __user *) arg);
158
159 case FS_IOC_SETFLAGS: {
160 if (IS_RDONLY(inode))
161 return -EROFS;
162
163 if (!is_owner_or_cap(inode))
164 return -EACCES;
165
166 if (get_user(flags, (int __user *) arg))
167 return -EFAULT;
168
169 if (!S_ISDIR(inode->i_mode))
170 flags &= ~FS_DIRSYNC_FL;
171
172 /*
173 * Make sure the file-system is read-write and make sure it
174 * will not become read-only while we are changing the flags.
175 */
176 err = mnt_want_write(file->f_path.mnt);
177 if (err)
178 return err;
179 err = setflags(inode, flags);
180 mnt_drop_write(file->f_path.mnt);
181 return err;
182 }
183
184 default:
185 return -ENOTTY;
186 }
187}
188
#ifdef CONFIG_COMPAT
/*
 * ubifs_compat_ioctl - handle the 32-bit variants of the inode flags ioctls.
 * @file: file the ioctl was issued on
 * @cmd: ioctl command (%FS_IOC32_GETFLAGS or %FS_IOC32_SETFLAGS)
 * @arg: compat user-space pointer to the flags
 *
 * This function maps the 32-bit command to its native counterpart, converts
 * @arg with 'compat_ptr()' and calls 'ubifs_ioctl()'. Unknown commands yield
 * %-ENOIOCTLCMD.
 */
long ubifs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
{
	if (cmd == FS_IOC32_GETFLAGS)
		cmd = FS_IOC_GETFLAGS;
	else if (cmd == FS_IOC32_SETFLAGS)
		cmd = FS_IOC_SETFLAGS;
	else
		return -ENOIOCTLCMD;

	return ubifs_ioctl(file, cmd, (unsigned long)compat_ptr(arg));
}
#endif
diff --git a/fs/ubifs/journal.c b/fs/ubifs/journal.c
new file mode 100644
index 00000000000..283155abe5f
--- /dev/null
+++ b/fs/ubifs/journal.c
@@ -0,0 +1,1387 @@
1/*
2 * This file is part of UBIFS.
3 *
4 * Copyright (C) 2006-2008 Nokia Corporation.
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms of the GNU General Public License version 2 as published by
8 * the Free Software Foundation.
9 *
10 * This program is distributed in the hope that it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13 * more details.
14 *
15 * You should have received a copy of the GNU General Public License along with
16 * this program; if not, write to the Free Software Foundation, Inc., 51
17 * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
18 *
19 * Authors: Artem Bityutskiy (Битюцкий Артём)
20 * Adrian Hunter
21 */
22
23/*
24 * This file implements UBIFS journal.
25 *
26 * The journal consists of 2 parts - the log and bud LEBs. The log has fixed
27 * length and position, while a bud logical eraseblock is any LEB in the main
28 * area. Buds contain file system data - data nodes, inode nodes, etc. The log
29 * contains only references to buds and some other stuff like commit
30 * start node. The idea is that when we commit the journal, we do
31 * not copy the data, the buds just become indexed. Since after the commit the
32 * nodes in bud eraseblocks become leaf nodes of the file system index tree, we
33 * use term "bud". Analogy is obvious, bud eraseblocks contain nodes which will
34 * become leafs in the future.
35 *
36 * The journal is multi-headed because we want to write data to the journal as
37 * optimally as possible. It is nice to have nodes belonging to the same inode
38 * in one LEB, so we may write data owned by different inodes to different
39 * journal heads, although at present only one data head is used.
40 *
41 * For recovery reasons, the base head contains all inode nodes, all directory
42 * entry nodes and all truncate nodes. This means that the other heads contain
43 * only data nodes.
44 *
45 * Bud LEBs may be half-indexed. For example, if the bud was not full at the
46 * time of commit, the bud is retained to continue to be used in the journal,
47 * even though the "front" of the LEB is now indexed. In that case, the log
48 * reference contains the offset where the bud starts for the purposes of the
49 * journal.
50 *
51 * The journal size has to be limited, because the larger is the journal, the
52 * longer it takes to mount UBIFS (scanning the journal) and the more memory it
53 * takes (indexing in the TNC).
54 *
55 * All the journal write operations like 'ubifs_jnl_update()' here, which write
56 * multiple UBIFS nodes to the journal at one go, are atomic with respect to
57 * unclean reboots. Should the unclean reboot happen, the recovery code drops
58 * all the nodes.
59 */
60
61#include "ubifs.h"
62
63/**
64 * zero_ino_node_unused - zero out unused fields of an on-flash inode node.
65 * @ino: the inode to zero out
66 */
67static inline void zero_ino_node_unused(struct ubifs_ino_node *ino)
68{
69 memset(ino->padding1, 0, 4);
70 memset(ino->padding2, 0, 26);
71}
72
73/**
74 * zero_dent_node_unused - zero out unused fields of an on-flash directory
75 * entry node.
76 * @dent: the directory entry to zero out
77 */
78static inline void zero_dent_node_unused(struct ubifs_dent_node *dent)
79{
80 dent->padding1 = 0;
81 memset(dent->padding2, 0, 4);
82}
83
84/**
85 * zero_data_node_unused - zero out unused fields of an on-flash data node.
86 * @data: the data node to zero out
87 */
88static inline void zero_data_node_unused(struct ubifs_data_node *data)
89{
90 memset(data->padding, 0, 2);
91}
92
93/**
94 * zero_trun_node_unused - zero out unused fields of an on-flash truncation
95 * node.
96 * @trun: the truncation node to zero out
97 */
98static inline void zero_trun_node_unused(struct ubifs_trun_node *trun)
99{
100 memset(trun->padding, 0, 12);
101}
102
103/**
104 * reserve_space - reserve space in the journal.
105 * @c: UBIFS file-system description object
106 * @jhead: journal head number
107 * @len: node length
108 *
109 * This function reserves space in journal head @head. If the reservation
110 * succeeded, the journal head stays locked and later has to be unlocked using
111 * 'release_head()'. 'write_node()' and 'write_head()' functions also unlock
112 * it. Returns zero in case of success, %-EAGAIN if commit has to be done, and
113 * other negative error codes in case of other failures.
114 */
115static int reserve_space(struct ubifs_info *c, int jhead, int len)
116{
117 int err = 0, err1, retries = 0, avail, lnum, offs, free, squeeze;
118 struct ubifs_wbuf *wbuf = &c->jheads[jhead].wbuf;
119
120 /*
121 * Typically, the base head has smaller nodes written to it, so it is
122 * better to try to allocate space at the ends of eraseblocks. This is
123 * what the squeeze parameter does.
124 */
125 squeeze = (jhead == BASEHD);
126again:
127 mutex_lock_nested(&wbuf->io_mutex, wbuf->jhead);
128
129 if (c->ro_media) {
130 err = -EROFS;
131 goto out_unlock;
132 }
133
134 avail = c->leb_size - wbuf->offs - wbuf->used;
135 if (wbuf->lnum != -1 && avail >= len)
136 return 0;
137
138 /*
139 * Write buffer wasn't seek'ed or there is no enough space - look for an
140 * LEB with some empty space.
141 */
142 lnum = ubifs_find_free_space(c, len, &free, squeeze);
143 if (lnum >= 0) {
144 /* Found an LEB, add it to the journal head */
145 offs = c->leb_size - free;
146 err = ubifs_add_bud_to_log(c, jhead, lnum, offs);
147 if (err)
148 goto out_return;
149 /* A new bud was successfully allocated and added to the log */
150 goto out;
151 }
152
153 err = lnum;
154 if (err != -ENOSPC)
155 goto out_unlock;
156
157 /*
158 * No free space, we have to run garbage collector to make
159 * some. But the write-buffer mutex has to be unlocked because
160 * GC also takes it.
161 */
162 dbg_jnl("no free space jhead %d, run GC", jhead);
163 mutex_unlock(&wbuf->io_mutex);
164
165 lnum = ubifs_garbage_collect(c, 0);
166 if (lnum < 0) {
167 err = lnum;
168 if (err != -ENOSPC)
169 return err;
170
171 /*
172 * GC could not make a free LEB. But someone else may
173 * have allocated new bud for this journal head,
174 * because we dropped @wbuf->io_mutex, so try once
175 * again.
176 */
177 dbg_jnl("GC couldn't make a free LEB for jhead %d", jhead);
178 if (retries++ < 2) {
179 dbg_jnl("retry (%d)", retries);
180 goto again;
181 }
182
183 dbg_jnl("return -ENOSPC");
184 return err;
185 }
186
187 mutex_lock_nested(&wbuf->io_mutex, wbuf->jhead);
188 dbg_jnl("got LEB %d for jhead %d", lnum, jhead);
189 avail = c->leb_size - wbuf->offs - wbuf->used;
190
191 if (wbuf->lnum != -1 && avail >= len) {
192 /*
193 * Someone else has switched the journal head and we have
194 * enough space now. This happens when more then one process is
195 * trying to write to the same journal head at the same time.
196 */
197 dbg_jnl("return LEB %d back, already have LEB %d:%d",
198 lnum, wbuf->lnum, wbuf->offs + wbuf->used);
199 err = ubifs_return_leb(c, lnum);
200 if (err)
201 goto out_unlock;
202 return 0;
203 }
204
205 err = ubifs_add_bud_to_log(c, jhead, lnum, 0);
206 if (err)
207 goto out_return;
208 offs = 0;
209
210out:
211 err = ubifs_wbuf_seek_nolock(wbuf, lnum, offs, UBI_SHORTTERM);
212 if (err)
213 goto out_unlock;
214
215 return 0;
216
217out_unlock:
218 mutex_unlock(&wbuf->io_mutex);
219 return err;
220
221out_return:
222 /* An error occurred and the LEB has to be returned to lprops */
223 ubifs_assert(err < 0);
224 err1 = ubifs_return_leb(c, lnum);
225 if (err1 && err == -EAGAIN)
226 /*
227 * Return original error code only if it is not %-EAGAIN,
228 * which is not really an error. Otherwise, return the error
229 * code of 'ubifs_return_leb()'.
230 */
231 err = err1;
232 mutex_unlock(&wbuf->io_mutex);
233 return err;
234}
235
236/**
237 * write_node - write node to a journal head.
238 * @c: UBIFS file-system description object
239 * @jhead: journal head
240 * @node: node to write
241 * @len: node length
242 * @lnum: LEB number written is returned here
243 * @offs: offset written is returned here
244 *
245 * This function writes a node to reserved space of journal head @jhead.
246 * Returns zero in case of success and a negative error code in case of
247 * failure.
248 */
249static int write_node(struct ubifs_info *c, int jhead, void *node, int len,
250 int *lnum, int *offs)
251{
252 struct ubifs_wbuf *wbuf = &c->jheads[jhead].wbuf;
253
254 ubifs_assert(jhead != GCHD);
255
256 *lnum = c->jheads[jhead].wbuf.lnum;
257 *offs = c->jheads[jhead].wbuf.offs + c->jheads[jhead].wbuf.used;
258
259 dbg_jnl("jhead %d, LEB %d:%d, len %d", jhead, *lnum, *offs, len);
260 ubifs_prepare_node(c, node, len, 0);
261
262 return ubifs_wbuf_write_nolock(wbuf, node, len);
263}
264
265/**
266 * write_head - write data to a journal head.
267 * @c: UBIFS file-system description object
268 * @jhead: journal head
269 * @buf: buffer to write
270 * @len: length to write
271 * @lnum: LEB number written is returned here
272 * @offs: offset written is returned here
273 * @sync: non-zero if the write-buffer has to by synchronized
274 *
275 * This function is the same as 'write_node()' but it does not assume the
276 * buffer it is writing is a node, so it does not prepare it (which means
277 * initializing common header and calculating CRC).
278 */
279static int write_head(struct ubifs_info *c, int jhead, void *buf, int len,
280 int *lnum, int *offs, int sync)
281{
282 int err;
283 struct ubifs_wbuf *wbuf = &c->jheads[jhead].wbuf;
284
285 ubifs_assert(jhead != GCHD);
286
287 *lnum = c->jheads[jhead].wbuf.lnum;
288 *offs = c->jheads[jhead].wbuf.offs + c->jheads[jhead].wbuf.used;
289 dbg_jnl("jhead %d, LEB %d:%d, len %d", jhead, *lnum, *offs, len);
290
291 err = ubifs_wbuf_write_nolock(wbuf, buf, len);
292 if (err)
293 return err;
294 if (sync)
295 err = ubifs_wbuf_sync_nolock(wbuf);
296 return err;
297}
298
299/**
300 * make_reservation - reserve journal space.
301 * @c: UBIFS file-system description object
302 * @jhead: journal head
303 * @len: how many bytes to reserve
304 *
305 * This function makes space reservation in journal head @jhead. The function
306 * takes the commit lock and locks the journal head, and the caller has to
307 * unlock the head and finish the reservation with 'finish_reservation()'.
308 * Returns zero in case of success and a negative error code in case of
309 * failure.
310 *
311 * Note, the journal head may be unlocked as soon as the data is written, while
312 * the commit lock has to be released after the data has been added to the
313 * TNC.
314 */
315static int make_reservation(struct ubifs_info *c, int jhead, int len)
316{
317 int err, cmt_retries = 0, nospc_retries = 0;
318
319again:
320 down_read(&c->commit_sem);
321 err = reserve_space(c, jhead, len);
322 if (!err)
323 return 0;
324 up_read(&c->commit_sem);
325
326 if (err == -ENOSPC) {
327 /*
328 * GC could not make any progress. We should try to commit
329 * once because it could make some dirty space and GC would
330 * make progress, so make the error -EAGAIN so that the below
331 * will commit and re-try.
332 */
333 if (nospc_retries++ < 2) {
334 dbg_jnl("no space, retry");
335 err = -EAGAIN;
336 }
337
338 /*
339 * This means that the budgeting is incorrect. We always have
340 * to be able to write to the media, because all operations are
341 * budgeted. Deletions are not budgeted, though, but we reserve
342 * an extra LEB for them.
343 */
344 }
345
346 if (err != -EAGAIN)
347 goto out;
348
349 /*
350 * -EAGAIN means that the journal is full or too large, or the above
351 * code wants to do one commit. Do this and re-try.
352 */
353 if (cmt_retries > 128) {
354 /*
355 * This should not happen unless the journal size limitations
356 * are too tough.
357 */
358 ubifs_err("stuck in space allocation");
359 err = -ENOSPC;
360 goto out;
361 } else if (cmt_retries > 32)
362 ubifs_warn("too many space allocation re-tries (%d)",
363 cmt_retries);
364
365 dbg_jnl("-EAGAIN, commit and retry (retried %d times)",
366 cmt_retries);
367 cmt_retries += 1;
368
369 err = ubifs_run_commit(c);
370 if (err)
371 return err;
372 goto again;
373
374out:
375 ubifs_err("cannot reserve %d bytes in jhead %d, error %d",
376 len, jhead, err);
377 if (err == -ENOSPC) {
378 /* This are some budgeting problems, print useful information */
379 down_write(&c->commit_sem);
380 spin_lock(&c->space_lock);
381 dbg_dump_stack();
382 dbg_dump_budg(c);
383 spin_unlock(&c->space_lock);
384 dbg_dump_lprops(c);
385 cmt_retries = dbg_check_lprops(c);
386 up_write(&c->commit_sem);
387 }
388 return err;
389}
390
391/**
392 * release_head - release a journal head.
393 * @c: UBIFS file-system description object
394 * @jhead: journal head
395 *
396 * This function releases journal head @jhead which was locked by
397 * the 'make_reservation()' function. It has to be called after each successful
398 * 'make_reservation()' invocation.
399 */
400static inline void release_head(struct ubifs_info *c, int jhead)
401{
402 mutex_unlock(&c->jheads[jhead].wbuf.io_mutex);
403}
404
405/**
406 * finish_reservation - finish a reservation.
407 * @c: UBIFS file-system description object
408 *
409 * This function finishes journal space reservation. It must be called after
410 * 'make_reservation()'.
411 */
412static void finish_reservation(struct ubifs_info *c)
413{
414 up_read(&c->commit_sem);
415}
416
417/**
418 * get_dent_type - translate VFS inode mode to UBIFS directory entry type.
419 * @mode: inode mode
420 */
421static int get_dent_type(int mode)
422{
423 switch (mode & S_IFMT) {
424 case S_IFREG:
425 return UBIFS_ITYPE_REG;
426 case S_IFDIR:
427 return UBIFS_ITYPE_DIR;
428 case S_IFLNK:
429 return UBIFS_ITYPE_LNK;
430 case S_IFBLK:
431 return UBIFS_ITYPE_BLK;
432 case S_IFCHR:
433 return UBIFS_ITYPE_CHR;
434 case S_IFIFO:
435 return UBIFS_ITYPE_FIFO;
436 case S_IFSOCK:
437 return UBIFS_ITYPE_SOCK;
438 default:
439 BUG();
440 }
441 return 0;
442}
443
444/**
445 * pack_inode - pack an inode node.
446 * @c: UBIFS file-system description object
447 * @ino: buffer in which to pack inode node
448 * @inode: inode to pack
449 * @last: indicates the last node of the group
450 * @last_reference: non-zero if this is a deletion inode
451 */
452static void pack_inode(struct ubifs_info *c, struct ubifs_ino_node *ino,
453 const struct inode *inode, int last,
454 int last_reference)
455{
456 int data_len = 0;
457 struct ubifs_inode *ui = ubifs_inode(inode);
458
459 ino->ch.node_type = UBIFS_INO_NODE;
460 ino_key_init_flash(c, &ino->key, inode->i_ino);
461 ino->creat_sqnum = cpu_to_le64(ui->creat_sqnum);
462 ino->atime_sec = cpu_to_le64(inode->i_atime.tv_sec);
463 ino->atime_nsec = cpu_to_le32(inode->i_atime.tv_nsec);
464 ino->ctime_sec = cpu_to_le64(inode->i_ctime.tv_sec);
465 ino->ctime_nsec = cpu_to_le32(inode->i_ctime.tv_nsec);
466 ino->mtime_sec = cpu_to_le64(inode->i_mtime.tv_sec);
467 ino->mtime_nsec = cpu_to_le32(inode->i_mtime.tv_nsec);
468 ino->uid = cpu_to_le32(inode->i_uid);
469 ino->gid = cpu_to_le32(inode->i_gid);
470 ino->mode = cpu_to_le32(inode->i_mode);
471 ino->flags = cpu_to_le32(ui->flags);
472 ino->size = cpu_to_le64(ui->ui_size);
473 ino->nlink = cpu_to_le32(inode->i_nlink);
474 ino->compr_type = cpu_to_le16(ui->compr_type);
475 ino->data_len = cpu_to_le32(ui->data_len);
476 ino->xattr_cnt = cpu_to_le32(ui->xattr_cnt);
477 ino->xattr_size = cpu_to_le32(ui->xattr_size);
478 ino->xattr_names = cpu_to_le32(ui->xattr_names);
479 zero_ino_node_unused(ino);
480
481 /*
482 * Drop the attached data if this is a deletion inode, the data is not
483 * needed anymore.
484 */
485 if (!last_reference) {
486 memcpy(ino->data, ui->data, ui->data_len);
487 data_len = ui->data_len;
488 }
489
490 ubifs_prep_grp_node(c, ino, UBIFS_INO_NODE_SZ + data_len, last);
491}
492
493/**
494 * mark_inode_clean - mark UBIFS inode as clean.
495 * @c: UBIFS file-system description object
496 * @ui: UBIFS inode to mark as clean
497 *
498 * This helper function marks UBIFS inode @ui as clean by cleaning the
499 * @ui->dirty flag and releasing its budget. Note, VFS may still treat the
500 * inode as dirty and try to write it back, but 'ubifs_write_inode()' would
501 * just do nothing.
502 */
503static void mark_inode_clean(struct ubifs_info *c, struct ubifs_inode *ui)
504{
505 if (ui->dirty)
506 ubifs_release_dirty_inode_budget(c, ui);
507 ui->dirty = 0;
508}
509
510/**
511 * ubifs_jnl_update - update inode.
512 * @c: UBIFS file-system description object
513 * @dir: parent inode or host inode in case of extended attributes
514 * @nm: directory entry name
515 * @inode: inode to update
516 * @deletion: indicates a directory entry deletion i.e unlink or rmdir
517 * @xent: non-zero if the directory entry is an extended attribute entry
518 *
519 * This function updates an inode by writing a directory entry (or extended
520 * attribute entry), the inode itself, and the parent directory inode (or the
521 * host inode) to the journal.
522 *
523 * The function writes the host inode @dir last, which is important in case of
524 * extended attributes. Indeed, then we guarantee that if the host inode gets
525 * synchronized (with 'fsync()'), and the write-buffer it sits in gets flushed,
526 * the extended attribute inode gets flushed too. And this is exactly what the
527 * user expects - synchronizing the host inode synchronizes its extended
528 * attributes. Similarly, this guarantees that if @dir is synchronized, its
529 * directory entry corresponding to @nm gets synchronized too.
530 *
531 * If the inode (@inode) or the parent directory (@dir) are synchronous, this
532 * function synchronizes the write-buffer.
533 *
534 * This function marks the @dir and @inode inodes as clean and returns zero on
535 * success. In case of failure, a negative error code is returned.
536 */
537int ubifs_jnl_update(struct ubifs_info *c, const struct inode *dir,
538 const struct qstr *nm, const struct inode *inode,
539 int deletion, int xent)
540{
541 int err, dlen, ilen, len, lnum, ino_offs, dent_offs;
542 int aligned_dlen, aligned_ilen, sync = IS_DIRSYNC(dir);
543 int last_reference = !!(deletion && inode->i_nlink == 0);
544 struct ubifs_inode *ui = ubifs_inode(inode);
545 struct ubifs_inode *dir_ui = ubifs_inode(dir);
546 struct ubifs_dent_node *dent;
547 struct ubifs_ino_node *ino;
548 union ubifs_key dent_key, ino_key;
549
550 dbg_jnl("ino %lu, dent '%.*s', data len %d in dir ino %lu",
551 inode->i_ino, nm->len, nm->name, ui->data_len, dir->i_ino);
552 ubifs_assert(dir_ui->data_len == 0);
553 ubifs_assert(mutex_is_locked(&dir_ui->ui_mutex));
554
555 dlen = UBIFS_DENT_NODE_SZ + nm->len + 1;
556 ilen = UBIFS_INO_NODE_SZ;
557
558 /*
559 * If the last reference to the inode is being deleted, then there is
560 * no need to attach and write inode data, it is being deleted anyway.
561 * And if the inode is being deleted, no need to synchronize
562 * write-buffer even if the inode is synchronous.
563 */
564 if (!last_reference) {
565 ilen += ui->data_len;
566 sync |= IS_SYNC(inode);
567 }
568
569 aligned_dlen = ALIGN(dlen, 8);
570 aligned_ilen = ALIGN(ilen, 8);
571 len = aligned_dlen + aligned_ilen + UBIFS_INO_NODE_SZ;
572 dent = kmalloc(len, GFP_NOFS);
573 if (!dent)
574 return -ENOMEM;
575
576 /* Make reservation before allocating sequence numbers */
577 err = make_reservation(c, BASEHD, len);
578 if (err)
579 goto out_free;
580
581 if (!xent) {
582 dent->ch.node_type = UBIFS_DENT_NODE;
583 dent_key_init(c, &dent_key, dir->i_ino, nm);
584 } else {
585 dent->ch.node_type = UBIFS_XENT_NODE;
586 xent_key_init(c, &dent_key, dir->i_ino, nm);
587 }
588
589 key_write(c, &dent_key, dent->key);
590 dent->inum = deletion ? 0 : cpu_to_le64(inode->i_ino);
591 dent->type = get_dent_type(inode->i_mode);
592 dent->nlen = cpu_to_le16(nm->len);
593 memcpy(dent->name, nm->name, nm->len);
594 dent->name[nm->len] = '\0';
595 zero_dent_node_unused(dent);
596 ubifs_prep_grp_node(c, dent, dlen, 0);
597
598 ino = (void *)dent + aligned_dlen;
599 pack_inode(c, ino, inode, 0, last_reference);
600 ino = (void *)ino + aligned_ilen;
601 pack_inode(c, ino, dir, 1, 0);
602
603 if (last_reference) {
604 err = ubifs_add_orphan(c, inode->i_ino);
605 if (err) {
606 release_head(c, BASEHD);
607 goto out_finish;
608 }
609 }
610
611 err = write_head(c, BASEHD, dent, len, &lnum, &dent_offs, sync);
612 if (err)
613 goto out_release;
614 if (!sync) {
615 struct ubifs_wbuf *wbuf = &c->jheads[BASEHD].wbuf;
616
617 ubifs_wbuf_add_ino_nolock(wbuf, inode->i_ino);
618 ubifs_wbuf_add_ino_nolock(wbuf, dir->i_ino);
619 }
620 release_head(c, BASEHD);
621 kfree(dent);
622
623 if (deletion) {
624 err = ubifs_tnc_remove_nm(c, &dent_key, nm);
625 if (err)
626 goto out_ro;
627 err = ubifs_add_dirt(c, lnum, dlen);
628 } else
629 err = ubifs_tnc_add_nm(c, &dent_key, lnum, dent_offs, dlen, nm);
630 if (err)
631 goto out_ro;
632
633 /*
634 * Note, we do not remove the inode from TNC even if the last reference
635 * to it has just been deleted, because the inode may still be opened.
636 * Instead, the inode has been added to orphan lists and the orphan
637 * subsystem will take further care about it.
638 */
639 ino_key_init(c, &ino_key, inode->i_ino);
640 ino_offs = dent_offs + aligned_dlen;
641 err = ubifs_tnc_add(c, &ino_key, lnum, ino_offs, ilen);
642 if (err)
643 goto out_ro;
644
645 ino_key_init(c, &ino_key, dir->i_ino);
646 ino_offs += aligned_ilen;
647 err = ubifs_tnc_add(c, &ino_key, lnum, ino_offs, UBIFS_INO_NODE_SZ);
648 if (err)
649 goto out_ro;
650
651 finish_reservation(c);
652 spin_lock(&ui->ui_lock);
653 ui->synced_i_size = ui->ui_size;
654 spin_unlock(&ui->ui_lock);
655 mark_inode_clean(c, ui);
656 mark_inode_clean(c, dir_ui);
657 return 0;
658
659out_finish:
660 finish_reservation(c);
661out_free:
662 kfree(dent);
663 return err;
664
665out_release:
666 release_head(c, BASEHD);
667out_ro:
668 ubifs_ro_mode(c, err);
669 if (last_reference)
670 ubifs_delete_orphan(c, inode->i_ino);
671 finish_reservation(c);
672 return err;
673}
674
675/**
676 * ubifs_jnl_write_data - write a data node to the journal.
677 * @c: UBIFS file-system description object
678 * @inode: inode the data node belongs to
679 * @key: node key
680 * @buf: buffer to write
681 * @len: data length (must not exceed %UBIFS_BLOCK_SIZE)
682 *
683 * This function writes a data node to the journal. Returns %0 if the data node
684 * was successfully written, and a negative error code in case of failure.
685 */
686int ubifs_jnl_write_data(struct ubifs_info *c, const struct inode *inode,
687 const union ubifs_key *key, const void *buf, int len)
688{
689 struct ubifs_data_node *data;
690 int err, lnum, offs, compr_type, out_len;
691 int dlen = UBIFS_DATA_NODE_SZ + UBIFS_BLOCK_SIZE * WORST_COMPR_FACTOR;
692 struct ubifs_inode *ui = ubifs_inode(inode);
693
694 dbg_jnl("ino %lu, blk %u, len %d, key %s", key_inum(c, key),
695 key_block(c, key), len, DBGKEY(key));
696 ubifs_assert(len <= UBIFS_BLOCK_SIZE);
697
698 data = kmalloc(dlen, GFP_NOFS);
699 if (!data)
700 return -ENOMEM;
701
702 data->ch.node_type = UBIFS_DATA_NODE;
703 key_write(c, key, &data->key);
704 data->size = cpu_to_le32(len);
705 zero_data_node_unused(data);
706
707 if (!(ui->flags && UBIFS_COMPR_FL))
708 /* Compression is disabled for this inode */
709 compr_type = UBIFS_COMPR_NONE;
710 else
711 compr_type = ui->compr_type;
712
713 out_len = dlen - UBIFS_DATA_NODE_SZ;
714 ubifs_compress(buf, len, &data->data, &out_len, &compr_type);
715 ubifs_assert(out_len <= UBIFS_BLOCK_SIZE);
716
717 dlen = UBIFS_DATA_NODE_SZ + out_len;
718 data->compr_type = cpu_to_le16(compr_type);
719
720 /* Make reservation before allocating sequence numbers */
721 err = make_reservation(c, DATAHD, dlen);
722 if (err)
723 goto out_free;
724
725 err = write_node(c, DATAHD, data, dlen, &lnum, &offs);
726 if (err)
727 goto out_release;
728 ubifs_wbuf_add_ino_nolock(&c->jheads[DATAHD].wbuf, key_inum(c, key));
729 release_head(c, DATAHD);
730
731 err = ubifs_tnc_add(c, key, lnum, offs, dlen);
732 if (err)
733 goto out_ro;
734
735 finish_reservation(c);
736 kfree(data);
737 return 0;
738
739out_release:
740 release_head(c, DATAHD);
741out_ro:
742 ubifs_ro_mode(c, err);
743 finish_reservation(c);
744out_free:
745 kfree(data);
746 return err;
747}
748
749/**
750 * ubifs_jnl_write_inode - flush inode to the journal.
751 * @c: UBIFS file-system description object
752 * @inode: inode to flush
753 * @deletion: inode has been deleted
754 *
755 * This function writes inode @inode to the journal. If the inode is
756 * synchronous, it also synchronizes the write-buffer. Returns zero in case of
757 * success and a negative error code in case of failure.
758 */
759int ubifs_jnl_write_inode(struct ubifs_info *c, const struct inode *inode,
760 int deletion)
761{
762 int err, len, lnum, offs, sync = 0;
763 struct ubifs_ino_node *ino;
764 struct ubifs_inode *ui = ubifs_inode(inode);
765
766 dbg_jnl("ino %lu%s", inode->i_ino,
767 deletion ? " (last reference)" : "");
768 if (deletion)
769 ubifs_assert(inode->i_nlink == 0);
770
771 len = UBIFS_INO_NODE_SZ;
772 /*
773 * If the inode is being deleted, do not write the attached data. No
774 * need to synchronize the write-buffer either.
775 */
776 if (!deletion) {
777 len += ui->data_len;
778 sync = IS_SYNC(inode);
779 }
780 ino = kmalloc(len, GFP_NOFS);
781 if (!ino)
782 return -ENOMEM;
783
784 /* Make reservation before allocating sequence numbers */
785 err = make_reservation(c, BASEHD, len);
786 if (err)
787 goto out_free;
788
789 pack_inode(c, ino, inode, 1, deletion);
790 err = write_head(c, BASEHD, ino, len, &lnum, &offs, sync);
791 if (err)
792 goto out_release;
793 if (!sync)
794 ubifs_wbuf_add_ino_nolock(&c->jheads[BASEHD].wbuf,
795 inode->i_ino);
796 release_head(c, BASEHD);
797
798 if (deletion) {
799 err = ubifs_tnc_remove_ino(c, inode->i_ino);
800 if (err)
801 goto out_ro;
802 ubifs_delete_orphan(c, inode->i_ino);
803 err = ubifs_add_dirt(c, lnum, len);
804 } else {
805 union ubifs_key key;
806
807 ino_key_init(c, &key, inode->i_ino);
808 err = ubifs_tnc_add(c, &key, lnum, offs, len);
809 }
810 if (err)
811 goto out_ro;
812
813 finish_reservation(c);
814 spin_lock(&ui->ui_lock);
815 ui->synced_i_size = ui->ui_size;
816 spin_unlock(&ui->ui_lock);
817 kfree(ino);
818 return 0;
819
820out_release:
821 release_head(c, BASEHD);
822out_ro:
823 ubifs_ro_mode(c, err);
824 finish_reservation(c);
825out_free:
826 kfree(ino);
827 return err;
828}
829
830/**
831 * ubifs_jnl_rename - rename a directory entry.
832 * @c: UBIFS file-system description object
833 * @old_dir: parent inode of directory entry to rename
834 * @old_dentry: directory entry to rename
835 * @new_dir: parent inode of directory entry to rename
836 * @new_dentry: new directory entry (or directory entry to replace)
837 * @sync: non-zero if the write-buffer has to be synchronized
838 *
839 * This function implements the re-name operation which may involve writing up
840 * to 3 inodes and 2 directory entries. It marks the written inodes as clean
841 * and returns zero on success. In case of failure, a negative error code is
842 * returned.
843 */
844int ubifs_jnl_rename(struct ubifs_info *c, const struct inode *old_dir,
845 const struct dentry *old_dentry,
846 const struct inode *new_dir,
847 const struct dentry *new_dentry, int sync)
848{
849 void *p;
850 union ubifs_key key;
851 struct ubifs_dent_node *dent, *dent2;
852 int err, dlen1, dlen2, ilen, lnum, offs, len;
853 const struct inode *old_inode = old_dentry->d_inode;
854 const struct inode *new_inode = new_dentry->d_inode;
855 int aligned_dlen1, aligned_dlen2, plen = UBIFS_INO_NODE_SZ;
856 int last_reference = !!(new_inode && new_inode->i_nlink == 0);
857 int move = (old_dir != new_dir);
858 struct ubifs_inode *uninitialized_var(new_ui);
859
860 dbg_jnl("dent '%.*s' in dir ino %lu to dent '%.*s' in dir ino %lu",
861 old_dentry->d_name.len, old_dentry->d_name.name,
862 old_dir->i_ino, new_dentry->d_name.len,
863 new_dentry->d_name.name, new_dir->i_ino);
864 ubifs_assert(ubifs_inode(old_dir)->data_len == 0);
865 ubifs_assert(ubifs_inode(new_dir)->data_len == 0);
866 ubifs_assert(mutex_is_locked(&ubifs_inode(old_dir)->ui_mutex));
867 ubifs_assert(mutex_is_locked(&ubifs_inode(new_dir)->ui_mutex));
868
869 dlen1 = UBIFS_DENT_NODE_SZ + new_dentry->d_name.len + 1;
870 dlen2 = UBIFS_DENT_NODE_SZ + old_dentry->d_name.len + 1;
871 if (new_inode) {
872 new_ui = ubifs_inode(new_inode);
873 ubifs_assert(mutex_is_locked(&new_ui->ui_mutex));
874 ilen = UBIFS_INO_NODE_SZ;
875 if (!last_reference)
876 ilen += new_ui->data_len;
877 } else
878 ilen = 0;
879
880 aligned_dlen1 = ALIGN(dlen1, 8);
881 aligned_dlen2 = ALIGN(dlen2, 8);
882 len = aligned_dlen1 + aligned_dlen2 + ALIGN(ilen, 8) + ALIGN(plen, 8);
883 if (old_dir != new_dir)
884 len += plen;
885 dent = kmalloc(len, GFP_NOFS);
886 if (!dent)
887 return -ENOMEM;
888
889 /* Make reservation before allocating sequence numbers */
890 err = make_reservation(c, BASEHD, len);
891 if (err)
892 goto out_free;
893
894 /* Make new dent */
895 dent->ch.node_type = UBIFS_DENT_NODE;
896 dent_key_init_flash(c, &dent->key, new_dir->i_ino, &new_dentry->d_name);
897 dent->inum = cpu_to_le64(old_inode->i_ino);
898 dent->type = get_dent_type(old_inode->i_mode);
899 dent->nlen = cpu_to_le16(new_dentry->d_name.len);
900 memcpy(dent->name, new_dentry->d_name.name, new_dentry->d_name.len);
901 dent->name[new_dentry->d_name.len] = '\0';
902 zero_dent_node_unused(dent);
903 ubifs_prep_grp_node(c, dent, dlen1, 0);
904
905 /* Make deletion dent */
906 dent2 = (void *)dent + aligned_dlen1;
907 dent2->ch.node_type = UBIFS_DENT_NODE;
908 dent_key_init_flash(c, &dent2->key, old_dir->i_ino,
909 &old_dentry->d_name);
910 dent2->inum = 0;
911 dent2->type = DT_UNKNOWN;
912 dent2->nlen = cpu_to_le16(old_dentry->d_name.len);
913 memcpy(dent2->name, old_dentry->d_name.name, old_dentry->d_name.len);
914 dent2->name[old_dentry->d_name.len] = '\0';
915 zero_dent_node_unused(dent2);
916 ubifs_prep_grp_node(c, dent2, dlen2, 0);
917
918 p = (void *)dent2 + aligned_dlen2;
919 if (new_inode) {
920 pack_inode(c, p, new_inode, 0, last_reference);
921 p += ALIGN(ilen, 8);
922 }
923
924 if (!move)
925 pack_inode(c, p, old_dir, 1, 0);
926 else {
927 pack_inode(c, p, old_dir, 0, 0);
928 p += ALIGN(plen, 8);
929 pack_inode(c, p, new_dir, 1, 0);
930 }
931
932 if (last_reference) {
933 err = ubifs_add_orphan(c, new_inode->i_ino);
934 if (err) {
935 release_head(c, BASEHD);
936 goto out_finish;
937 }
938 }
939
940 err = write_head(c, BASEHD, dent, len, &lnum, &offs, sync);
941 if (err)
942 goto out_release;
943 if (!sync) {
944 struct ubifs_wbuf *wbuf = &c->jheads[BASEHD].wbuf;
945
946 ubifs_wbuf_add_ino_nolock(wbuf, new_dir->i_ino);
947 ubifs_wbuf_add_ino_nolock(wbuf, old_dir->i_ino);
948 if (new_inode)
949 ubifs_wbuf_add_ino_nolock(&c->jheads[BASEHD].wbuf,
950 new_inode->i_ino);
951 }
952 release_head(c, BASEHD);
953
954 dent_key_init(c, &key, new_dir->i_ino, &new_dentry->d_name);
955 err = ubifs_tnc_add_nm(c, &key, lnum, offs, dlen1, &new_dentry->d_name);
956 if (err)
957 goto out_ro;
958
959 err = ubifs_add_dirt(c, lnum, dlen2);
960 if (err)
961 goto out_ro;
962
963 dent_key_init(c, &key, old_dir->i_ino, &old_dentry->d_name);
964 err = ubifs_tnc_remove_nm(c, &key, &old_dentry->d_name);
965 if (err)
966 goto out_ro;
967
968 offs += aligned_dlen1 + aligned_dlen2;
969 if (new_inode) {
970 ino_key_init(c, &key, new_inode->i_ino);
971 err = ubifs_tnc_add(c, &key, lnum, offs, ilen);
972 if (err)
973 goto out_ro;
974 offs += ALIGN(ilen, 8);
975 }
976
977 ino_key_init(c, &key, old_dir->i_ino);
978 err = ubifs_tnc_add(c, &key, lnum, offs, plen);
979 if (err)
980 goto out_ro;
981
982 if (old_dir != new_dir) {
983 offs += ALIGN(plen, 8);
984 ino_key_init(c, &key, new_dir->i_ino);
985 err = ubifs_tnc_add(c, &key, lnum, offs, plen);
986 if (err)
987 goto out_ro;
988 }
989
990 finish_reservation(c);
991 if (new_inode) {
992 mark_inode_clean(c, new_ui);
993 spin_lock(&new_ui->ui_lock);
994 new_ui->synced_i_size = new_ui->ui_size;
995 spin_unlock(&new_ui->ui_lock);
996 }
997 mark_inode_clean(c, ubifs_inode(old_dir));
998 if (move)
999 mark_inode_clean(c, ubifs_inode(new_dir));
1000 kfree(dent);
1001 return 0;
1002
1003out_release:
1004 release_head(c, BASEHD);
1005out_ro:
1006 ubifs_ro_mode(c, err);
1007 if (last_reference)
1008 ubifs_delete_orphan(c, new_inode->i_ino);
1009out_finish:
1010 finish_reservation(c);
1011out_free:
1012 kfree(dent);
1013 return err;
1014}
1015
1016/**
1017 * recomp_data_node - re-compress a truncated data node.
1018 * @dn: data node to re-compress
1019 * @new_len: new length
1020 *
1021 * This function is used when an inode is truncated and the last data node of
1022 * the inode has to be re-compressed and re-written.
1023 */
1024static int recomp_data_node(struct ubifs_data_node *dn, int *new_len)
1025{
1026 void *buf;
1027 int err, len, compr_type, out_len;
1028
1029 out_len = le32_to_cpu(dn->size);
1030 buf = kmalloc(out_len * WORST_COMPR_FACTOR, GFP_NOFS);
1031 if (!buf)
1032 return -ENOMEM;
1033
1034 len = le32_to_cpu(dn->ch.len) - UBIFS_DATA_NODE_SZ;
1035 compr_type = le16_to_cpu(dn->compr_type);
1036 err = ubifs_decompress(&dn->data, len, buf, &out_len, compr_type);
1037 if (err)
1038 goto out;
1039
1040 ubifs_compress(buf, *new_len, &dn->data, &out_len, &compr_type);
1041 ubifs_assert(out_len <= UBIFS_BLOCK_SIZE);
1042 dn->compr_type = cpu_to_le16(compr_type);
1043 dn->size = cpu_to_le32(*new_len);
1044 *new_len = UBIFS_DATA_NODE_SZ + out_len;
1045out:
1046 kfree(buf);
1047 return err;
1048}
1049
1050/**
1051 * ubifs_jnl_truncate - update the journal for a truncation.
1052 * @c: UBIFS file-system description object
1053 * @inode: inode to truncate
1054 * @old_size: old size
1055 * @new_size: new size
1056 *
1057 * When the size of a file decreases due to truncation, a truncation node is
1058 * written, the journal tree is updated, and the last data block is re-written
1059 * if it has been affected. The inode is also updated in order to synchronize
1060 * the new inode size.
1061 *
1062 * This function marks the inode as clean and returns zero on success. In case
1063 * of failure, a negative error code is returned.
1064 */
1065int ubifs_jnl_truncate(struct ubifs_info *c, const struct inode *inode,
1066 loff_t old_size, loff_t new_size)
1067{
1068 union ubifs_key key, to_key;
1069 struct ubifs_ino_node *ino;
1070 struct ubifs_trun_node *trun;
1071 struct ubifs_data_node *uninitialized_var(dn);
1072 int err, dlen, len, lnum, offs, bit, sz, sync = IS_SYNC(inode);
1073 struct ubifs_inode *ui = ubifs_inode(inode);
1074 ino_t inum = inode->i_ino;
1075 unsigned int blk;
1076
1077 dbg_jnl("ino %lu, size %lld -> %lld", inum, old_size, new_size);
1078 ubifs_assert(!ui->data_len);
1079 ubifs_assert(S_ISREG(inode->i_mode));
1080 ubifs_assert(mutex_is_locked(&ui->ui_mutex));
1081
1082 sz = UBIFS_TRUN_NODE_SZ + UBIFS_INO_NODE_SZ +
1083 UBIFS_MAX_DATA_NODE_SZ * WORST_COMPR_FACTOR;
1084 ino = kmalloc(sz, GFP_NOFS);
1085 if (!ino)
1086 return -ENOMEM;
1087
1088 trun = (void *)ino + UBIFS_INO_NODE_SZ;
1089 trun->ch.node_type = UBIFS_TRUN_NODE;
1090 trun->inum = cpu_to_le32(inum);
1091 trun->old_size = cpu_to_le64(old_size);
1092 trun->new_size = cpu_to_le64(new_size);
1093 zero_trun_node_unused(trun);
1094
1095 dlen = new_size & (UBIFS_BLOCK_SIZE - 1);
1096 if (dlen) {
1097 /* Get last data block so it can be truncated */
1098 dn = (void *)trun + UBIFS_TRUN_NODE_SZ;
1099 blk = new_size >> UBIFS_BLOCK_SHIFT;
1100 data_key_init(c, &key, inum, blk);
1101 dbg_jnl("last block key %s", DBGKEY(&key));
1102 err = ubifs_tnc_lookup(c, &key, dn);
1103 if (err == -ENOENT)
1104 dlen = 0; /* Not found (so it is a hole) */
1105 else if (err)
1106 goto out_free;
1107 else {
1108 if (le32_to_cpu(dn->size) <= dlen)
1109 dlen = 0; /* Nothing to do */
1110 else {
1111 int compr_type = le16_to_cpu(dn->compr_type);
1112
1113 if (compr_type != UBIFS_COMPR_NONE) {
1114 err = recomp_data_node(dn, &dlen);
1115 if (err)
1116 goto out_free;
1117 } else {
1118 dn->size = cpu_to_le32(dlen);
1119 dlen += UBIFS_DATA_NODE_SZ;
1120 }
1121 zero_data_node_unused(dn);
1122 }
1123 }
1124 }
1125
1126 /* Must make reservation before allocating sequence numbers */
1127 len = UBIFS_TRUN_NODE_SZ + UBIFS_INO_NODE_SZ;
1128 if (dlen)
1129 len += dlen;
1130 err = make_reservation(c, BASEHD, len);
1131 if (err)
1132 goto out_free;
1133
1134 pack_inode(c, ino, inode, 0, 0);
1135 ubifs_prep_grp_node(c, trun, UBIFS_TRUN_NODE_SZ, dlen ? 0 : 1);
1136 if (dlen)
1137 ubifs_prep_grp_node(c, dn, dlen, 1);
1138
1139 err = write_head(c, BASEHD, ino, len, &lnum, &offs, sync);
1140 if (err)
1141 goto out_release;
1142 if (!sync)
1143 ubifs_wbuf_add_ino_nolock(&c->jheads[BASEHD].wbuf, inum);
1144 release_head(c, BASEHD);
1145
1146 if (dlen) {
1147 sz = offs + UBIFS_INO_NODE_SZ + UBIFS_TRUN_NODE_SZ;
1148 err = ubifs_tnc_add(c, &key, lnum, sz, dlen);
1149 if (err)
1150 goto out_ro;
1151 }
1152
1153 ino_key_init(c, &key, inum);
1154 err = ubifs_tnc_add(c, &key, lnum, offs, UBIFS_INO_NODE_SZ);
1155 if (err)
1156 goto out_ro;
1157
1158 err = ubifs_add_dirt(c, lnum, UBIFS_TRUN_NODE_SZ);
1159 if (err)
1160 goto out_ro;
1161
1162 bit = new_size & (UBIFS_BLOCK_SIZE - 1);
1163 blk = (new_size >> UBIFS_BLOCK_SHIFT) + (bit ? 1 : 0);
1164 data_key_init(c, &key, inum, blk);
1165
1166 bit = old_size & (UBIFS_BLOCK_SIZE - 1);
1167 blk = (old_size >> UBIFS_BLOCK_SHIFT) - (bit ? 0: 1);
1168 data_key_init(c, &to_key, inum, blk);
1169
1170 err = ubifs_tnc_remove_range(c, &key, &to_key);
1171 if (err)
1172 goto out_ro;
1173
1174 finish_reservation(c);
1175 spin_lock(&ui->ui_lock);
1176 ui->synced_i_size = ui->ui_size;
1177 spin_unlock(&ui->ui_lock);
1178 mark_inode_clean(c, ui);
1179 kfree(ino);
1180 return 0;
1181
1182out_release:
1183 release_head(c, BASEHD);
1184out_ro:
1185 ubifs_ro_mode(c, err);
1186 finish_reservation(c);
1187out_free:
1188 kfree(ino);
1189 return err;
1190}
1191
1192#ifdef CONFIG_UBIFS_FS_XATTR
1193
1194/**
1195 * ubifs_jnl_delete_xattr - delete an extended attribute.
1196 * @c: UBIFS file-system description object
1197 * @host: host inode
1198 * @inode: extended attribute inode
1199 * @nm: extended attribute entry name
1200 *
1201 * This function delete an extended attribute which is very similar to
1202 * un-linking regular files - it writes a deletion xentry, a deletion inode and
1203 * updates the target inode. Returns zero in case of success and a negative
1204 * error code in case of failure.
1205 */
1206int ubifs_jnl_delete_xattr(struct ubifs_info *c, const struct inode *host,
1207 const struct inode *inode, const struct qstr *nm)
1208{
1209 int err, xlen, hlen, len, lnum, xent_offs, aligned_xlen;
1210 struct ubifs_dent_node *xent;
1211 struct ubifs_ino_node *ino;
1212 union ubifs_key xent_key, key1, key2;
1213 int sync = IS_DIRSYNC(host);
1214 struct ubifs_inode *host_ui = ubifs_inode(host);
1215
1216 dbg_jnl("host %lu, xattr ino %lu, name '%s', data len %d",
1217 host->i_ino, inode->i_ino, nm->name,
1218 ubifs_inode(inode)->data_len);
1219 ubifs_assert(inode->i_nlink == 0);
1220 ubifs_assert(mutex_is_locked(&host_ui->ui_mutex));
1221
1222 /*
1223 * Since we are deleting the inode, we do not bother to attach any data
1224 * to it and assume its length is %UBIFS_INO_NODE_SZ.
1225 */
1226 xlen = UBIFS_DENT_NODE_SZ + nm->len + 1;
1227 aligned_xlen = ALIGN(xlen, 8);
1228 hlen = host_ui->data_len + UBIFS_INO_NODE_SZ;
1229 len = aligned_xlen + UBIFS_INO_NODE_SZ + ALIGN(hlen, 8);
1230
1231 xent = kmalloc(len, GFP_NOFS);
1232 if (!xent)
1233 return -ENOMEM;
1234
1235 /* Make reservation before allocating sequence numbers */
1236 err = make_reservation(c, BASEHD, len);
1237 if (err) {
1238 kfree(xent);
1239 return err;
1240 }
1241
1242 xent->ch.node_type = UBIFS_XENT_NODE;
1243 xent_key_init(c, &xent_key, host->i_ino, nm);
1244 key_write(c, &xent_key, xent->key);
1245 xent->inum = 0;
1246 xent->type = get_dent_type(inode->i_mode);
1247 xent->nlen = cpu_to_le16(nm->len);
1248 memcpy(xent->name, nm->name, nm->len);
1249 xent->name[nm->len] = '\0';
1250 zero_dent_node_unused(xent);
1251 ubifs_prep_grp_node(c, xent, xlen, 0);
1252
1253 ino = (void *)xent + aligned_xlen;
1254 pack_inode(c, ino, inode, 0, 1);
1255 ino = (void *)ino + UBIFS_INO_NODE_SZ;
1256 pack_inode(c, ino, host, 1, 0);
1257
1258 err = write_head(c, BASEHD, xent, len, &lnum, &xent_offs, sync);
1259 if (!sync && !err)
1260 ubifs_wbuf_add_ino_nolock(&c->jheads[BASEHD].wbuf, host->i_ino);
1261 release_head(c, BASEHD);
1262 kfree(xent);
1263 if (err)
1264 goto out_ro;
1265
1266 /* Remove the extended attribute entry from TNC */
1267 err = ubifs_tnc_remove_nm(c, &xent_key, nm);
1268 if (err)
1269 goto out_ro;
1270 err = ubifs_add_dirt(c, lnum, xlen);
1271 if (err)
1272 goto out_ro;
1273
1274 /*
1275 * Remove all nodes belonging to the extended attribute inode from TNC.
1276 * Well, there actually must be only one node - the inode itself.
1277 */
1278 lowest_ino_key(c, &key1, inode->i_ino);
1279 highest_ino_key(c, &key2, inode->i_ino);
1280 err = ubifs_tnc_remove_range(c, &key1, &key2);
1281 if (err)
1282 goto out_ro;
1283 err = ubifs_add_dirt(c, lnum, UBIFS_INO_NODE_SZ);
1284 if (err)
1285 goto out_ro;
1286
1287 /* And update TNC with the new host inode position */
1288 ino_key_init(c, &key1, host->i_ino);
1289 err = ubifs_tnc_add(c, &key1, lnum, xent_offs + len - hlen, hlen);
1290 if (err)
1291 goto out_ro;
1292
1293 finish_reservation(c);
1294 spin_lock(&host_ui->ui_lock);
1295 host_ui->synced_i_size = host_ui->ui_size;
1296 spin_unlock(&host_ui->ui_lock);
1297 mark_inode_clean(c, host_ui);
1298 return 0;
1299
1300out_ro:
1301 ubifs_ro_mode(c, err);
1302 finish_reservation(c);
1303 return err;
1304}
1305
1306/**
1307 * ubifs_jnl_change_xattr - change an extended attribute.
1308 * @c: UBIFS file-system description object
1309 * @inode: extended attribute inode
1310 * @host: host inode
1311 *
1312 * This function writes the updated version of an extended attribute inode and
1313 * the host inode tho the journal (to the base head). The host inode is written
1314 * after the extended attribute inode in order to guarantee that the extended
1315 * attribute will be flushed when the inode is synchronized by 'fsync()' and
1316 * consequently, the write-buffer is synchronized. This function returns zero
1317 * in case of success and a negative error code in case of failure.
1318 */
1319int ubifs_jnl_change_xattr(struct ubifs_info *c, const struct inode *inode,
1320 const struct inode *host)
1321{
1322 int err, len1, len2, aligned_len, aligned_len1, lnum, offs;
1323 struct ubifs_inode *host_ui = ubifs_inode(inode);
1324 struct ubifs_ino_node *ino;
1325 union ubifs_key key;
1326 int sync = IS_DIRSYNC(host);
1327
1328 dbg_jnl("ino %lu, ino %lu", host->i_ino, inode->i_ino);
1329 ubifs_assert(host->i_nlink > 0);
1330 ubifs_assert(inode->i_nlink > 0);
1331 ubifs_assert(mutex_is_locked(&host_ui->ui_mutex));
1332
1333 len1 = UBIFS_INO_NODE_SZ + host_ui->data_len;
1334 len2 = UBIFS_INO_NODE_SZ + ubifs_inode(inode)->data_len;
1335 aligned_len1 = ALIGN(len1, 8);
1336 aligned_len = aligned_len1 + ALIGN(len2, 8);
1337
1338 ino = kmalloc(aligned_len, GFP_NOFS);
1339 if (!ino)
1340 return -ENOMEM;
1341
1342 /* Make reservation before allocating sequence numbers */
1343 err = make_reservation(c, BASEHD, aligned_len);
1344 if (err)
1345 goto out_free;
1346
1347 pack_inode(c, ino, host, 0, 0);
1348 pack_inode(c, (void *)ino + aligned_len1, inode, 1, 0);
1349
1350 err = write_head(c, BASEHD, ino, aligned_len, &lnum, &offs, 0);
1351 if (!sync && !err) {
1352 struct ubifs_wbuf *wbuf = &c->jheads[BASEHD].wbuf;
1353
1354 ubifs_wbuf_add_ino_nolock(wbuf, host->i_ino);
1355 ubifs_wbuf_add_ino_nolock(wbuf, inode->i_ino);
1356 }
1357 release_head(c, BASEHD);
1358 if (err)
1359 goto out_ro;
1360
1361 ino_key_init(c, &key, host->i_ino);
1362 err = ubifs_tnc_add(c, &key, lnum, offs, len1);
1363 if (err)
1364 goto out_ro;
1365
1366 ino_key_init(c, &key, inode->i_ino);
1367 err = ubifs_tnc_add(c, &key, lnum, offs + aligned_len1, len2);
1368 if (err)
1369 goto out_ro;
1370
1371 finish_reservation(c);
1372 spin_lock(&host_ui->ui_lock);
1373 host_ui->synced_i_size = host_ui->ui_size;
1374 spin_unlock(&host_ui->ui_lock);
1375 mark_inode_clean(c, host_ui);
1376 kfree(ino);
1377 return 0;
1378
1379out_ro:
1380 ubifs_ro_mode(c, err);
1381 finish_reservation(c);
1382out_free:
1383 kfree(ino);
1384 return err;
1385}
1386
1387#endif /* CONFIG_UBIFS_FS_XATTR */
diff --git a/fs/ubifs/key.h b/fs/ubifs/key.h
new file mode 100644
index 00000000000..8f747600754
--- /dev/null
+++ b/fs/ubifs/key.h
@@ -0,0 +1,533 @@
1/*
2 * This file is part of UBIFS.
3 *
4 * Copyright (C) 2006-2008 Nokia Corporation.
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms of the GNU General Public License version 2 as published by
8 * the Free Software Foundation.
9 *
10 * This program is distributed in the hope that it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13 * more details.
14 *
15 * You should have received a copy of the GNU General Public License along with
16 * this program; if not, write to the Free Software Foundation, Inc., 51
17 * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
18 *
19 * Authors: Artem Bityutskiy (Битюцкий Артём)
20 * Adrian Hunter
21 */
22
23/*
24 * This header contains various key-related definitions and helper function.
25 * UBIFS allows several key schemes, so we access key fields only via these
26 * helpers. At the moment only one key scheme is supported.
27 *
28 * Simple key scheme
29 * ~~~~~~~~~~~~~~~~~
30 *
31 * Keys are 64-bits long. First 32-bits are inode number (parent inode number
32 * in case of direntry key). Next 3 bits are node type. The last 29 bits are
33 * 4KiB offset in case of a data node, and direntry hash in case of a direntry
34 * node. We use "r5" hash borrowed from reiserfs.
35 */
36
37#ifndef __UBIFS_KEY_H__
38#define __UBIFS_KEY_H__
39
40/**
41 * key_r5_hash - R5 hash function (borrowed from reiserfs).
42 * @s: direntry name
43 * @len: name length
44 */
45static inline uint32_t key_r5_hash(const char *s, int len)
46{
47 uint32_t a = 0;
48 const signed char *str = (const signed char *)s;
49
50 while (*str) {
51 a += *str << 4;
52 a += *str >> 4;
53 a *= 11;
54 str++;
55 }
56
57 a &= UBIFS_S_KEY_HASH_MASK;
58
59 /*
60 * We use hash values as offset in directories, so values %0 and %1 are
61 * reserved for "." and "..". %2 is reserved for "end of readdir"
62 * marker.
63 */
64 if (unlikely(a >= 0 && a <= 2))
65 a += 3;
66 return a;
67}
68
69/**
70 * key_test_hash - testing hash function.
71 * @str: direntry name
72 * @len: name length
73 */
74static inline uint32_t key_test_hash(const char *str, int len)
75{
76 uint32_t a = 0;
77
78 len = min_t(uint32_t, len, 4);
79 memcpy(&a, str, len);
80 a &= UBIFS_S_KEY_HASH_MASK;
81 if (unlikely(a >= 0 && a <= 2))
82 a += 3;
83 return a;
84}
85
86/**
87 * ino_key_init - initialize inode key.
88 * @c: UBIFS file-system description object
89 * @key: key to initialize
90 * @inum: inode number
91 */
92static inline void ino_key_init(const struct ubifs_info *c,
93 union ubifs_key *key, ino_t inum)
94{
95 key->u32[0] = inum;
96 key->u32[1] = UBIFS_INO_KEY << UBIFS_S_KEY_BLOCK_BITS;
97}
98
99/**
100 * ino_key_init_flash - initialize on-flash inode key.
101 * @c: UBIFS file-system description object
102 * @k: key to initialize
103 * @inum: inode number
104 */
105static inline void ino_key_init_flash(const struct ubifs_info *c, void *k,
106 ino_t inum)
107{
108 union ubifs_key *key = k;
109
110 key->j32[0] = cpu_to_le32(inum);
111 key->j32[1] = cpu_to_le32(UBIFS_INO_KEY << UBIFS_S_KEY_BLOCK_BITS);
112 memset(k + 8, 0, UBIFS_MAX_KEY_LEN - 8);
113}
114
115/**
116 * lowest_ino_key - get the lowest possible inode key.
117 * @c: UBIFS file-system description object
118 * @key: key to initialize
119 * @inum: inode number
120 */
121static inline void lowest_ino_key(const struct ubifs_info *c,
122 union ubifs_key *key, ino_t inum)
123{
124 key->u32[0] = inum;
125 key->u32[1] = 0;
126}
127
128/**
129 * highest_ino_key - get the highest possible inode key.
130 * @c: UBIFS file-system description object
131 * @key: key to initialize
132 * @inum: inode number
133 */
134static inline void highest_ino_key(const struct ubifs_info *c,
135 union ubifs_key *key, ino_t inum)
136{
137 key->u32[0] = inum;
138 key->u32[1] = 0xffffffff;
139}
140
141/**
142 * dent_key_init - initialize directory entry key.
143 * @c: UBIFS file-system description object
144 * @key: key to initialize
145 * @inum: parent inode number
146 * @nm: direntry name and length
147 */
148static inline void dent_key_init(const struct ubifs_info *c,
149 union ubifs_key *key, ino_t inum,
150 const struct qstr *nm)
151{
152 uint32_t hash = c->key_hash(nm->name, nm->len);
153
154 ubifs_assert(!(hash & ~UBIFS_S_KEY_HASH_MASK));
155 key->u32[0] = inum;
156 key->u32[1] = hash | (UBIFS_DENT_KEY << UBIFS_S_KEY_HASH_BITS);
157}
158
159/**
160 * dent_key_init_hash - initialize directory entry key without re-calculating
161 * hash function.
162 * @c: UBIFS file-system description object
163 * @key: key to initialize
164 * @inum: parent inode number
165 * @hash: direntry name hash
166 */
167static inline void dent_key_init_hash(const struct ubifs_info *c,
168 union ubifs_key *key, ino_t inum,
169 uint32_t hash)
170{
171 ubifs_assert(!(hash & ~UBIFS_S_KEY_HASH_MASK));
172 key->u32[0] = inum;
173 key->u32[1] = hash | (UBIFS_DENT_KEY << UBIFS_S_KEY_HASH_BITS);
174}
175
176/**
177 * dent_key_init_flash - initialize on-flash directory entry key.
178 * @c: UBIFS file-system description object
179 * @k: key to initialize
180 * @inum: parent inode number
181 * @nm: direntry name and length
182 */
183static inline void dent_key_init_flash(const struct ubifs_info *c, void *k,
184 ino_t inum, const struct qstr *nm)
185{
186 union ubifs_key *key = k;
187 uint32_t hash = c->key_hash(nm->name, nm->len);
188
189 ubifs_assert(!(hash & ~UBIFS_S_KEY_HASH_MASK));
190 key->j32[0] = cpu_to_le32(inum);
191 key->j32[1] = cpu_to_le32(hash |
192 (UBIFS_DENT_KEY << UBIFS_S_KEY_HASH_BITS));
193 memset(k + 8, 0, UBIFS_MAX_KEY_LEN - 8);
194}
195
196/**
197 * lowest_dent_key - get the lowest possible directory entry key.
198 * @c: UBIFS file-system description object
199 * @key: where to store the lowest key
200 * @inum: parent inode number
201 */
202static inline void lowest_dent_key(const struct ubifs_info *c,
203 union ubifs_key *key, ino_t inum)
204{
205 key->u32[0] = inum;
206 key->u32[1] = UBIFS_DENT_KEY << UBIFS_S_KEY_HASH_BITS;
207}
208
209/**
210 * xent_key_init - initialize extended attribute entry key.
211 * @c: UBIFS file-system description object
212 * @key: key to initialize
213 * @inum: host inode number
214 * @nm: extended attribute entry name and length
215 */
216static inline void xent_key_init(const struct ubifs_info *c,
217 union ubifs_key *key, ino_t inum,
218 const struct qstr *nm)
219{
220 uint32_t hash = c->key_hash(nm->name, nm->len);
221
222 ubifs_assert(!(hash & ~UBIFS_S_KEY_HASH_MASK));
223 key->u32[0] = inum;
224 key->u32[1] = hash | (UBIFS_XENT_KEY << UBIFS_S_KEY_HASH_BITS);
225}
226
227/**
228 * xent_key_init_hash - initialize extended attribute entry key without
229 * re-calculating hash function.
230 * @c: UBIFS file-system description object
231 * @key: key to initialize
232 * @inum: host inode number
233 * @hash: extended attribute entry name hash
234 */
235static inline void xent_key_init_hash(const struct ubifs_info *c,
236 union ubifs_key *key, ino_t inum,
237 uint32_t hash)
238{
239 ubifs_assert(!(hash & ~UBIFS_S_KEY_HASH_MASK));
240 key->u32[0] = inum;
241 key->u32[1] = hash | (UBIFS_XENT_KEY << UBIFS_S_KEY_HASH_BITS);
242}
243
244/**
245 * xent_key_init_flash - initialize on-flash extended attribute entry key.
246 * @c: UBIFS file-system description object
247 * @k: key to initialize
248 * @inum: host inode number
249 * @nm: extended attribute entry name and length
250 */
251static inline void xent_key_init_flash(const struct ubifs_info *c, void *k,
252 ino_t inum, const struct qstr *nm)
253{
254 union ubifs_key *key = k;
255 uint32_t hash = c->key_hash(nm->name, nm->len);
256
257 ubifs_assert(!(hash & ~UBIFS_S_KEY_HASH_MASK));
258 key->j32[0] = cpu_to_le32(inum);
259 key->j32[1] = cpu_to_le32(hash |
260 (UBIFS_XENT_KEY << UBIFS_S_KEY_HASH_BITS));
261 memset(k + 8, 0, UBIFS_MAX_KEY_LEN - 8);
262}
263
264/**
265 * lowest_xent_key - get the lowest possible extended attribute entry key.
266 * @c: UBIFS file-system description object
267 * @key: where to store the lowest key
268 * @inum: host inode number
269 */
270static inline void lowest_xent_key(const struct ubifs_info *c,
271 union ubifs_key *key, ino_t inum)
272{
273 key->u32[0] = inum;
274 key->u32[1] = UBIFS_XENT_KEY << UBIFS_S_KEY_HASH_BITS;
275}
276
277/**
278 * data_key_init - initialize data key.
279 * @c: UBIFS file-system description object
280 * @key: key to initialize
281 * @inum: inode number
282 * @block: block number
283 */
284static inline void data_key_init(const struct ubifs_info *c,
285 union ubifs_key *key, ino_t inum,
286 unsigned int block)
287{
288 ubifs_assert(!(block & ~UBIFS_S_KEY_BLOCK_MASK));
289 key->u32[0] = inum;
290 key->u32[1] = block | (UBIFS_DATA_KEY << UBIFS_S_KEY_BLOCK_BITS);
291}
292
293/**
294 * data_key_init_flash - initialize on-flash data key.
295 * @c: UBIFS file-system description object
296 * @k: key to initialize
297 * @inum: inode number
298 * @block: block number
299 */
300static inline void data_key_init_flash(const struct ubifs_info *c, void *k,
301 ino_t inum, unsigned int block)
302{
303 union ubifs_key *key = k;
304
305 ubifs_assert(!(block & ~UBIFS_S_KEY_BLOCK_MASK));
306 key->j32[0] = cpu_to_le32(inum);
307 key->j32[1] = cpu_to_le32(block |
308 (UBIFS_DATA_KEY << UBIFS_S_KEY_BLOCK_BITS));
309 memset(k + 8, 0, UBIFS_MAX_KEY_LEN - 8);
310}
311
312/**
313 * trun_key_init - initialize truncation node key.
314 * @c: UBIFS file-system description object
315 * @key: key to initialize
316 * @inum: inode number
317 *
318 * Note, UBIFS does not have truncation keys on the media and this function is
319 * only used for purposes of replay.
320 */
321static inline void trun_key_init(const struct ubifs_info *c,
322 union ubifs_key *key, ino_t inum)
323{
324 key->u32[0] = inum;
325 key->u32[1] = UBIFS_TRUN_KEY << UBIFS_S_KEY_BLOCK_BITS;
326}
327
328/**
329 * key_type - get key type.
330 * @c: UBIFS file-system description object
331 * @key: key to get type of
332 */
333static inline int key_type(const struct ubifs_info *c,
334 const union ubifs_key *key)
335{
336 return key->u32[1] >> UBIFS_S_KEY_BLOCK_BITS;
337}
338
339/**
340 * key_type_flash - get type of a on-flash formatted key.
341 * @c: UBIFS file-system description object
342 * @k: key to get type of
343 */
344static inline int key_type_flash(const struct ubifs_info *c, const void *k)
345{
346 const union ubifs_key *key = k;
347
348 return le32_to_cpu(key->u32[1]) >> UBIFS_S_KEY_BLOCK_BITS;
349}
350
351/**
352 * key_inum - fetch inode number from key.
353 * @c: UBIFS file-system description object
354 * @k: key to fetch inode number from
355 */
356static inline ino_t key_inum(const struct ubifs_info *c, const void *k)
357{
358 const union ubifs_key *key = k;
359
360 return key->u32[0];
361}
362
363/**
364 * key_inum_flash - fetch inode number from an on-flash formatted key.
365 * @c: UBIFS file-system description object
366 * @k: key to fetch inode number from
367 */
368static inline ino_t key_inum_flash(const struct ubifs_info *c, const void *k)
369{
370 const union ubifs_key *key = k;
371
372 return le32_to_cpu(key->j32[0]);
373}
374
375/**
376 * key_hash - get directory entry hash.
377 * @c: UBIFS file-system description object
378 * @key: the key to get hash from
379 */
380static inline int key_hash(const struct ubifs_info *c,
381 const union ubifs_key *key)
382{
383 return key->u32[1] & UBIFS_S_KEY_HASH_MASK;
384}
385
386/**
387 * key_hash_flash - get directory entry hash from an on-flash formatted key.
388 * @c: UBIFS file-system description object
389 * @k: the key to get hash from
390 */
391static inline int key_hash_flash(const struct ubifs_info *c, const void *k)
392{
393 const union ubifs_key *key = k;
394
395 return le32_to_cpu(key->j32[1]) & UBIFS_S_KEY_HASH_MASK;
396}
397
398/**
399 * key_block - get data block number.
400 * @c: UBIFS file-system description object
401 * @key: the key to get the block number from
402 */
403static inline unsigned int key_block(const struct ubifs_info *c,
404 const union ubifs_key *key)
405{
406 return key->u32[1] & UBIFS_S_KEY_BLOCK_MASK;
407}
408
409/**
410 * key_block_flash - get data block number from an on-flash formatted key.
411 * @c: UBIFS file-system description object
412 * @k: the key to get the block number from
413 */
414static inline unsigned int key_block_flash(const struct ubifs_info *c,
415 const void *k)
416{
417 const union ubifs_key *key = k;
418
419 return le32_to_cpu(key->u32[1]) & UBIFS_S_KEY_BLOCK_MASK;
420}
421
422/**
423 * key_read - transform a key to in-memory format.
424 * @c: UBIFS file-system description object
425 * @from: the key to transform
426 * @to: the key to store the result
427 */
428static inline void key_read(const struct ubifs_info *c, const void *from,
429 union ubifs_key *to)
430{
431 const union ubifs_key *f = from;
432
433 to->u32[0] = le32_to_cpu(f->j32[0]);
434 to->u32[1] = le32_to_cpu(f->j32[1]);
435}
436
437/**
438 * key_write - transform a key from in-memory format.
439 * @c: UBIFS file-system description object
440 * @from: the key to transform
441 * @to: the key to store the result
442 */
443static inline void key_write(const struct ubifs_info *c,
444 const union ubifs_key *from, void *to)
445{
446 union ubifs_key *t = to;
447
448 t->j32[0] = cpu_to_le32(from->u32[0]);
449 t->j32[1] = cpu_to_le32(from->u32[1]);
450 memset(to + 8, 0, UBIFS_MAX_KEY_LEN - 8);
451}
452
453/**
454 * key_write_idx - transform a key from in-memory format for the index.
455 * @c: UBIFS file-system description object
456 * @from: the key to transform
457 * @to: the key to store the result
458 */
459static inline void key_write_idx(const struct ubifs_info *c,
460 const union ubifs_key *from, void *to)
461{
462 union ubifs_key *t = to;
463
464 t->j32[0] = cpu_to_le32(from->u32[0]);
465 t->j32[1] = cpu_to_le32(from->u32[1]);
466}
467
468/**
469 * key_copy - copy a key.
470 * @c: UBIFS file-system description object
471 * @from: the key to copy from
472 * @to: the key to copy to
473 */
474static inline void key_copy(const struct ubifs_info *c,
475 const union ubifs_key *from, union ubifs_key *to)
476{
477 to->u64[0] = from->u64[0];
478}
479
480/**
481 * keys_cmp - compare keys.
482 * @c: UBIFS file-system description object
483 * @key1: the first key to compare
484 * @key2: the second key to compare
485 *
486 * This function compares 2 keys and returns %-1 if @key1 is less than
487 * @key2, 0 if the keys are equivalent and %1 if @key1 is greater than @key2.
488 */
489static inline int keys_cmp(const struct ubifs_info *c,
490 const union ubifs_key *key1,
491 const union ubifs_key *key2)
492{
493 if (key1->u32[0] < key2->u32[0])
494 return -1;
495 if (key1->u32[0] > key2->u32[0])
496 return 1;
497 if (key1->u32[1] < key2->u32[1])
498 return -1;
499 if (key1->u32[1] > key2->u32[1])
500 return 1;
501
502 return 0;
503}
504
505/**
506 * is_hash_key - is a key vulnerable to hash collisions.
507 * @c: UBIFS file-system description object
508 * @key: key
509 *
510 * This function returns %1 if @key is a hashed key or %0 otherwise.
511 */
512static inline int is_hash_key(const struct ubifs_info *c,
513 const union ubifs_key *key)
514{
515 int type = key_type(c, key);
516
517 return type == UBIFS_DENT_KEY || type == UBIFS_XENT_KEY;
518}
519
520/**
521 * key_max_inode_size - get maximum file size allowed by current key format.
522 * @c: UBIFS file-system description object
523 */
524static inline unsigned long long key_max_inode_size(const struct ubifs_info *c)
525{
526 switch (c->key_fmt) {
527 case UBIFS_SIMPLE_KEY_FMT:
528 return (1ULL << UBIFS_S_KEY_BLOCK_BITS) * UBIFS_BLOCK_SIZE;
529 default:
530 return 0;
531 }
532}
533#endif /* !__UBIFS_KEY_H__ */
diff --git a/fs/ubifs/log.c b/fs/ubifs/log.c
new file mode 100644
index 00000000000..36857b9ed59
--- /dev/null
+++ b/fs/ubifs/log.c
@@ -0,0 +1,805 @@
1/*
2 * This file is part of UBIFS.
3 *
4 * Copyright (C) 2006-2008 Nokia Corporation.
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms of the GNU General Public License version 2 as published by
8 * the Free Software Foundation.
9 *
10 * This program is distributed in the hope that it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13 * more details.
14 *
15 * You should have received a copy of the GNU General Public License along with
16 * this program; if not, write to the Free Software Foundation, Inc., 51
17 * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
18 *
19 * Authors: Artem Bityutskiy (Битюцкий Артём)
20 * Adrian Hunter
21 */
22
23/*
24 * This file is a part of UBIFS journal implementation and contains various
25 * functions which manipulate the log. The log is a fixed area on the flash
26 * which does not contain any data but refers to buds. The log is a part of the
27 * journal.
28 */
29
30#include "ubifs.h"
31
32#ifdef CONFIG_UBIFS_FS_DEBUG
33static int dbg_check_bud_bytes(struct ubifs_info *c);
34#else
35#define dbg_check_bud_bytes(c) 0
36#endif
37
38/**
39 * ubifs_search_bud - search bud LEB.
40 * @c: UBIFS file-system description object
41 * @lnum: logical eraseblock number to search
42 *
43 * This function searches bud LEB @lnum. Returns bud description object in case
44 * of success and %NULL if there is no bud with this LEB number.
45 */
46struct ubifs_bud *ubifs_search_bud(struct ubifs_info *c, int lnum)
47{
48 struct rb_node *p;
49 struct ubifs_bud *bud;
50
51 spin_lock(&c->buds_lock);
52 p = c->buds.rb_node;
53 while (p) {
54 bud = rb_entry(p, struct ubifs_bud, rb);
55 if (lnum < bud->lnum)
56 p = p->rb_left;
57 else if (lnum > bud->lnum)
58 p = p->rb_right;
59 else {
60 spin_unlock(&c->buds_lock);
61 return bud;
62 }
63 }
64 spin_unlock(&c->buds_lock);
65 return NULL;
66}
67
68/**
69 * ubifs_get_wbuf - get the wbuf associated with a LEB, if there is one.
70 * @c: UBIFS file-system description object
71 * @lnum: logical eraseblock number to search
72 *
73 * This functions returns the wbuf for @lnum or %NULL if there is not one.
74 */
75struct ubifs_wbuf *ubifs_get_wbuf(struct ubifs_info *c, int lnum)
76{
77 struct rb_node *p;
78 struct ubifs_bud *bud;
79 int jhead;
80
81 if (!c->jheads)
82 return NULL;
83
84 spin_lock(&c->buds_lock);
85 p = c->buds.rb_node;
86 while (p) {
87 bud = rb_entry(p, struct ubifs_bud, rb);
88 if (lnum < bud->lnum)
89 p = p->rb_left;
90 else if (lnum > bud->lnum)
91 p = p->rb_right;
92 else {
93 jhead = bud->jhead;
94 spin_unlock(&c->buds_lock);
95 return &c->jheads[jhead].wbuf;
96 }
97 }
98 spin_unlock(&c->buds_lock);
99 return NULL;
100}
101
102/**
103 * next_log_lnum - switch to the next log LEB.
104 * @c: UBIFS file-system description object
105 * @lnum: current log LEB
106 */
107static inline int next_log_lnum(const struct ubifs_info *c, int lnum)
108{
109 lnum += 1;
110 if (lnum > c->log_last)
111 lnum = UBIFS_LOG_LNUM;
112
113 return lnum;
114}
115
116/**
117 * empty_log_bytes - calculate amount of empty space in the log.
118 * @c: UBIFS file-system description object
119 */
120static inline long long empty_log_bytes(const struct ubifs_info *c)
121{
122 long long h, t;
123
124 h = (long long)c->lhead_lnum * c->leb_size + c->lhead_offs;
125 t = (long long)c->ltail_lnum * c->leb_size;
126
127 if (h >= t)
128 return c->log_bytes - h + t;
129 else
130 return t - h;
131}
132
133/**
134 * ubifs_add_bud - add bud LEB to the tree of buds and its journal head list.
135 * @c: UBIFS file-system description object
136 * @bud: the bud to add
137 */
138void ubifs_add_bud(struct ubifs_info *c, struct ubifs_bud *bud)
139{
140 struct rb_node **p, *parent = NULL;
141 struct ubifs_bud *b;
142 struct ubifs_jhead *jhead;
143
144 spin_lock(&c->buds_lock);
145 p = &c->buds.rb_node;
146 while (*p) {
147 parent = *p;
148 b = rb_entry(parent, struct ubifs_bud, rb);
149 ubifs_assert(bud->lnum != b->lnum);
150 if (bud->lnum < b->lnum)
151 p = &(*p)->rb_left;
152 else
153 p = &(*p)->rb_right;
154 }
155
156 rb_link_node(&bud->rb, parent, p);
157 rb_insert_color(&bud->rb, &c->buds);
158 if (c->jheads) {
159 jhead = &c->jheads[bud->jhead];
160 list_add_tail(&bud->list, &jhead->buds_list);
161 } else
162 ubifs_assert(c->replaying && (c->vfs_sb->s_flags & MS_RDONLY));
163
164 /*
165 * Note, although this is a new bud, we anyway account this space now,
166 * before any data has been written to it, because this is about to
167 * guarantee fixed mount time, and this bud will anyway be read and
168 * scanned.
169 */
170 c->bud_bytes += c->leb_size - bud->start;
171
172 dbg_log("LEB %d:%d, jhead %d, bud_bytes %lld", bud->lnum,
173 bud->start, bud->jhead, c->bud_bytes);
174 spin_unlock(&c->buds_lock);
175}
176
177/**
178 * ubifs_create_buds_lists - create journal head buds lists for remount rw.
179 * @c: UBIFS file-system description object
180 */
181void ubifs_create_buds_lists(struct ubifs_info *c)
182{
183 struct rb_node *p;
184
185 spin_lock(&c->buds_lock);
186 p = rb_first(&c->buds);
187 while (p) {
188 struct ubifs_bud *bud = rb_entry(p, struct ubifs_bud, rb);
189 struct ubifs_jhead *jhead = &c->jheads[bud->jhead];
190
191 list_add_tail(&bud->list, &jhead->buds_list);
192 p = rb_next(p);
193 }
194 spin_unlock(&c->buds_lock);
195}
196
197/**
198 * ubifs_add_bud_to_log - add a new bud to the log.
199 * @c: UBIFS file-system description object
200 * @jhead: journal head the bud belongs to
201 * @lnum: LEB number of the bud
202 * @offs: starting offset of the bud
203 *
204 * This function writes reference node for the new bud LEB @lnum it to the log,
205 * and adds it to the buds tress. It also makes sure that log size does not
206 * exceed the 'c->max_bud_bytes' limit. Returns zero in case of success,
207 * %-EAGAIN if commit is required, and a negative error codes in case of
208 * failure.
209 */
210int ubifs_add_bud_to_log(struct ubifs_info *c, int jhead, int lnum, int offs)
211{
212 int err;
213 struct ubifs_bud *bud;
214 struct ubifs_ref_node *ref;
215
216 bud = kmalloc(sizeof(struct ubifs_bud), GFP_NOFS);
217 if (!bud)
218 return -ENOMEM;
219 ref = kzalloc(c->ref_node_alsz, GFP_NOFS);
220 if (!ref) {
221 kfree(bud);
222 return -ENOMEM;
223 }
224
225 mutex_lock(&c->log_mutex);
226
227 if (c->ro_media) {
228 err = -EROFS;
229 goto out_unlock;
230 }
231
232 /* Make sure we have enough space in the log */
233 if (empty_log_bytes(c) - c->ref_node_alsz < c->min_log_bytes) {
234 dbg_log("not enough log space - %lld, required %d",
235 empty_log_bytes(c), c->min_log_bytes);
236 ubifs_commit_required(c);
237 err = -EAGAIN;
238 goto out_unlock;
239 }
240
241 /*
242 * Make sure the the amount of space in buds will not exceed
243 * 'c->max_bud_bytes' limit, because we want to guarantee mount time
244 * limits.
245 *
246 * It is not necessary to hold @c->buds_lock when reading @c->bud_bytes
247 * because we are holding @c->log_mutex. All @c->bud_bytes take place
248 * when both @c->log_mutex and @c->bud_bytes are locked.
249 */
250 if (c->bud_bytes + c->leb_size - offs > c->max_bud_bytes) {
251 dbg_log("bud bytes %lld (%lld max), require commit",
252 c->bud_bytes, c->max_bud_bytes);
253 ubifs_commit_required(c);
254 err = -EAGAIN;
255 goto out_unlock;
256 }
257
258 /*
259 * If the journal is full enough - start background commit. Note, it is
260 * OK to read 'c->cmt_state' without spinlock because integer reads
261 * are atomic in the kernel.
262 */
263 if (c->bud_bytes >= c->bg_bud_bytes &&
264 c->cmt_state == COMMIT_RESTING) {
265 dbg_log("bud bytes %lld (%lld max), initiate BG commit",
266 c->bud_bytes, c->max_bud_bytes);
267 ubifs_request_bg_commit(c);
268 }
269
270 bud->lnum = lnum;
271 bud->start = offs;
272 bud->jhead = jhead;
273
274 ref->ch.node_type = UBIFS_REF_NODE;
275 ref->lnum = cpu_to_le32(bud->lnum);
276 ref->offs = cpu_to_le32(bud->start);
277 ref->jhead = cpu_to_le32(jhead);
278
279 if (c->lhead_offs > c->leb_size - c->ref_node_alsz) {
280 c->lhead_lnum = next_log_lnum(c, c->lhead_lnum);
281 c->lhead_offs = 0;
282 }
283
284 if (c->lhead_offs == 0) {
285 /* Must ensure next log LEB has been unmapped */
286 err = ubifs_leb_unmap(c, c->lhead_lnum);
287 if (err)
288 goto out_unlock;
289 }
290
291 if (bud->start == 0) {
292 /*
293 * Before writing the LEB reference which refers an empty LEB
294 * to the log, we have to make sure it is mapped, because
295 * otherwise we'd risk to refer an LEB with garbage in case of
296 * an unclean reboot, because the target LEB might have been
297 * unmapped, but not yet physically erased.
298 */
299 err = ubi_leb_map(c->ubi, bud->lnum, UBI_SHORTTERM);
300 if (err)
301 goto out_unlock;
302 }
303
304 dbg_log("write ref LEB %d:%d",
305 c->lhead_lnum, c->lhead_offs);
306 err = ubifs_write_node(c, ref, UBIFS_REF_NODE_SZ, c->lhead_lnum,
307 c->lhead_offs, UBI_SHORTTERM);
308 if (err)
309 goto out_unlock;
310
311 c->lhead_offs += c->ref_node_alsz;
312
313 ubifs_add_bud(c, bud);
314
315 mutex_unlock(&c->log_mutex);
316 kfree(ref);
317 return 0;
318
319out_unlock:
320 mutex_unlock(&c->log_mutex);
321 kfree(ref);
322 kfree(bud);
323 return err;
324}
325
326/**
327 * remove_buds - remove used buds.
328 * @c: UBIFS file-system description object
329 *
330 * This function removes use buds from the buds tree. It does not remove the
331 * buds which are pointed to by journal heads.
332 */
333static void remove_buds(struct ubifs_info *c)
334{
335 struct rb_node *p;
336
337 ubifs_assert(list_empty(&c->old_buds));
338 c->cmt_bud_bytes = 0;
339 spin_lock(&c->buds_lock);
340 p = rb_first(&c->buds);
341 while (p) {
342 struct rb_node *p1 = p;
343 struct ubifs_bud *bud;
344 struct ubifs_wbuf *wbuf;
345
346 p = rb_next(p);
347 bud = rb_entry(p1, struct ubifs_bud, rb);
348 wbuf = &c->jheads[bud->jhead].wbuf;
349
350 if (wbuf->lnum == bud->lnum) {
351 /*
352 * Do not remove buds which are pointed to by journal
353 * heads (non-closed buds).
354 */
355 c->cmt_bud_bytes += wbuf->offs - bud->start;
356 dbg_log("preserve %d:%d, jhead %d, bud bytes %d, "
357 "cmt_bud_bytes %lld", bud->lnum, bud->start,
358 bud->jhead, wbuf->offs - bud->start,
359 c->cmt_bud_bytes);
360 bud->start = wbuf->offs;
361 } else {
362 c->cmt_bud_bytes += c->leb_size - bud->start;
363 dbg_log("remove %d:%d, jhead %d, bud bytes %d, "
364 "cmt_bud_bytes %lld", bud->lnum, bud->start,
365 bud->jhead, c->leb_size - bud->start,
366 c->cmt_bud_bytes);
367 rb_erase(p1, &c->buds);
368 list_del(&bud->list);
369 /*
370 * If the commit does not finish, the recovery will need
371 * to replay the journal, in which case the old buds
372 * must be unchanged. Do not release them until post
373 * commit i.e. do not allow them to be garbage
374 * collected.
375 */
376 list_add(&bud->list, &c->old_buds);
377 }
378 }
379 spin_unlock(&c->buds_lock);
380}
381
382/**
383 * ubifs_log_start_commit - start commit.
384 * @c: UBIFS file-system description object
385 * @ltail_lnum: return new log tail LEB number
386 *
387 * The commit operation starts with writing "commit start" node to the log and
388 * reference nodes for all journal heads which will define new journal after
389 * the commit has been finished. The commit start and reference nodes are
390 * written in one go to the nearest empty log LEB (hence, when commit is
391 * finished UBIFS may safely unmap all the previous log LEBs). This function
392 * returns zero in case of success and a negative error code in case of
393 * failure.
394 */
395int ubifs_log_start_commit(struct ubifs_info *c, int *ltail_lnum)
396{
397 void *buf;
398 struct ubifs_cs_node *cs;
399 struct ubifs_ref_node *ref;
400 int err, i, max_len, len;
401
402 err = dbg_check_bud_bytes(c);
403 if (err)
404 return err;
405
406 max_len = UBIFS_CS_NODE_SZ + c->jhead_cnt * UBIFS_REF_NODE_SZ;
407 max_len = ALIGN(max_len, c->min_io_size);
408 buf = cs = kmalloc(max_len, GFP_NOFS);
409 if (!buf)
410 return -ENOMEM;
411
412 cs->ch.node_type = UBIFS_CS_NODE;
413 cs->cmt_no = cpu_to_le64(c->cmt_no + 1);
414 ubifs_prepare_node(c, cs, UBIFS_CS_NODE_SZ, 0);
415
416 /*
417 * Note, we do not lock 'c->log_mutex' because this is the commit start
418 * phase and we are exclusively using the log. And we do not lock
419 * write-buffer because nobody can write to the file-system at this
420 * phase.
421 */
422
423 len = UBIFS_CS_NODE_SZ;
424 for (i = 0; i < c->jhead_cnt; i++) {
425 int lnum = c->jheads[i].wbuf.lnum;
426 int offs = c->jheads[i].wbuf.offs;
427
428 if (lnum == -1 || offs == c->leb_size)
429 continue;
430
431 dbg_log("add ref to LEB %d:%d for jhead %d", lnum, offs, i);
432 ref = buf + len;
433 ref->ch.node_type = UBIFS_REF_NODE;
434 ref->lnum = cpu_to_le32(lnum);
435 ref->offs = cpu_to_le32(offs);
436 ref->jhead = cpu_to_le32(i);
437
438 ubifs_prepare_node(c, ref, UBIFS_REF_NODE_SZ, 0);
439 len += UBIFS_REF_NODE_SZ;
440 }
441
442 ubifs_pad(c, buf + len, ALIGN(len, c->min_io_size) - len);
443
444 /* Switch to the next log LEB */
445 if (c->lhead_offs) {
446 c->lhead_lnum = next_log_lnum(c, c->lhead_lnum);
447 c->lhead_offs = 0;
448 }
449
450 if (c->lhead_offs == 0) {
451 /* Must ensure next LEB has been unmapped */
452 err = ubifs_leb_unmap(c, c->lhead_lnum);
453 if (err)
454 goto out;
455 }
456
457 len = ALIGN(len, c->min_io_size);
458 dbg_log("writing commit start at LEB %d:0, len %d", c->lhead_lnum, len);
459 err = ubifs_leb_write(c, c->lhead_lnum, cs, 0, len, UBI_SHORTTERM);
460 if (err)
461 goto out;
462
463 *ltail_lnum = c->lhead_lnum;
464
465 c->lhead_offs += len;
466 if (c->lhead_offs == c->leb_size) {
467 c->lhead_lnum = next_log_lnum(c, c->lhead_lnum);
468 c->lhead_offs = 0;
469 }
470
471 remove_buds(c);
472
473 /*
474 * We have started the commit and now users may use the rest of the log
475 * for new writes.
476 */
477 c->min_log_bytes = 0;
478
479out:
480 kfree(buf);
481 return err;
482}
483
484/**
485 * ubifs_log_end_commit - end commit.
486 * @c: UBIFS file-system description object
487 * @ltail_lnum: new log tail LEB number
488 *
489 * This function is called on when the commit operation was finished. It
490 * moves log tail to new position and unmaps LEBs which contain obsolete data.
491 * Returns zero in case of success and a negative error code in case of
492 * failure.
493 */
494int ubifs_log_end_commit(struct ubifs_info *c, int ltail_lnum)
495{
496 int err;
497
498 /*
499 * At this phase we have to lock 'c->log_mutex' because UBIFS allows FS
500 * writes during commit. Its only short "commit" start phase when
501 * writers are blocked.
502 */
503 mutex_lock(&c->log_mutex);
504
505 dbg_log("old tail was LEB %d:0, new tail is LEB %d:0",
506 c->ltail_lnum, ltail_lnum);
507
508 c->ltail_lnum = ltail_lnum;
509 /*
510 * The commit is finished and from now on it must be guaranteed that
511 * there is always enough space for the next commit.
512 */
513 c->min_log_bytes = c->leb_size;
514
515 spin_lock(&c->buds_lock);
516 c->bud_bytes -= c->cmt_bud_bytes;
517 spin_unlock(&c->buds_lock);
518
519 err = dbg_check_bud_bytes(c);
520
521 mutex_unlock(&c->log_mutex);
522 return err;
523}
524
525/**
526 * ubifs_log_post_commit - things to do after commit is completed.
527 * @c: UBIFS file-system description object
528 * @old_ltail_lnum: old log tail LEB number
529 *
530 * Release buds only after commit is completed, because they must be unchanged
531 * if recovery is needed.
532 *
533 * Unmap log LEBs only after commit is completed, because they may be needed for
534 * recovery.
535 *
536 * This function returns %0 on success and a negative error code on failure.
537 */
538int ubifs_log_post_commit(struct ubifs_info *c, int old_ltail_lnum)
539{
540 int lnum, err = 0;
541
542 while (!list_empty(&c->old_buds)) {
543 struct ubifs_bud *bud;
544
545 bud = list_entry(c->old_buds.next, struct ubifs_bud, list);
546 err = ubifs_return_leb(c, bud->lnum);
547 if (err)
548 return err;
549 list_del(&bud->list);
550 kfree(bud);
551 }
552 mutex_lock(&c->log_mutex);
553 for (lnum = old_ltail_lnum; lnum != c->ltail_lnum;
554 lnum = next_log_lnum(c, lnum)) {
555 dbg_log("unmap log LEB %d", lnum);
556 err = ubifs_leb_unmap(c, lnum);
557 if (err)
558 goto out;
559 }
560out:
561 mutex_unlock(&c->log_mutex);
562 return err;
563}
564
565/**
566 * struct done_ref - references that have been done.
567 * @rb: rb-tree node
568 * @lnum: LEB number
569 */
570struct done_ref {
571 struct rb_node rb;
572 int lnum;
573};
574
575/**
576 * done_already - determine if a reference has been done already.
577 * @done_tree: rb-tree to store references that have been done
578 * @lnum: LEB number of reference
579 *
580 * This function returns %1 if the reference has been done, %0 if not, otherwise
581 * a negative error code is returned.
582 */
583static int done_already(struct rb_root *done_tree, int lnum)
584{
585 struct rb_node **p = &done_tree->rb_node, *parent = NULL;
586 struct done_ref *dr;
587
588 while (*p) {
589 parent = *p;
590 dr = rb_entry(parent, struct done_ref, rb);
591 if (lnum < dr->lnum)
592 p = &(*p)->rb_left;
593 else if (lnum > dr->lnum)
594 p = &(*p)->rb_right;
595 else
596 return 1;
597 }
598
599 dr = kzalloc(sizeof(struct done_ref), GFP_NOFS);
600 if (!dr)
601 return -ENOMEM;
602
603 dr->lnum = lnum;
604
605 rb_link_node(&dr->rb, parent, p);
606 rb_insert_color(&dr->rb, done_tree);
607
608 return 0;
609}
610
611/**
612 * destroy_done_tree - destroy the done tree.
613 * @done_tree: done tree to destroy
614 */
615static void destroy_done_tree(struct rb_root *done_tree)
616{
617 struct rb_node *this = done_tree->rb_node;
618 struct done_ref *dr;
619
620 while (this) {
621 if (this->rb_left) {
622 this = this->rb_left;
623 continue;
624 } else if (this->rb_right) {
625 this = this->rb_right;
626 continue;
627 }
628 dr = rb_entry(this, struct done_ref, rb);
629 this = rb_parent(this);
630 if (this) {
631 if (this->rb_left == &dr->rb)
632 this->rb_left = NULL;
633 else
634 this->rb_right = NULL;
635 }
636 kfree(dr);
637 }
638}
639
640/**
641 * add_node - add a node to the consolidated log.
642 * @c: UBIFS file-system description object
643 * @buf: buffer to which to add
644 * @lnum: LEB number to which to write is passed and returned here
645 * @offs: offset to where to write is passed and returned here
646 * @node: node to add
647 *
648 * This function returns %0 on success and a negative error code on failure.
649 */
650static int add_node(struct ubifs_info *c, void *buf, int *lnum, int *offs,
651 void *node)
652{
653 struct ubifs_ch *ch = node;
654 int len = le32_to_cpu(ch->len), remains = c->leb_size - *offs;
655
656 if (len > remains) {
657 int sz = ALIGN(*offs, c->min_io_size), err;
658
659 ubifs_pad(c, buf + *offs, sz - *offs);
660 err = ubifs_leb_change(c, *lnum, buf, sz, UBI_SHORTTERM);
661 if (err)
662 return err;
663 *lnum = next_log_lnum(c, *lnum);
664 *offs = 0;
665 }
666 memcpy(buf + *offs, node, len);
667 *offs += ALIGN(len, 8);
668 return 0;
669}
670
671/**
672 * ubifs_consolidate_log - consolidate the log.
673 * @c: UBIFS file-system description object
674 *
675 * Repeated failed commits could cause the log to be full, but at least 1 LEB is
676 * needed for commit. This function rewrites the reference nodes in the log
677 * omitting duplicates, and failed CS nodes, and leaving no gaps.
678 *
679 * This function returns %0 on success and a negative error code on failure.
680 */
681int ubifs_consolidate_log(struct ubifs_info *c)
682{
683 struct ubifs_scan_leb *sleb;
684 struct ubifs_scan_node *snod;
685 struct rb_root done_tree = RB_ROOT;
686 int lnum, err, first = 1, write_lnum, offs = 0;
687 void *buf;
688
689 dbg_rcvry("log tail LEB %d, log head LEB %d", c->ltail_lnum,
690 c->lhead_lnum);
691 buf = vmalloc(c->leb_size);
692 if (!buf)
693 return -ENOMEM;
694 lnum = c->ltail_lnum;
695 write_lnum = lnum;
696 while (1) {
697 sleb = ubifs_scan(c, lnum, 0, c->sbuf);
698 if (IS_ERR(sleb)) {
699 err = PTR_ERR(sleb);
700 goto out_free;
701 }
702 list_for_each_entry(snod, &sleb->nodes, list) {
703 switch (snod->type) {
704 case UBIFS_REF_NODE: {
705 struct ubifs_ref_node *ref = snod->node;
706 int ref_lnum = le32_to_cpu(ref->lnum);
707
708 err = done_already(&done_tree, ref_lnum);
709 if (err < 0)
710 goto out_scan;
711 if (err != 1) {
712 err = add_node(c, buf, &write_lnum,
713 &offs, snod->node);
714 if (err)
715 goto out_scan;
716 }
717 break;
718 }
719 case UBIFS_CS_NODE:
720 if (!first)
721 break;
722 err = add_node(c, buf, &write_lnum, &offs,
723 snod->node);
724 if (err)
725 goto out_scan;
726 first = 0;
727 break;
728 }
729 }
730 ubifs_scan_destroy(sleb);
731 if (lnum == c->lhead_lnum)
732 break;
733 lnum = next_log_lnum(c, lnum);
734 }
735 if (offs) {
736 int sz = ALIGN(offs, c->min_io_size);
737
738 ubifs_pad(c, buf + offs, sz - offs);
739 err = ubifs_leb_change(c, write_lnum, buf, sz, UBI_SHORTTERM);
740 if (err)
741 goto out_free;
742 offs = ALIGN(offs, c->min_io_size);
743 }
744 destroy_done_tree(&done_tree);
745 vfree(buf);
746 if (write_lnum == c->lhead_lnum) {
747 ubifs_err("log is too full");
748 return -EINVAL;
749 }
750 /* Unmap remaining LEBs */
751 lnum = write_lnum;
752 do {
753 lnum = next_log_lnum(c, lnum);
754 err = ubifs_leb_unmap(c, lnum);
755 if (err)
756 return err;
757 } while (lnum != c->lhead_lnum);
758 c->lhead_lnum = write_lnum;
759 c->lhead_offs = offs;
760 dbg_rcvry("new log head at %d:%d", c->lhead_lnum, c->lhead_offs);
761 return 0;
762
763out_scan:
764 ubifs_scan_destroy(sleb);
765out_free:
766 destroy_done_tree(&done_tree);
767 vfree(buf);
768 return err;
769}
770
771#ifdef CONFIG_UBIFS_FS_DEBUG
772
773/**
774 * dbg_check_bud_bytes - make sure bud bytes calculation are all right.
775 * @c: UBIFS file-system description object
776 *
777 * This function makes sure the amount of flash space used by closed buds
778 * ('c->bud_bytes' is correct). Returns zero in case of success and %-EINVAL in
779 * case of failure.
780 */
781static int dbg_check_bud_bytes(struct ubifs_info *c)
782{
783 int i, err = 0;
784 struct ubifs_bud *bud;
785 long long bud_bytes = 0;
786
787 if (!(ubifs_chk_flags & UBIFS_CHK_GEN))
788 return 0;
789
790 spin_lock(&c->buds_lock);
791 for (i = 0; i < c->jhead_cnt; i++)
792 list_for_each_entry(bud, &c->jheads[i].buds_list, list)
793 bud_bytes += c->leb_size - bud->start;
794
795 if (c->bud_bytes != bud_bytes) {
796 ubifs_err("bad bud_bytes %lld, calculated %lld",
797 c->bud_bytes, bud_bytes);
798 err = -EINVAL;
799 }
800 spin_unlock(&c->buds_lock);
801
802 return err;
803}
804
805#endif /* CONFIG_UBIFS_FS_DEBUG */
diff --git a/fs/ubifs/lprops.c b/fs/ubifs/lprops.c
new file mode 100644
index 00000000000..2ba93da71b6
--- /dev/null
+++ b/fs/ubifs/lprops.c
@@ -0,0 +1,1357 @@
1/*
2 * This file is part of UBIFS.
3 *
4 * Copyright (C) 2006-2008 Nokia Corporation.
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms of the GNU General Public License version 2 as published by
8 * the Free Software Foundation.
9 *
10 * This program is distributed in the hope that it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13 * more details.
14 *
15 * You should have received a copy of the GNU General Public License along with
16 * this program; if not, write to the Free Software Foundation, Inc., 51
17 * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
18 *
19 * Authors: Adrian Hunter
20 * Artem Bityutskiy (Битюцкий Артём)
21 */
22
23/*
24 * This file implements the functions that access LEB properties and their
25 * categories. LEBs are categorized based on the needs of UBIFS, and the
26 * categories are stored as either heaps or lists to provide a fast way of
27 * finding a LEB in a particular category. For example, UBIFS may need to find
28 * an empty LEB for the journal, or a very dirty LEB for garbage collection.
29 */
30
31#include "ubifs.h"
32
33/**
34 * get_heap_comp_val - get the LEB properties value for heap comparisons.
35 * @lprops: LEB properties
36 * @cat: LEB category
37 */
38static int get_heap_comp_val(struct ubifs_lprops *lprops, int cat)
39{
40 switch (cat) {
41 case LPROPS_FREE:
42 return lprops->free;
43 case LPROPS_DIRTY_IDX:
44 return lprops->free + lprops->dirty;
45 default:
46 return lprops->dirty;
47 }
48}
49
50/**
51 * move_up_lpt_heap - move a new heap entry up as far as possible.
52 * @c: UBIFS file-system description object
53 * @heap: LEB category heap
54 * @lprops: LEB properties to move
55 * @cat: LEB category
56 *
57 * New entries to a heap are added at the bottom and then moved up until the
58 * parent's value is greater. In the case of LPT's category heaps, the value
59 * is either the amount of free space or the amount of dirty space, depending
60 * on the category.
61 */
62static void move_up_lpt_heap(struct ubifs_info *c, struct ubifs_lpt_heap *heap,
63 struct ubifs_lprops *lprops, int cat)
64{
65 int val1, val2, hpos;
66
67 hpos = lprops->hpos;
68 if (!hpos)
69 return; /* Already top of the heap */
70 val1 = get_heap_comp_val(lprops, cat);
71 /* Compare to parent and, if greater, move up the heap */
72 do {
73 int ppos = (hpos - 1) / 2;
74
75 val2 = get_heap_comp_val(heap->arr[ppos], cat);
76 if (val2 >= val1)
77 return;
78 /* Greater than parent so move up */
79 heap->arr[ppos]->hpos = hpos;
80 heap->arr[hpos] = heap->arr[ppos];
81 heap->arr[ppos] = lprops;
82 lprops->hpos = ppos;
83 hpos = ppos;
84 } while (hpos);
85}
86
87/**
88 * adjust_lpt_heap - move a changed heap entry up or down the heap.
89 * @c: UBIFS file-system description object
90 * @heap: LEB category heap
91 * @lprops: LEB properties to move
92 * @hpos: heap position of @lprops
93 * @cat: LEB category
94 *
95 * Changed entries in a heap are moved up or down until the parent's value is
96 * greater. In the case of LPT's category heaps, the value is either the amount
97 * of free space or the amount of dirty space, depending on the category.
98 */
99static void adjust_lpt_heap(struct ubifs_info *c, struct ubifs_lpt_heap *heap,
100 struct ubifs_lprops *lprops, int hpos, int cat)
101{
102 int val1, val2, val3, cpos;
103
104 val1 = get_heap_comp_val(lprops, cat);
105 /* Compare to parent and, if greater than parent, move up the heap */
106 if (hpos) {
107 int ppos = (hpos - 1) / 2;
108
109 val2 = get_heap_comp_val(heap->arr[ppos], cat);
110 if (val1 > val2) {
111 /* Greater than parent so move up */
112 while (1) {
113 heap->arr[ppos]->hpos = hpos;
114 heap->arr[hpos] = heap->arr[ppos];
115 heap->arr[ppos] = lprops;
116 lprops->hpos = ppos;
117 hpos = ppos;
118 if (!hpos)
119 return;
120 ppos = (hpos - 1) / 2;
121 val2 = get_heap_comp_val(heap->arr[ppos], cat);
122 if (val1 <= val2)
123 return;
124 /* Still greater than parent so keep going */
125 }
126 }
127 }
128 /* Not greater than parent, so compare to children */
129 while (1) {
130 /* Compare to left child */
131 cpos = hpos * 2 + 1;
132 if (cpos >= heap->cnt)
133 return;
134 val2 = get_heap_comp_val(heap->arr[cpos], cat);
135 if (val1 < val2) {
136 /* Less than left child, so promote biggest child */
137 if (cpos + 1 < heap->cnt) {
138 val3 = get_heap_comp_val(heap->arr[cpos + 1],
139 cat);
140 if (val3 > val2)
141 cpos += 1; /* Right child is bigger */
142 }
143 heap->arr[cpos]->hpos = hpos;
144 heap->arr[hpos] = heap->arr[cpos];
145 heap->arr[cpos] = lprops;
146 lprops->hpos = cpos;
147 hpos = cpos;
148 continue;
149 }
150 /* Compare to right child */
151 cpos += 1;
152 if (cpos >= heap->cnt)
153 return;
154 val3 = get_heap_comp_val(heap->arr[cpos], cat);
155 if (val1 < val3) {
156 /* Less than right child, so promote right child */
157 heap->arr[cpos]->hpos = hpos;
158 heap->arr[hpos] = heap->arr[cpos];
159 heap->arr[cpos] = lprops;
160 lprops->hpos = cpos;
161 hpos = cpos;
162 continue;
163 }
164 return;
165 }
166}
167
168/**
169 * add_to_lpt_heap - add LEB properties to a LEB category heap.
170 * @c: UBIFS file-system description object
171 * @lprops: LEB properties to add
172 * @cat: LEB category
173 *
174 * This function returns %1 if @lprops is added to the heap for LEB category
175 * @cat, otherwise %0 is returned because the heap is full.
176 */
177static int add_to_lpt_heap(struct ubifs_info *c, struct ubifs_lprops *lprops,
178 int cat)
179{
180 struct ubifs_lpt_heap *heap = &c->lpt_heap[cat - 1];
181
182 if (heap->cnt >= heap->max_cnt) {
183 const int b = LPT_HEAP_SZ / 2 - 1;
184 int cpos, val1, val2;
185
186 /* Compare to some other LEB on the bottom of heap */
187 /* Pick a position kind of randomly */
188 cpos = (((size_t)lprops >> 4) & b) + b;
189 ubifs_assert(cpos >= b);
190 ubifs_assert(cpos < LPT_HEAP_SZ);
191 ubifs_assert(cpos < heap->cnt);
192
193 val1 = get_heap_comp_val(lprops, cat);
194 val2 = get_heap_comp_val(heap->arr[cpos], cat);
195 if (val1 > val2) {
196 struct ubifs_lprops *lp;
197
198 lp = heap->arr[cpos];
199 lp->flags &= ~LPROPS_CAT_MASK;
200 lp->flags |= LPROPS_UNCAT;
201 list_add(&lp->list, &c->uncat_list);
202 lprops->hpos = cpos;
203 heap->arr[cpos] = lprops;
204 move_up_lpt_heap(c, heap, lprops, cat);
205 dbg_check_heap(c, heap, cat, lprops->hpos);
206 return 1; /* Added to heap */
207 }
208 dbg_check_heap(c, heap, cat, -1);
209 return 0; /* Not added to heap */
210 } else {
211 lprops->hpos = heap->cnt++;
212 heap->arr[lprops->hpos] = lprops;
213 move_up_lpt_heap(c, heap, lprops, cat);
214 dbg_check_heap(c, heap, cat, lprops->hpos);
215 return 1; /* Added to heap */
216 }
217}
218
219/**
220 * remove_from_lpt_heap - remove LEB properties from a LEB category heap.
221 * @c: UBIFS file-system description object
222 * @lprops: LEB properties to remove
223 * @cat: LEB category
224 */
225static void remove_from_lpt_heap(struct ubifs_info *c,
226 struct ubifs_lprops *lprops, int cat)
227{
228 struct ubifs_lpt_heap *heap;
229 int hpos = lprops->hpos;
230
231 heap = &c->lpt_heap[cat - 1];
232 ubifs_assert(hpos >= 0 && hpos < heap->cnt);
233 ubifs_assert(heap->arr[hpos] == lprops);
234 heap->cnt -= 1;
235 if (hpos < heap->cnt) {
236 heap->arr[hpos] = heap->arr[heap->cnt];
237 heap->arr[hpos]->hpos = hpos;
238 adjust_lpt_heap(c, heap, heap->arr[hpos], hpos, cat);
239 }
240 dbg_check_heap(c, heap, cat, -1);
241}
242
243/**
244 * lpt_heap_replace - replace lprops in a category heap.
245 * @c: UBIFS file-system description object
246 * @old_lprops: LEB properties to replace
247 * @new_lprops: LEB properties with which to replace
248 * @cat: LEB category
249 *
250 * During commit it is sometimes necessary to copy a pnode (see dirty_cow_pnode)
251 * and the lprops that the pnode contains. When that happens, references in
252 * the category heaps to those lprops must be updated to point to the new
253 * lprops. This function does that.
254 */
255static void lpt_heap_replace(struct ubifs_info *c,
256 struct ubifs_lprops *old_lprops,
257 struct ubifs_lprops *new_lprops, int cat)
258{
259 struct ubifs_lpt_heap *heap;
260 int hpos = new_lprops->hpos;
261
262 heap = &c->lpt_heap[cat - 1];
263 heap->arr[hpos] = new_lprops;
264}
265
266/**
267 * ubifs_add_to_cat - add LEB properties to a category list or heap.
268 * @c: UBIFS file-system description object
269 * @lprops: LEB properties to add
270 * @cat: LEB category to which to add
271 *
272 * LEB properties are categorized to enable fast find operations.
273 */
274void ubifs_add_to_cat(struct ubifs_info *c, struct ubifs_lprops *lprops,
275 int cat)
276{
277 switch (cat) {
278 case LPROPS_DIRTY:
279 case LPROPS_DIRTY_IDX:
280 case LPROPS_FREE:
281 if (add_to_lpt_heap(c, lprops, cat))
282 break;
283 /* No more room on heap so make it uncategorized */
284 cat = LPROPS_UNCAT;
285 /* Fall through */
286 case LPROPS_UNCAT:
287 list_add(&lprops->list, &c->uncat_list);
288 break;
289 case LPROPS_EMPTY:
290 list_add(&lprops->list, &c->empty_list);
291 break;
292 case LPROPS_FREEABLE:
293 list_add(&lprops->list, &c->freeable_list);
294 c->freeable_cnt += 1;
295 break;
296 case LPROPS_FRDI_IDX:
297 list_add(&lprops->list, &c->frdi_idx_list);
298 break;
299 default:
300 ubifs_assert(0);
301 }
302 lprops->flags &= ~LPROPS_CAT_MASK;
303 lprops->flags |= cat;
304}
305
306/**
307 * ubifs_remove_from_cat - remove LEB properties from a category list or heap.
308 * @c: UBIFS file-system description object
309 * @lprops: LEB properties to remove
310 * @cat: LEB category from which to remove
311 *
312 * LEB properties are categorized to enable fast find operations.
313 */
314static void ubifs_remove_from_cat(struct ubifs_info *c,
315 struct ubifs_lprops *lprops, int cat)
316{
317 switch (cat) {
318 case LPROPS_DIRTY:
319 case LPROPS_DIRTY_IDX:
320 case LPROPS_FREE:
321 remove_from_lpt_heap(c, lprops, cat);
322 break;
323 case LPROPS_FREEABLE:
324 c->freeable_cnt -= 1;
325 ubifs_assert(c->freeable_cnt >= 0);
326 /* Fall through */
327 case LPROPS_UNCAT:
328 case LPROPS_EMPTY:
329 case LPROPS_FRDI_IDX:
330 ubifs_assert(!list_empty(&lprops->list));
331 list_del(&lprops->list);
332 break;
333 default:
334 ubifs_assert(0);
335 }
336}
337
338/**
339 * ubifs_replace_cat - replace lprops in a category list or heap.
340 * @c: UBIFS file-system description object
341 * @old_lprops: LEB properties to replace
342 * @new_lprops: LEB properties with which to replace
343 *
344 * During commit it is sometimes necessary to copy a pnode (see dirty_cow_pnode)
345 * and the lprops that the pnode contains. When that happens, references in
346 * category lists and heaps must be replaced. This function does that.
347 */
348void ubifs_replace_cat(struct ubifs_info *c, struct ubifs_lprops *old_lprops,
349 struct ubifs_lprops *new_lprops)
350{
351 int cat;
352
353 cat = new_lprops->flags & LPROPS_CAT_MASK;
354 switch (cat) {
355 case LPROPS_DIRTY:
356 case LPROPS_DIRTY_IDX:
357 case LPROPS_FREE:
358 lpt_heap_replace(c, old_lprops, new_lprops, cat);
359 break;
360 case LPROPS_UNCAT:
361 case LPROPS_EMPTY:
362 case LPROPS_FREEABLE:
363 case LPROPS_FRDI_IDX:
364 list_replace(&old_lprops->list, &new_lprops->list);
365 break;
366 default:
367 ubifs_assert(0);
368 }
369}
370
371/**
372 * ubifs_ensure_cat - ensure LEB properties are categorized.
373 * @c: UBIFS file-system description object
374 * @lprops: LEB properties
375 *
376 * A LEB may have fallen off of the bottom of a heap, and ended up as
377 * uncategorized even though it has enough space for us now. If that is the case
378 * this function will put the LEB back onto a heap.
379 */
380void ubifs_ensure_cat(struct ubifs_info *c, struct ubifs_lprops *lprops)
381{
382 int cat = lprops->flags & LPROPS_CAT_MASK;
383
384 if (cat != LPROPS_UNCAT)
385 return;
386 cat = ubifs_categorize_lprops(c, lprops);
387 if (cat == LPROPS_UNCAT)
388 return;
389 ubifs_remove_from_cat(c, lprops, LPROPS_UNCAT);
390 ubifs_add_to_cat(c, lprops, cat);
391}
392
393/**
394 * ubifs_categorize_lprops - categorize LEB properties.
395 * @c: UBIFS file-system description object
396 * @lprops: LEB properties to categorize
397 *
398 * LEB properties are categorized to enable fast find operations. This function
399 * returns the LEB category to which the LEB properties belong. Note however
400 * that if the LEB category is stored as a heap and the heap is full, the
401 * LEB properties may have their category changed to %LPROPS_UNCAT.
402 */
403int ubifs_categorize_lprops(const struct ubifs_info *c,
404 const struct ubifs_lprops *lprops)
405{
406 if (lprops->flags & LPROPS_TAKEN)
407 return LPROPS_UNCAT;
408
409 if (lprops->free == c->leb_size) {
410 ubifs_assert(!(lprops->flags & LPROPS_INDEX));
411 return LPROPS_EMPTY;
412 }
413
414 if (lprops->free + lprops->dirty == c->leb_size) {
415 if (lprops->flags & LPROPS_INDEX)
416 return LPROPS_FRDI_IDX;
417 else
418 return LPROPS_FREEABLE;
419 }
420
421 if (lprops->flags & LPROPS_INDEX) {
422 if (lprops->dirty + lprops->free >= c->min_idx_node_sz)
423 return LPROPS_DIRTY_IDX;
424 } else {
425 if (lprops->dirty >= c->dead_wm &&
426 lprops->dirty > lprops->free)
427 return LPROPS_DIRTY;
428 if (lprops->free > 0)
429 return LPROPS_FREE;
430 }
431
432 return LPROPS_UNCAT;
433}
434
435/**
436 * change_category - change LEB properties category.
437 * @c: UBIFS file-system description object
438 * @lprops: LEB properties to recategorize
439 *
440 * LEB properties are categorized to enable fast find operations. When the LEB
441 * properties change they must be recategorized.
442 */
443static void change_category(struct ubifs_info *c, struct ubifs_lprops *lprops)
444{
445 int old_cat = lprops->flags & LPROPS_CAT_MASK;
446 int new_cat = ubifs_categorize_lprops(c, lprops);
447
448 if (old_cat == new_cat) {
449 struct ubifs_lpt_heap *heap = &c->lpt_heap[new_cat - 1];
450
451 /* lprops on a heap now must be moved up or down */
452 if (new_cat < 1 || new_cat > LPROPS_HEAP_CNT)
453 return; /* Not on a heap */
454 heap = &c->lpt_heap[new_cat - 1];
455 adjust_lpt_heap(c, heap, lprops, lprops->hpos, new_cat);
456 } else {
457 ubifs_remove_from_cat(c, lprops, old_cat);
458 ubifs_add_to_cat(c, lprops, new_cat);
459 }
460}
461
462/**
463 * ubifs_get_lprops - get reference to LEB properties.
464 * @c: the UBIFS file-system description object
465 *
466 * This function locks lprops. Lprops have to be unlocked by
467 * 'ubifs_release_lprops()'.
468 */
469void ubifs_get_lprops(struct ubifs_info *c)
470{
471 mutex_lock(&c->lp_mutex);
472}
473
474/**
475 * calc_dark - calculate LEB dark space size.
476 * @c: the UBIFS file-system description object
477 * @spc: amount of free and dirty space in the LEB
478 *
479 * This function calculates amount of dark space in an LEB which has @spc bytes
480 * of free and dirty space. Returns the calculations result.
481 *
482 * Dark space is the space which is not always usable - it depends on which
483 * nodes are written in which order. E.g., if an LEB has only 512 free bytes,
484 * it is dark space, because it cannot fit a large data node. So UBIFS cannot
485 * count on this LEB and treat these 512 bytes as usable because it is not true
486 * if, for example, only big chunks of uncompressible data will be written to
487 * the FS.
488 */
489static int calc_dark(struct ubifs_info *c, int spc)
490{
491 ubifs_assert(!(spc & 7));
492
493 if (spc < c->dark_wm)
494 return spc;
495
496 /*
497 * If we have slightly more space then the dark space watermark, we can
498 * anyway safely assume it we'll be able to write a node of the
499 * smallest size there.
500 */
501 if (spc - c->dark_wm < MIN_WRITE_SZ)
502 return spc - MIN_WRITE_SZ;
503
504 return c->dark_wm;
505}
506
507/**
508 * is_lprops_dirty - determine if LEB properties are dirty.
509 * @c: the UBIFS file-system description object
510 * @lprops: LEB properties to test
511 */
512static int is_lprops_dirty(struct ubifs_info *c, struct ubifs_lprops *lprops)
513{
514 struct ubifs_pnode *pnode;
515 int pos;
516
517 pos = (lprops->lnum - c->main_first) & (UBIFS_LPT_FANOUT - 1);
518 pnode = (struct ubifs_pnode *)container_of(lprops - pos,
519 struct ubifs_pnode,
520 lprops[0]);
521 return !test_bit(COW_ZNODE, &pnode->flags) &&
522 test_bit(DIRTY_CNODE, &pnode->flags);
523}
524
525/**
526 * ubifs_change_lp - change LEB properties.
527 * @c: the UBIFS file-system description object
528 * @lp: LEB properties to change
529 * @free: new free space amount
530 * @dirty: new dirty space amount
531 * @flags: new flags
532 * @idx_gc_cnt: change to the count of idx_gc list
533 *
534 * This function changes LEB properties. This function does not change a LEB
535 * property (@free, @dirty or @flag) if the value passed is %LPROPS_NC.
536 *
537 * This function returns a pointer to the updated LEB properties on success
538 * and a negative error code on failure. N.B. the LEB properties may have had to
539 * be copied (due to COW) and consequently the pointer returned may not be the
540 * same as the pointer passed.
541 */
542const struct ubifs_lprops *ubifs_change_lp(struct ubifs_info *c,
543 const struct ubifs_lprops *lp,
544 int free, int dirty, int flags,
545 int idx_gc_cnt)
546{
547 /*
548 * This is the only function that is allowed to change lprops, so we
549 * discard the const qualifier.
550 */
551 struct ubifs_lprops *lprops = (struct ubifs_lprops *)lp;
552
553 dbg_lp("LEB %d, free %d, dirty %d, flags %d",
554 lprops->lnum, free, dirty, flags);
555
556 ubifs_assert(mutex_is_locked(&c->lp_mutex));
557 ubifs_assert(c->lst.empty_lebs >= 0 &&
558 c->lst.empty_lebs <= c->main_lebs);
559 ubifs_assert(c->freeable_cnt >= 0);
560 ubifs_assert(c->freeable_cnt <= c->main_lebs);
561 ubifs_assert(c->lst.taken_empty_lebs >= 0);
562 ubifs_assert(c->lst.taken_empty_lebs <= c->lst.empty_lebs);
563 ubifs_assert(!(c->lst.total_free & 7) && !(c->lst.total_dirty & 7));
564 ubifs_assert(!(c->lst.total_dead & 7) && !(c->lst.total_dark & 7));
565 ubifs_assert(!(c->lst.total_used & 7));
566 ubifs_assert(free == LPROPS_NC || free >= 0);
567 ubifs_assert(dirty == LPROPS_NC || dirty >= 0);
568
569 if (!is_lprops_dirty(c, lprops)) {
570 lprops = ubifs_lpt_lookup_dirty(c, lprops->lnum);
571 if (IS_ERR(lprops))
572 return lprops;
573 } else
574 ubifs_assert(lprops == ubifs_lpt_lookup_dirty(c, lprops->lnum));
575
576 ubifs_assert(!(lprops->free & 7) && !(lprops->dirty & 7));
577
578 spin_lock(&c->space_lock);
579
580 if ((lprops->flags & LPROPS_TAKEN) && lprops->free == c->leb_size)
581 c->lst.taken_empty_lebs -= 1;
582
583 if (!(lprops->flags & LPROPS_INDEX)) {
584 int old_spc;
585
586 old_spc = lprops->free + lprops->dirty;
587 if (old_spc < c->dead_wm)
588 c->lst.total_dead -= old_spc;
589 else
590 c->lst.total_dark -= calc_dark(c, old_spc);
591
592 c->lst.total_used -= c->leb_size - old_spc;
593 }
594
595 if (free != LPROPS_NC) {
596 free = ALIGN(free, 8);
597 c->lst.total_free += free - lprops->free;
598
599 /* Increase or decrease empty LEBs counter if needed */
600 if (free == c->leb_size) {
601 if (lprops->free != c->leb_size)
602 c->lst.empty_lebs += 1;
603 } else if (lprops->free == c->leb_size)
604 c->lst.empty_lebs -= 1;
605 lprops->free = free;
606 }
607
608 if (dirty != LPROPS_NC) {
609 dirty = ALIGN(dirty, 8);
610 c->lst.total_dirty += dirty - lprops->dirty;
611 lprops->dirty = dirty;
612 }
613
614 if (flags != LPROPS_NC) {
615 /* Take care about indexing LEBs counter if needed */
616 if ((lprops->flags & LPROPS_INDEX)) {
617 if (!(flags & LPROPS_INDEX))
618 c->lst.idx_lebs -= 1;
619 } else if (flags & LPROPS_INDEX)
620 c->lst.idx_lebs += 1;
621 lprops->flags = flags;
622 }
623
624 if (!(lprops->flags & LPROPS_INDEX)) {
625 int new_spc;
626
627 new_spc = lprops->free + lprops->dirty;
628 if (new_spc < c->dead_wm)
629 c->lst.total_dead += new_spc;
630 else
631 c->lst.total_dark += calc_dark(c, new_spc);
632
633 c->lst.total_used += c->leb_size - new_spc;
634 }
635
636 if ((lprops->flags & LPROPS_TAKEN) && lprops->free == c->leb_size)
637 c->lst.taken_empty_lebs += 1;
638
639 change_category(c, lprops);
640
641 c->idx_gc_cnt += idx_gc_cnt;
642
643 spin_unlock(&c->space_lock);
644
645 return lprops;
646}
647
648/**
649 * ubifs_release_lprops - release lprops lock.
650 * @c: the UBIFS file-system description object
651 *
652 * This function has to be called after each 'ubifs_get_lprops()' call to
653 * unlock lprops.
654 */
655void ubifs_release_lprops(struct ubifs_info *c)
656{
657 ubifs_assert(mutex_is_locked(&c->lp_mutex));
658 ubifs_assert(c->lst.empty_lebs >= 0 &&
659 c->lst.empty_lebs <= c->main_lebs);
660
661 mutex_unlock(&c->lp_mutex);
662}
663
664/**
665 * ubifs_get_lp_stats - get lprops statistics.
666 * @c: UBIFS file-system description object
667 * @st: return statistics
668 */
669void ubifs_get_lp_stats(struct ubifs_info *c, struct ubifs_lp_stats *st)
670{
671 spin_lock(&c->space_lock);
672 memcpy(st, &c->lst, sizeof(struct ubifs_lp_stats));
673 spin_unlock(&c->space_lock);
674}
675
676/**
677 * ubifs_change_one_lp - change LEB properties.
678 * @c: the UBIFS file-system description object
679 * @lnum: LEB to change properties for
680 * @free: amount of free space
681 * @dirty: amount of dirty space
682 * @flags_set: flags to set
683 * @flags_clean: flags to clean
684 * @idx_gc_cnt: change to the count of idx_gc list
685 *
686 * This function changes properties of LEB @lnum. It is a helper wrapper over
687 * 'ubifs_change_lp()' which hides lprops get/release. The arguments are the
688 * same as in case of 'ubifs_change_lp()'. Returns zero in case of success and
689 * a negative error code in case of failure.
690 */
691int ubifs_change_one_lp(struct ubifs_info *c, int lnum, int free, int dirty,
692 int flags_set, int flags_clean, int idx_gc_cnt)
693{
694 int err = 0, flags;
695 const struct ubifs_lprops *lp;
696
697 ubifs_get_lprops(c);
698
699 lp = ubifs_lpt_lookup_dirty(c, lnum);
700 if (IS_ERR(lp)) {
701 err = PTR_ERR(lp);
702 goto out;
703 }
704
705 flags = (lp->flags | flags_set) & ~flags_clean;
706 lp = ubifs_change_lp(c, lp, free, dirty, flags, idx_gc_cnt);
707 if (IS_ERR(lp))
708 err = PTR_ERR(lp);
709
710out:
711 ubifs_release_lprops(c);
712 return err;
713}
714
715/**
716 * ubifs_update_one_lp - update LEB properties.
717 * @c: the UBIFS file-system description object
718 * @lnum: LEB to change properties for
719 * @free: amount of free space
720 * @dirty: amount of dirty space to add
721 * @flags_set: flags to set
722 * @flags_clean: flags to clean
723 *
724 * This function is the same as 'ubifs_change_one_lp()' but @dirty is added to
725 * current dirty space, not substitutes it.
726 */
727int ubifs_update_one_lp(struct ubifs_info *c, int lnum, int free, int dirty,
728 int flags_set, int flags_clean)
729{
730 int err = 0, flags;
731 const struct ubifs_lprops *lp;
732
733 ubifs_get_lprops(c);
734
735 lp = ubifs_lpt_lookup_dirty(c, lnum);
736 if (IS_ERR(lp)) {
737 err = PTR_ERR(lp);
738 goto out;
739 }
740
741 flags = (lp->flags | flags_set) & ~flags_clean;
742 lp = ubifs_change_lp(c, lp, free, lp->dirty + dirty, flags, 0);
743 if (IS_ERR(lp))
744 err = PTR_ERR(lp);
745
746out:
747 ubifs_release_lprops(c);
748 return err;
749}
750
751/**
752 * ubifs_read_one_lp - read LEB properties.
753 * @c: the UBIFS file-system description object
754 * @lnum: LEB to read properties for
755 * @lp: where to store read properties
756 *
757 * This helper function reads properties of a LEB @lnum and stores them in @lp.
758 * Returns zero in case of success and a negative error code in case of
759 * failure.
760 */
761int ubifs_read_one_lp(struct ubifs_info *c, int lnum, struct ubifs_lprops *lp)
762{
763 int err = 0;
764 const struct ubifs_lprops *lpp;
765
766 ubifs_get_lprops(c);
767
768 lpp = ubifs_lpt_lookup(c, lnum);
769 if (IS_ERR(lpp)) {
770 err = PTR_ERR(lpp);
771 goto out;
772 }
773
774 memcpy(lp, lpp, sizeof(struct ubifs_lprops));
775
776out:
777 ubifs_release_lprops(c);
778 return err;
779}
780
781/**
782 * ubifs_fast_find_free - try to find a LEB with free space quickly.
783 * @c: the UBIFS file-system description object
784 *
785 * This function returns LEB properties for a LEB with free space or %NULL if
786 * the function is unable to find a LEB quickly.
787 */
788const struct ubifs_lprops *ubifs_fast_find_free(struct ubifs_info *c)
789{
790 struct ubifs_lprops *lprops;
791 struct ubifs_lpt_heap *heap;
792
793 ubifs_assert(mutex_is_locked(&c->lp_mutex));
794
795 heap = &c->lpt_heap[LPROPS_FREE - 1];
796 if (heap->cnt == 0)
797 return NULL;
798
799 lprops = heap->arr[0];
800 ubifs_assert(!(lprops->flags & LPROPS_TAKEN));
801 ubifs_assert(!(lprops->flags & LPROPS_INDEX));
802 return lprops;
803}
804
805/**
806 * ubifs_fast_find_empty - try to find an empty LEB quickly.
807 * @c: the UBIFS file-system description object
808 *
809 * This function returns LEB properties for an empty LEB or %NULL if the
810 * function is unable to find an empty LEB quickly.
811 */
812const struct ubifs_lprops *ubifs_fast_find_empty(struct ubifs_info *c)
813{
814 struct ubifs_lprops *lprops;
815
816 ubifs_assert(mutex_is_locked(&c->lp_mutex));
817
818 if (list_empty(&c->empty_list))
819 return NULL;
820
821 lprops = list_entry(c->empty_list.next, struct ubifs_lprops, list);
822 ubifs_assert(!(lprops->flags & LPROPS_TAKEN));
823 ubifs_assert(!(lprops->flags & LPROPS_INDEX));
824 ubifs_assert(lprops->free == c->leb_size);
825 return lprops;
826}
827
828/**
829 * ubifs_fast_find_freeable - try to find a freeable LEB quickly.
830 * @c: the UBIFS file-system description object
831 *
832 * This function returns LEB properties for a freeable LEB or %NULL if the
833 * function is unable to find a freeable LEB quickly.
834 */
835const struct ubifs_lprops *ubifs_fast_find_freeable(struct ubifs_info *c)
836{
837 struct ubifs_lprops *lprops;
838
839 ubifs_assert(mutex_is_locked(&c->lp_mutex));
840
841 if (list_empty(&c->freeable_list))
842 return NULL;
843
844 lprops = list_entry(c->freeable_list.next, struct ubifs_lprops, list);
845 ubifs_assert(!(lprops->flags & LPROPS_TAKEN));
846 ubifs_assert(!(lprops->flags & LPROPS_INDEX));
847 ubifs_assert(lprops->free + lprops->dirty == c->leb_size);
848 ubifs_assert(c->freeable_cnt > 0);
849 return lprops;
850}
851
852/**
853 * ubifs_fast_find_frdi_idx - try to find a freeable index LEB quickly.
854 * @c: the UBIFS file-system description object
855 *
856 * This function returns LEB properties for a freeable index LEB or %NULL if the
857 * function is unable to find a freeable index LEB quickly.
858 */
859const struct ubifs_lprops *ubifs_fast_find_frdi_idx(struct ubifs_info *c)
860{
861 struct ubifs_lprops *lprops;
862
863 ubifs_assert(mutex_is_locked(&c->lp_mutex));
864
865 if (list_empty(&c->frdi_idx_list))
866 return NULL;
867
868 lprops = list_entry(c->frdi_idx_list.next, struct ubifs_lprops, list);
869 ubifs_assert(!(lprops->flags & LPROPS_TAKEN));
870 ubifs_assert((lprops->flags & LPROPS_INDEX));
871 ubifs_assert(lprops->free + lprops->dirty == c->leb_size);
872 return lprops;
873}
874
875#ifdef CONFIG_UBIFS_FS_DEBUG
876
877/**
878 * dbg_check_cats - check category heaps and lists.
879 * @c: UBIFS file-system description object
880 *
881 * This function returns %0 on success and a negative error code on failure.
882 */
883int dbg_check_cats(struct ubifs_info *c)
884{
885 struct ubifs_lprops *lprops;
886 struct list_head *pos;
887 int i, cat;
888
889 if (!(ubifs_chk_flags & (UBIFS_CHK_GEN | UBIFS_CHK_LPROPS)))
890 return 0;
891
892 list_for_each_entry(lprops, &c->empty_list, list) {
893 if (lprops->free != c->leb_size) {
894 ubifs_err("non-empty LEB %d on empty list "
895 "(free %d dirty %d flags %d)", lprops->lnum,
896 lprops->free, lprops->dirty, lprops->flags);
897 return -EINVAL;
898 }
899 if (lprops->flags & LPROPS_TAKEN) {
900 ubifs_err("taken LEB %d on empty list "
901 "(free %d dirty %d flags %d)", lprops->lnum,
902 lprops->free, lprops->dirty, lprops->flags);
903 return -EINVAL;
904 }
905 }
906
907 i = 0;
908 list_for_each_entry(lprops, &c->freeable_list, list) {
909 if (lprops->free + lprops->dirty != c->leb_size) {
910 ubifs_err("non-freeable LEB %d on freeable list "
911 "(free %d dirty %d flags %d)", lprops->lnum,
912 lprops->free, lprops->dirty, lprops->flags);
913 return -EINVAL;
914 }
915 if (lprops->flags & LPROPS_TAKEN) {
916 ubifs_err("taken LEB %d on freeable list "
917 "(free %d dirty %d flags %d)", lprops->lnum,
918 lprops->free, lprops->dirty, lprops->flags);
919 return -EINVAL;
920 }
921 i += 1;
922 }
923 if (i != c->freeable_cnt) {
924 ubifs_err("freeable list count %d expected %d", i,
925 c->freeable_cnt);
926 return -EINVAL;
927 }
928
929 i = 0;
930 list_for_each(pos, &c->idx_gc)
931 i += 1;
932 if (i != c->idx_gc_cnt) {
933 ubifs_err("idx_gc list count %d expected %d", i,
934 c->idx_gc_cnt);
935 return -EINVAL;
936 }
937
938 list_for_each_entry(lprops, &c->frdi_idx_list, list) {
939 if (lprops->free + lprops->dirty != c->leb_size) {
940 ubifs_err("non-freeable LEB %d on frdi_idx list "
941 "(free %d dirty %d flags %d)", lprops->lnum,
942 lprops->free, lprops->dirty, lprops->flags);
943 return -EINVAL;
944 }
945 if (lprops->flags & LPROPS_TAKEN) {
946 ubifs_err("taken LEB %d on frdi_idx list "
947 "(free %d dirty %d flags %d)", lprops->lnum,
948 lprops->free, lprops->dirty, lprops->flags);
949 return -EINVAL;
950 }
951 if (!(lprops->flags & LPROPS_INDEX)) {
952 ubifs_err("non-index LEB %d on frdi_idx list "
953 "(free %d dirty %d flags %d)", lprops->lnum,
954 lprops->free, lprops->dirty, lprops->flags);
955 return -EINVAL;
956 }
957 }
958
959 for (cat = 1; cat <= LPROPS_HEAP_CNT; cat++) {
960 struct ubifs_lpt_heap *heap = &c->lpt_heap[cat - 1];
961
962 for (i = 0; i < heap->cnt; i++) {
963 lprops = heap->arr[i];
964 if (!lprops) {
965 ubifs_err("null ptr in LPT heap cat %d", cat);
966 return -EINVAL;
967 }
968 if (lprops->hpos != i) {
969 ubifs_err("bad ptr in LPT heap cat %d", cat);
970 return -EINVAL;
971 }
972 if (lprops->flags & LPROPS_TAKEN) {
973 ubifs_err("taken LEB in LPT heap cat %d", cat);
974 return -EINVAL;
975 }
976 }
977 }
978
979 return 0;
980}
981
982void dbg_check_heap(struct ubifs_info *c, struct ubifs_lpt_heap *heap, int cat,
983 int add_pos)
984{
985 int i = 0, j, err = 0;
986
987 if (!(ubifs_chk_flags & (UBIFS_CHK_GEN | UBIFS_CHK_LPROPS)))
988 return;
989
990 for (i = 0; i < heap->cnt; i++) {
991 struct ubifs_lprops *lprops = heap->arr[i];
992 struct ubifs_lprops *lp;
993
994 if (i != add_pos)
995 if ((lprops->flags & LPROPS_CAT_MASK) != cat) {
996 err = 1;
997 goto out;
998 }
999 if (lprops->hpos != i) {
1000 err = 2;
1001 goto out;
1002 }
1003 lp = ubifs_lpt_lookup(c, lprops->lnum);
1004 if (IS_ERR(lp)) {
1005 err = 3;
1006 goto out;
1007 }
1008 if (lprops != lp) {
1009 dbg_msg("lprops %zx lp %zx lprops->lnum %d lp->lnum %d",
1010 (size_t)lprops, (size_t)lp, lprops->lnum,
1011 lp->lnum);
1012 err = 4;
1013 goto out;
1014 }
1015 for (j = 0; j < i; j++) {
1016 lp = heap->arr[j];
1017 if (lp == lprops) {
1018 err = 5;
1019 goto out;
1020 }
1021 if (lp->lnum == lprops->lnum) {
1022 err = 6;
1023 goto out;
1024 }
1025 }
1026 }
1027out:
1028 if (err) {
1029 dbg_msg("failed cat %d hpos %d err %d", cat, i, err);
1030 dbg_dump_stack();
1031 dbg_dump_heap(c, heap, cat);
1032 }
1033}
1034
1035/**
1036 * struct scan_check_data - data provided to scan callback function.
1037 * @lst: LEB properties statistics
1038 * @err: error code
1039 */
1040struct scan_check_data {
1041 struct ubifs_lp_stats lst;
1042 int err;
1043};
1044
1045/**
1046 * scan_check_cb - scan callback.
1047 * @c: the UBIFS file-system description object
1048 * @lp: LEB properties to scan
1049 * @in_tree: whether the LEB properties are in main memory
1050 * @data: information passed to and from the caller of the scan
1051 *
1052 * This function returns a code that indicates whether the scan should continue
1053 * (%LPT_SCAN_CONTINUE), whether the LEB properties should be added to the tree
1054 * in main memory (%LPT_SCAN_ADD), or whether the scan should stop
1055 * (%LPT_SCAN_STOP).
1056 */
1057static int scan_check_cb(struct ubifs_info *c,
1058 const struct ubifs_lprops *lp, int in_tree,
1059 struct scan_check_data *data)
1060{
1061 struct ubifs_scan_leb *sleb;
1062 struct ubifs_scan_node *snod;
1063 struct ubifs_lp_stats *lst = &data->lst;
1064 int cat, lnum = lp->lnum, is_idx = 0, used = 0, free, dirty;
1065
1066 cat = lp->flags & LPROPS_CAT_MASK;
1067 if (cat != LPROPS_UNCAT) {
1068 cat = ubifs_categorize_lprops(c, lp);
1069 if (cat != (lp->flags & LPROPS_CAT_MASK)) {
1070 ubifs_err("bad LEB category %d expected %d",
1071 (lp->flags & LPROPS_CAT_MASK), cat);
1072 goto out;
1073 }
1074 }
1075
1076 /* Check lp is on its category list (if it has one) */
1077 if (in_tree) {
1078 struct list_head *list = NULL;
1079
1080 switch (cat) {
1081 case LPROPS_EMPTY:
1082 list = &c->empty_list;
1083 break;
1084 case LPROPS_FREEABLE:
1085 list = &c->freeable_list;
1086 break;
1087 case LPROPS_FRDI_IDX:
1088 list = &c->frdi_idx_list;
1089 break;
1090 case LPROPS_UNCAT:
1091 list = &c->uncat_list;
1092 break;
1093 }
1094 if (list) {
1095 struct ubifs_lprops *lprops;
1096 int found = 0;
1097
1098 list_for_each_entry(lprops, list, list) {
1099 if (lprops == lp) {
1100 found = 1;
1101 break;
1102 }
1103 }
1104 if (!found) {
1105 ubifs_err("bad LPT list (category %d)", cat);
1106 goto out;
1107 }
1108 }
1109 }
1110
1111 /* Check lp is on its category heap (if it has one) */
1112 if (in_tree && cat > 0 && cat <= LPROPS_HEAP_CNT) {
1113 struct ubifs_lpt_heap *heap = &c->lpt_heap[cat - 1];
1114
1115 if ((lp->hpos != -1 && heap->arr[lp->hpos]->lnum != lnum) ||
1116 lp != heap->arr[lp->hpos]) {
1117 ubifs_err("bad LPT heap (category %d)", cat);
1118 goto out;
1119 }
1120 }
1121
1122 sleb = ubifs_scan(c, lnum, 0, c->dbg_buf);
1123 if (IS_ERR(sleb)) {
1124 /*
1125 * After an unclean unmount, empty and freeable LEBs
1126 * may contain garbage.
1127 */
1128 if (lp->free == c->leb_size) {
1129 ubifs_err("scan errors were in empty LEB "
1130 "- continuing checking");
1131 lst->empty_lebs += 1;
1132 lst->total_free += c->leb_size;
1133 lst->total_dark += calc_dark(c, c->leb_size);
1134 return LPT_SCAN_CONTINUE;
1135 }
1136
1137 if (lp->free + lp->dirty == c->leb_size &&
1138 !(lp->flags & LPROPS_INDEX)) {
1139 ubifs_err("scan errors were in freeable LEB "
1140 "- continuing checking");
1141 lst->total_free += lp->free;
1142 lst->total_dirty += lp->dirty;
1143 lst->total_dark += calc_dark(c, c->leb_size);
1144 return LPT_SCAN_CONTINUE;
1145 }
1146 data->err = PTR_ERR(sleb);
1147 return LPT_SCAN_STOP;
1148 }
1149
1150 is_idx = -1;
1151 list_for_each_entry(snod, &sleb->nodes, list) {
1152 int found, level = 0;
1153
1154 cond_resched();
1155
1156 if (is_idx == -1)
1157 is_idx = (snod->type == UBIFS_IDX_NODE) ? 1 : 0;
1158
1159 if (is_idx && snod->type != UBIFS_IDX_NODE) {
1160 ubifs_err("indexing node in data LEB %d:%d",
1161 lnum, snod->offs);
1162 goto out_destroy;
1163 }
1164
1165 if (snod->type == UBIFS_IDX_NODE) {
1166 struct ubifs_idx_node *idx = snod->node;
1167
1168 key_read(c, ubifs_idx_key(c, idx), &snod->key);
1169 level = le16_to_cpu(idx->level);
1170 }
1171
1172 found = ubifs_tnc_has_node(c, &snod->key, level, lnum,
1173 snod->offs, is_idx);
1174 if (found) {
1175 if (found < 0)
1176 goto out_destroy;
1177 used += ALIGN(snod->len, 8);
1178 }
1179 }
1180
1181 free = c->leb_size - sleb->endpt;
1182 dirty = sleb->endpt - used;
1183
1184 if (free > c->leb_size || free < 0 || dirty > c->leb_size ||
1185 dirty < 0) {
1186 ubifs_err("bad calculated accounting for LEB %d: "
1187 "free %d, dirty %d", lnum, free, dirty);
1188 goto out_destroy;
1189 }
1190
1191 if (lp->free + lp->dirty == c->leb_size &&
1192 free + dirty == c->leb_size)
1193 if ((is_idx && !(lp->flags & LPROPS_INDEX)) ||
1194 (!is_idx && free == c->leb_size) ||
1195 lp->free == c->leb_size) {
1196 /*
1197 * Empty or freeable LEBs could contain index
1198 * nodes from an uncompleted commit due to an
1199 * unclean unmount. Or they could be empty for
1200 * the same reason. Or it may simply not have been
1201 * unmapped.
1202 */
1203 free = lp->free;
1204 dirty = lp->dirty;
1205 is_idx = 0;
1206 }
1207
1208 if (is_idx && lp->free + lp->dirty == free + dirty &&
1209 lnum != c->ihead_lnum) {
1210 /*
1211 * After an unclean unmount, an index LEB could have a different
1212 * amount of free space than the value recorded by lprops. That
1213 * is because the in-the-gaps method may use free space or
1214 * create free space (as a side-effect of using ubi_leb_change
1215 * and not writing the whole LEB). The incorrect free space
1216 * value is not a problem because the index is only ever
1217 * allocated empty LEBs, so there will never be an attempt to
1218 * write to the free space at the end of an index LEB - except
1219 * by the in-the-gaps method for which it is not a problem.
1220 */
1221 free = lp->free;
1222 dirty = lp->dirty;
1223 }
1224
1225 if (lp->free != free || lp->dirty != dirty)
1226 goto out_print;
1227
1228 if (is_idx && !(lp->flags & LPROPS_INDEX)) {
1229 if (free == c->leb_size)
1230 /* Free but not unmapped LEB, it's fine */
1231 is_idx = 0;
1232 else {
1233 ubifs_err("indexing node without indexing "
1234 "flag");
1235 goto out_print;
1236 }
1237 }
1238
1239 if (!is_idx && (lp->flags & LPROPS_INDEX)) {
1240 ubifs_err("data node with indexing flag");
1241 goto out_print;
1242 }
1243
1244 if (free == c->leb_size)
1245 lst->empty_lebs += 1;
1246
1247 if (is_idx)
1248 lst->idx_lebs += 1;
1249
1250 if (!(lp->flags & LPROPS_INDEX))
1251 lst->total_used += c->leb_size - free - dirty;
1252 lst->total_free += free;
1253 lst->total_dirty += dirty;
1254
1255 if (!(lp->flags & LPROPS_INDEX)) {
1256 int spc = free + dirty;
1257
1258 if (spc < c->dead_wm)
1259 lst->total_dead += spc;
1260 else
1261 lst->total_dark += calc_dark(c, spc);
1262 }
1263
1264 ubifs_scan_destroy(sleb);
1265
1266 return LPT_SCAN_CONTINUE;
1267
1268out_print:
1269 ubifs_err("bad accounting of LEB %d: free %d, dirty %d flags %#x, "
1270 "should be free %d, dirty %d",
1271 lnum, lp->free, lp->dirty, lp->flags, free, dirty);
1272 dbg_dump_leb(c, lnum);
1273out_destroy:
1274 ubifs_scan_destroy(sleb);
1275out:
1276 data->err = -EINVAL;
1277 return LPT_SCAN_STOP;
1278}
1279
1280/**
1281 * dbg_check_lprops - check all LEB properties.
1282 * @c: UBIFS file-system description object
1283 *
1284 * This function checks all LEB properties and makes sure they are all correct.
1285 * It returns zero if everything is fine, %-EINVAL if there is an inconsistency
1286 * and other negative error codes in case of other errors. This function is
1287 * called while the file system is locked (because of commit start), so no
1288 * additional locking is required. Note that locking the LPT mutex would cause
1289 * a circular lock dependency with the TNC mutex.
1290 */
1291int dbg_check_lprops(struct ubifs_info *c)
1292{
1293 int i, err;
1294 struct scan_check_data data;
1295 struct ubifs_lp_stats *lst = &data.lst;
1296
1297 if (!(ubifs_chk_flags & UBIFS_CHK_LPROPS))
1298 return 0;
1299
1300 /*
1301 * As we are going to scan the media, the write buffers have to be
1302 * synchronized.
1303 */
1304 for (i = 0; i < c->jhead_cnt; i++) {
1305 err = ubifs_wbuf_sync(&c->jheads[i].wbuf);
1306 if (err)
1307 return err;
1308 }
1309
1310 memset(lst, 0, sizeof(struct ubifs_lp_stats));
1311
1312 data.err = 0;
1313 err = ubifs_lpt_scan_nolock(c, c->main_first, c->leb_cnt - 1,
1314 (ubifs_lpt_scan_callback)scan_check_cb,
1315 &data);
1316 if (err && err != -ENOSPC)
1317 goto out;
1318 if (data.err) {
1319 err = data.err;
1320 goto out;
1321 }
1322
1323 if (lst->empty_lebs != c->lst.empty_lebs ||
1324 lst->idx_lebs != c->lst.idx_lebs ||
1325 lst->total_free != c->lst.total_free ||
1326 lst->total_dirty != c->lst.total_dirty ||
1327 lst->total_used != c->lst.total_used) {
1328 ubifs_err("bad overall accounting");
1329 ubifs_err("calculated: empty_lebs %d, idx_lebs %d, "
1330 "total_free %lld, total_dirty %lld, total_used %lld",
1331 lst->empty_lebs, lst->idx_lebs, lst->total_free,
1332 lst->total_dirty, lst->total_used);
1333 ubifs_err("read from lprops: empty_lebs %d, idx_lebs %d, "
1334 "total_free %lld, total_dirty %lld, total_used %lld",
1335 c->lst.empty_lebs, c->lst.idx_lebs, c->lst.total_free,
1336 c->lst.total_dirty, c->lst.total_used);
1337 err = -EINVAL;
1338 goto out;
1339 }
1340
1341 if (lst->total_dead != c->lst.total_dead ||
1342 lst->total_dark != c->lst.total_dark) {
1343 ubifs_err("bad dead/dark space accounting");
1344 ubifs_err("calculated: total_dead %lld, total_dark %lld",
1345 lst->total_dead, lst->total_dark);
1346 ubifs_err("read from lprops: total_dead %lld, total_dark %lld",
1347 c->lst.total_dead, c->lst.total_dark);
1348 err = -EINVAL;
1349 goto out;
1350 }
1351
1352 err = dbg_check_cats(c);
1353out:
1354 return err;
1355}
1356
1357#endif /* CONFIG_UBIFS_FS_DEBUG */
diff --git a/fs/ubifs/lpt.c b/fs/ubifs/lpt.c
new file mode 100644
index 00000000000..9ff2463177e
--- /dev/null
+++ b/fs/ubifs/lpt.c
@@ -0,0 +1,2243 @@
1/*
2 * This file is part of UBIFS.
3 *
4 * Copyright (C) 2006-2008 Nokia Corporation.
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms of the GNU General Public License version 2 as published by
8 * the Free Software Foundation.
9 *
10 * This program is distributed in the hope that it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13 * more details.
14 *
15 * You should have received a copy of the GNU General Public License along with
16 * this program; if not, write to the Free Software Foundation, Inc., 51
17 * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
18 *
19 * Authors: Adrian Hunter
20 * Artem Bityutskiy (Битюцкий Артём)
21 */
22
23/*
24 * This file implements the LEB properties tree (LPT) area. The LPT area
25 * contains the LEB properties tree, a table of LPT area eraseblocks (ltab), and
26 * (for the "big" model) a table of saved LEB numbers (lsave). The LPT area sits
27 * between the log and the orphan area.
28 *
29 * The LPT area is like a miniature self-contained file system. It is required
30 * that it never runs out of space, is fast to access and update, and scales
31 * logarithmically. The LEB properties tree is implemented as a wandering tree
32 * much like the TNC, and the LPT area has its own garbage collection.
33 *
34 * The LPT has two slightly different forms called the "small model" and the
35 * "big model". The small model is used when the entire LEB properties table
36 * can be written into a single eraseblock. In that case, garbage collection
37 * consists of just writing the whole table, which therefore makes all other
38 * eraseblocks reusable. In the case of the big model, dirty eraseblocks are
39 * selected for garbage collection, which consists of marking the nodes in
40 * that LEB as dirty, and then only the dirty nodes are written out. Also, in
41 * the case of the big model, a table of LEB numbers is saved so that the entire
42 * LPT does not need to be scanned looking for empty eraseblocks when UBIFS is first
43 * mounted.
44 */
45
46#include <linux/crc16.h>
47#include "ubifs.h"
48
49/**
50 * do_calc_lpt_geom - calculate sizes for the LPT area.
51 * @c: the UBIFS file-system description object
52 *
53 * Calculate the sizes of LPT bit fields, nodes, and tree, based on the
54 * properties of the flash and whether LPT is "big" (c->big_lpt).
55 */
56static void do_calc_lpt_geom(struct ubifs_info *c)
57{
58 int i, n, bits, per_leb_wastage, max_pnode_cnt;
59 long long sz, tot_wastage;
60
61 n = c->main_lebs + c->max_leb_cnt - c->leb_cnt;
62 max_pnode_cnt = DIV_ROUND_UP(n, UBIFS_LPT_FANOUT);
63
64 c->lpt_hght = 1;
65 n = UBIFS_LPT_FANOUT;
66 while (n < max_pnode_cnt) {
67 c->lpt_hght += 1;
68 n <<= UBIFS_LPT_FANOUT_SHIFT;
69 }
70
71 c->pnode_cnt = DIV_ROUND_UP(c->main_lebs, UBIFS_LPT_FANOUT);
72
73 n = DIV_ROUND_UP(c->pnode_cnt, UBIFS_LPT_FANOUT);
74 c->nnode_cnt = n;
75 for (i = 1; i < c->lpt_hght; i++) {
76 n = DIV_ROUND_UP(n, UBIFS_LPT_FANOUT);
77 c->nnode_cnt += n;
78 }
79
80 c->space_bits = fls(c->leb_size) - 3;
81 c->lpt_lnum_bits = fls(c->lpt_lebs);
82 c->lpt_offs_bits = fls(c->leb_size - 1);
83 c->lpt_spc_bits = fls(c->leb_size);
84
85 n = DIV_ROUND_UP(c->max_leb_cnt, UBIFS_LPT_FANOUT);
86 c->pcnt_bits = fls(n - 1);
87
88 c->lnum_bits = fls(c->max_leb_cnt - 1);
89
90 bits = UBIFS_LPT_CRC_BITS + UBIFS_LPT_TYPE_BITS +
91 (c->big_lpt ? c->pcnt_bits : 0) +
92 (c->space_bits * 2 + 1) * UBIFS_LPT_FANOUT;
93 c->pnode_sz = (bits + 7) / 8;
94
95 bits = UBIFS_LPT_CRC_BITS + UBIFS_LPT_TYPE_BITS +
96 (c->big_lpt ? c->pcnt_bits : 0) +
97 (c->lpt_lnum_bits + c->lpt_offs_bits) * UBIFS_LPT_FANOUT;
98 c->nnode_sz = (bits + 7) / 8;
99
100 bits = UBIFS_LPT_CRC_BITS + UBIFS_LPT_TYPE_BITS +
101 c->lpt_lebs * c->lpt_spc_bits * 2;
102 c->ltab_sz = (bits + 7) / 8;
103
104 bits = UBIFS_LPT_CRC_BITS + UBIFS_LPT_TYPE_BITS +
105 c->lnum_bits * c->lsave_cnt;
106 c->lsave_sz = (bits + 7) / 8;
107
108 /* Calculate the minimum LPT size */
109 c->lpt_sz = (long long)c->pnode_cnt * c->pnode_sz;
110 c->lpt_sz += (long long)c->nnode_cnt * c->nnode_sz;
111 c->lpt_sz += c->ltab_sz;
112 c->lpt_sz += c->lsave_sz;
113
114 /* Add wastage */
115 sz = c->lpt_sz;
116 per_leb_wastage = max_t(int, c->pnode_sz, c->nnode_sz);
117 sz += per_leb_wastage;
118 tot_wastage = per_leb_wastage;
119 while (sz > c->leb_size) {
120 sz += per_leb_wastage;
121 sz -= c->leb_size;
122 tot_wastage += per_leb_wastage;
123 }
124 tot_wastage += ALIGN(sz, c->min_io_size) - sz;
125 c->lpt_sz += tot_wastage;
126}
127
128/**
129 * ubifs_calc_lpt_geom - calculate and check sizes for the LPT area.
130 * @c: the UBIFS file-system description object
131 *
132 * This function returns %0 on success and a negative error code on failure.
133 */
134int ubifs_calc_lpt_geom(struct ubifs_info *c)
135{
136 int lebs_needed;
137 uint64_t sz;
138
139 do_calc_lpt_geom(c);
140
141 /* Verify that lpt_lebs is big enough */
142 sz = c->lpt_sz * 2; /* Must have at least 2 times the size */
143 sz += c->leb_size - 1;
144 do_div(sz, c->leb_size);
145 lebs_needed = sz;
146 if (lebs_needed > c->lpt_lebs) {
147 ubifs_err("too few LPT LEBs");
148 return -EINVAL;
149 }
150
151 /* Verify that ltab fits in a single LEB (since ltab is a single node */
152 if (c->ltab_sz > c->leb_size) {
153 ubifs_err("LPT ltab too big");
154 return -EINVAL;
155 }
156
157 c->check_lpt_free = c->big_lpt;
158
159 return 0;
160}
161
162/**
163 * calc_dflt_lpt_geom - calculate default LPT geometry.
164 * @c: the UBIFS file-system description object
165 * @main_lebs: number of main area LEBs is passed and returned here
166 * @big_lpt: whether the LPT area is "big" is returned here
167 *
168 * The size of the LPT area depends on parameters that themselves are dependent
169 * on the size of the LPT area. This function, successively recalculates the LPT
170 * area geometry until the parameters and resultant geometry are consistent.
171 *
172 * This function returns %0 on success and a negative error code on failure.
173 */
174static int calc_dflt_lpt_geom(struct ubifs_info *c, int *main_lebs,
175 int *big_lpt)
176{
177 int i, lebs_needed;
178 uint64_t sz;
179
180 /* Start by assuming the minimum number of LPT LEBs */
181 c->lpt_lebs = UBIFS_MIN_LPT_LEBS;
182 c->main_lebs = *main_lebs - c->lpt_lebs;
183 if (c->main_lebs <= 0)
184 return -EINVAL;
185
186 /* And assume we will use the small LPT model */
187 c->big_lpt = 0;
188
189 /*
190 * Calculate the geometry based on assumptions above and then see if it
191 * makes sense
192 */
193 do_calc_lpt_geom(c);
194
195 /* Small LPT model must have lpt_sz < leb_size */
196 if (c->lpt_sz > c->leb_size) {
197 /* Nope, so try again using big LPT model */
198 c->big_lpt = 1;
199 do_calc_lpt_geom(c);
200 }
201
202 /* Now check there are enough LPT LEBs */
203 for (i = 0; i < 64 ; i++) {
204 sz = c->lpt_sz * 4; /* Allow 4 times the size */
205 sz += c->leb_size - 1;
206 do_div(sz, c->leb_size);
207 lebs_needed = sz;
208 if (lebs_needed > c->lpt_lebs) {
209 /* Not enough LPT LEBs so try again with more */
210 c->lpt_lebs = lebs_needed;
211 c->main_lebs = *main_lebs - c->lpt_lebs;
212 if (c->main_lebs <= 0)
213 return -EINVAL;
214 do_calc_lpt_geom(c);
215 continue;
216 }
217 if (c->ltab_sz > c->leb_size) {
218 ubifs_err("LPT ltab too big");
219 return -EINVAL;
220 }
221 *main_lebs = c->main_lebs;
222 *big_lpt = c->big_lpt;
223 return 0;
224 }
225 return -EINVAL;
226}
227
/**
 * pack_bits - pack bit fields end-to-end.
 * @addr: address at which to pack (passed and next address returned)
 * @pos: bit position at which to pack (passed and next position returned)
 * @val: value to pack
 * @nrbits: number of bits of value to pack (1-32)
 *
 * The field is stored least significant bit first, starting at bit @pos of
 * the byte at @addr. When @pos is non-zero the first byte is OR-ed into, so
 * the bits above @pos in that byte must still be zero; every following byte
 * touched by the field is overwritten.
 */
static void pack_bits(uint8_t **addr, int *pos, uint32_t val, int nrbits)
{
	uint8_t *p = *addr;
	const int b = *pos;
	const int end = b + nrbits;		/* bit offset just past field */
	const int nbytes = (end + 7) / 8;	/* bytes touched, 1 to 5 */
	/* The value shifted to where it sits in the byte stream (<= 39 bits) */
	const uint64_t v = (uint64_t)val << b;
	int i;

	ubifs_assert(nrbits > 0);
	ubifs_assert(nrbits <= 32);
	ubifs_assert(*pos >= 0);
	ubifs_assert(*pos < 8);
	/*
	 * Test the width first: shifting a 32-bit value by 32 is undefined, so
	 * 'val >> nrbits' must not be evaluated when @nrbits is 32.
	 */
	ubifs_assert(nrbits == 32 || (val >> nrbits) == 0);

	if (b)
		p[0] |= (uint8_t)v;
	else
		p[0] = (uint8_t)v;
	for (i = 1; i < nbytes; i++)
		p[i] = (uint8_t)(v >> (8 * i));

	/* Advance past every completely filled byte */
	*addr = p + end / 8;
	*pos = end & 7;
}
276
/**
 * ubifs_unpack_bits - unpack bit fields.
 * @addr: address at which to unpack (passed and next address returned)
 * @pos: bit position at which to unpack (passed and next position returned)
 * @nrbits: number of bits of value to unpack (1-32)
 *
 * The field is read least significant bit first, starting at bit @pos of the
 * byte at @addr. Only the bytes that actually contain bits of the field are
 * read, so unpacking the last field of a node never touches memory beyond
 * the node.
 *
 * This function returns the value unpacked.
 */
uint32_t ubifs_unpack_bits(uint8_t **addr, int *pos, int nrbits)
{
	uint8_t *p = *addr;
	const int b = *pos;
	const int end = b + nrbits;		/* bit offset just past field */
	const int nbytes = (end + 7) / 8;	/* bytes holding field bits */
	uint64_t acc = 0;
	uint32_t val;
	int i;

	ubifs_assert(nrbits > 0);
	ubifs_assert(nrbits <= 32);
	ubifs_assert(*pos >= 0);
	ubifs_assert(*pos < 8);

	/* Gather up to 5 bytes into a 64-bit accumulator, low byte first */
	for (i = 0; i < nbytes; i++)
		acc |= (uint64_t)p[i] << (8 * i);

	/* Drop the bits below @pos, then the bits above the field */
	val = (uint32_t)(acc >> b);
	if (nrbits < 32)
		val &= ((uint32_t)1 << nrbits) - 1;

	*addr = p + end / 8;
	*pos = end & 7;
	/* Width is tested first so that a 32-bit shift is never evaluated */
	ubifs_assert(nrbits == 32 || (val >> nrbits) == 0);
	return val;
}
314
315/**
316 * ubifs_pack_pnode - pack all the bit fields of a pnode.
317 * @c: UBIFS file-system description object
318 * @buf: buffer into which to pack
319 * @pnode: pnode to pack
320 */
321void ubifs_pack_pnode(struct ubifs_info *c, void *buf,
322 struct ubifs_pnode *pnode)
323{
324 uint8_t *addr = buf + UBIFS_LPT_CRC_BYTES;
325 int i, pos = 0;
326 uint16_t crc;
327
328 pack_bits(&addr, &pos, UBIFS_LPT_PNODE, UBIFS_LPT_TYPE_BITS);
329 if (c->big_lpt)
330 pack_bits(&addr, &pos, pnode->num, c->pcnt_bits);
331 for (i = 0; i < UBIFS_LPT_FANOUT; i++) {
332 pack_bits(&addr, &pos, pnode->lprops[i].free >> 3,
333 c->space_bits);
334 pack_bits(&addr, &pos, pnode->lprops[i].dirty >> 3,
335 c->space_bits);
336 if (pnode->lprops[i].flags & LPROPS_INDEX)
337 pack_bits(&addr, &pos, 1, 1);
338 else
339 pack_bits(&addr, &pos, 0, 1);
340 }
341 crc = crc16(-1, buf + UBIFS_LPT_CRC_BYTES,
342 c->pnode_sz - UBIFS_LPT_CRC_BYTES);
343 addr = buf;
344 pos = 0;
345 pack_bits(&addr, &pos, crc, UBIFS_LPT_CRC_BITS);
346}
347
348/**
349 * ubifs_pack_nnode - pack all the bit fields of a nnode.
350 * @c: UBIFS file-system description object
351 * @buf: buffer into which to pack
352 * @nnode: nnode to pack
353 */
354void ubifs_pack_nnode(struct ubifs_info *c, void *buf,
355 struct ubifs_nnode *nnode)
356{
357 uint8_t *addr = buf + UBIFS_LPT_CRC_BYTES;
358 int i, pos = 0;
359 uint16_t crc;
360
361 pack_bits(&addr, &pos, UBIFS_LPT_NNODE, UBIFS_LPT_TYPE_BITS);
362 if (c->big_lpt)
363 pack_bits(&addr, &pos, nnode->num, c->pcnt_bits);
364 for (i = 0; i < UBIFS_LPT_FANOUT; i++) {
365 int lnum = nnode->nbranch[i].lnum;
366
367 if (lnum == 0)
368 lnum = c->lpt_last + 1;
369 pack_bits(&addr, &pos, lnum - c->lpt_first, c->lpt_lnum_bits);
370 pack_bits(&addr, &pos, nnode->nbranch[i].offs,
371 c->lpt_offs_bits);
372 }
373 crc = crc16(-1, buf + UBIFS_LPT_CRC_BYTES,
374 c->nnode_sz - UBIFS_LPT_CRC_BYTES);
375 addr = buf;
376 pos = 0;
377 pack_bits(&addr, &pos, crc, UBIFS_LPT_CRC_BITS);
378}
379
380/**
381 * ubifs_pack_ltab - pack the LPT's own lprops table.
382 * @c: UBIFS file-system description object
383 * @buf: buffer into which to pack
384 * @ltab: LPT's own lprops table to pack
385 */
386void ubifs_pack_ltab(struct ubifs_info *c, void *buf,
387 struct ubifs_lpt_lprops *ltab)
388{
389 uint8_t *addr = buf + UBIFS_LPT_CRC_BYTES;
390 int i, pos = 0;
391 uint16_t crc;
392
393 pack_bits(&addr, &pos, UBIFS_LPT_LTAB, UBIFS_LPT_TYPE_BITS);
394 for (i = 0; i < c->lpt_lebs; i++) {
395 pack_bits(&addr, &pos, ltab[i].free, c->lpt_spc_bits);
396 pack_bits(&addr, &pos, ltab[i].dirty, c->lpt_spc_bits);
397 }
398 crc = crc16(-1, buf + UBIFS_LPT_CRC_BYTES,
399 c->ltab_sz - UBIFS_LPT_CRC_BYTES);
400 addr = buf;
401 pos = 0;
402 pack_bits(&addr, &pos, crc, UBIFS_LPT_CRC_BITS);
403}
404
405/**
406 * ubifs_pack_lsave - pack the LPT's save table.
407 * @c: UBIFS file-system description object
408 * @buf: buffer into which to pack
409 * @lsave: LPT's save table to pack
410 */
411void ubifs_pack_lsave(struct ubifs_info *c, void *buf, int *lsave)
412{
413 uint8_t *addr = buf + UBIFS_LPT_CRC_BYTES;
414 int i, pos = 0;
415 uint16_t crc;
416
417 pack_bits(&addr, &pos, UBIFS_LPT_LSAVE, UBIFS_LPT_TYPE_BITS);
418 for (i = 0; i < c->lsave_cnt; i++)
419 pack_bits(&addr, &pos, lsave[i], c->lnum_bits);
420 crc = crc16(-1, buf + UBIFS_LPT_CRC_BYTES,
421 c->lsave_sz - UBIFS_LPT_CRC_BYTES);
422 addr = buf;
423 pos = 0;
424 pack_bits(&addr, &pos, crc, UBIFS_LPT_CRC_BITS);
425}
426
427/**
428 * ubifs_add_lpt_dirt - add dirty space to LPT LEB properties.
429 * @c: UBIFS file-system description object
430 * @lnum: LEB number to which to add dirty space
431 * @dirty: amount of dirty space to add
432 */
433void ubifs_add_lpt_dirt(struct ubifs_info *c, int lnum, int dirty)
434{
435 if (!dirty || !lnum)
436 return;
437 dbg_lp("LEB %d add %d to %d",
438 lnum, dirty, c->ltab[lnum - c->lpt_first].dirty);
439 ubifs_assert(lnum >= c->lpt_first && lnum <= c->lpt_last);
440 c->ltab[lnum - c->lpt_first].dirty += dirty;
441}
442
443/**
444 * set_ltab - set LPT LEB properties.
445 * @c: UBIFS file-system description object
446 * @lnum: LEB number
447 * @free: amount of free space
448 * @dirty: amount of dirty space
449 */
450static void set_ltab(struct ubifs_info *c, int lnum, int free, int dirty)
451{
452 dbg_lp("LEB %d free %d dirty %d to %d %d",
453 lnum, c->ltab[lnum - c->lpt_first].free,
454 c->ltab[lnum - c->lpt_first].dirty, free, dirty);
455 ubifs_assert(lnum >= c->lpt_first && lnum <= c->lpt_last);
456 c->ltab[lnum - c->lpt_first].free = free;
457 c->ltab[lnum - c->lpt_first].dirty = dirty;
458}
459
460/**
461 * ubifs_add_nnode_dirt - add dirty space to LPT LEB properties.
462 * @c: UBIFS file-system description object
463 * @nnode: nnode for which to add dirt
464 */
465void ubifs_add_nnode_dirt(struct ubifs_info *c, struct ubifs_nnode *nnode)
466{
467 struct ubifs_nnode *np = nnode->parent;
468
469 if (np)
470 ubifs_add_lpt_dirt(c, np->nbranch[nnode->iip].lnum,
471 c->nnode_sz);
472 else {
473 ubifs_add_lpt_dirt(c, c->lpt_lnum, c->nnode_sz);
474 if (!(c->lpt_drty_flgs & LTAB_DIRTY)) {
475 c->lpt_drty_flgs |= LTAB_DIRTY;
476 ubifs_add_lpt_dirt(c, c->ltab_lnum, c->ltab_sz);
477 }
478 }
479}
480
481/**
482 * add_pnode_dirt - add dirty space to LPT LEB properties.
483 * @c: UBIFS file-system description object
484 * @pnode: pnode for which to add dirt
485 */
486static void add_pnode_dirt(struct ubifs_info *c, struct ubifs_pnode *pnode)
487{
488 ubifs_add_lpt_dirt(c, pnode->parent->nbranch[pnode->iip].lnum,
489 c->pnode_sz);
490}
491
/**
 * calc_nnode_num - calculate nnode number.
 * @row: the row in the tree (root is zero)
 * @col: the column in the row (leftmost is zero)
 *
 * The nnode number is a number that uniquely identifies a nnode and can be used
 * easily to traverse the tree from the root to that nnode.
 *
 * The number starts as 1 (the root) and, once per row, is shifted left by
 * %UBIFS_LPT_FANOUT_SHIFT bits to take in the next group of low bits of @col.
 *
 * This function returns the nnode number for the nnode at @row and @col.
 */
static int calc_nnode_num(int row, int col)
{
	int num = 1;

	for (; row != 0; row--) {
		num = (num << UBIFS_LPT_FANOUT_SHIFT) |
		      (col & (UBIFS_LPT_FANOUT - 1));
		col >>= UBIFS_LPT_FANOUT_SHIFT;
	}
	return num;
}
516
517/**
518 * calc_nnode_num_from_parent - calculate nnode number.
519 * @c: UBIFS file-system description object
520 * @parent: parent nnode
521 * @iip: index in parent
522 *
523 * The nnode number is a number that uniquely identifies a nnode and can be used
524 * easily to traverse the tree from the root to that nnode.
525 *
526 * This function calculates and returns the nnode number based on the parent's
527 * nnode number and the index in parent.
528 */
529static int calc_nnode_num_from_parent(struct ubifs_info *c,
530 struct ubifs_nnode *parent, int iip)
531{
532 int num, shft;
533
534 if (!parent)
535 return 1;
536 shft = (c->lpt_hght - parent->level) * UBIFS_LPT_FANOUT_SHIFT;
537 num = parent->num ^ (1 << shft);
538 num |= (UBIFS_LPT_FANOUT + iip) << shft;
539 return num;
540}
541
542/**
543 * calc_pnode_num_from_parent - calculate pnode number.
544 * @c: UBIFS file-system description object
545 * @parent: parent nnode
546 * @iip: index in parent
547 *
548 * The pnode number is a number that uniquely identifies a pnode and can be used
549 * easily to traverse the tree from the root to that pnode.
550 *
551 * This function calculates and returns the pnode number based on the parent's
552 * nnode number and the index in parent.
553 */
554static int calc_pnode_num_from_parent(struct ubifs_info *c,
555 struct ubifs_nnode *parent, int iip)
556{
557 int i, n = c->lpt_hght - 1, pnum = parent->num, num = 0;
558
559 for (i = 0; i < n; i++) {
560 num <<= UBIFS_LPT_FANOUT_SHIFT;
561 num |= pnum & (UBIFS_LPT_FANOUT - 1);
562 pnum >>= UBIFS_LPT_FANOUT_SHIFT;
563 }
564 num <<= UBIFS_LPT_FANOUT_SHIFT;
565 num |= iip;
566 return num;
567}
568
569/**
570 * ubifs_create_dflt_lpt - create default LPT.
571 * @c: UBIFS file-system description object
572 * @main_lebs: number of main area LEBs is passed and returned here
573 * @lpt_first: LEB number of first LPT LEB
574 * @lpt_lebs: number of LEBs for LPT is passed and returned here
575 * @big_lpt: use big LPT model is passed and returned here
576 *
577 * This function returns %0 on success and a negative error code on failure.
578 */
579int ubifs_create_dflt_lpt(struct ubifs_info *c, int *main_lebs, int lpt_first,
580 int *lpt_lebs, int *big_lpt)
581{
582 int lnum, err = 0, node_sz, iopos, i, j, cnt, len, alen, row;
583 int blnum, boffs, bsz, bcnt;
584 struct ubifs_pnode *pnode = NULL;
585 struct ubifs_nnode *nnode = NULL;
586 void *buf = NULL, *p;
587 struct ubifs_lpt_lprops *ltab = NULL;
588 int *lsave = NULL;
589
590 err = calc_dflt_lpt_geom(c, main_lebs, big_lpt);
591 if (err)
592 return err;
593 *lpt_lebs = c->lpt_lebs;
594
595 /* Needed by 'ubifs_pack_nnode()' and 'set_ltab()' */
596 c->lpt_first = lpt_first;
597 /* Needed by 'set_ltab()' */
598 c->lpt_last = lpt_first + c->lpt_lebs - 1;
599 /* Needed by 'ubifs_pack_lsave()' */
600 c->main_first = c->leb_cnt - *main_lebs;
601
602 lsave = kmalloc(sizeof(int) * c->lsave_cnt, GFP_KERNEL);
603 pnode = kzalloc(sizeof(struct ubifs_pnode), GFP_KERNEL);
604 nnode = kzalloc(sizeof(struct ubifs_nnode), GFP_KERNEL);
605 buf = vmalloc(c->leb_size);
606 ltab = vmalloc(sizeof(struct ubifs_lpt_lprops) * c->lpt_lebs);
607 if (!pnode || !nnode || !buf || !ltab || !lsave) {
608 err = -ENOMEM;
609 goto out;
610 }
611
612 ubifs_assert(!c->ltab);
613 c->ltab = ltab; /* Needed by set_ltab */
614
615 /* Initialize LPT's own lprops */
616 for (i = 0; i < c->lpt_lebs; i++) {
617 ltab[i].free = c->leb_size;
618 ltab[i].dirty = 0;
619 ltab[i].tgc = 0;
620 ltab[i].cmt = 0;
621 }
622
623 lnum = lpt_first;
624 p = buf;
625 /* Number of leaf nodes (pnodes) */
626 cnt = c->pnode_cnt;
627
628 /*
629 * The first pnode contains the LEB properties for the LEBs that contain
630 * the root inode node and the root index node of the index tree.
631 */
632 node_sz = ALIGN(ubifs_idx_node_sz(c, 1), 8);
633 iopos = ALIGN(node_sz, c->min_io_size);
634 pnode->lprops[0].free = c->leb_size - iopos;
635 pnode->lprops[0].dirty = iopos - node_sz;
636 pnode->lprops[0].flags = LPROPS_INDEX;
637
638 node_sz = UBIFS_INO_NODE_SZ;
639 iopos = ALIGN(node_sz, c->min_io_size);
640 pnode->lprops[1].free = c->leb_size - iopos;
641 pnode->lprops[1].dirty = iopos - node_sz;
642
643 for (i = 2; i < UBIFS_LPT_FANOUT; i++)
644 pnode->lprops[i].free = c->leb_size;
645
646 /* Add first pnode */
647 ubifs_pack_pnode(c, p, pnode);
648 p += c->pnode_sz;
649 len = c->pnode_sz;
650 pnode->num += 1;
651
652 /* Reset pnode values for remaining pnodes */
653 pnode->lprops[0].free = c->leb_size;
654 pnode->lprops[0].dirty = 0;
655 pnode->lprops[0].flags = 0;
656
657 pnode->lprops[1].free = c->leb_size;
658 pnode->lprops[1].dirty = 0;
659
660 /*
661 * To calculate the internal node branches, we keep information about
662 * the level below.
663 */
664 blnum = lnum; /* LEB number of level below */
665 boffs = 0; /* Offset of level below */
666 bcnt = cnt; /* Number of nodes in level below */
667 bsz = c->pnode_sz; /* Size of nodes in level below */
668
669 /* Add all remaining pnodes */
670 for (i = 1; i < cnt; i++) {
671 if (len + c->pnode_sz > c->leb_size) {
672 alen = ALIGN(len, c->min_io_size);
673 set_ltab(c, lnum, c->leb_size - alen, alen - len);
674 memset(p, 0xff, alen - len);
675 err = ubi_leb_change(c->ubi, lnum++, buf, alen,
676 UBI_SHORTTERM);
677 if (err)
678 goto out;
679 p = buf;
680 len = 0;
681 }
682 ubifs_pack_pnode(c, p, pnode);
683 p += c->pnode_sz;
684 len += c->pnode_sz;
685 /*
686 * pnodes are simply numbered left to right starting at zero,
687 * which means the pnode number can be used easily to traverse
688 * down the tree to the corresponding pnode.
689 */
690 pnode->num += 1;
691 }
692
693 row = 0;
694 for (i = UBIFS_LPT_FANOUT; cnt > i; i <<= UBIFS_LPT_FANOUT_SHIFT)
695 row += 1;
696 /* Add all nnodes, one level at a time */
697 while (1) {
698 /* Number of internal nodes (nnodes) at next level */
699 cnt = DIV_ROUND_UP(cnt, UBIFS_LPT_FANOUT);
700 for (i = 0; i < cnt; i++) {
701 if (len + c->nnode_sz > c->leb_size) {
702 alen = ALIGN(len, c->min_io_size);
703 set_ltab(c, lnum, c->leb_size - alen,
704 alen - len);
705 memset(p, 0xff, alen - len);
706 err = ubi_leb_change(c->ubi, lnum++, buf, alen,
707 UBI_SHORTTERM);
708 if (err)
709 goto out;
710 p = buf;
711 len = 0;
712 }
713 /* Only 1 nnode at this level, so it is the root */
714 if (cnt == 1) {
715 c->lpt_lnum = lnum;
716 c->lpt_offs = len;
717 }
718 /* Set branches to the level below */
719 for (j = 0; j < UBIFS_LPT_FANOUT; j++) {
720 if (bcnt) {
721 if (boffs + bsz > c->leb_size) {
722 blnum += 1;
723 boffs = 0;
724 }
725 nnode->nbranch[j].lnum = blnum;
726 nnode->nbranch[j].offs = boffs;
727 boffs += bsz;
728 bcnt--;
729 } else {
730 nnode->nbranch[j].lnum = 0;
731 nnode->nbranch[j].offs = 0;
732 }
733 }
734 nnode->num = calc_nnode_num(row, i);
735 ubifs_pack_nnode(c, p, nnode);
736 p += c->nnode_sz;
737 len += c->nnode_sz;
738 }
739 /* Only 1 nnode at this level, so it is the root */
740 if (cnt == 1)
741 break;
742 /* Update the information about the level below */
743 bcnt = cnt;
744 bsz = c->nnode_sz;
745 row -= 1;
746 }
747
748 if (*big_lpt) {
749 /* Need to add LPT's save table */
750 if (len + c->lsave_sz > c->leb_size) {
751 alen = ALIGN(len, c->min_io_size);
752 set_ltab(c, lnum, c->leb_size - alen, alen - len);
753 memset(p, 0xff, alen - len);
754 err = ubi_leb_change(c->ubi, lnum++, buf, alen,
755 UBI_SHORTTERM);
756 if (err)
757 goto out;
758 p = buf;
759 len = 0;
760 }
761
762 c->lsave_lnum = lnum;
763 c->lsave_offs = len;
764
765 for (i = 0; i < c->lsave_cnt && i < *main_lebs; i++)
766 lsave[i] = c->main_first + i;
767 for (; i < c->lsave_cnt; i++)
768 lsave[i] = c->main_first;
769
770 ubifs_pack_lsave(c, p, lsave);
771 p += c->lsave_sz;
772 len += c->lsave_sz;
773 }
774
775 /* Need to add LPT's own LEB properties table */
776 if (len + c->ltab_sz > c->leb_size) {
777 alen = ALIGN(len, c->min_io_size);
778 set_ltab(c, lnum, c->leb_size - alen, alen - len);
779 memset(p, 0xff, alen - len);
780 err = ubi_leb_change(c->ubi, lnum++, buf, alen, UBI_SHORTTERM);
781 if (err)
782 goto out;
783 p = buf;
784 len = 0;
785 }
786
787 c->ltab_lnum = lnum;
788 c->ltab_offs = len;
789
790 /* Update ltab before packing it */
791 len += c->ltab_sz;
792 alen = ALIGN(len, c->min_io_size);
793 set_ltab(c, lnum, c->leb_size - alen, alen - len);
794
795 ubifs_pack_ltab(c, p, ltab);
796 p += c->ltab_sz;
797
798 /* Write remaining buffer */
799 memset(p, 0xff, alen - len);
800 err = ubi_leb_change(c->ubi, lnum, buf, alen, UBI_SHORTTERM);
801 if (err)
802 goto out;
803
804 c->nhead_lnum = lnum;
805 c->nhead_offs = ALIGN(len, c->min_io_size);
806
807 dbg_lp("space_bits %d", c->space_bits);
808 dbg_lp("lpt_lnum_bits %d", c->lpt_lnum_bits);
809 dbg_lp("lpt_offs_bits %d", c->lpt_offs_bits);
810 dbg_lp("lpt_spc_bits %d", c->lpt_spc_bits);
811 dbg_lp("pcnt_bits %d", c->pcnt_bits);
812 dbg_lp("lnum_bits %d", c->lnum_bits);
813 dbg_lp("pnode_sz %d", c->pnode_sz);
814 dbg_lp("nnode_sz %d", c->nnode_sz);
815 dbg_lp("ltab_sz %d", c->ltab_sz);
816 dbg_lp("lsave_sz %d", c->lsave_sz);
817 dbg_lp("lsave_cnt %d", c->lsave_cnt);
818 dbg_lp("lpt_hght %d", c->lpt_hght);
819 dbg_lp("big_lpt %d", c->big_lpt);
820 dbg_lp("LPT root is at %d:%d", c->lpt_lnum, c->lpt_offs);
821 dbg_lp("LPT head is at %d:%d", c->nhead_lnum, c->nhead_offs);
822 dbg_lp("LPT ltab is at %d:%d", c->ltab_lnum, c->ltab_offs);
823 if (c->big_lpt)
824 dbg_lp("LPT lsave is at %d:%d", c->lsave_lnum, c->lsave_offs);
825out:
826 c->ltab = NULL;
827 kfree(lsave);
828 vfree(ltab);
829 vfree(buf);
830 kfree(nnode);
831 kfree(pnode);
832 return err;
833}
834
835/**
836 * update_cats - add LEB properties of a pnode to LEB category lists and heaps.
837 * @c: UBIFS file-system description object
838 * @pnode: pnode
839 *
840 * When a pnode is loaded into memory, the LEB properties it contains are added,
841 * by this function, to the LEB category lists and heaps.
842 */
843static void update_cats(struct ubifs_info *c, struct ubifs_pnode *pnode)
844{
845 int i;
846
847 for (i = 0; i < UBIFS_LPT_FANOUT; i++) {
848 int cat = pnode->lprops[i].flags & LPROPS_CAT_MASK;
849 int lnum = pnode->lprops[i].lnum;
850
851 if (!lnum)
852 return;
853 ubifs_add_to_cat(c, &pnode->lprops[i], cat);
854 }
855}
856
857/**
858 * replace_cats - add LEB properties of a pnode to LEB category lists and heaps.
859 * @c: UBIFS file-system description object
860 * @old_pnode: pnode copied
861 * @new_pnode: pnode copy
862 *
863 * During commit it is sometimes necessary to copy a pnode
864 * (see dirty_cow_pnode). When that happens, references in
865 * category lists and heaps must be replaced. This function does that.
866 */
867static void replace_cats(struct ubifs_info *c, struct ubifs_pnode *old_pnode,
868 struct ubifs_pnode *new_pnode)
869{
870 int i;
871
872 for (i = 0; i < UBIFS_LPT_FANOUT; i++) {
873 if (!new_pnode->lprops[i].lnum)
874 return;
875 ubifs_replace_cat(c, &old_pnode->lprops[i],
876 &new_pnode->lprops[i]);
877 }
878}
879
880/**
881 * check_lpt_crc - check LPT node crc is correct.
882 * @c: UBIFS file-system description object
883 * @buf: buffer containing node
884 * @len: length of node
885 *
886 * This function returns %0 on success and a negative error code on failure.
887 */
888static int check_lpt_crc(void *buf, int len)
889{
890 int pos = 0;
891 uint8_t *addr = buf;
892 uint16_t crc, calc_crc;
893
894 crc = ubifs_unpack_bits(&addr, &pos, UBIFS_LPT_CRC_BITS);
895 calc_crc = crc16(-1, buf + UBIFS_LPT_CRC_BYTES,
896 len - UBIFS_LPT_CRC_BYTES);
897 if (crc != calc_crc) {
898 ubifs_err("invalid crc in LPT node: crc %hx calc %hx", crc,
899 calc_crc);
900 dbg_dump_stack();
901 return -EINVAL;
902 }
903 return 0;
904}
905
906/**
907 * check_lpt_type - check LPT node type is correct.
908 * @c: UBIFS file-system description object
909 * @addr: address of type bit field is passed and returned updated here
910 * @pos: position of type bit field is passed and returned updated here
911 * @type: expected type
912 *
913 * This function returns %0 on success and a negative error code on failure.
914 */
915static int check_lpt_type(uint8_t **addr, int *pos, int type)
916{
917 int node_type;
918
919 node_type = ubifs_unpack_bits(addr, pos, UBIFS_LPT_TYPE_BITS);
920 if (node_type != type) {
921 ubifs_err("invalid type (%d) in LPT node type %d", node_type,
922 type);
923 dbg_dump_stack();
924 return -EINVAL;
925 }
926 return 0;
927}
928
929/**
930 * unpack_pnode - unpack a pnode.
931 * @c: UBIFS file-system description object
932 * @buf: buffer containing packed pnode to unpack
933 * @pnode: pnode structure to fill
934 *
935 * This function returns %0 on success and a negative error code on failure.
936 */
937static int unpack_pnode(struct ubifs_info *c, void *buf,
938 struct ubifs_pnode *pnode)
939{
940 uint8_t *addr = buf + UBIFS_LPT_CRC_BYTES;
941 int i, pos = 0, err;
942
943 err = check_lpt_type(&addr, &pos, UBIFS_LPT_PNODE);
944 if (err)
945 return err;
946 if (c->big_lpt)
947 pnode->num = ubifs_unpack_bits(&addr, &pos, c->pcnt_bits);
948 for (i = 0; i < UBIFS_LPT_FANOUT; i++) {
949 struct ubifs_lprops * const lprops = &pnode->lprops[i];
950
951 lprops->free = ubifs_unpack_bits(&addr, &pos, c->space_bits);
952 lprops->free <<= 3;
953 lprops->dirty = ubifs_unpack_bits(&addr, &pos, c->space_bits);
954 lprops->dirty <<= 3;
955
956 if (ubifs_unpack_bits(&addr, &pos, 1))
957 lprops->flags = LPROPS_INDEX;
958 else
959 lprops->flags = 0;
960 lprops->flags |= ubifs_categorize_lprops(c, lprops);
961 }
962 err = check_lpt_crc(buf, c->pnode_sz);
963 return err;
964}
965
966/**
967 * unpack_nnode - unpack a nnode.
968 * @c: UBIFS file-system description object
969 * @buf: buffer containing packed nnode to unpack
970 * @nnode: nnode structure to fill
971 *
972 * This function returns %0 on success and a negative error code on failure.
973 */
974static int unpack_nnode(struct ubifs_info *c, void *buf,
975 struct ubifs_nnode *nnode)
976{
977 uint8_t *addr = buf + UBIFS_LPT_CRC_BYTES;
978 int i, pos = 0, err;
979
980 err = check_lpt_type(&addr, &pos, UBIFS_LPT_NNODE);
981 if (err)
982 return err;
983 if (c->big_lpt)
984 nnode->num = ubifs_unpack_bits(&addr, &pos, c->pcnt_bits);
985 for (i = 0; i < UBIFS_LPT_FANOUT; i++) {
986 int lnum;
987
988 lnum = ubifs_unpack_bits(&addr, &pos, c->lpt_lnum_bits) +
989 c->lpt_first;
990 if (lnum == c->lpt_last + 1)
991 lnum = 0;
992 nnode->nbranch[i].lnum = lnum;
993 nnode->nbranch[i].offs = ubifs_unpack_bits(&addr, &pos,
994 c->lpt_offs_bits);
995 }
996 err = check_lpt_crc(buf, c->nnode_sz);
997 return err;
998}
999
1000/**
1001 * unpack_ltab - unpack the LPT's own lprops table.
1002 * @c: UBIFS file-system description object
1003 * @buf: buffer from which to unpack
1004 *
1005 * This function returns %0 on success and a negative error code on failure.
1006 */
1007static int unpack_ltab(struct ubifs_info *c, void *buf)
1008{
1009 uint8_t *addr = buf + UBIFS_LPT_CRC_BYTES;
1010 int i, pos = 0, err;
1011
1012 err = check_lpt_type(&addr, &pos, UBIFS_LPT_LTAB);
1013 if (err)
1014 return err;
1015 for (i = 0; i < c->lpt_lebs; i++) {
1016 int free = ubifs_unpack_bits(&addr, &pos, c->lpt_spc_bits);
1017 int dirty = ubifs_unpack_bits(&addr, &pos, c->lpt_spc_bits);
1018
1019 if (free < 0 || free > c->leb_size || dirty < 0 ||
1020 dirty > c->leb_size || free + dirty > c->leb_size)
1021 return -EINVAL;
1022
1023 c->ltab[i].free = free;
1024 c->ltab[i].dirty = dirty;
1025 c->ltab[i].tgc = 0;
1026 c->ltab[i].cmt = 0;
1027 }
1028 err = check_lpt_crc(buf, c->ltab_sz);
1029 return err;
1030}
1031
1032/**
1033 * unpack_lsave - unpack the LPT's save table.
1034 * @c: UBIFS file-system description object
1035 * @buf: buffer from which to unpack
1036 *
1037 * This function returns %0 on success and a negative error code on failure.
1038 */
1039static int unpack_lsave(struct ubifs_info *c, void *buf)
1040{
1041 uint8_t *addr = buf + UBIFS_LPT_CRC_BYTES;
1042 int i, pos = 0, err;
1043
1044 err = check_lpt_type(&addr, &pos, UBIFS_LPT_LSAVE);
1045 if (err)
1046 return err;
1047 for (i = 0; i < c->lsave_cnt; i++) {
1048 int lnum = ubifs_unpack_bits(&addr, &pos, c->lnum_bits);
1049
1050 if (lnum < c->main_first || lnum >= c->leb_cnt)
1051 return -EINVAL;
1052 c->lsave[i] = lnum;
1053 }
1054 err = check_lpt_crc(buf, c->lsave_sz);
1055 return err;
1056}
1057
1058/**
1059 * validate_nnode - validate a nnode.
1060 * @c: UBIFS file-system description object
1061 * @nnode: nnode to validate
1062 * @parent: parent nnode (or NULL for the root nnode)
1063 * @iip: index in parent
1064 *
1065 * This function returns %0 on success and a negative error code on failure.
1066 */
1067static int validate_nnode(struct ubifs_info *c, struct ubifs_nnode *nnode,
1068 struct ubifs_nnode *parent, int iip)
1069{
1070 int i, lvl, max_offs;
1071
1072 if (c->big_lpt) {
1073 int num = calc_nnode_num_from_parent(c, parent, iip);
1074
1075 if (nnode->num != num)
1076 return -EINVAL;
1077 }
1078 lvl = parent ? parent->level - 1 : c->lpt_hght;
1079 if (lvl < 1)
1080 return -EINVAL;
1081 if (lvl == 1)
1082 max_offs = c->leb_size - c->pnode_sz;
1083 else
1084 max_offs = c->leb_size - c->nnode_sz;
1085 for (i = 0; i < UBIFS_LPT_FANOUT; i++) {
1086 int lnum = nnode->nbranch[i].lnum;
1087 int offs = nnode->nbranch[i].offs;
1088
1089 if (lnum == 0) {
1090 if (offs != 0)
1091 return -EINVAL;
1092 continue;
1093 }
1094 if (lnum < c->lpt_first || lnum > c->lpt_last)
1095 return -EINVAL;
1096 if (offs < 0 || offs > max_offs)
1097 return -EINVAL;
1098 }
1099 return 0;
1100}
1101
1102/**
1103 * validate_pnode - validate a pnode.
1104 * @c: UBIFS file-system description object
1105 * @pnode: pnode to validate
1106 * @parent: parent nnode
1107 * @iip: index in parent
1108 *
1109 * This function returns %0 on success and a negative error code on failure.
1110 */
1111static int validate_pnode(struct ubifs_info *c, struct ubifs_pnode *pnode,
1112 struct ubifs_nnode *parent, int iip)
1113{
1114 int i;
1115
1116 if (c->big_lpt) {
1117 int num = calc_pnode_num_from_parent(c, parent, iip);
1118
1119 if (pnode->num != num)
1120 return -EINVAL;
1121 }
1122 for (i = 0; i < UBIFS_LPT_FANOUT; i++) {
1123 int free = pnode->lprops[i].free;
1124 int dirty = pnode->lprops[i].dirty;
1125
1126 if (free < 0 || free > c->leb_size || free % c->min_io_size ||
1127 (free & 7))
1128 return -EINVAL;
1129 if (dirty < 0 || dirty > c->leb_size || (dirty & 7))
1130 return -EINVAL;
1131 if (dirty + free > c->leb_size)
1132 return -EINVAL;
1133 }
1134 return 0;
1135}
1136
1137/**
1138 * set_pnode_lnum - set LEB numbers on a pnode.
1139 * @c: UBIFS file-system description object
1140 * @pnode: pnode to update
1141 *
1142 * This function calculates the LEB numbers for the LEB properties it contains
1143 * based on the pnode number.
1144 */
1145static void set_pnode_lnum(struct ubifs_info *c, struct ubifs_pnode *pnode)
1146{
1147 int i, lnum;
1148
1149 lnum = (pnode->num << UBIFS_LPT_FANOUT_SHIFT) + c->main_first;
1150 for (i = 0; i < UBIFS_LPT_FANOUT; i++) {
1151 if (lnum >= c->leb_cnt)
1152 return;
1153 pnode->lprops[i].lnum = lnum++;
1154 }
1155}
1156
1157/**
1158 * ubifs_read_nnode - read a nnode from flash and link it to the tree in memory.
1159 * @c: UBIFS file-system description object
1160 * @parent: parent nnode (or NULL for the root)
1161 * @iip: index in parent
1162 *
1163 * This function returns %0 on success and a negative error code on failure.
1164 */
1165int ubifs_read_nnode(struct ubifs_info *c, struct ubifs_nnode *parent, int iip)
1166{
1167 struct ubifs_nbranch *branch = NULL;
1168 struct ubifs_nnode *nnode = NULL;
1169 void *buf = c->lpt_nod_buf;
1170 int err, lnum, offs;
1171
1172 if (parent) {
1173 branch = &parent->nbranch[iip];
1174 lnum = branch->lnum;
1175 offs = branch->offs;
1176 } else {
1177 lnum = c->lpt_lnum;
1178 offs = c->lpt_offs;
1179 }
1180 nnode = kzalloc(sizeof(struct ubifs_nnode), GFP_NOFS);
1181 if (!nnode) {
1182 err = -ENOMEM;
1183 goto out;
1184 }
1185 if (lnum == 0) {
1186 /*
1187 * This nnode was not written which just means that the LEB
1188 * properties in the subtree below it describe empty LEBs. We
1189 * make the nnode as though we had read it, which in fact means
1190 * doing almost nothing.
1191 */
1192 if (c->big_lpt)
1193 nnode->num = calc_nnode_num_from_parent(c, parent, iip);
1194 } else {
1195 err = ubi_read(c->ubi, lnum, buf, offs, c->nnode_sz);
1196 if (err)
1197 goto out;
1198 err = unpack_nnode(c, buf, nnode);
1199 if (err)
1200 goto out;
1201 }
1202 err = validate_nnode(c, nnode, parent, iip);
1203 if (err)
1204 goto out;
1205 if (!c->big_lpt)
1206 nnode->num = calc_nnode_num_from_parent(c, parent, iip);
1207 if (parent) {
1208 branch->nnode = nnode;
1209 nnode->level = parent->level - 1;
1210 } else {
1211 c->nroot = nnode;
1212 nnode->level = c->lpt_hght;
1213 }
1214 nnode->parent = parent;
1215 nnode->iip = iip;
1216 return 0;
1217
1218out:
1219 ubifs_err("error %d reading nnode at %d:%d", err, lnum, offs);
1220 kfree(nnode);
1221 return err;
1222}
1223
1224/**
1225 * read_pnode - read a pnode from flash and link it to the tree in memory.
1226 * @c: UBIFS file-system description object
1227 * @parent: parent nnode
1228 * @iip: index in parent
1229 *
1230 * This function returns %0 on success and a negative error code on failure.
1231 */
1232static int read_pnode(struct ubifs_info *c, struct ubifs_nnode *parent, int iip)
1233{
1234 struct ubifs_nbranch *branch;
1235 struct ubifs_pnode *pnode = NULL;
1236 void *buf = c->lpt_nod_buf;
1237 int err, lnum, offs;
1238
1239 branch = &parent->nbranch[iip];
1240 lnum = branch->lnum;
1241 offs = branch->offs;
1242 pnode = kzalloc(sizeof(struct ubifs_pnode), GFP_NOFS);
1243 if (!pnode) {
1244 err = -ENOMEM;
1245 goto out;
1246 }
1247 if (lnum == 0) {
1248 /*
1249 * This pnode was not written which just means that the LEB
1250 * properties in it describe empty LEBs. We make the pnode as
1251 * though we had read it.
1252 */
1253 int i;
1254
1255 if (c->big_lpt)
1256 pnode->num = calc_pnode_num_from_parent(c, parent, iip);
1257 for (i = 0; i < UBIFS_LPT_FANOUT; i++) {
1258 struct ubifs_lprops * const lprops = &pnode->lprops[i];
1259
1260 lprops->free = c->leb_size;
1261 lprops->flags = ubifs_categorize_lprops(c, lprops);
1262 }
1263 } else {
1264 err = ubi_read(c->ubi, lnum, buf, offs, c->pnode_sz);
1265 if (err)
1266 goto out;
1267 err = unpack_pnode(c, buf, pnode);
1268 if (err)
1269 goto out;
1270 }
1271 err = validate_pnode(c, pnode, parent, iip);
1272 if (err)
1273 goto out;
1274 if (!c->big_lpt)
1275 pnode->num = calc_pnode_num_from_parent(c, parent, iip);
1276 branch->pnode = pnode;
1277 pnode->parent = parent;
1278 pnode->iip = iip;
1279 set_pnode_lnum(c, pnode);
1280 c->pnodes_have += 1;
1281 return 0;
1282
1283out:
1284 ubifs_err("error %d reading pnode at %d:%d", err, lnum, offs);
1285 dbg_dump_pnode(c, pnode, parent, iip);
1286 dbg_msg("calc num: %d", calc_pnode_num_from_parent(c, parent, iip));
1287 kfree(pnode);
1288 return err;
1289}
1290
1291/**
1292 * read_ltab - read LPT's own lprops table.
1293 * @c: UBIFS file-system description object
1294 *
1295 * This function returns %0 on success and a negative error code on failure.
1296 */
1297static int read_ltab(struct ubifs_info *c)
1298{
1299 int err;
1300 void *buf;
1301
1302 buf = vmalloc(c->ltab_sz);
1303 if (!buf)
1304 return -ENOMEM;
1305 err = ubi_read(c->ubi, c->ltab_lnum, buf, c->ltab_offs, c->ltab_sz);
1306 if (err)
1307 goto out;
1308 err = unpack_ltab(c, buf);
1309out:
1310 vfree(buf);
1311 return err;
1312}
1313
1314/**
1315 * read_lsave - read LPT's save table.
1316 * @c: UBIFS file-system description object
1317 *
1318 * This function returns %0 on success and a negative error code on failure.
1319 */
1320static int read_lsave(struct ubifs_info *c)
1321{
1322 int err, i;
1323 void *buf;
1324
1325 buf = vmalloc(c->lsave_sz);
1326 if (!buf)
1327 return -ENOMEM;
1328 err = ubi_read(c->ubi, c->lsave_lnum, buf, c->lsave_offs, c->lsave_sz);
1329 if (err)
1330 goto out;
1331 err = unpack_lsave(c, buf);
1332 if (err)
1333 goto out;
1334 for (i = 0; i < c->lsave_cnt; i++) {
1335 int lnum = c->lsave[i];
1336
1337 /*
1338 * Due to automatic resizing, the values in the lsave table
1339 * could be beyond the volume size - just ignore them.
1340 */
1341 if (lnum >= c->leb_cnt)
1342 continue;
1343 ubifs_lpt_lookup(c, lnum);
1344 }
1345out:
1346 vfree(buf);
1347 return err;
1348}
1349
1350/**
1351 * ubifs_get_nnode - get a nnode.
1352 * @c: UBIFS file-system description object
1353 * @parent: parent nnode (or NULL for the root)
1354 * @iip: index in parent
1355 *
1356 * This function returns a pointer to the nnode on success or a negative error
1357 * code on failure.
1358 */
1359struct ubifs_nnode *ubifs_get_nnode(struct ubifs_info *c,
1360 struct ubifs_nnode *parent, int iip)
1361{
1362 struct ubifs_nbranch *branch;
1363 struct ubifs_nnode *nnode;
1364 int err;
1365
1366 branch = &parent->nbranch[iip];
1367 nnode = branch->nnode;
1368 if (nnode)
1369 return nnode;
1370 err = ubifs_read_nnode(c, parent, iip);
1371 if (err)
1372 return ERR_PTR(err);
1373 return branch->nnode;
1374}
1375
1376/**
1377 * ubifs_get_pnode - get a pnode.
1378 * @c: UBIFS file-system description object
1379 * @parent: parent nnode
1380 * @iip: index in parent
1381 *
1382 * This function returns a pointer to the pnode on success or a negative error
1383 * code on failure.
1384 */
1385struct ubifs_pnode *ubifs_get_pnode(struct ubifs_info *c,
1386 struct ubifs_nnode *parent, int iip)
1387{
1388 struct ubifs_nbranch *branch;
1389 struct ubifs_pnode *pnode;
1390 int err;
1391
1392 branch = &parent->nbranch[iip];
1393 pnode = branch->pnode;
1394 if (pnode)
1395 return pnode;
1396 err = read_pnode(c, parent, iip);
1397 if (err)
1398 return ERR_PTR(err);
1399 update_cats(c, branch->pnode);
1400 return branch->pnode;
1401}
1402
1403/**
1404 * ubifs_lpt_lookup - lookup LEB properties in the LPT.
1405 * @c: UBIFS file-system description object
1406 * @lnum: LEB number to lookup
1407 *
1408 * This function returns a pointer to the LEB properties on success or a
1409 * negative error code on failure.
1410 */
1411struct ubifs_lprops *ubifs_lpt_lookup(struct ubifs_info *c, int lnum)
1412{
1413 int err, i, h, iip, shft;
1414 struct ubifs_nnode *nnode;
1415 struct ubifs_pnode *pnode;
1416
1417 if (!c->nroot) {
1418 err = ubifs_read_nnode(c, NULL, 0);
1419 if (err)
1420 return ERR_PTR(err);
1421 }
1422 nnode = c->nroot;
1423 i = lnum - c->main_first;
1424 shft = c->lpt_hght * UBIFS_LPT_FANOUT_SHIFT;
1425 for (h = 1; h < c->lpt_hght; h++) {
1426 iip = ((i >> shft) & (UBIFS_LPT_FANOUT - 1));
1427 shft -= UBIFS_LPT_FANOUT_SHIFT;
1428 nnode = ubifs_get_nnode(c, nnode, iip);
1429 if (IS_ERR(nnode))
1430 return ERR_PTR(PTR_ERR(nnode));
1431 }
1432 iip = ((i >> shft) & (UBIFS_LPT_FANOUT - 1));
1433 shft -= UBIFS_LPT_FANOUT_SHIFT;
1434 pnode = ubifs_get_pnode(c, nnode, iip);
1435 if (IS_ERR(pnode))
1436 return ERR_PTR(PTR_ERR(pnode));
1437 iip = (i & (UBIFS_LPT_FANOUT - 1));
1438 dbg_lp("LEB %d, free %d, dirty %d, flags %d", lnum,
1439 pnode->lprops[iip].free, pnode->lprops[iip].dirty,
1440 pnode->lprops[iip].flags);
1441 return &pnode->lprops[iip];
1442}
1443
1444/**
1445 * dirty_cow_nnode - ensure a nnode is not being committed.
1446 * @c: UBIFS file-system description object
1447 * @nnode: nnode to check
1448 *
1449 * Returns dirtied nnode on success or negative error code on failure.
1450 */
1451static struct ubifs_nnode *dirty_cow_nnode(struct ubifs_info *c,
1452 struct ubifs_nnode *nnode)
1453{
1454 struct ubifs_nnode *n;
1455 int i;
1456
1457 if (!test_bit(COW_CNODE, &nnode->flags)) {
1458 /* nnode is not being committed */
1459 if (!test_and_set_bit(DIRTY_CNODE, &nnode->flags)) {
1460 c->dirty_nn_cnt += 1;
1461 ubifs_add_nnode_dirt(c, nnode);
1462 }
1463 return nnode;
1464 }
1465
1466 /* nnode is being committed, so copy it */
1467 n = kmalloc(sizeof(struct ubifs_nnode), GFP_NOFS);
1468 if (unlikely(!n))
1469 return ERR_PTR(-ENOMEM);
1470
1471 memcpy(n, nnode, sizeof(struct ubifs_nnode));
1472 n->cnext = NULL;
1473 __set_bit(DIRTY_CNODE, &n->flags);
1474 __clear_bit(COW_CNODE, &n->flags);
1475
1476 /* The children now have new parent */
1477 for (i = 0; i < UBIFS_LPT_FANOUT; i++) {
1478 struct ubifs_nbranch *branch = &n->nbranch[i];
1479
1480 if (branch->cnode)
1481 branch->cnode->parent = n;
1482 }
1483
1484 ubifs_assert(!test_bit(OBSOLETE_CNODE, &nnode->flags));
1485 __set_bit(OBSOLETE_CNODE, &nnode->flags);
1486
1487 c->dirty_nn_cnt += 1;
1488 ubifs_add_nnode_dirt(c, nnode);
1489 if (nnode->parent)
1490 nnode->parent->nbranch[n->iip].nnode = n;
1491 else
1492 c->nroot = n;
1493 return n;
1494}
1495
1496/**
1497 * dirty_cow_pnode - ensure a pnode is not being committed.
1498 * @c: UBIFS file-system description object
1499 * @pnode: pnode to check
1500 *
1501 * Returns dirtied pnode on success or negative error code on failure.
1502 */
1503static struct ubifs_pnode *dirty_cow_pnode(struct ubifs_info *c,
1504 struct ubifs_pnode *pnode)
1505{
1506 struct ubifs_pnode *p;
1507
1508 if (!test_bit(COW_CNODE, &pnode->flags)) {
1509 /* pnode is not being committed */
1510 if (!test_and_set_bit(DIRTY_CNODE, &pnode->flags)) {
1511 c->dirty_pn_cnt += 1;
1512 add_pnode_dirt(c, pnode);
1513 }
1514 return pnode;
1515 }
1516
1517 /* pnode is being committed, so copy it */
1518 p = kmalloc(sizeof(struct ubifs_pnode), GFP_NOFS);
1519 if (unlikely(!p))
1520 return ERR_PTR(-ENOMEM);
1521
1522 memcpy(p, pnode, sizeof(struct ubifs_pnode));
1523 p->cnext = NULL;
1524 __set_bit(DIRTY_CNODE, &p->flags);
1525 __clear_bit(COW_CNODE, &p->flags);
1526 replace_cats(c, pnode, p);
1527
1528 ubifs_assert(!test_bit(OBSOLETE_CNODE, &pnode->flags));
1529 __set_bit(OBSOLETE_CNODE, &pnode->flags);
1530
1531 c->dirty_pn_cnt += 1;
1532 add_pnode_dirt(c, pnode);
1533 pnode->parent->nbranch[p->iip].pnode = p;
1534 return p;
1535}
1536
1537/**
1538 * ubifs_lpt_lookup_dirty - lookup LEB properties in the LPT.
1539 * @c: UBIFS file-system description object
1540 * @lnum: LEB number to lookup
1541 *
1542 * This function returns a pointer to the LEB properties on success or a
1543 * negative error code on failure.
1544 */
1545struct ubifs_lprops *ubifs_lpt_lookup_dirty(struct ubifs_info *c, int lnum)
1546{
1547 int err, i, h, iip, shft;
1548 struct ubifs_nnode *nnode;
1549 struct ubifs_pnode *pnode;
1550
1551 if (!c->nroot) {
1552 err = ubifs_read_nnode(c, NULL, 0);
1553 if (err)
1554 return ERR_PTR(err);
1555 }
1556 nnode = c->nroot;
1557 nnode = dirty_cow_nnode(c, nnode);
1558 if (IS_ERR(nnode))
1559 return ERR_PTR(PTR_ERR(nnode));
1560 i = lnum - c->main_first;
1561 shft = c->lpt_hght * UBIFS_LPT_FANOUT_SHIFT;
1562 for (h = 1; h < c->lpt_hght; h++) {
1563 iip = ((i >> shft) & (UBIFS_LPT_FANOUT - 1));
1564 shft -= UBIFS_LPT_FANOUT_SHIFT;
1565 nnode = ubifs_get_nnode(c, nnode, iip);
1566 if (IS_ERR(nnode))
1567 return ERR_PTR(PTR_ERR(nnode));
1568 nnode = dirty_cow_nnode(c, nnode);
1569 if (IS_ERR(nnode))
1570 return ERR_PTR(PTR_ERR(nnode));
1571 }
1572 iip = ((i >> shft) & (UBIFS_LPT_FANOUT - 1));
1573 shft -= UBIFS_LPT_FANOUT_SHIFT;
1574 pnode = ubifs_get_pnode(c, nnode, iip);
1575 if (IS_ERR(pnode))
1576 return ERR_PTR(PTR_ERR(pnode));
1577 pnode = dirty_cow_pnode(c, pnode);
1578 if (IS_ERR(pnode))
1579 return ERR_PTR(PTR_ERR(pnode));
1580 iip = (i & (UBIFS_LPT_FANOUT - 1));
1581 dbg_lp("LEB %d, free %d, dirty %d, flags %d", lnum,
1582 pnode->lprops[iip].free, pnode->lprops[iip].dirty,
1583 pnode->lprops[iip].flags);
1584 ubifs_assert(test_bit(DIRTY_CNODE, &pnode->flags));
1585 return &pnode->lprops[iip];
1586}
1587
1588/**
1589 * lpt_init_rd - initialize the LPT for reading.
1590 * @c: UBIFS file-system description object
1591 *
1592 * This function returns %0 on success and a negative error code on failure.
1593 */
1594static int lpt_init_rd(struct ubifs_info *c)
1595{
1596 int err, i;
1597
1598 c->ltab = vmalloc(sizeof(struct ubifs_lpt_lprops) * c->lpt_lebs);
1599 if (!c->ltab)
1600 return -ENOMEM;
1601
1602 i = max_t(int, c->nnode_sz, c->pnode_sz);
1603 c->lpt_nod_buf = kmalloc(i, GFP_KERNEL);
1604 if (!c->lpt_nod_buf)
1605 return -ENOMEM;
1606
1607 for (i = 0; i < LPROPS_HEAP_CNT; i++) {
1608 c->lpt_heap[i].arr = kmalloc(sizeof(void *) * LPT_HEAP_SZ,
1609 GFP_KERNEL);
1610 if (!c->lpt_heap[i].arr)
1611 return -ENOMEM;
1612 c->lpt_heap[i].cnt = 0;
1613 c->lpt_heap[i].max_cnt = LPT_HEAP_SZ;
1614 }
1615
1616 c->dirty_idx.arr = kmalloc(sizeof(void *) * LPT_HEAP_SZ, GFP_KERNEL);
1617 if (!c->dirty_idx.arr)
1618 return -ENOMEM;
1619 c->dirty_idx.cnt = 0;
1620 c->dirty_idx.max_cnt = LPT_HEAP_SZ;
1621
1622 err = read_ltab(c);
1623 if (err)
1624 return err;
1625
1626 dbg_lp("space_bits %d", c->space_bits);
1627 dbg_lp("lpt_lnum_bits %d", c->lpt_lnum_bits);
1628 dbg_lp("lpt_offs_bits %d", c->lpt_offs_bits);
1629 dbg_lp("lpt_spc_bits %d", c->lpt_spc_bits);
1630 dbg_lp("pcnt_bits %d", c->pcnt_bits);
1631 dbg_lp("lnum_bits %d", c->lnum_bits);
1632 dbg_lp("pnode_sz %d", c->pnode_sz);
1633 dbg_lp("nnode_sz %d", c->nnode_sz);
1634 dbg_lp("ltab_sz %d", c->ltab_sz);
1635 dbg_lp("lsave_sz %d", c->lsave_sz);
1636 dbg_lp("lsave_cnt %d", c->lsave_cnt);
1637 dbg_lp("lpt_hght %d", c->lpt_hght);
1638 dbg_lp("big_lpt %d", c->big_lpt);
1639 dbg_lp("LPT root is at %d:%d", c->lpt_lnum, c->lpt_offs);
1640 dbg_lp("LPT head is at %d:%d", c->nhead_lnum, c->nhead_offs);
1641 dbg_lp("LPT ltab is at %d:%d", c->ltab_lnum, c->ltab_offs);
1642 if (c->big_lpt)
1643 dbg_lp("LPT lsave is at %d:%d", c->lsave_lnum, c->lsave_offs);
1644
1645 return 0;
1646}
1647
1648/**
1649 * lpt_init_wr - initialize the LPT for writing.
1650 * @c: UBIFS file-system description object
1651 *
1652 * 'lpt_init_rd()' must have been called already.
1653 *
1654 * This function returns %0 on success and a negative error code on failure.
1655 */
1656static int lpt_init_wr(struct ubifs_info *c)
1657{
1658 int err, i;
1659
1660 c->ltab_cmt = vmalloc(sizeof(struct ubifs_lpt_lprops) * c->lpt_lebs);
1661 if (!c->ltab_cmt)
1662 return -ENOMEM;
1663
1664 c->lpt_buf = vmalloc(c->leb_size);
1665 if (!c->lpt_buf)
1666 return -ENOMEM;
1667
1668 if (c->big_lpt) {
1669 c->lsave = kmalloc(sizeof(int) * c->lsave_cnt, GFP_NOFS);
1670 if (!c->lsave)
1671 return -ENOMEM;
1672 err = read_lsave(c);
1673 if (err)
1674 return err;
1675 }
1676
1677 for (i = 0; i < c->lpt_lebs; i++)
1678 if (c->ltab[i].free == c->leb_size) {
1679 err = ubifs_leb_unmap(c, i + c->lpt_first);
1680 if (err)
1681 return err;
1682 }
1683
1684 return 0;
1685}
1686
/**
 * ubifs_lpt_init - initialize the LPT.
 * @c: UBIFS file-system description object
 * @rd: whether to initialize lpt for reading
 * @wr: whether to initialize lpt for writing
 *
 * For mounting 'rw', @rd and @wr are both true. For mounting 'ro', @rd is true
 * and @wr is false. For mounting from 'ro' to 'rw', @rd is false and @wr is
 * true. The write initialization is skipped if the read initialization fails.
 *
 * This function returns %0 on success and a negative error code on failure.
 */
int ubifs_lpt_init(struct ubifs_info *c, int rd, int wr)
{
	int err = 0;

	if (rd)
		err = lpt_init_rd(c);
	if (!err && wr)
		err = lpt_init_wr(c);

	return err;
}
1717
/**
 * struct lpt_scan_node - somewhere to put nodes while we scan LPT.
 * @nnode: where to keep a nnode
 * @pnode: where to keep a pnode
 * @cnode: where to keep a cnode
 * @in_tree: is the node in the tree in memory
 * @ptr.nnode: pointer to the nnode (if it is an nnode) which may be here or in
 *             the tree
 * @ptr.pnode: ditto for pnode
 * @ptr.cnode: ditto for cnode
 *
 * One element of this type is used per level of the scan path. The first
 * (anonymous) union is scratch storage for a node that was read from flash but
 * is not linked into the in-memory tree; @ptr refers either to that scratch
 * copy (@in_tree is %0) or to the node in the tree (@in_tree is %1).
 */
struct lpt_scan_node {
	/* Scratch copy of the node, used only while @in_tree is 0 */
	union {
		struct ubifs_nnode nnode;
		struct ubifs_pnode pnode;
		struct ubifs_cnode cnode;
	};
	int in_tree;
	/* The node currently in use at this level of the path */
	union {
		struct ubifs_nnode *nnode;
		struct ubifs_pnode *pnode;
		struct ubifs_cnode *cnode;
	} ptr;
};
1742
1743/**
1744 * scan_get_nnode - for the scan, get a nnode from either the tree or flash.
1745 * @c: the UBIFS file-system description object
1746 * @path: where to put the nnode
1747 * @parent: parent of the nnode
1748 * @iip: index in parent of the nnode
1749 *
1750 * This function returns a pointer to the nnode on success or a negative error
1751 * code on failure.
1752 */
1753static struct ubifs_nnode *scan_get_nnode(struct ubifs_info *c,
1754 struct lpt_scan_node *path,
1755 struct ubifs_nnode *parent, int iip)
1756{
1757 struct ubifs_nbranch *branch;
1758 struct ubifs_nnode *nnode;
1759 void *buf = c->lpt_nod_buf;
1760 int err;
1761
1762 branch = &parent->nbranch[iip];
1763 nnode = branch->nnode;
1764 if (nnode) {
1765 path->in_tree = 1;
1766 path->ptr.nnode = nnode;
1767 return nnode;
1768 }
1769 nnode = &path->nnode;
1770 path->in_tree = 0;
1771 path->ptr.nnode = nnode;
1772 memset(nnode, 0, sizeof(struct ubifs_nnode));
1773 if (branch->lnum == 0) {
1774 /*
1775 * This nnode was not written which just means that the LEB
1776 * properties in the subtree below it describe empty LEBs. We
1777 * make the nnode as though we had read it, which in fact means
1778 * doing almost nothing.
1779 */
1780 if (c->big_lpt)
1781 nnode->num = calc_nnode_num_from_parent(c, parent, iip);
1782 } else {
1783 err = ubi_read(c->ubi, branch->lnum, buf, branch->offs,
1784 c->nnode_sz);
1785 if (err)
1786 return ERR_PTR(err);
1787 err = unpack_nnode(c, buf, nnode);
1788 if (err)
1789 return ERR_PTR(err);
1790 }
1791 err = validate_nnode(c, nnode, parent, iip);
1792 if (err)
1793 return ERR_PTR(err);
1794 if (!c->big_lpt)
1795 nnode->num = calc_nnode_num_from_parent(c, parent, iip);
1796 nnode->level = parent->level - 1;
1797 nnode->parent = parent;
1798 nnode->iip = iip;
1799 return nnode;
1800}
1801
1802/**
1803 * scan_get_pnode - for the scan, get a pnode from either the tree or flash.
1804 * @c: the UBIFS file-system description object
1805 * @path: where to put the pnode
1806 * @parent: parent of the pnode
1807 * @iip: index in parent of the pnode
1808 *
1809 * This function returns a pointer to the pnode on success or a negative error
1810 * code on failure.
1811 */
1812static struct ubifs_pnode *scan_get_pnode(struct ubifs_info *c,
1813 struct lpt_scan_node *path,
1814 struct ubifs_nnode *parent, int iip)
1815{
1816 struct ubifs_nbranch *branch;
1817 struct ubifs_pnode *pnode;
1818 void *buf = c->lpt_nod_buf;
1819 int err;
1820
1821 branch = &parent->nbranch[iip];
1822 pnode = branch->pnode;
1823 if (pnode) {
1824 path->in_tree = 1;
1825 path->ptr.pnode = pnode;
1826 return pnode;
1827 }
1828 pnode = &path->pnode;
1829 path->in_tree = 0;
1830 path->ptr.pnode = pnode;
1831 memset(pnode, 0, sizeof(struct ubifs_pnode));
1832 if (branch->lnum == 0) {
1833 /*
1834 * This pnode was not written which just means that the LEB
1835 * properties in it describe empty LEBs. We make the pnode as
1836 * though we had read it.
1837 */
1838 int i;
1839
1840 if (c->big_lpt)
1841 pnode->num = calc_pnode_num_from_parent(c, parent, iip);
1842 for (i = 0; i < UBIFS_LPT_FANOUT; i++) {
1843 struct ubifs_lprops * const lprops = &pnode->lprops[i];
1844
1845 lprops->free = c->leb_size;
1846 lprops->flags = ubifs_categorize_lprops(c, lprops);
1847 }
1848 } else {
1849 ubifs_assert(branch->lnum >= c->lpt_first &&
1850 branch->lnum <= c->lpt_last);
1851 ubifs_assert(branch->offs >= 0 && branch->offs < c->leb_size);
1852 err = ubi_read(c->ubi, branch->lnum, buf, branch->offs,
1853 c->pnode_sz);
1854 if (err)
1855 return ERR_PTR(err);
1856 err = unpack_pnode(c, buf, pnode);
1857 if (err)
1858 return ERR_PTR(err);
1859 }
1860 err = validate_pnode(c, pnode, parent, iip);
1861 if (err)
1862 return ERR_PTR(err);
1863 if (!c->big_lpt)
1864 pnode->num = calc_pnode_num_from_parent(c, parent, iip);
1865 pnode->parent = parent;
1866 pnode->iip = iip;
1867 set_pnode_lnum(c, pnode);
1868 return pnode;
1869}
1870
/**
 * ubifs_lpt_scan_nolock - scan the LPT.
 * @c: the UBIFS file-system description object
 * @start_lnum: LEB number from which to start scanning
 * @end_lnum: LEB number at which to stop scanning
 * @scan_cb: callback function called for each lprops
 * @data: data to be passed to the callback function
 *
 * The scan visits LEB properties in increasing LEB number order, starting at
 * @start_lnum and wrapping around from the last LEB to @c->main_first. If
 * @start_lnum is %-1, the scan starts at the LEB following @end_lnum (or at
 * @c->main_first if that is past the last LEB).
 *
 * Nodes that are not in the in-memory tree are read into a temporary path and
 * are only added to the tree when the callback asks for it by returning
 * %LPT_SCAN_ADD. The callback stops the scan by returning %LPT_SCAN_STOP, and
 * aborts it by returning a negative error code, which is passed back to the
 * caller. If @end_lnum has been processed without the callback stopping the
 * scan, %-ENOSPC is returned.
 *
 * This function returns %0 on success and a negative error code on failure.
 */
int ubifs_lpt_scan_nolock(struct ubifs_info *c, int start_lnum, int end_lnum,
			  ubifs_lpt_scan_callback scan_cb, void *data)
{
	int err = 0, i, h, iip, shft;
	struct ubifs_nnode *nnode;
	struct ubifs_pnode *pnode;
	struct lpt_scan_node *path;

	if (start_lnum == -1) {
		start_lnum = end_lnum + 1;
		if (start_lnum >= c->leb_cnt)
			start_lnum = c->main_first;
	}

	ubifs_assert(start_lnum >= c->main_first && start_lnum < c->leb_cnt);
	ubifs_assert(end_lnum >= c->main_first && end_lnum < c->leb_cnt);

	if (!c->nroot) {
		err = ubifs_read_nnode(c, NULL, 0);
		if (err)
			return err;
	}

	/* One path element per tree level: root nnode down to the pnode */
	path = kmalloc(sizeof(struct lpt_scan_node) * (c->lpt_hght + 1),
		       GFP_NOFS);
	if (!path)
		return -ENOMEM;

	/* The root is always in the tree at this point */
	path[0].ptr.nnode = c->nroot;
	path[0].in_tree = 1;
again:
	/* Descend to the pnode containing start_lnum */
	nnode = c->nroot;
	i = start_lnum - c->main_first;
	shft = c->lpt_hght * UBIFS_LPT_FANOUT_SHIFT;
	for (h = 1; h < c->lpt_hght; h++) {
		iip = ((i >> shft) & (UBIFS_LPT_FANOUT - 1));
		shft -= UBIFS_LPT_FANOUT_SHIFT;
		nnode = scan_get_nnode(c, path + h, nnode, iip);
		if (IS_ERR(nnode)) {
			err = PTR_ERR(nnode);
			goto out;
		}
	}
	/* Here h == c->lpt_hght, i.e. path[h] is the slot for the pnode */
	iip = ((i >> shft) & (UBIFS_LPT_FANOUT - 1));
	shft -= UBIFS_LPT_FANOUT_SHIFT;
	pnode = scan_get_pnode(c, path + h, nnode, iip);
	if (IS_ERR(pnode)) {
		err = PTR_ERR(pnode);
		goto out;
	}
	/* From now on iip indexes the lprops inside the current pnode */
	iip = (i & (UBIFS_LPT_FANOUT - 1));

	/* Loop for each lprops */
	while (1) {
		struct ubifs_lprops *lprops = &pnode->lprops[iip];
		int ret, lnum = lprops->lnum;

		ret = scan_cb(c, lprops, path[h].in_tree, data);
		if (ret < 0) {
			err = ret;
			goto out;
		}
		if (ret & LPT_SCAN_ADD) {
			/* Add all the nodes in path to the tree in memory */
			for (h = 1; h < c->lpt_hght; h++) {
				const size_t sz = sizeof(struct ubifs_nnode);
				struct ubifs_nnode *parent;

				if (path[h].in_tree)
					continue;
				nnode = kmalloc(sz, GFP_NOFS);
				if (!nnode) {
					err = -ENOMEM;
					goto out;
				}
				memcpy(nnode, &path[h].nnode, sz);
				parent = nnode->parent;
				parent->nbranch[nnode->iip].nnode = nnode;
				path[h].ptr.nnode = nnode;
				path[h].in_tree = 1;
				/*
				 * The scratch child one level down must now
				 * point to the tree copy of its parent.
				 */
				path[h + 1].cnode.parent = nnode;
			}
			/* The loop above leaves h at the pnode level again */
			if (path[h].in_tree)
				ubifs_ensure_cat(c, lprops);
			else {
				const size_t sz = sizeof(struct ubifs_pnode);
				struct ubifs_nnode *parent;

				pnode = kmalloc(sz, GFP_NOFS);
				if (!pnode) {
					err = -ENOMEM;
					goto out;
				}
				memcpy(pnode, &path[h].pnode, sz);
				parent = pnode->parent;
				parent->nbranch[pnode->iip].pnode = pnode;
				path[h].ptr.pnode = pnode;
				path[h].in_tree = 1;
				update_cats(c, pnode);
				c->pnodes_have += 1;
			}
			err = dbg_check_lpt_nodes(c, (struct ubifs_cnode *)
						  c->nroot, 0, 0);
			if (err)
				goto out;
			err = dbg_check_cats(c);
			if (err)
				goto out;
		}
		if (ret & LPT_SCAN_STOP) {
			err = 0;
			break;
		}
		/* Get the next lprops */
		if (lnum == end_lnum) {
			/*
			 * We got to the end without finding what we were
			 * looking for
			 */
			err = -ENOSPC;
			goto out;
		}
		if (lnum + 1 >= c->leb_cnt) {
			/* Wrap-around to the beginning */
			start_lnum = c->main_first;
			goto again;
		}
		if (iip + 1 < UBIFS_LPT_FANOUT) {
			/* Next lprops is in the same pnode */
			iip += 1;
			continue;
		}
		/* We need to get the next pnode. Go up until we can go right */
		iip = pnode->iip;
		while (1) {
			h -= 1;
			ubifs_assert(h >= 0);
			nnode = path[h].ptr.nnode;
			if (iip + 1 < UBIFS_LPT_FANOUT)
				break;
			iip = nnode->iip;
		}
		/* Go right */
		iip += 1;
		/* Descend to the pnode */
		h += 1;
		for (; h < c->lpt_hght; h++) {
			nnode = scan_get_nnode(c, path + h, nnode, iip);
			if (IS_ERR(nnode)) {
				err = PTR_ERR(nnode);
				goto out;
			}
			/* Below the turn always take the leftmost branch */
			iip = 0;
		}
		pnode = scan_get_pnode(c, path + h, nnode, iip);
		if (IS_ERR(pnode)) {
			err = PTR_ERR(pnode);
			goto out;
		}
		/* Start with the first lprops of the new pnode */
		iip = 0;
	}
out:
	kfree(path);
	return err;
}
2047
2048#ifdef CONFIG_UBIFS_FS_DEBUG
2049
2050/**
2051 * dbg_chk_pnode - check a pnode.
2052 * @c: the UBIFS file-system description object
2053 * @pnode: pnode to check
2054 * @col: pnode column
2055 *
2056 * This function returns %0 on success and a negative error code on failure.
2057 */
2058static int dbg_chk_pnode(struct ubifs_info *c, struct ubifs_pnode *pnode,
2059 int col)
2060{
2061 int i;
2062
2063 if (pnode->num != col) {
2064 dbg_err("pnode num %d expected %d parent num %d iip %d",
2065 pnode->num, col, pnode->parent->num, pnode->iip);
2066 return -EINVAL;
2067 }
2068 for (i = 0; i < UBIFS_LPT_FANOUT; i++) {
2069 struct ubifs_lprops *lp, *lprops = &pnode->lprops[i];
2070 int lnum = (pnode->num << UBIFS_LPT_FANOUT_SHIFT) + i +
2071 c->main_first;
2072 int found, cat = lprops->flags & LPROPS_CAT_MASK;
2073 struct ubifs_lpt_heap *heap;
2074 struct list_head *list = NULL;
2075
2076 if (lnum >= c->leb_cnt)
2077 continue;
2078 if (lprops->lnum != lnum) {
2079 dbg_err("bad LEB number %d expected %d",
2080 lprops->lnum, lnum);
2081 return -EINVAL;
2082 }
2083 if (lprops->flags & LPROPS_TAKEN) {
2084 if (cat != LPROPS_UNCAT) {
2085 dbg_err("LEB %d taken but not uncat %d",
2086 lprops->lnum, cat);
2087 return -EINVAL;
2088 }
2089 continue;
2090 }
2091 if (lprops->flags & LPROPS_INDEX) {
2092 switch (cat) {
2093 case LPROPS_UNCAT:
2094 case LPROPS_DIRTY_IDX:
2095 case LPROPS_FRDI_IDX:
2096 break;
2097 default:
2098 dbg_err("LEB %d index but cat %d",
2099 lprops->lnum, cat);
2100 return -EINVAL;
2101 }
2102 } else {
2103 switch (cat) {
2104 case LPROPS_UNCAT:
2105 case LPROPS_DIRTY:
2106 case LPROPS_FREE:
2107 case LPROPS_EMPTY:
2108 case LPROPS_FREEABLE:
2109 break;
2110 default:
2111 dbg_err("LEB %d not index but cat %d",
2112 lprops->lnum, cat);
2113 return -EINVAL;
2114 }
2115 }
2116 switch (cat) {
2117 case LPROPS_UNCAT:
2118 list = &c->uncat_list;
2119 break;
2120 case LPROPS_EMPTY:
2121 list = &c->empty_list;
2122 break;
2123 case LPROPS_FREEABLE:
2124 list = &c->freeable_list;
2125 break;
2126 case LPROPS_FRDI_IDX:
2127 list = &c->frdi_idx_list;
2128 break;
2129 }
2130 found = 0;
2131 switch (cat) {
2132 case LPROPS_DIRTY:
2133 case LPROPS_DIRTY_IDX:
2134 case LPROPS_FREE:
2135 heap = &c->lpt_heap[cat - 1];
2136 if (lprops->hpos < heap->cnt &&
2137 heap->arr[lprops->hpos] == lprops)
2138 found = 1;
2139 break;
2140 case LPROPS_UNCAT:
2141 case LPROPS_EMPTY:
2142 case LPROPS_FREEABLE:
2143 case LPROPS_FRDI_IDX:
2144 list_for_each_entry(lp, list, list)
2145 if (lprops == lp) {
2146 found = 1;
2147 break;
2148 }
2149 break;
2150 }
2151 if (!found) {
2152 dbg_err("LEB %d cat %d not found in cat heap/list",
2153 lprops->lnum, cat);
2154 return -EINVAL;
2155 }
2156 switch (cat) {
2157 case LPROPS_EMPTY:
2158 if (lprops->free != c->leb_size) {
2159 dbg_err("LEB %d cat %d free %d dirty %d",
2160 lprops->lnum, cat, lprops->free,
2161 lprops->dirty);
2162 return -EINVAL;
2163 }
2164 case LPROPS_FREEABLE:
2165 case LPROPS_FRDI_IDX:
2166 if (lprops->free + lprops->dirty != c->leb_size) {
2167 dbg_err("LEB %d cat %d free %d dirty %d",
2168 lprops->lnum, cat, lprops->free,
2169 lprops->dirty);
2170 return -EINVAL;
2171 }
2172 }
2173 }
2174 return 0;
2175}
2176
/**
 * dbg_check_lpt_nodes - check nnodes and pnodes.
 * @c: the UBIFS file-system description object
 * @cnode: next cnode (nnode or pnode) to check
 * @row: row of cnode (root is zero)
 * @col: column of cnode (leftmost is zero)
 *
 * The check does nothing unless %UBIFS_CHK_LPROPS is set in 'ubifs_chk_flags'.
 * Only nodes which are present in the in-memory tree are visited. The tree is
 * walked iteratively: down into the first child that is present, then back up
 * through the parent pointers and on to the next sibling.
 *
 * This function returns %0 on success and a negative error code on failure.
 */
int dbg_check_lpt_nodes(struct ubifs_info *c, struct ubifs_cnode *cnode,
			int row, int col)
{
	struct ubifs_nnode *nnode, *nn;
	struct ubifs_cnode *cn;
	int num, iip = 0, err;

	if (!(ubifs_chk_flags & UBIFS_CHK_LPROPS))
		return 0;

	while (cnode) {
		ubifs_assert(row >= 0);
		/* Remember the parent so that we can go back up later */
		nnode = cnode->parent;
		if (cnode->level) {
			/* cnode is a nnode */
			num = calc_nnode_num(row, col);
			if (cnode->num != num) {
				dbg_err("nnode num %d expected %d "
					"parent num %d iip %d", cnode->num, num,
					(nnode ? nnode->num : 0), cnode->iip);
				return -EINVAL;
			}
			nn = (struct ubifs_nnode *)cnode;
			/* Look for a child at index iip or to the right of it */
			while (iip < UBIFS_LPT_FANOUT) {
				cn = nn->nbranch[iip].cnode;
				if (cn) {
					/* Go down */
					row += 1;
					col <<= UBIFS_LPT_FANOUT_SHIFT;
					col += iip;
					iip = 0;
					cnode = cn;
					break;
				}
				/* Go right */
				iip += 1;
			}
			/* A child was found - process it on the next pass */
			if (iip < UBIFS_LPT_FANOUT)
				continue;
		} else {
			struct ubifs_pnode *pnode;

			/* cnode is a pnode */
			pnode = (struct ubifs_pnode *)cnode;
			err = dbg_chk_pnode(c, pnode, col);
			if (err)
				return err;
		}
		/* Go up and to the right */
		row -= 1;
		col >>= UBIFS_LPT_FANOUT_SHIFT;
		iip = cnode->iip + 1;
		cnode = (struct ubifs_cnode *)nnode;
	}
	return 0;
}
2242
2243#endif /* CONFIG_UBIFS_FS_DEBUG */
diff --git a/fs/ubifs/lpt_commit.c b/fs/ubifs/lpt_commit.c
new file mode 100644
index 00000000000..5f0b83e20af
--- /dev/null
+++ b/fs/ubifs/lpt_commit.c
@@ -0,0 +1,1648 @@
1/*
2 * This file is part of UBIFS.
3 *
4 * Copyright (C) 2006-2008 Nokia Corporation.
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms of the GNU General Public License version 2 as published by
8 * the Free Software Foundation.
9 *
10 * This program is distributed in the hope that it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13 * more details.
14 *
15 * You should have received a copy of the GNU General Public License along with
16 * this program; if not, write to the Free Software Foundation, Inc., 51
17 * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
18 *
19 * Authors: Adrian Hunter
20 * Artem Bityutskiy (Битюцкий Артём)
21 */
22
23/*
24 * This file implements commit-related functionality of the LEB properties
25 * subsystem.
26 */
27
28#include <linux/crc16.h>
29#include "ubifs.h"
30
31/**
32 * first_dirty_cnode - find first dirty cnode.
33 * @c: UBIFS file-system description object
34 * @nnode: nnode at which to start
35 *
36 * This function returns the first dirty cnode or %NULL if there is not one.
37 */
38static struct ubifs_cnode *first_dirty_cnode(struct ubifs_nnode *nnode)
39{
40 ubifs_assert(nnode);
41 while (1) {
42 int i, cont = 0;
43
44 for (i = 0; i < UBIFS_LPT_FANOUT; i++) {
45 struct ubifs_cnode *cnode;
46
47 cnode = nnode->nbranch[i].cnode;
48 if (cnode &&
49 test_bit(DIRTY_CNODE, &cnode->flags)) {
50 if (cnode->level == 0)
51 return cnode;
52 nnode = (struct ubifs_nnode *)cnode;
53 cont = 1;
54 break;
55 }
56 }
57 if (!cont)
58 return (struct ubifs_cnode *)nnode;
59 }
60}
61
62/**
63 * next_dirty_cnode - find next dirty cnode.
64 * @cnode: cnode from which to begin searching
65 *
66 * This function returns the next dirty cnode or %NULL if there is not one.
67 */
68static struct ubifs_cnode *next_dirty_cnode(struct ubifs_cnode *cnode)
69{
70 struct ubifs_nnode *nnode;
71 int i;
72
73 ubifs_assert(cnode);
74 nnode = cnode->parent;
75 if (!nnode)
76 return NULL;
77 for (i = cnode->iip + 1; i < UBIFS_LPT_FANOUT; i++) {
78 cnode = nnode->nbranch[i].cnode;
79 if (cnode && test_bit(DIRTY_CNODE, &cnode->flags)) {
80 if (cnode->level == 0)
81 return cnode; /* cnode is a pnode */
82 /* cnode is a nnode */
83 return first_dirty_cnode((struct ubifs_nnode *)cnode);
84 }
85 }
86 return (struct ubifs_cnode *)nnode;
87}
88
89/**
90 * get_cnodes_to_commit - create list of dirty cnodes to commit.
91 * @c: UBIFS file-system description object
92 *
93 * This function returns the number of cnodes to commit.
94 */
95static int get_cnodes_to_commit(struct ubifs_info *c)
96{
97 struct ubifs_cnode *cnode, *cnext;
98 int cnt = 0;
99
100 if (!c->nroot)
101 return 0;
102
103 if (!test_bit(DIRTY_CNODE, &c->nroot->flags))
104 return 0;
105
106 c->lpt_cnext = first_dirty_cnode(c->nroot);
107 cnode = c->lpt_cnext;
108 if (!cnode)
109 return 0;
110 cnt += 1;
111 while (1) {
112 ubifs_assert(!test_bit(COW_ZNODE, &cnode->flags));
113 __set_bit(COW_ZNODE, &cnode->flags);
114 cnext = next_dirty_cnode(cnode);
115 if (!cnext) {
116 cnode->cnext = c->lpt_cnext;
117 break;
118 }
119 cnode->cnext = cnext;
120 cnode = cnext;
121 cnt += 1;
122 }
123 dbg_cmt("committing %d cnodes", cnt);
124 dbg_lp("committing %d cnodes", cnt);
125 ubifs_assert(cnt == c->dirty_nn_cnt + c->dirty_pn_cnt);
126 return cnt;
127}
128
129/**
130 * upd_ltab - update LPT LEB properties.
131 * @c: UBIFS file-system description object
132 * @lnum: LEB number
133 * @free: amount of free space
134 * @dirty: amount of dirty space to add
135 */
136static void upd_ltab(struct ubifs_info *c, int lnum, int free, int dirty)
137{
138 dbg_lp("LEB %d free %d dirty %d to %d +%d",
139 lnum, c->ltab[lnum - c->lpt_first].free,
140 c->ltab[lnum - c->lpt_first].dirty, free, dirty);
141 ubifs_assert(lnum >= c->lpt_first && lnum <= c->lpt_last);
142 c->ltab[lnum - c->lpt_first].free = free;
143 c->ltab[lnum - c->lpt_first].dirty += dirty;
144}
145
146/**
147 * alloc_lpt_leb - allocate an LPT LEB that is empty.
148 * @c: UBIFS file-system description object
149 * @lnum: LEB number is passed and returned here
150 *
151 * This function finds the next empty LEB in the ltab starting from @lnum. If a
152 * an empty LEB is found it is returned in @lnum and the function returns %0.
153 * Otherwise the function returns -ENOSPC. Note however, that LPT is designed
154 * never to run out of space.
155 */
156static int alloc_lpt_leb(struct ubifs_info *c, int *lnum)
157{
158 int i, n;
159
160 n = *lnum - c->lpt_first + 1;
161 for (i = n; i < c->lpt_lebs; i++) {
162 if (c->ltab[i].tgc || c->ltab[i].cmt)
163 continue;
164 if (c->ltab[i].free == c->leb_size) {
165 c->ltab[i].cmt = 1;
166 *lnum = i + c->lpt_first;
167 return 0;
168 }
169 }
170
171 for (i = 0; i < n; i++) {
172 if (c->ltab[i].tgc || c->ltab[i].cmt)
173 continue;
174 if (c->ltab[i].free == c->leb_size) {
175 c->ltab[i].cmt = 1;
176 *lnum = i + c->lpt_first;
177 return 0;
178 }
179 }
180 dbg_err("last LEB %d", *lnum);
181 dump_stack();
182 return -ENOSPC;
183}
184
185/**
186 * layout_cnodes - layout cnodes for commit.
187 * @c: UBIFS file-system description object
188 *
189 * This function returns %0 on success and a negative error code on failure.
190 */
191static int layout_cnodes(struct ubifs_info *c)
192{
193 int lnum, offs, len, alen, done_lsave, done_ltab, err;
194 struct ubifs_cnode *cnode;
195
196 cnode = c->lpt_cnext;
197 if (!cnode)
198 return 0;
199 lnum = c->nhead_lnum;
200 offs = c->nhead_offs;
201 /* Try to place lsave and ltab nicely */
202 done_lsave = !c->big_lpt;
203 done_ltab = 0;
204 if (!done_lsave && offs + c->lsave_sz <= c->leb_size) {
205 done_lsave = 1;
206 c->lsave_lnum = lnum;
207 c->lsave_offs = offs;
208 offs += c->lsave_sz;
209 }
210
211 if (offs + c->ltab_sz <= c->leb_size) {
212 done_ltab = 1;
213 c->ltab_lnum = lnum;
214 c->ltab_offs = offs;
215 offs += c->ltab_sz;
216 }
217
218 do {
219 if (cnode->level) {
220 len = c->nnode_sz;
221 c->dirty_nn_cnt -= 1;
222 } else {
223 len = c->pnode_sz;
224 c->dirty_pn_cnt -= 1;
225 }
226 while (offs + len > c->leb_size) {
227 alen = ALIGN(offs, c->min_io_size);
228 upd_ltab(c, lnum, c->leb_size - alen, alen - offs);
229 err = alloc_lpt_leb(c, &lnum);
230 if (err)
231 return err;
232 offs = 0;
233 ubifs_assert(lnum >= c->lpt_first &&
234 lnum <= c->lpt_last);
235 /* Try to place lsave and ltab nicely */
236 if (!done_lsave) {
237 done_lsave = 1;
238 c->lsave_lnum = lnum;
239 c->lsave_offs = offs;
240 offs += c->lsave_sz;
241 continue;
242 }
243 if (!done_ltab) {
244 done_ltab = 1;
245 c->ltab_lnum = lnum;
246 c->ltab_offs = offs;
247 offs += c->ltab_sz;
248 continue;
249 }
250 break;
251 }
252 if (cnode->parent) {
253 cnode->parent->nbranch[cnode->iip].lnum = lnum;
254 cnode->parent->nbranch[cnode->iip].offs = offs;
255 } else {
256 c->lpt_lnum = lnum;
257 c->lpt_offs = offs;
258 }
259 offs += len;
260 cnode = cnode->cnext;
261 } while (cnode && cnode != c->lpt_cnext);
262
263 /* Make sure to place LPT's save table */
264 if (!done_lsave) {
265 if (offs + c->lsave_sz > c->leb_size) {
266 alen = ALIGN(offs, c->min_io_size);
267 upd_ltab(c, lnum, c->leb_size - alen, alen - offs);
268 err = alloc_lpt_leb(c, &lnum);
269 if (err)
270 return err;
271 offs = 0;
272 ubifs_assert(lnum >= c->lpt_first &&
273 lnum <= c->lpt_last);
274 }
275 done_lsave = 1;
276 c->lsave_lnum = lnum;
277 c->lsave_offs = offs;
278 offs += c->lsave_sz;
279 }
280
281 /* Make sure to place LPT's own lprops table */
282 if (!done_ltab) {
283 if (offs + c->ltab_sz > c->leb_size) {
284 alen = ALIGN(offs, c->min_io_size);
285 upd_ltab(c, lnum, c->leb_size - alen, alen - offs);
286 err = alloc_lpt_leb(c, &lnum);
287 if (err)
288 return err;
289 offs = 0;
290 ubifs_assert(lnum >= c->lpt_first &&
291 lnum <= c->lpt_last);
292 }
293 done_ltab = 1;
294 c->ltab_lnum = lnum;
295 c->ltab_offs = offs;
296 offs += c->ltab_sz;
297 }
298
299 alen = ALIGN(offs, c->min_io_size);
300 upd_ltab(c, lnum, c->leb_size - alen, alen - offs);
301 return 0;
302}
303
304/**
305 * realloc_lpt_leb - allocate an LPT LEB that is empty.
306 * @c: UBIFS file-system description object
307 * @lnum: LEB number is passed and returned here
308 *
309 * This function duplicates exactly the results of the function alloc_lpt_leb.
310 * It is used during end commit to reallocate the same LEB numbers that were
311 * allocated by alloc_lpt_leb during start commit.
312 *
313 * This function finds the next LEB that was allocated by the alloc_lpt_leb
314 * function starting from @lnum. If a LEB is found it is returned in @lnum and
315 * the function returns %0. Otherwise the function returns -ENOSPC.
316 * Note however, that LPT is designed never to run out of space.
317 */
318static int realloc_lpt_leb(struct ubifs_info *c, int *lnum)
319{
320 int i, n;
321
322 n = *lnum - c->lpt_first + 1;
323 for (i = n; i < c->lpt_lebs; i++)
324 if (c->ltab[i].cmt) {
325 c->ltab[i].cmt = 0;
326 *lnum = i + c->lpt_first;
327 return 0;
328 }
329
330 for (i = 0; i < n; i++)
331 if (c->ltab[i].cmt) {
332 c->ltab[i].cmt = 0;
333 *lnum = i + c->lpt_first;
334 return 0;
335 }
336 dbg_err("last LEB %d", *lnum);
337 dump_stack();
338 return -ENOSPC;
339}
340
341/**
342 * write_cnodes - write cnodes for commit.
343 * @c: UBIFS file-system description object
344 *
345 * This function returns %0 on success and a negative error code on failure.
346 */
347static int write_cnodes(struct ubifs_info *c)
348{
349 int lnum, offs, len, from, err, wlen, alen, done_ltab, done_lsave;
350 struct ubifs_cnode *cnode;
351 void *buf = c->lpt_buf;
352
353 cnode = c->lpt_cnext;
354 if (!cnode)
355 return 0;
356 lnum = c->nhead_lnum;
357 offs = c->nhead_offs;
358 from = offs;
359 /* Ensure empty LEB is unmapped */
360 if (offs == 0) {
361 err = ubifs_leb_unmap(c, lnum);
362 if (err)
363 return err;
364 }
365 /* Try to place lsave and ltab nicely */
366 done_lsave = !c->big_lpt;
367 done_ltab = 0;
368 if (!done_lsave && offs + c->lsave_sz <= c->leb_size) {
369 done_lsave = 1;
370 ubifs_pack_lsave(c, buf + offs, c->lsave);
371 offs += c->lsave_sz;
372 }
373
374 if (offs + c->ltab_sz <= c->leb_size) {
375 done_ltab = 1;
376 ubifs_pack_ltab(c, buf + offs, c->ltab_cmt);
377 offs += c->ltab_sz;
378 }
379
380 /* Loop for each cnode */
381 do {
382 if (cnode->level)
383 len = c->nnode_sz;
384 else
385 len = c->pnode_sz;
386 while (offs + len > c->leb_size) {
387 wlen = offs - from;
388 if (wlen) {
389 alen = ALIGN(wlen, c->min_io_size);
390 memset(buf + offs, 0xff, alen - wlen);
391 err = ubifs_leb_write(c, lnum, buf + from, from,
392 alen, UBI_SHORTTERM);
393 if (err)
394 return err;
395 }
396 err = realloc_lpt_leb(c, &lnum);
397 if (err)
398 return err;
399 offs = 0;
400 from = 0;
401 ubifs_assert(lnum >= c->lpt_first &&
402 lnum <= c->lpt_last);
403 err = ubifs_leb_unmap(c, lnum);
404 if (err)
405 return err;
406 /* Try to place lsave and ltab nicely */
407 if (!done_lsave) {
408 done_lsave = 1;
409 ubifs_pack_lsave(c, buf + offs, c->lsave);
410 offs += c->lsave_sz;
411 continue;
412 }
413 if (!done_ltab) {
414 done_ltab = 1;
415 ubifs_pack_ltab(c, buf + offs, c->ltab_cmt);
416 offs += c->ltab_sz;
417 continue;
418 }
419 break;
420 }
421 if (cnode->level)
422 ubifs_pack_nnode(c, buf + offs,
423 (struct ubifs_nnode *)cnode);
424 else
425 ubifs_pack_pnode(c, buf + offs,
426 (struct ubifs_pnode *)cnode);
427 /*
428 * The reason for the barriers is the same as in case of TNC.
429 * See comment in 'write_index()'. 'dirty_cow_nnode()' and
430 * 'dirty_cow_pnode()' are the functions for which this is
431 * important.
432 */
433 clear_bit(DIRTY_CNODE, &cnode->flags);
434 smp_mb__before_clear_bit();
435 clear_bit(COW_ZNODE, &cnode->flags);
436 smp_mb__after_clear_bit();
437 offs += len;
438 cnode = cnode->cnext;
439 } while (cnode && cnode != c->lpt_cnext);
440
441 /* Make sure to place LPT's save table */
442 if (!done_lsave) {
443 if (offs + c->lsave_sz > c->leb_size) {
444 wlen = offs - from;
445 alen = ALIGN(wlen, c->min_io_size);
446 memset(buf + offs, 0xff, alen - wlen);
447 err = ubifs_leb_write(c, lnum, buf + from, from, alen,
448 UBI_SHORTTERM);
449 if (err)
450 return err;
451 err = realloc_lpt_leb(c, &lnum);
452 if (err)
453 return err;
454 offs = 0;
455 ubifs_assert(lnum >= c->lpt_first &&
456 lnum <= c->lpt_last);
457 err = ubifs_leb_unmap(c, lnum);
458 if (err)
459 return err;
460 }
461 done_lsave = 1;
462 ubifs_pack_lsave(c, buf + offs, c->lsave);
463 offs += c->lsave_sz;
464 }
465
466 /* Make sure to place LPT's own lprops table */
467 if (!done_ltab) {
468 if (offs + c->ltab_sz > c->leb_size) {
469 wlen = offs - from;
470 alen = ALIGN(wlen, c->min_io_size);
471 memset(buf + offs, 0xff, alen - wlen);
472 err = ubifs_leb_write(c, lnum, buf + from, from, alen,
473 UBI_SHORTTERM);
474 if (err)
475 return err;
476 err = realloc_lpt_leb(c, &lnum);
477 if (err)
478 return err;
479 offs = 0;
480 ubifs_assert(lnum >= c->lpt_first &&
481 lnum <= c->lpt_last);
482 err = ubifs_leb_unmap(c, lnum);
483 if (err)
484 return err;
485 }
486 done_ltab = 1;
487 ubifs_pack_ltab(c, buf + offs, c->ltab_cmt);
488 offs += c->ltab_sz;
489 }
490
491 /* Write remaining data in buffer */
492 wlen = offs - from;
493 alen = ALIGN(wlen, c->min_io_size);
494 memset(buf + offs, 0xff, alen - wlen);
495 err = ubifs_leb_write(c, lnum, buf + from, from, alen, UBI_SHORTTERM);
496 if (err)
497 return err;
498 c->nhead_lnum = lnum;
499 c->nhead_offs = ALIGN(offs, c->min_io_size);
500
501 dbg_lp("LPT root is at %d:%d", c->lpt_lnum, c->lpt_offs);
502 dbg_lp("LPT head is at %d:%d", c->nhead_lnum, c->nhead_offs);
503 dbg_lp("LPT ltab is at %d:%d", c->ltab_lnum, c->ltab_offs);
504 if (c->big_lpt)
505 dbg_lp("LPT lsave is at %d:%d", c->lsave_lnum, c->lsave_offs);
506 return 0;
507}
508
509/**
510 * next_pnode - find next pnode.
511 * @c: UBIFS file-system description object
512 * @pnode: pnode
513 *
514 * This function returns the next pnode or %NULL if there are no more pnodes.
515 */
516static struct ubifs_pnode *next_pnode(struct ubifs_info *c,
517 struct ubifs_pnode *pnode)
518{
519 struct ubifs_nnode *nnode;
520 int iip;
521
522 /* Try to go right */
523 nnode = pnode->parent;
524 iip = pnode->iip + 1;
525 if (iip < UBIFS_LPT_FANOUT) {
526 /* We assume here that LEB zero is never an LPT LEB */
527 if (nnode->nbranch[iip].lnum)
528 return ubifs_get_pnode(c, nnode, iip);
529 else
530 return NULL;
531 }
532
533 /* Go up while can't go right */
534 do {
535 iip = nnode->iip + 1;
536 nnode = nnode->parent;
537 if (!nnode)
538 return NULL;
539 /* We assume here that LEB zero is never an LPT LEB */
540 } while (iip >= UBIFS_LPT_FANOUT || !nnode->nbranch[iip].lnum);
541
542 /* Go right */
543 nnode = ubifs_get_nnode(c, nnode, iip);
544 if (IS_ERR(nnode))
545 return (void *)nnode;
546
547 /* Go down to level 1 */
548 while (nnode->level > 1) {
549 nnode = ubifs_get_nnode(c, nnode, 0);
550 if (IS_ERR(nnode))
551 return (void *)nnode;
552 }
553
554 return ubifs_get_pnode(c, nnode, 0);
555}
556
557/**
558 * pnode_lookup - lookup a pnode in the LPT.
559 * @c: UBIFS file-system description object
560 * @i: pnode number (0 to main_lebs - 1)
561 *
562 * This function returns a pointer to the pnode on success or a negative
563 * error code on failure.
564 */
565static struct ubifs_pnode *pnode_lookup(struct ubifs_info *c, int i)
566{
567 int err, h, iip, shft;
568 struct ubifs_nnode *nnode;
569
570 if (!c->nroot) {
571 err = ubifs_read_nnode(c, NULL, 0);
572 if (err)
573 return ERR_PTR(err);
574 }
575 i <<= UBIFS_LPT_FANOUT_SHIFT;
576 nnode = c->nroot;
577 shft = c->lpt_hght * UBIFS_LPT_FANOUT_SHIFT;
578 for (h = 1; h < c->lpt_hght; h++) {
579 iip = ((i >> shft) & (UBIFS_LPT_FANOUT - 1));
580 shft -= UBIFS_LPT_FANOUT_SHIFT;
581 nnode = ubifs_get_nnode(c, nnode, iip);
582 if (IS_ERR(nnode))
583 return ERR_PTR(PTR_ERR(nnode));
584 }
585 iip = ((i >> shft) & (UBIFS_LPT_FANOUT - 1));
586 return ubifs_get_pnode(c, nnode, iip);
587}
588
589/**
590 * add_pnode_dirt - add dirty space to LPT LEB properties.
591 * @c: UBIFS file-system description object
592 * @pnode: pnode for which to add dirt
593 */
594static void add_pnode_dirt(struct ubifs_info *c, struct ubifs_pnode *pnode)
595{
596 ubifs_add_lpt_dirt(c, pnode->parent->nbranch[pnode->iip].lnum,
597 c->pnode_sz);
598}
599
600/**
601 * do_make_pnode_dirty - mark a pnode dirty.
602 * @c: UBIFS file-system description object
603 * @pnode: pnode to mark dirty
604 */
605static void do_make_pnode_dirty(struct ubifs_info *c, struct ubifs_pnode *pnode)
606{
607 /* Assumes cnext list is empty i.e. not called during commit */
608 if (!test_and_set_bit(DIRTY_CNODE, &pnode->flags)) {
609 struct ubifs_nnode *nnode;
610
611 c->dirty_pn_cnt += 1;
612 add_pnode_dirt(c, pnode);
613 /* Mark parent and ancestors dirty too */
614 nnode = pnode->parent;
615 while (nnode) {
616 if (!test_and_set_bit(DIRTY_CNODE, &nnode->flags)) {
617 c->dirty_nn_cnt += 1;
618 ubifs_add_nnode_dirt(c, nnode);
619 nnode = nnode->parent;
620 } else
621 break;
622 }
623 }
624}
625
/**
 * make_tree_dirty - mark the entire LEB properties tree dirty.
 * @c: UBIFS file-system description object
 *
 * This function is used by the "small" LPT model to cause the entire LEB
 * properties tree to be written. The "small" LPT model does not use LPT
 * garbage collection because it is more efficient to write the entire tree
 * (because it is small).
 *
 * This function returns %0 on success and a negative error code on failure.
 */
static int make_tree_dirty(struct ubifs_info *c)
{
	struct ubifs_pnode *pnode;

	pnode = pnode_lookup(c, 0);
	/*
	 * 'pnode_lookup()' reports failure with an error pointer, which is
	 * not %NULL and must not be passed on to 'do_make_pnode_dirty()'.
	 */
	if (IS_ERR(pnode))
		return PTR_ERR(pnode);

	while (pnode) {
		do_make_pnode_dirty(c, pnode);
		pnode = next_pnode(c, pnode);
		if (IS_ERR(pnode))
			return PTR_ERR(pnode);
	}
	return 0;
}
650
651/**
652 * need_write_all - determine if the LPT area is running out of free space.
653 * @c: UBIFS file-system description object
654 *
655 * This function returns %1 if the LPT area is running out of free space and %0
656 * if it is not.
657 */
658static int need_write_all(struct ubifs_info *c)
659{
660 long long free = 0;
661 int i;
662
663 for (i = 0; i < c->lpt_lebs; i++) {
664 if (i + c->lpt_first == c->nhead_lnum)
665 free += c->leb_size - c->nhead_offs;
666 else if (c->ltab[i].free == c->leb_size)
667 free += c->leb_size;
668 else if (c->ltab[i].free + c->ltab[i].dirty == c->leb_size)
669 free += c->leb_size;
670 }
671 /* Less than twice the size left */
672 if (free <= c->lpt_sz * 2)
673 return 1;
674 return 0;
675}
676
677/**
678 * lpt_tgc_start - start trivial garbage collection of LPT LEBs.
679 * @c: UBIFS file-system description object
680 *
681 * LPT trivial garbage collection is where a LPT LEB contains only dirty and
682 * free space and so may be reused as soon as the next commit is completed.
683 * This function is called during start commit to mark LPT LEBs for trivial GC.
684 */
685static void lpt_tgc_start(struct ubifs_info *c)
686{
687 int i;
688
689 for (i = 0; i < c->lpt_lebs; i++) {
690 if (i + c->lpt_first == c->nhead_lnum)
691 continue;
692 if (c->ltab[i].dirty > 0 &&
693 c->ltab[i].free + c->ltab[i].dirty == c->leb_size) {
694 c->ltab[i].tgc = 1;
695 c->ltab[i].free = c->leb_size;
696 c->ltab[i].dirty = 0;
697 dbg_lp("LEB %d", i + c->lpt_first);
698 }
699 }
700}
701
702/**
703 * lpt_tgc_end - end trivial garbage collection of LPT LEBs.
704 * @c: UBIFS file-system description object
705 *
706 * LPT trivial garbage collection is where a LPT LEB contains only dirty and
707 * free space and so may be reused as soon as the next commit is completed.
708 * This function is called after the commit is completed (master node has been
709 * written) and unmaps LPT LEBs that were marked for trivial GC.
710 */
711static int lpt_tgc_end(struct ubifs_info *c)
712{
713 int i, err;
714
715 for (i = 0; i < c->lpt_lebs; i++)
716 if (c->ltab[i].tgc) {
717 err = ubifs_leb_unmap(c, i + c->lpt_first);
718 if (err)
719 return err;
720 c->ltab[i].tgc = 0;
721 dbg_lp("LEB %d", i + c->lpt_first);
722 }
723 return 0;
724}
725
726/**
727 * populate_lsave - fill the lsave array with important LEB numbers.
728 * @c: the UBIFS file-system description object
729 *
730 * This function is only called for the "big" model. It records a small number
731 * of LEB numbers of important LEBs. Important LEBs are ones that are (from
732 * most important to least important): empty, freeable, freeable index, dirty
733 * index, dirty or free. Upon mount, we read this list of LEB numbers and bring
734 * their pnodes into memory. That will stop us from having to scan the LPT
735 * straight away. For the "small" model we assume that scanning the LPT is no
736 * big deal.
737 */
738static void populate_lsave(struct ubifs_info *c)
739{
740 struct ubifs_lprops *lprops;
741 struct ubifs_lpt_heap *heap;
742 int i, cnt = 0;
743
744 ubifs_assert(c->big_lpt);
745 if (!(c->lpt_drty_flgs & LSAVE_DIRTY)) {
746 c->lpt_drty_flgs |= LSAVE_DIRTY;
747 ubifs_add_lpt_dirt(c, c->lsave_lnum, c->lsave_sz);
748 }
749 list_for_each_entry(lprops, &c->empty_list, list) {
750 c->lsave[cnt++] = lprops->lnum;
751 if (cnt >= c->lsave_cnt)
752 return;
753 }
754 list_for_each_entry(lprops, &c->freeable_list, list) {
755 c->lsave[cnt++] = lprops->lnum;
756 if (cnt >= c->lsave_cnt)
757 return;
758 }
759 list_for_each_entry(lprops, &c->frdi_idx_list, list) {
760 c->lsave[cnt++] = lprops->lnum;
761 if (cnt >= c->lsave_cnt)
762 return;
763 }
764 heap = &c->lpt_heap[LPROPS_DIRTY_IDX - 1];
765 for (i = 0; i < heap->cnt; i++) {
766 c->lsave[cnt++] = heap->arr[i]->lnum;
767 if (cnt >= c->lsave_cnt)
768 return;
769 }
770 heap = &c->lpt_heap[LPROPS_DIRTY - 1];
771 for (i = 0; i < heap->cnt; i++) {
772 c->lsave[cnt++] = heap->arr[i]->lnum;
773 if (cnt >= c->lsave_cnt)
774 return;
775 }
776 heap = &c->lpt_heap[LPROPS_FREE - 1];
777 for (i = 0; i < heap->cnt; i++) {
778 c->lsave[cnt++] = heap->arr[i]->lnum;
779 if (cnt >= c->lsave_cnt)
780 return;
781 }
782 /* Fill it up completely */
783 while (cnt < c->lsave_cnt)
784 c->lsave[cnt++] = c->main_first;
785}
786
787/**
788 * nnode_lookup - lookup a nnode in the LPT.
789 * @c: UBIFS file-system description object
790 * @i: nnode number
791 *
792 * This function returns a pointer to the nnode on success or a negative
793 * error code on failure.
794 */
795static struct ubifs_nnode *nnode_lookup(struct ubifs_info *c, int i)
796{
797 int err, iip;
798 struct ubifs_nnode *nnode;
799
800 if (!c->nroot) {
801 err = ubifs_read_nnode(c, NULL, 0);
802 if (err)
803 return ERR_PTR(err);
804 }
805 nnode = c->nroot;
806 while (1) {
807 iip = i & (UBIFS_LPT_FANOUT - 1);
808 i >>= UBIFS_LPT_FANOUT_SHIFT;
809 if (!i)
810 break;
811 nnode = ubifs_get_nnode(c, nnode, iip);
812 if (IS_ERR(nnode))
813 return nnode;
814 }
815 return nnode;
816}
817
818/**
819 * make_nnode_dirty - find a nnode and, if found, make it dirty.
820 * @c: UBIFS file-system description object
821 * @node_num: nnode number of nnode to make dirty
822 * @lnum: LEB number where nnode was written
823 * @offs: offset where nnode was written
824 *
825 * This function is used by LPT garbage collection. LPT garbage collection is
826 * used only for the "big" LPT model (c->big_lpt == 1). Garbage collection
827 * simply involves marking all the nodes in the LEB being garbage-collected as
828 * dirty. The dirty nodes are written next commit, after which the LEB is free
829 * to be reused.
830 *
831 * This function returns %0 on success and a negative error code on failure.
832 */
833static int make_nnode_dirty(struct ubifs_info *c, int node_num, int lnum,
834 int offs)
835{
836 struct ubifs_nnode *nnode;
837
838 nnode = nnode_lookup(c, node_num);
839 if (IS_ERR(nnode))
840 return PTR_ERR(nnode);
841 if (nnode->parent) {
842 struct ubifs_nbranch *branch;
843
844 branch = &nnode->parent->nbranch[nnode->iip];
845 if (branch->lnum != lnum || branch->offs != offs)
846 return 0; /* nnode is obsolete */
847 } else if (c->lpt_lnum != lnum || c->lpt_offs != offs)
848 return 0; /* nnode is obsolete */
849 /* Assumes cnext list is empty i.e. not called during commit */
850 if (!test_and_set_bit(DIRTY_CNODE, &nnode->flags)) {
851 c->dirty_nn_cnt += 1;
852 ubifs_add_nnode_dirt(c, nnode);
853 /* Mark parent and ancestors dirty too */
854 nnode = nnode->parent;
855 while (nnode) {
856 if (!test_and_set_bit(DIRTY_CNODE, &nnode->flags)) {
857 c->dirty_nn_cnt += 1;
858 ubifs_add_nnode_dirt(c, nnode);
859 nnode = nnode->parent;
860 } else
861 break;
862 }
863 }
864 return 0;
865}
866
867/**
868 * make_pnode_dirty - find a pnode and, if found, make it dirty.
869 * @c: UBIFS file-system description object
870 * @node_num: pnode number of pnode to make dirty
871 * @lnum: LEB number where pnode was written
872 * @offs: offset where pnode was written
873 *
874 * This function is used by LPT garbage collection. LPT garbage collection is
875 * used only for the "big" LPT model (c->big_lpt == 1). Garbage collection
876 * simply involves marking all the nodes in the LEB being garbage-collected as
877 * dirty. The dirty nodes are written next commit, after which the LEB is free
878 * to be reused.
879 *
880 * This function returns %0 on success and a negative error code on failure.
881 */
882static int make_pnode_dirty(struct ubifs_info *c, int node_num, int lnum,
883 int offs)
884{
885 struct ubifs_pnode *pnode;
886 struct ubifs_nbranch *branch;
887
888 pnode = pnode_lookup(c, node_num);
889 if (IS_ERR(pnode))
890 return PTR_ERR(pnode);
891 branch = &pnode->parent->nbranch[pnode->iip];
892 if (branch->lnum != lnum || branch->offs != offs)
893 return 0;
894 do_make_pnode_dirty(c, pnode);
895 return 0;
896}
897
898/**
899 * make_ltab_dirty - make ltab node dirty.
900 * @c: UBIFS file-system description object
901 * @lnum: LEB number where ltab was written
902 * @offs: offset where ltab was written
903 *
904 * This function is used by LPT garbage collection. LPT garbage collection is
905 * used only for the "big" LPT model (c->big_lpt == 1). Garbage collection
906 * simply involves marking all the nodes in the LEB being garbage-collected as
907 * dirty. The dirty nodes are written next commit, after which the LEB is free
908 * to be reused.
909 *
910 * This function returns %0 on success and a negative error code on failure.
911 */
912static int make_ltab_dirty(struct ubifs_info *c, int lnum, int offs)
913{
914 if (lnum != c->ltab_lnum || offs != c->ltab_offs)
915 return 0; /* This ltab node is obsolete */
916 if (!(c->lpt_drty_flgs & LTAB_DIRTY)) {
917 c->lpt_drty_flgs |= LTAB_DIRTY;
918 ubifs_add_lpt_dirt(c, c->ltab_lnum, c->ltab_sz);
919 }
920 return 0;
921}
922
923/**
924 * make_lsave_dirty - make lsave node dirty.
925 * @c: UBIFS file-system description object
926 * @lnum: LEB number where lsave was written
927 * @offs: offset where lsave was written
928 *
929 * This function is used by LPT garbage collection. LPT garbage collection is
930 * used only for the "big" LPT model (c->big_lpt == 1). Garbage collection
931 * simply involves marking all the nodes in the LEB being garbage-collected as
932 * dirty. The dirty nodes are written next commit, after which the LEB is free
933 * to be reused.
934 *
935 * This function returns %0 on success and a negative error code on failure.
936 */
937static int make_lsave_dirty(struct ubifs_info *c, int lnum, int offs)
938{
939 if (lnum != c->lsave_lnum || offs != c->lsave_offs)
940 return 0; /* This lsave node is obsolete */
941 if (!(c->lpt_drty_flgs & LSAVE_DIRTY)) {
942 c->lpt_drty_flgs |= LSAVE_DIRTY;
943 ubifs_add_lpt_dirt(c, c->lsave_lnum, c->lsave_sz);
944 }
945 return 0;
946}
947
948/**
949 * make_node_dirty - make node dirty.
950 * @c: UBIFS file-system description object
951 * @node_type: LPT node type
952 * @node_num: node number
953 * @lnum: LEB number where node was written
954 * @offs: offset where node was written
955 *
956 * This function is used by LPT garbage collection. LPT garbage collection is
957 * used only for the "big" LPT model (c->big_lpt == 1). Garbage collection
958 * simply involves marking all the nodes in the LEB being garbage-collected as
959 * dirty. The dirty nodes are written next commit, after which the LEB is free
960 * to be reused.
961 *
962 * This function returns %0 on success and a negative error code on failure.
963 */
964static int make_node_dirty(struct ubifs_info *c, int node_type, int node_num,
965 int lnum, int offs)
966{
967 switch (node_type) {
968 case UBIFS_LPT_NNODE:
969 return make_nnode_dirty(c, node_num, lnum, offs);
970 case UBIFS_LPT_PNODE:
971 return make_pnode_dirty(c, node_num, lnum, offs);
972 case UBIFS_LPT_LTAB:
973 return make_ltab_dirty(c, lnum, offs);
974 case UBIFS_LPT_LSAVE:
975 return make_lsave_dirty(c, lnum, offs);
976 }
977 return -EINVAL;
978}
979
980/**
981 * get_lpt_node_len - return the length of a node based on its type.
982 * @c: UBIFS file-system description object
983 * @node_type: LPT node type
984 */
985static int get_lpt_node_len(struct ubifs_info *c, int node_type)
986{
987 switch (node_type) {
988 case UBIFS_LPT_NNODE:
989 return c->nnode_sz;
990 case UBIFS_LPT_PNODE:
991 return c->pnode_sz;
992 case UBIFS_LPT_LTAB:
993 return c->ltab_sz;
994 case UBIFS_LPT_LSAVE:
995 return c->lsave_sz;
996 }
997 return 0;
998}
999
1000/**
1001 * get_pad_len - return the length of padding in a buffer.
1002 * @c: UBIFS file-system description object
1003 * @buf: buffer
1004 * @len: length of buffer
1005 */
1006static int get_pad_len(struct ubifs_info *c, uint8_t *buf, int len)
1007{
1008 int offs, pad_len;
1009
1010 if (c->min_io_size == 1)
1011 return 0;
1012 offs = c->leb_size - len;
1013 pad_len = ALIGN(offs, c->min_io_size) - offs;
1014 return pad_len;
1015}
1016
1017/**
1018 * get_lpt_node_type - return type (and node number) of a node in a buffer.
1019 * @c: UBIFS file-system description object
1020 * @buf: buffer
1021 * @node_num: node number is returned here
1022 */
1023static int get_lpt_node_type(struct ubifs_info *c, uint8_t *buf, int *node_num)
1024{
1025 uint8_t *addr = buf + UBIFS_LPT_CRC_BYTES;
1026 int pos = 0, node_type;
1027
1028 node_type = ubifs_unpack_bits(&addr, &pos, UBIFS_LPT_TYPE_BITS);
1029 *node_num = ubifs_unpack_bits(&addr, &pos, c->pcnt_bits);
1030 return node_type;
1031}
1032
1033/**
1034 * is_a_node - determine if a buffer contains a node.
1035 * @c: UBIFS file-system description object
1036 * @buf: buffer
1037 * @len: length of buffer
1038 *
1039 * This function returns %1 if the buffer contains a node or %0 if it does not.
1040 */
1041static int is_a_node(struct ubifs_info *c, uint8_t *buf, int len)
1042{
1043 uint8_t *addr = buf + UBIFS_LPT_CRC_BYTES;
1044 int pos = 0, node_type, node_len;
1045 uint16_t crc, calc_crc;
1046
1047 node_type = ubifs_unpack_bits(&addr, &pos, UBIFS_LPT_TYPE_BITS);
1048 if (node_type == UBIFS_LPT_NOT_A_NODE)
1049 return 0;
1050 node_len = get_lpt_node_len(c, node_type);
1051 if (!node_len || node_len > len)
1052 return 0;
1053 pos = 0;
1054 addr = buf;
1055 crc = ubifs_unpack_bits(&addr, &pos, UBIFS_LPT_CRC_BITS);
1056 calc_crc = crc16(-1, buf + UBIFS_LPT_CRC_BYTES,
1057 node_len - UBIFS_LPT_CRC_BYTES);
1058 if (crc != calc_crc)
1059 return 0;
1060 return 1;
1061}
1062
1063
1064/**
1065 * lpt_gc_lnum - garbage collect a LPT LEB.
1066 * @c: UBIFS file-system description object
1067 * @lnum: LEB number to garbage collect
1068 *
1069 * LPT garbage collection is used only for the "big" LPT model
1070 * (c->big_lpt == 1). Garbage collection simply involves marking all the nodes
1071 * in the LEB being garbage-collected as dirty. The dirty nodes are written
1072 * next commit, after which the LEB is free to be reused.
1073 *
1074 * This function returns %0 on success and a negative error code on failure.
1075 */
1076static int lpt_gc_lnum(struct ubifs_info *c, int lnum)
1077{
1078 int err, len = c->leb_size, node_type, node_num, node_len, offs;
1079 void *buf = c->lpt_buf;
1080
1081 dbg_lp("LEB %d", lnum);
1082 err = ubi_read(c->ubi, lnum, buf, 0, c->leb_size);
1083 if (err) {
1084 ubifs_err("cannot read LEB %d, error %d", lnum, err);
1085 return err;
1086 }
1087 while (1) {
1088 if (!is_a_node(c, buf, len)) {
1089 int pad_len;
1090
1091 pad_len = get_pad_len(c, buf, len);
1092 if (pad_len) {
1093 buf += pad_len;
1094 len -= pad_len;
1095 continue;
1096 }
1097 return 0;
1098 }
1099 node_type = get_lpt_node_type(c, buf, &node_num);
1100 node_len = get_lpt_node_len(c, node_type);
1101 offs = c->leb_size - len;
1102 ubifs_assert(node_len != 0);
1103 mutex_lock(&c->lp_mutex);
1104 err = make_node_dirty(c, node_type, node_num, lnum, offs);
1105 mutex_unlock(&c->lp_mutex);
1106 if (err)
1107 return err;
1108 buf += node_len;
1109 len -= node_len;
1110 }
1111 return 0;
1112}
1113
1114/**
1115 * lpt_gc - LPT garbage collection.
1116 * @c: UBIFS file-system description object
1117 *
1118 * Select a LPT LEB for LPT garbage collection and call 'lpt_gc_lnum()'.
1119 * Returns %0 on success and a negative error code on failure.
1120 */
1121static int lpt_gc(struct ubifs_info *c)
1122{
1123 int i, lnum = -1, dirty = 0;
1124
1125 mutex_lock(&c->lp_mutex);
1126 for (i = 0; i < c->lpt_lebs; i++) {
1127 ubifs_assert(!c->ltab[i].tgc);
1128 if (i + c->lpt_first == c->nhead_lnum ||
1129 c->ltab[i].free + c->ltab[i].dirty == c->leb_size)
1130 continue;
1131 if (c->ltab[i].dirty > dirty) {
1132 dirty = c->ltab[i].dirty;
1133 lnum = i + c->lpt_first;
1134 }
1135 }
1136 mutex_unlock(&c->lp_mutex);
1137 if (lnum == -1)
1138 return -ENOSPC;
1139 return lpt_gc_lnum(c, lnum);
1140}
1141
1142/**
1143 * ubifs_lpt_start_commit - UBIFS commit starts.
1144 * @c: the UBIFS file-system description object
1145 *
1146 * This function has to be called when UBIFS starts the commit operation.
1147 * This function "freezes" all currently dirty LEB properties and does not
1148 * change them anymore. Further changes are saved and tracked separately
1149 * because they are not part of this commit. This function returns zero in case
1150 * of success and a negative error code in case of failure.
1151 */
1152int ubifs_lpt_start_commit(struct ubifs_info *c)
1153{
1154 int err, cnt;
1155
1156 dbg_lp("");
1157
1158 mutex_lock(&c->lp_mutex);
1159 err = dbg_check_ltab(c);
1160 if (err)
1161 goto out;
1162
1163 if (c->check_lpt_free) {
1164 /*
1165 * We ensure there is enough free space in
1166 * ubifs_lpt_post_commit() by marking nodes dirty. That
1167 * information is lost when we unmount, so we also need
1168 * to check free space once after mounting also.
1169 */
1170 c->check_lpt_free = 0;
1171 while (need_write_all(c)) {
1172 mutex_unlock(&c->lp_mutex);
1173 err = lpt_gc(c);
1174 if (err)
1175 return err;
1176 mutex_lock(&c->lp_mutex);
1177 }
1178 }
1179
1180 lpt_tgc_start(c);
1181
1182 if (!c->dirty_pn_cnt) {
1183 dbg_cmt("no cnodes to commit");
1184 err = 0;
1185 goto out;
1186 }
1187
1188 if (!c->big_lpt && need_write_all(c)) {
1189 /* If needed, write everything */
1190 err = make_tree_dirty(c);
1191 if (err)
1192 goto out;
1193 lpt_tgc_start(c);
1194 }
1195
1196 if (c->big_lpt)
1197 populate_lsave(c);
1198
1199 cnt = get_cnodes_to_commit(c);
1200 ubifs_assert(cnt != 0);
1201
1202 err = layout_cnodes(c);
1203 if (err)
1204 goto out;
1205
1206 /* Copy the LPT's own lprops for end commit to write */
1207 memcpy(c->ltab_cmt, c->ltab,
1208 sizeof(struct ubifs_lpt_lprops) * c->lpt_lebs);
1209 c->lpt_drty_flgs &= ~(LTAB_DIRTY | LSAVE_DIRTY);
1210
1211out:
1212 mutex_unlock(&c->lp_mutex);
1213 return err;
1214}
1215
1216/**
1217 * free_obsolete_cnodes - free obsolete cnodes for commit end.
1218 * @c: UBIFS file-system description object
1219 */
1220static void free_obsolete_cnodes(struct ubifs_info *c)
1221{
1222 struct ubifs_cnode *cnode, *cnext;
1223
1224 cnext = c->lpt_cnext;
1225 if (!cnext)
1226 return;
1227 do {
1228 cnode = cnext;
1229 cnext = cnode->cnext;
1230 if (test_bit(OBSOLETE_CNODE, &cnode->flags))
1231 kfree(cnode);
1232 else
1233 cnode->cnext = NULL;
1234 } while (cnext != c->lpt_cnext);
1235 c->lpt_cnext = NULL;
1236}
1237
1238/**
1239 * ubifs_lpt_end_commit - finish the commit operation.
1240 * @c: the UBIFS file-system description object
1241 *
1242 * This function has to be called when the commit operation finishes. It
1243 * flushes the changes which were "frozen" by 'ubifs_lprops_start_commit()' to
1244 * the media. Returns zero in case of success and a negative error code in case
1245 * of failure.
1246 */
1247int ubifs_lpt_end_commit(struct ubifs_info *c)
1248{
1249 int err;
1250
1251 dbg_lp("");
1252
1253 if (!c->lpt_cnext)
1254 return 0;
1255
1256 err = write_cnodes(c);
1257 if (err)
1258 return err;
1259
1260 mutex_lock(&c->lp_mutex);
1261 free_obsolete_cnodes(c);
1262 mutex_unlock(&c->lp_mutex);
1263
1264 return 0;
1265}
1266
1267/**
1268 * ubifs_lpt_post_commit - post commit LPT trivial GC and LPT GC.
1269 * @c: UBIFS file-system description object
1270 *
1271 * LPT trivial GC is completed after a commit. Also LPT GC is done after a
1272 * commit for the "big" LPT model.
1273 */
1274int ubifs_lpt_post_commit(struct ubifs_info *c)
1275{
1276 int err;
1277
1278 mutex_lock(&c->lp_mutex);
1279 err = lpt_tgc_end(c);
1280 if (err)
1281 goto out;
1282 if (c->big_lpt)
1283 while (need_write_all(c)) {
1284 mutex_unlock(&c->lp_mutex);
1285 err = lpt_gc(c);
1286 if (err)
1287 return err;
1288 mutex_lock(&c->lp_mutex);
1289 }
1290out:
1291 mutex_unlock(&c->lp_mutex);
1292 return err;
1293}
1294
1295/**
1296 * first_nnode - find the first nnode in memory.
1297 * @c: UBIFS file-system description object
1298 * @hght: height of tree where nnode found is returned here
1299 *
1300 * This function returns a pointer to the nnode found or %NULL if no nnode is
1301 * found. This function is a helper to 'ubifs_lpt_free()'.
1302 */
1303static struct ubifs_nnode *first_nnode(struct ubifs_info *c, int *hght)
1304{
1305 struct ubifs_nnode *nnode;
1306 int h, i, found;
1307
1308 nnode = c->nroot;
1309 *hght = 0;
1310 if (!nnode)
1311 return NULL;
1312 for (h = 1; h < c->lpt_hght; h++) {
1313 found = 0;
1314 for (i = 0; i < UBIFS_LPT_FANOUT; i++) {
1315 if (nnode->nbranch[i].nnode) {
1316 found = 1;
1317 nnode = nnode->nbranch[i].nnode;
1318 *hght = h;
1319 break;
1320 }
1321 }
1322 if (!found)
1323 break;
1324 }
1325 return nnode;
1326}
1327
1328/**
1329 * next_nnode - find the next nnode in memory.
1330 * @c: UBIFS file-system description object
1331 * @nnode: nnode from which to start.
1332 * @hght: height of tree where nnode is, is passed and returned here
1333 *
1334 * This function returns a pointer to the nnode found or %NULL if no nnode is
1335 * found. This function is a helper to 'ubifs_lpt_free()'.
1336 */
1337static struct ubifs_nnode *next_nnode(struct ubifs_info *c,
1338 struct ubifs_nnode *nnode, int *hght)
1339{
1340 struct ubifs_nnode *parent;
1341 int iip, h, i, found;
1342
1343 parent = nnode->parent;
1344 if (!parent)
1345 return NULL;
1346 if (nnode->iip == UBIFS_LPT_FANOUT - 1) {
1347 *hght -= 1;
1348 return parent;
1349 }
1350 for (iip = nnode->iip + 1; iip < UBIFS_LPT_FANOUT; iip++) {
1351 nnode = parent->nbranch[iip].nnode;
1352 if (nnode)
1353 break;
1354 }
1355 if (!nnode) {
1356 *hght -= 1;
1357 return parent;
1358 }
1359 for (h = *hght + 1; h < c->lpt_hght; h++) {
1360 found = 0;
1361 for (i = 0; i < UBIFS_LPT_FANOUT; i++) {
1362 if (nnode->nbranch[i].nnode) {
1363 found = 1;
1364 nnode = nnode->nbranch[i].nnode;
1365 *hght = h;
1366 break;
1367 }
1368 }
1369 if (!found)
1370 break;
1371 }
1372 return nnode;
1373}
1374
1375/**
1376 * ubifs_lpt_free - free resources owned by the LPT.
1377 * @c: UBIFS file-system description object
1378 * @wr_only: free only resources used for writing
1379 */
1380void ubifs_lpt_free(struct ubifs_info *c, int wr_only)
1381{
1382 struct ubifs_nnode *nnode;
1383 int i, hght;
1384
1385 /* Free write-only things first */
1386
1387 free_obsolete_cnodes(c); /* Leftover from a failed commit */
1388
1389 vfree(c->ltab_cmt);
1390 c->ltab_cmt = NULL;
1391 vfree(c->lpt_buf);
1392 c->lpt_buf = NULL;
1393 kfree(c->lsave);
1394 c->lsave = NULL;
1395
1396 if (wr_only)
1397 return;
1398
1399 /* Now free the rest */
1400
1401 nnode = first_nnode(c, &hght);
1402 while (nnode) {
1403 for (i = 0; i < UBIFS_LPT_FANOUT; i++)
1404 kfree(nnode->nbranch[i].nnode);
1405 nnode = next_nnode(c, nnode, &hght);
1406 }
1407 for (i = 0; i < LPROPS_HEAP_CNT; i++)
1408 kfree(c->lpt_heap[i].arr);
1409 kfree(c->dirty_idx.arr);
1410 kfree(c->nroot);
1411 vfree(c->ltab);
1412 kfree(c->lpt_nod_buf);
1413}
1414
1415#ifdef CONFIG_UBIFS_FS_DEBUG
1416
/**
 * dbg_is_all_ff - determine if a buffer contains only 0xff bytes.
 * @buf: buffer
 * @len: buffer length
 *
 * Returns %1 if every one of the @len bytes is 0xff (which is trivially the
 * case for an empty buffer) and %0 otherwise.
 */
static int dbg_is_all_ff(uint8_t *buf, int len)
{
	const uint8_t *p = buf;

	while (len-- > 0)
		if (*p++ != 0xff)
			return 0;
	return 1;
}
1431
1432/**
1433 * dbg_is_nnode_dirty - determine if a nnode is dirty.
1434 * @c: the UBIFS file-system description object
1435 * @lnum: LEB number where nnode was written
1436 * @offs: offset where nnode was written
1437 */
1438static int dbg_is_nnode_dirty(struct ubifs_info *c, int lnum, int offs)
1439{
1440 struct ubifs_nnode *nnode;
1441 int hght;
1442
1443 /* Entire tree is in memory so first_nnode / next_nnode are ok */
1444 nnode = first_nnode(c, &hght);
1445 for (; nnode; nnode = next_nnode(c, nnode, &hght)) {
1446 struct ubifs_nbranch *branch;
1447
1448 cond_resched();
1449 if (nnode->parent) {
1450 branch = &nnode->parent->nbranch[nnode->iip];
1451 if (branch->lnum != lnum || branch->offs != offs)
1452 continue;
1453 if (test_bit(DIRTY_CNODE, &nnode->flags))
1454 return 1;
1455 return 0;
1456 } else {
1457 if (c->lpt_lnum != lnum || c->lpt_offs != offs)
1458 continue;
1459 if (test_bit(DIRTY_CNODE, &nnode->flags))
1460 return 1;
1461 return 0;
1462 }
1463 }
1464 return 1;
1465}
1466
1467/**
1468 * dbg_is_pnode_dirty - determine if a pnode is dirty.
1469 * @c: the UBIFS file-system description object
1470 * @lnum: LEB number where pnode was written
1471 * @offs: offset where pnode was written
1472 */
1473static int dbg_is_pnode_dirty(struct ubifs_info *c, int lnum, int offs)
1474{
1475 int i, cnt;
1476
1477 cnt = DIV_ROUND_UP(c->main_lebs, UBIFS_LPT_FANOUT);
1478 for (i = 0; i < cnt; i++) {
1479 struct ubifs_pnode *pnode;
1480 struct ubifs_nbranch *branch;
1481
1482 cond_resched();
1483 pnode = pnode_lookup(c, i);
1484 if (IS_ERR(pnode))
1485 return PTR_ERR(pnode);
1486 branch = &pnode->parent->nbranch[pnode->iip];
1487 if (branch->lnum != lnum || branch->offs != offs)
1488 continue;
1489 if (test_bit(DIRTY_CNODE, &pnode->flags))
1490 return 1;
1491 return 0;
1492 }
1493 return 1;
1494}
1495
1496/**
1497 * dbg_is_ltab_dirty - determine if a ltab node is dirty.
1498 * @c: the UBIFS file-system description object
1499 * @lnum: LEB number where ltab node was written
1500 * @offs: offset where ltab node was written
1501 */
1502static int dbg_is_ltab_dirty(struct ubifs_info *c, int lnum, int offs)
1503{
1504 if (lnum != c->ltab_lnum || offs != c->ltab_offs)
1505 return 1;
1506 return (c->lpt_drty_flgs & LTAB_DIRTY) != 0;
1507}
1508
1509/**
1510 * dbg_is_lsave_dirty - determine if a lsave node is dirty.
1511 * @c: the UBIFS file-system description object
1512 * @lnum: LEB number where lsave node was written
1513 * @offs: offset where lsave node was written
1514 */
1515static int dbg_is_lsave_dirty(struct ubifs_info *c, int lnum, int offs)
1516{
1517 if (lnum != c->lsave_lnum || offs != c->lsave_offs)
1518 return 1;
1519 return (c->lpt_drty_flgs & LSAVE_DIRTY) != 0;
1520}
1521
1522/**
1523 * dbg_is_node_dirty - determine if a node is dirty.
1524 * @c: the UBIFS file-system description object
1525 * @node_type: node type
1526 * @lnum: LEB number where node was written
1527 * @offs: offset where node was written
1528 */
1529static int dbg_is_node_dirty(struct ubifs_info *c, int node_type, int lnum,
1530 int offs)
1531{
1532 switch (node_type) {
1533 case UBIFS_LPT_NNODE:
1534 return dbg_is_nnode_dirty(c, lnum, offs);
1535 case UBIFS_LPT_PNODE:
1536 return dbg_is_pnode_dirty(c, lnum, offs);
1537 case UBIFS_LPT_LTAB:
1538 return dbg_is_ltab_dirty(c, lnum, offs);
1539 case UBIFS_LPT_LSAVE:
1540 return dbg_is_lsave_dirty(c, lnum, offs);
1541 }
1542 return 1;
1543}
1544
1545/**
1546 * dbg_check_ltab_lnum - check the ltab for a LPT LEB number.
1547 * @c: the UBIFS file-system description object
1548 * @lnum: LEB number where node was written
1549 * @offs: offset where node was written
1550 *
1551 * This function returns %0 on success and a negative error code on failure.
1552 */
1553static int dbg_check_ltab_lnum(struct ubifs_info *c, int lnum)
1554{
1555 int err, len = c->leb_size, dirty = 0, node_type, node_num, node_len;
1556 int ret;
1557 void *buf = c->dbg_buf;
1558
1559 dbg_lp("LEB %d", lnum);
1560 err = ubi_read(c->ubi, lnum, buf, 0, c->leb_size);
1561 if (err) {
1562 dbg_msg("ubi_read failed, LEB %d, error %d", lnum, err);
1563 return err;
1564 }
1565 while (1) {
1566 if (!is_a_node(c, buf, len)) {
1567 int i, pad_len;
1568
1569 pad_len = get_pad_len(c, buf, len);
1570 if (pad_len) {
1571 buf += pad_len;
1572 len -= pad_len;
1573 dirty += pad_len;
1574 continue;
1575 }
1576 if (!dbg_is_all_ff(buf, len)) {
1577 dbg_msg("invalid empty space in LEB %d at %d",
1578 lnum, c->leb_size - len);
1579 err = -EINVAL;
1580 }
1581 i = lnum - c->lpt_first;
1582 if (len != c->ltab[i].free) {
1583 dbg_msg("invalid free space in LEB %d "
1584 "(free %d, expected %d)",
1585 lnum, len, c->ltab[i].free);
1586 err = -EINVAL;
1587 }
1588 if (dirty != c->ltab[i].dirty) {
1589 dbg_msg("invalid dirty space in LEB %d "
1590 "(dirty %d, expected %d)",
1591 lnum, dirty, c->ltab[i].dirty);
1592 err = -EINVAL;
1593 }
1594 return err;
1595 }
1596 node_type = get_lpt_node_type(c, buf, &node_num);
1597 node_len = get_lpt_node_len(c, node_type);
1598 ret = dbg_is_node_dirty(c, node_type, lnum, c->leb_size - len);
1599 if (ret == 1)
1600 dirty += node_len;
1601 buf += node_len;
1602 len -= node_len;
1603 }
1604}
1605
1606/**
1607 * dbg_check_ltab - check the free and dirty space in the ltab.
1608 * @c: the UBIFS file-system description object
1609 *
1610 * This function returns %0 on success and a negative error code on failure.
1611 */
1612int dbg_check_ltab(struct ubifs_info *c)
1613{
1614 int lnum, err, i, cnt;
1615
1616 if (!(ubifs_chk_flags & UBIFS_CHK_LPROPS))
1617 return 0;
1618
1619 /* Bring the entire tree into memory */
1620 cnt = DIV_ROUND_UP(c->main_lebs, UBIFS_LPT_FANOUT);
1621 for (i = 0; i < cnt; i++) {
1622 struct ubifs_pnode *pnode;
1623
1624 pnode = pnode_lookup(c, i);
1625 if (IS_ERR(pnode))
1626 return PTR_ERR(pnode);
1627 cond_resched();
1628 }
1629
1630 /* Check nodes */
1631 err = dbg_check_lpt_nodes(c, (struct ubifs_cnode *)c->nroot, 0, 0);
1632 if (err)
1633 return err;
1634
1635 /* Check each LEB */
1636 for (lnum = c->lpt_first; lnum <= c->lpt_last; lnum++) {
1637 err = dbg_check_ltab_lnum(c, lnum);
1638 if (err) {
1639 dbg_err("failed at LEB %d", lnum);
1640 return err;
1641 }
1642 }
1643
1644 dbg_lp("succeeded");
1645 return 0;
1646}
1647
1648#endif /* CONFIG_UBIFS_FS_DEBUG */
diff --git a/fs/ubifs/master.c b/fs/ubifs/master.c
new file mode 100644
index 00000000000..71d5493bf56
--- /dev/null
+++ b/fs/ubifs/master.c
@@ -0,0 +1,387 @@
1/*
2 * This file is part of UBIFS.
3 *
4 * Copyright (C) 2006-2008 Nokia Corporation.
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms of the GNU General Public License version 2 as published by
8 * the Free Software Foundation.
9 *
10 * This program is distributed in the hope that it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13 * more details.
14 *
15 * You should have received a copy of the GNU General Public License along with
16 * this program; if not, write to the Free Software Foundation, Inc., 51
17 * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
18 *
19 * Authors: Artem Bityutskiy (Битюцкий Артём)
20 * Adrian Hunter
21 */
22
23/* This file implements reading and writing the master node */
24
25#include "ubifs.h"
26
27/**
28 * scan_for_master - search the valid master node.
29 * @c: UBIFS file-system description object
30 *
31 * This function scans the master node LEBs and search for the latest master
32 * node. Returns zero in case of success and a negative error code in case of
33 * failure.
34 */
35static int scan_for_master(struct ubifs_info *c)
36{
37 struct ubifs_scan_leb *sleb;
38 struct ubifs_scan_node *snod;
39 int lnum, offs = 0, nodes_cnt;
40
41 lnum = UBIFS_MST_LNUM;
42
43 sleb = ubifs_scan(c, lnum, 0, c->sbuf);
44 if (IS_ERR(sleb))
45 return PTR_ERR(sleb);
46 nodes_cnt = sleb->nodes_cnt;
47 if (nodes_cnt > 0) {
48 snod = list_entry(sleb->nodes.prev, struct ubifs_scan_node,
49 list);
50 if (snod->type != UBIFS_MST_NODE)
51 goto out;
52 memcpy(c->mst_node, snod->node, snod->len);
53 offs = snod->offs;
54 }
55 ubifs_scan_destroy(sleb);
56
57 lnum += 1;
58
59 sleb = ubifs_scan(c, lnum, 0, c->sbuf);
60 if (IS_ERR(sleb))
61 return PTR_ERR(sleb);
62 if (sleb->nodes_cnt != nodes_cnt)
63 goto out;
64 if (!sleb->nodes_cnt)
65 goto out;
66 snod = list_entry(sleb->nodes.prev, struct ubifs_scan_node, list);
67 if (snod->type != UBIFS_MST_NODE)
68 goto out;
69 if (snod->offs != offs)
70 goto out;
71 if (memcmp((void *)c->mst_node + UBIFS_CH_SZ,
72 (void *)snod->node + UBIFS_CH_SZ,
73 UBIFS_MST_NODE_SZ - UBIFS_CH_SZ))
74 goto out;
75 c->mst_offs = offs;
76 ubifs_scan_destroy(sleb);
77 return 0;
78
79out:
80 ubifs_scan_destroy(sleb);
81 return -EINVAL;
82}
83
84/**
85 * validate_master - validate master node.
86 * @c: UBIFS file-system description object
87 *
88 * This function validates data which was read from master node. Returns zero
89 * if the data is all right and %-EINVAL if not.
90 */
91static int validate_master(const struct ubifs_info *c)
92{
93 long long main_sz;
94 int err;
95
96 if (c->max_sqnum >= SQNUM_WATERMARK) {
97 err = 1;
98 goto out;
99 }
100
101 if (c->cmt_no >= c->max_sqnum) {
102 err = 2;
103 goto out;
104 }
105
106 if (c->highest_inum >= INUM_WATERMARK) {
107 err = 3;
108 goto out;
109 }
110
111 if (c->lhead_lnum < UBIFS_LOG_LNUM ||
112 c->lhead_lnum >= UBIFS_LOG_LNUM + c->log_lebs ||
113 c->lhead_offs < 0 || c->lhead_offs >= c->leb_size ||
114 c->lhead_offs & (c->min_io_size - 1)) {
115 err = 4;
116 goto out;
117 }
118
119 if (c->zroot.lnum >= c->leb_cnt || c->zroot.lnum < c->main_first ||
120 c->zroot.offs >= c->leb_size || c->zroot.offs & 7) {
121 err = 5;
122 goto out;
123 }
124
125 if (c->zroot.len < c->ranges[UBIFS_IDX_NODE].min_len ||
126 c->zroot.len > c->ranges[UBIFS_IDX_NODE].max_len) {
127 err = 6;
128 goto out;
129 }
130
131 if (c->gc_lnum >= c->leb_cnt || c->gc_lnum < c->main_first) {
132 err = 7;
133 goto out;
134 }
135
136 if (c->ihead_lnum >= c->leb_cnt || c->ihead_lnum < c->main_first ||
137 c->ihead_offs % c->min_io_size || c->ihead_offs < 0 ||
138 c->ihead_offs > c->leb_size || c->ihead_offs & 7) {
139 err = 8;
140 goto out;
141 }
142
143 main_sz = (long long)c->main_lebs * c->leb_size;
144 if (c->old_idx_sz & 7 || c->old_idx_sz >= main_sz) {
145 err = 9;
146 goto out;
147 }
148
149 if (c->lpt_lnum < c->lpt_first || c->lpt_lnum > c->lpt_last ||
150 c->lpt_offs < 0 || c->lpt_offs + c->nnode_sz > c->leb_size) {
151 err = 10;
152 goto out;
153 }
154
155 if (c->nhead_lnum < c->lpt_first || c->nhead_lnum > c->lpt_last ||
156 c->nhead_offs < 0 || c->nhead_offs % c->min_io_size ||
157 c->nhead_offs > c->leb_size) {
158 err = 11;
159 goto out;
160 }
161
162 if (c->ltab_lnum < c->lpt_first || c->ltab_lnum > c->lpt_last ||
163 c->ltab_offs < 0 ||
164 c->ltab_offs + c->ltab_sz > c->leb_size) {
165 err = 12;
166 goto out;
167 }
168
169 if (c->big_lpt && (c->lsave_lnum < c->lpt_first ||
170 c->lsave_lnum > c->lpt_last || c->lsave_offs < 0 ||
171 c->lsave_offs + c->lsave_sz > c->leb_size)) {
172 err = 13;
173 goto out;
174 }
175
176 if (c->lscan_lnum < c->main_first || c->lscan_lnum >= c->leb_cnt) {
177 err = 14;
178 goto out;
179 }
180
181 if (c->lst.empty_lebs < 0 || c->lst.empty_lebs > c->main_lebs - 2) {
182 err = 15;
183 goto out;
184 }
185
186 if (c->lst.idx_lebs < 0 || c->lst.idx_lebs > c->main_lebs - 1) {
187 err = 16;
188 goto out;
189 }
190
191 if (c->lst.total_free < 0 || c->lst.total_free > main_sz ||
192 c->lst.total_free & 7) {
193 err = 17;
194 goto out;
195 }
196
197 if (c->lst.total_dirty < 0 || (c->lst.total_dirty & 7)) {
198 err = 18;
199 goto out;
200 }
201
202 if (c->lst.total_used < 0 || (c->lst.total_used & 7)) {
203 err = 19;
204 goto out;
205 }
206
207 if (c->lst.total_free + c->lst.total_dirty +
208 c->lst.total_used > main_sz) {
209 err = 20;
210 goto out;
211 }
212
213 if (c->lst.total_dead + c->lst.total_dark +
214 c->lst.total_used + c->old_idx_sz > main_sz) {
215 err = 21;
216 goto out;
217 }
218
219 if (c->lst.total_dead < 0 ||
220 c->lst.total_dead > c->lst.total_free + c->lst.total_dirty ||
221 c->lst.total_dead & 7) {
222 err = 22;
223 goto out;
224 }
225
226 if (c->lst.total_dark < 0 ||
227 c->lst.total_dark > c->lst.total_free + c->lst.total_dirty ||
228 c->lst.total_dark & 7) {
229 err = 23;
230 goto out;
231 }
232
233 return 0;
234
235out:
236 ubifs_err("bad master node at offset %d error %d", c->mst_offs, err);
237 dbg_dump_node(c, c->mst_node);
238 return -EINVAL;
239}
240
241/**
242 * ubifs_read_master - read master node.
243 * @c: UBIFS file-system description object
244 *
245 * This function finds and reads the master node during file-system mount. If
246 * the flash is empty, it creates default master node as well. Returns zero in
247 * case of success and a negative error code in case of failure.
248 */
249int ubifs_read_master(struct ubifs_info *c)
250{
251 int err, old_leb_cnt;
252
253 c->mst_node = kzalloc(c->mst_node_alsz, GFP_KERNEL);
254 if (!c->mst_node)
255 return -ENOMEM;
256
257 err = scan_for_master(c);
258 if (err) {
259 err = ubifs_recover_master_node(c);
260 if (err)
261 /*
262 * Note, we do not free 'c->mst_node' here because the
263 * unmount routine will take care of this.
264 */
265 return err;
266 }
267
268 /* Make sure that the recovery flag is clear */
269 c->mst_node->flags &= cpu_to_le32(~UBIFS_MST_RCVRY);
270
271 c->max_sqnum = le64_to_cpu(c->mst_node->ch.sqnum);
272 c->highest_inum = le64_to_cpu(c->mst_node->highest_inum);
273 c->cmt_no = le64_to_cpu(c->mst_node->cmt_no);
274 c->zroot.lnum = le32_to_cpu(c->mst_node->root_lnum);
275 c->zroot.offs = le32_to_cpu(c->mst_node->root_offs);
276 c->zroot.len = le32_to_cpu(c->mst_node->root_len);
277 c->lhead_lnum = le32_to_cpu(c->mst_node->log_lnum);
278 c->gc_lnum = le32_to_cpu(c->mst_node->gc_lnum);
279 c->ihead_lnum = le32_to_cpu(c->mst_node->ihead_lnum);
280 c->ihead_offs = le32_to_cpu(c->mst_node->ihead_offs);
281 c->old_idx_sz = le64_to_cpu(c->mst_node->index_size);
282 c->lpt_lnum = le32_to_cpu(c->mst_node->lpt_lnum);
283 c->lpt_offs = le32_to_cpu(c->mst_node->lpt_offs);
284 c->nhead_lnum = le32_to_cpu(c->mst_node->nhead_lnum);
285 c->nhead_offs = le32_to_cpu(c->mst_node->nhead_offs);
286 c->ltab_lnum = le32_to_cpu(c->mst_node->ltab_lnum);
287 c->ltab_offs = le32_to_cpu(c->mst_node->ltab_offs);
288 c->lsave_lnum = le32_to_cpu(c->mst_node->lsave_lnum);
289 c->lsave_offs = le32_to_cpu(c->mst_node->lsave_offs);
290 c->lscan_lnum = le32_to_cpu(c->mst_node->lscan_lnum);
291 c->lst.empty_lebs = le32_to_cpu(c->mst_node->empty_lebs);
292 c->lst.idx_lebs = le32_to_cpu(c->mst_node->idx_lebs);
293 old_leb_cnt = le32_to_cpu(c->mst_node->leb_cnt);
294 c->lst.total_free = le64_to_cpu(c->mst_node->total_free);
295 c->lst.total_dirty = le64_to_cpu(c->mst_node->total_dirty);
296 c->lst.total_used = le64_to_cpu(c->mst_node->total_used);
297 c->lst.total_dead = le64_to_cpu(c->mst_node->total_dead);
298 c->lst.total_dark = le64_to_cpu(c->mst_node->total_dark);
299
300 c->calc_idx_sz = c->old_idx_sz;
301
302 if (c->mst_node->flags & cpu_to_le32(UBIFS_MST_NO_ORPHS))
303 c->no_orphs = 1;
304
305 if (old_leb_cnt != c->leb_cnt) {
306 /* The file system has been resized */
307 int growth = c->leb_cnt - old_leb_cnt;
308
309 if (c->leb_cnt < old_leb_cnt ||
310 c->leb_cnt < UBIFS_MIN_LEB_CNT) {
311 ubifs_err("bad leb_cnt on master node");
312 dbg_dump_node(c, c->mst_node);
313 return -EINVAL;
314 }
315
316 dbg_mnt("Auto resizing (master) from %d LEBs to %d LEBs",
317 old_leb_cnt, c->leb_cnt);
318 c->lst.empty_lebs += growth;
319 c->lst.total_free += growth * (long long)c->leb_size;
320 c->lst.total_dark += growth * (long long)c->dark_wm;
321
322 /*
323 * Reflect changes back onto the master node. N.B. the master
324 * node gets written immediately whenever mounting (or
325 * remounting) in read-write mode, so we do not need to write it
326 * here.
327 */
328 c->mst_node->leb_cnt = cpu_to_le32(c->leb_cnt);
329 c->mst_node->empty_lebs = cpu_to_le32(c->lst.empty_lebs);
330 c->mst_node->total_free = cpu_to_le64(c->lst.total_free);
331 c->mst_node->total_dark = cpu_to_le64(c->lst.total_dark);
332 }
333
334 err = validate_master(c);
335 if (err)
336 return err;
337
338 err = dbg_old_index_check_init(c, &c->zroot);
339
340 return err;
341}
342
343/**
344 * ubifs_write_master - write master node.
345 * @c: UBIFS file-system description object
346 *
347 * This function writes the master node. The caller has to take the
348 * @c->mst_mutex lock before calling this function. Returns zero in case of
349 * success and a negative error code in case of failure. The master node is
350 * written twice to enable recovery.
351 */
352int ubifs_write_master(struct ubifs_info *c)
353{
354 int err, lnum, offs, len;
355
356 if (c->ro_media)
357 return -EINVAL;
358
359 lnum = UBIFS_MST_LNUM;
360 offs = c->mst_offs + c->mst_node_alsz;
361 len = UBIFS_MST_NODE_SZ;
362
363 if (offs + UBIFS_MST_NODE_SZ > c->leb_size) {
364 err = ubifs_leb_unmap(c, lnum);
365 if (err)
366 return err;
367 offs = 0;
368 }
369
370 c->mst_offs = offs;
371 c->mst_node->highest_inum = cpu_to_le64(c->highest_inum);
372
373 err = ubifs_write_node(c, c->mst_node, len, lnum, offs, UBI_SHORTTERM);
374 if (err)
375 return err;
376
377 lnum += 1;
378
379 if (offs == 0) {
380 err = ubifs_leb_unmap(c, lnum);
381 if (err)
382 return err;
383 }
384 err = ubifs_write_node(c, c->mst_node, len, lnum, offs, UBI_SHORTTERM);
385
386 return err;
387}
diff --git a/fs/ubifs/misc.h b/fs/ubifs/misc.h
new file mode 100644
index 00000000000..4beccfc256d
--- /dev/null
+++ b/fs/ubifs/misc.h
@@ -0,0 +1,342 @@
1/*
2 * This file is part of UBIFS.
3 *
4 * Copyright (C) 2006-2008 Nokia Corporation
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms of the GNU General Public License version 2 as published by
8 * the Free Software Foundation.
9 *
10 * This program is distributed in the hope that it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13 * more details.
14 *
15 * You should have received a copy of the GNU General Public License along with
16 * this program; if not, write to the Free Software Foundation, Inc., 51
17 * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
18 *
19 * Authors: Artem Bityutskiy (Битюцкий Артём)
20 * Adrian Hunter
21 */
22
23/*
24 * This file contains miscellaneous helper functions.
25 */
26
27#ifndef __UBIFS_MISC_H__
28#define __UBIFS_MISC_H__
29
30/**
31 * ubifs_zn_dirty - check if znode is dirty.
32 * @znode: znode to check
33 *
34 * This helper function returns %1 if @znode is dirty and %0 otherwise.
35 */
36static inline int ubifs_zn_dirty(const struct ubifs_znode *znode)
37{
38 return !!test_bit(DIRTY_ZNODE, &znode->flags);
39}
40
41/**
42 * ubifs_wake_up_bgt - wake up background thread.
43 * @c: UBIFS file-system description object
44 */
45static inline void ubifs_wake_up_bgt(struct ubifs_info *c)
46{
47 if (c->bgt && !c->need_bgt) {
48 c->need_bgt = 1;
49 wake_up_process(c->bgt);
50 }
51}
52
53/**
54 * ubifs_tnc_find_child - find next child in znode.
55 * @znode: znode to search at
56 * @start: the zbranch index to start at
57 *
58 * This helper function looks for znode child starting at index @start. Returns
59 * the child or %NULL if no children were found.
60 */
61static inline struct ubifs_znode *
62ubifs_tnc_find_child(struct ubifs_znode *znode, int start)
63{
64 while (start < znode->child_cnt) {
65 if (znode->zbranch[start].znode)
66 return znode->zbranch[start].znode;
67 start += 1;
68 }
69
70 return NULL;
71}
72
73/**
74 * ubifs_inode - get UBIFS inode information by VFS 'struct inode' object.
75 * @inode: the VFS 'struct inode' pointer
76 */
77static inline struct ubifs_inode *ubifs_inode(const struct inode *inode)
78{
79 return container_of(inode, struct ubifs_inode, vfs_inode);
80}
81
82/**
83 * ubifs_ro_mode - switch UBIFS to read read-only mode.
84 * @c: UBIFS file-system description object
85 * @err: error code which is the reason of switching to R/O mode
86 */
87static inline void ubifs_ro_mode(struct ubifs_info *c, int err)
88{
89 if (!c->ro_media) {
90 c->ro_media = 1;
91 ubifs_warn("switched to read-only mode, error %d", err);
92 dbg_dump_stack();
93 }
94}
95
96/**
97 * ubifs_compr_present - check if compressor was compiled in.
98 * @compr_type: compressor type to check
99 *
100 * This function returns %1 of compressor of type @compr_type is present, and
101 * %0 if not.
102 */
103static inline int ubifs_compr_present(int compr_type)
104{
105 ubifs_assert(compr_type >= 0 && compr_type < UBIFS_COMPR_TYPES_CNT);
106 return !!ubifs_compressors[compr_type]->capi_name;
107}
108
109/**
110 * ubifs_compr_name - get compressor name string by its type.
111 * @compr_type: compressor type
112 *
113 * This function returns compressor type string.
114 */
115static inline const char *ubifs_compr_name(int compr_type)
116{
117 ubifs_assert(compr_type >= 0 && compr_type < UBIFS_COMPR_TYPES_CNT);
118 return ubifs_compressors[compr_type]->name;
119}
120
121/**
122 * ubifs_wbuf_sync - synchronize write-buffer.
123 * @wbuf: write-buffer to synchronize
124 *
125 * This is the same as as 'ubifs_wbuf_sync_nolock()' but it does not assume
126 * that the write-buffer is already locked.
127 */
128static inline int ubifs_wbuf_sync(struct ubifs_wbuf *wbuf)
129{
130 int err;
131
132 mutex_lock_nested(&wbuf->io_mutex, wbuf->jhead);
133 err = ubifs_wbuf_sync_nolock(wbuf);
134 mutex_unlock(&wbuf->io_mutex);
135 return err;
136}
137
138/**
139 * ubifs_leb_unmap - unmap an LEB.
140 * @c: UBIFS file-system description object
141 * @lnum: LEB number to unmap
142 *
143 * This function returns %0 on success and a negative error code on failure.
144 */
145static inline int ubifs_leb_unmap(const struct ubifs_info *c, int lnum)
146{
147 int err;
148
149 if (c->ro_media)
150 return -EROFS;
151 err = ubi_leb_unmap(c->ubi, lnum);
152 if (err) {
153 ubifs_err("unmap LEB %d failed, error %d", lnum, err);
154 return err;
155 }
156
157 return 0;
158}
159
160/**
161 * ubifs_leb_write - write to a LEB.
162 * @c: UBIFS file-system description object
163 * @lnum: LEB number to write
164 * @buf: buffer to write from
165 * @offs: offset within LEB to write to
166 * @len: length to write
167 * @dtype: data type
168 *
169 * This function returns %0 on success and a negative error code on failure.
170 */
171static inline int ubifs_leb_write(const struct ubifs_info *c, int lnum,
172 const void *buf, int offs, int len, int dtype)
173{
174 int err;
175
176 if (c->ro_media)
177 return -EROFS;
178 err = ubi_leb_write(c->ubi, lnum, buf, offs, len, dtype);
179 if (err) {
180 ubifs_err("writing %d bytes at %d:%d, error %d",
181 len, lnum, offs, err);
182 return err;
183 }
184
185 return 0;
186}
187
188/**
189 * ubifs_leb_change - atomic LEB change.
190 * @c: UBIFS file-system description object
191 * @lnum: LEB number to write
192 * @buf: buffer to write from
193 * @len: length to write
194 * @dtype: data type
195 *
196 * This function returns %0 on success and a negative error code on failure.
197 */
198static inline int ubifs_leb_change(const struct ubifs_info *c, int lnum,
199 const void *buf, int len, int dtype)
200{
201 int err;
202
203 if (c->ro_media)
204 return -EROFS;
205 err = ubi_leb_change(c->ubi, lnum, buf, len, dtype);
206 if (err) {
207 ubifs_err("changing %d bytes in LEB %d, error %d",
208 len, lnum, err);
209 return err;
210 }
211
212 return 0;
213}
214
215/**
216 * ubifs_encode_dev - encode device node IDs.
217 * @dev: UBIFS device node information
218 * @rdev: device IDs to encode
219 *
220 * This is a helper function which encodes major/minor numbers of a device node
221 * into UBIFS device node description. We use standard Linux "new" and "huge"
222 * encodings.
223 */
224static inline int ubifs_encode_dev(union ubifs_dev_desc *dev, dev_t rdev)
225{
226 if (new_valid_dev(rdev)) {
227 dev->new = cpu_to_le32(new_encode_dev(rdev));
228 return sizeof(dev->new);
229 } else {
230 dev->huge = cpu_to_le64(huge_encode_dev(rdev));
231 return sizeof(dev->huge);
232 }
233}
234
235/**
236 * ubifs_add_dirt - add dirty space to LEB properties.
237 * @c: the UBIFS file-system description object
238 * @lnum: LEB to add dirty space for
239 * @dirty: dirty space to add
240 *
241 * This is a helper function which increased amount of dirty LEB space. Returns
242 * zero in case of success and a negative error code in case of failure.
243 */
244static inline int ubifs_add_dirt(struct ubifs_info *c, int lnum, int dirty)
245{
246 return ubifs_update_one_lp(c, lnum, LPROPS_NC, dirty, 0, 0);
247}
248
249/**
250 * ubifs_return_leb - return LEB to lprops.
251 * @c: the UBIFS file-system description object
252 * @lnum: LEB to return
253 *
254 * This helper function cleans the "taken" flag of a logical eraseblock in the
255 * lprops. Returns zero in case of success and a negative error code in case of
256 * failure.
257 */
258static inline int ubifs_return_leb(struct ubifs_info *c, int lnum)
259{
260 return ubifs_change_one_lp(c, lnum, LPROPS_NC, LPROPS_NC, 0,
261 LPROPS_TAKEN, 0);
262}
263
264/**
265 * ubifs_idx_node_sz - return index node size.
266 * @c: the UBIFS file-system description object
267 * @child_cnt: number of children of this index node
268 */
269static inline int ubifs_idx_node_sz(const struct ubifs_info *c, int child_cnt)
270{
271 return UBIFS_IDX_NODE_SZ + (UBIFS_BRANCH_SZ + c->key_len) * child_cnt;
272}
273
274/**
275 * ubifs_idx_branch - return pointer to an index branch.
276 * @c: the UBIFS file-system description object
277 * @idx: index node
278 * @bnum: branch number
279 */
280static inline
281struct ubifs_branch *ubifs_idx_branch(const struct ubifs_info *c,
282 const struct ubifs_idx_node *idx,
283 int bnum)
284{
285 return (struct ubifs_branch *)((void *)idx->branches +
286 (UBIFS_BRANCH_SZ + c->key_len) * bnum);
287}
288
289/**
290 * ubifs_idx_key - return pointer to an index key.
291 * @c: the UBIFS file-system description object
292 * @idx: index node
293 */
294static inline void *ubifs_idx_key(const struct ubifs_info *c,
295 const struct ubifs_idx_node *idx)
296{
297 return (void *)((struct ubifs_branch *)idx->branches)->key;
298}
299
300/**
301 * ubifs_reported_space - calculate reported free space.
302 * @c: the UBIFS file-system description object
303 * @free: amount of free space
304 *
305 * This function calculates amount of free space which will be reported to
306 * user-space. User-space application tend to expect that if the file-system
307 * (e.g., via the 'statfs()' call) reports that it has N bytes available, they
308 * are able to write a file of size N. UBIFS attaches node headers to each data
309 * node and it has to write indexind nodes as well. This introduces additional
310 * overhead, and UBIFS it has to report sligtly less free space to meet the
311 * above expectetion.
312 *
313 * This function assumes free space is made up of uncompressed data nodes and
314 * full index nodes (one per data node, doubled because we always allow enough
315 * space to write the index twice).
316 *
317 * Note, the calculation is pessimistic, which means that most of the time
318 * UBIFS reports less space than it actually has.
319 */
320static inline long long ubifs_reported_space(const struct ubifs_info *c,
321 uint64_t free)
322{
323 int divisor, factor;
324
325 divisor = UBIFS_MAX_DATA_NODE_SZ + (c->max_idx_node_sz << 1);
326 factor = UBIFS_MAX_DATA_NODE_SZ - UBIFS_DATA_NODE_SZ;
327 do_div(free, divisor);
328
329 return free * factor;
330}
331
332/**
333 * ubifs_current_time - round current time to time granularity.
334 * @inode: inode
335 */
336static inline struct timespec ubifs_current_time(struct inode *inode)
337{
338 return (inode->i_sb->s_time_gran < NSEC_PER_SEC) ?
339 current_fs_time(inode->i_sb) : CURRENT_TIME_SEC;
340}
341
342#endif /* __UBIFS_MISC_H__ */
diff --git a/fs/ubifs/orphan.c b/fs/ubifs/orphan.c
new file mode 100644
index 00000000000..3afeb9242c6
--- /dev/null
+++ b/fs/ubifs/orphan.c
@@ -0,0 +1,958 @@
1/*
2 * This file is part of UBIFS.
3 *
4 * Copyright (C) 2006-2008 Nokia Corporation.
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms of the GNU General Public License version 2 as published by
8 * the Free Software Foundation.
9 *
10 * This program is distributed in the hope that it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13 * more details.
14 *
15 * You should have received a copy of the GNU General Public License along with
16 * this program; if not, write to the Free Software Foundation, Inc., 51
17 * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
18 *
19 * Author: Adrian Hunter
20 */
21
22#include "ubifs.h"
23
24/*
25 * An orphan is an inode number whose inode node has been committed to the index
26 * with a link count of zero. That happens when an open file is deleted
27 * (unlinked) and then a commit is run. In the normal course of events the inode
28 * would be deleted when the file is closed. However in the case of an unclean
29 * unmount, orphans need to be accounted for. After an unclean unmount, the
30 * orphans' inodes must be deleted which means either scanning the entire index
31 * looking for them, or keeping a list on flash somewhere. This unit implements
32 * the latter approach.
33 *
34 * The orphan area is a fixed number of LEBs situated between the LPT area and
35 * the main area. The number of orphan area LEBs is specified when the file
36 * system is created. The minimum number is 1. The size of the orphan area
37 * should be so that it can hold the maximum number of orphans that are expected
38 * to ever exist at one time.
39 *
40 * The number of orphans that can fit in a LEB is:
41 *
42 * (c->leb_size - UBIFS_ORPH_NODE_SZ) / sizeof(__le64)
43 *
44 * For example: a 15872 byte LEB can fit 1980 orphans so 1 LEB may be enough.
45 *
46 * Orphans are accumulated in a rb-tree. When an inode's link count drops to
47 * zero, the inode number is added to the rb-tree. It is removed from the tree
48 * when the inode is deleted. Any new orphans that are in the orphan tree when
49 * the commit is run, are written to the orphan area in 1 or more orph nodes.
50 * If the orphan area is full, it is consolidated to make space. There is
51 * always enough space because validation prevents the user from creating more
52 * than the maximum number of orphans allowed.
53 */
54
/*
 * The orphan consistency check is only built when UBIFS debugging is enabled;
 * otherwise it collapses to a constant "success" value.
 */
#ifndef CONFIG_UBIFS_FS_DEBUG
#define dbg_check_orphans(c) 0
#else
static int dbg_check_orphans(struct ubifs_info *c);
#endif
60
61/**
62 * ubifs_add_orphan - add an orphan.
63 * @c: UBIFS file-system description object
64 * @inum: orphan inode number
65 *
66 * Add an orphan. This function is called when an inodes link count drops to
67 * zero.
68 */
69int ubifs_add_orphan(struct ubifs_info *c, ino_t inum)
70{
71 struct ubifs_orphan *orphan, *o;
72 struct rb_node **p, *parent = NULL;
73
74 orphan = kzalloc(sizeof(struct ubifs_orphan), GFP_NOFS);
75 if (!orphan)
76 return -ENOMEM;
77 orphan->inum = inum;
78 orphan->new = 1;
79
80 spin_lock(&c->orphan_lock);
81 if (c->tot_orphans >= c->max_orphans) {
82 spin_unlock(&c->orphan_lock);
83 kfree(orphan);
84 return -ENFILE;
85 }
86 p = &c->orph_tree.rb_node;
87 while (*p) {
88 parent = *p;
89 o = rb_entry(parent, struct ubifs_orphan, rb);
90 if (inum < o->inum)
91 p = &(*p)->rb_left;
92 else if (inum > o->inum)
93 p = &(*p)->rb_right;
94 else {
95 dbg_err("orphaned twice");
96 spin_unlock(&c->orphan_lock);
97 kfree(orphan);
98 return 0;
99 }
100 }
101 c->tot_orphans += 1;
102 c->new_orphans += 1;
103 rb_link_node(&orphan->rb, parent, p);
104 rb_insert_color(&orphan->rb, &c->orph_tree);
105 list_add_tail(&orphan->list, &c->orph_list);
106 list_add_tail(&orphan->new_list, &c->orph_new);
107 spin_unlock(&c->orphan_lock);
108 dbg_gen("ino %lu", inum);
109 return 0;
110}
111
112/**
113 * ubifs_delete_orphan - delete an orphan.
114 * @c: UBIFS file-system description object
115 * @inum: orphan inode number
116 *
117 * Delete an orphan. This function is called when an inode is deleted.
118 */
119void ubifs_delete_orphan(struct ubifs_info *c, ino_t inum)
120{
121 struct ubifs_orphan *o;
122 struct rb_node *p;
123
124 spin_lock(&c->orphan_lock);
125 p = c->orph_tree.rb_node;
126 while (p) {
127 o = rb_entry(p, struct ubifs_orphan, rb);
128 if (inum < o->inum)
129 p = p->rb_left;
130 else if (inum > o->inum)
131 p = p->rb_right;
132 else {
133 if (o->dnext) {
134 spin_unlock(&c->orphan_lock);
135 dbg_gen("deleted twice ino %lu", inum);
136 return;
137 }
138 if (o->cnext) {
139 o->dnext = c->orph_dnext;
140 c->orph_dnext = o;
141 spin_unlock(&c->orphan_lock);
142 dbg_gen("delete later ino %lu", inum);
143 return;
144 }
145 rb_erase(p, &c->orph_tree);
146 list_del(&o->list);
147 c->tot_orphans -= 1;
148 if (o->new) {
149 list_del(&o->new_list);
150 c->new_orphans -= 1;
151 }
152 spin_unlock(&c->orphan_lock);
153 kfree(o);
154 dbg_gen("inum %lu", inum);
155 return;
156 }
157 }
158 spin_unlock(&c->orphan_lock);
159 dbg_err("missing orphan ino %lu", inum);
160 dbg_dump_stack();
161}
162
163/**
164 * ubifs_orphan_start_commit - start commit of orphans.
165 * @c: UBIFS file-system description object
166 *
167 * Start commit of orphans.
168 */
169int ubifs_orphan_start_commit(struct ubifs_info *c)
170{
171 struct ubifs_orphan *orphan, **last;
172
173 spin_lock(&c->orphan_lock);
174 last = &c->orph_cnext;
175 list_for_each_entry(orphan, &c->orph_new, new_list) {
176 ubifs_assert(orphan->new);
177 orphan->new = 0;
178 *last = orphan;
179 last = &orphan->cnext;
180 }
181 *last = orphan->cnext;
182 c->cmt_orphans = c->new_orphans;
183 c->new_orphans = 0;
184 dbg_cmt("%d orphans to commit", c->cmt_orphans);
185 INIT_LIST_HEAD(&c->orph_new);
186 if (c->tot_orphans == 0)
187 c->no_orphs = 1;
188 else
189 c->no_orphs = 0;
190 spin_unlock(&c->orphan_lock);
191 return 0;
192}
193
194/**
195 * avail_orphs - calculate available space.
196 * @c: UBIFS file-system description object
197 *
198 * This function returns the number of orphans that can be written in the
199 * available space.
200 */
201static int avail_orphs(struct ubifs_info *c)
202{
203 int avail_lebs, avail, gap;
204
205 avail_lebs = c->orph_lebs - (c->ohead_lnum - c->orph_first) - 1;
206 avail = avail_lebs *
207 ((c->leb_size - UBIFS_ORPH_NODE_SZ) / sizeof(__le64));
208 gap = c->leb_size - c->ohead_offs;
209 if (gap >= UBIFS_ORPH_NODE_SZ + sizeof(__le64))
210 avail += (gap - UBIFS_ORPH_NODE_SZ) / sizeof(__le64);
211 return avail;
212}
213
214/**
215 * tot_avail_orphs - calculate total space.
216 * @c: UBIFS file-system description object
217 *
218 * This function returns the number of orphans that can be written in half
219 * the total space. That leaves half the space for adding new orphans.
220 */
221static int tot_avail_orphs(struct ubifs_info *c)
222{
223 int avail_lebs, avail;
224
225 avail_lebs = c->orph_lebs;
226 avail = avail_lebs *
227 ((c->leb_size - UBIFS_ORPH_NODE_SZ) / sizeof(__le64));
228 return avail / 2;
229}
230
231/**
232 * do_write_orph_node - write a node
233 * @c: UBIFS file-system description object
234 * @len: length of node
235 * @atomic: write atomically
236 *
237 * This function writes a node to the orphan head from the orphan buffer. If
238 * %atomic is not zero, then the write is done atomically. On success, %0 is
239 * returned, otherwise a negative error code is returned.
240 */
241static int do_write_orph_node(struct ubifs_info *c, int len, int atomic)
242{
243 int err = 0;
244
245 if (atomic) {
246 ubifs_assert(c->ohead_offs == 0);
247 ubifs_prepare_node(c, c->orph_buf, len, 1);
248 len = ALIGN(len, c->min_io_size);
249 err = ubifs_leb_change(c, c->ohead_lnum, c->orph_buf, len,
250 UBI_SHORTTERM);
251 } else {
252 if (c->ohead_offs == 0) {
253 /* Ensure LEB has been unmapped */
254 err = ubifs_leb_unmap(c, c->ohead_lnum);
255 if (err)
256 return err;
257 }
258 err = ubifs_write_node(c, c->orph_buf, len, c->ohead_lnum,
259 c->ohead_offs, UBI_SHORTTERM);
260 }
261 return err;
262}
263
264/**
265 * write_orph_node - write an orph node
266 * @c: UBIFS file-system description object
267 * @atomic: write atomically
268 *
269 * This function builds an orph node from the cnext list and writes it to the
270 * orphan head. On success, %0 is returned, otherwise a negative error code
271 * is returned.
272 */
273static int write_orph_node(struct ubifs_info *c, int atomic)
274{
275 struct ubifs_orphan *orphan, *cnext;
276 struct ubifs_orph_node *orph;
277 int gap, err, len, cnt, i;
278
279 ubifs_assert(c->cmt_orphans > 0);
280 gap = c->leb_size - c->ohead_offs;
281 if (gap < UBIFS_ORPH_NODE_SZ + sizeof(__le64)) {
282 c->ohead_lnum += 1;
283 c->ohead_offs = 0;
284 gap = c->leb_size;
285 if (c->ohead_lnum > c->orph_last) {
286 /*
287 * We limit the number of orphans so that this should
288 * never happen.
289 */
290 ubifs_err("out of space in orphan area");
291 return -EINVAL;
292 }
293 }
294 cnt = (gap - UBIFS_ORPH_NODE_SZ) / sizeof(__le64);
295 if (cnt > c->cmt_orphans)
296 cnt = c->cmt_orphans;
297 len = UBIFS_ORPH_NODE_SZ + cnt * sizeof(__le64);
298 ubifs_assert(c->orph_buf);
299 orph = c->orph_buf;
300 orph->ch.node_type = UBIFS_ORPH_NODE;
301 spin_lock(&c->orphan_lock);
302 cnext = c->orph_cnext;
303 for (i = 0; i < cnt; i++) {
304 orphan = cnext;
305 orph->inos[i] = cpu_to_le64(orphan->inum);
306 cnext = orphan->cnext;
307 orphan->cnext = NULL;
308 }
309 c->orph_cnext = cnext;
310 c->cmt_orphans -= cnt;
311 spin_unlock(&c->orphan_lock);
312 if (c->cmt_orphans)
313 orph->cmt_no = cpu_to_le64(c->cmt_no + 1);
314 else
315 /* Mark the last node of the commit */
316 orph->cmt_no = cpu_to_le64((c->cmt_no + 1) | (1ULL << 63));
317 ubifs_assert(c->ohead_offs + len <= c->leb_size);
318 ubifs_assert(c->ohead_lnum >= c->orph_first);
319 ubifs_assert(c->ohead_lnum <= c->orph_last);
320 err = do_write_orph_node(c, len, atomic);
321 c->ohead_offs += ALIGN(len, c->min_io_size);
322 c->ohead_offs = ALIGN(c->ohead_offs, 8);
323 return err;
324}
325
326/**
327 * write_orph_nodes - write orph nodes until there are no more to commit
328 * @c: UBIFS file-system description object
329 * @atomic: write atomically
330 *
331 * This function writes orph nodes for all the orphans to commit. On success,
332 * %0 is returned, otherwise a negative error code is returned.
333 */
334static int write_orph_nodes(struct ubifs_info *c, int atomic)
335{
336 int err;
337
338 while (c->cmt_orphans > 0) {
339 err = write_orph_node(c, atomic);
340 if (err)
341 return err;
342 }
343 if (atomic) {
344 int lnum;
345
346 /* Unmap any unused LEBs after consolidation */
347 lnum = c->ohead_lnum + 1;
348 for (lnum = c->ohead_lnum + 1; lnum <= c->orph_last; lnum++) {
349 err = ubifs_leb_unmap(c, lnum);
350 if (err)
351 return err;
352 }
353 }
354 return 0;
355}
356
357/**
358 * consolidate - consolidate the orphan area.
359 * @c: UBIFS file-system description object
360 *
361 * This function enables consolidation by putting all the orphans into the list
362 * to commit. The list is in the order that the orphans were added, and the
363 * LEBs are written atomically in order, so at no time can orphans be lost by
364 * an unclean unmount.
365 *
366 * This function returns %0 on success and a negative error code on failure.
367 */
368static int consolidate(struct ubifs_info *c)
369{
370 int tot_avail = tot_avail_orphs(c), err = 0;
371
372 spin_lock(&c->orphan_lock);
373 dbg_cmt("there is space for %d orphans and there are %d",
374 tot_avail, c->tot_orphans);
375 if (c->tot_orphans - c->new_orphans <= tot_avail) {
376 struct ubifs_orphan *orphan, **last;
377 int cnt = 0;
378
379 /* Change the cnext list to include all non-new orphans */
380 last = &c->orph_cnext;
381 list_for_each_entry(orphan, &c->orph_list, list) {
382 if (orphan->new)
383 continue;
384 *last = orphan;
385 last = &orphan->cnext;
386 cnt += 1;
387 }
388 *last = orphan->cnext;
389 ubifs_assert(cnt == c->tot_orphans - c->new_orphans);
390 c->cmt_orphans = cnt;
391 c->ohead_lnum = c->orph_first;
392 c->ohead_offs = 0;
393 } else {
394 /*
395 * We limit the number of orphans so that this should
396 * never happen.
397 */
398 ubifs_err("out of space in orphan area");
399 err = -EINVAL;
400 }
401 spin_unlock(&c->orphan_lock);
402 return err;
403}
404
405/**
406 * commit_orphans - commit orphans.
407 * @c: UBIFS file-system description object
408 *
409 * This function commits orphans to flash. On success, %0 is returned,
410 * otherwise a negative error code is returned.
411 */
412static int commit_orphans(struct ubifs_info *c)
413{
414 int avail, atomic = 0, err;
415
416 ubifs_assert(c->cmt_orphans > 0);
417 avail = avail_orphs(c);
418 if (avail < c->cmt_orphans) {
419 /* Not enough space to write new orphans, so consolidate */
420 err = consolidate(c);
421 if (err)
422 return err;
423 atomic = 1;
424 }
425 err = write_orph_nodes(c, atomic);
426 return err;
427}
428
429/**
430 * erase_deleted - erase the orphans marked for deletion.
431 * @c: UBIFS file-system description object
432 *
433 * During commit, the orphans being committed cannot be deleted, so they are
434 * marked for deletion and deleted by this function. Also, the recovery
435 * adds killed orphans to the deletion list, and therefore they are deleted
436 * here too.
437 */
438static void erase_deleted(struct ubifs_info *c)
439{
440 struct ubifs_orphan *orphan, *dnext;
441
442 spin_lock(&c->orphan_lock);
443 dnext = c->orph_dnext;
444 while (dnext) {
445 orphan = dnext;
446 dnext = orphan->dnext;
447 ubifs_assert(!orphan->new);
448 rb_erase(&orphan->rb, &c->orph_tree);
449 list_del(&orphan->list);
450 c->tot_orphans -= 1;
451 dbg_gen("deleting orphan ino %lu", orphan->inum);
452 kfree(orphan);
453 }
454 c->orph_dnext = NULL;
455 spin_unlock(&c->orphan_lock);
456}
457
458/**
459 * ubifs_orphan_end_commit - end commit of orphans.
460 * @c: UBIFS file-system description object
461 *
462 * End commit of orphans.
463 */
464int ubifs_orphan_end_commit(struct ubifs_info *c)
465{
466 int err;
467
468 if (c->cmt_orphans != 0) {
469 err = commit_orphans(c);
470 if (err)
471 return err;
472 }
473 erase_deleted(c);
474 err = dbg_check_orphans(c);
475 return err;
476}
477
478/**
479 * clear_orphans - erase all LEBs used for orphans.
480 * @c: UBIFS file-system description object
481 *
482 * If recovery is not required, then the orphans from the previous session
483 * are not needed. This function locates the LEBs used to record
484 * orphans, and un-maps them.
485 */
486static int clear_orphans(struct ubifs_info *c)
487{
488 int lnum, err;
489
490 for (lnum = c->orph_first; lnum <= c->orph_last; lnum++) {
491 err = ubifs_leb_unmap(c, lnum);
492 if (err)
493 return err;
494 }
495 c->ohead_lnum = c->orph_first;
496 c->ohead_offs = 0;
497 return 0;
498}
499
500/**
501 * insert_dead_orphan - insert an orphan.
502 * @c: UBIFS file-system description object
503 * @inum: orphan inode number
504 *
505 * This function is a helper to the 'do_kill_orphans()' function. The orphan
506 * must be kept until the next commit, so it is added to the rb-tree and the
507 * deletion list.
508 */
509static int insert_dead_orphan(struct ubifs_info *c, ino_t inum)
510{
511 struct ubifs_orphan *orphan, *o;
512 struct rb_node **p, *parent = NULL;
513
514 orphan = kzalloc(sizeof(struct ubifs_orphan), GFP_KERNEL);
515 if (!orphan)
516 return -ENOMEM;
517 orphan->inum = inum;
518
519 p = &c->orph_tree.rb_node;
520 while (*p) {
521 parent = *p;
522 o = rb_entry(parent, struct ubifs_orphan, rb);
523 if (inum < o->inum)
524 p = &(*p)->rb_left;
525 else if (inum > o->inum)
526 p = &(*p)->rb_right;
527 else {
528 /* Already added - no problem */
529 kfree(orphan);
530 return 0;
531 }
532 }
533 c->tot_orphans += 1;
534 rb_link_node(&orphan->rb, parent, p);
535 rb_insert_color(&orphan->rb, &c->orph_tree);
536 list_add_tail(&orphan->list, &c->orph_list);
537 orphan->dnext = c->orph_dnext;
538 c->orph_dnext = orphan;
539 dbg_mnt("ino %lu, new %d, tot %d",
540 inum, c->new_orphans, c->tot_orphans);
541 return 0;
542}
543
544/**
545 * do_kill_orphans - remove orphan inodes from the index.
546 * @c: UBIFS file-system description object
547 * @sleb: scanned LEB
548 * @last_cmt_no: cmt_no of last orph node read is passed and returned here
549 * @outofdate: whether the LEB is out of date is returned here
550 * @last_flagged: whether the end orph node is encountered
551 *
552 * This function is a helper to the 'kill_orphans()' function. It goes through
553 * every orphan node in a LEB and for every inode number recorded, removes
554 * all keys for that inode from the TNC.
555 */
556static int do_kill_orphans(struct ubifs_info *c, struct ubifs_scan_leb *sleb,
557 unsigned long long *last_cmt_no, int *outofdate,
558 int *last_flagged)
559{
560 struct ubifs_scan_node *snod;
561 struct ubifs_orph_node *orph;
562 unsigned long long cmt_no;
563 ino_t inum;
564 int i, n, err, first = 1;
565
566 list_for_each_entry(snod, &sleb->nodes, list) {
567 if (snod->type != UBIFS_ORPH_NODE) {
568 ubifs_err("invalid node type %d in orphan area at "
569 "%d:%d", snod->type, sleb->lnum, snod->offs);
570 dbg_dump_node(c, snod->node);
571 return -EINVAL;
572 }
573
574 orph = snod->node;
575
576 /* Check commit number */
577 cmt_no = le64_to_cpu(orph->cmt_no) & LLONG_MAX;
578 /*
579 * The commit number on the master node may be less, because
580 * of a failed commit. If there are several failed commits in a
581 * row, the commit number written on orph nodes will continue to
582 * increase (because the commit number is adjusted here) even
583 * though the commit number on the master node stays the same
584 * because the master node has not been re-written.
585 */
586 if (cmt_no > c->cmt_no)
587 c->cmt_no = cmt_no;
588 if (cmt_no < *last_cmt_no && *last_flagged) {
589 /*
590 * The last orph node had a higher commit number and was
591 * flagged as the last written for that commit number.
592 * That makes this orph node, out of date.
593 */
594 if (!first) {
595 ubifs_err("out of order commit number %llu in "
596 "orphan node at %d:%d",
597 cmt_no, sleb->lnum, snod->offs);
598 dbg_dump_node(c, snod->node);
599 return -EINVAL;
600 }
601 dbg_rcvry("out of date LEB %d", sleb->lnum);
602 *outofdate = 1;
603 return 0;
604 }
605
606 if (first)
607 first = 0;
608
609 n = (le32_to_cpu(orph->ch.len) - UBIFS_ORPH_NODE_SZ) >> 3;
610 for (i = 0; i < n; i++) {
611 inum = le64_to_cpu(orph->inos[i]);
612 dbg_rcvry("deleting orphaned inode %lu", inum);
613 err = ubifs_tnc_remove_ino(c, inum);
614 if (err)
615 return err;
616 err = insert_dead_orphan(c, inum);
617 if (err)
618 return err;
619 }
620
621 *last_cmt_no = cmt_no;
622 if (le64_to_cpu(orph->cmt_no) & (1ULL << 63)) {
623 dbg_rcvry("last orph node for commit %llu at %d:%d",
624 cmt_no, sleb->lnum, snod->offs);
625 *last_flagged = 1;
626 } else
627 *last_flagged = 0;
628 }
629
630 return 0;
631}
632
633/**
634 * kill_orphans - remove all orphan inodes from the index.
635 * @c: UBIFS file-system description object
636 *
637 * If recovery is required, then orphan inodes recorded during the previous
638 * session (which ended with an unclean unmount) must be deleted from the index.
639 * This is done by updating the TNC, but since the index is not updated until
640 * the next commit, the LEBs where the orphan information is recorded are not
641 * erased until the next commit.
642 */
643static int kill_orphans(struct ubifs_info *c)
644{
645 unsigned long long last_cmt_no = 0;
646 int lnum, err = 0, outofdate = 0, last_flagged = 0;
647
648 c->ohead_lnum = c->orph_first;
649 c->ohead_offs = 0;
650 /* Check no-orphans flag and skip this if no orphans */
651 if (c->no_orphs) {
652 dbg_rcvry("no orphans");
653 return 0;
654 }
655 /*
656 * Orph nodes always start at c->orph_first and are written to each
657 * successive LEB in turn. Generally unused LEBs will have been unmapped
658 * but may contain out of date orph nodes if the unmap didn't go
659 * through. In addition, the last orph node written for each commit is
660 * marked (top bit of orph->cmt_no is set to 1). It is possible that
661 * there are orph nodes from the next commit (i.e. the commit did not
662 * complete successfully). In that case, no orphans will have been lost
663 * due to the way that orphans are written, and any orphans added will
664 * be valid orphans anyway and so can be deleted.
665 */
666 for (lnum = c->orph_first; lnum <= c->orph_last; lnum++) {
667 struct ubifs_scan_leb *sleb;
668
669 dbg_rcvry("LEB %d", lnum);
670 sleb = ubifs_scan(c, lnum, 0, c->sbuf);
671 if (IS_ERR(sleb)) {
672 sleb = ubifs_recover_leb(c, lnum, 0, c->sbuf, 0);
673 if (IS_ERR(sleb)) {
674 err = PTR_ERR(sleb);
675 break;
676 }
677 }
678 err = do_kill_orphans(c, sleb, &last_cmt_no, &outofdate,
679 &last_flagged);
680 if (err || outofdate) {
681 ubifs_scan_destroy(sleb);
682 break;
683 }
684 if (sleb->endpt) {
685 c->ohead_lnum = lnum;
686 c->ohead_offs = sleb->endpt;
687 }
688 ubifs_scan_destroy(sleb);
689 }
690 return err;
691}
692
693/**
694 * ubifs_mount_orphans - delete orphan inodes and erase LEBs that recorded them.
695 * @c: UBIFS file-system description object
696 * @unclean: indicates recovery from unclean unmount
697 * @read_only: indicates read only mount
698 *
699 * This function is called when mounting to erase orphans from the previous
700 * session. If UBIFS was not unmounted cleanly, then the inodes recorded as
701 * orphans are deleted.
702 */
703int ubifs_mount_orphans(struct ubifs_info *c, int unclean, int read_only)
704{
705 int err = 0;
706
707 c->max_orphans = tot_avail_orphs(c);
708
709 if (!read_only) {
710 c->orph_buf = vmalloc(c->leb_size);
711 if (!c->orph_buf)
712 return -ENOMEM;
713 }
714
715 if (unclean)
716 err = kill_orphans(c);
717 else if (!read_only)
718 err = clear_orphans(c);
719
720 return err;
721}
722
723#ifdef CONFIG_UBIFS_FS_DEBUG
724
725struct check_orphan {
726 struct rb_node rb;
727 ino_t inum;
728};
729
730struct check_info {
731 unsigned long last_ino;
732 unsigned long tot_inos;
733 unsigned long missing;
734 unsigned long long leaf_cnt;
735 struct ubifs_ino_node *node;
736 struct rb_root root;
737};
738
739static int dbg_find_orphan(struct ubifs_info *c, ino_t inum)
740{
741 struct ubifs_orphan *o;
742 struct rb_node *p;
743
744 spin_lock(&c->orphan_lock);
745 p = c->orph_tree.rb_node;
746 while (p) {
747 o = rb_entry(p, struct ubifs_orphan, rb);
748 if (inum < o->inum)
749 p = p->rb_left;
750 else if (inum > o->inum)
751 p = p->rb_right;
752 else {
753 spin_unlock(&c->orphan_lock);
754 return 1;
755 }
756 }
757 spin_unlock(&c->orphan_lock);
758 return 0;
759}
760
761static int dbg_ins_check_orphan(struct rb_root *root, ino_t inum)
762{
763 struct check_orphan *orphan, *o;
764 struct rb_node **p, *parent = NULL;
765
766 orphan = kzalloc(sizeof(struct check_orphan), GFP_NOFS);
767 if (!orphan)
768 return -ENOMEM;
769 orphan->inum = inum;
770
771 p = &root->rb_node;
772 while (*p) {
773 parent = *p;
774 o = rb_entry(parent, struct check_orphan, rb);
775 if (inum < o->inum)
776 p = &(*p)->rb_left;
777 else if (inum > o->inum)
778 p = &(*p)->rb_right;
779 else {
780 kfree(orphan);
781 return 0;
782 }
783 }
784 rb_link_node(&orphan->rb, parent, p);
785 rb_insert_color(&orphan->rb, root);
786 return 0;
787}
788
789static int dbg_find_check_orphan(struct rb_root *root, ino_t inum)
790{
791 struct check_orphan *o;
792 struct rb_node *p;
793
794 p = root->rb_node;
795 while (p) {
796 o = rb_entry(p, struct check_orphan, rb);
797 if (inum < o->inum)
798 p = p->rb_left;
799 else if (inum > o->inum)
800 p = p->rb_right;
801 else
802 return 1;
803 }
804 return 0;
805}
806
807static void dbg_free_check_tree(struct rb_root *root)
808{
809 struct rb_node *this = root->rb_node;
810 struct check_orphan *o;
811
812 while (this) {
813 if (this->rb_left) {
814 this = this->rb_left;
815 continue;
816 } else if (this->rb_right) {
817 this = this->rb_right;
818 continue;
819 }
820 o = rb_entry(this, struct check_orphan, rb);
821 this = rb_parent(this);
822 if (this) {
823 if (this->rb_left == &o->rb)
824 this->rb_left = NULL;
825 else
826 this->rb_right = NULL;
827 }
828 kfree(o);
829 }
830}
831
832static int dbg_orphan_check(struct ubifs_info *c, struct ubifs_zbranch *zbr,
833 void *priv)
834{
835 struct check_info *ci = priv;
836 ino_t inum;
837 int err;
838
839 inum = key_inum(c, &zbr->key);
840 if (inum != ci->last_ino) {
841 /* Lowest node type is the inode node, so it comes first */
842 if (key_type(c, &zbr->key) != UBIFS_INO_KEY)
843 ubifs_err("found orphan node ino %lu, type %d", inum,
844 key_type(c, &zbr->key));
845 ci->last_ino = inum;
846 ci->tot_inos += 1;
847 err = ubifs_tnc_read_node(c, zbr, ci->node);
848 if (err) {
849 ubifs_err("node read failed, error %d", err);
850 return err;
851 }
852 if (ci->node->nlink == 0)
853 /* Must be recorded as an orphan */
854 if (!dbg_find_check_orphan(&ci->root, inum) &&
855 !dbg_find_orphan(c, inum)) {
856 ubifs_err("missing orphan, ino %lu", inum);
857 ci->missing += 1;
858 }
859 }
860 ci->leaf_cnt += 1;
861 return 0;
862}
863
864static int dbg_read_orphans(struct check_info *ci, struct ubifs_scan_leb *sleb)
865{
866 struct ubifs_scan_node *snod;
867 struct ubifs_orph_node *orph;
868 ino_t inum;
869 int i, n, err;
870
871 list_for_each_entry(snod, &sleb->nodes, list) {
872 cond_resched();
873 if (snod->type != UBIFS_ORPH_NODE)
874 continue;
875 orph = snod->node;
876 n = (le32_to_cpu(orph->ch.len) - UBIFS_ORPH_NODE_SZ) >> 3;
877 for (i = 0; i < n; i++) {
878 inum = le64_to_cpu(orph->inos[i]);
879 err = dbg_ins_check_orphan(&ci->root, inum);
880 if (err)
881 return err;
882 }
883 }
884 return 0;
885}
886
887static int dbg_scan_orphans(struct ubifs_info *c, struct check_info *ci)
888{
889 int lnum, err = 0;
890
891 /* Check no-orphans flag and skip this if no orphans */
892 if (c->no_orphs)
893 return 0;
894
895 for (lnum = c->orph_first; lnum <= c->orph_last; lnum++) {
896 struct ubifs_scan_leb *sleb;
897
898 sleb = ubifs_scan(c, lnum, 0, c->dbg_buf);
899 if (IS_ERR(sleb)) {
900 err = PTR_ERR(sleb);
901 break;
902 }
903
904 err = dbg_read_orphans(ci, sleb);
905 ubifs_scan_destroy(sleb);
906 if (err)
907 break;
908 }
909
910 return err;
911}
912
913static int dbg_check_orphans(struct ubifs_info *c)
914{
915 struct check_info ci;
916 int err;
917
918 if (!(ubifs_chk_flags & UBIFS_CHK_ORPH))
919 return 0;
920
921 ci.last_ino = 0;
922 ci.tot_inos = 0;
923 ci.missing = 0;
924 ci.leaf_cnt = 0;
925 ci.root = RB_ROOT;
926 ci.node = kmalloc(UBIFS_MAX_INO_NODE_SZ, GFP_NOFS);
927 if (!ci.node) {
928 ubifs_err("out of memory");
929 return -ENOMEM;
930 }
931
932 err = dbg_scan_orphans(c, &ci);
933 if (err)
934 goto out;
935
936 err = dbg_walk_index(c, &dbg_orphan_check, NULL, &ci);
937 if (err) {
938 ubifs_err("cannot scan TNC, error %d", err);
939 goto out;
940 }
941
942 if (ci.missing) {
943 ubifs_err("%lu missing orphan(s)", ci.missing);
944 err = -EINVAL;
945 goto out;
946 }
947
948 dbg_cmt("last inode number is %lu", ci.last_ino);
949 dbg_cmt("total number of inodes is %lu", ci.tot_inos);
950 dbg_cmt("total number of leaf nodes is %llu", ci.leaf_cnt);
951
952out:
953 dbg_free_check_tree(&ci.root);
954 kfree(ci.node);
955 return err;
956}
957
958#endif /* CONFIG_UBIFS_FS_DEBUG */
diff --git a/fs/ubifs/recovery.c b/fs/ubifs/recovery.c
new file mode 100644
index 00000000000..77d26c141cf
--- /dev/null
+++ b/fs/ubifs/recovery.c
@@ -0,0 +1,1519 @@
1/*
2 * This file is part of UBIFS.
3 *
4 * Copyright (C) 2006-2008 Nokia Corporation
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms of the GNU General Public License version 2 as published by
8 * the Free Software Foundation.
9 *
10 * This program is distributed in the hope that it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13 * more details.
14 *
15 * You should have received a copy of the GNU General Public License along with
16 * this program; if not, write to the Free Software Foundation, Inc., 51
17 * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
18 *
19 * Authors: Adrian Hunter
20 * Artem Bityutskiy (Битюцкий Артём)
21 */
22
23/*
24 * This file implements functions needed to recover from unclean un-mounts.
25 * When UBIFS is mounted, it checks a flag on the master node to determine if
26 * an un-mount was completed successfully. If not, the process of mounting
27 * incorporates additional checking and fixing of on-flash data structures.
28 * UBIFS always cleans away all remnants of an unclean un-mount, so that
29 * errors do not accumulate. However UBIFS defers recovery if it is mounted
30 * read-only, and the flash is not modified in that case.
31 */
32
33#include <linux/crc32.h>
34#include "ubifs.h"
35
/**
 * is_empty - determine whether a buffer is empty (contains all 0xff).
 * @buf: buffer to check
 * @len: length of buffer
 *
 * The buffer is only read, never modified. A @len of zero (or less) is treated
 * as an empty buffer.
 *
 * This function returns %1 if the buffer is empty (contains all 0xff) otherwise
 * %0 is returned.
 */
static int is_empty(const void *buf, int len)
{
	const uint8_t *p = buf;
	int i;

	for (i = 0; i < len; i++)
		if (p[i] != 0xff)
			return 0;
	return 1;
}
54
55/**
56 * get_master_node - get the last valid master node allowing for corruption.
57 * @c: UBIFS file-system description object
58 * @lnum: LEB number
59 * @pbuf: buffer containing the LEB read, is returned here
60 * @mst: master node, if found, is returned here
61 * @cor: corruption, if found, is returned here
62 *
63 * This function allocates a buffer, reads the LEB into it, and finds and
64 * returns the last valid master node allowing for one area of corruption.
65 * The corrupt area, if there is one, must be consistent with the assumption
66 * that it is the result of an unclean unmount while the master node was being
67 * written. Under those circumstances, it is valid to use the previously written
68 * master node.
69 *
 * On success the vmalloc'ed buffer is handed over through @pbuf, and @mst and
 * @cor, when assigned, point into that buffer. They are only assigned when a
 * master node or a corrupt area is actually found. On failure the buffer is
 * freed here, @pbuf is left untouched and both @mst and @cor are set to %NULL.
 *
70 * This function returns %0 on success and a negative error code on failure.
71 */
72static int get_master_node(const struct ubifs_info *c, int lnum, void **pbuf,
73 struct ubifs_mst_node **mst, void **cor)
74{
75 const int sz = c->mst_node_alsz;
76 int err, offs, len;
77 void *sbuf, *buf;
78
79 sbuf = vmalloc(c->leb_size);
80 if (!sbuf)
81 return -ENOMEM;
82
 /* A -EBADMSG result is tolerated: the contents are validated below */
83 err = ubi_read(c->ubi, lnum, sbuf, 0, c->leb_size);
84 if (err && err != -EBADMSG)
85 goto out_free;
86
87 /* Find the first position that is definitely not a node */
88 offs = 0;
89 buf = sbuf;
90 len = c->leb_size;
 /* Step in units of 'sz' while each slot starts with the node magic */
91 while (offs + UBIFS_MST_NODE_SZ <= c->leb_size) {
92 struct ubifs_ch *ch = buf;
93
94 if (le32_to_cpu(ch->magic) != UBIFS_NODE_MAGIC)
95 break;
96 offs += sz;
97 buf += sz;
98 len -= sz;
99 }
100 /* See if there was a valid master node before that */
101 if (offs) {
102 int ret;
103
104 offs -= sz;
105 buf -= sz;
106 len += sz;
107 ret = ubifs_scan_a_node(c, buf, len, lnum, offs, 1);
108 if (ret != SCANNED_A_NODE && offs) {
109 /* Could have been corruption so check one place back */
110 offs -= sz;
111 buf -= sz;
112 len += sz;
113 ret = ubifs_scan_a_node(c, buf, len, lnum, offs, 1);
114 if (ret != SCANNED_A_NODE)
115 /*
116 * We accept only one area of corruption because
117 * we are assuming that it was caused while
118 * trying to write a master node.
119 */
120 goto out_err;
121 }
122 if (ret == SCANNED_A_NODE) {
123 struct ubifs_ch *ch = buf;
124
125 if (ch->node_type != UBIFS_MST_NODE)
126 goto out_err;
127 dbg_rcvry("found a master node at %d:%d", lnum, offs);
128 *mst = buf;
 /* Move past the master node so the checks below start after it */
129 offs += sz;
130 buf += sz;
131 len -= sz;
132 }
133 }
134 /* Check for corruption */
135 if (offs < c->leb_size) {
 /* At most one 'sz'-sized slot may be non-empty (corrupt) */
136 if (!is_empty(buf, min_t(int, len, sz))) {
137 *cor = buf;
138 dbg_rcvry("found corruption at %d:%d", lnum, offs);
139 }
140 offs += sz;
141 buf += sz;
142 len -= sz;
143 }
144 /* Check remaining empty space */
145 if (offs < c->leb_size)
146 if (!is_empty(buf, len))
147 goto out_err;
148 *pbuf = sbuf;
149 return 0;
150
151out_err:
152 err = -EINVAL;
 /* Also reached directly on read failure, with 'err' already set */
153out_free:
154 vfree(sbuf);
155 *mst = NULL;
156 *cor = NULL;
157 return err;
158}
159
160/**
161 * write_rcvrd_mst_node - write recovered master node.
162 * @c: UBIFS file-system description object
163 * @mst: master node
164 *
165 * This function returns %0 on success and a negative error code on failure.
166 */
167static int write_rcvrd_mst_node(struct ubifs_info *c,
168 struct ubifs_mst_node *mst)
169{
170 int err = 0, lnum = UBIFS_MST_LNUM, sz = c->mst_node_alsz;
171 uint32_t save_flags;
172
173 dbg_rcvry("recovery");
174
175 save_flags = mst->flags;
176 mst->flags = cpu_to_le32(le32_to_cpu(mst->flags) | UBIFS_MST_RCVRY);
177
178 ubifs_prepare_node(c, mst, UBIFS_MST_NODE_SZ, 1);
179 err = ubi_leb_change(c->ubi, lnum, mst, sz, UBI_SHORTTERM);
180 if (err)
181 goto out;
182 err = ubi_leb_change(c->ubi, lnum + 1, mst, sz, UBI_SHORTTERM);
183 if (err)
184 goto out;
185out:
186 mst->flags = save_flags;
187 return err;
188}
189
190/**
191 * ubifs_recover_master_node - recover the master node.
192 * @c: UBIFS file-system description object
193 *
194 * This function recovers the master node from corruption that may occur due to
195 * an unclean unmount.
196 *
 * Both master LEBs (%UBIFS_MST_LNUM and the one after it) are read with
 * 'get_master_node()' and the node to trust is chosen from the offsets of the
 * last valid node in each LEB and from whether corruption was seen in the first
 * LEB. The chosen node is copied to @c->mst_node. When mounted read-only a copy
 * is kept in @c->rcvrd_mst_node for a later switch to rw mode; otherwise the
 * node is written out with 'write_rcvrd_mst_node()'.
 *
197 * This function returns %0 on success and a negative error code on failure.
198 */
199int ubifs_recover_master_node(struct ubifs_info *c)
200{
201 void *buf1 = NULL, *buf2 = NULL, *cor1 = NULL, *cor2 = NULL;
202 struct ubifs_mst_node *mst1 = NULL, *mst2 = NULL, *mst;
203 const int sz = c->mst_node_alsz;
204 int err, offs1, offs2;
205
206 dbg_rcvry("recovery");
207
208 err = get_master_node(c, UBIFS_MST_LNUM, &buf1, &mst1, &cor1);
209 if (err)
210 goto out_free;
211
212 err = get_master_node(c, UBIFS_MST_LNUM + 1, &buf2, &mst2, &cor2);
213 if (err)
214 goto out_free;
215
216 if (mst1) {
 /* Offset of the last valid master node within the first LEB */
217 offs1 = (void *)mst1 - buf1;
218 if ((le32_to_cpu(mst1->flags) & UBIFS_MST_RCVRY) &&
219 (offs1 == 0 && !cor1)) {
220 /*
221 * mst1 was written by recovery at offset 0 with no
222 * corruption.
223 */
224 dbg_rcvry("recovery recovery");
225 mst = mst1;
226 } else if (mst2) {
227 offs2 = (void *)mst2 - buf2;
228 if (offs1 == offs2) {
229 /* Same offset, so must be the same */
 /* Only the part after the common header is compared */
230 if (memcmp((void *)mst1 + UBIFS_CH_SZ,
231 (void *)mst2 + UBIFS_CH_SZ,
232 UBIFS_MST_NODE_SZ - UBIFS_CH_SZ))
233 goto out_err;
234 mst = mst1;
235 } else if (offs2 + sz == offs1) {
236 /* 1st LEB was written, 2nd was not */
237 if (cor1)
238 goto out_err;
239 mst = mst1;
240 } else if (offs1 == 0 && offs2 + sz >= c->leb_size) {
241 /* 1st LEB was unmapped and written, 2nd not */
242 if (cor1)
243 goto out_err;
244 mst = mst1;
245 } else
246 goto out_err;
247 } else {
248 /*
249 * 2nd LEB was unmapped and about to be written, so
250 * there must be only one master node in the first LEB
251 * and no corruption.
252 */
253 if (offs1 != 0 || cor1)
254 goto out_err;
255 mst = mst1;
256 }
257 } else {
258 if (!mst2)
259 goto out_err;
260 /*
261 * 1st LEB was unmapped and about to be written, so there must
262 * be no room left in 2nd LEB.
263 */
264 offs2 = (void *)mst2 - buf2;
265 if (offs2 + sz + sz <= c->leb_size)
266 goto out_err;
267 mst = mst2;
268 }
269
270 dbg_rcvry("recovered master node from LEB %d",
271 (mst == mst1 ? UBIFS_MST_LNUM : UBIFS_MST_LNUM + 1));
272
 /* 'mst' points into buf1 or buf2, so copy it out before they are freed */
273 memcpy(c->mst_node, mst, UBIFS_MST_NODE_SZ);
274
275 if ((c->vfs_sb->s_flags & MS_RDONLY)) {
276 /* Read-only mode. Keep a copy for switching to rw mode */
277 c->rcvrd_mst_node = kmalloc(sz, GFP_KERNEL);
278 if (!c->rcvrd_mst_node) {
279 err = -ENOMEM;
280 goto out_free;
281 }
282 memcpy(c->rcvrd_mst_node, c->mst_node, UBIFS_MST_NODE_SZ);
283 } else {
284 /* Write the recovered master node */
285 c->max_sqnum = le64_to_cpu(mst->ch.sqnum) - 1;
286 err = write_rcvrd_mst_node(c, c->mst_node);
287 if (err)
288 goto out_free;
289 }
290
291 vfree(buf2);
292 vfree(buf1);
293
294 return 0;
295
296out_err:
297 err = -EINVAL;
298out_free:
299 ubifs_err("failed to recover master node");
 /* Dump whatever was found before the buffers holding it are freed */
300 if (mst1) {
301 dbg_err("dumping first master node");
302 dbg_dump_node(c, mst1);
303 }
304 if (mst2) {
305 dbg_err("dumping second master node");
306 dbg_dump_node(c, mst2);
307 }
308 vfree(buf2);
309 vfree(buf1);
310 return err;
311}
312
313/**
314 * ubifs_write_rcvrd_mst_node - write the recovered master node.
315 * @c: UBIFS file-system description object
316 *
317 * This function writes the master node that was recovered during mounting in
318 * read-only mode and must now be written because we are remounting rw.
319 *
320 * This function returns %0 on success and a negative error code on failure.
321 */
322int ubifs_write_rcvrd_mst_node(struct ubifs_info *c)
323{
324 int err;
325
326 if (!c->rcvrd_mst_node)
327 return 0;
328 c->rcvrd_mst_node->flags |= cpu_to_le32(UBIFS_MST_DIRTY);
329 c->mst_node->flags |= cpu_to_le32(UBIFS_MST_DIRTY);
330 err = write_rcvrd_mst_node(c, c->rcvrd_mst_node);
331 if (err)
332 return err;
333 kfree(c->rcvrd_mst_node);
334 c->rcvrd_mst_node = NULL;
335 return 0;
336}
337
338/**
339 * is_last_write - determine if an offset was in the last write to a LEB.
340 * @c: UBIFS file-system description object
341 * @buf: buffer to check
342 * @offs: offset to check
343 *
344 * This function returns %1 if @offs was in the last write to the LEB whose data
345 * is in @buf, otherwise %0 is returned. The determination is made by checking
346 * for subsequent empty space starting from the next min_io_size boundary (or a
347 * bit less than the common header size if min_io_size is one).
348 */
349static int is_last_write(const struct ubifs_info *c, void *buf, int offs)
350{
351 int empty_offs;
352 int check_len;
353 uint8_t *p;
354
355 if (c->min_io_size == 1) {
356 check_len = c->leb_size - offs;
357 p = buf + check_len;
358 for (; check_len > 0; check_len--)
359 if (*--p != 0xff)
360 break;
361 /*
362 * 'check_len' is the size of the corruption which cannot be
363 * more than the size of 1 node if it was caused by an unclean
364 * unmount.
365 */
366 if (check_len > UBIFS_MAX_NODE_SZ)
367 return 0;
368 return 1;
369 }
370
371 /*
372 * Round up to the next c->min_io_size boundary i.e. 'offs' is in the
373 * last wbuf written. After that should be empty space.
374 */
375 empty_offs = ALIGN(offs + 1, c->min_io_size);
376 check_len = c->leb_size - empty_offs;
377 p = buf + empty_offs - offs;
378
379 for (; check_len > 0; check_len--)
380 if (*p++ != 0xff)
381 return 0;
382 return 1;
383}
384
385/**
386 * clean_buf - clean the data from an LEB sitting in a buffer.
387 * @c: UBIFS file-system description object
388 * @buf: buffer to clean
389 * @lnum: LEB number to clean
390 * @offs: offset from which to clean
391 * @len: length of buffer
392 *
393 * This function pads up to the next min_io_size boundary (if there is one) and
394 * sets empty space to all 0xff. @buf, @offs and @len are updated to the next
395 * min_io_size boundary (if there is one).
396 */
397static void clean_buf(const struct ubifs_info *c, void **buf, int lnum,
398 int *offs, int *len)
399{
400 int empty_offs, pad_len;
401
402 lnum = lnum;
403 dbg_rcvry("cleaning corruption at %d:%d", lnum, *offs);
404
405 if (c->min_io_size == 1) {
406 memset(*buf, 0xff, c->leb_size - *offs);
407 return;
408 }
409
410 ubifs_assert(!(*offs & 7));
411 empty_offs = ALIGN(*offs, c->min_io_size);
412 pad_len = empty_offs - *offs;
413 ubifs_pad(c, *buf, pad_len);
414 *offs += pad_len;
415 *buf += pad_len;
416 *len -= pad_len;
417 memset(*buf, 0xff, c->leb_size - empty_offs);
418}
419
420/**
421 * no_more_nodes - determine if there are no more nodes in a buffer.
422 * @c: UBIFS file-system description object
423 * @buf: buffer to check
424 * @len: length of buffer
425 * @lnum: LEB number of the LEB from which @buf was read
426 * @offs: offset from which @buf was read
427 *
428 * This function scans @buf for more nodes and returns %0 is a node is found and
429 * %1 if no more nodes are found.
430 */
431static int no_more_nodes(const struct ubifs_info *c, void *buf, int len,
432 int lnum, int offs)
433{
434 int skip, next_offs = 0;
435
436 if (len > UBIFS_DATA_NODE_SZ) {
437 struct ubifs_ch *ch = buf;
438 int dlen = le32_to_cpu(ch->len);
439
440 if (ch->node_type == UBIFS_DATA_NODE && dlen >= UBIFS_CH_SZ &&
441 dlen <= UBIFS_MAX_DATA_NODE_SZ)
442 /* The corrupt node looks like a data node */
443 next_offs = ALIGN(offs + dlen, 8);
444 }
445
446 if (c->min_io_size == 1)
447 skip = 8;
448 else
449 skip = ALIGN(offs + 1, c->min_io_size) - offs;
450
451 offs += skip;
452 buf += skip;
453 len -= skip;
454 while (len > 8) {
455 struct ubifs_ch *ch = buf;
456 uint32_t magic = le32_to_cpu(ch->magic);
457 int ret;
458
459 if (magic == UBIFS_NODE_MAGIC) {
460 ret = ubifs_scan_a_node(c, buf, len, lnum, offs, 1);
461 if (ret == SCANNED_A_NODE || ret > 0) {
462 /*
463 * There is a small chance this is just data in
464 * a data node, so check that possibility. e.g.
465 * this is part of a file that itself contains
466 * a UBIFS image.
467 */
468 if (next_offs && offs + le32_to_cpu(ch->len) <=
469 next_offs)
470 continue;
471 dbg_rcvry("unexpected node at %d:%d", lnum,
472 offs);
473 return 0;
474 }
475 }
476 offs += 8;
477 buf += 8;
478 len -= 8;
479 }
480 return 1;
481}
482
483/**
484 * fix_unclean_leb - fix an unclean LEB.
485 * @c: UBIFS file-system description object
486 * @sleb: scanned LEB information
487 * @start: offset where scan started
488 */
489static int fix_unclean_leb(struct ubifs_info *c, struct ubifs_scan_leb *sleb,
490 int start)
491{
492 int lnum = sleb->lnum, endpt = start;
493
494 /* Get the end offset of the last node we are keeping */
495 if (!list_empty(&sleb->nodes)) {
496 struct ubifs_scan_node *snod;
497
498 snod = list_entry(sleb->nodes.prev,
499 struct ubifs_scan_node, list);
500 endpt = snod->offs + snod->len;
501 }
502
503 if ((c->vfs_sb->s_flags & MS_RDONLY) && !c->remounting_rw) {
504 /* Add to recovery list */
505 struct ubifs_unclean_leb *ucleb;
506
507 dbg_rcvry("need to fix LEB %d start %d endpt %d",
508 lnum, start, sleb->endpt);
509 ucleb = kzalloc(sizeof(struct ubifs_unclean_leb), GFP_NOFS);
510 if (!ucleb)
511 return -ENOMEM;
512 ucleb->lnum = lnum;
513 ucleb->endpt = endpt;
514 list_add_tail(&ucleb->list, &c->unclean_leb_list);
515 } else {
516 /* Write the fixed LEB back to flash */
517 int err;
518
519 dbg_rcvry("fixing LEB %d start %d endpt %d",
520 lnum, start, sleb->endpt);
521 if (endpt == 0) {
522 err = ubifs_leb_unmap(c, lnum);
523 if (err)
524 return err;
525 } else {
526 int len = ALIGN(endpt, c->min_io_size);
527
528 if (start) {
529 err = ubi_read(c->ubi, lnum, sleb->buf, 0,
530 start);
531 if (err)
532 return err;
533 }
534 /* Pad to min_io_size */
535 if (len > endpt) {
536 int pad_len = len - ALIGN(endpt, 8);
537
538 if (pad_len > 0) {
539 void *buf = sleb->buf + len - pad_len;
540
541 ubifs_pad(c, buf, pad_len);
542 }
543 }
544 err = ubi_leb_change(c->ubi, lnum, sleb->buf, len,
545 UBI_UNKNOWN);
546 if (err)
547 return err;
548 }
549 }
550 return 0;
551}
552
553/**
554 * drop_incomplete_group - drop nodes from an incomplete group.
555 * @sleb: scanned LEB information
556 * @offs: offset of dropped nodes is returned here
557 *
558 * This function returns %1 if nodes are dropped and %0 otherwise.
559 */
560static int drop_incomplete_group(struct ubifs_scan_leb *sleb, int *offs)
561{
562 int dropped = 0;
563
564 while (!list_empty(&sleb->nodes)) {
565 struct ubifs_scan_node *snod;
566 struct ubifs_ch *ch;
567
568 snod = list_entry(sleb->nodes.prev, struct ubifs_scan_node,
569 list);
570 ch = snod->node;
571 if (ch->group_type != UBIFS_IN_NODE_GROUP)
572 return dropped;
573 dbg_rcvry("dropping node at %d:%d", sleb->lnum, snod->offs);
574 *offs = snod->offs;
575 list_del(&snod->list);
576 kfree(snod);
577 sleb->nodes_cnt -= 1;
578 dropped = 1;
579 }
580 return dropped;
581}
582
583/**
584 * ubifs_recover_leb - scan and recover a LEB.
585 * @c: UBIFS file-system description object
586 * @lnum: LEB number
587 * @offs: offset
588 * @sbuf: LEB-sized buffer to use
589 * @grouped: nodes may be grouped for recovery
590 *
591 * This function does a scan of a LEB, but caters for errors that might have
592 * been caused by the unclean unmount from which we are attempting to recover.
593 *
594 * This function returns the scanned LEB information on success and an error
 * pointer (see 'ERR_PTR()') carrying a negative error code on failure.
595 */
596struct ubifs_scan_leb *ubifs_recover_leb(struct ubifs_info *c, int lnum,
597 int offs, void *sbuf, int grouped)
598{
599 int err, len = c->leb_size - offs, need_clean = 0, quiet = 1;
600 int empty_chkd = 0, start = offs;
601 struct ubifs_scan_leb *sleb;
602 void *buf = sbuf + offs;
603
604 dbg_rcvry("%d:%d", lnum, offs);
605
606 sleb = ubifs_start_scan(c, lnum, offs, sbuf);
607 if (IS_ERR(sleb))
608 return sleb;
609
 /* If the scan start flagged 'ecc', the LEB is rewritten at the end */
610 if (sleb->ecc)
611 need_clean = 1;
612
613 while (len >= 8) {
614 int ret;
615
616 dbg_scan("look at LEB %d:%d (%d bytes left)",
617 lnum, offs, len);
618
619 cond_resched();
620
621 /*
622 * Scan quietly until there is an error from which we cannot
623 * recover
624 */
625 ret = ubifs_scan_a_node(c, buf, len, lnum, offs, quiet);
626
627 if (ret == SCANNED_A_NODE) {
628 /* A valid node, and not a padding node */
629 struct ubifs_ch *ch = buf;
630 int node_len;
631
632 err = ubifs_add_snod(c, sleb, buf, offs);
633 if (err)
634 goto error;
635 node_len = ALIGN(le32_to_cpu(ch->len), 8);
636 offs += node_len;
637 buf += node_len;
638 len -= node_len;
639 continue;
640 }
641
642 if (ret > 0) {
643 /* Padding bytes or a valid padding node */
644 offs += ret;
645 buf += ret;
646 len -= ret;
647 continue;
648 }
649
650 if (ret == SCANNED_EMPTY_SPACE) {
 /* Not all 0xff after all: fine only if this was the last write */
651 if (!is_empty(buf, len)) {
652 if (!is_last_write(c, buf, offs))
653 break;
654 clean_buf(c, &buf, lnum, &offs, &len);
655 need_clean = 1;
656 }
657 empty_chkd = 1;
658 break;
659 }
660
661 if (ret == SCANNED_GARBAGE || ret == SCANNED_A_BAD_PAD_NODE)
662 if (is_last_write(c, buf, offs)) {
663 clean_buf(c, &buf, lnum, &offs, &len);
664 need_clean = 1;
665 empty_chkd = 1;
666 break;
667 }
668
669 if (ret == SCANNED_A_CORRUPT_NODE)
670 if (no_more_nodes(c, buf, len, lnum, offs)) {
671 clean_buf(c, &buf, lnum, &offs, &len);
672 need_clean = 1;
673 empty_chkd = 1;
674 break;
675 }
676
 /* Unrecoverable: rescan the same position once with messages on */
677 if (quiet) {
678 /* Redo the last scan but noisily */
679 quiet = 0;
680 continue;
681 }
682
683 switch (ret) {
684 case SCANNED_GARBAGE:
685 dbg_err("garbage");
686 goto corrupted;
687 case SCANNED_A_CORRUPT_NODE:
688 case SCANNED_A_BAD_PAD_NODE:
689 dbg_err("bad node");
690 goto corrupted;
691 default:
692 dbg_err("unknown");
693 goto corrupted;
694 }
695 }
696
 /* Whatever follows the last node must be empty, or be cleanable */
697 if (!empty_chkd && !is_empty(buf, len)) {
698 if (is_last_write(c, buf, offs)) {
699 clean_buf(c, &buf, lnum, &offs, &len);
700 need_clean = 1;
701 } else {
702 ubifs_err("corrupt empty space at LEB %d:%d",
703 lnum, offs);
704 goto corrupted;
705 }
706 }
707
708 /* Drop nodes from incomplete group */
709 if (grouped && drop_incomplete_group(sleb, &offs)) {
 /* 'offs' was moved back to the dropped nodes, so re-derive the rest */
710 buf = sbuf + offs;
711 len = c->leb_size - offs;
712 clean_buf(c, &buf, lnum, &offs, &len);
713 need_clean = 1;
714 }
715
 /* The scan must not end in the middle of a min_io_size unit */
716 if (offs % c->min_io_size) {
717 clean_buf(c, &buf, lnum, &offs, &len);
718 need_clean = 1;
719 }
720
721 ubifs_end_scan(c, sleb, lnum, offs);
722
723 if (need_clean) {
724 err = fix_unclean_leb(c, sleb, start);
725 if (err)
726 goto error;
727 }
728
729 return sleb;
730
731corrupted:
732 ubifs_scanned_corruption(c, lnum, offs, buf);
733 err = -EUCLEAN;
734error:
735 ubifs_err("LEB %d scanning failed", lnum);
736 ubifs_scan_destroy(sleb);
737 return ERR_PTR(err);
738}
739
740/**
741 * get_cs_sqnum - get commit start sequence number.
742 * @c: UBIFS file-system description object
743 * @lnum: LEB number of commit start node
744 * @offs: offset of commit start node
745 * @cs_sqnum: commit start sequence number is returned here
746 *
747 * This function returns %0 on success and a negative error code on failure.
748 */
749static int get_cs_sqnum(struct ubifs_info *c, int lnum, int offs,
750 unsigned long long *cs_sqnum)
751{
752 struct ubifs_cs_node *cs_node = NULL;
753 int err, ret;
754
755 dbg_rcvry("at %d:%d", lnum, offs);
756 cs_node = kmalloc(UBIFS_CS_NODE_SZ, GFP_KERNEL);
757 if (!cs_node)
758 return -ENOMEM;
759 if (c->leb_size - offs < UBIFS_CS_NODE_SZ)
760 goto out_err;
761 err = ubi_read(c->ubi, lnum, (void *)cs_node, offs, UBIFS_CS_NODE_SZ);
762 if (err && err != -EBADMSG)
763 goto out_free;
764 ret = ubifs_scan_a_node(c, cs_node, UBIFS_CS_NODE_SZ, lnum, offs, 0);
765 if (ret != SCANNED_A_NODE) {
766 dbg_err("Not a valid node");
767 goto out_err;
768 }
769 if (cs_node->ch.node_type != UBIFS_CS_NODE) {
770 dbg_err("Node a CS node, type is %d", cs_node->ch.node_type);
771 goto out_err;
772 }
773 if (le64_to_cpu(cs_node->cmt_no) != c->cmt_no) {
774 dbg_err("CS node cmt_no %llu != current cmt_no %llu",
775 (unsigned long long)le64_to_cpu(cs_node->cmt_no),
776 c->cmt_no);
777 goto out_err;
778 }
779 *cs_sqnum = le64_to_cpu(cs_node->ch.sqnum);
780 dbg_rcvry("commit start sqnum %llu", *cs_sqnum);
781 kfree(cs_node);
782 return 0;
783
784out_err:
785 err = -EINVAL;
786out_free:
787 ubifs_err("failed to get CS sqnum");
788 kfree(cs_node);
789 return err;
790}
791
792/**
793 * ubifs_recover_log_leb - scan and recover a log LEB.
794 * @c: UBIFS file-system description object
795 * @lnum: LEB number
796 * @offs: offset
797 * @sbuf: LEB-sized buffer to use
798 *
799 * This function does a scan of a LEB, but caters for errors that might have
800 * been caused by the unclean unmount from which we are attempting to recover.
801 *
802 * This function returns %0 on success and a negative error code on failure.
803 */
804struct ubifs_scan_leb *ubifs_recover_log_leb(struct ubifs_info *c, int lnum,
805 int offs, void *sbuf)
806{
807 struct ubifs_scan_leb *sleb;
808 int next_lnum;
809
810 dbg_rcvry("LEB %d", lnum);
811 next_lnum = lnum + 1;
812 if (next_lnum >= UBIFS_LOG_LNUM + c->log_lebs)
813 next_lnum = UBIFS_LOG_LNUM;
814 if (next_lnum != c->ltail_lnum) {
815 /*
816 * We can only recover at the end of the log, so check that the
817 * next log LEB is empty or out of date.
818 */
819 sleb = ubifs_scan(c, next_lnum, 0, sbuf);
820 if (IS_ERR(sleb))
821 return sleb;
822 if (sleb->nodes_cnt) {
823 struct ubifs_scan_node *snod;
824 unsigned long long cs_sqnum = c->cs_sqnum;
825
826 snod = list_entry(sleb->nodes.next,
827 struct ubifs_scan_node, list);
828 if (cs_sqnum == 0) {
829 int err;
830
831 err = get_cs_sqnum(c, lnum, offs, &cs_sqnum);
832 if (err) {
833 ubifs_scan_destroy(sleb);
834 return ERR_PTR(err);
835 }
836 }
837 if (snod->sqnum > cs_sqnum) {
838 ubifs_err("unrecoverable log corruption "
839 "in LEB %d", lnum);
840 ubifs_scan_destroy(sleb);
841 return ERR_PTR(-EUCLEAN);
842 }
843 }
844 ubifs_scan_destroy(sleb);
845 }
846 return ubifs_recover_leb(c, lnum, offs, sbuf, 0);
847}
848
849/**
850 * recover_head - recover a head.
851 * @c: UBIFS file-system description object
852 * @lnum: LEB number of head to recover
853 * @offs: offset of head to recover
854 * @sbuf: LEB-sized buffer to use
855 *
856 * This function ensures that there is no data on the flash at a head location.
857 *
858 * This function returns %0 on success and a negative error code on failure.
859 */
860static int recover_head(const struct ubifs_info *c, int lnum, int offs,
861 void *sbuf)
862{
863 int len, err, need_clean = 0;
864
865 if (c->min_io_size > 1)
866 len = c->min_io_size;
867 else
868 len = 512;
869 if (offs + len > c->leb_size)
870 len = c->leb_size - offs;
871
872 if (!len)
873 return 0;
874
875 /* Read at the head location and check it is empty flash */
876 err = ubi_read(c->ubi, lnum, sbuf, offs, len);
877 if (err)
878 need_clean = 1;
879 else {
880 uint8_t *p = sbuf;
881
882 while (len--)
883 if (*p++ != 0xff) {
884 need_clean = 1;
885 break;
886 }
887 }
888
889 if (need_clean) {
890 dbg_rcvry("cleaning head at %d:%d", lnum, offs);
891 if (offs == 0)
892 return ubifs_leb_unmap(c, lnum);
893 err = ubi_read(c->ubi, lnum, sbuf, 0, offs);
894 if (err)
895 return err;
896 return ubi_leb_change(c->ubi, lnum, sbuf, offs, UBI_UNKNOWN);
897 }
898
899 return 0;
900}
901
902/**
903 * ubifs_recover_inl_heads - recover index and LPT heads.
904 * @c: UBIFS file-system description object
905 * @sbuf: LEB-sized buffer to use
906 *
907 * This function ensures that there is no data on the flash at the index and
908 * LPT head locations.
909 *
910 * This deals with the recovery of a half-completed journal commit. UBIFS is
911 * careful never to overwrite the last version of the index or the LPT. Because
912 * the index and LPT are wandering trees, data from a half-completed commit will
913 * not be referenced anywhere in UBIFS. The data will be either in LEBs that are
914 * assumed to be empty and will be unmapped anyway before use, or in the index
915 * and LPT heads.
916 *
917 * This function returns %0 on success and a negative error code on failure.
918 */
919int ubifs_recover_inl_heads(const struct ubifs_info *c, void *sbuf)
920{
921 int err;
922
923 ubifs_assert(!(c->vfs_sb->s_flags & MS_RDONLY) || c->remounting_rw);
924
925 dbg_rcvry("checking index head at %d:%d", c->ihead_lnum, c->ihead_offs);
926 err = recover_head(c, c->ihead_lnum, c->ihead_offs, sbuf);
927 if (err)
928 return err;
929
930 dbg_rcvry("checking LPT head at %d:%d", c->nhead_lnum, c->nhead_offs);
931 err = recover_head(c, c->nhead_lnum, c->nhead_offs, sbuf);
932 if (err)
933 return err;
934
935 return 0;
936}
937
938/**
939 * clean_an_unclean_leb - read and write a LEB to remove corruption.
940 * @c: UBIFS file-system description object
941 * @ucleb: unclean LEB information
942 * @sbuf: LEB-sized buffer to use
943 *
944 * This function reads a LEB up to a point pre-determined by the mount recovery,
945 * checks the nodes, and writes the result back to the flash, thereby cleaning
946 * off any following corruption, or non-fatal ECC errors.
947 *
948 * This function returns %0 on success and a negative error code on failure.
949 */
950static int clean_an_unclean_leb(const struct ubifs_info *c,
951 struct ubifs_unclean_leb *ucleb, void *sbuf)
952{
953 int err, lnum = ucleb->lnum, offs = 0, len = ucleb->endpt, quiet = 1;
954 void *buf = sbuf;
955
956 dbg_rcvry("LEB %d len %d", lnum, len);
957
958 if (len == 0) {
959 /* Nothing to read, just unmap it */
960 err = ubifs_leb_unmap(c, lnum);
961 if (err)
962 return err;
963 return 0;
964 }
965
966 err = ubi_read(c->ubi, lnum, buf, offs, len);
967 if (err && err != -EBADMSG)
968 return err;
969
970 while (len >= 8) {
971 int ret;
972
973 cond_resched();
974
975 /* Scan quietly until there is an error */
976 ret = ubifs_scan_a_node(c, buf, len, lnum, offs, quiet);
977
978 if (ret == SCANNED_A_NODE) {
979 /* A valid node, and not a padding node */
980 struct ubifs_ch *ch = buf;
981 int node_len;
982
983 node_len = ALIGN(le32_to_cpu(ch->len), 8);
984 offs += node_len;
985 buf += node_len;
986 len -= node_len;
987 continue;
988 }
989
990 if (ret > 0) {
991 /* Padding bytes or a valid padding node */
992 offs += ret;
993 buf += ret;
994 len -= ret;
995 continue;
996 }
997
998 if (ret == SCANNED_EMPTY_SPACE) {
999 ubifs_err("unexpected empty space at %d:%d",
1000 lnum, offs);
1001 return -EUCLEAN;
1002 }
1003
1004 if (quiet) {
1005 /* Redo the last scan but noisily */
1006 quiet = 0;
1007 continue;
1008 }
1009
1010 ubifs_scanned_corruption(c, lnum, offs, buf);
1011 return -EUCLEAN;
1012 }
1013
1014 /* Pad to min_io_size */
1015 len = ALIGN(ucleb->endpt, c->min_io_size);
1016 if (len > ucleb->endpt) {
1017 int pad_len = len - ALIGN(ucleb->endpt, 8);
1018
1019 if (pad_len > 0) {
1020 buf = c->sbuf + len - pad_len;
1021 ubifs_pad(c, buf, pad_len);
1022 }
1023 }
1024
1025 /* Write back the LEB atomically */
1026 err = ubi_leb_change(c->ubi, lnum, sbuf, len, UBI_UNKNOWN);
1027 if (err)
1028 return err;
1029
1030 dbg_rcvry("cleaned LEB %d", lnum);
1031
1032 return 0;
1033}
1034
1035/**
1036 * ubifs_clean_lebs - clean LEBs recovered during read-only mount.
1037 * @c: UBIFS file-system description object
1038 * @sbuf: LEB-sized buffer to use
1039 *
1040 * This function cleans a LEB identified during recovery that needs to be
1041 * written but was not because UBIFS was mounted read-only. This happens when
1042 * remounting to read-write mode.
1043 *
1044 * This function returns %0 on success and a negative error code on failure.
1045 */
1046int ubifs_clean_lebs(const struct ubifs_info *c, void *sbuf)
1047{
1048 dbg_rcvry("recovery");
1049 while (!list_empty(&c->unclean_leb_list)) {
1050 struct ubifs_unclean_leb *ucleb;
1051 int err;
1052
1053 ucleb = list_entry(c->unclean_leb_list.next,
1054 struct ubifs_unclean_leb, list);
1055 err = clean_an_unclean_leb(c, ucleb, sbuf);
1056 if (err)
1057 return err;
1058 list_del(&ucleb->list);
1059 kfree(ucleb);
1060 }
1061 return 0;
1062}
1063
1064/**
1065 * ubifs_rcvry_gc_commit - recover the GC LEB number and run the commit.
1066 * @c: UBIFS file-system description object
1067 *
1068 * Out-of-place garbage collection requires always one empty LEB with which to
1069 * start garbage collection. The LEB number is recorded in c->gc_lnum and is
1070 * written to the master node on unmounting. In the case of an unclean unmount
1071 * the value of gc_lnum recorded in the master node is out of date and cannot
1072 * be used. Instead, recovery must allocate an empty LEB for this purpose.
1073 * However, there may not be enough empty space, in which case it must be
1074 * possible to GC the dirtiest LEB into the GC head LEB.
1075 *
1076 * This function also runs the commit which causes the TNC updates from
1077 * size-recovery and orphans to be written to the flash. That is important to
1078 * ensure correct replay order for subsequent mounts.
1079 *
1080 * This function returns %0 on success and a negative error code on failure.
1081 */
1082int ubifs_rcvry_gc_commit(struct ubifs_info *c)
1083{
1084 struct ubifs_wbuf *wbuf = &c->jheads[GCHD].wbuf;
1085 struct ubifs_lprops lp;
1086 int lnum, err;
1087
 /* The old value is not trusted; a new GC LEB is chosen below */
1088 c->gc_lnum = -1;
1089 if (wbuf->lnum == -1) {
1090 dbg_rcvry("no GC head LEB");
1091 goto find_free;
1092 }
1093 /*
1094 * See whether the used space in the dirtiest LEB fits in the GC head
1095 * LEB.
1096 */
1097 if (wbuf->offs == c->leb_size) {
1098 dbg_rcvry("no room in GC head LEB");
1099 goto find_free;
1100 }
1101 err = ubifs_find_dirty_leb(c, &lp, wbuf->offs, 2);
1102 if (err) {
1103 if (err == -ENOSPC)
1104 dbg_err("could not find a dirty LEB");
1105 return err;
1106 }
1107 ubifs_assert(!(lp.flags & LPROPS_INDEX));
1108 lnum = lp.lnum;
1109 if (lp.free + lp.dirty == c->leb_size) {
1110 /* An empty LEB was returned */
 /* Not entirely free yet: record it as entirely free before unmapping */
1111 if (lp.free != c->leb_size) {
1112 err = ubifs_change_one_lp(c, lnum, c->leb_size,
1113 0, 0, 0, 0);
1114 if (err)
1115 return err;
1116 }
1117 err = ubifs_leb_unmap(c, lnum);
1118 if (err)
1119 return err;
1120 c->gc_lnum = lnum;
1121 dbg_rcvry("allocated LEB %d for GC", lnum);
1122 /* Run the commit */
1123 dbg_rcvry("committing");
1124 return ubifs_run_commit(c);
1125 }
1126 /*
1127 * There was no empty LEB so the used space in the dirtiest LEB must fit
1128 * in the GC head LEB.
1129 */
1130 if (lp.free + lp.dirty < wbuf->offs) {
1131 dbg_rcvry("LEB %d doesn't fit in GC head LEB %d:%d",
1132 lnum, wbuf->lnum, wbuf->offs);
 /* Hand the LEB back before falling back to an empty one */
1133 err = ubifs_return_leb(c, lnum);
1134 if (err)
1135 return err;
1136 goto find_free;
1137 }
1138 /*
1139 * We run the commit before garbage collection otherwise subsequent
1140 * mounts will see the GC and orphan deletion in a different order.
1141 */
1142 dbg_rcvry("committing");
1143 err = ubifs_run_commit(c);
1144 if (err)
1145 return err;
1146 /*
1147 * The data in the dirtiest LEB fits in the GC head LEB, so do the GC
1148 * - use locking to keep 'ubifs_assert()' happy.
1149 */
1150 dbg_rcvry("GC'ing LEB %d", lnum);
1151 mutex_lock_nested(&wbuf->io_mutex, wbuf->jhead);
1152 err = ubifs_garbage_collect_leb(c, &lp);
 /* A write-buffer sync failure overrides a non-negative GC result */
1153 if (err >= 0) {
1154 int err2 = ubifs_wbuf_sync_nolock(wbuf);
1155
1156 if (err2)
1157 err = err2;
1158 }
1159 mutex_unlock(&wbuf->io_mutex);
1160 if (err < 0) {
1161 dbg_err("GC failed, error %d", err);
 /* -EAGAIN is not passed up to the caller; report -EINVAL instead */
1162 if (err == -EAGAIN)
1163 err = -EINVAL;
1164 return err;
1165 }
 /* Any GC outcome other than LEB_RETAINED is treated as a failure */
1166 if (err != LEB_RETAINED) {
1167 dbg_err("GC returned %d", err);
1168 return -EINVAL;
1169 }
1170 err = ubifs_leb_unmap(c, c->gc_lnum);
1171 if (err)
1172 return err;
1173 dbg_rcvry("allocated LEB %d for GC", lnum);
1174 return 0;
1175
1176find_free:
1177 /*
1178 * There is no GC head LEB or the free space in the GC head LEB is too
1179 * small. Allocate gc_lnum by calling 'ubifs_find_free_leb_for_idx()' so
1180 * GC is not run.
1181 */
1182 lnum = ubifs_find_free_leb_for_idx(c);
1183 if (lnum < 0) {
1184 dbg_err("could not find an empty LEB");
1185 return lnum;
1186 }
1187 /* And reset the index flag */
1188 err = ubifs_change_one_lp(c, lnum, LPROPS_NC, LPROPS_NC, 0,
1189 LPROPS_INDEX, 0);
1190 if (err)
1191 return err;
1192 c->gc_lnum = lnum;
1193 dbg_rcvry("allocated LEB %d for GC", lnum);
1194 /* Run the commit */
1195 dbg_rcvry("committing");
1196 return ubifs_run_commit(c);
1197}
1198
1199/**
1200 * struct size_entry - inode size information for recovery.
1201 * @rb: link in the RB-tree of sizes
1202 * @inum: inode number
1203 * @i_size: size on inode
1204 * @d_size: maximum size based on data nodes
1205 * @exists: indicates whether the inode exists
1206 * @inode: inode if pinned in memory awaiting rw mode to fix it
1207 */
1208struct size_entry {
1209 struct rb_node rb;
1210 ino_t inum;
1211 loff_t i_size;
1212 loff_t d_size;
1213 int exists;
1214 struct inode *inode;
1215};
1216
1217/**
1218 * add_ino - add an entry to the size tree.
1219 * @c: UBIFS file-system description object
1220 * @inum: inode number
1221 * @i_size: size on inode
1222 * @d_size: maximum size based on data nodes
1223 * @exists: indicates whether the inode exists
1224 */
1225static int add_ino(struct ubifs_info *c, ino_t inum, loff_t i_size,
1226 loff_t d_size, int exists)
1227{
1228 struct rb_node **p = &c->size_tree.rb_node, *parent = NULL;
1229 struct size_entry *e;
1230
1231 while (*p) {
1232 parent = *p;
1233 e = rb_entry(parent, struct size_entry, rb);
1234 if (inum < e->inum)
1235 p = &(*p)->rb_left;
1236 else
1237 p = &(*p)->rb_right;
1238 }
1239
1240 e = kzalloc(sizeof(struct size_entry), GFP_KERNEL);
1241 if (!e)
1242 return -ENOMEM;
1243
1244 e->inum = inum;
1245 e->i_size = i_size;
1246 e->d_size = d_size;
1247 e->exists = exists;
1248
1249 rb_link_node(&e->rb, parent, p);
1250 rb_insert_color(&e->rb, &c->size_tree);
1251
1252 return 0;
1253}
1254
1255/**
1256 * find_ino - find an entry on the size tree.
1257 * @c: UBIFS file-system description object
1258 * @inum: inode number
1259 */
1260static struct size_entry *find_ino(struct ubifs_info *c, ino_t inum)
1261{
1262 struct rb_node *p = c->size_tree.rb_node;
1263 struct size_entry *e;
1264
1265 while (p) {
1266 e = rb_entry(p, struct size_entry, rb);
1267 if (inum < e->inum)
1268 p = p->rb_left;
1269 else if (inum > e->inum)
1270 p = p->rb_right;
1271 else
1272 return e;
1273 }
1274 return NULL;
1275}
1276
1277/**
1278 * remove_ino - remove an entry from the size tree.
1279 * @c: UBIFS file-system description object
1280 * @inum: inode number
1281 */
1282static void remove_ino(struct ubifs_info *c, ino_t inum)
1283{
1284 struct size_entry *e = find_ino(c, inum);
1285
1286 if (!e)
1287 return;
1288 rb_erase(&e->rb, &c->size_tree);
1289 kfree(e);
1290}
1291
1292/**
1293 * ubifs_destroy_size_tree - free resources related to the size tree.
1294 * @c: UBIFS file-system description object
1295 */
1296void ubifs_destroy_size_tree(struct ubifs_info *c)
1297{
1298 struct rb_node *this = c->size_tree.rb_node;
1299 struct size_entry *e;
1300
1301 while (this) {
1302 if (this->rb_left) {
1303 this = this->rb_left;
1304 continue;
1305 } else if (this->rb_right) {
1306 this = this->rb_right;
1307 continue;
1308 }
1309 e = rb_entry(this, struct size_entry, rb);
1310 if (e->inode)
1311 iput(e->inode);
1312 this = rb_parent(this);
1313 if (this) {
1314 if (this->rb_left == &e->rb)
1315 this->rb_left = NULL;
1316 else
1317 this->rb_right = NULL;
1318 }
1319 kfree(e);
1320 }
1321 c->size_tree = RB_ROOT;
1322}
1323
1324/**
1325 * ubifs_recover_size_accum - accumulate inode sizes for recovery.
1326 * @c: UBIFS file-system description object
1327 * @key: node key
1328 * @deletion: node is for a deletion
1329 * @new_size: inode size
1330 *
1331 * This function has two purposes:
1332 * 1) to ensure there are no data nodes that fall outside the inode size
1333 * 2) to ensure there are no data nodes for inodes that do not exist
1334 * To accomplish those purposes, a rb-tree is constructed containing an entry
1335 * for each inode number in the journal that has not been deleted, and recording
1336 * the size from the inode node, the maximum size of any data node (also altered
1337 * by truncations) and a flag indicating a inode number for which no inode node
1338 * was present in the journal.
1339 *
1340 * Note that there is still the possibility that there are data nodes that have
1341 * been committed that are beyond the inode size, however the only way to find
1342 * them would be to scan the entire index. Alternatively, some provision could
1343 * be made to record the size of inodes at the start of commit, which would seem
1344 * very cumbersome for a scenario that is quite unlikely and the only negative
1345 * consequence of which is wasted space.
1346 *
1347 * This functions returns %0 on success and a negative error code on failure.
1348 */
1349int ubifs_recover_size_accum(struct ubifs_info *c, union ubifs_key *key,
1350 int deletion, loff_t new_size)
1351{
1352 ino_t inum = key_inum(c, key);
1353 struct size_entry *e;
1354 int err;
1355
1356 switch (key_type(c, key)) {
1357 case UBIFS_INO_KEY:
1358 if (deletion)
1359 remove_ino(c, inum);
1360 else {
1361 e = find_ino(c, inum);
1362 if (e) {
1363 e->i_size = new_size;
1364 e->exists = 1;
1365 } else {
1366 err = add_ino(c, inum, new_size, 0, 1);
1367 if (err)
1368 return err;
1369 }
1370 }
1371 break;
1372 case UBIFS_DATA_KEY:
1373 e = find_ino(c, inum);
1374 if (e) {
1375 if (new_size > e->d_size)
1376 e->d_size = new_size;
1377 } else {
1378 err = add_ino(c, inum, 0, new_size, 0);
1379 if (err)
1380 return err;
1381 }
1382 break;
1383 case UBIFS_TRUN_KEY:
1384 e = find_ino(c, inum);
1385 if (e)
1386 e->d_size = new_size;
1387 break;
1388 }
1389 return 0;
1390}
1391
1392/**
1393 * fix_size_in_place - fix inode size in place on flash.
1394 * @c: UBIFS file-system description object
1395 * @e: inode size information for recovery
1396 */
1397static int fix_size_in_place(struct ubifs_info *c, struct size_entry *e)
1398{
1399 struct ubifs_ino_node *ino = c->sbuf;
1400 unsigned char *p;
1401 union ubifs_key key;
1402 int err, lnum, offs, len;
1403 loff_t i_size;
1404 uint32_t crc;
1405
1406 /* Locate the inode node LEB number and offset */
1407 ino_key_init(c, &key, e->inum);
1408 err = ubifs_tnc_locate(c, &key, ino, &lnum, &offs);
1409 if (err)
1410 goto out;
1411 /*
1412 * If the size recorded on the inode node is greater than the size that
1413 * was calculated from nodes in the journal then don't change the inode.
1414 */
1415 i_size = le64_to_cpu(ino->size);
1416 if (i_size >= e->d_size)
1417 return 0;
1418 /* Read the LEB */
1419 err = ubi_read(c->ubi, lnum, c->sbuf, 0, c->leb_size);
1420 if (err)
1421 goto out;
1422 /* Change the size field and recalculate the CRC */
1423 ino = c->sbuf + offs;
1424 ino->size = cpu_to_le64(e->d_size);
1425 len = le32_to_cpu(ino->ch.len);
1426 crc = crc32(UBIFS_CRC32_INIT, (void *)ino + 8, len - 8);
1427 ino->ch.crc = cpu_to_le32(crc);
1428 /* Work out where data in the LEB ends and free space begins */
1429 p = c->sbuf;
1430 len = c->leb_size - 1;
1431 while (p[len] == 0xff)
1432 len -= 1;
1433 len = ALIGN(len + 1, c->min_io_size);
1434 /* Atomically write the fixed LEB back again */
1435 err = ubi_leb_change(c->ubi, lnum, c->sbuf, len, UBI_UNKNOWN);
1436 if (err)
1437 goto out;
1438 dbg_rcvry("inode %lu at %d:%d size %lld -> %lld ", e->inum, lnum, offs,
1439 i_size, e->d_size);
1440 return 0;
1441
1442out:
1443 ubifs_warn("inode %lu failed to fix size %lld -> %lld error %d",
1444 e->inum, e->i_size, e->d_size, err);
1445 return err;
1446}
1447
1448/**
1449 * ubifs_recover_size - recover inode size.
1450 * @c: UBIFS file-system description object
1451 *
1452 * This function attempts to fix inode size discrepancies identified by the
1453 * 'ubifs_recover_size_accum()' function.
1454 *
1455 * This functions returns %0 on success and a negative error code on failure.
1456 */
1457int ubifs_recover_size(struct ubifs_info *c)
1458{
1459 struct rb_node *this = rb_first(&c->size_tree);
1460
1461 while (this) {
1462 struct size_entry *e;
1463 int err;
1464
1465 e = rb_entry(this, struct size_entry, rb);
1466 if (!e->exists) {
1467 union ubifs_key key;
1468
1469 ino_key_init(c, &key, e->inum);
1470 err = ubifs_tnc_lookup(c, &key, c->sbuf);
1471 if (err && err != -ENOENT)
1472 return err;
1473 if (err == -ENOENT) {
1474 /* Remove data nodes that have no inode */
1475 dbg_rcvry("removing ino %lu", e->inum);
1476 err = ubifs_tnc_remove_ino(c, e->inum);
1477 if (err)
1478 return err;
1479 } else {
1480 struct ubifs_ino_node *ino = c->sbuf;
1481
1482 e->exists = 1;
1483 e->i_size = le64_to_cpu(ino->size);
1484 }
1485 }
1486 if (e->exists && e->i_size < e->d_size) {
1487 if (!e->inode && (c->vfs_sb->s_flags & MS_RDONLY)) {
1488 /* Fix the inode size and pin it in memory */
1489 struct inode *inode;
1490
1491 inode = ubifs_iget(c->vfs_sb, e->inum);
1492 if (IS_ERR(inode))
1493 return PTR_ERR(inode);
1494 if (inode->i_size < e->d_size) {
1495 dbg_rcvry("ino %lu size %lld -> %lld",
1496 e->inum, e->d_size,
1497 inode->i_size);
1498 inode->i_size = e->d_size;
1499 ubifs_inode(inode)->ui_size = e->d_size;
1500 e->inode = inode;
1501 this = rb_next(this);
1502 continue;
1503 }
1504 iput(inode);
1505 } else {
1506 /* Fix the size in place */
1507 err = fix_size_in_place(c, e);
1508 if (err)
1509 return err;
1510 if (e->inode)
1511 iput(e->inode);
1512 }
1513 }
1514 this = rb_next(this);
1515 rb_erase(&e->rb, &c->size_tree);
1516 kfree(e);
1517 }
1518 return 0;
1519}
diff --git a/fs/ubifs/replay.c b/fs/ubifs/replay.c
new file mode 100644
index 00000000000..7399692af85
--- /dev/null
+++ b/fs/ubifs/replay.c
@@ -0,0 +1,1075 @@
1/*
2 * This file is part of UBIFS.
3 *
4 * Copyright (C) 2006-2008 Nokia Corporation.
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms of the GNU General Public License version 2 as published by
8 * the Free Software Foundation.
9 *
10 * This program is distributed in the hope that it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13 * more details.
14 *
15 * You should have received a copy of the GNU General Public License along with
16 * this program; if not, write to the Free Software Foundation, Inc., 51
17 * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
18 *
19 * Authors: Adrian Hunter
20 * Artem Bityutskiy (Битюцкий Артём)
21 */
22
23/*
24 * This file contains journal replay code. It runs when the file-system is being
25 * mounted and requires no locking.
26 *
27 * The larger is the journal, the longer it takes to scan it, so the longer it
28 * takes to mount UBIFS. This is why the journal has limited size which may be
29 * changed depending on the system requirements. But a larger journal gives
30 * faster I/O speed because it writes the index less frequently. So this is a
31 * trade-off. Also, the journal is indexed by the in-memory index (TNC), so the
32 * larger is the journal, the more memory its index may consume.
33 */
34
35#include "ubifs.h"
36
/*
 * Replay flags, stored in the 'flags' field of a replay entry.
 *
 * REPLAY_DELETION: node was deleted
 * REPLAY_REF: node is a reference node
 */
enum {
	REPLAY_DELETION = 1 << 0,
	REPLAY_REF      = 1 << 1,
};
47
48/**
49 * struct replay_entry - replay tree entry.
50 * @lnum: logical eraseblock number of the node
51 * @offs: node offset
52 * @len: node length
53 * @sqnum: node sequence number
54 * @flags: replay flags
55 * @rb: links the replay tree
56 * @key: node key
57 * @nm: directory entry name
58 * @old_size: truncation old size
59 * @new_size: truncation new size
60 * @free: amount of free space in a bud
61 * @dirty: amount of dirty space in a bud from padding and deletion nodes
62 *
63 * UBIFS journal replay must compare node sequence numbers, which means it must
64 * build a tree of node information to insert into the TNC.
65 */
66struct replay_entry {
67 int lnum;
68 int offs;
69 int len;
70 unsigned long long sqnum;
71 int flags;
72 struct rb_node rb;
73 union ubifs_key key;
74 union {
75 struct qstr nm;
76 struct {
77 loff_t old_size;
78 loff_t new_size;
79 };
80 struct {
81 int free;
82 int dirty;
83 };
84 };
85};
86
87/**
88 * struct bud_entry - entry in the list of buds to replay.
89 * @list: next bud in the list
90 * @bud: bud description object
91 * @free: free bytes in the bud
92 * @sqnum: reference node sequence number
93 */
94struct bud_entry {
95 struct list_head list;
96 struct ubifs_bud *bud;
97 int free;
98 unsigned long long sqnum;
99};
100
101/**
102 * set_bud_lprops - set free and dirty space used by a bud.
103 * @c: UBIFS file-system description object
104 * @r: replay entry of bud
105 */
106static int set_bud_lprops(struct ubifs_info *c, struct replay_entry *r)
107{
108 const struct ubifs_lprops *lp;
109 int err = 0, dirty;
110
111 ubifs_get_lprops(c);
112
113 lp = ubifs_lpt_lookup_dirty(c, r->lnum);
114 if (IS_ERR(lp)) {
115 err = PTR_ERR(lp);
116 goto out;
117 }
118
119 dirty = lp->dirty;
120 if (r->offs == 0 && (lp->free != c->leb_size || lp->dirty != 0)) {
121 /*
122 * The LEB was added to the journal with a starting offset of
123 * zero which means the LEB must have been empty. The LEB
124 * property values should be lp->free == c->leb_size and
125 * lp->dirty == 0, but that is not the case. The reason is that
126 * the LEB was garbage collected. The garbage collector resets
127 * the free and dirty space without recording it anywhere except
128 * lprops, so if there is not a commit then lprops does not have
129 * that information next time the file system is mounted.
130 *
131 * We do not need to adjust free space because the scan has told
132 * us the exact value which is recorded in the replay entry as
133 * r->free.
134 *
135 * However we do need to subtract from the dirty space the
136 * amount of space that the garbage collector reclaimed, which
137 * is the whole LEB minus the amount of space that was free.
138 */
139 dbg_mnt("bud LEB %d was GC'd (%d free, %d dirty)", r->lnum,
140 lp->free, lp->dirty);
141 dbg_gc("bud LEB %d was GC'd (%d free, %d dirty)", r->lnum,
142 lp->free, lp->dirty);
143 dirty -= c->leb_size - lp->free;
144 /*
145 * If the replay order was perfect the dirty space would now be
146 * zero. The order is not perfect because the the journal heads
147 * race with eachother. This is not a problem but is does mean
148 * that the dirty space may temporarily exceed c->leb_size
149 * during the replay.
150 */
151 if (dirty != 0)
152 dbg_msg("LEB %d lp: %d free %d dirty "
153 "replay: %d free %d dirty", r->lnum, lp->free,
154 lp->dirty, r->free, r->dirty);
155 }
156 lp = ubifs_change_lp(c, lp, r->free, dirty + r->dirty,
157 lp->flags | LPROPS_TAKEN, 0);
158 if (IS_ERR(lp)) {
159 err = PTR_ERR(lp);
160 goto out;
161 }
162out:
163 ubifs_release_lprops(c);
164 return err;
165}
166
167/**
168 * trun_remove_range - apply a replay entry for a truncation to the TNC.
169 * @c: UBIFS file-system description object
170 * @r: replay entry of truncation
171 */
172static int trun_remove_range(struct ubifs_info *c, struct replay_entry *r)
173{
174 unsigned min_blk, max_blk;
175 union ubifs_key min_key, max_key;
176 ino_t ino;
177
178 min_blk = r->new_size / UBIFS_BLOCK_SIZE;
179 if (r->new_size & (UBIFS_BLOCK_SIZE - 1))
180 min_blk += 1;
181
182 max_blk = r->old_size / UBIFS_BLOCK_SIZE;
183 if ((r->old_size & (UBIFS_BLOCK_SIZE - 1)) == 0)
184 max_blk -= 1;
185
186 ino = key_inum(c, &r->key);
187
188 data_key_init(c, &min_key, ino, min_blk);
189 data_key_init(c, &max_key, ino, max_blk);
190
191 return ubifs_tnc_remove_range(c, &min_key, &max_key);
192}
193
194/**
195 * apply_replay_entry - apply a replay entry to the TNC.
196 * @c: UBIFS file-system description object
197 * @r: replay entry to apply
198 *
199 * Apply a replay entry to the TNC.
200 */
201static int apply_replay_entry(struct ubifs_info *c, struct replay_entry *r)
202{
203 int err, deletion = ((r->flags & REPLAY_DELETION) != 0);
204
205 dbg_mnt("LEB %d:%d len %d flgs %d sqnum %llu %s", r->lnum,
206 r->offs, r->len, r->flags, r->sqnum, DBGKEY(&r->key));
207
208 /* Set c->replay_sqnum to help deal with dangling branches. */
209 c->replay_sqnum = r->sqnum;
210
211 if (r->flags & REPLAY_REF)
212 err = set_bud_lprops(c, r);
213 else if (is_hash_key(c, &r->key)) {
214 if (deletion)
215 err = ubifs_tnc_remove_nm(c, &r->key, &r->nm);
216 else
217 err = ubifs_tnc_add_nm(c, &r->key, r->lnum, r->offs,
218 r->len, &r->nm);
219 } else {
220 if (deletion)
221 switch (key_type(c, &r->key)) {
222 case UBIFS_INO_KEY:
223 {
224 ino_t inum = key_inum(c, &r->key);
225
226 err = ubifs_tnc_remove_ino(c, inum);
227 break;
228 }
229 case UBIFS_TRUN_KEY:
230 err = trun_remove_range(c, r);
231 break;
232 default:
233 err = ubifs_tnc_remove(c, &r->key);
234 break;
235 }
236 else
237 err = ubifs_tnc_add(c, &r->key, r->lnum, r->offs,
238 r->len);
239 if (err)
240 return err;
241
242 if (c->need_recovery)
243 err = ubifs_recover_size_accum(c, &r->key, deletion,
244 r->new_size);
245 }
246
247 return err;
248}
249
250/**
251 * destroy_replay_tree - destroy the replay.
252 * @c: UBIFS file-system description object
253 *
254 * Destroy the replay tree.
255 */
256static void destroy_replay_tree(struct ubifs_info *c)
257{
258 struct rb_node *this = c->replay_tree.rb_node;
259 struct replay_entry *r;
260
261 while (this) {
262 if (this->rb_left) {
263 this = this->rb_left;
264 continue;
265 } else if (this->rb_right) {
266 this = this->rb_right;
267 continue;
268 }
269 r = rb_entry(this, struct replay_entry, rb);
270 this = rb_parent(this);
271 if (this) {
272 if (this->rb_left == &r->rb)
273 this->rb_left = NULL;
274 else
275 this->rb_right = NULL;
276 }
277 if (is_hash_key(c, &r->key))
278 kfree(r->nm.name);
279 kfree(r);
280 }
281 c->replay_tree = RB_ROOT;
282}
283
284/**
285 * apply_replay_tree - apply the replay tree to the TNC.
286 * @c: UBIFS file-system description object
287 *
288 * Apply the replay tree.
289 * Returns zero in case of success and a negative error code in case of
290 * failure.
291 */
292static int apply_replay_tree(struct ubifs_info *c)
293{
294 struct rb_node *this = rb_first(&c->replay_tree);
295
296 while (this) {
297 struct replay_entry *r;
298 int err;
299
300 cond_resched();
301
302 r = rb_entry(this, struct replay_entry, rb);
303 err = apply_replay_entry(c, r);
304 if (err)
305 return err;
306 this = rb_next(this);
307 }
308 return 0;
309}
310
311/**
312 * insert_node - insert a node to the replay tree.
313 * @c: UBIFS file-system description object
314 * @lnum: node logical eraseblock number
315 * @offs: node offset
316 * @len: node length
317 * @key: node key
318 * @sqnum: sequence number
319 * @deletion: non-zero if this is a deletion
320 * @used: number of bytes in use in a LEB
321 * @old_size: truncation old size
322 * @new_size: truncation new size
323 *
324 * This function inserts a scanned non-direntry node to the replay tree. The
325 * replay tree is an RB-tree containing @struct replay_entry elements which are
326 * indexed by the sequence number. The replay tree is applied at the very end
327 * of the replay process. Since the tree is sorted in sequence number order,
328 * the older modifications are applied first. This function returns zero in
329 * case of success and a negative error code in case of failure.
330 */
331static int insert_node(struct ubifs_info *c, int lnum, int offs, int len,
332 union ubifs_key *key, unsigned long long sqnum,
333 int deletion, int *used, loff_t old_size,
334 loff_t new_size)
335{
336 struct rb_node **p = &c->replay_tree.rb_node, *parent = NULL;
337 struct replay_entry *r;
338
339 if (key_inum(c, key) >= c->highest_inum)
340 c->highest_inum = key_inum(c, key);
341
342 dbg_mnt("add LEB %d:%d, key %s", lnum, offs, DBGKEY(key));
343 while (*p) {
344 parent = *p;
345 r = rb_entry(parent, struct replay_entry, rb);
346 if (sqnum < r->sqnum) {
347 p = &(*p)->rb_left;
348 continue;
349 } else if (sqnum > r->sqnum) {
350 p = &(*p)->rb_right;
351 continue;
352 }
353 ubifs_err("duplicate sqnum in replay");
354 return -EINVAL;
355 }
356
357 r = kzalloc(sizeof(struct replay_entry), GFP_KERNEL);
358 if (!r)
359 return -ENOMEM;
360
361 if (!deletion)
362 *used += ALIGN(len, 8);
363 r->lnum = lnum;
364 r->offs = offs;
365 r->len = len;
366 r->sqnum = sqnum;
367 r->flags = (deletion ? REPLAY_DELETION : 0);
368 r->old_size = old_size;
369 r->new_size = new_size;
370 key_copy(c, key, &r->key);
371
372 rb_link_node(&r->rb, parent, p);
373 rb_insert_color(&r->rb, &c->replay_tree);
374 return 0;
375}
376
377/**
378 * insert_dent - insert a directory entry node into the replay tree.
379 * @c: UBIFS file-system description object
380 * @lnum: node logical eraseblock number
381 * @offs: node offset
382 * @len: node length
383 * @key: node key
384 * @name: directory entry name
385 * @nlen: directory entry name length
386 * @sqnum: sequence number
387 * @deletion: non-zero if this is a deletion
388 * @used: number of bytes in use in a LEB
389 *
390 * This function inserts a scanned directory entry node to the replay tree.
391 * Returns zero in case of success and a negative error code in case of
392 * failure.
393 *
394 * This function is also used for extended attribute entries because they are
395 * implemented as directory entry nodes.
396 */
397static int insert_dent(struct ubifs_info *c, int lnum, int offs, int len,
398 union ubifs_key *key, const char *name, int nlen,
399 unsigned long long sqnum, int deletion, int *used)
400{
401 struct rb_node **p = &c->replay_tree.rb_node, *parent = NULL;
402 struct replay_entry *r;
403 char *nbuf;
404
405 if (key_inum(c, key) >= c->highest_inum)
406 c->highest_inum = key_inum(c, key);
407
408 dbg_mnt("add LEB %d:%d, key %s", lnum, offs, DBGKEY(key));
409 while (*p) {
410 parent = *p;
411 r = rb_entry(parent, struct replay_entry, rb);
412 if (sqnum < r->sqnum) {
413 p = &(*p)->rb_left;
414 continue;
415 }
416 if (sqnum > r->sqnum) {
417 p = &(*p)->rb_right;
418 continue;
419 }
420 ubifs_err("duplicate sqnum in replay");
421 return -EINVAL;
422 }
423
424 r = kzalloc(sizeof(struct replay_entry), GFP_KERNEL);
425 if (!r)
426 return -ENOMEM;
427 nbuf = kmalloc(nlen + 1, GFP_KERNEL);
428 if (!nbuf) {
429 kfree(r);
430 return -ENOMEM;
431 }
432
433 if (!deletion)
434 *used += ALIGN(len, 8);
435 r->lnum = lnum;
436 r->offs = offs;
437 r->len = len;
438 r->sqnum = sqnum;
439 r->nm.len = nlen;
440 memcpy(nbuf, name, nlen);
441 nbuf[nlen] = '\0';
442 r->nm.name = nbuf;
443 r->flags = (deletion ? REPLAY_DELETION : 0);
444 key_copy(c, key, &r->key);
445
446 ubifs_assert(!*p);
447 rb_link_node(&r->rb, parent, p);
448 rb_insert_color(&r->rb, &c->replay_tree);
449 return 0;
450}
451
452/**
453 * ubifs_validate_entry - validate directory or extended attribute entry node.
454 * @c: UBIFS file-system description object
455 * @dent: the node to validate
456 *
457 * This function validates directory or extended attribute entry node @dent.
458 * Returns zero if the node is all right and a %-EINVAL if not.
459 */
460int ubifs_validate_entry(struct ubifs_info *c,
461 const struct ubifs_dent_node *dent)
462{
463 int key_type = key_type_flash(c, dent->key);
464 int nlen = le16_to_cpu(dent->nlen);
465
466 if (le32_to_cpu(dent->ch.len) != nlen + UBIFS_DENT_NODE_SZ + 1 ||
467 dent->type >= UBIFS_ITYPES_CNT ||
468 nlen > UBIFS_MAX_NLEN || dent->name[nlen] != 0 ||
469 strnlen(dent->name, nlen) != nlen ||
470 le64_to_cpu(dent->inum) > MAX_INUM) {
471 ubifs_err("bad %s node", key_type == UBIFS_DENT_KEY ?
472 "directory entry" : "extended attribute entry");
473 return -EINVAL;
474 }
475
476 if (key_type != UBIFS_DENT_KEY && key_type != UBIFS_XENT_KEY) {
477 ubifs_err("bad key type %d", key_type);
478 return -EINVAL;
479 }
480
481 return 0;
482}
483
484/**
485 * replay_bud - replay a bud logical eraseblock.
486 * @c: UBIFS file-system description object
487 * @lnum: bud logical eraseblock number to replay
488 * @offs: bud start offset
489 * @jhead: journal head to which this bud belongs
490 * @free: amount of free space in the bud is returned here
491 * @dirty: amount of dirty space from padding and deletion nodes is returned
492 * here
493 *
494 * This function returns zero in case of success and a negative error code in
495 * case of failure.
496 */
497static int replay_bud(struct ubifs_info *c, int lnum, int offs, int jhead,
498 int *free, int *dirty)
499{
500 int err = 0, used = 0;
501 struct ubifs_scan_leb *sleb;
502 struct ubifs_scan_node *snod;
503 struct ubifs_bud *bud;
504
505 dbg_mnt("replay bud LEB %d, head %d", lnum, jhead);
506 if (c->need_recovery)
507 sleb = ubifs_recover_leb(c, lnum, offs, c->sbuf, jhead != GCHD);
508 else
509 sleb = ubifs_scan(c, lnum, offs, c->sbuf);
510 if (IS_ERR(sleb))
511 return PTR_ERR(sleb);
512
513 /*
514 * The bud does not have to start from offset zero - the beginning of
515 * the 'lnum' LEB may contain previously committed data. One of the
516 * things we have to do in replay is to correctly update lprops with
517 * newer information about this LEB.
518 *
519 * At this point lprops thinks that this LEB has 'c->leb_size - offs'
520 * bytes of free space because it only contain information about
521 * committed data.
522 *
523 * But we know that real amount of free space is 'c->leb_size -
524 * sleb->endpt', and the space in the 'lnum' LEB between 'offs' and
525 * 'sleb->endpt' is used by bud data. We have to correctly calculate
526 * how much of these data are dirty and update lprops with this
527 * information.
528 *
529 * The dirt in that LEB region is comprised of padding nodes, deletion
530 * nodes, truncation nodes and nodes which are obsoleted by subsequent
531 * nodes in this LEB. So instead of calculating clean space, we
532 * calculate used space ('used' variable).
533 */
534
535 list_for_each_entry(snod, &sleb->nodes, list) {
536 int deletion = 0;
537
538 cond_resched();
539
540 if (snod->sqnum >= SQNUM_WATERMARK) {
541 ubifs_err("file system's life ended");
542 goto out_dump;
543 }
544
545 if (snod->sqnum > c->max_sqnum)
546 c->max_sqnum = snod->sqnum;
547
548 switch (snod->type) {
549 case UBIFS_INO_NODE:
550 {
551 struct ubifs_ino_node *ino = snod->node;
552 loff_t new_size = le64_to_cpu(ino->size);
553
554 if (le32_to_cpu(ino->nlink) == 0)
555 deletion = 1;
556 err = insert_node(c, lnum, snod->offs, snod->len,
557 &snod->key, snod->sqnum, deletion,
558 &used, 0, new_size);
559 break;
560 }
561 case UBIFS_DATA_NODE:
562 {
563 struct ubifs_data_node *dn = snod->node;
564 loff_t new_size = le32_to_cpu(dn->size) +
565 key_block(c, &snod->key) *
566 UBIFS_BLOCK_SIZE;
567
568 err = insert_node(c, lnum, snod->offs, snod->len,
569 &snod->key, snod->sqnum, deletion,
570 &used, 0, new_size);
571 break;
572 }
573 case UBIFS_DENT_NODE:
574 case UBIFS_XENT_NODE:
575 {
576 struct ubifs_dent_node *dent = snod->node;
577
578 err = ubifs_validate_entry(c, dent);
579 if (err)
580 goto out_dump;
581
582 err = insert_dent(c, lnum, snod->offs, snod->len,
583 &snod->key, dent->name,
584 le16_to_cpu(dent->nlen), snod->sqnum,
585 !le64_to_cpu(dent->inum), &used);
586 break;
587 }
588 case UBIFS_TRUN_NODE:
589 {
590 struct ubifs_trun_node *trun = snod->node;
591 loff_t old_size = le64_to_cpu(trun->old_size);
592 loff_t new_size = le64_to_cpu(trun->new_size);
593 union ubifs_key key;
594
595 /* Validate truncation node */
596 if (old_size < 0 || old_size > c->max_inode_sz ||
597 new_size < 0 || new_size > c->max_inode_sz ||
598 old_size <= new_size) {
599 ubifs_err("bad truncation node");
600 goto out_dump;
601 }
602
603 /*
604 * Create a fake truncation key just to use the same
605 * functions which expect nodes to have keys.
606 */
607 trun_key_init(c, &key, le32_to_cpu(trun->inum));
608 err = insert_node(c, lnum, snod->offs, snod->len,
609 &key, snod->sqnum, 1, &used,
610 old_size, new_size);
611 break;
612 }
613 default:
614 ubifs_err("unexpected node type %d in bud LEB %d:%d",
615 snod->type, lnum, snod->offs);
616 err = -EINVAL;
617 goto out_dump;
618 }
619 if (err)
620 goto out;
621 }
622
623 bud = ubifs_search_bud(c, lnum);
624 if (!bud)
625 BUG();
626
627 ubifs_assert(sleb->endpt - offs >= used);
628 ubifs_assert(sleb->endpt % c->min_io_size == 0);
629
630 if (sleb->endpt + c->min_io_size <= c->leb_size &&
631 !(c->vfs_sb->s_flags & MS_RDONLY))
632 err = ubifs_wbuf_seek_nolock(&c->jheads[jhead].wbuf, lnum,
633 sleb->endpt, UBI_SHORTTERM);
634
635 *dirty = sleb->endpt - offs - used;
636 *free = c->leb_size - sleb->endpt;
637
638out:
639 ubifs_scan_destroy(sleb);
640 return err;
641
642out_dump:
643 ubifs_err("bad node is at LEB %d:%d", lnum, snod->offs);
644 dbg_dump_node(c, snod->node);
645 ubifs_scan_destroy(sleb);
646 return -EINVAL;
647}
648
649/**
650 * insert_ref_node - insert a reference node to the replay tree.
651 * @c: UBIFS file-system description object
652 * @lnum: node logical eraseblock number
653 * @offs: node offset
654 * @sqnum: sequence number
655 * @free: amount of free space in bud
656 * @dirty: amount of dirty space from padding and deletion nodes
657 *
658 * This function inserts a reference node to the replay tree and returns zero
659 * in case of success ort a negative error code in case of failure.
660 */
661static int insert_ref_node(struct ubifs_info *c, int lnum, int offs,
662 unsigned long long sqnum, int free, int dirty)
663{
664 struct rb_node **p = &c->replay_tree.rb_node, *parent = NULL;
665 struct replay_entry *r;
666
667 dbg_mnt("add ref LEB %d:%d", lnum, offs);
668 while (*p) {
669 parent = *p;
670 r = rb_entry(parent, struct replay_entry, rb);
671 if (sqnum < r->sqnum) {
672 p = &(*p)->rb_left;
673 continue;
674 } else if (sqnum > r->sqnum) {
675 p = &(*p)->rb_right;
676 continue;
677 }
678 ubifs_err("duplicate sqnum in replay tree");
679 return -EINVAL;
680 }
681
682 r = kzalloc(sizeof(struct replay_entry), GFP_KERNEL);
683 if (!r)
684 return -ENOMEM;
685
686 r->lnum = lnum;
687 r->offs = offs;
688 r->sqnum = sqnum;
689 r->flags = REPLAY_REF;
690 r->free = free;
691 r->dirty = dirty;
692
693 rb_link_node(&r->rb, parent, p);
694 rb_insert_color(&r->rb, &c->replay_tree);
695 return 0;
696}
697
698/**
699 * replay_buds - replay all buds.
700 * @c: UBIFS file-system description object
701 *
702 * This function returns zero in case of success and a negative error code in
703 * case of failure.
704 */
705static int replay_buds(struct ubifs_info *c)
706{
707 struct bud_entry *b;
708 int err, uninitialized_var(free), uninitialized_var(dirty);
709
710 list_for_each_entry(b, &c->replay_buds, list) {
711 err = replay_bud(c, b->bud->lnum, b->bud->start, b->bud->jhead,
712 &free, &dirty);
713 if (err)
714 return err;
715 err = insert_ref_node(c, b->bud->lnum, b->bud->start, b->sqnum,
716 free, dirty);
717 if (err)
718 return err;
719 }
720
721 return 0;
722}
723
724/**
725 * destroy_bud_list - destroy the list of buds to replay.
726 * @c: UBIFS file-system description object
727 */
728static void destroy_bud_list(struct ubifs_info *c)
729{
730 struct bud_entry *b;
731
732 while (!list_empty(&c->replay_buds)) {
733 b = list_entry(c->replay_buds.next, struct bud_entry, list);
734 list_del(&b->list);
735 kfree(b);
736 }
737}
738
739/**
740 * add_replay_bud - add a bud to the list of buds to replay.
741 * @c: UBIFS file-system description object
742 * @lnum: bud logical eraseblock number to replay
743 * @offs: bud start offset
744 * @jhead: journal head to which this bud belongs
745 * @sqnum: reference node sequence number
746 *
747 * This function returns zero in case of success and a negative error code in
748 * case of failure.
749 */
750static int add_replay_bud(struct ubifs_info *c, int lnum, int offs, int jhead,
751 unsigned long long sqnum)
752{
753 struct ubifs_bud *bud;
754 struct bud_entry *b;
755
756 dbg_mnt("add replay bud LEB %d:%d, head %d", lnum, offs, jhead);
757
758 bud = kmalloc(sizeof(struct ubifs_bud), GFP_KERNEL);
759 if (!bud)
760 return -ENOMEM;
761
762 b = kmalloc(sizeof(struct bud_entry), GFP_KERNEL);
763 if (!b) {
764 kfree(bud);
765 return -ENOMEM;
766 }
767
768 bud->lnum = lnum;
769 bud->start = offs;
770 bud->jhead = jhead;
771 ubifs_add_bud(c, bud);
772
773 b->bud = bud;
774 b->sqnum = sqnum;
775 list_add_tail(&b->list, &c->replay_buds);
776
777 return 0;
778}
779
780/**
781 * validate_ref - validate a reference node.
782 * @c: UBIFS file-system description object
783 * @ref: the reference node to validate
784 * @ref_lnum: LEB number of the reference node
785 * @ref_offs: reference node offset
786 *
787 * This function returns %1 if a bud reference already exists for the LEB. %0 is
788 * returned if the reference node is new, otherwise %-EINVAL is returned if
789 * validation failed.
790 */
791static int validate_ref(struct ubifs_info *c, const struct ubifs_ref_node *ref)
792{
793 struct ubifs_bud *bud;
794 int lnum = le32_to_cpu(ref->lnum);
795 unsigned int offs = le32_to_cpu(ref->offs);
796 unsigned int jhead = le32_to_cpu(ref->jhead);
797
798 /*
799 * ref->offs may point to the end of LEB when the journal head points
800 * to the end of LEB and we write reference node for it during commit.
801 * So this is why we require 'offs > c->leb_size'.
802 */
803 if (jhead >= c->jhead_cnt || lnum >= c->leb_cnt ||
804 lnum < c->main_first || offs > c->leb_size ||
805 offs & (c->min_io_size - 1))
806 return -EINVAL;
807
808 /* Make sure we have not already looked at this bud */
809 bud = ubifs_search_bud(c, lnum);
810 if (bud) {
811 if (bud->jhead == jhead && bud->start <= offs)
812 return 1;
813 ubifs_err("bud at LEB %d:%d was already referred", lnum, offs);
814 return -EINVAL;
815 }
816
817 return 0;
818}
819
/**
 * replay_log_leb - replay a log logical eraseblock.
 * @c: UBIFS file-system description object
 * @lnum: log logical eraseblock to replay
 * @offs: offset to start replaying from
 * @sbuf: scan buffer
 *
 * This function scans log LEB @lnum, checks every node found in it, and adds
 * a replay bud for each reference node which refers to a bud not seen before.
 * The highest sequence number seen is recorded in @c->max_sqnum and the log
 * head position (@c->lhead_lnum, @c->lhead_offs) is updated.
 *
 * Returns zero in case of success, %1 if this is the last LEB in the log (the
 * LEB contains no nodes, starts with a node older than the commit start node,
 * or the scan end point is zero), and a negative error code in case of
 * failure.
 */
static int replay_log_leb(struct ubifs_info *c, int lnum, int offs, void *sbuf)
{
	int err;
	struct ubifs_scan_leb *sleb;
	struct ubifs_scan_node *snod;
	const struct ubifs_cs_node *node;

	dbg_mnt("replay log LEB %d:%d", lnum, offs);
	sleb = ubifs_scan(c, lnum, offs, sbuf);
	if (IS_ERR(sleb)) {
		/* A failed scan is retried via recovery only if it is needed */
		if (c->need_recovery)
			sleb = ubifs_recover_log_leb(c, lnum, offs, sbuf);
		if (IS_ERR(sleb))
			return PTR_ERR(sleb);
	}

	/* A LEB without any nodes means the end of the log */
	if (sleb->nodes_cnt == 0) {
		err = 1;
		goto out;
	}

	node = sleb->buf;

	/* 'snod' is set before any 'goto out_dump', which dereferences it */
	snod = list_entry(sleb->nodes.next, struct ubifs_scan_node, list);
	if (c->cs_sqnum == 0) {
		/*
		 * This is the first log LEB we are looking at, make sure that
		 * the first node is a commit start node. Also record its
		 * sequence number so that UBIFS can determine where the log
		 * ends, because all nodes which belong to the log have higher
		 * sequence numbers.
		 */
		if (snod->type != UBIFS_CS_NODE) {
			dbg_err("first log node at LEB %d:%d is not CS node",
				lnum, offs);
			goto out_dump;
		}
		if (le64_to_cpu(node->cmt_no) != c->cmt_no) {
			dbg_err("first CS node at LEB %d:%d has wrong "
				"commit number %llu expected %llu",
				lnum, offs,
				(unsigned long long)le64_to_cpu(node->cmt_no),
				c->cmt_no);
			goto out_dump;
		}

		c->cs_sqnum = le64_to_cpu(node->ch.sqnum);
		dbg_mnt("commit start sqnum %llu", c->cs_sqnum);
	}

	if (snod->sqnum < c->cs_sqnum) {
		/*
		 * This means that we reached end of log and now
		 * look to the older log data, which was already
		 * committed but the eraseblock was not erased (UBIFS
		 * only unmaps it). So this basically means we have to
		 * exit with "end of log" code.
		 */
		err = 1;
		goto out;
	}

	/* Make sure the first node sits at offset zero of the LEB */
	if (snod->offs != 0) {
		dbg_err("first node is not at zero offset");
		goto out_dump;
	}

	list_for_each_entry(snod, &sleb->nodes, list) {

		cond_resched();

		/* Sequence numbers at or above the watermark are refused */
		if (snod->sqnum >= SQNUM_WATERMARK) {
			ubifs_err("file system's life ended");
			goto out_dump;
		}

		/* Only the first node of the LEB may be older than the CS node */
		if (snod->sqnum < c->cs_sqnum) {
			dbg_err("bad sqnum %llu, commit sqnum %llu",
				snod->sqnum, c->cs_sqnum);
			goto out_dump;
		}

		if (snod->sqnum > c->max_sqnum)
			c->max_sqnum = snod->sqnum;

		switch (snod->type) {
		case UBIFS_REF_NODE: {
			const struct ubifs_ref_node *ref = snod->node;

			err = validate_ref(c, ref);
			if (err == 1)
				break; /* Already have this bud */
			if (err)
				goto out_dump;

			err = add_replay_bud(c, le32_to_cpu(ref->lnum),
					     le32_to_cpu(ref->offs),
					     le32_to_cpu(ref->jhead),
					     snod->sqnum);
			if (err)
				goto out;

			break;
		}
		case UBIFS_CS_NODE:
			/* Make sure it sits at the beginning of LEB */
			if (snod->offs != 0) {
				ubifs_err("unexpected node in log");
				goto out_dump;
			}
			break;
		default:
			/* Only reference and commit start nodes may be in the log */
			ubifs_err("unexpected node in log");
			goto out_dump;
		}
	}

	/* Move the log head to the point where the scan of this LEB ended */
	if (sleb->endpt || c->lhead_offs >= c->leb_size) {
		c->lhead_lnum = lnum;
		c->lhead_offs = sleb->endpt;
	}

	/* A zero scan end point is reported to the caller as "end of log" */
	err = !sleb->endpt;
out:
	ubifs_scan_destroy(sleb);
	return err;

out_dump:
	ubifs_err("log error detected while replying the log at LEB %d:%d",
		  lnum, offs + snod->offs);
	dbg_dump_node(c, snod->node);
	ubifs_scan_destroy(sleb);
	return -EINVAL;
}
966
967/**
968 * take_ihead - update the status of the index head in lprops to 'taken'.
969 * @c: UBIFS file-system description object
970 *
971 * This function returns the amount of free space in the index head LEB or a
972 * negative error code.
973 */
974static int take_ihead(struct ubifs_info *c)
975{
976 const struct ubifs_lprops *lp;
977 int err, free;
978
979 ubifs_get_lprops(c);
980
981 lp = ubifs_lpt_lookup_dirty(c, c->ihead_lnum);
982 if (IS_ERR(lp)) {
983 err = PTR_ERR(lp);
984 goto out;
985 }
986
987 free = lp->free;
988
989 lp = ubifs_change_lp(c, lp, LPROPS_NC, LPROPS_NC,
990 lp->flags | LPROPS_TAKEN, 0);
991 if (IS_ERR(lp)) {
992 err = PTR_ERR(lp);
993 goto out;
994 }
995
996 err = free;
997out:
998 ubifs_release_lprops(c);
999 return err;
1000}
1001
1002/**
1003 * ubifs_replay_journal - replay journal.
1004 * @c: UBIFS file-system description object
1005 *
1006 * This function scans the journal, replays and cleans it up. It makes sure all
1007 * memory data structures related to uncommitted journal are built (dirty TNC
1008 * tree, tree of buds, modified lprops, etc).
1009 */
1010int ubifs_replay_journal(struct ubifs_info *c)
1011{
1012 int err, i, lnum, offs, free;
1013 void *sbuf = NULL;
1014
1015 BUILD_BUG_ON(UBIFS_TRUN_KEY > 5);
1016
1017 /* Update the status of the index head in lprops to 'taken' */
1018 free = take_ihead(c);
1019 if (free < 0)
1020 return free; /* Error code */
1021
1022 if (c->ihead_offs != c->leb_size - free) {
1023 ubifs_err("bad index head LEB %d:%d", c->ihead_lnum,
1024 c->ihead_offs);
1025 return -EINVAL;
1026 }
1027
1028 sbuf = vmalloc(c->leb_size);
1029 if (!sbuf)
1030 return -ENOMEM;
1031
1032 dbg_mnt("start replaying the journal");
1033
1034 c->replaying = 1;
1035
1036 lnum = c->ltail_lnum = c->lhead_lnum;
1037 offs = c->lhead_offs;
1038
1039 for (i = 0; i < c->log_lebs; i++, lnum++) {
1040 if (lnum >= UBIFS_LOG_LNUM + c->log_lebs) {
1041 /*
1042 * The log is logically circular, we reached the last
1043 * LEB, switch to the first one.
1044 */
1045 lnum = UBIFS_LOG_LNUM;
1046 offs = 0;
1047 }
1048 err = replay_log_leb(c, lnum, offs, sbuf);
1049 if (err == 1)
1050 /* We hit the end of the log */
1051 break;
1052 if (err)
1053 goto out;
1054 offs = 0;
1055 }
1056
1057 err = replay_buds(c);
1058 if (err)
1059 goto out;
1060
1061 err = apply_replay_tree(c);
1062 if (err)
1063 goto out;
1064
1065 ubifs_assert(c->bud_bytes <= c->max_bud_bytes || c->need_recovery);
1066 dbg_mnt("finished, log head LEB %d:%d, max_sqnum %llu, "
1067 "highest_inum %lu", c->lhead_lnum, c->lhead_offs, c->max_sqnum,
1068 c->highest_inum);
1069out:
1070 destroy_replay_tree(c);
1071 destroy_bud_list(c);
1072 vfree(sbuf);
1073 c->replaying = 0;
1074 return err;
1075}
diff --git a/fs/ubifs/sb.c b/fs/ubifs/sb.c
new file mode 100644
index 00000000000..2bf753b3888
--- /dev/null
+++ b/fs/ubifs/sb.c
@@ -0,0 +1,629 @@
1/*
2 * This file is part of UBIFS.
3 *
4 * Copyright (C) 2006-2008 Nokia Corporation.
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms of the GNU General Public License version 2 as published by
8 * the Free Software Foundation.
9 *
10 * This program is distributed in the hope that it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13 * more details.
14 *
15 * You should have received a copy of the GNU General Public License along with
16 * this program; if not, write to the Free Software Foundation, Inc., 51
17 * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
18 *
19 * Authors: Artem Bityutskiy (Битюцкий Артём)
20 * Adrian Hunter
21 */
22
23/*
24 * This file implements UBIFS superblock. The superblock is stored at the first
25 * LEB of the volume and is never changed by UBIFS. Only user-space tools may
26 * change it. The superblock node mostly contains geometry information.
27 */
28
29#include "ubifs.h"
30#include <linux/random.h>
31
/*
 * Default journal size in logical eraseblocks as a percent of total
 * flash size.
 */
#define DEFAULT_JNL_PERCENT 5

/* Default maximum journal size in bytes (32 MiB) */
#define DEFAULT_MAX_JNL (32*1024*1024)

/* Default indexing tree fanout */
#define DEFAULT_FANOUT 8

/* Default number of data journal heads */
#define DEFAULT_JHEADS_CNT 1

/*
 * Default positions of different LEBs in the main area, given as offsets
 * from the first main area LEB.
 */
#define DEFAULT_IDX_LEB 0
#define DEFAULT_DATA_LEB 1
#define DEFAULT_GC_LEB 2

/* Default number of LEB numbers in LPT's save table */
#define DEFAULT_LSAVE_CNT 256

/* Default reserved pool size as a percent of maximum free space */
#define DEFAULT_RP_PERCENT 5

/* The default maximum size of reserved pool in bytes (5 MiB) */
#define DEFAULT_MAX_RP_SIZE (5*1024*1024)

/* Default time granularity in nanoseconds (i.e. one second) */
#define DEFAULT_TIME_GRAN 1000000000
63
64/**
65 * create_default_filesystem - format empty UBI volume.
66 * @c: UBIFS file-system description object
67 *
68 * This function creates default empty file-system. Returns zero in case of
69 * success and a negative error code in case of failure.
70 */
71static int create_default_filesystem(struct ubifs_info *c)
72{
73 struct ubifs_sb_node *sup;
74 struct ubifs_mst_node *mst;
75 struct ubifs_idx_node *idx;
76 struct ubifs_branch *br;
77 struct ubifs_ino_node *ino;
78 struct ubifs_cs_node *cs;
79 union ubifs_key key;
80 int err, tmp, jnl_lebs, log_lebs, max_buds, main_lebs, main_first;
81 int lpt_lebs, lpt_first, orph_lebs, big_lpt, ino_waste, sup_flags = 0;
82 int min_leb_cnt = UBIFS_MIN_LEB_CNT;
83 uint64_t tmp64, main_bytes;
84
85 /* Some functions called from here depend on the @c->key_len filed */
86 c->key_len = UBIFS_SK_LEN;
87
88 /*
89 * First of all, we have to calculate default file-system geometry -
90 * log size, journal size, etc.
91 */
92 if (c->leb_cnt < 0x7FFFFFFF / DEFAULT_JNL_PERCENT)
93 /* We can first multiply then divide and have no overflow */
94 jnl_lebs = c->leb_cnt * DEFAULT_JNL_PERCENT / 100;
95 else
96 jnl_lebs = (c->leb_cnt / 100) * DEFAULT_JNL_PERCENT;
97
98 if (jnl_lebs < UBIFS_MIN_JNL_LEBS)
99 jnl_lebs = UBIFS_MIN_JNL_LEBS;
100 if (jnl_lebs * c->leb_size > DEFAULT_MAX_JNL)
101 jnl_lebs = DEFAULT_MAX_JNL / c->leb_size;
102
103 /*
104 * The log should be large enough to fit reference nodes for all bud
105 * LEBs. Because buds do not have to start from the beginning of LEBs
106 * (half of the LEB may contain committed data), the log should
107 * generally be larger, make it twice as large.
108 */
109 tmp = 2 * (c->ref_node_alsz * jnl_lebs) + c->leb_size - 1;
110 log_lebs = tmp / c->leb_size;
111 /* Plus one LEB reserved for commit */
112 log_lebs += 1;
113 if (c->leb_cnt - min_leb_cnt > 8) {
114 /* And some extra space to allow writes while committing */
115 log_lebs += 1;
116 min_leb_cnt += 1;
117 }
118
119 max_buds = jnl_lebs - log_lebs;
120 if (max_buds < UBIFS_MIN_BUD_LEBS)
121 max_buds = UBIFS_MIN_BUD_LEBS;
122
123 /*
124 * Orphan nodes are stored in a separate area. One node can store a lot
125 * of orphan inode numbers, but when new orphan comes we just add a new
126 * orphan node. At some point the nodes are consolidated into one
127 * orphan node.
128 */
129 orph_lebs = UBIFS_MIN_ORPH_LEBS;
130#ifdef CONFIG_UBIFS_FS_DEBUG
131 if (c->leb_cnt - min_leb_cnt > 1)
132 /*
133 * For debugging purposes it is better to have at least 2
134 * orphan LEBs, because the orphan subsystem would need to do
135 * consolidations and would be stressed more.
136 */
137 orph_lebs += 1;
138#endif
139
140 main_lebs = c->leb_cnt - UBIFS_SB_LEBS - UBIFS_MST_LEBS - log_lebs;
141 main_lebs -= orph_lebs;
142
143 lpt_first = UBIFS_LOG_LNUM + log_lebs;
144 c->lsave_cnt = DEFAULT_LSAVE_CNT;
145 c->max_leb_cnt = c->leb_cnt;
146 err = ubifs_create_dflt_lpt(c, &main_lebs, lpt_first, &lpt_lebs,
147 &big_lpt);
148 if (err)
149 return err;
150
151 dbg_gen("LEB Properties Tree created (LEBs %d-%d)", lpt_first,
152 lpt_first + lpt_lebs - 1);
153
154 main_first = c->leb_cnt - main_lebs;
155
156 /* Create default superblock */
157 tmp = ALIGN(UBIFS_SB_NODE_SZ, c->min_io_size);
158 sup = kzalloc(tmp, GFP_KERNEL);
159 if (!sup)
160 return -ENOMEM;
161
162 tmp64 = (uint64_t)max_buds * c->leb_size;
163 if (big_lpt)
164 sup_flags |= UBIFS_FLG_BIGLPT;
165
166 sup->ch.node_type = UBIFS_SB_NODE;
167 sup->key_hash = UBIFS_KEY_HASH_R5;
168 sup->flags = cpu_to_le32(sup_flags);
169 sup->min_io_size = cpu_to_le32(c->min_io_size);
170 sup->leb_size = cpu_to_le32(c->leb_size);
171 sup->leb_cnt = cpu_to_le32(c->leb_cnt);
172 sup->max_leb_cnt = cpu_to_le32(c->max_leb_cnt);
173 sup->max_bud_bytes = cpu_to_le64(tmp64);
174 sup->log_lebs = cpu_to_le32(log_lebs);
175 sup->lpt_lebs = cpu_to_le32(lpt_lebs);
176 sup->orph_lebs = cpu_to_le32(orph_lebs);
177 sup->jhead_cnt = cpu_to_le32(DEFAULT_JHEADS_CNT);
178 sup->fanout = cpu_to_le32(DEFAULT_FANOUT);
179 sup->lsave_cnt = cpu_to_le32(c->lsave_cnt);
180 sup->fmt_version = cpu_to_le32(UBIFS_FORMAT_VERSION);
181 sup->default_compr = cpu_to_le16(UBIFS_COMPR_LZO);
182 sup->time_gran = cpu_to_le32(DEFAULT_TIME_GRAN);
183
184 generate_random_uuid(sup->uuid);
185
186 main_bytes = (uint64_t)main_lebs * c->leb_size;
187 tmp64 = main_bytes * DEFAULT_RP_PERCENT;
188 do_div(tmp64, 100);
189 if (tmp64 > DEFAULT_MAX_RP_SIZE)
190 tmp64 = DEFAULT_MAX_RP_SIZE;
191 sup->rp_size = cpu_to_le64(tmp64);
192
193 err = ubifs_write_node(c, sup, UBIFS_SB_NODE_SZ, 0, 0, UBI_LONGTERM);
194 kfree(sup);
195 if (err)
196 return err;
197
198 dbg_gen("default superblock created at LEB 0:0");
199
200 /* Create default master node */
201 mst = kzalloc(c->mst_node_alsz, GFP_KERNEL);
202 if (!mst)
203 return -ENOMEM;
204
205 mst->ch.node_type = UBIFS_MST_NODE;
206 mst->log_lnum = cpu_to_le32(UBIFS_LOG_LNUM);
207 mst->highest_inum = cpu_to_le64(UBIFS_FIRST_INO);
208 mst->cmt_no = 0;
209 mst->root_lnum = cpu_to_le32(main_first + DEFAULT_IDX_LEB);
210 mst->root_offs = 0;
211 tmp = ubifs_idx_node_sz(c, 1);
212 mst->root_len = cpu_to_le32(tmp);
213 mst->gc_lnum = cpu_to_le32(main_first + DEFAULT_GC_LEB);
214 mst->ihead_lnum = cpu_to_le32(main_first + DEFAULT_IDX_LEB);
215 mst->ihead_offs = cpu_to_le32(ALIGN(tmp, c->min_io_size));
216 mst->index_size = cpu_to_le64(ALIGN(tmp, 8));
217 mst->lpt_lnum = cpu_to_le32(c->lpt_lnum);
218 mst->lpt_offs = cpu_to_le32(c->lpt_offs);
219 mst->nhead_lnum = cpu_to_le32(c->nhead_lnum);
220 mst->nhead_offs = cpu_to_le32(c->nhead_offs);
221 mst->ltab_lnum = cpu_to_le32(c->ltab_lnum);
222 mst->ltab_offs = cpu_to_le32(c->ltab_offs);
223 mst->lsave_lnum = cpu_to_le32(c->lsave_lnum);
224 mst->lsave_offs = cpu_to_le32(c->lsave_offs);
225 mst->lscan_lnum = cpu_to_le32(main_first);
226 mst->empty_lebs = cpu_to_le32(main_lebs - 2);
227 mst->idx_lebs = cpu_to_le32(1);
228 mst->leb_cnt = cpu_to_le32(c->leb_cnt);
229
230 /* Calculate lprops statistics */
231 tmp64 = main_bytes;
232 tmp64 -= ALIGN(ubifs_idx_node_sz(c, 1), c->min_io_size);
233 tmp64 -= ALIGN(UBIFS_INO_NODE_SZ, c->min_io_size);
234 mst->total_free = cpu_to_le64(tmp64);
235
236 tmp64 = ALIGN(ubifs_idx_node_sz(c, 1), c->min_io_size);
237 ino_waste = ALIGN(UBIFS_INO_NODE_SZ, c->min_io_size) -
238 UBIFS_INO_NODE_SZ;
239 tmp64 += ino_waste;
240 tmp64 -= ALIGN(ubifs_idx_node_sz(c, 1), 8);
241 mst->total_dirty = cpu_to_le64(tmp64);
242
243 /* The indexing LEB does not contribute to dark space */
244 tmp64 = (c->main_lebs - 1) * c->dark_wm;
245 mst->total_dark = cpu_to_le64(tmp64);
246
247 mst->total_used = cpu_to_le64(UBIFS_INO_NODE_SZ);
248
249 err = ubifs_write_node(c, mst, UBIFS_MST_NODE_SZ, UBIFS_MST_LNUM, 0,
250 UBI_UNKNOWN);
251 if (err) {
252 kfree(mst);
253 return err;
254 }
255 err = ubifs_write_node(c, mst, UBIFS_MST_NODE_SZ, UBIFS_MST_LNUM + 1, 0,
256 UBI_UNKNOWN);
257 kfree(mst);
258 if (err)
259 return err;
260
261 dbg_gen("default master node created at LEB %d:0", UBIFS_MST_LNUM);
262
263 /* Create the root indexing node */
264 tmp = ubifs_idx_node_sz(c, 1);
265 idx = kzalloc(ALIGN(tmp, c->min_io_size), GFP_KERNEL);
266 if (!idx)
267 return -ENOMEM;
268
269 c->key_fmt = UBIFS_SIMPLE_KEY_FMT;
270 c->key_hash = key_r5_hash;
271
272 idx->ch.node_type = UBIFS_IDX_NODE;
273 idx->child_cnt = cpu_to_le16(1);
274 ino_key_init(c, &key, UBIFS_ROOT_INO);
275 br = ubifs_idx_branch(c, idx, 0);
276 key_write_idx(c, &key, &br->key);
277 br->lnum = cpu_to_le32(main_first + DEFAULT_DATA_LEB);
278 br->len = cpu_to_le32(UBIFS_INO_NODE_SZ);
279 err = ubifs_write_node(c, idx, tmp, main_first + DEFAULT_IDX_LEB, 0,
280 UBI_UNKNOWN);
281 kfree(idx);
282 if (err)
283 return err;
284
285 dbg_gen("default root indexing node created LEB %d:0",
286 main_first + DEFAULT_IDX_LEB);
287
288 /* Create default root inode */
289 tmp = ALIGN(UBIFS_INO_NODE_SZ, c->min_io_size);
290 ino = kzalloc(tmp, GFP_KERNEL);
291 if (!ino)
292 return -ENOMEM;
293
294 ino_key_init_flash(c, &ino->key, UBIFS_ROOT_INO);
295 ino->ch.node_type = UBIFS_INO_NODE;
296 ino->creat_sqnum = cpu_to_le64(++c->max_sqnum);
297 ino->nlink = cpu_to_le32(2);
298 tmp = cpu_to_le64(CURRENT_TIME_SEC.tv_sec);
299 ino->atime_sec = tmp;
300 ino->ctime_sec = tmp;
301 ino->mtime_sec = tmp;
302 ino->atime_nsec = 0;
303 ino->ctime_nsec = 0;
304 ino->mtime_nsec = 0;
305 ino->mode = cpu_to_le32(S_IFDIR | S_IRUGO | S_IWUSR | S_IXUGO);
306 ino->size = cpu_to_le64(UBIFS_INO_NODE_SZ);
307
308 /* Set compression enabled by default */
309 ino->flags = cpu_to_le32(UBIFS_COMPR_FL);
310
311 err = ubifs_write_node(c, ino, UBIFS_INO_NODE_SZ,
312 main_first + DEFAULT_DATA_LEB, 0,
313 UBI_UNKNOWN);
314 kfree(ino);
315 if (err)
316 return err;
317
318 dbg_gen("root inode created at LEB %d:0",
319 main_first + DEFAULT_DATA_LEB);
320
321 /*
322 * The first node in the log has to be the commit start node. This is
323 * always the case during normal file-system operation. Write a fake
324 * commit start node to the log.
325 */
326 tmp = ALIGN(UBIFS_CS_NODE_SZ, c->min_io_size);
327 cs = kzalloc(tmp, GFP_KERNEL);
328 if (!cs)
329 return -ENOMEM;
330
331 cs->ch.node_type = UBIFS_CS_NODE;
332 err = ubifs_write_node(c, cs, UBIFS_CS_NODE_SZ, UBIFS_LOG_LNUM,
333 0, UBI_UNKNOWN);
334 kfree(cs);
335
336 ubifs_msg("default file-system created");
337 return 0;
338}
339
340/**
341 * validate_sb - validate superblock node.
342 * @c: UBIFS file-system description object
343 * @sup: superblock node
344 *
345 * This function validates superblock node @sup. Since most of data was read
346 * from the superblock and stored in @c, the function validates fields in @c
347 * instead. Returns zero in case of success and %-EINVAL in case of validation
348 * failure.
349 */
350static int validate_sb(struct ubifs_info *c, struct ubifs_sb_node *sup)
351{
352 long long max_bytes;
353 int err = 1, min_leb_cnt;
354
355 if (!c->key_hash) {
356 err = 2;
357 goto failed;
358 }
359
360 if (sup->key_fmt != UBIFS_SIMPLE_KEY_FMT) {
361 err = 3;
362 goto failed;
363 }
364
365 if (le32_to_cpu(sup->min_io_size) != c->min_io_size) {
366 ubifs_err("min. I/O unit mismatch: %d in superblock, %d real",
367 le32_to_cpu(sup->min_io_size), c->min_io_size);
368 goto failed;
369 }
370
371 if (le32_to_cpu(sup->leb_size) != c->leb_size) {
372 ubifs_err("LEB size mismatch: %d in superblock, %d real",
373 le32_to_cpu(sup->leb_size), c->leb_size);
374 goto failed;
375 }
376
377 if (c->log_lebs < UBIFS_MIN_LOG_LEBS ||
378 c->lpt_lebs < UBIFS_MIN_LPT_LEBS ||
379 c->orph_lebs < UBIFS_MIN_ORPH_LEBS ||
380 c->main_lebs < UBIFS_MIN_MAIN_LEBS) {
381 err = 4;
382 goto failed;
383 }
384
385 /*
386 * Calculate minimum allowed amount of main area LEBs. This is very
387 * similar to %UBIFS_MIN_LEB_CNT, but we take into account real what we
388 * have just read from the superblock.
389 */
390 min_leb_cnt = UBIFS_SB_LEBS + UBIFS_MST_LEBS + c->log_lebs;
391 min_leb_cnt += c->lpt_lebs + c->orph_lebs + c->jhead_cnt + 6;
392
393 if (c->leb_cnt < min_leb_cnt || c->leb_cnt > c->vi.size) {
394 ubifs_err("bad LEB count: %d in superblock, %d on UBI volume, "
395 "%d minimum required", c->leb_cnt, c->vi.size,
396 min_leb_cnt);
397 goto failed;
398 }
399
400 if (c->max_leb_cnt < c->leb_cnt) {
401 ubifs_err("max. LEB count %d less than LEB count %d",
402 c->max_leb_cnt, c->leb_cnt);
403 goto failed;
404 }
405
406 if (c->main_lebs < UBIFS_MIN_MAIN_LEBS) {
407 err = 7;
408 goto failed;
409 }
410
411 if (c->max_bud_bytes < (long long)c->leb_size * UBIFS_MIN_BUD_LEBS ||
412 c->max_bud_bytes > (long long)c->leb_size * c->main_lebs) {
413 err = 8;
414 goto failed;
415 }
416
417 if (c->jhead_cnt < NONDATA_JHEADS_CNT + 1 ||
418 c->jhead_cnt > NONDATA_JHEADS_CNT + UBIFS_MAX_JHEADS) {
419 err = 9;
420 goto failed;
421 }
422
423 if (c->fanout < UBIFS_MIN_FANOUT ||
424 ubifs_idx_node_sz(c, c->fanout) > c->leb_size) {
425 err = 10;
426 goto failed;
427 }
428
429 if (c->lsave_cnt < 0 || (c->lsave_cnt > DEFAULT_LSAVE_CNT &&
430 c->lsave_cnt > c->max_leb_cnt - UBIFS_SB_LEBS - UBIFS_MST_LEBS -
431 c->log_lebs - c->lpt_lebs - c->orph_lebs)) {
432 err = 11;
433 goto failed;
434 }
435
436 if (UBIFS_SB_LEBS + UBIFS_MST_LEBS + c->log_lebs + c->lpt_lebs +
437 c->orph_lebs + c->main_lebs != c->leb_cnt) {
438 err = 12;
439 goto failed;
440 }
441
442 if (c->default_compr < 0 || c->default_compr >= UBIFS_COMPR_TYPES_CNT) {
443 err = 13;
444 goto failed;
445 }
446
447 max_bytes = c->main_lebs * (long long)c->leb_size;
448 if (c->rp_size < 0 || max_bytes < c->rp_size) {
449 err = 14;
450 goto failed;
451 }
452
453 if (le32_to_cpu(sup->time_gran) > 1000000000 ||
454 le32_to_cpu(sup->time_gran) < 1) {
455 err = 15;
456 goto failed;
457 }
458
459 return 0;
460
461failed:
462 ubifs_err("bad superblock, error %d", err);
463 dbg_dump_node(c, sup);
464 return -EINVAL;
465}
466
467/**
468 * ubifs_read_sb_node - read superblock node.
469 * @c: UBIFS file-system description object
470 *
471 * This function returns a pointer to the superblock node or a negative error
472 * code.
473 */
474struct ubifs_sb_node *ubifs_read_sb_node(struct ubifs_info *c)
475{
476 struct ubifs_sb_node *sup;
477 int err;
478
479 sup = kmalloc(ALIGN(UBIFS_SB_NODE_SZ, c->min_io_size), GFP_NOFS);
480 if (!sup)
481 return ERR_PTR(-ENOMEM);
482
483 err = ubifs_read_node(c, sup, UBIFS_SB_NODE, UBIFS_SB_NODE_SZ,
484 UBIFS_SB_LNUM, 0);
485 if (err) {
486 kfree(sup);
487 return ERR_PTR(err);
488 }
489
490 return sup;
491}
492
493/**
494 * ubifs_write_sb_node - write superblock node.
495 * @c: UBIFS file-system description object
496 * @sup: superblock node read with 'ubifs_read_sb_node()'
497 *
498 * This function returns %0 on success and a negative error code on failure.
499 */
500int ubifs_write_sb_node(struct ubifs_info *c, struct ubifs_sb_node *sup)
501{
502 int len = ALIGN(UBIFS_SB_NODE_SZ, c->min_io_size);
503
504 ubifs_prepare_node(c, sup, UBIFS_SB_NODE_SZ, 1);
505 return ubifs_leb_change(c, UBIFS_SB_LNUM, sup, len, UBI_LONGTERM);
506}
507
/**
 * ubifs_read_superblock - read superblock.
 * @c: UBIFS file-system description object
 *
 * This function finds, reads and checks the superblock. If an empty UBI volume
 * is being mounted, this function creates default superblock. The superblock
 * fields are copied to @c, the derived area boundaries are calculated, and the
 * result is checked with 'validate_sb()'. Returns zero in case of success, and
 * a negative error code in case of failure.
 */
int ubifs_read_superblock(struct ubifs_info *c)
{
	int err, sup_flags;
	struct ubifs_sb_node *sup;

	/* An empty volume is formatted first, then read like any other */
	if (c->empty) {
		err = create_default_filesystem(c);
		if (err)
			return err;
	}

	sup = ubifs_read_sb_node(c);
	if (IS_ERR(sup))
		return PTR_ERR(sup);

	/*
	 * On-flash format versions newer than %UBIFS_FORMAT_VERSION are not
	 * supported, and neither are the versions older than 3 (see the
	 * second check below).
	 */
	c->fmt_version = le32_to_cpu(sup->fmt_version);
	if (c->fmt_version > UBIFS_FORMAT_VERSION) {
		ubifs_err("on-flash format version is %d, but software only "
			  "supports up to version %d", c->fmt_version,
			  UBIFS_FORMAT_VERSION);
		err = -EINVAL;
		goto out;
	}

	if (c->fmt_version < 3) {
		ubifs_err("on-flash format version %d is not supported",
			  c->fmt_version);
		err = -EINVAL;
		goto out;
	}

	/*
	 * There is no default case: for an unknown hash type @c->key_hash is
	 * left untouched, and 'validate_sb()' rejects a %NULL @c->key_hash.
	 */
	switch (sup->key_hash) {
	case UBIFS_KEY_HASH_R5:
		c->key_hash = key_r5_hash;
		c->key_hash_type = UBIFS_KEY_HASH_R5;
		break;

	case UBIFS_KEY_HASH_TEST:
		c->key_hash = key_test_hash;
		c->key_hash_type = UBIFS_KEY_HASH_TEST;
		break;
	};

	c->key_fmt = sup->key_fmt;

	switch (c->key_fmt) {
	case UBIFS_SIMPLE_KEY_FMT:
		c->key_len = UBIFS_SK_LEN;
		break;
	default:
		ubifs_err("unsupported key format");
		err = -EINVAL;
		goto out;
	}

	/* Copy the superblock fields to @c, converting them to CPU order */
	c->leb_cnt = le32_to_cpu(sup->leb_cnt);
	c->max_leb_cnt = le32_to_cpu(sup->max_leb_cnt);
	c->max_bud_bytes = le64_to_cpu(sup->max_bud_bytes);
	c->log_lebs = le32_to_cpu(sup->log_lebs);
	c->lpt_lebs = le32_to_cpu(sup->lpt_lebs);
	c->orph_lebs = le32_to_cpu(sup->orph_lebs);
	/* The superblock counts only the journal heads beyond the non-data ones */
	c->jhead_cnt = le32_to_cpu(sup->jhead_cnt) + NONDATA_JHEADS_CNT;
	c->fanout = le32_to_cpu(sup->fanout);
	c->lsave_cnt = le32_to_cpu(sup->lsave_cnt);
	c->default_compr = le16_to_cpu(sup->default_compr);
	c->rp_size = le64_to_cpu(sup->rp_size);
	c->rp_uid = le32_to_cpu(sup->rp_uid);
	c->rp_gid = le32_to_cpu(sup->rp_gid);
	sup_flags = le32_to_cpu(sup->flags);

	c->vfs_sb->s_time_gran = le32_to_cpu(sup->time_gran);

	memcpy(&c->uuid, &sup->uuid, 16);

	c->big_lpt = !!(sup_flags & UBIFS_FLG_BIGLPT);

	/* Automatically increase file system size to the maximum size */
	c->old_leb_cnt = c->leb_cnt;
	if (c->leb_cnt < c->vi.size && c->leb_cnt < c->max_leb_cnt) {
		c->leb_cnt = min_t(int, c->max_leb_cnt, c->vi.size);
		/*
		 * On a read-only mount the new size is used only in memory;
		 * otherwise it is also written back to the superblock.
		 */
		if (c->vfs_sb->s_flags & MS_RDONLY)
			dbg_mnt("Auto resizing (ro) from %d LEBs to %d LEBs",
				c->old_leb_cnt, c->leb_cnt);
		else {
			dbg_mnt("Auto resizing (sb) from %d LEBs to %d LEBs",
				c->old_leb_cnt, c->leb_cnt);
			sup->leb_cnt = cpu_to_le32(c->leb_cnt);
			err = ubifs_write_sb_node(c, sup);
			if (err)
				goto out;
			c->old_leb_cnt = c->leb_cnt;
		}
	}

	/*
	 * Calculate the area boundaries. The areas follow each other in this
	 * order: log, LPT, orphan, main.
	 */
	c->log_bytes = (long long)c->log_lebs * c->leb_size;
	c->log_last = UBIFS_LOG_LNUM + c->log_lebs - 1;
	c->lpt_first = UBIFS_LOG_LNUM + c->log_lebs;
	c->lpt_last = c->lpt_first + c->lpt_lebs - 1;
	c->orph_first = c->lpt_last + 1;
	c->orph_last = c->orph_first + c->orph_lebs - 1;
	c->main_lebs = c->leb_cnt - UBIFS_SB_LEBS - UBIFS_MST_LEBS;
	c->main_lebs -= c->log_lebs + c->lpt_lebs + c->orph_lebs;
	c->main_first = c->leb_cnt - c->main_lebs;
	c->report_rp_size = ubifs_reported_space(c, c->rp_size);

	err = validate_sb(c, sup);
out:
	/* The superblock node buffer is not needed after this function */
	kfree(sup);
	return err;
}
diff --git a/fs/ubifs/scan.c b/fs/ubifs/scan.c
new file mode 100644
index 00000000000..acf5c5fffc6
--- /dev/null
+++ b/fs/ubifs/scan.c
@@ -0,0 +1,362 @@
1/*
2 * This file is part of UBIFS.
3 *
4 * Copyright (C) 2006-2008 Nokia Corporation
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms of the GNU General Public License version 2 as published by
8 * the Free Software Foundation.
9 *
10 * This program is distributed in the hope that it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13 * more details.
14 *
15 * You should have received a copy of the GNU General Public License along with
16 * this program; if not, write to the Free Software Foundation, Inc., 51
17 * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
18 *
19 * Authors: Adrian Hunter
20 * Artem Bityutskiy (Битюцкий Артём)
21 */
22
23/*
24 * This file implements the scan which is a general-purpose function for
25 * determining what nodes are in an eraseblock. The scan is used to replay the
26 * journal, to do garbage collection, for the TNC in-the-gaps method, and by
27 * debugging functions.
28 */
29
30#include "ubifs.h"
31
32/**
33 * scan_padding_bytes - scan for padding bytes.
34 * @buf: buffer to scan
35 * @len: length of buffer
36 *
37 * This function returns the number of padding bytes on success and
38 * %SCANNED_GARBAGE on failure.
39 */
40static int scan_padding_bytes(void *buf, int len)
41{
42 int pad_len = 0, max_pad_len = min_t(int, UBIFS_PAD_NODE_SZ, len);
43 uint8_t *p = buf;
44
45 dbg_scan("not a node");
46
47 while (pad_len < max_pad_len && *p++ == UBIFS_PADDING_BYTE)
48 pad_len += 1;
49
50 if (!pad_len || (pad_len & 7))
51 return SCANNED_GARBAGE;
52
53 dbg_scan("%d padding bytes", pad_len);
54
55 return pad_len;
56}
57
58/**
59 * ubifs_scan_a_node - scan for a node or padding.
60 * @c: UBIFS file-system description object
61 * @buf: buffer to scan
62 * @len: length of buffer
63 * @lnum: logical eraseblock number
64 * @offs: offset within the logical eraseblock
65 * @quiet: print no messages
66 *
67 * This function returns a scanning code to indicate what was scanned.
68 */
69int ubifs_scan_a_node(const struct ubifs_info *c, void *buf, int len, int lnum,
70 int offs, int quiet)
71{
72 struct ubifs_ch *ch = buf;
73 uint32_t magic;
74
75 magic = le32_to_cpu(ch->magic);
76
77 if (magic == 0xFFFFFFFF) {
78 dbg_scan("hit empty space");
79 return SCANNED_EMPTY_SPACE;
80 }
81
82 if (magic != UBIFS_NODE_MAGIC)
83 return scan_padding_bytes(buf, len);
84
85 if (len < UBIFS_CH_SZ)
86 return SCANNED_GARBAGE;
87
88 dbg_scan("scanning %s", dbg_ntype(ch->node_type));
89
90 if (ubifs_check_node(c, buf, lnum, offs, quiet))
91 return SCANNED_A_CORRUPT_NODE;
92
93 if (ch->node_type == UBIFS_PAD_NODE) {
94 struct ubifs_pad_node *pad = buf;
95 int pad_len = le32_to_cpu(pad->pad_len);
96 int node_len = le32_to_cpu(ch->len);
97
98 /* Validate the padding node */
99 if (pad_len < 0 ||
100 offs + node_len + pad_len > c->leb_size) {
101 if (!quiet) {
102 ubifs_err("bad pad node at LEB %d:%d",
103 lnum, offs);
104 dbg_dump_node(c, pad);
105 }
106 return SCANNED_A_BAD_PAD_NODE;
107 }
108
109 /* Make the node pads to 8-byte boundary */
110 if ((node_len + pad_len) & 7) {
111 if (!quiet) {
112 dbg_err("bad padding length %d - %d",
113 offs, offs + node_len + pad_len);
114 }
115 return SCANNED_A_BAD_PAD_NODE;
116 }
117
118 dbg_scan("%d bytes padded, offset now %d",
119 pad_len, ALIGN(offs + node_len + pad_len, 8));
120
121 return node_len + pad_len;
122 }
123
124 return SCANNED_A_NODE;
125}
126
127/**
128 * ubifs_start_scan - create LEB scanning information at start of scan.
129 * @c: UBIFS file-system description object
130 * @lnum: logical eraseblock number
131 * @offs: offset to start at (usually zero)
132 * @sbuf: scan buffer (must be c->leb_size)
133 *
134 * This function returns %0 on success and a negative error code on failure.
135 */
136struct ubifs_scan_leb *ubifs_start_scan(const struct ubifs_info *c, int lnum,
137 int offs, void *sbuf)
138{
139 struct ubifs_scan_leb *sleb;
140 int err;
141
142 dbg_scan("scan LEB %d:%d", lnum, offs);
143
144 sleb = kzalloc(sizeof(struct ubifs_scan_leb), GFP_NOFS);
145 if (!sleb)
146 return ERR_PTR(-ENOMEM);
147
148 sleb->lnum = lnum;
149 INIT_LIST_HEAD(&sleb->nodes);
150 sleb->buf = sbuf;
151
152 err = ubi_read(c->ubi, lnum, sbuf + offs, offs, c->leb_size - offs);
153 if (err && err != -EBADMSG) {
154 ubifs_err("cannot read %d bytes from LEB %d:%d,"
155 " error %d", c->leb_size - offs, lnum, offs, err);
156 kfree(sleb);
157 return ERR_PTR(err);
158 }
159
160 if (err == -EBADMSG)
161 sleb->ecc = 1;
162
163 return sleb;
164}
165
166/**
167 * ubifs_end_scan - update LEB scanning information at end of scan.
168 * @c: UBIFS file-system description object
169 * @sleb: scanning information
170 * @lnum: logical eraseblock number
171 * @offs: offset to start at (usually zero)
172 *
173 * This function returns %0 on success and a negative error code on failure.
174 */
175void ubifs_end_scan(const struct ubifs_info *c, struct ubifs_scan_leb *sleb,
176 int lnum, int offs)
177{
178 lnum = lnum;
179 dbg_scan("stop scanning LEB %d at offset %d", lnum, offs);
180 ubifs_assert(offs % c->min_io_size == 0);
181
182 sleb->endpt = ALIGN(offs, c->min_io_size);
183}
184
185/**
186 * ubifs_add_snod - add a scanned node to LEB scanning information.
187 * @c: UBIFS file-system description object
188 * @sleb: scanning information
189 * @buf: buffer containing node
190 * @offs: offset of node on flash
191 *
192 * This function returns %0 on success and a negative error code on failure.
193 */
194int ubifs_add_snod(const struct ubifs_info *c, struct ubifs_scan_leb *sleb,
195 void *buf, int offs)
196{
197 struct ubifs_ch *ch = buf;
198 struct ubifs_ino_node *ino = buf;
199 struct ubifs_scan_node *snod;
200
201 snod = kzalloc(sizeof(struct ubifs_scan_node), GFP_NOFS);
202 if (!snod)
203 return -ENOMEM;
204
205 snod->sqnum = le64_to_cpu(ch->sqnum);
206 snod->type = ch->node_type;
207 snod->offs = offs;
208 snod->len = le32_to_cpu(ch->len);
209 snod->node = buf;
210
211 switch (ch->node_type) {
212 case UBIFS_INO_NODE:
213 case UBIFS_DENT_NODE:
214 case UBIFS_XENT_NODE:
215 case UBIFS_DATA_NODE:
216 case UBIFS_TRUN_NODE:
217 /*
218 * The key is in the same place in all keyed
219 * nodes.
220 */
221 key_read(c, &ino->key, &snod->key);
222 break;
223 }
224 list_add_tail(&snod->list, &sleb->nodes);
225 sleb->nodes_cnt += 1;
226 return 0;
227}
228
229/**
230 * ubifs_scanned_corruption - print information after UBIFS scanned corruption.
231 * @c: UBIFS file-system description object
232 * @lnum: LEB number of corruption
233 * @offs: offset of corruption
234 * @buf: buffer containing corruption
235 */
236void ubifs_scanned_corruption(const struct ubifs_info *c, int lnum, int offs,
237 void *buf)
238{
239 int len;
240
241 ubifs_err("corrupted data at LEB %d:%d", lnum, offs);
242 if (dbg_failure_mode)
243 return;
244 len = c->leb_size - offs;
245 if (len > 4096)
246 len = 4096;
247 dbg_err("first %d bytes from LEB %d:%d", len, lnum, offs);
248 print_hex_dump(KERN_DEBUG, "", DUMP_PREFIX_OFFSET, 32, 4, buf, len, 1);
249}
250
251/**
252 * ubifs_scan - scan a logical eraseblock.
253 * @c: UBIFS file-system description object
254 * @lnum: logical eraseblock number
255 * @offs: offset to start at (usually zero)
256 * @sbuf: scan buffer (must be c->leb_size)
257 *
258 * This function scans LEB number @lnum and returns complete information about
259 * its contents. Returns an error code in case of failure.
260 */
261struct ubifs_scan_leb *ubifs_scan(const struct ubifs_info *c, int lnum,
262 int offs, void *sbuf)
263{
264 void *buf = sbuf + offs;
265 int err, len = c->leb_size - offs;
266 struct ubifs_scan_leb *sleb;
267
268 sleb = ubifs_start_scan(c, lnum, offs, sbuf);
269 if (IS_ERR(sleb))
270 return sleb;
271
272 while (len >= 8) {
273 struct ubifs_ch *ch = buf;
274 int node_len, ret;
275
276 dbg_scan("look at LEB %d:%d (%d bytes left)",
277 lnum, offs, len);
278
279 cond_resched();
280
281 ret = ubifs_scan_a_node(c, buf, len, lnum, offs, 0);
282
283 if (ret > 0) {
284 /* Padding bytes or a valid padding node */
285 offs += ret;
286 buf += ret;
287 len -= ret;
288 continue;
289 }
290
291 if (ret == SCANNED_EMPTY_SPACE)
292 /* Empty space is checked later */
293 break;
294
295 switch (ret) {
296 case SCANNED_GARBAGE:
297 dbg_err("garbage");
298 goto corrupted;
299 case SCANNED_A_NODE:
300 break;
301 case SCANNED_A_CORRUPT_NODE:
302 case SCANNED_A_BAD_PAD_NODE:
303 dbg_err("bad node");
304 goto corrupted;
305 default:
306 dbg_err("unknown");
307 goto corrupted;
308 }
309
310 err = ubifs_add_snod(c, sleb, buf, offs);
311 if (err)
312 goto error;
313
314 node_len = ALIGN(le32_to_cpu(ch->len), 8);
315 offs += node_len;
316 buf += node_len;
317 len -= node_len;
318 }
319
320 if (offs % c->min_io_size)
321 goto corrupted;
322
323 ubifs_end_scan(c, sleb, lnum, offs);
324
325 for (; len > 4; offs += 4, buf = buf + 4, len -= 4)
326 if (*(uint32_t *)buf != 0xffffffff)
327 break;
328 for (; len; offs++, buf++, len--)
329 if (*(uint8_t *)buf != 0xff) {
330 ubifs_err("corrupt empty space at LEB %d:%d",
331 lnum, offs);
332 goto corrupted;
333 }
334
335 return sleb;
336
337corrupted:
338 ubifs_scanned_corruption(c, lnum, offs, buf);
339 err = -EUCLEAN;
340error:
341 ubifs_err("LEB %d scanning failed", lnum);
342 ubifs_scan_destroy(sleb);
343 return ERR_PTR(err);
344}
345
346/**
347 * ubifs_scan_destroy - destroy LEB scanning information.
348 * @sleb: scanning information to free
349 */
350void ubifs_scan_destroy(struct ubifs_scan_leb *sleb)
351{
352 struct ubifs_scan_node *node;
353 struct list_head *head;
354
355 head = &sleb->nodes;
356 while (!list_empty(head)) {
357 node = list_entry(head->next, struct ubifs_scan_node, list);
358 list_del(&node->list);
359 kfree(node);
360 }
361 kfree(sleb);
362}
diff --git a/fs/ubifs/shrinker.c b/fs/ubifs/shrinker.c
new file mode 100644
index 00000000000..f248533841a
--- /dev/null
+++ b/fs/ubifs/shrinker.c
@@ -0,0 +1,322 @@
1/*
2 * This file is part of UBIFS.
3 *
4 * Copyright (C) 2006-2008 Nokia Corporation.
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms of the GNU General Public License version 2 as published by
8 * the Free Software Foundation.
9 *
10 * This program is distributed in the hope that it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13 * more details.
14 *
15 * You should have received a copy of the GNU General Public License along with
16 * this program; if not, write to the Free Software Foundation, Inc., 51
17 * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
18 *
19 * Authors: Artem Bityutskiy (Битюцкий Артём)
20 * Adrian Hunter
21 */
22
23/*
24 * This file implements UBIFS shrinker which evicts clean znodes from the TNC
25 * tree when Linux VM needs more RAM.
26 *
27 * We do not implement any LRU lists to find oldest znodes to free because it
28 * would add additional overhead to the file system fast paths. So the shrinker
29 * just walks the TNC tree when searching for znodes to free.
30 *
31 * If the root of a TNC sub-tree is clean and old enough, then the children are
32 * also clean and old enough. So the shrinker walks the TNC in level order and
33 * dumps entire sub-trees.
34 *
35 * The age of znodes is just the time-stamp when they were last looked at.
36 * The current shrinker first tries to evict old znodes, then young ones.
37 *
38 * Since the shrinker is global, it has to protect against races with FS
39 * un-mounts, which is done by the 'ubifs_infos_lock' and 'c->umount_mutex'.
40 */
41
42#include "ubifs.h"
43
44/* List of all UBIFS file-system instances */
45LIST_HEAD(ubifs_infos);
46
47/*
48 * We number each shrinker run and record the number on the ubifs_info structure
49 * so that we can easily work out which ubifs_info structures have already been
50 * done by the current run.
51 */
52static unsigned int shrinker_run_no;
53
54/* Protects 'ubifs_infos' list */
55DEFINE_SPINLOCK(ubifs_infos_lock);
56
57/* Global clean znode counter (for all mounted UBIFS instances) */
58atomic_long_t ubifs_clean_zn_cnt;
59
60/**
61 * shrink_tnc - shrink TNC tree.
62 * @c: UBIFS file-system description object
63 * @nr: number of znodes to free
64 * @age: the age of znodes to free
65 * @contention: if any contention, this is set to %1
66 *
67 * This function traverses TNC tree and frees clean znodes. It does not free
68 * clean znodes which younger then @age. Returns number of freed znodes.
69 */
70static int shrink_tnc(struct ubifs_info *c, int nr, int age, int *contention)
71{
72 int total_freed = 0;
73 struct ubifs_znode *znode, *zprev;
74 int time = get_seconds();
75
76 ubifs_assert(mutex_is_locked(&c->umount_mutex));
77 ubifs_assert(mutex_is_locked(&c->tnc_mutex));
78
79 if (!c->zroot.znode || atomic_long_read(&c->clean_zn_cnt) == 0)
80 return 0;
81
82 /*
83 * Traverse the TNC tree in levelorder manner, so that it is possible
84 * to destroy large sub-trees. Indeed, if a znode is old, then all its
85 * children are older or of the same age.
86 *
87 * Note, we are holding 'c->tnc_mutex', so we do not have to lock the
88 * 'c->space_lock' when _reading_ 'c->clean_zn_cnt', because it is
89 * changed only when the 'c->tnc_mutex' is held.
90 */
91 zprev = NULL;
92 znode = ubifs_tnc_levelorder_next(c->zroot.znode, NULL);
93 while (znode && total_freed < nr &&
94 atomic_long_read(&c->clean_zn_cnt) > 0) {
95 int freed;
96
97 /*
98 * If the znode is clean, but it is in the 'c->cnext' list, this
99 * means that this znode has just been written to flash as a
100 * part of commit and was marked clean. They will be removed
101 * from the list at end commit. We cannot change the list,
102 * because it is not protected by any mutex (design decision to
103 * make commit really independent and parallel to main I/O). So
104 * we just skip these znodes.
105 *
106 * Note, the 'clean_zn_cnt' counters are not updated until
107 * after the commit, so the UBIFS shrinker does not report
108 * the znodes which are in the 'c->cnext' list as freeable.
109 *
110 * Also note, if the root of a sub-tree is not in 'c->cnext',
111 * then the whole sub-tree is not in 'c->cnext' as well, so it
112 * is safe to dump whole sub-tree.
113 */
114
115 if (znode->cnext) {
116 /*
117 * Very soon these znodes will be removed from the list
118 * and become freeable.
119 */
120 *contention = 1;
121 } else if (!ubifs_zn_dirty(znode) &&
122 abs(time - znode->time) >= age) {
123 if (znode->parent)
124 znode->parent->zbranch[znode->iip].znode = NULL;
125 else
126 c->zroot.znode = NULL;
127
128 freed = ubifs_destroy_tnc_subtree(znode);
129 atomic_long_sub(freed, &ubifs_clean_zn_cnt);
130 atomic_long_sub(freed, &c->clean_zn_cnt);
131 ubifs_assert(atomic_long_read(&c->clean_zn_cnt) >= 0);
132 total_freed += freed;
133 znode = zprev;
134 }
135
136 if (unlikely(!c->zroot.znode))
137 break;
138
139 zprev = znode;
140 znode = ubifs_tnc_levelorder_next(c->zroot.znode, znode);
141 cond_resched();
142 }
143
144 return total_freed;
145}
146
147/**
148 * shrink_tnc_trees - shrink UBIFS TNC trees.
149 * @nr: number of znodes to free
150 * @age: the age of znodes to free
151 * @contention: if any contention, this is set to %1
152 *
153 * This function walks the list of mounted UBIFS file-systems and frees clean
154 * znodes which are older then @age, until at least @nr znodes are freed.
155 * Returns the number of freed znodes.
156 */
157static int shrink_tnc_trees(int nr, int age, int *contention)
158{
159 struct ubifs_info *c;
160 struct list_head *p;
161 unsigned int run_no;
162 int freed = 0;
163
164 spin_lock(&ubifs_infos_lock);
165 do {
166 run_no = ++shrinker_run_no;
167 } while (run_no == 0);
168 /* Iterate over all mounted UBIFS file-systems and try to shrink them */
169 p = ubifs_infos.next;
170 while (p != &ubifs_infos) {
171 c = list_entry(p, struct ubifs_info, infos_list);
172 /*
173 * We move the ones we do to the end of the list, so we stop
174 * when we see one we have already done.
175 */
176 if (c->shrinker_run_no == run_no)
177 break;
178 if (!mutex_trylock(&c->umount_mutex)) {
179 /* Some un-mount is in progress, try next FS */
180 *contention = 1;
181 p = p->next;
182 continue;
183 }
184 /*
185 * We're holding 'c->umount_mutex', so the file-system won't go
186 * away.
187 */
188 if (!mutex_trylock(&c->tnc_mutex)) {
189 mutex_unlock(&c->umount_mutex);
190 *contention = 1;
191 p = p->next;
192 continue;
193 }
194 spin_unlock(&ubifs_infos_lock);
195 /*
196 * OK, now we have TNC locked, the file-system cannot go away -
197 * it is safe to reap the cache.
198 */
199 c->shrinker_run_no = run_no;
200 freed += shrink_tnc(c, nr, age, contention);
201 mutex_unlock(&c->tnc_mutex);
202 spin_lock(&ubifs_infos_lock);
203 /* Get the next list element before we move this one */
204 p = p->next;
205 /*
206 * Move this one to the end of the list to provide some
207 * fairness.
208 */
209 list_del(&c->infos_list);
210 list_add_tail(&c->infos_list, &ubifs_infos);
211 mutex_unlock(&c->umount_mutex);
212 if (freed >= nr)
213 break;
214 }
215 spin_unlock(&ubifs_infos_lock);
216 return freed;
217}
218
219/**
220 * kick_a_thread - kick a background thread to start commit.
221 *
222 * This function kicks a background thread to start background commit. Returns
223 * %-1 if a thread was kicked or there is another reason to assume the memory
224 * will soon be freed or become freeable. If there are no dirty znodes, returns
225 * %0.
226 */
227static int kick_a_thread(void)
228{
229 int i;
230 struct ubifs_info *c;
231
232 /*
233 * Iterate over all mounted UBIFS file-systems and find out if there is
234 * already an ongoing commit operation there. If no, then iterate for
235 * the second time and initiate background commit.
236 */
237 spin_lock(&ubifs_infos_lock);
238 for (i = 0; i < 2; i++) {
239 list_for_each_entry(c, &ubifs_infos, infos_list) {
240 long dirty_zn_cnt;
241
242 if (!mutex_trylock(&c->umount_mutex)) {
243 /*
244 * Some un-mount is in progress, it will
245 * certainly free memory, so just return.
246 */
247 spin_unlock(&ubifs_infos_lock);
248 return -1;
249 }
250
251 dirty_zn_cnt = atomic_long_read(&c->dirty_zn_cnt);
252
253 if (!dirty_zn_cnt || c->cmt_state == COMMIT_BROKEN ||
254 c->ro_media) {
255 mutex_unlock(&c->umount_mutex);
256 continue;
257 }
258
259 if (c->cmt_state != COMMIT_RESTING) {
260 spin_unlock(&ubifs_infos_lock);
261 mutex_unlock(&c->umount_mutex);
262 return -1;
263 }
264
265 if (i == 1) {
266 list_del(&c->infos_list);
267 list_add_tail(&c->infos_list, &ubifs_infos);
268 spin_unlock(&ubifs_infos_lock);
269
270 ubifs_request_bg_commit(c);
271 mutex_unlock(&c->umount_mutex);
272 return -1;
273 }
274 mutex_unlock(&c->umount_mutex);
275 }
276 }
277 spin_unlock(&ubifs_infos_lock);
278
279 return 0;
280}
281
282int ubifs_shrinker(int nr, gfp_t gfp_mask)
283{
284 int freed, contention = 0;
285 long clean_zn_cnt = atomic_long_read(&ubifs_clean_zn_cnt);
286
287 if (nr == 0)
288 return clean_zn_cnt;
289
290 if (!clean_zn_cnt) {
291 /*
292 * No clean znodes, nothing to reap. All we can do in this case
293 * is to kick background threads to start commit, which will
294 * probably make clean znodes which, in turn, will be freeable.
295 * And we return -1 which means will make VM call us again
296 * later.
297 */
298 dbg_tnc("no clean znodes, kick a thread");
299 return kick_a_thread();
300 }
301
302 freed = shrink_tnc_trees(nr, OLD_ZNODE_AGE, &contention);
303 if (freed >= nr)
304 goto out;
305
306 dbg_tnc("not enough old znodes, try to free young ones");
307 freed += shrink_tnc_trees(nr - freed, YOUNG_ZNODE_AGE, &contention);
308 if (freed >= nr)
309 goto out;
310
311 dbg_tnc("not enough young znodes, free all");
312 freed += shrink_tnc_trees(nr - freed, 0, &contention);
313
314 if (!freed && contention) {
315 dbg_tnc("freed nothing, but contention");
316 return -1;
317 }
318
319out:
320 dbg_tnc("%d znodes were freed, requested %d", freed, nr);
321 return freed;
322}
diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c
new file mode 100644
index 00000000000..00eb9c68ad0
--- /dev/null
+++ b/fs/ubifs/super.c
@@ -0,0 +1,1951 @@
1/*
2 * This file is part of UBIFS.
3 *
4 * Copyright (C) 2006-2008 Nokia Corporation.
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms of the GNU General Public License version 2 as published by
8 * the Free Software Foundation.
9 *
10 * This program is distributed in the hope that it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13 * more details.
14 *
15 * You should have received a copy of the GNU General Public License along with
16 * this program; if not, write to the Free Software Foundation, Inc., 51
17 * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
18 *
19 * Authors: Artem Bityutskiy (Битюцкий Артём)
20 * Adrian Hunter
21 */
22
23/*
24 * This file implements UBIFS initialization and VFS superblock operations. Some
25 * initialization stuff which is rather large and complex is placed at
26 * corresponding subsystems, but most of it is here.
27 */
28
29#include <linux/init.h>
30#include <linux/slab.h>
31#include <linux/module.h>
32#include <linux/ctype.h>
33#include <linux/random.h>
34#include <linux/kthread.h>
35#include <linux/parser.h>
36#include <linux/seq_file.h>
37#include <linux/mount.h>
38#include "ubifs.h"
39
40/* Slab cache for UBIFS inodes */
41struct kmem_cache *ubifs_inode_slab;
42
43/* UBIFS TNC shrinker description */
44static struct shrinker ubifs_shrinker_info = {
45 .shrink = ubifs_shrinker,
46 .seeks = DEFAULT_SEEKS,
47};
48
49/**
50 * validate_inode - validate inode.
51 * @c: UBIFS file-system description object
52 * @inode: the inode to validate
53 *
54 * This is a helper function for 'ubifs_iget()' which validates various fields
55 * of a newly built inode to make sure they contain sane values and prevent
56 * possible vulnerabilities. Returns zero if the inode is all right and
57 * a non-zero error code if not.
58 */
59static int validate_inode(struct ubifs_info *c, const struct inode *inode)
60{
61 int err;
62 const struct ubifs_inode *ui = ubifs_inode(inode);
63
64 if (inode->i_size > c->max_inode_sz) {
65 ubifs_err("inode is too large (%lld)",
66 (long long)inode->i_size);
67 return 1;
68 }
69
70 if (ui->compr_type < 0 || ui->compr_type >= UBIFS_COMPR_TYPES_CNT) {
71 ubifs_err("unknown compression type %d", ui->compr_type);
72 return 2;
73 }
74
75 if (ui->xattr_names + ui->xattr_cnt > XATTR_LIST_MAX)
76 return 3;
77
78 if (ui->data_len < 0 || ui->data_len > UBIFS_MAX_INO_DATA)
79 return 4;
80
81 if (ui->xattr && (inode->i_mode & S_IFMT) != S_IFREG)
82 return 5;
83
84 if (!ubifs_compr_present(ui->compr_type)) {
85 ubifs_warn("inode %lu uses '%s' compression, but it was not "
86 "compiled in", inode->i_ino,
87 ubifs_compr_name(ui->compr_type));
88 }
89
90 err = dbg_check_dir_size(c, inode);
91 return err;
92}
93
94struct inode *ubifs_iget(struct super_block *sb, unsigned long inum)
95{
96 int err;
97 union ubifs_key key;
98 struct ubifs_ino_node *ino;
99 struct ubifs_info *c = sb->s_fs_info;
100 struct inode *inode;
101 struct ubifs_inode *ui;
102
103 dbg_gen("inode %lu", inum);
104
105 inode = iget_locked(sb, inum);
106 if (!inode)
107 return ERR_PTR(-ENOMEM);
108 if (!(inode->i_state & I_NEW))
109 return inode;
110 ui = ubifs_inode(inode);
111
112 ino = kmalloc(UBIFS_MAX_INO_NODE_SZ, GFP_NOFS);
113 if (!ino) {
114 err = -ENOMEM;
115 goto out;
116 }
117
118 ino_key_init(c, &key, inode->i_ino);
119
120 err = ubifs_tnc_lookup(c, &key, ino);
121 if (err)
122 goto out_ino;
123
124 inode->i_flags |= (S_NOCMTIME | S_NOATIME);
125 inode->i_nlink = le32_to_cpu(ino->nlink);
126 inode->i_uid = le32_to_cpu(ino->uid);
127 inode->i_gid = le32_to_cpu(ino->gid);
128 inode->i_atime.tv_sec = (int64_t)le64_to_cpu(ino->atime_sec);
129 inode->i_atime.tv_nsec = le32_to_cpu(ino->atime_nsec);
130 inode->i_mtime.tv_sec = (int64_t)le64_to_cpu(ino->mtime_sec);
131 inode->i_mtime.tv_nsec = le32_to_cpu(ino->mtime_nsec);
132 inode->i_ctime.tv_sec = (int64_t)le64_to_cpu(ino->ctime_sec);
133 inode->i_ctime.tv_nsec = le32_to_cpu(ino->ctime_nsec);
134 inode->i_mode = le32_to_cpu(ino->mode);
135 inode->i_size = le64_to_cpu(ino->size);
136
137 ui->data_len = le32_to_cpu(ino->data_len);
138 ui->flags = le32_to_cpu(ino->flags);
139 ui->compr_type = le16_to_cpu(ino->compr_type);
140 ui->creat_sqnum = le64_to_cpu(ino->creat_sqnum);
141 ui->xattr_cnt = le32_to_cpu(ino->xattr_cnt);
142 ui->xattr_size = le32_to_cpu(ino->xattr_size);
143 ui->xattr_names = le32_to_cpu(ino->xattr_names);
144 ui->synced_i_size = ui->ui_size = inode->i_size;
145
146 ui->xattr = (ui->flags & UBIFS_XATTR_FL) ? 1 : 0;
147
148 err = validate_inode(c, inode);
149 if (err)
150 goto out_invalid;
151
152 /* Disable readahead */
153 inode->i_mapping->backing_dev_info = &c->bdi;
154
155 switch (inode->i_mode & S_IFMT) {
156 case S_IFREG:
157 inode->i_mapping->a_ops = &ubifs_file_address_operations;
158 inode->i_op = &ubifs_file_inode_operations;
159 inode->i_fop = &ubifs_file_operations;
160 if (ui->xattr) {
161 ui->data = kmalloc(ui->data_len + 1, GFP_NOFS);
162 if (!ui->data) {
163 err = -ENOMEM;
164 goto out_ino;
165 }
166 memcpy(ui->data, ino->data, ui->data_len);
167 ((char *)ui->data)[ui->data_len] = '\0';
168 } else if (ui->data_len != 0) {
169 err = 10;
170 goto out_invalid;
171 }
172 break;
173 case S_IFDIR:
174 inode->i_op = &ubifs_dir_inode_operations;
175 inode->i_fop = &ubifs_dir_operations;
176 if (ui->data_len != 0) {
177 err = 11;
178 goto out_invalid;
179 }
180 break;
181 case S_IFLNK:
182 inode->i_op = &ubifs_symlink_inode_operations;
183 if (ui->data_len <= 0 || ui->data_len > UBIFS_MAX_INO_DATA) {
184 err = 12;
185 goto out_invalid;
186 }
187 ui->data = kmalloc(ui->data_len + 1, GFP_NOFS);
188 if (!ui->data) {
189 err = -ENOMEM;
190 goto out_ino;
191 }
192 memcpy(ui->data, ino->data, ui->data_len);
193 ((char *)ui->data)[ui->data_len] = '\0';
194 break;
195 case S_IFBLK:
196 case S_IFCHR:
197 {
198 dev_t rdev;
199 union ubifs_dev_desc *dev;
200
201 ui->data = kmalloc(sizeof(union ubifs_dev_desc), GFP_NOFS);
202 if (!ui->data) {
203 err = -ENOMEM;
204 goto out_ino;
205 }
206
207 dev = (union ubifs_dev_desc *)ino->data;
208 if (ui->data_len == sizeof(dev->new))
209 rdev = new_decode_dev(le32_to_cpu(dev->new));
210 else if (ui->data_len == sizeof(dev->huge))
211 rdev = huge_decode_dev(le64_to_cpu(dev->huge));
212 else {
213 err = 13;
214 goto out_invalid;
215 }
216 memcpy(ui->data, ino->data, ui->data_len);
217 inode->i_op = &ubifs_file_inode_operations;
218 init_special_inode(inode, inode->i_mode, rdev);
219 break;
220 }
221 case S_IFSOCK:
222 case S_IFIFO:
223 inode->i_op = &ubifs_file_inode_operations;
224 init_special_inode(inode, inode->i_mode, 0);
225 if (ui->data_len != 0) {
226 err = 14;
227 goto out_invalid;
228 }
229 break;
230 default:
231 err = 15;
232 goto out_invalid;
233 }
234
235 kfree(ino);
236 ubifs_set_inode_flags(inode);
237 unlock_new_inode(inode);
238 return inode;
239
240out_invalid:
241 ubifs_err("inode %lu validation failed, error %d", inode->i_ino, err);
242 dbg_dump_node(c, ino);
243 dbg_dump_inode(c, inode);
244 err = -EINVAL;
245out_ino:
246 kfree(ino);
247out:
248 ubifs_err("failed to read inode %lu, error %d", inode->i_ino, err);
249 iget_failed(inode);
250 return ERR_PTR(err);
251}
252
253static struct inode *ubifs_alloc_inode(struct super_block *sb)
254{
255 struct ubifs_inode *ui;
256
257 ui = kmem_cache_alloc(ubifs_inode_slab, GFP_NOFS);
258 if (!ui)
259 return NULL;
260
261 memset((void *)ui + sizeof(struct inode), 0,
262 sizeof(struct ubifs_inode) - sizeof(struct inode));
263 mutex_init(&ui->ui_mutex);
264 spin_lock_init(&ui->ui_lock);
265 return &ui->vfs_inode;
266};
267
268static void ubifs_destroy_inode(struct inode *inode)
269{
270 struct ubifs_inode *ui = ubifs_inode(inode);
271
272 kfree(ui->data);
273 kmem_cache_free(ubifs_inode_slab, inode);
274}
275
276/*
277 * Note, Linux write-back code calls this without 'i_mutex'.
278 */
279static int ubifs_write_inode(struct inode *inode, int wait)
280{
281 int err;
282 struct ubifs_info *c = inode->i_sb->s_fs_info;
283 struct ubifs_inode *ui = ubifs_inode(inode);
284
285 ubifs_assert(!ui->xattr);
286 if (is_bad_inode(inode))
287 return 0;
288
289 mutex_lock(&ui->ui_mutex);
290 /*
291 * Due to races between write-back forced by budgeting
292 * (see 'sync_some_inodes()') and pdflush write-back, the inode may
293 * have already been synchronized, do not do this again. This might
294 * also happen if it was synchronized in an VFS operation, e.g.
295 * 'ubifs_link()'.
296 */
297 if (!ui->dirty) {
298 mutex_unlock(&ui->ui_mutex);
299 return 0;
300 }
301
302 dbg_gen("inode %lu", inode->i_ino);
303 err = ubifs_jnl_write_inode(c, inode, 0);
304 if (err)
305 ubifs_err("can't write inode %lu, error %d", inode->i_ino, err);
306
307 ui->dirty = 0;
308 mutex_unlock(&ui->ui_mutex);
309 ubifs_release_dirty_inode_budget(c, ui);
310 return err;
311}
312
313static void ubifs_delete_inode(struct inode *inode)
314{
315 int err;
316 struct ubifs_info *c = inode->i_sb->s_fs_info;
317
318 if (ubifs_inode(inode)->xattr)
319 /*
320 * Extended attribute inode deletions are fully handled in
321 * 'ubifs_removexattr()'. These inodes are special and have
322 * limited usage, so there is nothing to do here.
323 */
324 goto out;
325
326 dbg_gen("inode %lu", inode->i_ino);
327 ubifs_assert(!atomic_read(&inode->i_count));
328 ubifs_assert(inode->i_nlink == 0);
329
330 truncate_inode_pages(&inode->i_data, 0);
331 if (is_bad_inode(inode))
332 goto out;
333
334 ubifs_inode(inode)->ui_size = inode->i_size = 0;
335 err = ubifs_jnl_write_inode(c, inode, 1);
336 if (err)
337 /*
338 * Worst case we have a lost orphan inode wasting space, so a
339 * simple error message is ok here.
340 */
341 ubifs_err("can't write inode %lu, error %d", inode->i_ino, err);
342out:
343 clear_inode(inode);
344}
345
346static void ubifs_dirty_inode(struct inode *inode)
347{
348 struct ubifs_inode *ui = ubifs_inode(inode);
349
350 ubifs_assert(mutex_is_locked(&ui->ui_mutex));
351 if (!ui->dirty) {
352 ui->dirty = 1;
353 dbg_gen("inode %lu", inode->i_ino);
354 }
355}
356
357static int ubifs_statfs(struct dentry *dentry, struct kstatfs *buf)
358{
359 struct ubifs_info *c = dentry->d_sb->s_fs_info;
360 unsigned long long free;
361
362 free = ubifs_budg_get_free_space(c);
363 dbg_gen("free space %lld bytes (%lld blocks)",
364 free, free >> UBIFS_BLOCK_SHIFT);
365
366 buf->f_type = UBIFS_SUPER_MAGIC;
367 buf->f_bsize = UBIFS_BLOCK_SIZE;
368 buf->f_blocks = c->block_cnt;
369 buf->f_bfree = free >> UBIFS_BLOCK_SHIFT;
370 if (free > c->report_rp_size)
371 buf->f_bavail = (free - c->report_rp_size) >> UBIFS_BLOCK_SHIFT;
372 else
373 buf->f_bavail = 0;
374 buf->f_files = 0;
375 buf->f_ffree = 0;
376 buf->f_namelen = UBIFS_MAX_NLEN;
377
378 return 0;
379}
380
381static int ubifs_show_options(struct seq_file *s, struct vfsmount *mnt)
382{
383 struct ubifs_info *c = mnt->mnt_sb->s_fs_info;
384
385 if (c->mount_opts.unmount_mode == 2)
386 seq_printf(s, ",fast_unmount");
387 else if (c->mount_opts.unmount_mode == 1)
388 seq_printf(s, ",norm_unmount");
389
390 return 0;
391}
392
393static int ubifs_sync_fs(struct super_block *sb, int wait)
394{
395 struct ubifs_info *c = sb->s_fs_info;
396 int i, ret = 0, err;
397
398 if (c->jheads)
399 for (i = 0; i < c->jhead_cnt; i++) {
400 err = ubifs_wbuf_sync(&c->jheads[i].wbuf);
401 if (err && !ret)
402 ret = err;
403 }
404 /*
405 * We ought to call sync for c->ubi but it does not have one. If it had
406 * it would in turn call mtd->sync, however mtd operations are
407 * synchronous anyway, so we don't lose any sleep here.
408 */
409 return ret;
410}
411
412/**
413 * init_constants_early - initialize UBIFS constants.
414 * @c: UBIFS file-system description object
415 *
416 * This function initialize UBIFS constants which do not need the superblock to
417 * be read. It also checks that the UBI volume satisfies basic UBIFS
418 * requirements. Returns zero in case of success and a negative error code in
419 * case of failure.
420 */
421static int init_constants_early(struct ubifs_info *c)
422{
423 if (c->vi.corrupted) {
424 ubifs_warn("UBI volume is corrupted - read-only mode");
425 c->ro_media = 1;
426 }
427
428 if (c->di.ro_mode) {
429 ubifs_msg("read-only UBI device");
430 c->ro_media = 1;
431 }
432
433 if (c->vi.vol_type == UBI_STATIC_VOLUME) {
434 ubifs_msg("static UBI volume - read-only mode");
435 c->ro_media = 1;
436 }
437
438 c->leb_cnt = c->vi.size;
439 c->leb_size = c->vi.usable_leb_size;
440 c->half_leb_size = c->leb_size / 2;
441 c->min_io_size = c->di.min_io_size;
442 c->min_io_shift = fls(c->min_io_size) - 1;
443
444 if (c->leb_size < UBIFS_MIN_LEB_SZ) {
445 ubifs_err("too small LEBs (%d bytes), min. is %d bytes",
446 c->leb_size, UBIFS_MIN_LEB_SZ);
447 return -EINVAL;
448 }
449
450 if (c->leb_cnt < UBIFS_MIN_LEB_CNT) {
451 ubifs_err("too few LEBs (%d), min. is %d",
452 c->leb_cnt, UBIFS_MIN_LEB_CNT);
453 return -EINVAL;
454 }
455
456 if (!is_power_of_2(c->min_io_size)) {
457 ubifs_err("bad min. I/O size %d", c->min_io_size);
458 return -EINVAL;
459 }
460
461 /*
462 * UBIFS aligns all node to 8-byte boundary, so to make function in
463 * io.c simpler, assume minimum I/O unit size to be 8 bytes if it is
464 * less than 8.
465 */
466 if (c->min_io_size < 8) {
467 c->min_io_size = 8;
468 c->min_io_shift = 3;
469 }
470
471 c->ref_node_alsz = ALIGN(UBIFS_REF_NODE_SZ, c->min_io_size);
472 c->mst_node_alsz = ALIGN(UBIFS_MST_NODE_SZ, c->min_io_size);
473
474 /*
475 * Initialize node length ranges which are mostly needed for node
476 * length validation.
477 */
478 c->ranges[UBIFS_PAD_NODE].len = UBIFS_PAD_NODE_SZ;
479 c->ranges[UBIFS_SB_NODE].len = UBIFS_SB_NODE_SZ;
480 c->ranges[UBIFS_MST_NODE].len = UBIFS_MST_NODE_SZ;
481 c->ranges[UBIFS_REF_NODE].len = UBIFS_REF_NODE_SZ;
482 c->ranges[UBIFS_TRUN_NODE].len = UBIFS_TRUN_NODE_SZ;
483 c->ranges[UBIFS_CS_NODE].len = UBIFS_CS_NODE_SZ;
484
485 c->ranges[UBIFS_INO_NODE].min_len = UBIFS_INO_NODE_SZ;
486 c->ranges[UBIFS_INO_NODE].max_len = UBIFS_MAX_INO_NODE_SZ;
487 c->ranges[UBIFS_ORPH_NODE].min_len =
488 UBIFS_ORPH_NODE_SZ + sizeof(__le64);
489 c->ranges[UBIFS_ORPH_NODE].max_len = c->leb_size;
490 c->ranges[UBIFS_DENT_NODE].min_len = UBIFS_DENT_NODE_SZ;
491 c->ranges[UBIFS_DENT_NODE].max_len = UBIFS_MAX_DENT_NODE_SZ;
492 c->ranges[UBIFS_XENT_NODE].min_len = UBIFS_XENT_NODE_SZ;
493 c->ranges[UBIFS_XENT_NODE].max_len = UBIFS_MAX_XENT_NODE_SZ;
494 c->ranges[UBIFS_DATA_NODE].min_len = UBIFS_DATA_NODE_SZ;
495 c->ranges[UBIFS_DATA_NODE].max_len = UBIFS_MAX_DATA_NODE_SZ;
496 /*
497 * Minimum indexing node size is amended later when superblock is
498 * read and the key length is known.
499 */
500 c->ranges[UBIFS_IDX_NODE].min_len = UBIFS_IDX_NODE_SZ + UBIFS_BRANCH_SZ;
501 /*
502 * Maximum indexing node size is amended later when superblock is
503 * read and the fanout is known.
504 */
505 c->ranges[UBIFS_IDX_NODE].max_len = INT_MAX;
506
507 /*
508 * Initialize dead and dark LEB space watermarks.
509 *
510 * Dead space is the space which cannot be used. Its watermark is
511 * equivalent to min. I/O unit or minimum node size if it is greater
512 * then min. I/O unit.
513 *
514 * Dark space is the space which might be used, or might not, depending
515 * on which node should be written to the LEB. Its watermark is
516 * equivalent to maximum UBIFS node size.
517 */
518 c->dead_wm = ALIGN(MIN_WRITE_SZ, c->min_io_size);
519 c->dark_wm = ALIGN(UBIFS_MAX_NODE_SZ, c->min_io_size);
520
521 return 0;
522}
523
/**
 * bud_wbuf_callback - bud LEB write-buffer synchronization call-back.
 * @c: UBIFS file-system description object
 * @lnum: LEB the write-buffer was synchronized to
 * @free: how many free bytes left in this LEB
 * @pad: how many bytes were padded
 *
 * The I/O unit invokes this call-back whenever a write-buffer has been
 * synchronized, which lets the space accounting of bud logical eraseblocks
 * be kept correct. Returns zero in case of success and a negative error code
 * in case of failure.
 *
 * Logically this function is part of the journal, but it lives here so that
 * it can stay static.
 */
static int bud_wbuf_callback(struct ubifs_info *c, int lnum, int free, int pad)
{
	int err;

	err = ubifs_update_one_lp(c, lnum, free, pad, 0, 0);
	return err;
}
543
544/*
545 * init_constants_late - initialize UBIFS constants.
546 * @c: UBIFS file-system description object
547 *
548 * This is a helper function which initializes various UBIFS constants after
549 * the superblock has been read. It also checks various UBIFS parameters and
550 * makes sure they are all right. Returns zero in case of success and a
551 * negative error code in case of failure.
552 */
553static int init_constants_late(struct ubifs_info *c)
554{
555 int tmp, err;
556 uint64_t tmp64;
557
558 c->main_bytes = (long long)c->main_lebs * c->leb_size;
559 c->max_znode_sz = sizeof(struct ubifs_znode) +
560 c->fanout * sizeof(struct ubifs_zbranch);
561
562 tmp = ubifs_idx_node_sz(c, 1);
563 c->ranges[UBIFS_IDX_NODE].min_len = tmp;
564 c->min_idx_node_sz = ALIGN(tmp, 8);
565
566 tmp = ubifs_idx_node_sz(c, c->fanout);
567 c->ranges[UBIFS_IDX_NODE].max_len = tmp;
568 c->max_idx_node_sz = ALIGN(tmp, 8);
569
570 /* Make sure LEB size is large enough to fit full commit */
571 tmp = UBIFS_CS_NODE_SZ + UBIFS_REF_NODE_SZ * c->jhead_cnt;
572 tmp = ALIGN(tmp, c->min_io_size);
573 if (tmp > c->leb_size) {
574 dbg_err("too small LEB size %d, at least %d needed",
575 c->leb_size, tmp);
576 return -EINVAL;
577 }
578
579 /*
580 * Make sure that the log is large enough to fit reference nodes for
581 * all buds plus one reserved LEB.
582 */
583 tmp64 = c->max_bud_bytes;
584 tmp = do_div(tmp64, c->leb_size);
585 c->max_bud_cnt = tmp64 + !!tmp;
586 tmp = (c->ref_node_alsz * c->max_bud_cnt + c->leb_size - 1);
587 tmp /= c->leb_size;
588 tmp += 1;
589 if (c->log_lebs < tmp) {
590 dbg_err("too small log %d LEBs, required min. %d LEBs",
591 c->log_lebs, tmp);
592 return -EINVAL;
593 }
594
595 /*
596 * When budgeting we assume worst-case scenarios when the pages are not
597 * be compressed and direntries are of the maximum size.
598 *
599 * Note, data, which may be stored in inodes is budgeted separately, so
600 * it is not included into 'c->inode_budget'.
601 */
602 c->page_budget = UBIFS_MAX_DATA_NODE_SZ * UBIFS_BLOCKS_PER_PAGE;
603 c->inode_budget = UBIFS_INO_NODE_SZ;
604 c->dent_budget = UBIFS_MAX_DENT_NODE_SZ;
605
606 /*
607 * When the amount of flash space used by buds becomes
608 * 'c->max_bud_bytes', UBIFS just blocks all writers and starts commit.
609 * The writers are unblocked when the commit is finished. To avoid
610 * writers to be blocked UBIFS initiates background commit in advance,
611 * when number of bud bytes becomes above the limit defined below.
612 */
613 c->bg_bud_bytes = (c->max_bud_bytes * 13) >> 4;
614
615 /*
616 * Ensure minimum journal size. All the bytes in the journal heads are
617 * considered to be used, when calculating the current journal usage.
618 * Consequently, if the journal is too small, UBIFS will treat it as
619 * always full.
620 */
621 tmp64 = (uint64_t)(c->jhead_cnt + 1) * c->leb_size + 1;
622 if (c->bg_bud_bytes < tmp64)
623 c->bg_bud_bytes = tmp64;
624 if (c->max_bud_bytes < tmp64 + c->leb_size)
625 c->max_bud_bytes = tmp64 + c->leb_size;
626
627 err = ubifs_calc_lpt_geom(c);
628 if (err)
629 return err;
630
631 c->min_idx_lebs = ubifs_calc_min_idx_lebs(c);
632
633 /*
634 * Calculate total amount of FS blocks. This number is not used
635 * internally because it does not make much sense for UBIFS, but it is
636 * necessary to report something for the 'statfs()' call.
637 *
638 * Subtract the LEB reserved for GC and the LEB which is reserved for
639 * deletions.
640 *
641 * Review 'ubifs_calc_available()' if changing this calculation.
642 */
643 tmp64 = c->main_lebs - 2;
644 tmp64 *= (uint64_t)c->leb_size - c->dark_wm;
645 tmp64 = ubifs_reported_space(c, tmp64);
646 c->block_cnt = tmp64 >> UBIFS_BLOCK_SHIFT;
647
648 return 0;
649}
650
651/**
652 * take_gc_lnum - reserve GC LEB.
653 * @c: UBIFS file-system description object
654 *
655 * This function ensures that the LEB reserved for garbage collection is
656 * unmapped and is marked as "taken" in lprops. We also have to set free space
657 * to LEB size and dirty space to zero, because lprops may contain out-of-date
658 * information if the file-system was un-mounted before it has been committed.
659 * This function returns zero in case of success and a negative error code in
660 * case of failure.
661 */
662static int take_gc_lnum(struct ubifs_info *c)
663{
664 int err;
665
666 if (c->gc_lnum == -1) {
667 ubifs_err("no LEB for GC");
668 return -EINVAL;
669 }
670
671 err = ubifs_leb_unmap(c, c->gc_lnum);
672 if (err)
673 return err;
674
675 /* And we have to tell lprops that this LEB is taken */
676 err = ubifs_change_one_lp(c, c->gc_lnum, c->leb_size, 0,
677 LPROPS_TAKEN, 0, 0);
678 return err;
679}
680
681/**
682 * alloc_wbufs - allocate write-buffers.
683 * @c: UBIFS file-system description object
684 *
685 * This helper function allocates and initializes UBIFS write-buffers. Returns
686 * zero in case of success and %-ENOMEM in case of failure.
687 */
688static int alloc_wbufs(struct ubifs_info *c)
689{
690 int i, err;
691
692 c->jheads = kzalloc(c->jhead_cnt * sizeof(struct ubifs_jhead),
693 GFP_KERNEL);
694 if (!c->jheads)
695 return -ENOMEM;
696
697 /* Initialize journal heads */
698 for (i = 0; i < c->jhead_cnt; i++) {
699 INIT_LIST_HEAD(&c->jheads[i].buds_list);
700 err = ubifs_wbuf_init(c, &c->jheads[i].wbuf);
701 if (err)
702 return err;
703
704 c->jheads[i].wbuf.sync_callback = &bud_wbuf_callback;
705 c->jheads[i].wbuf.jhead = i;
706 }
707
708 c->jheads[BASEHD].wbuf.dtype = UBI_SHORTTERM;
709 /*
710 * Garbage Collector head likely contains long-term data and
711 * does not need to be synchronized by timer.
712 */
713 c->jheads[GCHD].wbuf.dtype = UBI_LONGTERM;
714 c->jheads[GCHD].wbuf.timeout = 0;
715
716 return 0;
717}
718
719/**
720 * free_wbufs - free write-buffers.
721 * @c: UBIFS file-system description object
722 */
723static void free_wbufs(struct ubifs_info *c)
724{
725 int i;
726
727 if (c->jheads) {
728 for (i = 0; i < c->jhead_cnt; i++) {
729 kfree(c->jheads[i].wbuf.buf);
730 kfree(c->jheads[i].wbuf.inodes);
731 }
732 kfree(c->jheads);
733 c->jheads = NULL;
734 }
735}
736
737/**
738 * free_orphans - free orphans.
739 * @c: UBIFS file-system description object
740 */
741static void free_orphans(struct ubifs_info *c)
742{
743 struct ubifs_orphan *orph;
744
745 while (c->orph_dnext) {
746 orph = c->orph_dnext;
747 c->orph_dnext = orph->dnext;
748 list_del(&orph->list);
749 kfree(orph);
750 }
751
752 while (!list_empty(&c->orph_list)) {
753 orph = list_entry(c->orph_list.next, struct ubifs_orphan, list);
754 list_del(&orph->list);
755 kfree(orph);
756 dbg_err("orphan list not empty at unmount");
757 }
758
759 vfree(c->orph_buf);
760 c->orph_buf = NULL;
761}
762
763/**
764 * free_buds - free per-bud objects.
765 * @c: UBIFS file-system description object
766 */
767static void free_buds(struct ubifs_info *c)
768{
769 struct rb_node *this = c->buds.rb_node;
770 struct ubifs_bud *bud;
771
772 while (this) {
773 if (this->rb_left)
774 this = this->rb_left;
775 else if (this->rb_right)
776 this = this->rb_right;
777 else {
778 bud = rb_entry(this, struct ubifs_bud, rb);
779 this = rb_parent(this);
780 if (this) {
781 if (this->rb_left == &bud->rb)
782 this->rb_left = NULL;
783 else
784 this->rb_right = NULL;
785 }
786 kfree(bud);
787 }
788 }
789}
790
791/**
792 * check_volume_empty - check if the UBI volume is empty.
793 * @c: UBIFS file-system description object
794 *
795 * This function checks if the UBIFS volume is empty by looking if its LEBs are
796 * mapped or not. The result of checking is stored in the @c->empty variable.
797 * Returns zero in case of success and a negative error code in case of
798 * failure.
799 */
800static int check_volume_empty(struct ubifs_info *c)
801{
802 int lnum, err;
803
804 c->empty = 1;
805 for (lnum = 0; lnum < c->leb_cnt; lnum++) {
806 err = ubi_is_mapped(c->ubi, lnum);
807 if (unlikely(err < 0))
808 return err;
809 if (err == 1) {
810 c->empty = 0;
811 break;
812 }
813
814 cond_resched();
815 }
816
817 return 0;
818}
819
820/*
821 * UBIFS mount options.
822 *
823 * Opt_fast_unmount: do not run a journal commit before un-mounting
824 * Opt_norm_unmount: run a journal commit before un-mounting
825 * Opt_err: just end of array marker
826 */
827enum {
828 Opt_fast_unmount,
829 Opt_norm_unmount,
830 Opt_err,
831};
832
833static match_table_t tokens = {
834 {Opt_fast_unmount, "fast_unmount"},
835 {Opt_norm_unmount, "norm_unmount"},
836 {Opt_err, NULL},
837};
838
839/**
840 * ubifs_parse_options - parse mount parameters.
841 * @c: UBIFS file-system description object
842 * @options: parameters to parse
843 * @is_remount: non-zero if this is FS re-mount
844 *
845 * This function parses UBIFS mount options and returns zero in case success
846 * and a negative error code in case of failure.
847 */
848static int ubifs_parse_options(struct ubifs_info *c, char *options,
849 int is_remount)
850{
851 char *p;
852 substring_t args[MAX_OPT_ARGS];
853
854 if (!options)
855 return 0;
856
857 while ((p = strsep(&options, ","))) {
858 int token;
859
860 if (!*p)
861 continue;
862
863 token = match_token(p, tokens, args);
864 switch (token) {
865 case Opt_fast_unmount:
866 c->mount_opts.unmount_mode = 2;
867 c->fast_unmount = 1;
868 break;
869 case Opt_norm_unmount:
870 c->mount_opts.unmount_mode = 1;
871 c->fast_unmount = 0;
872 break;
873 default:
874 ubifs_err("unrecognized mount option \"%s\" "
875 "or missing value", p);
876 return -EINVAL;
877 }
878 }
879
880 return 0;
881}
882
883/**
884 * destroy_journal - destroy journal data structures.
885 * @c: UBIFS file-system description object
886 *
887 * This function destroys journal data structures including those that may have
888 * been created by recovery functions.
889 */
890static void destroy_journal(struct ubifs_info *c)
891{
892 while (!list_empty(&c->unclean_leb_list)) {
893 struct ubifs_unclean_leb *ucleb;
894
895 ucleb = list_entry(c->unclean_leb_list.next,
896 struct ubifs_unclean_leb, list);
897 list_del(&ucleb->list);
898 kfree(ucleb);
899 }
900 while (!list_empty(&c->old_buds)) {
901 struct ubifs_bud *bud;
902
903 bud = list_entry(c->old_buds.next, struct ubifs_bud, list);
904 list_del(&bud->list);
905 kfree(bud);
906 }
907 ubifs_destroy_idx_gc(c);
908 ubifs_destroy_size_tree(c);
909 ubifs_tnc_close(c);
910 free_buds(c);
911}
912
913/**
914 * mount_ubifs - mount UBIFS file-system.
915 * @c: UBIFS file-system description object
916 *
917 * This function mounts UBIFS file system. Returns zero in case of success and
918 * a negative error code in case of failure.
919 *
920 * Note, the function does not de-allocate resources it it fails half way
921 * through, and the caller has to do this instead.
922 */
923static int mount_ubifs(struct ubifs_info *c)
924{
925 struct super_block *sb = c->vfs_sb;
926 int err, mounted_read_only = (sb->s_flags & MS_RDONLY);
927 long long x;
928 size_t sz;
929
930 err = init_constants_early(c);
931 if (err)
932 return err;
933
934#ifdef CONFIG_UBIFS_FS_DEBUG
935 c->dbg_buf = vmalloc(c->leb_size);
936 if (!c->dbg_buf)
937 return -ENOMEM;
938#endif
939
940 err = check_volume_empty(c);
941 if (err)
942 goto out_free;
943
944 if (c->empty && (mounted_read_only || c->ro_media)) {
945 /*
946 * This UBI volume is empty, and read-only, or the file system
947 * is mounted read-only - we cannot format it.
948 */
949 ubifs_err("can't format empty UBI volume: read-only %s",
950 c->ro_media ? "UBI volume" : "mount");
951 err = -EROFS;
952 goto out_free;
953 }
954
955 if (c->ro_media && !mounted_read_only) {
956 ubifs_err("cannot mount read-write - read-only media");
957 err = -EROFS;
958 goto out_free;
959 }
960
961 /*
962 * The requirement for the buffer is that it should fit indexing B-tree
963 * height amount of integers. We assume the height if the TNC tree will
964 * never exceed 64.
965 */
966 err = -ENOMEM;
967 c->bottom_up_buf = kmalloc(BOTTOM_UP_HEIGHT * sizeof(int), GFP_KERNEL);
968 if (!c->bottom_up_buf)
969 goto out_free;
970
971 c->sbuf = vmalloc(c->leb_size);
972 if (!c->sbuf)
973 goto out_free;
974
975 if (!mounted_read_only) {
976 c->ileb_buf = vmalloc(c->leb_size);
977 if (!c->ileb_buf)
978 goto out_free;
979 }
980
981 err = ubifs_read_superblock(c);
982 if (err)
983 goto out_free;
984
985 /*
986 * Make sure the compressor which is set as the default on in the
987 * superblock was actually compiled in.
988 */
989 if (!ubifs_compr_present(c->default_compr)) {
990 ubifs_warn("'%s' compressor is set by superblock, but not "
991 "compiled in", ubifs_compr_name(c->default_compr));
992 c->default_compr = UBIFS_COMPR_NONE;
993 }
994
995 dbg_failure_mode_registration(c);
996
997 err = init_constants_late(c);
998 if (err)
999 goto out_dereg;
1000
1001 sz = ALIGN(c->max_idx_node_sz, c->min_io_size);
1002 sz = ALIGN(sz + c->max_idx_node_sz, c->min_io_size);
1003 c->cbuf = kmalloc(sz, GFP_NOFS);
1004 if (!c->cbuf) {
1005 err = -ENOMEM;
1006 goto out_dereg;
1007 }
1008
1009 if (!mounted_read_only) {
1010 err = alloc_wbufs(c);
1011 if (err)
1012 goto out_cbuf;
1013
1014 /* Create background thread */
1015 sprintf(c->bgt_name, BGT_NAME_PATTERN, c->vi.ubi_num,
1016 c->vi.vol_id);
1017 c->bgt = kthread_create(ubifs_bg_thread, c, c->bgt_name);
1018 if (!c->bgt)
1019 c->bgt = ERR_PTR(-EINVAL);
1020 if (IS_ERR(c->bgt)) {
1021 err = PTR_ERR(c->bgt);
1022 c->bgt = NULL;
1023 ubifs_err("cannot spawn \"%s\", error %d",
1024 c->bgt_name, err);
1025 goto out_wbufs;
1026 }
1027 wake_up_process(c->bgt);
1028 }
1029
1030 err = ubifs_read_master(c);
1031 if (err)
1032 goto out_master;
1033
1034 if ((c->mst_node->flags & cpu_to_le32(UBIFS_MST_DIRTY)) != 0) {
1035 ubifs_msg("recovery needed");
1036 c->need_recovery = 1;
1037 if (!mounted_read_only) {
1038 err = ubifs_recover_inl_heads(c, c->sbuf);
1039 if (err)
1040 goto out_master;
1041 }
1042 } else if (!mounted_read_only) {
1043 /*
1044 * Set the "dirty" flag so that if we reboot uncleanly we
1045 * will notice this immediately on the next mount.
1046 */
1047 c->mst_node->flags |= cpu_to_le32(UBIFS_MST_DIRTY);
1048 err = ubifs_write_master(c);
1049 if (err)
1050 goto out_master;
1051 }
1052
1053 err = ubifs_lpt_init(c, 1, !mounted_read_only);
1054 if (err)
1055 goto out_lpt;
1056
1057 err = dbg_check_idx_size(c, c->old_idx_sz);
1058 if (err)
1059 goto out_lpt;
1060
1061 err = ubifs_replay_journal(c);
1062 if (err)
1063 goto out_journal;
1064
1065 err = ubifs_mount_orphans(c, c->need_recovery, mounted_read_only);
1066 if (err)
1067 goto out_orphans;
1068
1069 if (!mounted_read_only) {
1070 int lnum;
1071
1072 /* Check for enough free space */
1073 if (ubifs_calc_available(c, c->min_idx_lebs) <= 0) {
1074 ubifs_err("insufficient available space");
1075 err = -EINVAL;
1076 goto out_orphans;
1077 }
1078
1079 /* Check for enough log space */
1080 lnum = c->lhead_lnum + 1;
1081 if (lnum >= UBIFS_LOG_LNUM + c->log_lebs)
1082 lnum = UBIFS_LOG_LNUM;
1083 if (lnum == c->ltail_lnum) {
1084 err = ubifs_consolidate_log(c);
1085 if (err)
1086 goto out_orphans;
1087 }
1088
1089 if (c->need_recovery) {
1090 err = ubifs_recover_size(c);
1091 if (err)
1092 goto out_orphans;
1093 err = ubifs_rcvry_gc_commit(c);
1094 } else
1095 err = take_gc_lnum(c);
1096 if (err)
1097 goto out_orphans;
1098
1099 err = dbg_check_lprops(c);
1100 if (err)
1101 goto out_orphans;
1102 } else if (c->need_recovery) {
1103 err = ubifs_recover_size(c);
1104 if (err)
1105 goto out_orphans;
1106 }
1107
1108 spin_lock(&ubifs_infos_lock);
1109 list_add_tail(&c->infos_list, &ubifs_infos);
1110 spin_unlock(&ubifs_infos_lock);
1111
1112 if (c->need_recovery) {
1113 if (mounted_read_only)
1114 ubifs_msg("recovery deferred");
1115 else {
1116 c->need_recovery = 0;
1117 ubifs_msg("recovery completed");
1118 }
1119 }
1120
1121 err = dbg_check_filesystem(c);
1122 if (err)
1123 goto out_infos;
1124
1125 ubifs_msg("mounted UBI device %d, volume %d", c->vi.ubi_num,
1126 c->vi.vol_id);
1127 if (mounted_read_only)
1128 ubifs_msg("mounted read-only");
1129 x = (long long)c->main_lebs * c->leb_size;
1130 ubifs_msg("file system size: %lld bytes (%lld KiB, %lld MiB, %d LEBs)",
1131 x, x >> 10, x >> 20, c->main_lebs);
1132 x = (long long)c->log_lebs * c->leb_size + c->max_bud_bytes;
1133 ubifs_msg("journal size: %lld bytes (%lld KiB, %lld MiB, %d LEBs)",
1134 x, x >> 10, x >> 20, c->log_lebs + c->max_bud_cnt);
1135 ubifs_msg("default compressor: %s", ubifs_compr_name(c->default_compr));
1136 ubifs_msg("media format %d, latest format %d",
1137 c->fmt_version, UBIFS_FORMAT_VERSION);
1138
1139 dbg_msg("compiled on: " __DATE__ " at " __TIME__);
1140 dbg_msg("min. I/O unit size: %d bytes", c->min_io_size);
1141 dbg_msg("LEB size: %d bytes (%d KiB)",
1142 c->leb_size, c->leb_size / 1024);
1143 dbg_msg("data journal heads: %d",
1144 c->jhead_cnt - NONDATA_JHEADS_CNT);
1145 dbg_msg("UUID: %02X%02X%02X%02X-%02X%02X"
1146 "-%02X%02X-%02X%02X-%02X%02X%02X%02X%02X%02X",
1147 c->uuid[0], c->uuid[1], c->uuid[2], c->uuid[3],
1148 c->uuid[4], c->uuid[5], c->uuid[6], c->uuid[7],
1149 c->uuid[8], c->uuid[9], c->uuid[10], c->uuid[11],
1150 c->uuid[12], c->uuid[13], c->uuid[14], c->uuid[15]);
1151 dbg_msg("fast unmount: %d", c->fast_unmount);
1152 dbg_msg("big_lpt %d", c->big_lpt);
1153 dbg_msg("log LEBs: %d (%d - %d)",
1154 c->log_lebs, UBIFS_LOG_LNUM, c->log_last);
1155 dbg_msg("LPT area LEBs: %d (%d - %d)",
1156 c->lpt_lebs, c->lpt_first, c->lpt_last);
1157 dbg_msg("orphan area LEBs: %d (%d - %d)",
1158 c->orph_lebs, c->orph_first, c->orph_last);
1159 dbg_msg("main area LEBs: %d (%d - %d)",
1160 c->main_lebs, c->main_first, c->leb_cnt - 1);
1161 dbg_msg("index LEBs: %d", c->lst.idx_lebs);
1162 dbg_msg("total index bytes: %lld (%lld KiB, %lld MiB)",
1163 c->old_idx_sz, c->old_idx_sz >> 10, c->old_idx_sz >> 20);
1164 dbg_msg("key hash type: %d", c->key_hash_type);
1165 dbg_msg("tree fanout: %d", c->fanout);
1166 dbg_msg("reserved GC LEB: %d", c->gc_lnum);
1167 dbg_msg("first main LEB: %d", c->main_first);
1168 dbg_msg("dead watermark: %d", c->dead_wm);
1169 dbg_msg("dark watermark: %d", c->dark_wm);
1170 x = (long long)c->main_lebs * c->dark_wm;
1171 dbg_msg("max. dark space: %lld (%lld KiB, %lld MiB)",
1172 x, x >> 10, x >> 20);
1173 dbg_msg("maximum bud bytes: %lld (%lld KiB, %lld MiB)",
1174 c->max_bud_bytes, c->max_bud_bytes >> 10,
1175 c->max_bud_bytes >> 20);
1176 dbg_msg("BG commit bud bytes: %lld (%lld KiB, %lld MiB)",
1177 c->bg_bud_bytes, c->bg_bud_bytes >> 10,
1178 c->bg_bud_bytes >> 20);
1179 dbg_msg("current bud bytes %lld (%lld KiB, %lld MiB)",
1180 c->bud_bytes, c->bud_bytes >> 10, c->bud_bytes >> 20);
1181 dbg_msg("max. seq. number: %llu", c->max_sqnum);
1182 dbg_msg("commit number: %llu", c->cmt_no);
1183
1184 return 0;
1185
1186out_infos:
1187 spin_lock(&ubifs_infos_lock);
1188 list_del(&c->infos_list);
1189 spin_unlock(&ubifs_infos_lock);
1190out_orphans:
1191 free_orphans(c);
1192out_journal:
1193 destroy_journal(c);
1194out_lpt:
1195 ubifs_lpt_free(c, 0);
1196out_master:
1197 kfree(c->mst_node);
1198 kfree(c->rcvrd_mst_node);
1199 if (c->bgt)
1200 kthread_stop(c->bgt);
1201out_wbufs:
1202 free_wbufs(c);
1203out_cbuf:
1204 kfree(c->cbuf);
1205out_dereg:
1206 dbg_failure_mode_deregistration(c);
1207out_free:
1208 vfree(c->ileb_buf);
1209 vfree(c->sbuf);
1210 kfree(c->bottom_up_buf);
1211 UBIFS_DBG(vfree(c->dbg_buf));
1212 return err;
1213}
1214
1215/**
1216 * ubifs_umount - un-mount UBIFS file-system.
1217 * @c: UBIFS file-system description object
1218 *
1219 * Note, this function is called to free allocated resourced when un-mounting,
1220 * as well as free resources when an error occurred while we were half way
1221 * through mounting (error path cleanup function). So it has to make sure the
1222 * resource was actually allocated before freeing it.
1223 */
1224static void ubifs_umount(struct ubifs_info *c)
1225{
1226 dbg_gen("un-mounting UBI device %d, volume %d", c->vi.ubi_num,
1227 c->vi.vol_id);
1228
1229 spin_lock(&ubifs_infos_lock);
1230 list_del(&c->infos_list);
1231 spin_unlock(&ubifs_infos_lock);
1232
1233 if (c->bgt)
1234 kthread_stop(c->bgt);
1235
1236 destroy_journal(c);
1237 free_wbufs(c);
1238 free_orphans(c);
1239 ubifs_lpt_free(c, 0);
1240
1241 kfree(c->cbuf);
1242 kfree(c->rcvrd_mst_node);
1243 kfree(c->mst_node);
1244 vfree(c->sbuf);
1245 kfree(c->bottom_up_buf);
1246 UBIFS_DBG(vfree(c->dbg_buf));
1247 vfree(c->ileb_buf);
1248 dbg_failure_mode_deregistration(c);
1249}
1250
1251/**
1252 * ubifs_remount_rw - re-mount in read-write mode.
1253 * @c: UBIFS file-system description object
1254 *
1255 * UBIFS avoids allocating many unnecessary resources when mounted in read-only
1256 * mode. This function allocates the needed resources and re-mounts UBIFS in
1257 * read-write mode.
1258 */
1259static int ubifs_remount_rw(struct ubifs_info *c)
1260{
1261 int err, lnum;
1262
1263 if (c->ro_media)
1264 return -EINVAL;
1265
1266 mutex_lock(&c->umount_mutex);
1267 c->remounting_rw = 1;
1268
1269 /* Check for enough free space */
1270 if (ubifs_calc_available(c, c->min_idx_lebs) <= 0) {
1271 ubifs_err("insufficient available space");
1272 err = -EINVAL;
1273 goto out;
1274 }
1275
1276 if (c->old_leb_cnt != c->leb_cnt) {
1277 struct ubifs_sb_node *sup;
1278
1279 sup = ubifs_read_sb_node(c);
1280 if (IS_ERR(sup)) {
1281 err = PTR_ERR(sup);
1282 goto out;
1283 }
1284 sup->leb_cnt = cpu_to_le32(c->leb_cnt);
1285 err = ubifs_write_sb_node(c, sup);
1286 if (err)
1287 goto out;
1288 }
1289
1290 if (c->need_recovery) {
1291 ubifs_msg("completing deferred recovery");
1292 err = ubifs_write_rcvrd_mst_node(c);
1293 if (err)
1294 goto out;
1295 err = ubifs_recover_size(c);
1296 if (err)
1297 goto out;
1298 err = ubifs_clean_lebs(c, c->sbuf);
1299 if (err)
1300 goto out;
1301 err = ubifs_recover_inl_heads(c, c->sbuf);
1302 if (err)
1303 goto out;
1304 }
1305
1306 if (!(c->mst_node->flags & cpu_to_le32(UBIFS_MST_DIRTY))) {
1307 c->mst_node->flags |= cpu_to_le32(UBIFS_MST_DIRTY);
1308 err = ubifs_write_master(c);
1309 if (err)
1310 goto out;
1311 }
1312
1313 c->ileb_buf = vmalloc(c->leb_size);
1314 if (!c->ileb_buf) {
1315 err = -ENOMEM;
1316 goto out;
1317 }
1318
1319 err = ubifs_lpt_init(c, 0, 1);
1320 if (err)
1321 goto out;
1322
1323 err = alloc_wbufs(c);
1324 if (err)
1325 goto out;
1326
1327 ubifs_create_buds_lists(c);
1328
1329 /* Create background thread */
1330 c->bgt = kthread_create(ubifs_bg_thread, c, c->bgt_name);
1331 if (!c->bgt)
1332 c->bgt = ERR_PTR(-EINVAL);
1333 if (IS_ERR(c->bgt)) {
1334 err = PTR_ERR(c->bgt);
1335 c->bgt = NULL;
1336 ubifs_err("cannot spawn \"%s\", error %d",
1337 c->bgt_name, err);
1338 return err;
1339 }
1340 wake_up_process(c->bgt);
1341
1342 c->orph_buf = vmalloc(c->leb_size);
1343 if (!c->orph_buf)
1344 return -ENOMEM;
1345
1346 /* Check for enough log space */
1347 lnum = c->lhead_lnum + 1;
1348 if (lnum >= UBIFS_LOG_LNUM + c->log_lebs)
1349 lnum = UBIFS_LOG_LNUM;
1350 if (lnum == c->ltail_lnum) {
1351 err = ubifs_consolidate_log(c);
1352 if (err)
1353 goto out;
1354 }
1355
1356 if (c->need_recovery)
1357 err = ubifs_rcvry_gc_commit(c);
1358 else
1359 err = take_gc_lnum(c);
1360 if (err)
1361 goto out;
1362
1363 if (c->need_recovery) {
1364 c->need_recovery = 0;
1365 ubifs_msg("deferred recovery completed");
1366 }
1367
1368 dbg_gen("re-mounted read-write");
1369 c->vfs_sb->s_flags &= ~MS_RDONLY;
1370 c->remounting_rw = 0;
1371 mutex_unlock(&c->umount_mutex);
1372 return 0;
1373
1374out:
1375 vfree(c->orph_buf);
1376 c->orph_buf = NULL;
1377 if (c->bgt) {
1378 kthread_stop(c->bgt);
1379 c->bgt = NULL;
1380 }
1381 free_wbufs(c);
1382 vfree(c->ileb_buf);
1383 c->ileb_buf = NULL;
1384 ubifs_lpt_free(c, 1);
1385 c->remounting_rw = 0;
1386 mutex_unlock(&c->umount_mutex);
1387 return err;
1388}
1389
1390/**
1391 * commit_on_unmount - commit the journal when un-mounting.
1392 * @c: UBIFS file-system description object
1393 *
1394 * This function is called during un-mounting and it commits the journal unless
1395 * the "fast unmount" mode is enabled. It also avoids committing the journal if
1396 * it contains too few data.
1397 *
1398 * Sometimes recovery requires the journal to be committed at least once, and
1399 * this function takes care about this.
1400 */
1401static void commit_on_unmount(struct ubifs_info *c)
1402{
1403 if (!c->fast_unmount) {
1404 long long bud_bytes;
1405
1406 spin_lock(&c->buds_lock);
1407 bud_bytes = c->bud_bytes;
1408 spin_unlock(&c->buds_lock);
1409 if (bud_bytes > c->leb_size)
1410 ubifs_run_commit(c);
1411 }
1412}
1413
1414/**
1415 * ubifs_remount_ro - re-mount in read-only mode.
1416 * @c: UBIFS file-system description object
1417 *
1418 * We rely on VFS to have stopped writing. Possibly the background thread could
1419 * be running a commit, however kthread_stop will wait in that case.
1420 */
1421static void ubifs_remount_ro(struct ubifs_info *c)
1422{
1423 int i, err;
1424
1425 ubifs_assert(!c->need_recovery);
1426 commit_on_unmount(c);
1427
1428 mutex_lock(&c->umount_mutex);
1429 if (c->bgt) {
1430 kthread_stop(c->bgt);
1431 c->bgt = NULL;
1432 }
1433
1434 for (i = 0; i < c->jhead_cnt; i++) {
1435 ubifs_wbuf_sync(&c->jheads[i].wbuf);
1436 del_timer_sync(&c->jheads[i].wbuf.timer);
1437 }
1438
1439 if (!c->ro_media) {
1440 c->mst_node->flags &= ~cpu_to_le32(UBIFS_MST_DIRTY);
1441 c->mst_node->flags |= cpu_to_le32(UBIFS_MST_NO_ORPHS);
1442 c->mst_node->gc_lnum = cpu_to_le32(c->gc_lnum);
1443 err = ubifs_write_master(c);
1444 if (err)
1445 ubifs_ro_mode(c, err);
1446 }
1447
1448 ubifs_destroy_idx_gc(c);
1449 free_wbufs(c);
1450 vfree(c->orph_buf);
1451 c->orph_buf = NULL;
1452 vfree(c->ileb_buf);
1453 c->ileb_buf = NULL;
1454 ubifs_lpt_free(c, 1);
1455 mutex_unlock(&c->umount_mutex);
1456}
1457
1458static void ubifs_put_super(struct super_block *sb)
1459{
1460 int i;
1461 struct ubifs_info *c = sb->s_fs_info;
1462
1463 ubifs_msg("un-mount UBI device %d, volume %d", c->vi.ubi_num,
1464 c->vi.vol_id);
1465 /*
1466 * The following asserts are only valid if there has not been a failure
1467 * of the media. For example, there will be dirty inodes if we failed
1468 * to write them back because of I/O errors.
1469 */
1470 ubifs_assert(atomic_long_read(&c->dirty_pg_cnt) == 0);
1471 ubifs_assert(c->budg_idx_growth == 0);
1472 ubifs_assert(c->budg_data_growth == 0);
1473
1474 /*
1475 * The 'c->umount_lock' prevents races between UBIFS memory shrinker
1476 * and file system un-mount. Namely, it prevents the shrinker from
1477 * picking this superblock for shrinking - it will be just skipped if
1478 * the mutex is locked.
1479 */
1480 mutex_lock(&c->umount_mutex);
1481 if (!(c->vfs_sb->s_flags & MS_RDONLY)) {
1482 /*
1483 * First of all kill the background thread to make sure it does
1484 * not interfere with un-mounting and freeing resources.
1485 */
1486 if (c->bgt) {
1487 kthread_stop(c->bgt);
1488 c->bgt = NULL;
1489 }
1490
1491 /* Synchronize write-buffers */
1492 if (c->jheads)
1493 for (i = 0; i < c->jhead_cnt; i++) {
1494 ubifs_wbuf_sync(&c->jheads[i].wbuf);
1495 del_timer_sync(&c->jheads[i].wbuf.timer);
1496 }
1497
1498 /*
1499 * On fatal errors c->ro_media is set to 1, in which case we do
1500 * not write the master node.
1501 */
1502 if (!c->ro_media) {
1503 /*
1504 * We are being cleanly unmounted which means the
1505 * orphans were killed - indicate this in the master
1506 * node. Also save the reserved GC LEB number.
1507 */
1508 int err;
1509
1510 c->mst_node->flags &= ~cpu_to_le32(UBIFS_MST_DIRTY);
1511 c->mst_node->flags |= cpu_to_le32(UBIFS_MST_NO_ORPHS);
1512 c->mst_node->gc_lnum = cpu_to_le32(c->gc_lnum);
1513 err = ubifs_write_master(c);
1514 if (err)
1515 /*
1516 * Recovery will attempt to fix the master area
1517 * next mount, so we just print a message and
1518 * continue to unmount normally.
1519 */
1520 ubifs_err("failed to write master node, "
1521 "error %d", err);
1522 }
1523 }
1524
1525 ubifs_umount(c);
1526 bdi_destroy(&c->bdi);
1527 ubi_close_volume(c->ubi);
1528 mutex_unlock(&c->umount_mutex);
1529 kfree(c);
1530}
1531
1532static int ubifs_remount_fs(struct super_block *sb, int *flags, char *data)
1533{
1534 int err;
1535 struct ubifs_info *c = sb->s_fs_info;
1536
1537 dbg_gen("old flags %#lx, new flags %#x", sb->s_flags, *flags);
1538
1539 err = ubifs_parse_options(c, data, 1);
1540 if (err) {
1541 ubifs_err("invalid or unknown remount parameter");
1542 return err;
1543 }
1544 if ((sb->s_flags & MS_RDONLY) && !(*flags & MS_RDONLY)) {
1545 err = ubifs_remount_rw(c);
1546 if (err)
1547 return err;
1548 } else if (!(sb->s_flags & MS_RDONLY) && (*flags & MS_RDONLY))
1549 ubifs_remount_ro(c);
1550
1551 return 0;
1552}
1553
1554struct super_operations ubifs_super_operations = {
1555 .alloc_inode = ubifs_alloc_inode,
1556 .destroy_inode = ubifs_destroy_inode,
1557 .put_super = ubifs_put_super,
1558 .write_inode = ubifs_write_inode,
1559 .delete_inode = ubifs_delete_inode,
1560 .statfs = ubifs_statfs,
1561 .dirty_inode = ubifs_dirty_inode,
1562 .remount_fs = ubifs_remount_fs,
1563 .show_options = ubifs_show_options,
1564 .sync_fs = ubifs_sync_fs,
1565};
1566
1567/**
1568 * open_ubi - parse UBI device name string and open the UBI device.
1569 * @name: UBI volume name
1570 * @mode: UBI volume open mode
1571 *
1572 * There are several ways to specify UBI volumes when mounting UBIFS:
1573 * o ubiX_Y - UBI device number X, volume Y;
1574 * o ubiY - UBI device number 0, volume Y;
1575 * o ubiX:NAME - mount UBI device X, volume with name NAME;
1576 * o ubi:NAME - mount UBI device 0, volume with name NAME.
1577 *
1578 * Alternative '!' separator may be used instead of ':' (because some shells
1579 * like busybox may interpret ':' as an NFS host name separator). This function
1580 * returns ubi volume object in case of success and a negative error code in
1581 * case of failure.
1582 */
1583static struct ubi_volume_desc *open_ubi(const char *name, int mode)
1584{
1585 int dev, vol;
1586 char *endptr;
1587
1588 if (name[0] != 'u' || name[1] != 'b' || name[2] != 'i')
1589 return ERR_PTR(-EINVAL);
1590
1591 /* ubi:NAME method */
1592 if ((name[3] == ':' || name[3] == '!') && name[4] != '\0')
1593 return ubi_open_volume_nm(0, name + 4, mode);
1594
1595 if (!isdigit(name[3]))
1596 return ERR_PTR(-EINVAL);
1597
1598 dev = simple_strtoul(name + 3, &endptr, 0);
1599
1600 /* ubiY method */
1601 if (*endptr == '\0')
1602 return ubi_open_volume(0, dev, mode);
1603
1604 /* ubiX_Y method */
1605 if (*endptr == '_' && isdigit(endptr[1])) {
1606 vol = simple_strtoul(endptr + 1, &endptr, 0);
1607 if (*endptr != '\0')
1608 return ERR_PTR(-EINVAL);
1609 return ubi_open_volume(dev, vol, mode);
1610 }
1611
1612 /* ubiX:NAME method */
1613 if ((*endptr == ':' || *endptr == '!') && endptr[1] != '\0')
1614 return ubi_open_volume_nm(dev, ++endptr, mode);
1615
1616 return ERR_PTR(-EINVAL);
1617}
1618
1619static int ubifs_fill_super(struct super_block *sb, void *data, int silent)
1620{
1621 struct ubi_volume_desc *ubi = sb->s_fs_info;
1622 struct ubifs_info *c;
1623 struct inode *root;
1624 int err;
1625
1626 c = kzalloc(sizeof(struct ubifs_info), GFP_KERNEL);
1627 if (!c)
1628 return -ENOMEM;
1629
1630 spin_lock_init(&c->cnt_lock);
1631 spin_lock_init(&c->cs_lock);
1632 spin_lock_init(&c->buds_lock);
1633 spin_lock_init(&c->space_lock);
1634 spin_lock_init(&c->orphan_lock);
1635 init_rwsem(&c->commit_sem);
1636 mutex_init(&c->lp_mutex);
1637 mutex_init(&c->tnc_mutex);
1638 mutex_init(&c->log_mutex);
1639 mutex_init(&c->mst_mutex);
1640 mutex_init(&c->umount_mutex);
1641 init_waitqueue_head(&c->cmt_wq);
1642 c->buds = RB_ROOT;
1643 c->old_idx = RB_ROOT;
1644 c->size_tree = RB_ROOT;
1645 c->orph_tree = RB_ROOT;
1646 INIT_LIST_HEAD(&c->infos_list);
1647 INIT_LIST_HEAD(&c->idx_gc);
1648 INIT_LIST_HEAD(&c->replay_list);
1649 INIT_LIST_HEAD(&c->replay_buds);
1650 INIT_LIST_HEAD(&c->uncat_list);
1651 INIT_LIST_HEAD(&c->empty_list);
1652 INIT_LIST_HEAD(&c->freeable_list);
1653 INIT_LIST_HEAD(&c->frdi_idx_list);
1654 INIT_LIST_HEAD(&c->unclean_leb_list);
1655 INIT_LIST_HEAD(&c->old_buds);
1656 INIT_LIST_HEAD(&c->orph_list);
1657 INIT_LIST_HEAD(&c->orph_new);
1658
1659 c->highest_inum = UBIFS_FIRST_INO;
1660 get_random_bytes(&c->vfs_gen, sizeof(int));
1661 c->lhead_lnum = c->ltail_lnum = UBIFS_LOG_LNUM;
1662
1663 ubi_get_volume_info(ubi, &c->vi);
1664 ubi_get_device_info(c->vi.ubi_num, &c->di);
1665
1666 /* Re-open the UBI device in read-write mode */
1667 c->ubi = ubi_open_volume(c->vi.ubi_num, c->vi.vol_id, UBI_READWRITE);
1668 if (IS_ERR(c->ubi)) {
1669 err = PTR_ERR(c->ubi);
1670 goto out_free;
1671 }
1672
1673 /*
1674 * UBIFS provids 'backing_dev_info' in order to disable readahead. For
1675 * UBIFS, I/O is not deferred, it is done immediately in readpage,
1676 * which means the user would have to wait not just for their own I/O
1677 * but the readahead I/O as well i.e. completely pointless.
1678 *
1679 * Read-ahead will be disabled because @c->bdi.ra_pages is 0.
1680 */
1681 c->bdi.capabilities = BDI_CAP_MAP_COPY;
1682 c->bdi.unplug_io_fn = default_unplug_io_fn;
1683 err = bdi_init(&c->bdi);
1684 if (err)
1685 goto out_close;
1686
1687 err = ubifs_parse_options(c, data, 0);
1688 if (err)
1689 goto out_bdi;
1690
1691 c->vfs_sb = sb;
1692
1693 sb->s_fs_info = c;
1694 sb->s_magic = UBIFS_SUPER_MAGIC;
1695 sb->s_blocksize = UBIFS_BLOCK_SIZE;
1696 sb->s_blocksize_bits = UBIFS_BLOCK_SHIFT;
1697 sb->s_dev = c->vi.cdev;
1698 sb->s_maxbytes = c->max_inode_sz = key_max_inode_size(c);
1699 if (c->max_inode_sz > MAX_LFS_FILESIZE)
1700 sb->s_maxbytes = c->max_inode_sz = MAX_LFS_FILESIZE;
1701 sb->s_op = &ubifs_super_operations;
1702
1703 mutex_lock(&c->umount_mutex);
1704 err = mount_ubifs(c);
1705 if (err) {
1706 ubifs_assert(err < 0);
1707 goto out_unlock;
1708 }
1709
1710 /* Read the root inode */
1711 root = ubifs_iget(sb, UBIFS_ROOT_INO);
1712 if (IS_ERR(root)) {
1713 err = PTR_ERR(root);
1714 goto out_umount;
1715 }
1716
1717 sb->s_root = d_alloc_root(root);
1718 if (!sb->s_root)
1719 goto out_iput;
1720
1721 mutex_unlock(&c->umount_mutex);
1722
1723 return 0;
1724
1725out_iput:
1726 iput(root);
1727out_umount:
1728 ubifs_umount(c);
1729out_unlock:
1730 mutex_unlock(&c->umount_mutex);
1731out_bdi:
1732 bdi_destroy(&c->bdi);
1733out_close:
1734 ubi_close_volume(c->ubi);
1735out_free:
1736 kfree(c);
1737 return err;
1738}
1739
1740static int sb_test(struct super_block *sb, void *data)
1741{
1742 dev_t *dev = data;
1743
1744 return sb->s_dev == *dev;
1745}
1746
1747static int sb_set(struct super_block *sb, void *data)
1748{
1749 dev_t *dev = data;
1750
1751 sb->s_dev = *dev;
1752 return 0;
1753}
1754
1755static int ubifs_get_sb(struct file_system_type *fs_type, int flags,
1756 const char *name, void *data, struct vfsmount *mnt)
1757{
1758 struct ubi_volume_desc *ubi;
1759 struct ubi_volume_info vi;
1760 struct super_block *sb;
1761 int err;
1762
1763 dbg_gen("name %s, flags %#x", name, flags);
1764
1765 /*
1766 * Get UBI device number and volume ID. Mount it read-only so far
1767 * because this might be a new mount point, and UBI allows only one
1768 * read-write user at a time.
1769 */
1770 ubi = open_ubi(name, UBI_READONLY);
1771 if (IS_ERR(ubi)) {
1772 ubifs_err("cannot open \"%s\", error %d",
1773 name, (int)PTR_ERR(ubi));
1774 return PTR_ERR(ubi);
1775 }
1776 ubi_get_volume_info(ubi, &vi);
1777
1778 dbg_gen("opened ubi%d_%d", vi.ubi_num, vi.vol_id);
1779
1780 sb = sget(fs_type, &sb_test, &sb_set, &vi.cdev);
1781 if (IS_ERR(sb)) {
1782 err = PTR_ERR(sb);
1783 goto out_close;
1784 }
1785
1786 if (sb->s_root) {
1787 /* A new mount point for already mounted UBIFS */
1788 dbg_gen("this ubi volume is already mounted");
1789 if ((flags ^ sb->s_flags) & MS_RDONLY) {
1790 err = -EBUSY;
1791 goto out_deact;
1792 }
1793 } else {
1794 sb->s_flags = flags;
1795 /*
1796 * Pass 'ubi' to 'fill_super()' in sb->s_fs_info where it is
1797 * replaced by 'c'.
1798 */
1799 sb->s_fs_info = ubi;
1800 err = ubifs_fill_super(sb, data, flags & MS_SILENT ? 1 : 0);
1801 if (err)
1802 goto out_deact;
1803 /* We do not support atime */
1804 sb->s_flags |= MS_ACTIVE | MS_NOATIME;
1805 }
1806
1807 /* 'fill_super()' opens ubi again so we must close it here */
1808 ubi_close_volume(ubi);
1809
1810 return simple_set_mnt(mnt, sb);
1811
1812out_deact:
1813 up_write(&sb->s_umount);
1814 deactivate_super(sb);
1815out_close:
1816 ubi_close_volume(ubi);
1817 return err;
1818}
1819
1820static void ubifs_kill_sb(struct super_block *sb)
1821{
1822 struct ubifs_info *c = sb->s_fs_info;
1823
1824 /*
1825 * We do 'commit_on_unmount()' here instead of 'ubifs_put_super()'
1826 * in order to be outside BKL.
1827 */
1828 if (sb->s_root && !(sb->s_flags & MS_RDONLY))
1829 commit_on_unmount(c);
1830 /* The un-mount routine is actually done in put_super() */
1831 generic_shutdown_super(sb);
1832}
1833
1834static struct file_system_type ubifs_fs_type = {
1835 .name = "ubifs",
1836 .owner = THIS_MODULE,
1837 .get_sb = ubifs_get_sb,
1838 .kill_sb = ubifs_kill_sb
1839};
1840
1841/*
1842 * Inode slab cache constructor.
1843 */
1844static void inode_slab_ctor(struct kmem_cache *cachep, void *obj)
1845{
1846 struct ubifs_inode *ui = obj;
1847 inode_init_once(&ui->vfs_inode);
1848}
1849
1850static int __init ubifs_init(void)
1851{
1852 int err;
1853
1854 BUILD_BUG_ON(sizeof(struct ubifs_ch) != 24);
1855
1856 /* Make sure node sizes are 8-byte aligned */
1857 BUILD_BUG_ON(UBIFS_CH_SZ & 7);
1858 BUILD_BUG_ON(UBIFS_INO_NODE_SZ & 7);
1859 BUILD_BUG_ON(UBIFS_DENT_NODE_SZ & 7);
1860 BUILD_BUG_ON(UBIFS_XENT_NODE_SZ & 7);
1861 BUILD_BUG_ON(UBIFS_DATA_NODE_SZ & 7);
1862 BUILD_BUG_ON(UBIFS_TRUN_NODE_SZ & 7);
1863 BUILD_BUG_ON(UBIFS_SB_NODE_SZ & 7);
1864 BUILD_BUG_ON(UBIFS_MST_NODE_SZ & 7);
1865 BUILD_BUG_ON(UBIFS_REF_NODE_SZ & 7);
1866 BUILD_BUG_ON(UBIFS_CS_NODE_SZ & 7);
1867 BUILD_BUG_ON(UBIFS_ORPH_NODE_SZ & 7);
1868
1869 BUILD_BUG_ON(UBIFS_MAX_DENT_NODE_SZ & 7);
1870 BUILD_BUG_ON(UBIFS_MAX_XENT_NODE_SZ & 7);
1871 BUILD_BUG_ON(UBIFS_MAX_DATA_NODE_SZ & 7);
1872 BUILD_BUG_ON(UBIFS_MAX_INO_NODE_SZ & 7);
1873 BUILD_BUG_ON(UBIFS_MAX_NODE_SZ & 7);
1874 BUILD_BUG_ON(MIN_WRITE_SZ & 7);
1875
1876 /* Check min. node size */
1877 BUILD_BUG_ON(UBIFS_INO_NODE_SZ < MIN_WRITE_SZ);
1878 BUILD_BUG_ON(UBIFS_DENT_NODE_SZ < MIN_WRITE_SZ);
1879 BUILD_BUG_ON(UBIFS_XENT_NODE_SZ < MIN_WRITE_SZ);
1880 BUILD_BUG_ON(UBIFS_TRUN_NODE_SZ < MIN_WRITE_SZ);
1881
1882 BUILD_BUG_ON(UBIFS_MAX_DENT_NODE_SZ > UBIFS_MAX_NODE_SZ);
1883 BUILD_BUG_ON(UBIFS_MAX_XENT_NODE_SZ > UBIFS_MAX_NODE_SZ);
1884 BUILD_BUG_ON(UBIFS_MAX_DATA_NODE_SZ > UBIFS_MAX_NODE_SZ);
1885 BUILD_BUG_ON(UBIFS_MAX_INO_NODE_SZ > UBIFS_MAX_NODE_SZ);
1886
1887 /* Defined node sizes */
1888 BUILD_BUG_ON(UBIFS_SB_NODE_SZ != 4096);
1889 BUILD_BUG_ON(UBIFS_MST_NODE_SZ != 512);
1890 BUILD_BUG_ON(UBIFS_INO_NODE_SZ != 160);
1891 BUILD_BUG_ON(UBIFS_REF_NODE_SZ != 64);
1892
1893 /*
1894 * We require that PAGE_CACHE_SIZE is greater-than-or-equal-to
1895 * UBIFS_BLOCK_SIZE. It is assumed that both are powers of 2.
1896 */
1897 if (PAGE_CACHE_SIZE < UBIFS_BLOCK_SIZE) {
1898 ubifs_err("VFS page cache size is %u bytes, but UBIFS requires"
1899 " at least 4096 bytes",
1900 (unsigned int)PAGE_CACHE_SIZE);
1901 return -EINVAL;
1902 }
1903
1904 err = register_filesystem(&ubifs_fs_type);
1905 if (err) {
1906 ubifs_err("cannot register file system, error %d", err);
1907 return err;
1908 }
1909
1910 err = -ENOMEM;
1911 ubifs_inode_slab = kmem_cache_create("ubifs_inode_slab",
1912 sizeof(struct ubifs_inode), 0,
1913 SLAB_MEM_SPREAD | SLAB_RECLAIM_ACCOUNT,
1914 &inode_slab_ctor);
1915 if (!ubifs_inode_slab)
1916 goto out_reg;
1917
1918 register_shrinker(&ubifs_shrinker_info);
1919
1920 err = ubifs_compressors_init();
1921 if (err)
1922 goto out_compr;
1923
1924 return 0;
1925
1926out_compr:
1927 unregister_shrinker(&ubifs_shrinker_info);
1928 kmem_cache_destroy(ubifs_inode_slab);
1929out_reg:
1930 unregister_filesystem(&ubifs_fs_type);
1931 return err;
1932}
1933/* late_initcall to let compressors initialize first */
1934late_initcall(ubifs_init);
1935
/*
 * ubifs_exit - UBIFS module exit routine.
 *
 * Releases the module-wide resources: the compressors, the memory shrinker,
 * the inode slab cache and the file system type registration.
 */
1936static void __exit ubifs_exit(void)
1937{
/* By now the global list of UBIFS instances and the global clean znode count are expected to be empty/zero */
1938 ubifs_assert(list_empty(&ubifs_infos));
1939 ubifs_assert(atomic_long_read(&ubifs_clean_zn_cnt) == 0);
1940
1941 ubifs_compressors_exit();
1942 unregister_shrinker(&ubifs_shrinker_info);
1943 kmem_cache_destroy(ubifs_inode_slab);
1944 unregister_filesystem(&ubifs_fs_type);
1945}
1946module_exit(ubifs_exit);
1947
1948MODULE_LICENSE("GPL");
1949MODULE_VERSION(__stringify(UBIFS_VERSION));
1950MODULE_AUTHOR("Artem Bityutskiy, Adrian Hunter");
1951MODULE_DESCRIPTION("UBIFS - UBI File System");
diff --git a/fs/ubifs/tnc.c b/fs/ubifs/tnc.c
new file mode 100644
index 00000000000..e909f4a9644
--- /dev/null
+++ b/fs/ubifs/tnc.c
@@ -0,0 +1,2956 @@
1/*
2 * This file is part of UBIFS.
3 *
4 * Copyright (C) 2006-2008 Nokia Corporation.
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms of the GNU General Public License version 2 as published by
8 * the Free Software Foundation.
9 *
10 * This program is distributed in the hope that it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13 * more details.
14 *
15 * You should have received a copy of the GNU General Public License along with
16 * this program; if not, write to the Free Software Foundation, Inc., 51
17 * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
18 *
19 * Authors: Adrian Hunter
20 * Artem Bityutskiy (Битюцкий Артём)
21 */
22
23/*
24 * This file implements TNC (Tree Node Cache) which caches indexing nodes of
25 * the UBIFS B-tree.
26 *
27 * At the moment the locking rules of the TNC tree are quite simple and
28 * straightforward. We just have a mutex and lock it when we traverse the
29 * tree. If a znode is not in memory, we read it from flash while still having
30 * the mutex locked.
31 */
32
33#include <linux/crc32.h>
34#include "ubifs.h"
35
36/*
37 * Returned codes of 'matches_name()' and 'fallible_matches_name()' functions.
38 * @NAME_LESS: name corresponding to the first argument is less than second
39 * @NAME_MATCHES: names match
40 * @NAME_GREATER: name corresponding to the first argument is greater than
41 * second
42 * @NOT_ON_MEDIA: node referred by zbranch does not exist on the media
43 *
44 * These constants were introduced to improve readability.
45 */
enum {
	NAME_LESS,	/* 0 - the zbranch name is less than the given name */
	NAME_MATCHES,	/* 1 - the names match */
	NAME_GREATER,	/* 2 - the zbranch name is greater than the given name */
	NOT_ON_MEDIA,	/* 3 - the node referred by zbranch is not on the media */
};
52
53/**
54 * insert_old_idx - record an index node obsoleted since the last commit start.
55 * @c: UBIFS file-system description object
56 * @lnum: LEB number of obsoleted index node
57 * @offs: offset of obsoleted index node
58 *
59 * Returns %0 on success, and a negative error code on failure.
60 *
61 * For recovery, there must always be a complete intact version of the index on
62 * flash at all times. That is called the "old index". It is the index as at the
63 * time of the last successful commit. Many of the index nodes in the old index
64 * may be dirty, but they must not be erased until the next successful commit
65 * (at which point that index becomes the old index).
66 *
67 * That means that the garbage collection and the in-the-gaps method of
68 * committing must be able to determine if an index node is in the old index.
69 * Most of the old index nodes can be found by looking up the TNC using the
70 * 'lookup_znode()' function. However, some of the old index nodes may have
71 * been deleted from the current index or may have been changed so much that
72 * they cannot be easily found. In those cases, an entry is added to an RB-tree.
73 * That is what this function does. The RB-tree is ordered by LEB number and
74 * offset because they uniquely identify the old index node.
75 */
76static int insert_old_idx(struct ubifs_info *c, int lnum, int offs)
77{
78 struct ubifs_old_idx *old_idx, *o;
79 struct rb_node **p, *parent = NULL;
80
81 old_idx = kmalloc(sizeof(struct ubifs_old_idx), GFP_NOFS);
82 if (unlikely(!old_idx))
83 return -ENOMEM;
84 old_idx->lnum = lnum;
85 old_idx->offs = offs;
86
87 p = &c->old_idx.rb_node;
88 while (*p) {
89 parent = *p;
90 o = rb_entry(parent, struct ubifs_old_idx, rb);
91 if (lnum < o->lnum)
92 p = &(*p)->rb_left;
93 else if (lnum > o->lnum)
94 p = &(*p)->rb_right;
95 else if (offs < o->offs)
96 p = &(*p)->rb_left;
97 else if (offs > o->offs)
98 p = &(*p)->rb_right;
99 else {
100 ubifs_err("old idx added twice!");
101 kfree(old_idx);
102 return 0;
103 }
104 }
105 rb_link_node(&old_idx->rb, parent, p);
106 rb_insert_color(&old_idx->rb, &c->old_idx);
107 return 0;
108}
109
110/**
111 * insert_old_idx_znode - record a znode obsoleted since last commit start.
112 * @c: UBIFS file-system description object
113 * @znode: znode of obsoleted index node
114 *
115 * Returns %0 on success, and a negative error code on failure.
116 */
117int insert_old_idx_znode(struct ubifs_info *c, struct ubifs_znode *znode)
118{
119 if (znode->parent) {
120 struct ubifs_zbranch *zbr;
121
122 zbr = &znode->parent->zbranch[znode->iip];
123 if (zbr->len)
124 return insert_old_idx(c, zbr->lnum, zbr->offs);
125 } else
126 if (c->zroot.len)
127 return insert_old_idx(c, c->zroot.lnum,
128 c->zroot.offs);
129 return 0;
130}
131
132/**
133 * ins_clr_old_idx_znode - record a znode obsoleted since last commit start.
134 * @c: UBIFS file-system description object
135 * @znode: znode of obsoleted index node
136 *
137 * Returns %0 on success, and a negative error code on failure.
138 */
139static int ins_clr_old_idx_znode(struct ubifs_info *c,
140 struct ubifs_znode *znode)
141{
142 int err;
143
144 if (znode->parent) {
145 struct ubifs_zbranch *zbr;
146
147 zbr = &znode->parent->zbranch[znode->iip];
148 if (zbr->len) {
149 err = insert_old_idx(c, zbr->lnum, zbr->offs);
150 if (err)
151 return err;
152 zbr->lnum = 0;
153 zbr->offs = 0;
154 zbr->len = 0;
155 }
156 } else
157 if (c->zroot.len) {
158 err = insert_old_idx(c, c->zroot.lnum, c->zroot.offs);
159 if (err)
160 return err;
161 c->zroot.lnum = 0;
162 c->zroot.offs = 0;
163 c->zroot.len = 0;
164 }
165 return 0;
166}
167
168/**
169 * destroy_old_idx - destroy the old_idx RB-tree.
170 * @c: UBIFS file-system description object
171 *
172 * During start commit, the old_idx RB-tree is used to avoid overwriting index
173 * nodes that were in the index last commit but have since been deleted. This
174 * is necessary for recovery i.e. the old index must be kept intact until the
175 * new index is successfully written. The old-idx RB-tree is used for the
176 * in-the-gaps method of writing index nodes and is destroyed every commit.
177 */
178void destroy_old_idx(struct ubifs_info *c)
179{
180 struct rb_node *this = c->old_idx.rb_node;
181 struct ubifs_old_idx *old_idx;
182
183 while (this) {
184 if (this->rb_left) {
185 this = this->rb_left;
186 continue;
187 } else if (this->rb_right) {
188 this = this->rb_right;
189 continue;
190 }
191 old_idx = rb_entry(this, struct ubifs_old_idx, rb);
192 this = rb_parent(this);
193 if (this) {
194 if (this->rb_left == &old_idx->rb)
195 this->rb_left = NULL;
196 else
197 this->rb_right = NULL;
198 }
199 kfree(old_idx);
200 }
201 c->old_idx = RB_ROOT;
202}
203
204/**
205 * copy_znode - copy a dirty znode.
206 * @c: UBIFS file-system description object
207 * @znode: znode to copy
208 *
209 * A dirty znode being committed may not be changed, so it is copied.
210 */
211static struct ubifs_znode *copy_znode(struct ubifs_info *c,
212 struct ubifs_znode *znode)
213{
214 struct ubifs_znode *zn;
215
216 zn = kmalloc(c->max_znode_sz, GFP_NOFS);
217 if (unlikely(!zn))
218 return ERR_PTR(-ENOMEM);
219
220 memcpy(zn, znode, c->max_znode_sz);
221 zn->cnext = NULL;
222 __set_bit(DIRTY_ZNODE, &zn->flags);
223 __clear_bit(COW_ZNODE, &zn->flags);
224
225 ubifs_assert(!test_bit(OBSOLETE_ZNODE, &znode->flags));
226 __set_bit(OBSOLETE_ZNODE, &znode->flags);
227
228 if (znode->level != 0) {
229 int i;
230 const int n = zn->child_cnt;
231
232 /* The children now have new parent */
233 for (i = 0; i < n; i++) {
234 struct ubifs_zbranch *zbr = &zn->zbranch[i];
235
236 if (zbr->znode)
237 zbr->znode->parent = zn;
238 }
239 }
240
241 atomic_long_inc(&c->dirty_zn_cnt);
242 return zn;
243}
244
245/**
246 * add_idx_dirt - add dirt due to a dirty znode.
247 * @c: UBIFS file-system description object
248 * @lnum: LEB number of index node
249 * @dirt: size of index node
250 *
251 * This function updates lprops dirty space and the new size of the index.
252 */
253static int add_idx_dirt(struct ubifs_info *c, int lnum, int dirt)
254{
255 c->calc_idx_sz -= ALIGN(dirt, 8);
256 return ubifs_add_dirt(c, lnum, dirt);
257}
258
259/**
260 * dirty_cow_znode - ensure a znode is not being committed.
261 * @c: UBIFS file-system description object
262 * @zbr: branch of znode to check
263 *
264 * Returns dirtied znode on success or negative error code on failure.
265 */
266static struct ubifs_znode *dirty_cow_znode(struct ubifs_info *c,
267 struct ubifs_zbranch *zbr)
268{
269 struct ubifs_znode *znode = zbr->znode;
270 struct ubifs_znode *zn;
271 int err;
272
273 if (!test_bit(COW_ZNODE, &znode->flags)) {
274 /* znode is not being committed */
/* It can be modified in place; the accounting below is done only on the clean -> dirty transition */
275 if (!test_and_set_bit(DIRTY_ZNODE, &znode->flags)) {
276 atomic_long_inc(&c->dirty_zn_cnt);
277 atomic_long_dec(&c->clean_zn_cnt);
278 atomic_long_dec(&ubifs_clean_zn_cnt);
279 err = add_idx_dirt(c, zbr->lnum, zbr->len);
280 if (unlikely(err))
281 return ERR_PTR(err);
282 }
283 return znode;
284 }
285
/* COW_ZNODE is set: work on a copy instead; copy_znode() also marks the original obsolete and re-parents its in-memory children */
286 zn = copy_znode(c, znode);
287 if (unlikely(IS_ERR(zn)))
288 return zn;
289
/* A non-zero length means the branch has an on-flash position, which is recorded as an old index node and accounted as dirt */
290 if (zbr->len) {
291 err = insert_old_idx(c, zbr->lnum, zbr->offs);
/*
 * NOTE(review): on this error return 'zn' has not been stored in the branch
 * yet and is not freed here, while the original was already marked obsolete
 * by copy_znode() - looks like a leak of the copy; confirm and fix together
 * with copy_znode().
 */
292 if (unlikely(err))
293 return ERR_PTR(err);
294 err = add_idx_dirt(c, zbr->lnum, zbr->len);
295 } else
296 err = 0;
297
/* The branch now refers to the copy, which has no on-flash position yet */
298 zbr->znode = zn;
299 zbr->lnum = 0;
300 zbr->offs = 0;
301 zbr->len = 0;
302
/* An add_idx_dirt() failure is reported only here, after the branch has been switched to the copy */
303 if (unlikely(err))
304 return ERR_PTR(err);
305 return zn;
306}
307
308/**
309 * lnc_add - add a leaf node to the leaf node cache.
310 * @c: UBIFS file-system description object
311 * @zbr: zbranch of leaf node
312 * @node: leaf node
313 *
314 * Leaf nodes are non-index nodes, i.e. directory entry nodes or data nodes. The
315 * purpose of the leaf node cache is to save re-reading the same leaf node over
316 * and over again. Most things are cached by VFS, however the file system must
317 * cache directory entries for readdir and for resolving hash collisions. The
318 * present implementation of the leaf node cache is extremely simple, and
319 * allows for error returns that are not used but that may be needed if a more
320 * complex implementation is created.
321 *
322 * Note, this function does not add the @node object to LNC directly, but
323 * allocates a copy of the object and adds the copy to LNC. The reason for this
324 * is that @node has been allocated outside of the TNC subsystem and will be
325 * used with @c->tnc_mutex unlocked upon return from the TNC subsystem. But LNC
326 * may be changed at any time, e.g. freed by the shrinker.
327 */
328static int lnc_add(struct ubifs_info *c, struct ubifs_zbranch *zbr,
329 const void *node)
330{
331 int err;
332 void *lnc_node;
333 const struct ubifs_dent_node *dent = node;
334
335 ubifs_assert(!zbr->leaf);
336 ubifs_assert(zbr->len != 0);
337 ubifs_assert(is_hash_key(c, &zbr->key));
338
339 err = ubifs_validate_entry(c, dent);
340 if (err) {
341 dbg_dump_stack();
342 dbg_dump_node(c, dent);
343 return err;
344 }
345
346 lnc_node = kmalloc(zbr->len, GFP_NOFS);
347 if (!lnc_node)
348 /* We don't have to have the cache, so no error */
349 return 0;
350
351 memcpy(lnc_node, node, zbr->len);
352 zbr->leaf = lnc_node;
353 return 0;
354}
355
356 /**
357 * lnc_add_directly - add a leaf node to the leaf-node-cache.
358 * @c: UBIFS file-system description object
359 * @zbr: zbranch of leaf node
360 * @node: leaf node
361 *
362 * This function is similar to 'lnc_add()', but it does not create a copy of
363 * @node but inserts @node to TNC directly.
364 */
365static int lnc_add_directly(struct ubifs_info *c, struct ubifs_zbranch *zbr,
366 void *node)
367{
368 int err;
369
370 ubifs_assert(!zbr->leaf);
371 ubifs_assert(zbr->len != 0);
372
373 err = ubifs_validate_entry(c, node);
374 if (err) {
375 dbg_dump_stack();
376 dbg_dump_node(c, node);
377 return err;
378 }
379
380 zbr->leaf = node;
381 return 0;
382}
383
384/**
385 * lnc_free - remove a leaf node from the leaf node cache.
386 * @zbr: zbranch of leaf node
387 * @node: leaf node
388 */
389static void lnc_free(struct ubifs_zbranch *zbr)
390{
391 if (!zbr->leaf)
392 return;
393 kfree(zbr->leaf);
394 zbr->leaf = NULL;
395}
396
397/**
398 * tnc_read_node_nm - read a "hashed" leaf node.
399 * @c: UBIFS file-system description object
400 * @zbr: key and position of the node
401 * @node: node is returned here
402 *
403 * This function reads a "hashed" node defined by @zbr from the leaf node cache
404 * (if it is there) or from the flash media, in which case the node is also
405 * added to LNC. Returns zero in case of success or a negative error
406 * code in case of failure.
407 */
408static int tnc_read_node_nm(struct ubifs_info *c, struct ubifs_zbranch *zbr,
409 void *node)
410{
411 int err;
412
413 ubifs_assert(is_hash_key(c, &zbr->key));
414
415 if (zbr->leaf) {
416 /* Read from the leaf node cache */
417 ubifs_assert(zbr->len != 0);
418 memcpy(node, zbr->leaf, zbr->len);
419 return 0;
420 }
421
422 err = ubifs_tnc_read_node(c, zbr, node);
423 if (err)
424 return err;
425
426 /* Add the node to the leaf node cache */
427 err = lnc_add(c, zbr, node);
428 return err;
429}
430
431/**
432 * try_read_node - read a node if it is a node.
433 * @c: UBIFS file-system description object
434 * @buf: buffer to read to
435 * @type: node type
436 * @len: node length (not aligned)
437 * @lnum: LEB number of node to read
438 * @offs: offset of node to read
439 *
440 * This function tries to read a node of known type and length, checks it and
441 * stores it in @buf. This function returns %1 if a node is present and %0 if
442 * a node is not present. A negative error code is returned for I/O errors.
443 * This function performs that same function as ubifs_read_node except that
444 * it does not require that there is actually a node present and instead
445 * the return code indicates if a node was read.
446 */
447static int try_read_node(const struct ubifs_info *c, void *buf, int type,
448 int len, int lnum, int offs)
449{
450 int err, node_len;
451 struct ubifs_ch *ch = buf;
452 uint32_t crc, node_crc;
453
454 dbg_io("LEB %d:%d, %s, length %d", lnum, offs, dbg_ntype(type), len);
455
456 err = ubi_read(c->ubi, lnum, buf, offs, len);
457 if (err) {
458 ubifs_err("cannot read node type %d from LEB %d:%d, error %d",
459 type, lnum, offs, err);
460 return err;
461 }
462
463 if (le32_to_cpu(ch->magic) != UBIFS_NODE_MAGIC)
464 return 0;
465
466 if (ch->node_type != type)
467 return 0;
468
469 node_len = le32_to_cpu(ch->len);
470 if (node_len != len)
471 return 0;
472
473 crc = crc32(UBIFS_CRC32_INIT, buf + 8, node_len - 8);
474 node_crc = le32_to_cpu(ch->crc);
475 if (crc != node_crc)
476 return 0;
477
478 return 1;
479}
480
481/**
482 * fallible_read_node - try to read a leaf node.
483 * @c: UBIFS file-system description object
484 * @key: key of node to read
485 * @zbr: position of node
486 * @node: node returned
487 *
488 * This function tries to read a node and returns %1 if the node is read, %0
489 * if the node is not present, and a negative error code in the case of error.
490 */
491static int fallible_read_node(struct ubifs_info *c, const union ubifs_key *key,
492 struct ubifs_zbranch *zbr, void *node)
493{
494 int ret;
495
496 dbg_tnc("LEB %d:%d, key %s", zbr->lnum, zbr->offs, DBGKEY(key));
497
498 ret = try_read_node(c, node, key_type(c, key), zbr->len, zbr->lnum,
499 zbr->offs);
500 if (ret == 1) {
501 union ubifs_key node_key;
502 struct ubifs_dent_node *dent = node;
503
504 /* All nodes have key in the same place */
505 key_read(c, &dent->key, &node_key);
506 if (keys_cmp(c, key, &node_key) != 0)
507 ret = 0;
508 }
509 if (ret == 0)
510 dbg_mnt("dangling branch LEB %d:%d len %d, key %s",
511 zbr->lnum, zbr->offs, zbr->len, DBGKEY(key));
512 return ret;
513}
514
515/**
516 * matches_name - determine if a direntry or xattr entry matches a given name.
517 * @c: UBIFS file-system description object
518 * @zbr: zbranch of dent
519 * @nm: name to match
520 *
521 * This function checks if xentry/direntry referred by zbranch @zbr matches name
522 * @nm. Returns %NAME_MATCHES if it does, %NAME_LESS if the name referred by
523 * @zbr is less than @nm, and %NAME_GREATER if it is greater than @nm. In case
524 * of failure, a negative error code is returned.
525 */
526static int matches_name(struct ubifs_info *c, struct ubifs_zbranch *zbr,
527 const struct qstr *nm)
528{
529 struct ubifs_dent_node *dent;
530 int nlen, err;
531
532 /* If possible, match against the dent in the leaf node cache */
533 if (!zbr->leaf) {
534 dent = kmalloc(zbr->len, GFP_NOFS);
535 if (!dent)
536 return -ENOMEM;
537
538 err = ubifs_tnc_read_node(c, zbr, dent);
539 if (err)
540 goto out_free;
541
542 /* Add the node to the leaf node cache */
543 err = lnc_add_directly(c, zbr, dent);
544 if (err)
545 goto out_free;
546 } else
547 dent = zbr->leaf;
548
549 nlen = le16_to_cpu(dent->nlen);
550 err = memcmp(dent->name, nm->name, min_t(int, nlen, nm->len));
551 if (err == 0) {
552 if (nlen == nm->len)
553 return NAME_MATCHES;
554 else if (nlen < nm->len)
555 return NAME_LESS;
556 else
557 return NAME_GREATER;
558 } else if (err < 0)
559 return NAME_LESS;
560 else
561 return NAME_GREATER;
562
563out_free:
564 kfree(dent);
565 return err;
566}
567
568/**
569 * get_znode - get a TNC znode that may not be loaded yet.
570 * @c: UBIFS file-system description object
571 * @znode: parent znode
572 * @n: znode branch slot number
573 *
574 * This function returns the znode or a negative error code.
575 */
576static struct ubifs_znode *get_znode(struct ubifs_info *c,
577 struct ubifs_znode *znode, int n)
578{
579 struct ubifs_zbranch *zbr;
580
581 zbr = &znode->zbranch[n];
582 if (zbr->znode)
583 znode = zbr->znode;
584 else
585 znode = ubifs_load_znode(c, zbr, znode, n);
586 return znode;
587}
588
589/**
590 * tnc_next - find next TNC entry.
591 * @c: UBIFS file-system description object
592 * @zn: znode is passed and returned here
593 * @n: znode branch slot number is passed and returned here
594 *
595 * This function returns %0 if the next TNC entry is found, %-ENOENT if there is
596 * no next entry, or a negative error code otherwise.
597 */
598static int tnc_next(struct ubifs_info *c, struct ubifs_znode **zn, int *n)
599{
600 struct ubifs_znode *znode = *zn;
601 int nn = *n;
602
603 nn += 1;
604 if (nn < znode->child_cnt) {
605 *n = nn;
606 return 0;
607 }
608 while (1) {
609 struct ubifs_znode *zp;
610
611 zp = znode->parent;
612 if (!zp)
613 return -ENOENT;
614 nn = znode->iip + 1;
615 znode = zp;
616 if (nn < znode->child_cnt) {
617 znode = get_znode(c, znode, nn);
618 if (IS_ERR(znode))
619 return PTR_ERR(znode);
620 while (znode->level != 0) {
621 znode = get_znode(c, znode, 0);
622 if (IS_ERR(znode))
623 return PTR_ERR(znode);
624 }
625 nn = 0;
626 break;
627 }
628 }
629 *zn = znode;
630 *n = nn;
631 return 0;
632}
633
634/**
635 * tnc_prev - find previous TNC entry.
636 * @c: UBIFS file-system description object
637 * @zn: znode is passed and returned here
638 * @n: znode branch slot number is passed and returned here
639 *
640 * This function returns %0 if the previous TNC entry is found, %-ENOENT if
641 * there is no next entry, or a negative error code otherwise.
642 */
643static int tnc_prev(struct ubifs_info *c, struct ubifs_znode **zn, int *n)
644{
645 struct ubifs_znode *znode = *zn;
646 int nn = *n;
647
648 if (nn > 0) {
649 *n = nn - 1;
650 return 0;
651 }
652 while (1) {
653 struct ubifs_znode *zp;
654
655 zp = znode->parent;
656 if (!zp)
657 return -ENOENT;
658 nn = znode->iip - 1;
659 znode = zp;
660 if (nn >= 0) {
661 znode = get_znode(c, znode, nn);
662 if (IS_ERR(znode))
663 return PTR_ERR(znode);
664 while (znode->level != 0) {
665 nn = znode->child_cnt - 1;
666 znode = get_znode(c, znode, nn);
667 if (IS_ERR(znode))
668 return PTR_ERR(znode);
669 }
670 nn = znode->child_cnt - 1;
671 break;
672 }
673 }
674 *zn = znode;
675 *n = nn;
676 return 0;
677}
678
679/**
680 * resolve_collision - resolve a collision.
681 * @c: UBIFS file-system description object
682 * @key: key of a directory or extended attribute entry
683 * @zn: znode is returned here
684 * @n: zbranch number is passed and returned here
685 * @nm: name of the entry
686 *
687 * This function is called for "hashed" keys to make sure that the found key
688 * really corresponds to the looked up node (directory or extended attribute
689 * entry). It returns %1 and sets @zn and @n if the collision is resolved.
690 * %0 is returned if @nm is not found and @zn and @n are set to the previous
691 * entry, i.e. to the entry after which @nm could follow if it were in TNC.
692 * This means that @n may be set to %-1 if the leftmost key in @zn is the
693 * previous one. A negative error code is returned on failures.
694 */
695static int resolve_collision(struct ubifs_info *c, const union ubifs_key *key,
696 struct ubifs_znode **zn, int *n,
697 const struct qstr *nm)
698{
699 int err;
700
701 err = matches_name(c, &(*zn)->zbranch[*n], nm);
702 if (unlikely(err < 0))
703 return err;
704 if (err == NAME_MATCHES)
705 return 1;
706
707 if (err == NAME_GREATER) {
708 /* Look left */
709 while (1) {
710 err = tnc_prev(c, zn, n);
711 if (err == -ENOENT) {
712 ubifs_assert(*n == 0);
713 *n = -1;
714 return 0;
715 }
716 if (err < 0)
717 return err;
718 if (keys_cmp(c, &(*zn)->zbranch[*n].key, key)) {
719 /*
720 * We have found the branch after which we would
721 * like to insert, but inserting in this znode
722 * may still be wrong. Consider the following 3
723 * znodes, in the case where we are resolving a
724 * collision with Key2.
725 *
726 * znode zp
727 * ----------------------
728 * level 1 | Key0 | Key1 |
729 * -----------------------
730 * | |
731 * znode za | | znode zb
732 * ------------ ------------
733 * level 0 | Key0 | | Key2 |
734 * ------------ ------------
735 *
736 * The lookup finds Key2 in znode zb. Lets say
737 * there is no match and the name is greater so
738 * we look left. When we find Key0, we end up
739 * here. If we return now, we will insert into
740 * znode za at slot n = 1. But that is invalid
741 * according to the parent's keys. Key2 must
742 * be inserted into znode zb.
743 *
744 * Note, this problem is not relevant for the
745 * case when we go right, because
746 * 'tnc_insert()' would correct the parent key.
747 */
748 if (*n == (*zn)->child_cnt - 1) {
749 err = tnc_next(c, zn, n);
750 if (err) {
751 /* Should be impossible */
752 ubifs_assert(0);
753 if (err == -ENOENT)
754 err = -EINVAL;
755 return err;
756 }
757 ubifs_assert(*n == 0);
758 *n = -1;
759 }
760 return 0;
761 }
762 err = matches_name(c, &(*zn)->zbranch[*n], nm);
763 if (err < 0)
764 return err;
765 if (err == NAME_LESS)
766 return 0;
767 if (err == NAME_MATCHES)
768 return 1;
769 ubifs_assert(err == NAME_GREATER);
770 }
771 } else {
772 int nn = *n;
773 struct ubifs_znode *znode = *zn;
774
775 /* Look right */
776 while (1) {
777 err = tnc_next(c, &znode, &nn);
778 if (err == -ENOENT)
779 return 0;
780 if (err < 0)
781 return err;
782 if (keys_cmp(c, &znode->zbranch[nn].key, key))
783 return 0;
784 err = matches_name(c, &znode->zbranch[nn], nm);
785 if (err < 0)
786 return err;
787 if (err == NAME_GREATER)
788 return 0;
789 *zn = znode;
790 *n = nn;
791 if (err == NAME_MATCHES)
792 return 1;
793 ubifs_assert(err == NAME_LESS);
794 }
795 }
796}
797
798/**
799 * fallible_matches_name - determine if a dent matches a given name.
800 * @c: UBIFS file-system description object
801 * @zbr: zbranch of dent
802 * @nm: name to match
803 *
804 * This is a "fallible" version of 'matches_name()' function which does not
805 * panic if the direntry/xentry referred by @zbr does not exist on the media.
806 *
807 * This function checks if xentry/direntry referred by zbranch @zbr matches name
808 * @nm. Returns %NAME_MATCHES it does, %NAME_LESS if the name referred by @zbr
809 * is less than @nm, %NAME_GREATER if it is greater than @nm, and @NOT_ON_MEDIA
810 * if xentry/direntry referred by @zbr does not exist on the media. A negative
811 * error code is returned in case of failure.
812 */
813static int fallible_matches_name(struct ubifs_info *c,
814 struct ubifs_zbranch *zbr,
815 const struct qstr *nm)
816{
817 struct ubifs_dent_node *dent;
818 int nlen, err;
819
820 /* If possible, match against the dent in the leaf node cache */
821 if (!zbr->leaf) {
822 dent = kmalloc(zbr->len, GFP_NOFS);
823 if (!dent)
824 return -ENOMEM;
825
826 err = fallible_read_node(c, &zbr->key, zbr, dent);
827 if (err < 0)
828 goto out_free;
829 if (err == 0) {
830 /* The node was not present */
831 err = NOT_ON_MEDIA;
832 goto out_free;
833 }
834 ubifs_assert(err == 1);
835
836 err = lnc_add_directly(c, zbr, dent);
837 if (err)
838 goto out_free;
839 } else
840 dent = zbr->leaf;
841
842 nlen = le16_to_cpu(dent->nlen);
843 err = memcmp(dent->name, nm->name, min_t(int, nlen, nm->len));
844 if (err == 0) {
845 if (nlen == nm->len)
846 return NAME_MATCHES;
847 else if (nlen < nm->len)
848 return NAME_LESS;
849 else
850 return NAME_GREATER;
851 } else if (err < 0)
852 return NAME_LESS;
853 else
854 return NAME_GREATER;
855
856out_free:
857 kfree(dent);
858 return err;
859}
860
861/**
862 * fallible_resolve_collision - resolve a collision even if nodes are missing.
863 * @c: UBIFS file-system description object
864 * @key: key
865 * @zn: znode is returned here
866 * @n: branch number is passed and returned here
867 * @nm: name of directory entry
868 * @adding: indicates caller is adding a key to the TNC
869 *
870 * This is a "fallible" version of the 'resolve_collision()' function which
871 * does not panic if one of the nodes referred to by TNC does not exist on the
872 * media. This may happen when replaying the journal if a deleted node was
873 * Garbage-collected and the commit was not done. A branch that refers to a node
874 * that is not present is called a dangling branch. The following are the return
875 * codes for this function:
876 * o if @nm was found, %1 is returned and @zn and @n are set to the found
877 * branch;
878 * o if we are @adding and @nm was not found, %0 is returned;
879 * o if we are not @adding and @nm was not found, but a dangling branch was
880 * found, then %1 is returned and @zn and @n are set to the dangling branch;
881 * o a negative error code is returned in case of failure.
882 */
883static int fallible_resolve_collision(struct ubifs_info *c,
884 const union ubifs_key *key,
885 struct ubifs_znode **zn, int *n,
886 const struct qstr *nm, int adding)
887{
888 struct ubifs_znode *o_znode = NULL, *znode = *zn;
889 int uninitialized_var(o_n), err, cmp, unsure = 0, nn = *n;
890
891 cmp = fallible_matches_name(c, &znode->zbranch[nn], nm);
892 if (unlikely(cmp < 0))
893 return cmp;
894 if (cmp == NAME_MATCHES)
895 return 1;
896 if (cmp == NOT_ON_MEDIA) {
897 o_znode = znode;
898 o_n = nn;
899 /*
900 * We are unlucky and hit a dangling branch straight away.
901 * Now we do not really know where to go to find the needed
902 * branch - to the left or to the right. Well, let's try left.
903 */
904 unsure = 1;
905 } else if (!adding)
906 unsure = 1; /* Remove a dangling branch wherever it is */
907
908 if (cmp == NAME_GREATER || unsure) {
909 /* Look left */
910 while (1) {
911 err = tnc_prev(c, zn, n);
912 if (err == -ENOENT) {
913 ubifs_assert(*n == 0);
914 *n = -1;
915 break;
916 }
917 if (err < 0)
918 return err;
919 if (keys_cmp(c, &(*zn)->zbranch[*n].key, key)) {
920 /* See comments in 'resolve_collision()' */
921 if (*n == (*zn)->child_cnt - 1) {
922 err = tnc_next(c, zn, n);
923 if (err) {
924 /* Should be impossible */
925 ubifs_assert(0);
926 if (err == -ENOENT)
927 err = -EINVAL;
928 return err;
929 }
930 ubifs_assert(*n == 0);
931 *n = -1;
932 }
933 break;
934 }
935 err = fallible_matches_name(c, &(*zn)->zbranch[*n], nm);
936 if (err < 0)
937 return err;
938 if (err == NAME_MATCHES)
939 return 1;
940 if (err == NOT_ON_MEDIA) {
941 o_znode = *zn;
942 o_n = *n;
943 continue;
944 }
945 if (!adding)
946 continue;
947 if (err == NAME_LESS)
948 break;
949 else
950 unsure = 0;
951 }
952 }
953
954 if (cmp == NAME_LESS || unsure) {
955 /* Look right */
956 *zn = znode;
957 *n = nn;
958 while (1) {
959 err = tnc_next(c, &znode, &nn);
960 if (err == -ENOENT)
961 break;
962 if (err < 0)
963 return err;
964 if (keys_cmp(c, &znode->zbranch[nn].key, key))
965 break;
966 err = fallible_matches_name(c, &znode->zbranch[nn], nm);
967 if (err < 0)
968 return err;
969 if (err == NAME_GREATER)
970 break;
971 *zn = znode;
972 *n = nn;
973 if (err == NAME_MATCHES)
974 return 1;
975 if (err == NOT_ON_MEDIA) {
976 o_znode = znode;
977 o_n = nn;
978 }
979 }
980 }
981
982 /* Never match a dangling branch when adding */
983 if (adding || !o_znode)
984 return 0;
985
986 dbg_mnt("dangling match LEB %d:%d len %d %s",
987 o_znode->zbranch[o_n].lnum, o_znode->zbranch[o_n].offs,
988 o_znode->zbranch[o_n].len, DBGKEY(key));
989 *zn = o_znode;
990 *n = o_n;
991 return 1;
992}
993
994/**
995 * matches_position - determine if a zbranch matches a given position.
996 * @zbr: zbranch of dent
997 * @lnum: LEB number of dent to match
998 * @offs: offset of dent to match
999 *
1000 * This function returns %1 if @lnum:@offs matches, and %0 otherwise.
1001 */
1002static int matches_position(struct ubifs_zbranch *zbr, int lnum, int offs)
1003{
1004 if (zbr->lnum == lnum && zbr->offs == offs)
1005 return 1;
1006 else
1007 return 0;
1008}
1009
1010/**
1011 * resolve_collision_directly - resolve a collision directly.
1012 * @c: UBIFS file-system description object
1013 * @key: key of directory entry
1014 * @zn: znode is passed and returned here
1015 * @n: zbranch number is passed and returned here
1016 * @lnum: LEB number of dent node to match
1017 * @offs: offset of dent node to match
1018 *
1019 * This function is used for "hashed" keys to make sure the found directory or
1020 * extended attribute entry node is what was looked for. It is used when the
1021 * flash address of the right node is known (@lnum:@offs) which makes it much
1022 * easier to resolve collisions (no need to read entries and match full
1023 * names). This function returns %1 and sets @zn and @n if the collision is
1024 * resolved, %0 if @lnum:@offs is not found and @zn and @n are set to the
1025 * previous directory entry. Otherwise a negative error code is returned.
1026 */
1027static int resolve_collision_directly(struct ubifs_info *c,
1028 const union ubifs_key *key,
1029 struct ubifs_znode **zn, int *n,
1030 int lnum, int offs)
1031{
1032 struct ubifs_znode *znode;
1033 int nn, err;
1034
1035 znode = *zn;
1036 nn = *n;
1037 if (matches_position(&znode->zbranch[nn], lnum, offs))
1038 return 1;
1039
1040 /* Look left */
1041 while (1) {
1042 err = tnc_prev(c, &znode, &nn);
1043 if (err == -ENOENT)
1044 break;
1045 if (err < 0)
1046 return err;
1047 if (keys_cmp(c, &znode->zbranch[nn].key, key))
1048 break;
1049 if (matches_position(&znode->zbranch[nn], lnum, offs)) {
1050 *zn = znode;
1051 *n = nn;
1052 return 1;
1053 }
1054 }
1055
1056 /* Look right */
1057 znode = *zn;
1058 nn = *n;
1059 while (1) {
1060 err = tnc_next(c, &znode, &nn);
1061 if (err == -ENOENT)
1062 return 0;
1063 if (err < 0)
1064 return err;
1065 if (keys_cmp(c, &znode->zbranch[nn].key, key))
1066 return 0;
1067 *zn = znode;
1068 *n = nn;
1069 if (matches_position(&znode->zbranch[nn], lnum, offs))
1070 return 1;
1071 }
1072}
1073
1074/**
1075 * dirty_cow_bottom_up - dirty a znode and its ancestors.
1076 * @c: UBIFS file-system description object
1077 * @znode: znode to dirty
1078 *
1079 * If we do not have a unique key that resides in a znode, then we cannot
1080 * dirty that znode from the top down (i.e. by using lookup_level0_dirty)
1081 * This function records the path back to the last dirty ancestor, and then
1082 * dirties the znodes on that path.
1083 */
1084static struct ubifs_znode *dirty_cow_bottom_up(struct ubifs_info *c,
1085 struct ubifs_znode *znode)
1086{
1087 struct ubifs_znode *zp;
1088 int *path = c->bottom_up_buf, p = 0;
1089
1090 ubifs_assert(c->zroot.znode);
1091 ubifs_assert(znode);
1092 if (c->zroot.znode->level > BOTTOM_UP_HEIGHT) {
1093 kfree(c->bottom_up_buf);
1094 c->bottom_up_buf = kmalloc(c->zroot.znode->level * sizeof(int),
1095 GFP_NOFS);
1096 if (!c->bottom_up_buf)
1097 return ERR_PTR(-ENOMEM);
1098 path = c->bottom_up_buf;
1099 }
1100 if (c->zroot.znode->level) {
1101 /* Go up until parent is dirty */
1102 while (1) {
1103 int n;
1104
1105 zp = znode->parent;
1106 if (!zp)
1107 break;
1108 n = znode->iip;
1109 ubifs_assert(p < c->zroot.znode->level);
1110 path[p++] = n;
1111 if (!zp->cnext && ubifs_zn_dirty(znode))
1112 break;
1113 znode = zp;
1114 }
1115 }
1116
1117 /* Come back down, dirtying as we go */
1118 while (1) {
1119 struct ubifs_zbranch *zbr;
1120
1121 zp = znode->parent;
1122 if (zp) {
1123 ubifs_assert(path[p - 1] >= 0);
1124 ubifs_assert(path[p - 1] < zp->child_cnt);
1125 zbr = &zp->zbranch[path[--p]];
1126 znode = dirty_cow_znode(c, zbr);
1127 } else {
1128 ubifs_assert(znode == c->zroot.znode);
1129 znode = dirty_cow_znode(c, &c->zroot);
1130 }
1131 if (unlikely(IS_ERR(znode)) || !p)
1132 break;
1133 ubifs_assert(path[p - 1] >= 0);
1134 ubifs_assert(path[p - 1] < znode->child_cnt);
1135 znode = znode->zbranch[path[p - 1]].znode;
1136 }
1137
1138 return znode;
1139}
1140
1141/**
1142 * ubifs_lookup_level0 - search for zero-level znode.
1143 * @c: UBIFS file-system description object
1144 * @key: key to lookup
1145 * @zn: znode is returned here
1146 * @n: znode branch slot number is returned here
1147 *
1148 * This function looks up the TNC tree and search for zero-level znode which
1149 * refers key @key. The found zero-level znode is returned in @zn. There are 3
1150 * cases:
1151 * o exact match, i.e. the found zero-level znode contains key @key, then %1
1152 * is returned and slot number of the matched branch is stored in @n;
1153 * o not exact match, which means that zero-level znode does not contain
1154 * @key, then %0 is returned and slot number of the closed branch is stored
1155 * in @n;
1156 * o @key is so small that it is even less than the lowest key of the
1157 * leftmost zero-level node, then %0 is returned and %0 is stored in @n.
1158 *
1159 * Note, when the TNC tree is traversed, some znodes may be absent, then this
1160 * function reads corresponding indexing nodes and inserts them to TNC. In
1161 * case of failure, a negative error code is returned.
1162 */
1163int ubifs_lookup_level0(struct ubifs_info *c, const union ubifs_key *key,
1164 struct ubifs_znode **zn, int *n)
1165{
1166 int err, exact;
1167 struct ubifs_znode *znode;
1168 unsigned long time = get_seconds();
1169
1170 dbg_tnc("search key %s", DBGKEY(key));
1171
1172 znode = c->zroot.znode;
1173 if (unlikely(!znode)) {
1174 znode = ubifs_load_znode(c, &c->zroot, NULL, 0);
1175 if (IS_ERR(znode))
1176 return PTR_ERR(znode);
1177 }
1178
1179 znode->time = time;
1180
1181 while (1) {
1182 struct ubifs_zbranch *zbr;
1183
1184 exact = ubifs_search_zbranch(c, znode, key, n);
1185
1186 if (znode->level == 0)
1187 break;
1188
1189 if (*n < 0)
1190 *n = 0;
1191 zbr = &znode->zbranch[*n];
1192
1193 if (zbr->znode) {
1194 znode->time = time;
1195 znode = zbr->znode;
1196 continue;
1197 }
1198
1199 /* znode is not in TNC cache, load it from the media */
1200 znode = ubifs_load_znode(c, zbr, znode, *n);
1201 if (IS_ERR(znode))
1202 return PTR_ERR(znode);
1203 }
1204
1205 *zn = znode;
1206 if (exact || !is_hash_key(c, key) || *n != -1) {
1207 dbg_tnc("found %d, lvl %d, n %d", exact, znode->level, *n);
1208 return exact;
1209 }
1210
1211 /*
1212 * Here is a tricky place. We have not found the key and this is a
1213 * "hashed" key, which may collide. The rest of the code deals with
1214 * situations like this:
1215 *
1216 * | 3 | 5 |
1217 * / \
1218 * | 3 | 5 | | 6 | 7 | (x)
1219 *
1220 * Or more a complex example:
1221 *
1222 * | 1 | 5 |
1223 * / \
1224 * | 1 | 3 | | 5 | 8 |
1225 * \ /
1226 * | 5 | 5 | | 6 | 7 | (x)
1227 *
1228 * In the examples, if we are looking for key "5", we may reach nodes
1229 * marked with "(x)". In this case what we have do is to look at the
1230 * left and see if there is "5" key there. If there is, we have to
1231 * return it.
1232 *
1233 * Note, this whole situation is possible because we allow to have
1234 * elements which are equivalent to the next key in the parent in the
1235 * children of current znode. For example, this happens if we split a
1236 * znode like this: | 3 | 5 | 5 | 6 | 7 |, which results in something
1237 * like this:
1238 * | 3 | 5 |
1239 * / \
1240 * | 3 | 5 | | 5 | 6 | 7 |
1241 * ^
1242 * And this becomes what is at the first "picture" after key "5" marked
1243 * with "^" is removed. What could be done is we could prohibit
1244 * splitting in the middle of the colliding sequence. Also, when
1245 * removing the leftmost key, we would have to correct the key of the
1246 * parent node, which would introduce additional complications. Namely,
1247 * if we changed the the leftmost key of the parent znode, the garbage
1248 * collector would be unable to find it (GC is doing this when GC'ing
1249 * indexing LEBs). Although we already have an additional RB-tree where
1250 * we save such changed znodes (see 'ins_clr_old_idx_znode()') until
1251 * after the commit. But anyway, this does not look easy to implement
1252 * so we did not try this.
1253 */
1254 err = tnc_prev(c, &znode, n);
1255 if (err == -ENOENT) {
1256 dbg_tnc("found 0, lvl %d, n -1", znode->level);
1257 *n = -1;
1258 return 0;
1259 }
1260 if (unlikely(err < 0))
1261 return err;
1262 if (keys_cmp(c, key, &znode->zbranch[*n].key)) {
1263 dbg_tnc("found 0, lvl %d, n -1", znode->level);
1264 *n = -1;
1265 return 0;
1266 }
1267
1268 dbg_tnc("found 1, lvl %d, n %d", znode->level, *n);
1269 *zn = znode;
1270 return 1;
1271}
1272
1273/**
1274 * lookup_level0_dirty - search for zero-level znode dirtying.
1275 * @c: UBIFS file-system description object
1276 * @key: key to lookup
1277 * @zn: znode is returned here
1278 * @n: znode branch slot number is returned here
1279 *
1280 * This function looks up the TNC tree and search for zero-level znode which
1281 * refers key @key. The found zero-level znode is returned in @zn. There are 3
1282 * cases:
1283 * o exact match, i.e. the found zero-level znode contains key @key, then %1
1284 * is returned and slot number of the matched branch is stored in @n;
1285 * o not exact match, which means that zero-level znode does not contain @key
1286 * then %0 is returned and slot number of the closed branch is stored in
1287 * @n;
1288 * o @key is so small that it is even less than the lowest key of the
1289 * leftmost zero-level node, then %0 is returned and %-1 is stored in @n.
1290 *
1291 * Additionally all znodes in the path from the root to the located zero-level
1292 * znode are marked as dirty.
1293 *
1294 * Note, when the TNC tree is traversed, some znodes may be absent, then this
1295 * function reads corresponding indexing nodes and inserts them to TNC. In
1296 * case of failure, a negative error code is returned.
1297 */
1298static int lookup_level0_dirty(struct ubifs_info *c, const union ubifs_key *key,
1299 struct ubifs_znode **zn, int *n)
1300{
1301 int err, exact;
1302 struct ubifs_znode *znode;
1303 unsigned long time = get_seconds();
1304
1305 dbg_tnc("search and dirty key %s", DBGKEY(key));
1306
1307 znode = c->zroot.znode;
1308 if (unlikely(!znode)) {
1309 znode = ubifs_load_znode(c, &c->zroot, NULL, 0);
1310 if (IS_ERR(znode))
1311 return PTR_ERR(znode);
1312 }
1313
1314 znode = dirty_cow_znode(c, &c->zroot);
1315 if (IS_ERR(znode))
1316 return PTR_ERR(znode);
1317
1318 znode->time = time;
1319
1320 while (1) {
1321 struct ubifs_zbranch *zbr;
1322
1323 exact = ubifs_search_zbranch(c, znode, key, n);
1324
1325 if (znode->level == 0)
1326 break;
1327
1328 if (*n < 0)
1329 *n = 0;
1330 zbr = &znode->zbranch[*n];
1331
1332 if (zbr->znode) {
1333 znode->time = time;
1334 znode = dirty_cow_znode(c, zbr);
1335 if (IS_ERR(znode))
1336 return PTR_ERR(znode);
1337 continue;
1338 }
1339
1340 /* znode is not in TNC cache, load it from the media */
1341 znode = ubifs_load_znode(c, zbr, znode, *n);
1342 if (IS_ERR(znode))
1343 return PTR_ERR(znode);
1344 znode = dirty_cow_znode(c, zbr);
1345 if (IS_ERR(znode))
1346 return PTR_ERR(znode);
1347 }
1348
1349 *zn = znode;
1350 if (exact || !is_hash_key(c, key) || *n != -1) {
1351 dbg_tnc("found %d, lvl %d, n %d", exact, znode->level, *n);
1352 return exact;
1353 }
1354
1355 /*
1356 * See huge comment at 'lookup_level0_dirty()' what is the rest of the
1357 * code.
1358 */
1359 err = tnc_prev(c, &znode, n);
1360 if (err == -ENOENT) {
1361 *n = -1;
1362 dbg_tnc("found 0, lvl %d, n -1", znode->level);
1363 return 0;
1364 }
1365 if (unlikely(err < 0))
1366 return err;
1367 if (keys_cmp(c, key, &znode->zbranch[*n].key)) {
1368 *n = -1;
1369 dbg_tnc("found 0, lvl %d, n -1", znode->level);
1370 return 0;
1371 }
1372
1373 if (znode->cnext || !ubifs_zn_dirty(znode)) {
1374 znode = dirty_cow_bottom_up(c, znode);
1375 if (IS_ERR(znode))
1376 return PTR_ERR(znode);
1377 }
1378
1379 dbg_tnc("found 1, lvl %d, n %d", znode->level, *n);
1380 *zn = znode;
1381 return 1;
1382}
1383
1384/**
1385 * ubifs_tnc_lookup - look up a file-system node.
1386 * @c: UBIFS file-system description object
1387 * @key: node key to lookup
1388 * @node: the node is returned here
1389 *
1390 * This function look up and reads node with key @key. The caller has to make
1391 * sure the @node buffer is large enough to fit the node. Returns zero in case
1392 * of success, %-ENOENT if the node was not found, and a negative error code in
1393 * case of failure.
1394 */
1395int ubifs_tnc_lookup(struct ubifs_info *c, const union ubifs_key *key,
1396 void *node)
1397{
1398 int found, n, err;
1399 struct ubifs_znode *znode;
1400 struct ubifs_zbranch zbr, *zt;
1401
1402 mutex_lock(&c->tnc_mutex);
1403 found = ubifs_lookup_level0(c, key, &znode, &n);
1404 if (!found) {
1405 err = -ENOENT;
1406 goto out;
1407 } else if (found < 0) {
1408 err = found;
1409 goto out;
1410 }
1411 zt = &znode->zbranch[n];
1412 if (is_hash_key(c, key)) {
1413 /*
1414 * In this case the leaf node cache gets used, so we pass the
1415 * address of the zbranch and keep the mutex locked
1416 */
1417 err = tnc_read_node_nm(c, zt, node);
1418 goto out;
1419 }
1420 zbr = znode->zbranch[n];
1421 mutex_unlock(&c->tnc_mutex);
1422
1423 err = ubifs_tnc_read_node(c, &zbr, node);
1424 return err;
1425
1426out:
1427 mutex_unlock(&c->tnc_mutex);
1428 return err;
1429}
1430
1431/**
1432 * ubifs_tnc_locate - look up a file-system node and return it and its location.
1433 * @c: UBIFS file-system description object
1434 * @key: node key to lookup
1435 * @node: the node is returned here
1436 * @lnum: LEB number is returned here
1437 * @offs: offset is returned here
1438 *
1439 * This function is the same as 'ubifs_tnc_lookup()' but it returns the node
1440 * location also. See 'ubifs_tnc_lookup()'.
1441 */
1442int ubifs_tnc_locate(struct ubifs_info *c, const union ubifs_key *key,
1443 void *node, int *lnum, int *offs)
1444{
1445 int found, n, err;
1446 struct ubifs_znode *znode;
1447 struct ubifs_zbranch zbr, *zt;
1448
1449 mutex_lock(&c->tnc_mutex);
1450 found = ubifs_lookup_level0(c, key, &znode, &n);
1451 if (!found) {
1452 err = -ENOENT;
1453 goto out;
1454 } else if (found < 0) {
1455 err = found;
1456 goto out;
1457 }
1458 zt = &znode->zbranch[n];
1459 if (is_hash_key(c, key)) {
1460 /*
1461 * In this case the leaf node cache gets used, so we pass the
1462 * address of the zbranch and keep the mutex locked
1463 */
1464 *lnum = zt->lnum;
1465 *offs = zt->offs;
1466 err = tnc_read_node_nm(c, zt, node);
1467 goto out;
1468 }
1469 zbr = znode->zbranch[n];
1470 mutex_unlock(&c->tnc_mutex);
1471
1472 *lnum = zbr.lnum;
1473 *offs = zbr.offs;
1474
1475 err = ubifs_tnc_read_node(c, &zbr, node);
1476 return err;
1477
1478out:
1479 mutex_unlock(&c->tnc_mutex);
1480 return err;
1481}
1482
1483/**
1484 * do_lookup_nm- look up a "hashed" node.
1485 * @c: UBIFS file-system description object
1486 * @key: node key to lookup
1487 * @node: the node is returned here
1488 * @nm: node name
1489 *
1490 * This function look up and reads a node which contains name hash in the key.
1491 * Since the hash may have collisions, there may be many nodes with the same
1492 * key, so we have to sequentially look to all of them until the needed one is
1493 * found. This function returns zero in case of success, %-ENOENT if the node
1494 * was not found, and a negative error code in case of failure.
1495 */
1496static int do_lookup_nm(struct ubifs_info *c, const union ubifs_key *key,
1497 void *node, const struct qstr *nm)
1498{
1499 int found, n, err;
1500 struct ubifs_znode *znode;
1501 struct ubifs_zbranch zbr;
1502
1503 dbg_tnc("name '%.*s' key %s", nm->len, nm->name, DBGKEY(key));
1504 mutex_lock(&c->tnc_mutex);
1505 found = ubifs_lookup_level0(c, key, &znode, &n);
1506 if (!found) {
1507 err = -ENOENT;
1508 goto out_unlock;
1509 } else if (found < 0) {
1510 err = found;
1511 goto out_unlock;
1512 }
1513
1514 ubifs_assert(n >= 0);
1515
1516 err = resolve_collision(c, key, &znode, &n, nm);
1517 dbg_tnc("rc returned %d, znode %p, n %d", err, znode, n);
1518 if (unlikely(err < 0))
1519 goto out_unlock;
1520 if (err == 0) {
1521 err = -ENOENT;
1522 goto out_unlock;
1523 }
1524
1525 zbr = znode->zbranch[n];
1526 mutex_unlock(&c->tnc_mutex);
1527
1528 err = tnc_read_node_nm(c, &zbr, node);
1529 return err;
1530
1531out_unlock:
1532 mutex_unlock(&c->tnc_mutex);
1533 return err;
1534}
1535
1536/**
1537 * ubifs_tnc_lookup_nm - look up a "hashed" node.
1538 * @c: UBIFS file-system description object
1539 * @key: node key to lookup
1540 * @node: the node is returned here
1541 * @nm: node name
1542 *
1543 * This function look up and reads a node which contains name hash in the key.
1544 * Since the hash may have collisions, there may be many nodes with the same
1545 * key, so we have to sequentially look to all of them until the needed one is
1546 * found. This function returns zero in case of success, %-ENOENT if the node
1547 * was not found, and a negative error code in case of failure.
1548 */
1549int ubifs_tnc_lookup_nm(struct ubifs_info *c, const union ubifs_key *key,
1550 void *node, const struct qstr *nm)
1551{
1552 int err, len;
1553 const struct ubifs_dent_node *dent = node;
1554
1555 /*
1556 * We assume that in most of the cases there are no name collisions and
1557 * 'ubifs_tnc_lookup()' returns us the right direntry.
1558 */
1559 err = ubifs_tnc_lookup(c, key, node);
1560 if (err)
1561 return err;
1562
1563 len = le16_to_cpu(dent->nlen);
1564 if (nm->len == len && !memcmp(dent->name, nm->name, len))
1565 return 0;
1566
1567 /*
1568 * Unluckily, there are hash collisions and we have to iterate over
1569 * them look at each direntry with colliding name hash sequentially.
1570 */
1571 return do_lookup_nm(c, key, node, nm);
1572}
1573
1574/**
1575 * correct_parent_keys - correct parent znodes' keys.
1576 * @c: UBIFS file-system description object
1577 * @znode: znode to correct parent znodes for
1578 *
1579 * This is a helper function for 'tnc_insert()'. When the key of the leftmost
1580 * zbranch changes, keys of parent znodes have to be corrected. This helper
1581 * function is called in such situations and corrects the keys if needed.
1582 */
1583static void correct_parent_keys(const struct ubifs_info *c,
1584 struct ubifs_znode *znode)
1585{
1586 union ubifs_key *key, *key1;
1587
1588 ubifs_assert(znode->parent);
1589 ubifs_assert(znode->iip == 0);
1590
1591 key = &znode->zbranch[0].key;
1592 key1 = &znode->parent->zbranch[0].key;
1593
1594 while (keys_cmp(c, key, key1) < 0) {
1595 key_copy(c, key, key1);
1596 znode = znode->parent;
1597 znode->alt = 1;
1598 if (!znode->parent || znode->iip)
1599 break;
1600 key1 = &znode->parent->zbranch[0].key;
1601 }
1602}
1603
1604/**
1605 * insert_zbranch - insert a zbranch into a znode.
1606 * @znode: znode into which to insert
1607 * @zbr: zbranch to insert
1608 * @n: slot number to insert to
1609 *
1610 * This is a helper function for 'tnc_insert()'. UBIFS does not allow "gaps" in
1611 * znode's array of zbranches and keeps zbranches consolidated, so when a new
1612 * zbranch has to be inserted to the @znode->zbranches[]' array at the @n-th
1613 * slot, zbranches starting from @n have to be moved right.
1614 */
1615static void insert_zbranch(struct ubifs_znode *znode,
1616 const struct ubifs_zbranch *zbr, int n)
1617{
1618 int i;
1619
1620 ubifs_assert(ubifs_zn_dirty(znode));
1621
1622 if (znode->level) {
1623 for (i = znode->child_cnt; i > n; i--) {
1624 znode->zbranch[i] = znode->zbranch[i - 1];
1625 if (znode->zbranch[i].znode)
1626 znode->zbranch[i].znode->iip = i;
1627 }
1628 if (zbr->znode)
1629 zbr->znode->iip = n;
1630 } else
1631 for (i = znode->child_cnt; i > n; i--)
1632 znode->zbranch[i] = znode->zbranch[i - 1];
1633
1634 znode->zbranch[n] = *zbr;
1635 znode->child_cnt += 1;
1636
1637 /*
1638 * After inserting at slot zero, the lower bound of the key range of
1639 * this znode may have changed. If this znode is subsequently split
1640 * then the upper bound of the key range may change, and furthermore
1641 * it could change to be lower than the original lower bound. If that
1642 * happens, then it will no longer be possible to find this znode in the
1643 * TNC using the key from the index node on flash. That is bad because
1644 * if it is not found, we will assume it is obsolete and may overwrite
1645 * it. Then if there is an unclean unmount, we will start using the
1646 * old index which will be broken.
1647 *
1648 * So we first mark znodes that have insertions at slot zero, and then
1649 * if they are split we add their lnum/offs to the old_idx tree.
1650 */
1651 if (n == 0)
1652 znode->alt = 1;
1653}
1654
1655/**
1656 * tnc_insert - insert a node into TNC.
1657 * @c: UBIFS file-system description object
1658 * @znode: znode to insert into
1659 * @zbr: branch to insert
1660 * @n: slot number to insert new zbranch to
1661 *
1662 * This function inserts a new node described by @zbr into znode @znode. If
1663 * znode does not have a free slot for new zbranch, it is split. Parent znodes
1664 * are split as well if needed. Returns zero in case of success or a negative
1665 * error code in case of failure.
1666 */
1667static int tnc_insert(struct ubifs_info *c, struct ubifs_znode *znode,
1668 struct ubifs_zbranch *zbr, int n)
1669{
1670 struct ubifs_znode *zn, *zi, *zp;
1671 int i, keep, move, appending = 0;
1672 union ubifs_key *key = &zbr->key;
1673
1674 ubifs_assert(n >= 0 && n <= c->fanout);
1675
1676 /* Implement naive insert for now */
1677again:
1678 zp = znode->parent;
1679 if (znode->child_cnt < c->fanout) {
1680 ubifs_assert(n != c->fanout);
1681 dbg_tnc("inserted at %d level %d, key %s", n, znode->level,
1682 DBGKEY(key));
1683
1684 insert_zbranch(znode, zbr, n);
1685
1686 /* Ensure parent's key is correct */
1687 if (n == 0 && zp && znode->iip == 0)
1688 correct_parent_keys(c, znode);
1689
1690 return 0;
1691 }
1692
1693 /*
1694 * Unfortunately, @znode does not have more empty slots and we have to
1695 * split it.
1696 */
1697 dbg_tnc("splitting level %d, key %s", znode->level, DBGKEY(key));
1698
1699 if (znode->alt)
1700 /*
1701 * We can no longer be sure of finding this znode by key, so we
1702 * record it in the old_idx tree.
1703 */
1704 ins_clr_old_idx_znode(c, znode);
1705
1706 zn = kzalloc(c->max_znode_sz, GFP_NOFS);
1707 if (!zn)
1708 return -ENOMEM;
1709 zn->parent = zp;
1710 zn->level = znode->level;
1711
1712 /* Decide where to split */
1713 if (znode->level == 0 && n == c->fanout &&
1714 key_type(c, key) == UBIFS_DATA_KEY) {
1715 union ubifs_key *key1;
1716
1717 /*
1718 * If this is an inode which is being appended - do not split
1719 * it because no other zbranches can be inserted between
1720 * zbranches of consecutive data nodes anyway.
1721 */
1722 key1 = &znode->zbranch[n - 1].key;
1723 if (key_inum(c, key1) == key_inum(c, key) &&
1724 key_type(c, key1) == UBIFS_DATA_KEY &&
1725 key_block(c, key1) == key_block(c, key) - 1)
1726 appending = 1;
1727 }
1728
1729 if (appending) {
1730 keep = c->fanout;
1731 move = 0;
1732 } else {
1733 keep = (c->fanout + 1) / 2;
1734 move = c->fanout - keep;
1735 }
1736
1737 /*
1738 * Although we don't at present, we could look at the neighbors and see
1739 * if we can move some zbranches there.
1740 */
1741
1742 if (n < keep) {
1743 /* Insert into existing znode */
1744 zi = znode;
1745 move += 1;
1746 keep -= 1;
1747 } else {
1748 /* Insert into new znode */
1749 zi = zn;
1750 n -= keep;
1751 /* Re-parent */
1752 if (zn->level != 0)
1753 zbr->znode->parent = zn;
1754 }
1755
1756 __set_bit(DIRTY_ZNODE, &zn->flags);
1757 atomic_long_inc(&c->dirty_zn_cnt);
1758
1759 zn->child_cnt = move;
1760 znode->child_cnt = keep;
1761
1762 dbg_tnc("moving %d, keeping %d", move, keep);
1763
1764 /* Move zbranch */
1765 for (i = 0; i < move; i++) {
1766 zn->zbranch[i] = znode->zbranch[keep + i];
1767 /* Re-parent */
1768 if (zn->level != 0)
1769 if (zn->zbranch[i].znode) {
1770 zn->zbranch[i].znode->parent = zn;
1771 zn->zbranch[i].znode->iip = i;
1772 }
1773 }
1774
1775 /* Insert new key and branch */
1776 dbg_tnc("inserting at %d level %d, key %s", n, zn->level, DBGKEY(key));
1777
1778 insert_zbranch(zi, zbr, n);
1779
1780 /* Insert new znode (produced by spitting) into the parent */
1781 if (zp) {
1782 i = n;
1783 /* Locate insertion point */
1784 n = znode->iip + 1;
1785 if (appending && n != c->fanout)
1786 appending = 0;
1787
1788 if (i == 0 && zi == znode && znode->iip == 0)
1789 correct_parent_keys(c, znode);
1790
1791 /* Tail recursion */
1792 zbr->key = zn->zbranch[0].key;
1793 zbr->znode = zn;
1794 zbr->lnum = 0;
1795 zbr->offs = 0;
1796 zbr->len = 0;
1797 znode = zp;
1798
1799 goto again;
1800 }
1801
1802 /* We have to split root znode */
1803 dbg_tnc("creating new zroot at level %d", znode->level + 1);
1804
1805 zi = kzalloc(c->max_znode_sz, GFP_NOFS);
1806 if (!zi)
1807 return -ENOMEM;
1808
1809 zi->child_cnt = 2;
1810 zi->level = znode->level + 1;
1811
1812 __set_bit(DIRTY_ZNODE, &zi->flags);
1813 atomic_long_inc(&c->dirty_zn_cnt);
1814
1815 zi->zbranch[0].key = znode->zbranch[0].key;
1816 zi->zbranch[0].znode = znode;
1817 zi->zbranch[0].lnum = c->zroot.lnum;
1818 zi->zbranch[0].offs = c->zroot.offs;
1819 zi->zbranch[0].len = c->zroot.len;
1820 zi->zbranch[1].key = zn->zbranch[0].key;
1821 zi->zbranch[1].znode = zn;
1822
1823 c->zroot.lnum = 0;
1824 c->zroot.offs = 0;
1825 c->zroot.len = 0;
1826 c->zroot.znode = zi;
1827
1828 zn->parent = zi;
1829 zn->iip = 1;
1830 znode->parent = zi;
1831 znode->iip = 0;
1832
1833 return 0;
1834}
1835
1836/**
1837 * ubifs_tnc_add - add a node to TNC.
1838 * @c: UBIFS file-system description object
1839 * @key: key to add
1840 * @lnum: LEB number of node
1841 * @offs: node offset
1842 * @len: node length
1843 *
1844 * This function adds a node with key @key to TNC. The node may be new or it may
1845 * obsolete some existing one. Returns %0 on success or negative error code on
1846 * failure.
1847 */
1848int ubifs_tnc_add(struct ubifs_info *c, const union ubifs_key *key, int lnum,
1849 int offs, int len)
1850{
1851 int found, n, err = 0;
1852 struct ubifs_znode *znode;
1853
1854 mutex_lock(&c->tnc_mutex);
1855 dbg_tnc("%d:%d, len %d, key %s", lnum, offs, len, DBGKEY(key));
1856 found = lookup_level0_dirty(c, key, &znode, &n);
1857 if (!found) {
1858 struct ubifs_zbranch zbr;
1859
1860 zbr.znode = NULL;
1861 zbr.lnum = lnum;
1862 zbr.offs = offs;
1863 zbr.len = len;
1864 key_copy(c, key, &zbr.key);
1865 err = tnc_insert(c, znode, &zbr, n + 1);
1866 } else if (found == 1) {
1867 struct ubifs_zbranch *zbr = &znode->zbranch[n];
1868
1869 lnc_free(zbr);
1870 err = ubifs_add_dirt(c, zbr->lnum, zbr->len);
1871 zbr->lnum = lnum;
1872 zbr->offs = offs;
1873 zbr->len = len;
1874 } else
1875 err = found;
1876 if (!err)
1877 err = dbg_check_tnc(c, 0);
1878 mutex_unlock(&c->tnc_mutex);
1879
1880 return err;
1881}
1882
1883/**
1884 * ubifs_tnc_replace - replace a node in the TNC only if the old node is found.
1885 * @c: UBIFS file-system description object
1886 * @key: key to add
1887 * @old_lnum: LEB number of old node
1888 * @old_offs: old node offset
1889 * @lnum: LEB number of node
1890 * @offs: node offset
1891 * @len: node length
1892 *
1893 * This function replaces a node with key @key in the TNC only if the old node
1894 * is found. This function is called by garbage collection when node are moved.
1895 * Returns %0 on success or negative error code on failure.
1896 */
1897int ubifs_tnc_replace(struct ubifs_info *c, const union ubifs_key *key,
1898 int old_lnum, int old_offs, int lnum, int offs, int len)
1899{
1900 int found, n, err = 0;
1901 struct ubifs_znode *znode;
1902
1903 mutex_lock(&c->tnc_mutex);
1904 dbg_tnc("old LEB %d:%d, new LEB %d:%d, len %d, key %s", old_lnum,
1905 old_offs, lnum, offs, len, DBGKEY(key));
1906 found = lookup_level0_dirty(c, key, &znode, &n);
1907 if (found < 0) {
1908 err = found;
1909 goto out_unlock;
1910 }
1911
1912 if (found == 1) {
1913 struct ubifs_zbranch *zbr = &znode->zbranch[n];
1914
1915 found = 0;
1916 if (zbr->lnum == old_lnum && zbr->offs == old_offs) {
1917 lnc_free(zbr);
1918 err = ubifs_add_dirt(c, zbr->lnum, zbr->len);
1919 if (err)
1920 goto out_unlock;
1921 zbr->lnum = lnum;
1922 zbr->offs = offs;
1923 zbr->len = len;
1924 found = 1;
1925 } else if (is_hash_key(c, key)) {
1926 found = resolve_collision_directly(c, key, &znode, &n,
1927 old_lnum, old_offs);
1928 dbg_tnc("rc returned %d, znode %p, n %d, LEB %d:%d",
1929 found, znode, n, old_lnum, old_offs);
1930 if (found < 0) {
1931 err = found;
1932 goto out_unlock;
1933 }
1934
1935 if (found) {
1936 /* Ensure the znode is dirtied */
1937 if (znode->cnext || !ubifs_zn_dirty(znode)) {
1938 znode = dirty_cow_bottom_up(c,
1939 znode);
1940 if (IS_ERR(znode)) {
1941 err = PTR_ERR(znode);
1942 goto out_unlock;
1943 }
1944 }
1945 zbr = &znode->zbranch[n];
1946 lnc_free(zbr);
1947 err = ubifs_add_dirt(c, zbr->lnum,
1948 zbr->len);
1949 if (err)
1950 goto out_unlock;
1951 zbr->lnum = lnum;
1952 zbr->offs = offs;
1953 zbr->len = len;
1954 }
1955 }
1956 }
1957
1958 if (!found)
1959 err = ubifs_add_dirt(c, lnum, len);
1960
1961 if (!err)
1962 err = dbg_check_tnc(c, 0);
1963
1964out_unlock:
1965 mutex_unlock(&c->tnc_mutex);
1966 return err;
1967}
1968
1969/**
1970 * ubifs_tnc_add_nm - add a "hashed" node to TNC.
1971 * @c: UBIFS file-system description object
1972 * @key: key to add
1973 * @lnum: LEB number of node
1974 * @offs: node offset
1975 * @len: node length
1976 * @nm: node name
1977 *
1978 * This is the same as 'ubifs_tnc_add()' but it should be used with keys which
1979 * may have collisions, like directory entry keys.
1980 */
1981int ubifs_tnc_add_nm(struct ubifs_info *c, const union ubifs_key *key,
1982 int lnum, int offs, int len, const struct qstr *nm)
1983{
1984 int found, n, err = 0;
1985 struct ubifs_znode *znode;
1986
1987 mutex_lock(&c->tnc_mutex);
1988 dbg_tnc("LEB %d:%d, name '%.*s', key %s", lnum, offs, nm->len, nm->name,
1989 DBGKEY(key));
1990 found = lookup_level0_dirty(c, key, &znode, &n);
1991 if (found < 0) {
1992 err = found;
1993 goto out_unlock;
1994 }
1995
1996 if (found == 1) {
1997 if (c->replaying)
1998 found = fallible_resolve_collision(c, key, &znode, &n,
1999 nm, 1);
2000 else
2001 found = resolve_collision(c, key, &znode, &n, nm);
2002 dbg_tnc("rc returned %d, znode %p, n %d", found, znode, n);
2003 if (found < 0) {
2004 err = found;
2005 goto out_unlock;
2006 }
2007
2008 /* Ensure the znode is dirtied */
2009 if (znode->cnext || !ubifs_zn_dirty(znode)) {
2010 znode = dirty_cow_bottom_up(c, znode);
2011 if (IS_ERR(znode)) {
2012 err = PTR_ERR(znode);
2013 goto out_unlock;
2014 }
2015 }
2016
2017 if (found == 1) {
2018 struct ubifs_zbranch *zbr = &znode->zbranch[n];
2019
2020 lnc_free(zbr);
2021 err = ubifs_add_dirt(c, zbr->lnum, zbr->len);
2022 zbr->lnum = lnum;
2023 zbr->offs = offs;
2024 zbr->len = len;
2025 goto out_unlock;
2026 }
2027 }
2028
2029 if (!found) {
2030 struct ubifs_zbranch zbr;
2031
2032 zbr.znode = NULL;
2033 zbr.lnum = lnum;
2034 zbr.offs = offs;
2035 zbr.len = len;
2036 key_copy(c, key, &zbr.key);
2037 err = tnc_insert(c, znode, &zbr, n + 1);
2038 if (err)
2039 goto out_unlock;
2040 if (c->replaying) {
2041 /*
2042 * We did not find it in the index so there may be a
2043 * dangling branch still in the index. So we remove it
2044 * by passing 'ubifs_tnc_remove_nm()' the same key but
2045 * an unmatchable name.
2046 */
2047 struct qstr noname = { .len = 0, .name = "" };
2048
2049 err = dbg_check_tnc(c, 0);
2050 mutex_unlock(&c->tnc_mutex);
2051 if (err)
2052 return err;
2053 return ubifs_tnc_remove_nm(c, key, &noname);
2054 }
2055 }
2056
2057out_unlock:
2058 if (!err)
2059 err = dbg_check_tnc(c, 0);
2060 mutex_unlock(&c->tnc_mutex);
2061 return err;
2062}
2063
2064/**
2065 * tnc_delete - delete a znode form TNC.
2066 * @c: UBIFS file-system description object
2067 * @znode: znode to delete from
2068 * @n: zbranch slot number to delete
2069 *
2070 * This function deletes a leaf node from @n-th slot of @znode. Returns zero in
2071 * case of success and a negative error code in case of failure.
2072 */
2073static int tnc_delete(struct ubifs_info *c, struct ubifs_znode *znode, int n)
2074{
2075 struct ubifs_zbranch *zbr;
2076 struct ubifs_znode *zp;
2077 int i, err;
2078
2079 /* Delete without merge for now */
2080 ubifs_assert(znode->level == 0);
2081 ubifs_assert(n >= 0 && n < c->fanout);
2082 dbg_tnc("deleting %s", DBGKEY(&znode->zbranch[n].key));
2083
2084 zbr = &znode->zbranch[n];
2085 lnc_free(zbr);
2086
2087 err = ubifs_add_dirt(c, zbr->lnum, zbr->len);
2088 if (err) {
2089 dbg_dump_znode(c, znode);
2090 return err;
2091 }
2092
2093 /* We do not "gap" zbranch slots */
2094 for (i = n; i < znode->child_cnt - 1; i++)
2095 znode->zbranch[i] = znode->zbranch[i + 1];
2096 znode->child_cnt -= 1;
2097
2098 if (znode->child_cnt > 0)
2099 return 0;
2100
2101 /*
2102 * This was the last zbranch, we have to delete this znode from the
2103 * parent.
2104 */
2105
2106 do {
2107 ubifs_assert(!test_bit(OBSOLETE_ZNODE, &znode->flags));
2108 ubifs_assert(ubifs_zn_dirty(znode));
2109
2110 zp = znode->parent;
2111 n = znode->iip;
2112
2113 atomic_long_dec(&c->dirty_zn_cnt);
2114
2115 err = insert_old_idx_znode(c, znode);
2116 if (err)
2117 return err;
2118
2119 if (znode->cnext) {
2120 __set_bit(OBSOLETE_ZNODE, &znode->flags);
2121 atomic_long_inc(&c->clean_zn_cnt);
2122 atomic_long_inc(&ubifs_clean_zn_cnt);
2123 } else
2124 kfree(znode);
2125 znode = zp;
2126 } while (znode->child_cnt == 1); /* while removing last child */
2127
2128 /* Remove from znode, entry n - 1 */
2129 znode->child_cnt -= 1;
2130 ubifs_assert(znode->level != 0);
2131 for (i = n; i < znode->child_cnt; i++) {
2132 znode->zbranch[i] = znode->zbranch[i + 1];
2133 if (znode->zbranch[i].znode)
2134 znode->zbranch[i].znode->iip = i;
2135 }
2136
2137 /*
2138 * If this is the root and it has only 1 child then
2139 * collapse the tree.
2140 */
2141 if (!znode->parent) {
2142 while (znode->child_cnt == 1 && znode->level != 0) {
2143 zp = znode;
2144 zbr = &znode->zbranch[0];
2145 znode = get_znode(c, znode, 0);
2146 if (IS_ERR(znode))
2147 return PTR_ERR(znode);
2148 znode = dirty_cow_znode(c, zbr);
2149 if (IS_ERR(znode))
2150 return PTR_ERR(znode);
2151 znode->parent = NULL;
2152 znode->iip = 0;
2153 if (c->zroot.len) {
2154 err = insert_old_idx(c, c->zroot.lnum,
2155 c->zroot.offs);
2156 if (err)
2157 return err;
2158 }
2159 c->zroot.lnum = zbr->lnum;
2160 c->zroot.offs = zbr->offs;
2161 c->zroot.len = zbr->len;
2162 c->zroot.znode = znode;
2163 ubifs_assert(!test_bit(OBSOLETE_ZNODE,
2164 &zp->flags));
2165 ubifs_assert(test_bit(DIRTY_ZNODE, &zp->flags));
2166 atomic_long_dec(&c->dirty_zn_cnt);
2167
2168 if (zp->cnext) {
2169 __set_bit(OBSOLETE_ZNODE, &zp->flags);
2170 atomic_long_inc(&c->clean_zn_cnt);
2171 atomic_long_inc(&ubifs_clean_zn_cnt);
2172 } else
2173 kfree(zp);
2174 }
2175 }
2176
2177 return 0;
2178}
2179
2180/**
2181 * ubifs_tnc_remove - remove an index entry of a node.
2182 * @c: UBIFS file-system description object
2183 * @key: key of node
2184 *
2185 * Returns %0 on success or negative error code on failure.
2186 */
2187int ubifs_tnc_remove(struct ubifs_info *c, const union ubifs_key *key)
2188{
2189 int found, n, err = 0;
2190 struct ubifs_znode *znode;
2191
2192 mutex_lock(&c->tnc_mutex);
2193 dbg_tnc("key %s", DBGKEY(key));
2194 found = lookup_level0_dirty(c, key, &znode, &n);
2195 if (found < 0) {
2196 err = found;
2197 goto out_unlock;
2198 }
2199 if (found == 1)
2200 err = tnc_delete(c, znode, n);
2201 if (!err)
2202 err = dbg_check_tnc(c, 0);
2203
2204out_unlock:
2205 mutex_unlock(&c->tnc_mutex);
2206 return err;
2207}
2208
2209/**
2210 * ubifs_tnc_remove_nm - remove an index entry for a "hashed" node.
2211 * @c: UBIFS file-system description object
2212 * @key: key of node
2213 * @nm: directory entry name
2214 *
2215 * Returns %0 on success or negative error code on failure.
2216 */
2217int ubifs_tnc_remove_nm(struct ubifs_info *c, const union ubifs_key *key,
2218 const struct qstr *nm)
2219{
2220 int n, err;
2221 struct ubifs_znode *znode;
2222
2223 mutex_lock(&c->tnc_mutex);
2224 dbg_tnc("%.*s, key %s", nm->len, nm->name, DBGKEY(key));
2225 err = lookup_level0_dirty(c, key, &znode, &n);
2226 if (err < 0)
2227 goto out_unlock;
2228
2229 if (err) {
2230 if (c->replaying)
2231 err = fallible_resolve_collision(c, key, &znode, &n,
2232 nm, 0);
2233 else
2234 err = resolve_collision(c, key, &znode, &n, nm);
2235 dbg_tnc("rc returned %d, znode %p, n %d", err, znode, n);
2236 if (err < 0)
2237 goto out_unlock;
2238 if (err) {
2239 /* Ensure the znode is dirtied */
2240 if (znode->cnext || !ubifs_zn_dirty(znode)) {
2241 znode = dirty_cow_bottom_up(c, znode);
2242 if (IS_ERR(znode)) {
2243 err = PTR_ERR(znode);
2244 goto out_unlock;
2245 }
2246 }
2247 err = tnc_delete(c, znode, n);
2248 }
2249 }
2250
2251out_unlock:
2252 if (!err)
2253 err = dbg_check_tnc(c, 0);
2254 mutex_unlock(&c->tnc_mutex);
2255 return err;
2256}
2257
/**
 * key_in_range - determine if a key falls within a range of keys.
 * @c: UBIFS file-system description object
 * @key: key to check
 * @from_key: lowest key in range
 * @to_key: highest key in range
 *
 * Both ends of the range are inclusive. This function returns %1 if the key
 * is in range and %0 otherwise.
 */
static int key_in_range(struct ubifs_info *c, union ubifs_key *key,
			union ubifs_key *from_key, union ubifs_key *to_key)
{
	return keys_cmp(c, key, from_key) >= 0 &&
	       keys_cmp(c, key, to_key) <= 0;
}
2276
2277/**
2278 * ubifs_tnc_remove_range - remove index entries in range.
2279 * @c: UBIFS file-system description object
2280 * @from_key: lowest key to remove
2281 * @to_key: highest key to remove
2282 *
2283 * This function removes index entries starting at @from_key and ending at
2284 * @to_key. This function returns zero in case of success and a negative error
2285 * code in case of failure.
2286 */
2287int ubifs_tnc_remove_range(struct ubifs_info *c, union ubifs_key *from_key,
2288 union ubifs_key *to_key)
2289{
2290 int i, n, k, err = 0;
2291 struct ubifs_znode *znode;
2292 union ubifs_key *key;
2293
2294 mutex_lock(&c->tnc_mutex);
2295 while (1) {
2296 /* Find first level 0 znode that contains keys to remove */
2297 err = ubifs_lookup_level0(c, from_key, &znode, &n);
2298 if (err < 0)
2299 goto out_unlock;
2300
2301 if (err)
2302 key = from_key;
2303 else {
2304 err = tnc_next(c, &znode, &n);
2305 if (err == -ENOENT) {
2306 err = 0;
2307 goto out_unlock;
2308 }
2309 if (err < 0)
2310 goto out_unlock;
2311 key = &znode->zbranch[n].key;
2312 if (!key_in_range(c, key, from_key, to_key)) {
2313 err = 0;
2314 goto out_unlock;
2315 }
2316 }
2317
2318 /* Ensure the znode is dirtied */
2319 if (znode->cnext || !ubifs_zn_dirty(znode)) {
2320 znode = dirty_cow_bottom_up(c, znode);
2321 if (IS_ERR(znode)) {
2322 err = PTR_ERR(znode);
2323 goto out_unlock;
2324 }
2325 }
2326
2327 /* Remove all keys in range except the first */
2328 for (i = n + 1, k = 0; i < znode->child_cnt; i++, k++) {
2329 key = &znode->zbranch[i].key;
2330 if (!key_in_range(c, key, from_key, to_key))
2331 break;
2332 lnc_free(&znode->zbranch[i]);
2333 err = ubifs_add_dirt(c, znode->zbranch[i].lnum,
2334 znode->zbranch[i].len);
2335 if (err) {
2336 dbg_dump_znode(c, znode);
2337 goto out_unlock;
2338 }
2339 dbg_tnc("removing %s", DBGKEY(key));
2340 }
2341 if (k) {
2342 for (i = n + 1 + k; i < znode->child_cnt; i++)
2343 znode->zbranch[i - k] = znode->zbranch[i];
2344 znode->child_cnt -= k;
2345 }
2346
2347 /* Now delete the first */
2348 err = tnc_delete(c, znode, n);
2349 if (err)
2350 goto out_unlock;
2351 }
2352
2353out_unlock:
2354 if (!err)
2355 err = dbg_check_tnc(c, 0);
2356 mutex_unlock(&c->tnc_mutex);
2357 return err;
2358}
2359
2360/**
2361 * ubifs_tnc_remove_ino - remove an inode from TNC.
2362 * @c: UBIFS file-system description object
2363 * @inum: inode number to remove
2364 *
2365 * This function remove inode @inum and all the extended attributes associated
2366 * with the anode from TNC and returns zero in case of success or a negative
2367 * error code in case of failure.
2368 */
2369int ubifs_tnc_remove_ino(struct ubifs_info *c, ino_t inum)
2370{
2371 union ubifs_key key1, key2;
2372 struct ubifs_dent_node *xent, *pxent = NULL;
2373 struct qstr nm = { .name = NULL };
2374
2375 dbg_tnc("ino %lu", inum);
2376
2377 /*
2378 * Walk all extended attribute entries and remove them together with
2379 * corresponding extended attribute inodes.
2380 */
2381 lowest_xent_key(c, &key1, inum);
2382 while (1) {
2383 ino_t xattr_inum;
2384 int err;
2385
2386 xent = ubifs_tnc_next_ent(c, &key1, &nm);
2387 if (IS_ERR(xent)) {
2388 err = PTR_ERR(xent);
2389 if (err == -ENOENT)
2390 break;
2391 return err;
2392 }
2393
2394 xattr_inum = le64_to_cpu(xent->inum);
2395 dbg_tnc("xent '%s', ino %lu", xent->name, xattr_inum);
2396
2397 nm.name = xent->name;
2398 nm.len = le16_to_cpu(xent->nlen);
2399 err = ubifs_tnc_remove_nm(c, &key1, &nm);
2400 if (err) {
2401 kfree(xent);
2402 return err;
2403 }
2404
2405 lowest_ino_key(c, &key1, xattr_inum);
2406 highest_ino_key(c, &key2, xattr_inum);
2407 err = ubifs_tnc_remove_range(c, &key1, &key2);
2408 if (err) {
2409 kfree(xent);
2410 return err;
2411 }
2412
2413 kfree(pxent);
2414 pxent = xent;
2415 key_read(c, &xent->key, &key1);
2416 }
2417
2418 kfree(pxent);
2419 lowest_ino_key(c, &key1, inum);
2420 highest_ino_key(c, &key2, inum);
2421
2422 return ubifs_tnc_remove_range(c, &key1, &key2);
2423}
2424
2425/**
2426 * ubifs_tnc_next_ent - walk directory or extended attribute entries.
2427 * @c: UBIFS file-system description object
2428 * @key: key of last entry
2429 * @nm: name of last entry found or %NULL
2430 *
2431 * This function finds and reads the next directory or extended attribute entry
2432 * after the given key (@key) if there is one. @nm is used to resolve
2433 * collisions.
2434 *
2435 * If the name of the current entry is not known and only the key is known,
2436 * @nm->name has to be %NULL. In this case the semantics of this function is a
2437 * little bit different and it returns the entry corresponding to this key, not
2438 * the next one. If the key was not found, the closest "right" entry is
2439 * returned.
2440 *
2441 * If the fist entry has to be found, @key has to contain the lowest possible
2442 * key value for this inode and @name has to be %NULL.
2443 *
2444 * This function returns the found directory or extended attribute entry node
2445 * in case of success, %-ENOENT is returned if no entry was found, and a
2446 * negative error code is returned in case of failure.
2447 */
2448struct ubifs_dent_node *ubifs_tnc_next_ent(struct ubifs_info *c,
2449 union ubifs_key *key,
2450 const struct qstr *nm)
2451{
2452 int n, err, type = key_type(c, key);
2453 struct ubifs_znode *znode;
2454 struct ubifs_dent_node *dent;
2455 struct ubifs_zbranch *zbr;
2456 union ubifs_key *dkey;
2457
2458 dbg_tnc("%s %s", nm->name ? (char *)nm->name : "(lowest)", DBGKEY(key));
2459 ubifs_assert(is_hash_key(c, key));
2460
2461 mutex_lock(&c->tnc_mutex);
2462 err = ubifs_lookup_level0(c, key, &znode, &n);
2463 if (unlikely(err < 0))
2464 goto out_unlock;
2465
2466 if (nm->name) {
2467 if (err) {
2468 /* Handle collisions */
2469 err = resolve_collision(c, key, &znode, &n, nm);
2470 dbg_tnc("rc returned %d, znode %p, n %d",
2471 err, znode, n);
2472 if (unlikely(err < 0))
2473 goto out_unlock;
2474 }
2475
2476 /* Now find next entry */
2477 err = tnc_next(c, &znode, &n);
2478 if (unlikely(err))
2479 goto out_unlock;
2480 } else {
2481 /*
2482 * The full name of the entry was not given, in which case the
2483 * behavior of this function is a little different and it
2484 * returns current entry, not the next one.
2485 */
2486 if (!err) {
2487 /*
2488 * However, the given key does not exist in the TNC
2489 * tree and @znode/@n variables contain the closest
2490 * "preceding" element. Switch to the next one.
2491 */
2492 err = tnc_next(c, &znode, &n);
2493 if (err)
2494 goto out_unlock;
2495 }
2496 }
2497
2498 zbr = &znode->zbranch[n];
2499 dent = kmalloc(zbr->len, GFP_NOFS);
2500 if (unlikely(!dent)) {
2501 err = -ENOMEM;
2502 goto out_unlock;
2503 }
2504
2505 /*
2506 * The above 'tnc_next()' call could lead us to the next inode, check
2507 * this.
2508 */
2509 dkey = &zbr->key;
2510 if (key_inum(c, dkey) != key_inum(c, key) ||
2511 key_type(c, dkey) != type) {
2512 err = -ENOENT;
2513 goto out_free;
2514 }
2515
2516 err = tnc_read_node_nm(c, zbr, dent);
2517 if (unlikely(err))
2518 goto out_free;
2519
2520 mutex_unlock(&c->tnc_mutex);
2521 return dent;
2522
2523out_free:
2524 kfree(dent);
2525out_unlock:
2526 mutex_unlock(&c->tnc_mutex);
2527 return ERR_PTR(err);
2528}
2529
2530/**
2531 * tnc_destroy_cnext - destroy left-over obsolete znodes from a failed commit.
2532 * @c: UBIFS file-system description object
2533 *
2534 * Destroy left-over obsolete znodes from a failed commit.
2535 */
2536static void tnc_destroy_cnext(struct ubifs_info *c)
2537{
2538 struct ubifs_znode *cnext;
2539
2540 if (!c->cnext)
2541 return;
2542 ubifs_assert(c->cmt_state == COMMIT_BROKEN);
2543 cnext = c->cnext;
2544 do {
2545 struct ubifs_znode *znode = cnext;
2546
2547 cnext = cnext->cnext;
2548 if (test_bit(OBSOLETE_ZNODE, &znode->flags))
2549 kfree(znode);
2550 } while (cnext && cnext != c->cnext);
2551}
2552
2553/**
2554 * ubifs_tnc_close - close TNC subsystem and free all related resources.
2555 * @c: UBIFS file-system description object
2556 */
2557void ubifs_tnc_close(struct ubifs_info *c)
2558{
2559 long clean_freed;
2560
2561 tnc_destroy_cnext(c);
2562 if (c->zroot.znode) {
2563 clean_freed = ubifs_destroy_tnc_subtree(c->zroot.znode);
2564 atomic_long_sub(clean_freed, &ubifs_clean_zn_cnt);
2565 }
2566 kfree(c->gap_lebs);
2567 kfree(c->ilebs);
2568 destroy_old_idx(c);
2569}
2570
2571/**
2572 * left_znode - get the znode to the left.
2573 * @c: UBIFS file-system description object
2574 * @znode: znode
2575 *
2576 * This function returns a pointer to the znode to the left of @znode or NULL if
2577 * there is not one. A negative error code is returned on failure.
2578 */
2579static struct ubifs_znode *left_znode(struct ubifs_info *c,
2580 struct ubifs_znode *znode)
2581{
2582 int level = znode->level;
2583
2584 while (1) {
2585 int n = znode->iip - 1;
2586
2587 /* Go up until we can go left */
2588 znode = znode->parent;
2589 if (!znode)
2590 return NULL;
2591 if (n >= 0) {
2592 /* Now go down the rightmost branch to 'level' */
2593 znode = get_znode(c, znode, n);
2594 if (IS_ERR(znode))
2595 return znode;
2596 while (znode->level != level) {
2597 n = znode->child_cnt - 1;
2598 znode = get_znode(c, znode, n);
2599 if (IS_ERR(znode))
2600 return znode;
2601 }
2602 break;
2603 }
2604 }
2605 return znode;
2606}
2607
2608/**
2609 * right_znode - get the znode to the right.
2610 * @c: UBIFS file-system description object
2611 * @znode: znode
2612 *
2613 * This function returns a pointer to the znode to the right of @znode or NULL
2614 * if there is not one. A negative error code is returned on failure.
2615 */
2616static struct ubifs_znode *right_znode(struct ubifs_info *c,
2617 struct ubifs_znode *znode)
2618{
2619 int level = znode->level;
2620
2621 while (1) {
2622 int n = znode->iip + 1;
2623
2624 /* Go up until we can go right */
2625 znode = znode->parent;
2626 if (!znode)
2627 return NULL;
2628 if (n < znode->child_cnt) {
2629 /* Now go down the leftmost branch to 'level' */
2630 znode = get_znode(c, znode, n);
2631 if (IS_ERR(znode))
2632 return znode;
2633 while (znode->level != level) {
2634 znode = get_znode(c, znode, 0);
2635 if (IS_ERR(znode))
2636 return znode;
2637 }
2638 break;
2639 }
2640 }
2641 return znode;
2642}
2643
2644/**
2645 * lookup_znode - find a particular indexing node from TNC.
2646 * @c: UBIFS file-system description object
2647 * @key: index node key to lookup
2648 * @level: index node level
2649 * @lnum: index node LEB number
2650 * @offs: index node offset
2651 *
2652 * This function searches an indexing node by its first key @key and its
2653 * address @lnum:@offs. It looks up the indexing tree by pulling all indexing
2654 * nodes it traverses to TNC. This function is called fro indexing nodes which
2655 * were found on the media by scanning, for example when garbage-collecting or
2656 * when doing in-the-gaps commit. This means that the indexing node which is
2657 * looked for does not have to have exactly the same leftmost key @key, because
2658 * the leftmost key may have been changed, in which case TNC will contain a
2659 * dirty znode which still refers the same @lnum:@offs. This function is clever
2660 * enough to recognize such indexing nodes.
2661 *
2662 * Note, if a znode was deleted or changed too much, then this function will
2663 * not find it. For situations like this UBIFS has the old index RB-tree
2664 * (indexed by @lnum:@offs).
2665 *
2666 * This function returns a pointer to the znode found or %NULL if it is not
2667 * found. A negative error code is returned on failure.
2668 */
2669static struct ubifs_znode *lookup_znode(struct ubifs_info *c,
2670 union ubifs_key *key, int level,
2671 int lnum, int offs)
2672{
2673 struct ubifs_znode *znode, *zn;
2674 int n, nn;
2675
2676 /*
2677 * The arguments have probably been read off flash, so don't assume
2678 * they are valid.
2679 */
2680 if (level < 0)
2681 return ERR_PTR(-EINVAL);
2682
2683 /* Get the root znode */
2684 znode = c->zroot.znode;
2685 if (!znode) {
2686 znode = ubifs_load_znode(c, &c->zroot, NULL, 0);
2687 if (IS_ERR(znode))
2688 return znode;
2689 }
2690 /* Check if it is the one we are looking for */
2691 if (c->zroot.lnum == lnum && c->zroot.offs == offs)
2692 return znode;
2693 /* Descend to the parent level i.e. (level + 1) */
2694 if (level >= znode->level)
2695 return NULL;
2696 while (1) {
2697 ubifs_search_zbranch(c, znode, key, &n);
2698 if (n < 0) {
2699 /*
2700 * We reached a znode where the leftmost key is greater
2701 * than the key we are searching for. This is the same
2702 * situation as the one described in a huge comment at
2703 * the end of the 'ubifs_lookup_level0()' function. And
2704 * for exactly the same reasons we have to try to look
2705 * left before giving up.
2706 */
2707 znode = left_znode(c, znode);
2708 if (!znode)
2709 return NULL;
2710 if (IS_ERR(znode))
2711 return znode;
2712 ubifs_search_zbranch(c, znode, key, &n);
2713 ubifs_assert(n >= 0);
2714 }
2715 if (znode->level == level + 1)
2716 break;
2717 znode = get_znode(c, znode, n);
2718 if (IS_ERR(znode))
2719 return znode;
2720 }
2721 /* Check if the child is the one we are looking for */
2722 if (znode->zbranch[n].lnum == lnum && znode->zbranch[n].offs == offs)
2723 return get_znode(c, znode, n);
2724 /* If the key is unique, there is nowhere else to look */
2725 if (!is_hash_key(c, key))
2726 return NULL;
2727 /*
2728 * The key is not unique and so may be also in the znodes to either
2729 * side.
2730 */
2731 zn = znode;
2732 nn = n;
2733 /* Look left */
2734 while (1) {
2735 /* Move one branch to the left */
2736 if (n)
2737 n -= 1;
2738 else {
2739 znode = left_znode(c, znode);
2740 if (!znode)
2741 break;
2742 if (IS_ERR(znode))
2743 return znode;
2744 n = znode->child_cnt - 1;
2745 }
2746 /* Check it */
2747 if (znode->zbranch[n].lnum == lnum &&
2748 znode->zbranch[n].offs == offs)
2749 return get_znode(c, znode, n);
2750 /* Stop if the key is less than the one we are looking for */
2751 if (keys_cmp(c, &znode->zbranch[n].key, key) < 0)
2752 break;
2753 }
2754 /* Back to the middle */
2755 znode = zn;
2756 n = nn;
2757 /* Look right */
2758 while (1) {
2759 /* Move one branch to the right */
2760 if (++n >= znode->child_cnt) {
2761 znode = right_znode(c, znode);
2762 if (!znode)
2763 break;
2764 if (IS_ERR(znode))
2765 return znode;
2766 n = 0;
2767 }
2768 /* Check it */
2769 if (znode->zbranch[n].lnum == lnum &&
2770 znode->zbranch[n].offs == offs)
2771 return get_znode(c, znode, n);
2772 /* Stop if the key is greater than the one we are looking for */
2773 if (keys_cmp(c, &znode->zbranch[n].key, key) > 0)
2774 break;
2775 }
2776 return NULL;
2777}
2778
/**
 * is_idx_node_in_tnc - determine if an index node is in the TNC.
 * @c: UBIFS file-system description object
 * @key: key of index node
 * @level: index node level
 * @lnum: LEB number of index node
 * @offs: offset of index node
 *
 * This function returns %0 if the index node is not referred to in the TNC, %1
 * if the index node is referred to in the TNC and the corresponding znode is
 * dirty, %2 if an index node is referred to in the TNC and the corresponding
 * znode is clean, and a negative error code in case of failure.
 *
 * Note, the @key argument has to be the key of the first child. Also note,
 * this function relies on the fact that 0:0 is never a valid LEB number and
 * offset for a main-area node.
 */
int is_idx_node_in_tnc(struct ubifs_info *c, union ubifs_key *key, int level,
		       int lnum, int offs)
{
	struct ubifs_znode *zn = lookup_znode(c, key, level, lnum, offs);

	if (IS_ERR(zn))
		return PTR_ERR(zn);
	if (!zn)
		return 0;

	if (ubifs_zn_dirty(zn))
		return 1;
	return 2;
}
2809
2810/**
2811 * is_leaf_node_in_tnc - determine if a non-indexing not is in the TNC.
2812 * @c: UBIFS file-system description object
2813 * @key: node key
2814 * @lnum: node LEB number
2815 * @offs: node offset
2816 *
2817 * This function returns %1 if the node is referred to in the TNC, %0 if it is
2818 * not, and a negative error code in case of failure.
2819 *
2820 * Note, this function relies on the fact that 0:0 is never a valid LEB number
2821 * and offset for a main-area node.
2822 */
2823static int is_leaf_node_in_tnc(struct ubifs_info *c, union ubifs_key *key,
2824 int lnum, int offs)
2825{
2826 struct ubifs_zbranch *zbr;
2827 struct ubifs_znode *znode, *zn;
2828 int n, found, err, nn;
2829 const int unique = !is_hash_key(c, key);
2830
2831 found = ubifs_lookup_level0(c, key, &znode, &n);
2832 if (found < 0)
2833 return found; /* Error code */
2834 if (!found)
2835 return 0;
2836 zbr = &znode->zbranch[n];
2837 if (lnum == zbr->lnum && offs == zbr->offs)
2838 return 1; /* Found it */
2839 if (unique)
2840 return 0;
2841 /*
2842 * Because the key is not unique, we have to look left
2843 * and right as well
2844 */
2845 zn = znode;
2846 nn = n;
2847 /* Look left */
2848 while (1) {
2849 err = tnc_prev(c, &znode, &n);
2850 if (err == -ENOENT)
2851 break;
2852 if (err)
2853 return err;
2854 if (keys_cmp(c, key, &znode->zbranch[n].key))
2855 break;
2856 zbr = &znode->zbranch[n];
2857 if (lnum == zbr->lnum && offs == zbr->offs)
2858 return 1; /* Found it */
2859 }
2860 /* Look right */
2861 znode = zn;
2862 n = nn;
2863 while (1) {
2864 err = tnc_next(c, &znode, &n);
2865 if (err) {
2866 if (err == -ENOENT)
2867 return 0;
2868 return err;
2869 }
2870 if (keys_cmp(c, key, &znode->zbranch[n].key))
2871 break;
2872 zbr = &znode->zbranch[n];
2873 if (lnum == zbr->lnum && offs == zbr->offs)
2874 return 1; /* Found it */
2875 }
2876 return 0;
2877}
2878
2879/**
2880 * ubifs_tnc_has_node - determine whether a node is in the TNC.
2881 * @c: UBIFS file-system description object
2882 * @key: node key
2883 * @level: index node level (if it is an index node)
2884 * @lnum: node LEB number
2885 * @offs: node offset
2886 * @is_idx: non-zero if the node is an index node
2887 *
2888 * This function returns %1 if the node is in the TNC, %0 if it is not, and a
2889 * negative error code in case of failure. For index nodes, @key has to be the
2890 * key of the first child. An index node is considered to be in the TNC only if
2891 * the corresponding znode is clean or has not been loaded.
2892 */
2893int ubifs_tnc_has_node(struct ubifs_info *c, union ubifs_key *key, int level,
2894 int lnum, int offs, int is_idx)
2895{
2896 int err;
2897
2898 mutex_lock(&c->tnc_mutex);
2899 if (is_idx) {
2900 err = is_idx_node_in_tnc(c, key, level, lnum, offs);
2901 if (err < 0)
2902 goto out_unlock;
2903 if (err == 1)
2904 /* The index node was found but it was dirty */
2905 err = 0;
2906 else if (err == 2)
2907 /* The index node was found and it was clean */
2908 err = 1;
2909 else
2910 BUG_ON(err != 0);
2911 } else
2912 err = is_leaf_node_in_tnc(c, key, lnum, offs);
2913
2914out_unlock:
2915 mutex_unlock(&c->tnc_mutex);
2916 return err;
2917}
2918
2919/**
2920 * ubifs_dirty_idx_node - dirty an index node.
2921 * @c: UBIFS file-system description object
2922 * @key: index node key
2923 * @level: index node level
2924 * @lnum: index node LEB number
2925 * @offs: index node offset
2926 *
2927 * This function loads and dirties an index node so that it can be garbage
2928 * collected. The @key argument has to be the key of the first child. This
2929 * function relies on the fact that 0:0 is never a valid LEB number and offset
2930 * for a main-area node. Returns %0 on success and a negative error code on
2931 * failure.
2932 */
2933int ubifs_dirty_idx_node(struct ubifs_info *c, union ubifs_key *key, int level,
2934 int lnum, int offs)
2935{
2936 struct ubifs_znode *znode;
2937 int err = 0;
2938
2939 mutex_lock(&c->tnc_mutex);
2940 znode = lookup_znode(c, key, level, lnum, offs);
2941 if (!znode)
2942 goto out_unlock;
2943 if (IS_ERR(znode)) {
2944 err = PTR_ERR(znode);
2945 goto out_unlock;
2946 }
2947 znode = dirty_cow_bottom_up(c, znode);
2948 if (IS_ERR(znode)) {
2949 err = PTR_ERR(znode);
2950 goto out_unlock;
2951 }
2952
2953out_unlock:
2954 mutex_unlock(&c->tnc_mutex);
2955 return err;
2956}
diff --git a/fs/ubifs/tnc_commit.c b/fs/ubifs/tnc_commit.c
new file mode 100644
index 00000000000..8117e65ba2e
--- /dev/null
+++ b/fs/ubifs/tnc_commit.c
@@ -0,0 +1,1103 @@
1/*
2 * This file is part of UBIFS.
3 *
4 * Copyright (C) 2006-2008 Nokia Corporation.
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms of the GNU General Public License version 2 as published by
8 * the Free Software Foundation.
9 *
10 * This program is distributed in the hope that it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13 * more details.
14 *
15 * You should have received a copy of the GNU General Public License along with
16 * this program; if not, write to the Free Software Foundation, Inc., 51
17 * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
18 *
19 * Authors: Adrian Hunter
20 * Artem Bityutskiy (Битюцкий Артём)
21 */
22
23/* This file implements TNC functions for committing */
24
25#include "ubifs.h"
26
27/**
28 * make_idx_node - make an index node for fill-the-gaps method of TNC commit.
29 * @c: UBIFS file-system description object
30 * @idx: buffer in which to place new index node
31 * @znode: znode from which to make new index node
32 * @lnum: LEB number where new index node will be written
33 * @offs: offset where new index node will be written
34 * @len: length of new index node
35 */
36static int make_idx_node(struct ubifs_info *c, struct ubifs_idx_node *idx,
37 struct ubifs_znode *znode, int lnum, int offs, int len)
38{
39 struct ubifs_znode *zp;
40 int i, err;
41
42 /* Make index node */
43 idx->ch.node_type = UBIFS_IDX_NODE;
44 idx->child_cnt = cpu_to_le16(znode->child_cnt);
45 idx->level = cpu_to_le16(znode->level);
46 for (i = 0; i < znode->child_cnt; i++) {
47 struct ubifs_branch *br = ubifs_idx_branch(c, idx, i);
48 struct ubifs_zbranch *zbr = &znode->zbranch[i];
49
50 key_write_idx(c, &zbr->key, &br->key);
51 br->lnum = cpu_to_le32(zbr->lnum);
52 br->offs = cpu_to_le32(zbr->offs);
53 br->len = cpu_to_le32(zbr->len);
54 if (!zbr->lnum || !zbr->len) {
55 ubifs_err("bad ref in znode");
56 dbg_dump_znode(c, znode);
57 if (zbr->znode)
58 dbg_dump_znode(c, zbr->znode);
59 }
60 }
61 ubifs_prepare_node(c, idx, len, 0);
62
63#ifdef CONFIG_UBIFS_FS_DEBUG
64 znode->lnum = lnum;
65 znode->offs = offs;
66 znode->len = len;
67#endif
68
69 err = insert_old_idx_znode(c, znode);
70
71 /* Update the parent */
72 zp = znode->parent;
73 if (zp) {
74 struct ubifs_zbranch *zbr;
75
76 zbr = &zp->zbranch[znode->iip];
77 zbr->lnum = lnum;
78 zbr->offs = offs;
79 zbr->len = len;
80 } else {
81 c->zroot.lnum = lnum;
82 c->zroot.offs = offs;
83 c->zroot.len = len;
84 }
85 c->calc_idx_sz += ALIGN(len, 8);
86
87 atomic_long_dec(&c->dirty_zn_cnt);
88
89 ubifs_assert(ubifs_zn_dirty(znode));
90 ubifs_assert(test_bit(COW_ZNODE, &znode->flags));
91
92 __clear_bit(DIRTY_ZNODE, &znode->flags);
93 __clear_bit(COW_ZNODE, &znode->flags);
94
95 return err;
96}
97
98/**
99 * fill_gap - make index nodes in gaps in dirty index LEBs.
100 * @c: UBIFS file-system description object
101 * @lnum: LEB number that gap appears in
102 * @gap_start: offset of start of gap
103 * @gap_end: offset of end of gap
104 * @dirt: adds dirty space to this
105 *
106 * This function returns the number of index nodes written into the gap.
107 */
108static int fill_gap(struct ubifs_info *c, int lnum, int gap_start, int gap_end,
109 int *dirt)
110{
111 int len, gap_remains, gap_pos, written, pad_len;
112
113 ubifs_assert((gap_start & 7) == 0);
114 ubifs_assert((gap_end & 7) == 0);
115 ubifs_assert(gap_end >= gap_start);
116
117 gap_remains = gap_end - gap_start;
118 if (!gap_remains)
119 return 0;
120 gap_pos = gap_start;
121 written = 0;
122 while (c->enext) {
123 len = ubifs_idx_node_sz(c, c->enext->child_cnt);
124 if (len < gap_remains) {
125 struct ubifs_znode *znode = c->enext;
126 const int alen = ALIGN(len, 8);
127 int err;
128
129 ubifs_assert(alen <= gap_remains);
130 err = make_idx_node(c, c->ileb_buf + gap_pos, znode,
131 lnum, gap_pos, len);
132 if (err)
133 return err;
134 gap_remains -= alen;
135 gap_pos += alen;
136 c->enext = znode->cnext;
137 if (c->enext == c->cnext)
138 c->enext = NULL;
139 written += 1;
140 } else
141 break;
142 }
143 if (gap_end == c->leb_size) {
144 c->ileb_len = ALIGN(gap_pos, c->min_io_size);
145 /* Pad to end of min_io_size */
146 pad_len = c->ileb_len - gap_pos;
147 } else
148 /* Pad to end of gap */
149 pad_len = gap_remains;
150 dbg_gc("LEB %d:%d to %d len %d nodes written %d wasted bytes %d",
151 lnum, gap_start, gap_end, gap_end - gap_start, written, pad_len);
152 ubifs_pad(c, c->ileb_buf + gap_pos, pad_len);
153 *dirt += pad_len;
154 return written;
155}
156
157/**
158 * find_old_idx - find an index node obsoleted since the last commit start.
159 * @c: UBIFS file-system description object
160 * @lnum: LEB number of obsoleted index node
161 * @offs: offset of obsoleted index node
162 *
163 * Returns %1 if found and %0 otherwise.
164 */
165static int find_old_idx(struct ubifs_info *c, int lnum, int offs)
166{
167 struct ubifs_old_idx *o;
168 struct rb_node *p;
169
170 p = c->old_idx.rb_node;
171 while (p) {
172 o = rb_entry(p, struct ubifs_old_idx, rb);
173 if (lnum < o->lnum)
174 p = p->rb_left;
175 else if (lnum > o->lnum)
176 p = p->rb_right;
177 else if (offs < o->offs)
178 p = p->rb_left;
179 else if (offs > o->offs)
180 p = p->rb_right;
181 else
182 return 1;
183 }
184 return 0;
185}
186
/**
 * is_idx_node_in_use - determine if an index node can be overwritten.
 * @c: UBIFS file-system description object
 * @key: key of index node
 * @level: index node level
 * @lnum: LEB number of index node
 * @offs: offset of index node
 *
 * Return values:
 *   %0 - @key / @lnum / @offs identify an index node that was not part of the
 *        old index (obsolete, may be overwritten);
 *   %1 - the index node was part of the old index but is now dirty;
 *   %2 - the index node is in the index and is clean;
 *   a negative error code on failure.
 */
static int is_idx_node_in_use(struct ubifs_info *c, union ubifs_key *key,
			      int level, int lnum, int offs)
{
	int in_tnc = is_idx_node_in_tnc(c, key, level, lnum, offs);

	/* Either an error code, or the TNC already gave a definite answer */
	if (in_tnc != 0)
		return in_tnc;

	/* Not in the TNC any more, but it may still belong to the old index */
	return find_old_idx(c, lnum, offs) ? 1 : 0;
}
213
214/**
215 * layout_leb_in_gaps - layout index nodes using in-the-gaps method.
216 * @c: UBIFS file-system description object
217 * @p: return LEB number here
218 *
219 * This function lays out new index nodes for dirty znodes using in-the-gaps
220 * method of TNC commit.
221 * This function merely puts the next znode into the next gap, making no attempt
222 * to try to maximise the number of znodes that fit.
223 * This function returns the number of index nodes written into the gaps, or a
224 * negative error code on failure.
225 */
226static int layout_leb_in_gaps(struct ubifs_info *c, int *p)
227{
228 struct ubifs_scan_leb *sleb;
229 struct ubifs_scan_node *snod;
230 int lnum, dirt = 0, gap_start, gap_end, err, written, tot_written;
231
232 tot_written = 0;
233 /* Get an index LEB with lots of obsolete index nodes */
234 lnum = ubifs_find_dirty_idx_leb(c);
235 if (lnum < 0)
236 /*
237 * There also may be dirt in the index head that could be
238 * filled, however we do not check there at present.
239 */
240 return lnum; /* Error code */
241 *p = lnum;
242 dbg_gc("LEB %d", lnum);
243 /*
244 * Scan the index LEB. We use the generic scan for this even though
245 * it is more comprehensive and less efficient than is needed for this
246 * purpose.
247 */
248 sleb = ubifs_scan(c, lnum, 0, c->ileb_buf);
249 c->ileb_len = 0;
250 if (IS_ERR(sleb))
251 return PTR_ERR(sleb);
252 gap_start = 0;
253 list_for_each_entry(snod, &sleb->nodes, list) {
254 struct ubifs_idx_node *idx;
255 int in_use, level;
256
257 ubifs_assert(snod->type == UBIFS_IDX_NODE);
258 idx = snod->node;
259 key_read(c, ubifs_idx_key(c, idx), &snod->key);
260 level = le16_to_cpu(idx->level);
261 /* Determine if the index node is in use (not obsolete) */
262 in_use = is_idx_node_in_use(c, &snod->key, level, lnum,
263 snod->offs);
264 if (in_use < 0) {
265 ubifs_scan_destroy(sleb);
266 return in_use; /* Error code */
267 }
268 if (in_use) {
269 if (in_use == 1)
270 dirt += ALIGN(snod->len, 8);
271 /*
272 * The obsolete index nodes form gaps that can be
273 * overwritten. This gap has ended because we have
274 * found an index node that is still in use
275 * i.e. not obsolete
276 */
277 gap_end = snod->offs;
278 /* Try to fill gap */
279 written = fill_gap(c, lnum, gap_start, gap_end, &dirt);
280 if (written < 0) {
281 ubifs_scan_destroy(sleb);
282 return written; /* Error code */
283 }
284 tot_written += written;
285 gap_start = ALIGN(snod->offs + snod->len, 8);
286 }
287 }
288 ubifs_scan_destroy(sleb);
289 c->ileb_len = c->leb_size;
290 gap_end = c->leb_size;
291 /* Try to fill gap */
292 written = fill_gap(c, lnum, gap_start, gap_end, &dirt);
293 if (written < 0)
294 return written; /* Error code */
295 tot_written += written;
296 if (tot_written == 0) {
297 struct ubifs_lprops lp;
298
299 dbg_gc("LEB %d wrote %d index nodes", lnum, tot_written);
300 err = ubifs_read_one_lp(c, lnum, &lp);
301 if (err)
302 return err;
303 if (lp.free == c->leb_size) {
304 /*
305 * We must have snatched this LEB from the idx_gc list
306 * so we need to correct the free and dirty space.
307 */
308 err = ubifs_change_one_lp(c, lnum,
309 c->leb_size - c->ileb_len,
310 dirt, 0, 0, 0);
311 if (err)
312 return err;
313 }
314 return 0;
315 }
316 err = ubifs_change_one_lp(c, lnum, c->leb_size - c->ileb_len, dirt,
317 0, 0, 0);
318 if (err)
319 return err;
320 err = ubifs_leb_change(c, lnum, c->ileb_buf, c->ileb_len,
321 UBI_SHORTTERM);
322 if (err)
323 return err;
324 dbg_gc("LEB %d wrote %d index nodes", lnum, tot_written);
325 return tot_written;
326}
327
328/**
329 * get_leb_cnt - calculate the number of empty LEBs needed to commit.
330 * @c: UBIFS file-system description object
331 * @cnt: number of znodes to commit
332 *
333 * This function returns the number of empty LEBs needed to commit @cnt znodes
334 * to the current index head. The number is not exact and may be more than
335 * needed.
336 */
337static int get_leb_cnt(struct ubifs_info *c, int cnt)
338{
339 int d;
340
341 /* Assume maximum index node size (i.e. overestimate space needed) */
342 cnt -= (c->leb_size - c->ihead_offs) / c->max_idx_node_sz;
343 if (cnt < 0)
344 cnt = 0;
345 d = c->leb_size / c->max_idx_node_sz;
346 return DIV_ROUND_UP(cnt, d);
347}
348
349/**
350 * layout_in_gaps - in-the-gaps method of committing TNC.
351 * @c: UBIFS file-system description object
352 * @cnt: number of dirty znodes to commit.
353 *
354 * This function lays out new index nodes for dirty znodes using in-the-gaps
355 * method of TNC commit.
356 *
357 * This function returns %0 on success and a negative error code on failure.
358 */
359static int layout_in_gaps(struct ubifs_info *c, int cnt)
360{
361 int err, leb_needed_cnt, written, *p;
362
363 dbg_gc("%d znodes to write", cnt);
364
365 c->gap_lebs = kmalloc(sizeof(int) * (c->lst.idx_lebs + 1), GFP_NOFS);
366 if (!c->gap_lebs)
367 return -ENOMEM;
368
369 p = c->gap_lebs;
370 do {
371 ubifs_assert(p < c->gap_lebs + sizeof(int) * c->lst.idx_lebs);
372 written = layout_leb_in_gaps(c, p);
373 if (written < 0) {
374 err = written;
375 if (err == -ENOSPC) {
376 if (!dbg_force_in_the_gaps_enabled) {
377 /*
378 * Do not print scary warnings if the
379 * debugging option which forces
380 * in-the-gaps is enabled.
381 */
382 ubifs_err("out of space");
383 spin_lock(&c->space_lock);
384 dbg_dump_budg(c);
385 spin_unlock(&c->space_lock);
386 dbg_dump_lprops(c);
387 }
388 /* Try to commit anyway */
389 err = 0;
390 break;
391 }
392 kfree(c->gap_lebs);
393 c->gap_lebs = NULL;
394 return err;
395 }
396 p++;
397 cnt -= written;
398 leb_needed_cnt = get_leb_cnt(c, cnt);
399 dbg_gc("%d znodes remaining, need %d LEBs, have %d", cnt,
400 leb_needed_cnt, c->ileb_cnt);
401 } while (leb_needed_cnt > c->ileb_cnt);
402
403 *p = -1;
404 return 0;
405}
406
407/**
408 * layout_in_empty_space - layout index nodes in empty space.
409 * @c: UBIFS file-system description object
410 *
411 * This function lays out new index nodes for dirty znodes using empty LEBs.
412 *
 * Layout starts at the current index head (@c->ihead_lnum, @c->ihead_offs) and
 * moves on to the LEBs listed in @c->ilebs once the current LEB has no room
 * for the next node. The index nodes themselves are not built here (that is
 * done by 'write_index()'); for every znode on the commit list, starting at
 * @c->enext, this function only stores the chosen position in the parent's
 * zbranch (or in @c->zroot if the znode has no parent), adds the 8-byte
 * aligned node length to @c->calc_idx_sz and decrements @c->dirty_zn_cnt.
 * 'ubifs_update_one_lp()' is called each time the function is done with a
 * LEB. %-ENOSPC is returned if more LEBs are needed than @c->ilebs provides.
 *
413 * This function returns %0 on success and a negative error code on failure.
414 */
415static int layout_in_empty_space(struct ubifs_info *c)
416{
417 struct ubifs_znode *znode, *cnext, *zp;
418 int lnum, offs, len, next_len, buf_len, buf_offs, used, avail;
419 int wlen, blen, err;
420
 /* Nothing (left) on the commit list means there is nothing to lay out */
421 cnext = c->enext;
422 if (!cnext)
423 return 0;
424
425 lnum = c->ihead_lnum;
426 buf_offs = c->ihead_offs;
427
 /*
  * Model a buffer sized for an index node with 'c->fanout' children, rounded
  * up to the minimum I/O unit. Nothing is stored in it here - only the 'used'
  * and 'avail' counters are tracked.
  * NOTE(review): 'write_index()' sizes its buffer from 'c->max_idx_node_sz';
  * the two are presumably equal - confirm.
  */
428 buf_len = ubifs_idx_node_sz(c, c->fanout);
429 buf_len = ALIGN(buf_len, c->min_io_size);
430 used = 0;
431 avail = buf_len;
432
433 /* Ensure there is enough room for first write */
434 next_len = ubifs_idx_node_sz(c, cnext->child_cnt);
 /* 'lnum == -1' makes the loop below take the next LEB from 'c->ilebs' */
435 if (buf_offs + next_len > c->leb_size)
436 lnum = -1;
437
438 while (1) {
439 znode = cnext;
440
441 len = ubifs_idx_node_sz(c, znode->child_cnt);
442
443 /* Determine the index node position */
444 if (lnum == -1) {
445 if (c->ileb_nxt >= c->ileb_cnt) {
446 ubifs_err("out of space");
447 return -ENOSPC;
448 }
449 lnum = c->ilebs[c->ileb_nxt++];
450 buf_offs = 0;
451 used = 0;
452 avail = buf_len;
453 }
454
 /* 'buf_offs' is where the buffer starts in the LEB, 'used' the fill level */
455 offs = buf_offs + used;
456
 /* Remembered so that 'write_index()' can cross-check the positions */
457#ifdef CONFIG_UBIFS_FS_DEBUG
458 znode->lnum = lnum;
459 znode->offs = offs;
460 znode->len = len;
461#endif
462
463 /* Update the parent */
464 zp = znode->parent;
465 if (zp) {
466 struct ubifs_zbranch *zbr;
467 int i;
468
469 i = znode->iip;
470 zbr = &zp->zbranch[i];
471 zbr->lnum = lnum;
472 zbr->offs = offs;
473 zbr->len = len;
474 } else {
475 c->zroot.lnum = lnum;
476 c->zroot.offs = offs;
477 c->zroot.len = len;
478 }
479 c->calc_idx_sz += ALIGN(len, 8);
480
481 /*
482 * Once lprops is updated, we can decrease the dirty znode count
483 * but it is easier to just do it here.
484 */
485 atomic_long_dec(&c->dirty_zn_cnt);
486
487 /*
488 * Calculate the next index node length to see if there is
489 * enough room for it
490 */
 /*
  * The commit list is circular: arriving back at 'c->cnext' means that
  * 'znode' was the last one, which is signalled by 'next_len == 0'.
  */
491 cnext = znode->cnext;
492 if (cnext == c->cnext)
493 next_len = 0;
494 else
495 next_len = ubifs_idx_node_sz(c, cnext->child_cnt);
496
 /*
  * With a minimum I/O unit of 1 no buffering is modelled: each node simply
  * follows the previous one at the next 8-byte boundary.
  */
497 if (c->min_io_size == 1) {
498 buf_offs += ALIGN(len, 8);
499 if (next_len) {
500 if (buf_offs + next_len <= c->leb_size)
501 continue;
 /*
  * The next node does not fit, move on to a new LEB.
  * NOTE(review): the arguments look like free = 0 and the unused tail added
  * as dirty space - confirm against 'ubifs_update_one_lp()'.
  */
502 err = ubifs_update_one_lp(c, lnum, 0,
503 c->leb_size - buf_offs, 0, 0);
504 if (err)
505 return err;
506 lnum = -1;
507 continue;
508 }
 /* Last znode: here the unused tail is passed as the third argument */
509 err = ubifs_update_one_lp(c, lnum,
510 c->leb_size - buf_offs, 0, 0, 0);
511 if (err)
512 return err;
513 break;
514 }
515
516 /* Update buffer positions */
 /* 'wlen' ends at the unaligned end of this node, 'used' is 8-byte aligned */
517 wlen = used + len;
518 used += ALIGN(len, 8);
519 avail -= ALIGN(len, 8);
520
 /* Keep accumulating while there is a next node, it fits and buffer has room */
521 if (next_len != 0 &&
522 buf_offs + used + next_len <= c->leb_size &&
523 avail > 0)
524 continue;
525
 /*
  * 'blen' is how far 'buf_offs' advances: the whole buffer if it filled up
  * while the next node still fits into this LEB, otherwise the data so far
  * rounded up to the minimum I/O unit.
  */
526 if (avail <= 0 && next_len &&
527 buf_offs + used + next_len <= c->leb_size)
528 blen = buf_len;
529 else
530 blen = ALIGN(wlen, c->min_io_size);
531
532 /* The buffer is full or there are no more znodes to do */
533 buf_offs += blen;
534 if (next_len) {
535 if (buf_offs + next_len > c->leb_size) {
536 err = ubifs_update_one_lp(c, lnum,
537 c->leb_size - buf_offs, blen - used,
538 0, 0);
539 if (err)
540 return err;
541 lnum = -1;
542 }
 /* Whatever exceeded 'blen' is carried over to the start of the buffer */
543 used -= blen;
544 if (used < 0)
545 used = 0;
546 avail = buf_len - used;
547 continue;
548 }
549 err = ubifs_update_one_lp(c, lnum, c->leb_size - buf_offs,
550 blen - used, 0, 0);
551 if (err)
552 return err;
553 break;
554 }
555
 /* Expected new index head, verified at the end of 'write_index()' */
556#ifdef CONFIG_UBIFS_FS_DEBUG
557 c->new_ihead_lnum = lnum;
558 c->new_ihead_offs = buf_offs;
559#endif
560
561 return 0;
562}
563
/**
 * layout_commit - determine positions of index nodes to commit.
 * @c: UBIFS file-system description object
 * @no_space: indicates that insufficient empty LEBs were allocated
 * @cnt: number of znodes to commit
 *
 * Calculate and update the positions of index nodes to commit. When @no_space
 * is set, index nodes are first placed into the gaps created by obsolete index
 * nodes in non-empty index LEBs; whatever is left is then laid out in empty
 * space. For this purpose, an obsolete index node is one that was not in the
 * index as at the end of the last commit. To write "in-the-gaps" requires that
 * those index LEBs are updated atomically in-place.
 *
 * Returns %0 on success and a negative error code on failure.
 */
static int layout_commit(struct ubifs_info *c, int no_space, int cnt)
{
	if (no_space) {
		int err = layout_in_gaps(c, cnt);

		if (err)
			return err;
	}

	return layout_in_empty_space(c);
}
589
590/**
591 * find_first_dirty - find first dirty znode.
592 * @znode: znode to begin searching from
593 */
594static struct ubifs_znode *find_first_dirty(struct ubifs_znode *znode)
595{
596 int i, cont;
597
598 if (!znode)
599 return NULL;
600
601 while (1) {
602 if (znode->level == 0) {
603 if (ubifs_zn_dirty(znode))
604 return znode;
605 return NULL;
606 }
607 cont = 0;
608 for (i = 0; i < znode->child_cnt; i++) {
609 struct ubifs_zbranch *zbr = &znode->zbranch[i];
610
611 if (zbr->znode && ubifs_zn_dirty(zbr->znode)) {
612 znode = zbr->znode;
613 cont = 1;
614 break;
615 }
616 }
617 if (!cont) {
618 if (ubifs_zn_dirty(znode))
619 return znode;
620 return NULL;
621 }
622 }
623}
624
625/**
626 * find_next_dirty - find next dirty znode.
627 * @znode: znode to begin searching from
628 */
629static struct ubifs_znode *find_next_dirty(struct ubifs_znode *znode)
630{
631 int n = znode->iip + 1;
632
633 znode = znode->parent;
634 if (!znode)
635 return NULL;
636 for (; n < znode->child_cnt; n++) {
637 struct ubifs_zbranch *zbr = &znode->zbranch[n];
638
639 if (zbr->znode && ubifs_zn_dirty(zbr->znode))
640 return find_first_dirty(zbr->znode);
641 }
642 return znode;
643}
644
645/**
646 * get_znodes_to_commit - create list of dirty znodes to commit.
647 * @c: UBIFS file-system description object
648 *
649 * This function returns the number of znodes to commit.
650 */
651static int get_znodes_to_commit(struct ubifs_info *c)
652{
653 struct ubifs_znode *znode, *cnext;
654 int cnt = 0;
655
656 c->cnext = find_first_dirty(c->zroot.znode);
657 znode = c->enext = c->cnext;
658 if (!znode) {
659 dbg_cmt("no znodes to commit");
660 return 0;
661 }
662 cnt += 1;
663 while (1) {
664 ubifs_assert(!test_bit(COW_ZNODE, &znode->flags));
665 __set_bit(COW_ZNODE, &znode->flags);
666 znode->alt = 0;
667 cnext = find_next_dirty(znode);
668 if (!cnext) {
669 znode->cnext = c->cnext;
670 break;
671 }
672 znode->cnext = cnext;
673 znode = cnext;
674 cnt += 1;
675 }
676 dbg_cmt("committing %d znodes", cnt);
677 ubifs_assert(cnt == atomic_long_read(&c->dirty_zn_cnt));
678 return cnt;
679}
680
681/**
682 * alloc_idx_lebs - allocate empty LEBs to be used to commit.
683 * @c: UBIFS file-system description object
684 * @cnt: number of znodes to commit
685 *
686 * This function returns %-ENOSPC if it cannot allocate a sufficient number of
687 * empty LEBs. %0 is returned on success, otherwise a negative error code
688 * is returned.
689 */
690static int alloc_idx_lebs(struct ubifs_info *c, int cnt)
691{
692 int i, leb_cnt, lnum;
693
694 c->ileb_cnt = 0;
695 c->ileb_nxt = 0;
696 leb_cnt = get_leb_cnt(c, cnt);
697 dbg_cmt("need about %d empty LEBS for TNC commit", leb_cnt);
698 if (!leb_cnt)
699 return 0;
700 c->ilebs = kmalloc(leb_cnt * sizeof(int), GFP_NOFS);
701 if (!c->ilebs)
702 return -ENOMEM;
703 for (i = 0; i < leb_cnt; i++) {
704 lnum = ubifs_find_free_leb_for_idx(c);
705 if (lnum < 0)
706 return lnum;
707 c->ilebs[c->ileb_cnt++] = lnum;
708 dbg_cmt("LEB %d", lnum);
709 }
710 if (dbg_force_in_the_gaps())
711 return -ENOSPC;
712 return 0;
713}
714
715/**
716 * free_unused_idx_lebs - free unused LEBs that were allocated for the commit.
717 * @c: UBIFS file-system description object
718 *
719 * It is possible that we allocate more empty LEBs for the commit than we need.
720 * This functions frees the surplus.
721 *
722 * This function returns %0 on success and a negative error code on failure.
723 */
724static int free_unused_idx_lebs(struct ubifs_info *c)
725{
726 int i, err = 0, lnum, er;
727
728 for (i = c->ileb_nxt; i < c->ileb_cnt; i++) {
729 lnum = c->ilebs[i];
730 dbg_cmt("LEB %d", lnum);
731 er = ubifs_change_one_lp(c, lnum, LPROPS_NC, LPROPS_NC, 0,
732 LPROPS_INDEX | LPROPS_TAKEN, 0);
733 if (!err)
734 err = er;
735 }
736 return err;
737}
738
739/**
740 * free_idx_lebs - free unused LEBs after commit end.
741 * @c: UBIFS file-system description object
742 *
743 * This function returns %0 on success and a negative error code on failure.
744 */
745static int free_idx_lebs(struct ubifs_info *c)
746{
747 int err;
748
749 err = free_unused_idx_lebs(c);
750 kfree(c->ilebs);
751 c->ilebs = NULL;
752 return err;
753}
754
755/**
756 * ubifs_tnc_start_commit - start TNC commit.
757 * @c: UBIFS file-system description object
758 * @zroot: new index root position is returned here
759 *
760 * This function prepares the list of indexing nodes to commit and lays out
761 * their positions on flash. If there is not enough free space it uses the
762 * in-gap commit method. Returns zero in case of success and a negative error
763 * code in case of failure.
764 */
765int ubifs_tnc_start_commit(struct ubifs_info *c, struct ubifs_zbranch *zroot)
766{
767 int err = 0, cnt;
768
769 mutex_lock(&c->tnc_mutex);
770 err = dbg_check_tnc(c, 1);
771 if (err)
772 goto out;
773 cnt = get_znodes_to_commit(c);
774 if (cnt != 0) {
775 int no_space = 0;
776
777 err = alloc_idx_lebs(c, cnt);
778 if (err == -ENOSPC)
779 no_space = 1;
780 else if (err)
781 goto out_free;
782 err = layout_commit(c, no_space, cnt);
783 if (err)
784 goto out_free;
785 ubifs_assert(atomic_long_read(&c->dirty_zn_cnt) == 0);
786 err = free_unused_idx_lebs(c);
787 if (err)
788 goto out;
789 }
790 destroy_old_idx(c);
791 memcpy(zroot, &c->zroot, sizeof(struct ubifs_zbranch));
792
793 err = ubifs_save_dirty_idx_lnums(c);
794 if (err)
795 goto out;
796
797 spin_lock(&c->space_lock);
798 /*
799 * Although we have not finished committing yet, update size of the
800 * committed index ('c->old_idx_sz') and zero out the index growth
801 * budget. It is OK to do this now, because we've reserved all the
802 * space which is needed to commit the index, and it is save for the
803 * budgeting subsystem to assume the index is already committed,
804 * even though it is not.
805 */
806 c->old_idx_sz = c->calc_idx_sz;
807 c->budg_uncommitted_idx = 0;
808 spin_unlock(&c->space_lock);
809 mutex_unlock(&c->tnc_mutex);
810
811 dbg_cmt("number of index LEBs %d", c->lst.idx_lebs);
812 dbg_cmt("size of index %llu", c->calc_idx_sz);
813 return err;
814
815out_free:
816 free_idx_lebs(c);
817out:
818 mutex_unlock(&c->tnc_mutex);
819 return err;
820}
821
822/**
823 * write_index - write index nodes.
824 * @c: UBIFS file-system description object
825 *
826 * This function writes the index nodes whose positions were laid out in the
827 * layout_in_empty_space function.
 *
 * Index nodes are built in @c->cbuf from the znodes on the commit list
 * (starting at @c->enext) and written with 'ubifs_leb_write()', beginning at
 * the index head and continuing in the LEBs listed in @c->ilebs. The
 * %DIRTY_ZNODE and %COW_ZNODE flags of each znode are cleared as soon as its
 * node has been prepared in the buffer. With CONFIG_UBIFS_FS_DEBUG the
 * positions are checked against those recorded at layout time. On success
 * @c->ihead_lnum and @c->ihead_offs are set to the new index head.
 *
 * Returns %0 on success and a negative error code on failure.
828 */
829static int write_index(struct ubifs_info *c)
830{
831 struct ubifs_idx_node *idx;
832 struct ubifs_znode *znode, *cnext;
833 int i, lnum, offs, len, next_len, buf_len, buf_offs, used;
834 int avail, wlen, err, lnum_pos = 0;
835
 /* Nothing was left for the empty-space method, so nothing to write */
836 cnext = c->enext;
837 if (!cnext)
838 return 0;
839
840 /*
841 * Always write index nodes to the index head so that index nodes and
842 * other types of nodes are never mixed in the same erase block.
843 */
844 lnum = c->ihead_lnum;
845 buf_offs = c->ihead_offs;
846
847 /* Allocate commit buffer */
 /*
  * Only the usable length of 'c->cbuf' is computed here; the buffer itself is
  * not allocated in this function.
  */
848 buf_len = ALIGN(c->max_idx_node_sz, c->min_io_size);
849 used = 0;
850 avail = buf_len;
851
852 /* Ensure there is enough room for first write */
853 next_len = ubifs_idx_node_sz(c, cnext->child_cnt);
854 if (buf_offs + next_len > c->leb_size) {
855 err = ubifs_update_one_lp(c, lnum, LPROPS_NC, 0, 0,
856 LPROPS_TAKEN);
857 if (err)
858 return err;
 /* 'lnum == -1' makes the loop below take the next LEB from 'c->ilebs' */
859 lnum = -1;
860 }
861
862 while (1) {
863 cond_resched();
864
865 znode = cnext;
 /* The new node is appended after what is already in the buffer */
866 idx = c->cbuf + used;
867
868 /* Make index node */
869 idx->ch.node_type = UBIFS_IDX_NODE;
870 idx->child_cnt = cpu_to_le16(znode->child_cnt);
871 idx->level = cpu_to_le16(znode->level);
872 for (i = 0; i < znode->child_cnt; i++) {
873 struct ubifs_branch *br = ubifs_idx_branch(c, idx, i);
874 struct ubifs_zbranch *zbr = &znode->zbranch[i];
875
876 key_write_idx(c, &zbr->key, &br->key);
877 br->lnum = cpu_to_le32(zbr->lnum);
878 br->offs = cpu_to_le32(zbr->offs);
879 br->len = cpu_to_le32(zbr->len);
 /*
  * NOTE(review): a branch with a zero LEB number or length is only reported
  * here - the index node is still prepared and written.
  */
880 if (!zbr->lnum || !zbr->len) {
881 ubifs_err("bad ref in znode");
882 dbg_dump_znode(c, znode);
883 if (zbr->znode)
884 dbg_dump_znode(c, zbr->znode);
885 }
886 }
887 len = ubifs_idx_node_sz(c, znode->child_cnt);
888 ubifs_prepare_node(c, idx, len, 0);
889
890 /* Determine the index node position */
 /*
  * NOTE(review): 'lnum_pos' is not checked against 'c->ileb_cnt' here;
  * presumably 'layout_in_empty_space()' has already made sure that enough
  * LEBs are available - confirm.
  */
891 if (lnum == -1) {
892 lnum = c->ilebs[lnum_pos++];
893 buf_offs = 0;
894 used = 0;
895 avail = buf_len;
896 }
897 offs = buf_offs + used;
898
 /* The position must match what was recorded when the layout was done */
899#ifdef CONFIG_UBIFS_FS_DEBUG
900 if (lnum != znode->lnum || offs != znode->offs ||
901 len != znode->len) {
902 ubifs_err("inconsistent znode posn");
903 return -EINVAL;
904 }
905#endif
906
907 /* Grab some stuff from znode while we still can */
908 cnext = znode->cnext;
909
910 ubifs_assert(ubifs_zn_dirty(znode));
911 ubifs_assert(test_bit(COW_ZNODE, &znode->flags));
912
913 /*
914 * It is important that other threads should see %DIRTY_ZNODE
915 * flag cleared before %COW_ZNODE. Specifically, it matters in
916 * the 'dirty_cow_znode()' function. This is the reason for the
917 * first barrier. Also, we want the bit changes to be seen to
918 * other threads ASAP, to avoid unnecessary copying, which is
919 * the reason for the second barrier.
920 */
921 clear_bit(DIRTY_ZNODE, &znode->flags);
922 smp_mb__before_clear_bit();
923 clear_bit(COW_ZNODE, &znode->flags);
924 smp_mb__after_clear_bit();
925
926 /* Do not access znode from this point on */
927
928 /* Update buffer positions */
 /* 'wlen' ends at the unaligned end of this node, 'used' is 8-byte aligned */
929 wlen = used + len;
930 used += ALIGN(len, 8);
931 avail -= ALIGN(len, 8);
932
933 /*
934 * Calculate the next index node length to see if there is
935 * enough room for it
936 */
 /* The commit list is circular: back at 'c->cnext' means no more znodes */
937 if (cnext == c->cnext)
938 next_len = 0;
939 else
940 next_len = ubifs_idx_node_sz(c, cnext->child_cnt);
941
942 if (c->min_io_size == 1) {
943 /*
944 * Write the prepared index node immediately if there is
945 * no minimum IO size
946 */
947 err = ubifs_leb_write(c, lnum, c->cbuf, buf_offs,
948 wlen, UBI_SHORTTERM);
949 if (err)
950 return err;
951 buf_offs += ALIGN(wlen, 8);
952 if (next_len) {
953 used = 0;
954 avail = buf_len;
 /* The next node does not fit: done with this LEB, switch to a new one */
955 if (buf_offs + next_len > c->leb_size) {
956 err = ubifs_update_one_lp(c, lnum,
957 LPROPS_NC, 0, 0, LPROPS_TAKEN);
958 if (err)
959 return err;
960 lnum = -1;
961 }
962 continue;
963 }
964 } else {
965 int blen, nxt_offs = buf_offs + used + next_len;
966
967 if (next_len && nxt_offs <= c->leb_size) {
 /* More nodes fit into this LEB: keep filling until the buffer is full */
968 if (avail > 0)
969 continue;
970 else
971 blen = buf_len;
972 } else {
 /* Last write to this LEB: pad up to the minimum I/O unit */
973 wlen = ALIGN(wlen, 8);
974 blen = ALIGN(wlen, c->min_io_size);
975 ubifs_pad(c, c->cbuf + wlen, blen - wlen);
976 }
977 /*
978 * The buffer is full or there are no more znodes
979 * to do
980 */
981 err = ubifs_leb_write(c, lnum, c->cbuf, buf_offs,
982 blen, UBI_SHORTTERM);
983 if (err)
984 return err;
985 buf_offs += blen;
986 if (next_len) {
987 if (nxt_offs > c->leb_size) {
988 err = ubifs_update_one_lp(c, lnum,
989 LPROPS_NC, 0, 0, LPROPS_TAKEN);
990 if (err)
991 return err;
992 lnum = -1;
993 }
 /* Bytes beyond 'blen' were not written: slide them to the buffer start */
994 used -= blen;
995 if (used < 0)
996 used = 0;
997 avail = buf_len - used;
998 memmove(c->cbuf, c->cbuf + blen, used);
999 continue;
1000 }
1001 }
1002 break;
1003 }
1004
 /* The final position must match the index head predicted by the layout */
1005#ifdef CONFIG_UBIFS_FS_DEBUG
1006 if (lnum != c->new_ihead_lnum || buf_offs != c->new_ihead_offs) {
1007 ubifs_err("inconsistent ihead");
1008 return -EINVAL;
1009 }
1010#endif
1011
1012 c->ihead_lnum = lnum;
1013 c->ihead_offs = buf_offs;
1014
1015 return 0;
1016}
1017
1018/**
1019 * free_obsolete_znodes - free obsolete znodes.
1020 * @c: UBIFS file-system description object
1021 *
1022 * At the end of commit end, obsolete znodes are freed.
1023 */
1024static void free_obsolete_znodes(struct ubifs_info *c)
1025{
1026 struct ubifs_znode *znode, *cnext;
1027
1028 cnext = c->cnext;
1029 do {
1030 znode = cnext;
1031 cnext = znode->cnext;
1032 if (test_bit(OBSOLETE_ZNODE, &znode->flags))
1033 kfree(znode);
1034 else {
1035 znode->cnext = NULL;
1036 atomic_long_inc(&c->clean_zn_cnt);
1037 atomic_long_inc(&ubifs_clean_zn_cnt);
1038 }
1039 } while (cnext != c->cnext);
1040}
1041
1042/**
1043 * return_gap_lebs - return LEBs used by the in-gap commit method.
1044 * @c: UBIFS file-system description object
1045 *
1046 * This function clears the "taken" flag for the LEBs which were used by the
1047 * "commit in-the-gaps" method.
1048 */
1049static int return_gap_lebs(struct ubifs_info *c)
1050{
1051 int *p, err;
1052
1053 if (!c->gap_lebs)
1054 return 0;
1055
1056 dbg_cmt("");
1057 for (p = c->gap_lebs; *p != -1; p++) {
1058 err = ubifs_change_one_lp(c, *p, LPROPS_NC, LPROPS_NC, 0,
1059 LPROPS_TAKEN, 0);
1060 if (err)
1061 return err;
1062 }
1063
1064 kfree(c->gap_lebs);
1065 c->gap_lebs = NULL;
1066 return 0;
1067}
1068
1069/**
1070 * ubifs_tnc_end_commit - update the TNC for commit end.
1071 * @c: UBIFS file-system description object
1072 *
1073 * Write the dirty znodes.
1074 */
1075int ubifs_tnc_end_commit(struct ubifs_info *c)
1076{
1077 int err;
1078
1079 if (!c->cnext)
1080 return 0;
1081
1082 err = return_gap_lebs(c);
1083 if (err)
1084 return err;
1085
1086 err = write_index(c);
1087 if (err)
1088 return err;
1089
1090 mutex_lock(&c->tnc_mutex);
1091
1092 dbg_cmt("TNC height is %d", c->zroot.znode->level + 1);
1093
1094 free_obsolete_znodes(c);
1095
1096 c->cnext = NULL;
1097 kfree(c->ilebs);
1098 c->ilebs = NULL;
1099
1100 mutex_unlock(&c->tnc_mutex);
1101
1102 return 0;
1103}
diff --git a/fs/ubifs/tnc_misc.c b/fs/ubifs/tnc_misc.c
new file mode 100644
index 00000000000..a25c1cc1f8d
--- /dev/null
+++ b/fs/ubifs/tnc_misc.c
@@ -0,0 +1,494 @@
1/*
2 * This file is part of UBIFS.
3 *
4 * Copyright (C) 2006-2008 Nokia Corporation.
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms of the GNU General Public License version 2 as published by
8 * the Free Software Foundation.
9 *
10 * This program is distributed in the hope that it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13 * more details.
14 *
15 * You should have received a copy of the GNU General Public License along with
16 * this program; if not, write to the Free Software Foundation, Inc., 51
17 * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
18 *
19 * Authors: Adrian Hunter
20 * Artem Bityutskiy (Битюцкий Артём)
21 */
22
23/*
24 * This file contains miscellaneous TNC-related functions shared between
25 * different files. This file does not form any logically separate TNC
26 * sub-system. The file was created because there is a lot of TNC code and
27 * putting it all in one file would make that file too big and unreadable.
28 */
29
30#include "ubifs.h"
31
32/**
33 * ubifs_tnc_levelorder_next - next TNC tree element in levelorder traversal.
34 * @zr: root of the subtree to traverse
35 * @znode: previous znode
36 *
37 * This function implements levelorder TNC traversal. The LNC is ignored.
38 * Returns the next element or %NULL if @znode is already the last one.
 * To start a traversal, pass %NULL in @znode, then @zr itself is returned.
39 */
40struct ubifs_znode *ubifs_tnc_levelorder_next(struct ubifs_znode *zr,
41 struct ubifs_znode *znode)
42{
43 int level, iip, level_search = 0;
44 struct ubifs_znode *zn;
45
46 ubifs_assert(zr);
47
48 if (unlikely(!znode))
49 return zr;
50
51 if (unlikely(znode == zr)) {
52 if (znode->level == 0)
53 return NULL;
54 return ubifs_tnc_find_child(zr, 0);
55 }
56
 /* The level we are currently enumerating */
57 level = znode->level;
58
59 iip = znode->iip;
60 while (1) {
61 ubifs_assert(znode->level <= zr->level);
62
63 /*
64 * First walk up until there is a znode with next branch to
65 * look at.
66 */
67 while (znode->parent != zr && iip >= znode->parent->child_cnt) {
68 znode = znode->parent;
69 iip = znode->iip;
70 }
71
72 if (unlikely(znode->parent == zr &&
73 iip >= znode->parent->child_cnt)) {
74 /* This level is done, switch to the lower one */
75 level -= 1;
76 if (level_search || level < 0)
77 /*
78 * We were already looking for znode at lower
79 * level ('level_search'). As we are here
80 * again, it just does not exist. Or all levels
81 * were finished ('level < 0').
82 */
83 return NULL;
84
85 level_search = 1;
 /* With -1 the 'iip + 1' look-up below starts at index 0 */
86 iip = -1;
87 znode = ubifs_tnc_find_child(zr, 0);
88 ubifs_assert(znode);
89 }
90
91 /* Switch to the next index */
92 zn = ubifs_tnc_find_child(znode->parent, iip + 1);
93 if (!zn) {
94 /* No more children to look at, we have to walk up */
95 iip = znode->parent->child_cnt;
96 continue;
97 }
98
99 /* Walk back down to the level we came from ('level') */
100 while (zn->level != level) {
101 znode = zn;
102 zn = ubifs_tnc_find_child(zn, 0);
103 if (!zn) {
104 /*
105 * This path is not too deep so it does not
106 * reach 'level'. Try next path.
107 */
108 iip = znode->iip;
109 break;
110 }
111 }
112
113 if (zn) {
114 ubifs_assert(zn->level >= 0);
115 return zn;
116 }
117 }
118}
119
120/**
121 * ubifs_search_zbranch - search znode branch.
122 * @c: UBIFS file-system description object
123 * @znode: znode to search in
124 * @key: key to search for
125 * @n: znode branch slot number is returned here
126 *
127 * This is a helper function which search branch with key @key in @znode using
128 * binary search. The result of the search may be:
129 * o exact match, then %1 is returned, and the slot number of the branch is
130 * stored in @n;
131 * o no exact match, then %0 is returned and the slot number of the left
132 * closest branch is returned in @n; the slot if all keys in this znode are
133 * greater than @key, then %-1 is returned in @n.
134 */
135int ubifs_search_zbranch(const struct ubifs_info *c,
136 const struct ubifs_znode *znode,
137 const union ubifs_key *key, int *n)
138{
139 int beg = 0, end = znode->child_cnt, uninitialized_var(mid);
140 int uninitialized_var(cmp);
141 const struct ubifs_zbranch *zbr = &znode->zbranch[0];
142
143 ubifs_assert(end > beg);
144
145 while (end > beg) {
146 mid = (beg + end) >> 1;
147 cmp = keys_cmp(c, key, &zbr[mid].key);
148 if (cmp > 0)
149 beg = mid + 1;
150 else if (cmp < 0)
151 end = mid;
152 else {
153 *n = mid;
154 return 1;
155 }
156 }
157
158 *n = end - 1;
159
160 /* The insert point is after *n */
161 ubifs_assert(*n >= -1 && *n < znode->child_cnt);
162 if (*n == -1)
163 ubifs_assert(keys_cmp(c, key, &zbr[0].key) < 0);
164 else
165 ubifs_assert(keys_cmp(c, key, &zbr[*n].key) > 0);
166 if (*n + 1 < znode->child_cnt)
167 ubifs_assert(keys_cmp(c, key, &zbr[*n + 1].key) < 0);
168
169 return 0;
170}
171
172/**
173 * ubifs_tnc_postorder_first - find first znode to do postorder tree traversal.
174 * @znode: znode to start at (root of the sub-tree to traverse)
175 *
176 * Find the lowest leftmost znode in a subtree of the TNC tree. The LNC is
177 * ignored.
178 */
179struct ubifs_znode *ubifs_tnc_postorder_first(struct ubifs_znode *znode)
180{
181 if (unlikely(!znode))
182 return NULL;
183
184 while (znode->level > 0) {
185 struct ubifs_znode *child;
186
187 child = ubifs_tnc_find_child(znode, 0);
188 if (!child)
189 return znode;
190 znode = child;
191 }
192
193 return znode;
194}
195
196/**
197 * ubifs_tnc_postorder_next - next TNC tree element in postorder traversal.
198 * @znode: previous znode
199 *
200 * This function implements postorder TNC traversal. The LNC is ignored.
201 * Returns the next element or %NULL if @znode is already the last one.
202 */
203struct ubifs_znode *ubifs_tnc_postorder_next(struct ubifs_znode *znode)
204{
205 struct ubifs_znode *zn;
206
207 ubifs_assert(znode);
208 if (unlikely(!znode->parent))
209 return NULL;
210
211 /* Switch to the next index in the parent */
212 zn = ubifs_tnc_find_child(znode->parent, znode->iip + 1);
213 if (!zn)
214 /* This is in fact the last child, return parent */
215 return znode->parent;
216
217 /* Go to the first znode in this new subtree */
218 return ubifs_tnc_postorder_first(zn);
219}
220
221/**
222 * ubifs_destroy_tnc_subtree - destroy all znodes connected to a subtree.
223 * @znode: znode defining subtree to destroy
224 *
225 * This function destroys subtree of the TNC tree. Returns number of clean
226 * znodes in the subtree.
227 */
228long ubifs_destroy_tnc_subtree(struct ubifs_znode *znode)
229{
230 struct ubifs_znode *zn = ubifs_tnc_postorder_first(znode);
231 long clean_freed = 0;
232 int n;
233
234 ubifs_assert(zn);
235 while (1) {
236 for (n = 0; n < zn->child_cnt; n++) {
237 if (!zn->zbranch[n].znode)
238 continue;
239
240 if (zn->level > 0 &&
241 !ubifs_zn_dirty(zn->zbranch[n].znode))
242 clean_freed += 1;
243
244 cond_resched();
245 kfree(zn->zbranch[n].znode);
246 }
247
248 if (zn == znode) {
249 if (!ubifs_zn_dirty(zn))
250 clean_freed += 1;
251 kfree(zn);
252 return clean_freed;
253 }
254
255 zn = ubifs_tnc_postorder_next(zn);
256 }
257}
258
259/**
260 * read_znode - read an indexing node from flash and fill znode.
261 * @c: UBIFS file-system description object
262 * @lnum: LEB of the indexing node to read
263 * @offs: node offset
264 * @len: node length
265 * @znode: znode to read to
266 *
267 * This function reads an indexing node from the flash media and fills znode
268 * with the read data. Returns zero in case of success and a negative error
269 * code in case of failure. The read indexing node is validated and if anything
270 * is wrong with it, this function prints complaint messages and returns
271 * %-EINVAL.
272 */
273static int read_znode(struct ubifs_info *c, int lnum, int offs, int len,
274 struct ubifs_znode *znode)
275{
276 int i, err, type, cmp;
277 struct ubifs_idx_node *idx;
278
279 idx = kmalloc(c->max_idx_node_sz, GFP_NOFS);
280 if (!idx)
281 return -ENOMEM;
282
283 err = ubifs_read_node(c, idx, UBIFS_IDX_NODE, len, lnum, offs);
284 if (err < 0) {
285 kfree(idx);
286 return err;
287 }
288
289 znode->child_cnt = le16_to_cpu(idx->child_cnt);
290 znode->level = le16_to_cpu(idx->level);
291
292 dbg_tnc("LEB %d:%d, level %d, %d branch",
293 lnum, offs, znode->level, znode->child_cnt);
294
295 if (znode->child_cnt > c->fanout || znode->level > UBIFS_MAX_LEVELS) {
296 dbg_err("current fanout %d, branch count %d",
297 c->fanout, znode->child_cnt);
298 dbg_err("max levels %d, znode level %d",
299 UBIFS_MAX_LEVELS, znode->level);
300 err = 1;
301 goto out_dump;
302 }
303
304 for (i = 0; i < znode->child_cnt; i++) {
305 const struct ubifs_branch *br = ubifs_idx_branch(c, idx, i);
306 struct ubifs_zbranch *zbr = &znode->zbranch[i];
307
308 key_read(c, &br->key, &zbr->key);
309 zbr->lnum = le32_to_cpu(br->lnum);
310 zbr->offs = le32_to_cpu(br->offs);
311 zbr->len = le32_to_cpu(br->len);
312 zbr->znode = NULL;
313
314 /* Validate branch */
315
316 if (zbr->lnum < c->main_first ||
317 zbr->lnum >= c->leb_cnt || zbr->offs < 0 ||
318 zbr->offs + zbr->len > c->leb_size || zbr->offs & 7) {
319 dbg_err("bad branch %d", i);
320 err = 2;
321 goto out_dump;
322 }
323
324 switch (key_type(c, &zbr->key)) {
325 case UBIFS_INO_KEY:
326 case UBIFS_DATA_KEY:
327 case UBIFS_DENT_KEY:
328 case UBIFS_XENT_KEY:
329 break;
330 default:
331 dbg_msg("bad key type at slot %d: %s", i,
332 DBGKEY(&zbr->key));
333 err = 3;
334 goto out_dump;
335 }
336
337 if (znode->level)
338 continue;
339
340 type = key_type(c, &zbr->key);
341 if (c->ranges[type].max_len == 0) {
342 if (zbr->len != c->ranges[type].len) {
343 dbg_err("bad target node (type %d) length (%d)",
344 type, zbr->len);
345 dbg_err("have to be %d", c->ranges[type].len);
346 err = 4;
347 goto out_dump;
348 }
349 } else if (zbr->len < c->ranges[type].min_len ||
350 zbr->len > c->ranges[type].max_len) {
351 dbg_err("bad target node (type %d) length (%d)",
352 type, zbr->len);
353 dbg_err("have to be in range of %d-%d",
354 c->ranges[type].min_len,
355 c->ranges[type].max_len);
356 err = 5;
357 goto out_dump;
358 }
359 }
360
361 /*
362 * Ensure that the next key is greater or equivalent to the
363 * previous one.
364 */
365 for (i = 0; i < znode->child_cnt - 1; i++) {
366 const union ubifs_key *key1, *key2;
367
368 key1 = &znode->zbranch[i].key;
369 key2 = &znode->zbranch[i + 1].key;
370
371 cmp = keys_cmp(c, key1, key2);
372 if (cmp > 0) {
373 dbg_err("bad key order (keys %d and %d)", i, i + 1);
374 err = 6;
375 goto out_dump;
376 } else if (cmp == 0 && !is_hash_key(c, key1)) {
377 /* These can only be keys with colliding hash */
378 dbg_err("keys %d and %d are not hashed but equivalent",
379 i, i + 1);
380 err = 7;
381 goto out_dump;
382 }
383 }
384
385 kfree(idx);
386 return 0;
387
388out_dump:
389 ubifs_err("bad indexing node at LEB %d:%d, error %d", lnum, offs, err);
390 dbg_dump_node(c, idx);
391 kfree(idx);
392 return -EINVAL;
393}
394
395/**
396 * ubifs_load_znode - load znode to TNC cache.
397 * @c: UBIFS file-system description object
398 * @zbr: znode branch
399 * @parent: znode's parent
400 * @iip: index in parent
401 *
402 * This function loads znode pointed to by @zbr into the TNC cache and
403 * returns pointer to it in case of success and a negative error code in case
404 * of failure.
405 */
406struct ubifs_znode *ubifs_load_znode(struct ubifs_info *c,
407 struct ubifs_zbranch *zbr,
408 struct ubifs_znode *parent, int iip)
409{
410 int err;
411 struct ubifs_znode *znode;
412
413 ubifs_assert(!zbr->znode);
414 /*
415 * A slab cache is not presently used for znodes because the znode size
416 * depends on the fanout which is stored in the superblock.
417 */
418 znode = kzalloc(c->max_znode_sz, GFP_NOFS);
419 if (!znode)
420 return ERR_PTR(-ENOMEM);
421
422 err = read_znode(c, zbr->lnum, zbr->offs, zbr->len, znode);
423 if (err)
424 goto out;
425
426 atomic_long_inc(&c->clean_zn_cnt);
427
428 /*
429 * Increment the global clean znode counter as well. It is OK that
430 * global and per-FS clean znode counters may be inconsistent for some
431 * short time (because we might be preempted at this point), the global
432 * one is only used in shrinker.
433 */
434 atomic_long_inc(&ubifs_clean_zn_cnt);
435
436 zbr->znode = znode;
437 znode->parent = parent;
438 znode->time = get_seconds();
439 znode->iip = iip;
440
441 return znode;
442
443out:
444 kfree(znode);
445 return ERR_PTR(err);
446}
447
448/**
449 * ubifs_tnc_read_node - read a leaf node from the flash media.
450 * @c: UBIFS file-system description object
451 * @zbr: key and position of the node
452 * @node: node is returned here
453 *
454 * This function reads a node defined by @zbr from the flash media. Returns
455 * zero in case of success or a negative negative error code in case of
456 * failure.
457 */
458int ubifs_tnc_read_node(struct ubifs_info *c, struct ubifs_zbranch *zbr,
459 void *node)
460{
461 union ubifs_key key1, *key = &zbr->key;
462 int err, type = key_type(c, key);
463 struct ubifs_wbuf *wbuf;
464
465 /*
466 * 'zbr' has to point to on-flash node. The node may sit in a bud and
467 * may even be in a write buffer, so we have to take care about this.
468 */
469 wbuf = ubifs_get_wbuf(c, zbr->lnum);
470 if (wbuf)
471 err = ubifs_read_node_wbuf(wbuf, node, type, zbr->len,
472 zbr->lnum, zbr->offs);
473 else
474 err = ubifs_read_node(c, node, type, zbr->len, zbr->lnum,
475 zbr->offs);
476
477 if (err) {
478 dbg_tnc("key %s", DBGKEY(key));
479 return err;
480 }
481
482 /* Make sure the key of the read node is correct */
483 key_read(c, key, &key1);
484 if (memcmp(node + UBIFS_KEY_OFFSET, &key1, c->key_len)) {
485 ubifs_err("bad key in node at LEB %d:%d",
486 zbr->lnum, zbr->offs);
487 dbg_tnc("looked for key %s found node's key %s",
488 DBGKEY(key), DBGKEY1(&key1));
489 dbg_dump_node(c, node);
490 return -EINVAL;
491 }
492
493 return 0;
494}
diff --git a/fs/ubifs/ubifs-media.h b/fs/ubifs/ubifs-media.h
new file mode 100644
index 00000000000..0cc7da9bed4
--- /dev/null
+++ b/fs/ubifs/ubifs-media.h
@@ -0,0 +1,745 @@
1/*
2 * This file is part of UBIFS.
3 *
4 * Copyright (C) 2006-2008 Nokia Corporation.
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms of the GNU General Public License version 2 as published by
8 * the Free Software Foundation.
9 *
10 * This program is distributed in the hope that it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13 * more details.
14 *
15 * You should have received a copy of the GNU General Public License along with
16 * this program; if not, write to the Free Software Foundation, Inc., 51
17 * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
18 *
19 * Authors: Artem Bityutskiy (Битюцкий Артём)
20 * Adrian Hunter
21 */
22
23/*
24 * This file describes UBIFS on-flash format and contains definitions of all the
25 * relevant data structures and constants.
26 *
27 * All UBIFS on-flash objects are stored in the form of nodes. All nodes start
28 * with the UBIFS node magic number and have the same common header. Nodes
29 * always sit at 8-byte aligned positions on the media and node header sizes are
30 * also 8-byte aligned (except for the indexing node and the padding node).
31 */
32
33#ifndef __UBIFS_MEDIA_H__
34#define __UBIFS_MEDIA_H__
35
36/* UBIFS node magic number (must not have the padding byte first or last) */
37#define UBIFS_NODE_MAGIC 0x06101831
38
39/* UBIFS on-flash format version */
40#define UBIFS_FORMAT_VERSION 4
41
42/* Minimum logical eraseblock size in bytes */
43#define UBIFS_MIN_LEB_SZ (15*1024)
44
45/* Initial CRC32 value used when calculating CRC checksums */
46#define UBIFS_CRC32_INIT 0xFFFFFFFFU
47
48/*
49 * UBIFS does not try to compress data if its length is less than the below
50 * constant.
51 */
52#define UBIFS_MIN_COMPR_LEN 128
53
54/* Root inode number */
55#define UBIFS_ROOT_INO 1
56
57/* Lowest inode number used for regular inodes (not UBIFS-only internal ones) */
58#define UBIFS_FIRST_INO 64
59
60/*
61 * Maximum file name and extended attribute length (must be a multiple of 8,
62 * minus 1).
63 */
64#define UBIFS_MAX_NLEN 255
65
66/* Maximum number of data journal heads */
67#define UBIFS_MAX_JHEADS 1
68
69/*
70 * Size of UBIFS data block. Note, UBIFS is not a block oriented file-system,
71 * which means that it does not treat the underlying media as consisting of
72 * blocks like in case of hard drives. Do not be confused. UBIFS block is just
73 * the maximum amount of data which one data node can have or which can be
74 * attached to an inode node.
75 */
76#define UBIFS_BLOCK_SIZE 4096
77#define UBIFS_BLOCK_SHIFT 12
78#define UBIFS_BLOCK_MASK 0x00000FFF
79
80/* UBIFS padding byte pattern (must not be first or last byte of node magic) */
81#define UBIFS_PADDING_BYTE 0xCE
82
83/* Maximum possible key length */
84#define UBIFS_MAX_KEY_LEN 16
85
86/* Key length ("simple" format) */
87#define UBIFS_SK_LEN 8
88
89/* Minimum index tree fanout */
90#define UBIFS_MIN_FANOUT 2
91
92/* Maximum number of levels in UBIFS indexing B-tree */
93#define UBIFS_MAX_LEVELS 512
94
95/* Maximum amount of data attached to an inode in bytes */
96#define UBIFS_MAX_INO_DATA UBIFS_BLOCK_SIZE
97
98/* LEB Properties Tree fanout (must be power of 2) and fanout shift */
99#define UBIFS_LPT_FANOUT 4
100#define UBIFS_LPT_FANOUT_SHIFT 2
101
102/* LEB Properties Tree bit field sizes */
103#define UBIFS_LPT_CRC_BITS 16
104#define UBIFS_LPT_CRC_BYTES 2
105#define UBIFS_LPT_TYPE_BITS 4
106
107/* The key is always at the same position in all keyed nodes */
108#define UBIFS_KEY_OFFSET offsetof(struct ubifs_ino_node, key)
109
110/*
111 * LEB Properties Tree node types.
112 *
113 * UBIFS_LPT_PNODE: LPT leaf node (contains LEB properties)
114 * UBIFS_LPT_NNODE: LPT internal node
115 * UBIFS_LPT_LTAB: LPT's own lprops table
116 * UBIFS_LPT_LSAVE: LPT's save table (big model only)
117 * UBIFS_LPT_NODE_CNT: count of LPT node types
118 * UBIFS_LPT_NOT_A_NODE: all ones (15 for 4 bits) is never a valid node type
119 */
120enum {
121 UBIFS_LPT_PNODE,
122 UBIFS_LPT_NNODE,
123 UBIFS_LPT_LTAB,
124 UBIFS_LPT_LSAVE,
125 UBIFS_LPT_NODE_CNT,
126 UBIFS_LPT_NOT_A_NODE = (1 << UBIFS_LPT_TYPE_BITS) - 1,
127};
128
129/*
130 * UBIFS inode types.
131 *
132 * UBIFS_ITYPE_REG: regular file
133 * UBIFS_ITYPE_DIR: directory
134 * UBIFS_ITYPE_LNK: soft link
135 * UBIFS_ITYPE_BLK: block device node
136 * UBIFS_ITYPE_CHR: character device node
137 * UBIFS_ITYPE_FIFO: fifo
138 * UBIFS_ITYPE_SOCK: socket
139 * UBIFS_ITYPES_CNT: count of supported file types
140 */
141enum {
142 UBIFS_ITYPE_REG,
143 UBIFS_ITYPE_DIR,
144 UBIFS_ITYPE_LNK,
145 UBIFS_ITYPE_BLK,
146 UBIFS_ITYPE_CHR,
147 UBIFS_ITYPE_FIFO,
148 UBIFS_ITYPE_SOCK,
149 UBIFS_ITYPES_CNT,
150};
151
152/*
153 * Supported key hash functions.
154 *
155 * UBIFS_KEY_HASH_R5: R5 hash
156 * UBIFS_KEY_HASH_TEST: test hash which just returns first 4 bytes of the name
157 */
158enum {
159 UBIFS_KEY_HASH_R5,
160 UBIFS_KEY_HASH_TEST,
161};
162
163/*
164 * Supported key formats.
165 *
166 * UBIFS_SIMPLE_KEY_FMT: simple key format
167 */
168enum {
169 UBIFS_SIMPLE_KEY_FMT,
170};
171
172/*
173 * The simple key format uses 29 bits for storing UBIFS block number and hash
174 * value.
175 */
176#define UBIFS_S_KEY_BLOCK_BITS 29
177#define UBIFS_S_KEY_BLOCK_MASK 0x1FFFFFFF
178#define UBIFS_S_KEY_HASH_BITS UBIFS_S_KEY_BLOCK_BITS
179#define UBIFS_S_KEY_HASH_MASK UBIFS_S_KEY_BLOCK_MASK
180
181/*
182 * Key types.
183 *
184 * UBIFS_INO_KEY: inode node key
185 * UBIFS_DATA_KEY: data node key
186 * UBIFS_DENT_KEY: directory entry node key
187 * UBIFS_XENT_KEY: extended attribute entry key
188 * UBIFS_KEY_TYPES_CNT: number of supported key types
189 */
190enum {
191 UBIFS_INO_KEY,
192 UBIFS_DATA_KEY,
193 UBIFS_DENT_KEY,
194 UBIFS_XENT_KEY,
195 UBIFS_KEY_TYPES_CNT,
196};
197
198/* Count of LEBs reserved for the superblock area */
199#define UBIFS_SB_LEBS 1
200/* Count of LEBs reserved for the master area */
201#define UBIFS_MST_LEBS 2
202
203/* First LEB of the superblock area */
204#define UBIFS_SB_LNUM 0
205/* First LEB of the master area */
206#define UBIFS_MST_LNUM (UBIFS_SB_LNUM + UBIFS_SB_LEBS)
207/* First LEB of the log area */
208#define UBIFS_LOG_LNUM (UBIFS_MST_LNUM + UBIFS_MST_LEBS)
209
210/*
211 * The below constants define the absolute minimum values for various UBIFS
212 * media areas. Many of them actually depend on flash geometry and the FS
213 * configuration (number of journal heads, orphan LEBs, etc). This means that
214 * the smallest volume size which can be used for UBIFS cannot be pre-defined
215 * by these constants. The file-system that meets the below limitation will not
216 * necessarily mount. UBIFS does run-time calculations and validates the FS
217 * size.
218 */
219
220/* Minimum number of logical eraseblocks in the log */
221#define UBIFS_MIN_LOG_LEBS 2
222/* Minimum number of bud logical eraseblocks (one for each head) */
223#define UBIFS_MIN_BUD_LEBS 3
224/* Minimum number of journal logical eraseblocks */
225#define UBIFS_MIN_JNL_LEBS (UBIFS_MIN_LOG_LEBS + UBIFS_MIN_BUD_LEBS)
226/* Minimum number of LPT area logical eraseblocks */
227#define UBIFS_MIN_LPT_LEBS 2
228/* Minimum number of orphan area logical eraseblocks */
229#define UBIFS_MIN_ORPH_LEBS 1
230/*
231 * Minimum number of main area logical eraseblocks (buds, 2 for the index, 1
232 * for GC, 1 for deletions, and at least 1 for committed data).
233 */
234#define UBIFS_MIN_MAIN_LEBS (UBIFS_MIN_BUD_LEBS + 5)
235
236/* Minimum number of logical eraseblocks */
237#define UBIFS_MIN_LEB_CNT (UBIFS_SB_LEBS + UBIFS_MST_LEBS + \
238 UBIFS_MIN_LOG_LEBS + UBIFS_MIN_LPT_LEBS + \
239 UBIFS_MIN_ORPH_LEBS + UBIFS_MIN_MAIN_LEBS)
240
241/* Node sizes (N.B. these are guaranteed to be multiples of 8) */
242#define UBIFS_CH_SZ sizeof(struct ubifs_ch)
243#define UBIFS_INO_NODE_SZ sizeof(struct ubifs_ino_node)
244#define UBIFS_DATA_NODE_SZ sizeof(struct ubifs_data_node)
245#define UBIFS_DENT_NODE_SZ sizeof(struct ubifs_dent_node)
246#define UBIFS_TRUN_NODE_SZ sizeof(struct ubifs_trun_node)
247#define UBIFS_PAD_NODE_SZ sizeof(struct ubifs_pad_node)
248#define UBIFS_SB_NODE_SZ sizeof(struct ubifs_sb_node)
249#define UBIFS_MST_NODE_SZ sizeof(struct ubifs_mst_node)
250#define UBIFS_REF_NODE_SZ sizeof(struct ubifs_ref_node)
251#define UBIFS_IDX_NODE_SZ sizeof(struct ubifs_idx_node)
252#define UBIFS_CS_NODE_SZ sizeof(struct ubifs_cs_node)
253#define UBIFS_ORPH_NODE_SZ sizeof(struct ubifs_orph_node)
254/* Extended attribute entry nodes are identical to directory entry nodes */
255#define UBIFS_XENT_NODE_SZ UBIFS_DENT_NODE_SZ
256/* Only this does not have to be multiple of 8 bytes */
257#define UBIFS_BRANCH_SZ sizeof(struct ubifs_branch)
258
259/* Maximum node sizes (N.B. these are guaranteed to be multiples of 8) */
260#define UBIFS_MAX_DATA_NODE_SZ (UBIFS_DATA_NODE_SZ + UBIFS_BLOCK_SIZE)
261#define UBIFS_MAX_INO_NODE_SZ (UBIFS_INO_NODE_SZ + UBIFS_MAX_INO_DATA)
262#define UBIFS_MAX_DENT_NODE_SZ (UBIFS_DENT_NODE_SZ + UBIFS_MAX_NLEN + 1)
263#define UBIFS_MAX_XENT_NODE_SZ UBIFS_MAX_DENT_NODE_SZ
264
265/* The largest UBIFS node */
266#define UBIFS_MAX_NODE_SZ UBIFS_MAX_INO_NODE_SZ
267
268/*
269 * On-flash inode flags.
270 *
271 * UBIFS_COMPR_FL: use compression for this inode
272 * UBIFS_SYNC_FL: I/O on this inode has to be synchronous
273 * UBIFS_IMMUTABLE_FL: inode is immutable
274 * UBIFS_APPEND_FL: writes to the inode may only append data
275 * UBIFS_DIRSYNC_FL: I/O on this directory inode has to be synchronous
276 * UBIFS_XATTR_FL: this inode is the inode for an extended attribute value
277 *
278 * Note, these are on-flash flags which correspond to ioctl flags
279 * (@FS_COMPR_FL, etc). They have the same values now, but generally, do not
280 * have to be the same.
281 */
282enum {
283 UBIFS_COMPR_FL = 0x01,
284 UBIFS_SYNC_FL = 0x02,
285 UBIFS_IMMUTABLE_FL = 0x04,
286 UBIFS_APPEND_FL = 0x08,
287 UBIFS_DIRSYNC_FL = 0x10,
288 UBIFS_XATTR_FL = 0x20,
289};
290
291/* Inode flag bits used by UBIFS */
292#define UBIFS_FL_MASK 0x0000001F
293
294/*
295 * UBIFS compression algorithms.
296 *
297 * UBIFS_COMPR_NONE: no compression
298 * UBIFS_COMPR_LZO: LZO compression
299 * UBIFS_COMPR_ZLIB: ZLIB compression
300 * UBIFS_COMPR_TYPES_CNT: count of supported compression types
301 */
302enum {
303 UBIFS_COMPR_NONE,
304 UBIFS_COMPR_LZO,
305 UBIFS_COMPR_ZLIB,
306 UBIFS_COMPR_TYPES_CNT,
307};
308
309/*
310 * UBIFS node types.
311 *
312 * UBIFS_INO_NODE: inode node
313 * UBIFS_DATA_NODE: data node
314 * UBIFS_DENT_NODE: directory entry node
315 * UBIFS_XENT_NODE: extended attribute node
316 * UBIFS_TRUN_NODE: truncation node
317 * UBIFS_PAD_NODE: padding node
318 * UBIFS_SB_NODE: superblock node
319 * UBIFS_MST_NODE: master node
320 * UBIFS_REF_NODE: LEB reference node
321 * UBIFS_IDX_NODE: index node
322 * UBIFS_CS_NODE: commit start node
323 * UBIFS_ORPH_NODE: orphan node
324 * UBIFS_NODE_TYPES_CNT: count of supported node types
325 *
326 * Note, we index arrays by these numbers, so keep them low and contiguous.
327 * Node type constants for inodes, direntries and so on have to be the same as
328 * corresponding key type constants.
329 */
330enum {
331 UBIFS_INO_NODE,
332 UBIFS_DATA_NODE,
333 UBIFS_DENT_NODE,
334 UBIFS_XENT_NODE,
335 UBIFS_TRUN_NODE,
336 UBIFS_PAD_NODE,
337 UBIFS_SB_NODE,
338 UBIFS_MST_NODE,
339 UBIFS_REF_NODE,
340 UBIFS_IDX_NODE,
341 UBIFS_CS_NODE,
342 UBIFS_ORPH_NODE,
343 UBIFS_NODE_TYPES_CNT,
344};
345
346/*
347 * Master node flags.
348 *
349 * UBIFS_MST_DIRTY: rebooted uncleanly - master node is dirty
350 * UBIFS_MST_NO_ORPHS: no orphan inodes present
351 * UBIFS_MST_RCVRY: written by recovery
352 */
353enum {
354 UBIFS_MST_DIRTY = 1,
355 UBIFS_MST_NO_ORPHS = 2,
356 UBIFS_MST_RCVRY = 4,
357};
358
359/*
360 * Node group type (used by recovery to recover whole group or none).
361 *
362 * UBIFS_NO_NODE_GROUP: this node is not part of a group
363 * UBIFS_IN_NODE_GROUP: this node is a part of a group
364 * UBIFS_LAST_OF_NODE_GROUP: this node is the last in a group
365 */
366enum {
367 UBIFS_NO_NODE_GROUP = 0,
368 UBIFS_IN_NODE_GROUP,
369 UBIFS_LAST_OF_NODE_GROUP,
370};
371
372/*
373 * Superblock flags.
374 *
375 * UBIFS_FLG_BIGLPT: "big" LPT model is used if set
376 */
377enum {
378 UBIFS_FLG_BIGLPT = 0x02,
379};
380
381/**
382 * struct ubifs_ch - common header node.
383 * @magic: UBIFS node magic number (%UBIFS_NODE_MAGIC)
384 * @crc: CRC-32 checksum of the node header
385 * @sqnum: sequence number
386 * @len: full node length
387 * @node_type: node type
388 * @group_type: node group type
389 * @padding: reserved for future, zeroes
390 *
391 * Every UBIFS node starts with this common part. If the node has a key, the
392 * key always goes next.
 *
 * The structure is packed, so with the usual widths of the fixed-size types
 * the declared fields add up to 24 bytes (4 + 4 + 8 + 4 + 1 + 1 + 2).
393 */
394struct ubifs_ch {
395 __le32 magic;
396 __le32 crc;
397 __le64 sqnum;
398 __le32 len;
399 __u8 node_type;
400 __u8 group_type;
401 __u8 padding[2];
402} __attribute__ ((packed));
403
404/**
405 * union ubifs_dev_desc - device node descriptor.
406 * @new: new type device descriptor
407 * @huge: huge type device descriptor
408 *
409 * This data structure describes major/minor numbers of a device node. If an
410 * inode is a device node then its data contains an object of this type. UBIFS
411 * uses standard Linux "new" and "huge" device node encodings.
412 */
413union ubifs_dev_desc {
414 __le32 new;
415 __le64 huge;
416} __attribute__ ((packed));
417
418/**
419 * struct ubifs_ino_node - inode node.
420 * @ch: common header
421 * @key: node key
422 * @creat_sqnum: sequence number at time of creation
423 * @size: inode size in bytes (amount of uncompressed data)
424 * @atime_sec: access time seconds
425 * @ctime_sec: creation time seconds
426 * @mtime_sec: modification time seconds
427 * @atime_nsec: access time nanoseconds
428 * @ctime_nsec: creation time nanoseconds
429 * @mtime_nsec: modification time nanoseconds
430 * @nlink: number of hard links
431 * @uid: owner ID
432 * @gid: group ID
433 * @mode: access flags
434 * @flags: per-inode flags (%UBIFS_COMPR_FL, %UBIFS_SYNC_FL, etc)
435 * @data_len: inode data length
436 * @xattr_cnt: count of extended attributes this inode has
437 * @xattr_size: summarized size of all extended attributes in bytes
438 * @padding1: reserved for future, zeroes
439 * @xattr_names: sum of lengths of all extended attribute names belonging to
440 * this inode
441 * @compr_type: compression type used for this inode
442 * @padding2: reserved for future, zeroes
443 * @data: data attached to the inode
444 *
445 * Note, even though inode compression type is defined by @compr_type, some
446 * nodes of this inode may be compressed with different compressor - this
447 * happens if compression type is changed while the inode already has data
448 * nodes. But @compr_type will be used for further writes to the inode.
449 *
450 * Note, do not forget to amend 'zero_ino_node_unused()' function when changing
451 * the padding fields.
 *
 * The structure is packed; with a 24-byte common header the fixed part
 * (everything before @data) adds up to 160 bytes, which is a multiple of 8.
452 */
453struct ubifs_ino_node {
454 struct ubifs_ch ch;
455 __u8 key[UBIFS_MAX_KEY_LEN];
456 __le64 creat_sqnum;
457 __le64 size;
458 __le64 atime_sec;
459 __le64 ctime_sec;
460 __le64 mtime_sec;
461 __le32 atime_nsec;
462 __le32 ctime_nsec;
463 __le32 mtime_nsec;
464 __le32 nlink;
465 __le32 uid;
466 __le32 gid;
467 __le32 mode;
468 __le32 flags;
469 __le32 data_len;
470 __le32 xattr_cnt;
471 __le32 xattr_size;
472 __u8 padding1[4]; /* Watch 'zero_ino_node_unused()' if changing! */
473 __le32 xattr_names;
474 __le16 compr_type;
475 __u8 padding2[26]; /* Watch 'zero_ino_node_unused()' if changing! */
476 __u8 data[];
477} __attribute__ ((packed));
478
479/**
480 * struct ubifs_dent_node - directory entry node.
481 * @ch: common header
482 * @key: node key
483 * @inum: target inode number
484 * @padding1: reserved for future, zeroes
485 * @type: type of the target inode (%UBIFS_ITYPE_REG, %UBIFS_ITYPE_DIR, etc)
486 * @nlen: name length
487 * @padding2: reserved for future, zeroes
488 * @name: zero-terminated name
489 *
490 * Note, do not forget to amend 'zero_dent_node_unused()' function when
491 * changing the padding fields.
 *
 * The structure is packed; with a 24-byte common header the fixed part
 * (everything before @name) adds up to 56 bytes, which is a multiple of 8.
492 */
493struct ubifs_dent_node {
494 struct ubifs_ch ch;
495 __u8 key[UBIFS_MAX_KEY_LEN];
496 __le64 inum;
497 __u8 padding1;
498 __u8 type;
499 __le16 nlen;
500 __u8 padding2[4]; /* Watch 'zero_dent_node_unused()' if changing! */
501 __u8 name[];
502} __attribute__ ((packed));
503
504/**
505 * struct ubifs_data_node - data node.
506 * @ch: common header
507 * @key: node key
508 * @size: uncompressed data size in bytes
509 * @compr_type: compression type (%UBIFS_COMPR_NONE, %UBIFS_COMPR_LZO, etc)
510 * @padding: reserved for future, zeroes
511 * @data: data
512 *
513 * Note, do not forget to amend 'zero_data_node_unused()' function when
514 * changing the padding fields.
 *
 * The structure is packed; with a 24-byte common header the fixed part
 * (everything before @data) adds up to 48 bytes, which is a multiple of 8.
515 */
516struct ubifs_data_node {
517 struct ubifs_ch ch;
518 __u8 key[UBIFS_MAX_KEY_LEN];
519 __le32 size;
520 __le16 compr_type;
521 __u8 padding[2]; /* Watch 'zero_data_node_unused()' if changing! */
522 __u8 data[];
523} __attribute__ ((packed));
524
525/**
526 * struct ubifs_trun_node - truncation node.
527 * @ch: common header
528 * @inum: truncated inode number
529 * @padding: reserved for future, zeroes
530 * @old_size: size before truncation
531 * @new_size: size after truncation
532 *
533 * This node exists only in the journal and never goes to the main area. Note,
534 * do not forget to amend 'zero_trun_node_unused()' function when changing the
535 * padding fields.
 *
 * Unlike the other node structures declared around it, this one carries no
 * key field. The structure is packed; with a 24-byte common header it adds up
 * to 56 bytes, which is a multiple of 8.
536 */
537struct ubifs_trun_node {
538 struct ubifs_ch ch;
539 __le32 inum;
540 __u8 padding[12]; /* Watch 'zero_trun_node_unused()' if changing! */
541 __le64 old_size;
542 __le64 new_size;
543} __attribute__ ((packed));
544
545/**
546 * struct ubifs_pad_node - padding node.
547 * @ch: common header
548 * @pad_len: how many bytes after this node are unused (because padded)
549 *
 * This structure declares no reserved padding field. It is packed; with a
 * 24-byte common header it adds up to 28 bytes, which is not a multiple of 8.
550 */
551struct ubifs_pad_node {
552 struct ubifs_ch ch;
553 __le32 pad_len;
554} __attribute__ ((packed));
555
556/**
557 * struct ubifs_sb_node - superblock node.
558 * @ch: common header
559 * @padding: reserved for future, zeroes
560 * @key_hash: type of hash function used in keys
561 * @key_fmt: format of the key
562 * @flags: file-system flags (%UBIFS_FLG_BIGLPT, etc)
563 * @min_io_size: minimal input/output unit size
564 * @leb_size: logical eraseblock size in bytes
565 * @leb_cnt: count of LEBs used by file-system
566 * @max_leb_cnt: maximum count of LEBs used by file-system
567 * @max_bud_bytes: maximum amount of data stored in buds
568 * @log_lebs: log size in logical eraseblocks
569 * @lpt_lebs: number of LEBs used for lprops table
570 * @orph_lebs: number of LEBs used for recording orphans
571 * @jhead_cnt: count of journal heads
572 * @fanout: tree fanout (max. number of links per indexing node)
573 * @lsave_cnt: number of LEB numbers in LPT's save table
574 * @fmt_version: UBIFS on-flash format version
575 * @default_compr: default compression algorithm (%UBIFS_COMPR_LZO, etc)
576 * @padding1: reserved for future, zeroes
577 * @rp_uid: reserve pool UID
578 * @rp_gid: reserve pool GID
579 * @rp_size: size of the reserved pool in bytes
580 * @padding2: reserved for future, zeroes
581 * @time_gran: time granularity in nanoseconds
582 * @uuid: UUID generated when the file system image was created
583 */
584struct ubifs_sb_node {
585 struct ubifs_ch ch;
586 __u8 padding[2];
587 __u8 key_hash;
588 __u8 key_fmt;
589 __le32 flags;
590 __le32 min_io_size;
591 __le32 leb_size;
592 __le32 leb_cnt;
593 __le32 max_leb_cnt;
594 __le64 max_bud_bytes;
595 __le32 log_lebs;
596 __le32 lpt_lebs;
597 __le32 orph_lebs;
598 __le32 jhead_cnt;
599 __le32 fanout;
600 __le32 lsave_cnt;
601 __le32 fmt_version;
602 __le16 default_compr;
603 __u8 padding1[2];
604 __le32 rp_uid;
605 __le32 rp_gid;
606 __le64 rp_size;
607 __le32 time_gran;
608 __u8 uuid[16];
609 __u8 padding2[3972];
610} __attribute__ ((packed));
611
612/**
613 * struct ubifs_mst_node - master node.
614 * @ch: common header
615 * @highest_inum: highest inode number in the committed index
616 * @cmt_no: commit number
617 * @flags: various flags (%UBIFS_MST_DIRTY, etc)
618 * @log_lnum: start of the log
619 * @root_lnum: LEB number of the root indexing node
620 * @root_offs: offset within @root_lnum
621 * @root_len: root indexing node length
622 * @gc_lnum: LEB reserved for garbage collection (%-1 value means the LEB was
623 * not reserved and should be reserved on mount)
624 * @ihead_lnum: LEB number of index head
625 * @ihead_offs: offset of index head
626 * @index_size: size of index on flash
627 * @total_free: total free space in bytes
628 * @total_dirty: total dirty space in bytes
629 * @total_used: total used space in bytes (includes only data LEBs)
630 * @total_dead: total dead space in bytes (includes only data LEBs)
631 * @total_dark: total dark space in bytes (includes only data LEBs)
632 * @lpt_lnum: LEB number of LPT root nnode
633 * @lpt_offs: offset of LPT root nnode
634 * @nhead_lnum: LEB number of LPT head
635 * @nhead_offs: offset of LPT head
636 * @ltab_lnum: LEB number of LPT's own lprops table
637 * @ltab_offs: offset of LPT's own lprops table
638 * @lsave_lnum: LEB number of LPT's save table (big model only)
639 * @lsave_offs: offset of LPT's save table (big model only)
640 * @lscan_lnum: LEB number of last LPT scan
641 * @empty_lebs: number of empty logical eraseblocks
642 * @idx_lebs: number of indexing logical eraseblocks
643 * @leb_cnt: count of LEBs used by file-system
644 * @padding: reserved for future, zeroes
645 */
646struct ubifs_mst_node {
647 struct ubifs_ch ch;
648 __le64 highest_inum;
649 __le64 cmt_no;
650 __le32 flags;
651 __le32 log_lnum;
652 __le32 root_lnum;
653 __le32 root_offs;
654 __le32 root_len;
655 __le32 gc_lnum;
656 __le32 ihead_lnum;
657 __le32 ihead_offs;
658 __le64 index_size;
659 __le64 total_free;
660 __le64 total_dirty;
661 __le64 total_used;
662 __le64 total_dead;
663 __le64 total_dark;
664 __le32 lpt_lnum;
665 __le32 lpt_offs;
666 __le32 nhead_lnum;
667 __le32 nhead_offs;
668 __le32 ltab_lnum;
669 __le32 ltab_offs;
670 __le32 lsave_lnum;
671 __le32 lsave_offs;
672 __le32 lscan_lnum;
673 __le32 empty_lebs;
674 __le32 idx_lebs;
675 __le32 leb_cnt;
676 __u8 padding[344];
677} __attribute__ ((packed));
678
679/**
680 * struct ubifs_ref_node - logical eraseblock reference node.
681 * @ch: common header
682 * @lnum: the referred logical eraseblock number
683 * @offs: start offset in the referred LEB
684 * @jhead: journal head number
685 * @padding: reserved for future, zeroes
686 */
687struct ubifs_ref_node {
688 struct ubifs_ch ch;
689 __le32 lnum;
690 __le32 offs;
691 __le32 jhead;
692 __u8 padding[28];
693} __attribute__ ((packed));
694
695/**
696 * struct ubifs_branch - key/reference/length branch
697 * @lnum: LEB number of the target node
698 * @offs: offset within @lnum
699 * @len: target node length
700 * @key: key
701 */
702struct ubifs_branch {
703 __le32 lnum;
704 __le32 offs;
705 __le32 len;
706 __u8 key[];
707} __attribute__ ((packed));
708
709/**
710 * struct ubifs_idx_node - indexing node.
711 * @ch: common header
712 * @child_cnt: number of child index nodes
713 * @level: tree level
714 * @branches: LEB number / offset / length / key branches
715 */
716struct ubifs_idx_node {
717 struct ubifs_ch ch;
718 __le16 child_cnt;
719 __le16 level;
720 __u8 branches[];
721} __attribute__ ((packed));
722
723/**
724 * struct ubifs_cs_node - commit start node.
725 * @ch: common header
726 * @cmt_no: commit number
727 */
728struct ubifs_cs_node {
729 struct ubifs_ch ch;
730 __le64 cmt_no;
731} __attribute__ ((packed));
732
733/**
734 * struct ubifs_orph_node - orphan node.
735 * @ch: common header
736 * @cmt_no: commit number (also top bit is set on the last node of the commit)
737 * @inos: inode numbers of orphans
738 */
739struct ubifs_orph_node {
740 struct ubifs_ch ch;
741 __le64 cmt_no;
742 __le64 inos[];
743} __attribute__ ((packed));
744
745#endif /* __UBIFS_MEDIA_H__ */
diff --git a/fs/ubifs/ubifs.h b/fs/ubifs/ubifs.h
new file mode 100644
index 00000000000..e4f89f27182
--- /dev/null
+++ b/fs/ubifs/ubifs.h
@@ -0,0 +1,1649 @@
1/*
2 * This file is part of UBIFS.
3 *
4 * Copyright (C) 2006-2008 Nokia Corporation
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms of the GNU General Public License version 2 as published by
8 * the Free Software Foundation.
9 *
10 * This program is distributed in the hope that it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13 * more details.
14 *
15 * You should have received a copy of the GNU General Public License along with
16 * this program; if not, write to the Free Software Foundation, Inc., 51
17 * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
18 *
19 * Authors: Artem Bityutskiy (Битюцкий Артём)
20 * Adrian Hunter
21 */
22
23/* Implementation version 0.7 */
24
25#ifndef __UBIFS_H__
26#define __UBIFS_H__
27
28#include <asm/div64.h>
29#include <linux/statfs.h>
30#include <linux/fs.h>
31#include <linux/err.h>
32#include <linux/sched.h>
33#include <linux/vmalloc.h>
34#include <linux/spinlock.h>
35#include <linux/mutex.h>
36#include <linux/rwsem.h>
37#include <linux/mtd/ubi.h>
38#include <linux/pagemap.h>
39#include <linux/backing-dev.h>
40#include "ubifs-media.h"
41
42/* Version of this UBIFS implementation */
43#define UBIFS_VERSION 1
44
45/* Normal UBIFS messages */
46#define ubifs_msg(fmt, ...) \
47 printk(KERN_NOTICE "UBIFS: " fmt "\n", ##__VA_ARGS__)
48/* UBIFS error messages */
49#define ubifs_err(fmt, ...) \
50 printk(KERN_ERR "UBIFS error (pid %d): %s: " fmt "\n", current->pid, \
51 __func__, ##__VA_ARGS__)
52/* UBIFS warning messages */
53#define ubifs_warn(fmt, ...) \
54 printk(KERN_WARNING "UBIFS warning (pid %d): %s: " fmt "\n", \
55 current->pid, __func__, ##__VA_ARGS__)
56
57/* UBIFS file system VFS magic number */
58#define UBIFS_SUPER_MAGIC 0x24051905
59
60/* Number of UBIFS blocks per VFS page */
61#define UBIFS_BLOCKS_PER_PAGE (PAGE_CACHE_SIZE / UBIFS_BLOCK_SIZE)
62#define UBIFS_BLOCKS_PER_PAGE_SHIFT (PAGE_CACHE_SHIFT - UBIFS_BLOCK_SHIFT)
63
64/* "File system end of life" sequence number watermark */
65#define SQNUM_WARN_WATERMARK 0xFFFFFFFF00000000ULL
66#define SQNUM_WATERMARK 0xFFFFFFFFFF000000ULL
67
68/* Minimum amount of data UBIFS writes to the flash */
69#define MIN_WRITE_SZ (UBIFS_DATA_NODE_SZ + 8)
70
71/*
72 * Currently we do not support inode number overlapping and re-using, so this
73 * watermark defines dangerous inode number level. This should be fixed later,
74 * although it is difficult to exceed current limit. Another option is to use
75 * 64-bit inode numbers, but this means more overhead.
76 */
77#define INUM_WARN_WATERMARK 0xFFF00000
78#define INUM_WATERMARK 0xFFFFFF00
79
80/* Largest key size supported in this implementation */
81#define CUR_MAX_KEY_LEN UBIFS_SK_LEN
82
83/* Maximum number of entries in each LPT (LEB category) heap */
84#define LPT_HEAP_SZ 256
85
86/*
87 * Background thread name pattern. The numbers are UBI device and volume
88 * numbers.
89 */
90#define BGT_NAME_PATTERN "ubifs_bgt%d_%d"
91
92/* Default write-buffer synchronization timeout (5 secs) */
93#define DEFAULT_WBUF_TIMEOUT (5 * HZ)
94
95/* Maximum possible inode number (only 32-bit inodes are supported now) */
96#define MAX_INUM 0xFFFFFFFF
97
98/* Number of non-data journal heads */
99#define NONDATA_JHEADS_CNT 2
100
101/* Garbage collector head */
102#define GCHD 0
103/* Base journal head number */
104#define BASEHD 1
105/* First "general purpose" journal head */
106#define DATAHD 2
107
108/* 'No change' value for 'ubifs_change_lp()' */
109#define LPROPS_NC 0x80000001
110
111/*
112 * There is no notion of truncation key because truncation nodes do not exist
113 * in TNC. However, when replaying, it is handy to introduce fake "truncation"
114 * keys for truncation nodes because the code becomes simpler. So we define
115 * %UBIFS_TRUN_KEY type.
116 */
117#define UBIFS_TRUN_KEY UBIFS_KEY_TYPES_CNT
118
119/*
120 * How much a directory entry/extended attribute entry adds to the parent/host
121 * inode.
122 */
123#define CALC_DENT_SIZE(name_len) ALIGN(UBIFS_DENT_NODE_SZ + (name_len) + 1, 8)
124
125/* How much an extended attribute adds to the host inode */
126#define CALC_XATTR_BYTES(data_len) ALIGN(UBIFS_INO_NODE_SZ + (data_len) + 1, 8)
127
128/*
129 * Znodes which were not touched for 'OLD_ZNODE_AGE' seconds are considered
130 * "old", and znode which were touched last 'YOUNG_ZNODE_AGE' seconds ago are
131 * considered "young". This is used by shrinker when selecting znode to trim
132 * off.
133 */
134#define OLD_ZNODE_AGE 20
135#define YOUNG_ZNODE_AGE 5
136
137/*
138 * Some compressors, like LZO, may end up with more data than the input buffer.
139 * So UBIFS always allocates larger output buffer, to be sure the compressor
140 * will not corrupt memory in case of worst case compression.
141 */
142#define WORST_COMPR_FACTOR 2
143
144/* Maximum expected tree height for use by bottom_up_buf */
145#define BOTTOM_UP_HEIGHT 64
146
147/*
148 * Lockdep classes for UBIFS inode @ui_mutex.
149 */
150enum {
151 WB_MUTEX_1 = 0,
152 WB_MUTEX_2 = 1,
153 WB_MUTEX_3 = 2,
154};
155
156/*
157 * Znode flags (actually, bit numbers which store the flags).
158 *
159 * DIRTY_ZNODE: znode is dirty
160 * COW_ZNODE: znode is being committed and a new instance of this znode has to
161 * be created before changing this znode
162 * OBSOLETE_ZNODE: znode is obsolete, which means it was deleted, but it is
163 * still in the commit list and the ongoing commit operation
164 * will commit it, and delete this znode after it is done
165 */
166enum {
167 DIRTY_ZNODE = 0,
168 COW_ZNODE = 1,
169 OBSOLETE_ZNODE = 2,
170};
171
172/*
173 * Commit states.
174 *
175 * COMMIT_RESTING: commit is not wanted
176 * COMMIT_BACKGROUND: background commit has been requested
177 * COMMIT_REQUIRED: commit is required
178 * COMMIT_RUNNING_BACKGROUND: background commit is running
179 * COMMIT_RUNNING_REQUIRED: commit is running and it is required
180 * COMMIT_BROKEN: commit failed
181 */
182enum {
183 COMMIT_RESTING = 0,
184 COMMIT_BACKGROUND,
185 COMMIT_REQUIRED,
186 COMMIT_RUNNING_BACKGROUND,
187 COMMIT_RUNNING_REQUIRED,
188 COMMIT_BROKEN,
189};
190
191/*
192 * 'ubifs_scan_a_node()' return values.
193 *
194 * SCANNED_GARBAGE: scanned garbage
195 * SCANNED_EMPTY_SPACE: scanned empty space
196 * SCANNED_A_NODE: scanned a valid node
197 * SCANNED_A_CORRUPT_NODE: scanned a corrupted node
198 * SCANNED_A_BAD_PAD_NODE: scanned a padding node with invalid pad length
199 *
200 * Greater than zero means: 'scanned that number of padding bytes'
201 */
202enum {
203 SCANNED_GARBAGE = 0,
204 SCANNED_EMPTY_SPACE = -1,
205 SCANNED_A_NODE = -2,
206 SCANNED_A_CORRUPT_NODE = -3,
207 SCANNED_A_BAD_PAD_NODE = -4,
208};
209
210/*
211 * LPT cnode flag bits.
212 *
213 * DIRTY_CNODE: cnode is dirty
214 * COW_CNODE: cnode is being committed and must be copied before writing
215 * OBSOLETE_CNODE: cnode is being committed and has been copied (or deleted),
216 * so it can (and must) be freed when the commit is finished
217 */
218enum {
219 DIRTY_CNODE = 0,
220 COW_CNODE = 1,
221 OBSOLETE_CNODE = 2,
222};
223
224/*
225 * Dirty flag bits (lpt_drty_flgs) for LPT special nodes.
226 *
227 * LTAB_DIRTY: ltab node is dirty
228 * LSAVE_DIRTY: lsave node is dirty
229 */
230enum {
231 LTAB_DIRTY = 1,
232 LSAVE_DIRTY = 2,
233};
234
235/*
236 * Return codes used by the garbage collector.
237 * @LEB_FREED: the logical eraseblock was freed and is ready to use
238 * @LEB_FREED_IDX: indexing LEB was freed and can be used only after the commit
239 * @LEB_RETAINED: the logical eraseblock was freed and retained for GC purposes
240 */
241enum {
242 LEB_FREED,
243 LEB_FREED_IDX,
244 LEB_RETAINED,
245};
246
247/**
248 * struct ubifs_old_idx - index node obsoleted since last commit start.
249 * @rb: rb-tree node
250 * @lnum: LEB number of obsoleted index node
251 * @offs: offset of obsoleted index node
252 */
253struct ubifs_old_idx {
254 struct rb_node rb;
255 int lnum;
256 int offs;
257};
258
259/* The below union makes it easier to deal with keys */
260union ubifs_key {
261 uint8_t u8[CUR_MAX_KEY_LEN];
262 uint32_t u32[CUR_MAX_KEY_LEN/4];
263 uint64_t u64[CUR_MAX_KEY_LEN/8];
264 __le32 j32[CUR_MAX_KEY_LEN/4];
265};
266
267/**
268 * struct ubifs_scan_node - UBIFS scanned node information.
269 * @list: list of scanned nodes
270 * @key: key of node scanned (if it has one)
271 * @sqnum: sequence number
272 * @type: type of node scanned
273 * @offs: offset within LEB of node scanned
274 * @len: length of node scanned
275 * @node: raw node
276 */
277struct ubifs_scan_node {
278 struct list_head list;
279 union ubifs_key key;
280 unsigned long long sqnum;
281 int type;
282 int offs;
283 int len;
284 void *node;
285};
286
287/**
288 * struct ubifs_scan_leb - UBIFS scanned LEB information.
289 * @lnum: logical eraseblock number
290 * @nodes_cnt: number of nodes scanned
291 * @nodes: list of struct ubifs_scan_node
292 * @endpt: end point (and therefore the start of empty space)
293 * @ecc: read returned -EBADMSG
294 * @buf: buffer containing entire LEB scanned
295 */
296struct ubifs_scan_leb {
297 int lnum;
298 int nodes_cnt;
299 struct list_head nodes;
300 int endpt;
301 int ecc;
302 void *buf;
303};
304
305/**
306 * struct ubifs_gced_idx_leb - garbage-collected indexing LEB.
307 * @list: list
308 * @lnum: LEB number
309 * @unmap: OK to unmap this LEB
310 *
311 * This data structure is used to temporarily store garbage-collected indexing
312 * LEBs - they are not released immediately, but only after the next commit.
313 * This is needed to guarantee recoverability.
314 */
315struct ubifs_gced_idx_leb {
316 struct list_head list;
317 int lnum;
318 int unmap;
319};
320
321/**
322 * struct ubifs_inode - UBIFS in-memory inode description.
323 * @vfs_inode: VFS inode description object
324 * @creat_sqnum: sequence number at time of creation
325 * @xattr_size: summarized size of all extended attributes in bytes
326 * @xattr_cnt: count of extended attributes this inode has
327 * @xattr_names: sum of lengths of all extended attribute names belonging to
328 * this inode
329 * @dirty: non-zero if the inode is dirty
330 * @xattr: non-zero if this is an extended attribute inode
331 * @ui_mutex: serializes inode write-back with the rest of VFS operations,
332 * serializes "clean <-> dirty" state changes, protects @dirty,
333 * @ui_size, and @xattr_size
334 * @ui_lock: protects @synced_i_size
335 * @synced_i_size: synchronized size of inode, i.e. the value of inode size
336 * currently stored on the flash; used only for regular file
337 * inodes
338 * @ui_size: inode size used by UBIFS when writing to flash
339 * @flags: inode flags (@UBIFS_COMPR_FL, etc)
340 * @compr_type: default compression type used for this inode
341 * @data_len: length of the data attached to the inode
342 * @data: inode's data
343 *
344 * @ui_mutex exists for two main reasons. First, it prevents inodes from being
345 * written back while UBIFS is changing them in the middle of a VFS
346 * operation. This way UBIFS makes sure the inode fields are consistent. For
347 * example, in 'ubifs_rename()' we change 3 inodes simultaneously, and
348 * write-back must not write any of them before we have finished.
349 *
350 * The second reason is budgeting - UBIFS has to budget all operations. If an
351 * operation is going to mark an inode dirty, it has to allocate budget for
352 * this. It cannot just mark it dirty because there is no guarantee there will
353 * be enough flash space to write the inode back later. This means UBIFS has
354 * to have full control over inode "clean <-> dirty" transitions (and pages
355 * actually). But unfortunately, VFS marks inodes dirty in many places, and it
356 * does not ask the file-system if it is allowed to do so (there is a notifier,
357 * but it is not enough), i.e., there is no mechanism to synchronize with this.
358 * So UBIFS has its own inode dirty flag and its own mutex to serialize
359 * "clean <-> dirty" transitions.
360 *
361 * The @synced_i_size field is used to make sure we never write pages which are
362 * beyond last synchronized inode size. See 'ubifs_writepage()' for more
363 * information.
364 *
365 * The @ui_size is a "shadow" variable for @inode->i_size and UBIFS uses
366 * @ui_size instead of @inode->i_size. The reason for this is that UBIFS cannot
367 * make sure @inode->i_size is always changed under @ui_mutex, because it
368 * cannot call 'vmtruncate()' with @ui_mutex locked, because it would deadlock
369 * with 'ubifs_writepage()' (see file.c). All the other inode fields are
370 * changed under @ui_mutex, so they do not need "shadow" fields. Note, one
371 * could consider reworking the locking to base it on "shadow" fields.
372 */
373struct ubifs_inode {
374 struct inode vfs_inode;
375 unsigned long long creat_sqnum;
376 unsigned int xattr_size;
377 unsigned int xattr_cnt;
378 unsigned int xattr_names;
379 unsigned int dirty:1;
380 unsigned int xattr:1;
381 struct mutex ui_mutex;
382 spinlock_t ui_lock;
383 loff_t synced_i_size;
384 loff_t ui_size;
385 int flags;
386 int compr_type;
387 int data_len;
388 void *data;
389};
390
391/**
392 * struct ubifs_unclean_leb - records a LEB recovered under read-only mode.
393 * @list: list
394 * @lnum: LEB number of recovered LEB
395 * @endpt: offset where recovery ended
396 *
397 * This structure records a LEB identified during recovery that needs to be
398 * cleaned but was not because UBIFS was mounted read-only. The information
399 * is used to clean the LEB when remounting to read-write mode.
400 */
401struct ubifs_unclean_leb {
402 struct list_head list;
403 int lnum;
404 int endpt;
405};
406
407/*
408 * LEB properties flags.
409 *
410 * LPROPS_UNCAT: not categorized
411 * LPROPS_DIRTY: dirty > 0, not index
412 * LPROPS_DIRTY_IDX: dirty + free > UBIFS_CH_SZ and index
413 * LPROPS_FREE: free > 0, not empty, not index
414 * LPROPS_HEAP_CNT: number of heaps used for storing categorized LEBs
415 * LPROPS_EMPTY: LEB is empty, not taken
416 * LPROPS_FREEABLE: free + dirty == leb_size, not index, not taken
417 * LPROPS_FRDI_IDX: free + dirty == leb_size and index, may be taken
418 * LPROPS_CAT_MASK: mask for the LEB categories above
419 * LPROPS_TAKEN: LEB was taken (this flag is not saved on the media)
420 * LPROPS_INDEX: LEB contains indexing nodes (this flag also exists on flash)
421 */
422enum {
423 LPROPS_UNCAT = 0,
424 LPROPS_DIRTY = 1,
425 LPROPS_DIRTY_IDX = 2,
426 LPROPS_FREE = 3,
427 LPROPS_HEAP_CNT = 3,
428 LPROPS_EMPTY = 4,
429 LPROPS_FREEABLE = 5,
430 LPROPS_FRDI_IDX = 6,
431 LPROPS_CAT_MASK = 15,
432 LPROPS_TAKEN = 16,
433 LPROPS_INDEX = 32,
434};
435
436/**
437 * struct ubifs_lprops - logical eraseblock properties.
438 * @free: amount of free space in bytes
439 * @dirty: amount of dirty space in bytes
440 * @flags: LEB properties flags (see above)
441 * @lnum: LEB number
442 * @list: list of same-category lprops (for LPROPS_EMPTY and LPROPS_FREEABLE)
443 * @hpos: heap position in heap of same-category lprops (other categories)
444 */
445struct ubifs_lprops {
446 int free;
447 int dirty;
448 int flags;
449 int lnum;
450 union {
451 struct list_head list;
452 int hpos;
453 };
454};
455
456/**
457 * struct ubifs_lpt_lprops - LPT logical eraseblock properties.
458 * @free: amount of free space in bytes
459 * @dirty: amount of dirty space in bytes
460 * @tgc: trivial GC flag (1 => unmap after commit end)
461 * @cmt: commit flag (1 => reserved for commit)
462 */
463struct ubifs_lpt_lprops {
464 int free;
465 int dirty;
466 unsigned tgc : 1;
467 unsigned cmt : 1;
468};
469
470/**
471 * struct ubifs_lp_stats - statistics of eraseblocks in the main area.
472 * @empty_lebs: number of empty LEBs
473 * @taken_empty_lebs: number of empty LEBs taken for use but not yet written to
474 * @idx_lebs: number of indexing LEBs
475 * @total_free: total free space in bytes
476 * @total_dirty: total dirty space in bytes
477 * @total_used: total used space in bytes (includes only data LEBs)
478 * @total_dead: total dead space in bytes (includes only data LEBs)
479 * @total_dark: total dark space in bytes (includes only data LEBs)
480 *
481 * N.B. total_dirty and total_used are different to other total_* fields,
482 * because they account _all_ LEBs, not just data LEBs.
 * NOTE(review): this conflicts with the @total_used description above, which
 * says it includes only data LEBs - confirm which statement is correct.
483 *
484 * 'taken_empty_lebs' counts the LEBs that are in the transient state of having
485 * been 'taken' for use but not yet written to. 'taken_empty_lebs' is needed
486 * to account correctly for gc_lnum, otherwise 'empty_lebs' could be used
487 * by itself (in which case 'unused_lebs' would be a better name). In the case
488 * of gc_lnum, it is 'taken' at mount time or whenever a LEB is retained by GC,
489 * but unlike other empty LEBs that are 'taken', it may not be written straight
490 * away (i.e. before the next commit start or unmount), so either gc_lnum must
491 * be specially accounted for, or the current approach followed i.e. count it
492 * under 'taken_empty_lebs'.
493 */
494struct ubifs_lp_stats {
495 int empty_lebs;
496 int taken_empty_lebs;
497 int idx_lebs;
498 long long total_free;
499 long long total_dirty;
500 long long total_used;
501 long long total_dead;
502 long long total_dark;
503};
504
505struct ubifs_nnode;
506
507/**
508 * struct ubifs_cnode - LEB Properties Tree common node.
509 * @parent: parent nnode
510 * @cnext: next cnode to commit
511 * @flags: flag bits (%DIRTY_CNODE, %COW_CNODE or %OBSOLETE_CNODE)
512 * @iip: index in parent
513 * @level: level in the tree (zero for pnodes, greater than zero for nnodes)
514 * @num: node number
515 */
516struct ubifs_cnode {
517 struct ubifs_nnode *parent;
518 struct ubifs_cnode *cnext;
519 unsigned long flags;
520 int iip;
521 int level;
522 int num;
523};
524
525/**
526 * struct ubifs_pnode - LEB Properties Tree leaf node.
527 * @parent: parent nnode
528 * @cnext: next cnode to commit
529 * @flags: flags (%DIRTY_LPT_NODE or %OBSOLETE_LPT_NODE)
530 * @iip: index in parent
531 * @level: level in the tree (always zero for pnodes)
532 * @num: node number
533 * @lprops: LEB properties array
534 */
535struct ubifs_pnode {
536 struct ubifs_nnode *parent;
537 struct ubifs_cnode *cnext;
538 unsigned long flags;
539 int iip;
540 int level;
541 int num;
542 struct ubifs_lprops lprops[UBIFS_LPT_FANOUT];
543};
544
545/**
546 * struct ubifs_nbranch - LEB Properties Tree internal node branch.
547 * @lnum: LEB number of child
548 * @offs: offset of child
549 * @nnode: nnode child
550 * @pnode: pnode child
551 * @cnode: cnode child
552 */
553struct ubifs_nbranch {
554 int lnum;
555 int offs;
556 union {
557 struct ubifs_nnode *nnode;
558 struct ubifs_pnode *pnode;
559 struct ubifs_cnode *cnode;
560 };
561};
562
563/**
564 * struct ubifs_nnode - LEB Properties Tree internal node.
565 * @parent: parent nnode
566 * @cnext: next cnode to commit
567 * @flags: flags (%DIRTY_LPT_NODE or %OBSOLETE_LPT_NODE)
568 * @iip: index in parent
569 * @level: level in the tree (always greater than zero for nnodes)
570 * @num: node number
571 * @nbranch: branches to child nodes
572 */
573struct ubifs_nnode {
574 struct ubifs_nnode *parent;
575 struct ubifs_cnode *cnext;
576 unsigned long flags;
577 int iip;
578 int level;
579 int num;
580 struct ubifs_nbranch nbranch[UBIFS_LPT_FANOUT];
581};
582
583/**
584 * struct ubifs_lpt_heap - heap of categorized lprops.
585 * @arr: heap array
586 * @cnt: number in heap
587 * @max_cnt: maximum number allowed in heap
588 *
589 * There are %LPROPS_HEAP_CNT heaps.
590 */
591struct ubifs_lpt_heap {
592 struct ubifs_lprops **arr;
593 int cnt;
594 int max_cnt;
595};
596
597/*
598 * Return codes for LPT scan callback function.
599 *
600 * LPT_SCAN_CONTINUE: continue scanning
601 * LPT_SCAN_ADD: add the LEB properties scanned to the tree in memory
602 * LPT_SCAN_STOP: stop scanning
603 */
604enum {
605 LPT_SCAN_CONTINUE = 0,
606 LPT_SCAN_ADD = 1,
607 LPT_SCAN_STOP = 2,
608};
609
610struct ubifs_info;
611
612/* Callback used by the 'ubifs_lpt_scan_nolock()' function */
613typedef int (*ubifs_lpt_scan_callback)(struct ubifs_info *c,
614 const struct ubifs_lprops *lprops,
615 int in_tree, void *data);
616
617/**
618 * struct ubifs_wbuf - UBIFS write-buffer.
619 * @c: UBIFS file-system description object
620 * @buf: write-buffer (of min. flash I/O unit size)
621 * @lnum: logical eraseblock number the write-buffer points to
622 * @offs: write-buffer offset in this logical eraseblock
623 * @avail: number of bytes available in the write-buffer
624 * @used: number of used bytes in the write-buffer
625 * @dtype: type of data stored in this LEB (%UBI_LONGTERM, %UBI_SHORTTERM,
626 * %UBI_UNKNOWN)
627 * @jhead: journal head the mutex belongs to (note, needed only to shut lockdep
628 * up by 'mutex_lock_nested()')
629 * @sync_callback: write-buffer synchronization callback
630 * @io_mutex: serializes write-buffer I/O
631 * @lock: serializes @buf, @lnum, @offs, @avail, @used, @next_ino and @inodes
632 * fields
633 * @timer: write-buffer timer
634 * @timeout: timer expire interval in jiffies
635 * @need_sync: it is set if its timer expired and needs sync
636 * @next_ino: points to the next position of the following inode number
637 * @inodes: stores the inode numbers of the nodes which are in wbuf
638 *
639 * The write-buffer synchronization callback is called when the write-buffer is
640 * synchronized in order to notify how much space was wasted due to
641 * write-buffer padding and how much free space is left in the LEB.
642 *
643 * Note: the fields @buf, @lnum, @offs, @avail and @used can be read under
644 * spin-lock or mutex because they are written under both mutex and spin-lock.
645 * @buf is appended to under mutex but overwritten under both mutex and
646 * spin-lock. Thus the data between @buf and @buf + @used can be read under
647 * spinlock.
648 */
649struct ubifs_wbuf {
650 struct ubifs_info *c;
651 void *buf;
652 int lnum;
653 int offs;
654 int avail;
655 int used;
656 int dtype;
657 int jhead;
658 int (*sync_callback)(struct ubifs_info *c, int lnum, int free, int pad);
659 struct mutex io_mutex;
660 spinlock_t lock;
661 struct timer_list timer;
662 int timeout;
663 int need_sync;
664 int next_ino;
665 ino_t *inodes;
666};
667
668/**
669 * struct ubifs_bud - bud logical eraseblock.
670 * @lnum: logical eraseblock number
671 * @start: where the (uncommitted) bud data starts
672 * @jhead: journal head number this bud belongs to
673 * @list: link in the list buds belonging to the same journal head
674 * @rb: link in the tree of all buds
675 */
676struct ubifs_bud {
677 int lnum;
678 int start;
679 int jhead;
680 struct list_head list;
681 struct rb_node rb;
682};
683
684/**
685 * struct ubifs_jhead - journal head.
686 * @wbuf: head's write-buffer
687 * @buds_list: list of bud LEBs belonging to this journal head
688 *
689 * Note, the @buds_list list is protected by the @c->buds_lock.
690 */
691struct ubifs_jhead {
692 struct ubifs_wbuf wbuf;
693 struct list_head buds_list;
694};
695
696/**
697 * struct ubifs_zbranch - key/coordinate/length branch stored in znodes.
698 * @key: key
699 * @znode: znode address in memory
 * @leaf: shares storage with @znode (anonymous union); NOTE(review): presumably
 * the target leaf node when the branch does not point to a znode - confirm
700 * @lnum: LEB number of the indexing node
701 * @offs: offset of the indexing node within @lnum
702 * @len: target node length
703 */
704struct ubifs_zbranch {
705 union ubifs_key key;
706 union {
707 struct ubifs_znode *znode;
708 void *leaf;
709 };
710 int lnum;
711 int offs;
712 int len;
713};
714
715/**
716 * struct ubifs_znode - in-memory representation of an indexing node.
717 * @parent: parent znode or NULL if it is the root
718 * @cnext: next znode to commit
719 * @flags: znode flags (%DIRTY_ZNODE, %COW_ZNODE or %OBSOLETE_ZNODE)
720 * @time: last access time (seconds)
721 * @level: level of the entry in the TNC tree
722 * @child_cnt: count of child znodes
723 * @iip: index in parent's zbranch array
724 * @alt: lower bound of key range has altered i.e. child inserted at slot 0
725 * @lnum: LEB number of the corresponding indexing node (CONFIG_UBIFS_FS_DEBUG only)
726 * @offs: offset of the corresponding indexing node (CONFIG_UBIFS_FS_DEBUG only)
727 * @len: length of the corresponding indexing node (CONFIG_UBIFS_FS_DEBUG only)
728 * @zbranch: array of znode branches (@c->fanout elements)
729 */
730struct ubifs_znode {
731 struct ubifs_znode *parent;
732 struct ubifs_znode *cnext;
733 unsigned long flags;
734 unsigned long time;
735 int level;
736 int child_cnt;
737 int iip;
738 int alt;
739#ifdef CONFIG_UBIFS_FS_DEBUG
740 int lnum, offs, len;
741#endif
742 struct ubifs_zbranch zbranch[];
743};
744
745/**
746 * struct ubifs_node_range - node length range description data structure.
747 * @len: fixed node length
748 * @min_len: minimum possible node length
749 * @max_len: maximum possible node length
750 *
751 * If @max_len is %0, the node has fixed length @len.
752 */
753struct ubifs_node_range {
754 union {
755 int len;
756 int min_len;
757 };
758 int max_len;
759};
760
761/**
762 * struct ubifs_compressor - UBIFS compressor description structure.
763 * @compr_type: compressor type (%UBIFS_COMPR_LZO, etc)
764 * @cc: cryptoapi compressor handle
765 * @comp_mutex: mutex used during compression
766 * @decomp_mutex: mutex used during decompression
767 * @name: compressor name
768 * @capi_name: cryptoapi compressor name
769 */
770struct ubifs_compressor {
771 int compr_type;
772 struct crypto_comp *cc;
773 struct mutex *comp_mutex;
774 struct mutex *decomp_mutex;
775 const char *name;
776 const char *capi_name;
777};
778
779/**
780 * struct ubifs_budget_req - budget requirements of an operation.
781 *
782 * @fast: non-zero if the budgeting should try to aquire budget quickly and
783 * should not try to call write-back
784 * @recalculate: non-zero if @idx_growth, @data_growth, and @dd_growth fields
785 * have to be re-calculated
786 * @new_page: non-zero if the operation adds a new page
787 * @dirtied_page: non-zero if the operation makes a page dirty
788 * @new_dent: non-zero if the operation adds a new directory entry
789 * @mod_dent: non-zero if the operation removes or modifies an existing
790 * directory entry
791 * @new_ino: non-zero if the operation adds a new inode
792 * @new_ino_d: how much data newly created inode contains
793 * @dirtied_ino: how many inodes the operation makes dirty
794 * @dirtied_ino_d: how much data dirtied inode contains
795 * @idx_growth: how much the index will supposedly grow
796 * @data_growth: how much new data the operation will supposedly add
797 * @dd_growth: how much data that makes other data dirty the operation will
798 * supposedly add
799 *
800 * @idx_growth, @data_growth and @dd_growth are not used in budget request. The
801 * budgeting subsystem caches index and data growth values there to avoid
802 * re-calculating them when the budget is released. However, if @idx_growth is
803 * %-1, it is calculated by the release function using other fields.
804 *
805 * An inode may contain 4KiB of data at max., thus the width of @new_ino_d
806 * is 13 bits, and the width of @dirtied_ino_d is 15 bits, because up to 4
807 * inodes may be made dirty by the re-name operation.
808 */
809struct ubifs_budget_req {
810 unsigned int fast:1;
811 unsigned int recalculate:1;
812 unsigned int new_page:1;
813 unsigned int dirtied_page:1;
814 unsigned int new_dent:1;
815 unsigned int mod_dent:1;
816 unsigned int new_ino:1;
817 unsigned int new_ino_d:13;
818#ifndef UBIFS_DEBUG
819 unsigned int dirtied_ino:4;
820 unsigned int dirtied_ino_d:15;
821#else
822 /* Not bit-fields to check for overflows */
823 unsigned int dirtied_ino;
824 unsigned int dirtied_ino_d;
825#endif
826 int idx_growth;
827 int data_growth;
828 int dd_growth;
829};
830
831/**
832 * struct ubifs_orphan - stores the inode number of an orphan.
833 * @rb: rb-tree node of rb-tree of orphans sorted by inode number
834 * @list: list head of list of orphans in order added
835 * @new_list: list head of list of orphans added since the last commit
836 * @cnext: next orphan to commit
837 * @dnext: next orphan to delete
838 * @inum: inode number
839 * @new: %1 => added since the last commit, otherwise %0
840 */
841struct ubifs_orphan {
842 struct rb_node rb;
843 struct list_head list;
844 struct list_head new_list;
845 struct ubifs_orphan *cnext;
846 struct ubifs_orphan *dnext;
847 ino_t inum;
848 int new;
849};
850
851/**
852 * struct ubifs_mount_opts - UBIFS-specific mount options information.
853 * @unmount_mode: selected unmount mode (%0 default, %1 normal, %2 fast)
854 */
855struct ubifs_mount_opts {
856 unsigned int unmount_mode:2;
857};
858
859/**
860 * struct ubifs_info - UBIFS file-system description data structure
861 * (per-superblock).
862 * @vfs_sb: VFS @struct super_block object
863 * @bdi: backing device info object to make VFS happy and disable readahead
864 *
865 * @highest_inum: highest used inode number
866 * @vfs_gen: VFS inode generation counter
867 * @max_sqnum: current global sequence number
868 * @cmt_no: commit number (last successfully completed commit)
869 * @cnt_lock: protects @highest_inum, @vfs_gen, and @max_sqnum counters
870 * @fmt_version: UBIFS on-flash format version
871 * @uuid: UUID from super block
872 *
873 * @lhead_lnum: log head logical eraseblock number
874 * @lhead_offs: log head offset
875 * @ltail_lnum: log tail logical eraseblock number (offset is always 0)
876 * @log_mutex: protects the log, @lhead_lnum, @lhead_offs, @ltail_lnum, and
877 * @bud_bytes
878 * @min_log_bytes: minimum required number of bytes in the log
879 * @cmt_bud_bytes: used during commit to temporarily store the amount of bytes in
880 * committed buds
881 *
882 * @buds: tree of all buds indexed by bud LEB number
883 * @bud_bytes: how many bytes of flash is used by buds
884 * @buds_lock: protects the @buds tree, @bud_bytes, and per-journal head bud
885 * lists
886 * @jhead_cnt: count of journal heads
887 * @jheads: journal heads (head zero is base head)
888 * @max_bud_bytes: maximum number of bytes allowed in buds
889 * @bg_bud_bytes: number of bud bytes when background commit is initiated
890 * @old_buds: buds to be released after commit ends
891 * @max_bud_cnt: maximum number of buds
892 *
893 * @commit_sem: synchronizes committer with other processes
894 * @cmt_state: commit state
895 * @cs_lock: commit state lock
896 * @cmt_wq: wait queue to sleep on if the log is full and a commit is running
897 * @fast_unmount: do not run journal commit before un-mounting
898 * @big_lpt: flag that LPT is too big to write whole during commit
899 * @check_lpt_free: flag that indicates LPT GC may be needed
900 * @nospace: non-zero if the file-system does not have flash space (used as
901 * optimization)
902 * @nospace_rp: the same as @nospace, but additionally means that even reserved
903 * pool is full
904 *
905 * @tnc_mutex: protects the Tree Node Cache (TNC), @zroot, @cnext, @enext, and
906 * @calc_idx_sz
907 * @zroot: zbranch which points to the root index node and znode
908 * @cnext: next znode to commit
909 * @enext: next znode to commit to empty space
910 * @gap_lebs: array of LEBs used by the in-gaps commit method
911 * @cbuf: commit buffer
912 * @ileb_buf: buffer for commit in-the-gaps method
913 * @ileb_len: length of data in ileb_buf
914 * @ihead_lnum: LEB number of index head
915 * @ihead_offs: offset of index head
916 * @ilebs: pre-allocated index LEBs
917 * @ileb_cnt: number of pre-allocated index LEBs
918 * @ileb_nxt: next pre-allocated index LEBs
919 * @old_idx: tree of index nodes obsoleted since the last commit start
920 * @bottom_up_buf: a buffer which is used by 'dirty_cow_bottom_up()' in tnc.c
921 * @new_ihead_lnum: used by debugging to check ihead_lnum
922 * @new_ihead_offs: used by debugging to check ihead_offs
923 *
924 * @mst_node: master node
925 * @mst_offs: offset of valid master node
926 * @mst_mutex: protects the master node area, @mst_node, and @mst_offs
927 *
928 * @log_lebs: number of logical eraseblocks in the log
929 * @log_bytes: log size in bytes
930 * @log_last: last LEB of the log
931 * @lpt_lebs: number of LEBs used for lprops table
932 * @lpt_first: first LEB of the lprops table area
933 * @lpt_last: last LEB of the lprops table area
934 * @orph_lebs: number of LEBs used for the orphan area
935 * @orph_first: first LEB of the orphan area
936 * @orph_last: last LEB of the orphan area
937 * @main_lebs: count of LEBs in the main area
938 * @main_first: first LEB of the main area
939 * @main_bytes: main area size in bytes
940 * @default_compr: default compression algorithm (%UBIFS_COMPR_LZO, etc)
941 *
942 * @key_hash_type: type of the key hash
943 * @key_hash: direntry key hash function
944 * @key_fmt: key format
945 * @key_len: key length
946 * @fanout: fanout of the index tree (number of links per indexing node)
947 *
948 * @min_io_size: minimal input/output unit size
949 * @min_io_shift: number of bits in @min_io_size minus one
950 * @leb_size: logical eraseblock size in bytes
951 * @half_leb_size: half LEB size
952 * @leb_cnt: count of logical eraseblocks
953 * @max_leb_cnt: maximum count of logical eraseblocks
954 * @old_leb_cnt: count of logical eraseblocks before re-size
955 * @ro_media: the underlying UBI volume is read-only
956 *
957 * @dirty_pg_cnt: number of dirty pages (not used)
958 * @dirty_zn_cnt: number of dirty znodes
959 * @clean_zn_cnt: number of clean znodes
960 *
961 * @budg_idx_growth: amount of bytes budgeted for index growth
962 * @budg_data_growth: amount of bytes budgeted for cached data
963 * @budg_dd_growth: amount of bytes budgeted for cached data that will make
964 * other data dirty
965 * @budg_uncommitted_idx: amount of bytes that were budgeted for growth of the index,
966 * but which still have to be taken into account because
967 * the index has not been committed so far
968 * @space_lock: protects @budg_idx_growth, @budg_data_growth, @budg_dd_growth,
969 * @budg_uncommitted_idx, @min_idx_lebs, @old_idx_sz, and @lst
970 * @min_idx_lebs: minimum number of LEBs required for the index
971 * @old_idx_sz: size of index on flash
972 * @calc_idx_sz: temporary variable which is used to calculate new index size
973 * (contains accurate new index size at end of TNC commit start)
974 * @lst: lprops statistics
975 *
976 * @page_budget: budget for a page
977 * @inode_budget: budget for an inode
978 * @dent_budget: budget for a directory entry
979 *
980 * @ref_node_alsz: size of the LEB reference node aligned to the min. flash
981 * I/O unit
982 * @mst_node_alsz: master node aligned size
983 * @min_idx_node_sz: minimum indexing node aligned on 8-bytes boundary
984 * @max_idx_node_sz: maximum indexing node aligned on 8-bytes boundary
985 * @max_inode_sz: maximum possible inode size in bytes
986 * @max_znode_sz: size of znode in bytes
987 * @dead_wm: LEB dead space watermark
988 * @dark_wm: LEB dark space watermark
989 * @block_cnt: count of 4KiB blocks on the FS
990 *
991 * @ranges: UBIFS node length ranges
992 * @ubi: UBI volume descriptor
993 * @di: UBI device information
994 * @vi: UBI volume information
995 *
996 * @orph_tree: rb-tree of orphan inode numbers
997 * @orph_list: list of orphan inode numbers in order added
998 * @orph_new: list of orphan inode numbers added since last commit
999 * @orph_cnext: next orphan to commit
1000 * @orph_dnext: next orphan to delete
1001 * @orphan_lock: lock for orph_tree and orph_new
1002 * @orph_buf: buffer for orphan nodes
1003 * @new_orphans: number of orphans since last commit
1004 * @cmt_orphans: number of orphans being committed
1005 * @tot_orphans: number of orphans in the rb_tree
1006 * @max_orphans: maximum number of orphans allowed
1007 * @ohead_lnum: orphan head LEB number
1008 * @ohead_offs: orphan head offset
1009 * @no_orphs: non-zero if there are no orphans
1010 *
1011 * @bgt: UBIFS background thread
1012 * @bgt_name: background thread name
1013 * @need_bgt: if background thread should run
1014 * @need_wbuf_sync: if write-buffers have to be synchronized
1015 *
1016 * @gc_lnum: LEB number used for garbage collection
1017 * @sbuf: a buffer of LEB size used by GC and replay for scanning
1018 * @idx_gc: list of index LEBs that have been garbage collected
1019 * @idx_gc_cnt: number of elements on the idx_gc list
1020 *
1021 * @infos_list: links all 'ubifs_info' objects
1022 * @umount_mutex: serializes shrinker and un-mount
1023 * @shrinker_run_no: shrinker run number
1024 *
1025 * @space_bits: number of bits needed to record free or dirty space
1026 * @lpt_lnum_bits: number of bits needed to record a LEB number in the LPT
1027 * @lpt_offs_bits: number of bits needed to record an offset in the LPT
1028 * @lpt_spc_bits: number of bits needed to record space in the LPT
1029 * @pcnt_bits: number of bits needed to record pnode or nnode number
1030 * @lnum_bits: number of bits needed to record LEB number
1031 * @nnode_sz: size of on-flash nnode
1032 * @pnode_sz: size of on-flash pnode
1033 * @ltab_sz: size of on-flash LPT lprops table
1034 * @lsave_sz: size of on-flash LPT save table
1035 * @pnode_cnt: number of pnodes
1036 * @nnode_cnt: number of nnodes
1037 * @lpt_hght: height of the LPT
1038 * @pnodes_have: number of pnodes in memory
1039 *
1040 * @lp_mutex: protects lprops table and all the other lprops-related fields
1041 * @lpt_lnum: LEB number of the root nnode of the LPT
1042 * @lpt_offs: offset of the root nnode of the LPT
1043 * @nhead_lnum: LEB number of LPT head
1044 * @nhead_offs: offset of LPT head
1045 * @lpt_drty_flgs: dirty flags for LPT special nodes e.g. ltab
1046 * @dirty_nn_cnt: number of dirty nnodes
1047 * @dirty_pn_cnt: number of dirty pnodes
1048 * @lpt_sz: LPT size
1049 * @lpt_nod_buf: buffer for an on-flash nnode or pnode
1050 * @lpt_buf: buffer of LEB size used by LPT
1051 * @nroot: address in memory of the root nnode of the LPT
1052 * @lpt_cnext: next LPT node to commit
1053 * @lpt_heap: array of heaps of categorized lprops
1054 * @dirty_idx: a (reverse sorted) copy of the LPROPS_DIRTY_IDX heap as at
1055 * previous commit start
1056 * @uncat_list: list of un-categorized LEBs
1057 * @empty_list: list of empty LEBs
1058 * @freeable_list: list of freeable non-index LEBs (free + dirty == leb_size)
1059 * @frdi_idx_list: list of freeable index LEBs (free + dirty == leb_size)
1060 * @freeable_cnt: number of freeable LEBs in @freeable_list
1061 *
1062 * @ltab_lnum: LEB number of LPT's own lprops table
1063 * @ltab_offs: offset of LPT's own lprops table
1064 * @ltab: LPT's own lprops table
1065 * @ltab_cmt: LPT's own lprops table (commit copy)
1066 * @lsave_cnt: number of LEB numbers in LPT's save table
1067 * @lsave_lnum: LEB number of LPT's save table
1068 * @lsave_offs: offset of LPT's save table
1069 * @lsave: LPT's save table
1070 * @lscan_lnum: LEB number of last LPT scan
1071 *
1072 * @rp_size: size of the reserved pool in bytes
1073 * @report_rp_size: size of the reserved pool reported to user-space
1074 * @rp_uid: reserved pool user ID
1075 * @rp_gid: reserved pool group ID
1076 *
1077 * @empty: if the UBI device is empty
1078 * @replay_tree: temporary tree used during journal replay
1079 * @replay_list: temporary list used during journal replay
1080 * @replay_buds: list of buds to replay
1081 * @cs_sqnum: sequence number of first node in the log (commit start node)
1082 * @replay_sqnum: sequence number of node currently being replayed
1083 * @need_recovery: file-system needs recovery
1084 * @replaying: set to %1 during journal replay
1085 * @unclean_leb_list: LEBs to recover when mounting ro to rw
1086 * @rcvrd_mst_node: recovered master node to write when mounting ro to rw
1087 * @size_tree: inode size information for recovery
1088 * @remounting_rw: set while remounting from ro to rw (sb flags have MS_RDONLY)
1089 * @mount_opts: UBIFS-specific mount options
1090 *
1091 * @dbg_buf: a buffer of LEB size used for debugging purposes
1092 * @old_zroot: old index root - used by 'dbg_check_old_index()'
1093 * @old_zroot_level: old index root level - used by 'dbg_check_old_index()'
1094 * @old_zroot_sqnum: old index root sqnum - used by 'dbg_check_old_index()'
1095 * @failure_mode: failure mode for recovery testing
1096 * @fail_delay: 0=>don't delay, 1=>delay a time, 2=>delay a number of calls
1097 * @fail_timeout: time in jiffies when delay of failure mode expires
1098 * @fail_cnt: current number of calls to failure mode I/O functions
1099 * @fail_cnt_max: number of calls by which to delay failure mode
1100 */
1101struct ubifs_info {
1102 struct super_block *vfs_sb;
1103 struct backing_dev_info bdi;
1104
1105 ino_t highest_inum;
1106 unsigned int vfs_gen;
1107 unsigned long long max_sqnum;
1108 unsigned long long cmt_no;
1109 spinlock_t cnt_lock;
1110 int fmt_version;
1111 unsigned char uuid[16];
1112
1113 int lhead_lnum;
1114 int lhead_offs;
1115 int ltail_lnum;
1116 struct mutex log_mutex;
1117 int min_log_bytes;
1118 long long cmt_bud_bytes;
1119
1120 struct rb_root buds;
1121 long long bud_bytes;
1122 spinlock_t buds_lock;
1123 int jhead_cnt;
1124 struct ubifs_jhead *jheads;
1125 long long max_bud_bytes;
1126 long long bg_bud_bytes;
1127 struct list_head old_buds;
1128 int max_bud_cnt;
1129
1130 struct rw_semaphore commit_sem;
1131 int cmt_state;
1132 spinlock_t cs_lock;
1133 wait_queue_head_t cmt_wq;
1134 unsigned int fast_unmount:1;
1135 unsigned int big_lpt:1;
1136 unsigned int check_lpt_free:1;
1137 unsigned int nospace:1;
1138 unsigned int nospace_rp:1;
1139
1140 struct mutex tnc_mutex;
1141 struct ubifs_zbranch zroot;
1142 struct ubifs_znode *cnext;
1143 struct ubifs_znode *enext;
1144 int *gap_lebs;
1145 void *cbuf;
1146 void *ileb_buf;
1147 int ileb_len;
1148 int ihead_lnum;
1149 int ihead_offs;
1150 int *ilebs;
1151 int ileb_cnt;
1152 int ileb_nxt;
1153 struct rb_root old_idx;
1154 int *bottom_up_buf;
1155#ifdef CONFIG_UBIFS_FS_DEBUG
1156 int new_ihead_lnum;
1157 int new_ihead_offs;
1158#endif
1159
1160 struct ubifs_mst_node *mst_node;
1161 int mst_offs;
1162 struct mutex mst_mutex;
1163
1164 int log_lebs;
1165 long long log_bytes;
1166 int log_last;
1167 int lpt_lebs;
1168 int lpt_first;
1169 int lpt_last;
1170 int orph_lebs;
1171 int orph_first;
1172 int orph_last;
1173 int main_lebs;
1174 int main_first;
1175 long long main_bytes;
1176 int default_compr;
1177
1178 uint8_t key_hash_type;
1179 uint32_t (*key_hash)(const char *str, int len);
1180 int key_fmt;
1181 int key_len;
1182 int fanout;
1183
1184 int min_io_size;
1185 int min_io_shift;
1186 int leb_size;
1187 int half_leb_size;
1188 int leb_cnt;
1189 int max_leb_cnt;
1190 int old_leb_cnt;
1191 int ro_media;
1192
1193 atomic_long_t dirty_pg_cnt;
1194 atomic_long_t dirty_zn_cnt;
1195 atomic_long_t clean_zn_cnt;
1196
1197 long long budg_idx_growth;
1198 long long budg_data_growth;
1199 long long budg_dd_growth;
1200 long long budg_uncommitted_idx;
1201 spinlock_t space_lock;
1202 int min_idx_lebs;
1203 unsigned long long old_idx_sz;
1204 unsigned long long calc_idx_sz;
1205 struct ubifs_lp_stats lst;
1206
1207 int page_budget;
1208 int inode_budget;
1209 int dent_budget;
1210
1211 int ref_node_alsz;
1212 int mst_node_alsz;
1213 int min_idx_node_sz;
1214 int max_idx_node_sz;
1215 long long max_inode_sz;
1216 int max_znode_sz;
1217 int dead_wm;
1218 int dark_wm;
1219 int block_cnt;
1220
1221 struct ubifs_node_range ranges[UBIFS_NODE_TYPES_CNT];
1222 struct ubi_volume_desc *ubi;
1223 struct ubi_device_info di;
1224 struct ubi_volume_info vi;
1225
1226 struct rb_root orph_tree;
1227 struct list_head orph_list;
1228 struct list_head orph_new;
1229 struct ubifs_orphan *orph_cnext;
1230 struct ubifs_orphan *orph_dnext;
1231 spinlock_t orphan_lock;
1232 void *orph_buf;
1233 int new_orphans;
1234 int cmt_orphans;
1235 int tot_orphans;
1236 int max_orphans;
1237 int ohead_lnum;
1238 int ohead_offs;
1239 int no_orphs;
1240
1241 struct task_struct *bgt;
1242 char bgt_name[sizeof(BGT_NAME_PATTERN) + 9];
1243 int need_bgt;
1244 int need_wbuf_sync;
1245
1246 int gc_lnum;
1247 void *sbuf;
1248 struct list_head idx_gc;
1249 int idx_gc_cnt;
1250
1251 struct list_head infos_list;
1252 struct mutex umount_mutex;
1253 unsigned int shrinker_run_no;
1254
1255 int space_bits;
1256 int lpt_lnum_bits;
1257 int lpt_offs_bits;
1258 int lpt_spc_bits;
1259 int pcnt_bits;
1260 int lnum_bits;
1261 int nnode_sz;
1262 int pnode_sz;
1263 int ltab_sz;
1264 int lsave_sz;
1265 int pnode_cnt;
1266 int nnode_cnt;
1267 int lpt_hght;
1268 int pnodes_have;
1269
1270 struct mutex lp_mutex;
1271 int lpt_lnum;
1272 int lpt_offs;
1273 int nhead_lnum;
1274 int nhead_offs;
1275 int lpt_drty_flgs;
1276 int dirty_nn_cnt;
1277 int dirty_pn_cnt;
1278 long long lpt_sz;
1279 void *lpt_nod_buf;
1280 void *lpt_buf;
1281 struct ubifs_nnode *nroot;
1282 struct ubifs_cnode *lpt_cnext;
1283 struct ubifs_lpt_heap lpt_heap[LPROPS_HEAP_CNT];
1284 struct ubifs_lpt_heap dirty_idx;
1285 struct list_head uncat_list;
1286 struct list_head empty_list;
1287 struct list_head freeable_list;
1288 struct list_head frdi_idx_list;
1289 int freeable_cnt;
1290
1291 int ltab_lnum;
1292 int ltab_offs;
1293 struct ubifs_lpt_lprops *ltab;
1294 struct ubifs_lpt_lprops *ltab_cmt;
1295 int lsave_cnt;
1296 int lsave_lnum;
1297 int lsave_offs;
1298 int *lsave;
1299 int lscan_lnum;
1300
1301 long long rp_size;
1302 long long report_rp_size;
1303 uid_t rp_uid;
1304 gid_t rp_gid;
1305
1306 /* The below fields are used only during mounting and re-mounting */
1307 int empty;
1308 struct rb_root replay_tree;
1309 struct list_head replay_list;
1310 struct list_head replay_buds;
1311 unsigned long long cs_sqnum;
1312 unsigned long long replay_sqnum;
1313 int need_recovery;
1314 int replaying;
1315 struct list_head unclean_leb_list;
1316 struct ubifs_mst_node *rcvrd_mst_node;
1317 struct rb_root size_tree;
1318 int remounting_rw;
1319 struct ubifs_mount_opts mount_opts;
1320
1321#ifdef CONFIG_UBIFS_FS_DEBUG
1322 void *dbg_buf;
1323 struct ubifs_zbranch old_zroot;
1324 int old_zroot_level;
1325 unsigned long long old_zroot_sqnum;
1326 int failure_mode;
1327 int fail_delay;
1328 unsigned long fail_timeout;
1329 unsigned int fail_cnt;
1330 unsigned int fail_cnt_max;
1331#endif
1332};
1333
1334extern struct list_head ubifs_infos;
1335extern spinlock_t ubifs_infos_lock;
1336extern atomic_long_t ubifs_clean_zn_cnt;
1337extern struct kmem_cache *ubifs_inode_slab;
1338extern struct super_operations ubifs_super_operations;
1339extern struct address_space_operations ubifs_file_address_operations;
1340extern struct file_operations ubifs_file_operations;
1341extern struct inode_operations ubifs_file_inode_operations;
1342extern struct file_operations ubifs_dir_operations;
1343extern struct inode_operations ubifs_dir_inode_operations;
1344extern struct inode_operations ubifs_symlink_inode_operations;
1345extern struct backing_dev_info ubifs_backing_dev_info;
1346extern struct ubifs_compressor *ubifs_compressors[UBIFS_COMPR_TYPES_CNT];
1347
1348/* io.c */
1349int ubifs_wbuf_write_nolock(struct ubifs_wbuf *wbuf, void *buf, int len);
1350int ubifs_wbuf_seek_nolock(struct ubifs_wbuf *wbuf, int lnum, int offs,
1351 int dtype);
1352int ubifs_wbuf_init(struct ubifs_info *c, struct ubifs_wbuf *wbuf);
1353int ubifs_read_node(const struct ubifs_info *c, void *buf, int type, int len,
1354 int lnum, int offs);
1355int ubifs_read_node_wbuf(struct ubifs_wbuf *wbuf, void *buf, int type, int len,
1356 int lnum, int offs);
1357int ubifs_write_node(struct ubifs_info *c, void *node, int len, int lnum,
1358 int offs, int dtype);
1359int ubifs_check_node(const struct ubifs_info *c, const void *buf, int lnum,
1360 int offs, int quiet);
1361void ubifs_prepare_node(struct ubifs_info *c, void *buf, int len, int pad);
1362void ubifs_prep_grp_node(struct ubifs_info *c, void *node, int len, int last);
1363int ubifs_io_init(struct ubifs_info *c);
1364void ubifs_pad(const struct ubifs_info *c, void *buf, int pad);
1365int ubifs_wbuf_sync_nolock(struct ubifs_wbuf *wbuf);
1366int ubifs_bg_wbufs_sync(struct ubifs_info *c);
1367void ubifs_wbuf_add_ino_nolock(struct ubifs_wbuf *wbuf, ino_t inum);
1368int ubifs_sync_wbufs_by_inode(struct ubifs_info *c, struct inode *inode);
1369
1370/* scan.c */
1371struct ubifs_scan_leb *ubifs_scan(const struct ubifs_info *c, int lnum,
1372 int offs, void *sbuf);
1373void ubifs_scan_destroy(struct ubifs_scan_leb *sleb);
1374int ubifs_scan_a_node(const struct ubifs_info *c, void *buf, int len, int lnum,
1375 int offs, int quiet);
1376struct ubifs_scan_leb *ubifs_start_scan(const struct ubifs_info *c, int lnum,
1377 int offs, void *sbuf);
1378void ubifs_end_scan(const struct ubifs_info *c, struct ubifs_scan_leb *sleb,
1379 int lnum, int offs);
1380int ubifs_add_snod(const struct ubifs_info *c, struct ubifs_scan_leb *sleb,
1381 void *buf, int offs);
1382void ubifs_scanned_corruption(const struct ubifs_info *c, int lnum, int offs,
1383 void *buf);
1384
1385/* log.c */
1386void ubifs_add_bud(struct ubifs_info *c, struct ubifs_bud *bud);
1387void ubifs_create_buds_lists(struct ubifs_info *c);
1388int ubifs_add_bud_to_log(struct ubifs_info *c, int jhead, int lnum, int offs);
1389struct ubifs_bud *ubifs_search_bud(struct ubifs_info *c, int lnum);
1390struct ubifs_wbuf *ubifs_get_wbuf(struct ubifs_info *c, int lnum);
1391int ubifs_log_start_commit(struct ubifs_info *c, int *ltail_lnum);
1392int ubifs_log_end_commit(struct ubifs_info *c, int new_ltail_lnum);
1393int ubifs_log_post_commit(struct ubifs_info *c, int old_ltail_lnum);
1394int ubifs_consolidate_log(struct ubifs_info *c);
1395
1396/* journal.c */
1397int ubifs_jnl_update(struct ubifs_info *c, const struct inode *dir,
1398 const struct qstr *nm, const struct inode *inode,
1399 int deletion, int xent);
1400int ubifs_jnl_write_data(struct ubifs_info *c, const struct inode *inode,
1401 const union ubifs_key *key, const void *buf, int len);
1402int ubifs_jnl_write_inode(struct ubifs_info *c, const struct inode *inode,
1403 int last_reference);
1404int ubifs_jnl_rename(struct ubifs_info *c, const struct inode *old_dir,
1405 const struct dentry *old_dentry,
1406 const struct inode *new_dir,
1407 const struct dentry *new_dentry, int sync);
1408int ubifs_jnl_truncate(struct ubifs_info *c, const struct inode *inode,
1409 loff_t old_size, loff_t new_size);
1410int ubifs_jnl_delete_xattr(struct ubifs_info *c, const struct inode *host,
1411 const struct inode *inode, const struct qstr *nm);
1412int ubifs_jnl_change_xattr(struct ubifs_info *c, const struct inode *inode1,
1413 const struct inode *inode2);
1414
1415/* budget.c */
1416int ubifs_budget_space(struct ubifs_info *c, struct ubifs_budget_req *req);
1417void ubifs_release_budget(struct ubifs_info *c, struct ubifs_budget_req *req);
1418void ubifs_release_dirty_inode_budget(struct ubifs_info *c,
1419 struct ubifs_inode *ui);
1420int ubifs_budget_inode_op(struct ubifs_info *c, struct inode *inode,
1421 struct ubifs_budget_req *req);
1422void ubifs_release_ino_dirty(struct ubifs_info *c, struct inode *inode,
1423 struct ubifs_budget_req *req);
1424void ubifs_cancel_ino_op(struct ubifs_info *c, struct inode *inode,
1425 struct ubifs_budget_req *req);
1426long long ubifs_budg_get_free_space(struct ubifs_info *c);
1427int ubifs_calc_min_idx_lebs(struct ubifs_info *c);
1428void ubifs_convert_page_budget(struct ubifs_info *c);
1429long long ubifs_calc_available(const struct ubifs_info *c, int min_idx_lebs);
1430
1431/* find.c */
1432int ubifs_find_free_space(struct ubifs_info *c, int min_space, int *free,
1433 int squeeze);
1434int ubifs_find_free_leb_for_idx(struct ubifs_info *c);
1435int ubifs_find_dirty_leb(struct ubifs_info *c, struct ubifs_lprops *ret_lp,
1436 int min_space, int pick_free);
1437int ubifs_find_dirty_idx_leb(struct ubifs_info *c);
1438int ubifs_save_dirty_idx_lnums(struct ubifs_info *c);
1439
1440/* tnc.c */
1441int ubifs_lookup_level0(struct ubifs_info *c, const union ubifs_key *key,
1442 struct ubifs_znode **zn, int *n);
1443int ubifs_tnc_lookup(struct ubifs_info *c, const union ubifs_key *key,
1444 void *node);
1445int ubifs_tnc_lookup_nm(struct ubifs_info *c, const union ubifs_key *key,
1446 void *node, const struct qstr *nm);
1447int ubifs_tnc_locate(struct ubifs_info *c, const union ubifs_key *key,
1448 void *node, int *lnum, int *offs);
1449int ubifs_tnc_add(struct ubifs_info *c, const union ubifs_key *key, int lnum,
1450 int offs, int len);
1451int ubifs_tnc_replace(struct ubifs_info *c, const union ubifs_key *key,
1452 int old_lnum, int old_offs, int lnum, int offs, int len);
1453int ubifs_tnc_add_nm(struct ubifs_info *c, const union ubifs_key *key,
1454 int lnum, int offs, int len, const struct qstr *nm);
1455int ubifs_tnc_remove(struct ubifs_info *c, const union ubifs_key *key);
1456int ubifs_tnc_remove_nm(struct ubifs_info *c, const union ubifs_key *key,
1457 const struct qstr *nm);
1458int ubifs_tnc_remove_range(struct ubifs_info *c, union ubifs_key *from_key,
1459 union ubifs_key *to_key);
1460int ubifs_tnc_remove_ino(struct ubifs_info *c, ino_t inum);
1461struct ubifs_dent_node *ubifs_tnc_next_ent(struct ubifs_info *c,
1462 union ubifs_key *key,
1463 const struct qstr *nm);
1464void ubifs_tnc_close(struct ubifs_info *c);
1465int ubifs_tnc_has_node(struct ubifs_info *c, union ubifs_key *key, int level,
1466 int lnum, int offs, int is_idx);
1467int ubifs_dirty_idx_node(struct ubifs_info *c, union ubifs_key *key, int level,
1468 int lnum, int offs);
1469/* Shared by tnc.c for tnc_commit.c */
1470void destroy_old_idx(struct ubifs_info *c);
1471int is_idx_node_in_tnc(struct ubifs_info *c, union ubifs_key *key, int level,
1472 int lnum, int offs);
1473int insert_old_idx_znode(struct ubifs_info *c, struct ubifs_znode *znode);
1474
1475/* tnc_misc.c */
1476struct ubifs_znode *ubifs_tnc_levelorder_next(struct ubifs_znode *zr,
1477 struct ubifs_znode *znode);
1478int ubifs_search_zbranch(const struct ubifs_info *c,
1479 const struct ubifs_znode *znode,
1480 const union ubifs_key *key, int *n);
1481struct ubifs_znode *ubifs_tnc_postorder_first(struct ubifs_znode *znode);
1482struct ubifs_znode *ubifs_tnc_postorder_next(struct ubifs_znode *znode);
1483long ubifs_destroy_tnc_subtree(struct ubifs_znode *zr);
1484struct ubifs_znode *ubifs_load_znode(struct ubifs_info *c,
1485 struct ubifs_zbranch *zbr,
1486 struct ubifs_znode *parent, int iip);
1487int ubifs_tnc_read_node(struct ubifs_info *c, struct ubifs_zbranch *zbr,
1488 void *node);
1489
1490/* tnc_commit.c */
1491int ubifs_tnc_start_commit(struct ubifs_info *c, struct ubifs_zbranch *zroot);
1492int ubifs_tnc_end_commit(struct ubifs_info *c);
1493
1494/* shrinker.c */
1495int ubifs_shrinker(int nr_to_scan, gfp_t gfp_mask);
1496
1497/* commit.c */
1498int ubifs_bg_thread(void *info);
1499void ubifs_commit_required(struct ubifs_info *c);
1500void ubifs_request_bg_commit(struct ubifs_info *c);
1501int ubifs_run_commit(struct ubifs_info *c);
1502void ubifs_recovery_commit(struct ubifs_info *c);
1503int ubifs_gc_should_commit(struct ubifs_info *c);
1504void ubifs_wait_for_commit(struct ubifs_info *c);
1505
1506/* master.c */
1507int ubifs_read_master(struct ubifs_info *c);
1508int ubifs_write_master(struct ubifs_info *c);
1509
1510/* sb.c */
1511int ubifs_read_superblock(struct ubifs_info *c);
1512struct ubifs_sb_node *ubifs_read_sb_node(struct ubifs_info *c);
1513int ubifs_write_sb_node(struct ubifs_info *c, struct ubifs_sb_node *sup);
1514
1515/* replay.c */
1516int ubifs_validate_entry(struct ubifs_info *c,
1517 const struct ubifs_dent_node *dent);
1518int ubifs_replay_journal(struct ubifs_info *c);
1519
1520/* gc.c */
1521int ubifs_garbage_collect(struct ubifs_info *c, int anyway);
1522int ubifs_gc_start_commit(struct ubifs_info *c);
1523int ubifs_gc_end_commit(struct ubifs_info *c);
1524void ubifs_destroy_idx_gc(struct ubifs_info *c);
1525int ubifs_get_idx_gc_leb(struct ubifs_info *c);
1526int ubifs_garbage_collect_leb(struct ubifs_info *c, struct ubifs_lprops *lp);
1527
1528/* orphan.c */
1529int ubifs_add_orphan(struct ubifs_info *c, ino_t inum);
1530void ubifs_delete_orphan(struct ubifs_info *c, ino_t inum);
1531int ubifs_orphan_start_commit(struct ubifs_info *c);
1532int ubifs_orphan_end_commit(struct ubifs_info *c);
1533int ubifs_mount_orphans(struct ubifs_info *c, int unclean, int read_only);
1534
1535/* lpt.c */
1536int ubifs_calc_lpt_geom(struct ubifs_info *c);
1537int ubifs_create_dflt_lpt(struct ubifs_info *c, int *main_lebs, int lpt_first,
1538 int *lpt_lebs, int *big_lpt);
1539int ubifs_lpt_init(struct ubifs_info *c, int rd, int wr);
1540struct ubifs_lprops *ubifs_lpt_lookup(struct ubifs_info *c, int lnum);
1541struct ubifs_lprops *ubifs_lpt_lookup_dirty(struct ubifs_info *c, int lnum);
1542int ubifs_lpt_scan_nolock(struct ubifs_info *c, int start_lnum, int end_lnum,
1543 ubifs_lpt_scan_callback scan_cb, void *data);
1544
1545/* Shared by lpt.c for lpt_commit.c */
1546void ubifs_pack_lsave(struct ubifs_info *c, void *buf, int *lsave);
1547void ubifs_pack_ltab(struct ubifs_info *c, void *buf,
1548 struct ubifs_lpt_lprops *ltab);
1549void ubifs_pack_pnode(struct ubifs_info *c, void *buf,
1550 struct ubifs_pnode *pnode);
1551void ubifs_pack_nnode(struct ubifs_info *c, void *buf,
1552 struct ubifs_nnode *nnode);
1553struct ubifs_pnode *ubifs_get_pnode(struct ubifs_info *c,
1554 struct ubifs_nnode *parent, int iip);
1555struct ubifs_nnode *ubifs_get_nnode(struct ubifs_info *c,
1556 struct ubifs_nnode *parent, int iip);
1557int ubifs_read_nnode(struct ubifs_info *c, struct ubifs_nnode *parent, int iip);
1558void ubifs_add_lpt_dirt(struct ubifs_info *c, int lnum, int dirty);
1559void ubifs_add_nnode_dirt(struct ubifs_info *c, struct ubifs_nnode *nnode);
1560uint32_t ubifs_unpack_bits(uint8_t **addr, int *pos, int nrbits);
1561struct ubifs_nnode *ubifs_first_nnode(struct ubifs_info *c, int *hght);
1562
1563/* lpt_commit.c */
1564int ubifs_lpt_start_commit(struct ubifs_info *c);
1565int ubifs_lpt_end_commit(struct ubifs_info *c);
1566int ubifs_lpt_post_commit(struct ubifs_info *c);
1567void ubifs_lpt_free(struct ubifs_info *c, int wr_only);
1568
1569/* lprops.c */
1570void ubifs_get_lprops(struct ubifs_info *c);
1571const struct ubifs_lprops *ubifs_change_lp(struct ubifs_info *c,
1572 const struct ubifs_lprops *lp,
1573 int free, int dirty, int flags,
1574 int idx_gc_cnt);
1575void ubifs_release_lprops(struct ubifs_info *c);
1576void ubifs_get_lp_stats(struct ubifs_info *c, struct ubifs_lp_stats *stats);
1577void ubifs_add_to_cat(struct ubifs_info *c, struct ubifs_lprops *lprops,
1578 int cat);
1579void ubifs_replace_cat(struct ubifs_info *c, struct ubifs_lprops *old_lprops,
1580 struct ubifs_lprops *new_lprops);
1581void ubifs_ensure_cat(struct ubifs_info *c, struct ubifs_lprops *lprops);
1582int ubifs_categorize_lprops(const struct ubifs_info *c,
1583 const struct ubifs_lprops *lprops);
1584int ubifs_change_one_lp(struct ubifs_info *c, int lnum, int free, int dirty,
1585 int flags_set, int flags_clean, int idx_gc_cnt);
1586int ubifs_update_one_lp(struct ubifs_info *c, int lnum, int free, int dirty,
1587 int flags_set, int flags_clean);
1588int ubifs_read_one_lp(struct ubifs_info *c, int lnum, struct ubifs_lprops *lp);
1589const struct ubifs_lprops *ubifs_fast_find_free(struct ubifs_info *c);
1590const struct ubifs_lprops *ubifs_fast_find_empty(struct ubifs_info *c);
1591const struct ubifs_lprops *ubifs_fast_find_freeable(struct ubifs_info *c);
1592const struct ubifs_lprops *ubifs_fast_find_frdi_idx(struct ubifs_info *c);
1593
1594/* file.c */
1595int ubifs_fsync(struct file *file, struct dentry *dentry, int datasync);
1596int ubifs_setattr(struct dentry *dentry, struct iattr *attr);
1597
1598/* dir.c */
1599struct inode *ubifs_new_inode(struct ubifs_info *c, const struct inode *dir,
1600 int mode);
1601int ubifs_getattr(struct vfsmount *mnt, struct dentry *dentry,
1602 struct kstat *stat);
1603
1604/* xattr.c */
1605int ubifs_setxattr(struct dentry *dentry, const char *name,
1606 const void *value, size_t size, int flags);
1607ssize_t ubifs_getxattr(struct dentry *dentry, const char *name, void *buf,
1608 size_t size);
1609ssize_t ubifs_listxattr(struct dentry *dentry, char *buffer, size_t size);
1610int ubifs_removexattr(struct dentry *dentry, const char *name);
1611
1612/* super.c */
1613struct inode *ubifs_iget(struct super_block *sb, unsigned long inum);
1614
1615/* recovery.c */
1616int ubifs_recover_master_node(struct ubifs_info *c);
1617int ubifs_write_rcvrd_mst_node(struct ubifs_info *c);
1618struct ubifs_scan_leb *ubifs_recover_leb(struct ubifs_info *c, int lnum,
1619 int offs, void *sbuf, int grouped);
1620struct ubifs_scan_leb *ubifs_recover_log_leb(struct ubifs_info *c, int lnum,
1621 int offs, void *sbuf);
1622int ubifs_recover_inl_heads(const struct ubifs_info *c, void *sbuf);
1623int ubifs_clean_lebs(const struct ubifs_info *c, void *sbuf);
1624int ubifs_rcvry_gc_commit(struct ubifs_info *c);
1625int ubifs_recover_size_accum(struct ubifs_info *c, union ubifs_key *key,
1626 int deletion, loff_t new_size);
1627int ubifs_recover_size(struct ubifs_info *c);
1628void ubifs_destroy_size_tree(struct ubifs_info *c);
1629
1630/* ioctl.c */
1631long ubifs_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
1632void ubifs_set_inode_flags(struct inode *inode);
1633#ifdef CONFIG_COMPAT
1634long ubifs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
1635#endif
1636
1637/* compress.c */
1638int __init ubifs_compressors_init(void);
1639void __exit ubifs_compressors_exit(void);
1640void ubifs_compress(const void *in_buf, int in_len, void *out_buf, int *out_len,
1641 int *compr_type);
1642int ubifs_decompress(const void *buf, int len, void *out, int *out_len,
1643 int compr_type);
1644
1645#include "debug.h"
1646#include "misc.h"
1647#include "key.h"
1648
1649#endif /* !__UBIFS_H__ */
diff --git a/fs/ubifs/xattr.c b/fs/ubifs/xattr.c
new file mode 100644
index 00000000000..1388a078e1a
--- /dev/null
+++ b/fs/ubifs/xattr.c
@@ -0,0 +1,581 @@
1/*
2 * This file is part of UBIFS.
3 *
4 * Copyright (C) 2006-2008 Nokia Corporation.
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms of the GNU General Public License version 2 as published by
8 * the Free Software Foundation.
9 *
10 * This program is distributed in the hope that it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13 * more details.
14 *
15 * You should have received a copy of the GNU General Public License along with
16 * this program; if not, write to the Free Software Foundation, Inc., 51
17 * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
18 *
19 * Authors: Artem Bityutskiy (Битюцкий Артём)
20 * Adrian Hunter
21 */
22
23/*
24 * This file implements UBIFS extended attributes support.
25 *
26 * Extended attributes are implemented as regular inodes with attached data,
27 * which limits extended attribute size to UBIFS block size (4KiB). Names of
28 * extended attributes are described by extended attribute entries (xentries),
29 * which are almost identical to directory entries, but have different key type.
30 *
31 * In other words, the situation with extended attributes is very similar to
32 * directories. Indeed, any inode (but of course not xattr inodes) may have a
33 * number of associated xentries, just like directory inodes have associated
34 * directory entries. Extended attribute entries store the name of the extended
35 * attribute, the host inode number, and the extended attribute inode number.
36 * Similarly, direntries store the name, the parent and the target inode
37 * numbers. Thus, most of the common UBIFS mechanisms may be re-used for
38 * extended attributes.
39 *
40 * The number of extended attributes is not limited, but there is a Linux
41 * limitation on the maximum possible size of the list of all extended
42 * attributes associated with an inode (%XATTR_LIST_MAX), so UBIFS makes sure
43 * the sum of all extended attribute names of the inode does not exceed that
44 * limit.
45 *
46 * Extended attributes are synchronous, which means they are written to the
47 * flash media synchronously and there is no write-back for extended attribute
48 * inodes. The extended attribute values are not stored in compressed form on
49 * the media.
50 *
51 * Since extended attributes are represented by regular inodes, they are cached
52 * in the VFS inode cache. The xentries are cached in the LNC cache (see
53 * tnc.c).
54 *
55 * ACL support is not implemented.
56 */
57
58#include <linux/xattr.h>
59#include <linux/posix_acl_xattr.h>
60#include "ubifs.h"
61
62/*
63 * Limit the number of extended attributes per inode so that the total size
64 * (xattr_size) is guaranteed to fit in an 'unsigned int'.
65 */
66#define MAX_XATTRS_PER_INODE 65535
67
68/*
69 * Extended attribute type constants.
70 *
71 * USER_XATTR: user extended attribute ("user.*")
72 * TRUSTED_XATTR: trusted extended attribute ("trusted.*")
73 * SECURITY_XATTR: security extended attribute ("security.*")
74 */
75enum {
76 USER_XATTR, /* returned by check_namespace() for XATTR_USER_PREFIX names */
77 TRUSTED_XATTR, /* returned by check_namespace() for XATTR_TRUSTED_PREFIX names */
78 SECURITY_XATTR, /* returned by check_namespace() for XATTR_SECURITY_PREFIX names */
79};
80
/*
 * Operation tables with no initializer, so every method pointer in them is
 * NULL. create_xattr() installs them on newly created extended attribute
 * inodes to re-define all of their operations to be "nothing".
 */
81static struct inode_operations none_inode_operations;
82static struct address_space_operations none_address_operations;
83static struct file_operations none_file_operations;
84
85/**
86 * create_xattr - create an extended attribute.
87 * @c: UBIFS file-system description object
88 * @host: host inode
89 * @nm: extended attribute name
90 * @value: extended attribute value
91 * @size: size of extended attribute value
92 *
93 * This is a helper function which creates an extended attribute of name @nm
94 * and value @value for inode @host. The host inode is also updated on flash
95 * because the ctime and extended attribute accounting data changes. This
96 * function returns zero in case of success and a negative error code in case
97 * of failure.
98 */
99static int create_xattr(struct ubifs_info *c, struct inode *host,
100 const struct qstr *nm, const void *value, int size)
101{
102 int err;
103 struct inode *inode;
104 struct ubifs_inode *ui, *host_ui = ubifs_inode(host);
105 struct ubifs_budget_req req = { .new_ino = 1, .new_dent = 1,
106 .new_ino_d = size, .dirtied_ino = 1,
107 .dirtied_ino_d = host_ui->data_len};
108
109 if (host_ui->xattr_cnt >= MAX_XATTRS_PER_INODE)
110 return -ENOSPC;
111 /*
112 * Linux limits the maximum size of the extended attribute names list
113 * to %XATTR_LIST_MAX. This means we should not allow creating more*
114 * extended attributes if the name list becomes larger. This limitation
115 * is artificial for UBIFS, though.
116 */
117 if (host_ui->xattr_names + host_ui->xattr_cnt +
118 nm->len + 1 > XATTR_LIST_MAX)
119 return -ENOSPC;
120
121 err = ubifs_budget_space(c, &req);
122 if (err)
123 return err;
124
125 inode = ubifs_new_inode(c, host, S_IFREG | S_IRWXUGO);
126 if (IS_ERR(inode)) {
127 err = PTR_ERR(inode);
128 goto out_budg;
129 }
130
131 mutex_lock(&host_ui->ui_mutex);
132 /* Re-define all operations to be "nothing" */
133 inode->i_mapping->a_ops = &none_address_operations;
134 inode->i_op = &none_inode_operations;
135 inode->i_fop = &none_file_operations;
136
137 inode->i_flags |= S_SYNC | S_NOATIME | S_NOCMTIME | S_NOQUOTA;
138 ui = ubifs_inode(inode);
139 ui->xattr = 1;
140 ui->flags |= UBIFS_XATTR_FL;
141 ui->data = kmalloc(size, GFP_NOFS);
142 if (!ui->data) {
143 err = -ENOMEM;
144 goto out_unlock;
145 }
146
147 memcpy(ui->data, value, size);
148 host->i_ctime = ubifs_current_time(host);
149 host_ui->xattr_cnt += 1;
150 host_ui->xattr_size += CALC_DENT_SIZE(nm->len);
151 host_ui->xattr_size += CALC_XATTR_BYTES(size);
152 host_ui->xattr_names += nm->len;
153
154 /*
155 * We do not use i_size_write() because nobody can race with us as we
156 * are holding host @host->i_mutex - every xattr operation for this
157 * inode is serialized by it.
158 */
159 inode->i_size = ui->ui_size = size;
160 ui->data_len = size;
161 err = ubifs_jnl_update(c, host, nm, inode, 0, 1);
162 if (err)
163 goto out_cancel;
164 mutex_unlock(&host_ui->ui_mutex);
165
166 ubifs_release_budget(c, &req);
167 insert_inode_hash(inode);
168 iput(inode);
169 return 0;
170
171out_cancel:
172 host_ui->xattr_cnt -= 1;
173 host_ui->xattr_size -= CALC_DENT_SIZE(nm->len);
174 host_ui->xattr_size -= CALC_XATTR_BYTES(size);
175out_unlock:
176 mutex_unlock(&host_ui->ui_mutex);
177 make_bad_inode(inode);
178 iput(inode);
179out_budg:
180 ubifs_release_budget(c, &req);
181 return err;
182}
183
184/**
185 * change_xattr - change an extended attribute.
186 * @c: UBIFS file-system description object
187 * @host: host inode
188 * @inode: extended attribute inode
189 * @value: extended attribute value
190 * @size: size of extended attribute value
191 *
192 * This helper function changes the value of extended attribute @inode with new
193 * data from @value. Returns zero in case of success and a negative error code
194 * in case of failure.
195 */
196static int change_xattr(struct ubifs_info *c, struct inode *host,
197 struct inode *inode, const void *value, int size)
198{
199 int err;
200 struct ubifs_inode *host_ui = ubifs_inode(host);
201 struct ubifs_inode *ui = ubifs_inode(inode);
202 struct ubifs_budget_req req = { .dirtied_ino = 2,
203 .dirtied_ino_d = size + host_ui->data_len };
204
205 ubifs_assert(ui->data_len == inode->i_size);
206 err = ubifs_budget_space(c, &req);
207 if (err)
208 return err;
209
210 mutex_lock(&host_ui->ui_mutex);
211 host->i_ctime = ubifs_current_time(host);
212 host_ui->xattr_size -= CALC_XATTR_BYTES(ui->data_len);
213 host_ui->xattr_size += CALC_XATTR_BYTES(size);
214
215 kfree(ui->data);
216 ui->data = kmalloc(size, GFP_NOFS);
217 if (!ui->data) {
218 err = -ENOMEM;
219 goto out_unlock;
220 }
221
222 memcpy(ui->data, value, size);
223 inode->i_size = ui->ui_size = size;
224 ui->data_len = size;
225
226 /*
227 * It is important to write the host inode after the xattr inode
228 * because if the host inode gets synchronized (via 'fsync()'), then
229 * the extended attribute inode gets synchronized, because it goes
230 * before the host inode in the write-buffer.
231 */
232 err = ubifs_jnl_change_xattr(c, inode, host);
233 if (err)
234 goto out_cancel;
235 mutex_unlock(&host_ui->ui_mutex);
236
237 ubifs_release_budget(c, &req);
238 return 0;
239
240out_cancel:
241 host_ui->xattr_size -= CALC_XATTR_BYTES(size);
242 host_ui->xattr_size += CALC_XATTR_BYTES(ui->data_len);
243 make_bad_inode(inode);
244out_unlock:
245 mutex_unlock(&host_ui->ui_mutex);
246 ubifs_release_budget(c, &req);
247 return err;
248}
249
250/**
251 * check_namespace - check extended attribute name-space.
252 * @nm: extended attribute name
253 *
254 * This function makes sure the extended attribute name belongs to one of the
255 * supported extended attribute name-spaces. Returns name-space index in case
256 * of success and a negative error code in case of failure.
257 */
258static int check_namespace(const struct qstr *nm)
259{
260 int type;
261
262 if (nm->len > UBIFS_MAX_NLEN)
263 return -ENAMETOOLONG;
264
265 if (!strncmp(nm->name, XATTR_TRUSTED_PREFIX,
266 XATTR_TRUSTED_PREFIX_LEN)) {
267 if (nm->name[sizeof(XATTR_TRUSTED_PREFIX) - 1] == '\0')
268 return -EINVAL;
269 type = TRUSTED_XATTR;
270 } else if (!strncmp(nm->name, XATTR_USER_PREFIX,
271 XATTR_USER_PREFIX_LEN)) {
272 if (nm->name[XATTR_USER_PREFIX_LEN] == '\0')
273 return -EINVAL;
274 type = USER_XATTR;
275 } else if (!strncmp(nm->name, XATTR_SECURITY_PREFIX,
276 XATTR_SECURITY_PREFIX_LEN)) {
277 if (nm->name[sizeof(XATTR_SECURITY_PREFIX) - 1] == '\0')
278 return -EINVAL;
279 type = SECURITY_XATTR;
280 } else
281 return -EOPNOTSUPP;
282
283 return type;
284}
285
286static struct inode *iget_xattr(struct ubifs_info *c, ino_t inum)
287{
288 struct inode *inode;
289
290 inode = ubifs_iget(c->vfs_sb, inum);
291 if (IS_ERR(inode)) {
292 ubifs_err("dead extended attribute entry, error %d",
293 (int)PTR_ERR(inode));
294 return inode;
295 }
296 if (ubifs_inode(inode)->xattr)
297 return inode;
298 ubifs_err("corrupt extended attribute entry");
299 iput(inode);
300 return ERR_PTR(-EINVAL);
301}
302
303int ubifs_setxattr(struct dentry *dentry, const char *name,
304 const void *value, size_t size, int flags)
305{
306 struct inode *inode, *host = dentry->d_inode;
307 struct ubifs_info *c = host->i_sb->s_fs_info;
308 struct qstr nm = { .name = name, .len = strlen(name) };
309 struct ubifs_dent_node *xent;
310 union ubifs_key key;
311 int err, type;
312
313 dbg_gen("xattr '%s', host ino %lu ('%.*s'), size %zd", name,
314 host->i_ino, dentry->d_name.len, dentry->d_name.name, size);
315
316 if (size > UBIFS_MAX_INO_DATA)
317 return -ERANGE;
318
319 type = check_namespace(&nm);
320 if (type < 0)
321 return type;
322
323 xent = kmalloc(UBIFS_MAX_XENT_NODE_SZ, GFP_NOFS);
324 if (!xent)
325 return -ENOMEM;
326
327 /*
328 * The extended attribute entries are stored in LNC, so multiple
329 * look-ups do not involve reading the flash.
330 */
331 xent_key_init(c, &key, host->i_ino, &nm);
332 err = ubifs_tnc_lookup_nm(c, &key, xent, &nm);
333 if (err) {
334 if (err != -ENOENT)
335 goto out_free;
336
337 if (flags & XATTR_REPLACE)
338 /* We are asked not to create the xattr */
339 err = -ENODATA;
340 else
341 err = create_xattr(c, host, &nm, value, size);
342 goto out_free;
343 }
344
345 if (flags & XATTR_CREATE) {
346 /* We are asked not to replace the xattr */
347 err = -EEXIST;
348 goto out_free;
349 }
350
351 inode = iget_xattr(c, le64_to_cpu(xent->inum));
352 if (IS_ERR(inode)) {
353 err = PTR_ERR(inode);
354 goto out_free;
355 }
356
357 err = change_xattr(c, host, inode, value, size);
358 iput(inode);
359
360out_free:
361 kfree(xent);
362 return err;
363}
364
365ssize_t ubifs_getxattr(struct dentry *dentry, const char *name, void *buf,
366 size_t size)
367{
368 struct inode *inode, *host = dentry->d_inode;
369 struct ubifs_info *c = host->i_sb->s_fs_info;
370 struct qstr nm = { .name = name, .len = strlen(name) };
371 struct ubifs_inode *ui;
372 struct ubifs_dent_node *xent;
373 union ubifs_key key;
374 int err;
375
376 dbg_gen("xattr '%s', ino %lu ('%.*s'), buf size %zd", name,
377 host->i_ino, dentry->d_name.len, dentry->d_name.name, size);
378
379 err = check_namespace(&nm);
380 if (err < 0)
381 return err;
382
383 xent = kmalloc(UBIFS_MAX_XENT_NODE_SZ, GFP_NOFS);
384 if (!xent)
385 return -ENOMEM;
386
387 mutex_lock(&host->i_mutex);
388 xent_key_init(c, &key, host->i_ino, &nm);
389 err = ubifs_tnc_lookup_nm(c, &key, xent, &nm);
390 if (err) {
391 if (err == -ENOENT)
392 err = -ENODATA;
393 goto out_unlock;
394 }
395
396 inode = iget_xattr(c, le64_to_cpu(xent->inum));
397 if (IS_ERR(inode)) {
398 err = PTR_ERR(inode);
399 goto out_unlock;
400 }
401
402 ui = ubifs_inode(inode);
403 ubifs_assert(inode->i_size == ui->data_len);
404 ubifs_assert(ubifs_inode(host)->xattr_size > ui->data_len);
405
406 if (buf) {
407 /* If @buf is %NULL we are supposed to return the length */
408 if (ui->data_len > size) {
409 dbg_err("buffer size %zd, xattr len %d",
410 size, ui->data_len);
411 err = -ERANGE;
412 goto out_iput;
413 }
414
415 memcpy(buf, ui->data, ui->data_len);
416 }
417 err = ui->data_len;
418
419out_iput:
420 iput(inode);
421out_unlock:
422 mutex_unlock(&host->i_mutex);
423 kfree(xent);
424 return err;
425}
426
427ssize_t ubifs_listxattr(struct dentry *dentry, char *buffer, size_t size)
428{
429 union ubifs_key key;
430 struct inode *host = dentry->d_inode;
431 struct ubifs_info *c = host->i_sb->s_fs_info;
432 struct ubifs_inode *host_ui = ubifs_inode(host);
433 struct ubifs_dent_node *xent, *pxent = NULL;
434 int err, len, written = 0;
435 struct qstr nm = { .name = NULL };
436
437 dbg_gen("ino %lu ('%.*s'), buffer size %zd", host->i_ino,
438 dentry->d_name.len, dentry->d_name.name, size);
439
440 len = host_ui->xattr_names + host_ui->xattr_cnt;
441 if (!buffer)
442 /*
443 * We should return the minimum buffer size which will fit a
444 * null-terminated list of all the extended attribute names.
445 */
446 return len;
447
448 if (len > size)
449 return -ERANGE;
450
451 lowest_xent_key(c, &key, host->i_ino);
452
453 mutex_lock(&host->i_mutex);
454 while (1) {
455 int type;
456
457 xent = ubifs_tnc_next_ent(c, &key, &nm);
458 if (unlikely(IS_ERR(xent))) {
459 err = PTR_ERR(xent);
460 break;
461 }
462
463 nm.name = xent->name;
464 nm.len = le16_to_cpu(xent->nlen);
465
466 type = check_namespace(&nm);
467 if (unlikely(type < 0)) {
468 err = type;
469 break;
470 }
471
472 /* Show trusted namespace only for "power" users */
473 if (type != TRUSTED_XATTR || capable(CAP_SYS_ADMIN)) {
474 memcpy(buffer + written, nm.name, nm.len + 1);
475 written += nm.len + 1;
476 }
477
478 kfree(pxent);
479 pxent = xent;
480 key_read(c, &xent->key, &key);
481 }
482 mutex_unlock(&host->i_mutex);
483
484 kfree(pxent);
485 if (err != -ENOENT) {
486 ubifs_err("cannot find next direntry, error %d", err);
487 return err;
488 }
489
490 ubifs_assert(written <= size);
491 return written;
492}
493
494static int remove_xattr(struct ubifs_info *c, struct inode *host,
495 struct inode *inode, const struct qstr *nm)
496{
497 int err;
498 struct ubifs_inode *host_ui = ubifs_inode(host);
499 struct ubifs_inode *ui = ubifs_inode(inode);
500 struct ubifs_budget_req req = { .dirtied_ino = 1, .mod_dent = 1,
501 .dirtied_ino_d = host_ui->data_len };
502
503 ubifs_assert(ui->data_len == inode->i_size);
504
505 err = ubifs_budget_space(c, &req);
506 if (err)
507 return err;
508
509 mutex_lock(&host_ui->ui_mutex);
510 host->i_ctime = ubifs_current_time(host);
511 host_ui->xattr_cnt -= 1;
512 host_ui->xattr_size -= CALC_DENT_SIZE(nm->len);
513 host_ui->xattr_size -= CALC_XATTR_BYTES(ui->data_len);
514 host_ui->xattr_names -= nm->len;
515
516 err = ubifs_jnl_delete_xattr(c, host, inode, nm);
517 if (err)
518 goto out_cancel;
519 mutex_unlock(&host_ui->ui_mutex);
520
521 ubifs_release_budget(c, &req);
522 return 0;
523
524out_cancel:
525 host_ui->xattr_cnt += 1;
526 host_ui->xattr_size += CALC_DENT_SIZE(nm->len);
527 host_ui->xattr_size += CALC_XATTR_BYTES(ui->data_len);
528 mutex_unlock(&host_ui->ui_mutex);
529 ubifs_release_budget(c, &req);
530 make_bad_inode(inode);
531 return err;
532}
533
534int ubifs_removexattr(struct dentry *dentry, const char *name)
535{
536 struct inode *inode, *host = dentry->d_inode;
537 struct ubifs_info *c = host->i_sb->s_fs_info;
538 struct qstr nm = { .name = name, .len = strlen(name) };
539 struct ubifs_dent_node *xent;
540 union ubifs_key key;
541 int err;
542
543 dbg_gen("xattr '%s', ino %lu ('%.*s')", name,
544 host->i_ino, dentry->d_name.len, dentry->d_name.name);
545 ubifs_assert(mutex_is_locked(&host->i_mutex));
546
547 err = check_namespace(&nm);
548 if (err < 0)
549 return err;
550
551 xent = kmalloc(UBIFS_MAX_XENT_NODE_SZ, GFP_NOFS);
552 if (!xent)
553 return -ENOMEM;
554
555 xent_key_init(c, &key, host->i_ino, &nm);
556 err = ubifs_tnc_lookup_nm(c, &key, xent, &nm);
557 if (err) {
558 if (err == -ENOENT)
559 err = -ENODATA;
560 goto out_free;
561 }
562
563 inode = iget_xattr(c, le64_to_cpu(xent->inum));
564 if (IS_ERR(inode)) {
565 err = PTR_ERR(inode);
566 goto out_free;
567 }
568
569 ubifs_assert(inode->i_nlink == 1);
570 inode->i_nlink = 0;
571 err = remove_xattr(c, host, inode, &nm);
572 if (err)
573 inode->i_nlink = 1;
574
575 /* If @i_nlink is 0, 'iput()' will delete the inode */
576 iput(inode);
577
578out_free:
579 kfree(xent);
580 return err;
581}