aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2008-09-09 14:52:12 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2008-09-09 14:52:12 -0400
commitb975dee3811ae0c58bd0e19cbd041cac8dd37ec5 (patch)
treed65f5381024a00982e47fd6144d219bb6b34ff78 /fs
parentdeac93df26b20cf8438339b5935b5f5643bc30c9 (diff)
parenta5cb562d6977d9d7989c346b7b153cef31ec0228 (diff)
Merge branch 'linux-next' of git://git.infradead.org/~dedekind/ubifs-2.6
* 'linux-next' of git://git.infradead.org/~dedekind/ubifs-2.6: UBIFS: make minimum fanout 3 UBIFS: fix division by zero UBIFS: amend f_fsid UBIFS: fill f_fsid UBIFS: improve statfs reporting even more UBIFS: introduce LEB overhead UBIFS: add forgotten gc_idx_lebs component UBIFS: fix assertion UBIFS: improve statfs reporting UBIFS: remove incorrect index space check UBIFS: push empty flash hack down UBIFS: do not update min_idx_lebs in stafs UBIFS: allow for racing between GC and TNC UBIFS: always read hashed-key nodes under TNC mutex UBIFS: fix zero-length truncations
Diffstat (limited to 'fs')
-rw-r--r--fs/ubifs/budget.c114
-rw-r--r--fs/ubifs/dir.c1
-rw-r--r--fs/ubifs/file.c20
-rw-r--r--fs/ubifs/find.c18
-rw-r--r--fs/ubifs/gc.c6
-rw-r--r--fs/ubifs/misc.h49
-rw-r--r--fs/ubifs/super.c22
-rw-r--r--fs/ubifs/tnc.c116
-rw-r--r--fs/ubifs/ubifs-media.h2
-rw-r--r--fs/ubifs/ubifs.h14
10 files changed, 221 insertions, 141 deletions
diff --git a/fs/ubifs/budget.c b/fs/ubifs/budget.c
index 154098157473..73db464cd08b 100644
--- a/fs/ubifs/budget.c
+++ b/fs/ubifs/budget.c
@@ -302,18 +302,6 @@ long long ubifs_calc_available(const struct ubifs_info *c, int min_idx_lebs)
302 int subtract_lebs; 302 int subtract_lebs;
303 long long available; 303 long long available;
304 304
305 /*
306 * Force the amount available to the total size reported if the used
307 * space is zero.
308 */
309 if (c->lst.total_used <= UBIFS_INO_NODE_SZ &&
310 c->budg_data_growth + c->budg_dd_growth == 0) {
311 /* Do the same calculation as for c->block_cnt */
312 available = c->main_lebs - 2;
313 available *= c->leb_size - c->dark_wm;
314 return available;
315 }
316
317 available = c->main_bytes - c->lst.total_used; 305 available = c->main_bytes - c->lst.total_used;
318 306
319 /* 307 /*
@@ -714,34 +702,106 @@ void ubifs_release_dirty_inode_budget(struct ubifs_info *c,
714} 702}
715 703
716/** 704/**
717 * ubifs_budg_get_free_space - return amount of free space. 705 * ubifs_reported_space - calculate reported free space.
706 * @c: the UBIFS file-system description object
707 * @free: amount of free space
708 *
709 * This function calculates amount of free space which will be reported to
710 * user-space. User-space application tend to expect that if the file-system
711 * (e.g., via the 'statfs()' call) reports that it has N bytes available, they
712 * are able to write a file of size N. UBIFS attaches node headers to each data
713 * node and it has to write indexind nodes as well. This introduces additional
714 * overhead, and UBIFS it has to report sligtly less free space to meet the
715 * above expectetion.
716 *
717 * This function assumes free space is made up of uncompressed data nodes and
718 * full index nodes (one per data node, tripled because we always allow enough
719 * space to write the index thrice).
720 *
721 * Note, the calculation is pessimistic, which means that most of the time
722 * UBIFS reports less space than it actually has.
723 */
724long long ubifs_reported_space(const struct ubifs_info *c, uint64_t free)
725{
726 int divisor, factor, f;
727
728 /*
729 * Reported space size is @free * X, where X is UBIFS block size
730 * divided by UBIFS block size + all overhead one data block
731 * introduces. The overhead is the node header + indexing overhead.
732 *
733 * Indexing overhead calculations are based on the following formula:
734 * I = N/(f - 1) + 1, where I - number of indexing nodes, N - number
735 * of data nodes, f - fanout. Because effective UBIFS fanout is twice
736 * as less than maximum fanout, we assume that each data node
737 * introduces 3 * @c->max_idx_node_sz / (@c->fanout/2 - 1) bytes.
738 * Note, the multiplier 3 is because UBIFS reseves thrice as more space
739 * for the index.
740 */
741 f = c->fanout > 3 ? c->fanout >> 1 : 2;
742 factor = UBIFS_BLOCK_SIZE;
743 divisor = UBIFS_MAX_DATA_NODE_SZ;
744 divisor += (c->max_idx_node_sz * 3) / (f - 1);
745 free *= factor;
746 do_div(free, divisor);
747 return free;
748}
749
750/**
751 * ubifs_get_free_space - return amount of free space.
718 * @c: UBIFS file-system description object 752 * @c: UBIFS file-system description object
719 * 753 *
720 * This function returns amount of free space on the file-system. 754 * This function calculates amount of free space to report to user-space.
755 *
756 * Because UBIFS may introduce substantial overhead (the index, node headers,
757 * alighment, wastage at the end of eraseblocks, etc), it cannot report real
758 * amount of free flash space it has (well, because not all dirty space is
759 * reclamable, UBIFS does not actually know the real amount). If UBIFS did so,
760 * it would bread user expectetion about what free space is. Users seem to
761 * accustomed to assume that if the file-system reports N bytes of free space,
762 * they would be able to fit a file of N bytes to the FS. This almost works for
763 * traditional file-systems, because they have way less overhead than UBIFS.
764 * So, to keep users happy, UBIFS tries to take the overhead into account.
721 */ 765 */
722long long ubifs_budg_get_free_space(struct ubifs_info *c) 766long long ubifs_get_free_space(struct ubifs_info *c)
723{ 767{
724 int min_idx_lebs, rsvd_idx_lebs; 768 int min_idx_lebs, rsvd_idx_lebs, lebs;
725 long long available, outstanding, free; 769 long long available, outstanding, free;
726 770
727 /* Do exactly the same calculations as in 'do_budget_space()' */
728 spin_lock(&c->space_lock); 771 spin_lock(&c->space_lock);
729 min_idx_lebs = ubifs_calc_min_idx_lebs(c); 772 min_idx_lebs = ubifs_calc_min_idx_lebs(c);
773 outstanding = c->budg_data_growth + c->budg_dd_growth;
730 774
731 if (min_idx_lebs > c->lst.idx_lebs) 775 /*
732 rsvd_idx_lebs = min_idx_lebs - c->lst.idx_lebs; 776 * Force the amount available to the total size reported if the used
733 else 777 * space is zero.
734 rsvd_idx_lebs = 0; 778 */
735 779 if (c->lst.total_used <= UBIFS_INO_NODE_SZ && !outstanding) {
736 if (rsvd_idx_lebs > c->lst.empty_lebs + c->freeable_cnt + c->idx_gc_cnt
737 - c->lst.taken_empty_lebs) {
738 spin_unlock(&c->space_lock); 780 spin_unlock(&c->space_lock);
739 return 0; 781 return (long long)c->block_cnt << UBIFS_BLOCK_SHIFT;
740 } 782 }
741 783
742 available = ubifs_calc_available(c, min_idx_lebs); 784 available = ubifs_calc_available(c, min_idx_lebs);
743 outstanding = c->budg_data_growth + c->budg_dd_growth; 785
744 c->min_idx_lebs = min_idx_lebs; 786 /*
787 * When reporting free space to user-space, UBIFS guarantees that it is
788 * possible to write a file of free space size. This means that for
789 * empty LEBs we may use more precise calculations than
790 * 'ubifs_calc_available()' is using. Namely, we know that in empty
791 * LEBs we would waste only @c->leb_overhead bytes, not @c->dark_wm.
792 * Thus, amend the available space.
793 *
794 * Note, the calculations below are similar to what we have in
795 * 'do_budget_space()', so refer there for comments.
796 */
797 if (min_idx_lebs > c->lst.idx_lebs)
798 rsvd_idx_lebs = min_idx_lebs - c->lst.idx_lebs;
799 else
800 rsvd_idx_lebs = 0;
801 lebs = c->lst.empty_lebs + c->freeable_cnt + c->idx_gc_cnt -
802 c->lst.taken_empty_lebs;
803 lebs -= rsvd_idx_lebs;
804 available += lebs * (c->dark_wm - c->leb_overhead);
745 spin_unlock(&c->space_lock); 805 spin_unlock(&c->space_lock);
746 806
747 if (available > outstanding) 807 if (available > outstanding)
diff --git a/fs/ubifs/dir.c b/fs/ubifs/dir.c
index 5c96f1fb7016..2b267c9a1806 100644
--- a/fs/ubifs/dir.c
+++ b/fs/ubifs/dir.c
@@ -587,7 +587,6 @@ static int ubifs_unlink(struct inode *dir, struct dentry *dentry)
587 if (err) { 587 if (err) {
588 if (err != -ENOSPC) 588 if (err != -ENOSPC)
589 return err; 589 return err;
590 err = 0;
591 budgeted = 0; 590 budgeted = 0;
592 } 591 }
593 592
diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c
index 4071d1cae29f..3d698e2022b1 100644
--- a/fs/ubifs/file.c
+++ b/fs/ubifs/file.c
@@ -793,7 +793,7 @@ static int do_truncation(struct ubifs_info *c, struct inode *inode,
793 int err; 793 int err;
794 struct ubifs_budget_req req; 794 struct ubifs_budget_req req;
795 loff_t old_size = inode->i_size, new_size = attr->ia_size; 795 loff_t old_size = inode->i_size, new_size = attr->ia_size;
796 int offset = new_size & (UBIFS_BLOCK_SIZE - 1); 796 int offset = new_size & (UBIFS_BLOCK_SIZE - 1), budgeted = 1;
797 struct ubifs_inode *ui = ubifs_inode(inode); 797 struct ubifs_inode *ui = ubifs_inode(inode);
798 798
799 dbg_gen("ino %lu, size %lld -> %lld", inode->i_ino, old_size, new_size); 799 dbg_gen("ino %lu, size %lld -> %lld", inode->i_ino, old_size, new_size);
@@ -811,8 +811,15 @@ static int do_truncation(struct ubifs_info *c, struct inode *inode,
811 /* A funny way to budget for truncation node */ 811 /* A funny way to budget for truncation node */
812 req.dirtied_ino_d = UBIFS_TRUN_NODE_SZ; 812 req.dirtied_ino_d = UBIFS_TRUN_NODE_SZ;
813 err = ubifs_budget_space(c, &req); 813 err = ubifs_budget_space(c, &req);
814 if (err) 814 if (err) {
815 return err; 815 /*
816 * Treat truncations to zero as deletion and always allow them,
817 * just like we do for '->unlink()'.
818 */
819 if (new_size || err != -ENOSPC)
820 return err;
821 budgeted = 0;
822 }
816 823
817 err = vmtruncate(inode, new_size); 824 err = vmtruncate(inode, new_size);
818 if (err) 825 if (err)
@@ -869,7 +876,12 @@ static int do_truncation(struct ubifs_info *c, struct inode *inode,
869 err = ubifs_jnl_truncate(c, inode, old_size, new_size); 876 err = ubifs_jnl_truncate(c, inode, old_size, new_size);
870 mutex_unlock(&ui->ui_mutex); 877 mutex_unlock(&ui->ui_mutex);
871out_budg: 878out_budg:
872 ubifs_release_budget(c, &req); 879 if (budgeted)
880 ubifs_release_budget(c, &req);
881 else {
882 c->nospace = c->nospace_rp = 0;
883 smp_wmb();
884 }
873 return err; 885 return err;
874} 886}
875 887
diff --git a/fs/ubifs/find.c b/fs/ubifs/find.c
index adee7b5ddeab..e045c8b55423 100644
--- a/fs/ubifs/find.c
+++ b/fs/ubifs/find.c
@@ -211,14 +211,8 @@ static const struct ubifs_lprops *scan_for_dirty(struct ubifs_info *c,
211 * dirty index heap, and it falls-back to LPT scanning if the heaps are empty 211 * dirty index heap, and it falls-back to LPT scanning if the heaps are empty
212 * or do not have an LEB which satisfies the @min_space criteria. 212 * or do not have an LEB which satisfies the @min_space criteria.
213 * 213 *
214 * Note: 214 * Note, LEBs which have less than dead watermark of free + dirty space are
215 * o LEBs which have less than dead watermark of dirty space are never picked 215 * never picked by this function.
216 * by this function;
217 *
218 * Returns zero and the LEB properties of
219 * found dirty LEB in case of success, %-ENOSPC if no dirty LEB was found and a
220 * negative error code in case of other failures. The returned LEB is marked as
221 * "taken".
222 * 216 *
223 * The additional @pick_free argument controls if this function has to return a 217 * The additional @pick_free argument controls if this function has to return a
224 * free or freeable LEB if one is present. For example, GC must to set it to %1, 218 * free or freeable LEB if one is present. For example, GC must to set it to %1,
@@ -231,6 +225,10 @@ static const struct ubifs_lprops *scan_for_dirty(struct ubifs_info *c,
231 * 225 *
232 * In addition @pick_free is set to %2 by the recovery process in order to 226 * In addition @pick_free is set to %2 by the recovery process in order to
233 * recover gc_lnum in which case an index LEB must not be returned. 227 * recover gc_lnum in which case an index LEB must not be returned.
228 *
229 * This function returns zero and the LEB properties of found dirty LEB in case
230 * of success, %-ENOSPC if no dirty LEB was found and a negative error code in
231 * case of other failures. The returned LEB is marked as "taken".
234 */ 232 */
235int ubifs_find_dirty_leb(struct ubifs_info *c, struct ubifs_lprops *ret_lp, 233int ubifs_find_dirty_leb(struct ubifs_info *c, struct ubifs_lprops *ret_lp,
236 int min_space, int pick_free) 234 int min_space, int pick_free)
@@ -245,7 +243,7 @@ int ubifs_find_dirty_leb(struct ubifs_info *c, struct ubifs_lprops *ret_lp,
245 int lebs, rsvd_idx_lebs = 0; 243 int lebs, rsvd_idx_lebs = 0;
246 244
247 spin_lock(&c->space_lock); 245 spin_lock(&c->space_lock);
248 lebs = c->lst.empty_lebs; 246 lebs = c->lst.empty_lebs + c->idx_gc_cnt;
249 lebs += c->freeable_cnt - c->lst.taken_empty_lebs; 247 lebs += c->freeable_cnt - c->lst.taken_empty_lebs;
250 248
251 /* 249 /*
@@ -317,7 +315,7 @@ int ubifs_find_dirty_leb(struct ubifs_info *c, struct ubifs_lprops *ret_lp,
317 lp = idx_lp; 315 lp = idx_lp;
318 316
319 if (lp) { 317 if (lp) {
320 ubifs_assert(lp->dirty >= c->dead_wm); 318 ubifs_assert(lp->free + lp->dirty >= c->dead_wm);
321 goto found; 319 goto found;
322 } 320 }
323 321
diff --git a/fs/ubifs/gc.c b/fs/ubifs/gc.c
index d0f3dac29081..13f1019c859f 100644
--- a/fs/ubifs/gc.c
+++ b/fs/ubifs/gc.c
@@ -344,6 +344,12 @@ int ubifs_garbage_collect_leb(struct ubifs_info *c, struct ubifs_lprops *lp)
344 if (err) 344 if (err)
345 goto out; 345 goto out;
346 346
347 /* Allow for races with TNC */
348 c->gced_lnum = lnum;
349 smp_wmb();
350 c->gc_seq += 1;
351 smp_wmb();
352
347 if (c->gc_lnum == -1) { 353 if (c->gc_lnum == -1) {
348 c->gc_lnum = lnum; 354 c->gc_lnum = lnum;
349 err = LEB_RETAINED; 355 err = LEB_RETAINED;
diff --git a/fs/ubifs/misc.h b/fs/ubifs/misc.h
index 87dabf9fe742..4c12a9215d7f 100644
--- a/fs/ubifs/misc.h
+++ b/fs/ubifs/misc.h
@@ -284,38 +284,6 @@ static inline void *ubifs_idx_key(const struct ubifs_info *c,
284} 284}
285 285
286/** 286/**
287 * ubifs_reported_space - calculate reported free space.
288 * @c: the UBIFS file-system description object
289 * @free: amount of free space
290 *
291 * This function calculates amount of free space which will be reported to
292 * user-space. User-space application tend to expect that if the file-system
293 * (e.g., via the 'statfs()' call) reports that it has N bytes available, they
294 * are able to write a file of size N. UBIFS attaches node headers to each data
295 * node and it has to write indexind nodes as well. This introduces additional
296 * overhead, and UBIFS it has to report sligtly less free space to meet the
297 * above expectetion.
298 *
299 * This function assumes free space is made up of uncompressed data nodes and
300 * full index nodes (one per data node, doubled because we always allow enough
301 * space to write the index twice).
302 *
303 * Note, the calculation is pessimistic, which means that most of the time
304 * UBIFS reports less space than it actually has.
305 */
306static inline long long ubifs_reported_space(const struct ubifs_info *c,
307 uint64_t free)
308{
309 int divisor, factor;
310
311 divisor = UBIFS_MAX_DATA_NODE_SZ + (c->max_idx_node_sz * 3);
312 factor = UBIFS_MAX_DATA_NODE_SZ - UBIFS_DATA_NODE_SZ;
313 do_div(free, divisor);
314
315 return free * factor;
316}
317
318/**
319 * ubifs_current_time - round current time to time granularity. 287 * ubifs_current_time - round current time to time granularity.
320 * @inode: inode 288 * @inode: inode
321 */ 289 */
@@ -325,4 +293,21 @@ static inline struct timespec ubifs_current_time(struct inode *inode)
325 current_fs_time(inode->i_sb) : CURRENT_TIME_SEC; 293 current_fs_time(inode->i_sb) : CURRENT_TIME_SEC;
326} 294}
327 295
296/**
297 * ubifs_tnc_lookup - look up a file-system node.
298 * @c: UBIFS file-system description object
299 * @key: node key to lookup
300 * @node: the node is returned here
301 *
302 * This function look up and reads node with key @key. The caller has to make
303 * sure the @node buffer is large enough to fit the node. Returns zero in case
304 * of success, %-ENOENT if the node was not found, and a negative error code in
305 * case of failure.
306 */
307static inline int ubifs_tnc_lookup(struct ubifs_info *c,
308 const union ubifs_key *key, void *node)
309{
310 return ubifs_tnc_locate(c, key, node, NULL, NULL);
311}
312
328#endif /* __UBIFS_MISC_H__ */ 313#endif /* __UBIFS_MISC_H__ */
diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c
index f71e6b8822c4..7562464ac83f 100644
--- a/fs/ubifs/super.c
+++ b/fs/ubifs/super.c
@@ -370,8 +370,9 @@ static int ubifs_statfs(struct dentry *dentry, struct kstatfs *buf)
370{ 370{
371 struct ubifs_info *c = dentry->d_sb->s_fs_info; 371 struct ubifs_info *c = dentry->d_sb->s_fs_info;
372 unsigned long long free; 372 unsigned long long free;
373 __le32 *uuid = (__le32 *)c->uuid;
373 374
374 free = ubifs_budg_get_free_space(c); 375 free = ubifs_get_free_space(c);
375 dbg_gen("free space %lld bytes (%lld blocks)", 376 dbg_gen("free space %lld bytes (%lld blocks)",
376 free, free >> UBIFS_BLOCK_SHIFT); 377 free, free >> UBIFS_BLOCK_SHIFT);
377 378
@@ -386,7 +387,8 @@ static int ubifs_statfs(struct dentry *dentry, struct kstatfs *buf)
386 buf->f_files = 0; 387 buf->f_files = 0;
387 buf->f_ffree = 0; 388 buf->f_ffree = 0;
388 buf->f_namelen = UBIFS_MAX_NLEN; 389 buf->f_namelen = UBIFS_MAX_NLEN;
389 390 buf->f_fsid.val[0] = le32_to_cpu(uuid[0]) ^ le32_to_cpu(uuid[2]);
391 buf->f_fsid.val[1] = le32_to_cpu(uuid[1]) ^ le32_to_cpu(uuid[3]);
390 return 0; 392 return 0;
391} 393}
392 394
@@ -530,6 +532,12 @@ static int init_constants_early(struct ubifs_info *c)
530 c->dead_wm = ALIGN(MIN_WRITE_SZ, c->min_io_size); 532 c->dead_wm = ALIGN(MIN_WRITE_SZ, c->min_io_size);
531 c->dark_wm = ALIGN(UBIFS_MAX_NODE_SZ, c->min_io_size); 533 c->dark_wm = ALIGN(UBIFS_MAX_NODE_SZ, c->min_io_size);
532 534
535 /*
536 * Calculate how many bytes would be wasted at the end of LEB if it was
537 * fully filled with data nodes of maximum size. This is used in
538 * calculations when reporting free space.
539 */
540 c->leb_overhead = c->leb_size % UBIFS_MAX_DATA_NODE_SZ;
533 return 0; 541 return 0;
534} 542}
535 543
@@ -647,13 +655,11 @@ static int init_constants_late(struct ubifs_info *c)
647 * internally because it does not make much sense for UBIFS, but it is 655 * internally because it does not make much sense for UBIFS, but it is
648 * necessary to report something for the 'statfs()' call. 656 * necessary to report something for the 'statfs()' call.
649 * 657 *
650 * Subtract the LEB reserved for GC and the LEB which is reserved for 658 * Subtract the LEB reserved for GC, the LEB which is reserved for
651 * deletions. 659 * deletions, and assume only one journal head is available.
652 *
653 * Review 'ubifs_calc_available()' if changing this calculation.
654 */ 660 */
655 tmp64 = c->main_lebs - 2; 661 tmp64 = c->main_lebs - 2 - c->jhead_cnt + 1;
656 tmp64 *= (uint64_t)c->leb_size - c->dark_wm; 662 tmp64 *= (uint64_t)c->leb_size - c->leb_overhead;
657 tmp64 = ubifs_reported_space(c, tmp64); 663 tmp64 = ubifs_reported_space(c, tmp64);
658 c->block_cnt = tmp64 >> UBIFS_BLOCK_SHIFT; 664 c->block_cnt = tmp64 >> UBIFS_BLOCK_SHIFT;
659 665
diff --git a/fs/ubifs/tnc.c b/fs/ubifs/tnc.c
index e909f4a96443..7da209ab9378 100644
--- a/fs/ubifs/tnc.c
+++ b/fs/ubifs/tnc.c
@@ -506,7 +506,7 @@ static int fallible_read_node(struct ubifs_info *c, const union ubifs_key *key,
506 if (keys_cmp(c, key, &node_key) != 0) 506 if (keys_cmp(c, key, &node_key) != 0)
507 ret = 0; 507 ret = 0;
508 } 508 }
509 if (ret == 0) 509 if (ret == 0 && c->replaying)
510 dbg_mnt("dangling branch LEB %d:%d len %d, key %s", 510 dbg_mnt("dangling branch LEB %d:%d len %d, key %s",
511 zbr->lnum, zbr->offs, zbr->len, DBGKEY(key)); 511 zbr->lnum, zbr->offs, zbr->len, DBGKEY(key));
512 return ret; 512 return ret;
@@ -1382,50 +1382,39 @@ static int lookup_level0_dirty(struct ubifs_info *c, const union ubifs_key *key,
1382} 1382}
1383 1383
1384/** 1384/**
1385 * ubifs_tnc_lookup - look up a file-system node. 1385 * maybe_leb_gced - determine if a LEB may have been garbage collected.
1386 * @c: UBIFS file-system description object 1386 * @c: UBIFS file-system description object
1387 * @key: node key to lookup 1387 * @lnum: LEB number
1388 * @node: the node is returned here 1388 * @gc_seq1: garbage collection sequence number
1389 * 1389 *
1390 * This function look up and reads node with key @key. The caller has to make 1390 * This function determines if @lnum may have been garbage collected since
1391 * sure the @node buffer is large enough to fit the node. Returns zero in case 1391 * sequence number @gc_seq1. If it may have been then %1 is returned, otherwise
1392 * of success, %-ENOENT if the node was not found, and a negative error code in 1392 * %0 is returned.
1393 * case of failure.
1394 */ 1393 */
1395int ubifs_tnc_lookup(struct ubifs_info *c, const union ubifs_key *key, 1394static int maybe_leb_gced(struct ubifs_info *c, int lnum, int gc_seq1)
1396 void *node)
1397{ 1395{
1398 int found, n, err; 1396 int gc_seq2, gced_lnum;
1399 struct ubifs_znode *znode;
1400 struct ubifs_zbranch zbr, *zt;
1401 1397
1402 mutex_lock(&c->tnc_mutex); 1398 gced_lnum = c->gced_lnum;
1403 found = ubifs_lookup_level0(c, key, &znode, &n); 1399 smp_rmb();
1404 if (!found) { 1400 gc_seq2 = c->gc_seq;
1405 err = -ENOENT; 1401 /* Same seq means no GC */
1406 goto out; 1402 if (gc_seq1 == gc_seq2)
1407 } else if (found < 0) { 1403 return 0;
1408 err = found; 1404 /* Different by more than 1 means we don't know */
1409 goto out; 1405 if (gc_seq1 + 1 != gc_seq2)
1410 } 1406 return 1;
1411 zt = &znode->zbranch[n]; 1407 /*
1412 if (is_hash_key(c, key)) { 1408 * We have seen the sequence number has increased by 1. Now we need to
1413 /* 1409 * be sure we read the right LEB number, so read it again.
1414 * In this case the leaf node cache gets used, so we pass the 1410 */
1415 * address of the zbranch and keep the mutex locked 1411 smp_rmb();
1416 */ 1412 if (gced_lnum != c->gced_lnum)
1417 err = tnc_read_node_nm(c, zt, node); 1413 return 1;
1418 goto out; 1414 /* Finally we can check lnum */
1419 } 1415 if (gced_lnum == lnum)
1420 zbr = znode->zbranch[n]; 1416 return 1;
1421 mutex_unlock(&c->tnc_mutex); 1417 return 0;
1422
1423 err = ubifs_tnc_read_node(c, &zbr, node);
1424 return err;
1425
1426out:
1427 mutex_unlock(&c->tnc_mutex);
1428 return err;
1429} 1418}
1430 1419
1431/** 1420/**
@@ -1436,16 +1425,19 @@ out:
1436 * @lnum: LEB number is returned here 1425 * @lnum: LEB number is returned here
1437 * @offs: offset is returned here 1426 * @offs: offset is returned here
1438 * 1427 *
1439 * This function is the same as 'ubifs_tnc_lookup()' but it returns the node 1428 * This function look up and reads node with key @key. The caller has to make
1440 * location also. See 'ubifs_tnc_lookup()'. 1429 * sure the @node buffer is large enough to fit the node. Returns zero in case
1430 * of success, %-ENOENT if the node was not found, and a negative error code in
1431 * case of failure. The node location can be returned in @lnum and @offs.
1441 */ 1432 */
1442int ubifs_tnc_locate(struct ubifs_info *c, const union ubifs_key *key, 1433int ubifs_tnc_locate(struct ubifs_info *c, const union ubifs_key *key,
1443 void *node, int *lnum, int *offs) 1434 void *node, int *lnum, int *offs)
1444{ 1435{
1445 int found, n, err; 1436 int found, n, err, safely = 0, gc_seq1;
1446 struct ubifs_znode *znode; 1437 struct ubifs_znode *znode;
1447 struct ubifs_zbranch zbr, *zt; 1438 struct ubifs_zbranch zbr, *zt;
1448 1439
1440again:
1449 mutex_lock(&c->tnc_mutex); 1441 mutex_lock(&c->tnc_mutex);
1450 found = ubifs_lookup_level0(c, key, &znode, &n); 1442 found = ubifs_lookup_level0(c, key, &znode, &n);
1451 if (!found) { 1443 if (!found) {
@@ -1456,24 +1448,43 @@ int ubifs_tnc_locate(struct ubifs_info *c, const union ubifs_key *key,
1456 goto out; 1448 goto out;
1457 } 1449 }
1458 zt = &znode->zbranch[n]; 1450 zt = &znode->zbranch[n];
1451 if (lnum) {
1452 *lnum = zt->lnum;
1453 *offs = zt->offs;
1454 }
1459 if (is_hash_key(c, key)) { 1455 if (is_hash_key(c, key)) {
1460 /* 1456 /*
1461 * In this case the leaf node cache gets used, so we pass the 1457 * In this case the leaf node cache gets used, so we pass the
1462 * address of the zbranch and keep the mutex locked 1458 * address of the zbranch and keep the mutex locked
1463 */ 1459 */
1464 *lnum = zt->lnum;
1465 *offs = zt->offs;
1466 err = tnc_read_node_nm(c, zt, node); 1460 err = tnc_read_node_nm(c, zt, node);
1467 goto out; 1461 goto out;
1468 } 1462 }
1463 if (safely) {
1464 err = ubifs_tnc_read_node(c, zt, node);
1465 goto out;
1466 }
1467 /* Drop the TNC mutex prematurely and race with garbage collection */
1469 zbr = znode->zbranch[n]; 1468 zbr = znode->zbranch[n];
1469 gc_seq1 = c->gc_seq;
1470 mutex_unlock(&c->tnc_mutex); 1470 mutex_unlock(&c->tnc_mutex);
1471 1471
1472 *lnum = zbr.lnum; 1472 if (ubifs_get_wbuf(c, zbr.lnum)) {
1473 *offs = zbr.offs; 1473 /* We do not GC journal heads */
1474 err = ubifs_tnc_read_node(c, &zbr, node);
1475 return err;
1476 }
1474 1477
1475 err = ubifs_tnc_read_node(c, &zbr, node); 1478 err = fallible_read_node(c, key, &zbr, node);
1476 return err; 1479 if (maybe_leb_gced(c, zbr.lnum, gc_seq1)) {
1480 /*
1481 * The node may have been GC'ed out from under us so try again
1482 * while keeping the TNC mutex locked.
1483 */
1484 safely = 1;
1485 goto again;
1486 }
1487 return 0;
1477 1488
1478out: 1489out:
1479 mutex_unlock(&c->tnc_mutex); 1490 mutex_unlock(&c->tnc_mutex);
@@ -1498,7 +1509,6 @@ static int do_lookup_nm(struct ubifs_info *c, const union ubifs_key *key,
1498{ 1509{
1499 int found, n, err; 1510 int found, n, err;
1500 struct ubifs_znode *znode; 1511 struct ubifs_znode *znode;
1501 struct ubifs_zbranch zbr;
1502 1512
1503 dbg_tnc("name '%.*s' key %s", nm->len, nm->name, DBGKEY(key)); 1513 dbg_tnc("name '%.*s' key %s", nm->len, nm->name, DBGKEY(key));
1504 mutex_lock(&c->tnc_mutex); 1514 mutex_lock(&c->tnc_mutex);
@@ -1522,11 +1532,7 @@ static int do_lookup_nm(struct ubifs_info *c, const union ubifs_key *key,
1522 goto out_unlock; 1532 goto out_unlock;
1523 } 1533 }
1524 1534
1525 zbr = znode->zbranch[n]; 1535 err = tnc_read_node_nm(c, &znode->zbranch[n], node);
1526 mutex_unlock(&c->tnc_mutex);
1527
1528 err = tnc_read_node_nm(c, &zbr, node);
1529 return err;
1530 1536
1531out_unlock: 1537out_unlock:
1532 mutex_unlock(&c->tnc_mutex); 1538 mutex_unlock(&c->tnc_mutex);
diff --git a/fs/ubifs/ubifs-media.h b/fs/ubifs/ubifs-media.h
index bd2121f3426e..a9ecbd9af20d 100644
--- a/fs/ubifs/ubifs-media.h
+++ b/fs/ubifs/ubifs-media.h
@@ -87,7 +87,7 @@
87#define UBIFS_SK_LEN 8 87#define UBIFS_SK_LEN 8
88 88
89/* Minimum index tree fanout */ 89/* Minimum index tree fanout */
90#define UBIFS_MIN_FANOUT 2 90#define UBIFS_MIN_FANOUT 3
91 91
92/* Maximum number of levels in UBIFS indexing B-tree */ 92/* Maximum number of levels in UBIFS indexing B-tree */
93#define UBIFS_MAX_LEVELS 512 93#define UBIFS_MAX_LEVELS 512
diff --git a/fs/ubifs/ubifs.h b/fs/ubifs/ubifs.h
index d7f706f7a302..17c620b93eec 100644
--- a/fs/ubifs/ubifs.h
+++ b/fs/ubifs/ubifs.h
@@ -995,6 +995,9 @@ struct ubifs_mount_opts {
995 * @max_idx_node_sz: maximum indexing node aligned on 8-bytes boundary 995 * @max_idx_node_sz: maximum indexing node aligned on 8-bytes boundary
996 * @max_inode_sz: maximum possible inode size in bytes 996 * @max_inode_sz: maximum possible inode size in bytes
997 * @max_znode_sz: size of znode in bytes 997 * @max_znode_sz: size of znode in bytes
998 *
999 * @leb_overhead: how many bytes are wasted in an LEB when it is filled with
1000 * data nodes of maximum size - used in free space reporting
998 * @dead_wm: LEB dead space watermark 1001 * @dead_wm: LEB dead space watermark
999 * @dark_wm: LEB dark space watermark 1002 * @dark_wm: LEB dark space watermark
1000 * @block_cnt: count of 4KiB blocks on the FS 1003 * @block_cnt: count of 4KiB blocks on the FS
@@ -1028,6 +1031,8 @@ struct ubifs_mount_opts {
1028 * @sbuf: a buffer of LEB size used by GC and replay for scanning 1031 * @sbuf: a buffer of LEB size used by GC and replay for scanning
1029 * @idx_gc: list of index LEBs that have been garbage collected 1032 * @idx_gc: list of index LEBs that have been garbage collected
1030 * @idx_gc_cnt: number of elements on the idx_gc list 1033 * @idx_gc_cnt: number of elements on the idx_gc list
1034 * @gc_seq: incremented for every non-index LEB garbage collected
1035 * @gced_lnum: last non-index LEB that was garbage collected
1031 * 1036 *
1032 * @infos_list: links all 'ubifs_info' objects 1037 * @infos_list: links all 'ubifs_info' objects
1033 * @umount_mutex: serializes shrinker and un-mount 1038 * @umount_mutex: serializes shrinker and un-mount
@@ -1224,6 +1229,8 @@ struct ubifs_info {
1224 int max_idx_node_sz; 1229 int max_idx_node_sz;
1225 long long max_inode_sz; 1230 long long max_inode_sz;
1226 int max_znode_sz; 1231 int max_znode_sz;
1232
1233 int leb_overhead;
1227 int dead_wm; 1234 int dead_wm;
1228 int dark_wm; 1235 int dark_wm;
1229 int block_cnt; 1236 int block_cnt;
@@ -1257,6 +1264,8 @@ struct ubifs_info {
1257 void *sbuf; 1264 void *sbuf;
1258 struct list_head idx_gc; 1265 struct list_head idx_gc;
1259 int idx_gc_cnt; 1266 int idx_gc_cnt;
1267 volatile int gc_seq;
1268 volatile int gced_lnum;
1260 1269
1261 struct list_head infos_list; 1270 struct list_head infos_list;
1262 struct mutex umount_mutex; 1271 struct mutex umount_mutex;
@@ -1434,9 +1443,10 @@ void ubifs_release_ino_dirty(struct ubifs_info *c, struct inode *inode,
1434 struct ubifs_budget_req *req); 1443 struct ubifs_budget_req *req);
1435void ubifs_cancel_ino_op(struct ubifs_info *c, struct inode *inode, 1444void ubifs_cancel_ino_op(struct ubifs_info *c, struct inode *inode,
1436 struct ubifs_budget_req *req); 1445 struct ubifs_budget_req *req);
1437long long ubifs_budg_get_free_space(struct ubifs_info *c); 1446long long ubifs_get_free_space(struct ubifs_info *c);
1438int ubifs_calc_min_idx_lebs(struct ubifs_info *c); 1447int ubifs_calc_min_idx_lebs(struct ubifs_info *c);
1439void ubifs_convert_page_budget(struct ubifs_info *c); 1448void ubifs_convert_page_budget(struct ubifs_info *c);
1449long long ubifs_reported_space(const struct ubifs_info *c, uint64_t free);
1440long long ubifs_calc_available(const struct ubifs_info *c, int min_idx_lebs); 1450long long ubifs_calc_available(const struct ubifs_info *c, int min_idx_lebs);
1441 1451
1442/* find.c */ 1452/* find.c */
@@ -1451,8 +1461,6 @@ int ubifs_save_dirty_idx_lnums(struct ubifs_info *c);
1451/* tnc.c */ 1461/* tnc.c */
1452int ubifs_lookup_level0(struct ubifs_info *c, const union ubifs_key *key, 1462int ubifs_lookup_level0(struct ubifs_info *c, const union ubifs_key *key,
1453 struct ubifs_znode **zn, int *n); 1463 struct ubifs_znode **zn, int *n);
1454int ubifs_tnc_lookup(struct ubifs_info *c, const union ubifs_key *key,
1455 void *node);
1456int ubifs_tnc_lookup_nm(struct ubifs_info *c, const union ubifs_key *key, 1464int ubifs_tnc_lookup_nm(struct ubifs_info *c, const union ubifs_key *key,
1457 void *node, const struct qstr *nm); 1465 void *node, const struct qstr *nm);
1458int ubifs_tnc_locate(struct ubifs_info *c, const union ubifs_key *key, 1466int ubifs_tnc_locate(struct ubifs_info *c, const union ubifs_key *key,